{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 57529, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.7382537502824663e-05, "grad_norm": 3.1821536341829453, "learning_rate": 0.0, "loss": 0.5095, "step": 1 }, { "epoch": 3.4765075005649325e-05, "grad_norm": 3.778851259617751, "learning_rate": 5.793742757821553e-10, "loss": 0.6921, "step": 2 }, { "epoch": 5.214761250847399e-05, "grad_norm": 3.511712090316594, "learning_rate": 1.1587485515643105e-09, "loss": 0.4948, "step": 3 }, { "epoch": 6.953015001129865e-05, "grad_norm": 4.920977804483846, "learning_rate": 1.7381228273464658e-09, "loss": 0.5124, "step": 4 }, { "epoch": 8.69126875141233e-05, "grad_norm": 2.4439491810227927, "learning_rate": 2.317497103128621e-09, "loss": 0.5436, "step": 5 }, { "epoch": 0.00010429522501694798, "grad_norm": 3.3823048893795495, "learning_rate": 2.8968713789107765e-09, "loss": 0.5495, "step": 6 }, { "epoch": 0.00012167776251977263, "grad_norm": 3.9431269224413734, "learning_rate": 3.4762456546929316e-09, "loss": 0.5755, "step": 7 }, { "epoch": 0.0001390603000225973, "grad_norm": 2.5207977403502198, "learning_rate": 4.055619930475086e-09, "loss": 0.4315, "step": 8 }, { "epoch": 0.00015644283752542197, "grad_norm": 2.840161585512824, "learning_rate": 4.634994206257242e-09, "loss": 0.4915, "step": 9 }, { "epoch": 0.0001738253750282466, "grad_norm": 3.2622119077390574, "learning_rate": 5.214368482039397e-09, "loss": 0.4968, "step": 10 }, { "epoch": 0.00019120791253107128, "grad_norm": 4.041658104684947, "learning_rate": 5.793742757821553e-09, "loss": 0.4079, "step": 11 }, { "epoch": 0.00020859045003389595, "grad_norm": 2.877610411645185, "learning_rate": 6.373117033603707e-09, "loss": 0.4473, "step": 12 }, { "epoch": 0.00022597298753672062, "grad_norm": 3.5238692621824903, "learning_rate": 6.952491309385863e-09, "loss": 0.5479, "step": 13 }, { "epoch": 0.00024335552503954526, "grad_norm": 3.374359159269471, "learning_rate": 7.531865585168019e-09, "loss": 0.6661, "step": 14 }, { "epoch": 0.00026073806254236993, "grad_norm": 2.278212277880221, "learning_rate": 8.111239860950172e-09, "loss": 0.4923, "step": 15 }, { "epoch": 0.0002781206000451946, "grad_norm": 4.893024046372135, "learning_rate": 8.690614136732329e-09, "loss": 0.5233, "step": 16 }, { "epoch": 0.00029550313754801927, "grad_norm": 2.5524703763333982, "learning_rate": 9.269988412514484e-09, "loss": 0.5142, "step": 17 }, { "epoch": 0.00031288567505084394, "grad_norm": 2.282971429281273, "learning_rate": 9.849362688296638e-09, "loss": 0.478, "step": 18 }, { "epoch": 0.0003302682125536686, "grad_norm": 1.8653732271762462, "learning_rate": 1.0428736964078794e-08, "loss": 0.4585, "step": 19 }, { "epoch": 0.0003476507500564932, "grad_norm": 2.365016467765659, "learning_rate": 1.100811123986095e-08, "loss": 0.4261, "step": 20 }, { "epoch": 0.0003650332875593179, "grad_norm": 2.815712020009856, "learning_rate": 1.1587485515643106e-08, "loss": 0.5462, "step": 21 }, { "epoch": 0.00038241582506214257, "grad_norm": 2.158923267109747, "learning_rate": 1.216685979142526e-08, "loss": 0.3068, "step": 22 }, { "epoch": 0.00039979836256496724, "grad_norm": 2.45527643811235, "learning_rate": 1.2746234067207415e-08, "loss": 0.4602, "step": 23 }, { "epoch": 0.0004171809000677919, "grad_norm": 3.201954983828188, "learning_rate": 1.3325608342989571e-08, "loss": 0.4444, "step": 24 }, { "epoch": 0.0004345634375706166, "grad_norm": 1.8236828141364458, "learning_rate": 1.3904982618771726e-08, "loss": 0.3738, "step": 25 }, { "epoch": 0.00045194597507344124, "grad_norm": 2.612985933416675, "learning_rate": 1.4484356894553881e-08, "loss": 0.4024, "step": 26 }, { "epoch": 0.00046932851257626586, "grad_norm": 2.248864640863358, "learning_rate": 1.5063731170336038e-08, "loss": 0.4044, "step": 27 }, { "epoch": 0.00048671105007909053, "grad_norm": 2.924960753554596, "learning_rate": 1.564310544611819e-08, "loss": 0.6574, "step": 28 }, { "epoch": 0.0005040935875819153, "grad_norm": 2.137077336357922, "learning_rate": 1.6222479721900345e-08, "loss": 0.5471, "step": 29 }, { "epoch": 0.0005214761250847399, "grad_norm": 2.01043187867393, "learning_rate": 1.6801853997682505e-08, "loss": 0.5528, "step": 30 }, { "epoch": 0.0005388586625875645, "grad_norm": 3.869836662326409, "learning_rate": 1.7381228273464658e-08, "loss": 0.2895, "step": 31 }, { "epoch": 0.0005562412000903892, "grad_norm": 3.2156531116880407, "learning_rate": 1.7960602549246812e-08, "loss": 0.3722, "step": 32 }, { "epoch": 0.0005736237375932138, "grad_norm": 4.598246358753986, "learning_rate": 1.853997682502897e-08, "loss": 0.4651, "step": 33 }, { "epoch": 0.0005910062750960385, "grad_norm": 2.816758896194178, "learning_rate": 1.9119351100811122e-08, "loss": 0.439, "step": 34 }, { "epoch": 0.0006083888125988632, "grad_norm": 2.3335068789376177, "learning_rate": 1.9698725376593275e-08, "loss": 0.4422, "step": 35 }, { "epoch": 0.0006257713501016879, "grad_norm": 3.4657159210559434, "learning_rate": 2.0278099652375435e-08, "loss": 0.4973, "step": 36 }, { "epoch": 0.0006431538876045125, "grad_norm": 2.3249668273795305, "learning_rate": 2.085747392815759e-08, "loss": 0.5214, "step": 37 }, { "epoch": 0.0006605364251073372, "grad_norm": 2.2959452080147535, "learning_rate": 2.1436848203939745e-08, "loss": 0.3189, "step": 38 }, { "epoch": 0.0006779189626101618, "grad_norm": 2.4033793696656036, "learning_rate": 2.20162224797219e-08, "loss": 0.416, "step": 39 }, { "epoch": 0.0006953015001129865, "grad_norm": 5.472430025430975, "learning_rate": 2.2595596755504052e-08, "loss": 0.5941, "step": 40 }, { "epoch": 0.0007126840376158112, "grad_norm": 1.5998660656936814, "learning_rate": 2.3174971031286212e-08, "loss": 0.3868, "step": 41 }, { "epoch": 0.0007300665751186358, "grad_norm": 3.1991583982921097, "learning_rate": 2.3754345307068366e-08, "loss": 0.6803, "step": 42 }, { "epoch": 0.0007474491126214605, "grad_norm": 1.8716530439556396, "learning_rate": 2.433371958285052e-08, "loss": 0.4662, "step": 43 }, { "epoch": 0.0007648316501242851, "grad_norm": 2.758254202660399, "learning_rate": 2.4913093858632676e-08, "loss": 0.5964, "step": 44 }, { "epoch": 0.0007822141876271099, "grad_norm": 2.2721560125298317, "learning_rate": 2.549246813441483e-08, "loss": 0.3494, "step": 45 }, { "epoch": 0.0007995967251299345, "grad_norm": 1.9911495700234083, "learning_rate": 2.607184241019699e-08, "loss": 0.4769, "step": 46 }, { "epoch": 0.0008169792626327591, "grad_norm": 2.475318314525939, "learning_rate": 2.6651216685979143e-08, "loss": 0.4334, "step": 47 }, { "epoch": 0.0008343618001355838, "grad_norm": 1.768683084914585, "learning_rate": 2.7230590961761296e-08, "loss": 0.2934, "step": 48 }, { "epoch": 0.0008517443376384084, "grad_norm": 1.5472999334281903, "learning_rate": 2.7809965237543453e-08, "loss": 0.4302, "step": 49 }, { "epoch": 0.0008691268751412331, "grad_norm": 2.818439925891779, "learning_rate": 2.8389339513325606e-08, "loss": 0.5177, "step": 50 }, { "epoch": 0.0008865094126440578, "grad_norm": 1.8324997195424695, "learning_rate": 2.8968713789107763e-08, "loss": 0.304, "step": 51 }, { "epoch": 0.0009038919501468825, "grad_norm": 2.2114430105737455, "learning_rate": 2.954808806488992e-08, "loss": 0.4777, "step": 52 }, { "epoch": 0.0009212744876497071, "grad_norm": 2.1610133484161396, "learning_rate": 3.0127462340672076e-08, "loss": 0.3485, "step": 53 }, { "epoch": 0.0009386570251525317, "grad_norm": 1.4719468402725755, "learning_rate": 3.0706836616454226e-08, "loss": 0.3113, "step": 54 }, { "epoch": 0.0009560395626553564, "grad_norm": 1.6609981426176996, "learning_rate": 3.128621089223638e-08, "loss": 0.348, "step": 55 }, { "epoch": 0.0009734221001581811, "grad_norm": 3.1309580324614177, "learning_rate": 3.186558516801854e-08, "loss": 0.3978, "step": 56 }, { "epoch": 0.0009908046376610057, "grad_norm": 2.212488684658719, "learning_rate": 3.244495944380069e-08, "loss": 0.3016, "step": 57 }, { "epoch": 0.0010081871751638305, "grad_norm": 2.68342798637555, "learning_rate": 3.3024333719582846e-08, "loss": 0.2742, "step": 58 }, { "epoch": 0.0010255697126666551, "grad_norm": 22.528663832143884, "learning_rate": 3.360370799536501e-08, "loss": 0.5599, "step": 59 }, { "epoch": 0.0010429522501694797, "grad_norm": 1.5221837914175436, "learning_rate": 3.418308227114716e-08, "loss": 0.265, "step": 60 }, { "epoch": 0.0010603347876723044, "grad_norm": 1.7266679700513423, "learning_rate": 3.4762456546929317e-08, "loss": 0.5061, "step": 61 }, { "epoch": 0.001077717325175129, "grad_norm": 1.857550672360383, "learning_rate": 3.5341830822711473e-08, "loss": 0.3356, "step": 62 }, { "epoch": 0.0010950998626779538, "grad_norm": 2.665701042599295, "learning_rate": 3.5921205098493623e-08, "loss": 0.4421, "step": 63 }, { "epoch": 0.0011124824001807784, "grad_norm": 1.7555635241956187, "learning_rate": 3.650057937427578e-08, "loss": 0.5152, "step": 64 }, { "epoch": 0.001129864937683603, "grad_norm": 6.871260979370008, "learning_rate": 3.707995365005794e-08, "loss": 0.6362, "step": 65 }, { "epoch": 0.0011472474751864276, "grad_norm": 1.6783591570403977, "learning_rate": 3.765932792584009e-08, "loss": 0.6324, "step": 66 }, { "epoch": 0.0011646300126892525, "grad_norm": 1.3185364485321875, "learning_rate": 3.8238702201622244e-08, "loss": 0.4181, "step": 67 }, { "epoch": 0.001182012550192077, "grad_norm": 1.7466992104513912, "learning_rate": 3.88180764774044e-08, "loss": 0.5838, "step": 68 }, { "epoch": 0.0011993950876949017, "grad_norm": 2.057674772850659, "learning_rate": 3.939745075318655e-08, "loss": 0.4907, "step": 69 }, { "epoch": 0.0012167776251977263, "grad_norm": 4.936698667326129, "learning_rate": 3.9976825028968714e-08, "loss": 0.5005, "step": 70 }, { "epoch": 0.001234160162700551, "grad_norm": 2.332660865561236, "learning_rate": 4.055619930475087e-08, "loss": 0.3704, "step": 71 }, { "epoch": 0.0012515427002033758, "grad_norm": 1.8294959185550248, "learning_rate": 4.113557358053303e-08, "loss": 0.5733, "step": 72 }, { "epoch": 0.0012689252377062004, "grad_norm": 1.7178156554375532, "learning_rate": 4.171494785631518e-08, "loss": 0.3204, "step": 73 }, { "epoch": 0.001286307775209025, "grad_norm": 2.4203080342497674, "learning_rate": 4.2294322132097334e-08, "loss": 0.281, "step": 74 }, { "epoch": 0.0013036903127118496, "grad_norm": 3.492341587323392, "learning_rate": 4.287369640787949e-08, "loss": 0.4204, "step": 75 }, { "epoch": 0.0013210728502146744, "grad_norm": 3.043454905358116, "learning_rate": 4.345307068366164e-08, "loss": 0.7784, "step": 76 }, { "epoch": 0.001338455387717499, "grad_norm": 1.983091826316898, "learning_rate": 4.40324449594438e-08, "loss": 0.412, "step": 77 }, { "epoch": 0.0013558379252203237, "grad_norm": 2.782668726621937, "learning_rate": 4.4611819235225954e-08, "loss": 0.4602, "step": 78 }, { "epoch": 0.0013732204627231483, "grad_norm": 2.180875793680845, "learning_rate": 4.5191193511008104e-08, "loss": 0.2852, "step": 79 }, { "epoch": 0.001390603000225973, "grad_norm": 4.469864721698117, "learning_rate": 4.577056778679027e-08, "loss": 0.4591, "step": 80 }, { "epoch": 0.0014079855377287977, "grad_norm": 3.994903941771515, "learning_rate": 4.6349942062572424e-08, "loss": 0.6811, "step": 81 }, { "epoch": 0.0014253680752316224, "grad_norm": 4.268676692914501, "learning_rate": 4.6929316338354574e-08, "loss": 0.4687, "step": 82 }, { "epoch": 0.001442750612734447, "grad_norm": 13.417040266542077, "learning_rate": 4.750869061413673e-08, "loss": 0.679, "step": 83 }, { "epoch": 0.0014601331502372716, "grad_norm": 1.8903611845288872, "learning_rate": 4.808806488991889e-08, "loss": 0.3778, "step": 84 }, { "epoch": 0.0014775156877400962, "grad_norm": 1.9981808004115547, "learning_rate": 4.866743916570104e-08, "loss": 0.5467, "step": 85 }, { "epoch": 0.001494898225242921, "grad_norm": 1.3937320210454451, "learning_rate": 4.9246813441483195e-08, "loss": 0.2864, "step": 86 }, { "epoch": 0.0015122807627457456, "grad_norm": 2.0621892296290194, "learning_rate": 4.982618771726535e-08, "loss": 0.4713, "step": 87 }, { "epoch": 0.0015296633002485703, "grad_norm": 2.0460242393850883, "learning_rate": 5.04055619930475e-08, "loss": 0.5423, "step": 88 }, { "epoch": 0.0015470458377513949, "grad_norm": 2.653445141603392, "learning_rate": 5.098493626882966e-08, "loss": 0.4794, "step": 89 }, { "epoch": 0.0015644283752542197, "grad_norm": 1.8597903181657265, "learning_rate": 5.1564310544611815e-08, "loss": 0.3006, "step": 90 }, { "epoch": 0.0015818109127570443, "grad_norm": 1.1797614364919504, "learning_rate": 5.214368482039398e-08, "loss": 0.2392, "step": 91 }, { "epoch": 0.001599193450259869, "grad_norm": 1.5388105092598012, "learning_rate": 5.272305909617613e-08, "loss": 0.4773, "step": 92 }, { "epoch": 0.0016165759877626936, "grad_norm": 1.361262808940005, "learning_rate": 5.3302433371958285e-08, "loss": 0.5018, "step": 93 }, { "epoch": 0.0016339585252655182, "grad_norm": 1.610120384712262, "learning_rate": 5.388180764774044e-08, "loss": 0.3898, "step": 94 }, { "epoch": 0.001651341062768343, "grad_norm": 2.3216352090308066, "learning_rate": 5.446118192352259e-08, "loss": 0.4413, "step": 95 }, { "epoch": 0.0016687236002711676, "grad_norm": 1.7199652489051378, "learning_rate": 5.504055619930475e-08, "loss": 0.3707, "step": 96 }, { "epoch": 0.0016861061377739922, "grad_norm": 2.0475146872560654, "learning_rate": 5.5619930475086905e-08, "loss": 0.4314, "step": 97 }, { "epoch": 0.0017034886752768168, "grad_norm": 1.6239798201206015, "learning_rate": 5.6199304750869055e-08, "loss": 0.3948, "step": 98 }, { "epoch": 0.0017208712127796415, "grad_norm": 1.998545337871701, "learning_rate": 5.677867902665121e-08, "loss": 0.4237, "step": 99 }, { "epoch": 0.0017382537502824663, "grad_norm": 1.8782423796399481, "learning_rate": 5.735805330243337e-08, "loss": 0.5462, "step": 100 }, { "epoch": 0.001755636287785291, "grad_norm": 2.700234293214862, "learning_rate": 5.7937427578215525e-08, "loss": 0.2333, "step": 101 }, { "epoch": 0.0017730188252881155, "grad_norm": 3.028568138295861, "learning_rate": 5.851680185399768e-08, "loss": 0.4864, "step": 102 }, { "epoch": 0.0017904013627909401, "grad_norm": 2.100166015920455, "learning_rate": 5.909617612977984e-08, "loss": 0.3501, "step": 103 }, { "epoch": 0.001807783900293765, "grad_norm": 3.505195892681497, "learning_rate": 5.967555040556199e-08, "loss": 0.3264, "step": 104 }, { "epoch": 0.0018251664377965896, "grad_norm": 2.110055259332501, "learning_rate": 6.025492468134415e-08, "loss": 0.4487, "step": 105 }, { "epoch": 0.0018425489752994142, "grad_norm": 2.424097639578042, "learning_rate": 6.08342989571263e-08, "loss": 0.8053, "step": 106 }, { "epoch": 0.0018599315128022388, "grad_norm": 2.7120634342131935, "learning_rate": 6.141367323290845e-08, "loss": 0.5071, "step": 107 }, { "epoch": 0.0018773140503050634, "grad_norm": 1.8521460274870147, "learning_rate": 6.199304750869062e-08, "loss": 0.3618, "step": 108 }, { "epoch": 0.0018946965878078883, "grad_norm": 2.445697344089993, "learning_rate": 6.257242178447277e-08, "loss": 0.5052, "step": 109 }, { "epoch": 0.0019120791253107129, "grad_norm": 3.092151530740555, "learning_rate": 6.315179606025492e-08, "loss": 0.2556, "step": 110 }, { "epoch": 0.0019294616628135375, "grad_norm": 1.574783055215655, "learning_rate": 6.373117033603708e-08, "loss": 0.2624, "step": 111 }, { "epoch": 0.0019468442003163621, "grad_norm": 2.598691407477304, "learning_rate": 6.431054461181923e-08, "loss": 0.3878, "step": 112 }, { "epoch": 0.0019642267378191867, "grad_norm": 2.3134320942624713, "learning_rate": 6.488991888760138e-08, "loss": 0.2997, "step": 113 }, { "epoch": 0.0019816092753220113, "grad_norm": 1.8003541109706063, "learning_rate": 6.546929316338354e-08, "loss": 0.4197, "step": 114 }, { "epoch": 0.001998991812824836, "grad_norm": 2.777663039191112, "learning_rate": 6.604866743916569e-08, "loss": 0.2601, "step": 115 }, { "epoch": 0.002016374350327661, "grad_norm": 1.767619241057746, "learning_rate": 6.662804171494784e-08, "loss": 0.3257, "step": 116 }, { "epoch": 0.0020337568878304856, "grad_norm": 2.1917120827973617, "learning_rate": 6.720741599073002e-08, "loss": 0.4758, "step": 117 }, { "epoch": 0.0020511394253333102, "grad_norm": 1.9691142350890345, "learning_rate": 6.778679026651217e-08, "loss": 0.2434, "step": 118 }, { "epoch": 0.002068521962836135, "grad_norm": 1.2911535970498984, "learning_rate": 6.836616454229432e-08, "loss": 0.3604, "step": 119 }, { "epoch": 0.0020859045003389595, "grad_norm": 2.0005656294608625, "learning_rate": 6.894553881807648e-08, "loss": 0.2345, "step": 120 }, { "epoch": 0.002103287037841784, "grad_norm": 1.653939939857536, "learning_rate": 6.952491309385863e-08, "loss": 0.4048, "step": 121 }, { "epoch": 0.0021206695753446087, "grad_norm": 2.21603194961244, "learning_rate": 7.010428736964078e-08, "loss": 0.4416, "step": 122 }, { "epoch": 0.0021380521128474333, "grad_norm": 2.5436071881332336, "learning_rate": 7.068366164542295e-08, "loss": 0.3863, "step": 123 }, { "epoch": 0.002155434650350258, "grad_norm": 1.8752460577187933, "learning_rate": 7.12630359212051e-08, "loss": 0.5305, "step": 124 }, { "epoch": 0.002172817187853083, "grad_norm": 2.2515486094911563, "learning_rate": 7.184241019698725e-08, "loss": 0.5023, "step": 125 }, { "epoch": 0.0021901997253559076, "grad_norm": 3.488615743003316, "learning_rate": 7.242178447276941e-08, "loss": 0.5738, "step": 126 }, { "epoch": 0.002207582262858732, "grad_norm": 2.199420517596961, "learning_rate": 7.300115874855156e-08, "loss": 0.5818, "step": 127 }, { "epoch": 0.002224964800361557, "grad_norm": 1.5624614799205132, "learning_rate": 7.358053302433371e-08, "loss": 0.4111, "step": 128 }, { "epoch": 0.0022423473378643814, "grad_norm": 2.041205772960234, "learning_rate": 7.415990730011587e-08, "loss": 0.3755, "step": 129 }, { "epoch": 0.002259729875367206, "grad_norm": 1.9159978495676304, "learning_rate": 7.473928157589802e-08, "loss": 0.5074, "step": 130 }, { "epoch": 0.0022771124128700307, "grad_norm": 1.9237617149321191, "learning_rate": 7.531865585168017e-08, "loss": 0.3088, "step": 131 }, { "epoch": 0.0022944949503728553, "grad_norm": 1.6837308373810862, "learning_rate": 7.589803012746234e-08, "loss": 0.409, "step": 132 }, { "epoch": 0.00231187748787568, "grad_norm": 2.6037861261506943, "learning_rate": 7.647740440324449e-08, "loss": 0.4846, "step": 133 }, { "epoch": 0.002329260025378505, "grad_norm": 1.8389864274335497, "learning_rate": 7.705677867902664e-08, "loss": 0.5381, "step": 134 }, { "epoch": 0.0023466425628813296, "grad_norm": 1.5908044625044533, "learning_rate": 7.76361529548088e-08, "loss": 0.381, "step": 135 }, { "epoch": 0.002364025100384154, "grad_norm": 1.7101305024112055, "learning_rate": 7.821552723059095e-08, "loss": 0.5562, "step": 136 }, { "epoch": 0.002381407637886979, "grad_norm": 2.1571949283313256, "learning_rate": 7.87949015063731e-08, "loss": 0.5895, "step": 137 }, { "epoch": 0.0023987901753898034, "grad_norm": 2.500424934869072, "learning_rate": 7.937427578215528e-08, "loss": 0.2205, "step": 138 }, { "epoch": 0.002416172712892628, "grad_norm": 2.1545672144357857, "learning_rate": 7.995365005793743e-08, "loss": 0.3885, "step": 139 }, { "epoch": 0.0024335552503954526, "grad_norm": 1.7034363606092704, "learning_rate": 8.053302433371959e-08, "loss": 0.329, "step": 140 }, { "epoch": 0.0024509377878982773, "grad_norm": 3.038580627789881, "learning_rate": 8.111239860950174e-08, "loss": 0.3619, "step": 141 }, { "epoch": 0.002468320325401102, "grad_norm": 2.0095162589849958, "learning_rate": 8.169177288528389e-08, "loss": 0.3359, "step": 142 }, { "epoch": 0.002485702862903927, "grad_norm": 1.6535252615595168, "learning_rate": 8.227114716106605e-08, "loss": 0.2956, "step": 143 }, { "epoch": 0.0025030854004067515, "grad_norm": 2.711010507657419, "learning_rate": 8.28505214368482e-08, "loss": 0.5978, "step": 144 }, { "epoch": 0.002520467937909576, "grad_norm": 2.6184072317965352, "learning_rate": 8.342989571263035e-08, "loss": 0.4488, "step": 145 }, { "epoch": 0.0025378504754124008, "grad_norm": 2.0505536516491243, "learning_rate": 8.400926998841252e-08, "loss": 0.4595, "step": 146 }, { "epoch": 0.0025552330129152254, "grad_norm": 1.6165153939045789, "learning_rate": 8.458864426419467e-08, "loss": 0.2888, "step": 147 }, { "epoch": 0.00257261555041805, "grad_norm": 2.271689526289292, "learning_rate": 8.516801853997682e-08, "loss": 0.6333, "step": 148 }, { "epoch": 0.0025899980879208746, "grad_norm": 2.614358500721218, "learning_rate": 8.574739281575898e-08, "loss": 0.5115, "step": 149 }, { "epoch": 0.0026073806254236992, "grad_norm": 1.2504688087981153, "learning_rate": 8.632676709154113e-08, "loss": 0.3605, "step": 150 }, { "epoch": 0.002624763162926524, "grad_norm": 3.185729728555962, "learning_rate": 8.690614136732328e-08, "loss": 0.5052, "step": 151 }, { "epoch": 0.002642145700429349, "grad_norm": 2.0400839893731026, "learning_rate": 8.748551564310544e-08, "loss": 0.5665, "step": 152 }, { "epoch": 0.0026595282379321735, "grad_norm": 1.8510266194530731, "learning_rate": 8.80648899188876e-08, "loss": 0.2831, "step": 153 }, { "epoch": 0.002676910775434998, "grad_norm": 2.928455910828709, "learning_rate": 8.864426419466975e-08, "loss": 0.4159, "step": 154 }, { "epoch": 0.0026942933129378227, "grad_norm": 2.1664712115209634, "learning_rate": 8.922363847045191e-08, "loss": 0.4114, "step": 155 }, { "epoch": 0.0027116758504406474, "grad_norm": 1.9683290088535421, "learning_rate": 8.980301274623406e-08, "loss": 0.3908, "step": 156 }, { "epoch": 0.002729058387943472, "grad_norm": 2.05160315491997, "learning_rate": 9.038238702201621e-08, "loss": 0.5321, "step": 157 }, { "epoch": 0.0027464409254462966, "grad_norm": 1.9297117674817086, "learning_rate": 9.096176129779837e-08, "loss": 0.3526, "step": 158 }, { "epoch": 0.002763823462949121, "grad_norm": 2.079636308305834, "learning_rate": 9.154113557358054e-08, "loss": 0.3223, "step": 159 }, { "epoch": 0.002781206000451946, "grad_norm": 1.6565115183630599, "learning_rate": 9.212050984936269e-08, "loss": 0.3327, "step": 160 }, { "epoch": 0.0027985885379547704, "grad_norm": 2.0639759494910415, "learning_rate": 9.269988412514485e-08, "loss": 0.4164, "step": 161 }, { "epoch": 0.0028159710754575955, "grad_norm": 2.845530608599441, "learning_rate": 9.3279258400927e-08, "loss": 0.4165, "step": 162 }, { "epoch": 0.00283335361296042, "grad_norm": 1.7199424324741177, "learning_rate": 9.385863267670915e-08, "loss": 0.4552, "step": 163 }, { "epoch": 0.0028507361504632447, "grad_norm": 6.882829270339343, "learning_rate": 9.443800695249131e-08, "loss": 0.3909, "step": 164 }, { "epoch": 0.0028681186879660693, "grad_norm": 1.912440528396514, "learning_rate": 9.501738122827346e-08, "loss": 0.307, "step": 165 }, { "epoch": 0.002885501225468894, "grad_norm": 1.7476471151507078, "learning_rate": 9.559675550405561e-08, "loss": 0.2844, "step": 166 }, { "epoch": 0.0029028837629717186, "grad_norm": 1.6847426836501749, "learning_rate": 9.617612977983778e-08, "loss": 0.3412, "step": 167 }, { "epoch": 0.002920266300474543, "grad_norm": 1.6142676028094989, "learning_rate": 9.675550405561993e-08, "loss": 0.2976, "step": 168 }, { "epoch": 0.002937648837977368, "grad_norm": 2.2260345589486152, "learning_rate": 9.733487833140208e-08, "loss": 0.3652, "step": 169 }, { "epoch": 0.0029550313754801924, "grad_norm": 2.286522759478066, "learning_rate": 9.791425260718424e-08, "loss": 0.4325, "step": 170 }, { "epoch": 0.0029724139129830175, "grad_norm": 2.6073176088956798, "learning_rate": 9.849362688296639e-08, "loss": 0.3486, "step": 171 }, { "epoch": 0.002989796450485842, "grad_norm": 3.3888686737452507, "learning_rate": 9.907300115874854e-08, "loss": 0.5767, "step": 172 }, { "epoch": 0.0030071789879886667, "grad_norm": 2.3387881524756056, "learning_rate": 9.96523754345307e-08, "loss": 0.3681, "step": 173 }, { "epoch": 0.0030245615254914913, "grad_norm": 2.698691856307262, "learning_rate": 1.0023174971031285e-07, "loss": 0.3715, "step": 174 }, { "epoch": 0.003041944062994316, "grad_norm": 1.5894684564105503, "learning_rate": 1.00811123986095e-07, "loss": 0.4425, "step": 175 }, { "epoch": 0.0030593266004971405, "grad_norm": 1.8500142088410587, "learning_rate": 1.0139049826187717e-07, "loss": 0.341, "step": 176 }, { "epoch": 0.003076709137999965, "grad_norm": 2.9632522572419218, "learning_rate": 1.0196987253765932e-07, "loss": 0.4332, "step": 177 }, { "epoch": 0.0030940916755027898, "grad_norm": 1.615319054972068, "learning_rate": 1.0254924681344147e-07, "loss": 0.255, "step": 178 }, { "epoch": 0.0031114742130056144, "grad_norm": 2.0982374526423877, "learning_rate": 1.0312862108922363e-07, "loss": 0.4602, "step": 179 }, { "epoch": 0.0031288567505084394, "grad_norm": 1.0864296273480183, "learning_rate": 1.0370799536500579e-07, "loss": 0.2395, "step": 180 }, { "epoch": 0.003146239288011264, "grad_norm": 1.8518812857526687, "learning_rate": 1.0428736964078796e-07, "loss": 0.341, "step": 181 }, { "epoch": 0.0031636218255140887, "grad_norm": 2.5240202945484826, "learning_rate": 1.048667439165701e-07, "loss": 0.2973, "step": 182 }, { "epoch": 0.0031810043630169133, "grad_norm": 1.9069706206868364, "learning_rate": 1.0544611819235226e-07, "loss": 0.3757, "step": 183 }, { "epoch": 0.003198386900519738, "grad_norm": 2.897978912517424, "learning_rate": 1.0602549246813442e-07, "loss": 0.2675, "step": 184 }, { "epoch": 0.0032157694380225625, "grad_norm": 2.0014508399091118, "learning_rate": 1.0660486674391657e-07, "loss": 0.3982, "step": 185 }, { "epoch": 0.003233151975525387, "grad_norm": 2.346137418922999, "learning_rate": 1.0718424101969872e-07, "loss": 0.5054, "step": 186 }, { "epoch": 0.0032505345130282117, "grad_norm": 2.6683114627237545, "learning_rate": 1.0776361529548088e-07, "loss": 0.5393, "step": 187 }, { "epoch": 0.0032679170505310363, "grad_norm": 2.06494372433753, "learning_rate": 1.0834298957126303e-07, "loss": 0.3584, "step": 188 }, { "epoch": 0.0032852995880338614, "grad_norm": 1.3002431605121096, "learning_rate": 1.0892236384704518e-07, "loss": 0.3608, "step": 189 }, { "epoch": 0.003302682125536686, "grad_norm": 2.281682378218076, "learning_rate": 1.0950173812282735e-07, "loss": 0.2929, "step": 190 }, { "epoch": 0.0033200646630395106, "grad_norm": 1.76325233221378, "learning_rate": 1.100811123986095e-07, "loss": 0.3022, "step": 191 }, { "epoch": 0.0033374472005423352, "grad_norm": 1.5527918586293048, "learning_rate": 1.1066048667439165e-07, "loss": 0.285, "step": 192 }, { "epoch": 0.00335482973804516, "grad_norm": 3.3754828005838373, "learning_rate": 1.1123986095017381e-07, "loss": 0.6335, "step": 193 }, { "epoch": 0.0033722122755479845, "grad_norm": 1.813866317517564, "learning_rate": 1.1181923522595596e-07, "loss": 0.3505, "step": 194 }, { "epoch": 0.003389594813050809, "grad_norm": 1.7653923928102653, "learning_rate": 1.1239860950173811e-07, "loss": 0.2941, "step": 195 }, { "epoch": 0.0034069773505536337, "grad_norm": 2.033199249567724, "learning_rate": 1.1297798377752027e-07, "loss": 0.241, "step": 196 }, { "epoch": 0.0034243598880564583, "grad_norm": 3.066406072756783, "learning_rate": 1.1355735805330242e-07, "loss": 0.7475, "step": 197 }, { "epoch": 0.003441742425559283, "grad_norm": 1.912526490993174, "learning_rate": 1.1413673232908457e-07, "loss": 0.2615, "step": 198 }, { "epoch": 0.003459124963062108, "grad_norm": 1.9568464562489487, "learning_rate": 1.1471610660486674e-07, "loss": 0.3475, "step": 199 }, { "epoch": 0.0034765075005649326, "grad_norm": 2.3427131287499967, "learning_rate": 1.1529548088064889e-07, "loss": 0.3849, "step": 200 }, { "epoch": 0.003493890038067757, "grad_norm": 1.4151171122678523, "learning_rate": 1.1587485515643105e-07, "loss": 0.2697, "step": 201 }, { "epoch": 0.003511272575570582, "grad_norm": 1.610924347293655, "learning_rate": 1.1645422943221321e-07, "loss": 0.3713, "step": 202 }, { "epoch": 0.0035286551130734064, "grad_norm": 1.4047779404652014, "learning_rate": 1.1703360370799536e-07, "loss": 0.3281, "step": 203 }, { "epoch": 0.003546037650576231, "grad_norm": 2.126988457827536, "learning_rate": 1.1761297798377751e-07, "loss": 0.4159, "step": 204 }, { "epoch": 0.0035634201880790557, "grad_norm": 2.2061214044652475, "learning_rate": 1.1819235225955968e-07, "loss": 0.4648, "step": 205 }, { "epoch": 0.0035808027255818803, "grad_norm": 2.304926109883653, "learning_rate": 1.1877172653534183e-07, "loss": 0.4423, "step": 206 }, { "epoch": 0.003598185263084705, "grad_norm": 1.8184750980824207, "learning_rate": 1.1935110081112398e-07, "loss": 0.4043, "step": 207 }, { "epoch": 0.00361556780058753, "grad_norm": 1.9113137675011236, "learning_rate": 1.1993047508690613e-07, "loss": 0.3625, "step": 208 }, { "epoch": 0.0036329503380903546, "grad_norm": 2.4378022470788134, "learning_rate": 1.205098493626883e-07, "loss": 0.5869, "step": 209 }, { "epoch": 0.003650332875593179, "grad_norm": 2.2105715019819985, "learning_rate": 1.2108922363847045e-07, "loss": 0.4055, "step": 210 }, { "epoch": 0.003667715413096004, "grad_norm": 1.645745597839629, "learning_rate": 1.216685979142526e-07, "loss": 0.3461, "step": 211 }, { "epoch": 0.0036850979505988284, "grad_norm": 2.6374940437552907, "learning_rate": 1.2224797219003475e-07, "loss": 0.2707, "step": 212 }, { "epoch": 0.003702480488101653, "grad_norm": 2.207202433129023, "learning_rate": 1.228273464658169e-07, "loss": 0.3783, "step": 213 }, { "epoch": 0.0037198630256044776, "grad_norm": 4.1988252490555, "learning_rate": 1.2340672074159906e-07, "loss": 0.4057, "step": 214 }, { "epoch": 0.0037372455631073023, "grad_norm": 2.9516026216343954, "learning_rate": 1.2398609501738123e-07, "loss": 0.4076, "step": 215 }, { "epoch": 0.003754628100610127, "grad_norm": 2.3067913760034164, "learning_rate": 1.2456546929316338e-07, "loss": 0.5701, "step": 216 }, { "epoch": 0.003772010638112952, "grad_norm": 1.8818842650577416, "learning_rate": 1.2514484356894553e-07, "loss": 0.3139, "step": 217 }, { "epoch": 0.0037893931756157765, "grad_norm": 2.1809052260713973, "learning_rate": 1.257242178447277e-07, "loss": 0.377, "step": 218 }, { "epoch": 0.003806775713118601, "grad_norm": 1.6605898798764591, "learning_rate": 1.2630359212050983e-07, "loss": 0.4683, "step": 219 }, { "epoch": 0.0038241582506214258, "grad_norm": 2.6106321465632143, "learning_rate": 1.26882966396292e-07, "loss": 0.6982, "step": 220 }, { "epoch": 0.0038415407881242504, "grad_norm": 2.785618505794709, "learning_rate": 1.2746234067207416e-07, "loss": 0.3042, "step": 221 }, { "epoch": 0.003858923325627075, "grad_norm": 2.084108760123271, "learning_rate": 1.280417149478563e-07, "loss": 0.3584, "step": 222 }, { "epoch": 0.0038763058631298996, "grad_norm": 2.5355682327033886, "learning_rate": 1.2862108922363846e-07, "loss": 0.4231, "step": 223 }, { "epoch": 0.0038936884006327242, "grad_norm": 3.6687103938394503, "learning_rate": 1.2920046349942064e-07, "loss": 0.7336, "step": 224 }, { "epoch": 0.003911070938135549, "grad_norm": 1.9934187915643407, "learning_rate": 1.2977983777520276e-07, "loss": 0.6552, "step": 225 }, { "epoch": 0.0039284534756383735, "grad_norm": 3.327193344499363, "learning_rate": 1.3035921205098494e-07, "loss": 0.6303, "step": 226 }, { "epoch": 0.0039458360131411985, "grad_norm": 3.4671324668748267, "learning_rate": 1.3093858632676709e-07, "loss": 0.4055, "step": 227 }, { "epoch": 0.003963218550644023, "grad_norm": 2.186544539121781, "learning_rate": 1.3151796060254924e-07, "loss": 0.2937, "step": 228 }, { "epoch": 0.003980601088146848, "grad_norm": 2.4256919748901478, "learning_rate": 1.3209733487833139e-07, "loss": 0.4522, "step": 229 }, { "epoch": 0.003997983625649672, "grad_norm": 1.9388814857118426, "learning_rate": 1.3267670915411356e-07, "loss": 0.3799, "step": 230 }, { "epoch": 0.004015366163152497, "grad_norm": 1.2378097310721972, "learning_rate": 1.3325608342989569e-07, "loss": 0.2915, "step": 231 }, { "epoch": 0.004032748700655322, "grad_norm": 1.337442450656628, "learning_rate": 1.3383545770567786e-07, "loss": 0.5812, "step": 232 }, { "epoch": 0.004050131238158146, "grad_norm": 3.504214975128119, "learning_rate": 1.3441483198146004e-07, "loss": 0.515, "step": 233 }, { "epoch": 0.004067513775660971, "grad_norm": 2.2908935112993456, "learning_rate": 1.3499420625724216e-07, "loss": 0.3905, "step": 234 }, { "epoch": 0.004084896313163795, "grad_norm": 2.119117905617723, "learning_rate": 1.3557358053302434e-07, "loss": 0.3493, "step": 235 }, { "epoch": 0.0041022788506666205, "grad_norm": 2.55847400835347, "learning_rate": 1.361529548088065e-07, "loss": 0.5151, "step": 236 }, { "epoch": 0.004119661388169445, "grad_norm": 2.3778635376641266, "learning_rate": 1.3673232908458864e-07, "loss": 0.355, "step": 237 }, { "epoch": 0.00413704392567227, "grad_norm": 2.118239341727318, "learning_rate": 1.373117033603708e-07, "loss": 0.3415, "step": 238 }, { "epoch": 0.004154426463175094, "grad_norm": 1.6201802329672945, "learning_rate": 1.3789107763615297e-07, "loss": 0.27, "step": 239 }, { "epoch": 0.004171809000677919, "grad_norm": 1.8892119566496601, "learning_rate": 1.384704519119351e-07, "loss": 0.4892, "step": 240 }, { "epoch": 0.004189191538180744, "grad_norm": 1.9971191865569144, "learning_rate": 1.3904982618771727e-07, "loss": 0.3961, "step": 241 }, { "epoch": 0.004206574075683568, "grad_norm": 2.3636963832806326, "learning_rate": 1.3962920046349942e-07, "loss": 0.2681, "step": 242 }, { "epoch": 0.004223956613186393, "grad_norm": 4.228249225311098, "learning_rate": 1.4020857473928157e-07, "loss": 0.4371, "step": 243 }, { "epoch": 0.004241339150689217, "grad_norm": 1.669321657550328, "learning_rate": 1.4078794901506372e-07, "loss": 0.3197, "step": 244 }, { "epoch": 0.0042587216881920424, "grad_norm": 1.4132945529313778, "learning_rate": 1.413673232908459e-07, "loss": 0.5597, "step": 245 }, { "epoch": 0.004276104225694867, "grad_norm": 1.8211527330721065, "learning_rate": 1.4194669756662802e-07, "loss": 0.5671, "step": 246 }, { "epoch": 0.004293486763197692, "grad_norm": 2.18749875741686, "learning_rate": 1.425260718424102e-07, "loss": 0.4189, "step": 247 }, { "epoch": 0.004310869300700516, "grad_norm": 1.694328691354488, "learning_rate": 1.4310544611819234e-07, "loss": 0.5752, "step": 248 }, { "epoch": 0.004328251838203341, "grad_norm": 1.8978688378659176, "learning_rate": 1.436848203939745e-07, "loss": 0.3271, "step": 249 }, { "epoch": 0.004345634375706166, "grad_norm": 2.495252903255388, "learning_rate": 1.4426419466975664e-07, "loss": 0.6034, "step": 250 }, { "epoch": 0.00436301691320899, "grad_norm": 1.6119826577848793, "learning_rate": 1.4484356894553882e-07, "loss": 0.2885, "step": 251 }, { "epoch": 0.004380399450711815, "grad_norm": 2.1833731325091796, "learning_rate": 1.4542294322132094e-07, "loss": 0.3606, "step": 252 }, { "epoch": 0.004397781988214639, "grad_norm": 1.9353163126853186, "learning_rate": 1.4600231749710312e-07, "loss": 0.6297, "step": 253 }, { "epoch": 0.004415164525717464, "grad_norm": 2.0932256032613608, "learning_rate": 1.465816917728853e-07, "loss": 0.452, "step": 254 }, { "epoch": 0.004432547063220289, "grad_norm": 2.5948809040472627, "learning_rate": 1.4716106604866742e-07, "loss": 0.544, "step": 255 }, { "epoch": 0.004449929600723114, "grad_norm": 1.790600631673023, "learning_rate": 1.477404403244496e-07, "loss": 0.4506, "step": 256 }, { "epoch": 0.004467312138225938, "grad_norm": 2.0814235942432333, "learning_rate": 1.4831981460023175e-07, "loss": 0.5331, "step": 257 }, { "epoch": 0.004484694675728763, "grad_norm": 2.5918663212087325, "learning_rate": 1.488991888760139e-07, "loss": 0.4405, "step": 258 }, { "epoch": 0.004502077213231588, "grad_norm": 2.106071475622504, "learning_rate": 1.4947856315179605e-07, "loss": 0.4495, "step": 259 }, { "epoch": 0.004519459750734412, "grad_norm": 1.340489271563713, "learning_rate": 1.5005793742757822e-07, "loss": 0.2575, "step": 260 }, { "epoch": 0.004536842288237237, "grad_norm": 2.7537856897445505, "learning_rate": 1.5063731170336035e-07, "loss": 0.4296, "step": 261 }, { "epoch": 0.004554224825740061, "grad_norm": 1.9016619870244402, "learning_rate": 1.5121668597914252e-07, "loss": 0.5593, "step": 262 }, { "epoch": 0.004571607363242886, "grad_norm": 1.4962578580280124, "learning_rate": 1.5179606025492467e-07, "loss": 0.2671, "step": 263 }, { "epoch": 0.004588989900745711, "grad_norm": 2.3048395861451527, "learning_rate": 1.5237543453070682e-07, "loss": 0.3616, "step": 264 }, { "epoch": 0.004606372438248536, "grad_norm": 3.5303620180638866, "learning_rate": 1.5295480880648897e-07, "loss": 0.3012, "step": 265 }, { "epoch": 0.00462375497575136, "grad_norm": 1.696675084836527, "learning_rate": 1.5353418308227115e-07, "loss": 0.2507, "step": 266 }, { "epoch": 0.004641137513254185, "grad_norm": 1.5576592431090095, "learning_rate": 1.5411355735805327e-07, "loss": 0.2826, "step": 267 }, { "epoch": 0.00465852005075701, "grad_norm": 1.5223619063648455, "learning_rate": 1.5469293163383545e-07, "loss": 0.2909, "step": 268 }, { "epoch": 0.004675902588259834, "grad_norm": 2.8019557139499938, "learning_rate": 1.552723059096176e-07, "loss": 0.2545, "step": 269 }, { "epoch": 0.004693285125762659, "grad_norm": 1.7981685580878894, "learning_rate": 1.5585168018539975e-07, "loss": 0.4959, "step": 270 }, { "epoch": 0.004710667663265483, "grad_norm": 1.9310951674705492, "learning_rate": 1.564310544611819e-07, "loss": 0.4367, "step": 271 }, { "epoch": 0.004728050200768308, "grad_norm": 2.028594321532356, "learning_rate": 1.5701042873696408e-07, "loss": 0.2516, "step": 272 }, { "epoch": 0.0047454327382711325, "grad_norm": 1.8977384143328584, "learning_rate": 1.575898030127462e-07, "loss": 0.2978, "step": 273 }, { "epoch": 0.004762815275773958, "grad_norm": 3.163487572103092, "learning_rate": 1.5816917728852838e-07, "loss": 0.58, "step": 274 }, { "epoch": 0.004780197813276782, "grad_norm": 3.4863693429463742, "learning_rate": 1.5874855156431055e-07, "loss": 0.4794, "step": 275 }, { "epoch": 0.004797580350779607, "grad_norm": 2.324221959059223, "learning_rate": 1.593279258400927e-07, "loss": 0.4276, "step": 276 }, { "epoch": 0.004814962888282432, "grad_norm": 1.9141002679995287, "learning_rate": 1.5990730011587486e-07, "loss": 0.3889, "step": 277 }, { "epoch": 0.004832345425785256, "grad_norm": 2.516857593303654, "learning_rate": 1.60486674391657e-07, "loss": 0.6307, "step": 278 }, { "epoch": 0.004849727963288081, "grad_norm": 2.158793388252872, "learning_rate": 1.6106604866743918e-07, "loss": 0.3551, "step": 279 }, { "epoch": 0.004867110500790905, "grad_norm": 5.7254924190608385, "learning_rate": 1.616454229432213e-07, "loss": 0.5123, "step": 280 }, { "epoch": 0.00488449303829373, "grad_norm": 1.3012615144749546, "learning_rate": 1.6222479721900348e-07, "loss": 0.2741, "step": 281 }, { "epoch": 0.0049018755757965545, "grad_norm": 1.1308117968137938, "learning_rate": 1.6280417149478563e-07, "loss": 0.3294, "step": 282 }, { "epoch": 0.0049192581132993796, "grad_norm": 2.1789323043366977, "learning_rate": 1.6338354577056778e-07, "loss": 0.4342, "step": 283 }, { "epoch": 0.004936640650802204, "grad_norm": 3.8713977091980856, "learning_rate": 1.6396292004634993e-07, "loss": 0.5209, "step": 284 }, { "epoch": 0.004954023188305029, "grad_norm": 2.3663913085855137, "learning_rate": 1.645422943221321e-07, "loss": 0.2897, "step": 285 }, { "epoch": 0.004971405725807854, "grad_norm": 1.4418617777168004, "learning_rate": 1.6512166859791423e-07, "loss": 0.2973, "step": 286 }, { "epoch": 0.004988788263310678, "grad_norm": 2.1770478985576394, "learning_rate": 1.657010428736964e-07, "loss": 0.3432, "step": 287 }, { "epoch": 0.005006170800813503, "grad_norm": 2.2378587834736994, "learning_rate": 1.6628041714947856e-07, "loss": 0.4383, "step": 288 }, { "epoch": 0.005023553338316327, "grad_norm": 1.5264578645118239, "learning_rate": 1.668597914252607e-07, "loss": 0.3261, "step": 289 }, { "epoch": 0.005040935875819152, "grad_norm": 1.3017546343793578, "learning_rate": 1.6743916570104286e-07, "loss": 0.375, "step": 290 }, { "epoch": 0.0050583184133219765, "grad_norm": 1.841176638230541, "learning_rate": 1.6801853997682504e-07, "loss": 0.4011, "step": 291 }, { "epoch": 0.0050757009508248015, "grad_norm": 2.98207837366946, "learning_rate": 1.6859791425260716e-07, "loss": 0.6498, "step": 292 }, { "epoch": 0.005093083488327626, "grad_norm": 5.066161252036294, "learning_rate": 1.6917728852838934e-07, "loss": 0.5967, "step": 293 }, { "epoch": 0.005110466025830451, "grad_norm": 1.9361431997726213, "learning_rate": 1.6975666280417149e-07, "loss": 0.3706, "step": 294 }, { "epoch": 0.005127848563333276, "grad_norm": 1.375380937113587, "learning_rate": 1.7033603707995364e-07, "loss": 0.3643, "step": 295 }, { "epoch": 0.0051452311008361, "grad_norm": 2.8398144863997596, "learning_rate": 1.709154113557358e-07, "loss": 0.3437, "step": 296 }, { "epoch": 0.005162613638338925, "grad_norm": 2.911689668459046, "learning_rate": 1.7149478563151796e-07, "loss": 0.2893, "step": 297 }, { "epoch": 0.005179996175841749, "grad_norm": 2.6616128278257754, "learning_rate": 1.720741599073001e-07, "loss": 0.353, "step": 298 }, { "epoch": 0.005197378713344574, "grad_norm": 1.7978277343841995, "learning_rate": 1.7265353418308226e-07, "loss": 0.3278, "step": 299 }, { "epoch": 0.0052147612508473985, "grad_norm": 2.151957609884122, "learning_rate": 1.7323290845886444e-07, "loss": 0.4328, "step": 300 }, { "epoch": 0.0052321437883502235, "grad_norm": 3.0994821897708977, "learning_rate": 1.7381228273464656e-07, "loss": 0.6427, "step": 301 }, { "epoch": 0.005249526325853048, "grad_norm": 1.860446178431801, "learning_rate": 1.7439165701042874e-07, "loss": 0.5169, "step": 302 }, { "epoch": 0.005266908863355873, "grad_norm": 2.2161958621145863, "learning_rate": 1.749710312862109e-07, "loss": 0.3353, "step": 303 }, { "epoch": 0.005284291400858698, "grad_norm": 2.697504527213007, "learning_rate": 1.7555040556199304e-07, "loss": 0.3298, "step": 304 }, { "epoch": 0.005301673938361522, "grad_norm": 3.214842338713187, "learning_rate": 1.761297798377752e-07, "loss": 0.26, "step": 305 }, { "epoch": 0.005319056475864347, "grad_norm": 3.9462798799970766, "learning_rate": 1.7670915411355737e-07, "loss": 0.3351, "step": 306 }, { "epoch": 0.005336439013367171, "grad_norm": 2.0000981362823524, "learning_rate": 1.772885283893395e-07, "loss": 0.4574, "step": 307 }, { "epoch": 0.005353821550869996, "grad_norm": 2.020738996833162, "learning_rate": 1.7786790266512167e-07, "loss": 0.5082, "step": 308 }, { "epoch": 0.00537120408837282, "grad_norm": 1.9388460124840272, "learning_rate": 1.7844727694090382e-07, "loss": 0.4943, "step": 309 }, { "epoch": 0.0053885866258756455, "grad_norm": 1.7723966266490556, "learning_rate": 1.7902665121668597e-07, "loss": 0.4369, "step": 310 }, { "epoch": 0.00540596916337847, "grad_norm": 1.6358365300168674, "learning_rate": 1.7960602549246812e-07, "loss": 0.2895, "step": 311 }, { "epoch": 0.005423351700881295, "grad_norm": 1.7462580939094206, "learning_rate": 1.801853997682503e-07, "loss": 0.4698, "step": 312 }, { "epoch": 0.005440734238384119, "grad_norm": 1.0455419627167024, "learning_rate": 1.8076477404403242e-07, "loss": 0.2319, "step": 313 }, { "epoch": 0.005458116775886944, "grad_norm": 2.175559539908393, "learning_rate": 1.813441483198146e-07, "loss": 0.5297, "step": 314 }, { "epoch": 0.005475499313389769, "grad_norm": 2.543071042345097, "learning_rate": 1.8192352259559674e-07, "loss": 0.7412, "step": 315 }, { "epoch": 0.005492881850892593, "grad_norm": 1.9655563264370886, "learning_rate": 1.825028968713789e-07, "loss": 0.4911, "step": 316 }, { "epoch": 0.005510264388395418, "grad_norm": 1.4663243613494854, "learning_rate": 1.8308227114716107e-07, "loss": 0.3381, "step": 317 }, { "epoch": 0.005527646925898242, "grad_norm": 1.5875505259652078, "learning_rate": 1.8366164542294322e-07, "loss": 0.2407, "step": 318 }, { "epoch": 0.0055450294634010674, "grad_norm": 3.2683088344811337, "learning_rate": 1.8424101969872537e-07, "loss": 0.3101, "step": 319 }, { "epoch": 0.005562412000903892, "grad_norm": 1.407445915358862, "learning_rate": 1.8482039397450752e-07, "loss": 0.3216, "step": 320 }, { "epoch": 0.005579794538406717, "grad_norm": 3.3669155051872197, "learning_rate": 1.853997682502897e-07, "loss": 0.3206, "step": 321 }, { "epoch": 0.005597177075909541, "grad_norm": 1.9977372589679792, "learning_rate": 1.8597914252607182e-07, "loss": 0.3808, "step": 322 }, { "epoch": 0.005614559613412366, "grad_norm": 1.6805174735131312, "learning_rate": 1.86558516801854e-07, "loss": 0.3072, "step": 323 }, { "epoch": 0.005631942150915191, "grad_norm": 7.301157408526554, "learning_rate": 1.8713789107763615e-07, "loss": 0.3373, "step": 324 }, { "epoch": 0.005649324688418015, "grad_norm": 1.9498608977419474, "learning_rate": 1.877172653534183e-07, "loss": 0.6147, "step": 325 }, { "epoch": 0.00566670722592084, "grad_norm": 3.3870334372475237, "learning_rate": 1.8829663962920045e-07, "loss": 0.2289, "step": 326 }, { "epoch": 0.005684089763423664, "grad_norm": 1.3577215926250588, "learning_rate": 1.8887601390498262e-07, "loss": 0.2854, "step": 327 }, { "epoch": 0.005701472300926489, "grad_norm": 2.0373125753077552, "learning_rate": 1.8945538818076475e-07, "loss": 0.4037, "step": 328 }, { "epoch": 0.005718854838429314, "grad_norm": 2.4334376383785705, "learning_rate": 1.9003476245654692e-07, "loss": 0.3849, "step": 329 }, { "epoch": 0.005736237375932139, "grad_norm": 2.2978541034648523, "learning_rate": 1.9061413673232907e-07, "loss": 0.3732, "step": 330 }, { "epoch": 0.005753619913434963, "grad_norm": 1.6761210907617583, "learning_rate": 1.9119351100811122e-07, "loss": 0.3119, "step": 331 }, { "epoch": 0.005771002450937788, "grad_norm": 2.0993103117071654, "learning_rate": 1.9177288528389337e-07, "loss": 0.3349, "step": 332 }, { "epoch": 0.005788384988440613, "grad_norm": 2.542116254860645, "learning_rate": 1.9235225955967555e-07, "loss": 0.6087, "step": 333 }, { "epoch": 0.005805767525943437, "grad_norm": 1.3178071492145513, "learning_rate": 1.9293163383545768e-07, "loss": 0.5523, "step": 334 }, { "epoch": 0.005823150063446262, "grad_norm": 2.831264476978753, "learning_rate": 1.9351100811123985e-07, "loss": 0.3615, "step": 335 }, { "epoch": 0.005840532600949086, "grad_norm": 1.4385124322299823, "learning_rate": 1.94090382387022e-07, "loss": 0.4752, "step": 336 }, { "epoch": 0.005857915138451911, "grad_norm": 1.8848610252711446, "learning_rate": 1.9466975666280415e-07, "loss": 0.4222, "step": 337 }, { "epoch": 0.005875297675954736, "grad_norm": 1.9701670458496698, "learning_rate": 1.9524913093858633e-07, "loss": 0.2714, "step": 338 }, { "epoch": 0.005892680213457561, "grad_norm": 2.1935046314563253, "learning_rate": 1.9582850521436848e-07, "loss": 0.4377, "step": 339 }, { "epoch": 0.005910062750960385, "grad_norm": 1.2580163155467552, "learning_rate": 1.9640787949015063e-07, "loss": 0.313, "step": 340 }, { "epoch": 0.00592744528846321, "grad_norm": 2.0376226086472853, "learning_rate": 1.9698725376593278e-07, "loss": 0.4887, "step": 341 }, { "epoch": 0.005944827825966035, "grad_norm": 2.799250570087003, "learning_rate": 1.9756662804171496e-07, "loss": 0.3196, "step": 342 }, { "epoch": 0.005962210363468859, "grad_norm": 3.0968961391888303, "learning_rate": 1.9814600231749708e-07, "loss": 0.3002, "step": 343 }, { "epoch": 0.005979592900971684, "grad_norm": 3.65611848593727, "learning_rate": 1.9872537659327926e-07, "loss": 0.4178, "step": 344 }, { "epoch": 0.005996975438474508, "grad_norm": 1.3471993854809439, "learning_rate": 1.993047508690614e-07, "loss": 0.2639, "step": 345 }, { "epoch": 0.006014357975977333, "grad_norm": 2.6795146277449895, "learning_rate": 1.9988412514484356e-07, "loss": 0.7131, "step": 346 }, { "epoch": 0.0060317405134801575, "grad_norm": 1.595419719906898, "learning_rate": 2.004634994206257e-07, "loss": 0.4676, "step": 347 }, { "epoch": 0.006049123050982983, "grad_norm": 2.194990160179039, "learning_rate": 2.0104287369640788e-07, "loss": 0.3759, "step": 348 }, { "epoch": 0.006066505588485807, "grad_norm": 2.5255459257813015, "learning_rate": 2.0162224797219e-07, "loss": 0.4432, "step": 349 }, { "epoch": 0.006083888125988632, "grad_norm": 1.7892383561052951, "learning_rate": 2.0220162224797218e-07, "loss": 0.3098, "step": 350 }, { "epoch": 0.006101270663491457, "grad_norm": 1.4754551104849878, "learning_rate": 2.0278099652375433e-07, "loss": 0.4599, "step": 351 }, { "epoch": 0.006118653200994281, "grad_norm": 2.958384111864428, "learning_rate": 2.0336037079953648e-07, "loss": 0.3479, "step": 352 }, { "epoch": 0.006136035738497106, "grad_norm": 1.3982633033413812, "learning_rate": 2.0393974507531863e-07, "loss": 0.5061, "step": 353 }, { "epoch": 0.00615341827599993, "grad_norm": 2.122397166800289, "learning_rate": 2.045191193511008e-07, "loss": 0.5345, "step": 354 }, { "epoch": 0.006170800813502755, "grad_norm": 2.094100360221985, "learning_rate": 2.0509849362688293e-07, "loss": 0.2004, "step": 355 }, { "epoch": 0.0061881833510055795, "grad_norm": 3.2446003049693926, "learning_rate": 2.056778679026651e-07, "loss": 0.3686, "step": 356 }, { "epoch": 0.0062055658885084046, "grad_norm": 2.1462297907227117, "learning_rate": 2.0625724217844726e-07, "loss": 0.3782, "step": 357 }, { "epoch": 0.006222948426011229, "grad_norm": 2.3834821119256624, "learning_rate": 2.0683661645422944e-07, "loss": 0.4953, "step": 358 }, { "epoch": 0.006240330963514054, "grad_norm": 2.9506618058909893, "learning_rate": 2.0741599073001159e-07, "loss": 0.2791, "step": 359 }, { "epoch": 0.006257713501016879, "grad_norm": 1.6840527201515527, "learning_rate": 2.0799536500579374e-07, "loss": 0.3102, "step": 360 }, { "epoch": 0.006275096038519703, "grad_norm": 1.59113809079084, "learning_rate": 2.085747392815759e-07, "loss": 0.346, "step": 361 }, { "epoch": 0.006292478576022528, "grad_norm": 2.8911423161774166, "learning_rate": 2.0915411355735804e-07, "loss": 0.5946, "step": 362 }, { "epoch": 0.006309861113525352, "grad_norm": 1.7742999420747285, "learning_rate": 2.097334878331402e-07, "loss": 0.5088, "step": 363 }, { "epoch": 0.006327243651028177, "grad_norm": 1.8971167740382415, "learning_rate": 2.1031286210892236e-07, "loss": 0.4649, "step": 364 }, { "epoch": 0.0063446261885310015, "grad_norm": 1.4882910464821006, "learning_rate": 2.1089223638470451e-07, "loss": 0.4882, "step": 365 }, { "epoch": 0.0063620087260338265, "grad_norm": 2.3791624984588413, "learning_rate": 2.1147161066048666e-07, "loss": 0.376, "step": 366 }, { "epoch": 0.006379391263536651, "grad_norm": 1.8505964837890754, "learning_rate": 2.1205098493626884e-07, "loss": 0.3069, "step": 367 }, { "epoch": 0.006396773801039476, "grad_norm": 2.1849025028156435, "learning_rate": 2.1263035921205096e-07, "loss": 0.4394, "step": 368 }, { "epoch": 0.006414156338542301, "grad_norm": 2.6608751900977956, "learning_rate": 2.1320973348783314e-07, "loss": 0.5209, "step": 369 }, { "epoch": 0.006431538876045125, "grad_norm": 2.183591879633907, "learning_rate": 2.137891077636153e-07, "loss": 0.2935, "step": 370 }, { "epoch": 0.00644892141354795, "grad_norm": 2.4812103147880835, "learning_rate": 2.1436848203939744e-07, "loss": 0.2623, "step": 371 }, { "epoch": 0.006466303951050774, "grad_norm": 2.784029384278244, "learning_rate": 2.149478563151796e-07, "loss": 0.5239, "step": 372 }, { "epoch": 0.006483686488553599, "grad_norm": 3.3144694316491763, "learning_rate": 2.1552723059096177e-07, "loss": 0.4435, "step": 373 }, { "epoch": 0.0065010690260564235, "grad_norm": 1.706174410854859, "learning_rate": 2.161066048667439e-07, "loss": 0.2334, "step": 374 }, { "epoch": 0.0065184515635592485, "grad_norm": 2.7023032358159185, "learning_rate": 2.1668597914252607e-07, "loss": 0.3328, "step": 375 }, { "epoch": 0.006535834101062073, "grad_norm": 1.7391131071001489, "learning_rate": 2.1726535341830822e-07, "loss": 0.3645, "step": 376 }, { "epoch": 0.006553216638564898, "grad_norm": 2.687180057640234, "learning_rate": 2.1784472769409037e-07, "loss": 0.6985, "step": 377 }, { "epoch": 0.006570599176067723, "grad_norm": 1.684165499720292, "learning_rate": 2.1842410196987252e-07, "loss": 0.3189, "step": 378 }, { "epoch": 0.006587981713570547, "grad_norm": 1.5426900611139625, "learning_rate": 2.190034762456547e-07, "loss": 0.3246, "step": 379 }, { "epoch": 0.006605364251073372, "grad_norm": 2.2299233342308313, "learning_rate": 2.1958285052143684e-07, "loss": 0.2104, "step": 380 }, { "epoch": 0.006622746788576196, "grad_norm": 2.205123303117038, "learning_rate": 2.20162224797219e-07, "loss": 0.3747, "step": 381 }, { "epoch": 0.006640129326079021, "grad_norm": 2.3382667123230645, "learning_rate": 2.2074159907300117e-07, "loss": 0.3922, "step": 382 }, { "epoch": 0.006657511863581845, "grad_norm": 2.1527138540386197, "learning_rate": 2.213209733487833e-07, "loss": 0.3595, "step": 383 }, { "epoch": 0.0066748944010846705, "grad_norm": 1.8018730872841966, "learning_rate": 2.2190034762456547e-07, "loss": 0.353, "step": 384 }, { "epoch": 0.006692276938587495, "grad_norm": 3.2967311407571596, "learning_rate": 2.2247972190034762e-07, "loss": 0.5815, "step": 385 }, { "epoch": 0.00670965947609032, "grad_norm": 1.6549711720525528, "learning_rate": 2.2305909617612977e-07, "loss": 0.3499, "step": 386 }, { "epoch": 0.006727042013593145, "grad_norm": 2.4351357333462027, "learning_rate": 2.2363847045191192e-07, "loss": 0.324, "step": 387 }, { "epoch": 0.006744424551095969, "grad_norm": 4.472046307488736, "learning_rate": 2.242178447276941e-07, "loss": 0.685, "step": 388 }, { "epoch": 0.006761807088598794, "grad_norm": 2.3848376169407612, "learning_rate": 2.2479721900347622e-07, "loss": 0.3638, "step": 389 }, { "epoch": 0.006779189626101618, "grad_norm": 2.444368248022965, "learning_rate": 2.253765932792584e-07, "loss": 0.5562, "step": 390 }, { "epoch": 0.006796572163604443, "grad_norm": 1.7246667787713164, "learning_rate": 2.2595596755504055e-07, "loss": 0.4502, "step": 391 }, { "epoch": 0.006813954701107267, "grad_norm": 2.020346305286941, "learning_rate": 2.265353418308227e-07, "loss": 0.448, "step": 392 }, { "epoch": 0.0068313372386100924, "grad_norm": 1.4259247806076585, "learning_rate": 2.2711471610660485e-07, "loss": 0.3139, "step": 393 }, { "epoch": 0.006848719776112917, "grad_norm": 1.861081567509849, "learning_rate": 2.2769409038238702e-07, "loss": 0.3343, "step": 394 }, { "epoch": 0.006866102313615742, "grad_norm": 2.809248071918871, "learning_rate": 2.2827346465816915e-07, "loss": 0.2914, "step": 395 }, { "epoch": 0.006883484851118566, "grad_norm": 2.435681998323558, "learning_rate": 2.2885283893395132e-07, "loss": 0.441, "step": 396 }, { "epoch": 0.006900867388621391, "grad_norm": 4.309480292964172, "learning_rate": 2.2943221320973348e-07, "loss": 0.3585, "step": 397 }, { "epoch": 0.006918249926124216, "grad_norm": 2.855474690802586, "learning_rate": 2.3001158748551563e-07, "loss": 0.4468, "step": 398 }, { "epoch": 0.00693563246362704, "grad_norm": 1.5526302309110898, "learning_rate": 2.3059096176129778e-07, "loss": 0.4141, "step": 399 }, { "epoch": 0.006953015001129865, "grad_norm": 1.1533660012996187, "learning_rate": 2.3117033603707995e-07, "loss": 0.386, "step": 400 }, { "epoch": 0.006970397538632689, "grad_norm": 3.3370714906042953, "learning_rate": 2.317497103128621e-07, "loss": 0.7839, "step": 401 }, { "epoch": 0.006987780076135514, "grad_norm": 3.913030015870355, "learning_rate": 2.3232908458864425e-07, "loss": 0.633, "step": 402 }, { "epoch": 0.007005162613638339, "grad_norm": 2.2492390164176, "learning_rate": 2.3290845886442643e-07, "loss": 0.5181, "step": 403 }, { "epoch": 0.007022545151141164, "grad_norm": 2.9198411449418553, "learning_rate": 2.3348783314020855e-07, "loss": 0.5089, "step": 404 }, { "epoch": 0.007039927688643988, "grad_norm": 2.022920584012156, "learning_rate": 2.3406720741599073e-07, "loss": 0.4473, "step": 405 }, { "epoch": 0.007057310226146813, "grad_norm": 3.1706066704360287, "learning_rate": 2.3464658169177288e-07, "loss": 0.5831, "step": 406 }, { "epoch": 0.007074692763649638, "grad_norm": 2.8590057840522127, "learning_rate": 2.3522595596755503e-07, "loss": 0.4328, "step": 407 }, { "epoch": 0.007092075301152462, "grad_norm": 1.4834415352127472, "learning_rate": 2.3580533024333718e-07, "loss": 0.2647, "step": 408 }, { "epoch": 0.007109457838655287, "grad_norm": 1.9318473545257453, "learning_rate": 2.3638470451911936e-07, "loss": 0.4356, "step": 409 }, { "epoch": 0.007126840376158111, "grad_norm": 2.4624643410546105, "learning_rate": 2.3696407879490148e-07, "loss": 0.3916, "step": 410 }, { "epoch": 0.007144222913660936, "grad_norm": 2.6850683069665964, "learning_rate": 2.3754345307068366e-07, "loss": 0.4476, "step": 411 }, { "epoch": 0.007161605451163761, "grad_norm": 3.9945087157564374, "learning_rate": 2.381228273464658e-07, "loss": 0.5126, "step": 412 }, { "epoch": 0.007178987988666586, "grad_norm": 1.3285958723768176, "learning_rate": 2.3870220162224796e-07, "loss": 0.2834, "step": 413 }, { "epoch": 0.00719637052616941, "grad_norm": 1.8634385627975296, "learning_rate": 2.392815758980301e-07, "loss": 0.5657, "step": 414 }, { "epoch": 0.007213753063672235, "grad_norm": 2.1441873510662943, "learning_rate": 2.3986095017381226e-07, "loss": 0.4609, "step": 415 }, { "epoch": 0.00723113560117506, "grad_norm": 2.2994380246840374, "learning_rate": 2.404403244495944e-07, "loss": 0.5432, "step": 416 }, { "epoch": 0.007248518138677884, "grad_norm": 2.6638136434575137, "learning_rate": 2.410196987253766e-07, "loss": 0.449, "step": 417 }, { "epoch": 0.007265900676180709, "grad_norm": 3.0993516340290435, "learning_rate": 2.415990730011587e-07, "loss": 0.4274, "step": 418 }, { "epoch": 0.007283283213683533, "grad_norm": 4.874674806894065, "learning_rate": 2.421784472769409e-07, "loss": 0.594, "step": 419 }, { "epoch": 0.007300665751186358, "grad_norm": 2.3387610181035776, "learning_rate": 2.4275782155272306e-07, "loss": 0.555, "step": 420 }, { "epoch": 0.0073180482886891825, "grad_norm": 1.8776084964953152, "learning_rate": 2.433371958285052e-07, "loss": 0.2862, "step": 421 }, { "epoch": 0.007335430826192008, "grad_norm": 1.956531870736571, "learning_rate": 2.4391657010428736e-07, "loss": 0.5375, "step": 422 }, { "epoch": 0.007352813363694832, "grad_norm": 2.5167462818134205, "learning_rate": 2.444959443800695e-07, "loss": 0.2231, "step": 423 }, { "epoch": 0.007370195901197657, "grad_norm": 1.7407538907898101, "learning_rate": 2.4507531865585166e-07, "loss": 0.3604, "step": 424 }, { "epoch": 0.007387578438700482, "grad_norm": 1.8121902323340593, "learning_rate": 2.456546929316338e-07, "loss": 0.3699, "step": 425 }, { "epoch": 0.007404960976203306, "grad_norm": 1.745630085563704, "learning_rate": 2.46234067207416e-07, "loss": 0.273, "step": 426 }, { "epoch": 0.007422343513706131, "grad_norm": 2.1799181495650886, "learning_rate": 2.468134414831981e-07, "loss": 0.5183, "step": 427 }, { "epoch": 0.007439726051208955, "grad_norm": 4.369582576468998, "learning_rate": 2.473928157589803e-07, "loss": 0.9498, "step": 428 }, { "epoch": 0.00745710858871178, "grad_norm": 2.7222037442115625, "learning_rate": 2.4797219003476246e-07, "loss": 0.4616, "step": 429 }, { "epoch": 0.0074744911262146045, "grad_norm": 1.3985518029012403, "learning_rate": 2.485515643105446e-07, "loss": 0.3686, "step": 430 }, { "epoch": 0.0074918736637174296, "grad_norm": 1.7677475095255009, "learning_rate": 2.4913093858632676e-07, "loss": 0.3866, "step": 431 }, { "epoch": 0.007509256201220254, "grad_norm": 2.3571047833730496, "learning_rate": 2.497103128621089e-07, "loss": 0.3447, "step": 432 }, { "epoch": 0.007526638738723079, "grad_norm": 1.4389404381235618, "learning_rate": 2.5028968713789106e-07, "loss": 0.2375, "step": 433 }, { "epoch": 0.007544021276225904, "grad_norm": 4.415072104420103, "learning_rate": 2.508690614136732e-07, "loss": 0.4975, "step": 434 }, { "epoch": 0.007561403813728728, "grad_norm": 4.058024630445556, "learning_rate": 2.514484356894554e-07, "loss": 0.5452, "step": 435 }, { "epoch": 0.007578786351231553, "grad_norm": 1.4913744174288355, "learning_rate": 2.520278099652375e-07, "loss": 0.2523, "step": 436 }, { "epoch": 0.007596168888734377, "grad_norm": 1.7308150119794614, "learning_rate": 2.5260718424101966e-07, "loss": 0.4164, "step": 437 }, { "epoch": 0.007613551426237202, "grad_norm": 1.9468970739955367, "learning_rate": 2.531865585168018e-07, "loss": 0.3978, "step": 438 }, { "epoch": 0.0076309339637400265, "grad_norm": 1.8506335013125876, "learning_rate": 2.53765932792584e-07, "loss": 0.5546, "step": 439 }, { "epoch": 0.0076483165012428515, "grad_norm": 1.897501872542368, "learning_rate": 2.5434530706836617e-07, "loss": 0.5509, "step": 440 }, { "epoch": 0.007665699038745676, "grad_norm": 1.9612389653951712, "learning_rate": 2.549246813441483e-07, "loss": 0.4666, "step": 441 }, { "epoch": 0.007683081576248501, "grad_norm": 5.866960080529335, "learning_rate": 2.555040556199304e-07, "loss": 0.4989, "step": 442 }, { "epoch": 0.007700464113751326, "grad_norm": 1.7131237899608003, "learning_rate": 2.560834298957126e-07, "loss": 0.4839, "step": 443 }, { "epoch": 0.00771784665125415, "grad_norm": 5.429599137340769, "learning_rate": 2.5666280417149477e-07, "loss": 0.621, "step": 444 }, { "epoch": 0.007735229188756975, "grad_norm": 1.7859721525202918, "learning_rate": 2.572421784472769e-07, "loss": 0.5833, "step": 445 }, { "epoch": 0.007752611726259799, "grad_norm": 1.7349360438898331, "learning_rate": 2.578215527230591e-07, "loss": 0.341, "step": 446 }, { "epoch": 0.007769994263762624, "grad_norm": 1.276947701266211, "learning_rate": 2.5840092699884127e-07, "loss": 0.3293, "step": 447 }, { "epoch": 0.0077873768012654485, "grad_norm": 2.0801399276263353, "learning_rate": 2.5898030127462337e-07, "loss": 0.4658, "step": 448 }, { "epoch": 0.0078047593387682735, "grad_norm": 2.1332883335402864, "learning_rate": 2.595596755504055e-07, "loss": 0.3683, "step": 449 }, { "epoch": 0.007822141876271099, "grad_norm": 1.1597654669897943, "learning_rate": 2.601390498261877e-07, "loss": 0.2431, "step": 450 }, { "epoch": 0.007839524413773922, "grad_norm": 1.6486878207181157, "learning_rate": 2.6071842410196987e-07, "loss": 0.4196, "step": 451 }, { "epoch": 0.007856906951276747, "grad_norm": 1.6672914830941614, "learning_rate": 2.61297798377752e-07, "loss": 0.2822, "step": 452 }, { "epoch": 0.007874289488779572, "grad_norm": 18.356756817023204, "learning_rate": 2.6187717265353417e-07, "loss": 0.5309, "step": 453 }, { "epoch": 0.007891672026282397, "grad_norm": 1.6015265277274011, "learning_rate": 2.624565469293163e-07, "loss": 0.3282, "step": 454 }, { "epoch": 0.007909054563785222, "grad_norm": 1.515963593086846, "learning_rate": 2.6303592120509847e-07, "loss": 0.2875, "step": 455 }, { "epoch": 0.007926437101288045, "grad_norm": 1.8591172407846122, "learning_rate": 2.636152954808806e-07, "loss": 0.4207, "step": 456 }, { "epoch": 0.00794381963879087, "grad_norm": 1.6992357376681142, "learning_rate": 2.6419466975666277e-07, "loss": 0.4859, "step": 457 }, { "epoch": 0.007961202176293695, "grad_norm": 1.7380460632011288, "learning_rate": 2.64774044032445e-07, "loss": 0.4169, "step": 458 }, { "epoch": 0.00797858471379652, "grad_norm": 1.9186437644069372, "learning_rate": 2.653534183082271e-07, "loss": 0.2182, "step": 459 }, { "epoch": 0.007995967251299344, "grad_norm": 1.1374155826525274, "learning_rate": 2.659327925840092e-07, "loss": 0.467, "step": 460 }, { "epoch": 0.008013349788802169, "grad_norm": 1.3485639405551804, "learning_rate": 2.6651216685979137e-07, "loss": 0.2321, "step": 461 }, { "epoch": 0.008030732326304994, "grad_norm": 1.9480767531832572, "learning_rate": 2.670915411355736e-07, "loss": 0.2236, "step": 462 }, { "epoch": 0.008048114863807819, "grad_norm": 1.7961679941920234, "learning_rate": 2.676709154113557e-07, "loss": 0.2062, "step": 463 }, { "epoch": 0.008065497401310644, "grad_norm": 1.8465194968556942, "learning_rate": 2.682502896871379e-07, "loss": 0.3702, "step": 464 }, { "epoch": 0.008082879938813467, "grad_norm": 2.1280209558988865, "learning_rate": 2.688296639629201e-07, "loss": 0.2956, "step": 465 }, { "epoch": 0.008100262476316292, "grad_norm": 1.6090110262289266, "learning_rate": 2.694090382387022e-07, "loss": 0.431, "step": 466 }, { "epoch": 0.008117645013819117, "grad_norm": 1.9425108795081092, "learning_rate": 2.699884125144843e-07, "loss": 0.7783, "step": 467 }, { "epoch": 0.008135027551321942, "grad_norm": 3.368740840030252, "learning_rate": 2.705677867902665e-07, "loss": 0.4325, "step": 468 }, { "epoch": 0.008152410088824766, "grad_norm": 1.8123416294343568, "learning_rate": 2.711471610660487e-07, "loss": 0.4437, "step": 469 }, { "epoch": 0.00816979262632759, "grad_norm": 2.5777163278135804, "learning_rate": 2.7172653534183083e-07, "loss": 0.5786, "step": 470 }, { "epoch": 0.008187175163830416, "grad_norm": 2.0659646154176543, "learning_rate": 2.72305909617613e-07, "loss": 0.3925, "step": 471 }, { "epoch": 0.008204557701333241, "grad_norm": 1.9267339006885997, "learning_rate": 2.728852838933951e-07, "loss": 0.4112, "step": 472 }, { "epoch": 0.008221940238836066, "grad_norm": 1.7568522064374985, "learning_rate": 2.734646581691773e-07, "loss": 0.2946, "step": 473 }, { "epoch": 0.00823932277633889, "grad_norm": 1.7244249415899944, "learning_rate": 2.7404403244495943e-07, "loss": 0.4072, "step": 474 }, { "epoch": 0.008256705313841714, "grad_norm": 1.6707409502648944, "learning_rate": 2.746234067207416e-07, "loss": 0.4323, "step": 475 }, { "epoch": 0.00827408785134454, "grad_norm": 2.5100986125066673, "learning_rate": 2.7520278099652373e-07, "loss": 0.3892, "step": 476 }, { "epoch": 0.008291470388847364, "grad_norm": 1.7702106790742362, "learning_rate": 2.7578215527230593e-07, "loss": 0.557, "step": 477 }, { "epoch": 0.008308852926350188, "grad_norm": 2.859147509603228, "learning_rate": 2.763615295480881e-07, "loss": 0.5283, "step": 478 }, { "epoch": 0.008326235463853013, "grad_norm": 1.6171775224753089, "learning_rate": 2.769409038238702e-07, "loss": 0.3547, "step": 479 }, { "epoch": 0.008343618001355838, "grad_norm": 1.0649618321055663, "learning_rate": 2.7752027809965233e-07, "loss": 0.3309, "step": 480 }, { "epoch": 0.008361000538858663, "grad_norm": 2.3367307628638443, "learning_rate": 2.7809965237543453e-07, "loss": 0.4946, "step": 481 }, { "epoch": 0.008378383076361488, "grad_norm": 2.816356446154258, "learning_rate": 2.786790266512167e-07, "loss": 0.4191, "step": 482 }, { "epoch": 0.008395765613864311, "grad_norm": 2.5592070089974794, "learning_rate": 2.7925840092699883e-07, "loss": 0.3862, "step": 483 }, { "epoch": 0.008413148151367136, "grad_norm": 4.760214339227788, "learning_rate": 2.79837775202781e-07, "loss": 0.5391, "step": 484 }, { "epoch": 0.008430530688869961, "grad_norm": 2.0385975278384754, "learning_rate": 2.8041714947856313e-07, "loss": 0.5296, "step": 485 }, { "epoch": 0.008447913226372786, "grad_norm": 2.3344147975933645, "learning_rate": 2.809965237543453e-07, "loss": 0.3819, "step": 486 }, { "epoch": 0.00846529576387561, "grad_norm": 2.5260371540086344, "learning_rate": 2.8157589803012743e-07, "loss": 0.4068, "step": 487 }, { "epoch": 0.008482678301378435, "grad_norm": 7.443155300539265, "learning_rate": 2.8215527230590964e-07, "loss": 0.3692, "step": 488 }, { "epoch": 0.00850006083888126, "grad_norm": 1.721080291368544, "learning_rate": 2.827346465816918e-07, "loss": 0.3847, "step": 489 }, { "epoch": 0.008517443376384085, "grad_norm": 2.8410744751849832, "learning_rate": 2.8331402085747394e-07, "loss": 0.679, "step": 490 }, { "epoch": 0.00853482591388691, "grad_norm": 1.9380118102608344, "learning_rate": 2.8389339513325603e-07, "loss": 0.2644, "step": 491 }, { "epoch": 0.008552208451389733, "grad_norm": 3.453990050728586, "learning_rate": 2.8447276940903824e-07, "loss": 0.3463, "step": 492 }, { "epoch": 0.008569590988892558, "grad_norm": 2.093922698845995, "learning_rate": 2.850521436848204e-07, "loss": 0.4057, "step": 493 }, { "epoch": 0.008586973526395383, "grad_norm": 2.7657769486357524, "learning_rate": 2.8563151796060254e-07, "loss": 0.6396, "step": 494 }, { "epoch": 0.008604356063898208, "grad_norm": 1.7745929739196196, "learning_rate": 2.862108922363847e-07, "loss": 0.3805, "step": 495 }, { "epoch": 0.008621738601401032, "grad_norm": 1.742539077615906, "learning_rate": 2.867902665121669e-07, "loss": 0.5039, "step": 496 }, { "epoch": 0.008639121138903857, "grad_norm": 3.380464980388457, "learning_rate": 2.87369640787949e-07, "loss": 0.2839, "step": 497 }, { "epoch": 0.008656503676406682, "grad_norm": 2.307938309999756, "learning_rate": 2.8794901506373114e-07, "loss": 0.3381, "step": 498 }, { "epoch": 0.008673886213909507, "grad_norm": 2.9311017709410683, "learning_rate": 2.885283893395133e-07, "loss": 0.5086, "step": 499 }, { "epoch": 0.008691268751412332, "grad_norm": 1.6566943239073217, "learning_rate": 2.891077636152955e-07, "loss": 0.2716, "step": 500 }, { "epoch": 0.008708651288915155, "grad_norm": 2.115010823374564, "learning_rate": 2.8968713789107764e-07, "loss": 0.3335, "step": 501 }, { "epoch": 0.00872603382641798, "grad_norm": 4.7031433074023345, "learning_rate": 2.902665121668598e-07, "loss": 0.365, "step": 502 }, { "epoch": 0.008743416363920805, "grad_norm": 3.224729655678973, "learning_rate": 2.908458864426419e-07, "loss": 0.4049, "step": 503 }, { "epoch": 0.00876079890142363, "grad_norm": 1.5108665740980096, "learning_rate": 2.914252607184241e-07, "loss": 0.2711, "step": 504 }, { "epoch": 0.008778181438926454, "grad_norm": 3.3981123563850018, "learning_rate": 2.9200463499420624e-07, "loss": 0.4356, "step": 505 }, { "epoch": 0.008795563976429279, "grad_norm": 2.082844494457908, "learning_rate": 2.925840092699884e-07, "loss": 0.5018, "step": 506 }, { "epoch": 0.008812946513932104, "grad_norm": 1.8401079187244365, "learning_rate": 2.931633835457706e-07, "loss": 0.3098, "step": 507 }, { "epoch": 0.008830329051434929, "grad_norm": 1.6774780398438147, "learning_rate": 2.9374275782155274e-07, "loss": 0.316, "step": 508 }, { "epoch": 0.008847711588937754, "grad_norm": 4.900839057801756, "learning_rate": 2.9432213209733484e-07, "loss": 0.77, "step": 509 }, { "epoch": 0.008865094126440577, "grad_norm": 2.1311741401927695, "learning_rate": 2.94901506373117e-07, "loss": 0.2492, "step": 510 }, { "epoch": 0.008882476663943402, "grad_norm": 2.370549703461567, "learning_rate": 2.954808806488992e-07, "loss": 0.6201, "step": 511 }, { "epoch": 0.008899859201446227, "grad_norm": 2.6207210809115966, "learning_rate": 2.9606025492468134e-07, "loss": 0.7113, "step": 512 }, { "epoch": 0.008917241738949052, "grad_norm": 1.7103606364399149, "learning_rate": 2.966396292004635e-07, "loss": 0.244, "step": 513 }, { "epoch": 0.008934624276451876, "grad_norm": 1.1572488430253631, "learning_rate": 2.9721900347624564e-07, "loss": 0.2347, "step": 514 }, { "epoch": 0.0089520068139547, "grad_norm": 2.9591319756772076, "learning_rate": 2.977983777520278e-07, "loss": 0.262, "step": 515 }, { "epoch": 0.008969389351457526, "grad_norm": 2.8788944654856214, "learning_rate": 2.9837775202780994e-07, "loss": 0.421, "step": 516 }, { "epoch": 0.00898677188896035, "grad_norm": 2.0697560721496204, "learning_rate": 2.989571263035921e-07, "loss": 0.2478, "step": 517 }, { "epoch": 0.009004154426463176, "grad_norm": 1.8963381761089741, "learning_rate": 2.9953650057937425e-07, "loss": 0.3342, "step": 518 }, { "epoch": 0.009021536963966, "grad_norm": 2.1103399424311715, "learning_rate": 3.0011587485515645e-07, "loss": 0.356, "step": 519 }, { "epoch": 0.009038919501468824, "grad_norm": 3.617256079114626, "learning_rate": 3.006952491309386e-07, "loss": 0.4604, "step": 520 }, { "epoch": 0.00905630203897165, "grad_norm": 3.693982426149102, "learning_rate": 3.012746234067207e-07, "loss": 0.3117, "step": 521 }, { "epoch": 0.009073684576474474, "grad_norm": 2.167440757563945, "learning_rate": 3.0185399768250285e-07, "loss": 0.2902, "step": 522 }, { "epoch": 0.009091067113977298, "grad_norm": 2.709262315956423, "learning_rate": 3.0243337195828505e-07, "loss": 0.3937, "step": 523 }, { "epoch": 0.009108449651480123, "grad_norm": 1.9428995131732878, "learning_rate": 3.030127462340672e-07, "loss": 0.4167, "step": 524 }, { "epoch": 0.009125832188982948, "grad_norm": 1.8109969487666777, "learning_rate": 3.0359212050984935e-07, "loss": 0.6071, "step": 525 }, { "epoch": 0.009143214726485773, "grad_norm": 2.6317209343698496, "learning_rate": 3.041714947856315e-07, "loss": 0.4866, "step": 526 }, { "epoch": 0.009160597263988598, "grad_norm": 1.781499236846585, "learning_rate": 3.0475086906141365e-07, "loss": 0.5441, "step": 527 }, { "epoch": 0.009177979801491421, "grad_norm": 2.0761745505626434, "learning_rate": 3.053302433371958e-07, "loss": 0.3411, "step": 528 }, { "epoch": 0.009195362338994246, "grad_norm": 1.9909568270823206, "learning_rate": 3.0590961761297795e-07, "loss": 0.2583, "step": 529 }, { "epoch": 0.009212744876497071, "grad_norm": 3.4176621248151404, "learning_rate": 3.0648899188876015e-07, "loss": 0.8055, "step": 530 }, { "epoch": 0.009230127413999896, "grad_norm": 2.608304163234406, "learning_rate": 3.070683661645423e-07, "loss": 0.7547, "step": 531 }, { "epoch": 0.00924750995150272, "grad_norm": 2.5575379023753086, "learning_rate": 3.0764774044032445e-07, "loss": 0.2283, "step": 532 }, { "epoch": 0.009264892489005545, "grad_norm": 4.404513085668433, "learning_rate": 3.0822711471610655e-07, "loss": 0.4701, "step": 533 }, { "epoch": 0.00928227502650837, "grad_norm": 1.710000669980159, "learning_rate": 3.0880648899188875e-07, "loss": 0.2481, "step": 534 }, { "epoch": 0.009299657564011195, "grad_norm": 1.8815695968546138, "learning_rate": 3.093858632676709e-07, "loss": 0.649, "step": 535 }, { "epoch": 0.00931704010151402, "grad_norm": 2.9638199197011383, "learning_rate": 3.0996523754345305e-07, "loss": 0.5011, "step": 536 }, { "epoch": 0.009334422639016843, "grad_norm": 2.213678849038141, "learning_rate": 3.105446118192352e-07, "loss": 0.2936, "step": 537 }, { "epoch": 0.009351805176519668, "grad_norm": 2.188000978080385, "learning_rate": 3.111239860950174e-07, "loss": 0.2136, "step": 538 }, { "epoch": 0.009369187714022493, "grad_norm": 2.269706913165005, "learning_rate": 3.117033603707995e-07, "loss": 0.4208, "step": 539 }, { "epoch": 0.009386570251525318, "grad_norm": 2.3054169767222366, "learning_rate": 3.1228273464658165e-07, "loss": 0.4295, "step": 540 }, { "epoch": 0.009403952789028142, "grad_norm": 2.07421464325583, "learning_rate": 3.128621089223638e-07, "loss": 0.5654, "step": 541 }, { "epoch": 0.009421335326530967, "grad_norm": 1.4720572806801315, "learning_rate": 3.13441483198146e-07, "loss": 0.4373, "step": 542 }, { "epoch": 0.009438717864033792, "grad_norm": 1.5396798294773806, "learning_rate": 3.1402085747392816e-07, "loss": 0.3145, "step": 543 }, { "epoch": 0.009456100401536617, "grad_norm": 1.0978979501953001, "learning_rate": 3.146002317497103e-07, "loss": 0.1632, "step": 544 }, { "epoch": 0.009473482939039442, "grad_norm": 2.122524416599182, "learning_rate": 3.151796060254924e-07, "loss": 0.3327, "step": 545 }, { "epoch": 0.009490865476542265, "grad_norm": 1.3251859220948963, "learning_rate": 3.157589803012746e-07, "loss": 0.4248, "step": 546 }, { "epoch": 0.00950824801404509, "grad_norm": 3.5825308107820026, "learning_rate": 3.1633835457705676e-07, "loss": 0.5906, "step": 547 }, { "epoch": 0.009525630551547915, "grad_norm": 1.6820829404645687, "learning_rate": 3.169177288528389e-07, "loss": 0.5449, "step": 548 }, { "epoch": 0.00954301308905074, "grad_norm": 2.636019034133382, "learning_rate": 3.174971031286211e-07, "loss": 0.384, "step": 549 }, { "epoch": 0.009560395626553564, "grad_norm": 1.6351773756563754, "learning_rate": 3.1807647740440326e-07, "loss": 0.2838, "step": 550 }, { "epoch": 0.009577778164056389, "grad_norm": 3.067668163908917, "learning_rate": 3.186558516801854e-07, "loss": 0.6192, "step": 551 }, { "epoch": 0.009595160701559214, "grad_norm": 2.6945576588057434, "learning_rate": 3.192352259559675e-07, "loss": 0.2563, "step": 552 }, { "epoch": 0.009612543239062039, "grad_norm": 1.060301724324767, "learning_rate": 3.198146002317497e-07, "loss": 0.2359, "step": 553 }, { "epoch": 0.009629925776564864, "grad_norm": 1.6076060750302006, "learning_rate": 3.2039397450753186e-07, "loss": 0.4549, "step": 554 }, { "epoch": 0.009647308314067687, "grad_norm": 1.7760208825905894, "learning_rate": 3.20973348783314e-07, "loss": 0.4554, "step": 555 }, { "epoch": 0.009664690851570512, "grad_norm": 1.5063134905677231, "learning_rate": 3.2155272305909616e-07, "loss": 0.387, "step": 556 }, { "epoch": 0.009682073389073337, "grad_norm": 2.652553856621329, "learning_rate": 3.2213209733487836e-07, "loss": 0.61, "step": 557 }, { "epoch": 0.009699455926576162, "grad_norm": 1.755644518095514, "learning_rate": 3.2271147161066046e-07, "loss": 0.3033, "step": 558 }, { "epoch": 0.009716838464078986, "grad_norm": 1.7079496586808385, "learning_rate": 3.232908458864426e-07, "loss": 0.496, "step": 559 }, { "epoch": 0.00973422100158181, "grad_norm": 1.5605814408001912, "learning_rate": 3.2387022016222476e-07, "loss": 0.2669, "step": 560 }, { "epoch": 0.009751603539084636, "grad_norm": 2.1670865663797803, "learning_rate": 3.2444959443800696e-07, "loss": 0.3067, "step": 561 }, { "epoch": 0.00976898607658746, "grad_norm": 1.66800877188268, "learning_rate": 3.250289687137891e-07, "loss": 0.3347, "step": 562 }, { "epoch": 0.009786368614090286, "grad_norm": 1.5146375661686884, "learning_rate": 3.2560834298957126e-07, "loss": 0.3228, "step": 563 }, { "epoch": 0.009803751151593109, "grad_norm": 6.459563206583111, "learning_rate": 3.2618771726535336e-07, "loss": 0.323, "step": 564 }, { "epoch": 0.009821133689095934, "grad_norm": 2.8381826564739687, "learning_rate": 3.2676709154113556e-07, "loss": 0.4583, "step": 565 }, { "epoch": 0.009838516226598759, "grad_norm": 2.63114695940773, "learning_rate": 3.273464658169177e-07, "loss": 0.3696, "step": 566 }, { "epoch": 0.009855898764101584, "grad_norm": 2.3287539437320808, "learning_rate": 3.2792584009269986e-07, "loss": 0.2043, "step": 567 }, { "epoch": 0.009873281301604407, "grad_norm": 2.18405687969443, "learning_rate": 3.28505214368482e-07, "loss": 0.3271, "step": 568 }, { "epoch": 0.009890663839107233, "grad_norm": 1.6601072368669514, "learning_rate": 3.290845886442642e-07, "loss": 0.4071, "step": 569 }, { "epoch": 0.009908046376610058, "grad_norm": 3.5434910370317643, "learning_rate": 3.296639629200463e-07, "loss": 0.4807, "step": 570 }, { "epoch": 0.009925428914112883, "grad_norm": 1.924076572945549, "learning_rate": 3.3024333719582846e-07, "loss": 0.4369, "step": 571 }, { "epoch": 0.009942811451615708, "grad_norm": 2.2675156463903665, "learning_rate": 3.3082271147161067e-07, "loss": 0.3029, "step": 572 }, { "epoch": 0.009960193989118531, "grad_norm": 2.1021939978648083, "learning_rate": 3.314020857473928e-07, "loss": 0.3405, "step": 573 }, { "epoch": 0.009977576526621356, "grad_norm": 2.3149577758521658, "learning_rate": 3.3198146002317497e-07, "loss": 0.3414, "step": 574 }, { "epoch": 0.009994959064124181, "grad_norm": 2.7299333125912795, "learning_rate": 3.325608342989571e-07, "loss": 0.5566, "step": 575 }, { "epoch": 0.010012341601627006, "grad_norm": 2.25546189158667, "learning_rate": 3.3314020857473927e-07, "loss": 0.5004, "step": 576 }, { "epoch": 0.01002972413912983, "grad_norm": 2.1592113185336426, "learning_rate": 3.337195828505214e-07, "loss": 0.472, "step": 577 }, { "epoch": 0.010047106676632655, "grad_norm": 2.2368179408822995, "learning_rate": 3.3429895712630357e-07, "loss": 0.3957, "step": 578 }, { "epoch": 0.01006448921413548, "grad_norm": 2.457959923097674, "learning_rate": 3.348783314020857e-07, "loss": 0.3856, "step": 579 }, { "epoch": 0.010081871751638305, "grad_norm": 2.247087347778487, "learning_rate": 3.354577056778679e-07, "loss": 0.3605, "step": 580 }, { "epoch": 0.01009925428914113, "grad_norm": 1.8700542302833931, "learning_rate": 3.3603707995365007e-07, "loss": 0.2546, "step": 581 }, { "epoch": 0.010116636826643953, "grad_norm": 2.8529300346100097, "learning_rate": 3.3661645422943217e-07, "loss": 0.6114, "step": 582 }, { "epoch": 0.010134019364146778, "grad_norm": 1.8886638754463398, "learning_rate": 3.371958285052143e-07, "loss": 0.2607, "step": 583 }, { "epoch": 0.010151401901649603, "grad_norm": 1.8846680285730009, "learning_rate": 3.377752027809965e-07, "loss": 0.3218, "step": 584 }, { "epoch": 0.010168784439152428, "grad_norm": 2.5793811059586034, "learning_rate": 3.3835457705677867e-07, "loss": 0.6508, "step": 585 }, { "epoch": 0.010186166976655251, "grad_norm": 1.547224431227358, "learning_rate": 3.389339513325608e-07, "loss": 0.2821, "step": 586 }, { "epoch": 0.010203549514158076, "grad_norm": 1.254351370900638, "learning_rate": 3.3951332560834297e-07, "loss": 0.2096, "step": 587 }, { "epoch": 0.010220932051660902, "grad_norm": 2.2783617259174584, "learning_rate": 3.400926998841251e-07, "loss": 0.369, "step": 588 }, { "epoch": 0.010238314589163727, "grad_norm": 1.9322457651251292, "learning_rate": 3.4067207415990727e-07, "loss": 0.2816, "step": 589 }, { "epoch": 0.010255697126666552, "grad_norm": 3.1993579743680187, "learning_rate": 3.412514484356894e-07, "loss": 0.3123, "step": 590 }, { "epoch": 0.010273079664169375, "grad_norm": 1.7096226304762459, "learning_rate": 3.418308227114716e-07, "loss": 0.4947, "step": 591 }, { "epoch": 0.0102904622016722, "grad_norm": 2.517768671252262, "learning_rate": 3.424101969872538e-07, "loss": 0.4574, "step": 592 }, { "epoch": 0.010307844739175025, "grad_norm": 2.651615189899926, "learning_rate": 3.429895712630359e-07, "loss": 0.4502, "step": 593 }, { "epoch": 0.01032522727667785, "grad_norm": 4.9735595823952305, "learning_rate": 3.43568945538818e-07, "loss": 0.3991, "step": 594 }, { "epoch": 0.010342609814180673, "grad_norm": 1.6605312817532039, "learning_rate": 3.441483198146002e-07, "loss": 0.4422, "step": 595 }, { "epoch": 0.010359992351683498, "grad_norm": 1.776667276733381, "learning_rate": 3.447276940903824e-07, "loss": 0.419, "step": 596 }, { "epoch": 0.010377374889186324, "grad_norm": 1.8302655750451695, "learning_rate": 3.453070683661645e-07, "loss": 0.5469, "step": 597 }, { "epoch": 0.010394757426689149, "grad_norm": 1.2471700138805075, "learning_rate": 3.458864426419467e-07, "loss": 0.3721, "step": 598 }, { "epoch": 0.010412139964191974, "grad_norm": 1.8389217699906624, "learning_rate": 3.464658169177289e-07, "loss": 0.4561, "step": 599 }, { "epoch": 0.010429522501694797, "grad_norm": 3.2042193355447415, "learning_rate": 3.47045191193511e-07, "loss": 0.5841, "step": 600 }, { "epoch": 0.010446905039197622, "grad_norm": 2.576247063237591, "learning_rate": 3.476245654692931e-07, "loss": 0.4334, "step": 601 }, { "epoch": 0.010464287576700447, "grad_norm": 1.7654781123724215, "learning_rate": 3.482039397450753e-07, "loss": 0.3714, "step": 602 }, { "epoch": 0.010481670114203272, "grad_norm": 15.824869406929327, "learning_rate": 3.487833140208575e-07, "loss": 0.5064, "step": 603 }, { "epoch": 0.010499052651706095, "grad_norm": 2.306036965922114, "learning_rate": 3.4936268829663963e-07, "loss": 0.2747, "step": 604 }, { "epoch": 0.01051643518920892, "grad_norm": 3.3245065879207054, "learning_rate": 3.499420625724218e-07, "loss": 0.4122, "step": 605 }, { "epoch": 0.010533817726711745, "grad_norm": 2.1950170673429015, "learning_rate": 3.505214368482039e-07, "loss": 0.2752, "step": 606 }, { "epoch": 0.01055120026421457, "grad_norm": 2.2426977360389952, "learning_rate": 3.511008111239861e-07, "loss": 0.5621, "step": 607 }, { "epoch": 0.010568582801717396, "grad_norm": 2.552418017222689, "learning_rate": 3.5168018539976823e-07, "loss": 0.4354, "step": 608 }, { "epoch": 0.010585965339220219, "grad_norm": 1.8185955162380707, "learning_rate": 3.522595596755504e-07, "loss": 0.3872, "step": 609 }, { "epoch": 0.010603347876723044, "grad_norm": 2.4463169782571303, "learning_rate": 3.5283893395133253e-07, "loss": 0.5198, "step": 610 }, { "epoch": 0.010620730414225869, "grad_norm": 1.7920952726533126, "learning_rate": 3.5341830822711473e-07, "loss": 0.3888, "step": 611 }, { "epoch": 0.010638112951728694, "grad_norm": 1.8366653165186568, "learning_rate": 3.5399768250289683e-07, "loss": 0.4631, "step": 612 }, { "epoch": 0.010655495489231517, "grad_norm": 2.8426910477800167, "learning_rate": 3.54577056778679e-07, "loss": 0.3402, "step": 613 }, { "epoch": 0.010672878026734342, "grad_norm": 1.9160690695351297, "learning_rate": 3.551564310544612e-07, "loss": 0.376, "step": 614 }, { "epoch": 0.010690260564237167, "grad_norm": 3.4088362126615586, "learning_rate": 3.5573580533024333e-07, "loss": 0.412, "step": 615 }, { "epoch": 0.010707643101739992, "grad_norm": 1.711837970508113, "learning_rate": 3.563151796060255e-07, "loss": 0.2771, "step": 616 }, { "epoch": 0.010725025639242816, "grad_norm": 1.973934403843197, "learning_rate": 3.5689455388180763e-07, "loss": 0.4172, "step": 617 }, { "epoch": 0.01074240817674564, "grad_norm": 1.455278971394672, "learning_rate": 3.574739281575898e-07, "loss": 0.5726, "step": 618 }, { "epoch": 0.010759790714248466, "grad_norm": 3.2005088989848796, "learning_rate": 3.5805330243337193e-07, "loss": 0.2705, "step": 619 }, { "epoch": 0.010777173251751291, "grad_norm": 1.4452722759656753, "learning_rate": 3.586326767091541e-07, "loss": 0.2481, "step": 620 }, { "epoch": 0.010794555789254116, "grad_norm": 2.1676756491457456, "learning_rate": 3.5921205098493623e-07, "loss": 0.4034, "step": 621 }, { "epoch": 0.01081193832675694, "grad_norm": 1.4755603528434205, "learning_rate": 3.5979142526071844e-07, "loss": 0.3705, "step": 622 }, { "epoch": 0.010829320864259764, "grad_norm": 2.5652873897305875, "learning_rate": 3.603707995365006e-07, "loss": 0.4501, "step": 623 }, { "epoch": 0.01084670340176259, "grad_norm": 2.120852697326593, "learning_rate": 3.609501738122827e-07, "loss": 0.5552, "step": 624 }, { "epoch": 0.010864085939265414, "grad_norm": 1.231763365783663, "learning_rate": 3.6152954808806483e-07, "loss": 0.3059, "step": 625 }, { "epoch": 0.010881468476768238, "grad_norm": 1.504368527533019, "learning_rate": 3.6210892236384704e-07, "loss": 0.2565, "step": 626 }, { "epoch": 0.010898851014271063, "grad_norm": 2.014545988061134, "learning_rate": 3.626882966396292e-07, "loss": 0.3642, "step": 627 }, { "epoch": 0.010916233551773888, "grad_norm": 2.185253067183176, "learning_rate": 3.6326767091541134e-07, "loss": 0.2772, "step": 628 }, { "epoch": 0.010933616089276713, "grad_norm": 2.1619328503088058, "learning_rate": 3.638470451911935e-07, "loss": 0.2329, "step": 629 }, { "epoch": 0.010950998626779538, "grad_norm": 1.676496850031014, "learning_rate": 3.644264194669757e-07, "loss": 0.2682, "step": 630 }, { "epoch": 0.010968381164282361, "grad_norm": 2.225921486378188, "learning_rate": 3.650057937427578e-07, "loss": 0.3336, "step": 631 }, { "epoch": 0.010985763701785186, "grad_norm": 1.9911178494047463, "learning_rate": 3.6558516801853994e-07, "loss": 0.47, "step": 632 }, { "epoch": 0.011003146239288011, "grad_norm": 2.662856373869266, "learning_rate": 3.6616454229432214e-07, "loss": 0.7728, "step": 633 }, { "epoch": 0.011020528776790836, "grad_norm": 1.6206296890985878, "learning_rate": 3.667439165701043e-07, "loss": 0.2138, "step": 634 }, { "epoch": 0.01103791131429366, "grad_norm": 1.290261102705079, "learning_rate": 3.6732329084588644e-07, "loss": 0.423, "step": 635 }, { "epoch": 0.011055293851796485, "grad_norm": 2.1986947548485656, "learning_rate": 3.679026651216686e-07, "loss": 0.2515, "step": 636 }, { "epoch": 0.01107267638929931, "grad_norm": 1.6879557456459848, "learning_rate": 3.6848203939745074e-07, "loss": 0.2815, "step": 637 }, { "epoch": 0.011090058926802135, "grad_norm": 3.6267442087647033, "learning_rate": 3.690614136732329e-07, "loss": 0.5761, "step": 638 }, { "epoch": 0.01110744146430496, "grad_norm": 1.9289057461091006, "learning_rate": 3.6964078794901504e-07, "loss": 0.4233, "step": 639 }, { "epoch": 0.011124824001807783, "grad_norm": 2.512400862370963, "learning_rate": 3.702201622247972e-07, "loss": 0.4706, "step": 640 }, { "epoch": 0.011142206539310608, "grad_norm": 3.65604300292617, "learning_rate": 3.707995365005794e-07, "loss": 0.5196, "step": 641 }, { "epoch": 0.011159589076813433, "grad_norm": 3.7254290758834596, "learning_rate": 3.7137891077636154e-07, "loss": 0.3798, "step": 642 }, { "epoch": 0.011176971614316258, "grad_norm": 4.4556005607456095, "learning_rate": 3.7195828505214364e-07, "loss": 0.344, "step": 643 }, { "epoch": 0.011194354151819082, "grad_norm": 2.797678800249918, "learning_rate": 3.725376593279258e-07, "loss": 0.2164, "step": 644 }, { "epoch": 0.011211736689321907, "grad_norm": 3.7880304707711185, "learning_rate": 3.73117033603708e-07, "loss": 0.4697, "step": 645 }, { "epoch": 0.011229119226824732, "grad_norm": 4.559192053722016, "learning_rate": 3.7369640787949015e-07, "loss": 0.3372, "step": 646 }, { "epoch": 0.011246501764327557, "grad_norm": 2.963519172268813, "learning_rate": 3.742757821552723e-07, "loss": 0.3549, "step": 647 }, { "epoch": 0.011263884301830382, "grad_norm": 2.028066117309974, "learning_rate": 3.7485515643105445e-07, "loss": 0.2958, "step": 648 }, { "epoch": 0.011281266839333205, "grad_norm": 1.7696427489280786, "learning_rate": 3.754345307068366e-07, "loss": 0.3478, "step": 649 }, { "epoch": 0.01129864937683603, "grad_norm": 1.9418893128071544, "learning_rate": 3.7601390498261875e-07, "loss": 0.2154, "step": 650 }, { "epoch": 0.011316031914338855, "grad_norm": 1.7403946755524262, "learning_rate": 3.765932792584009e-07, "loss": 0.4071, "step": 651 }, { "epoch": 0.01133341445184168, "grad_norm": 2.023562981181048, "learning_rate": 3.7717265353418305e-07, "loss": 0.5651, "step": 652 }, { "epoch": 0.011350796989344504, "grad_norm": 2.056882811065644, "learning_rate": 3.7775202780996525e-07, "loss": 0.4728, "step": 653 }, { "epoch": 0.011368179526847329, "grad_norm": 1.4525156431506494, "learning_rate": 3.783314020857474e-07, "loss": 0.4146, "step": 654 }, { "epoch": 0.011385562064350154, "grad_norm": 1.1159348205369026, "learning_rate": 3.789107763615295e-07, "loss": 0.3462, "step": 655 }, { "epoch": 0.011402944601852979, "grad_norm": 3.5768555270795543, "learning_rate": 3.794901506373117e-07, "loss": 0.5252, "step": 656 }, { "epoch": 0.011420327139355804, "grad_norm": 3.59134879431975, "learning_rate": 3.8006952491309385e-07, "loss": 0.4627, "step": 657 }, { "epoch": 0.011437709676858627, "grad_norm": 1.8415510984897274, "learning_rate": 3.80648899188876e-07, "loss": 0.3627, "step": 658 }, { "epoch": 0.011455092214361452, "grad_norm": 2.8951203549344284, "learning_rate": 3.8122827346465815e-07, "loss": 0.5016, "step": 659 }, { "epoch": 0.011472474751864277, "grad_norm": 2.5351563169665625, "learning_rate": 3.8180764774044035e-07, "loss": 0.3823, "step": 660 }, { "epoch": 0.011489857289367102, "grad_norm": 2.6461120413026724, "learning_rate": 3.8238702201622245e-07, "loss": 0.3034, "step": 661 }, { "epoch": 0.011507239826869926, "grad_norm": 3.013308763804078, "learning_rate": 3.829663962920046e-07, "loss": 0.7351, "step": 662 }, { "epoch": 0.01152462236437275, "grad_norm": 2.0988353925600127, "learning_rate": 3.8354577056778675e-07, "loss": 0.3146, "step": 663 }, { "epoch": 0.011542004901875576, "grad_norm": 1.9351105482237796, "learning_rate": 3.8412514484356895e-07, "loss": 0.3413, "step": 664 }, { "epoch": 0.0115593874393784, "grad_norm": 1.6177365252123286, "learning_rate": 3.847045191193511e-07, "loss": 0.3881, "step": 665 }, { "epoch": 0.011576769976881226, "grad_norm": 4.762296351431317, "learning_rate": 3.8528389339513325e-07, "loss": 0.8644, "step": 666 }, { "epoch": 0.01159415251438405, "grad_norm": 2.1105535238815696, "learning_rate": 3.8586326767091535e-07, "loss": 0.4277, "step": 667 }, { "epoch": 0.011611535051886874, "grad_norm": 2.825988777755246, "learning_rate": 3.8644264194669755e-07, "loss": 0.4436, "step": 668 }, { "epoch": 0.0116289175893897, "grad_norm": 1.9245207969235594, "learning_rate": 3.870220162224797e-07, "loss": 0.7097, "step": 669 }, { "epoch": 0.011646300126892524, "grad_norm": 2.4636146717018756, "learning_rate": 3.8760139049826185e-07, "loss": 0.2254, "step": 670 }, { "epoch": 0.011663682664395348, "grad_norm": 2.04334700169994, "learning_rate": 3.88180764774044e-07, "loss": 0.3561, "step": 671 }, { "epoch": 0.011681065201898173, "grad_norm": 3.2536897602309236, "learning_rate": 3.887601390498262e-07, "loss": 0.5098, "step": 672 }, { "epoch": 0.011698447739400998, "grad_norm": 2.224376370156393, "learning_rate": 3.893395133256083e-07, "loss": 0.1887, "step": 673 }, { "epoch": 0.011715830276903823, "grad_norm": 1.3505404476970904, "learning_rate": 3.8991888760139045e-07, "loss": 0.2977, "step": 674 }, { "epoch": 0.011733212814406648, "grad_norm": 1.8601975935850517, "learning_rate": 3.9049826187717266e-07, "loss": 0.5245, "step": 675 }, { "epoch": 0.011750595351909471, "grad_norm": 6.687757817338511, "learning_rate": 3.910776361529548e-07, "loss": 0.6551, "step": 676 }, { "epoch": 0.011767977889412296, "grad_norm": 1.8253567947833818, "learning_rate": 3.9165701042873696e-07, "loss": 0.2967, "step": 677 }, { "epoch": 0.011785360426915121, "grad_norm": 2.59579469837857, "learning_rate": 3.922363847045191e-07, "loss": 0.4001, "step": 678 }, { "epoch": 0.011802742964417946, "grad_norm": 2.535424496544725, "learning_rate": 3.9281575898030126e-07, "loss": 0.4506, "step": 679 }, { "epoch": 0.01182012550192077, "grad_norm": 1.9403012840948373, "learning_rate": 3.933951332560834e-07, "loss": 0.2903, "step": 680 }, { "epoch": 0.011837508039423595, "grad_norm": 2.0357211504868933, "learning_rate": 3.9397450753186556e-07, "loss": 0.6915, "step": 681 }, { "epoch": 0.01185489057692642, "grad_norm": 1.6861499152262491, "learning_rate": 3.945538818076477e-07, "loss": 0.3742, "step": 682 }, { "epoch": 0.011872273114429245, "grad_norm": 1.3013525210515184, "learning_rate": 3.951332560834299e-07, "loss": 0.4206, "step": 683 }, { "epoch": 0.01188965565193207, "grad_norm": 3.6031109246900157, "learning_rate": 3.9571263035921206e-07, "loss": 0.3447, "step": 684 }, { "epoch": 0.011907038189434893, "grad_norm": 2.939649694932723, "learning_rate": 3.9629200463499416e-07, "loss": 0.4444, "step": 685 }, { "epoch": 0.011924420726937718, "grad_norm": 3.074890726097189, "learning_rate": 3.968713789107763e-07, "loss": 0.5497, "step": 686 }, { "epoch": 0.011941803264440543, "grad_norm": 1.7204552280163599, "learning_rate": 3.974507531865585e-07, "loss": 0.5337, "step": 687 }, { "epoch": 0.011959185801943368, "grad_norm": 2.6652714552905286, "learning_rate": 3.9803012746234066e-07, "loss": 0.4681, "step": 688 }, { "epoch": 0.011976568339446192, "grad_norm": 1.570715310412138, "learning_rate": 3.986095017381228e-07, "loss": 0.4764, "step": 689 }, { "epoch": 0.011993950876949017, "grad_norm": 1.8337392846266205, "learning_rate": 3.9918887601390496e-07, "loss": 0.2973, "step": 690 }, { "epoch": 0.012011333414451842, "grad_norm": 1.8901536773938432, "learning_rate": 3.997682502896871e-07, "loss": 0.4116, "step": 691 }, { "epoch": 0.012028715951954667, "grad_norm": 1.9089940201065043, "learning_rate": 4.0034762456546926e-07, "loss": 0.4131, "step": 692 }, { "epoch": 0.012046098489457492, "grad_norm": 1.299065589682152, "learning_rate": 4.009269988412514e-07, "loss": 0.3448, "step": 693 }, { "epoch": 0.012063481026960315, "grad_norm": 1.8459117091402188, "learning_rate": 4.0150637311703356e-07, "loss": 0.3108, "step": 694 }, { "epoch": 0.01208086356446314, "grad_norm": 1.146585221699496, "learning_rate": 4.0208574739281576e-07, "loss": 0.1615, "step": 695 }, { "epoch": 0.012098246101965965, "grad_norm": 1.6125770744256758, "learning_rate": 4.026651216685979e-07, "loss": 0.3165, "step": 696 }, { "epoch": 0.01211562863946879, "grad_norm": 3.129934542383049, "learning_rate": 4.0324449594438e-07, "loss": 0.6151, "step": 697 }, { "epoch": 0.012133011176971614, "grad_norm": 3.291314802859083, "learning_rate": 4.038238702201622e-07, "loss": 0.5579, "step": 698 }, { "epoch": 0.012150393714474439, "grad_norm": 1.7179272064719686, "learning_rate": 4.0440324449594436e-07, "loss": 0.3449, "step": 699 }, { "epoch": 0.012167776251977264, "grad_norm": 2.0744574061040186, "learning_rate": 4.049826187717265e-07, "loss": 0.3875, "step": 700 }, { "epoch": 0.012185158789480089, "grad_norm": 2.323338738988889, "learning_rate": 4.0556199304750867e-07, "loss": 0.4665, "step": 701 }, { "epoch": 0.012202541326982914, "grad_norm": 5.203343178813894, "learning_rate": 4.0614136732329087e-07, "loss": 0.3102, "step": 702 }, { "epoch": 0.012219923864485737, "grad_norm": 1.0693097152540674, "learning_rate": 4.0672074159907297e-07, "loss": 0.4127, "step": 703 }, { "epoch": 0.012237306401988562, "grad_norm": 2.559392736689086, "learning_rate": 4.073001158748551e-07, "loss": 0.3311, "step": 704 }, { "epoch": 0.012254688939491387, "grad_norm": 2.052083816975764, "learning_rate": 4.0787949015063727e-07, "loss": 0.341, "step": 705 }, { "epoch": 0.012272071476994212, "grad_norm": 2.8231614451534686, "learning_rate": 4.0845886442641947e-07, "loss": 0.632, "step": 706 }, { "epoch": 0.012289454014497036, "grad_norm": 2.6956959441842505, "learning_rate": 4.090382387022016e-07, "loss": 0.4708, "step": 707 }, { "epoch": 0.01230683655199986, "grad_norm": 2.2840212055261686, "learning_rate": 4.0961761297798377e-07, "loss": 0.4837, "step": 708 }, { "epoch": 0.012324219089502686, "grad_norm": 4.21791433135693, "learning_rate": 4.1019698725376587e-07, "loss": 0.7426, "step": 709 }, { "epoch": 0.01234160162700551, "grad_norm": 2.8981344797586703, "learning_rate": 4.1077636152954807e-07, "loss": 0.4445, "step": 710 }, { "epoch": 0.012358984164508336, "grad_norm": 1.7041884256683912, "learning_rate": 4.113557358053302e-07, "loss": 0.4057, "step": 711 }, { "epoch": 0.012376366702011159, "grad_norm": 1.5853698197786807, "learning_rate": 4.1193511008111237e-07, "loss": 0.4585, "step": 712 }, { "epoch": 0.012393749239513984, "grad_norm": 1.8548044590927155, "learning_rate": 4.125144843568945e-07, "loss": 0.4353, "step": 713 }, { "epoch": 0.012411131777016809, "grad_norm": 1.4933654315131673, "learning_rate": 4.130938586326767e-07, "loss": 0.456, "step": 714 }, { "epoch": 0.012428514314519634, "grad_norm": 2.997581553075141, "learning_rate": 4.1367323290845887e-07, "loss": 0.3656, "step": 715 }, { "epoch": 0.012445896852022457, "grad_norm": 1.9927818015260435, "learning_rate": 4.1425260718424097e-07, "loss": 0.3894, "step": 716 }, { "epoch": 0.012463279389525283, "grad_norm": 2.111688818021432, "learning_rate": 4.1483198146002317e-07, "loss": 0.2504, "step": 717 }, { "epoch": 0.012480661927028108, "grad_norm": 3.3215450350506717, "learning_rate": 4.154113557358053e-07, "loss": 0.2442, "step": 718 }, { "epoch": 0.012498044464530933, "grad_norm": 1.8578973172107722, "learning_rate": 4.1599073001158747e-07, "loss": 0.7335, "step": 719 }, { "epoch": 0.012515427002033758, "grad_norm": 2.523070676337488, "learning_rate": 4.165701042873696e-07, "loss": 0.515, "step": 720 }, { "epoch": 0.012532809539536581, "grad_norm": 3.0841504147278345, "learning_rate": 4.171494785631518e-07, "loss": 0.4936, "step": 721 }, { "epoch": 0.012550192077039406, "grad_norm": 3.2301871978366994, "learning_rate": 4.177288528389339e-07, "loss": 0.278, "step": 722 }, { "epoch": 0.012567574614542231, "grad_norm": 1.6975782389911196, "learning_rate": 4.1830822711471607e-07, "loss": 0.3, "step": 723 }, { "epoch": 0.012584957152045056, "grad_norm": 1.5914337045010623, "learning_rate": 4.188876013904982e-07, "loss": 0.3362, "step": 724 }, { "epoch": 0.01260233968954788, "grad_norm": 1.2042336403312184, "learning_rate": 4.194669756662804e-07, "loss": 0.3863, "step": 725 }, { "epoch": 0.012619722227050705, "grad_norm": 3.6054974141295744, "learning_rate": 4.200463499420626e-07, "loss": 0.68, "step": 726 }, { "epoch": 0.01263710476455353, "grad_norm": 1.6963541625866208, "learning_rate": 4.206257242178447e-07, "loss": 0.3065, "step": 727 }, { "epoch": 0.012654487302056355, "grad_norm": 1.8198353940399394, "learning_rate": 4.212050984936268e-07, "loss": 0.3571, "step": 728 }, { "epoch": 0.01267186983955918, "grad_norm": 2.641794669351685, "learning_rate": 4.2178447276940903e-07, "loss": 0.3899, "step": 729 }, { "epoch": 0.012689252377062003, "grad_norm": 2.7554905256802464, "learning_rate": 4.223638470451912e-07, "loss": 0.379, "step": 730 }, { "epoch": 0.012706634914564828, "grad_norm": 1.8414815559729363, "learning_rate": 4.2294322132097333e-07, "loss": 0.2968, "step": 731 }, { "epoch": 0.012724017452067653, "grad_norm": 1.8605716287087033, "learning_rate": 4.235225955967555e-07, "loss": 0.256, "step": 732 }, { "epoch": 0.012741399989570478, "grad_norm": 1.3024061540370553, "learning_rate": 4.241019698725377e-07, "loss": 0.3531, "step": 733 }, { "epoch": 0.012758782527073301, "grad_norm": 1.810933867791692, "learning_rate": 4.246813441483198e-07, "loss": 0.4203, "step": 734 }, { "epoch": 0.012776165064576126, "grad_norm": 1.5775879154979202, "learning_rate": 4.2526071842410193e-07, "loss": 0.2966, "step": 735 }, { "epoch": 0.012793547602078952, "grad_norm": 1.8197582711047473, "learning_rate": 4.258400926998841e-07, "loss": 0.5692, "step": 736 }, { "epoch": 0.012810930139581777, "grad_norm": 2.375965330050107, "learning_rate": 4.264194669756663e-07, "loss": 0.2847, "step": 737 }, { "epoch": 0.012828312677084602, "grad_norm": 3.1308836871106864, "learning_rate": 4.2699884125144843e-07, "loss": 0.5789, "step": 738 }, { "epoch": 0.012845695214587425, "grad_norm": 1.3503057502361226, "learning_rate": 4.275782155272306e-07, "loss": 0.3517, "step": 739 }, { "epoch": 0.01286307775209025, "grad_norm": 2.840401353960483, "learning_rate": 4.2815758980301273e-07, "loss": 0.3056, "step": 740 }, { "epoch": 0.012880460289593075, "grad_norm": 3.4496833309490724, "learning_rate": 4.287369640787949e-07, "loss": 0.4476, "step": 741 }, { "epoch": 0.0128978428270959, "grad_norm": 1.8964173061890635, "learning_rate": 4.2931633835457703e-07, "loss": 0.4533, "step": 742 }, { "epoch": 0.012915225364598723, "grad_norm": 2.8929074946225093, "learning_rate": 4.298957126303592e-07, "loss": 0.4234, "step": 743 }, { "epoch": 0.012932607902101548, "grad_norm": 2.334355829361739, "learning_rate": 4.304750869061414e-07, "loss": 0.2633, "step": 744 }, { "epoch": 0.012949990439604374, "grad_norm": 2.1950460828332035, "learning_rate": 4.3105446118192353e-07, "loss": 0.3537, "step": 745 }, { "epoch": 0.012967372977107199, "grad_norm": 2.377494983585878, "learning_rate": 4.3163383545770563e-07, "loss": 0.4439, "step": 746 }, { "epoch": 0.012984755514610024, "grad_norm": 1.5410277542905901, "learning_rate": 4.322132097334878e-07, "loss": 0.4415, "step": 747 }, { "epoch": 0.013002138052112847, "grad_norm": 4.045880479599733, "learning_rate": 4.3279258400927e-07, "loss": 0.4119, "step": 748 }, { "epoch": 0.013019520589615672, "grad_norm": 2.156693050204883, "learning_rate": 4.3337195828505213e-07, "loss": 0.4273, "step": 749 }, { "epoch": 0.013036903127118497, "grad_norm": 1.870315747285449, "learning_rate": 4.339513325608343e-07, "loss": 0.4161, "step": 750 }, { "epoch": 0.013054285664621322, "grad_norm": 4.2257238868262, "learning_rate": 4.3453070683661643e-07, "loss": 0.2114, "step": 751 }, { "epoch": 0.013071668202124145, "grad_norm": 3.021477097971275, "learning_rate": 4.351100811123986e-07, "loss": 0.4896, "step": 752 }, { "epoch": 0.01308905073962697, "grad_norm": 3.8438885539494203, "learning_rate": 4.3568945538818073e-07, "loss": 0.6149, "step": 753 }, { "epoch": 0.013106433277129795, "grad_norm": 1.963482952404835, "learning_rate": 4.362688296639629e-07, "loss": 0.3453, "step": 754 }, { "epoch": 0.01312381581463262, "grad_norm": 1.7222462644618577, "learning_rate": 4.3684820393974503e-07, "loss": 0.2955, "step": 755 }, { "epoch": 0.013141198352135446, "grad_norm": 1.4715322399385677, "learning_rate": 4.3742757821552724e-07, "loss": 0.3642, "step": 756 }, { "epoch": 0.013158580889638269, "grad_norm": 2.027865123231848, "learning_rate": 4.380069524913094e-07, "loss": 0.3737, "step": 757 }, { "epoch": 0.013175963427141094, "grad_norm": 1.6853098074275625, "learning_rate": 4.385863267670915e-07, "loss": 0.3228, "step": 758 }, { "epoch": 0.013193345964643919, "grad_norm": 1.9083195274397975, "learning_rate": 4.391657010428737e-07, "loss": 0.3538, "step": 759 }, { "epoch": 0.013210728502146744, "grad_norm": 2.2627100996387304, "learning_rate": 4.3974507531865584e-07, "loss": 0.3693, "step": 760 }, { "epoch": 0.013228111039649567, "grad_norm": 2.5335029812064573, "learning_rate": 4.40324449594438e-07, "loss": 0.3738, "step": 761 }, { "epoch": 0.013245493577152392, "grad_norm": 2.4388886120743374, "learning_rate": 4.4090382387022014e-07, "loss": 0.439, "step": 762 }, { "epoch": 0.013262876114655217, "grad_norm": 2.3246454415729727, "learning_rate": 4.4148319814600234e-07, "loss": 0.5257, "step": 763 }, { "epoch": 0.013280258652158042, "grad_norm": 1.997956362693355, "learning_rate": 4.4206257242178444e-07, "loss": 0.3183, "step": 764 }, { "epoch": 0.013297641189660868, "grad_norm": 2.082004544865105, "learning_rate": 4.426419466975666e-07, "loss": 0.5232, "step": 765 }, { "epoch": 0.01331502372716369, "grad_norm": 1.5568461542467826, "learning_rate": 4.4322132097334874e-07, "loss": 0.529, "step": 766 }, { "epoch": 0.013332406264666516, "grad_norm": 1.5578572313733088, "learning_rate": 4.4380069524913094e-07, "loss": 0.2911, "step": 767 }, { "epoch": 0.013349788802169341, "grad_norm": 2.3737244505514243, "learning_rate": 4.443800695249131e-07, "loss": 0.274, "step": 768 }, { "epoch": 0.013367171339672166, "grad_norm": 1.8185942121023748, "learning_rate": 4.4495944380069524e-07, "loss": 0.5868, "step": 769 }, { "epoch": 0.01338455387717499, "grad_norm": 1.513003805978839, "learning_rate": 4.4553881807647734e-07, "loss": 0.3099, "step": 770 }, { "epoch": 0.013401936414677814, "grad_norm": 1.7177264368639495, "learning_rate": 4.4611819235225954e-07, "loss": 0.4525, "step": 771 }, { "epoch": 0.01341931895218064, "grad_norm": 1.8221323905330142, "learning_rate": 4.466975666280417e-07, "loss": 0.328, "step": 772 }, { "epoch": 0.013436701489683464, "grad_norm": 1.61226503045449, "learning_rate": 4.4727694090382384e-07, "loss": 0.2888, "step": 773 }, { "epoch": 0.01345408402718629, "grad_norm": 1.184565329941797, "learning_rate": 4.47856315179606e-07, "loss": 0.1926, "step": 774 }, { "epoch": 0.013471466564689113, "grad_norm": 2.139450858465226, "learning_rate": 4.484356894553882e-07, "loss": 0.2697, "step": 775 }, { "epoch": 0.013488849102191938, "grad_norm": 5.092920608319597, "learning_rate": 4.490150637311703e-07, "loss": 0.4439, "step": 776 }, { "epoch": 0.013506231639694763, "grad_norm": 1.3666510653967727, "learning_rate": 4.4959443800695244e-07, "loss": 0.3185, "step": 777 }, { "epoch": 0.013523614177197588, "grad_norm": 2.121432259098473, "learning_rate": 4.501738122827346e-07, "loss": 0.9245, "step": 778 }, { "epoch": 0.013540996714700411, "grad_norm": 2.1240449248053723, "learning_rate": 4.507531865585168e-07, "loss": 0.5271, "step": 779 }, { "epoch": 0.013558379252203236, "grad_norm": 2.4914576903539767, "learning_rate": 4.5133256083429895e-07, "loss": 0.265, "step": 780 }, { "epoch": 0.013575761789706061, "grad_norm": 2.4739017365346934, "learning_rate": 4.519119351100811e-07, "loss": 0.3431, "step": 781 }, { "epoch": 0.013593144327208886, "grad_norm": 2.5207147279504083, "learning_rate": 4.524913093858633e-07, "loss": 0.3288, "step": 782 }, { "epoch": 0.01361052686471171, "grad_norm": 2.352982612479814, "learning_rate": 4.530706836616454e-07, "loss": 0.5648, "step": 783 }, { "epoch": 0.013627909402214535, "grad_norm": 1.7044320895818617, "learning_rate": 4.5365005793742755e-07, "loss": 0.3431, "step": 784 }, { "epoch": 0.01364529193971736, "grad_norm": 2.4174747669394216, "learning_rate": 4.542294322132097e-07, "loss": 0.3966, "step": 785 }, { "epoch": 0.013662674477220185, "grad_norm": 1.5952689677750502, "learning_rate": 4.548088064889919e-07, "loss": 0.3678, "step": 786 }, { "epoch": 0.01368005701472301, "grad_norm": 1.6233216686346876, "learning_rate": 4.5538818076477405e-07, "loss": 0.2576, "step": 787 }, { "epoch": 0.013697439552225833, "grad_norm": 2.7738491116771926, "learning_rate": 4.559675550405562e-07, "loss": 0.5122, "step": 788 }, { "epoch": 0.013714822089728658, "grad_norm": 1.5284655323367946, "learning_rate": 4.565469293163383e-07, "loss": 0.2152, "step": 789 }, { "epoch": 0.013732204627231483, "grad_norm": 4.823538647122156, "learning_rate": 4.571263035921205e-07, "loss": 0.7271, "step": 790 }, { "epoch": 0.013749587164734308, "grad_norm": 1.9709009126895742, "learning_rate": 4.5770567786790265e-07, "loss": 0.3171, "step": 791 }, { "epoch": 0.013766969702237132, "grad_norm": 1.4163533540122215, "learning_rate": 4.582850521436848e-07, "loss": 0.2325, "step": 792 }, { "epoch": 0.013784352239739957, "grad_norm": 1.5034083124596749, "learning_rate": 4.5886442641946695e-07, "loss": 0.3755, "step": 793 }, { "epoch": 0.013801734777242782, "grad_norm": 2.0891534173441397, "learning_rate": 4.5944380069524915e-07, "loss": 0.4427, "step": 794 }, { "epoch": 0.013819117314745607, "grad_norm": 2.3003077768764952, "learning_rate": 4.6002317497103125e-07, "loss": 0.3008, "step": 795 }, { "epoch": 0.013836499852248432, "grad_norm": 2.058311844977282, "learning_rate": 4.606025492468134e-07, "loss": 0.5256, "step": 796 }, { "epoch": 0.013853882389751255, "grad_norm": 1.9546105113074292, "learning_rate": 4.6118192352259555e-07, "loss": 0.4993, "step": 797 }, { "epoch": 0.01387126492725408, "grad_norm": 1.7848063386102613, "learning_rate": 4.6176129779837775e-07, "loss": 0.5744, "step": 798 }, { "epoch": 0.013888647464756905, "grad_norm": 1.9396605506117386, "learning_rate": 4.623406720741599e-07, "loss": 0.4392, "step": 799 }, { "epoch": 0.01390603000225973, "grad_norm": 2.1314846972666044, "learning_rate": 4.6292004634994205e-07, "loss": 0.4767, "step": 800 }, { "epoch": 0.013923412539762554, "grad_norm": 2.0329484966767444, "learning_rate": 4.634994206257242e-07, "loss": 0.3165, "step": 801 }, { "epoch": 0.013940795077265379, "grad_norm": 3.00286663901285, "learning_rate": 4.6407879490150635e-07, "loss": 0.4832, "step": 802 }, { "epoch": 0.013958177614768204, "grad_norm": 2.225685202324633, "learning_rate": 4.646581691772885e-07, "loss": 0.3153, "step": 803 }, { "epoch": 0.013975560152271029, "grad_norm": 1.8147063718591223, "learning_rate": 4.6523754345307065e-07, "loss": 0.2537, "step": 804 }, { "epoch": 0.013992942689773854, "grad_norm": 1.5828820727114898, "learning_rate": 4.6581691772885286e-07, "loss": 0.1948, "step": 805 }, { "epoch": 0.014010325227276677, "grad_norm": 1.6540477269873033, "learning_rate": 4.66396292004635e-07, "loss": 0.3373, "step": 806 }, { "epoch": 0.014027707764779502, "grad_norm": 2.751389215324817, "learning_rate": 4.669756662804171e-07, "loss": 0.6246, "step": 807 }, { "epoch": 0.014045090302282327, "grad_norm": 3.363047134274079, "learning_rate": 4.6755504055619925e-07, "loss": 0.4073, "step": 808 }, { "epoch": 0.014062472839785152, "grad_norm": 2.0387616672321696, "learning_rate": 4.6813441483198146e-07, "loss": 0.3548, "step": 809 }, { "epoch": 0.014079855377287976, "grad_norm": 2.45366756541205, "learning_rate": 4.687137891077636e-07, "loss": 0.4774, "step": 810 }, { "epoch": 0.0140972379147908, "grad_norm": 2.4361772663017387, "learning_rate": 4.6929316338354576e-07, "loss": 0.5403, "step": 811 }, { "epoch": 0.014114620452293626, "grad_norm": 2.1613266662161568, "learning_rate": 4.698725376593279e-07, "loss": 0.2467, "step": 812 }, { "epoch": 0.01413200298979645, "grad_norm": 2.819289064011703, "learning_rate": 4.7045191193511006e-07, "loss": 0.4138, "step": 813 }, { "epoch": 0.014149385527299276, "grad_norm": 1.1995113329133564, "learning_rate": 4.710312862108922e-07, "loss": 0.2485, "step": 814 }, { "epoch": 0.0141667680648021, "grad_norm": 1.6475830669125533, "learning_rate": 4.7161066048667436e-07, "loss": 0.4583, "step": 815 }, { "epoch": 0.014184150602304924, "grad_norm": 2.075050491249255, "learning_rate": 4.721900347624565e-07, "loss": 0.6413, "step": 816 }, { "epoch": 0.01420153313980775, "grad_norm": 1.550968782795801, "learning_rate": 4.727694090382387e-07, "loss": 0.2568, "step": 817 }, { "epoch": 0.014218915677310574, "grad_norm": 2.5943434472053113, "learning_rate": 4.7334878331402086e-07, "loss": 0.5098, "step": 818 }, { "epoch": 0.014236298214813398, "grad_norm": 1.9687034768272014, "learning_rate": 4.7392815758980296e-07, "loss": 0.3724, "step": 819 }, { "epoch": 0.014253680752316223, "grad_norm": 2.0726664719055994, "learning_rate": 4.745075318655851e-07, "loss": 0.209, "step": 820 }, { "epoch": 0.014271063289819048, "grad_norm": 2.4893487741451037, "learning_rate": 4.750869061413673e-07, "loss": 0.5853, "step": 821 }, { "epoch": 0.014288445827321873, "grad_norm": 1.846424497248688, "learning_rate": 4.7566628041714946e-07, "loss": 0.3444, "step": 822 }, { "epoch": 0.014305828364824698, "grad_norm": 1.8949709234903847, "learning_rate": 4.762456546929316e-07, "loss": 0.2662, "step": 823 }, { "epoch": 0.014323210902327521, "grad_norm": 2.3839628747678407, "learning_rate": 4.768250289687138e-07, "loss": 0.4447, "step": 824 }, { "epoch": 0.014340593439830346, "grad_norm": 8.145048697629743, "learning_rate": 4.774044032444959e-07, "loss": 0.5389, "step": 825 }, { "epoch": 0.014357975977333171, "grad_norm": 1.9667758117111513, "learning_rate": 4.779837775202781e-07, "loss": 0.5274, "step": 826 }, { "epoch": 0.014375358514835996, "grad_norm": 2.847035292575947, "learning_rate": 4.785631517960602e-07, "loss": 0.3348, "step": 827 }, { "epoch": 0.01439274105233882, "grad_norm": 2.215568860726107, "learning_rate": 4.791425260718424e-07, "loss": 0.5054, "step": 828 }, { "epoch": 0.014410123589841645, "grad_norm": 3.3132816485610554, "learning_rate": 4.797219003476245e-07, "loss": 0.5785, "step": 829 }, { "epoch": 0.01442750612734447, "grad_norm": 3.8350859709512566, "learning_rate": 4.803012746234067e-07, "loss": 0.6205, "step": 830 }, { "epoch": 0.014444888664847295, "grad_norm": 1.7588112836850887, "learning_rate": 4.808806488991888e-07, "loss": 0.3264, "step": 831 }, { "epoch": 0.01446227120235012, "grad_norm": 1.9369266695645067, "learning_rate": 4.814600231749711e-07, "loss": 0.5058, "step": 832 }, { "epoch": 0.014479653739852943, "grad_norm": 2.6089921336397373, "learning_rate": 4.820393974507532e-07, "loss": 0.3915, "step": 833 }, { "epoch": 0.014497036277355768, "grad_norm": 9.385458888574702, "learning_rate": 4.826187717265353e-07, "loss": 0.4886, "step": 834 }, { "epoch": 0.014514418814858593, "grad_norm": 3.195301918782947, "learning_rate": 4.831981460023174e-07, "loss": 0.5781, "step": 835 }, { "epoch": 0.014531801352361418, "grad_norm": 10.29310097610288, "learning_rate": 4.837775202780997e-07, "loss": 0.4751, "step": 836 }, { "epoch": 0.014549183889864242, "grad_norm": 2.200643839268026, "learning_rate": 4.843568945538818e-07, "loss": 0.3196, "step": 837 }, { "epoch": 0.014566566427367067, "grad_norm": 2.3158100789856326, "learning_rate": 4.84936268829664e-07, "loss": 0.2991, "step": 838 }, { "epoch": 0.014583948964869892, "grad_norm": 4.288492984864219, "learning_rate": 4.855156431054461e-07, "loss": 0.2868, "step": 839 }, { "epoch": 0.014601331502372717, "grad_norm": 2.7340826759424925, "learning_rate": 4.860950173812283e-07, "loss": 0.4009, "step": 840 }, { "epoch": 0.014618714039875542, "grad_norm": 2.3487521292193536, "learning_rate": 4.866743916570104e-07, "loss": 0.4434, "step": 841 }, { "epoch": 0.014636096577378365, "grad_norm": 1.9979671740786473, "learning_rate": 4.872537659327926e-07, "loss": 0.3371, "step": 842 }, { "epoch": 0.01465347911488119, "grad_norm": 3.5493679394672037, "learning_rate": 4.878331402085747e-07, "loss": 0.5937, "step": 843 }, { "epoch": 0.014670861652384015, "grad_norm": 4.2353028069410925, "learning_rate": 4.884125144843569e-07, "loss": 0.3066, "step": 844 }, { "epoch": 0.01468824418988684, "grad_norm": 1.7021709743947822, "learning_rate": 4.88991888760139e-07, "loss": 0.5144, "step": 845 }, { "epoch": 0.014705626727389664, "grad_norm": 1.8697398226220534, "learning_rate": 4.895712630359212e-07, "loss": 0.5026, "step": 846 }, { "epoch": 0.014723009264892489, "grad_norm": 2.206068280864719, "learning_rate": 4.901506373117033e-07, "loss": 0.5597, "step": 847 }, { "epoch": 0.014740391802395314, "grad_norm": 1.375650671044022, "learning_rate": 4.907300115874855e-07, "loss": 0.3357, "step": 848 }, { "epoch": 0.014757774339898139, "grad_norm": 2.383964751345434, "learning_rate": 4.913093858632676e-07, "loss": 0.2228, "step": 849 }, { "epoch": 0.014775156877400964, "grad_norm": 1.7999530968736062, "learning_rate": 4.918887601390498e-07, "loss": 0.4032, "step": 850 }, { "epoch": 0.014792539414903787, "grad_norm": 2.4899642580386567, "learning_rate": 4.92468134414832e-07, "loss": 0.3582, "step": 851 }, { "epoch": 0.014809921952406612, "grad_norm": 2.2681645930790513, "learning_rate": 4.930475086906141e-07, "loss": 0.6218, "step": 852 }, { "epoch": 0.014827304489909437, "grad_norm": 1.7959862623634548, "learning_rate": 4.936268829663962e-07, "loss": 0.3841, "step": 853 }, { "epoch": 0.014844687027412262, "grad_norm": 1.6158496729739276, "learning_rate": 4.942062572421784e-07, "loss": 0.4717, "step": 854 }, { "epoch": 0.014862069564915086, "grad_norm": 2.1544246287431337, "learning_rate": 4.947856315179606e-07, "loss": 0.3279, "step": 855 }, { "epoch": 0.01487945210241791, "grad_norm": 2.55297013644433, "learning_rate": 4.953650057937428e-07, "loss": 0.8241, "step": 856 }, { "epoch": 0.014896834639920736, "grad_norm": 1.6682553443656314, "learning_rate": 4.959443800695249e-07, "loss": 0.2705, "step": 857 }, { "epoch": 0.01491421717742356, "grad_norm": 2.1946592438182413, "learning_rate": 4.96523754345307e-07, "loss": 0.355, "step": 858 }, { "epoch": 0.014931599714926386, "grad_norm": 1.2965512851352399, "learning_rate": 4.971031286210892e-07, "loss": 0.3754, "step": 859 }, { "epoch": 0.014948982252429209, "grad_norm": 4.229369284296741, "learning_rate": 4.976825028968714e-07, "loss": 0.5191, "step": 860 }, { "epoch": 0.014966364789932034, "grad_norm": 2.054232417676685, "learning_rate": 4.982618771726535e-07, "loss": 0.4825, "step": 861 }, { "epoch": 0.014983747327434859, "grad_norm": 1.8689596639819541, "learning_rate": 4.988412514484357e-07, "loss": 0.3877, "step": 862 }, { "epoch": 0.015001129864937684, "grad_norm": 2.252752406239022, "learning_rate": 4.994206257242178e-07, "loss": 0.4556, "step": 863 }, { "epoch": 0.015018512402440507, "grad_norm": 2.0910880360404787, "learning_rate": 5e-07, "loss": 0.4133, "step": 864 }, { "epoch": 0.015035894939943333, "grad_norm": 1.9023793495550905, "learning_rate": 5.005793742757821e-07, "loss": 0.6526, "step": 865 }, { "epoch": 0.015053277477446158, "grad_norm": 1.4297595968725425, "learning_rate": 5.011587485515643e-07, "loss": 0.5539, "step": 866 }, { "epoch": 0.015070660014948983, "grad_norm": 2.0883337855385085, "learning_rate": 5.017381228273464e-07, "loss": 0.2539, "step": 867 }, { "epoch": 0.015088042552451808, "grad_norm": 2.0184045952773113, "learning_rate": 5.023174971031287e-07, "loss": 0.3721, "step": 868 }, { "epoch": 0.015105425089954631, "grad_norm": 2.025533255769914, "learning_rate": 5.028968713789108e-07, "loss": 0.4442, "step": 869 }, { "epoch": 0.015122807627457456, "grad_norm": 2.6926405797600084, "learning_rate": 5.03476245654693e-07, "loss": 0.3127, "step": 870 }, { "epoch": 0.015140190164960281, "grad_norm": 1.6439524042791247, "learning_rate": 5.04055619930475e-07, "loss": 0.4417, "step": 871 }, { "epoch": 0.015157572702463106, "grad_norm": 2.4454832674501903, "learning_rate": 5.046349942062572e-07, "loss": 0.7798, "step": 872 }, { "epoch": 0.01517495523996593, "grad_norm": 2.331329733044161, "learning_rate": 5.052143684820393e-07, "loss": 0.6087, "step": 873 }, { "epoch": 0.015192337777468755, "grad_norm": 1.9423502453283197, "learning_rate": 5.057937427578215e-07, "loss": 0.2244, "step": 874 }, { "epoch": 0.01520972031497158, "grad_norm": 3.2201226894821957, "learning_rate": 5.063731170336036e-07, "loss": 0.3231, "step": 875 }, { "epoch": 0.015227102852474405, "grad_norm": 1.9988606945678025, "learning_rate": 5.069524913093859e-07, "loss": 0.7178, "step": 876 }, { "epoch": 0.01524448538997723, "grad_norm": 11.276092804620001, "learning_rate": 5.07531865585168e-07, "loss": 0.6222, "step": 877 }, { "epoch": 0.015261867927480053, "grad_norm": 2.2330477280351557, "learning_rate": 5.081112398609502e-07, "loss": 0.3346, "step": 878 }, { "epoch": 0.015279250464982878, "grad_norm": 2.5465617537010483, "learning_rate": 5.086906141367323e-07, "loss": 0.5076, "step": 879 }, { "epoch": 0.015296633002485703, "grad_norm": 3.3587136183993858, "learning_rate": 5.092699884125145e-07, "loss": 0.2466, "step": 880 }, { "epoch": 0.015314015539988528, "grad_norm": 1.6358659274768315, "learning_rate": 5.098493626882966e-07, "loss": 0.6024, "step": 881 }, { "epoch": 0.015331398077491351, "grad_norm": 4.050112209258062, "learning_rate": 5.104287369640788e-07, "loss": 0.5181, "step": 882 }, { "epoch": 0.015348780614994176, "grad_norm": 2.2005517607734477, "learning_rate": 5.110081112398608e-07, "loss": 0.2773, "step": 883 }, { "epoch": 0.015366163152497002, "grad_norm": 2.2812472915504047, "learning_rate": 5.115874855156431e-07, "loss": 0.5007, "step": 884 }, { "epoch": 0.015383545689999827, "grad_norm": 1.8619089862025395, "learning_rate": 5.121668597914252e-07, "loss": 0.4322, "step": 885 }, { "epoch": 0.015400928227502652, "grad_norm": 2.5679521035890214, "learning_rate": 5.127462340672074e-07, "loss": 0.6119, "step": 886 }, { "epoch": 0.015418310765005475, "grad_norm": 2.2322743988212506, "learning_rate": 5.133256083429895e-07, "loss": 0.4126, "step": 887 }, { "epoch": 0.0154356933025083, "grad_norm": 5.323369580232072, "learning_rate": 5.139049826187717e-07, "loss": 0.5219, "step": 888 }, { "epoch": 0.015453075840011125, "grad_norm": 1.5378215967022726, "learning_rate": 5.144843568945538e-07, "loss": 0.4077, "step": 889 }, { "epoch": 0.01547045837751395, "grad_norm": 1.5113056144879224, "learning_rate": 5.15063731170336e-07, "loss": 0.3389, "step": 890 }, { "epoch": 0.015487840915016773, "grad_norm": 1.512084563485028, "learning_rate": 5.156431054461182e-07, "loss": 0.4383, "step": 891 }, { "epoch": 0.015505223452519598, "grad_norm": 1.8401193563036697, "learning_rate": 5.162224797219004e-07, "loss": 1.0004, "step": 892 }, { "epoch": 0.015522605990022423, "grad_norm": 2.7967160944267158, "learning_rate": 5.168018539976825e-07, "loss": 0.4575, "step": 893 }, { "epoch": 0.015539988527525249, "grad_norm": 2.2590737626117248, "learning_rate": 5.173812282734647e-07, "loss": 0.5331, "step": 894 }, { "epoch": 0.015557371065028074, "grad_norm": 1.3305743128645624, "learning_rate": 5.179606025492467e-07, "loss": 0.3679, "step": 895 }, { "epoch": 0.015574753602530897, "grad_norm": 1.876321299565446, "learning_rate": 5.185399768250289e-07, "loss": 0.4916, "step": 896 }, { "epoch": 0.015592136140033722, "grad_norm": 2.172147034017286, "learning_rate": 5.19119351100811e-07, "loss": 0.7875, "step": 897 }, { "epoch": 0.015609518677536547, "grad_norm": 1.918553397644419, "learning_rate": 5.196987253765932e-07, "loss": 0.5061, "step": 898 }, { "epoch": 0.01562690121503937, "grad_norm": 2.439251941745649, "learning_rate": 5.202780996523754e-07, "loss": 0.2986, "step": 899 }, { "epoch": 0.015644283752542197, "grad_norm": 1.4323642330740092, "learning_rate": 5.208574739281576e-07, "loss": 0.2098, "step": 900 }, { "epoch": 0.01566166629004502, "grad_norm": 1.8570140400225943, "learning_rate": 5.214368482039397e-07, "loss": 0.4306, "step": 901 }, { "epoch": 0.015679048827547844, "grad_norm": 2.3545877090824896, "learning_rate": 5.220162224797219e-07, "loss": 0.3983, "step": 902 }, { "epoch": 0.01569643136505067, "grad_norm": 1.421401085323632, "learning_rate": 5.22595596755504e-07, "loss": 0.3082, "step": 903 }, { "epoch": 0.015713813902553494, "grad_norm": 1.7049781370989454, "learning_rate": 5.231749710312862e-07, "loss": 0.5409, "step": 904 }, { "epoch": 0.01573119644005632, "grad_norm": 2.1288014359928704, "learning_rate": 5.237543453070683e-07, "loss": 0.2786, "step": 905 }, { "epoch": 0.015748578977559144, "grad_norm": 2.107071668704884, "learning_rate": 5.243337195828505e-07, "loss": 0.3296, "step": 906 }, { "epoch": 0.015765961515061967, "grad_norm": 2.0946796567591037, "learning_rate": 5.249130938586326e-07, "loss": 0.4541, "step": 907 }, { "epoch": 0.015783344052564794, "grad_norm": 1.7033143218032227, "learning_rate": 5.254924681344148e-07, "loss": 0.3718, "step": 908 }, { "epoch": 0.015800726590067617, "grad_norm": 2.8448954599843073, "learning_rate": 5.260718424101969e-07, "loss": 0.267, "step": 909 }, { "epoch": 0.015818109127570444, "grad_norm": 1.93393093313239, "learning_rate": 5.266512166859791e-07, "loss": 0.2147, "step": 910 }, { "epoch": 0.015835491665073267, "grad_norm": 2.2988573187044086, "learning_rate": 5.272305909617612e-07, "loss": 0.3524, "step": 911 }, { "epoch": 0.01585287420257609, "grad_norm": 6.630199053664004, "learning_rate": 5.278099652375434e-07, "loss": 0.3512, "step": 912 }, { "epoch": 0.015870256740078918, "grad_norm": 2.4426692411037734, "learning_rate": 5.283893395133255e-07, "loss": 0.7331, "step": 913 }, { "epoch": 0.01588763927758174, "grad_norm": 2.0414509990922323, "learning_rate": 5.289687137891078e-07, "loss": 0.2837, "step": 914 }, { "epoch": 0.015905021815084568, "grad_norm": 1.5726863156494284, "learning_rate": 5.2954808806489e-07, "loss": 0.5426, "step": 915 }, { "epoch": 0.01592240435258739, "grad_norm": 2.233111166841694, "learning_rate": 5.301274623406721e-07, "loss": 0.6552, "step": 916 }, { "epoch": 0.015939786890090214, "grad_norm": 1.9978618731775875, "learning_rate": 5.307068366164542e-07, "loss": 0.2168, "step": 917 }, { "epoch": 0.01595716942759304, "grad_norm": 2.9532090636530657, "learning_rate": 5.312862108922364e-07, "loss": 0.656, "step": 918 }, { "epoch": 0.015974551965095864, "grad_norm": 1.8138927786098014, "learning_rate": 5.318655851680184e-07, "loss": 0.436, "step": 919 }, { "epoch": 0.015991934502598688, "grad_norm": 10.718958499426241, "learning_rate": 5.324449594438006e-07, "loss": 0.3934, "step": 920 }, { "epoch": 0.016009317040101514, "grad_norm": 1.553836130806356, "learning_rate": 5.330243337195827e-07, "loss": 0.3878, "step": 921 }, { "epoch": 0.016026699577604338, "grad_norm": 2.0687244800470763, "learning_rate": 5.33603707995365e-07, "loss": 0.4045, "step": 922 }, { "epoch": 0.016044082115107165, "grad_norm": 2.260187113163865, "learning_rate": 5.341830822711472e-07, "loss": 0.5474, "step": 923 }, { "epoch": 0.016061464652609988, "grad_norm": 2.5734354762942546, "learning_rate": 5.347624565469293e-07, "loss": 0.5214, "step": 924 }, { "epoch": 0.01607884719011281, "grad_norm": 2.9356519663672747, "learning_rate": 5.353418308227115e-07, "loss": 0.4095, "step": 925 }, { "epoch": 0.016096229727615638, "grad_norm": 2.1666369869485425, "learning_rate": 5.359212050984936e-07, "loss": 0.4035, "step": 926 }, { "epoch": 0.01611361226511846, "grad_norm": 1.0296080525216469, "learning_rate": 5.365005793742758e-07, "loss": 0.4937, "step": 927 }, { "epoch": 0.016130994802621288, "grad_norm": 2.537928954542445, "learning_rate": 5.370799536500579e-07, "loss": 0.3616, "step": 928 }, { "epoch": 0.01614837734012411, "grad_norm": 2.1339422814590128, "learning_rate": 5.376593279258402e-07, "loss": 0.484, "step": 929 }, { "epoch": 0.016165759877626935, "grad_norm": 2.06851544038332, "learning_rate": 5.382387022016223e-07, "loss": 0.3011, "step": 930 }, { "epoch": 0.01618314241512976, "grad_norm": 2.03732905750119, "learning_rate": 5.388180764774044e-07, "loss": 0.4304, "step": 931 }, { "epoch": 0.016200524952632585, "grad_norm": 2.6743717221265166, "learning_rate": 5.393974507531865e-07, "loss": 0.3265, "step": 932 }, { "epoch": 0.01621790749013541, "grad_norm": 2.1625075606117163, "learning_rate": 5.399768250289687e-07, "loss": 0.6847, "step": 933 }, { "epoch": 0.016235290027638235, "grad_norm": 1.8707694579184282, "learning_rate": 5.405561993047508e-07, "loss": 0.4417, "step": 934 }, { "epoch": 0.016252672565141058, "grad_norm": 3.1363223031446297, "learning_rate": 5.41135573580533e-07, "loss": 0.3276, "step": 935 }, { "epoch": 0.016270055102643885, "grad_norm": 3.0262786904359427, "learning_rate": 5.417149478563151e-07, "loss": 0.5134, "step": 936 }, { "epoch": 0.01628743764014671, "grad_norm": 2.187676182395887, "learning_rate": 5.422943221320974e-07, "loss": 0.3243, "step": 937 }, { "epoch": 0.01630482017764953, "grad_norm": 1.7082027373727682, "learning_rate": 5.428736964078795e-07, "loss": 0.4985, "step": 938 }, { "epoch": 0.01632220271515236, "grad_norm": 1.9465236414113911, "learning_rate": 5.434530706836617e-07, "loss": 0.6303, "step": 939 }, { "epoch": 0.01633958525265518, "grad_norm": 3.381039735533692, "learning_rate": 5.440324449594438e-07, "loss": 0.5474, "step": 940 }, { "epoch": 0.01635696779015801, "grad_norm": 2.193459381502858, "learning_rate": 5.44611819235226e-07, "loss": 0.3328, "step": 941 }, { "epoch": 0.016374350327660832, "grad_norm": 2.300572849353857, "learning_rate": 5.451911935110081e-07, "loss": 0.3787, "step": 942 }, { "epoch": 0.016391732865163655, "grad_norm": 1.3229179809458453, "learning_rate": 5.457705677867902e-07, "loss": 0.4841, "step": 943 }, { "epoch": 0.016409115402666482, "grad_norm": 1.4953388153927034, "learning_rate": 5.463499420625723e-07, "loss": 0.2982, "step": 944 }, { "epoch": 0.016426497940169305, "grad_norm": 1.973241063578992, "learning_rate": 5.469293163383546e-07, "loss": 0.1904, "step": 945 }, { "epoch": 0.016443880477672132, "grad_norm": 1.56310524010764, "learning_rate": 5.475086906141367e-07, "loss": 0.3317, "step": 946 }, { "epoch": 0.016461263015174955, "grad_norm": 1.7048871490278605, "learning_rate": 5.480880648899189e-07, "loss": 0.4115, "step": 947 }, { "epoch": 0.01647864555267778, "grad_norm": 3.0432195049800694, "learning_rate": 5.48667439165701e-07, "loss": 0.4401, "step": 948 }, { "epoch": 0.016496028090180605, "grad_norm": 2.994347745686001, "learning_rate": 5.492468134414832e-07, "loss": 0.4969, "step": 949 }, { "epoch": 0.01651341062768343, "grad_norm": 9.944998075866357, "learning_rate": 5.498261877172653e-07, "loss": 0.8252, "step": 950 }, { "epoch": 0.016530793165186256, "grad_norm": 1.8980004018205585, "learning_rate": 5.504055619930475e-07, "loss": 0.8259, "step": 951 }, { "epoch": 0.01654817570268908, "grad_norm": 2.196034394025077, "learning_rate": 5.509849362688297e-07, "loss": 0.5687, "step": 952 }, { "epoch": 0.016565558240191902, "grad_norm": 3.025980788414251, "learning_rate": 5.515643105446119e-07, "loss": 0.3513, "step": 953 }, { "epoch": 0.01658294077769473, "grad_norm": 2.766210916468148, "learning_rate": 5.52143684820394e-07, "loss": 0.5386, "step": 954 }, { "epoch": 0.016600323315197552, "grad_norm": 2.322026625432549, "learning_rate": 5.527230590961762e-07, "loss": 0.2757, "step": 955 }, { "epoch": 0.016617705852700376, "grad_norm": 1.5156915669753506, "learning_rate": 5.533024333719582e-07, "loss": 0.2898, "step": 956 }, { "epoch": 0.016635088390203202, "grad_norm": 1.824868341010163, "learning_rate": 5.538818076477404e-07, "loss": 0.7279, "step": 957 }, { "epoch": 0.016652470927706026, "grad_norm": 2.9618795396213358, "learning_rate": 5.544611819235225e-07, "loss": 0.517, "step": 958 }, { "epoch": 0.016669853465208852, "grad_norm": 2.0667657830460873, "learning_rate": 5.550405561993047e-07, "loss": 0.4976, "step": 959 }, { "epoch": 0.016687236002711676, "grad_norm": 1.4874091973321892, "learning_rate": 5.556199304750869e-07, "loss": 0.5866, "step": 960 }, { "epoch": 0.0167046185402145, "grad_norm": 3.925565198896773, "learning_rate": 5.561993047508691e-07, "loss": 0.6134, "step": 961 }, { "epoch": 0.016722001077717326, "grad_norm": 5.1013485492428465, "learning_rate": 5.567786790266512e-07, "loss": 0.3676, "step": 962 }, { "epoch": 0.01673938361522015, "grad_norm": 1.6338849392262296, "learning_rate": 5.573580533024334e-07, "loss": 0.3491, "step": 963 }, { "epoch": 0.016756766152722976, "grad_norm": 5.682187214932303, "learning_rate": 5.579374275782155e-07, "loss": 0.573, "step": 964 }, { "epoch": 0.0167741486902258, "grad_norm": 2.444327010987938, "learning_rate": 5.585168018539977e-07, "loss": 0.3106, "step": 965 }, { "epoch": 0.016791531227728623, "grad_norm": 1.5673173965072924, "learning_rate": 5.590961761297798e-07, "loss": 0.6783, "step": 966 }, { "epoch": 0.01680891376523145, "grad_norm": 2.21627306976194, "learning_rate": 5.59675550405562e-07, "loss": 0.9037, "step": 967 }, { "epoch": 0.016826296302734273, "grad_norm": 1.7448472686462437, "learning_rate": 5.602549246813441e-07, "loss": 0.408, "step": 968 }, { "epoch": 0.0168436788402371, "grad_norm": 1.707017154606945, "learning_rate": 5.608342989571263e-07, "loss": 0.2802, "step": 969 }, { "epoch": 0.016861061377739923, "grad_norm": 1.9468893778399583, "learning_rate": 5.614136732329084e-07, "loss": 0.4253, "step": 970 }, { "epoch": 0.016878443915242746, "grad_norm": 1.2607978976389917, "learning_rate": 5.619930475086906e-07, "loss": 0.419, "step": 971 }, { "epoch": 0.016895826452745573, "grad_norm": 1.8787292505361237, "learning_rate": 5.625724217844727e-07, "loss": 0.3836, "step": 972 }, { "epoch": 0.016913208990248396, "grad_norm": 1.6712770581947214, "learning_rate": 5.631517960602549e-07, "loss": 0.5113, "step": 973 }, { "epoch": 0.01693059152775122, "grad_norm": 2.150784659354044, "learning_rate": 5.63731170336037e-07, "loss": 0.3107, "step": 974 }, { "epoch": 0.016947974065254046, "grad_norm": 2.737314579397427, "learning_rate": 5.643105446118193e-07, "loss": 0.4833, "step": 975 }, { "epoch": 0.01696535660275687, "grad_norm": 3.5447998504915925, "learning_rate": 5.648899188876014e-07, "loss": 0.7084, "step": 976 }, { "epoch": 0.016982739140259696, "grad_norm": 1.4573240722289, "learning_rate": 5.654692931633836e-07, "loss": 0.7703, "step": 977 }, { "epoch": 0.01700012167776252, "grad_norm": 2.9713794470349173, "learning_rate": 5.660486674391657e-07, "loss": 0.2321, "step": 978 }, { "epoch": 0.017017504215265343, "grad_norm": 2.0133301996812087, "learning_rate": 5.666280417149479e-07, "loss": 0.4859, "step": 979 }, { "epoch": 0.01703488675276817, "grad_norm": 2.3871590748866485, "learning_rate": 5.672074159907299e-07, "loss": 0.5576, "step": 980 }, { "epoch": 0.017052269290270993, "grad_norm": 2.3129243942985775, "learning_rate": 5.677867902665121e-07, "loss": 0.4667, "step": 981 }, { "epoch": 0.01706965182777382, "grad_norm": 2.194094326205224, "learning_rate": 5.683661645422942e-07, "loss": 0.378, "step": 982 }, { "epoch": 0.017087034365276643, "grad_norm": 1.8925766456942028, "learning_rate": 5.689455388180765e-07, "loss": 0.6239, "step": 983 }, { "epoch": 0.017104416902779467, "grad_norm": 2.666412235277584, "learning_rate": 5.695249130938586e-07, "loss": 0.5669, "step": 984 }, { "epoch": 0.017121799440282293, "grad_norm": 2.8636825164285584, "learning_rate": 5.701042873696408e-07, "loss": 0.6151, "step": 985 }, { "epoch": 0.017139181977785117, "grad_norm": 3.1723499289391306, "learning_rate": 5.706836616454229e-07, "loss": 0.4992, "step": 986 }, { "epoch": 0.017156564515287943, "grad_norm": 3.2805791663513038, "learning_rate": 5.712630359212051e-07, "loss": 0.5808, "step": 987 }, { "epoch": 0.017173947052790767, "grad_norm": 1.8300149970698456, "learning_rate": 5.718424101969872e-07, "loss": 0.4971, "step": 988 }, { "epoch": 0.01719132959029359, "grad_norm": 3.288485341802352, "learning_rate": 5.724217844727694e-07, "loss": 0.5185, "step": 989 }, { "epoch": 0.017208712127796417, "grad_norm": 2.838640689693225, "learning_rate": 5.730011587485515e-07, "loss": 0.7771, "step": 990 }, { "epoch": 0.01722609466529924, "grad_norm": 3.7599796265394154, "learning_rate": 5.735805330243338e-07, "loss": 0.3056, "step": 991 }, { "epoch": 0.017243477202802063, "grad_norm": 1.9992597317539142, "learning_rate": 5.741599073001158e-07, "loss": 0.288, "step": 992 }, { "epoch": 0.01726085974030489, "grad_norm": 5.084889290235409, "learning_rate": 5.74739281575898e-07, "loss": 0.363, "step": 993 }, { "epoch": 0.017278242277807714, "grad_norm": 1.762223447952449, "learning_rate": 5.753186558516801e-07, "loss": 0.1805, "step": 994 }, { "epoch": 0.01729562481531054, "grad_norm": 1.7564240776439297, "learning_rate": 5.758980301274623e-07, "loss": 0.2908, "step": 995 }, { "epoch": 0.017313007352813364, "grad_norm": 2.3729012436693604, "learning_rate": 5.764774044032444e-07, "loss": 0.5941, "step": 996 }, { "epoch": 0.017330389890316187, "grad_norm": 2.2220820276886455, "learning_rate": 5.770567786790266e-07, "loss": 0.2794, "step": 997 }, { "epoch": 0.017347772427819014, "grad_norm": 3.0101050206820803, "learning_rate": 5.776361529548088e-07, "loss": 0.6614, "step": 998 }, { "epoch": 0.017365154965321837, "grad_norm": 1.8434321974941348, "learning_rate": 5.78215527230591e-07, "loss": 0.3145, "step": 999 }, { "epoch": 0.017382537502824664, "grad_norm": 2.4722990835304826, "learning_rate": 5.787949015063731e-07, "loss": 0.346, "step": 1000 }, { "epoch": 0.017399920040327487, "grad_norm": 3.9249926078966313, "learning_rate": 5.793742757821553e-07, "loss": 0.52, "step": 1001 }, { "epoch": 0.01741730257783031, "grad_norm": 3.023031445405531, "learning_rate": 5.799536500579374e-07, "loss": 0.6534, "step": 1002 }, { "epoch": 0.017434685115333137, "grad_norm": 1.3007031853035977, "learning_rate": 5.805330243337196e-07, "loss": 0.6689, "step": 1003 }, { "epoch": 0.01745206765283596, "grad_norm": 1.7891826980131633, "learning_rate": 5.811123986095016e-07, "loss": 0.4006, "step": 1004 }, { "epoch": 0.017469450190338787, "grad_norm": 1.7922660461226265, "learning_rate": 5.816917728852838e-07, "loss": 0.5156, "step": 1005 }, { "epoch": 0.01748683272784161, "grad_norm": 2.6830117414890906, "learning_rate": 5.82271147161066e-07, "loss": 0.6588, "step": 1006 }, { "epoch": 0.017504215265344434, "grad_norm": 1.8503100151580678, "learning_rate": 5.828505214368482e-07, "loss": 0.472, "step": 1007 }, { "epoch": 0.01752159780284726, "grad_norm": 1.7298693003272692, "learning_rate": 5.834298957126303e-07, "loss": 0.3873, "step": 1008 }, { "epoch": 0.017538980340350084, "grad_norm": 3.341086548900157, "learning_rate": 5.840092699884125e-07, "loss": 0.444, "step": 1009 }, { "epoch": 0.017556362877852907, "grad_norm": 1.4913505751690628, "learning_rate": 5.845886442641946e-07, "loss": 0.3712, "step": 1010 }, { "epoch": 0.017573745415355734, "grad_norm": 1.7461670768756843, "learning_rate": 5.851680185399768e-07, "loss": 0.6039, "step": 1011 }, { "epoch": 0.017591127952858557, "grad_norm": 1.7372760225657964, "learning_rate": 5.857473928157589e-07, "loss": 0.4866, "step": 1012 }, { "epoch": 0.017608510490361384, "grad_norm": 17.570172700123766, "learning_rate": 5.863267670915412e-07, "loss": 0.869, "step": 1013 }, { "epoch": 0.017625893027864208, "grad_norm": 1.6106406273181413, "learning_rate": 5.869061413673233e-07, "loss": 0.3974, "step": 1014 }, { "epoch": 0.01764327556536703, "grad_norm": 1.9422222129326703, "learning_rate": 5.874855156431055e-07, "loss": 0.3653, "step": 1015 }, { "epoch": 0.017660658102869858, "grad_norm": 1.5423882962856301, "learning_rate": 5.880648899188875e-07, "loss": 0.3546, "step": 1016 }, { "epoch": 0.01767804064037268, "grad_norm": 2.86248866200797, "learning_rate": 5.886442641946697e-07, "loss": 0.4199, "step": 1017 }, { "epoch": 0.017695423177875508, "grad_norm": 1.686223771879047, "learning_rate": 5.892236384704518e-07, "loss": 0.2416, "step": 1018 }, { "epoch": 0.01771280571537833, "grad_norm": 2.5429580963608647, "learning_rate": 5.89803012746234e-07, "loss": 0.5186, "step": 1019 }, { "epoch": 0.017730188252881154, "grad_norm": 2.146389920176501, "learning_rate": 5.903823870220161e-07, "loss": 0.6239, "step": 1020 }, { "epoch": 0.01774757079038398, "grad_norm": 3.4217979047323883, "learning_rate": 5.909617612977984e-07, "loss": 0.581, "step": 1021 }, { "epoch": 0.017764953327886805, "grad_norm": 1.9102120014154917, "learning_rate": 5.915411355735805e-07, "loss": 0.7769, "step": 1022 }, { "epoch": 0.01778233586538963, "grad_norm": 2.7523615114361246, "learning_rate": 5.921205098493627e-07, "loss": 0.7016, "step": 1023 }, { "epoch": 0.017799718402892455, "grad_norm": 2.1814885024144615, "learning_rate": 5.926998841251448e-07, "loss": 0.4458, "step": 1024 }, { "epoch": 0.017817100940395278, "grad_norm": 2.962466839849716, "learning_rate": 5.93279258400927e-07, "loss": 0.3448, "step": 1025 }, { "epoch": 0.017834483477898105, "grad_norm": 1.9479340120789368, "learning_rate": 5.938586326767091e-07, "loss": 0.5695, "step": 1026 }, { "epoch": 0.017851866015400928, "grad_norm": 1.7238679848314213, "learning_rate": 5.944380069524913e-07, "loss": 0.4219, "step": 1027 }, { "epoch": 0.01786924855290375, "grad_norm": 2.478714691816811, "learning_rate": 5.950173812282733e-07, "loss": 0.4617, "step": 1028 }, { "epoch": 0.017886631090406578, "grad_norm": 2.3074638381399146, "learning_rate": 5.955967555040556e-07, "loss": 0.2388, "step": 1029 }, { "epoch": 0.0179040136279094, "grad_norm": 2.482009580055694, "learning_rate": 5.961761297798377e-07, "loss": 0.3034, "step": 1030 }, { "epoch": 0.017921396165412228, "grad_norm": 3.3549500616270564, "learning_rate": 5.967555040556199e-07, "loss": 0.4169, "step": 1031 }, { "epoch": 0.01793877870291505, "grad_norm": 2.190385612242866, "learning_rate": 5.97334878331402e-07, "loss": 0.3937, "step": 1032 }, { "epoch": 0.017956161240417875, "grad_norm": 2.3217709458314677, "learning_rate": 5.979142526071842e-07, "loss": 0.4031, "step": 1033 }, { "epoch": 0.0179735437779207, "grad_norm": 2.9124287493668457, "learning_rate": 5.984936268829663e-07, "loss": 0.3445, "step": 1034 }, { "epoch": 0.017990926315423525, "grad_norm": 3.4907312950113347, "learning_rate": 5.990730011587485e-07, "loss": 0.5402, "step": 1035 }, { "epoch": 0.01800830885292635, "grad_norm": 3.72638847547406, "learning_rate": 5.996523754345307e-07, "loss": 0.5091, "step": 1036 }, { "epoch": 0.018025691390429175, "grad_norm": 2.12342268831926, "learning_rate": 6.002317497103129e-07, "loss": 0.2988, "step": 1037 }, { "epoch": 0.018043073927932, "grad_norm": 1.5192172571494431, "learning_rate": 6.00811123986095e-07, "loss": 0.4855, "step": 1038 }, { "epoch": 0.018060456465434825, "grad_norm": 2.1703103480972117, "learning_rate": 6.013904982618772e-07, "loss": 0.4968, "step": 1039 }, { "epoch": 0.01807783900293765, "grad_norm": 1.8555238566057939, "learning_rate": 6.019698725376593e-07, "loss": 0.5536, "step": 1040 }, { "epoch": 0.018095221540440475, "grad_norm": 2.6805857578614263, "learning_rate": 6.025492468134414e-07, "loss": 0.3068, "step": 1041 }, { "epoch": 0.0181126040779433, "grad_norm": 1.2709350980038776, "learning_rate": 6.031286210892235e-07, "loss": 0.4779, "step": 1042 }, { "epoch": 0.018129986615446122, "grad_norm": 2.3722842157529493, "learning_rate": 6.037079953650057e-07, "loss": 0.3483, "step": 1043 }, { "epoch": 0.01814736915294895, "grad_norm": 1.9903043381430994, "learning_rate": 6.04287369640788e-07, "loss": 0.4146, "step": 1044 }, { "epoch": 0.018164751690451772, "grad_norm": 2.4256344427358334, "learning_rate": 6.048667439165701e-07, "loss": 0.3576, "step": 1045 }, { "epoch": 0.018182134227954595, "grad_norm": 1.5353032796902313, "learning_rate": 6.054461181923522e-07, "loss": 0.4717, "step": 1046 }, { "epoch": 0.018199516765457422, "grad_norm": 2.6120034468707756, "learning_rate": 6.060254924681344e-07, "loss": 0.3207, "step": 1047 }, { "epoch": 0.018216899302960245, "grad_norm": 2.7635206514264596, "learning_rate": 6.066048667439165e-07, "loss": 0.37, "step": 1048 }, { "epoch": 0.018234281840463072, "grad_norm": 1.9415259766121091, "learning_rate": 6.071842410196987e-07, "loss": 0.3282, "step": 1049 }, { "epoch": 0.018251664377965895, "grad_norm": 1.9358076019430133, "learning_rate": 6.077636152954808e-07, "loss": 0.4523, "step": 1050 }, { "epoch": 0.01826904691546872, "grad_norm": 2.0736759017683255, "learning_rate": 6.08342989571263e-07, "loss": 0.37, "step": 1051 }, { "epoch": 0.018286429452971546, "grad_norm": 1.5809497765920282, "learning_rate": 6.089223638470453e-07, "loss": 0.2823, "step": 1052 }, { "epoch": 0.01830381199047437, "grad_norm": 2.3329809235310353, "learning_rate": 6.095017381228273e-07, "loss": 0.2916, "step": 1053 }, { "epoch": 0.018321194527977196, "grad_norm": 1.7949834772259399, "learning_rate": 6.100811123986094e-07, "loss": 0.6505, "step": 1054 }, { "epoch": 0.01833857706548002, "grad_norm": 0.9339759414432154, "learning_rate": 6.106604866743916e-07, "loss": 0.3848, "step": 1055 }, { "epoch": 0.018355959602982842, "grad_norm": 2.7375338884264977, "learning_rate": 6.112398609501737e-07, "loss": 0.4917, "step": 1056 }, { "epoch": 0.01837334214048567, "grad_norm": 1.281455194511272, "learning_rate": 6.118192352259559e-07, "loss": 0.2824, "step": 1057 }, { "epoch": 0.018390724677988492, "grad_norm": 1.7939900809333056, "learning_rate": 6.12398609501738e-07, "loss": 0.2938, "step": 1058 }, { "epoch": 0.018408107215491316, "grad_norm": 2.146961556801002, "learning_rate": 6.129779837775203e-07, "loss": 0.3458, "step": 1059 }, { "epoch": 0.018425489752994142, "grad_norm": 2.0741074888332873, "learning_rate": 6.135573580533025e-07, "loss": 0.2734, "step": 1060 }, { "epoch": 0.018442872290496966, "grad_norm": 2.0086961280504023, "learning_rate": 6.141367323290846e-07, "loss": 0.409, "step": 1061 }, { "epoch": 0.018460254827999793, "grad_norm": 2.0907725042655168, "learning_rate": 6.147161066048668e-07, "loss": 0.47, "step": 1062 }, { "epoch": 0.018477637365502616, "grad_norm": 2.05883184532039, "learning_rate": 6.152954808806489e-07, "loss": 0.3314, "step": 1063 }, { "epoch": 0.01849501990300544, "grad_norm": 2.3257997888005786, "learning_rate": 6.158748551564311e-07, "loss": 0.2863, "step": 1064 }, { "epoch": 0.018512402440508266, "grad_norm": 2.1643518621060087, "learning_rate": 6.164542294322131e-07, "loss": 0.3226, "step": 1065 }, { "epoch": 0.01852978497801109, "grad_norm": 1.886261318100986, "learning_rate": 6.170336037079952e-07, "loss": 0.7512, "step": 1066 }, { "epoch": 0.018547167515513916, "grad_norm": 3.9321365278087654, "learning_rate": 6.176129779837775e-07, "loss": 0.4412, "step": 1067 }, { "epoch": 0.01856455005301674, "grad_norm": 2.0665976833174664, "learning_rate": 6.181923522595597e-07, "loss": 0.4952, "step": 1068 }, { "epoch": 0.018581932590519563, "grad_norm": 2.505789922616997, "learning_rate": 6.187717265353418e-07, "loss": 0.4131, "step": 1069 }, { "epoch": 0.01859931512802239, "grad_norm": 2.9156276790525433, "learning_rate": 6.19351100811124e-07, "loss": 0.6191, "step": 1070 }, { "epoch": 0.018616697665525213, "grad_norm": 2.7692125429804695, "learning_rate": 6.199304750869061e-07, "loss": 0.4385, "step": 1071 }, { "epoch": 0.01863408020302804, "grad_norm": 2.763983470908067, "learning_rate": 6.205098493626883e-07, "loss": 0.4473, "step": 1072 }, { "epoch": 0.018651462740530863, "grad_norm": 3.597388916771293, "learning_rate": 6.210892236384704e-07, "loss": 0.5817, "step": 1073 }, { "epoch": 0.018668845278033686, "grad_norm": 2.0986796687939853, "learning_rate": 6.216685979142526e-07, "loss": 0.5653, "step": 1074 }, { "epoch": 0.018686227815536513, "grad_norm": 1.8803112798258115, "learning_rate": 6.222479721900348e-07, "loss": 0.2918, "step": 1075 }, { "epoch": 0.018703610353039336, "grad_norm": 2.7329567893614213, "learning_rate": 6.22827346465817e-07, "loss": 0.3433, "step": 1076 }, { "epoch": 0.01872099289054216, "grad_norm": 2.2419300494400427, "learning_rate": 6.23406720741599e-07, "loss": 0.3199, "step": 1077 }, { "epoch": 0.018738375428044986, "grad_norm": 1.8332669649424251, "learning_rate": 6.239860950173812e-07, "loss": 0.2268, "step": 1078 }, { "epoch": 0.01875575796554781, "grad_norm": 2.125679951582925, "learning_rate": 6.245654692931633e-07, "loss": 0.3717, "step": 1079 }, { "epoch": 0.018773140503050637, "grad_norm": 1.6157728736452328, "learning_rate": 6.251448435689455e-07, "loss": 0.3536, "step": 1080 }, { "epoch": 0.01879052304055346, "grad_norm": 1.8271029156970082, "learning_rate": 6.257242178447276e-07, "loss": 0.3846, "step": 1081 }, { "epoch": 0.018807905578056283, "grad_norm": 1.7335360926046783, "learning_rate": 6.263035921205099e-07, "loss": 0.291, "step": 1082 }, { "epoch": 0.01882528811555911, "grad_norm": 1.5308068933332395, "learning_rate": 6.26882966396292e-07, "loss": 0.2272, "step": 1083 }, { "epoch": 0.018842670653061933, "grad_norm": 2.991932876477385, "learning_rate": 6.274623406720742e-07, "loss": 0.6004, "step": 1084 }, { "epoch": 0.01886005319056476, "grad_norm": 1.7799656987046142, "learning_rate": 6.280417149478563e-07, "loss": 0.4416, "step": 1085 }, { "epoch": 0.018877435728067583, "grad_norm": 1.79104480554293, "learning_rate": 6.286210892236385e-07, "loss": 0.2897, "step": 1086 }, { "epoch": 0.018894818265570407, "grad_norm": 2.5827238430862907, "learning_rate": 6.292004634994206e-07, "loss": 0.3746, "step": 1087 }, { "epoch": 0.018912200803073233, "grad_norm": 2.205602964521925, "learning_rate": 6.297798377752028e-07, "loss": 0.4473, "step": 1088 }, { "epoch": 0.018929583340576057, "grad_norm": 3.13482920770929, "learning_rate": 6.303592120509848e-07, "loss": 0.3656, "step": 1089 }, { "epoch": 0.018946965878078884, "grad_norm": 2.9044401834159634, "learning_rate": 6.309385863267671e-07, "loss": 0.6775, "step": 1090 }, { "epoch": 0.018964348415581707, "grad_norm": 1.5223773215463996, "learning_rate": 6.315179606025492e-07, "loss": 0.2394, "step": 1091 }, { "epoch": 0.01898173095308453, "grad_norm": 1.9323027989461108, "learning_rate": 6.320973348783314e-07, "loss": 0.2689, "step": 1092 }, { "epoch": 0.018999113490587357, "grad_norm": 2.3691170700057524, "learning_rate": 6.326767091541135e-07, "loss": 0.7947, "step": 1093 }, { "epoch": 0.01901649602809018, "grad_norm": 3.9435979489657194, "learning_rate": 6.332560834298957e-07, "loss": 0.2738, "step": 1094 }, { "epoch": 0.019033878565593004, "grad_norm": 2.324505096557037, "learning_rate": 6.338354577056778e-07, "loss": 0.5297, "step": 1095 }, { "epoch": 0.01905126110309583, "grad_norm": 2.082676942095259, "learning_rate": 6.3441483198146e-07, "loss": 0.3111, "step": 1096 }, { "epoch": 0.019068643640598654, "grad_norm": 1.9503817389563047, "learning_rate": 6.349942062572422e-07, "loss": 0.3934, "step": 1097 }, { "epoch": 0.01908602617810148, "grad_norm": 1.8728932555998694, "learning_rate": 6.355735805330244e-07, "loss": 0.4081, "step": 1098 }, { "epoch": 0.019103408715604304, "grad_norm": 2.5927341490916325, "learning_rate": 6.361529548088065e-07, "loss": 0.35, "step": 1099 }, { "epoch": 0.019120791253107127, "grad_norm": 2.190745615702677, "learning_rate": 6.367323290845887e-07, "loss": 0.2796, "step": 1100 }, { "epoch": 0.019138173790609954, "grad_norm": 1.8594086163126786, "learning_rate": 6.373117033603708e-07, "loss": 0.4427, "step": 1101 }, { "epoch": 0.019155556328112777, "grad_norm": 6.610145432739078, "learning_rate": 6.378910776361529e-07, "loss": 0.4676, "step": 1102 }, { "epoch": 0.019172938865615604, "grad_norm": 1.7198966822834376, "learning_rate": 6.38470451911935e-07, "loss": 0.3158, "step": 1103 }, { "epoch": 0.019190321403118427, "grad_norm": 2.3630171486513456, "learning_rate": 6.390498261877172e-07, "loss": 0.4531, "step": 1104 }, { "epoch": 0.01920770394062125, "grad_norm": 3.9130319183930604, "learning_rate": 6.396292004634994e-07, "loss": 0.5219, "step": 1105 }, { "epoch": 0.019225086478124077, "grad_norm": 3.1645558086725387, "learning_rate": 6.402085747392816e-07, "loss": 0.3256, "step": 1106 }, { "epoch": 0.0192424690156269, "grad_norm": 3.874399416186199, "learning_rate": 6.407879490150637e-07, "loss": 0.4255, "step": 1107 }, { "epoch": 0.019259851553129727, "grad_norm": 2.438669113487504, "learning_rate": 6.413673232908459e-07, "loss": 0.3994, "step": 1108 }, { "epoch": 0.01927723409063255, "grad_norm": 2.063225373272233, "learning_rate": 6.41946697566628e-07, "loss": 0.4662, "step": 1109 }, { "epoch": 0.019294616628135374, "grad_norm": 2.6087762271964503, "learning_rate": 6.425260718424102e-07, "loss": 0.3829, "step": 1110 }, { "epoch": 0.0193119991656382, "grad_norm": 2.1301261909525904, "learning_rate": 6.431054461181923e-07, "loss": 0.436, "step": 1111 }, { "epoch": 0.019329381703141024, "grad_norm": 1.6486893214648681, "learning_rate": 6.436848203939745e-07, "loss": 0.3984, "step": 1112 }, { "epoch": 0.019346764240643848, "grad_norm": 2.7647604340037373, "learning_rate": 6.442641946697567e-07, "loss": 0.2653, "step": 1113 }, { "epoch": 0.019364146778146674, "grad_norm": 2.008535648452746, "learning_rate": 6.448435689455388e-07, "loss": 0.2347, "step": 1114 }, { "epoch": 0.019381529315649498, "grad_norm": 2.8980500747231135, "learning_rate": 6.454229432213209e-07, "loss": 0.3918, "step": 1115 }, { "epoch": 0.019398911853152324, "grad_norm": 2.825523382141685, "learning_rate": 6.460023174971031e-07, "loss": 0.5878, "step": 1116 }, { "epoch": 0.019416294390655148, "grad_norm": 3.1514787077687916, "learning_rate": 6.465816917728852e-07, "loss": 0.4027, "step": 1117 }, { "epoch": 0.01943367692815797, "grad_norm": 2.3498169219520686, "learning_rate": 6.471610660486674e-07, "loss": 0.6745, "step": 1118 }, { "epoch": 0.019451059465660798, "grad_norm": 2.561336666543277, "learning_rate": 6.477404403244495e-07, "loss": 0.3983, "step": 1119 }, { "epoch": 0.01946844200316362, "grad_norm": 2.448251934461232, "learning_rate": 6.483198146002318e-07, "loss": 0.2919, "step": 1120 }, { "epoch": 0.019485824540666448, "grad_norm": 1.2580445591880576, "learning_rate": 6.488991888760139e-07, "loss": 0.3345, "step": 1121 }, { "epoch": 0.01950320707816927, "grad_norm": 2.380621076792366, "learning_rate": 6.494785631517961e-07, "loss": 0.2547, "step": 1122 }, { "epoch": 0.019520589615672095, "grad_norm": 2.832093888908675, "learning_rate": 6.500579374275782e-07, "loss": 0.6485, "step": 1123 }, { "epoch": 0.01953797215317492, "grad_norm": 2.8370448136391375, "learning_rate": 6.506373117033604e-07, "loss": 0.3709, "step": 1124 }, { "epoch": 0.019555354690677745, "grad_norm": 2.5550713186172813, "learning_rate": 6.512166859791425e-07, "loss": 0.3054, "step": 1125 }, { "epoch": 0.01957273722818057, "grad_norm": 3.422839159820629, "learning_rate": 6.517960602549246e-07, "loss": 0.4434, "step": 1126 }, { "epoch": 0.019590119765683395, "grad_norm": 1.5113695144754362, "learning_rate": 6.523754345307067e-07, "loss": 0.3651, "step": 1127 }, { "epoch": 0.019607502303186218, "grad_norm": 3.8960335637489503, "learning_rate": 6.52954808806489e-07, "loss": 0.3274, "step": 1128 }, { "epoch": 0.019624884840689045, "grad_norm": 1.9240828867380664, "learning_rate": 6.535341830822711e-07, "loss": 0.3766, "step": 1129 }, { "epoch": 0.019642267378191868, "grad_norm": 2.402362835699076, "learning_rate": 6.541135573580533e-07, "loss": 0.3408, "step": 1130 }, { "epoch": 0.01965964991569469, "grad_norm": 1.9660939743053845, "learning_rate": 6.546929316338354e-07, "loss": 0.5451, "step": 1131 }, { "epoch": 0.019677032453197518, "grad_norm": 2.340359072375767, "learning_rate": 6.552723059096176e-07, "loss": 0.3887, "step": 1132 }, { "epoch": 0.01969441499070034, "grad_norm": 1.410673684640205, "learning_rate": 6.558516801853997e-07, "loss": 0.221, "step": 1133 }, { "epoch": 0.01971179752820317, "grad_norm": 2.3624856332315565, "learning_rate": 6.564310544611819e-07, "loss": 0.3515, "step": 1134 }, { "epoch": 0.01972918006570599, "grad_norm": 1.8580628307201998, "learning_rate": 6.57010428736964e-07, "loss": 0.472, "step": 1135 }, { "epoch": 0.019746562603208815, "grad_norm": 3.264936912494491, "learning_rate": 6.575898030127463e-07, "loss": 0.3291, "step": 1136 }, { "epoch": 0.019763945140711642, "grad_norm": 2.7594848461594164, "learning_rate": 6.581691772885284e-07, "loss": 0.2983, "step": 1137 }, { "epoch": 0.019781327678214465, "grad_norm": 1.953486267827354, "learning_rate": 6.587485515643105e-07, "loss": 0.303, "step": 1138 }, { "epoch": 0.019798710215717292, "grad_norm": 2.469053537395064, "learning_rate": 6.593279258400926e-07, "loss": 0.6412, "step": 1139 }, { "epoch": 0.019816092753220115, "grad_norm": 1.8851846559344265, "learning_rate": 6.599073001158748e-07, "loss": 0.4143, "step": 1140 }, { "epoch": 0.01983347529072294, "grad_norm": 1.7363002964569643, "learning_rate": 6.604866743916569e-07, "loss": 0.3652, "step": 1141 }, { "epoch": 0.019850857828225765, "grad_norm": 2.914595165088835, "learning_rate": 6.610660486674391e-07, "loss": 0.4805, "step": 1142 }, { "epoch": 0.01986824036572859, "grad_norm": 2.3255495217785414, "learning_rate": 6.616454229432213e-07, "loss": 0.5961, "step": 1143 }, { "epoch": 0.019885622903231415, "grad_norm": 1.9119464525920578, "learning_rate": 6.622247972190035e-07, "loss": 0.3129, "step": 1144 }, { "epoch": 0.01990300544073424, "grad_norm": 1.4425115912569555, "learning_rate": 6.628041714947856e-07, "loss": 0.4037, "step": 1145 }, { "epoch": 0.019920387978237062, "grad_norm": 2.4243791906582453, "learning_rate": 6.633835457705678e-07, "loss": 0.3492, "step": 1146 }, { "epoch": 0.01993777051573989, "grad_norm": 1.935125928508007, "learning_rate": 6.639629200463499e-07, "loss": 0.2197, "step": 1147 }, { "epoch": 0.019955153053242712, "grad_norm": 4.50741197419428, "learning_rate": 6.645422943221321e-07, "loss": 0.4681, "step": 1148 }, { "epoch": 0.019972535590745535, "grad_norm": 2.6065773043236016, "learning_rate": 6.651216685979142e-07, "loss": 0.457, "step": 1149 }, { "epoch": 0.019989918128248362, "grad_norm": 2.272194290875452, "learning_rate": 6.657010428736963e-07, "loss": 0.4655, "step": 1150 }, { "epoch": 0.020007300665751186, "grad_norm": 2.574366480706679, "learning_rate": 6.662804171494785e-07, "loss": 0.5078, "step": 1151 }, { "epoch": 0.020024683203254012, "grad_norm": 3.0347892271037415, "learning_rate": 6.668597914252607e-07, "loss": 0.2153, "step": 1152 }, { "epoch": 0.020042065740756836, "grad_norm": 3.184597609688846, "learning_rate": 6.674391657010428e-07, "loss": 0.4031, "step": 1153 }, { "epoch": 0.02005944827825966, "grad_norm": 3.0155480045478504, "learning_rate": 6.68018539976825e-07, "loss": 0.4666, "step": 1154 }, { "epoch": 0.020076830815762486, "grad_norm": 1.5091108965384998, "learning_rate": 6.685979142526071e-07, "loss": 0.3065, "step": 1155 }, { "epoch": 0.02009421335326531, "grad_norm": 1.7593194564498884, "learning_rate": 6.691772885283893e-07, "loss": 0.2939, "step": 1156 }, { "epoch": 0.020111595890768136, "grad_norm": 1.8385253454841317, "learning_rate": 6.697566628041714e-07, "loss": 0.329, "step": 1157 }, { "epoch": 0.02012897842827096, "grad_norm": 1.6949245557115717, "learning_rate": 6.703360370799536e-07, "loss": 0.5702, "step": 1158 }, { "epoch": 0.020146360965773782, "grad_norm": 1.7641021390764324, "learning_rate": 6.709154113557358e-07, "loss": 0.3275, "step": 1159 }, { "epoch": 0.02016374350327661, "grad_norm": 3.5864886597998256, "learning_rate": 6.71494785631518e-07, "loss": 0.3616, "step": 1160 }, { "epoch": 0.020181126040779433, "grad_norm": 1.4807813769319336, "learning_rate": 6.720741599073001e-07, "loss": 0.4477, "step": 1161 }, { "epoch": 0.02019850857828226, "grad_norm": 1.9283815675103946, "learning_rate": 6.726535341830822e-07, "loss": 0.2788, "step": 1162 }, { "epoch": 0.020215891115785083, "grad_norm": 2.293895045856538, "learning_rate": 6.732329084588643e-07, "loss": 0.4098, "step": 1163 }, { "epoch": 0.020233273653287906, "grad_norm": 1.3370697500678685, "learning_rate": 6.738122827346465e-07, "loss": 0.3745, "step": 1164 }, { "epoch": 0.020250656190790733, "grad_norm": 2.138379726371246, "learning_rate": 6.743916570104286e-07, "loss": 0.6429, "step": 1165 }, { "epoch": 0.020268038728293556, "grad_norm": 4.36401322268089, "learning_rate": 6.749710312862109e-07, "loss": 0.3478, "step": 1166 }, { "epoch": 0.02028542126579638, "grad_norm": 1.6391372810487248, "learning_rate": 6.75550405561993e-07, "loss": 0.573, "step": 1167 }, { "epoch": 0.020302803803299206, "grad_norm": 2.8089223300626873, "learning_rate": 6.761297798377752e-07, "loss": 0.6074, "step": 1168 }, { "epoch": 0.02032018634080203, "grad_norm": 2.7554873522328975, "learning_rate": 6.767091541135573e-07, "loss": 0.6486, "step": 1169 }, { "epoch": 0.020337568878304856, "grad_norm": 1.7421994472879248, "learning_rate": 6.772885283893395e-07, "loss": 0.2092, "step": 1170 }, { "epoch": 0.02035495141580768, "grad_norm": 3.393140738737371, "learning_rate": 6.778679026651216e-07, "loss": 0.6272, "step": 1171 }, { "epoch": 0.020372333953310503, "grad_norm": 1.9268103785396122, "learning_rate": 6.784472769409038e-07, "loss": 0.4707, "step": 1172 }, { "epoch": 0.02038971649081333, "grad_norm": 1.6866057043632636, "learning_rate": 6.790266512166859e-07, "loss": 0.1779, "step": 1173 }, { "epoch": 0.020407099028316153, "grad_norm": 2.3434055794390316, "learning_rate": 6.796060254924682e-07, "loss": 0.3948, "step": 1174 }, { "epoch": 0.02042448156581898, "grad_norm": 1.7861787718290387, "learning_rate": 6.801853997682502e-07, "loss": 0.2396, "step": 1175 }, { "epoch": 0.020441864103321803, "grad_norm": 2.211704301770589, "learning_rate": 6.807647740440324e-07, "loss": 0.6048, "step": 1176 }, { "epoch": 0.020459246640824626, "grad_norm": 2.2708364010576947, "learning_rate": 6.813441483198145e-07, "loss": 0.3332, "step": 1177 }, { "epoch": 0.020476629178327453, "grad_norm": 2.79019537700081, "learning_rate": 6.819235225955967e-07, "loss": 0.4138, "step": 1178 }, { "epoch": 0.020494011715830276, "grad_norm": 2.465248082045729, "learning_rate": 6.825028968713788e-07, "loss": 0.5097, "step": 1179 }, { "epoch": 0.020511394253333103, "grad_norm": 2.4095028260397195, "learning_rate": 6.83082271147161e-07, "loss": 0.2958, "step": 1180 }, { "epoch": 0.020528776790835927, "grad_norm": 2.326493765347917, "learning_rate": 6.836616454229433e-07, "loss": 0.6172, "step": 1181 }, { "epoch": 0.02054615932833875, "grad_norm": 1.8372619595833866, "learning_rate": 6.842410196987254e-07, "loss": 0.2347, "step": 1182 }, { "epoch": 0.020563541865841577, "grad_norm": 2.8878769305655387, "learning_rate": 6.848203939745076e-07, "loss": 0.3319, "step": 1183 }, { "epoch": 0.0205809244033444, "grad_norm": 1.9015231199878297, "learning_rate": 6.853997682502897e-07, "loss": 0.4029, "step": 1184 }, { "epoch": 0.020598306940847223, "grad_norm": 2.2612767316958564, "learning_rate": 6.859791425260719e-07, "loss": 0.3573, "step": 1185 }, { "epoch": 0.02061568947835005, "grad_norm": 3.5617355981142182, "learning_rate": 6.86558516801854e-07, "loss": 0.468, "step": 1186 }, { "epoch": 0.020633072015852873, "grad_norm": 2.1518716755745215, "learning_rate": 6.87137891077636e-07, "loss": 0.5096, "step": 1187 }, { "epoch": 0.0206504545533557, "grad_norm": 1.8396765373981108, "learning_rate": 6.877172653534182e-07, "loss": 0.335, "step": 1188 }, { "epoch": 0.020667837090858523, "grad_norm": 2.402297488203516, "learning_rate": 6.882966396292005e-07, "loss": 0.4191, "step": 1189 }, { "epoch": 0.020685219628361347, "grad_norm": 3.1137948516217695, "learning_rate": 6.888760139049826e-07, "loss": 0.3911, "step": 1190 }, { "epoch": 0.020702602165864174, "grad_norm": 2.064316488547539, "learning_rate": 6.894553881807648e-07, "loss": 0.3021, "step": 1191 }, { "epoch": 0.020719984703366997, "grad_norm": 2.1658618390959106, "learning_rate": 6.900347624565469e-07, "loss": 0.5581, "step": 1192 }, { "epoch": 0.020737367240869824, "grad_norm": 2.1435788074911803, "learning_rate": 6.90614136732329e-07, "loss": 0.2345, "step": 1193 }, { "epoch": 0.020754749778372647, "grad_norm": 1.5449592823319376, "learning_rate": 6.911935110081112e-07, "loss": 0.3085, "step": 1194 }, { "epoch": 0.02077213231587547, "grad_norm": 2.3069729939900796, "learning_rate": 6.917728852838934e-07, "loss": 0.4957, "step": 1195 }, { "epoch": 0.020789514853378297, "grad_norm": 2.1735309956278344, "learning_rate": 6.923522595596755e-07, "loss": 0.4875, "step": 1196 }, { "epoch": 0.02080689739088112, "grad_norm": 3.142620201275958, "learning_rate": 6.929316338354578e-07, "loss": 0.4835, "step": 1197 }, { "epoch": 0.020824279928383947, "grad_norm": 2.56001609547622, "learning_rate": 6.935110081112399e-07, "loss": 0.2851, "step": 1198 }, { "epoch": 0.02084166246588677, "grad_norm": 3.032060937558399, "learning_rate": 6.94090382387022e-07, "loss": 0.2815, "step": 1199 }, { "epoch": 0.020859045003389594, "grad_norm": 2.8003943197622756, "learning_rate": 6.946697566628041e-07, "loss": 0.29, "step": 1200 }, { "epoch": 0.02087642754089242, "grad_norm": 2.3346011670034637, "learning_rate": 6.952491309385863e-07, "loss": 0.2769, "step": 1201 }, { "epoch": 0.020893810078395244, "grad_norm": 2.939619182456507, "learning_rate": 6.958285052143684e-07, "loss": 0.4207, "step": 1202 }, { "epoch": 0.020911192615898067, "grad_norm": 1.9277824555827288, "learning_rate": 6.964078794901506e-07, "loss": 0.4847, "step": 1203 }, { "epoch": 0.020928575153400894, "grad_norm": 4.5591045514905995, "learning_rate": 6.969872537659328e-07, "loss": 0.5671, "step": 1204 }, { "epoch": 0.020945957690903717, "grad_norm": 1.4314514287062743, "learning_rate": 6.97566628041715e-07, "loss": 0.2349, "step": 1205 }, { "epoch": 0.020963340228406544, "grad_norm": 2.4717118780431027, "learning_rate": 6.981460023174971e-07, "loss": 0.4573, "step": 1206 }, { "epoch": 0.020980722765909367, "grad_norm": 2.0199176975537645, "learning_rate": 6.987253765932793e-07, "loss": 0.5342, "step": 1207 }, { "epoch": 0.02099810530341219, "grad_norm": 1.0742557040607725, "learning_rate": 6.993047508690614e-07, "loss": 0.2576, "step": 1208 }, { "epoch": 0.021015487840915018, "grad_norm": 2.6465238867360146, "learning_rate": 6.998841251448436e-07, "loss": 0.3464, "step": 1209 }, { "epoch": 0.02103287037841784, "grad_norm": 2.1035539006303603, "learning_rate": 7.004634994206257e-07, "loss": 0.3713, "step": 1210 }, { "epoch": 0.021050252915920668, "grad_norm": 2.028348374141373, "learning_rate": 7.010428736964078e-07, "loss": 0.4792, "step": 1211 }, { "epoch": 0.02106763545342349, "grad_norm": 2.9095380724732256, "learning_rate": 7.0162224797219e-07, "loss": 0.4378, "step": 1212 }, { "epoch": 0.021085017990926314, "grad_norm": 2.2536508910773434, "learning_rate": 7.022016222479722e-07, "loss": 0.6379, "step": 1213 }, { "epoch": 0.02110240052842914, "grad_norm": 4.340422272054014, "learning_rate": 7.027809965237543e-07, "loss": 0.5461, "step": 1214 }, { "epoch": 0.021119783065931964, "grad_norm": 2.1327312992116747, "learning_rate": 7.033603707995365e-07, "loss": 0.3668, "step": 1215 }, { "epoch": 0.02113716560343479, "grad_norm": 3.2026438346353205, "learning_rate": 7.039397450753186e-07, "loss": 0.572, "step": 1216 }, { "epoch": 0.021154548140937614, "grad_norm": 2.717590199980487, "learning_rate": 7.045191193511008e-07, "loss": 0.3732, "step": 1217 }, { "epoch": 0.021171930678440438, "grad_norm": 1.380761630556546, "learning_rate": 7.050984936268829e-07, "loss": 0.3979, "step": 1218 }, { "epoch": 0.021189313215943265, "grad_norm": 2.693829701757554, "learning_rate": 7.056778679026651e-07, "loss": 0.276, "step": 1219 }, { "epoch": 0.021206695753446088, "grad_norm": 2.5755092899478558, "learning_rate": 7.062572421784473e-07, "loss": 0.4929, "step": 1220 }, { "epoch": 0.02122407829094891, "grad_norm": 3.442359458153421, "learning_rate": 7.068366164542295e-07, "loss": 0.2411, "step": 1221 }, { "epoch": 0.021241460828451738, "grad_norm": 1.5612433279418476, "learning_rate": 7.074159907300116e-07, "loss": 0.2547, "step": 1222 }, { "epoch": 0.02125884336595456, "grad_norm": 1.7923112021154297, "learning_rate": 7.079953650057937e-07, "loss": 0.4796, "step": 1223 }, { "epoch": 0.021276225903457388, "grad_norm": 1.6925114897794546, "learning_rate": 7.085747392815758e-07, "loss": 0.6271, "step": 1224 }, { "epoch": 0.02129360844096021, "grad_norm": 2.9267760606877573, "learning_rate": 7.09154113557358e-07, "loss": 0.5842, "step": 1225 }, { "epoch": 0.021310990978463035, "grad_norm": 3.465332950394806, "learning_rate": 7.097334878331401e-07, "loss": 0.4651, "step": 1226 }, { "epoch": 0.02132837351596586, "grad_norm": 3.1383468429821897, "learning_rate": 7.103128621089224e-07, "loss": 0.5259, "step": 1227 }, { "epoch": 0.021345756053468685, "grad_norm": 3.8466121501637085, "learning_rate": 7.108922363847045e-07, "loss": 0.3876, "step": 1228 }, { "epoch": 0.02136313859097151, "grad_norm": 2.3035579534764046, "learning_rate": 7.114716106604867e-07, "loss": 0.527, "step": 1229 }, { "epoch": 0.021380521128474335, "grad_norm": 2.6997324814944075, "learning_rate": 7.120509849362688e-07, "loss": 0.5074, "step": 1230 }, { "epoch": 0.021397903665977158, "grad_norm": 1.4054996649549174, "learning_rate": 7.12630359212051e-07, "loss": 0.3155, "step": 1231 }, { "epoch": 0.021415286203479985, "grad_norm": 2.060153686812449, "learning_rate": 7.132097334878331e-07, "loss": 0.3677, "step": 1232 }, { "epoch": 0.02143266874098281, "grad_norm": 2.1660207726910508, "learning_rate": 7.137891077636153e-07, "loss": 0.4558, "step": 1233 }, { "epoch": 0.02145005127848563, "grad_norm": 2.2191827110628894, "learning_rate": 7.143684820393974e-07, "loss": 0.4459, "step": 1234 }, { "epoch": 0.02146743381598846, "grad_norm": 3.221702396172172, "learning_rate": 7.149478563151796e-07, "loss": 0.5379, "step": 1235 }, { "epoch": 0.02148481635349128, "grad_norm": 1.4142985473906093, "learning_rate": 7.155272305909617e-07, "loss": 0.3684, "step": 1236 }, { "epoch": 0.02150219889099411, "grad_norm": 4.212735040734074, "learning_rate": 7.161066048667439e-07, "loss": 0.4413, "step": 1237 }, { "epoch": 0.021519581428496932, "grad_norm": 1.898316508088055, "learning_rate": 7.16685979142526e-07, "loss": 0.417, "step": 1238 }, { "epoch": 0.021536963965999755, "grad_norm": 2.7783908527663272, "learning_rate": 7.172653534183082e-07, "loss": 0.3219, "step": 1239 }, { "epoch": 0.021554346503502582, "grad_norm": 1.7181849880424547, "learning_rate": 7.178447276940903e-07, "loss": 0.3664, "step": 1240 }, { "epoch": 0.021571729041005405, "grad_norm": 2.208059868650783, "learning_rate": 7.184241019698725e-07, "loss": 0.4652, "step": 1241 }, { "epoch": 0.021589111578508232, "grad_norm": 3.0141026758480627, "learning_rate": 7.190034762456546e-07, "loss": 0.3553, "step": 1242 }, { "epoch": 0.021606494116011055, "grad_norm": 2.2309391115287376, "learning_rate": 7.195828505214369e-07, "loss": 0.4879, "step": 1243 }, { "epoch": 0.02162387665351388, "grad_norm": 2.9409459900274206, "learning_rate": 7.20162224797219e-07, "loss": 0.5917, "step": 1244 }, { "epoch": 0.021641259191016705, "grad_norm": 2.3597482818269597, "learning_rate": 7.207415990730012e-07, "loss": 0.4034, "step": 1245 }, { "epoch": 0.02165864172851953, "grad_norm": 1.6192307955538918, "learning_rate": 7.213209733487833e-07, "loss": 0.5098, "step": 1246 }, { "epoch": 0.021676024266022356, "grad_norm": 2.4191946801644746, "learning_rate": 7.219003476245654e-07, "loss": 0.5871, "step": 1247 }, { "epoch": 0.02169340680352518, "grad_norm": 1.9769052092166697, "learning_rate": 7.224797219003475e-07, "loss": 0.4274, "step": 1248 }, { "epoch": 0.021710789341028002, "grad_norm": 1.8112529150797947, "learning_rate": 7.230590961761297e-07, "loss": 0.6078, "step": 1249 }, { "epoch": 0.02172817187853083, "grad_norm": 1.3587595845302107, "learning_rate": 7.236384704519119e-07, "loss": 0.2659, "step": 1250 }, { "epoch": 0.021745554416033652, "grad_norm": 2.551080206506376, "learning_rate": 7.242178447276941e-07, "loss": 0.423, "step": 1251 }, { "epoch": 0.021762936953536476, "grad_norm": 1.78677969650157, "learning_rate": 7.247972190034762e-07, "loss": 0.2402, "step": 1252 }, { "epoch": 0.021780319491039302, "grad_norm": 2.6606632688993432, "learning_rate": 7.253765932792584e-07, "loss": 0.8029, "step": 1253 }, { "epoch": 0.021797702028542126, "grad_norm": 1.5029057855134913, "learning_rate": 7.259559675550405e-07, "loss": 0.5164, "step": 1254 }, { "epoch": 0.021815084566044952, "grad_norm": 2.670992193328825, "learning_rate": 7.265353418308227e-07, "loss": 0.4832, "step": 1255 }, { "epoch": 0.021832467103547776, "grad_norm": 2.0998456461141277, "learning_rate": 7.271147161066048e-07, "loss": 0.5526, "step": 1256 }, { "epoch": 0.0218498496410506, "grad_norm": 1.599585850795788, "learning_rate": 7.27694090382387e-07, "loss": 0.3515, "step": 1257 }, { "epoch": 0.021867232178553426, "grad_norm": 3.173083574852347, "learning_rate": 7.282734646581692e-07, "loss": 0.6688, "step": 1258 }, { "epoch": 0.02188461471605625, "grad_norm": 1.8532442105847513, "learning_rate": 7.288528389339514e-07, "loss": 0.3248, "step": 1259 }, { "epoch": 0.021901997253559076, "grad_norm": 3.19970295093198, "learning_rate": 7.294322132097334e-07, "loss": 0.4726, "step": 1260 }, { "epoch": 0.0219193797910619, "grad_norm": 3.293256695766402, "learning_rate": 7.300115874855156e-07, "loss": 0.3077, "step": 1261 }, { "epoch": 0.021936762328564723, "grad_norm": 1.8557667627446028, "learning_rate": 7.305909617612977e-07, "loss": 0.4008, "step": 1262 }, { "epoch": 0.02195414486606755, "grad_norm": 1.4227800010662377, "learning_rate": 7.311703360370799e-07, "loss": 0.3559, "step": 1263 }, { "epoch": 0.021971527403570373, "grad_norm": 1.299679039699774, "learning_rate": 7.31749710312862e-07, "loss": 0.4144, "step": 1264 }, { "epoch": 0.0219889099410732, "grad_norm": 2.901723620796067, "learning_rate": 7.323290845886443e-07, "loss": 0.3311, "step": 1265 }, { "epoch": 0.022006292478576023, "grad_norm": 2.186127508827397, "learning_rate": 7.329084588644264e-07, "loss": 0.5193, "step": 1266 }, { "epoch": 0.022023675016078846, "grad_norm": 1.4060745091724758, "learning_rate": 7.334878331402086e-07, "loss": 0.4024, "step": 1267 }, { "epoch": 0.022041057553581673, "grad_norm": 1.7024452104668502, "learning_rate": 7.340672074159907e-07, "loss": 0.4079, "step": 1268 }, { "epoch": 0.022058440091084496, "grad_norm": 1.5647792638390368, "learning_rate": 7.346465816917729e-07, "loss": 0.3468, "step": 1269 }, { "epoch": 0.02207582262858732, "grad_norm": 1.9998882757648895, "learning_rate": 7.35225955967555e-07, "loss": 0.3675, "step": 1270 }, { "epoch": 0.022093205166090146, "grad_norm": 2.1956393681355326, "learning_rate": 7.358053302433372e-07, "loss": 0.714, "step": 1271 }, { "epoch": 0.02211058770359297, "grad_norm": 1.4894039680572462, "learning_rate": 7.363847045191192e-07, "loss": 0.5016, "step": 1272 }, { "epoch": 0.022127970241095796, "grad_norm": 2.5577913714420064, "learning_rate": 7.369640787949015e-07, "loss": 0.6073, "step": 1273 }, { "epoch": 0.02214535277859862, "grad_norm": 3.011829066233873, "learning_rate": 7.375434530706836e-07, "loss": 0.3027, "step": 1274 }, { "epoch": 0.022162735316101443, "grad_norm": 1.8535480126058996, "learning_rate": 7.381228273464658e-07, "loss": 0.3942, "step": 1275 }, { "epoch": 0.02218011785360427, "grad_norm": 2.023108039482758, "learning_rate": 7.387022016222479e-07, "loss": 0.4812, "step": 1276 }, { "epoch": 0.022197500391107093, "grad_norm": 2.8924943645601844, "learning_rate": 7.392815758980301e-07, "loss": 0.5006, "step": 1277 }, { "epoch": 0.02221488292860992, "grad_norm": 1.9135622582465048, "learning_rate": 7.398609501738122e-07, "loss": 0.6662, "step": 1278 }, { "epoch": 0.022232265466112743, "grad_norm": 1.562434246291705, "learning_rate": 7.404403244495944e-07, "loss": 0.3913, "step": 1279 }, { "epoch": 0.022249648003615567, "grad_norm": 1.4696087424167836, "learning_rate": 7.410196987253765e-07, "loss": 0.4818, "step": 1280 }, { "epoch": 0.022267030541118393, "grad_norm": 1.1995348114814304, "learning_rate": 7.415990730011588e-07, "loss": 0.2461, "step": 1281 }, { "epoch": 0.022284413078621217, "grad_norm": 2.2536981625781607, "learning_rate": 7.421784472769409e-07, "loss": 0.7055, "step": 1282 }, { "epoch": 0.022301795616124043, "grad_norm": 2.2137334508411888, "learning_rate": 7.427578215527231e-07, "loss": 0.4949, "step": 1283 }, { "epoch": 0.022319178153626867, "grad_norm": 2.5378724393029546, "learning_rate": 7.433371958285051e-07, "loss": 0.7528, "step": 1284 }, { "epoch": 0.02233656069112969, "grad_norm": 2.1983720584269717, "learning_rate": 7.439165701042873e-07, "loss": 0.3252, "step": 1285 }, { "epoch": 0.022353943228632517, "grad_norm": 2.552829999624978, "learning_rate": 7.444959443800694e-07, "loss": 0.5701, "step": 1286 }, { "epoch": 0.02237132576613534, "grad_norm": 2.5028631641259054, "learning_rate": 7.450753186558516e-07, "loss": 0.3233, "step": 1287 }, { "epoch": 0.022388708303638163, "grad_norm": 2.349012668221781, "learning_rate": 7.456546929316338e-07, "loss": 0.4135, "step": 1288 }, { "epoch": 0.02240609084114099, "grad_norm": 2.6643537059318256, "learning_rate": 7.46234067207416e-07, "loss": 0.5914, "step": 1289 }, { "epoch": 0.022423473378643814, "grad_norm": 2.3192300831129122, "learning_rate": 7.468134414831981e-07, "loss": 0.6374, "step": 1290 }, { "epoch": 0.02244085591614664, "grad_norm": 2.2850730663655594, "learning_rate": 7.473928157589803e-07, "loss": 0.5577, "step": 1291 }, { "epoch": 0.022458238453649464, "grad_norm": 2.30458353747483, "learning_rate": 7.479721900347624e-07, "loss": 0.4426, "step": 1292 }, { "epoch": 0.022475620991152287, "grad_norm": 1.4638413898156335, "learning_rate": 7.485515643105446e-07, "loss": 0.5158, "step": 1293 }, { "epoch": 0.022493003528655114, "grad_norm": 1.9277757414143395, "learning_rate": 7.491309385863267e-07, "loss": 0.4112, "step": 1294 }, { "epoch": 0.022510386066157937, "grad_norm": 3.459212621167206, "learning_rate": 7.497103128621089e-07, "loss": 0.6813, "step": 1295 }, { "epoch": 0.022527768603660764, "grad_norm": 1.2102060316094896, "learning_rate": 7.50289687137891e-07, "loss": 0.2529, "step": 1296 }, { "epoch": 0.022545151141163587, "grad_norm": 2.5774505803424357, "learning_rate": 7.508690614136732e-07, "loss": 0.5159, "step": 1297 }, { "epoch": 0.02256253367866641, "grad_norm": 2.075036114726234, "learning_rate": 7.514484356894553e-07, "loss": 0.4394, "step": 1298 }, { "epoch": 0.022579916216169237, "grad_norm": 2.7322560002612297, "learning_rate": 7.520278099652375e-07, "loss": 0.549, "step": 1299 }, { "epoch": 0.02259729875367206, "grad_norm": 2.072443243878705, "learning_rate": 7.526071842410196e-07, "loss": 0.6512, "step": 1300 }, { "epoch": 0.022614681291174887, "grad_norm": 1.7382865645691297, "learning_rate": 7.531865585168018e-07, "loss": 0.4238, "step": 1301 }, { "epoch": 0.02263206382867771, "grad_norm": 1.4974580308804057, "learning_rate": 7.537659327925839e-07, "loss": 0.6484, "step": 1302 }, { "epoch": 0.022649446366180534, "grad_norm": 1.995052134596635, "learning_rate": 7.543453070683661e-07, "loss": 0.4171, "step": 1303 }, { "epoch": 0.02266682890368336, "grad_norm": 2.0520137686177224, "learning_rate": 7.549246813441483e-07, "loss": 0.5244, "step": 1304 }, { "epoch": 0.022684211441186184, "grad_norm": 3.1003766233989323, "learning_rate": 7.555040556199305e-07, "loss": 0.7579, "step": 1305 }, { "epoch": 0.022701593978689007, "grad_norm": 2.2642471956895736, "learning_rate": 7.560834298957126e-07, "loss": 0.6999, "step": 1306 }, { "epoch": 0.022718976516191834, "grad_norm": 1.3776830972714436, "learning_rate": 7.566628041714948e-07, "loss": 0.2071, "step": 1307 }, { "epoch": 0.022736359053694657, "grad_norm": 3.0109472221860956, "learning_rate": 7.572421784472768e-07, "loss": 0.3942, "step": 1308 }, { "epoch": 0.022753741591197484, "grad_norm": 1.8857034301008446, "learning_rate": 7.57821552723059e-07, "loss": 0.238, "step": 1309 }, { "epoch": 0.022771124128700308, "grad_norm": 3.3994351713057216, "learning_rate": 7.584009269988411e-07, "loss": 0.3766, "step": 1310 }, { "epoch": 0.02278850666620313, "grad_norm": 2.9579583217643344, "learning_rate": 7.589803012746234e-07, "loss": 0.4469, "step": 1311 }, { "epoch": 0.022805889203705958, "grad_norm": 1.8685278215447945, "learning_rate": 7.595596755504055e-07, "loss": 0.4771, "step": 1312 }, { "epoch": 0.02282327174120878, "grad_norm": 1.7501876018953422, "learning_rate": 7.601390498261877e-07, "loss": 0.3183, "step": 1313 }, { "epoch": 0.022840654278711608, "grad_norm": 2.2231083405663683, "learning_rate": 7.607184241019698e-07, "loss": 0.4848, "step": 1314 }, { "epoch": 0.02285803681621443, "grad_norm": 2.942554618262633, "learning_rate": 7.61297798377752e-07, "loss": 0.2864, "step": 1315 }, { "epoch": 0.022875419353717254, "grad_norm": 2.0656626222770993, "learning_rate": 7.618771726535341e-07, "loss": 0.4437, "step": 1316 }, { "epoch": 0.02289280189122008, "grad_norm": 0.9932588669593558, "learning_rate": 7.624565469293163e-07, "loss": 0.2926, "step": 1317 }, { "epoch": 0.022910184428722905, "grad_norm": 2.563242099179813, "learning_rate": 7.630359212050984e-07, "loss": 0.4013, "step": 1318 }, { "epoch": 0.02292756696622573, "grad_norm": 1.7503889132381034, "learning_rate": 7.636152954808807e-07, "loss": 0.3908, "step": 1319 }, { "epoch": 0.022944949503728555, "grad_norm": 1.7809678268642537, "learning_rate": 7.641946697566627e-07, "loss": 0.405, "step": 1320 }, { "epoch": 0.022962332041231378, "grad_norm": 2.2293510187310397, "learning_rate": 7.647740440324449e-07, "loss": 0.4198, "step": 1321 }, { "epoch": 0.022979714578734205, "grad_norm": 2.0437576306190817, "learning_rate": 7.65353418308227e-07, "loss": 0.5497, "step": 1322 }, { "epoch": 0.022997097116237028, "grad_norm": 2.1249858732641367, "learning_rate": 7.659327925840092e-07, "loss": 0.5534, "step": 1323 }, { "epoch": 0.02301447965373985, "grad_norm": 3.136047288392964, "learning_rate": 7.665121668597913e-07, "loss": 0.5361, "step": 1324 }, { "epoch": 0.023031862191242678, "grad_norm": 1.8053455535602518, "learning_rate": 7.670915411355735e-07, "loss": 0.4557, "step": 1325 }, { "epoch": 0.0230492447287455, "grad_norm": 3.4010008632611215, "learning_rate": 7.676709154113556e-07, "loss": 0.4015, "step": 1326 }, { "epoch": 0.023066627266248328, "grad_norm": 2.0554652306968064, "learning_rate": 7.682502896871379e-07, "loss": 0.3989, "step": 1327 }, { "epoch": 0.02308400980375115, "grad_norm": 1.7149837584721226, "learning_rate": 7.688296639629201e-07, "loss": 0.4513, "step": 1328 }, { "epoch": 0.023101392341253975, "grad_norm": 1.7799979600026616, "learning_rate": 7.694090382387022e-07, "loss": 0.4828, "step": 1329 }, { "epoch": 0.0231187748787568, "grad_norm": 4.54016008681113, "learning_rate": 7.699884125144844e-07, "loss": 0.3316, "step": 1330 }, { "epoch": 0.023136157416259625, "grad_norm": 1.285379507925101, "learning_rate": 7.705677867902665e-07, "loss": 0.2505, "step": 1331 }, { "epoch": 0.02315353995376245, "grad_norm": 1.7297588626740734, "learning_rate": 7.711471610660486e-07, "loss": 0.4643, "step": 1332 }, { "epoch": 0.023170922491265275, "grad_norm": 3.424260596012756, "learning_rate": 7.717265353418307e-07, "loss": 0.3037, "step": 1333 }, { "epoch": 0.0231883050287681, "grad_norm": 2.511777022006599, "learning_rate": 7.72305909617613e-07, "loss": 0.3413, "step": 1334 }, { "epoch": 0.023205687566270925, "grad_norm": 3.7950314439494575, "learning_rate": 7.728852838933951e-07, "loss": 0.2861, "step": 1335 }, { "epoch": 0.02322307010377375, "grad_norm": 4.4831180264653545, "learning_rate": 7.734646581691773e-07, "loss": 0.4496, "step": 1336 }, { "epoch": 0.023240452641276575, "grad_norm": 1.8244291877986532, "learning_rate": 7.740440324449594e-07, "loss": 0.2449, "step": 1337 }, { "epoch": 0.0232578351787794, "grad_norm": 4.601818561249083, "learning_rate": 7.746234067207416e-07, "loss": 0.6356, "step": 1338 }, { "epoch": 0.023275217716282222, "grad_norm": 2.097322271696696, "learning_rate": 7.752027809965237e-07, "loss": 0.4807, "step": 1339 }, { "epoch": 0.02329260025378505, "grad_norm": 1.5777075983055628, "learning_rate": 7.757821552723059e-07, "loss": 0.3288, "step": 1340 }, { "epoch": 0.023309982791287872, "grad_norm": 2.212473841875051, "learning_rate": 7.76361529548088e-07, "loss": 0.3369, "step": 1341 }, { "epoch": 0.023327365328790695, "grad_norm": 2.6685252284561516, "learning_rate": 7.769409038238703e-07, "loss": 0.3322, "step": 1342 }, { "epoch": 0.023344747866293522, "grad_norm": 2.827729037707699, "learning_rate": 7.775202780996524e-07, "loss": 0.5363, "step": 1343 }, { "epoch": 0.023362130403796345, "grad_norm": 2.930305989911283, "learning_rate": 7.780996523754346e-07, "loss": 0.5366, "step": 1344 }, { "epoch": 0.023379512941299172, "grad_norm": 5.447072637076832, "learning_rate": 7.786790266512166e-07, "loss": 0.3339, "step": 1345 }, { "epoch": 0.023396895478801995, "grad_norm": 3.3802404118740568, "learning_rate": 7.792584009269988e-07, "loss": 0.4203, "step": 1346 }, { "epoch": 0.02341427801630482, "grad_norm": 1.7195642149236037, "learning_rate": 7.798377752027809e-07, "loss": 0.3325, "step": 1347 }, { "epoch": 0.023431660553807646, "grad_norm": 4.495207908662824, "learning_rate": 7.804171494785631e-07, "loss": 0.6781, "step": 1348 }, { "epoch": 0.02344904309131047, "grad_norm": 2.3196336285134636, "learning_rate": 7.809965237543453e-07, "loss": 0.5085, "step": 1349 }, { "epoch": 0.023466425628813296, "grad_norm": 1.7877516362695907, "learning_rate": 7.815758980301275e-07, "loss": 0.3079, "step": 1350 }, { "epoch": 0.02348380816631612, "grad_norm": 1.087528332109165, "learning_rate": 7.821552723059096e-07, "loss": 0.357, "step": 1351 }, { "epoch": 0.023501190703818942, "grad_norm": 1.5880150068411545, "learning_rate": 7.827346465816918e-07, "loss": 0.4741, "step": 1352 }, { "epoch": 0.02351857324132177, "grad_norm": 1.9423618619287844, "learning_rate": 7.833140208574739e-07, "loss": 0.5393, "step": 1353 }, { "epoch": 0.023535955778824592, "grad_norm": 1.635306485806183, "learning_rate": 7.838933951332561e-07, "loss": 0.2435, "step": 1354 }, { "epoch": 0.02355333831632742, "grad_norm": 2.405530614399849, "learning_rate": 7.844727694090382e-07, "loss": 0.3105, "step": 1355 }, { "epoch": 0.023570720853830242, "grad_norm": 2.309853681659032, "learning_rate": 7.850521436848204e-07, "loss": 0.5549, "step": 1356 }, { "epoch": 0.023588103391333066, "grad_norm": 1.9226248980324059, "learning_rate": 7.856315179606025e-07, "loss": 0.2454, "step": 1357 }, { "epoch": 0.023605485928835893, "grad_norm": 3.370657756366531, "learning_rate": 7.862108922363847e-07, "loss": 0.5423, "step": 1358 }, { "epoch": 0.023622868466338716, "grad_norm": 2.777617841528844, "learning_rate": 7.867902665121668e-07, "loss": 0.3841, "step": 1359 }, { "epoch": 0.02364025100384154, "grad_norm": 1.9926392495313068, "learning_rate": 7.87369640787949e-07, "loss": 0.2181, "step": 1360 }, { "epoch": 0.023657633541344366, "grad_norm": 2.530098663358039, "learning_rate": 7.879490150637311e-07, "loss": 0.5925, "step": 1361 }, { "epoch": 0.02367501607884719, "grad_norm": 1.8859303505100735, "learning_rate": 7.885283893395133e-07, "loss": 0.3389, "step": 1362 }, { "epoch": 0.023692398616350016, "grad_norm": 2.653233350648231, "learning_rate": 7.891077636152954e-07, "loss": 0.4861, "step": 1363 }, { "epoch": 0.02370978115385284, "grad_norm": 1.963025758921183, "learning_rate": 7.896871378910776e-07, "loss": 0.3637, "step": 1364 }, { "epoch": 0.023727163691355663, "grad_norm": 1.8908393059541837, "learning_rate": 7.902665121668598e-07, "loss": 0.402, "step": 1365 }, { "epoch": 0.02374454622885849, "grad_norm": 1.6153323360528407, "learning_rate": 7.90845886442642e-07, "loss": 0.3969, "step": 1366 }, { "epoch": 0.023761928766361313, "grad_norm": 2.312187137462942, "learning_rate": 7.914252607184241e-07, "loss": 0.3123, "step": 1367 }, { "epoch": 0.02377931130386414, "grad_norm": 2.565301730787327, "learning_rate": 7.920046349942063e-07, "loss": 0.5862, "step": 1368 }, { "epoch": 0.023796693841366963, "grad_norm": 1.7848716889513765, "learning_rate": 7.925840092699883e-07, "loss": 0.5077, "step": 1369 }, { "epoch": 0.023814076378869786, "grad_norm": 2.0717421665831304, "learning_rate": 7.931633835457705e-07, "loss": 0.293, "step": 1370 }, { "epoch": 0.023831458916372613, "grad_norm": 2.100463864369058, "learning_rate": 7.937427578215526e-07, "loss": 0.4181, "step": 1371 }, { "epoch": 0.023848841453875436, "grad_norm": 2.466486477270316, "learning_rate": 7.943221320973349e-07, "loss": 0.4218, "step": 1372 }, { "epoch": 0.023866223991378263, "grad_norm": 1.3074865212216975, "learning_rate": 7.94901506373117e-07, "loss": 0.2619, "step": 1373 }, { "epoch": 0.023883606528881086, "grad_norm": 1.618216405097898, "learning_rate": 7.954808806488992e-07, "loss": 0.3657, "step": 1374 }, { "epoch": 0.02390098906638391, "grad_norm": 2.5057601765448365, "learning_rate": 7.960602549246813e-07, "loss": 0.4476, "step": 1375 }, { "epoch": 0.023918371603886737, "grad_norm": 1.9509923125321191, "learning_rate": 7.966396292004635e-07, "loss": 0.5951, "step": 1376 }, { "epoch": 0.02393575414138956, "grad_norm": 1.8346622682012155, "learning_rate": 7.972190034762456e-07, "loss": 0.3709, "step": 1377 }, { "epoch": 0.023953136678892383, "grad_norm": 1.7366222870043064, "learning_rate": 7.977983777520278e-07, "loss": 0.2941, "step": 1378 }, { "epoch": 0.02397051921639521, "grad_norm": 3.203710645087757, "learning_rate": 7.983777520278099e-07, "loss": 0.443, "step": 1379 }, { "epoch": 0.023987901753898033, "grad_norm": 1.6114176125888842, "learning_rate": 7.989571263035922e-07, "loss": 0.4241, "step": 1380 }, { "epoch": 0.02400528429140086, "grad_norm": 2.1418572103950977, "learning_rate": 7.995365005793742e-07, "loss": 0.5327, "step": 1381 }, { "epoch": 0.024022666828903683, "grad_norm": 1.5439137145583492, "learning_rate": 8.001158748551564e-07, "loss": 0.3549, "step": 1382 }, { "epoch": 0.024040049366406507, "grad_norm": 1.9955402066854833, "learning_rate": 8.006952491309385e-07, "loss": 0.3872, "step": 1383 }, { "epoch": 0.024057431903909333, "grad_norm": 1.963260087348614, "learning_rate": 8.012746234067207e-07, "loss": 0.3646, "step": 1384 }, { "epoch": 0.024074814441412157, "grad_norm": 1.4615864766320776, "learning_rate": 8.018539976825028e-07, "loss": 0.3554, "step": 1385 }, { "epoch": 0.024092196978914984, "grad_norm": 2.025513273753219, "learning_rate": 8.02433371958285e-07, "loss": 0.5003, "step": 1386 }, { "epoch": 0.024109579516417807, "grad_norm": 2.463052166921239, "learning_rate": 8.030127462340671e-07, "loss": 0.4021, "step": 1387 }, { "epoch": 0.02412696205392063, "grad_norm": 1.699649289772965, "learning_rate": 8.035921205098494e-07, "loss": 0.5397, "step": 1388 }, { "epoch": 0.024144344591423457, "grad_norm": 2.4557807707685977, "learning_rate": 8.041714947856315e-07, "loss": 0.3217, "step": 1389 }, { "epoch": 0.02416172712892628, "grad_norm": 1.6422759544201115, "learning_rate": 8.047508690614137e-07, "loss": 0.4514, "step": 1390 }, { "epoch": 0.024179109666429104, "grad_norm": 1.662593043498669, "learning_rate": 8.053302433371958e-07, "loss": 0.4973, "step": 1391 }, { "epoch": 0.02419649220393193, "grad_norm": 1.8809883811943862, "learning_rate": 8.05909617612978e-07, "loss": 0.6295, "step": 1392 }, { "epoch": 0.024213874741434754, "grad_norm": 1.5121729161866966, "learning_rate": 8.0648899188876e-07, "loss": 0.6104, "step": 1393 }, { "epoch": 0.02423125727893758, "grad_norm": 1.8184750994709185, "learning_rate": 8.070683661645422e-07, "loss": 0.3833, "step": 1394 }, { "epoch": 0.024248639816440404, "grad_norm": 1.9738848319281541, "learning_rate": 8.076477404403244e-07, "loss": 0.5157, "step": 1395 }, { "epoch": 0.024266022353943227, "grad_norm": 1.5878186059798414, "learning_rate": 8.082271147161066e-07, "loss": 0.2935, "step": 1396 }, { "epoch": 0.024283404891446054, "grad_norm": 1.825726848965064, "learning_rate": 8.088064889918887e-07, "loss": 0.4155, "step": 1397 }, { "epoch": 0.024300787428948877, "grad_norm": 1.1109283178804552, "learning_rate": 8.093858632676709e-07, "loss": 0.3951, "step": 1398 }, { "epoch": 0.024318169966451704, "grad_norm": 2.304027427837035, "learning_rate": 8.09965237543453e-07, "loss": 0.2172, "step": 1399 }, { "epoch": 0.024335552503954527, "grad_norm": 1.5569970908716435, "learning_rate": 8.105446118192352e-07, "loss": 0.2746, "step": 1400 }, { "epoch": 0.02435293504145735, "grad_norm": 2.2766383399699053, "learning_rate": 8.111239860950173e-07, "loss": 0.5546, "step": 1401 }, { "epoch": 0.024370317578960177, "grad_norm": 1.9336919455738126, "learning_rate": 8.117033603707995e-07, "loss": 0.5704, "step": 1402 }, { "epoch": 0.024387700116463, "grad_norm": 1.692073333442388, "learning_rate": 8.122827346465817e-07, "loss": 0.3635, "step": 1403 }, { "epoch": 0.024405082653965827, "grad_norm": 4.715966988592162, "learning_rate": 8.128621089223639e-07, "loss": 0.9912, "step": 1404 }, { "epoch": 0.02442246519146865, "grad_norm": 3.0117765573502693, "learning_rate": 8.134414831981459e-07, "loss": 0.4302, "step": 1405 }, { "epoch": 0.024439847728971474, "grad_norm": 1.4800463923139278, "learning_rate": 8.140208574739281e-07, "loss": 0.4492, "step": 1406 }, { "epoch": 0.0244572302664743, "grad_norm": 1.6323597561399719, "learning_rate": 8.146002317497102e-07, "loss": 0.2426, "step": 1407 }, { "epoch": 0.024474612803977124, "grad_norm": 1.4756520929875874, "learning_rate": 8.151796060254924e-07, "loss": 0.2365, "step": 1408 }, { "epoch": 0.024491995341479948, "grad_norm": 2.9889736121014328, "learning_rate": 8.157589803012745e-07, "loss": 0.3051, "step": 1409 }, { "epoch": 0.024509377878982774, "grad_norm": 2.153332154725056, "learning_rate": 8.163383545770567e-07, "loss": 0.2579, "step": 1410 }, { "epoch": 0.024526760416485598, "grad_norm": 1.931219889044011, "learning_rate": 8.169177288528389e-07, "loss": 0.2907, "step": 1411 }, { "epoch": 0.024544142953988424, "grad_norm": 1.5933071576118603, "learning_rate": 8.174971031286211e-07, "loss": 0.3048, "step": 1412 }, { "epoch": 0.024561525491491248, "grad_norm": 2.6133992470583003, "learning_rate": 8.180764774044032e-07, "loss": 0.2122, "step": 1413 }, { "epoch": 0.02457890802899407, "grad_norm": 2.2904892217023707, "learning_rate": 8.186558516801854e-07, "loss": 0.6084, "step": 1414 }, { "epoch": 0.024596290566496898, "grad_norm": 2.420100029706436, "learning_rate": 8.192352259559675e-07, "loss": 0.4665, "step": 1415 }, { "epoch": 0.02461367310399972, "grad_norm": 3.7183760571947073, "learning_rate": 8.198146002317497e-07, "loss": 0.5444, "step": 1416 }, { "epoch": 0.024631055641502548, "grad_norm": 2.318600994112699, "learning_rate": 8.203939745075317e-07, "loss": 0.4893, "step": 1417 }, { "epoch": 0.02464843817900537, "grad_norm": 1.7594898483547798, "learning_rate": 8.20973348783314e-07, "loss": 0.4566, "step": 1418 }, { "epoch": 0.024665820716508195, "grad_norm": 2.6433725870488214, "learning_rate": 8.215527230590961e-07, "loss": 0.4423, "step": 1419 }, { "epoch": 0.02468320325401102, "grad_norm": 3.5958351222296274, "learning_rate": 8.221320973348783e-07, "loss": 0.691, "step": 1420 }, { "epoch": 0.024700585791513845, "grad_norm": 5.852524842391465, "learning_rate": 8.227114716106604e-07, "loss": 0.841, "step": 1421 }, { "epoch": 0.02471796832901667, "grad_norm": 1.9561962292170223, "learning_rate": 8.232908458864426e-07, "loss": 0.3244, "step": 1422 }, { "epoch": 0.024735350866519495, "grad_norm": 1.9070226286848861, "learning_rate": 8.238702201622247e-07, "loss": 0.6277, "step": 1423 }, { "epoch": 0.024752733404022318, "grad_norm": 2.8224985344437363, "learning_rate": 8.244495944380069e-07, "loss": 0.3761, "step": 1424 }, { "epoch": 0.024770115941525145, "grad_norm": 2.1266321515025903, "learning_rate": 8.25028968713789e-07, "loss": 0.445, "step": 1425 }, { "epoch": 0.024787498479027968, "grad_norm": 1.8352502596314593, "learning_rate": 8.256083429895713e-07, "loss": 0.3121, "step": 1426 }, { "epoch": 0.02480488101653079, "grad_norm": 2.5102328960198825, "learning_rate": 8.261877172653534e-07, "loss": 0.49, "step": 1427 }, { "epoch": 0.024822263554033618, "grad_norm": 1.8801317744274695, "learning_rate": 8.267670915411356e-07, "loss": 0.4811, "step": 1428 }, { "epoch": 0.02483964609153644, "grad_norm": 1.4321361848089498, "learning_rate": 8.273464658169177e-07, "loss": 0.6578, "step": 1429 }, { "epoch": 0.02485702862903927, "grad_norm": 1.8745100703047193, "learning_rate": 8.279258400926998e-07, "loss": 0.6716, "step": 1430 }, { "epoch": 0.02487441116654209, "grad_norm": 1.7488236418299692, "learning_rate": 8.285052143684819e-07, "loss": 0.3556, "step": 1431 }, { "epoch": 0.024891793704044915, "grad_norm": 2.22626882128846, "learning_rate": 8.290845886442641e-07, "loss": 0.3592, "step": 1432 }, { "epoch": 0.024909176241547742, "grad_norm": 3.5517299527508004, "learning_rate": 8.296639629200463e-07, "loss": 0.3231, "step": 1433 }, { "epoch": 0.024926558779050565, "grad_norm": 2.1918798862952324, "learning_rate": 8.302433371958285e-07, "loss": 0.6388, "step": 1434 }, { "epoch": 0.024943941316553392, "grad_norm": 1.9798261216116937, "learning_rate": 8.308227114716106e-07, "loss": 0.3733, "step": 1435 }, { "epoch": 0.024961323854056215, "grad_norm": 1.1124382605473415, "learning_rate": 8.314020857473928e-07, "loss": 0.432, "step": 1436 }, { "epoch": 0.02497870639155904, "grad_norm": 8.543676421148826, "learning_rate": 8.319814600231749e-07, "loss": 0.4957, "step": 1437 }, { "epoch": 0.024996088929061865, "grad_norm": 1.3724898827131378, "learning_rate": 8.325608342989571e-07, "loss": 0.4645, "step": 1438 }, { "epoch": 0.02501347146656469, "grad_norm": 4.376471752798859, "learning_rate": 8.331402085747392e-07, "loss": 0.232, "step": 1439 }, { "epoch": 0.025030854004067515, "grad_norm": 1.619230677474512, "learning_rate": 8.337195828505214e-07, "loss": 0.3801, "step": 1440 }, { "epoch": 0.02504823654157034, "grad_norm": 2.857509483207561, "learning_rate": 8.342989571263037e-07, "loss": 0.4208, "step": 1441 }, { "epoch": 0.025065619079073162, "grad_norm": 1.5024070330660237, "learning_rate": 8.348783314020857e-07, "loss": 0.2536, "step": 1442 }, { "epoch": 0.02508300161657599, "grad_norm": 2.162258268731628, "learning_rate": 8.354577056778678e-07, "loss": 0.4138, "step": 1443 }, { "epoch": 0.025100384154078812, "grad_norm": 2.2951309055065643, "learning_rate": 8.3603707995365e-07, "loss": 0.5452, "step": 1444 }, { "epoch": 0.025117766691581635, "grad_norm": 1.8103040261473924, "learning_rate": 8.366164542294321e-07, "loss": 0.5223, "step": 1445 }, { "epoch": 0.025135149229084462, "grad_norm": 1.8405989515702408, "learning_rate": 8.371958285052143e-07, "loss": 0.3298, "step": 1446 }, { "epoch": 0.025152531766587286, "grad_norm": 1.3723661409130443, "learning_rate": 8.377752027809964e-07, "loss": 0.1861, "step": 1447 }, { "epoch": 0.025169914304090112, "grad_norm": 2.121443968415442, "learning_rate": 8.383545770567786e-07, "loss": 0.3793, "step": 1448 }, { "epoch": 0.025187296841592936, "grad_norm": 1.9905879445862977, "learning_rate": 8.389339513325609e-07, "loss": 0.3749, "step": 1449 }, { "epoch": 0.02520467937909576, "grad_norm": 1.9857885868532408, "learning_rate": 8.39513325608343e-07, "loss": 0.2975, "step": 1450 }, { "epoch": 0.025222061916598586, "grad_norm": 4.300440178649231, "learning_rate": 8.400926998841252e-07, "loss": 0.5778, "step": 1451 }, { "epoch": 0.02523944445410141, "grad_norm": 3.080540500270522, "learning_rate": 8.406720741599073e-07, "loss": 0.4661, "step": 1452 }, { "epoch": 0.025256826991604236, "grad_norm": 2.308458136088028, "learning_rate": 8.412514484356895e-07, "loss": 0.505, "step": 1453 }, { "epoch": 0.02527420952910706, "grad_norm": 1.8480999864669125, "learning_rate": 8.418308227114715e-07, "loss": 0.3426, "step": 1454 }, { "epoch": 0.025291592066609882, "grad_norm": 2.481783910740292, "learning_rate": 8.424101969872536e-07, "loss": 0.4226, "step": 1455 }, { "epoch": 0.02530897460411271, "grad_norm": 2.6596390517071273, "learning_rate": 8.429895712630359e-07, "loss": 0.2328, "step": 1456 }, { "epoch": 0.025326357141615533, "grad_norm": 2.3753812799629777, "learning_rate": 8.435689455388181e-07, "loss": 0.357, "step": 1457 }, { "epoch": 0.02534373967911836, "grad_norm": 1.304604402700794, "learning_rate": 8.441483198146002e-07, "loss": 0.4916, "step": 1458 }, { "epoch": 0.025361122216621183, "grad_norm": 1.487574250095368, "learning_rate": 8.447276940903824e-07, "loss": 0.2974, "step": 1459 }, { "epoch": 0.025378504754124006, "grad_norm": 2.090018745741246, "learning_rate": 8.453070683661645e-07, "loss": 0.5921, "step": 1460 }, { "epoch": 0.025395887291626833, "grad_norm": 2.2858320084247383, "learning_rate": 8.458864426419467e-07, "loss": 0.5702, "step": 1461 }, { "epoch": 0.025413269829129656, "grad_norm": 2.0989962962436044, "learning_rate": 8.464658169177288e-07, "loss": 0.2854, "step": 1462 }, { "epoch": 0.02543065236663248, "grad_norm": 2.3407947220150884, "learning_rate": 8.47045191193511e-07, "loss": 0.4194, "step": 1463 }, { "epoch": 0.025448034904135306, "grad_norm": 2.168737873990722, "learning_rate": 8.476245654692932e-07, "loss": 0.3716, "step": 1464 }, { "epoch": 0.02546541744163813, "grad_norm": 2.147233185769124, "learning_rate": 8.482039397450754e-07, "loss": 0.5574, "step": 1465 }, { "epoch": 0.025482799979140956, "grad_norm": 1.8899737657940572, "learning_rate": 8.487833140208574e-07, "loss": 0.3001, "step": 1466 }, { "epoch": 0.02550018251664378, "grad_norm": 1.7899564641627612, "learning_rate": 8.493626882966396e-07, "loss": 0.3811, "step": 1467 }, { "epoch": 0.025517565054146603, "grad_norm": 2.174205628393811, "learning_rate": 8.499420625724217e-07, "loss": 0.2381, "step": 1468 }, { "epoch": 0.02553494759164943, "grad_norm": 2.487757087181635, "learning_rate": 8.505214368482039e-07, "loss": 0.2541, "step": 1469 }, { "epoch": 0.025552330129152253, "grad_norm": 2.124669281632337, "learning_rate": 8.51100811123986e-07, "loss": 0.2428, "step": 1470 }, { "epoch": 0.02556971266665508, "grad_norm": 1.448735061116914, "learning_rate": 8.516801853997682e-07, "loss": 0.3372, "step": 1471 }, { "epoch": 0.025587095204157903, "grad_norm": 1.3007353013212408, "learning_rate": 8.522595596755504e-07, "loss": 0.2987, "step": 1472 }, { "epoch": 0.025604477741660726, "grad_norm": 1.5477432538504732, "learning_rate": 8.528389339513326e-07, "loss": 0.2718, "step": 1473 }, { "epoch": 0.025621860279163553, "grad_norm": 1.795562397799683, "learning_rate": 8.534183082271147e-07, "loss": 0.5403, "step": 1474 }, { "epoch": 0.025639242816666376, "grad_norm": 1.702232173026017, "learning_rate": 8.539976825028969e-07, "loss": 0.3648, "step": 1475 }, { "epoch": 0.025656625354169203, "grad_norm": 2.0018331645047946, "learning_rate": 8.54577056778679e-07, "loss": 0.5207, "step": 1476 }, { "epoch": 0.025674007891672027, "grad_norm": 2.967251601159997, "learning_rate": 8.551564310544612e-07, "loss": 0.3622, "step": 1477 }, { "epoch": 0.02569139042917485, "grad_norm": 2.0746259736510146, "learning_rate": 8.557358053302432e-07, "loss": 0.3013, "step": 1478 }, { "epoch": 0.025708772966677677, "grad_norm": 1.9151979295835826, "learning_rate": 8.563151796060255e-07, "loss": 0.4476, "step": 1479 }, { "epoch": 0.0257261555041805, "grad_norm": 1.7390974502211471, "learning_rate": 8.568945538818076e-07, "loss": 0.2808, "step": 1480 }, { "epoch": 0.025743538041683323, "grad_norm": 1.9250893319818831, "learning_rate": 8.574739281575898e-07, "loss": 0.3517, "step": 1481 }, { "epoch": 0.02576092057918615, "grad_norm": 4.04406373332491, "learning_rate": 8.580533024333719e-07, "loss": 0.521, "step": 1482 }, { "epoch": 0.025778303116688973, "grad_norm": 1.9979496001932238, "learning_rate": 8.586326767091541e-07, "loss": 0.5708, "step": 1483 }, { "epoch": 0.0257956856541918, "grad_norm": 2.0461962357989183, "learning_rate": 8.592120509849362e-07, "loss": 0.3922, "step": 1484 }, { "epoch": 0.025813068191694623, "grad_norm": 2.3133283293633746, "learning_rate": 8.597914252607184e-07, "loss": 0.7107, "step": 1485 }, { "epoch": 0.025830450729197447, "grad_norm": 2.0417961868862893, "learning_rate": 8.603707995365005e-07, "loss": 0.499, "step": 1486 }, { "epoch": 0.025847833266700274, "grad_norm": 1.4236799750706923, "learning_rate": 8.609501738122828e-07, "loss": 0.3551, "step": 1487 }, { "epoch": 0.025865215804203097, "grad_norm": 3.1026513352168283, "learning_rate": 8.615295480880649e-07, "loss": 0.4765, "step": 1488 }, { "epoch": 0.025882598341705924, "grad_norm": 1.9315105513462476, "learning_rate": 8.621089223638471e-07, "loss": 0.3314, "step": 1489 }, { "epoch": 0.025899980879208747, "grad_norm": 2.1402585963414578, "learning_rate": 8.626882966396291e-07, "loss": 0.3538, "step": 1490 }, { "epoch": 0.02591736341671157, "grad_norm": 2.0720660405916975, "learning_rate": 8.632676709154113e-07, "loss": 0.3694, "step": 1491 }, { "epoch": 0.025934745954214397, "grad_norm": 2.0195680662374342, "learning_rate": 8.638470451911934e-07, "loss": 0.3929, "step": 1492 }, { "epoch": 0.02595212849171722, "grad_norm": 2.640266595139826, "learning_rate": 8.644264194669756e-07, "loss": 0.3053, "step": 1493 }, { "epoch": 0.025969511029220047, "grad_norm": 1.5895734236035477, "learning_rate": 8.650057937427577e-07, "loss": 0.4407, "step": 1494 }, { "epoch": 0.02598689356672287, "grad_norm": 1.7467301643802589, "learning_rate": 8.6558516801854e-07, "loss": 0.2718, "step": 1495 }, { "epoch": 0.026004276104225694, "grad_norm": 5.056899204285074, "learning_rate": 8.661645422943221e-07, "loss": 0.6554, "step": 1496 }, { "epoch": 0.02602165864172852, "grad_norm": 1.9992383855029692, "learning_rate": 8.667439165701043e-07, "loss": 0.4464, "step": 1497 }, { "epoch": 0.026039041179231344, "grad_norm": 2.044005037424279, "learning_rate": 8.673232908458864e-07, "loss": 0.3611, "step": 1498 }, { "epoch": 0.026056423716734167, "grad_norm": 1.5510775599865614, "learning_rate": 8.679026651216686e-07, "loss": 0.4164, "step": 1499 }, { "epoch": 0.026073806254236994, "grad_norm": 1.2995253477287807, "learning_rate": 8.684820393974507e-07, "loss": 0.2638, "step": 1500 }, { "epoch": 0.026091188791739817, "grad_norm": 2.0377551337708324, "learning_rate": 8.690614136732329e-07, "loss": 0.3666, "step": 1501 }, { "epoch": 0.026108571329242644, "grad_norm": 2.365246574411361, "learning_rate": 8.696407879490151e-07, "loss": 0.4067, "step": 1502 }, { "epoch": 0.026125953866745467, "grad_norm": 2.755204252982697, "learning_rate": 8.702201622247972e-07, "loss": 0.5043, "step": 1503 }, { "epoch": 0.02614333640424829, "grad_norm": 2.103697443970957, "learning_rate": 8.707995365005793e-07, "loss": 0.2726, "step": 1504 }, { "epoch": 0.026160718941751118, "grad_norm": 4.930699107094718, "learning_rate": 8.713789107763615e-07, "loss": 0.7318, "step": 1505 }, { "epoch": 0.02617810147925394, "grad_norm": 1.59924676947375, "learning_rate": 8.719582850521436e-07, "loss": 0.355, "step": 1506 }, { "epoch": 0.026195484016756768, "grad_norm": 2.857873157641601, "learning_rate": 8.725376593279258e-07, "loss": 0.4154, "step": 1507 }, { "epoch": 0.02621286655425959, "grad_norm": 2.325805108809148, "learning_rate": 8.731170336037079e-07, "loss": 0.7688, "step": 1508 }, { "epoch": 0.026230249091762414, "grad_norm": 1.3889438864573167, "learning_rate": 8.736964078794901e-07, "loss": 0.305, "step": 1509 }, { "epoch": 0.02624763162926524, "grad_norm": 2.330725803938812, "learning_rate": 8.742757821552723e-07, "loss": 0.4047, "step": 1510 }, { "epoch": 0.026265014166768064, "grad_norm": 4.724474943347926, "learning_rate": 8.748551564310545e-07, "loss": 0.5851, "step": 1511 }, { "epoch": 0.02628239670427089, "grad_norm": 2.297810791614391, "learning_rate": 8.754345307068366e-07, "loss": 0.3564, "step": 1512 }, { "epoch": 0.026299779241773714, "grad_norm": 1.8225843473889274, "learning_rate": 8.760139049826188e-07, "loss": 0.491, "step": 1513 }, { "epoch": 0.026317161779276538, "grad_norm": 3.0712060357076, "learning_rate": 8.765932792584009e-07, "loss": 0.5212, "step": 1514 }, { "epoch": 0.026334544316779365, "grad_norm": 2.795799823902471, "learning_rate": 8.77172653534183e-07, "loss": 0.5462, "step": 1515 }, { "epoch": 0.026351926854282188, "grad_norm": 3.1644827471527703, "learning_rate": 8.777520278099651e-07, "loss": 1.1735, "step": 1516 }, { "epoch": 0.02636930939178501, "grad_norm": 1.5190591278080037, "learning_rate": 8.783314020857474e-07, "loss": 0.2597, "step": 1517 }, { "epoch": 0.026386691929287838, "grad_norm": 1.7158626704688327, "learning_rate": 8.789107763615295e-07, "loss": 0.5297, "step": 1518 }, { "epoch": 0.02640407446679066, "grad_norm": 2.2303952526506277, "learning_rate": 8.794901506373117e-07, "loss": 0.4931, "step": 1519 }, { "epoch": 0.026421457004293488, "grad_norm": 3.917590000768471, "learning_rate": 8.800695249130938e-07, "loss": 0.5586, "step": 1520 }, { "epoch": 0.02643883954179631, "grad_norm": 1.6423393239517718, "learning_rate": 8.80648899188876e-07, "loss": 0.4265, "step": 1521 }, { "epoch": 0.026456222079299135, "grad_norm": 2.8561127145255556, "learning_rate": 8.812282734646581e-07, "loss": 0.4257, "step": 1522 }, { "epoch": 0.02647360461680196, "grad_norm": 1.3320450854871781, "learning_rate": 8.818076477404403e-07, "loss": 0.3272, "step": 1523 }, { "epoch": 0.026490987154304785, "grad_norm": 2.2249022448202034, "learning_rate": 8.823870220162224e-07, "loss": 0.639, "step": 1524 }, { "epoch": 0.02650836969180761, "grad_norm": 3.135040038170644, "learning_rate": 8.829663962920047e-07, "loss": 0.3666, "step": 1525 }, { "epoch": 0.026525752229310435, "grad_norm": 1.3637076354735875, "learning_rate": 8.835457705677868e-07, "loss": 0.7081, "step": 1526 }, { "epoch": 0.026543134766813258, "grad_norm": 1.5563520567661588, "learning_rate": 8.841251448435689e-07, "loss": 0.5671, "step": 1527 }, { "epoch": 0.026560517304316085, "grad_norm": 4.252918255152735, "learning_rate": 8.84704519119351e-07, "loss": 0.3962, "step": 1528 }, { "epoch": 0.02657789984181891, "grad_norm": 2.5279075773272504, "learning_rate": 8.852838933951332e-07, "loss": 0.295, "step": 1529 }, { "epoch": 0.026595282379321735, "grad_norm": 2.454101867319077, "learning_rate": 8.858632676709153e-07, "loss": 0.6298, "step": 1530 }, { "epoch": 0.02661266491682456, "grad_norm": 1.3067489372314023, "learning_rate": 8.864426419466975e-07, "loss": 0.456, "step": 1531 }, { "epoch": 0.02663004745432738, "grad_norm": 1.827956790546158, "learning_rate": 8.870220162224796e-07, "loss": 0.3675, "step": 1532 }, { "epoch": 0.02664742999183021, "grad_norm": 3.8956891800015097, "learning_rate": 8.876013904982619e-07, "loss": 0.3754, "step": 1533 }, { "epoch": 0.026664812529333032, "grad_norm": 2.5821383991041738, "learning_rate": 8.88180764774044e-07, "loss": 0.4068, "step": 1534 }, { "epoch": 0.026682195066835855, "grad_norm": 2.8454108268654066, "learning_rate": 8.887601390498262e-07, "loss": 0.3649, "step": 1535 }, { "epoch": 0.026699577604338682, "grad_norm": 1.732016917115456, "learning_rate": 8.893395133256083e-07, "loss": 0.3346, "step": 1536 }, { "epoch": 0.026716960141841505, "grad_norm": 2.1503277425612564, "learning_rate": 8.899188876013905e-07, "loss": 0.3054, "step": 1537 }, { "epoch": 0.026734342679344332, "grad_norm": 2.102275703572308, "learning_rate": 8.904982618771726e-07, "loss": 0.4413, "step": 1538 }, { "epoch": 0.026751725216847155, "grad_norm": 1.7938574688794524, "learning_rate": 8.910776361529547e-07, "loss": 0.4148, "step": 1539 }, { "epoch": 0.02676910775434998, "grad_norm": 1.5144887857758924, "learning_rate": 8.916570104287369e-07, "loss": 0.3923, "step": 1540 }, { "epoch": 0.026786490291852805, "grad_norm": 2.140824775542664, "learning_rate": 8.922363847045191e-07, "loss": 0.3958, "step": 1541 }, { "epoch": 0.02680387282935563, "grad_norm": 3.269746050607758, "learning_rate": 8.928157589803012e-07, "loss": 0.3259, "step": 1542 }, { "epoch": 0.026821255366858456, "grad_norm": 2.29325064354783, "learning_rate": 8.933951332560834e-07, "loss": 0.5158, "step": 1543 }, { "epoch": 0.02683863790436128, "grad_norm": 1.7047981106791057, "learning_rate": 8.939745075318655e-07, "loss": 0.4058, "step": 1544 }, { "epoch": 0.026856020441864102, "grad_norm": 2.319323098467212, "learning_rate": 8.945538818076477e-07, "loss": 0.461, "step": 1545 }, { "epoch": 0.02687340297936693, "grad_norm": 2.5305445310908805, "learning_rate": 8.951332560834298e-07, "loss": 0.4097, "step": 1546 }, { "epoch": 0.026890785516869752, "grad_norm": 3.107319533751207, "learning_rate": 8.95712630359212e-07, "loss": 0.4493, "step": 1547 }, { "epoch": 0.02690816805437258, "grad_norm": 2.1187042105057965, "learning_rate": 8.962920046349942e-07, "loss": 0.4064, "step": 1548 }, { "epoch": 0.026925550591875402, "grad_norm": 3.5371653114814285, "learning_rate": 8.968713789107764e-07, "loss": 0.6829, "step": 1549 }, { "epoch": 0.026942933129378226, "grad_norm": 2.386445596333225, "learning_rate": 8.974507531865585e-07, "loss": 0.6421, "step": 1550 }, { "epoch": 0.026960315666881052, "grad_norm": 1.731199955062453, "learning_rate": 8.980301274623406e-07, "loss": 0.3991, "step": 1551 }, { "epoch": 0.026977698204383876, "grad_norm": 2.307515892270619, "learning_rate": 8.986095017381227e-07, "loss": 0.52, "step": 1552 }, { "epoch": 0.0269950807418867, "grad_norm": 2.7616621636947323, "learning_rate": 8.991888760139049e-07, "loss": 0.3531, "step": 1553 }, { "epoch": 0.027012463279389526, "grad_norm": 3.1091460319595163, "learning_rate": 8.99768250289687e-07, "loss": 0.6706, "step": 1554 }, { "epoch": 0.02702984581689235, "grad_norm": 3.261058652110843, "learning_rate": 9.003476245654692e-07, "loss": 0.4019, "step": 1555 }, { "epoch": 0.027047228354395176, "grad_norm": 1.9526459346492757, "learning_rate": 9.009269988412514e-07, "loss": 0.2543, "step": 1556 }, { "epoch": 0.027064610891898, "grad_norm": 1.7871705381565, "learning_rate": 9.015063731170336e-07, "loss": 0.5438, "step": 1557 }, { "epoch": 0.027081993429400823, "grad_norm": 4.051164762410545, "learning_rate": 9.020857473928157e-07, "loss": 0.6123, "step": 1558 }, { "epoch": 0.02709937596690365, "grad_norm": 1.9361721860736125, "learning_rate": 9.026651216685979e-07, "loss": 0.4607, "step": 1559 }, { "epoch": 0.027116758504406473, "grad_norm": 2.113954784253715, "learning_rate": 9.0324449594438e-07, "loss": 0.3019, "step": 1560 }, { "epoch": 0.0271341410419093, "grad_norm": 2.2840582649991683, "learning_rate": 9.038238702201622e-07, "loss": 0.4979, "step": 1561 }, { "epoch": 0.027151523579412123, "grad_norm": 1.4108597464503305, "learning_rate": 9.044032444959443e-07, "loss": 0.4387, "step": 1562 }, { "epoch": 0.027168906116914946, "grad_norm": 2.0266506768115464, "learning_rate": 9.049826187717266e-07, "loss": 0.4386, "step": 1563 }, { "epoch": 0.027186288654417773, "grad_norm": 2.065306134025961, "learning_rate": 9.055619930475086e-07, "loss": 0.4451, "step": 1564 }, { "epoch": 0.027203671191920596, "grad_norm": 3.0944737564457627, "learning_rate": 9.061413673232908e-07, "loss": 0.5118, "step": 1565 }, { "epoch": 0.02722105372942342, "grad_norm": 3.0844995194836637, "learning_rate": 9.067207415990729e-07, "loss": 0.5718, "step": 1566 }, { "epoch": 0.027238436266926246, "grad_norm": 1.4071684025444229, "learning_rate": 9.073001158748551e-07, "loss": 0.7372, "step": 1567 }, { "epoch": 0.02725581880442907, "grad_norm": 2.423630080211442, "learning_rate": 9.078794901506372e-07, "loss": 0.4428, "step": 1568 }, { "epoch": 0.027273201341931896, "grad_norm": 1.402300467239868, "learning_rate": 9.084588644264194e-07, "loss": 0.4109, "step": 1569 }, { "epoch": 0.02729058387943472, "grad_norm": 2.40257243118639, "learning_rate": 9.090382387022015e-07, "loss": 0.5438, "step": 1570 }, { "epoch": 0.027307966416937543, "grad_norm": 2.59350167504996, "learning_rate": 9.096176129779838e-07, "loss": 0.6147, "step": 1571 }, { "epoch": 0.02732534895444037, "grad_norm": 3.833267120992035, "learning_rate": 9.10196987253766e-07, "loss": 0.4917, "step": 1572 }, { "epoch": 0.027342731491943193, "grad_norm": 1.449074979950202, "learning_rate": 9.107763615295481e-07, "loss": 0.5788, "step": 1573 }, { "epoch": 0.02736011402944602, "grad_norm": 1.6222272452220656, "learning_rate": 9.113557358053302e-07, "loss": 0.4493, "step": 1574 }, { "epoch": 0.027377496566948843, "grad_norm": 2.079206334765193, "learning_rate": 9.119351100811124e-07, "loss": 0.5487, "step": 1575 }, { "epoch": 0.027394879104451667, "grad_norm": 2.334461146836335, "learning_rate": 9.125144843568944e-07, "loss": 0.4359, "step": 1576 }, { "epoch": 0.027412261641954493, "grad_norm": 2.3591018696691894, "learning_rate": 9.130938586326766e-07, "loss": 0.4119, "step": 1577 }, { "epoch": 0.027429644179457317, "grad_norm": 1.0587515938667693, "learning_rate": 9.136732329084587e-07, "loss": 0.2805, "step": 1578 }, { "epoch": 0.027447026716960143, "grad_norm": 2.397005644807122, "learning_rate": 9.14252607184241e-07, "loss": 0.4494, "step": 1579 }, { "epoch": 0.027464409254462967, "grad_norm": 1.4289244533596162, "learning_rate": 9.148319814600231e-07, "loss": 0.3701, "step": 1580 }, { "epoch": 0.02748179179196579, "grad_norm": 2.0640059197240177, "learning_rate": 9.154113557358053e-07, "loss": 0.5191, "step": 1581 }, { "epoch": 0.027499174329468617, "grad_norm": 1.6870145305031803, "learning_rate": 9.159907300115875e-07, "loss": 0.2134, "step": 1582 }, { "epoch": 0.02751655686697144, "grad_norm": 2.700857973246589, "learning_rate": 9.165701042873696e-07, "loss": 0.5863, "step": 1583 }, { "epoch": 0.027533939404474263, "grad_norm": 1.5764497339611607, "learning_rate": 9.171494785631518e-07, "loss": 0.352, "step": 1584 }, { "epoch": 0.02755132194197709, "grad_norm": 2.7034269375055175, "learning_rate": 9.177288528389339e-07, "loss": 0.3438, "step": 1585 }, { "epoch": 0.027568704479479914, "grad_norm": 2.48146829243277, "learning_rate": 9.183082271147162e-07, "loss": 0.2441, "step": 1586 }, { "epoch": 0.02758608701698274, "grad_norm": 2.4289932956265856, "learning_rate": 9.188876013904983e-07, "loss": 0.5837, "step": 1587 }, { "epoch": 0.027603469554485564, "grad_norm": 5.08249344190361, "learning_rate": 9.194669756662804e-07, "loss": 0.5056, "step": 1588 }, { "epoch": 0.027620852091988387, "grad_norm": 1.67429083780778, "learning_rate": 9.200463499420625e-07, "loss": 0.401, "step": 1589 }, { "epoch": 0.027638234629491214, "grad_norm": 2.7560829728721807, "learning_rate": 9.206257242178447e-07, "loss": 0.4689, "step": 1590 }, { "epoch": 0.027655617166994037, "grad_norm": 2.1971992440181736, "learning_rate": 9.212050984936268e-07, "loss": 0.5428, "step": 1591 }, { "epoch": 0.027672999704496864, "grad_norm": 2.059422256951946, "learning_rate": 9.21784472769409e-07, "loss": 0.7108, "step": 1592 }, { "epoch": 0.027690382241999687, "grad_norm": 1.4576719903131583, "learning_rate": 9.223638470451911e-07, "loss": 0.4189, "step": 1593 }, { "epoch": 0.02770776477950251, "grad_norm": 2.8648266801566473, "learning_rate": 9.229432213209734e-07, "loss": 0.3818, "step": 1594 }, { "epoch": 0.027725147317005337, "grad_norm": 2.4252294561125907, "learning_rate": 9.235225955967555e-07, "loss": 0.4695, "step": 1595 }, { "epoch": 0.02774252985450816, "grad_norm": 1.9277790521192286, "learning_rate": 9.241019698725377e-07, "loss": 0.4114, "step": 1596 }, { "epoch": 0.027759912392010987, "grad_norm": 2.574460440487886, "learning_rate": 9.246813441483198e-07, "loss": 0.6827, "step": 1597 }, { "epoch": 0.02777729492951381, "grad_norm": 1.8752752506604535, "learning_rate": 9.25260718424102e-07, "loss": 0.3085, "step": 1598 }, { "epoch": 0.027794677467016634, "grad_norm": 2.388185534925436, "learning_rate": 9.258400926998841e-07, "loss": 0.4395, "step": 1599 }, { "epoch": 0.02781206000451946, "grad_norm": 2.2830541704691805, "learning_rate": 9.264194669756662e-07, "loss": 0.402, "step": 1600 }, { "epoch": 0.027829442542022284, "grad_norm": 2.5883714054561255, "learning_rate": 9.269988412514484e-07, "loss": 0.4696, "step": 1601 }, { "epoch": 0.027846825079525107, "grad_norm": 1.5803413979018843, "learning_rate": 9.275782155272306e-07, "loss": 0.4539, "step": 1602 }, { "epoch": 0.027864207617027934, "grad_norm": 2.4081033724689522, "learning_rate": 9.281575898030127e-07, "loss": 0.3147, "step": 1603 }, { "epoch": 0.027881590154530757, "grad_norm": 5.459778870102592, "learning_rate": 9.287369640787949e-07, "loss": 0.7094, "step": 1604 }, { "epoch": 0.027898972692033584, "grad_norm": 1.845428112504111, "learning_rate": 9.29316338354577e-07, "loss": 0.3108, "step": 1605 }, { "epoch": 0.027916355229536408, "grad_norm": 1.5779412053742883, "learning_rate": 9.298957126303592e-07, "loss": 0.6292, "step": 1606 }, { "epoch": 0.02793373776703923, "grad_norm": 1.8105981177043382, "learning_rate": 9.304750869061413e-07, "loss": 0.5093, "step": 1607 }, { "epoch": 0.027951120304542058, "grad_norm": 1.654014493146659, "learning_rate": 9.310544611819235e-07, "loss": 0.3368, "step": 1608 }, { "epoch": 0.02796850284204488, "grad_norm": 2.0317966115143733, "learning_rate": 9.316338354577057e-07, "loss": 0.5474, "step": 1609 }, { "epoch": 0.027985885379547708, "grad_norm": 1.5070403775212478, "learning_rate": 9.322132097334879e-07, "loss": 0.8944, "step": 1610 }, { "epoch": 0.02800326791705053, "grad_norm": 3.5924295624551075, "learning_rate": 9.3279258400927e-07, "loss": 0.3495, "step": 1611 }, { "epoch": 0.028020650454553354, "grad_norm": 2.723257044439087, "learning_rate": 9.333719582850521e-07, "loss": 0.6617, "step": 1612 }, { "epoch": 0.02803803299205618, "grad_norm": 1.7995509751550736, "learning_rate": 9.339513325608342e-07, "loss": 0.6978, "step": 1613 }, { "epoch": 0.028055415529559005, "grad_norm": 1.4733415536714953, "learning_rate": 9.345307068366164e-07, "loss": 0.4208, "step": 1614 }, { "epoch": 0.02807279806706183, "grad_norm": 2.7355586651089236, "learning_rate": 9.351100811123985e-07, "loss": 0.3841, "step": 1615 }, { "epoch": 0.028090180604564655, "grad_norm": 2.836568902095401, "learning_rate": 9.356894553881807e-07, "loss": 0.3991, "step": 1616 }, { "epoch": 0.028107563142067478, "grad_norm": 1.6810453009378616, "learning_rate": 9.362688296639629e-07, "loss": 0.5238, "step": 1617 }, { "epoch": 0.028124945679570305, "grad_norm": 1.4133475874070938, "learning_rate": 9.368482039397451e-07, "loss": 0.4232, "step": 1618 }, { "epoch": 0.028142328217073128, "grad_norm": 2.7282897364807894, "learning_rate": 9.374275782155272e-07, "loss": 0.6843, "step": 1619 }, { "epoch": 0.02815971075457595, "grad_norm": 1.4084424790124863, "learning_rate": 9.380069524913094e-07, "loss": 0.2097, "step": 1620 }, { "epoch": 0.028177093292078778, "grad_norm": 4.553444679933671, "learning_rate": 9.385863267670915e-07, "loss": 1.2442, "step": 1621 }, { "epoch": 0.0281944758295816, "grad_norm": 1.4789755684650183, "learning_rate": 9.391657010428737e-07, "loss": 0.3314, "step": 1622 }, { "epoch": 0.028211858367084428, "grad_norm": 2.3316108952522545, "learning_rate": 9.397450753186558e-07, "loss": 0.3649, "step": 1623 }, { "epoch": 0.02822924090458725, "grad_norm": 1.3087934464400381, "learning_rate": 9.40324449594438e-07, "loss": 0.4267, "step": 1624 }, { "epoch": 0.028246623442090075, "grad_norm": 2.6687318047669297, "learning_rate": 9.409038238702201e-07, "loss": 0.4791, "step": 1625 }, { "epoch": 0.0282640059795929, "grad_norm": 1.267826840025876, "learning_rate": 9.414831981460023e-07, "loss": 0.2872, "step": 1626 }, { "epoch": 0.028281388517095725, "grad_norm": 1.8884380502866485, "learning_rate": 9.420625724217844e-07, "loss": 0.3816, "step": 1627 }, { "epoch": 0.02829877105459855, "grad_norm": 1.6017737342547027, "learning_rate": 9.426419466975666e-07, "loss": 0.5549, "step": 1628 }, { "epoch": 0.028316153592101375, "grad_norm": 1.6113180185835372, "learning_rate": 9.432213209733487e-07, "loss": 0.5195, "step": 1629 }, { "epoch": 0.0283335361296042, "grad_norm": 1.5791331140272964, "learning_rate": 9.438006952491309e-07, "loss": 0.5098, "step": 1630 }, { "epoch": 0.028350918667107025, "grad_norm": 3.4229511456799875, "learning_rate": 9.44380069524913e-07, "loss": 0.3611, "step": 1631 }, { "epoch": 0.02836830120460985, "grad_norm": 1.8033177666212081, "learning_rate": 9.449594438006953e-07, "loss": 0.23, "step": 1632 }, { "epoch": 0.028385683742112675, "grad_norm": 1.938379674519892, "learning_rate": 9.455388180764774e-07, "loss": 0.5787, "step": 1633 }, { "epoch": 0.0284030662796155, "grad_norm": 1.7607198890335587, "learning_rate": 9.461181923522596e-07, "loss": 0.2065, "step": 1634 }, { "epoch": 0.028420448817118322, "grad_norm": 1.8946027361975284, "learning_rate": 9.466975666280417e-07, "loss": 0.4183, "step": 1635 }, { "epoch": 0.02843783135462115, "grad_norm": 1.5249920065923648, "learning_rate": 9.472769409038238e-07, "loss": 0.2903, "step": 1636 }, { "epoch": 0.028455213892123972, "grad_norm": 3.0547447384103235, "learning_rate": 9.478563151796059e-07, "loss": 0.4411, "step": 1637 }, { "epoch": 0.028472596429626795, "grad_norm": 1.9994307032582, "learning_rate": 9.484356894553881e-07, "loss": 0.4472, "step": 1638 }, { "epoch": 0.028489978967129622, "grad_norm": 3.4769062390580237, "learning_rate": 9.490150637311702e-07, "loss": 0.4482, "step": 1639 }, { "epoch": 0.028507361504632445, "grad_norm": 1.9233139300298174, "learning_rate": 9.495944380069525e-07, "loss": 0.3976, "step": 1640 }, { "epoch": 0.028524744042135272, "grad_norm": 2.383643040314896, "learning_rate": 9.501738122827346e-07, "loss": 0.4247, "step": 1641 }, { "epoch": 0.028542126579638095, "grad_norm": 1.9954714937928049, "learning_rate": 9.507531865585168e-07, "loss": 0.5233, "step": 1642 }, { "epoch": 0.02855950911714092, "grad_norm": 1.9166412540928346, "learning_rate": 9.513325608342989e-07, "loss": 0.314, "step": 1643 }, { "epoch": 0.028576891654643746, "grad_norm": 1.360022425921209, "learning_rate": 9.519119351100811e-07, "loss": 0.371, "step": 1644 }, { "epoch": 0.02859427419214657, "grad_norm": 1.948041747208155, "learning_rate": 9.524913093858632e-07, "loss": 0.5323, "step": 1645 }, { "epoch": 0.028611656729649396, "grad_norm": 2.038693532357276, "learning_rate": 9.530706836616454e-07, "loss": 0.2456, "step": 1646 }, { "epoch": 0.02862903926715222, "grad_norm": 1.4903952319170721, "learning_rate": 9.536500579374276e-07, "loss": 0.2781, "step": 1647 }, { "epoch": 0.028646421804655042, "grad_norm": 1.6992242780307711, "learning_rate": 9.542294322132098e-07, "loss": 0.3211, "step": 1648 }, { "epoch": 0.02866380434215787, "grad_norm": 2.414725490962162, "learning_rate": 9.548088064889918e-07, "loss": 0.65, "step": 1649 }, { "epoch": 0.028681186879660692, "grad_norm": 1.5983655237653516, "learning_rate": 9.55388180764774e-07, "loss": 0.5386, "step": 1650 }, { "epoch": 0.02869856941716352, "grad_norm": 3.088322499235716, "learning_rate": 9.559675550405561e-07, "loss": 0.5161, "step": 1651 }, { "epoch": 0.028715951954666342, "grad_norm": 2.5268224883126926, "learning_rate": 9.565469293163384e-07, "loss": 0.4166, "step": 1652 }, { "epoch": 0.028733334492169166, "grad_norm": 1.7162809562393526, "learning_rate": 9.571263035921204e-07, "loss": 0.4119, "step": 1653 }, { "epoch": 0.028750717029671993, "grad_norm": 2.989513491185738, "learning_rate": 9.577056778679025e-07, "loss": 0.4424, "step": 1654 }, { "epoch": 0.028768099567174816, "grad_norm": 2.83555311334745, "learning_rate": 9.582850521436847e-07, "loss": 0.5891, "step": 1655 }, { "epoch": 0.02878548210467764, "grad_norm": 2.509434928679094, "learning_rate": 9.58864426419467e-07, "loss": 0.4813, "step": 1656 }, { "epoch": 0.028802864642180466, "grad_norm": 1.7209153972324158, "learning_rate": 9.59443800695249e-07, "loss": 0.6235, "step": 1657 }, { "epoch": 0.02882024717968329, "grad_norm": 2.031005321583534, "learning_rate": 9.600231749710313e-07, "loss": 0.6966, "step": 1658 }, { "epoch": 0.028837629717186116, "grad_norm": 1.809993080055608, "learning_rate": 9.606025492468133e-07, "loss": 0.3754, "step": 1659 }, { "epoch": 0.02885501225468894, "grad_norm": 1.635845916748455, "learning_rate": 9.611819235225956e-07, "loss": 0.4044, "step": 1660 }, { "epoch": 0.028872394792191763, "grad_norm": 2.072455714228672, "learning_rate": 9.617612977983776e-07, "loss": 0.4362, "step": 1661 }, { "epoch": 0.02888977732969459, "grad_norm": 2.645548602932533, "learning_rate": 9.623406720741599e-07, "loss": 0.4731, "step": 1662 }, { "epoch": 0.028907159867197413, "grad_norm": 2.0484523143642916, "learning_rate": 9.629200463499421e-07, "loss": 0.2991, "step": 1663 }, { "epoch": 0.02892454240470024, "grad_norm": 2.194237495461399, "learning_rate": 9.634994206257242e-07, "loss": 0.409, "step": 1664 }, { "epoch": 0.028941924942203063, "grad_norm": 1.8146026599868386, "learning_rate": 9.640787949015064e-07, "loss": 0.4507, "step": 1665 }, { "epoch": 0.028959307479705886, "grad_norm": 3.9748315842461346, "learning_rate": 9.646581691772885e-07, "loss": 0.3908, "step": 1666 }, { "epoch": 0.028976690017208713, "grad_norm": 1.6826331663774545, "learning_rate": 9.652375434530705e-07, "loss": 0.3476, "step": 1667 }, { "epoch": 0.028994072554711536, "grad_norm": 1.9128754545392106, "learning_rate": 9.658169177288528e-07, "loss": 0.6531, "step": 1668 }, { "epoch": 0.029011455092214363, "grad_norm": 1.9769714489314083, "learning_rate": 9.663962920046348e-07, "loss": 0.4196, "step": 1669 }, { "epoch": 0.029028837629717186, "grad_norm": 2.3231831005962094, "learning_rate": 9.66975666280417e-07, "loss": 0.4728, "step": 1670 }, { "epoch": 0.02904622016722001, "grad_norm": 1.3746651889380013, "learning_rate": 9.675550405561993e-07, "loss": 0.6325, "step": 1671 }, { "epoch": 0.029063602704722837, "grad_norm": 1.7521603618904462, "learning_rate": 9.681344148319814e-07, "loss": 0.2515, "step": 1672 }, { "epoch": 0.02908098524222566, "grad_norm": 3.275683326896224, "learning_rate": 9.687137891077636e-07, "loss": 0.8907, "step": 1673 }, { "epoch": 0.029098367779728483, "grad_norm": 3.1148787534080546, "learning_rate": 9.692931633835457e-07, "loss": 0.4806, "step": 1674 }, { "epoch": 0.02911575031723131, "grad_norm": 1.310262046803779, "learning_rate": 9.69872537659328e-07, "loss": 0.2484, "step": 1675 }, { "epoch": 0.029133132854734133, "grad_norm": 2.4026342657088167, "learning_rate": 9.7045191193511e-07, "loss": 0.8564, "step": 1676 }, { "epoch": 0.02915051539223696, "grad_norm": 2.3350725156296037, "learning_rate": 9.710312862108922e-07, "loss": 0.5317, "step": 1677 }, { "epoch": 0.029167897929739783, "grad_norm": 2.7845497365192653, "learning_rate": 9.716106604866745e-07, "loss": 0.6154, "step": 1678 }, { "epoch": 0.029185280467242607, "grad_norm": 1.7481455506384709, "learning_rate": 9.721900347624565e-07, "loss": 0.4468, "step": 1679 }, { "epoch": 0.029202663004745433, "grad_norm": 1.692953014930021, "learning_rate": 9.727694090382386e-07, "loss": 0.3996, "step": 1680 }, { "epoch": 0.029220045542248257, "grad_norm": 3.254013260753541, "learning_rate": 9.733487833140208e-07, "loss": 0.5237, "step": 1681 }, { "epoch": 0.029237428079751084, "grad_norm": 1.6688077016652236, "learning_rate": 9.739281575898029e-07, "loss": 0.4828, "step": 1682 }, { "epoch": 0.029254810617253907, "grad_norm": 1.5960926566311964, "learning_rate": 9.745075318655851e-07, "loss": 0.4104, "step": 1683 }, { "epoch": 0.02927219315475673, "grad_norm": 2.7016398201289396, "learning_rate": 9.750869061413672e-07, "loss": 0.3686, "step": 1684 }, { "epoch": 0.029289575692259557, "grad_norm": 3.568609107888037, "learning_rate": 9.756662804171494e-07, "loss": 0.7784, "step": 1685 }, { "epoch": 0.02930695822976238, "grad_norm": 4.5173221313175125, "learning_rate": 9.762456546929317e-07, "loss": 1.2345, "step": 1686 }, { "epoch": 0.029324340767265207, "grad_norm": 2.9328069689034026, "learning_rate": 9.768250289687137e-07, "loss": 0.4149, "step": 1687 }, { "epoch": 0.02934172330476803, "grad_norm": 1.9197229817207069, "learning_rate": 9.77404403244496e-07, "loss": 0.3552, "step": 1688 }, { "epoch": 0.029359105842270854, "grad_norm": 2.1124785880371637, "learning_rate": 9.77983777520278e-07, "loss": 0.6864, "step": 1689 }, { "epoch": 0.02937648837977368, "grad_norm": 2.8717117022375493, "learning_rate": 9.785631517960603e-07, "loss": 0.4099, "step": 1690 }, { "epoch": 0.029393870917276504, "grad_norm": 2.6488955302855706, "learning_rate": 9.791425260718423e-07, "loss": 0.5075, "step": 1691 }, { "epoch": 0.029411253454779327, "grad_norm": 2.0536659498215237, "learning_rate": 9.797219003476244e-07, "loss": 0.3716, "step": 1692 }, { "epoch": 0.029428635992282154, "grad_norm": 1.777475157044106, "learning_rate": 9.803012746234066e-07, "loss": 0.2793, "step": 1693 }, { "epoch": 0.029446018529784977, "grad_norm": 1.9591800836508737, "learning_rate": 9.80880648899189e-07, "loss": 0.4443, "step": 1694 }, { "epoch": 0.029463401067287804, "grad_norm": 2.207476295844589, "learning_rate": 9.81460023174971e-07, "loss": 0.5505, "step": 1695 }, { "epoch": 0.029480783604790627, "grad_norm": 2.2086912109498393, "learning_rate": 9.820393974507532e-07, "loss": 0.2552, "step": 1696 }, { "epoch": 0.02949816614229345, "grad_norm": 3.826507344968692, "learning_rate": 9.826187717265352e-07, "loss": 0.5107, "step": 1697 }, { "epoch": 0.029515548679796277, "grad_norm": 4.189499929109162, "learning_rate": 9.831981460023175e-07, "loss": 0.482, "step": 1698 }, { "epoch": 0.0295329312172991, "grad_norm": 1.5364037958698276, "learning_rate": 9.837775202780995e-07, "loss": 0.5044, "step": 1699 }, { "epoch": 0.029550313754801927, "grad_norm": 2.326335771529513, "learning_rate": 9.843568945538818e-07, "loss": 0.3962, "step": 1700 }, { "epoch": 0.02956769629230475, "grad_norm": 1.297642032561898, "learning_rate": 9.84936268829664e-07, "loss": 0.288, "step": 1701 }, { "epoch": 0.029585078829807574, "grad_norm": 1.4232611483618443, "learning_rate": 9.85515643105446e-07, "loss": 0.3594, "step": 1702 }, { "epoch": 0.0296024613673104, "grad_norm": 2.141428266001483, "learning_rate": 9.860950173812281e-07, "loss": 0.23, "step": 1703 }, { "epoch": 0.029619843904813224, "grad_norm": 1.4893906683930802, "learning_rate": 9.866743916570104e-07, "loss": 0.3726, "step": 1704 }, { "epoch": 0.02963722644231605, "grad_norm": 1.8031498733969211, "learning_rate": 9.872537659327924e-07, "loss": 0.2417, "step": 1705 }, { "epoch": 0.029654608979818874, "grad_norm": 1.5848613260079454, "learning_rate": 9.878331402085747e-07, "loss": 0.3113, "step": 1706 }, { "epoch": 0.029671991517321698, "grad_norm": 2.9238075973964355, "learning_rate": 9.884125144843567e-07, "loss": 0.5278, "step": 1707 }, { "epoch": 0.029689374054824524, "grad_norm": 1.1927228260769849, "learning_rate": 9.88991888760139e-07, "loss": 0.5787, "step": 1708 }, { "epoch": 0.029706756592327348, "grad_norm": 1.3834663380750198, "learning_rate": 9.895712630359213e-07, "loss": 0.5272, "step": 1709 }, { "epoch": 0.02972413912983017, "grad_norm": 5.615218321019927, "learning_rate": 9.901506373117033e-07, "loss": 0.3459, "step": 1710 }, { "epoch": 0.029741521667332998, "grad_norm": 2.135822644064355, "learning_rate": 9.907300115874856e-07, "loss": 0.5299, "step": 1711 }, { "epoch": 0.02975890420483582, "grad_norm": 4.829495394464055, "learning_rate": 9.913093858632676e-07, "loss": 0.3358, "step": 1712 }, { "epoch": 0.029776286742338648, "grad_norm": 1.8175674739070258, "learning_rate": 9.918887601390499e-07, "loss": 0.5561, "step": 1713 }, { "epoch": 0.02979366927984147, "grad_norm": 1.8879191289715302, "learning_rate": 9.92468134414832e-07, "loss": 0.6419, "step": 1714 }, { "epoch": 0.029811051817344295, "grad_norm": 1.965555147767448, "learning_rate": 9.93047508690614e-07, "loss": 0.7336, "step": 1715 }, { "epoch": 0.02982843435484712, "grad_norm": 1.7333628989586858, "learning_rate": 9.936268829663962e-07, "loss": 0.3476, "step": 1716 }, { "epoch": 0.029845816892349945, "grad_norm": 2.277745657393095, "learning_rate": 9.942062572421785e-07, "loss": 0.7678, "step": 1717 }, { "epoch": 0.02986319942985277, "grad_norm": 2.276831137813227, "learning_rate": 9.947856315179605e-07, "loss": 0.6539, "step": 1718 }, { "epoch": 0.029880581967355595, "grad_norm": 1.7553781535673003, "learning_rate": 9.953650057937428e-07, "loss": 0.3993, "step": 1719 }, { "epoch": 0.029897964504858418, "grad_norm": 2.0741519548374034, "learning_rate": 9.959443800695248e-07, "loss": 0.4013, "step": 1720 }, { "epoch": 0.029915347042361245, "grad_norm": 2.161637649857942, "learning_rate": 9.96523754345307e-07, "loss": 0.3563, "step": 1721 }, { "epoch": 0.029932729579864068, "grad_norm": 2.892087724645714, "learning_rate": 9.97103128621089e-07, "loss": 0.229, "step": 1722 }, { "epoch": 0.029950112117366895, "grad_norm": 1.9984687183131449, "learning_rate": 9.976825028968714e-07, "loss": 0.4261, "step": 1723 }, { "epoch": 0.029967494654869718, "grad_norm": 1.887748029345219, "learning_rate": 9.982618771726536e-07, "loss": 0.4162, "step": 1724 }, { "epoch": 0.02998487719237254, "grad_norm": 4.885258168528621, "learning_rate": 9.988412514484357e-07, "loss": 0.5938, "step": 1725 }, { "epoch": 0.03000225972987537, "grad_norm": 2.282729468271636, "learning_rate": 9.99420625724218e-07, "loss": 0.4555, "step": 1726 }, { "epoch": 0.03001964226737819, "grad_norm": 1.5937347662983752, "learning_rate": 1e-06, "loss": 0.3278, "step": 1727 }, { "epoch": 0.030037024804881015, "grad_norm": 5.021679672990689, "learning_rate": 9.999999992076148e-07, "loss": 0.6185, "step": 1728 }, { "epoch": 0.030054407342383842, "grad_norm": 2.4345056391483224, "learning_rate": 9.999999968304594e-07, "loss": 0.4709, "step": 1729 }, { "epoch": 0.030071789879886665, "grad_norm": 2.753404716627335, "learning_rate": 9.999999928685335e-07, "loss": 0.6064, "step": 1730 }, { "epoch": 0.030089172417389492, "grad_norm": 2.300705897509352, "learning_rate": 9.999999873218374e-07, "loss": 0.3592, "step": 1731 }, { "epoch": 0.030106554954892315, "grad_norm": 2.272231790004674, "learning_rate": 9.999999801903712e-07, "loss": 0.4014, "step": 1732 }, { "epoch": 0.03012393749239514, "grad_norm": 1.8869848908642686, "learning_rate": 9.999999714741345e-07, "loss": 0.5159, "step": 1733 }, { "epoch": 0.030141320029897965, "grad_norm": 2.8014674273174824, "learning_rate": 9.999999611731275e-07, "loss": 0.4631, "step": 1734 }, { "epoch": 0.03015870256740079, "grad_norm": 3.4166140708331723, "learning_rate": 9.999999492873505e-07, "loss": 0.3779, "step": 1735 }, { "epoch": 0.030176085104903615, "grad_norm": 2.6939151743780254, "learning_rate": 9.999999358168034e-07, "loss": 0.3607, "step": 1736 }, { "epoch": 0.03019346764240644, "grad_norm": 1.9290862378772573, "learning_rate": 9.99999920761486e-07, "loss": 0.309, "step": 1737 }, { "epoch": 0.030210850179909262, "grad_norm": 2.069141630658031, "learning_rate": 9.999999041213987e-07, "loss": 0.3921, "step": 1738 }, { "epoch": 0.03022823271741209, "grad_norm": 2.1060125832683694, "learning_rate": 9.999998858965412e-07, "loss": 0.5425, "step": 1739 }, { "epoch": 0.030245615254914912, "grad_norm": 2.883122605315793, "learning_rate": 9.99999866086914e-07, "loss": 0.5834, "step": 1740 }, { "epoch": 0.030262997792417735, "grad_norm": 1.8674354029276707, "learning_rate": 9.999998446925166e-07, "loss": 0.4547, "step": 1741 }, { "epoch": 0.030280380329920562, "grad_norm": 1.3297750861497937, "learning_rate": 9.999998217133496e-07, "loss": 0.5116, "step": 1742 }, { "epoch": 0.030297762867423386, "grad_norm": 2.366828383920913, "learning_rate": 9.999997971494128e-07, "loss": 0.4652, "step": 1743 }, { "epoch": 0.030315145404926212, "grad_norm": 1.7511834610703552, "learning_rate": 9.999997710007063e-07, "loss": 0.2644, "step": 1744 }, { "epoch": 0.030332527942429036, "grad_norm": 1.6905370576052001, "learning_rate": 9.999997432672302e-07, "loss": 0.4459, "step": 1745 }, { "epoch": 0.03034991047993186, "grad_norm": 1.547195378521068, "learning_rate": 9.999997139489847e-07, "loss": 0.4204, "step": 1746 }, { "epoch": 0.030367293017434686, "grad_norm": 2.6317209362674094, "learning_rate": 9.999996830459695e-07, "loss": 0.4085, "step": 1747 }, { "epoch": 0.03038467555493751, "grad_norm": 2.979830857136984, "learning_rate": 9.999996505581854e-07, "loss": 0.8777, "step": 1748 }, { "epoch": 0.030402058092440336, "grad_norm": 1.9067803729004473, "learning_rate": 9.999996164856318e-07, "loss": 0.4188, "step": 1749 }, { "epoch": 0.03041944062994316, "grad_norm": 1.563788178443891, "learning_rate": 9.99999580828309e-07, "loss": 0.4581, "step": 1750 }, { "epoch": 0.030436823167445982, "grad_norm": 2.0219606557452092, "learning_rate": 9.999995435862174e-07, "loss": 0.4606, "step": 1751 }, { "epoch": 0.03045420570494881, "grad_norm": 1.6900568157518696, "learning_rate": 9.99999504759357e-07, "loss": 0.294, "step": 1752 }, { "epoch": 0.030471588242451633, "grad_norm": 2.981620857178194, "learning_rate": 9.999994643477277e-07, "loss": 0.6591, "step": 1753 }, { "epoch": 0.03048897077995446, "grad_norm": 1.3470082126863587, "learning_rate": 9.999994223513298e-07, "loss": 0.3951, "step": 1754 }, { "epoch": 0.030506353317457283, "grad_norm": 1.895549659281231, "learning_rate": 9.999993787701635e-07, "loss": 0.4052, "step": 1755 }, { "epoch": 0.030523735854960106, "grad_norm": 2.350730933078783, "learning_rate": 9.999993336042288e-07, "loss": 0.3538, "step": 1756 }, { "epoch": 0.030541118392462933, "grad_norm": 1.7189493158491997, "learning_rate": 9.999992868535258e-07, "loss": 0.3955, "step": 1757 }, { "epoch": 0.030558500929965756, "grad_norm": 1.7538837901163569, "learning_rate": 9.999992385180548e-07, "loss": 0.3606, "step": 1758 }, { "epoch": 0.03057588346746858, "grad_norm": 3.468039893481297, "learning_rate": 9.99999188597816e-07, "loss": 0.6876, "step": 1759 }, { "epoch": 0.030593266004971406, "grad_norm": 4.925204588679859, "learning_rate": 9.999991370928093e-07, "loss": 0.4206, "step": 1760 }, { "epoch": 0.03061064854247423, "grad_norm": 2.8547183127337123, "learning_rate": 9.999990840030352e-07, "loss": 0.4753, "step": 1761 }, { "epoch": 0.030628031079977056, "grad_norm": 3.3769335137400565, "learning_rate": 9.999990293284935e-07, "loss": 0.3742, "step": 1762 }, { "epoch": 0.03064541361747988, "grad_norm": 1.7041506819598786, "learning_rate": 9.999989730691847e-07, "loss": 0.4299, "step": 1763 }, { "epoch": 0.030662796154982703, "grad_norm": 3.309374224967688, "learning_rate": 9.999989152251087e-07, "loss": 0.3094, "step": 1764 }, { "epoch": 0.03068017869248553, "grad_norm": 1.8727308453429714, "learning_rate": 9.99998855796266e-07, "loss": 0.7175, "step": 1765 }, { "epoch": 0.030697561229988353, "grad_norm": 1.545947678143189, "learning_rate": 9.999987947826564e-07, "loss": 0.418, "step": 1766 }, { "epoch": 0.03071494376749118, "grad_norm": 3.34396977553175, "learning_rate": 9.999987321842805e-07, "loss": 0.4667, "step": 1767 }, { "epoch": 0.030732326304994003, "grad_norm": 1.3606374116948416, "learning_rate": 9.99998668001138e-07, "loss": 0.384, "step": 1768 }, { "epoch": 0.030749708842496826, "grad_norm": 1.90732830488673, "learning_rate": 9.999986022332296e-07, "loss": 0.402, "step": 1769 }, { "epoch": 0.030767091379999653, "grad_norm": 1.9186715369526408, "learning_rate": 9.999985348805553e-07, "loss": 0.4681, "step": 1770 }, { "epoch": 0.030784473917502476, "grad_norm": 2.3959114514541358, "learning_rate": 9.999984659431155e-07, "loss": 0.7392, "step": 1771 }, { "epoch": 0.030801856455005303, "grad_norm": 2.0446830942190446, "learning_rate": 9.9999839542091e-07, "loss": 0.258, "step": 1772 }, { "epoch": 0.030819238992508127, "grad_norm": 1.798495950286099, "learning_rate": 9.999983233139393e-07, "loss": 0.3732, "step": 1773 }, { "epoch": 0.03083662153001095, "grad_norm": 2.2588957086154275, "learning_rate": 9.999982496222038e-07, "loss": 0.5091, "step": 1774 }, { "epoch": 0.030854004067513777, "grad_norm": 3.2592470004247556, "learning_rate": 9.999981743457033e-07, "loss": 0.337, "step": 1775 }, { "epoch": 0.0308713866050166, "grad_norm": 1.9487062832114268, "learning_rate": 9.999980974844382e-07, "loss": 0.3905, "step": 1776 }, { "epoch": 0.030888769142519423, "grad_norm": 1.9585338720840322, "learning_rate": 9.99998019038409e-07, "loss": 0.4472, "step": 1777 }, { "epoch": 0.03090615168002225, "grad_norm": 1.1577338443374858, "learning_rate": 9.999979390076157e-07, "loss": 0.2441, "step": 1778 }, { "epoch": 0.030923534217525073, "grad_norm": 1.9974517410862989, "learning_rate": 9.999978573920586e-07, "loss": 0.625, "step": 1779 }, { "epoch": 0.0309409167550279, "grad_norm": 3.2371345505691775, "learning_rate": 9.999977741917381e-07, "loss": 0.2681, "step": 1780 }, { "epoch": 0.030958299292530723, "grad_norm": 1.6756202446324937, "learning_rate": 9.99997689406654e-07, "loss": 0.4272, "step": 1781 }, { "epoch": 0.030975681830033547, "grad_norm": 1.977589172903361, "learning_rate": 9.999976030368074e-07, "loss": 0.4352, "step": 1782 }, { "epoch": 0.030993064367536374, "grad_norm": 1.8199762063605731, "learning_rate": 9.999975150821977e-07, "loss": 0.6046, "step": 1783 }, { "epoch": 0.031010446905039197, "grad_norm": 2.1159098034194104, "learning_rate": 9.999974255428255e-07, "loss": 0.3742, "step": 1784 }, { "epoch": 0.031027829442542024, "grad_norm": 2.4037719457023927, "learning_rate": 9.999973344186916e-07, "loss": 0.4751, "step": 1785 }, { "epoch": 0.031045211980044847, "grad_norm": 2.87976588919005, "learning_rate": 9.999972417097954e-07, "loss": 0.4368, "step": 1786 }, { "epoch": 0.03106259451754767, "grad_norm": 1.7451630233689026, "learning_rate": 9.999971474161376e-07, "loss": 0.4387, "step": 1787 }, { "epoch": 0.031079977055050497, "grad_norm": 2.2078819719534604, "learning_rate": 9.99997051537719e-07, "loss": 0.6087, "step": 1788 }, { "epoch": 0.03109735959255332, "grad_norm": 4.789668380116209, "learning_rate": 9.99996954074539e-07, "loss": 0.5836, "step": 1789 }, { "epoch": 0.031114742130056147, "grad_norm": 1.6210557964018546, "learning_rate": 9.999968550265984e-07, "loss": 0.3429, "step": 1790 }, { "epoch": 0.03113212466755897, "grad_norm": 1.778584621214962, "learning_rate": 9.999967543938975e-07, "loss": 0.4842, "step": 1791 }, { "epoch": 0.031149507205061794, "grad_norm": 3.789891001606025, "learning_rate": 9.999966521764366e-07, "loss": 0.4512, "step": 1792 }, { "epoch": 0.03116688974256462, "grad_norm": 1.8867931280509267, "learning_rate": 9.99996548374216e-07, "loss": 0.5255, "step": 1793 }, { "epoch": 0.031184272280067444, "grad_norm": 9.872945709995687, "learning_rate": 9.99996442987236e-07, "loss": 0.4173, "step": 1794 }, { "epoch": 0.031201654817570267, "grad_norm": 1.3694312966018631, "learning_rate": 9.99996336015497e-07, "loss": 0.2058, "step": 1795 }, { "epoch": 0.031219037355073094, "grad_norm": 1.5795898687912098, "learning_rate": 9.999962274589992e-07, "loss": 0.247, "step": 1796 }, { "epoch": 0.031236419892575917, "grad_norm": 1.99008765280739, "learning_rate": 9.99996117317743e-07, "loss": 0.3999, "step": 1797 }, { "epoch": 0.03125380243007874, "grad_norm": 1.9991560520616185, "learning_rate": 9.99996005591729e-07, "loss": 0.6434, "step": 1798 }, { "epoch": 0.03127118496758157, "grad_norm": 1.366169734176102, "learning_rate": 9.99995892280957e-07, "loss": 0.5173, "step": 1799 }, { "epoch": 0.031288567505084394, "grad_norm": 1.5825025125130547, "learning_rate": 9.99995777385428e-07, "loss": 0.3314, "step": 1800 }, { "epoch": 0.03130595004258722, "grad_norm": 1.6168649485378468, "learning_rate": 9.999956609051418e-07, "loss": 0.6479, "step": 1801 }, { "epoch": 0.03132333258009004, "grad_norm": 2.084008899472021, "learning_rate": 9.999955428400992e-07, "loss": 0.7784, "step": 1802 }, { "epoch": 0.031340715117592864, "grad_norm": 1.9562425368605876, "learning_rate": 9.999954231903005e-07, "loss": 0.7689, "step": 1803 }, { "epoch": 0.03135809765509569, "grad_norm": 1.8324422199821597, "learning_rate": 9.99995301955746e-07, "loss": 0.3033, "step": 1804 }, { "epoch": 0.03137548019259852, "grad_norm": 1.4495154776193557, "learning_rate": 9.999951791364358e-07, "loss": 0.2242, "step": 1805 }, { "epoch": 0.03139286273010134, "grad_norm": 2.132252812180006, "learning_rate": 9.999950547323707e-07, "loss": 0.4568, "step": 1806 }, { "epoch": 0.031410245267604164, "grad_norm": 1.9207444155893978, "learning_rate": 9.999949287435511e-07, "loss": 0.4198, "step": 1807 }, { "epoch": 0.03142762780510699, "grad_norm": 3.4023604410209067, "learning_rate": 9.999948011699771e-07, "loss": 0.7696, "step": 1808 }, { "epoch": 0.03144501034260981, "grad_norm": 2.9215984350873043, "learning_rate": 9.999946720116491e-07, "loss": 0.8131, "step": 1809 }, { "epoch": 0.03146239288011264, "grad_norm": 1.6941772721593888, "learning_rate": 9.99994541268568e-07, "loss": 0.346, "step": 1810 }, { "epoch": 0.031479775417615465, "grad_norm": 1.4955777729567457, "learning_rate": 9.999944089407336e-07, "loss": 0.6433, "step": 1811 }, { "epoch": 0.03149715795511829, "grad_norm": 1.4535037196929155, "learning_rate": 9.99994275028147e-07, "loss": 0.6333, "step": 1812 }, { "epoch": 0.03151454049262111, "grad_norm": 2.3505139653541165, "learning_rate": 9.99994139530808e-07, "loss": 0.4337, "step": 1813 }, { "epoch": 0.031531923030123934, "grad_norm": 1.8692945448933864, "learning_rate": 9.999940024487172e-07, "loss": 0.3296, "step": 1814 }, { "epoch": 0.031549305567626765, "grad_norm": 1.6526105118487753, "learning_rate": 9.99993863781875e-07, "loss": 0.3885, "step": 1815 }, { "epoch": 0.03156668810512959, "grad_norm": 2.1840477642395477, "learning_rate": 9.999937235302822e-07, "loss": 0.2467, "step": 1816 }, { "epoch": 0.03158407064263241, "grad_norm": 2.6048801569720594, "learning_rate": 9.999935816939388e-07, "loss": 0.5469, "step": 1817 }, { "epoch": 0.031601453180135235, "grad_norm": 1.5153413548138397, "learning_rate": 9.999934382728455e-07, "loss": 0.2927, "step": 1818 }, { "epoch": 0.03161883571763806, "grad_norm": 1.6506973869872552, "learning_rate": 9.999932932670024e-07, "loss": 0.6377, "step": 1819 }, { "epoch": 0.03163621825514089, "grad_norm": 2.666480479708552, "learning_rate": 9.999931466764107e-07, "loss": 0.7393, "step": 1820 }, { "epoch": 0.03165360079264371, "grad_norm": 2.1697117832374033, "learning_rate": 9.999929985010701e-07, "loss": 0.3883, "step": 1821 }, { "epoch": 0.031670983330146535, "grad_norm": 2.176012086997836, "learning_rate": 9.999928487409815e-07, "loss": 0.4779, "step": 1822 }, { "epoch": 0.03168836586764936, "grad_norm": 1.5214670221950861, "learning_rate": 9.99992697396145e-07, "loss": 0.4753, "step": 1823 }, { "epoch": 0.03170574840515218, "grad_norm": 1.483430070624695, "learning_rate": 9.999925444665616e-07, "loss": 0.5078, "step": 1824 }, { "epoch": 0.03172313094265501, "grad_norm": 1.8519412247276619, "learning_rate": 9.999923899522315e-07, "loss": 0.4685, "step": 1825 }, { "epoch": 0.031740513480157835, "grad_norm": 1.6938677046830937, "learning_rate": 9.999922338531551e-07, "loss": 0.5452, "step": 1826 }, { "epoch": 0.03175789601766066, "grad_norm": 1.6820153954836656, "learning_rate": 9.99992076169333e-07, "loss": 0.8608, "step": 1827 }, { "epoch": 0.03177527855516348, "grad_norm": 2.4880801640693933, "learning_rate": 9.999919169007658e-07, "loss": 0.5977, "step": 1828 }, { "epoch": 0.031792661092666305, "grad_norm": 1.4763075277942825, "learning_rate": 9.999917560474539e-07, "loss": 0.4436, "step": 1829 }, { "epoch": 0.031810043630169135, "grad_norm": 2.002713952008794, "learning_rate": 9.999915936093977e-07, "loss": 0.354, "step": 1830 }, { "epoch": 0.03182742616767196, "grad_norm": 2.1936937741062827, "learning_rate": 9.999914295865977e-07, "loss": 0.3331, "step": 1831 }, { "epoch": 0.03184480870517478, "grad_norm": 1.364896185971802, "learning_rate": 9.999912639790548e-07, "loss": 0.324, "step": 1832 }, { "epoch": 0.031862191242677605, "grad_norm": 2.55012827740525, "learning_rate": 9.999910967867691e-07, "loss": 0.2911, "step": 1833 }, { "epoch": 0.03187957378018043, "grad_norm": 2.753019978849716, "learning_rate": 9.999909280097414e-07, "loss": 0.631, "step": 1834 }, { "epoch": 0.03189695631768326, "grad_norm": 1.3926833554412303, "learning_rate": 9.999907576479721e-07, "loss": 0.2843, "step": 1835 }, { "epoch": 0.03191433885518608, "grad_norm": 2.5216469652512834, "learning_rate": 9.999905857014619e-07, "loss": 0.4559, "step": 1836 }, { "epoch": 0.031931721392688905, "grad_norm": 1.8873984975541684, "learning_rate": 9.999904121702112e-07, "loss": 0.3483, "step": 1837 }, { "epoch": 0.03194910393019173, "grad_norm": 1.53704143376628, "learning_rate": 9.999902370542203e-07, "loss": 0.5229, "step": 1838 }, { "epoch": 0.03196648646769455, "grad_norm": 2.7753796887167153, "learning_rate": 9.999900603534903e-07, "loss": 0.4611, "step": 1839 }, { "epoch": 0.031983869005197375, "grad_norm": 1.801184425841738, "learning_rate": 9.999898820680213e-07, "loss": 0.6024, "step": 1840 }, { "epoch": 0.032001251542700206, "grad_norm": 1.6552660289777146, "learning_rate": 9.999897021978142e-07, "loss": 0.5017, "step": 1841 }, { "epoch": 0.03201863408020303, "grad_norm": 2.2078654725710476, "learning_rate": 9.999895207428693e-07, "loss": 0.5412, "step": 1842 }, { "epoch": 0.03203601661770585, "grad_norm": 2.9579938250843703, "learning_rate": 9.999893377031874e-07, "loss": 0.3447, "step": 1843 }, { "epoch": 0.032053399155208676, "grad_norm": 1.9857392940056138, "learning_rate": 9.999891530787689e-07, "loss": 0.5277, "step": 1844 }, { "epoch": 0.0320707816927115, "grad_norm": 1.7737800908284505, "learning_rate": 9.999889668696146e-07, "loss": 0.3606, "step": 1845 }, { "epoch": 0.03208816423021433, "grad_norm": 1.8606544296661423, "learning_rate": 9.999887790757248e-07, "loss": 0.4697, "step": 1846 }, { "epoch": 0.03210554676771715, "grad_norm": 2.403306592970143, "learning_rate": 9.999885896971002e-07, "loss": 0.4018, "step": 1847 }, { "epoch": 0.032122929305219976, "grad_norm": 2.114319149449653, "learning_rate": 9.999883987337417e-07, "loss": 0.6372, "step": 1848 }, { "epoch": 0.0321403118427228, "grad_norm": 2.0977687049901292, "learning_rate": 9.999882061856495e-07, "loss": 0.2256, "step": 1849 }, { "epoch": 0.03215769438022562, "grad_norm": 1.6220616133314594, "learning_rate": 9.999880120528242e-07, "loss": 0.3203, "step": 1850 }, { "epoch": 0.03217507691772845, "grad_norm": 2.5972047146589134, "learning_rate": 9.999878163352669e-07, "loss": 0.4404, "step": 1851 }, { "epoch": 0.032192459455231276, "grad_norm": 2.3151067088645183, "learning_rate": 9.999876190329776e-07, "loss": 0.5376, "step": 1852 }, { "epoch": 0.0322098419927341, "grad_norm": 1.8245000041034827, "learning_rate": 9.999874201459572e-07, "loss": 0.4651, "step": 1853 }, { "epoch": 0.03222722453023692, "grad_norm": 2.3511427298448258, "learning_rate": 9.999872196742067e-07, "loss": 0.2453, "step": 1854 }, { "epoch": 0.032244607067739746, "grad_norm": 2.260877678646937, "learning_rate": 9.99987017617726e-07, "loss": 0.5462, "step": 1855 }, { "epoch": 0.032261989605242576, "grad_norm": 2.6417869871385498, "learning_rate": 9.999868139765164e-07, "loss": 0.4022, "step": 1856 }, { "epoch": 0.0322793721427454, "grad_norm": 3.7516148840705914, "learning_rate": 9.999866087505782e-07, "loss": 0.5632, "step": 1857 }, { "epoch": 0.03229675468024822, "grad_norm": 2.3839348471482764, "learning_rate": 9.999864019399123e-07, "loss": 0.4054, "step": 1858 }, { "epoch": 0.032314137217751046, "grad_norm": 2.3177884021207986, "learning_rate": 9.999861935445189e-07, "loss": 0.2748, "step": 1859 }, { "epoch": 0.03233151975525387, "grad_norm": 1.5055287477251837, "learning_rate": 9.99985983564399e-07, "loss": 0.3315, "step": 1860 }, { "epoch": 0.0323489022927567, "grad_norm": 8.329097877081256, "learning_rate": 9.999857719995532e-07, "loss": 0.6974, "step": 1861 }, { "epoch": 0.03236628483025952, "grad_norm": 1.3159912840109986, "learning_rate": 9.999855588499824e-07, "loss": 0.2719, "step": 1862 }, { "epoch": 0.032383667367762346, "grad_norm": 1.5398118591597976, "learning_rate": 9.999853441156869e-07, "loss": 0.3934, "step": 1863 }, { "epoch": 0.03240104990526517, "grad_norm": 1.8781271553626697, "learning_rate": 9.999851277966674e-07, "loss": 0.6257, "step": 1864 }, { "epoch": 0.03241843244276799, "grad_norm": 1.641521114730638, "learning_rate": 9.99984909892925e-07, "loss": 0.4856, "step": 1865 }, { "epoch": 0.03243581498027082, "grad_norm": 1.8370927524647462, "learning_rate": 9.999846904044598e-07, "loss": 0.7475, "step": 1866 }, { "epoch": 0.032453197517773646, "grad_norm": 1.9795560835585797, "learning_rate": 9.99984469331273e-07, "loss": 0.4007, "step": 1867 }, { "epoch": 0.03247058005527647, "grad_norm": 2.752644256459236, "learning_rate": 9.99984246673365e-07, "loss": 0.4329, "step": 1868 }, { "epoch": 0.03248796259277929, "grad_norm": 2.106851473689746, "learning_rate": 9.999840224307366e-07, "loss": 0.3071, "step": 1869 }, { "epoch": 0.032505345130282116, "grad_norm": 5.07681744617198, "learning_rate": 9.999837966033886e-07, "loss": 0.8114, "step": 1870 }, { "epoch": 0.03252272766778495, "grad_norm": 1.9279658444969312, "learning_rate": 9.999835691913216e-07, "loss": 0.5011, "step": 1871 }, { "epoch": 0.03254011020528777, "grad_norm": 1.8452627429966713, "learning_rate": 9.999833401945367e-07, "loss": 0.5077, "step": 1872 }, { "epoch": 0.03255749274279059, "grad_norm": 1.6675379106704187, "learning_rate": 9.999831096130337e-07, "loss": 0.4115, "step": 1873 }, { "epoch": 0.03257487528029342, "grad_norm": 2.2203343873693995, "learning_rate": 9.999828774468141e-07, "loss": 0.3766, "step": 1874 }, { "epoch": 0.03259225781779624, "grad_norm": 4.007561265172142, "learning_rate": 9.999826436958786e-07, "loss": 0.4186, "step": 1875 }, { "epoch": 0.03260964035529906, "grad_norm": 2.051108912093605, "learning_rate": 9.999824083602277e-07, "loss": 0.4793, "step": 1876 }, { "epoch": 0.032627022892801893, "grad_norm": 1.593565636913866, "learning_rate": 9.999821714398625e-07, "loss": 0.5231, "step": 1877 }, { "epoch": 0.03264440543030472, "grad_norm": 1.463082882678598, "learning_rate": 9.99981932934783e-07, "loss": 0.2673, "step": 1878 }, { "epoch": 0.03266178796780754, "grad_norm": 1.9032095343901458, "learning_rate": 9.99981692844991e-07, "loss": 0.4709, "step": 1879 }, { "epoch": 0.03267917050531036, "grad_norm": 2.9870822093942633, "learning_rate": 9.999814511704862e-07, "loss": 0.4125, "step": 1880 }, { "epoch": 0.03269655304281319, "grad_norm": 1.5059000378596166, "learning_rate": 9.9998120791127e-07, "loss": 0.3094, "step": 1881 }, { "epoch": 0.03271393558031602, "grad_norm": 3.81338268859516, "learning_rate": 9.999809630673432e-07, "loss": 0.488, "step": 1882 }, { "epoch": 0.03273131811781884, "grad_norm": 1.5181095455759082, "learning_rate": 9.999807166387063e-07, "loss": 0.2381, "step": 1883 }, { "epoch": 0.032748700655321664, "grad_norm": 1.7651668716235467, "learning_rate": 9.999804686253604e-07, "loss": 0.4916, "step": 1884 }, { "epoch": 0.03276608319282449, "grad_norm": 1.576365421485721, "learning_rate": 9.999802190273059e-07, "loss": 0.421, "step": 1885 }, { "epoch": 0.03278346573032731, "grad_norm": 1.7343680951135891, "learning_rate": 9.999799678445438e-07, "loss": 0.2523, "step": 1886 }, { "epoch": 0.03280084826783014, "grad_norm": 1.740667607843653, "learning_rate": 9.99979715077075e-07, "loss": 0.6074, "step": 1887 }, { "epoch": 0.032818230805332964, "grad_norm": 2.8474266446088023, "learning_rate": 9.999794607249e-07, "loss": 0.4541, "step": 1888 }, { "epoch": 0.03283561334283579, "grad_norm": 2.56680467027579, "learning_rate": 9.999792047880199e-07, "loss": 0.3738, "step": 1889 }, { "epoch": 0.03285299588033861, "grad_norm": 1.5529007942283666, "learning_rate": 9.999789472664352e-07, "loss": 0.2047, "step": 1890 }, { "epoch": 0.032870378417841434, "grad_norm": 1.6045097867241664, "learning_rate": 9.99978688160147e-07, "loss": 0.4991, "step": 1891 }, { "epoch": 0.032887760955344264, "grad_norm": 2.006392799828306, "learning_rate": 9.999784274691562e-07, "loss": 0.5473, "step": 1892 }, { "epoch": 0.03290514349284709, "grad_norm": 2.2882210348604874, "learning_rate": 9.999781651934634e-07, "loss": 0.3038, "step": 1893 }, { "epoch": 0.03292252603034991, "grad_norm": 2.890832121410418, "learning_rate": 9.999779013330694e-07, "loss": 0.4927, "step": 1894 }, { "epoch": 0.032939908567852734, "grad_norm": 1.761690348551217, "learning_rate": 9.999776358879752e-07, "loss": 0.3067, "step": 1895 }, { "epoch": 0.03295729110535556, "grad_norm": 3.471043310200143, "learning_rate": 9.999773688581814e-07, "loss": 0.4917, "step": 1896 }, { "epoch": 0.03297467364285839, "grad_norm": 2.755609245946269, "learning_rate": 9.999771002436892e-07, "loss": 0.2678, "step": 1897 }, { "epoch": 0.03299205618036121, "grad_norm": 3.2987837779866322, "learning_rate": 9.999768300444993e-07, "loss": 0.2875, "step": 1898 }, { "epoch": 0.033009438717864034, "grad_norm": 1.942470407361294, "learning_rate": 9.999765582606124e-07, "loss": 0.3455, "step": 1899 }, { "epoch": 0.03302682125536686, "grad_norm": 1.2294050314073368, "learning_rate": 9.999762848920294e-07, "loss": 0.2953, "step": 1900 }, { "epoch": 0.03304420379286968, "grad_norm": 2.5184774615937826, "learning_rate": 9.999760099387512e-07, "loss": 0.3856, "step": 1901 }, { "epoch": 0.03306158633037251, "grad_norm": 1.9046313829137187, "learning_rate": 9.999757334007788e-07, "loss": 0.3712, "step": 1902 }, { "epoch": 0.033078968867875334, "grad_norm": 2.465033184783663, "learning_rate": 9.999754552781133e-07, "loss": 0.3985, "step": 1903 }, { "epoch": 0.03309635140537816, "grad_norm": 1.9542766999369883, "learning_rate": 9.999751755707547e-07, "loss": 0.6134, "step": 1904 }, { "epoch": 0.03311373394288098, "grad_norm": 1.7219176500821616, "learning_rate": 9.99974894278705e-07, "loss": 0.4355, "step": 1905 }, { "epoch": 0.033131116480383804, "grad_norm": 2.389966018771379, "learning_rate": 9.999746114019642e-07, "loss": 0.3638, "step": 1906 }, { "epoch": 0.033148499017886635, "grad_norm": 4.0639906178991545, "learning_rate": 9.99974326940534e-07, "loss": 0.4787, "step": 1907 }, { "epoch": 0.03316588155538946, "grad_norm": 3.030443480068121, "learning_rate": 9.999740408944143e-07, "loss": 0.3627, "step": 1908 }, { "epoch": 0.03318326409289228, "grad_norm": 3.071526650490223, "learning_rate": 9.999737532636067e-07, "loss": 0.5274, "step": 1909 }, { "epoch": 0.033200646630395104, "grad_norm": 2.3731689308128394, "learning_rate": 9.999734640481122e-07, "loss": 0.2448, "step": 1910 }, { "epoch": 0.03321802916789793, "grad_norm": 1.9284403263886696, "learning_rate": 9.999731732479314e-07, "loss": 0.4118, "step": 1911 }, { "epoch": 0.03323541170540075, "grad_norm": 1.82125150536292, "learning_rate": 9.999728808630652e-07, "loss": 0.5857, "step": 1912 }, { "epoch": 0.03325279424290358, "grad_norm": 1.6806550722008702, "learning_rate": 9.999725868935148e-07, "loss": 0.3858, "step": 1913 }, { "epoch": 0.033270176780406405, "grad_norm": 1.9003615065034005, "learning_rate": 9.999722913392809e-07, "loss": 0.4387, "step": 1914 }, { "epoch": 0.03328755931790923, "grad_norm": 2.760035949976929, "learning_rate": 9.999719942003644e-07, "loss": 0.4362, "step": 1915 }, { "epoch": 0.03330494185541205, "grad_norm": 1.6515452147466503, "learning_rate": 9.999716954767666e-07, "loss": 0.8121, "step": 1916 }, { "epoch": 0.033322324392914875, "grad_norm": 0.9127678661429351, "learning_rate": 9.999713951684878e-07, "loss": 0.3826, "step": 1917 }, { "epoch": 0.033339706930417705, "grad_norm": 1.6332544784112186, "learning_rate": 9.999710932755297e-07, "loss": 0.4029, "step": 1918 }, { "epoch": 0.03335708946792053, "grad_norm": 1.5101397341363076, "learning_rate": 9.999707897978927e-07, "loss": 0.5014, "step": 1919 }, { "epoch": 0.03337447200542335, "grad_norm": 1.5814319179097363, "learning_rate": 9.99970484735578e-07, "loss": 0.5067, "step": 1920 }, { "epoch": 0.033391854542926175, "grad_norm": 2.1873539776045012, "learning_rate": 9.999701780885867e-07, "loss": 0.2677, "step": 1921 }, { "epoch": 0.033409237080429, "grad_norm": 4.294157120549654, "learning_rate": 9.999698698569195e-07, "loss": 0.3894, "step": 1922 }, { "epoch": 0.03342661961793183, "grad_norm": 1.7436288349521865, "learning_rate": 9.999695600405774e-07, "loss": 0.5815, "step": 1923 }, { "epoch": 0.03344400215543465, "grad_norm": 1.5920398431506444, "learning_rate": 9.999692486395615e-07, "loss": 0.3528, "step": 1924 }, { "epoch": 0.033461384692937475, "grad_norm": 2.9108567151486633, "learning_rate": 9.999689356538728e-07, "loss": 0.2946, "step": 1925 }, { "epoch": 0.0334787672304403, "grad_norm": 1.4804883936080395, "learning_rate": 9.99968621083512e-07, "loss": 0.5995, "step": 1926 }, { "epoch": 0.03349614976794312, "grad_norm": 4.49261766884172, "learning_rate": 9.999683049284806e-07, "loss": 0.7038, "step": 1927 }, { "epoch": 0.03351353230544595, "grad_norm": 2.5496664189471603, "learning_rate": 9.999679871887795e-07, "loss": 0.5728, "step": 1928 }, { "epoch": 0.033530914842948775, "grad_norm": 1.1753781292048462, "learning_rate": 9.99967667864409e-07, "loss": 0.3504, "step": 1929 }, { "epoch": 0.0335482973804516, "grad_norm": 1.5252870699497632, "learning_rate": 9.99967346955371e-07, "loss": 0.2666, "step": 1930 }, { "epoch": 0.03356567991795442, "grad_norm": 2.284150248761095, "learning_rate": 9.999670244616662e-07, "loss": 0.6697, "step": 1931 }, { "epoch": 0.033583062455457245, "grad_norm": 2.820667092167353, "learning_rate": 9.999667003832956e-07, "loss": 0.3518, "step": 1932 }, { "epoch": 0.033600444992960075, "grad_norm": 1.7610561794386004, "learning_rate": 9.999663747202602e-07, "loss": 0.7236, "step": 1933 }, { "epoch": 0.0336178275304629, "grad_norm": 2.3429119764237933, "learning_rate": 9.99966047472561e-07, "loss": 0.4083, "step": 1934 }, { "epoch": 0.03363521006796572, "grad_norm": 4.900816606521242, "learning_rate": 9.999657186401993e-07, "loss": 0.4364, "step": 1935 }, { "epoch": 0.033652592605468545, "grad_norm": 2.823244926705334, "learning_rate": 9.999653882231758e-07, "loss": 0.4567, "step": 1936 }, { "epoch": 0.03366997514297137, "grad_norm": 1.8559821209641212, "learning_rate": 9.999650562214915e-07, "loss": 0.3398, "step": 1937 }, { "epoch": 0.0336873576804742, "grad_norm": 1.9778062536269432, "learning_rate": 9.99964722635148e-07, "loss": 0.5431, "step": 1938 }, { "epoch": 0.03370474021797702, "grad_norm": 1.86072393313659, "learning_rate": 9.999643874641457e-07, "loss": 0.4057, "step": 1939 }, { "epoch": 0.033722122755479846, "grad_norm": 2.147630701265942, "learning_rate": 9.99964050708486e-07, "loss": 0.3444, "step": 1940 }, { "epoch": 0.03373950529298267, "grad_norm": 1.8239484874163843, "learning_rate": 9.9996371236817e-07, "loss": 0.5044, "step": 1941 }, { "epoch": 0.03375688783048549, "grad_norm": 1.8028404127815494, "learning_rate": 9.999633724431986e-07, "loss": 0.507, "step": 1942 }, { "epoch": 0.033774270367988315, "grad_norm": 5.0865014628273, "learning_rate": 9.999630309335731e-07, "loss": 0.3561, "step": 1943 }, { "epoch": 0.033791652905491146, "grad_norm": 3.0065255381303913, "learning_rate": 9.999626878392945e-07, "loss": 0.6748, "step": 1944 }, { "epoch": 0.03380903544299397, "grad_norm": 1.84232492416512, "learning_rate": 9.999623431603637e-07, "loss": 0.6411, "step": 1945 }, { "epoch": 0.03382641798049679, "grad_norm": 2.0668947791427295, "learning_rate": 9.99961996896782e-07, "loss": 0.6897, "step": 1946 }, { "epoch": 0.033843800517999616, "grad_norm": 1.561666381659506, "learning_rate": 9.999616490485503e-07, "loss": 0.58, "step": 1947 }, { "epoch": 0.03386118305550244, "grad_norm": 1.9350617797717908, "learning_rate": 9.9996129961567e-07, "loss": 0.2114, "step": 1948 }, { "epoch": 0.03387856559300527, "grad_norm": 2.25800721831074, "learning_rate": 9.99960948598142e-07, "loss": 0.4419, "step": 1949 }, { "epoch": 0.03389594813050809, "grad_norm": 1.7828287459685241, "learning_rate": 9.999605959959674e-07, "loss": 0.5835, "step": 1950 }, { "epoch": 0.033913330668010916, "grad_norm": 1.79221665946805, "learning_rate": 9.999602418091475e-07, "loss": 0.3609, "step": 1951 }, { "epoch": 0.03393071320551374, "grad_norm": 2.4588940889684126, "learning_rate": 9.999598860376831e-07, "loss": 0.3381, "step": 1952 }, { "epoch": 0.03394809574301656, "grad_norm": 2.7892748650076595, "learning_rate": 9.999595286815758e-07, "loss": 0.5267, "step": 1953 }, { "epoch": 0.03396547828051939, "grad_norm": 2.6544463174831336, "learning_rate": 9.999591697408262e-07, "loss": 0.6802, "step": 1954 }, { "epoch": 0.033982860818022216, "grad_norm": 1.9503578515338142, "learning_rate": 9.999588092154358e-07, "loss": 0.4228, "step": 1955 }, { "epoch": 0.03400024335552504, "grad_norm": 2.2113062611983225, "learning_rate": 9.999584471054057e-07, "loss": 0.8207, "step": 1956 }, { "epoch": 0.03401762589302786, "grad_norm": 2.4129604007461354, "learning_rate": 9.99958083410737e-07, "loss": 0.3919, "step": 1957 }, { "epoch": 0.034035008430530686, "grad_norm": 2.3226993745591544, "learning_rate": 9.999577181314306e-07, "loss": 0.4037, "step": 1958 }, { "epoch": 0.034052390968033516, "grad_norm": 3.6861087582881336, "learning_rate": 9.99957351267488e-07, "loss": 0.3679, "step": 1959 }, { "epoch": 0.03406977350553634, "grad_norm": 1.5516859984376652, "learning_rate": 9.999569828189104e-07, "loss": 0.5339, "step": 1960 }, { "epoch": 0.03408715604303916, "grad_norm": 1.7148224040273485, "learning_rate": 9.999566127856986e-07, "loss": 0.3989, "step": 1961 }, { "epoch": 0.034104538580541986, "grad_norm": 2.502410577265408, "learning_rate": 9.999562411678541e-07, "loss": 0.3989, "step": 1962 }, { "epoch": 0.03412192111804481, "grad_norm": 1.9446604351489925, "learning_rate": 9.999558679653781e-07, "loss": 0.371, "step": 1963 }, { "epoch": 0.03413930365554764, "grad_norm": 1.9359602569364132, "learning_rate": 9.999554931782716e-07, "loss": 0.3364, "step": 1964 }, { "epoch": 0.03415668619305046, "grad_norm": 1.52169023703, "learning_rate": 9.999551168065359e-07, "loss": 0.7166, "step": 1965 }, { "epoch": 0.034174068730553286, "grad_norm": 1.7030686937670563, "learning_rate": 9.99954738850172e-07, "loss": 0.5435, "step": 1966 }, { "epoch": 0.03419145126805611, "grad_norm": 3.4671604652166743, "learning_rate": 9.999543593091813e-07, "loss": 0.3143, "step": 1967 }, { "epoch": 0.03420883380555893, "grad_norm": 1.8138531135584086, "learning_rate": 9.999539781835648e-07, "loss": 0.545, "step": 1968 }, { "epoch": 0.03422621634306176, "grad_norm": 2.7523075868707236, "learning_rate": 9.99953595473324e-07, "loss": 0.3941, "step": 1969 }, { "epoch": 0.03424359888056459, "grad_norm": 2.505115003886726, "learning_rate": 9.9995321117846e-07, "loss": 0.6713, "step": 1970 }, { "epoch": 0.03426098141806741, "grad_norm": 3.117554508408816, "learning_rate": 9.999528252989739e-07, "loss": 0.4635, "step": 1971 }, { "epoch": 0.03427836395557023, "grad_norm": 1.8337734751804744, "learning_rate": 9.999524378348671e-07, "loss": 0.4785, "step": 1972 }, { "epoch": 0.03429574649307306, "grad_norm": 1.5200237781024704, "learning_rate": 9.999520487861407e-07, "loss": 0.5007, "step": 1973 }, { "epoch": 0.03431312903057589, "grad_norm": 2.522148853918073, "learning_rate": 9.999516581527958e-07, "loss": 0.3156, "step": 1974 }, { "epoch": 0.03433051156807871, "grad_norm": 2.289809593925544, "learning_rate": 9.99951265934834e-07, "loss": 0.435, "step": 1975 }, { "epoch": 0.03434789410558153, "grad_norm": 1.794386463430031, "learning_rate": 9.999508721322562e-07, "loss": 0.7285, "step": 1976 }, { "epoch": 0.03436527664308436, "grad_norm": 2.234064471178303, "learning_rate": 9.99950476745064e-07, "loss": 0.5407, "step": 1977 }, { "epoch": 0.03438265918058718, "grad_norm": 1.2825493858125523, "learning_rate": 9.999500797732583e-07, "loss": 0.3285, "step": 1978 }, { "epoch": 0.03440004171809, "grad_norm": 1.4071126248449883, "learning_rate": 9.999496812168405e-07, "loss": 0.3858, "step": 1979 }, { "epoch": 0.034417424255592834, "grad_norm": 2.130393619084529, "learning_rate": 9.999492810758119e-07, "loss": 0.4677, "step": 1980 }, { "epoch": 0.03443480679309566, "grad_norm": 1.7256540381908099, "learning_rate": 9.999488793501737e-07, "loss": 0.4657, "step": 1981 }, { "epoch": 0.03445218933059848, "grad_norm": 1.5887805335555136, "learning_rate": 9.999484760399273e-07, "loss": 0.5904, "step": 1982 }, { "epoch": 0.034469571868101304, "grad_norm": 1.412749313516459, "learning_rate": 9.999480711450738e-07, "loss": 0.202, "step": 1983 }, { "epoch": 0.03448695440560413, "grad_norm": 1.638668097935529, "learning_rate": 9.999476646656146e-07, "loss": 0.3965, "step": 1984 }, { "epoch": 0.03450433694310696, "grad_norm": 2.289182019263057, "learning_rate": 9.999472566015508e-07, "loss": 0.3249, "step": 1985 }, { "epoch": 0.03452171948060978, "grad_norm": 2.234603961463162, "learning_rate": 9.999468469528842e-07, "loss": 0.538, "step": 1986 }, { "epoch": 0.034539102018112604, "grad_norm": 1.9631695701479757, "learning_rate": 9.999464357196154e-07, "loss": 0.4146, "step": 1987 }, { "epoch": 0.03455648455561543, "grad_norm": 3.561143272498685, "learning_rate": 9.999460229017462e-07, "loss": 0.6308, "step": 1988 }, { "epoch": 0.03457386709311825, "grad_norm": 2.497424104738902, "learning_rate": 9.999456084992779e-07, "loss": 0.704, "step": 1989 }, { "epoch": 0.03459124963062108, "grad_norm": 3.192855310990487, "learning_rate": 9.999451925122116e-07, "loss": 0.6816, "step": 1990 }, { "epoch": 0.034608632168123904, "grad_norm": 4.602415500489538, "learning_rate": 9.999447749405485e-07, "loss": 0.4252, "step": 1991 }, { "epoch": 0.03462601470562673, "grad_norm": 1.4528876665093846, "learning_rate": 9.999443557842903e-07, "loss": 0.2592, "step": 1992 }, { "epoch": 0.03464339724312955, "grad_norm": 1.8313055409450119, "learning_rate": 9.999439350434382e-07, "loss": 0.5278, "step": 1993 }, { "epoch": 0.034660779780632374, "grad_norm": 2.3308141027185982, "learning_rate": 9.999435127179934e-07, "loss": 0.4048, "step": 1994 }, { "epoch": 0.034678162318135204, "grad_norm": 1.8073218498070414, "learning_rate": 9.999430888079574e-07, "loss": 0.4322, "step": 1995 }, { "epoch": 0.03469554485563803, "grad_norm": 1.6642455563865088, "learning_rate": 9.999426633133313e-07, "loss": 0.5451, "step": 1996 }, { "epoch": 0.03471292739314085, "grad_norm": 2.2015211099546788, "learning_rate": 9.999422362341166e-07, "loss": 0.3768, "step": 1997 }, { "epoch": 0.034730309930643674, "grad_norm": 2.5049129482225974, "learning_rate": 9.999418075703148e-07, "loss": 0.7305, "step": 1998 }, { "epoch": 0.0347476924681465, "grad_norm": 2.3816165006589403, "learning_rate": 9.999413773219272e-07, "loss": 0.3722, "step": 1999 }, { "epoch": 0.03476507500564933, "grad_norm": 2.4283437520745847, "learning_rate": 9.99940945488955e-07, "loss": 0.4907, "step": 2000 }, { "epoch": 0.03478245754315215, "grad_norm": 3.9299456794576884, "learning_rate": 9.999405120713995e-07, "loss": 0.3901, "step": 2001 }, { "epoch": 0.034799840080654974, "grad_norm": 1.990894089869727, "learning_rate": 9.999400770692625e-07, "loss": 0.3946, "step": 2002 }, { "epoch": 0.0348172226181578, "grad_norm": 2.3938968862933767, "learning_rate": 9.999396404825449e-07, "loss": 0.4171, "step": 2003 }, { "epoch": 0.03483460515566062, "grad_norm": 3.6931642181819395, "learning_rate": 9.999392023112482e-07, "loss": 0.5347, "step": 2004 }, { "epoch": 0.03485198769316345, "grad_norm": 1.3717500673191483, "learning_rate": 9.99938762555374e-07, "loss": 0.3592, "step": 2005 }, { "epoch": 0.034869370230666275, "grad_norm": 1.3199267255347993, "learning_rate": 9.999383212149237e-07, "loss": 0.3563, "step": 2006 }, { "epoch": 0.0348867527681691, "grad_norm": 1.6000140441234032, "learning_rate": 9.999378782898986e-07, "loss": 0.5979, "step": 2007 }, { "epoch": 0.03490413530567192, "grad_norm": 4.105977028338401, "learning_rate": 9.999374337803e-07, "loss": 0.3489, "step": 2008 }, { "epoch": 0.034921517843174744, "grad_norm": 1.9379964754966692, "learning_rate": 9.999369876861295e-07, "loss": 0.3331, "step": 2009 }, { "epoch": 0.034938900380677575, "grad_norm": 2.896351869017379, "learning_rate": 9.999365400073882e-07, "loss": 0.5144, "step": 2010 }, { "epoch": 0.0349562829181804, "grad_norm": 2.658766614826306, "learning_rate": 9.999360907440778e-07, "loss": 0.5788, "step": 2011 }, { "epoch": 0.03497366545568322, "grad_norm": 3.0077734323071814, "learning_rate": 9.999356398961996e-07, "loss": 0.27, "step": 2012 }, { "epoch": 0.034991047993186045, "grad_norm": 1.4924351740729145, "learning_rate": 9.99935187463755e-07, "loss": 0.3565, "step": 2013 }, { "epoch": 0.03500843053068887, "grad_norm": 2.204218896577555, "learning_rate": 9.999347334467456e-07, "loss": 0.6284, "step": 2014 }, { "epoch": 0.03502581306819169, "grad_norm": 1.834237516944971, "learning_rate": 9.999342778451727e-07, "loss": 0.4433, "step": 2015 }, { "epoch": 0.03504319560569452, "grad_norm": 2.224616080303543, "learning_rate": 9.99933820659038e-07, "loss": 0.5959, "step": 2016 }, { "epoch": 0.035060578143197345, "grad_norm": 1.8808826908900074, "learning_rate": 9.999333618883425e-07, "loss": 0.7198, "step": 2017 }, { "epoch": 0.03507796068070017, "grad_norm": 3.2686858064786146, "learning_rate": 9.99932901533088e-07, "loss": 0.36, "step": 2018 }, { "epoch": 0.03509534321820299, "grad_norm": 3.1060925496475127, "learning_rate": 9.999324395932758e-07, "loss": 0.3195, "step": 2019 }, { "epoch": 0.035112725755705815, "grad_norm": 4.41394283680342, "learning_rate": 9.999319760689075e-07, "loss": 0.6009, "step": 2020 }, { "epoch": 0.035130108293208645, "grad_norm": 2.564778383855675, "learning_rate": 9.999315109599845e-07, "loss": 0.4897, "step": 2021 }, { "epoch": 0.03514749083071147, "grad_norm": 1.7233226632848702, "learning_rate": 9.99931044266508e-07, "loss": 0.509, "step": 2022 }, { "epoch": 0.03516487336821429, "grad_norm": 1.8899464879017447, "learning_rate": 9.9993057598848e-07, "loss": 0.6139, "step": 2023 }, { "epoch": 0.035182255905717115, "grad_norm": 0.9108855821128045, "learning_rate": 9.999301061259018e-07, "loss": 0.2841, "step": 2024 }, { "epoch": 0.03519963844321994, "grad_norm": 2.4018161151247805, "learning_rate": 9.999296346787746e-07, "loss": 0.63, "step": 2025 }, { "epoch": 0.03521702098072277, "grad_norm": 2.3742515919562064, "learning_rate": 9.999291616471e-07, "loss": 0.4497, "step": 2026 }, { "epoch": 0.03523440351822559, "grad_norm": 1.5937472286474759, "learning_rate": 9.9992868703088e-07, "loss": 0.5978, "step": 2027 }, { "epoch": 0.035251786055728415, "grad_norm": 1.4588630903423772, "learning_rate": 9.999282108301154e-07, "loss": 0.6633, "step": 2028 }, { "epoch": 0.03526916859323124, "grad_norm": 1.46447945506527, "learning_rate": 9.999277330448081e-07, "loss": 0.3286, "step": 2029 }, { "epoch": 0.03528655113073406, "grad_norm": 2.5955575793494368, "learning_rate": 9.999272536749597e-07, "loss": 0.3204, "step": 2030 }, { "epoch": 0.03530393366823689, "grad_norm": 2.577549606457448, "learning_rate": 9.999267727205714e-07, "loss": 0.6153, "step": 2031 }, { "epoch": 0.035321316205739715, "grad_norm": 1.4542454409360586, "learning_rate": 9.999262901816448e-07, "loss": 0.3692, "step": 2032 }, { "epoch": 0.03533869874324254, "grad_norm": 1.9835711820426098, "learning_rate": 9.999258060581816e-07, "loss": 0.3001, "step": 2033 }, { "epoch": 0.03535608128074536, "grad_norm": 2.74953633298752, "learning_rate": 9.999253203501832e-07, "loss": 1.1011, "step": 2034 }, { "epoch": 0.035373463818248185, "grad_norm": 2.095023545900557, "learning_rate": 9.999248330576513e-07, "loss": 0.5292, "step": 2035 }, { "epoch": 0.035390846355751016, "grad_norm": 1.8071218084371992, "learning_rate": 9.999243441805872e-07, "loss": 0.4949, "step": 2036 }, { "epoch": 0.03540822889325384, "grad_norm": 2.5383516297292683, "learning_rate": 9.999238537189927e-07, "loss": 0.3662, "step": 2037 }, { "epoch": 0.03542561143075666, "grad_norm": 2.6987051772318074, "learning_rate": 9.99923361672869e-07, "loss": 0.3547, "step": 2038 }, { "epoch": 0.035442993968259486, "grad_norm": 2.8841587842049194, "learning_rate": 9.999228680422181e-07, "loss": 0.6648, "step": 2039 }, { "epoch": 0.03546037650576231, "grad_norm": 2.1297500481231895, "learning_rate": 9.999223728270413e-07, "loss": 0.5302, "step": 2040 }, { "epoch": 0.03547775904326514, "grad_norm": 1.805892877280919, "learning_rate": 9.999218760273402e-07, "loss": 0.526, "step": 2041 }, { "epoch": 0.03549514158076796, "grad_norm": 2.1530917051152296, "learning_rate": 9.999213776431164e-07, "loss": 0.3199, "step": 2042 }, { "epoch": 0.035512524118270786, "grad_norm": 2.417585248930496, "learning_rate": 9.999208776743714e-07, "loss": 0.355, "step": 2043 }, { "epoch": 0.03552990665577361, "grad_norm": 1.8422886160843484, "learning_rate": 9.99920376121107e-07, "loss": 0.5702, "step": 2044 }, { "epoch": 0.03554728919327643, "grad_norm": 2.1869525173963527, "learning_rate": 9.999198729833246e-07, "loss": 0.446, "step": 2045 }, { "epoch": 0.03556467173077926, "grad_norm": 1.7539100344938032, "learning_rate": 9.999193682610258e-07, "loss": 0.3432, "step": 2046 }, { "epoch": 0.035582054268282086, "grad_norm": 1.632810491707352, "learning_rate": 9.999188619542123e-07, "loss": 0.3794, "step": 2047 }, { "epoch": 0.03559943680578491, "grad_norm": 2.075476575561565, "learning_rate": 9.999183540628857e-07, "loss": 0.3533, "step": 2048 }, { "epoch": 0.03561681934328773, "grad_norm": 2.216004003684266, "learning_rate": 9.999178445870475e-07, "loss": 0.4755, "step": 2049 }, { "epoch": 0.035634201880790556, "grad_norm": 2.342229439501931, "learning_rate": 9.999173335266993e-07, "loss": 0.4491, "step": 2050 }, { "epoch": 0.03565158441829338, "grad_norm": 2.48775502669768, "learning_rate": 9.99916820881843e-07, "loss": 0.5433, "step": 2051 }, { "epoch": 0.03566896695579621, "grad_norm": 2.032919962872443, "learning_rate": 9.999163066524799e-07, "loss": 0.4764, "step": 2052 }, { "epoch": 0.03568634949329903, "grad_norm": 1.675763280696329, "learning_rate": 9.999157908386118e-07, "loss": 0.3266, "step": 2053 }, { "epoch": 0.035703732030801856, "grad_norm": 1.6017184790965864, "learning_rate": 9.999152734402403e-07, "loss": 0.1553, "step": 2054 }, { "epoch": 0.03572111456830468, "grad_norm": 2.1649269294781384, "learning_rate": 9.999147544573668e-07, "loss": 0.334, "step": 2055 }, { "epoch": 0.0357384971058075, "grad_norm": 2.0732943229727754, "learning_rate": 9.999142338899932e-07, "loss": 0.4319, "step": 2056 }, { "epoch": 0.03575587964331033, "grad_norm": 1.9111504780969317, "learning_rate": 9.999137117381215e-07, "loss": 0.842, "step": 2057 }, { "epoch": 0.035773262180813156, "grad_norm": 1.1884048958291609, "learning_rate": 9.999131880017527e-07, "loss": 0.6113, "step": 2058 }, { "epoch": 0.03579064471831598, "grad_norm": 2.471854994245119, "learning_rate": 9.999126626808886e-07, "loss": 0.4851, "step": 2059 }, { "epoch": 0.0358080272558188, "grad_norm": 2.4208990567354256, "learning_rate": 9.999121357755313e-07, "loss": 0.4591, "step": 2060 }, { "epoch": 0.035825409793321626, "grad_norm": 1.8491513914018198, "learning_rate": 9.99911607285682e-07, "loss": 0.5355, "step": 2061 }, { "epoch": 0.035842792330824456, "grad_norm": 2.0457844886866523, "learning_rate": 9.999110772113426e-07, "loss": 0.4846, "step": 2062 }, { "epoch": 0.03586017486832728, "grad_norm": 1.4877253397043615, "learning_rate": 9.999105455525147e-07, "loss": 0.3737, "step": 2063 }, { "epoch": 0.0358775574058301, "grad_norm": 2.4156175383343617, "learning_rate": 9.999100123092001e-07, "loss": 0.3901, "step": 2064 }, { "epoch": 0.035894939943332926, "grad_norm": 1.634143006232938, "learning_rate": 9.999094774814003e-07, "loss": 0.5578, "step": 2065 }, { "epoch": 0.03591232248083575, "grad_norm": 1.5208069033705895, "learning_rate": 9.999089410691171e-07, "loss": 0.3897, "step": 2066 }, { "epoch": 0.03592970501833858, "grad_norm": 1.920831610770372, "learning_rate": 9.999084030723521e-07, "loss": 0.3651, "step": 2067 }, { "epoch": 0.0359470875558414, "grad_norm": 1.9858373774917495, "learning_rate": 9.999078634911074e-07, "loss": 0.4765, "step": 2068 }, { "epoch": 0.03596447009334423, "grad_norm": 2.1912592509136277, "learning_rate": 9.999073223253841e-07, "loss": 0.6575, "step": 2069 }, { "epoch": 0.03598185263084705, "grad_norm": 1.2736256450199854, "learning_rate": 9.999067795751844e-07, "loss": 0.3658, "step": 2070 }, { "epoch": 0.03599923516834987, "grad_norm": 1.1381966966573953, "learning_rate": 9.999062352405097e-07, "loss": 0.267, "step": 2071 }, { "epoch": 0.0360166177058527, "grad_norm": 3.2357258725691453, "learning_rate": 9.99905689321362e-07, "loss": 0.5283, "step": 2072 }, { "epoch": 0.03603400024335553, "grad_norm": 2.1613207388318556, "learning_rate": 9.999051418177428e-07, "loss": 0.2687, "step": 2073 }, { "epoch": 0.03605138278085835, "grad_norm": 2.0868161192448693, "learning_rate": 9.999045927296542e-07, "loss": 0.6244, "step": 2074 }, { "epoch": 0.03606876531836117, "grad_norm": 1.7414770455767976, "learning_rate": 9.999040420570974e-07, "loss": 0.2029, "step": 2075 }, { "epoch": 0.036086147855864, "grad_norm": 2.446965253650495, "learning_rate": 9.999034898000745e-07, "loss": 0.4806, "step": 2076 }, { "epoch": 0.03610353039336683, "grad_norm": 3.5808691029373096, "learning_rate": 9.99902935958587e-07, "loss": 0.9466, "step": 2077 }, { "epoch": 0.03612091293086965, "grad_norm": 2.2795515424901898, "learning_rate": 9.999023805326373e-07, "loss": 0.4853, "step": 2078 }, { "epoch": 0.036138295468372474, "grad_norm": 1.3638149047618229, "learning_rate": 9.999018235222262e-07, "loss": 0.2753, "step": 2079 }, { "epoch": 0.0361556780058753, "grad_norm": 4.753154987394961, "learning_rate": 9.999012649273563e-07, "loss": 0.6297, "step": 2080 }, { "epoch": 0.03617306054337812, "grad_norm": 1.6339764066796552, "learning_rate": 9.999007047480287e-07, "loss": 0.6526, "step": 2081 }, { "epoch": 0.03619044308088095, "grad_norm": 1.5244392718076685, "learning_rate": 9.999001429842458e-07, "loss": 0.3501, "step": 2082 }, { "epoch": 0.036207825618383774, "grad_norm": 3.226008934614128, "learning_rate": 9.99899579636009e-07, "loss": 0.5412, "step": 2083 }, { "epoch": 0.0362252081558866, "grad_norm": 2.2651586595679567, "learning_rate": 9.998990147033202e-07, "loss": 0.3695, "step": 2084 }, { "epoch": 0.03624259069338942, "grad_norm": 2.7859045968053917, "learning_rate": 9.99898448186181e-07, "loss": 0.6598, "step": 2085 }, { "epoch": 0.036259973230892244, "grad_norm": 1.6162308042835634, "learning_rate": 9.998978800845935e-07, "loss": 0.5181, "step": 2086 }, { "epoch": 0.03627735576839507, "grad_norm": 2.526392373793322, "learning_rate": 9.998973103985592e-07, "loss": 1.1407, "step": 2087 }, { "epoch": 0.0362947383058979, "grad_norm": 1.4949069036695841, "learning_rate": 9.998967391280804e-07, "loss": 0.2855, "step": 2088 }, { "epoch": 0.03631212084340072, "grad_norm": 2.4925341773647887, "learning_rate": 9.998961662731582e-07, "loss": 0.7685, "step": 2089 }, { "epoch": 0.036329503380903544, "grad_norm": 2.00542996641186, "learning_rate": 9.998955918337948e-07, "loss": 0.4028, "step": 2090 }, { "epoch": 0.03634688591840637, "grad_norm": 2.8144338838537326, "learning_rate": 9.998950158099922e-07, "loss": 0.3235, "step": 2091 }, { "epoch": 0.03636426845590919, "grad_norm": 2.4156452733910654, "learning_rate": 9.99894438201752e-07, "loss": 0.5944, "step": 2092 }, { "epoch": 0.03638165099341202, "grad_norm": 2.3371499545882193, "learning_rate": 9.99893859009076e-07, "loss": 0.6378, "step": 2093 }, { "epoch": 0.036399033530914844, "grad_norm": 1.601878839075078, "learning_rate": 9.998932782319662e-07, "loss": 0.3343, "step": 2094 }, { "epoch": 0.03641641606841767, "grad_norm": 1.7722568785158035, "learning_rate": 9.998926958704242e-07, "loss": 0.441, "step": 2095 }, { "epoch": 0.03643379860592049, "grad_norm": 1.4560018711748457, "learning_rate": 9.998921119244522e-07, "loss": 0.446, "step": 2096 }, { "epoch": 0.036451181143423314, "grad_norm": 1.8350114960819328, "learning_rate": 9.998915263940515e-07, "loss": 0.3683, "step": 2097 }, { "epoch": 0.036468563680926144, "grad_norm": 1.5532077174830443, "learning_rate": 9.998909392792244e-07, "loss": 0.6082, "step": 2098 }, { "epoch": 0.03648594621842897, "grad_norm": 2.5166829538826594, "learning_rate": 9.998903505799729e-07, "loss": 0.6048, "step": 2099 }, { "epoch": 0.03650332875593179, "grad_norm": 5.814941806491963, "learning_rate": 9.998897602962985e-07, "loss": 0.8498, "step": 2100 }, { "epoch": 0.036520711293434614, "grad_norm": 2.2499886826053115, "learning_rate": 9.998891684282031e-07, "loss": 0.4958, "step": 2101 }, { "epoch": 0.03653809383093744, "grad_norm": 2.7829702711502904, "learning_rate": 9.998885749756888e-07, "loss": 0.8873, "step": 2102 }, { "epoch": 0.03655547636844027, "grad_norm": 1.7290729079992522, "learning_rate": 9.998879799387572e-07, "loss": 0.3263, "step": 2103 }, { "epoch": 0.03657285890594309, "grad_norm": 2.387484349351848, "learning_rate": 9.998873833174104e-07, "loss": 0.4245, "step": 2104 }, { "epoch": 0.036590241443445914, "grad_norm": 2.031509752643654, "learning_rate": 9.998867851116501e-07, "loss": 0.2329, "step": 2105 }, { "epoch": 0.03660762398094874, "grad_norm": 1.6668583588094275, "learning_rate": 9.998861853214786e-07, "loss": 0.3918, "step": 2106 }, { "epoch": 0.03662500651845156, "grad_norm": 1.6023217206370577, "learning_rate": 9.998855839468973e-07, "loss": 0.4903, "step": 2107 }, { "epoch": 0.03664238905595439, "grad_norm": 4.596972922849185, "learning_rate": 9.998849809879084e-07, "loss": 0.3476, "step": 2108 }, { "epoch": 0.036659771593457215, "grad_norm": 1.7712762174511296, "learning_rate": 9.998843764445138e-07, "loss": 0.3539, "step": 2109 }, { "epoch": 0.03667715413096004, "grad_norm": 2.0334481545394127, "learning_rate": 9.99883770316715e-07, "loss": 0.3692, "step": 2110 }, { "epoch": 0.03669453666846286, "grad_norm": 2.860067381198695, "learning_rate": 9.998831626045148e-07, "loss": 0.4937, "step": 2111 }, { "epoch": 0.036711919205965685, "grad_norm": 2.535516359857844, "learning_rate": 9.998825533079143e-07, "loss": 0.3805, "step": 2112 }, { "epoch": 0.036729301743468515, "grad_norm": 3.3269465502010345, "learning_rate": 9.99881942426916e-07, "loss": 0.5379, "step": 2113 }, { "epoch": 0.03674668428097134, "grad_norm": 1.8042317799998446, "learning_rate": 9.998813299615212e-07, "loss": 0.4519, "step": 2114 }, { "epoch": 0.03676406681847416, "grad_norm": 1.549170387954003, "learning_rate": 9.998807159117323e-07, "loss": 0.5331, "step": 2115 }, { "epoch": 0.036781449355976985, "grad_norm": 1.477234187358185, "learning_rate": 9.998801002775513e-07, "loss": 0.4085, "step": 2116 }, { "epoch": 0.03679883189347981, "grad_norm": 2.2393883298831625, "learning_rate": 9.9987948305898e-07, "loss": 0.4424, "step": 2117 }, { "epoch": 0.03681621443098263, "grad_norm": 1.904388892083366, "learning_rate": 9.9987886425602e-07, "loss": 0.4009, "step": 2118 }, { "epoch": 0.03683359696848546, "grad_norm": 2.254378256027864, "learning_rate": 9.998782438686741e-07, "loss": 0.4076, "step": 2119 }, { "epoch": 0.036850979505988285, "grad_norm": 2.8910002122558156, "learning_rate": 9.998776218969435e-07, "loss": 0.4293, "step": 2120 }, { "epoch": 0.03686836204349111, "grad_norm": 1.407893885454607, "learning_rate": 9.998769983408305e-07, "loss": 0.3114, "step": 2121 }, { "epoch": 0.03688574458099393, "grad_norm": 2.3290057685705965, "learning_rate": 9.998763732003372e-07, "loss": 0.5979, "step": 2122 }, { "epoch": 0.036903127118496755, "grad_norm": 1.3662506084702069, "learning_rate": 9.998757464754655e-07, "loss": 0.5066, "step": 2123 }, { "epoch": 0.036920509655999585, "grad_norm": 2.8112373106035085, "learning_rate": 9.99875118166217e-07, "loss": 0.7644, "step": 2124 }, { "epoch": 0.03693789219350241, "grad_norm": 1.3617560469934467, "learning_rate": 9.998744882725942e-07, "loss": 0.3956, "step": 2125 }, { "epoch": 0.03695527473100523, "grad_norm": 2.445414378963781, "learning_rate": 9.998738567945988e-07, "loss": 0.414, "step": 2126 }, { "epoch": 0.036972657268508055, "grad_norm": 1.600879187037956, "learning_rate": 9.99873223732233e-07, "loss": 0.3156, "step": 2127 }, { "epoch": 0.03699003980601088, "grad_norm": 1.5990827506041658, "learning_rate": 9.998725890854986e-07, "loss": 0.4952, "step": 2128 }, { "epoch": 0.03700742234351371, "grad_norm": 2.944164488023279, "learning_rate": 9.998719528543977e-07, "loss": 0.513, "step": 2129 }, { "epoch": 0.03702480488101653, "grad_norm": 2.8708715803792537, "learning_rate": 9.998713150389326e-07, "loss": 0.8532, "step": 2130 }, { "epoch": 0.037042187418519355, "grad_norm": 2.5503947228430297, "learning_rate": 9.99870675639105e-07, "loss": 0.3808, "step": 2131 }, { "epoch": 0.03705956995602218, "grad_norm": 2.384134039472407, "learning_rate": 9.998700346549166e-07, "loss": 0.5281, "step": 2132 }, { "epoch": 0.037076952493525, "grad_norm": 2.6127828454484785, "learning_rate": 9.998693920863701e-07, "loss": 0.559, "step": 2133 }, { "epoch": 0.03709433503102783, "grad_norm": 1.5448763013243016, "learning_rate": 9.998687479334671e-07, "loss": 0.186, "step": 2134 }, { "epoch": 0.037111717568530656, "grad_norm": 1.923023171340096, "learning_rate": 9.9986810219621e-07, "loss": 0.6045, "step": 2135 }, { "epoch": 0.03712910010603348, "grad_norm": 1.685014902500045, "learning_rate": 9.998674548746005e-07, "loss": 0.473, "step": 2136 }, { "epoch": 0.0371464826435363, "grad_norm": 1.7406399312142673, "learning_rate": 9.998668059686407e-07, "loss": 0.2564, "step": 2137 }, { "epoch": 0.037163865181039125, "grad_norm": 1.6921210494512247, "learning_rate": 9.99866155478333e-07, "loss": 0.2676, "step": 2138 }, { "epoch": 0.037181247718541956, "grad_norm": 1.7131873823564867, "learning_rate": 9.99865503403679e-07, "loss": 0.7504, "step": 2139 }, { "epoch": 0.03719863025604478, "grad_norm": 1.8882913003826536, "learning_rate": 9.99864849744681e-07, "loss": 0.348, "step": 2140 }, { "epoch": 0.0372160127935476, "grad_norm": 2.2975856094339937, "learning_rate": 9.99864194501341e-07, "loss": 0.7948, "step": 2141 }, { "epoch": 0.037233395331050426, "grad_norm": 1.8199471222722534, "learning_rate": 9.998635376736613e-07, "loss": 0.5647, "step": 2142 }, { "epoch": 0.03725077786855325, "grad_norm": 1.7879026705246837, "learning_rate": 9.998628792616437e-07, "loss": 0.5171, "step": 2143 }, { "epoch": 0.03726816040605608, "grad_norm": 2.480351869927413, "learning_rate": 9.998622192652903e-07, "loss": 0.4849, "step": 2144 }, { "epoch": 0.0372855429435589, "grad_norm": 2.152873487642936, "learning_rate": 9.998615576846035e-07, "loss": 0.4451, "step": 2145 }, { "epoch": 0.037302925481061726, "grad_norm": 3.102272708862859, "learning_rate": 9.99860894519585e-07, "loss": 0.4165, "step": 2146 }, { "epoch": 0.03732030801856455, "grad_norm": 2.2118307568192943, "learning_rate": 9.998602297702371e-07, "loss": 0.4144, "step": 2147 }, { "epoch": 0.03733769055606737, "grad_norm": 4.034412920817905, "learning_rate": 9.99859563436562e-07, "loss": 0.5861, "step": 2148 }, { "epoch": 0.0373550730935702, "grad_norm": 2.0860925364429335, "learning_rate": 9.998588955185616e-07, "loss": 0.6233, "step": 2149 }, { "epoch": 0.037372455631073026, "grad_norm": 1.765898953218311, "learning_rate": 9.99858226016238e-07, "loss": 0.2775, "step": 2150 }, { "epoch": 0.03738983816857585, "grad_norm": 1.918982556775529, "learning_rate": 9.998575549295936e-07, "loss": 0.2734, "step": 2151 }, { "epoch": 0.03740722070607867, "grad_norm": 2.539357592302166, "learning_rate": 9.998568822586303e-07, "loss": 0.4621, "step": 2152 }, { "epoch": 0.037424603243581496, "grad_norm": 2.1433938811278037, "learning_rate": 9.998562080033506e-07, "loss": 0.2832, "step": 2153 }, { "epoch": 0.03744198578108432, "grad_norm": 1.8725564756856703, "learning_rate": 9.99855532163756e-07, "loss": 0.4768, "step": 2154 }, { "epoch": 0.03745936831858715, "grad_norm": 2.183555018403863, "learning_rate": 9.99854854739849e-07, "loss": 0.3225, "step": 2155 }, { "epoch": 0.03747675085608997, "grad_norm": 2.5019365375643297, "learning_rate": 9.998541757316317e-07, "loss": 0.3863, "step": 2156 }, { "epoch": 0.037494133393592796, "grad_norm": 1.5713505092202362, "learning_rate": 9.998534951391064e-07, "loss": 0.752, "step": 2157 }, { "epoch": 0.03751151593109562, "grad_norm": 2.0319181798292005, "learning_rate": 9.998528129622753e-07, "loss": 0.3849, "step": 2158 }, { "epoch": 0.03752889846859844, "grad_norm": 1.9707446659272532, "learning_rate": 9.9985212920114e-07, "loss": 0.4779, "step": 2159 }, { "epoch": 0.03754628100610127, "grad_norm": 1.4672884046523478, "learning_rate": 9.998514438557033e-07, "loss": 0.391, "step": 2160 }, { "epoch": 0.037563663543604096, "grad_norm": 1.906353668803249, "learning_rate": 9.998507569259671e-07, "loss": 0.3445, "step": 2161 }, { "epoch": 0.03758104608110692, "grad_norm": 2.1197962257350635, "learning_rate": 9.998500684119336e-07, "loss": 0.5218, "step": 2162 }, { "epoch": 0.03759842861860974, "grad_norm": 1.8843921572715896, "learning_rate": 9.99849378313605e-07, "loss": 0.4815, "step": 2163 }, { "epoch": 0.037615811156112566, "grad_norm": 1.0716581400608642, "learning_rate": 9.998486866309834e-07, "loss": 0.3718, "step": 2164 }, { "epoch": 0.0376331936936154, "grad_norm": 2.51185193771756, "learning_rate": 9.998479933640714e-07, "loss": 0.3636, "step": 2165 }, { "epoch": 0.03765057623111822, "grad_norm": 1.9313976205918857, "learning_rate": 9.998472985128706e-07, "loss": 0.5247, "step": 2166 }, { "epoch": 0.03766795876862104, "grad_norm": 3.3122715903431628, "learning_rate": 9.998466020773835e-07, "loss": 0.5744, "step": 2167 }, { "epoch": 0.037685341306123867, "grad_norm": 2.4621148448843835, "learning_rate": 9.998459040576122e-07, "loss": 0.3807, "step": 2168 }, { "epoch": 0.03770272384362669, "grad_norm": 1.7355607941053102, "learning_rate": 9.99845204453559e-07, "loss": 0.2375, "step": 2169 }, { "epoch": 0.03772010638112952, "grad_norm": 1.4785350417372756, "learning_rate": 9.998445032652263e-07, "loss": 0.6287, "step": 2170 }, { "epoch": 0.03773748891863234, "grad_norm": 2.0372343148566023, "learning_rate": 9.99843800492616e-07, "loss": 0.3167, "step": 2171 }, { "epoch": 0.03775487145613517, "grad_norm": 2.190895384379765, "learning_rate": 9.998430961357305e-07, "loss": 0.383, "step": 2172 }, { "epoch": 0.03777225399363799, "grad_norm": 1.4718596849421302, "learning_rate": 9.998423901945721e-07, "loss": 0.4827, "step": 2173 }, { "epoch": 0.03778963653114081, "grad_norm": 1.6726304578821176, "learning_rate": 9.998416826691426e-07, "loss": 0.5139, "step": 2174 }, { "epoch": 0.037807019068643644, "grad_norm": 3.7330739419626764, "learning_rate": 9.99840973559445e-07, "loss": 0.7261, "step": 2175 }, { "epoch": 0.03782440160614647, "grad_norm": 1.3142800328084316, "learning_rate": 9.998402628654808e-07, "loss": 0.398, "step": 2176 }, { "epoch": 0.03784178414364929, "grad_norm": 1.4917650845552637, "learning_rate": 9.998395505872529e-07, "loss": 0.3984, "step": 2177 }, { "epoch": 0.037859166681152114, "grad_norm": 2.5208380773394423, "learning_rate": 9.998388367247627e-07, "loss": 0.6108, "step": 2178 }, { "epoch": 0.03787654921865494, "grad_norm": 1.4580850689591012, "learning_rate": 9.998381212780134e-07, "loss": 0.3886, "step": 2179 }, { "epoch": 0.03789393175615777, "grad_norm": 2.4802544597289597, "learning_rate": 9.998374042470068e-07, "loss": 0.3493, "step": 2180 }, { "epoch": 0.03791131429366059, "grad_norm": 2.8426862419848957, "learning_rate": 9.998366856317453e-07, "loss": 0.5688, "step": 2181 }, { "epoch": 0.037928696831163414, "grad_norm": 1.7772447050629552, "learning_rate": 9.99835965432231e-07, "loss": 0.4571, "step": 2182 }, { "epoch": 0.03794607936866624, "grad_norm": 3.1772805524267533, "learning_rate": 9.998352436484662e-07, "loss": 0.7485, "step": 2183 }, { "epoch": 0.03796346190616906, "grad_norm": 2.2508361893649975, "learning_rate": 9.998345202804535e-07, "loss": 0.4139, "step": 2184 }, { "epoch": 0.03798084444367189, "grad_norm": 1.8739458839051049, "learning_rate": 9.998337953281948e-07, "loss": 0.2792, "step": 2185 }, { "epoch": 0.037998226981174714, "grad_norm": 1.9432563294511778, "learning_rate": 9.998330687916927e-07, "loss": 0.3463, "step": 2186 }, { "epoch": 0.03801560951867754, "grad_norm": 2.0244844106960302, "learning_rate": 9.998323406709493e-07, "loss": 0.3246, "step": 2187 }, { "epoch": 0.03803299205618036, "grad_norm": 2.46517595867979, "learning_rate": 9.99831610965967e-07, "loss": 0.5071, "step": 2188 }, { "epoch": 0.038050374593683184, "grad_norm": 1.8935387743710335, "learning_rate": 9.998308796767482e-07, "loss": 0.3611, "step": 2189 }, { "epoch": 0.03806775713118601, "grad_norm": 2.1683861103491138, "learning_rate": 9.99830146803295e-07, "loss": 0.5851, "step": 2190 }, { "epoch": 0.03808513966868884, "grad_norm": 2.0255123517044087, "learning_rate": 9.998294123456099e-07, "loss": 0.338, "step": 2191 }, { "epoch": 0.03810252220619166, "grad_norm": 2.4332686919146456, "learning_rate": 9.998286763036952e-07, "loss": 0.2607, "step": 2192 }, { "epoch": 0.038119904743694484, "grad_norm": 1.7215747277302207, "learning_rate": 9.99827938677553e-07, "loss": 0.2889, "step": 2193 }, { "epoch": 0.03813728728119731, "grad_norm": 1.8580976213063294, "learning_rate": 9.998271994671861e-07, "loss": 0.475, "step": 2194 }, { "epoch": 0.03815466981870013, "grad_norm": 2.175595035944307, "learning_rate": 9.998264586725965e-07, "loss": 0.5487, "step": 2195 }, { "epoch": 0.03817205235620296, "grad_norm": 2.645891295277431, "learning_rate": 9.998257162937864e-07, "loss": 0.2941, "step": 2196 }, { "epoch": 0.038189434893705784, "grad_norm": 2.284154391615185, "learning_rate": 9.998249723307586e-07, "loss": 0.5524, "step": 2197 }, { "epoch": 0.03820681743120861, "grad_norm": 2.1539391056486794, "learning_rate": 9.998242267835152e-07, "loss": 0.7077, "step": 2198 }, { "epoch": 0.03822419996871143, "grad_norm": 2.5855843000031538, "learning_rate": 9.998234796520585e-07, "loss": 0.4796, "step": 2199 }, { "epoch": 0.038241582506214254, "grad_norm": 2.499619836479497, "learning_rate": 9.998227309363911e-07, "loss": 0.7314, "step": 2200 }, { "epoch": 0.038258965043717084, "grad_norm": 2.5387915625955104, "learning_rate": 9.998219806365152e-07, "loss": 0.2808, "step": 2201 }, { "epoch": 0.03827634758121991, "grad_norm": 1.9307104423944383, "learning_rate": 9.998212287524332e-07, "loss": 0.3408, "step": 2202 }, { "epoch": 0.03829373011872273, "grad_norm": 2.842785624852167, "learning_rate": 9.998204752841476e-07, "loss": 0.7695, "step": 2203 }, { "epoch": 0.038311112656225554, "grad_norm": 2.0877533164749824, "learning_rate": 9.998197202316607e-07, "loss": 0.5537, "step": 2204 }, { "epoch": 0.03832849519372838, "grad_norm": 1.5080622532533872, "learning_rate": 9.998189635949748e-07, "loss": 0.443, "step": 2205 }, { "epoch": 0.03834587773123121, "grad_norm": 2.188402608373308, "learning_rate": 9.998182053740923e-07, "loss": 0.3489, "step": 2206 }, { "epoch": 0.03836326026873403, "grad_norm": 1.725979909314172, "learning_rate": 9.998174455690158e-07, "loss": 0.322, "step": 2207 }, { "epoch": 0.038380642806236855, "grad_norm": 1.3791719351952216, "learning_rate": 9.998166841797475e-07, "loss": 0.3778, "step": 2208 }, { "epoch": 0.03839802534373968, "grad_norm": 2.0748601124732264, "learning_rate": 9.9981592120629e-07, "loss": 0.506, "step": 2209 }, { "epoch": 0.0384154078812425, "grad_norm": 2.8788298125321763, "learning_rate": 9.998151566486456e-07, "loss": 0.4394, "step": 2210 }, { "epoch": 0.03843279041874533, "grad_norm": 2.117524834122325, "learning_rate": 9.99814390506817e-07, "loss": 0.2675, "step": 2211 }, { "epoch": 0.038450172956248155, "grad_norm": 3.3436565244092025, "learning_rate": 9.99813622780806e-07, "loss": 0.6632, "step": 2212 }, { "epoch": 0.03846755549375098, "grad_norm": 2.236771094304294, "learning_rate": 9.998128534706154e-07, "loss": 0.4279, "step": 2213 }, { "epoch": 0.0384849380312538, "grad_norm": 2.586900799635573, "learning_rate": 9.998120825762479e-07, "loss": 0.4367, "step": 2214 }, { "epoch": 0.038502320568756625, "grad_norm": 4.4948751055900145, "learning_rate": 9.998113100977056e-07, "loss": 0.3041, "step": 2215 }, { "epoch": 0.038519703106259455, "grad_norm": 2.195807111668408, "learning_rate": 9.998105360349912e-07, "loss": 0.5169, "step": 2216 }, { "epoch": 0.03853708564376228, "grad_norm": 2.296898723082537, "learning_rate": 9.998097603881067e-07, "loss": 0.5991, "step": 2217 }, { "epoch": 0.0385544681812651, "grad_norm": 1.8943727286349512, "learning_rate": 9.99808983157055e-07, "loss": 0.3385, "step": 2218 }, { "epoch": 0.038571850718767925, "grad_norm": 1.838103297264292, "learning_rate": 9.998082043418384e-07, "loss": 0.3624, "step": 2219 }, { "epoch": 0.03858923325627075, "grad_norm": 3.732900223784961, "learning_rate": 9.998074239424593e-07, "loss": 0.3597, "step": 2220 }, { "epoch": 0.03860661579377358, "grad_norm": 2.4404053840514197, "learning_rate": 9.998066419589205e-07, "loss": 0.3266, "step": 2221 }, { "epoch": 0.0386239983312764, "grad_norm": 1.7470001098928334, "learning_rate": 9.99805858391224e-07, "loss": 0.3726, "step": 2222 }, { "epoch": 0.038641380868779225, "grad_norm": 2.157636424501695, "learning_rate": 9.998050732393726e-07, "loss": 0.578, "step": 2223 }, { "epoch": 0.03865876340628205, "grad_norm": 2.421412381306217, "learning_rate": 9.998042865033686e-07, "loss": 0.3877, "step": 2224 }, { "epoch": 0.03867614594378487, "grad_norm": 2.1711141422722116, "learning_rate": 9.998034981832148e-07, "loss": 0.5648, "step": 2225 }, { "epoch": 0.038693528481287695, "grad_norm": 1.5431080059549667, "learning_rate": 9.998027082789134e-07, "loss": 0.3279, "step": 2226 }, { "epoch": 0.038710911018790525, "grad_norm": 2.2018205933188493, "learning_rate": 9.99801916790467e-07, "loss": 0.3256, "step": 2227 }, { "epoch": 0.03872829355629335, "grad_norm": 4.978045975288385, "learning_rate": 9.99801123717878e-07, "loss": 0.5574, "step": 2228 }, { "epoch": 0.03874567609379617, "grad_norm": 3.5247589629107994, "learning_rate": 9.998003290611494e-07, "loss": 0.4407, "step": 2229 }, { "epoch": 0.038763058631298995, "grad_norm": 1.7252468519059287, "learning_rate": 9.997995328202832e-07, "loss": 0.2222, "step": 2230 }, { "epoch": 0.03878044116880182, "grad_norm": 1.948328815080365, "learning_rate": 9.997987349952817e-07, "loss": 0.57, "step": 2231 }, { "epoch": 0.03879782370630465, "grad_norm": 2.1735199790326747, "learning_rate": 9.99797935586148e-07, "loss": 0.4759, "step": 2232 }, { "epoch": 0.03881520624380747, "grad_norm": 2.1049194182078743, "learning_rate": 9.997971345928847e-07, "loss": 0.4282, "step": 2233 }, { "epoch": 0.038832588781310295, "grad_norm": 3.856101993561273, "learning_rate": 9.99796332015494e-07, "loss": 0.4493, "step": 2234 }, { "epoch": 0.03884997131881312, "grad_norm": 1.6884549359098553, "learning_rate": 9.997955278539783e-07, "loss": 0.3713, "step": 2235 }, { "epoch": 0.03886735385631594, "grad_norm": 1.6058158401187665, "learning_rate": 9.997947221083404e-07, "loss": 0.4987, "step": 2236 }, { "epoch": 0.03888473639381877, "grad_norm": 1.5943479454116676, "learning_rate": 9.997939147785828e-07, "loss": 0.2283, "step": 2237 }, { "epoch": 0.038902118931321596, "grad_norm": 1.921729793865989, "learning_rate": 9.997931058647082e-07, "loss": 0.4295, "step": 2238 }, { "epoch": 0.03891950146882442, "grad_norm": 1.655273483982015, "learning_rate": 9.997922953667192e-07, "loss": 0.6946, "step": 2239 }, { "epoch": 0.03893688400632724, "grad_norm": 1.6129012681550812, "learning_rate": 9.997914832846177e-07, "loss": 0.4592, "step": 2240 }, { "epoch": 0.038954266543830066, "grad_norm": 2.104591889147528, "learning_rate": 9.997906696184072e-07, "loss": 0.321, "step": 2241 }, { "epoch": 0.038971649081332896, "grad_norm": 1.9143322703183923, "learning_rate": 9.997898543680898e-07, "loss": 0.5439, "step": 2242 }, { "epoch": 0.03898903161883572, "grad_norm": 2.037001007449868, "learning_rate": 9.997890375336681e-07, "loss": 0.4894, "step": 2243 }, { "epoch": 0.03900641415633854, "grad_norm": 1.539782644643441, "learning_rate": 9.997882191151447e-07, "loss": 0.5373, "step": 2244 }, { "epoch": 0.039023796693841366, "grad_norm": 2.9634833618585668, "learning_rate": 9.997873991125223e-07, "loss": 0.4799, "step": 2245 }, { "epoch": 0.03904117923134419, "grad_norm": 2.459084591797516, "learning_rate": 9.997865775258034e-07, "loss": 0.5066, "step": 2246 }, { "epoch": 0.03905856176884702, "grad_norm": 2.310274263335959, "learning_rate": 9.997857543549906e-07, "loss": 0.3592, "step": 2247 }, { "epoch": 0.03907594430634984, "grad_norm": 3.4793870533246656, "learning_rate": 9.997849296000864e-07, "loss": 1.0079, "step": 2248 }, { "epoch": 0.039093326843852666, "grad_norm": 1.9881818858375375, "learning_rate": 9.997841032610939e-07, "loss": 0.6828, "step": 2249 }, { "epoch": 0.03911070938135549, "grad_norm": 5.1969691091984656, "learning_rate": 9.99783275338015e-07, "loss": 0.4653, "step": 2250 }, { "epoch": 0.03912809191885831, "grad_norm": 1.9656945860687964, "learning_rate": 9.99782445830853e-07, "loss": 0.4093, "step": 2251 }, { "epoch": 0.03914547445636114, "grad_norm": 2.174485839774573, "learning_rate": 9.9978161473961e-07, "loss": 0.3909, "step": 2252 }, { "epoch": 0.039162856993863966, "grad_norm": 1.3015444398864389, "learning_rate": 9.997807820642893e-07, "loss": 0.4993, "step": 2253 }, { "epoch": 0.03918023953136679, "grad_norm": 2.0400216740788575, "learning_rate": 9.997799478048926e-07, "loss": 0.4831, "step": 2254 }, { "epoch": 0.03919762206886961, "grad_norm": 2.8602058272464146, "learning_rate": 9.997791119614231e-07, "loss": 0.2919, "step": 2255 }, { "epoch": 0.039215004606372436, "grad_norm": 1.8100928564749883, "learning_rate": 9.997782745338837e-07, "loss": 0.4811, "step": 2256 }, { "epoch": 0.039232387143875266, "grad_norm": 2.118279129945005, "learning_rate": 9.997774355222766e-07, "loss": 0.5252, "step": 2257 }, { "epoch": 0.03924976968137809, "grad_norm": 2.560324511069667, "learning_rate": 9.997765949266044e-07, "loss": 0.6524, "step": 2258 }, { "epoch": 0.03926715221888091, "grad_norm": 2.1020815233310866, "learning_rate": 9.9977575274687e-07, "loss": 0.5408, "step": 2259 }, { "epoch": 0.039284534756383736, "grad_norm": 2.137869238815887, "learning_rate": 9.997749089830766e-07, "loss": 0.4057, "step": 2260 }, { "epoch": 0.03930191729388656, "grad_norm": 4.279687610905421, "learning_rate": 9.997740636352258e-07, "loss": 0.2687, "step": 2261 }, { "epoch": 0.03931929983138938, "grad_norm": 2.1956503986887514, "learning_rate": 9.997732167033209e-07, "loss": 0.9148, "step": 2262 }, { "epoch": 0.03933668236889221, "grad_norm": 2.1838894869087215, "learning_rate": 9.997723681873644e-07, "loss": 0.6008, "step": 2263 }, { "epoch": 0.039354064906395037, "grad_norm": 1.564025984051086, "learning_rate": 9.997715180873591e-07, "loss": 0.3319, "step": 2264 }, { "epoch": 0.03937144744389786, "grad_norm": 1.8883796908793549, "learning_rate": 9.997706664033078e-07, "loss": 0.3358, "step": 2265 }, { "epoch": 0.03938882998140068, "grad_norm": 1.9317615636358496, "learning_rate": 9.99769813135213e-07, "loss": 0.8165, "step": 2266 }, { "epoch": 0.039406212518903506, "grad_norm": 2.828622542953163, "learning_rate": 9.997689582830776e-07, "loss": 0.5457, "step": 2267 }, { "epoch": 0.03942359505640634, "grad_norm": 2.400097193324351, "learning_rate": 9.99768101846904e-07, "loss": 0.3945, "step": 2268 }, { "epoch": 0.03944097759390916, "grad_norm": 2.3961486725168397, "learning_rate": 9.99767243826695e-07, "loss": 0.5859, "step": 2269 }, { "epoch": 0.03945836013141198, "grad_norm": 2.1594499274442955, "learning_rate": 9.997663842224535e-07, "loss": 0.4186, "step": 2270 }, { "epoch": 0.03947574266891481, "grad_norm": 2.0344010546942264, "learning_rate": 9.997655230341821e-07, "loss": 0.5255, "step": 2271 }, { "epoch": 0.03949312520641763, "grad_norm": 3.2472221845453424, "learning_rate": 9.997646602618838e-07, "loss": 0.5096, "step": 2272 }, { "epoch": 0.03951050774392046, "grad_norm": 2.1851554339467496, "learning_rate": 9.99763795905561e-07, "loss": 0.5036, "step": 2273 }, { "epoch": 0.039527890281423284, "grad_norm": 1.7445370099352147, "learning_rate": 9.997629299652163e-07, "loss": 0.7304, "step": 2274 }, { "epoch": 0.03954527281892611, "grad_norm": 3.418147289687002, "learning_rate": 9.997620624408529e-07, "loss": 0.8481, "step": 2275 }, { "epoch": 0.03956265535642893, "grad_norm": 2.4638942237898727, "learning_rate": 9.997611933324732e-07, "loss": 0.4786, "step": 2276 }, { "epoch": 0.039580037893931753, "grad_norm": 1.7157695551510597, "learning_rate": 9.997603226400804e-07, "loss": 0.4581, "step": 2277 }, { "epoch": 0.039597420431434584, "grad_norm": 2.3573050126410737, "learning_rate": 9.997594503636766e-07, "loss": 0.5205, "step": 2278 }, { "epoch": 0.03961480296893741, "grad_norm": 1.8754231882197736, "learning_rate": 9.99758576503265e-07, "loss": 0.3068, "step": 2279 }, { "epoch": 0.03963218550644023, "grad_norm": 3.2258879755896723, "learning_rate": 9.997577010588486e-07, "loss": 0.3411, "step": 2280 }, { "epoch": 0.039649568043943054, "grad_norm": 1.2574945481730486, "learning_rate": 9.997568240304295e-07, "loss": 0.2008, "step": 2281 }, { "epoch": 0.03966695058144588, "grad_norm": 2.3765782836881777, "learning_rate": 9.997559454180109e-07, "loss": 0.4608, "step": 2282 }, { "epoch": 0.03968433311894871, "grad_norm": 1.8753126237257423, "learning_rate": 9.997550652215956e-07, "loss": 0.3133, "step": 2283 }, { "epoch": 0.03970171565645153, "grad_norm": 2.8135647024612207, "learning_rate": 9.99754183441186e-07, "loss": 0.3294, "step": 2284 }, { "epoch": 0.039719098193954354, "grad_norm": 1.6057139548544288, "learning_rate": 9.997533000767856e-07, "loss": 0.4731, "step": 2285 }, { "epoch": 0.03973648073145718, "grad_norm": 1.26769227243331, "learning_rate": 9.997524151283967e-07, "loss": 0.3444, "step": 2286 }, { "epoch": 0.03975386326896, "grad_norm": 3.3901027179097887, "learning_rate": 9.99751528596022e-07, "loss": 0.3586, "step": 2287 }, { "epoch": 0.03977124580646283, "grad_norm": 1.8660391009212194, "learning_rate": 9.997506404796648e-07, "loss": 0.2917, "step": 2288 }, { "epoch": 0.039788628343965654, "grad_norm": 1.6560531042195439, "learning_rate": 9.997497507793275e-07, "loss": 0.5646, "step": 2289 }, { "epoch": 0.03980601088146848, "grad_norm": 1.5913564512275051, "learning_rate": 9.997488594950133e-07, "loss": 0.6065, "step": 2290 }, { "epoch": 0.0398233934189713, "grad_norm": 2.2417518876924367, "learning_rate": 9.997479666267244e-07, "loss": 0.8369, "step": 2291 }, { "epoch": 0.039840775956474124, "grad_norm": 2.8079631935270046, "learning_rate": 9.997470721744642e-07, "loss": 0.5034, "step": 2292 }, { "epoch": 0.03985815849397695, "grad_norm": 1.789929778076135, "learning_rate": 9.997461761382353e-07, "loss": 0.4365, "step": 2293 }, { "epoch": 0.03987554103147978, "grad_norm": 2.194438191008019, "learning_rate": 9.997452785180408e-07, "loss": 0.2504, "step": 2294 }, { "epoch": 0.0398929235689826, "grad_norm": 1.4167504827452175, "learning_rate": 9.99744379313883e-07, "loss": 0.5975, "step": 2295 }, { "epoch": 0.039910306106485424, "grad_norm": 2.4735088314771922, "learning_rate": 9.997434785257653e-07, "loss": 0.3907, "step": 2296 }, { "epoch": 0.03992768864398825, "grad_norm": 1.3217803218554935, "learning_rate": 9.997425761536901e-07, "loss": 0.561, "step": 2297 }, { "epoch": 0.03994507118149107, "grad_norm": 2.3728042714518613, "learning_rate": 9.997416721976608e-07, "loss": 0.2577, "step": 2298 }, { "epoch": 0.0399624537189939, "grad_norm": 1.108206115319929, "learning_rate": 9.997407666576798e-07, "loss": 0.2658, "step": 2299 }, { "epoch": 0.039979836256496724, "grad_norm": 3.4247946751090574, "learning_rate": 9.997398595337502e-07, "loss": 0.3106, "step": 2300 }, { "epoch": 0.03999721879399955, "grad_norm": 1.6089346916828493, "learning_rate": 9.997389508258747e-07, "loss": 0.4835, "step": 2301 }, { "epoch": 0.04001460133150237, "grad_norm": 1.9051141361880377, "learning_rate": 9.997380405340562e-07, "loss": 1.0949, "step": 2302 }, { "epoch": 0.040031983869005194, "grad_norm": 2.122621906300232, "learning_rate": 9.99737128658298e-07, "loss": 0.5026, "step": 2303 }, { "epoch": 0.040049366406508025, "grad_norm": 1.9886872801455315, "learning_rate": 9.997362151986025e-07, "loss": 0.6581, "step": 2304 }, { "epoch": 0.04006674894401085, "grad_norm": 2.4405655698490443, "learning_rate": 9.997353001549726e-07, "loss": 0.2503, "step": 2305 }, { "epoch": 0.04008413148151367, "grad_norm": 1.8185662113997239, "learning_rate": 9.997343835274117e-07, "loss": 0.3808, "step": 2306 }, { "epoch": 0.040101514019016495, "grad_norm": 2.4946341339822737, "learning_rate": 9.997334653159222e-07, "loss": 0.3703, "step": 2307 }, { "epoch": 0.04011889655651932, "grad_norm": 2.106948382023665, "learning_rate": 9.99732545520507e-07, "loss": 0.2767, "step": 2308 }, { "epoch": 0.04013627909402215, "grad_norm": 3.6357062370426405, "learning_rate": 9.997316241411694e-07, "loss": 0.6168, "step": 2309 }, { "epoch": 0.04015366163152497, "grad_norm": 2.2231815142861384, "learning_rate": 9.99730701177912e-07, "loss": 0.4311, "step": 2310 }, { "epoch": 0.040171044169027795, "grad_norm": 1.3216614090020462, "learning_rate": 9.99729776630738e-07, "loss": 0.452, "step": 2311 }, { "epoch": 0.04018842670653062, "grad_norm": 1.246752206974592, "learning_rate": 9.997288504996499e-07, "loss": 0.2989, "step": 2312 }, { "epoch": 0.04020580924403344, "grad_norm": 1.3935000041434915, "learning_rate": 9.99727922784651e-07, "loss": 0.2938, "step": 2313 }, { "epoch": 0.04022319178153627, "grad_norm": 3.3484239963429094, "learning_rate": 9.99726993485744e-07, "loss": 0.522, "step": 2314 }, { "epoch": 0.040240574319039095, "grad_norm": 1.864803464471916, "learning_rate": 9.997260626029325e-07, "loss": 0.5595, "step": 2315 }, { "epoch": 0.04025795685654192, "grad_norm": 2.612532546520333, "learning_rate": 9.997251301362186e-07, "loss": 0.3692, "step": 2316 }, { "epoch": 0.04027533939404474, "grad_norm": 1.7570673038962155, "learning_rate": 9.997241960856056e-07, "loss": 0.3836, "step": 2317 }, { "epoch": 0.040292721931547565, "grad_norm": 1.3620203901148205, "learning_rate": 9.997232604510963e-07, "loss": 0.4544, "step": 2318 }, { "epoch": 0.040310104469050395, "grad_norm": 4.6707556890455315, "learning_rate": 9.99722323232694e-07, "loss": 0.6088, "step": 2319 }, { "epoch": 0.04032748700655322, "grad_norm": 2.461811767379903, "learning_rate": 9.997213844304016e-07, "loss": 0.5169, "step": 2320 }, { "epoch": 0.04034486954405604, "grad_norm": 2.647580848373185, "learning_rate": 9.997204440442217e-07, "loss": 0.4355, "step": 2321 }, { "epoch": 0.040362252081558865, "grad_norm": 3.206686929412487, "learning_rate": 9.997195020741577e-07, "loss": 0.45, "step": 2322 }, { "epoch": 0.04037963461906169, "grad_norm": 1.2882456357837322, "learning_rate": 9.997185585202124e-07, "loss": 0.2666, "step": 2323 }, { "epoch": 0.04039701715656452, "grad_norm": 2.3068749418651957, "learning_rate": 9.997176133823888e-07, "loss": 0.6033, "step": 2324 }, { "epoch": 0.04041439969406734, "grad_norm": 2.8700441987468386, "learning_rate": 9.9971666666069e-07, "loss": 0.7084, "step": 2325 }, { "epoch": 0.040431782231570165, "grad_norm": 2.8669452886515603, "learning_rate": 9.997157183551188e-07, "loss": 0.6247, "step": 2326 }, { "epoch": 0.04044916476907299, "grad_norm": 1.6944094936017235, "learning_rate": 9.997147684656783e-07, "loss": 0.3549, "step": 2327 }, { "epoch": 0.04046654730657581, "grad_norm": 2.113898669152435, "learning_rate": 9.997138169923716e-07, "loss": 0.4009, "step": 2328 }, { "epoch": 0.040483929844078635, "grad_norm": 2.339961959222368, "learning_rate": 9.997128639352018e-07, "loss": 0.433, "step": 2329 }, { "epoch": 0.040501312381581465, "grad_norm": 1.9747788001131592, "learning_rate": 9.997119092941714e-07, "loss": 0.3185, "step": 2330 }, { "epoch": 0.04051869491908429, "grad_norm": 2.3511523063649835, "learning_rate": 9.997109530692843e-07, "loss": 0.6124, "step": 2331 }, { "epoch": 0.04053607745658711, "grad_norm": 1.9502176284922326, "learning_rate": 9.997099952605425e-07, "loss": 0.243, "step": 2332 }, { "epoch": 0.040553459994089935, "grad_norm": 1.326110053947549, "learning_rate": 9.9970903586795e-07, "loss": 0.5173, "step": 2333 }, { "epoch": 0.04057084253159276, "grad_norm": 2.2533286442056872, "learning_rate": 9.997080748915092e-07, "loss": 0.5953, "step": 2334 }, { "epoch": 0.04058822506909559, "grad_norm": 2.0820269934308167, "learning_rate": 9.997071123312232e-07, "loss": 0.6765, "step": 2335 }, { "epoch": 0.04060560760659841, "grad_norm": 2.6652882396945725, "learning_rate": 9.997061481870956e-07, "loss": 0.3365, "step": 2336 }, { "epoch": 0.040622990144101236, "grad_norm": 2.177071069968397, "learning_rate": 9.997051824591288e-07, "loss": 0.3848, "step": 2337 }, { "epoch": 0.04064037268160406, "grad_norm": 2.078553789354853, "learning_rate": 9.99704215147326e-07, "loss": 0.5804, "step": 2338 }, { "epoch": 0.04065775521910688, "grad_norm": 2.021103637031476, "learning_rate": 9.997032462516906e-07, "loss": 0.2686, "step": 2339 }, { "epoch": 0.04067513775660971, "grad_norm": 1.848151473727606, "learning_rate": 9.997022757722254e-07, "loss": 0.3697, "step": 2340 }, { "epoch": 0.040692520294112536, "grad_norm": 3.334994951137767, "learning_rate": 9.997013037089334e-07, "loss": 0.4345, "step": 2341 }, { "epoch": 0.04070990283161536, "grad_norm": 2.4486073273727333, "learning_rate": 9.99700330061818e-07, "loss": 0.9085, "step": 2342 }, { "epoch": 0.04072728536911818, "grad_norm": 1.7886725218417627, "learning_rate": 9.99699354830882e-07, "loss": 0.241, "step": 2343 }, { "epoch": 0.040744667906621006, "grad_norm": 3.6323849066678005, "learning_rate": 9.996983780161288e-07, "loss": 0.6177, "step": 2344 }, { "epoch": 0.040762050444123836, "grad_norm": 1.6486597513524006, "learning_rate": 9.996973996175608e-07, "loss": 0.4232, "step": 2345 }, { "epoch": 0.04077943298162666, "grad_norm": 2.2697924360368757, "learning_rate": 9.99696419635182e-07, "loss": 0.4253, "step": 2346 }, { "epoch": 0.04079681551912948, "grad_norm": 2.0946432246993223, "learning_rate": 9.99695438068995e-07, "loss": 0.5919, "step": 2347 }, { "epoch": 0.040814198056632306, "grad_norm": 1.5481678672802215, "learning_rate": 9.99694454919003e-07, "loss": 0.2419, "step": 2348 }, { "epoch": 0.04083158059413513, "grad_norm": 2.939918930820301, "learning_rate": 9.99693470185209e-07, "loss": 0.4454, "step": 2349 }, { "epoch": 0.04084896313163796, "grad_norm": 1.6674285782312503, "learning_rate": 9.996924838676164e-07, "loss": 0.2657, "step": 2350 }, { "epoch": 0.04086634566914078, "grad_norm": 2.2443136931747207, "learning_rate": 9.996914959662282e-07, "loss": 0.7573, "step": 2351 }, { "epoch": 0.040883728206643606, "grad_norm": 2.158030445499981, "learning_rate": 9.996905064810472e-07, "loss": 0.767, "step": 2352 }, { "epoch": 0.04090111074414643, "grad_norm": 1.264482662054488, "learning_rate": 9.996895154120772e-07, "loss": 0.3884, "step": 2353 }, { "epoch": 0.04091849328164925, "grad_norm": 2.5684286679739285, "learning_rate": 9.996885227593208e-07, "loss": 0.3541, "step": 2354 }, { "epoch": 0.04093587581915208, "grad_norm": 1.158555842722473, "learning_rate": 9.996875285227813e-07, "loss": 0.3681, "step": 2355 }, { "epoch": 0.040953258356654906, "grad_norm": 3.428111384847683, "learning_rate": 9.996865327024618e-07, "loss": 0.5109, "step": 2356 }, { "epoch": 0.04097064089415773, "grad_norm": 3.3251842888678995, "learning_rate": 9.996855352983657e-07, "loss": 0.4573, "step": 2357 }, { "epoch": 0.04098802343166055, "grad_norm": 2.385008058269923, "learning_rate": 9.99684536310496e-07, "loss": 0.4097, "step": 2358 }, { "epoch": 0.041005405969163376, "grad_norm": 2.232803569543589, "learning_rate": 9.996835357388556e-07, "loss": 0.3477, "step": 2359 }, { "epoch": 0.041022788506666207, "grad_norm": 1.9198792594719796, "learning_rate": 9.996825335834481e-07, "loss": 0.3173, "step": 2360 }, { "epoch": 0.04104017104416903, "grad_norm": 2.885735173998621, "learning_rate": 9.996815298442767e-07, "loss": 0.4611, "step": 2361 }, { "epoch": 0.04105755358167185, "grad_norm": 1.4664152581299068, "learning_rate": 9.99680524521344e-07, "loss": 0.4727, "step": 2362 }, { "epoch": 0.041074936119174676, "grad_norm": 1.4628231314899445, "learning_rate": 9.996795176146538e-07, "loss": 0.2687, "step": 2363 }, { "epoch": 0.0410923186566775, "grad_norm": 2.8636800495008936, "learning_rate": 9.996785091242091e-07, "loss": 0.4491, "step": 2364 }, { "epoch": 0.04110970119418032, "grad_norm": 1.7971481162809653, "learning_rate": 9.99677499050013e-07, "loss": 0.1956, "step": 2365 }, { "epoch": 0.04112708373168315, "grad_norm": 1.9965859795563998, "learning_rate": 9.996764873920688e-07, "loss": 0.688, "step": 2366 }, { "epoch": 0.04114446626918598, "grad_norm": 1.7910415432725244, "learning_rate": 9.996754741503794e-07, "loss": 0.249, "step": 2367 }, { "epoch": 0.0411618488066888, "grad_norm": 2.8340026932333786, "learning_rate": 9.996744593249487e-07, "loss": 0.3659, "step": 2368 }, { "epoch": 0.04117923134419162, "grad_norm": 2.13534298467883, "learning_rate": 9.996734429157791e-07, "loss": 0.2915, "step": 2369 }, { "epoch": 0.04119661388169445, "grad_norm": 1.6239858175235289, "learning_rate": 9.996724249228744e-07, "loss": 0.4779, "step": 2370 }, { "epoch": 0.04121399641919728, "grad_norm": 1.681073657670442, "learning_rate": 9.996714053462377e-07, "loss": 0.6146, "step": 2371 }, { "epoch": 0.0412313789567001, "grad_norm": 1.9404928890946354, "learning_rate": 9.996703841858721e-07, "loss": 0.6928, "step": 2372 }, { "epoch": 0.041248761494202923, "grad_norm": 2.4377818246884786, "learning_rate": 9.996693614417811e-07, "loss": 0.3615, "step": 2373 }, { "epoch": 0.04126614403170575, "grad_norm": 2.1778778472899307, "learning_rate": 9.996683371139674e-07, "loss": 0.3969, "step": 2374 }, { "epoch": 0.04128352656920857, "grad_norm": 2.2007491603012292, "learning_rate": 9.996673112024348e-07, "loss": 0.4475, "step": 2375 }, { "epoch": 0.0413009091067114, "grad_norm": 3.075320727563036, "learning_rate": 9.996662837071865e-07, "loss": 0.4536, "step": 2376 }, { "epoch": 0.041318291644214224, "grad_norm": 2.0614208448249633, "learning_rate": 9.996652546282254e-07, "loss": 0.5382, "step": 2377 }, { "epoch": 0.04133567418171705, "grad_norm": 1.4105328812528983, "learning_rate": 9.996642239655552e-07, "loss": 0.5275, "step": 2378 }, { "epoch": 0.04135305671921987, "grad_norm": 1.798992770794493, "learning_rate": 9.996631917191785e-07, "loss": 0.3496, "step": 2379 }, { "epoch": 0.041370439256722694, "grad_norm": 2.126979723468864, "learning_rate": 9.996621578890993e-07, "loss": 0.4472, "step": 2380 }, { "epoch": 0.041387821794225524, "grad_norm": 1.2005818542407418, "learning_rate": 9.996611224753205e-07, "loss": 0.4048, "step": 2381 }, { "epoch": 0.04140520433172835, "grad_norm": 1.7569571855314867, "learning_rate": 9.996600854778457e-07, "loss": 0.293, "step": 2382 }, { "epoch": 0.04142258686923117, "grad_norm": 2.8994453772728876, "learning_rate": 9.996590468966778e-07, "loss": 0.3634, "step": 2383 }, { "epoch": 0.041439969406733994, "grad_norm": 1.436249265311273, "learning_rate": 9.9965800673182e-07, "loss": 0.4735, "step": 2384 }, { "epoch": 0.04145735194423682, "grad_norm": 3.506598427279988, "learning_rate": 9.996569649832762e-07, "loss": 0.399, "step": 2385 }, { "epoch": 0.04147473448173965, "grad_norm": 2.610162851358411, "learning_rate": 9.996559216510492e-07, "loss": 0.3327, "step": 2386 }, { "epoch": 0.04149211701924247, "grad_norm": 1.8159354397069458, "learning_rate": 9.996548767351424e-07, "loss": 0.4072, "step": 2387 }, { "epoch": 0.041509499556745294, "grad_norm": 2.9460723685311327, "learning_rate": 9.996538302355592e-07, "loss": 0.6784, "step": 2388 }, { "epoch": 0.04152688209424812, "grad_norm": 3.6050645891535305, "learning_rate": 9.996527821523028e-07, "loss": 0.5355, "step": 2389 }, { "epoch": 0.04154426463175094, "grad_norm": 2.023627042532417, "learning_rate": 9.996517324853767e-07, "loss": 0.3915, "step": 2390 }, { "epoch": 0.04156164716925377, "grad_norm": 1.895562641787387, "learning_rate": 9.996506812347843e-07, "loss": 0.6122, "step": 2391 }, { "epoch": 0.041579029706756594, "grad_norm": 1.733687389155477, "learning_rate": 9.996496284005284e-07, "loss": 0.5493, "step": 2392 }, { "epoch": 0.04159641224425942, "grad_norm": 1.910498331923191, "learning_rate": 9.996485739826131e-07, "loss": 0.4582, "step": 2393 }, { "epoch": 0.04161379478176224, "grad_norm": 1.8150210526641883, "learning_rate": 9.99647517981041e-07, "loss": 0.3071, "step": 2394 }, { "epoch": 0.041631177319265064, "grad_norm": 2.4940813095580117, "learning_rate": 9.996464603958159e-07, "loss": 0.3821, "step": 2395 }, { "epoch": 0.041648559856767894, "grad_norm": 2.172864810461417, "learning_rate": 9.99645401226941e-07, "loss": 0.6906, "step": 2396 }, { "epoch": 0.04166594239427072, "grad_norm": 2.774934534346997, "learning_rate": 9.9964434047442e-07, "loss": 0.5765, "step": 2397 }, { "epoch": 0.04168332493177354, "grad_norm": 2.776257467135734, "learning_rate": 9.996432781382555e-07, "loss": 0.3684, "step": 2398 }, { "epoch": 0.041700707469276364, "grad_norm": 2.010159195499529, "learning_rate": 9.996422142184517e-07, "loss": 0.4353, "step": 2399 }, { "epoch": 0.04171809000677919, "grad_norm": 2.7158148890877065, "learning_rate": 9.996411487150113e-07, "loss": 0.5121, "step": 2400 }, { "epoch": 0.04173547254428201, "grad_norm": 4.452673320764258, "learning_rate": 9.996400816279383e-07, "loss": 0.6064, "step": 2401 }, { "epoch": 0.04175285508178484, "grad_norm": 1.6465420638978365, "learning_rate": 9.996390129572353e-07, "loss": 0.4347, "step": 2402 }, { "epoch": 0.041770237619287665, "grad_norm": 2.5572457517523226, "learning_rate": 9.996379427029066e-07, "loss": 0.6064, "step": 2403 }, { "epoch": 0.04178762015679049, "grad_norm": 2.0958067882885443, "learning_rate": 9.99636870864955e-07, "loss": 0.4198, "step": 2404 }, { "epoch": 0.04180500269429331, "grad_norm": 1.9305579423678607, "learning_rate": 9.99635797443384e-07, "loss": 0.7739, "step": 2405 }, { "epoch": 0.041822385231796134, "grad_norm": 2.421702320075789, "learning_rate": 9.99634722438197e-07, "loss": 0.4706, "step": 2406 }, { "epoch": 0.041839767769298965, "grad_norm": 4.818770435512104, "learning_rate": 9.996336458493975e-07, "loss": 0.516, "step": 2407 }, { "epoch": 0.04185715030680179, "grad_norm": 1.5262770917669581, "learning_rate": 9.996325676769888e-07, "loss": 0.5173, "step": 2408 }, { "epoch": 0.04187453284430461, "grad_norm": 1.791405510948208, "learning_rate": 9.996314879209744e-07, "loss": 0.7091, "step": 2409 }, { "epoch": 0.041891915381807435, "grad_norm": 1.9790261257850237, "learning_rate": 9.996304065813576e-07, "loss": 0.6096, "step": 2410 }, { "epoch": 0.04190929791931026, "grad_norm": 1.3525553971992028, "learning_rate": 9.99629323658142e-07, "loss": 0.5251, "step": 2411 }, { "epoch": 0.04192668045681309, "grad_norm": 1.6407195259881613, "learning_rate": 9.996282391513309e-07, "loss": 0.4096, "step": 2412 }, { "epoch": 0.04194406299431591, "grad_norm": 1.955705200851395, "learning_rate": 9.99627153060928e-07, "loss": 0.4315, "step": 2413 }, { "epoch": 0.041961445531818735, "grad_norm": 1.2037650475389072, "learning_rate": 9.996260653869362e-07, "loss": 0.2599, "step": 2414 }, { "epoch": 0.04197882806932156, "grad_norm": 2.581652348895675, "learning_rate": 9.996249761293596e-07, "loss": 0.4339, "step": 2415 }, { "epoch": 0.04199621060682438, "grad_norm": 1.1594526451644456, "learning_rate": 9.996238852882011e-07, "loss": 0.2892, "step": 2416 }, { "epoch": 0.04201359314432721, "grad_norm": 2.3260421976579098, "learning_rate": 9.996227928634646e-07, "loss": 0.3242, "step": 2417 }, { "epoch": 0.042030975681830035, "grad_norm": 1.939708030650013, "learning_rate": 9.996216988551533e-07, "loss": 0.4208, "step": 2418 }, { "epoch": 0.04204835821933286, "grad_norm": 1.8517976147212258, "learning_rate": 9.996206032632706e-07, "loss": 0.4569, "step": 2419 }, { "epoch": 0.04206574075683568, "grad_norm": 2.6016287073834534, "learning_rate": 9.996195060878201e-07, "loss": 0.5336, "step": 2420 }, { "epoch": 0.042083123294338505, "grad_norm": 4.289837609149512, "learning_rate": 9.996184073288054e-07, "loss": 0.3812, "step": 2421 }, { "epoch": 0.042100505831841335, "grad_norm": 2.098165145489385, "learning_rate": 9.9961730698623e-07, "loss": 0.2621, "step": 2422 }, { "epoch": 0.04211788836934416, "grad_norm": 2.208921190954554, "learning_rate": 9.99616205060097e-07, "loss": 0.6633, "step": 2423 }, { "epoch": 0.04213527090684698, "grad_norm": 1.9789329166271223, "learning_rate": 9.9961510155041e-07, "loss": 0.4433, "step": 2424 }, { "epoch": 0.042152653444349805, "grad_norm": 1.6293585844679643, "learning_rate": 9.996139964571728e-07, "loss": 0.2155, "step": 2425 }, { "epoch": 0.04217003598185263, "grad_norm": 2.0702166197027587, "learning_rate": 9.996128897803888e-07, "loss": 0.2795, "step": 2426 }, { "epoch": 0.04218741851935546, "grad_norm": 1.242922716333476, "learning_rate": 9.996117815200613e-07, "loss": 0.3018, "step": 2427 }, { "epoch": 0.04220480105685828, "grad_norm": 1.6708266962625975, "learning_rate": 9.99610671676194e-07, "loss": 0.3129, "step": 2428 }, { "epoch": 0.042222183594361105, "grad_norm": 2.1842714152097242, "learning_rate": 9.996095602487906e-07, "loss": 0.4956, "step": 2429 }, { "epoch": 0.04223956613186393, "grad_norm": 1.6647596990805118, "learning_rate": 9.99608447237854e-07, "loss": 0.4584, "step": 2430 }, { "epoch": 0.04225694866936675, "grad_norm": 2.070132267987794, "learning_rate": 9.996073326433885e-07, "loss": 0.4115, "step": 2431 }, { "epoch": 0.04227433120686958, "grad_norm": 2.4994210115793436, "learning_rate": 9.996062164653972e-07, "loss": 0.6574, "step": 2432 }, { "epoch": 0.042291713744372406, "grad_norm": 2.1107932125671396, "learning_rate": 9.996050987038835e-07, "loss": 0.4631, "step": 2433 }, { "epoch": 0.04230909628187523, "grad_norm": 2.0617877075825612, "learning_rate": 9.99603979358851e-07, "loss": 0.2921, "step": 2434 }, { "epoch": 0.04232647881937805, "grad_norm": 1.9964514476321416, "learning_rate": 9.996028584303039e-07, "loss": 0.3569, "step": 2435 }, { "epoch": 0.042343861356880876, "grad_norm": 1.4034232365259287, "learning_rate": 9.996017359182449e-07, "loss": 0.3698, "step": 2436 }, { "epoch": 0.0423612438943837, "grad_norm": 2.8492003493444695, "learning_rate": 9.99600611822678e-07, "loss": 0.5126, "step": 2437 }, { "epoch": 0.04237862643188653, "grad_norm": 1.8317278626209954, "learning_rate": 9.995994861436064e-07, "loss": 0.3502, "step": 2438 }, { "epoch": 0.04239600896938935, "grad_norm": 2.024299907946176, "learning_rate": 9.995983588810342e-07, "loss": 0.4249, "step": 2439 }, { "epoch": 0.042413391506892176, "grad_norm": 1.8213400724637094, "learning_rate": 9.995972300349647e-07, "loss": 0.2717, "step": 2440 }, { "epoch": 0.042430774044395, "grad_norm": 2.890750071090152, "learning_rate": 9.995960996054014e-07, "loss": 0.7524, "step": 2441 }, { "epoch": 0.04244815658189782, "grad_norm": 1.9209178286188733, "learning_rate": 9.99594967592348e-07, "loss": 0.3614, "step": 2442 }, { "epoch": 0.04246553911940065, "grad_norm": 2.6286500909173, "learning_rate": 9.995938339958078e-07, "loss": 0.6136, "step": 2443 }, { "epoch": 0.042482921656903476, "grad_norm": 1.8659786036702342, "learning_rate": 9.99592698815785e-07, "loss": 0.4174, "step": 2444 }, { "epoch": 0.0425003041944063, "grad_norm": 1.8595788768179469, "learning_rate": 9.995915620522825e-07, "loss": 0.5851, "step": 2445 }, { "epoch": 0.04251768673190912, "grad_norm": 2.0697302460272238, "learning_rate": 9.995904237053044e-07, "loss": 0.4341, "step": 2446 }, { "epoch": 0.042535069269411946, "grad_norm": 0.9954706142935899, "learning_rate": 9.995892837748542e-07, "loss": 0.2462, "step": 2447 }, { "epoch": 0.042552451806914776, "grad_norm": 2.7871419713127197, "learning_rate": 9.995881422609356e-07, "loss": 0.4714, "step": 2448 }, { "epoch": 0.0425698343444176, "grad_norm": 4.75794050251734, "learning_rate": 9.99586999163552e-07, "loss": 0.3342, "step": 2449 }, { "epoch": 0.04258721688192042, "grad_norm": 2.0936297536369697, "learning_rate": 9.99585854482707e-07, "loss": 0.4993, "step": 2450 }, { "epoch": 0.042604599419423246, "grad_norm": 2.4832489423465582, "learning_rate": 9.995847082184044e-07, "loss": 0.4496, "step": 2451 }, { "epoch": 0.04262198195692607, "grad_norm": 1.7144770086259526, "learning_rate": 9.995835603706476e-07, "loss": 0.4351, "step": 2452 }, { "epoch": 0.0426393644944289, "grad_norm": 1.6515413332246098, "learning_rate": 9.995824109394405e-07, "loss": 0.3583, "step": 2453 }, { "epoch": 0.04265674703193172, "grad_norm": 1.7591543683206212, "learning_rate": 9.995812599247868e-07, "loss": 0.6813, "step": 2454 }, { "epoch": 0.042674129569434546, "grad_norm": 1.5053858640926672, "learning_rate": 9.995801073266899e-07, "loss": 0.757, "step": 2455 }, { "epoch": 0.04269151210693737, "grad_norm": 1.678239697733378, "learning_rate": 9.995789531451534e-07, "loss": 0.3268, "step": 2456 }, { "epoch": 0.04270889464444019, "grad_norm": 1.9557886138636025, "learning_rate": 9.995777973801812e-07, "loss": 0.5324, "step": 2457 }, { "epoch": 0.04272627718194302, "grad_norm": 2.0626521062404457, "learning_rate": 9.99576640031777e-07, "loss": 0.4761, "step": 2458 }, { "epoch": 0.042743659719445846, "grad_norm": 1.6519328801807944, "learning_rate": 9.995754810999442e-07, "loss": 0.335, "step": 2459 }, { "epoch": 0.04276104225694867, "grad_norm": 1.9781288482262673, "learning_rate": 9.995743205846867e-07, "loss": 0.3777, "step": 2460 }, { "epoch": 0.04277842479445149, "grad_norm": 2.942419208142126, "learning_rate": 9.995731584860078e-07, "loss": 0.3468, "step": 2461 }, { "epoch": 0.042795807331954316, "grad_norm": 1.6845632621440685, "learning_rate": 9.99571994803912e-07, "loss": 0.2437, "step": 2462 }, { "epoch": 0.04281318986945715, "grad_norm": 1.3250324761580508, "learning_rate": 9.99570829538402e-07, "loss": 0.3267, "step": 2463 }, { "epoch": 0.04283057240695997, "grad_norm": 1.34982364401308, "learning_rate": 9.99569662689482e-07, "loss": 0.4175, "step": 2464 }, { "epoch": 0.04284795494446279, "grad_norm": 2.32518500728049, "learning_rate": 9.995684942571558e-07, "loss": 0.4225, "step": 2465 }, { "epoch": 0.04286533748196562, "grad_norm": 2.4298429669625405, "learning_rate": 9.99567324241427e-07, "loss": 0.7211, "step": 2466 }, { "epoch": 0.04288272001946844, "grad_norm": 2.2740904478070982, "learning_rate": 9.995661526422991e-07, "loss": 0.5571, "step": 2467 }, { "epoch": 0.04290010255697126, "grad_norm": 1.7039420500803544, "learning_rate": 9.995649794597763e-07, "loss": 0.584, "step": 2468 }, { "epoch": 0.042917485094474093, "grad_norm": 2.2193049453370786, "learning_rate": 9.995638046938618e-07, "loss": 0.6157, "step": 2469 }, { "epoch": 0.04293486763197692, "grad_norm": 2.6085855475061908, "learning_rate": 9.995626283445593e-07, "loss": 0.4776, "step": 2470 }, { "epoch": 0.04295225016947974, "grad_norm": 2.6749796369424748, "learning_rate": 9.99561450411873e-07, "loss": 0.5104, "step": 2471 }, { "epoch": 0.04296963270698256, "grad_norm": 1.0726069956991715, "learning_rate": 9.995602708958064e-07, "loss": 0.3585, "step": 2472 }, { "epoch": 0.04298701524448539, "grad_norm": 2.564993281208684, "learning_rate": 9.99559089796363e-07, "loss": 0.7331, "step": 2473 }, { "epoch": 0.04300439778198822, "grad_norm": 2.3183783283430373, "learning_rate": 9.99557907113547e-07, "loss": 0.8926, "step": 2474 }, { "epoch": 0.04302178031949104, "grad_norm": 2.4784071547105864, "learning_rate": 9.995567228473619e-07, "loss": 0.3168, "step": 2475 }, { "epoch": 0.043039162856993864, "grad_norm": 2.104899965222413, "learning_rate": 9.995555369978112e-07, "loss": 0.369, "step": 2476 }, { "epoch": 0.04305654539449669, "grad_norm": 1.6697226032832606, "learning_rate": 9.995543495648992e-07, "loss": 0.57, "step": 2477 }, { "epoch": 0.04307392793199951, "grad_norm": 1.80335616772548, "learning_rate": 9.995531605486292e-07, "loss": 0.3785, "step": 2478 }, { "epoch": 0.04309131046950234, "grad_norm": 1.2301368905233228, "learning_rate": 9.995519699490052e-07, "loss": 0.3235, "step": 2479 }, { "epoch": 0.043108693007005164, "grad_norm": 2.1900011248846623, "learning_rate": 9.99550777766031e-07, "loss": 0.4812, "step": 2480 }, { "epoch": 0.04312607554450799, "grad_norm": 1.5488143783385229, "learning_rate": 9.995495839997103e-07, "loss": 0.3995, "step": 2481 }, { "epoch": 0.04314345808201081, "grad_norm": 2.0775298437406886, "learning_rate": 9.995483886500469e-07, "loss": 0.3521, "step": 2482 }, { "epoch": 0.043160840619513634, "grad_norm": 1.7887432413559257, "learning_rate": 9.995471917170446e-07, "loss": 0.2969, "step": 2483 }, { "epoch": 0.043178223157016464, "grad_norm": 2.5793419904152044, "learning_rate": 9.99545993200707e-07, "loss": 0.5087, "step": 2484 }, { "epoch": 0.04319560569451929, "grad_norm": 2.7108683859985563, "learning_rate": 9.995447931010384e-07, "loss": 0.3344, "step": 2485 }, { "epoch": 0.04321298823202211, "grad_norm": 1.538583834250829, "learning_rate": 9.99543591418042e-07, "loss": 0.5688, "step": 2486 }, { "epoch": 0.043230370769524934, "grad_norm": 2.11064761183125, "learning_rate": 9.99542388151722e-07, "loss": 0.7294, "step": 2487 }, { "epoch": 0.04324775330702776, "grad_norm": 1.8002160555602988, "learning_rate": 9.995411833020821e-07, "loss": 0.3879, "step": 2488 }, { "epoch": 0.04326513584453059, "grad_norm": 2.2135307564347824, "learning_rate": 9.995399768691262e-07, "loss": 0.4249, "step": 2489 }, { "epoch": 0.04328251838203341, "grad_norm": 3.21671958940895, "learning_rate": 9.995387688528577e-07, "loss": 0.4629, "step": 2490 }, { "epoch": 0.043299900919536234, "grad_norm": 1.6580465576407697, "learning_rate": 9.995375592532812e-07, "loss": 0.297, "step": 2491 }, { "epoch": 0.04331728345703906, "grad_norm": 2.321560796346642, "learning_rate": 9.995363480704e-07, "loss": 0.3877, "step": 2492 }, { "epoch": 0.04333466599454188, "grad_norm": 2.25688161017873, "learning_rate": 9.995351353042178e-07, "loss": 0.4249, "step": 2493 }, { "epoch": 0.04335204853204471, "grad_norm": 1.7384533327289555, "learning_rate": 9.995339209547388e-07, "loss": 0.3537, "step": 2494 }, { "epoch": 0.043369431069547534, "grad_norm": 1.800067231252152, "learning_rate": 9.995327050219668e-07, "loss": 0.5042, "step": 2495 }, { "epoch": 0.04338681360705036, "grad_norm": 1.9648177739320032, "learning_rate": 9.995314875059058e-07, "loss": 0.4621, "step": 2496 }, { "epoch": 0.04340419614455318, "grad_norm": 1.7363404186499851, "learning_rate": 9.995302684065591e-07, "loss": 0.2832, "step": 2497 }, { "epoch": 0.043421578682056004, "grad_norm": 2.2736560825474075, "learning_rate": 9.99529047723931e-07, "loss": 0.4183, "step": 2498 }, { "epoch": 0.043438961219558835, "grad_norm": 1.323106028704356, "learning_rate": 9.995278254580254e-07, "loss": 0.569, "step": 2499 }, { "epoch": 0.04345634375706166, "grad_norm": 1.3585385971632533, "learning_rate": 9.99526601608846e-07, "loss": 0.277, "step": 2500 }, { "epoch": 0.04347372629456448, "grad_norm": 1.266830763094152, "learning_rate": 9.995253761763968e-07, "loss": 0.509, "step": 2501 }, { "epoch": 0.043491108832067304, "grad_norm": 2.366187554267106, "learning_rate": 9.995241491606815e-07, "loss": 0.6327, "step": 2502 }, { "epoch": 0.04350849136957013, "grad_norm": 2.166282427852388, "learning_rate": 9.995229205617042e-07, "loss": 0.4247, "step": 2503 }, { "epoch": 0.04352587390707295, "grad_norm": 4.5844007247528, "learning_rate": 9.995216903794687e-07, "loss": 0.3749, "step": 2504 }, { "epoch": 0.04354325644457578, "grad_norm": 1.444202243644033, "learning_rate": 9.995204586139786e-07, "loss": 0.3227, "step": 2505 }, { "epoch": 0.043560638982078605, "grad_norm": 2.182670761166277, "learning_rate": 9.995192252652385e-07, "loss": 0.7432, "step": 2506 }, { "epoch": 0.04357802151958143, "grad_norm": 3.082628560151398, "learning_rate": 9.995179903332517e-07, "loss": 0.5541, "step": 2507 }, { "epoch": 0.04359540405708425, "grad_norm": 3.881210676339276, "learning_rate": 9.995167538180225e-07, "loss": 0.2914, "step": 2508 }, { "epoch": 0.043612786594587075, "grad_norm": 1.6492415877522382, "learning_rate": 9.995155157195547e-07, "loss": 0.5934, "step": 2509 }, { "epoch": 0.043630169132089905, "grad_norm": 2.2723701080373906, "learning_rate": 9.99514276037852e-07, "loss": 0.3659, "step": 2510 }, { "epoch": 0.04364755166959273, "grad_norm": 1.3063721091691785, "learning_rate": 9.995130347729186e-07, "loss": 0.4354, "step": 2511 }, { "epoch": 0.04366493420709555, "grad_norm": 2.482277203882336, "learning_rate": 9.995117919247582e-07, "loss": 0.2995, "step": 2512 }, { "epoch": 0.043682316744598375, "grad_norm": 1.8192131271945458, "learning_rate": 9.995105474933748e-07, "loss": 0.6191, "step": 2513 }, { "epoch": 0.0436996992821012, "grad_norm": 1.6959392800714916, "learning_rate": 9.995093014787728e-07, "loss": 0.1798, "step": 2514 }, { "epoch": 0.04371708181960403, "grad_norm": 2.5896850608276063, "learning_rate": 9.995080538809556e-07, "loss": 0.2921, "step": 2515 }, { "epoch": 0.04373446435710685, "grad_norm": 2.0699479313717313, "learning_rate": 9.995068046999273e-07, "loss": 0.4231, "step": 2516 }, { "epoch": 0.043751846894609675, "grad_norm": 1.9704569634686007, "learning_rate": 9.995055539356917e-07, "loss": 0.3757, "step": 2517 }, { "epoch": 0.0437692294321125, "grad_norm": 1.5245153795940787, "learning_rate": 9.995043015882531e-07, "loss": 0.3422, "step": 2518 }, { "epoch": 0.04378661196961532, "grad_norm": 1.740339626571518, "learning_rate": 9.995030476576155e-07, "loss": 0.6027, "step": 2519 }, { "epoch": 0.04380399450711815, "grad_norm": 1.4605231842437727, "learning_rate": 9.995017921437824e-07, "loss": 0.7736, "step": 2520 }, { "epoch": 0.043821377044620975, "grad_norm": 1.5691132797372267, "learning_rate": 9.995005350467582e-07, "loss": 0.467, "step": 2521 }, { "epoch": 0.0438387595821238, "grad_norm": 1.3656186390920435, "learning_rate": 9.994992763665468e-07, "loss": 0.35, "step": 2522 }, { "epoch": 0.04385614211962662, "grad_norm": 1.9370776268398324, "learning_rate": 9.99498016103152e-07, "loss": 0.5755, "step": 2523 }, { "epoch": 0.043873524657129445, "grad_norm": 2.1065258470490433, "learning_rate": 9.99496754256578e-07, "loss": 0.3026, "step": 2524 }, { "epoch": 0.043890907194632275, "grad_norm": 2.2291764648634467, "learning_rate": 9.994954908268287e-07, "loss": 0.724, "step": 2525 }, { "epoch": 0.0439082897321351, "grad_norm": 2.3311084807955407, "learning_rate": 9.994942258139083e-07, "loss": 0.9487, "step": 2526 }, { "epoch": 0.04392567226963792, "grad_norm": 2.6189804919342508, "learning_rate": 9.994929592178206e-07, "loss": 0.4193, "step": 2527 }, { "epoch": 0.043943054807140745, "grad_norm": 1.9625010494947943, "learning_rate": 9.994916910385696e-07, "loss": 0.5172, "step": 2528 }, { "epoch": 0.04396043734464357, "grad_norm": 3.038785016768905, "learning_rate": 9.994904212761594e-07, "loss": 0.5013, "step": 2529 }, { "epoch": 0.0439778198821464, "grad_norm": 1.6251325427151952, "learning_rate": 9.99489149930594e-07, "loss": 0.4541, "step": 2530 }, { "epoch": 0.04399520241964922, "grad_norm": 4.0708880809757435, "learning_rate": 9.994878770018775e-07, "loss": 0.376, "step": 2531 }, { "epoch": 0.044012584957152046, "grad_norm": 2.7374702761861496, "learning_rate": 9.994866024900139e-07, "loss": 0.4726, "step": 2532 }, { "epoch": 0.04402996749465487, "grad_norm": 1.9804186591015704, "learning_rate": 9.99485326395007e-07, "loss": 0.4109, "step": 2533 }, { "epoch": 0.04404735003215769, "grad_norm": 2.5784616742962734, "learning_rate": 9.994840487168614e-07, "loss": 0.3877, "step": 2534 }, { "epoch": 0.04406473256966052, "grad_norm": 1.5720077397976442, "learning_rate": 9.994827694555805e-07, "loss": 0.3352, "step": 2535 }, { "epoch": 0.044082115107163346, "grad_norm": 1.825104097716898, "learning_rate": 9.994814886111689e-07, "loss": 0.8467, "step": 2536 }, { "epoch": 0.04409949764466617, "grad_norm": 2.55216516239708, "learning_rate": 9.994802061836303e-07, "loss": 0.2628, "step": 2537 }, { "epoch": 0.04411688018216899, "grad_norm": 2.1276859187751485, "learning_rate": 9.99478922172969e-07, "loss": 0.5033, "step": 2538 }, { "epoch": 0.044134262719671816, "grad_norm": 3.03839247200462, "learning_rate": 9.994776365791887e-07, "loss": 0.5386, "step": 2539 }, { "epoch": 0.04415164525717464, "grad_norm": 1.7576142718165195, "learning_rate": 9.99476349402294e-07, "loss": 0.6511, "step": 2540 }, { "epoch": 0.04416902779467747, "grad_norm": 3.381202972628823, "learning_rate": 9.994750606422886e-07, "loss": 0.4462, "step": 2541 }, { "epoch": 0.04418641033218029, "grad_norm": 2.6585004656345186, "learning_rate": 9.994737702991768e-07, "loss": 0.4891, "step": 2542 }, { "epoch": 0.044203792869683116, "grad_norm": 4.25413244316872, "learning_rate": 9.994724783729624e-07, "loss": 0.4035, "step": 2543 }, { "epoch": 0.04422117540718594, "grad_norm": 1.2260095928113464, "learning_rate": 9.994711848636498e-07, "loss": 0.8162, "step": 2544 }, { "epoch": 0.04423855794468876, "grad_norm": 1.882749963164607, "learning_rate": 9.99469889771243e-07, "loss": 0.3334, "step": 2545 }, { "epoch": 0.04425594048219159, "grad_norm": 1.941727753455434, "learning_rate": 9.99468593095746e-07, "loss": 0.3964, "step": 2546 }, { "epoch": 0.044273323019694416, "grad_norm": 2.5132490718623846, "learning_rate": 9.994672948371632e-07, "loss": 0.3212, "step": 2547 }, { "epoch": 0.04429070555719724, "grad_norm": 2.5740376614352516, "learning_rate": 9.994659949954981e-07, "loss": 0.6229, "step": 2548 }, { "epoch": 0.04430808809470006, "grad_norm": 1.214520763544262, "learning_rate": 9.994646935707556e-07, "loss": 0.4616, "step": 2549 }, { "epoch": 0.044325470632202886, "grad_norm": 4.035971874565061, "learning_rate": 9.994633905629395e-07, "loss": 0.4804, "step": 2550 }, { "epoch": 0.044342853169705716, "grad_norm": 1.9786833219587285, "learning_rate": 9.994620859720537e-07, "loss": 0.4272, "step": 2551 }, { "epoch": 0.04436023570720854, "grad_norm": 1.6528574955336797, "learning_rate": 9.994607797981024e-07, "loss": 0.4706, "step": 2552 }, { "epoch": 0.04437761824471136, "grad_norm": 2.424651493528911, "learning_rate": 9.9945947204109e-07, "loss": 0.8315, "step": 2553 }, { "epoch": 0.044395000782214186, "grad_norm": 1.1510368780496152, "learning_rate": 9.994581627010205e-07, "loss": 0.399, "step": 2554 }, { "epoch": 0.04441238331971701, "grad_norm": 2.2151783734937642, "learning_rate": 9.994568517778983e-07, "loss": 0.6872, "step": 2555 }, { "epoch": 0.04442976585721984, "grad_norm": 1.8629270297234477, "learning_rate": 9.99455539271727e-07, "loss": 0.3766, "step": 2556 }, { "epoch": 0.04444714839472266, "grad_norm": 1.9657075256707481, "learning_rate": 9.99454225182511e-07, "loss": 0.7411, "step": 2557 }, { "epoch": 0.044464530932225486, "grad_norm": 3.2052122313319837, "learning_rate": 9.994529095102549e-07, "loss": 0.6549, "step": 2558 }, { "epoch": 0.04448191346972831, "grad_norm": 2.577194361847012, "learning_rate": 9.994515922549623e-07, "loss": 0.4385, "step": 2559 }, { "epoch": 0.04449929600723113, "grad_norm": 1.8876094671147985, "learning_rate": 9.994502734166376e-07, "loss": 0.5286, "step": 2560 }, { "epoch": 0.04451667854473396, "grad_norm": 2.1504539481557856, "learning_rate": 9.99448952995285e-07, "loss": 0.529, "step": 2561 }, { "epoch": 0.04453406108223679, "grad_norm": 1.2486853203865198, "learning_rate": 9.994476309909086e-07, "loss": 0.4086, "step": 2562 }, { "epoch": 0.04455144361973961, "grad_norm": 1.5451133914063397, "learning_rate": 9.994463074035126e-07, "loss": 0.3875, "step": 2563 }, { "epoch": 0.04456882615724243, "grad_norm": 1.6926235529936249, "learning_rate": 9.994449822331012e-07, "loss": 0.3115, "step": 2564 }, { "epoch": 0.04458620869474526, "grad_norm": 2.2814893788272816, "learning_rate": 9.994436554796788e-07, "loss": 0.3824, "step": 2565 }, { "epoch": 0.04460359123224809, "grad_norm": 1.767812956686594, "learning_rate": 9.994423271432492e-07, "loss": 0.5906, "step": 2566 }, { "epoch": 0.04462097376975091, "grad_norm": 1.9437786965518973, "learning_rate": 9.99440997223817e-07, "loss": 0.5614, "step": 2567 }, { "epoch": 0.04463835630725373, "grad_norm": 3.335750105745167, "learning_rate": 9.994396657213863e-07, "loss": 0.4066, "step": 2568 }, { "epoch": 0.04465573884475656, "grad_norm": 1.7229815516982074, "learning_rate": 9.994383326359613e-07, "loss": 0.4012, "step": 2569 }, { "epoch": 0.04467312138225938, "grad_norm": 1.7400691364967527, "learning_rate": 9.99436997967546e-07, "loss": 0.4402, "step": 2570 }, { "epoch": 0.04469050391976221, "grad_norm": 1.7515835528956538, "learning_rate": 9.99435661716145e-07, "loss": 0.2169, "step": 2571 }, { "epoch": 0.044707886457265034, "grad_norm": 1.4457067310119975, "learning_rate": 9.994343238817624e-07, "loss": 0.3012, "step": 2572 }, { "epoch": 0.04472526899476786, "grad_norm": 2.4590521025390593, "learning_rate": 9.994329844644025e-07, "loss": 0.9311, "step": 2573 }, { "epoch": 0.04474265153227068, "grad_norm": 1.9780469233946754, "learning_rate": 9.994316434640693e-07, "loss": 0.3605, "step": 2574 }, { "epoch": 0.044760034069773504, "grad_norm": 2.611884383405204, "learning_rate": 9.994303008807672e-07, "loss": 0.3821, "step": 2575 }, { "epoch": 0.04477741660727633, "grad_norm": 1.6626884277882303, "learning_rate": 9.994289567145004e-07, "loss": 0.3927, "step": 2576 }, { "epoch": 0.04479479914477916, "grad_norm": 1.7210540064125237, "learning_rate": 9.994276109652736e-07, "loss": 0.6474, "step": 2577 }, { "epoch": 0.04481218168228198, "grad_norm": 3.278250202754781, "learning_rate": 9.994262636330902e-07, "loss": 0.5783, "step": 2578 }, { "epoch": 0.044829564219784804, "grad_norm": 2.518370158222007, "learning_rate": 9.994249147179552e-07, "loss": 0.7095, "step": 2579 }, { "epoch": 0.04484694675728763, "grad_norm": 7.296761374760128, "learning_rate": 9.994235642198727e-07, "loss": 0.7519, "step": 2580 }, { "epoch": 0.04486432929479045, "grad_norm": 2.5165380610522976, "learning_rate": 9.994222121388467e-07, "loss": 0.4092, "step": 2581 }, { "epoch": 0.04488171183229328, "grad_norm": 2.6773524023046633, "learning_rate": 9.99420858474882e-07, "loss": 0.3339, "step": 2582 }, { "epoch": 0.044899094369796104, "grad_norm": 2.661111151712924, "learning_rate": 9.994195032279823e-07, "loss": 0.4235, "step": 2583 }, { "epoch": 0.04491647690729893, "grad_norm": 2.04460467580619, "learning_rate": 9.994181463981524e-07, "loss": 0.3456, "step": 2584 }, { "epoch": 0.04493385944480175, "grad_norm": 3.200073923304278, "learning_rate": 9.994167879853963e-07, "loss": 0.4568, "step": 2585 }, { "epoch": 0.044951241982304574, "grad_norm": 2.1386142250209597, "learning_rate": 9.994154279897185e-07, "loss": 0.6661, "step": 2586 }, { "epoch": 0.044968624519807404, "grad_norm": 2.0022694343636958, "learning_rate": 9.99414066411123e-07, "loss": 0.4965, "step": 2587 }, { "epoch": 0.04498600705731023, "grad_norm": 2.8632907341695804, "learning_rate": 9.994127032496144e-07, "loss": 0.5893, "step": 2588 }, { "epoch": 0.04500338959481305, "grad_norm": 2.3136468628343536, "learning_rate": 9.99411338505197e-07, "loss": 0.4556, "step": 2589 }, { "epoch": 0.045020772132315874, "grad_norm": 2.8035404240096753, "learning_rate": 9.99409972177875e-07, "loss": 0.5183, "step": 2590 }, { "epoch": 0.0450381546698187, "grad_norm": 2.387299490424319, "learning_rate": 9.99408604267653e-07, "loss": 0.7321, "step": 2591 }, { "epoch": 0.04505553720732153, "grad_norm": 1.4558018194356672, "learning_rate": 9.99407234774535e-07, "loss": 0.4076, "step": 2592 }, { "epoch": 0.04507291974482435, "grad_norm": 2.2429466682053594, "learning_rate": 9.994058636985255e-07, "loss": 0.3713, "step": 2593 }, { "epoch": 0.045090302282327174, "grad_norm": 1.6411887156219147, "learning_rate": 9.994044910396286e-07, "loss": 0.461, "step": 2594 }, { "epoch": 0.04510768481983, "grad_norm": 2.1633118147200117, "learning_rate": 9.994031167978491e-07, "loss": 0.6073, "step": 2595 }, { "epoch": 0.04512506735733282, "grad_norm": 1.4593702033113842, "learning_rate": 9.994017409731912e-07, "loss": 0.3887, "step": 2596 }, { "epoch": 0.04514244989483565, "grad_norm": 1.9807975873356394, "learning_rate": 9.994003635656588e-07, "loss": 0.7088, "step": 2597 }, { "epoch": 0.045159832432338474, "grad_norm": 1.8466170445976362, "learning_rate": 9.993989845752572e-07, "loss": 0.3693, "step": 2598 }, { "epoch": 0.0451772149698413, "grad_norm": 1.3251506963421493, "learning_rate": 9.993976040019898e-07, "loss": 0.4956, "step": 2599 }, { "epoch": 0.04519459750734412, "grad_norm": 1.228846770849532, "learning_rate": 9.993962218458616e-07, "loss": 0.2011, "step": 2600 }, { "epoch": 0.045211980044846944, "grad_norm": 2.009768530560637, "learning_rate": 9.993948381068767e-07, "loss": 0.6838, "step": 2601 }, { "epoch": 0.045229362582349775, "grad_norm": 1.60709423380449, "learning_rate": 9.993934527850395e-07, "loss": 0.3113, "step": 2602 }, { "epoch": 0.0452467451198526, "grad_norm": 1.8412148038834881, "learning_rate": 9.993920658803548e-07, "loss": 0.2517, "step": 2603 }, { "epoch": 0.04526412765735542, "grad_norm": 2.350826243616183, "learning_rate": 9.993906773928262e-07, "loss": 0.3641, "step": 2604 }, { "epoch": 0.045281510194858245, "grad_norm": 2.8357857853766073, "learning_rate": 9.993892873224588e-07, "loss": 0.28, "step": 2605 }, { "epoch": 0.04529889273236107, "grad_norm": 2.7152542613562476, "learning_rate": 9.993878956692568e-07, "loss": 0.5818, "step": 2606 }, { "epoch": 0.0453162752698639, "grad_norm": 1.8709252111664199, "learning_rate": 9.993865024332244e-07, "loss": 0.3576, "step": 2607 }, { "epoch": 0.04533365780736672, "grad_norm": 1.6009514683195276, "learning_rate": 9.993851076143663e-07, "loss": 0.2403, "step": 2608 }, { "epoch": 0.045351040344869545, "grad_norm": 2.140589431683715, "learning_rate": 9.993837112126868e-07, "loss": 0.4689, "step": 2609 }, { "epoch": 0.04536842288237237, "grad_norm": 2.978102299742002, "learning_rate": 9.993823132281903e-07, "loss": 0.3311, "step": 2610 }, { "epoch": 0.04538580541987519, "grad_norm": 2.8298582147375075, "learning_rate": 9.993809136608814e-07, "loss": 0.5959, "step": 2611 }, { "epoch": 0.045403187957378015, "grad_norm": 1.3615551792495368, "learning_rate": 9.993795125107642e-07, "loss": 0.4383, "step": 2612 }, { "epoch": 0.045420570494880845, "grad_norm": 1.8490972326518957, "learning_rate": 9.993781097778433e-07, "loss": 0.5987, "step": 2613 }, { "epoch": 0.04543795303238367, "grad_norm": 2.123638678680958, "learning_rate": 9.993767054621234e-07, "loss": 0.3067, "step": 2614 }, { "epoch": 0.04545533556988649, "grad_norm": 1.7571137878982679, "learning_rate": 9.993752995636087e-07, "loss": 0.432, "step": 2615 }, { "epoch": 0.045472718107389315, "grad_norm": 3.2420012815192765, "learning_rate": 9.993738920823035e-07, "loss": 0.4234, "step": 2616 }, { "epoch": 0.04549010064489214, "grad_norm": 1.4939375804876547, "learning_rate": 9.993724830182126e-07, "loss": 0.3857, "step": 2617 }, { "epoch": 0.04550748318239497, "grad_norm": 1.3389996029340905, "learning_rate": 9.993710723713403e-07, "loss": 0.3373, "step": 2618 }, { "epoch": 0.04552486571989779, "grad_norm": 3.0138944584278273, "learning_rate": 9.993696601416912e-07, "loss": 0.7018, "step": 2619 }, { "epoch": 0.045542248257400615, "grad_norm": 1.794786905816659, "learning_rate": 9.993682463292696e-07, "loss": 0.2896, "step": 2620 }, { "epoch": 0.04555963079490344, "grad_norm": 1.6557322064462912, "learning_rate": 9.993668309340798e-07, "loss": 0.2681, "step": 2621 }, { "epoch": 0.04557701333240626, "grad_norm": 1.8979767804557088, "learning_rate": 9.993654139561269e-07, "loss": 0.3487, "step": 2622 }, { "epoch": 0.04559439586990909, "grad_norm": 1.6016463782452603, "learning_rate": 9.99363995395415e-07, "loss": 0.565, "step": 2623 }, { "epoch": 0.045611778407411915, "grad_norm": 2.06016086720031, "learning_rate": 9.993625752519485e-07, "loss": 0.2142, "step": 2624 }, { "epoch": 0.04562916094491474, "grad_norm": 1.803874282455696, "learning_rate": 9.99361153525732e-07, "loss": 0.4292, "step": 2625 }, { "epoch": 0.04564654348241756, "grad_norm": 2.9732147225894416, "learning_rate": 9.9935973021677e-07, "loss": 0.5891, "step": 2626 }, { "epoch": 0.045663926019920385, "grad_norm": 1.8891690843662996, "learning_rate": 9.99358305325067e-07, "loss": 0.4647, "step": 2627 }, { "epoch": 0.045681308557423216, "grad_norm": 2.1035817300633703, "learning_rate": 9.993568788506277e-07, "loss": 0.7354, "step": 2628 }, { "epoch": 0.04569869109492604, "grad_norm": 2.297745738916065, "learning_rate": 9.993554507934565e-07, "loss": 0.252, "step": 2629 }, { "epoch": 0.04571607363242886, "grad_norm": 1.6803390500531379, "learning_rate": 9.993540211535578e-07, "loss": 0.4473, "step": 2630 }, { "epoch": 0.045733456169931685, "grad_norm": 3.103454533854395, "learning_rate": 9.993525899309364e-07, "loss": 0.5174, "step": 2631 }, { "epoch": 0.04575083870743451, "grad_norm": 2.122445872587821, "learning_rate": 9.993511571255966e-07, "loss": 0.2938, "step": 2632 }, { "epoch": 0.04576822124493734, "grad_norm": 2.0296236081434236, "learning_rate": 9.99349722737543e-07, "loss": 0.3701, "step": 2633 }, { "epoch": 0.04578560378244016, "grad_norm": 1.8715966489508986, "learning_rate": 9.993482867667801e-07, "loss": 0.3174, "step": 2634 }, { "epoch": 0.045802986319942986, "grad_norm": 2.661196687620895, "learning_rate": 9.993468492133127e-07, "loss": 0.5852, "step": 2635 }, { "epoch": 0.04582036885744581, "grad_norm": 2.0129949711603476, "learning_rate": 9.993454100771449e-07, "loss": 0.5828, "step": 2636 }, { "epoch": 0.04583775139494863, "grad_norm": 1.9771472837228052, "learning_rate": 9.993439693582816e-07, "loss": 0.5703, "step": 2637 }, { "epoch": 0.04585513393245146, "grad_norm": 1.866884792913677, "learning_rate": 9.993425270567276e-07, "loss": 0.4333, "step": 2638 }, { "epoch": 0.045872516469954286, "grad_norm": 2.191618451954724, "learning_rate": 9.993410831724868e-07, "loss": 0.5, "step": 2639 }, { "epoch": 0.04588989900745711, "grad_norm": 2.1153707703170928, "learning_rate": 9.993396377055643e-07, "loss": 0.2629, "step": 2640 }, { "epoch": 0.04590728154495993, "grad_norm": 1.8927778706322296, "learning_rate": 9.993381906559643e-07, "loss": 0.6676, "step": 2641 }, { "epoch": 0.045924664082462756, "grad_norm": 1.6000045641646057, "learning_rate": 9.99336742023692e-07, "loss": 0.6832, "step": 2642 }, { "epoch": 0.04594204661996558, "grad_norm": 1.5552071689471247, "learning_rate": 9.993352918087514e-07, "loss": 0.5089, "step": 2643 }, { "epoch": 0.04595942915746841, "grad_norm": 2.782000740099545, "learning_rate": 9.993338400111472e-07, "loss": 0.8148, "step": 2644 }, { "epoch": 0.04597681169497123, "grad_norm": 2.184426241981594, "learning_rate": 9.993323866308842e-07, "loss": 0.4532, "step": 2645 }, { "epoch": 0.045994194232474056, "grad_norm": 2.3190950474896836, "learning_rate": 9.99330931667967e-07, "loss": 0.4955, "step": 2646 }, { "epoch": 0.04601157676997688, "grad_norm": 1.314454337452952, "learning_rate": 9.993294751224e-07, "loss": 0.448, "step": 2647 }, { "epoch": 0.0460289593074797, "grad_norm": 1.8292925443981631, "learning_rate": 9.99328016994188e-07, "loss": 0.4337, "step": 2648 }, { "epoch": 0.04604634184498253, "grad_norm": 3.5402150526713676, "learning_rate": 9.993265572833357e-07, "loss": 0.6191, "step": 2649 }, { "epoch": 0.046063724382485356, "grad_norm": 1.8439143300145844, "learning_rate": 9.993250959898473e-07, "loss": 0.4597, "step": 2650 }, { "epoch": 0.04608110691998818, "grad_norm": 2.0042950132331314, "learning_rate": 9.993236331137277e-07, "loss": 0.8902, "step": 2651 }, { "epoch": 0.046098489457491, "grad_norm": 1.961118614981264, "learning_rate": 9.993221686549818e-07, "loss": 0.3035, "step": 2652 }, { "epoch": 0.046115871994993826, "grad_norm": 1.717194865801854, "learning_rate": 9.99320702613614e-07, "loss": 0.2299, "step": 2653 }, { "epoch": 0.046133254532496656, "grad_norm": 1.7358833150633781, "learning_rate": 9.993192349896288e-07, "loss": 0.5498, "step": 2654 }, { "epoch": 0.04615063706999948, "grad_norm": 1.47550784900477, "learning_rate": 9.99317765783031e-07, "loss": 0.2671, "step": 2655 }, { "epoch": 0.0461680196075023, "grad_norm": 1.8185334665067494, "learning_rate": 9.993162949938252e-07, "loss": 0.3643, "step": 2656 }, { "epoch": 0.046185402145005126, "grad_norm": 2.145051150318867, "learning_rate": 9.993148226220162e-07, "loss": 0.1867, "step": 2657 }, { "epoch": 0.04620278468250795, "grad_norm": 4.176200359272064, "learning_rate": 9.993133486676085e-07, "loss": 0.554, "step": 2658 }, { "epoch": 0.04622016722001078, "grad_norm": 1.897588950695092, "learning_rate": 9.99311873130607e-07, "loss": 0.2892, "step": 2659 }, { "epoch": 0.0462375497575136, "grad_norm": 1.5875433125727838, "learning_rate": 9.993103960110162e-07, "loss": 0.3882, "step": 2660 }, { "epoch": 0.04625493229501643, "grad_norm": 1.849294870414017, "learning_rate": 9.993089173088407e-07, "loss": 0.2833, "step": 2661 }, { "epoch": 0.04627231483251925, "grad_norm": 1.4627712954771732, "learning_rate": 9.993074370240855e-07, "loss": 0.2561, "step": 2662 }, { "epoch": 0.04628969737002207, "grad_norm": 2.164254234687732, "learning_rate": 9.993059551567549e-07, "loss": 0.4802, "step": 2663 }, { "epoch": 0.0463070799075249, "grad_norm": 1.841840429081029, "learning_rate": 9.993044717068538e-07, "loss": 0.3182, "step": 2664 }, { "epoch": 0.04632446244502773, "grad_norm": 2.404324197406005, "learning_rate": 9.993029866743868e-07, "loss": 0.3617, "step": 2665 }, { "epoch": 0.04634184498253055, "grad_norm": 3.6191368434819755, "learning_rate": 9.993015000593588e-07, "loss": 0.3727, "step": 2666 }, { "epoch": 0.04635922752003337, "grad_norm": 1.3747879080740804, "learning_rate": 9.993000118617745e-07, "loss": 0.2473, "step": 2667 }, { "epoch": 0.0463766100575362, "grad_norm": 3.1733040617628676, "learning_rate": 9.992985220816384e-07, "loss": 0.5328, "step": 2668 }, { "epoch": 0.04639399259503903, "grad_norm": 2.2037702499286653, "learning_rate": 9.992970307189554e-07, "loss": 0.4891, "step": 2669 }, { "epoch": 0.04641137513254185, "grad_norm": 2.1674406630893177, "learning_rate": 9.992955377737302e-07, "loss": 0.6358, "step": 2670 }, { "epoch": 0.046428757670044674, "grad_norm": 1.570696258925831, "learning_rate": 9.992940432459674e-07, "loss": 0.3025, "step": 2671 }, { "epoch": 0.0464461402075475, "grad_norm": 1.5717176271785376, "learning_rate": 9.992925471356719e-07, "loss": 0.278, "step": 2672 }, { "epoch": 0.04646352274505032, "grad_norm": 1.5909334516994833, "learning_rate": 9.992910494428485e-07, "loss": 0.4827, "step": 2673 }, { "epoch": 0.04648090528255315, "grad_norm": 2.0426457104616778, "learning_rate": 9.992895501675015e-07, "loss": 0.3897, "step": 2674 }, { "epoch": 0.046498287820055974, "grad_norm": 2.3951518887529524, "learning_rate": 9.992880493096361e-07, "loss": 0.3005, "step": 2675 }, { "epoch": 0.0465156703575588, "grad_norm": 1.9070856128627092, "learning_rate": 9.99286546869257e-07, "loss": 0.486, "step": 2676 }, { "epoch": 0.04653305289506162, "grad_norm": 2.1375052483715504, "learning_rate": 9.992850428463691e-07, "loss": 0.6804, "step": 2677 }, { "epoch": 0.046550435432564444, "grad_norm": 1.9731257732032115, "learning_rate": 9.992835372409766e-07, "loss": 0.3608, "step": 2678 }, { "epoch": 0.04656781797006727, "grad_norm": 1.478280502928028, "learning_rate": 9.992820300530848e-07, "loss": 0.2375, "step": 2679 }, { "epoch": 0.0465852005075701, "grad_norm": 1.689673014646749, "learning_rate": 9.992805212826984e-07, "loss": 0.5241, "step": 2680 }, { "epoch": 0.04660258304507292, "grad_norm": 2.2920093792017906, "learning_rate": 9.992790109298222e-07, "loss": 0.5025, "step": 2681 }, { "epoch": 0.046619965582575744, "grad_norm": 2.5195435998543423, "learning_rate": 9.992774989944605e-07, "loss": 0.3416, "step": 2682 }, { "epoch": 0.04663734812007857, "grad_norm": 1.663268445150997, "learning_rate": 9.992759854766188e-07, "loss": 0.4496, "step": 2683 }, { "epoch": 0.04665473065758139, "grad_norm": 1.555278376616642, "learning_rate": 9.992744703763016e-07, "loss": 0.5994, "step": 2684 }, { "epoch": 0.04667211319508422, "grad_norm": 2.860474225934606, "learning_rate": 9.992729536935135e-07, "loss": 0.583, "step": 2685 }, { "epoch": 0.046689495732587044, "grad_norm": 2.029799152124284, "learning_rate": 9.992714354282594e-07, "loss": 0.5995, "step": 2686 }, { "epoch": 0.04670687827008987, "grad_norm": 1.5126701632236745, "learning_rate": 9.992699155805445e-07, "loss": 0.3886, "step": 2687 }, { "epoch": 0.04672426080759269, "grad_norm": 1.7901240786288546, "learning_rate": 9.99268394150373e-07, "loss": 0.3725, "step": 2688 }, { "epoch": 0.046741643345095514, "grad_norm": 2.212929213015614, "learning_rate": 9.992668711377503e-07, "loss": 0.4485, "step": 2689 }, { "epoch": 0.046759025882598344, "grad_norm": 3.733815115648942, "learning_rate": 9.992653465426809e-07, "loss": 0.3027, "step": 2690 }, { "epoch": 0.04677640842010117, "grad_norm": 1.2796113310577077, "learning_rate": 9.992638203651696e-07, "loss": 0.4367, "step": 2691 }, { "epoch": 0.04679379095760399, "grad_norm": 2.6685030101843616, "learning_rate": 9.992622926052213e-07, "loss": 0.4975, "step": 2692 }, { "epoch": 0.046811173495106814, "grad_norm": 1.5340962561146896, "learning_rate": 9.992607632628409e-07, "loss": 0.6087, "step": 2693 }, { "epoch": 0.04682855603260964, "grad_norm": 2.0865089766018237, "learning_rate": 9.992592323380333e-07, "loss": 0.281, "step": 2694 }, { "epoch": 0.04684593857011247, "grad_norm": 2.7581188395891956, "learning_rate": 9.992576998308032e-07, "loss": 0.4451, "step": 2695 }, { "epoch": 0.04686332110761529, "grad_norm": 3.700419987956177, "learning_rate": 9.992561657411556e-07, "loss": 0.3568, "step": 2696 }, { "epoch": 0.046880703645118114, "grad_norm": 1.7980915309776182, "learning_rate": 9.992546300690953e-07, "loss": 0.6328, "step": 2697 }, { "epoch": 0.04689808618262094, "grad_norm": 1.6146033181124841, "learning_rate": 9.992530928146272e-07, "loss": 0.6482, "step": 2698 }, { "epoch": 0.04691546872012376, "grad_norm": 1.8474107136394864, "learning_rate": 9.99251553977756e-07, "loss": 0.6206, "step": 2699 }, { "epoch": 0.04693285125762659, "grad_norm": 2.5423036586471297, "learning_rate": 9.992500135584867e-07, "loss": 0.8753, "step": 2700 }, { "epoch": 0.046950233795129415, "grad_norm": 1.9012711822020303, "learning_rate": 9.992484715568243e-07, "loss": 0.2763, "step": 2701 }, { "epoch": 0.04696761633263224, "grad_norm": 1.9389280726004061, "learning_rate": 9.992469279727735e-07, "loss": 0.7211, "step": 2702 }, { "epoch": 0.04698499887013506, "grad_norm": 1.7207190711794393, "learning_rate": 9.992453828063393e-07, "loss": 0.2569, "step": 2703 }, { "epoch": 0.047002381407637885, "grad_norm": 2.1567443041202266, "learning_rate": 9.992438360575268e-07, "loss": 0.5554, "step": 2704 }, { "epoch": 0.047019763945140715, "grad_norm": 1.5733119511473848, "learning_rate": 9.992422877263404e-07, "loss": 0.4033, "step": 2705 }, { "epoch": 0.04703714648264354, "grad_norm": 2.381392261132698, "learning_rate": 9.992407378127855e-07, "loss": 0.3189, "step": 2706 }, { "epoch": 0.04705452902014636, "grad_norm": 1.3123212743458477, "learning_rate": 9.992391863168665e-07, "loss": 0.5562, "step": 2707 }, { "epoch": 0.047071911557649185, "grad_norm": 4.316029270576377, "learning_rate": 9.992376332385888e-07, "loss": 0.4307, "step": 2708 }, { "epoch": 0.04708929409515201, "grad_norm": 2.32273582265176, "learning_rate": 9.99236078577957e-07, "loss": 0.4941, "step": 2709 }, { "epoch": 0.04710667663265484, "grad_norm": 1.667784601081681, "learning_rate": 9.992345223349763e-07, "loss": 0.3383, "step": 2710 }, { "epoch": 0.04712405917015766, "grad_norm": 1.5491859980977323, "learning_rate": 9.992329645096513e-07, "loss": 0.3625, "step": 2711 }, { "epoch": 0.047141441707660485, "grad_norm": 2.4321362570244625, "learning_rate": 9.992314051019874e-07, "loss": 0.3912, "step": 2712 }, { "epoch": 0.04715882424516331, "grad_norm": 2.2007100674111753, "learning_rate": 9.992298441119891e-07, "loss": 0.3288, "step": 2713 }, { "epoch": 0.04717620678266613, "grad_norm": 3.572975987075944, "learning_rate": 9.992282815396615e-07, "loss": 0.2736, "step": 2714 }, { "epoch": 0.047193589320168955, "grad_norm": 2.24570948607054, "learning_rate": 9.992267173850097e-07, "loss": 0.4405, "step": 2715 }, { "epoch": 0.047210971857671785, "grad_norm": 2.5064583010443413, "learning_rate": 9.992251516480386e-07, "loss": 0.6848, "step": 2716 }, { "epoch": 0.04722835439517461, "grad_norm": 1.2599922430917807, "learning_rate": 9.992235843287529e-07, "loss": 0.2982, "step": 2717 }, { "epoch": 0.04724573693267743, "grad_norm": 2.035562576977072, "learning_rate": 9.992220154271577e-07, "loss": 0.303, "step": 2718 }, { "epoch": 0.047263119470180255, "grad_norm": 1.8104681034599714, "learning_rate": 9.992204449432584e-07, "loss": 0.3863, "step": 2719 }, { "epoch": 0.04728050200768308, "grad_norm": 1.126205084270141, "learning_rate": 9.992188728770594e-07, "loss": 0.4306, "step": 2720 }, { "epoch": 0.04729788454518591, "grad_norm": 1.776933983994641, "learning_rate": 9.992172992285658e-07, "loss": 0.5175, "step": 2721 }, { "epoch": 0.04731526708268873, "grad_norm": 1.5941089186169122, "learning_rate": 9.992157239977826e-07, "loss": 0.3387, "step": 2722 }, { "epoch": 0.047332649620191555, "grad_norm": 1.9897499792583349, "learning_rate": 9.99214147184715e-07, "loss": 0.3958, "step": 2723 }, { "epoch": 0.04735003215769438, "grad_norm": 2.561640774330947, "learning_rate": 9.99212568789368e-07, "loss": 0.4931, "step": 2724 }, { "epoch": 0.0473674146951972, "grad_norm": 2.352215525396735, "learning_rate": 9.992109888117463e-07, "loss": 0.3444, "step": 2725 }, { "epoch": 0.04738479723270003, "grad_norm": 1.506056707150727, "learning_rate": 9.992094072518553e-07, "loss": 0.224, "step": 2726 }, { "epoch": 0.047402179770202856, "grad_norm": 1.3267012161421685, "learning_rate": 9.992078241096998e-07, "loss": 0.5839, "step": 2727 }, { "epoch": 0.04741956230770568, "grad_norm": 2.382945447805491, "learning_rate": 9.992062393852846e-07, "loss": 0.3998, "step": 2728 }, { "epoch": 0.0474369448452085, "grad_norm": 2.0543173287913787, "learning_rate": 9.99204653078615e-07, "loss": 0.3256, "step": 2729 }, { "epoch": 0.047454327382711325, "grad_norm": 2.316234874631939, "learning_rate": 9.992030651896959e-07, "loss": 0.5728, "step": 2730 }, { "epoch": 0.047471709920214156, "grad_norm": 1.838280502913492, "learning_rate": 9.992014757185326e-07, "loss": 0.3518, "step": 2731 }, { "epoch": 0.04748909245771698, "grad_norm": 2.107025763182627, "learning_rate": 9.991998846651298e-07, "loss": 0.471, "step": 2732 }, { "epoch": 0.0475064749952198, "grad_norm": 2.1648065641286527, "learning_rate": 9.991982920294927e-07, "loss": 0.3629, "step": 2733 }, { "epoch": 0.047523857532722626, "grad_norm": 1.5077957383680511, "learning_rate": 9.991966978116262e-07, "loss": 0.3878, "step": 2734 }, { "epoch": 0.04754124007022545, "grad_norm": 2.9081979456131504, "learning_rate": 9.991951020115357e-07, "loss": 0.3188, "step": 2735 }, { "epoch": 0.04755862260772828, "grad_norm": 1.8541272429336517, "learning_rate": 9.991935046292259e-07, "loss": 0.6344, "step": 2736 }, { "epoch": 0.0475760051452311, "grad_norm": 4.550439069207581, "learning_rate": 9.99191905664702e-07, "loss": 0.6769, "step": 2737 }, { "epoch": 0.047593387682733926, "grad_norm": 2.118069194843992, "learning_rate": 9.991903051179692e-07, "loss": 0.9434, "step": 2738 }, { "epoch": 0.04761077022023675, "grad_norm": 1.8495025205492228, "learning_rate": 9.991887029890323e-07, "loss": 0.2174, "step": 2739 }, { "epoch": 0.04762815275773957, "grad_norm": 1.4277690068562028, "learning_rate": 9.991870992778965e-07, "loss": 0.5063, "step": 2740 }, { "epoch": 0.0476455352952424, "grad_norm": 1.8572571929867423, "learning_rate": 9.99185493984567e-07, "loss": 0.3658, "step": 2741 }, { "epoch": 0.047662917832745226, "grad_norm": 2.2112419456825716, "learning_rate": 9.99183887109049e-07, "loss": 0.5641, "step": 2742 }, { "epoch": 0.04768030037024805, "grad_norm": 1.8967939989059919, "learning_rate": 9.991822786513471e-07, "loss": 0.2127, "step": 2743 }, { "epoch": 0.04769768290775087, "grad_norm": 2.734312877640173, "learning_rate": 9.991806686114666e-07, "loss": 0.4849, "step": 2744 }, { "epoch": 0.047715065445253696, "grad_norm": 2.719990673484034, "learning_rate": 9.99179056989413e-07, "loss": 0.4702, "step": 2745 }, { "epoch": 0.047732447982756526, "grad_norm": 1.8582972232592596, "learning_rate": 9.991774437851908e-07, "loss": 0.4508, "step": 2746 }, { "epoch": 0.04774983052025935, "grad_norm": 3.0841456501572195, "learning_rate": 9.991758289988055e-07, "loss": 0.4787, "step": 2747 }, { "epoch": 0.04776721305776217, "grad_norm": 2.671166356731113, "learning_rate": 9.991742126302623e-07, "loss": 0.7001, "step": 2748 }, { "epoch": 0.047784595595264996, "grad_norm": 2.671398822282457, "learning_rate": 9.991725946795659e-07, "loss": 0.5188, "step": 2749 }, { "epoch": 0.04780197813276782, "grad_norm": 2.362743596563734, "learning_rate": 9.991709751467217e-07, "loss": 0.4783, "step": 2750 }, { "epoch": 0.04781936067027064, "grad_norm": 1.8657552990688686, "learning_rate": 9.99169354031735e-07, "loss": 0.4502, "step": 2751 }, { "epoch": 0.04783674320777347, "grad_norm": 1.8528795440169556, "learning_rate": 9.991677313346105e-07, "loss": 0.3647, "step": 2752 }, { "epoch": 0.047854125745276296, "grad_norm": 2.5178450991920074, "learning_rate": 9.991661070553537e-07, "loss": 0.3868, "step": 2753 }, { "epoch": 0.04787150828277912, "grad_norm": 1.5418710823831199, "learning_rate": 9.991644811939696e-07, "loss": 0.8399, "step": 2754 }, { "epoch": 0.04788889082028194, "grad_norm": 2.1785980668553226, "learning_rate": 9.991628537504635e-07, "loss": 0.3679, "step": 2755 }, { "epoch": 0.047906273357784766, "grad_norm": 1.8631535845476983, "learning_rate": 9.991612247248405e-07, "loss": 0.3004, "step": 2756 }, { "epoch": 0.0479236558952876, "grad_norm": 1.943296601933343, "learning_rate": 9.991595941171056e-07, "loss": 0.2726, "step": 2757 }, { "epoch": 0.04794103843279042, "grad_norm": 3.334086274624298, "learning_rate": 9.99157961927264e-07, "loss": 0.5072, "step": 2758 }, { "epoch": 0.04795842097029324, "grad_norm": 2.1167791817192154, "learning_rate": 9.991563281553211e-07, "loss": 0.7676, "step": 2759 }, { "epoch": 0.047975803507796067, "grad_norm": 2.4108463247156067, "learning_rate": 9.99154692801282e-07, "loss": 0.8105, "step": 2760 }, { "epoch": 0.04799318604529889, "grad_norm": 2.7190810156612604, "learning_rate": 9.991530558651516e-07, "loss": 0.544, "step": 2761 }, { "epoch": 0.04801056858280172, "grad_norm": 1.8961664036347232, "learning_rate": 9.991514173469353e-07, "loss": 0.3621, "step": 2762 }, { "epoch": 0.04802795112030454, "grad_norm": 1.5776513569051842, "learning_rate": 9.991497772466386e-07, "loss": 0.2756, "step": 2763 }, { "epoch": 0.04804533365780737, "grad_norm": 1.8251480392254074, "learning_rate": 9.991481355642661e-07, "loss": 0.361, "step": 2764 }, { "epoch": 0.04806271619531019, "grad_norm": 2.148182493964272, "learning_rate": 9.991464922998234e-07, "loss": 0.5942, "step": 2765 }, { "epoch": 0.04808009873281301, "grad_norm": 1.934398664124418, "learning_rate": 9.991448474533156e-07, "loss": 0.6005, "step": 2766 }, { "epoch": 0.048097481270315844, "grad_norm": 2.6741132422985476, "learning_rate": 9.99143201024748e-07, "loss": 0.4663, "step": 2767 }, { "epoch": 0.04811486380781867, "grad_norm": 1.5236935687247062, "learning_rate": 9.991415530141257e-07, "loss": 0.49, "step": 2768 }, { "epoch": 0.04813224634532149, "grad_norm": 3.3983484331866602, "learning_rate": 9.991399034214538e-07, "loss": 0.2472, "step": 2769 }, { "epoch": 0.048149628882824314, "grad_norm": 1.7750684970604935, "learning_rate": 9.991382522467379e-07, "loss": 0.1675, "step": 2770 }, { "epoch": 0.04816701142032714, "grad_norm": 2.9716875471570474, "learning_rate": 9.99136599489983e-07, "loss": 0.2642, "step": 2771 }, { "epoch": 0.04818439395782997, "grad_norm": 2.888436203351229, "learning_rate": 9.991349451511942e-07, "loss": 0.4511, "step": 2772 }, { "epoch": 0.04820177649533279, "grad_norm": 2.941665676873424, "learning_rate": 9.991332892303771e-07, "loss": 0.3736, "step": 2773 }, { "epoch": 0.048219159032835614, "grad_norm": 4.210883063117887, "learning_rate": 9.991316317275367e-07, "loss": 0.401, "step": 2774 }, { "epoch": 0.04823654157033844, "grad_norm": 1.9191394699710707, "learning_rate": 9.991299726426784e-07, "loss": 0.4332, "step": 2775 }, { "epoch": 0.04825392410784126, "grad_norm": 1.4072605650412815, "learning_rate": 9.991283119758071e-07, "loss": 0.4403, "step": 2776 }, { "epoch": 0.04827130664534409, "grad_norm": 1.3931190543917014, "learning_rate": 9.991266497269284e-07, "loss": 0.3145, "step": 2777 }, { "epoch": 0.048288689182846914, "grad_norm": 3.7131055085180806, "learning_rate": 9.991249858960476e-07, "loss": 0.4159, "step": 2778 }, { "epoch": 0.04830607172034974, "grad_norm": 1.7385746145632954, "learning_rate": 9.9912332048317e-07, "loss": 0.3866, "step": 2779 }, { "epoch": 0.04832345425785256, "grad_norm": 1.834488934404974, "learning_rate": 9.991216534883005e-07, "loss": 0.4695, "step": 2780 }, { "epoch": 0.048340836795355384, "grad_norm": 9.25219226037666, "learning_rate": 9.991199849114449e-07, "loss": 0.3909, "step": 2781 }, { "epoch": 0.04835821933285821, "grad_norm": 2.188344496450871, "learning_rate": 9.99118314752608e-07, "loss": 0.3468, "step": 2782 }, { "epoch": 0.04837560187036104, "grad_norm": 1.7151028472081382, "learning_rate": 9.991166430117954e-07, "loss": 0.5464, "step": 2783 }, { "epoch": 0.04839298440786386, "grad_norm": 1.6707623633724458, "learning_rate": 9.991149696890123e-07, "loss": 0.3215, "step": 2784 }, { "epoch": 0.048410366945366684, "grad_norm": 2.1620919847895945, "learning_rate": 9.99113294784264e-07, "loss": 0.3136, "step": 2785 }, { "epoch": 0.04842774948286951, "grad_norm": 2.1554763223972526, "learning_rate": 9.991116182975558e-07, "loss": 0.5464, "step": 2786 }, { "epoch": 0.04844513202037233, "grad_norm": 1.8071481886063105, "learning_rate": 9.991099402288932e-07, "loss": 0.5672, "step": 2787 }, { "epoch": 0.04846251455787516, "grad_norm": 1.7800019981452562, "learning_rate": 9.991082605782812e-07, "loss": 0.5543, "step": 2788 }, { "epoch": 0.048479897095377984, "grad_norm": 2.612660731156435, "learning_rate": 9.991065793457253e-07, "loss": 0.4732, "step": 2789 }, { "epoch": 0.04849727963288081, "grad_norm": 2.507453917199832, "learning_rate": 9.99104896531231e-07, "loss": 0.5592, "step": 2790 }, { "epoch": 0.04851466217038363, "grad_norm": 1.7743231182286947, "learning_rate": 9.991032121348033e-07, "loss": 0.5141, "step": 2791 }, { "epoch": 0.048532044707886454, "grad_norm": 2.834893578325904, "learning_rate": 9.991015261564477e-07, "loss": 0.325, "step": 2792 }, { "epoch": 0.048549427245389284, "grad_norm": 3.1314160249541705, "learning_rate": 9.990998385961695e-07, "loss": 0.6106, "step": 2793 }, { "epoch": 0.04856680978289211, "grad_norm": 1.2931899532624411, "learning_rate": 9.990981494539741e-07, "loss": 0.3404, "step": 2794 }, { "epoch": 0.04858419232039493, "grad_norm": 3.658852519709864, "learning_rate": 9.990964587298669e-07, "loss": 0.6427, "step": 2795 }, { "epoch": 0.048601574857897754, "grad_norm": 2.202680701584942, "learning_rate": 9.990947664238531e-07, "loss": 0.2371, "step": 2796 }, { "epoch": 0.04861895739540058, "grad_norm": 3.2161634291489243, "learning_rate": 9.990930725359382e-07, "loss": 0.457, "step": 2797 }, { "epoch": 0.04863633993290341, "grad_norm": 2.3339514680786624, "learning_rate": 9.990913770661275e-07, "loss": 0.6277, "step": 2798 }, { "epoch": 0.04865372247040623, "grad_norm": 3.210125897654272, "learning_rate": 9.990896800144266e-07, "loss": 0.4577, "step": 2799 }, { "epoch": 0.048671105007909055, "grad_norm": 2.336494886863314, "learning_rate": 9.990879813808404e-07, "loss": 0.5635, "step": 2800 }, { "epoch": 0.04868848754541188, "grad_norm": 1.5302996392881276, "learning_rate": 9.990862811653747e-07, "loss": 0.4729, "step": 2801 }, { "epoch": 0.0487058700829147, "grad_norm": 1.4295126340968378, "learning_rate": 9.990845793680347e-07, "loss": 0.5622, "step": 2802 }, { "epoch": 0.04872325262041753, "grad_norm": 2.977417740292696, "learning_rate": 9.990828759888258e-07, "loss": 0.437, "step": 2803 }, { "epoch": 0.048740635157920355, "grad_norm": 1.800599447436016, "learning_rate": 9.990811710277535e-07, "loss": 0.6058, "step": 2804 }, { "epoch": 0.04875801769542318, "grad_norm": 3.199497407465532, "learning_rate": 9.99079464484823e-07, "loss": 0.4334, "step": 2805 }, { "epoch": 0.048775400232926, "grad_norm": 2.8619000350255304, "learning_rate": 9.990777563600401e-07, "loss": 0.5694, "step": 2806 }, { "epoch": 0.048792782770428825, "grad_norm": 2.257688910808498, "learning_rate": 9.9907604665341e-07, "loss": 0.5019, "step": 2807 }, { "epoch": 0.048810165307931655, "grad_norm": 2.4113301473975994, "learning_rate": 9.990743353649379e-07, "loss": 0.2851, "step": 2808 }, { "epoch": 0.04882754784543448, "grad_norm": 1.7934903846942134, "learning_rate": 9.990726224946294e-07, "loss": 0.6062, "step": 2809 }, { "epoch": 0.0488449303829373, "grad_norm": 2.145385771300282, "learning_rate": 9.9907090804249e-07, "loss": 0.6697, "step": 2810 }, { "epoch": 0.048862312920440125, "grad_norm": 2.594541765367772, "learning_rate": 9.990691920085251e-07, "loss": 0.6407, "step": 2811 }, { "epoch": 0.04887969545794295, "grad_norm": 2.068663687615588, "learning_rate": 9.9906747439274e-07, "loss": 0.4546, "step": 2812 }, { "epoch": 0.04889707799544578, "grad_norm": 1.356309877341885, "learning_rate": 9.990657551951404e-07, "loss": 0.596, "step": 2813 }, { "epoch": 0.0489144605329486, "grad_norm": 1.8302153290358139, "learning_rate": 9.990640344157317e-07, "loss": 0.3888, "step": 2814 }, { "epoch": 0.048931843070451425, "grad_norm": 2.1182533101281895, "learning_rate": 9.99062312054519e-07, "loss": 0.5298, "step": 2815 }, { "epoch": 0.04894922560795425, "grad_norm": 1.313775786607654, "learning_rate": 9.990605881115081e-07, "loss": 0.3942, "step": 2816 }, { "epoch": 0.04896660814545707, "grad_norm": 2.155814662263691, "learning_rate": 9.990588625867045e-07, "loss": 0.6469, "step": 2817 }, { "epoch": 0.048983990682959895, "grad_norm": 2.507313006547122, "learning_rate": 9.990571354801137e-07, "loss": 0.5156, "step": 2818 }, { "epoch": 0.049001373220462725, "grad_norm": 2.4548477082399773, "learning_rate": 9.990554067917407e-07, "loss": 0.4153, "step": 2819 }, { "epoch": 0.04901875575796555, "grad_norm": 2.4099679799408475, "learning_rate": 9.990536765215914e-07, "loss": 0.3414, "step": 2820 }, { "epoch": 0.04903613829546837, "grad_norm": 1.6632465410710306, "learning_rate": 9.990519446696713e-07, "loss": 0.4734, "step": 2821 }, { "epoch": 0.049053520832971195, "grad_norm": 2.1940394021159753, "learning_rate": 9.990502112359857e-07, "loss": 0.318, "step": 2822 }, { "epoch": 0.04907090337047402, "grad_norm": 1.4533120860186983, "learning_rate": 9.9904847622054e-07, "loss": 0.3152, "step": 2823 }, { "epoch": 0.04908828590797685, "grad_norm": 2.364586890980901, "learning_rate": 9.990467396233401e-07, "loss": 0.29, "step": 2824 }, { "epoch": 0.04910566844547967, "grad_norm": 1.3253183518914866, "learning_rate": 9.990450014443913e-07, "loss": 0.4784, "step": 2825 }, { "epoch": 0.049123050982982495, "grad_norm": 1.8682599099049233, "learning_rate": 9.99043261683699e-07, "loss": 0.7099, "step": 2826 }, { "epoch": 0.04914043352048532, "grad_norm": 1.838578718221131, "learning_rate": 9.990415203412687e-07, "loss": 0.4348, "step": 2827 }, { "epoch": 0.04915781605798814, "grad_norm": 1.218557135455549, "learning_rate": 9.990397774171062e-07, "loss": 0.2856, "step": 2828 }, { "epoch": 0.04917519859549097, "grad_norm": 1.7025472068611573, "learning_rate": 9.990380329112169e-07, "loss": 0.4785, "step": 2829 }, { "epoch": 0.049192581132993796, "grad_norm": 2.086388491302185, "learning_rate": 9.990362868236061e-07, "loss": 0.3951, "step": 2830 }, { "epoch": 0.04920996367049662, "grad_norm": 2.730871771641944, "learning_rate": 9.990345391542796e-07, "loss": 0.5151, "step": 2831 }, { "epoch": 0.04922734620799944, "grad_norm": 1.9154541351843393, "learning_rate": 9.990327899032428e-07, "loss": 0.4806, "step": 2832 }, { "epoch": 0.049244728745502266, "grad_norm": 1.352299319369093, "learning_rate": 9.990310390705012e-07, "loss": 0.3781, "step": 2833 }, { "epoch": 0.049262111283005096, "grad_norm": 2.849760038562136, "learning_rate": 9.990292866560607e-07, "loss": 0.4937, "step": 2834 }, { "epoch": 0.04927949382050792, "grad_norm": 2.212408068071228, "learning_rate": 9.990275326599263e-07, "loss": 0.4919, "step": 2835 }, { "epoch": 0.04929687635801074, "grad_norm": 2.2254192012090885, "learning_rate": 9.99025777082104e-07, "loss": 0.6508, "step": 2836 }, { "epoch": 0.049314258895513566, "grad_norm": 2.5283816888359003, "learning_rate": 9.990240199225994e-07, "loss": 0.3519, "step": 2837 }, { "epoch": 0.04933164143301639, "grad_norm": 1.4720009781764507, "learning_rate": 9.990222611814176e-07, "loss": 0.2689, "step": 2838 }, { "epoch": 0.04934902397051922, "grad_norm": 2.2110319353451655, "learning_rate": 9.990205008585646e-07, "loss": 0.4809, "step": 2839 }, { "epoch": 0.04936640650802204, "grad_norm": 1.9033263528797006, "learning_rate": 9.99018738954046e-07, "loss": 0.3827, "step": 2840 }, { "epoch": 0.049383789045524866, "grad_norm": 2.23187003841596, "learning_rate": 9.99016975467867e-07, "loss": 0.4296, "step": 2841 }, { "epoch": 0.04940117158302769, "grad_norm": 1.7343885055274935, "learning_rate": 9.990152104000333e-07, "loss": 0.4031, "step": 2842 }, { "epoch": 0.04941855412053051, "grad_norm": 2.0626937916538384, "learning_rate": 9.990134437505509e-07, "loss": 0.439, "step": 2843 }, { "epoch": 0.04943593665803334, "grad_norm": 2.46306982489281, "learning_rate": 9.99011675519425e-07, "loss": 0.6364, "step": 2844 }, { "epoch": 0.049453319195536166, "grad_norm": 3.979408283418573, "learning_rate": 9.990099057066613e-07, "loss": 0.6487, "step": 2845 }, { "epoch": 0.04947070173303899, "grad_norm": 2.3432713503366935, "learning_rate": 9.990081343122654e-07, "loss": 0.684, "step": 2846 }, { "epoch": 0.04948808427054181, "grad_norm": 2.2535071235227946, "learning_rate": 9.99006361336243e-07, "loss": 0.3752, "step": 2847 }, { "epoch": 0.049505466808044636, "grad_norm": 1.9839942968886417, "learning_rate": 9.990045867785997e-07, "loss": 0.3221, "step": 2848 }, { "epoch": 0.049522849345547466, "grad_norm": 2.4105608689052596, "learning_rate": 9.990028106393408e-07, "loss": 0.4021, "step": 2849 }, { "epoch": 0.04954023188305029, "grad_norm": 1.9354980612471417, "learning_rate": 9.990010329184726e-07, "loss": 0.4614, "step": 2850 }, { "epoch": 0.04955761442055311, "grad_norm": 2.3477518396042596, "learning_rate": 9.98999253616e-07, "loss": 0.637, "step": 2851 }, { "epoch": 0.049574996958055936, "grad_norm": 2.897968422809475, "learning_rate": 9.989974727319292e-07, "loss": 0.3083, "step": 2852 }, { "epoch": 0.04959237949555876, "grad_norm": 1.4997459782836218, "learning_rate": 9.989956902662658e-07, "loss": 0.342, "step": 2853 }, { "epoch": 0.04960976203306158, "grad_norm": 1.7754368693627578, "learning_rate": 9.98993906219015e-07, "loss": 0.4837, "step": 2854 }, { "epoch": 0.04962714457056441, "grad_norm": 1.7878242367025219, "learning_rate": 9.989921205901828e-07, "loss": 0.3899, "step": 2855 }, { "epoch": 0.049644527108067237, "grad_norm": 2.3740247402391, "learning_rate": 9.98990333379775e-07, "loss": 0.3868, "step": 2856 }, { "epoch": 0.04966190964557006, "grad_norm": 1.6366965266222384, "learning_rate": 9.989885445877966e-07, "loss": 0.205, "step": 2857 }, { "epoch": 0.04967929218307288, "grad_norm": 1.5542792130602903, "learning_rate": 9.989867542142541e-07, "loss": 0.2598, "step": 2858 }, { "epoch": 0.049696674720575706, "grad_norm": 1.9010727437117658, "learning_rate": 9.989849622591527e-07, "loss": 0.8691, "step": 2859 }, { "epoch": 0.04971405725807854, "grad_norm": 2.2099881863582316, "learning_rate": 9.989831687224982e-07, "loss": 0.2728, "step": 2860 }, { "epoch": 0.04973143979558136, "grad_norm": 2.7751918173583308, "learning_rate": 9.989813736042961e-07, "loss": 0.4172, "step": 2861 }, { "epoch": 0.04974882233308418, "grad_norm": 1.711759042750471, "learning_rate": 9.989795769045525e-07, "loss": 0.294, "step": 2862 }, { "epoch": 0.04976620487058701, "grad_norm": 1.3620371893810594, "learning_rate": 9.989777786232728e-07, "loss": 0.5096, "step": 2863 }, { "epoch": 0.04978358740808983, "grad_norm": 2.14514706207775, "learning_rate": 9.989759787604628e-07, "loss": 0.6639, "step": 2864 }, { "epoch": 0.04980096994559266, "grad_norm": 2.920154776678062, "learning_rate": 9.98974177316128e-07, "loss": 1.0541, "step": 2865 }, { "epoch": 0.049818352483095484, "grad_norm": 1.3205640329865889, "learning_rate": 9.989723742902744e-07, "loss": 0.2981, "step": 2866 }, { "epoch": 0.04983573502059831, "grad_norm": 1.9228502851829798, "learning_rate": 9.989705696829076e-07, "loss": 0.8623, "step": 2867 }, { "epoch": 0.04985311755810113, "grad_norm": 2.3287622116081637, "learning_rate": 9.989687634940332e-07, "loss": 0.3999, "step": 2868 }, { "epoch": 0.049870500095603953, "grad_norm": 2.4792071760001724, "learning_rate": 9.98966955723657e-07, "loss": 0.5581, "step": 2869 }, { "epoch": 0.049887882633106784, "grad_norm": 3.0357899709899487, "learning_rate": 9.98965146371785e-07, "loss": 0.4888, "step": 2870 }, { "epoch": 0.04990526517060961, "grad_norm": 1.6937549714790656, "learning_rate": 9.989633354384223e-07, "loss": 0.4295, "step": 2871 }, { "epoch": 0.04992264770811243, "grad_norm": 1.8180778686668404, "learning_rate": 9.989615229235754e-07, "loss": 0.7615, "step": 2872 }, { "epoch": 0.049940030245615254, "grad_norm": 5.467949047815775, "learning_rate": 9.989597088272495e-07, "loss": 0.3862, "step": 2873 }, { "epoch": 0.04995741278311808, "grad_norm": 2.136234273866109, "learning_rate": 9.989578931494506e-07, "loss": 0.4345, "step": 2874 }, { "epoch": 0.04997479532062091, "grad_norm": 4.934796957819374, "learning_rate": 9.989560758901843e-07, "loss": 0.4116, "step": 2875 }, { "epoch": 0.04999217785812373, "grad_norm": 1.4131862827979782, "learning_rate": 9.989542570494564e-07, "loss": 0.2698, "step": 2876 }, { "epoch": 0.050009560395626554, "grad_norm": 2.0520100363421365, "learning_rate": 9.989524366272727e-07, "loss": 0.3274, "step": 2877 }, { "epoch": 0.05002694293312938, "grad_norm": 2.338948285754957, "learning_rate": 9.989506146236391e-07, "loss": 0.4381, "step": 2878 }, { "epoch": 0.0500443254706322, "grad_norm": 1.6476713479962126, "learning_rate": 9.989487910385613e-07, "loss": 0.285, "step": 2879 }, { "epoch": 0.05006170800813503, "grad_norm": 1.7683438676610106, "learning_rate": 9.989469658720449e-07, "loss": 0.3298, "step": 2880 }, { "epoch": 0.050079090545637854, "grad_norm": 1.707880377276374, "learning_rate": 9.989451391240959e-07, "loss": 0.252, "step": 2881 }, { "epoch": 0.05009647308314068, "grad_norm": 2.591529173188786, "learning_rate": 9.989433107947198e-07, "loss": 0.3131, "step": 2882 }, { "epoch": 0.0501138556206435, "grad_norm": 2.032908655804006, "learning_rate": 9.98941480883923e-07, "loss": 0.3753, "step": 2883 }, { "epoch": 0.050131238158146324, "grad_norm": 2.0568937844042425, "learning_rate": 9.989396493917106e-07, "loss": 0.3937, "step": 2884 }, { "epoch": 0.050148620695649154, "grad_norm": 2.588973606152592, "learning_rate": 9.989378163180887e-07, "loss": 0.3571, "step": 2885 }, { "epoch": 0.05016600323315198, "grad_norm": 1.732352778105997, "learning_rate": 9.989359816630632e-07, "loss": 0.5995, "step": 2886 }, { "epoch": 0.0501833857706548, "grad_norm": 1.885848036800764, "learning_rate": 9.989341454266399e-07, "loss": 0.349, "step": 2887 }, { "epoch": 0.050200768308157624, "grad_norm": 2.6068779923552574, "learning_rate": 9.989323076088243e-07, "loss": 0.446, "step": 2888 }, { "epoch": 0.05021815084566045, "grad_norm": 1.85705018157926, "learning_rate": 9.989304682096227e-07, "loss": 0.2558, "step": 2889 }, { "epoch": 0.05023553338316327, "grad_norm": 1.5166366358043681, "learning_rate": 9.989286272290407e-07, "loss": 0.499, "step": 2890 }, { "epoch": 0.0502529159206661, "grad_norm": 2.1591530421261456, "learning_rate": 9.98926784667084e-07, "loss": 0.5021, "step": 2891 }, { "epoch": 0.050270298458168924, "grad_norm": 1.2887420801560618, "learning_rate": 9.989249405237587e-07, "loss": 0.551, "step": 2892 }, { "epoch": 0.05028768099567175, "grad_norm": 2.9878620290866116, "learning_rate": 9.989230947990706e-07, "loss": 0.5197, "step": 2893 }, { "epoch": 0.05030506353317457, "grad_norm": 2.4280016933806414, "learning_rate": 9.989212474930252e-07, "loss": 0.5602, "step": 2894 }, { "epoch": 0.050322446070677394, "grad_norm": 1.92578454094921, "learning_rate": 9.989193986056287e-07, "loss": 0.7682, "step": 2895 }, { "epoch": 0.050339828608180225, "grad_norm": 2.5237108143178633, "learning_rate": 9.98917548136887e-07, "loss": 0.3548, "step": 2896 }, { "epoch": 0.05035721114568305, "grad_norm": 1.4465231772968412, "learning_rate": 9.98915696086806e-07, "loss": 0.4454, "step": 2897 }, { "epoch": 0.05037459368318587, "grad_norm": 2.5448162991319063, "learning_rate": 9.989138424553913e-07, "loss": 0.371, "step": 2898 }, { "epoch": 0.050391976220688695, "grad_norm": 2.4700157292247296, "learning_rate": 9.989119872426487e-07, "loss": 0.6516, "step": 2899 }, { "epoch": 0.05040935875819152, "grad_norm": 1.4080737177774387, "learning_rate": 9.989101304485846e-07, "loss": 0.4778, "step": 2900 }, { "epoch": 0.05042674129569435, "grad_norm": 1.9312233865222437, "learning_rate": 9.989082720732044e-07, "loss": 0.3234, "step": 2901 }, { "epoch": 0.05044412383319717, "grad_norm": 1.3916507795612372, "learning_rate": 9.98906412116514e-07, "loss": 0.2945, "step": 2902 }, { "epoch": 0.050461506370699995, "grad_norm": 1.5565963045876248, "learning_rate": 9.989045505785198e-07, "loss": 0.336, "step": 2903 }, { "epoch": 0.05047888890820282, "grad_norm": 1.389880468419804, "learning_rate": 9.989026874592272e-07, "loss": 0.4069, "step": 2904 }, { "epoch": 0.05049627144570564, "grad_norm": 6.599398208420969, "learning_rate": 9.989008227586422e-07, "loss": 0.4059, "step": 2905 }, { "epoch": 0.05051365398320847, "grad_norm": 1.5908838750307317, "learning_rate": 9.988989564767708e-07, "loss": 0.509, "step": 2906 }, { "epoch": 0.050531036520711295, "grad_norm": 2.1017645236709503, "learning_rate": 9.98897088613619e-07, "loss": 0.7424, "step": 2907 }, { "epoch": 0.05054841905821412, "grad_norm": 2.5945178424215323, "learning_rate": 9.988952191691923e-07, "loss": 0.26, "step": 2908 }, { "epoch": 0.05056580159571694, "grad_norm": 4.004474121453953, "learning_rate": 9.988933481434972e-07, "loss": 0.4446, "step": 2909 }, { "epoch": 0.050583184133219765, "grad_norm": 2.3571381166111625, "learning_rate": 9.988914755365394e-07, "loss": 0.9127, "step": 2910 }, { "epoch": 0.050600566670722595, "grad_norm": 2.0441100975929913, "learning_rate": 9.988896013483245e-07, "loss": 0.4678, "step": 2911 }, { "epoch": 0.05061794920822542, "grad_norm": 2.2833275361428487, "learning_rate": 9.98887725578859e-07, "loss": 0.5301, "step": 2912 }, { "epoch": 0.05063533174572824, "grad_norm": 2.2163481019518714, "learning_rate": 9.988858482281485e-07, "loss": 0.6129, "step": 2913 }, { "epoch": 0.050652714283231065, "grad_norm": 2.238510150241473, "learning_rate": 9.98883969296199e-07, "loss": 0.471, "step": 2914 }, { "epoch": 0.05067009682073389, "grad_norm": 2.496310065891376, "learning_rate": 9.988820887830164e-07, "loss": 0.5801, "step": 2915 }, { "epoch": 0.05068747935823672, "grad_norm": 1.678827990020343, "learning_rate": 9.988802066886071e-07, "loss": 0.5422, "step": 2916 }, { "epoch": 0.05070486189573954, "grad_norm": 1.5860386414765502, "learning_rate": 9.988783230129765e-07, "loss": 0.2783, "step": 2917 }, { "epoch": 0.050722244433242365, "grad_norm": 1.4685257306511568, "learning_rate": 9.988764377561307e-07, "loss": 0.399, "step": 2918 }, { "epoch": 0.05073962697074519, "grad_norm": 1.3731598468172599, "learning_rate": 9.988745509180758e-07, "loss": 0.477, "step": 2919 }, { "epoch": 0.05075700950824801, "grad_norm": 1.6495022964246315, "learning_rate": 9.988726624988177e-07, "loss": 0.2416, "step": 2920 }, { "epoch": 0.05077439204575084, "grad_norm": 3.6374981942010667, "learning_rate": 9.988707724983626e-07, "loss": 0.4749, "step": 2921 }, { "epoch": 0.050791774583253665, "grad_norm": 1.9807741043054647, "learning_rate": 9.98868880916716e-07, "loss": 0.9453, "step": 2922 }, { "epoch": 0.05080915712075649, "grad_norm": 1.8525746183640162, "learning_rate": 9.988669877538844e-07, "loss": 0.4783, "step": 2923 }, { "epoch": 0.05082653965825931, "grad_norm": 2.2899909328111727, "learning_rate": 9.988650930098736e-07, "loss": 0.5485, "step": 2924 }, { "epoch": 0.050843922195762135, "grad_norm": 1.8341902040273814, "learning_rate": 9.988631966846898e-07, "loss": 0.3986, "step": 2925 }, { "epoch": 0.05086130473326496, "grad_norm": 1.8479990787741494, "learning_rate": 9.988612987783385e-07, "loss": 0.2501, "step": 2926 }, { "epoch": 0.05087868727076779, "grad_norm": 1.6860650916953275, "learning_rate": 9.98859399290826e-07, "loss": 0.2865, "step": 2927 }, { "epoch": 0.05089606980827061, "grad_norm": 2.2282739613098883, "learning_rate": 9.988574982221586e-07, "loss": 0.5309, "step": 2928 }, { "epoch": 0.050913452345773436, "grad_norm": 3.8004965420976586, "learning_rate": 9.988555955723421e-07, "loss": 0.3895, "step": 2929 }, { "epoch": 0.05093083488327626, "grad_norm": 1.5838013565173692, "learning_rate": 9.988536913413826e-07, "loss": 0.5652, "step": 2930 }, { "epoch": 0.05094821742077908, "grad_norm": 2.5927348266090897, "learning_rate": 9.988517855292859e-07, "loss": 0.5451, "step": 2931 }, { "epoch": 0.05096559995828191, "grad_norm": 2.912623078827793, "learning_rate": 9.988498781360582e-07, "loss": 0.5156, "step": 2932 }, { "epoch": 0.050982982495784736, "grad_norm": 1.4991111384417732, "learning_rate": 9.988479691617055e-07, "loss": 0.7829, "step": 2933 }, { "epoch": 0.05100036503328756, "grad_norm": 1.7802108926046498, "learning_rate": 9.98846058606234e-07, "loss": 0.5709, "step": 2934 }, { "epoch": 0.05101774757079038, "grad_norm": 2.983823226909069, "learning_rate": 9.988441464696494e-07, "loss": 0.5072, "step": 2935 }, { "epoch": 0.051035130108293206, "grad_norm": 1.880645078036631, "learning_rate": 9.988422327519583e-07, "loss": 0.2658, "step": 2936 }, { "epoch": 0.051052512645796036, "grad_norm": 1.0807572518770017, "learning_rate": 9.988403174531663e-07, "loss": 0.3998, "step": 2937 }, { "epoch": 0.05106989518329886, "grad_norm": 1.1070803999591403, "learning_rate": 9.988384005732798e-07, "loss": 0.1607, "step": 2938 }, { "epoch": 0.05108727772080168, "grad_norm": 2.3459399740548106, "learning_rate": 9.988364821123046e-07, "loss": 0.4197, "step": 2939 }, { "epoch": 0.051104660258304506, "grad_norm": 1.849861387043461, "learning_rate": 9.988345620702466e-07, "loss": 0.4504, "step": 2940 }, { "epoch": 0.05112204279580733, "grad_norm": 1.497680769650306, "learning_rate": 9.988326404471126e-07, "loss": 0.1858, "step": 2941 }, { "epoch": 0.05113942533331016, "grad_norm": 1.831266787800505, "learning_rate": 9.988307172429083e-07, "loss": 0.2273, "step": 2942 }, { "epoch": 0.05115680787081298, "grad_norm": 1.562502595109236, "learning_rate": 9.988287924576395e-07, "loss": 0.5536, "step": 2943 }, { "epoch": 0.051174190408315806, "grad_norm": 2.8465616796788074, "learning_rate": 9.988268660913128e-07, "loss": 0.7899, "step": 2944 }, { "epoch": 0.05119157294581863, "grad_norm": 1.9725295781289307, "learning_rate": 9.988249381439339e-07, "loss": 0.6886, "step": 2945 }, { "epoch": 0.05120895548332145, "grad_norm": 1.8059413187016016, "learning_rate": 9.98823008615509e-07, "loss": 0.3449, "step": 2946 }, { "epoch": 0.05122633802082428, "grad_norm": 1.6978237651242543, "learning_rate": 9.988210775060448e-07, "loss": 0.5345, "step": 2947 }, { "epoch": 0.051243720558327106, "grad_norm": 1.4484824928213895, "learning_rate": 9.988191448155465e-07, "loss": 0.4262, "step": 2948 }, { "epoch": 0.05126110309582993, "grad_norm": 6.896598081720866, "learning_rate": 9.988172105440207e-07, "loss": 0.5428, "step": 2949 }, { "epoch": 0.05127848563333275, "grad_norm": 2.879165376998973, "learning_rate": 9.988152746914734e-07, "loss": 0.7008, "step": 2950 }, { "epoch": 0.051295868170835576, "grad_norm": 2.17443724706611, "learning_rate": 9.98813337257911e-07, "loss": 0.4338, "step": 2951 }, { "epoch": 0.051313250708338407, "grad_norm": 4.587812003807413, "learning_rate": 9.988113982433396e-07, "loss": 0.242, "step": 2952 }, { "epoch": 0.05133063324584123, "grad_norm": 1.370257584542745, "learning_rate": 9.988094576477648e-07, "loss": 0.5911, "step": 2953 }, { "epoch": 0.05134801578334405, "grad_norm": 1.4368131068671304, "learning_rate": 9.988075154711935e-07, "loss": 0.4054, "step": 2954 }, { "epoch": 0.051365398320846876, "grad_norm": 3.9779805596549553, "learning_rate": 9.988055717136312e-07, "loss": 0.4773, "step": 2955 }, { "epoch": 0.0513827808583497, "grad_norm": 2.1162474642525737, "learning_rate": 9.988036263750847e-07, "loss": 0.7899, "step": 2956 }, { "epoch": 0.05140016339585252, "grad_norm": 3.264850250286093, "learning_rate": 9.988016794555597e-07, "loss": 0.5075, "step": 2957 }, { "epoch": 0.05141754593335535, "grad_norm": 1.8395437319166006, "learning_rate": 9.987997309550624e-07, "loss": 0.2997, "step": 2958 }, { "epoch": 0.05143492847085818, "grad_norm": 1.3253707403977608, "learning_rate": 9.987977808735991e-07, "loss": 0.3529, "step": 2959 }, { "epoch": 0.051452311008361, "grad_norm": 1.933944916448838, "learning_rate": 9.987958292111762e-07, "loss": 0.7819, "step": 2960 }, { "epoch": 0.05146969354586382, "grad_norm": 2.6136763645648333, "learning_rate": 9.987938759677995e-07, "loss": 0.2758, "step": 2961 }, { "epoch": 0.05148707608336665, "grad_norm": 1.6214334680042788, "learning_rate": 9.987919211434752e-07, "loss": 0.2752, "step": 2962 }, { "epoch": 0.05150445862086948, "grad_norm": 1.7142687253647297, "learning_rate": 9.987899647382098e-07, "loss": 0.2873, "step": 2963 }, { "epoch": 0.0515218411583723, "grad_norm": 2.3876385212703717, "learning_rate": 9.987880067520092e-07, "loss": 0.4773, "step": 2964 }, { "epoch": 0.051539223695875123, "grad_norm": 1.7948470143587025, "learning_rate": 9.9878604718488e-07, "loss": 0.4035, "step": 2965 }, { "epoch": 0.05155660623337795, "grad_norm": 1.5925471697204134, "learning_rate": 9.987840860368279e-07, "loss": 0.3852, "step": 2966 }, { "epoch": 0.05157398877088077, "grad_norm": 1.8141747913622654, "learning_rate": 9.987821233078596e-07, "loss": 0.7508, "step": 2967 }, { "epoch": 0.0515913713083836, "grad_norm": 2.1103834708599636, "learning_rate": 9.987801589979807e-07, "loss": 0.5399, "step": 2968 }, { "epoch": 0.051608753845886424, "grad_norm": 2.090536384754198, "learning_rate": 9.987781931071982e-07, "loss": 0.481, "step": 2969 }, { "epoch": 0.05162613638338925, "grad_norm": 1.6972666462323014, "learning_rate": 9.987762256355177e-07, "loss": 0.7547, "step": 2970 }, { "epoch": 0.05164351892089207, "grad_norm": 2.3300427771637535, "learning_rate": 9.987742565829457e-07, "loss": 0.4098, "step": 2971 }, { "epoch": 0.051660901458394894, "grad_norm": 1.79742636782898, "learning_rate": 9.987722859494888e-07, "loss": 0.4491, "step": 2972 }, { "epoch": 0.051678283995897724, "grad_norm": 1.6730265028133129, "learning_rate": 9.987703137351525e-07, "loss": 0.2028, "step": 2973 }, { "epoch": 0.05169566653340055, "grad_norm": 3.610071928915733, "learning_rate": 9.987683399399435e-07, "loss": 0.2773, "step": 2974 }, { "epoch": 0.05171304907090337, "grad_norm": 1.6452467934117736, "learning_rate": 9.98766364563868e-07, "loss": 0.6722, "step": 2975 }, { "epoch": 0.051730431608406194, "grad_norm": 2.325017770604705, "learning_rate": 9.987643876069323e-07, "loss": 0.4073, "step": 2976 }, { "epoch": 0.05174781414590902, "grad_norm": 1.6990410841591395, "learning_rate": 9.987624090691424e-07, "loss": 0.3416, "step": 2977 }, { "epoch": 0.05176519668341185, "grad_norm": 1.5469182358942812, "learning_rate": 9.98760428950505e-07, "loss": 0.9511, "step": 2978 }, { "epoch": 0.05178257922091467, "grad_norm": 1.4521410354350643, "learning_rate": 9.98758447251026e-07, "loss": 0.3281, "step": 2979 }, { "epoch": 0.051799961758417494, "grad_norm": 1.9565998384325993, "learning_rate": 9.98756463970712e-07, "loss": 0.41, "step": 2980 }, { "epoch": 0.05181734429592032, "grad_norm": 1.8633370657063988, "learning_rate": 9.98754479109569e-07, "loss": 0.3579, "step": 2981 }, { "epoch": 0.05183472683342314, "grad_norm": 1.4581888539655656, "learning_rate": 9.987524926676033e-07, "loss": 0.3307, "step": 2982 }, { "epoch": 0.05185210937092597, "grad_norm": 1.8626960156713612, "learning_rate": 9.987505046448215e-07, "loss": 0.7188, "step": 2983 }, { "epoch": 0.051869491908428794, "grad_norm": 3.201294874884223, "learning_rate": 9.987485150412295e-07, "loss": 0.5011, "step": 2984 }, { "epoch": 0.05188687444593162, "grad_norm": 1.8549499374149272, "learning_rate": 9.987465238568341e-07, "loss": 0.424, "step": 2985 }, { "epoch": 0.05190425698343444, "grad_norm": 1.5711240740403678, "learning_rate": 9.98744531091641e-07, "loss": 0.4372, "step": 2986 }, { "epoch": 0.051921639520937264, "grad_norm": 2.096183693082965, "learning_rate": 9.987425367456573e-07, "loss": 0.2972, "step": 2987 }, { "epoch": 0.051939022058440094, "grad_norm": 1.5134923815694867, "learning_rate": 9.987405408188885e-07, "loss": 0.6312, "step": 2988 }, { "epoch": 0.05195640459594292, "grad_norm": 1.9405155523370228, "learning_rate": 9.987385433113414e-07, "loss": 0.3939, "step": 2989 }, { "epoch": 0.05197378713344574, "grad_norm": 2.6289805520879455, "learning_rate": 9.987365442230222e-07, "loss": 0.5072, "step": 2990 }, { "epoch": 0.051991169670948564, "grad_norm": 1.7339185884868822, "learning_rate": 9.987345435539372e-07, "loss": 0.3126, "step": 2991 }, { "epoch": 0.05200855220845139, "grad_norm": 2.6538090673643833, "learning_rate": 9.987325413040927e-07, "loss": 0.4135, "step": 2992 }, { "epoch": 0.05202593474595421, "grad_norm": 2.948326672812857, "learning_rate": 9.987305374734954e-07, "loss": 0.4455, "step": 2993 }, { "epoch": 0.05204331728345704, "grad_norm": 2.7354021994283744, "learning_rate": 9.987285320621513e-07, "loss": 0.5796, "step": 2994 }, { "epoch": 0.052060699820959865, "grad_norm": 2.195620725770374, "learning_rate": 9.987265250700668e-07, "loss": 0.3909, "step": 2995 }, { "epoch": 0.05207808235846269, "grad_norm": 1.343266804157997, "learning_rate": 9.987245164972484e-07, "loss": 0.5666, "step": 2996 }, { "epoch": 0.05209546489596551, "grad_norm": 1.8702648100734565, "learning_rate": 9.987225063437022e-07, "loss": 0.7784, "step": 2997 }, { "epoch": 0.052112847433468334, "grad_norm": 2.517409772890376, "learning_rate": 9.98720494609435e-07, "loss": 0.4255, "step": 2998 }, { "epoch": 0.052130229970971165, "grad_norm": 2.945266629878664, "learning_rate": 9.987184812944527e-07, "loss": 0.3475, "step": 2999 }, { "epoch": 0.05214761250847399, "grad_norm": 1.821068439071768, "learning_rate": 9.98716466398762e-07, "loss": 0.6131, "step": 3000 }, { "epoch": 0.05216499504597681, "grad_norm": 1.7133841815350392, "learning_rate": 9.987144499223692e-07, "loss": 0.4376, "step": 3001 }, { "epoch": 0.052182377583479635, "grad_norm": 1.556268001183871, "learning_rate": 9.987124318652807e-07, "loss": 0.6199, "step": 3002 }, { "epoch": 0.05219976012098246, "grad_norm": 2.650696246473833, "learning_rate": 9.987104122275028e-07, "loss": 0.4287, "step": 3003 }, { "epoch": 0.05221714265848529, "grad_norm": 1.5038946623334417, "learning_rate": 9.987083910090422e-07, "loss": 0.3029, "step": 3004 }, { "epoch": 0.05223452519598811, "grad_norm": 2.269012674131188, "learning_rate": 9.987063682099048e-07, "loss": 0.3919, "step": 3005 }, { "epoch": 0.052251907733490935, "grad_norm": 1.5829822811615701, "learning_rate": 9.987043438300974e-07, "loss": 0.2989, "step": 3006 }, { "epoch": 0.05226929027099376, "grad_norm": 2.9600849528514184, "learning_rate": 9.987023178696264e-07, "loss": 0.1863, "step": 3007 }, { "epoch": 0.05228667280849658, "grad_norm": 2.387778795243504, "learning_rate": 9.98700290328498e-07, "loss": 0.4115, "step": 3008 }, { "epoch": 0.05230405534599941, "grad_norm": 1.588973533120905, "learning_rate": 9.98698261206719e-07, "loss": 0.6858, "step": 3009 }, { "epoch": 0.052321437883502235, "grad_norm": 2.2574258368717133, "learning_rate": 9.986962305042954e-07, "loss": 0.6442, "step": 3010 }, { "epoch": 0.05233882042100506, "grad_norm": 1.9533838745609278, "learning_rate": 9.986941982212338e-07, "loss": 0.9615, "step": 3011 }, { "epoch": 0.05235620295850788, "grad_norm": 1.9894952468525033, "learning_rate": 9.986921643575407e-07, "loss": 0.4299, "step": 3012 }, { "epoch": 0.052373585496010705, "grad_norm": 4.714336253733726, "learning_rate": 9.986901289132227e-07, "loss": 0.7313, "step": 3013 }, { "epoch": 0.052390968033513535, "grad_norm": 2.512614375002529, "learning_rate": 9.986880918882857e-07, "loss": 0.5833, "step": 3014 }, { "epoch": 0.05240835057101636, "grad_norm": 1.7005365627071316, "learning_rate": 9.98686053282737e-07, "loss": 0.4634, "step": 3015 }, { "epoch": 0.05242573310851918, "grad_norm": 2.738665327507154, "learning_rate": 9.986840130965823e-07, "loss": 0.3343, "step": 3016 }, { "epoch": 0.052443115646022005, "grad_norm": 1.6651130714086162, "learning_rate": 9.986819713298285e-07, "loss": 0.5435, "step": 3017 }, { "epoch": 0.05246049818352483, "grad_norm": 2.6776543064334413, "learning_rate": 9.986799279824818e-07, "loss": 0.5504, "step": 3018 }, { "epoch": 0.05247788072102766, "grad_norm": 2.4274761851732256, "learning_rate": 9.98677883054549e-07, "loss": 0.7807, "step": 3019 }, { "epoch": 0.05249526325853048, "grad_norm": 1.2011533021020468, "learning_rate": 9.98675836546036e-07, "loss": 0.4465, "step": 3020 }, { "epoch": 0.052512645796033305, "grad_norm": 3.9038331828012613, "learning_rate": 9.986737884569498e-07, "loss": 0.6383, "step": 3021 }, { "epoch": 0.05253002833353613, "grad_norm": 1.8864656996682945, "learning_rate": 9.986717387872968e-07, "loss": 0.6172, "step": 3022 }, { "epoch": 0.05254741087103895, "grad_norm": 4.3339083066472694, "learning_rate": 9.986696875370836e-07, "loss": 0.452, "step": 3023 }, { "epoch": 0.05256479340854178, "grad_norm": 1.4023137757579391, "learning_rate": 9.986676347063166e-07, "loss": 0.538, "step": 3024 }, { "epoch": 0.052582175946044606, "grad_norm": 1.9976742973421138, "learning_rate": 9.986655802950022e-07, "loss": 0.4599, "step": 3025 }, { "epoch": 0.05259955848354743, "grad_norm": 1.5384837147259895, "learning_rate": 9.986635243031468e-07, "loss": 0.2667, "step": 3026 }, { "epoch": 0.05261694102105025, "grad_norm": 3.554489804614218, "learning_rate": 9.986614667307572e-07, "loss": 0.4503, "step": 3027 }, { "epoch": 0.052634323558553076, "grad_norm": 2.497304279150419, "learning_rate": 9.986594075778398e-07, "loss": 0.4792, "step": 3028 }, { "epoch": 0.0526517060960559, "grad_norm": 2.4377581164767617, "learning_rate": 9.986573468444014e-07, "loss": 0.5462, "step": 3029 }, { "epoch": 0.05266908863355873, "grad_norm": 1.6726071840888619, "learning_rate": 9.986552845304479e-07, "loss": 0.3508, "step": 3030 }, { "epoch": 0.05268647117106155, "grad_norm": 1.7797661740231752, "learning_rate": 9.986532206359864e-07, "loss": 0.6131, "step": 3031 }, { "epoch": 0.052703853708564376, "grad_norm": 1.548780596974186, "learning_rate": 9.98651155161023e-07, "loss": 0.3323, "step": 3032 }, { "epoch": 0.0527212362460672, "grad_norm": 2.252738581350986, "learning_rate": 9.98649088105565e-07, "loss": 0.5485, "step": 3033 }, { "epoch": 0.05273861878357002, "grad_norm": 4.048321012632047, "learning_rate": 9.98647019469618e-07, "loss": 0.7736, "step": 3034 }, { "epoch": 0.05275600132107285, "grad_norm": 2.157368534593953, "learning_rate": 9.986449492531891e-07, "loss": 0.5032, "step": 3035 }, { "epoch": 0.052773383858575676, "grad_norm": 2.6781380975303093, "learning_rate": 9.986428774562849e-07, "loss": 0.2957, "step": 3036 }, { "epoch": 0.0527907663960785, "grad_norm": 2.7799672550736423, "learning_rate": 9.986408040789118e-07, "loss": 0.4484, "step": 3037 }, { "epoch": 0.05280814893358132, "grad_norm": 2.2746942596713198, "learning_rate": 9.986387291210764e-07, "loss": 0.6668, "step": 3038 }, { "epoch": 0.052825531471084146, "grad_norm": 1.9360849185421893, "learning_rate": 9.986366525827854e-07, "loss": 0.5782, "step": 3039 }, { "epoch": 0.052842914008586976, "grad_norm": 2.541754277250566, "learning_rate": 9.98634574464045e-07, "loss": 0.7061, "step": 3040 }, { "epoch": 0.0528602965460898, "grad_norm": 2.903874577220666, "learning_rate": 9.986324947648623e-07, "loss": 0.3828, "step": 3041 }, { "epoch": 0.05287767908359262, "grad_norm": 1.682283462845443, "learning_rate": 9.986304134852435e-07, "loss": 0.6018, "step": 3042 }, { "epoch": 0.052895061621095446, "grad_norm": 1.2745314554473548, "learning_rate": 9.986283306251954e-07, "loss": 0.4999, "step": 3043 }, { "epoch": 0.05291244415859827, "grad_norm": 1.9718927878583574, "learning_rate": 9.986262461847245e-07, "loss": 0.3724, "step": 3044 }, { "epoch": 0.0529298266961011, "grad_norm": 3.6319038833903736, "learning_rate": 9.986241601638374e-07, "loss": 0.5062, "step": 3045 }, { "epoch": 0.05294720923360392, "grad_norm": 2.2101239459843116, "learning_rate": 9.98622072562541e-07, "loss": 0.8524, "step": 3046 }, { "epoch": 0.052964591771106746, "grad_norm": 1.6317832688754843, "learning_rate": 9.986199833808415e-07, "loss": 0.7265, "step": 3047 }, { "epoch": 0.05298197430860957, "grad_norm": 1.1687752933782551, "learning_rate": 9.986178926187457e-07, "loss": 0.4919, "step": 3048 }, { "epoch": 0.05299935684611239, "grad_norm": 1.4636321194388557, "learning_rate": 9.986158002762602e-07, "loss": 0.4092, "step": 3049 }, { "epoch": 0.05301673938361522, "grad_norm": 1.6279913327059092, "learning_rate": 9.986137063533917e-07, "loss": 0.5609, "step": 3050 }, { "epoch": 0.053034121921118046, "grad_norm": 1.840049091965851, "learning_rate": 9.986116108501467e-07, "loss": 0.2853, "step": 3051 }, { "epoch": 0.05305150445862087, "grad_norm": 1.8140414077644345, "learning_rate": 9.98609513766532e-07, "loss": 0.4125, "step": 3052 }, { "epoch": 0.05306888699612369, "grad_norm": 2.725234059441562, "learning_rate": 9.986074151025543e-07, "loss": 0.3105, "step": 3053 }, { "epoch": 0.053086269533626516, "grad_norm": 1.1285595405760127, "learning_rate": 9.9860531485822e-07, "loss": 0.4622, "step": 3054 }, { "epoch": 0.05310365207112935, "grad_norm": 2.2361860154944675, "learning_rate": 9.98603213033536e-07, "loss": 0.4879, "step": 3055 }, { "epoch": 0.05312103460863217, "grad_norm": 1.479405528787139, "learning_rate": 9.986011096285088e-07, "loss": 0.303, "step": 3056 }, { "epoch": 0.05313841714613499, "grad_norm": 1.9519727492809285, "learning_rate": 9.985990046431452e-07, "loss": 0.4732, "step": 3057 }, { "epoch": 0.05315579968363782, "grad_norm": 3.160235451254635, "learning_rate": 9.985968980774517e-07, "loss": 0.3243, "step": 3058 }, { "epoch": 0.05317318222114064, "grad_norm": 1.7586248562383577, "learning_rate": 9.98594789931435e-07, "loss": 0.7415, "step": 3059 }, { "epoch": 0.05319056475864347, "grad_norm": 2.128948435062386, "learning_rate": 9.985926802051018e-07, "loss": 0.2421, "step": 3060 }, { "epoch": 0.053207947296146293, "grad_norm": 1.4102846566357201, "learning_rate": 9.98590568898459e-07, "loss": 0.492, "step": 3061 }, { "epoch": 0.05322532983364912, "grad_norm": 1.5043517860118818, "learning_rate": 9.985884560115132e-07, "loss": 0.4816, "step": 3062 }, { "epoch": 0.05324271237115194, "grad_norm": 2.702871081721089, "learning_rate": 9.98586341544271e-07, "loss": 0.6916, "step": 3063 }, { "epoch": 0.05326009490865476, "grad_norm": 2.9338980830776666, "learning_rate": 9.98584225496739e-07, "loss": 0.6133, "step": 3064 }, { "epoch": 0.05327747744615759, "grad_norm": 7.3140671292944655, "learning_rate": 9.985821078689241e-07, "loss": 0.6896, "step": 3065 }, { "epoch": 0.05329485998366042, "grad_norm": 1.6300891769616923, "learning_rate": 9.98579988660833e-07, "loss": 0.3984, "step": 3066 }, { "epoch": 0.05331224252116324, "grad_norm": 1.1692898314937086, "learning_rate": 9.985778678724722e-07, "loss": 0.2578, "step": 3067 }, { "epoch": 0.053329625058666064, "grad_norm": 2.0139263644166103, "learning_rate": 9.985757455038487e-07, "loss": 0.5571, "step": 3068 }, { "epoch": 0.05334700759616889, "grad_norm": 2.488478835631166, "learning_rate": 9.985736215549692e-07, "loss": 0.4146, "step": 3069 }, { "epoch": 0.05336439013367171, "grad_norm": 2.432407774670031, "learning_rate": 9.985714960258402e-07, "loss": 0.3441, "step": 3070 }, { "epoch": 0.05338177267117454, "grad_norm": 1.6498793081988545, "learning_rate": 9.985693689164686e-07, "loss": 0.7313, "step": 3071 }, { "epoch": 0.053399155208677364, "grad_norm": 2.3028843454966483, "learning_rate": 9.985672402268613e-07, "loss": 0.6669, "step": 3072 }, { "epoch": 0.05341653774618019, "grad_norm": 1.9958549853291996, "learning_rate": 9.985651099570246e-07, "loss": 0.6905, "step": 3073 }, { "epoch": 0.05343392028368301, "grad_norm": 1.8930542580063927, "learning_rate": 9.985629781069657e-07, "loss": 0.3357, "step": 3074 }, { "epoch": 0.053451302821185834, "grad_norm": 2.762897680279245, "learning_rate": 9.985608446766911e-07, "loss": 0.5207, "step": 3075 }, { "epoch": 0.053468685358688664, "grad_norm": 1.34638526017277, "learning_rate": 9.985587096662076e-07, "loss": 0.4084, "step": 3076 }, { "epoch": 0.05348606789619149, "grad_norm": 1.9467822979368332, "learning_rate": 9.98556573075522e-07, "loss": 0.2645, "step": 3077 }, { "epoch": 0.05350345043369431, "grad_norm": 1.7567855062104873, "learning_rate": 9.985544349046413e-07, "loss": 0.6826, "step": 3078 }, { "epoch": 0.053520832971197134, "grad_norm": 2.273540688429571, "learning_rate": 9.985522951535719e-07, "loss": 0.4349, "step": 3079 }, { "epoch": 0.05353821550869996, "grad_norm": 10.665922644393858, "learning_rate": 9.985501538223207e-07, "loss": 0.8134, "step": 3080 }, { "epoch": 0.05355559804620279, "grad_norm": 1.9517368253779674, "learning_rate": 9.985480109108947e-07, "loss": 0.3842, "step": 3081 }, { "epoch": 0.05357298058370561, "grad_norm": 1.6095176076138444, "learning_rate": 9.985458664193004e-07, "loss": 0.2356, "step": 3082 }, { "epoch": 0.053590363121208434, "grad_norm": 2.6798917987024895, "learning_rate": 9.985437203475446e-07, "loss": 0.3546, "step": 3083 }, { "epoch": 0.05360774565871126, "grad_norm": 3.090981767635544, "learning_rate": 9.985415726956346e-07, "loss": 0.3602, "step": 3084 }, { "epoch": 0.05362512819621408, "grad_norm": 2.1831878258969994, "learning_rate": 9.985394234635765e-07, "loss": 0.5674, "step": 3085 }, { "epoch": 0.05364251073371691, "grad_norm": 3.382491473259138, "learning_rate": 9.985372726513774e-07, "loss": 1.044, "step": 3086 }, { "epoch": 0.053659893271219734, "grad_norm": 1.5258872441115516, "learning_rate": 9.985351202590445e-07, "loss": 0.6371, "step": 3087 }, { "epoch": 0.05367727580872256, "grad_norm": 2.3615490885466794, "learning_rate": 9.985329662865839e-07, "loss": 0.6202, "step": 3088 }, { "epoch": 0.05369465834622538, "grad_norm": 2.0278394335183902, "learning_rate": 9.98530810734003e-07, "loss": 0.5534, "step": 3089 }, { "epoch": 0.053712040883728204, "grad_norm": 4.09237069831266, "learning_rate": 9.985286536013084e-07, "loss": 0.4841, "step": 3090 }, { "epoch": 0.053729423421231035, "grad_norm": 1.4231740781612163, "learning_rate": 9.985264948885069e-07, "loss": 0.4767, "step": 3091 }, { "epoch": 0.05374680595873386, "grad_norm": 2.0846972295553563, "learning_rate": 9.985243345956054e-07, "loss": 0.5598, "step": 3092 }, { "epoch": 0.05376418849623668, "grad_norm": 2.2039070369883413, "learning_rate": 9.985221727226108e-07, "loss": 0.5379, "step": 3093 }, { "epoch": 0.053781571033739504, "grad_norm": 2.4443688421280085, "learning_rate": 9.9852000926953e-07, "loss": 0.4819, "step": 3094 }, { "epoch": 0.05379895357124233, "grad_norm": 2.3150380310922807, "learning_rate": 9.985178442363698e-07, "loss": 0.2585, "step": 3095 }, { "epoch": 0.05381633610874516, "grad_norm": 1.554931319310539, "learning_rate": 9.98515677623137e-07, "loss": 0.4539, "step": 3096 }, { "epoch": 0.05383371864624798, "grad_norm": 1.588818888056204, "learning_rate": 9.985135094298383e-07, "loss": 0.325, "step": 3097 }, { "epoch": 0.053851101183750805, "grad_norm": 1.580678475462708, "learning_rate": 9.98511339656481e-07, "loss": 0.3714, "step": 3098 }, { "epoch": 0.05386848372125363, "grad_norm": 2.3967066361023757, "learning_rate": 9.985091683030718e-07, "loss": 0.344, "step": 3099 }, { "epoch": 0.05388586625875645, "grad_norm": 2.350863732755497, "learning_rate": 9.985069953696173e-07, "loss": 0.3599, "step": 3100 }, { "epoch": 0.053903248796259275, "grad_norm": 3.2637074913371267, "learning_rate": 9.985048208561247e-07, "loss": 0.5158, "step": 3101 }, { "epoch": 0.053920631333762105, "grad_norm": 2.174367851788729, "learning_rate": 9.985026447626008e-07, "loss": 0.5582, "step": 3102 }, { "epoch": 0.05393801387126493, "grad_norm": 1.9251881780029045, "learning_rate": 9.985004670890528e-07, "loss": 0.6627, "step": 3103 }, { "epoch": 0.05395539640876775, "grad_norm": 1.75930719163297, "learning_rate": 9.98498287835487e-07, "loss": 0.4336, "step": 3104 }, { "epoch": 0.053972778946270575, "grad_norm": 1.785340339011558, "learning_rate": 9.984961070019107e-07, "loss": 0.4526, "step": 3105 }, { "epoch": 0.0539901614837734, "grad_norm": 1.848874617571737, "learning_rate": 9.984939245883307e-07, "loss": 0.2807, "step": 3106 }, { "epoch": 0.05400754402127623, "grad_norm": 1.5039250562705078, "learning_rate": 9.984917405947539e-07, "loss": 0.4691, "step": 3107 }, { "epoch": 0.05402492655877905, "grad_norm": 2.3995813138085573, "learning_rate": 9.984895550211874e-07, "loss": 0.4696, "step": 3108 }, { "epoch": 0.054042309096281875, "grad_norm": 2.2200649481869545, "learning_rate": 9.98487367867638e-07, "loss": 0.4791, "step": 3109 }, { "epoch": 0.0540596916337847, "grad_norm": 2.3374630490158728, "learning_rate": 9.984851791341124e-07, "loss": 0.8281, "step": 3110 }, { "epoch": 0.05407707417128752, "grad_norm": 1.6953729941408133, "learning_rate": 9.98482988820618e-07, "loss": 0.6613, "step": 3111 }, { "epoch": 0.05409445670879035, "grad_norm": 1.3627146557438896, "learning_rate": 9.984807969271613e-07, "loss": 0.3802, "step": 3112 }, { "epoch": 0.054111839246293175, "grad_norm": 2.746162020073221, "learning_rate": 9.984786034537499e-07, "loss": 0.7885, "step": 3113 }, { "epoch": 0.054129221783796, "grad_norm": 1.4737263132094522, "learning_rate": 9.984764084003897e-07, "loss": 0.3245, "step": 3114 }, { "epoch": 0.05414660432129882, "grad_norm": 1.5526967929329056, "learning_rate": 9.984742117670887e-07, "loss": 0.6815, "step": 3115 }, { "epoch": 0.054163986858801645, "grad_norm": 2.5349478382029114, "learning_rate": 9.984720135538533e-07, "loss": 0.5632, "step": 3116 }, { "epoch": 0.054181369396304475, "grad_norm": 2.0321466474994305, "learning_rate": 9.984698137606906e-07, "loss": 0.4416, "step": 3117 }, { "epoch": 0.0541987519338073, "grad_norm": 1.3965379767282369, "learning_rate": 9.984676123876077e-07, "loss": 0.3197, "step": 3118 }, { "epoch": 0.05421613447131012, "grad_norm": 1.2245215732223609, "learning_rate": 9.984654094346112e-07, "loss": 0.2003, "step": 3119 }, { "epoch": 0.054233517008812945, "grad_norm": 2.218668565332408, "learning_rate": 9.984632049017085e-07, "loss": 0.4058, "step": 3120 }, { "epoch": 0.05425089954631577, "grad_norm": 3.5977754382913294, "learning_rate": 9.984609987889064e-07, "loss": 0.4588, "step": 3121 }, { "epoch": 0.0542682820838186, "grad_norm": 2.2160382868708024, "learning_rate": 9.984587910962117e-07, "loss": 0.62, "step": 3122 }, { "epoch": 0.05428566462132142, "grad_norm": 2.809695840750753, "learning_rate": 9.98456581823632e-07, "loss": 0.6341, "step": 3123 }, { "epoch": 0.054303047158824246, "grad_norm": 2.3934391004894824, "learning_rate": 9.984543709711734e-07, "loss": 0.389, "step": 3124 }, { "epoch": 0.05432042969632707, "grad_norm": 3.6821042769850765, "learning_rate": 9.98452158538844e-07, "loss": 0.6025, "step": 3125 }, { "epoch": 0.05433781223382989, "grad_norm": 1.9444290641396385, "learning_rate": 9.984499445266497e-07, "loss": 0.9777, "step": 3126 }, { "epoch": 0.05435519477133272, "grad_norm": 2.1776334857396784, "learning_rate": 9.984477289345983e-07, "loss": 0.6091, "step": 3127 }, { "epoch": 0.054372577308835546, "grad_norm": 1.3584744254467778, "learning_rate": 9.984455117626967e-07, "loss": 0.236, "step": 3128 }, { "epoch": 0.05438995984633837, "grad_norm": 1.997162511649628, "learning_rate": 9.984432930109514e-07, "loss": 0.4839, "step": 3129 }, { "epoch": 0.05440734238384119, "grad_norm": 2.076962988616775, "learning_rate": 9.984410726793701e-07, "loss": 0.2634, "step": 3130 }, { "epoch": 0.054424724921344016, "grad_norm": 2.7277427783579795, "learning_rate": 9.984388507679596e-07, "loss": 0.4545, "step": 3131 }, { "epoch": 0.05444210745884684, "grad_norm": 1.8882703345810645, "learning_rate": 9.984366272767268e-07, "loss": 0.6184, "step": 3132 }, { "epoch": 0.05445948999634967, "grad_norm": 1.426283417078041, "learning_rate": 9.98434402205679e-07, "loss": 0.4327, "step": 3133 }, { "epoch": 0.05447687253385249, "grad_norm": 2.8417785819499395, "learning_rate": 9.984321755548229e-07, "loss": 0.4581, "step": 3134 }, { "epoch": 0.054494255071355316, "grad_norm": 1.7983576293138068, "learning_rate": 9.984299473241658e-07, "loss": 0.31, "step": 3135 }, { "epoch": 0.05451163760885814, "grad_norm": 1.8619318736664672, "learning_rate": 9.984277175137148e-07, "loss": 0.5965, "step": 3136 }, { "epoch": 0.05452902014636096, "grad_norm": 1.6473092392474562, "learning_rate": 9.984254861234768e-07, "loss": 0.5812, "step": 3137 }, { "epoch": 0.05454640268386379, "grad_norm": 5.948269530048118, "learning_rate": 9.98423253153459e-07, "loss": 0.4701, "step": 3138 }, { "epoch": 0.054563785221366616, "grad_norm": 1.8287677312578934, "learning_rate": 9.984210186036686e-07, "loss": 0.4823, "step": 3139 }, { "epoch": 0.05458116775886944, "grad_norm": 1.7214807605311948, "learning_rate": 9.984187824741125e-07, "loss": 0.2695, "step": 3140 }, { "epoch": 0.05459855029637226, "grad_norm": 2.6248783113261207, "learning_rate": 9.984165447647976e-07, "loss": 0.4995, "step": 3141 }, { "epoch": 0.054615932833875086, "grad_norm": 2.9994342426059886, "learning_rate": 9.984143054757316e-07, "loss": 0.4341, "step": 3142 }, { "epoch": 0.054633315371377916, "grad_norm": 2.3308965747514714, "learning_rate": 9.984120646069207e-07, "loss": 0.7523, "step": 3143 }, { "epoch": 0.05465069790888074, "grad_norm": 1.4608349935326364, "learning_rate": 9.984098221583728e-07, "loss": 0.5889, "step": 3144 }, { "epoch": 0.05466808044638356, "grad_norm": 2.296650364610234, "learning_rate": 9.984075781300947e-07, "loss": 0.3209, "step": 3145 }, { "epoch": 0.054685462983886386, "grad_norm": 2.564226850359483, "learning_rate": 9.984053325220937e-07, "loss": 0.6434, "step": 3146 }, { "epoch": 0.05470284552138921, "grad_norm": 2.0706247874252806, "learning_rate": 9.984030853343763e-07, "loss": 0.2425, "step": 3147 }, { "epoch": 0.05472022805889204, "grad_norm": 1.5899545426292176, "learning_rate": 9.984008365669504e-07, "loss": 0.3543, "step": 3148 }, { "epoch": 0.05473761059639486, "grad_norm": 1.1020606804197102, "learning_rate": 9.983985862198226e-07, "loss": 0.2591, "step": 3149 }, { "epoch": 0.054754993133897686, "grad_norm": 2.472437021042117, "learning_rate": 9.983963342930004e-07, "loss": 0.6421, "step": 3150 }, { "epoch": 0.05477237567140051, "grad_norm": 2.7303500567081738, "learning_rate": 9.983940807864906e-07, "loss": 0.4836, "step": 3151 }, { "epoch": 0.05478975820890333, "grad_norm": 2.429637825410918, "learning_rate": 9.983918257003007e-07, "loss": 0.2017, "step": 3152 }, { "epoch": 0.05480714074640616, "grad_norm": 2.5470228041718364, "learning_rate": 9.983895690344374e-07, "loss": 0.5077, "step": 3153 }, { "epoch": 0.05482452328390899, "grad_norm": 1.996070250998614, "learning_rate": 9.983873107889085e-07, "loss": 0.2176, "step": 3154 }, { "epoch": 0.05484190582141181, "grad_norm": 2.098363975051736, "learning_rate": 9.983850509637204e-07, "loss": 0.9362, "step": 3155 }, { "epoch": 0.05485928835891463, "grad_norm": 2.327240011237263, "learning_rate": 9.983827895588808e-07, "loss": 0.4407, "step": 3156 }, { "epoch": 0.05487667089641746, "grad_norm": 1.7764555697366828, "learning_rate": 9.983805265743965e-07, "loss": 0.5866, "step": 3157 }, { "epoch": 0.05489405343392029, "grad_norm": 2.8239304946943715, "learning_rate": 9.983782620102751e-07, "loss": 0.7149, "step": 3158 }, { "epoch": 0.05491143597142311, "grad_norm": 2.115475929156378, "learning_rate": 9.983759958665235e-07, "loss": 0.425, "step": 3159 }, { "epoch": 0.05492881850892593, "grad_norm": 2.151243344363838, "learning_rate": 9.983737281431488e-07, "loss": 0.3881, "step": 3160 }, { "epoch": 0.05494620104642876, "grad_norm": 2.810521561548817, "learning_rate": 9.983714588401585e-07, "loss": 0.8021, "step": 3161 }, { "epoch": 0.05496358358393158, "grad_norm": 3.349681702640224, "learning_rate": 9.983691879575595e-07, "loss": 0.4846, "step": 3162 }, { "epoch": 0.05498096612143441, "grad_norm": 2.206019969262441, "learning_rate": 9.98366915495359e-07, "loss": 0.3366, "step": 3163 }, { "epoch": 0.054998348658937234, "grad_norm": 2.0834174681385678, "learning_rate": 9.983646414535645e-07, "loss": 0.3412, "step": 3164 }, { "epoch": 0.05501573119644006, "grad_norm": 1.7141337333215005, "learning_rate": 9.98362365832183e-07, "loss": 0.4255, "step": 3165 }, { "epoch": 0.05503311373394288, "grad_norm": 3.0624407921953942, "learning_rate": 9.983600886312218e-07, "loss": 0.8435, "step": 3166 }, { "epoch": 0.055050496271445704, "grad_norm": 2.2822798858896927, "learning_rate": 9.983578098506877e-07, "loss": 0.5498, "step": 3167 }, { "epoch": 0.05506787880894853, "grad_norm": 1.5143174152293781, "learning_rate": 9.983555294905885e-07, "loss": 0.2401, "step": 3168 }, { "epoch": 0.05508526134645136, "grad_norm": 2.115524115059283, "learning_rate": 9.983532475509314e-07, "loss": 0.4847, "step": 3169 }, { "epoch": 0.05510264388395418, "grad_norm": 1.9714133426497888, "learning_rate": 9.983509640317231e-07, "loss": 0.2205, "step": 3170 }, { "epoch": 0.055120026421457004, "grad_norm": 2.62439718786876, "learning_rate": 9.983486789329714e-07, "loss": 0.2821, "step": 3171 }, { "epoch": 0.05513740895895983, "grad_norm": 2.0301504789462372, "learning_rate": 9.98346392254683e-07, "loss": 0.4228, "step": 3172 }, { "epoch": 0.05515479149646265, "grad_norm": 1.6214737999079916, "learning_rate": 9.983441039968657e-07, "loss": 0.2011, "step": 3173 }, { "epoch": 0.05517217403396548, "grad_norm": 1.2546278261911799, "learning_rate": 9.983418141595265e-07, "loss": 0.3141, "step": 3174 }, { "epoch": 0.055189556571468304, "grad_norm": 2.351467075636418, "learning_rate": 9.983395227426728e-07, "loss": 0.3235, "step": 3175 }, { "epoch": 0.05520693910897113, "grad_norm": 2.293374520053552, "learning_rate": 9.983372297463115e-07, "loss": 0.4501, "step": 3176 }, { "epoch": 0.05522432164647395, "grad_norm": 2.9109470415388814, "learning_rate": 9.983349351704502e-07, "loss": 0.6293, "step": 3177 }, { "epoch": 0.055241704183976774, "grad_norm": 1.490213364637478, "learning_rate": 9.98332639015096e-07, "loss": 0.3237, "step": 3178 }, { "epoch": 0.055259086721479604, "grad_norm": 3.858712563601717, "learning_rate": 9.983303412802562e-07, "loss": 0.4891, "step": 3179 }, { "epoch": 0.05527646925898243, "grad_norm": 1.7175620511787155, "learning_rate": 9.983280419659382e-07, "loss": 0.2928, "step": 3180 }, { "epoch": 0.05529385179648525, "grad_norm": 1.8134014381087002, "learning_rate": 9.983257410721493e-07, "loss": 0.4017, "step": 3181 }, { "epoch": 0.055311234333988074, "grad_norm": 4.003426639571597, "learning_rate": 9.983234385988967e-07, "loss": 0.5497, "step": 3182 }, { "epoch": 0.0553286168714909, "grad_norm": 2.727085092409296, "learning_rate": 9.983211345461876e-07, "loss": 0.3404, "step": 3183 }, { "epoch": 0.05534599940899373, "grad_norm": 1.2678545081883903, "learning_rate": 9.983188289140294e-07, "loss": 0.4861, "step": 3184 }, { "epoch": 0.05536338194649655, "grad_norm": 1.5658661845335682, "learning_rate": 9.983165217024295e-07, "loss": 0.5896, "step": 3185 }, { "epoch": 0.055380764483999374, "grad_norm": 3.082521650054783, "learning_rate": 9.983142129113953e-07, "loss": 0.3914, "step": 3186 }, { "epoch": 0.0553981470215022, "grad_norm": 2.039969009556858, "learning_rate": 9.983119025409337e-07, "loss": 0.3167, "step": 3187 }, { "epoch": 0.05541552955900502, "grad_norm": 1.5260458630244673, "learning_rate": 9.983095905910523e-07, "loss": 0.4711, "step": 3188 }, { "epoch": 0.05543291209650785, "grad_norm": 2.0220465162739196, "learning_rate": 9.983072770617585e-07, "loss": 0.3368, "step": 3189 }, { "epoch": 0.055450294634010674, "grad_norm": 2.273200117555065, "learning_rate": 9.983049619530594e-07, "loss": 0.2723, "step": 3190 }, { "epoch": 0.0554676771715135, "grad_norm": 3.2560572907490717, "learning_rate": 9.983026452649627e-07, "loss": 0.5182, "step": 3191 }, { "epoch": 0.05548505970901632, "grad_norm": 1.9165256560992163, "learning_rate": 9.983003269974755e-07, "loss": 0.4103, "step": 3192 }, { "epoch": 0.055502442246519144, "grad_norm": 1.938863284930637, "learning_rate": 9.98298007150605e-07, "loss": 0.3583, "step": 3193 }, { "epoch": 0.055519824784021975, "grad_norm": 2.949847813302236, "learning_rate": 9.982956857243588e-07, "loss": 0.5616, "step": 3194 }, { "epoch": 0.0555372073215248, "grad_norm": 1.869834807028254, "learning_rate": 9.98293362718744e-07, "loss": 0.3527, "step": 3195 }, { "epoch": 0.05555458985902762, "grad_norm": 2.048632569065003, "learning_rate": 9.982910381337685e-07, "loss": 0.3739, "step": 3196 }, { "epoch": 0.055571972396530445, "grad_norm": 1.881963408235243, "learning_rate": 9.98288711969439e-07, "loss": 0.6665, "step": 3197 }, { "epoch": 0.05558935493403327, "grad_norm": 1.3173373881086337, "learning_rate": 9.98286384225763e-07, "loss": 0.3943, "step": 3198 }, { "epoch": 0.0556067374715361, "grad_norm": 1.5448409510435526, "learning_rate": 9.982840549027484e-07, "loss": 0.3311, "step": 3199 }, { "epoch": 0.05562412000903892, "grad_norm": 1.8699842284274704, "learning_rate": 9.982817240004022e-07, "loss": 0.3981, "step": 3200 }, { "epoch": 0.055641502546541745, "grad_norm": 1.2639605488252508, "learning_rate": 9.982793915187318e-07, "loss": 0.4439, "step": 3201 }, { "epoch": 0.05565888508404457, "grad_norm": 1.9758089059801782, "learning_rate": 9.982770574577446e-07, "loss": 0.469, "step": 3202 }, { "epoch": 0.05567626762154739, "grad_norm": 1.5524629771420566, "learning_rate": 9.982747218174481e-07, "loss": 0.4994, "step": 3203 }, { "epoch": 0.055693650159050215, "grad_norm": 2.722477634191439, "learning_rate": 9.982723845978496e-07, "loss": 0.683, "step": 3204 }, { "epoch": 0.055711032696553045, "grad_norm": 3.3202347135976353, "learning_rate": 9.982700457989563e-07, "loss": 0.5122, "step": 3205 }, { "epoch": 0.05572841523405587, "grad_norm": 2.2649047160564573, "learning_rate": 9.98267705420776e-07, "loss": 0.7441, "step": 3206 }, { "epoch": 0.05574579777155869, "grad_norm": 1.8883126960016283, "learning_rate": 9.98265363463316e-07, "loss": 0.6044, "step": 3207 }, { "epoch": 0.055763180309061515, "grad_norm": 1.456456332303312, "learning_rate": 9.982630199265838e-07, "loss": 0.5243, "step": 3208 }, { "epoch": 0.05578056284656434, "grad_norm": 6.454406281243913, "learning_rate": 9.982606748105865e-07, "loss": 0.4464, "step": 3209 }, { "epoch": 0.05579794538406717, "grad_norm": 2.9775176797075993, "learning_rate": 9.982583281153319e-07, "loss": 0.2791, "step": 3210 }, { "epoch": 0.05581532792156999, "grad_norm": 2.699943917426664, "learning_rate": 9.98255979840827e-07, "loss": 0.4412, "step": 3211 }, { "epoch": 0.055832710459072815, "grad_norm": 1.483271159835467, "learning_rate": 9.982536299870798e-07, "loss": 0.2417, "step": 3212 }, { "epoch": 0.05585009299657564, "grad_norm": 1.280215020871298, "learning_rate": 9.982512785540973e-07, "loss": 0.2721, "step": 3213 }, { "epoch": 0.05586747553407846, "grad_norm": 1.8698004302383222, "learning_rate": 9.982489255418874e-07, "loss": 0.2306, "step": 3214 }, { "epoch": 0.05588485807158129, "grad_norm": 1.9104499846032297, "learning_rate": 9.982465709504571e-07, "loss": 0.3924, "step": 3215 }, { "epoch": 0.055902240609084115, "grad_norm": 3.2735211170864282, "learning_rate": 9.98244214779814e-07, "loss": 0.5512, "step": 3216 }, { "epoch": 0.05591962314658694, "grad_norm": 2.1930635767096036, "learning_rate": 9.982418570299656e-07, "loss": 0.4683, "step": 3217 }, { "epoch": 0.05593700568408976, "grad_norm": 3.472552241947036, "learning_rate": 9.982394977009194e-07, "loss": 0.415, "step": 3218 }, { "epoch": 0.055954388221592585, "grad_norm": 1.532553254773401, "learning_rate": 9.98237136792683e-07, "loss": 0.594, "step": 3219 }, { "epoch": 0.055971770759095416, "grad_norm": 2.447971281166362, "learning_rate": 9.982347743052637e-07, "loss": 0.7286, "step": 3220 }, { "epoch": 0.05598915329659824, "grad_norm": 1.8344021104452315, "learning_rate": 9.982324102386688e-07, "loss": 0.3754, "step": 3221 }, { "epoch": 0.05600653583410106, "grad_norm": 2.34243154915636, "learning_rate": 9.982300445929063e-07, "loss": 0.4766, "step": 3222 }, { "epoch": 0.056023918371603885, "grad_norm": 1.5425994023555896, "learning_rate": 9.982276773679832e-07, "loss": 0.2938, "step": 3223 }, { "epoch": 0.05604130090910671, "grad_norm": 2.403534970524258, "learning_rate": 9.982253085639075e-07, "loss": 0.6012, "step": 3224 }, { "epoch": 0.05605868344660954, "grad_norm": 2.022386836650483, "learning_rate": 9.982229381806863e-07, "loss": 0.3928, "step": 3225 }, { "epoch": 0.05607606598411236, "grad_norm": 1.9441329815851331, "learning_rate": 9.98220566218327e-07, "loss": 0.3772, "step": 3226 }, { "epoch": 0.056093448521615186, "grad_norm": 1.780268748005617, "learning_rate": 9.982181926768377e-07, "loss": 0.4761, "step": 3227 }, { "epoch": 0.05611083105911801, "grad_norm": 10.159215648949713, "learning_rate": 9.982158175562257e-07, "loss": 0.7502, "step": 3228 }, { "epoch": 0.05612821359662083, "grad_norm": 2.159684855193888, "learning_rate": 9.982134408564982e-07, "loss": 0.4045, "step": 3229 }, { "epoch": 0.05614559613412366, "grad_norm": 3.571231201724294, "learning_rate": 9.98211062577663e-07, "loss": 0.572, "step": 3230 }, { "epoch": 0.056162978671626486, "grad_norm": 2.5279796401850136, "learning_rate": 9.982086827197277e-07, "loss": 0.4968, "step": 3231 }, { "epoch": 0.05618036120912931, "grad_norm": 1.797794873263817, "learning_rate": 9.982063012826995e-07, "loss": 0.5182, "step": 3232 }, { "epoch": 0.05619774374663213, "grad_norm": 1.7502988262503545, "learning_rate": 9.982039182665863e-07, "loss": 0.7708, "step": 3233 }, { "epoch": 0.056215126284134956, "grad_norm": 1.784000725668545, "learning_rate": 9.982015336713956e-07, "loss": 0.3561, "step": 3234 }, { "epoch": 0.056232508821637786, "grad_norm": 2.177711413169622, "learning_rate": 9.981991474971347e-07, "loss": 0.5843, "step": 3235 }, { "epoch": 0.05624989135914061, "grad_norm": 2.2126157301755778, "learning_rate": 9.981967597438116e-07, "loss": 0.3158, "step": 3236 }, { "epoch": 0.05626727389664343, "grad_norm": 1.887928064424567, "learning_rate": 9.981943704114335e-07, "loss": 0.6006, "step": 3237 }, { "epoch": 0.056284656434146256, "grad_norm": 1.6938706659943663, "learning_rate": 9.98191979500008e-07, "loss": 0.3858, "step": 3238 }, { "epoch": 0.05630203897164908, "grad_norm": 2.295255888402798, "learning_rate": 9.98189587009543e-07, "loss": 0.5742, "step": 3239 }, { "epoch": 0.0563194215091519, "grad_norm": 2.1505700322660832, "learning_rate": 9.981871929400458e-07, "loss": 0.541, "step": 3240 }, { "epoch": 0.05633680404665473, "grad_norm": 2.1505223972331104, "learning_rate": 9.98184797291524e-07, "loss": 0.4138, "step": 3241 }, { "epoch": 0.056354186584157556, "grad_norm": 3.364193607394902, "learning_rate": 9.981824000639852e-07, "loss": 0.4658, "step": 3242 }, { "epoch": 0.05637156912166038, "grad_norm": 1.4051339073042295, "learning_rate": 9.98180001257437e-07, "loss": 0.7509, "step": 3243 }, { "epoch": 0.0563889516591632, "grad_norm": 2.312385073098865, "learning_rate": 9.98177600871887e-07, "loss": 0.2486, "step": 3244 }, { "epoch": 0.056406334196666026, "grad_norm": 1.196179818323407, "learning_rate": 9.981751989073432e-07, "loss": 0.6904, "step": 3245 }, { "epoch": 0.056423716734168856, "grad_norm": 1.469543065722162, "learning_rate": 9.981727953638126e-07, "loss": 0.3197, "step": 3246 }, { "epoch": 0.05644109927167168, "grad_norm": 2.36010193232571, "learning_rate": 9.98170390241303e-07, "loss": 0.6725, "step": 3247 }, { "epoch": 0.0564584818091745, "grad_norm": 1.885617532659255, "learning_rate": 9.981679835398222e-07, "loss": 0.4167, "step": 3248 }, { "epoch": 0.056475864346677326, "grad_norm": 2.188524533083164, "learning_rate": 9.981655752593778e-07, "loss": 0.8068, "step": 3249 }, { "epoch": 0.05649324688418015, "grad_norm": 2.006525008029179, "learning_rate": 9.981631653999774e-07, "loss": 0.2256, "step": 3250 }, { "epoch": 0.05651062942168298, "grad_norm": 2.8902955545318485, "learning_rate": 9.981607539616283e-07, "loss": 0.5212, "step": 3251 }, { "epoch": 0.0565280119591858, "grad_norm": 3.2594876767219545, "learning_rate": 9.981583409443387e-07, "loss": 0.4485, "step": 3252 }, { "epoch": 0.05654539449668863, "grad_norm": 2.86492682576518, "learning_rate": 9.981559263481162e-07, "loss": 0.4927, "step": 3253 }, { "epoch": 0.05656277703419145, "grad_norm": 1.7155079437696714, "learning_rate": 9.981535101729679e-07, "loss": 0.4017, "step": 3254 }, { "epoch": 0.05658015957169427, "grad_norm": 2.166021614472222, "learning_rate": 9.98151092418902e-07, "loss": 0.4799, "step": 3255 }, { "epoch": 0.0565975421091971, "grad_norm": 2.2283192914506436, "learning_rate": 9.981486730859256e-07, "loss": 0.4444, "step": 3256 }, { "epoch": 0.05661492464669993, "grad_norm": 3.2456723511535888, "learning_rate": 9.981462521740471e-07, "loss": 0.7741, "step": 3257 }, { "epoch": 0.05663230718420275, "grad_norm": 1.9293521991370108, "learning_rate": 9.981438296832736e-07, "loss": 0.3957, "step": 3258 }, { "epoch": 0.05664968972170557, "grad_norm": 1.5675527390925708, "learning_rate": 9.981414056136133e-07, "loss": 0.3545, "step": 3259 }, { "epoch": 0.0566670722592084, "grad_norm": 2.1839225878504553, "learning_rate": 9.981389799650732e-07, "loss": 0.5456, "step": 3260 }, { "epoch": 0.05668445479671123, "grad_norm": 1.971403270367202, "learning_rate": 9.981365527376616e-07, "loss": 0.5649, "step": 3261 }, { "epoch": 0.05670183733421405, "grad_norm": 3.0705040830817594, "learning_rate": 9.981341239313858e-07, "loss": 0.5419, "step": 3262 }, { "epoch": 0.056719219871716874, "grad_norm": 1.3736059552622921, "learning_rate": 9.981316935462538e-07, "loss": 0.2646, "step": 3263 }, { "epoch": 0.0567366024092197, "grad_norm": 1.521070968612602, "learning_rate": 9.98129261582273e-07, "loss": 0.3293, "step": 3264 }, { "epoch": 0.05675398494672252, "grad_norm": 1.8560863948083801, "learning_rate": 9.981268280394516e-07, "loss": 0.4488, "step": 3265 }, { "epoch": 0.05677136748422535, "grad_norm": 2.0104794080748105, "learning_rate": 9.981243929177966e-07, "loss": 0.4309, "step": 3266 }, { "epoch": 0.056788750021728174, "grad_norm": 2.307775160125904, "learning_rate": 9.981219562173163e-07, "loss": 0.6385, "step": 3267 }, { "epoch": 0.056806132559231, "grad_norm": 2.4327317859440414, "learning_rate": 9.98119517938018e-07, "loss": 0.413, "step": 3268 }, { "epoch": 0.05682351509673382, "grad_norm": 2.1125464736362374, "learning_rate": 9.981170780799099e-07, "loss": 0.6863, "step": 3269 }, { "epoch": 0.056840897634236644, "grad_norm": 2.027046330439673, "learning_rate": 9.981146366429994e-07, "loss": 0.3613, "step": 3270 }, { "epoch": 0.056858280171739474, "grad_norm": 1.7104133721218118, "learning_rate": 9.981121936272943e-07, "loss": 0.3947, "step": 3271 }, { "epoch": 0.0568756627092423, "grad_norm": 1.8760148849570286, "learning_rate": 9.981097490328023e-07, "loss": 0.6019, "step": 3272 }, { "epoch": 0.05689304524674512, "grad_norm": 1.4709539605713597, "learning_rate": 9.981073028595312e-07, "loss": 0.4754, "step": 3273 }, { "epoch": 0.056910427784247944, "grad_norm": 1.7025348161989864, "learning_rate": 9.981048551074889e-07, "loss": 0.6195, "step": 3274 }, { "epoch": 0.05692781032175077, "grad_norm": 1.6407595198757945, "learning_rate": 9.98102405776683e-07, "loss": 0.6412, "step": 3275 }, { "epoch": 0.05694519285925359, "grad_norm": 2.0842341545661602, "learning_rate": 9.980999548671212e-07, "loss": 0.3741, "step": 3276 }, { "epoch": 0.05696257539675642, "grad_norm": 2.2099752611491015, "learning_rate": 9.980975023788112e-07, "loss": 0.5634, "step": 3277 }, { "epoch": 0.056979957934259244, "grad_norm": 2.3942541277126765, "learning_rate": 9.980950483117611e-07, "loss": 0.5272, "step": 3278 }, { "epoch": 0.05699734047176207, "grad_norm": 2.2437628151737155, "learning_rate": 9.980925926659786e-07, "loss": 0.3932, "step": 3279 }, { "epoch": 0.05701472300926489, "grad_norm": 2.094794357933113, "learning_rate": 9.980901354414712e-07, "loss": 0.5683, "step": 3280 }, { "epoch": 0.057032105546767714, "grad_norm": 3.162659685363516, "learning_rate": 9.98087676638247e-07, "loss": 0.7508, "step": 3281 }, { "epoch": 0.057049488084270544, "grad_norm": 3.244624275027769, "learning_rate": 9.980852162563136e-07, "loss": 0.524, "step": 3282 }, { "epoch": 0.05706687062177337, "grad_norm": 3.4237561038442093, "learning_rate": 9.980827542956788e-07, "loss": 0.8825, "step": 3283 }, { "epoch": 0.05708425315927619, "grad_norm": 1.9314210019434777, "learning_rate": 9.980802907563508e-07, "loss": 0.5075, "step": 3284 }, { "epoch": 0.057101635696779014, "grad_norm": 1.914115472735631, "learning_rate": 9.980778256383367e-07, "loss": 0.3255, "step": 3285 }, { "epoch": 0.05711901823428184, "grad_norm": 4.14015669786956, "learning_rate": 9.980753589416448e-07, "loss": 0.4246, "step": 3286 }, { "epoch": 0.05713640077178467, "grad_norm": 2.0772787450733916, "learning_rate": 9.98072890666283e-07, "loss": 0.3892, "step": 3287 }, { "epoch": 0.05715378330928749, "grad_norm": 1.8604927430342855, "learning_rate": 9.980704208122587e-07, "loss": 0.7437, "step": 3288 }, { "epoch": 0.057171165846790314, "grad_norm": 2.56678606399723, "learning_rate": 9.980679493795801e-07, "loss": 0.3282, "step": 3289 }, { "epoch": 0.05718854838429314, "grad_norm": 1.865243038806283, "learning_rate": 9.980654763682547e-07, "loss": 0.7064, "step": 3290 }, { "epoch": 0.05720593092179596, "grad_norm": 2.1008450078964023, "learning_rate": 9.980630017782907e-07, "loss": 0.7275, "step": 3291 }, { "epoch": 0.05722331345929879, "grad_norm": 1.4686413899111954, "learning_rate": 9.980605256096957e-07, "loss": 0.3457, "step": 3292 }, { "epoch": 0.057240695996801615, "grad_norm": 1.885273738349578, "learning_rate": 9.980580478624777e-07, "loss": 0.3139, "step": 3293 }, { "epoch": 0.05725807853430444, "grad_norm": 1.8092705995609346, "learning_rate": 9.980555685366445e-07, "loss": 0.4436, "step": 3294 }, { "epoch": 0.05727546107180726, "grad_norm": 2.0313540367200957, "learning_rate": 9.980530876322038e-07, "loss": 0.4548, "step": 3295 }, { "epoch": 0.057292843609310085, "grad_norm": 3.3730573229104763, "learning_rate": 9.980506051491637e-07, "loss": 0.2319, "step": 3296 }, { "epoch": 0.057310226146812915, "grad_norm": 2.4145913999869584, "learning_rate": 9.98048121087532e-07, "loss": 0.5837, "step": 3297 }, { "epoch": 0.05732760868431574, "grad_norm": 1.7342929246918266, "learning_rate": 9.980456354473163e-07, "loss": 0.4685, "step": 3298 }, { "epoch": 0.05734499122181856, "grad_norm": 2.4336921022558045, "learning_rate": 9.980431482285249e-07, "loss": 1.1621, "step": 3299 }, { "epoch": 0.057362373759321385, "grad_norm": 1.390810032785864, "learning_rate": 9.980406594311656e-07, "loss": 0.5961, "step": 3300 }, { "epoch": 0.05737975629682421, "grad_norm": 2.6019696900722016, "learning_rate": 9.980381690552461e-07, "loss": 0.2817, "step": 3301 }, { "epoch": 0.05739713883432704, "grad_norm": 1.3979659484665663, "learning_rate": 9.980356771007744e-07, "loss": 0.3386, "step": 3302 }, { "epoch": 0.05741452137182986, "grad_norm": 2.0652143376898606, "learning_rate": 9.980331835677584e-07, "loss": 0.2315, "step": 3303 }, { "epoch": 0.057431903909332685, "grad_norm": 2.461548601053264, "learning_rate": 9.98030688456206e-07, "loss": 0.5801, "step": 3304 }, { "epoch": 0.05744928644683551, "grad_norm": 2.8551042960321493, "learning_rate": 9.980281917661249e-07, "loss": 0.4814, "step": 3305 }, { "epoch": 0.05746666898433833, "grad_norm": 1.8820621949686742, "learning_rate": 9.980256934975234e-07, "loss": 0.5807, "step": 3306 }, { "epoch": 0.057484051521841155, "grad_norm": 1.6021935427860614, "learning_rate": 9.98023193650409e-07, "loss": 0.4331, "step": 3307 }, { "epoch": 0.057501434059343985, "grad_norm": 4.165657614157099, "learning_rate": 9.980206922247902e-07, "loss": 0.5928, "step": 3308 }, { "epoch": 0.05751881659684681, "grad_norm": 1.5783224045659305, "learning_rate": 9.980181892206744e-07, "loss": 0.5272, "step": 3309 }, { "epoch": 0.05753619913434963, "grad_norm": 1.0908591040029534, "learning_rate": 9.980156846380697e-07, "loss": 0.3741, "step": 3310 }, { "epoch": 0.057553581671852455, "grad_norm": 2.429599035691564, "learning_rate": 9.980131784769841e-07, "loss": 0.455, "step": 3311 }, { "epoch": 0.05757096420935528, "grad_norm": 1.3413857732918537, "learning_rate": 9.980106707374254e-07, "loss": 0.4993, "step": 3312 }, { "epoch": 0.05758834674685811, "grad_norm": 2.894291364082265, "learning_rate": 9.980081614194017e-07, "loss": 0.9584, "step": 3313 }, { "epoch": 0.05760572928436093, "grad_norm": 2.3722895226068643, "learning_rate": 9.980056505229208e-07, "loss": 0.418, "step": 3314 }, { "epoch": 0.057623111821863755, "grad_norm": 1.391615432051983, "learning_rate": 9.98003138047991e-07, "loss": 0.5107, "step": 3315 }, { "epoch": 0.05764049435936658, "grad_norm": 1.4980187031875607, "learning_rate": 9.980006239946198e-07, "loss": 0.4024, "step": 3316 }, { "epoch": 0.0576578768968694, "grad_norm": 2.210758322861201, "learning_rate": 9.979981083628155e-07, "loss": 0.9293, "step": 3317 }, { "epoch": 0.05767525943437223, "grad_norm": 1.709929004868691, "learning_rate": 9.97995591152586e-07, "loss": 0.6865, "step": 3318 }, { "epoch": 0.057692641971875055, "grad_norm": 2.066063649923837, "learning_rate": 9.97993072363939e-07, "loss": 0.3283, "step": 3319 }, { "epoch": 0.05771002450937788, "grad_norm": 2.082839249304017, "learning_rate": 9.979905519968828e-07, "loss": 0.4031, "step": 3320 }, { "epoch": 0.0577274070468807, "grad_norm": 2.655137849226962, "learning_rate": 9.979880300514255e-07, "loss": 0.4195, "step": 3321 }, { "epoch": 0.057744789584383525, "grad_norm": 1.9023612790123177, "learning_rate": 9.979855065275746e-07, "loss": 0.3531, "step": 3322 }, { "epoch": 0.057762172121886356, "grad_norm": 1.2293555070266995, "learning_rate": 9.979829814253387e-07, "loss": 0.2124, "step": 3323 }, { "epoch": 0.05777955465938918, "grad_norm": 2.20348551316466, "learning_rate": 9.979804547447252e-07, "loss": 0.3773, "step": 3324 }, { "epoch": 0.057796937196892, "grad_norm": 1.728556615953477, "learning_rate": 9.979779264857427e-07, "loss": 0.5129, "step": 3325 }, { "epoch": 0.057814319734394826, "grad_norm": 1.6232898660890427, "learning_rate": 9.979753966483988e-07, "loss": 0.288, "step": 3326 }, { "epoch": 0.05783170227189765, "grad_norm": 2.0366345784344575, "learning_rate": 9.979728652327017e-07, "loss": 0.7034, "step": 3327 }, { "epoch": 0.05784908480940048, "grad_norm": 3.0322255432360143, "learning_rate": 9.979703322386591e-07, "loss": 0.6607, "step": 3328 }, { "epoch": 0.0578664673469033, "grad_norm": 1.5088664371930784, "learning_rate": 9.979677976662797e-07, "loss": 0.2902, "step": 3329 }, { "epoch": 0.057883849884406126, "grad_norm": 2.771689525654452, "learning_rate": 9.979652615155708e-07, "loss": 0.6413, "step": 3330 }, { "epoch": 0.05790123242190895, "grad_norm": 1.982715761668563, "learning_rate": 9.97962723786541e-07, "loss": 0.2536, "step": 3331 }, { "epoch": 0.05791861495941177, "grad_norm": 2.1541933229553463, "learning_rate": 9.97960184479198e-07, "loss": 0.537, "step": 3332 }, { "epoch": 0.0579359974969146, "grad_norm": 1.4277439400950205, "learning_rate": 9.9795764359355e-07, "loss": 0.5259, "step": 3333 }, { "epoch": 0.057953380034417426, "grad_norm": 2.6891318167229867, "learning_rate": 9.97955101129605e-07, "loss": 0.2312, "step": 3334 }, { "epoch": 0.05797076257192025, "grad_norm": 1.7556658761792827, "learning_rate": 9.97952557087371e-07, "loss": 0.2911, "step": 3335 }, { "epoch": 0.05798814510942307, "grad_norm": 1.4671207648744016, "learning_rate": 9.979500114668562e-07, "loss": 0.2686, "step": 3336 }, { "epoch": 0.058005527646925896, "grad_norm": 2.449149326152483, "learning_rate": 9.979474642680688e-07, "loss": 0.6203, "step": 3337 }, { "epoch": 0.058022910184428726, "grad_norm": 2.498650276798147, "learning_rate": 9.979449154910165e-07, "loss": 0.5791, "step": 3338 }, { "epoch": 0.05804029272193155, "grad_norm": 2.443897956163478, "learning_rate": 9.979423651357074e-07, "loss": 0.3475, "step": 3339 }, { "epoch": 0.05805767525943437, "grad_norm": 1.9747321996067801, "learning_rate": 9.979398132021498e-07, "loss": 0.2977, "step": 3340 }, { "epoch": 0.058075057796937196, "grad_norm": 3.9607162247096506, "learning_rate": 9.97937259690352e-07, "loss": 0.4646, "step": 3341 }, { "epoch": 0.05809244033444002, "grad_norm": 2.7323724149418602, "learning_rate": 9.979347046003214e-07, "loss": 0.3737, "step": 3342 }, { "epoch": 0.05810982287194284, "grad_norm": 1.7119853988818516, "learning_rate": 9.979321479320668e-07, "loss": 0.5347, "step": 3343 }, { "epoch": 0.05812720540944567, "grad_norm": 1.4894302461126185, "learning_rate": 9.97929589685596e-07, "loss": 0.6112, "step": 3344 }, { "epoch": 0.058144587946948496, "grad_norm": 3.3076085868762837, "learning_rate": 9.979270298609172e-07, "loss": 0.7871, "step": 3345 }, { "epoch": 0.05816197048445132, "grad_norm": 1.9730929601175862, "learning_rate": 9.979244684580384e-07, "loss": 0.3346, "step": 3346 }, { "epoch": 0.05817935302195414, "grad_norm": 1.2925560998343812, "learning_rate": 9.979219054769676e-07, "loss": 0.6435, "step": 3347 }, { "epoch": 0.058196735559456966, "grad_norm": 1.9132753371946183, "learning_rate": 9.97919340917713e-07, "loss": 0.6643, "step": 3348 }, { "epoch": 0.0582141180969598, "grad_norm": 1.5301147754842874, "learning_rate": 9.979167747802832e-07, "loss": 0.5725, "step": 3349 }, { "epoch": 0.05823150063446262, "grad_norm": 2.3408888361685634, "learning_rate": 9.979142070646858e-07, "loss": 0.5612, "step": 3350 }, { "epoch": 0.05824888317196544, "grad_norm": 3.3023713356458653, "learning_rate": 9.97911637770929e-07, "loss": 0.6813, "step": 3351 }, { "epoch": 0.058266265709468266, "grad_norm": 3.6506080636959037, "learning_rate": 9.97909066899021e-07, "loss": 0.4844, "step": 3352 }, { "epoch": 0.05828364824697109, "grad_norm": 1.671967556604807, "learning_rate": 9.9790649444897e-07, "loss": 0.3664, "step": 3353 }, { "epoch": 0.05830103078447392, "grad_norm": 2.0757156713128095, "learning_rate": 9.979039204207842e-07, "loss": 0.5877, "step": 3354 }, { "epoch": 0.05831841332197674, "grad_norm": 2.870565857644593, "learning_rate": 9.979013448144717e-07, "loss": 0.4371, "step": 3355 }, { "epoch": 0.05833579585947957, "grad_norm": 2.275930260336682, "learning_rate": 9.978987676300406e-07, "loss": 0.5992, "step": 3356 }, { "epoch": 0.05835317839698239, "grad_norm": 1.4802104718962052, "learning_rate": 9.978961888674991e-07, "loss": 0.3878, "step": 3357 }, { "epoch": 0.05837056093448521, "grad_norm": 2.6325905724140584, "learning_rate": 9.978936085268555e-07, "loss": 0.5479, "step": 3358 }, { "epoch": 0.058387943471988044, "grad_norm": 1.929457258585753, "learning_rate": 9.978910266081178e-07, "loss": 0.388, "step": 3359 }, { "epoch": 0.05840532600949087, "grad_norm": 1.8301132167236638, "learning_rate": 9.978884431112943e-07, "loss": 0.571, "step": 3360 }, { "epoch": 0.05842270854699369, "grad_norm": 2.0755035107404836, "learning_rate": 9.978858580363931e-07, "loss": 0.6367, "step": 3361 }, { "epoch": 0.058440091084496514, "grad_norm": 2.0532974776812893, "learning_rate": 9.978832713834224e-07, "loss": 0.4477, "step": 3362 }, { "epoch": 0.05845747362199934, "grad_norm": 1.4342432828368457, "learning_rate": 9.978806831523907e-07, "loss": 0.5115, "step": 3363 }, { "epoch": 0.05847485615950217, "grad_norm": 2.3981057253196907, "learning_rate": 9.978780933433057e-07, "loss": 0.4426, "step": 3364 }, { "epoch": 0.05849223869700499, "grad_norm": 2.092732363757424, "learning_rate": 9.978755019561757e-07, "loss": 0.518, "step": 3365 }, { "epoch": 0.058509621234507814, "grad_norm": 1.8257227687692794, "learning_rate": 9.978729089910095e-07, "loss": 0.4518, "step": 3366 }, { "epoch": 0.05852700377201064, "grad_norm": 1.8570625502235725, "learning_rate": 9.978703144478144e-07, "loss": 0.5037, "step": 3367 }, { "epoch": 0.05854438630951346, "grad_norm": 1.8741876793680434, "learning_rate": 9.978677183265996e-07, "loss": 0.4538, "step": 3368 }, { "epoch": 0.05856176884701629, "grad_norm": 1.9888882959470653, "learning_rate": 9.978651206273724e-07, "loss": 0.3755, "step": 3369 }, { "epoch": 0.058579151384519114, "grad_norm": 1.7441844909477533, "learning_rate": 9.978625213501417e-07, "loss": 0.44, "step": 3370 }, { "epoch": 0.05859653392202194, "grad_norm": 2.971966836047531, "learning_rate": 9.978599204949153e-07, "loss": 0.3138, "step": 3371 }, { "epoch": 0.05861391645952476, "grad_norm": 1.4789132446599043, "learning_rate": 9.97857318061702e-07, "loss": 0.2943, "step": 3372 }, { "epoch": 0.058631298997027584, "grad_norm": 1.330556675430827, "learning_rate": 9.978547140505095e-07, "loss": 0.2555, "step": 3373 }, { "epoch": 0.058648681534530414, "grad_norm": 2.041018965696273, "learning_rate": 9.978521084613464e-07, "loss": 0.6326, "step": 3374 }, { "epoch": 0.05866606407203324, "grad_norm": 1.670486320128263, "learning_rate": 9.978495012942206e-07, "loss": 0.3314, "step": 3375 }, { "epoch": 0.05868344660953606, "grad_norm": 2.533244227917406, "learning_rate": 9.978468925491406e-07, "loss": 0.5077, "step": 3376 }, { "epoch": 0.058700829147038884, "grad_norm": 1.9253019728639245, "learning_rate": 9.978442822261145e-07, "loss": 0.3386, "step": 3377 }, { "epoch": 0.05871821168454171, "grad_norm": 1.604392753043763, "learning_rate": 9.97841670325151e-07, "loss": 0.6604, "step": 3378 }, { "epoch": 0.05873559422204453, "grad_norm": 7.750663325534932, "learning_rate": 9.97839056846258e-07, "loss": 0.4308, "step": 3379 }, { "epoch": 0.05875297675954736, "grad_norm": 2.0990523321278216, "learning_rate": 9.978364417894438e-07, "loss": 0.4781, "step": 3380 }, { "epoch": 0.058770359297050184, "grad_norm": 2.356924018011239, "learning_rate": 9.97833825154717e-07, "loss": 0.384, "step": 3381 }, { "epoch": 0.05878774183455301, "grad_norm": 1.9767167957202971, "learning_rate": 9.978312069420854e-07, "loss": 0.6474, "step": 3382 }, { "epoch": 0.05880512437205583, "grad_norm": 1.579863604959685, "learning_rate": 9.978285871515577e-07, "loss": 0.3817, "step": 3383 }, { "epoch": 0.058822506909558654, "grad_norm": 1.5352453189762092, "learning_rate": 9.97825965783142e-07, "loss": 0.3231, "step": 3384 }, { "epoch": 0.058839889447061484, "grad_norm": 2.258881530459627, "learning_rate": 9.978233428368467e-07, "loss": 0.2572, "step": 3385 }, { "epoch": 0.05885727198456431, "grad_norm": 1.4673224329412724, "learning_rate": 9.9782071831268e-07, "loss": 0.2611, "step": 3386 }, { "epoch": 0.05887465452206713, "grad_norm": 2.8563064186613043, "learning_rate": 9.978180922106505e-07, "loss": 0.4665, "step": 3387 }, { "epoch": 0.058892037059569954, "grad_norm": 4.831232801185969, "learning_rate": 9.97815464530766e-07, "loss": 0.4138, "step": 3388 }, { "epoch": 0.05890941959707278, "grad_norm": 1.9155529740817612, "learning_rate": 9.978128352730354e-07, "loss": 0.273, "step": 3389 }, { "epoch": 0.05892680213457561, "grad_norm": 1.7976731059447404, "learning_rate": 9.978102044374669e-07, "loss": 0.418, "step": 3390 }, { "epoch": 0.05894418467207843, "grad_norm": 1.6394403934576578, "learning_rate": 9.978075720240686e-07, "loss": 0.1819, "step": 3391 }, { "epoch": 0.058961567209581255, "grad_norm": 2.0270997908841606, "learning_rate": 9.97804938032849e-07, "loss": 0.7175, "step": 3392 }, { "epoch": 0.05897894974708408, "grad_norm": 2.209260616967481, "learning_rate": 9.978023024638163e-07, "loss": 0.5057, "step": 3393 }, { "epoch": 0.0589963322845869, "grad_norm": 3.3223503916796946, "learning_rate": 9.97799665316979e-07, "loss": 0.3664, "step": 3394 }, { "epoch": 0.05901371482208973, "grad_norm": 2.841449809084264, "learning_rate": 9.977970265923454e-07, "loss": 0.5628, "step": 3395 }, { "epoch": 0.059031097359592555, "grad_norm": 1.9805614785474057, "learning_rate": 9.97794386289924e-07, "loss": 0.5804, "step": 3396 }, { "epoch": 0.05904847989709538, "grad_norm": 1.6838254948750424, "learning_rate": 9.97791744409723e-07, "loss": 0.3049, "step": 3397 }, { "epoch": 0.0590658624345982, "grad_norm": 1.6834647923334725, "learning_rate": 9.97789100951751e-07, "loss": 0.4559, "step": 3398 }, { "epoch": 0.059083244972101025, "grad_norm": 2.1955267880801554, "learning_rate": 9.97786455916016e-07, "loss": 0.4917, "step": 3399 }, { "epoch": 0.059100627509603855, "grad_norm": 2.649984912008863, "learning_rate": 9.977838093025267e-07, "loss": 0.5003, "step": 3400 }, { "epoch": 0.05911801004710668, "grad_norm": 1.748090495569205, "learning_rate": 9.977811611112914e-07, "loss": 0.3244, "step": 3401 }, { "epoch": 0.0591353925846095, "grad_norm": 1.834782160067974, "learning_rate": 9.977785113423185e-07, "loss": 0.748, "step": 3402 }, { "epoch": 0.059152775122112325, "grad_norm": 1.8380436367579864, "learning_rate": 9.977758599956163e-07, "loss": 0.3727, "step": 3403 }, { "epoch": 0.05917015765961515, "grad_norm": 1.9877789936722756, "learning_rate": 9.977732070711932e-07, "loss": 0.2893, "step": 3404 }, { "epoch": 0.05918754019711798, "grad_norm": 1.742474433010321, "learning_rate": 9.977705525690578e-07, "loss": 0.373, "step": 3405 }, { "epoch": 0.0592049227346208, "grad_norm": 1.7247707953322902, "learning_rate": 9.977678964892185e-07, "loss": 0.4583, "step": 3406 }, { "epoch": 0.059222305272123625, "grad_norm": 1.9471361161690601, "learning_rate": 9.977652388316835e-07, "loss": 0.593, "step": 3407 }, { "epoch": 0.05923968780962645, "grad_norm": 1.8730668350348811, "learning_rate": 9.977625795964614e-07, "loss": 0.5118, "step": 3408 }, { "epoch": 0.05925707034712927, "grad_norm": 1.7399505398491333, "learning_rate": 9.977599187835606e-07, "loss": 0.4385, "step": 3409 }, { "epoch": 0.0592744528846321, "grad_norm": 3.0258557672863984, "learning_rate": 9.977572563929894e-07, "loss": 1.2347, "step": 3410 }, { "epoch": 0.059291835422134925, "grad_norm": 1.8583430933149139, "learning_rate": 9.977545924247566e-07, "loss": 0.5237, "step": 3411 }, { "epoch": 0.05930921795963775, "grad_norm": 3.057713747832469, "learning_rate": 9.9775192687887e-07, "loss": 0.6242, "step": 3412 }, { "epoch": 0.05932660049714057, "grad_norm": 1.2342320514734002, "learning_rate": 9.977492597553387e-07, "loss": 0.6211, "step": 3413 }, { "epoch": 0.059343983034643395, "grad_norm": 2.01506024655584, "learning_rate": 9.977465910541708e-07, "loss": 0.4443, "step": 3414 }, { "epoch": 0.05936136557214622, "grad_norm": 2.041716559869041, "learning_rate": 9.977439207753749e-07, "loss": 0.4077, "step": 3415 }, { "epoch": 0.05937874810964905, "grad_norm": 2.5236991355776626, "learning_rate": 9.977412489189593e-07, "loss": 0.8429, "step": 3416 }, { "epoch": 0.05939613064715187, "grad_norm": 1.6576293553179702, "learning_rate": 9.977385754849328e-07, "loss": 0.6786, "step": 3417 }, { "epoch": 0.059413513184654695, "grad_norm": 2.1674645513806765, "learning_rate": 9.977359004733036e-07, "loss": 0.6181, "step": 3418 }, { "epoch": 0.05943089572215752, "grad_norm": 1.741254180990965, "learning_rate": 9.977332238840802e-07, "loss": 0.3354, "step": 3419 }, { "epoch": 0.05944827825966034, "grad_norm": 2.7776537184003476, "learning_rate": 9.97730545717271e-07, "loss": 0.6401, "step": 3420 }, { "epoch": 0.05946566079716317, "grad_norm": 2.2730580782201804, "learning_rate": 9.977278659728849e-07, "loss": 0.7297, "step": 3421 }, { "epoch": 0.059483043334665996, "grad_norm": 2.1446948615420838, "learning_rate": 9.977251846509299e-07, "loss": 0.5915, "step": 3422 }, { "epoch": 0.05950042587216882, "grad_norm": 1.6539595343502127, "learning_rate": 9.977225017514148e-07, "loss": 0.293, "step": 3423 }, { "epoch": 0.05951780840967164, "grad_norm": 1.3946413541979843, "learning_rate": 9.977198172743478e-07, "loss": 0.6691, "step": 3424 }, { "epoch": 0.059535190947174466, "grad_norm": 2.024500102060884, "learning_rate": 9.977171312197377e-07, "loss": 0.561, "step": 3425 }, { "epoch": 0.059552573484677296, "grad_norm": 2.019577964687651, "learning_rate": 9.97714443587593e-07, "loss": 0.4118, "step": 3426 }, { "epoch": 0.05956995602218012, "grad_norm": 2.5169747938218743, "learning_rate": 9.97711754377922e-07, "loss": 0.4357, "step": 3427 }, { "epoch": 0.05958733855968294, "grad_norm": 3.5172965733600647, "learning_rate": 9.977090635907336e-07, "loss": 0.4589, "step": 3428 }, { "epoch": 0.059604721097185766, "grad_norm": 1.9873463228630115, "learning_rate": 9.97706371226036e-07, "loss": 0.4253, "step": 3429 }, { "epoch": 0.05962210363468859, "grad_norm": 1.6004779156817548, "learning_rate": 9.977036772838379e-07, "loss": 0.54, "step": 3430 }, { "epoch": 0.05963948617219142, "grad_norm": 1.4335185863107809, "learning_rate": 9.977009817641474e-07, "loss": 0.5145, "step": 3431 }, { "epoch": 0.05965686870969424, "grad_norm": 2.3173444763285, "learning_rate": 9.97698284666974e-07, "loss": 0.3029, "step": 3432 }, { "epoch": 0.059674251247197066, "grad_norm": 2.2877178924774815, "learning_rate": 9.976955859923253e-07, "loss": 0.2914, "step": 3433 }, { "epoch": 0.05969163378469989, "grad_norm": 2.7731980469870865, "learning_rate": 9.976928857402104e-07, "loss": 0.4313, "step": 3434 }, { "epoch": 0.05970901632220271, "grad_norm": 1.5918032823241821, "learning_rate": 9.976901839106374e-07, "loss": 0.5316, "step": 3435 }, { "epoch": 0.05972639885970554, "grad_norm": 2.334699173448986, "learning_rate": 9.976874805036152e-07, "loss": 0.3147, "step": 3436 }, { "epoch": 0.059743781397208366, "grad_norm": 2.168564598308202, "learning_rate": 9.976847755191524e-07, "loss": 0.3365, "step": 3437 }, { "epoch": 0.05976116393471119, "grad_norm": 1.1508336593988637, "learning_rate": 9.976820689572575e-07, "loss": 0.1945, "step": 3438 }, { "epoch": 0.05977854647221401, "grad_norm": 1.5700248416731473, "learning_rate": 9.97679360817939e-07, "loss": 0.3668, "step": 3439 }, { "epoch": 0.059795929009716836, "grad_norm": 2.035458735350061, "learning_rate": 9.976766511012055e-07, "loss": 0.4514, "step": 3440 }, { "epoch": 0.059813311547219666, "grad_norm": 1.503171234373795, "learning_rate": 9.976739398070657e-07, "loss": 0.7479, "step": 3441 }, { "epoch": 0.05983069408472249, "grad_norm": 3.6488432612134547, "learning_rate": 9.97671226935528e-07, "loss": 0.3589, "step": 3442 }, { "epoch": 0.05984807662222531, "grad_norm": 2.073901899399372, "learning_rate": 9.976685124866013e-07, "loss": 0.3382, "step": 3443 }, { "epoch": 0.059865459159728136, "grad_norm": 3.067948601277482, "learning_rate": 9.97665796460294e-07, "loss": 0.5992, "step": 3444 }, { "epoch": 0.05988284169723096, "grad_norm": 1.53501510184897, "learning_rate": 9.976630788566147e-07, "loss": 0.5208, "step": 3445 }, { "epoch": 0.05990022423473379, "grad_norm": 3.962728332732352, "learning_rate": 9.976603596755722e-07, "loss": 0.5848, "step": 3446 }, { "epoch": 0.05991760677223661, "grad_norm": 1.9873287208048656, "learning_rate": 9.976576389171747e-07, "loss": 0.304, "step": 3447 }, { "epoch": 0.059934989309739437, "grad_norm": 1.6902215880668117, "learning_rate": 9.976549165814313e-07, "loss": 0.5091, "step": 3448 }, { "epoch": 0.05995237184724226, "grad_norm": 1.7342082080265206, "learning_rate": 9.976521926683503e-07, "loss": 0.5531, "step": 3449 }, { "epoch": 0.05996975438474508, "grad_norm": 2.547160244992383, "learning_rate": 9.976494671779404e-07, "loss": 0.4879, "step": 3450 }, { "epoch": 0.059987136922247906, "grad_norm": 3.3670213281984642, "learning_rate": 9.976467401102103e-07, "loss": 0.2651, "step": 3451 }, { "epoch": 0.06000451945975074, "grad_norm": 1.8680061479326207, "learning_rate": 9.976440114651688e-07, "loss": 0.628, "step": 3452 }, { "epoch": 0.06002190199725356, "grad_norm": 1.623090095704559, "learning_rate": 9.976412812428244e-07, "loss": 0.6447, "step": 3453 }, { "epoch": 0.06003928453475638, "grad_norm": 1.621899014691152, "learning_rate": 9.976385494431854e-07, "loss": 0.3359, "step": 3454 }, { "epoch": 0.06005666707225921, "grad_norm": 1.283771334044028, "learning_rate": 9.97635816066261e-07, "loss": 0.3785, "step": 3455 }, { "epoch": 0.06007404960976203, "grad_norm": 1.5262501321330593, "learning_rate": 9.976330811120597e-07, "loss": 0.5313, "step": 3456 }, { "epoch": 0.06009143214726486, "grad_norm": 2.285766488646099, "learning_rate": 9.976303445805902e-07, "loss": 0.4789, "step": 3457 }, { "epoch": 0.060108814684767684, "grad_norm": 2.2781830011141992, "learning_rate": 9.976276064718608e-07, "loss": 0.4623, "step": 3458 }, { "epoch": 0.06012619722227051, "grad_norm": 1.2330400080037554, "learning_rate": 9.97624866785881e-07, "loss": 0.3591, "step": 3459 }, { "epoch": 0.06014357975977333, "grad_norm": 1.5548532822214647, "learning_rate": 9.976221255226584e-07, "loss": 0.474, "step": 3460 }, { "epoch": 0.06016096229727615, "grad_norm": 2.2239505101968198, "learning_rate": 9.976193826822027e-07, "loss": 0.3269, "step": 3461 }, { "epoch": 0.060178344834778984, "grad_norm": 2.6887611921080823, "learning_rate": 9.976166382645216e-07, "loss": 0.4619, "step": 3462 }, { "epoch": 0.06019572737228181, "grad_norm": 2.6661616060538966, "learning_rate": 9.97613892269625e-07, "loss": 0.3849, "step": 3463 }, { "epoch": 0.06021310990978463, "grad_norm": 1.3520027333533486, "learning_rate": 9.976111446975204e-07, "loss": 0.4441, "step": 3464 }, { "epoch": 0.060230492447287454, "grad_norm": 2.4983806742897787, "learning_rate": 9.976083955482174e-07, "loss": 0.7846, "step": 3465 }, { "epoch": 0.06024787498479028, "grad_norm": 1.4415010557119523, "learning_rate": 9.976056448217244e-07, "loss": 0.3777, "step": 3466 }, { "epoch": 0.06026525752229311, "grad_norm": 3.085336556155551, "learning_rate": 9.976028925180497e-07, "loss": 0.6438, "step": 3467 }, { "epoch": 0.06028264005979593, "grad_norm": 2.567413426444372, "learning_rate": 9.976001386372026e-07, "loss": 0.5591, "step": 3468 }, { "epoch": 0.060300022597298754, "grad_norm": 2.739456150750411, "learning_rate": 9.975973831791916e-07, "loss": 0.6606, "step": 3469 }, { "epoch": 0.06031740513480158, "grad_norm": 1.642417217091182, "learning_rate": 9.975946261440259e-07, "loss": 0.4919, "step": 3470 }, { "epoch": 0.0603347876723044, "grad_norm": 2.838609658379506, "learning_rate": 9.975918675317133e-07, "loss": 0.4867, "step": 3471 }, { "epoch": 0.06035217020980723, "grad_norm": 2.0844799505140656, "learning_rate": 9.975891073422631e-07, "loss": 0.5033, "step": 3472 }, { "epoch": 0.060369552747310054, "grad_norm": 2.4220056760676987, "learning_rate": 9.975863455756841e-07, "loss": 0.6285, "step": 3473 }, { "epoch": 0.06038693528481288, "grad_norm": 1.5695280443705255, "learning_rate": 9.975835822319851e-07, "loss": 0.3693, "step": 3474 }, { "epoch": 0.0604043178223157, "grad_norm": 3.3965305389505516, "learning_rate": 9.975808173111747e-07, "loss": 0.343, "step": 3475 }, { "epoch": 0.060421700359818524, "grad_norm": 2.271597797422207, "learning_rate": 9.975780508132615e-07, "loss": 0.7055, "step": 3476 }, { "epoch": 0.060439082897321354, "grad_norm": 2.286178182976606, "learning_rate": 9.975752827382545e-07, "loss": 0.8299, "step": 3477 }, { "epoch": 0.06045646543482418, "grad_norm": 2.357478397606372, "learning_rate": 9.975725130861624e-07, "loss": 0.3709, "step": 3478 }, { "epoch": 0.060473847972327, "grad_norm": 1.3296168490756297, "learning_rate": 9.97569741856994e-07, "loss": 0.2378, "step": 3479 }, { "epoch": 0.060491230509829824, "grad_norm": 2.6263992327528554, "learning_rate": 9.975669690507582e-07, "loss": 0.3441, "step": 3480 }, { "epoch": 0.06050861304733265, "grad_norm": 2.5604490049431385, "learning_rate": 9.975641946674636e-07, "loss": 0.2485, "step": 3481 }, { "epoch": 0.06052599558483547, "grad_norm": 2.8707149645500483, "learning_rate": 9.97561418707119e-07, "loss": 0.7914, "step": 3482 }, { "epoch": 0.0605433781223383, "grad_norm": 3.485037820716208, "learning_rate": 9.975586411697335e-07, "loss": 0.7028, "step": 3483 }, { "epoch": 0.060560760659841124, "grad_norm": 1.8678767807104109, "learning_rate": 9.975558620553155e-07, "loss": 0.4063, "step": 3484 }, { "epoch": 0.06057814319734395, "grad_norm": 4.051540950369108, "learning_rate": 9.975530813638739e-07, "loss": 0.3098, "step": 3485 }, { "epoch": 0.06059552573484677, "grad_norm": 2.7431577598314694, "learning_rate": 9.975502990954176e-07, "loss": 0.6428, "step": 3486 }, { "epoch": 0.060612908272349594, "grad_norm": 2.233573480975856, "learning_rate": 9.975475152499555e-07, "loss": 0.4163, "step": 3487 }, { "epoch": 0.060630290809852425, "grad_norm": 1.933326489211788, "learning_rate": 9.975447298274965e-07, "loss": 0.4414, "step": 3488 }, { "epoch": 0.06064767334735525, "grad_norm": 1.426457912826069, "learning_rate": 9.97541942828049e-07, "loss": 0.2178, "step": 3489 }, { "epoch": 0.06066505588485807, "grad_norm": 1.914347436620128, "learning_rate": 9.97539154251622e-07, "loss": 0.8646, "step": 3490 }, { "epoch": 0.060682438422360895, "grad_norm": 1.8001504798855212, "learning_rate": 9.975363640982247e-07, "loss": 0.7078, "step": 3491 }, { "epoch": 0.06069982095986372, "grad_norm": 2.691456358730807, "learning_rate": 9.975335723678656e-07, "loss": 0.5848, "step": 3492 }, { "epoch": 0.06071720349736655, "grad_norm": 2.141957086537983, "learning_rate": 9.975307790605535e-07, "loss": 0.4561, "step": 3493 }, { "epoch": 0.06073458603486937, "grad_norm": 3.2074424068166656, "learning_rate": 9.975279841762975e-07, "loss": 0.2107, "step": 3494 }, { "epoch": 0.060751968572372195, "grad_norm": 4.27628699972749, "learning_rate": 9.975251877151063e-07, "loss": 0.5472, "step": 3495 }, { "epoch": 0.06076935110987502, "grad_norm": 1.9744873569626613, "learning_rate": 9.975223896769887e-07, "loss": 0.2544, "step": 3496 }, { "epoch": 0.06078673364737784, "grad_norm": 1.7178410326154625, "learning_rate": 9.975195900619538e-07, "loss": 0.3696, "step": 3497 }, { "epoch": 0.06080411618488067, "grad_norm": 1.6689510333078499, "learning_rate": 9.975167888700103e-07, "loss": 0.666, "step": 3498 }, { "epoch": 0.060821498722383495, "grad_norm": 2.114443187472777, "learning_rate": 9.975139861011672e-07, "loss": 0.3003, "step": 3499 }, { "epoch": 0.06083888125988632, "grad_norm": 2.8470077614272356, "learning_rate": 9.97511181755433e-07, "loss": 0.4167, "step": 3500 }, { "epoch": 0.06085626379738914, "grad_norm": 2.4921555987918236, "learning_rate": 9.975083758328174e-07, "loss": 0.7357, "step": 3501 }, { "epoch": 0.060873646334891965, "grad_norm": 1.3838697737118155, "learning_rate": 9.975055683333285e-07, "loss": 0.3808, "step": 3502 }, { "epoch": 0.060891028872394795, "grad_norm": 1.6100672837022718, "learning_rate": 9.975027592569753e-07, "loss": 0.4797, "step": 3503 }, { "epoch": 0.06090841140989762, "grad_norm": 2.2987997828373734, "learning_rate": 9.974999486037672e-07, "loss": 0.5654, "step": 3504 }, { "epoch": 0.06092579394740044, "grad_norm": 1.429460552047689, "learning_rate": 9.974971363737126e-07, "loss": 0.4003, "step": 3505 }, { "epoch": 0.060943176484903265, "grad_norm": 2.10020563733295, "learning_rate": 9.974943225668208e-07, "loss": 0.5885, "step": 3506 }, { "epoch": 0.06096055902240609, "grad_norm": 1.504461327556583, "learning_rate": 9.974915071831003e-07, "loss": 0.5334, "step": 3507 }, { "epoch": 0.06097794155990892, "grad_norm": 2.1877388438261662, "learning_rate": 9.974886902225605e-07, "loss": 0.8573, "step": 3508 }, { "epoch": 0.06099532409741174, "grad_norm": 2.0236681881238385, "learning_rate": 9.9748587168521e-07, "loss": 0.8501, "step": 3509 }, { "epoch": 0.061012706634914565, "grad_norm": 1.7281353456226238, "learning_rate": 9.974830515710576e-07, "loss": 0.6092, "step": 3510 }, { "epoch": 0.06103008917241739, "grad_norm": 1.6752723105791785, "learning_rate": 9.974802298801127e-07, "loss": 0.4499, "step": 3511 }, { "epoch": 0.06104747170992021, "grad_norm": 1.8306343556755793, "learning_rate": 9.974774066123838e-07, "loss": 0.4795, "step": 3512 }, { "epoch": 0.06106485424742304, "grad_norm": 1.5528737398182204, "learning_rate": 9.974745817678803e-07, "loss": 0.5806, "step": 3513 }, { "epoch": 0.061082236784925865, "grad_norm": 3.717654455732081, "learning_rate": 9.974717553466104e-07, "loss": 0.3042, "step": 3514 }, { "epoch": 0.06109961932242869, "grad_norm": 2.9781606939382366, "learning_rate": 9.974689273485842e-07, "loss": 0.4674, "step": 3515 }, { "epoch": 0.06111700185993151, "grad_norm": 1.6160255056017072, "learning_rate": 9.974660977738096e-07, "loss": 0.4245, "step": 3516 }, { "epoch": 0.061134384397434335, "grad_norm": 1.890015379448377, "learning_rate": 9.974632666222962e-07, "loss": 0.5051, "step": 3517 }, { "epoch": 0.06115176693493716, "grad_norm": 2.4075487105117532, "learning_rate": 9.974604338940527e-07, "loss": 0.3655, "step": 3518 }, { "epoch": 0.06116914947243999, "grad_norm": 1.624477296924059, "learning_rate": 9.97457599589088e-07, "loss": 0.3198, "step": 3519 }, { "epoch": 0.06118653200994281, "grad_norm": 1.490637909250099, "learning_rate": 9.974547637074113e-07, "loss": 0.4739, "step": 3520 }, { "epoch": 0.061203914547445636, "grad_norm": 1.9646346982432854, "learning_rate": 9.974519262490316e-07, "loss": 0.6482, "step": 3521 }, { "epoch": 0.06122129708494846, "grad_norm": 2.908143043631489, "learning_rate": 9.974490872139576e-07, "loss": 0.446, "step": 3522 }, { "epoch": 0.06123867962245128, "grad_norm": 2.9738317871523576, "learning_rate": 9.974462466021987e-07, "loss": 0.7577, "step": 3523 }, { "epoch": 0.06125606215995411, "grad_norm": 1.6226766363532954, "learning_rate": 9.974434044137638e-07, "loss": 0.7141, "step": 3524 }, { "epoch": 0.061273444697456936, "grad_norm": 2.5073848978667774, "learning_rate": 9.974405606486616e-07, "loss": 0.3564, "step": 3525 }, { "epoch": 0.06129082723495976, "grad_norm": 2.9542157546546166, "learning_rate": 9.974377153069013e-07, "loss": 0.5707, "step": 3526 }, { "epoch": 0.06130820977246258, "grad_norm": 1.8760568312541797, "learning_rate": 9.974348683884921e-07, "loss": 0.3991, "step": 3527 }, { "epoch": 0.061325592309965406, "grad_norm": 1.633070444973766, "learning_rate": 9.97432019893443e-07, "loss": 0.4677, "step": 3528 }, { "epoch": 0.061342974847468236, "grad_norm": 1.9164431036802014, "learning_rate": 9.974291698217626e-07, "loss": 0.6739, "step": 3529 }, { "epoch": 0.06136035738497106, "grad_norm": 1.5511546870709474, "learning_rate": 9.974263181734604e-07, "loss": 0.6842, "step": 3530 }, { "epoch": 0.06137773992247388, "grad_norm": 2.707532042437973, "learning_rate": 9.974234649485451e-07, "loss": 0.7546, "step": 3531 }, { "epoch": 0.061395122459976706, "grad_norm": 2.0903364107783324, "learning_rate": 9.974206101470258e-07, "loss": 0.2858, "step": 3532 }, { "epoch": 0.06141250499747953, "grad_norm": 1.8941167130629888, "learning_rate": 9.97417753768912e-07, "loss": 0.3489, "step": 3533 }, { "epoch": 0.06142988753498236, "grad_norm": 3.5650686130230422, "learning_rate": 9.974148958142124e-07, "loss": 0.3685, "step": 3534 }, { "epoch": 0.06144727007248518, "grad_norm": 2.5802212127888375, "learning_rate": 9.97412036282936e-07, "loss": 0.3566, "step": 3535 }, { "epoch": 0.061464652609988006, "grad_norm": 2.4907959884974535, "learning_rate": 9.97409175175092e-07, "loss": 0.6333, "step": 3536 }, { "epoch": 0.06148203514749083, "grad_norm": 2.2633774485793325, "learning_rate": 9.974063124906891e-07, "loss": 0.3739, "step": 3537 }, { "epoch": 0.06149941768499365, "grad_norm": 1.8528115574125241, "learning_rate": 9.97403448229737e-07, "loss": 0.6223, "step": 3538 }, { "epoch": 0.06151680022249648, "grad_norm": 1.6395908995311745, "learning_rate": 9.974005823922444e-07, "loss": 0.3791, "step": 3539 }, { "epoch": 0.061534182759999306, "grad_norm": 1.9082192564086558, "learning_rate": 9.973977149782203e-07, "loss": 0.5743, "step": 3540 }, { "epoch": 0.06155156529750213, "grad_norm": 1.9751680388860386, "learning_rate": 9.97394845987674e-07, "loss": 0.5698, "step": 3541 }, { "epoch": 0.06156894783500495, "grad_norm": 2.4832887556448333, "learning_rate": 9.973919754206145e-07, "loss": 0.355, "step": 3542 }, { "epoch": 0.061586330372507776, "grad_norm": 1.555681483040838, "learning_rate": 9.97389103277051e-07, "loss": 0.3661, "step": 3543 }, { "epoch": 0.061603712910010607, "grad_norm": 2.8583549880904218, "learning_rate": 9.973862295569927e-07, "loss": 0.4342, "step": 3544 }, { "epoch": 0.06162109544751343, "grad_norm": 4.260733518395719, "learning_rate": 9.973833542604482e-07, "loss": 0.9569, "step": 3545 }, { "epoch": 0.06163847798501625, "grad_norm": 1.8995967196959826, "learning_rate": 9.973804773874271e-07, "loss": 0.709, "step": 3546 }, { "epoch": 0.061655860522519076, "grad_norm": 1.796603436361639, "learning_rate": 9.973775989379382e-07, "loss": 0.2961, "step": 3547 }, { "epoch": 0.0616732430600219, "grad_norm": 2.2348587923662335, "learning_rate": 9.97374718911991e-07, "loss": 0.4459, "step": 3548 }, { "epoch": 0.06169062559752473, "grad_norm": 1.8826608270751395, "learning_rate": 9.973718373095943e-07, "loss": 0.5719, "step": 3549 }, { "epoch": 0.06170800813502755, "grad_norm": 2.8820570382628623, "learning_rate": 9.973689541307575e-07, "loss": 0.5218, "step": 3550 }, { "epoch": 0.06172539067253038, "grad_norm": 2.445212123199305, "learning_rate": 9.973660693754895e-07, "loss": 0.2677, "step": 3551 }, { "epoch": 0.0617427732100332, "grad_norm": 1.959740077194305, "learning_rate": 9.973631830437996e-07, "loss": 0.5878, "step": 3552 }, { "epoch": 0.06176015574753602, "grad_norm": 2.7390845677878803, "learning_rate": 9.973602951356968e-07, "loss": 0.4289, "step": 3553 }, { "epoch": 0.06177753828503885, "grad_norm": 1.4553574353735113, "learning_rate": 9.973574056511902e-07, "loss": 0.4535, "step": 3554 }, { "epoch": 0.06179492082254168, "grad_norm": 1.7735333911006117, "learning_rate": 9.973545145902894e-07, "loss": 0.5375, "step": 3555 }, { "epoch": 0.0618123033600445, "grad_norm": 1.412488317708173, "learning_rate": 9.973516219530028e-07, "loss": 0.4347, "step": 3556 }, { "epoch": 0.061829685897547323, "grad_norm": 1.6467365359851478, "learning_rate": 9.973487277393403e-07, "loss": 0.3702, "step": 3557 }, { "epoch": 0.06184706843505015, "grad_norm": 2.5848503992685847, "learning_rate": 9.973458319493108e-07, "loss": 0.3244, "step": 3558 }, { "epoch": 0.06186445097255297, "grad_norm": 2.5296233474916403, "learning_rate": 9.973429345829237e-07, "loss": 0.5718, "step": 3559 }, { "epoch": 0.0618818335100558, "grad_norm": 1.620280552925221, "learning_rate": 9.973400356401875e-07, "loss": 0.3784, "step": 3560 }, { "epoch": 0.061899216047558624, "grad_norm": 2.515625298984929, "learning_rate": 9.973371351211123e-07, "loss": 0.6129, "step": 3561 }, { "epoch": 0.06191659858506145, "grad_norm": 1.5396531502892234, "learning_rate": 9.973342330257063e-07, "loss": 0.7471, "step": 3562 }, { "epoch": 0.06193398112256427, "grad_norm": 1.9264034167280635, "learning_rate": 9.973313293539797e-07, "loss": 0.4985, "step": 3563 }, { "epoch": 0.061951363660067094, "grad_norm": 2.451414146689782, "learning_rate": 9.973284241059411e-07, "loss": 0.4734, "step": 3564 }, { "epoch": 0.061968746197569924, "grad_norm": 2.223961383173772, "learning_rate": 9.973255172815998e-07, "loss": 0.6127, "step": 3565 }, { "epoch": 0.06198612873507275, "grad_norm": 14.02286097459673, "learning_rate": 9.973226088809651e-07, "loss": 0.4587, "step": 3566 }, { "epoch": 0.06200351127257557, "grad_norm": 2.227659089620706, "learning_rate": 9.973196989040462e-07, "loss": 0.2696, "step": 3567 }, { "epoch": 0.062020893810078394, "grad_norm": 2.079850750407993, "learning_rate": 9.973167873508522e-07, "loss": 0.2675, "step": 3568 }, { "epoch": 0.06203827634758122, "grad_norm": 2.4211177017290058, "learning_rate": 9.973138742213927e-07, "loss": 0.3215, "step": 3569 }, { "epoch": 0.06205565888508405, "grad_norm": 2.5005206699390747, "learning_rate": 9.973109595156764e-07, "loss": 0.5422, "step": 3570 }, { "epoch": 0.06207304142258687, "grad_norm": 2.0976406551909172, "learning_rate": 9.97308043233713e-07, "loss": 0.3615, "step": 3571 }, { "epoch": 0.062090423960089694, "grad_norm": 1.801421271954025, "learning_rate": 9.973051253755113e-07, "loss": 0.3011, "step": 3572 }, { "epoch": 0.06210780649759252, "grad_norm": 1.743450483786267, "learning_rate": 9.97302205941081e-07, "loss": 0.5076, "step": 3573 }, { "epoch": 0.06212518903509534, "grad_norm": 4.090321748566882, "learning_rate": 9.97299284930431e-07, "loss": 0.2882, "step": 3574 }, { "epoch": 0.06214257157259817, "grad_norm": 5.688505218065706, "learning_rate": 9.972963623435709e-07, "loss": 0.7146, "step": 3575 }, { "epoch": 0.062159954110100994, "grad_norm": 2.187088944652882, "learning_rate": 9.972934381805096e-07, "loss": 0.3288, "step": 3576 }, { "epoch": 0.06217733664760382, "grad_norm": 1.6320928663477947, "learning_rate": 9.972905124412566e-07, "loss": 0.7787, "step": 3577 }, { "epoch": 0.06219471918510664, "grad_norm": 2.629676717259999, "learning_rate": 9.972875851258212e-07, "loss": 0.3631, "step": 3578 }, { "epoch": 0.062212101722609464, "grad_norm": 1.6315533560729134, "learning_rate": 9.972846562342124e-07, "loss": 0.3874, "step": 3579 }, { "epoch": 0.062229484260112294, "grad_norm": 1.88528109014697, "learning_rate": 9.9728172576644e-07, "loss": 0.2737, "step": 3580 }, { "epoch": 0.06224686679761512, "grad_norm": 1.5790439042519948, "learning_rate": 9.972787937225125e-07, "loss": 0.6661, "step": 3581 }, { "epoch": 0.06226424933511794, "grad_norm": 2.7007397424368036, "learning_rate": 9.9727586010244e-07, "loss": 0.9575, "step": 3582 }, { "epoch": 0.062281631872620764, "grad_norm": 2.228931996397419, "learning_rate": 9.972729249062312e-07, "loss": 0.5627, "step": 3583 }, { "epoch": 0.06229901441012359, "grad_norm": 2.561398790640699, "learning_rate": 9.97269988133896e-07, "loss": 0.5949, "step": 3584 }, { "epoch": 0.06231639694762642, "grad_norm": 3.147896437556213, "learning_rate": 9.97267049785443e-07, "loss": 0.7463, "step": 3585 }, { "epoch": 0.06233377948512924, "grad_norm": 2.652097656240605, "learning_rate": 9.97264109860882e-07, "loss": 0.3175, "step": 3586 }, { "epoch": 0.062351162022632065, "grad_norm": 2.7181524962720616, "learning_rate": 9.972611683602223e-07, "loss": 0.3554, "step": 3587 }, { "epoch": 0.06236854456013489, "grad_norm": 2.9850871207085543, "learning_rate": 9.97258225283473e-07, "loss": 0.4104, "step": 3588 }, { "epoch": 0.06238592709763771, "grad_norm": 1.60742490320002, "learning_rate": 9.972552806306437e-07, "loss": 0.49, "step": 3589 }, { "epoch": 0.062403309635140534, "grad_norm": 2.290654396925268, "learning_rate": 9.972523344017436e-07, "loss": 0.3271, "step": 3590 }, { "epoch": 0.062420692172643365, "grad_norm": 2.226498769105111, "learning_rate": 9.972493865967819e-07, "loss": 0.397, "step": 3591 }, { "epoch": 0.06243807471014619, "grad_norm": 8.409325984005209, "learning_rate": 9.97246437215768e-07, "loss": 0.5872, "step": 3592 }, { "epoch": 0.06245545724764901, "grad_norm": 1.4864997948130947, "learning_rate": 9.972434862587113e-07, "loss": 0.7748, "step": 3593 }, { "epoch": 0.062472839785151835, "grad_norm": 2.4857011214319398, "learning_rate": 9.972405337256213e-07, "loss": 0.9051, "step": 3594 }, { "epoch": 0.06249022232265466, "grad_norm": 1.5850358870186771, "learning_rate": 9.972375796165072e-07, "loss": 0.5083, "step": 3595 }, { "epoch": 0.06250760486015748, "grad_norm": 1.525758057405747, "learning_rate": 9.972346239313782e-07, "loss": 0.5147, "step": 3596 }, { "epoch": 0.06252498739766031, "grad_norm": 2.459411096331208, "learning_rate": 9.97231666670244e-07, "loss": 0.2857, "step": 3597 }, { "epoch": 0.06254236993516314, "grad_norm": 2.0702850476386305, "learning_rate": 9.972287078331139e-07, "loss": 0.4715, "step": 3598 }, { "epoch": 0.06255975247266596, "grad_norm": 2.3271020860915757, "learning_rate": 9.972257474199971e-07, "loss": 0.7053, "step": 3599 }, { "epoch": 0.06257713501016879, "grad_norm": 1.7724750582011413, "learning_rate": 9.972227854309032e-07, "loss": 0.4032, "step": 3600 }, { "epoch": 0.0625945175476716, "grad_norm": 1.8584861145996014, "learning_rate": 9.972198218658415e-07, "loss": 0.3538, "step": 3601 }, { "epoch": 0.06261190008517444, "grad_norm": 2.3134960692164834, "learning_rate": 9.972168567248212e-07, "loss": 0.3673, "step": 3602 }, { "epoch": 0.06262928262267727, "grad_norm": 1.9332323659100419, "learning_rate": 9.97213890007852e-07, "loss": 0.4751, "step": 3603 }, { "epoch": 0.06264666516018008, "grad_norm": 2.309203887616271, "learning_rate": 9.97210921714943e-07, "loss": 0.3591, "step": 3604 }, { "epoch": 0.06266404769768291, "grad_norm": 1.7808276683199564, "learning_rate": 9.972079518461039e-07, "loss": 0.2646, "step": 3605 }, { "epoch": 0.06268143023518573, "grad_norm": 1.688176522793968, "learning_rate": 9.97204980401344e-07, "loss": 0.4081, "step": 3606 }, { "epoch": 0.06269881277268856, "grad_norm": 1.9691361537870793, "learning_rate": 9.972020073806726e-07, "loss": 0.5717, "step": 3607 }, { "epoch": 0.06271619531019137, "grad_norm": 1.6769259515643415, "learning_rate": 9.971990327840993e-07, "loss": 0.3466, "step": 3608 }, { "epoch": 0.0627335778476942, "grad_norm": 3.1070509975679896, "learning_rate": 9.971960566116334e-07, "loss": 0.6447, "step": 3609 }, { "epoch": 0.06275096038519704, "grad_norm": 1.4750420063385585, "learning_rate": 9.971930788632846e-07, "loss": 0.4725, "step": 3610 }, { "epoch": 0.06276834292269985, "grad_norm": 1.4272094475292796, "learning_rate": 9.971900995390619e-07, "loss": 0.4955, "step": 3611 }, { "epoch": 0.06278572546020268, "grad_norm": 2.192597422465564, "learning_rate": 9.97187118638975e-07, "loss": 0.5106, "step": 3612 }, { "epoch": 0.0628031079977055, "grad_norm": 1.014759738553794, "learning_rate": 9.971841361630335e-07, "loss": 0.2458, "step": 3613 }, { "epoch": 0.06282049053520833, "grad_norm": 1.9599401832337406, "learning_rate": 9.971811521112467e-07, "loss": 0.457, "step": 3614 }, { "epoch": 0.06283787307271116, "grad_norm": 2.511352716351271, "learning_rate": 9.971781664836238e-07, "loss": 0.874, "step": 3615 }, { "epoch": 0.06285525561021398, "grad_norm": 2.131958550078761, "learning_rate": 9.971751792801746e-07, "loss": 0.408, "step": 3616 }, { "epoch": 0.0628726381477168, "grad_norm": 1.4382226467430448, "learning_rate": 9.971721905009085e-07, "loss": 0.3187, "step": 3617 }, { "epoch": 0.06289002068521962, "grad_norm": 1.9151075251971812, "learning_rate": 9.97169200145835e-07, "loss": 0.3205, "step": 3618 }, { "epoch": 0.06290740322272245, "grad_norm": 1.8168170768733332, "learning_rate": 9.971662082149634e-07, "loss": 0.4135, "step": 3619 }, { "epoch": 0.06292478576022528, "grad_norm": 1.6833750179441767, "learning_rate": 9.971632147083033e-07, "loss": 0.6178, "step": 3620 }, { "epoch": 0.0629421682977281, "grad_norm": 3.278078172762467, "learning_rate": 9.971602196258641e-07, "loss": 0.4867, "step": 3621 }, { "epoch": 0.06295955083523093, "grad_norm": 1.5933648130012275, "learning_rate": 9.971572229676555e-07, "loss": 0.558, "step": 3622 }, { "epoch": 0.06297693337273375, "grad_norm": 2.1932764419567126, "learning_rate": 9.97154224733687e-07, "loss": 0.5333, "step": 3623 }, { "epoch": 0.06299431591023658, "grad_norm": 1.6892294441225129, "learning_rate": 9.971512249239677e-07, "loss": 0.3711, "step": 3624 }, { "epoch": 0.0630116984477394, "grad_norm": 1.7941264466271127, "learning_rate": 9.971482235385076e-07, "loss": 0.7654, "step": 3625 }, { "epoch": 0.06302908098524222, "grad_norm": 2.45655548941343, "learning_rate": 9.97145220577316e-07, "loss": 0.8004, "step": 3626 }, { "epoch": 0.06304646352274505, "grad_norm": 1.9153028609853546, "learning_rate": 9.971422160404024e-07, "loss": 0.6352, "step": 3627 }, { "epoch": 0.06306384606024787, "grad_norm": 1.7229157167182707, "learning_rate": 9.971392099277761e-07, "loss": 0.542, "step": 3628 }, { "epoch": 0.0630812285977507, "grad_norm": 2.2504511422806206, "learning_rate": 9.971362022394473e-07, "loss": 0.4158, "step": 3629 }, { "epoch": 0.06309861113525353, "grad_norm": 2.4125804241340627, "learning_rate": 9.971331929754248e-07, "loss": 0.3202, "step": 3630 }, { "epoch": 0.06311599367275635, "grad_norm": 2.5073375415998287, "learning_rate": 9.971301821357186e-07, "loss": 0.7821, "step": 3631 }, { "epoch": 0.06313337621025918, "grad_norm": 1.6507472276264825, "learning_rate": 9.971271697203381e-07, "loss": 0.374, "step": 3632 }, { "epoch": 0.06315075874776199, "grad_norm": 2.6157695795034672, "learning_rate": 9.971241557292926e-07, "loss": 0.5267, "step": 3633 }, { "epoch": 0.06316814128526482, "grad_norm": 2.2631037537149647, "learning_rate": 9.97121140162592e-07, "loss": 0.7311, "step": 3634 }, { "epoch": 0.06318552382276765, "grad_norm": 2.19465875872504, "learning_rate": 9.97118123020246e-07, "loss": 0.6216, "step": 3635 }, { "epoch": 0.06320290636027047, "grad_norm": 1.9218419197121392, "learning_rate": 9.971151043022636e-07, "loss": 0.4453, "step": 3636 }, { "epoch": 0.0632202888977733, "grad_norm": 2.6449837188773038, "learning_rate": 9.971120840086547e-07, "loss": 0.8602, "step": 3637 }, { "epoch": 0.06323767143527612, "grad_norm": 1.2193809335493937, "learning_rate": 9.971090621394288e-07, "loss": 0.6068, "step": 3638 }, { "epoch": 0.06325505397277895, "grad_norm": 1.875474092840699, "learning_rate": 9.971060386945959e-07, "loss": 0.2155, "step": 3639 }, { "epoch": 0.06327243651028178, "grad_norm": 2.0503106465663663, "learning_rate": 9.971030136741647e-07, "loss": 0.4383, "step": 3640 }, { "epoch": 0.06328981904778459, "grad_norm": 2.8194254787900443, "learning_rate": 9.970999870781456e-07, "loss": 0.5479, "step": 3641 }, { "epoch": 0.06330720158528742, "grad_norm": 2.3327916528997155, "learning_rate": 9.97096958906548e-07, "loss": 0.5024, "step": 3642 }, { "epoch": 0.06332458412279024, "grad_norm": 2.3065413389716944, "learning_rate": 9.970939291593812e-07, "loss": 0.4548, "step": 3643 }, { "epoch": 0.06334196666029307, "grad_norm": 2.447924570430277, "learning_rate": 9.97090897836655e-07, "loss": 0.4502, "step": 3644 }, { "epoch": 0.0633593491977959, "grad_norm": 1.900944057648301, "learning_rate": 9.970878649383789e-07, "loss": 0.7904, "step": 3645 }, { "epoch": 0.06337673173529872, "grad_norm": 1.5277170830197981, "learning_rate": 9.970848304645626e-07, "loss": 0.447, "step": 3646 }, { "epoch": 0.06339411427280155, "grad_norm": 2.5772903202538537, "learning_rate": 9.97081794415216e-07, "loss": 0.6339, "step": 3647 }, { "epoch": 0.06341149681030436, "grad_norm": 1.9962529295114728, "learning_rate": 9.970787567903482e-07, "loss": 0.5687, "step": 3648 }, { "epoch": 0.0634288793478072, "grad_norm": 3.110929663098411, "learning_rate": 9.970757175899693e-07, "loss": 0.5203, "step": 3649 }, { "epoch": 0.06344626188531002, "grad_norm": 2.3114662768197047, "learning_rate": 9.970726768140886e-07, "loss": 0.4618, "step": 3650 }, { "epoch": 0.06346364442281284, "grad_norm": 2.821515298818804, "learning_rate": 9.970696344627158e-07, "loss": 0.3574, "step": 3651 }, { "epoch": 0.06348102696031567, "grad_norm": 3.490669317955227, "learning_rate": 9.970665905358607e-07, "loss": 0.8165, "step": 3652 }, { "epoch": 0.06349840949781849, "grad_norm": 2.18253443785561, "learning_rate": 9.970635450335327e-07, "loss": 0.2901, "step": 3653 }, { "epoch": 0.06351579203532132, "grad_norm": 1.6019083633445843, "learning_rate": 9.970604979557417e-07, "loss": 0.4952, "step": 3654 }, { "epoch": 0.06353317457282415, "grad_norm": 1.2513479863120076, "learning_rate": 9.970574493024973e-07, "loss": 0.2209, "step": 3655 }, { "epoch": 0.06355055711032696, "grad_norm": 1.593384162858798, "learning_rate": 9.97054399073809e-07, "loss": 0.2632, "step": 3656 }, { "epoch": 0.0635679396478298, "grad_norm": 1.8129852582324146, "learning_rate": 9.970513472696866e-07, "loss": 0.3339, "step": 3657 }, { "epoch": 0.06358532218533261, "grad_norm": 1.893754708361697, "learning_rate": 9.970482938901398e-07, "loss": 0.2935, "step": 3658 }, { "epoch": 0.06360270472283544, "grad_norm": 1.3761209374888472, "learning_rate": 9.970452389351783e-07, "loss": 0.5955, "step": 3659 }, { "epoch": 0.06362008726033827, "grad_norm": 2.0452728286191317, "learning_rate": 9.970421824048117e-07, "loss": 0.5891, "step": 3660 }, { "epoch": 0.06363746979784109, "grad_norm": 2.8337210741390817, "learning_rate": 9.970391242990494e-07, "loss": 0.4395, "step": 3661 }, { "epoch": 0.06365485233534392, "grad_norm": 1.7921836039899297, "learning_rate": 9.970360646179017e-07, "loss": 0.4759, "step": 3662 }, { "epoch": 0.06367223487284673, "grad_norm": 1.3452528448501913, "learning_rate": 9.97033003361378e-07, "loss": 0.5226, "step": 3663 }, { "epoch": 0.06368961741034956, "grad_norm": 1.783029351625037, "learning_rate": 9.97029940529488e-07, "loss": 0.8179, "step": 3664 }, { "epoch": 0.0637069999478524, "grad_norm": 2.241649169421362, "learning_rate": 9.970268761222413e-07, "loss": 0.5723, "step": 3665 }, { "epoch": 0.06372438248535521, "grad_norm": 2.215027782251556, "learning_rate": 9.970238101396477e-07, "loss": 0.3432, "step": 3666 }, { "epoch": 0.06374176502285804, "grad_norm": 1.5161531751092008, "learning_rate": 9.97020742581717e-07, "loss": 0.4744, "step": 3667 }, { "epoch": 0.06375914756036086, "grad_norm": 2.3910726945236953, "learning_rate": 9.970176734484588e-07, "loss": 0.3497, "step": 3668 }, { "epoch": 0.06377653009786369, "grad_norm": 2.7205127092187804, "learning_rate": 9.970146027398828e-07, "loss": 0.6181, "step": 3669 }, { "epoch": 0.06379391263536652, "grad_norm": 1.947325806478814, "learning_rate": 9.970115304559992e-07, "loss": 0.2807, "step": 3670 }, { "epoch": 0.06381129517286933, "grad_norm": 1.9237484721175537, "learning_rate": 9.97008456596817e-07, "loss": 0.4002, "step": 3671 }, { "epoch": 0.06382867771037216, "grad_norm": 1.8463267400614756, "learning_rate": 9.970053811623464e-07, "loss": 0.5204, "step": 3672 }, { "epoch": 0.06384606024787498, "grad_norm": 2.0417883609623613, "learning_rate": 9.97002304152597e-07, "loss": 0.6714, "step": 3673 }, { "epoch": 0.06386344278537781, "grad_norm": 2.3429085897290896, "learning_rate": 9.969992255675788e-07, "loss": 0.3925, "step": 3674 }, { "epoch": 0.06388082532288063, "grad_norm": 2.5032294656994085, "learning_rate": 9.969961454073011e-07, "loss": 0.489, "step": 3675 }, { "epoch": 0.06389820786038346, "grad_norm": 2.611713157632538, "learning_rate": 9.96993063671774e-07, "loss": 0.5331, "step": 3676 }, { "epoch": 0.06391559039788629, "grad_norm": 2.0844252830439833, "learning_rate": 9.969899803610074e-07, "loss": 0.3973, "step": 3677 }, { "epoch": 0.0639329729353891, "grad_norm": 4.290137808379793, "learning_rate": 9.969868954750105e-07, "loss": 0.7518, "step": 3678 }, { "epoch": 0.06395035547289193, "grad_norm": 2.4995935485769327, "learning_rate": 9.969838090137937e-07, "loss": 0.601, "step": 3679 }, { "epoch": 0.06396773801039475, "grad_norm": 2.6002219060305607, "learning_rate": 9.969807209773664e-07, "loss": 0.5686, "step": 3680 }, { "epoch": 0.06398512054789758, "grad_norm": 2.7083223733095556, "learning_rate": 9.969776313657385e-07, "loss": 0.6853, "step": 3681 }, { "epoch": 0.06400250308540041, "grad_norm": 1.9662553778985403, "learning_rate": 9.9697454017892e-07, "loss": 0.7149, "step": 3682 }, { "epoch": 0.06401988562290323, "grad_norm": 2.3448254124291026, "learning_rate": 9.969714474169202e-07, "loss": 0.1862, "step": 3683 }, { "epoch": 0.06403726816040606, "grad_norm": 2.315395933572684, "learning_rate": 9.969683530797494e-07, "loss": 0.5601, "step": 3684 }, { "epoch": 0.06405465069790887, "grad_norm": 1.1501306979271138, "learning_rate": 9.969652571674171e-07, "loss": 0.3831, "step": 3685 }, { "epoch": 0.0640720332354117, "grad_norm": 1.5357695101396156, "learning_rate": 9.969621596799335e-07, "loss": 0.4673, "step": 3686 }, { "epoch": 0.06408941577291453, "grad_norm": 1.9998544723277154, "learning_rate": 9.969590606173077e-07, "loss": 0.5443, "step": 3687 }, { "epoch": 0.06410679831041735, "grad_norm": 5.275618733485083, "learning_rate": 9.969559599795504e-07, "loss": 1.0813, "step": 3688 }, { "epoch": 0.06412418084792018, "grad_norm": 1.9278042775690587, "learning_rate": 9.969528577666706e-07, "loss": 0.5038, "step": 3689 }, { "epoch": 0.064141563385423, "grad_norm": 3.7327291870670343, "learning_rate": 9.96949753978679e-07, "loss": 0.3565, "step": 3690 }, { "epoch": 0.06415894592292583, "grad_norm": 1.6980268564108283, "learning_rate": 9.969466486155843e-07, "loss": 0.4829, "step": 3691 }, { "epoch": 0.06417632846042866, "grad_norm": 4.556535156719322, "learning_rate": 9.969435416773975e-07, "loss": 0.3759, "step": 3692 }, { "epoch": 0.06419371099793147, "grad_norm": 1.9926843961743652, "learning_rate": 9.969404331641278e-07, "loss": 0.5997, "step": 3693 }, { "epoch": 0.0642110935354343, "grad_norm": 1.8432230439043016, "learning_rate": 9.969373230757852e-07, "loss": 0.2536, "step": 3694 }, { "epoch": 0.06422847607293712, "grad_norm": 1.929405127379781, "learning_rate": 9.969342114123795e-07, "loss": 0.3686, "step": 3695 }, { "epoch": 0.06424585861043995, "grad_norm": 1.4152295390347378, "learning_rate": 9.969310981739207e-07, "loss": 0.3246, "step": 3696 }, { "epoch": 0.06426324114794278, "grad_norm": 1.7871312342191588, "learning_rate": 9.969279833604188e-07, "loss": 0.3777, "step": 3697 }, { "epoch": 0.0642806236854456, "grad_norm": 2.3234072145779847, "learning_rate": 9.969248669718832e-07, "loss": 0.4232, "step": 3698 }, { "epoch": 0.06429800622294843, "grad_norm": 1.8759368284544047, "learning_rate": 9.96921749008324e-07, "loss": 0.5682, "step": 3699 }, { "epoch": 0.06431538876045124, "grad_norm": 2.4718346685897714, "learning_rate": 9.969186294697512e-07, "loss": 0.7035, "step": 3700 }, { "epoch": 0.06433277129795408, "grad_norm": 1.7653116411133085, "learning_rate": 9.969155083561747e-07, "loss": 0.4384, "step": 3701 }, { "epoch": 0.0643501538354569, "grad_norm": 1.7811945384953953, "learning_rate": 9.969123856676043e-07, "loss": 0.7527, "step": 3702 }, { "epoch": 0.06436753637295972, "grad_norm": 1.2378006889193238, "learning_rate": 9.969092614040497e-07, "loss": 0.5949, "step": 3703 }, { "epoch": 0.06438491891046255, "grad_norm": 1.7069938246222516, "learning_rate": 9.969061355655212e-07, "loss": 0.3635, "step": 3704 }, { "epoch": 0.06440230144796537, "grad_norm": 1.7865687480988817, "learning_rate": 9.969030081520285e-07, "loss": 0.4588, "step": 3705 }, { "epoch": 0.0644196839854682, "grad_norm": 3.710230873021325, "learning_rate": 9.968998791635814e-07, "loss": 0.5379, "step": 3706 }, { "epoch": 0.06443706652297103, "grad_norm": 1.8043073058439405, "learning_rate": 9.9689674860019e-07, "loss": 0.2692, "step": 3707 }, { "epoch": 0.06445444906047385, "grad_norm": 2.127847777205006, "learning_rate": 9.96893616461864e-07, "loss": 0.3715, "step": 3708 }, { "epoch": 0.06447183159797668, "grad_norm": 1.7256063209719072, "learning_rate": 9.968904827486138e-07, "loss": 0.5448, "step": 3709 }, { "epoch": 0.06448921413547949, "grad_norm": 2.025706219856635, "learning_rate": 9.96887347460449e-07, "loss": 0.3233, "step": 3710 }, { "epoch": 0.06450659667298232, "grad_norm": 1.4014517408324731, "learning_rate": 9.968842105973793e-07, "loss": 0.4016, "step": 3711 }, { "epoch": 0.06452397921048515, "grad_norm": 2.1189976690843824, "learning_rate": 9.968810721594149e-07, "loss": 0.3831, "step": 3712 }, { "epoch": 0.06454136174798797, "grad_norm": 2.6285063049567112, "learning_rate": 9.96877932146566e-07, "loss": 0.7736, "step": 3713 }, { "epoch": 0.0645587442854908, "grad_norm": 10.079061736185444, "learning_rate": 9.968747905588422e-07, "loss": 0.8683, "step": 3714 }, { "epoch": 0.06457612682299362, "grad_norm": 2.528713225370321, "learning_rate": 9.968716473962536e-07, "loss": 0.8097, "step": 3715 }, { "epoch": 0.06459350936049645, "grad_norm": 5.835596919319653, "learning_rate": 9.9686850265881e-07, "loss": 0.291, "step": 3716 }, { "epoch": 0.06461089189799928, "grad_norm": 2.7187335445382175, "learning_rate": 9.968653563465216e-07, "loss": 0.5798, "step": 3717 }, { "epoch": 0.06462827443550209, "grad_norm": 2.5565546276294717, "learning_rate": 9.968622084593983e-07, "loss": 0.3275, "step": 3718 }, { "epoch": 0.06464565697300492, "grad_norm": 2.487653601892031, "learning_rate": 9.9685905899745e-07, "loss": 0.4801, "step": 3719 }, { "epoch": 0.06466303951050774, "grad_norm": 1.2274296937739746, "learning_rate": 9.968559079606867e-07, "loss": 0.3182, "step": 3720 }, { "epoch": 0.06468042204801057, "grad_norm": 1.9121451253405664, "learning_rate": 9.968527553491184e-07, "loss": 0.3252, "step": 3721 }, { "epoch": 0.0646978045855134, "grad_norm": 3.2203358808253784, "learning_rate": 9.96849601162755e-07, "loss": 0.3086, "step": 3722 }, { "epoch": 0.06471518712301622, "grad_norm": 1.8169515673879146, "learning_rate": 9.968464454016066e-07, "loss": 0.6516, "step": 3723 }, { "epoch": 0.06473256966051905, "grad_norm": 2.438832431682005, "learning_rate": 9.968432880656834e-07, "loss": 0.3555, "step": 3724 }, { "epoch": 0.06474995219802186, "grad_norm": 1.702519479561295, "learning_rate": 9.96840129154995e-07, "loss": 0.2564, "step": 3725 }, { "epoch": 0.06476733473552469, "grad_norm": 2.3280125928022297, "learning_rate": 9.968369686695518e-07, "loss": 0.5994, "step": 3726 }, { "epoch": 0.06478471727302752, "grad_norm": 1.6033393404361083, "learning_rate": 9.968338066093635e-07, "loss": 0.2558, "step": 3727 }, { "epoch": 0.06480209981053034, "grad_norm": 1.7859103988592724, "learning_rate": 9.968306429744404e-07, "loss": 0.4383, "step": 3728 }, { "epoch": 0.06481948234803317, "grad_norm": 2.915651253065494, "learning_rate": 9.968274777647922e-07, "loss": 0.6239, "step": 3729 }, { "epoch": 0.06483686488553599, "grad_norm": 2.3868737258080843, "learning_rate": 9.968243109804292e-07, "loss": 0.4739, "step": 3730 }, { "epoch": 0.06485424742303882, "grad_norm": 1.5789684507665929, "learning_rate": 9.968211426213615e-07, "loss": 0.4197, "step": 3731 }, { "epoch": 0.06487162996054165, "grad_norm": 3.00571232386308, "learning_rate": 9.968179726875988e-07, "loss": 0.4511, "step": 3732 }, { "epoch": 0.06488901249804446, "grad_norm": 2.0248083011885436, "learning_rate": 9.968148011791514e-07, "loss": 0.5275, "step": 3733 }, { "epoch": 0.06490639503554729, "grad_norm": 1.5083664918594073, "learning_rate": 9.968116280960294e-07, "loss": 0.4449, "step": 3734 }, { "epoch": 0.06492377757305011, "grad_norm": 1.308216103277937, "learning_rate": 9.968084534382426e-07, "loss": 0.4236, "step": 3735 }, { "epoch": 0.06494116011055294, "grad_norm": 2.0347053659867242, "learning_rate": 9.968052772058014e-07, "loss": 0.6754, "step": 3736 }, { "epoch": 0.06495854264805577, "grad_norm": 2.2773886410099076, "learning_rate": 9.968020993987156e-07, "loss": 0.3421, "step": 3737 }, { "epoch": 0.06497592518555859, "grad_norm": 2.2131178600648687, "learning_rate": 9.967989200169953e-07, "loss": 0.4524, "step": 3738 }, { "epoch": 0.06499330772306142, "grad_norm": 1.4131325697060446, "learning_rate": 9.967957390606508e-07, "loss": 0.3911, "step": 3739 }, { "epoch": 0.06501069026056423, "grad_norm": 4.983660378478888, "learning_rate": 9.967925565296917e-07, "loss": 0.6226, "step": 3740 }, { "epoch": 0.06502807279806706, "grad_norm": 2.0106685442898335, "learning_rate": 9.967893724241286e-07, "loss": 0.2709, "step": 3741 }, { "epoch": 0.0650454553355699, "grad_norm": 2.269076707638168, "learning_rate": 9.967861867439715e-07, "loss": 0.6022, "step": 3742 }, { "epoch": 0.06506283787307271, "grad_norm": 4.453167933612797, "learning_rate": 9.967829994892303e-07, "loss": 0.5502, "step": 3743 }, { "epoch": 0.06508022041057554, "grad_norm": 1.8316981381596682, "learning_rate": 9.96779810659915e-07, "loss": 0.2472, "step": 3744 }, { "epoch": 0.06509760294807836, "grad_norm": 2.7564202814311876, "learning_rate": 9.96776620256036e-07, "loss": 0.6114, "step": 3745 }, { "epoch": 0.06511498548558119, "grad_norm": 2.783225139044282, "learning_rate": 9.967734282776035e-07, "loss": 0.4254, "step": 3746 }, { "epoch": 0.065132368023084, "grad_norm": 2.2163971217930976, "learning_rate": 9.967702347246273e-07, "loss": 0.6663, "step": 3747 }, { "epoch": 0.06514975056058683, "grad_norm": 2.0039299101217654, "learning_rate": 9.967670395971177e-07, "loss": 0.6023, "step": 3748 }, { "epoch": 0.06516713309808966, "grad_norm": 1.6050183946561642, "learning_rate": 9.967638428950848e-07, "loss": 0.3643, "step": 3749 }, { "epoch": 0.06518451563559248, "grad_norm": 2.810514825771938, "learning_rate": 9.967606446185387e-07, "loss": 0.5655, "step": 3750 }, { "epoch": 0.06520189817309531, "grad_norm": 1.4295593750828026, "learning_rate": 9.967574447674895e-07, "loss": 0.4505, "step": 3751 }, { "epoch": 0.06521928071059813, "grad_norm": 2.393603347037809, "learning_rate": 9.967542433419475e-07, "loss": 0.3592, "step": 3752 }, { "epoch": 0.06523666324810096, "grad_norm": 1.4536757856134694, "learning_rate": 9.967510403419226e-07, "loss": 0.581, "step": 3753 }, { "epoch": 0.06525404578560379, "grad_norm": 1.7556508985425874, "learning_rate": 9.96747835767425e-07, "loss": 0.3296, "step": 3754 }, { "epoch": 0.0652714283231066, "grad_norm": 3.6486197932239994, "learning_rate": 9.967446296184653e-07, "loss": 0.4743, "step": 3755 }, { "epoch": 0.06528881086060943, "grad_norm": 3.224322383862883, "learning_rate": 9.96741421895053e-07, "loss": 0.5769, "step": 3756 }, { "epoch": 0.06530619339811225, "grad_norm": 1.9877479627860994, "learning_rate": 9.967382125971987e-07, "loss": 0.5031, "step": 3757 }, { "epoch": 0.06532357593561508, "grad_norm": 3.004528657559689, "learning_rate": 9.967350017249125e-07, "loss": 0.2571, "step": 3758 }, { "epoch": 0.06534095847311791, "grad_norm": 2.911485165398424, "learning_rate": 9.967317892782043e-07, "loss": 0.4604, "step": 3759 }, { "epoch": 0.06535834101062073, "grad_norm": 2.213365987971702, "learning_rate": 9.967285752570848e-07, "loss": 0.716, "step": 3760 }, { "epoch": 0.06537572354812356, "grad_norm": 2.744228460576156, "learning_rate": 9.967253596615638e-07, "loss": 0.5582, "step": 3761 }, { "epoch": 0.06539310608562637, "grad_norm": 3.3263591018477854, "learning_rate": 9.967221424916514e-07, "loss": 0.8591, "step": 3762 }, { "epoch": 0.0654104886231292, "grad_norm": 1.6797386134096124, "learning_rate": 9.967189237473582e-07, "loss": 0.4504, "step": 3763 }, { "epoch": 0.06542787116063203, "grad_norm": 6.199110021979278, "learning_rate": 9.967157034286942e-07, "loss": 0.4755, "step": 3764 }, { "epoch": 0.06544525369813485, "grad_norm": 1.753386677016256, "learning_rate": 9.967124815356695e-07, "loss": 0.6386, "step": 3765 }, { "epoch": 0.06546263623563768, "grad_norm": 4.908521135164322, "learning_rate": 9.967092580682944e-07, "loss": 0.8483, "step": 3766 }, { "epoch": 0.0654800187731405, "grad_norm": 2.192149010284101, "learning_rate": 9.967060330265791e-07, "loss": 0.4092, "step": 3767 }, { "epoch": 0.06549740131064333, "grad_norm": 2.51911940960762, "learning_rate": 9.96702806410534e-07, "loss": 0.3247, "step": 3768 }, { "epoch": 0.06551478384814616, "grad_norm": 1.9421555907151014, "learning_rate": 9.966995782201687e-07, "loss": 0.3543, "step": 3769 }, { "epoch": 0.06553216638564897, "grad_norm": 2.592288767941154, "learning_rate": 9.966963484554945e-07, "loss": 0.4799, "step": 3770 }, { "epoch": 0.0655495489231518, "grad_norm": 2.8765705316140573, "learning_rate": 9.966931171165206e-07, "loss": 0.2559, "step": 3771 }, { "epoch": 0.06556693146065462, "grad_norm": 4.162386624622642, "learning_rate": 9.966898842032581e-07, "loss": 0.5458, "step": 3772 }, { "epoch": 0.06558431399815745, "grad_norm": 4.900466907129818, "learning_rate": 9.966866497157165e-07, "loss": 0.3879, "step": 3773 }, { "epoch": 0.06560169653566028, "grad_norm": 2.268722245808047, "learning_rate": 9.966834136539065e-07, "loss": 0.4202, "step": 3774 }, { "epoch": 0.0656190790731631, "grad_norm": 2.1507103906642566, "learning_rate": 9.96680176017838e-07, "loss": 0.5261, "step": 3775 }, { "epoch": 0.06563646161066593, "grad_norm": 2.861712499179219, "learning_rate": 9.96676936807522e-07, "loss": 0.5752, "step": 3776 }, { "epoch": 0.06565384414816874, "grad_norm": 4.164748698793191, "learning_rate": 9.966736960229677e-07, "loss": 0.6419, "step": 3777 }, { "epoch": 0.06567122668567157, "grad_norm": 2.2360314592611568, "learning_rate": 9.966704536641863e-07, "loss": 0.3186, "step": 3778 }, { "epoch": 0.0656886092231744, "grad_norm": 2.5863312999408876, "learning_rate": 9.966672097311877e-07, "loss": 0.4467, "step": 3779 }, { "epoch": 0.06570599176067722, "grad_norm": 4.601944127759962, "learning_rate": 9.96663964223982e-07, "loss": 1.3255, "step": 3780 }, { "epoch": 0.06572337429818005, "grad_norm": 1.7737510465102286, "learning_rate": 9.9666071714258e-07, "loss": 0.3132, "step": 3781 }, { "epoch": 0.06574075683568287, "grad_norm": 1.7072546486453979, "learning_rate": 9.966574684869914e-07, "loss": 0.2902, "step": 3782 }, { "epoch": 0.0657581393731857, "grad_norm": 2.417453548441654, "learning_rate": 9.966542182572268e-07, "loss": 0.5107, "step": 3783 }, { "epoch": 0.06577552191068853, "grad_norm": 1.4617895675760821, "learning_rate": 9.966509664532966e-07, "loss": 0.6744, "step": 3784 }, { "epoch": 0.06579290444819134, "grad_norm": 3.610184312386349, "learning_rate": 9.96647713075211e-07, "loss": 0.6454, "step": 3785 }, { "epoch": 0.06581028698569417, "grad_norm": 2.1673461771735703, "learning_rate": 9.966444581229801e-07, "loss": 0.3635, "step": 3786 }, { "epoch": 0.06582766952319699, "grad_norm": 2.0545438371223694, "learning_rate": 9.966412015966144e-07, "loss": 0.3923, "step": 3787 }, { "epoch": 0.06584505206069982, "grad_norm": 2.556821874742703, "learning_rate": 9.966379434961244e-07, "loss": 0.3923, "step": 3788 }, { "epoch": 0.06586243459820265, "grad_norm": 2.6222438525881255, "learning_rate": 9.966346838215201e-07, "loss": 0.3193, "step": 3789 }, { "epoch": 0.06587981713570547, "grad_norm": 1.7553735741309522, "learning_rate": 9.966314225728121e-07, "loss": 0.4095, "step": 3790 }, { "epoch": 0.0658971996732083, "grad_norm": 2.1365233819385776, "learning_rate": 9.966281597500105e-07, "loss": 0.3838, "step": 3791 }, { "epoch": 0.06591458221071111, "grad_norm": 2.4183833673237536, "learning_rate": 9.96624895353126e-07, "loss": 0.5029, "step": 3792 }, { "epoch": 0.06593196474821394, "grad_norm": 2.1109085490410227, "learning_rate": 9.966216293821685e-07, "loss": 0.3792, "step": 3793 }, { "epoch": 0.06594934728571678, "grad_norm": 3.965074736851452, "learning_rate": 9.966183618371486e-07, "loss": 0.383, "step": 3794 }, { "epoch": 0.06596672982321959, "grad_norm": 1.5835478425046992, "learning_rate": 9.966150927180767e-07, "loss": 0.4158, "step": 3795 }, { "epoch": 0.06598411236072242, "grad_norm": 1.86435736794022, "learning_rate": 9.966118220249631e-07, "loss": 0.4294, "step": 3796 }, { "epoch": 0.06600149489822524, "grad_norm": 1.7913609539559643, "learning_rate": 9.96608549757818e-07, "loss": 0.3388, "step": 3797 }, { "epoch": 0.06601887743572807, "grad_norm": 3.1273939476574393, "learning_rate": 9.966052759166522e-07, "loss": 0.5325, "step": 3798 }, { "epoch": 0.0660362599732309, "grad_norm": 2.3937047478327442, "learning_rate": 9.966020005014754e-07, "loss": 0.3601, "step": 3799 }, { "epoch": 0.06605364251073371, "grad_norm": 2.2069360305097936, "learning_rate": 9.965987235122987e-07, "loss": 0.71, "step": 3800 }, { "epoch": 0.06607102504823655, "grad_norm": 2.5033847975688737, "learning_rate": 9.96595444949132e-07, "loss": 0.5776, "step": 3801 }, { "epoch": 0.06608840758573936, "grad_norm": 1.6997775683199994, "learning_rate": 9.96592164811986e-07, "loss": 0.4355, "step": 3802 }, { "epoch": 0.06610579012324219, "grad_norm": 2.5395943969344907, "learning_rate": 9.96588883100871e-07, "loss": 0.5082, "step": 3803 }, { "epoch": 0.06612317266074502, "grad_norm": 2.336246155117016, "learning_rate": 9.96585599815797e-07, "loss": 0.2675, "step": 3804 }, { "epoch": 0.06614055519824784, "grad_norm": 1.9699356412217912, "learning_rate": 9.965823149567749e-07, "loss": 0.7908, "step": 3805 }, { "epoch": 0.06615793773575067, "grad_norm": 2.545556779380498, "learning_rate": 9.965790285238152e-07, "loss": 0.3741, "step": 3806 }, { "epoch": 0.06617532027325349, "grad_norm": 1.5602810662781466, "learning_rate": 9.96575740516928e-07, "loss": 0.4335, "step": 3807 }, { "epoch": 0.06619270281075632, "grad_norm": 3.211661247814018, "learning_rate": 9.965724509361236e-07, "loss": 0.5565, "step": 3808 }, { "epoch": 0.06621008534825915, "grad_norm": 2.548011871310045, "learning_rate": 9.965691597814128e-07, "loss": 1.3301, "step": 3809 }, { "epoch": 0.06622746788576196, "grad_norm": 1.88039140498358, "learning_rate": 9.965658670528058e-07, "loss": 0.3801, "step": 3810 }, { "epoch": 0.06624485042326479, "grad_norm": 2.489202115056375, "learning_rate": 9.965625727503133e-07, "loss": 0.5527, "step": 3811 }, { "epoch": 0.06626223296076761, "grad_norm": 2.767306630923825, "learning_rate": 9.965592768739453e-07, "loss": 0.4525, "step": 3812 }, { "epoch": 0.06627961549827044, "grad_norm": 2.1522974962322063, "learning_rate": 9.965559794237126e-07, "loss": 0.5741, "step": 3813 }, { "epoch": 0.06629699803577327, "grad_norm": 1.7105640228225016, "learning_rate": 9.965526803996257e-07, "loss": 0.3811, "step": 3814 }, { "epoch": 0.06631438057327609, "grad_norm": 1.3648060279523357, "learning_rate": 9.965493798016946e-07, "loss": 0.17, "step": 3815 }, { "epoch": 0.06633176311077892, "grad_norm": 1.4260529569171032, "learning_rate": 9.965460776299302e-07, "loss": 0.496, "step": 3816 }, { "epoch": 0.06634914564828173, "grad_norm": 2.5997118811952933, "learning_rate": 9.965427738843428e-07, "loss": 0.4806, "step": 3817 }, { "epoch": 0.06636652818578456, "grad_norm": 2.156499834525279, "learning_rate": 9.965394685649432e-07, "loss": 0.3548, "step": 3818 }, { "epoch": 0.06638391072328738, "grad_norm": 2.5656594410586355, "learning_rate": 9.96536161671741e-07, "loss": 0.5739, "step": 3819 }, { "epoch": 0.06640129326079021, "grad_norm": 2.6622671967780023, "learning_rate": 9.965328532047477e-07, "loss": 0.4733, "step": 3820 }, { "epoch": 0.06641867579829304, "grad_norm": 1.4202302155720803, "learning_rate": 9.965295431639733e-07, "loss": 0.3487, "step": 3821 }, { "epoch": 0.06643605833579586, "grad_norm": 1.6627831628133736, "learning_rate": 9.965262315494282e-07, "loss": 0.3997, "step": 3822 }, { "epoch": 0.06645344087329869, "grad_norm": 1.8068600252939033, "learning_rate": 9.96522918361123e-07, "loss": 0.4156, "step": 3823 }, { "epoch": 0.0664708234108015, "grad_norm": 1.4601313193473315, "learning_rate": 9.965196035990685e-07, "loss": 0.2337, "step": 3824 }, { "epoch": 0.06648820594830433, "grad_norm": 2.187883370892586, "learning_rate": 9.965162872632746e-07, "loss": 0.2623, "step": 3825 }, { "epoch": 0.06650558848580716, "grad_norm": 2.9343768321052983, "learning_rate": 9.965129693537523e-07, "loss": 0.5267, "step": 3826 }, { "epoch": 0.06652297102330998, "grad_norm": 2.135034746268231, "learning_rate": 9.96509649870512e-07, "loss": 0.4867, "step": 3827 }, { "epoch": 0.06654035356081281, "grad_norm": 0.9450675871918857, "learning_rate": 9.965063288135641e-07, "loss": 0.4958, "step": 3828 }, { "epoch": 0.06655773609831563, "grad_norm": 1.9686998404253182, "learning_rate": 9.965030061829193e-07, "loss": 0.4732, "step": 3829 }, { "epoch": 0.06657511863581846, "grad_norm": 1.597035072077714, "learning_rate": 9.96499681978588e-07, "loss": 0.5927, "step": 3830 }, { "epoch": 0.06659250117332129, "grad_norm": 2.415092466103129, "learning_rate": 9.964963562005808e-07, "loss": 0.4882, "step": 3831 }, { "epoch": 0.0666098837108241, "grad_norm": 3.2492574675455663, "learning_rate": 9.96493028848908e-07, "loss": 0.3824, "step": 3832 }, { "epoch": 0.06662726624832693, "grad_norm": 1.8024369656259267, "learning_rate": 9.964896999235807e-07, "loss": 0.5898, "step": 3833 }, { "epoch": 0.06664464878582975, "grad_norm": 1.4721530205437563, "learning_rate": 9.964863694246093e-07, "loss": 0.427, "step": 3834 }, { "epoch": 0.06666203132333258, "grad_norm": 1.3181841710373872, "learning_rate": 9.964830373520037e-07, "loss": 0.361, "step": 3835 }, { "epoch": 0.06667941386083541, "grad_norm": 1.7605662091601912, "learning_rate": 9.964797037057752e-07, "loss": 0.5921, "step": 3836 }, { "epoch": 0.06669679639833823, "grad_norm": 1.726076325104894, "learning_rate": 9.96476368485934e-07, "loss": 0.4315, "step": 3837 }, { "epoch": 0.06671417893584106, "grad_norm": 1.076481929144307, "learning_rate": 9.964730316924907e-07, "loss": 0.2688, "step": 3838 }, { "epoch": 0.06673156147334387, "grad_norm": 3.704071727501136, "learning_rate": 9.96469693325456e-07, "loss": 0.5315, "step": 3839 }, { "epoch": 0.0667489440108467, "grad_norm": 2.4474692099912896, "learning_rate": 9.964663533848407e-07, "loss": 0.2457, "step": 3840 }, { "epoch": 0.06676632654834953, "grad_norm": 2.03265956276902, "learning_rate": 9.964630118706549e-07, "loss": 0.3768, "step": 3841 }, { "epoch": 0.06678370908585235, "grad_norm": 1.8124178876179782, "learning_rate": 9.964596687829093e-07, "loss": 0.414, "step": 3842 }, { "epoch": 0.06680109162335518, "grad_norm": 2.614761740215789, "learning_rate": 9.964563241216147e-07, "loss": 0.4664, "step": 3843 }, { "epoch": 0.066818474160858, "grad_norm": 3.916257476535532, "learning_rate": 9.964529778867816e-07, "loss": 0.6103, "step": 3844 }, { "epoch": 0.06683585669836083, "grad_norm": 1.6779733236457444, "learning_rate": 9.964496300784206e-07, "loss": 0.4283, "step": 3845 }, { "epoch": 0.06685323923586366, "grad_norm": 1.718656497011061, "learning_rate": 9.964462806965424e-07, "loss": 0.5932, "step": 3846 }, { "epoch": 0.06687062177336647, "grad_norm": 2.094282550293946, "learning_rate": 9.964429297411577e-07, "loss": 0.3857, "step": 3847 }, { "epoch": 0.0668880043108693, "grad_norm": 1.4959367424173085, "learning_rate": 9.964395772122766e-07, "loss": 0.3422, "step": 3848 }, { "epoch": 0.06690538684837212, "grad_norm": 3.000588757885757, "learning_rate": 9.964362231099103e-07, "loss": 0.4126, "step": 3849 }, { "epoch": 0.06692276938587495, "grad_norm": 1.6890668526342731, "learning_rate": 9.964328674340692e-07, "loss": 0.437, "step": 3850 }, { "epoch": 0.06694015192337778, "grad_norm": 2.6403799881491823, "learning_rate": 9.964295101847637e-07, "loss": 0.5327, "step": 3851 }, { "epoch": 0.0669575344608806, "grad_norm": 1.8647401855440218, "learning_rate": 9.96426151362005e-07, "loss": 0.5309, "step": 3852 }, { "epoch": 0.06697491699838343, "grad_norm": 2.25396415808034, "learning_rate": 9.964227909658035e-07, "loss": 0.5057, "step": 3853 }, { "epoch": 0.06699229953588624, "grad_norm": 2.086164366145064, "learning_rate": 9.964194289961695e-07, "loss": 0.56, "step": 3854 }, { "epoch": 0.06700968207338907, "grad_norm": 2.4781657921008584, "learning_rate": 9.964160654531142e-07, "loss": 0.4531, "step": 3855 }, { "epoch": 0.0670270646108919, "grad_norm": 3.3220819811636226, "learning_rate": 9.964127003366478e-07, "loss": 0.4532, "step": 3856 }, { "epoch": 0.06704444714839472, "grad_norm": 1.5517351832253854, "learning_rate": 9.964093336467812e-07, "loss": 0.4052, "step": 3857 }, { "epoch": 0.06706182968589755, "grad_norm": 3.448174002764116, "learning_rate": 9.964059653835251e-07, "loss": 0.4577, "step": 3858 }, { "epoch": 0.06707921222340037, "grad_norm": 1.2532829086488984, "learning_rate": 9.964025955468903e-07, "loss": 0.4976, "step": 3859 }, { "epoch": 0.0670965947609032, "grad_norm": 1.966413579229068, "learning_rate": 9.963992241368871e-07, "loss": 0.293, "step": 3860 }, { "epoch": 0.06711397729840603, "grad_norm": 1.1398434704701146, "learning_rate": 9.963958511535264e-07, "loss": 0.683, "step": 3861 }, { "epoch": 0.06713135983590884, "grad_norm": 1.567860705414807, "learning_rate": 9.963924765968187e-07, "loss": 0.846, "step": 3862 }, { "epoch": 0.06714874237341167, "grad_norm": 3.3064676128491817, "learning_rate": 9.963891004667752e-07, "loss": 0.3912, "step": 3863 }, { "epoch": 0.06716612491091449, "grad_norm": 2.422887476428782, "learning_rate": 9.96385722763406e-07, "loss": 0.5951, "step": 3864 }, { "epoch": 0.06718350744841732, "grad_norm": 1.5596586064928368, "learning_rate": 9.963823434867222e-07, "loss": 0.2191, "step": 3865 }, { "epoch": 0.06720088998592015, "grad_norm": 2.0981107851097973, "learning_rate": 9.963789626367344e-07, "loss": 0.5203, "step": 3866 }, { "epoch": 0.06721827252342297, "grad_norm": 1.7592711028657344, "learning_rate": 9.963755802134534e-07, "loss": 0.3876, "step": 3867 }, { "epoch": 0.0672356550609258, "grad_norm": 1.8791646864427887, "learning_rate": 9.963721962168898e-07, "loss": 0.3537, "step": 3868 }, { "epoch": 0.06725303759842861, "grad_norm": 1.8134377327616842, "learning_rate": 9.963688106470542e-07, "loss": 0.5223, "step": 3869 }, { "epoch": 0.06727042013593144, "grad_norm": 2.094498591613361, "learning_rate": 9.963654235039576e-07, "loss": 0.5315, "step": 3870 }, { "epoch": 0.06728780267343427, "grad_norm": 1.6449113023397492, "learning_rate": 9.963620347876107e-07, "loss": 0.5073, "step": 3871 }, { "epoch": 0.06730518521093709, "grad_norm": 1.4069189115770981, "learning_rate": 9.96358644498024e-07, "loss": 0.3359, "step": 3872 }, { "epoch": 0.06732256774843992, "grad_norm": 1.482436809654685, "learning_rate": 9.963552526352084e-07, "loss": 0.4494, "step": 3873 }, { "epoch": 0.06733995028594274, "grad_norm": 1.844737477854715, "learning_rate": 9.963518591991748e-07, "loss": 0.2764, "step": 3874 }, { "epoch": 0.06735733282344557, "grad_norm": 1.6521164368674095, "learning_rate": 9.963484641899337e-07, "loss": 0.5051, "step": 3875 }, { "epoch": 0.0673747153609484, "grad_norm": 1.8792610647075714, "learning_rate": 9.96345067607496e-07, "loss": 0.4781, "step": 3876 }, { "epoch": 0.06739209789845121, "grad_norm": 2.1117530576131536, "learning_rate": 9.963416694518725e-07, "loss": 0.3996, "step": 3877 }, { "epoch": 0.06740948043595404, "grad_norm": 2.1581287942149308, "learning_rate": 9.963382697230738e-07, "loss": 0.3101, "step": 3878 }, { "epoch": 0.06742686297345686, "grad_norm": 1.578637576190734, "learning_rate": 9.963348684211109e-07, "loss": 0.5625, "step": 3879 }, { "epoch": 0.06744424551095969, "grad_norm": 1.906247142745605, "learning_rate": 9.963314655459943e-07, "loss": 0.2899, "step": 3880 }, { "epoch": 0.06746162804846252, "grad_norm": 2.354934872314676, "learning_rate": 9.96328061097735e-07, "loss": 0.2351, "step": 3881 }, { "epoch": 0.06747901058596534, "grad_norm": 2.0703252775234646, "learning_rate": 9.963246550763438e-07, "loss": 0.4421, "step": 3882 }, { "epoch": 0.06749639312346817, "grad_norm": 2.512741443181316, "learning_rate": 9.963212474818315e-07, "loss": 0.7644, "step": 3883 }, { "epoch": 0.06751377566097098, "grad_norm": 2.7531636336571683, "learning_rate": 9.963178383142088e-07, "loss": 0.4705, "step": 3884 }, { "epoch": 0.06753115819847381, "grad_norm": 2.1365459264577016, "learning_rate": 9.963144275734864e-07, "loss": 0.3785, "step": 3885 }, { "epoch": 0.06754854073597663, "grad_norm": 3.253594189800679, "learning_rate": 9.963110152596755e-07, "loss": 0.3528, "step": 3886 }, { "epoch": 0.06756592327347946, "grad_norm": 2.118170032783313, "learning_rate": 9.963076013727864e-07, "loss": 0.3519, "step": 3887 }, { "epoch": 0.06758330581098229, "grad_norm": 1.9708277605602862, "learning_rate": 9.963041859128302e-07, "loss": 0.2575, "step": 3888 }, { "epoch": 0.06760068834848511, "grad_norm": 1.0807413490407758, "learning_rate": 9.96300768879818e-07, "loss": 0.283, "step": 3889 }, { "epoch": 0.06761807088598794, "grad_norm": 1.8308589380334375, "learning_rate": 9.962973502737602e-07, "loss": 0.4794, "step": 3890 }, { "epoch": 0.06763545342349075, "grad_norm": 1.375951685889157, "learning_rate": 9.962939300946677e-07, "loss": 0.1902, "step": 3891 }, { "epoch": 0.06765283596099358, "grad_norm": 2.363191783609259, "learning_rate": 9.962905083425515e-07, "loss": 0.3585, "step": 3892 }, { "epoch": 0.06767021849849642, "grad_norm": 2.676891857372966, "learning_rate": 9.962870850174224e-07, "loss": 0.8104, "step": 3893 }, { "epoch": 0.06768760103599923, "grad_norm": 3.5496919811560725, "learning_rate": 9.96283660119291e-07, "loss": 0.4379, "step": 3894 }, { "epoch": 0.06770498357350206, "grad_norm": 1.6524097022953832, "learning_rate": 9.962802336481685e-07, "loss": 0.2657, "step": 3895 }, { "epoch": 0.06772236611100488, "grad_norm": 2.1574802046321895, "learning_rate": 9.962768056040657e-07, "loss": 0.358, "step": 3896 }, { "epoch": 0.06773974864850771, "grad_norm": 2.2699641429939574, "learning_rate": 9.962733759869935e-07, "loss": 0.5352, "step": 3897 }, { "epoch": 0.06775713118601054, "grad_norm": 2.5269403894915423, "learning_rate": 9.962699447969624e-07, "loss": 0.5869, "step": 3898 }, { "epoch": 0.06777451372351335, "grad_norm": 1.4624288392256197, "learning_rate": 9.962665120339838e-07, "loss": 0.5557, "step": 3899 }, { "epoch": 0.06779189626101619, "grad_norm": 5.55760188357714, "learning_rate": 9.962630776980681e-07, "loss": 0.3836, "step": 3900 }, { "epoch": 0.067809278798519, "grad_norm": 1.7937226865996743, "learning_rate": 9.962596417892265e-07, "loss": 0.5205, "step": 3901 }, { "epoch": 0.06782666133602183, "grad_norm": 2.923539020351874, "learning_rate": 9.962562043074698e-07, "loss": 0.4577, "step": 3902 }, { "epoch": 0.06784404387352466, "grad_norm": 2.3813517227946845, "learning_rate": 9.962527652528088e-07, "loss": 0.3151, "step": 3903 }, { "epoch": 0.06786142641102748, "grad_norm": 2.644156405186076, "learning_rate": 9.962493246252546e-07, "loss": 0.5701, "step": 3904 }, { "epoch": 0.06787880894853031, "grad_norm": 2.0428348625521884, "learning_rate": 9.96245882424818e-07, "loss": 0.4682, "step": 3905 }, { "epoch": 0.06789619148603313, "grad_norm": 2.8667112469885847, "learning_rate": 9.962424386515099e-07, "loss": 0.554, "step": 3906 }, { "epoch": 0.06791357402353596, "grad_norm": 2.428489419278644, "learning_rate": 9.962389933053412e-07, "loss": 0.6157, "step": 3907 }, { "epoch": 0.06793095656103879, "grad_norm": 1.8757227201799618, "learning_rate": 9.962355463863227e-07, "loss": 0.557, "step": 3908 }, { "epoch": 0.0679483390985416, "grad_norm": 2.4269806834013417, "learning_rate": 9.96232097894466e-07, "loss": 0.6814, "step": 3909 }, { "epoch": 0.06796572163604443, "grad_norm": 1.3709884015755185, "learning_rate": 9.96228647829781e-07, "loss": 0.337, "step": 3910 }, { "epoch": 0.06798310417354725, "grad_norm": 5.900954425313612, "learning_rate": 9.962251961922792e-07, "loss": 0.5223, "step": 3911 }, { "epoch": 0.06800048671105008, "grad_norm": 2.5740574847820796, "learning_rate": 9.962217429819714e-07, "loss": 0.5682, "step": 3912 }, { "epoch": 0.06801786924855291, "grad_norm": 1.6451406355910807, "learning_rate": 9.962182881988689e-07, "loss": 0.57, "step": 3913 }, { "epoch": 0.06803525178605573, "grad_norm": 1.3603400829316796, "learning_rate": 9.962148318429822e-07, "loss": 0.3062, "step": 3914 }, { "epoch": 0.06805263432355856, "grad_norm": 4.050929991340252, "learning_rate": 9.962113739143225e-07, "loss": 0.3984, "step": 3915 }, { "epoch": 0.06807001686106137, "grad_norm": 2.6058768192197306, "learning_rate": 9.962079144129006e-07, "loss": 0.3669, "step": 3916 }, { "epoch": 0.0680873993985642, "grad_norm": 3.6220412929484342, "learning_rate": 9.962044533387277e-07, "loss": 0.4798, "step": 3917 }, { "epoch": 0.06810478193606703, "grad_norm": 2.286147643941993, "learning_rate": 9.962009906918145e-07, "loss": 0.5191, "step": 3918 }, { "epoch": 0.06812216447356985, "grad_norm": 1.2812184762873382, "learning_rate": 9.96197526472172e-07, "loss": 0.2301, "step": 3919 }, { "epoch": 0.06813954701107268, "grad_norm": 1.7062968377415286, "learning_rate": 9.961940606798114e-07, "loss": 0.3911, "step": 3920 }, { "epoch": 0.0681569295485755, "grad_norm": 2.9314473400473853, "learning_rate": 9.961905933147436e-07, "loss": 0.3802, "step": 3921 }, { "epoch": 0.06817431208607833, "grad_norm": 2.0428378263233453, "learning_rate": 9.961871243769794e-07, "loss": 0.1624, "step": 3922 }, { "epoch": 0.06819169462358116, "grad_norm": 2.0084618670732937, "learning_rate": 9.961836538665301e-07, "loss": 0.594, "step": 3923 }, { "epoch": 0.06820907716108397, "grad_norm": 1.2954787375897774, "learning_rate": 9.961801817834065e-07, "loss": 0.505, "step": 3924 }, { "epoch": 0.0682264596985868, "grad_norm": 2.0805861555294807, "learning_rate": 9.961767081276196e-07, "loss": 0.3843, "step": 3925 }, { "epoch": 0.06824384223608962, "grad_norm": 2.2870447200532515, "learning_rate": 9.961732328991805e-07, "loss": 0.5574, "step": 3926 }, { "epoch": 0.06826122477359245, "grad_norm": 2.281164390639522, "learning_rate": 9.961697560981003e-07, "loss": 0.6154, "step": 3927 }, { "epoch": 0.06827860731109528, "grad_norm": 3.1809295749315503, "learning_rate": 9.961662777243896e-07, "loss": 0.3485, "step": 3928 }, { "epoch": 0.0682959898485981, "grad_norm": 1.6888211616059043, "learning_rate": 9.9616279777806e-07, "loss": 0.5039, "step": 3929 }, { "epoch": 0.06831337238610093, "grad_norm": 2.539983922235976, "learning_rate": 9.96159316259122e-07, "loss": 0.5378, "step": 3930 }, { "epoch": 0.06833075492360374, "grad_norm": 1.8083085601657203, "learning_rate": 9.96155833167587e-07, "loss": 0.3551, "step": 3931 }, { "epoch": 0.06834813746110657, "grad_norm": 1.7751070972464915, "learning_rate": 9.96152348503466e-07, "loss": 0.3614, "step": 3932 }, { "epoch": 0.0683655199986094, "grad_norm": 1.6344418377092467, "learning_rate": 9.9614886226677e-07, "loss": 0.3411, "step": 3933 }, { "epoch": 0.06838290253611222, "grad_norm": 2.395332629336235, "learning_rate": 9.9614537445751e-07, "loss": 0.4421, "step": 3934 }, { "epoch": 0.06840028507361505, "grad_norm": 2.078016537044277, "learning_rate": 9.961418850756969e-07, "loss": 0.3095, "step": 3935 }, { "epoch": 0.06841766761111787, "grad_norm": 6.659150092781967, "learning_rate": 9.96138394121342e-07, "loss": 0.5884, "step": 3936 }, { "epoch": 0.0684350501486207, "grad_norm": 2.0254424688017627, "learning_rate": 9.961349015944564e-07, "loss": 0.4211, "step": 3937 }, { "epoch": 0.06845243268612353, "grad_norm": 3.2087601728788804, "learning_rate": 9.961314074950511e-07, "loss": 0.3336, "step": 3938 }, { "epoch": 0.06846981522362634, "grad_norm": 1.9927645943393377, "learning_rate": 9.961279118231369e-07, "loss": 0.4526, "step": 3939 }, { "epoch": 0.06848719776112917, "grad_norm": 1.8181920237274913, "learning_rate": 9.961244145787252e-07, "loss": 0.859, "step": 3940 }, { "epoch": 0.06850458029863199, "grad_norm": 2.05566029353982, "learning_rate": 9.961209157618272e-07, "loss": 0.3272, "step": 3941 }, { "epoch": 0.06852196283613482, "grad_norm": 2.3557974492004816, "learning_rate": 9.961174153724536e-07, "loss": 0.4294, "step": 3942 }, { "epoch": 0.06853934537363765, "grad_norm": 1.5296622173501304, "learning_rate": 9.961139134106158e-07, "loss": 0.4804, "step": 3943 }, { "epoch": 0.06855672791114047, "grad_norm": 2.0277419683608655, "learning_rate": 9.961104098763249e-07, "loss": 0.4329, "step": 3944 }, { "epoch": 0.0685741104486433, "grad_norm": 1.912077860699543, "learning_rate": 9.961069047695916e-07, "loss": 0.4162, "step": 3945 }, { "epoch": 0.06859149298614611, "grad_norm": 3.1091270146236685, "learning_rate": 9.961033980904275e-07, "loss": 0.5927, "step": 3946 }, { "epoch": 0.06860887552364894, "grad_norm": 2.4290292665857858, "learning_rate": 9.960998898388436e-07, "loss": 0.7217, "step": 3947 }, { "epoch": 0.06862625806115177, "grad_norm": 1.6619947969744329, "learning_rate": 9.960963800148507e-07, "loss": 0.4757, "step": 3948 }, { "epoch": 0.06864364059865459, "grad_norm": 3.7079886678743916, "learning_rate": 9.960928686184603e-07, "loss": 0.3993, "step": 3949 }, { "epoch": 0.06866102313615742, "grad_norm": 6.498274757306823, "learning_rate": 9.960893556496833e-07, "loss": 0.3921, "step": 3950 }, { "epoch": 0.06867840567366024, "grad_norm": 1.8548981823359647, "learning_rate": 9.96085841108531e-07, "loss": 0.4091, "step": 3951 }, { "epoch": 0.06869578821116307, "grad_norm": 2.547438603970739, "learning_rate": 9.960823249950146e-07, "loss": 0.3823, "step": 3952 }, { "epoch": 0.0687131707486659, "grad_norm": 2.330338906441215, "learning_rate": 9.960788073091448e-07, "loss": 0.4477, "step": 3953 }, { "epoch": 0.06873055328616871, "grad_norm": 1.9425478637596143, "learning_rate": 9.960752880509334e-07, "loss": 0.4512, "step": 3954 }, { "epoch": 0.06874793582367154, "grad_norm": 2.7123552679999783, "learning_rate": 9.960717672203908e-07, "loss": 0.3971, "step": 3955 }, { "epoch": 0.06876531836117436, "grad_norm": 2.434146711566418, "learning_rate": 9.96068244817529e-07, "loss": 0.3111, "step": 3956 }, { "epoch": 0.06878270089867719, "grad_norm": 1.7119656378808654, "learning_rate": 9.960647208423584e-07, "loss": 0.7649, "step": 3957 }, { "epoch": 0.06880008343618, "grad_norm": 1.9241081448668749, "learning_rate": 9.960611952948907e-07, "loss": 0.3868, "step": 3958 }, { "epoch": 0.06881746597368284, "grad_norm": 1.4781361699525937, "learning_rate": 9.960576681751368e-07, "loss": 0.403, "step": 3959 }, { "epoch": 0.06883484851118567, "grad_norm": 3.3268186244461, "learning_rate": 9.96054139483108e-07, "loss": 0.5643, "step": 3960 }, { "epoch": 0.06885223104868848, "grad_norm": 2.16243398090478, "learning_rate": 9.960506092188157e-07, "loss": 0.6251, "step": 3961 }, { "epoch": 0.06886961358619131, "grad_norm": 2.0791021801088667, "learning_rate": 9.960470773822705e-07, "loss": 0.5364, "step": 3962 }, { "epoch": 0.06888699612369413, "grad_norm": 1.5612053695155608, "learning_rate": 9.96043543973484e-07, "loss": 0.4951, "step": 3963 }, { "epoch": 0.06890437866119696, "grad_norm": 1.835475518218917, "learning_rate": 9.960400089924674e-07, "loss": 0.6204, "step": 3964 }, { "epoch": 0.06892176119869979, "grad_norm": 2.075299801760873, "learning_rate": 9.96036472439232e-07, "loss": 0.401, "step": 3965 }, { "epoch": 0.06893914373620261, "grad_norm": 2.198569398131538, "learning_rate": 9.960329343137886e-07, "loss": 0.4996, "step": 3966 }, { "epoch": 0.06895652627370544, "grad_norm": 1.5001660064950213, "learning_rate": 9.960293946161485e-07, "loss": 0.3528, "step": 3967 }, { "epoch": 0.06897390881120825, "grad_norm": 2.4424182641209575, "learning_rate": 9.960258533463233e-07, "loss": 0.5016, "step": 3968 }, { "epoch": 0.06899129134871108, "grad_norm": 1.7360676211633888, "learning_rate": 9.96022310504324e-07, "loss": 0.328, "step": 3969 }, { "epoch": 0.06900867388621391, "grad_norm": 2.0883345060964738, "learning_rate": 9.960187660901618e-07, "loss": 0.5756, "step": 3970 }, { "epoch": 0.06902605642371673, "grad_norm": 2.429461327118108, "learning_rate": 9.96015220103848e-07, "loss": 0.4984, "step": 3971 }, { "epoch": 0.06904343896121956, "grad_norm": 1.9938093464897582, "learning_rate": 9.960116725453938e-07, "loss": 0.3141, "step": 3972 }, { "epoch": 0.06906082149872238, "grad_norm": 1.9261294387796934, "learning_rate": 9.960081234148102e-07, "loss": 0.4053, "step": 3973 }, { "epoch": 0.06907820403622521, "grad_norm": 2.4643080172117964, "learning_rate": 9.960045727121092e-07, "loss": 0.5301, "step": 3974 }, { "epoch": 0.06909558657372804, "grad_norm": 1.5552726414024058, "learning_rate": 9.96001020437301e-07, "loss": 0.2214, "step": 3975 }, { "epoch": 0.06911296911123085, "grad_norm": 1.9268008783539856, "learning_rate": 9.959974665903978e-07, "loss": 0.4504, "step": 3976 }, { "epoch": 0.06913035164873368, "grad_norm": 3.784531308236994, "learning_rate": 9.959939111714104e-07, "loss": 0.9243, "step": 3977 }, { "epoch": 0.0691477341862365, "grad_norm": 1.4160445506689106, "learning_rate": 9.9599035418035e-07, "loss": 0.3617, "step": 3978 }, { "epoch": 0.06916511672373933, "grad_norm": 1.9533449403970113, "learning_rate": 9.959867956172282e-07, "loss": 0.4957, "step": 3979 }, { "epoch": 0.06918249926124216, "grad_norm": 2.5188767986975287, "learning_rate": 9.95983235482056e-07, "loss": 0.7666, "step": 3980 }, { "epoch": 0.06919988179874498, "grad_norm": 1.8532003378952835, "learning_rate": 9.959796737748447e-07, "loss": 0.3743, "step": 3981 }, { "epoch": 0.06921726433624781, "grad_norm": 2.5873217216186717, "learning_rate": 9.959761104956058e-07, "loss": 0.4888, "step": 3982 }, { "epoch": 0.06923464687375062, "grad_norm": 1.4758393431995507, "learning_rate": 9.959725456443504e-07, "loss": 0.26, "step": 3983 }, { "epoch": 0.06925202941125345, "grad_norm": 2.8288084334243067, "learning_rate": 9.959689792210899e-07, "loss": 0.4878, "step": 3984 }, { "epoch": 0.06926941194875628, "grad_norm": 2.2256804230649507, "learning_rate": 9.959654112258354e-07, "loss": 0.8286, "step": 3985 }, { "epoch": 0.0692867944862591, "grad_norm": 2.993972334097112, "learning_rate": 9.959618416585984e-07, "loss": 0.4175, "step": 3986 }, { "epoch": 0.06930417702376193, "grad_norm": 1.7694047797629715, "learning_rate": 9.959582705193905e-07, "loss": 0.2249, "step": 3987 }, { "epoch": 0.06932155956126475, "grad_norm": 2.5282985832725906, "learning_rate": 9.959546978082225e-07, "loss": 0.6174, "step": 3988 }, { "epoch": 0.06933894209876758, "grad_norm": 2.2758657698523876, "learning_rate": 9.95951123525106e-07, "loss": 0.4118, "step": 3989 }, { "epoch": 0.06935632463627041, "grad_norm": 2.4803130804951263, "learning_rate": 9.95947547670052e-07, "loss": 0.506, "step": 3990 }, { "epoch": 0.06937370717377322, "grad_norm": 1.4519274395422912, "learning_rate": 9.959439702430723e-07, "loss": 0.4575, "step": 3991 }, { "epoch": 0.06939108971127605, "grad_norm": 1.3831617836472636, "learning_rate": 9.959403912441782e-07, "loss": 0.474, "step": 3992 }, { "epoch": 0.06940847224877887, "grad_norm": 1.8129359740916058, "learning_rate": 9.959368106733806e-07, "loss": 0.4643, "step": 3993 }, { "epoch": 0.0694258547862817, "grad_norm": 1.6890337161112696, "learning_rate": 9.959332285306912e-07, "loss": 0.496, "step": 3994 }, { "epoch": 0.06944323732378453, "grad_norm": 3.4585848693554038, "learning_rate": 9.959296448161214e-07, "loss": 0.7101, "step": 3995 }, { "epoch": 0.06946061986128735, "grad_norm": 4.462930694846758, "learning_rate": 9.959260595296824e-07, "loss": 0.5201, "step": 3996 }, { "epoch": 0.06947800239879018, "grad_norm": 1.960798507360578, "learning_rate": 9.959224726713855e-07, "loss": 0.5366, "step": 3997 }, { "epoch": 0.069495384936293, "grad_norm": 1.9259022177688032, "learning_rate": 9.959188842412421e-07, "loss": 0.3267, "step": 3998 }, { "epoch": 0.06951276747379583, "grad_norm": 2.711374162161007, "learning_rate": 9.959152942392638e-07, "loss": 0.5387, "step": 3999 }, { "epoch": 0.06953015001129866, "grad_norm": 3.8898333075813847, "learning_rate": 9.959117026654619e-07, "loss": 0.4065, "step": 4000 }, { "epoch": 0.06954753254880147, "grad_norm": 4.020570969037126, "learning_rate": 9.959081095198474e-07, "loss": 0.3725, "step": 4001 }, { "epoch": 0.0695649150863043, "grad_norm": 1.918571530143741, "learning_rate": 9.959045148024321e-07, "loss": 0.4552, "step": 4002 }, { "epoch": 0.06958229762380712, "grad_norm": 1.4162520513249168, "learning_rate": 9.959009185132274e-07, "loss": 0.5426, "step": 4003 }, { "epoch": 0.06959968016130995, "grad_norm": 1.5360261376372084, "learning_rate": 9.958973206522447e-07, "loss": 0.5014, "step": 4004 }, { "epoch": 0.06961706269881278, "grad_norm": 2.8352394715182787, "learning_rate": 9.95893721219495e-07, "loss": 0.5558, "step": 4005 }, { "epoch": 0.0696344452363156, "grad_norm": 1.658068258525216, "learning_rate": 9.958901202149901e-07, "loss": 0.3386, "step": 4006 }, { "epoch": 0.06965182777381843, "grad_norm": 3.0710550324847152, "learning_rate": 9.958865176387414e-07, "loss": 0.3397, "step": 4007 }, { "epoch": 0.06966921031132124, "grad_norm": 2.7383317019976223, "learning_rate": 9.958829134907603e-07, "loss": 0.2837, "step": 4008 }, { "epoch": 0.06968659284882407, "grad_norm": 2.6353891324828838, "learning_rate": 9.95879307771058e-07, "loss": 0.5112, "step": 4009 }, { "epoch": 0.0697039753863269, "grad_norm": 1.944927796392921, "learning_rate": 9.958757004796462e-07, "loss": 0.4205, "step": 4010 }, { "epoch": 0.06972135792382972, "grad_norm": 2.0226214637770465, "learning_rate": 9.95872091616536e-07, "loss": 0.478, "step": 4011 }, { "epoch": 0.06973874046133255, "grad_norm": 2.3668202732438797, "learning_rate": 9.958684811817394e-07, "loss": 0.4045, "step": 4012 }, { "epoch": 0.06975612299883537, "grad_norm": 1.284413627787948, "learning_rate": 9.958648691752672e-07, "loss": 0.2179, "step": 4013 }, { "epoch": 0.0697735055363382, "grad_norm": 1.6455766521363533, "learning_rate": 9.958612555971311e-07, "loss": 0.4782, "step": 4014 }, { "epoch": 0.06979088807384103, "grad_norm": 2.1752501758729585, "learning_rate": 9.958576404473428e-07, "loss": 0.462, "step": 4015 }, { "epoch": 0.06980827061134384, "grad_norm": 1.8653860868375582, "learning_rate": 9.958540237259134e-07, "loss": 0.5028, "step": 4016 }, { "epoch": 0.06982565314884667, "grad_norm": 1.9047261955384562, "learning_rate": 9.958504054328546e-07, "loss": 0.3107, "step": 4017 }, { "epoch": 0.06984303568634949, "grad_norm": 1.548892214444809, "learning_rate": 9.958467855681779e-07, "loss": 0.2009, "step": 4018 }, { "epoch": 0.06986041822385232, "grad_norm": 2.456131020686538, "learning_rate": 9.958431641318944e-07, "loss": 0.5215, "step": 4019 }, { "epoch": 0.06987780076135515, "grad_norm": 2.795478160219678, "learning_rate": 9.95839541124016e-07, "loss": 0.4994, "step": 4020 }, { "epoch": 0.06989518329885797, "grad_norm": 1.8886324697149468, "learning_rate": 9.958359165445542e-07, "loss": 0.3916, "step": 4021 }, { "epoch": 0.0699125658363608, "grad_norm": 1.9634761820644344, "learning_rate": 9.958322903935202e-07, "loss": 0.652, "step": 4022 }, { "epoch": 0.06992994837386361, "grad_norm": 2.8279199484925313, "learning_rate": 9.958286626709255e-07, "loss": 0.3408, "step": 4023 }, { "epoch": 0.06994733091136644, "grad_norm": 2.25540812375847, "learning_rate": 9.958250333767818e-07, "loss": 0.4348, "step": 4024 }, { "epoch": 0.06996471344886926, "grad_norm": 1.851320990307235, "learning_rate": 9.958214025111003e-07, "loss": 0.5704, "step": 4025 }, { "epoch": 0.06998209598637209, "grad_norm": 1.3197962892388435, "learning_rate": 9.95817770073893e-07, "loss": 0.4696, "step": 4026 }, { "epoch": 0.06999947852387492, "grad_norm": 2.8873254801627652, "learning_rate": 9.95814136065171e-07, "loss": 0.4623, "step": 4027 }, { "epoch": 0.07001686106137774, "grad_norm": 1.251093367211183, "learning_rate": 9.958105004849459e-07, "loss": 0.3039, "step": 4028 }, { "epoch": 0.07003424359888057, "grad_norm": 1.9739723888169665, "learning_rate": 9.958068633332292e-07, "loss": 0.6929, "step": 4029 }, { "epoch": 0.07005162613638338, "grad_norm": 1.3747256247371245, "learning_rate": 9.958032246100328e-07, "loss": 0.5662, "step": 4030 }, { "epoch": 0.07006900867388621, "grad_norm": 1.2059698922287527, "learning_rate": 9.957995843153678e-07, "loss": 0.3737, "step": 4031 }, { "epoch": 0.07008639121138904, "grad_norm": 2.5886275170585, "learning_rate": 9.957959424492457e-07, "loss": 0.427, "step": 4032 }, { "epoch": 0.07010377374889186, "grad_norm": 2.643092387935558, "learning_rate": 9.957922990116784e-07, "loss": 0.4153, "step": 4033 }, { "epoch": 0.07012115628639469, "grad_norm": 2.807572777740023, "learning_rate": 9.957886540026772e-07, "loss": 0.3718, "step": 4034 }, { "epoch": 0.0701385388238975, "grad_norm": 1.5676239609698899, "learning_rate": 9.957850074222538e-07, "loss": 0.4069, "step": 4035 }, { "epoch": 0.07015592136140034, "grad_norm": 1.7926455715100238, "learning_rate": 9.957813592704194e-07, "loss": 0.4309, "step": 4036 }, { "epoch": 0.07017330389890317, "grad_norm": 2.1437184274538326, "learning_rate": 9.95777709547186e-07, "loss": 0.7385, "step": 4037 }, { "epoch": 0.07019068643640598, "grad_norm": 2.2728121033138966, "learning_rate": 9.95774058252565e-07, "loss": 0.6142, "step": 4038 }, { "epoch": 0.07020806897390881, "grad_norm": 2.448435319239528, "learning_rate": 9.95770405386568e-07, "loss": 0.5284, "step": 4039 }, { "epoch": 0.07022545151141163, "grad_norm": 1.6236691861146137, "learning_rate": 9.957667509492067e-07, "loss": 0.2688, "step": 4040 }, { "epoch": 0.07024283404891446, "grad_norm": 1.6971501116296555, "learning_rate": 9.957630949404923e-07, "loss": 0.3562, "step": 4041 }, { "epoch": 0.07026021658641729, "grad_norm": 2.1206160906463416, "learning_rate": 9.957594373604369e-07, "loss": 0.6828, "step": 4042 }, { "epoch": 0.0702775991239201, "grad_norm": 9.246346542580055, "learning_rate": 9.957557782090515e-07, "loss": 0.5219, "step": 4043 }, { "epoch": 0.07029498166142294, "grad_norm": 1.6462230718782331, "learning_rate": 9.957521174863482e-07, "loss": 0.4139, "step": 4044 }, { "epoch": 0.07031236419892575, "grad_norm": 2.3328480419724418, "learning_rate": 9.957484551923383e-07, "loss": 0.3518, "step": 4045 }, { "epoch": 0.07032974673642858, "grad_norm": 2.2408239165122743, "learning_rate": 9.957447913270335e-07, "loss": 0.4451, "step": 4046 }, { "epoch": 0.07034712927393141, "grad_norm": 1.7764048284328107, "learning_rate": 9.957411258904456e-07, "loss": 0.331, "step": 4047 }, { "epoch": 0.07036451181143423, "grad_norm": 2.6657613407075313, "learning_rate": 9.95737458882586e-07, "loss": 0.347, "step": 4048 }, { "epoch": 0.07038189434893706, "grad_norm": 2.0473042067933602, "learning_rate": 9.957337903034663e-07, "loss": 0.3223, "step": 4049 }, { "epoch": 0.07039927688643988, "grad_norm": 1.8827835683404357, "learning_rate": 9.957301201530983e-07, "loss": 0.4796, "step": 4050 }, { "epoch": 0.0704166594239427, "grad_norm": 1.644395801993902, "learning_rate": 9.957264484314935e-07, "loss": 0.4192, "step": 4051 }, { "epoch": 0.07043404196144554, "grad_norm": 1.458235154991789, "learning_rate": 9.957227751386635e-07, "loss": 0.4338, "step": 4052 }, { "epoch": 0.07045142449894835, "grad_norm": 2.3621107404774158, "learning_rate": 9.9571910027462e-07, "loss": 0.5717, "step": 4053 }, { "epoch": 0.07046880703645118, "grad_norm": 2.3123044764062604, "learning_rate": 9.95715423839375e-07, "loss": 1.0792, "step": 4054 }, { "epoch": 0.070486189573954, "grad_norm": 2.9394672925610754, "learning_rate": 9.957117458329396e-07, "loss": 0.249, "step": 4055 }, { "epoch": 0.07050357211145683, "grad_norm": 2.950333281707726, "learning_rate": 9.957080662553255e-07, "loss": 0.3852, "step": 4056 }, { "epoch": 0.07052095464895966, "grad_norm": 3.1549025271095905, "learning_rate": 9.957043851065446e-07, "loss": 0.3209, "step": 4057 }, { "epoch": 0.07053833718646248, "grad_norm": 2.102370806025002, "learning_rate": 9.957007023866088e-07, "loss": 0.7516, "step": 4058 }, { "epoch": 0.07055571972396531, "grad_norm": 2.041303148702543, "learning_rate": 9.956970180955293e-07, "loss": 0.2849, "step": 4059 }, { "epoch": 0.07057310226146812, "grad_norm": 1.8608009234675538, "learning_rate": 9.956933322333178e-07, "loss": 0.3001, "step": 4060 }, { "epoch": 0.07059048479897095, "grad_norm": 4.865562959881219, "learning_rate": 9.956896447999864e-07, "loss": 0.4241, "step": 4061 }, { "epoch": 0.07060786733647378, "grad_norm": 1.6240129092291338, "learning_rate": 9.956859557955463e-07, "loss": 0.3174, "step": 4062 }, { "epoch": 0.0706252498739766, "grad_norm": 2.283523504983644, "learning_rate": 9.956822652200096e-07, "loss": 0.434, "step": 4063 }, { "epoch": 0.07064263241147943, "grad_norm": 1.3319722413107926, "learning_rate": 9.956785730733877e-07, "loss": 0.5107, "step": 4064 }, { "epoch": 0.07066001494898225, "grad_norm": 2.265574749547911, "learning_rate": 9.956748793556925e-07, "loss": 0.6557, "step": 4065 }, { "epoch": 0.07067739748648508, "grad_norm": 1.4225662991510843, "learning_rate": 9.956711840669356e-07, "loss": 0.3788, "step": 4066 }, { "epoch": 0.07069478002398791, "grad_norm": 1.5423810389049404, "learning_rate": 9.956674872071287e-07, "loss": 0.3713, "step": 4067 }, { "epoch": 0.07071216256149072, "grad_norm": 3.54473943657612, "learning_rate": 9.956637887762834e-07, "loss": 0.4215, "step": 4068 }, { "epoch": 0.07072954509899355, "grad_norm": 2.015095092336053, "learning_rate": 9.95660088774412e-07, "loss": 0.6934, "step": 4069 }, { "epoch": 0.07074692763649637, "grad_norm": 1.4767112748022215, "learning_rate": 9.956563872015256e-07, "loss": 0.348, "step": 4070 }, { "epoch": 0.0707643101739992, "grad_norm": 2.0353161505247592, "learning_rate": 9.95652684057636e-07, "loss": 0.3368, "step": 4071 }, { "epoch": 0.07078169271150203, "grad_norm": 1.8705074864707878, "learning_rate": 9.956489793427552e-07, "loss": 0.4854, "step": 4072 }, { "epoch": 0.07079907524900485, "grad_norm": 1.709144213023836, "learning_rate": 9.956452730568948e-07, "loss": 0.3389, "step": 4073 }, { "epoch": 0.07081645778650768, "grad_norm": 5.371352171458477, "learning_rate": 9.956415652000666e-07, "loss": 0.6104, "step": 4074 }, { "epoch": 0.0708338403240105, "grad_norm": 2.57326675158703, "learning_rate": 9.956378557722821e-07, "loss": 0.3859, "step": 4075 }, { "epoch": 0.07085122286151332, "grad_norm": 3.7826635249407166, "learning_rate": 9.956341447735537e-07, "loss": 0.5149, "step": 4076 }, { "epoch": 0.07086860539901615, "grad_norm": 2.4405654890207895, "learning_rate": 9.956304322038924e-07, "loss": 0.3227, "step": 4077 }, { "epoch": 0.07088598793651897, "grad_norm": 1.9532048579529215, "learning_rate": 9.956267180633104e-07, "loss": 0.3338, "step": 4078 }, { "epoch": 0.0709033704740218, "grad_norm": 6.280837375189729, "learning_rate": 9.956230023518194e-07, "loss": 0.4942, "step": 4079 }, { "epoch": 0.07092075301152462, "grad_norm": 4.187492934303186, "learning_rate": 9.95619285069431e-07, "loss": 0.5382, "step": 4080 }, { "epoch": 0.07093813554902745, "grad_norm": 1.388351650493331, "learning_rate": 9.956155662161575e-07, "loss": 0.2235, "step": 4081 }, { "epoch": 0.07095551808653028, "grad_norm": 1.5188058308918095, "learning_rate": 9.9561184579201e-07, "loss": 0.6939, "step": 4082 }, { "epoch": 0.0709729006240331, "grad_norm": 1.6066631487297904, "learning_rate": 9.956081237970006e-07, "loss": 0.2225, "step": 4083 }, { "epoch": 0.07099028316153592, "grad_norm": 1.9345517603490614, "learning_rate": 9.956044002311414e-07, "loss": 0.5842, "step": 4084 }, { "epoch": 0.07100766569903874, "grad_norm": 1.9567904834657248, "learning_rate": 9.956006750944436e-07, "loss": 0.2796, "step": 4085 }, { "epoch": 0.07102504823654157, "grad_norm": 2.7232686653827867, "learning_rate": 9.955969483869196e-07, "loss": 0.4167, "step": 4086 }, { "epoch": 0.0710424307740444, "grad_norm": 3.044158116236512, "learning_rate": 9.955932201085807e-07, "loss": 0.3968, "step": 4087 }, { "epoch": 0.07105981331154722, "grad_norm": 2.041169718944598, "learning_rate": 9.95589490259439e-07, "loss": 0.42, "step": 4088 }, { "epoch": 0.07107719584905005, "grad_norm": 2.7627198093614056, "learning_rate": 9.955857588395063e-07, "loss": 0.4477, "step": 4089 }, { "epoch": 0.07109457838655286, "grad_norm": 1.4602778336390219, "learning_rate": 9.955820258487943e-07, "loss": 0.4251, "step": 4090 }, { "epoch": 0.0711119609240557, "grad_norm": 3.23269679429081, "learning_rate": 9.955782912873152e-07, "loss": 0.348, "step": 4091 }, { "epoch": 0.07112934346155853, "grad_norm": 1.3956452084031794, "learning_rate": 9.955745551550805e-07, "loss": 0.24, "step": 4092 }, { "epoch": 0.07114672599906134, "grad_norm": 1.2942340559090426, "learning_rate": 9.955708174521019e-07, "loss": 0.2932, "step": 4093 }, { "epoch": 0.07116410853656417, "grad_norm": 1.3696195613348063, "learning_rate": 9.955670781783915e-07, "loss": 1.007, "step": 4094 }, { "epoch": 0.07118149107406699, "grad_norm": 6.119536459055046, "learning_rate": 9.95563337333961e-07, "loss": 0.5598, "step": 4095 }, { "epoch": 0.07119887361156982, "grad_norm": 1.7367283374792453, "learning_rate": 9.955595949188226e-07, "loss": 0.4297, "step": 4096 }, { "epoch": 0.07121625614907263, "grad_norm": 1.3871748791462593, "learning_rate": 9.95555850932988e-07, "loss": 0.2577, "step": 4097 }, { "epoch": 0.07123363868657547, "grad_norm": 2.892393033744381, "learning_rate": 9.95552105376469e-07, "loss": 0.5201, "step": 4098 }, { "epoch": 0.0712510212240783, "grad_norm": 1.3355913775783164, "learning_rate": 9.955483582492773e-07, "loss": 0.2818, "step": 4099 }, { "epoch": 0.07126840376158111, "grad_norm": 2.5320723865112824, "learning_rate": 9.955446095514248e-07, "loss": 0.6546, "step": 4100 }, { "epoch": 0.07128578629908394, "grad_norm": 2.4535973989925086, "learning_rate": 9.955408592829235e-07, "loss": 0.322, "step": 4101 }, { "epoch": 0.07130316883658676, "grad_norm": 2.537490584055512, "learning_rate": 9.955371074437856e-07, "loss": 0.6202, "step": 4102 }, { "epoch": 0.07132055137408959, "grad_norm": 1.947333662693047, "learning_rate": 9.955333540340227e-07, "loss": 0.5476, "step": 4103 }, { "epoch": 0.07133793391159242, "grad_norm": 1.9967194717659054, "learning_rate": 9.955295990536466e-07, "loss": 0.2479, "step": 4104 }, { "epoch": 0.07135531644909524, "grad_norm": 2.505184143693094, "learning_rate": 9.955258425026691e-07, "loss": 0.3261, "step": 4105 }, { "epoch": 0.07137269898659807, "grad_norm": 2.3064821972255545, "learning_rate": 9.955220843811025e-07, "loss": 0.4285, "step": 4106 }, { "epoch": 0.07139008152410088, "grad_norm": 2.25900421112079, "learning_rate": 9.955183246889586e-07, "loss": 0.4401, "step": 4107 }, { "epoch": 0.07140746406160371, "grad_norm": 1.9924501863395858, "learning_rate": 9.955145634262492e-07, "loss": 0.4412, "step": 4108 }, { "epoch": 0.07142484659910654, "grad_norm": 1.803807095469772, "learning_rate": 9.95510800592986e-07, "loss": 0.7736, "step": 4109 }, { "epoch": 0.07144222913660936, "grad_norm": 2.292158090017227, "learning_rate": 9.955070361891816e-07, "loss": 0.5666, "step": 4110 }, { "epoch": 0.07145961167411219, "grad_norm": 1.6560630540020285, "learning_rate": 9.955032702148472e-07, "loss": 0.3307, "step": 4111 }, { "epoch": 0.071476994211615, "grad_norm": 2.0624016625786976, "learning_rate": 9.954995026699952e-07, "loss": 0.4806, "step": 4112 }, { "epoch": 0.07149437674911784, "grad_norm": 1.7110200588235474, "learning_rate": 9.954957335546373e-07, "loss": 0.3, "step": 4113 }, { "epoch": 0.07151175928662067, "grad_norm": 6.264256932342326, "learning_rate": 9.954919628687855e-07, "loss": 0.4205, "step": 4114 }, { "epoch": 0.07152914182412348, "grad_norm": 1.7635525065897408, "learning_rate": 9.954881906124518e-07, "loss": 0.5502, "step": 4115 }, { "epoch": 0.07154652436162631, "grad_norm": 1.8752956982590772, "learning_rate": 9.954844167856483e-07, "loss": 0.5061, "step": 4116 }, { "epoch": 0.07156390689912913, "grad_norm": 2.2059691544892135, "learning_rate": 9.954806413883866e-07, "loss": 0.424, "step": 4117 }, { "epoch": 0.07158128943663196, "grad_norm": 2.1605964621069336, "learning_rate": 9.95476864420679e-07, "loss": 0.3159, "step": 4118 }, { "epoch": 0.07159867197413479, "grad_norm": 1.5667842694406322, "learning_rate": 9.954730858825373e-07, "loss": 0.578, "step": 4119 }, { "epoch": 0.0716160545116376, "grad_norm": 1.661146794224746, "learning_rate": 9.954693057739736e-07, "loss": 0.376, "step": 4120 }, { "epoch": 0.07163343704914044, "grad_norm": 1.7572413711654622, "learning_rate": 9.954655240949995e-07, "loss": 0.2621, "step": 4121 }, { "epoch": 0.07165081958664325, "grad_norm": 3.281623467301483, "learning_rate": 9.954617408456277e-07, "loss": 0.2704, "step": 4122 }, { "epoch": 0.07166820212414608, "grad_norm": 1.786435774927574, "learning_rate": 9.954579560258696e-07, "loss": 0.3994, "step": 4123 }, { "epoch": 0.07168558466164891, "grad_norm": 2.315648212163924, "learning_rate": 9.954541696357373e-07, "loss": 0.3865, "step": 4124 }, { "epoch": 0.07170296719915173, "grad_norm": 1.259413554711509, "learning_rate": 9.95450381675243e-07, "loss": 0.5033, "step": 4125 }, { "epoch": 0.07172034973665456, "grad_norm": 1.8267523323244288, "learning_rate": 9.954465921443984e-07, "loss": 0.4816, "step": 4126 }, { "epoch": 0.07173773227415738, "grad_norm": 1.688661847163159, "learning_rate": 9.954428010432158e-07, "loss": 0.437, "step": 4127 }, { "epoch": 0.0717551148116602, "grad_norm": 1.3539632502626235, "learning_rate": 9.954390083717071e-07, "loss": 0.3626, "step": 4128 }, { "epoch": 0.07177249734916304, "grad_norm": 2.513465143637571, "learning_rate": 9.954352141298845e-07, "loss": 0.3314, "step": 4129 }, { "epoch": 0.07178987988666585, "grad_norm": 2.669030436151194, "learning_rate": 9.954314183177598e-07, "loss": 0.5242, "step": 4130 }, { "epoch": 0.07180726242416868, "grad_norm": 6.0005855259219185, "learning_rate": 9.954276209353449e-07, "loss": 0.7164, "step": 4131 }, { "epoch": 0.0718246449616715, "grad_norm": 1.6585097594583114, "learning_rate": 9.954238219826522e-07, "loss": 0.4482, "step": 4132 }, { "epoch": 0.07184202749917433, "grad_norm": 1.6750207092211797, "learning_rate": 9.954200214596936e-07, "loss": 0.3051, "step": 4133 }, { "epoch": 0.07185941003667716, "grad_norm": 1.928537546071789, "learning_rate": 9.954162193664808e-07, "loss": 0.5467, "step": 4134 }, { "epoch": 0.07187679257417998, "grad_norm": 2.225282409501216, "learning_rate": 9.954124157030264e-07, "loss": 0.2926, "step": 4135 }, { "epoch": 0.0718941751116828, "grad_norm": 1.5116077406312278, "learning_rate": 9.954086104693422e-07, "loss": 0.5514, "step": 4136 }, { "epoch": 0.07191155764918562, "grad_norm": 2.738829044250408, "learning_rate": 9.954048036654403e-07, "loss": 0.3814, "step": 4137 }, { "epoch": 0.07192894018668845, "grad_norm": 1.36493285913534, "learning_rate": 9.954009952913327e-07, "loss": 0.4985, "step": 4138 }, { "epoch": 0.07194632272419128, "grad_norm": 3.5600956234055965, "learning_rate": 9.953971853470314e-07, "loss": 0.4969, "step": 4139 }, { "epoch": 0.0719637052616941, "grad_norm": 1.737504088329836, "learning_rate": 9.953933738325487e-07, "loss": 0.2774, "step": 4140 }, { "epoch": 0.07198108779919693, "grad_norm": 1.618756454571834, "learning_rate": 9.953895607478965e-07, "loss": 0.5836, "step": 4141 }, { "epoch": 0.07199847033669975, "grad_norm": 1.5996905524210807, "learning_rate": 9.953857460930872e-07, "loss": 0.2629, "step": 4142 }, { "epoch": 0.07201585287420258, "grad_norm": 1.3256160034287392, "learning_rate": 9.953819298681323e-07, "loss": 0.3203, "step": 4143 }, { "epoch": 0.0720332354117054, "grad_norm": 2.144237243581105, "learning_rate": 9.953781120730444e-07, "loss": 0.5184, "step": 4144 }, { "epoch": 0.07205061794920822, "grad_norm": 1.9382083038507123, "learning_rate": 9.953742927078355e-07, "loss": 0.6247, "step": 4145 }, { "epoch": 0.07206800048671105, "grad_norm": 3.246239385827268, "learning_rate": 9.953704717725177e-07, "loss": 0.5292, "step": 4146 }, { "epoch": 0.07208538302421387, "grad_norm": 3.036053805161641, "learning_rate": 9.953666492671028e-07, "loss": 0.3126, "step": 4147 }, { "epoch": 0.0721027655617167, "grad_norm": 2.0815652810660685, "learning_rate": 9.953628251916035e-07, "loss": 0.2759, "step": 4148 }, { "epoch": 0.07212014809921953, "grad_norm": 1.4749619536783467, "learning_rate": 9.953589995460314e-07, "loss": 0.5175, "step": 4149 }, { "epoch": 0.07213753063672235, "grad_norm": 1.917321485707816, "learning_rate": 9.953551723303988e-07, "loss": 0.4182, "step": 4150 }, { "epoch": 0.07215491317422518, "grad_norm": 1.334669194092202, "learning_rate": 9.95351343544718e-07, "loss": 0.3321, "step": 4151 }, { "epoch": 0.072172295711728, "grad_norm": 2.2424875711765555, "learning_rate": 9.953475131890007e-07, "loss": 0.4246, "step": 4152 }, { "epoch": 0.07218967824923082, "grad_norm": 2.9463891199982015, "learning_rate": 9.953436812632597e-07, "loss": 0.6749, "step": 4153 }, { "epoch": 0.07220706078673365, "grad_norm": 1.9185582770217533, "learning_rate": 9.953398477675066e-07, "loss": 0.4763, "step": 4154 }, { "epoch": 0.07222444332423647, "grad_norm": 1.7470960500655557, "learning_rate": 9.953360127017537e-07, "loss": 0.4204, "step": 4155 }, { "epoch": 0.0722418258617393, "grad_norm": 1.9586347359854956, "learning_rate": 9.953321760660132e-07, "loss": 0.2699, "step": 4156 }, { "epoch": 0.07225920839924212, "grad_norm": 1.4390995526739327, "learning_rate": 9.953283378602973e-07, "loss": 0.5065, "step": 4157 }, { "epoch": 0.07227659093674495, "grad_norm": 1.9540500086033268, "learning_rate": 9.95324498084618e-07, "loss": 0.3492, "step": 4158 }, { "epoch": 0.07229397347424778, "grad_norm": 2.144945024236513, "learning_rate": 9.953206567389875e-07, "loss": 0.5015, "step": 4159 }, { "epoch": 0.0723113560117506, "grad_norm": 1.6442668613857516, "learning_rate": 9.953168138234182e-07, "loss": 0.3595, "step": 4160 }, { "epoch": 0.07232873854925342, "grad_norm": 2.1053924669793274, "learning_rate": 9.95312969337922e-07, "loss": 0.3209, "step": 4161 }, { "epoch": 0.07234612108675624, "grad_norm": 1.9820658583167212, "learning_rate": 9.953091232825113e-07, "loss": 0.339, "step": 4162 }, { "epoch": 0.07236350362425907, "grad_norm": 1.6414603651625703, "learning_rate": 9.953052756571982e-07, "loss": 0.4432, "step": 4163 }, { "epoch": 0.0723808861617619, "grad_norm": 1.9543386690685454, "learning_rate": 9.95301426461995e-07, "loss": 0.6771, "step": 4164 }, { "epoch": 0.07239826869926472, "grad_norm": 2.194841863648675, "learning_rate": 9.952975756969137e-07, "loss": 0.5715, "step": 4165 }, { "epoch": 0.07241565123676755, "grad_norm": 2.0622377110593146, "learning_rate": 9.952937233619666e-07, "loss": 0.4184, "step": 4166 }, { "epoch": 0.07243303377427036, "grad_norm": 3.7194966763633053, "learning_rate": 9.95289869457166e-07, "loss": 0.3703, "step": 4167 }, { "epoch": 0.0724504163117732, "grad_norm": 2.8680667811482987, "learning_rate": 9.95286013982524e-07, "loss": 0.6447, "step": 4168 }, { "epoch": 0.07246779884927601, "grad_norm": 1.8685899329467281, "learning_rate": 9.952821569380527e-07, "loss": 0.4794, "step": 4169 }, { "epoch": 0.07248518138677884, "grad_norm": 3.3339523276579137, "learning_rate": 9.952782983237647e-07, "loss": 0.5771, "step": 4170 }, { "epoch": 0.07250256392428167, "grad_norm": 1.8963582213245103, "learning_rate": 9.952744381396718e-07, "loss": 0.665, "step": 4171 }, { "epoch": 0.07251994646178449, "grad_norm": 2.465272036724049, "learning_rate": 9.952705763857865e-07, "loss": 0.7352, "step": 4172 }, { "epoch": 0.07253732899928732, "grad_norm": 1.5916079354007655, "learning_rate": 9.95266713062121e-07, "loss": 0.4567, "step": 4173 }, { "epoch": 0.07255471153679013, "grad_norm": 1.6563136491115362, "learning_rate": 9.952628481686876e-07, "loss": 0.3591, "step": 4174 }, { "epoch": 0.07257209407429296, "grad_norm": 2.3714389478418334, "learning_rate": 9.952589817054984e-07, "loss": 0.4529, "step": 4175 }, { "epoch": 0.0725894766117958, "grad_norm": 1.6441960940175044, "learning_rate": 9.952551136725657e-07, "loss": 0.261, "step": 4176 }, { "epoch": 0.07260685914929861, "grad_norm": 3.335126359780018, "learning_rate": 9.95251244069902e-07, "loss": 0.2846, "step": 4177 }, { "epoch": 0.07262424168680144, "grad_norm": 1.7202519918934243, "learning_rate": 9.952473728975192e-07, "loss": 0.329, "step": 4178 }, { "epoch": 0.07264162422430426, "grad_norm": 1.6724299332024601, "learning_rate": 9.952435001554298e-07, "loss": 0.2591, "step": 4179 }, { "epoch": 0.07265900676180709, "grad_norm": 2.4876891768237743, "learning_rate": 9.952396258436457e-07, "loss": 0.584, "step": 4180 }, { "epoch": 0.07267638929930992, "grad_norm": 1.5995453783340008, "learning_rate": 9.952357499621799e-07, "loss": 0.4795, "step": 4181 }, { "epoch": 0.07269377183681273, "grad_norm": 1.8206249258941773, "learning_rate": 9.95231872511044e-07, "loss": 0.4199, "step": 4182 }, { "epoch": 0.07271115437431556, "grad_norm": 1.962111840498575, "learning_rate": 9.952279934902506e-07, "loss": 0.4789, "step": 4183 }, { "epoch": 0.07272853691181838, "grad_norm": 1.7992360253905308, "learning_rate": 9.95224112899812e-07, "loss": 0.534, "step": 4184 }, { "epoch": 0.07274591944932121, "grad_norm": 2.34450649868078, "learning_rate": 9.952202307397405e-07, "loss": 0.3171, "step": 4185 }, { "epoch": 0.07276330198682404, "grad_norm": 1.8282775864363774, "learning_rate": 9.952163470100483e-07, "loss": 0.6563, "step": 4186 }, { "epoch": 0.07278068452432686, "grad_norm": 1.6686062885510171, "learning_rate": 9.952124617107477e-07, "loss": 0.2633, "step": 4187 }, { "epoch": 0.07279806706182969, "grad_norm": 2.304047051134026, "learning_rate": 9.95208574841851e-07, "loss": 0.6327, "step": 4188 }, { "epoch": 0.0728154495993325, "grad_norm": 2.0217908186233537, "learning_rate": 9.95204686403371e-07, "loss": 0.5373, "step": 4189 }, { "epoch": 0.07283283213683533, "grad_norm": 2.511908053203184, "learning_rate": 9.952007963953191e-07, "loss": 0.5657, "step": 4190 }, { "epoch": 0.07285021467433817, "grad_norm": 2.110141450405753, "learning_rate": 9.951969048177085e-07, "loss": 0.3009, "step": 4191 }, { "epoch": 0.07286759721184098, "grad_norm": 7.19545757604365, "learning_rate": 9.95193011670551e-07, "loss": 0.3512, "step": 4192 }, { "epoch": 0.07288497974934381, "grad_norm": 1.9136186968361388, "learning_rate": 9.951891169538594e-07, "loss": 0.4881, "step": 4193 }, { "epoch": 0.07290236228684663, "grad_norm": 2.6055524402098578, "learning_rate": 9.951852206676455e-07, "loss": 0.4408, "step": 4194 }, { "epoch": 0.07291974482434946, "grad_norm": 2.0492443169629944, "learning_rate": 9.95181322811922e-07, "loss": 0.2549, "step": 4195 }, { "epoch": 0.07293712736185229, "grad_norm": 1.7105202806725566, "learning_rate": 9.951774233867012e-07, "loss": 0.6138, "step": 4196 }, { "epoch": 0.0729545098993551, "grad_norm": 2.0321746672860823, "learning_rate": 9.951735223919953e-07, "loss": 0.5933, "step": 4197 }, { "epoch": 0.07297189243685794, "grad_norm": 1.5103049501330184, "learning_rate": 9.95169619827817e-07, "loss": 0.357, "step": 4198 }, { "epoch": 0.07298927497436075, "grad_norm": 3.011738812353182, "learning_rate": 9.951657156941785e-07, "loss": 0.5462, "step": 4199 }, { "epoch": 0.07300665751186358, "grad_norm": 2.509927607428492, "learning_rate": 9.95161809991092e-07, "loss": 0.3824, "step": 4200 }, { "epoch": 0.07302404004936641, "grad_norm": 2.3687517934544253, "learning_rate": 9.9515790271857e-07, "loss": 0.3612, "step": 4201 }, { "epoch": 0.07304142258686923, "grad_norm": 2.065073824942927, "learning_rate": 9.95153993876625e-07, "loss": 0.5845, "step": 4202 }, { "epoch": 0.07305880512437206, "grad_norm": 1.7120210956642954, "learning_rate": 9.951500834652692e-07, "loss": 0.3055, "step": 4203 }, { "epoch": 0.07307618766187488, "grad_norm": 1.586289478836586, "learning_rate": 9.95146171484515e-07, "loss": 0.5545, "step": 4204 }, { "epoch": 0.0730935701993777, "grad_norm": 2.750427817882723, "learning_rate": 9.95142257934375e-07, "loss": 0.3761, "step": 4205 }, { "epoch": 0.07311095273688054, "grad_norm": 2.867452577736897, "learning_rate": 9.951383428148615e-07, "loss": 0.6239, "step": 4206 }, { "epoch": 0.07312833527438335, "grad_norm": 2.8156949780040756, "learning_rate": 9.95134426125987e-07, "loss": 0.4313, "step": 4207 }, { "epoch": 0.07314571781188618, "grad_norm": 2.1346345011871852, "learning_rate": 9.951305078677637e-07, "loss": 0.4378, "step": 4208 }, { "epoch": 0.073163100349389, "grad_norm": 1.8294261809643337, "learning_rate": 9.951265880402043e-07, "loss": 0.5344, "step": 4209 }, { "epoch": 0.07318048288689183, "grad_norm": 1.953195385481696, "learning_rate": 9.951226666433207e-07, "loss": 0.3182, "step": 4210 }, { "epoch": 0.07319786542439466, "grad_norm": 2.9316598042124657, "learning_rate": 9.95118743677126e-07, "loss": 0.5874, "step": 4211 }, { "epoch": 0.07321524796189748, "grad_norm": 2.0863284731750356, "learning_rate": 9.951148191416323e-07, "loss": 0.2638, "step": 4212 }, { "epoch": 0.0732326304994003, "grad_norm": 2.530487959478695, "learning_rate": 9.951108930368519e-07, "loss": 0.4991, "step": 4213 }, { "epoch": 0.07325001303690312, "grad_norm": 1.3857110379971402, "learning_rate": 9.951069653627975e-07, "loss": 0.5474, "step": 4214 }, { "epoch": 0.07326739557440595, "grad_norm": 1.215809314288733, "learning_rate": 9.951030361194814e-07, "loss": 0.3851, "step": 4215 }, { "epoch": 0.07328477811190878, "grad_norm": 2.4213492910772847, "learning_rate": 9.950991053069163e-07, "loss": 0.6849, "step": 4216 }, { "epoch": 0.0733021606494116, "grad_norm": 4.096349362065572, "learning_rate": 9.950951729251142e-07, "loss": 0.5339, "step": 4217 }, { "epoch": 0.07331954318691443, "grad_norm": 2.433069152219352, "learning_rate": 9.950912389740879e-07, "loss": 0.4596, "step": 4218 }, { "epoch": 0.07333692572441725, "grad_norm": 2.0743323192637373, "learning_rate": 9.950873034538498e-07, "loss": 0.6541, "step": 4219 }, { "epoch": 0.07335430826192008, "grad_norm": 2.6471891044361286, "learning_rate": 9.950833663644124e-07, "loss": 0.3289, "step": 4220 }, { "epoch": 0.0733716907994229, "grad_norm": 1.6486443943164688, "learning_rate": 9.950794277057883e-07, "loss": 0.4549, "step": 4221 }, { "epoch": 0.07338907333692572, "grad_norm": 2.428372346006012, "learning_rate": 9.950754874779897e-07, "loss": 0.3778, "step": 4222 }, { "epoch": 0.07340645587442855, "grad_norm": 2.0035588183764257, "learning_rate": 9.950715456810293e-07, "loss": 0.284, "step": 4223 }, { "epoch": 0.07342383841193137, "grad_norm": 2.8822754350287227, "learning_rate": 9.950676023149195e-07, "loss": 0.3472, "step": 4224 }, { "epoch": 0.0734412209494342, "grad_norm": 3.1841684533115355, "learning_rate": 9.950636573796728e-07, "loss": 0.4898, "step": 4225 }, { "epoch": 0.07345860348693703, "grad_norm": 1.9644969926504081, "learning_rate": 9.950597108753016e-07, "loss": 0.4134, "step": 4226 }, { "epoch": 0.07347598602443985, "grad_norm": 2.1736908449067034, "learning_rate": 9.950557628018186e-07, "loss": 0.6391, "step": 4227 }, { "epoch": 0.07349336856194268, "grad_norm": 1.7329760928840943, "learning_rate": 9.95051813159236e-07, "loss": 0.5578, "step": 4228 }, { "epoch": 0.07351075109944549, "grad_norm": 3.4261649535882017, "learning_rate": 9.95047861947567e-07, "loss": 0.8268, "step": 4229 }, { "epoch": 0.07352813363694832, "grad_norm": 2.291752124532766, "learning_rate": 9.950439091668235e-07, "loss": 0.5427, "step": 4230 }, { "epoch": 0.07354551617445115, "grad_norm": 2.245916576504766, "learning_rate": 9.950399548170182e-07, "loss": 0.3014, "step": 4231 }, { "epoch": 0.07356289871195397, "grad_norm": 3.0468135060999226, "learning_rate": 9.950359988981636e-07, "loss": 0.4159, "step": 4232 }, { "epoch": 0.0735802812494568, "grad_norm": 1.4388480134739048, "learning_rate": 9.950320414102723e-07, "loss": 0.2729, "step": 4233 }, { "epoch": 0.07359766378695962, "grad_norm": 1.817209173090111, "learning_rate": 9.95028082353357e-07, "loss": 0.4739, "step": 4234 }, { "epoch": 0.07361504632446245, "grad_norm": 2.3842939849325804, "learning_rate": 9.9502412172743e-07, "loss": 0.5859, "step": 4235 }, { "epoch": 0.07363242886196526, "grad_norm": 2.020612357414968, "learning_rate": 9.950201595325037e-07, "loss": 0.2905, "step": 4236 }, { "epoch": 0.0736498113994681, "grad_norm": 3.283770011554917, "learning_rate": 9.950161957685911e-07, "loss": 0.5615, "step": 4237 }, { "epoch": 0.07366719393697092, "grad_norm": 2.2224784540620934, "learning_rate": 9.950122304357045e-07, "loss": 0.4142, "step": 4238 }, { "epoch": 0.07368457647447374, "grad_norm": 1.4671533248992104, "learning_rate": 9.950082635338565e-07, "loss": 0.3811, "step": 4239 }, { "epoch": 0.07370195901197657, "grad_norm": 2.2854446554409282, "learning_rate": 9.950042950630597e-07, "loss": 0.3255, "step": 4240 }, { "epoch": 0.07371934154947939, "grad_norm": 1.5054232302470185, "learning_rate": 9.950003250233267e-07, "loss": 0.4199, "step": 4241 }, { "epoch": 0.07373672408698222, "grad_norm": 4.58646457963293, "learning_rate": 9.9499635341467e-07, "loss": 0.5524, "step": 4242 }, { "epoch": 0.07375410662448505, "grad_norm": 1.566608324788679, "learning_rate": 9.949923802371024e-07, "loss": 0.3992, "step": 4243 }, { "epoch": 0.07377148916198786, "grad_norm": 2.3345351700173342, "learning_rate": 9.949884054906364e-07, "loss": 0.5238, "step": 4244 }, { "epoch": 0.0737888716994907, "grad_norm": 2.066011722790456, "learning_rate": 9.949844291752843e-07, "loss": 0.2871, "step": 4245 }, { "epoch": 0.07380625423699351, "grad_norm": 2.0009474819816044, "learning_rate": 9.949804512910589e-07, "loss": 0.7009, "step": 4246 }, { "epoch": 0.07382363677449634, "grad_norm": 2.4499585103757835, "learning_rate": 9.94976471837973e-07, "loss": 0.7815, "step": 4247 }, { "epoch": 0.07384101931199917, "grad_norm": 1.817384667525126, "learning_rate": 9.94972490816039e-07, "loss": 0.567, "step": 4248 }, { "epoch": 0.07385840184950199, "grad_norm": 1.219355156599646, "learning_rate": 9.949685082252696e-07, "loss": 0.2594, "step": 4249 }, { "epoch": 0.07387578438700482, "grad_norm": 5.48229335675371, "learning_rate": 9.949645240656773e-07, "loss": 0.3818, "step": 4250 }, { "epoch": 0.07389316692450763, "grad_norm": 2.3791372843004766, "learning_rate": 9.94960538337275e-07, "loss": 0.4971, "step": 4251 }, { "epoch": 0.07391054946201046, "grad_norm": 2.155863447538934, "learning_rate": 9.94956551040075e-07, "loss": 0.2737, "step": 4252 }, { "epoch": 0.0739279319995133, "grad_norm": 1.6010867271623455, "learning_rate": 9.949525621740901e-07, "loss": 0.4451, "step": 4253 }, { "epoch": 0.07394531453701611, "grad_norm": 1.682451992176818, "learning_rate": 9.94948571739333e-07, "loss": 0.2608, "step": 4254 }, { "epoch": 0.07396269707451894, "grad_norm": 1.6975178144325136, "learning_rate": 9.949445797358163e-07, "loss": 0.4792, "step": 4255 }, { "epoch": 0.07398007961202176, "grad_norm": 1.8851003494392304, "learning_rate": 9.949405861635528e-07, "loss": 0.2809, "step": 4256 }, { "epoch": 0.07399746214952459, "grad_norm": 2.097210099864434, "learning_rate": 9.949365910225546e-07, "loss": 0.4434, "step": 4257 }, { "epoch": 0.07401484468702742, "grad_norm": 2.568169178691097, "learning_rate": 9.94932594312835e-07, "loss": 0.4082, "step": 4258 }, { "epoch": 0.07403222722453023, "grad_norm": 3.3861482940991747, "learning_rate": 9.949285960344064e-07, "loss": 0.6811, "step": 4259 }, { "epoch": 0.07404960976203306, "grad_norm": 1.3154938641330631, "learning_rate": 9.949245961872816e-07, "loss": 0.3466, "step": 4260 }, { "epoch": 0.07406699229953588, "grad_norm": 1.7449867116056648, "learning_rate": 9.949205947714731e-07, "loss": 0.6995, "step": 4261 }, { "epoch": 0.07408437483703871, "grad_norm": 3.0439078539516333, "learning_rate": 9.949165917869937e-07, "loss": 0.651, "step": 4262 }, { "epoch": 0.07410175737454154, "grad_norm": 2.3617227950494786, "learning_rate": 9.949125872338559e-07, "loss": 0.7642, "step": 4263 }, { "epoch": 0.07411913991204436, "grad_norm": 1.859074293845251, "learning_rate": 9.949085811120727e-07, "loss": 0.2234, "step": 4264 }, { "epoch": 0.07413652244954719, "grad_norm": 2.066558954940806, "learning_rate": 9.949045734216568e-07, "loss": 0.5325, "step": 4265 }, { "epoch": 0.07415390498705, "grad_norm": 8.026676356773711, "learning_rate": 9.949005641626205e-07, "loss": 0.6501, "step": 4266 }, { "epoch": 0.07417128752455283, "grad_norm": 2.315306287974522, "learning_rate": 9.948965533349766e-07, "loss": 0.4531, "step": 4267 }, { "epoch": 0.07418867006205566, "grad_norm": 5.435533812039165, "learning_rate": 9.948925409387383e-07, "loss": 0.5926, "step": 4268 }, { "epoch": 0.07420605259955848, "grad_norm": 1.855544460908158, "learning_rate": 9.948885269739177e-07, "loss": 0.4555, "step": 4269 }, { "epoch": 0.07422343513706131, "grad_norm": 2.92121025911346, "learning_rate": 9.948845114405279e-07, "loss": 0.7267, "step": 4270 }, { "epoch": 0.07424081767456413, "grad_norm": 2.4551547681278705, "learning_rate": 9.948804943385817e-07, "loss": 0.3971, "step": 4271 }, { "epoch": 0.07425820021206696, "grad_norm": 2.360251145157338, "learning_rate": 9.948764756680916e-07, "loss": 0.4267, "step": 4272 }, { "epoch": 0.07427558274956979, "grad_norm": 2.2472369942676877, "learning_rate": 9.948724554290702e-07, "loss": 0.3218, "step": 4273 }, { "epoch": 0.0742929652870726, "grad_norm": 1.8906870451097932, "learning_rate": 9.948684336215307e-07, "loss": 0.575, "step": 4274 }, { "epoch": 0.07431034782457543, "grad_norm": 2.085110927104855, "learning_rate": 9.948644102454855e-07, "loss": 0.443, "step": 4275 }, { "epoch": 0.07432773036207825, "grad_norm": 1.6653897536154207, "learning_rate": 9.948603853009474e-07, "loss": 0.4804, "step": 4276 }, { "epoch": 0.07434511289958108, "grad_norm": 2.1413833778615294, "learning_rate": 9.94856358787929e-07, "loss": 0.5286, "step": 4277 }, { "epoch": 0.07436249543708391, "grad_norm": 1.4625182607818017, "learning_rate": 9.948523307064436e-07, "loss": 0.2867, "step": 4278 }, { "epoch": 0.07437987797458673, "grad_norm": 2.270187399920698, "learning_rate": 9.948483010565034e-07, "loss": 0.5936, "step": 4279 }, { "epoch": 0.07439726051208956, "grad_norm": 2.3245547285305403, "learning_rate": 9.948442698381217e-07, "loss": 0.3921, "step": 4280 }, { "epoch": 0.07441464304959237, "grad_norm": 2.8608122735977615, "learning_rate": 9.948402370513107e-07, "loss": 0.5055, "step": 4281 }, { "epoch": 0.0744320255870952, "grad_norm": 1.405269543851417, "learning_rate": 9.948362026960834e-07, "loss": 0.7194, "step": 4282 }, { "epoch": 0.07444940812459803, "grad_norm": 2.2427215045172835, "learning_rate": 9.94832166772453e-07, "loss": 0.5003, "step": 4283 }, { "epoch": 0.07446679066210085, "grad_norm": 4.480844082021412, "learning_rate": 9.948281292804315e-07, "loss": 0.3452, "step": 4284 }, { "epoch": 0.07448417319960368, "grad_norm": 2.086550927251453, "learning_rate": 9.948240902200324e-07, "loss": 0.3277, "step": 4285 }, { "epoch": 0.0745015557371065, "grad_norm": 2.5772491213322053, "learning_rate": 9.948200495912681e-07, "loss": 0.2198, "step": 4286 }, { "epoch": 0.07451893827460933, "grad_norm": 2.099546696820726, "learning_rate": 9.948160073941517e-07, "loss": 0.678, "step": 4287 }, { "epoch": 0.07453632081211216, "grad_norm": 1.803513610782057, "learning_rate": 9.948119636286959e-07, "loss": 0.453, "step": 4288 }, { "epoch": 0.07455370334961497, "grad_norm": 1.8620669484094725, "learning_rate": 9.948079182949133e-07, "loss": 0.5999, "step": 4289 }, { "epoch": 0.0745710858871178, "grad_norm": 1.7102278327142584, "learning_rate": 9.948038713928168e-07, "loss": 0.524, "step": 4290 }, { "epoch": 0.07458846842462062, "grad_norm": 1.117465513634633, "learning_rate": 9.947998229224195e-07, "loss": 0.6491, "step": 4291 }, { "epoch": 0.07460585096212345, "grad_norm": 2.261901970533755, "learning_rate": 9.947957728837341e-07, "loss": 0.8401, "step": 4292 }, { "epoch": 0.07462323349962628, "grad_norm": 5.075726174009991, "learning_rate": 9.947917212767734e-07, "loss": 0.4753, "step": 4293 }, { "epoch": 0.0746406160371291, "grad_norm": 2.243170210000869, "learning_rate": 9.9478766810155e-07, "loss": 0.59, "step": 4294 }, { "epoch": 0.07465799857463193, "grad_norm": 2.0596759573068586, "learning_rate": 9.947836133580772e-07, "loss": 0.7211, "step": 4295 }, { "epoch": 0.07467538111213474, "grad_norm": 2.1085873402902853, "learning_rate": 9.947795570463677e-07, "loss": 0.6336, "step": 4296 }, { "epoch": 0.07469276364963758, "grad_norm": 3.2974016320155295, "learning_rate": 9.94775499166434e-07, "loss": 0.5632, "step": 4297 }, { "epoch": 0.0747101461871404, "grad_norm": 2.0669850887546275, "learning_rate": 9.947714397182895e-07, "loss": 0.4574, "step": 4298 }, { "epoch": 0.07472752872464322, "grad_norm": 1.427094571370279, "learning_rate": 9.947673787019466e-07, "loss": 0.4343, "step": 4299 }, { "epoch": 0.07474491126214605, "grad_norm": 1.672150792515241, "learning_rate": 9.947633161174186e-07, "loss": 0.5552, "step": 4300 }, { "epoch": 0.07476229379964887, "grad_norm": 1.4584070612668243, "learning_rate": 9.94759251964718e-07, "loss": 0.2039, "step": 4301 }, { "epoch": 0.0747796763371517, "grad_norm": 3.249753348202184, "learning_rate": 9.947551862438579e-07, "loss": 0.4848, "step": 4302 }, { "epoch": 0.07479705887465453, "grad_norm": 1.579058120833256, "learning_rate": 9.947511189548513e-07, "loss": 0.4815, "step": 4303 }, { "epoch": 0.07481444141215735, "grad_norm": 1.587550033433162, "learning_rate": 9.947470500977107e-07, "loss": 0.6863, "step": 4304 }, { "epoch": 0.07483182394966018, "grad_norm": 1.9370163348483707, "learning_rate": 9.947429796724492e-07, "loss": 0.5159, "step": 4305 }, { "epoch": 0.07484920648716299, "grad_norm": 1.6022118904492553, "learning_rate": 9.947389076790798e-07, "loss": 0.3192, "step": 4306 }, { "epoch": 0.07486658902466582, "grad_norm": 2.605766426295806, "learning_rate": 9.947348341176153e-07, "loss": 0.3736, "step": 4307 }, { "epoch": 0.07488397156216864, "grad_norm": 2.728936854713005, "learning_rate": 9.947307589880688e-07, "loss": 0.379, "step": 4308 }, { "epoch": 0.07490135409967147, "grad_norm": 3.109780179648893, "learning_rate": 9.947266822904529e-07, "loss": 0.601, "step": 4309 }, { "epoch": 0.0749187366371743, "grad_norm": 1.8021893908915605, "learning_rate": 9.947226040247806e-07, "loss": 0.4246, "step": 4310 }, { "epoch": 0.07493611917467712, "grad_norm": 2.3289406448497116, "learning_rate": 9.947185241910653e-07, "loss": 0.4313, "step": 4311 }, { "epoch": 0.07495350171217995, "grad_norm": 2.2649063169106185, "learning_rate": 9.94714442789319e-07, "loss": 0.4284, "step": 4312 }, { "epoch": 0.07497088424968276, "grad_norm": 1.2572456679401436, "learning_rate": 9.947103598195554e-07, "loss": 0.4219, "step": 4313 }, { "epoch": 0.07498826678718559, "grad_norm": 3.003628458048402, "learning_rate": 9.947062752817873e-07, "loss": 0.2732, "step": 4314 }, { "epoch": 0.07500564932468842, "grad_norm": 1.4620931067782745, "learning_rate": 9.947021891760274e-07, "loss": 0.3143, "step": 4315 }, { "epoch": 0.07502303186219124, "grad_norm": 2.362897339544736, "learning_rate": 9.94698101502289e-07, "loss": 0.5791, "step": 4316 }, { "epoch": 0.07504041439969407, "grad_norm": 1.9025888610991808, "learning_rate": 9.946940122605847e-07, "loss": 0.5973, "step": 4317 }, { "epoch": 0.07505779693719689, "grad_norm": 1.5753164912337687, "learning_rate": 9.946899214509277e-07, "loss": 0.2942, "step": 4318 }, { "epoch": 0.07507517947469972, "grad_norm": 1.6938772797879227, "learning_rate": 9.94685829073331e-07, "loss": 0.238, "step": 4319 }, { "epoch": 0.07509256201220255, "grad_norm": 1.6511772426411335, "learning_rate": 9.946817351278072e-07, "loss": 0.3876, "step": 4320 }, { "epoch": 0.07510994454970536, "grad_norm": 1.2324164572392742, "learning_rate": 9.946776396143696e-07, "loss": 0.6027, "step": 4321 }, { "epoch": 0.07512732708720819, "grad_norm": 1.7337840481172357, "learning_rate": 9.946735425330312e-07, "loss": 0.4531, "step": 4322 }, { "epoch": 0.07514470962471101, "grad_norm": 1.9785267504134296, "learning_rate": 9.946694438838048e-07, "loss": 0.3273, "step": 4323 }, { "epoch": 0.07516209216221384, "grad_norm": 1.2241974580612198, "learning_rate": 9.946653436667036e-07, "loss": 0.4051, "step": 4324 }, { "epoch": 0.07517947469971667, "grad_norm": 2.2386024812479888, "learning_rate": 9.946612418817406e-07, "loss": 0.2496, "step": 4325 }, { "epoch": 0.07519685723721949, "grad_norm": 2.1237523759874093, "learning_rate": 9.946571385289286e-07, "loss": 0.4287, "step": 4326 }, { "epoch": 0.07521423977472232, "grad_norm": 2.488764157737098, "learning_rate": 9.946530336082806e-07, "loss": 0.3679, "step": 4327 }, { "epoch": 0.07523162231222513, "grad_norm": 2.258097943310663, "learning_rate": 9.946489271198097e-07, "loss": 0.5052, "step": 4328 }, { "epoch": 0.07524900484972796, "grad_norm": 1.966701362692975, "learning_rate": 9.94644819063529e-07, "loss": 0.4492, "step": 4329 }, { "epoch": 0.0752663873872308, "grad_norm": 2.1138264820498844, "learning_rate": 9.946407094394516e-07, "loss": 0.4122, "step": 4330 }, { "epoch": 0.07528376992473361, "grad_norm": 2.0109485844418074, "learning_rate": 9.946365982475902e-07, "loss": 0.4832, "step": 4331 }, { "epoch": 0.07530115246223644, "grad_norm": 1.4728235231028324, "learning_rate": 9.94632485487958e-07, "loss": 0.4426, "step": 4332 }, { "epoch": 0.07531853499973926, "grad_norm": 1.427966970148459, "learning_rate": 9.946283711605679e-07, "loss": 0.3855, "step": 4333 }, { "epoch": 0.07533591753724209, "grad_norm": 2.10157346248813, "learning_rate": 9.946242552654332e-07, "loss": 0.5421, "step": 4334 }, { "epoch": 0.07535330007474492, "grad_norm": 1.5025193578045974, "learning_rate": 9.94620137802567e-07, "loss": 0.2873, "step": 4335 }, { "epoch": 0.07537068261224773, "grad_norm": 6.435549568093364, "learning_rate": 9.946160187719819e-07, "loss": 0.4172, "step": 4336 }, { "epoch": 0.07538806514975056, "grad_norm": 3.717430226709866, "learning_rate": 9.946118981736915e-07, "loss": 0.5611, "step": 4337 }, { "epoch": 0.07540544768725338, "grad_norm": 2.370764755773008, "learning_rate": 9.946077760077082e-07, "loss": 0.408, "step": 4338 }, { "epoch": 0.07542283022475621, "grad_norm": 2.8344105484105486, "learning_rate": 9.94603652274046e-07, "loss": 0.4746, "step": 4339 }, { "epoch": 0.07544021276225904, "grad_norm": 2.1686442399077537, "learning_rate": 9.94599526972717e-07, "loss": 0.6171, "step": 4340 }, { "epoch": 0.07545759529976186, "grad_norm": 2.929053192228765, "learning_rate": 9.945954001037349e-07, "loss": 0.5167, "step": 4341 }, { "epoch": 0.07547497783726469, "grad_norm": 3.142104544158015, "learning_rate": 9.945912716671123e-07, "loss": 0.539, "step": 4342 }, { "epoch": 0.0754923603747675, "grad_norm": 1.5777614461197984, "learning_rate": 9.945871416628628e-07, "loss": 0.3992, "step": 4343 }, { "epoch": 0.07550974291227033, "grad_norm": 1.4804848819524905, "learning_rate": 9.945830100909993e-07, "loss": 0.5783, "step": 4344 }, { "epoch": 0.07552712544977316, "grad_norm": 1.8683361640300273, "learning_rate": 9.945788769515346e-07, "loss": 0.3301, "step": 4345 }, { "epoch": 0.07554450798727598, "grad_norm": 2.337001263280053, "learning_rate": 9.945747422444823e-07, "loss": 0.4873, "step": 4346 }, { "epoch": 0.07556189052477881, "grad_norm": 1.638890577820283, "learning_rate": 9.945706059698552e-07, "loss": 0.5805, "step": 4347 }, { "epoch": 0.07557927306228163, "grad_norm": 2.8176153773586465, "learning_rate": 9.945664681276664e-07, "loss": 0.4194, "step": 4348 }, { "epoch": 0.07559665559978446, "grad_norm": 2.9067311413893258, "learning_rate": 9.945623287179291e-07, "loss": 0.4805, "step": 4349 }, { "epoch": 0.07561403813728729, "grad_norm": 1.626914837782957, "learning_rate": 9.945581877406566e-07, "loss": 0.4098, "step": 4350 }, { "epoch": 0.0756314206747901, "grad_norm": 3.826101003004946, "learning_rate": 9.945540451958616e-07, "loss": 0.5906, "step": 4351 }, { "epoch": 0.07564880321229293, "grad_norm": 1.4998509865262581, "learning_rate": 9.945499010835575e-07, "loss": 0.3628, "step": 4352 }, { "epoch": 0.07566618574979575, "grad_norm": 1.9804720403583027, "learning_rate": 9.945457554037573e-07, "loss": 0.5435, "step": 4353 }, { "epoch": 0.07568356828729858, "grad_norm": 2.033131167155189, "learning_rate": 9.945416081564744e-07, "loss": 0.6597, "step": 4354 }, { "epoch": 0.07570095082480141, "grad_norm": 5.005545007460216, "learning_rate": 9.945374593417217e-07, "loss": 0.4578, "step": 4355 }, { "epoch": 0.07571833336230423, "grad_norm": 1.6874973494969712, "learning_rate": 9.945333089595124e-07, "loss": 0.4636, "step": 4356 }, { "epoch": 0.07573571589980706, "grad_norm": 1.4669634613059024, "learning_rate": 9.945291570098596e-07, "loss": 0.5151, "step": 4357 }, { "epoch": 0.07575309843730987, "grad_norm": 1.6003303846441317, "learning_rate": 9.945250034927768e-07, "loss": 0.3057, "step": 4358 }, { "epoch": 0.0757704809748127, "grad_norm": 1.5894244492701792, "learning_rate": 9.945208484082769e-07, "loss": 0.44, "step": 4359 }, { "epoch": 0.07578786351231553, "grad_norm": 1.814661954774238, "learning_rate": 9.945166917563727e-07, "loss": 0.4447, "step": 4360 }, { "epoch": 0.07580524604981835, "grad_norm": 1.6071588873790386, "learning_rate": 9.945125335370782e-07, "loss": 0.5645, "step": 4361 }, { "epoch": 0.07582262858732118, "grad_norm": 1.414111104615472, "learning_rate": 9.94508373750406e-07, "loss": 0.3054, "step": 4362 }, { "epoch": 0.075840011124824, "grad_norm": 4.377792541786679, "learning_rate": 9.94504212396369e-07, "loss": 0.3933, "step": 4363 }, { "epoch": 0.07585739366232683, "grad_norm": 1.7233060462751084, "learning_rate": 9.945000494749814e-07, "loss": 0.529, "step": 4364 }, { "epoch": 0.07587477619982966, "grad_norm": 1.094709022978193, "learning_rate": 9.944958849862554e-07, "loss": 0.4015, "step": 4365 }, { "epoch": 0.07589215873733247, "grad_norm": 6.181197937041866, "learning_rate": 9.944917189302046e-07, "loss": 0.3401, "step": 4366 }, { "epoch": 0.0759095412748353, "grad_norm": 1.4443658435684934, "learning_rate": 9.944875513068426e-07, "loss": 0.4605, "step": 4367 }, { "epoch": 0.07592692381233812, "grad_norm": 2.5160012571277375, "learning_rate": 9.944833821161818e-07, "loss": 0.509, "step": 4368 }, { "epoch": 0.07594430634984095, "grad_norm": 1.6312510226009274, "learning_rate": 9.94479211358236e-07, "loss": 0.5758, "step": 4369 }, { "epoch": 0.07596168888734378, "grad_norm": 1.7807880742233702, "learning_rate": 9.944750390330181e-07, "loss": 0.6952, "step": 4370 }, { "epoch": 0.0759790714248466, "grad_norm": 1.9127111693302796, "learning_rate": 9.944708651405417e-07, "loss": 0.5096, "step": 4371 }, { "epoch": 0.07599645396234943, "grad_norm": 2.1604419859806496, "learning_rate": 9.944666896808196e-07, "loss": 0.4707, "step": 4372 }, { "epoch": 0.07601383649985224, "grad_norm": 1.536222906149055, "learning_rate": 9.944625126538653e-07, "loss": 0.4593, "step": 4373 }, { "epoch": 0.07603121903735507, "grad_norm": 3.4080363006539747, "learning_rate": 9.94458334059692e-07, "loss": 0.9296, "step": 4374 }, { "epoch": 0.07604860157485789, "grad_norm": 2.1727062994753408, "learning_rate": 9.94454153898313e-07, "loss": 0.4446, "step": 4375 }, { "epoch": 0.07606598411236072, "grad_norm": 1.827914659072915, "learning_rate": 9.944499721697413e-07, "loss": 0.5582, "step": 4376 }, { "epoch": 0.07608336664986355, "grad_norm": 1.443000635860201, "learning_rate": 9.944457888739902e-07, "loss": 0.2753, "step": 4377 }, { "epoch": 0.07610074918736637, "grad_norm": 1.8862156481175267, "learning_rate": 9.944416040110734e-07, "loss": 0.3662, "step": 4378 }, { "epoch": 0.0761181317248692, "grad_norm": 1.9427357959714258, "learning_rate": 9.944374175810037e-07, "loss": 0.491, "step": 4379 }, { "epoch": 0.07613551426237201, "grad_norm": 1.8383161347477044, "learning_rate": 9.944332295837945e-07, "loss": 0.3104, "step": 4380 }, { "epoch": 0.07615289679987484, "grad_norm": 1.287308187872495, "learning_rate": 9.944290400194592e-07, "loss": 0.3762, "step": 4381 }, { "epoch": 0.07617027933737767, "grad_norm": 3.3094558893137584, "learning_rate": 9.944248488880108e-07, "loss": 0.6598, "step": 4382 }, { "epoch": 0.07618766187488049, "grad_norm": 1.5979783502448441, "learning_rate": 9.944206561894628e-07, "loss": 0.5025, "step": 4383 }, { "epoch": 0.07620504441238332, "grad_norm": 1.5883700949128075, "learning_rate": 9.944164619238284e-07, "loss": 0.5996, "step": 4384 }, { "epoch": 0.07622242694988614, "grad_norm": 1.3098931242186687, "learning_rate": 9.944122660911212e-07, "loss": 0.4329, "step": 4385 }, { "epoch": 0.07623980948738897, "grad_norm": 4.094325487865854, "learning_rate": 9.944080686913538e-07, "loss": 0.4205, "step": 4386 }, { "epoch": 0.0762571920248918, "grad_norm": 2.213724450379839, "learning_rate": 9.944038697245402e-07, "loss": 0.6508, "step": 4387 }, { "epoch": 0.07627457456239461, "grad_norm": 1.8751477481306336, "learning_rate": 9.943996691906935e-07, "loss": 0.4059, "step": 4388 }, { "epoch": 0.07629195709989745, "grad_norm": 2.232702615184969, "learning_rate": 9.943954670898267e-07, "loss": 0.5393, "step": 4389 }, { "epoch": 0.07630933963740026, "grad_norm": 2.3063168396727973, "learning_rate": 9.943912634219536e-07, "loss": 0.3079, "step": 4390 }, { "epoch": 0.07632672217490309, "grad_norm": 3.0239943902686304, "learning_rate": 9.943870581870872e-07, "loss": 0.3316, "step": 4391 }, { "epoch": 0.07634410471240592, "grad_norm": 2.2284492445264243, "learning_rate": 9.94382851385241e-07, "loss": 0.6056, "step": 4392 }, { "epoch": 0.07636148724990874, "grad_norm": 2.144737033359681, "learning_rate": 9.943786430164283e-07, "loss": 0.5041, "step": 4393 }, { "epoch": 0.07637886978741157, "grad_norm": 3.1970118328728048, "learning_rate": 9.943744330806622e-07, "loss": 0.3841, "step": 4394 }, { "epoch": 0.07639625232491438, "grad_norm": 1.5023615350707182, "learning_rate": 9.943702215779565e-07, "loss": 0.3258, "step": 4395 }, { "epoch": 0.07641363486241722, "grad_norm": 1.468424653133495, "learning_rate": 9.94366008508324e-07, "loss": 0.479, "step": 4396 }, { "epoch": 0.07643101739992005, "grad_norm": 1.8187133669391198, "learning_rate": 9.943617938717785e-07, "loss": 0.5065, "step": 4397 }, { "epoch": 0.07644839993742286, "grad_norm": 1.7702129016976644, "learning_rate": 9.943575776683333e-07, "loss": 0.2628, "step": 4398 }, { "epoch": 0.07646578247492569, "grad_norm": 2.1481014285853974, "learning_rate": 9.943533598980015e-07, "loss": 0.4692, "step": 4399 }, { "epoch": 0.07648316501242851, "grad_norm": 1.692190244601129, "learning_rate": 9.94349140560797e-07, "loss": 0.5061, "step": 4400 }, { "epoch": 0.07650054754993134, "grad_norm": 2.4876926711616694, "learning_rate": 9.943449196567323e-07, "loss": 0.4427, "step": 4401 }, { "epoch": 0.07651793008743417, "grad_norm": 1.510782675073191, "learning_rate": 9.943406971858215e-07, "loss": 0.4241, "step": 4402 }, { "epoch": 0.07653531262493699, "grad_norm": 1.180543555162727, "learning_rate": 9.943364731480779e-07, "loss": 0.3554, "step": 4403 }, { "epoch": 0.07655269516243982, "grad_norm": 1.532834442680803, "learning_rate": 9.943322475435147e-07, "loss": 0.4161, "step": 4404 }, { "epoch": 0.07657007769994263, "grad_norm": 4.93545194006977, "learning_rate": 9.943280203721451e-07, "loss": 0.6426, "step": 4405 }, { "epoch": 0.07658746023744546, "grad_norm": 2.09633521329379, "learning_rate": 9.943237916339832e-07, "loss": 0.4447, "step": 4406 }, { "epoch": 0.07660484277494829, "grad_norm": 1.3189126909084785, "learning_rate": 9.943195613290417e-07, "loss": 0.5746, "step": 4407 }, { "epoch": 0.07662222531245111, "grad_norm": 2.473928969632525, "learning_rate": 9.94315329457334e-07, "loss": 0.4218, "step": 4408 }, { "epoch": 0.07663960784995394, "grad_norm": 2.2693725907273623, "learning_rate": 9.943110960188742e-07, "loss": 0.6429, "step": 4409 }, { "epoch": 0.07665699038745676, "grad_norm": 1.844145026351589, "learning_rate": 9.94306861013675e-07, "loss": 0.4392, "step": 4410 }, { "epoch": 0.07667437292495959, "grad_norm": 1.8578004629295566, "learning_rate": 9.943026244417505e-07, "loss": 0.401, "step": 4411 }, { "epoch": 0.07669175546246242, "grad_norm": 3.0769303049735295, "learning_rate": 9.942983863031133e-07, "loss": 0.6362, "step": 4412 }, { "epoch": 0.07670913799996523, "grad_norm": 1.7118981727724119, "learning_rate": 9.942941465977775e-07, "loss": 0.5247, "step": 4413 }, { "epoch": 0.07672652053746806, "grad_norm": 1.7831389783554346, "learning_rate": 9.942899053257562e-07, "loss": 0.4853, "step": 4414 }, { "epoch": 0.07674390307497088, "grad_norm": 1.6241333712219883, "learning_rate": 9.942856624870632e-07, "loss": 0.2566, "step": 4415 }, { "epoch": 0.07676128561247371, "grad_norm": 1.8225554346922315, "learning_rate": 9.942814180817115e-07, "loss": 0.2269, "step": 4416 }, { "epoch": 0.07677866814997654, "grad_norm": 1.6458306399138365, "learning_rate": 9.942771721097148e-07, "loss": 0.2465, "step": 4417 }, { "epoch": 0.07679605068747936, "grad_norm": 2.181440597665777, "learning_rate": 9.942729245710865e-07, "loss": 0.1716, "step": 4418 }, { "epoch": 0.07681343322498219, "grad_norm": 1.5781088326706318, "learning_rate": 9.942686754658403e-07, "loss": 0.296, "step": 4419 }, { "epoch": 0.076830815762485, "grad_norm": 2.6272947664055613, "learning_rate": 9.94264424793989e-07, "loss": 0.5212, "step": 4420 }, { "epoch": 0.07684819829998783, "grad_norm": 1.2931443660462039, "learning_rate": 9.942601725555468e-07, "loss": 0.3863, "step": 4421 }, { "epoch": 0.07686558083749066, "grad_norm": 1.4879246259566123, "learning_rate": 9.94255918750527e-07, "loss": 0.3256, "step": 4422 }, { "epoch": 0.07688296337499348, "grad_norm": 1.162183264815652, "learning_rate": 9.94251663378943e-07, "loss": 0.5805, "step": 4423 }, { "epoch": 0.07690034591249631, "grad_norm": 2.4676047349893024, "learning_rate": 9.94247406440808e-07, "loss": 0.6427, "step": 4424 }, { "epoch": 0.07691772844999913, "grad_norm": 2.7347433184104886, "learning_rate": 9.942431479361358e-07, "loss": 0.4776, "step": 4425 }, { "epoch": 0.07693511098750196, "grad_norm": 1.5290823617863003, "learning_rate": 9.9423888786494e-07, "loss": 1.0314, "step": 4426 }, { "epoch": 0.07695249352500479, "grad_norm": 1.36787903464614, "learning_rate": 9.94234626227234e-07, "loss": 0.2956, "step": 4427 }, { "epoch": 0.0769698760625076, "grad_norm": 1.7386124851991995, "learning_rate": 9.942303630230311e-07, "loss": 0.4051, "step": 4428 }, { "epoch": 0.07698725860001043, "grad_norm": 2.0994706376280607, "learning_rate": 9.942260982523448e-07, "loss": 0.5099, "step": 4429 }, { "epoch": 0.07700464113751325, "grad_norm": 1.2099431032787926, "learning_rate": 9.942218319151893e-07, "loss": 0.4741, "step": 4430 }, { "epoch": 0.07702202367501608, "grad_norm": 4.439957233258931, "learning_rate": 9.942175640115774e-07, "loss": 0.6609, "step": 4431 }, { "epoch": 0.07703940621251891, "grad_norm": 6.7470006499719, "learning_rate": 9.942132945415227e-07, "loss": 0.371, "step": 4432 }, { "epoch": 0.07705678875002173, "grad_norm": 2.0869524024636896, "learning_rate": 9.942090235050391e-07, "loss": 0.2707, "step": 4433 }, { "epoch": 0.07707417128752456, "grad_norm": 2.399268932062939, "learning_rate": 9.942047509021397e-07, "loss": 0.5861, "step": 4434 }, { "epoch": 0.07709155382502737, "grad_norm": 1.8011901061858693, "learning_rate": 9.942004767328385e-07, "loss": 0.8845, "step": 4435 }, { "epoch": 0.0771089363625302, "grad_norm": 1.1031407277683116, "learning_rate": 9.941962009971485e-07, "loss": 0.2873, "step": 4436 }, { "epoch": 0.07712631890003303, "grad_norm": 1.6800961999577446, "learning_rate": 9.941919236950837e-07, "loss": 0.7323, "step": 4437 }, { "epoch": 0.07714370143753585, "grad_norm": 1.8061256350957198, "learning_rate": 9.941876448266576e-07, "loss": 0.8102, "step": 4438 }, { "epoch": 0.07716108397503868, "grad_norm": 1.3968221132469798, "learning_rate": 9.941833643918838e-07, "loss": 0.471, "step": 4439 }, { "epoch": 0.0771784665125415, "grad_norm": 1.6204910435414863, "learning_rate": 9.941790823907753e-07, "loss": 0.4602, "step": 4440 }, { "epoch": 0.07719584905004433, "grad_norm": 1.3842617304553462, "learning_rate": 9.941747988233465e-07, "loss": 0.3262, "step": 4441 }, { "epoch": 0.07721323158754716, "grad_norm": 3.042420607738469, "learning_rate": 9.941705136896104e-07, "loss": 0.4308, "step": 4442 }, { "epoch": 0.07723061412504997, "grad_norm": 2.1499479154594066, "learning_rate": 9.94166226989581e-07, "loss": 0.4498, "step": 4443 }, { "epoch": 0.0772479966625528, "grad_norm": 1.6784591780180058, "learning_rate": 9.941619387232714e-07, "loss": 0.4276, "step": 4444 }, { "epoch": 0.07726537920005562, "grad_norm": 1.2235034884042415, "learning_rate": 9.941576488906956e-07, "loss": 0.4336, "step": 4445 }, { "epoch": 0.07728276173755845, "grad_norm": 4.240249367131289, "learning_rate": 9.94153357491867e-07, "loss": 0.4945, "step": 4446 }, { "epoch": 0.07730014427506127, "grad_norm": 2.4230435725815327, "learning_rate": 9.941490645267993e-07, "loss": 0.2787, "step": 4447 }, { "epoch": 0.0773175268125641, "grad_norm": 1.9311057314703342, "learning_rate": 9.941447699955062e-07, "loss": 0.5107, "step": 4448 }, { "epoch": 0.07733490935006693, "grad_norm": 1.7236240444631705, "learning_rate": 9.941404738980007e-07, "loss": 0.513, "step": 4449 }, { "epoch": 0.07735229188756974, "grad_norm": 3.2928718598739155, "learning_rate": 9.941361762342974e-07, "loss": 0.5131, "step": 4450 }, { "epoch": 0.07736967442507257, "grad_norm": 2.08629018309746, "learning_rate": 9.94131877004409e-07, "loss": 0.6329, "step": 4451 }, { "epoch": 0.07738705696257539, "grad_norm": 6.459827362713921, "learning_rate": 9.941275762083498e-07, "loss": 0.672, "step": 4452 }, { "epoch": 0.07740443950007822, "grad_norm": 1.8699470233130422, "learning_rate": 9.941232738461332e-07, "loss": 0.4429, "step": 4453 }, { "epoch": 0.07742182203758105, "grad_norm": 1.741559706746324, "learning_rate": 9.941189699177727e-07, "loss": 0.7341, "step": 4454 }, { "epoch": 0.07743920457508387, "grad_norm": 1.7077484760496537, "learning_rate": 9.94114664423282e-07, "loss": 0.5156, "step": 4455 }, { "epoch": 0.0774565871125867, "grad_norm": 1.9189257103477793, "learning_rate": 9.941103573626749e-07, "loss": 0.7353, "step": 4456 }, { "epoch": 0.07747396965008951, "grad_norm": 2.9191002953531213, "learning_rate": 9.941060487359648e-07, "loss": 0.6952, "step": 4457 }, { "epoch": 0.07749135218759234, "grad_norm": 2.0362065279884214, "learning_rate": 9.941017385431657e-07, "loss": 0.6874, "step": 4458 }, { "epoch": 0.07750873472509517, "grad_norm": 2.1074788669007787, "learning_rate": 9.940974267842908e-07, "loss": 0.551, "step": 4459 }, { "epoch": 0.07752611726259799, "grad_norm": 1.6285364325816274, "learning_rate": 9.940931134593543e-07, "loss": 0.2548, "step": 4460 }, { "epoch": 0.07754349980010082, "grad_norm": 1.9112298990327836, "learning_rate": 9.940887985683694e-07, "loss": 0.349, "step": 4461 }, { "epoch": 0.07756088233760364, "grad_norm": 2.2444445272313915, "learning_rate": 9.9408448211135e-07, "loss": 0.5763, "step": 4462 }, { "epoch": 0.07757826487510647, "grad_norm": 2.342074317259741, "learning_rate": 9.9408016408831e-07, "loss": 0.6761, "step": 4463 }, { "epoch": 0.0775956474126093, "grad_norm": 2.507839664910299, "learning_rate": 9.940758444992626e-07, "loss": 0.3188, "step": 4464 }, { "epoch": 0.07761302995011211, "grad_norm": 2.177287379566176, "learning_rate": 9.940715233442216e-07, "loss": 0.5822, "step": 4465 }, { "epoch": 0.07763041248761494, "grad_norm": 4.686687238620129, "learning_rate": 9.940672006232009e-07, "loss": 0.2694, "step": 4466 }, { "epoch": 0.07764779502511776, "grad_norm": 2.241345144141494, "learning_rate": 9.940628763362144e-07, "loss": 0.3924, "step": 4467 }, { "epoch": 0.07766517756262059, "grad_norm": 2.1187182449174813, "learning_rate": 9.940585504832752e-07, "loss": 0.6176, "step": 4468 }, { "epoch": 0.07768256010012342, "grad_norm": 1.407110002071893, "learning_rate": 9.940542230643976e-07, "loss": 0.6893, "step": 4469 }, { "epoch": 0.07769994263762624, "grad_norm": 1.446809516006323, "learning_rate": 9.94049894079595e-07, "loss": 0.6358, "step": 4470 }, { "epoch": 0.07771732517512907, "grad_norm": 2.232071663123264, "learning_rate": 9.940455635288812e-07, "loss": 0.7067, "step": 4471 }, { "epoch": 0.07773470771263188, "grad_norm": 1.9596641841994387, "learning_rate": 9.940412314122699e-07, "loss": 0.4094, "step": 4472 }, { "epoch": 0.07775209025013471, "grad_norm": 2.4353608998338205, "learning_rate": 9.940368977297746e-07, "loss": 0.4238, "step": 4473 }, { "epoch": 0.07776947278763754, "grad_norm": 1.33477812352354, "learning_rate": 9.940325624814096e-07, "loss": 0.3987, "step": 4474 }, { "epoch": 0.07778685532514036, "grad_norm": 3.803511332902124, "learning_rate": 9.94028225667188e-07, "loss": 0.6643, "step": 4475 }, { "epoch": 0.07780423786264319, "grad_norm": 1.6999336670700314, "learning_rate": 9.940238872871243e-07, "loss": 0.3439, "step": 4476 }, { "epoch": 0.07782162040014601, "grad_norm": 1.6631584440647773, "learning_rate": 9.940195473412317e-07, "loss": 0.3281, "step": 4477 }, { "epoch": 0.07783900293764884, "grad_norm": 1.2191476242773098, "learning_rate": 9.940152058295238e-07, "loss": 0.3298, "step": 4478 }, { "epoch": 0.07785638547515167, "grad_norm": 2.189645909429803, "learning_rate": 9.940108627520146e-07, "loss": 0.2876, "step": 4479 }, { "epoch": 0.07787376801265448, "grad_norm": 2.6755352739009304, "learning_rate": 9.940065181087182e-07, "loss": 0.8136, "step": 4480 }, { "epoch": 0.07789115055015731, "grad_norm": 1.4640460394729453, "learning_rate": 9.94002171899648e-07, "loss": 0.6264, "step": 4481 }, { "epoch": 0.07790853308766013, "grad_norm": 2.632876375087732, "learning_rate": 9.939978241248178e-07, "loss": 0.2457, "step": 4482 }, { "epoch": 0.07792591562516296, "grad_norm": 1.8471652796269418, "learning_rate": 9.939934747842413e-07, "loss": 0.5314, "step": 4483 }, { "epoch": 0.07794329816266579, "grad_norm": 1.6730017037980247, "learning_rate": 9.939891238779325e-07, "loss": 0.3851, "step": 4484 }, { "epoch": 0.07796068070016861, "grad_norm": 2.0050021565559195, "learning_rate": 9.939847714059052e-07, "loss": 0.7834, "step": 4485 }, { "epoch": 0.07797806323767144, "grad_norm": 2.310247866331882, "learning_rate": 9.93980417368173e-07, "loss": 0.6354, "step": 4486 }, { "epoch": 0.07799544577517425, "grad_norm": 1.949122494091298, "learning_rate": 9.939760617647496e-07, "loss": 0.4449, "step": 4487 }, { "epoch": 0.07801282831267708, "grad_norm": 1.4480523142232844, "learning_rate": 9.939717045956492e-07, "loss": 0.5248, "step": 4488 }, { "epoch": 0.07803021085017992, "grad_norm": 1.3052403834715898, "learning_rate": 9.939673458608855e-07, "loss": 0.3256, "step": 4489 }, { "epoch": 0.07804759338768273, "grad_norm": 2.553667671307501, "learning_rate": 9.939629855604722e-07, "loss": 0.5157, "step": 4490 }, { "epoch": 0.07806497592518556, "grad_norm": 1.8948542491275562, "learning_rate": 9.93958623694423e-07, "loss": 0.561, "step": 4491 }, { "epoch": 0.07808235846268838, "grad_norm": 2.1204753152431137, "learning_rate": 9.93954260262752e-07, "loss": 0.3434, "step": 4492 }, { "epoch": 0.07809974100019121, "grad_norm": 2.208359919431266, "learning_rate": 9.939498952654728e-07, "loss": 0.2791, "step": 4493 }, { "epoch": 0.07811712353769404, "grad_norm": 2.0348834644899605, "learning_rate": 9.939455287025993e-07, "loss": 0.5471, "step": 4494 }, { "epoch": 0.07813450607519686, "grad_norm": 5.750595213010114, "learning_rate": 9.939411605741456e-07, "loss": 1.3259, "step": 4495 }, { "epoch": 0.07815188861269969, "grad_norm": 1.7623153939735707, "learning_rate": 9.93936790880125e-07, "loss": 0.4801, "step": 4496 }, { "epoch": 0.0781692711502025, "grad_norm": 1.5488754886058662, "learning_rate": 9.93932419620552e-07, "loss": 0.464, "step": 4497 }, { "epoch": 0.07818665368770533, "grad_norm": 2.6118076764502924, "learning_rate": 9.939280467954398e-07, "loss": 0.3657, "step": 4498 }, { "epoch": 0.07820403622520816, "grad_norm": 3.713032755554144, "learning_rate": 9.939236724048028e-07, "loss": 0.417, "step": 4499 }, { "epoch": 0.07822141876271098, "grad_norm": 3.1103337455941547, "learning_rate": 9.939192964486545e-07, "loss": 0.4572, "step": 4500 }, { "epoch": 0.07823880130021381, "grad_norm": 2.622814191955631, "learning_rate": 9.93914918927009e-07, "loss": 0.6289, "step": 4501 }, { "epoch": 0.07825618383771663, "grad_norm": 2.499394841699872, "learning_rate": 9.9391053983988e-07, "loss": 0.4976, "step": 4502 }, { "epoch": 0.07827356637521946, "grad_norm": 2.05323752380042, "learning_rate": 9.939061591872817e-07, "loss": 0.451, "step": 4503 }, { "epoch": 0.07829094891272229, "grad_norm": 3.319612270803425, "learning_rate": 9.939017769692276e-07, "loss": 0.3725, "step": 4504 }, { "epoch": 0.0783083314502251, "grad_norm": 2.599391466031554, "learning_rate": 9.938973931857318e-07, "loss": 0.4634, "step": 4505 }, { "epoch": 0.07832571398772793, "grad_norm": 2.388455117630817, "learning_rate": 9.938930078368083e-07, "loss": 0.5475, "step": 4506 }, { "epoch": 0.07834309652523075, "grad_norm": 3.4428292345412816, "learning_rate": 9.938886209224705e-07, "loss": 0.758, "step": 4507 }, { "epoch": 0.07836047906273358, "grad_norm": 2.068572843905815, "learning_rate": 9.938842324427328e-07, "loss": 0.5561, "step": 4508 }, { "epoch": 0.07837786160023641, "grad_norm": 1.3791420429428682, "learning_rate": 9.93879842397609e-07, "loss": 0.515, "step": 4509 }, { "epoch": 0.07839524413773923, "grad_norm": 2.6658788505364366, "learning_rate": 9.938754507871128e-07, "loss": 0.4817, "step": 4510 }, { "epoch": 0.07841262667524206, "grad_norm": 2.4204784019283005, "learning_rate": 9.938710576112584e-07, "loss": 0.5663, "step": 4511 }, { "epoch": 0.07843000921274487, "grad_norm": 3.5948784963921723, "learning_rate": 9.938666628700597e-07, "loss": 0.3604, "step": 4512 }, { "epoch": 0.0784473917502477, "grad_norm": 1.5399824035920775, "learning_rate": 9.938622665635304e-07, "loss": 0.4575, "step": 4513 }, { "epoch": 0.07846477428775053, "grad_norm": 1.7297577450716244, "learning_rate": 9.938578686916848e-07, "loss": 0.2884, "step": 4514 }, { "epoch": 0.07848215682525335, "grad_norm": 1.2898503836390804, "learning_rate": 9.938534692545364e-07, "loss": 0.4164, "step": 4515 }, { "epoch": 0.07849953936275618, "grad_norm": 2.9978102083280183, "learning_rate": 9.938490682520995e-07, "loss": 0.3927, "step": 4516 }, { "epoch": 0.078516921900259, "grad_norm": 1.6895610604484053, "learning_rate": 9.938446656843878e-07, "loss": 0.3134, "step": 4517 }, { "epoch": 0.07853430443776183, "grad_norm": 1.867559713538721, "learning_rate": 9.938402615514152e-07, "loss": 0.3668, "step": 4518 }, { "epoch": 0.07855168697526464, "grad_norm": 2.156711337157563, "learning_rate": 9.93835855853196e-07, "loss": 0.5822, "step": 4519 }, { "epoch": 0.07856906951276747, "grad_norm": 1.9648493452701161, "learning_rate": 9.93831448589744e-07, "loss": 0.3504, "step": 4520 }, { "epoch": 0.0785864520502703, "grad_norm": 1.485432130050762, "learning_rate": 9.938270397610733e-07, "loss": 0.2755, "step": 4521 }, { "epoch": 0.07860383458777312, "grad_norm": 2.0823952793552003, "learning_rate": 9.938226293671976e-07, "loss": 0.588, "step": 4522 }, { "epoch": 0.07862121712527595, "grad_norm": 1.4328539838567567, "learning_rate": 9.93818217408131e-07, "loss": 0.4882, "step": 4523 }, { "epoch": 0.07863859966277877, "grad_norm": 2.2792411499644185, "learning_rate": 9.938138038838875e-07, "loss": 0.3452, "step": 4524 }, { "epoch": 0.0786559822002816, "grad_norm": 3.1374723243581273, "learning_rate": 9.93809388794481e-07, "loss": 0.6331, "step": 4525 }, { "epoch": 0.07867336473778443, "grad_norm": 2.3770161638105614, "learning_rate": 9.938049721399258e-07, "loss": 0.4176, "step": 4526 }, { "epoch": 0.07869074727528724, "grad_norm": 2.0980900669940925, "learning_rate": 9.938005539202353e-07, "loss": 0.5495, "step": 4527 }, { "epoch": 0.07870812981279007, "grad_norm": 1.2308878795046778, "learning_rate": 9.937961341354241e-07, "loss": 0.3372, "step": 4528 }, { "epoch": 0.07872551235029289, "grad_norm": 2.3218445690855365, "learning_rate": 9.93791712785506e-07, "loss": 0.617, "step": 4529 }, { "epoch": 0.07874289488779572, "grad_norm": 2.192331352088995, "learning_rate": 9.937872898704952e-07, "loss": 0.5996, "step": 4530 }, { "epoch": 0.07876027742529855, "grad_norm": 1.193065660833907, "learning_rate": 9.937828653904052e-07, "loss": 0.4703, "step": 4531 }, { "epoch": 0.07877765996280137, "grad_norm": 1.193819067406581, "learning_rate": 9.937784393452504e-07, "loss": 0.7393, "step": 4532 }, { "epoch": 0.0787950425003042, "grad_norm": 2.4165933482931847, "learning_rate": 9.937740117350448e-07, "loss": 0.2352, "step": 4533 }, { "epoch": 0.07881242503780701, "grad_norm": 3.6538745028394843, "learning_rate": 9.937695825598025e-07, "loss": 0.4106, "step": 4534 }, { "epoch": 0.07882980757530984, "grad_norm": 1.4127620434465953, "learning_rate": 9.937651518195374e-07, "loss": 0.3649, "step": 4535 }, { "epoch": 0.07884719011281267, "grad_norm": 2.0507195397750753, "learning_rate": 9.937607195142634e-07, "loss": 0.399, "step": 4536 }, { "epoch": 0.07886457265031549, "grad_norm": 2.148338368089992, "learning_rate": 9.93756285643995e-07, "loss": 0.4179, "step": 4537 }, { "epoch": 0.07888195518781832, "grad_norm": 2.0219237967012638, "learning_rate": 9.93751850208746e-07, "loss": 0.5773, "step": 4538 }, { "epoch": 0.07889933772532114, "grad_norm": 1.0824944452910295, "learning_rate": 9.937474132085302e-07, "loss": 0.2798, "step": 4539 }, { "epoch": 0.07891672026282397, "grad_norm": 1.9605413910977507, "learning_rate": 9.93742974643362e-07, "loss": 0.358, "step": 4540 }, { "epoch": 0.0789341028003268, "grad_norm": 1.9012377694406761, "learning_rate": 9.937385345132553e-07, "loss": 0.3386, "step": 4541 }, { "epoch": 0.07895148533782961, "grad_norm": 2.233942802900083, "learning_rate": 9.937340928182244e-07, "loss": 0.4785, "step": 4542 }, { "epoch": 0.07896886787533244, "grad_norm": 1.7760766512674275, "learning_rate": 9.937296495582832e-07, "loss": 0.4394, "step": 4543 }, { "epoch": 0.07898625041283526, "grad_norm": 2.3229533792534167, "learning_rate": 9.937252047334457e-07, "loss": 0.338, "step": 4544 }, { "epoch": 0.07900363295033809, "grad_norm": 1.7891033917088501, "learning_rate": 9.937207583437262e-07, "loss": 0.5317, "step": 4545 }, { "epoch": 0.07902101548784092, "grad_norm": 1.8794359417641366, "learning_rate": 9.937163103891388e-07, "loss": 0.4508, "step": 4546 }, { "epoch": 0.07903839802534374, "grad_norm": 1.567216181852416, "learning_rate": 9.937118608696974e-07, "loss": 0.3022, "step": 4547 }, { "epoch": 0.07905578056284657, "grad_norm": 1.8440400343866956, "learning_rate": 9.937074097854162e-07, "loss": 0.3736, "step": 4548 }, { "epoch": 0.07907316310034938, "grad_norm": 2.6838885213647647, "learning_rate": 9.937029571363091e-07, "loss": 0.5862, "step": 4549 }, { "epoch": 0.07909054563785221, "grad_norm": 1.5175193489613608, "learning_rate": 9.936985029223905e-07, "loss": 0.3821, "step": 4550 }, { "epoch": 0.07910792817535504, "grad_norm": 1.9650776229911784, "learning_rate": 9.936940471436746e-07, "loss": 0.3676, "step": 4551 }, { "epoch": 0.07912531071285786, "grad_norm": 1.6080175977541842, "learning_rate": 9.936895898001754e-07, "loss": 0.5753, "step": 4552 }, { "epoch": 0.07914269325036069, "grad_norm": 2.9304881276915173, "learning_rate": 9.936851308919068e-07, "loss": 0.3374, "step": 4553 }, { "epoch": 0.07916007578786351, "grad_norm": 2.6937162983012324, "learning_rate": 9.93680670418883e-07, "loss": 0.4818, "step": 4554 }, { "epoch": 0.07917745832536634, "grad_norm": 1.9232357920230314, "learning_rate": 9.936762083811184e-07, "loss": 0.5875, "step": 4555 }, { "epoch": 0.07919484086286917, "grad_norm": 1.787025420147465, "learning_rate": 9.93671744778627e-07, "loss": 0.3767, "step": 4556 }, { "epoch": 0.07921222340037198, "grad_norm": 1.6618188703112213, "learning_rate": 9.936672796114232e-07, "loss": 0.4026, "step": 4557 }, { "epoch": 0.07922960593787481, "grad_norm": 2.248102994260279, "learning_rate": 9.936628128795206e-07, "loss": 0.5697, "step": 4558 }, { "epoch": 0.07924698847537763, "grad_norm": 2.099462192708191, "learning_rate": 9.936583445829338e-07, "loss": 0.3175, "step": 4559 }, { "epoch": 0.07926437101288046, "grad_norm": 2.3905681988277885, "learning_rate": 9.936538747216768e-07, "loss": 0.3287, "step": 4560 }, { "epoch": 0.07928175355038329, "grad_norm": 3.554758111975576, "learning_rate": 9.936494032957636e-07, "loss": 0.527, "step": 4561 }, { "epoch": 0.07929913608788611, "grad_norm": 1.8893241510986323, "learning_rate": 9.936449303052087e-07, "loss": 0.5442, "step": 4562 }, { "epoch": 0.07931651862538894, "grad_norm": 2.2316669423380224, "learning_rate": 9.936404557500259e-07, "loss": 0.4352, "step": 4563 }, { "epoch": 0.07933390116289175, "grad_norm": 2.351436126347114, "learning_rate": 9.936359796302299e-07, "loss": 0.1456, "step": 4564 }, { "epoch": 0.07935128370039458, "grad_norm": 5.366842286950952, "learning_rate": 9.936315019458346e-07, "loss": 0.8927, "step": 4565 }, { "epoch": 0.07936866623789741, "grad_norm": 2.2624179008475425, "learning_rate": 9.93627022696854e-07, "loss": 0.5145, "step": 4566 }, { "epoch": 0.07938604877540023, "grad_norm": 2.5396287188068905, "learning_rate": 9.936225418833027e-07, "loss": 0.5177, "step": 4567 }, { "epoch": 0.07940343131290306, "grad_norm": 2.1628796611358845, "learning_rate": 9.936180595051947e-07, "loss": 0.4918, "step": 4568 }, { "epoch": 0.07942081385040588, "grad_norm": 3.2932334220896466, "learning_rate": 9.93613575562544e-07, "loss": 0.7768, "step": 4569 }, { "epoch": 0.07943819638790871, "grad_norm": 3.5219731438340345, "learning_rate": 9.93609090055365e-07, "loss": 0.4182, "step": 4570 }, { "epoch": 0.07945557892541154, "grad_norm": 2.6023929014226486, "learning_rate": 9.936046029836721e-07, "loss": 0.6226, "step": 4571 }, { "epoch": 0.07947296146291435, "grad_norm": 3.23549401759254, "learning_rate": 9.936001143474793e-07, "loss": 0.2835, "step": 4572 }, { "epoch": 0.07949034400041718, "grad_norm": 1.4779698681150322, "learning_rate": 9.935956241468009e-07, "loss": 0.512, "step": 4573 }, { "epoch": 0.07950772653792, "grad_norm": 3.4539838963282765, "learning_rate": 9.93591132381651e-07, "loss": 0.532, "step": 4574 }, { "epoch": 0.07952510907542283, "grad_norm": 1.41660631389643, "learning_rate": 9.93586639052044e-07, "loss": 0.3983, "step": 4575 }, { "epoch": 0.07954249161292566, "grad_norm": 1.3649944702057941, "learning_rate": 9.935821441579942e-07, "loss": 0.6004, "step": 4576 }, { "epoch": 0.07955987415042848, "grad_norm": 1.6855434386055168, "learning_rate": 9.935776476995156e-07, "loss": 0.5439, "step": 4577 }, { "epoch": 0.07957725668793131, "grad_norm": 1.36852753641597, "learning_rate": 9.935731496766226e-07, "loss": 0.7082, "step": 4578 }, { "epoch": 0.07959463922543412, "grad_norm": 2.0249299515698285, "learning_rate": 9.935686500893297e-07, "loss": 0.5251, "step": 4579 }, { "epoch": 0.07961202176293695, "grad_norm": 2.8802758571480145, "learning_rate": 9.935641489376505e-07, "loss": 0.3501, "step": 4580 }, { "epoch": 0.07962940430043979, "grad_norm": 5.233979158830694, "learning_rate": 9.935596462216e-07, "loss": 0.3303, "step": 4581 }, { "epoch": 0.0796467868379426, "grad_norm": 2.270763618458081, "learning_rate": 9.93555141941192e-07, "loss": 0.3674, "step": 4582 }, { "epoch": 0.07966416937544543, "grad_norm": 2.278669834207587, "learning_rate": 9.93550636096441e-07, "loss": 0.4521, "step": 4583 }, { "epoch": 0.07968155191294825, "grad_norm": 2.9924227978682927, "learning_rate": 9.93546128687361e-07, "loss": 0.8529, "step": 4584 }, { "epoch": 0.07969893445045108, "grad_norm": 1.5414454787805256, "learning_rate": 9.935416197139666e-07, "loss": 0.4543, "step": 4585 }, { "epoch": 0.0797163169879539, "grad_norm": 2.2053514277422996, "learning_rate": 9.935371091762721e-07, "loss": 0.292, "step": 4586 }, { "epoch": 0.07973369952545672, "grad_norm": 4.265807278672841, "learning_rate": 9.935325970742916e-07, "loss": 0.5634, "step": 4587 }, { "epoch": 0.07975108206295956, "grad_norm": 1.7297314271225992, "learning_rate": 9.935280834080395e-07, "loss": 0.3091, "step": 4588 }, { "epoch": 0.07976846460046237, "grad_norm": 1.5592404820211672, "learning_rate": 9.935235681775299e-07, "loss": 0.4182, "step": 4589 }, { "epoch": 0.0797858471379652, "grad_norm": 3.4576219452036394, "learning_rate": 9.935190513827774e-07, "loss": 0.5228, "step": 4590 }, { "epoch": 0.07980322967546802, "grad_norm": 2.3093673972897846, "learning_rate": 9.935145330237963e-07, "loss": 0.6168, "step": 4591 }, { "epoch": 0.07982061221297085, "grad_norm": 3.4092983153263554, "learning_rate": 9.935100131006007e-07, "loss": 0.7336, "step": 4592 }, { "epoch": 0.07983799475047368, "grad_norm": 2.08968620230278, "learning_rate": 9.935054916132053e-07, "loss": 0.5166, "step": 4593 }, { "epoch": 0.0798553772879765, "grad_norm": 2.0488297557141517, "learning_rate": 9.93500968561624e-07, "loss": 0.4608, "step": 4594 }, { "epoch": 0.07987275982547933, "grad_norm": 2.143447886884164, "learning_rate": 9.934964439458713e-07, "loss": 0.408, "step": 4595 }, { "epoch": 0.07989014236298214, "grad_norm": 2.3096903073230735, "learning_rate": 9.934919177659616e-07, "loss": 0.6707, "step": 4596 }, { "epoch": 0.07990752490048497, "grad_norm": 2.27066504307411, "learning_rate": 9.934873900219092e-07, "loss": 0.4611, "step": 4597 }, { "epoch": 0.0799249074379878, "grad_norm": 1.7982872737664186, "learning_rate": 9.934828607137286e-07, "loss": 0.2309, "step": 4598 }, { "epoch": 0.07994228997549062, "grad_norm": 2.7375856355975063, "learning_rate": 9.934783298414338e-07, "loss": 0.4921, "step": 4599 }, { "epoch": 0.07995967251299345, "grad_norm": 1.2609198196098892, "learning_rate": 9.934737974050395e-07, "loss": 0.3025, "step": 4600 }, { "epoch": 0.07997705505049627, "grad_norm": 2.4634723448652975, "learning_rate": 9.9346926340456e-07, "loss": 0.5208, "step": 4601 }, { "epoch": 0.0799944375879991, "grad_norm": 1.7432101373839046, "learning_rate": 9.934647278400095e-07, "loss": 0.5762, "step": 4602 }, { "epoch": 0.08001182012550193, "grad_norm": 3.455923655935039, "learning_rate": 9.934601907114025e-07, "loss": 0.4721, "step": 4603 }, { "epoch": 0.08002920266300474, "grad_norm": 4.05016678565666, "learning_rate": 9.934556520187535e-07, "loss": 0.4555, "step": 4604 }, { "epoch": 0.08004658520050757, "grad_norm": 1.5261004599563193, "learning_rate": 9.934511117620767e-07, "loss": 0.3423, "step": 4605 }, { "epoch": 0.08006396773801039, "grad_norm": 3.1961367577282087, "learning_rate": 9.934465699413866e-07, "loss": 0.4808, "step": 4606 }, { "epoch": 0.08008135027551322, "grad_norm": 1.314329823437798, "learning_rate": 9.934420265566974e-07, "loss": 0.3413, "step": 4607 }, { "epoch": 0.08009873281301605, "grad_norm": 2.2532315946184815, "learning_rate": 9.934374816080238e-07, "loss": 0.4298, "step": 4608 }, { "epoch": 0.08011611535051887, "grad_norm": 1.6014336107866398, "learning_rate": 9.934329350953797e-07, "loss": 0.6806, "step": 4609 }, { "epoch": 0.0801334978880217, "grad_norm": 2.379175117384944, "learning_rate": 9.934283870187803e-07, "loss": 0.3056, "step": 4610 }, { "epoch": 0.08015088042552451, "grad_norm": 1.9259603753316241, "learning_rate": 9.934238373782392e-07, "loss": 0.3238, "step": 4611 }, { "epoch": 0.08016826296302734, "grad_norm": 1.7425275404629366, "learning_rate": 9.934192861737715e-07, "loss": 0.5291, "step": 4612 }, { "epoch": 0.08018564550053017, "grad_norm": 2.256897225506501, "learning_rate": 9.934147334053913e-07, "loss": 0.6538, "step": 4613 }, { "epoch": 0.08020302803803299, "grad_norm": 2.1505908456192295, "learning_rate": 9.934101790731128e-07, "loss": 0.649, "step": 4614 }, { "epoch": 0.08022041057553582, "grad_norm": 2.316111058268676, "learning_rate": 9.93405623176951e-07, "loss": 0.4046, "step": 4615 }, { "epoch": 0.08023779311303864, "grad_norm": 1.9498146476841995, "learning_rate": 9.934010657169198e-07, "loss": 0.3486, "step": 4616 }, { "epoch": 0.08025517565054147, "grad_norm": 1.1510810551209893, "learning_rate": 9.93396506693034e-07, "loss": 0.347, "step": 4617 }, { "epoch": 0.0802725581880443, "grad_norm": 3.5864433391207884, "learning_rate": 9.93391946105308e-07, "loss": 0.53, "step": 4618 }, { "epoch": 0.08028994072554711, "grad_norm": 1.589984646072736, "learning_rate": 9.933873839537559e-07, "loss": 0.381, "step": 4619 }, { "epoch": 0.08030732326304994, "grad_norm": 2.689798844429494, "learning_rate": 9.933828202383928e-07, "loss": 0.6242, "step": 4620 }, { "epoch": 0.08032470580055276, "grad_norm": 3.368085325082927, "learning_rate": 9.933782549592324e-07, "loss": 0.5676, "step": 4621 }, { "epoch": 0.08034208833805559, "grad_norm": 1.5784184029942678, "learning_rate": 9.933736881162897e-07, "loss": 0.4977, "step": 4622 }, { "epoch": 0.08035947087555842, "grad_norm": 2.269070605174812, "learning_rate": 9.933691197095792e-07, "loss": 0.4041, "step": 4623 }, { "epoch": 0.08037685341306124, "grad_norm": 1.675382647528554, "learning_rate": 9.933645497391152e-07, "loss": 0.6346, "step": 4624 }, { "epoch": 0.08039423595056407, "grad_norm": 2.8462265014166204, "learning_rate": 9.93359978204912e-07, "loss": 0.5786, "step": 4625 }, { "epoch": 0.08041161848806688, "grad_norm": 1.4007777835391326, "learning_rate": 9.933554051069845e-07, "loss": 0.4406, "step": 4626 }, { "epoch": 0.08042900102556971, "grad_norm": 2.069901558660154, "learning_rate": 9.933508304453469e-07, "loss": 0.4809, "step": 4627 }, { "epoch": 0.08044638356307254, "grad_norm": 1.5012976642688305, "learning_rate": 9.93346254220014e-07, "loss": 0.2769, "step": 4628 }, { "epoch": 0.08046376610057536, "grad_norm": 2.507361586569766, "learning_rate": 9.933416764309999e-07, "loss": 0.6373, "step": 4629 }, { "epoch": 0.08048114863807819, "grad_norm": 2.8099148316479234, "learning_rate": 9.933370970783193e-07, "loss": 0.2696, "step": 4630 }, { "epoch": 0.080498531175581, "grad_norm": 1.7499536624591232, "learning_rate": 9.933325161619867e-07, "loss": 0.4449, "step": 4631 }, { "epoch": 0.08051591371308384, "grad_norm": 2.1783425612370184, "learning_rate": 9.933279336820168e-07, "loss": 0.4413, "step": 4632 }, { "epoch": 0.08053329625058667, "grad_norm": 1.613927932092303, "learning_rate": 9.933233496384237e-07, "loss": 0.4116, "step": 4633 }, { "epoch": 0.08055067878808948, "grad_norm": 2.212277802269982, "learning_rate": 9.933187640312224e-07, "loss": 0.4098, "step": 4634 }, { "epoch": 0.08056806132559231, "grad_norm": 6.593454740424441, "learning_rate": 9.933141768604271e-07, "loss": 0.4152, "step": 4635 }, { "epoch": 0.08058544386309513, "grad_norm": 2.584700945583201, "learning_rate": 9.933095881260526e-07, "loss": 0.4211, "step": 4636 }, { "epoch": 0.08060282640059796, "grad_norm": 2.6271203752219114, "learning_rate": 9.933049978281133e-07, "loss": 0.5256, "step": 4637 }, { "epoch": 0.08062020893810079, "grad_norm": 2.3872908259360095, "learning_rate": 9.933004059666238e-07, "loss": 0.3796, "step": 4638 }, { "epoch": 0.0806375914756036, "grad_norm": 3.0621657762706396, "learning_rate": 9.932958125415987e-07, "loss": 0.3727, "step": 4639 }, { "epoch": 0.08065497401310644, "grad_norm": 1.6983929839048018, "learning_rate": 9.932912175530523e-07, "loss": 0.6874, "step": 4640 }, { "epoch": 0.08067235655060925, "grad_norm": 1.661664751061401, "learning_rate": 9.932866210009994e-07, "loss": 0.5409, "step": 4641 }, { "epoch": 0.08068973908811208, "grad_norm": 1.8005067616580697, "learning_rate": 9.932820228854545e-07, "loss": 0.3593, "step": 4642 }, { "epoch": 0.08070712162561491, "grad_norm": 1.123710883332001, "learning_rate": 9.932774232064321e-07, "loss": 0.1961, "step": 4643 }, { "epoch": 0.08072450416311773, "grad_norm": 1.7738169164601463, "learning_rate": 9.932728219639469e-07, "loss": 0.5201, "step": 4644 }, { "epoch": 0.08074188670062056, "grad_norm": 2.985733902073667, "learning_rate": 9.932682191580135e-07, "loss": 0.6248, "step": 4645 }, { "epoch": 0.08075926923812338, "grad_norm": 1.5253769317017876, "learning_rate": 9.932636147886464e-07, "loss": 0.5793, "step": 4646 }, { "epoch": 0.08077665177562621, "grad_norm": 1.1846501220305963, "learning_rate": 9.932590088558601e-07, "loss": 0.346, "step": 4647 }, { "epoch": 0.08079403431312904, "grad_norm": 2.5256994364987224, "learning_rate": 9.932544013596696e-07, "loss": 0.4101, "step": 4648 }, { "epoch": 0.08081141685063185, "grad_norm": 2.321730156529842, "learning_rate": 9.93249792300089e-07, "loss": 0.7285, "step": 4649 }, { "epoch": 0.08082879938813468, "grad_norm": 1.772139821579366, "learning_rate": 9.932451816771331e-07, "loss": 0.3123, "step": 4650 }, { "epoch": 0.0808461819256375, "grad_norm": 2.0678881864295575, "learning_rate": 9.932405694908168e-07, "loss": 0.3693, "step": 4651 }, { "epoch": 0.08086356446314033, "grad_norm": 3.470195103111331, "learning_rate": 9.932359557411544e-07, "loss": 0.4043, "step": 4652 }, { "epoch": 0.08088094700064316, "grad_norm": 1.9239126843454069, "learning_rate": 9.932313404281604e-07, "loss": 0.5788, "step": 4653 }, { "epoch": 0.08089832953814598, "grad_norm": 1.7279257184046266, "learning_rate": 9.9322672355185e-07, "loss": 0.5109, "step": 4654 }, { "epoch": 0.08091571207564881, "grad_norm": 2.8395208261386253, "learning_rate": 9.93222105112237e-07, "loss": 0.3261, "step": 4655 }, { "epoch": 0.08093309461315162, "grad_norm": 1.983604501205764, "learning_rate": 9.932174851093368e-07, "loss": 0.4005, "step": 4656 }, { "epoch": 0.08095047715065445, "grad_norm": 2.0144495825259474, "learning_rate": 9.932128635431637e-07, "loss": 0.4362, "step": 4657 }, { "epoch": 0.08096785968815727, "grad_norm": 3.0610584621580923, "learning_rate": 9.932082404137323e-07, "loss": 0.2321, "step": 4658 }, { "epoch": 0.0809852422256601, "grad_norm": 1.9771919512785652, "learning_rate": 9.932036157210575e-07, "loss": 0.3463, "step": 4659 }, { "epoch": 0.08100262476316293, "grad_norm": 3.2864481308263804, "learning_rate": 9.931989894651536e-07, "loss": 0.4625, "step": 4660 }, { "epoch": 0.08102000730066575, "grad_norm": 1.3192294420484019, "learning_rate": 9.931943616460357e-07, "loss": 0.621, "step": 4661 }, { "epoch": 0.08103738983816858, "grad_norm": 1.6322756442663613, "learning_rate": 9.931897322637179e-07, "loss": 0.311, "step": 4662 }, { "epoch": 0.0810547723756714, "grad_norm": 1.244087132250964, "learning_rate": 9.931851013182154e-07, "loss": 0.4067, "step": 4663 }, { "epoch": 0.08107215491317422, "grad_norm": 1.667754521631079, "learning_rate": 9.931804688095426e-07, "loss": 0.3561, "step": 4664 }, { "epoch": 0.08108953745067705, "grad_norm": 3.2366281219962114, "learning_rate": 9.931758347377143e-07, "loss": 0.4449, "step": 4665 }, { "epoch": 0.08110691998817987, "grad_norm": 1.4494744037322622, "learning_rate": 9.931711991027452e-07, "loss": 0.3261, "step": 4666 }, { "epoch": 0.0811243025256827, "grad_norm": 3.355036220881183, "learning_rate": 9.9316656190465e-07, "loss": 0.738, "step": 4667 }, { "epoch": 0.08114168506318552, "grad_norm": 1.8090575678220648, "learning_rate": 9.931619231434433e-07, "loss": 0.5263, "step": 4668 }, { "epoch": 0.08115906760068835, "grad_norm": 4.496779101762325, "learning_rate": 9.931572828191396e-07, "loss": 0.4696, "step": 4669 }, { "epoch": 0.08117645013819118, "grad_norm": 2.271550389157077, "learning_rate": 9.93152640931754e-07, "loss": 0.2323, "step": 4670 }, { "epoch": 0.081193832675694, "grad_norm": 1.0861619130784688, "learning_rate": 9.931479974813013e-07, "loss": 0.2997, "step": 4671 }, { "epoch": 0.08121121521319682, "grad_norm": 2.26495317290563, "learning_rate": 9.931433524677958e-07, "loss": 0.2678, "step": 4672 }, { "epoch": 0.08122859775069964, "grad_norm": 2.5501994545852007, "learning_rate": 9.931387058912523e-07, "loss": 0.3086, "step": 4673 }, { "epoch": 0.08124598028820247, "grad_norm": 1.9383408447315345, "learning_rate": 9.931340577516859e-07, "loss": 0.2008, "step": 4674 }, { "epoch": 0.0812633628257053, "grad_norm": 3.4751334371805123, "learning_rate": 9.931294080491108e-07, "loss": 0.6864, "step": 4675 }, { "epoch": 0.08128074536320812, "grad_norm": 2.6539516955600355, "learning_rate": 9.93124756783542e-07, "loss": 0.4492, "step": 4676 }, { "epoch": 0.08129812790071095, "grad_norm": 2.0361761372678826, "learning_rate": 9.931201039549943e-07, "loss": 0.3102, "step": 4677 }, { "epoch": 0.08131551043821376, "grad_norm": 4.28564398026328, "learning_rate": 9.931154495634824e-07, "loss": 0.5213, "step": 4678 }, { "epoch": 0.0813328929757166, "grad_norm": 2.308481456984604, "learning_rate": 9.93110793609021e-07, "loss": 0.2644, "step": 4679 }, { "epoch": 0.08135027551321942, "grad_norm": 1.8638380802906995, "learning_rate": 9.931061360916251e-07, "loss": 0.4164, "step": 4680 }, { "epoch": 0.08136765805072224, "grad_norm": 1.4188321513066326, "learning_rate": 9.931014770113092e-07, "loss": 0.3049, "step": 4681 }, { "epoch": 0.08138504058822507, "grad_norm": 1.343755919402571, "learning_rate": 9.93096816368088e-07, "loss": 0.2406, "step": 4682 }, { "epoch": 0.08140242312572789, "grad_norm": 2.4516728637548524, "learning_rate": 9.930921541619766e-07, "loss": 0.4596, "step": 4683 }, { "epoch": 0.08141980566323072, "grad_norm": 2.1891796376506134, "learning_rate": 9.930874903929894e-07, "loss": 0.3834, "step": 4684 }, { "epoch": 0.08143718820073355, "grad_norm": 3.009654643199225, "learning_rate": 9.930828250611413e-07, "loss": 0.4599, "step": 4685 }, { "epoch": 0.08145457073823636, "grad_norm": 1.5634902777122606, "learning_rate": 9.930781581664473e-07, "loss": 0.3647, "step": 4686 }, { "epoch": 0.0814719532757392, "grad_norm": 1.5168187524333034, "learning_rate": 9.93073489708922e-07, "loss": 0.225, "step": 4687 }, { "epoch": 0.08148933581324201, "grad_norm": 1.3208011957180665, "learning_rate": 9.930688196885803e-07, "loss": 0.2958, "step": 4688 }, { "epoch": 0.08150671835074484, "grad_norm": 5.9077801639652945, "learning_rate": 9.930641481054368e-07, "loss": 0.5988, "step": 4689 }, { "epoch": 0.08152410088824767, "grad_norm": 2.068438144852394, "learning_rate": 9.930594749595065e-07, "loss": 0.2713, "step": 4690 }, { "epoch": 0.08154148342575049, "grad_norm": 1.8240278303476263, "learning_rate": 9.930548002508041e-07, "loss": 0.9493, "step": 4691 }, { "epoch": 0.08155886596325332, "grad_norm": 2.509003719461089, "learning_rate": 9.930501239793447e-07, "loss": 0.3713, "step": 4692 }, { "epoch": 0.08157624850075613, "grad_norm": 2.001276797288261, "learning_rate": 9.930454461451427e-07, "loss": 0.5292, "step": 4693 }, { "epoch": 0.08159363103825897, "grad_norm": 1.715779065371428, "learning_rate": 9.930407667482131e-07, "loss": 0.5028, "step": 4694 }, { "epoch": 0.0816110135757618, "grad_norm": 1.490996473830879, "learning_rate": 9.93036085788571e-07, "loss": 0.7776, "step": 4695 }, { "epoch": 0.08162839611326461, "grad_norm": 1.3022944942334966, "learning_rate": 9.930314032662307e-07, "loss": 0.4443, "step": 4696 }, { "epoch": 0.08164577865076744, "grad_norm": 1.8630384901352357, "learning_rate": 9.930267191812073e-07, "loss": 0.3141, "step": 4697 }, { "epoch": 0.08166316118827026, "grad_norm": 1.6480831514254397, "learning_rate": 9.93022033533516e-07, "loss": 0.503, "step": 4698 }, { "epoch": 0.08168054372577309, "grad_norm": 3.632823004840357, "learning_rate": 9.930173463231711e-07, "loss": 0.4563, "step": 4699 }, { "epoch": 0.08169792626327592, "grad_norm": 1.7888235286563818, "learning_rate": 9.930126575501878e-07, "loss": 0.759, "step": 4700 }, { "epoch": 0.08171530880077874, "grad_norm": 1.589743607897569, "learning_rate": 9.930079672145806e-07, "loss": 0.5843, "step": 4701 }, { "epoch": 0.08173269133828157, "grad_norm": 1.7743335683267898, "learning_rate": 9.930032753163649e-07, "loss": 0.4497, "step": 4702 }, { "epoch": 0.08175007387578438, "grad_norm": 1.8810396925225348, "learning_rate": 9.929985818555553e-07, "loss": 0.2634, "step": 4703 }, { "epoch": 0.08176745641328721, "grad_norm": 1.6957096138174028, "learning_rate": 9.929938868321664e-07, "loss": 0.3065, "step": 4704 }, { "epoch": 0.08178483895079004, "grad_norm": 2.554221737937042, "learning_rate": 9.929891902462134e-07, "loss": 0.4944, "step": 4705 }, { "epoch": 0.08180222148829286, "grad_norm": 1.847105575118785, "learning_rate": 9.929844920977113e-07, "loss": 0.2392, "step": 4706 }, { "epoch": 0.08181960402579569, "grad_norm": 9.126499319436443, "learning_rate": 9.929797923866748e-07, "loss": 0.6977, "step": 4707 }, { "epoch": 0.0818369865632985, "grad_norm": 2.016993400500283, "learning_rate": 9.929750911131187e-07, "loss": 0.5464, "step": 4708 }, { "epoch": 0.08185436910080134, "grad_norm": 2.7845215234339373, "learning_rate": 9.929703882770581e-07, "loss": 0.6283, "step": 4709 }, { "epoch": 0.08187175163830417, "grad_norm": 1.5435438591092867, "learning_rate": 9.929656838785076e-07, "loss": 0.5077, "step": 4710 }, { "epoch": 0.08188913417580698, "grad_norm": 2.7411661619419374, "learning_rate": 9.929609779174827e-07, "loss": 0.5055, "step": 4711 }, { "epoch": 0.08190651671330981, "grad_norm": 1.543660487545196, "learning_rate": 9.929562703939978e-07, "loss": 0.1881, "step": 4712 }, { "epoch": 0.08192389925081263, "grad_norm": 2.6984203651001843, "learning_rate": 9.929515613080678e-07, "loss": 0.5844, "step": 4713 }, { "epoch": 0.08194128178831546, "grad_norm": 1.7239890752027986, "learning_rate": 9.92946850659708e-07, "loss": 0.3862, "step": 4714 }, { "epoch": 0.08195866432581829, "grad_norm": 1.0405792731743395, "learning_rate": 9.92942138448933e-07, "loss": 0.2403, "step": 4715 }, { "epoch": 0.0819760468633211, "grad_norm": 1.7700468947988932, "learning_rate": 9.929374246757579e-07, "loss": 0.2965, "step": 4716 }, { "epoch": 0.08199342940082394, "grad_norm": 1.5219798005513872, "learning_rate": 9.929327093401977e-07, "loss": 0.4925, "step": 4717 }, { "epoch": 0.08201081193832675, "grad_norm": 3.4657174405668796, "learning_rate": 9.929279924422672e-07, "loss": 0.4116, "step": 4718 }, { "epoch": 0.08202819447582958, "grad_norm": 1.468515519989065, "learning_rate": 9.929232739819813e-07, "loss": 0.6157, "step": 4719 }, { "epoch": 0.08204557701333241, "grad_norm": 1.8911102905839439, "learning_rate": 9.92918553959355e-07, "loss": 0.5143, "step": 4720 }, { "epoch": 0.08206295955083523, "grad_norm": 1.705673709769985, "learning_rate": 9.929138323744034e-07, "loss": 0.3879, "step": 4721 }, { "epoch": 0.08208034208833806, "grad_norm": 2.0133688155458427, "learning_rate": 9.929091092271414e-07, "loss": 0.3879, "step": 4722 }, { "epoch": 0.08209772462584088, "grad_norm": 1.8941285864254835, "learning_rate": 9.92904384517584e-07, "loss": 0.5203, "step": 4723 }, { "epoch": 0.0821151071633437, "grad_norm": 2.951517979388008, "learning_rate": 9.928996582457462e-07, "loss": 0.5761, "step": 4724 }, { "epoch": 0.08213248970084652, "grad_norm": 1.1776808486661834, "learning_rate": 9.928949304116424e-07, "loss": 0.281, "step": 4725 }, { "epoch": 0.08214987223834935, "grad_norm": 1.4526177756469103, "learning_rate": 9.928902010152887e-07, "loss": 0.5247, "step": 4726 }, { "epoch": 0.08216725477585218, "grad_norm": 1.2618300013072534, "learning_rate": 9.928854700566992e-07, "loss": 0.4395, "step": 4727 }, { "epoch": 0.082184637313355, "grad_norm": 2.5367210496934183, "learning_rate": 9.928807375358891e-07, "loss": 0.4504, "step": 4728 }, { "epoch": 0.08220201985085783, "grad_norm": 2.1361958433420845, "learning_rate": 9.928760034528735e-07, "loss": 0.7671, "step": 4729 }, { "epoch": 0.08221940238836065, "grad_norm": 4.120112965990903, "learning_rate": 9.928712678076676e-07, "loss": 0.3319, "step": 4730 }, { "epoch": 0.08223678492586348, "grad_norm": 1.405599084643979, "learning_rate": 9.92866530600286e-07, "loss": 0.7256, "step": 4731 }, { "epoch": 0.0822541674633663, "grad_norm": 3.1521576053686053, "learning_rate": 9.928617918307441e-07, "loss": 0.5583, "step": 4732 }, { "epoch": 0.08227155000086912, "grad_norm": 3.5292357891363264, "learning_rate": 9.928570514990564e-07, "loss": 0.5859, "step": 4733 }, { "epoch": 0.08228893253837195, "grad_norm": 1.5122610102270377, "learning_rate": 9.928523096052385e-07, "loss": 0.5069, "step": 4734 }, { "epoch": 0.08230631507587477, "grad_norm": 2.1919232897416614, "learning_rate": 9.92847566149305e-07, "loss": 0.6409, "step": 4735 }, { "epoch": 0.0823236976133776, "grad_norm": 1.5735504518190215, "learning_rate": 9.928428211312713e-07, "loss": 0.9007, "step": 4736 }, { "epoch": 0.08234108015088043, "grad_norm": 1.6688918669295658, "learning_rate": 9.928380745511522e-07, "loss": 0.2746, "step": 4737 }, { "epoch": 0.08235846268838325, "grad_norm": 2.702798826815521, "learning_rate": 9.928333264089626e-07, "loss": 0.6498, "step": 4738 }, { "epoch": 0.08237584522588608, "grad_norm": 2.3474114266558077, "learning_rate": 9.92828576704718e-07, "loss": 0.4873, "step": 4739 }, { "epoch": 0.0823932277633889, "grad_norm": 1.9385444631789945, "learning_rate": 9.928238254384331e-07, "loss": 0.4055, "step": 4740 }, { "epoch": 0.08241061030089172, "grad_norm": 2.07760502896211, "learning_rate": 9.92819072610123e-07, "loss": 0.3134, "step": 4741 }, { "epoch": 0.08242799283839455, "grad_norm": 1.741514597748484, "learning_rate": 9.928143182198028e-07, "loss": 0.4423, "step": 4742 }, { "epoch": 0.08244537537589737, "grad_norm": 2.7414997467908293, "learning_rate": 9.928095622674877e-07, "loss": 0.4425, "step": 4743 }, { "epoch": 0.0824627579134002, "grad_norm": 1.8422711423678209, "learning_rate": 9.928048047531928e-07, "loss": 0.5725, "step": 4744 }, { "epoch": 0.08248014045090302, "grad_norm": 1.873581816102024, "learning_rate": 9.928000456769327e-07, "loss": 0.344, "step": 4745 }, { "epoch": 0.08249752298840585, "grad_norm": 2.420506812148079, "learning_rate": 9.92795285038723e-07, "loss": 0.526, "step": 4746 }, { "epoch": 0.08251490552590868, "grad_norm": 2.051414182249971, "learning_rate": 9.927905228385787e-07, "loss": 0.5367, "step": 4747 }, { "epoch": 0.0825322880634115, "grad_norm": 1.407682014280902, "learning_rate": 9.927857590765148e-07, "loss": 0.4174, "step": 4748 }, { "epoch": 0.08254967060091432, "grad_norm": 1.5357862013330774, "learning_rate": 9.927809937525462e-07, "loss": 0.2962, "step": 4749 }, { "epoch": 0.08256705313841714, "grad_norm": 1.5456044944167828, "learning_rate": 9.927762268666883e-07, "loss": 0.3871, "step": 4750 }, { "epoch": 0.08258443567591997, "grad_norm": 1.4019660404114593, "learning_rate": 9.927714584189563e-07, "loss": 0.3691, "step": 4751 }, { "epoch": 0.0826018182134228, "grad_norm": 1.5264292659654333, "learning_rate": 9.92766688409365e-07, "loss": 0.3342, "step": 4752 }, { "epoch": 0.08261920075092562, "grad_norm": 1.8048759288772338, "learning_rate": 9.927619168379296e-07, "loss": 0.5103, "step": 4753 }, { "epoch": 0.08263658328842845, "grad_norm": 3.290933362550281, "learning_rate": 9.927571437046653e-07, "loss": 0.4375, "step": 4754 }, { "epoch": 0.08265396582593126, "grad_norm": 3.95702091798488, "learning_rate": 9.927523690095873e-07, "loss": 0.5906, "step": 4755 }, { "epoch": 0.0826713483634341, "grad_norm": 2.9324696727282054, "learning_rate": 9.927475927527106e-07, "loss": 0.7627, "step": 4756 }, { "epoch": 0.08268873090093692, "grad_norm": 2.3928132335794006, "learning_rate": 9.927428149340505e-07, "loss": 0.3906, "step": 4757 }, { "epoch": 0.08270611343843974, "grad_norm": 1.7109090772112205, "learning_rate": 9.927380355536218e-07, "loss": 0.5917, "step": 4758 }, { "epoch": 0.08272349597594257, "grad_norm": 1.941660588165367, "learning_rate": 9.9273325461144e-07, "loss": 0.5802, "step": 4759 }, { "epoch": 0.08274087851344539, "grad_norm": 2.9676657609655983, "learning_rate": 9.927284721075202e-07, "loss": 0.496, "step": 4760 }, { "epoch": 0.08275826105094822, "grad_norm": 3.41439753141746, "learning_rate": 9.927236880418775e-07, "loss": 0.3107, "step": 4761 }, { "epoch": 0.08277564358845105, "grad_norm": 1.9933323017760642, "learning_rate": 9.92718902414527e-07, "loss": 0.3382, "step": 4762 }, { "epoch": 0.08279302612595386, "grad_norm": 2.312081682681594, "learning_rate": 9.927141152254837e-07, "loss": 0.4661, "step": 4763 }, { "epoch": 0.0828104086634567, "grad_norm": 1.9389838753506528, "learning_rate": 9.927093264747632e-07, "loss": 0.4559, "step": 4764 }, { "epoch": 0.08282779120095951, "grad_norm": 1.4678036709343192, "learning_rate": 9.927045361623802e-07, "loss": 0.4214, "step": 4765 }, { "epoch": 0.08284517373846234, "grad_norm": 2.6516703202667258, "learning_rate": 9.926997442883505e-07, "loss": 0.6492, "step": 4766 }, { "epoch": 0.08286255627596517, "grad_norm": 2.300097841489749, "learning_rate": 9.926949508526887e-07, "loss": 0.3483, "step": 4767 }, { "epoch": 0.08287993881346799, "grad_norm": 2.3804759097157184, "learning_rate": 9.926901558554103e-07, "loss": 0.5175, "step": 4768 }, { "epoch": 0.08289732135097082, "grad_norm": 2.4399822867719303, "learning_rate": 9.926853592965305e-07, "loss": 0.4623, "step": 4769 }, { "epoch": 0.08291470388847363, "grad_norm": 2.008320123829799, "learning_rate": 9.926805611760644e-07, "loss": 0.3897, "step": 4770 }, { "epoch": 0.08293208642597646, "grad_norm": 1.9226975528621668, "learning_rate": 9.926757614940274e-07, "loss": 0.6402, "step": 4771 }, { "epoch": 0.0829494689634793, "grad_norm": 1.3590422934729562, "learning_rate": 9.926709602504343e-07, "loss": 0.4889, "step": 4772 }, { "epoch": 0.08296685150098211, "grad_norm": 1.4274834852015788, "learning_rate": 9.926661574453007e-07, "loss": 0.495, "step": 4773 }, { "epoch": 0.08298423403848494, "grad_norm": 1.4272868364738125, "learning_rate": 9.926613530786416e-07, "loss": 0.3724, "step": 4774 }, { "epoch": 0.08300161657598776, "grad_norm": 2.268808566569639, "learning_rate": 9.926565471504725e-07, "loss": 0.392, "step": 4775 }, { "epoch": 0.08301899911349059, "grad_norm": 2.4719106598897653, "learning_rate": 9.926517396608083e-07, "loss": 0.2121, "step": 4776 }, { "epoch": 0.08303638165099342, "grad_norm": 2.196978529400147, "learning_rate": 9.926469306096643e-07, "loss": 0.3877, "step": 4777 }, { "epoch": 0.08305376418849623, "grad_norm": 1.504922375663954, "learning_rate": 9.92642119997056e-07, "loss": 0.5383, "step": 4778 }, { "epoch": 0.08307114672599906, "grad_norm": 1.5458113320110174, "learning_rate": 9.926373078229983e-07, "loss": 0.4418, "step": 4779 }, { "epoch": 0.08308852926350188, "grad_norm": 1.5884755429971404, "learning_rate": 9.926324940875067e-07, "loss": 0.3051, "step": 4780 }, { "epoch": 0.08310591180100471, "grad_norm": 2.196736799166442, "learning_rate": 9.926276787905967e-07, "loss": 0.4411, "step": 4781 }, { "epoch": 0.08312329433850754, "grad_norm": 2.3699717310402604, "learning_rate": 9.926228619322828e-07, "loss": 0.459, "step": 4782 }, { "epoch": 0.08314067687601036, "grad_norm": 1.7012718103119056, "learning_rate": 9.92618043512581e-07, "loss": 0.6513, "step": 4783 }, { "epoch": 0.08315805941351319, "grad_norm": 2.344044307467395, "learning_rate": 9.926132235315064e-07, "loss": 0.4712, "step": 4784 }, { "epoch": 0.083175441951016, "grad_norm": 2.2672235283251503, "learning_rate": 9.92608401989074e-07, "loss": 0.334, "step": 4785 }, { "epoch": 0.08319282448851884, "grad_norm": 2.371725575063467, "learning_rate": 9.92603578885299e-07, "loss": 0.5127, "step": 4786 }, { "epoch": 0.08321020702602167, "grad_norm": 4.6627161380733, "learning_rate": 9.925987542201972e-07, "loss": 0.3901, "step": 4787 }, { "epoch": 0.08322758956352448, "grad_norm": 2.2136748433407227, "learning_rate": 9.925939279937837e-07, "loss": 0.7859, "step": 4788 }, { "epoch": 0.08324497210102731, "grad_norm": 1.3897834557576307, "learning_rate": 9.925891002060737e-07, "loss": 0.2108, "step": 4789 }, { "epoch": 0.08326235463853013, "grad_norm": 2.374466921082846, "learning_rate": 9.925842708570825e-07, "loss": 0.5813, "step": 4790 }, { "epoch": 0.08327973717603296, "grad_norm": 2.203679288962128, "learning_rate": 9.925794399468252e-07, "loss": 0.3752, "step": 4791 }, { "epoch": 0.08329711971353579, "grad_norm": 3.6102000805000554, "learning_rate": 9.925746074753176e-07, "loss": 0.8662, "step": 4792 }, { "epoch": 0.0833145022510386, "grad_norm": 2.341117158745046, "learning_rate": 9.925697734425747e-07, "loss": 0.5409, "step": 4793 }, { "epoch": 0.08333188478854144, "grad_norm": 1.7478301584799207, "learning_rate": 9.92564937848612e-07, "loss": 0.5272, "step": 4794 }, { "epoch": 0.08334926732604425, "grad_norm": 2.2904408692437257, "learning_rate": 9.925601006934446e-07, "loss": 0.2819, "step": 4795 }, { "epoch": 0.08336664986354708, "grad_norm": 3.0255630016902653, "learning_rate": 9.92555261977088e-07, "loss": 0.5038, "step": 4796 }, { "epoch": 0.0833840324010499, "grad_norm": 2.492047012190934, "learning_rate": 9.925504216995574e-07, "loss": 0.4271, "step": 4797 }, { "epoch": 0.08340141493855273, "grad_norm": 1.8675571355177778, "learning_rate": 9.925455798608683e-07, "loss": 0.326, "step": 4798 }, { "epoch": 0.08341879747605556, "grad_norm": 1.7620828690999613, "learning_rate": 9.925407364610358e-07, "loss": 0.4002, "step": 4799 }, { "epoch": 0.08343618001355838, "grad_norm": 1.3755056450956844, "learning_rate": 9.925358915000756e-07, "loss": 0.3682, "step": 4800 }, { "epoch": 0.0834535625510612, "grad_norm": 1.398014596730504, "learning_rate": 9.925310449780029e-07, "loss": 0.4402, "step": 4801 }, { "epoch": 0.08347094508856402, "grad_norm": 3.3398933664251675, "learning_rate": 9.92526196894833e-07, "loss": 0.6883, "step": 4802 }, { "epoch": 0.08348832762606685, "grad_norm": 1.7488679635333033, "learning_rate": 9.925213472505812e-07, "loss": 0.5219, "step": 4803 }, { "epoch": 0.08350571016356968, "grad_norm": 1.0637163234291005, "learning_rate": 9.92516496045263e-07, "loss": 0.1916, "step": 4804 }, { "epoch": 0.0835230927010725, "grad_norm": 1.605092505709788, "learning_rate": 9.925116432788939e-07, "loss": 0.3748, "step": 4805 }, { "epoch": 0.08354047523857533, "grad_norm": 1.8247279123959772, "learning_rate": 9.92506788951489e-07, "loss": 0.4429, "step": 4806 }, { "epoch": 0.08355785777607815, "grad_norm": 1.2981680782021694, "learning_rate": 9.925019330630637e-07, "loss": 0.4341, "step": 4807 }, { "epoch": 0.08357524031358098, "grad_norm": 2.516145582351205, "learning_rate": 9.924970756136338e-07, "loss": 0.5634, "step": 4808 }, { "epoch": 0.0835926228510838, "grad_norm": 3.410465377749843, "learning_rate": 9.924922166032143e-07, "loss": 0.3566, "step": 4809 }, { "epoch": 0.08361000538858662, "grad_norm": 1.8532369833459492, "learning_rate": 9.924873560318206e-07, "loss": 0.3747, "step": 4810 }, { "epoch": 0.08362738792608945, "grad_norm": 1.2634052390683044, "learning_rate": 9.924824938994683e-07, "loss": 0.5114, "step": 4811 }, { "epoch": 0.08364477046359227, "grad_norm": 1.55626513570346, "learning_rate": 9.924776302061727e-07, "loss": 0.3866, "step": 4812 }, { "epoch": 0.0836621530010951, "grad_norm": 3.0575669947722437, "learning_rate": 9.924727649519491e-07, "loss": 0.5488, "step": 4813 }, { "epoch": 0.08367953553859793, "grad_norm": 3.6893664918203486, "learning_rate": 9.924678981368132e-07, "loss": 0.4978, "step": 4814 }, { "epoch": 0.08369691807610075, "grad_norm": 1.5543325650219484, "learning_rate": 9.924630297607803e-07, "loss": 0.4967, "step": 4815 }, { "epoch": 0.08371430061360358, "grad_norm": 3.063529150724028, "learning_rate": 9.92458159823866e-07, "loss": 0.5011, "step": 4816 }, { "epoch": 0.08373168315110639, "grad_norm": 1.7185493442905648, "learning_rate": 9.924532883260852e-07, "loss": 0.3206, "step": 4817 }, { "epoch": 0.08374906568860922, "grad_norm": 2.498142437971727, "learning_rate": 9.92448415267454e-07, "loss": 0.5846, "step": 4818 }, { "epoch": 0.08376644822611205, "grad_norm": 1.5125335266297126, "learning_rate": 9.924435406479872e-07, "loss": 0.4648, "step": 4819 }, { "epoch": 0.08378383076361487, "grad_norm": 1.5752400819609425, "learning_rate": 9.924386644677008e-07, "loss": 0.4645, "step": 4820 }, { "epoch": 0.0838012133011177, "grad_norm": 1.9263248824926498, "learning_rate": 9.9243378672661e-07, "loss": 0.3124, "step": 4821 }, { "epoch": 0.08381859583862052, "grad_norm": 1.7756363360254865, "learning_rate": 9.924289074247302e-07, "loss": 0.5756, "step": 4822 }, { "epoch": 0.08383597837612335, "grad_norm": 2.857530245623121, "learning_rate": 9.924240265620772e-07, "loss": 0.391, "step": 4823 }, { "epoch": 0.08385336091362618, "grad_norm": 2.2211639753040817, "learning_rate": 9.92419144138666e-07, "loss": 0.1801, "step": 4824 }, { "epoch": 0.08387074345112899, "grad_norm": 2.1032697637219737, "learning_rate": 9.924142601545124e-07, "loss": 0.6457, "step": 4825 }, { "epoch": 0.08388812598863182, "grad_norm": 2.0653207908362483, "learning_rate": 9.924093746096318e-07, "loss": 0.5783, "step": 4826 }, { "epoch": 0.08390550852613464, "grad_norm": 2.0260568725686823, "learning_rate": 9.924044875040398e-07, "loss": 0.5825, "step": 4827 }, { "epoch": 0.08392289106363747, "grad_norm": 1.417615379487425, "learning_rate": 9.923995988377516e-07, "loss": 0.5083, "step": 4828 }, { "epoch": 0.0839402736011403, "grad_norm": 2.7138296777851028, "learning_rate": 9.923947086107828e-07, "loss": 0.5215, "step": 4829 }, { "epoch": 0.08395765613864312, "grad_norm": 1.590467165706032, "learning_rate": 9.923898168231493e-07, "loss": 0.2214, "step": 4830 }, { "epoch": 0.08397503867614595, "grad_norm": 1.4053315425421162, "learning_rate": 9.92384923474866e-07, "loss": 0.4164, "step": 4831 }, { "epoch": 0.08399242121364876, "grad_norm": 1.190902846880303, "learning_rate": 9.923800285659487e-07, "loss": 0.7012, "step": 4832 }, { "epoch": 0.0840098037511516, "grad_norm": 1.396948271940074, "learning_rate": 9.92375132096413e-07, "loss": 0.7883, "step": 4833 }, { "epoch": 0.08402718628865442, "grad_norm": 1.9591423137595736, "learning_rate": 9.92370234066274e-07, "loss": 0.2486, "step": 4834 }, { "epoch": 0.08404456882615724, "grad_norm": 1.4765625860100964, "learning_rate": 9.923653344755479e-07, "loss": 0.4167, "step": 4835 }, { "epoch": 0.08406195136366007, "grad_norm": 1.832651103709643, "learning_rate": 9.923604333242497e-07, "loss": 0.4063, "step": 4836 }, { "epoch": 0.08407933390116289, "grad_norm": 3.273493560811966, "learning_rate": 9.923555306123951e-07, "loss": 0.2776, "step": 4837 }, { "epoch": 0.08409671643866572, "grad_norm": 2.06211998614391, "learning_rate": 9.923506263399997e-07, "loss": 0.3974, "step": 4838 }, { "epoch": 0.08411409897616855, "grad_norm": 2.352215189857191, "learning_rate": 9.923457205070789e-07, "loss": 0.5163, "step": 4839 }, { "epoch": 0.08413148151367136, "grad_norm": 1.3580932152227432, "learning_rate": 9.923408131136485e-07, "loss": 0.4548, "step": 4840 }, { "epoch": 0.0841488640511742, "grad_norm": 1.4678315314173391, "learning_rate": 9.923359041597235e-07, "loss": 0.1808, "step": 4841 }, { "epoch": 0.08416624658867701, "grad_norm": 1.804483610957404, "learning_rate": 9.923309936453202e-07, "loss": 0.5828, "step": 4842 }, { "epoch": 0.08418362912617984, "grad_norm": 2.2035652002814254, "learning_rate": 9.923260815704537e-07, "loss": 0.4886, "step": 4843 }, { "epoch": 0.08420101166368267, "grad_norm": 2.576335617495028, "learning_rate": 9.923211679351397e-07, "loss": 0.5837, "step": 4844 }, { "epoch": 0.08421839420118549, "grad_norm": 2.4210721625590272, "learning_rate": 9.923162527393936e-07, "loss": 0.6742, "step": 4845 }, { "epoch": 0.08423577673868832, "grad_norm": 1.6946590747552248, "learning_rate": 9.923113359832313e-07, "loss": 0.4279, "step": 4846 }, { "epoch": 0.08425315927619113, "grad_norm": 1.654430640321318, "learning_rate": 9.923064176666682e-07, "loss": 0.4758, "step": 4847 }, { "epoch": 0.08427054181369396, "grad_norm": 1.5925075375263855, "learning_rate": 9.9230149778972e-07, "loss": 0.7333, "step": 4848 }, { "epoch": 0.0842879243511968, "grad_norm": 1.9185233525824334, "learning_rate": 9.92296576352402e-07, "loss": 0.2244, "step": 4849 }, { "epoch": 0.08430530688869961, "grad_norm": 4.02589129955956, "learning_rate": 9.922916533547301e-07, "loss": 0.4496, "step": 4850 }, { "epoch": 0.08432268942620244, "grad_norm": 3.4597178940590876, "learning_rate": 9.922867287967198e-07, "loss": 0.6477, "step": 4851 }, { "epoch": 0.08434007196370526, "grad_norm": 2.1685739214017863, "learning_rate": 9.922818026783866e-07, "loss": 0.5984, "step": 4852 }, { "epoch": 0.08435745450120809, "grad_norm": 1.9670080583486818, "learning_rate": 9.922768749997464e-07, "loss": 0.2884, "step": 4853 }, { "epoch": 0.08437483703871092, "grad_norm": 2.4685818119453806, "learning_rate": 9.922719457608145e-07, "loss": 0.4287, "step": 4854 }, { "epoch": 0.08439221957621373, "grad_norm": 1.518534335701692, "learning_rate": 9.922670149616066e-07, "loss": 0.5973, "step": 4855 }, { "epoch": 0.08440960211371656, "grad_norm": 1.7414050090802118, "learning_rate": 9.922620826021385e-07, "loss": 0.5283, "step": 4856 }, { "epoch": 0.08442698465121938, "grad_norm": 1.8540374350686146, "learning_rate": 9.922571486824256e-07, "loss": 0.2936, "step": 4857 }, { "epoch": 0.08444436718872221, "grad_norm": 3.678018674696377, "learning_rate": 9.922522132024838e-07, "loss": 0.591, "step": 4858 }, { "epoch": 0.08446174972622504, "grad_norm": 3.1829454895385694, "learning_rate": 9.922472761623285e-07, "loss": 0.4145, "step": 4859 }, { "epoch": 0.08447913226372786, "grad_norm": 1.1998841095344515, "learning_rate": 9.922423375619758e-07, "loss": 0.3478, "step": 4860 }, { "epoch": 0.08449651480123069, "grad_norm": 1.3981553271875768, "learning_rate": 9.922373974014407e-07, "loss": 0.4478, "step": 4861 }, { "epoch": 0.0845138973387335, "grad_norm": 1.7753571329223186, "learning_rate": 9.922324556807393e-07, "loss": 0.5277, "step": 4862 }, { "epoch": 0.08453127987623633, "grad_norm": 4.616677375552377, "learning_rate": 9.922275123998869e-07, "loss": 0.3367, "step": 4863 }, { "epoch": 0.08454866241373916, "grad_norm": 2.41725643009155, "learning_rate": 9.922225675588995e-07, "loss": 0.4335, "step": 4864 }, { "epoch": 0.08456604495124198, "grad_norm": 2.2234296965725675, "learning_rate": 9.922176211577928e-07, "loss": 0.6215, "step": 4865 }, { "epoch": 0.08458342748874481, "grad_norm": 1.7478695096052606, "learning_rate": 9.922126731965823e-07, "loss": 0.208, "step": 4866 }, { "epoch": 0.08460081002624763, "grad_norm": 1.3736764372458456, "learning_rate": 9.922077236752836e-07, "loss": 0.3334, "step": 4867 }, { "epoch": 0.08461819256375046, "grad_norm": 2.3753932965631552, "learning_rate": 9.922027725939125e-07, "loss": 0.3517, "step": 4868 }, { "epoch": 0.08463557510125327, "grad_norm": 0.9892354413259129, "learning_rate": 9.92197819952485e-07, "loss": 0.2257, "step": 4869 }, { "epoch": 0.0846529576387561, "grad_norm": 1.7642856499241, "learning_rate": 9.921928657510163e-07, "loss": 0.4299, "step": 4870 }, { "epoch": 0.08467034017625893, "grad_norm": 1.502359353399299, "learning_rate": 9.921879099895222e-07, "loss": 0.3606, "step": 4871 }, { "epoch": 0.08468772271376175, "grad_norm": 1.679813979390882, "learning_rate": 9.921829526680187e-07, "loss": 0.5668, "step": 4872 }, { "epoch": 0.08470510525126458, "grad_norm": 1.8950298333316993, "learning_rate": 9.921779937865212e-07, "loss": 0.3241, "step": 4873 }, { "epoch": 0.0847224877887674, "grad_norm": 1.8750421562883437, "learning_rate": 9.921730333450458e-07, "loss": 0.2845, "step": 4874 }, { "epoch": 0.08473987032627023, "grad_norm": 1.810464188453334, "learning_rate": 9.921680713436077e-07, "loss": 0.5169, "step": 4875 }, { "epoch": 0.08475725286377306, "grad_norm": 2.680117947492362, "learning_rate": 9.92163107782223e-07, "loss": 0.9127, "step": 4876 }, { "epoch": 0.08477463540127587, "grad_norm": 3.979906724691941, "learning_rate": 9.921581426609074e-07, "loss": 0.4461, "step": 4877 }, { "epoch": 0.0847920179387787, "grad_norm": 2.0352766944417993, "learning_rate": 9.921531759796765e-07, "loss": 0.3699, "step": 4878 }, { "epoch": 0.08480940047628152, "grad_norm": 1.5762359642760844, "learning_rate": 9.92148207738546e-07, "loss": 0.4109, "step": 4879 }, { "epoch": 0.08482678301378435, "grad_norm": 1.7720214670821353, "learning_rate": 9.921432379375319e-07, "loss": 0.3583, "step": 4880 }, { "epoch": 0.08484416555128718, "grad_norm": 5.99671815730267, "learning_rate": 9.921382665766498e-07, "loss": 0.4286, "step": 4881 }, { "epoch": 0.08486154808879, "grad_norm": 4.705769324364261, "learning_rate": 9.921332936559154e-07, "loss": 0.2317, "step": 4882 }, { "epoch": 0.08487893062629283, "grad_norm": 1.9397403467954004, "learning_rate": 9.921283191753446e-07, "loss": 0.5638, "step": 4883 }, { "epoch": 0.08489631316379564, "grad_norm": 3.5451754158620994, "learning_rate": 9.92123343134953e-07, "loss": 0.4939, "step": 4884 }, { "epoch": 0.08491369570129847, "grad_norm": 1.7052635427448501, "learning_rate": 9.921183655347565e-07, "loss": 0.4116, "step": 4885 }, { "epoch": 0.0849310782388013, "grad_norm": 2.789526053359388, "learning_rate": 9.921133863747708e-07, "loss": 0.9091, "step": 4886 }, { "epoch": 0.08494846077630412, "grad_norm": 3.0808771323089257, "learning_rate": 9.92108405655012e-07, "loss": 0.5264, "step": 4887 }, { "epoch": 0.08496584331380695, "grad_norm": 2.42137479313957, "learning_rate": 9.921034233754952e-07, "loss": 0.4003, "step": 4888 }, { "epoch": 0.08498322585130977, "grad_norm": 1.7780400795306535, "learning_rate": 9.920984395362368e-07, "loss": 0.5232, "step": 4889 }, { "epoch": 0.0850006083888126, "grad_norm": 3.081369376776865, "learning_rate": 9.920934541372526e-07, "loss": 0.5066, "step": 4890 }, { "epoch": 0.08501799092631543, "grad_norm": 1.5094091093389834, "learning_rate": 9.92088467178558e-07, "loss": 0.3985, "step": 4891 }, { "epoch": 0.08503537346381825, "grad_norm": 1.9897255077730873, "learning_rate": 9.920834786601688e-07, "loss": 0.3751, "step": 4892 }, { "epoch": 0.08505275600132108, "grad_norm": 1.4025651323540809, "learning_rate": 9.920784885821013e-07, "loss": 0.4045, "step": 4893 }, { "epoch": 0.08507013853882389, "grad_norm": 1.7291355252995058, "learning_rate": 9.92073496944371e-07, "loss": 0.5164, "step": 4894 }, { "epoch": 0.08508752107632672, "grad_norm": 1.4545693041867296, "learning_rate": 9.920685037469937e-07, "loss": 0.5066, "step": 4895 }, { "epoch": 0.08510490361382955, "grad_norm": 2.156281283709262, "learning_rate": 9.920635089899852e-07, "loss": 0.296, "step": 4896 }, { "epoch": 0.08512228615133237, "grad_norm": 2.164699341987143, "learning_rate": 9.920585126733614e-07, "loss": 0.3419, "step": 4897 }, { "epoch": 0.0851396686888352, "grad_norm": 1.952602472038306, "learning_rate": 9.920535147971382e-07, "loss": 0.5094, "step": 4898 }, { "epoch": 0.08515705122633802, "grad_norm": 5.151020519222656, "learning_rate": 9.920485153613317e-07, "loss": 0.3459, "step": 4899 }, { "epoch": 0.08517443376384085, "grad_norm": 2.1075699414833897, "learning_rate": 9.92043514365957e-07, "loss": 0.5646, "step": 4900 }, { "epoch": 0.08519181630134368, "grad_norm": 2.1407685132311705, "learning_rate": 9.920385118110307e-07, "loss": 0.5351, "step": 4901 }, { "epoch": 0.08520919883884649, "grad_norm": 3.1211519026101158, "learning_rate": 9.92033507696568e-07, "loss": 0.4929, "step": 4902 }, { "epoch": 0.08522658137634932, "grad_norm": 1.963448762712541, "learning_rate": 9.920285020225853e-07, "loss": 0.6213, "step": 4903 }, { "epoch": 0.08524396391385214, "grad_norm": 2.2764684906092656, "learning_rate": 9.920234947890983e-07, "loss": 0.6056, "step": 4904 }, { "epoch": 0.08526134645135497, "grad_norm": 2.9521264836658294, "learning_rate": 9.920184859961228e-07, "loss": 0.581, "step": 4905 }, { "epoch": 0.0852787289888578, "grad_norm": 2.1224120125702024, "learning_rate": 9.920134756436748e-07, "loss": 0.4062, "step": 4906 }, { "epoch": 0.08529611152636062, "grad_norm": 1.3714587513839664, "learning_rate": 9.9200846373177e-07, "loss": 0.7122, "step": 4907 }, { "epoch": 0.08531349406386345, "grad_norm": 1.910379368870592, "learning_rate": 9.920034502604245e-07, "loss": 0.6425, "step": 4908 }, { "epoch": 0.08533087660136626, "grad_norm": 1.6259264230314014, "learning_rate": 9.919984352296538e-07, "loss": 0.6164, "step": 4909 }, { "epoch": 0.08534825913886909, "grad_norm": 2.2874108890173503, "learning_rate": 9.919934186394743e-07, "loss": 0.3006, "step": 4910 }, { "epoch": 0.08536564167637192, "grad_norm": 2.454673765574669, "learning_rate": 9.919884004899015e-07, "loss": 0.5095, "step": 4911 }, { "epoch": 0.08538302421387474, "grad_norm": 2.662865169334, "learning_rate": 9.919833807809517e-07, "loss": 0.5935, "step": 4912 }, { "epoch": 0.08540040675137757, "grad_norm": 1.760665206464084, "learning_rate": 9.919783595126404e-07, "loss": 0.3858, "step": 4913 }, { "epoch": 0.08541778928888039, "grad_norm": 2.838888770023046, "learning_rate": 9.919733366849837e-07, "loss": 0.4053, "step": 4914 }, { "epoch": 0.08543517182638322, "grad_norm": 1.5120555629201275, "learning_rate": 9.919683122979977e-07, "loss": 0.4332, "step": 4915 }, { "epoch": 0.08545255436388605, "grad_norm": 2.338974030380187, "learning_rate": 9.919632863516978e-07, "loss": 0.3243, "step": 4916 }, { "epoch": 0.08546993690138886, "grad_norm": 1.4634135070788512, "learning_rate": 9.919582588461006e-07, "loss": 0.4844, "step": 4917 }, { "epoch": 0.08548731943889169, "grad_norm": 1.8431550252002484, "learning_rate": 9.919532297812215e-07, "loss": 0.275, "step": 4918 }, { "epoch": 0.08550470197639451, "grad_norm": 3.4662153865387277, "learning_rate": 9.919481991570768e-07, "loss": 0.6482, "step": 4919 }, { "epoch": 0.08552208451389734, "grad_norm": 2.3500002824267416, "learning_rate": 9.919431669736822e-07, "loss": 0.5614, "step": 4920 }, { "epoch": 0.08553946705140017, "grad_norm": 2.669093003726799, "learning_rate": 9.919381332310538e-07, "loss": 0.7446, "step": 4921 }, { "epoch": 0.08555684958890299, "grad_norm": 2.0315952997961984, "learning_rate": 9.919330979292075e-07, "loss": 0.3423, "step": 4922 }, { "epoch": 0.08557423212640582, "grad_norm": 1.468604792904124, "learning_rate": 9.91928061068159e-07, "loss": 0.2471, "step": 4923 }, { "epoch": 0.08559161466390863, "grad_norm": 2.9460888623413073, "learning_rate": 9.91923022647925e-07, "loss": 0.4788, "step": 4924 }, { "epoch": 0.08560899720141146, "grad_norm": 1.9792003707307049, "learning_rate": 9.919179826685205e-07, "loss": 0.3979, "step": 4925 }, { "epoch": 0.0856263797389143, "grad_norm": 3.5672448630510387, "learning_rate": 9.919129411299622e-07, "loss": 0.2793, "step": 4926 }, { "epoch": 0.08564376227641711, "grad_norm": 2.188406794285191, "learning_rate": 9.919078980322659e-07, "loss": 0.5311, "step": 4927 }, { "epoch": 0.08566114481391994, "grad_norm": 1.8750894998571468, "learning_rate": 9.919028533754474e-07, "loss": 0.4331, "step": 4928 }, { "epoch": 0.08567852735142276, "grad_norm": 2.5730914340588082, "learning_rate": 9.91897807159523e-07, "loss": 0.4507, "step": 4929 }, { "epoch": 0.08569590988892559, "grad_norm": 2.4983327402282614, "learning_rate": 9.918927593845084e-07, "loss": 0.5243, "step": 4930 }, { "epoch": 0.08571329242642842, "grad_norm": 1.3716596995702879, "learning_rate": 9.918877100504195e-07, "loss": 0.3174, "step": 4931 }, { "epoch": 0.08573067496393123, "grad_norm": 1.7344640927142636, "learning_rate": 9.91882659157273e-07, "loss": 0.4919, "step": 4932 }, { "epoch": 0.08574805750143406, "grad_norm": 1.8033512737645545, "learning_rate": 9.918776067050838e-07, "loss": 0.3668, "step": 4933 }, { "epoch": 0.08576544003893688, "grad_norm": 1.734679007478086, "learning_rate": 9.91872552693869e-07, "loss": 0.5236, "step": 4934 }, { "epoch": 0.08578282257643971, "grad_norm": 2.410086206748528, "learning_rate": 9.91867497123644e-07, "loss": 0.2003, "step": 4935 }, { "epoch": 0.08580020511394253, "grad_norm": 2.20536444014592, "learning_rate": 9.91862439994425e-07, "loss": 0.5935, "step": 4936 }, { "epoch": 0.08581758765144536, "grad_norm": 2.6515210864537977, "learning_rate": 9.91857381306228e-07, "loss": 0.7862, "step": 4937 }, { "epoch": 0.08583497018894819, "grad_norm": 2.115926423334773, "learning_rate": 9.91852321059069e-07, "loss": 0.4233, "step": 4938 }, { "epoch": 0.085852352726451, "grad_norm": 1.9102687276325425, "learning_rate": 9.918472592529643e-07, "loss": 0.5321, "step": 4939 }, { "epoch": 0.08586973526395383, "grad_norm": 1.531430770218331, "learning_rate": 9.918421958879294e-07, "loss": 0.4228, "step": 4940 }, { "epoch": 0.08588711780145665, "grad_norm": 3.731575913332258, "learning_rate": 9.918371309639809e-07, "loss": 0.5506, "step": 4941 }, { "epoch": 0.08590450033895948, "grad_norm": 2.336504969646633, "learning_rate": 9.918320644811346e-07, "loss": 0.6221, "step": 4942 }, { "epoch": 0.08592188287646231, "grad_norm": 1.4650906834072486, "learning_rate": 9.918269964394065e-07, "loss": 0.2937, "step": 4943 }, { "epoch": 0.08593926541396513, "grad_norm": 1.7512952475984054, "learning_rate": 9.918219268388128e-07, "loss": 0.4023, "step": 4944 }, { "epoch": 0.08595664795146796, "grad_norm": 2.6011837734867873, "learning_rate": 9.918168556793694e-07, "loss": 0.4639, "step": 4945 }, { "epoch": 0.08597403048897077, "grad_norm": 1.9977599762894613, "learning_rate": 9.918117829610926e-07, "loss": 0.1729, "step": 4946 }, { "epoch": 0.0859914130264736, "grad_norm": 1.4209376254561945, "learning_rate": 9.918067086839983e-07, "loss": 0.5288, "step": 4947 }, { "epoch": 0.08600879556397643, "grad_norm": 1.807802767712258, "learning_rate": 9.918016328481027e-07, "loss": 0.4557, "step": 4948 }, { "epoch": 0.08602617810147925, "grad_norm": 2.698666482146885, "learning_rate": 9.91796555453422e-07, "loss": 0.9407, "step": 4949 }, { "epoch": 0.08604356063898208, "grad_norm": 2.458636430199119, "learning_rate": 9.91791476499972e-07, "loss": 0.5152, "step": 4950 }, { "epoch": 0.0860609431764849, "grad_norm": 0.9258902336285473, "learning_rate": 9.917863959877688e-07, "loss": 0.3101, "step": 4951 }, { "epoch": 0.08607832571398773, "grad_norm": 1.384127514185076, "learning_rate": 9.91781313916829e-07, "loss": 0.634, "step": 4952 }, { "epoch": 0.08609570825149056, "grad_norm": 1.608228073676122, "learning_rate": 9.91776230287168e-07, "loss": 0.1876, "step": 4953 }, { "epoch": 0.08611309078899337, "grad_norm": 2.2113527476000354, "learning_rate": 9.917711450988022e-07, "loss": 0.7836, "step": 4954 }, { "epoch": 0.0861304733264962, "grad_norm": 3.968161768682583, "learning_rate": 9.917660583517479e-07, "loss": 0.5653, "step": 4955 }, { "epoch": 0.08614785586399902, "grad_norm": 2.3038171536882834, "learning_rate": 9.917609700460213e-07, "loss": 0.5771, "step": 4956 }, { "epoch": 0.08616523840150185, "grad_norm": 2.575073749267762, "learning_rate": 9.91755880181638e-07, "loss": 0.4731, "step": 4957 }, { "epoch": 0.08618262093900468, "grad_norm": 2.3992630711670575, "learning_rate": 9.917507887586145e-07, "loss": 0.7222, "step": 4958 }, { "epoch": 0.0862000034765075, "grad_norm": 2.83165336343113, "learning_rate": 9.91745695776967e-07, "loss": 0.748, "step": 4959 }, { "epoch": 0.08621738601401033, "grad_norm": 1.5932698451443623, "learning_rate": 9.917406012367116e-07, "loss": 0.3126, "step": 4960 }, { "epoch": 0.08623476855151314, "grad_norm": 3.0098670521236546, "learning_rate": 9.917355051378643e-07, "loss": 1.1831, "step": 4961 }, { "epoch": 0.08625215108901597, "grad_norm": 1.6230928326597616, "learning_rate": 9.917304074804414e-07, "loss": 0.4619, "step": 4962 }, { "epoch": 0.0862695336265188, "grad_norm": 2.789670923522198, "learning_rate": 9.917253082644588e-07, "loss": 0.4541, "step": 4963 }, { "epoch": 0.08628691616402162, "grad_norm": 1.8510356027278683, "learning_rate": 9.917202074899329e-07, "loss": 0.3726, "step": 4964 }, { "epoch": 0.08630429870152445, "grad_norm": 2.0486053326276084, "learning_rate": 9.9171510515688e-07, "loss": 0.2801, "step": 4965 }, { "epoch": 0.08632168123902727, "grad_norm": 3.5349444248078594, "learning_rate": 9.917100012653158e-07, "loss": 0.4542, "step": 4966 }, { "epoch": 0.0863390637765301, "grad_norm": 2.755464337857787, "learning_rate": 9.91704895815257e-07, "loss": 0.4212, "step": 4967 }, { "epoch": 0.08635644631403293, "grad_norm": 2.553723036955946, "learning_rate": 9.916997888067195e-07, "loss": 0.3072, "step": 4968 }, { "epoch": 0.08637382885153574, "grad_norm": 4.131124360570584, "learning_rate": 9.916946802397196e-07, "loss": 0.3402, "step": 4969 }, { "epoch": 0.08639121138903857, "grad_norm": 1.4069828271421454, "learning_rate": 9.916895701142733e-07, "loss": 0.4885, "step": 4970 }, { "epoch": 0.08640859392654139, "grad_norm": 1.3970772842766046, "learning_rate": 9.91684458430397e-07, "loss": 0.7616, "step": 4971 }, { "epoch": 0.08642597646404422, "grad_norm": 2.8345168260983167, "learning_rate": 9.916793451881069e-07, "loss": 0.2917, "step": 4972 }, { "epoch": 0.08644335900154705, "grad_norm": 1.6215990888555116, "learning_rate": 9.91674230387419e-07, "loss": 0.3643, "step": 4973 }, { "epoch": 0.08646074153904987, "grad_norm": 1.7204192957799052, "learning_rate": 9.916691140283497e-07, "loss": 0.5288, "step": 4974 }, { "epoch": 0.0864781240765527, "grad_norm": 1.9109983282152292, "learning_rate": 9.916639961109152e-07, "loss": 0.6596, "step": 4975 }, { "epoch": 0.08649550661405551, "grad_norm": 1.1256564497111246, "learning_rate": 9.916588766351316e-07, "loss": 0.3355, "step": 4976 }, { "epoch": 0.08651288915155834, "grad_norm": 1.3812484814934363, "learning_rate": 9.916537556010152e-07, "loss": 0.4125, "step": 4977 }, { "epoch": 0.08653027168906118, "grad_norm": 3.577750066750874, "learning_rate": 9.916486330085824e-07, "loss": 0.7014, "step": 4978 }, { "epoch": 0.08654765422656399, "grad_norm": 2.6248924060734016, "learning_rate": 9.916435088578492e-07, "loss": 0.3338, "step": 4979 }, { "epoch": 0.08656503676406682, "grad_norm": 1.632904507329348, "learning_rate": 9.91638383148832e-07, "loss": 0.6068, "step": 4980 }, { "epoch": 0.08658241930156964, "grad_norm": 2.659285440857565, "learning_rate": 9.916332558815468e-07, "loss": 0.85, "step": 4981 }, { "epoch": 0.08659980183907247, "grad_norm": 1.5748224827930857, "learning_rate": 9.9162812705601e-07, "loss": 0.3128, "step": 4982 }, { "epoch": 0.0866171843765753, "grad_norm": 1.324254718269334, "learning_rate": 9.916229966722381e-07, "loss": 0.2562, "step": 4983 }, { "epoch": 0.08663456691407811, "grad_norm": 2.293445883460007, "learning_rate": 9.91617864730247e-07, "loss": 0.3394, "step": 4984 }, { "epoch": 0.08665194945158095, "grad_norm": 1.8087316195755845, "learning_rate": 9.91612731230053e-07, "loss": 0.3686, "step": 4985 }, { "epoch": 0.08666933198908376, "grad_norm": 1.9967638677067394, "learning_rate": 9.916075961716726e-07, "loss": 0.3938, "step": 4986 }, { "epoch": 0.08668671452658659, "grad_norm": 2.1310689067003508, "learning_rate": 9.916024595551218e-07, "loss": 0.4154, "step": 4987 }, { "epoch": 0.08670409706408942, "grad_norm": 1.6685104738271814, "learning_rate": 9.915973213804172e-07, "loss": 0.5894, "step": 4988 }, { "epoch": 0.08672147960159224, "grad_norm": 2.9291023533665377, "learning_rate": 9.915921816475748e-07, "loss": 0.6612, "step": 4989 }, { "epoch": 0.08673886213909507, "grad_norm": 1.6644363257666035, "learning_rate": 9.91587040356611e-07, "loss": 0.3249, "step": 4990 }, { "epoch": 0.08675624467659789, "grad_norm": 1.8823211202992645, "learning_rate": 9.915818975075422e-07, "loss": 0.4753, "step": 4991 }, { "epoch": 0.08677362721410072, "grad_norm": 1.7669980951372526, "learning_rate": 9.915767531003844e-07, "loss": 0.4738, "step": 4992 }, { "epoch": 0.08679100975160355, "grad_norm": 1.7878723984619254, "learning_rate": 9.915716071351543e-07, "loss": 0.6275, "step": 4993 }, { "epoch": 0.08680839228910636, "grad_norm": 2.0261479743342816, "learning_rate": 9.91566459611868e-07, "loss": 0.5702, "step": 4994 }, { "epoch": 0.08682577482660919, "grad_norm": 2.8273848511604247, "learning_rate": 9.915613105305418e-07, "loss": 0.3449, "step": 4995 }, { "epoch": 0.08684315736411201, "grad_norm": 2.178667245304541, "learning_rate": 9.915561598911922e-07, "loss": 0.4712, "step": 4996 }, { "epoch": 0.08686053990161484, "grad_norm": 1.8911280792803067, "learning_rate": 9.91551007693835e-07, "loss": 0.3288, "step": 4997 }, { "epoch": 0.08687792243911767, "grad_norm": 1.40691099820487, "learning_rate": 9.91545853938487e-07, "loss": 0.43, "step": 4998 }, { "epoch": 0.08689530497662049, "grad_norm": 2.218115882406358, "learning_rate": 9.915406986251647e-07, "loss": 0.397, "step": 4999 }, { "epoch": 0.08691268751412332, "grad_norm": 1.688931732673742, "learning_rate": 9.915355417538841e-07, "loss": 0.4611, "step": 5000 }, { "epoch": 0.08693007005162613, "grad_norm": 1.7915353784215475, "learning_rate": 9.915303833246615e-07, "loss": 0.3483, "step": 5001 }, { "epoch": 0.08694745258912896, "grad_norm": 2.146664200167725, "learning_rate": 9.915252233375135e-07, "loss": 0.3786, "step": 5002 }, { "epoch": 0.08696483512663179, "grad_norm": 1.4582480673036846, "learning_rate": 9.915200617924563e-07, "loss": 0.2387, "step": 5003 }, { "epoch": 0.08698221766413461, "grad_norm": 1.5085267877136368, "learning_rate": 9.915148986895064e-07, "loss": 0.5449, "step": 5004 }, { "epoch": 0.08699960020163744, "grad_norm": 4.239517068959342, "learning_rate": 9.915097340286798e-07, "loss": 0.7939, "step": 5005 }, { "epoch": 0.08701698273914026, "grad_norm": 4.402645131911923, "learning_rate": 9.915045678099932e-07, "loss": 0.3158, "step": 5006 }, { "epoch": 0.08703436527664309, "grad_norm": 4.068135368706762, "learning_rate": 9.91499400033463e-07, "loss": 0.3613, "step": 5007 }, { "epoch": 0.0870517478141459, "grad_norm": 3.580217148866641, "learning_rate": 9.914942306991054e-07, "loss": 0.4154, "step": 5008 }, { "epoch": 0.08706913035164873, "grad_norm": 1.8590411628342745, "learning_rate": 9.91489059806937e-07, "loss": 0.3069, "step": 5009 }, { "epoch": 0.08708651288915156, "grad_norm": 1.5926692385879544, "learning_rate": 9.914838873569738e-07, "loss": 0.4366, "step": 5010 }, { "epoch": 0.08710389542665438, "grad_norm": 1.2665826377127094, "learning_rate": 9.914787133492327e-07, "loss": 0.2643, "step": 5011 }, { "epoch": 0.08712127796415721, "grad_norm": 2.1235013472192295, "learning_rate": 9.914735377837297e-07, "loss": 0.46, "step": 5012 }, { "epoch": 0.08713866050166003, "grad_norm": 1.7855686675845606, "learning_rate": 9.914683606604813e-07, "loss": 0.5299, "step": 5013 }, { "epoch": 0.08715604303916286, "grad_norm": 2.274700343077748, "learning_rate": 9.914631819795041e-07, "loss": 0.4425, "step": 5014 }, { "epoch": 0.08717342557666569, "grad_norm": 1.2865593954006354, "learning_rate": 9.914580017408143e-07, "loss": 0.6159, "step": 5015 }, { "epoch": 0.0871908081141685, "grad_norm": 1.975597121251512, "learning_rate": 9.914528199444284e-07, "loss": 0.3399, "step": 5016 }, { "epoch": 0.08720819065167133, "grad_norm": 2.8075657438665718, "learning_rate": 9.914476365903629e-07, "loss": 0.4423, "step": 5017 }, { "epoch": 0.08722557318917415, "grad_norm": 1.3918811776942157, "learning_rate": 9.91442451678634e-07, "loss": 0.3516, "step": 5018 }, { "epoch": 0.08724295572667698, "grad_norm": 1.5168384228716085, "learning_rate": 9.914372652092583e-07, "loss": 0.7327, "step": 5019 }, { "epoch": 0.08726033826417981, "grad_norm": 2.4571966714357694, "learning_rate": 9.914320771822522e-07, "loss": 0.4644, "step": 5020 }, { "epoch": 0.08727772080168263, "grad_norm": 2.1414704601202392, "learning_rate": 9.914268875976323e-07, "loss": 0.6024, "step": 5021 }, { "epoch": 0.08729510333918546, "grad_norm": 2.1971308722160092, "learning_rate": 9.914216964554148e-07, "loss": 0.4157, "step": 5022 }, { "epoch": 0.08731248587668827, "grad_norm": 2.0330463252109063, "learning_rate": 9.914165037556163e-07, "loss": 0.5979, "step": 5023 }, { "epoch": 0.0873298684141911, "grad_norm": 1.6141217243794939, "learning_rate": 9.914113094982531e-07, "loss": 0.2533, "step": 5024 }, { "epoch": 0.08734725095169393, "grad_norm": 1.7401584324085406, "learning_rate": 9.914061136833421e-07, "loss": 0.3268, "step": 5025 }, { "epoch": 0.08736463348919675, "grad_norm": 3.203116652114471, "learning_rate": 9.91400916310899e-07, "loss": 0.6499, "step": 5026 }, { "epoch": 0.08738201602669958, "grad_norm": 3.5563800925454605, "learning_rate": 9.913957173809412e-07, "loss": 0.5255, "step": 5027 }, { "epoch": 0.0873993985642024, "grad_norm": 2.4951302855184383, "learning_rate": 9.913905168934845e-07, "loss": 0.364, "step": 5028 }, { "epoch": 0.08741678110170523, "grad_norm": 3.2113123923870583, "learning_rate": 9.913853148485456e-07, "loss": 0.4553, "step": 5029 }, { "epoch": 0.08743416363920806, "grad_norm": 1.4558083460361428, "learning_rate": 9.91380111246141e-07, "loss": 0.6998, "step": 5030 }, { "epoch": 0.08745154617671087, "grad_norm": 1.2005612644220516, "learning_rate": 9.91374906086287e-07, "loss": 0.3843, "step": 5031 }, { "epoch": 0.0874689287142137, "grad_norm": 1.242109580697616, "learning_rate": 9.913696993690004e-07, "loss": 0.5042, "step": 5032 }, { "epoch": 0.08748631125171652, "grad_norm": 2.7135074368602266, "learning_rate": 9.913644910942976e-07, "loss": 0.336, "step": 5033 }, { "epoch": 0.08750369378921935, "grad_norm": 2.641618514185017, "learning_rate": 9.91359281262195e-07, "loss": 0.4371, "step": 5034 }, { "epoch": 0.08752107632672218, "grad_norm": 1.9502403228835032, "learning_rate": 9.913540698727091e-07, "loss": 0.499, "step": 5035 }, { "epoch": 0.087538458864225, "grad_norm": 2.020080596031461, "learning_rate": 9.913488569258565e-07, "loss": 0.3917, "step": 5036 }, { "epoch": 0.08755584140172783, "grad_norm": 2.713544438675359, "learning_rate": 9.913436424216541e-07, "loss": 0.4661, "step": 5037 }, { "epoch": 0.08757322393923064, "grad_norm": 2.4236609644448115, "learning_rate": 9.913384263601177e-07, "loss": 0.4623, "step": 5038 }, { "epoch": 0.08759060647673347, "grad_norm": 1.5831274045031685, "learning_rate": 9.913332087412643e-07, "loss": 0.5176, "step": 5039 }, { "epoch": 0.0876079890142363, "grad_norm": 1.5666583061713821, "learning_rate": 9.913279895651102e-07, "loss": 0.3485, "step": 5040 }, { "epoch": 0.08762537155173912, "grad_norm": 2.3638652515356937, "learning_rate": 9.913227688316724e-07, "loss": 0.3291, "step": 5041 }, { "epoch": 0.08764275408924195, "grad_norm": 1.603983774747049, "learning_rate": 9.91317546540967e-07, "loss": 0.3424, "step": 5042 }, { "epoch": 0.08766013662674477, "grad_norm": 1.61603358768026, "learning_rate": 9.913123226930105e-07, "loss": 0.2941, "step": 5043 }, { "epoch": 0.0876775191642476, "grad_norm": 1.330275692394784, "learning_rate": 9.913070972878196e-07, "loss": 0.6917, "step": 5044 }, { "epoch": 0.08769490170175043, "grad_norm": 1.719964445643218, "learning_rate": 9.913018703254112e-07, "loss": 0.6054, "step": 5045 }, { "epoch": 0.08771228423925324, "grad_norm": 4.86663816802364, "learning_rate": 9.912966418058012e-07, "loss": 0.5398, "step": 5046 }, { "epoch": 0.08772966677675607, "grad_norm": 1.3888095263352347, "learning_rate": 9.912914117290068e-07, "loss": 0.484, "step": 5047 }, { "epoch": 0.08774704931425889, "grad_norm": 2.4629713103725126, "learning_rate": 9.912861800950443e-07, "loss": 0.2706, "step": 5048 }, { "epoch": 0.08776443185176172, "grad_norm": 2.6213399673743383, "learning_rate": 9.912809469039301e-07, "loss": 0.3335, "step": 5049 }, { "epoch": 0.08778181438926455, "grad_norm": 1.9369370878769891, "learning_rate": 9.91275712155681e-07, "loss": 0.4267, "step": 5050 }, { "epoch": 0.08779919692676737, "grad_norm": 1.6640697695411557, "learning_rate": 9.912704758503136e-07, "loss": 0.4867, "step": 5051 }, { "epoch": 0.0878165794642702, "grad_norm": 1.9305481134451126, "learning_rate": 9.912652379878445e-07, "loss": 0.5263, "step": 5052 }, { "epoch": 0.08783396200177301, "grad_norm": 1.258860174396157, "learning_rate": 9.912599985682904e-07, "loss": 0.3989, "step": 5053 }, { "epoch": 0.08785134453927584, "grad_norm": 1.6510663437559485, "learning_rate": 9.912547575916676e-07, "loss": 0.4415, "step": 5054 }, { "epoch": 0.08786872707677867, "grad_norm": 1.862436435064158, "learning_rate": 9.912495150579928e-07, "loss": 0.4449, "step": 5055 }, { "epoch": 0.08788610961428149, "grad_norm": 1.4974090617088938, "learning_rate": 9.912442709672828e-07, "loss": 0.2479, "step": 5056 }, { "epoch": 0.08790349215178432, "grad_norm": 2.5199560455743986, "learning_rate": 9.912390253195541e-07, "loss": 0.7358, "step": 5057 }, { "epoch": 0.08792087468928714, "grad_norm": 1.6922177495067652, "learning_rate": 9.912337781148234e-07, "loss": 0.2752, "step": 5058 }, { "epoch": 0.08793825722678997, "grad_norm": 1.4896377803224576, "learning_rate": 9.912285293531073e-07, "loss": 0.5198, "step": 5059 }, { "epoch": 0.0879556397642928, "grad_norm": 1.9921379151265532, "learning_rate": 9.912232790344223e-07, "loss": 0.5521, "step": 5060 }, { "epoch": 0.08797302230179561, "grad_norm": 1.7081916833065118, "learning_rate": 9.912180271587854e-07, "loss": 0.3482, "step": 5061 }, { "epoch": 0.08799040483929844, "grad_norm": 2.2540528591948648, "learning_rate": 9.912127737262128e-07, "loss": 0.4295, "step": 5062 }, { "epoch": 0.08800778737680126, "grad_norm": 1.8000247157148004, "learning_rate": 9.912075187367212e-07, "loss": 0.2089, "step": 5063 }, { "epoch": 0.08802516991430409, "grad_norm": 1.5013749772553584, "learning_rate": 9.912022621903278e-07, "loss": 0.3187, "step": 5064 }, { "epoch": 0.08804255245180692, "grad_norm": 4.2583754816463335, "learning_rate": 9.911970040870486e-07, "loss": 0.3634, "step": 5065 }, { "epoch": 0.08805993498930974, "grad_norm": 2.0664385981816205, "learning_rate": 9.911917444269006e-07, "loss": 0.3945, "step": 5066 }, { "epoch": 0.08807731752681257, "grad_norm": 2.52202916679557, "learning_rate": 9.911864832099004e-07, "loss": 0.4287, "step": 5067 }, { "epoch": 0.08809470006431538, "grad_norm": 2.125273344948177, "learning_rate": 9.911812204360646e-07, "loss": 0.3358, "step": 5068 }, { "epoch": 0.08811208260181821, "grad_norm": 2.629449487561382, "learning_rate": 9.9117595610541e-07, "loss": 0.4723, "step": 5069 }, { "epoch": 0.08812946513932104, "grad_norm": 2.1979364388281537, "learning_rate": 9.911706902179533e-07, "loss": 0.421, "step": 5070 }, { "epoch": 0.08814684767682386, "grad_norm": 1.8526419488054728, "learning_rate": 9.91165422773711e-07, "loss": 0.3851, "step": 5071 }, { "epoch": 0.08816423021432669, "grad_norm": 1.7239414100579586, "learning_rate": 9.911601537727002e-07, "loss": 0.6062, "step": 5072 }, { "epoch": 0.08818161275182951, "grad_norm": 2.3187271001357526, "learning_rate": 9.911548832149372e-07, "loss": 0.5594, "step": 5073 }, { "epoch": 0.08819899528933234, "grad_norm": 1.7574418994333856, "learning_rate": 9.911496111004388e-07, "loss": 0.3324, "step": 5074 }, { "epoch": 0.08821637782683515, "grad_norm": 1.860155784117579, "learning_rate": 9.911443374292218e-07, "loss": 0.3077, "step": 5075 }, { "epoch": 0.08823376036433798, "grad_norm": 2.377412469583878, "learning_rate": 9.911390622013028e-07, "loss": 0.5457, "step": 5076 }, { "epoch": 0.08825114290184082, "grad_norm": 1.3887752173199106, "learning_rate": 9.911337854166985e-07, "loss": 0.3546, "step": 5077 }, { "epoch": 0.08826852543934363, "grad_norm": 1.8672100862828445, "learning_rate": 9.911285070754258e-07, "loss": 0.5021, "step": 5078 }, { "epoch": 0.08828590797684646, "grad_norm": 1.0095309823272556, "learning_rate": 9.911232271775015e-07, "loss": 0.3229, "step": 5079 }, { "epoch": 0.08830329051434928, "grad_norm": 2.1389058639809013, "learning_rate": 9.91117945722942e-07, "loss": 0.5185, "step": 5080 }, { "epoch": 0.08832067305185211, "grad_norm": 1.6254665780465416, "learning_rate": 9.911126627117642e-07, "loss": 0.4888, "step": 5081 }, { "epoch": 0.08833805558935494, "grad_norm": 1.8560403762709121, "learning_rate": 9.91107378143985e-07, "loss": 0.1794, "step": 5082 }, { "epoch": 0.08835543812685775, "grad_norm": 1.7457276460548579, "learning_rate": 9.91102092019621e-07, "loss": 0.3433, "step": 5083 }, { "epoch": 0.08837282066436059, "grad_norm": 2.2300170153977272, "learning_rate": 9.910968043386889e-07, "loss": 0.7095, "step": 5084 }, { "epoch": 0.0883902032018634, "grad_norm": 1.031491079523481, "learning_rate": 9.910915151012054e-07, "loss": 0.4222, "step": 5085 }, { "epoch": 0.08840758573936623, "grad_norm": 1.5235308043932243, "learning_rate": 9.910862243071876e-07, "loss": 0.3127, "step": 5086 }, { "epoch": 0.08842496827686906, "grad_norm": 1.7930251055602813, "learning_rate": 9.910809319566518e-07, "loss": 0.5122, "step": 5087 }, { "epoch": 0.08844235081437188, "grad_norm": 1.9868002878845084, "learning_rate": 9.910756380496152e-07, "loss": 0.3635, "step": 5088 }, { "epoch": 0.08845973335187471, "grad_norm": 1.4453630613099744, "learning_rate": 9.910703425860944e-07, "loss": 0.3358, "step": 5089 }, { "epoch": 0.08847711588937753, "grad_norm": 2.178564697922933, "learning_rate": 9.910650455661063e-07, "loss": 0.4535, "step": 5090 }, { "epoch": 0.08849449842688036, "grad_norm": 1.7375860301279415, "learning_rate": 9.910597469896674e-07, "loss": 0.5662, "step": 5091 }, { "epoch": 0.08851188096438319, "grad_norm": 1.3611264340283975, "learning_rate": 9.910544468567946e-07, "loss": 0.4348, "step": 5092 }, { "epoch": 0.088529263501886, "grad_norm": 2.1998709606826488, "learning_rate": 9.91049145167505e-07, "loss": 0.6161, "step": 5093 }, { "epoch": 0.08854664603938883, "grad_norm": 1.79660621624399, "learning_rate": 9.910438419218152e-07, "loss": 0.5311, "step": 5094 }, { "epoch": 0.08856402857689165, "grad_norm": 1.941466876676492, "learning_rate": 9.91038537119742e-07, "loss": 0.4324, "step": 5095 }, { "epoch": 0.08858141111439448, "grad_norm": 1.810955010176757, "learning_rate": 9.91033230761302e-07, "loss": 0.4062, "step": 5096 }, { "epoch": 0.08859879365189731, "grad_norm": 1.8656682702408398, "learning_rate": 9.910279228465122e-07, "loss": 0.2348, "step": 5097 }, { "epoch": 0.08861617618940013, "grad_norm": 2.2410543995370977, "learning_rate": 9.910226133753894e-07, "loss": 0.6788, "step": 5098 }, { "epoch": 0.08863355872690296, "grad_norm": 1.8594542121308089, "learning_rate": 9.910173023479508e-07, "loss": 0.6415, "step": 5099 }, { "epoch": 0.08865094126440577, "grad_norm": 2.829784667310242, "learning_rate": 9.910119897642127e-07, "loss": 0.336, "step": 5100 }, { "epoch": 0.0886683238019086, "grad_norm": 3.2573125439241633, "learning_rate": 9.910066756241922e-07, "loss": 0.2536, "step": 5101 }, { "epoch": 0.08868570633941143, "grad_norm": 2.5724066203414035, "learning_rate": 9.910013599279058e-07, "loss": 0.6619, "step": 5102 }, { "epoch": 0.08870308887691425, "grad_norm": 4.85951266147709, "learning_rate": 9.90996042675371e-07, "loss": 1.2165, "step": 5103 }, { "epoch": 0.08872047141441708, "grad_norm": 2.995409156969291, "learning_rate": 9.90990723866604e-07, "loss": 0.7754, "step": 5104 }, { "epoch": 0.0887378539519199, "grad_norm": 2.12907605712242, "learning_rate": 9.909854035016222e-07, "loss": 0.3066, "step": 5105 }, { "epoch": 0.08875523648942273, "grad_norm": 7.063133083057439, "learning_rate": 9.909800815804424e-07, "loss": 0.5382, "step": 5106 }, { "epoch": 0.08877261902692556, "grad_norm": 2.6815028497740605, "learning_rate": 9.90974758103081e-07, "loss": 0.5299, "step": 5107 }, { "epoch": 0.08879000156442837, "grad_norm": 1.69613614211522, "learning_rate": 9.90969433069555e-07, "loss": 0.2641, "step": 5108 }, { "epoch": 0.0888073841019312, "grad_norm": 1.2848228511862347, "learning_rate": 9.909641064798818e-07, "loss": 0.5287, "step": 5109 }, { "epoch": 0.08882476663943402, "grad_norm": 2.208527566839351, "learning_rate": 9.909587783340775e-07, "loss": 0.546, "step": 5110 }, { "epoch": 0.08884214917693685, "grad_norm": 2.5948863255892167, "learning_rate": 9.909534486321598e-07, "loss": 0.5896, "step": 5111 }, { "epoch": 0.08885953171443968, "grad_norm": 1.6310370856499128, "learning_rate": 9.90948117374145e-07, "loss": 0.4524, "step": 5112 }, { "epoch": 0.0888769142519425, "grad_norm": 2.9970144895233983, "learning_rate": 9.909427845600502e-07, "loss": 1.046, "step": 5113 }, { "epoch": 0.08889429678944533, "grad_norm": 2.021379344956392, "learning_rate": 9.909374501898923e-07, "loss": 0.2903, "step": 5114 }, { "epoch": 0.08891167932694814, "grad_norm": 2.383842667451576, "learning_rate": 9.90932114263688e-07, "loss": 0.3298, "step": 5115 }, { "epoch": 0.08892906186445097, "grad_norm": 9.196009921008743, "learning_rate": 9.909267767814546e-07, "loss": 0.652, "step": 5116 }, { "epoch": 0.0889464444019538, "grad_norm": 1.5123176369406504, "learning_rate": 9.90921437743209e-07, "loss": 0.4905, "step": 5117 }, { "epoch": 0.08896382693945662, "grad_norm": 2.1332156700418166, "learning_rate": 9.909160971489679e-07, "loss": 0.3825, "step": 5118 }, { "epoch": 0.08898120947695945, "grad_norm": 6.586658990435648, "learning_rate": 9.909107549987481e-07, "loss": 0.5061, "step": 5119 }, { "epoch": 0.08899859201446227, "grad_norm": 1.7343687164344301, "learning_rate": 9.909054112925668e-07, "loss": 0.3619, "step": 5120 }, { "epoch": 0.0890159745519651, "grad_norm": 1.4763956604592818, "learning_rate": 9.909000660304408e-07, "loss": 0.5542, "step": 5121 }, { "epoch": 0.08903335708946793, "grad_norm": 2.5289076334266793, "learning_rate": 9.90894719212387e-07, "loss": 0.536, "step": 5122 }, { "epoch": 0.08905073962697074, "grad_norm": 1.3617000839406455, "learning_rate": 9.908893708384227e-07, "loss": 0.4108, "step": 5123 }, { "epoch": 0.08906812216447357, "grad_norm": 1.9536144265977737, "learning_rate": 9.908840209085646e-07, "loss": 0.5339, "step": 5124 }, { "epoch": 0.08908550470197639, "grad_norm": 2.1824221297523083, "learning_rate": 9.908786694228295e-07, "loss": 0.5767, "step": 5125 }, { "epoch": 0.08910288723947922, "grad_norm": 2.123615216672523, "learning_rate": 9.908733163812344e-07, "loss": 0.5479, "step": 5126 }, { "epoch": 0.08912026977698205, "grad_norm": 1.5512026306234166, "learning_rate": 9.908679617837965e-07, "loss": 0.449, "step": 5127 }, { "epoch": 0.08913765231448487, "grad_norm": 1.4385641638255724, "learning_rate": 9.908626056305325e-07, "loss": 0.4954, "step": 5128 }, { "epoch": 0.0891550348519877, "grad_norm": 1.882343640872808, "learning_rate": 9.908572479214598e-07, "loss": 0.3638, "step": 5129 }, { "epoch": 0.08917241738949051, "grad_norm": 2.1977139580288365, "learning_rate": 9.908518886565948e-07, "loss": 0.2791, "step": 5130 }, { "epoch": 0.08918979992699334, "grad_norm": 3.0511279359335024, "learning_rate": 9.90846527835955e-07, "loss": 0.7014, "step": 5131 }, { "epoch": 0.08920718246449617, "grad_norm": 1.627485555571851, "learning_rate": 9.908411654595571e-07, "loss": 0.6685, "step": 5132 }, { "epoch": 0.08922456500199899, "grad_norm": 1.8730293988865099, "learning_rate": 9.908358015274183e-07, "loss": 0.6582, "step": 5133 }, { "epoch": 0.08924194753950182, "grad_norm": 1.384351770410153, "learning_rate": 9.908304360395554e-07, "loss": 0.246, "step": 5134 }, { "epoch": 0.08925933007700464, "grad_norm": 1.5238270414527508, "learning_rate": 9.908250689959856e-07, "loss": 0.5222, "step": 5135 }, { "epoch": 0.08927671261450747, "grad_norm": 1.8654462982289595, "learning_rate": 9.908197003967257e-07, "loss": 0.3181, "step": 5136 }, { "epoch": 0.0892940951520103, "grad_norm": 2.2577137078593723, "learning_rate": 9.908143302417928e-07, "loss": 0.5634, "step": 5137 }, { "epoch": 0.08931147768951311, "grad_norm": 2.010903945248389, "learning_rate": 9.908089585312038e-07, "loss": 0.4215, "step": 5138 }, { "epoch": 0.08932886022701594, "grad_norm": 2.9258431103037488, "learning_rate": 9.908035852649761e-07, "loss": 0.5593, "step": 5139 }, { "epoch": 0.08934624276451876, "grad_norm": 3.654367472958918, "learning_rate": 9.907982104431263e-07, "loss": 0.6316, "step": 5140 }, { "epoch": 0.08936362530202159, "grad_norm": 1.7414307700148521, "learning_rate": 9.907928340656717e-07, "loss": 0.3142, "step": 5141 }, { "epoch": 0.08938100783952442, "grad_norm": 2.391376789908844, "learning_rate": 9.907874561326294e-07, "loss": 0.7056, "step": 5142 }, { "epoch": 0.08939839037702724, "grad_norm": 2.5589568963167295, "learning_rate": 9.907820766440162e-07, "loss": 0.6106, "step": 5143 }, { "epoch": 0.08941577291453007, "grad_norm": 1.7821698140225504, "learning_rate": 9.907766955998492e-07, "loss": 0.6131, "step": 5144 }, { "epoch": 0.08943315545203288, "grad_norm": 2.273047639095078, "learning_rate": 9.907713130001456e-07, "loss": 0.4533, "step": 5145 }, { "epoch": 0.08945053798953571, "grad_norm": 1.8047856967530713, "learning_rate": 9.907659288449223e-07, "loss": 0.4759, "step": 5146 }, { "epoch": 0.08946792052703853, "grad_norm": 2.6907306212849518, "learning_rate": 9.907605431341965e-07, "loss": 0.6449, "step": 5147 }, { "epoch": 0.08948530306454136, "grad_norm": 1.8562951433911778, "learning_rate": 9.907551558679853e-07, "loss": 0.6797, "step": 5148 }, { "epoch": 0.08950268560204419, "grad_norm": 2.066013706023078, "learning_rate": 9.907497670463057e-07, "loss": 0.3844, "step": 5149 }, { "epoch": 0.08952006813954701, "grad_norm": 3.645088777609185, "learning_rate": 9.907443766691746e-07, "loss": 0.6742, "step": 5150 }, { "epoch": 0.08953745067704984, "grad_norm": 1.5682939965352214, "learning_rate": 9.907389847366093e-07, "loss": 0.3991, "step": 5151 }, { "epoch": 0.08955483321455265, "grad_norm": 1.78079819188002, "learning_rate": 9.90733591248627e-07, "loss": 0.1589, "step": 5152 }, { "epoch": 0.08957221575205548, "grad_norm": 2.8462439360224283, "learning_rate": 9.907281962052444e-07, "loss": 0.9042, "step": 5153 }, { "epoch": 0.08958959828955831, "grad_norm": 1.841438133381618, "learning_rate": 9.907227996064789e-07, "loss": 0.5245, "step": 5154 }, { "epoch": 0.08960698082706113, "grad_norm": 1.7501148074681974, "learning_rate": 9.907174014523476e-07, "loss": 0.6036, "step": 5155 }, { "epoch": 0.08962436336456396, "grad_norm": 1.882605861605769, "learning_rate": 9.907120017428677e-07, "loss": 0.3539, "step": 5156 }, { "epoch": 0.08964174590206678, "grad_norm": 1.879351410178866, "learning_rate": 9.90706600478056e-07, "loss": 0.3715, "step": 5157 }, { "epoch": 0.08965912843956961, "grad_norm": 3.100650517398025, "learning_rate": 9.907011976579298e-07, "loss": 0.6013, "step": 5158 }, { "epoch": 0.08967651097707244, "grad_norm": 1.971943516774915, "learning_rate": 9.906957932825062e-07, "loss": 0.3745, "step": 5159 }, { "epoch": 0.08969389351457525, "grad_norm": 1.3928682797061176, "learning_rate": 9.906903873518025e-07, "loss": 0.6091, "step": 5160 }, { "epoch": 0.08971127605207808, "grad_norm": 1.558815924673855, "learning_rate": 9.906849798658355e-07, "loss": 0.605, "step": 5161 }, { "epoch": 0.0897286585895809, "grad_norm": 1.3694901998082807, "learning_rate": 9.906795708246225e-07, "loss": 0.3715, "step": 5162 }, { "epoch": 0.08974604112708373, "grad_norm": 1.6048135146199125, "learning_rate": 9.906741602281807e-07, "loss": 0.375, "step": 5163 }, { "epoch": 0.08976342366458656, "grad_norm": 1.6466300014313142, "learning_rate": 9.906687480765271e-07, "loss": 0.3369, "step": 5164 }, { "epoch": 0.08978080620208938, "grad_norm": 2.5733578035120264, "learning_rate": 9.906633343696794e-07, "loss": 0.3352, "step": 5165 }, { "epoch": 0.08979818873959221, "grad_norm": 2.060807629960104, "learning_rate": 9.906579191076539e-07, "loss": 0.4601, "step": 5166 }, { "epoch": 0.08981557127709502, "grad_norm": 2.44596539257197, "learning_rate": 9.906525022904683e-07, "loss": 0.8422, "step": 5167 }, { "epoch": 0.08983295381459785, "grad_norm": 1.8115213145217204, "learning_rate": 9.906470839181398e-07, "loss": 0.3412, "step": 5168 }, { "epoch": 0.08985033635210068, "grad_norm": 1.9372551025202733, "learning_rate": 9.906416639906852e-07, "loss": 0.3861, "step": 5169 }, { "epoch": 0.0898677188896035, "grad_norm": 3.177563822275939, "learning_rate": 9.90636242508122e-07, "loss": 1.0063, "step": 5170 }, { "epoch": 0.08988510142710633, "grad_norm": 1.525058989009344, "learning_rate": 9.906308194704673e-07, "loss": 0.3637, "step": 5171 }, { "epoch": 0.08990248396460915, "grad_norm": 1.373558604395825, "learning_rate": 9.906253948777382e-07, "loss": 0.2871, "step": 5172 }, { "epoch": 0.08991986650211198, "grad_norm": 2.1286953567443723, "learning_rate": 9.906199687299521e-07, "loss": 0.4016, "step": 5173 }, { "epoch": 0.08993724903961481, "grad_norm": 1.2772019261213192, "learning_rate": 9.90614541027126e-07, "loss": 0.4702, "step": 5174 }, { "epoch": 0.08995463157711762, "grad_norm": 1.9367324565234847, "learning_rate": 9.906091117692772e-07, "loss": 0.311, "step": 5175 }, { "epoch": 0.08997201411462045, "grad_norm": 1.9511534664245127, "learning_rate": 9.90603680956423e-07, "loss": 0.4703, "step": 5176 }, { "epoch": 0.08998939665212327, "grad_norm": 4.9514788925209885, "learning_rate": 9.905982485885802e-07, "loss": 0.3469, "step": 5177 }, { "epoch": 0.0900067791896261, "grad_norm": 2.9481655029993123, "learning_rate": 9.905928146657665e-07, "loss": 0.497, "step": 5178 }, { "epoch": 0.09002416172712893, "grad_norm": 1.9086493894798082, "learning_rate": 9.90587379187999e-07, "loss": 0.2929, "step": 5179 }, { "epoch": 0.09004154426463175, "grad_norm": 2.1533897525640127, "learning_rate": 9.905819421552948e-07, "loss": 0.7224, "step": 5180 }, { "epoch": 0.09005892680213458, "grad_norm": 2.4725908354495516, "learning_rate": 9.905765035676712e-07, "loss": 0.4641, "step": 5181 }, { "epoch": 0.0900763093396374, "grad_norm": 2.0378320301055566, "learning_rate": 9.905710634251454e-07, "loss": 0.5693, "step": 5182 }, { "epoch": 0.09009369187714023, "grad_norm": 1.7569355159620594, "learning_rate": 9.905656217277347e-07, "loss": 0.4581, "step": 5183 }, { "epoch": 0.09011107441464306, "grad_norm": 1.8954219179947958, "learning_rate": 9.905601784754562e-07, "loss": 0.3701, "step": 5184 }, { "epoch": 0.09012845695214587, "grad_norm": 1.4744119385963488, "learning_rate": 9.905547336683275e-07, "loss": 0.5929, "step": 5185 }, { "epoch": 0.0901458394896487, "grad_norm": 3.431860489024814, "learning_rate": 9.905492873063655e-07, "loss": 0.4028, "step": 5186 }, { "epoch": 0.09016322202715152, "grad_norm": 2.418423246967491, "learning_rate": 9.905438393895877e-07, "loss": 0.8123, "step": 5187 }, { "epoch": 0.09018060456465435, "grad_norm": 1.5797860459871291, "learning_rate": 9.90538389918011e-07, "loss": 0.4869, "step": 5188 }, { "epoch": 0.09019798710215718, "grad_norm": 1.9401812281318733, "learning_rate": 9.905329388916533e-07, "loss": 0.6249, "step": 5189 }, { "epoch": 0.09021536963966, "grad_norm": 1.3713272272323915, "learning_rate": 9.905274863105314e-07, "loss": 0.5322, "step": 5190 }, { "epoch": 0.09023275217716283, "grad_norm": 1.42901325605188, "learning_rate": 9.905220321746626e-07, "loss": 0.4428, "step": 5191 }, { "epoch": 0.09025013471466564, "grad_norm": 1.429056455945662, "learning_rate": 9.905165764840644e-07, "loss": 0.4334, "step": 5192 }, { "epoch": 0.09026751725216847, "grad_norm": 1.8148114320821107, "learning_rate": 9.90511119238754e-07, "loss": 0.3256, "step": 5193 }, { "epoch": 0.0902848997896713, "grad_norm": 1.8027231121840388, "learning_rate": 9.905056604387485e-07, "loss": 0.5401, "step": 5194 }, { "epoch": 0.09030228232717412, "grad_norm": 1.3632124746628413, "learning_rate": 9.905002000840654e-07, "loss": 0.4072, "step": 5195 }, { "epoch": 0.09031966486467695, "grad_norm": 1.4883310576698956, "learning_rate": 9.904947381747222e-07, "loss": 0.4181, "step": 5196 }, { "epoch": 0.09033704740217977, "grad_norm": 2.0380697979534634, "learning_rate": 9.904892747107357e-07, "loss": 0.52, "step": 5197 }, { "epoch": 0.0903544299396826, "grad_norm": 1.665311431258911, "learning_rate": 9.904838096921237e-07, "loss": 0.3078, "step": 5198 }, { "epoch": 0.09037181247718543, "grad_norm": 1.6471560742046203, "learning_rate": 9.904783431189035e-07, "loss": 0.2169, "step": 5199 }, { "epoch": 0.09038919501468824, "grad_norm": 2.95393335182262, "learning_rate": 9.90472874991092e-07, "loss": 0.3637, "step": 5200 }, { "epoch": 0.09040657755219107, "grad_norm": 1.6864407216046233, "learning_rate": 9.904674053087068e-07, "loss": 0.4539, "step": 5201 }, { "epoch": 0.09042396008969389, "grad_norm": 3.033800843563697, "learning_rate": 9.904619340717654e-07, "loss": 0.7261, "step": 5202 }, { "epoch": 0.09044134262719672, "grad_norm": 2.071816836914375, "learning_rate": 9.904564612802847e-07, "loss": 0.4004, "step": 5203 }, { "epoch": 0.09045872516469955, "grad_norm": 1.8888500667550583, "learning_rate": 9.904509869342826e-07, "loss": 0.5507, "step": 5204 }, { "epoch": 0.09047610770220237, "grad_norm": 1.4124046779427712, "learning_rate": 9.90445511033776e-07, "loss": 0.6532, "step": 5205 }, { "epoch": 0.0904934902397052, "grad_norm": 2.001739887426198, "learning_rate": 9.904400335787824e-07, "loss": 0.3386, "step": 5206 }, { "epoch": 0.09051087277720801, "grad_norm": 2.1301302069080803, "learning_rate": 9.90434554569319e-07, "loss": 0.3152, "step": 5207 }, { "epoch": 0.09052825531471084, "grad_norm": 1.9114452013148946, "learning_rate": 9.904290740054037e-07, "loss": 0.3907, "step": 5208 }, { "epoch": 0.09054563785221367, "grad_norm": 2.0462791213285847, "learning_rate": 9.904235918870533e-07, "loss": 0.4528, "step": 5209 }, { "epoch": 0.09056302038971649, "grad_norm": 2.3683669988070832, "learning_rate": 9.904181082142857e-07, "loss": 0.3181, "step": 5210 }, { "epoch": 0.09058040292721932, "grad_norm": 1.7478292133649198, "learning_rate": 9.904126229871177e-07, "loss": 0.4108, "step": 5211 }, { "epoch": 0.09059778546472214, "grad_norm": 2.4994829441658712, "learning_rate": 9.90407136205567e-07, "loss": 0.2674, "step": 5212 }, { "epoch": 0.09061516800222497, "grad_norm": 2.4179208990537853, "learning_rate": 9.90401647869651e-07, "loss": 0.4758, "step": 5213 }, { "epoch": 0.0906325505397278, "grad_norm": 2.165331424873805, "learning_rate": 9.903961579793871e-07, "loss": 0.6302, "step": 5214 }, { "epoch": 0.09064993307723061, "grad_norm": 2.492816274214185, "learning_rate": 9.903906665347926e-07, "loss": 0.2316, "step": 5215 }, { "epoch": 0.09066731561473344, "grad_norm": 1.7896550414754329, "learning_rate": 9.90385173535885e-07, "loss": 0.4208, "step": 5216 }, { "epoch": 0.09068469815223626, "grad_norm": 5.734089910235738, "learning_rate": 9.903796789826815e-07, "loss": 0.3525, "step": 5217 }, { "epoch": 0.09070208068973909, "grad_norm": 1.6212397290037646, "learning_rate": 9.903741828751998e-07, "loss": 0.5245, "step": 5218 }, { "epoch": 0.0907194632272419, "grad_norm": 2.1181051636050072, "learning_rate": 9.903686852134573e-07, "loss": 0.5882, "step": 5219 }, { "epoch": 0.09073684576474474, "grad_norm": 4.426675833660331, "learning_rate": 9.903631859974711e-07, "loss": 0.7018, "step": 5220 }, { "epoch": 0.09075422830224757, "grad_norm": 2.460945011990756, "learning_rate": 9.90357685227259e-07, "loss": 0.4886, "step": 5221 }, { "epoch": 0.09077161083975038, "grad_norm": 4.317833410128534, "learning_rate": 9.90352182902838e-07, "loss": 0.498, "step": 5222 }, { "epoch": 0.09078899337725321, "grad_norm": 2.316343317344903, "learning_rate": 9.903466790242263e-07, "loss": 0.6774, "step": 5223 }, { "epoch": 0.09080637591475603, "grad_norm": 2.1982597634177465, "learning_rate": 9.903411735914406e-07, "loss": 0.4293, "step": 5224 }, { "epoch": 0.09082375845225886, "grad_norm": 2.1298657344706586, "learning_rate": 9.903356666044987e-07, "loss": 0.5496, "step": 5225 }, { "epoch": 0.09084114098976169, "grad_norm": 2.90940381644963, "learning_rate": 9.90330158063418e-07, "loss": 0.5807, "step": 5226 }, { "epoch": 0.0908585235272645, "grad_norm": 2.898777692799742, "learning_rate": 9.903246479682158e-07, "loss": 0.5217, "step": 5227 }, { "epoch": 0.09087590606476734, "grad_norm": 1.5234126409030964, "learning_rate": 9.903191363189098e-07, "loss": 0.2784, "step": 5228 }, { "epoch": 0.09089328860227015, "grad_norm": 1.8152718928726042, "learning_rate": 9.903136231155173e-07, "loss": 0.3162, "step": 5229 }, { "epoch": 0.09091067113977298, "grad_norm": 1.8775524162013142, "learning_rate": 9.90308108358056e-07, "loss": 0.4285, "step": 5230 }, { "epoch": 0.09092805367727581, "grad_norm": 1.6772419325108427, "learning_rate": 9.903025920465431e-07, "loss": 0.4277, "step": 5231 }, { "epoch": 0.09094543621477863, "grad_norm": 1.7721076892900938, "learning_rate": 9.902970741809964e-07, "loss": 0.4489, "step": 5232 }, { "epoch": 0.09096281875228146, "grad_norm": 2.225636995256202, "learning_rate": 9.90291554761433e-07, "loss": 0.6346, "step": 5233 }, { "epoch": 0.09098020128978428, "grad_norm": 1.8332345127188245, "learning_rate": 9.902860337878706e-07, "loss": 0.4773, "step": 5234 }, { "epoch": 0.0909975838272871, "grad_norm": 2.077986787458845, "learning_rate": 9.902805112603267e-07, "loss": 0.4534, "step": 5235 }, { "epoch": 0.09101496636478994, "grad_norm": 2.1976491363735553, "learning_rate": 9.90274987178819e-07, "loss": 0.7481, "step": 5236 }, { "epoch": 0.09103234890229275, "grad_norm": 2.4364259313216143, "learning_rate": 9.902694615433646e-07, "loss": 0.2624, "step": 5237 }, { "epoch": 0.09104973143979558, "grad_norm": 2.7322159259751473, "learning_rate": 9.902639343539811e-07, "loss": 0.5821, "step": 5238 }, { "epoch": 0.0910671139772984, "grad_norm": 2.1647976153380184, "learning_rate": 9.902584056106865e-07, "loss": 0.1668, "step": 5239 }, { "epoch": 0.09108449651480123, "grad_norm": 1.8881096136540327, "learning_rate": 9.902528753134976e-07, "loss": 0.3269, "step": 5240 }, { "epoch": 0.09110187905230406, "grad_norm": 1.8358258336112723, "learning_rate": 9.902473434624325e-07, "loss": 0.6688, "step": 5241 }, { "epoch": 0.09111926158980688, "grad_norm": 2.147797113786156, "learning_rate": 9.902418100575086e-07, "loss": 0.7185, "step": 5242 }, { "epoch": 0.09113664412730971, "grad_norm": 3.7100482020081493, "learning_rate": 9.902362750987431e-07, "loss": 0.5183, "step": 5243 }, { "epoch": 0.09115402666481252, "grad_norm": 4.438571216265571, "learning_rate": 9.90230738586154e-07, "loss": 0.6261, "step": 5244 }, { "epoch": 0.09117140920231535, "grad_norm": 2.1175280206852523, "learning_rate": 9.902252005197585e-07, "loss": 0.3768, "step": 5245 }, { "epoch": 0.09118879173981818, "grad_norm": 1.5501109991044404, "learning_rate": 9.902196608995744e-07, "loss": 0.2674, "step": 5246 }, { "epoch": 0.091206174277321, "grad_norm": 2.5989635915829172, "learning_rate": 9.902141197256192e-07, "loss": 0.5059, "step": 5247 }, { "epoch": 0.09122355681482383, "grad_norm": 2.126906746342148, "learning_rate": 9.902085769979103e-07, "loss": 0.3909, "step": 5248 }, { "epoch": 0.09124093935232665, "grad_norm": 3.162455534959454, "learning_rate": 9.902030327164654e-07, "loss": 0.6088, "step": 5249 }, { "epoch": 0.09125832188982948, "grad_norm": 3.782627128500644, "learning_rate": 9.901974868813023e-07, "loss": 0.7031, "step": 5250 }, { "epoch": 0.09127570442733231, "grad_norm": 1.8296320466270868, "learning_rate": 9.90191939492438e-07, "loss": 0.3167, "step": 5251 }, { "epoch": 0.09129308696483512, "grad_norm": 1.8410222540046906, "learning_rate": 9.901863905498906e-07, "loss": 0.2664, "step": 5252 }, { "epoch": 0.09131046950233795, "grad_norm": 1.6484137040784868, "learning_rate": 9.901808400536776e-07, "loss": 0.6068, "step": 5253 }, { "epoch": 0.09132785203984077, "grad_norm": 1.8036580788232766, "learning_rate": 9.901752880038165e-07, "loss": 0.5311, "step": 5254 }, { "epoch": 0.0913452345773436, "grad_norm": 1.2428147257924922, "learning_rate": 9.901697344003249e-07, "loss": 0.556, "step": 5255 }, { "epoch": 0.09136261711484643, "grad_norm": 1.831251414688605, "learning_rate": 9.901641792432203e-07, "loss": 0.3524, "step": 5256 }, { "epoch": 0.09137999965234925, "grad_norm": 1.781774562807342, "learning_rate": 9.901586225325204e-07, "loss": 0.6525, "step": 5257 }, { "epoch": 0.09139738218985208, "grad_norm": 5.811066564465454, "learning_rate": 9.90153064268243e-07, "loss": 0.471, "step": 5258 }, { "epoch": 0.0914147647273549, "grad_norm": 1.8227738420981348, "learning_rate": 9.901475044504053e-07, "loss": 0.3686, "step": 5259 }, { "epoch": 0.09143214726485772, "grad_norm": 1.6677774454651337, "learning_rate": 9.901419430790255e-07, "loss": 0.49, "step": 5260 }, { "epoch": 0.09144952980236055, "grad_norm": 2.1536630114305915, "learning_rate": 9.901363801541205e-07, "loss": 0.4409, "step": 5261 }, { "epoch": 0.09146691233986337, "grad_norm": 3.794226082194157, "learning_rate": 9.901308156757086e-07, "loss": 0.394, "step": 5262 }, { "epoch": 0.0914842948773662, "grad_norm": 2.5462387288250037, "learning_rate": 9.901252496438071e-07, "loss": 0.4359, "step": 5263 }, { "epoch": 0.09150167741486902, "grad_norm": 3.980871737446437, "learning_rate": 9.901196820584337e-07, "loss": 0.4098, "step": 5264 }, { "epoch": 0.09151905995237185, "grad_norm": 1.9240409089381663, "learning_rate": 9.90114112919606e-07, "loss": 0.4097, "step": 5265 }, { "epoch": 0.09153644248987468, "grad_norm": 1.9868334442695161, "learning_rate": 9.901085422273417e-07, "loss": 0.4972, "step": 5266 }, { "epoch": 0.0915538250273775, "grad_norm": 2.031266162681421, "learning_rate": 9.901029699816586e-07, "loss": 0.4318, "step": 5267 }, { "epoch": 0.09157120756488032, "grad_norm": 2.3935751546139072, "learning_rate": 9.90097396182574e-07, "loss": 0.5525, "step": 5268 }, { "epoch": 0.09158859010238314, "grad_norm": 2.243904426078232, "learning_rate": 9.900918208301059e-07, "loss": 0.5161, "step": 5269 }, { "epoch": 0.09160597263988597, "grad_norm": 1.8005418253336818, "learning_rate": 9.900862439242718e-07, "loss": 0.3241, "step": 5270 }, { "epoch": 0.0916233551773888, "grad_norm": 1.7389103423062802, "learning_rate": 9.900806654650896e-07, "loss": 0.2007, "step": 5271 }, { "epoch": 0.09164073771489162, "grad_norm": 2.5032401371429533, "learning_rate": 9.900750854525767e-07, "loss": 0.4642, "step": 5272 }, { "epoch": 0.09165812025239445, "grad_norm": 1.3488303987052614, "learning_rate": 9.900695038867508e-07, "loss": 0.2606, "step": 5273 }, { "epoch": 0.09167550278989726, "grad_norm": 2.6722746013072363, "learning_rate": 9.9006392076763e-07, "loss": 0.4709, "step": 5274 }, { "epoch": 0.0916928853274001, "grad_norm": 2.621221963570389, "learning_rate": 9.900583360952314e-07, "loss": 0.5202, "step": 5275 }, { "epoch": 0.09171026786490293, "grad_norm": 1.7105219049104041, "learning_rate": 9.90052749869573e-07, "loss": 0.4321, "step": 5276 }, { "epoch": 0.09172765040240574, "grad_norm": 1.212848922970029, "learning_rate": 9.900471620906726e-07, "loss": 0.4099, "step": 5277 }, { "epoch": 0.09174503293990857, "grad_norm": 1.6108097507455998, "learning_rate": 9.900415727585477e-07, "loss": 0.4435, "step": 5278 }, { "epoch": 0.09176241547741139, "grad_norm": 1.2308536039328035, "learning_rate": 9.900359818732162e-07, "loss": 0.4165, "step": 5279 }, { "epoch": 0.09177979801491422, "grad_norm": 2.9623397360329826, "learning_rate": 9.900303894346959e-07, "loss": 0.5433, "step": 5280 }, { "epoch": 0.09179718055241705, "grad_norm": 2.0474732898649517, "learning_rate": 9.90024795443004e-07, "loss": 0.5213, "step": 5281 }, { "epoch": 0.09181456308991987, "grad_norm": 1.6075426028015989, "learning_rate": 9.90019199898159e-07, "loss": 0.5459, "step": 5282 }, { "epoch": 0.0918319456274227, "grad_norm": 1.3859243404315442, "learning_rate": 9.900136028001779e-07, "loss": 0.6434, "step": 5283 }, { "epoch": 0.09184932816492551, "grad_norm": 1.7389912700695365, "learning_rate": 9.90008004149079e-07, "loss": 0.4254, "step": 5284 }, { "epoch": 0.09186671070242834, "grad_norm": 2.302863762256132, "learning_rate": 9.900024039448798e-07, "loss": 0.5503, "step": 5285 }, { "epoch": 0.09188409323993116, "grad_norm": 2.885339201594676, "learning_rate": 9.89996802187598e-07, "loss": 0.5009, "step": 5286 }, { "epoch": 0.09190147577743399, "grad_norm": 1.7833541126894825, "learning_rate": 9.899911988772514e-07, "loss": 0.3889, "step": 5287 }, { "epoch": 0.09191885831493682, "grad_norm": 1.7311783363537656, "learning_rate": 9.899855940138578e-07, "loss": 0.4038, "step": 5288 }, { "epoch": 0.09193624085243964, "grad_norm": 2.1481742055103323, "learning_rate": 9.899799875974348e-07, "loss": 0.6307, "step": 5289 }, { "epoch": 0.09195362338994247, "grad_norm": 2.221745027970904, "learning_rate": 9.899743796280006e-07, "loss": 0.2766, "step": 5290 }, { "epoch": 0.09197100592744528, "grad_norm": 1.8708354657280055, "learning_rate": 9.899687701055725e-07, "loss": 0.3918, "step": 5291 }, { "epoch": 0.09198838846494811, "grad_norm": 1.0980596584287206, "learning_rate": 9.899631590301685e-07, "loss": 0.4201, "step": 5292 }, { "epoch": 0.09200577100245094, "grad_norm": 1.6351098805023465, "learning_rate": 9.899575464018064e-07, "loss": 0.3921, "step": 5293 }, { "epoch": 0.09202315353995376, "grad_norm": 1.4171512428142197, "learning_rate": 9.89951932220504e-07, "loss": 0.5739, "step": 5294 }, { "epoch": 0.09204053607745659, "grad_norm": 1.7718381538963153, "learning_rate": 9.89946316486279e-07, "loss": 0.2802, "step": 5295 }, { "epoch": 0.0920579186149594, "grad_norm": 1.5585964680998792, "learning_rate": 9.899406991991493e-07, "loss": 0.4595, "step": 5296 }, { "epoch": 0.09207530115246224, "grad_norm": 1.8177503357299551, "learning_rate": 9.899350803591325e-07, "loss": 0.4152, "step": 5297 }, { "epoch": 0.09209268368996507, "grad_norm": 1.5463175063587349, "learning_rate": 9.899294599662466e-07, "loss": 0.3589, "step": 5298 }, { "epoch": 0.09211006622746788, "grad_norm": 1.5538945830124837, "learning_rate": 9.899238380205093e-07, "loss": 0.4149, "step": 5299 }, { "epoch": 0.09212744876497071, "grad_norm": 1.6870145911210492, "learning_rate": 9.899182145219388e-07, "loss": 0.6621, "step": 5300 }, { "epoch": 0.09214483130247353, "grad_norm": 1.4935470326210447, "learning_rate": 9.899125894705523e-07, "loss": 0.2992, "step": 5301 }, { "epoch": 0.09216221383997636, "grad_norm": 1.9300855147924942, "learning_rate": 9.899069628663678e-07, "loss": 0.4734, "step": 5302 }, { "epoch": 0.09217959637747919, "grad_norm": 5.399455630197579, "learning_rate": 9.899013347094037e-07, "loss": 0.3485, "step": 5303 }, { "epoch": 0.092196978914982, "grad_norm": 1.4183067752263738, "learning_rate": 9.89895704999677e-07, "loss": 0.6693, "step": 5304 }, { "epoch": 0.09221436145248484, "grad_norm": 1.9882678109005845, "learning_rate": 9.898900737372062e-07, "loss": 0.5554, "step": 5305 }, { "epoch": 0.09223174398998765, "grad_norm": 1.0509224598689242, "learning_rate": 9.898844409220088e-07, "loss": 0.5155, "step": 5306 }, { "epoch": 0.09224912652749048, "grad_norm": 1.8272270479767097, "learning_rate": 9.89878806554103e-07, "loss": 0.3112, "step": 5307 }, { "epoch": 0.09226650906499331, "grad_norm": 1.709527301484672, "learning_rate": 9.898731706335062e-07, "loss": 0.4938, "step": 5308 }, { "epoch": 0.09228389160249613, "grad_norm": 1.4593069066383122, "learning_rate": 9.898675331602365e-07, "loss": 0.1738, "step": 5309 }, { "epoch": 0.09230127413999896, "grad_norm": 2.430054299897103, "learning_rate": 9.898618941343118e-07, "loss": 0.6458, "step": 5310 }, { "epoch": 0.09231865667750178, "grad_norm": 2.387172305631265, "learning_rate": 9.898562535557498e-07, "loss": 0.4453, "step": 5311 }, { "epoch": 0.0923360392150046, "grad_norm": 2.041749282890877, "learning_rate": 9.898506114245686e-07, "loss": 0.5353, "step": 5312 }, { "epoch": 0.09235342175250744, "grad_norm": 1.8547915431989184, "learning_rate": 9.898449677407858e-07, "loss": 0.4305, "step": 5313 }, { "epoch": 0.09237080429001025, "grad_norm": 3.4861428393679446, "learning_rate": 9.898393225044197e-07, "loss": 0.677, "step": 5314 }, { "epoch": 0.09238818682751308, "grad_norm": 1.373095964141928, "learning_rate": 9.89833675715488e-07, "loss": 0.7106, "step": 5315 }, { "epoch": 0.0924055693650159, "grad_norm": 2.8105059664169483, "learning_rate": 9.898280273740082e-07, "loss": 0.5641, "step": 5316 }, { "epoch": 0.09242295190251873, "grad_norm": 2.795590635719324, "learning_rate": 9.89822377479999e-07, "loss": 0.3618, "step": 5317 }, { "epoch": 0.09244033444002156, "grad_norm": 2.2121036614221086, "learning_rate": 9.898167260334776e-07, "loss": 0.4539, "step": 5318 }, { "epoch": 0.09245771697752438, "grad_norm": 1.6659908798542062, "learning_rate": 9.898110730344622e-07, "loss": 0.3691, "step": 5319 }, { "epoch": 0.0924750995150272, "grad_norm": 2.3766739845764557, "learning_rate": 9.898054184829708e-07, "loss": 0.2996, "step": 5320 }, { "epoch": 0.09249248205253002, "grad_norm": 2.2751016379152373, "learning_rate": 9.897997623790212e-07, "loss": 0.5475, "step": 5321 }, { "epoch": 0.09250986459003285, "grad_norm": 1.573272192174061, "learning_rate": 9.897941047226314e-07, "loss": 0.3589, "step": 5322 }, { "epoch": 0.09252724712753568, "grad_norm": 1.7281107628792356, "learning_rate": 9.89788445513819e-07, "loss": 0.5312, "step": 5323 }, { "epoch": 0.0925446296650385, "grad_norm": 2.5206146969398384, "learning_rate": 9.897827847526026e-07, "loss": 0.6788, "step": 5324 }, { "epoch": 0.09256201220254133, "grad_norm": 2.392927352567065, "learning_rate": 9.897771224389997e-07, "loss": 0.3708, "step": 5325 }, { "epoch": 0.09257939474004415, "grad_norm": 1.7763363865961386, "learning_rate": 9.897714585730282e-07, "loss": 0.5619, "step": 5326 }, { "epoch": 0.09259677727754698, "grad_norm": 2.4761756564798794, "learning_rate": 9.89765793154706e-07, "loss": 0.366, "step": 5327 }, { "epoch": 0.0926141598150498, "grad_norm": 2.298243929567042, "learning_rate": 9.897601261840514e-07, "loss": 0.5413, "step": 5328 }, { "epoch": 0.09263154235255262, "grad_norm": 2.405164572075524, "learning_rate": 9.897544576610822e-07, "loss": 0.7434, "step": 5329 }, { "epoch": 0.09264892489005545, "grad_norm": 2.211947143917495, "learning_rate": 9.897487875858161e-07, "loss": 0.2387, "step": 5330 }, { "epoch": 0.09266630742755827, "grad_norm": 2.1094203279400587, "learning_rate": 9.897431159582717e-07, "loss": 0.5688, "step": 5331 }, { "epoch": 0.0926836899650611, "grad_norm": 2.2886808721120073, "learning_rate": 9.897374427784662e-07, "loss": 0.3971, "step": 5332 }, { "epoch": 0.09270107250256393, "grad_norm": 1.266024626933437, "learning_rate": 9.897317680464183e-07, "loss": 0.5669, "step": 5333 }, { "epoch": 0.09271845504006675, "grad_norm": 2.056143924101378, "learning_rate": 9.897260917621455e-07, "loss": 0.522, "step": 5334 }, { "epoch": 0.09273583757756958, "grad_norm": 1.9859806370607944, "learning_rate": 9.89720413925666e-07, "loss": 0.3998, "step": 5335 }, { "epoch": 0.0927532201150724, "grad_norm": 2.1843520271499126, "learning_rate": 9.897147345369976e-07, "loss": 0.4819, "step": 5336 }, { "epoch": 0.09277060265257522, "grad_norm": 1.9794192362060858, "learning_rate": 9.897090535961587e-07, "loss": 0.5508, "step": 5337 }, { "epoch": 0.09278798519007805, "grad_norm": 1.64306179787629, "learning_rate": 9.897033711031668e-07, "loss": 0.3438, "step": 5338 }, { "epoch": 0.09280536772758087, "grad_norm": 2.0562178098473702, "learning_rate": 9.896976870580404e-07, "loss": 0.3585, "step": 5339 }, { "epoch": 0.0928227502650837, "grad_norm": 1.6655197275984666, "learning_rate": 9.896920014607973e-07, "loss": 0.2971, "step": 5340 }, { "epoch": 0.09284013280258652, "grad_norm": 1.8008426925752885, "learning_rate": 9.896863143114552e-07, "loss": 0.5904, "step": 5341 }, { "epoch": 0.09285751534008935, "grad_norm": 3.0690626430450743, "learning_rate": 9.896806256100326e-07, "loss": 0.6612, "step": 5342 }, { "epoch": 0.09287489787759218, "grad_norm": 2.123783081821261, "learning_rate": 9.896749353565474e-07, "loss": 0.3886, "step": 5343 }, { "epoch": 0.092892280415095, "grad_norm": 2.010239901802091, "learning_rate": 9.896692435510175e-07, "loss": 0.5166, "step": 5344 }, { "epoch": 0.09290966295259782, "grad_norm": 2.137690048890321, "learning_rate": 9.896635501934612e-07, "loss": 0.5982, "step": 5345 }, { "epoch": 0.09292704549010064, "grad_norm": 3.680898690537636, "learning_rate": 9.896578552838962e-07, "loss": 0.6223, "step": 5346 }, { "epoch": 0.09294442802760347, "grad_norm": 1.9182008821741892, "learning_rate": 9.896521588223408e-07, "loss": 0.3469, "step": 5347 }, { "epoch": 0.0929618105651063, "grad_norm": 2.3279515822723926, "learning_rate": 9.89646460808813e-07, "loss": 0.3021, "step": 5348 }, { "epoch": 0.09297919310260912, "grad_norm": 1.5352187816789657, "learning_rate": 9.89640761243331e-07, "loss": 0.4665, "step": 5349 }, { "epoch": 0.09299657564011195, "grad_norm": 2.763535509390731, "learning_rate": 9.896350601259122e-07, "loss": 0.2574, "step": 5350 }, { "epoch": 0.09301395817761476, "grad_norm": 1.2638376651829246, "learning_rate": 9.896293574565756e-07, "loss": 0.4618, "step": 5351 }, { "epoch": 0.0930313407151176, "grad_norm": 1.392989061136705, "learning_rate": 9.896236532353387e-07, "loss": 0.2492, "step": 5352 }, { "epoch": 0.09304872325262042, "grad_norm": 2.625259062132337, "learning_rate": 9.8961794746222e-07, "loss": 0.4721, "step": 5353 }, { "epoch": 0.09306610579012324, "grad_norm": 2.073647786386774, "learning_rate": 9.89612240137237e-07, "loss": 0.4727, "step": 5354 }, { "epoch": 0.09308348832762607, "grad_norm": 3.422817480649726, "learning_rate": 9.896065312604083e-07, "loss": 0.7816, "step": 5355 }, { "epoch": 0.09310087086512889, "grad_norm": 2.579506785390266, "learning_rate": 9.896008208317517e-07, "loss": 0.2936, "step": 5356 }, { "epoch": 0.09311825340263172, "grad_norm": 1.5990493788142641, "learning_rate": 9.895951088512853e-07, "loss": 0.3708, "step": 5357 }, { "epoch": 0.09313563594013453, "grad_norm": 2.6671835251886007, "learning_rate": 9.895893953190275e-07, "loss": 0.441, "step": 5358 }, { "epoch": 0.09315301847763736, "grad_norm": 2.043505543003974, "learning_rate": 9.89583680234996e-07, "loss": 0.52, "step": 5359 }, { "epoch": 0.0931704010151402, "grad_norm": 2.3441814603222215, "learning_rate": 9.895779635992093e-07, "loss": 0.3867, "step": 5360 }, { "epoch": 0.09318778355264301, "grad_norm": 1.6043429627776769, "learning_rate": 9.895722454116856e-07, "loss": 0.3549, "step": 5361 }, { "epoch": 0.09320516609014584, "grad_norm": 1.1088214754591903, "learning_rate": 9.895665256724423e-07, "loss": 0.321, "step": 5362 }, { "epoch": 0.09322254862764866, "grad_norm": 2.2610898089730767, "learning_rate": 9.895608043814983e-07, "loss": 0.3975, "step": 5363 }, { "epoch": 0.09323993116515149, "grad_norm": 1.8368379886481483, "learning_rate": 9.895550815388714e-07, "loss": 0.3098, "step": 5364 }, { "epoch": 0.09325731370265432, "grad_norm": 1.482850786399166, "learning_rate": 9.895493571445796e-07, "loss": 0.5805, "step": 5365 }, { "epoch": 0.09327469624015713, "grad_norm": 1.2153020516088295, "learning_rate": 9.895436311986413e-07, "loss": 0.2685, "step": 5366 }, { "epoch": 0.09329207877765996, "grad_norm": 2.446942367795458, "learning_rate": 9.895379037010747e-07, "loss": 0.5975, "step": 5367 }, { "epoch": 0.09330946131516278, "grad_norm": 1.7995098278078112, "learning_rate": 9.895321746518978e-07, "loss": 0.5742, "step": 5368 }, { "epoch": 0.09332684385266561, "grad_norm": 2.379513684193281, "learning_rate": 9.895264440511286e-07, "loss": 0.2683, "step": 5369 }, { "epoch": 0.09334422639016844, "grad_norm": 1.554041733732926, "learning_rate": 9.895207118987857e-07, "loss": 0.2196, "step": 5370 }, { "epoch": 0.09336160892767126, "grad_norm": 2.092932277072263, "learning_rate": 9.895149781948867e-07, "loss": 0.338, "step": 5371 }, { "epoch": 0.09337899146517409, "grad_norm": 4.274695390422361, "learning_rate": 9.895092429394503e-07, "loss": 0.6955, "step": 5372 }, { "epoch": 0.0933963740026769, "grad_norm": 3.0057045113008143, "learning_rate": 9.895035061324944e-07, "loss": 0.843, "step": 5373 }, { "epoch": 0.09341375654017973, "grad_norm": 2.7876715156145973, "learning_rate": 9.894977677740372e-07, "loss": 0.3286, "step": 5374 }, { "epoch": 0.09343113907768257, "grad_norm": 2.292769174997957, "learning_rate": 9.89492027864097e-07, "loss": 0.2782, "step": 5375 }, { "epoch": 0.09344852161518538, "grad_norm": 1.457472673315292, "learning_rate": 9.89486286402692e-07, "loss": 0.2497, "step": 5376 }, { "epoch": 0.09346590415268821, "grad_norm": 2.436117163504335, "learning_rate": 9.894805433898402e-07, "loss": 0.3745, "step": 5377 }, { "epoch": 0.09348328669019103, "grad_norm": 1.6107664036973517, "learning_rate": 9.894747988255599e-07, "loss": 0.3445, "step": 5378 }, { "epoch": 0.09350066922769386, "grad_norm": 2.141050665067387, "learning_rate": 9.894690527098695e-07, "loss": 0.4966, "step": 5379 }, { "epoch": 0.09351805176519669, "grad_norm": 2.6359557594793204, "learning_rate": 9.894633050427868e-07, "loss": 0.7501, "step": 5380 }, { "epoch": 0.0935354343026995, "grad_norm": 2.8501668595390957, "learning_rate": 9.894575558243304e-07, "loss": 0.871, "step": 5381 }, { "epoch": 0.09355281684020234, "grad_norm": 3.4327401383532554, "learning_rate": 9.894518050545184e-07, "loss": 0.4833, "step": 5382 }, { "epoch": 0.09357019937770515, "grad_norm": 1.83267461327979, "learning_rate": 9.89446052733369e-07, "loss": 0.316, "step": 5383 }, { "epoch": 0.09358758191520798, "grad_norm": 1.9590140644818228, "learning_rate": 9.894402988609004e-07, "loss": 0.5198, "step": 5384 }, { "epoch": 0.09360496445271081, "grad_norm": 2.017026075815277, "learning_rate": 9.89434543437131e-07, "loss": 0.6607, "step": 5385 }, { "epoch": 0.09362234699021363, "grad_norm": 1.494270070187762, "learning_rate": 9.89428786462079e-07, "loss": 0.4336, "step": 5386 }, { "epoch": 0.09363972952771646, "grad_norm": 3.2458340695725103, "learning_rate": 9.894230279357622e-07, "loss": 0.7409, "step": 5387 }, { "epoch": 0.09365711206521928, "grad_norm": 5.260757356484436, "learning_rate": 9.894172678581997e-07, "loss": 0.4414, "step": 5388 }, { "epoch": 0.0936744946027221, "grad_norm": 3.460280507918725, "learning_rate": 9.89411506229409e-07, "loss": 0.2706, "step": 5389 }, { "epoch": 0.09369187714022494, "grad_norm": 2.243839687929987, "learning_rate": 9.894057430494087e-07, "loss": 0.351, "step": 5390 }, { "epoch": 0.09370925967772775, "grad_norm": 2.258468574805515, "learning_rate": 9.893999783182169e-07, "loss": 0.3511, "step": 5391 }, { "epoch": 0.09372664221523058, "grad_norm": 1.5712451441686952, "learning_rate": 9.893942120358521e-07, "loss": 0.4413, "step": 5392 }, { "epoch": 0.0937440247527334, "grad_norm": 5.460769147781648, "learning_rate": 9.893884442023323e-07, "loss": 0.3905, "step": 5393 }, { "epoch": 0.09376140729023623, "grad_norm": 1.8314643515239355, "learning_rate": 9.89382674817676e-07, "loss": 0.5168, "step": 5394 }, { "epoch": 0.09377878982773906, "grad_norm": 2.3128535288375196, "learning_rate": 9.893769038819015e-07, "loss": 0.3153, "step": 5395 }, { "epoch": 0.09379617236524188, "grad_norm": 1.896144424836292, "learning_rate": 9.89371131395027e-07, "loss": 0.3529, "step": 5396 }, { "epoch": 0.0938135549027447, "grad_norm": 1.7529549257140116, "learning_rate": 9.89365357357071e-07, "loss": 0.4846, "step": 5397 }, { "epoch": 0.09383093744024752, "grad_norm": 2.39755237593003, "learning_rate": 9.893595817680514e-07, "loss": 0.4224, "step": 5398 }, { "epoch": 0.09384831997775035, "grad_norm": 2.5552866761586133, "learning_rate": 9.893538046279868e-07, "loss": 0.3956, "step": 5399 }, { "epoch": 0.09386570251525318, "grad_norm": 2.8931800853486225, "learning_rate": 9.893480259368953e-07, "loss": 0.3844, "step": 5400 }, { "epoch": 0.093883085052756, "grad_norm": 1.8322503015036424, "learning_rate": 9.893422456947953e-07, "loss": 0.577, "step": 5401 }, { "epoch": 0.09390046759025883, "grad_norm": 2.5621207096418153, "learning_rate": 9.893364639017053e-07, "loss": 0.5905, "step": 5402 }, { "epoch": 0.09391785012776165, "grad_norm": 1.8081888663270522, "learning_rate": 9.893306805576436e-07, "loss": 0.3977, "step": 5403 }, { "epoch": 0.09393523266526448, "grad_norm": 2.699432482518862, "learning_rate": 9.893248956626284e-07, "loss": 0.3999, "step": 5404 }, { "epoch": 0.0939526152027673, "grad_norm": 1.4332231517232452, "learning_rate": 9.893191092166778e-07, "loss": 0.5648, "step": 5405 }, { "epoch": 0.09396999774027012, "grad_norm": 2.4496159213394657, "learning_rate": 9.893133212198105e-07, "loss": 0.6697, "step": 5406 }, { "epoch": 0.09398738027777295, "grad_norm": 1.5544634724268283, "learning_rate": 9.89307531672045e-07, "loss": 0.2499, "step": 5407 }, { "epoch": 0.09400476281527577, "grad_norm": 1.2932956413261079, "learning_rate": 9.893017405733991e-07, "loss": 0.4092, "step": 5408 }, { "epoch": 0.0940221453527786, "grad_norm": 1.4392093380132982, "learning_rate": 9.892959479238917e-07, "loss": 0.3672, "step": 5409 }, { "epoch": 0.09403952789028143, "grad_norm": 1.8030122194844649, "learning_rate": 9.892901537235406e-07, "loss": 0.4791, "step": 5410 }, { "epoch": 0.09405691042778425, "grad_norm": 2.155852448355133, "learning_rate": 9.892843579723647e-07, "loss": 0.4318, "step": 5411 }, { "epoch": 0.09407429296528708, "grad_norm": 1.9787266718925707, "learning_rate": 9.89278560670382e-07, "loss": 0.2772, "step": 5412 }, { "epoch": 0.09409167550278989, "grad_norm": 2.1903838674547296, "learning_rate": 9.89272761817611e-07, "loss": 0.5003, "step": 5413 }, { "epoch": 0.09410905804029272, "grad_norm": 2.9921568546539232, "learning_rate": 9.892669614140703e-07, "loss": 0.1867, "step": 5414 }, { "epoch": 0.09412644057779555, "grad_norm": 1.7218406549278868, "learning_rate": 9.89261159459778e-07, "loss": 0.3935, "step": 5415 }, { "epoch": 0.09414382311529837, "grad_norm": 1.8388464806878038, "learning_rate": 9.892553559547526e-07, "loss": 0.5431, "step": 5416 }, { "epoch": 0.0941612056528012, "grad_norm": 1.6182495591256878, "learning_rate": 9.892495508990123e-07, "loss": 0.2848, "step": 5417 }, { "epoch": 0.09417858819030402, "grad_norm": 1.7798786347905107, "learning_rate": 9.892437442925758e-07, "loss": 0.4605, "step": 5418 }, { "epoch": 0.09419597072780685, "grad_norm": 5.737385099122851, "learning_rate": 9.892379361354615e-07, "loss": 0.4359, "step": 5419 }, { "epoch": 0.09421335326530968, "grad_norm": 3.000590913054748, "learning_rate": 9.892321264276875e-07, "loss": 0.4658, "step": 5420 }, { "epoch": 0.0942307358028125, "grad_norm": 1.331499988754079, "learning_rate": 9.892263151692724e-07, "loss": 0.5765, "step": 5421 }, { "epoch": 0.09424811834031532, "grad_norm": 1.5036045205560447, "learning_rate": 9.892205023602347e-07, "loss": 0.3404, "step": 5422 }, { "epoch": 0.09426550087781814, "grad_norm": 1.5233122511391088, "learning_rate": 9.892146880005926e-07, "loss": 0.2948, "step": 5423 }, { "epoch": 0.09428288341532097, "grad_norm": 1.350735623217694, "learning_rate": 9.892088720903646e-07, "loss": 0.4415, "step": 5424 }, { "epoch": 0.09430026595282379, "grad_norm": 1.7968114095499554, "learning_rate": 9.892030546295696e-07, "loss": 0.5044, "step": 5425 }, { "epoch": 0.09431764849032662, "grad_norm": 2.2149672633563413, "learning_rate": 9.891972356182252e-07, "loss": 0.4045, "step": 5426 }, { "epoch": 0.09433503102782945, "grad_norm": 2.3645631931700914, "learning_rate": 9.891914150563504e-07, "loss": 0.4399, "step": 5427 }, { "epoch": 0.09435241356533226, "grad_norm": 2.2096782237311956, "learning_rate": 9.891855929439636e-07, "loss": 0.3209, "step": 5428 }, { "epoch": 0.0943697961028351, "grad_norm": 1.8287110928146082, "learning_rate": 9.891797692810832e-07, "loss": 0.3632, "step": 5429 }, { "epoch": 0.09438717864033791, "grad_norm": 1.6638727781381049, "learning_rate": 9.891739440677276e-07, "loss": 0.715, "step": 5430 }, { "epoch": 0.09440456117784074, "grad_norm": 2.554509616827233, "learning_rate": 9.891681173039155e-07, "loss": 0.3391, "step": 5431 }, { "epoch": 0.09442194371534357, "grad_norm": 2.0840693177644143, "learning_rate": 9.891622889896648e-07, "loss": 0.7983, "step": 5432 }, { "epoch": 0.09443932625284639, "grad_norm": 2.2081141706076353, "learning_rate": 9.891564591249947e-07, "loss": 0.4527, "step": 5433 }, { "epoch": 0.09445670879034922, "grad_norm": 1.3241241230976324, "learning_rate": 9.89150627709923e-07, "loss": 0.2351, "step": 5434 }, { "epoch": 0.09447409132785203, "grad_norm": 1.8037763437905585, "learning_rate": 9.89144794744469e-07, "loss": 0.4294, "step": 5435 }, { "epoch": 0.09449147386535486, "grad_norm": 1.8665126189477343, "learning_rate": 9.891389602286503e-07, "loss": 1.0335, "step": 5436 }, { "epoch": 0.0945088564028577, "grad_norm": 2.1984532092118765, "learning_rate": 9.891331241624858e-07, "loss": 0.3935, "step": 5437 }, { "epoch": 0.09452623894036051, "grad_norm": 2.0261391094826466, "learning_rate": 9.891272865459941e-07, "loss": 0.71, "step": 5438 }, { "epoch": 0.09454362147786334, "grad_norm": 2.2363121334669884, "learning_rate": 9.891214473791935e-07, "loss": 0.3712, "step": 5439 }, { "epoch": 0.09456100401536616, "grad_norm": 1.7081859900452927, "learning_rate": 9.891156066621026e-07, "loss": 0.5358, "step": 5440 }, { "epoch": 0.09457838655286899, "grad_norm": 2.014746657366651, "learning_rate": 9.891097643947398e-07, "loss": 0.5989, "step": 5441 }, { "epoch": 0.09459576909037182, "grad_norm": 3.0239227560688846, "learning_rate": 9.89103920577124e-07, "loss": 0.3028, "step": 5442 }, { "epoch": 0.09461315162787463, "grad_norm": 2.227716575790215, "learning_rate": 9.890980752092731e-07, "loss": 0.5799, "step": 5443 }, { "epoch": 0.09463053416537746, "grad_norm": 1.759579927263464, "learning_rate": 9.890922282912065e-07, "loss": 0.4083, "step": 5444 }, { "epoch": 0.09464791670288028, "grad_norm": 2.0770296725273494, "learning_rate": 9.890863798229419e-07, "loss": 0.251, "step": 5445 }, { "epoch": 0.09466529924038311, "grad_norm": 2.084573768075032, "learning_rate": 9.890805298044979e-07, "loss": 0.6081, "step": 5446 }, { "epoch": 0.09468268177788594, "grad_norm": 1.801322313495455, "learning_rate": 9.890746782358936e-07, "loss": 0.4563, "step": 5447 }, { "epoch": 0.09470006431538876, "grad_norm": 1.8147543870075291, "learning_rate": 9.890688251171473e-07, "loss": 0.3833, "step": 5448 }, { "epoch": 0.09471744685289159, "grad_norm": 2.7528591837514846, "learning_rate": 9.890629704482773e-07, "loss": 0.3271, "step": 5449 }, { "epoch": 0.0947348293903944, "grad_norm": 2.363500677169153, "learning_rate": 9.890571142293023e-07, "loss": 0.5791, "step": 5450 }, { "epoch": 0.09475221192789723, "grad_norm": 2.0133763274188663, "learning_rate": 9.89051256460241e-07, "loss": 0.5098, "step": 5451 }, { "epoch": 0.09476959446540006, "grad_norm": 1.4386346303889825, "learning_rate": 9.890453971411119e-07, "loss": 0.4311, "step": 5452 }, { "epoch": 0.09478697700290288, "grad_norm": 1.7977119976024172, "learning_rate": 9.890395362719334e-07, "loss": 0.3004, "step": 5453 }, { "epoch": 0.09480435954040571, "grad_norm": 1.4472723879485654, "learning_rate": 9.890336738527242e-07, "loss": 0.7159, "step": 5454 }, { "epoch": 0.09482174207790853, "grad_norm": 2.010371570143262, "learning_rate": 9.890278098835032e-07, "loss": 0.5787, "step": 5455 }, { "epoch": 0.09483912461541136, "grad_norm": 2.3950788076295764, "learning_rate": 9.890219443642885e-07, "loss": 0.4185, "step": 5456 }, { "epoch": 0.09485650715291419, "grad_norm": 1.49544612183739, "learning_rate": 9.890160772950988e-07, "loss": 0.2263, "step": 5457 }, { "epoch": 0.094873889690417, "grad_norm": 3.445232263353485, "learning_rate": 9.890102086759528e-07, "loss": 0.7778, "step": 5458 }, { "epoch": 0.09489127222791983, "grad_norm": 3.2897776733230084, "learning_rate": 9.890043385068692e-07, "loss": 0.4827, "step": 5459 }, { "epoch": 0.09490865476542265, "grad_norm": 2.38529505622363, "learning_rate": 9.889984667878663e-07, "loss": 0.8025, "step": 5460 }, { "epoch": 0.09492603730292548, "grad_norm": 1.7777496154946675, "learning_rate": 9.88992593518963e-07, "loss": 0.4822, "step": 5461 }, { "epoch": 0.09494341984042831, "grad_norm": 2.600397338003099, "learning_rate": 9.889867187001778e-07, "loss": 0.281, "step": 5462 }, { "epoch": 0.09496080237793113, "grad_norm": 2.9294304078899907, "learning_rate": 9.889808423315293e-07, "loss": 0.5999, "step": 5463 }, { "epoch": 0.09497818491543396, "grad_norm": 2.054920527440122, "learning_rate": 9.889749644130361e-07, "loss": 0.594, "step": 5464 }, { "epoch": 0.09499556745293677, "grad_norm": 4.864703534767477, "learning_rate": 9.88969084944717e-07, "loss": 0.3514, "step": 5465 }, { "epoch": 0.0950129499904396, "grad_norm": 1.4062493170073338, "learning_rate": 9.889632039265905e-07, "loss": 0.2412, "step": 5466 }, { "epoch": 0.09503033252794243, "grad_norm": 1.5633063313539903, "learning_rate": 9.88957321358675e-07, "loss": 0.4535, "step": 5467 }, { "epoch": 0.09504771506544525, "grad_norm": 2.2549236869449576, "learning_rate": 9.889514372409897e-07, "loss": 0.4133, "step": 5468 }, { "epoch": 0.09506509760294808, "grad_norm": 1.8409077053031322, "learning_rate": 9.889455515735528e-07, "loss": 0.3477, "step": 5469 }, { "epoch": 0.0950824801404509, "grad_norm": 3.3206820277262974, "learning_rate": 9.889396643563832e-07, "loss": 0.4217, "step": 5470 }, { "epoch": 0.09509986267795373, "grad_norm": 1.9662614617454592, "learning_rate": 9.889337755894993e-07, "loss": 0.7792, "step": 5471 }, { "epoch": 0.09511724521545656, "grad_norm": 2.533524311347131, "learning_rate": 9.889278852729202e-07, "loss": 0.4988, "step": 5472 }, { "epoch": 0.09513462775295937, "grad_norm": 1.2773671404273599, "learning_rate": 9.889219934066641e-07, "loss": 0.4152, "step": 5473 }, { "epoch": 0.0951520102904622, "grad_norm": 1.4779747649866861, "learning_rate": 9.8891609999075e-07, "loss": 0.2928, "step": 5474 }, { "epoch": 0.09516939282796502, "grad_norm": 1.8070625926476924, "learning_rate": 9.889102050251963e-07, "loss": 0.4932, "step": 5475 }, { "epoch": 0.09518677536546785, "grad_norm": 1.6335008090941463, "learning_rate": 9.88904308510022e-07, "loss": 0.3729, "step": 5476 }, { "epoch": 0.09520415790297068, "grad_norm": 2.454021366744578, "learning_rate": 9.888984104452455e-07, "loss": 0.46, "step": 5477 }, { "epoch": 0.0952215404404735, "grad_norm": 2.445344199035063, "learning_rate": 9.888925108308856e-07, "loss": 0.3899, "step": 5478 }, { "epoch": 0.09523892297797633, "grad_norm": 2.4247530788872913, "learning_rate": 9.88886609666961e-07, "loss": 0.4213, "step": 5479 }, { "epoch": 0.09525630551547914, "grad_norm": 2.1951208436359466, "learning_rate": 9.888807069534907e-07, "loss": 0.2313, "step": 5480 }, { "epoch": 0.09527368805298198, "grad_norm": 4.02132427702717, "learning_rate": 9.888748026904928e-07, "loss": 0.4374, "step": 5481 }, { "epoch": 0.0952910705904848, "grad_norm": 2.167232564067455, "learning_rate": 9.888688968779865e-07, "loss": 0.397, "step": 5482 }, { "epoch": 0.09530845312798762, "grad_norm": 1.7638633795326477, "learning_rate": 9.888629895159903e-07, "loss": 0.4173, "step": 5483 }, { "epoch": 0.09532583566549045, "grad_norm": 1.8575554624654083, "learning_rate": 9.88857080604523e-07, "loss": 0.42, "step": 5484 }, { "epoch": 0.09534321820299327, "grad_norm": 1.724626872521138, "learning_rate": 9.888511701436035e-07, "loss": 0.4708, "step": 5485 }, { "epoch": 0.0953606007404961, "grad_norm": 1.0231187588022113, "learning_rate": 9.8884525813325e-07, "loss": 0.4952, "step": 5486 }, { "epoch": 0.09537798327799893, "grad_norm": 1.6358638581933487, "learning_rate": 9.88839344573482e-07, "loss": 0.3075, "step": 5487 }, { "epoch": 0.09539536581550175, "grad_norm": 1.8398170104456864, "learning_rate": 9.888334294643174e-07, "loss": 0.5362, "step": 5488 }, { "epoch": 0.09541274835300458, "grad_norm": 1.733530934875034, "learning_rate": 9.888275128057757e-07, "loss": 0.1865, "step": 5489 }, { "epoch": 0.09543013089050739, "grad_norm": 1.9386299817299706, "learning_rate": 9.888215945978752e-07, "loss": 0.3606, "step": 5490 }, { "epoch": 0.09544751342801022, "grad_norm": 1.6498954855623893, "learning_rate": 9.88815674840635e-07, "loss": 0.3678, "step": 5491 }, { "epoch": 0.09546489596551305, "grad_norm": 2.1319413485759857, "learning_rate": 9.888097535340734e-07, "loss": 0.6128, "step": 5492 }, { "epoch": 0.09548227850301587, "grad_norm": 1.7463408764709203, "learning_rate": 9.888038306782094e-07, "loss": 0.3334, "step": 5493 }, { "epoch": 0.0954996610405187, "grad_norm": 2.0117513932590545, "learning_rate": 9.88797906273062e-07, "loss": 0.319, "step": 5494 }, { "epoch": 0.09551704357802152, "grad_norm": 1.807881791626373, "learning_rate": 9.887919803186497e-07, "loss": 0.4413, "step": 5495 }, { "epoch": 0.09553442611552435, "grad_norm": 1.7396121410322885, "learning_rate": 9.887860528149911e-07, "loss": 0.3113, "step": 5496 }, { "epoch": 0.09555180865302716, "grad_norm": 2.700449761954674, "learning_rate": 9.887801237621056e-07, "loss": 0.4151, "step": 5497 }, { "epoch": 0.09556919119052999, "grad_norm": 1.8357209525107676, "learning_rate": 9.887741931600116e-07, "loss": 0.5566, "step": 5498 }, { "epoch": 0.09558657372803282, "grad_norm": 1.9376560758984036, "learning_rate": 9.887682610087277e-07, "loss": 0.2748, "step": 5499 }, { "epoch": 0.09560395626553564, "grad_norm": 1.116360099978385, "learning_rate": 9.88762327308273e-07, "loss": 0.344, "step": 5500 }, { "epoch": 0.09562133880303847, "grad_norm": 1.8556859985538359, "learning_rate": 9.887563920586663e-07, "loss": 0.4463, "step": 5501 }, { "epoch": 0.09563872134054129, "grad_norm": 2.0241763537376327, "learning_rate": 9.887504552599264e-07, "loss": 0.2494, "step": 5502 }, { "epoch": 0.09565610387804412, "grad_norm": 2.08268262566753, "learning_rate": 9.88744516912072e-07, "loss": 0.7644, "step": 5503 }, { "epoch": 0.09567348641554695, "grad_norm": 1.9105938752171796, "learning_rate": 9.88738577015122e-07, "loss": 0.5373, "step": 5504 }, { "epoch": 0.09569086895304976, "grad_norm": 1.9727308345297438, "learning_rate": 9.887326355690955e-07, "loss": 0.6183, "step": 5505 }, { "epoch": 0.09570825149055259, "grad_norm": 2.1281886280738083, "learning_rate": 9.887266925740107e-07, "loss": 0.4872, "step": 5506 }, { "epoch": 0.09572563402805541, "grad_norm": 2.245379508741607, "learning_rate": 9.88720748029887e-07, "loss": 0.4438, "step": 5507 }, { "epoch": 0.09574301656555824, "grad_norm": 2.40194620456221, "learning_rate": 9.887148019367428e-07, "loss": 0.4543, "step": 5508 }, { "epoch": 0.09576039910306107, "grad_norm": 2.1030665381992097, "learning_rate": 9.88708854294597e-07, "loss": 0.3875, "step": 5509 }, { "epoch": 0.09577778164056389, "grad_norm": 1.68861303748236, "learning_rate": 9.887029051034691e-07, "loss": 0.4591, "step": 5510 }, { "epoch": 0.09579516417806672, "grad_norm": 2.7325913007590565, "learning_rate": 9.886969543633775e-07, "loss": 0.4986, "step": 5511 }, { "epoch": 0.09581254671556953, "grad_norm": 1.2811584681529706, "learning_rate": 9.886910020743406e-07, "loss": 0.3013, "step": 5512 }, { "epoch": 0.09582992925307236, "grad_norm": 3.286684513715976, "learning_rate": 9.88685048236378e-07, "loss": 0.3795, "step": 5513 }, { "epoch": 0.0958473117905752, "grad_norm": 2.485877732910985, "learning_rate": 9.886790928495083e-07, "loss": 0.4342, "step": 5514 }, { "epoch": 0.09586469432807801, "grad_norm": 1.9671133875932933, "learning_rate": 9.886731359137501e-07, "loss": 0.3898, "step": 5515 }, { "epoch": 0.09588207686558084, "grad_norm": 2.0188438038843657, "learning_rate": 9.886671774291228e-07, "loss": 0.3605, "step": 5516 }, { "epoch": 0.09589945940308366, "grad_norm": 2.91830015596588, "learning_rate": 9.88661217395645e-07, "loss": 0.4109, "step": 5517 }, { "epoch": 0.09591684194058649, "grad_norm": 2.354666432925905, "learning_rate": 9.886552558133356e-07, "loss": 0.3939, "step": 5518 }, { "epoch": 0.09593422447808932, "grad_norm": 2.2230457297101016, "learning_rate": 9.886492926822135e-07, "loss": 0.4225, "step": 5519 }, { "epoch": 0.09595160701559213, "grad_norm": 1.6578183969280085, "learning_rate": 9.886433280022974e-07, "loss": 0.4622, "step": 5520 }, { "epoch": 0.09596898955309496, "grad_norm": 1.7973228750200876, "learning_rate": 9.886373617736067e-07, "loss": 0.687, "step": 5521 }, { "epoch": 0.09598637209059778, "grad_norm": 2.0954339512816844, "learning_rate": 9.886313939961599e-07, "loss": 0.5975, "step": 5522 }, { "epoch": 0.09600375462810061, "grad_norm": 1.2459791430959484, "learning_rate": 9.88625424669976e-07, "loss": 0.3208, "step": 5523 }, { "epoch": 0.09602113716560344, "grad_norm": 2.5252437274399653, "learning_rate": 9.886194537950741e-07, "loss": 0.5813, "step": 5524 }, { "epoch": 0.09603851970310626, "grad_norm": 3.408560049746983, "learning_rate": 9.88613481371473e-07, "loss": 0.3762, "step": 5525 }, { "epoch": 0.09605590224060909, "grad_norm": 1.8643469230501717, "learning_rate": 9.886075073991913e-07, "loss": 0.3935, "step": 5526 }, { "epoch": 0.0960732847781119, "grad_norm": 1.1721148700635957, "learning_rate": 9.886015318782485e-07, "loss": 0.4759, "step": 5527 }, { "epoch": 0.09609066731561473, "grad_norm": 2.970826365643252, "learning_rate": 9.885955548086633e-07, "loss": 0.4007, "step": 5528 }, { "epoch": 0.09610804985311756, "grad_norm": 3.0355634907949014, "learning_rate": 9.885895761904545e-07, "loss": 0.6072, "step": 5529 }, { "epoch": 0.09612543239062038, "grad_norm": 1.3844843769349582, "learning_rate": 9.885835960236415e-07, "loss": 0.3234, "step": 5530 }, { "epoch": 0.09614281492812321, "grad_norm": 2.6862056522207123, "learning_rate": 9.885776143082425e-07, "loss": 0.2201, "step": 5531 }, { "epoch": 0.09616019746562603, "grad_norm": 2.025978330376688, "learning_rate": 9.885716310442772e-07, "loss": 0.3974, "step": 5532 }, { "epoch": 0.09617758000312886, "grad_norm": 2.2093006174887395, "learning_rate": 9.885656462317641e-07, "loss": 0.2958, "step": 5533 }, { "epoch": 0.09619496254063169, "grad_norm": 1.458670857672804, "learning_rate": 9.885596598707224e-07, "loss": 0.5357, "step": 5534 }, { "epoch": 0.0962123450781345, "grad_norm": 1.7353439274183704, "learning_rate": 9.885536719611712e-07, "loss": 0.3454, "step": 5535 }, { "epoch": 0.09622972761563733, "grad_norm": 1.9772550034412542, "learning_rate": 9.88547682503129e-07, "loss": 0.503, "step": 5536 }, { "epoch": 0.09624711015314015, "grad_norm": 1.231707955112022, "learning_rate": 9.88541691496615e-07, "loss": 0.3272, "step": 5537 }, { "epoch": 0.09626449269064298, "grad_norm": 2.0578929105074644, "learning_rate": 9.885356989416485e-07, "loss": 0.6116, "step": 5538 }, { "epoch": 0.09628187522814581, "grad_norm": 2.0568248427713676, "learning_rate": 9.88529704838248e-07, "loss": 0.3072, "step": 5539 }, { "epoch": 0.09629925776564863, "grad_norm": 1.395288809575983, "learning_rate": 9.88523709186433e-07, "loss": 0.5039, "step": 5540 }, { "epoch": 0.09631664030315146, "grad_norm": 3.3200644939744732, "learning_rate": 9.885177119862222e-07, "loss": 0.5694, "step": 5541 }, { "epoch": 0.09633402284065427, "grad_norm": 1.4782168771677369, "learning_rate": 9.885117132376346e-07, "loss": 0.4938, "step": 5542 }, { "epoch": 0.0963514053781571, "grad_norm": 2.074776091586914, "learning_rate": 9.885057129406893e-07, "loss": 0.5525, "step": 5543 }, { "epoch": 0.09636878791565993, "grad_norm": 1.4686200818205934, "learning_rate": 9.884997110954054e-07, "loss": 0.4406, "step": 5544 }, { "epoch": 0.09638617045316275, "grad_norm": 1.7243362807405263, "learning_rate": 9.884937077018016e-07, "loss": 0.6975, "step": 5545 }, { "epoch": 0.09640355299066558, "grad_norm": 2.029277733108892, "learning_rate": 9.884877027598972e-07, "loss": 0.6021, "step": 5546 }, { "epoch": 0.0964209355281684, "grad_norm": 1.5138121982114108, "learning_rate": 9.884816962697112e-07, "loss": 0.4754, "step": 5547 }, { "epoch": 0.09643831806567123, "grad_norm": 2.1524378955537293, "learning_rate": 9.884756882312625e-07, "loss": 0.5207, "step": 5548 }, { "epoch": 0.09645570060317406, "grad_norm": 1.993995011418116, "learning_rate": 9.884696786445705e-07, "loss": 0.6191, "step": 5549 }, { "epoch": 0.09647308314067687, "grad_norm": 1.4244568155114625, "learning_rate": 9.884636675096538e-07, "loss": 0.4337, "step": 5550 }, { "epoch": 0.0964904656781797, "grad_norm": 1.331524554020287, "learning_rate": 9.884576548265318e-07, "loss": 0.2019, "step": 5551 }, { "epoch": 0.09650784821568252, "grad_norm": 3.386701429762219, "learning_rate": 9.884516405952235e-07, "loss": 0.6098, "step": 5552 }, { "epoch": 0.09652523075318535, "grad_norm": 1.8888846455062471, "learning_rate": 9.884456248157476e-07, "loss": 0.4735, "step": 5553 }, { "epoch": 0.09654261329068818, "grad_norm": 1.055255960077502, "learning_rate": 9.884396074881236e-07, "loss": 0.3354, "step": 5554 }, { "epoch": 0.096559995828191, "grad_norm": 1.7772847723604877, "learning_rate": 9.884335886123703e-07, "loss": 0.3572, "step": 5555 }, { "epoch": 0.09657737836569383, "grad_norm": 2.6038544249901365, "learning_rate": 9.88427568188507e-07, "loss": 0.6798, "step": 5556 }, { "epoch": 0.09659476090319664, "grad_norm": 2.2828500890497, "learning_rate": 9.884215462165528e-07, "loss": 0.5014, "step": 5557 }, { "epoch": 0.09661214344069947, "grad_norm": 1.7463837434822134, "learning_rate": 9.884155226965263e-07, "loss": 0.521, "step": 5558 }, { "epoch": 0.0966295259782023, "grad_norm": 1.8259856761374957, "learning_rate": 9.884094976284472e-07, "loss": 0.6592, "step": 5559 }, { "epoch": 0.09664690851570512, "grad_norm": 2.6268453924373336, "learning_rate": 9.884034710123344e-07, "loss": 0.39, "step": 5560 }, { "epoch": 0.09666429105320795, "grad_norm": 1.652612858231536, "learning_rate": 9.88397442848207e-07, "loss": 0.2065, "step": 5561 }, { "epoch": 0.09668167359071077, "grad_norm": 5.293981129569444, "learning_rate": 9.883914131360837e-07, "loss": 0.4916, "step": 5562 }, { "epoch": 0.0966990561282136, "grad_norm": 1.6464607266619544, "learning_rate": 9.883853818759843e-07, "loss": 0.3361, "step": 5563 }, { "epoch": 0.09671643866571641, "grad_norm": 2.618507017186977, "learning_rate": 9.883793490679275e-07, "loss": 0.4323, "step": 5564 }, { "epoch": 0.09673382120321924, "grad_norm": 2.308376375392583, "learning_rate": 9.883733147119324e-07, "loss": 0.421, "step": 5565 }, { "epoch": 0.09675120374072207, "grad_norm": 1.5106777092750086, "learning_rate": 9.883672788080184e-07, "loss": 0.5307, "step": 5566 }, { "epoch": 0.09676858627822489, "grad_norm": 1.7100147054012653, "learning_rate": 9.883612413562043e-07, "loss": 0.4368, "step": 5567 }, { "epoch": 0.09678596881572772, "grad_norm": 1.6760375648984749, "learning_rate": 9.883552023565094e-07, "loss": 0.3462, "step": 5568 }, { "epoch": 0.09680335135323054, "grad_norm": 3.901193831335227, "learning_rate": 9.88349161808953e-07, "loss": 0.6563, "step": 5569 }, { "epoch": 0.09682073389073337, "grad_norm": 1.195097038685089, "learning_rate": 9.88343119713554e-07, "loss": 0.4736, "step": 5570 }, { "epoch": 0.0968381164282362, "grad_norm": 2.0402551305662415, "learning_rate": 9.883370760703316e-07, "loss": 0.4068, "step": 5571 }, { "epoch": 0.09685549896573901, "grad_norm": 0.9579445630199136, "learning_rate": 9.883310308793052e-07, "loss": 0.2029, "step": 5572 }, { "epoch": 0.09687288150324184, "grad_norm": 2.4486406753406307, "learning_rate": 9.883249841404935e-07, "loss": 0.4045, "step": 5573 }, { "epoch": 0.09689026404074466, "grad_norm": 1.3470006709794347, "learning_rate": 9.88318935853916e-07, "loss": 0.3261, "step": 5574 }, { "epoch": 0.09690764657824749, "grad_norm": 2.8660543252581228, "learning_rate": 9.883128860195917e-07, "loss": 0.5048, "step": 5575 }, { "epoch": 0.09692502911575032, "grad_norm": 3.0069187926284813, "learning_rate": 9.8830683463754e-07, "loss": 0.3279, "step": 5576 }, { "epoch": 0.09694241165325314, "grad_norm": 0.8872902897675377, "learning_rate": 9.8830078170778e-07, "loss": 0.5281, "step": 5577 }, { "epoch": 0.09695979419075597, "grad_norm": 1.6375100418094237, "learning_rate": 9.882947272303307e-07, "loss": 0.3761, "step": 5578 }, { "epoch": 0.09697717672825878, "grad_norm": 3.7019140346801724, "learning_rate": 9.882886712052113e-07, "loss": 0.5487, "step": 5579 }, { "epoch": 0.09699455926576162, "grad_norm": 4.911784066019414, "learning_rate": 9.882826136324414e-07, "loss": 0.3911, "step": 5580 }, { "epoch": 0.09701194180326445, "grad_norm": 2.2387035750100015, "learning_rate": 9.882765545120397e-07, "loss": 0.3642, "step": 5581 }, { "epoch": 0.09702932434076726, "grad_norm": 1.2700219579258747, "learning_rate": 9.88270493844026e-07, "loss": 0.2996, "step": 5582 }, { "epoch": 0.09704670687827009, "grad_norm": 2.1837515578288667, "learning_rate": 9.882644316284186e-07, "loss": 0.2924, "step": 5583 }, { "epoch": 0.09706408941577291, "grad_norm": 1.5573464360454476, "learning_rate": 9.882583678652374e-07, "loss": 0.3702, "step": 5584 }, { "epoch": 0.09708147195327574, "grad_norm": 1.8467386484078099, "learning_rate": 9.882523025545016e-07, "loss": 0.3818, "step": 5585 }, { "epoch": 0.09709885449077857, "grad_norm": 1.1133350846036358, "learning_rate": 9.882462356962302e-07, "loss": 0.2695, "step": 5586 }, { "epoch": 0.09711623702828139, "grad_norm": 1.7932177490728303, "learning_rate": 9.882401672904426e-07, "loss": 0.2609, "step": 5587 }, { "epoch": 0.09713361956578422, "grad_norm": 2.3191514520321665, "learning_rate": 9.88234097337158e-07, "loss": 0.4765, "step": 5588 }, { "epoch": 0.09715100210328703, "grad_norm": 3.504916703505633, "learning_rate": 9.882280258363953e-07, "loss": 0.5605, "step": 5589 }, { "epoch": 0.09716838464078986, "grad_norm": 2.7942572048634324, "learning_rate": 9.882219527881742e-07, "loss": 0.4017, "step": 5590 }, { "epoch": 0.09718576717829269, "grad_norm": 2.4207303968863556, "learning_rate": 9.882158781925138e-07, "loss": 0.3187, "step": 5591 }, { "epoch": 0.09720314971579551, "grad_norm": 1.95600395333277, "learning_rate": 9.882098020494334e-07, "loss": 0.3951, "step": 5592 }, { "epoch": 0.09722053225329834, "grad_norm": 2.279458738776834, "learning_rate": 9.88203724358952e-07, "loss": 0.5244, "step": 5593 }, { "epoch": 0.09723791479080116, "grad_norm": 2.088267421463319, "learning_rate": 9.881976451210892e-07, "loss": 0.5015, "step": 5594 }, { "epoch": 0.09725529732830399, "grad_norm": 1.4932854499069284, "learning_rate": 9.88191564335864e-07, "loss": 0.5041, "step": 5595 }, { "epoch": 0.09727267986580682, "grad_norm": 2.37489633445215, "learning_rate": 9.88185482003296e-07, "loss": 0.4828, "step": 5596 }, { "epoch": 0.09729006240330963, "grad_norm": 1.6068589746766246, "learning_rate": 9.881793981234042e-07, "loss": 0.413, "step": 5597 }, { "epoch": 0.09730744494081246, "grad_norm": 2.1074604517864137, "learning_rate": 9.881733126962078e-07, "loss": 0.3919, "step": 5598 }, { "epoch": 0.09732482747831528, "grad_norm": 2.5660191821106606, "learning_rate": 9.881672257217264e-07, "loss": 0.5751, "step": 5599 }, { "epoch": 0.09734221001581811, "grad_norm": 1.3879361847729927, "learning_rate": 9.88161137199979e-07, "loss": 0.3702, "step": 5600 }, { "epoch": 0.09735959255332094, "grad_norm": 3.8607069293623577, "learning_rate": 9.881550471309852e-07, "loss": 0.6481, "step": 5601 }, { "epoch": 0.09737697509082376, "grad_norm": 2.005207201296734, "learning_rate": 9.88148955514764e-07, "loss": 0.732, "step": 5602 }, { "epoch": 0.09739435762832659, "grad_norm": 1.7616111903434684, "learning_rate": 9.88142862351335e-07, "loss": 0.4097, "step": 5603 }, { "epoch": 0.0974117401658294, "grad_norm": 1.5535040484175584, "learning_rate": 9.881367676407173e-07, "loss": 0.5512, "step": 5604 }, { "epoch": 0.09742912270333223, "grad_norm": 1.766285868400101, "learning_rate": 9.881306713829304e-07, "loss": 0.4379, "step": 5605 }, { "epoch": 0.09744650524083506, "grad_norm": 1.5856556587017872, "learning_rate": 9.881245735779933e-07, "loss": 0.3101, "step": 5606 }, { "epoch": 0.09746388777833788, "grad_norm": 0.996860086901987, "learning_rate": 9.881184742259257e-07, "loss": 0.3426, "step": 5607 }, { "epoch": 0.09748127031584071, "grad_norm": 2.1829387342217506, "learning_rate": 9.881123733267466e-07, "loss": 0.5842, "step": 5608 }, { "epoch": 0.09749865285334353, "grad_norm": 1.494367274641533, "learning_rate": 9.881062708804755e-07, "loss": 0.3393, "step": 5609 }, { "epoch": 0.09751603539084636, "grad_norm": 2.3396203339057924, "learning_rate": 9.881001668871318e-07, "loss": 0.4565, "step": 5610 }, { "epoch": 0.09753341792834919, "grad_norm": 2.4269202512984807, "learning_rate": 9.880940613467347e-07, "loss": 0.6821, "step": 5611 }, { "epoch": 0.097550800465852, "grad_norm": 1.8046870698236857, "learning_rate": 9.880879542593037e-07, "loss": 0.5303, "step": 5612 }, { "epoch": 0.09756818300335483, "grad_norm": 2.744467769752242, "learning_rate": 9.880818456248583e-07, "loss": 0.3468, "step": 5613 }, { "epoch": 0.09758556554085765, "grad_norm": 3.17760033386968, "learning_rate": 9.880757354434174e-07, "loss": 0.6594, "step": 5614 }, { "epoch": 0.09760294807836048, "grad_norm": 1.9332182240860631, "learning_rate": 9.880696237150008e-07, "loss": 0.7, "step": 5615 }, { "epoch": 0.09762033061586331, "grad_norm": 1.7717239002640346, "learning_rate": 9.880635104396276e-07, "loss": 0.3875, "step": 5616 }, { "epoch": 0.09763771315336613, "grad_norm": 2.2415065683289455, "learning_rate": 9.88057395617317e-07, "loss": 0.5423, "step": 5617 }, { "epoch": 0.09765509569086896, "grad_norm": 1.2326465817522207, "learning_rate": 9.88051279248089e-07, "loss": 0.431, "step": 5618 }, { "epoch": 0.09767247822837177, "grad_norm": 1.7761903649861706, "learning_rate": 9.880451613319625e-07, "loss": 0.4724, "step": 5619 }, { "epoch": 0.0976898607658746, "grad_norm": 2.5046258311024427, "learning_rate": 9.88039041868957e-07, "loss": 0.4869, "step": 5620 }, { "epoch": 0.09770724330337743, "grad_norm": 1.8046952950075155, "learning_rate": 9.880329208590923e-07, "loss": 0.2941, "step": 5621 }, { "epoch": 0.09772462584088025, "grad_norm": 6.781872180340771, "learning_rate": 9.88026798302387e-07, "loss": 0.5861, "step": 5622 }, { "epoch": 0.09774200837838308, "grad_norm": 1.7161364275589426, "learning_rate": 9.88020674198861e-07, "loss": 0.378, "step": 5623 }, { "epoch": 0.0977593909158859, "grad_norm": 2.1304636375283854, "learning_rate": 9.880145485485338e-07, "loss": 0.5434, "step": 5624 }, { "epoch": 0.09777677345338873, "grad_norm": 2.1782972407130656, "learning_rate": 9.880084213514245e-07, "loss": 0.259, "step": 5625 }, { "epoch": 0.09779415599089156, "grad_norm": 1.931548621892503, "learning_rate": 9.880022926075525e-07, "loss": 0.3257, "step": 5626 }, { "epoch": 0.09781153852839437, "grad_norm": 2.2002925864446325, "learning_rate": 9.879961623169379e-07, "loss": 0.6795, "step": 5627 }, { "epoch": 0.0978289210658972, "grad_norm": 2.1213600197073816, "learning_rate": 9.879900304795993e-07, "loss": 0.589, "step": 5628 }, { "epoch": 0.09784630360340002, "grad_norm": 2.148418003319961, "learning_rate": 9.879838970955566e-07, "loss": 0.5125, "step": 5629 }, { "epoch": 0.09786368614090285, "grad_norm": 2.5935846633142243, "learning_rate": 9.87977762164829e-07, "loss": 0.5743, "step": 5630 }, { "epoch": 0.09788106867840568, "grad_norm": 2.430279559707208, "learning_rate": 9.87971625687436e-07, "loss": 0.2984, "step": 5631 }, { "epoch": 0.0978984512159085, "grad_norm": 2.9468910977560383, "learning_rate": 9.879654876633972e-07, "loss": 0.2026, "step": 5632 }, { "epoch": 0.09791583375341133, "grad_norm": 2.3037105651177208, "learning_rate": 9.87959348092732e-07, "loss": 0.3298, "step": 5633 }, { "epoch": 0.09793321629091414, "grad_norm": 1.1356188072378708, "learning_rate": 9.879532069754596e-07, "loss": 0.5051, "step": 5634 }, { "epoch": 0.09795059882841697, "grad_norm": 2.7764035358551826, "learning_rate": 9.879470643116e-07, "loss": 0.4647, "step": 5635 }, { "epoch": 0.09796798136591979, "grad_norm": 2.8957570887862345, "learning_rate": 9.87940920101172e-07, "loss": 0.3019, "step": 5636 }, { "epoch": 0.09798536390342262, "grad_norm": 1.2176817836305676, "learning_rate": 9.87934774344196e-07, "loss": 0.3095, "step": 5637 }, { "epoch": 0.09800274644092545, "grad_norm": 2.147053907814951, "learning_rate": 9.879286270406903e-07, "loss": 0.4899, "step": 5638 }, { "epoch": 0.09802012897842827, "grad_norm": 1.6135798411259428, "learning_rate": 9.879224781906753e-07, "loss": 0.4805, "step": 5639 }, { "epoch": 0.0980375115159311, "grad_norm": 1.8364868750942331, "learning_rate": 9.8791632779417e-07, "loss": 0.3522, "step": 5640 }, { "epoch": 0.09805489405343391, "grad_norm": 1.9339358981856, "learning_rate": 9.879101758511943e-07, "loss": 0.2699, "step": 5641 }, { "epoch": 0.09807227659093674, "grad_norm": 2.5187457664593955, "learning_rate": 9.879040223617672e-07, "loss": 0.5509, "step": 5642 }, { "epoch": 0.09808965912843957, "grad_norm": 1.6765582060109574, "learning_rate": 9.878978673259087e-07, "loss": 0.2994, "step": 5643 }, { "epoch": 0.09810704166594239, "grad_norm": 1.7969889690210694, "learning_rate": 9.87891710743638e-07, "loss": 0.4885, "step": 5644 }, { "epoch": 0.09812442420344522, "grad_norm": 1.6998615992949466, "learning_rate": 9.878855526149747e-07, "loss": 0.491, "step": 5645 }, { "epoch": 0.09814180674094804, "grad_norm": 2.0427738716454615, "learning_rate": 9.878793929399384e-07, "loss": 0.4647, "step": 5646 }, { "epoch": 0.09815918927845087, "grad_norm": 2.0875536275635476, "learning_rate": 9.878732317185483e-07, "loss": 0.4718, "step": 5647 }, { "epoch": 0.0981765718159537, "grad_norm": 1.8481689929846679, "learning_rate": 9.878670689508242e-07, "loss": 0.3048, "step": 5648 }, { "epoch": 0.09819395435345651, "grad_norm": 1.359141629215939, "learning_rate": 9.878609046367858e-07, "loss": 0.2528, "step": 5649 }, { "epoch": 0.09821133689095934, "grad_norm": 1.2500063787239795, "learning_rate": 9.878547387764525e-07, "loss": 0.3179, "step": 5650 }, { "epoch": 0.09822871942846216, "grad_norm": 2.121897531476297, "learning_rate": 9.878485713698434e-07, "loss": 0.583, "step": 5651 }, { "epoch": 0.09824610196596499, "grad_norm": 1.2738268982155514, "learning_rate": 9.878424024169786e-07, "loss": 0.6082, "step": 5652 }, { "epoch": 0.09826348450346782, "grad_norm": 2.0650640092622723, "learning_rate": 9.878362319178776e-07, "loss": 0.4556, "step": 5653 }, { "epoch": 0.09828086704097064, "grad_norm": 1.208914710067028, "learning_rate": 9.878300598725597e-07, "loss": 0.2649, "step": 5654 }, { "epoch": 0.09829824957847347, "grad_norm": 2.197125887751894, "learning_rate": 9.878238862810448e-07, "loss": 0.3423, "step": 5655 }, { "epoch": 0.09831563211597628, "grad_norm": 1.628198703289991, "learning_rate": 9.878177111433521e-07, "loss": 0.2918, "step": 5656 }, { "epoch": 0.09833301465347911, "grad_norm": 1.718204693290799, "learning_rate": 9.878115344595014e-07, "loss": 0.4223, "step": 5657 }, { "epoch": 0.09835039719098194, "grad_norm": 6.861232511124404, "learning_rate": 9.878053562295123e-07, "loss": 0.4915, "step": 5658 }, { "epoch": 0.09836777972848476, "grad_norm": 1.4801312927075716, "learning_rate": 9.87799176453404e-07, "loss": 0.3001, "step": 5659 }, { "epoch": 0.09838516226598759, "grad_norm": 1.8916771148308353, "learning_rate": 9.877929951311967e-07, "loss": 0.4407, "step": 5660 }, { "epoch": 0.09840254480349041, "grad_norm": 1.9758935082970945, "learning_rate": 9.877868122629096e-07, "loss": 0.4017, "step": 5661 }, { "epoch": 0.09841992734099324, "grad_norm": 2.208071128081729, "learning_rate": 9.877806278485623e-07, "loss": 0.6508, "step": 5662 }, { "epoch": 0.09843730987849607, "grad_norm": 3.2785258625770863, "learning_rate": 9.877744418881744e-07, "loss": 0.7265, "step": 5663 }, { "epoch": 0.09845469241599888, "grad_norm": 2.272637743218663, "learning_rate": 9.877682543817657e-07, "loss": 0.7631, "step": 5664 }, { "epoch": 0.09847207495350171, "grad_norm": 3.2997851208783584, "learning_rate": 9.877620653293559e-07, "loss": 0.3121, "step": 5665 }, { "epoch": 0.09848945749100453, "grad_norm": 1.8631103726580318, "learning_rate": 9.877558747309641e-07, "loss": 0.4455, "step": 5666 }, { "epoch": 0.09850684002850736, "grad_norm": 2.019619035817133, "learning_rate": 9.877496825866104e-07, "loss": 0.4197, "step": 5667 }, { "epoch": 0.09852422256601019, "grad_norm": 1.940195649896124, "learning_rate": 9.877434888963143e-07, "loss": 0.5129, "step": 5668 }, { "epoch": 0.09854160510351301, "grad_norm": 2.666048516888008, "learning_rate": 9.877372936600953e-07, "loss": 0.599, "step": 5669 }, { "epoch": 0.09855898764101584, "grad_norm": 2.01167903907804, "learning_rate": 9.87731096877973e-07, "loss": 0.372, "step": 5670 }, { "epoch": 0.09857637017851865, "grad_norm": 2.393425695780223, "learning_rate": 9.877248985499673e-07, "loss": 0.468, "step": 5671 }, { "epoch": 0.09859375271602148, "grad_norm": 2.060759561640175, "learning_rate": 9.877186986760976e-07, "loss": 0.2738, "step": 5672 }, { "epoch": 0.09861113525352432, "grad_norm": 1.4356737294260096, "learning_rate": 9.87712497256384e-07, "loss": 0.2746, "step": 5673 }, { "epoch": 0.09862851779102713, "grad_norm": 2.1888942879916473, "learning_rate": 9.877062942908453e-07, "loss": 0.398, "step": 5674 }, { "epoch": 0.09864590032852996, "grad_norm": 1.9497309356701265, "learning_rate": 9.87700089779502e-07, "loss": 0.654, "step": 5675 }, { "epoch": 0.09866328286603278, "grad_norm": 2.3222826447611737, "learning_rate": 9.876938837223737e-07, "loss": 0.7463, "step": 5676 }, { "epoch": 0.09868066540353561, "grad_norm": 2.1638475751950916, "learning_rate": 9.876876761194794e-07, "loss": 0.4105, "step": 5677 }, { "epoch": 0.09869804794103844, "grad_norm": 2.6407396600481143, "learning_rate": 9.876814669708393e-07, "loss": 0.4014, "step": 5678 }, { "epoch": 0.09871543047854126, "grad_norm": 1.438505684942831, "learning_rate": 9.87675256276473e-07, "loss": 0.4195, "step": 5679 }, { "epoch": 0.09873281301604409, "grad_norm": 1.4375040162021544, "learning_rate": 9.876690440364002e-07, "loss": 0.511, "step": 5680 }, { "epoch": 0.0987501955535469, "grad_norm": 2.326321132125064, "learning_rate": 9.876628302506404e-07, "loss": 0.3908, "step": 5681 }, { "epoch": 0.09876757809104973, "grad_norm": 3.84577171643775, "learning_rate": 9.876566149192135e-07, "loss": 0.3952, "step": 5682 }, { "epoch": 0.09878496062855256, "grad_norm": 1.978627230292896, "learning_rate": 9.876503980421394e-07, "loss": 0.3976, "step": 5683 }, { "epoch": 0.09880234316605538, "grad_norm": 2.6634482819707084, "learning_rate": 9.876441796194372e-07, "loss": 0.4588, "step": 5684 }, { "epoch": 0.09881972570355821, "grad_norm": 2.8686265557909483, "learning_rate": 9.87637959651127e-07, "loss": 0.5723, "step": 5685 }, { "epoch": 0.09883710824106103, "grad_norm": 2.8055690964023077, "learning_rate": 9.876317381372289e-07, "loss": 0.3587, "step": 5686 }, { "epoch": 0.09885449077856386, "grad_norm": 1.841684783623033, "learning_rate": 9.876255150777616e-07, "loss": 0.4497, "step": 5687 }, { "epoch": 0.09887187331606669, "grad_norm": 1.305851515371122, "learning_rate": 9.876192904727458e-07, "loss": 0.5441, "step": 5688 }, { "epoch": 0.0988892558535695, "grad_norm": 3.022452796329813, "learning_rate": 9.876130643222008e-07, "loss": 0.6271, "step": 5689 }, { "epoch": 0.09890663839107233, "grad_norm": 1.9700614263210152, "learning_rate": 9.876068366261461e-07, "loss": 0.3801, "step": 5690 }, { "epoch": 0.09892402092857515, "grad_norm": 2.368494990926001, "learning_rate": 9.87600607384602e-07, "loss": 0.4847, "step": 5691 }, { "epoch": 0.09894140346607798, "grad_norm": 1.8466515690976515, "learning_rate": 9.87594376597588e-07, "loss": 0.6459, "step": 5692 }, { "epoch": 0.09895878600358081, "grad_norm": 1.8961603786962187, "learning_rate": 9.875881442651235e-07, "loss": 0.4702, "step": 5693 }, { "epoch": 0.09897616854108363, "grad_norm": 4.453777256640991, "learning_rate": 9.875819103872286e-07, "loss": 0.6617, "step": 5694 }, { "epoch": 0.09899355107858646, "grad_norm": 2.1135734620756628, "learning_rate": 9.875756749639232e-07, "loss": 0.4912, "step": 5695 }, { "epoch": 0.09901093361608927, "grad_norm": 2.4098171300647544, "learning_rate": 9.87569437995227e-07, "loss": 0.2842, "step": 5696 }, { "epoch": 0.0990283161535921, "grad_norm": 1.9899599250119404, "learning_rate": 9.875631994811593e-07, "loss": 0.1927, "step": 5697 }, { "epoch": 0.09904569869109493, "grad_norm": 5.975925870680416, "learning_rate": 9.875569594217406e-07, "loss": 0.7295, "step": 5698 }, { "epoch": 0.09906308122859775, "grad_norm": 3.191363840208349, "learning_rate": 9.875507178169899e-07, "loss": 0.5506, "step": 5699 }, { "epoch": 0.09908046376610058, "grad_norm": 1.7723050469446686, "learning_rate": 9.875444746669277e-07, "loss": 0.3124, "step": 5700 }, { "epoch": 0.0990978463036034, "grad_norm": 2.6942987796056945, "learning_rate": 9.875382299715734e-07, "loss": 0.8106, "step": 5701 }, { "epoch": 0.09911522884110623, "grad_norm": 1.7519324505021643, "learning_rate": 9.875319837309468e-07, "loss": 0.4023, "step": 5702 }, { "epoch": 0.09913261137860906, "grad_norm": 2.1603137037802695, "learning_rate": 9.875257359450678e-07, "loss": 0.5998, "step": 5703 }, { "epoch": 0.09914999391611187, "grad_norm": 2.2876169954514727, "learning_rate": 9.875194866139562e-07, "loss": 0.5233, "step": 5704 }, { "epoch": 0.0991673764536147, "grad_norm": 1.9337569262452083, "learning_rate": 9.875132357376318e-07, "loss": 0.365, "step": 5705 }, { "epoch": 0.09918475899111752, "grad_norm": 1.8812135402972217, "learning_rate": 9.875069833161143e-07, "loss": 0.2603, "step": 5706 }, { "epoch": 0.09920214152862035, "grad_norm": 1.603765604573614, "learning_rate": 9.875007293494238e-07, "loss": 0.3881, "step": 5707 }, { "epoch": 0.09921952406612317, "grad_norm": 2.4177103158934723, "learning_rate": 9.874944738375797e-07, "loss": 0.2474, "step": 5708 }, { "epoch": 0.099236906603626, "grad_norm": 2.67065652537525, "learning_rate": 9.874882167806022e-07, "loss": 0.4062, "step": 5709 }, { "epoch": 0.09925428914112883, "grad_norm": 3.123875905042689, "learning_rate": 9.874819581785108e-07, "loss": 0.4973, "step": 5710 }, { "epoch": 0.09927167167863164, "grad_norm": 3.530971474410667, "learning_rate": 9.874756980313256e-07, "loss": 0.3165, "step": 5711 }, { "epoch": 0.09928905421613447, "grad_norm": 2.128049463029029, "learning_rate": 9.874694363390666e-07, "loss": 0.4278, "step": 5712 }, { "epoch": 0.09930643675363729, "grad_norm": 1.426884747305421, "learning_rate": 9.87463173101753e-07, "loss": 0.5726, "step": 5713 }, { "epoch": 0.09932381929114012, "grad_norm": 2.0754544413576954, "learning_rate": 9.874569083194054e-07, "loss": 0.3022, "step": 5714 }, { "epoch": 0.09934120182864295, "grad_norm": 2.5694486168509, "learning_rate": 9.874506419920431e-07, "loss": 0.3084, "step": 5715 }, { "epoch": 0.09935858436614577, "grad_norm": 2.2456336072831036, "learning_rate": 9.874443741196865e-07, "loss": 0.411, "step": 5716 }, { "epoch": 0.0993759669036486, "grad_norm": 3.0587046249830876, "learning_rate": 9.874381047023549e-07, "loss": 0.4635, "step": 5717 }, { "epoch": 0.09939334944115141, "grad_norm": 1.3430525460357094, "learning_rate": 9.874318337400684e-07, "loss": 0.2487, "step": 5718 }, { "epoch": 0.09941073197865424, "grad_norm": 2.1631815773153966, "learning_rate": 9.87425561232847e-07, "loss": 0.3668, "step": 5719 }, { "epoch": 0.09942811451615707, "grad_norm": 3.317856314074978, "learning_rate": 9.874192871807103e-07, "loss": 0.5764, "step": 5720 }, { "epoch": 0.09944549705365989, "grad_norm": 1.5175881990422382, "learning_rate": 9.874130115836785e-07, "loss": 0.3729, "step": 5721 }, { "epoch": 0.09946287959116272, "grad_norm": 1.7689735436626766, "learning_rate": 9.874067344417714e-07, "loss": 0.4678, "step": 5722 }, { "epoch": 0.09948026212866554, "grad_norm": 3.03284755968611, "learning_rate": 9.874004557550087e-07, "loss": 0.6816, "step": 5723 }, { "epoch": 0.09949764466616837, "grad_norm": 1.1971358476825882, "learning_rate": 9.873941755234105e-07, "loss": 0.3752, "step": 5724 }, { "epoch": 0.0995150272036712, "grad_norm": 1.5763761022976277, "learning_rate": 9.873878937469968e-07, "loss": 0.2359, "step": 5725 }, { "epoch": 0.09953240974117401, "grad_norm": 1.8929865306022153, "learning_rate": 9.873816104257872e-07, "loss": 0.3981, "step": 5726 }, { "epoch": 0.09954979227867684, "grad_norm": 1.8596592530302478, "learning_rate": 9.873753255598018e-07, "loss": 0.267, "step": 5727 }, { "epoch": 0.09956717481617966, "grad_norm": 1.7933117636476166, "learning_rate": 9.873690391490605e-07, "loss": 0.4115, "step": 5728 }, { "epoch": 0.09958455735368249, "grad_norm": 1.2852025265641542, "learning_rate": 9.873627511935834e-07, "loss": 0.721, "step": 5729 }, { "epoch": 0.09960193989118532, "grad_norm": 1.6726419505652022, "learning_rate": 9.8735646169339e-07, "loss": 0.3453, "step": 5730 }, { "epoch": 0.09961932242868814, "grad_norm": 1.4620499124811577, "learning_rate": 9.873501706485006e-07, "loss": 0.3803, "step": 5731 }, { "epoch": 0.09963670496619097, "grad_norm": 1.5016814507273477, "learning_rate": 9.87343878058935e-07, "loss": 0.4773, "step": 5732 }, { "epoch": 0.09965408750369378, "grad_norm": 1.4407082070392505, "learning_rate": 9.873375839247134e-07, "loss": 0.3827, "step": 5733 }, { "epoch": 0.09967147004119661, "grad_norm": 1.8465387289100805, "learning_rate": 9.873312882458554e-07, "loss": 0.4102, "step": 5734 }, { "epoch": 0.09968885257869944, "grad_norm": 2.6974507856617276, "learning_rate": 9.87324991022381e-07, "loss": 0.426, "step": 5735 }, { "epoch": 0.09970623511620226, "grad_norm": 1.7763243910343756, "learning_rate": 9.8731869225431e-07, "loss": 0.9516, "step": 5736 }, { "epoch": 0.09972361765370509, "grad_norm": 2.327166076698033, "learning_rate": 9.873123919416628e-07, "loss": 0.6431, "step": 5737 }, { "epoch": 0.09974100019120791, "grad_norm": 1.574294096757837, "learning_rate": 9.873060900844595e-07, "loss": 0.5164, "step": 5738 }, { "epoch": 0.09975838272871074, "grad_norm": 2.1245433058620025, "learning_rate": 9.872997866827193e-07, "loss": 0.3795, "step": 5739 }, { "epoch": 0.09977576526621357, "grad_norm": 2.9158353858707806, "learning_rate": 9.872934817364628e-07, "loss": 0.5061, "step": 5740 }, { "epoch": 0.09979314780371638, "grad_norm": 2.0565357855315307, "learning_rate": 9.872871752457099e-07, "loss": 0.5388, "step": 5741 }, { "epoch": 0.09981053034121921, "grad_norm": 1.8813717278354, "learning_rate": 9.872808672104803e-07, "loss": 0.336, "step": 5742 }, { "epoch": 0.09982791287872203, "grad_norm": 1.7727377336674142, "learning_rate": 9.872745576307943e-07, "loss": 0.5348, "step": 5743 }, { "epoch": 0.09984529541622486, "grad_norm": 1.7328973850975542, "learning_rate": 9.872682465066717e-07, "loss": 0.3298, "step": 5744 }, { "epoch": 0.09986267795372769, "grad_norm": 2.1257330309802, "learning_rate": 9.872619338381327e-07, "loss": 0.3835, "step": 5745 }, { "epoch": 0.09988006049123051, "grad_norm": 2.070218973146777, "learning_rate": 9.87255619625197e-07, "loss": 0.3965, "step": 5746 }, { "epoch": 0.09989744302873334, "grad_norm": 1.720167396175472, "learning_rate": 9.87249303867885e-07, "loss": 0.7699, "step": 5747 }, { "epoch": 0.09991482556623615, "grad_norm": 1.3388977921389764, "learning_rate": 9.872429865662167e-07, "loss": 0.2809, "step": 5748 }, { "epoch": 0.09993220810373898, "grad_norm": 2.8309292499817302, "learning_rate": 9.872366677202118e-07, "loss": 0.5252, "step": 5749 }, { "epoch": 0.09994959064124181, "grad_norm": 2.2993172867084604, "learning_rate": 9.872303473298902e-07, "loss": 0.3602, "step": 5750 }, { "epoch": 0.09996697317874463, "grad_norm": 2.084942513699482, "learning_rate": 9.872240253952726e-07, "loss": 0.5138, "step": 5751 }, { "epoch": 0.09998435571624746, "grad_norm": 3.5067499192523117, "learning_rate": 9.872177019163785e-07, "loss": 0.3744, "step": 5752 }, { "epoch": 0.10000173825375028, "grad_norm": 2.6059090051611964, "learning_rate": 9.87211376893228e-07, "loss": 0.551, "step": 5753 }, { "epoch": 0.10001912079125311, "grad_norm": 1.5527967773978173, "learning_rate": 9.872050503258412e-07, "loss": 0.2419, "step": 5754 }, { "epoch": 0.10003650332875594, "grad_norm": 2.8977920149148995, "learning_rate": 9.871987222142386e-07, "loss": 0.314, "step": 5755 }, { "epoch": 0.10005388586625875, "grad_norm": 1.6104810521851645, "learning_rate": 9.871923925584395e-07, "loss": 0.4676, "step": 5756 }, { "epoch": 0.10007126840376158, "grad_norm": 1.6792174649372456, "learning_rate": 9.871860613584643e-07, "loss": 0.5053, "step": 5757 }, { "epoch": 0.1000886509412644, "grad_norm": 1.4670848886491286, "learning_rate": 9.871797286143333e-07, "loss": 0.3205, "step": 5758 }, { "epoch": 0.10010603347876723, "grad_norm": 1.6560353642644288, "learning_rate": 9.871733943260663e-07, "loss": 0.4808, "step": 5759 }, { "epoch": 0.10012341601627006, "grad_norm": 1.3526819459667385, "learning_rate": 9.871670584936834e-07, "loss": 0.4281, "step": 5760 }, { "epoch": 0.10014079855377288, "grad_norm": 1.7625143988443064, "learning_rate": 9.871607211172046e-07, "loss": 0.3506, "step": 5761 }, { "epoch": 0.10015818109127571, "grad_norm": 2.896184640740395, "learning_rate": 9.8715438219665e-07, "loss": 0.6615, "step": 5762 }, { "epoch": 0.10017556362877852, "grad_norm": 1.833874012273016, "learning_rate": 9.871480417320401e-07, "loss": 0.5921, "step": 5763 }, { "epoch": 0.10019294616628135, "grad_norm": 1.66446199807072, "learning_rate": 9.871416997233945e-07, "loss": 0.8505, "step": 5764 }, { "epoch": 0.10021032870378419, "grad_norm": 2.7217862923086904, "learning_rate": 9.871353561707337e-07, "loss": 0.6141, "step": 5765 }, { "epoch": 0.100227711241287, "grad_norm": 1.7370800143925909, "learning_rate": 9.871290110740774e-07, "loss": 0.4699, "step": 5766 }, { "epoch": 0.10024509377878983, "grad_norm": 4.62566691150114, "learning_rate": 9.87122664433446e-07, "loss": 0.4664, "step": 5767 }, { "epoch": 0.10026247631629265, "grad_norm": 1.9111787649864254, "learning_rate": 9.87116316248859e-07, "loss": 0.3443, "step": 5768 }, { "epoch": 0.10027985885379548, "grad_norm": 1.5324186057846183, "learning_rate": 9.871099665203378e-07, "loss": 0.3411, "step": 5769 }, { "epoch": 0.10029724139129831, "grad_norm": 1.6786317917895686, "learning_rate": 9.871036152479013e-07, "loss": 0.3343, "step": 5770 }, { "epoch": 0.10031462392880112, "grad_norm": 1.941114485992121, "learning_rate": 9.870972624315703e-07, "loss": 0.4746, "step": 5771 }, { "epoch": 0.10033200646630396, "grad_norm": 1.440658649788417, "learning_rate": 9.870909080713647e-07, "loss": 0.3882, "step": 5772 }, { "epoch": 0.10034938900380677, "grad_norm": 2.6270105435343116, "learning_rate": 9.870845521673046e-07, "loss": 0.5579, "step": 5773 }, { "epoch": 0.1003667715413096, "grad_norm": 2.810367309433075, "learning_rate": 9.870781947194102e-07, "loss": 0.2542, "step": 5774 }, { "epoch": 0.10038415407881242, "grad_norm": 1.3968091680579262, "learning_rate": 9.87071835727702e-07, "loss": 0.2855, "step": 5775 }, { "epoch": 0.10040153661631525, "grad_norm": 2.338701715991098, "learning_rate": 9.870654751921995e-07, "loss": 0.4529, "step": 5776 }, { "epoch": 0.10041891915381808, "grad_norm": 1.9721115662137234, "learning_rate": 9.870591131129231e-07, "loss": 0.462, "step": 5777 }, { "epoch": 0.1004363016913209, "grad_norm": 1.4016343627227192, "learning_rate": 9.870527494898932e-07, "loss": 0.4377, "step": 5778 }, { "epoch": 0.10045368422882373, "grad_norm": 1.5988207636060396, "learning_rate": 9.870463843231299e-07, "loss": 0.3674, "step": 5779 }, { "epoch": 0.10047106676632654, "grad_norm": 1.8960700221946767, "learning_rate": 9.870400176126533e-07, "loss": 0.3489, "step": 5780 }, { "epoch": 0.10048844930382937, "grad_norm": 1.7942063769028012, "learning_rate": 9.870336493584834e-07, "loss": 0.3048, "step": 5781 }, { "epoch": 0.1005058318413322, "grad_norm": 2.0531535975092554, "learning_rate": 9.870272795606405e-07, "loss": 0.3432, "step": 5782 }, { "epoch": 0.10052321437883502, "grad_norm": 1.439141671164567, "learning_rate": 9.87020908219145e-07, "loss": 0.3196, "step": 5783 }, { "epoch": 0.10054059691633785, "grad_norm": 1.84035318700069, "learning_rate": 9.87014535334017e-07, "loss": 0.6659, "step": 5784 }, { "epoch": 0.10055797945384067, "grad_norm": 1.8579856554472391, "learning_rate": 9.870081609052764e-07, "loss": 0.3007, "step": 5785 }, { "epoch": 0.1005753619913435, "grad_norm": 2.067036133423838, "learning_rate": 9.870017849329438e-07, "loss": 0.5023, "step": 5786 }, { "epoch": 0.10059274452884633, "grad_norm": 2.1475826814874734, "learning_rate": 9.869954074170394e-07, "loss": 0.3855, "step": 5787 }, { "epoch": 0.10061012706634914, "grad_norm": 1.1393305371075169, "learning_rate": 9.86989028357583e-07, "loss": 0.4016, "step": 5788 }, { "epoch": 0.10062750960385197, "grad_norm": 2.186188806231033, "learning_rate": 9.869826477545953e-07, "loss": 0.513, "step": 5789 }, { "epoch": 0.10064489214135479, "grad_norm": 1.3135490446966471, "learning_rate": 9.869762656080962e-07, "loss": 0.4486, "step": 5790 }, { "epoch": 0.10066227467885762, "grad_norm": 4.40008157146954, "learning_rate": 9.86969881918106e-07, "loss": 0.5886, "step": 5791 }, { "epoch": 0.10067965721636045, "grad_norm": 1.640725628306425, "learning_rate": 9.86963496684645e-07, "loss": 0.2464, "step": 5792 }, { "epoch": 0.10069703975386327, "grad_norm": 1.9255596704418247, "learning_rate": 9.869571099077334e-07, "loss": 0.3109, "step": 5793 }, { "epoch": 0.1007144222913661, "grad_norm": 1.943906982084482, "learning_rate": 9.869507215873916e-07, "loss": 0.2969, "step": 5794 }, { "epoch": 0.10073180482886891, "grad_norm": 1.4144109370088815, "learning_rate": 9.869443317236395e-07, "loss": 0.2125, "step": 5795 }, { "epoch": 0.10074918736637174, "grad_norm": 1.3577886624174367, "learning_rate": 9.869379403164977e-07, "loss": 0.5261, "step": 5796 }, { "epoch": 0.10076656990387457, "grad_norm": 1.920236576863739, "learning_rate": 9.869315473659862e-07, "loss": 0.4459, "step": 5797 }, { "epoch": 0.10078395244137739, "grad_norm": 1.9779509709113154, "learning_rate": 9.869251528721255e-07, "loss": 0.4331, "step": 5798 }, { "epoch": 0.10080133497888022, "grad_norm": 2.2649172967135347, "learning_rate": 9.869187568349355e-07, "loss": 0.4504, "step": 5799 }, { "epoch": 0.10081871751638304, "grad_norm": 1.390728675057183, "learning_rate": 9.86912359254437e-07, "loss": 0.6358, "step": 5800 }, { "epoch": 0.10083610005388587, "grad_norm": 1.934754847522984, "learning_rate": 9.869059601306498e-07, "loss": 0.4175, "step": 5801 }, { "epoch": 0.1008534825913887, "grad_norm": 3.009299789442148, "learning_rate": 9.868995594635945e-07, "loss": 0.6427, "step": 5802 }, { "epoch": 0.10087086512889151, "grad_norm": 2.3351320789603283, "learning_rate": 9.86893157253291e-07, "loss": 0.4589, "step": 5803 }, { "epoch": 0.10088824766639434, "grad_norm": 1.4925830412510055, "learning_rate": 9.868867534997602e-07, "loss": 0.3908, "step": 5804 }, { "epoch": 0.10090563020389716, "grad_norm": 2.215920437619675, "learning_rate": 9.868803482030218e-07, "loss": 0.2638, "step": 5805 }, { "epoch": 0.10092301274139999, "grad_norm": 1.5869790149700833, "learning_rate": 9.868739413630964e-07, "loss": 0.3995, "step": 5806 }, { "epoch": 0.10094039527890282, "grad_norm": 6.2298405358114985, "learning_rate": 9.868675329800044e-07, "loss": 0.4181, "step": 5807 }, { "epoch": 0.10095777781640564, "grad_norm": 2.2625036394942946, "learning_rate": 9.868611230537658e-07, "loss": 0.4052, "step": 5808 }, { "epoch": 0.10097516035390847, "grad_norm": 2.712093976033994, "learning_rate": 9.868547115844012e-07, "loss": 0.4201, "step": 5809 }, { "epoch": 0.10099254289141128, "grad_norm": 2.5044052552520233, "learning_rate": 9.868482985719307e-07, "loss": 0.4531, "step": 5810 }, { "epoch": 0.10100992542891411, "grad_norm": 1.728539771491382, "learning_rate": 9.868418840163748e-07, "loss": 0.2233, "step": 5811 }, { "epoch": 0.10102730796641694, "grad_norm": 1.890041888517343, "learning_rate": 9.868354679177537e-07, "loss": 0.6472, "step": 5812 }, { "epoch": 0.10104469050391976, "grad_norm": 2.0173901423578107, "learning_rate": 9.868290502760878e-07, "loss": 0.4536, "step": 5813 }, { "epoch": 0.10106207304142259, "grad_norm": 1.769725900579979, "learning_rate": 9.868226310913977e-07, "loss": 0.5619, "step": 5814 }, { "epoch": 0.1010794555789254, "grad_norm": 2.0389480698147713, "learning_rate": 9.868162103637032e-07, "loss": 0.3981, "step": 5815 }, { "epoch": 0.10109683811642824, "grad_norm": 2.0009759248529133, "learning_rate": 9.868097880930251e-07, "loss": 0.4765, "step": 5816 }, { "epoch": 0.10111422065393107, "grad_norm": 1.9285698688220292, "learning_rate": 9.868033642793834e-07, "loss": 0.4303, "step": 5817 }, { "epoch": 0.10113160319143388, "grad_norm": 1.8694266963033652, "learning_rate": 9.867969389227987e-07, "loss": 0.2357, "step": 5818 }, { "epoch": 0.10114898572893671, "grad_norm": 2.2515774489975025, "learning_rate": 9.867905120232916e-07, "loss": 0.3632, "step": 5819 }, { "epoch": 0.10116636826643953, "grad_norm": 1.2147574646887445, "learning_rate": 9.867840835808818e-07, "loss": 0.2587, "step": 5820 }, { "epoch": 0.10118375080394236, "grad_norm": 4.531981750422201, "learning_rate": 9.867776535955903e-07, "loss": 0.6694, "step": 5821 }, { "epoch": 0.10120113334144519, "grad_norm": 3.6983577481007686, "learning_rate": 9.867712220674372e-07, "loss": 0.4145, "step": 5822 }, { "epoch": 0.101218515878948, "grad_norm": 1.7013527382899418, "learning_rate": 9.86764788996443e-07, "loss": 0.466, "step": 5823 }, { "epoch": 0.10123589841645084, "grad_norm": 2.80689791199165, "learning_rate": 9.867583543826278e-07, "loss": 0.3489, "step": 5824 }, { "epoch": 0.10125328095395365, "grad_norm": 1.7370681909954846, "learning_rate": 9.867519182260124e-07, "loss": 0.2268, "step": 5825 }, { "epoch": 0.10127066349145648, "grad_norm": 2.383435119111947, "learning_rate": 9.867454805266171e-07, "loss": 0.3902, "step": 5826 }, { "epoch": 0.10128804602895931, "grad_norm": 1.27827087696165, "learning_rate": 9.867390412844621e-07, "loss": 0.426, "step": 5827 }, { "epoch": 0.10130542856646213, "grad_norm": 1.2585911701693657, "learning_rate": 9.867326004995679e-07, "loss": 0.3707, "step": 5828 }, { "epoch": 0.10132281110396496, "grad_norm": 1.5777720253551042, "learning_rate": 9.86726158171955e-07, "loss": 0.6314, "step": 5829 }, { "epoch": 0.10134019364146778, "grad_norm": 1.3598887321414292, "learning_rate": 9.867197143016436e-07, "loss": 0.3377, "step": 5830 }, { "epoch": 0.10135757617897061, "grad_norm": 1.8872291531852898, "learning_rate": 9.867132688886545e-07, "loss": 0.4957, "step": 5831 }, { "epoch": 0.10137495871647344, "grad_norm": 2.740318112652669, "learning_rate": 9.867068219330077e-07, "loss": 0.3078, "step": 5832 }, { "epoch": 0.10139234125397625, "grad_norm": 1.961583913294702, "learning_rate": 9.867003734347242e-07, "loss": 0.456, "step": 5833 }, { "epoch": 0.10140972379147908, "grad_norm": 2.3151609197867287, "learning_rate": 9.866939233938238e-07, "loss": 0.494, "step": 5834 }, { "epoch": 0.1014271063289819, "grad_norm": 1.4255488654594823, "learning_rate": 9.866874718103274e-07, "loss": 0.4441, "step": 5835 }, { "epoch": 0.10144448886648473, "grad_norm": 1.8632644561205411, "learning_rate": 9.86681018684255e-07, "loss": 0.3258, "step": 5836 }, { "epoch": 0.10146187140398756, "grad_norm": 1.6813261425732882, "learning_rate": 9.866745640156276e-07, "loss": 0.3382, "step": 5837 }, { "epoch": 0.10147925394149038, "grad_norm": 1.6192051787477262, "learning_rate": 9.866681078044651e-07, "loss": 0.357, "step": 5838 }, { "epoch": 0.10149663647899321, "grad_norm": 1.886105235088399, "learning_rate": 9.866616500507885e-07, "loss": 0.6825, "step": 5839 }, { "epoch": 0.10151401901649602, "grad_norm": 1.7774979695450268, "learning_rate": 9.866551907546181e-07, "loss": 0.3729, "step": 5840 }, { "epoch": 0.10153140155399885, "grad_norm": 1.4446159967612522, "learning_rate": 9.866487299159743e-07, "loss": 0.6135, "step": 5841 }, { "epoch": 0.10154878409150168, "grad_norm": 1.6222696438526456, "learning_rate": 9.866422675348773e-07, "loss": 0.2345, "step": 5842 }, { "epoch": 0.1015661666290045, "grad_norm": 0.9279438481812551, "learning_rate": 9.866358036113481e-07, "loss": 0.4725, "step": 5843 }, { "epoch": 0.10158354916650733, "grad_norm": 2.5776245229343413, "learning_rate": 9.866293381454067e-07, "loss": 0.7007, "step": 5844 }, { "epoch": 0.10160093170401015, "grad_norm": 1.695228589467519, "learning_rate": 9.866228711370741e-07, "loss": 0.4898, "step": 5845 }, { "epoch": 0.10161831424151298, "grad_norm": 2.1203709031389137, "learning_rate": 9.866164025863704e-07, "loss": 0.4557, "step": 5846 }, { "epoch": 0.1016356967790158, "grad_norm": 4.045135461149896, "learning_rate": 9.866099324933165e-07, "loss": 0.6919, "step": 5847 }, { "epoch": 0.10165307931651862, "grad_norm": 2.089943142248727, "learning_rate": 9.866034608579322e-07, "loss": 0.3771, "step": 5848 }, { "epoch": 0.10167046185402145, "grad_norm": 1.823695992379714, "learning_rate": 9.865969876802386e-07, "loss": 0.5018, "step": 5849 }, { "epoch": 0.10168784439152427, "grad_norm": 1.3083131153809298, "learning_rate": 9.865905129602562e-07, "loss": 0.327, "step": 5850 }, { "epoch": 0.1017052269290271, "grad_norm": 3.1088895165234183, "learning_rate": 9.865840366980055e-07, "loss": 0.4891, "step": 5851 }, { "epoch": 0.10172260946652992, "grad_norm": 1.5653090109799765, "learning_rate": 9.865775588935066e-07, "loss": 0.2141, "step": 5852 }, { "epoch": 0.10173999200403275, "grad_norm": 1.9644358902547765, "learning_rate": 9.865710795467805e-07, "loss": 0.4726, "step": 5853 }, { "epoch": 0.10175737454153558, "grad_norm": 2.622603841309259, "learning_rate": 9.865645986578476e-07, "loss": 0.4521, "step": 5854 }, { "epoch": 0.1017747570790384, "grad_norm": 2.247646142919774, "learning_rate": 9.865581162267284e-07, "loss": 0.5239, "step": 5855 }, { "epoch": 0.10179213961654122, "grad_norm": 1.710419731358896, "learning_rate": 9.865516322534436e-07, "loss": 0.3133, "step": 5856 }, { "epoch": 0.10180952215404404, "grad_norm": 1.7719093952370286, "learning_rate": 9.865451467380135e-07, "loss": 0.3545, "step": 5857 }, { "epoch": 0.10182690469154687, "grad_norm": 1.8514100079756453, "learning_rate": 9.865386596804588e-07, "loss": 0.4698, "step": 5858 }, { "epoch": 0.1018442872290497, "grad_norm": 1.7354533628856978, "learning_rate": 9.865321710808e-07, "loss": 0.6564, "step": 5859 }, { "epoch": 0.10186166976655252, "grad_norm": 1.5317184165643511, "learning_rate": 9.865256809390578e-07, "loss": 0.2132, "step": 5860 }, { "epoch": 0.10187905230405535, "grad_norm": 2.3388907665359504, "learning_rate": 9.865191892552526e-07, "loss": 0.4633, "step": 5861 }, { "epoch": 0.10189643484155816, "grad_norm": 2.725259200957965, "learning_rate": 9.865126960294053e-07, "loss": 0.6787, "step": 5862 }, { "epoch": 0.101913817379061, "grad_norm": 1.888342332586607, "learning_rate": 9.86506201261536e-07, "loss": 0.5232, "step": 5863 }, { "epoch": 0.10193119991656382, "grad_norm": 2.0092626075134024, "learning_rate": 9.864997049516656e-07, "loss": 0.3361, "step": 5864 }, { "epoch": 0.10194858245406664, "grad_norm": 2.482010418410957, "learning_rate": 9.864932070998146e-07, "loss": 0.2991, "step": 5865 }, { "epoch": 0.10196596499156947, "grad_norm": 1.551303403686184, "learning_rate": 9.864867077060036e-07, "loss": 0.3873, "step": 5866 }, { "epoch": 0.10198334752907229, "grad_norm": 3.1163586623274204, "learning_rate": 9.864802067702532e-07, "loss": 0.7104, "step": 5867 }, { "epoch": 0.10200073006657512, "grad_norm": 1.6811096334160425, "learning_rate": 9.864737042925841e-07, "loss": 0.2497, "step": 5868 }, { "epoch": 0.10201811260407795, "grad_norm": 1.3445560851059737, "learning_rate": 9.864672002730167e-07, "loss": 0.3278, "step": 5869 }, { "epoch": 0.10203549514158076, "grad_norm": 1.1576250137262476, "learning_rate": 9.86460694711572e-07, "loss": 0.4055, "step": 5870 }, { "epoch": 0.1020528776790836, "grad_norm": 3.405714183619746, "learning_rate": 9.8645418760827e-07, "loss": 0.6794, "step": 5871 }, { "epoch": 0.10207026021658641, "grad_norm": 1.895537081154976, "learning_rate": 9.864476789631319e-07, "loss": 0.4621, "step": 5872 }, { "epoch": 0.10208764275408924, "grad_norm": 1.4891494734701203, "learning_rate": 9.86441168776178e-07, "loss": 0.3135, "step": 5873 }, { "epoch": 0.10210502529159207, "grad_norm": 2.159196378968704, "learning_rate": 9.864346570474292e-07, "loss": 0.617, "step": 5874 }, { "epoch": 0.10212240782909489, "grad_norm": 1.9655951813292187, "learning_rate": 9.86428143776906e-07, "loss": 0.6082, "step": 5875 }, { "epoch": 0.10213979036659772, "grad_norm": 1.8755927824492649, "learning_rate": 9.864216289646287e-07, "loss": 0.4203, "step": 5876 }, { "epoch": 0.10215717290410053, "grad_norm": 2.79610768677388, "learning_rate": 9.864151126106186e-07, "loss": 0.6144, "step": 5877 }, { "epoch": 0.10217455544160337, "grad_norm": 1.8371028341177507, "learning_rate": 9.864085947148958e-07, "loss": 0.4298, "step": 5878 }, { "epoch": 0.1021919379791062, "grad_norm": 2.3145770937955303, "learning_rate": 9.864020752774816e-07, "loss": 0.5452, "step": 5879 }, { "epoch": 0.10220932051660901, "grad_norm": 1.8610819355519244, "learning_rate": 9.86395554298396e-07, "loss": 0.4991, "step": 5880 }, { "epoch": 0.10222670305411184, "grad_norm": 1.7334295949484713, "learning_rate": 9.863890317776598e-07, "loss": 0.4544, "step": 5881 }, { "epoch": 0.10224408559161466, "grad_norm": 2.4349990775876744, "learning_rate": 9.863825077152939e-07, "loss": 0.5352, "step": 5882 }, { "epoch": 0.10226146812911749, "grad_norm": 1.8589261925870697, "learning_rate": 9.863759821113189e-07, "loss": 0.5424, "step": 5883 }, { "epoch": 0.10227885066662032, "grad_norm": 1.8199292164456788, "learning_rate": 9.863694549657554e-07, "loss": 0.5119, "step": 5884 }, { "epoch": 0.10229623320412314, "grad_norm": 2.292686585752718, "learning_rate": 9.863629262786243e-07, "loss": 0.3742, "step": 5885 }, { "epoch": 0.10231361574162597, "grad_norm": 1.557003660098271, "learning_rate": 9.86356396049946e-07, "loss": 0.2571, "step": 5886 }, { "epoch": 0.10233099827912878, "grad_norm": 2.238058779489741, "learning_rate": 9.863498642797413e-07, "loss": 0.4583, "step": 5887 }, { "epoch": 0.10234838081663161, "grad_norm": 3.443503118435579, "learning_rate": 9.863433309680312e-07, "loss": 0.3162, "step": 5888 }, { "epoch": 0.10236576335413444, "grad_norm": 4.126766524897697, "learning_rate": 9.86336796114836e-07, "loss": 0.2871, "step": 5889 }, { "epoch": 0.10238314589163726, "grad_norm": 1.9641143594691028, "learning_rate": 9.863302597201764e-07, "loss": 0.4316, "step": 5890 }, { "epoch": 0.10240052842914009, "grad_norm": 3.2626568981568513, "learning_rate": 9.863237217840734e-07, "loss": 0.309, "step": 5891 }, { "epoch": 0.1024179109666429, "grad_norm": 1.5977800232467811, "learning_rate": 9.863171823065474e-07, "loss": 0.2082, "step": 5892 }, { "epoch": 0.10243529350414574, "grad_norm": 2.637620605642858, "learning_rate": 9.863106412876197e-07, "loss": 0.3248, "step": 5893 }, { "epoch": 0.10245267604164857, "grad_norm": 1.6794603575884737, "learning_rate": 9.863040987273104e-07, "loss": 0.1521, "step": 5894 }, { "epoch": 0.10247005857915138, "grad_norm": 2.0039390511594624, "learning_rate": 9.862975546256408e-07, "loss": 0.3548, "step": 5895 }, { "epoch": 0.10248744111665421, "grad_norm": 1.767984635688886, "learning_rate": 9.86291008982631e-07, "loss": 0.4008, "step": 5896 }, { "epoch": 0.10250482365415703, "grad_norm": 2.091153288958127, "learning_rate": 9.862844617983021e-07, "loss": 0.4051, "step": 5897 }, { "epoch": 0.10252220619165986, "grad_norm": 2.387127851809051, "learning_rate": 9.86277913072675e-07, "loss": 0.5728, "step": 5898 }, { "epoch": 0.10253958872916269, "grad_norm": 2.394239456556757, "learning_rate": 9.862713628057704e-07, "loss": 0.6477, "step": 5899 }, { "epoch": 0.1025569712666655, "grad_norm": 2.7109583579556, "learning_rate": 9.862648109976087e-07, "loss": 0.4969, "step": 5900 }, { "epoch": 0.10257435380416834, "grad_norm": 1.5788730958138133, "learning_rate": 9.86258257648211e-07, "loss": 0.3429, "step": 5901 }, { "epoch": 0.10259173634167115, "grad_norm": 1.8452952940352214, "learning_rate": 9.86251702757598e-07, "loss": 0.502, "step": 5902 }, { "epoch": 0.10260911887917398, "grad_norm": 2.6160148465586786, "learning_rate": 9.862451463257905e-07, "loss": 0.4093, "step": 5903 }, { "epoch": 0.10262650141667681, "grad_norm": 2.165219490526409, "learning_rate": 9.862385883528093e-07, "loss": 0.861, "step": 5904 }, { "epoch": 0.10264388395417963, "grad_norm": 2.155164563659568, "learning_rate": 9.86232028838675e-07, "loss": 0.3103, "step": 5905 }, { "epoch": 0.10266126649168246, "grad_norm": 1.1131910113934662, "learning_rate": 9.862254677834086e-07, "loss": 0.238, "step": 5906 }, { "epoch": 0.10267864902918528, "grad_norm": 1.8324380397011377, "learning_rate": 9.862189051870309e-07, "loss": 0.6056, "step": 5907 }, { "epoch": 0.1026960315666881, "grad_norm": 1.7511615886666336, "learning_rate": 9.862123410495624e-07, "loss": 0.4382, "step": 5908 }, { "epoch": 0.10271341410419094, "grad_norm": 1.9851643316391465, "learning_rate": 9.862057753710242e-07, "loss": 0.4188, "step": 5909 }, { "epoch": 0.10273079664169375, "grad_norm": 1.4791539568974745, "learning_rate": 9.861992081514373e-07, "loss": 0.2493, "step": 5910 }, { "epoch": 0.10274817917919658, "grad_norm": 2.9694650667741187, "learning_rate": 9.86192639390822e-07, "loss": 0.31, "step": 5911 }, { "epoch": 0.1027655617166994, "grad_norm": 1.9731564219504314, "learning_rate": 9.861860690891995e-07, "loss": 0.2784, "step": 5912 }, { "epoch": 0.10278294425420223, "grad_norm": 2.8037338956477265, "learning_rate": 9.861794972465902e-07, "loss": 0.5987, "step": 5913 }, { "epoch": 0.10280032679170505, "grad_norm": 1.2713852846400229, "learning_rate": 9.861729238630155e-07, "loss": 0.5147, "step": 5914 }, { "epoch": 0.10281770932920788, "grad_norm": 5.42162622428537, "learning_rate": 9.861663489384959e-07, "loss": 0.4233, "step": 5915 }, { "epoch": 0.1028350918667107, "grad_norm": 2.1909471257499886, "learning_rate": 9.861597724730524e-07, "loss": 0.5895, "step": 5916 }, { "epoch": 0.10285247440421352, "grad_norm": 1.7675197315847035, "learning_rate": 9.861531944667056e-07, "loss": 0.4824, "step": 5917 }, { "epoch": 0.10286985694171635, "grad_norm": 2.2486879184749586, "learning_rate": 9.861466149194766e-07, "loss": 0.6355, "step": 5918 }, { "epoch": 0.10288723947921917, "grad_norm": 2.0236178560830687, "learning_rate": 9.86140033831386e-07, "loss": 0.5345, "step": 5919 }, { "epoch": 0.102904622016722, "grad_norm": 2.3663420138049616, "learning_rate": 9.86133451202455e-07, "loss": 0.3951, "step": 5920 }, { "epoch": 0.10292200455422483, "grad_norm": 2.201613472000389, "learning_rate": 9.86126867032704e-07, "loss": 0.4144, "step": 5921 }, { "epoch": 0.10293938709172765, "grad_norm": 2.4333125687682453, "learning_rate": 9.861202813221543e-07, "loss": 0.3801, "step": 5922 }, { "epoch": 0.10295676962923048, "grad_norm": 1.530251606027181, "learning_rate": 9.861136940708269e-07, "loss": 0.3965, "step": 5923 }, { "epoch": 0.1029741521667333, "grad_norm": 1.4608016428883515, "learning_rate": 9.86107105278742e-07, "loss": 0.1714, "step": 5924 }, { "epoch": 0.10299153470423612, "grad_norm": 2.0187047720375344, "learning_rate": 9.86100514945921e-07, "loss": 0.3153, "step": 5925 }, { "epoch": 0.10300891724173895, "grad_norm": 1.3980308400219885, "learning_rate": 9.860939230723845e-07, "loss": 0.5157, "step": 5926 }, { "epoch": 0.10302629977924177, "grad_norm": 1.9255576806040977, "learning_rate": 9.860873296581538e-07, "loss": 0.3865, "step": 5927 }, { "epoch": 0.1030436823167446, "grad_norm": 1.830134324131932, "learning_rate": 9.860807347032493e-07, "loss": 0.2269, "step": 5928 }, { "epoch": 0.10306106485424742, "grad_norm": 1.6031372086826068, "learning_rate": 9.860741382076924e-07, "loss": 0.4415, "step": 5929 }, { "epoch": 0.10307844739175025, "grad_norm": 2.1606822130338355, "learning_rate": 9.860675401715038e-07, "loss": 0.4751, "step": 5930 }, { "epoch": 0.10309582992925308, "grad_norm": 1.2973037169076527, "learning_rate": 9.860609405947043e-07, "loss": 0.4274, "step": 5931 }, { "epoch": 0.1031132124667559, "grad_norm": 2.3230027438283964, "learning_rate": 9.86054339477315e-07, "loss": 0.5289, "step": 5932 }, { "epoch": 0.10313059500425872, "grad_norm": 2.6087036952225087, "learning_rate": 9.860477368193565e-07, "loss": 0.5585, "step": 5933 }, { "epoch": 0.10314797754176154, "grad_norm": 1.7623829266393154, "learning_rate": 9.8604113262085e-07, "loss": 0.2781, "step": 5934 }, { "epoch": 0.10316536007926437, "grad_norm": 1.7866557554055433, "learning_rate": 9.860345268818164e-07, "loss": 0.2582, "step": 5935 }, { "epoch": 0.1031827426167672, "grad_norm": 2.4826399806375132, "learning_rate": 9.860279196022765e-07, "loss": 0.5996, "step": 5936 }, { "epoch": 0.10320012515427002, "grad_norm": 1.8296845507700104, "learning_rate": 9.860213107822514e-07, "loss": 0.6155, "step": 5937 }, { "epoch": 0.10321750769177285, "grad_norm": 2.1036034372298014, "learning_rate": 9.860147004217622e-07, "loss": 0.5009, "step": 5938 }, { "epoch": 0.10323489022927566, "grad_norm": 1.9842619127251109, "learning_rate": 9.860080885208294e-07, "loss": 0.5776, "step": 5939 }, { "epoch": 0.1032522727667785, "grad_norm": 2.390224236744337, "learning_rate": 9.860014750794745e-07, "loss": 0.3106, "step": 5940 }, { "epoch": 0.10326965530428132, "grad_norm": 1.4050127030182518, "learning_rate": 9.859948600977181e-07, "loss": 0.435, "step": 5941 }, { "epoch": 0.10328703784178414, "grad_norm": 1.9770133433969077, "learning_rate": 9.85988243575581e-07, "loss": 0.6889, "step": 5942 }, { "epoch": 0.10330442037928697, "grad_norm": 2.3294415912330555, "learning_rate": 9.859816255130848e-07, "loss": 0.4313, "step": 5943 }, { "epoch": 0.10332180291678979, "grad_norm": 1.2628010300307728, "learning_rate": 9.859750059102498e-07, "loss": 0.347, "step": 5944 }, { "epoch": 0.10333918545429262, "grad_norm": 2.633441282835781, "learning_rate": 9.859683847670972e-07, "loss": 0.7533, "step": 5945 }, { "epoch": 0.10335656799179545, "grad_norm": 2.451127868578092, "learning_rate": 9.859617620836483e-07, "loss": 0.4272, "step": 5946 }, { "epoch": 0.10337395052929826, "grad_norm": 1.7763812913150927, "learning_rate": 9.859551378599237e-07, "loss": 0.6187, "step": 5947 }, { "epoch": 0.1033913330668011, "grad_norm": 1.9786184865595924, "learning_rate": 9.859485120959447e-07, "loss": 0.69, "step": 5948 }, { "epoch": 0.10340871560430391, "grad_norm": 1.7311299810905574, "learning_rate": 9.85941884791732e-07, "loss": 0.3534, "step": 5949 }, { "epoch": 0.10342609814180674, "grad_norm": 1.6164420206067158, "learning_rate": 9.859352559473069e-07, "loss": 0.722, "step": 5950 }, { "epoch": 0.10344348067930957, "grad_norm": 1.5428829486529057, "learning_rate": 9.8592862556269e-07, "loss": 0.6165, "step": 5951 }, { "epoch": 0.10346086321681239, "grad_norm": 1.700255221372161, "learning_rate": 9.859219936379026e-07, "loss": 0.2443, "step": 5952 }, { "epoch": 0.10347824575431522, "grad_norm": 1.905051271515615, "learning_rate": 9.85915360172966e-07, "loss": 0.8115, "step": 5953 }, { "epoch": 0.10349562829181803, "grad_norm": 1.8956664425157415, "learning_rate": 9.859087251679007e-07, "loss": 0.1853, "step": 5954 }, { "epoch": 0.10351301082932086, "grad_norm": 2.013615591955473, "learning_rate": 9.85902088622728e-07, "loss": 0.5608, "step": 5955 }, { "epoch": 0.1035303933668237, "grad_norm": 1.5847716941038632, "learning_rate": 9.858954505374688e-07, "loss": 0.4678, "step": 5956 }, { "epoch": 0.10354777590432651, "grad_norm": 1.656031085104375, "learning_rate": 9.858888109121444e-07, "loss": 0.7531, "step": 5957 }, { "epoch": 0.10356515844182934, "grad_norm": 1.8014693566769777, "learning_rate": 9.858821697467753e-07, "loss": 0.3041, "step": 5958 }, { "epoch": 0.10358254097933216, "grad_norm": 2.376498020343828, "learning_rate": 9.858755270413833e-07, "loss": 0.4376, "step": 5959 }, { "epoch": 0.10359992351683499, "grad_norm": 1.5762304841272354, "learning_rate": 9.85868882795989e-07, "loss": 0.5266, "step": 5960 }, { "epoch": 0.10361730605433782, "grad_norm": 2.097489636931808, "learning_rate": 9.858622370106134e-07, "loss": 0.3875, "step": 5961 }, { "epoch": 0.10363468859184063, "grad_norm": 2.1120270789637905, "learning_rate": 9.85855589685278e-07, "loss": 0.3476, "step": 5962 }, { "epoch": 0.10365207112934346, "grad_norm": 2.156543761700904, "learning_rate": 9.858489408200033e-07, "loss": 0.5129, "step": 5963 }, { "epoch": 0.10366945366684628, "grad_norm": 2.2465159836344983, "learning_rate": 9.858422904148105e-07, "loss": 0.3442, "step": 5964 }, { "epoch": 0.10368683620434911, "grad_norm": 1.6951259621495143, "learning_rate": 9.858356384697211e-07, "loss": 0.7442, "step": 5965 }, { "epoch": 0.10370421874185194, "grad_norm": 2.640128990337589, "learning_rate": 9.858289849847557e-07, "loss": 0.1671, "step": 5966 }, { "epoch": 0.10372160127935476, "grad_norm": 3.9924461433549414, "learning_rate": 9.858223299599358e-07, "loss": 0.4846, "step": 5967 }, { "epoch": 0.10373898381685759, "grad_norm": 2.2909284262161576, "learning_rate": 9.85815673395282e-07, "loss": 0.5018, "step": 5968 }, { "epoch": 0.1037563663543604, "grad_norm": 2.000090476979322, "learning_rate": 9.85809015290816e-07, "loss": 0.537, "step": 5969 }, { "epoch": 0.10377374889186324, "grad_norm": 1.2996002191968266, "learning_rate": 9.858023556465584e-07, "loss": 0.5169, "step": 5970 }, { "epoch": 0.10379113142936607, "grad_norm": 2.1447897534715192, "learning_rate": 9.857956944625306e-07, "loss": 0.5262, "step": 5971 }, { "epoch": 0.10380851396686888, "grad_norm": 1.9031077664626452, "learning_rate": 9.857890317387534e-07, "loss": 0.5511, "step": 5972 }, { "epoch": 0.10382589650437171, "grad_norm": 2.0325892617313226, "learning_rate": 9.857823674752482e-07, "loss": 0.5139, "step": 5973 }, { "epoch": 0.10384327904187453, "grad_norm": 2.585831699615011, "learning_rate": 9.857757016720362e-07, "loss": 0.6393, "step": 5974 }, { "epoch": 0.10386066157937736, "grad_norm": 2.598706641258632, "learning_rate": 9.857690343291382e-07, "loss": 0.4525, "step": 5975 }, { "epoch": 0.10387804411688019, "grad_norm": 4.0407970165681, "learning_rate": 9.857623654465756e-07, "loss": 0.5362, "step": 5976 }, { "epoch": 0.103895426654383, "grad_norm": 1.785688712674861, "learning_rate": 9.857556950243694e-07, "loss": 0.3516, "step": 5977 }, { "epoch": 0.10391280919188584, "grad_norm": 1.7025698671267275, "learning_rate": 9.857490230625409e-07, "loss": 0.3025, "step": 5978 }, { "epoch": 0.10393019172938865, "grad_norm": 2.863762876902158, "learning_rate": 9.85742349561111e-07, "loss": 0.5294, "step": 5979 }, { "epoch": 0.10394757426689148, "grad_norm": 1.5801248678546387, "learning_rate": 9.857356745201008e-07, "loss": 0.5114, "step": 5980 }, { "epoch": 0.10396495680439431, "grad_norm": 3.090014671688134, "learning_rate": 9.857289979395321e-07, "loss": 0.5384, "step": 5981 }, { "epoch": 0.10398233934189713, "grad_norm": 1.667262339895895, "learning_rate": 9.857223198194253e-07, "loss": 0.3995, "step": 5982 }, { "epoch": 0.10399972187939996, "grad_norm": 1.7355635816825805, "learning_rate": 9.85715640159802e-07, "loss": 0.395, "step": 5983 }, { "epoch": 0.10401710441690278, "grad_norm": 1.0719639620244388, "learning_rate": 9.85708958960683e-07, "loss": 0.3825, "step": 5984 }, { "epoch": 0.1040344869544056, "grad_norm": 4.223653783848228, "learning_rate": 9.857022762220899e-07, "loss": 0.4039, "step": 5985 }, { "epoch": 0.10405186949190842, "grad_norm": 2.0376216280705584, "learning_rate": 9.856955919440438e-07, "loss": 0.4567, "step": 5986 }, { "epoch": 0.10406925202941125, "grad_norm": 1.5973834443514179, "learning_rate": 9.856889061265657e-07, "loss": 0.4218, "step": 5987 }, { "epoch": 0.10408663456691408, "grad_norm": 1.5046892099910654, "learning_rate": 9.856822187696766e-07, "loss": 0.4892, "step": 5988 }, { "epoch": 0.1041040171044169, "grad_norm": 1.8832030548592873, "learning_rate": 9.856755298733983e-07, "loss": 0.3046, "step": 5989 }, { "epoch": 0.10412139964191973, "grad_norm": 1.9612194953335662, "learning_rate": 9.856688394377516e-07, "loss": 0.6449, "step": 5990 }, { "epoch": 0.10413878217942255, "grad_norm": 1.884029706359075, "learning_rate": 9.856621474627578e-07, "loss": 0.5196, "step": 5991 }, { "epoch": 0.10415616471692538, "grad_norm": 1.5326827367596196, "learning_rate": 9.85655453948438e-07, "loss": 0.6515, "step": 5992 }, { "epoch": 0.1041735472544282, "grad_norm": 1.3563992377141056, "learning_rate": 9.856487588948136e-07, "loss": 0.3893, "step": 5993 }, { "epoch": 0.10419092979193102, "grad_norm": 1.6682781034677814, "learning_rate": 9.856420623019057e-07, "loss": 0.6743, "step": 5994 }, { "epoch": 0.10420831232943385, "grad_norm": 1.5279230287567107, "learning_rate": 9.856353641697356e-07, "loss": 0.7021, "step": 5995 }, { "epoch": 0.10422569486693667, "grad_norm": 1.8309737757360927, "learning_rate": 9.856286644983242e-07, "loss": 0.2695, "step": 5996 }, { "epoch": 0.1042430774044395, "grad_norm": 2.1980069247975207, "learning_rate": 9.856219632876931e-07, "loss": 0.3836, "step": 5997 }, { "epoch": 0.10426045994194233, "grad_norm": 3.045961046271809, "learning_rate": 9.856152605378636e-07, "loss": 0.5087, "step": 5998 }, { "epoch": 0.10427784247944515, "grad_norm": 1.6350658640912668, "learning_rate": 9.85608556248857e-07, "loss": 0.5254, "step": 5999 }, { "epoch": 0.10429522501694798, "grad_norm": 1.2894648417526862, "learning_rate": 9.85601850420694e-07, "loss": 0.5065, "step": 6000 }, { "epoch": 0.10431260755445079, "grad_norm": 1.5019256639349015, "learning_rate": 9.855951430533963e-07, "loss": 0.205, "step": 6001 }, { "epoch": 0.10432999009195362, "grad_norm": 2.654319963945383, "learning_rate": 9.855884341469851e-07, "loss": 0.5763, "step": 6002 }, { "epoch": 0.10434737262945645, "grad_norm": 4.1524812951194985, "learning_rate": 9.855817237014816e-07, "loss": 0.4527, "step": 6003 }, { "epoch": 0.10436475516695927, "grad_norm": 2.263410914633694, "learning_rate": 9.855750117169071e-07, "loss": 0.6444, "step": 6004 }, { "epoch": 0.1043821377044621, "grad_norm": 1.5456256255514245, "learning_rate": 9.855682981932829e-07, "loss": 1.009, "step": 6005 }, { "epoch": 0.10439952024196492, "grad_norm": 1.6221834174511403, "learning_rate": 9.8556158313063e-07, "loss": 0.3989, "step": 6006 }, { "epoch": 0.10441690277946775, "grad_norm": 3.498507791521765, "learning_rate": 9.855548665289703e-07, "loss": 0.7304, "step": 6007 }, { "epoch": 0.10443428531697058, "grad_norm": 2.169529491836024, "learning_rate": 9.855481483883245e-07, "loss": 0.5365, "step": 6008 }, { "epoch": 0.10445166785447339, "grad_norm": 2.3241005862659363, "learning_rate": 9.855414287087142e-07, "loss": 0.5611, "step": 6009 }, { "epoch": 0.10446905039197622, "grad_norm": 3.4430615612353868, "learning_rate": 9.855347074901605e-07, "loss": 0.6341, "step": 6010 }, { "epoch": 0.10448643292947904, "grad_norm": 2.158009436680994, "learning_rate": 9.85527984732685e-07, "loss": 0.5422, "step": 6011 }, { "epoch": 0.10450381546698187, "grad_norm": 2.2580226876306417, "learning_rate": 9.855212604363088e-07, "loss": 0.4058, "step": 6012 }, { "epoch": 0.1045211980044847, "grad_norm": 1.743283456880706, "learning_rate": 9.85514534601053e-07, "loss": 0.3478, "step": 6013 }, { "epoch": 0.10453858054198752, "grad_norm": 1.8667749209181028, "learning_rate": 9.855078072269394e-07, "loss": 0.2595, "step": 6014 }, { "epoch": 0.10455596307949035, "grad_norm": 2.941117177692182, "learning_rate": 9.855010783139888e-07, "loss": 0.5503, "step": 6015 }, { "epoch": 0.10457334561699316, "grad_norm": 2.1017509297039823, "learning_rate": 9.85494347862223e-07, "loss": 0.5747, "step": 6016 }, { "epoch": 0.104590728154496, "grad_norm": 2.9482378647238585, "learning_rate": 9.854876158716632e-07, "loss": 0.7052, "step": 6017 }, { "epoch": 0.10460811069199882, "grad_norm": 2.8546454656922866, "learning_rate": 9.854808823423307e-07, "loss": 0.5925, "step": 6018 }, { "epoch": 0.10462549322950164, "grad_norm": 2.3707769332277513, "learning_rate": 9.854741472742465e-07, "loss": 0.3143, "step": 6019 }, { "epoch": 0.10464287576700447, "grad_norm": 2.243197355541521, "learning_rate": 9.854674106674326e-07, "loss": 0.302, "step": 6020 }, { "epoch": 0.10466025830450729, "grad_norm": 1.1910726079200342, "learning_rate": 9.854606725219097e-07, "loss": 0.2779, "step": 6021 }, { "epoch": 0.10467764084201012, "grad_norm": 3.304625936573679, "learning_rate": 9.854539328376998e-07, "loss": 0.7723, "step": 6022 }, { "epoch": 0.10469502337951295, "grad_norm": 1.6341581965732672, "learning_rate": 9.854471916148236e-07, "loss": 0.4475, "step": 6023 }, { "epoch": 0.10471240591701576, "grad_norm": 2.3972232048128794, "learning_rate": 9.85440448853303e-07, "loss": 0.5591, "step": 6024 }, { "epoch": 0.1047297884545186, "grad_norm": 2.6115797698405436, "learning_rate": 9.854337045531593e-07, "loss": 0.4175, "step": 6025 }, { "epoch": 0.10474717099202141, "grad_norm": 2.21128904944103, "learning_rate": 9.854269587144135e-07, "loss": 0.6393, "step": 6026 }, { "epoch": 0.10476455352952424, "grad_norm": 1.3545401248044282, "learning_rate": 9.854202113370872e-07, "loss": 0.694, "step": 6027 }, { "epoch": 0.10478193606702707, "grad_norm": 2.8865389009711238, "learning_rate": 9.85413462421202e-07, "loss": 0.3472, "step": 6028 }, { "epoch": 0.10479931860452989, "grad_norm": 2.228344859437504, "learning_rate": 9.85406711966779e-07, "loss": 0.4481, "step": 6029 }, { "epoch": 0.10481670114203272, "grad_norm": 2.0260073604932685, "learning_rate": 9.853999599738398e-07, "loss": 0.5481, "step": 6030 }, { "epoch": 0.10483408367953553, "grad_norm": 1.7071141905646285, "learning_rate": 9.853932064424058e-07, "loss": 0.6607, "step": 6031 }, { "epoch": 0.10485146621703836, "grad_norm": 2.2874164832666795, "learning_rate": 9.853864513724982e-07, "loss": 0.3907, "step": 6032 }, { "epoch": 0.1048688487545412, "grad_norm": 2.373267707159964, "learning_rate": 9.853796947641382e-07, "loss": 0.4702, "step": 6033 }, { "epoch": 0.10488623129204401, "grad_norm": 2.7434131348907838, "learning_rate": 9.853729366173479e-07, "loss": 0.5038, "step": 6034 }, { "epoch": 0.10490361382954684, "grad_norm": 2.3046792922218757, "learning_rate": 9.853661769321482e-07, "loss": 0.7255, "step": 6035 }, { "epoch": 0.10492099636704966, "grad_norm": 1.613262861521046, "learning_rate": 9.853594157085608e-07, "loss": 0.2923, "step": 6036 }, { "epoch": 0.10493837890455249, "grad_norm": 1.5029604976202, "learning_rate": 9.853526529466069e-07, "loss": 0.3551, "step": 6037 }, { "epoch": 0.10495576144205532, "grad_norm": 1.548912076673048, "learning_rate": 9.85345888646308e-07, "loss": 0.6202, "step": 6038 }, { "epoch": 0.10497314397955813, "grad_norm": 3.513629428793841, "learning_rate": 9.853391228076857e-07, "loss": 0.2325, "step": 6039 }, { "epoch": 0.10499052651706096, "grad_norm": 1.6708087618974734, "learning_rate": 9.853323554307612e-07, "loss": 0.5983, "step": 6040 }, { "epoch": 0.10500790905456378, "grad_norm": 2.177756095098118, "learning_rate": 9.853255865155563e-07, "loss": 0.3323, "step": 6041 }, { "epoch": 0.10502529159206661, "grad_norm": 2.6075755612544422, "learning_rate": 9.853188160620922e-07, "loss": 0.4862, "step": 6042 }, { "epoch": 0.10504267412956944, "grad_norm": 1.8966645398163842, "learning_rate": 9.853120440703901e-07, "loss": 0.3235, "step": 6043 }, { "epoch": 0.10506005666707226, "grad_norm": 2.762528611078134, "learning_rate": 9.85305270540472e-07, "loss": 0.4535, "step": 6044 }, { "epoch": 0.10507743920457509, "grad_norm": 2.78341533504984, "learning_rate": 9.852984954723592e-07, "loss": 0.4914, "step": 6045 }, { "epoch": 0.1050948217420779, "grad_norm": 1.9801405153743976, "learning_rate": 9.85291718866073e-07, "loss": 0.4558, "step": 6046 }, { "epoch": 0.10511220427958073, "grad_norm": 3.6356138264325066, "learning_rate": 9.852849407216347e-07, "loss": 0.8641, "step": 6047 }, { "epoch": 0.10512958681708356, "grad_norm": 2.2525070341579965, "learning_rate": 9.852781610390664e-07, "loss": 0.6931, "step": 6048 }, { "epoch": 0.10514696935458638, "grad_norm": 2.2230482438522907, "learning_rate": 9.852713798183892e-07, "loss": 0.6218, "step": 6049 }, { "epoch": 0.10516435189208921, "grad_norm": 1.553187209339566, "learning_rate": 9.852645970596245e-07, "loss": 0.4383, "step": 6050 }, { "epoch": 0.10518173442959203, "grad_norm": 1.7103231402620152, "learning_rate": 9.85257812762794e-07, "loss": 0.3945, "step": 6051 }, { "epoch": 0.10519911696709486, "grad_norm": 1.5558823508347432, "learning_rate": 9.852510269279192e-07, "loss": 0.4909, "step": 6052 }, { "epoch": 0.10521649950459769, "grad_norm": 1.4350294565538584, "learning_rate": 9.852442395550215e-07, "loss": 0.4124, "step": 6053 }, { "epoch": 0.1052338820421005, "grad_norm": 2.6068530071491365, "learning_rate": 9.852374506441226e-07, "loss": 0.5137, "step": 6054 }, { "epoch": 0.10525126457960333, "grad_norm": 4.005145581648664, "learning_rate": 9.852306601952438e-07, "loss": 0.5511, "step": 6055 }, { "epoch": 0.10526864711710615, "grad_norm": 2.4959112266084045, "learning_rate": 9.852238682084066e-07, "loss": 0.764, "step": 6056 }, { "epoch": 0.10528602965460898, "grad_norm": 2.2786325438991404, "learning_rate": 9.852170746836327e-07, "loss": 0.7155, "step": 6057 }, { "epoch": 0.1053034121921118, "grad_norm": 2.1593465099462783, "learning_rate": 9.852102796209436e-07, "loss": 0.4804, "step": 6058 }, { "epoch": 0.10532079472961463, "grad_norm": 2.8746685040110025, "learning_rate": 9.852034830203607e-07, "loss": 0.5309, "step": 6059 }, { "epoch": 0.10533817726711746, "grad_norm": 1.722009178743017, "learning_rate": 9.851966848819059e-07, "loss": 0.4703, "step": 6060 }, { "epoch": 0.10535555980462027, "grad_norm": 2.1364349464243553, "learning_rate": 9.851898852056e-07, "loss": 0.4655, "step": 6061 }, { "epoch": 0.1053729423421231, "grad_norm": 2.345810358873859, "learning_rate": 9.851830839914655e-07, "loss": 0.3979, "step": 6062 }, { "epoch": 0.10539032487962592, "grad_norm": 1.5596190481261851, "learning_rate": 9.851762812395233e-07, "loss": 0.5033, "step": 6063 }, { "epoch": 0.10540770741712875, "grad_norm": 1.6904731550803134, "learning_rate": 9.851694769497951e-07, "loss": 0.6074, "step": 6064 }, { "epoch": 0.10542508995463158, "grad_norm": 2.572432314573731, "learning_rate": 9.851626711223026e-07, "loss": 0.5798, "step": 6065 }, { "epoch": 0.1054424724921344, "grad_norm": 1.5619099822428184, "learning_rate": 9.851558637570675e-07, "loss": 0.5863, "step": 6066 }, { "epoch": 0.10545985502963723, "grad_norm": 2.5937260616240465, "learning_rate": 9.85149054854111e-07, "loss": 0.7139, "step": 6067 }, { "epoch": 0.10547723756714004, "grad_norm": 2.2004427914475095, "learning_rate": 9.851422444134548e-07, "loss": 0.5684, "step": 6068 }, { "epoch": 0.10549462010464287, "grad_norm": 3.53659636757808, "learning_rate": 9.851354324351207e-07, "loss": 0.3781, "step": 6069 }, { "epoch": 0.1055120026421457, "grad_norm": 1.9089308667100802, "learning_rate": 9.8512861891913e-07, "loss": 0.5227, "step": 6070 }, { "epoch": 0.10552938517964852, "grad_norm": 3.1370956668947008, "learning_rate": 9.851218038655043e-07, "loss": 0.593, "step": 6071 }, { "epoch": 0.10554676771715135, "grad_norm": 1.9031936353016434, "learning_rate": 9.851149872742656e-07, "loss": 0.6088, "step": 6072 }, { "epoch": 0.10556415025465417, "grad_norm": 1.573953869513212, "learning_rate": 9.851081691454351e-07, "loss": 0.3427, "step": 6073 }, { "epoch": 0.105581532792157, "grad_norm": 3.236237696504339, "learning_rate": 9.851013494790346e-07, "loss": 0.4884, "step": 6074 }, { "epoch": 0.10559891532965983, "grad_norm": 2.2532966427509384, "learning_rate": 9.850945282750855e-07, "loss": 0.3555, "step": 6075 }, { "epoch": 0.10561629786716265, "grad_norm": 2.716104243889372, "learning_rate": 9.850877055336097e-07, "loss": 0.4294, "step": 6076 }, { "epoch": 0.10563368040466548, "grad_norm": 2.158843573293745, "learning_rate": 9.850808812546288e-07, "loss": 0.4194, "step": 6077 }, { "epoch": 0.10565106294216829, "grad_norm": 2.1823746890511453, "learning_rate": 9.850740554381643e-07, "loss": 0.4531, "step": 6078 }, { "epoch": 0.10566844547967112, "grad_norm": 1.7492067376880522, "learning_rate": 9.850672280842377e-07, "loss": 0.6554, "step": 6079 }, { "epoch": 0.10568582801717395, "grad_norm": 1.6368254487758327, "learning_rate": 9.85060399192871e-07, "loss": 0.4514, "step": 6080 }, { "epoch": 0.10570321055467677, "grad_norm": 2.6888167874346327, "learning_rate": 9.850535687640856e-07, "loss": 0.5065, "step": 6081 }, { "epoch": 0.1057205930921796, "grad_norm": 1.801822085540522, "learning_rate": 9.850467367979031e-07, "loss": 0.5818, "step": 6082 }, { "epoch": 0.10573797562968242, "grad_norm": 3.07440002693535, "learning_rate": 9.850399032943453e-07, "loss": 0.5491, "step": 6083 }, { "epoch": 0.10575535816718525, "grad_norm": 1.843727802376278, "learning_rate": 9.850330682534337e-07, "loss": 0.3737, "step": 6084 }, { "epoch": 0.10577274070468808, "grad_norm": 2.1440394865144357, "learning_rate": 9.850262316751904e-07, "loss": 0.4412, "step": 6085 }, { "epoch": 0.10579012324219089, "grad_norm": 1.486449863710745, "learning_rate": 9.850193935596364e-07, "loss": 0.568, "step": 6086 }, { "epoch": 0.10580750577969372, "grad_norm": 1.8105717279493716, "learning_rate": 9.85012553906794e-07, "loss": 0.5719, "step": 6087 }, { "epoch": 0.10582488831719654, "grad_norm": 1.719657905501589, "learning_rate": 9.850057127166846e-07, "loss": 0.4188, "step": 6088 }, { "epoch": 0.10584227085469937, "grad_norm": 1.977644903588072, "learning_rate": 9.849988699893297e-07, "loss": 0.4638, "step": 6089 }, { "epoch": 0.1058596533922022, "grad_norm": 2.4113749757781533, "learning_rate": 9.849920257247513e-07, "loss": 0.3468, "step": 6090 }, { "epoch": 0.10587703592970502, "grad_norm": 1.71417466023979, "learning_rate": 9.84985179922971e-07, "loss": 0.3082, "step": 6091 }, { "epoch": 0.10589441846720785, "grad_norm": 2.2045494191484996, "learning_rate": 9.849783325840102e-07, "loss": 0.5079, "step": 6092 }, { "epoch": 0.10591180100471066, "grad_norm": 2.290892817879004, "learning_rate": 9.849714837078912e-07, "loss": 0.4156, "step": 6093 }, { "epoch": 0.10592918354221349, "grad_norm": 4.461237733999085, "learning_rate": 9.849646332946351e-07, "loss": 0.4168, "step": 6094 }, { "epoch": 0.10594656607971632, "grad_norm": 2.7345559171571945, "learning_rate": 9.84957781344264e-07, "loss": 0.3723, "step": 6095 }, { "epoch": 0.10596394861721914, "grad_norm": 1.0570871136300766, "learning_rate": 9.849509278567995e-07, "loss": 0.2929, "step": 6096 }, { "epoch": 0.10598133115472197, "grad_norm": 2.487170752866539, "learning_rate": 9.849440728322633e-07, "loss": 0.5113, "step": 6097 }, { "epoch": 0.10599871369222479, "grad_norm": 1.684574354396245, "learning_rate": 9.849372162706772e-07, "loss": 0.3996, "step": 6098 }, { "epoch": 0.10601609622972762, "grad_norm": 2.0293205971802437, "learning_rate": 9.84930358172063e-07, "loss": 0.6014, "step": 6099 }, { "epoch": 0.10603347876723045, "grad_norm": 2.047074597065543, "learning_rate": 9.849234985364423e-07, "loss": 0.6159, "step": 6100 }, { "epoch": 0.10605086130473326, "grad_norm": 2.6011816398640466, "learning_rate": 9.849166373638366e-07, "loss": 0.3729, "step": 6101 }, { "epoch": 0.10606824384223609, "grad_norm": 1.4545477955187587, "learning_rate": 9.849097746542681e-07, "loss": 0.5178, "step": 6102 }, { "epoch": 0.10608562637973891, "grad_norm": 1.7164147534430367, "learning_rate": 9.849029104077583e-07, "loss": 0.3377, "step": 6103 }, { "epoch": 0.10610300891724174, "grad_norm": 2.3611198159421787, "learning_rate": 9.84896044624329e-07, "loss": 0.4309, "step": 6104 }, { "epoch": 0.10612039145474457, "grad_norm": 1.7485930327444996, "learning_rate": 9.848891773040021e-07, "loss": 0.3115, "step": 6105 }, { "epoch": 0.10613777399224739, "grad_norm": 1.8131647792875574, "learning_rate": 9.848823084467994e-07, "loss": 0.3696, "step": 6106 }, { "epoch": 0.10615515652975022, "grad_norm": 2.3950287789515663, "learning_rate": 9.848754380527421e-07, "loss": 0.5192, "step": 6107 }, { "epoch": 0.10617253906725303, "grad_norm": 2.1327412605171645, "learning_rate": 9.848685661218527e-07, "loss": 0.547, "step": 6108 }, { "epoch": 0.10618992160475586, "grad_norm": 2.9529184586727126, "learning_rate": 9.848616926541526e-07, "loss": 0.2109, "step": 6109 }, { "epoch": 0.1062073041422587, "grad_norm": 1.5269297975345748, "learning_rate": 9.848548176496637e-07, "loss": 0.3479, "step": 6110 }, { "epoch": 0.10622468667976151, "grad_norm": 1.768998841588288, "learning_rate": 9.848479411084075e-07, "loss": 0.5912, "step": 6111 }, { "epoch": 0.10624206921726434, "grad_norm": 2.2883839860192134, "learning_rate": 9.848410630304064e-07, "loss": 0.4201, "step": 6112 }, { "epoch": 0.10625945175476716, "grad_norm": 3.620070674482842, "learning_rate": 9.848341834156817e-07, "loss": 0.6588, "step": 6113 }, { "epoch": 0.10627683429226999, "grad_norm": 1.4207220443704578, "learning_rate": 9.848273022642555e-07, "loss": 0.3837, "step": 6114 }, { "epoch": 0.10629421682977282, "grad_norm": 2.059691296605213, "learning_rate": 9.848204195761492e-07, "loss": 0.4232, "step": 6115 }, { "epoch": 0.10631159936727563, "grad_norm": 1.4537666937712845, "learning_rate": 9.848135353513848e-07, "loss": 0.417, "step": 6116 }, { "epoch": 0.10632898190477846, "grad_norm": 2.6695318939392827, "learning_rate": 9.848066495899845e-07, "loss": 0.4342, "step": 6117 }, { "epoch": 0.10634636444228128, "grad_norm": 1.6345584844880554, "learning_rate": 9.847997622919698e-07, "loss": 0.4591, "step": 6118 }, { "epoch": 0.10636374697978411, "grad_norm": 1.5454921290703714, "learning_rate": 9.847928734573624e-07, "loss": 0.4599, "step": 6119 }, { "epoch": 0.10638112951728694, "grad_norm": 3.018171881670118, "learning_rate": 9.847859830861843e-07, "loss": 0.5338, "step": 6120 }, { "epoch": 0.10639851205478976, "grad_norm": 1.913461437430313, "learning_rate": 9.847790911784575e-07, "loss": 0.2979, "step": 6121 }, { "epoch": 0.10641589459229259, "grad_norm": 2.517916685124218, "learning_rate": 9.847721977342034e-07, "loss": 0.3024, "step": 6122 }, { "epoch": 0.1064332771297954, "grad_norm": 1.5303418721132667, "learning_rate": 9.847653027534443e-07, "loss": 0.3731, "step": 6123 }, { "epoch": 0.10645065966729823, "grad_norm": 2.4308698795531134, "learning_rate": 9.847584062362019e-07, "loss": 0.5589, "step": 6124 }, { "epoch": 0.10646804220480105, "grad_norm": 1.562879761473373, "learning_rate": 9.84751508182498e-07, "loss": 0.579, "step": 6125 }, { "epoch": 0.10648542474230388, "grad_norm": 2.369049400184585, "learning_rate": 9.847446085923544e-07, "loss": 0.3549, "step": 6126 }, { "epoch": 0.10650280727980671, "grad_norm": 2.340463183881052, "learning_rate": 9.847377074657931e-07, "loss": 0.3189, "step": 6127 }, { "epoch": 0.10652018981730953, "grad_norm": 2.046243819092576, "learning_rate": 9.84730804802836e-07, "loss": 0.4954, "step": 6128 }, { "epoch": 0.10653757235481236, "grad_norm": 2.019086924326502, "learning_rate": 9.847239006035048e-07, "loss": 0.5913, "step": 6129 }, { "epoch": 0.10655495489231517, "grad_norm": 1.661921313199464, "learning_rate": 9.847169948678218e-07, "loss": 0.5103, "step": 6130 }, { "epoch": 0.106572337429818, "grad_norm": 2.459441954349644, "learning_rate": 9.847100875958084e-07, "loss": 0.3663, "step": 6131 }, { "epoch": 0.10658971996732083, "grad_norm": 3.396324604059901, "learning_rate": 9.847031787874865e-07, "loss": 0.4982, "step": 6132 }, { "epoch": 0.10660710250482365, "grad_norm": 2.2840642922650005, "learning_rate": 9.846962684428783e-07, "loss": 0.7418, "step": 6133 }, { "epoch": 0.10662448504232648, "grad_norm": 3.4309632942687642, "learning_rate": 9.846893565620055e-07, "loss": 0.6308, "step": 6134 }, { "epoch": 0.1066418675798293, "grad_norm": 1.5873523332301158, "learning_rate": 9.846824431448903e-07, "loss": 0.5563, "step": 6135 }, { "epoch": 0.10665925011733213, "grad_norm": 1.7162664762758382, "learning_rate": 9.846755281915542e-07, "loss": 0.5082, "step": 6136 }, { "epoch": 0.10667663265483496, "grad_norm": 2.332192946387, "learning_rate": 9.846686117020195e-07, "loss": 0.7664, "step": 6137 }, { "epoch": 0.10669401519233777, "grad_norm": 1.8266157381402002, "learning_rate": 9.846616936763079e-07, "loss": 0.7037, "step": 6138 }, { "epoch": 0.1067113977298406, "grad_norm": 2.2003195165516765, "learning_rate": 9.846547741144413e-07, "loss": 0.4849, "step": 6139 }, { "epoch": 0.10672878026734342, "grad_norm": 1.7321194350101365, "learning_rate": 9.846478530164417e-07, "loss": 0.5839, "step": 6140 }, { "epoch": 0.10674616280484625, "grad_norm": 2.0943075085463234, "learning_rate": 9.84640930382331e-07, "loss": 0.3577, "step": 6141 }, { "epoch": 0.10676354534234908, "grad_norm": 4.084059997432889, "learning_rate": 9.846340062121311e-07, "loss": 0.9485, "step": 6142 }, { "epoch": 0.1067809278798519, "grad_norm": 2.208095794967231, "learning_rate": 9.846270805058642e-07, "loss": 0.4773, "step": 6143 }, { "epoch": 0.10679831041735473, "grad_norm": 1.357715495117179, "learning_rate": 9.846201532635522e-07, "loss": 0.3625, "step": 6144 }, { "epoch": 0.10681569295485754, "grad_norm": 1.5530187134834603, "learning_rate": 9.846132244852169e-07, "loss": 0.4238, "step": 6145 }, { "epoch": 0.10683307549236037, "grad_norm": 1.9660972359160425, "learning_rate": 9.846062941708798e-07, "loss": 0.3352, "step": 6146 }, { "epoch": 0.1068504580298632, "grad_norm": 1.807346544597156, "learning_rate": 9.84599362320564e-07, "loss": 1.3994, "step": 6147 }, { "epoch": 0.10686784056736602, "grad_norm": 3.5751945070753512, "learning_rate": 9.845924289342904e-07, "loss": 0.7243, "step": 6148 }, { "epoch": 0.10688522310486885, "grad_norm": 2.17696791529281, "learning_rate": 9.845854940120816e-07, "loss": 0.4271, "step": 6149 }, { "epoch": 0.10690260564237167, "grad_norm": 1.694345298013987, "learning_rate": 9.845785575539594e-07, "loss": 0.2455, "step": 6150 }, { "epoch": 0.1069199881798745, "grad_norm": 1.9494580633012177, "learning_rate": 9.845716195599457e-07, "loss": 0.8132, "step": 6151 }, { "epoch": 0.10693737071737733, "grad_norm": 2.1210568905261495, "learning_rate": 9.845646800300625e-07, "loss": 0.4007, "step": 6152 }, { "epoch": 0.10695475325488014, "grad_norm": 2.048523862794002, "learning_rate": 9.845577389643318e-07, "loss": 0.7922, "step": 6153 }, { "epoch": 0.10697213579238297, "grad_norm": 1.5237721465497496, "learning_rate": 9.845507963627756e-07, "loss": 0.6088, "step": 6154 }, { "epoch": 0.10698951832988579, "grad_norm": 2.2140951539889757, "learning_rate": 9.845438522254163e-07, "loss": 0.481, "step": 6155 }, { "epoch": 0.10700690086738862, "grad_norm": 1.586551282159144, "learning_rate": 9.845369065522753e-07, "loss": 0.4275, "step": 6156 }, { "epoch": 0.10702428340489145, "grad_norm": 1.599102426597152, "learning_rate": 9.84529959343375e-07, "loss": 0.3082, "step": 6157 }, { "epoch": 0.10704166594239427, "grad_norm": 1.896101679055299, "learning_rate": 9.845230105987374e-07, "loss": 0.3758, "step": 6158 }, { "epoch": 0.1070590484798971, "grad_norm": 1.395843280295946, "learning_rate": 9.84516060318384e-07, "loss": 0.6435, "step": 6159 }, { "epoch": 0.10707643101739991, "grad_norm": 2.846290933441713, "learning_rate": 9.845091085023378e-07, "loss": 0.7649, "step": 6160 }, { "epoch": 0.10709381355490274, "grad_norm": 3.440942334757635, "learning_rate": 9.8450215515062e-07, "loss": 0.5735, "step": 6161 }, { "epoch": 0.10711119609240558, "grad_norm": 1.498559950441869, "learning_rate": 9.84495200263253e-07, "loss": 0.4533, "step": 6162 }, { "epoch": 0.10712857862990839, "grad_norm": 11.133388437295107, "learning_rate": 9.844882438402589e-07, "loss": 0.5929, "step": 6163 }, { "epoch": 0.10714596116741122, "grad_norm": 2.5460807279432953, "learning_rate": 9.844812858816593e-07, "loss": 0.7957, "step": 6164 }, { "epoch": 0.10716334370491404, "grad_norm": 1.6968964773210362, "learning_rate": 9.844743263874768e-07, "loss": 0.4679, "step": 6165 }, { "epoch": 0.10718072624241687, "grad_norm": 1.7137235330602125, "learning_rate": 9.844673653577333e-07, "loss": 0.4948, "step": 6166 }, { "epoch": 0.1071981087799197, "grad_norm": 1.930012291683678, "learning_rate": 9.844604027924506e-07, "loss": 0.2869, "step": 6167 }, { "epoch": 0.10721549131742251, "grad_norm": 1.7935570513535564, "learning_rate": 9.84453438691651e-07, "loss": 0.4297, "step": 6168 }, { "epoch": 0.10723287385492535, "grad_norm": 1.815186179991341, "learning_rate": 9.844464730553567e-07, "loss": 0.2696, "step": 6169 }, { "epoch": 0.10725025639242816, "grad_norm": 2.36430890380617, "learning_rate": 9.844395058835895e-07, "loss": 0.3471, "step": 6170 }, { "epoch": 0.10726763892993099, "grad_norm": 2.0159696023831977, "learning_rate": 9.844325371763716e-07, "loss": 0.3009, "step": 6171 }, { "epoch": 0.10728502146743382, "grad_norm": 2.0640703478619544, "learning_rate": 9.844255669337251e-07, "loss": 0.4655, "step": 6172 }, { "epoch": 0.10730240400493664, "grad_norm": 1.6445669717859868, "learning_rate": 9.84418595155672e-07, "loss": 0.3435, "step": 6173 }, { "epoch": 0.10731978654243947, "grad_norm": 1.7912892495267978, "learning_rate": 9.844116218422347e-07, "loss": 0.4424, "step": 6174 }, { "epoch": 0.10733716907994229, "grad_norm": 1.247279529012995, "learning_rate": 9.84404646993435e-07, "loss": 0.3647, "step": 6175 }, { "epoch": 0.10735455161744512, "grad_norm": 1.482514952036329, "learning_rate": 9.84397670609295e-07, "loss": 0.6176, "step": 6176 }, { "epoch": 0.10737193415494795, "grad_norm": 1.6681965414112427, "learning_rate": 9.84390692689837e-07, "loss": 0.2541, "step": 6177 }, { "epoch": 0.10738931669245076, "grad_norm": 2.2115528021710147, "learning_rate": 9.843837132350827e-07, "loss": 0.3452, "step": 6178 }, { "epoch": 0.10740669922995359, "grad_norm": 1.661955375453721, "learning_rate": 9.843767322450548e-07, "loss": 0.4156, "step": 6179 }, { "epoch": 0.10742408176745641, "grad_norm": 1.0900562425247087, "learning_rate": 9.843697497197753e-07, "loss": 0.5916, "step": 6180 }, { "epoch": 0.10744146430495924, "grad_norm": 2.4664214391118726, "learning_rate": 9.843627656592658e-07, "loss": 0.4618, "step": 6181 }, { "epoch": 0.10745884684246207, "grad_norm": 3.2832291647247884, "learning_rate": 9.843557800635492e-07, "loss": 0.3268, "step": 6182 }, { "epoch": 0.10747622937996489, "grad_norm": 1.499026061086975, "learning_rate": 9.84348792932647e-07, "loss": 0.4621, "step": 6183 }, { "epoch": 0.10749361191746772, "grad_norm": 2.217082607689911, "learning_rate": 9.843418042665819e-07, "loss": 0.5881, "step": 6184 }, { "epoch": 0.10751099445497053, "grad_norm": 2.0063830700062395, "learning_rate": 9.843348140653756e-07, "loss": 0.8253, "step": 6185 }, { "epoch": 0.10752837699247336, "grad_norm": 1.7041215054623364, "learning_rate": 9.843278223290505e-07, "loss": 0.4173, "step": 6186 }, { "epoch": 0.10754575952997619, "grad_norm": 1.884035243727398, "learning_rate": 9.843208290576286e-07, "loss": 0.4219, "step": 6187 }, { "epoch": 0.10756314206747901, "grad_norm": 1.8802130061754407, "learning_rate": 9.843138342511322e-07, "loss": 0.5186, "step": 6188 }, { "epoch": 0.10758052460498184, "grad_norm": 1.4331694585434611, "learning_rate": 9.843068379095833e-07, "loss": 0.5402, "step": 6189 }, { "epoch": 0.10759790714248466, "grad_norm": 2.8055610520524943, "learning_rate": 9.842998400330043e-07, "loss": 0.521, "step": 6190 }, { "epoch": 0.10761528967998749, "grad_norm": 1.522374118134195, "learning_rate": 9.842928406214172e-07, "loss": 0.4045, "step": 6191 }, { "epoch": 0.10763267221749032, "grad_norm": 2.9821465678674777, "learning_rate": 9.842858396748445e-07, "loss": 0.7289, "step": 6192 }, { "epoch": 0.10765005475499313, "grad_norm": 2.0023353263672767, "learning_rate": 9.84278837193308e-07, "loss": 0.2994, "step": 6193 }, { "epoch": 0.10766743729249596, "grad_norm": 2.528312229941842, "learning_rate": 9.8427183317683e-07, "loss": 0.6891, "step": 6194 }, { "epoch": 0.10768481982999878, "grad_norm": 1.664181133810566, "learning_rate": 9.842648276254327e-07, "loss": 0.3066, "step": 6195 }, { "epoch": 0.10770220236750161, "grad_norm": 2.03443146251441, "learning_rate": 9.842578205391385e-07, "loss": 0.5259, "step": 6196 }, { "epoch": 0.10771958490500443, "grad_norm": 1.3747440303783613, "learning_rate": 9.842508119179692e-07, "loss": 0.4248, "step": 6197 }, { "epoch": 0.10773696744250726, "grad_norm": 2.497751518177311, "learning_rate": 9.842438017619476e-07, "loss": 0.4227, "step": 6198 }, { "epoch": 0.10775434998001009, "grad_norm": 2.091196108779116, "learning_rate": 9.842367900710956e-07, "loss": 0.4156, "step": 6199 }, { "epoch": 0.1077717325175129, "grad_norm": 2.1182644527864576, "learning_rate": 9.84229776845435e-07, "loss": 0.3488, "step": 6200 }, { "epoch": 0.10778911505501573, "grad_norm": 2.455919957991093, "learning_rate": 9.842227620849888e-07, "loss": 0.3615, "step": 6201 }, { "epoch": 0.10780649759251855, "grad_norm": 1.9231511917224278, "learning_rate": 9.84215745789779e-07, "loss": 0.2934, "step": 6202 }, { "epoch": 0.10782388013002138, "grad_norm": 1.9003207625068554, "learning_rate": 9.842087279598273e-07, "loss": 0.3712, "step": 6203 }, { "epoch": 0.10784126266752421, "grad_norm": 1.934787501584088, "learning_rate": 9.842017085951566e-07, "loss": 0.4205, "step": 6204 }, { "epoch": 0.10785864520502703, "grad_norm": 1.9197533384871361, "learning_rate": 9.84194687695789e-07, "loss": 0.3711, "step": 6205 }, { "epoch": 0.10787602774252986, "grad_norm": 1.6376755819719668, "learning_rate": 9.841876652617464e-07, "loss": 0.4899, "step": 6206 }, { "epoch": 0.10789341028003267, "grad_norm": 1.9985278318203088, "learning_rate": 9.841806412930514e-07, "loss": 0.8877, "step": 6207 }, { "epoch": 0.1079107928175355, "grad_norm": 1.9180888555822455, "learning_rate": 9.841736157897262e-07, "loss": 0.5973, "step": 6208 }, { "epoch": 0.10792817535503833, "grad_norm": 1.3463901188117575, "learning_rate": 9.841665887517932e-07, "loss": 0.4925, "step": 6209 }, { "epoch": 0.10794555789254115, "grad_norm": 1.9251499931127842, "learning_rate": 9.841595601792744e-07, "loss": 0.3856, "step": 6210 }, { "epoch": 0.10796294043004398, "grad_norm": 1.8087999645321595, "learning_rate": 9.841525300721922e-07, "loss": 0.6448, "step": 6211 }, { "epoch": 0.1079803229675468, "grad_norm": 1.7851731891063176, "learning_rate": 9.841454984305688e-07, "loss": 0.4951, "step": 6212 }, { "epoch": 0.10799770550504963, "grad_norm": 2.1564071568821523, "learning_rate": 9.841384652544266e-07, "loss": 0.6102, "step": 6213 }, { "epoch": 0.10801508804255246, "grad_norm": 2.2597748322046685, "learning_rate": 9.84131430543788e-07, "loss": 0.4911, "step": 6214 }, { "epoch": 0.10803247058005527, "grad_norm": 1.6008011727993965, "learning_rate": 9.84124394298675e-07, "loss": 0.2711, "step": 6215 }, { "epoch": 0.1080498531175581, "grad_norm": 2.2453640505980856, "learning_rate": 9.841173565191102e-07, "loss": 0.4019, "step": 6216 }, { "epoch": 0.10806723565506092, "grad_norm": 1.8647823562505823, "learning_rate": 9.841103172051157e-07, "loss": 0.3879, "step": 6217 }, { "epoch": 0.10808461819256375, "grad_norm": 1.9665094410562636, "learning_rate": 9.841032763567138e-07, "loss": 0.7134, "step": 6218 }, { "epoch": 0.10810200073006658, "grad_norm": 1.9298401960171516, "learning_rate": 9.84096233973927e-07, "loss": 0.6109, "step": 6219 }, { "epoch": 0.1081193832675694, "grad_norm": 2.3188360898564833, "learning_rate": 9.840891900567777e-07, "loss": 0.8758, "step": 6220 }, { "epoch": 0.10813676580507223, "grad_norm": 1.7489181542051382, "learning_rate": 9.840821446052877e-07, "loss": 0.4583, "step": 6221 }, { "epoch": 0.10815414834257504, "grad_norm": 1.8570276228862541, "learning_rate": 9.840750976194797e-07, "loss": 0.5273, "step": 6222 }, { "epoch": 0.10817153088007787, "grad_norm": 1.783420020259063, "learning_rate": 9.84068049099376e-07, "loss": 0.4207, "step": 6223 }, { "epoch": 0.1081889134175807, "grad_norm": 6.230394705538843, "learning_rate": 9.840609990449992e-07, "loss": 0.7341, "step": 6224 }, { "epoch": 0.10820629595508352, "grad_norm": 1.9050300860116394, "learning_rate": 9.840539474563711e-07, "loss": 0.4078, "step": 6225 }, { "epoch": 0.10822367849258635, "grad_norm": 1.1178556020305708, "learning_rate": 9.840468943335144e-07, "loss": 0.2199, "step": 6226 }, { "epoch": 0.10824106103008917, "grad_norm": 1.4976828378269846, "learning_rate": 9.840398396764517e-07, "loss": 0.4102, "step": 6227 }, { "epoch": 0.108258443567592, "grad_norm": 2.0532060879239, "learning_rate": 9.840327834852049e-07, "loss": 0.5975, "step": 6228 }, { "epoch": 0.10827582610509483, "grad_norm": 2.2723716354762846, "learning_rate": 9.840257257597964e-07, "loss": 0.2683, "step": 6229 }, { "epoch": 0.10829320864259764, "grad_norm": 5.409111492232525, "learning_rate": 9.840186665002487e-07, "loss": 0.5356, "step": 6230 }, { "epoch": 0.10831059118010047, "grad_norm": 2.817929994533443, "learning_rate": 9.840116057065842e-07, "loss": 0.5366, "step": 6231 }, { "epoch": 0.10832797371760329, "grad_norm": 1.6344444347952534, "learning_rate": 9.840045433788254e-07, "loss": 0.4778, "step": 6232 }, { "epoch": 0.10834535625510612, "grad_norm": 2.0620477509131665, "learning_rate": 9.839974795169943e-07, "loss": 0.3256, "step": 6233 }, { "epoch": 0.10836273879260895, "grad_norm": 1.4516001640438474, "learning_rate": 9.839904141211134e-07, "loss": 0.4924, "step": 6234 }, { "epoch": 0.10838012133011177, "grad_norm": 2.2887512590052403, "learning_rate": 9.839833471912055e-07, "loss": 0.5626, "step": 6235 }, { "epoch": 0.1083975038676146, "grad_norm": 2.23085723642295, "learning_rate": 9.839762787272927e-07, "loss": 0.4257, "step": 6236 }, { "epoch": 0.10841488640511741, "grad_norm": 1.4878871458775138, "learning_rate": 9.839692087293974e-07, "loss": 0.2603, "step": 6237 }, { "epoch": 0.10843226894262024, "grad_norm": 1.5625274929882353, "learning_rate": 9.839621371975418e-07, "loss": 0.3759, "step": 6238 }, { "epoch": 0.10844965148012307, "grad_norm": 1.7733894676834836, "learning_rate": 9.839550641317488e-07, "loss": 0.3858, "step": 6239 }, { "epoch": 0.10846703401762589, "grad_norm": 2.3375417123833095, "learning_rate": 9.839479895320406e-07, "loss": 0.4106, "step": 6240 }, { "epoch": 0.10848441655512872, "grad_norm": 3.0113346396197254, "learning_rate": 9.839409133984395e-07, "loss": 0.4173, "step": 6241 }, { "epoch": 0.10850179909263154, "grad_norm": 2.1325277151989606, "learning_rate": 9.839338357309679e-07, "loss": 0.7957, "step": 6242 }, { "epoch": 0.10851918163013437, "grad_norm": 1.9696719252044872, "learning_rate": 9.839267565296483e-07, "loss": 0.4235, "step": 6243 }, { "epoch": 0.1085365641676372, "grad_norm": 1.7303766273533807, "learning_rate": 9.839196757945035e-07, "loss": 0.4478, "step": 6244 }, { "epoch": 0.10855394670514001, "grad_norm": 1.8798557356516548, "learning_rate": 9.839125935255552e-07, "loss": 0.3098, "step": 6245 }, { "epoch": 0.10857132924264284, "grad_norm": 1.934042910937899, "learning_rate": 9.839055097228266e-07, "loss": 0.466, "step": 6246 }, { "epoch": 0.10858871178014566, "grad_norm": 1.440984221115055, "learning_rate": 9.838984243863398e-07, "loss": 0.3927, "step": 6247 }, { "epoch": 0.10860609431764849, "grad_norm": 2.0229212585807943, "learning_rate": 9.838913375161171e-07, "loss": 0.444, "step": 6248 }, { "epoch": 0.10862347685515132, "grad_norm": 2.6332359992825345, "learning_rate": 9.838842491121812e-07, "loss": 0.4114, "step": 6249 }, { "epoch": 0.10864085939265414, "grad_norm": 1.662990850188602, "learning_rate": 9.838771591745545e-07, "loss": 0.3991, "step": 6250 }, { "epoch": 0.10865824193015697, "grad_norm": 2.2499833410791052, "learning_rate": 9.838700677032594e-07, "loss": 0.5426, "step": 6251 }, { "epoch": 0.10867562446765978, "grad_norm": 1.496111629416045, "learning_rate": 9.838629746983187e-07, "loss": 0.5365, "step": 6252 }, { "epoch": 0.10869300700516261, "grad_norm": 3.877124697926861, "learning_rate": 9.838558801597545e-07, "loss": 1.0122, "step": 6253 }, { "epoch": 0.10871038954266544, "grad_norm": 3.3192243857610397, "learning_rate": 9.838487840875895e-07, "loss": 0.5971, "step": 6254 }, { "epoch": 0.10872777208016826, "grad_norm": 3.5410501013447457, "learning_rate": 9.83841686481846e-07, "loss": 0.5458, "step": 6255 }, { "epoch": 0.10874515461767109, "grad_norm": 2.3027514592681246, "learning_rate": 9.838345873425467e-07, "loss": 0.218, "step": 6256 }, { "epoch": 0.10876253715517391, "grad_norm": 2.4254894764536523, "learning_rate": 9.83827486669714e-07, "loss": 0.4169, "step": 6257 }, { "epoch": 0.10877991969267674, "grad_norm": 2.055254163937335, "learning_rate": 9.838203844633703e-07, "loss": 0.7437, "step": 6258 }, { "epoch": 0.10879730223017957, "grad_norm": 2.4320331212115485, "learning_rate": 9.838132807235384e-07, "loss": 0.4232, "step": 6259 }, { "epoch": 0.10881468476768238, "grad_norm": 1.7978312296709658, "learning_rate": 9.838061754502404e-07, "loss": 0.8409, "step": 6260 }, { "epoch": 0.10883206730518521, "grad_norm": 1.834039580893389, "learning_rate": 9.837990686434994e-07, "loss": 0.4603, "step": 6261 }, { "epoch": 0.10884944984268803, "grad_norm": 3.086083802260782, "learning_rate": 9.837919603033374e-07, "loss": 0.6145, "step": 6262 }, { "epoch": 0.10886683238019086, "grad_norm": 2.8984684322252257, "learning_rate": 9.837848504297772e-07, "loss": 0.3928, "step": 6263 }, { "epoch": 0.10888421491769368, "grad_norm": 2.2849749260532586, "learning_rate": 9.837777390228411e-07, "loss": 0.3795, "step": 6264 }, { "epoch": 0.10890159745519651, "grad_norm": 2.1293838868080397, "learning_rate": 9.83770626082552e-07, "loss": 0.3601, "step": 6265 }, { "epoch": 0.10891897999269934, "grad_norm": 1.7091624236285738, "learning_rate": 9.83763511608932e-07, "loss": 0.3511, "step": 6266 }, { "epoch": 0.10893636253020215, "grad_norm": 1.5490905054743764, "learning_rate": 9.837563956020042e-07, "loss": 0.9139, "step": 6267 }, { "epoch": 0.10895374506770499, "grad_norm": 1.6790776810115189, "learning_rate": 9.837492780617907e-07, "loss": 0.3548, "step": 6268 }, { "epoch": 0.1089711276052078, "grad_norm": 3.2862823292376397, "learning_rate": 9.83742158988314e-07, "loss": 0.6, "step": 6269 }, { "epoch": 0.10898851014271063, "grad_norm": 3.3931037580377907, "learning_rate": 9.837350383815973e-07, "loss": 0.4466, "step": 6270 }, { "epoch": 0.10900589268021346, "grad_norm": 1.2758434142315616, "learning_rate": 9.837279162416624e-07, "loss": 0.5979, "step": 6271 }, { "epoch": 0.10902327521771628, "grad_norm": 2.552074640345683, "learning_rate": 9.837207925685322e-07, "loss": 0.3422, "step": 6272 }, { "epoch": 0.10904065775521911, "grad_norm": 2.733539654131113, "learning_rate": 9.837136673622295e-07, "loss": 0.2674, "step": 6273 }, { "epoch": 0.10905804029272193, "grad_norm": 1.3926456635416953, "learning_rate": 9.837065406227765e-07, "loss": 0.3552, "step": 6274 }, { "epoch": 0.10907542283022476, "grad_norm": 2.0189480195875515, "learning_rate": 9.83699412350196e-07, "loss": 0.3707, "step": 6275 }, { "epoch": 0.10909280536772759, "grad_norm": 2.498504723386438, "learning_rate": 9.836922825445106e-07, "loss": 0.4096, "step": 6276 }, { "epoch": 0.1091101879052304, "grad_norm": 1.5358339358691218, "learning_rate": 9.836851512057428e-07, "loss": 0.5093, "step": 6277 }, { "epoch": 0.10912757044273323, "grad_norm": 1.3980138427177267, "learning_rate": 9.836780183339155e-07, "loss": 0.2774, "step": 6278 }, { "epoch": 0.10914495298023605, "grad_norm": 1.3446359385468933, "learning_rate": 9.836708839290509e-07, "loss": 0.5311, "step": 6279 }, { "epoch": 0.10916233551773888, "grad_norm": 2.0573020061473373, "learning_rate": 9.836637479911716e-07, "loss": 0.3901, "step": 6280 }, { "epoch": 0.10917971805524171, "grad_norm": 1.1149330632273915, "learning_rate": 9.836566105203005e-07, "loss": 0.3502, "step": 6281 }, { "epoch": 0.10919710059274453, "grad_norm": 1.690422451650412, "learning_rate": 9.836494715164603e-07, "loss": 0.5202, "step": 6282 }, { "epoch": 0.10921448313024736, "grad_norm": 2.3019942042296213, "learning_rate": 9.836423309796732e-07, "loss": 0.6408, "step": 6283 }, { "epoch": 0.10923186566775017, "grad_norm": 1.2474595367948356, "learning_rate": 9.836351889099621e-07, "loss": 0.4642, "step": 6284 }, { "epoch": 0.109249248205253, "grad_norm": 1.365421493955164, "learning_rate": 9.836280453073497e-07, "loss": 0.3252, "step": 6285 }, { "epoch": 0.10926663074275583, "grad_norm": 1.8591214748351321, "learning_rate": 9.836209001718587e-07, "loss": 0.5207, "step": 6286 }, { "epoch": 0.10928401328025865, "grad_norm": 1.5851287130247398, "learning_rate": 9.836137535035113e-07, "loss": 0.3636, "step": 6287 }, { "epoch": 0.10930139581776148, "grad_norm": 2.9284619705869814, "learning_rate": 9.836066053023308e-07, "loss": 0.4354, "step": 6288 }, { "epoch": 0.1093187783552643, "grad_norm": 1.9011830830931993, "learning_rate": 9.835994555683394e-07, "loss": 0.3993, "step": 6289 }, { "epoch": 0.10933616089276713, "grad_norm": 2.9279520332315347, "learning_rate": 9.835923043015596e-07, "loss": 1.0406, "step": 6290 }, { "epoch": 0.10935354343026996, "grad_norm": 2.3274977288373413, "learning_rate": 9.835851515020146e-07, "loss": 0.3761, "step": 6291 }, { "epoch": 0.10937092596777277, "grad_norm": 2.1517598265664977, "learning_rate": 9.835779971697269e-07, "loss": 0.4735, "step": 6292 }, { "epoch": 0.1093883085052756, "grad_norm": 1.444217088268074, "learning_rate": 9.835708413047188e-07, "loss": 0.5084, "step": 6293 }, { "epoch": 0.10940569104277842, "grad_norm": 1.1910266253240167, "learning_rate": 9.835636839070133e-07, "loss": 0.6729, "step": 6294 }, { "epoch": 0.10942307358028125, "grad_norm": 2.346507797662973, "learning_rate": 9.835565249766333e-07, "loss": 0.4498, "step": 6295 }, { "epoch": 0.10944045611778408, "grad_norm": 1.0399032693441563, "learning_rate": 9.83549364513601e-07, "loss": 0.3706, "step": 6296 }, { "epoch": 0.1094578386552869, "grad_norm": 2.0203966378863307, "learning_rate": 9.835422025179394e-07, "loss": 0.44, "step": 6297 }, { "epoch": 0.10947522119278973, "grad_norm": 2.054481774787164, "learning_rate": 9.835350389896712e-07, "loss": 0.5486, "step": 6298 }, { "epoch": 0.10949260373029254, "grad_norm": 3.103445682862526, "learning_rate": 9.83527873928819e-07, "loss": 0.4215, "step": 6299 }, { "epoch": 0.10950998626779537, "grad_norm": 3.180321598204133, "learning_rate": 9.835207073354058e-07, "loss": 0.6989, "step": 6300 }, { "epoch": 0.1095273688052982, "grad_norm": 1.2200455903034095, "learning_rate": 9.835135392094537e-07, "loss": 0.4036, "step": 6301 }, { "epoch": 0.10954475134280102, "grad_norm": 2.5756442809445037, "learning_rate": 9.83506369550986e-07, "loss": 0.4812, "step": 6302 }, { "epoch": 0.10956213388030385, "grad_norm": 1.9904228913051714, "learning_rate": 9.834991983600253e-07, "loss": 0.8022, "step": 6303 }, { "epoch": 0.10957951641780667, "grad_norm": 1.5740491945520039, "learning_rate": 9.83492025636594e-07, "loss": 0.4022, "step": 6304 }, { "epoch": 0.1095968989553095, "grad_norm": 2.3404869362580643, "learning_rate": 9.83484851380715e-07, "loss": 0.5057, "step": 6305 }, { "epoch": 0.10961428149281233, "grad_norm": 2.0671757001640416, "learning_rate": 9.834776755924115e-07, "loss": 0.6129, "step": 6306 }, { "epoch": 0.10963166403031514, "grad_norm": 1.949037362850945, "learning_rate": 9.834704982717054e-07, "loss": 0.2393, "step": 6307 }, { "epoch": 0.10964904656781797, "grad_norm": 1.8417585426912917, "learning_rate": 9.834633194186202e-07, "loss": 0.356, "step": 6308 }, { "epoch": 0.10966642910532079, "grad_norm": 2.540532629975371, "learning_rate": 9.834561390331783e-07, "loss": 0.5348, "step": 6309 }, { "epoch": 0.10968381164282362, "grad_norm": 2.773407977555163, "learning_rate": 9.834489571154025e-07, "loss": 0.6699, "step": 6310 }, { "epoch": 0.10970119418032645, "grad_norm": 2.423647338674867, "learning_rate": 9.834417736653158e-07, "loss": 0.614, "step": 6311 }, { "epoch": 0.10971857671782927, "grad_norm": 2.0457985473925517, "learning_rate": 9.834345886829405e-07, "loss": 0.3666, "step": 6312 }, { "epoch": 0.1097359592553321, "grad_norm": 2.9382831167184507, "learning_rate": 9.834274021682996e-07, "loss": 0.5161, "step": 6313 }, { "epoch": 0.10975334179283491, "grad_norm": 1.8523108028327353, "learning_rate": 9.83420214121416e-07, "loss": 0.3402, "step": 6314 }, { "epoch": 0.10977072433033774, "grad_norm": 1.673788977955036, "learning_rate": 9.834130245423124e-07, "loss": 0.4093, "step": 6315 }, { "epoch": 0.10978810686784057, "grad_norm": 2.307898519399248, "learning_rate": 9.834058334310115e-07, "loss": 0.7177, "step": 6316 }, { "epoch": 0.10980548940534339, "grad_norm": 4.279378297251249, "learning_rate": 9.833986407875364e-07, "loss": 0.4017, "step": 6317 }, { "epoch": 0.10982287194284622, "grad_norm": 1.84565245015154, "learning_rate": 9.833914466119094e-07, "loss": 0.4672, "step": 6318 }, { "epoch": 0.10984025448034904, "grad_norm": 1.5084711567433067, "learning_rate": 9.833842509041537e-07, "loss": 0.2643, "step": 6319 }, { "epoch": 0.10985763701785187, "grad_norm": 1.2384773442868222, "learning_rate": 9.83377053664292e-07, "loss": 0.5414, "step": 6320 }, { "epoch": 0.1098750195553547, "grad_norm": 1.3655663907697266, "learning_rate": 9.83369854892347e-07, "loss": 0.6376, "step": 6321 }, { "epoch": 0.10989240209285751, "grad_norm": 2.1772324285040647, "learning_rate": 9.833626545883415e-07, "loss": 0.5131, "step": 6322 }, { "epoch": 0.10990978463036034, "grad_norm": 2.674901355870867, "learning_rate": 9.833554527522986e-07, "loss": 0.4782, "step": 6323 }, { "epoch": 0.10992716716786316, "grad_norm": 1.8840121620882344, "learning_rate": 9.833482493842408e-07, "loss": 0.2606, "step": 6324 }, { "epoch": 0.10994454970536599, "grad_norm": 1.8263205564242695, "learning_rate": 9.833410444841911e-07, "loss": 0.3659, "step": 6325 }, { "epoch": 0.10996193224286882, "grad_norm": 2.3920127032723015, "learning_rate": 9.833338380521725e-07, "loss": 0.4547, "step": 6326 }, { "epoch": 0.10997931478037164, "grad_norm": 1.775655959569772, "learning_rate": 9.833266300882077e-07, "loss": 0.4799, "step": 6327 }, { "epoch": 0.10999669731787447, "grad_norm": 1.8309577362147866, "learning_rate": 9.833194205923192e-07, "loss": 0.2834, "step": 6328 }, { "epoch": 0.11001407985537728, "grad_norm": 2.0137617911129073, "learning_rate": 9.833122095645304e-07, "loss": 0.5191, "step": 6329 }, { "epoch": 0.11003146239288011, "grad_norm": 1.3447148101651623, "learning_rate": 9.833049970048639e-07, "loss": 0.3591, "step": 6330 }, { "epoch": 0.11004884493038294, "grad_norm": 2.329562508469788, "learning_rate": 9.832977829133425e-07, "loss": 0.3504, "step": 6331 }, { "epoch": 0.11006622746788576, "grad_norm": 1.7956818390011027, "learning_rate": 9.832905672899889e-07, "loss": 0.5268, "step": 6332 }, { "epoch": 0.11008361000538859, "grad_norm": 2.156085140755821, "learning_rate": 9.832833501348266e-07, "loss": 0.8053, "step": 6333 }, { "epoch": 0.11010099254289141, "grad_norm": 1.1268855757268943, "learning_rate": 9.832761314478779e-07, "loss": 0.3646, "step": 6334 }, { "epoch": 0.11011837508039424, "grad_norm": 2.461591287232788, "learning_rate": 9.83268911229166e-07, "loss": 0.5402, "step": 6335 }, { "epoch": 0.11013575761789705, "grad_norm": 1.8451494220350297, "learning_rate": 9.832616894787134e-07, "loss": 0.7525, "step": 6336 }, { "epoch": 0.11015314015539988, "grad_norm": 1.4071290933109304, "learning_rate": 9.832544661965434e-07, "loss": 0.3776, "step": 6337 }, { "epoch": 0.11017052269290271, "grad_norm": 1.3702480783866915, "learning_rate": 9.832472413826786e-07, "loss": 0.3799, "step": 6338 }, { "epoch": 0.11018790523040553, "grad_norm": 1.8970871616273783, "learning_rate": 9.832400150371421e-07, "loss": 0.3752, "step": 6339 }, { "epoch": 0.11020528776790836, "grad_norm": 1.907835338214402, "learning_rate": 9.832327871599569e-07, "loss": 0.3795, "step": 6340 }, { "epoch": 0.11022267030541118, "grad_norm": 1.6243842119615681, "learning_rate": 9.832255577511455e-07, "loss": 0.5295, "step": 6341 }, { "epoch": 0.11024005284291401, "grad_norm": 1.9125029087678742, "learning_rate": 9.832183268107312e-07, "loss": 0.5629, "step": 6342 }, { "epoch": 0.11025743538041684, "grad_norm": 2.716473675063503, "learning_rate": 9.832110943387367e-07, "loss": 0.4214, "step": 6343 }, { "epoch": 0.11027481791791965, "grad_norm": 1.1730580750317294, "learning_rate": 9.832038603351851e-07, "loss": 0.456, "step": 6344 }, { "epoch": 0.11029220045542248, "grad_norm": 2.0409365755575437, "learning_rate": 9.83196624800099e-07, "loss": 0.4714, "step": 6345 }, { "epoch": 0.1103095829929253, "grad_norm": 1.48965074468969, "learning_rate": 9.831893877335018e-07, "loss": 0.3511, "step": 6346 }, { "epoch": 0.11032696553042813, "grad_norm": 2.499790103757562, "learning_rate": 9.831821491354161e-07, "loss": 0.5149, "step": 6347 }, { "epoch": 0.11034434806793096, "grad_norm": 1.2484615664980667, "learning_rate": 9.831749090058652e-07, "loss": 0.3245, "step": 6348 }, { "epoch": 0.11036173060543378, "grad_norm": 3.792525744416559, "learning_rate": 9.831676673448715e-07, "loss": 0.642, "step": 6349 }, { "epoch": 0.11037911314293661, "grad_norm": 2.5908661912585704, "learning_rate": 9.831604241524583e-07, "loss": 0.4753, "step": 6350 }, { "epoch": 0.11039649568043942, "grad_norm": 1.9987023605449619, "learning_rate": 9.831531794286485e-07, "loss": 0.4928, "step": 6351 }, { "epoch": 0.11041387821794225, "grad_norm": 2.3405993156436136, "learning_rate": 9.83145933173465e-07, "loss": 0.5583, "step": 6352 }, { "epoch": 0.11043126075544508, "grad_norm": 2.669204505542732, "learning_rate": 9.83138685386931e-07, "loss": 0.358, "step": 6353 }, { "epoch": 0.1104486432929479, "grad_norm": 1.631616292688948, "learning_rate": 9.83131436069069e-07, "loss": 0.4589, "step": 6354 }, { "epoch": 0.11046602583045073, "grad_norm": 2.6472961386374267, "learning_rate": 9.831241852199026e-07, "loss": 0.4772, "step": 6355 }, { "epoch": 0.11048340836795355, "grad_norm": 1.8068561943598458, "learning_rate": 9.831169328394545e-07, "loss": 0.7057, "step": 6356 }, { "epoch": 0.11050079090545638, "grad_norm": 1.9321688595643178, "learning_rate": 9.831096789277473e-07, "loss": 0.4774, "step": 6357 }, { "epoch": 0.11051817344295921, "grad_norm": 2.375960249636659, "learning_rate": 9.831024234848046e-07, "loss": 0.4881, "step": 6358 }, { "epoch": 0.11053555598046202, "grad_norm": 1.54935703553575, "learning_rate": 9.83095166510649e-07, "loss": 0.4141, "step": 6359 }, { "epoch": 0.11055293851796485, "grad_norm": 2.6484927460367134, "learning_rate": 9.830879080053038e-07, "loss": 0.5257, "step": 6360 }, { "epoch": 0.11057032105546767, "grad_norm": 1.8717971788360663, "learning_rate": 9.830806479687916e-07, "loss": 0.448, "step": 6361 }, { "epoch": 0.1105877035929705, "grad_norm": 1.417134648437118, "learning_rate": 9.83073386401136e-07, "loss": 0.1537, "step": 6362 }, { "epoch": 0.11060508613047333, "grad_norm": 1.0897015602005256, "learning_rate": 9.830661233023594e-07, "loss": 0.4455, "step": 6363 }, { "epoch": 0.11062246866797615, "grad_norm": 3.034502413448303, "learning_rate": 9.83058858672485e-07, "loss": 0.7841, "step": 6364 }, { "epoch": 0.11063985120547898, "grad_norm": 2.5300052279144243, "learning_rate": 9.830515925115362e-07, "loss": 0.4644, "step": 6365 }, { "epoch": 0.1106572337429818, "grad_norm": 2.2452223273370646, "learning_rate": 9.830443248195357e-07, "loss": 0.627, "step": 6366 }, { "epoch": 0.11067461628048463, "grad_norm": 2.072048944410225, "learning_rate": 9.830370555965064e-07, "loss": 0.6128, "step": 6367 }, { "epoch": 0.11069199881798746, "grad_norm": 1.7946171652740455, "learning_rate": 9.830297848424719e-07, "loss": 0.1495, "step": 6368 }, { "epoch": 0.11070938135549027, "grad_norm": 1.685243684644701, "learning_rate": 9.830225125574545e-07, "loss": 0.2281, "step": 6369 }, { "epoch": 0.1107267638929931, "grad_norm": 1.5726502404916824, "learning_rate": 9.830152387414776e-07, "loss": 0.3085, "step": 6370 }, { "epoch": 0.11074414643049592, "grad_norm": 2.429484104650132, "learning_rate": 9.830079633945644e-07, "loss": 0.5268, "step": 6371 }, { "epoch": 0.11076152896799875, "grad_norm": 2.7892197876699023, "learning_rate": 9.830006865167379e-07, "loss": 0.6411, "step": 6372 }, { "epoch": 0.11077891150550158, "grad_norm": 3.040014581110707, "learning_rate": 9.82993408108021e-07, "loss": 0.6383, "step": 6373 }, { "epoch": 0.1107962940430044, "grad_norm": 2.4347701287446806, "learning_rate": 9.82986128168437e-07, "loss": 0.4547, "step": 6374 }, { "epoch": 0.11081367658050723, "grad_norm": 2.0296399644871697, "learning_rate": 9.829788466980086e-07, "loss": 0.5576, "step": 6375 }, { "epoch": 0.11083105911801004, "grad_norm": 1.4843155905887173, "learning_rate": 9.829715636967592e-07, "loss": 0.5598, "step": 6376 }, { "epoch": 0.11084844165551287, "grad_norm": 1.7031321060359688, "learning_rate": 9.82964279164712e-07, "loss": 0.6893, "step": 6377 }, { "epoch": 0.1108658241930157, "grad_norm": 1.6235887228255534, "learning_rate": 9.829569931018897e-07, "loss": 0.4755, "step": 6378 }, { "epoch": 0.11088320673051852, "grad_norm": 2.30639618763803, "learning_rate": 9.829497055083156e-07, "loss": 0.4171, "step": 6379 }, { "epoch": 0.11090058926802135, "grad_norm": 2.3944204618626133, "learning_rate": 9.829424163840128e-07, "loss": 0.3295, "step": 6380 }, { "epoch": 0.11091797180552417, "grad_norm": 1.5636872056491546, "learning_rate": 9.829351257290044e-07, "loss": 0.2114, "step": 6381 }, { "epoch": 0.110935354343027, "grad_norm": 1.4972884852897435, "learning_rate": 9.829278335433136e-07, "loss": 0.3968, "step": 6382 }, { "epoch": 0.11095273688052983, "grad_norm": 2.2755856822748894, "learning_rate": 9.829205398269631e-07, "loss": 0.5266, "step": 6383 }, { "epoch": 0.11097011941803264, "grad_norm": 1.9334328498073887, "learning_rate": 9.829132445799765e-07, "loss": 0.6501, "step": 6384 }, { "epoch": 0.11098750195553547, "grad_norm": 1.7864486115175588, "learning_rate": 9.82905947802377e-07, "loss": 0.5551, "step": 6385 }, { "epoch": 0.11100488449303829, "grad_norm": 1.2583224969187892, "learning_rate": 9.828986494941872e-07, "loss": 0.3071, "step": 6386 }, { "epoch": 0.11102226703054112, "grad_norm": 1.565037323874775, "learning_rate": 9.828913496554305e-07, "loss": 0.5558, "step": 6387 }, { "epoch": 0.11103964956804395, "grad_norm": 2.4855836385900174, "learning_rate": 9.8288404828613e-07, "loss": 0.4503, "step": 6388 }, { "epoch": 0.11105703210554677, "grad_norm": 2.1947601884701777, "learning_rate": 9.828767453863092e-07, "loss": 0.437, "step": 6389 }, { "epoch": 0.1110744146430496, "grad_norm": 3.040650852420689, "learning_rate": 9.828694409559907e-07, "loss": 0.4502, "step": 6390 }, { "epoch": 0.11109179718055241, "grad_norm": 1.746101136983742, "learning_rate": 9.828621349951979e-07, "loss": 0.2309, "step": 6391 }, { "epoch": 0.11110917971805524, "grad_norm": 1.6618219878828677, "learning_rate": 9.82854827503954e-07, "loss": 0.4003, "step": 6392 }, { "epoch": 0.11112656225555807, "grad_norm": 3.0852383180340475, "learning_rate": 9.828475184822819e-07, "loss": 0.398, "step": 6393 }, { "epoch": 0.11114394479306089, "grad_norm": 1.644089027717342, "learning_rate": 9.82840207930205e-07, "loss": 0.3496, "step": 6394 }, { "epoch": 0.11116132733056372, "grad_norm": 1.0690620220713782, "learning_rate": 9.828328958477465e-07, "loss": 0.2788, "step": 6395 }, { "epoch": 0.11117870986806654, "grad_norm": 6.03037512438487, "learning_rate": 9.828255822349297e-07, "loss": 0.5539, "step": 6396 }, { "epoch": 0.11119609240556937, "grad_norm": 1.8475929600727903, "learning_rate": 9.828182670917773e-07, "loss": 0.714, "step": 6397 }, { "epoch": 0.1112134749430722, "grad_norm": 1.3401384557194571, "learning_rate": 9.82810950418313e-07, "loss": 0.4887, "step": 6398 }, { "epoch": 0.11123085748057501, "grad_norm": 1.53928824200747, "learning_rate": 9.828036322145598e-07, "loss": 0.4932, "step": 6399 }, { "epoch": 0.11124824001807784, "grad_norm": 6.182384903625612, "learning_rate": 9.827963124805407e-07, "loss": 0.6971, "step": 6400 }, { "epoch": 0.11126562255558066, "grad_norm": 2.0111031052713715, "learning_rate": 9.827889912162792e-07, "loss": 0.3334, "step": 6401 }, { "epoch": 0.11128300509308349, "grad_norm": 1.9105012372831953, "learning_rate": 9.827816684217984e-07, "loss": 0.5631, "step": 6402 }, { "epoch": 0.11130038763058632, "grad_norm": 2.159224824095246, "learning_rate": 9.827743440971213e-07, "loss": 0.2775, "step": 6403 }, { "epoch": 0.11131777016808914, "grad_norm": 2.597836211741786, "learning_rate": 9.827670182422715e-07, "loss": 0.761, "step": 6404 }, { "epoch": 0.11133515270559197, "grad_norm": 1.9478010920711064, "learning_rate": 9.82759690857272e-07, "loss": 0.4087, "step": 6405 }, { "epoch": 0.11135253524309478, "grad_norm": 1.6646853909177408, "learning_rate": 9.827523619421459e-07, "loss": 0.2783, "step": 6406 }, { "epoch": 0.11136991778059761, "grad_norm": 1.2361548362430304, "learning_rate": 9.827450314969167e-07, "loss": 0.2751, "step": 6407 }, { "epoch": 0.11138730031810043, "grad_norm": 1.9687003772404323, "learning_rate": 9.827376995216073e-07, "loss": 0.7147, "step": 6408 }, { "epoch": 0.11140468285560326, "grad_norm": 1.7490113399829612, "learning_rate": 9.827303660162412e-07, "loss": 0.3709, "step": 6409 }, { "epoch": 0.11142206539310609, "grad_norm": 1.8953341718959844, "learning_rate": 9.827230309808417e-07, "loss": 0.6026, "step": 6410 }, { "epoch": 0.1114394479306089, "grad_norm": 1.9487585690049556, "learning_rate": 9.827156944154319e-07, "loss": 0.6756, "step": 6411 }, { "epoch": 0.11145683046811174, "grad_norm": 1.5961910189499762, "learning_rate": 9.82708356320035e-07, "loss": 0.38, "step": 6412 }, { "epoch": 0.11147421300561455, "grad_norm": 1.6642328140425826, "learning_rate": 9.827010166946746e-07, "loss": 0.4375, "step": 6413 }, { "epoch": 0.11149159554311738, "grad_norm": 4.460714088513423, "learning_rate": 9.826936755393736e-07, "loss": 0.6283, "step": 6414 }, { "epoch": 0.11150897808062021, "grad_norm": 2.042281439767883, "learning_rate": 9.826863328541553e-07, "loss": 0.2812, "step": 6415 }, { "epoch": 0.11152636061812303, "grad_norm": 3.3895082110475028, "learning_rate": 9.826789886390429e-07, "loss": 0.6623, "step": 6416 }, { "epoch": 0.11154374315562586, "grad_norm": 1.5941774495402, "learning_rate": 9.826716428940602e-07, "loss": 0.602, "step": 6417 }, { "epoch": 0.11156112569312868, "grad_norm": 1.6440792304675094, "learning_rate": 9.8266429561923e-07, "loss": 0.2788, "step": 6418 }, { "epoch": 0.1115785082306315, "grad_norm": 2.6649374570942017, "learning_rate": 9.826569468145755e-07, "loss": 0.5539, "step": 6419 }, { "epoch": 0.11159589076813434, "grad_norm": 1.9631086346023774, "learning_rate": 9.826495964801203e-07, "loss": 0.8521, "step": 6420 }, { "epoch": 0.11161327330563715, "grad_norm": 2.959360961086532, "learning_rate": 9.826422446158877e-07, "loss": 0.317, "step": 6421 }, { "epoch": 0.11163065584313998, "grad_norm": 1.1229652128840086, "learning_rate": 9.826348912219008e-07, "loss": 0.1557, "step": 6422 }, { "epoch": 0.1116480383806428, "grad_norm": 2.0157982708619846, "learning_rate": 9.82627536298183e-07, "loss": 0.5048, "step": 6423 }, { "epoch": 0.11166542091814563, "grad_norm": 2.1036594329437683, "learning_rate": 9.826201798447576e-07, "loss": 0.3544, "step": 6424 }, { "epoch": 0.11168280345564846, "grad_norm": 2.1688495624862516, "learning_rate": 9.826128218616478e-07, "loss": 0.346, "step": 6425 }, { "epoch": 0.11170018599315128, "grad_norm": 2.1669557505973818, "learning_rate": 9.826054623488772e-07, "loss": 0.454, "step": 6426 }, { "epoch": 0.11171756853065411, "grad_norm": 2.0849125148986922, "learning_rate": 9.82598101306469e-07, "loss": 0.468, "step": 6427 }, { "epoch": 0.11173495106815692, "grad_norm": 2.883736238843459, "learning_rate": 9.825907387344466e-07, "loss": 0.5065, "step": 6428 }, { "epoch": 0.11175233360565975, "grad_norm": 2.1149860555078392, "learning_rate": 9.82583374632833e-07, "loss": 0.4194, "step": 6429 }, { "epoch": 0.11176971614316258, "grad_norm": 3.4875992250541636, "learning_rate": 9.82576009001652e-07, "loss": 0.2267, "step": 6430 }, { "epoch": 0.1117870986806654, "grad_norm": 1.5217427096782408, "learning_rate": 9.825686418409267e-07, "loss": 0.4763, "step": 6431 }, { "epoch": 0.11180448121816823, "grad_norm": 2.335695964975141, "learning_rate": 9.825612731506803e-07, "loss": 0.6302, "step": 6432 }, { "epoch": 0.11182186375567105, "grad_norm": 1.6067426844917496, "learning_rate": 9.825539029309364e-07, "loss": 0.2197, "step": 6433 }, { "epoch": 0.11183924629317388, "grad_norm": 2.2584160099872244, "learning_rate": 9.825465311817185e-07, "loss": 0.516, "step": 6434 }, { "epoch": 0.11185662883067671, "grad_norm": 1.7903548131820688, "learning_rate": 9.825391579030495e-07, "loss": 0.4418, "step": 6435 }, { "epoch": 0.11187401136817952, "grad_norm": 1.081791363872806, "learning_rate": 9.825317830949531e-07, "loss": 0.4047, "step": 6436 }, { "epoch": 0.11189139390568235, "grad_norm": 2.752439851003129, "learning_rate": 9.825244067574526e-07, "loss": 0.4041, "step": 6437 }, { "epoch": 0.11190877644318517, "grad_norm": 0.9952500319230239, "learning_rate": 9.825170288905713e-07, "loss": 0.1096, "step": 6438 }, { "epoch": 0.111926158980688, "grad_norm": 1.6650798708336219, "learning_rate": 9.82509649494333e-07, "loss": 0.425, "step": 6439 }, { "epoch": 0.11194354151819083, "grad_norm": 1.4293901829181965, "learning_rate": 9.825022685687605e-07, "loss": 0.4373, "step": 6440 }, { "epoch": 0.11196092405569365, "grad_norm": 1.471624250567162, "learning_rate": 9.824948861138774e-07, "loss": 0.2622, "step": 6441 }, { "epoch": 0.11197830659319648, "grad_norm": 3.979695514076051, "learning_rate": 9.824875021297073e-07, "loss": 0.4077, "step": 6442 }, { "epoch": 0.1119956891306993, "grad_norm": 4.357420887288458, "learning_rate": 9.824801166162734e-07, "loss": 0.713, "step": 6443 }, { "epoch": 0.11201307166820212, "grad_norm": 2.317489059855303, "learning_rate": 9.824727295735993e-07, "loss": 0.5615, "step": 6444 }, { "epoch": 0.11203045420570495, "grad_norm": 1.3759533358534006, "learning_rate": 9.824653410017081e-07, "loss": 0.3957, "step": 6445 }, { "epoch": 0.11204783674320777, "grad_norm": 1.9974921145701396, "learning_rate": 9.824579509006233e-07, "loss": 0.4487, "step": 6446 }, { "epoch": 0.1120652192807106, "grad_norm": 2.164447451154351, "learning_rate": 9.824505592703685e-07, "loss": 0.3403, "step": 6447 }, { "epoch": 0.11208260181821342, "grad_norm": 2.1548357136637097, "learning_rate": 9.82443166110967e-07, "loss": 0.3228, "step": 6448 }, { "epoch": 0.11209998435571625, "grad_norm": 2.196710960737584, "learning_rate": 9.824357714224424e-07, "loss": 0.2835, "step": 6449 }, { "epoch": 0.11211736689321908, "grad_norm": 1.5371458077738112, "learning_rate": 9.82428375204818e-07, "loss": 0.5558, "step": 6450 }, { "epoch": 0.1121347494307219, "grad_norm": 3.023668024996746, "learning_rate": 9.824209774581174e-07, "loss": 0.603, "step": 6451 }, { "epoch": 0.11215213196822472, "grad_norm": 2.677486607040208, "learning_rate": 9.824135781823639e-07, "loss": 0.5549, "step": 6452 }, { "epoch": 0.11216951450572754, "grad_norm": 1.9633066533021497, "learning_rate": 9.824061773775808e-07, "loss": 0.3293, "step": 6453 }, { "epoch": 0.11218689704323037, "grad_norm": 2.8944765867293953, "learning_rate": 9.823987750437916e-07, "loss": 0.4788, "step": 6454 }, { "epoch": 0.1122042795807332, "grad_norm": 1.1550144688678488, "learning_rate": 9.8239137118102e-07, "loss": 0.243, "step": 6455 }, { "epoch": 0.11222166211823602, "grad_norm": 1.5224108549168764, "learning_rate": 9.823839657892893e-07, "loss": 0.2731, "step": 6456 }, { "epoch": 0.11223904465573885, "grad_norm": 1.9717407254758181, "learning_rate": 9.823765588686232e-07, "loss": 0.3969, "step": 6457 }, { "epoch": 0.11225642719324166, "grad_norm": 1.8243213444459905, "learning_rate": 9.82369150419045e-07, "loss": 0.5615, "step": 6458 }, { "epoch": 0.1122738097307445, "grad_norm": 2.750923977956043, "learning_rate": 9.823617404405779e-07, "loss": 0.4051, "step": 6459 }, { "epoch": 0.11229119226824733, "grad_norm": 1.5850126752710936, "learning_rate": 9.82354328933246e-07, "loss": 0.3834, "step": 6460 }, { "epoch": 0.11230857480575014, "grad_norm": 3.1164625913745807, "learning_rate": 9.823469158970722e-07, "loss": 0.5452, "step": 6461 }, { "epoch": 0.11232595734325297, "grad_norm": 1.591894475357051, "learning_rate": 9.823395013320803e-07, "loss": 0.4504, "step": 6462 }, { "epoch": 0.11234333988075579, "grad_norm": 2.5386430052101154, "learning_rate": 9.823320852382938e-07, "loss": 0.5315, "step": 6463 }, { "epoch": 0.11236072241825862, "grad_norm": 2.4921819852302645, "learning_rate": 9.823246676157361e-07, "loss": 0.3453, "step": 6464 }, { "epoch": 0.11237810495576145, "grad_norm": 2.1049940361681174, "learning_rate": 9.823172484644307e-07, "loss": 0.5577, "step": 6465 }, { "epoch": 0.11239548749326427, "grad_norm": 2.9117857804852307, "learning_rate": 9.823098277844013e-07, "loss": 0.4273, "step": 6466 }, { "epoch": 0.1124128700307671, "grad_norm": 1.7367120242251526, "learning_rate": 9.823024055756712e-07, "loss": 0.4354, "step": 6467 }, { "epoch": 0.11243025256826991, "grad_norm": 1.8267074696305712, "learning_rate": 9.822949818382643e-07, "loss": 0.3884, "step": 6468 }, { "epoch": 0.11244763510577274, "grad_norm": 1.714549782675082, "learning_rate": 9.822875565722034e-07, "loss": 0.7315, "step": 6469 }, { "epoch": 0.11246501764327557, "grad_norm": 2.9338791350786027, "learning_rate": 9.82280129777513e-07, "loss": 0.4357, "step": 6470 }, { "epoch": 0.11248240018077839, "grad_norm": 2.8315744960751243, "learning_rate": 9.822727014542158e-07, "loss": 0.7032, "step": 6471 }, { "epoch": 0.11249978271828122, "grad_norm": 2.0564856538574254, "learning_rate": 9.822652716023357e-07, "loss": 0.5459, "step": 6472 }, { "epoch": 0.11251716525578404, "grad_norm": 1.269803892560877, "learning_rate": 9.822578402218962e-07, "loss": 0.4236, "step": 6473 }, { "epoch": 0.11253454779328687, "grad_norm": 1.3377205523719202, "learning_rate": 9.82250407312921e-07, "loss": 0.3327, "step": 6474 }, { "epoch": 0.11255193033078968, "grad_norm": 7.002516154657002, "learning_rate": 9.822429728754337e-07, "loss": 0.6269, "step": 6475 }, { "epoch": 0.11256931286829251, "grad_norm": 3.2211722282847446, "learning_rate": 9.822355369094572e-07, "loss": 0.5907, "step": 6476 }, { "epoch": 0.11258669540579534, "grad_norm": 2.054984078934932, "learning_rate": 9.82228099415016e-07, "loss": 0.4322, "step": 6477 }, { "epoch": 0.11260407794329816, "grad_norm": 1.7237913108161065, "learning_rate": 9.82220660392133e-07, "loss": 0.6378, "step": 6478 }, { "epoch": 0.11262146048080099, "grad_norm": 2.144168207198759, "learning_rate": 9.822132198408321e-07, "loss": 0.2815, "step": 6479 }, { "epoch": 0.1126388430183038, "grad_norm": 1.2646657179181546, "learning_rate": 9.822057777611368e-07, "loss": 0.3792, "step": 6480 }, { "epoch": 0.11265622555580664, "grad_norm": 1.818890149532189, "learning_rate": 9.82198334153071e-07, "loss": 0.4021, "step": 6481 }, { "epoch": 0.11267360809330947, "grad_norm": 2.1613408982487208, "learning_rate": 9.821908890166575e-07, "loss": 0.4623, "step": 6482 }, { "epoch": 0.11269099063081228, "grad_norm": 1.5770249549698372, "learning_rate": 9.821834423519206e-07, "loss": 0.6114, "step": 6483 }, { "epoch": 0.11270837316831511, "grad_norm": 2.7089839087813306, "learning_rate": 9.821759941588837e-07, "loss": 0.354, "step": 6484 }, { "epoch": 0.11272575570581793, "grad_norm": 2.583053802300925, "learning_rate": 9.821685444375702e-07, "loss": 0.7427, "step": 6485 }, { "epoch": 0.11274313824332076, "grad_norm": 1.680403461927919, "learning_rate": 9.821610931880041e-07, "loss": 0.3741, "step": 6486 }, { "epoch": 0.11276052078082359, "grad_norm": 2.503221038003933, "learning_rate": 9.821536404102088e-07, "loss": 0.4427, "step": 6487 }, { "epoch": 0.1127779033183264, "grad_norm": 1.5830868722612712, "learning_rate": 9.82146186104208e-07, "loss": 0.8212, "step": 6488 }, { "epoch": 0.11279528585582924, "grad_norm": 2.5839111038522895, "learning_rate": 9.821387302700252e-07, "loss": 0.3685, "step": 6489 }, { "epoch": 0.11281266839333205, "grad_norm": 1.8361495753311006, "learning_rate": 9.821312729076841e-07, "loss": 0.2603, "step": 6490 }, { "epoch": 0.11283005093083488, "grad_norm": 1.4913353051773237, "learning_rate": 9.821238140172084e-07, "loss": 0.3148, "step": 6491 }, { "epoch": 0.11284743346833771, "grad_norm": 1.6483060371435299, "learning_rate": 9.821163535986215e-07, "loss": 0.4896, "step": 6492 }, { "epoch": 0.11286481600584053, "grad_norm": 1.0795012539897784, "learning_rate": 9.821088916519473e-07, "loss": 0.3399, "step": 6493 }, { "epoch": 0.11288219854334336, "grad_norm": 1.820389870959875, "learning_rate": 9.821014281772094e-07, "loss": 0.2709, "step": 6494 }, { "epoch": 0.11289958108084618, "grad_norm": 2.028365135906033, "learning_rate": 9.820939631744315e-07, "loss": 0.4742, "step": 6495 }, { "epoch": 0.112916963618349, "grad_norm": 1.800907022985299, "learning_rate": 9.82086496643637e-07, "loss": 0.4901, "step": 6496 }, { "epoch": 0.11293434615585184, "grad_norm": 1.600668152647894, "learning_rate": 9.8207902858485e-07, "loss": 0.4548, "step": 6497 }, { "epoch": 0.11295172869335465, "grad_norm": 1.9875142765047662, "learning_rate": 9.82071558998094e-07, "loss": 0.3724, "step": 6498 }, { "epoch": 0.11296911123085748, "grad_norm": 1.6909558202366157, "learning_rate": 9.820640878833922e-07, "loss": 0.4482, "step": 6499 }, { "epoch": 0.1129864937683603, "grad_norm": 4.169804662601314, "learning_rate": 9.82056615240769e-07, "loss": 0.4568, "step": 6500 }, { "epoch": 0.11300387630586313, "grad_norm": 1.9312920691749897, "learning_rate": 9.820491410702476e-07, "loss": 0.3872, "step": 6501 }, { "epoch": 0.11302125884336596, "grad_norm": 2.4592349433855554, "learning_rate": 9.82041665371852e-07, "loss": 0.3165, "step": 6502 }, { "epoch": 0.11303864138086878, "grad_norm": 1.1703296055186212, "learning_rate": 9.820341881456056e-07, "loss": 0.2812, "step": 6503 }, { "epoch": 0.1130560239183716, "grad_norm": 2.316962659217738, "learning_rate": 9.820267093915324e-07, "loss": 0.3155, "step": 6504 }, { "epoch": 0.11307340645587442, "grad_norm": 2.5902297038251434, "learning_rate": 9.820192291096559e-07, "loss": 0.2843, "step": 6505 }, { "epoch": 0.11309078899337725, "grad_norm": 2.7292970721610135, "learning_rate": 9.820117472999998e-07, "loss": 0.6678, "step": 6506 }, { "epoch": 0.11310817153088008, "grad_norm": 2.4609959273300053, "learning_rate": 9.820042639625881e-07, "loss": 0.5713, "step": 6507 }, { "epoch": 0.1131255540683829, "grad_norm": 2.497971319885199, "learning_rate": 9.81996779097444e-07, "loss": 0.3636, "step": 6508 }, { "epoch": 0.11314293660588573, "grad_norm": 2.3076552998162962, "learning_rate": 9.819892927045916e-07, "loss": 0.3715, "step": 6509 }, { "epoch": 0.11316031914338855, "grad_norm": 1.6430532145711616, "learning_rate": 9.819818047840547e-07, "loss": 0.3798, "step": 6510 }, { "epoch": 0.11317770168089138, "grad_norm": 2.034032539027312, "learning_rate": 9.819743153358568e-07, "loss": 0.4143, "step": 6511 }, { "epoch": 0.1131950842183942, "grad_norm": 4.812902379635672, "learning_rate": 9.819668243600218e-07, "loss": 0.222, "step": 6512 }, { "epoch": 0.11321246675589702, "grad_norm": 1.9144857699125843, "learning_rate": 9.819593318565733e-07, "loss": 0.2901, "step": 6513 }, { "epoch": 0.11322984929339985, "grad_norm": 1.7659389775311387, "learning_rate": 9.81951837825535e-07, "loss": 0.2097, "step": 6514 }, { "epoch": 0.11324723183090267, "grad_norm": 2.022247978790329, "learning_rate": 9.81944342266931e-07, "loss": 0.4674, "step": 6515 }, { "epoch": 0.1132646143684055, "grad_norm": 1.8337591874130126, "learning_rate": 9.819368451807846e-07, "loss": 0.2189, "step": 6516 }, { "epoch": 0.11328199690590833, "grad_norm": 1.845331739147252, "learning_rate": 9.8192934656712e-07, "loss": 0.7036, "step": 6517 }, { "epoch": 0.11329937944341115, "grad_norm": 1.8901382748783209, "learning_rate": 9.819218464259605e-07, "loss": 0.3676, "step": 6518 }, { "epoch": 0.11331676198091398, "grad_norm": 1.9212838242045345, "learning_rate": 9.8191434475733e-07, "loss": 0.4929, "step": 6519 }, { "epoch": 0.1133341445184168, "grad_norm": 3.3875238640504457, "learning_rate": 9.819068415612527e-07, "loss": 0.4335, "step": 6520 }, { "epoch": 0.11335152705591962, "grad_norm": 2.497079674332784, "learning_rate": 9.818993368377522e-07, "loss": 0.6135, "step": 6521 }, { "epoch": 0.11336890959342245, "grad_norm": 2.4063221850394982, "learning_rate": 9.81891830586852e-07, "loss": 0.3826, "step": 6522 }, { "epoch": 0.11338629213092527, "grad_norm": 1.5920202518621058, "learning_rate": 9.818843228085761e-07, "loss": 0.4046, "step": 6523 }, { "epoch": 0.1134036746684281, "grad_norm": 3.037856218346681, "learning_rate": 9.81876813502948e-07, "loss": 0.6016, "step": 6524 }, { "epoch": 0.11342105720593092, "grad_norm": 1.9174866941098967, "learning_rate": 9.81869302669992e-07, "loss": 0.5436, "step": 6525 }, { "epoch": 0.11343843974343375, "grad_norm": 1.5569586890573515, "learning_rate": 9.818617903097317e-07, "loss": 0.3492, "step": 6526 }, { "epoch": 0.11345582228093658, "grad_norm": 3.29167937770978, "learning_rate": 9.818542764221909e-07, "loss": 0.455, "step": 6527 }, { "epoch": 0.1134732048184394, "grad_norm": 1.4593935969037308, "learning_rate": 9.81846761007393e-07, "loss": 0.5484, "step": 6528 }, { "epoch": 0.11349058735594222, "grad_norm": 2.1361788955480887, "learning_rate": 9.818392440653627e-07, "loss": 0.544, "step": 6529 }, { "epoch": 0.11350796989344504, "grad_norm": 1.5547927467488207, "learning_rate": 9.818317255961232e-07, "loss": 0.3875, "step": 6530 }, { "epoch": 0.11352535243094787, "grad_norm": 2.816839959318846, "learning_rate": 9.818242055996984e-07, "loss": 0.4382, "step": 6531 }, { "epoch": 0.1135427349684507, "grad_norm": 1.3636534062045986, "learning_rate": 9.81816684076112e-07, "loss": 0.4027, "step": 6532 }, { "epoch": 0.11356011750595352, "grad_norm": 8.04480232643782, "learning_rate": 9.818091610253883e-07, "loss": 1.24, "step": 6533 }, { "epoch": 0.11357750004345635, "grad_norm": 1.8654338057483046, "learning_rate": 9.81801636447551e-07, "loss": 0.3461, "step": 6534 }, { "epoch": 0.11359488258095916, "grad_norm": 1.530857641735809, "learning_rate": 9.817941103426234e-07, "loss": 0.3079, "step": 6535 }, { "epoch": 0.113612265118462, "grad_norm": 2.2306736411737424, "learning_rate": 9.817865827106302e-07, "loss": 0.3018, "step": 6536 }, { "epoch": 0.11362964765596482, "grad_norm": 2.461153677751085, "learning_rate": 9.817790535515946e-07, "loss": 0.4225, "step": 6537 }, { "epoch": 0.11364703019346764, "grad_norm": 2.1788479582943783, "learning_rate": 9.81771522865541e-07, "loss": 0.4443, "step": 6538 }, { "epoch": 0.11366441273097047, "grad_norm": 1.5230352049470144, "learning_rate": 9.817639906524927e-07, "loss": 0.3305, "step": 6539 }, { "epoch": 0.11368179526847329, "grad_norm": 2.3524273217166156, "learning_rate": 9.81756456912474e-07, "loss": 0.6464, "step": 6540 }, { "epoch": 0.11369917780597612, "grad_norm": 1.9847267601219514, "learning_rate": 9.817489216455085e-07, "loss": 0.3686, "step": 6541 }, { "epoch": 0.11371656034347895, "grad_norm": 2.0155682856373445, "learning_rate": 9.817413848516203e-07, "loss": 0.3041, "step": 6542 }, { "epoch": 0.11373394288098176, "grad_norm": 1.0224547247674416, "learning_rate": 9.817338465308332e-07, "loss": 0.314, "step": 6543 }, { "epoch": 0.1137513254184846, "grad_norm": 3.042492253159642, "learning_rate": 9.817263066831712e-07, "loss": 0.9219, "step": 6544 }, { "epoch": 0.11376870795598741, "grad_norm": 2.6177401373981066, "learning_rate": 9.817187653086578e-07, "loss": 0.3712, "step": 6545 }, { "epoch": 0.11378609049349024, "grad_norm": 2.149854844788398, "learning_rate": 9.817112224073175e-07, "loss": 0.4089, "step": 6546 }, { "epoch": 0.11380347303099306, "grad_norm": 1.8987803579294216, "learning_rate": 9.817036779791739e-07, "loss": 0.2845, "step": 6547 }, { "epoch": 0.11382085556849589, "grad_norm": 4.839340704126054, "learning_rate": 9.816961320242508e-07, "loss": 0.8514, "step": 6548 }, { "epoch": 0.11383823810599872, "grad_norm": 4.218327925762207, "learning_rate": 9.81688584542572e-07, "loss": 0.3727, "step": 6549 }, { "epoch": 0.11385562064350153, "grad_norm": 1.3386606370390137, "learning_rate": 9.81681035534162e-07, "loss": 0.6171, "step": 6550 }, { "epoch": 0.11387300318100436, "grad_norm": 1.4387263879179177, "learning_rate": 9.816734849990442e-07, "loss": 0.4629, "step": 6551 }, { "epoch": 0.11389038571850718, "grad_norm": 1.8851670151732811, "learning_rate": 9.81665932937243e-07, "loss": 0.4023, "step": 6552 }, { "epoch": 0.11390776825601001, "grad_norm": 2.6419022803704824, "learning_rate": 9.816583793487817e-07, "loss": 0.6586, "step": 6553 }, { "epoch": 0.11392515079351284, "grad_norm": 5.606277731367553, "learning_rate": 9.816508242336847e-07, "loss": 0.4962, "step": 6554 }, { "epoch": 0.11394253333101566, "grad_norm": 1.9760635853989739, "learning_rate": 9.816432675919759e-07, "loss": 0.6445, "step": 6555 }, { "epoch": 0.11395991586851849, "grad_norm": 2.4724080683694982, "learning_rate": 9.816357094236792e-07, "loss": 0.5352, "step": 6556 }, { "epoch": 0.1139772984060213, "grad_norm": 3.631645125624278, "learning_rate": 9.816281497288185e-07, "loss": 0.8178, "step": 6557 }, { "epoch": 0.11399468094352413, "grad_norm": 1.743046786674117, "learning_rate": 9.816205885074178e-07, "loss": 0.3712, "step": 6558 }, { "epoch": 0.11401206348102697, "grad_norm": 7.007525281225346, "learning_rate": 9.816130257595012e-07, "loss": 0.4272, "step": 6559 }, { "epoch": 0.11402944601852978, "grad_norm": 1.6550289945604288, "learning_rate": 9.816054614850924e-07, "loss": 0.7901, "step": 6560 }, { "epoch": 0.11404682855603261, "grad_norm": 1.791483297323491, "learning_rate": 9.815978956842156e-07, "loss": 0.5043, "step": 6561 }, { "epoch": 0.11406421109353543, "grad_norm": 2.147381377145588, "learning_rate": 9.815903283568945e-07, "loss": 0.5581, "step": 6562 }, { "epoch": 0.11408159363103826, "grad_norm": 1.9704440312482978, "learning_rate": 9.815827595031535e-07, "loss": 0.4556, "step": 6563 }, { "epoch": 0.11409897616854109, "grad_norm": 1.8388504946590922, "learning_rate": 9.815751891230164e-07, "loss": 0.3949, "step": 6564 }, { "epoch": 0.1141163587060439, "grad_norm": 1.948151682257408, "learning_rate": 9.815676172165071e-07, "loss": 0.2499, "step": 6565 }, { "epoch": 0.11413374124354674, "grad_norm": 1.143148920272568, "learning_rate": 9.815600437836495e-07, "loss": 0.2459, "step": 6566 }, { "epoch": 0.11415112378104955, "grad_norm": 1.1991040955671755, "learning_rate": 9.81552468824468e-07, "loss": 0.3561, "step": 6567 }, { "epoch": 0.11416850631855238, "grad_norm": 2.5108282506087263, "learning_rate": 9.815448923389862e-07, "loss": 0.7031, "step": 6568 }, { "epoch": 0.11418588885605521, "grad_norm": 2.143729960184284, "learning_rate": 9.815373143272286e-07, "loss": 0.429, "step": 6569 }, { "epoch": 0.11420327139355803, "grad_norm": 2.236423335088483, "learning_rate": 9.815297347892186e-07, "loss": 0.5043, "step": 6570 }, { "epoch": 0.11422065393106086, "grad_norm": 1.5439733978886978, "learning_rate": 9.815221537249806e-07, "loss": 0.297, "step": 6571 }, { "epoch": 0.11423803646856368, "grad_norm": 3.659981080923676, "learning_rate": 9.815145711345388e-07, "loss": 0.3642, "step": 6572 }, { "epoch": 0.1142554190060665, "grad_norm": 1.6016031651001899, "learning_rate": 9.815069870179167e-07, "loss": 0.3443, "step": 6573 }, { "epoch": 0.11427280154356934, "grad_norm": 2.710234947629654, "learning_rate": 9.814994013751387e-07, "loss": 0.4188, "step": 6574 }, { "epoch": 0.11429018408107215, "grad_norm": 1.9301207525713935, "learning_rate": 9.814918142062288e-07, "loss": 0.7016, "step": 6575 }, { "epoch": 0.11430756661857498, "grad_norm": 1.489631974948898, "learning_rate": 9.814842255112112e-07, "loss": 0.605, "step": 6576 }, { "epoch": 0.1143249491560778, "grad_norm": 1.420979853124238, "learning_rate": 9.814766352901095e-07, "loss": 0.2414, "step": 6577 }, { "epoch": 0.11434233169358063, "grad_norm": 2.0796049878592693, "learning_rate": 9.814690435429482e-07, "loss": 0.1974, "step": 6578 }, { "epoch": 0.11435971423108346, "grad_norm": 2.5310168551108263, "learning_rate": 9.814614502697512e-07, "loss": 0.2659, "step": 6579 }, { "epoch": 0.11437709676858628, "grad_norm": 1.628552316380338, "learning_rate": 9.814538554705425e-07, "loss": 0.2602, "step": 6580 }, { "epoch": 0.1143944793060891, "grad_norm": 1.8457900545696122, "learning_rate": 9.814462591453462e-07, "loss": 0.4426, "step": 6581 }, { "epoch": 0.11441186184359192, "grad_norm": 1.7096113035440785, "learning_rate": 9.814386612941868e-07, "loss": 0.5441, "step": 6582 }, { "epoch": 0.11442924438109475, "grad_norm": 1.4195648427572858, "learning_rate": 9.814310619170876e-07, "loss": 0.4236, "step": 6583 }, { "epoch": 0.11444662691859758, "grad_norm": 1.2774758424441024, "learning_rate": 9.814234610140732e-07, "loss": 0.3749, "step": 6584 }, { "epoch": 0.1144640094561004, "grad_norm": 2.1474648062774193, "learning_rate": 9.814158585851676e-07, "loss": 0.347, "step": 6585 }, { "epoch": 0.11448139199360323, "grad_norm": 1.9069468154123161, "learning_rate": 9.814082546303949e-07, "loss": 0.2789, "step": 6586 }, { "epoch": 0.11449877453110605, "grad_norm": 2.1903575471534618, "learning_rate": 9.814006491497789e-07, "loss": 0.486, "step": 6587 }, { "epoch": 0.11451615706860888, "grad_norm": 3.470700936970508, "learning_rate": 9.813930421433442e-07, "loss": 0.3025, "step": 6588 }, { "epoch": 0.1145335396061117, "grad_norm": 1.7245854897072146, "learning_rate": 9.813854336111146e-07, "loss": 0.6138, "step": 6589 }, { "epoch": 0.11455092214361452, "grad_norm": 1.5104352518097648, "learning_rate": 9.813778235531146e-07, "loss": 0.5153, "step": 6590 }, { "epoch": 0.11456830468111735, "grad_norm": 2.2532631705756594, "learning_rate": 9.813702119693677e-07, "loss": 0.6193, "step": 6591 }, { "epoch": 0.11458568721862017, "grad_norm": 1.9046502677525017, "learning_rate": 9.813625988598984e-07, "loss": 0.6364, "step": 6592 }, { "epoch": 0.114603069756123, "grad_norm": 2.129760537399996, "learning_rate": 9.81354984224731e-07, "loss": 0.5227, "step": 6593 }, { "epoch": 0.11462045229362583, "grad_norm": 1.869906366296601, "learning_rate": 9.813473680638893e-07, "loss": 0.4494, "step": 6594 }, { "epoch": 0.11463783483112865, "grad_norm": 1.5554892885736216, "learning_rate": 9.813397503773975e-07, "loss": 0.4674, "step": 6595 }, { "epoch": 0.11465521736863148, "grad_norm": 1.8692909384749756, "learning_rate": 9.813321311652799e-07, "loss": 0.3359, "step": 6596 }, { "epoch": 0.11467259990613429, "grad_norm": 3.0077671539064457, "learning_rate": 9.813245104275602e-07, "loss": 0.5008, "step": 6597 }, { "epoch": 0.11468998244363712, "grad_norm": 1.9334694641610803, "learning_rate": 9.813168881642633e-07, "loss": 0.3196, "step": 6598 }, { "epoch": 0.11470736498113995, "grad_norm": 2.451979360740162, "learning_rate": 9.813092643754128e-07, "loss": 0.4059, "step": 6599 }, { "epoch": 0.11472474751864277, "grad_norm": 2.1486336189512127, "learning_rate": 9.813016390610331e-07, "loss": 0.4201, "step": 6600 }, { "epoch": 0.1147421300561456, "grad_norm": 11.173374732624481, "learning_rate": 9.812940122211481e-07, "loss": 0.3931, "step": 6601 }, { "epoch": 0.11475951259364842, "grad_norm": 1.8672642934997339, "learning_rate": 9.812863838557825e-07, "loss": 0.393, "step": 6602 }, { "epoch": 0.11477689513115125, "grad_norm": 3.2017472970851837, "learning_rate": 9.8127875396496e-07, "loss": 0.6196, "step": 6603 }, { "epoch": 0.11479427766865408, "grad_norm": 2.6267871664542537, "learning_rate": 9.812711225487049e-07, "loss": 0.3629, "step": 6604 }, { "epoch": 0.11481166020615689, "grad_norm": 2.174821728375513, "learning_rate": 9.812634896070412e-07, "loss": 0.8148, "step": 6605 }, { "epoch": 0.11482904274365972, "grad_norm": 1.8860482028353047, "learning_rate": 9.812558551399937e-07, "loss": 0.5111, "step": 6606 }, { "epoch": 0.11484642528116254, "grad_norm": 2.1582459514159553, "learning_rate": 9.81248219147586e-07, "loss": 0.6144, "step": 6607 }, { "epoch": 0.11486380781866537, "grad_norm": 1.6054184589301277, "learning_rate": 9.812405816298426e-07, "loss": 0.2557, "step": 6608 }, { "epoch": 0.1148811903561682, "grad_norm": 2.1989618158957844, "learning_rate": 9.812329425867877e-07, "loss": 0.649, "step": 6609 }, { "epoch": 0.11489857289367102, "grad_norm": 2.2467106058231976, "learning_rate": 9.812253020184453e-07, "loss": 0.4011, "step": 6610 }, { "epoch": 0.11491595543117385, "grad_norm": 2.216655325794483, "learning_rate": 9.812176599248397e-07, "loss": 0.4104, "step": 6611 }, { "epoch": 0.11493333796867666, "grad_norm": 6.233306415628826, "learning_rate": 9.812100163059952e-07, "loss": 0.2906, "step": 6612 }, { "epoch": 0.1149507205061795, "grad_norm": 2.299765169047521, "learning_rate": 9.81202371161936e-07, "loss": 0.4441, "step": 6613 }, { "epoch": 0.11496810304368231, "grad_norm": 2.2197599543633744, "learning_rate": 9.811947244926864e-07, "loss": 0.542, "step": 6614 }, { "epoch": 0.11498548558118514, "grad_norm": 1.7871101735352282, "learning_rate": 9.811870762982705e-07, "loss": 0.4609, "step": 6615 }, { "epoch": 0.11500286811868797, "grad_norm": 2.404811121845376, "learning_rate": 9.811794265787127e-07, "loss": 0.6716, "step": 6616 }, { "epoch": 0.11502025065619079, "grad_norm": 2.0178552589391705, "learning_rate": 9.811717753340372e-07, "loss": 0.4288, "step": 6617 }, { "epoch": 0.11503763319369362, "grad_norm": 3.0252625207628028, "learning_rate": 9.81164122564268e-07, "loss": 0.2443, "step": 6618 }, { "epoch": 0.11505501573119643, "grad_norm": 1.4182375284718594, "learning_rate": 9.811564682694296e-07, "loss": 0.2727, "step": 6619 }, { "epoch": 0.11507239826869926, "grad_norm": 1.9079399228569434, "learning_rate": 9.811488124495465e-07, "loss": 0.6287, "step": 6620 }, { "epoch": 0.1150897808062021, "grad_norm": 2.8013652895289427, "learning_rate": 9.811411551046421e-07, "loss": 0.6203, "step": 6621 }, { "epoch": 0.11510716334370491, "grad_norm": 1.6683030549483193, "learning_rate": 9.811334962347418e-07, "loss": 0.5078, "step": 6622 }, { "epoch": 0.11512454588120774, "grad_norm": 2.236615560341391, "learning_rate": 9.81125835839869e-07, "loss": 0.6033, "step": 6623 }, { "epoch": 0.11514192841871056, "grad_norm": 1.6654051672854717, "learning_rate": 9.811181739200486e-07, "loss": 0.257, "step": 6624 }, { "epoch": 0.11515931095621339, "grad_norm": 1.694992458237086, "learning_rate": 9.811105104753045e-07, "loss": 0.8826, "step": 6625 }, { "epoch": 0.11517669349371622, "grad_norm": 1.8010404745355792, "learning_rate": 9.81102845505661e-07, "loss": 0.1987, "step": 6626 }, { "epoch": 0.11519407603121903, "grad_norm": 1.844087793355298, "learning_rate": 9.810951790111425e-07, "loss": 0.3132, "step": 6627 }, { "epoch": 0.11521145856872186, "grad_norm": 1.9272097038710627, "learning_rate": 9.810875109917733e-07, "loss": 0.5951, "step": 6628 }, { "epoch": 0.11522884110622468, "grad_norm": 1.7954629166639982, "learning_rate": 9.810798414475778e-07, "loss": 0.2157, "step": 6629 }, { "epoch": 0.11524622364372751, "grad_norm": 2.4035924975215077, "learning_rate": 9.8107217037858e-07, "loss": 0.6097, "step": 6630 }, { "epoch": 0.11526360618123034, "grad_norm": 2.3296438852327537, "learning_rate": 9.810644977848045e-07, "loss": 0.4084, "step": 6631 }, { "epoch": 0.11528098871873316, "grad_norm": 2.1504868301730435, "learning_rate": 9.810568236662755e-07, "loss": 0.4644, "step": 6632 }, { "epoch": 0.11529837125623599, "grad_norm": 1.1012229058425824, "learning_rate": 9.810491480230175e-07, "loss": 0.423, "step": 6633 }, { "epoch": 0.1153157537937388, "grad_norm": 5.735234767036241, "learning_rate": 9.810414708550544e-07, "loss": 0.7048, "step": 6634 }, { "epoch": 0.11533313633124163, "grad_norm": 1.6715651543097472, "learning_rate": 9.81033792162411e-07, "loss": 0.163, "step": 6635 }, { "epoch": 0.11535051886874446, "grad_norm": 2.469361163009409, "learning_rate": 9.810261119451113e-07, "loss": 0.3795, "step": 6636 }, { "epoch": 0.11536790140624728, "grad_norm": 1.7071012711273175, "learning_rate": 9.810184302031799e-07, "loss": 0.4096, "step": 6637 }, { "epoch": 0.11538528394375011, "grad_norm": 1.686538997027468, "learning_rate": 9.81010746936641e-07, "loss": 0.4963, "step": 6638 }, { "epoch": 0.11540266648125293, "grad_norm": 1.1782622412627533, "learning_rate": 9.81003062145519e-07, "loss": 0.4011, "step": 6639 }, { "epoch": 0.11542004901875576, "grad_norm": 1.8553452893640876, "learning_rate": 9.809953758298385e-07, "loss": 0.4999, "step": 6640 }, { "epoch": 0.11543743155625859, "grad_norm": 1.6454064874522591, "learning_rate": 9.809876879896232e-07, "loss": 0.5677, "step": 6641 }, { "epoch": 0.1154548140937614, "grad_norm": 1.7500645191945843, "learning_rate": 9.80979998624898e-07, "loss": 0.5086, "step": 6642 }, { "epoch": 0.11547219663126423, "grad_norm": 1.4926627598860573, "learning_rate": 9.809723077356873e-07, "loss": 0.187, "step": 6643 }, { "epoch": 0.11548957916876705, "grad_norm": 2.2383790110389934, "learning_rate": 9.809646153220153e-07, "loss": 0.7434, "step": 6644 }, { "epoch": 0.11550696170626988, "grad_norm": 1.561374038611371, "learning_rate": 9.809569213839062e-07, "loss": 0.4762, "step": 6645 }, { "epoch": 0.11552434424377271, "grad_norm": 1.6985218307389365, "learning_rate": 9.809492259213848e-07, "loss": 0.2562, "step": 6646 }, { "epoch": 0.11554172678127553, "grad_norm": 1.4613958557004672, "learning_rate": 9.809415289344752e-07, "loss": 0.3853, "step": 6647 }, { "epoch": 0.11555910931877836, "grad_norm": 1.5841489890754314, "learning_rate": 9.809338304232017e-07, "loss": 0.7095, "step": 6648 }, { "epoch": 0.11557649185628117, "grad_norm": 3.8126826654940476, "learning_rate": 9.809261303875893e-07, "loss": 0.7795, "step": 6649 }, { "epoch": 0.115593874393784, "grad_norm": 4.0355225176168, "learning_rate": 9.809184288276617e-07, "loss": 0.6735, "step": 6650 }, { "epoch": 0.11561125693128683, "grad_norm": 1.8767102576587766, "learning_rate": 9.809107257434436e-07, "loss": 0.4617, "step": 6651 }, { "epoch": 0.11562863946878965, "grad_norm": 1.2203605449285042, "learning_rate": 9.809030211349593e-07, "loss": 0.2119, "step": 6652 }, { "epoch": 0.11564602200629248, "grad_norm": 1.7680810838046903, "learning_rate": 9.808953150022337e-07, "loss": 0.5064, "step": 6653 }, { "epoch": 0.1156634045437953, "grad_norm": 2.317299683943478, "learning_rate": 9.808876073452905e-07, "loss": 0.5721, "step": 6654 }, { "epoch": 0.11568078708129813, "grad_norm": 1.907791927497034, "learning_rate": 9.808798981641547e-07, "loss": 0.4846, "step": 6655 }, { "epoch": 0.11569816961880096, "grad_norm": 2.93518953959296, "learning_rate": 9.808721874588505e-07, "loss": 0.2615, "step": 6656 }, { "epoch": 0.11571555215630377, "grad_norm": 1.510743771585858, "learning_rate": 9.808644752294023e-07, "loss": 0.5935, "step": 6657 }, { "epoch": 0.1157329346938066, "grad_norm": 1.2465760428068595, "learning_rate": 9.808567614758347e-07, "loss": 0.4336, "step": 6658 }, { "epoch": 0.11575031723130942, "grad_norm": 1.8181011760401502, "learning_rate": 9.808490461981719e-07, "loss": 0.4027, "step": 6659 }, { "epoch": 0.11576769976881225, "grad_norm": 1.8827814784068428, "learning_rate": 9.808413293964386e-07, "loss": 0.6073, "step": 6660 }, { "epoch": 0.11578508230631508, "grad_norm": 3.249044920410467, "learning_rate": 9.808336110706591e-07, "loss": 0.5127, "step": 6661 }, { "epoch": 0.1158024648438179, "grad_norm": 1.6717944144255303, "learning_rate": 9.80825891220858e-07, "loss": 0.4268, "step": 6662 }, { "epoch": 0.11581984738132073, "grad_norm": 2.3650710231491994, "learning_rate": 9.808181698470596e-07, "loss": 0.543, "step": 6663 }, { "epoch": 0.11583722991882354, "grad_norm": 2.7616583831299555, "learning_rate": 9.808104469492887e-07, "loss": 0.7348, "step": 6664 }, { "epoch": 0.11585461245632638, "grad_norm": 2.5967179615670153, "learning_rate": 9.808027225275693e-07, "loss": 0.4528, "step": 6665 }, { "epoch": 0.1158719949938292, "grad_norm": 2.626399124937922, "learning_rate": 9.807949965819263e-07, "loss": 0.4006, "step": 6666 }, { "epoch": 0.11588937753133202, "grad_norm": 1.7208670165413005, "learning_rate": 9.807872691123838e-07, "loss": 0.6196, "step": 6667 }, { "epoch": 0.11590676006883485, "grad_norm": 2.5636186418761953, "learning_rate": 9.807795401189667e-07, "loss": 0.4379, "step": 6668 }, { "epoch": 0.11592414260633767, "grad_norm": 2.0370895582552615, "learning_rate": 9.807718096016993e-07, "loss": 0.4063, "step": 6669 }, { "epoch": 0.1159415251438405, "grad_norm": 1.4687930491981165, "learning_rate": 9.80764077560606e-07, "loss": 0.3247, "step": 6670 }, { "epoch": 0.11595890768134333, "grad_norm": 2.135960546730365, "learning_rate": 9.807563439957117e-07, "loss": 0.3902, "step": 6671 }, { "epoch": 0.11597629021884615, "grad_norm": 2.179776645093989, "learning_rate": 9.807486089070403e-07, "loss": 0.7485, "step": 6672 }, { "epoch": 0.11599367275634898, "grad_norm": 1.2218736513506612, "learning_rate": 9.80740872294617e-07, "loss": 0.5292, "step": 6673 }, { "epoch": 0.11601105529385179, "grad_norm": 2.1349702306047647, "learning_rate": 9.807331341584656e-07, "loss": 0.2526, "step": 6674 }, { "epoch": 0.11602843783135462, "grad_norm": 2.0267232785076907, "learning_rate": 9.807253944986112e-07, "loss": 0.576, "step": 6675 }, { "epoch": 0.11604582036885745, "grad_norm": 2.3914356618508603, "learning_rate": 9.80717653315078e-07, "loss": 0.3657, "step": 6676 }, { "epoch": 0.11606320290636027, "grad_norm": 1.9125035224526254, "learning_rate": 9.807099106078907e-07, "loss": 0.391, "step": 6677 }, { "epoch": 0.1160805854438631, "grad_norm": 3.1563723038880616, "learning_rate": 9.807021663770739e-07, "loss": 0.4595, "step": 6678 }, { "epoch": 0.11609796798136592, "grad_norm": 2.4007081577026295, "learning_rate": 9.806944206226519e-07, "loss": 0.6982, "step": 6679 }, { "epoch": 0.11611535051886875, "grad_norm": 3.525368242970252, "learning_rate": 9.806866733446495e-07, "loss": 0.4291, "step": 6680 }, { "epoch": 0.11613273305637158, "grad_norm": 2.2434593122130067, "learning_rate": 9.80678924543091e-07, "loss": 0.6037, "step": 6681 }, { "epoch": 0.11615011559387439, "grad_norm": 1.4540041529360326, "learning_rate": 9.806711742180013e-07, "loss": 0.4902, "step": 6682 }, { "epoch": 0.11616749813137722, "grad_norm": 2.0516807151891765, "learning_rate": 9.80663422369405e-07, "loss": 0.4879, "step": 6683 }, { "epoch": 0.11618488066888004, "grad_norm": 1.2845620641016289, "learning_rate": 9.80655668997326e-07, "loss": 0.4129, "step": 6684 }, { "epoch": 0.11620226320638287, "grad_norm": 1.704613620439214, "learning_rate": 9.806479141017896e-07, "loss": 0.4535, "step": 6685 }, { "epoch": 0.11621964574388569, "grad_norm": 2.163749233069887, "learning_rate": 9.8064015768282e-07, "loss": 0.7273, "step": 6686 }, { "epoch": 0.11623702828138852, "grad_norm": 1.5353257786527816, "learning_rate": 9.806323997404418e-07, "loss": 0.2806, "step": 6687 }, { "epoch": 0.11625441081889135, "grad_norm": 1.648338930184029, "learning_rate": 9.806246402746799e-07, "loss": 0.2849, "step": 6688 }, { "epoch": 0.11627179335639416, "grad_norm": 2.194205603661416, "learning_rate": 9.806168792855584e-07, "loss": 0.7259, "step": 6689 }, { "epoch": 0.11628917589389699, "grad_norm": 1.4954159417114834, "learning_rate": 9.806091167731024e-07, "loss": 0.3072, "step": 6690 }, { "epoch": 0.11630655843139981, "grad_norm": 1.6454288457607726, "learning_rate": 9.806013527373363e-07, "loss": 0.5773, "step": 6691 }, { "epoch": 0.11632394096890264, "grad_norm": 1.458059322868636, "learning_rate": 9.805935871782845e-07, "loss": 0.4581, "step": 6692 }, { "epoch": 0.11634132350640547, "grad_norm": 1.5006800766451491, "learning_rate": 9.805858200959718e-07, "loss": 0.4894, "step": 6693 }, { "epoch": 0.11635870604390829, "grad_norm": 1.9795489476667403, "learning_rate": 9.805780514904228e-07, "loss": 0.3914, "step": 6694 }, { "epoch": 0.11637608858141112, "grad_norm": 1.6543356532783713, "learning_rate": 9.805702813616624e-07, "loss": 0.2327, "step": 6695 }, { "epoch": 0.11639347111891393, "grad_norm": 2.4155040818415894, "learning_rate": 9.805625097097148e-07, "loss": 0.3409, "step": 6696 }, { "epoch": 0.11641085365641676, "grad_norm": 1.661859388875182, "learning_rate": 9.805547365346047e-07, "loss": 0.3549, "step": 6697 }, { "epoch": 0.1164282361939196, "grad_norm": 2.249745976212543, "learning_rate": 9.80546961836357e-07, "loss": 0.3403, "step": 6698 }, { "epoch": 0.11644561873142241, "grad_norm": 4.4734075380584315, "learning_rate": 9.80539185614996e-07, "loss": 0.3976, "step": 6699 }, { "epoch": 0.11646300126892524, "grad_norm": 1.2406826184103434, "learning_rate": 9.805314078705467e-07, "loss": 0.4453, "step": 6700 }, { "epoch": 0.11648038380642806, "grad_norm": 1.8216704325302946, "learning_rate": 9.805236286030334e-07, "loss": 0.3827, "step": 6701 }, { "epoch": 0.11649776634393089, "grad_norm": 1.6545599134460172, "learning_rate": 9.805158478124812e-07, "loss": 0.204, "step": 6702 }, { "epoch": 0.11651514888143372, "grad_norm": 1.5408108969495191, "learning_rate": 9.805080654989142e-07, "loss": 0.4878, "step": 6703 }, { "epoch": 0.11653253141893653, "grad_norm": 1.426597672782624, "learning_rate": 9.805002816623576e-07, "loss": 0.6072, "step": 6704 }, { "epoch": 0.11654991395643936, "grad_norm": 2.0137583340709235, "learning_rate": 9.804924963028355e-07, "loss": 0.5978, "step": 6705 }, { "epoch": 0.11656729649394218, "grad_norm": 2.559098103260515, "learning_rate": 9.804847094203732e-07, "loss": 0.4491, "step": 6706 }, { "epoch": 0.11658467903144501, "grad_norm": 1.843585798687697, "learning_rate": 9.80476921014995e-07, "loss": 0.6904, "step": 6707 }, { "epoch": 0.11660206156894784, "grad_norm": 2.529330548321002, "learning_rate": 9.804691310867257e-07, "loss": 0.4545, "step": 6708 }, { "epoch": 0.11661944410645066, "grad_norm": 2.237395633696412, "learning_rate": 9.804613396355898e-07, "loss": 0.4012, "step": 6709 }, { "epoch": 0.11663682664395349, "grad_norm": 2.4672706027080995, "learning_rate": 9.804535466616124e-07, "loss": 0.5811, "step": 6710 }, { "epoch": 0.1166542091814563, "grad_norm": 1.4229175566022374, "learning_rate": 9.80445752164818e-07, "loss": 0.4854, "step": 6711 }, { "epoch": 0.11667159171895913, "grad_norm": 1.7178662650811913, "learning_rate": 9.80437956145231e-07, "loss": 0.3306, "step": 6712 }, { "epoch": 0.11668897425646196, "grad_norm": 3.170980269838943, "learning_rate": 9.804301586028764e-07, "loss": 0.6187, "step": 6713 }, { "epoch": 0.11670635679396478, "grad_norm": 1.536510795164472, "learning_rate": 9.804223595377792e-07, "loss": 0.5387, "step": 6714 }, { "epoch": 0.11672373933146761, "grad_norm": 1.4243512098648738, "learning_rate": 9.804145589499636e-07, "loss": 0.4151, "step": 6715 }, { "epoch": 0.11674112186897043, "grad_norm": 1.457461363041781, "learning_rate": 9.804067568394545e-07, "loss": 0.3168, "step": 6716 }, { "epoch": 0.11675850440647326, "grad_norm": 2.8952944809183774, "learning_rate": 9.803989532062765e-07, "loss": 0.3304, "step": 6717 }, { "epoch": 0.11677588694397609, "grad_norm": 2.5817975899349688, "learning_rate": 9.803911480504548e-07, "loss": 0.6965, "step": 6718 }, { "epoch": 0.1167932694814789, "grad_norm": 3.324036099291525, "learning_rate": 9.803833413720137e-07, "loss": 0.6643, "step": 6719 }, { "epoch": 0.11681065201898173, "grad_norm": 3.327366852902916, "learning_rate": 9.80375533170978e-07, "loss": 0.5199, "step": 6720 }, { "epoch": 0.11682803455648455, "grad_norm": 1.6112488916533474, "learning_rate": 9.803677234473726e-07, "loss": 0.4995, "step": 6721 }, { "epoch": 0.11684541709398738, "grad_norm": 1.5457609916069515, "learning_rate": 9.803599122012222e-07, "loss": 0.3946, "step": 6722 }, { "epoch": 0.11686279963149021, "grad_norm": 1.1899376826108177, "learning_rate": 9.803520994325515e-07, "loss": 0.2686, "step": 6723 }, { "epoch": 0.11688018216899303, "grad_norm": 1.4942491763281567, "learning_rate": 9.803442851413855e-07, "loss": 0.567, "step": 6724 }, { "epoch": 0.11689756470649586, "grad_norm": 1.6501836833825227, "learning_rate": 9.803364693277484e-07, "loss": 0.2881, "step": 6725 }, { "epoch": 0.11691494724399867, "grad_norm": 1.775950363370894, "learning_rate": 9.803286519916657e-07, "loss": 0.2549, "step": 6726 }, { "epoch": 0.1169323297815015, "grad_norm": 1.3647588649176807, "learning_rate": 9.803208331331617e-07, "loss": 0.2348, "step": 6727 }, { "epoch": 0.11694971231900433, "grad_norm": 2.068909807378093, "learning_rate": 9.803130127522611e-07, "loss": 0.2418, "step": 6728 }, { "epoch": 0.11696709485650715, "grad_norm": 2.007811132589033, "learning_rate": 9.803051908489893e-07, "loss": 0.5176, "step": 6729 }, { "epoch": 0.11698447739400998, "grad_norm": 2.1415116186054943, "learning_rate": 9.802973674233704e-07, "loss": 0.432, "step": 6730 }, { "epoch": 0.1170018599315128, "grad_norm": 1.9144869585212732, "learning_rate": 9.802895424754294e-07, "loss": 0.3346, "step": 6731 }, { "epoch": 0.11701924246901563, "grad_norm": 1.6185355085362314, "learning_rate": 9.802817160051914e-07, "loss": 0.4471, "step": 6732 }, { "epoch": 0.11703662500651846, "grad_norm": 5.832002971936758, "learning_rate": 9.802738880126808e-07, "loss": 0.3163, "step": 6733 }, { "epoch": 0.11705400754402127, "grad_norm": 3.108487649670022, "learning_rate": 9.802660584979227e-07, "loss": 0.4221, "step": 6734 }, { "epoch": 0.1170713900815241, "grad_norm": 2.5373747591992344, "learning_rate": 9.802582274609417e-07, "loss": 0.2765, "step": 6735 }, { "epoch": 0.11708877261902692, "grad_norm": 2.0227589255774703, "learning_rate": 9.80250394901763e-07, "loss": 0.7008, "step": 6736 }, { "epoch": 0.11710615515652975, "grad_norm": 1.7439046648238772, "learning_rate": 9.802425608204109e-07, "loss": 0.4707, "step": 6737 }, { "epoch": 0.11712353769403258, "grad_norm": 2.810577081466847, "learning_rate": 9.802347252169106e-07, "loss": 0.5099, "step": 6738 }, { "epoch": 0.1171409202315354, "grad_norm": 2.3035651915548305, "learning_rate": 9.802268880912867e-07, "loss": 0.513, "step": 6739 }, { "epoch": 0.11715830276903823, "grad_norm": 1.7339887260575413, "learning_rate": 9.802190494435642e-07, "loss": 0.5024, "step": 6740 }, { "epoch": 0.11717568530654104, "grad_norm": 2.2718208898321697, "learning_rate": 9.80211209273768e-07, "loss": 0.5091, "step": 6741 }, { "epoch": 0.11719306784404387, "grad_norm": 1.5413602413362193, "learning_rate": 9.802033675819228e-07, "loss": 0.3687, "step": 6742 }, { "epoch": 0.1172104503815467, "grad_norm": 1.5324507597954915, "learning_rate": 9.801955243680534e-07, "loss": 0.5319, "step": 6743 }, { "epoch": 0.11722783291904952, "grad_norm": 1.3226100692705176, "learning_rate": 9.801876796321848e-07, "loss": 0.2235, "step": 6744 }, { "epoch": 0.11724521545655235, "grad_norm": 1.0856458446009296, "learning_rate": 9.801798333743421e-07, "loss": 0.3347, "step": 6745 }, { "epoch": 0.11726259799405517, "grad_norm": 2.282916015649968, "learning_rate": 9.801719855945497e-07, "loss": 0.5302, "step": 6746 }, { "epoch": 0.117279980531558, "grad_norm": 3.595882873997263, "learning_rate": 9.801641362928327e-07, "loss": 0.4185, "step": 6747 }, { "epoch": 0.11729736306906083, "grad_norm": 3.0980437596374975, "learning_rate": 9.801562854692159e-07, "loss": 0.4826, "step": 6748 }, { "epoch": 0.11731474560656364, "grad_norm": 2.3629362318616787, "learning_rate": 9.801484331237244e-07, "loss": 0.2598, "step": 6749 }, { "epoch": 0.11733212814406647, "grad_norm": 2.129931605126574, "learning_rate": 9.801405792563828e-07, "loss": 0.5735, "step": 6750 }, { "epoch": 0.11734951068156929, "grad_norm": 2.1481574370622476, "learning_rate": 9.801327238672162e-07, "loss": 0.347, "step": 6751 }, { "epoch": 0.11736689321907212, "grad_norm": 1.3693916882084516, "learning_rate": 9.801248669562493e-07, "loss": 0.3293, "step": 6752 }, { "epoch": 0.11738427575657495, "grad_norm": 1.6645777006407962, "learning_rate": 9.801170085235072e-07, "loss": 0.4828, "step": 6753 }, { "epoch": 0.11740165829407777, "grad_norm": 2.2008734003035957, "learning_rate": 9.801091485690148e-07, "loss": 0.5927, "step": 6754 }, { "epoch": 0.1174190408315806, "grad_norm": 1.9184051377979325, "learning_rate": 9.801012870927968e-07, "loss": 0.6149, "step": 6755 }, { "epoch": 0.11743642336908341, "grad_norm": 1.583292340339884, "learning_rate": 9.800934240948783e-07, "loss": 0.5386, "step": 6756 }, { "epoch": 0.11745380590658624, "grad_norm": 1.4526769433274438, "learning_rate": 9.800855595752842e-07, "loss": 0.2323, "step": 6757 }, { "epoch": 0.11747118844408906, "grad_norm": 1.507667475284365, "learning_rate": 9.800776935340395e-07, "loss": 0.4035, "step": 6758 }, { "epoch": 0.11748857098159189, "grad_norm": 1.9627728791182124, "learning_rate": 9.800698259711689e-07, "loss": 0.3909, "step": 6759 }, { "epoch": 0.11750595351909472, "grad_norm": 2.761505334814217, "learning_rate": 9.800619568866976e-07, "loss": 0.3274, "step": 6760 }, { "epoch": 0.11752333605659754, "grad_norm": 1.549154525224606, "learning_rate": 9.800540862806504e-07, "loss": 0.8133, "step": 6761 }, { "epoch": 0.11754071859410037, "grad_norm": 1.0658755170523304, "learning_rate": 9.800462141530522e-07, "loss": 0.414, "step": 6762 }, { "epoch": 0.11755810113160318, "grad_norm": 1.8545317413802462, "learning_rate": 9.800383405039282e-07, "loss": 0.4122, "step": 6763 }, { "epoch": 0.11757548366910602, "grad_norm": 2.115883913660761, "learning_rate": 9.800304653333028e-07, "loss": 0.4174, "step": 6764 }, { "epoch": 0.11759286620660885, "grad_norm": 1.0705939981460184, "learning_rate": 9.800225886412015e-07, "loss": 0.1574, "step": 6765 }, { "epoch": 0.11761024874411166, "grad_norm": 2.1757455123652125, "learning_rate": 9.800147104276493e-07, "loss": 0.7143, "step": 6766 }, { "epoch": 0.11762763128161449, "grad_norm": 1.333235127583856, "learning_rate": 9.800068306926707e-07, "loss": 0.2371, "step": 6767 }, { "epoch": 0.11764501381911731, "grad_norm": 3.685608239124641, "learning_rate": 9.79998949436291e-07, "loss": 0.6066, "step": 6768 }, { "epoch": 0.11766239635662014, "grad_norm": 2.3856119702921488, "learning_rate": 9.799910666585352e-07, "loss": 0.845, "step": 6769 }, { "epoch": 0.11767977889412297, "grad_norm": 2.9491094105653755, "learning_rate": 9.799831823594282e-07, "loss": 0.5844, "step": 6770 }, { "epoch": 0.11769716143162579, "grad_norm": 2.0311158835423475, "learning_rate": 9.799752965389951e-07, "loss": 0.6176, "step": 6771 }, { "epoch": 0.11771454396912862, "grad_norm": 1.8190345525567635, "learning_rate": 9.799674091972608e-07, "loss": 0.4385, "step": 6772 }, { "epoch": 0.11773192650663143, "grad_norm": 3.1361854944916434, "learning_rate": 9.7995952033425e-07, "loss": 0.7162, "step": 6773 }, { "epoch": 0.11774930904413426, "grad_norm": 1.531874924147914, "learning_rate": 9.799516299499882e-07, "loss": 0.3894, "step": 6774 }, { "epoch": 0.11776669158163709, "grad_norm": 1.6530679427335406, "learning_rate": 9.799437380445001e-07, "loss": 0.5218, "step": 6775 }, { "epoch": 0.11778407411913991, "grad_norm": 2.087980167936148, "learning_rate": 9.79935844617811e-07, "loss": 0.438, "step": 6776 }, { "epoch": 0.11780145665664274, "grad_norm": 1.2439462058387567, "learning_rate": 9.799279496699454e-07, "loss": 0.4418, "step": 6777 }, { "epoch": 0.11781883919414556, "grad_norm": 2.168609534334564, "learning_rate": 9.79920053200929e-07, "loss": 0.5114, "step": 6778 }, { "epoch": 0.11783622173164839, "grad_norm": 1.9412149779367511, "learning_rate": 9.799121552107866e-07, "loss": 0.5512, "step": 6779 }, { "epoch": 0.11785360426915122, "grad_norm": 1.9735360028172548, "learning_rate": 9.799042556995427e-07, "loss": 0.4356, "step": 6780 }, { "epoch": 0.11787098680665403, "grad_norm": 1.9891873039134296, "learning_rate": 9.79896354667223e-07, "loss": 0.2819, "step": 6781 }, { "epoch": 0.11788836934415686, "grad_norm": 1.8704783608039506, "learning_rate": 9.798884521138522e-07, "loss": 0.5561, "step": 6782 }, { "epoch": 0.11790575188165968, "grad_norm": 2.3396246509035157, "learning_rate": 9.798805480394557e-07, "loss": 0.4551, "step": 6783 }, { "epoch": 0.11792313441916251, "grad_norm": 2.0340016983018456, "learning_rate": 9.79872642444058e-07, "loss": 0.6116, "step": 6784 }, { "epoch": 0.11794051695666534, "grad_norm": 1.4223868316950439, "learning_rate": 9.798647353276844e-07, "loss": 0.3586, "step": 6785 }, { "epoch": 0.11795789949416816, "grad_norm": 1.579329770858145, "learning_rate": 9.798568266903605e-07, "loss": 0.3308, "step": 6786 }, { "epoch": 0.11797528203167099, "grad_norm": 3.0173532334385067, "learning_rate": 9.798489165321105e-07, "loss": 0.5302, "step": 6787 }, { "epoch": 0.1179926645691738, "grad_norm": 2.292722677735815, "learning_rate": 9.798410048529599e-07, "loss": 0.4704, "step": 6788 }, { "epoch": 0.11801004710667663, "grad_norm": 1.0628708685703314, "learning_rate": 9.798330916529339e-07, "loss": 0.2962, "step": 6789 }, { "epoch": 0.11802742964417946, "grad_norm": 2.510642540885461, "learning_rate": 9.79825176932057e-07, "loss": 0.4024, "step": 6790 }, { "epoch": 0.11804481218168228, "grad_norm": 2.847009917258628, "learning_rate": 9.798172606903552e-07, "loss": 0.5238, "step": 6791 }, { "epoch": 0.11806219471918511, "grad_norm": 2.2306566678335216, "learning_rate": 9.79809342927853e-07, "loss": 0.4469, "step": 6792 }, { "epoch": 0.11807957725668793, "grad_norm": 2.1899202552003483, "learning_rate": 9.798014236445752e-07, "loss": 0.5834, "step": 6793 }, { "epoch": 0.11809695979419076, "grad_norm": 2.5752355783051932, "learning_rate": 9.797935028405477e-07, "loss": 0.3214, "step": 6794 }, { "epoch": 0.11811434233169359, "grad_norm": 1.7947266942378535, "learning_rate": 9.797855805157951e-07, "loss": 0.4996, "step": 6795 }, { "epoch": 0.1181317248691964, "grad_norm": 1.4039527409004482, "learning_rate": 9.797776566703427e-07, "loss": 0.4743, "step": 6796 }, { "epoch": 0.11814910740669923, "grad_norm": 2.259315630557208, "learning_rate": 9.797697313042152e-07, "loss": 0.2279, "step": 6797 }, { "epoch": 0.11816648994420205, "grad_norm": 2.0316497669973, "learning_rate": 9.797618044174382e-07, "loss": 0.2949, "step": 6798 }, { "epoch": 0.11818387248170488, "grad_norm": 2.2689264536101126, "learning_rate": 9.797538760100368e-07, "loss": 0.5587, "step": 6799 }, { "epoch": 0.11820125501920771, "grad_norm": 1.2914426593808497, "learning_rate": 9.79745946082036e-07, "loss": 0.7586, "step": 6800 }, { "epoch": 0.11821863755671053, "grad_norm": 2.661158794533654, "learning_rate": 9.797380146334605e-07, "loss": 0.5501, "step": 6801 }, { "epoch": 0.11823602009421336, "grad_norm": 1.4259862367659661, "learning_rate": 9.797300816643363e-07, "loss": 0.4906, "step": 6802 }, { "epoch": 0.11825340263171617, "grad_norm": 1.377338043315693, "learning_rate": 9.79722147174688e-07, "loss": 0.447, "step": 6803 }, { "epoch": 0.118270785169219, "grad_norm": 1.1784670642744828, "learning_rate": 9.79714211164541e-07, "loss": 0.288, "step": 6804 }, { "epoch": 0.11828816770672183, "grad_norm": 1.0575565963255595, "learning_rate": 9.797062736339202e-07, "loss": 0.4697, "step": 6805 }, { "epoch": 0.11830555024422465, "grad_norm": 2.580316916016713, "learning_rate": 9.796983345828506e-07, "loss": 0.4362, "step": 6806 }, { "epoch": 0.11832293278172748, "grad_norm": 2.187265788282521, "learning_rate": 9.79690394011358e-07, "loss": 0.4805, "step": 6807 }, { "epoch": 0.1183403153192303, "grad_norm": 1.2535630389106074, "learning_rate": 9.79682451919467e-07, "loss": 0.2763, "step": 6808 }, { "epoch": 0.11835769785673313, "grad_norm": 1.717699757121755, "learning_rate": 9.796745083072032e-07, "loss": 0.4104, "step": 6809 }, { "epoch": 0.11837508039423596, "grad_norm": 1.674494287020283, "learning_rate": 9.796665631745913e-07, "loss": 0.6681, "step": 6810 }, { "epoch": 0.11839246293173877, "grad_norm": 2.025435394130438, "learning_rate": 9.796586165216568e-07, "loss": 0.3769, "step": 6811 }, { "epoch": 0.1184098454692416, "grad_norm": 1.2723853994044838, "learning_rate": 9.79650668348425e-07, "loss": 0.3821, "step": 6812 }, { "epoch": 0.11842722800674442, "grad_norm": 1.8971598800703278, "learning_rate": 9.796427186549209e-07, "loss": 0.3206, "step": 6813 }, { "epoch": 0.11844461054424725, "grad_norm": 1.6833382309987281, "learning_rate": 9.796347674411694e-07, "loss": 0.4821, "step": 6814 }, { "epoch": 0.11846199308175008, "grad_norm": 1.8919508127272835, "learning_rate": 9.796268147071963e-07, "loss": 0.3891, "step": 6815 }, { "epoch": 0.1184793756192529, "grad_norm": 1.8306317114400104, "learning_rate": 9.796188604530264e-07, "loss": 0.7545, "step": 6816 }, { "epoch": 0.11849675815675573, "grad_norm": 1.4544081703908063, "learning_rate": 9.796109046786848e-07, "loss": 0.4535, "step": 6817 }, { "epoch": 0.11851414069425854, "grad_norm": 1.2766547004184516, "learning_rate": 9.796029473841973e-07, "loss": 0.4425, "step": 6818 }, { "epoch": 0.11853152323176137, "grad_norm": 1.5383186723674929, "learning_rate": 9.795949885695887e-07, "loss": 0.3573, "step": 6819 }, { "epoch": 0.1185489057692642, "grad_norm": 2.049632975016834, "learning_rate": 9.795870282348844e-07, "loss": 0.6011, "step": 6820 }, { "epoch": 0.11856628830676702, "grad_norm": 1.8432094165286366, "learning_rate": 9.795790663801094e-07, "loss": 0.396, "step": 6821 }, { "epoch": 0.11858367084426985, "grad_norm": 3.120995399863738, "learning_rate": 9.795711030052889e-07, "loss": 0.4347, "step": 6822 }, { "epoch": 0.11860105338177267, "grad_norm": 1.527101429199135, "learning_rate": 9.795631381104485e-07, "loss": 0.3538, "step": 6823 }, { "epoch": 0.1186184359192755, "grad_norm": 3.0270878034257587, "learning_rate": 9.795551716956131e-07, "loss": 0.3356, "step": 6824 }, { "epoch": 0.11863581845677831, "grad_norm": 2.24909018894057, "learning_rate": 9.795472037608083e-07, "loss": 0.6215, "step": 6825 }, { "epoch": 0.11865320099428114, "grad_norm": 1.4091908258442405, "learning_rate": 9.79539234306059e-07, "loss": 0.2806, "step": 6826 }, { "epoch": 0.11867058353178397, "grad_norm": 1.8041565605475105, "learning_rate": 9.795312633313905e-07, "loss": 0.6333, "step": 6827 }, { "epoch": 0.11868796606928679, "grad_norm": 2.462303950443492, "learning_rate": 9.795232908368284e-07, "loss": 0.6043, "step": 6828 }, { "epoch": 0.11870534860678962, "grad_norm": 2.4379507119684094, "learning_rate": 9.795153168223978e-07, "loss": 0.4538, "step": 6829 }, { "epoch": 0.11872273114429244, "grad_norm": 3.0460785324072837, "learning_rate": 9.795073412881237e-07, "loss": 0.4386, "step": 6830 }, { "epoch": 0.11874011368179527, "grad_norm": 1.7211259681155953, "learning_rate": 9.794993642340317e-07, "loss": 0.5114, "step": 6831 }, { "epoch": 0.1187574962192981, "grad_norm": 1.9556579193212702, "learning_rate": 9.79491385660147e-07, "loss": 0.3316, "step": 6832 }, { "epoch": 0.11877487875680091, "grad_norm": 1.6199166434167123, "learning_rate": 9.794834055664947e-07, "loss": 0.3439, "step": 6833 }, { "epoch": 0.11879226129430374, "grad_norm": 1.746586959048394, "learning_rate": 9.794754239531005e-07, "loss": 0.5454, "step": 6834 }, { "epoch": 0.11880964383180656, "grad_norm": 2.1771627509455866, "learning_rate": 9.794674408199892e-07, "loss": 0.4673, "step": 6835 }, { "epoch": 0.11882702636930939, "grad_norm": 2.1393669345279367, "learning_rate": 9.794594561671867e-07, "loss": 0.3177, "step": 6836 }, { "epoch": 0.11884440890681222, "grad_norm": 1.728706890456654, "learning_rate": 9.794514699947177e-07, "loss": 0.88, "step": 6837 }, { "epoch": 0.11886179144431504, "grad_norm": 1.5940841576849885, "learning_rate": 9.794434823026079e-07, "loss": 0.6346, "step": 6838 }, { "epoch": 0.11887917398181787, "grad_norm": 1.5464702899269405, "learning_rate": 9.794354930908824e-07, "loss": 0.357, "step": 6839 }, { "epoch": 0.11889655651932068, "grad_norm": 1.5680671163160698, "learning_rate": 9.794275023595667e-07, "loss": 0.21, "step": 6840 }, { "epoch": 0.11891393905682351, "grad_norm": 2.736089986846932, "learning_rate": 9.794195101086861e-07, "loss": 0.4969, "step": 6841 }, { "epoch": 0.11893132159432634, "grad_norm": 2.1982661787033977, "learning_rate": 9.794115163382657e-07, "loss": 0.5767, "step": 6842 }, { "epoch": 0.11894870413182916, "grad_norm": 1.7542821101694137, "learning_rate": 9.794035210483311e-07, "loss": 0.5055, "step": 6843 }, { "epoch": 0.11896608666933199, "grad_norm": 2.0207510849617614, "learning_rate": 9.793955242389077e-07, "loss": 0.3394, "step": 6844 }, { "epoch": 0.11898346920683481, "grad_norm": 1.7132122980223317, "learning_rate": 9.793875259100207e-07, "loss": 0.62, "step": 6845 }, { "epoch": 0.11900085174433764, "grad_norm": 1.7472121530099964, "learning_rate": 9.793795260616953e-07, "loss": 0.4192, "step": 6846 }, { "epoch": 0.11901823428184047, "grad_norm": 1.5186817261782706, "learning_rate": 9.793715246939569e-07, "loss": 0.2248, "step": 6847 }, { "epoch": 0.11903561681934328, "grad_norm": 1.7059449546442846, "learning_rate": 9.793635218068313e-07, "loss": 0.524, "step": 6848 }, { "epoch": 0.11905299935684611, "grad_norm": 2.0843387631730588, "learning_rate": 9.793555174003433e-07, "loss": 0.3718, "step": 6849 }, { "epoch": 0.11907038189434893, "grad_norm": 2.1462028779077924, "learning_rate": 9.793475114745186e-07, "loss": 0.4908, "step": 6850 }, { "epoch": 0.11908776443185176, "grad_norm": 1.9672514447240448, "learning_rate": 9.793395040293823e-07, "loss": 0.8069, "step": 6851 }, { "epoch": 0.11910514696935459, "grad_norm": 3.0382956642352568, "learning_rate": 9.793314950649603e-07, "loss": 0.4328, "step": 6852 }, { "epoch": 0.11912252950685741, "grad_norm": 2.5537198604513023, "learning_rate": 9.793234845812774e-07, "loss": 0.3244, "step": 6853 }, { "epoch": 0.11913991204436024, "grad_norm": 1.231721091316271, "learning_rate": 9.793154725783593e-07, "loss": 0.2486, "step": 6854 }, { "epoch": 0.11915729458186305, "grad_norm": 1.9270435358345759, "learning_rate": 9.793074590562313e-07, "loss": 0.5598, "step": 6855 }, { "epoch": 0.11917467711936588, "grad_norm": 1.6649636783065374, "learning_rate": 9.79299444014919e-07, "loss": 0.508, "step": 6856 }, { "epoch": 0.11919205965686872, "grad_norm": 1.58714285863485, "learning_rate": 9.792914274544474e-07, "loss": 0.5206, "step": 6857 }, { "epoch": 0.11920944219437153, "grad_norm": 2.850097493963906, "learning_rate": 9.792834093748423e-07, "loss": 0.352, "step": 6858 }, { "epoch": 0.11922682473187436, "grad_norm": 2.143368728437653, "learning_rate": 9.792753897761287e-07, "loss": 0.3255, "step": 6859 }, { "epoch": 0.11924420726937718, "grad_norm": 1.6216121090536433, "learning_rate": 9.792673686583325e-07, "loss": 0.8501, "step": 6860 }, { "epoch": 0.11926158980688001, "grad_norm": 1.5307889992987083, "learning_rate": 9.79259346021479e-07, "loss": 0.5847, "step": 6861 }, { "epoch": 0.11927897234438284, "grad_norm": 2.4230220280986585, "learning_rate": 9.792513218655934e-07, "loss": 0.5786, "step": 6862 }, { "epoch": 0.11929635488188566, "grad_norm": 2.043018831932376, "learning_rate": 9.792432961907014e-07, "loss": 0.5068, "step": 6863 }, { "epoch": 0.11931373741938849, "grad_norm": 1.8680275660397159, "learning_rate": 9.792352689968279e-07, "loss": 0.3044, "step": 6864 }, { "epoch": 0.1193311199568913, "grad_norm": 2.065424595972603, "learning_rate": 9.792272402839991e-07, "loss": 0.361, "step": 6865 }, { "epoch": 0.11934850249439413, "grad_norm": 2.721019380727294, "learning_rate": 9.7921921005224e-07, "loss": 0.4473, "step": 6866 }, { "epoch": 0.11936588503189696, "grad_norm": 2.199542688108055, "learning_rate": 9.79211178301576e-07, "loss": 0.5458, "step": 6867 }, { "epoch": 0.11938326756939978, "grad_norm": 2.1758470847731948, "learning_rate": 9.79203145032033e-07, "loss": 0.806, "step": 6868 }, { "epoch": 0.11940065010690261, "grad_norm": 6.5897121242840475, "learning_rate": 9.79195110243636e-07, "loss": 0.7607, "step": 6869 }, { "epoch": 0.11941803264440543, "grad_norm": 2.6071687820310876, "learning_rate": 9.791870739364105e-07, "loss": 0.6548, "step": 6870 }, { "epoch": 0.11943541518190826, "grad_norm": 1.6624748485935636, "learning_rate": 9.791790361103822e-07, "loss": 0.268, "step": 6871 }, { "epoch": 0.11945279771941109, "grad_norm": 1.7133063833291153, "learning_rate": 9.791709967655767e-07, "loss": 0.3407, "step": 6872 }, { "epoch": 0.1194701802569139, "grad_norm": 2.4125904328722094, "learning_rate": 9.79162955902019e-07, "loss": 0.5331, "step": 6873 }, { "epoch": 0.11948756279441673, "grad_norm": 2.9217973505279113, "learning_rate": 9.791549135197349e-07, "loss": 0.4906, "step": 6874 }, { "epoch": 0.11950494533191955, "grad_norm": 1.1487894277245716, "learning_rate": 9.791468696187497e-07, "loss": 0.5778, "step": 6875 }, { "epoch": 0.11952232786942238, "grad_norm": 3.084059806234143, "learning_rate": 9.79138824199089e-07, "loss": 0.3729, "step": 6876 }, { "epoch": 0.11953971040692521, "grad_norm": 3.1403209268068233, "learning_rate": 9.791307772607784e-07, "loss": 0.5375, "step": 6877 }, { "epoch": 0.11955709294442803, "grad_norm": 1.7503066857085334, "learning_rate": 9.791227288038435e-07, "loss": 0.7236, "step": 6878 }, { "epoch": 0.11957447548193086, "grad_norm": 1.9683289137521587, "learning_rate": 9.791146788283095e-07, "loss": 0.2638, "step": 6879 }, { "epoch": 0.11959185801943367, "grad_norm": 1.551799247899815, "learning_rate": 9.791066273342022e-07, "loss": 0.631, "step": 6880 }, { "epoch": 0.1196092405569365, "grad_norm": 2.4956835731470335, "learning_rate": 9.790985743215468e-07, "loss": 0.5169, "step": 6881 }, { "epoch": 0.11962662309443933, "grad_norm": 1.4623663377020162, "learning_rate": 9.790905197903689e-07, "loss": 0.193, "step": 6882 }, { "epoch": 0.11964400563194215, "grad_norm": 2.633759732721381, "learning_rate": 9.790824637406942e-07, "loss": 0.5911, "step": 6883 }, { "epoch": 0.11966138816944498, "grad_norm": 2.269359759440844, "learning_rate": 9.790744061725484e-07, "loss": 0.216, "step": 6884 }, { "epoch": 0.1196787707069478, "grad_norm": 3.9042801605017963, "learning_rate": 9.790663470859566e-07, "loss": 0.5642, "step": 6885 }, { "epoch": 0.11969615324445063, "grad_norm": 1.7746802329124098, "learning_rate": 9.790582864809446e-07, "loss": 0.5277, "step": 6886 }, { "epoch": 0.11971353578195346, "grad_norm": 2.651392882907525, "learning_rate": 9.790502243575378e-07, "loss": 0.3489, "step": 6887 }, { "epoch": 0.11973091831945627, "grad_norm": 2.3549291886160915, "learning_rate": 9.79042160715762e-07, "loss": 0.6075, "step": 6888 }, { "epoch": 0.1197483008569591, "grad_norm": 1.7874753744902416, "learning_rate": 9.790340955556426e-07, "loss": 0.4835, "step": 6889 }, { "epoch": 0.11976568339446192, "grad_norm": 2.50132123886735, "learning_rate": 9.79026028877205e-07, "loss": 0.3668, "step": 6890 }, { "epoch": 0.11978306593196475, "grad_norm": 2.141468375416956, "learning_rate": 9.790179606804751e-07, "loss": 0.4376, "step": 6891 }, { "epoch": 0.11980044846946758, "grad_norm": 2.54914700474335, "learning_rate": 9.790098909654782e-07, "loss": 0.3235, "step": 6892 }, { "epoch": 0.1198178310069704, "grad_norm": 1.894962446556521, "learning_rate": 9.7900181973224e-07, "loss": 0.4043, "step": 6893 }, { "epoch": 0.11983521354447323, "grad_norm": 1.4098045184051102, "learning_rate": 9.78993746980786e-07, "loss": 0.5649, "step": 6894 }, { "epoch": 0.11985259608197604, "grad_norm": 2.1788671336837226, "learning_rate": 9.789856727111422e-07, "loss": 0.707, "step": 6895 }, { "epoch": 0.11986997861947887, "grad_norm": 2.4031845645690115, "learning_rate": 9.789775969233334e-07, "loss": 0.4702, "step": 6896 }, { "epoch": 0.11988736115698169, "grad_norm": 3.7513105277521177, "learning_rate": 9.78969519617386e-07, "loss": 0.7611, "step": 6897 }, { "epoch": 0.11990474369448452, "grad_norm": 1.3816723435118552, "learning_rate": 9.789614407933249e-07, "loss": 0.4748, "step": 6898 }, { "epoch": 0.11992212623198735, "grad_norm": 2.9726591520032204, "learning_rate": 9.789533604511762e-07, "loss": 0.7287, "step": 6899 }, { "epoch": 0.11993950876949017, "grad_norm": 1.8312730676119708, "learning_rate": 9.789452785909656e-07, "loss": 0.4033, "step": 6900 }, { "epoch": 0.119956891306993, "grad_norm": 3.105468372493652, "learning_rate": 9.789371952127181e-07, "loss": 0.7054, "step": 6901 }, { "epoch": 0.11997427384449581, "grad_norm": 2.9426458132707, "learning_rate": 9.789291103164598e-07, "loss": 0.5432, "step": 6902 }, { "epoch": 0.11999165638199864, "grad_norm": 1.7923273156609854, "learning_rate": 9.789210239022163e-07, "loss": 0.3085, "step": 6903 }, { "epoch": 0.12000903891950147, "grad_norm": 1.5438778018699673, "learning_rate": 9.789129359700133e-07, "loss": 0.208, "step": 6904 }, { "epoch": 0.12002642145700429, "grad_norm": 1.3666338969349923, "learning_rate": 9.78904846519876e-07, "loss": 0.2573, "step": 6905 }, { "epoch": 0.12004380399450712, "grad_norm": 1.565219400854883, "learning_rate": 9.788967555518306e-07, "loss": 0.4461, "step": 6906 }, { "epoch": 0.12006118653200994, "grad_norm": 3.061458312653785, "learning_rate": 9.78888663065902e-07, "loss": 0.5083, "step": 6907 }, { "epoch": 0.12007856906951277, "grad_norm": 2.806909217206527, "learning_rate": 9.78880569062117e-07, "loss": 0.5447, "step": 6908 }, { "epoch": 0.1200959516070156, "grad_norm": 1.7714852974079456, "learning_rate": 9.788724735405e-07, "loss": 0.3679, "step": 6909 }, { "epoch": 0.12011333414451841, "grad_norm": 0.957636705772935, "learning_rate": 9.788643765010776e-07, "loss": 0.6811, "step": 6910 }, { "epoch": 0.12013071668202124, "grad_norm": 2.4863073430940603, "learning_rate": 9.78856277943875e-07, "loss": 0.6887, "step": 6911 }, { "epoch": 0.12014809921952406, "grad_norm": 1.6273253864159587, "learning_rate": 9.788481778689177e-07, "loss": 0.3184, "step": 6912 }, { "epoch": 0.12016548175702689, "grad_norm": 2.126403365168248, "learning_rate": 9.788400762762318e-07, "loss": 0.5468, "step": 6913 }, { "epoch": 0.12018286429452972, "grad_norm": 1.805132153372871, "learning_rate": 9.78831973165843e-07, "loss": 0.326, "step": 6914 }, { "epoch": 0.12020024683203254, "grad_norm": 1.2409434069872505, "learning_rate": 9.788238685377768e-07, "loss": 0.5182, "step": 6915 }, { "epoch": 0.12021762936953537, "grad_norm": 1.8793072968888878, "learning_rate": 9.788157623920588e-07, "loss": 0.5983, "step": 6916 }, { "epoch": 0.12023501190703818, "grad_norm": 2.2216572644829, "learning_rate": 9.788076547287146e-07, "loss": 0.3285, "step": 6917 }, { "epoch": 0.12025239444454101, "grad_norm": 2.0227892271728183, "learning_rate": 9.787995455477703e-07, "loss": 0.5329, "step": 6918 }, { "epoch": 0.12026977698204384, "grad_norm": 1.2481341141110447, "learning_rate": 9.787914348492512e-07, "loss": 0.2212, "step": 6919 }, { "epoch": 0.12028715951954666, "grad_norm": 2.6376336656619404, "learning_rate": 9.787833226331835e-07, "loss": 0.7435, "step": 6920 }, { "epoch": 0.12030454205704949, "grad_norm": 3.558611313362006, "learning_rate": 9.787752088995923e-07, "loss": 0.4936, "step": 6921 }, { "epoch": 0.1203219245945523, "grad_norm": 2.338380152104866, "learning_rate": 9.787670936485037e-07, "loss": 0.3987, "step": 6922 }, { "epoch": 0.12033930713205514, "grad_norm": 2.0883965404753946, "learning_rate": 9.787589768799432e-07, "loss": 0.4686, "step": 6923 }, { "epoch": 0.12035668966955797, "grad_norm": 1.5819535475814894, "learning_rate": 9.78750858593937e-07, "loss": 0.2606, "step": 6924 }, { "epoch": 0.12037407220706078, "grad_norm": 1.7232122072580225, "learning_rate": 9.787427387905103e-07, "loss": 0.3828, "step": 6925 }, { "epoch": 0.12039145474456361, "grad_norm": 1.516359232247464, "learning_rate": 9.78734617469689e-07, "loss": 0.2994, "step": 6926 }, { "epoch": 0.12040883728206643, "grad_norm": 1.4764304798236954, "learning_rate": 9.78726494631499e-07, "loss": 0.3943, "step": 6927 }, { "epoch": 0.12042621981956926, "grad_norm": 1.8625792263899021, "learning_rate": 9.78718370275966e-07, "loss": 0.6908, "step": 6928 }, { "epoch": 0.12044360235707209, "grad_norm": 2.543112470585053, "learning_rate": 9.787102444031156e-07, "loss": 0.6587, "step": 6929 }, { "epoch": 0.12046098489457491, "grad_norm": 1.7843221694659368, "learning_rate": 9.787021170129734e-07, "loss": 0.4692, "step": 6930 }, { "epoch": 0.12047836743207774, "grad_norm": 1.4197989939154603, "learning_rate": 9.786939881055656e-07, "loss": 0.3462, "step": 6931 }, { "epoch": 0.12049574996958055, "grad_norm": 2.153590428530231, "learning_rate": 9.786858576809178e-07, "loss": 0.5319, "step": 6932 }, { "epoch": 0.12051313250708338, "grad_norm": 1.723342092861987, "learning_rate": 9.786777257390555e-07, "loss": 0.2991, "step": 6933 }, { "epoch": 0.12053051504458621, "grad_norm": 2.1292988730699047, "learning_rate": 9.786695922800048e-07, "loss": 0.5519, "step": 6934 }, { "epoch": 0.12054789758208903, "grad_norm": 2.6585620886629453, "learning_rate": 9.786614573037914e-07, "loss": 0.5212, "step": 6935 }, { "epoch": 0.12056528011959186, "grad_norm": 2.2709115029217104, "learning_rate": 9.786533208104412e-07, "loss": 0.449, "step": 6936 }, { "epoch": 0.12058266265709468, "grad_norm": 1.3388509468892336, "learning_rate": 9.786451827999797e-07, "loss": 0.2799, "step": 6937 }, { "epoch": 0.12060004519459751, "grad_norm": 1.4410206946950928, "learning_rate": 9.78637043272433e-07, "loss": 0.8556, "step": 6938 }, { "epoch": 0.12061742773210034, "grad_norm": 1.3396640059105849, "learning_rate": 9.786289022278265e-07, "loss": 0.346, "step": 6939 }, { "epoch": 0.12063481026960315, "grad_norm": 1.9445945286611572, "learning_rate": 9.786207596661862e-07, "loss": 0.3529, "step": 6940 }, { "epoch": 0.12065219280710598, "grad_norm": 1.9349983074846324, "learning_rate": 9.786126155875382e-07, "loss": 0.2323, "step": 6941 }, { "epoch": 0.1206695753446088, "grad_norm": 2.4780028674806993, "learning_rate": 9.786044699919078e-07, "loss": 0.2993, "step": 6942 }, { "epoch": 0.12068695788211163, "grad_norm": 2.1390598702229737, "learning_rate": 9.785963228793212e-07, "loss": 0.5282, "step": 6943 }, { "epoch": 0.12070434041961446, "grad_norm": 2.515285136091963, "learning_rate": 9.785881742498043e-07, "loss": 0.3917, "step": 6944 }, { "epoch": 0.12072172295711728, "grad_norm": 4.788054007259054, "learning_rate": 9.785800241033825e-07, "loss": 0.5135, "step": 6945 }, { "epoch": 0.12073910549462011, "grad_norm": 2.4621295773780174, "learning_rate": 9.78571872440082e-07, "loss": 0.3805, "step": 6946 }, { "epoch": 0.12075648803212292, "grad_norm": 3.5262340967978485, "learning_rate": 9.785637192599284e-07, "loss": 0.6795, "step": 6947 }, { "epoch": 0.12077387056962575, "grad_norm": 1.3722179160571748, "learning_rate": 9.785555645629478e-07, "loss": 0.4389, "step": 6948 }, { "epoch": 0.12079125310712858, "grad_norm": 2.019230448976576, "learning_rate": 9.785474083491657e-07, "loss": 0.5073, "step": 6949 }, { "epoch": 0.1208086356446314, "grad_norm": 3.522841335978827, "learning_rate": 9.785392506186082e-07, "loss": 0.4306, "step": 6950 }, { "epoch": 0.12082601818213423, "grad_norm": 2.4420617229548034, "learning_rate": 9.785310913713012e-07, "loss": 0.6088, "step": 6951 }, { "epoch": 0.12084340071963705, "grad_norm": 1.3438442166837568, "learning_rate": 9.7852293060727e-07, "loss": 0.241, "step": 6952 }, { "epoch": 0.12086078325713988, "grad_norm": 1.4879267078031146, "learning_rate": 9.785147683265416e-07, "loss": 0.1897, "step": 6953 }, { "epoch": 0.12087816579464271, "grad_norm": 3.4759894713042194, "learning_rate": 9.78506604529141e-07, "loss": 0.3337, "step": 6954 }, { "epoch": 0.12089554833214552, "grad_norm": 1.4470523224036875, "learning_rate": 9.78498439215094e-07, "loss": 0.2238, "step": 6955 }, { "epoch": 0.12091293086964836, "grad_norm": 2.689899913432665, "learning_rate": 9.78490272384427e-07, "loss": 0.5225, "step": 6956 }, { "epoch": 0.12093031340715117, "grad_norm": 3.932336400523135, "learning_rate": 9.784821040371656e-07, "loss": 0.4074, "step": 6957 }, { "epoch": 0.120947695944654, "grad_norm": 2.046525707565223, "learning_rate": 9.784739341733357e-07, "loss": 0.5433, "step": 6958 }, { "epoch": 0.12096507848215683, "grad_norm": 1.960030244534148, "learning_rate": 9.784657627929632e-07, "loss": 0.3836, "step": 6959 }, { "epoch": 0.12098246101965965, "grad_norm": 2.400082297285213, "learning_rate": 9.78457589896074e-07, "loss": 1.0867, "step": 6960 }, { "epoch": 0.12099984355716248, "grad_norm": 2.995680214330696, "learning_rate": 9.78449415482694e-07, "loss": 0.5362, "step": 6961 }, { "epoch": 0.1210172260946653, "grad_norm": 1.6519292719283813, "learning_rate": 9.78441239552849e-07, "loss": 0.4422, "step": 6962 }, { "epoch": 0.12103460863216813, "grad_norm": 2.2948121043031686, "learning_rate": 9.784330621065653e-07, "loss": 0.6829, "step": 6963 }, { "epoch": 0.12105199116967094, "grad_norm": 1.889093440861249, "learning_rate": 9.784248831438685e-07, "loss": 0.647, "step": 6964 }, { "epoch": 0.12106937370717377, "grad_norm": 2.1489139701636466, "learning_rate": 9.784167026647848e-07, "loss": 0.3794, "step": 6965 }, { "epoch": 0.1210867562446766, "grad_norm": 1.842057944294084, "learning_rate": 9.784085206693396e-07, "loss": 0.5156, "step": 6966 }, { "epoch": 0.12110413878217942, "grad_norm": 2.087374170363332, "learning_rate": 9.784003371575592e-07, "loss": 0.4414, "step": 6967 }, { "epoch": 0.12112152131968225, "grad_norm": 1.9015852940543863, "learning_rate": 9.783921521294698e-07, "loss": 0.4557, "step": 6968 }, { "epoch": 0.12113890385718507, "grad_norm": 1.7790149153908057, "learning_rate": 9.783839655850967e-07, "loss": 0.6001, "step": 6969 }, { "epoch": 0.1211562863946879, "grad_norm": 1.4492156343531444, "learning_rate": 9.783757775244662e-07, "loss": 0.4576, "step": 6970 }, { "epoch": 0.12117366893219073, "grad_norm": 2.3480675307166186, "learning_rate": 9.783675879476044e-07, "loss": 0.3604, "step": 6971 }, { "epoch": 0.12119105146969354, "grad_norm": 2.1133290676428027, "learning_rate": 9.78359396854537e-07, "loss": 0.3685, "step": 6972 }, { "epoch": 0.12120843400719637, "grad_norm": 1.5610297625065275, "learning_rate": 9.7835120424529e-07, "loss": 0.2879, "step": 6973 }, { "epoch": 0.12122581654469919, "grad_norm": 2.1432275128232123, "learning_rate": 9.783430101198894e-07, "loss": 0.4852, "step": 6974 }, { "epoch": 0.12124319908220202, "grad_norm": 1.4276107515695236, "learning_rate": 9.783348144783611e-07, "loss": 0.3703, "step": 6975 }, { "epoch": 0.12126058161970485, "grad_norm": 4.9456312487689384, "learning_rate": 9.783266173207315e-07, "loss": 0.5663, "step": 6976 }, { "epoch": 0.12127796415720767, "grad_norm": 1.8327895135020293, "learning_rate": 9.783184186470261e-07, "loss": 0.3803, "step": 6977 }, { "epoch": 0.1212953466947105, "grad_norm": 1.3470766630679687, "learning_rate": 9.78310218457271e-07, "loss": 0.4044, "step": 6978 }, { "epoch": 0.12131272923221331, "grad_norm": 1.5754308040784302, "learning_rate": 9.783020167514922e-07, "loss": 0.3634, "step": 6979 }, { "epoch": 0.12133011176971614, "grad_norm": 1.6530545231690048, "learning_rate": 9.782938135297157e-07, "loss": 0.3694, "step": 6980 }, { "epoch": 0.12134749430721897, "grad_norm": 2.1054663736171197, "learning_rate": 9.782856087919677e-07, "loss": 0.2464, "step": 6981 }, { "epoch": 0.12136487684472179, "grad_norm": 1.251857784565571, "learning_rate": 9.782774025382738e-07, "loss": 0.3674, "step": 6982 }, { "epoch": 0.12138225938222462, "grad_norm": 2.311060606230829, "learning_rate": 9.782691947686602e-07, "loss": 0.66, "step": 6983 }, { "epoch": 0.12139964191972744, "grad_norm": 1.780223071441064, "learning_rate": 9.782609854831532e-07, "loss": 0.39, "step": 6984 }, { "epoch": 0.12141702445723027, "grad_norm": 2.2485451955332754, "learning_rate": 9.782527746817785e-07, "loss": 0.4519, "step": 6985 }, { "epoch": 0.1214344069947331, "grad_norm": 2.489043599754218, "learning_rate": 9.78244562364562e-07, "loss": 0.4606, "step": 6986 }, { "epoch": 0.12145178953223591, "grad_norm": 1.402212026714937, "learning_rate": 9.782363485315303e-07, "loss": 0.2058, "step": 6987 }, { "epoch": 0.12146917206973874, "grad_norm": 1.682216332236436, "learning_rate": 9.782281331827087e-07, "loss": 0.3802, "step": 6988 }, { "epoch": 0.12148655460724156, "grad_norm": 1.9754266092888473, "learning_rate": 9.782199163181238e-07, "loss": 0.4491, "step": 6989 }, { "epoch": 0.12150393714474439, "grad_norm": 1.8481816021049557, "learning_rate": 9.782116979378014e-07, "loss": 0.2969, "step": 6990 }, { "epoch": 0.12152131968224722, "grad_norm": 2.4679448701482034, "learning_rate": 9.782034780417675e-07, "loss": 0.3485, "step": 6991 }, { "epoch": 0.12153870221975004, "grad_norm": 1.3055040778818603, "learning_rate": 9.781952566300485e-07, "loss": 0.4212, "step": 6992 }, { "epoch": 0.12155608475725287, "grad_norm": 1.5360734270290848, "learning_rate": 9.781870337026698e-07, "loss": 0.4467, "step": 6993 }, { "epoch": 0.12157346729475568, "grad_norm": 1.8954127393290507, "learning_rate": 9.78178809259658e-07, "loss": 0.3835, "step": 6994 }, { "epoch": 0.12159084983225851, "grad_norm": 1.627747587645139, "learning_rate": 9.781705833010392e-07, "loss": 0.3596, "step": 6995 }, { "epoch": 0.12160823236976134, "grad_norm": 3.1097604486133394, "learning_rate": 9.781623558268392e-07, "loss": 0.6654, "step": 6996 }, { "epoch": 0.12162561490726416, "grad_norm": 2.1905474077010076, "learning_rate": 9.781541268370844e-07, "loss": 0.7658, "step": 6997 }, { "epoch": 0.12164299744476699, "grad_norm": 1.6687835771273172, "learning_rate": 9.781458963318004e-07, "loss": 0.2582, "step": 6998 }, { "epoch": 0.1216603799822698, "grad_norm": 1.282946791482915, "learning_rate": 9.781376643110136e-07, "loss": 0.5279, "step": 6999 }, { "epoch": 0.12167776251977264, "grad_norm": 2.2684461630270283, "learning_rate": 9.781294307747502e-07, "loss": 0.5266, "step": 7000 }, { "epoch": 0.12169514505727547, "grad_norm": 1.4346584862621687, "learning_rate": 9.781211957230357e-07, "loss": 0.4386, "step": 7001 }, { "epoch": 0.12171252759477828, "grad_norm": 1.3903527443351473, "learning_rate": 9.78112959155897e-07, "loss": 0.5565, "step": 7002 }, { "epoch": 0.12172991013228111, "grad_norm": 1.9746837204405863, "learning_rate": 9.781047210733597e-07, "loss": 0.2335, "step": 7003 }, { "epoch": 0.12174729266978393, "grad_norm": 1.8057608649508976, "learning_rate": 9.780964814754502e-07, "loss": 0.5827, "step": 7004 }, { "epoch": 0.12176467520728676, "grad_norm": 2.5418788750185923, "learning_rate": 9.780882403621944e-07, "loss": 0.4465, "step": 7005 }, { "epoch": 0.12178205774478959, "grad_norm": 1.8534136662774467, "learning_rate": 9.780799977336186e-07, "loss": 0.4271, "step": 7006 }, { "epoch": 0.1217994402822924, "grad_norm": 1.7757031735539148, "learning_rate": 9.780717535897487e-07, "loss": 0.3247, "step": 7007 }, { "epoch": 0.12181682281979524, "grad_norm": 1.9622475919302367, "learning_rate": 9.78063507930611e-07, "loss": 0.3674, "step": 7008 }, { "epoch": 0.12183420535729805, "grad_norm": 3.09650184078106, "learning_rate": 9.780552607562314e-07, "loss": 0.4219, "step": 7009 }, { "epoch": 0.12185158789480088, "grad_norm": 2.1290178224712455, "learning_rate": 9.780470120666364e-07, "loss": 0.3379, "step": 7010 }, { "epoch": 0.12186897043230371, "grad_norm": 1.9830822268078996, "learning_rate": 9.780387618618519e-07, "loss": 0.4065, "step": 7011 }, { "epoch": 0.12188635296980653, "grad_norm": 1.4270083437226968, "learning_rate": 9.780305101419042e-07, "loss": 0.3431, "step": 7012 }, { "epoch": 0.12190373550730936, "grad_norm": 2.2708974808369264, "learning_rate": 9.780222569068195e-07, "loss": 0.4077, "step": 7013 }, { "epoch": 0.12192111804481218, "grad_norm": 2.2196688504955087, "learning_rate": 9.780140021566236e-07, "loss": 0.3948, "step": 7014 }, { "epoch": 0.12193850058231501, "grad_norm": 2.605314469322934, "learning_rate": 9.78005745891343e-07, "loss": 0.5755, "step": 7015 }, { "epoch": 0.12195588311981784, "grad_norm": 3.22248658794443, "learning_rate": 9.779974881110037e-07, "loss": 0.3998, "step": 7016 }, { "epoch": 0.12197326565732065, "grad_norm": 2.2450120875524227, "learning_rate": 9.77989228815632e-07, "loss": 0.3677, "step": 7017 }, { "epoch": 0.12199064819482348, "grad_norm": 1.8374226910611748, "learning_rate": 9.77980968005254e-07, "loss": 0.3865, "step": 7018 }, { "epoch": 0.1220080307323263, "grad_norm": 1.7928289079366184, "learning_rate": 9.77972705679896e-07, "loss": 0.5408, "step": 7019 }, { "epoch": 0.12202541326982913, "grad_norm": 2.3761767904338233, "learning_rate": 9.779644418395838e-07, "loss": 0.6503, "step": 7020 }, { "epoch": 0.12204279580733196, "grad_norm": 1.1188186752668403, "learning_rate": 9.77956176484344e-07, "loss": 0.462, "step": 7021 }, { "epoch": 0.12206017834483478, "grad_norm": 2.925168174825085, "learning_rate": 9.779479096142027e-07, "loss": 0.5208, "step": 7022 }, { "epoch": 0.12207756088233761, "grad_norm": 1.9609905414440505, "learning_rate": 9.77939641229186e-07, "loss": 0.4016, "step": 7023 }, { "epoch": 0.12209494341984042, "grad_norm": 3.163557751761013, "learning_rate": 9.779313713293204e-07, "loss": 0.6413, "step": 7024 }, { "epoch": 0.12211232595734325, "grad_norm": 1.7849512335311422, "learning_rate": 9.779230999146317e-07, "loss": 0.8228, "step": 7025 }, { "epoch": 0.12212970849484608, "grad_norm": 3.5752021020204054, "learning_rate": 9.779148269851465e-07, "loss": 0.5292, "step": 7026 }, { "epoch": 0.1221470910323489, "grad_norm": 2.264282661161161, "learning_rate": 9.779065525408907e-07, "loss": 0.2767, "step": 7027 }, { "epoch": 0.12216447356985173, "grad_norm": 2.4264997487088356, "learning_rate": 9.778982765818905e-07, "loss": 0.4198, "step": 7028 }, { "epoch": 0.12218185610735455, "grad_norm": 2.0325388547450505, "learning_rate": 9.778899991081727e-07, "loss": 0.4514, "step": 7029 }, { "epoch": 0.12219923864485738, "grad_norm": 1.9357742762267531, "learning_rate": 9.778817201197628e-07, "loss": 0.5239, "step": 7030 }, { "epoch": 0.12221662118236021, "grad_norm": 1.720504507900812, "learning_rate": 9.778734396166875e-07, "loss": 0.3972, "step": 7031 }, { "epoch": 0.12223400371986302, "grad_norm": 2.6128045086248526, "learning_rate": 9.77865157598973e-07, "loss": 0.5372, "step": 7032 }, { "epoch": 0.12225138625736585, "grad_norm": 2.8796552333598626, "learning_rate": 9.778568740666452e-07, "loss": 0.5717, "step": 7033 }, { "epoch": 0.12226876879486867, "grad_norm": 1.3225155083209512, "learning_rate": 9.778485890197307e-07, "loss": 0.8179, "step": 7034 }, { "epoch": 0.1222861513323715, "grad_norm": 2.6307668340050196, "learning_rate": 9.77840302458256e-07, "loss": 0.3868, "step": 7035 }, { "epoch": 0.12230353386987432, "grad_norm": 2.113329843719077, "learning_rate": 9.778320143822468e-07, "loss": 0.6672, "step": 7036 }, { "epoch": 0.12232091640737715, "grad_norm": 1.6557590323133973, "learning_rate": 9.778237247917294e-07, "loss": 0.418, "step": 7037 }, { "epoch": 0.12233829894487998, "grad_norm": 1.3097091955142743, "learning_rate": 9.778154336867306e-07, "loss": 0.3699, "step": 7038 }, { "epoch": 0.1223556814823828, "grad_norm": 2.046584588846788, "learning_rate": 9.778071410672762e-07, "loss": 0.3148, "step": 7039 }, { "epoch": 0.12237306401988562, "grad_norm": 1.662958977684428, "learning_rate": 9.777988469333926e-07, "loss": 0.3818, "step": 7040 }, { "epoch": 0.12239044655738844, "grad_norm": 1.5507822260764976, "learning_rate": 9.777905512851063e-07, "loss": 0.2845, "step": 7041 }, { "epoch": 0.12240782909489127, "grad_norm": 1.7700793406713955, "learning_rate": 9.777822541224435e-07, "loss": 0.5821, "step": 7042 }, { "epoch": 0.1224252116323941, "grad_norm": 2.7878604061842576, "learning_rate": 9.777739554454303e-07, "loss": 0.4269, "step": 7043 }, { "epoch": 0.12244259416989692, "grad_norm": 2.0181399975212084, "learning_rate": 9.77765655254093e-07, "loss": 0.423, "step": 7044 }, { "epoch": 0.12245997670739975, "grad_norm": 1.6681659377985378, "learning_rate": 9.777573535484582e-07, "loss": 0.3834, "step": 7045 }, { "epoch": 0.12247735924490256, "grad_norm": 1.4011320214689673, "learning_rate": 9.777490503285522e-07, "loss": 0.3483, "step": 7046 }, { "epoch": 0.1224947417824054, "grad_norm": 3.0933514173399352, "learning_rate": 9.77740745594401e-07, "loss": 0.6949, "step": 7047 }, { "epoch": 0.12251212431990822, "grad_norm": 1.3016238663393023, "learning_rate": 9.77732439346031e-07, "loss": 0.4416, "step": 7048 }, { "epoch": 0.12252950685741104, "grad_norm": 1.7994514647628312, "learning_rate": 9.777241315834686e-07, "loss": 0.6708, "step": 7049 }, { "epoch": 0.12254688939491387, "grad_norm": 1.9397398048348007, "learning_rate": 9.777158223067403e-07, "loss": 0.5055, "step": 7050 }, { "epoch": 0.12256427193241669, "grad_norm": 2.3565407731594936, "learning_rate": 9.777075115158723e-07, "loss": 0.6764, "step": 7051 }, { "epoch": 0.12258165446991952, "grad_norm": 1.497942083788297, "learning_rate": 9.77699199210891e-07, "loss": 0.5444, "step": 7052 }, { "epoch": 0.12259903700742235, "grad_norm": 0.7999753518669818, "learning_rate": 9.776908853918225e-07, "loss": 0.3708, "step": 7053 }, { "epoch": 0.12261641954492516, "grad_norm": 2.0952665885994617, "learning_rate": 9.776825700586933e-07, "loss": 0.4536, "step": 7054 }, { "epoch": 0.122633802082428, "grad_norm": 3.17615935926943, "learning_rate": 9.776742532115298e-07, "loss": 0.3361, "step": 7055 }, { "epoch": 0.12265118461993081, "grad_norm": 1.4788902521661482, "learning_rate": 9.776659348503585e-07, "loss": 0.3534, "step": 7056 }, { "epoch": 0.12266856715743364, "grad_norm": 1.4835931411581431, "learning_rate": 9.776576149752055e-07, "loss": 0.37, "step": 7057 }, { "epoch": 0.12268594969493647, "grad_norm": 1.966278038077224, "learning_rate": 9.776492935860974e-07, "loss": 0.5057, "step": 7058 }, { "epoch": 0.12270333223243929, "grad_norm": 3.0910689907325812, "learning_rate": 9.776409706830602e-07, "loss": 0.7172, "step": 7059 }, { "epoch": 0.12272071476994212, "grad_norm": 4.235299261133411, "learning_rate": 9.776326462661208e-07, "loss": 0.3863, "step": 7060 }, { "epoch": 0.12273809730744493, "grad_norm": 1.1235091364540928, "learning_rate": 9.77624320335305e-07, "loss": 0.512, "step": 7061 }, { "epoch": 0.12275547984494777, "grad_norm": 2.142037399780866, "learning_rate": 9.776159928906399e-07, "loss": 0.4278, "step": 7062 }, { "epoch": 0.1227728623824506, "grad_norm": 1.700509654782941, "learning_rate": 9.776076639321512e-07, "loss": 0.3169, "step": 7063 }, { "epoch": 0.12279024491995341, "grad_norm": 1.7451706958126836, "learning_rate": 9.775993334598658e-07, "loss": 0.464, "step": 7064 }, { "epoch": 0.12280762745745624, "grad_norm": 3.007530939248212, "learning_rate": 9.775910014738097e-07, "loss": 0.3327, "step": 7065 }, { "epoch": 0.12282500999495906, "grad_norm": 4.297390997498755, "learning_rate": 9.775826679740096e-07, "loss": 0.2838, "step": 7066 }, { "epoch": 0.12284239253246189, "grad_norm": 1.6207899161874528, "learning_rate": 9.77574332960492e-07, "loss": 0.7241, "step": 7067 }, { "epoch": 0.12285977506996472, "grad_norm": 1.8614718654577245, "learning_rate": 9.77565996433283e-07, "loss": 0.385, "step": 7068 }, { "epoch": 0.12287715760746754, "grad_norm": 1.4933308668360057, "learning_rate": 9.77557658392409e-07, "loss": 0.347, "step": 7069 }, { "epoch": 0.12289454014497037, "grad_norm": 2.4730684376761283, "learning_rate": 9.77549318837897e-07, "loss": 0.3931, "step": 7070 }, { "epoch": 0.12291192268247318, "grad_norm": 1.261018301022242, "learning_rate": 9.775409777697726e-07, "loss": 0.4399, "step": 7071 }, { "epoch": 0.12292930521997601, "grad_norm": 1.8500974708438116, "learning_rate": 9.775326351880628e-07, "loss": 0.3769, "step": 7072 }, { "epoch": 0.12294668775747884, "grad_norm": 2.153185765050981, "learning_rate": 9.77524291092794e-07, "loss": 0.6051, "step": 7073 }, { "epoch": 0.12296407029498166, "grad_norm": 1.908427680779449, "learning_rate": 9.775159454839926e-07, "loss": 0.5336, "step": 7074 }, { "epoch": 0.12298145283248449, "grad_norm": 2.05574549110311, "learning_rate": 9.77507598361685e-07, "loss": 0.2535, "step": 7075 }, { "epoch": 0.1229988353699873, "grad_norm": 1.8381848306429072, "learning_rate": 9.774992497258974e-07, "loss": 0.5984, "step": 7076 }, { "epoch": 0.12301621790749014, "grad_norm": 1.337217339365538, "learning_rate": 9.774908995766567e-07, "loss": 0.4806, "step": 7077 }, { "epoch": 0.12303360044499297, "grad_norm": 3.0502724876914553, "learning_rate": 9.774825479139893e-07, "loss": 0.2489, "step": 7078 }, { "epoch": 0.12305098298249578, "grad_norm": 1.3622127515535312, "learning_rate": 9.774741947379215e-07, "loss": 0.5635, "step": 7079 }, { "epoch": 0.12306836551999861, "grad_norm": 1.6292654065517755, "learning_rate": 9.774658400484796e-07, "loss": 0.4481, "step": 7080 }, { "epoch": 0.12308574805750143, "grad_norm": 2.1504426405889863, "learning_rate": 9.774574838456906e-07, "loss": 0.4721, "step": 7081 }, { "epoch": 0.12310313059500426, "grad_norm": 3.843230268028147, "learning_rate": 9.774491261295804e-07, "loss": 0.6307, "step": 7082 }, { "epoch": 0.12312051313250709, "grad_norm": 1.3637141612481856, "learning_rate": 9.774407669001762e-07, "loss": 0.3946, "step": 7083 }, { "epoch": 0.1231378956700099, "grad_norm": 1.8239452468049169, "learning_rate": 9.774324061575038e-07, "loss": 0.5436, "step": 7084 }, { "epoch": 0.12315527820751274, "grad_norm": 2.297325478059596, "learning_rate": 9.774240439015902e-07, "loss": 0.5249, "step": 7085 }, { "epoch": 0.12317266074501555, "grad_norm": 2.3922010213329954, "learning_rate": 9.774156801324616e-07, "loss": 0.433, "step": 7086 }, { "epoch": 0.12319004328251838, "grad_norm": 4.428476425150882, "learning_rate": 9.774073148501445e-07, "loss": 0.8215, "step": 7087 }, { "epoch": 0.12320742582002121, "grad_norm": 2.005385003163017, "learning_rate": 9.773989480546655e-07, "loss": 0.3919, "step": 7088 }, { "epoch": 0.12322480835752403, "grad_norm": 2.499408558018761, "learning_rate": 9.773905797460513e-07, "loss": 0.3034, "step": 7089 }, { "epoch": 0.12324219089502686, "grad_norm": 3.3034310847666775, "learning_rate": 9.773822099243282e-07, "loss": 0.6413, "step": 7090 }, { "epoch": 0.12325957343252968, "grad_norm": 2.5186285225006264, "learning_rate": 9.773738385895228e-07, "loss": 0.6377, "step": 7091 }, { "epoch": 0.1232769559700325, "grad_norm": 1.8792556058361638, "learning_rate": 9.773654657416616e-07, "loss": 0.462, "step": 7092 }, { "epoch": 0.12329433850753534, "grad_norm": 1.3839232799483585, "learning_rate": 9.77357091380771e-07, "loss": 0.5898, "step": 7093 }, { "epoch": 0.12331172104503815, "grad_norm": 1.9436345192300064, "learning_rate": 9.773487155068778e-07, "loss": 0.4905, "step": 7094 }, { "epoch": 0.12332910358254098, "grad_norm": 1.2594253409178955, "learning_rate": 9.773403381200084e-07, "loss": 0.4548, "step": 7095 }, { "epoch": 0.1233464861200438, "grad_norm": 2.408603269107511, "learning_rate": 9.773319592201895e-07, "loss": 0.6349, "step": 7096 }, { "epoch": 0.12336386865754663, "grad_norm": 1.5426046144238672, "learning_rate": 9.773235788074476e-07, "loss": 0.437, "step": 7097 }, { "epoch": 0.12338125119504946, "grad_norm": 2.106987431492223, "learning_rate": 9.773151968818092e-07, "loss": 0.5293, "step": 7098 }, { "epoch": 0.12339863373255228, "grad_norm": 2.713671057582826, "learning_rate": 9.773068134433008e-07, "loss": 0.2426, "step": 7099 }, { "epoch": 0.1234160162700551, "grad_norm": 1.703428725039273, "learning_rate": 9.772984284919493e-07, "loss": 0.7219, "step": 7100 }, { "epoch": 0.12343339880755792, "grad_norm": 2.0474589150198743, "learning_rate": 9.772900420277808e-07, "loss": 0.4446, "step": 7101 }, { "epoch": 0.12345078134506075, "grad_norm": 3.400415833625817, "learning_rate": 9.77281654050822e-07, "loss": 0.8639, "step": 7102 }, { "epoch": 0.12346816388256358, "grad_norm": 1.506460057978784, "learning_rate": 9.772732645611e-07, "loss": 0.6492, "step": 7103 }, { "epoch": 0.1234855464200664, "grad_norm": 2.1178397484460842, "learning_rate": 9.772648735586406e-07, "loss": 0.5982, "step": 7104 }, { "epoch": 0.12350292895756923, "grad_norm": 2.4079618956121047, "learning_rate": 9.77256481043471e-07, "loss": 0.4587, "step": 7105 }, { "epoch": 0.12352031149507205, "grad_norm": 2.478561904410811, "learning_rate": 9.772480870156174e-07, "loss": 0.557, "step": 7106 }, { "epoch": 0.12353769403257488, "grad_norm": 2.0208183970619813, "learning_rate": 9.77239691475107e-07, "loss": 0.9267, "step": 7107 }, { "epoch": 0.1235550765700777, "grad_norm": 1.5626535351135606, "learning_rate": 9.772312944219655e-07, "loss": 0.6158, "step": 7108 }, { "epoch": 0.12357245910758052, "grad_norm": 2.3348729681632134, "learning_rate": 9.772228958562204e-07, "loss": 0.4144, "step": 7109 }, { "epoch": 0.12358984164508335, "grad_norm": 2.0568933092022235, "learning_rate": 9.772144957778976e-07, "loss": 0.3694, "step": 7110 }, { "epoch": 0.12360722418258617, "grad_norm": 1.6194060112619946, "learning_rate": 9.772060941870245e-07, "loss": 0.4351, "step": 7111 }, { "epoch": 0.123624606720089, "grad_norm": 1.5543329605015388, "learning_rate": 9.771976910836269e-07, "loss": 0.516, "step": 7112 }, { "epoch": 0.12364198925759182, "grad_norm": 2.325686061069868, "learning_rate": 9.771892864677318e-07, "loss": 0.341, "step": 7113 }, { "epoch": 0.12365937179509465, "grad_norm": 1.1718917347473279, "learning_rate": 9.771808803393661e-07, "loss": 0.4835, "step": 7114 }, { "epoch": 0.12367675433259748, "grad_norm": 1.2078142331064206, "learning_rate": 9.771724726985561e-07, "loss": 0.4356, "step": 7115 }, { "epoch": 0.1236941368701003, "grad_norm": 1.5366433221971731, "learning_rate": 9.771640635453286e-07, "loss": 0.3726, "step": 7116 }, { "epoch": 0.12371151940760312, "grad_norm": 1.8392068249402238, "learning_rate": 9.771556528797102e-07, "loss": 0.6863, "step": 7117 }, { "epoch": 0.12372890194510594, "grad_norm": 1.9731246721796538, "learning_rate": 9.771472407017276e-07, "loss": 1.0043, "step": 7118 }, { "epoch": 0.12374628448260877, "grad_norm": 2.1364831105782485, "learning_rate": 9.771388270114075e-07, "loss": 0.2074, "step": 7119 }, { "epoch": 0.1237636670201116, "grad_norm": 1.5497351798953816, "learning_rate": 9.771304118087765e-07, "loss": 0.619, "step": 7120 }, { "epoch": 0.12378104955761442, "grad_norm": 19.914004627306962, "learning_rate": 9.77121995093861e-07, "loss": 0.6463, "step": 7121 }, { "epoch": 0.12379843209511725, "grad_norm": 1.100340421286269, "learning_rate": 9.771135768666882e-07, "loss": 0.1617, "step": 7122 }, { "epoch": 0.12381581463262006, "grad_norm": 1.6055630986397744, "learning_rate": 9.771051571272844e-07, "loss": 0.3903, "step": 7123 }, { "epoch": 0.1238331971701229, "grad_norm": 2.007072841205704, "learning_rate": 9.770967358756765e-07, "loss": 0.3482, "step": 7124 }, { "epoch": 0.12385057970762572, "grad_norm": 2.5886295403096926, "learning_rate": 9.77088313111891e-07, "loss": 0.3454, "step": 7125 }, { "epoch": 0.12386796224512854, "grad_norm": 2.0342039731898818, "learning_rate": 9.770798888359548e-07, "loss": 0.3211, "step": 7126 }, { "epoch": 0.12388534478263137, "grad_norm": 2.010201639639529, "learning_rate": 9.770714630478945e-07, "loss": 0.6648, "step": 7127 }, { "epoch": 0.12390272732013419, "grad_norm": 2.4276381912457823, "learning_rate": 9.770630357477367e-07, "loss": 0.5538, "step": 7128 }, { "epoch": 0.12392010985763702, "grad_norm": 2.762201782704002, "learning_rate": 9.770546069355083e-07, "loss": 0.3984, "step": 7129 }, { "epoch": 0.12393749239513985, "grad_norm": 3.4896789675491156, "learning_rate": 9.77046176611236e-07, "loss": 0.3972, "step": 7130 }, { "epoch": 0.12395487493264266, "grad_norm": 2.418137184039121, "learning_rate": 9.770377447749465e-07, "loss": 0.2703, "step": 7131 }, { "epoch": 0.1239722574701455, "grad_norm": 1.6558395392712635, "learning_rate": 9.770293114266664e-07, "loss": 0.45, "step": 7132 }, { "epoch": 0.12398964000764831, "grad_norm": 1.5308528533534174, "learning_rate": 9.770208765664224e-07, "loss": 0.2963, "step": 7133 }, { "epoch": 0.12400702254515114, "grad_norm": 2.378935786911975, "learning_rate": 9.770124401942415e-07, "loss": 0.4262, "step": 7134 }, { "epoch": 0.12402440508265397, "grad_norm": 1.5656868420562247, "learning_rate": 9.770040023101503e-07, "loss": 0.3691, "step": 7135 }, { "epoch": 0.12404178762015679, "grad_norm": 6.208020269156982, "learning_rate": 9.769955629141755e-07, "loss": 0.3789, "step": 7136 }, { "epoch": 0.12405917015765962, "grad_norm": 1.5776761545551983, "learning_rate": 9.769871220063436e-07, "loss": 0.3309, "step": 7137 }, { "epoch": 0.12407655269516243, "grad_norm": 2.2434637614927166, "learning_rate": 9.76978679586682e-07, "loss": 0.4785, "step": 7138 }, { "epoch": 0.12409393523266526, "grad_norm": 1.6449705860865504, "learning_rate": 9.769702356552169e-07, "loss": 0.4219, "step": 7139 }, { "epoch": 0.1241113177701681, "grad_norm": 1.5195368877805642, "learning_rate": 9.769617902119753e-07, "loss": 0.2512, "step": 7140 }, { "epoch": 0.12412870030767091, "grad_norm": 1.6697829101513098, "learning_rate": 9.76953343256984e-07, "loss": 0.5431, "step": 7141 }, { "epoch": 0.12414608284517374, "grad_norm": 2.0053407924082935, "learning_rate": 9.769448947902696e-07, "loss": 0.3097, "step": 7142 }, { "epoch": 0.12416346538267656, "grad_norm": 1.7748776258266457, "learning_rate": 9.76936444811859e-07, "loss": 0.2682, "step": 7143 }, { "epoch": 0.12418084792017939, "grad_norm": 1.3247265520196856, "learning_rate": 9.76927993321779e-07, "loss": 0.5685, "step": 7144 }, { "epoch": 0.12419823045768222, "grad_norm": 2.222083673662515, "learning_rate": 9.769195403200563e-07, "loss": 0.6263, "step": 7145 }, { "epoch": 0.12421561299518503, "grad_norm": 1.4018436216762045, "learning_rate": 9.76911085806718e-07, "loss": 0.4005, "step": 7146 }, { "epoch": 0.12423299553268786, "grad_norm": 1.819456885055362, "learning_rate": 9.769026297817903e-07, "loss": 0.5871, "step": 7147 }, { "epoch": 0.12425037807019068, "grad_norm": 1.9549419157203616, "learning_rate": 9.768941722453003e-07, "loss": 0.3199, "step": 7148 }, { "epoch": 0.12426776060769351, "grad_norm": 2.2404341937636176, "learning_rate": 9.76885713197275e-07, "loss": 0.5768, "step": 7149 }, { "epoch": 0.12428514314519634, "grad_norm": 2.061276469758371, "learning_rate": 9.76877252637741e-07, "loss": 0.4838, "step": 7150 }, { "epoch": 0.12430252568269916, "grad_norm": 5.037150914876465, "learning_rate": 9.768687905667252e-07, "loss": 0.4121, "step": 7151 }, { "epoch": 0.12431990822020199, "grad_norm": 1.7737255384805333, "learning_rate": 9.768603269842544e-07, "loss": 0.4043, "step": 7152 }, { "epoch": 0.1243372907577048, "grad_norm": 1.6554720529433304, "learning_rate": 9.768518618903554e-07, "loss": 0.4876, "step": 7153 }, { "epoch": 0.12435467329520764, "grad_norm": 4.908662443170959, "learning_rate": 9.76843395285055e-07, "loss": 0.3181, "step": 7154 }, { "epoch": 0.12437205583271047, "grad_norm": 3.3001861975066387, "learning_rate": 9.7683492716838e-07, "loss": 0.4148, "step": 7155 }, { "epoch": 0.12438943837021328, "grad_norm": 1.5037203929189196, "learning_rate": 9.768264575403575e-07, "loss": 0.1854, "step": 7156 }, { "epoch": 0.12440682090771611, "grad_norm": 3.761200387628086, "learning_rate": 9.768179864010141e-07, "loss": 0.6323, "step": 7157 }, { "epoch": 0.12442420344521893, "grad_norm": 1.1800383572694375, "learning_rate": 9.768095137503768e-07, "loss": 0.252, "step": 7158 }, { "epoch": 0.12444158598272176, "grad_norm": 1.965764505973267, "learning_rate": 9.768010395884723e-07, "loss": 0.3994, "step": 7159 }, { "epoch": 0.12445896852022459, "grad_norm": 1.7815339605461307, "learning_rate": 9.767925639153274e-07, "loss": 0.6829, "step": 7160 }, { "epoch": 0.1244763510577274, "grad_norm": 1.8842177805538118, "learning_rate": 9.767840867309693e-07, "loss": 0.4313, "step": 7161 }, { "epoch": 0.12449373359523024, "grad_norm": 1.8173235782623771, "learning_rate": 9.767756080354246e-07, "loss": 0.5499, "step": 7162 }, { "epoch": 0.12451111613273305, "grad_norm": 2.0498413917363996, "learning_rate": 9.7676712782872e-07, "loss": 0.3401, "step": 7163 }, { "epoch": 0.12452849867023588, "grad_norm": 2.0882950615808102, "learning_rate": 9.76758646110883e-07, "loss": 0.5327, "step": 7164 }, { "epoch": 0.12454588120773871, "grad_norm": 5.090266866184438, "learning_rate": 9.7675016288194e-07, "loss": 0.6069, "step": 7165 }, { "epoch": 0.12456326374524153, "grad_norm": 2.2626569867074164, "learning_rate": 9.767416781419177e-07, "loss": 0.6129, "step": 7166 }, { "epoch": 0.12458064628274436, "grad_norm": 1.6738332956144097, "learning_rate": 9.767331918908437e-07, "loss": 0.8147, "step": 7167 }, { "epoch": 0.12459802882024718, "grad_norm": 2.0425768776988105, "learning_rate": 9.767247041287443e-07, "loss": 0.5913, "step": 7168 }, { "epoch": 0.12461541135775, "grad_norm": 1.2865410573108764, "learning_rate": 9.767162148556463e-07, "loss": 0.262, "step": 7169 }, { "epoch": 0.12463279389525284, "grad_norm": 1.4722047680822226, "learning_rate": 9.767077240715772e-07, "loss": 0.3045, "step": 7170 }, { "epoch": 0.12465017643275565, "grad_norm": 1.997069750622352, "learning_rate": 9.766992317765636e-07, "loss": 0.3912, "step": 7171 }, { "epoch": 0.12466755897025848, "grad_norm": 2.9056799324695355, "learning_rate": 9.76690737970632e-07, "loss": 0.8049, "step": 7172 }, { "epoch": 0.1246849415077613, "grad_norm": 1.6485480304546936, "learning_rate": 9.766822426538104e-07, "loss": 0.2609, "step": 7173 }, { "epoch": 0.12470232404526413, "grad_norm": 1.7289654311265528, "learning_rate": 9.766737458261246e-07, "loss": 0.4482, "step": 7174 }, { "epoch": 0.12471970658276695, "grad_norm": 2.9821942539704436, "learning_rate": 9.76665247487602e-07, "loss": 0.9367, "step": 7175 }, { "epoch": 0.12473708912026978, "grad_norm": 2.1366082070105703, "learning_rate": 9.766567476382698e-07, "loss": 0.4388, "step": 7176 }, { "epoch": 0.1247544716577726, "grad_norm": 1.8577569576580015, "learning_rate": 9.766482462781544e-07, "loss": 0.7271, "step": 7177 }, { "epoch": 0.12477185419527542, "grad_norm": 1.7983282679264214, "learning_rate": 9.766397434072833e-07, "loss": 0.6957, "step": 7178 }, { "epoch": 0.12478923673277825, "grad_norm": 1.9512738660089417, "learning_rate": 9.76631239025683e-07, "loss": 0.7454, "step": 7179 }, { "epoch": 0.12480661927028107, "grad_norm": 1.380109210596346, "learning_rate": 9.766227331333805e-07, "loss": 0.7572, "step": 7180 }, { "epoch": 0.1248240018077839, "grad_norm": 1.4850161177020702, "learning_rate": 9.76614225730403e-07, "loss": 0.3778, "step": 7181 }, { "epoch": 0.12484138434528673, "grad_norm": 2.1934620076159566, "learning_rate": 9.766057168167774e-07, "loss": 0.7775, "step": 7182 }, { "epoch": 0.12485876688278955, "grad_norm": 2.1018713701349787, "learning_rate": 9.765972063925304e-07, "loss": 0.7324, "step": 7183 }, { "epoch": 0.12487614942029238, "grad_norm": 1.996231915828742, "learning_rate": 9.765886944576893e-07, "loss": 0.2857, "step": 7184 }, { "epoch": 0.12489353195779519, "grad_norm": 2.486547320169213, "learning_rate": 9.76580181012281e-07, "loss": 0.3725, "step": 7185 }, { "epoch": 0.12491091449529802, "grad_norm": 2.740228332999901, "learning_rate": 9.765716660563326e-07, "loss": 0.4904, "step": 7186 }, { "epoch": 0.12492829703280085, "grad_norm": 1.7897895661038983, "learning_rate": 9.765631495898707e-07, "loss": 0.5042, "step": 7187 }, { "epoch": 0.12494567957030367, "grad_norm": 2.197384430983896, "learning_rate": 9.765546316129225e-07, "loss": 0.4169, "step": 7188 }, { "epoch": 0.1249630621078065, "grad_norm": 2.1512574983343877, "learning_rate": 9.765461121255151e-07, "loss": 0.3476, "step": 7189 }, { "epoch": 0.12498044464530932, "grad_norm": 3.238407792046424, "learning_rate": 9.765375911276756e-07, "loss": 0.5888, "step": 7190 }, { "epoch": 0.12499782718281215, "grad_norm": 1.9880349026417312, "learning_rate": 9.765290686194308e-07, "loss": 0.5211, "step": 7191 }, { "epoch": 0.12501520972031496, "grad_norm": 1.7545721859081793, "learning_rate": 9.765205446008075e-07, "loss": 0.3752, "step": 7192 }, { "epoch": 0.1250325922578178, "grad_norm": 3.29616087157651, "learning_rate": 9.765120190718332e-07, "loss": 0.6305, "step": 7193 }, { "epoch": 0.12504997479532062, "grad_norm": 1.9032759437836055, "learning_rate": 9.765034920325346e-07, "loss": 0.3227, "step": 7194 }, { "epoch": 0.12506735733282345, "grad_norm": 2.6086851418490937, "learning_rate": 9.76494963482939e-07, "loss": 0.4968, "step": 7195 }, { "epoch": 0.12508473987032628, "grad_norm": 1.7276564880123513, "learning_rate": 9.76486433423073e-07, "loss": 0.4259, "step": 7196 }, { "epoch": 0.12510212240782909, "grad_norm": 3.094478772660825, "learning_rate": 9.76477901852964e-07, "loss": 0.6216, "step": 7197 }, { "epoch": 0.12511950494533192, "grad_norm": 1.3368167571637528, "learning_rate": 9.764693687726388e-07, "loss": 0.6159, "step": 7198 }, { "epoch": 0.12513688748283475, "grad_norm": 2.006034301806517, "learning_rate": 9.764608341821247e-07, "loss": 0.4375, "step": 7199 }, { "epoch": 0.12515427002033758, "grad_norm": 2.1779415992513367, "learning_rate": 9.764522980814486e-07, "loss": 0.3578, "step": 7200 }, { "epoch": 0.1251716525578404, "grad_norm": 2.188091811589187, "learning_rate": 9.764437604706376e-07, "loss": 0.5949, "step": 7201 }, { "epoch": 0.1251890350953432, "grad_norm": 1.9942327527716501, "learning_rate": 9.764352213497186e-07, "loss": 0.3034, "step": 7202 }, { "epoch": 0.12520641763284604, "grad_norm": 2.1813591644939896, "learning_rate": 9.76426680718719e-07, "loss": 0.5784, "step": 7203 }, { "epoch": 0.12522380017034887, "grad_norm": 1.387755470499547, "learning_rate": 9.764181385776657e-07, "loss": 0.3724, "step": 7204 }, { "epoch": 0.1252411827078517, "grad_norm": 2.4362645664859595, "learning_rate": 9.764095949265856e-07, "loss": 0.4774, "step": 7205 }, { "epoch": 0.12525856524535453, "grad_norm": 2.210353888935205, "learning_rate": 9.76401049765506e-07, "loss": 0.44, "step": 7206 }, { "epoch": 0.12527594778285733, "grad_norm": 1.6921241993981804, "learning_rate": 9.763925030944538e-07, "loss": 0.4757, "step": 7207 }, { "epoch": 0.12529333032036016, "grad_norm": 2.337335400671877, "learning_rate": 9.763839549134563e-07, "loss": 0.4944, "step": 7208 }, { "epoch": 0.125310712857863, "grad_norm": 2.3218926657583485, "learning_rate": 9.763754052225407e-07, "loss": 0.4502, "step": 7209 }, { "epoch": 0.12532809539536582, "grad_norm": 2.13303819034473, "learning_rate": 9.763668540217335e-07, "loss": 0.8055, "step": 7210 }, { "epoch": 0.12534547793286863, "grad_norm": 2.6972616807292296, "learning_rate": 9.763583013110624e-07, "loss": 0.653, "step": 7211 }, { "epoch": 0.12536286047037146, "grad_norm": 1.1139830212637432, "learning_rate": 9.763497470905542e-07, "loss": 0.4318, "step": 7212 }, { "epoch": 0.1253802430078743, "grad_norm": 1.9582763882215584, "learning_rate": 9.763411913602362e-07, "loss": 0.6114, "step": 7213 }, { "epoch": 0.12539762554537712, "grad_norm": 2.2439081956899387, "learning_rate": 9.763326341201354e-07, "loss": 0.5792, "step": 7214 }, { "epoch": 0.12541500808287995, "grad_norm": 1.440283171030365, "learning_rate": 9.76324075370279e-07, "loss": 0.3089, "step": 7215 }, { "epoch": 0.12543239062038275, "grad_norm": 1.525190620272974, "learning_rate": 9.763155151106942e-07, "loss": 0.3904, "step": 7216 }, { "epoch": 0.12544977315788558, "grad_norm": 3.191056792050879, "learning_rate": 9.763069533414078e-07, "loss": 0.4377, "step": 7217 }, { "epoch": 0.1254671556953884, "grad_norm": 1.9250387039298293, "learning_rate": 9.762983900624472e-07, "loss": 0.4781, "step": 7218 }, { "epoch": 0.12548453823289124, "grad_norm": 1.467438594976132, "learning_rate": 9.762898252738397e-07, "loss": 0.4307, "step": 7219 }, { "epoch": 0.12550192077039407, "grad_norm": 2.0257316360318964, "learning_rate": 9.762812589756122e-07, "loss": 0.3803, "step": 7220 }, { "epoch": 0.12551930330789687, "grad_norm": 1.7816507440991067, "learning_rate": 9.762726911677917e-07, "loss": 0.4163, "step": 7221 }, { "epoch": 0.1255366858453997, "grad_norm": 1.2739465759784034, "learning_rate": 9.762641218504057e-07, "loss": 0.2843, "step": 7222 }, { "epoch": 0.12555406838290253, "grad_norm": 2.3629351643196155, "learning_rate": 9.762555510234811e-07, "loss": 0.5604, "step": 7223 }, { "epoch": 0.12557145092040536, "grad_norm": 2.4605624939711, "learning_rate": 9.762469786870454e-07, "loss": 0.6578, "step": 7224 }, { "epoch": 0.1255888334579082, "grad_norm": 1.2512142455794757, "learning_rate": 9.762384048411252e-07, "loss": 0.1953, "step": 7225 }, { "epoch": 0.125606215995411, "grad_norm": 2.4103116463147334, "learning_rate": 9.762298294857484e-07, "loss": 0.6722, "step": 7226 }, { "epoch": 0.12562359853291383, "grad_norm": 1.8291364757781194, "learning_rate": 9.762212526209415e-07, "loss": 0.4305, "step": 7227 }, { "epoch": 0.12564098107041666, "grad_norm": 1.5310130990759163, "learning_rate": 9.762126742467321e-07, "loss": 0.5099, "step": 7228 }, { "epoch": 0.1256583636079195, "grad_norm": 1.3114624261388714, "learning_rate": 9.762040943631473e-07, "loss": 0.3745, "step": 7229 }, { "epoch": 0.12567574614542232, "grad_norm": 2.14824033897451, "learning_rate": 9.76195512970214e-07, "loss": 0.6384, "step": 7230 }, { "epoch": 0.12569312868292512, "grad_norm": 2.9778690378302604, "learning_rate": 9.7618693006796e-07, "loss": 0.6955, "step": 7231 }, { "epoch": 0.12571051122042795, "grad_norm": 1.6299047288963702, "learning_rate": 9.761783456564122e-07, "loss": 0.5647, "step": 7232 }, { "epoch": 0.12572789375793078, "grad_norm": 1.4208411077757015, "learning_rate": 9.761697597355975e-07, "loss": 0.3486, "step": 7233 }, { "epoch": 0.1257452762954336, "grad_norm": 1.5952861135413243, "learning_rate": 9.761611723055437e-07, "loss": 0.4927, "step": 7234 }, { "epoch": 0.12576265883293644, "grad_norm": 2.6457813238061125, "learning_rate": 9.761525833662775e-07, "loss": 0.427, "step": 7235 }, { "epoch": 0.12578004137043924, "grad_norm": 1.6889945977001408, "learning_rate": 9.761439929178265e-07, "loss": 0.2652, "step": 7236 }, { "epoch": 0.12579742390794207, "grad_norm": 1.5420054605387603, "learning_rate": 9.761354009602177e-07, "loss": 0.7003, "step": 7237 }, { "epoch": 0.1258148064454449, "grad_norm": 4.154411462906295, "learning_rate": 9.761268074934782e-07, "loss": 0.6759, "step": 7238 }, { "epoch": 0.12583218898294773, "grad_norm": 2.642856189199493, "learning_rate": 9.761182125176357e-07, "loss": 0.6785, "step": 7239 }, { "epoch": 0.12584957152045056, "grad_norm": 2.1222217312724156, "learning_rate": 9.76109616032717e-07, "loss": 0.6322, "step": 7240 }, { "epoch": 0.12586695405795337, "grad_norm": 1.6401996397604457, "learning_rate": 9.761010180387498e-07, "loss": 0.46, "step": 7241 }, { "epoch": 0.1258843365954562, "grad_norm": 3.774984435584777, "learning_rate": 9.76092418535761e-07, "loss": 0.6205, "step": 7242 }, { "epoch": 0.12590171913295903, "grad_norm": 2.144024234608425, "learning_rate": 9.760838175237778e-07, "loss": 0.5454, "step": 7243 }, { "epoch": 0.12591910167046186, "grad_norm": 1.5812517869022409, "learning_rate": 9.760752150028274e-07, "loss": 0.552, "step": 7244 }, { "epoch": 0.1259364842079647, "grad_norm": 1.410734909975186, "learning_rate": 9.760666109729377e-07, "loss": 0.2708, "step": 7245 }, { "epoch": 0.1259538667454675, "grad_norm": 2.35840001435442, "learning_rate": 9.760580054341352e-07, "loss": 0.4212, "step": 7246 }, { "epoch": 0.12597124928297032, "grad_norm": 2.0945680496128394, "learning_rate": 9.760493983864477e-07, "loss": 0.2973, "step": 7247 }, { "epoch": 0.12598863182047315, "grad_norm": 2.0296716354949202, "learning_rate": 9.76040789829902e-07, "loss": 0.4541, "step": 7248 }, { "epoch": 0.12600601435797598, "grad_norm": 3.1337676409443347, "learning_rate": 9.76032179764526e-07, "loss": 0.4518, "step": 7249 }, { "epoch": 0.1260233968954788, "grad_norm": 1.250971482475148, "learning_rate": 9.760235681903465e-07, "loss": 0.5145, "step": 7250 }, { "epoch": 0.12604077943298161, "grad_norm": 2.8166856149746886, "learning_rate": 9.76014955107391e-07, "loss": 0.5494, "step": 7251 }, { "epoch": 0.12605816197048444, "grad_norm": 1.70099137991158, "learning_rate": 9.760063405156868e-07, "loss": 0.6484, "step": 7252 }, { "epoch": 0.12607554450798727, "grad_norm": 1.3912546282686538, "learning_rate": 9.759977244152611e-07, "loss": 0.5062, "step": 7253 }, { "epoch": 0.1260929270454901, "grad_norm": 1.8741771501118873, "learning_rate": 9.759891068061413e-07, "loss": 0.5606, "step": 7254 }, { "epoch": 0.12611030958299294, "grad_norm": 1.7390836855192717, "learning_rate": 9.759804876883546e-07, "loss": 0.4126, "step": 7255 }, { "epoch": 0.12612769212049574, "grad_norm": 1.4246141655036164, "learning_rate": 9.759718670619285e-07, "loss": 0.3613, "step": 7256 }, { "epoch": 0.12614507465799857, "grad_norm": 2.3940738397736148, "learning_rate": 9.759632449268903e-07, "loss": 0.3546, "step": 7257 }, { "epoch": 0.1261624571955014, "grad_norm": 1.0496066021795263, "learning_rate": 9.75954621283267e-07, "loss": 0.3677, "step": 7258 }, { "epoch": 0.12617983973300423, "grad_norm": 1.9229645660395533, "learning_rate": 9.759459961310862e-07, "loss": 0.5042, "step": 7259 }, { "epoch": 0.12619722227050706, "grad_norm": 1.9653997731759711, "learning_rate": 9.759373694703754e-07, "loss": 0.7276, "step": 7260 }, { "epoch": 0.12621460480800986, "grad_norm": 1.8024089308073148, "learning_rate": 9.759287413011617e-07, "loss": 0.5338, "step": 7261 }, { "epoch": 0.1262319873455127, "grad_norm": 1.7442613081363658, "learning_rate": 9.759201116234726e-07, "loss": 0.3774, "step": 7262 }, { "epoch": 0.12624936988301552, "grad_norm": 2.8149523761481694, "learning_rate": 9.759114804373354e-07, "loss": 0.4046, "step": 7263 }, { "epoch": 0.12626675242051835, "grad_norm": 1.6935008933190907, "learning_rate": 9.759028477427773e-07, "loss": 0.5454, "step": 7264 }, { "epoch": 0.12628413495802118, "grad_norm": 1.566211898570049, "learning_rate": 9.758942135398257e-07, "loss": 0.4881, "step": 7265 }, { "epoch": 0.12630151749552398, "grad_norm": 3.120261647831272, "learning_rate": 9.758855778285083e-07, "loss": 0.4874, "step": 7266 }, { "epoch": 0.12631890003302682, "grad_norm": 1.6267415665628844, "learning_rate": 9.758769406088518e-07, "loss": 0.3788, "step": 7267 }, { "epoch": 0.12633628257052965, "grad_norm": 2.1664206515121185, "learning_rate": 9.758683018808843e-07, "loss": 0.8411, "step": 7268 }, { "epoch": 0.12635366510803248, "grad_norm": 1.999343505598663, "learning_rate": 9.758596616446329e-07, "loss": 0.393, "step": 7269 }, { "epoch": 0.1263710476455353, "grad_norm": 1.3579847879135187, "learning_rate": 9.75851019900125e-07, "loss": 0.4931, "step": 7270 }, { "epoch": 0.1263884301830381, "grad_norm": 1.4977985116471924, "learning_rate": 9.758423766473877e-07, "loss": 0.3361, "step": 7271 }, { "epoch": 0.12640581272054094, "grad_norm": 1.7508993934073456, "learning_rate": 9.758337318864489e-07, "loss": 0.6028, "step": 7272 }, { "epoch": 0.12642319525804377, "grad_norm": 2.1425100273169564, "learning_rate": 9.758250856173357e-07, "loss": 0.4991, "step": 7273 }, { "epoch": 0.1264405777955466, "grad_norm": 2.4564536356218105, "learning_rate": 9.758164378400755e-07, "loss": 0.8864, "step": 7274 }, { "epoch": 0.12645796033304943, "grad_norm": 1.8313575505448736, "learning_rate": 9.758077885546957e-07, "loss": 0.6301, "step": 7275 }, { "epoch": 0.12647534287055223, "grad_norm": 3.440809326537617, "learning_rate": 9.75799137761224e-07, "loss": 0.5143, "step": 7276 }, { "epoch": 0.12649272540805506, "grad_norm": 1.325936298802289, "learning_rate": 9.757904854596875e-07, "loss": 0.6248, "step": 7277 }, { "epoch": 0.1265101079455579, "grad_norm": 1.7158321230577531, "learning_rate": 9.757818316501136e-07, "loss": 0.5375, "step": 7278 }, { "epoch": 0.12652749048306072, "grad_norm": 1.5752522944058436, "learning_rate": 9.7577317633253e-07, "loss": 0.507, "step": 7279 }, { "epoch": 0.12654487302056355, "grad_norm": 2.7533488971219096, "learning_rate": 9.75764519506964e-07, "loss": 0.3888, "step": 7280 }, { "epoch": 0.12656225555806636, "grad_norm": 2.848051947987945, "learning_rate": 9.757558611734428e-07, "loss": 0.6974, "step": 7281 }, { "epoch": 0.12657963809556919, "grad_norm": 1.1940361469022327, "learning_rate": 9.757472013319942e-07, "loss": 0.3383, "step": 7282 }, { "epoch": 0.12659702063307202, "grad_norm": 2.03865258911632, "learning_rate": 9.757385399826456e-07, "loss": 0.3515, "step": 7283 }, { "epoch": 0.12661440317057485, "grad_norm": 1.778722200824804, "learning_rate": 9.757298771254243e-07, "loss": 0.4727, "step": 7284 }, { "epoch": 0.12663178570807768, "grad_norm": 1.8068218745230682, "learning_rate": 9.757212127603579e-07, "loss": 0.1371, "step": 7285 }, { "epoch": 0.12664916824558048, "grad_norm": 1.5523332298043164, "learning_rate": 9.757125468874737e-07, "loss": 0.6171, "step": 7286 }, { "epoch": 0.1266665507830833, "grad_norm": 1.9045850232875752, "learning_rate": 9.757038795067991e-07, "loss": 0.3503, "step": 7287 }, { "epoch": 0.12668393332058614, "grad_norm": 2.154488942420075, "learning_rate": 9.756952106183618e-07, "loss": 0.4402, "step": 7288 }, { "epoch": 0.12670131585808897, "grad_norm": 1.1188593959535027, "learning_rate": 9.756865402221894e-07, "loss": 0.51, "step": 7289 }, { "epoch": 0.1267186983955918, "grad_norm": 1.28541404426647, "learning_rate": 9.756778683183088e-07, "loss": 0.5098, "step": 7290 }, { "epoch": 0.1267360809330946, "grad_norm": 2.1931039354507833, "learning_rate": 9.756691949067482e-07, "loss": 0.3936, "step": 7291 }, { "epoch": 0.12675346347059743, "grad_norm": 1.5904350195283556, "learning_rate": 9.756605199875348e-07, "loss": 0.4603, "step": 7292 }, { "epoch": 0.12677084600810026, "grad_norm": 2.434248006202701, "learning_rate": 9.75651843560696e-07, "loss": 0.6006, "step": 7293 }, { "epoch": 0.1267882285456031, "grad_norm": 1.5881124780728273, "learning_rate": 9.756431656262591e-07, "loss": 0.7347, "step": 7294 }, { "epoch": 0.12680561108310592, "grad_norm": 2.8960762937908355, "learning_rate": 9.75634486184252e-07, "loss": 0.3137, "step": 7295 }, { "epoch": 0.12682299362060873, "grad_norm": 2.5632411243883864, "learning_rate": 9.756258052347022e-07, "loss": 0.6065, "step": 7296 }, { "epoch": 0.12684037615811156, "grad_norm": 2.180005256887126, "learning_rate": 9.75617122777637e-07, "loss": 0.4597, "step": 7297 }, { "epoch": 0.1268577586956144, "grad_norm": 2.0789013582717435, "learning_rate": 9.756084388130838e-07, "loss": 0.6122, "step": 7298 }, { "epoch": 0.12687514123311722, "grad_norm": 2.237387090092347, "learning_rate": 9.755997533410706e-07, "loss": 0.4547, "step": 7299 }, { "epoch": 0.12689252377062005, "grad_norm": 1.9311022782551142, "learning_rate": 9.755910663616246e-07, "loss": 0.4657, "step": 7300 }, { "epoch": 0.12690990630812285, "grad_norm": 3.225058395911409, "learning_rate": 9.755823778747734e-07, "loss": 0.5159, "step": 7301 }, { "epoch": 0.12692728884562568, "grad_norm": 2.256727683897584, "learning_rate": 9.755736878805443e-07, "loss": 0.5376, "step": 7302 }, { "epoch": 0.1269446713831285, "grad_norm": 1.8433969867635325, "learning_rate": 9.755649963789654e-07, "loss": 0.5464, "step": 7303 }, { "epoch": 0.12696205392063134, "grad_norm": 2.1785071754815424, "learning_rate": 9.755563033700636e-07, "loss": 0.3634, "step": 7304 }, { "epoch": 0.12697943645813417, "grad_norm": 1.7086716930053603, "learning_rate": 9.75547608853867e-07, "loss": 0.5874, "step": 7305 }, { "epoch": 0.12699681899563697, "grad_norm": 1.8560544810089408, "learning_rate": 9.755389128304029e-07, "loss": 0.6154, "step": 7306 }, { "epoch": 0.1270142015331398, "grad_norm": 5.25830882907095, "learning_rate": 9.755302152996987e-07, "loss": 0.3806, "step": 7307 }, { "epoch": 0.12703158407064263, "grad_norm": 2.135889866581876, "learning_rate": 9.755215162617825e-07, "loss": 0.4841, "step": 7308 }, { "epoch": 0.12704896660814546, "grad_norm": 1.4967501318599459, "learning_rate": 9.755128157166812e-07, "loss": 0.4986, "step": 7309 }, { "epoch": 0.1270663491456483, "grad_norm": 1.9940281273890783, "learning_rate": 9.755041136644228e-07, "loss": 0.4821, "step": 7310 }, { "epoch": 0.1270837316831511, "grad_norm": 3.885733280988131, "learning_rate": 9.75495410105035e-07, "loss": 0.4064, "step": 7311 }, { "epoch": 0.12710111422065393, "grad_norm": 3.7802609199973642, "learning_rate": 9.754867050385448e-07, "loss": 0.8097, "step": 7312 }, { "epoch": 0.12711849675815676, "grad_norm": 2.085180167255543, "learning_rate": 9.754779984649807e-07, "loss": 0.4496, "step": 7313 }, { "epoch": 0.1271358792956596, "grad_norm": 1.7026298418822368, "learning_rate": 9.754692903843694e-07, "loss": 0.7494, "step": 7314 }, { "epoch": 0.12715326183316242, "grad_norm": 2.3799747721460656, "learning_rate": 9.754605807967388e-07, "loss": 0.6616, "step": 7315 }, { "epoch": 0.12717064437066522, "grad_norm": 2.22422334529442, "learning_rate": 9.754518697021166e-07, "loss": 0.3416, "step": 7316 }, { "epoch": 0.12718802690816805, "grad_norm": 1.366514811294552, "learning_rate": 9.754431571005305e-07, "loss": 0.5415, "step": 7317 }, { "epoch": 0.12720540944567088, "grad_norm": 1.1751868798783682, "learning_rate": 9.75434442992008e-07, "loss": 0.2785, "step": 7318 }, { "epoch": 0.1272227919831737, "grad_norm": 2.0065658263126944, "learning_rate": 9.754257273765765e-07, "loss": 0.5232, "step": 7319 }, { "epoch": 0.12724017452067654, "grad_norm": 2.322276917337793, "learning_rate": 9.754170102542641e-07, "loss": 0.5501, "step": 7320 }, { "epoch": 0.12725755705817934, "grad_norm": 1.9570429197465338, "learning_rate": 9.754082916250982e-07, "loss": 0.5049, "step": 7321 }, { "epoch": 0.12727493959568217, "grad_norm": 3.114825230581274, "learning_rate": 9.75399571489106e-07, "loss": 0.441, "step": 7322 }, { "epoch": 0.127292322133185, "grad_norm": 2.1716882251215406, "learning_rate": 9.753908498463159e-07, "loss": 0.7345, "step": 7323 }, { "epoch": 0.12730970467068783, "grad_norm": 1.581376955781711, "learning_rate": 9.75382126696755e-07, "loss": 0.4031, "step": 7324 }, { "epoch": 0.12732708720819066, "grad_norm": 2.1996348760553226, "learning_rate": 9.753734020404512e-07, "loss": 0.49, "step": 7325 }, { "epoch": 0.12734446974569347, "grad_norm": 2.784528841612267, "learning_rate": 9.75364675877432e-07, "loss": 0.8957, "step": 7326 }, { "epoch": 0.1273618522831963, "grad_norm": 2.17026069175454, "learning_rate": 9.753559482077254e-07, "loss": 0.3058, "step": 7327 }, { "epoch": 0.12737923482069913, "grad_norm": 1.6067039568521704, "learning_rate": 9.753472190313587e-07, "loss": 0.2712, "step": 7328 }, { "epoch": 0.12739661735820196, "grad_norm": 1.6727929661224743, "learning_rate": 9.753384883483595e-07, "loss": 0.4247, "step": 7329 }, { "epoch": 0.1274139998957048, "grad_norm": 1.705799153050261, "learning_rate": 9.753297561587557e-07, "loss": 0.5892, "step": 7330 }, { "epoch": 0.1274313824332076, "grad_norm": 1.9904468648523195, "learning_rate": 9.75321022462575e-07, "loss": 0.5692, "step": 7331 }, { "epoch": 0.12744876497071042, "grad_norm": 1.6787952089392946, "learning_rate": 9.753122872598451e-07, "loss": 0.5337, "step": 7332 }, { "epoch": 0.12746614750821325, "grad_norm": 1.6593494233366364, "learning_rate": 9.753035505505934e-07, "loss": 0.3074, "step": 7333 }, { "epoch": 0.12748353004571608, "grad_norm": 1.7414799373155185, "learning_rate": 9.752948123348478e-07, "loss": 0.5824, "step": 7334 }, { "epoch": 0.1275009125832189, "grad_norm": 1.8999924245195687, "learning_rate": 9.752860726126362e-07, "loss": 0.431, "step": 7335 }, { "epoch": 0.12751829512072171, "grad_norm": 1.6872753141839034, "learning_rate": 9.75277331383986e-07, "loss": 0.6329, "step": 7336 }, { "epoch": 0.12753567765822454, "grad_norm": 1.6396345461245925, "learning_rate": 9.752685886489246e-07, "loss": 0.758, "step": 7337 }, { "epoch": 0.12755306019572737, "grad_norm": 1.0919775537906073, "learning_rate": 9.752598444074805e-07, "loss": 0.2088, "step": 7338 }, { "epoch": 0.1275704427332302, "grad_norm": 1.9326416624959653, "learning_rate": 9.75251098659681e-07, "loss": 0.298, "step": 7339 }, { "epoch": 0.12758782527073304, "grad_norm": 1.2767221394133292, "learning_rate": 9.752423514055537e-07, "loss": 0.6184, "step": 7340 }, { "epoch": 0.12760520780823584, "grad_norm": 2.040112466794743, "learning_rate": 9.752336026451265e-07, "loss": 0.5028, "step": 7341 }, { "epoch": 0.12762259034573867, "grad_norm": 2.6275957518102766, "learning_rate": 9.75224852378427e-07, "loss": 0.4447, "step": 7342 }, { "epoch": 0.1276399728832415, "grad_norm": 1.5999003186446583, "learning_rate": 9.75216100605483e-07, "loss": 0.3774, "step": 7343 }, { "epoch": 0.12765735542074433, "grad_norm": 2.589047313680527, "learning_rate": 9.752073473263223e-07, "loss": 0.6089, "step": 7344 }, { "epoch": 0.12767473795824716, "grad_norm": 1.4412571728471322, "learning_rate": 9.751985925409726e-07, "loss": 0.471, "step": 7345 }, { "epoch": 0.12769212049574996, "grad_norm": 1.636755764677302, "learning_rate": 9.751898362494618e-07, "loss": 0.4936, "step": 7346 }, { "epoch": 0.1277095030332528, "grad_norm": 1.7104345357193362, "learning_rate": 9.751810784518175e-07, "loss": 0.2621, "step": 7347 }, { "epoch": 0.12772688557075562, "grad_norm": 1.8473954176593974, "learning_rate": 9.751723191480671e-07, "loss": 0.4991, "step": 7348 }, { "epoch": 0.12774426810825845, "grad_norm": 1.71326330358234, "learning_rate": 9.751635583382393e-07, "loss": 0.3175, "step": 7349 }, { "epoch": 0.12776165064576125, "grad_norm": 1.5936521487176418, "learning_rate": 9.751547960223607e-07, "loss": 0.5813, "step": 7350 }, { "epoch": 0.12777903318326408, "grad_norm": 1.0830904141813897, "learning_rate": 9.751460322004602e-07, "loss": 0.404, "step": 7351 }, { "epoch": 0.12779641572076691, "grad_norm": 2.5722579084214514, "learning_rate": 9.751372668725645e-07, "loss": 0.2402, "step": 7352 }, { "epoch": 0.12781379825826975, "grad_norm": 3.9087887352118287, "learning_rate": 9.751285000387025e-07, "loss": 0.7021, "step": 7353 }, { "epoch": 0.12783118079577258, "grad_norm": 1.5038618893228888, "learning_rate": 9.75119731698901e-07, "loss": 0.4199, "step": 7354 }, { "epoch": 0.12784856333327538, "grad_norm": 1.4754123242307706, "learning_rate": 9.751109618531884e-07, "loss": 0.9053, "step": 7355 }, { "epoch": 0.1278659458707782, "grad_norm": 1.439678781044173, "learning_rate": 9.75102190501592e-07, "loss": 0.6753, "step": 7356 }, { "epoch": 0.12788332840828104, "grad_norm": 2.2710495654274236, "learning_rate": 9.750934176441401e-07, "loss": 0.356, "step": 7357 }, { "epoch": 0.12790071094578387, "grad_norm": 2.596729922938066, "learning_rate": 9.750846432808603e-07, "loss": 0.4186, "step": 7358 }, { "epoch": 0.1279180934832867, "grad_norm": 2.6192750440678316, "learning_rate": 9.750758674117804e-07, "loss": 0.514, "step": 7359 }, { "epoch": 0.1279354760207895, "grad_norm": 1.858486076788965, "learning_rate": 9.750670900369283e-07, "loss": 0.421, "step": 7360 }, { "epoch": 0.12795285855829233, "grad_norm": 1.9255061093340977, "learning_rate": 9.750583111563319e-07, "loss": 0.5559, "step": 7361 }, { "epoch": 0.12797024109579516, "grad_norm": 2.0408015225734313, "learning_rate": 9.750495307700187e-07, "loss": 0.4966, "step": 7362 }, { "epoch": 0.127987623633298, "grad_norm": 2.313261379345969, "learning_rate": 9.750407488780168e-07, "loss": 0.3838, "step": 7363 }, { "epoch": 0.12800500617080082, "grad_norm": 1.8791759769522414, "learning_rate": 9.750319654803537e-07, "loss": 0.314, "step": 7364 }, { "epoch": 0.12802238870830362, "grad_norm": 4.2821460779571625, "learning_rate": 9.750231805770577e-07, "loss": 1.4467, "step": 7365 }, { "epoch": 0.12803977124580646, "grad_norm": 1.1710252172943012, "learning_rate": 9.750143941681563e-07, "loss": 0.5949, "step": 7366 }, { "epoch": 0.12805715378330929, "grad_norm": 3.061515758185544, "learning_rate": 9.750056062536775e-07, "loss": 0.4126, "step": 7367 }, { "epoch": 0.12807453632081212, "grad_norm": 2.10974728877415, "learning_rate": 9.749968168336493e-07, "loss": 0.2876, "step": 7368 }, { "epoch": 0.12809191885831495, "grad_norm": 2.758715193629189, "learning_rate": 9.749880259080992e-07, "loss": 0.523, "step": 7369 }, { "epoch": 0.12810930139581775, "grad_norm": 1.3418578108995167, "learning_rate": 9.749792334770552e-07, "loss": 0.7657, "step": 7370 }, { "epoch": 0.12812668393332058, "grad_norm": 3.038926795114178, "learning_rate": 9.749704395405453e-07, "loss": 0.6925, "step": 7371 }, { "epoch": 0.1281440664708234, "grad_norm": 1.7113867849115807, "learning_rate": 9.749616440985974e-07, "loss": 0.4518, "step": 7372 }, { "epoch": 0.12816144900832624, "grad_norm": 1.505809435714519, "learning_rate": 9.749528471512391e-07, "loss": 0.4159, "step": 7373 }, { "epoch": 0.12817883154582907, "grad_norm": 1.8494634640199004, "learning_rate": 9.749440486984986e-07, "loss": 0.6648, "step": 7374 }, { "epoch": 0.12819621408333187, "grad_norm": 2.00124831728948, "learning_rate": 9.749352487404035e-07, "loss": 0.4351, "step": 7375 }, { "epoch": 0.1282135966208347, "grad_norm": 1.5880877592544476, "learning_rate": 9.749264472769818e-07, "loss": 0.3304, "step": 7376 }, { "epoch": 0.12823097915833753, "grad_norm": 1.992266667615542, "learning_rate": 9.749176443082616e-07, "loss": 0.2016, "step": 7377 }, { "epoch": 0.12824836169584036, "grad_norm": 3.1793901724451343, "learning_rate": 9.749088398342707e-07, "loss": 0.5808, "step": 7378 }, { "epoch": 0.1282657442333432, "grad_norm": 2.5618065131031105, "learning_rate": 9.749000338550366e-07, "loss": 0.5187, "step": 7379 }, { "epoch": 0.128283126770846, "grad_norm": 3.1777308129027673, "learning_rate": 9.748912263705878e-07, "loss": 0.3297, "step": 7380 }, { "epoch": 0.12830050930834883, "grad_norm": 2.441836230011566, "learning_rate": 9.748824173809518e-07, "loss": 0.5821, "step": 7381 }, { "epoch": 0.12831789184585166, "grad_norm": 3.2396735094325217, "learning_rate": 9.748736068861567e-07, "loss": 0.5941, "step": 7382 }, { "epoch": 0.1283352743833545, "grad_norm": 1.5712786498564102, "learning_rate": 9.748647948862306e-07, "loss": 0.6364, "step": 7383 }, { "epoch": 0.12835265692085732, "grad_norm": 2.01188995842446, "learning_rate": 9.748559813812009e-07, "loss": 0.5333, "step": 7384 }, { "epoch": 0.12837003945836012, "grad_norm": 2.078927885996719, "learning_rate": 9.748471663710961e-07, "loss": 0.5621, "step": 7385 }, { "epoch": 0.12838742199586295, "grad_norm": 2.195087245216164, "learning_rate": 9.74838349855944e-07, "loss": 0.6238, "step": 7386 }, { "epoch": 0.12840480453336578, "grad_norm": 1.5018882413832024, "learning_rate": 9.748295318357723e-07, "loss": 0.5195, "step": 7387 }, { "epoch": 0.1284221870708686, "grad_norm": 1.9042318310758195, "learning_rate": 9.748207123106092e-07, "loss": 0.4124, "step": 7388 }, { "epoch": 0.12843956960837144, "grad_norm": 1.7406256259489472, "learning_rate": 9.748118912804823e-07, "loss": 0.4618, "step": 7389 }, { "epoch": 0.12845695214587424, "grad_norm": 2.978959851428178, "learning_rate": 9.7480306874542e-07, "loss": 0.6098, "step": 7390 }, { "epoch": 0.12847433468337707, "grad_norm": 3.6369432892054223, "learning_rate": 9.747942447054502e-07, "loss": 0.6103, "step": 7391 }, { "epoch": 0.1284917172208799, "grad_norm": 1.9592561877864834, "learning_rate": 9.747854191606007e-07, "loss": 0.7176, "step": 7392 }, { "epoch": 0.12850909975838273, "grad_norm": 1.8287324387468078, "learning_rate": 9.747765921108995e-07, "loss": 0.5822, "step": 7393 }, { "epoch": 0.12852648229588556, "grad_norm": 1.2018666301233492, "learning_rate": 9.747677635563745e-07, "loss": 0.2884, "step": 7394 }, { "epoch": 0.12854386483338837, "grad_norm": 1.7755294422117847, "learning_rate": 9.74758933497054e-07, "loss": 0.3943, "step": 7395 }, { "epoch": 0.1285612473708912, "grad_norm": 1.9593658896924546, "learning_rate": 9.747501019329654e-07, "loss": 0.5301, "step": 7396 }, { "epoch": 0.12857862990839403, "grad_norm": 2.60606650273611, "learning_rate": 9.747412688641372e-07, "loss": 0.444, "step": 7397 }, { "epoch": 0.12859601244589686, "grad_norm": 4.468076230551756, "learning_rate": 9.747324342905974e-07, "loss": 0.6409, "step": 7398 }, { "epoch": 0.1286133949833997, "grad_norm": 1.3741306387447845, "learning_rate": 9.747235982123736e-07, "loss": 0.5447, "step": 7399 }, { "epoch": 0.1286307775209025, "grad_norm": 2.0122366553627513, "learning_rate": 9.747147606294943e-07, "loss": 0.4223, "step": 7400 }, { "epoch": 0.12864816005840532, "grad_norm": 2.025613298916098, "learning_rate": 9.747059215419873e-07, "loss": 0.5511, "step": 7401 }, { "epoch": 0.12866554259590815, "grad_norm": 2.3354861821185673, "learning_rate": 9.746970809498805e-07, "loss": 0.5183, "step": 7402 }, { "epoch": 0.12868292513341098, "grad_norm": 1.467280906619206, "learning_rate": 9.74688238853202e-07, "loss": 0.3537, "step": 7403 }, { "epoch": 0.1287003076709138, "grad_norm": 1.2845323927035164, "learning_rate": 9.746793952519799e-07, "loss": 0.362, "step": 7404 }, { "epoch": 0.1287176902084166, "grad_norm": 1.5211860217691278, "learning_rate": 9.74670550146242e-07, "loss": 0.724, "step": 7405 }, { "epoch": 0.12873507274591944, "grad_norm": 1.7813281443799258, "learning_rate": 9.746617035360167e-07, "loss": 0.4865, "step": 7406 }, { "epoch": 0.12875245528342227, "grad_norm": 1.3539491914223905, "learning_rate": 9.746528554213317e-07, "loss": 0.6044, "step": 7407 }, { "epoch": 0.1287698378209251, "grad_norm": 2.065744688882403, "learning_rate": 9.746440058022152e-07, "loss": 0.3482, "step": 7408 }, { "epoch": 0.12878722035842793, "grad_norm": 4.238221942826103, "learning_rate": 9.746351546786953e-07, "loss": 0.2945, "step": 7409 }, { "epoch": 0.12880460289593074, "grad_norm": 1.891010682073635, "learning_rate": 9.746263020508e-07, "loss": 0.4781, "step": 7410 }, { "epoch": 0.12882198543343357, "grad_norm": 2.710470187309086, "learning_rate": 9.746174479185573e-07, "loss": 0.4083, "step": 7411 }, { "epoch": 0.1288393679709364, "grad_norm": 1.9852939535848464, "learning_rate": 9.746085922819953e-07, "loss": 0.4297, "step": 7412 }, { "epoch": 0.12885675050843923, "grad_norm": 3.022574626694319, "learning_rate": 9.74599735141142e-07, "loss": 0.1947, "step": 7413 }, { "epoch": 0.12887413304594206, "grad_norm": 1.0110820750281306, "learning_rate": 9.745908764960258e-07, "loss": 0.4596, "step": 7414 }, { "epoch": 0.12889151558344486, "grad_norm": 1.2573468725047265, "learning_rate": 9.745820163466744e-07, "loss": 0.3509, "step": 7415 }, { "epoch": 0.1289088981209477, "grad_norm": 0.972164344891918, "learning_rate": 9.74573154693116e-07, "loss": 0.3164, "step": 7416 }, { "epoch": 0.12892628065845052, "grad_norm": 2.3734290478275875, "learning_rate": 9.745642915353787e-07, "loss": 0.6351, "step": 7417 }, { "epoch": 0.12894366319595335, "grad_norm": 2.9579347714929614, "learning_rate": 9.745554268734905e-07, "loss": 0.5391, "step": 7418 }, { "epoch": 0.12896104573345618, "grad_norm": 2.4250740756259876, "learning_rate": 9.745465607074797e-07, "loss": 0.5759, "step": 7419 }, { "epoch": 0.12897842827095898, "grad_norm": 1.6940532650186138, "learning_rate": 9.745376930373743e-07, "loss": 0.2936, "step": 7420 }, { "epoch": 0.1289958108084618, "grad_norm": 1.8343350312095352, "learning_rate": 9.745288238632024e-07, "loss": 0.3755, "step": 7421 }, { "epoch": 0.12901319334596464, "grad_norm": 1.4746679594624441, "learning_rate": 9.745199531849922e-07, "loss": 0.2749, "step": 7422 }, { "epoch": 0.12903057588346747, "grad_norm": 2.1330585423721513, "learning_rate": 9.745110810027715e-07, "loss": 0.3412, "step": 7423 }, { "epoch": 0.1290479584209703, "grad_norm": 1.7329344243435436, "learning_rate": 9.745022073165688e-07, "loss": 0.2964, "step": 7424 }, { "epoch": 0.1290653409584731, "grad_norm": 2.181394767133487, "learning_rate": 9.74493332126412e-07, "loss": 0.4588, "step": 7425 }, { "epoch": 0.12908272349597594, "grad_norm": 2.3029743915114658, "learning_rate": 9.744844554323292e-07, "loss": 0.5325, "step": 7426 }, { "epoch": 0.12910010603347877, "grad_norm": 1.7254660373263078, "learning_rate": 9.74475577234349e-07, "loss": 0.4017, "step": 7427 }, { "epoch": 0.1291174885709816, "grad_norm": 2.5448053394221914, "learning_rate": 9.744666975324989e-07, "loss": 0.3711, "step": 7428 }, { "epoch": 0.12913487110848443, "grad_norm": 1.8522712038847067, "learning_rate": 9.744578163268074e-07, "loss": 0.5143, "step": 7429 }, { "epoch": 0.12915225364598723, "grad_norm": 1.7193827108784583, "learning_rate": 9.744489336173025e-07, "loss": 0.5152, "step": 7430 }, { "epoch": 0.12916963618349006, "grad_norm": 2.172886864561666, "learning_rate": 9.744400494040123e-07, "loss": 0.487, "step": 7431 }, { "epoch": 0.1291870187209929, "grad_norm": 1.6312501386966007, "learning_rate": 9.744311636869653e-07, "loss": 0.415, "step": 7432 }, { "epoch": 0.12920440125849572, "grad_norm": 1.7321694578733768, "learning_rate": 9.744222764661894e-07, "loss": 0.4567, "step": 7433 }, { "epoch": 0.12922178379599855, "grad_norm": 2.516741491543479, "learning_rate": 9.74413387741713e-07, "loss": 0.6315, "step": 7434 }, { "epoch": 0.12923916633350135, "grad_norm": 2.1186796294542862, "learning_rate": 9.744044975135637e-07, "loss": 0.5787, "step": 7435 }, { "epoch": 0.12925654887100418, "grad_norm": 1.96047732214476, "learning_rate": 9.743956057817703e-07, "loss": 0.43, "step": 7436 }, { "epoch": 0.12927393140850701, "grad_norm": 1.6227059488487166, "learning_rate": 9.743867125463608e-07, "loss": 0.5508, "step": 7437 }, { "epoch": 0.12929131394600984, "grad_norm": 6.256074789461484, "learning_rate": 9.743778178073633e-07, "loss": 0.7011, "step": 7438 }, { "epoch": 0.12930869648351268, "grad_norm": 1.6988064714196267, "learning_rate": 9.743689215648058e-07, "loss": 0.5551, "step": 7439 }, { "epoch": 0.12932607902101548, "grad_norm": 1.2814298444373038, "learning_rate": 9.74360023818717e-07, "loss": 0.2802, "step": 7440 }, { "epoch": 0.1293434615585183, "grad_norm": 1.5863777669639532, "learning_rate": 9.743511245691247e-07, "loss": 0.384, "step": 7441 }, { "epoch": 0.12936084409602114, "grad_norm": 1.9292713810221607, "learning_rate": 9.743422238160573e-07, "loss": 0.5384, "step": 7442 }, { "epoch": 0.12937822663352397, "grad_norm": 1.861043503981735, "learning_rate": 9.743333215595428e-07, "loss": 0.5501, "step": 7443 }, { "epoch": 0.1293956091710268, "grad_norm": 1.8632673929768968, "learning_rate": 9.743244177996098e-07, "loss": 0.4271, "step": 7444 }, { "epoch": 0.1294129917085296, "grad_norm": 1.704607613536909, "learning_rate": 9.74315512536286e-07, "loss": 0.3439, "step": 7445 }, { "epoch": 0.12943037424603243, "grad_norm": 2.862071465015795, "learning_rate": 9.743066057696e-07, "loss": 0.3781, "step": 7446 }, { "epoch": 0.12944775678353526, "grad_norm": 1.541914817717041, "learning_rate": 9.7429769749958e-07, "loss": 0.3917, "step": 7447 }, { "epoch": 0.1294651393210381, "grad_norm": 1.9685391942569967, "learning_rate": 9.742887877262543e-07, "loss": 0.3782, "step": 7448 }, { "epoch": 0.12948252185854092, "grad_norm": 1.6300151644516652, "learning_rate": 9.742798764496508e-07, "loss": 0.443, "step": 7449 }, { "epoch": 0.12949990439604372, "grad_norm": 4.155975799062111, "learning_rate": 9.74270963669798e-07, "loss": 0.4642, "step": 7450 }, { "epoch": 0.12951728693354655, "grad_norm": 2.982725450721268, "learning_rate": 9.742620493867241e-07, "loss": 0.4445, "step": 7451 }, { "epoch": 0.12953466947104939, "grad_norm": 1.4961104840229498, "learning_rate": 9.742531336004575e-07, "loss": 0.2836, "step": 7452 }, { "epoch": 0.12955205200855222, "grad_norm": 2.015992358746968, "learning_rate": 9.742442163110263e-07, "loss": 0.6637, "step": 7453 }, { "epoch": 0.12956943454605505, "grad_norm": 1.453870612699433, "learning_rate": 9.742352975184587e-07, "loss": 0.3975, "step": 7454 }, { "epoch": 0.12958681708355785, "grad_norm": 2.013852665476354, "learning_rate": 9.74226377222783e-07, "loss": 0.4529, "step": 7455 }, { "epoch": 0.12960419962106068, "grad_norm": 1.8264263298169658, "learning_rate": 9.742174554240277e-07, "loss": 0.5454, "step": 7456 }, { "epoch": 0.1296215821585635, "grad_norm": 2.1130130473337574, "learning_rate": 9.742085321222207e-07, "loss": 0.3993, "step": 7457 }, { "epoch": 0.12963896469606634, "grad_norm": 2.1470533377762755, "learning_rate": 9.741996073173905e-07, "loss": 0.7356, "step": 7458 }, { "epoch": 0.12965634723356917, "grad_norm": 1.817047360421377, "learning_rate": 9.741906810095656e-07, "loss": 0.3769, "step": 7459 }, { "epoch": 0.12967372977107197, "grad_norm": 1.6483044912011307, "learning_rate": 9.74181753198774e-07, "loss": 0.4606, "step": 7460 }, { "epoch": 0.1296911123085748, "grad_norm": 1.7991325289829676, "learning_rate": 9.741728238850439e-07, "loss": 0.3524, "step": 7461 }, { "epoch": 0.12970849484607763, "grad_norm": 1.5935475545725815, "learning_rate": 9.741638930684038e-07, "loss": 0.2036, "step": 7462 }, { "epoch": 0.12972587738358046, "grad_norm": 1.5105903192294157, "learning_rate": 9.741549607488822e-07, "loss": 0.516, "step": 7463 }, { "epoch": 0.1297432599210833, "grad_norm": 2.104854391119299, "learning_rate": 9.741460269265069e-07, "loss": 0.4185, "step": 7464 }, { "epoch": 0.1297606424585861, "grad_norm": 2.1499515907635076, "learning_rate": 9.741370916013065e-07, "loss": 0.4815, "step": 7465 }, { "epoch": 0.12977802499608893, "grad_norm": 3.223455040137371, "learning_rate": 9.741281547733095e-07, "loss": 0.514, "step": 7466 }, { "epoch": 0.12979540753359176, "grad_norm": 1.7836519700116353, "learning_rate": 9.74119216442544e-07, "loss": 0.502, "step": 7467 }, { "epoch": 0.12981279007109459, "grad_norm": 2.044596422286738, "learning_rate": 9.741102766090384e-07, "loss": 0.4321, "step": 7468 }, { "epoch": 0.12983017260859742, "grad_norm": 2.5342707204184824, "learning_rate": 9.741013352728208e-07, "loss": 0.6047, "step": 7469 }, { "epoch": 0.12984755514610022, "grad_norm": 2.6210134440931054, "learning_rate": 9.7409239243392e-07, "loss": 0.3553, "step": 7470 }, { "epoch": 0.12986493768360305, "grad_norm": 3.146492246098001, "learning_rate": 9.74083448092364e-07, "loss": 0.7441, "step": 7471 }, { "epoch": 0.12988232022110588, "grad_norm": 1.916375430695677, "learning_rate": 9.740745022481812e-07, "loss": 0.4281, "step": 7472 }, { "epoch": 0.1298997027586087, "grad_norm": 3.4733078578969905, "learning_rate": 9.740655549014e-07, "loss": 0.9321, "step": 7473 }, { "epoch": 0.12991708529611154, "grad_norm": 2.166009915234261, "learning_rate": 9.74056606052049e-07, "loss": 0.2962, "step": 7474 }, { "epoch": 0.12993446783361434, "grad_norm": 2.011000499134712, "learning_rate": 9.74047655700156e-07, "loss": 0.6357, "step": 7475 }, { "epoch": 0.12995185037111717, "grad_norm": 1.3216326286239857, "learning_rate": 9.7403870384575e-07, "loss": 0.5001, "step": 7476 }, { "epoch": 0.12996923290862, "grad_norm": 3.086626294156691, "learning_rate": 9.740297504888588e-07, "loss": 0.9701, "step": 7477 }, { "epoch": 0.12998661544612283, "grad_norm": 2.1768526569106688, "learning_rate": 9.740207956295111e-07, "loss": 0.8612, "step": 7478 }, { "epoch": 0.13000399798362566, "grad_norm": 1.2271404524300864, "learning_rate": 9.740118392677354e-07, "loss": 0.359, "step": 7479 }, { "epoch": 0.13002138052112847, "grad_norm": 2.572333529553337, "learning_rate": 9.740028814035596e-07, "loss": 0.3351, "step": 7480 }, { "epoch": 0.1300387630586313, "grad_norm": 1.9820390322901984, "learning_rate": 9.739939220370127e-07, "loss": 0.5654, "step": 7481 }, { "epoch": 0.13005614559613413, "grad_norm": 1.5540745208795006, "learning_rate": 9.739849611681226e-07, "loss": 0.3818, "step": 7482 }, { "epoch": 0.13007352813363696, "grad_norm": 1.8285705324962391, "learning_rate": 9.739759987969181e-07, "loss": 0.5522, "step": 7483 }, { "epoch": 0.1300909106711398, "grad_norm": 1.5002435223076553, "learning_rate": 9.739670349234272e-07, "loss": 0.7373, "step": 7484 }, { "epoch": 0.1301082932086426, "grad_norm": 1.7949165310961315, "learning_rate": 9.739580695476787e-07, "loss": 0.2654, "step": 7485 }, { "epoch": 0.13012567574614542, "grad_norm": 1.7626476366164063, "learning_rate": 9.739491026697006e-07, "loss": 0.4678, "step": 7486 }, { "epoch": 0.13014305828364825, "grad_norm": 1.8297906619078137, "learning_rate": 9.739401342895218e-07, "loss": 0.3229, "step": 7487 }, { "epoch": 0.13016044082115108, "grad_norm": 1.6085720078918835, "learning_rate": 9.739311644071703e-07, "loss": 0.3427, "step": 7488 }, { "epoch": 0.13017782335865388, "grad_norm": 3.570790670265229, "learning_rate": 9.73922193022675e-07, "loss": 0.1779, "step": 7489 }, { "epoch": 0.1301952058961567, "grad_norm": 1.6762119077815294, "learning_rate": 9.739132201360638e-07, "loss": 0.5379, "step": 7490 }, { "epoch": 0.13021258843365954, "grad_norm": 1.5787788920635155, "learning_rate": 9.739042457473655e-07, "loss": 0.1859, "step": 7491 }, { "epoch": 0.13022997097116237, "grad_norm": 2.918431721840546, "learning_rate": 9.738952698566082e-07, "loss": 0.8, "step": 7492 }, { "epoch": 0.1302473535086652, "grad_norm": 2.2716366544438666, "learning_rate": 9.73886292463821e-07, "loss": 0.336, "step": 7493 }, { "epoch": 0.130264736046168, "grad_norm": 2.0604384011974557, "learning_rate": 9.738773135690317e-07, "loss": 0.3232, "step": 7494 }, { "epoch": 0.13028211858367084, "grad_norm": 1.8484501525097479, "learning_rate": 9.73868333172269e-07, "loss": 0.3823, "step": 7495 }, { "epoch": 0.13029950112117367, "grad_norm": 1.8835410127048595, "learning_rate": 9.738593512735613e-07, "loss": 0.5421, "step": 7496 }, { "epoch": 0.1303168836586765, "grad_norm": 5.216058050411758, "learning_rate": 9.738503678729373e-07, "loss": 1.0461, "step": 7497 }, { "epoch": 0.13033426619617933, "grad_norm": 2.5374578988064234, "learning_rate": 9.73841382970425e-07, "loss": 0.4027, "step": 7498 }, { "epoch": 0.13035164873368213, "grad_norm": 2.1034300255143337, "learning_rate": 9.738323965660536e-07, "loss": 0.5693, "step": 7499 }, { "epoch": 0.13036903127118496, "grad_norm": 1.6336739305270263, "learning_rate": 9.738234086598508e-07, "loss": 0.9726, "step": 7500 }, { "epoch": 0.1303864138086878, "grad_norm": 0.9660020384411627, "learning_rate": 9.738144192518456e-07, "loss": 0.1703, "step": 7501 }, { "epoch": 0.13040379634619062, "grad_norm": 1.9786380108175001, "learning_rate": 9.738054283420665e-07, "loss": 0.5398, "step": 7502 }, { "epoch": 0.13042117888369345, "grad_norm": 3.387058594685001, "learning_rate": 9.737964359305417e-07, "loss": 0.543, "step": 7503 }, { "epoch": 0.13043856142119625, "grad_norm": 1.7580025663946686, "learning_rate": 9.737874420172999e-07, "loss": 0.4031, "step": 7504 }, { "epoch": 0.13045594395869908, "grad_norm": 2.665055612455665, "learning_rate": 9.737784466023692e-07, "loss": 0.5596, "step": 7505 }, { "epoch": 0.1304733264962019, "grad_norm": 1.996280145943252, "learning_rate": 9.73769449685779e-07, "loss": 0.3956, "step": 7506 }, { "epoch": 0.13049070903370474, "grad_norm": 2.3803207462391622, "learning_rate": 9.737604512675569e-07, "loss": 0.3019, "step": 7507 }, { "epoch": 0.13050809157120757, "grad_norm": 2.497828763143124, "learning_rate": 9.737514513477319e-07, "loss": 0.8675, "step": 7508 }, { "epoch": 0.13052547410871038, "grad_norm": 2.9008478280829073, "learning_rate": 9.737424499263324e-07, "loss": 0.5001, "step": 7509 }, { "epoch": 0.1305428566462132, "grad_norm": 2.0213099979053775, "learning_rate": 9.73733447003387e-07, "loss": 0.2403, "step": 7510 }, { "epoch": 0.13056023918371604, "grad_norm": 1.3460461797153063, "learning_rate": 9.737244425789242e-07, "loss": 0.5677, "step": 7511 }, { "epoch": 0.13057762172121887, "grad_norm": 1.880508530838433, "learning_rate": 9.737154366529726e-07, "loss": 0.5182, "step": 7512 }, { "epoch": 0.1305950042587217, "grad_norm": 1.9685051494747992, "learning_rate": 9.737064292255604e-07, "loss": 0.4313, "step": 7513 }, { "epoch": 0.1306123867962245, "grad_norm": 2.276779082203523, "learning_rate": 9.736974202967165e-07, "loss": 0.6289, "step": 7514 }, { "epoch": 0.13062976933372733, "grad_norm": 3.7690891136687448, "learning_rate": 9.736884098664697e-07, "loss": 0.5725, "step": 7515 }, { "epoch": 0.13064715187123016, "grad_norm": 1.9204397667791688, "learning_rate": 9.736793979348478e-07, "loss": 0.4745, "step": 7516 }, { "epoch": 0.130664534408733, "grad_norm": 2.767018899940168, "learning_rate": 9.736703845018802e-07, "loss": 0.445, "step": 7517 }, { "epoch": 0.13068191694623582, "grad_norm": 2.884591686139058, "learning_rate": 9.73661369567595e-07, "loss": 0.5516, "step": 7518 }, { "epoch": 0.13069929948373862, "grad_norm": 3.8727898678546633, "learning_rate": 9.736523531320204e-07, "loss": 0.6802, "step": 7519 }, { "epoch": 0.13071668202124145, "grad_norm": 1.5561576171515108, "learning_rate": 9.736433351951859e-07, "loss": 0.4334, "step": 7520 }, { "epoch": 0.13073406455874428, "grad_norm": 2.449871778368248, "learning_rate": 9.736343157571195e-07, "loss": 0.5255, "step": 7521 }, { "epoch": 0.13075144709624711, "grad_norm": 3.0473310437854058, "learning_rate": 9.736252948178496e-07, "loss": 0.3545, "step": 7522 }, { "epoch": 0.13076882963374994, "grad_norm": 2.986039150721774, "learning_rate": 9.736162723774053e-07, "loss": 0.5506, "step": 7523 }, { "epoch": 0.13078621217125275, "grad_norm": 3.476415007861504, "learning_rate": 9.73607248435815e-07, "loss": 0.2303, "step": 7524 }, { "epoch": 0.13080359470875558, "grad_norm": 1.974599312454428, "learning_rate": 9.735982229931074e-07, "loss": 0.7336, "step": 7525 }, { "epoch": 0.1308209772462584, "grad_norm": 2.231563459478445, "learning_rate": 9.735891960493107e-07, "loss": 0.4962, "step": 7526 }, { "epoch": 0.13083835978376124, "grad_norm": 3.7201518702574403, "learning_rate": 9.73580167604454e-07, "loss": 0.5854, "step": 7527 }, { "epoch": 0.13085574232126407, "grad_norm": 2.114959415455926, "learning_rate": 9.735711376585657e-07, "loss": 0.3548, "step": 7528 }, { "epoch": 0.13087312485876687, "grad_norm": 1.3152126589792243, "learning_rate": 9.735621062116744e-07, "loss": 0.369, "step": 7529 }, { "epoch": 0.1308905073962697, "grad_norm": 2.3820717455060954, "learning_rate": 9.735530732638088e-07, "loss": 0.5838, "step": 7530 }, { "epoch": 0.13090788993377253, "grad_norm": 1.1945944908643, "learning_rate": 9.735440388149973e-07, "loss": 0.5046, "step": 7531 }, { "epoch": 0.13092527247127536, "grad_norm": 3.3014558935618337, "learning_rate": 9.73535002865269e-07, "loss": 0.4031, "step": 7532 }, { "epoch": 0.1309426550087782, "grad_norm": 1.7046265532185816, "learning_rate": 9.735259654146522e-07, "loss": 0.6627, "step": 7533 }, { "epoch": 0.130960037546281, "grad_norm": 2.8416448669275733, "learning_rate": 9.735169264631755e-07, "loss": 0.7016, "step": 7534 }, { "epoch": 0.13097742008378382, "grad_norm": 2.2066147229217985, "learning_rate": 9.735078860108677e-07, "loss": 0.5332, "step": 7535 }, { "epoch": 0.13099480262128665, "grad_norm": 1.450306067323607, "learning_rate": 9.734988440577574e-07, "loss": 0.34, "step": 7536 }, { "epoch": 0.13101218515878948, "grad_norm": 2.2039389777064047, "learning_rate": 9.734898006038732e-07, "loss": 0.3839, "step": 7537 }, { "epoch": 0.13102956769629232, "grad_norm": 4.758421970534507, "learning_rate": 9.73480755649244e-07, "loss": 0.7362, "step": 7538 }, { "epoch": 0.13104695023379512, "grad_norm": 1.8530307958804377, "learning_rate": 9.734717091938982e-07, "loss": 0.7661, "step": 7539 }, { "epoch": 0.13106433277129795, "grad_norm": 1.9761768721250037, "learning_rate": 9.734626612378647e-07, "loss": 0.5041, "step": 7540 }, { "epoch": 0.13108171530880078, "grad_norm": 2.4162235944184576, "learning_rate": 9.734536117811717e-07, "loss": 0.5145, "step": 7541 }, { "epoch": 0.1310990978463036, "grad_norm": 1.4411560997325301, "learning_rate": 9.734445608238485e-07, "loss": 0.2886, "step": 7542 }, { "epoch": 0.13111648038380644, "grad_norm": 2.515096376442059, "learning_rate": 9.734355083659237e-07, "loss": 0.6085, "step": 7543 }, { "epoch": 0.13113386292130924, "grad_norm": 1.452087092558159, "learning_rate": 9.734264544074257e-07, "loss": 0.5402, "step": 7544 }, { "epoch": 0.13115124545881207, "grad_norm": 2.5002904263864885, "learning_rate": 9.73417398948383e-07, "loss": 0.4135, "step": 7545 }, { "epoch": 0.1311686279963149, "grad_norm": 1.685943385294427, "learning_rate": 9.734083419888251e-07, "loss": 0.3401, "step": 7546 }, { "epoch": 0.13118601053381773, "grad_norm": 2.136772719169125, "learning_rate": 9.7339928352878e-07, "loss": 0.6085, "step": 7547 }, { "epoch": 0.13120339307132056, "grad_norm": 2.8811600759047145, "learning_rate": 9.733902235682766e-07, "loss": 0.5645, "step": 7548 }, { "epoch": 0.13122077560882336, "grad_norm": 1.9447565948945411, "learning_rate": 9.733811621073436e-07, "loss": 0.3906, "step": 7549 }, { "epoch": 0.1312381581463262, "grad_norm": 2.0000351581853333, "learning_rate": 9.733720991460098e-07, "loss": 0.4498, "step": 7550 }, { "epoch": 0.13125554068382903, "grad_norm": 1.9183813804226848, "learning_rate": 9.73363034684304e-07, "loss": 0.3098, "step": 7551 }, { "epoch": 0.13127292322133186, "grad_norm": 1.3844697704805753, "learning_rate": 9.733539687222548e-07, "loss": 0.6811, "step": 7552 }, { "epoch": 0.13129030575883469, "grad_norm": 2.350033407019005, "learning_rate": 9.733449012598909e-07, "loss": 0.4569, "step": 7553 }, { "epoch": 0.1313076882963375, "grad_norm": 1.4828805894202748, "learning_rate": 9.73335832297241e-07, "loss": 0.2734, "step": 7554 }, { "epoch": 0.13132507083384032, "grad_norm": 2.0520527313141605, "learning_rate": 9.73326761834334e-07, "loss": 0.6034, "step": 7555 }, { "epoch": 0.13134245337134315, "grad_norm": 2.3197147782332617, "learning_rate": 9.73317689871199e-07, "loss": 0.5356, "step": 7556 }, { "epoch": 0.13135983590884598, "grad_norm": 2.471299302989679, "learning_rate": 9.73308616407864e-07, "loss": 0.7668, "step": 7557 }, { "epoch": 0.1313772184463488, "grad_norm": 1.5377158982481505, "learning_rate": 9.73299541444358e-07, "loss": 0.2704, "step": 7558 }, { "epoch": 0.1313946009838516, "grad_norm": 1.6609245954494107, "learning_rate": 9.7329046498071e-07, "loss": 0.4764, "step": 7559 }, { "epoch": 0.13141198352135444, "grad_norm": 1.9370685110881964, "learning_rate": 9.732813870169487e-07, "loss": 0.4137, "step": 7560 }, { "epoch": 0.13142936605885727, "grad_norm": 1.1793728125570255, "learning_rate": 9.732723075531027e-07, "loss": 0.2895, "step": 7561 }, { "epoch": 0.1314467485963601, "grad_norm": 1.2659524141277425, "learning_rate": 9.732632265892009e-07, "loss": 0.3582, "step": 7562 }, { "epoch": 0.13146413113386293, "grad_norm": 2.8246833001291862, "learning_rate": 9.73254144125272e-07, "loss": 0.488, "step": 7563 }, { "epoch": 0.13148151367136574, "grad_norm": 1.6033225013161285, "learning_rate": 9.73245060161345e-07, "loss": 0.51, "step": 7564 }, { "epoch": 0.13149889620886857, "grad_norm": 1.7100153526033877, "learning_rate": 9.732359746974485e-07, "loss": 0.4174, "step": 7565 }, { "epoch": 0.1315162787463714, "grad_norm": 1.830620427478602, "learning_rate": 9.732268877336113e-07, "loss": 0.585, "step": 7566 }, { "epoch": 0.13153366128387423, "grad_norm": 1.1978236389010992, "learning_rate": 9.732177992698623e-07, "loss": 0.6098, "step": 7567 }, { "epoch": 0.13155104382137706, "grad_norm": 1.731554903559696, "learning_rate": 9.732087093062304e-07, "loss": 0.3049, "step": 7568 }, { "epoch": 0.13156842635887986, "grad_norm": 1.127948606738071, "learning_rate": 9.73199617842744e-07, "loss": 0.3575, "step": 7569 }, { "epoch": 0.1315858088963827, "grad_norm": 1.8821707120523585, "learning_rate": 9.731905248794324e-07, "loss": 0.7254, "step": 7570 }, { "epoch": 0.13160319143388552, "grad_norm": 2.304679213240769, "learning_rate": 9.731814304163241e-07, "loss": 0.6674, "step": 7571 }, { "epoch": 0.13162057397138835, "grad_norm": 2.179090765222237, "learning_rate": 9.73172334453448e-07, "loss": 0.5928, "step": 7572 }, { "epoch": 0.13163795650889118, "grad_norm": 2.007121223041612, "learning_rate": 9.73163236990833e-07, "loss": 0.3572, "step": 7573 }, { "epoch": 0.13165533904639398, "grad_norm": 1.5371430168043867, "learning_rate": 9.731541380285077e-07, "loss": 0.3329, "step": 7574 }, { "epoch": 0.1316727215838968, "grad_norm": 2.2775007927309403, "learning_rate": 9.731450375665014e-07, "loss": 0.4585, "step": 7575 }, { "epoch": 0.13169010412139964, "grad_norm": 1.4066712405218624, "learning_rate": 9.731359356048426e-07, "loss": 0.2413, "step": 7576 }, { "epoch": 0.13170748665890247, "grad_norm": 2.2239645689981193, "learning_rate": 9.731268321435602e-07, "loss": 0.3319, "step": 7577 }, { "epoch": 0.1317248691964053, "grad_norm": 1.7807605052966913, "learning_rate": 9.73117727182683e-07, "loss": 0.3144, "step": 7578 }, { "epoch": 0.1317422517339081, "grad_norm": 3.0187595900581363, "learning_rate": 9.7310862072224e-07, "loss": 0.6129, "step": 7579 }, { "epoch": 0.13175963427141094, "grad_norm": 2.4184672914252494, "learning_rate": 9.730995127622602e-07, "loss": 0.5558, "step": 7580 }, { "epoch": 0.13177701680891377, "grad_norm": 1.9407537559272297, "learning_rate": 9.73090403302772e-07, "loss": 0.6492, "step": 7581 }, { "epoch": 0.1317943993464166, "grad_norm": 1.3279999217402274, "learning_rate": 9.730812923438047e-07, "loss": 0.3928, "step": 7582 }, { "epoch": 0.13181178188391943, "grad_norm": 1.585969020212982, "learning_rate": 9.730721798853868e-07, "loss": 0.6369, "step": 7583 }, { "epoch": 0.13182916442142223, "grad_norm": 2.031913431656473, "learning_rate": 9.730630659275477e-07, "loss": 0.5106, "step": 7584 }, { "epoch": 0.13184654695892506, "grad_norm": 3.222292909765489, "learning_rate": 9.73053950470316e-07, "loss": 0.4731, "step": 7585 }, { "epoch": 0.1318639294964279, "grad_norm": 1.4103056325448802, "learning_rate": 9.730448335137203e-07, "loss": 0.4828, "step": 7586 }, { "epoch": 0.13188131203393072, "grad_norm": 2.4827313860659372, "learning_rate": 9.7303571505779e-07, "loss": 1.0395, "step": 7587 }, { "epoch": 0.13189869457143355, "grad_norm": 1.3778780457757767, "learning_rate": 9.730265951025535e-07, "loss": 0.5582, "step": 7588 }, { "epoch": 0.13191607710893635, "grad_norm": 1.8447824068996264, "learning_rate": 9.730174736480403e-07, "loss": 0.4738, "step": 7589 }, { "epoch": 0.13193345964643918, "grad_norm": 1.9024403722517322, "learning_rate": 9.730083506942789e-07, "loss": 0.6726, "step": 7590 }, { "epoch": 0.131950842183942, "grad_norm": 1.8564862128157054, "learning_rate": 9.729992262412983e-07, "loss": 0.4531, "step": 7591 }, { "epoch": 0.13196822472144484, "grad_norm": 2.880384277882485, "learning_rate": 9.729901002891274e-07, "loss": 0.434, "step": 7592 }, { "epoch": 0.13198560725894767, "grad_norm": 4.240100887443488, "learning_rate": 9.72980972837795e-07, "loss": 0.6692, "step": 7593 }, { "epoch": 0.13200298979645048, "grad_norm": 1.8166337004140358, "learning_rate": 9.729718438873304e-07, "loss": 0.37, "step": 7594 }, { "epoch": 0.1320203723339533, "grad_norm": 2.158387483403576, "learning_rate": 9.729627134377623e-07, "loss": 0.3484, "step": 7595 }, { "epoch": 0.13203775487145614, "grad_norm": 1.869474422317231, "learning_rate": 9.729535814891197e-07, "loss": 0.4718, "step": 7596 }, { "epoch": 0.13205513740895897, "grad_norm": 1.906263310426771, "learning_rate": 9.729444480414313e-07, "loss": 0.7186, "step": 7597 }, { "epoch": 0.1320725199464618, "grad_norm": 2.4749064999788146, "learning_rate": 9.729353130947264e-07, "loss": 0.6303, "step": 7598 }, { "epoch": 0.1320899024839646, "grad_norm": 2.60700872849639, "learning_rate": 9.729261766490338e-07, "loss": 0.385, "step": 7599 }, { "epoch": 0.13210728502146743, "grad_norm": 1.479213561838857, "learning_rate": 9.729170387043824e-07, "loss": 0.5006, "step": 7600 }, { "epoch": 0.13212466755897026, "grad_norm": 1.7338336923539226, "learning_rate": 9.729078992608012e-07, "loss": 0.8125, "step": 7601 }, { "epoch": 0.1321420500964731, "grad_norm": 1.4253995265973345, "learning_rate": 9.728987583183194e-07, "loss": 0.5348, "step": 7602 }, { "epoch": 0.13215943263397592, "grad_norm": 1.988994087672061, "learning_rate": 9.728896158769656e-07, "loss": 0.303, "step": 7603 }, { "epoch": 0.13217681517147872, "grad_norm": 1.6292287712788498, "learning_rate": 9.728804719367691e-07, "loss": 0.3203, "step": 7604 }, { "epoch": 0.13219419770898155, "grad_norm": 3.8987547068286847, "learning_rate": 9.728713264977587e-07, "loss": 0.6955, "step": 7605 }, { "epoch": 0.13221158024648438, "grad_norm": 1.4750207418163455, "learning_rate": 9.728621795599633e-07, "loss": 0.3608, "step": 7606 }, { "epoch": 0.13222896278398721, "grad_norm": 1.4558797504977807, "learning_rate": 9.728530311234119e-07, "loss": 0.4415, "step": 7607 }, { "epoch": 0.13224634532149004, "grad_norm": 1.4246264774651647, "learning_rate": 9.728438811881337e-07, "loss": 0.3986, "step": 7608 }, { "epoch": 0.13226372785899285, "grad_norm": 1.4427742095546545, "learning_rate": 9.728347297541576e-07, "loss": 0.376, "step": 7609 }, { "epoch": 0.13228111039649568, "grad_norm": 2.331920107248938, "learning_rate": 9.728255768215127e-07, "loss": 0.5438, "step": 7610 }, { "epoch": 0.1322984929339985, "grad_norm": 1.8974020613641012, "learning_rate": 9.72816422390228e-07, "loss": 0.5418, "step": 7611 }, { "epoch": 0.13231587547150134, "grad_norm": 2.296886408182791, "learning_rate": 9.728072664603323e-07, "loss": 0.6349, "step": 7612 }, { "epoch": 0.13233325800900417, "grad_norm": 1.990498826576853, "learning_rate": 9.727981090318546e-07, "loss": 0.4695, "step": 7613 }, { "epoch": 0.13235064054650697, "grad_norm": 2.504459425477434, "learning_rate": 9.727889501048243e-07, "loss": 0.4795, "step": 7614 }, { "epoch": 0.1323680230840098, "grad_norm": 2.2141134377010174, "learning_rate": 9.727797896792701e-07, "loss": 0.6198, "step": 7615 }, { "epoch": 0.13238540562151263, "grad_norm": 1.4249774910428417, "learning_rate": 9.727706277552212e-07, "loss": 0.4049, "step": 7616 }, { "epoch": 0.13240278815901546, "grad_norm": 2.813472682292849, "learning_rate": 9.727614643327067e-07, "loss": 0.8575, "step": 7617 }, { "epoch": 0.1324201706965183, "grad_norm": 1.269400642592087, "learning_rate": 9.727522994117553e-07, "loss": 0.4011, "step": 7618 }, { "epoch": 0.1324375532340211, "grad_norm": 1.4315843608030723, "learning_rate": 9.727431329923965e-07, "loss": 0.2376, "step": 7619 }, { "epoch": 0.13245493577152392, "grad_norm": 2.885349954746813, "learning_rate": 9.727339650746588e-07, "loss": 0.545, "step": 7620 }, { "epoch": 0.13247231830902675, "grad_norm": 1.3430848555020154, "learning_rate": 9.72724795658572e-07, "loss": 0.3309, "step": 7621 }, { "epoch": 0.13248970084652958, "grad_norm": 1.988769546119718, "learning_rate": 9.727156247441646e-07, "loss": 0.5292, "step": 7622 }, { "epoch": 0.13250708338403241, "grad_norm": 1.6106428128834234, "learning_rate": 9.727064523314657e-07, "loss": 0.4095, "step": 7623 }, { "epoch": 0.13252446592153522, "grad_norm": 1.47256796536059, "learning_rate": 9.726972784205045e-07, "loss": 0.6218, "step": 7624 }, { "epoch": 0.13254184845903805, "grad_norm": 2.044713966978766, "learning_rate": 9.726881030113102e-07, "loss": 0.6511, "step": 7625 }, { "epoch": 0.13255923099654088, "grad_norm": 2.038508974595976, "learning_rate": 9.726789261039115e-07, "loss": 0.2394, "step": 7626 }, { "epoch": 0.1325766135340437, "grad_norm": 2.409303475703564, "learning_rate": 9.72669747698338e-07, "loss": 0.752, "step": 7627 }, { "epoch": 0.13259399607154654, "grad_norm": 1.1680363040238486, "learning_rate": 9.726605677946186e-07, "loss": 0.3385, "step": 7628 }, { "epoch": 0.13261137860904934, "grad_norm": 1.7803649430050243, "learning_rate": 9.72651386392782e-07, "loss": 0.4709, "step": 7629 }, { "epoch": 0.13262876114655217, "grad_norm": 1.5222994542513817, "learning_rate": 9.726422034928577e-07, "loss": 0.23, "step": 7630 }, { "epoch": 0.132646143684055, "grad_norm": 2.022253603651732, "learning_rate": 9.72633019094875e-07, "loss": 0.7181, "step": 7631 }, { "epoch": 0.13266352622155783, "grad_norm": 1.5876623775842584, "learning_rate": 9.726238331988623e-07, "loss": 0.4909, "step": 7632 }, { "epoch": 0.13268090875906063, "grad_norm": 1.8656804001774638, "learning_rate": 9.726146458048493e-07, "loss": 0.3891, "step": 7633 }, { "epoch": 0.13269829129656346, "grad_norm": 1.855221897448483, "learning_rate": 9.726054569128653e-07, "loss": 0.5436, "step": 7634 }, { "epoch": 0.1327156738340663, "grad_norm": 2.465542819583693, "learning_rate": 9.725962665229389e-07, "loss": 0.5676, "step": 7635 }, { "epoch": 0.13273305637156912, "grad_norm": 1.3054050317444204, "learning_rate": 9.725870746350993e-07, "loss": 0.5278, "step": 7636 }, { "epoch": 0.13275043890907195, "grad_norm": 2.4340236776883186, "learning_rate": 9.72577881249376e-07, "loss": 0.4657, "step": 7637 }, { "epoch": 0.13276782144657476, "grad_norm": 3.161124081035567, "learning_rate": 9.725686863657977e-07, "loss": 0.3657, "step": 7638 }, { "epoch": 0.1327852039840776, "grad_norm": 1.709514058431674, "learning_rate": 9.725594899843937e-07, "loss": 0.4334, "step": 7639 }, { "epoch": 0.13280258652158042, "grad_norm": 1.883893637372528, "learning_rate": 9.725502921051933e-07, "loss": 0.5331, "step": 7640 }, { "epoch": 0.13281996905908325, "grad_norm": 2.0956372086591046, "learning_rate": 9.725410927282256e-07, "loss": 0.443, "step": 7641 }, { "epoch": 0.13283735159658608, "grad_norm": 1.444336632227615, "learning_rate": 9.725318918535197e-07, "loss": 0.3486, "step": 7642 }, { "epoch": 0.13285473413408888, "grad_norm": 1.9118487092963556, "learning_rate": 9.725226894811048e-07, "loss": 0.309, "step": 7643 }, { "epoch": 0.1328721166715917, "grad_norm": 2.1788785402226476, "learning_rate": 9.725134856110099e-07, "loss": 0.6552, "step": 7644 }, { "epoch": 0.13288949920909454, "grad_norm": 2.345243883493999, "learning_rate": 9.725042802432643e-07, "loss": 0.3764, "step": 7645 }, { "epoch": 0.13290688174659737, "grad_norm": 1.4940124620855921, "learning_rate": 9.724950733778972e-07, "loss": 0.5666, "step": 7646 }, { "epoch": 0.1329242642841002, "grad_norm": 2.2933782583047724, "learning_rate": 9.724858650149377e-07, "loss": 0.3453, "step": 7647 }, { "epoch": 0.132941646821603, "grad_norm": 2.044891484530836, "learning_rate": 9.724766551544153e-07, "loss": 0.1985, "step": 7648 }, { "epoch": 0.13295902935910583, "grad_norm": 2.6468568521832903, "learning_rate": 9.724674437963587e-07, "loss": 0.5316, "step": 7649 }, { "epoch": 0.13297641189660867, "grad_norm": 1.725247616455672, "learning_rate": 9.724582309407973e-07, "loss": 0.4019, "step": 7650 }, { "epoch": 0.1329937944341115, "grad_norm": 2.938988144335082, "learning_rate": 9.724490165877605e-07, "loss": 0.4016, "step": 7651 }, { "epoch": 0.13301117697161433, "grad_norm": 1.7004447205170774, "learning_rate": 9.724398007372773e-07, "loss": 0.5557, "step": 7652 }, { "epoch": 0.13302855950911713, "grad_norm": 1.5097865139766227, "learning_rate": 9.72430583389377e-07, "loss": 0.5331, "step": 7653 }, { "epoch": 0.13304594204661996, "grad_norm": 1.9409951522051148, "learning_rate": 9.724213645440889e-07, "loss": 0.5773, "step": 7654 }, { "epoch": 0.1330633245841228, "grad_norm": 1.2455416429856663, "learning_rate": 9.724121442014418e-07, "loss": 0.6516, "step": 7655 }, { "epoch": 0.13308070712162562, "grad_norm": 1.3928158569198896, "learning_rate": 9.724029223614654e-07, "loss": 0.2448, "step": 7656 }, { "epoch": 0.13309808965912845, "grad_norm": 1.745258595371221, "learning_rate": 9.723936990241886e-07, "loss": 0.2672, "step": 7657 }, { "epoch": 0.13311547219663125, "grad_norm": 1.6890244866666626, "learning_rate": 9.72384474189641e-07, "loss": 0.6456, "step": 7658 }, { "epoch": 0.13313285473413408, "grad_norm": 2.741162881300549, "learning_rate": 9.723752478578514e-07, "loss": 0.4917, "step": 7659 }, { "epoch": 0.1331502372716369, "grad_norm": 2.90953510033853, "learning_rate": 9.723660200288493e-07, "loss": 0.8632, "step": 7660 }, { "epoch": 0.13316761980913974, "grad_norm": 1.9656760778298783, "learning_rate": 9.723567907026638e-07, "loss": 0.2418, "step": 7661 }, { "epoch": 0.13318500234664257, "grad_norm": 2.7876433485086234, "learning_rate": 9.723475598793244e-07, "loss": 0.3682, "step": 7662 }, { "epoch": 0.13320238488414538, "grad_norm": 1.8282815289435843, "learning_rate": 9.7233832755886e-07, "loss": 0.3626, "step": 7663 }, { "epoch": 0.1332197674216482, "grad_norm": 1.925014567211144, "learning_rate": 9.723290937413005e-07, "loss": 0.3378, "step": 7664 }, { "epoch": 0.13323714995915104, "grad_norm": 1.9666535756672334, "learning_rate": 9.723198584266745e-07, "loss": 0.6459, "step": 7665 }, { "epoch": 0.13325453249665387, "grad_norm": 1.6040324274804463, "learning_rate": 9.723106216150114e-07, "loss": 0.4222, "step": 7666 }, { "epoch": 0.1332719150341567, "grad_norm": 1.6206116303545146, "learning_rate": 9.723013833063407e-07, "loss": 0.3352, "step": 7667 }, { "epoch": 0.1332892975716595, "grad_norm": 1.5090558513535672, "learning_rate": 9.722921435006917e-07, "loss": 0.2807, "step": 7668 }, { "epoch": 0.13330668010916233, "grad_norm": 2.1980524680947573, "learning_rate": 9.722829021980935e-07, "loss": 0.5986, "step": 7669 }, { "epoch": 0.13332406264666516, "grad_norm": 2.0696831878475437, "learning_rate": 9.722736593985752e-07, "loss": 0.4317, "step": 7670 }, { "epoch": 0.133341445184168, "grad_norm": 2.7674121141155323, "learning_rate": 9.722644151021665e-07, "loss": 0.5108, "step": 7671 }, { "epoch": 0.13335882772167082, "grad_norm": 2.257892317814619, "learning_rate": 9.722551693088967e-07, "loss": 0.5045, "step": 7672 }, { "epoch": 0.13337621025917362, "grad_norm": 2.0694644215059204, "learning_rate": 9.72245922018795e-07, "loss": 0.3558, "step": 7673 }, { "epoch": 0.13339359279667645, "grad_norm": 2.101591590855, "learning_rate": 9.722366732318902e-07, "loss": 0.3504, "step": 7674 }, { "epoch": 0.13341097533417928, "grad_norm": 1.387236002275673, "learning_rate": 9.722274229482127e-07, "loss": 0.538, "step": 7675 }, { "epoch": 0.1334283578716821, "grad_norm": 2.3159559571425747, "learning_rate": 9.722181711677906e-07, "loss": 0.2994, "step": 7676 }, { "epoch": 0.13344574040918494, "grad_norm": 2.5141334670156015, "learning_rate": 9.722089178906541e-07, "loss": 0.4024, "step": 7677 }, { "epoch": 0.13346312294668775, "grad_norm": 3.778993269036782, "learning_rate": 9.721996631168322e-07, "loss": 0.3277, "step": 7678 }, { "epoch": 0.13348050548419058, "grad_norm": 1.6338528915900337, "learning_rate": 9.721904068463544e-07, "loss": 0.3706, "step": 7679 }, { "epoch": 0.1334978880216934, "grad_norm": 1.4786520083339245, "learning_rate": 9.721811490792498e-07, "loss": 0.4092, "step": 7680 }, { "epoch": 0.13351527055919624, "grad_norm": 1.7921550979662724, "learning_rate": 9.721718898155479e-07, "loss": 0.3069, "step": 7681 }, { "epoch": 0.13353265309669907, "grad_norm": 1.8813734701378373, "learning_rate": 9.721626290552778e-07, "loss": 0.6477, "step": 7682 }, { "epoch": 0.13355003563420187, "grad_norm": 1.667446388269799, "learning_rate": 9.721533667984694e-07, "loss": 0.3397, "step": 7683 }, { "epoch": 0.1335674181717047, "grad_norm": 1.613320629743567, "learning_rate": 9.721441030451515e-07, "loss": 0.2335, "step": 7684 }, { "epoch": 0.13358480070920753, "grad_norm": 3.6251009450444656, "learning_rate": 9.721348377953538e-07, "loss": 0.405, "step": 7685 }, { "epoch": 0.13360218324671036, "grad_norm": 2.6582848855028924, "learning_rate": 9.721255710491053e-07, "loss": 0.2928, "step": 7686 }, { "epoch": 0.1336195657842132, "grad_norm": 2.175113061414117, "learning_rate": 9.72116302806436e-07, "loss": 0.2468, "step": 7687 }, { "epoch": 0.133636948321716, "grad_norm": 6.840218784783159, "learning_rate": 9.721070330673747e-07, "loss": 0.5548, "step": 7688 }, { "epoch": 0.13365433085921882, "grad_norm": 1.9191048140395666, "learning_rate": 9.72097761831951e-07, "loss": 0.4738, "step": 7689 }, { "epoch": 0.13367171339672165, "grad_norm": 1.3407149002843182, "learning_rate": 9.72088489100194e-07, "loss": 0.4853, "step": 7690 }, { "epoch": 0.13368909593422448, "grad_norm": 1.3752224734568865, "learning_rate": 9.720792148721337e-07, "loss": 0.6681, "step": 7691 }, { "epoch": 0.1337064784717273, "grad_norm": 2.5563336458541133, "learning_rate": 9.720699391477989e-07, "loss": 0.5774, "step": 7692 }, { "epoch": 0.13372386100923012, "grad_norm": 2.4428700473858274, "learning_rate": 9.720606619272193e-07, "loss": 0.6871, "step": 7693 }, { "epoch": 0.13374124354673295, "grad_norm": 1.6795441343808462, "learning_rate": 9.720513832104245e-07, "loss": 0.3321, "step": 7694 }, { "epoch": 0.13375862608423578, "grad_norm": 1.7808930884398246, "learning_rate": 9.720421029974434e-07, "loss": 0.5129, "step": 7695 }, { "epoch": 0.1337760086217386, "grad_norm": 1.9615832005936489, "learning_rate": 9.720328212883058e-07, "loss": 0.2931, "step": 7696 }, { "epoch": 0.13379339115924144, "grad_norm": 2.4068369159742877, "learning_rate": 9.720235380830408e-07, "loss": 0.7794, "step": 7697 }, { "epoch": 0.13381077369674424, "grad_norm": 1.7561501351030417, "learning_rate": 9.720142533816782e-07, "loss": 0.6138, "step": 7698 }, { "epoch": 0.13382815623424707, "grad_norm": 1.4718827340927925, "learning_rate": 9.720049671842471e-07, "loss": 0.356, "step": 7699 }, { "epoch": 0.1338455387717499, "grad_norm": 1.777535021073089, "learning_rate": 9.719956794907773e-07, "loss": 0.7725, "step": 7700 }, { "epoch": 0.13386292130925273, "grad_norm": 2.47833986755337, "learning_rate": 9.719863903012978e-07, "loss": 0.885, "step": 7701 }, { "epoch": 0.13388030384675556, "grad_norm": 1.6301081696839403, "learning_rate": 9.719770996158383e-07, "loss": 0.4299, "step": 7702 }, { "epoch": 0.13389768638425836, "grad_norm": 1.6406664724289879, "learning_rate": 9.719678074344281e-07, "loss": 0.4642, "step": 7703 }, { "epoch": 0.1339150689217612, "grad_norm": 1.5593119009026122, "learning_rate": 9.71958513757097e-07, "loss": 0.4136, "step": 7704 }, { "epoch": 0.13393245145926402, "grad_norm": 1.3344342192951657, "learning_rate": 9.719492185838742e-07, "loss": 0.2591, "step": 7705 }, { "epoch": 0.13394983399676685, "grad_norm": 0.9135239115546674, "learning_rate": 9.71939921914789e-07, "loss": 0.3044, "step": 7706 }, { "epoch": 0.13396721653426968, "grad_norm": 1.2131268412966558, "learning_rate": 9.71930623749871e-07, "loss": 0.4159, "step": 7707 }, { "epoch": 0.1339845990717725, "grad_norm": 1.861120982207833, "learning_rate": 9.7192132408915e-07, "loss": 0.2259, "step": 7708 }, { "epoch": 0.13400198160927532, "grad_norm": 2.0841078647535656, "learning_rate": 9.71912022932655e-07, "loss": 0.3792, "step": 7709 }, { "epoch": 0.13401936414677815, "grad_norm": 3.1647270825861424, "learning_rate": 9.719027202804156e-07, "loss": 0.2985, "step": 7710 }, { "epoch": 0.13403674668428098, "grad_norm": 1.286319816782837, "learning_rate": 9.718934161324613e-07, "loss": 0.3266, "step": 7711 }, { "epoch": 0.1340541292217838, "grad_norm": 1.3471524811217443, "learning_rate": 9.71884110488822e-07, "loss": 0.2433, "step": 7712 }, { "epoch": 0.1340715117592866, "grad_norm": 2.0041192716976255, "learning_rate": 9.718748033495266e-07, "loss": 0.4395, "step": 7713 }, { "epoch": 0.13408889429678944, "grad_norm": 1.7696639856192975, "learning_rate": 9.718654947146048e-07, "loss": 0.3225, "step": 7714 }, { "epoch": 0.13410627683429227, "grad_norm": 1.7081862153895226, "learning_rate": 9.71856184584086e-07, "loss": 0.3616, "step": 7715 }, { "epoch": 0.1341236593717951, "grad_norm": 1.4039970372846071, "learning_rate": 9.718468729580001e-07, "loss": 0.241, "step": 7716 }, { "epoch": 0.13414104190929793, "grad_norm": 1.6899877606851827, "learning_rate": 9.718375598363764e-07, "loss": 0.2624, "step": 7717 }, { "epoch": 0.13415842444680073, "grad_norm": 1.907309013427225, "learning_rate": 9.718282452192441e-07, "loss": 0.4474, "step": 7718 }, { "epoch": 0.13417580698430356, "grad_norm": 2.228852105900247, "learning_rate": 9.718189291066333e-07, "loss": 0.571, "step": 7719 }, { "epoch": 0.1341931895218064, "grad_norm": 2.9305234322303266, "learning_rate": 9.718096114985729e-07, "loss": 0.3067, "step": 7720 }, { "epoch": 0.13421057205930922, "grad_norm": 1.7906764429486424, "learning_rate": 9.71800292395093e-07, "loss": 0.5279, "step": 7721 }, { "epoch": 0.13422795459681205, "grad_norm": 1.67237235426464, "learning_rate": 9.71790971796223e-07, "loss": 0.3935, "step": 7722 }, { "epoch": 0.13424533713431486, "grad_norm": 1.5391032609057205, "learning_rate": 9.71781649701992e-07, "loss": 0.4186, "step": 7723 }, { "epoch": 0.1342627196718177, "grad_norm": 1.699042163600386, "learning_rate": 9.717723261124302e-07, "loss": 0.4981, "step": 7724 }, { "epoch": 0.13428010220932052, "grad_norm": 1.5088364015109623, "learning_rate": 9.717630010275668e-07, "loss": 0.6368, "step": 7725 }, { "epoch": 0.13429748474682335, "grad_norm": 1.913323374941753, "learning_rate": 9.717536744474312e-07, "loss": 0.5251, "step": 7726 }, { "epoch": 0.13431486728432618, "grad_norm": 3.2888811780484386, "learning_rate": 9.717443463720534e-07, "loss": 0.5334, "step": 7727 }, { "epoch": 0.13433224982182898, "grad_norm": 1.2845590662060506, "learning_rate": 9.717350168014625e-07, "loss": 0.573, "step": 7728 }, { "epoch": 0.1343496323593318, "grad_norm": 2.7927139107032484, "learning_rate": 9.717256857356884e-07, "loss": 0.4597, "step": 7729 }, { "epoch": 0.13436701489683464, "grad_norm": 2.0486452323533793, "learning_rate": 9.717163531747606e-07, "loss": 0.6086, "step": 7730 }, { "epoch": 0.13438439743433747, "grad_norm": 1.8472178651950024, "learning_rate": 9.717070191187086e-07, "loss": 0.4745, "step": 7731 }, { "epoch": 0.1344017799718403, "grad_norm": 2.1718180167399517, "learning_rate": 9.716976835675622e-07, "loss": 0.5426, "step": 7732 }, { "epoch": 0.1344191625093431, "grad_norm": 2.773814643525524, "learning_rate": 9.716883465213505e-07, "loss": 0.3694, "step": 7733 }, { "epoch": 0.13443654504684593, "grad_norm": 1.9620004106287272, "learning_rate": 9.716790079801037e-07, "loss": 0.2196, "step": 7734 }, { "epoch": 0.13445392758434876, "grad_norm": 2.106591013484451, "learning_rate": 9.71669667943851e-07, "loss": 0.6351, "step": 7735 }, { "epoch": 0.1344713101218516, "grad_norm": 1.9619152720030633, "learning_rate": 9.716603264126222e-07, "loss": 0.6646, "step": 7736 }, { "epoch": 0.13448869265935443, "grad_norm": 1.4205053574076743, "learning_rate": 9.716509833864466e-07, "loss": 0.3748, "step": 7737 }, { "epoch": 0.13450607519685723, "grad_norm": 2.0499294452535017, "learning_rate": 9.716416388653543e-07, "loss": 0.4267, "step": 7738 }, { "epoch": 0.13452345773436006, "grad_norm": 2.5246570266496735, "learning_rate": 9.716322928493745e-07, "loss": 0.6545, "step": 7739 }, { "epoch": 0.1345408402718629, "grad_norm": 6.731532318518848, "learning_rate": 9.71622945338537e-07, "loss": 0.7585, "step": 7740 }, { "epoch": 0.13455822280936572, "grad_norm": 2.2585927350388815, "learning_rate": 9.716135963328716e-07, "loss": 0.3105, "step": 7741 }, { "epoch": 0.13457560534686855, "grad_norm": 1.4773553331933014, "learning_rate": 9.716042458324074e-07, "loss": 0.3624, "step": 7742 }, { "epoch": 0.13459298788437135, "grad_norm": 1.2517595949725997, "learning_rate": 9.715948938371747e-07, "loss": 0.4766, "step": 7743 }, { "epoch": 0.13461037042187418, "grad_norm": 1.918533181233293, "learning_rate": 9.715855403472026e-07, "loss": 0.2727, "step": 7744 }, { "epoch": 0.134627752959377, "grad_norm": 1.1997077232500368, "learning_rate": 9.71576185362521e-07, "loss": 0.3128, "step": 7745 }, { "epoch": 0.13464513549687984, "grad_norm": 2.1364463264414115, "learning_rate": 9.715668288831598e-07, "loss": 0.6907, "step": 7746 }, { "epoch": 0.13466251803438267, "grad_norm": 2.2248324172809073, "learning_rate": 9.71557470909148e-07, "loss": 0.5257, "step": 7747 }, { "epoch": 0.13467990057188547, "grad_norm": 2.79492350915815, "learning_rate": 9.715481114405159e-07, "loss": 0.6065, "step": 7748 }, { "epoch": 0.1346972831093883, "grad_norm": 1.4064681343186214, "learning_rate": 9.715387504772928e-07, "loss": 0.465, "step": 7749 }, { "epoch": 0.13471466564689114, "grad_norm": 2.2036557664531107, "learning_rate": 9.715293880195082e-07, "loss": 0.4591, "step": 7750 }, { "epoch": 0.13473204818439397, "grad_norm": 2.3377243530251244, "learning_rate": 9.715200240671923e-07, "loss": 0.3272, "step": 7751 }, { "epoch": 0.1347494307218968, "grad_norm": 1.7881565541751598, "learning_rate": 9.715106586203748e-07, "loss": 0.2961, "step": 7752 }, { "epoch": 0.1347668132593996, "grad_norm": 1.344361748358763, "learning_rate": 9.715012916790846e-07, "loss": 0.3922, "step": 7753 }, { "epoch": 0.13478419579690243, "grad_norm": 1.5509234355971675, "learning_rate": 9.71491923243352e-07, "loss": 0.3593, "step": 7754 }, { "epoch": 0.13480157833440526, "grad_norm": 2.9768464347704193, "learning_rate": 9.71482553313207e-07, "loss": 0.4431, "step": 7755 }, { "epoch": 0.1348189608719081, "grad_norm": 1.6762086331382458, "learning_rate": 9.714731818886785e-07, "loss": 0.3734, "step": 7756 }, { "epoch": 0.13483634340941092, "grad_norm": 1.776029454784392, "learning_rate": 9.714638089697965e-07, "loss": 0.4975, "step": 7757 }, { "epoch": 0.13485372594691372, "grad_norm": 1.2327265491182695, "learning_rate": 9.71454434556591e-07, "loss": 0.6726, "step": 7758 }, { "epoch": 0.13487110848441655, "grad_norm": 1.232194498658154, "learning_rate": 9.714450586490916e-07, "loss": 0.679, "step": 7759 }, { "epoch": 0.13488849102191938, "grad_norm": 1.8111400490019252, "learning_rate": 9.71435681247328e-07, "loss": 0.3165, "step": 7760 }, { "epoch": 0.1349058735594222, "grad_norm": 1.6150714630926033, "learning_rate": 9.714263023513294e-07, "loss": 0.3148, "step": 7761 }, { "epoch": 0.13492325609692504, "grad_norm": 2.0484965130541846, "learning_rate": 9.714169219611264e-07, "loss": 0.2982, "step": 7762 }, { "epoch": 0.13494063863442785, "grad_norm": 1.6367787955291135, "learning_rate": 9.714075400767481e-07, "loss": 0.4491, "step": 7763 }, { "epoch": 0.13495802117193068, "grad_norm": 1.5627021342747558, "learning_rate": 9.713981566982245e-07, "loss": 0.2862, "step": 7764 }, { "epoch": 0.1349754037094335, "grad_norm": 1.8097265088522294, "learning_rate": 9.713887718255854e-07, "loss": 0.4229, "step": 7765 }, { "epoch": 0.13499278624693634, "grad_norm": 1.6620499038832899, "learning_rate": 9.713793854588602e-07, "loss": 0.3079, "step": 7766 }, { "epoch": 0.13501016878443917, "grad_norm": 1.7757821724951766, "learning_rate": 9.71369997598079e-07, "loss": 0.3663, "step": 7767 }, { "epoch": 0.13502755132194197, "grad_norm": 1.3431103422358543, "learning_rate": 9.713606082432714e-07, "loss": 0.4911, "step": 7768 }, { "epoch": 0.1350449338594448, "grad_norm": 3.069562014626932, "learning_rate": 9.713512173944672e-07, "loss": 0.5612, "step": 7769 }, { "epoch": 0.13506231639694763, "grad_norm": 1.5808292941812168, "learning_rate": 9.713418250516962e-07, "loss": 0.5938, "step": 7770 }, { "epoch": 0.13507969893445046, "grad_norm": 2.1532066871778395, "learning_rate": 9.713324312149881e-07, "loss": 0.5629, "step": 7771 }, { "epoch": 0.13509708147195326, "grad_norm": 2.355819640820696, "learning_rate": 9.713230358843726e-07, "loss": 0.7837, "step": 7772 }, { "epoch": 0.1351144640094561, "grad_norm": 1.5231641212698561, "learning_rate": 9.713136390598799e-07, "loss": 0.4071, "step": 7773 }, { "epoch": 0.13513184654695892, "grad_norm": 1.4780536455019027, "learning_rate": 9.713042407415393e-07, "loss": 0.3178, "step": 7774 }, { "epoch": 0.13514922908446175, "grad_norm": 2.204684178336807, "learning_rate": 9.712948409293807e-07, "loss": 0.8197, "step": 7775 }, { "epoch": 0.13516661162196458, "grad_norm": 2.386139439612232, "learning_rate": 9.71285439623434e-07, "loss": 0.4215, "step": 7776 }, { "epoch": 0.13518399415946739, "grad_norm": 1.255018424416271, "learning_rate": 9.71276036823729e-07, "loss": 0.3661, "step": 7777 }, { "epoch": 0.13520137669697022, "grad_norm": 1.9072530393321443, "learning_rate": 9.712666325302954e-07, "loss": 0.3283, "step": 7778 }, { "epoch": 0.13521875923447305, "grad_norm": 2.761852513859465, "learning_rate": 9.71257226743163e-07, "loss": 0.7223, "step": 7779 }, { "epoch": 0.13523614177197588, "grad_norm": 1.9318636563107086, "learning_rate": 9.712478194623617e-07, "loss": 0.6557, "step": 7780 }, { "epoch": 0.1352535243094787, "grad_norm": 1.7361751076733212, "learning_rate": 9.712384106879213e-07, "loss": 0.4616, "step": 7781 }, { "epoch": 0.1352709068469815, "grad_norm": 1.9664608222008644, "learning_rate": 9.712290004198716e-07, "loss": 0.4004, "step": 7782 }, { "epoch": 0.13528828938448434, "grad_norm": 1.687221494482278, "learning_rate": 9.712195886582425e-07, "loss": 0.4716, "step": 7783 }, { "epoch": 0.13530567192198717, "grad_norm": 1.9775859562077147, "learning_rate": 9.712101754030637e-07, "loss": 0.2446, "step": 7784 }, { "epoch": 0.13532305445949, "grad_norm": 1.1693070708496585, "learning_rate": 9.71200760654365e-07, "loss": 0.2928, "step": 7785 }, { "epoch": 0.13534043699699283, "grad_norm": 4.773894647615696, "learning_rate": 9.711913444121765e-07, "loss": 0.7497, "step": 7786 }, { "epoch": 0.13535781953449563, "grad_norm": 1.9697106946151852, "learning_rate": 9.711819266765277e-07, "loss": 0.2868, "step": 7787 }, { "epoch": 0.13537520207199846, "grad_norm": 1.5330396722470394, "learning_rate": 9.711725074474488e-07, "loss": 0.4172, "step": 7788 }, { "epoch": 0.1353925846095013, "grad_norm": 3.2478888371657044, "learning_rate": 9.711630867249694e-07, "loss": 0.698, "step": 7789 }, { "epoch": 0.13540996714700412, "grad_norm": 1.8010092292646143, "learning_rate": 9.711536645091197e-07, "loss": 0.3463, "step": 7790 }, { "epoch": 0.13542734968450695, "grad_norm": 1.8246052553350023, "learning_rate": 9.71144240799929e-07, "loss": 0.375, "step": 7791 }, { "epoch": 0.13544473222200976, "grad_norm": 2.9588713098854296, "learning_rate": 9.711348155974277e-07, "loss": 0.5066, "step": 7792 }, { "epoch": 0.1354621147595126, "grad_norm": 1.810479267132493, "learning_rate": 9.711253889016453e-07, "loss": 0.4944, "step": 7793 }, { "epoch": 0.13547949729701542, "grad_norm": 2.414112446757774, "learning_rate": 9.711159607126118e-07, "loss": 0.5192, "step": 7794 }, { "epoch": 0.13549687983451825, "grad_norm": 1.4973014034209287, "learning_rate": 9.711065310303572e-07, "loss": 0.3016, "step": 7795 }, { "epoch": 0.13551426237202108, "grad_norm": 2.2491299970997476, "learning_rate": 9.710970998549115e-07, "loss": 0.3989, "step": 7796 }, { "epoch": 0.13553164490952388, "grad_norm": 1.8134432987856466, "learning_rate": 9.71087667186304e-07, "loss": 0.7953, "step": 7797 }, { "epoch": 0.1355490274470267, "grad_norm": 1.8395604725178756, "learning_rate": 9.710782330245653e-07, "loss": 0.7434, "step": 7798 }, { "epoch": 0.13556640998452954, "grad_norm": 3.0826409673857063, "learning_rate": 9.71068797369725e-07, "loss": 0.2305, "step": 7799 }, { "epoch": 0.13558379252203237, "grad_norm": 1.4829056670777194, "learning_rate": 9.710593602218128e-07, "loss": 0.558, "step": 7800 }, { "epoch": 0.1356011750595352, "grad_norm": 1.4168780919564703, "learning_rate": 9.71049921580859e-07, "loss": 0.2942, "step": 7801 }, { "epoch": 0.135618557597038, "grad_norm": 2.2047081059008833, "learning_rate": 9.710404814468932e-07, "loss": 0.4105, "step": 7802 }, { "epoch": 0.13563594013454083, "grad_norm": 1.358970606478933, "learning_rate": 9.710310398199454e-07, "loss": 0.793, "step": 7803 }, { "epoch": 0.13565332267204366, "grad_norm": 1.6056874504936647, "learning_rate": 9.710215967000457e-07, "loss": 0.3327, "step": 7804 }, { "epoch": 0.1356707052095465, "grad_norm": 2.0076027234187572, "learning_rate": 9.71012152087224e-07, "loss": 0.3732, "step": 7805 }, { "epoch": 0.13568808774704932, "grad_norm": 3.7738161443257727, "learning_rate": 9.7100270598151e-07, "loss": 0.5269, "step": 7806 }, { "epoch": 0.13570547028455213, "grad_norm": 2.0492948282471786, "learning_rate": 9.709932583829339e-07, "loss": 0.3843, "step": 7807 }, { "epoch": 0.13572285282205496, "grad_norm": 2.2172330183425273, "learning_rate": 9.709838092915254e-07, "loss": 0.43, "step": 7808 }, { "epoch": 0.1357402353595578, "grad_norm": 1.602740192701305, "learning_rate": 9.709743587073145e-07, "loss": 0.6026, "step": 7809 }, { "epoch": 0.13575761789706062, "grad_norm": 1.391404812118221, "learning_rate": 9.709649066303313e-07, "loss": 0.4641, "step": 7810 }, { "epoch": 0.13577500043456345, "grad_norm": 1.4134955669461136, "learning_rate": 9.709554530606058e-07, "loss": 0.3733, "step": 7811 }, { "epoch": 0.13579238297206625, "grad_norm": 1.9799592344940524, "learning_rate": 9.70945997998168e-07, "loss": 0.6622, "step": 7812 }, { "epoch": 0.13580976550956908, "grad_norm": 1.0763685807855103, "learning_rate": 9.709365414430476e-07, "loss": 0.4346, "step": 7813 }, { "epoch": 0.1358271480470719, "grad_norm": 1.6739506866415144, "learning_rate": 9.709270833952745e-07, "loss": 0.7293, "step": 7814 }, { "epoch": 0.13584453058457474, "grad_norm": 3.554613379551713, "learning_rate": 9.709176238548788e-07, "loss": 0.5453, "step": 7815 }, { "epoch": 0.13586191312207757, "grad_norm": 1.775027364184381, "learning_rate": 9.709081628218907e-07, "loss": 0.3486, "step": 7816 }, { "epoch": 0.13587929565958037, "grad_norm": 1.3113677011145113, "learning_rate": 9.708987002963402e-07, "loss": 0.2879, "step": 7817 }, { "epoch": 0.1358966781970832, "grad_norm": 1.3250652604277866, "learning_rate": 9.708892362782571e-07, "loss": 0.3476, "step": 7818 }, { "epoch": 0.13591406073458603, "grad_norm": 3.160913627254957, "learning_rate": 9.708797707676712e-07, "loss": 0.619, "step": 7819 }, { "epoch": 0.13593144327208886, "grad_norm": 5.010086534254314, "learning_rate": 9.708703037646129e-07, "loss": 0.9009, "step": 7820 }, { "epoch": 0.1359488258095917, "grad_norm": 4.311119291976842, "learning_rate": 9.708608352691121e-07, "loss": 0.4626, "step": 7821 }, { "epoch": 0.1359662083470945, "grad_norm": 1.8517298412425856, "learning_rate": 9.708513652811986e-07, "loss": 0.4583, "step": 7822 }, { "epoch": 0.13598359088459733, "grad_norm": 2.4093322857187762, "learning_rate": 9.708418938009026e-07, "loss": 0.4398, "step": 7823 }, { "epoch": 0.13600097342210016, "grad_norm": 2.0054407762681468, "learning_rate": 9.708324208282542e-07, "loss": 0.4711, "step": 7824 }, { "epoch": 0.136018355959603, "grad_norm": 3.350177972681385, "learning_rate": 9.70822946363283e-07, "loss": 0.2864, "step": 7825 }, { "epoch": 0.13603573849710582, "grad_norm": 2.924932246879204, "learning_rate": 9.708134704060198e-07, "loss": 0.4866, "step": 7826 }, { "epoch": 0.13605312103460862, "grad_norm": 3.1647804286432737, "learning_rate": 9.70803992956494e-07, "loss": 0.7883, "step": 7827 }, { "epoch": 0.13607050357211145, "grad_norm": 2.3491892699141483, "learning_rate": 9.707945140147355e-07, "loss": 0.4484, "step": 7828 }, { "epoch": 0.13608788610961428, "grad_norm": 2.039556416080315, "learning_rate": 9.70785033580775e-07, "loss": 0.4598, "step": 7829 }, { "epoch": 0.1361052686471171, "grad_norm": 2.4003879386876883, "learning_rate": 9.707755516546421e-07, "loss": 0.3708, "step": 7830 }, { "epoch": 0.13612265118461994, "grad_norm": 1.7914824494987036, "learning_rate": 9.70766068236367e-07, "loss": 0.4098, "step": 7831 }, { "epoch": 0.13614003372212274, "grad_norm": 1.9751835829213686, "learning_rate": 9.707565833259796e-07, "loss": 0.5516, "step": 7832 }, { "epoch": 0.13615741625962557, "grad_norm": 1.7801887666440337, "learning_rate": 9.7074709692351e-07, "loss": 0.2767, "step": 7833 }, { "epoch": 0.1361747987971284, "grad_norm": 1.7211041782132799, "learning_rate": 9.707376090289887e-07, "loss": 0.4913, "step": 7834 }, { "epoch": 0.13619218133463123, "grad_norm": 2.2437771897613032, "learning_rate": 9.70728119642445e-07, "loss": 0.3379, "step": 7835 }, { "epoch": 0.13620956387213407, "grad_norm": 2.067626448831253, "learning_rate": 9.707186287639097e-07, "loss": 0.4527, "step": 7836 }, { "epoch": 0.13622694640963687, "grad_norm": 1.3159644002009845, "learning_rate": 9.707091363934125e-07, "loss": 0.302, "step": 7837 }, { "epoch": 0.1362443289471397, "grad_norm": 2.537031278910074, "learning_rate": 9.706996425309836e-07, "loss": 0.2522, "step": 7838 }, { "epoch": 0.13626171148464253, "grad_norm": 1.4061369893339728, "learning_rate": 9.70690147176653e-07, "loss": 0.3645, "step": 7839 }, { "epoch": 0.13627909402214536, "grad_norm": 3.0729801719658068, "learning_rate": 9.706806503304508e-07, "loss": 0.4593, "step": 7840 }, { "epoch": 0.1362964765596482, "grad_norm": 1.6322172965613404, "learning_rate": 9.706711519924072e-07, "loss": 0.4649, "step": 7841 }, { "epoch": 0.136313859097151, "grad_norm": 1.2893080453630381, "learning_rate": 9.706616521625523e-07, "loss": 0.3952, "step": 7842 }, { "epoch": 0.13633124163465382, "grad_norm": 12.46086047570996, "learning_rate": 9.706521508409162e-07, "loss": 0.6733, "step": 7843 }, { "epoch": 0.13634862417215665, "grad_norm": 3.045595196676147, "learning_rate": 9.70642648027529e-07, "loss": 0.5839, "step": 7844 }, { "epoch": 0.13636600670965948, "grad_norm": 5.468496978270096, "learning_rate": 9.706331437224206e-07, "loss": 0.4409, "step": 7845 }, { "epoch": 0.1363833892471623, "grad_norm": 1.7584119185191605, "learning_rate": 9.706236379256215e-07, "loss": 0.5435, "step": 7846 }, { "epoch": 0.13640077178466511, "grad_norm": 2.572509145148998, "learning_rate": 9.706141306371616e-07, "loss": 0.381, "step": 7847 }, { "epoch": 0.13641815432216794, "grad_norm": 1.9300312726130893, "learning_rate": 9.70604621857071e-07, "loss": 0.5011, "step": 7848 }, { "epoch": 0.13643553685967078, "grad_norm": 1.6797398988031784, "learning_rate": 9.7059511158538e-07, "loss": 0.4779, "step": 7849 }, { "epoch": 0.1364529193971736, "grad_norm": 1.8714181307149682, "learning_rate": 9.705855998221186e-07, "loss": 0.7006, "step": 7850 }, { "epoch": 0.13647030193467644, "grad_norm": 1.9633669781435945, "learning_rate": 9.705760865673172e-07, "loss": 0.7302, "step": 7851 }, { "epoch": 0.13648768447217924, "grad_norm": 2.2669793057718954, "learning_rate": 9.705665718210056e-07, "loss": 0.7379, "step": 7852 }, { "epoch": 0.13650506700968207, "grad_norm": 2.2171666501852445, "learning_rate": 9.705570555832144e-07, "loss": 0.2297, "step": 7853 }, { "epoch": 0.1365224495471849, "grad_norm": 1.454517049115771, "learning_rate": 9.705475378539731e-07, "loss": 0.6805, "step": 7854 }, { "epoch": 0.13653983208468773, "grad_norm": 1.7740252252331412, "learning_rate": 9.705380186333125e-07, "loss": 0.5784, "step": 7855 }, { "epoch": 0.13655721462219056, "grad_norm": 1.4049113829896178, "learning_rate": 9.705284979212625e-07, "loss": 0.4101, "step": 7856 }, { "epoch": 0.13657459715969336, "grad_norm": 1.5677305261831826, "learning_rate": 9.705189757178532e-07, "loss": 0.3979, "step": 7857 }, { "epoch": 0.1365919796971962, "grad_norm": 1.52502760973946, "learning_rate": 9.70509452023115e-07, "loss": 0.3015, "step": 7858 }, { "epoch": 0.13660936223469902, "grad_norm": 1.993535127911414, "learning_rate": 9.70499926837078e-07, "loss": 0.4517, "step": 7859 }, { "epoch": 0.13662674477220185, "grad_norm": 1.465176661825506, "learning_rate": 9.704904001597722e-07, "loss": 0.2757, "step": 7860 }, { "epoch": 0.13664412730970468, "grad_norm": 2.340837864967125, "learning_rate": 9.70480871991228e-07, "loss": 0.5229, "step": 7861 }, { "epoch": 0.13666150984720749, "grad_norm": 1.4678357906782682, "learning_rate": 9.704713423314756e-07, "loss": 0.5222, "step": 7862 }, { "epoch": 0.13667889238471032, "grad_norm": 2.924186559529846, "learning_rate": 9.704618111805452e-07, "loss": 1.0481, "step": 7863 }, { "epoch": 0.13669627492221315, "grad_norm": 3.939786443277737, "learning_rate": 9.70452278538467e-07, "loss": 0.4055, "step": 7864 }, { "epoch": 0.13671365745971598, "grad_norm": 1.4683925931369164, "learning_rate": 9.704427444052713e-07, "loss": 0.5207, "step": 7865 }, { "epoch": 0.1367310399972188, "grad_norm": 1.717829275491277, "learning_rate": 9.70433208780988e-07, "loss": 0.4977, "step": 7866 }, { "epoch": 0.1367484225347216, "grad_norm": 2.2858633870431633, "learning_rate": 9.704236716656475e-07, "loss": 0.7484, "step": 7867 }, { "epoch": 0.13676580507222444, "grad_norm": 1.9144269450432532, "learning_rate": 9.7041413305928e-07, "loss": 0.5377, "step": 7868 }, { "epoch": 0.13678318760972727, "grad_norm": 7.391538154807196, "learning_rate": 9.70404592961916e-07, "loss": 0.76, "step": 7869 }, { "epoch": 0.1368005701472301, "grad_norm": 1.2144951196598712, "learning_rate": 9.703950513735854e-07, "loss": 0.4367, "step": 7870 }, { "epoch": 0.13681795268473293, "grad_norm": 2.839658556123308, "learning_rate": 9.703855082943185e-07, "loss": 0.6495, "step": 7871 }, { "epoch": 0.13683533522223573, "grad_norm": 2.470222071491747, "learning_rate": 9.703759637241458e-07, "loss": 0.4637, "step": 7872 }, { "epoch": 0.13685271775973856, "grad_norm": 1.7932707331107245, "learning_rate": 9.703664176630973e-07, "loss": 0.6326, "step": 7873 }, { "epoch": 0.1368701002972414, "grad_norm": 1.6107232385373769, "learning_rate": 9.703568701112032e-07, "loss": 0.5305, "step": 7874 }, { "epoch": 0.13688748283474422, "grad_norm": 2.463295710393369, "learning_rate": 9.703473210684941e-07, "loss": 0.6415, "step": 7875 }, { "epoch": 0.13690486537224705, "grad_norm": 2.1188849309932323, "learning_rate": 9.703377705349998e-07, "loss": 0.493, "step": 7876 }, { "epoch": 0.13692224790974986, "grad_norm": 1.9381575076787878, "learning_rate": 9.70328218510751e-07, "loss": 0.3619, "step": 7877 }, { "epoch": 0.13693963044725269, "grad_norm": 1.2743960750033978, "learning_rate": 9.703186649957779e-07, "loss": 0.4035, "step": 7878 }, { "epoch": 0.13695701298475552, "grad_norm": 2.07479536912711, "learning_rate": 9.703091099901105e-07, "loss": 0.5028, "step": 7879 }, { "epoch": 0.13697439552225835, "grad_norm": 1.7374910688051661, "learning_rate": 9.702995534937791e-07, "loss": 0.6329, "step": 7880 }, { "epoch": 0.13699177805976118, "grad_norm": 1.4485468328370077, "learning_rate": 9.702899955068146e-07, "loss": 0.8377, "step": 7881 }, { "epoch": 0.13700916059726398, "grad_norm": 1.1170329147987057, "learning_rate": 9.702804360292466e-07, "loss": 0.3585, "step": 7882 }, { "epoch": 0.1370265431347668, "grad_norm": 1.8630791165031435, "learning_rate": 9.702708750611056e-07, "loss": 0.3713, "step": 7883 }, { "epoch": 0.13704392567226964, "grad_norm": 7.5791643629535335, "learning_rate": 9.70261312602422e-07, "loss": 0.5842, "step": 7884 }, { "epoch": 0.13706130820977247, "grad_norm": 1.3259586544092228, "learning_rate": 9.70251748653226e-07, "loss": 0.4996, "step": 7885 }, { "epoch": 0.1370786907472753, "grad_norm": 1.8813125222486444, "learning_rate": 9.70242183213548e-07, "loss": 0.4584, "step": 7886 }, { "epoch": 0.1370960732847781, "grad_norm": 2.3505110896204475, "learning_rate": 9.702326162834185e-07, "loss": 0.6864, "step": 7887 }, { "epoch": 0.13711345582228093, "grad_norm": 2.5561159344254754, "learning_rate": 9.702230478628675e-07, "loss": 0.4377, "step": 7888 }, { "epoch": 0.13713083835978376, "grad_norm": 1.9591152504204163, "learning_rate": 9.702134779519254e-07, "loss": 0.3801, "step": 7889 }, { "epoch": 0.1371482208972866, "grad_norm": 2.962392326972541, "learning_rate": 9.702039065506225e-07, "loss": 0.3407, "step": 7890 }, { "epoch": 0.13716560343478942, "grad_norm": 1.5583456670122906, "learning_rate": 9.701943336589892e-07, "loss": 0.6499, "step": 7891 }, { "epoch": 0.13718298597229223, "grad_norm": 1.7460403746170985, "learning_rate": 9.70184759277056e-07, "loss": 0.4021, "step": 7892 }, { "epoch": 0.13720036850979506, "grad_norm": 1.4791683484092792, "learning_rate": 9.70175183404853e-07, "loss": 0.8923, "step": 7893 }, { "epoch": 0.1372177510472979, "grad_norm": 2.7304253094436572, "learning_rate": 9.701656060424105e-07, "loss": 0.3032, "step": 7894 }, { "epoch": 0.13723513358480072, "grad_norm": 1.3331976843128712, "learning_rate": 9.701560271897594e-07, "loss": 0.3338, "step": 7895 }, { "epoch": 0.13725251612230355, "grad_norm": 2.4032626809254483, "learning_rate": 9.701464468469294e-07, "loss": 0.6654, "step": 7896 }, { "epoch": 0.13726989865980635, "grad_norm": 1.9662244852579454, "learning_rate": 9.70136865013951e-07, "loss": 0.346, "step": 7897 }, { "epoch": 0.13728728119730918, "grad_norm": 1.1692343191298198, "learning_rate": 9.701272816908551e-07, "loss": 0.4211, "step": 7898 }, { "epoch": 0.137304663734812, "grad_norm": 1.483908104372877, "learning_rate": 9.701176968776715e-07, "loss": 0.3332, "step": 7899 }, { "epoch": 0.13732204627231484, "grad_norm": 1.471953421140466, "learning_rate": 9.701081105744305e-07, "loss": 0.3455, "step": 7900 }, { "epoch": 0.13733942880981767, "grad_norm": 1.3800198935674748, "learning_rate": 9.70098522781163e-07, "loss": 0.5246, "step": 7901 }, { "epoch": 0.13735681134732047, "grad_norm": 1.7121268367840854, "learning_rate": 9.70088933497899e-07, "loss": 0.5565, "step": 7902 }, { "epoch": 0.1373741938848233, "grad_norm": 1.2848085841893968, "learning_rate": 9.700793427246691e-07, "loss": 0.3813, "step": 7903 }, { "epoch": 0.13739157642232613, "grad_norm": 1.732890606178335, "learning_rate": 9.700697504615036e-07, "loss": 0.6548, "step": 7904 }, { "epoch": 0.13740895895982896, "grad_norm": 1.6539013576603363, "learning_rate": 9.70060156708433e-07, "loss": 0.28, "step": 7905 }, { "epoch": 0.1374263414973318, "grad_norm": 2.3307167989067907, "learning_rate": 9.700505614654875e-07, "loss": 0.5665, "step": 7906 }, { "epoch": 0.1374437240348346, "grad_norm": 3.0894914810443144, "learning_rate": 9.700409647326979e-07, "loss": 0.5973, "step": 7907 }, { "epoch": 0.13746110657233743, "grad_norm": 1.2124432899264779, "learning_rate": 9.700313665100941e-07, "loss": 0.5087, "step": 7908 }, { "epoch": 0.13747848910984026, "grad_norm": 1.8200141068918851, "learning_rate": 9.700217667977068e-07, "loss": 0.4729, "step": 7909 }, { "epoch": 0.1374958716473431, "grad_norm": 5.254128750444245, "learning_rate": 9.700121655955664e-07, "loss": 0.7311, "step": 7910 }, { "epoch": 0.1375132541848459, "grad_norm": 1.6204679173671215, "learning_rate": 9.700025629037035e-07, "loss": 0.3629, "step": 7911 }, { "epoch": 0.13753063672234872, "grad_norm": 2.9072331033771643, "learning_rate": 9.699929587221482e-07, "loss": 0.5212, "step": 7912 }, { "epoch": 0.13754801925985155, "grad_norm": 1.6142104790935334, "learning_rate": 9.699833530509313e-07, "loss": 0.7495, "step": 7913 }, { "epoch": 0.13756540179735438, "grad_norm": 1.5191327101492542, "learning_rate": 9.69973745890083e-07, "loss": 0.3068, "step": 7914 }, { "epoch": 0.1375827843348572, "grad_norm": 3.0632759491910266, "learning_rate": 9.699641372396337e-07, "loss": 1.3437, "step": 7915 }, { "epoch": 0.13760016687236, "grad_norm": 2.1213959834407574, "learning_rate": 9.699545270996142e-07, "loss": 0.4013, "step": 7916 }, { "epoch": 0.13761754940986284, "grad_norm": 2.1823945136663547, "learning_rate": 9.699449154700546e-07, "loss": 0.6943, "step": 7917 }, { "epoch": 0.13763493194736567, "grad_norm": 2.093228554284023, "learning_rate": 9.699353023509852e-07, "loss": 0.3499, "step": 7918 }, { "epoch": 0.1376523144848685, "grad_norm": 2.3038949348414235, "learning_rate": 9.69925687742437e-07, "loss": 0.6321, "step": 7919 }, { "epoch": 0.13766969702237133, "grad_norm": 1.768021831174543, "learning_rate": 9.699160716444404e-07, "loss": 0.557, "step": 7920 }, { "epoch": 0.13768707955987414, "grad_norm": 1.7799999813744538, "learning_rate": 9.699064540570256e-07, "loss": 0.7491, "step": 7921 }, { "epoch": 0.13770446209737697, "grad_norm": 1.8727788596033668, "learning_rate": 9.698968349802231e-07, "loss": 0.5202, "step": 7922 }, { "epoch": 0.1377218446348798, "grad_norm": 1.6366378879598298, "learning_rate": 9.698872144140638e-07, "loss": 0.4665, "step": 7923 }, { "epoch": 0.13773922717238263, "grad_norm": 4.220198495041649, "learning_rate": 9.698775923585775e-07, "loss": 0.4132, "step": 7924 }, { "epoch": 0.13775660970988546, "grad_norm": 3.76565646426601, "learning_rate": 9.698679688137952e-07, "loss": 0.4142, "step": 7925 }, { "epoch": 0.13777399224738826, "grad_norm": 1.77859842586737, "learning_rate": 9.698583437797474e-07, "loss": 0.638, "step": 7926 }, { "epoch": 0.1377913747848911, "grad_norm": 2.3555819689001924, "learning_rate": 9.698487172564642e-07, "loss": 0.3029, "step": 7927 }, { "epoch": 0.13780875732239392, "grad_norm": 1.8455310688277853, "learning_rate": 9.698390892439767e-07, "loss": 0.4365, "step": 7928 }, { "epoch": 0.13782613985989675, "grad_norm": 1.5422098716811665, "learning_rate": 9.69829459742315e-07, "loss": 0.3988, "step": 7929 }, { "epoch": 0.13784352239739958, "grad_norm": 1.4923584780968322, "learning_rate": 9.698198287515098e-07, "loss": 0.4556, "step": 7930 }, { "epoch": 0.13786090493490238, "grad_norm": 1.9986508461017132, "learning_rate": 9.698101962715915e-07, "loss": 0.7195, "step": 7931 }, { "epoch": 0.13787828747240521, "grad_norm": 1.6586810243668746, "learning_rate": 9.698005623025906e-07, "loss": 0.4098, "step": 7932 }, { "epoch": 0.13789567000990804, "grad_norm": 1.4079911305881043, "learning_rate": 9.697909268445377e-07, "loss": 0.369, "step": 7933 }, { "epoch": 0.13791305254741087, "grad_norm": 1.3624100307740177, "learning_rate": 9.697812898974636e-07, "loss": 0.4071, "step": 7934 }, { "epoch": 0.1379304350849137, "grad_norm": 2.6953381436183013, "learning_rate": 9.697716514613985e-07, "loss": 0.4243, "step": 7935 }, { "epoch": 0.1379478176224165, "grad_norm": 2.113576976280731, "learning_rate": 9.69762011536373e-07, "loss": 0.2284, "step": 7936 }, { "epoch": 0.13796520015991934, "grad_norm": 2.03225597236319, "learning_rate": 9.697523701224177e-07, "loss": 0.7163, "step": 7937 }, { "epoch": 0.13798258269742217, "grad_norm": 2.087781237541447, "learning_rate": 9.697427272195633e-07, "loss": 0.4572, "step": 7938 }, { "epoch": 0.137999965234925, "grad_norm": 2.035825589135135, "learning_rate": 9.697330828278402e-07, "loss": 0.4674, "step": 7939 }, { "epoch": 0.13801734777242783, "grad_norm": 1.0133343055020239, "learning_rate": 9.69723436947279e-07, "loss": 0.4366, "step": 7940 }, { "epoch": 0.13803473030993063, "grad_norm": 2.2224133119034244, "learning_rate": 9.697137895779102e-07, "loss": 0.5288, "step": 7941 }, { "epoch": 0.13805211284743346, "grad_norm": 3.793318186112428, "learning_rate": 9.697041407197645e-07, "loss": 0.6414, "step": 7942 }, { "epoch": 0.1380694953849363, "grad_norm": 2.8150756273964817, "learning_rate": 9.696944903728725e-07, "loss": 0.8603, "step": 7943 }, { "epoch": 0.13808687792243912, "grad_norm": 2.44705413983478, "learning_rate": 9.696848385372646e-07, "loss": 0.6984, "step": 7944 }, { "epoch": 0.13810426045994195, "grad_norm": 1.0657213836143207, "learning_rate": 9.696751852129717e-07, "loss": 0.8423, "step": 7945 }, { "epoch": 0.13812164299744475, "grad_norm": 2.4235396637719857, "learning_rate": 9.696655304000241e-07, "loss": 0.3331, "step": 7946 }, { "epoch": 0.13813902553494758, "grad_norm": 2.468998435475313, "learning_rate": 9.696558740984524e-07, "loss": 0.3677, "step": 7947 }, { "epoch": 0.13815640807245042, "grad_norm": 3.01783269354403, "learning_rate": 9.696462163082875e-07, "loss": 0.6552, "step": 7948 }, { "epoch": 0.13817379060995325, "grad_norm": 2.4109458258526746, "learning_rate": 9.696365570295597e-07, "loss": 0.7222, "step": 7949 }, { "epoch": 0.13819117314745608, "grad_norm": 1.5846827302490003, "learning_rate": 9.696268962622998e-07, "loss": 0.3103, "step": 7950 }, { "epoch": 0.13820855568495888, "grad_norm": 1.422411351799217, "learning_rate": 9.696172340065385e-07, "loss": 0.4535, "step": 7951 }, { "epoch": 0.1382259382224617, "grad_norm": 2.661323096641205, "learning_rate": 9.696075702623061e-07, "loss": 0.5645, "step": 7952 }, { "epoch": 0.13824332075996454, "grad_norm": 1.2619756687687869, "learning_rate": 9.695979050296336e-07, "loss": 0.6174, "step": 7953 }, { "epoch": 0.13826070329746737, "grad_norm": 1.735928425376587, "learning_rate": 9.695882383085513e-07, "loss": 0.3137, "step": 7954 }, { "epoch": 0.1382780858349702, "grad_norm": 1.291717426180983, "learning_rate": 9.695785700990901e-07, "loss": 0.4657, "step": 7955 }, { "epoch": 0.138295468372473, "grad_norm": 2.9850126727101736, "learning_rate": 9.695689004012804e-07, "loss": 0.4337, "step": 7956 }, { "epoch": 0.13831285090997583, "grad_norm": 1.3625732274349376, "learning_rate": 9.695592292151532e-07, "loss": 0.246, "step": 7957 }, { "epoch": 0.13833023344747866, "grad_norm": 1.667514513627507, "learning_rate": 9.695495565407388e-07, "loss": 0.3512, "step": 7958 }, { "epoch": 0.1383476159849815, "grad_norm": 2.7765198743773807, "learning_rate": 9.695398823780681e-07, "loss": 0.7217, "step": 7959 }, { "epoch": 0.13836499852248432, "grad_norm": 1.8088600580380323, "learning_rate": 9.695302067271715e-07, "loss": 0.4709, "step": 7960 }, { "epoch": 0.13838238105998713, "grad_norm": 1.8397751288782545, "learning_rate": 9.6952052958808e-07, "loss": 0.5943, "step": 7961 }, { "epoch": 0.13839976359748996, "grad_norm": 2.1705267886142505, "learning_rate": 9.69510850960824e-07, "loss": 0.5648, "step": 7962 }, { "epoch": 0.13841714613499279, "grad_norm": 1.7812801726021126, "learning_rate": 9.695011708454342e-07, "loss": 0.5004, "step": 7963 }, { "epoch": 0.13843452867249562, "grad_norm": 1.892175727000198, "learning_rate": 9.694914892419416e-07, "loss": 0.3199, "step": 7964 }, { "epoch": 0.13845191120999845, "grad_norm": 1.4382506399222457, "learning_rate": 9.694818061503764e-07, "loss": 0.5524, "step": 7965 }, { "epoch": 0.13846929374750125, "grad_norm": 2.2605542417419375, "learning_rate": 9.694721215707695e-07, "loss": 0.4214, "step": 7966 }, { "epoch": 0.13848667628500408, "grad_norm": 1.997568441564363, "learning_rate": 9.69462435503152e-07, "loss": 0.3871, "step": 7967 }, { "epoch": 0.1385040588225069, "grad_norm": 1.9480758835277658, "learning_rate": 9.694527479475537e-07, "loss": 0.4744, "step": 7968 }, { "epoch": 0.13852144136000974, "grad_norm": 0.9742201650268559, "learning_rate": 9.694430589040064e-07, "loss": 0.3651, "step": 7969 }, { "epoch": 0.13853882389751257, "grad_norm": 2.730578934579824, "learning_rate": 9.6943336837254e-07, "loss": 0.6758, "step": 7970 }, { "epoch": 0.13855620643501537, "grad_norm": 1.5771912742809548, "learning_rate": 9.694236763531853e-07, "loss": 0.5928, "step": 7971 }, { "epoch": 0.1385735889725182, "grad_norm": 1.5850311344205912, "learning_rate": 9.694139828459734e-07, "loss": 0.5698, "step": 7972 }, { "epoch": 0.13859097151002103, "grad_norm": 1.3850968824335395, "learning_rate": 9.694042878509348e-07, "loss": 0.3754, "step": 7973 }, { "epoch": 0.13860835404752386, "grad_norm": 1.8447668380560498, "learning_rate": 9.693945913681001e-07, "loss": 0.6451, "step": 7974 }, { "epoch": 0.1386257365850267, "grad_norm": 1.623749048601244, "learning_rate": 9.693848933975002e-07, "loss": 0.6053, "step": 7975 }, { "epoch": 0.1386431191225295, "grad_norm": 1.8838500485909622, "learning_rate": 9.693751939391659e-07, "loss": 0.862, "step": 7976 }, { "epoch": 0.13866050166003233, "grad_norm": 2.1267354626395902, "learning_rate": 9.693654929931279e-07, "loss": 0.3463, "step": 7977 }, { "epoch": 0.13867788419753516, "grad_norm": 2.048348408211454, "learning_rate": 9.693557905594167e-07, "loss": 0.585, "step": 7978 }, { "epoch": 0.138695266735038, "grad_norm": 3.5170100608243744, "learning_rate": 9.693460866380634e-07, "loss": 0.2711, "step": 7979 }, { "epoch": 0.13871264927254082, "grad_norm": 1.8076004419730125, "learning_rate": 9.693363812290984e-07, "loss": 0.4919, "step": 7980 }, { "epoch": 0.13873003181004362, "grad_norm": 1.9801625210291767, "learning_rate": 9.69326674332553e-07, "loss": 0.5519, "step": 7981 }, { "epoch": 0.13874741434754645, "grad_norm": 1.185440945000228, "learning_rate": 9.693169659484573e-07, "loss": 0.4774, "step": 7982 }, { "epoch": 0.13876479688504928, "grad_norm": 2.188173953935713, "learning_rate": 9.693072560768427e-07, "loss": 0.5726, "step": 7983 }, { "epoch": 0.1387821794225521, "grad_norm": 1.9608513150428013, "learning_rate": 9.692975447177395e-07, "loss": 0.9088, "step": 7984 }, { "epoch": 0.13879956196005494, "grad_norm": 3.0656544759611783, "learning_rate": 9.692878318711787e-07, "loss": 0.5134, "step": 7985 }, { "epoch": 0.13881694449755774, "grad_norm": 1.723827334876118, "learning_rate": 9.69278117537191e-07, "loss": 0.5896, "step": 7986 }, { "epoch": 0.13883432703506057, "grad_norm": 2.873642748225355, "learning_rate": 9.692684017158072e-07, "loss": 0.5995, "step": 7987 }, { "epoch": 0.1388517095725634, "grad_norm": 2.2716346032339776, "learning_rate": 9.692586844070582e-07, "loss": 0.5172, "step": 7988 }, { "epoch": 0.13886909211006623, "grad_norm": 2.3061891636010174, "learning_rate": 9.692489656109748e-07, "loss": 0.5708, "step": 7989 }, { "epoch": 0.13888647464756906, "grad_norm": 2.890131580396894, "learning_rate": 9.692392453275876e-07, "loss": 0.5682, "step": 7990 }, { "epoch": 0.13890385718507187, "grad_norm": 1.0423812473116474, "learning_rate": 9.692295235569276e-07, "loss": 0.3329, "step": 7991 }, { "epoch": 0.1389212397225747, "grad_norm": 6.780480800611268, "learning_rate": 9.692198002990255e-07, "loss": 0.4743, "step": 7992 }, { "epoch": 0.13893862226007753, "grad_norm": 2.111022364991155, "learning_rate": 9.692100755539123e-07, "loss": 0.4175, "step": 7993 }, { "epoch": 0.13895600479758036, "grad_norm": 1.2472993160223877, "learning_rate": 9.692003493216187e-07, "loss": 0.2673, "step": 7994 }, { "epoch": 0.1389733873350832, "grad_norm": 2.365383622690042, "learning_rate": 9.691906216021756e-07, "loss": 0.509, "step": 7995 }, { "epoch": 0.138990769872586, "grad_norm": 2.0699688862700056, "learning_rate": 9.691808923956134e-07, "loss": 0.6374, "step": 7996 }, { "epoch": 0.13900815241008882, "grad_norm": 2.2057310456476555, "learning_rate": 9.691711617019636e-07, "loss": 0.3464, "step": 7997 }, { "epoch": 0.13902553494759165, "grad_norm": 2.1342755012958095, "learning_rate": 9.691614295212566e-07, "loss": 0.5389, "step": 7998 }, { "epoch": 0.13904291748509448, "grad_norm": 2.407885309227508, "learning_rate": 9.691516958535235e-07, "loss": 0.5263, "step": 7999 }, { "epoch": 0.1390603000225973, "grad_norm": 2.4736918026797516, "learning_rate": 9.69141960698795e-07, "loss": 0.2923, "step": 8000 }, { "epoch": 0.1390776825601001, "grad_norm": 1.4475658534101348, "learning_rate": 9.691322240571018e-07, "loss": 0.4715, "step": 8001 }, { "epoch": 0.13909506509760294, "grad_norm": 1.8958688853194308, "learning_rate": 9.691224859284753e-07, "loss": 0.3015, "step": 8002 }, { "epoch": 0.13911244763510577, "grad_norm": 1.9513773915629367, "learning_rate": 9.691127463129459e-07, "loss": 0.4962, "step": 8003 }, { "epoch": 0.1391298301726086, "grad_norm": 2.057199207066094, "learning_rate": 9.691030052105444e-07, "loss": 0.6672, "step": 8004 }, { "epoch": 0.13914721271011143, "grad_norm": 2.5885545921227635, "learning_rate": 9.69093262621302e-07, "loss": 0.7629, "step": 8005 }, { "epoch": 0.13916459524761424, "grad_norm": 2.2410688591456673, "learning_rate": 9.690835185452494e-07, "loss": 0.6492, "step": 8006 }, { "epoch": 0.13918197778511707, "grad_norm": 1.937285075824045, "learning_rate": 9.690737729824175e-07, "loss": 0.3826, "step": 8007 }, { "epoch": 0.1391993603226199, "grad_norm": 1.4338512366685057, "learning_rate": 9.690640259328374e-07, "loss": 0.3369, "step": 8008 }, { "epoch": 0.13921674286012273, "grad_norm": 1.9350294109586883, "learning_rate": 9.690542773965395e-07, "loss": 0.6024, "step": 8009 }, { "epoch": 0.13923412539762556, "grad_norm": 1.729353195032507, "learning_rate": 9.690445273735553e-07, "loss": 0.3084, "step": 8010 }, { "epoch": 0.13925150793512836, "grad_norm": 1.5669552946496248, "learning_rate": 9.690347758639152e-07, "loss": 0.4553, "step": 8011 }, { "epoch": 0.1392688904726312, "grad_norm": 1.9934328972504716, "learning_rate": 9.690250228676505e-07, "loss": 0.3265, "step": 8012 }, { "epoch": 0.13928627301013402, "grad_norm": 1.7643418399486752, "learning_rate": 9.690152683847919e-07, "loss": 0.5341, "step": 8013 }, { "epoch": 0.13930365554763685, "grad_norm": 2.9791253187629105, "learning_rate": 9.690055124153702e-07, "loss": 0.4262, "step": 8014 }, { "epoch": 0.13932103808513968, "grad_norm": 2.074928433975742, "learning_rate": 9.689957549594164e-07, "loss": 0.6256, "step": 8015 }, { "epoch": 0.13933842062264248, "grad_norm": 3.3004586063826697, "learning_rate": 9.689859960169618e-07, "loss": 0.5225, "step": 8016 }, { "epoch": 0.13935580316014531, "grad_norm": 2.1432464051695272, "learning_rate": 9.689762355880368e-07, "loss": 0.6184, "step": 8017 }, { "epoch": 0.13937318569764814, "grad_norm": 1.5182758796216063, "learning_rate": 9.689664736726727e-07, "loss": 0.3014, "step": 8018 }, { "epoch": 0.13939056823515097, "grad_norm": 1.553054201594994, "learning_rate": 9.689567102709002e-07, "loss": 0.4683, "step": 8019 }, { "epoch": 0.1394079507726538, "grad_norm": 2.360153066884949, "learning_rate": 9.689469453827504e-07, "loss": 0.6577, "step": 8020 }, { "epoch": 0.1394253333101566, "grad_norm": 2.6542603197307733, "learning_rate": 9.689371790082542e-07, "loss": 0.6591, "step": 8021 }, { "epoch": 0.13944271584765944, "grad_norm": 2.488042896827106, "learning_rate": 9.689274111474426e-07, "loss": 0.4938, "step": 8022 }, { "epoch": 0.13946009838516227, "grad_norm": 1.929217575021431, "learning_rate": 9.689176418003465e-07, "loss": 0.3468, "step": 8023 }, { "epoch": 0.1394774809226651, "grad_norm": 2.794023691450068, "learning_rate": 9.689078709669969e-07, "loss": 0.3552, "step": 8024 }, { "epoch": 0.13949486346016793, "grad_norm": 2.348270207135149, "learning_rate": 9.688980986474246e-07, "loss": 0.3855, "step": 8025 }, { "epoch": 0.13951224599767073, "grad_norm": 1.9747744995173147, "learning_rate": 9.688883248416608e-07, "loss": 0.3605, "step": 8026 }, { "epoch": 0.13952962853517356, "grad_norm": 4.4556194565976, "learning_rate": 9.688785495497364e-07, "loss": 0.5693, "step": 8027 }, { "epoch": 0.1395470110726764, "grad_norm": 2.7450578673598, "learning_rate": 9.688687727716823e-07, "loss": 0.4335, "step": 8028 }, { "epoch": 0.13956439361017922, "grad_norm": 4.026326174940934, "learning_rate": 9.688589945075297e-07, "loss": 0.822, "step": 8029 }, { "epoch": 0.13958177614768205, "grad_norm": 1.1488637700242212, "learning_rate": 9.688492147573095e-07, "loss": 0.4497, "step": 8030 }, { "epoch": 0.13959915868518485, "grad_norm": 1.8050975250675907, "learning_rate": 9.688394335210525e-07, "loss": 0.5853, "step": 8031 }, { "epoch": 0.13961654122268768, "grad_norm": 1.1311711722455988, "learning_rate": 9.688296507987901e-07, "loss": 0.3162, "step": 8032 }, { "epoch": 0.13963392376019051, "grad_norm": 2.0389865166135634, "learning_rate": 9.688198665905529e-07, "loss": 0.4955, "step": 8033 }, { "epoch": 0.13965130629769335, "grad_norm": 1.3973422848736217, "learning_rate": 9.688100808963719e-07, "loss": 0.5303, "step": 8034 }, { "epoch": 0.13966868883519618, "grad_norm": 1.1614892857354724, "learning_rate": 9.688002937162786e-07, "loss": 0.5566, "step": 8035 }, { "epoch": 0.13968607137269898, "grad_norm": 1.707545041843453, "learning_rate": 9.687905050503035e-07, "loss": 0.3999, "step": 8036 }, { "epoch": 0.1397034539102018, "grad_norm": 1.2544743387663724, "learning_rate": 9.687807148984782e-07, "loss": 0.4333, "step": 8037 }, { "epoch": 0.13972083644770464, "grad_norm": 2.135198918454176, "learning_rate": 9.68770923260833e-07, "loss": 0.3328, "step": 8038 }, { "epoch": 0.13973821898520747, "grad_norm": 1.713657930292494, "learning_rate": 9.687611301373995e-07, "loss": 0.2638, "step": 8039 }, { "epoch": 0.1397556015227103, "grad_norm": 3.8559031457386164, "learning_rate": 9.687513355282084e-07, "loss": 0.5255, "step": 8040 }, { "epoch": 0.1397729840602131, "grad_norm": 2.4778085427281056, "learning_rate": 9.68741539433291e-07, "loss": 0.4808, "step": 8041 }, { "epoch": 0.13979036659771593, "grad_norm": 1.8246436830278174, "learning_rate": 9.687317418526784e-07, "loss": 0.3324, "step": 8042 }, { "epoch": 0.13980774913521876, "grad_norm": 2.6647351723563393, "learning_rate": 9.687219427864012e-07, "loss": 0.5429, "step": 8043 }, { "epoch": 0.1398251316727216, "grad_norm": 3.222263699705928, "learning_rate": 9.687121422344908e-07, "loss": 0.6471, "step": 8044 }, { "epoch": 0.13984251421022442, "grad_norm": 3.679258859474076, "learning_rate": 9.687023401969783e-07, "loss": 0.4954, "step": 8045 }, { "epoch": 0.13985989674772722, "grad_norm": 2.0007032334526924, "learning_rate": 9.686925366738947e-07, "loss": 0.5407, "step": 8046 }, { "epoch": 0.13987727928523006, "grad_norm": 1.9655267487192385, "learning_rate": 9.68682731665271e-07, "loss": 0.4667, "step": 8047 }, { "epoch": 0.13989466182273289, "grad_norm": 2.790060296760332, "learning_rate": 9.686729251711384e-07, "loss": 0.6135, "step": 8048 }, { "epoch": 0.13991204436023572, "grad_norm": 2.213731744283264, "learning_rate": 9.68663117191528e-07, "loss": 0.4694, "step": 8049 }, { "epoch": 0.13992942689773852, "grad_norm": 1.2237801146704157, "learning_rate": 9.686533077264708e-07, "loss": 0.3854, "step": 8050 }, { "epoch": 0.13994680943524135, "grad_norm": 2.038949292959014, "learning_rate": 9.686434967759977e-07, "loss": 0.3205, "step": 8051 }, { "epoch": 0.13996419197274418, "grad_norm": 1.7501426951996355, "learning_rate": 9.6863368434014e-07, "loss": 0.4176, "step": 8052 }, { "epoch": 0.139981574510247, "grad_norm": 1.663496775677131, "learning_rate": 9.686238704189288e-07, "loss": 0.6232, "step": 8053 }, { "epoch": 0.13999895704774984, "grad_norm": 3.135745365548307, "learning_rate": 9.686140550123953e-07, "loss": 0.3617, "step": 8054 }, { "epoch": 0.14001633958525264, "grad_norm": 2.2285652923377364, "learning_rate": 9.686042381205705e-07, "loss": 0.4423, "step": 8055 }, { "epoch": 0.14003372212275547, "grad_norm": 1.7694685136141812, "learning_rate": 9.685944197434856e-07, "loss": 0.464, "step": 8056 }, { "epoch": 0.1400511046602583, "grad_norm": 2.357271157163639, "learning_rate": 9.685845998811716e-07, "loss": 0.4044, "step": 8057 }, { "epoch": 0.14006848719776113, "grad_norm": 1.6515628054443166, "learning_rate": 9.685747785336595e-07, "loss": 0.3184, "step": 8058 }, { "epoch": 0.14008586973526396, "grad_norm": 1.516893213376913, "learning_rate": 9.68564955700981e-07, "loss": 0.494, "step": 8059 }, { "epoch": 0.14010325227276677, "grad_norm": 1.6928680178592885, "learning_rate": 9.685551313831663e-07, "loss": 0.4885, "step": 8060 }, { "epoch": 0.1401206348102696, "grad_norm": 3.1971924097047193, "learning_rate": 9.685453055802475e-07, "loss": 0.5674, "step": 8061 }, { "epoch": 0.14013801734777243, "grad_norm": 1.556079040955293, "learning_rate": 9.68535478292255e-07, "loss": 0.4627, "step": 8062 }, { "epoch": 0.14015539988527526, "grad_norm": 1.9789131631013956, "learning_rate": 9.685256495192205e-07, "loss": 0.387, "step": 8063 }, { "epoch": 0.14017278242277809, "grad_norm": 1.7790287501801965, "learning_rate": 9.685158192611748e-07, "loss": 0.4433, "step": 8064 }, { "epoch": 0.1401901649602809, "grad_norm": 2.3603659560917922, "learning_rate": 9.685059875181493e-07, "loss": 0.4635, "step": 8065 }, { "epoch": 0.14020754749778372, "grad_norm": 2.9408100952161096, "learning_rate": 9.684961542901748e-07, "loss": 0.631, "step": 8066 }, { "epoch": 0.14022493003528655, "grad_norm": 1.8383279877996817, "learning_rate": 9.684863195772828e-07, "loss": 0.3785, "step": 8067 }, { "epoch": 0.14024231257278938, "grad_norm": 1.248236317655854, "learning_rate": 9.684764833795045e-07, "loss": 0.4835, "step": 8068 }, { "epoch": 0.1402596951102922, "grad_norm": 1.3927283654747757, "learning_rate": 9.684666456968707e-07, "loss": 0.7204, "step": 8069 }, { "epoch": 0.140277077647795, "grad_norm": 2.2257551503484234, "learning_rate": 9.684568065294129e-07, "loss": 0.6202, "step": 8070 }, { "epoch": 0.14029446018529784, "grad_norm": 1.3024540810417427, "learning_rate": 9.684469658771622e-07, "loss": 0.5602, "step": 8071 }, { "epoch": 0.14031184272280067, "grad_norm": 1.9738711460138376, "learning_rate": 9.6843712374015e-07, "loss": 0.6781, "step": 8072 }, { "epoch": 0.1403292252603035, "grad_norm": 2.467940845512931, "learning_rate": 9.684272801184071e-07, "loss": 0.2615, "step": 8073 }, { "epoch": 0.14034660779780633, "grad_norm": 2.5510972351695105, "learning_rate": 9.684174350119651e-07, "loss": 0.3681, "step": 8074 }, { "epoch": 0.14036399033530914, "grad_norm": 3.7630214334409415, "learning_rate": 9.68407588420855e-07, "loss": 0.6603, "step": 8075 }, { "epoch": 0.14038137287281197, "grad_norm": 1.653167463135697, "learning_rate": 9.683977403451079e-07, "loss": 0.4574, "step": 8076 }, { "epoch": 0.1403987554103148, "grad_norm": 1.8080593554846986, "learning_rate": 9.68387890784755e-07, "loss": 0.3282, "step": 8077 }, { "epoch": 0.14041613794781763, "grad_norm": 2.9066070309720167, "learning_rate": 9.683780397398278e-07, "loss": 0.8138, "step": 8078 }, { "epoch": 0.14043352048532046, "grad_norm": 2.714702102293417, "learning_rate": 9.683681872103575e-07, "loss": 0.3947, "step": 8079 }, { "epoch": 0.14045090302282326, "grad_norm": 1.9638622229853906, "learning_rate": 9.68358333196375e-07, "loss": 0.4413, "step": 8080 }, { "epoch": 0.1404682855603261, "grad_norm": 3.1913124586015327, "learning_rate": 9.683484776979116e-07, "loss": 0.361, "step": 8081 }, { "epoch": 0.14048566809782892, "grad_norm": 1.4986758452314046, "learning_rate": 9.68338620714999e-07, "loss": 0.3435, "step": 8082 }, { "epoch": 0.14050305063533175, "grad_norm": 2.341790324476971, "learning_rate": 9.683287622476678e-07, "loss": 0.4194, "step": 8083 }, { "epoch": 0.14052043317283458, "grad_norm": 1.9611340259187195, "learning_rate": 9.683189022959498e-07, "loss": 0.4414, "step": 8084 }, { "epoch": 0.14053781571033738, "grad_norm": 1.1472219399798378, "learning_rate": 9.68309040859876e-07, "loss": 0.5825, "step": 8085 }, { "epoch": 0.1405551982478402, "grad_norm": 1.7214774522905283, "learning_rate": 9.682991779394775e-07, "loss": 0.8955, "step": 8086 }, { "epoch": 0.14057258078534304, "grad_norm": 1.6249886861052647, "learning_rate": 9.682893135347859e-07, "loss": 0.2299, "step": 8087 }, { "epoch": 0.14058996332284587, "grad_norm": 2.1284722853055045, "learning_rate": 9.682794476458321e-07, "loss": 0.5496, "step": 8088 }, { "epoch": 0.1406073458603487, "grad_norm": 3.798660811744036, "learning_rate": 9.682695802726477e-07, "loss": 0.3951, "step": 8089 }, { "epoch": 0.1406247283978515, "grad_norm": 2.0599610655681184, "learning_rate": 9.682597114152638e-07, "loss": 0.664, "step": 8090 }, { "epoch": 0.14064211093535434, "grad_norm": 1.5409279576956139, "learning_rate": 9.682498410737117e-07, "loss": 0.4939, "step": 8091 }, { "epoch": 0.14065949347285717, "grad_norm": 2.653284775090728, "learning_rate": 9.682399692480228e-07, "loss": 0.6402, "step": 8092 }, { "epoch": 0.14067687601036, "grad_norm": 1.9087339439224738, "learning_rate": 9.682300959382282e-07, "loss": 0.3419, "step": 8093 }, { "epoch": 0.14069425854786283, "grad_norm": 1.4566664341211129, "learning_rate": 9.682202211443593e-07, "loss": 0.3783, "step": 8094 }, { "epoch": 0.14071164108536563, "grad_norm": 1.8083387995510496, "learning_rate": 9.682103448664473e-07, "loss": 0.746, "step": 8095 }, { "epoch": 0.14072902362286846, "grad_norm": 1.531784786781235, "learning_rate": 9.682004671045237e-07, "loss": 0.2783, "step": 8096 }, { "epoch": 0.1407464061603713, "grad_norm": 2.3049388268575495, "learning_rate": 9.681905878586197e-07, "loss": 0.508, "step": 8097 }, { "epoch": 0.14076378869787412, "grad_norm": 2.207770550724717, "learning_rate": 9.681807071287665e-07, "loss": 0.2892, "step": 8098 }, { "epoch": 0.14078117123537695, "grad_norm": 2.010689115817116, "learning_rate": 9.681708249149954e-07, "loss": 0.3547, "step": 8099 }, { "epoch": 0.14079855377287975, "grad_norm": 1.64619938706299, "learning_rate": 9.681609412173383e-07, "loss": 0.5114, "step": 8100 }, { "epoch": 0.14081593631038258, "grad_norm": 1.7661422324554836, "learning_rate": 9.681510560358255e-07, "loss": 0.3605, "step": 8101 }, { "epoch": 0.1408333188478854, "grad_norm": 2.049004111837937, "learning_rate": 9.681411693704893e-07, "loss": 0.8049, "step": 8102 }, { "epoch": 0.14085070138538824, "grad_norm": 1.9774286047459795, "learning_rate": 9.681312812213604e-07, "loss": 0.3079, "step": 8103 }, { "epoch": 0.14086808392289107, "grad_norm": 1.9987745823699878, "learning_rate": 9.681213915884707e-07, "loss": 0.6895, "step": 8104 }, { "epoch": 0.14088546646039388, "grad_norm": 1.5531502590470987, "learning_rate": 9.68111500471851e-07, "loss": 0.6117, "step": 8105 }, { "epoch": 0.1409028489978967, "grad_norm": 4.496191349807423, "learning_rate": 9.681016078715328e-07, "loss": 0.6157, "step": 8106 }, { "epoch": 0.14092023153539954, "grad_norm": 2.3401367077261614, "learning_rate": 9.680917137875477e-07, "loss": 0.5396, "step": 8107 }, { "epoch": 0.14093761407290237, "grad_norm": 1.4264389952823353, "learning_rate": 9.680818182199268e-07, "loss": 0.6968, "step": 8108 }, { "epoch": 0.1409549966104052, "grad_norm": 2.3344042222263615, "learning_rate": 9.680719211687016e-07, "loss": 0.3393, "step": 8109 }, { "epoch": 0.140972379147908, "grad_norm": 1.903270547262333, "learning_rate": 9.680620226339036e-07, "loss": 0.4628, "step": 8110 }, { "epoch": 0.14098976168541083, "grad_norm": 2.3092157377934996, "learning_rate": 9.680521226155637e-07, "loss": 0.406, "step": 8111 }, { "epoch": 0.14100714422291366, "grad_norm": 1.5478987940426217, "learning_rate": 9.680422211137136e-07, "loss": 0.4628, "step": 8112 }, { "epoch": 0.1410245267604165, "grad_norm": 1.7906521525974266, "learning_rate": 9.680323181283848e-07, "loss": 0.3727, "step": 8113 }, { "epoch": 0.14104190929791932, "grad_norm": 1.7310450191884605, "learning_rate": 9.680224136596085e-07, "loss": 0.5017, "step": 8114 }, { "epoch": 0.14105929183542212, "grad_norm": 1.508210122946102, "learning_rate": 9.680125077074161e-07, "loss": 0.598, "step": 8115 }, { "epoch": 0.14107667437292495, "grad_norm": 1.7300000474337007, "learning_rate": 9.680026002718391e-07, "loss": 0.5142, "step": 8116 }, { "epoch": 0.14109405691042778, "grad_norm": 1.4335730666286306, "learning_rate": 9.679926913529088e-07, "loss": 0.4161, "step": 8117 }, { "epoch": 0.14111143944793061, "grad_norm": 1.4896644133430015, "learning_rate": 9.679827809506567e-07, "loss": 0.4541, "step": 8118 }, { "epoch": 0.14112882198543344, "grad_norm": 1.1793976439728953, "learning_rate": 9.679728690651142e-07, "loss": 0.532, "step": 8119 }, { "epoch": 0.14114620452293625, "grad_norm": 1.7503795456459441, "learning_rate": 9.679629556963124e-07, "loss": 0.4772, "step": 8120 }, { "epoch": 0.14116358706043908, "grad_norm": 1.7986791526730894, "learning_rate": 9.679530408442833e-07, "loss": 0.2348, "step": 8121 }, { "epoch": 0.1411809695979419, "grad_norm": 1.2854013123962043, "learning_rate": 9.67943124509058e-07, "loss": 0.3076, "step": 8122 }, { "epoch": 0.14119835213544474, "grad_norm": 1.8065900190636925, "learning_rate": 9.679332066906677e-07, "loss": 0.6842, "step": 8123 }, { "epoch": 0.14121573467294757, "grad_norm": 2.0858433480342917, "learning_rate": 9.679232873891443e-07, "loss": 0.3166, "step": 8124 }, { "epoch": 0.14123311721045037, "grad_norm": 1.3732544017138688, "learning_rate": 9.67913366604519e-07, "loss": 0.5463, "step": 8125 }, { "epoch": 0.1412504997479532, "grad_norm": 2.2593835130686184, "learning_rate": 9.679034443368232e-07, "loss": 0.516, "step": 8126 }, { "epoch": 0.14126788228545603, "grad_norm": 2.679197505511921, "learning_rate": 9.678935205860884e-07, "loss": 0.9126, "step": 8127 }, { "epoch": 0.14128526482295886, "grad_norm": 1.9997064176812407, "learning_rate": 9.678835953523464e-07, "loss": 0.4504, "step": 8128 }, { "epoch": 0.1413026473604617, "grad_norm": 1.9558944141187133, "learning_rate": 9.67873668635628e-07, "loss": 0.3935, "step": 8129 }, { "epoch": 0.1413200298979645, "grad_norm": 1.508274239843349, "learning_rate": 9.67863740435965e-07, "loss": 0.6281, "step": 8130 }, { "epoch": 0.14133741243546732, "grad_norm": 2.2877040933697947, "learning_rate": 9.678538107533888e-07, "loss": 0.746, "step": 8131 }, { "epoch": 0.14135479497297015, "grad_norm": 3.29530510164366, "learning_rate": 9.678438795879312e-07, "loss": 0.5975, "step": 8132 }, { "epoch": 0.14137217751047298, "grad_norm": 2.1732879319323395, "learning_rate": 9.678339469396234e-07, "loss": 0.6917, "step": 8133 }, { "epoch": 0.14138956004797582, "grad_norm": 1.523387747656065, "learning_rate": 9.678240128084967e-07, "loss": 0.4011, "step": 8134 }, { "epoch": 0.14140694258547862, "grad_norm": 1.1965436240423313, "learning_rate": 9.678140771945827e-07, "loss": 0.3478, "step": 8135 }, { "epoch": 0.14142432512298145, "grad_norm": 2.164084882595447, "learning_rate": 9.67804140097913e-07, "loss": 0.3701, "step": 8136 }, { "epoch": 0.14144170766048428, "grad_norm": 1.6722643496172858, "learning_rate": 9.677942015185194e-07, "loss": 0.4747, "step": 8137 }, { "epoch": 0.1414590901979871, "grad_norm": 1.8002339328260184, "learning_rate": 9.677842614564329e-07, "loss": 0.9031, "step": 8138 }, { "epoch": 0.14147647273548994, "grad_norm": 2.7799420413091744, "learning_rate": 9.67774319911685e-07, "loss": 0.3531, "step": 8139 }, { "epoch": 0.14149385527299274, "grad_norm": 1.9143771331339712, "learning_rate": 9.677643768843075e-07, "loss": 0.4874, "step": 8140 }, { "epoch": 0.14151123781049557, "grad_norm": 1.441511311658489, "learning_rate": 9.677544323743318e-07, "loss": 0.3238, "step": 8141 }, { "epoch": 0.1415286203479984, "grad_norm": 1.841180083895908, "learning_rate": 9.677444863817894e-07, "loss": 0.291, "step": 8142 }, { "epoch": 0.14154600288550123, "grad_norm": 2.282306878153491, "learning_rate": 9.67734538906712e-07, "loss": 0.5886, "step": 8143 }, { "epoch": 0.14156338542300406, "grad_norm": 2.252731793831613, "learning_rate": 9.677245899491307e-07, "loss": 0.8659, "step": 8144 }, { "epoch": 0.14158076796050686, "grad_norm": 1.4501713156420541, "learning_rate": 9.677146395090775e-07, "loss": 0.8196, "step": 8145 }, { "epoch": 0.1415981504980097, "grad_norm": 2.707208566551165, "learning_rate": 9.677046875865837e-07, "loss": 0.749, "step": 8146 }, { "epoch": 0.14161553303551253, "grad_norm": 3.6001284429181677, "learning_rate": 9.676947341816808e-07, "loss": 0.5725, "step": 8147 }, { "epoch": 0.14163291557301536, "grad_norm": 1.4096648611219338, "learning_rate": 9.676847792944005e-07, "loss": 0.5433, "step": 8148 }, { "epoch": 0.14165029811051819, "grad_norm": 1.4810306623519918, "learning_rate": 9.676748229247745e-07, "loss": 0.3771, "step": 8149 }, { "epoch": 0.141667680648021, "grad_norm": 2.119080608055612, "learning_rate": 9.676648650728338e-07, "loss": 0.3552, "step": 8150 }, { "epoch": 0.14168506318552382, "grad_norm": 1.567532343457489, "learning_rate": 9.676549057386105e-07, "loss": 0.4391, "step": 8151 }, { "epoch": 0.14170244572302665, "grad_norm": 1.6564965753637015, "learning_rate": 9.676449449221358e-07, "loss": 0.3956, "step": 8152 }, { "epoch": 0.14171982826052948, "grad_norm": 3.1327453846102515, "learning_rate": 9.676349826234416e-07, "loss": 0.5682, "step": 8153 }, { "epoch": 0.1417372107980323, "grad_norm": 2.237832574594571, "learning_rate": 9.67625018842559e-07, "loss": 0.4103, "step": 8154 }, { "epoch": 0.1417545933355351, "grad_norm": 2.0670681465054184, "learning_rate": 9.676150535795204e-07, "loss": 0.4451, "step": 8155 }, { "epoch": 0.14177197587303794, "grad_norm": 1.3874040578530664, "learning_rate": 9.676050868343568e-07, "loss": 0.5458, "step": 8156 }, { "epoch": 0.14178935841054077, "grad_norm": 1.6508350660862965, "learning_rate": 9.675951186070996e-07, "loss": 0.3599, "step": 8157 }, { "epoch": 0.1418067409480436, "grad_norm": 3.61122440621134, "learning_rate": 9.675851488977808e-07, "loss": 0.6118, "step": 8158 }, { "epoch": 0.14182412348554643, "grad_norm": 1.6489977362656267, "learning_rate": 9.67575177706432e-07, "loss": 0.348, "step": 8159 }, { "epoch": 0.14184150602304924, "grad_norm": 2.685764292963009, "learning_rate": 9.675652050330844e-07, "loss": 0.526, "step": 8160 }, { "epoch": 0.14185888856055207, "grad_norm": 1.5704826528021611, "learning_rate": 9.675552308777702e-07, "loss": 0.5744, "step": 8161 }, { "epoch": 0.1418762710980549, "grad_norm": 3.371487524377043, "learning_rate": 9.675452552405205e-07, "loss": 0.3607, "step": 8162 }, { "epoch": 0.14189365363555773, "grad_norm": 1.3174238397281197, "learning_rate": 9.67535278121367e-07, "loss": 0.4831, "step": 8163 }, { "epoch": 0.14191103617306056, "grad_norm": 1.443734375832408, "learning_rate": 9.675252995203415e-07, "loss": 0.3162, "step": 8164 }, { "epoch": 0.14192841871056336, "grad_norm": 1.4790256425291917, "learning_rate": 9.675153194374758e-07, "loss": 0.3337, "step": 8165 }, { "epoch": 0.1419458012480662, "grad_norm": 1.2985570889252867, "learning_rate": 9.67505337872801e-07, "loss": 0.4417, "step": 8166 }, { "epoch": 0.14196318378556902, "grad_norm": 2.3048948529922844, "learning_rate": 9.674953548263492e-07, "loss": 0.7374, "step": 8167 }, { "epoch": 0.14198056632307185, "grad_norm": 2.2336740227122065, "learning_rate": 9.67485370298152e-07, "loss": 0.6865, "step": 8168 }, { "epoch": 0.14199794886057468, "grad_norm": 1.068176527672749, "learning_rate": 9.674753842882406e-07, "loss": 0.3504, "step": 8169 }, { "epoch": 0.14201533139807748, "grad_norm": 1.7401850320960517, "learning_rate": 9.674653967966473e-07, "loss": 0.4227, "step": 8170 }, { "epoch": 0.1420327139355803, "grad_norm": 2.03294797761703, "learning_rate": 9.674554078234032e-07, "loss": 0.4733, "step": 8171 }, { "epoch": 0.14205009647308314, "grad_norm": 5.448718390183547, "learning_rate": 9.674454173685402e-07, "loss": 0.4849, "step": 8172 }, { "epoch": 0.14206747901058597, "grad_norm": 1.771314728101788, "learning_rate": 9.6743542543209e-07, "loss": 0.6185, "step": 8173 }, { "epoch": 0.1420848615480888, "grad_norm": 1.9184460031178487, "learning_rate": 9.674254320140842e-07, "loss": 0.5306, "step": 8174 }, { "epoch": 0.1421022440855916, "grad_norm": 1.0711146687819757, "learning_rate": 9.674154371145547e-07, "loss": 0.2887, "step": 8175 }, { "epoch": 0.14211962662309444, "grad_norm": 2.180594557235142, "learning_rate": 9.674054407335327e-07, "loss": 0.3277, "step": 8176 }, { "epoch": 0.14213700916059727, "grad_norm": 2.4887563643724886, "learning_rate": 9.673954428710502e-07, "loss": 0.3544, "step": 8177 }, { "epoch": 0.1421543916981001, "grad_norm": 4.129386908302036, "learning_rate": 9.67385443527139e-07, "loss": 0.2639, "step": 8178 }, { "epoch": 0.14217177423560293, "grad_norm": 1.8567887814669182, "learning_rate": 9.673754427018306e-07, "loss": 0.6505, "step": 8179 }, { "epoch": 0.14218915677310573, "grad_norm": 2.6307488388179316, "learning_rate": 9.673654403951566e-07, "loss": 0.5165, "step": 8180 }, { "epoch": 0.14220653931060856, "grad_norm": 3.736511238572064, "learning_rate": 9.67355436607149e-07, "loss": 0.5711, "step": 8181 }, { "epoch": 0.1422239218481114, "grad_norm": 1.650252209321752, "learning_rate": 9.673454313378393e-07, "loss": 0.3705, "step": 8182 }, { "epoch": 0.14224130438561422, "grad_norm": 0.8558837085318125, "learning_rate": 9.673354245872592e-07, "loss": 0.601, "step": 8183 }, { "epoch": 0.14225868692311705, "grad_norm": 1.5880643513564845, "learning_rate": 9.673254163554406e-07, "loss": 0.4831, "step": 8184 }, { "epoch": 0.14227606946061985, "grad_norm": 1.9200188057227112, "learning_rate": 9.67315406642415e-07, "loss": 0.4427, "step": 8185 }, { "epoch": 0.14229345199812268, "grad_norm": 1.5334857440475425, "learning_rate": 9.673053954482143e-07, "loss": 0.4864, "step": 8186 }, { "epoch": 0.1423108345356255, "grad_norm": 1.6698019286188381, "learning_rate": 9.672953827728701e-07, "loss": 0.2965, "step": 8187 }, { "epoch": 0.14232821707312834, "grad_norm": 5.832528567591206, "learning_rate": 9.672853686164144e-07, "loss": 0.4307, "step": 8188 }, { "epoch": 0.14234559961063115, "grad_norm": 1.5541917817851891, "learning_rate": 9.672753529788785e-07, "loss": 0.4899, "step": 8189 }, { "epoch": 0.14236298214813398, "grad_norm": 1.4376788009999906, "learning_rate": 9.672653358602945e-07, "loss": 0.4765, "step": 8190 }, { "epoch": 0.1423803646856368, "grad_norm": 2.5481263272635966, "learning_rate": 9.67255317260694e-07, "loss": 0.4049, "step": 8191 }, { "epoch": 0.14239774722313964, "grad_norm": 1.9157112054317245, "learning_rate": 9.672452971801086e-07, "loss": 0.2478, "step": 8192 }, { "epoch": 0.14241512976064247, "grad_norm": 1.7175938159241826, "learning_rate": 9.672352756185704e-07, "loss": 0.3225, "step": 8193 }, { "epoch": 0.14243251229814527, "grad_norm": 1.9102983938996196, "learning_rate": 9.67225252576111e-07, "loss": 0.6341, "step": 8194 }, { "epoch": 0.1424498948356481, "grad_norm": 1.5519777039381815, "learning_rate": 9.672152280527621e-07, "loss": 0.5412, "step": 8195 }, { "epoch": 0.14246727737315093, "grad_norm": 1.7612519948872387, "learning_rate": 9.672052020485558e-07, "loss": 0.4856, "step": 8196 }, { "epoch": 0.14248465991065376, "grad_norm": 2.8605703748397855, "learning_rate": 9.671951745635235e-07, "loss": 0.5122, "step": 8197 }, { "epoch": 0.1425020424481566, "grad_norm": 2.4643925634020962, "learning_rate": 9.67185145597697e-07, "loss": 0.6469, "step": 8198 }, { "epoch": 0.1425194249856594, "grad_norm": 2.423394260008012, "learning_rate": 9.671751151511082e-07, "loss": 0.4009, "step": 8199 }, { "epoch": 0.14253680752316222, "grad_norm": 2.4722038982997443, "learning_rate": 9.67165083223789e-07, "loss": 0.4245, "step": 8200 }, { "epoch": 0.14255419006066505, "grad_norm": 1.5642935643990958, "learning_rate": 9.67155049815771e-07, "loss": 0.5638, "step": 8201 }, { "epoch": 0.14257157259816788, "grad_norm": 2.053064115307098, "learning_rate": 9.67145014927086e-07, "loss": 0.6417, "step": 8202 }, { "epoch": 0.14258895513567071, "grad_norm": 1.34263674679495, "learning_rate": 9.67134978557766e-07, "loss": 0.5097, "step": 8203 }, { "epoch": 0.14260633767317352, "grad_norm": 4.2705096821410615, "learning_rate": 9.671249407078428e-07, "loss": 0.3886, "step": 8204 }, { "epoch": 0.14262372021067635, "grad_norm": 2.2499988472268404, "learning_rate": 9.67114901377348e-07, "loss": 0.3984, "step": 8205 }, { "epoch": 0.14264110274817918, "grad_norm": 1.3234323970850876, "learning_rate": 9.671048605663136e-07, "loss": 0.4347, "step": 8206 }, { "epoch": 0.142658485285682, "grad_norm": 6.27545802454435, "learning_rate": 9.670948182747714e-07, "loss": 0.3785, "step": 8207 }, { "epoch": 0.14267586782318484, "grad_norm": 2.145946618325831, "learning_rate": 9.67084774502753e-07, "loss": 0.6624, "step": 8208 }, { "epoch": 0.14269325036068764, "grad_norm": 0.9895022472080847, "learning_rate": 9.670747292502906e-07, "loss": 0.3014, "step": 8209 }, { "epoch": 0.14271063289819047, "grad_norm": 2.4506064011349413, "learning_rate": 9.670646825174158e-07, "loss": 0.7909, "step": 8210 }, { "epoch": 0.1427280154356933, "grad_norm": 2.177212622828183, "learning_rate": 9.670546343041606e-07, "loss": 0.8602, "step": 8211 }, { "epoch": 0.14274539797319613, "grad_norm": 2.286300043874501, "learning_rate": 9.670445846105566e-07, "loss": 0.5401, "step": 8212 }, { "epoch": 0.14276278051069896, "grad_norm": 1.1312549100822082, "learning_rate": 9.67034533436636e-07, "loss": 0.5387, "step": 8213 }, { "epoch": 0.14278016304820176, "grad_norm": 1.8902010719159699, "learning_rate": 9.6702448078243e-07, "loss": 0.6785, "step": 8214 }, { "epoch": 0.1427975455857046, "grad_norm": 1.879127300995116, "learning_rate": 9.670144266479714e-07, "loss": 0.5178, "step": 8215 }, { "epoch": 0.14281492812320742, "grad_norm": 1.68682195882969, "learning_rate": 9.670043710332917e-07, "loss": 0.2504, "step": 8216 }, { "epoch": 0.14283231066071025, "grad_norm": 2.5466261748792776, "learning_rate": 9.669943139384223e-07, "loss": 0.9756, "step": 8217 }, { "epoch": 0.14284969319821308, "grad_norm": 1.8815255717201604, "learning_rate": 9.669842553633955e-07, "loss": 0.2964, "step": 8218 }, { "epoch": 0.1428670757357159, "grad_norm": 1.8158651369761578, "learning_rate": 9.669741953082432e-07, "loss": 0.4698, "step": 8219 }, { "epoch": 0.14288445827321872, "grad_norm": 1.5741746601791844, "learning_rate": 9.669641337729974e-07, "loss": 0.9209, "step": 8220 }, { "epoch": 0.14290184081072155, "grad_norm": 1.9671855513695629, "learning_rate": 9.669540707576895e-07, "loss": 0.3978, "step": 8221 }, { "epoch": 0.14291922334822438, "grad_norm": 2.139050217996181, "learning_rate": 9.66944006262352e-07, "loss": 0.5063, "step": 8222 }, { "epoch": 0.1429366058857272, "grad_norm": 1.9095454307326056, "learning_rate": 9.669339402870162e-07, "loss": 0.4153, "step": 8223 }, { "epoch": 0.14295398842323, "grad_norm": 1.6487549130756982, "learning_rate": 9.669238728317144e-07, "loss": 0.5061, "step": 8224 }, { "epoch": 0.14297137096073284, "grad_norm": 2.2062773474212323, "learning_rate": 9.669138038964783e-07, "loss": 0.6033, "step": 8225 }, { "epoch": 0.14298875349823567, "grad_norm": 1.611134103154826, "learning_rate": 9.669037334813401e-07, "loss": 0.6212, "step": 8226 }, { "epoch": 0.1430061360357385, "grad_norm": 1.9232549792668894, "learning_rate": 9.668936615863317e-07, "loss": 0.6021, "step": 8227 }, { "epoch": 0.14302351857324133, "grad_norm": 2.3254639284502767, "learning_rate": 9.668835882114846e-07, "loss": 0.3723, "step": 8228 }, { "epoch": 0.14304090111074413, "grad_norm": 2.089054479965879, "learning_rate": 9.66873513356831e-07, "loss": 0.3708, "step": 8229 }, { "epoch": 0.14305828364824696, "grad_norm": 1.4436880593210821, "learning_rate": 9.66863437022403e-07, "loss": 0.4123, "step": 8230 }, { "epoch": 0.1430756661857498, "grad_norm": 1.0521850296238122, "learning_rate": 9.668533592082322e-07, "loss": 0.4668, "step": 8231 }, { "epoch": 0.14309304872325262, "grad_norm": 1.0130984438711501, "learning_rate": 9.668432799143507e-07, "loss": 0.3918, "step": 8232 }, { "epoch": 0.14311043126075546, "grad_norm": 2.269577150264623, "learning_rate": 9.668331991407905e-07, "loss": 0.7623, "step": 8233 }, { "epoch": 0.14312781379825826, "grad_norm": 2.763955440449718, "learning_rate": 9.668231168875836e-07, "loss": 0.7514, "step": 8234 }, { "epoch": 0.1431451963357611, "grad_norm": 1.5538611432150022, "learning_rate": 9.668130331547618e-07, "loss": 0.4577, "step": 8235 }, { "epoch": 0.14316257887326392, "grad_norm": 4.056123421389185, "learning_rate": 9.66802947942357e-07, "loss": 0.686, "step": 8236 }, { "epoch": 0.14317996141076675, "grad_norm": 1.664085344237277, "learning_rate": 9.667928612504014e-07, "loss": 0.3421, "step": 8237 }, { "epoch": 0.14319734394826958, "grad_norm": 1.6425327964132248, "learning_rate": 9.667827730789268e-07, "loss": 0.6032, "step": 8238 }, { "epoch": 0.14321472648577238, "grad_norm": 1.4473675990818438, "learning_rate": 9.667726834279652e-07, "loss": 0.3951, "step": 8239 }, { "epoch": 0.1432321090232752, "grad_norm": 1.5685864327637438, "learning_rate": 9.66762592297549e-07, "loss": 0.3619, "step": 8240 }, { "epoch": 0.14324949156077804, "grad_norm": 2.7537449498925564, "learning_rate": 9.667524996877092e-07, "loss": 0.4928, "step": 8241 }, { "epoch": 0.14326687409828087, "grad_norm": 1.5132633902773498, "learning_rate": 9.667424055984787e-07, "loss": 0.3089, "step": 8242 }, { "epoch": 0.1432842566357837, "grad_norm": 1.5850109817230638, "learning_rate": 9.667323100298891e-07, "loss": 0.4467, "step": 8243 }, { "epoch": 0.1433016391732865, "grad_norm": 1.5596462826317934, "learning_rate": 9.667222129819726e-07, "loss": 0.2301, "step": 8244 }, { "epoch": 0.14331902171078933, "grad_norm": 1.5141436157949526, "learning_rate": 9.667121144547609e-07, "loss": 0.4315, "step": 8245 }, { "epoch": 0.14333640424829217, "grad_norm": 1.4099829890723796, "learning_rate": 9.667020144482864e-07, "loss": 0.2792, "step": 8246 }, { "epoch": 0.143353786785795, "grad_norm": 2.71742954010568, "learning_rate": 9.666919129625808e-07, "loss": 0.5286, "step": 8247 }, { "epoch": 0.14337116932329783, "grad_norm": 2.2748447067731363, "learning_rate": 9.66681809997676e-07, "loss": 1.3075, "step": 8248 }, { "epoch": 0.14338855186080063, "grad_norm": 3.9530219784069955, "learning_rate": 9.666717055536046e-07, "loss": 0.4058, "step": 8249 }, { "epoch": 0.14340593439830346, "grad_norm": 2.4450138802830566, "learning_rate": 9.666615996303983e-07, "loss": 0.5788, "step": 8250 }, { "epoch": 0.1434233169358063, "grad_norm": 1.4112250247478884, "learning_rate": 9.666514922280888e-07, "loss": 0.2173, "step": 8251 }, { "epoch": 0.14344069947330912, "grad_norm": 1.3689656968647033, "learning_rate": 9.666413833467084e-07, "loss": 0.4211, "step": 8252 }, { "epoch": 0.14345808201081195, "grad_norm": 1.7142424907538507, "learning_rate": 9.666312729862893e-07, "loss": 0.5403, "step": 8253 }, { "epoch": 0.14347546454831475, "grad_norm": 1.5681116147816923, "learning_rate": 9.666211611468634e-07, "loss": 0.7094, "step": 8254 }, { "epoch": 0.14349284708581758, "grad_norm": 2.926703617899459, "learning_rate": 9.666110478284627e-07, "loss": 0.2916, "step": 8255 }, { "epoch": 0.1435102296233204, "grad_norm": 2.2870247716832766, "learning_rate": 9.666009330311197e-07, "loss": 0.5405, "step": 8256 }, { "epoch": 0.14352761216082324, "grad_norm": 1.7668357255518619, "learning_rate": 9.665908167548657e-07, "loss": 0.5675, "step": 8257 }, { "epoch": 0.14354499469832607, "grad_norm": 1.2274737292223958, "learning_rate": 9.665806989997331e-07, "loss": 0.2954, "step": 8258 }, { "epoch": 0.14356237723582888, "grad_norm": 2.1616777477578464, "learning_rate": 9.66570579765754e-07, "loss": 0.6771, "step": 8259 }, { "epoch": 0.1435797597733317, "grad_norm": 2.066562479792961, "learning_rate": 9.665604590529608e-07, "loss": 0.4852, "step": 8260 }, { "epoch": 0.14359714231083454, "grad_norm": 2.3665856542785937, "learning_rate": 9.665503368613851e-07, "loss": 0.2361, "step": 8261 }, { "epoch": 0.14361452484833737, "grad_norm": 2.266622330134638, "learning_rate": 9.665402131910591e-07, "loss": 0.331, "step": 8262 }, { "epoch": 0.1436319073858402, "grad_norm": 2.2706347028151526, "learning_rate": 9.66530088042015e-07, "loss": 0.4143, "step": 8263 }, { "epoch": 0.143649289923343, "grad_norm": 3.9230980779802054, "learning_rate": 9.66519961414285e-07, "loss": 1.543, "step": 8264 }, { "epoch": 0.14366667246084583, "grad_norm": 1.2432718634964308, "learning_rate": 9.665098333079007e-07, "loss": 0.7114, "step": 8265 }, { "epoch": 0.14368405499834866, "grad_norm": 2.2169070293938544, "learning_rate": 9.664997037228948e-07, "loss": 0.43, "step": 8266 }, { "epoch": 0.1437014375358515, "grad_norm": 4.182228072377889, "learning_rate": 9.66489572659299e-07, "loss": 0.7458, "step": 8267 }, { "epoch": 0.14371882007335432, "grad_norm": 1.67803265008646, "learning_rate": 9.664794401171454e-07, "loss": 0.3896, "step": 8268 }, { "epoch": 0.14373620261085712, "grad_norm": 1.9054921194366974, "learning_rate": 9.664693060964663e-07, "loss": 0.5436, "step": 8269 }, { "epoch": 0.14375358514835995, "grad_norm": 1.662782198086461, "learning_rate": 9.66459170597294e-07, "loss": 0.4515, "step": 8270 }, { "epoch": 0.14377096768586278, "grad_norm": 5.341742811156925, "learning_rate": 9.664490336196601e-07, "loss": 0.6916, "step": 8271 }, { "epoch": 0.1437883502233656, "grad_norm": 1.7565027875406332, "learning_rate": 9.664388951635974e-07, "loss": 0.4841, "step": 8272 }, { "epoch": 0.14380573276086844, "grad_norm": 1.7505774357682353, "learning_rate": 9.664287552291374e-07, "loss": 0.792, "step": 8273 }, { "epoch": 0.14382311529837125, "grad_norm": 1.1621714508182144, "learning_rate": 9.664186138163126e-07, "loss": 0.4623, "step": 8274 }, { "epoch": 0.14384049783587408, "grad_norm": 1.7869952749493543, "learning_rate": 9.66408470925155e-07, "loss": 0.4211, "step": 8275 }, { "epoch": 0.1438578803733769, "grad_norm": 2.324277874711658, "learning_rate": 9.66398326555697e-07, "loss": 0.7273, "step": 8276 }, { "epoch": 0.14387526291087974, "grad_norm": 2.200859872148705, "learning_rate": 9.663881807079703e-07, "loss": 0.5106, "step": 8277 }, { "epoch": 0.14389264544838257, "grad_norm": 2.15598674929119, "learning_rate": 9.663780333820074e-07, "loss": 0.5441, "step": 8278 }, { "epoch": 0.14391002798588537, "grad_norm": 1.465963548553376, "learning_rate": 9.663678845778403e-07, "loss": 0.4692, "step": 8279 }, { "epoch": 0.1439274105233882, "grad_norm": 1.8716054015617338, "learning_rate": 9.663577342955014e-07, "loss": 0.5051, "step": 8280 }, { "epoch": 0.14394479306089103, "grad_norm": 1.245055270971901, "learning_rate": 9.663475825350226e-07, "loss": 0.3426, "step": 8281 }, { "epoch": 0.14396217559839386, "grad_norm": 1.5360197612094286, "learning_rate": 9.663374292964362e-07, "loss": 0.4068, "step": 8282 }, { "epoch": 0.1439795581358967, "grad_norm": 2.3971854468991434, "learning_rate": 9.663272745797744e-07, "loss": 0.4723, "step": 8283 }, { "epoch": 0.1439969406733995, "grad_norm": 1.5835275248024052, "learning_rate": 9.66317118385069e-07, "loss": 0.4629, "step": 8284 }, { "epoch": 0.14401432321090232, "grad_norm": 1.8745979810993103, "learning_rate": 9.66306960712353e-07, "loss": 0.3484, "step": 8285 }, { "epoch": 0.14403170574840515, "grad_norm": 2.4955934367870443, "learning_rate": 9.662968015616581e-07, "loss": 0.5371, "step": 8286 }, { "epoch": 0.14404908828590798, "grad_norm": 1.545573927784136, "learning_rate": 9.662866409330163e-07, "loss": 0.7703, "step": 8287 }, { "epoch": 0.1440664708234108, "grad_norm": 1.494138978773463, "learning_rate": 9.662764788264604e-07, "loss": 0.3048, "step": 8288 }, { "epoch": 0.14408385336091362, "grad_norm": 2.1031980019825114, "learning_rate": 9.66266315242022e-07, "loss": 0.7199, "step": 8289 }, { "epoch": 0.14410123589841645, "grad_norm": 1.1876285201948436, "learning_rate": 9.662561501797335e-07, "loss": 0.48, "step": 8290 }, { "epoch": 0.14411861843591928, "grad_norm": 3.412167479501787, "learning_rate": 9.662459836396272e-07, "loss": 0.4398, "step": 8291 }, { "epoch": 0.1441360009734221, "grad_norm": 2.241042731671369, "learning_rate": 9.662358156217355e-07, "loss": 0.5491, "step": 8292 }, { "epoch": 0.14415338351092494, "grad_norm": 1.5805490283102988, "learning_rate": 9.662256461260902e-07, "loss": 0.5798, "step": 8293 }, { "epoch": 0.14417076604842774, "grad_norm": 1.84332049977162, "learning_rate": 9.66215475152724e-07, "loss": 0.2616, "step": 8294 }, { "epoch": 0.14418814858593057, "grad_norm": 1.6644345780438685, "learning_rate": 9.662053027016689e-07, "loss": 0.5895, "step": 8295 }, { "epoch": 0.1442055311234334, "grad_norm": 1.5396107153094107, "learning_rate": 9.661951287729568e-07, "loss": 0.5441, "step": 8296 }, { "epoch": 0.14422291366093623, "grad_norm": 2.017324870666071, "learning_rate": 9.661849533666207e-07, "loss": 0.4787, "step": 8297 }, { "epoch": 0.14424029619843906, "grad_norm": 1.889777638171169, "learning_rate": 9.661747764826922e-07, "loss": 0.4385, "step": 8298 }, { "epoch": 0.14425767873594186, "grad_norm": 1.7084609847492134, "learning_rate": 9.66164598121204e-07, "loss": 0.5589, "step": 8299 }, { "epoch": 0.1442750612734447, "grad_norm": 1.519801133717072, "learning_rate": 9.66154418282188e-07, "loss": 0.2991, "step": 8300 }, { "epoch": 0.14429244381094752, "grad_norm": 1.3783523033150624, "learning_rate": 9.661442369656765e-07, "loss": 0.2012, "step": 8301 }, { "epoch": 0.14430982634845035, "grad_norm": 1.5809320610850437, "learning_rate": 9.661340541717023e-07, "loss": 0.7032, "step": 8302 }, { "epoch": 0.14432720888595318, "grad_norm": 2.245394073082447, "learning_rate": 9.66123869900297e-07, "loss": 0.3567, "step": 8303 }, { "epoch": 0.144344591423456, "grad_norm": 4.729831266440025, "learning_rate": 9.661136841514931e-07, "loss": 0.4853, "step": 8304 }, { "epoch": 0.14436197396095882, "grad_norm": 2.9028721120669525, "learning_rate": 9.66103496925323e-07, "loss": 0.3148, "step": 8305 }, { "epoch": 0.14437935649846165, "grad_norm": 1.254143287230572, "learning_rate": 9.660933082218191e-07, "loss": 0.357, "step": 8306 }, { "epoch": 0.14439673903596448, "grad_norm": 2.095068229701302, "learning_rate": 9.660831180410132e-07, "loss": 0.2471, "step": 8307 }, { "epoch": 0.1444141215734673, "grad_norm": 5.705931805974881, "learning_rate": 9.66072926382938e-07, "loss": 0.3312, "step": 8308 }, { "epoch": 0.1444315041109701, "grad_norm": 3.2490193880183993, "learning_rate": 9.66062733247626e-07, "loss": 0.5343, "step": 8309 }, { "epoch": 0.14444888664847294, "grad_norm": 1.9102901458319652, "learning_rate": 9.66052538635109e-07, "loss": 0.4934, "step": 8310 }, { "epoch": 0.14446626918597577, "grad_norm": 2.517485219091496, "learning_rate": 9.660423425454196e-07, "loss": 0.5276, "step": 8311 }, { "epoch": 0.1444836517234786, "grad_norm": 3.3169789850203912, "learning_rate": 9.6603214497859e-07, "loss": 0.4366, "step": 8312 }, { "epoch": 0.14450103426098143, "grad_norm": 1.6685281934323346, "learning_rate": 9.660219459346526e-07, "loss": 0.3492, "step": 8313 }, { "epoch": 0.14451841679848423, "grad_norm": 1.6126871267498293, "learning_rate": 9.660117454136397e-07, "loss": 0.4802, "step": 8314 }, { "epoch": 0.14453579933598706, "grad_norm": 4.90543101904587, "learning_rate": 9.660015434155837e-07, "loss": 0.4879, "step": 8315 }, { "epoch": 0.1445531818734899, "grad_norm": 1.4636354564284602, "learning_rate": 9.659913399405168e-07, "loss": 0.4034, "step": 8316 }, { "epoch": 0.14457056441099272, "grad_norm": 1.8286662787023713, "learning_rate": 9.659811349884715e-07, "loss": 0.566, "step": 8317 }, { "epoch": 0.14458794694849555, "grad_norm": 1.7095351977466113, "learning_rate": 9.6597092855948e-07, "loss": 0.4915, "step": 8318 }, { "epoch": 0.14460532948599836, "grad_norm": 1.7423304646521092, "learning_rate": 9.659607206535746e-07, "loss": 0.5603, "step": 8319 }, { "epoch": 0.1446227120235012, "grad_norm": 2.612501423864827, "learning_rate": 9.65950511270788e-07, "loss": 0.6898, "step": 8320 }, { "epoch": 0.14464009456100402, "grad_norm": 1.9861324200108783, "learning_rate": 9.659403004111521e-07, "loss": 0.3978, "step": 8321 }, { "epoch": 0.14465747709850685, "grad_norm": 2.461327153341591, "learning_rate": 9.659300880746996e-07, "loss": 0.5895, "step": 8322 }, { "epoch": 0.14467485963600968, "grad_norm": 1.3980567225666922, "learning_rate": 9.659198742614628e-07, "loss": 0.389, "step": 8323 }, { "epoch": 0.14469224217351248, "grad_norm": 1.7816292797356803, "learning_rate": 9.65909658971474e-07, "loss": 0.3082, "step": 8324 }, { "epoch": 0.1447096247110153, "grad_norm": 1.5441220008638896, "learning_rate": 9.658994422047654e-07, "loss": 0.4097, "step": 8325 }, { "epoch": 0.14472700724851814, "grad_norm": 2.9946303312539215, "learning_rate": 9.658892239613698e-07, "loss": 0.4622, "step": 8326 }, { "epoch": 0.14474438978602097, "grad_norm": 1.8986176299028175, "learning_rate": 9.658790042413193e-07, "loss": 0.4754, "step": 8327 }, { "epoch": 0.1447617723235238, "grad_norm": 1.4375689686458666, "learning_rate": 9.658687830446464e-07, "loss": 0.581, "step": 8328 }, { "epoch": 0.1447791548610266, "grad_norm": 2.248021487945146, "learning_rate": 9.658585603713835e-07, "loss": 0.287, "step": 8329 }, { "epoch": 0.14479653739852943, "grad_norm": 1.7406657847526477, "learning_rate": 9.658483362215628e-07, "loss": 0.2788, "step": 8330 }, { "epoch": 0.14481391993603226, "grad_norm": 7.050202576472536, "learning_rate": 9.658381105952167e-07, "loss": 0.4904, "step": 8331 }, { "epoch": 0.1448313024735351, "grad_norm": 1.3442074957606982, "learning_rate": 9.65827883492378e-07, "loss": 0.1934, "step": 8332 }, { "epoch": 0.1448486850110379, "grad_norm": 1.9364470520292116, "learning_rate": 9.65817654913079e-07, "loss": 0.3364, "step": 8333 }, { "epoch": 0.14486606754854073, "grad_norm": 1.9038335649183753, "learning_rate": 9.65807424857352e-07, "loss": 0.3195, "step": 8334 }, { "epoch": 0.14488345008604356, "grad_norm": 1.512890814319133, "learning_rate": 9.657971933252293e-07, "loss": 0.3606, "step": 8335 }, { "epoch": 0.1449008326235464, "grad_norm": 1.2276377324822685, "learning_rate": 9.657869603167436e-07, "loss": 0.2899, "step": 8336 }, { "epoch": 0.14491821516104922, "grad_norm": 1.5014440161630744, "learning_rate": 9.65776725831927e-07, "loss": 0.3574, "step": 8337 }, { "epoch": 0.14493559769855202, "grad_norm": 1.188918229198162, "learning_rate": 9.657664898708124e-07, "loss": 0.1894, "step": 8338 }, { "epoch": 0.14495298023605485, "grad_norm": 1.5834140293396797, "learning_rate": 9.657562524334319e-07, "loss": 0.218, "step": 8339 }, { "epoch": 0.14497036277355768, "grad_norm": 2.3681568016792984, "learning_rate": 9.65746013519818e-07, "loss": 0.4557, "step": 8340 }, { "epoch": 0.1449877453110605, "grad_norm": 2.7693991057585983, "learning_rate": 9.65735773130003e-07, "loss": 0.3148, "step": 8341 }, { "epoch": 0.14500512784856334, "grad_norm": 1.734516118368403, "learning_rate": 9.657255312640198e-07, "loss": 0.2873, "step": 8342 }, { "epoch": 0.14502251038606614, "grad_norm": 2.9560730907315147, "learning_rate": 9.657152879219007e-07, "loss": 0.49, "step": 8343 }, { "epoch": 0.14503989292356897, "grad_norm": 3.0781554789966026, "learning_rate": 9.657050431036778e-07, "loss": 0.5582, "step": 8344 }, { "epoch": 0.1450572754610718, "grad_norm": 1.8515653941606807, "learning_rate": 9.65694796809384e-07, "loss": 0.6352, "step": 8345 }, { "epoch": 0.14507465799857464, "grad_norm": 1.6055164177885575, "learning_rate": 9.656845490390515e-07, "loss": 0.2938, "step": 8346 }, { "epoch": 0.14509204053607747, "grad_norm": 1.599399206767032, "learning_rate": 9.65674299792713e-07, "loss": 0.2451, "step": 8347 }, { "epoch": 0.14510942307358027, "grad_norm": 3.700879453327957, "learning_rate": 9.656640490704006e-07, "loss": 0.3159, "step": 8348 }, { "epoch": 0.1451268056110831, "grad_norm": 2.7681233834430508, "learning_rate": 9.656537968721475e-07, "loss": 0.5324, "step": 8349 }, { "epoch": 0.14514418814858593, "grad_norm": 1.3817457263888784, "learning_rate": 9.656435431979855e-07, "loss": 0.2272, "step": 8350 }, { "epoch": 0.14516157068608876, "grad_norm": 1.3569829695783215, "learning_rate": 9.656332880479473e-07, "loss": 0.3663, "step": 8351 }, { "epoch": 0.1451789532235916, "grad_norm": 2.756677171520315, "learning_rate": 9.656230314220656e-07, "loss": 0.467, "step": 8352 }, { "epoch": 0.1451963357610944, "grad_norm": 2.1556044609504648, "learning_rate": 9.65612773320373e-07, "loss": 0.5598, "step": 8353 }, { "epoch": 0.14521371829859722, "grad_norm": 1.2443512253713331, "learning_rate": 9.656025137429015e-07, "loss": 0.3764, "step": 8354 }, { "epoch": 0.14523110083610005, "grad_norm": 1.999717702227607, "learning_rate": 9.65592252689684e-07, "loss": 0.5102, "step": 8355 }, { "epoch": 0.14524848337360288, "grad_norm": 2.3775164191814517, "learning_rate": 9.655819901607529e-07, "loss": 0.4272, "step": 8356 }, { "epoch": 0.1452658659111057, "grad_norm": 2.336873734441049, "learning_rate": 9.655717261561408e-07, "loss": 0.5196, "step": 8357 }, { "epoch": 0.14528324844860852, "grad_norm": 1.7380056250615128, "learning_rate": 9.655614606758803e-07, "loss": 0.6395, "step": 8358 }, { "epoch": 0.14530063098611135, "grad_norm": 0.976902397259529, "learning_rate": 9.655511937200036e-07, "loss": 0.4122, "step": 8359 }, { "epoch": 0.14531801352361418, "grad_norm": 3.001833349628455, "learning_rate": 9.655409252885435e-07, "loss": 0.4307, "step": 8360 }, { "epoch": 0.145335396061117, "grad_norm": 1.7122761815110283, "learning_rate": 9.655306553815327e-07, "loss": 0.5048, "step": 8361 }, { "epoch": 0.14535277859861984, "grad_norm": 2.206174253454638, "learning_rate": 9.655203839990034e-07, "loss": 0.3508, "step": 8362 }, { "epoch": 0.14537016113612264, "grad_norm": 2.0061542557114747, "learning_rate": 9.655101111409885e-07, "loss": 0.4594, "step": 8363 }, { "epoch": 0.14538754367362547, "grad_norm": 1.5273351538839595, "learning_rate": 9.654998368075203e-07, "loss": 0.4717, "step": 8364 }, { "epoch": 0.1454049262111283, "grad_norm": 1.6200350398027776, "learning_rate": 9.654895609986315e-07, "loss": 0.4743, "step": 8365 }, { "epoch": 0.14542230874863113, "grad_norm": 2.6959224066208978, "learning_rate": 9.654792837143546e-07, "loss": 0.2951, "step": 8366 }, { "epoch": 0.14543969128613396, "grad_norm": 1.6485662645582924, "learning_rate": 9.65469004954722e-07, "loss": 0.3222, "step": 8367 }, { "epoch": 0.14545707382363676, "grad_norm": 1.9928700986835122, "learning_rate": 9.654587247197666e-07, "loss": 0.4511, "step": 8368 }, { "epoch": 0.1454744563611396, "grad_norm": 1.6508855084817124, "learning_rate": 9.654484430095208e-07, "loss": 0.2775, "step": 8369 }, { "epoch": 0.14549183889864242, "grad_norm": 1.555809065878419, "learning_rate": 9.654381598240175e-07, "loss": 0.5525, "step": 8370 }, { "epoch": 0.14550922143614525, "grad_norm": 1.4948445980350993, "learning_rate": 9.654278751632888e-07, "loss": 0.4123, "step": 8371 }, { "epoch": 0.14552660397364808, "grad_norm": 1.988904683106039, "learning_rate": 9.654175890273677e-07, "loss": 0.7032, "step": 8372 }, { "epoch": 0.14554398651115089, "grad_norm": 1.6285066028930064, "learning_rate": 9.654073014162864e-07, "loss": 0.3865, "step": 8373 }, { "epoch": 0.14556136904865372, "grad_norm": 1.5075707114780903, "learning_rate": 9.653970123300778e-07, "loss": 0.4869, "step": 8374 }, { "epoch": 0.14557875158615655, "grad_norm": 1.5918809398567126, "learning_rate": 9.653867217687746e-07, "loss": 0.4352, "step": 8375 }, { "epoch": 0.14559613412365938, "grad_norm": 1.5506281910152704, "learning_rate": 9.653764297324093e-07, "loss": 0.3723, "step": 8376 }, { "epoch": 0.1456135166611622, "grad_norm": 2.8122534670480346, "learning_rate": 9.653661362210143e-07, "loss": 0.4939, "step": 8377 }, { "epoch": 0.145630899198665, "grad_norm": 1.238500554166541, "learning_rate": 9.653558412346225e-07, "loss": 0.416, "step": 8378 }, { "epoch": 0.14564828173616784, "grad_norm": 1.515872267099313, "learning_rate": 9.653455447732665e-07, "loss": 0.2736, "step": 8379 }, { "epoch": 0.14566566427367067, "grad_norm": 1.3794580096387192, "learning_rate": 9.65335246836979e-07, "loss": 0.3, "step": 8380 }, { "epoch": 0.1456830468111735, "grad_norm": 2.3648505765255337, "learning_rate": 9.653249474257923e-07, "loss": 0.4568, "step": 8381 }, { "epoch": 0.14570042934867633, "grad_norm": 3.3404202494411614, "learning_rate": 9.653146465397394e-07, "loss": 0.3118, "step": 8382 }, { "epoch": 0.14571781188617913, "grad_norm": 2.262605709762539, "learning_rate": 9.653043441788529e-07, "loss": 0.4442, "step": 8383 }, { "epoch": 0.14573519442368196, "grad_norm": 1.5442640275873591, "learning_rate": 9.65294040343165e-07, "loss": 0.4098, "step": 8384 }, { "epoch": 0.1457525769611848, "grad_norm": 1.9435690756026913, "learning_rate": 9.65283735032709e-07, "loss": 0.3219, "step": 8385 }, { "epoch": 0.14576995949868762, "grad_norm": 1.2649128565885301, "learning_rate": 9.652734282475175e-07, "loss": 0.2452, "step": 8386 }, { "epoch": 0.14578734203619045, "grad_norm": 1.8468001242752996, "learning_rate": 9.652631199876227e-07, "loss": 0.3799, "step": 8387 }, { "epoch": 0.14580472457369326, "grad_norm": 1.6343180965977109, "learning_rate": 9.652528102530575e-07, "loss": 0.2754, "step": 8388 }, { "epoch": 0.1458221071111961, "grad_norm": 1.7215332420070417, "learning_rate": 9.652424990438547e-07, "loss": 0.2262, "step": 8389 }, { "epoch": 0.14583948964869892, "grad_norm": 1.4319920416590335, "learning_rate": 9.652321863600468e-07, "loss": 0.4068, "step": 8390 }, { "epoch": 0.14585687218620175, "grad_norm": 2.806577541676162, "learning_rate": 9.652218722016667e-07, "loss": 0.5703, "step": 8391 }, { "epoch": 0.14587425472370458, "grad_norm": 2.1867274885239483, "learning_rate": 9.65211556568747e-07, "loss": 0.3848, "step": 8392 }, { "epoch": 0.14589163726120738, "grad_norm": 1.6138051676858243, "learning_rate": 9.652012394613202e-07, "loss": 0.4938, "step": 8393 }, { "epoch": 0.1459090197987102, "grad_norm": 2.3055680927589357, "learning_rate": 9.651909208794192e-07, "loss": 0.3285, "step": 8394 }, { "epoch": 0.14592640233621304, "grad_norm": 3.200561437214182, "learning_rate": 9.651806008230768e-07, "loss": 0.4753, "step": 8395 }, { "epoch": 0.14594378487371587, "grad_norm": 1.7016805997475641, "learning_rate": 9.651702792923254e-07, "loss": 0.4408, "step": 8396 }, { "epoch": 0.1459611674112187, "grad_norm": 3.9445498128964753, "learning_rate": 9.65159956287198e-07, "loss": 0.5209, "step": 8397 }, { "epoch": 0.1459785499487215, "grad_norm": 2.025061848936317, "learning_rate": 9.651496318077273e-07, "loss": 0.5951, "step": 8398 }, { "epoch": 0.14599593248622433, "grad_norm": 2.4316203684302606, "learning_rate": 9.651393058539458e-07, "loss": 0.4084, "step": 8399 }, { "epoch": 0.14601331502372716, "grad_norm": 1.8282609270758539, "learning_rate": 9.651289784258864e-07, "loss": 0.5959, "step": 8400 }, { "epoch": 0.14603069756123, "grad_norm": 2.3969984094836403, "learning_rate": 9.651186495235818e-07, "loss": 0.4215, "step": 8401 }, { "epoch": 0.14604808009873282, "grad_norm": 1.585679290547464, "learning_rate": 9.65108319147065e-07, "loss": 0.386, "step": 8402 }, { "epoch": 0.14606546263623563, "grad_norm": 1.7273198710782633, "learning_rate": 9.650979872963682e-07, "loss": 0.2955, "step": 8403 }, { "epoch": 0.14608284517373846, "grad_norm": 1.845099611707989, "learning_rate": 9.650876539715244e-07, "loss": 0.6548, "step": 8404 }, { "epoch": 0.1461002277112413, "grad_norm": 2.8508947251810306, "learning_rate": 9.650773191725665e-07, "loss": 0.4329, "step": 8405 }, { "epoch": 0.14611761024874412, "grad_norm": 1.9665462545894994, "learning_rate": 9.650669828995271e-07, "loss": 0.4366, "step": 8406 }, { "epoch": 0.14613499278624695, "grad_norm": 1.603368265660237, "learning_rate": 9.65056645152439e-07, "loss": 0.2672, "step": 8407 }, { "epoch": 0.14615237532374975, "grad_norm": 1.641542412324523, "learning_rate": 9.65046305931335e-07, "loss": 0.7538, "step": 8408 }, { "epoch": 0.14616975786125258, "grad_norm": 1.3798016876346963, "learning_rate": 9.650359652362476e-07, "loss": 0.3241, "step": 8409 }, { "epoch": 0.1461871403987554, "grad_norm": 2.0120073716724813, "learning_rate": 9.650256230672103e-07, "loss": 0.4797, "step": 8410 }, { "epoch": 0.14620452293625824, "grad_norm": 1.562601257209497, "learning_rate": 9.65015279424255e-07, "loss": 0.4448, "step": 8411 }, { "epoch": 0.14622190547376107, "grad_norm": 1.5715627409290807, "learning_rate": 9.65004934307415e-07, "loss": 0.4358, "step": 8412 }, { "epoch": 0.14623928801126387, "grad_norm": 1.663128173028014, "learning_rate": 9.649945877167229e-07, "loss": 0.3185, "step": 8413 }, { "epoch": 0.1462566705487667, "grad_norm": 3.404734440311948, "learning_rate": 9.649842396522115e-07, "loss": 0.436, "step": 8414 }, { "epoch": 0.14627405308626953, "grad_norm": 4.7481049189619355, "learning_rate": 9.649738901139139e-07, "loss": 0.4546, "step": 8415 }, { "epoch": 0.14629143562377236, "grad_norm": 3.1805578862093906, "learning_rate": 9.649635391018625e-07, "loss": 0.4508, "step": 8416 }, { "epoch": 0.1463088181612752, "grad_norm": 2.6819729489730975, "learning_rate": 9.649531866160904e-07, "loss": 0.4088, "step": 8417 }, { "epoch": 0.146326200698778, "grad_norm": 1.0110109499891624, "learning_rate": 9.649428326566301e-07, "loss": 0.4005, "step": 8418 }, { "epoch": 0.14634358323628083, "grad_norm": 2.4222320462029034, "learning_rate": 9.649324772235147e-07, "loss": 0.7571, "step": 8419 }, { "epoch": 0.14636096577378366, "grad_norm": 3.0359444619239087, "learning_rate": 9.649221203167768e-07, "loss": 0.5757, "step": 8420 }, { "epoch": 0.1463783483112865, "grad_norm": 2.3237576274986647, "learning_rate": 9.649117619364492e-07, "loss": 0.4463, "step": 8421 }, { "epoch": 0.14639573084878932, "grad_norm": 3.1279984162675776, "learning_rate": 9.649014020825652e-07, "loss": 0.374, "step": 8422 }, { "epoch": 0.14641311338629212, "grad_norm": 1.3320473752860542, "learning_rate": 9.648910407551572e-07, "loss": 0.4925, "step": 8423 }, { "epoch": 0.14643049592379495, "grad_norm": 1.5787246794575793, "learning_rate": 9.648806779542582e-07, "loss": 0.4843, "step": 8424 }, { "epoch": 0.14644787846129778, "grad_norm": 2.017500460561517, "learning_rate": 9.64870313679901e-07, "loss": 0.6228, "step": 8425 }, { "epoch": 0.1464652609988006, "grad_norm": 2.3201007742367112, "learning_rate": 9.648599479321184e-07, "loss": 0.4824, "step": 8426 }, { "epoch": 0.14648264353630344, "grad_norm": 1.9013700635584014, "learning_rate": 9.648495807109433e-07, "loss": 0.4003, "step": 8427 }, { "epoch": 0.14650002607380624, "grad_norm": 1.486977828727668, "learning_rate": 9.648392120164085e-07, "loss": 0.3904, "step": 8428 }, { "epoch": 0.14651740861130907, "grad_norm": 2.2438488377604537, "learning_rate": 9.64828841848547e-07, "loss": 0.4042, "step": 8429 }, { "epoch": 0.1465347911488119, "grad_norm": 1.079046922614511, "learning_rate": 9.648184702073917e-07, "loss": 0.5535, "step": 8430 }, { "epoch": 0.14655217368631474, "grad_norm": 1.1942467333328792, "learning_rate": 9.648080970929753e-07, "loss": 0.3218, "step": 8431 }, { "epoch": 0.14656955622381757, "grad_norm": 1.90989751506496, "learning_rate": 9.647977225053308e-07, "loss": 0.5146, "step": 8432 }, { "epoch": 0.14658693876132037, "grad_norm": 1.3013515623185814, "learning_rate": 9.647873464444908e-07, "loss": 0.412, "step": 8433 }, { "epoch": 0.1466043212988232, "grad_norm": 2.091878065634925, "learning_rate": 9.647769689104886e-07, "loss": 0.3192, "step": 8434 }, { "epoch": 0.14662170383632603, "grad_norm": 1.9971892149625072, "learning_rate": 9.64766589903357e-07, "loss": 0.6065, "step": 8435 }, { "epoch": 0.14663908637382886, "grad_norm": 2.501579031658042, "learning_rate": 9.647562094231287e-07, "loss": 0.4357, "step": 8436 }, { "epoch": 0.1466564689113317, "grad_norm": 2.2320123846831925, "learning_rate": 9.647458274698366e-07, "loss": 0.4138, "step": 8437 }, { "epoch": 0.1466738514488345, "grad_norm": 1.5063119327526946, "learning_rate": 9.647354440435139e-07, "loss": 0.492, "step": 8438 }, { "epoch": 0.14669123398633732, "grad_norm": 1.7144108298382776, "learning_rate": 9.647250591441932e-07, "loss": 0.6534, "step": 8439 }, { "epoch": 0.14670861652384015, "grad_norm": 1.6810572714530483, "learning_rate": 9.647146727719076e-07, "loss": 0.7359, "step": 8440 }, { "epoch": 0.14672599906134298, "grad_norm": 1.897471941549806, "learning_rate": 9.6470428492669e-07, "loss": 0.5933, "step": 8441 }, { "epoch": 0.1467433815988458, "grad_norm": 1.8841114840290796, "learning_rate": 9.64693895608573e-07, "loss": 0.4378, "step": 8442 }, { "epoch": 0.14676076413634861, "grad_norm": 1.6415545252210435, "learning_rate": 9.6468350481759e-07, "loss": 0.631, "step": 8443 }, { "epoch": 0.14677814667385145, "grad_norm": 1.910713351944988, "learning_rate": 9.64673112553774e-07, "loss": 0.49, "step": 8444 }, { "epoch": 0.14679552921135428, "grad_norm": 2.4344266000905455, "learning_rate": 9.646627188171574e-07, "loss": 0.3898, "step": 8445 }, { "epoch": 0.1468129117488571, "grad_norm": 1.939106770476373, "learning_rate": 9.646523236077735e-07, "loss": 0.6638, "step": 8446 }, { "epoch": 0.14683029428635994, "grad_norm": 1.7943844337504735, "learning_rate": 9.64641926925655e-07, "loss": 0.3963, "step": 8447 }, { "epoch": 0.14684767682386274, "grad_norm": 1.4643816583337583, "learning_rate": 9.646315287708354e-07, "loss": 0.5797, "step": 8448 }, { "epoch": 0.14686505936136557, "grad_norm": 4.04479945187866, "learning_rate": 9.646211291433471e-07, "loss": 0.5769, "step": 8449 }, { "epoch": 0.1468824418988684, "grad_norm": 2.41545600346655, "learning_rate": 9.646107280432232e-07, "loss": 0.4722, "step": 8450 }, { "epoch": 0.14689982443637123, "grad_norm": 3.832466553067031, "learning_rate": 9.646003254704968e-07, "loss": 0.4282, "step": 8451 }, { "epoch": 0.14691720697387406, "grad_norm": 2.806974790368338, "learning_rate": 9.645899214252006e-07, "loss": 0.4777, "step": 8452 }, { "epoch": 0.14693458951137686, "grad_norm": 1.4605689224922056, "learning_rate": 9.64579515907368e-07, "loss": 0.4409, "step": 8453 }, { "epoch": 0.1469519720488797, "grad_norm": 1.790088967124962, "learning_rate": 9.645691089170315e-07, "loss": 0.397, "step": 8454 }, { "epoch": 0.14696935458638252, "grad_norm": 2.8749281644853406, "learning_rate": 9.645587004542245e-07, "loss": 0.3813, "step": 8455 }, { "epoch": 0.14698673712388535, "grad_norm": 1.797032941933266, "learning_rate": 9.6454829051898e-07, "loss": 0.5257, "step": 8456 }, { "epoch": 0.14700411966138818, "grad_norm": 1.0076349760471097, "learning_rate": 9.645378791113305e-07, "loss": 0.4528, "step": 8457 }, { "epoch": 0.14702150219889099, "grad_norm": 3.326819932585458, "learning_rate": 9.645274662313094e-07, "loss": 0.6161, "step": 8458 }, { "epoch": 0.14703888473639382, "grad_norm": 1.3492854106422143, "learning_rate": 9.645170518789497e-07, "loss": 0.362, "step": 8459 }, { "epoch": 0.14705626727389665, "grad_norm": 2.025734719147077, "learning_rate": 9.645066360542844e-07, "loss": 0.4251, "step": 8460 }, { "epoch": 0.14707364981139948, "grad_norm": 1.5323177073942877, "learning_rate": 9.644962187573462e-07, "loss": 0.2649, "step": 8461 }, { "epoch": 0.1470910323489023, "grad_norm": 1.81578305336651, "learning_rate": 9.644857999881686e-07, "loss": 0.3021, "step": 8462 }, { "epoch": 0.1471084148864051, "grad_norm": 1.7465071655302185, "learning_rate": 9.644753797467843e-07, "loss": 0.292, "step": 8463 }, { "epoch": 0.14712579742390794, "grad_norm": 1.5269637346860137, "learning_rate": 9.644649580332263e-07, "loss": 0.5326, "step": 8464 }, { "epoch": 0.14714317996141077, "grad_norm": 2.0297623926107096, "learning_rate": 9.64454534847528e-07, "loss": 0.3146, "step": 8465 }, { "epoch": 0.1471605624989136, "grad_norm": 1.5227960564564842, "learning_rate": 9.644441101897218e-07, "loss": 0.5059, "step": 8466 }, { "epoch": 0.14717794503641643, "grad_norm": 1.7539383731549132, "learning_rate": 9.644336840598412e-07, "loss": 0.3772, "step": 8467 }, { "epoch": 0.14719532757391923, "grad_norm": 1.321849584364602, "learning_rate": 9.644232564579193e-07, "loss": 0.1994, "step": 8468 }, { "epoch": 0.14721271011142206, "grad_norm": 2.0521220146739645, "learning_rate": 9.644128273839891e-07, "loss": 0.3466, "step": 8469 }, { "epoch": 0.1472300926489249, "grad_norm": 1.5487670099783322, "learning_rate": 9.644023968380835e-07, "loss": 0.4263, "step": 8470 }, { "epoch": 0.14724747518642772, "grad_norm": 3.1423314825219295, "learning_rate": 9.643919648202355e-07, "loss": 0.6922, "step": 8471 }, { "epoch": 0.14726485772393053, "grad_norm": 3.2404212498717593, "learning_rate": 9.643815313304784e-07, "loss": 0.4851, "step": 8472 }, { "epoch": 0.14728224026143336, "grad_norm": 3.7389384302111215, "learning_rate": 9.643710963688452e-07, "loss": 0.4859, "step": 8473 }, { "epoch": 0.1472996227989362, "grad_norm": 1.0672270142749591, "learning_rate": 9.64360659935369e-07, "loss": 0.5411, "step": 8474 }, { "epoch": 0.14731700533643902, "grad_norm": 1.533948253005404, "learning_rate": 9.643502220300825e-07, "loss": 0.4152, "step": 8475 }, { "epoch": 0.14733438787394185, "grad_norm": 2.3652686988568483, "learning_rate": 9.643397826530193e-07, "loss": 0.3546, "step": 8476 }, { "epoch": 0.14735177041144465, "grad_norm": 4.086168466132792, "learning_rate": 9.643293418042125e-07, "loss": 0.7467, "step": 8477 }, { "epoch": 0.14736915294894748, "grad_norm": 1.3978402441761855, "learning_rate": 9.643188994836946e-07, "loss": 0.3033, "step": 8478 }, { "epoch": 0.1473865354864503, "grad_norm": 1.2484726295978874, "learning_rate": 9.643084556914994e-07, "loss": 0.6003, "step": 8479 }, { "epoch": 0.14740391802395314, "grad_norm": 2.1758741201003655, "learning_rate": 9.642980104276595e-07, "loss": 0.7957, "step": 8480 }, { "epoch": 0.14742130056145597, "grad_norm": 1.562621819540607, "learning_rate": 9.642875636922082e-07, "loss": 0.5611, "step": 8481 }, { "epoch": 0.14743868309895877, "grad_norm": 1.517375096912534, "learning_rate": 9.642771154851787e-07, "loss": 0.273, "step": 8482 }, { "epoch": 0.1474560656364616, "grad_norm": 1.9492889888039715, "learning_rate": 9.64266665806604e-07, "loss": 0.3556, "step": 8483 }, { "epoch": 0.14747344817396443, "grad_norm": 1.439469364333969, "learning_rate": 9.64256214656517e-07, "loss": 0.3736, "step": 8484 }, { "epoch": 0.14749083071146726, "grad_norm": 1.952840319789365, "learning_rate": 9.642457620349513e-07, "loss": 0.9128, "step": 8485 }, { "epoch": 0.1475082132489701, "grad_norm": 1.5596382281561776, "learning_rate": 9.642353079419397e-07, "loss": 0.3795, "step": 8486 }, { "epoch": 0.1475255957864729, "grad_norm": 1.168385705493609, "learning_rate": 9.642248523775156e-07, "loss": 0.3381, "step": 8487 }, { "epoch": 0.14754297832397573, "grad_norm": 1.5535640558686663, "learning_rate": 9.642143953417118e-07, "loss": 0.4444, "step": 8488 }, { "epoch": 0.14756036086147856, "grad_norm": 1.520076267689083, "learning_rate": 9.642039368345616e-07, "loss": 0.3447, "step": 8489 }, { "epoch": 0.1475777433989814, "grad_norm": 2.0290888343391815, "learning_rate": 9.641934768560982e-07, "loss": 0.3552, "step": 8490 }, { "epoch": 0.14759512593648422, "grad_norm": 3.0450698238975296, "learning_rate": 9.641830154063547e-07, "loss": 0.3457, "step": 8491 }, { "epoch": 0.14761250847398702, "grad_norm": 2.3394485796580757, "learning_rate": 9.641725524853643e-07, "loss": 0.5077, "step": 8492 }, { "epoch": 0.14762989101148985, "grad_norm": 1.659026426472105, "learning_rate": 9.6416208809316e-07, "loss": 0.3918, "step": 8493 }, { "epoch": 0.14764727354899268, "grad_norm": 2.8560372406887886, "learning_rate": 9.641516222297751e-07, "loss": 0.3476, "step": 8494 }, { "epoch": 0.1476646560864955, "grad_norm": 2.6967224725151286, "learning_rate": 9.64141154895243e-07, "loss": 0.4164, "step": 8495 }, { "epoch": 0.14768203862399834, "grad_norm": 1.8841067765071788, "learning_rate": 9.641306860895965e-07, "loss": 0.327, "step": 8496 }, { "epoch": 0.14769942116150114, "grad_norm": 1.986041678510361, "learning_rate": 9.64120215812869e-07, "loss": 0.5456, "step": 8497 }, { "epoch": 0.14771680369900397, "grad_norm": 1.7149879479364123, "learning_rate": 9.641097440650936e-07, "loss": 0.325, "step": 8498 }, { "epoch": 0.1477341862365068, "grad_norm": 2.356736715170562, "learning_rate": 9.640992708463033e-07, "loss": 0.4821, "step": 8499 }, { "epoch": 0.14775156877400963, "grad_norm": 1.4964644394695013, "learning_rate": 9.64088796156532e-07, "loss": 0.3271, "step": 8500 }, { "epoch": 0.14776895131151246, "grad_norm": 1.9047394032934581, "learning_rate": 9.64078319995812e-07, "loss": 0.4879, "step": 8501 }, { "epoch": 0.14778633384901527, "grad_norm": 1.7952506115181046, "learning_rate": 9.640678423641768e-07, "loss": 0.7646, "step": 8502 }, { "epoch": 0.1478037163865181, "grad_norm": 1.8592958656649081, "learning_rate": 9.6405736326166e-07, "loss": 0.3166, "step": 8503 }, { "epoch": 0.14782109892402093, "grad_norm": 2.674967106751506, "learning_rate": 9.640468826882943e-07, "loss": 0.5115, "step": 8504 }, { "epoch": 0.14783848146152376, "grad_norm": 1.6486011604731057, "learning_rate": 9.640364006441134e-07, "loss": 0.3359, "step": 8505 }, { "epoch": 0.1478558639990266, "grad_norm": 1.8275901161347758, "learning_rate": 9.640259171291502e-07, "loss": 0.8528, "step": 8506 }, { "epoch": 0.1478732465365294, "grad_norm": 1.4623601353507192, "learning_rate": 9.64015432143438e-07, "loss": 0.3173, "step": 8507 }, { "epoch": 0.14789062907403222, "grad_norm": 1.4157636238852354, "learning_rate": 9.640049456870098e-07, "loss": 0.3541, "step": 8508 }, { "epoch": 0.14790801161153505, "grad_norm": 1.5026285793931002, "learning_rate": 9.639944577598992e-07, "loss": 0.608, "step": 8509 }, { "epoch": 0.14792539414903788, "grad_norm": 1.7585244053895874, "learning_rate": 9.639839683621395e-07, "loss": 0.4407, "step": 8510 }, { "epoch": 0.1479427766865407, "grad_norm": 4.6324949768750185, "learning_rate": 9.639734774937635e-07, "loss": 0.6116, "step": 8511 }, { "epoch": 0.1479601592240435, "grad_norm": 1.4195835295668673, "learning_rate": 9.639629851548049e-07, "loss": 0.6425, "step": 8512 }, { "epoch": 0.14797754176154634, "grad_norm": 1.0436891184601287, "learning_rate": 9.639524913452967e-07, "loss": 0.3979, "step": 8513 }, { "epoch": 0.14799492429904917, "grad_norm": 1.4725126316991783, "learning_rate": 9.639419960652722e-07, "loss": 0.2227, "step": 8514 }, { "epoch": 0.148012306836552, "grad_norm": 4.2503651343514415, "learning_rate": 9.639314993147647e-07, "loss": 0.7444, "step": 8515 }, { "epoch": 0.14802968937405483, "grad_norm": 2.2000033296555563, "learning_rate": 9.639210010938075e-07, "loss": 0.672, "step": 8516 }, { "epoch": 0.14804707191155764, "grad_norm": 2.3457855366175635, "learning_rate": 9.63910501402434e-07, "loss": 0.2768, "step": 8517 }, { "epoch": 0.14806445444906047, "grad_norm": 1.9506184246910898, "learning_rate": 9.639000002406768e-07, "loss": 0.4443, "step": 8518 }, { "epoch": 0.1480818369865633, "grad_norm": 2.707634278977952, "learning_rate": 9.6388949760857e-07, "loss": 0.7034, "step": 8519 }, { "epoch": 0.14809921952406613, "grad_norm": 2.5666345082859507, "learning_rate": 9.638789935061466e-07, "loss": 0.5036, "step": 8520 }, { "epoch": 0.14811660206156896, "grad_norm": 1.7902534482682373, "learning_rate": 9.6386848793344e-07, "loss": 0.6491, "step": 8521 }, { "epoch": 0.14813398459907176, "grad_norm": 2.682504876492389, "learning_rate": 9.638579808904832e-07, "loss": 0.4031, "step": 8522 }, { "epoch": 0.1481513671365746, "grad_norm": 1.379485304637625, "learning_rate": 9.638474723773098e-07, "loss": 0.3704, "step": 8523 }, { "epoch": 0.14816874967407742, "grad_norm": 2.2727924528955663, "learning_rate": 9.63836962393953e-07, "loss": 0.6875, "step": 8524 }, { "epoch": 0.14818613221158025, "grad_norm": 1.623900490359382, "learning_rate": 9.638264509404459e-07, "loss": 0.5963, "step": 8525 }, { "epoch": 0.14820351474908308, "grad_norm": 2.225956140852176, "learning_rate": 9.63815938016822e-07, "loss": 0.5797, "step": 8526 }, { "epoch": 0.14822089728658588, "grad_norm": 1.859283977963273, "learning_rate": 9.638054236231148e-07, "loss": 0.4153, "step": 8527 }, { "epoch": 0.14823827982408871, "grad_norm": 1.9407278163359503, "learning_rate": 9.637949077593575e-07, "loss": 0.5854, "step": 8528 }, { "epoch": 0.14825566236159154, "grad_norm": 3.1367049383673833, "learning_rate": 9.637843904255833e-07, "loss": 0.588, "step": 8529 }, { "epoch": 0.14827304489909438, "grad_norm": 3.339096092926301, "learning_rate": 9.637738716218255e-07, "loss": 0.2989, "step": 8530 }, { "epoch": 0.1482904274365972, "grad_norm": 4.435609747838949, "learning_rate": 9.637633513481178e-07, "loss": 0.5172, "step": 8531 }, { "epoch": 0.1483078099741, "grad_norm": 1.7278115644977956, "learning_rate": 9.637528296044932e-07, "loss": 0.479, "step": 8532 }, { "epoch": 0.14832519251160284, "grad_norm": 1.5557544941506956, "learning_rate": 9.637423063909852e-07, "loss": 0.4285, "step": 8533 }, { "epoch": 0.14834257504910567, "grad_norm": 2.4437272136337973, "learning_rate": 9.637317817076272e-07, "loss": 0.6349, "step": 8534 }, { "epoch": 0.1483599575866085, "grad_norm": 2.07090275404546, "learning_rate": 9.637212555544522e-07, "loss": 0.7139, "step": 8535 }, { "epoch": 0.14837734012411133, "grad_norm": 1.728478065004256, "learning_rate": 9.63710727931494e-07, "loss": 0.463, "step": 8536 }, { "epoch": 0.14839472266161413, "grad_norm": 1.8481995090323853, "learning_rate": 9.637001988387859e-07, "loss": 0.7664, "step": 8537 }, { "epoch": 0.14841210519911696, "grad_norm": 3.549862950126299, "learning_rate": 9.636896682763612e-07, "loss": 0.3848, "step": 8538 }, { "epoch": 0.1484294877366198, "grad_norm": 2.5916855787513615, "learning_rate": 9.63679136244253e-07, "loss": 0.7543, "step": 8539 }, { "epoch": 0.14844687027412262, "grad_norm": 1.5991506615398436, "learning_rate": 9.636686027424951e-07, "loss": 0.8417, "step": 8540 }, { "epoch": 0.14846425281162545, "grad_norm": 1.755580311032026, "learning_rate": 9.636580677711208e-07, "loss": 0.2944, "step": 8541 }, { "epoch": 0.14848163534912825, "grad_norm": 2.5120814295580596, "learning_rate": 9.636475313301634e-07, "loss": 0.6103, "step": 8542 }, { "epoch": 0.14849901788663109, "grad_norm": 2.7671858506427105, "learning_rate": 9.63636993419656e-07, "loss": 1.1219, "step": 8543 }, { "epoch": 0.14851640042413392, "grad_norm": 1.7032628930513394, "learning_rate": 9.636264540396327e-07, "loss": 0.3797, "step": 8544 }, { "epoch": 0.14853378296163675, "grad_norm": 1.4406911866239958, "learning_rate": 9.636159131901263e-07, "loss": 0.5204, "step": 8545 }, { "epoch": 0.14855116549913958, "grad_norm": 2.0882914191035202, "learning_rate": 9.636053708711704e-07, "loss": 0.3975, "step": 8546 }, { "epoch": 0.14856854803664238, "grad_norm": 1.521209294309225, "learning_rate": 9.635948270827985e-07, "loss": 0.1983, "step": 8547 }, { "epoch": 0.1485859305741452, "grad_norm": 2.3300350964640217, "learning_rate": 9.63584281825044e-07, "loss": 0.421, "step": 8548 }, { "epoch": 0.14860331311164804, "grad_norm": 1.9137486801282186, "learning_rate": 9.635737350979402e-07, "loss": 0.3424, "step": 8549 }, { "epoch": 0.14862069564915087, "grad_norm": 1.544876480097597, "learning_rate": 9.635631869015205e-07, "loss": 0.4252, "step": 8550 }, { "epoch": 0.1486380781866537, "grad_norm": 2.1941408553737642, "learning_rate": 9.635526372358186e-07, "loss": 0.3706, "step": 8551 }, { "epoch": 0.1486554607241565, "grad_norm": 1.6488397892771476, "learning_rate": 9.635420861008678e-07, "loss": 0.2588, "step": 8552 }, { "epoch": 0.14867284326165933, "grad_norm": 2.4194436343818095, "learning_rate": 9.635315334967015e-07, "loss": 0.5171, "step": 8553 }, { "epoch": 0.14869022579916216, "grad_norm": 2.0253412780303566, "learning_rate": 9.635209794233531e-07, "loss": 0.2802, "step": 8554 }, { "epoch": 0.148707608336665, "grad_norm": 1.6856044213576589, "learning_rate": 9.635104238808562e-07, "loss": 0.4275, "step": 8555 }, { "epoch": 0.14872499087416782, "grad_norm": 2.2240432278439513, "learning_rate": 9.634998668692442e-07, "loss": 0.4217, "step": 8556 }, { "epoch": 0.14874237341167063, "grad_norm": 1.5056830163216608, "learning_rate": 9.634893083885505e-07, "loss": 0.215, "step": 8557 }, { "epoch": 0.14875975594917346, "grad_norm": 2.8757004141564537, "learning_rate": 9.634787484388085e-07, "loss": 0.5014, "step": 8558 }, { "epoch": 0.14877713848667629, "grad_norm": 1.594939002847788, "learning_rate": 9.634681870200519e-07, "loss": 0.5153, "step": 8559 }, { "epoch": 0.14879452102417912, "grad_norm": 1.2633809716217264, "learning_rate": 9.63457624132314e-07, "loss": 0.3803, "step": 8560 }, { "epoch": 0.14881190356168195, "grad_norm": 2.6073483474762758, "learning_rate": 9.634470597756284e-07, "loss": 0.4163, "step": 8561 }, { "epoch": 0.14882928609918475, "grad_norm": 2.1301288998519583, "learning_rate": 9.634364939500284e-07, "loss": 0.6292, "step": 8562 }, { "epoch": 0.14884666863668758, "grad_norm": 2.174944068064336, "learning_rate": 9.634259266555476e-07, "loss": 0.3349, "step": 8563 }, { "epoch": 0.1488640511741904, "grad_norm": 4.189158995544867, "learning_rate": 9.634153578922197e-07, "loss": 0.4887, "step": 8564 }, { "epoch": 0.14888143371169324, "grad_norm": 2.7517963070446103, "learning_rate": 9.634047876600778e-07, "loss": 0.3227, "step": 8565 }, { "epoch": 0.14889881624919607, "grad_norm": 1.6835810727913953, "learning_rate": 9.633942159591556e-07, "loss": 0.5858, "step": 8566 }, { "epoch": 0.14891619878669887, "grad_norm": 2.0258937845918936, "learning_rate": 9.633836427894865e-07, "loss": 0.716, "step": 8567 }, { "epoch": 0.1489335813242017, "grad_norm": 1.2968287761567392, "learning_rate": 9.633730681511042e-07, "loss": 0.3545, "step": 8568 }, { "epoch": 0.14895096386170453, "grad_norm": 3.7211178739987427, "learning_rate": 9.633624920440424e-07, "loss": 0.6858, "step": 8569 }, { "epoch": 0.14896834639920736, "grad_norm": 2.13208512746518, "learning_rate": 9.63351914468334e-07, "loss": 0.6133, "step": 8570 }, { "epoch": 0.1489857289367102, "grad_norm": 1.7836429913116245, "learning_rate": 9.63341335424013e-07, "loss": 0.5623, "step": 8571 }, { "epoch": 0.149003111474213, "grad_norm": 4.186408680042023, "learning_rate": 9.633307549111128e-07, "loss": 0.7651, "step": 8572 }, { "epoch": 0.14902049401171583, "grad_norm": 2.723520950584018, "learning_rate": 9.63320172929667e-07, "loss": 0.6008, "step": 8573 }, { "epoch": 0.14903787654921866, "grad_norm": 1.4972233692516883, "learning_rate": 9.63309589479709e-07, "loss": 0.4687, "step": 8574 }, { "epoch": 0.1490552590867215, "grad_norm": 2.3224001917471524, "learning_rate": 9.632990045612723e-07, "loss": 0.4987, "step": 8575 }, { "epoch": 0.14907264162422432, "grad_norm": 2.3916671748343603, "learning_rate": 9.632884181743908e-07, "loss": 0.6484, "step": 8576 }, { "epoch": 0.14909002416172712, "grad_norm": 3.339308435930732, "learning_rate": 9.632778303190978e-07, "loss": 0.4817, "step": 8577 }, { "epoch": 0.14910740669922995, "grad_norm": 3.0052150123789563, "learning_rate": 9.632672409954269e-07, "loss": 0.3635, "step": 8578 }, { "epoch": 0.14912478923673278, "grad_norm": 1.773103069888535, "learning_rate": 9.632566502034116e-07, "loss": 0.5345, "step": 8579 }, { "epoch": 0.1491421717742356, "grad_norm": 2.4954211667027804, "learning_rate": 9.632460579430855e-07, "loss": 0.4596, "step": 8580 }, { "epoch": 0.14915955431173844, "grad_norm": 1.8965324063997624, "learning_rate": 9.632354642144821e-07, "loss": 0.3869, "step": 8581 }, { "epoch": 0.14917693684924124, "grad_norm": 2.0469422229386085, "learning_rate": 9.632248690176353e-07, "loss": 0.4792, "step": 8582 }, { "epoch": 0.14919431938674407, "grad_norm": 2.597555842943745, "learning_rate": 9.632142723525784e-07, "loss": 0.509, "step": 8583 }, { "epoch": 0.1492117019242469, "grad_norm": 1.8976132177219296, "learning_rate": 9.63203674219345e-07, "loss": 0.3761, "step": 8584 }, { "epoch": 0.14922908446174973, "grad_norm": 1.7200448537375752, "learning_rate": 9.631930746179688e-07, "loss": 0.3441, "step": 8585 }, { "epoch": 0.14924646699925256, "grad_norm": 1.5075362475354064, "learning_rate": 9.63182473548483e-07, "loss": 0.4306, "step": 8586 }, { "epoch": 0.14926384953675537, "grad_norm": 2.137687887596078, "learning_rate": 9.63171871010922e-07, "loss": 0.5352, "step": 8587 }, { "epoch": 0.1492812320742582, "grad_norm": 1.9302125156969003, "learning_rate": 9.631612670053187e-07, "loss": 0.3676, "step": 8588 }, { "epoch": 0.14929861461176103, "grad_norm": 1.4148140035100356, "learning_rate": 9.63150661531707e-07, "loss": 0.5016, "step": 8589 }, { "epoch": 0.14931599714926386, "grad_norm": 0.944798904581799, "learning_rate": 9.631400545901202e-07, "loss": 0.2073, "step": 8590 }, { "epoch": 0.1493333796867667, "grad_norm": 4.2973564535960245, "learning_rate": 9.631294461805925e-07, "loss": 0.6304, "step": 8591 }, { "epoch": 0.1493507622242695, "grad_norm": 1.4252433601454066, "learning_rate": 9.63118836303157e-07, "loss": 0.3976, "step": 8592 }, { "epoch": 0.14936814476177232, "grad_norm": 2.451750310100549, "learning_rate": 9.631082249578476e-07, "loss": 0.7488, "step": 8593 }, { "epoch": 0.14938552729927515, "grad_norm": 2.066195279565616, "learning_rate": 9.63097612144698e-07, "loss": 0.398, "step": 8594 }, { "epoch": 0.14940290983677798, "grad_norm": 1.4660307993522492, "learning_rate": 9.630869978637414e-07, "loss": 0.5264, "step": 8595 }, { "epoch": 0.1494202923742808, "grad_norm": 1.7485766264287355, "learning_rate": 9.63076382115012e-07, "loss": 0.3107, "step": 8596 }, { "epoch": 0.1494376749117836, "grad_norm": 1.393554838294321, "learning_rate": 9.630657648985432e-07, "loss": 0.613, "step": 8597 }, { "epoch": 0.14945505744928644, "grad_norm": 2.26061592237635, "learning_rate": 9.630551462143687e-07, "loss": 0.3433, "step": 8598 }, { "epoch": 0.14947243998678927, "grad_norm": 3.538875726810795, "learning_rate": 9.630445260625217e-07, "loss": 0.7067, "step": 8599 }, { "epoch": 0.1494898225242921, "grad_norm": 2.3397495469940655, "learning_rate": 9.630339044430364e-07, "loss": 0.5729, "step": 8600 }, { "epoch": 0.14950720506179493, "grad_norm": 2.3736429571099062, "learning_rate": 9.630232813559465e-07, "loss": 0.3107, "step": 8601 }, { "epoch": 0.14952458759929774, "grad_norm": 3.088686932214542, "learning_rate": 9.630126568012854e-07, "loss": 0.5023, "step": 8602 }, { "epoch": 0.14954197013680057, "grad_norm": 4.363310288648129, "learning_rate": 9.63002030779087e-07, "loss": 0.6042, "step": 8603 }, { "epoch": 0.1495593526743034, "grad_norm": 1.3913997409482335, "learning_rate": 9.629914032893848e-07, "loss": 0.3932, "step": 8604 }, { "epoch": 0.14957673521180623, "grad_norm": 1.5269912966995216, "learning_rate": 9.629807743322123e-07, "loss": 0.4828, "step": 8605 }, { "epoch": 0.14959411774930906, "grad_norm": 3.7038092685593855, "learning_rate": 9.629701439076036e-07, "loss": 0.5758, "step": 8606 }, { "epoch": 0.14961150028681186, "grad_norm": 2.4413849344288585, "learning_rate": 9.629595120155922e-07, "loss": 0.7025, "step": 8607 }, { "epoch": 0.1496288828243147, "grad_norm": 2.594924021617359, "learning_rate": 9.629488786562119e-07, "loss": 0.3412, "step": 8608 }, { "epoch": 0.14964626536181752, "grad_norm": 1.6294286298974452, "learning_rate": 9.629382438294962e-07, "loss": 0.805, "step": 8609 }, { "epoch": 0.14966364789932035, "grad_norm": 1.5848090281019007, "learning_rate": 9.629276075354788e-07, "loss": 0.5301, "step": 8610 }, { "epoch": 0.14968103043682315, "grad_norm": 1.8282860001789298, "learning_rate": 9.629169697741938e-07, "loss": 0.6661, "step": 8611 }, { "epoch": 0.14969841297432598, "grad_norm": 1.5924764781638368, "learning_rate": 9.629063305456745e-07, "loss": 0.3103, "step": 8612 }, { "epoch": 0.14971579551182881, "grad_norm": 1.7406549198773273, "learning_rate": 9.628956898499548e-07, "loss": 0.6186, "step": 8613 }, { "epoch": 0.14973317804933164, "grad_norm": 2.2088058565892066, "learning_rate": 9.628850476870684e-07, "loss": 0.4579, "step": 8614 }, { "epoch": 0.14975056058683447, "grad_norm": 1.2216426278220283, "learning_rate": 9.628744040570492e-07, "loss": 0.3314, "step": 8615 }, { "epoch": 0.14976794312433728, "grad_norm": 2.052149599765475, "learning_rate": 9.628637589599305e-07, "loss": 0.8186, "step": 8616 }, { "epoch": 0.1497853256618401, "grad_norm": 2.156024259225817, "learning_rate": 9.628531123957464e-07, "loss": 0.3834, "step": 8617 }, { "epoch": 0.14980270819934294, "grad_norm": 1.707456561287777, "learning_rate": 9.628424643645306e-07, "loss": 0.7632, "step": 8618 }, { "epoch": 0.14982009073684577, "grad_norm": 1.5750231625446391, "learning_rate": 9.628318148663168e-07, "loss": 0.3747, "step": 8619 }, { "epoch": 0.1498374732743486, "grad_norm": 1.4482641683947077, "learning_rate": 9.628211639011388e-07, "loss": 0.286, "step": 8620 }, { "epoch": 0.1498548558118514, "grad_norm": 2.312954734787984, "learning_rate": 9.628105114690301e-07, "loss": 0.4526, "step": 8621 }, { "epoch": 0.14987223834935423, "grad_norm": 2.0033941179226953, "learning_rate": 9.62799857570025e-07, "loss": 0.4052, "step": 8622 }, { "epoch": 0.14988962088685706, "grad_norm": 1.522471661558115, "learning_rate": 9.627892022041567e-07, "loss": 0.3944, "step": 8623 }, { "epoch": 0.1499070034243599, "grad_norm": 2.1947223919043553, "learning_rate": 9.627785453714592e-07, "loss": 0.5448, "step": 8624 }, { "epoch": 0.14992438596186272, "grad_norm": 1.673664879804321, "learning_rate": 9.627678870719665e-07, "loss": 0.3392, "step": 8625 }, { "epoch": 0.14994176849936552, "grad_norm": 1.2494022673149263, "learning_rate": 9.627572273057118e-07, "loss": 0.2543, "step": 8626 }, { "epoch": 0.14995915103686835, "grad_norm": 1.5396914301041804, "learning_rate": 9.627465660727297e-07, "loss": 0.3369, "step": 8627 }, { "epoch": 0.14997653357437118, "grad_norm": 1.4651681960442895, "learning_rate": 9.627359033730532e-07, "loss": 0.3752, "step": 8628 }, { "epoch": 0.14999391611187401, "grad_norm": 2.3259132001354588, "learning_rate": 9.627252392067166e-07, "loss": 0.2339, "step": 8629 }, { "epoch": 0.15001129864937685, "grad_norm": 5.269880158653098, "learning_rate": 9.627145735737536e-07, "loss": 0.4964, "step": 8630 }, { "epoch": 0.15002868118687965, "grad_norm": 1.5487714760886353, "learning_rate": 9.62703906474198e-07, "loss": 0.4055, "step": 8631 }, { "epoch": 0.15004606372438248, "grad_norm": 4.28091811170568, "learning_rate": 9.626932379080835e-07, "loss": 0.5241, "step": 8632 }, { "epoch": 0.1500634462618853, "grad_norm": 2.3251949398377905, "learning_rate": 9.626825678754438e-07, "loss": 0.5524, "step": 8633 }, { "epoch": 0.15008082879938814, "grad_norm": 4.591174239422941, "learning_rate": 9.626718963763133e-07, "loss": 0.5651, "step": 8634 }, { "epoch": 0.15009821133689097, "grad_norm": 2.129997034551789, "learning_rate": 9.626612234107252e-07, "loss": 0.4931, "step": 8635 }, { "epoch": 0.15011559387439377, "grad_norm": 3.439739119659359, "learning_rate": 9.626505489787134e-07, "loss": 0.3331, "step": 8636 }, { "epoch": 0.1501329764118966, "grad_norm": 1.7408857792897898, "learning_rate": 9.62639873080312e-07, "loss": 0.5071, "step": 8637 }, { "epoch": 0.15015035894939943, "grad_norm": 2.1150951493235928, "learning_rate": 9.626291957155548e-07, "loss": 0.4524, "step": 8638 }, { "epoch": 0.15016774148690226, "grad_norm": 1.5220617871312085, "learning_rate": 9.626185168844756e-07, "loss": 0.4949, "step": 8639 }, { "epoch": 0.1501851240244051, "grad_norm": 2.2953623319914565, "learning_rate": 9.62607836587108e-07, "loss": 0.4794, "step": 8640 }, { "epoch": 0.1502025065619079, "grad_norm": 2.179166136270811, "learning_rate": 9.625971548234862e-07, "loss": 0.4848, "step": 8641 }, { "epoch": 0.15021988909941072, "grad_norm": 2.2218795159607025, "learning_rate": 9.625864715936437e-07, "loss": 0.661, "step": 8642 }, { "epoch": 0.15023727163691356, "grad_norm": 2.316530612516901, "learning_rate": 9.62575786897615e-07, "loss": 0.3758, "step": 8643 }, { "epoch": 0.15025465417441639, "grad_norm": 1.8807177916677549, "learning_rate": 9.625651007354331e-07, "loss": 0.3664, "step": 8644 }, { "epoch": 0.15027203671191922, "grad_norm": 1.6187911017085639, "learning_rate": 9.625544131071327e-07, "loss": 0.2425, "step": 8645 }, { "epoch": 0.15028941924942202, "grad_norm": 1.7449134880814217, "learning_rate": 9.62543724012747e-07, "loss": 0.3876, "step": 8646 }, { "epoch": 0.15030680178692485, "grad_norm": 1.326596610218587, "learning_rate": 9.625330334523105e-07, "loss": 0.4245, "step": 8647 }, { "epoch": 0.15032418432442768, "grad_norm": 1.8216898439881162, "learning_rate": 9.625223414258565e-07, "loss": 0.5381, "step": 8648 }, { "epoch": 0.1503415668619305, "grad_norm": 1.1754944733592914, "learning_rate": 9.625116479334192e-07, "loss": 0.5307, "step": 8649 }, { "epoch": 0.15035894939943334, "grad_norm": 2.0284485317458527, "learning_rate": 9.625009529750325e-07, "loss": 0.4387, "step": 8650 }, { "epoch": 0.15037633193693614, "grad_norm": 1.941932419766614, "learning_rate": 9.6249025655073e-07, "loss": 0.5064, "step": 8651 }, { "epoch": 0.15039371447443897, "grad_norm": 1.9470795028077377, "learning_rate": 9.62479558660546e-07, "loss": 0.5761, "step": 8652 }, { "epoch": 0.1504110970119418, "grad_norm": 2.02455472634634, "learning_rate": 9.624688593045142e-07, "loss": 0.216, "step": 8653 }, { "epoch": 0.15042847954944463, "grad_norm": 2.009915306372647, "learning_rate": 9.624581584826687e-07, "loss": 0.3588, "step": 8654 }, { "epoch": 0.15044586208694746, "grad_norm": 1.5218987422396584, "learning_rate": 9.624474561950432e-07, "loss": 0.2424, "step": 8655 }, { "epoch": 0.15046324462445027, "grad_norm": 3.5450213940817417, "learning_rate": 9.624367524416716e-07, "loss": 0.4948, "step": 8656 }, { "epoch": 0.1504806271619531, "grad_norm": 2.7060636689376882, "learning_rate": 9.62426047222588e-07, "loss": 0.4681, "step": 8657 }, { "epoch": 0.15049800969945593, "grad_norm": 1.7066755014782253, "learning_rate": 9.624153405378264e-07, "loss": 0.4215, "step": 8658 }, { "epoch": 0.15051539223695876, "grad_norm": 2.0414768648657846, "learning_rate": 9.624046323874203e-07, "loss": 0.2726, "step": 8659 }, { "epoch": 0.1505327747744616, "grad_norm": 3.5469325469548814, "learning_rate": 9.623939227714042e-07, "loss": 0.6326, "step": 8660 }, { "epoch": 0.1505501573119644, "grad_norm": 1.5006528078325456, "learning_rate": 9.623832116898116e-07, "loss": 0.4237, "step": 8661 }, { "epoch": 0.15056753984946722, "grad_norm": 2.2460012748905127, "learning_rate": 9.623724991426765e-07, "loss": 0.5424, "step": 8662 }, { "epoch": 0.15058492238697005, "grad_norm": 2.079554166426824, "learning_rate": 9.623617851300332e-07, "loss": 0.2525, "step": 8663 }, { "epoch": 0.15060230492447288, "grad_norm": 2.190683618646557, "learning_rate": 9.623510696519155e-07, "loss": 0.3933, "step": 8664 }, { "epoch": 0.1506196874619757, "grad_norm": 1.5450298876715345, "learning_rate": 9.62340352708357e-07, "loss": 0.4116, "step": 8665 }, { "epoch": 0.1506370699994785, "grad_norm": 1.4656031396353248, "learning_rate": 9.623296342993923e-07, "loss": 0.7268, "step": 8666 }, { "epoch": 0.15065445253698134, "grad_norm": 1.5087607584662295, "learning_rate": 9.623189144250548e-07, "loss": 0.3084, "step": 8667 }, { "epoch": 0.15067183507448417, "grad_norm": 1.767258706821536, "learning_rate": 9.623081930853786e-07, "loss": 0.3301, "step": 8668 }, { "epoch": 0.150689217611987, "grad_norm": 1.548933975685582, "learning_rate": 9.62297470280398e-07, "loss": 0.8201, "step": 8669 }, { "epoch": 0.15070660014948983, "grad_norm": 1.5342633674735953, "learning_rate": 9.62286746010147e-07, "loss": 0.2147, "step": 8670 }, { "epoch": 0.15072398268699264, "grad_norm": 1.9031744087252682, "learning_rate": 9.622760202746588e-07, "loss": 0.4026, "step": 8671 }, { "epoch": 0.15074136522449547, "grad_norm": 1.4704267058692804, "learning_rate": 9.622652930739685e-07, "loss": 0.2389, "step": 8672 }, { "epoch": 0.1507587477619983, "grad_norm": 2.041700527217481, "learning_rate": 9.622545644081092e-07, "loss": 0.613, "step": 8673 }, { "epoch": 0.15077613029950113, "grad_norm": 1.0233145501837493, "learning_rate": 9.622438342771156e-07, "loss": 0.5081, "step": 8674 }, { "epoch": 0.15079351283700396, "grad_norm": 1.8397091277974884, "learning_rate": 9.622331026810211e-07, "loss": 0.487, "step": 8675 }, { "epoch": 0.15081089537450676, "grad_norm": 1.9106144321854197, "learning_rate": 9.622223696198602e-07, "loss": 0.4601, "step": 8676 }, { "epoch": 0.1508282779120096, "grad_norm": 3.178147090542042, "learning_rate": 9.622116350936664e-07, "loss": 0.6811, "step": 8677 }, { "epoch": 0.15084566044951242, "grad_norm": 1.5553501424503113, "learning_rate": 9.622008991024743e-07, "loss": 0.4533, "step": 8678 }, { "epoch": 0.15086304298701525, "grad_norm": 1.8039120380039009, "learning_rate": 9.621901616463178e-07, "loss": 0.3456, "step": 8679 }, { "epoch": 0.15088042552451808, "grad_norm": 1.3080979004640136, "learning_rate": 9.621794227252304e-07, "loss": 0.5825, "step": 8680 }, { "epoch": 0.15089780806202088, "grad_norm": 2.0246078844359894, "learning_rate": 9.62168682339247e-07, "loss": 1.0741, "step": 8681 }, { "epoch": 0.1509151905995237, "grad_norm": 1.538851748691322, "learning_rate": 9.621579404884009e-07, "loss": 0.2847, "step": 8682 }, { "epoch": 0.15093257313702654, "grad_norm": 2.3559605638595227, "learning_rate": 9.621471971727264e-07, "loss": 0.6615, "step": 8683 }, { "epoch": 0.15094995567452937, "grad_norm": 1.8401738802206267, "learning_rate": 9.621364523922577e-07, "loss": 0.523, "step": 8684 }, { "epoch": 0.1509673382120322, "grad_norm": 1.4731224114493728, "learning_rate": 9.621257061470288e-07, "loss": 0.5129, "step": 8685 }, { "epoch": 0.150984720749535, "grad_norm": 3.4655176027438346, "learning_rate": 9.621149584370736e-07, "loss": 0.4899, "step": 8686 }, { "epoch": 0.15100210328703784, "grad_norm": 2.1558355321450877, "learning_rate": 9.62104209262426e-07, "loss": 0.5977, "step": 8687 }, { "epoch": 0.15101948582454067, "grad_norm": 2.0124168372747397, "learning_rate": 9.620934586231207e-07, "loss": 0.6814, "step": 8688 }, { "epoch": 0.1510368683620435, "grad_norm": 2.4114000116125096, "learning_rate": 9.620827065191912e-07, "loss": 0.4446, "step": 8689 }, { "epoch": 0.15105425089954633, "grad_norm": 2.8213792676932457, "learning_rate": 9.620719529506718e-07, "loss": 0.633, "step": 8690 }, { "epoch": 0.15107163343704913, "grad_norm": 1.4995773105369619, "learning_rate": 9.620611979175967e-07, "loss": 0.5519, "step": 8691 }, { "epoch": 0.15108901597455196, "grad_norm": 2.225908785274541, "learning_rate": 9.620504414199998e-07, "loss": 0.5054, "step": 8692 }, { "epoch": 0.1511063985120548, "grad_norm": 2.812012127062491, "learning_rate": 9.620396834579152e-07, "loss": 0.6393, "step": 8693 }, { "epoch": 0.15112378104955762, "grad_norm": 2.7826091563683635, "learning_rate": 9.62028924031377e-07, "loss": 0.4153, "step": 8694 }, { "epoch": 0.15114116358706045, "grad_norm": 2.1516035346934492, "learning_rate": 9.620181631404194e-07, "loss": 0.7147, "step": 8695 }, { "epoch": 0.15115854612456325, "grad_norm": 1.6795615108334647, "learning_rate": 9.620074007850765e-07, "loss": 0.6973, "step": 8696 }, { "epoch": 0.15117592866206608, "grad_norm": 1.754138828266891, "learning_rate": 9.619966369653824e-07, "loss": 0.5691, "step": 8697 }, { "epoch": 0.1511933111995689, "grad_norm": 2.8388683849493526, "learning_rate": 9.61985871681371e-07, "loss": 0.71, "step": 8698 }, { "epoch": 0.15121069373707174, "grad_norm": 3.35237721142635, "learning_rate": 9.619751049330768e-07, "loss": 0.3431, "step": 8699 }, { "epoch": 0.15122807627457457, "grad_norm": 2.1826438052132904, "learning_rate": 9.61964336720534e-07, "loss": 0.5633, "step": 8700 }, { "epoch": 0.15124545881207738, "grad_norm": 1.896737834051703, "learning_rate": 9.61953567043776e-07, "loss": 0.3375, "step": 8701 }, { "epoch": 0.1512628413495802, "grad_norm": 2.773014101204403, "learning_rate": 9.619427959028374e-07, "loss": 0.3339, "step": 8702 }, { "epoch": 0.15128022388708304, "grad_norm": 1.8342648721317485, "learning_rate": 9.619320232977525e-07, "loss": 0.2328, "step": 8703 }, { "epoch": 0.15129760642458587, "grad_norm": 1.5735068755235182, "learning_rate": 9.619212492285553e-07, "loss": 0.2835, "step": 8704 }, { "epoch": 0.1513149889620887, "grad_norm": 1.7402291645652068, "learning_rate": 9.619104736952798e-07, "loss": 0.4382, "step": 8705 }, { "epoch": 0.1513323714995915, "grad_norm": 1.575184669831404, "learning_rate": 9.618996966979605e-07, "loss": 0.525, "step": 8706 }, { "epoch": 0.15134975403709433, "grad_norm": 1.207749691645176, "learning_rate": 9.618889182366312e-07, "loss": 0.5158, "step": 8707 }, { "epoch": 0.15136713657459716, "grad_norm": 1.750100897448719, "learning_rate": 9.61878138311326e-07, "loss": 0.3768, "step": 8708 }, { "epoch": 0.1513845191121, "grad_norm": 2.09000364182675, "learning_rate": 9.618673569220797e-07, "loss": 0.9599, "step": 8709 }, { "epoch": 0.15140190164960282, "grad_norm": 1.7856514703242694, "learning_rate": 9.618565740689257e-07, "loss": 0.3479, "step": 8710 }, { "epoch": 0.15141928418710562, "grad_norm": 1.3906781212732817, "learning_rate": 9.618457897518987e-07, "loss": 0.3409, "step": 8711 }, { "epoch": 0.15143666672460845, "grad_norm": 1.9800450453713077, "learning_rate": 9.618350039710325e-07, "loss": 0.3517, "step": 8712 }, { "epoch": 0.15145404926211128, "grad_norm": 1.4541476058556517, "learning_rate": 9.618242167263617e-07, "loss": 0.4525, "step": 8713 }, { "epoch": 0.15147143179961411, "grad_norm": 1.9021935609226488, "learning_rate": 9.618134280179203e-07, "loss": 0.6181, "step": 8714 }, { "epoch": 0.15148881433711694, "grad_norm": 2.106998116421517, "learning_rate": 9.618026378457423e-07, "loss": 0.4515, "step": 8715 }, { "epoch": 0.15150619687461975, "grad_norm": 1.9518004430947502, "learning_rate": 9.617918462098623e-07, "loss": 0.43, "step": 8716 }, { "epoch": 0.15152357941212258, "grad_norm": 2.2195799913825525, "learning_rate": 9.61781053110314e-07, "loss": 0.4377, "step": 8717 }, { "epoch": 0.1515409619496254, "grad_norm": 1.4531383083531086, "learning_rate": 9.617702585471322e-07, "loss": 0.6801, "step": 8718 }, { "epoch": 0.15155834448712824, "grad_norm": 2.9138194242624933, "learning_rate": 9.617594625203505e-07, "loss": 0.5672, "step": 8719 }, { "epoch": 0.15157572702463107, "grad_norm": 2.5893639817891883, "learning_rate": 9.617486650300035e-07, "loss": 0.2669, "step": 8720 }, { "epoch": 0.15159310956213387, "grad_norm": 2.3939337023567044, "learning_rate": 9.617378660761254e-07, "loss": 0.6487, "step": 8721 }, { "epoch": 0.1516104920996367, "grad_norm": 2.419398031895966, "learning_rate": 9.617270656587503e-07, "loss": 0.631, "step": 8722 }, { "epoch": 0.15162787463713953, "grad_norm": 1.9029411132797578, "learning_rate": 9.617162637779126e-07, "loss": 0.4983, "step": 8723 }, { "epoch": 0.15164525717464236, "grad_norm": 1.8636127005553556, "learning_rate": 9.617054604336463e-07, "loss": 0.4966, "step": 8724 }, { "epoch": 0.1516626397121452, "grad_norm": 3.0126706825577294, "learning_rate": 9.61694655625986e-07, "loss": 0.4501, "step": 8725 }, { "epoch": 0.151680022249648, "grad_norm": 1.5007030312380827, "learning_rate": 9.616838493549655e-07, "loss": 0.4523, "step": 8726 }, { "epoch": 0.15169740478715082, "grad_norm": 2.2264801982219953, "learning_rate": 9.616730416206193e-07, "loss": 0.3858, "step": 8727 }, { "epoch": 0.15171478732465365, "grad_norm": 2.7778687422611705, "learning_rate": 9.616622324229816e-07, "loss": 0.407, "step": 8728 }, { "epoch": 0.15173216986215649, "grad_norm": 2.1545552957113974, "learning_rate": 9.61651421762087e-07, "loss": 0.5466, "step": 8729 }, { "epoch": 0.15174955239965932, "grad_norm": 2.2667915196315107, "learning_rate": 9.616406096379692e-07, "loss": 0.5986, "step": 8730 }, { "epoch": 0.15176693493716212, "grad_norm": 1.8892923953411518, "learning_rate": 9.616297960506629e-07, "loss": 0.6189, "step": 8731 }, { "epoch": 0.15178431747466495, "grad_norm": 2.581569159644953, "learning_rate": 9.61618981000202e-07, "loss": 0.5286, "step": 8732 }, { "epoch": 0.15180170001216778, "grad_norm": 1.8370261663715324, "learning_rate": 9.61608164486621e-07, "loss": 0.3514, "step": 8733 }, { "epoch": 0.1518190825496706, "grad_norm": 1.9711466034519882, "learning_rate": 9.615973465099542e-07, "loss": 0.7102, "step": 8734 }, { "epoch": 0.15183646508717344, "grad_norm": 2.1629312827564666, "learning_rate": 9.61586527070236e-07, "loss": 0.4561, "step": 8735 }, { "epoch": 0.15185384762467624, "grad_norm": 2.633284241325648, "learning_rate": 9.615757061675006e-07, "loss": 0.5159, "step": 8736 }, { "epoch": 0.15187123016217907, "grad_norm": 1.8997691894307496, "learning_rate": 9.61564883801782e-07, "loss": 0.5312, "step": 8737 }, { "epoch": 0.1518886126996819, "grad_norm": 1.4658682576303428, "learning_rate": 9.615540599731146e-07, "loss": 0.4931, "step": 8738 }, { "epoch": 0.15190599523718473, "grad_norm": 2.019076185413295, "learning_rate": 9.615432346815332e-07, "loss": 0.6411, "step": 8739 }, { "epoch": 0.15192337777468756, "grad_norm": 1.5340719216376033, "learning_rate": 9.615324079270716e-07, "loss": 0.5973, "step": 8740 }, { "epoch": 0.15194076031219036, "grad_norm": 2.2396934243814277, "learning_rate": 9.615215797097641e-07, "loss": 0.301, "step": 8741 }, { "epoch": 0.1519581428496932, "grad_norm": 2.202355245478878, "learning_rate": 9.615107500296456e-07, "loss": 0.4091, "step": 8742 }, { "epoch": 0.15197552538719603, "grad_norm": 1.9019160087135984, "learning_rate": 9.614999188867498e-07, "loss": 0.675, "step": 8743 }, { "epoch": 0.15199290792469886, "grad_norm": 5.248804948467477, "learning_rate": 9.614890862811114e-07, "loss": 0.5259, "step": 8744 }, { "epoch": 0.15201029046220169, "grad_norm": 1.7626542213248586, "learning_rate": 9.614782522127644e-07, "loss": 0.6006, "step": 8745 }, { "epoch": 0.1520276729997045, "grad_norm": 3.914371522920595, "learning_rate": 9.614674166817434e-07, "loss": 0.5534, "step": 8746 }, { "epoch": 0.15204505553720732, "grad_norm": 1.8135556132123185, "learning_rate": 9.614565796880827e-07, "loss": 0.3697, "step": 8747 }, { "epoch": 0.15206243807471015, "grad_norm": 2.6044596001991773, "learning_rate": 9.614457412318164e-07, "loss": 0.3445, "step": 8748 }, { "epoch": 0.15207982061221298, "grad_norm": 1.9879064974642018, "learning_rate": 9.614349013129794e-07, "loss": 0.5739, "step": 8749 }, { "epoch": 0.15209720314971578, "grad_norm": 3.609667784101776, "learning_rate": 9.614240599316055e-07, "loss": 0.3405, "step": 8750 }, { "epoch": 0.1521145856872186, "grad_norm": 1.710381851753693, "learning_rate": 9.614132170877293e-07, "loss": 0.511, "step": 8751 }, { "epoch": 0.15213196822472144, "grad_norm": 2.198682877871259, "learning_rate": 9.614023727813853e-07, "loss": 0.7287, "step": 8752 }, { "epoch": 0.15214935076222427, "grad_norm": 5.87325468921817, "learning_rate": 9.613915270126075e-07, "loss": 0.6215, "step": 8753 }, { "epoch": 0.1521667332997271, "grad_norm": 2.0149222224183028, "learning_rate": 9.613806797814308e-07, "loss": 0.3771, "step": 8754 }, { "epoch": 0.1521841158372299, "grad_norm": 1.9473600153625272, "learning_rate": 9.613698310878894e-07, "loss": 0.3151, "step": 8755 }, { "epoch": 0.15220149837473274, "grad_norm": 2.1535307293685273, "learning_rate": 9.613589809320171e-07, "loss": 0.5794, "step": 8756 }, { "epoch": 0.15221888091223557, "grad_norm": 2.467777157918797, "learning_rate": 9.61348129313849e-07, "loss": 0.7622, "step": 8757 }, { "epoch": 0.1522362634497384, "grad_norm": 1.9937111069692273, "learning_rate": 9.613372762334193e-07, "loss": 0.5693, "step": 8758 }, { "epoch": 0.15225364598724123, "grad_norm": 2.392569781251676, "learning_rate": 9.613264216907622e-07, "loss": 0.5614, "step": 8759 }, { "epoch": 0.15227102852474403, "grad_norm": 1.9407693519564617, "learning_rate": 9.613155656859125e-07, "loss": 0.5255, "step": 8760 }, { "epoch": 0.15228841106224686, "grad_norm": 1.5847481761287165, "learning_rate": 9.613047082189042e-07, "loss": 0.6146, "step": 8761 }, { "epoch": 0.1523057935997497, "grad_norm": 0.8235632384397912, "learning_rate": 9.612938492897717e-07, "loss": 0.2762, "step": 8762 }, { "epoch": 0.15232317613725252, "grad_norm": 1.6572426986576787, "learning_rate": 9.612829888985498e-07, "loss": 0.4992, "step": 8763 }, { "epoch": 0.15234055867475535, "grad_norm": 1.230106263766007, "learning_rate": 9.612721270452729e-07, "loss": 0.5699, "step": 8764 }, { "epoch": 0.15235794121225815, "grad_norm": 2.760798789851657, "learning_rate": 9.61261263729975e-07, "loss": 0.3279, "step": 8765 }, { "epoch": 0.15237532374976098, "grad_norm": 1.4333432741247434, "learning_rate": 9.612503989526909e-07, "loss": 0.5805, "step": 8766 }, { "epoch": 0.1523927062872638, "grad_norm": 1.5715718619471932, "learning_rate": 9.612395327134548e-07, "loss": 0.274, "step": 8767 }, { "epoch": 0.15241008882476664, "grad_norm": 1.4454427627343212, "learning_rate": 9.612286650123014e-07, "loss": 0.4473, "step": 8768 }, { "epoch": 0.15242747136226947, "grad_norm": 2.470625337281728, "learning_rate": 9.61217795849265e-07, "loss": 0.5184, "step": 8769 }, { "epoch": 0.15244485389977228, "grad_norm": 1.758026482097191, "learning_rate": 9.6120692522438e-07, "loss": 0.2928, "step": 8770 }, { "epoch": 0.1524622364372751, "grad_norm": 3.1018229201376206, "learning_rate": 9.611960531376807e-07, "loss": 0.6822, "step": 8771 }, { "epoch": 0.15247961897477794, "grad_norm": 2.047167745173268, "learning_rate": 9.61185179589202e-07, "loss": 0.3771, "step": 8772 }, { "epoch": 0.15249700151228077, "grad_norm": 2.5758178062297756, "learning_rate": 9.61174304578978e-07, "loss": 0.6112, "step": 8773 }, { "epoch": 0.1525143840497836, "grad_norm": 1.1550380711351904, "learning_rate": 9.611634281070435e-07, "loss": 0.5112, "step": 8774 }, { "epoch": 0.1525317665872864, "grad_norm": 3.1789360036159597, "learning_rate": 9.611525501734326e-07, "loss": 0.2801, "step": 8775 }, { "epoch": 0.15254914912478923, "grad_norm": 2.416641977579984, "learning_rate": 9.611416707781801e-07, "loss": 0.4831, "step": 8776 }, { "epoch": 0.15256653166229206, "grad_norm": 1.6017025925896666, "learning_rate": 9.6113078992132e-07, "loss": 0.5628, "step": 8777 }, { "epoch": 0.1525839141997949, "grad_norm": 1.7606232915635238, "learning_rate": 9.611199076028875e-07, "loss": 0.6484, "step": 8778 }, { "epoch": 0.15260129673729772, "grad_norm": 2.8002984010371237, "learning_rate": 9.611090238229167e-07, "loss": 0.4431, "step": 8779 }, { "epoch": 0.15261867927480052, "grad_norm": 1.4247442821018168, "learning_rate": 9.610981385814418e-07, "loss": 0.3979, "step": 8780 }, { "epoch": 0.15263606181230335, "grad_norm": 3.520694592353857, "learning_rate": 9.61087251878498e-07, "loss": 0.39, "step": 8781 }, { "epoch": 0.15265344434980618, "grad_norm": 1.6458745987910302, "learning_rate": 9.610763637141192e-07, "loss": 0.245, "step": 8782 }, { "epoch": 0.152670826887309, "grad_norm": 2.1717212917367816, "learning_rate": 9.610654740883401e-07, "loss": 0.6602, "step": 8783 }, { "epoch": 0.15268820942481184, "grad_norm": 1.663414429922829, "learning_rate": 9.610545830011953e-07, "loss": 0.2393, "step": 8784 }, { "epoch": 0.15270559196231465, "grad_norm": 2.3122822848959355, "learning_rate": 9.610436904527193e-07, "loss": 0.4956, "step": 8785 }, { "epoch": 0.15272297449981748, "grad_norm": 1.693872762734198, "learning_rate": 9.610327964429465e-07, "loss": 0.4264, "step": 8786 }, { "epoch": 0.1527403570373203, "grad_norm": 1.379945422858696, "learning_rate": 9.610219009719117e-07, "loss": 0.6103, "step": 8787 }, { "epoch": 0.15275773957482314, "grad_norm": 1.8910474509203214, "learning_rate": 9.610110040396489e-07, "loss": 0.2946, "step": 8788 }, { "epoch": 0.15277512211232597, "grad_norm": 1.8853407162387061, "learning_rate": 9.610001056461932e-07, "loss": 0.4164, "step": 8789 }, { "epoch": 0.15279250464982877, "grad_norm": 1.9215038159408453, "learning_rate": 9.609892057915789e-07, "loss": 0.6358, "step": 8790 }, { "epoch": 0.1528098871873316, "grad_norm": 1.4783418031865687, "learning_rate": 9.609783044758407e-07, "loss": 0.4652, "step": 8791 }, { "epoch": 0.15282726972483443, "grad_norm": 2.8210572216433283, "learning_rate": 9.60967401699013e-07, "loss": 0.5572, "step": 8792 }, { "epoch": 0.15284465226233726, "grad_norm": 1.4552729794490404, "learning_rate": 9.609564974611303e-07, "loss": 0.3069, "step": 8793 }, { "epoch": 0.1528620347998401, "grad_norm": 2.070329931560421, "learning_rate": 9.609455917622272e-07, "loss": 0.5322, "step": 8794 }, { "epoch": 0.1528794173373429, "grad_norm": 2.2272406341944273, "learning_rate": 9.609346846023385e-07, "loss": 0.7474, "step": 8795 }, { "epoch": 0.15289679987484572, "grad_norm": 2.8920067205600173, "learning_rate": 9.609237759814985e-07, "loss": 0.3931, "step": 8796 }, { "epoch": 0.15291418241234855, "grad_norm": 1.7311021155197077, "learning_rate": 9.609128658997419e-07, "loss": 0.5198, "step": 8797 }, { "epoch": 0.15293156494985138, "grad_norm": 1.5163284255348577, "learning_rate": 9.60901954357103e-07, "loss": 0.5065, "step": 8798 }, { "epoch": 0.15294894748735421, "grad_norm": 2.390711502364542, "learning_rate": 9.60891041353617e-07, "loss": 0.3093, "step": 8799 }, { "epoch": 0.15296633002485702, "grad_norm": 1.6457474502946812, "learning_rate": 9.608801268893178e-07, "loss": 0.4241, "step": 8800 }, { "epoch": 0.15298371256235985, "grad_norm": 1.5162560997857804, "learning_rate": 9.608692109642404e-07, "loss": 0.5985, "step": 8801 }, { "epoch": 0.15300109509986268, "grad_norm": 2.0473818124229513, "learning_rate": 9.608582935784193e-07, "loss": 0.4339, "step": 8802 }, { "epoch": 0.1530184776373655, "grad_norm": 2.4480888285414637, "learning_rate": 9.608473747318893e-07, "loss": 0.5302, "step": 8803 }, { "epoch": 0.15303586017486834, "grad_norm": 1.9411979514315572, "learning_rate": 9.608364544246846e-07, "loss": 0.3074, "step": 8804 }, { "epoch": 0.15305324271237114, "grad_norm": 1.0862981629462825, "learning_rate": 9.6082553265684e-07, "loss": 0.2232, "step": 8805 }, { "epoch": 0.15307062524987397, "grad_norm": 2.5986039166140156, "learning_rate": 9.608146094283902e-07, "loss": 0.7017, "step": 8806 }, { "epoch": 0.1530880077873768, "grad_norm": 2.182718258905757, "learning_rate": 9.6080368473937e-07, "loss": 0.5582, "step": 8807 }, { "epoch": 0.15310539032487963, "grad_norm": 1.5868830095903061, "learning_rate": 9.607927585898136e-07, "loss": 0.547, "step": 8808 }, { "epoch": 0.15312277286238246, "grad_norm": 1.4639189392175498, "learning_rate": 9.607818309797557e-07, "loss": 0.6154, "step": 8809 }, { "epoch": 0.15314015539988526, "grad_norm": 1.7614758115138147, "learning_rate": 9.607709019092314e-07, "loss": 0.5705, "step": 8810 }, { "epoch": 0.1531575379373881, "grad_norm": 1.7391717096223978, "learning_rate": 9.607599713782747e-07, "loss": 0.4893, "step": 8811 }, { "epoch": 0.15317492047489092, "grad_norm": 1.7015964071725902, "learning_rate": 9.607490393869207e-07, "loss": 0.2656, "step": 8812 }, { "epoch": 0.15319230301239375, "grad_norm": 1.7115339558770983, "learning_rate": 9.607381059352038e-07, "loss": 0.4234, "step": 8813 }, { "epoch": 0.15320968554989658, "grad_norm": 2.0693741131722905, "learning_rate": 9.60727171023159e-07, "loss": 0.3345, "step": 8814 }, { "epoch": 0.1532270680873994, "grad_norm": 2.763738747911999, "learning_rate": 9.607162346508203e-07, "loss": 0.4197, "step": 8815 }, { "epoch": 0.15324445062490222, "grad_norm": 1.8159817534088685, "learning_rate": 9.60705296818223e-07, "loss": 0.3485, "step": 8816 }, { "epoch": 0.15326183316240505, "grad_norm": 1.640607922300525, "learning_rate": 9.606943575254013e-07, "loss": 0.5125, "step": 8817 }, { "epoch": 0.15327921569990788, "grad_norm": 2.4046292156517244, "learning_rate": 9.606834167723904e-07, "loss": 0.4368, "step": 8818 }, { "epoch": 0.1532965982374107, "grad_norm": 1.6966846565610028, "learning_rate": 9.606724745592248e-07, "loss": 0.4171, "step": 8819 }, { "epoch": 0.1533139807749135, "grad_norm": 2.2223777963397686, "learning_rate": 9.606615308859386e-07, "loss": 0.7205, "step": 8820 }, { "epoch": 0.15333136331241634, "grad_norm": 1.5263011372162043, "learning_rate": 9.606505857525674e-07, "loss": 0.5489, "step": 8821 }, { "epoch": 0.15334874584991917, "grad_norm": 2.2611714126363913, "learning_rate": 9.606396391591453e-07, "loss": 0.5799, "step": 8822 }, { "epoch": 0.153366128387422, "grad_norm": 1.8388772725888898, "learning_rate": 9.606286911057071e-07, "loss": 0.2951, "step": 8823 }, { "epoch": 0.15338351092492483, "grad_norm": 1.7150540157185141, "learning_rate": 9.606177415922876e-07, "loss": 0.4764, "step": 8824 }, { "epoch": 0.15340089346242763, "grad_norm": 1.2502324331807226, "learning_rate": 9.606067906189215e-07, "loss": 0.4813, "step": 8825 }, { "epoch": 0.15341827599993046, "grad_norm": 1.9073113602425626, "learning_rate": 9.605958381856435e-07, "loss": 0.4399, "step": 8826 }, { "epoch": 0.1534356585374333, "grad_norm": 2.1452062445400655, "learning_rate": 9.605848842924879e-07, "loss": 0.6479, "step": 8827 }, { "epoch": 0.15345304107493613, "grad_norm": 2.3606143785113263, "learning_rate": 9.605739289394902e-07, "loss": 0.686, "step": 8828 }, { "epoch": 0.15347042361243896, "grad_norm": 1.910790405725553, "learning_rate": 9.605629721266847e-07, "loss": 0.3311, "step": 8829 }, { "epoch": 0.15348780614994176, "grad_norm": 1.7502795072985595, "learning_rate": 9.60552013854106e-07, "loss": 0.3245, "step": 8830 }, { "epoch": 0.1535051886874446, "grad_norm": 1.76878998974144, "learning_rate": 9.60541054121789e-07, "loss": 0.3695, "step": 8831 }, { "epoch": 0.15352257122494742, "grad_norm": 1.8686661369540967, "learning_rate": 9.605300929297682e-07, "loss": 0.6179, "step": 8832 }, { "epoch": 0.15353995376245025, "grad_norm": 1.8327551625522018, "learning_rate": 9.60519130278079e-07, "loss": 0.3603, "step": 8833 }, { "epoch": 0.15355733629995308, "grad_norm": 1.7895525073072396, "learning_rate": 9.605081661667553e-07, "loss": 0.5912, "step": 8834 }, { "epoch": 0.15357471883745588, "grad_norm": 1.9414716623632524, "learning_rate": 9.604972005958324e-07, "loss": 0.3991, "step": 8835 }, { "epoch": 0.1535921013749587, "grad_norm": 3.4070753708921937, "learning_rate": 9.60486233565345e-07, "loss": 0.6133, "step": 8836 }, { "epoch": 0.15360948391246154, "grad_norm": 1.7823377267058866, "learning_rate": 9.604752650753276e-07, "loss": 0.4055, "step": 8837 }, { "epoch": 0.15362686644996437, "grad_norm": 1.649182463577543, "learning_rate": 9.604642951258152e-07, "loss": 0.4176, "step": 8838 }, { "epoch": 0.1536442489874672, "grad_norm": 1.9524938167953798, "learning_rate": 9.604533237168426e-07, "loss": 0.393, "step": 8839 }, { "epoch": 0.15366163152497, "grad_norm": 1.9947230402652, "learning_rate": 9.604423508484444e-07, "loss": 0.4333, "step": 8840 }, { "epoch": 0.15367901406247284, "grad_norm": 2.8174672094577664, "learning_rate": 9.604313765206555e-07, "loss": 0.6708, "step": 8841 }, { "epoch": 0.15369639659997567, "grad_norm": 2.0104461647072482, "learning_rate": 9.604204007335107e-07, "loss": 0.5095, "step": 8842 }, { "epoch": 0.1537137791374785, "grad_norm": 3.2408863571264077, "learning_rate": 9.604094234870445e-07, "loss": 0.3009, "step": 8843 }, { "epoch": 0.15373116167498133, "grad_norm": 1.6370795334401067, "learning_rate": 9.60398444781292e-07, "loss": 0.4474, "step": 8844 }, { "epoch": 0.15374854421248413, "grad_norm": 1.7229354293682397, "learning_rate": 9.60387464616288e-07, "loss": 0.3558, "step": 8845 }, { "epoch": 0.15376592674998696, "grad_norm": 1.5444086630685283, "learning_rate": 9.603764829920674e-07, "loss": 0.4786, "step": 8846 }, { "epoch": 0.1537833092874898, "grad_norm": 2.29399946948896, "learning_rate": 9.603654999086646e-07, "loss": 0.8167, "step": 8847 }, { "epoch": 0.15380069182499262, "grad_norm": 1.2592755814513659, "learning_rate": 9.603545153661145e-07, "loss": 0.438, "step": 8848 }, { "epoch": 0.15381807436249545, "grad_norm": 1.1911671633938614, "learning_rate": 9.603435293644522e-07, "loss": 0.3471, "step": 8849 }, { "epoch": 0.15383545689999825, "grad_norm": 2.080523584698401, "learning_rate": 9.603325419037125e-07, "loss": 0.4829, "step": 8850 }, { "epoch": 0.15385283943750108, "grad_norm": 1.704927250459162, "learning_rate": 9.6032155298393e-07, "loss": 0.3611, "step": 8851 }, { "epoch": 0.1538702219750039, "grad_norm": 1.564050041731703, "learning_rate": 9.603105626051397e-07, "loss": 0.6696, "step": 8852 }, { "epoch": 0.15388760451250674, "grad_norm": 1.5800883196059121, "learning_rate": 9.602995707673761e-07, "loss": 0.3137, "step": 8853 }, { "epoch": 0.15390498705000957, "grad_norm": 3.2129149595036135, "learning_rate": 9.602885774706745e-07, "loss": 0.4435, "step": 8854 }, { "epoch": 0.15392236958751238, "grad_norm": 2.6826454656957837, "learning_rate": 9.602775827150698e-07, "loss": 0.5763, "step": 8855 }, { "epoch": 0.1539397521250152, "grad_norm": 1.5673221815581384, "learning_rate": 9.602665865005963e-07, "loss": 0.6843, "step": 8856 }, { "epoch": 0.15395713466251804, "grad_norm": 1.4397601035405405, "learning_rate": 9.602555888272892e-07, "loss": 0.2513, "step": 8857 }, { "epoch": 0.15397451720002087, "grad_norm": 2.4585981177583935, "learning_rate": 9.602445896951832e-07, "loss": 0.4629, "step": 8858 }, { "epoch": 0.1539918997375237, "grad_norm": 2.7274610767184404, "learning_rate": 9.602335891043135e-07, "loss": 0.6337, "step": 8859 }, { "epoch": 0.1540092822750265, "grad_norm": 1.9851919604234591, "learning_rate": 9.602225870547145e-07, "loss": 0.3545, "step": 8860 }, { "epoch": 0.15402666481252933, "grad_norm": 2.5047479635321355, "learning_rate": 9.602115835464215e-07, "loss": 0.218, "step": 8861 }, { "epoch": 0.15404404735003216, "grad_norm": 1.7741219807673771, "learning_rate": 9.602005785794691e-07, "loss": 0.2993, "step": 8862 }, { "epoch": 0.154061429887535, "grad_norm": 3.196411228233303, "learning_rate": 9.601895721538923e-07, "loss": 0.636, "step": 8863 }, { "epoch": 0.15407881242503782, "grad_norm": 1.2631188158215547, "learning_rate": 9.60178564269726e-07, "loss": 0.4109, "step": 8864 }, { "epoch": 0.15409619496254062, "grad_norm": 1.8100665809486922, "learning_rate": 9.60167554927005e-07, "loss": 0.2204, "step": 8865 }, { "epoch": 0.15411357750004345, "grad_norm": 1.5096237768430583, "learning_rate": 9.601565441257641e-07, "loss": 0.5148, "step": 8866 }, { "epoch": 0.15413096003754628, "grad_norm": 1.6629735129628356, "learning_rate": 9.601455318660385e-07, "loss": 0.5422, "step": 8867 }, { "epoch": 0.1541483425750491, "grad_norm": 2.3147647256944643, "learning_rate": 9.60134518147863e-07, "loss": 0.4281, "step": 8868 }, { "epoch": 0.15416572511255194, "grad_norm": 2.8518125352387385, "learning_rate": 9.601235029712722e-07, "loss": 0.4274, "step": 8869 }, { "epoch": 0.15418310765005475, "grad_norm": 1.3491008905274493, "learning_rate": 9.601124863363012e-07, "loss": 0.4099, "step": 8870 }, { "epoch": 0.15420049018755758, "grad_norm": 1.7646816822065423, "learning_rate": 9.601014682429853e-07, "loss": 0.6227, "step": 8871 }, { "epoch": 0.1542178727250604, "grad_norm": 1.6979132947834514, "learning_rate": 9.600904486913589e-07, "loss": 0.5184, "step": 8872 }, { "epoch": 0.15423525526256324, "grad_norm": 1.5370263617915274, "learning_rate": 9.600794276814572e-07, "loss": 0.4969, "step": 8873 }, { "epoch": 0.15425263780006607, "grad_norm": 3.4274626616337036, "learning_rate": 9.60068405213315e-07, "loss": 0.8492, "step": 8874 }, { "epoch": 0.15427002033756887, "grad_norm": 2.668562558335023, "learning_rate": 9.600573812869674e-07, "loss": 0.6186, "step": 8875 }, { "epoch": 0.1542874028750717, "grad_norm": 1.2489196571519998, "learning_rate": 9.600463559024493e-07, "loss": 0.2869, "step": 8876 }, { "epoch": 0.15430478541257453, "grad_norm": 1.4094615542298834, "learning_rate": 9.600353290597953e-07, "loss": 0.5749, "step": 8877 }, { "epoch": 0.15432216795007736, "grad_norm": 1.2879953772559463, "learning_rate": 9.600243007590408e-07, "loss": 0.3972, "step": 8878 }, { "epoch": 0.1543395504875802, "grad_norm": 1.5993466718048355, "learning_rate": 9.600132710002205e-07, "loss": 0.6496, "step": 8879 }, { "epoch": 0.154356933025083, "grad_norm": 1.230914899654878, "learning_rate": 9.600022397833696e-07, "loss": 0.5051, "step": 8880 }, { "epoch": 0.15437431556258582, "grad_norm": 2.2847087822418484, "learning_rate": 9.599912071085228e-07, "loss": 0.638, "step": 8881 }, { "epoch": 0.15439169810008865, "grad_norm": 1.2001485296499474, "learning_rate": 9.59980172975715e-07, "loss": 0.3788, "step": 8882 }, { "epoch": 0.15440908063759148, "grad_norm": 1.410174757610275, "learning_rate": 9.599691373849816e-07, "loss": 0.3108, "step": 8883 }, { "epoch": 0.15442646317509431, "grad_norm": 2.155584831705974, "learning_rate": 9.599581003363572e-07, "loss": 0.4452, "step": 8884 }, { "epoch": 0.15444384571259712, "grad_norm": 2.405720633214613, "learning_rate": 9.59947061829877e-07, "loss": 0.552, "step": 8885 }, { "epoch": 0.15446122825009995, "grad_norm": 1.868852206595056, "learning_rate": 9.599360218655758e-07, "loss": 0.3393, "step": 8886 }, { "epoch": 0.15447861078760278, "grad_norm": 1.5948779651452396, "learning_rate": 9.59924980443489e-07, "loss": 0.2444, "step": 8887 }, { "epoch": 0.1544959933251056, "grad_norm": 1.9376683532983114, "learning_rate": 9.599139375636508e-07, "loss": 0.2821, "step": 8888 }, { "epoch": 0.1545133758626084, "grad_norm": 3.5038725426655164, "learning_rate": 9.599028932260972e-07, "loss": 0.9753, "step": 8889 }, { "epoch": 0.15453075840011124, "grad_norm": 3.0263223637661176, "learning_rate": 9.598918474308622e-07, "loss": 0.3893, "step": 8890 }, { "epoch": 0.15454814093761407, "grad_norm": 2.369408939238036, "learning_rate": 9.598808001779817e-07, "loss": 0.367, "step": 8891 }, { "epoch": 0.1545655234751169, "grad_norm": 4.803232944649046, "learning_rate": 9.5986975146749e-07, "loss": 0.5911, "step": 8892 }, { "epoch": 0.15458290601261973, "grad_norm": 1.7794296308287876, "learning_rate": 9.598587012994227e-07, "loss": 0.4623, "step": 8893 }, { "epoch": 0.15460028855012253, "grad_norm": 8.027605738823189, "learning_rate": 9.598476496738143e-07, "loss": 0.434, "step": 8894 }, { "epoch": 0.15461767108762536, "grad_norm": 3.496638315101119, "learning_rate": 9.598365965907003e-07, "loss": 0.6099, "step": 8895 }, { "epoch": 0.1546350536251282, "grad_norm": 1.6753211676461364, "learning_rate": 9.598255420501154e-07, "loss": 0.532, "step": 8896 }, { "epoch": 0.15465243616263102, "grad_norm": 2.351340623559683, "learning_rate": 9.598144860520948e-07, "loss": 0.5897, "step": 8897 }, { "epoch": 0.15466981870013385, "grad_norm": 1.984489353210729, "learning_rate": 9.598034285966734e-07, "loss": 0.3337, "step": 8898 }, { "epoch": 0.15468720123763666, "grad_norm": 1.6000842625072493, "learning_rate": 9.597923696838866e-07, "loss": 0.7299, "step": 8899 }, { "epoch": 0.1547045837751395, "grad_norm": 2.7849294280003662, "learning_rate": 9.59781309313769e-07, "loss": 0.7277, "step": 8900 }, { "epoch": 0.15472196631264232, "grad_norm": 1.4973402939837017, "learning_rate": 9.597702474863558e-07, "loss": 0.2728, "step": 8901 }, { "epoch": 0.15473934885014515, "grad_norm": 1.8658923188877887, "learning_rate": 9.597591842016822e-07, "loss": 0.3604, "step": 8902 }, { "epoch": 0.15475673138764798, "grad_norm": 2.55035410588893, "learning_rate": 9.59748119459783e-07, "loss": 0.3425, "step": 8903 }, { "epoch": 0.15477411392515078, "grad_norm": 1.6262583850489445, "learning_rate": 9.597370532606938e-07, "loss": 0.4108, "step": 8904 }, { "epoch": 0.1547914964626536, "grad_norm": 0.985081343987912, "learning_rate": 9.597259856044493e-07, "loss": 0.4794, "step": 8905 }, { "epoch": 0.15480887900015644, "grad_norm": 2.242365497136067, "learning_rate": 9.597149164910845e-07, "loss": 0.5242, "step": 8906 }, { "epoch": 0.15482626153765927, "grad_norm": 1.3542295302880478, "learning_rate": 9.597038459206346e-07, "loss": 0.3199, "step": 8907 }, { "epoch": 0.1548436440751621, "grad_norm": 2.348459928573078, "learning_rate": 9.596927738931344e-07, "loss": 1.0487, "step": 8908 }, { "epoch": 0.1548610266126649, "grad_norm": 2.1243121581639866, "learning_rate": 9.596817004086197e-07, "loss": 0.3961, "step": 8909 }, { "epoch": 0.15487840915016773, "grad_norm": 3.322749618040668, "learning_rate": 9.596706254671249e-07, "loss": 0.4315, "step": 8910 }, { "epoch": 0.15489579168767056, "grad_norm": 5.602611400906543, "learning_rate": 9.596595490686855e-07, "loss": 0.7378, "step": 8911 }, { "epoch": 0.1549131742251734, "grad_norm": 1.870430040881339, "learning_rate": 9.596484712133365e-07, "loss": 0.4145, "step": 8912 }, { "epoch": 0.15493055676267622, "grad_norm": 1.8279080873632827, "learning_rate": 9.59637391901113e-07, "loss": 0.5281, "step": 8913 }, { "epoch": 0.15494793930017903, "grad_norm": 3.4026253316600754, "learning_rate": 9.5962631113205e-07, "loss": 0.7667, "step": 8914 }, { "epoch": 0.15496532183768186, "grad_norm": 2.2727672452872527, "learning_rate": 9.596152289061828e-07, "loss": 0.5121, "step": 8915 }, { "epoch": 0.1549827043751847, "grad_norm": 2.609266862436944, "learning_rate": 9.596041452235464e-07, "loss": 0.5977, "step": 8916 }, { "epoch": 0.15500008691268752, "grad_norm": 3.0635268421652775, "learning_rate": 9.59593060084176e-07, "loss": 0.5744, "step": 8917 }, { "epoch": 0.15501746945019035, "grad_norm": 2.2727328708358083, "learning_rate": 9.595819734881068e-07, "loss": 0.4862, "step": 8918 }, { "epoch": 0.15503485198769315, "grad_norm": 1.6991001459922896, "learning_rate": 9.595708854353737e-07, "loss": 0.3665, "step": 8919 }, { "epoch": 0.15505223452519598, "grad_norm": 1.6232678498428945, "learning_rate": 9.59559795926012e-07, "loss": 0.4803, "step": 8920 }, { "epoch": 0.1550696170626988, "grad_norm": 2.576121269580128, "learning_rate": 9.59548704960057e-07, "loss": 0.5566, "step": 8921 }, { "epoch": 0.15508699960020164, "grad_norm": 1.8596393660866235, "learning_rate": 9.595376125375436e-07, "loss": 0.2915, "step": 8922 }, { "epoch": 0.15510438213770447, "grad_norm": 2.5794649423934057, "learning_rate": 9.595265186585073e-07, "loss": 0.4476, "step": 8923 }, { "epoch": 0.15512176467520727, "grad_norm": 2.6279194344508845, "learning_rate": 9.595154233229828e-07, "loss": 0.4838, "step": 8924 }, { "epoch": 0.1551391472127101, "grad_norm": 1.4010773182144733, "learning_rate": 9.595043265310055e-07, "loss": 0.4089, "step": 8925 }, { "epoch": 0.15515652975021293, "grad_norm": 1.5126970243703655, "learning_rate": 9.594932282826105e-07, "loss": 0.6663, "step": 8926 }, { "epoch": 0.15517391228771577, "grad_norm": 1.734294818260993, "learning_rate": 9.594821285778332e-07, "loss": 0.3402, "step": 8927 }, { "epoch": 0.1551912948252186, "grad_norm": 1.1438336372161466, "learning_rate": 9.594710274167086e-07, "loss": 0.6728, "step": 8928 }, { "epoch": 0.1552086773627214, "grad_norm": 1.774897216303624, "learning_rate": 9.59459924799272e-07, "loss": 0.3303, "step": 8929 }, { "epoch": 0.15522605990022423, "grad_norm": 3.383416449010041, "learning_rate": 9.594488207255582e-07, "loss": 0.4164, "step": 8930 }, { "epoch": 0.15524344243772706, "grad_norm": 1.6292790752225876, "learning_rate": 9.594377151956028e-07, "loss": 0.2903, "step": 8931 }, { "epoch": 0.1552608249752299, "grad_norm": 1.9478354205259185, "learning_rate": 9.594266082094408e-07, "loss": 0.6451, "step": 8932 }, { "epoch": 0.15527820751273272, "grad_norm": 1.9999368910434703, "learning_rate": 9.594154997671078e-07, "loss": 0.5597, "step": 8933 }, { "epoch": 0.15529559005023552, "grad_norm": 2.064276959563231, "learning_rate": 9.594043898686384e-07, "loss": 0.4139, "step": 8934 }, { "epoch": 0.15531297258773835, "grad_norm": 2.1033821187862407, "learning_rate": 9.593932785140684e-07, "loss": 0.5596, "step": 8935 }, { "epoch": 0.15533035512524118, "grad_norm": 1.6772610685192795, "learning_rate": 9.593821657034326e-07, "loss": 0.413, "step": 8936 }, { "epoch": 0.155347737662744, "grad_norm": 1.6533486812057003, "learning_rate": 9.593710514367664e-07, "loss": 0.6511, "step": 8937 }, { "epoch": 0.15536512020024684, "grad_norm": 2.0415525181146275, "learning_rate": 9.593599357141048e-07, "loss": 0.4978, "step": 8938 }, { "epoch": 0.15538250273774964, "grad_norm": 1.1539687145916449, "learning_rate": 9.593488185354833e-07, "loss": 0.5048, "step": 8939 }, { "epoch": 0.15539988527525248, "grad_norm": 1.846686518890554, "learning_rate": 9.593376999009372e-07, "loss": 0.4769, "step": 8940 }, { "epoch": 0.1554172678127553, "grad_norm": 4.448076277562053, "learning_rate": 9.593265798105015e-07, "loss": 0.396, "step": 8941 }, { "epoch": 0.15543465035025814, "grad_norm": 1.90061505612504, "learning_rate": 9.593154582642116e-07, "loss": 0.6347, "step": 8942 }, { "epoch": 0.15545203288776097, "grad_norm": 1.0704829880550315, "learning_rate": 9.593043352621025e-07, "loss": 0.4762, "step": 8943 }, { "epoch": 0.15546941542526377, "grad_norm": 2.2628309711285244, "learning_rate": 9.592932108042097e-07, "loss": 0.5027, "step": 8944 }, { "epoch": 0.1554867979627666, "grad_norm": 2.5535521173023334, "learning_rate": 9.592820848905686e-07, "loss": 0.5152, "step": 8945 }, { "epoch": 0.15550418050026943, "grad_norm": 1.9664887734875363, "learning_rate": 9.592709575212142e-07, "loss": 0.4615, "step": 8946 }, { "epoch": 0.15552156303777226, "grad_norm": 2.193764550954665, "learning_rate": 9.592598286961817e-07, "loss": 0.3992, "step": 8947 }, { "epoch": 0.1555389455752751, "grad_norm": 1.4259294642519496, "learning_rate": 9.592486984155066e-07, "loss": 0.5498, "step": 8948 }, { "epoch": 0.1555563281127779, "grad_norm": 1.7599092983233335, "learning_rate": 9.59237566679224e-07, "loss": 0.4845, "step": 8949 }, { "epoch": 0.15557371065028072, "grad_norm": 1.7940743131137347, "learning_rate": 9.592264334873693e-07, "loss": 0.3714, "step": 8950 }, { "epoch": 0.15559109318778355, "grad_norm": 2.5441470563385855, "learning_rate": 9.592152988399779e-07, "loss": 0.4458, "step": 8951 }, { "epoch": 0.15560847572528638, "grad_norm": 2.068407801689775, "learning_rate": 9.592041627370848e-07, "loss": 0.3527, "step": 8952 }, { "epoch": 0.1556258582627892, "grad_norm": 1.328478111561442, "learning_rate": 9.591930251787255e-07, "loss": 0.2249, "step": 8953 }, { "epoch": 0.15564324080029202, "grad_norm": 1.318957161271876, "learning_rate": 9.591818861649353e-07, "loss": 0.2966, "step": 8954 }, { "epoch": 0.15566062333779485, "grad_norm": 1.8253675251708439, "learning_rate": 9.591707456957494e-07, "loss": 1.2046, "step": 8955 }, { "epoch": 0.15567800587529768, "grad_norm": 3.679247753128866, "learning_rate": 9.591596037712031e-07, "loss": 0.4818, "step": 8956 }, { "epoch": 0.1556953884128005, "grad_norm": 1.9999579150521916, "learning_rate": 9.591484603913319e-07, "loss": 0.4349, "step": 8957 }, { "epoch": 0.15571277095030334, "grad_norm": 1.4790565650685903, "learning_rate": 9.59137315556171e-07, "loss": 0.5548, "step": 8958 }, { "epoch": 0.15573015348780614, "grad_norm": 1.1186997262744203, "learning_rate": 9.591261692657555e-07, "loss": 0.6081, "step": 8959 }, { "epoch": 0.15574753602530897, "grad_norm": 1.2417323983502304, "learning_rate": 9.591150215201212e-07, "loss": 0.6028, "step": 8960 }, { "epoch": 0.1557649185628118, "grad_norm": 1.9812508598464909, "learning_rate": 9.59103872319303e-07, "loss": 0.2208, "step": 8961 }, { "epoch": 0.15578230110031463, "grad_norm": 1.7278181535054589, "learning_rate": 9.590927216633367e-07, "loss": 0.4035, "step": 8962 }, { "epoch": 0.15579968363781746, "grad_norm": 2.267139266075772, "learning_rate": 9.59081569552257e-07, "loss": 0.8579, "step": 8963 }, { "epoch": 0.15581706617532026, "grad_norm": 1.9541886863230027, "learning_rate": 9.590704159860999e-07, "loss": 0.4068, "step": 8964 }, { "epoch": 0.1558344487128231, "grad_norm": 4.784519742114274, "learning_rate": 9.590592609649002e-07, "loss": 0.5799, "step": 8965 }, { "epoch": 0.15585183125032592, "grad_norm": 2.4241094070901124, "learning_rate": 9.590481044886938e-07, "loss": 0.4798, "step": 8966 }, { "epoch": 0.15586921378782875, "grad_norm": 1.8122134199745337, "learning_rate": 9.590369465575154e-07, "loss": 0.5179, "step": 8967 }, { "epoch": 0.15588659632533158, "grad_norm": 2.0000040088442583, "learning_rate": 9.590257871714012e-07, "loss": 0.4568, "step": 8968 }, { "epoch": 0.15590397886283439, "grad_norm": 2.3492762802447738, "learning_rate": 9.590146263303858e-07, "loss": 0.3938, "step": 8969 }, { "epoch": 0.15592136140033722, "grad_norm": 1.4977090155490629, "learning_rate": 9.590034640345049e-07, "loss": 0.443, "step": 8970 }, { "epoch": 0.15593874393784005, "grad_norm": 1.565762018982536, "learning_rate": 9.58992300283794e-07, "loss": 0.3855, "step": 8971 }, { "epoch": 0.15595612647534288, "grad_norm": 1.1791305993245764, "learning_rate": 9.589811350782882e-07, "loss": 0.3303, "step": 8972 }, { "epoch": 0.1559735090128457, "grad_norm": 1.3393619135185004, "learning_rate": 9.589699684180232e-07, "loss": 0.2964, "step": 8973 }, { "epoch": 0.1559908915503485, "grad_norm": 2.063660548910943, "learning_rate": 9.589588003030342e-07, "loss": 0.3081, "step": 8974 }, { "epoch": 0.15600827408785134, "grad_norm": 3.375270795406853, "learning_rate": 9.589476307333567e-07, "loss": 0.3362, "step": 8975 }, { "epoch": 0.15602565662535417, "grad_norm": 2.025008117807031, "learning_rate": 9.58936459709026e-07, "loss": 0.416, "step": 8976 }, { "epoch": 0.156043039162857, "grad_norm": 1.576061667125104, "learning_rate": 9.589252872300772e-07, "loss": 0.5375, "step": 8977 }, { "epoch": 0.15606042170035983, "grad_norm": 1.8378640036789906, "learning_rate": 9.589141132965463e-07, "loss": 0.6274, "step": 8978 }, { "epoch": 0.15607780423786263, "grad_norm": 1.4897524331792702, "learning_rate": 9.589029379084687e-07, "loss": 0.2116, "step": 8979 }, { "epoch": 0.15609518677536546, "grad_norm": 2.0244639728371094, "learning_rate": 9.588917610658794e-07, "loss": 0.4726, "step": 8980 }, { "epoch": 0.1561125693128683, "grad_norm": 1.338718900524212, "learning_rate": 9.588805827688138e-07, "loss": 0.3043, "step": 8981 }, { "epoch": 0.15612995185037112, "grad_norm": 1.3210431104051685, "learning_rate": 9.58869403017308e-07, "loss": 0.4385, "step": 8982 }, { "epoch": 0.15614733438787395, "grad_norm": 2.399526833090007, "learning_rate": 9.588582218113966e-07, "loss": 0.4985, "step": 8983 }, { "epoch": 0.15616471692537676, "grad_norm": 0.960322157356003, "learning_rate": 9.588470391511154e-07, "loss": 0.283, "step": 8984 }, { "epoch": 0.1561820994628796, "grad_norm": 2.3135804823751425, "learning_rate": 9.588358550365001e-07, "loss": 0.3155, "step": 8985 }, { "epoch": 0.15619948200038242, "grad_norm": 2.166286063899389, "learning_rate": 9.58824669467586e-07, "loss": 0.2938, "step": 8986 }, { "epoch": 0.15621686453788525, "grad_norm": 3.134589591855088, "learning_rate": 9.588134824444081e-07, "loss": 0.5619, "step": 8987 }, { "epoch": 0.15623424707538808, "grad_norm": 2.507779063198561, "learning_rate": 9.588022939670024e-07, "loss": 0.312, "step": 8988 }, { "epoch": 0.15625162961289088, "grad_norm": 2.016083415713751, "learning_rate": 9.587911040354042e-07, "loss": 0.4286, "step": 8989 }, { "epoch": 0.1562690121503937, "grad_norm": 2.1083104458433923, "learning_rate": 9.587799126496489e-07, "loss": 0.2873, "step": 8990 }, { "epoch": 0.15628639468789654, "grad_norm": 1.9725589690171914, "learning_rate": 9.587687198097721e-07, "loss": 0.5608, "step": 8991 }, { "epoch": 0.15630377722539937, "grad_norm": 1.851816461411738, "learning_rate": 9.587575255158092e-07, "loss": 0.7664, "step": 8992 }, { "epoch": 0.1563211597629022, "grad_norm": 2.789577778560194, "learning_rate": 9.587463297677956e-07, "loss": 0.5509, "step": 8993 }, { "epoch": 0.156338542300405, "grad_norm": 5.008899852076909, "learning_rate": 9.58735132565767e-07, "loss": 0.4278, "step": 8994 }, { "epoch": 0.15635592483790783, "grad_norm": 2.4523888190805057, "learning_rate": 9.587239339097586e-07, "loss": 0.3187, "step": 8995 }, { "epoch": 0.15637330737541066, "grad_norm": 1.837451131666965, "learning_rate": 9.587127337998061e-07, "loss": 0.6337, "step": 8996 }, { "epoch": 0.1563906899129135, "grad_norm": 1.9806616168096745, "learning_rate": 9.587015322359451e-07, "loss": 0.5245, "step": 8997 }, { "epoch": 0.15640807245041632, "grad_norm": 2.3789101685732206, "learning_rate": 9.586903292182107e-07, "loss": 0.2735, "step": 8998 }, { "epoch": 0.15642545498791913, "grad_norm": 1.9832943523040012, "learning_rate": 9.58679124746639e-07, "loss": 0.6059, "step": 8999 }, { "epoch": 0.15644283752542196, "grad_norm": 2.103149341522566, "learning_rate": 9.586679188212648e-07, "loss": 0.4395, "step": 9000 }, { "epoch": 0.1564602200629248, "grad_norm": 1.2123447657863549, "learning_rate": 9.586567114421241e-07, "loss": 0.4226, "step": 9001 }, { "epoch": 0.15647760260042762, "grad_norm": 2.313887993141748, "learning_rate": 9.586455026092523e-07, "loss": 0.3007, "step": 9002 }, { "epoch": 0.15649498513793045, "grad_norm": 2.098601289561852, "learning_rate": 9.586342923226851e-07, "loss": 0.5378, "step": 9003 }, { "epoch": 0.15651236767543325, "grad_norm": 1.4479724884609548, "learning_rate": 9.586230805824579e-07, "loss": 0.5298, "step": 9004 }, { "epoch": 0.15652975021293608, "grad_norm": 2.297908276557772, "learning_rate": 9.58611867388606e-07, "loss": 0.5726, "step": 9005 }, { "epoch": 0.1565471327504389, "grad_norm": 3.739529318425067, "learning_rate": 9.586006527411651e-07, "loss": 0.8014, "step": 9006 }, { "epoch": 0.15656451528794174, "grad_norm": 1.94290372675785, "learning_rate": 9.58589436640171e-07, "loss": 0.7439, "step": 9007 }, { "epoch": 0.15658189782544457, "grad_norm": 1.9358051965660719, "learning_rate": 9.58578219085659e-07, "loss": 0.3996, "step": 9008 }, { "epoch": 0.15659928036294737, "grad_norm": 2.2225409876527937, "learning_rate": 9.585670000776646e-07, "loss": 0.5598, "step": 9009 }, { "epoch": 0.1566166629004502, "grad_norm": 2.2212931608377144, "learning_rate": 9.585557796162237e-07, "loss": 0.2472, "step": 9010 }, { "epoch": 0.15663404543795303, "grad_norm": 2.033238962548818, "learning_rate": 9.585445577013714e-07, "loss": 0.5242, "step": 9011 }, { "epoch": 0.15665142797545586, "grad_norm": 4.3881980696302465, "learning_rate": 9.585333343331434e-07, "loss": 0.5238, "step": 9012 }, { "epoch": 0.1566688105129587, "grad_norm": 2.0205752208832837, "learning_rate": 9.585221095115756e-07, "loss": 0.4306, "step": 9013 }, { "epoch": 0.1566861930504615, "grad_norm": 4.00125495756332, "learning_rate": 9.585108832367034e-07, "loss": 0.5962, "step": 9014 }, { "epoch": 0.15670357558796433, "grad_norm": 2.973565908833375, "learning_rate": 9.584996555085621e-07, "loss": 0.7991, "step": 9015 }, { "epoch": 0.15672095812546716, "grad_norm": 2.4687459825247777, "learning_rate": 9.584884263271876e-07, "loss": 0.5212, "step": 9016 }, { "epoch": 0.15673834066297, "grad_norm": 2.2540558692474595, "learning_rate": 9.584771956926152e-07, "loss": 0.3526, "step": 9017 }, { "epoch": 0.15675572320047282, "grad_norm": 2.6073325239098852, "learning_rate": 9.58465963604881e-07, "loss": 0.6497, "step": 9018 }, { "epoch": 0.15677310573797562, "grad_norm": 8.2061810308137, "learning_rate": 9.584547300640201e-07, "loss": 0.6774, "step": 9019 }, { "epoch": 0.15679048827547845, "grad_norm": 2.550177494576796, "learning_rate": 9.584434950700684e-07, "loss": 0.4369, "step": 9020 }, { "epoch": 0.15680787081298128, "grad_norm": 1.3576369990288317, "learning_rate": 9.584322586230613e-07, "loss": 0.5983, "step": 9021 }, { "epoch": 0.1568252533504841, "grad_norm": 1.5943468776758496, "learning_rate": 9.584210207230348e-07, "loss": 0.4002, "step": 9022 }, { "epoch": 0.15684263588798694, "grad_norm": 1.7811225894448648, "learning_rate": 9.58409781370024e-07, "loss": 0.3854, "step": 9023 }, { "epoch": 0.15686001842548974, "grad_norm": 6.302011726876416, "learning_rate": 9.583985405640649e-07, "loss": 0.4068, "step": 9024 }, { "epoch": 0.15687740096299257, "grad_norm": 3.0650203634384776, "learning_rate": 9.583872983051928e-07, "loss": 1.6005, "step": 9025 }, { "epoch": 0.1568947835004954, "grad_norm": 1.860227517655201, "learning_rate": 9.583760545934435e-07, "loss": 0.2643, "step": 9026 }, { "epoch": 0.15691216603799824, "grad_norm": 1.9965594764276682, "learning_rate": 9.58364809428853e-07, "loss": 0.3532, "step": 9027 }, { "epoch": 0.15692954857550107, "grad_norm": 1.4372421695433428, "learning_rate": 9.583535628114566e-07, "loss": 0.2388, "step": 9028 }, { "epoch": 0.15694693111300387, "grad_norm": 1.4559794331133435, "learning_rate": 9.583423147412897e-07, "loss": 0.314, "step": 9029 }, { "epoch": 0.1569643136505067, "grad_norm": 2.235458629746099, "learning_rate": 9.583310652183883e-07, "loss": 0.3993, "step": 9030 }, { "epoch": 0.15698169618800953, "grad_norm": 2.1651954458274556, "learning_rate": 9.583198142427879e-07, "loss": 0.4939, "step": 9031 }, { "epoch": 0.15699907872551236, "grad_norm": 2.358828851264448, "learning_rate": 9.583085618145243e-07, "loss": 0.568, "step": 9032 }, { "epoch": 0.15701646126301516, "grad_norm": 1.8918828773607597, "learning_rate": 9.58297307933633e-07, "loss": 0.4536, "step": 9033 }, { "epoch": 0.157033843800518, "grad_norm": 1.4634802218866996, "learning_rate": 9.5828605260015e-07, "loss": 0.2723, "step": 9034 }, { "epoch": 0.15705122633802082, "grad_norm": 1.344589742381346, "learning_rate": 9.582747958141104e-07, "loss": 0.4766, "step": 9035 }, { "epoch": 0.15706860887552365, "grad_norm": 3.4066483710754585, "learning_rate": 9.582635375755504e-07, "loss": 0.704, "step": 9036 }, { "epoch": 0.15708599141302648, "grad_norm": 1.980251453062757, "learning_rate": 9.582522778845055e-07, "loss": 0.4777, "step": 9037 }, { "epoch": 0.15710337395052928, "grad_norm": 2.0254246677126226, "learning_rate": 9.582410167410112e-07, "loss": 0.4242, "step": 9038 }, { "epoch": 0.15712075648803212, "grad_norm": 1.7465770142053747, "learning_rate": 9.582297541451037e-07, "loss": 0.3167, "step": 9039 }, { "epoch": 0.15713813902553495, "grad_norm": 1.7423242626285278, "learning_rate": 9.582184900968181e-07, "loss": 0.2731, "step": 9040 }, { "epoch": 0.15715552156303778, "grad_norm": 2.3141808834632602, "learning_rate": 9.582072245961904e-07, "loss": 0.3049, "step": 9041 }, { "epoch": 0.1571729041005406, "grad_norm": 1.613528773951335, "learning_rate": 9.581959576432566e-07, "loss": 0.3942, "step": 9042 }, { "epoch": 0.1571902866380434, "grad_norm": 1.686848152218293, "learning_rate": 9.581846892380519e-07, "loss": 0.3461, "step": 9043 }, { "epoch": 0.15720766917554624, "grad_norm": 2.0148691016453664, "learning_rate": 9.581734193806122e-07, "loss": 0.6688, "step": 9044 }, { "epoch": 0.15722505171304907, "grad_norm": 1.5629191082678704, "learning_rate": 9.58162148070973e-07, "loss": 0.3498, "step": 9045 }, { "epoch": 0.1572424342505519, "grad_norm": 2.0079721801883936, "learning_rate": 9.581508753091704e-07, "loss": 0.5407, "step": 9046 }, { "epoch": 0.15725981678805473, "grad_norm": 1.2725495992050244, "learning_rate": 9.581396010952401e-07, "loss": 0.35, "step": 9047 }, { "epoch": 0.15727719932555753, "grad_norm": 1.569887929271001, "learning_rate": 9.581283254292176e-07, "loss": 0.5352, "step": 9048 }, { "epoch": 0.15729458186306036, "grad_norm": 2.111573789960708, "learning_rate": 9.58117048311139e-07, "loss": 0.2603, "step": 9049 }, { "epoch": 0.1573119644005632, "grad_norm": 3.5219476393521116, "learning_rate": 9.581057697410395e-07, "loss": 0.6879, "step": 9050 }, { "epoch": 0.15732934693806602, "grad_norm": 1.6680806113677864, "learning_rate": 9.580944897189552e-07, "loss": 0.3803, "step": 9051 }, { "epoch": 0.15734672947556885, "grad_norm": 1.5345019174316687, "learning_rate": 9.580832082449218e-07, "loss": 0.5768, "step": 9052 }, { "epoch": 0.15736411201307166, "grad_norm": 1.2473009118982736, "learning_rate": 9.580719253189751e-07, "loss": 0.4244, "step": 9053 }, { "epoch": 0.15738149455057449, "grad_norm": 2.166774645415154, "learning_rate": 9.58060640941151e-07, "loss": 0.6417, "step": 9054 }, { "epoch": 0.15739887708807732, "grad_norm": 1.3457255120486276, "learning_rate": 9.580493551114848e-07, "loss": 0.6002, "step": 9055 }, { "epoch": 0.15741625962558015, "grad_norm": 1.5664257795475522, "learning_rate": 9.580380678300126e-07, "loss": 0.3861, "step": 9056 }, { "epoch": 0.15743364216308298, "grad_norm": 1.8650113930677683, "learning_rate": 9.580267790967702e-07, "loss": 0.6249, "step": 9057 }, { "epoch": 0.15745102470058578, "grad_norm": 1.9857510103748934, "learning_rate": 9.580154889117933e-07, "loss": 0.5238, "step": 9058 }, { "epoch": 0.1574684072380886, "grad_norm": 1.6508960266849022, "learning_rate": 9.580041972751176e-07, "loss": 0.5658, "step": 9059 }, { "epoch": 0.15748578977559144, "grad_norm": 2.1736395033142255, "learning_rate": 9.57992904186779e-07, "loss": 0.56, "step": 9060 }, { "epoch": 0.15750317231309427, "grad_norm": 1.3052238168658867, "learning_rate": 9.579816096468135e-07, "loss": 0.4221, "step": 9061 }, { "epoch": 0.1575205548505971, "grad_norm": 3.1034140882745325, "learning_rate": 9.579703136552564e-07, "loss": 0.5705, "step": 9062 }, { "epoch": 0.1575379373880999, "grad_norm": 2.0791176848023905, "learning_rate": 9.57959016212144e-07, "loss": 0.2686, "step": 9063 }, { "epoch": 0.15755531992560273, "grad_norm": 1.6209703015963681, "learning_rate": 9.57947717317512e-07, "loss": 0.3912, "step": 9064 }, { "epoch": 0.15757270246310556, "grad_norm": 1.7894388907922676, "learning_rate": 9.579364169713957e-07, "loss": 0.7197, "step": 9065 }, { "epoch": 0.1575900850006084, "grad_norm": 1.8876272649594892, "learning_rate": 9.579251151738314e-07, "loss": 0.6384, "step": 9066 }, { "epoch": 0.15760746753811122, "grad_norm": 1.9269830596667434, "learning_rate": 9.579138119248551e-07, "loss": 0.5148, "step": 9067 }, { "epoch": 0.15762485007561403, "grad_norm": 1.5527904516613253, "learning_rate": 9.579025072245021e-07, "loss": 0.5299, "step": 9068 }, { "epoch": 0.15764223261311686, "grad_norm": 1.5052306587162507, "learning_rate": 9.578912010728085e-07, "loss": 0.4399, "step": 9069 }, { "epoch": 0.1576596151506197, "grad_norm": 2.16278505599207, "learning_rate": 9.578798934698104e-07, "loss": 0.4877, "step": 9070 }, { "epoch": 0.15767699768812252, "grad_norm": 1.4259082581602662, "learning_rate": 9.578685844155433e-07, "loss": 0.3466, "step": 9071 }, { "epoch": 0.15769438022562535, "grad_norm": 1.9563550344370617, "learning_rate": 9.57857273910043e-07, "loss": 0.643, "step": 9072 }, { "epoch": 0.15771176276312815, "grad_norm": 1.7824287929222722, "learning_rate": 9.578459619533455e-07, "loss": 0.374, "step": 9073 }, { "epoch": 0.15772914530063098, "grad_norm": 2.1356334294011234, "learning_rate": 9.578346485454868e-07, "loss": 0.4138, "step": 9074 }, { "epoch": 0.1577465278381338, "grad_norm": 1.6597550492357898, "learning_rate": 9.578233336865022e-07, "loss": 0.1818, "step": 9075 }, { "epoch": 0.15776391037563664, "grad_norm": 2.693350088069454, "learning_rate": 9.57812017376428e-07, "loss": 1.275, "step": 9076 }, { "epoch": 0.15778129291313947, "grad_norm": 1.86627738672589, "learning_rate": 9.578006996153003e-07, "loss": 0.2077, "step": 9077 }, { "epoch": 0.15779867545064227, "grad_norm": 2.3560670816142664, "learning_rate": 9.577893804031544e-07, "loss": 0.4272, "step": 9078 }, { "epoch": 0.1578160579881451, "grad_norm": 2.046758473742482, "learning_rate": 9.577780597400267e-07, "loss": 0.49, "step": 9079 }, { "epoch": 0.15783344052564793, "grad_norm": 2.3469198412768915, "learning_rate": 9.577667376259526e-07, "loss": 0.3352, "step": 9080 }, { "epoch": 0.15785082306315076, "grad_norm": 1.2959202684449682, "learning_rate": 9.577554140609682e-07, "loss": 0.5036, "step": 9081 }, { "epoch": 0.1578682056006536, "grad_norm": 1.8768547413385996, "learning_rate": 9.577440890451097e-07, "loss": 0.6261, "step": 9082 }, { "epoch": 0.1578855881381564, "grad_norm": 2.829376498368222, "learning_rate": 9.577327625784125e-07, "loss": 0.4319, "step": 9083 }, { "epoch": 0.15790297067565923, "grad_norm": 1.7323142389502797, "learning_rate": 9.57721434660913e-07, "loss": 0.3876, "step": 9084 }, { "epoch": 0.15792035321316206, "grad_norm": 1.8430148204969383, "learning_rate": 9.577101052926464e-07, "loss": 0.5639, "step": 9085 }, { "epoch": 0.1579377357506649, "grad_norm": 1.5081734770102477, "learning_rate": 9.576987744736491e-07, "loss": 0.5864, "step": 9086 }, { "epoch": 0.15795511828816772, "grad_norm": 1.8227417614992982, "learning_rate": 9.576874422039572e-07, "loss": 0.3872, "step": 9087 }, { "epoch": 0.15797250082567052, "grad_norm": 1.6631316131242642, "learning_rate": 9.57676108483606e-07, "loss": 0.3155, "step": 9088 }, { "epoch": 0.15798988336317335, "grad_norm": 1.6297614162640535, "learning_rate": 9.57664773312632e-07, "loss": 0.665, "step": 9089 }, { "epoch": 0.15800726590067618, "grad_norm": 3.2649170298803463, "learning_rate": 9.57653436691071e-07, "loss": 0.7703, "step": 9090 }, { "epoch": 0.158024648438179, "grad_norm": 1.3341697277263946, "learning_rate": 9.576420986189587e-07, "loss": 0.3141, "step": 9091 }, { "epoch": 0.15804203097568184, "grad_norm": 1.7811379569691108, "learning_rate": 9.57630759096331e-07, "loss": 0.566, "step": 9092 }, { "epoch": 0.15805941351318464, "grad_norm": 2.322812051035134, "learning_rate": 9.576194181232243e-07, "loss": 0.2726, "step": 9093 }, { "epoch": 0.15807679605068747, "grad_norm": 2.2561192713169977, "learning_rate": 9.576080756996741e-07, "loss": 0.6995, "step": 9094 }, { "epoch": 0.1580941785881903, "grad_norm": 1.8420843077293312, "learning_rate": 9.575967318257164e-07, "loss": 0.3735, "step": 9095 }, { "epoch": 0.15811156112569313, "grad_norm": 1.6413806620255205, "learning_rate": 9.575853865013875e-07, "loss": 0.6276, "step": 9096 }, { "epoch": 0.15812894366319596, "grad_norm": 2.317466098434344, "learning_rate": 9.575740397267228e-07, "loss": 0.4492, "step": 9097 }, { "epoch": 0.15814632620069877, "grad_norm": 1.6661143919519679, "learning_rate": 9.57562691501759e-07, "loss": 0.3657, "step": 9098 }, { "epoch": 0.1581637087382016, "grad_norm": 2.0123933044466193, "learning_rate": 9.575513418265315e-07, "loss": 0.383, "step": 9099 }, { "epoch": 0.15818109127570443, "grad_norm": 1.7583297737412211, "learning_rate": 9.575399907010764e-07, "loss": 0.6094, "step": 9100 }, { "epoch": 0.15819847381320726, "grad_norm": 2.798281280742607, "learning_rate": 9.575286381254295e-07, "loss": 0.5122, "step": 9101 }, { "epoch": 0.1582158563507101, "grad_norm": 3.002129484763039, "learning_rate": 9.575172840996273e-07, "loss": 0.5641, "step": 9102 }, { "epoch": 0.1582332388882129, "grad_norm": 1.8384393030552806, "learning_rate": 9.575059286237054e-07, "loss": 0.4494, "step": 9103 }, { "epoch": 0.15825062142571572, "grad_norm": 1.0397830523989138, "learning_rate": 9.574945716976998e-07, "loss": 0.2525, "step": 9104 }, { "epoch": 0.15826800396321855, "grad_norm": 1.5778767297814322, "learning_rate": 9.574832133216467e-07, "loss": 0.3521, "step": 9105 }, { "epoch": 0.15828538650072138, "grad_norm": 2.3766687788190706, "learning_rate": 9.574718534955816e-07, "loss": 0.6365, "step": 9106 }, { "epoch": 0.1583027690382242, "grad_norm": 2.047870168585872, "learning_rate": 9.574604922195412e-07, "loss": 0.5698, "step": 9107 }, { "epoch": 0.15832015157572701, "grad_norm": 1.8235825058198278, "learning_rate": 9.574491294935612e-07, "loss": 0.4366, "step": 9108 }, { "epoch": 0.15833753411322984, "grad_norm": 1.8620053323924153, "learning_rate": 9.574377653176774e-07, "loss": 0.5667, "step": 9109 }, { "epoch": 0.15835491665073267, "grad_norm": 1.9874718360793413, "learning_rate": 9.574263996919263e-07, "loss": 0.4765, "step": 9110 }, { "epoch": 0.1583722991882355, "grad_norm": 1.2849592066660767, "learning_rate": 9.574150326163433e-07, "loss": 0.4889, "step": 9111 }, { "epoch": 0.15838968172573833, "grad_norm": 1.6029207443348625, "learning_rate": 9.574036640909649e-07, "loss": 0.4015, "step": 9112 }, { "epoch": 0.15840706426324114, "grad_norm": 1.8573983216152503, "learning_rate": 9.57392294115827e-07, "loss": 0.5211, "step": 9113 }, { "epoch": 0.15842444680074397, "grad_norm": 1.900715181340361, "learning_rate": 9.573809226909656e-07, "loss": 0.4751, "step": 9114 }, { "epoch": 0.1584418293382468, "grad_norm": 2.1792934495938954, "learning_rate": 9.573695498164168e-07, "loss": 0.5522, "step": 9115 }, { "epoch": 0.15845921187574963, "grad_norm": 1.4462986630437857, "learning_rate": 9.57358175492217e-07, "loss": 0.3228, "step": 9116 }, { "epoch": 0.15847659441325246, "grad_norm": 1.4530086216855698, "learning_rate": 9.573467997184014e-07, "loss": 0.5455, "step": 9117 }, { "epoch": 0.15849397695075526, "grad_norm": 2.5289693662042314, "learning_rate": 9.573354224950066e-07, "loss": 0.4533, "step": 9118 }, { "epoch": 0.1585113594882581, "grad_norm": 3.7013868243935404, "learning_rate": 9.573240438220686e-07, "loss": 0.7382, "step": 9119 }, { "epoch": 0.15852874202576092, "grad_norm": 1.5907411333459416, "learning_rate": 9.573126636996236e-07, "loss": 0.516, "step": 9120 }, { "epoch": 0.15854612456326375, "grad_norm": 1.9800685037255432, "learning_rate": 9.573012821277075e-07, "loss": 0.4448, "step": 9121 }, { "epoch": 0.15856350710076658, "grad_norm": 1.7904393694501364, "learning_rate": 9.572898991063562e-07, "loss": 0.5208, "step": 9122 }, { "epoch": 0.15858088963826938, "grad_norm": 3.1108645159515316, "learning_rate": 9.572785146356062e-07, "loss": 0.5228, "step": 9123 }, { "epoch": 0.15859827217577221, "grad_norm": 1.8321925958550476, "learning_rate": 9.572671287154933e-07, "loss": 0.2759, "step": 9124 }, { "epoch": 0.15861565471327504, "grad_norm": 1.7331265495907695, "learning_rate": 9.572557413460536e-07, "loss": 0.505, "step": 9125 }, { "epoch": 0.15863303725077788, "grad_norm": 1.5866181022725023, "learning_rate": 9.572443525273234e-07, "loss": 0.3014, "step": 9126 }, { "epoch": 0.1586504197882807, "grad_norm": 1.8113976745480989, "learning_rate": 9.572329622593385e-07, "loss": 0.5697, "step": 9127 }, { "epoch": 0.1586678023257835, "grad_norm": 1.5020276338440257, "learning_rate": 9.572215705421352e-07, "loss": 0.5392, "step": 9128 }, { "epoch": 0.15868518486328634, "grad_norm": 2.3052756547687365, "learning_rate": 9.572101773757494e-07, "loss": 0.692, "step": 9129 }, { "epoch": 0.15870256740078917, "grad_norm": 2.9410593917854975, "learning_rate": 9.571987827602177e-07, "loss": 0.5141, "step": 9130 }, { "epoch": 0.158719949938292, "grad_norm": 1.5081916870710117, "learning_rate": 9.571873866955757e-07, "loss": 0.4363, "step": 9131 }, { "epoch": 0.15873733247579483, "grad_norm": 2.0444222622029113, "learning_rate": 9.571759891818596e-07, "loss": 0.5027, "step": 9132 }, { "epoch": 0.15875471501329763, "grad_norm": 2.6944672057291177, "learning_rate": 9.571645902191056e-07, "loss": 0.2971, "step": 9133 }, { "epoch": 0.15877209755080046, "grad_norm": 0.9623254473627888, "learning_rate": 9.571531898073501e-07, "loss": 0.2849, "step": 9134 }, { "epoch": 0.1587894800883033, "grad_norm": 1.3944407501040907, "learning_rate": 9.571417879466288e-07, "loss": 0.32, "step": 9135 }, { "epoch": 0.15880686262580612, "grad_norm": 1.3334602814056702, "learning_rate": 9.57130384636978e-07, "loss": 0.3394, "step": 9136 }, { "epoch": 0.15882424516330895, "grad_norm": 1.5209195063858925, "learning_rate": 9.57118979878434e-07, "loss": 0.4493, "step": 9137 }, { "epoch": 0.15884162770081175, "grad_norm": 2.0142479055059126, "learning_rate": 9.571075736710328e-07, "loss": 0.6845, "step": 9138 }, { "epoch": 0.15885901023831459, "grad_norm": 1.7739772214748413, "learning_rate": 9.570961660148106e-07, "loss": 0.6295, "step": 9139 }, { "epoch": 0.15887639277581742, "grad_norm": 1.2222774769977314, "learning_rate": 9.570847569098033e-07, "loss": 0.4551, "step": 9140 }, { "epoch": 0.15889377531332025, "grad_norm": 1.1828044311121304, "learning_rate": 9.570733463560473e-07, "loss": 0.2745, "step": 9141 }, { "epoch": 0.15891115785082308, "grad_norm": 1.679234231615554, "learning_rate": 9.57061934353579e-07, "loss": 0.2527, "step": 9142 }, { "epoch": 0.15892854038832588, "grad_norm": 2.2487782205624534, "learning_rate": 9.570505209024342e-07, "loss": 0.2763, "step": 9143 }, { "epoch": 0.1589459229258287, "grad_norm": 2.2088151274051833, "learning_rate": 9.570391060026492e-07, "loss": 0.4157, "step": 9144 }, { "epoch": 0.15896330546333154, "grad_norm": 1.8964279564211777, "learning_rate": 9.570276896542602e-07, "loss": 0.4571, "step": 9145 }, { "epoch": 0.15898068800083437, "grad_norm": 3.937994335987234, "learning_rate": 9.570162718573034e-07, "loss": 0.5388, "step": 9146 }, { "epoch": 0.1589980705383372, "grad_norm": 1.6947042347881789, "learning_rate": 9.57004852611815e-07, "loss": 0.3758, "step": 9147 }, { "epoch": 0.15901545307584, "grad_norm": 1.9144459247590795, "learning_rate": 9.56993431917831e-07, "loss": 0.3608, "step": 9148 }, { "epoch": 0.15903283561334283, "grad_norm": 1.3552083205236862, "learning_rate": 9.569820097753877e-07, "loss": 0.3619, "step": 9149 }, { "epoch": 0.15905021815084566, "grad_norm": 2.120792032317564, "learning_rate": 9.569705861845216e-07, "loss": 0.3116, "step": 9150 }, { "epoch": 0.1590676006883485, "grad_norm": 1.576121031868178, "learning_rate": 9.569591611452686e-07, "loss": 0.3944, "step": 9151 }, { "epoch": 0.15908498322585132, "grad_norm": 2.5129372180670773, "learning_rate": 9.569477346576648e-07, "loss": 0.592, "step": 9152 }, { "epoch": 0.15910236576335413, "grad_norm": 1.6336417605564453, "learning_rate": 9.569363067217467e-07, "loss": 0.3476, "step": 9153 }, { "epoch": 0.15911974830085696, "grad_norm": 2.4976529549316986, "learning_rate": 9.569248773375505e-07, "loss": 0.4521, "step": 9154 }, { "epoch": 0.15913713083835979, "grad_norm": 1.5259551773834834, "learning_rate": 9.569134465051122e-07, "loss": 0.6106, "step": 9155 }, { "epoch": 0.15915451337586262, "grad_norm": 1.2531585730671508, "learning_rate": 9.56902014224468e-07, "loss": 0.2704, "step": 9156 }, { "epoch": 0.15917189591336545, "grad_norm": 1.9010659526674047, "learning_rate": 9.568905804956545e-07, "loss": 0.4914, "step": 9157 }, { "epoch": 0.15918927845086825, "grad_norm": 3.2091012780406896, "learning_rate": 9.568791453187077e-07, "loss": 0.3285, "step": 9158 }, { "epoch": 0.15920666098837108, "grad_norm": 1.4926882874189158, "learning_rate": 9.56867708693664e-07, "loss": 0.2952, "step": 9159 }, { "epoch": 0.1592240435258739, "grad_norm": 1.7466338494600637, "learning_rate": 9.568562706205595e-07, "loss": 0.6874, "step": 9160 }, { "epoch": 0.15924142606337674, "grad_norm": 1.2418848432400698, "learning_rate": 9.568448310994302e-07, "loss": 0.5228, "step": 9161 }, { "epoch": 0.15925880860087957, "grad_norm": 1.6316499595246332, "learning_rate": 9.56833390130313e-07, "loss": 0.5448, "step": 9162 }, { "epoch": 0.15927619113838237, "grad_norm": 1.660473345503801, "learning_rate": 9.568219477132434e-07, "loss": 0.533, "step": 9163 }, { "epoch": 0.1592935736758852, "grad_norm": 1.2190209926900093, "learning_rate": 9.568105038482586e-07, "loss": 0.5597, "step": 9164 }, { "epoch": 0.15931095621338803, "grad_norm": 3.2540973569032263, "learning_rate": 9.56799058535394e-07, "loss": 0.5885, "step": 9165 }, { "epoch": 0.15932833875089086, "grad_norm": 1.8358027327266222, "learning_rate": 9.567876117746863e-07, "loss": 0.7827, "step": 9166 }, { "epoch": 0.1593457212883937, "grad_norm": 2.4552673657833886, "learning_rate": 9.567761635661718e-07, "loss": 0.6135, "step": 9167 }, { "epoch": 0.1593631038258965, "grad_norm": 1.7294539583076913, "learning_rate": 9.567647139098865e-07, "loss": 0.4015, "step": 9168 }, { "epoch": 0.15938048636339933, "grad_norm": 2.560939879668041, "learning_rate": 9.56753262805867e-07, "loss": 0.6953, "step": 9169 }, { "epoch": 0.15939786890090216, "grad_norm": 2.1039415599361857, "learning_rate": 9.567418102541494e-07, "loss": 0.7196, "step": 9170 }, { "epoch": 0.159415251438405, "grad_norm": 1.54427258346967, "learning_rate": 9.567303562547702e-07, "loss": 0.7495, "step": 9171 }, { "epoch": 0.1594326339759078, "grad_norm": 2.1926454809409974, "learning_rate": 9.567189008077654e-07, "loss": 0.6353, "step": 9172 }, { "epoch": 0.15945001651341062, "grad_norm": 1.8297125492307171, "learning_rate": 9.567074439131715e-07, "loss": 0.9876, "step": 9173 }, { "epoch": 0.15946739905091345, "grad_norm": 2.4327402440043007, "learning_rate": 9.56695985571025e-07, "loss": 0.4283, "step": 9174 }, { "epoch": 0.15948478158841628, "grad_norm": 1.7714323820339204, "learning_rate": 9.566845257813618e-07, "loss": 0.6526, "step": 9175 }, { "epoch": 0.1595021641259191, "grad_norm": 2.0803515910614903, "learning_rate": 9.566730645442185e-07, "loss": 0.5973, "step": 9176 }, { "epoch": 0.1595195466634219, "grad_norm": 2.6531871493253676, "learning_rate": 9.566616018596313e-07, "loss": 0.4162, "step": 9177 }, { "epoch": 0.15953692920092474, "grad_norm": 2.3004427766686453, "learning_rate": 9.566501377276368e-07, "loss": 0.4467, "step": 9178 }, { "epoch": 0.15955431173842757, "grad_norm": 2.304824531410637, "learning_rate": 9.56638672148271e-07, "loss": 0.3741, "step": 9179 }, { "epoch": 0.1595716942759304, "grad_norm": 2.2991280098369873, "learning_rate": 9.566272051215704e-07, "loss": 0.4534, "step": 9180 }, { "epoch": 0.15958907681343323, "grad_norm": 1.5589752405158623, "learning_rate": 9.566157366475713e-07, "loss": 0.5749, "step": 9181 }, { "epoch": 0.15960645935093604, "grad_norm": 2.2810243637778886, "learning_rate": 9.5660426672631e-07, "loss": 0.4023, "step": 9182 }, { "epoch": 0.15962384188843887, "grad_norm": 1.6347919810735774, "learning_rate": 9.56592795357823e-07, "loss": 0.4178, "step": 9183 }, { "epoch": 0.1596412244259417, "grad_norm": 1.6894748001061362, "learning_rate": 9.565813225421467e-07, "loss": 0.451, "step": 9184 }, { "epoch": 0.15965860696344453, "grad_norm": 2.771862178281659, "learning_rate": 9.565698482793171e-07, "loss": 0.4517, "step": 9185 }, { "epoch": 0.15967598950094736, "grad_norm": 2.7338539468105814, "learning_rate": 9.56558372569371e-07, "loss": 0.5773, "step": 9186 }, { "epoch": 0.15969337203845016, "grad_norm": 2.6763157864482374, "learning_rate": 9.565468954123444e-07, "loss": 0.3621, "step": 9187 }, { "epoch": 0.159710754575953, "grad_norm": 2.758669135920502, "learning_rate": 9.56535416808274e-07, "loss": 0.8142, "step": 9188 }, { "epoch": 0.15972813711345582, "grad_norm": 2.413129430477137, "learning_rate": 9.56523936757196e-07, "loss": 0.7208, "step": 9189 }, { "epoch": 0.15974551965095865, "grad_norm": 1.5151186109337718, "learning_rate": 9.565124552591468e-07, "loss": 0.3213, "step": 9190 }, { "epoch": 0.15976290218846148, "grad_norm": 1.6724200130439542, "learning_rate": 9.565009723141628e-07, "loss": 0.7764, "step": 9191 }, { "epoch": 0.15978028472596428, "grad_norm": 2.5620308599769346, "learning_rate": 9.564894879222806e-07, "loss": 0.4595, "step": 9192 }, { "epoch": 0.1597976672634671, "grad_norm": 1.7184987871349335, "learning_rate": 9.564780020835362e-07, "loss": 0.396, "step": 9193 }, { "epoch": 0.15981504980096994, "grad_norm": 2.2613694084834255, "learning_rate": 9.564665147979664e-07, "loss": 0.6634, "step": 9194 }, { "epoch": 0.15983243233847277, "grad_norm": 3.27804659181361, "learning_rate": 9.564550260656073e-07, "loss": 0.5772, "step": 9195 }, { "epoch": 0.1598498148759756, "grad_norm": 1.6020208787075882, "learning_rate": 9.564435358864955e-07, "loss": 0.5553, "step": 9196 }, { "epoch": 0.1598671974134784, "grad_norm": 1.6373555233612758, "learning_rate": 9.564320442606675e-07, "loss": 0.492, "step": 9197 }, { "epoch": 0.15988457995098124, "grad_norm": 3.2039502821128143, "learning_rate": 9.564205511881594e-07, "loss": 0.4643, "step": 9198 }, { "epoch": 0.15990196248848407, "grad_norm": 1.8495984705376958, "learning_rate": 9.564090566690077e-07, "loss": 0.4602, "step": 9199 }, { "epoch": 0.1599193450259869, "grad_norm": 2.2697064318034905, "learning_rate": 9.563975607032492e-07, "loss": 0.6738, "step": 9200 }, { "epoch": 0.15993672756348973, "grad_norm": 2.046415005743237, "learning_rate": 9.5638606329092e-07, "loss": 0.4277, "step": 9201 }, { "epoch": 0.15995411010099253, "grad_norm": 2.0991466265672867, "learning_rate": 9.563745644320566e-07, "loss": 0.5784, "step": 9202 }, { "epoch": 0.15997149263849536, "grad_norm": 2.4621695547574247, "learning_rate": 9.563630641266955e-07, "loss": 0.6659, "step": 9203 }, { "epoch": 0.1599888751759982, "grad_norm": 1.5738498257885707, "learning_rate": 9.563515623748732e-07, "loss": 0.3697, "step": 9204 }, { "epoch": 0.16000625771350102, "grad_norm": 2.352407249260223, "learning_rate": 9.56340059176626e-07, "loss": 0.691, "step": 9205 }, { "epoch": 0.16002364025100385, "grad_norm": 1.2699196734259028, "learning_rate": 9.563285545319904e-07, "loss": 0.6417, "step": 9206 }, { "epoch": 0.16004102278850665, "grad_norm": 2.1949798863442087, "learning_rate": 9.56317048441003e-07, "loss": 0.6446, "step": 9207 }, { "epoch": 0.16005840532600948, "grad_norm": 1.0611499629172032, "learning_rate": 9.563055409037e-07, "loss": 0.4925, "step": 9208 }, { "epoch": 0.16007578786351231, "grad_norm": 1.5559700805023802, "learning_rate": 9.562940319201182e-07, "loss": 0.6392, "step": 9209 }, { "epoch": 0.16009317040101514, "grad_norm": 1.3972299561845518, "learning_rate": 9.562825214902939e-07, "loss": 0.3939, "step": 9210 }, { "epoch": 0.16011055293851797, "grad_norm": 1.2614240721875047, "learning_rate": 9.562710096142635e-07, "loss": 0.5903, "step": 9211 }, { "epoch": 0.16012793547602078, "grad_norm": 4.1324093021827855, "learning_rate": 9.562594962920636e-07, "loss": 0.8726, "step": 9212 }, { "epoch": 0.1601453180135236, "grad_norm": 2.4117676030769113, "learning_rate": 9.562479815237308e-07, "loss": 0.5525, "step": 9213 }, { "epoch": 0.16016270055102644, "grad_norm": 1.508360894764778, "learning_rate": 9.562364653093016e-07, "loss": 0.2739, "step": 9214 }, { "epoch": 0.16018008308852927, "grad_norm": 2.087034781552499, "learning_rate": 9.562249476488122e-07, "loss": 0.6024, "step": 9215 }, { "epoch": 0.1601974656260321, "grad_norm": 1.0489577838135014, "learning_rate": 9.562134285422993e-07, "loss": 0.3919, "step": 9216 }, { "epoch": 0.1602148481635349, "grad_norm": 1.9424001242180977, "learning_rate": 9.562019079897994e-07, "loss": 0.5165, "step": 9217 }, { "epoch": 0.16023223070103773, "grad_norm": 1.8823694931208548, "learning_rate": 9.56190385991349e-07, "loss": 0.6407, "step": 9218 }, { "epoch": 0.16024961323854056, "grad_norm": 4.588053967017821, "learning_rate": 9.561788625469847e-07, "loss": 0.6812, "step": 9219 }, { "epoch": 0.1602669957760434, "grad_norm": 1.8021223499320862, "learning_rate": 9.561673376567428e-07, "loss": 0.5565, "step": 9220 }, { "epoch": 0.16028437831354622, "grad_norm": 1.7526011387423446, "learning_rate": 9.561558113206602e-07, "loss": 0.278, "step": 9221 }, { "epoch": 0.16030176085104902, "grad_norm": 2.5208663552740127, "learning_rate": 9.561442835387732e-07, "loss": 0.319, "step": 9222 }, { "epoch": 0.16031914338855185, "grad_norm": 1.7797377659606772, "learning_rate": 9.561327543111182e-07, "loss": 0.5052, "step": 9223 }, { "epoch": 0.16033652592605468, "grad_norm": 2.1826551516869865, "learning_rate": 9.561212236377318e-07, "loss": 0.2812, "step": 9224 }, { "epoch": 0.16035390846355752, "grad_norm": 1.3660493536739702, "learning_rate": 9.56109691518651e-07, "loss": 0.4123, "step": 9225 }, { "epoch": 0.16037129100106035, "grad_norm": 1.446262082791877, "learning_rate": 9.560981579539118e-07, "loss": 0.5514, "step": 9226 }, { "epoch": 0.16038867353856315, "grad_norm": 2.293750188319952, "learning_rate": 9.560866229435509e-07, "loss": 0.7526, "step": 9227 }, { "epoch": 0.16040605607606598, "grad_norm": 8.174814205291678, "learning_rate": 9.56075086487605e-07, "loss": 0.4773, "step": 9228 }, { "epoch": 0.1604234386135688, "grad_norm": 1.454823633647219, "learning_rate": 9.560635485861105e-07, "loss": 0.4174, "step": 9229 }, { "epoch": 0.16044082115107164, "grad_norm": 1.8294712020430888, "learning_rate": 9.560520092391042e-07, "loss": 0.5761, "step": 9230 }, { "epoch": 0.16045820368857447, "grad_norm": 1.9381536952941025, "learning_rate": 9.560404684466223e-07, "loss": 0.2893, "step": 9231 }, { "epoch": 0.16047558622607727, "grad_norm": 2.0704443787483235, "learning_rate": 9.560289262087019e-07, "loss": 0.5289, "step": 9232 }, { "epoch": 0.1604929687635801, "grad_norm": 1.6518292750737842, "learning_rate": 9.560173825253792e-07, "loss": 0.5822, "step": 9233 }, { "epoch": 0.16051035130108293, "grad_norm": 1.6221910504144383, "learning_rate": 9.560058373966907e-07, "loss": 0.9764, "step": 9234 }, { "epoch": 0.16052773383858576, "grad_norm": 2.700878823445735, "learning_rate": 9.559942908226732e-07, "loss": 0.5473, "step": 9235 }, { "epoch": 0.1605451163760886, "grad_norm": 1.1920649556100589, "learning_rate": 9.559827428033634e-07, "loss": 0.3718, "step": 9236 }, { "epoch": 0.1605624989135914, "grad_norm": 2.1500200883882767, "learning_rate": 9.559711933387978e-07, "loss": 0.4433, "step": 9237 }, { "epoch": 0.16057988145109423, "grad_norm": 2.5301637077945154, "learning_rate": 9.559596424290128e-07, "loss": 0.5409, "step": 9238 }, { "epoch": 0.16059726398859706, "grad_norm": 2.9787287951217336, "learning_rate": 9.559480900740453e-07, "loss": 0.3545, "step": 9239 }, { "epoch": 0.16061464652609989, "grad_norm": 1.5709420855615503, "learning_rate": 9.559365362739317e-07, "loss": 0.7007, "step": 9240 }, { "epoch": 0.16063202906360272, "grad_norm": 2.2438445724085136, "learning_rate": 9.559249810287088e-07, "loss": 0.4992, "step": 9241 }, { "epoch": 0.16064941160110552, "grad_norm": 2.120731354608972, "learning_rate": 9.55913424338413e-07, "loss": 0.5497, "step": 9242 }, { "epoch": 0.16066679413860835, "grad_norm": 1.7879399024830462, "learning_rate": 9.559018662030813e-07, "loss": 0.441, "step": 9243 }, { "epoch": 0.16068417667611118, "grad_norm": 4.643288217755559, "learning_rate": 9.558903066227499e-07, "loss": 0.7772, "step": 9244 }, { "epoch": 0.160701559213614, "grad_norm": 2.392199137508831, "learning_rate": 9.558787455974559e-07, "loss": 0.9419, "step": 9245 }, { "epoch": 0.16071894175111684, "grad_norm": 1.6520935990444054, "learning_rate": 9.558671831272357e-07, "loss": 0.3123, "step": 9246 }, { "epoch": 0.16073632428861964, "grad_norm": 4.165456549400455, "learning_rate": 9.558556192121257e-07, "loss": 0.849, "step": 9247 }, { "epoch": 0.16075370682612247, "grad_norm": 1.3613376617722077, "learning_rate": 9.558440538521628e-07, "loss": 0.4319, "step": 9248 }, { "epoch": 0.1607710893636253, "grad_norm": 1.7691697855418247, "learning_rate": 9.558324870473837e-07, "loss": 0.4584, "step": 9249 }, { "epoch": 0.16078847190112813, "grad_norm": 1.6062597415779096, "learning_rate": 9.55820918797825e-07, "loss": 0.5466, "step": 9250 }, { "epoch": 0.16080585443863096, "grad_norm": 4.022569834722311, "learning_rate": 9.558093491035235e-07, "loss": 0.3141, "step": 9251 }, { "epoch": 0.16082323697613377, "grad_norm": 1.82995177886647, "learning_rate": 9.557977779645157e-07, "loss": 0.5073, "step": 9252 }, { "epoch": 0.1608406195136366, "grad_norm": 1.4610168002984953, "learning_rate": 9.557862053808381e-07, "loss": 0.3949, "step": 9253 }, { "epoch": 0.16085800205113943, "grad_norm": 2.028497301244451, "learning_rate": 9.557746313525277e-07, "loss": 0.7678, "step": 9254 }, { "epoch": 0.16087538458864226, "grad_norm": 1.6143962713178606, "learning_rate": 9.557630558796212e-07, "loss": 0.2322, "step": 9255 }, { "epoch": 0.1608927671261451, "grad_norm": 2.099059675162804, "learning_rate": 9.557514789621553e-07, "loss": 0.4377, "step": 9256 }, { "epoch": 0.1609101496636479, "grad_norm": 2.7598159425483124, "learning_rate": 9.557399006001664e-07, "loss": 0.9193, "step": 9257 }, { "epoch": 0.16092753220115072, "grad_norm": 2.207326955442664, "learning_rate": 9.557283207936913e-07, "loss": 0.3345, "step": 9258 }, { "epoch": 0.16094491473865355, "grad_norm": 2.7846913613017357, "learning_rate": 9.557167395427667e-07, "loss": 0.3816, "step": 9259 }, { "epoch": 0.16096229727615638, "grad_norm": 1.8793257131539558, "learning_rate": 9.557051568474294e-07, "loss": 0.5671, "step": 9260 }, { "epoch": 0.1609796798136592, "grad_norm": 1.7373238362337455, "learning_rate": 9.556935727077163e-07, "loss": 0.3082, "step": 9261 }, { "epoch": 0.160997062351162, "grad_norm": 1.263908205937889, "learning_rate": 9.556819871236638e-07, "loss": 0.86, "step": 9262 }, { "epoch": 0.16101444488866484, "grad_norm": 3.1635925036292543, "learning_rate": 9.556704000953088e-07, "loss": 0.7696, "step": 9263 }, { "epoch": 0.16103182742616767, "grad_norm": 1.3823708331534441, "learning_rate": 9.556588116226878e-07, "loss": 0.356, "step": 9264 }, { "epoch": 0.1610492099636705, "grad_norm": 1.9696813880612551, "learning_rate": 9.556472217058377e-07, "loss": 0.6941, "step": 9265 }, { "epoch": 0.16106659250117333, "grad_norm": 2.602749230049631, "learning_rate": 9.556356303447954e-07, "loss": 0.6474, "step": 9266 }, { "epoch": 0.16108397503867614, "grad_norm": 1.84222641579666, "learning_rate": 9.556240375395971e-07, "loss": 0.3842, "step": 9267 }, { "epoch": 0.16110135757617897, "grad_norm": 1.9765733759778403, "learning_rate": 9.556124432902802e-07, "loss": 0.3414, "step": 9268 }, { "epoch": 0.1611187401136818, "grad_norm": 2.1413266506143627, "learning_rate": 9.55600847596881e-07, "loss": 0.7554, "step": 9269 }, { "epoch": 0.16113612265118463, "grad_norm": 2.394636420866611, "learning_rate": 9.555892504594366e-07, "loss": 0.3559, "step": 9270 }, { "epoch": 0.16115350518868746, "grad_norm": 2.0174833908499714, "learning_rate": 9.555776518779833e-07, "loss": 0.5729, "step": 9271 }, { "epoch": 0.16117088772619026, "grad_norm": 2.555700237394894, "learning_rate": 9.555660518525581e-07, "loss": 0.883, "step": 9272 }, { "epoch": 0.1611882702636931, "grad_norm": 2.5498328725522756, "learning_rate": 9.55554450383198e-07, "loss": 0.4011, "step": 9273 }, { "epoch": 0.16120565280119592, "grad_norm": 2.1604726683028352, "learning_rate": 9.555428474699397e-07, "loss": 0.4585, "step": 9274 }, { "epoch": 0.16122303533869875, "grad_norm": 1.4116823304537516, "learning_rate": 9.555312431128194e-07, "loss": 0.3781, "step": 9275 }, { "epoch": 0.16124041787620158, "grad_norm": 1.5208251278844542, "learning_rate": 9.555196373118745e-07, "loss": 0.7686, "step": 9276 }, { "epoch": 0.16125780041370438, "grad_norm": 2.3457068412973245, "learning_rate": 9.555080300671417e-07, "loss": 0.9213, "step": 9277 }, { "epoch": 0.1612751829512072, "grad_norm": 1.5235127629334075, "learning_rate": 9.554964213786576e-07, "loss": 0.6589, "step": 9278 }, { "epoch": 0.16129256548871004, "grad_norm": 1.9092901349885927, "learning_rate": 9.554848112464591e-07, "loss": 0.3397, "step": 9279 }, { "epoch": 0.16130994802621287, "grad_norm": 1.1232159547432954, "learning_rate": 9.55473199670583e-07, "loss": 0.3206, "step": 9280 }, { "epoch": 0.1613273305637157, "grad_norm": 1.1813019142343462, "learning_rate": 9.55461586651066e-07, "loss": 0.3523, "step": 9281 }, { "epoch": 0.1613447131012185, "grad_norm": 3.825009664604716, "learning_rate": 9.55449972187945e-07, "loss": 0.7041, "step": 9282 }, { "epoch": 0.16136209563872134, "grad_norm": 1.8021878053553824, "learning_rate": 9.554383562812569e-07, "loss": 0.4416, "step": 9283 }, { "epoch": 0.16137947817622417, "grad_norm": 1.415477939056827, "learning_rate": 9.554267389310385e-07, "loss": 0.3959, "step": 9284 }, { "epoch": 0.161396860713727, "grad_norm": 2.0027841260676094, "learning_rate": 9.554151201373263e-07, "loss": 0.4784, "step": 9285 }, { "epoch": 0.16141424325122983, "grad_norm": 1.726195057436413, "learning_rate": 9.554034999001575e-07, "loss": 0.6842, "step": 9286 }, { "epoch": 0.16143162578873263, "grad_norm": 1.7700419200330095, "learning_rate": 9.553918782195687e-07, "loss": 0.4225, "step": 9287 }, { "epoch": 0.16144900832623546, "grad_norm": 1.5958796902078272, "learning_rate": 9.55380255095597e-07, "loss": 0.5042, "step": 9288 }, { "epoch": 0.1614663908637383, "grad_norm": 1.4906333097259927, "learning_rate": 9.55368630528279e-07, "loss": 0.3778, "step": 9289 }, { "epoch": 0.16148377340124112, "grad_norm": 2.8307807125100637, "learning_rate": 9.553570045176516e-07, "loss": 0.412, "step": 9290 }, { "epoch": 0.16150115593874395, "grad_norm": 1.607004717240069, "learning_rate": 9.553453770637517e-07, "loss": 0.3399, "step": 9291 }, { "epoch": 0.16151853847624675, "grad_norm": 2.117356814774999, "learning_rate": 9.55333748166616e-07, "loss": 0.5808, "step": 9292 }, { "epoch": 0.16153592101374958, "grad_norm": 3.062702337799442, "learning_rate": 9.553221178262816e-07, "loss": 0.2936, "step": 9293 }, { "epoch": 0.16155330355125241, "grad_norm": 1.7993997273513287, "learning_rate": 9.553104860427852e-07, "loss": 0.6014, "step": 9294 }, { "epoch": 0.16157068608875524, "grad_norm": 1.718837935504421, "learning_rate": 9.55298852816164e-07, "loss": 0.3106, "step": 9295 }, { "epoch": 0.16158806862625807, "grad_norm": 2.6892124427084454, "learning_rate": 9.552872181464541e-07, "loss": 0.4518, "step": 9296 }, { "epoch": 0.16160545116376088, "grad_norm": 3.6352571450156113, "learning_rate": 9.55275582033693e-07, "loss": 0.7315, "step": 9297 }, { "epoch": 0.1616228337012637, "grad_norm": 1.9852183770089062, "learning_rate": 9.552639444779176e-07, "loss": 0.4199, "step": 9298 }, { "epoch": 0.16164021623876654, "grad_norm": 2.6231204398005805, "learning_rate": 9.552523054791645e-07, "loss": 0.6401, "step": 9299 }, { "epoch": 0.16165759877626937, "grad_norm": 1.4371919280868741, "learning_rate": 9.552406650374708e-07, "loss": 0.3265, "step": 9300 }, { "epoch": 0.1616749813137722, "grad_norm": 1.4968676249120232, "learning_rate": 9.552290231528735e-07, "loss": 0.4135, "step": 9301 }, { "epoch": 0.161692363851275, "grad_norm": 1.7079842577347881, "learning_rate": 9.552173798254089e-07, "loss": 0.381, "step": 9302 }, { "epoch": 0.16170974638877783, "grad_norm": 2.313444169456442, "learning_rate": 9.552057350551147e-07, "loss": 0.682, "step": 9303 }, { "epoch": 0.16172712892628066, "grad_norm": 2.4647912060810313, "learning_rate": 9.551940888420271e-07, "loss": 0.5765, "step": 9304 }, { "epoch": 0.1617445114637835, "grad_norm": 1.6915473786334754, "learning_rate": 9.551824411861835e-07, "loss": 0.4096, "step": 9305 }, { "epoch": 0.16176189400128632, "grad_norm": 1.3302745150457294, "learning_rate": 9.551707920876206e-07, "loss": 0.5317, "step": 9306 }, { "epoch": 0.16177927653878912, "grad_norm": 1.33808744754099, "learning_rate": 9.551591415463755e-07, "loss": 0.4048, "step": 9307 }, { "epoch": 0.16179665907629195, "grad_norm": 1.3875236237501816, "learning_rate": 9.55147489562485e-07, "loss": 0.4913, "step": 9308 }, { "epoch": 0.16181404161379478, "grad_norm": 7.255285424875467, "learning_rate": 9.551358361359858e-07, "loss": 0.9467, "step": 9309 }, { "epoch": 0.16183142415129761, "grad_norm": 1.61314302100554, "learning_rate": 9.551241812669152e-07, "loss": 0.3779, "step": 9310 }, { "epoch": 0.16184880668880042, "grad_norm": 2.897645378818143, "learning_rate": 9.5511252495531e-07, "loss": 0.5319, "step": 9311 }, { "epoch": 0.16186618922630325, "grad_norm": 2.0136852000410546, "learning_rate": 9.551008672012072e-07, "loss": 0.355, "step": 9312 }, { "epoch": 0.16188357176380608, "grad_norm": 1.2942801946095706, "learning_rate": 9.550892080046439e-07, "loss": 0.4989, "step": 9313 }, { "epoch": 0.1619009543013089, "grad_norm": 1.6246038772564328, "learning_rate": 9.550775473656564e-07, "loss": 0.4046, "step": 9314 }, { "epoch": 0.16191833683881174, "grad_norm": 1.3374691052186793, "learning_rate": 9.550658852842825e-07, "loss": 0.2062, "step": 9315 }, { "epoch": 0.16193571937631454, "grad_norm": 2.132622312534343, "learning_rate": 9.550542217605585e-07, "loss": 0.3085, "step": 9316 }, { "epoch": 0.16195310191381737, "grad_norm": 1.6777027027703104, "learning_rate": 9.55042556794522e-07, "loss": 0.7989, "step": 9317 }, { "epoch": 0.1619704844513202, "grad_norm": 2.4796684567383953, "learning_rate": 9.550308903862093e-07, "loss": 0.51, "step": 9318 }, { "epoch": 0.16198786698882303, "grad_norm": 1.4241778810892904, "learning_rate": 9.55019222535658e-07, "loss": 0.3133, "step": 9319 }, { "epoch": 0.16200524952632586, "grad_norm": 2.32986475580099, "learning_rate": 9.550075532429046e-07, "loss": 0.3484, "step": 9320 }, { "epoch": 0.16202263206382866, "grad_norm": 3.412416741035791, "learning_rate": 9.549958825079863e-07, "loss": 0.6768, "step": 9321 }, { "epoch": 0.1620400146013315, "grad_norm": 1.3868044917459195, "learning_rate": 9.5498421033094e-07, "loss": 0.3109, "step": 9322 }, { "epoch": 0.16205739713883432, "grad_norm": 1.804800203180389, "learning_rate": 9.54972536711803e-07, "loss": 0.7087, "step": 9323 }, { "epoch": 0.16207477967633716, "grad_norm": 1.181392047738043, "learning_rate": 9.549608616506117e-07, "loss": 0.588, "step": 9324 }, { "epoch": 0.16209216221383999, "grad_norm": 4.419150271599932, "learning_rate": 9.549491851474036e-07, "loss": 0.4966, "step": 9325 }, { "epoch": 0.1621095447513428, "grad_norm": 2.484805119161931, "learning_rate": 9.549375072022157e-07, "loss": 0.6286, "step": 9326 }, { "epoch": 0.16212692728884562, "grad_norm": 1.7617410170708976, "learning_rate": 9.549258278150847e-07, "loss": 0.2951, "step": 9327 }, { "epoch": 0.16214430982634845, "grad_norm": 2.36937065959103, "learning_rate": 9.54914146986048e-07, "loss": 0.499, "step": 9328 }, { "epoch": 0.16216169236385128, "grad_norm": 1.9950533245919246, "learning_rate": 9.549024647151423e-07, "loss": 0.5854, "step": 9329 }, { "epoch": 0.1621790749013541, "grad_norm": 1.8705660178745458, "learning_rate": 9.548907810024048e-07, "loss": 0.3704, "step": 9330 }, { "epoch": 0.1621964574388569, "grad_norm": 1.9586735295013202, "learning_rate": 9.548790958478724e-07, "loss": 0.473, "step": 9331 }, { "epoch": 0.16221383997635974, "grad_norm": 1.9645217550590173, "learning_rate": 9.548674092515821e-07, "loss": 0.3231, "step": 9332 }, { "epoch": 0.16223122251386257, "grad_norm": 2.168983482089311, "learning_rate": 9.548557212135713e-07, "loss": 0.7146, "step": 9333 }, { "epoch": 0.1622486050513654, "grad_norm": 2.1613276260573335, "learning_rate": 9.548440317338767e-07, "loss": 0.354, "step": 9334 }, { "epoch": 0.16226598758886823, "grad_norm": 2.185028068306361, "learning_rate": 9.548323408125356e-07, "loss": 0.5006, "step": 9335 }, { "epoch": 0.16228337012637103, "grad_norm": 1.926214546968355, "learning_rate": 9.548206484495846e-07, "loss": 0.3608, "step": 9336 }, { "epoch": 0.16230075266387387, "grad_norm": 3.0387517337116345, "learning_rate": 9.548089546450614e-07, "loss": 0.6194, "step": 9337 }, { "epoch": 0.1623181352013767, "grad_norm": 2.123337255686273, "learning_rate": 9.547972593990026e-07, "loss": 0.3872, "step": 9338 }, { "epoch": 0.16233551773887953, "grad_norm": 1.7882637629461577, "learning_rate": 9.547855627114453e-07, "loss": 0.4888, "step": 9339 }, { "epoch": 0.16235290027638236, "grad_norm": 1.4974302559833104, "learning_rate": 9.547738645824268e-07, "loss": 0.5908, "step": 9340 }, { "epoch": 0.16237028281388516, "grad_norm": 1.6793530345867886, "learning_rate": 9.54762165011984e-07, "loss": 0.4001, "step": 9341 }, { "epoch": 0.162387665351388, "grad_norm": 2.7489993233076544, "learning_rate": 9.547504640001542e-07, "loss": 0.4309, "step": 9342 }, { "epoch": 0.16240504788889082, "grad_norm": 2.103084277569771, "learning_rate": 9.547387615469741e-07, "loss": 0.4736, "step": 9343 }, { "epoch": 0.16242243042639365, "grad_norm": 2.605213818112825, "learning_rate": 9.547270576524813e-07, "loss": 0.4403, "step": 9344 }, { "epoch": 0.16243981296389648, "grad_norm": 1.5407467480525388, "learning_rate": 9.547153523167123e-07, "loss": 0.3667, "step": 9345 }, { "epoch": 0.16245719550139928, "grad_norm": 1.3453757979543266, "learning_rate": 9.547036455397048e-07, "loss": 0.3523, "step": 9346 }, { "epoch": 0.1624745780389021, "grad_norm": 2.0967561490011453, "learning_rate": 9.546919373214954e-07, "loss": 0.4658, "step": 9347 }, { "epoch": 0.16249196057640494, "grad_norm": 2.0629592608350844, "learning_rate": 9.546802276621215e-07, "loss": 0.4333, "step": 9348 }, { "epoch": 0.16250934311390777, "grad_norm": 1.6426333276087985, "learning_rate": 9.546685165616202e-07, "loss": 0.3238, "step": 9349 }, { "epoch": 0.1625267256514106, "grad_norm": 2.158249070054989, "learning_rate": 9.546568040200285e-07, "loss": 0.5239, "step": 9350 }, { "epoch": 0.1625441081889134, "grad_norm": 1.918316538616272, "learning_rate": 9.546450900373834e-07, "loss": 0.3626, "step": 9351 }, { "epoch": 0.16256149072641624, "grad_norm": 2.0084955553923214, "learning_rate": 9.546333746137225e-07, "loss": 0.7462, "step": 9352 }, { "epoch": 0.16257887326391907, "grad_norm": 2.2376866667520283, "learning_rate": 9.546216577490827e-07, "loss": 0.4797, "step": 9353 }, { "epoch": 0.1625962558014219, "grad_norm": 1.1220005930392116, "learning_rate": 9.546099394435008e-07, "loss": 0.406, "step": 9354 }, { "epoch": 0.16261363833892473, "grad_norm": 1.6845386569808536, "learning_rate": 9.545982196970144e-07, "loss": 0.3036, "step": 9355 }, { "epoch": 0.16263102087642753, "grad_norm": 2.5514178515916104, "learning_rate": 9.545864985096602e-07, "loss": 0.5138, "step": 9356 }, { "epoch": 0.16264840341393036, "grad_norm": 4.542209733923457, "learning_rate": 9.545747758814759e-07, "loss": 0.7444, "step": 9357 }, { "epoch": 0.1626657859514332, "grad_norm": 1.8405056327470486, "learning_rate": 9.545630518124984e-07, "loss": 0.4927, "step": 9358 }, { "epoch": 0.16268316848893602, "grad_norm": 1.9004597192066508, "learning_rate": 9.545513263027647e-07, "loss": 0.4555, "step": 9359 }, { "epoch": 0.16270055102643885, "grad_norm": 2.1979942790442837, "learning_rate": 9.545395993523122e-07, "loss": 0.5615, "step": 9360 }, { "epoch": 0.16271793356394165, "grad_norm": 1.829826355506748, "learning_rate": 9.545278709611779e-07, "loss": 0.4325, "step": 9361 }, { "epoch": 0.16273531610144448, "grad_norm": 2.6112682042874407, "learning_rate": 9.54516141129399e-07, "loss": 0.5682, "step": 9362 }, { "epoch": 0.1627526986389473, "grad_norm": 1.9082821972257882, "learning_rate": 9.545044098570128e-07, "loss": 0.5461, "step": 9363 }, { "epoch": 0.16277008117645014, "grad_norm": 1.5797753726975694, "learning_rate": 9.544926771440563e-07, "loss": 0.4923, "step": 9364 }, { "epoch": 0.16278746371395297, "grad_norm": 1.9542307369973422, "learning_rate": 9.54480942990567e-07, "loss": 0.6287, "step": 9365 }, { "epoch": 0.16280484625145578, "grad_norm": 2.0140091169408674, "learning_rate": 9.544692073965816e-07, "loss": 0.6266, "step": 9366 }, { "epoch": 0.1628222287889586, "grad_norm": 2.015009568058033, "learning_rate": 9.544574703621377e-07, "loss": 0.6547, "step": 9367 }, { "epoch": 0.16283961132646144, "grad_norm": 3.917656591470944, "learning_rate": 9.544457318872724e-07, "loss": 0.3464, "step": 9368 }, { "epoch": 0.16285699386396427, "grad_norm": 2.0672351100949435, "learning_rate": 9.544339919720231e-07, "loss": 0.3944, "step": 9369 }, { "epoch": 0.1628743764014671, "grad_norm": 2.041655403440751, "learning_rate": 9.544222506164265e-07, "loss": 0.3563, "step": 9370 }, { "epoch": 0.1628917589389699, "grad_norm": 2.2507263022144435, "learning_rate": 9.544105078205202e-07, "loss": 0.406, "step": 9371 }, { "epoch": 0.16290914147647273, "grad_norm": 1.7207278651161513, "learning_rate": 9.543987635843413e-07, "loss": 0.3742, "step": 9372 }, { "epoch": 0.16292652401397556, "grad_norm": 1.3702666121248452, "learning_rate": 9.54387017907927e-07, "loss": 0.4864, "step": 9373 }, { "epoch": 0.1629439065514784, "grad_norm": 3.931535510536559, "learning_rate": 9.543752707913147e-07, "loss": 0.6541, "step": 9374 }, { "epoch": 0.16296128908898122, "grad_norm": 1.8233898751868858, "learning_rate": 9.543635222345415e-07, "loss": 0.4784, "step": 9375 }, { "epoch": 0.16297867162648402, "grad_norm": 2.342409221898302, "learning_rate": 9.543517722376447e-07, "loss": 0.4703, "step": 9376 }, { "epoch": 0.16299605416398685, "grad_norm": 1.3002859138271439, "learning_rate": 9.543400208006613e-07, "loss": 0.5853, "step": 9377 }, { "epoch": 0.16301343670148968, "grad_norm": 1.9141434761523575, "learning_rate": 9.54328267923629e-07, "loss": 0.4829, "step": 9378 }, { "epoch": 0.1630308192389925, "grad_norm": 1.8960865326460932, "learning_rate": 9.543165136065844e-07, "loss": 0.5413, "step": 9379 }, { "epoch": 0.16304820177649534, "grad_norm": 1.8021868275137267, "learning_rate": 9.543047578495655e-07, "loss": 0.3716, "step": 9380 }, { "epoch": 0.16306558431399815, "grad_norm": 2.911725809010168, "learning_rate": 9.542930006526092e-07, "loss": 0.3553, "step": 9381 }, { "epoch": 0.16308296685150098, "grad_norm": 1.5437900610894855, "learning_rate": 9.542812420157525e-07, "loss": 0.5132, "step": 9382 }, { "epoch": 0.1631003493890038, "grad_norm": 1.5639212304700854, "learning_rate": 9.54269481939033e-07, "loss": 0.3041, "step": 9383 }, { "epoch": 0.16311773192650664, "grad_norm": 1.9924498507992463, "learning_rate": 9.542577204224882e-07, "loss": 0.2776, "step": 9384 }, { "epoch": 0.16313511446400947, "grad_norm": 3.6200157195543223, "learning_rate": 9.54245957466155e-07, "loss": 0.324, "step": 9385 }, { "epoch": 0.16315249700151227, "grad_norm": 1.208799999424447, "learning_rate": 9.542341930700705e-07, "loss": 0.4786, "step": 9386 }, { "epoch": 0.1631698795390151, "grad_norm": 1.5898927412662496, "learning_rate": 9.542224272342726e-07, "loss": 0.6505, "step": 9387 }, { "epoch": 0.16318726207651793, "grad_norm": 2.0202129434735925, "learning_rate": 9.542106599587982e-07, "loss": 0.3526, "step": 9388 }, { "epoch": 0.16320464461402076, "grad_norm": 2.1076156651111644, "learning_rate": 9.541988912436846e-07, "loss": 0.4676, "step": 9389 }, { "epoch": 0.1632220271515236, "grad_norm": 1.271079435861568, "learning_rate": 9.541871210889693e-07, "loss": 0.532, "step": 9390 }, { "epoch": 0.1632394096890264, "grad_norm": 1.6554453927235837, "learning_rate": 9.541753494946892e-07, "loss": 0.3601, "step": 9391 }, { "epoch": 0.16325679222652922, "grad_norm": 2.086665436877978, "learning_rate": 9.541635764608818e-07, "loss": 0.6315, "step": 9392 }, { "epoch": 0.16327417476403205, "grad_norm": 2.962330290525937, "learning_rate": 9.541518019875848e-07, "loss": 0.4306, "step": 9393 }, { "epoch": 0.16329155730153488, "grad_norm": 1.1380080612351082, "learning_rate": 9.541400260748352e-07, "loss": 0.426, "step": 9394 }, { "epoch": 0.16330893983903771, "grad_norm": 1.1737966091980094, "learning_rate": 9.5412824872267e-07, "loss": 0.3316, "step": 9395 }, { "epoch": 0.16332632237654052, "grad_norm": 1.0122551552291406, "learning_rate": 9.541164699311273e-07, "loss": 0.5396, "step": 9396 }, { "epoch": 0.16334370491404335, "grad_norm": 1.8426966324655343, "learning_rate": 9.541046897002437e-07, "loss": 0.444, "step": 9397 }, { "epoch": 0.16336108745154618, "grad_norm": 1.3240784150507277, "learning_rate": 9.540929080300568e-07, "loss": 0.4589, "step": 9398 }, { "epoch": 0.163378469989049, "grad_norm": 1.8216209823555085, "learning_rate": 9.54081124920604e-07, "loss": 0.4638, "step": 9399 }, { "epoch": 0.16339585252655184, "grad_norm": 1.7805249397808214, "learning_rate": 9.540693403719227e-07, "loss": 0.6425, "step": 9400 }, { "epoch": 0.16341323506405464, "grad_norm": 1.8321372119376413, "learning_rate": 9.540575543840503e-07, "loss": 0.5167, "step": 9401 }, { "epoch": 0.16343061760155747, "grad_norm": 3.2605216139412043, "learning_rate": 9.540457669570239e-07, "loss": 0.4706, "step": 9402 }, { "epoch": 0.1634480001390603, "grad_norm": 1.3352878393311418, "learning_rate": 9.54033978090881e-07, "loss": 0.5767, "step": 9403 }, { "epoch": 0.16346538267656313, "grad_norm": 1.8028459096176808, "learning_rate": 9.540221877856589e-07, "loss": 0.7732, "step": 9404 }, { "epoch": 0.16348276521406596, "grad_norm": 1.685434479740655, "learning_rate": 9.54010396041395e-07, "loss": 0.28, "step": 9405 }, { "epoch": 0.16350014775156876, "grad_norm": 2.3552489422054377, "learning_rate": 9.539986028581269e-07, "loss": 0.5203, "step": 9406 }, { "epoch": 0.1635175302890716, "grad_norm": 1.6398545887714744, "learning_rate": 9.539868082358916e-07, "loss": 0.5108, "step": 9407 }, { "epoch": 0.16353491282657442, "grad_norm": 4.139850103994887, "learning_rate": 9.539750121747267e-07, "loss": 0.4211, "step": 9408 }, { "epoch": 0.16355229536407725, "grad_norm": 2.2890978717377757, "learning_rate": 9.539632146746694e-07, "loss": 0.2548, "step": 9409 }, { "epoch": 0.16356967790158009, "grad_norm": 2.2238207797864784, "learning_rate": 9.539514157357575e-07, "loss": 0.6709, "step": 9410 }, { "epoch": 0.1635870604390829, "grad_norm": 3.386835016174997, "learning_rate": 9.539396153580282e-07, "loss": 0.6524, "step": 9411 }, { "epoch": 0.16360444297658572, "grad_norm": 2.0664294326784245, "learning_rate": 9.539278135415186e-07, "loss": 0.4164, "step": 9412 }, { "epoch": 0.16362182551408855, "grad_norm": 1.9298573230535567, "learning_rate": 9.539160102862664e-07, "loss": 0.5002, "step": 9413 }, { "epoch": 0.16363920805159138, "grad_norm": 1.7582789621280852, "learning_rate": 9.53904205592309e-07, "loss": 0.3752, "step": 9414 }, { "epoch": 0.1636565905890942, "grad_norm": 2.3878400424699016, "learning_rate": 9.538923994596837e-07, "loss": 0.6852, "step": 9415 }, { "epoch": 0.163673973126597, "grad_norm": 1.9564685985324677, "learning_rate": 9.53880591888428e-07, "loss": 0.7378, "step": 9416 }, { "epoch": 0.16369135566409984, "grad_norm": 2.149955761114427, "learning_rate": 9.538687828785796e-07, "loss": 0.6116, "step": 9417 }, { "epoch": 0.16370873820160267, "grad_norm": 1.3031967034186118, "learning_rate": 9.538569724301753e-07, "loss": 0.394, "step": 9418 }, { "epoch": 0.1637261207391055, "grad_norm": 2.8544792306676725, "learning_rate": 9.53845160543253e-07, "loss": 0.7084, "step": 9419 }, { "epoch": 0.16374350327660833, "grad_norm": 2.16790927203044, "learning_rate": 9.538333472178501e-07, "loss": 0.3919, "step": 9420 }, { "epoch": 0.16376088581411113, "grad_norm": 1.697933869809496, "learning_rate": 9.538215324540038e-07, "loss": 0.5163, "step": 9421 }, { "epoch": 0.16377826835161396, "grad_norm": 1.949264885684818, "learning_rate": 9.53809716251752e-07, "loss": 0.5816, "step": 9422 }, { "epoch": 0.1637956508891168, "grad_norm": 1.8673433669463286, "learning_rate": 9.537978986111316e-07, "loss": 0.4295, "step": 9423 }, { "epoch": 0.16381303342661963, "grad_norm": 1.3697737622868726, "learning_rate": 9.537860795321803e-07, "loss": 0.4389, "step": 9424 }, { "epoch": 0.16383041596412246, "grad_norm": 2.705920305990201, "learning_rate": 9.537742590149357e-07, "loss": 0.4003, "step": 9425 }, { "epoch": 0.16384779850162526, "grad_norm": 2.614562326390496, "learning_rate": 9.537624370594351e-07, "loss": 0.2643, "step": 9426 }, { "epoch": 0.1638651810391281, "grad_norm": 2.5356617056995003, "learning_rate": 9.53750613665716e-07, "loss": 0.8401, "step": 9427 }, { "epoch": 0.16388256357663092, "grad_norm": 1.3875183210367394, "learning_rate": 9.53738788833816e-07, "loss": 0.4986, "step": 9428 }, { "epoch": 0.16389994611413375, "grad_norm": 2.3271292257869214, "learning_rate": 9.537269625637725e-07, "loss": 0.4991, "step": 9429 }, { "epoch": 0.16391732865163658, "grad_norm": 1.796275473902661, "learning_rate": 9.537151348556226e-07, "loss": 0.3759, "step": 9430 }, { "epoch": 0.16393471118913938, "grad_norm": 1.309711294766517, "learning_rate": 9.537033057094046e-07, "loss": 0.306, "step": 9431 }, { "epoch": 0.1639520937266422, "grad_norm": 1.7719406804753306, "learning_rate": 9.536914751251552e-07, "loss": 0.3372, "step": 9432 }, { "epoch": 0.16396947626414504, "grad_norm": 3.3524517586959934, "learning_rate": 9.536796431029124e-07, "loss": 0.4903, "step": 9433 }, { "epoch": 0.16398685880164787, "grad_norm": 1.6929393013366474, "learning_rate": 9.536678096427135e-07, "loss": 0.5733, "step": 9434 }, { "epoch": 0.1640042413391507, "grad_norm": 2.8474556402971216, "learning_rate": 9.53655974744596e-07, "loss": 0.5652, "step": 9435 }, { "epoch": 0.1640216238766535, "grad_norm": 1.5887781690879357, "learning_rate": 9.536441384085976e-07, "loss": 0.2381, "step": 9436 }, { "epoch": 0.16403900641415634, "grad_norm": 3.1198688403770287, "learning_rate": 9.536323006347555e-07, "loss": 0.8459, "step": 9437 }, { "epoch": 0.16405638895165917, "grad_norm": 1.2629484325814984, "learning_rate": 9.536204614231073e-07, "loss": 0.2729, "step": 9438 }, { "epoch": 0.164073771489162, "grad_norm": 2.0885648407370847, "learning_rate": 9.536086207736909e-07, "loss": 0.6234, "step": 9439 }, { "epoch": 0.16409115402666483, "grad_norm": 2.0548604252799403, "learning_rate": 9.535967786865433e-07, "loss": 0.5658, "step": 9440 }, { "epoch": 0.16410853656416763, "grad_norm": 1.544847392109428, "learning_rate": 9.535849351617025e-07, "loss": 0.3686, "step": 9441 }, { "epoch": 0.16412591910167046, "grad_norm": 1.3253480566024396, "learning_rate": 9.535730901992056e-07, "loss": 0.3849, "step": 9442 }, { "epoch": 0.1641433016391733, "grad_norm": 2.699447446466941, "learning_rate": 9.535612437990905e-07, "loss": 0.6769, "step": 9443 }, { "epoch": 0.16416068417667612, "grad_norm": 2.6335398849716687, "learning_rate": 9.535493959613945e-07, "loss": 0.4909, "step": 9444 }, { "epoch": 0.16417806671417895, "grad_norm": 1.93520012341869, "learning_rate": 9.535375466861553e-07, "loss": 0.3877, "step": 9445 }, { "epoch": 0.16419544925168175, "grad_norm": 1.9929512305880102, "learning_rate": 9.535256959734105e-07, "loss": 0.3733, "step": 9446 }, { "epoch": 0.16421283178918458, "grad_norm": 1.2069503853145698, "learning_rate": 9.535138438231975e-07, "loss": 0.6759, "step": 9447 }, { "epoch": 0.1642302143266874, "grad_norm": 1.2398373262100577, "learning_rate": 9.535019902355539e-07, "loss": 0.4153, "step": 9448 }, { "epoch": 0.16424759686419024, "grad_norm": 1.7611538685767067, "learning_rate": 9.534901352105174e-07, "loss": 0.6772, "step": 9449 }, { "epoch": 0.16426497940169305, "grad_norm": 1.7127341956982631, "learning_rate": 9.534782787481255e-07, "loss": 0.5623, "step": 9450 }, { "epoch": 0.16428236193919588, "grad_norm": 2.083560601966707, "learning_rate": 9.534664208484156e-07, "loss": 0.3922, "step": 9451 }, { "epoch": 0.1642997444766987, "grad_norm": 1.9315128357409848, "learning_rate": 9.534545615114256e-07, "loss": 0.4567, "step": 9452 }, { "epoch": 0.16431712701420154, "grad_norm": 1.3496344318437674, "learning_rate": 9.53442700737193e-07, "loss": 0.2777, "step": 9453 }, { "epoch": 0.16433450955170437, "grad_norm": 1.6477165627426966, "learning_rate": 9.534308385257553e-07, "loss": 0.4355, "step": 9454 }, { "epoch": 0.16435189208920717, "grad_norm": 1.2937633400657294, "learning_rate": 9.534189748771499e-07, "loss": 0.4263, "step": 9455 }, { "epoch": 0.16436927462671, "grad_norm": 2.2775341970368785, "learning_rate": 9.53407109791415e-07, "loss": 0.3105, "step": 9456 }, { "epoch": 0.16438665716421283, "grad_norm": 1.6693490180957449, "learning_rate": 9.533952432685874e-07, "loss": 0.8736, "step": 9457 }, { "epoch": 0.16440403970171566, "grad_norm": 1.3935660462085786, "learning_rate": 9.533833753087055e-07, "loss": 0.4041, "step": 9458 }, { "epoch": 0.1644214222392185, "grad_norm": 1.9849531924142056, "learning_rate": 9.533715059118065e-07, "loss": 0.3481, "step": 9459 }, { "epoch": 0.1644388047767213, "grad_norm": 2.1007155544145597, "learning_rate": 9.53359635077928e-07, "loss": 0.4394, "step": 9460 }, { "epoch": 0.16445618731422412, "grad_norm": 2.2184870979632465, "learning_rate": 9.533477628071078e-07, "loss": 1.126, "step": 9461 }, { "epoch": 0.16447356985172695, "grad_norm": 2.024876041478703, "learning_rate": 9.533358890993834e-07, "loss": 0.4712, "step": 9462 }, { "epoch": 0.16449095238922978, "grad_norm": 2.4656402408042815, "learning_rate": 9.533240139547925e-07, "loss": 0.4519, "step": 9463 }, { "epoch": 0.1645083349267326, "grad_norm": 1.5286816050643333, "learning_rate": 9.533121373733727e-07, "loss": 0.6869, "step": 9464 }, { "epoch": 0.16452571746423542, "grad_norm": 1.3497054702093132, "learning_rate": 9.533002593551617e-07, "loss": 0.4485, "step": 9465 }, { "epoch": 0.16454310000173825, "grad_norm": 1.5202785627599804, "learning_rate": 9.53288379900197e-07, "loss": 0.4786, "step": 9466 }, { "epoch": 0.16456048253924108, "grad_norm": 1.685239300183584, "learning_rate": 9.532764990085165e-07, "loss": 0.57, "step": 9467 }, { "epoch": 0.1645778650767439, "grad_norm": 1.9315303205713006, "learning_rate": 9.532646166801576e-07, "loss": 0.4262, "step": 9468 }, { "epoch": 0.16459524761424674, "grad_norm": 1.484549075323708, "learning_rate": 9.532527329151581e-07, "loss": 0.5285, "step": 9469 }, { "epoch": 0.16461263015174954, "grad_norm": 2.477472501722342, "learning_rate": 9.532408477135556e-07, "loss": 0.4864, "step": 9470 }, { "epoch": 0.16463001268925237, "grad_norm": 3.18297966737442, "learning_rate": 9.532289610753879e-07, "loss": 0.6376, "step": 9471 }, { "epoch": 0.1646473952267552, "grad_norm": 1.2575058473842886, "learning_rate": 9.532170730006926e-07, "loss": 0.5621, "step": 9472 }, { "epoch": 0.16466477776425803, "grad_norm": 2.462330543371851, "learning_rate": 9.532051834895074e-07, "loss": 0.5106, "step": 9473 }, { "epoch": 0.16468216030176086, "grad_norm": 1.9341487526220937, "learning_rate": 9.531932925418699e-07, "loss": 0.3677, "step": 9474 }, { "epoch": 0.16469954283926366, "grad_norm": 2.005201907587182, "learning_rate": 9.531814001578178e-07, "loss": 0.4148, "step": 9475 }, { "epoch": 0.1647169253767665, "grad_norm": 1.5397613972607085, "learning_rate": 9.531695063373887e-07, "loss": 0.6369, "step": 9476 }, { "epoch": 0.16473430791426932, "grad_norm": 1.9849930065557184, "learning_rate": 9.531576110806207e-07, "loss": 0.7131, "step": 9477 }, { "epoch": 0.16475169045177215, "grad_norm": 1.981197884357972, "learning_rate": 9.531457143875512e-07, "loss": 0.7549, "step": 9478 }, { "epoch": 0.16476907298927498, "grad_norm": 1.5245390255251907, "learning_rate": 9.531338162582179e-07, "loss": 0.3649, "step": 9479 }, { "epoch": 0.1647864555267778, "grad_norm": 1.417818531314774, "learning_rate": 9.531219166926586e-07, "loss": 0.611, "step": 9480 }, { "epoch": 0.16480383806428062, "grad_norm": 1.4538641749972423, "learning_rate": 9.531100156909108e-07, "loss": 0.5784, "step": 9481 }, { "epoch": 0.16482122060178345, "grad_norm": 1.2293254614755844, "learning_rate": 9.530981132530125e-07, "loss": 0.3962, "step": 9482 }, { "epoch": 0.16483860313928628, "grad_norm": 1.958584474412142, "learning_rate": 9.530862093790013e-07, "loss": 0.3952, "step": 9483 }, { "epoch": 0.1648559856767891, "grad_norm": 3.992018360034213, "learning_rate": 9.530743040689151e-07, "loss": 0.6567, "step": 9484 }, { "epoch": 0.1648733682142919, "grad_norm": 1.5950232671150242, "learning_rate": 9.530623973227912e-07, "loss": 0.7136, "step": 9485 }, { "epoch": 0.16489075075179474, "grad_norm": 2.059877132193039, "learning_rate": 9.530504891406679e-07, "loss": 0.5282, "step": 9486 }, { "epoch": 0.16490813328929757, "grad_norm": 1.7142909078421515, "learning_rate": 9.530385795225825e-07, "loss": 0.5845, "step": 9487 }, { "epoch": 0.1649255158268004, "grad_norm": 1.6405356756096776, "learning_rate": 9.530266684685729e-07, "loss": 0.263, "step": 9488 }, { "epoch": 0.16494289836430323, "grad_norm": 1.101336702431214, "learning_rate": 9.53014755978677e-07, "loss": 0.3548, "step": 9489 }, { "epoch": 0.16496028090180603, "grad_norm": 1.284474333403582, "learning_rate": 9.530028420529322e-07, "loss": 0.2292, "step": 9490 }, { "epoch": 0.16497766343930886, "grad_norm": 1.786427600040467, "learning_rate": 9.529909266913767e-07, "loss": 0.3448, "step": 9491 }, { "epoch": 0.1649950459768117, "grad_norm": 1.9879583382902917, "learning_rate": 9.52979009894048e-07, "loss": 0.4178, "step": 9492 }, { "epoch": 0.16501242851431452, "grad_norm": 1.3175157468410923, "learning_rate": 9.529670916609837e-07, "loss": 0.3505, "step": 9493 }, { "epoch": 0.16502981105181735, "grad_norm": 1.0446115221716346, "learning_rate": 9.529551719922221e-07, "loss": 0.4497, "step": 9494 }, { "epoch": 0.16504719358932016, "grad_norm": 1.9053876368479483, "learning_rate": 9.529432508878006e-07, "loss": 0.5807, "step": 9495 }, { "epoch": 0.165064576126823, "grad_norm": 2.0588144153261236, "learning_rate": 9.529313283477569e-07, "loss": 0.3928, "step": 9496 }, { "epoch": 0.16508195866432582, "grad_norm": 2.2199392593189975, "learning_rate": 9.529194043721291e-07, "loss": 0.6473, "step": 9497 }, { "epoch": 0.16509934120182865, "grad_norm": 1.9067591728085884, "learning_rate": 9.529074789609548e-07, "loss": 0.6589, "step": 9498 }, { "epoch": 0.16511672373933148, "grad_norm": 1.7009692377716958, "learning_rate": 9.528955521142718e-07, "loss": 0.2312, "step": 9499 }, { "epoch": 0.16513410627683428, "grad_norm": 2.510029238039777, "learning_rate": 9.528836238321181e-07, "loss": 0.5117, "step": 9500 }, { "epoch": 0.1651514888143371, "grad_norm": 1.2926439283654432, "learning_rate": 9.528716941145312e-07, "loss": 0.7794, "step": 9501 }, { "epoch": 0.16516887135183994, "grad_norm": 1.5624586416074817, "learning_rate": 9.528597629615491e-07, "loss": 0.6066, "step": 9502 }, { "epoch": 0.16518625388934277, "grad_norm": 1.9320688162792217, "learning_rate": 9.528478303732096e-07, "loss": 0.4799, "step": 9503 }, { "epoch": 0.1652036364268456, "grad_norm": 1.7160743075965372, "learning_rate": 9.528358963495504e-07, "loss": 0.4199, "step": 9504 }, { "epoch": 0.1652210189643484, "grad_norm": 1.8382497006813074, "learning_rate": 9.528239608906096e-07, "loss": 0.7053, "step": 9505 }, { "epoch": 0.16523840150185123, "grad_norm": 1.8789487410656844, "learning_rate": 9.528120239964248e-07, "loss": 0.4, "step": 9506 }, { "epoch": 0.16525578403935406, "grad_norm": 2.249452480333073, "learning_rate": 9.528000856670338e-07, "loss": 0.4653, "step": 9507 }, { "epoch": 0.1652731665768569, "grad_norm": 1.700160552350399, "learning_rate": 9.527881459024744e-07, "loss": 0.7174, "step": 9508 }, { "epoch": 0.16529054911435972, "grad_norm": 4.513700363714648, "learning_rate": 9.527762047027849e-07, "loss": 0.6914, "step": 9509 }, { "epoch": 0.16530793165186253, "grad_norm": 1.1944867279982585, "learning_rate": 9.527642620680026e-07, "loss": 0.4756, "step": 9510 }, { "epoch": 0.16532531418936536, "grad_norm": 1.3501537841316444, "learning_rate": 9.527523179981657e-07, "loss": 0.3618, "step": 9511 }, { "epoch": 0.1653426967268682, "grad_norm": 1.5468087080317496, "learning_rate": 9.527403724933119e-07, "loss": 0.4013, "step": 9512 }, { "epoch": 0.16536007926437102, "grad_norm": 2.4078565502780047, "learning_rate": 9.52728425553479e-07, "loss": 0.5791, "step": 9513 }, { "epoch": 0.16537746180187385, "grad_norm": 3.918479035036536, "learning_rate": 9.527164771787051e-07, "loss": 0.4597, "step": 9514 }, { "epoch": 0.16539484433937665, "grad_norm": 2.1661191216258455, "learning_rate": 9.527045273690277e-07, "loss": 0.4065, "step": 9515 }, { "epoch": 0.16541222687687948, "grad_norm": 2.076656484266257, "learning_rate": 9.52692576124485e-07, "loss": 0.7361, "step": 9516 }, { "epoch": 0.1654296094143823, "grad_norm": 1.7985711257763894, "learning_rate": 9.526806234451149e-07, "loss": 0.319, "step": 9517 }, { "epoch": 0.16544699195188514, "grad_norm": 3.3565019524102206, "learning_rate": 9.526686693309551e-07, "loss": 0.4673, "step": 9518 }, { "epoch": 0.16546437448938797, "grad_norm": 1.9723254183312855, "learning_rate": 9.526567137820436e-07, "loss": 0.4632, "step": 9519 }, { "epoch": 0.16548175702689077, "grad_norm": 1.9012268001590018, "learning_rate": 9.526447567984182e-07, "loss": 0.4572, "step": 9520 }, { "epoch": 0.1654991395643936, "grad_norm": 1.3971080607592183, "learning_rate": 9.526327983801168e-07, "loss": 0.6131, "step": 9521 }, { "epoch": 0.16551652210189643, "grad_norm": 2.320397313819976, "learning_rate": 9.526208385271774e-07, "loss": 0.5029, "step": 9522 }, { "epoch": 0.16553390463939927, "grad_norm": 1.2280764323904299, "learning_rate": 9.526088772396379e-07, "loss": 0.3308, "step": 9523 }, { "epoch": 0.1655512871769021, "grad_norm": 1.5506789805180725, "learning_rate": 9.525969145175361e-07, "loss": 0.7222, "step": 9524 }, { "epoch": 0.1655686697144049, "grad_norm": 2.9618700010301326, "learning_rate": 9.5258495036091e-07, "loss": 0.4703, "step": 9525 }, { "epoch": 0.16558605225190773, "grad_norm": 1.4447003461904149, "learning_rate": 9.525729847697977e-07, "loss": 0.5891, "step": 9526 }, { "epoch": 0.16560343478941056, "grad_norm": 2.32734228092919, "learning_rate": 9.525610177442366e-07, "loss": 0.6371, "step": 9527 }, { "epoch": 0.1656208173269134, "grad_norm": 1.0521590777133796, "learning_rate": 9.525490492842651e-07, "loss": 0.6499, "step": 9528 }, { "epoch": 0.16563819986441622, "grad_norm": 1.910143890861784, "learning_rate": 9.525370793899212e-07, "loss": 0.5457, "step": 9529 }, { "epoch": 0.16565558240191902, "grad_norm": 1.92939305648366, "learning_rate": 9.525251080612424e-07, "loss": 0.5729, "step": 9530 }, { "epoch": 0.16567296493942185, "grad_norm": 1.5577140617789071, "learning_rate": 9.52513135298267e-07, "loss": 0.2869, "step": 9531 }, { "epoch": 0.16569034747692468, "grad_norm": 1.6022706209034885, "learning_rate": 9.525011611010328e-07, "loss": 0.5901, "step": 9532 }, { "epoch": 0.1657077300144275, "grad_norm": 2.361551506606885, "learning_rate": 9.524891854695775e-07, "loss": 0.3155, "step": 9533 }, { "epoch": 0.16572511255193034, "grad_norm": 2.3629545412758457, "learning_rate": 9.524772084039397e-07, "loss": 0.6179, "step": 9534 }, { "epoch": 0.16574249508943315, "grad_norm": 1.4872819194444042, "learning_rate": 9.524652299041569e-07, "loss": 0.5384, "step": 9535 }, { "epoch": 0.16575987762693598, "grad_norm": 2.2898617650292845, "learning_rate": 9.524532499702672e-07, "loss": 0.2676, "step": 9536 }, { "epoch": 0.1657772601644388, "grad_norm": 2.3871092364678614, "learning_rate": 9.524412686023085e-07, "loss": 0.4101, "step": 9537 }, { "epoch": 0.16579464270194164, "grad_norm": 1.790819427980332, "learning_rate": 9.524292858003188e-07, "loss": 0.2826, "step": 9538 }, { "epoch": 0.16581202523944447, "grad_norm": 1.4712157428757506, "learning_rate": 9.524173015643361e-07, "loss": 0.5126, "step": 9539 }, { "epoch": 0.16582940777694727, "grad_norm": 1.6034825683935776, "learning_rate": 9.524053158943983e-07, "loss": 0.38, "step": 9540 }, { "epoch": 0.1658467903144501, "grad_norm": 2.022031219333591, "learning_rate": 9.523933287905437e-07, "loss": 0.437, "step": 9541 }, { "epoch": 0.16586417285195293, "grad_norm": 1.5169694210332518, "learning_rate": 9.523813402528098e-07, "loss": 0.393, "step": 9542 }, { "epoch": 0.16588155538945576, "grad_norm": 1.9198980739640579, "learning_rate": 9.52369350281235e-07, "loss": 0.483, "step": 9543 }, { "epoch": 0.1658989379269586, "grad_norm": 2.4468791013937334, "learning_rate": 9.523573588758571e-07, "loss": 0.454, "step": 9544 }, { "epoch": 0.1659163204644614, "grad_norm": 1.6564236252454134, "learning_rate": 9.523453660367142e-07, "loss": 0.5859, "step": 9545 }, { "epoch": 0.16593370300196422, "grad_norm": 2.585999646274276, "learning_rate": 9.523333717638443e-07, "loss": 0.7036, "step": 9546 }, { "epoch": 0.16595108553946705, "grad_norm": 3.0111681730463693, "learning_rate": 9.523213760572854e-07, "loss": 0.5117, "step": 9547 }, { "epoch": 0.16596846807696988, "grad_norm": 2.1511637340028806, "learning_rate": 9.523093789170754e-07, "loss": 0.5614, "step": 9548 }, { "epoch": 0.1659858506144727, "grad_norm": 2.0902111412233815, "learning_rate": 9.522973803432526e-07, "loss": 0.3153, "step": 9549 }, { "epoch": 0.16600323315197552, "grad_norm": 1.43168862632902, "learning_rate": 9.522853803358548e-07, "loss": 0.5304, "step": 9550 }, { "epoch": 0.16602061568947835, "grad_norm": 3.1057630202262945, "learning_rate": 9.5227337889492e-07, "loss": 0.4362, "step": 9551 }, { "epoch": 0.16603799822698118, "grad_norm": 1.4755377039289115, "learning_rate": 9.522613760204865e-07, "loss": 0.256, "step": 9552 }, { "epoch": 0.166055380764484, "grad_norm": 2.342866767238599, "learning_rate": 9.522493717125921e-07, "loss": 0.55, "step": 9553 }, { "epoch": 0.16607276330198684, "grad_norm": 2.080085786197767, "learning_rate": 9.52237365971275e-07, "loss": 0.6244, "step": 9554 }, { "epoch": 0.16609014583948964, "grad_norm": 2.0351125961841205, "learning_rate": 9.522253587965731e-07, "loss": 0.2657, "step": 9555 }, { "epoch": 0.16610752837699247, "grad_norm": 2.218877470056902, "learning_rate": 9.522133501885248e-07, "loss": 0.6202, "step": 9556 }, { "epoch": 0.1661249109144953, "grad_norm": 2.052345343961739, "learning_rate": 9.522013401471677e-07, "loss": 0.6356, "step": 9557 }, { "epoch": 0.16614229345199813, "grad_norm": 2.005182898905183, "learning_rate": 9.521893286725401e-07, "loss": 0.4753, "step": 9558 }, { "epoch": 0.16615967598950096, "grad_norm": 1.2813052499147788, "learning_rate": 9.5217731576468e-07, "loss": 0.4042, "step": 9559 }, { "epoch": 0.16617705852700376, "grad_norm": 3.551722989157993, "learning_rate": 9.521653014236256e-07, "loss": 0.4977, "step": 9560 }, { "epoch": 0.1661944410645066, "grad_norm": 1.991872151293697, "learning_rate": 9.521532856494149e-07, "loss": 0.314, "step": 9561 }, { "epoch": 0.16621182360200942, "grad_norm": 1.155431206238009, "learning_rate": 9.52141268442086e-07, "loss": 0.1926, "step": 9562 }, { "epoch": 0.16622920613951225, "grad_norm": 1.2310118067179683, "learning_rate": 9.521292498016768e-07, "loss": 0.2995, "step": 9563 }, { "epoch": 0.16624658867701508, "grad_norm": 4.350596277359897, "learning_rate": 9.521172297282259e-07, "loss": 0.7212, "step": 9564 }, { "epoch": 0.16626397121451789, "grad_norm": 1.8797929269238025, "learning_rate": 9.521052082217709e-07, "loss": 0.3357, "step": 9565 }, { "epoch": 0.16628135375202072, "grad_norm": 1.6641955447307233, "learning_rate": 9.5209318528235e-07, "loss": 0.4891, "step": 9566 }, { "epoch": 0.16629873628952355, "grad_norm": 1.7297907069265435, "learning_rate": 9.520811609100015e-07, "loss": 0.4193, "step": 9567 }, { "epoch": 0.16631611882702638, "grad_norm": 1.8559137064443652, "learning_rate": 9.520691351047632e-07, "loss": 0.5021, "step": 9568 }, { "epoch": 0.1663335013645292, "grad_norm": 1.7858015039605286, "learning_rate": 9.520571078666735e-07, "loss": 0.4334, "step": 9569 }, { "epoch": 0.166350883902032, "grad_norm": 2.1615091012085985, "learning_rate": 9.520450791957705e-07, "loss": 0.6497, "step": 9570 }, { "epoch": 0.16636826643953484, "grad_norm": 2.1003088416304547, "learning_rate": 9.520330490920923e-07, "loss": 0.4384, "step": 9571 }, { "epoch": 0.16638564897703767, "grad_norm": 1.5614773455117912, "learning_rate": 9.520210175556768e-07, "loss": 0.3715, "step": 9572 }, { "epoch": 0.1664030315145405, "grad_norm": 1.7949279703294405, "learning_rate": 9.520089845865624e-07, "loss": 0.3933, "step": 9573 }, { "epoch": 0.16642041405204333, "grad_norm": 2.051568185481241, "learning_rate": 9.519969501847871e-07, "loss": 0.3082, "step": 9574 }, { "epoch": 0.16643779658954613, "grad_norm": 1.45313038460851, "learning_rate": 9.519849143503891e-07, "loss": 0.5867, "step": 9575 }, { "epoch": 0.16645517912704896, "grad_norm": 1.4510254491644416, "learning_rate": 9.519728770834065e-07, "loss": 0.4731, "step": 9576 }, { "epoch": 0.1664725616645518, "grad_norm": 1.65425668881504, "learning_rate": 9.519608383838777e-07, "loss": 0.3893, "step": 9577 }, { "epoch": 0.16648994420205462, "grad_norm": 1.7715536663499203, "learning_rate": 9.519487982518406e-07, "loss": 0.341, "step": 9578 }, { "epoch": 0.16650732673955745, "grad_norm": 1.9982719077256421, "learning_rate": 9.519367566873333e-07, "loss": 0.428, "step": 9579 }, { "epoch": 0.16652470927706026, "grad_norm": 6.476931956094526, "learning_rate": 9.519247136903943e-07, "loss": 0.8392, "step": 9580 }, { "epoch": 0.1665420918145631, "grad_norm": 1.8649654230476997, "learning_rate": 9.519126692610613e-07, "loss": 0.6485, "step": 9581 }, { "epoch": 0.16655947435206592, "grad_norm": 1.392161746708654, "learning_rate": 9.519006233993729e-07, "loss": 0.5719, "step": 9582 }, { "epoch": 0.16657685688956875, "grad_norm": 1.9105722906668923, "learning_rate": 9.51888576105367e-07, "loss": 0.6494, "step": 9583 }, { "epoch": 0.16659423942707158, "grad_norm": 2.8177036155550246, "learning_rate": 9.518765273790819e-07, "loss": 0.5534, "step": 9584 }, { "epoch": 0.16661162196457438, "grad_norm": 2.071686337115436, "learning_rate": 9.518644772205559e-07, "loss": 0.7098, "step": 9585 }, { "epoch": 0.1666290045020772, "grad_norm": 1.6129294840715007, "learning_rate": 9.518524256298271e-07, "loss": 0.3478, "step": 9586 }, { "epoch": 0.16664638703958004, "grad_norm": 1.762084483991597, "learning_rate": 9.518403726069335e-07, "loss": 0.3661, "step": 9587 }, { "epoch": 0.16666376957708287, "grad_norm": 1.9477330245217501, "learning_rate": 9.518283181519135e-07, "loss": 0.4498, "step": 9588 }, { "epoch": 0.16668115211458567, "grad_norm": 1.4975621442637572, "learning_rate": 9.518162622648054e-07, "loss": 0.4077, "step": 9589 }, { "epoch": 0.1666985346520885, "grad_norm": 2.0809330087346547, "learning_rate": 9.518042049456472e-07, "loss": 0.4738, "step": 9590 }, { "epoch": 0.16671591718959133, "grad_norm": 1.482642053445926, "learning_rate": 9.517921461944772e-07, "loss": 0.5125, "step": 9591 }, { "epoch": 0.16673329972709416, "grad_norm": 1.7373313268996233, "learning_rate": 9.517800860113336e-07, "loss": 0.5588, "step": 9592 }, { "epoch": 0.166750682264597, "grad_norm": 2.5610177473110243, "learning_rate": 9.517680243962549e-07, "loss": 0.4244, "step": 9593 }, { "epoch": 0.1667680648020998, "grad_norm": 2.5409199475776334, "learning_rate": 9.517559613492789e-07, "loss": 0.8348, "step": 9594 }, { "epoch": 0.16678544733960263, "grad_norm": 2.797359093336216, "learning_rate": 9.51743896870444e-07, "loss": 0.3126, "step": 9595 }, { "epoch": 0.16680282987710546, "grad_norm": 1.9413000411719787, "learning_rate": 9.517318309597886e-07, "loss": 0.4534, "step": 9596 }, { "epoch": 0.1668202124146083, "grad_norm": 3.2845562840126696, "learning_rate": 9.517197636173506e-07, "loss": 0.4336, "step": 9597 }, { "epoch": 0.16683759495211112, "grad_norm": 2.0048676296532477, "learning_rate": 9.517076948431686e-07, "loss": 0.6579, "step": 9598 }, { "epoch": 0.16685497748961392, "grad_norm": 1.2830470972444115, "learning_rate": 9.516956246372807e-07, "loss": 0.3662, "step": 9599 }, { "epoch": 0.16687236002711675, "grad_norm": 9.445372638104702, "learning_rate": 9.516835529997251e-07, "loss": 0.8816, "step": 9600 }, { "epoch": 0.16688974256461958, "grad_norm": 2.366534085384287, "learning_rate": 9.516714799305401e-07, "loss": 0.6759, "step": 9601 }, { "epoch": 0.1669071251021224, "grad_norm": 2.085520800845092, "learning_rate": 9.516594054297641e-07, "loss": 0.5371, "step": 9602 }, { "epoch": 0.16692450763962524, "grad_norm": 2.047838017981912, "learning_rate": 9.516473294974352e-07, "loss": 0.5091, "step": 9603 }, { "epoch": 0.16694189017712804, "grad_norm": 2.021507494728905, "learning_rate": 9.516352521335917e-07, "loss": 0.395, "step": 9604 }, { "epoch": 0.16695927271463087, "grad_norm": 1.8551900731395536, "learning_rate": 9.51623173338272e-07, "loss": 0.3766, "step": 9605 }, { "epoch": 0.1669766552521337, "grad_norm": 1.6708506429889003, "learning_rate": 9.516110931115142e-07, "loss": 0.8235, "step": 9606 }, { "epoch": 0.16699403778963653, "grad_norm": 2.2868683024179943, "learning_rate": 9.515990114533567e-07, "loss": 0.4412, "step": 9607 }, { "epoch": 0.16701142032713936, "grad_norm": 1.5737481964985867, "learning_rate": 9.51586928363838e-07, "loss": 0.4124, "step": 9608 }, { "epoch": 0.16702880286464217, "grad_norm": 3.366385177474449, "learning_rate": 9.515748438429959e-07, "loss": 0.3805, "step": 9609 }, { "epoch": 0.167046185402145, "grad_norm": 1.0894852825038928, "learning_rate": 9.515627578908692e-07, "loss": 0.5448, "step": 9610 }, { "epoch": 0.16706356793964783, "grad_norm": 1.9802449569949334, "learning_rate": 9.515506705074958e-07, "loss": 0.3709, "step": 9611 }, { "epoch": 0.16708095047715066, "grad_norm": 3.994575471803721, "learning_rate": 9.515385816929142e-07, "loss": 0.548, "step": 9612 }, { "epoch": 0.1670983330146535, "grad_norm": 2.3852318470670077, "learning_rate": 9.515264914471627e-07, "loss": 0.5021, "step": 9613 }, { "epoch": 0.1671157155521563, "grad_norm": 1.6497314702290649, "learning_rate": 9.5151439977028e-07, "loss": 0.4133, "step": 9614 }, { "epoch": 0.16713309808965912, "grad_norm": 1.6603440301373025, "learning_rate": 9.515023066623037e-07, "loss": 0.7375, "step": 9615 }, { "epoch": 0.16715048062716195, "grad_norm": 1.475548071434917, "learning_rate": 9.514902121232725e-07, "loss": 0.7297, "step": 9616 }, { "epoch": 0.16716786316466478, "grad_norm": 2.319908234925603, "learning_rate": 9.514781161532248e-07, "loss": 0.5005, "step": 9617 }, { "epoch": 0.1671852457021676, "grad_norm": 4.145283020777981, "learning_rate": 9.514660187521989e-07, "loss": 0.6671, "step": 9618 }, { "epoch": 0.16720262823967041, "grad_norm": 1.6915647178837776, "learning_rate": 9.514539199202332e-07, "loss": 0.6053, "step": 9619 }, { "epoch": 0.16722001077717324, "grad_norm": 2.0545907370365937, "learning_rate": 9.514418196573657e-07, "loss": 0.4995, "step": 9620 }, { "epoch": 0.16723739331467607, "grad_norm": 2.039656223847797, "learning_rate": 9.514297179636351e-07, "loss": 0.4019, "step": 9621 }, { "epoch": 0.1672547758521789, "grad_norm": 2.3150206388876873, "learning_rate": 9.514176148390798e-07, "loss": 0.9798, "step": 9622 }, { "epoch": 0.16727215838968174, "grad_norm": 1.7230078857867575, "learning_rate": 9.514055102837379e-07, "loss": 0.4469, "step": 9623 }, { "epoch": 0.16728954092718454, "grad_norm": 1.808340215737647, "learning_rate": 9.513934042976479e-07, "loss": 0.4742, "step": 9624 }, { "epoch": 0.16730692346468737, "grad_norm": 2.566801289459812, "learning_rate": 9.513812968808481e-07, "loss": 0.4952, "step": 9625 }, { "epoch": 0.1673243060021902, "grad_norm": 2.4506816698837235, "learning_rate": 9.513691880333772e-07, "loss": 0.4703, "step": 9626 }, { "epoch": 0.16734168853969303, "grad_norm": 1.24856906471456, "learning_rate": 9.513570777552731e-07, "loss": 0.231, "step": 9627 }, { "epoch": 0.16735907107719586, "grad_norm": 1.197961248014004, "learning_rate": 9.513449660465745e-07, "loss": 0.4261, "step": 9628 }, { "epoch": 0.16737645361469866, "grad_norm": 2.3923285052129173, "learning_rate": 9.513328529073194e-07, "loss": 0.5727, "step": 9629 }, { "epoch": 0.1673938361522015, "grad_norm": 2.3419292931625493, "learning_rate": 9.513207383375468e-07, "loss": 0.2343, "step": 9630 }, { "epoch": 0.16741121868970432, "grad_norm": 2.967344338709787, "learning_rate": 9.513086223372946e-07, "loss": 0.4113, "step": 9631 }, { "epoch": 0.16742860122720715, "grad_norm": 1.2720416664914533, "learning_rate": 9.512965049066015e-07, "loss": 0.3356, "step": 9632 }, { "epoch": 0.16744598376470998, "grad_norm": 2.315095339703055, "learning_rate": 9.512843860455058e-07, "loss": 0.6263, "step": 9633 }, { "epoch": 0.16746336630221278, "grad_norm": 1.7245973456410006, "learning_rate": 9.512722657540459e-07, "loss": 0.4356, "step": 9634 }, { "epoch": 0.16748074883971562, "grad_norm": 2.4773281507030056, "learning_rate": 9.5126014403226e-07, "loss": 0.3514, "step": 9635 }, { "epoch": 0.16749813137721845, "grad_norm": 1.5126203718781293, "learning_rate": 9.51248020880187e-07, "loss": 0.5705, "step": 9636 }, { "epoch": 0.16751551391472128, "grad_norm": 1.825023892606321, "learning_rate": 9.512358962978649e-07, "loss": 0.6391, "step": 9637 }, { "epoch": 0.1675328964522241, "grad_norm": 2.777491767246333, "learning_rate": 9.512237702853324e-07, "loss": 0.4437, "step": 9638 }, { "epoch": 0.1675502789897269, "grad_norm": 2.086600141503181, "learning_rate": 9.512116428426278e-07, "loss": 0.4189, "step": 9639 }, { "epoch": 0.16756766152722974, "grad_norm": 1.2744800180006683, "learning_rate": 9.511995139697894e-07, "loss": 0.3451, "step": 9640 }, { "epoch": 0.16758504406473257, "grad_norm": 1.559445018640298, "learning_rate": 9.51187383666856e-07, "loss": 0.5465, "step": 9641 }, { "epoch": 0.1676024266022354, "grad_norm": 1.411048026655113, "learning_rate": 9.511752519338658e-07, "loss": 0.8632, "step": 9642 }, { "epoch": 0.16761980913973823, "grad_norm": 3.2338069717605933, "learning_rate": 9.511631187708573e-07, "loss": 0.5595, "step": 9643 }, { "epoch": 0.16763719167724103, "grad_norm": 1.8766038595393155, "learning_rate": 9.511509841778689e-07, "loss": 0.3294, "step": 9644 }, { "epoch": 0.16765457421474386, "grad_norm": 3.1131033085586752, "learning_rate": 9.511388481549392e-07, "loss": 0.351, "step": 9645 }, { "epoch": 0.1676719567522467, "grad_norm": 1.5088393837532732, "learning_rate": 9.511267107021066e-07, "loss": 0.3741, "step": 9646 }, { "epoch": 0.16768933928974952, "grad_norm": 2.393996607168866, "learning_rate": 9.511145718194096e-07, "loss": 0.382, "step": 9647 }, { "epoch": 0.16770672182725235, "grad_norm": 1.8564322276527658, "learning_rate": 9.511024315068866e-07, "loss": 0.3979, "step": 9648 }, { "epoch": 0.16772410436475516, "grad_norm": 1.2506590740547594, "learning_rate": 9.510902897645761e-07, "loss": 0.3734, "step": 9649 }, { "epoch": 0.16774148690225799, "grad_norm": 1.9298011195316902, "learning_rate": 9.510781465925164e-07, "loss": 0.3151, "step": 9650 }, { "epoch": 0.16775886943976082, "grad_norm": 1.6963889205916123, "learning_rate": 9.510660019907464e-07, "loss": 0.4351, "step": 9651 }, { "epoch": 0.16777625197726365, "grad_norm": 2.0722897271016234, "learning_rate": 9.510538559593043e-07, "loss": 0.6112, "step": 9652 }, { "epoch": 0.16779363451476648, "grad_norm": 2.4814223229094376, "learning_rate": 9.510417084982288e-07, "loss": 0.6899, "step": 9653 }, { "epoch": 0.16781101705226928, "grad_norm": 1.6999102962741868, "learning_rate": 9.510295596075582e-07, "loss": 0.2605, "step": 9654 }, { "epoch": 0.1678283995897721, "grad_norm": 1.7098047858574024, "learning_rate": 9.510174092873309e-07, "loss": 0.2565, "step": 9655 }, { "epoch": 0.16784578212727494, "grad_norm": 1.128325459013548, "learning_rate": 9.510052575375858e-07, "loss": 0.3952, "step": 9656 }, { "epoch": 0.16786316466477777, "grad_norm": 1.4517736541369959, "learning_rate": 9.509931043583612e-07, "loss": 0.3551, "step": 9657 }, { "epoch": 0.1678805472022806, "grad_norm": 2.0711161914013156, "learning_rate": 9.509809497496955e-07, "loss": 0.4681, "step": 9658 }, { "epoch": 0.1678979297397834, "grad_norm": 2.6267027120877375, "learning_rate": 9.509687937116275e-07, "loss": 0.7561, "step": 9659 }, { "epoch": 0.16791531227728623, "grad_norm": 2.118831173624901, "learning_rate": 9.509566362441954e-07, "loss": 0.7799, "step": 9660 }, { "epoch": 0.16793269481478906, "grad_norm": 1.6356227160730346, "learning_rate": 9.509444773474383e-07, "loss": 0.4547, "step": 9661 }, { "epoch": 0.1679500773522919, "grad_norm": 3.0538993122703326, "learning_rate": 9.509323170213938e-07, "loss": 0.6217, "step": 9662 }, { "epoch": 0.16796745988979472, "grad_norm": 1.606418266097833, "learning_rate": 9.509201552661015e-07, "loss": 0.4618, "step": 9663 }, { "epoch": 0.16798484242729753, "grad_norm": 2.454618129303504, "learning_rate": 9.509079920815991e-07, "loss": 0.4751, "step": 9664 }, { "epoch": 0.16800222496480036, "grad_norm": 2.043201582517828, "learning_rate": 9.508958274679258e-07, "loss": 0.4514, "step": 9665 }, { "epoch": 0.1680196075023032, "grad_norm": 2.264353235964773, "learning_rate": 9.508836614251196e-07, "loss": 0.3618, "step": 9666 }, { "epoch": 0.16803699003980602, "grad_norm": 1.493836272841173, "learning_rate": 9.508714939532193e-07, "loss": 0.6527, "step": 9667 }, { "epoch": 0.16805437257730885, "grad_norm": 2.295410699157964, "learning_rate": 9.508593250522636e-07, "loss": 0.5476, "step": 9668 }, { "epoch": 0.16807175511481165, "grad_norm": 2.140711965324606, "learning_rate": 9.50847154722291e-07, "loss": 0.4092, "step": 9669 }, { "epoch": 0.16808913765231448, "grad_norm": 2.33174075290411, "learning_rate": 9.508349829633399e-07, "loss": 0.377, "step": 9670 }, { "epoch": 0.1681065201898173, "grad_norm": 1.9875261432197326, "learning_rate": 9.508228097754489e-07, "loss": 0.4001, "step": 9671 }, { "epoch": 0.16812390272732014, "grad_norm": 1.8241141006379737, "learning_rate": 9.508106351586569e-07, "loss": 0.6561, "step": 9672 }, { "epoch": 0.16814128526482297, "grad_norm": 1.2051991925613799, "learning_rate": 9.507984591130021e-07, "loss": 0.3342, "step": 9673 }, { "epoch": 0.16815866780232577, "grad_norm": 1.9786347191925302, "learning_rate": 9.507862816385235e-07, "loss": 0.4187, "step": 9674 }, { "epoch": 0.1681760503398286, "grad_norm": 1.9054179944692344, "learning_rate": 9.507741027352593e-07, "loss": 0.3057, "step": 9675 }, { "epoch": 0.16819343287733143, "grad_norm": 2.2398325823992753, "learning_rate": 9.507619224032483e-07, "loss": 0.6933, "step": 9676 }, { "epoch": 0.16821081541483426, "grad_norm": 1.8764762855110824, "learning_rate": 9.50749740642529e-07, "loss": 0.4212, "step": 9677 }, { "epoch": 0.1682281979523371, "grad_norm": 1.1280774269487184, "learning_rate": 9.507375574531401e-07, "loss": 0.5396, "step": 9678 }, { "epoch": 0.1682455804898399, "grad_norm": 1.6012569959325946, "learning_rate": 9.507253728351204e-07, "loss": 0.5295, "step": 9679 }, { "epoch": 0.16826296302734273, "grad_norm": 1.6753161698249794, "learning_rate": 9.50713186788508e-07, "loss": 0.3667, "step": 9680 }, { "epoch": 0.16828034556484556, "grad_norm": 1.5822837925697224, "learning_rate": 9.507009993133419e-07, "loss": 0.3214, "step": 9681 }, { "epoch": 0.1682977281023484, "grad_norm": 1.775177486396198, "learning_rate": 9.506888104096608e-07, "loss": 0.4917, "step": 9682 }, { "epoch": 0.16831511063985122, "grad_norm": 1.4541690496450825, "learning_rate": 9.506766200775032e-07, "loss": 0.287, "step": 9683 }, { "epoch": 0.16833249317735402, "grad_norm": 1.846208493845005, "learning_rate": 9.506644283169076e-07, "loss": 0.4222, "step": 9684 }, { "epoch": 0.16834987571485685, "grad_norm": 2.112178936649776, "learning_rate": 9.506522351279129e-07, "loss": 0.5257, "step": 9685 }, { "epoch": 0.16836725825235968, "grad_norm": 2.3282572806464956, "learning_rate": 9.506400405105577e-07, "loss": 0.3286, "step": 9686 }, { "epoch": 0.1683846407898625, "grad_norm": 4.013327187318002, "learning_rate": 9.506278444648803e-07, "loss": 0.736, "step": 9687 }, { "epoch": 0.16840202332736534, "grad_norm": 2.176126014505586, "learning_rate": 9.506156469909199e-07, "loss": 0.3438, "step": 9688 }, { "epoch": 0.16841940586486814, "grad_norm": 1.4090504126612853, "learning_rate": 9.506034480887148e-07, "loss": 0.3392, "step": 9689 }, { "epoch": 0.16843678840237097, "grad_norm": 3.003052447072699, "learning_rate": 9.505912477583038e-07, "loss": 0.4465, "step": 9690 }, { "epoch": 0.1684541709398738, "grad_norm": 2.1297772288954415, "learning_rate": 9.505790459997254e-07, "loss": 0.4592, "step": 9691 }, { "epoch": 0.16847155347737663, "grad_norm": 1.7719862389822205, "learning_rate": 9.505668428130185e-07, "loss": 0.4321, "step": 9692 }, { "epoch": 0.16848893601487946, "grad_norm": 2.9411213974274006, "learning_rate": 9.505546381982216e-07, "loss": 0.6386, "step": 9693 }, { "epoch": 0.16850631855238227, "grad_norm": 2.9193131458884105, "learning_rate": 9.505424321553737e-07, "loss": 0.5997, "step": 9694 }, { "epoch": 0.1685237010898851, "grad_norm": 2.669738080555068, "learning_rate": 9.50530224684513e-07, "loss": 0.3812, "step": 9695 }, { "epoch": 0.16854108362738793, "grad_norm": 1.402739953822, "learning_rate": 9.505180157856785e-07, "loss": 0.615, "step": 9696 }, { "epoch": 0.16855846616489076, "grad_norm": 2.3827050563710577, "learning_rate": 9.50505805458909e-07, "loss": 0.5424, "step": 9697 }, { "epoch": 0.1685758487023936, "grad_norm": 1.7334778052215485, "learning_rate": 9.504935937042429e-07, "loss": 0.5029, "step": 9698 }, { "epoch": 0.1685932312398964, "grad_norm": 1.5748429931964059, "learning_rate": 9.50481380521719e-07, "loss": 0.2877, "step": 9699 }, { "epoch": 0.16861061377739922, "grad_norm": 1.7724217183143351, "learning_rate": 9.504691659113761e-07, "loss": 0.6162, "step": 9700 }, { "epoch": 0.16862799631490205, "grad_norm": 2.877153750095747, "learning_rate": 9.504569498732529e-07, "loss": 0.3857, "step": 9701 }, { "epoch": 0.16864537885240488, "grad_norm": 2.4774565983234846, "learning_rate": 9.504447324073881e-07, "loss": 0.3394, "step": 9702 }, { "epoch": 0.1686627613899077, "grad_norm": 1.709199025413874, "learning_rate": 9.504325135138205e-07, "loss": 0.5576, "step": 9703 }, { "epoch": 0.16868014392741051, "grad_norm": 4.38940191571511, "learning_rate": 9.504202931925887e-07, "loss": 0.6612, "step": 9704 }, { "epoch": 0.16869752646491334, "grad_norm": 1.5530449886143025, "learning_rate": 9.504080714437312e-07, "loss": 0.3475, "step": 9705 }, { "epoch": 0.16871490900241617, "grad_norm": 2.369524262200599, "learning_rate": 9.503958482672874e-07, "loss": 0.2147, "step": 9706 }, { "epoch": 0.168732291539919, "grad_norm": 1.6547757392874227, "learning_rate": 9.503836236632954e-07, "loss": 0.4453, "step": 9707 }, { "epoch": 0.16874967407742184, "grad_norm": 3.9940564250312964, "learning_rate": 9.503713976317944e-07, "loss": 0.6949, "step": 9708 }, { "epoch": 0.16876705661492464, "grad_norm": 1.9018694656285364, "learning_rate": 9.503591701728228e-07, "loss": 0.6388, "step": 9709 }, { "epoch": 0.16878443915242747, "grad_norm": 1.4724295587324479, "learning_rate": 9.503469412864196e-07, "loss": 0.4927, "step": 9710 }, { "epoch": 0.1688018216899303, "grad_norm": 1.9797275677201296, "learning_rate": 9.503347109726234e-07, "loss": 0.7115, "step": 9711 }, { "epoch": 0.16881920422743313, "grad_norm": 1.4087029732114091, "learning_rate": 9.50322479231473e-07, "loss": 0.5137, "step": 9712 }, { "epoch": 0.16883658676493596, "grad_norm": 1.8659683400819687, "learning_rate": 9.503102460630072e-07, "loss": 0.3419, "step": 9713 }, { "epoch": 0.16885396930243876, "grad_norm": 2.0159598496506668, "learning_rate": 9.50298011467265e-07, "loss": 0.3328, "step": 9714 }, { "epoch": 0.1688713518399416, "grad_norm": 2.3730904911000086, "learning_rate": 9.502857754442848e-07, "loss": 0.3828, "step": 9715 }, { "epoch": 0.16888873437744442, "grad_norm": 1.409584036436804, "learning_rate": 9.502735379941055e-07, "loss": 0.5872, "step": 9716 }, { "epoch": 0.16890611691494725, "grad_norm": 1.7733390835960121, "learning_rate": 9.502612991167659e-07, "loss": 0.2097, "step": 9717 }, { "epoch": 0.16892349945245008, "grad_norm": 1.773980959027116, "learning_rate": 9.502490588123049e-07, "loss": 0.4152, "step": 9718 }, { "epoch": 0.16894088198995288, "grad_norm": 1.4043936703119413, "learning_rate": 9.502368170807611e-07, "loss": 0.7186, "step": 9719 }, { "epoch": 0.16895826452745571, "grad_norm": 1.633605291789046, "learning_rate": 9.502245739221735e-07, "loss": 0.4644, "step": 9720 }, { "epoch": 0.16897564706495855, "grad_norm": 1.7562404424645117, "learning_rate": 9.502123293365809e-07, "loss": 0.6064, "step": 9721 }, { "epoch": 0.16899302960246138, "grad_norm": 1.748745289319792, "learning_rate": 9.502000833240219e-07, "loss": 0.5163, "step": 9722 }, { "epoch": 0.1690104121399642, "grad_norm": 1.9063376683902717, "learning_rate": 9.501878358845355e-07, "loss": 0.3573, "step": 9723 }, { "epoch": 0.169027794677467, "grad_norm": 2.6180987036741614, "learning_rate": 9.501755870181605e-07, "loss": 0.6524, "step": 9724 }, { "epoch": 0.16904517721496984, "grad_norm": 2.246829047946679, "learning_rate": 9.501633367249355e-07, "loss": 0.7274, "step": 9725 }, { "epoch": 0.16906255975247267, "grad_norm": 2.491807586167749, "learning_rate": 9.501510850048997e-07, "loss": 0.8389, "step": 9726 }, { "epoch": 0.1690799422899755, "grad_norm": 2.4856003144435563, "learning_rate": 9.501388318580917e-07, "loss": 0.6846, "step": 9727 }, { "epoch": 0.16909732482747833, "grad_norm": 2.0167324867467844, "learning_rate": 9.501265772845502e-07, "loss": 0.4505, "step": 9728 }, { "epoch": 0.16911470736498113, "grad_norm": 4.292723570351875, "learning_rate": 9.501143212843144e-07, "loss": 0.6924, "step": 9729 }, { "epoch": 0.16913208990248396, "grad_norm": 2.7171444041800115, "learning_rate": 9.50102063857423e-07, "loss": 0.7147, "step": 9730 }, { "epoch": 0.1691494724399868, "grad_norm": 1.4853257632054728, "learning_rate": 9.500898050039148e-07, "loss": 0.5278, "step": 9731 }, { "epoch": 0.16916685497748962, "grad_norm": 1.685326340339448, "learning_rate": 9.500775447238285e-07, "loss": 0.3413, "step": 9732 }, { "epoch": 0.16918423751499242, "grad_norm": 2.4583038104142347, "learning_rate": 9.500652830172033e-07, "loss": 0.3359, "step": 9733 }, { "epoch": 0.16920162005249526, "grad_norm": 4.2717962268649945, "learning_rate": 9.500530198840779e-07, "loss": 0.536, "step": 9734 }, { "epoch": 0.16921900258999809, "grad_norm": 1.8850088767438593, "learning_rate": 9.500407553244912e-07, "loss": 0.6068, "step": 9735 }, { "epoch": 0.16923638512750092, "grad_norm": 1.9841739127123326, "learning_rate": 9.500284893384818e-07, "loss": 0.3761, "step": 9736 }, { "epoch": 0.16925376766500375, "grad_norm": 2.333515419402153, "learning_rate": 9.50016221926089e-07, "loss": 0.3472, "step": 9737 }, { "epoch": 0.16927115020250655, "grad_norm": 2.4158466019827336, "learning_rate": 9.500039530873515e-07, "loss": 0.3611, "step": 9738 }, { "epoch": 0.16928853274000938, "grad_norm": 3.4692541873512557, "learning_rate": 9.499916828223082e-07, "loss": 0.4658, "step": 9739 }, { "epoch": 0.1693059152775122, "grad_norm": 1.9827421591373027, "learning_rate": 9.499794111309978e-07, "loss": 0.298, "step": 9740 }, { "epoch": 0.16932329781501504, "grad_norm": 1.9341188999906729, "learning_rate": 9.499671380134595e-07, "loss": 0.3706, "step": 9741 }, { "epoch": 0.16934068035251787, "grad_norm": 1.7442405946731592, "learning_rate": 9.49954863469732e-07, "loss": 0.5682, "step": 9742 }, { "epoch": 0.16935806289002067, "grad_norm": 2.079216018303799, "learning_rate": 9.499425874998546e-07, "loss": 0.4003, "step": 9743 }, { "epoch": 0.1693754454275235, "grad_norm": 1.8269768744417767, "learning_rate": 9.499303101038656e-07, "loss": 0.4854, "step": 9744 }, { "epoch": 0.16939282796502633, "grad_norm": 1.9320437304245976, "learning_rate": 9.499180312818041e-07, "loss": 0.3878, "step": 9745 }, { "epoch": 0.16941021050252916, "grad_norm": 2.076681275609171, "learning_rate": 9.499057510337093e-07, "loss": 0.4104, "step": 9746 }, { "epoch": 0.169427593040032, "grad_norm": 7.32828227503941, "learning_rate": 9.498934693596199e-07, "loss": 0.7056, "step": 9747 }, { "epoch": 0.1694449755775348, "grad_norm": 7.472889079654136, "learning_rate": 9.498811862595748e-07, "loss": 0.4978, "step": 9748 }, { "epoch": 0.16946235811503763, "grad_norm": 1.324187630061711, "learning_rate": 9.49868901733613e-07, "loss": 0.2866, "step": 9749 }, { "epoch": 0.16947974065254046, "grad_norm": 2.7512770032712366, "learning_rate": 9.498566157817736e-07, "loss": 0.6156, "step": 9750 }, { "epoch": 0.1694971231900433, "grad_norm": 1.3805168994851096, "learning_rate": 9.498443284040952e-07, "loss": 0.3305, "step": 9751 }, { "epoch": 0.16951450572754612, "grad_norm": 1.4922671972733392, "learning_rate": 9.498320396006169e-07, "loss": 0.4132, "step": 9752 }, { "epoch": 0.16953188826504892, "grad_norm": 2.0358656708225324, "learning_rate": 9.498197493713777e-07, "loss": 0.4516, "step": 9753 }, { "epoch": 0.16954927080255175, "grad_norm": 1.5756984316929374, "learning_rate": 9.498074577164167e-07, "loss": 0.5332, "step": 9754 }, { "epoch": 0.16956665334005458, "grad_norm": 1.587903354752108, "learning_rate": 9.497951646357725e-07, "loss": 0.5107, "step": 9755 }, { "epoch": 0.1695840358775574, "grad_norm": 1.5139793529207086, "learning_rate": 9.497828701294843e-07, "loss": 0.3986, "step": 9756 }, { "epoch": 0.16960141841506024, "grad_norm": 1.7537925471982854, "learning_rate": 9.497705741975911e-07, "loss": 0.2324, "step": 9757 }, { "epoch": 0.16961880095256304, "grad_norm": 1.2773129335880016, "learning_rate": 9.497582768401317e-07, "loss": 0.5655, "step": 9758 }, { "epoch": 0.16963618349006587, "grad_norm": 1.1173366338843733, "learning_rate": 9.497459780571452e-07, "loss": 0.5631, "step": 9759 }, { "epoch": 0.1696535660275687, "grad_norm": 1.9694980918659362, "learning_rate": 9.497336778486704e-07, "loss": 0.3015, "step": 9760 }, { "epoch": 0.16967094856507153, "grad_norm": 2.018602088103326, "learning_rate": 9.497213762147467e-07, "loss": 0.3944, "step": 9761 }, { "epoch": 0.16968833110257436, "grad_norm": 1.5714496887098648, "learning_rate": 9.497090731554126e-07, "loss": 0.2636, "step": 9762 }, { "epoch": 0.16970571364007717, "grad_norm": 2.0674763887088146, "learning_rate": 9.496967686707074e-07, "loss": 0.5408, "step": 9763 }, { "epoch": 0.16972309617758, "grad_norm": 1.514711927128159, "learning_rate": 9.4968446276067e-07, "loss": 0.3955, "step": 9764 }, { "epoch": 0.16974047871508283, "grad_norm": 2.574992461592007, "learning_rate": 9.496721554253394e-07, "loss": 0.5139, "step": 9765 }, { "epoch": 0.16975786125258566, "grad_norm": 1.291774836299974, "learning_rate": 9.496598466647548e-07, "loss": 0.4179, "step": 9766 }, { "epoch": 0.1697752437900885, "grad_norm": 1.7480656950724454, "learning_rate": 9.496475364789547e-07, "loss": 0.3937, "step": 9767 }, { "epoch": 0.1697926263275913, "grad_norm": 2.071172417775222, "learning_rate": 9.496352248679788e-07, "loss": 0.6148, "step": 9768 }, { "epoch": 0.16981000886509412, "grad_norm": 1.3432009958421012, "learning_rate": 9.496229118318655e-07, "loss": 0.345, "step": 9769 }, { "epoch": 0.16982739140259695, "grad_norm": 1.5060846448972474, "learning_rate": 9.496105973706543e-07, "loss": 0.3492, "step": 9770 }, { "epoch": 0.16984477394009978, "grad_norm": 1.3202555375224414, "learning_rate": 9.495982814843839e-07, "loss": 0.3171, "step": 9771 }, { "epoch": 0.1698621564776026, "grad_norm": 1.1757723934919995, "learning_rate": 9.495859641730937e-07, "loss": 0.4102, "step": 9772 }, { "epoch": 0.1698795390151054, "grad_norm": 2.6599882269660853, "learning_rate": 9.495736454368224e-07, "loss": 0.2757, "step": 9773 }, { "epoch": 0.16989692155260824, "grad_norm": 2.5972159385806237, "learning_rate": 9.495613252756091e-07, "loss": 0.6951, "step": 9774 }, { "epoch": 0.16991430409011107, "grad_norm": 1.1962824113850923, "learning_rate": 9.495490036894929e-07, "loss": 0.3146, "step": 9775 }, { "epoch": 0.1699316866276139, "grad_norm": 1.5373141786146631, "learning_rate": 9.49536680678513e-07, "loss": 0.5191, "step": 9776 }, { "epoch": 0.16994906916511673, "grad_norm": 4.119580021012151, "learning_rate": 9.495243562427082e-07, "loss": 0.6341, "step": 9777 }, { "epoch": 0.16996645170261954, "grad_norm": 1.7274833354942358, "learning_rate": 9.495120303821178e-07, "loss": 0.3985, "step": 9778 }, { "epoch": 0.16998383424012237, "grad_norm": 1.7242480141437724, "learning_rate": 9.494997030967807e-07, "loss": 0.5938, "step": 9779 }, { "epoch": 0.1700012167776252, "grad_norm": 2.725179634312642, "learning_rate": 9.49487374386736e-07, "loss": 0.385, "step": 9780 }, { "epoch": 0.17001859931512803, "grad_norm": 1.99611034006582, "learning_rate": 9.494750442520228e-07, "loss": 0.3987, "step": 9781 }, { "epoch": 0.17003598185263086, "grad_norm": 2.9535000566093577, "learning_rate": 9.494627126926802e-07, "loss": 0.3529, "step": 9782 }, { "epoch": 0.17005336439013366, "grad_norm": 1.4599851985767618, "learning_rate": 9.494503797087472e-07, "loss": 0.2412, "step": 9783 }, { "epoch": 0.1700707469276365, "grad_norm": 2.4717210291860177, "learning_rate": 9.49438045300263e-07, "loss": 0.4564, "step": 9784 }, { "epoch": 0.17008812946513932, "grad_norm": 1.7823668778565138, "learning_rate": 9.494257094672666e-07, "loss": 0.3682, "step": 9785 }, { "epoch": 0.17010551200264215, "grad_norm": 2.252290114179217, "learning_rate": 9.494133722097972e-07, "loss": 1.4432, "step": 9786 }, { "epoch": 0.17012289454014498, "grad_norm": 2.2784292716381644, "learning_rate": 9.494010335278939e-07, "loss": 0.5703, "step": 9787 }, { "epoch": 0.17014027707764778, "grad_norm": 2.789019909990781, "learning_rate": 9.493886934215958e-07, "loss": 0.5154, "step": 9788 }, { "epoch": 0.1701576596151506, "grad_norm": 1.5451593646787092, "learning_rate": 9.493763518909419e-07, "loss": 0.8386, "step": 9789 }, { "epoch": 0.17017504215265344, "grad_norm": 1.360096551934099, "learning_rate": 9.493640089359714e-07, "loss": 0.4511, "step": 9790 }, { "epoch": 0.17019242469015627, "grad_norm": 1.987794570080772, "learning_rate": 9.493516645567235e-07, "loss": 0.849, "step": 9791 }, { "epoch": 0.1702098072276591, "grad_norm": 1.7190628683173164, "learning_rate": 9.49339318753237e-07, "loss": 0.3362, "step": 9792 }, { "epoch": 0.1702271897651619, "grad_norm": 2.741653786516346, "learning_rate": 9.493269715255514e-07, "loss": 0.7914, "step": 9793 }, { "epoch": 0.17024457230266474, "grad_norm": 3.1452296080819844, "learning_rate": 9.493146228737058e-07, "loss": 0.3381, "step": 9794 }, { "epoch": 0.17026195484016757, "grad_norm": 1.7537982127368303, "learning_rate": 9.493022727977391e-07, "loss": 0.3506, "step": 9795 }, { "epoch": 0.1702793373776704, "grad_norm": 1.7572265875889286, "learning_rate": 9.492899212976907e-07, "loss": 0.4568, "step": 9796 }, { "epoch": 0.17029671991517323, "grad_norm": 2.2379391713486574, "learning_rate": 9.492775683735997e-07, "loss": 0.5182, "step": 9797 }, { "epoch": 0.17031410245267603, "grad_norm": 1.5767602793909679, "learning_rate": 9.49265214025505e-07, "loss": 0.5883, "step": 9798 }, { "epoch": 0.17033148499017886, "grad_norm": 2.329147282688, "learning_rate": 9.492528582534461e-07, "loss": 0.4103, "step": 9799 }, { "epoch": 0.1703488675276817, "grad_norm": 1.8877455433322299, "learning_rate": 9.492405010574618e-07, "loss": 0.6269, "step": 9800 }, { "epoch": 0.17036625006518452, "grad_norm": 1.5382402935196582, "learning_rate": 9.492281424375916e-07, "loss": 0.6716, "step": 9801 }, { "epoch": 0.17038363260268735, "grad_norm": 2.2890812983642523, "learning_rate": 9.492157823938746e-07, "loss": 0.4122, "step": 9802 }, { "epoch": 0.17040101514019015, "grad_norm": 1.9086900138957867, "learning_rate": 9.492034209263498e-07, "loss": 0.4296, "step": 9803 }, { "epoch": 0.17041839767769298, "grad_norm": 2.1009539054960755, "learning_rate": 9.491910580350566e-07, "loss": 0.4188, "step": 9804 }, { "epoch": 0.17043578021519581, "grad_norm": 2.494221090732979, "learning_rate": 9.491786937200341e-07, "loss": 0.4933, "step": 9805 }, { "epoch": 0.17045316275269864, "grad_norm": 2.265369750226021, "learning_rate": 9.491663279813214e-07, "loss": 0.6592, "step": 9806 }, { "epoch": 0.17047054529020148, "grad_norm": 1.2215823795992333, "learning_rate": 9.491539608189578e-07, "loss": 0.5854, "step": 9807 }, { "epoch": 0.17048792782770428, "grad_norm": 1.5588880252688146, "learning_rate": 9.491415922329825e-07, "loss": 0.6424, "step": 9808 }, { "epoch": 0.1705053103652071, "grad_norm": 1.6099202972697613, "learning_rate": 9.491292222234347e-07, "loss": 0.5774, "step": 9809 }, { "epoch": 0.17052269290270994, "grad_norm": 1.5136188340188197, "learning_rate": 9.491168507903535e-07, "loss": 0.4821, "step": 9810 }, { "epoch": 0.17054007544021277, "grad_norm": 1.5798133673836547, "learning_rate": 9.491044779337782e-07, "loss": 0.3916, "step": 9811 }, { "epoch": 0.1705574579777156, "grad_norm": 2.211046581445321, "learning_rate": 9.490921036537481e-07, "loss": 0.4623, "step": 9812 }, { "epoch": 0.1705748405152184, "grad_norm": 2.15541624149512, "learning_rate": 9.490797279503022e-07, "loss": 0.5928, "step": 9813 }, { "epoch": 0.17059222305272123, "grad_norm": 1.5742523945894296, "learning_rate": 9.490673508234799e-07, "loss": 0.3627, "step": 9814 }, { "epoch": 0.17060960559022406, "grad_norm": 1.493991983999221, "learning_rate": 9.490549722733205e-07, "loss": 0.3385, "step": 9815 }, { "epoch": 0.1706269881277269, "grad_norm": 2.0591243783941215, "learning_rate": 9.49042592299863e-07, "loss": 0.3753, "step": 9816 }, { "epoch": 0.17064437066522972, "grad_norm": 1.7359047156714758, "learning_rate": 9.490302109031467e-07, "loss": 0.202, "step": 9817 }, { "epoch": 0.17066175320273252, "grad_norm": 2.1821735670377618, "learning_rate": 9.49017828083211e-07, "loss": 0.7555, "step": 9818 }, { "epoch": 0.17067913574023535, "grad_norm": 2.17807159949507, "learning_rate": 9.49005443840095e-07, "loss": 0.425, "step": 9819 }, { "epoch": 0.17069651827773819, "grad_norm": 1.9767829076393717, "learning_rate": 9.48993058173838e-07, "loss": 0.3711, "step": 9820 }, { "epoch": 0.17071390081524102, "grad_norm": 0.9137939534056547, "learning_rate": 9.489806710844792e-07, "loss": 0.4841, "step": 9821 }, { "epoch": 0.17073128335274385, "grad_norm": 1.6482633525701704, "learning_rate": 9.489682825720581e-07, "loss": 0.526, "step": 9822 }, { "epoch": 0.17074866589024665, "grad_norm": 1.8831022401452158, "learning_rate": 9.489558926366137e-07, "loss": 0.2554, "step": 9823 }, { "epoch": 0.17076604842774948, "grad_norm": 3.2456458475860184, "learning_rate": 9.489435012781853e-07, "loss": 0.5029, "step": 9824 }, { "epoch": 0.1707834309652523, "grad_norm": 2.048295081797184, "learning_rate": 9.489311084968122e-07, "loss": 0.6366, "step": 9825 }, { "epoch": 0.17080081350275514, "grad_norm": 1.3791207038149313, "learning_rate": 9.489187142925337e-07, "loss": 0.4223, "step": 9826 }, { "epoch": 0.17081819604025797, "grad_norm": 3.412771060012223, "learning_rate": 9.489063186653891e-07, "loss": 0.4356, "step": 9827 }, { "epoch": 0.17083557857776077, "grad_norm": 1.8343470413733425, "learning_rate": 9.488939216154179e-07, "loss": 0.3695, "step": 9828 }, { "epoch": 0.1708529611152636, "grad_norm": 1.781326174103714, "learning_rate": 9.48881523142659e-07, "loss": 0.3613, "step": 9829 }, { "epoch": 0.17087034365276643, "grad_norm": 1.8053446064869176, "learning_rate": 9.488691232471519e-07, "loss": 0.2938, "step": 9830 }, { "epoch": 0.17088772619026926, "grad_norm": 1.1652423144339568, "learning_rate": 9.488567219289358e-07, "loss": 0.3517, "step": 9831 }, { "epoch": 0.1709051087277721, "grad_norm": 1.4007726883832619, "learning_rate": 9.488443191880501e-07, "loss": 0.354, "step": 9832 }, { "epoch": 0.1709224912652749, "grad_norm": 1.5404609981859603, "learning_rate": 9.488319150245341e-07, "loss": 0.6865, "step": 9833 }, { "epoch": 0.17093987380277773, "grad_norm": 2.547323093725529, "learning_rate": 9.488195094384272e-07, "loss": 0.7613, "step": 9834 }, { "epoch": 0.17095725634028056, "grad_norm": 2.374423264132465, "learning_rate": 9.488071024297686e-07, "loss": 0.2811, "step": 9835 }, { "epoch": 0.17097463887778339, "grad_norm": 1.7662711918352432, "learning_rate": 9.487946939985976e-07, "loss": 0.4608, "step": 9836 }, { "epoch": 0.17099202141528622, "grad_norm": 2.595815880331381, "learning_rate": 9.487822841449535e-07, "loss": 0.5466, "step": 9837 }, { "epoch": 0.17100940395278902, "grad_norm": 1.7965791440247685, "learning_rate": 9.487698728688758e-07, "loss": 0.4839, "step": 9838 }, { "epoch": 0.17102678649029185, "grad_norm": 2.0968379007998017, "learning_rate": 9.487574601704038e-07, "loss": 0.3661, "step": 9839 }, { "epoch": 0.17104416902779468, "grad_norm": 2.8418045447226503, "learning_rate": 9.487450460495767e-07, "loss": 0.6117, "step": 9840 }, { "epoch": 0.1710615515652975, "grad_norm": 6.310184631191168, "learning_rate": 9.487326305064339e-07, "loss": 1.5963, "step": 9841 }, { "epoch": 0.17107893410280034, "grad_norm": 3.0600221993631203, "learning_rate": 9.487202135410148e-07, "loss": 0.5551, "step": 9842 }, { "epoch": 0.17109631664030314, "grad_norm": 1.2729434715861216, "learning_rate": 9.487077951533588e-07, "loss": 0.3076, "step": 9843 }, { "epoch": 0.17111369917780597, "grad_norm": 1.6280846166753413, "learning_rate": 9.486953753435054e-07, "loss": 0.8048, "step": 9844 }, { "epoch": 0.1711310817153088, "grad_norm": 3.7724838465018395, "learning_rate": 9.486829541114936e-07, "loss": 0.6321, "step": 9845 }, { "epoch": 0.17114846425281163, "grad_norm": 2.308421804357532, "learning_rate": 9.486705314573629e-07, "loss": 0.3363, "step": 9846 }, { "epoch": 0.17116584679031446, "grad_norm": 1.865589369238923, "learning_rate": 9.486581073811529e-07, "loss": 0.5599, "step": 9847 }, { "epoch": 0.17118322932781727, "grad_norm": 1.5477238451469888, "learning_rate": 9.486456818829025e-07, "loss": 0.4164, "step": 9848 }, { "epoch": 0.1712006118653201, "grad_norm": 4.67403456689465, "learning_rate": 9.486332549626516e-07, "loss": 0.504, "step": 9849 }, { "epoch": 0.17121799440282293, "grad_norm": 2.187750910053746, "learning_rate": 9.486208266204393e-07, "loss": 0.3098, "step": 9850 }, { "epoch": 0.17123537694032576, "grad_norm": 2.026088455090999, "learning_rate": 9.48608396856305e-07, "loss": 0.279, "step": 9851 }, { "epoch": 0.1712527594778286, "grad_norm": 1.4446337139203373, "learning_rate": 9.485959656702882e-07, "loss": 0.5551, "step": 9852 }, { "epoch": 0.1712701420153314, "grad_norm": 1.6507516067362529, "learning_rate": 9.485835330624283e-07, "loss": 0.6905, "step": 9853 }, { "epoch": 0.17128752455283422, "grad_norm": 2.4170622534904007, "learning_rate": 9.485710990327647e-07, "loss": 0.4342, "step": 9854 }, { "epoch": 0.17130490709033705, "grad_norm": 3.3286847627317555, "learning_rate": 9.485586635813367e-07, "loss": 0.6457, "step": 9855 }, { "epoch": 0.17132228962783988, "grad_norm": 1.5834994296646834, "learning_rate": 9.485462267081839e-07, "loss": 0.4231, "step": 9856 }, { "epoch": 0.1713396721653427, "grad_norm": 1.0560094226079275, "learning_rate": 9.485337884133454e-07, "loss": 0.3678, "step": 9857 }, { "epoch": 0.1713570547028455, "grad_norm": 1.9581393907939955, "learning_rate": 9.485213486968609e-07, "loss": 0.329, "step": 9858 }, { "epoch": 0.17137443724034834, "grad_norm": 2.6405558371376685, "learning_rate": 9.485089075587698e-07, "loss": 0.4284, "step": 9859 }, { "epoch": 0.17139181977785117, "grad_norm": 1.9964181945068544, "learning_rate": 9.484964649991116e-07, "loss": 0.5745, "step": 9860 }, { "epoch": 0.171409202315354, "grad_norm": 2.518717096995376, "learning_rate": 9.484840210179255e-07, "loss": 0.4683, "step": 9861 }, { "epoch": 0.17142658485285683, "grad_norm": 1.9589272266543318, "learning_rate": 9.484715756152511e-07, "loss": 0.8824, "step": 9862 }, { "epoch": 0.17144396739035964, "grad_norm": 1.9322549608115098, "learning_rate": 9.484591287911277e-07, "loss": 0.4105, "step": 9863 }, { "epoch": 0.17146134992786247, "grad_norm": 4.800178358365043, "learning_rate": 9.48446680545595e-07, "loss": 0.3914, "step": 9864 }, { "epoch": 0.1714787324653653, "grad_norm": 1.1769586134153427, "learning_rate": 9.484342308786924e-07, "loss": 0.3591, "step": 9865 }, { "epoch": 0.17149611500286813, "grad_norm": 1.4799420146976205, "learning_rate": 9.48421779790459e-07, "loss": 0.458, "step": 9866 }, { "epoch": 0.17151349754037096, "grad_norm": 1.5592614422883027, "learning_rate": 9.484093272809348e-07, "loss": 0.7041, "step": 9867 }, { "epoch": 0.17153088007787376, "grad_norm": 1.1438695184163563, "learning_rate": 9.48396873350159e-07, "loss": 0.276, "step": 9868 }, { "epoch": 0.1715482626153766, "grad_norm": 2.1317156221006317, "learning_rate": 9.48384417998171e-07, "loss": 0.5629, "step": 9869 }, { "epoch": 0.17156564515287942, "grad_norm": 2.071893079365417, "learning_rate": 9.483719612250103e-07, "loss": 0.4382, "step": 9870 }, { "epoch": 0.17158302769038225, "grad_norm": 1.7391774258245583, "learning_rate": 9.483595030307165e-07, "loss": 0.21, "step": 9871 }, { "epoch": 0.17160041022788505, "grad_norm": 2.5165517697119446, "learning_rate": 9.483470434153291e-07, "loss": 0.4451, "step": 9872 }, { "epoch": 0.17161779276538788, "grad_norm": 2.417654961020297, "learning_rate": 9.483345823788875e-07, "loss": 0.3194, "step": 9873 }, { "epoch": 0.1716351753028907, "grad_norm": 2.4667131229591286, "learning_rate": 9.483221199214313e-07, "loss": 0.4013, "step": 9874 }, { "epoch": 0.17165255784039354, "grad_norm": 2.194091258905167, "learning_rate": 9.483096560429997e-07, "loss": 0.4181, "step": 9875 }, { "epoch": 0.17166994037789637, "grad_norm": 2.257047997669802, "learning_rate": 9.482971907436326e-07, "loss": 0.4959, "step": 9876 }, { "epoch": 0.17168732291539918, "grad_norm": 3.458399563262625, "learning_rate": 9.482847240233694e-07, "loss": 0.5927, "step": 9877 }, { "epoch": 0.171704705452902, "grad_norm": 2.4824558582487564, "learning_rate": 9.482722558822493e-07, "loss": 0.3319, "step": 9878 }, { "epoch": 0.17172208799040484, "grad_norm": 1.5391711392204095, "learning_rate": 9.482597863203123e-07, "loss": 0.49, "step": 9879 }, { "epoch": 0.17173947052790767, "grad_norm": 1.9527043157157544, "learning_rate": 9.482473153375976e-07, "loss": 0.6527, "step": 9880 }, { "epoch": 0.1717568530654105, "grad_norm": 1.2516463515286083, "learning_rate": 9.482348429341449e-07, "loss": 0.2958, "step": 9881 }, { "epoch": 0.1717742356029133, "grad_norm": 1.6842023736253047, "learning_rate": 9.482223691099935e-07, "loss": 0.4361, "step": 9882 }, { "epoch": 0.17179161814041613, "grad_norm": 1.4891388669796464, "learning_rate": 9.482098938651832e-07, "loss": 0.4652, "step": 9883 }, { "epoch": 0.17180900067791896, "grad_norm": 2.769934541371216, "learning_rate": 9.481974171997535e-07, "loss": 0.4951, "step": 9884 }, { "epoch": 0.1718263832154218, "grad_norm": 1.6919208488576347, "learning_rate": 9.481849391137438e-07, "loss": 0.3104, "step": 9885 }, { "epoch": 0.17184376575292462, "grad_norm": 1.654759713709027, "learning_rate": 9.481724596071936e-07, "loss": 0.4247, "step": 9886 }, { "epoch": 0.17186114829042742, "grad_norm": 2.382955340940778, "learning_rate": 9.481599786801426e-07, "loss": 0.8342, "step": 9887 }, { "epoch": 0.17187853082793025, "grad_norm": 1.6954654055575082, "learning_rate": 9.481474963326305e-07, "loss": 0.4808, "step": 9888 }, { "epoch": 0.17189591336543308, "grad_norm": 1.3458237866676654, "learning_rate": 9.481350125646966e-07, "loss": 0.5093, "step": 9889 }, { "epoch": 0.17191329590293591, "grad_norm": 1.9904727308242214, "learning_rate": 9.481225273763805e-07, "loss": 0.6401, "step": 9890 }, { "epoch": 0.17193067844043874, "grad_norm": 2.280990349347079, "learning_rate": 9.481100407677219e-07, "loss": 0.8225, "step": 9891 }, { "epoch": 0.17194806097794155, "grad_norm": 1.9342968888646943, "learning_rate": 9.480975527387604e-07, "loss": 0.3583, "step": 9892 }, { "epoch": 0.17196544351544438, "grad_norm": 1.4252468626590373, "learning_rate": 9.480850632895354e-07, "loss": 0.4249, "step": 9893 }, { "epoch": 0.1719828260529472, "grad_norm": 1.5536715758464892, "learning_rate": 9.480725724200866e-07, "loss": 0.4425, "step": 9894 }, { "epoch": 0.17200020859045004, "grad_norm": 1.6399155164115813, "learning_rate": 9.480600801304536e-07, "loss": 0.2765, "step": 9895 }, { "epoch": 0.17201759112795287, "grad_norm": 3.942212219806576, "learning_rate": 9.48047586420676e-07, "loss": 0.4805, "step": 9896 }, { "epoch": 0.17203497366545567, "grad_norm": 1.5673318019547924, "learning_rate": 9.480350912907933e-07, "loss": 0.6051, "step": 9897 }, { "epoch": 0.1720523562029585, "grad_norm": 2.1861992108705404, "learning_rate": 9.480225947408452e-07, "loss": 0.4761, "step": 9898 }, { "epoch": 0.17206973874046133, "grad_norm": 2.7407359285011643, "learning_rate": 9.480100967708712e-07, "loss": 0.5981, "step": 9899 }, { "epoch": 0.17208712127796416, "grad_norm": 1.6116930796140103, "learning_rate": 9.479975973809111e-07, "loss": 0.5341, "step": 9900 }, { "epoch": 0.172104503815467, "grad_norm": 1.5108685794800316, "learning_rate": 9.479850965710043e-07, "loss": 0.4358, "step": 9901 }, { "epoch": 0.1721218863529698, "grad_norm": 2.959506189113166, "learning_rate": 9.479725943411906e-07, "loss": 0.565, "step": 9902 }, { "epoch": 0.17213926889047262, "grad_norm": 1.5445800682150728, "learning_rate": 9.479600906915095e-07, "loss": 0.5265, "step": 9903 }, { "epoch": 0.17215665142797545, "grad_norm": 1.885132622737949, "learning_rate": 9.479475856220008e-07, "loss": 0.5754, "step": 9904 }, { "epoch": 0.17217403396547828, "grad_norm": 1.4823316180722188, "learning_rate": 9.47935079132704e-07, "loss": 0.5912, "step": 9905 }, { "epoch": 0.17219141650298112, "grad_norm": 2.1314963016656088, "learning_rate": 9.479225712236587e-07, "loss": 0.4713, "step": 9906 }, { "epoch": 0.17220879904048392, "grad_norm": 2.1047453732374373, "learning_rate": 9.479100618949046e-07, "loss": 0.3807, "step": 9907 }, { "epoch": 0.17222618157798675, "grad_norm": 1.6834712070245437, "learning_rate": 9.478975511464814e-07, "loss": 0.2939, "step": 9908 }, { "epoch": 0.17224356411548958, "grad_norm": 2.012112367112524, "learning_rate": 9.478850389784287e-07, "loss": 0.2959, "step": 9909 }, { "epoch": 0.1722609466529924, "grad_norm": 1.9246367990926414, "learning_rate": 9.478725253907862e-07, "loss": 0.4571, "step": 9910 }, { "epoch": 0.17227832919049524, "grad_norm": 2.115482104823513, "learning_rate": 9.478600103835935e-07, "loss": 0.5622, "step": 9911 }, { "epoch": 0.17229571172799804, "grad_norm": 1.6298883990642388, "learning_rate": 9.478474939568903e-07, "loss": 0.3976, "step": 9912 }, { "epoch": 0.17231309426550087, "grad_norm": 1.1529316470709432, "learning_rate": 9.478349761107163e-07, "loss": 0.376, "step": 9913 }, { "epoch": 0.1723304768030037, "grad_norm": 1.580369498509386, "learning_rate": 9.478224568451111e-07, "loss": 0.7522, "step": 9914 }, { "epoch": 0.17234785934050653, "grad_norm": 1.8915172304598826, "learning_rate": 9.478099361601144e-07, "loss": 0.5802, "step": 9915 }, { "epoch": 0.17236524187800936, "grad_norm": 2.1374395058358426, "learning_rate": 9.47797414055766e-07, "loss": 0.4842, "step": 9916 }, { "epoch": 0.17238262441551216, "grad_norm": 2.3549212240105377, "learning_rate": 9.477848905321054e-07, "loss": 0.3999, "step": 9917 }, { "epoch": 0.172400006953015, "grad_norm": 2.6349916854842683, "learning_rate": 9.477723655891725e-07, "loss": 0.3501, "step": 9918 }, { "epoch": 0.17241738949051783, "grad_norm": 1.65851438453498, "learning_rate": 9.477598392270068e-07, "loss": 0.6205, "step": 9919 }, { "epoch": 0.17243477202802066, "grad_norm": 2.153275408106603, "learning_rate": 9.477473114456481e-07, "loss": 0.4927, "step": 9920 }, { "epoch": 0.17245215456552349, "grad_norm": 1.8487979393666567, "learning_rate": 9.477347822451361e-07, "loss": 0.4948, "step": 9921 }, { "epoch": 0.1724695371030263, "grad_norm": 2.2178014673637336, "learning_rate": 9.477222516255105e-07, "loss": 0.4567, "step": 9922 }, { "epoch": 0.17248691964052912, "grad_norm": 2.6347923290017765, "learning_rate": 9.477097195868111e-07, "loss": 0.3193, "step": 9923 }, { "epoch": 0.17250430217803195, "grad_norm": 1.7025918924005299, "learning_rate": 9.476971861290776e-07, "loss": 0.3834, "step": 9924 }, { "epoch": 0.17252168471553478, "grad_norm": 2.386706024756942, "learning_rate": 9.476846512523495e-07, "loss": 0.6813, "step": 9925 }, { "epoch": 0.1725390672530376, "grad_norm": 2.2973343236165364, "learning_rate": 9.476721149566669e-07, "loss": 0.5653, "step": 9926 }, { "epoch": 0.1725564497905404, "grad_norm": 1.8865247288480058, "learning_rate": 9.476595772420692e-07, "loss": 0.3866, "step": 9927 }, { "epoch": 0.17257383232804324, "grad_norm": 1.8955728383599357, "learning_rate": 9.476470381085963e-07, "loss": 0.4126, "step": 9928 }, { "epoch": 0.17259121486554607, "grad_norm": 3.3459419157643278, "learning_rate": 9.47634497556288e-07, "loss": 0.6487, "step": 9929 }, { "epoch": 0.1726085974030489, "grad_norm": 1.3066152568221974, "learning_rate": 9.476219555851839e-07, "loss": 0.5975, "step": 9930 }, { "epoch": 0.17262597994055173, "grad_norm": 1.243514662276875, "learning_rate": 9.476094121953238e-07, "loss": 0.3851, "step": 9931 }, { "epoch": 0.17264336247805454, "grad_norm": 1.4240543869892828, "learning_rate": 9.475968673867474e-07, "loss": 0.3498, "step": 9932 }, { "epoch": 0.17266074501555737, "grad_norm": 2.086264694006972, "learning_rate": 9.475843211594947e-07, "loss": 0.4616, "step": 9933 }, { "epoch": 0.1726781275530602, "grad_norm": 2.306709015585038, "learning_rate": 9.475717735136052e-07, "loss": 0.6283, "step": 9934 }, { "epoch": 0.17269551009056303, "grad_norm": 2.209950750588569, "learning_rate": 9.475592244491188e-07, "loss": 0.3422, "step": 9935 }, { "epoch": 0.17271289262806586, "grad_norm": 2.5370782866488075, "learning_rate": 9.475466739660753e-07, "loss": 0.4759, "step": 9936 }, { "epoch": 0.17273027516556866, "grad_norm": 1.7865331760633971, "learning_rate": 9.475341220645144e-07, "loss": 0.4474, "step": 9937 }, { "epoch": 0.1727476577030715, "grad_norm": 7.928180830672732, "learning_rate": 9.47521568744476e-07, "loss": 0.6604, "step": 9938 }, { "epoch": 0.17276504024057432, "grad_norm": 2.010642708709644, "learning_rate": 9.475090140059997e-07, "loss": 0.3224, "step": 9939 }, { "epoch": 0.17278242277807715, "grad_norm": 2.1787308620634303, "learning_rate": 9.474964578491253e-07, "loss": 0.2503, "step": 9940 }, { "epoch": 0.17279980531557998, "grad_norm": 1.3231329238603866, "learning_rate": 9.474839002738929e-07, "loss": 0.224, "step": 9941 }, { "epoch": 0.17281718785308278, "grad_norm": 2.305249051177949, "learning_rate": 9.47471341280342e-07, "loss": 0.474, "step": 9942 }, { "epoch": 0.1728345703905856, "grad_norm": 1.6008994767105598, "learning_rate": 9.474587808685125e-07, "loss": 0.4165, "step": 9943 }, { "epoch": 0.17285195292808844, "grad_norm": 2.139517098051087, "learning_rate": 9.474462190384443e-07, "loss": 0.6487, "step": 9944 }, { "epoch": 0.17286933546559127, "grad_norm": 1.7786779976218743, "learning_rate": 9.47433655790177e-07, "loss": 0.6331, "step": 9945 }, { "epoch": 0.1728867180030941, "grad_norm": 2.918290894863096, "learning_rate": 9.474210911237505e-07, "loss": 0.9359, "step": 9946 }, { "epoch": 0.1729041005405969, "grad_norm": 1.5895161964273206, "learning_rate": 9.474085250392047e-07, "loss": 0.4202, "step": 9947 }, { "epoch": 0.17292148307809974, "grad_norm": 1.5849790845340557, "learning_rate": 9.473959575365796e-07, "loss": 0.3734, "step": 9948 }, { "epoch": 0.17293886561560257, "grad_norm": 2.6159775662681866, "learning_rate": 9.473833886159146e-07, "loss": 0.5225, "step": 9949 }, { "epoch": 0.1729562481531054, "grad_norm": 1.235355083383901, "learning_rate": 9.473708182772498e-07, "loss": 0.5329, "step": 9950 }, { "epoch": 0.17297363069060823, "grad_norm": 1.2443778151313907, "learning_rate": 9.473582465206252e-07, "loss": 0.3684, "step": 9951 }, { "epoch": 0.17299101322811103, "grad_norm": 2.24424559543293, "learning_rate": 9.473456733460803e-07, "loss": 0.3215, "step": 9952 }, { "epoch": 0.17300839576561386, "grad_norm": 1.4370557475210723, "learning_rate": 9.47333098753655e-07, "loss": 0.4038, "step": 9953 }, { "epoch": 0.1730257783031167, "grad_norm": 2.7821607486835354, "learning_rate": 9.473205227433894e-07, "loss": 0.6418, "step": 9954 }, { "epoch": 0.17304316084061952, "grad_norm": 2.944739530963201, "learning_rate": 9.473079453153232e-07, "loss": 0.8839, "step": 9955 }, { "epoch": 0.17306054337812235, "grad_norm": 1.4860329632112068, "learning_rate": 9.472953664694963e-07, "loss": 0.2836, "step": 9956 }, { "epoch": 0.17307792591562515, "grad_norm": 1.5005728152628988, "learning_rate": 9.472827862059486e-07, "loss": 0.2943, "step": 9957 }, { "epoch": 0.17309530845312798, "grad_norm": 2.573778275825199, "learning_rate": 9.472702045247198e-07, "loss": 0.7265, "step": 9958 }, { "epoch": 0.1731126909906308, "grad_norm": 1.8805715570035806, "learning_rate": 9.472576214258501e-07, "loss": 0.4714, "step": 9959 }, { "epoch": 0.17313007352813364, "grad_norm": 2.1364866673364884, "learning_rate": 9.472450369093791e-07, "loss": 0.5792, "step": 9960 }, { "epoch": 0.17314745606563647, "grad_norm": 2.3407383804820494, "learning_rate": 9.472324509753469e-07, "loss": 0.2213, "step": 9961 }, { "epoch": 0.17316483860313928, "grad_norm": 2.3757429184903978, "learning_rate": 9.472198636237931e-07, "loss": 0.8408, "step": 9962 }, { "epoch": 0.1731822211406421, "grad_norm": 2.1107373983575677, "learning_rate": 9.472072748547578e-07, "loss": 0.3935, "step": 9963 }, { "epoch": 0.17319960367814494, "grad_norm": 2.927202223417516, "learning_rate": 9.471946846682809e-07, "loss": 0.5452, "step": 9964 }, { "epoch": 0.17321698621564777, "grad_norm": 3.8345357548599712, "learning_rate": 9.471820930644023e-07, "loss": 0.645, "step": 9965 }, { "epoch": 0.1732343687531506, "grad_norm": 2.4349756770672664, "learning_rate": 9.471695000431619e-07, "loss": 0.6342, "step": 9966 }, { "epoch": 0.1732517512906534, "grad_norm": 1.7893925057734934, "learning_rate": 9.471569056045994e-07, "loss": 0.602, "step": 9967 }, { "epoch": 0.17326913382815623, "grad_norm": 4.308804454022109, "learning_rate": 9.471443097487551e-07, "loss": 0.3188, "step": 9968 }, { "epoch": 0.17328651636565906, "grad_norm": 1.8908882297841871, "learning_rate": 9.471317124756687e-07, "loss": 0.5999, "step": 9969 }, { "epoch": 0.1733038989031619, "grad_norm": 1.397645999034959, "learning_rate": 9.471191137853803e-07, "loss": 0.2308, "step": 9970 }, { "epoch": 0.17332128144066472, "grad_norm": 1.9510979076980501, "learning_rate": 9.471065136779295e-07, "loss": 0.31, "step": 9971 }, { "epoch": 0.17333866397816752, "grad_norm": 1.606335185381411, "learning_rate": 9.470939121533565e-07, "loss": 0.3305, "step": 9972 }, { "epoch": 0.17335604651567035, "grad_norm": 3.0776144859038346, "learning_rate": 9.470813092117011e-07, "loss": 0.6624, "step": 9973 }, { "epoch": 0.17337342905317318, "grad_norm": 1.9765091509690766, "learning_rate": 9.470687048530035e-07, "loss": 0.4794, "step": 9974 }, { "epoch": 0.17339081159067601, "grad_norm": 1.7455531117463163, "learning_rate": 9.470560990773033e-07, "loss": 0.6757, "step": 9975 }, { "epoch": 0.17340819412817884, "grad_norm": 2.343233977702674, "learning_rate": 9.470434918846407e-07, "loss": 0.6994, "step": 9976 }, { "epoch": 0.17342557666568165, "grad_norm": 2.1575121730921873, "learning_rate": 9.470308832750556e-07, "loss": 0.3877, "step": 9977 }, { "epoch": 0.17344295920318448, "grad_norm": 1.2832831457424998, "learning_rate": 9.47018273248588e-07, "loss": 0.3985, "step": 9978 }, { "epoch": 0.1734603417406873, "grad_norm": 1.3015740594778198, "learning_rate": 9.470056618052777e-07, "loss": 0.3634, "step": 9979 }, { "epoch": 0.17347772427819014, "grad_norm": 1.2451673296660977, "learning_rate": 9.469930489451649e-07, "loss": 0.3318, "step": 9980 }, { "epoch": 0.17349510681569297, "grad_norm": 2.1684053899213382, "learning_rate": 9.469804346682894e-07, "loss": 0.4017, "step": 9981 }, { "epoch": 0.17351248935319577, "grad_norm": 1.9219187160679831, "learning_rate": 9.469678189746912e-07, "loss": 0.3896, "step": 9982 }, { "epoch": 0.1735298718906986, "grad_norm": 2.2997460492286246, "learning_rate": 9.469552018644105e-07, "loss": 0.6022, "step": 9983 }, { "epoch": 0.17354725442820143, "grad_norm": 2.36304404118015, "learning_rate": 9.469425833374869e-07, "loss": 0.487, "step": 9984 }, { "epoch": 0.17356463696570426, "grad_norm": 1.5908975645113268, "learning_rate": 9.469299633939608e-07, "loss": 0.7403, "step": 9985 }, { "epoch": 0.1735820195032071, "grad_norm": 1.7604886911970639, "learning_rate": 9.46917342033872e-07, "loss": 0.3563, "step": 9986 }, { "epoch": 0.1735994020407099, "grad_norm": 1.8689741940570475, "learning_rate": 9.469047192572604e-07, "loss": 0.4072, "step": 9987 }, { "epoch": 0.17361678457821272, "grad_norm": 1.7862058077990675, "learning_rate": 9.468920950641662e-07, "loss": 0.495, "step": 9988 }, { "epoch": 0.17363416711571555, "grad_norm": 1.4022942589071352, "learning_rate": 9.468794694546294e-07, "loss": 0.4623, "step": 9989 }, { "epoch": 0.17365154965321838, "grad_norm": 1.6262652506806015, "learning_rate": 9.468668424286898e-07, "loss": 0.5523, "step": 9990 }, { "epoch": 0.17366893219072121, "grad_norm": 3.072067330383664, "learning_rate": 9.468542139863878e-07, "loss": 0.862, "step": 9991 }, { "epoch": 0.17368631472822402, "grad_norm": 1.563900374251396, "learning_rate": 9.46841584127763e-07, "loss": 0.6597, "step": 9992 }, { "epoch": 0.17370369726572685, "grad_norm": 1.4321217093852336, "learning_rate": 9.46828952852856e-07, "loss": 0.2564, "step": 9993 }, { "epoch": 0.17372107980322968, "grad_norm": 2.6117762236292683, "learning_rate": 9.468163201617061e-07, "loss": 0.5586, "step": 9994 }, { "epoch": 0.1737384623407325, "grad_norm": 1.7789354384329392, "learning_rate": 9.468036860543538e-07, "loss": 0.4242, "step": 9995 }, { "epoch": 0.17375584487823534, "grad_norm": 1.5386931897048535, "learning_rate": 9.467910505308392e-07, "loss": 0.6518, "step": 9996 }, { "epoch": 0.17377322741573814, "grad_norm": 1.610366460077243, "learning_rate": 9.46778413591202e-07, "loss": 0.3932, "step": 9997 }, { "epoch": 0.17379060995324097, "grad_norm": 2.087700916673672, "learning_rate": 9.467657752354826e-07, "loss": 0.638, "step": 9998 }, { "epoch": 0.1738079924907438, "grad_norm": 2.0346939051196378, "learning_rate": 9.467531354637208e-07, "loss": 0.7948, "step": 9999 }, { "epoch": 0.17382537502824663, "grad_norm": 2.216139554047142, "learning_rate": 9.467404942759569e-07, "loss": 0.5729, "step": 10000 }, { "epoch": 0.17384275756574946, "grad_norm": 1.8833699233029089, "learning_rate": 9.467278516722309e-07, "loss": 0.5036, "step": 10001 }, { "epoch": 0.17386014010325226, "grad_norm": 1.6514790517708295, "learning_rate": 9.467152076525827e-07, "loss": 0.2945, "step": 10002 }, { "epoch": 0.1738775226407551, "grad_norm": 2.724279546739366, "learning_rate": 9.467025622170526e-07, "loss": 0.5352, "step": 10003 }, { "epoch": 0.17389490517825792, "grad_norm": 1.6666155264264815, "learning_rate": 9.466899153656805e-07, "loss": 0.3841, "step": 10004 }, { "epoch": 0.17391228771576075, "grad_norm": 1.9632313972869688, "learning_rate": 9.466772670985066e-07, "loss": 0.3227, "step": 10005 }, { "epoch": 0.17392967025326359, "grad_norm": 1.6170023403181009, "learning_rate": 9.466646174155709e-07, "loss": 0.2049, "step": 10006 }, { "epoch": 0.1739470527907664, "grad_norm": 1.7532520368398898, "learning_rate": 9.466519663169136e-07, "loss": 0.5531, "step": 10007 }, { "epoch": 0.17396443532826922, "grad_norm": 1.6503463685018136, "learning_rate": 9.466393138025748e-07, "loss": 0.6081, "step": 10008 }, { "epoch": 0.17398181786577205, "grad_norm": 2.52681683961321, "learning_rate": 9.466266598725943e-07, "loss": 0.3393, "step": 10009 }, { "epoch": 0.17399920040327488, "grad_norm": 1.0052792306058764, "learning_rate": 9.466140045270127e-07, "loss": 0.5595, "step": 10010 }, { "epoch": 0.17401658294077768, "grad_norm": 0.9650142669733551, "learning_rate": 9.466013477658696e-07, "loss": 0.2013, "step": 10011 }, { "epoch": 0.1740339654782805, "grad_norm": 1.442902201789094, "learning_rate": 9.465886895892057e-07, "loss": 0.4643, "step": 10012 }, { "epoch": 0.17405134801578334, "grad_norm": 1.716622654409144, "learning_rate": 9.465760299970606e-07, "loss": 0.2152, "step": 10013 }, { "epoch": 0.17406873055328617, "grad_norm": 1.7072472342867417, "learning_rate": 9.465633689894747e-07, "loss": 0.3313, "step": 10014 }, { "epoch": 0.174086113090789, "grad_norm": 1.6729025883735054, "learning_rate": 9.46550706566488e-07, "loss": 0.4267, "step": 10015 }, { "epoch": 0.1741034956282918, "grad_norm": 2.2241557295340995, "learning_rate": 9.465380427281408e-07, "loss": 0.3353, "step": 10016 }, { "epoch": 0.17412087816579463, "grad_norm": 2.74433994519834, "learning_rate": 9.46525377474473e-07, "loss": 0.4103, "step": 10017 }, { "epoch": 0.17413826070329746, "grad_norm": 1.5546126104140294, "learning_rate": 9.465127108055248e-07, "loss": 0.3433, "step": 10018 }, { "epoch": 0.1741556432408003, "grad_norm": 3.038257281169108, "learning_rate": 9.465000427213367e-07, "loss": 0.813, "step": 10019 }, { "epoch": 0.17417302577830313, "grad_norm": 2.339493546896798, "learning_rate": 9.464873732219483e-07, "loss": 0.4939, "step": 10020 }, { "epoch": 0.17419040831580593, "grad_norm": 3.527894140962952, "learning_rate": 9.464747023074002e-07, "loss": 0.3758, "step": 10021 }, { "epoch": 0.17420779085330876, "grad_norm": 1.3690596117015221, "learning_rate": 9.464620299777323e-07, "loss": 0.5636, "step": 10022 }, { "epoch": 0.1742251733908116, "grad_norm": 1.474612255746751, "learning_rate": 9.464493562329849e-07, "loss": 0.3852, "step": 10023 }, { "epoch": 0.17424255592831442, "grad_norm": 2.258892258097112, "learning_rate": 9.464366810731981e-07, "loss": 0.5241, "step": 10024 }, { "epoch": 0.17425993846581725, "grad_norm": 1.9641191698276854, "learning_rate": 9.464240044984121e-07, "loss": 0.3642, "step": 10025 }, { "epoch": 0.17427732100332005, "grad_norm": 1.5507143211750596, "learning_rate": 9.464113265086671e-07, "loss": 0.3318, "step": 10026 }, { "epoch": 0.17429470354082288, "grad_norm": 2.2143992710349725, "learning_rate": 9.463986471040032e-07, "loss": 0.3968, "step": 10027 }, { "epoch": 0.1743120860783257, "grad_norm": 1.8853105734844078, "learning_rate": 9.463859662844606e-07, "loss": 0.429, "step": 10028 }, { "epoch": 0.17432946861582854, "grad_norm": 1.758239978075261, "learning_rate": 9.463732840500797e-07, "loss": 0.4982, "step": 10029 }, { "epoch": 0.17434685115333137, "grad_norm": 1.811922472022105, "learning_rate": 9.463606004009003e-07, "loss": 0.4393, "step": 10030 }, { "epoch": 0.17436423369083418, "grad_norm": 1.4031545241798082, "learning_rate": 9.463479153369632e-07, "loss": 0.2033, "step": 10031 }, { "epoch": 0.174381616228337, "grad_norm": 1.8672957974601623, "learning_rate": 9.46335228858308e-07, "loss": 0.5856, "step": 10032 }, { "epoch": 0.17439899876583984, "grad_norm": 4.707956413394491, "learning_rate": 9.463225409649751e-07, "loss": 0.8506, "step": 10033 }, { "epoch": 0.17441638130334267, "grad_norm": 2.6281232416477813, "learning_rate": 9.463098516570049e-07, "loss": 0.3919, "step": 10034 }, { "epoch": 0.1744337638408455, "grad_norm": 2.4073638123856758, "learning_rate": 9.462971609344374e-07, "loss": 0.5172, "step": 10035 }, { "epoch": 0.1744511463783483, "grad_norm": 2.249973519081504, "learning_rate": 9.462844687973131e-07, "loss": 0.5641, "step": 10036 }, { "epoch": 0.17446852891585113, "grad_norm": 1.691240518701547, "learning_rate": 9.462717752456717e-07, "loss": 0.3482, "step": 10037 }, { "epoch": 0.17448591145335396, "grad_norm": 1.7154824463130454, "learning_rate": 9.462590802795541e-07, "loss": 0.25, "step": 10038 }, { "epoch": 0.1745032939908568, "grad_norm": 1.5984080317003269, "learning_rate": 9.462463838990001e-07, "loss": 0.4245, "step": 10039 }, { "epoch": 0.17452067652835962, "grad_norm": 1.675600774172473, "learning_rate": 9.4623368610405e-07, "loss": 0.3818, "step": 10040 }, { "epoch": 0.17453805906586242, "grad_norm": 2.823133309254445, "learning_rate": 9.46220986894744e-07, "loss": 0.7178, "step": 10041 }, { "epoch": 0.17455544160336525, "grad_norm": 1.4689201044374502, "learning_rate": 9.462082862711227e-07, "loss": 0.4058, "step": 10042 }, { "epoch": 0.17457282414086808, "grad_norm": 3.8823322823860327, "learning_rate": 9.461955842332259e-07, "loss": 0.5352, "step": 10043 }, { "epoch": 0.1745902066783709, "grad_norm": 1.487305997640227, "learning_rate": 9.461828807810942e-07, "loss": 0.234, "step": 10044 }, { "epoch": 0.17460758921587374, "grad_norm": 1.680091433962195, "learning_rate": 9.461701759147677e-07, "loss": 0.3118, "step": 10045 }, { "epoch": 0.17462497175337655, "grad_norm": 2.048707325466839, "learning_rate": 9.461574696342867e-07, "loss": 0.3252, "step": 10046 }, { "epoch": 0.17464235429087938, "grad_norm": 1.056870929331426, "learning_rate": 9.461447619396913e-07, "loss": 0.4243, "step": 10047 }, { "epoch": 0.1746597368283822, "grad_norm": 2.279359256203115, "learning_rate": 9.461320528310222e-07, "loss": 0.609, "step": 10048 }, { "epoch": 0.17467711936588504, "grad_norm": 1.840951869272976, "learning_rate": 9.461193423083194e-07, "loss": 0.399, "step": 10049 }, { "epoch": 0.17469450190338787, "grad_norm": 1.8606173076168055, "learning_rate": 9.461066303716229e-07, "loss": 0.4841, "step": 10050 }, { "epoch": 0.17471188444089067, "grad_norm": 1.855712691019817, "learning_rate": 9.460939170209736e-07, "loss": 0.3755, "step": 10051 }, { "epoch": 0.1747292669783935, "grad_norm": 2.0546848624362277, "learning_rate": 9.460812022564115e-07, "loss": 0.6441, "step": 10052 }, { "epoch": 0.17474664951589633, "grad_norm": 2.381427575496147, "learning_rate": 9.460684860779769e-07, "loss": 0.5208, "step": 10053 }, { "epoch": 0.17476403205339916, "grad_norm": 2.1337262121197256, "learning_rate": 9.4605576848571e-07, "loss": 0.3837, "step": 10054 }, { "epoch": 0.174781414590902, "grad_norm": 2.4420439602498423, "learning_rate": 9.460430494796512e-07, "loss": 0.4541, "step": 10055 }, { "epoch": 0.1747987971284048, "grad_norm": 1.6369930825084666, "learning_rate": 9.460303290598409e-07, "loss": 0.4684, "step": 10056 }, { "epoch": 0.17481617966590762, "grad_norm": 1.480894413472434, "learning_rate": 9.460176072263193e-07, "loss": 0.5152, "step": 10057 }, { "epoch": 0.17483356220341045, "grad_norm": 1.732072653127835, "learning_rate": 9.460048839791268e-07, "loss": 0.4471, "step": 10058 }, { "epoch": 0.17485094474091328, "grad_norm": 1.7197820210200039, "learning_rate": 9.459921593183038e-07, "loss": 0.2646, "step": 10059 }, { "epoch": 0.1748683272784161, "grad_norm": 3.497794840394503, "learning_rate": 9.459794332438904e-07, "loss": 0.6401, "step": 10060 }, { "epoch": 0.17488570981591892, "grad_norm": 2.0447971434914125, "learning_rate": 9.459667057559271e-07, "loss": 0.4318, "step": 10061 }, { "epoch": 0.17490309235342175, "grad_norm": 2.144256971257897, "learning_rate": 9.459539768544542e-07, "loss": 0.3318, "step": 10062 }, { "epoch": 0.17492047489092458, "grad_norm": 3.1200342969522747, "learning_rate": 9.459412465395121e-07, "loss": 0.6301, "step": 10063 }, { "epoch": 0.1749378574284274, "grad_norm": 1.9819202575625625, "learning_rate": 9.459285148111409e-07, "loss": 0.2898, "step": 10064 }, { "epoch": 0.17495523996593024, "grad_norm": 2.0276170937178835, "learning_rate": 9.459157816693813e-07, "loss": 0.2908, "step": 10065 }, { "epoch": 0.17497262250343304, "grad_norm": 1.8185325065862052, "learning_rate": 9.459030471142735e-07, "loss": 0.466, "step": 10066 }, { "epoch": 0.17499000504093587, "grad_norm": 1.594276273667886, "learning_rate": 9.458903111458579e-07, "loss": 0.3988, "step": 10067 }, { "epoch": 0.1750073875784387, "grad_norm": 1.5207087421503882, "learning_rate": 9.458775737641749e-07, "loss": 0.7242, "step": 10068 }, { "epoch": 0.17502477011594153, "grad_norm": 1.5770657162559734, "learning_rate": 9.458648349692646e-07, "loss": 0.4032, "step": 10069 }, { "epoch": 0.17504215265344436, "grad_norm": 2.0872407617858113, "learning_rate": 9.458520947611677e-07, "loss": 0.4315, "step": 10070 }, { "epoch": 0.17505953519094716, "grad_norm": 2.2234381167429715, "learning_rate": 9.458393531399245e-07, "loss": 0.5708, "step": 10071 }, { "epoch": 0.17507691772845, "grad_norm": 2.711853880761891, "learning_rate": 9.458266101055754e-07, "loss": 0.4406, "step": 10072 }, { "epoch": 0.17509430026595282, "grad_norm": 2.887438018051462, "learning_rate": 9.458138656581607e-07, "loss": 0.7655, "step": 10073 }, { "epoch": 0.17511168280345565, "grad_norm": 1.3811079579876304, "learning_rate": 9.458011197977207e-07, "loss": 0.342, "step": 10074 }, { "epoch": 0.17512906534095848, "grad_norm": 2.678256376415691, "learning_rate": 9.457883725242961e-07, "loss": 0.6646, "step": 10075 }, { "epoch": 0.1751464478784613, "grad_norm": 5.526032559648485, "learning_rate": 9.457756238379271e-07, "loss": 0.8086, "step": 10076 }, { "epoch": 0.17516383041596412, "grad_norm": 1.5674274790240954, "learning_rate": 9.45762873738654e-07, "loss": 0.3326, "step": 10077 }, { "epoch": 0.17518121295346695, "grad_norm": 1.041934817317812, "learning_rate": 9.457501222265176e-07, "loss": 0.3912, "step": 10078 }, { "epoch": 0.17519859549096978, "grad_norm": 1.525808163033054, "learning_rate": 9.457373693015578e-07, "loss": 0.4734, "step": 10079 }, { "epoch": 0.1752159780284726, "grad_norm": 1.9195758078702185, "learning_rate": 9.457246149638156e-07, "loss": 0.3261, "step": 10080 }, { "epoch": 0.1752333605659754, "grad_norm": 1.4230698095574772, "learning_rate": 9.457118592133309e-07, "loss": 0.4534, "step": 10081 }, { "epoch": 0.17525074310347824, "grad_norm": 1.932372702263566, "learning_rate": 9.456991020501444e-07, "loss": 0.5475, "step": 10082 }, { "epoch": 0.17526812564098107, "grad_norm": 1.4870593463777853, "learning_rate": 9.456863434742965e-07, "loss": 0.3508, "step": 10083 }, { "epoch": 0.1752855081784839, "grad_norm": 1.797584298931977, "learning_rate": 9.456735834858276e-07, "loss": 0.4219, "step": 10084 }, { "epoch": 0.17530289071598673, "grad_norm": 7.4053496974923405, "learning_rate": 9.456608220847781e-07, "loss": 0.4874, "step": 10085 }, { "epoch": 0.17532027325348953, "grad_norm": 2.2107679252483843, "learning_rate": 9.456480592711888e-07, "loss": 0.5141, "step": 10086 }, { "epoch": 0.17533765579099236, "grad_norm": 2.096685810103876, "learning_rate": 9.456352950450997e-07, "loss": 0.51, "step": 10087 }, { "epoch": 0.1753550383284952, "grad_norm": 1.9800934814123734, "learning_rate": 9.456225294065513e-07, "loss": 0.4963, "step": 10088 }, { "epoch": 0.17537242086599802, "grad_norm": 2.0195015500925075, "learning_rate": 9.456097623555843e-07, "loss": 0.4795, "step": 10089 }, { "epoch": 0.17538980340350085, "grad_norm": 2.090805009286529, "learning_rate": 9.45596993892239e-07, "loss": 0.3572, "step": 10090 }, { "epoch": 0.17540718594100366, "grad_norm": 1.473392124785762, "learning_rate": 9.455842240165559e-07, "loss": 0.4857, "step": 10091 }, { "epoch": 0.1754245684785065, "grad_norm": 1.4902626033711637, "learning_rate": 9.455714527285756e-07, "loss": 0.2942, "step": 10092 }, { "epoch": 0.17544195101600932, "grad_norm": 1.9711579632625855, "learning_rate": 9.455586800283384e-07, "loss": 0.289, "step": 10093 }, { "epoch": 0.17545933355351215, "grad_norm": 2.043879037853466, "learning_rate": 9.455459059158848e-07, "loss": 0.5773, "step": 10094 }, { "epoch": 0.17547671609101498, "grad_norm": 1.7364338134158352, "learning_rate": 9.455331303912555e-07, "loss": 0.3849, "step": 10095 }, { "epoch": 0.17549409862851778, "grad_norm": 2.0491529992517963, "learning_rate": 9.455203534544907e-07, "loss": 0.3574, "step": 10096 }, { "epoch": 0.1755114811660206, "grad_norm": 2.1519185571533836, "learning_rate": 9.455075751056312e-07, "loss": 0.4659, "step": 10097 }, { "epoch": 0.17552886370352344, "grad_norm": 2.126431501401272, "learning_rate": 9.454947953447172e-07, "loss": 0.3016, "step": 10098 }, { "epoch": 0.17554624624102627, "grad_norm": 1.7866709292501692, "learning_rate": 9.454820141717893e-07, "loss": 0.4222, "step": 10099 }, { "epoch": 0.1755636287785291, "grad_norm": 1.7160712949268198, "learning_rate": 9.454692315868881e-07, "loss": 0.5941, "step": 10100 }, { "epoch": 0.1755810113160319, "grad_norm": 2.6065250782758564, "learning_rate": 9.454564475900541e-07, "loss": 0.5148, "step": 10101 }, { "epoch": 0.17559839385353473, "grad_norm": 1.6826451366667257, "learning_rate": 9.454436621813278e-07, "loss": 0.4167, "step": 10102 }, { "epoch": 0.17561577639103756, "grad_norm": 1.557766278033532, "learning_rate": 9.454308753607496e-07, "loss": 0.3765, "step": 10103 }, { "epoch": 0.1756331589285404, "grad_norm": 1.7363420761383925, "learning_rate": 9.454180871283604e-07, "loss": 0.723, "step": 10104 }, { "epoch": 0.17565054146604323, "grad_norm": 3.0422586514456724, "learning_rate": 9.454052974842002e-07, "loss": 0.525, "step": 10105 }, { "epoch": 0.17566792400354603, "grad_norm": 2.899013625461808, "learning_rate": 9.4539250642831e-07, "loss": 0.3678, "step": 10106 }, { "epoch": 0.17568530654104886, "grad_norm": 1.8352460637086672, "learning_rate": 9.4537971396073e-07, "loss": 0.62, "step": 10107 }, { "epoch": 0.1757026890785517, "grad_norm": 1.7482705976776074, "learning_rate": 9.453669200815009e-07, "loss": 0.2585, "step": 10108 }, { "epoch": 0.17572007161605452, "grad_norm": 2.2374313419992373, "learning_rate": 9.453541247906633e-07, "loss": 0.514, "step": 10109 }, { "epoch": 0.17573745415355735, "grad_norm": 1.7253230766050822, "learning_rate": 9.453413280882577e-07, "loss": 0.8199, "step": 10110 }, { "epoch": 0.17575483669106015, "grad_norm": 1.8912326421567895, "learning_rate": 9.453285299743248e-07, "loss": 0.2993, "step": 10111 }, { "epoch": 0.17577221922856298, "grad_norm": 2.2478939773931166, "learning_rate": 9.453157304489048e-07, "loss": 0.3141, "step": 10112 }, { "epoch": 0.1757896017660658, "grad_norm": 1.5127774286534983, "learning_rate": 9.453029295120386e-07, "loss": 0.2729, "step": 10113 }, { "epoch": 0.17580698430356864, "grad_norm": 1.998570008292414, "learning_rate": 9.452901271637667e-07, "loss": 0.6608, "step": 10114 }, { "epoch": 0.17582436684107147, "grad_norm": 1.7371791941086743, "learning_rate": 9.452773234041296e-07, "loss": 0.5685, "step": 10115 }, { "epoch": 0.17584174937857427, "grad_norm": 1.3680709371593158, "learning_rate": 9.452645182331679e-07, "loss": 0.2328, "step": 10116 }, { "epoch": 0.1758591319160771, "grad_norm": 0.9621111061119622, "learning_rate": 9.452517116509222e-07, "loss": 0.284, "step": 10117 }, { "epoch": 0.17587651445357994, "grad_norm": 2.690847066833917, "learning_rate": 9.452389036574331e-07, "loss": 0.3618, "step": 10118 }, { "epoch": 0.17589389699108277, "grad_norm": 1.4056896661880585, "learning_rate": 9.452260942527414e-07, "loss": 0.415, "step": 10119 }, { "epoch": 0.1759112795285856, "grad_norm": 1.713434082544305, "learning_rate": 9.452132834368873e-07, "loss": 0.2973, "step": 10120 }, { "epoch": 0.1759286620660884, "grad_norm": 2.4991975077406656, "learning_rate": 9.452004712099117e-07, "loss": 0.6331, "step": 10121 }, { "epoch": 0.17594604460359123, "grad_norm": 1.5307431967188863, "learning_rate": 9.45187657571855e-07, "loss": 0.2132, "step": 10122 }, { "epoch": 0.17596342714109406, "grad_norm": 1.7810930491828194, "learning_rate": 9.45174842522758e-07, "loss": 0.3228, "step": 10123 }, { "epoch": 0.1759808096785969, "grad_norm": 1.3315616741264542, "learning_rate": 9.451620260626613e-07, "loss": 0.6638, "step": 10124 }, { "epoch": 0.17599819221609972, "grad_norm": 4.485534266838008, "learning_rate": 9.451492081916055e-07, "loss": 0.4559, "step": 10125 }, { "epoch": 0.17601557475360252, "grad_norm": 1.2094237893587378, "learning_rate": 9.451363889096311e-07, "loss": 0.2541, "step": 10126 }, { "epoch": 0.17603295729110535, "grad_norm": 1.803807861986331, "learning_rate": 9.451235682167788e-07, "loss": 0.4226, "step": 10127 }, { "epoch": 0.17605033982860818, "grad_norm": 2.7122332207726854, "learning_rate": 9.451107461130894e-07, "loss": 0.7622, "step": 10128 }, { "epoch": 0.176067722366111, "grad_norm": 3.090088330318605, "learning_rate": 9.450979225986032e-07, "loss": 0.4276, "step": 10129 }, { "epoch": 0.17608510490361384, "grad_norm": 2.582743116570423, "learning_rate": 9.450850976733611e-07, "loss": 0.9626, "step": 10130 }, { "epoch": 0.17610248744111665, "grad_norm": 1.5991428792120617, "learning_rate": 9.450722713374038e-07, "loss": 0.3295, "step": 10131 }, { "epoch": 0.17611986997861948, "grad_norm": 2.2097427895760586, "learning_rate": 9.450594435907717e-07, "loss": 0.7727, "step": 10132 }, { "epoch": 0.1761372525161223, "grad_norm": 2.245485457435935, "learning_rate": 9.450466144335059e-07, "loss": 0.5132, "step": 10133 }, { "epoch": 0.17615463505362514, "grad_norm": 1.9541705967857763, "learning_rate": 9.450337838656465e-07, "loss": 0.3935, "step": 10134 }, { "epoch": 0.17617201759112797, "grad_norm": 1.9036859359607639, "learning_rate": 9.450209518872345e-07, "loss": 0.3702, "step": 10135 }, { "epoch": 0.17618940012863077, "grad_norm": 1.8937647438795635, "learning_rate": 9.450081184983105e-07, "loss": 0.5049, "step": 10136 }, { "epoch": 0.1762067826661336, "grad_norm": 1.8732319097294727, "learning_rate": 9.449952836989153e-07, "loss": 0.6139, "step": 10137 }, { "epoch": 0.17622416520363643, "grad_norm": 2.8419795018203833, "learning_rate": 9.449824474890893e-07, "loss": 0.342, "step": 10138 }, { "epoch": 0.17624154774113926, "grad_norm": 1.5802148709628878, "learning_rate": 9.449696098688734e-07, "loss": 0.4157, "step": 10139 }, { "epoch": 0.1762589302786421, "grad_norm": 1.7473783406077992, "learning_rate": 9.449567708383082e-07, "loss": 0.1889, "step": 10140 }, { "epoch": 0.1762763128161449, "grad_norm": 1.4301551771575804, "learning_rate": 9.449439303974346e-07, "loss": 0.2728, "step": 10141 }, { "epoch": 0.17629369535364772, "grad_norm": 1.8744682085242437, "learning_rate": 9.44931088546293e-07, "loss": 0.4201, "step": 10142 }, { "epoch": 0.17631107789115055, "grad_norm": 2.3758939330862883, "learning_rate": 9.449182452849243e-07, "loss": 0.6857, "step": 10143 }, { "epoch": 0.17632846042865338, "grad_norm": 5.08198421799213, "learning_rate": 9.44905400613369e-07, "loss": 0.8469, "step": 10144 }, { "epoch": 0.1763458429661562, "grad_norm": 2.568404418913926, "learning_rate": 9.448925545316681e-07, "loss": 0.3346, "step": 10145 }, { "epoch": 0.17636322550365902, "grad_norm": 2.5631101939325966, "learning_rate": 9.448797070398622e-07, "loss": 0.3807, "step": 10146 }, { "epoch": 0.17638060804116185, "grad_norm": 2.0830651657439603, "learning_rate": 9.448668581379918e-07, "loss": 0.3278, "step": 10147 }, { "epoch": 0.17639799057866468, "grad_norm": 1.920480351241631, "learning_rate": 9.44854007826098e-07, "loss": 0.4845, "step": 10148 }, { "epoch": 0.1764153731161675, "grad_norm": 3.019949273457238, "learning_rate": 9.448411561042212e-07, "loss": 0.5439, "step": 10149 }, { "epoch": 0.1764327556536703, "grad_norm": 2.3017613182961427, "learning_rate": 9.448283029724024e-07, "loss": 0.4835, "step": 10150 }, { "epoch": 0.17645013819117314, "grad_norm": 1.1182348371736615, "learning_rate": 9.44815448430682e-07, "loss": 0.4024, "step": 10151 }, { "epoch": 0.17646752072867597, "grad_norm": 1.8230358391745218, "learning_rate": 9.448025924791012e-07, "loss": 0.4431, "step": 10152 }, { "epoch": 0.1764849032661788, "grad_norm": 1.80688248185566, "learning_rate": 9.447897351177006e-07, "loss": 0.3819, "step": 10153 }, { "epoch": 0.17650228580368163, "grad_norm": 2.6820729155916325, "learning_rate": 9.447768763465206e-07, "loss": 0.5413, "step": 10154 }, { "epoch": 0.17651966834118443, "grad_norm": 1.5129833874689307, "learning_rate": 9.447640161656023e-07, "loss": 0.4824, "step": 10155 }, { "epoch": 0.17653705087868726, "grad_norm": 3.9218300067865277, "learning_rate": 9.447511545749864e-07, "loss": 0.6846, "step": 10156 }, { "epoch": 0.1765544334161901, "grad_norm": 3.8827854389211414, "learning_rate": 9.447382915747136e-07, "loss": 0.6226, "step": 10157 }, { "epoch": 0.17657181595369292, "grad_norm": 2.3504883238681296, "learning_rate": 9.447254271648247e-07, "loss": 0.6461, "step": 10158 }, { "epoch": 0.17658919849119575, "grad_norm": 2.3580814901388885, "learning_rate": 9.447125613453606e-07, "loss": 0.2903, "step": 10159 }, { "epoch": 0.17660658102869856, "grad_norm": 2.497118134395991, "learning_rate": 9.446996941163619e-07, "loss": 0.5649, "step": 10160 }, { "epoch": 0.1766239635662014, "grad_norm": 2.227567389904141, "learning_rate": 9.446868254778694e-07, "loss": 0.4206, "step": 10161 }, { "epoch": 0.17664134610370422, "grad_norm": 2.469139046098276, "learning_rate": 9.446739554299239e-07, "loss": 0.3814, "step": 10162 }, { "epoch": 0.17665872864120705, "grad_norm": 2.230737275362325, "learning_rate": 9.446610839725664e-07, "loss": 0.4169, "step": 10163 }, { "epoch": 0.17667611117870988, "grad_norm": 1.8863608536425354, "learning_rate": 9.446482111058374e-07, "loss": 0.6421, "step": 10164 }, { "epoch": 0.17669349371621268, "grad_norm": 1.7586181736102215, "learning_rate": 9.446353368297778e-07, "loss": 0.5447, "step": 10165 }, { "epoch": 0.1767108762537155, "grad_norm": 1.4862052408451456, "learning_rate": 9.446224611444284e-07, "loss": 0.3306, "step": 10166 }, { "epoch": 0.17672825879121834, "grad_norm": 2.5219736846813725, "learning_rate": 9.446095840498303e-07, "loss": 0.4144, "step": 10167 }, { "epoch": 0.17674564132872117, "grad_norm": 2.333273635197446, "learning_rate": 9.445967055460237e-07, "loss": 0.6129, "step": 10168 }, { "epoch": 0.176763023866224, "grad_norm": 2.6521087929433595, "learning_rate": 9.445838256330501e-07, "loss": 0.7442, "step": 10169 }, { "epoch": 0.1767804064037268, "grad_norm": 2.0391927935704723, "learning_rate": 9.445709443109498e-07, "loss": 0.6028, "step": 10170 }, { "epoch": 0.17679778894122963, "grad_norm": 2.7598375770404253, "learning_rate": 9.445580615797639e-07, "loss": 0.513, "step": 10171 }, { "epoch": 0.17681517147873246, "grad_norm": 1.5795936389016856, "learning_rate": 9.445451774395331e-07, "loss": 0.3502, "step": 10172 }, { "epoch": 0.1768325540162353, "grad_norm": 2.620974627733938, "learning_rate": 9.445322918902984e-07, "loss": 0.4319, "step": 10173 }, { "epoch": 0.17684993655373812, "grad_norm": 1.5972580460001504, "learning_rate": 9.445194049321004e-07, "loss": 0.3864, "step": 10174 }, { "epoch": 0.17686731909124093, "grad_norm": 2.444995699540513, "learning_rate": 9.445065165649801e-07, "loss": 0.4678, "step": 10175 }, { "epoch": 0.17688470162874376, "grad_norm": 1.6999697370866564, "learning_rate": 9.444936267889785e-07, "loss": 0.4153, "step": 10176 }, { "epoch": 0.1769020841662466, "grad_norm": 1.2313081433876452, "learning_rate": 9.444807356041361e-07, "loss": 0.314, "step": 10177 }, { "epoch": 0.17691946670374942, "grad_norm": 1.458537411195367, "learning_rate": 9.444678430104939e-07, "loss": 0.2839, "step": 10178 }, { "epoch": 0.17693684924125225, "grad_norm": 1.5540079446507964, "learning_rate": 9.44454949008093e-07, "loss": 0.3289, "step": 10179 }, { "epoch": 0.17695423177875505, "grad_norm": 1.339406114751326, "learning_rate": 9.444420535969741e-07, "loss": 0.3112, "step": 10180 }, { "epoch": 0.17697161431625788, "grad_norm": 4.149107713090213, "learning_rate": 9.444291567771778e-07, "loss": 0.4514, "step": 10181 }, { "epoch": 0.1769889968537607, "grad_norm": 1.5880346723749927, "learning_rate": 9.444162585487454e-07, "loss": 0.7511, "step": 10182 }, { "epoch": 0.17700637939126354, "grad_norm": 1.9954094365655415, "learning_rate": 9.444033589117176e-07, "loss": 0.6161, "step": 10183 }, { "epoch": 0.17702376192876637, "grad_norm": 2.2798391622072947, "learning_rate": 9.443904578661353e-07, "loss": 0.4291, "step": 10184 }, { "epoch": 0.17704114446626917, "grad_norm": 1.7100829554791233, "learning_rate": 9.443775554120394e-07, "loss": 0.3331, "step": 10185 }, { "epoch": 0.177058527003772, "grad_norm": 1.7849661082545962, "learning_rate": 9.443646515494708e-07, "loss": 0.6182, "step": 10186 }, { "epoch": 0.17707590954127483, "grad_norm": 1.7679643361591102, "learning_rate": 9.443517462784702e-07, "loss": 0.3378, "step": 10187 }, { "epoch": 0.17709329207877766, "grad_norm": 2.529042666035732, "learning_rate": 9.443388395990787e-07, "loss": 0.4602, "step": 10188 }, { "epoch": 0.1771106746162805, "grad_norm": 2.017128718697331, "learning_rate": 9.443259315113374e-07, "loss": 0.7745, "step": 10189 }, { "epoch": 0.1771280571537833, "grad_norm": 2.0962241976228917, "learning_rate": 9.44313022015287e-07, "loss": 0.4525, "step": 10190 }, { "epoch": 0.17714543969128613, "grad_norm": 1.765839195317008, "learning_rate": 9.443001111109683e-07, "loss": 0.2927, "step": 10191 }, { "epoch": 0.17716282222878896, "grad_norm": 2.030423420150676, "learning_rate": 9.442871987984222e-07, "loss": 0.6376, "step": 10192 }, { "epoch": 0.1771802047662918, "grad_norm": 1.5609686706717314, "learning_rate": 9.442742850776901e-07, "loss": 0.4489, "step": 10193 }, { "epoch": 0.17719758730379462, "grad_norm": 1.236761506070408, "learning_rate": 9.442613699488124e-07, "loss": 0.274, "step": 10194 }, { "epoch": 0.17721496984129742, "grad_norm": 4.909422968481724, "learning_rate": 9.442484534118303e-07, "loss": 0.4496, "step": 10195 }, { "epoch": 0.17723235237880025, "grad_norm": 1.8062732021343884, "learning_rate": 9.442355354667846e-07, "loss": 0.5761, "step": 10196 }, { "epoch": 0.17724973491630308, "grad_norm": 2.1360699149279396, "learning_rate": 9.442226161137164e-07, "loss": 0.6297, "step": 10197 }, { "epoch": 0.1772671174538059, "grad_norm": 1.8238726117093746, "learning_rate": 9.442096953526665e-07, "loss": 0.4081, "step": 10198 }, { "epoch": 0.17728449999130874, "grad_norm": 3.5044330285166994, "learning_rate": 9.44196773183676e-07, "loss": 0.3933, "step": 10199 }, { "epoch": 0.17730188252881154, "grad_norm": 1.5885551634874377, "learning_rate": 9.441838496067856e-07, "loss": 0.5069, "step": 10200 }, { "epoch": 0.17731926506631437, "grad_norm": 1.922659541514068, "learning_rate": 9.441709246220366e-07, "loss": 0.4406, "step": 10201 }, { "epoch": 0.1773366476038172, "grad_norm": 2.60428783337379, "learning_rate": 9.441579982294697e-07, "loss": 0.2812, "step": 10202 }, { "epoch": 0.17735403014132003, "grad_norm": 3.836622526164741, "learning_rate": 9.441450704291261e-07, "loss": 0.4281, "step": 10203 }, { "epoch": 0.17737141267882287, "grad_norm": 2.15060247836781, "learning_rate": 9.441321412210466e-07, "loss": 0.3818, "step": 10204 }, { "epoch": 0.17738879521632567, "grad_norm": 2.41012274577851, "learning_rate": 9.441192106052722e-07, "loss": 0.5224, "step": 10205 }, { "epoch": 0.1774061777538285, "grad_norm": 1.151111013515796, "learning_rate": 9.441062785818439e-07, "loss": 0.235, "step": 10206 }, { "epoch": 0.17742356029133133, "grad_norm": 1.4618249801701682, "learning_rate": 9.440933451508028e-07, "loss": 0.4994, "step": 10207 }, { "epoch": 0.17744094282883416, "grad_norm": 1.6443013750791695, "learning_rate": 9.440804103121896e-07, "loss": 0.4251, "step": 10208 }, { "epoch": 0.177458325366337, "grad_norm": 2.145169999836667, "learning_rate": 9.440674740660457e-07, "loss": 0.4519, "step": 10209 }, { "epoch": 0.1774757079038398, "grad_norm": 2.7570401043784423, "learning_rate": 9.440545364124117e-07, "loss": 0.6025, "step": 10210 }, { "epoch": 0.17749309044134262, "grad_norm": 1.5761909394343194, "learning_rate": 9.440415973513289e-07, "loss": 0.2953, "step": 10211 }, { "epoch": 0.17751047297884545, "grad_norm": 1.3946797190930824, "learning_rate": 9.440286568828384e-07, "loss": 0.5555, "step": 10212 }, { "epoch": 0.17752785551634828, "grad_norm": 1.7923999696492128, "learning_rate": 9.440157150069807e-07, "loss": 0.533, "step": 10213 }, { "epoch": 0.1775452380538511, "grad_norm": 1.6752053456600338, "learning_rate": 9.440027717237974e-07, "loss": 0.3563, "step": 10214 }, { "epoch": 0.17756262059135391, "grad_norm": 1.8028261333849622, "learning_rate": 9.439898270333292e-07, "loss": 0.2341, "step": 10215 }, { "epoch": 0.17758000312885674, "grad_norm": 1.2928648372261238, "learning_rate": 9.439768809356172e-07, "loss": 0.5687, "step": 10216 }, { "epoch": 0.17759738566635958, "grad_norm": 1.9169932281902025, "learning_rate": 9.439639334307024e-07, "loss": 0.3117, "step": 10217 }, { "epoch": 0.1776147682038624, "grad_norm": 3.9501316794509678, "learning_rate": 9.439509845186258e-07, "loss": 0.3464, "step": 10218 }, { "epoch": 0.17763215074136524, "grad_norm": 1.6871039341624847, "learning_rate": 9.439380341994286e-07, "loss": 0.526, "step": 10219 }, { "epoch": 0.17764953327886804, "grad_norm": 1.5924481688034846, "learning_rate": 9.439250824731518e-07, "loss": 0.3506, "step": 10220 }, { "epoch": 0.17766691581637087, "grad_norm": 2.238722637259536, "learning_rate": 9.439121293398363e-07, "loss": 0.3531, "step": 10221 }, { "epoch": 0.1776842983538737, "grad_norm": 1.427375832943762, "learning_rate": 9.438991747995234e-07, "loss": 0.4728, "step": 10222 }, { "epoch": 0.17770168089137653, "grad_norm": 2.033411032152599, "learning_rate": 9.438862188522539e-07, "loss": 0.3872, "step": 10223 }, { "epoch": 0.17771906342887936, "grad_norm": 1.8816027401005342, "learning_rate": 9.438732614980691e-07, "loss": 0.6386, "step": 10224 }, { "epoch": 0.17773644596638216, "grad_norm": 1.3910729491711624, "learning_rate": 9.438603027370099e-07, "loss": 0.3373, "step": 10225 }, { "epoch": 0.177753828503885, "grad_norm": 1.3652138086811134, "learning_rate": 9.438473425691175e-07, "loss": 0.4614, "step": 10226 }, { "epoch": 0.17777121104138782, "grad_norm": 1.676956502449481, "learning_rate": 9.438343809944328e-07, "loss": 0.2157, "step": 10227 }, { "epoch": 0.17778859357889065, "grad_norm": 2.6521819529846837, "learning_rate": 9.43821418012997e-07, "loss": 0.5967, "step": 10228 }, { "epoch": 0.17780597611639348, "grad_norm": 1.6853717370145465, "learning_rate": 9.438084536248513e-07, "loss": 0.4819, "step": 10229 }, { "epoch": 0.17782335865389629, "grad_norm": 2.0256347975566045, "learning_rate": 9.437954878300366e-07, "loss": 0.4516, "step": 10230 }, { "epoch": 0.17784074119139912, "grad_norm": 1.1244457058240378, "learning_rate": 9.437825206285941e-07, "loss": 0.2176, "step": 10231 }, { "epoch": 0.17785812372890195, "grad_norm": 1.9317736092870155, "learning_rate": 9.437695520205649e-07, "loss": 0.5227, "step": 10232 }, { "epoch": 0.17787550626640478, "grad_norm": 2.8094852147861764, "learning_rate": 9.437565820059898e-07, "loss": 0.4525, "step": 10233 }, { "epoch": 0.1778928888039076, "grad_norm": 2.421947596814532, "learning_rate": 9.437436105849105e-07, "loss": 0.3391, "step": 10234 }, { "epoch": 0.1779102713414104, "grad_norm": 1.271377309536498, "learning_rate": 9.437306377573678e-07, "loss": 0.3996, "step": 10235 }, { "epoch": 0.17792765387891324, "grad_norm": 1.9450889221318681, "learning_rate": 9.437176635234025e-07, "loss": 0.2346, "step": 10236 }, { "epoch": 0.17794503641641607, "grad_norm": 0.966395335637776, "learning_rate": 9.437046878830563e-07, "loss": 0.2562, "step": 10237 }, { "epoch": 0.1779624189539189, "grad_norm": 2.329568657624544, "learning_rate": 9.4369171083637e-07, "loss": 0.7454, "step": 10238 }, { "epoch": 0.17797980149142173, "grad_norm": 2.087096010886311, "learning_rate": 9.436787323833848e-07, "loss": 0.2453, "step": 10239 }, { "epoch": 0.17799718402892453, "grad_norm": 3.6358724446312, "learning_rate": 9.436657525241417e-07, "loss": 0.4993, "step": 10240 }, { "epoch": 0.17801456656642736, "grad_norm": 4.31703532394184, "learning_rate": 9.436527712586821e-07, "loss": 0.3813, "step": 10241 }, { "epoch": 0.1780319491039302, "grad_norm": 1.3670115483022387, "learning_rate": 9.43639788587047e-07, "loss": 0.33, "step": 10242 }, { "epoch": 0.17804933164143302, "grad_norm": 2.320625557016005, "learning_rate": 9.436268045092775e-07, "loss": 0.6197, "step": 10243 }, { "epoch": 0.17806671417893585, "grad_norm": 2.565942739210882, "learning_rate": 9.436138190254148e-07, "loss": 0.3997, "step": 10244 }, { "epoch": 0.17808409671643866, "grad_norm": 2.308849716565272, "learning_rate": 9.436008321355002e-07, "loss": 0.4049, "step": 10245 }, { "epoch": 0.17810147925394149, "grad_norm": 1.6680517948428029, "learning_rate": 9.435878438395747e-07, "loss": 0.2506, "step": 10246 }, { "epoch": 0.17811886179144432, "grad_norm": 1.4379817862948965, "learning_rate": 9.435748541376795e-07, "loss": 0.5145, "step": 10247 }, { "epoch": 0.17813624432894715, "grad_norm": 2.3028187423022497, "learning_rate": 9.435618630298557e-07, "loss": 0.2726, "step": 10248 }, { "epoch": 0.17815362686644998, "grad_norm": 3.4070733671696294, "learning_rate": 9.435488705161445e-07, "loss": 0.3579, "step": 10249 }, { "epoch": 0.17817100940395278, "grad_norm": 2.8783017734961103, "learning_rate": 9.435358765965873e-07, "loss": 0.4814, "step": 10250 }, { "epoch": 0.1781883919414556, "grad_norm": 2.108138439786575, "learning_rate": 9.435228812712251e-07, "loss": 0.3162, "step": 10251 }, { "epoch": 0.17820577447895844, "grad_norm": 1.3474992396553085, "learning_rate": 9.43509884540099e-07, "loss": 0.3267, "step": 10252 }, { "epoch": 0.17822315701646127, "grad_norm": 1.76536530068285, "learning_rate": 9.434968864032504e-07, "loss": 0.237, "step": 10253 }, { "epoch": 0.1782405395539641, "grad_norm": 1.3734681808509364, "learning_rate": 9.434838868607202e-07, "loss": 0.361, "step": 10254 }, { "epoch": 0.1782579220914669, "grad_norm": 1.8689959087786825, "learning_rate": 9.4347088591255e-07, "loss": 0.3769, "step": 10255 }, { "epoch": 0.17827530462896973, "grad_norm": 1.929783227637249, "learning_rate": 9.434578835587807e-07, "loss": 0.6674, "step": 10256 }, { "epoch": 0.17829268716647256, "grad_norm": 1.4279055482392338, "learning_rate": 9.434448797994536e-07, "loss": 0.3451, "step": 10257 }, { "epoch": 0.1783100697039754, "grad_norm": 1.7834551866232624, "learning_rate": 9.434318746346102e-07, "loss": 0.3393, "step": 10258 }, { "epoch": 0.17832745224147822, "grad_norm": 1.721301545501321, "learning_rate": 9.434188680642912e-07, "loss": 0.2743, "step": 10259 }, { "epoch": 0.17834483477898103, "grad_norm": 1.4935614865168423, "learning_rate": 9.43405860088538e-07, "loss": 0.4008, "step": 10260 }, { "epoch": 0.17836221731648386, "grad_norm": 1.9916950868036412, "learning_rate": 9.433928507073921e-07, "loss": 0.714, "step": 10261 }, { "epoch": 0.1783795998539867, "grad_norm": 1.8553210802527746, "learning_rate": 9.433798399208945e-07, "loss": 0.4696, "step": 10262 }, { "epoch": 0.17839698239148952, "grad_norm": 1.1818023336599384, "learning_rate": 9.433668277290865e-07, "loss": 0.4382, "step": 10263 }, { "epoch": 0.17841436492899235, "grad_norm": 1.602596041682983, "learning_rate": 9.433538141320092e-07, "loss": 0.3218, "step": 10264 }, { "epoch": 0.17843174746649515, "grad_norm": 2.162527763112436, "learning_rate": 9.433407991297039e-07, "loss": 0.3649, "step": 10265 }, { "epoch": 0.17844913000399798, "grad_norm": 2.7110352494255787, "learning_rate": 9.433277827222122e-07, "loss": 0.3939, "step": 10266 }, { "epoch": 0.1784665125415008, "grad_norm": 3.1929273581115307, "learning_rate": 9.433147649095749e-07, "loss": 0.3053, "step": 10267 }, { "epoch": 0.17848389507900364, "grad_norm": 1.8210528225906877, "learning_rate": 9.433017456918335e-07, "loss": 0.3609, "step": 10268 }, { "epoch": 0.17850127761650647, "grad_norm": 2.3564042593444636, "learning_rate": 9.432887250690293e-07, "loss": 0.5528, "step": 10269 }, { "epoch": 0.17851866015400927, "grad_norm": 1.6006766064192908, "learning_rate": 9.432757030412033e-07, "loss": 0.5689, "step": 10270 }, { "epoch": 0.1785360426915121, "grad_norm": 2.180150000497027, "learning_rate": 9.432626796083969e-07, "loss": 0.4066, "step": 10271 }, { "epoch": 0.17855342522901493, "grad_norm": 1.5755298673121716, "learning_rate": 9.432496547706517e-07, "loss": 0.6476, "step": 10272 }, { "epoch": 0.17857080776651776, "grad_norm": 1.645206505899327, "learning_rate": 9.432366285280085e-07, "loss": 0.537, "step": 10273 }, { "epoch": 0.1785881903040206, "grad_norm": 1.5251315200912512, "learning_rate": 9.432236008805088e-07, "loss": 0.429, "step": 10274 }, { "epoch": 0.1786055728415234, "grad_norm": 1.5277184528773848, "learning_rate": 9.432105718281938e-07, "loss": 0.2226, "step": 10275 }, { "epoch": 0.17862295537902623, "grad_norm": 1.613166598921935, "learning_rate": 9.43197541371105e-07, "loss": 0.6779, "step": 10276 }, { "epoch": 0.17864033791652906, "grad_norm": 1.4813055693281076, "learning_rate": 9.431845095092836e-07, "loss": 0.414, "step": 10277 }, { "epoch": 0.1786577204540319, "grad_norm": 2.4005710956499553, "learning_rate": 9.43171476242771e-07, "loss": 0.3682, "step": 10278 }, { "epoch": 0.17867510299153472, "grad_norm": 1.5959146982461117, "learning_rate": 9.431584415716081e-07, "loss": 0.2456, "step": 10279 }, { "epoch": 0.17869248552903752, "grad_norm": 1.8365952798377843, "learning_rate": 9.431454054958367e-07, "loss": 0.3402, "step": 10280 }, { "epoch": 0.17870986806654035, "grad_norm": 5.852086853416015, "learning_rate": 9.431323680154979e-07, "loss": 0.3877, "step": 10281 }, { "epoch": 0.17872725060404318, "grad_norm": 1.571458026814075, "learning_rate": 9.43119329130633e-07, "loss": 0.4003, "step": 10282 }, { "epoch": 0.178744633141546, "grad_norm": 1.2703460031476101, "learning_rate": 9.431062888412836e-07, "loss": 0.3046, "step": 10283 }, { "epoch": 0.17876201567904884, "grad_norm": 2.1827104011005174, "learning_rate": 9.430932471474904e-07, "loss": 0.35, "step": 10284 }, { "epoch": 0.17877939821655164, "grad_norm": 2.097458642333022, "learning_rate": 9.430802040492954e-07, "loss": 0.5314, "step": 10285 }, { "epoch": 0.17879678075405447, "grad_norm": 1.8915816656095947, "learning_rate": 9.430671595467397e-07, "loss": 0.4618, "step": 10286 }, { "epoch": 0.1788141632915573, "grad_norm": 1.8414806770660248, "learning_rate": 9.430541136398647e-07, "loss": 0.3499, "step": 10287 }, { "epoch": 0.17883154582906013, "grad_norm": 1.3313968503328668, "learning_rate": 9.430410663287115e-07, "loss": 0.5982, "step": 10288 }, { "epoch": 0.17884892836656294, "grad_norm": 1.9536021854944274, "learning_rate": 9.430280176133217e-07, "loss": 0.5236, "step": 10289 }, { "epoch": 0.17886631090406577, "grad_norm": 1.010948087476025, "learning_rate": 9.430149674937367e-07, "loss": 0.3801, "step": 10290 }, { "epoch": 0.1788836934415686, "grad_norm": 2.5085547463586857, "learning_rate": 9.430019159699975e-07, "loss": 0.4476, "step": 10291 }, { "epoch": 0.17890107597907143, "grad_norm": 1.950191136600323, "learning_rate": 9.429888630421461e-07, "loss": 0.3106, "step": 10292 }, { "epoch": 0.17891845851657426, "grad_norm": 1.9883652612871634, "learning_rate": 9.429758087102232e-07, "loss": 0.2688, "step": 10293 }, { "epoch": 0.17893584105407706, "grad_norm": 1.902538064213359, "learning_rate": 9.429627529742705e-07, "loss": 0.3481, "step": 10294 }, { "epoch": 0.1789532235915799, "grad_norm": 1.34663472839255, "learning_rate": 9.429496958343295e-07, "loss": 0.6244, "step": 10295 }, { "epoch": 0.17897060612908272, "grad_norm": 1.2785708696542841, "learning_rate": 9.429366372904414e-07, "loss": 0.6842, "step": 10296 }, { "epoch": 0.17898798866658555, "grad_norm": 1.8215486264934295, "learning_rate": 9.429235773426476e-07, "loss": 0.5248, "step": 10297 }, { "epoch": 0.17900537120408838, "grad_norm": 2.0860333708005485, "learning_rate": 9.429105159909895e-07, "loss": 0.2685, "step": 10298 }, { "epoch": 0.17902275374159118, "grad_norm": 1.7812128836666872, "learning_rate": 9.428974532355085e-07, "loss": 0.4176, "step": 10299 }, { "epoch": 0.17904013627909401, "grad_norm": 2.4286964025677475, "learning_rate": 9.428843890762461e-07, "loss": 0.2995, "step": 10300 }, { "epoch": 0.17905751881659684, "grad_norm": 2.295755901474454, "learning_rate": 9.428713235132435e-07, "loss": 0.3185, "step": 10301 }, { "epoch": 0.17907490135409967, "grad_norm": 2.261671631874758, "learning_rate": 9.428582565465424e-07, "loss": 0.6604, "step": 10302 }, { "epoch": 0.1790922838916025, "grad_norm": 2.020012977333305, "learning_rate": 9.428451881761839e-07, "loss": 0.2651, "step": 10303 }, { "epoch": 0.1791096664291053, "grad_norm": 1.995683918674025, "learning_rate": 9.428321184022097e-07, "loss": 0.375, "step": 10304 }, { "epoch": 0.17912704896660814, "grad_norm": 1.6771664902353194, "learning_rate": 9.428190472246611e-07, "loss": 0.4412, "step": 10305 }, { "epoch": 0.17914443150411097, "grad_norm": 1.5571595938540737, "learning_rate": 9.428059746435795e-07, "loss": 0.244, "step": 10306 }, { "epoch": 0.1791618140416138, "grad_norm": 1.873024997859781, "learning_rate": 9.427929006590063e-07, "loss": 0.5479, "step": 10307 }, { "epoch": 0.17917919657911663, "grad_norm": 2.5401723038597344, "learning_rate": 9.427798252709831e-07, "loss": 0.3028, "step": 10308 }, { "epoch": 0.17919657911661943, "grad_norm": 2.114425559695692, "learning_rate": 9.427667484795513e-07, "loss": 0.3186, "step": 10309 }, { "epoch": 0.17921396165412226, "grad_norm": 1.8324315974182104, "learning_rate": 9.427536702847523e-07, "loss": 0.4495, "step": 10310 }, { "epoch": 0.1792313441916251, "grad_norm": 1.9447360996701903, "learning_rate": 9.427405906866274e-07, "loss": 0.2932, "step": 10311 }, { "epoch": 0.17924872672912792, "grad_norm": 2.2562404298173124, "learning_rate": 9.427275096852184e-07, "loss": 0.705, "step": 10312 }, { "epoch": 0.17926610926663075, "grad_norm": 1.7329363731836218, "learning_rate": 9.427144272805663e-07, "loss": 0.3351, "step": 10313 }, { "epoch": 0.17928349180413355, "grad_norm": 1.8343930801856134, "learning_rate": 9.42701343472713e-07, "loss": 0.4385, "step": 10314 }, { "epoch": 0.17930087434163638, "grad_norm": 2.3805780059982347, "learning_rate": 9.426882582616999e-07, "loss": 0.3313, "step": 10315 }, { "epoch": 0.17931825687913922, "grad_norm": 2.8975120431453734, "learning_rate": 9.426751716475683e-07, "loss": 0.3863, "step": 10316 }, { "epoch": 0.17933563941664205, "grad_norm": 1.414478826344438, "learning_rate": 9.426620836303596e-07, "loss": 0.3696, "step": 10317 }, { "epoch": 0.17935302195414488, "grad_norm": 1.1160288976265282, "learning_rate": 9.426489942101156e-07, "loss": 0.5457, "step": 10318 }, { "epoch": 0.17937040449164768, "grad_norm": 3.7069393934305777, "learning_rate": 9.426359033868776e-07, "loss": 0.3429, "step": 10319 }, { "epoch": 0.1793877870291505, "grad_norm": 1.7672169603090744, "learning_rate": 9.426228111606871e-07, "loss": 0.5334, "step": 10320 }, { "epoch": 0.17940516956665334, "grad_norm": 1.8145186364131174, "learning_rate": 9.426097175315855e-07, "loss": 0.4133, "step": 10321 }, { "epoch": 0.17942255210415617, "grad_norm": 0.9563699315708417, "learning_rate": 9.425966224996145e-07, "loss": 0.3052, "step": 10322 }, { "epoch": 0.179439934641659, "grad_norm": 1.8967014159206315, "learning_rate": 9.425835260648156e-07, "loss": 0.5484, "step": 10323 }, { "epoch": 0.1794573171791618, "grad_norm": 1.6281014028885654, "learning_rate": 9.425704282272302e-07, "loss": 0.3841, "step": 10324 }, { "epoch": 0.17947469971666463, "grad_norm": 1.3459103788249052, "learning_rate": 9.425573289868998e-07, "loss": 0.3857, "step": 10325 }, { "epoch": 0.17949208225416746, "grad_norm": 1.9886720205861614, "learning_rate": 9.425442283438661e-07, "loss": 0.5011, "step": 10326 }, { "epoch": 0.1795094647916703, "grad_norm": 1.9258655479339697, "learning_rate": 9.425311262981702e-07, "loss": 0.6068, "step": 10327 }, { "epoch": 0.17952684732917312, "grad_norm": 1.9758623544534202, "learning_rate": 9.425180228498541e-07, "loss": 0.4899, "step": 10328 }, { "epoch": 0.17954422986667593, "grad_norm": 1.636189388649418, "learning_rate": 9.42504917998959e-07, "loss": 0.4555, "step": 10329 }, { "epoch": 0.17956161240417876, "grad_norm": 1.1835347365960451, "learning_rate": 9.424918117455268e-07, "loss": 0.2393, "step": 10330 }, { "epoch": 0.17957899494168159, "grad_norm": 1.297972495786539, "learning_rate": 9.424787040895987e-07, "loss": 0.3642, "step": 10331 }, { "epoch": 0.17959637747918442, "grad_norm": 4.518414467352313, "learning_rate": 9.424655950312163e-07, "loss": 0.6804, "step": 10332 }, { "epoch": 0.17961376001668725, "grad_norm": 2.1000594223451405, "learning_rate": 9.424524845704214e-07, "loss": 0.7276, "step": 10333 }, { "epoch": 0.17963114255419005, "grad_norm": 1.6920851711213174, "learning_rate": 9.424393727072552e-07, "loss": 0.5588, "step": 10334 }, { "epoch": 0.17964852509169288, "grad_norm": 3.035468709325353, "learning_rate": 9.424262594417596e-07, "loss": 0.8049, "step": 10335 }, { "epoch": 0.1796659076291957, "grad_norm": 2.332409838186863, "learning_rate": 9.424131447739758e-07, "loss": 0.3105, "step": 10336 }, { "epoch": 0.17968329016669854, "grad_norm": 3.456056321100125, "learning_rate": 9.424000287039458e-07, "loss": 0.6658, "step": 10337 }, { "epoch": 0.17970067270420137, "grad_norm": 1.8664401205651986, "learning_rate": 9.423869112317107e-07, "loss": 0.5266, "step": 10338 }, { "epoch": 0.17971805524170417, "grad_norm": 1.7184345256726645, "learning_rate": 9.423737923573123e-07, "loss": 0.3049, "step": 10339 }, { "epoch": 0.179735437779207, "grad_norm": 2.1192798576485146, "learning_rate": 9.423606720807924e-07, "loss": 0.3432, "step": 10340 }, { "epoch": 0.17975282031670983, "grad_norm": 2.145707570832838, "learning_rate": 9.423475504021922e-07, "loss": 0.4782, "step": 10341 }, { "epoch": 0.17977020285421266, "grad_norm": 2.7517470547658744, "learning_rate": 9.423344273215534e-07, "loss": 0.5911, "step": 10342 }, { "epoch": 0.1797875853917155, "grad_norm": 1.6607530460855857, "learning_rate": 9.423213028389179e-07, "loss": 0.4191, "step": 10343 }, { "epoch": 0.1798049679292183, "grad_norm": 4.105270622604034, "learning_rate": 9.423081769543269e-07, "loss": 0.4169, "step": 10344 }, { "epoch": 0.17982235046672113, "grad_norm": 1.6591731486551198, "learning_rate": 9.422950496678222e-07, "loss": 0.3168, "step": 10345 }, { "epoch": 0.17983973300422396, "grad_norm": 1.4416942588579555, "learning_rate": 9.422819209794453e-07, "loss": 0.4082, "step": 10346 }, { "epoch": 0.1798571155417268, "grad_norm": 1.3100889058854541, "learning_rate": 9.42268790889238e-07, "loss": 0.2876, "step": 10347 }, { "epoch": 0.17987449807922962, "grad_norm": 1.631289588281926, "learning_rate": 9.422556593972416e-07, "loss": 0.3855, "step": 10348 }, { "epoch": 0.17989188061673242, "grad_norm": 2.028140074973397, "learning_rate": 9.422425265034981e-07, "loss": 0.2746, "step": 10349 }, { "epoch": 0.17990926315423525, "grad_norm": 2.3081163302470378, "learning_rate": 9.422293922080488e-07, "loss": 0.3168, "step": 10350 }, { "epoch": 0.17992664569173808, "grad_norm": 1.9956505108640412, "learning_rate": 9.422162565109356e-07, "loss": 0.3431, "step": 10351 }, { "epoch": 0.1799440282292409, "grad_norm": 2.9765048136561423, "learning_rate": 9.422031194121999e-07, "loss": 0.3482, "step": 10352 }, { "epoch": 0.17996141076674374, "grad_norm": 2.4519440386105678, "learning_rate": 9.421899809118835e-07, "loss": 0.5606, "step": 10353 }, { "epoch": 0.17997879330424654, "grad_norm": 1.9718460837285514, "learning_rate": 9.42176841010028e-07, "loss": 0.4095, "step": 10354 }, { "epoch": 0.17999617584174937, "grad_norm": 2.568743338827833, "learning_rate": 9.421636997066751e-07, "loss": 0.3396, "step": 10355 }, { "epoch": 0.1800135583792522, "grad_norm": 2.313402315971651, "learning_rate": 9.421505570018662e-07, "loss": 0.5379, "step": 10356 }, { "epoch": 0.18003094091675503, "grad_norm": 1.4261556466106415, "learning_rate": 9.421374128956432e-07, "loss": 0.394, "step": 10357 }, { "epoch": 0.18004832345425786, "grad_norm": 1.3902871067992315, "learning_rate": 9.421242673880478e-07, "loss": 0.4004, "step": 10358 }, { "epoch": 0.18006570599176067, "grad_norm": 1.765746654529804, "learning_rate": 9.421111204791215e-07, "loss": 0.3119, "step": 10359 }, { "epoch": 0.1800830885292635, "grad_norm": 3.028382728738794, "learning_rate": 9.420979721689061e-07, "loss": 0.5157, "step": 10360 }, { "epoch": 0.18010047106676633, "grad_norm": 1.6500169747731783, "learning_rate": 9.420848224574432e-07, "loss": 0.3294, "step": 10361 }, { "epoch": 0.18011785360426916, "grad_norm": 1.9078021181981952, "learning_rate": 9.420716713447744e-07, "loss": 0.6339, "step": 10362 }, { "epoch": 0.180135236141772, "grad_norm": 1.672645002875546, "learning_rate": 9.420585188309416e-07, "loss": 0.3322, "step": 10363 }, { "epoch": 0.1801526186792748, "grad_norm": 1.7107580784727339, "learning_rate": 9.420453649159862e-07, "loss": 0.3533, "step": 10364 }, { "epoch": 0.18017000121677762, "grad_norm": 1.7905769518945012, "learning_rate": 9.420322095999501e-07, "loss": 0.3465, "step": 10365 }, { "epoch": 0.18018738375428045, "grad_norm": 1.6978107691896522, "learning_rate": 9.42019052882875e-07, "loss": 0.3086, "step": 10366 }, { "epoch": 0.18020476629178328, "grad_norm": 0.9197439221411644, "learning_rate": 9.420058947648025e-07, "loss": 0.2794, "step": 10367 }, { "epoch": 0.1802221488292861, "grad_norm": 10.205713636850135, "learning_rate": 9.419927352457744e-07, "loss": 0.6981, "step": 10368 }, { "epoch": 0.1802395313667889, "grad_norm": 1.2312958260386138, "learning_rate": 9.419795743258324e-07, "loss": 0.3173, "step": 10369 }, { "epoch": 0.18025691390429174, "grad_norm": 2.3548335334583097, "learning_rate": 9.419664120050181e-07, "loss": 0.3408, "step": 10370 }, { "epoch": 0.18027429644179457, "grad_norm": 0.9155192918092423, "learning_rate": 9.419532482833734e-07, "loss": 0.5765, "step": 10371 }, { "epoch": 0.1802916789792974, "grad_norm": 2.966103372133294, "learning_rate": 9.419400831609397e-07, "loss": 0.6062, "step": 10372 }, { "epoch": 0.18030906151680023, "grad_norm": 1.8229261778843846, "learning_rate": 9.419269166377591e-07, "loss": 0.3359, "step": 10373 }, { "epoch": 0.18032644405430304, "grad_norm": 1.575249173087797, "learning_rate": 9.419137487138732e-07, "loss": 0.2999, "step": 10374 }, { "epoch": 0.18034382659180587, "grad_norm": 1.6970485142426228, "learning_rate": 9.419005793893238e-07, "loss": 0.5647, "step": 10375 }, { "epoch": 0.1803612091293087, "grad_norm": 1.3845483362133735, "learning_rate": 9.418874086641524e-07, "loss": 0.408, "step": 10376 }, { "epoch": 0.18037859166681153, "grad_norm": 2.491301607798431, "learning_rate": 9.418742365384009e-07, "loss": 0.4592, "step": 10377 }, { "epoch": 0.18039597420431436, "grad_norm": 2.3955262643918944, "learning_rate": 9.418610630121111e-07, "loss": 0.4382, "step": 10378 }, { "epoch": 0.18041335674181716, "grad_norm": 1.6062858131106434, "learning_rate": 9.418478880853247e-07, "loss": 0.4293, "step": 10379 }, { "epoch": 0.18043073927932, "grad_norm": 1.7968923479126044, "learning_rate": 9.418347117580834e-07, "loss": 0.6852, "step": 10380 }, { "epoch": 0.18044812181682282, "grad_norm": 3.4880571785475296, "learning_rate": 9.418215340304292e-07, "loss": 0.2873, "step": 10381 }, { "epoch": 0.18046550435432565, "grad_norm": 2.4843316229031727, "learning_rate": 9.418083549024035e-07, "loss": 0.461, "step": 10382 }, { "epoch": 0.18048288689182848, "grad_norm": 1.0941320910169292, "learning_rate": 9.417951743740485e-07, "loss": 0.409, "step": 10383 }, { "epoch": 0.18050026942933128, "grad_norm": 2.2068890092999403, "learning_rate": 9.417819924454054e-07, "loss": 0.6024, "step": 10384 }, { "epoch": 0.18051765196683411, "grad_norm": 2.3228224872788052, "learning_rate": 9.417688091165165e-07, "loss": 0.4323, "step": 10385 }, { "epoch": 0.18053503450433694, "grad_norm": 1.5022560183999059, "learning_rate": 9.417556243874234e-07, "loss": 0.2697, "step": 10386 }, { "epoch": 0.18055241704183977, "grad_norm": 1.3066685551141404, "learning_rate": 9.41742438258168e-07, "loss": 0.423, "step": 10387 }, { "epoch": 0.1805697995793426, "grad_norm": 3.1233443226020645, "learning_rate": 9.41729250728792e-07, "loss": 0.4366, "step": 10388 }, { "epoch": 0.1805871821168454, "grad_norm": 3.720290670312143, "learning_rate": 9.417160617993369e-07, "loss": 0.5074, "step": 10389 }, { "epoch": 0.18060456465434824, "grad_norm": 2.1547964934073063, "learning_rate": 9.417028714698451e-07, "loss": 0.3912, "step": 10390 }, { "epoch": 0.18062194719185107, "grad_norm": 1.5686380375309654, "learning_rate": 9.41689679740358e-07, "loss": 0.3502, "step": 10391 }, { "epoch": 0.1806393297293539, "grad_norm": 1.6990878671663885, "learning_rate": 9.416764866109175e-07, "loss": 0.316, "step": 10392 }, { "epoch": 0.18065671226685673, "grad_norm": 2.0514551353682178, "learning_rate": 9.416632920815654e-07, "loss": 0.2938, "step": 10393 }, { "epoch": 0.18067409480435953, "grad_norm": 2.545540841864648, "learning_rate": 9.416500961523435e-07, "loss": 0.3926, "step": 10394 }, { "epoch": 0.18069147734186236, "grad_norm": 1.5684065096132211, "learning_rate": 9.416368988232938e-07, "loss": 0.4722, "step": 10395 }, { "epoch": 0.1807088598793652, "grad_norm": 1.5449019569316278, "learning_rate": 9.41623700094458e-07, "loss": 0.434, "step": 10396 }, { "epoch": 0.18072624241686802, "grad_norm": 3.6302273242181924, "learning_rate": 9.416104999658778e-07, "loss": 0.4341, "step": 10397 }, { "epoch": 0.18074362495437085, "grad_norm": 2.5624603147463856, "learning_rate": 9.415972984375953e-07, "loss": 0.537, "step": 10398 }, { "epoch": 0.18076100749187365, "grad_norm": 2.4645057219726914, "learning_rate": 9.415840955096521e-07, "loss": 0.4851, "step": 10399 }, { "epoch": 0.18077839002937648, "grad_norm": 1.6535092302491636, "learning_rate": 9.415708911820904e-07, "loss": 0.3222, "step": 10400 }, { "epoch": 0.18079577256687931, "grad_norm": 1.9230897132082607, "learning_rate": 9.415576854549515e-07, "loss": 0.573, "step": 10401 }, { "epoch": 0.18081315510438214, "grad_norm": 1.6731325788587872, "learning_rate": 9.415444783282778e-07, "loss": 0.5026, "step": 10402 }, { "epoch": 0.18083053764188498, "grad_norm": 1.591267645442379, "learning_rate": 9.415312698021107e-07, "loss": 0.4398, "step": 10403 }, { "epoch": 0.18084792017938778, "grad_norm": 1.6417018332882845, "learning_rate": 9.415180598764925e-07, "loss": 0.3624, "step": 10404 }, { "epoch": 0.1808653027168906, "grad_norm": 1.736269095048367, "learning_rate": 9.415048485514648e-07, "loss": 0.5372, "step": 10405 }, { "epoch": 0.18088268525439344, "grad_norm": 1.604147335657894, "learning_rate": 9.414916358270697e-07, "loss": 0.3619, "step": 10406 }, { "epoch": 0.18090006779189627, "grad_norm": 2.007838850184586, "learning_rate": 9.414784217033487e-07, "loss": 0.4185, "step": 10407 }, { "epoch": 0.1809174503293991, "grad_norm": 4.6101241071236885, "learning_rate": 9.41465206180344e-07, "loss": 0.3232, "step": 10408 }, { "epoch": 0.1809348328669019, "grad_norm": 2.2695144764903543, "learning_rate": 9.414519892580974e-07, "loss": 0.4006, "step": 10409 }, { "epoch": 0.18095221540440473, "grad_norm": 2.0241313127372913, "learning_rate": 9.414387709366507e-07, "loss": 0.2929, "step": 10410 }, { "epoch": 0.18096959794190756, "grad_norm": 2.1339138918338336, "learning_rate": 9.41425551216046e-07, "loss": 0.4241, "step": 10411 }, { "epoch": 0.1809869804794104, "grad_norm": 2.450597877601687, "learning_rate": 9.41412330096325e-07, "loss": 0.4093, "step": 10412 }, { "epoch": 0.18100436301691322, "grad_norm": 2.1793980048900243, "learning_rate": 9.413991075775297e-07, "loss": 0.2161, "step": 10413 }, { "epoch": 0.18102174555441602, "grad_norm": 1.4420185457450097, "learning_rate": 9.413858836597019e-07, "loss": 0.3268, "step": 10414 }, { "epoch": 0.18103912809191886, "grad_norm": 2.1533527679011635, "learning_rate": 9.413726583428836e-07, "loss": 0.3916, "step": 10415 }, { "epoch": 0.18105651062942169, "grad_norm": 1.43954249976792, "learning_rate": 9.413594316271169e-07, "loss": 0.3065, "step": 10416 }, { "epoch": 0.18107389316692452, "grad_norm": 1.8006824969777604, "learning_rate": 9.413462035124436e-07, "loss": 0.5236, "step": 10417 }, { "epoch": 0.18109127570442735, "grad_norm": 1.2593638859846716, "learning_rate": 9.413329739989053e-07, "loss": 0.3459, "step": 10418 }, { "epoch": 0.18110865824193015, "grad_norm": 2.7439390710804923, "learning_rate": 9.413197430865442e-07, "loss": 0.5246, "step": 10419 }, { "epoch": 0.18112604077943298, "grad_norm": 7.394577710661388, "learning_rate": 9.413065107754024e-07, "loss": 0.642, "step": 10420 }, { "epoch": 0.1811434233169358, "grad_norm": 1.2353888904368109, "learning_rate": 9.412932770655218e-07, "loss": 0.4893, "step": 10421 }, { "epoch": 0.18116080585443864, "grad_norm": 0.9906044533567077, "learning_rate": 9.41280041956944e-07, "loss": 0.2599, "step": 10422 }, { "epoch": 0.18117818839194147, "grad_norm": 2.0669203618176764, "learning_rate": 9.412668054497112e-07, "loss": 0.4562, "step": 10423 }, { "epoch": 0.18119557092944427, "grad_norm": 1.610370751689484, "learning_rate": 9.412535675438655e-07, "loss": 0.3829, "step": 10424 }, { "epoch": 0.1812129534669471, "grad_norm": 1.6600044949954427, "learning_rate": 9.412403282394485e-07, "loss": 0.3235, "step": 10425 }, { "epoch": 0.18123033600444993, "grad_norm": 1.498522324870617, "learning_rate": 9.412270875365024e-07, "loss": 0.5037, "step": 10426 }, { "epoch": 0.18124771854195276, "grad_norm": 2.5734193406335586, "learning_rate": 9.412138454350691e-07, "loss": 0.5839, "step": 10427 }, { "epoch": 0.1812651010794556, "grad_norm": 1.9818435356689632, "learning_rate": 9.412006019351906e-07, "loss": 0.4551, "step": 10428 }, { "epoch": 0.1812824836169584, "grad_norm": 1.9657644074301364, "learning_rate": 9.411873570369089e-07, "loss": 0.4711, "step": 10429 }, { "epoch": 0.18129986615446123, "grad_norm": 1.5463636195049903, "learning_rate": 9.41174110740266e-07, "loss": 0.3474, "step": 10430 }, { "epoch": 0.18131724869196406, "grad_norm": 1.8338262373690541, "learning_rate": 9.411608630453037e-07, "loss": 0.395, "step": 10431 }, { "epoch": 0.18133463122946689, "grad_norm": 1.16693836829867, "learning_rate": 9.411476139520642e-07, "loss": 0.2182, "step": 10432 }, { "epoch": 0.1813520137669697, "grad_norm": 1.7726856524937984, "learning_rate": 9.411343634605893e-07, "loss": 0.5249, "step": 10433 }, { "epoch": 0.18136939630447252, "grad_norm": 1.3662903190165134, "learning_rate": 9.411211115709212e-07, "loss": 0.3868, "step": 10434 }, { "epoch": 0.18138677884197535, "grad_norm": 1.7439998403433015, "learning_rate": 9.411078582831018e-07, "loss": 0.3955, "step": 10435 }, { "epoch": 0.18140416137947818, "grad_norm": 1.3505118622605181, "learning_rate": 9.410946035971731e-07, "loss": 0.1664, "step": 10436 }, { "epoch": 0.181421543916981, "grad_norm": 2.0438375859100852, "learning_rate": 9.410813475131771e-07, "loss": 0.4011, "step": 10437 }, { "epoch": 0.1814389264544838, "grad_norm": 4.0800813415890405, "learning_rate": 9.410680900311559e-07, "loss": 0.7359, "step": 10438 }, { "epoch": 0.18145630899198664, "grad_norm": 1.6669155928268808, "learning_rate": 9.410548311511515e-07, "loss": 0.3663, "step": 10439 }, { "epoch": 0.18147369152948947, "grad_norm": 1.8059467867838417, "learning_rate": 9.410415708732058e-07, "loss": 0.4104, "step": 10440 }, { "epoch": 0.1814910740669923, "grad_norm": 1.4673748406143545, "learning_rate": 9.41028309197361e-07, "loss": 0.6049, "step": 10441 }, { "epoch": 0.18150845660449513, "grad_norm": 1.7590733306521678, "learning_rate": 9.41015046123659e-07, "loss": 0.5626, "step": 10442 }, { "epoch": 0.18152583914199794, "grad_norm": 1.1141559548636162, "learning_rate": 9.410017816521419e-07, "loss": 0.6806, "step": 10443 }, { "epoch": 0.18154322167950077, "grad_norm": 1.2261960784143642, "learning_rate": 9.409885157828518e-07, "loss": 0.4906, "step": 10444 }, { "epoch": 0.1815606042170036, "grad_norm": 3.8306324544871986, "learning_rate": 9.409752485158305e-07, "loss": 0.9242, "step": 10445 }, { "epoch": 0.18157798675450643, "grad_norm": 1.9333121744805242, "learning_rate": 9.409619798511203e-07, "loss": 0.4058, "step": 10446 }, { "epoch": 0.18159536929200926, "grad_norm": 1.7256411206495155, "learning_rate": 9.409487097887634e-07, "loss": 0.3112, "step": 10447 }, { "epoch": 0.18161275182951206, "grad_norm": 2.4917145081698684, "learning_rate": 9.409354383288014e-07, "loss": 0.3652, "step": 10448 }, { "epoch": 0.1816301343670149, "grad_norm": 2.272823571190787, "learning_rate": 9.409221654712766e-07, "loss": 0.4802, "step": 10449 }, { "epoch": 0.18164751690451772, "grad_norm": 3.4378485517104114, "learning_rate": 9.409088912162313e-07, "loss": 0.5865, "step": 10450 }, { "epoch": 0.18166489944202055, "grad_norm": 2.796723271166048, "learning_rate": 9.408956155637071e-07, "loss": 0.4556, "step": 10451 }, { "epoch": 0.18168228197952338, "grad_norm": 2.055033585934786, "learning_rate": 9.408823385137465e-07, "loss": 0.4265, "step": 10452 }, { "epoch": 0.18169966451702618, "grad_norm": 1.5899184389215935, "learning_rate": 9.408690600663914e-07, "loss": 0.4579, "step": 10453 }, { "epoch": 0.181717047054529, "grad_norm": 1.3717226487470269, "learning_rate": 9.408557802216837e-07, "loss": 0.2824, "step": 10454 }, { "epoch": 0.18173442959203184, "grad_norm": 2.9863323766349357, "learning_rate": 9.40842498979666e-07, "loss": 0.3353, "step": 10455 }, { "epoch": 0.18175181212953467, "grad_norm": 0.9131948102640187, "learning_rate": 9.408292163403798e-07, "loss": 0.3004, "step": 10456 }, { "epoch": 0.1817691946670375, "grad_norm": 2.0587117660437992, "learning_rate": 9.408159323038676e-07, "loss": 0.6265, "step": 10457 }, { "epoch": 0.1817865772045403, "grad_norm": 4.898338458282794, "learning_rate": 9.408026468701715e-07, "loss": 0.4091, "step": 10458 }, { "epoch": 0.18180395974204314, "grad_norm": 1.069058276870015, "learning_rate": 9.407893600393332e-07, "loss": 0.5608, "step": 10459 }, { "epoch": 0.18182134227954597, "grad_norm": 1.8910029542285485, "learning_rate": 9.407760718113955e-07, "loss": 0.3721, "step": 10460 }, { "epoch": 0.1818387248170488, "grad_norm": 0.9442844379705208, "learning_rate": 9.407627821863999e-07, "loss": 0.2376, "step": 10461 }, { "epoch": 0.18185610735455163, "grad_norm": 2.097429609753519, "learning_rate": 9.407494911643888e-07, "loss": 0.5242, "step": 10462 }, { "epoch": 0.18187348989205443, "grad_norm": 1.3571410833782602, "learning_rate": 9.407361987454042e-07, "loss": 0.3976, "step": 10463 }, { "epoch": 0.18189087242955726, "grad_norm": 1.961536565163421, "learning_rate": 9.407229049294883e-07, "loss": 0.4035, "step": 10464 }, { "epoch": 0.1819082549670601, "grad_norm": 3.217473515718446, "learning_rate": 9.407096097166833e-07, "loss": 0.4601, "step": 10465 }, { "epoch": 0.18192563750456292, "grad_norm": 1.4778608576868375, "learning_rate": 9.406963131070313e-07, "loss": 0.5387, "step": 10466 }, { "epoch": 0.18194302004206575, "grad_norm": 1.6417771423177472, "learning_rate": 9.406830151005744e-07, "loss": 0.3616, "step": 10467 }, { "epoch": 0.18196040257956855, "grad_norm": 2.3045101720914594, "learning_rate": 9.406697156973549e-07, "loss": 0.4765, "step": 10468 }, { "epoch": 0.18197778511707138, "grad_norm": 1.345184426752037, "learning_rate": 9.406564148974148e-07, "loss": 0.268, "step": 10469 }, { "epoch": 0.1819951676545742, "grad_norm": 1.7717907566592277, "learning_rate": 9.406431127007961e-07, "loss": 0.3972, "step": 10470 }, { "epoch": 0.18201255019207704, "grad_norm": 1.9888121360425501, "learning_rate": 9.406298091075413e-07, "loss": 0.3901, "step": 10471 }, { "epoch": 0.18202993272957987, "grad_norm": 1.4791482196753067, "learning_rate": 9.406165041176924e-07, "loss": 0.2525, "step": 10472 }, { "epoch": 0.18204731526708268, "grad_norm": 1.9953396448052507, "learning_rate": 9.406031977312916e-07, "loss": 0.458, "step": 10473 }, { "epoch": 0.1820646978045855, "grad_norm": 2.071878066997956, "learning_rate": 9.40589889948381e-07, "loss": 0.679, "step": 10474 }, { "epoch": 0.18208208034208834, "grad_norm": 1.590774511113333, "learning_rate": 9.405765807690028e-07, "loss": 0.4638, "step": 10475 }, { "epoch": 0.18209946287959117, "grad_norm": 1.84504593200448, "learning_rate": 9.405632701931993e-07, "loss": 0.3971, "step": 10476 }, { "epoch": 0.182116845417094, "grad_norm": 2.0004769433561327, "learning_rate": 9.405499582210126e-07, "loss": 0.3901, "step": 10477 }, { "epoch": 0.1821342279545968, "grad_norm": 2.65855104099671, "learning_rate": 9.405366448524849e-07, "loss": 0.2848, "step": 10478 }, { "epoch": 0.18215161049209963, "grad_norm": 1.5262347967178806, "learning_rate": 9.405233300876583e-07, "loss": 0.656, "step": 10479 }, { "epoch": 0.18216899302960246, "grad_norm": 1.8294715732047762, "learning_rate": 9.405100139265752e-07, "loss": 0.3583, "step": 10480 }, { "epoch": 0.1821863755671053, "grad_norm": 5.55813457669548, "learning_rate": 9.404966963692778e-07, "loss": 0.4635, "step": 10481 }, { "epoch": 0.18220375810460812, "grad_norm": 1.8129258679013163, "learning_rate": 9.40483377415808e-07, "loss": 0.3709, "step": 10482 }, { "epoch": 0.18222114064211092, "grad_norm": 1.6233354817794268, "learning_rate": 9.404700570662085e-07, "loss": 0.5594, "step": 10483 }, { "epoch": 0.18223852317961375, "grad_norm": 1.7803676946035103, "learning_rate": 9.404567353205211e-07, "loss": 0.4748, "step": 10484 }, { "epoch": 0.18225590571711658, "grad_norm": 1.9064265752279888, "learning_rate": 9.404434121787882e-07, "loss": 0.5148, "step": 10485 }, { "epoch": 0.18227328825461941, "grad_norm": 1.4880200943355, "learning_rate": 9.40430087641052e-07, "loss": 0.5413, "step": 10486 }, { "epoch": 0.18229067079212224, "grad_norm": 1.8907411528473521, "learning_rate": 9.404167617073547e-07, "loss": 0.5381, "step": 10487 }, { "epoch": 0.18230805332962505, "grad_norm": 3.1594688277445346, "learning_rate": 9.404034343777387e-07, "loss": 0.3352, "step": 10488 }, { "epoch": 0.18232543586712788, "grad_norm": 1.7861109082524085, "learning_rate": 9.40390105652246e-07, "loss": 0.6175, "step": 10489 }, { "epoch": 0.1823428184046307, "grad_norm": 2.2490364775269263, "learning_rate": 9.403767755309189e-07, "loss": 0.4714, "step": 10490 }, { "epoch": 0.18236020094213354, "grad_norm": 2.034747338968151, "learning_rate": 9.403634440138e-07, "loss": 0.4766, "step": 10491 }, { "epoch": 0.18237758347963637, "grad_norm": 2.3957730095781686, "learning_rate": 9.40350111100931e-07, "loss": 0.2244, "step": 10492 }, { "epoch": 0.18239496601713917, "grad_norm": 2.130048923115039, "learning_rate": 9.403367767923544e-07, "loss": 0.2202, "step": 10493 }, { "epoch": 0.182412348554642, "grad_norm": 2.4087058590169974, "learning_rate": 9.403234410881126e-07, "loss": 0.4698, "step": 10494 }, { "epoch": 0.18242973109214483, "grad_norm": 2.0799244194045143, "learning_rate": 9.40310103988248e-07, "loss": 0.2541, "step": 10495 }, { "epoch": 0.18244711362964766, "grad_norm": 2.334121706842099, "learning_rate": 9.402967654928022e-07, "loss": 0.4451, "step": 10496 }, { "epoch": 0.1824644961671505, "grad_norm": 2.075539487693418, "learning_rate": 9.402834256018182e-07, "loss": 0.4543, "step": 10497 }, { "epoch": 0.1824818787046533, "grad_norm": 2.3188064818180893, "learning_rate": 9.402700843153379e-07, "loss": 0.4345, "step": 10498 }, { "epoch": 0.18249926124215612, "grad_norm": 1.1188578099522462, "learning_rate": 9.402567416334037e-07, "loss": 0.2737, "step": 10499 }, { "epoch": 0.18251664377965895, "grad_norm": 1.2871260182523918, "learning_rate": 9.402433975560577e-07, "loss": 0.3023, "step": 10500 }, { "epoch": 0.18253402631716178, "grad_norm": 2.3681794570325714, "learning_rate": 9.402300520833426e-07, "loss": 0.5945, "step": 10501 }, { "epoch": 0.18255140885466462, "grad_norm": 2.2375035913126298, "learning_rate": 9.402167052153003e-07, "loss": 0.4369, "step": 10502 }, { "epoch": 0.18256879139216742, "grad_norm": 1.555306800776558, "learning_rate": 9.402033569519734e-07, "loss": 0.3247, "step": 10503 }, { "epoch": 0.18258617392967025, "grad_norm": 1.8961960233367339, "learning_rate": 9.401900072934039e-07, "loss": 0.3705, "step": 10504 }, { "epoch": 0.18260355646717308, "grad_norm": 1.4829177325913545, "learning_rate": 9.401766562396344e-07, "loss": 0.2909, "step": 10505 }, { "epoch": 0.1826209390046759, "grad_norm": 1.9429980135100355, "learning_rate": 9.40163303790707e-07, "loss": 0.5649, "step": 10506 }, { "epoch": 0.18263832154217874, "grad_norm": 1.736873943626842, "learning_rate": 9.401499499466643e-07, "loss": 0.5132, "step": 10507 }, { "epoch": 0.18265570407968154, "grad_norm": 1.6668788396916845, "learning_rate": 9.401365947075483e-07, "loss": 0.2395, "step": 10508 }, { "epoch": 0.18267308661718437, "grad_norm": 1.518987031962988, "learning_rate": 9.401232380734017e-07, "loss": 0.3724, "step": 10509 }, { "epoch": 0.1826904691546872, "grad_norm": 2.013760056370889, "learning_rate": 9.401098800442664e-07, "loss": 0.3117, "step": 10510 }, { "epoch": 0.18270785169219003, "grad_norm": 1.9393490752987297, "learning_rate": 9.40096520620185e-07, "loss": 0.3673, "step": 10511 }, { "epoch": 0.18272523422969286, "grad_norm": 1.5175746297893544, "learning_rate": 9.400831598011997e-07, "loss": 0.4555, "step": 10512 }, { "epoch": 0.18274261676719566, "grad_norm": 2.2582735620248027, "learning_rate": 9.40069797587353e-07, "loss": 0.5102, "step": 10513 }, { "epoch": 0.1827599993046985, "grad_norm": 1.5062954217818834, "learning_rate": 9.400564339786872e-07, "loss": 0.2576, "step": 10514 }, { "epoch": 0.18277738184220133, "grad_norm": 3.384457294974534, "learning_rate": 9.400430689752448e-07, "loss": 0.2041, "step": 10515 }, { "epoch": 0.18279476437970416, "grad_norm": 1.1589736247732318, "learning_rate": 9.400297025770678e-07, "loss": 0.209, "step": 10516 }, { "epoch": 0.18281214691720699, "grad_norm": 1.776337302443857, "learning_rate": 9.400163347841988e-07, "loss": 0.3415, "step": 10517 }, { "epoch": 0.1828295294547098, "grad_norm": 2.497705719025696, "learning_rate": 9.400029655966802e-07, "loss": 0.5436, "step": 10518 }, { "epoch": 0.18284691199221262, "grad_norm": 2.414717325176647, "learning_rate": 9.399895950145543e-07, "loss": 0.5251, "step": 10519 }, { "epoch": 0.18286429452971545, "grad_norm": 1.3118137904622145, "learning_rate": 9.399762230378636e-07, "loss": 0.4017, "step": 10520 }, { "epoch": 0.18288167706721828, "grad_norm": 1.9743464634848156, "learning_rate": 9.399628496666503e-07, "loss": 0.5173, "step": 10521 }, { "epoch": 0.1828990596047211, "grad_norm": 2.253757125550566, "learning_rate": 9.399494749009568e-07, "loss": 0.2672, "step": 10522 }, { "epoch": 0.1829164421422239, "grad_norm": 2.008591947889888, "learning_rate": 9.399360987408256e-07, "loss": 0.4246, "step": 10523 }, { "epoch": 0.18293382467972674, "grad_norm": 1.720904181870157, "learning_rate": 9.399227211862991e-07, "loss": 0.5243, "step": 10524 }, { "epoch": 0.18295120721722957, "grad_norm": 3.2415501797766546, "learning_rate": 9.399093422374196e-07, "loss": 0.4681, "step": 10525 }, { "epoch": 0.1829685897547324, "grad_norm": 2.1492749327945426, "learning_rate": 9.398959618942297e-07, "loss": 0.4119, "step": 10526 }, { "epoch": 0.18298597229223523, "grad_norm": 1.5226921752834512, "learning_rate": 9.398825801567716e-07, "loss": 0.4014, "step": 10527 }, { "epoch": 0.18300335482973804, "grad_norm": 4.998186981892907, "learning_rate": 9.398691970250877e-07, "loss": 0.385, "step": 10528 }, { "epoch": 0.18302073736724087, "grad_norm": 2.1302992133529473, "learning_rate": 9.398558124992207e-07, "loss": 0.3339, "step": 10529 }, { "epoch": 0.1830381199047437, "grad_norm": 1.648111052698226, "learning_rate": 9.398424265792125e-07, "loss": 0.4233, "step": 10530 }, { "epoch": 0.18305550244224653, "grad_norm": 2.0003130219937173, "learning_rate": 9.39829039265106e-07, "loss": 0.5327, "step": 10531 }, { "epoch": 0.18307288497974936, "grad_norm": 1.6853817677239251, "learning_rate": 9.398156505569436e-07, "loss": 0.2918, "step": 10532 }, { "epoch": 0.18309026751725216, "grad_norm": 2.317546697538034, "learning_rate": 9.398022604547675e-07, "loss": 0.455, "step": 10533 }, { "epoch": 0.183107650054755, "grad_norm": 1.4008768615603255, "learning_rate": 9.397888689586204e-07, "loss": 0.3666, "step": 10534 }, { "epoch": 0.18312503259225782, "grad_norm": 2.353572580241452, "learning_rate": 9.397754760685446e-07, "loss": 0.4464, "step": 10535 }, { "epoch": 0.18314241512976065, "grad_norm": 1.8597703714641807, "learning_rate": 9.397620817845823e-07, "loss": 0.543, "step": 10536 }, { "epoch": 0.18315979766726348, "grad_norm": 1.6060804469219112, "learning_rate": 9.397486861067765e-07, "loss": 0.3647, "step": 10537 }, { "epoch": 0.18317718020476628, "grad_norm": 1.6306957291945978, "learning_rate": 9.397352890351693e-07, "loss": 0.4145, "step": 10538 }, { "epoch": 0.1831945627422691, "grad_norm": 0.956806972160791, "learning_rate": 9.397218905698033e-07, "loss": 0.2991, "step": 10539 }, { "epoch": 0.18321194527977194, "grad_norm": 3.804240937137323, "learning_rate": 9.397084907107206e-07, "loss": 0.5753, "step": 10540 }, { "epoch": 0.18322932781727477, "grad_norm": 1.5646471861878941, "learning_rate": 9.396950894579643e-07, "loss": 0.4615, "step": 10541 }, { "epoch": 0.1832467103547776, "grad_norm": 1.479643222853917, "learning_rate": 9.396816868115764e-07, "loss": 0.243, "step": 10542 }, { "epoch": 0.1832640928922804, "grad_norm": 1.685877236999462, "learning_rate": 9.396682827715995e-07, "loss": 0.2373, "step": 10543 }, { "epoch": 0.18328147542978324, "grad_norm": 4.985110281411013, "learning_rate": 9.396548773380761e-07, "loss": 0.5419, "step": 10544 }, { "epoch": 0.18329885796728607, "grad_norm": 1.8846700712784967, "learning_rate": 9.396414705110488e-07, "loss": 0.5994, "step": 10545 }, { "epoch": 0.1833162405047889, "grad_norm": 2.148526026274236, "learning_rate": 9.3962806229056e-07, "loss": 0.3915, "step": 10546 }, { "epoch": 0.18333362304229173, "grad_norm": 1.642477861168994, "learning_rate": 9.39614652676652e-07, "loss": 0.3362, "step": 10547 }, { "epoch": 0.18335100557979453, "grad_norm": 2.441180650273237, "learning_rate": 9.396012416693677e-07, "loss": 0.4994, "step": 10548 }, { "epoch": 0.18336838811729736, "grad_norm": 2.0501622699753757, "learning_rate": 9.395878292687493e-07, "loss": 0.8498, "step": 10549 }, { "epoch": 0.1833857706548002, "grad_norm": 1.4440722301432758, "learning_rate": 9.395744154748395e-07, "loss": 0.3078, "step": 10550 }, { "epoch": 0.18340315319230302, "grad_norm": 2.1619579556827335, "learning_rate": 9.395610002876806e-07, "loss": 0.3708, "step": 10551 }, { "epoch": 0.18342053572980585, "grad_norm": 0.8148053330446169, "learning_rate": 9.395475837073154e-07, "loss": 0.4098, "step": 10552 }, { "epoch": 0.18343791826730865, "grad_norm": 2.496818211868371, "learning_rate": 9.39534165733786e-07, "loss": 0.4443, "step": 10553 }, { "epoch": 0.18345530080481148, "grad_norm": 2.3023677778246263, "learning_rate": 9.395207463671356e-07, "loss": 0.4341, "step": 10554 }, { "epoch": 0.1834726833423143, "grad_norm": 2.0358790602645076, "learning_rate": 9.39507325607406e-07, "loss": 0.331, "step": 10555 }, { "epoch": 0.18349006587981714, "grad_norm": 1.9732163550702126, "learning_rate": 9.394939034546401e-07, "loss": 0.3873, "step": 10556 }, { "epoch": 0.18350744841731997, "grad_norm": 2.2180387614275867, "learning_rate": 9.394804799088805e-07, "loss": 0.4016, "step": 10557 }, { "epoch": 0.18352483095482278, "grad_norm": 1.8140472343219423, "learning_rate": 9.394670549701697e-07, "loss": 0.4727, "step": 10558 }, { "epoch": 0.1835422134923256, "grad_norm": 5.573769400284986, "learning_rate": 9.394536286385503e-07, "loss": 0.7079, "step": 10559 }, { "epoch": 0.18355959602982844, "grad_norm": 1.459038982642081, "learning_rate": 9.394402009140645e-07, "loss": 0.2861, "step": 10560 }, { "epoch": 0.18357697856733127, "grad_norm": 1.5692022268939545, "learning_rate": 9.394267717967554e-07, "loss": 0.3989, "step": 10561 }, { "epoch": 0.1835943611048341, "grad_norm": 1.2385661663425462, "learning_rate": 9.394133412866651e-07, "loss": 0.2853, "step": 10562 }, { "epoch": 0.1836117436423369, "grad_norm": 1.29833164712511, "learning_rate": 9.393999093838364e-07, "loss": 0.3672, "step": 10563 }, { "epoch": 0.18362912617983973, "grad_norm": 2.1930060650113004, "learning_rate": 9.393864760883119e-07, "loss": 0.2805, "step": 10564 }, { "epoch": 0.18364650871734256, "grad_norm": 1.6757977353292206, "learning_rate": 9.393730414001339e-07, "loss": 0.3055, "step": 10565 }, { "epoch": 0.1836638912548454, "grad_norm": 2.6390699053641766, "learning_rate": 9.393596053193455e-07, "loss": 0.4284, "step": 10566 }, { "epoch": 0.18368127379234822, "grad_norm": 2.2366875560711974, "learning_rate": 9.393461678459888e-07, "loss": 0.4492, "step": 10567 }, { "epoch": 0.18369865632985102, "grad_norm": 1.9401700074006953, "learning_rate": 9.393327289801066e-07, "loss": 0.3717, "step": 10568 }, { "epoch": 0.18371603886735385, "grad_norm": 2.469404272290099, "learning_rate": 9.393192887217414e-07, "loss": 0.4078, "step": 10569 }, { "epoch": 0.18373342140485668, "grad_norm": 2.6448687658162915, "learning_rate": 9.393058470709358e-07, "loss": 0.5487, "step": 10570 }, { "epoch": 0.18375080394235951, "grad_norm": 3.253928809823904, "learning_rate": 9.392924040277326e-07, "loss": 0.2736, "step": 10571 }, { "epoch": 0.18376818647986232, "grad_norm": 2.231832452243405, "learning_rate": 9.392789595921742e-07, "loss": 0.3082, "step": 10572 }, { "epoch": 0.18378556901736515, "grad_norm": 1.7959533497465132, "learning_rate": 9.392655137643034e-07, "loss": 0.4276, "step": 10573 }, { "epoch": 0.18380295155486798, "grad_norm": 1.504435036455891, "learning_rate": 9.392520665441627e-07, "loss": 0.3472, "step": 10574 }, { "epoch": 0.1838203340923708, "grad_norm": 1.9477090719238808, "learning_rate": 9.392386179317946e-07, "loss": 0.4225, "step": 10575 }, { "epoch": 0.18383771662987364, "grad_norm": 2.396272996525619, "learning_rate": 9.39225167927242e-07, "loss": 0.3439, "step": 10576 }, { "epoch": 0.18385509916737644, "grad_norm": 1.6520822878547246, "learning_rate": 9.392117165305472e-07, "loss": 0.3732, "step": 10577 }, { "epoch": 0.18387248170487927, "grad_norm": 1.5322392861096028, "learning_rate": 9.391982637417532e-07, "loss": 0.2976, "step": 10578 }, { "epoch": 0.1838898642423821, "grad_norm": 2.7544603005760235, "learning_rate": 9.391848095609023e-07, "loss": 0.4484, "step": 10579 }, { "epoch": 0.18390724677988493, "grad_norm": 1.2866726988979211, "learning_rate": 9.391713539880374e-07, "loss": 0.3272, "step": 10580 }, { "epoch": 0.18392462931738776, "grad_norm": 1.6161323669175263, "learning_rate": 9.391578970232011e-07, "loss": 0.6596, "step": 10581 }, { "epoch": 0.18394201185489056, "grad_norm": 4.5102140991451805, "learning_rate": 9.391444386664358e-07, "loss": 0.2877, "step": 10582 }, { "epoch": 0.1839593943923934, "grad_norm": 1.559306213232631, "learning_rate": 9.391309789177844e-07, "loss": 0.5853, "step": 10583 }, { "epoch": 0.18397677692989622, "grad_norm": 1.3866111433049582, "learning_rate": 9.391175177772897e-07, "loss": 0.4309, "step": 10584 }, { "epoch": 0.18399415946739905, "grad_norm": 2.0414637363293484, "learning_rate": 9.39104055244994e-07, "loss": 0.5406, "step": 10585 }, { "epoch": 0.18401154200490188, "grad_norm": 1.9561432385622246, "learning_rate": 9.390905913209401e-07, "loss": 0.4206, "step": 10586 }, { "epoch": 0.1840289245424047, "grad_norm": 1.4666438248244624, "learning_rate": 9.390771260051709e-07, "loss": 0.6284, "step": 10587 }, { "epoch": 0.18404630707990752, "grad_norm": 1.6824557848771446, "learning_rate": 9.390636592977289e-07, "loss": 0.4762, "step": 10588 }, { "epoch": 0.18406368961741035, "grad_norm": 1.4100773003566767, "learning_rate": 9.390501911986568e-07, "loss": 0.3791, "step": 10589 }, { "epoch": 0.18408107215491318, "grad_norm": 1.646439752673158, "learning_rate": 9.390367217079971e-07, "loss": 0.533, "step": 10590 }, { "epoch": 0.184098454692416, "grad_norm": 4.178582504712914, "learning_rate": 9.390232508257928e-07, "loss": 0.4059, "step": 10591 }, { "epoch": 0.1841158372299188, "grad_norm": 1.351528892782142, "learning_rate": 9.390097785520865e-07, "loss": 0.3674, "step": 10592 }, { "epoch": 0.18413321976742164, "grad_norm": 2.0155759969181406, "learning_rate": 9.389963048869208e-07, "loss": 0.3574, "step": 10593 }, { "epoch": 0.18415060230492447, "grad_norm": 1.8350975556175102, "learning_rate": 9.389828298303385e-07, "loss": 0.4541, "step": 10594 }, { "epoch": 0.1841679848424273, "grad_norm": 1.8959098021045606, "learning_rate": 9.389693533823823e-07, "loss": 0.6295, "step": 10595 }, { "epoch": 0.18418536737993013, "grad_norm": 2.0855124946863532, "learning_rate": 9.389558755430948e-07, "loss": 0.3998, "step": 10596 }, { "epoch": 0.18420274991743293, "grad_norm": 1.0649736579905789, "learning_rate": 9.389423963125189e-07, "loss": 0.1897, "step": 10597 }, { "epoch": 0.18422013245493576, "grad_norm": 1.8490780974404197, "learning_rate": 9.389289156906972e-07, "loss": 0.495, "step": 10598 }, { "epoch": 0.1842375149924386, "grad_norm": 2.5512463737431186, "learning_rate": 9.389154336776725e-07, "loss": 0.3049, "step": 10599 }, { "epoch": 0.18425489752994142, "grad_norm": 1.8286956967402903, "learning_rate": 9.389019502734874e-07, "loss": 0.2713, "step": 10600 }, { "epoch": 0.18427228006744426, "grad_norm": 1.7232557268978523, "learning_rate": 9.388884654781849e-07, "loss": 0.4969, "step": 10601 }, { "epoch": 0.18428966260494706, "grad_norm": 1.4722329275285753, "learning_rate": 9.388749792918075e-07, "loss": 0.3186, "step": 10602 }, { "epoch": 0.1843070451424499, "grad_norm": 2.0897646949868887, "learning_rate": 9.38861491714398e-07, "loss": 0.5347, "step": 10603 }, { "epoch": 0.18432442767995272, "grad_norm": 1.9899809937968218, "learning_rate": 9.388480027459991e-07, "loss": 0.3249, "step": 10604 }, { "epoch": 0.18434181021745555, "grad_norm": 2.7538065628075508, "learning_rate": 9.388345123866537e-07, "loss": 0.4303, "step": 10605 }, { "epoch": 0.18435919275495838, "grad_norm": 1.9021138504681234, "learning_rate": 9.388210206364045e-07, "loss": 0.4376, "step": 10606 }, { "epoch": 0.18437657529246118, "grad_norm": 2.065577471832697, "learning_rate": 9.388075274952943e-07, "loss": 0.4042, "step": 10607 }, { "epoch": 0.184393957829964, "grad_norm": 2.632588823203498, "learning_rate": 9.387940329633657e-07, "loss": 0.3078, "step": 10608 }, { "epoch": 0.18441134036746684, "grad_norm": 2.047280974251461, "learning_rate": 9.387805370406616e-07, "loss": 0.3071, "step": 10609 }, { "epoch": 0.18442872290496967, "grad_norm": 1.629309364781099, "learning_rate": 9.387670397272247e-07, "loss": 0.2166, "step": 10610 }, { "epoch": 0.1844461054424725, "grad_norm": 1.5476729437542067, "learning_rate": 9.38753541023098e-07, "loss": 0.3364, "step": 10611 }, { "epoch": 0.1844634879799753, "grad_norm": 2.8087541492010635, "learning_rate": 9.38740040928324e-07, "loss": 0.4445, "step": 10612 }, { "epoch": 0.18448087051747813, "grad_norm": 1.1943755645750682, "learning_rate": 9.387265394429455e-07, "loss": 0.5507, "step": 10613 }, { "epoch": 0.18449825305498097, "grad_norm": 1.3034337893797843, "learning_rate": 9.387130365670056e-07, "loss": 0.1745, "step": 10614 }, { "epoch": 0.1845156355924838, "grad_norm": 3.060616757111848, "learning_rate": 9.386995323005469e-07, "loss": 0.2957, "step": 10615 }, { "epoch": 0.18453301812998663, "grad_norm": 1.393228231432462, "learning_rate": 9.386860266436121e-07, "loss": 0.329, "step": 10616 }, { "epoch": 0.18455040066748943, "grad_norm": 1.6075103148909187, "learning_rate": 9.386725195962441e-07, "loss": 0.2731, "step": 10617 }, { "epoch": 0.18456778320499226, "grad_norm": 1.9519334084515008, "learning_rate": 9.386590111584858e-07, "loss": 0.3952, "step": 10618 }, { "epoch": 0.1845851657424951, "grad_norm": 1.9825065794827508, "learning_rate": 9.3864550133038e-07, "loss": 0.1845, "step": 10619 }, { "epoch": 0.18460254827999792, "grad_norm": 2.2939819286813763, "learning_rate": 9.386319901119693e-07, "loss": 0.4078, "step": 10620 }, { "epoch": 0.18461993081750075, "grad_norm": 2.803912304312451, "learning_rate": 9.386184775032968e-07, "loss": 0.506, "step": 10621 }, { "epoch": 0.18463731335500355, "grad_norm": 1.638164580194395, "learning_rate": 9.386049635044051e-07, "loss": 0.2459, "step": 10622 }, { "epoch": 0.18465469589250638, "grad_norm": 2.1811899677390056, "learning_rate": 9.385914481153372e-07, "loss": 0.5407, "step": 10623 }, { "epoch": 0.1846720784300092, "grad_norm": 1.9186742934989736, "learning_rate": 9.385779313361359e-07, "loss": 0.4493, "step": 10624 }, { "epoch": 0.18468946096751204, "grad_norm": 2.2880207718114938, "learning_rate": 9.38564413166844e-07, "loss": 0.4341, "step": 10625 }, { "epoch": 0.18470684350501487, "grad_norm": 2.7043188266839095, "learning_rate": 9.385508936075044e-07, "loss": 0.2992, "step": 10626 }, { "epoch": 0.18472422604251768, "grad_norm": 1.589395289943266, "learning_rate": 9.3853737265816e-07, "loss": 0.3027, "step": 10627 }, { "epoch": 0.1847416085800205, "grad_norm": 1.188222634122069, "learning_rate": 9.385238503188535e-07, "loss": 0.2517, "step": 10628 }, { "epoch": 0.18475899111752334, "grad_norm": 1.8185698367979126, "learning_rate": 9.385103265896277e-07, "loss": 0.2846, "step": 10629 }, { "epoch": 0.18477637365502617, "grad_norm": 1.720486736218457, "learning_rate": 9.384968014705258e-07, "loss": 0.4943, "step": 10630 }, { "epoch": 0.184793756192529, "grad_norm": 1.8593833362443193, "learning_rate": 9.384832749615904e-07, "loss": 0.532, "step": 10631 }, { "epoch": 0.1848111387300318, "grad_norm": 2.019020907567366, "learning_rate": 9.384697470628644e-07, "loss": 0.283, "step": 10632 }, { "epoch": 0.18482852126753463, "grad_norm": 1.2870607294466, "learning_rate": 9.384562177743908e-07, "loss": 0.2434, "step": 10633 }, { "epoch": 0.18484590380503746, "grad_norm": 2.346884077902384, "learning_rate": 9.384426870962123e-07, "loss": 0.4804, "step": 10634 }, { "epoch": 0.1848632863425403, "grad_norm": 3.2200465942074845, "learning_rate": 9.38429155028372e-07, "loss": 0.4011, "step": 10635 }, { "epoch": 0.18488066888004312, "grad_norm": 1.5855081914040012, "learning_rate": 9.384156215709126e-07, "loss": 0.4717, "step": 10636 }, { "epoch": 0.18489805141754592, "grad_norm": 1.995448672411366, "learning_rate": 9.384020867238771e-07, "loss": 0.5086, "step": 10637 }, { "epoch": 0.18491543395504875, "grad_norm": 2.0745885504544335, "learning_rate": 9.383885504873084e-07, "loss": 0.3447, "step": 10638 }, { "epoch": 0.18493281649255158, "grad_norm": 2.375192375683018, "learning_rate": 9.383750128612492e-07, "loss": 0.3721, "step": 10639 }, { "epoch": 0.1849501990300544, "grad_norm": 3.3158721530012834, "learning_rate": 9.383614738457428e-07, "loss": 0.3133, "step": 10640 }, { "epoch": 0.18496758156755724, "grad_norm": 1.2697560961368517, "learning_rate": 9.383479334408317e-07, "loss": 0.5007, "step": 10641 }, { "epoch": 0.18498496410506005, "grad_norm": 2.543613886728198, "learning_rate": 9.383343916465591e-07, "loss": 0.3024, "step": 10642 }, { "epoch": 0.18500234664256288, "grad_norm": 1.7864703163458289, "learning_rate": 9.383208484629679e-07, "loss": 0.3098, "step": 10643 }, { "epoch": 0.1850197291800657, "grad_norm": 2.746516512075638, "learning_rate": 9.38307303890101e-07, "loss": 0.2856, "step": 10644 }, { "epoch": 0.18503711171756854, "grad_norm": 2.741969480251618, "learning_rate": 9.382937579280011e-07, "loss": 0.425, "step": 10645 }, { "epoch": 0.18505449425507137, "grad_norm": 2.3088929575876085, "learning_rate": 9.382802105767114e-07, "loss": 0.3579, "step": 10646 }, { "epoch": 0.18507187679257417, "grad_norm": 1.2427885762008546, "learning_rate": 9.38266661836275e-07, "loss": 0.2672, "step": 10647 }, { "epoch": 0.185089259330077, "grad_norm": 2.4282841260094647, "learning_rate": 9.382531117067343e-07, "loss": 0.2383, "step": 10648 }, { "epoch": 0.18510664186757983, "grad_norm": 1.1393161838653612, "learning_rate": 9.382395601881326e-07, "loss": 0.2679, "step": 10649 }, { "epoch": 0.18512402440508266, "grad_norm": 1.4585750883752364, "learning_rate": 9.382260072805128e-07, "loss": 0.1649, "step": 10650 }, { "epoch": 0.1851414069425855, "grad_norm": 1.874050821212778, "learning_rate": 9.38212452983918e-07, "loss": 0.4089, "step": 10651 }, { "epoch": 0.1851587894800883, "grad_norm": 1.7501855307112555, "learning_rate": 9.381988972983907e-07, "loss": 0.6012, "step": 10652 }, { "epoch": 0.18517617201759112, "grad_norm": 2.263907148664262, "learning_rate": 9.381853402239745e-07, "loss": 0.4859, "step": 10653 }, { "epoch": 0.18519355455509395, "grad_norm": 1.510546774895527, "learning_rate": 9.38171781760712e-07, "loss": 0.3057, "step": 10654 }, { "epoch": 0.18521093709259678, "grad_norm": 1.9954281698115146, "learning_rate": 9.381582219086462e-07, "loss": 0.4342, "step": 10655 }, { "epoch": 0.1852283196300996, "grad_norm": 1.5108598772074122, "learning_rate": 9.3814466066782e-07, "loss": 0.2153, "step": 10656 }, { "epoch": 0.18524570216760242, "grad_norm": 2.1636855172091507, "learning_rate": 9.381310980382767e-07, "loss": 0.4931, "step": 10657 }, { "epoch": 0.18526308470510525, "grad_norm": 2.489878724895594, "learning_rate": 9.38117534020059e-07, "loss": 0.3203, "step": 10658 }, { "epoch": 0.18528046724260808, "grad_norm": 1.5420754779146677, "learning_rate": 9.3810396861321e-07, "loss": 0.3562, "step": 10659 }, { "epoch": 0.1852978497801109, "grad_norm": 2.797884078469793, "learning_rate": 9.380904018177727e-07, "loss": 0.3127, "step": 10660 }, { "epoch": 0.18531523231761374, "grad_norm": 1.9230555861829837, "learning_rate": 9.380768336337899e-07, "loss": 0.3959, "step": 10661 }, { "epoch": 0.18533261485511654, "grad_norm": 2.2452105288324833, "learning_rate": 9.380632640613049e-07, "loss": 0.3227, "step": 10662 }, { "epoch": 0.18534999739261937, "grad_norm": 2.2396353755671554, "learning_rate": 9.380496931003605e-07, "loss": 0.5539, "step": 10663 }, { "epoch": 0.1853673799301222, "grad_norm": 1.2334465117143842, "learning_rate": 9.38036120751e-07, "loss": 0.2405, "step": 10664 }, { "epoch": 0.18538476246762503, "grad_norm": 2.028935154800365, "learning_rate": 9.380225470132659e-07, "loss": 0.335, "step": 10665 }, { "epoch": 0.18540214500512786, "grad_norm": 1.5788100732281745, "learning_rate": 9.380089718872018e-07, "loss": 0.2866, "step": 10666 }, { "epoch": 0.18541952754263066, "grad_norm": 1.9977775720286144, "learning_rate": 9.379953953728503e-07, "loss": 0.279, "step": 10667 }, { "epoch": 0.1854369100801335, "grad_norm": 1.9192278654862138, "learning_rate": 9.379818174702548e-07, "loss": 0.4551, "step": 10668 }, { "epoch": 0.18545429261763632, "grad_norm": 1.6604851413748587, "learning_rate": 9.379682381794578e-07, "loss": 0.5696, "step": 10669 }, { "epoch": 0.18547167515513915, "grad_norm": 2.360344718173488, "learning_rate": 9.379546575005029e-07, "loss": 0.3744, "step": 10670 }, { "epoch": 0.18548905769264198, "grad_norm": 7.031144756414838, "learning_rate": 9.379410754334329e-07, "loss": 0.3274, "step": 10671 }, { "epoch": 0.1855064402301448, "grad_norm": 2.6748181300314546, "learning_rate": 9.379274919782909e-07, "loss": 0.2643, "step": 10672 }, { "epoch": 0.18552382276764762, "grad_norm": 1.9627261634083522, "learning_rate": 9.379139071351198e-07, "loss": 0.2746, "step": 10673 }, { "epoch": 0.18554120530515045, "grad_norm": 2.573210086565379, "learning_rate": 9.379003209039628e-07, "loss": 0.5332, "step": 10674 }, { "epoch": 0.18555858784265328, "grad_norm": 1.649543839605057, "learning_rate": 9.37886733284863e-07, "loss": 0.5051, "step": 10675 }, { "epoch": 0.1855759703801561, "grad_norm": 1.6630572492908742, "learning_rate": 9.378731442778634e-07, "loss": 0.2811, "step": 10676 }, { "epoch": 0.1855933529176589, "grad_norm": 1.6409346582418538, "learning_rate": 9.378595538830069e-07, "loss": 0.307, "step": 10677 }, { "epoch": 0.18561073545516174, "grad_norm": 2.3053439170530776, "learning_rate": 9.37845962100337e-07, "loss": 0.3225, "step": 10678 }, { "epoch": 0.18562811799266457, "grad_norm": 1.2855543234490692, "learning_rate": 9.378323689298965e-07, "loss": 0.2686, "step": 10679 }, { "epoch": 0.1856455005301674, "grad_norm": 2.3952808652807063, "learning_rate": 9.378187743717284e-07, "loss": 0.4161, "step": 10680 }, { "epoch": 0.18566288306767023, "grad_norm": 1.2960768105663507, "learning_rate": 9.37805178425876e-07, "loss": 0.2639, "step": 10681 }, { "epoch": 0.18568026560517303, "grad_norm": 1.7398287799984988, "learning_rate": 9.377915810923822e-07, "loss": 0.2524, "step": 10682 }, { "epoch": 0.18569764814267586, "grad_norm": 1.7207968572389323, "learning_rate": 9.377779823712902e-07, "loss": 0.3363, "step": 10683 }, { "epoch": 0.1857150306801787, "grad_norm": 1.5727688572419722, "learning_rate": 9.377643822626431e-07, "loss": 0.1811, "step": 10684 }, { "epoch": 0.18573241321768152, "grad_norm": 2.2077857291937457, "learning_rate": 9.37750780766484e-07, "loss": 0.4879, "step": 10685 }, { "epoch": 0.18574979575518435, "grad_norm": 1.8940255271234423, "learning_rate": 9.377371778828562e-07, "loss": 0.3709, "step": 10686 }, { "epoch": 0.18576717829268716, "grad_norm": 1.1624591951167385, "learning_rate": 9.377235736118025e-07, "loss": 0.2398, "step": 10687 }, { "epoch": 0.18578456083019, "grad_norm": 2.2785435030265933, "learning_rate": 9.37709967953366e-07, "loss": 0.3121, "step": 10688 }, { "epoch": 0.18580194336769282, "grad_norm": 1.2181001600173114, "learning_rate": 9.3769636090759e-07, "loss": 0.3883, "step": 10689 }, { "epoch": 0.18581932590519565, "grad_norm": 2.9343204668764713, "learning_rate": 9.376827524745178e-07, "loss": 0.4836, "step": 10690 }, { "epoch": 0.18583670844269848, "grad_norm": 1.990501101174046, "learning_rate": 9.376691426541923e-07, "loss": 0.2613, "step": 10691 }, { "epoch": 0.18585409098020128, "grad_norm": 5.034974464204611, "learning_rate": 9.376555314466565e-07, "loss": 0.4577, "step": 10692 }, { "epoch": 0.1858714735177041, "grad_norm": 1.621400695889658, "learning_rate": 9.376419188519537e-07, "loss": 0.2921, "step": 10693 }, { "epoch": 0.18588885605520694, "grad_norm": 1.6030228416823091, "learning_rate": 9.376283048701272e-07, "loss": 0.5564, "step": 10694 }, { "epoch": 0.18590623859270977, "grad_norm": 2.7202198369660215, "learning_rate": 9.3761468950122e-07, "loss": 0.3911, "step": 10695 }, { "epoch": 0.1859236211302126, "grad_norm": 1.7127130420776984, "learning_rate": 9.376010727452752e-07, "loss": 0.3545, "step": 10696 }, { "epoch": 0.1859410036677154, "grad_norm": 1.6229466728517896, "learning_rate": 9.37587454602336e-07, "loss": 0.3651, "step": 10697 }, { "epoch": 0.18595838620521823, "grad_norm": 1.78325871307758, "learning_rate": 9.375738350724455e-07, "loss": 0.4567, "step": 10698 }, { "epoch": 0.18597576874272106, "grad_norm": 1.4825532903485334, "learning_rate": 9.375602141556471e-07, "loss": 0.2535, "step": 10699 }, { "epoch": 0.1859931512802239, "grad_norm": 2.853168429370071, "learning_rate": 9.375465918519838e-07, "loss": 0.336, "step": 10700 }, { "epoch": 0.18601053381772673, "grad_norm": 2.404320703048626, "learning_rate": 9.375329681614988e-07, "loss": 0.5001, "step": 10701 }, { "epoch": 0.18602791635522953, "grad_norm": 1.2239967525053117, "learning_rate": 9.375193430842352e-07, "loss": 0.3294, "step": 10702 }, { "epoch": 0.18604529889273236, "grad_norm": 1.6556633515921277, "learning_rate": 9.375057166202363e-07, "loss": 0.2327, "step": 10703 }, { "epoch": 0.1860626814302352, "grad_norm": 2.059502421256711, "learning_rate": 9.374920887695452e-07, "loss": 0.3132, "step": 10704 }, { "epoch": 0.18608006396773802, "grad_norm": 2.716173003415409, "learning_rate": 9.374784595322051e-07, "loss": 0.4073, "step": 10705 }, { "epoch": 0.18609744650524085, "grad_norm": 1.6205203129163255, "learning_rate": 9.374648289082595e-07, "loss": 0.2786, "step": 10706 }, { "epoch": 0.18611482904274365, "grad_norm": 1.4731601013299551, "learning_rate": 9.374511968977512e-07, "loss": 0.2076, "step": 10707 }, { "epoch": 0.18613221158024648, "grad_norm": 1.5680330367410047, "learning_rate": 9.374375635007234e-07, "loss": 0.1931, "step": 10708 }, { "epoch": 0.1861495941177493, "grad_norm": 2.792829950165459, "learning_rate": 9.374239287172197e-07, "loss": 0.4037, "step": 10709 }, { "epoch": 0.18616697665525214, "grad_norm": 2.5962105996387934, "learning_rate": 9.374102925472831e-07, "loss": 0.3172, "step": 10710 }, { "epoch": 0.18618435919275494, "grad_norm": 1.759261248664264, "learning_rate": 9.373966549909567e-07, "loss": 0.4204, "step": 10711 }, { "epoch": 0.18620174173025777, "grad_norm": 1.7605542563978898, "learning_rate": 9.373830160482838e-07, "loss": 0.3906, "step": 10712 }, { "epoch": 0.1862191242677606, "grad_norm": 2.8342170809836635, "learning_rate": 9.373693757193077e-07, "loss": 0.5258, "step": 10713 }, { "epoch": 0.18623650680526344, "grad_norm": 2.438621657578563, "learning_rate": 9.373557340040715e-07, "loss": 0.4103, "step": 10714 }, { "epoch": 0.18625388934276627, "grad_norm": 2.1053243557704104, "learning_rate": 9.373420909026186e-07, "loss": 0.5152, "step": 10715 }, { "epoch": 0.18627127188026907, "grad_norm": 1.730995632997828, "learning_rate": 9.373284464149922e-07, "loss": 0.3762, "step": 10716 }, { "epoch": 0.1862886544177719, "grad_norm": 1.1147526698766437, "learning_rate": 9.373148005412354e-07, "loss": 0.361, "step": 10717 }, { "epoch": 0.18630603695527473, "grad_norm": 1.3487088778398766, "learning_rate": 9.373011532813917e-07, "loss": 0.4401, "step": 10718 }, { "epoch": 0.18632341949277756, "grad_norm": 1.597158981552324, "learning_rate": 9.372875046355043e-07, "loss": 0.3353, "step": 10719 }, { "epoch": 0.1863408020302804, "grad_norm": 1.6401334237384924, "learning_rate": 9.37273854603616e-07, "loss": 0.3646, "step": 10720 }, { "epoch": 0.1863581845677832, "grad_norm": 2.010297079138786, "learning_rate": 9.372602031857708e-07, "loss": 0.3768, "step": 10721 }, { "epoch": 0.18637556710528602, "grad_norm": 2.404885580833237, "learning_rate": 9.372465503820116e-07, "loss": 0.587, "step": 10722 }, { "epoch": 0.18639294964278885, "grad_norm": 1.2034882917162222, "learning_rate": 9.372328961923816e-07, "loss": 0.1969, "step": 10723 }, { "epoch": 0.18641033218029168, "grad_norm": 1.5008710093537747, "learning_rate": 9.372192406169242e-07, "loss": 0.3092, "step": 10724 }, { "epoch": 0.1864277147177945, "grad_norm": 1.0985205176818005, "learning_rate": 9.372055836556827e-07, "loss": 0.2329, "step": 10725 }, { "epoch": 0.18644509725529732, "grad_norm": 1.5386491732638221, "learning_rate": 9.371919253087001e-07, "loss": 0.2359, "step": 10726 }, { "epoch": 0.18646247979280015, "grad_norm": 1.87575589243189, "learning_rate": 9.371782655760202e-07, "loss": 0.416, "step": 10727 }, { "epoch": 0.18647986233030298, "grad_norm": 1.5420446088510074, "learning_rate": 9.371646044576859e-07, "loss": 0.2714, "step": 10728 }, { "epoch": 0.1864972448678058, "grad_norm": 1.4821547884213255, "learning_rate": 9.371509419537407e-07, "loss": 0.2876, "step": 10729 }, { "epoch": 0.18651462740530864, "grad_norm": 1.94673190204779, "learning_rate": 9.371372780642277e-07, "loss": 0.4237, "step": 10730 }, { "epoch": 0.18653200994281144, "grad_norm": 1.5122271683192596, "learning_rate": 9.371236127891904e-07, "loss": 0.3585, "step": 10731 }, { "epoch": 0.18654939248031427, "grad_norm": 2.324449121502115, "learning_rate": 9.37109946128672e-07, "loss": 0.3341, "step": 10732 }, { "epoch": 0.1865667750178171, "grad_norm": 1.678553328600038, "learning_rate": 9.37096278082716e-07, "loss": 0.5177, "step": 10733 }, { "epoch": 0.18658415755531993, "grad_norm": 1.7482263398753664, "learning_rate": 9.370826086513655e-07, "loss": 0.4234, "step": 10734 }, { "epoch": 0.18660154009282276, "grad_norm": 1.3497239655955287, "learning_rate": 9.370689378346639e-07, "loss": 0.2575, "step": 10735 }, { "epoch": 0.18661892263032556, "grad_norm": 1.6149714843729555, "learning_rate": 9.370552656326544e-07, "loss": 0.1851, "step": 10736 }, { "epoch": 0.1866363051678284, "grad_norm": 6.469138154133399, "learning_rate": 9.370415920453807e-07, "loss": 0.657, "step": 10737 }, { "epoch": 0.18665368770533122, "grad_norm": 1.651435684253785, "learning_rate": 9.370279170728858e-07, "loss": 0.2435, "step": 10738 }, { "epoch": 0.18667107024283405, "grad_norm": 1.8992496745041787, "learning_rate": 9.370142407152132e-07, "loss": 0.3166, "step": 10739 }, { "epoch": 0.18668845278033688, "grad_norm": 1.363371820728153, "learning_rate": 9.370005629724061e-07, "loss": 0.249, "step": 10740 }, { "epoch": 0.18670583531783969, "grad_norm": 2.1114650812481575, "learning_rate": 9.369868838445081e-07, "loss": 0.3229, "step": 10741 }, { "epoch": 0.18672321785534252, "grad_norm": 3.1031556190753, "learning_rate": 9.369732033315623e-07, "loss": 0.4367, "step": 10742 }, { "epoch": 0.18674060039284535, "grad_norm": 5.7983790496297125, "learning_rate": 9.369595214336124e-07, "loss": 0.3992, "step": 10743 }, { "epoch": 0.18675798293034818, "grad_norm": 1.8379489991785014, "learning_rate": 9.369458381507014e-07, "loss": 0.4013, "step": 10744 }, { "epoch": 0.186775365467851, "grad_norm": 2.3062965567943947, "learning_rate": 9.369321534828727e-07, "loss": 0.3583, "step": 10745 }, { "epoch": 0.1867927480053538, "grad_norm": 2.761394834276543, "learning_rate": 9.369184674301698e-07, "loss": 0.5659, "step": 10746 }, { "epoch": 0.18681013054285664, "grad_norm": 2.087592786171762, "learning_rate": 9.369047799926362e-07, "loss": 0.3826, "step": 10747 }, { "epoch": 0.18682751308035947, "grad_norm": 1.682220669838524, "learning_rate": 9.368910911703151e-07, "loss": 0.3474, "step": 10748 }, { "epoch": 0.1868448956178623, "grad_norm": 1.8339367564099998, "learning_rate": 9.368774009632498e-07, "loss": 0.2689, "step": 10749 }, { "epoch": 0.18686227815536513, "grad_norm": 1.2669477638768585, "learning_rate": 9.368637093714839e-07, "loss": 0.3784, "step": 10750 }, { "epoch": 0.18687966069286793, "grad_norm": 2.0181236872087225, "learning_rate": 9.368500163950608e-07, "loss": 0.4466, "step": 10751 }, { "epoch": 0.18689704323037076, "grad_norm": 1.575542831939537, "learning_rate": 9.368363220340238e-07, "loss": 0.2076, "step": 10752 }, { "epoch": 0.1869144257678736, "grad_norm": 1.682978680843852, "learning_rate": 9.368226262884162e-07, "loss": 0.4639, "step": 10753 }, { "epoch": 0.18693180830537642, "grad_norm": 2.212529454543585, "learning_rate": 9.368089291582817e-07, "loss": 0.3842, "step": 10754 }, { "epoch": 0.18694919084287925, "grad_norm": 1.5504311275455722, "learning_rate": 9.367952306436635e-07, "loss": 0.4144, "step": 10755 }, { "epoch": 0.18696657338038206, "grad_norm": 2.3573967480535276, "learning_rate": 9.36781530744605e-07, "loss": 0.487, "step": 10756 }, { "epoch": 0.1869839559178849, "grad_norm": 2.121362635166568, "learning_rate": 9.367678294611499e-07, "loss": 0.2294, "step": 10757 }, { "epoch": 0.18700133845538772, "grad_norm": 1.5997215020146542, "learning_rate": 9.367541267933412e-07, "loss": 0.2425, "step": 10758 }, { "epoch": 0.18701872099289055, "grad_norm": 2.0607081206350517, "learning_rate": 9.367404227412226e-07, "loss": 0.5112, "step": 10759 }, { "epoch": 0.18703610353039338, "grad_norm": 4.07845498593468, "learning_rate": 9.367267173048374e-07, "loss": 0.1794, "step": 10760 }, { "epoch": 0.18705348606789618, "grad_norm": 2.370624519461709, "learning_rate": 9.367130104842291e-07, "loss": 0.2903, "step": 10761 }, { "epoch": 0.187070868605399, "grad_norm": 1.9406907101268336, "learning_rate": 9.366993022794414e-07, "loss": 0.334, "step": 10762 }, { "epoch": 0.18708825114290184, "grad_norm": 3.330298774756101, "learning_rate": 9.366855926905172e-07, "loss": 0.5238, "step": 10763 }, { "epoch": 0.18710563368040467, "grad_norm": 2.0476687078651614, "learning_rate": 9.366718817175006e-07, "loss": 0.3607, "step": 10764 }, { "epoch": 0.1871230162179075, "grad_norm": 3.3562618506388424, "learning_rate": 9.366581693604345e-07, "loss": 0.5979, "step": 10765 }, { "epoch": 0.1871403987554103, "grad_norm": 1.2347036058296144, "learning_rate": 9.366444556193626e-07, "loss": 0.2905, "step": 10766 }, { "epoch": 0.18715778129291313, "grad_norm": 2.6097849423501365, "learning_rate": 9.366307404943285e-07, "loss": 0.5913, "step": 10767 }, { "epoch": 0.18717516383041596, "grad_norm": 1.606413219905642, "learning_rate": 9.366170239853754e-07, "loss": 0.264, "step": 10768 }, { "epoch": 0.1871925463679188, "grad_norm": 1.4074326982733754, "learning_rate": 9.366033060925471e-07, "loss": 0.2324, "step": 10769 }, { "epoch": 0.18720992890542162, "grad_norm": 1.7983843133709503, "learning_rate": 9.365895868158866e-07, "loss": 0.3578, "step": 10770 }, { "epoch": 0.18722731144292443, "grad_norm": 2.74283973130359, "learning_rate": 9.365758661554378e-07, "loss": 0.4665, "step": 10771 }, { "epoch": 0.18724469398042726, "grad_norm": 2.4118550706964346, "learning_rate": 9.365621441112441e-07, "loss": 0.2998, "step": 10772 }, { "epoch": 0.1872620765179301, "grad_norm": 2.5178345582309287, "learning_rate": 9.365484206833489e-07, "loss": 0.415, "step": 10773 }, { "epoch": 0.18727945905543292, "grad_norm": 1.521202857829747, "learning_rate": 9.365346958717957e-07, "loss": 0.3301, "step": 10774 }, { "epoch": 0.18729684159293575, "grad_norm": 2.0735440820589455, "learning_rate": 9.365209696766281e-07, "loss": 0.2774, "step": 10775 }, { "epoch": 0.18731422413043855, "grad_norm": 2.5998285208225376, "learning_rate": 9.365072420978894e-07, "loss": 0.3077, "step": 10776 }, { "epoch": 0.18733160666794138, "grad_norm": 1.5388917497841186, "learning_rate": 9.364935131356234e-07, "loss": 0.3056, "step": 10777 }, { "epoch": 0.1873489892054442, "grad_norm": 2.130678293240842, "learning_rate": 9.364797827898733e-07, "loss": 0.469, "step": 10778 }, { "epoch": 0.18736637174294704, "grad_norm": 1.5075583066335136, "learning_rate": 9.36466051060683e-07, "loss": 0.323, "step": 10779 }, { "epoch": 0.18738375428044987, "grad_norm": 3.0744651735821886, "learning_rate": 9.364523179480957e-07, "loss": 0.6022, "step": 10780 }, { "epoch": 0.18740113681795267, "grad_norm": 1.6279896487864123, "learning_rate": 9.364385834521551e-07, "loss": 0.2522, "step": 10781 }, { "epoch": 0.1874185193554555, "grad_norm": 1.697653109383812, "learning_rate": 9.364248475729048e-07, "loss": 0.397, "step": 10782 }, { "epoch": 0.18743590189295833, "grad_norm": 1.277894298531065, "learning_rate": 9.36411110310388e-07, "loss": 0.2948, "step": 10783 }, { "epoch": 0.18745328443046116, "grad_norm": 3.449225506415926, "learning_rate": 9.363973716646485e-07, "loss": 0.3695, "step": 10784 }, { "epoch": 0.187470666967964, "grad_norm": 1.7107569166095886, "learning_rate": 9.363836316357298e-07, "loss": 0.1452, "step": 10785 }, { "epoch": 0.1874880495054668, "grad_norm": 1.8647634281590426, "learning_rate": 9.363698902236754e-07, "loss": 0.3991, "step": 10786 }, { "epoch": 0.18750543204296963, "grad_norm": 2.0093245805457522, "learning_rate": 9.363561474285291e-07, "loss": 0.3248, "step": 10787 }, { "epoch": 0.18752281458047246, "grad_norm": 1.3547818272235468, "learning_rate": 9.363424032503341e-07, "loss": 0.3383, "step": 10788 }, { "epoch": 0.1875401971179753, "grad_norm": 1.2202916137974804, "learning_rate": 9.363286576891341e-07, "loss": 0.4645, "step": 10789 }, { "epoch": 0.18755757965547812, "grad_norm": 2.1651450796867455, "learning_rate": 9.363149107449727e-07, "loss": 0.4053, "step": 10790 }, { "epoch": 0.18757496219298092, "grad_norm": 2.3650036519959174, "learning_rate": 9.363011624178935e-07, "loss": 0.3605, "step": 10791 }, { "epoch": 0.18759234473048375, "grad_norm": 2.533998690858596, "learning_rate": 9.3628741270794e-07, "loss": 0.3231, "step": 10792 }, { "epoch": 0.18760972726798658, "grad_norm": 2.3755571867360965, "learning_rate": 9.362736616151559e-07, "loss": 0.2632, "step": 10793 }, { "epoch": 0.1876271098054894, "grad_norm": 2.6442951059236677, "learning_rate": 9.362599091395847e-07, "loss": 0.3129, "step": 10794 }, { "epoch": 0.18764449234299224, "grad_norm": 1.2619590497907247, "learning_rate": 9.362461552812698e-07, "loss": 0.2934, "step": 10795 }, { "epoch": 0.18766187488049504, "grad_norm": 1.5115454889213589, "learning_rate": 9.362324000402551e-07, "loss": 0.4047, "step": 10796 }, { "epoch": 0.18767925741799787, "grad_norm": 1.3311334812317057, "learning_rate": 9.362186434165841e-07, "loss": 0.2679, "step": 10797 }, { "epoch": 0.1876966399555007, "grad_norm": 1.844632952792292, "learning_rate": 9.362048854103004e-07, "loss": 0.2547, "step": 10798 }, { "epoch": 0.18771402249300354, "grad_norm": 1.9665474563625527, "learning_rate": 9.361911260214475e-07, "loss": 0.2815, "step": 10799 }, { "epoch": 0.18773140503050637, "grad_norm": 2.861224934344881, "learning_rate": 9.361773652500692e-07, "loss": 0.4327, "step": 10800 }, { "epoch": 0.18774878756800917, "grad_norm": 2.214435371310006, "learning_rate": 9.36163603096209e-07, "loss": 0.5323, "step": 10801 }, { "epoch": 0.187766170105512, "grad_norm": 2.3386933979110194, "learning_rate": 9.361498395599106e-07, "loss": 0.4757, "step": 10802 }, { "epoch": 0.18778355264301483, "grad_norm": 7.0179164471285285, "learning_rate": 9.361360746412175e-07, "loss": 0.2904, "step": 10803 }, { "epoch": 0.18780093518051766, "grad_norm": 2.3516321280093333, "learning_rate": 9.361223083401733e-07, "loss": 0.2227, "step": 10804 }, { "epoch": 0.1878183177180205, "grad_norm": 2.090197167703256, "learning_rate": 9.361085406568216e-07, "loss": 0.267, "step": 10805 }, { "epoch": 0.1878357002555233, "grad_norm": 1.5138164503963967, "learning_rate": 9.360947715912065e-07, "loss": 0.3479, "step": 10806 }, { "epoch": 0.18785308279302612, "grad_norm": 1.8233667596909704, "learning_rate": 9.360810011433711e-07, "loss": 0.3589, "step": 10807 }, { "epoch": 0.18787046533052895, "grad_norm": 1.9484487497619507, "learning_rate": 9.360672293133594e-07, "loss": 0.3379, "step": 10808 }, { "epoch": 0.18788784786803178, "grad_norm": 1.5856993808414916, "learning_rate": 9.360534561012147e-07, "loss": 0.3625, "step": 10809 }, { "epoch": 0.1879052304055346, "grad_norm": 2.0815783728353194, "learning_rate": 9.360396815069808e-07, "loss": 0.5287, "step": 10810 }, { "epoch": 0.18792261294303741, "grad_norm": 1.9277288358299287, "learning_rate": 9.360259055307016e-07, "loss": 0.3443, "step": 10811 }, { "epoch": 0.18793999548054025, "grad_norm": 2.123197260345553, "learning_rate": 9.360121281724206e-07, "loss": 0.397, "step": 10812 }, { "epoch": 0.18795737801804308, "grad_norm": 3.20328958195681, "learning_rate": 9.359983494321813e-07, "loss": 0.2568, "step": 10813 }, { "epoch": 0.1879747605555459, "grad_norm": 1.582901827465174, "learning_rate": 9.359845693100277e-07, "loss": 0.3446, "step": 10814 }, { "epoch": 0.18799214309304874, "grad_norm": 1.5714534240744464, "learning_rate": 9.35970787806003e-07, "loss": 0.2488, "step": 10815 }, { "epoch": 0.18800952563055154, "grad_norm": 1.5064701092053685, "learning_rate": 9.359570049201514e-07, "loss": 0.2669, "step": 10816 }, { "epoch": 0.18802690816805437, "grad_norm": 2.717219237198298, "learning_rate": 9.359432206525163e-07, "loss": 0.4343, "step": 10817 }, { "epoch": 0.1880442907055572, "grad_norm": 2.2412782767969173, "learning_rate": 9.359294350031415e-07, "loss": 0.4171, "step": 10818 }, { "epoch": 0.18806167324306003, "grad_norm": 1.3662970318922953, "learning_rate": 9.359156479720705e-07, "loss": 0.1906, "step": 10819 }, { "epoch": 0.18807905578056286, "grad_norm": 1.8408176884429717, "learning_rate": 9.359018595593472e-07, "loss": 0.3776, "step": 10820 }, { "epoch": 0.18809643831806566, "grad_norm": 2.053467277693423, "learning_rate": 9.358880697650153e-07, "loss": 0.2673, "step": 10821 }, { "epoch": 0.1881138208555685, "grad_norm": 0.9245914371258014, "learning_rate": 9.358742785891183e-07, "loss": 0.2373, "step": 10822 }, { "epoch": 0.18813120339307132, "grad_norm": 2.0779625334186713, "learning_rate": 9.358604860317002e-07, "loss": 0.1735, "step": 10823 }, { "epoch": 0.18814858593057415, "grad_norm": 2.103292241971956, "learning_rate": 9.358466920928045e-07, "loss": 0.3486, "step": 10824 }, { "epoch": 0.18816596846807698, "grad_norm": 1.2016242108050539, "learning_rate": 9.358328967724751e-07, "loss": 0.1966, "step": 10825 }, { "epoch": 0.18818335100557979, "grad_norm": 1.8365590243413958, "learning_rate": 9.358191000707556e-07, "loss": 0.3022, "step": 10826 }, { "epoch": 0.18820073354308262, "grad_norm": 1.3311570128661623, "learning_rate": 9.358053019876897e-07, "loss": 0.3085, "step": 10827 }, { "epoch": 0.18821811608058545, "grad_norm": 2.1312709776938363, "learning_rate": 9.357915025233212e-07, "loss": 0.1995, "step": 10828 }, { "epoch": 0.18823549861808828, "grad_norm": 2.4834648314417875, "learning_rate": 9.357777016776938e-07, "loss": 0.2841, "step": 10829 }, { "epoch": 0.1882528811555911, "grad_norm": 1.5279948559475294, "learning_rate": 9.357638994508513e-07, "loss": 0.2928, "step": 10830 }, { "epoch": 0.1882702636930939, "grad_norm": 1.953688971771476, "learning_rate": 9.357500958428375e-07, "loss": 0.2339, "step": 10831 }, { "epoch": 0.18828764623059674, "grad_norm": 1.2481952118415511, "learning_rate": 9.357362908536958e-07, "loss": 0.3849, "step": 10832 }, { "epoch": 0.18830502876809957, "grad_norm": 4.872569844021062, "learning_rate": 9.357224844834704e-07, "loss": 0.4022, "step": 10833 }, { "epoch": 0.1883224113056024, "grad_norm": 1.3607410993113744, "learning_rate": 9.357086767322048e-07, "loss": 0.234, "step": 10834 }, { "epoch": 0.18833979384310523, "grad_norm": 1.908271116498926, "learning_rate": 9.35694867599943e-07, "loss": 0.3841, "step": 10835 }, { "epoch": 0.18835717638060803, "grad_norm": 1.7717860620027506, "learning_rate": 9.356810570867287e-07, "loss": 0.3591, "step": 10836 }, { "epoch": 0.18837455891811086, "grad_norm": 1.7421964718843694, "learning_rate": 9.356672451926053e-07, "loss": 0.3026, "step": 10837 }, { "epoch": 0.1883919414556137, "grad_norm": 1.6914393652493833, "learning_rate": 9.356534319176169e-07, "loss": 0.2524, "step": 10838 }, { "epoch": 0.18840932399311652, "grad_norm": 1.4793284588033069, "learning_rate": 9.356396172618073e-07, "loss": 0.2582, "step": 10839 }, { "epoch": 0.18842670653061935, "grad_norm": 2.268380935379841, "learning_rate": 9.356258012252203e-07, "loss": 0.2298, "step": 10840 }, { "epoch": 0.18844408906812216, "grad_norm": 1.3699518212592399, "learning_rate": 9.356119838078995e-07, "loss": 0.2877, "step": 10841 }, { "epoch": 0.188461471605625, "grad_norm": 1.5288544489574096, "learning_rate": 9.35598165009889e-07, "loss": 0.3065, "step": 10842 }, { "epoch": 0.18847885414312782, "grad_norm": 1.4590469673858328, "learning_rate": 9.355843448312321e-07, "loss": 0.2098, "step": 10843 }, { "epoch": 0.18849623668063065, "grad_norm": 2.060524671751285, "learning_rate": 9.355705232719733e-07, "loss": 0.4405, "step": 10844 }, { "epoch": 0.18851361921813348, "grad_norm": 2.679916336509416, "learning_rate": 9.355567003321559e-07, "loss": 0.3739, "step": 10845 }, { "epoch": 0.18853100175563628, "grad_norm": 1.3113146759171033, "learning_rate": 9.355428760118236e-07, "loss": 0.2206, "step": 10846 }, { "epoch": 0.1885483842931391, "grad_norm": 1.832228130380544, "learning_rate": 9.355290503110207e-07, "loss": 0.3951, "step": 10847 }, { "epoch": 0.18856576683064194, "grad_norm": 2.020804219270888, "learning_rate": 9.355152232297908e-07, "loss": 0.3115, "step": 10848 }, { "epoch": 0.18858314936814477, "grad_norm": 1.4580089005546581, "learning_rate": 9.355013947681774e-07, "loss": 0.2732, "step": 10849 }, { "epoch": 0.18860053190564757, "grad_norm": 2.6339382051829556, "learning_rate": 9.354875649262248e-07, "loss": 0.4338, "step": 10850 }, { "epoch": 0.1886179144431504, "grad_norm": 1.5419355112826072, "learning_rate": 9.354737337039768e-07, "loss": 0.4714, "step": 10851 }, { "epoch": 0.18863529698065323, "grad_norm": 7.3771609048848825, "learning_rate": 9.35459901101477e-07, "loss": 0.3967, "step": 10852 }, { "epoch": 0.18865267951815606, "grad_norm": 2.3161297439685518, "learning_rate": 9.354460671187694e-07, "loss": 0.212, "step": 10853 }, { "epoch": 0.1886700620556589, "grad_norm": 1.1213719085919327, "learning_rate": 9.354322317558976e-07, "loss": 0.147, "step": 10854 }, { "epoch": 0.1886874445931617, "grad_norm": 1.3497171419887855, "learning_rate": 9.354183950129059e-07, "loss": 0.2782, "step": 10855 }, { "epoch": 0.18870482713066453, "grad_norm": 2.829535577064635, "learning_rate": 9.354045568898377e-07, "loss": 0.3433, "step": 10856 }, { "epoch": 0.18872220966816736, "grad_norm": 3.4814258346996767, "learning_rate": 9.353907173867372e-07, "loss": 0.615, "step": 10857 }, { "epoch": 0.1887395922056702, "grad_norm": 1.3853220513821034, "learning_rate": 9.353768765036481e-07, "loss": 0.3655, "step": 10858 }, { "epoch": 0.18875697474317302, "grad_norm": 3.5072732943530656, "learning_rate": 9.353630342406142e-07, "loss": 0.3807, "step": 10859 }, { "epoch": 0.18877435728067582, "grad_norm": 3.4535671668430115, "learning_rate": 9.353491905976796e-07, "loss": 0.5329, "step": 10860 }, { "epoch": 0.18879173981817865, "grad_norm": 3.052912797660981, "learning_rate": 9.35335345574888e-07, "loss": 0.5465, "step": 10861 }, { "epoch": 0.18880912235568148, "grad_norm": 1.655807299802918, "learning_rate": 9.353214991722833e-07, "loss": 0.489, "step": 10862 }, { "epoch": 0.1888265048931843, "grad_norm": 1.2416260406765287, "learning_rate": 9.353076513899096e-07, "loss": 0.1966, "step": 10863 }, { "epoch": 0.18884388743068714, "grad_norm": 2.5689725478693437, "learning_rate": 9.352938022278104e-07, "loss": 0.8627, "step": 10864 }, { "epoch": 0.18886126996818994, "grad_norm": 1.697609273470121, "learning_rate": 9.3527995168603e-07, "loss": 0.3711, "step": 10865 }, { "epoch": 0.18887865250569277, "grad_norm": 2.186512477557917, "learning_rate": 9.352660997646118e-07, "loss": 0.3407, "step": 10866 }, { "epoch": 0.1888960350431956, "grad_norm": 1.7386293145311598, "learning_rate": 9.352522464636003e-07, "loss": 0.2285, "step": 10867 }, { "epoch": 0.18891341758069843, "grad_norm": 2.2609980815492765, "learning_rate": 9.352383917830389e-07, "loss": 0.4787, "step": 10868 }, { "epoch": 0.18893080011820126, "grad_norm": 2.2079188031244583, "learning_rate": 9.352245357229719e-07, "loss": 0.5012, "step": 10869 }, { "epoch": 0.18894818265570407, "grad_norm": 1.4403180849157087, "learning_rate": 9.352106782834429e-07, "loss": 0.3931, "step": 10870 }, { "epoch": 0.1889655651932069, "grad_norm": 1.3229018161351944, "learning_rate": 9.351968194644961e-07, "loss": 0.3397, "step": 10871 }, { "epoch": 0.18898294773070973, "grad_norm": 2.3293704797640054, "learning_rate": 9.351829592661753e-07, "loss": 0.2802, "step": 10872 }, { "epoch": 0.18900033026821256, "grad_norm": 1.497561931371727, "learning_rate": 9.351690976885243e-07, "loss": 0.2466, "step": 10873 }, { "epoch": 0.1890177128057154, "grad_norm": 1.5968575013435011, "learning_rate": 9.351552347315872e-07, "loss": 0.33, "step": 10874 }, { "epoch": 0.1890350953432182, "grad_norm": 1.8191242344023073, "learning_rate": 9.35141370395408e-07, "loss": 0.3658, "step": 10875 }, { "epoch": 0.18905247788072102, "grad_norm": 1.7939766111210058, "learning_rate": 9.351275046800304e-07, "loss": 0.3529, "step": 10876 }, { "epoch": 0.18906986041822385, "grad_norm": 1.840458437590581, "learning_rate": 9.351136375854987e-07, "loss": 0.3026, "step": 10877 }, { "epoch": 0.18908724295572668, "grad_norm": 1.3455182071211216, "learning_rate": 9.350997691118564e-07, "loss": 0.2666, "step": 10878 }, { "epoch": 0.1891046254932295, "grad_norm": 2.11836946797788, "learning_rate": 9.350858992591478e-07, "loss": 0.4698, "step": 10879 }, { "epoch": 0.1891220080307323, "grad_norm": 3.0091887750885533, "learning_rate": 9.350720280274167e-07, "loss": 0.4592, "step": 10880 }, { "epoch": 0.18913939056823514, "grad_norm": 1.6110080217665919, "learning_rate": 9.350581554167072e-07, "loss": 0.4376, "step": 10881 }, { "epoch": 0.18915677310573797, "grad_norm": 1.8477207075263946, "learning_rate": 9.350442814270632e-07, "loss": 0.1818, "step": 10882 }, { "epoch": 0.1891741556432408, "grad_norm": 1.4312310662997856, "learning_rate": 9.350304060585287e-07, "loss": 0.4032, "step": 10883 }, { "epoch": 0.18919153818074363, "grad_norm": 1.535505507433269, "learning_rate": 9.350165293111476e-07, "loss": 0.2654, "step": 10884 }, { "epoch": 0.18920892071824644, "grad_norm": 1.483726165853993, "learning_rate": 9.350026511849639e-07, "loss": 0.2132, "step": 10885 }, { "epoch": 0.18922630325574927, "grad_norm": 1.761911841131836, "learning_rate": 9.349887716800217e-07, "loss": 0.2635, "step": 10886 }, { "epoch": 0.1892436857932521, "grad_norm": 2.379214041999417, "learning_rate": 9.349748907963648e-07, "loss": 0.3494, "step": 10887 }, { "epoch": 0.18926106833075493, "grad_norm": 2.670642628979792, "learning_rate": 9.349610085340375e-07, "loss": 0.4762, "step": 10888 }, { "epoch": 0.18927845086825776, "grad_norm": 1.486684066832888, "learning_rate": 9.349471248930835e-07, "loss": 0.3887, "step": 10889 }, { "epoch": 0.18929583340576056, "grad_norm": 1.4625901253512976, "learning_rate": 9.349332398735468e-07, "loss": 0.4457, "step": 10890 }, { "epoch": 0.1893132159432634, "grad_norm": 1.1060856478905692, "learning_rate": 9.349193534754717e-07, "loss": 0.2612, "step": 10891 }, { "epoch": 0.18933059848076622, "grad_norm": 3.1558082704230275, "learning_rate": 9.34905465698902e-07, "loss": 0.2888, "step": 10892 }, { "epoch": 0.18934798101826905, "grad_norm": 3.3582098606683757, "learning_rate": 9.348915765438816e-07, "loss": 0.3755, "step": 10893 }, { "epoch": 0.18936536355577188, "grad_norm": 2.0309808588629616, "learning_rate": 9.348776860104548e-07, "loss": 0.6333, "step": 10894 }, { "epoch": 0.18938274609327468, "grad_norm": 2.1849141048552143, "learning_rate": 9.348637940986655e-07, "loss": 0.2671, "step": 10895 }, { "epoch": 0.18940012863077751, "grad_norm": 2.1074350031755125, "learning_rate": 9.348499008085577e-07, "loss": 0.4358, "step": 10896 }, { "epoch": 0.18941751116828034, "grad_norm": 2.0761840842378994, "learning_rate": 9.348360061401756e-07, "loss": 0.2462, "step": 10897 }, { "epoch": 0.18943489370578317, "grad_norm": 2.8615489004127257, "learning_rate": 9.348221100935629e-07, "loss": 0.3984, "step": 10898 }, { "epoch": 0.189452276243286, "grad_norm": 1.9309917805928407, "learning_rate": 9.348082126687639e-07, "loss": 0.4232, "step": 10899 }, { "epoch": 0.1894696587807888, "grad_norm": 2.4045901961077694, "learning_rate": 9.347943138658227e-07, "loss": 0.4013, "step": 10900 }, { "epoch": 0.18948704131829164, "grad_norm": 1.7509623805918983, "learning_rate": 9.347804136847832e-07, "loss": 0.3078, "step": 10901 }, { "epoch": 0.18950442385579447, "grad_norm": 1.5303295100470327, "learning_rate": 9.347665121256896e-07, "loss": 0.2738, "step": 10902 }, { "epoch": 0.1895218063932973, "grad_norm": 1.4478442407810677, "learning_rate": 9.347526091885858e-07, "loss": 0.3331, "step": 10903 }, { "epoch": 0.18953918893080013, "grad_norm": 1.4393240957708915, "learning_rate": 9.347387048735158e-07, "loss": 0.2541, "step": 10904 }, { "epoch": 0.18955657146830293, "grad_norm": 1.860487822994771, "learning_rate": 9.347247991805241e-07, "loss": 0.3372, "step": 10905 }, { "epoch": 0.18957395400580576, "grad_norm": 5.622898692060736, "learning_rate": 9.347108921096543e-07, "loss": 0.4052, "step": 10906 }, { "epoch": 0.1895913365433086, "grad_norm": 1.4395833940082725, "learning_rate": 9.346969836609506e-07, "loss": 0.2226, "step": 10907 }, { "epoch": 0.18960871908081142, "grad_norm": 1.247047858947823, "learning_rate": 9.346830738344571e-07, "loss": 0.4476, "step": 10908 }, { "epoch": 0.18962610161831425, "grad_norm": 1.4575439537896009, "learning_rate": 9.346691626302181e-07, "loss": 0.2971, "step": 10909 }, { "epoch": 0.18964348415581705, "grad_norm": 3.152043197519713, "learning_rate": 9.346552500482775e-07, "loss": 0.4813, "step": 10910 }, { "epoch": 0.18966086669331989, "grad_norm": 2.0344567089523693, "learning_rate": 9.346413360886793e-07, "loss": 0.6314, "step": 10911 }, { "epoch": 0.18967824923082272, "grad_norm": 2.4559107158549804, "learning_rate": 9.346274207514679e-07, "loss": 0.2256, "step": 10912 }, { "epoch": 0.18969563176832555, "grad_norm": 1.8816504821920654, "learning_rate": 9.346135040366872e-07, "loss": 0.3233, "step": 10913 }, { "epoch": 0.18971301430582838, "grad_norm": 2.139105353179224, "learning_rate": 9.345995859443813e-07, "loss": 0.3586, "step": 10914 }, { "epoch": 0.18973039684333118, "grad_norm": 2.120046910279826, "learning_rate": 9.345856664745944e-07, "loss": 0.4748, "step": 10915 }, { "epoch": 0.189747779380834, "grad_norm": 3.624083266065451, "learning_rate": 9.345717456273705e-07, "loss": 0.3884, "step": 10916 }, { "epoch": 0.18976516191833684, "grad_norm": 1.4472016033577424, "learning_rate": 9.345578234027538e-07, "loss": 0.4992, "step": 10917 }, { "epoch": 0.18978254445583967, "grad_norm": 1.563046184169329, "learning_rate": 9.345438998007885e-07, "loss": 0.3393, "step": 10918 }, { "epoch": 0.1897999269933425, "grad_norm": 2.6197023904373316, "learning_rate": 9.345299748215184e-07, "loss": 0.5092, "step": 10919 }, { "epoch": 0.1898173095308453, "grad_norm": 2.1949875724613674, "learning_rate": 9.345160484649882e-07, "loss": 0.2773, "step": 10920 }, { "epoch": 0.18983469206834813, "grad_norm": 3.7636808646089412, "learning_rate": 9.345021207312415e-07, "loss": 0.3022, "step": 10921 }, { "epoch": 0.18985207460585096, "grad_norm": 1.5645404067822608, "learning_rate": 9.344881916203226e-07, "loss": 0.445, "step": 10922 }, { "epoch": 0.1898694571433538, "grad_norm": 2.30454092751718, "learning_rate": 9.34474261132276e-07, "loss": 0.4668, "step": 10923 }, { "epoch": 0.18988683968085662, "grad_norm": 1.2959757203493187, "learning_rate": 9.344603292671455e-07, "loss": 0.2419, "step": 10924 }, { "epoch": 0.18990422221835943, "grad_norm": 1.9375493937681862, "learning_rate": 9.344463960249753e-07, "loss": 0.4496, "step": 10925 }, { "epoch": 0.18992160475586226, "grad_norm": 1.4094344783700516, "learning_rate": 9.344324614058095e-07, "loss": 0.3126, "step": 10926 }, { "epoch": 0.18993898729336509, "grad_norm": 1.3536716810427905, "learning_rate": 9.344185254096924e-07, "loss": 0.3518, "step": 10927 }, { "epoch": 0.18995636983086792, "grad_norm": 1.9908144685577958, "learning_rate": 9.344045880366682e-07, "loss": 0.4055, "step": 10928 }, { "epoch": 0.18997375236837075, "grad_norm": 1.631557033121246, "learning_rate": 9.34390649286781e-07, "loss": 0.4969, "step": 10929 }, { "epoch": 0.18999113490587355, "grad_norm": 2.439119559755104, "learning_rate": 9.343767091600749e-07, "loss": 0.3291, "step": 10930 }, { "epoch": 0.19000851744337638, "grad_norm": 1.9209490498153579, "learning_rate": 9.343627676565941e-07, "loss": 0.3778, "step": 10931 }, { "epoch": 0.1900258999808792, "grad_norm": 2.4958982782065045, "learning_rate": 9.343488247763831e-07, "loss": 0.3058, "step": 10932 }, { "epoch": 0.19004328251838204, "grad_norm": 1.637532863388254, "learning_rate": 9.343348805194857e-07, "loss": 0.3826, "step": 10933 }, { "epoch": 0.19006066505588487, "grad_norm": 1.3575319710933453, "learning_rate": 9.343209348859463e-07, "loss": 0.2802, "step": 10934 }, { "epoch": 0.19007804759338767, "grad_norm": 1.7946301869778205, "learning_rate": 9.34306987875809e-07, "loss": 0.3837, "step": 10935 }, { "epoch": 0.1900954301308905, "grad_norm": 2.6965921326563165, "learning_rate": 9.342930394891181e-07, "loss": 0.3929, "step": 10936 }, { "epoch": 0.19011281266839333, "grad_norm": 1.4858770963700312, "learning_rate": 9.342790897259178e-07, "loss": 0.4623, "step": 10937 }, { "epoch": 0.19013019520589616, "grad_norm": 0.9982132729681461, "learning_rate": 9.342651385862521e-07, "loss": 0.1436, "step": 10938 }, { "epoch": 0.190147577743399, "grad_norm": 3.7794027759184585, "learning_rate": 9.342511860701655e-07, "loss": 0.404, "step": 10939 }, { "epoch": 0.1901649602809018, "grad_norm": 1.3901979533125504, "learning_rate": 9.342372321777022e-07, "loss": 0.2689, "step": 10940 }, { "epoch": 0.19018234281840463, "grad_norm": 1.686556177440488, "learning_rate": 9.342232769089062e-07, "loss": 0.392, "step": 10941 }, { "epoch": 0.19019972535590746, "grad_norm": 1.329893832555315, "learning_rate": 9.34209320263822e-07, "loss": 0.4372, "step": 10942 }, { "epoch": 0.1902171078934103, "grad_norm": 1.695301037206324, "learning_rate": 9.341953622424937e-07, "loss": 0.2728, "step": 10943 }, { "epoch": 0.19023449043091312, "grad_norm": 1.971701598623106, "learning_rate": 9.341814028449655e-07, "loss": 0.4484, "step": 10944 }, { "epoch": 0.19025187296841592, "grad_norm": 1.886294359673673, "learning_rate": 9.341674420712817e-07, "loss": 0.5159, "step": 10945 }, { "epoch": 0.19026925550591875, "grad_norm": 1.3174006917441172, "learning_rate": 9.341534799214865e-07, "loss": 0.4174, "step": 10946 }, { "epoch": 0.19028663804342158, "grad_norm": 1.7319812921259248, "learning_rate": 9.341395163956243e-07, "loss": 0.3597, "step": 10947 }, { "epoch": 0.1903040205809244, "grad_norm": 1.366921295428415, "learning_rate": 9.341255514937392e-07, "loss": 0.4156, "step": 10948 }, { "epoch": 0.19032140311842724, "grad_norm": 2.1044988552269337, "learning_rate": 9.341115852158756e-07, "loss": 0.4272, "step": 10949 }, { "epoch": 0.19033878565593004, "grad_norm": 1.8249226195199848, "learning_rate": 9.340976175620776e-07, "loss": 0.3886, "step": 10950 }, { "epoch": 0.19035616819343287, "grad_norm": 2.3610559792382393, "learning_rate": 9.340836485323897e-07, "loss": 0.3491, "step": 10951 }, { "epoch": 0.1903735507309357, "grad_norm": 1.3609597901190729, "learning_rate": 9.340696781268559e-07, "loss": 0.2785, "step": 10952 }, { "epoch": 0.19039093326843853, "grad_norm": 1.771262054225264, "learning_rate": 9.340557063455206e-07, "loss": 0.2774, "step": 10953 }, { "epoch": 0.19040831580594136, "grad_norm": 1.9106503341319838, "learning_rate": 9.340417331884283e-07, "loss": 0.3676, "step": 10954 }, { "epoch": 0.19042569834344417, "grad_norm": 1.6216060575029896, "learning_rate": 9.340277586556229e-07, "loss": 0.2472, "step": 10955 }, { "epoch": 0.190443080880947, "grad_norm": 1.881034903048751, "learning_rate": 9.340137827471488e-07, "loss": 0.5317, "step": 10956 }, { "epoch": 0.19046046341844983, "grad_norm": 1.8657195777788522, "learning_rate": 9.339998054630507e-07, "loss": 0.3404, "step": 10957 }, { "epoch": 0.19047784595595266, "grad_norm": 1.5037656436074518, "learning_rate": 9.339858268033722e-07, "loss": 0.1987, "step": 10958 }, { "epoch": 0.1904952284934555, "grad_norm": 2.5815692095912484, "learning_rate": 9.339718467681583e-07, "loss": 0.4391, "step": 10959 }, { "epoch": 0.1905126110309583, "grad_norm": 1.8352272087474224, "learning_rate": 9.339578653574528e-07, "loss": 0.2582, "step": 10960 }, { "epoch": 0.19052999356846112, "grad_norm": 2.1221305895848146, "learning_rate": 9.339438825713003e-07, "loss": 0.3264, "step": 10961 }, { "epoch": 0.19054737610596395, "grad_norm": 2.247723368868486, "learning_rate": 9.33929898409745e-07, "loss": 0.3387, "step": 10962 }, { "epoch": 0.19056475864346678, "grad_norm": 1.5820631074027025, "learning_rate": 9.339159128728312e-07, "loss": 0.299, "step": 10963 }, { "epoch": 0.1905821411809696, "grad_norm": 2.553817079219103, "learning_rate": 9.339019259606034e-07, "loss": 0.2468, "step": 10964 }, { "epoch": 0.1905995237184724, "grad_norm": 2.450608575144118, "learning_rate": 9.338879376731057e-07, "loss": 0.4308, "step": 10965 }, { "epoch": 0.19061690625597524, "grad_norm": 3.41967825848898, "learning_rate": 9.338739480103826e-07, "loss": 0.3069, "step": 10966 }, { "epoch": 0.19063428879347807, "grad_norm": 2.4694081373346437, "learning_rate": 9.338599569724784e-07, "loss": 0.4161, "step": 10967 }, { "epoch": 0.1906516713309809, "grad_norm": 1.0288191822337447, "learning_rate": 9.338459645594375e-07, "loss": 0.408, "step": 10968 }, { "epoch": 0.19066905386848373, "grad_norm": 1.2473169922804268, "learning_rate": 9.338319707713041e-07, "loss": 0.262, "step": 10969 }, { "epoch": 0.19068643640598654, "grad_norm": 1.3506422568531113, "learning_rate": 9.338179756081226e-07, "loss": 0.3211, "step": 10970 }, { "epoch": 0.19070381894348937, "grad_norm": 2.047543786434323, "learning_rate": 9.338039790699375e-07, "loss": 0.3796, "step": 10971 }, { "epoch": 0.1907212014809922, "grad_norm": 2.3686152580650486, "learning_rate": 9.337899811567929e-07, "loss": 0.4121, "step": 10972 }, { "epoch": 0.19073858401849503, "grad_norm": 1.4980794464166345, "learning_rate": 9.337759818687335e-07, "loss": 0.5204, "step": 10973 }, { "epoch": 0.19075596655599786, "grad_norm": 3.2135847095934156, "learning_rate": 9.337619812058034e-07, "loss": 0.4787, "step": 10974 }, { "epoch": 0.19077334909350066, "grad_norm": 1.0998461585242185, "learning_rate": 9.337479791680471e-07, "loss": 0.4817, "step": 10975 }, { "epoch": 0.1907907316310035, "grad_norm": 2.4680045613116266, "learning_rate": 9.33733975755509e-07, "loss": 0.2609, "step": 10976 }, { "epoch": 0.19080811416850632, "grad_norm": 1.641954235116521, "learning_rate": 9.337199709682333e-07, "loss": 0.2594, "step": 10977 }, { "epoch": 0.19082549670600915, "grad_norm": 1.686366431740252, "learning_rate": 9.337059648062647e-07, "loss": 0.393, "step": 10978 }, { "epoch": 0.19084287924351198, "grad_norm": 2.664934549653374, "learning_rate": 9.336919572696473e-07, "loss": 0.2967, "step": 10979 }, { "epoch": 0.19086026178101478, "grad_norm": 2.2172680749897413, "learning_rate": 9.336779483584257e-07, "loss": 0.3346, "step": 10980 }, { "epoch": 0.19087764431851761, "grad_norm": 1.526391123506569, "learning_rate": 9.336639380726442e-07, "loss": 0.3058, "step": 10981 }, { "epoch": 0.19089502685602044, "grad_norm": 1.9556215594296722, "learning_rate": 9.336499264123472e-07, "loss": 0.3731, "step": 10982 }, { "epoch": 0.19091240939352327, "grad_norm": 1.7236839516518374, "learning_rate": 9.336359133775791e-07, "loss": 0.334, "step": 10983 }, { "epoch": 0.1909297919310261, "grad_norm": 1.84930264917059, "learning_rate": 9.336218989683844e-07, "loss": 0.403, "step": 10984 }, { "epoch": 0.1909471744685289, "grad_norm": 4.2556747629791385, "learning_rate": 9.336078831848075e-07, "loss": 0.2505, "step": 10985 }, { "epoch": 0.19096455700603174, "grad_norm": 1.5824000741882989, "learning_rate": 9.335938660268927e-07, "loss": 0.2677, "step": 10986 }, { "epoch": 0.19098193954353457, "grad_norm": 1.839192049772324, "learning_rate": 9.335798474946845e-07, "loss": 0.286, "step": 10987 }, { "epoch": 0.1909993220810374, "grad_norm": 1.7919170365428938, "learning_rate": 9.335658275882275e-07, "loss": 0.3924, "step": 10988 }, { "epoch": 0.1910167046185402, "grad_norm": 2.57933358844235, "learning_rate": 9.335518063075661e-07, "loss": 0.3754, "step": 10989 }, { "epoch": 0.19103408715604303, "grad_norm": 2.1339397733600802, "learning_rate": 9.335377836527442e-07, "loss": 0.4849, "step": 10990 }, { "epoch": 0.19105146969354586, "grad_norm": 1.4843058848269446, "learning_rate": 9.33523759623807e-07, "loss": 0.2134, "step": 10991 }, { "epoch": 0.1910688522310487, "grad_norm": 1.5956005466992553, "learning_rate": 9.335097342207986e-07, "loss": 0.374, "step": 10992 }, { "epoch": 0.19108623476855152, "grad_norm": 3.1735550564497066, "learning_rate": 9.334957074437635e-07, "loss": 0.3347, "step": 10993 }, { "epoch": 0.19110361730605432, "grad_norm": 1.1812547591991922, "learning_rate": 9.33481679292746e-07, "loss": 0.3386, "step": 10994 }, { "epoch": 0.19112099984355715, "grad_norm": 1.9461561045137548, "learning_rate": 9.334676497677908e-07, "loss": 0.3142, "step": 10995 }, { "epoch": 0.19113838238105998, "grad_norm": 2.0297450587498225, "learning_rate": 9.334536188689422e-07, "loss": 0.4447, "step": 10996 }, { "epoch": 0.19115576491856281, "grad_norm": 2.4627916058071957, "learning_rate": 9.334395865962448e-07, "loss": 0.3795, "step": 10997 }, { "epoch": 0.19117314745606565, "grad_norm": 1.9238594126604032, "learning_rate": 9.33425552949743e-07, "loss": 0.3634, "step": 10998 }, { "epoch": 0.19119052999356845, "grad_norm": 2.151661778064119, "learning_rate": 9.334115179294812e-07, "loss": 0.4677, "step": 10999 }, { "epoch": 0.19120791253107128, "grad_norm": 1.5792639909783033, "learning_rate": 9.333974815355041e-07, "loss": 0.4437, "step": 11000 }, { "epoch": 0.1912252950685741, "grad_norm": 1.4619423459801477, "learning_rate": 9.33383443767856e-07, "loss": 0.3339, "step": 11001 }, { "epoch": 0.19124267760607694, "grad_norm": 2.258881622015901, "learning_rate": 9.333694046265816e-07, "loss": 0.3302, "step": 11002 }, { "epoch": 0.19126006014357977, "grad_norm": 2.8325025661402425, "learning_rate": 9.333553641117252e-07, "loss": 0.6048, "step": 11003 }, { "epoch": 0.19127744268108257, "grad_norm": 1.278736825732411, "learning_rate": 9.333413222233312e-07, "loss": 0.3159, "step": 11004 }, { "epoch": 0.1912948252185854, "grad_norm": 1.485378326749635, "learning_rate": 9.333272789614446e-07, "loss": 0.2476, "step": 11005 }, { "epoch": 0.19131220775608823, "grad_norm": 1.2120247866809626, "learning_rate": 9.333132343261094e-07, "loss": 0.237, "step": 11006 }, { "epoch": 0.19132959029359106, "grad_norm": 1.2886654795865988, "learning_rate": 9.332991883173702e-07, "loss": 0.2222, "step": 11007 }, { "epoch": 0.1913469728310939, "grad_norm": 2.4105758324888904, "learning_rate": 9.332851409352718e-07, "loss": 0.5099, "step": 11008 }, { "epoch": 0.1913643553685967, "grad_norm": 1.6911966217322834, "learning_rate": 9.332710921798583e-07, "loss": 0.3329, "step": 11009 }, { "epoch": 0.19138173790609952, "grad_norm": 1.3680139018582311, "learning_rate": 9.332570420511747e-07, "loss": 0.4806, "step": 11010 }, { "epoch": 0.19139912044360236, "grad_norm": 2.1200647856554005, "learning_rate": 9.332429905492652e-07, "loss": 0.2491, "step": 11011 }, { "epoch": 0.19141650298110519, "grad_norm": 1.6582995358663672, "learning_rate": 9.332289376741744e-07, "loss": 0.3386, "step": 11012 }, { "epoch": 0.19143388551860802, "grad_norm": 1.8231066039476485, "learning_rate": 9.332148834259469e-07, "loss": 0.361, "step": 11013 }, { "epoch": 0.19145126805611082, "grad_norm": 1.6612328653013564, "learning_rate": 9.332008278046272e-07, "loss": 0.357, "step": 11014 }, { "epoch": 0.19146865059361365, "grad_norm": 1.1894851596471014, "learning_rate": 9.331867708102598e-07, "loss": 0.2948, "step": 11015 }, { "epoch": 0.19148603313111648, "grad_norm": 2.31078241643723, "learning_rate": 9.331727124428896e-07, "loss": 0.4814, "step": 11016 }, { "epoch": 0.1915034156686193, "grad_norm": 1.1479289646364046, "learning_rate": 9.331586527025607e-07, "loss": 0.1932, "step": 11017 }, { "epoch": 0.19152079820612214, "grad_norm": 1.5971033842889184, "learning_rate": 9.331445915893179e-07, "loss": 0.3811, "step": 11018 }, { "epoch": 0.19153818074362494, "grad_norm": 1.261793514306485, "learning_rate": 9.331305291032057e-07, "loss": 0.3286, "step": 11019 }, { "epoch": 0.19155556328112777, "grad_norm": 1.8018936035164375, "learning_rate": 9.331164652442685e-07, "loss": 0.4316, "step": 11020 }, { "epoch": 0.1915729458186306, "grad_norm": 1.4654279364558809, "learning_rate": 9.331024000125514e-07, "loss": 0.2118, "step": 11021 }, { "epoch": 0.19159032835613343, "grad_norm": 1.920925066777769, "learning_rate": 9.330883334080983e-07, "loss": 0.2499, "step": 11022 }, { "epoch": 0.19160771089363626, "grad_norm": 1.0431613006037517, "learning_rate": 9.330742654309543e-07, "loss": 0.1289, "step": 11023 }, { "epoch": 0.19162509343113907, "grad_norm": 1.6902530649076708, "learning_rate": 9.330601960811639e-07, "loss": 0.309, "step": 11024 }, { "epoch": 0.1916424759686419, "grad_norm": 1.4185409673899159, "learning_rate": 9.330461253587716e-07, "loss": 0.328, "step": 11025 }, { "epoch": 0.19165985850614473, "grad_norm": 4.68863854914175, "learning_rate": 9.330320532638219e-07, "loss": 0.5406, "step": 11026 }, { "epoch": 0.19167724104364756, "grad_norm": 1.8330047212489546, "learning_rate": 9.330179797963595e-07, "loss": 0.4541, "step": 11027 }, { "epoch": 0.1916946235811504, "grad_norm": 3.0421952301801443, "learning_rate": 9.330039049564291e-07, "loss": 0.3376, "step": 11028 }, { "epoch": 0.1917120061186532, "grad_norm": 2.372595046205081, "learning_rate": 9.329898287440752e-07, "loss": 0.2353, "step": 11029 }, { "epoch": 0.19172938865615602, "grad_norm": 2.451481576657016, "learning_rate": 9.329757511593424e-07, "loss": 0.3792, "step": 11030 }, { "epoch": 0.19174677119365885, "grad_norm": 2.240330530833726, "learning_rate": 9.329616722022753e-07, "loss": 0.4152, "step": 11031 }, { "epoch": 0.19176415373116168, "grad_norm": 2.3793796109846106, "learning_rate": 9.329475918729187e-07, "loss": 0.5162, "step": 11032 }, { "epoch": 0.1917815362686645, "grad_norm": 1.312393154655799, "learning_rate": 9.329335101713172e-07, "loss": 0.3621, "step": 11033 }, { "epoch": 0.1917989188061673, "grad_norm": 1.5370644415274428, "learning_rate": 9.329194270975151e-07, "loss": 0.2172, "step": 11034 }, { "epoch": 0.19181630134367014, "grad_norm": 2.874923046743188, "learning_rate": 9.329053426515574e-07, "loss": 0.4518, "step": 11035 }, { "epoch": 0.19183368388117297, "grad_norm": 2.074390750659384, "learning_rate": 9.328912568334888e-07, "loss": 0.3467, "step": 11036 }, { "epoch": 0.1918510664186758, "grad_norm": 2.70378074078313, "learning_rate": 9.328771696433535e-07, "loss": 0.3342, "step": 11037 }, { "epoch": 0.19186844895617863, "grad_norm": 1.3978825518578317, "learning_rate": 9.328630810811966e-07, "loss": 0.2836, "step": 11038 }, { "epoch": 0.19188583149368144, "grad_norm": 2.55843177211119, "learning_rate": 9.328489911470625e-07, "loss": 0.4323, "step": 11039 }, { "epoch": 0.19190321403118427, "grad_norm": 1.976718006383068, "learning_rate": 9.328348998409959e-07, "loss": 0.3249, "step": 11040 }, { "epoch": 0.1919205965686871, "grad_norm": 1.3031144004317652, "learning_rate": 9.328208071630414e-07, "loss": 0.3451, "step": 11041 }, { "epoch": 0.19193797910618993, "grad_norm": 3.2406185305744413, "learning_rate": 9.32806713113244e-07, "loss": 0.3495, "step": 11042 }, { "epoch": 0.19195536164369276, "grad_norm": 3.2796241133989064, "learning_rate": 9.32792617691648e-07, "loss": 0.3394, "step": 11043 }, { "epoch": 0.19197274418119556, "grad_norm": 1.5704954016175312, "learning_rate": 9.327785208982983e-07, "loss": 0.3766, "step": 11044 }, { "epoch": 0.1919901267186984, "grad_norm": 1.944337004528202, "learning_rate": 9.327644227332394e-07, "loss": 0.3988, "step": 11045 }, { "epoch": 0.19200750925620122, "grad_norm": 2.085549261221999, "learning_rate": 9.327503231965162e-07, "loss": 0.3357, "step": 11046 }, { "epoch": 0.19202489179370405, "grad_norm": 2.72916458656453, "learning_rate": 9.327362222881732e-07, "loss": 0.2968, "step": 11047 }, { "epoch": 0.19204227433120688, "grad_norm": 2.438709832360635, "learning_rate": 9.327221200082551e-07, "loss": 0.4083, "step": 11048 }, { "epoch": 0.19205965686870968, "grad_norm": 1.9740676911948596, "learning_rate": 9.327080163568066e-07, "loss": 0.3255, "step": 11049 }, { "epoch": 0.1920770394062125, "grad_norm": 2.602606250786481, "learning_rate": 9.326939113338726e-07, "loss": 0.4055, "step": 11050 }, { "epoch": 0.19209442194371534, "grad_norm": 1.8892259627092762, "learning_rate": 9.326798049394976e-07, "loss": 0.3433, "step": 11051 }, { "epoch": 0.19211180448121817, "grad_norm": 1.0912952056496161, "learning_rate": 9.326656971737263e-07, "loss": 0.3597, "step": 11052 }, { "epoch": 0.192129187018721, "grad_norm": 1.7983085329677886, "learning_rate": 9.326515880366037e-07, "loss": 0.2445, "step": 11053 }, { "epoch": 0.1921465695562238, "grad_norm": 1.80815540733949, "learning_rate": 9.326374775281741e-07, "loss": 0.3303, "step": 11054 }, { "epoch": 0.19216395209372664, "grad_norm": 3.9537897350318025, "learning_rate": 9.326233656484825e-07, "loss": 0.4634, "step": 11055 }, { "epoch": 0.19218133463122947, "grad_norm": 2.2868669242445776, "learning_rate": 9.326092523975736e-07, "loss": 0.5525, "step": 11056 }, { "epoch": 0.1921987171687323, "grad_norm": 3.067148145353489, "learning_rate": 9.325951377754921e-07, "loss": 0.2648, "step": 11057 }, { "epoch": 0.19221609970623513, "grad_norm": 2.022936209134656, "learning_rate": 9.325810217822827e-07, "loss": 0.4172, "step": 11058 }, { "epoch": 0.19223348224373793, "grad_norm": 1.749625174270021, "learning_rate": 9.325669044179902e-07, "loss": 0.3596, "step": 11059 }, { "epoch": 0.19225086478124076, "grad_norm": 2.3835914955118285, "learning_rate": 9.325527856826592e-07, "loss": 0.3379, "step": 11060 }, { "epoch": 0.1922682473187436, "grad_norm": 1.6804530261171824, "learning_rate": 9.325386655763348e-07, "loss": 0.1852, "step": 11061 }, { "epoch": 0.19228562985624642, "grad_norm": 2.032366171277577, "learning_rate": 9.325245440990613e-07, "loss": 0.308, "step": 11062 }, { "epoch": 0.19230301239374925, "grad_norm": 1.707329382401942, "learning_rate": 9.325104212508838e-07, "loss": 0.5686, "step": 11063 }, { "epoch": 0.19232039493125205, "grad_norm": 1.1889308948897765, "learning_rate": 9.324962970318468e-07, "loss": 0.4902, "step": 11064 }, { "epoch": 0.19233777746875488, "grad_norm": 2.4509820961640663, "learning_rate": 9.324821714419954e-07, "loss": 0.4442, "step": 11065 }, { "epoch": 0.1923551600062577, "grad_norm": 2.4039414384730864, "learning_rate": 9.324680444813742e-07, "loss": 0.5549, "step": 11066 }, { "epoch": 0.19237254254376054, "grad_norm": 2.3002581778967346, "learning_rate": 9.324539161500277e-07, "loss": 0.444, "step": 11067 }, { "epoch": 0.19238992508126337, "grad_norm": 1.454924431079366, "learning_rate": 9.324397864480011e-07, "loss": 0.2459, "step": 11068 }, { "epoch": 0.19240730761876618, "grad_norm": 4.044488019661382, "learning_rate": 9.324256553753391e-07, "loss": 0.5296, "step": 11069 }, { "epoch": 0.192424690156269, "grad_norm": 1.8889671869826914, "learning_rate": 9.324115229320863e-07, "loss": 0.3715, "step": 11070 }, { "epoch": 0.19244207269377184, "grad_norm": 1.5810453732130396, "learning_rate": 9.323973891182877e-07, "loss": 0.56, "step": 11071 }, { "epoch": 0.19245945523127467, "grad_norm": 4.895618443288524, "learning_rate": 9.32383253933988e-07, "loss": 0.512, "step": 11072 }, { "epoch": 0.1924768377687775, "grad_norm": 1.1887080492004003, "learning_rate": 9.323691173792318e-07, "loss": 0.4261, "step": 11073 }, { "epoch": 0.1924942203062803, "grad_norm": 1.9697663884942846, "learning_rate": 9.323549794540644e-07, "loss": 0.2724, "step": 11074 }, { "epoch": 0.19251160284378313, "grad_norm": 1.757003081834273, "learning_rate": 9.323408401585302e-07, "loss": 0.3858, "step": 11075 }, { "epoch": 0.19252898538128596, "grad_norm": 2.3140065633088205, "learning_rate": 9.323266994926741e-07, "loss": 0.457, "step": 11076 }, { "epoch": 0.1925463679187888, "grad_norm": 1.237272252428471, "learning_rate": 9.323125574565409e-07, "loss": 0.3153, "step": 11077 }, { "epoch": 0.19256375045629162, "grad_norm": 3.2281791961954793, "learning_rate": 9.322984140501756e-07, "loss": 0.438, "step": 11078 }, { "epoch": 0.19258113299379442, "grad_norm": 5.180470180345183, "learning_rate": 9.322842692736229e-07, "loss": 0.5191, "step": 11079 }, { "epoch": 0.19259851553129725, "grad_norm": 2.2806980093796763, "learning_rate": 9.322701231269277e-07, "loss": 0.3241, "step": 11080 }, { "epoch": 0.19261589806880008, "grad_norm": 1.5882993680262072, "learning_rate": 9.322559756101346e-07, "loss": 0.3967, "step": 11081 }, { "epoch": 0.19263328060630291, "grad_norm": 1.5436659520493483, "learning_rate": 9.322418267232888e-07, "loss": 0.2869, "step": 11082 }, { "epoch": 0.19265066314380574, "grad_norm": 2.291222490159822, "learning_rate": 9.322276764664348e-07, "loss": 0.351, "step": 11083 }, { "epoch": 0.19266804568130855, "grad_norm": 1.8034609843963951, "learning_rate": 9.322135248396177e-07, "loss": 0.5056, "step": 11084 }, { "epoch": 0.19268542821881138, "grad_norm": 1.2659179202914017, "learning_rate": 9.321993718428823e-07, "loss": 0.4395, "step": 11085 }, { "epoch": 0.1927028107563142, "grad_norm": 1.4053269810303086, "learning_rate": 9.321852174762733e-07, "loss": 0.2986, "step": 11086 }, { "epoch": 0.19272019329381704, "grad_norm": 1.5140390865001865, "learning_rate": 9.321710617398357e-07, "loss": 0.3158, "step": 11087 }, { "epoch": 0.19273757583131987, "grad_norm": 1.4718523674483917, "learning_rate": 9.321569046336146e-07, "loss": 0.367, "step": 11088 }, { "epoch": 0.19275495836882267, "grad_norm": 1.0951046946877372, "learning_rate": 9.321427461576543e-07, "loss": 0.2783, "step": 11089 }, { "epoch": 0.1927723409063255, "grad_norm": 1.9728011156569834, "learning_rate": 9.321285863120003e-07, "loss": 0.6043, "step": 11090 }, { "epoch": 0.19278972344382833, "grad_norm": 2.211002399754656, "learning_rate": 9.32114425096697e-07, "loss": 0.4585, "step": 11091 }, { "epoch": 0.19280710598133116, "grad_norm": 0.982904084735732, "learning_rate": 9.321002625117895e-07, "loss": 0.2939, "step": 11092 }, { "epoch": 0.192824488518834, "grad_norm": 1.7625500805795722, "learning_rate": 9.320860985573227e-07, "loss": 0.2907, "step": 11093 }, { "epoch": 0.1928418710563368, "grad_norm": 2.6375952007498955, "learning_rate": 9.320719332333415e-07, "loss": 0.5145, "step": 11094 }, { "epoch": 0.19285925359383962, "grad_norm": 1.9943461020417408, "learning_rate": 9.320577665398907e-07, "loss": 0.3784, "step": 11095 }, { "epoch": 0.19287663613134245, "grad_norm": 2.3772643798366326, "learning_rate": 9.320435984770152e-07, "loss": 0.3181, "step": 11096 }, { "epoch": 0.19289401866884529, "grad_norm": 1.0964943703616867, "learning_rate": 9.320294290447599e-07, "loss": 0.3329, "step": 11097 }, { "epoch": 0.19291140120634812, "grad_norm": 2.3915494208704513, "learning_rate": 9.320152582431699e-07, "loss": 0.3467, "step": 11098 }, { "epoch": 0.19292878374385092, "grad_norm": 1.8910448120143386, "learning_rate": 9.3200108607229e-07, "loss": 0.4638, "step": 11099 }, { "epoch": 0.19294616628135375, "grad_norm": 1.8568223961257584, "learning_rate": 9.319869125321651e-07, "loss": 0.3279, "step": 11100 }, { "epoch": 0.19296354881885658, "grad_norm": 2.381982610912365, "learning_rate": 9.3197273762284e-07, "loss": 0.4007, "step": 11101 }, { "epoch": 0.1929809313563594, "grad_norm": 2.8499434565635733, "learning_rate": 9.319585613443598e-07, "loss": 0.2984, "step": 11102 }, { "epoch": 0.19299831389386224, "grad_norm": 2.1073384162532816, "learning_rate": 9.319443836967694e-07, "loss": 0.4448, "step": 11103 }, { "epoch": 0.19301569643136504, "grad_norm": 1.7022453346028499, "learning_rate": 9.319302046801139e-07, "loss": 0.4276, "step": 11104 }, { "epoch": 0.19303307896886787, "grad_norm": 1.8281791863463313, "learning_rate": 9.319160242944377e-07, "loss": 0.3414, "step": 11105 }, { "epoch": 0.1930504615063707, "grad_norm": 2.1633708516379913, "learning_rate": 9.319018425397864e-07, "loss": 0.5067, "step": 11106 }, { "epoch": 0.19306784404387353, "grad_norm": 1.8913163487223021, "learning_rate": 9.318876594162046e-07, "loss": 0.296, "step": 11107 }, { "epoch": 0.19308522658137636, "grad_norm": 1.6348064229565422, "learning_rate": 9.318734749237371e-07, "loss": 0.3587, "step": 11108 }, { "epoch": 0.19310260911887916, "grad_norm": 2.5437993581140366, "learning_rate": 9.318592890624293e-07, "loss": 0.6311, "step": 11109 }, { "epoch": 0.193119991656382, "grad_norm": 4.34675021779086, "learning_rate": 9.318451018323258e-07, "loss": 0.4245, "step": 11110 }, { "epoch": 0.19313737419388483, "grad_norm": 2.7637488780873225, "learning_rate": 9.31830913233472e-07, "loss": 0.3041, "step": 11111 }, { "epoch": 0.19315475673138766, "grad_norm": 1.5057929932086078, "learning_rate": 9.318167232659122e-07, "loss": 0.3369, "step": 11112 }, { "epoch": 0.19317213926889049, "grad_norm": 2.1765134475041803, "learning_rate": 9.31802531929692e-07, "loss": 0.4135, "step": 11113 }, { "epoch": 0.1931895218063933, "grad_norm": 1.8523608181050384, "learning_rate": 9.31788339224856e-07, "loss": 0.3178, "step": 11114 }, { "epoch": 0.19320690434389612, "grad_norm": 1.4121461556359618, "learning_rate": 9.317741451514493e-07, "loss": 0.2173, "step": 11115 }, { "epoch": 0.19322428688139895, "grad_norm": 1.7554633802111594, "learning_rate": 9.31759949709517e-07, "loss": 0.4375, "step": 11116 }, { "epoch": 0.19324166941890178, "grad_norm": 1.3167096583642566, "learning_rate": 9.317457528991039e-07, "loss": 0.2383, "step": 11117 }, { "epoch": 0.1932590519564046, "grad_norm": 1.3535530920608685, "learning_rate": 9.317315547202551e-07, "loss": 0.4753, "step": 11118 }, { "epoch": 0.1932764344939074, "grad_norm": 1.227900764824331, "learning_rate": 9.317173551730157e-07, "loss": 0.2839, "step": 11119 }, { "epoch": 0.19329381703141024, "grad_norm": 2.026759570379667, "learning_rate": 9.317031542574305e-07, "loss": 0.2528, "step": 11120 }, { "epoch": 0.19331119956891307, "grad_norm": 4.015903106958476, "learning_rate": 9.316889519735447e-07, "loss": 0.3553, "step": 11121 }, { "epoch": 0.1933285821064159, "grad_norm": 1.5307771267922021, "learning_rate": 9.316747483214032e-07, "loss": 0.5164, "step": 11122 }, { "epoch": 0.19334596464391873, "grad_norm": 1.454206509331689, "learning_rate": 9.316605433010509e-07, "loss": 0.374, "step": 11123 }, { "epoch": 0.19336334718142154, "grad_norm": 2.084346236673178, "learning_rate": 9.31646336912533e-07, "loss": 0.2589, "step": 11124 }, { "epoch": 0.19338072971892437, "grad_norm": 2.99428185723931, "learning_rate": 9.316321291558946e-07, "loss": 0.3713, "step": 11125 }, { "epoch": 0.1933981122564272, "grad_norm": 2.3406429320336968, "learning_rate": 9.316179200311806e-07, "loss": 0.4299, "step": 11126 }, { "epoch": 0.19341549479393003, "grad_norm": 1.8444854624355924, "learning_rate": 9.31603709538436e-07, "loss": 0.228, "step": 11127 }, { "epoch": 0.19343287733143283, "grad_norm": 2.177555945405689, "learning_rate": 9.315894976777059e-07, "loss": 0.2282, "step": 11128 }, { "epoch": 0.19345025986893566, "grad_norm": 1.498404702188883, "learning_rate": 9.315752844490355e-07, "loss": 0.4903, "step": 11129 }, { "epoch": 0.1934676424064385, "grad_norm": 2.626195232471084, "learning_rate": 9.315610698524697e-07, "loss": 0.3659, "step": 11130 }, { "epoch": 0.19348502494394132, "grad_norm": 1.6284263010123252, "learning_rate": 9.315468538880533e-07, "loss": 0.2715, "step": 11131 }, { "epoch": 0.19350240748144415, "grad_norm": 2.417293997657206, "learning_rate": 9.315326365558318e-07, "loss": 0.5713, "step": 11132 }, { "epoch": 0.19351979001894695, "grad_norm": 1.903851282320605, "learning_rate": 9.315184178558499e-07, "loss": 0.4111, "step": 11133 }, { "epoch": 0.19353717255644978, "grad_norm": 2.474118326052463, "learning_rate": 9.31504197788153e-07, "loss": 0.2892, "step": 11134 }, { "epoch": 0.1935545550939526, "grad_norm": 2.038928345236668, "learning_rate": 9.31489976352786e-07, "loss": 0.4403, "step": 11135 }, { "epoch": 0.19357193763145544, "grad_norm": 2.0971642619333535, "learning_rate": 9.314757535497939e-07, "loss": 0.4585, "step": 11136 }, { "epoch": 0.19358932016895827, "grad_norm": 1.9218748895921576, "learning_rate": 9.31461529379222e-07, "loss": 0.4872, "step": 11137 }, { "epoch": 0.19360670270646108, "grad_norm": 3.9997077317361844, "learning_rate": 9.314473038411152e-07, "loss": 0.4031, "step": 11138 }, { "epoch": 0.1936240852439639, "grad_norm": 1.5832857692521511, "learning_rate": 9.314330769355186e-07, "loss": 0.3738, "step": 11139 }, { "epoch": 0.19364146778146674, "grad_norm": 3.0165082910686305, "learning_rate": 9.314188486624773e-07, "loss": 0.302, "step": 11140 }, { "epoch": 0.19365885031896957, "grad_norm": 1.1002795664139742, "learning_rate": 9.314046190220365e-07, "loss": 0.2326, "step": 11141 }, { "epoch": 0.1936762328564724, "grad_norm": 2.001828802056177, "learning_rate": 9.313903880142412e-07, "loss": 0.5219, "step": 11142 }, { "epoch": 0.1936936153939752, "grad_norm": 2.068446960801027, "learning_rate": 9.313761556391365e-07, "loss": 0.2773, "step": 11143 }, { "epoch": 0.19371099793147803, "grad_norm": 1.6252096111143188, "learning_rate": 9.313619218967677e-07, "loss": 0.2736, "step": 11144 }, { "epoch": 0.19372838046898086, "grad_norm": 1.6889758916325248, "learning_rate": 9.313476867871795e-07, "loss": 0.295, "step": 11145 }, { "epoch": 0.1937457630064837, "grad_norm": 1.7936800586986643, "learning_rate": 9.313334503104174e-07, "loss": 0.4003, "step": 11146 }, { "epoch": 0.19376314554398652, "grad_norm": 2.100043876020458, "learning_rate": 9.313192124665265e-07, "loss": 0.5088, "step": 11147 }, { "epoch": 0.19378052808148932, "grad_norm": 1.4735423203694724, "learning_rate": 9.313049732555518e-07, "loss": 0.5646, "step": 11148 }, { "epoch": 0.19379791061899215, "grad_norm": 1.4690500658452177, "learning_rate": 9.312907326775383e-07, "loss": 0.3825, "step": 11149 }, { "epoch": 0.19381529315649498, "grad_norm": 1.6541429128122842, "learning_rate": 9.312764907325315e-07, "loss": 0.4282, "step": 11150 }, { "epoch": 0.1938326756939978, "grad_norm": 1.2002712689435941, "learning_rate": 9.312622474205761e-07, "loss": 0.4254, "step": 11151 }, { "epoch": 0.19385005823150064, "grad_norm": 1.915478018434292, "learning_rate": 9.312480027417177e-07, "loss": 0.328, "step": 11152 }, { "epoch": 0.19386744076900345, "grad_norm": 1.867024658660314, "learning_rate": 9.312337566960012e-07, "loss": 0.3536, "step": 11153 }, { "epoch": 0.19388482330650628, "grad_norm": 2.280710842767096, "learning_rate": 9.312195092834716e-07, "loss": 0.6384, "step": 11154 }, { "epoch": 0.1939022058440091, "grad_norm": 4.402714688539012, "learning_rate": 9.312052605041744e-07, "loss": 0.5119, "step": 11155 }, { "epoch": 0.19391958838151194, "grad_norm": 2.2160578042408834, "learning_rate": 9.311910103581546e-07, "loss": 0.4924, "step": 11156 }, { "epoch": 0.19393697091901477, "grad_norm": 1.80051721943076, "learning_rate": 9.311767588454573e-07, "loss": 0.5433, "step": 11157 }, { "epoch": 0.19395435345651757, "grad_norm": 1.5230107836901081, "learning_rate": 9.311625059661279e-07, "loss": 0.2049, "step": 11158 }, { "epoch": 0.1939717359940204, "grad_norm": 1.4068083390895316, "learning_rate": 9.311482517202112e-07, "loss": 0.5927, "step": 11159 }, { "epoch": 0.19398911853152323, "grad_norm": 1.286338184645199, "learning_rate": 9.311339961077527e-07, "loss": 0.2311, "step": 11160 }, { "epoch": 0.19400650106902606, "grad_norm": 1.9150991575037928, "learning_rate": 9.311197391287975e-07, "loss": 0.4807, "step": 11161 }, { "epoch": 0.1940238836065289, "grad_norm": 1.378646215916171, "learning_rate": 9.311054807833907e-07, "loss": 0.2542, "step": 11162 }, { "epoch": 0.1940412661440317, "grad_norm": 1.5842460064894075, "learning_rate": 9.310912210715777e-07, "loss": 0.2933, "step": 11163 }, { "epoch": 0.19405864868153452, "grad_norm": 1.408481113671403, "learning_rate": 9.310769599934034e-07, "loss": 0.2314, "step": 11164 }, { "epoch": 0.19407603121903735, "grad_norm": 1.973570262724996, "learning_rate": 9.310626975489133e-07, "loss": 0.2478, "step": 11165 }, { "epoch": 0.19409341375654018, "grad_norm": 2.45889754733343, "learning_rate": 9.310484337381524e-07, "loss": 0.4965, "step": 11166 }, { "epoch": 0.19411079629404301, "grad_norm": 1.4904783459410678, "learning_rate": 9.310341685611659e-07, "loss": 0.2773, "step": 11167 }, { "epoch": 0.19412817883154582, "grad_norm": 2.138897654419361, "learning_rate": 9.310199020179992e-07, "loss": 0.7539, "step": 11168 }, { "epoch": 0.19414556136904865, "grad_norm": 1.6082715287060332, "learning_rate": 9.310056341086973e-07, "loss": 0.2606, "step": 11169 }, { "epoch": 0.19416294390655148, "grad_norm": 1.2521671518259032, "learning_rate": 9.309913648333056e-07, "loss": 0.3332, "step": 11170 }, { "epoch": 0.1941803264440543, "grad_norm": 1.508412456580939, "learning_rate": 9.309770941918691e-07, "loss": 0.3613, "step": 11171 }, { "epoch": 0.19419770898155714, "grad_norm": 2.50771346324076, "learning_rate": 9.309628221844335e-07, "loss": 0.1994, "step": 11172 }, { "epoch": 0.19421509151905994, "grad_norm": 1.731974543955441, "learning_rate": 9.309485488110433e-07, "loss": 0.4579, "step": 11173 }, { "epoch": 0.19423247405656277, "grad_norm": 1.4750486790312154, "learning_rate": 9.309342740717445e-07, "loss": 0.2413, "step": 11174 }, { "epoch": 0.1942498565940656, "grad_norm": 4.53761889725313, "learning_rate": 9.30919997966582e-07, "loss": 0.3704, "step": 11175 }, { "epoch": 0.19426723913156843, "grad_norm": 1.5616157180034231, "learning_rate": 9.309057204956009e-07, "loss": 0.3677, "step": 11176 }, { "epoch": 0.19428462166907126, "grad_norm": 1.5183972399261985, "learning_rate": 9.308914416588467e-07, "loss": 0.2758, "step": 11177 }, { "epoch": 0.19430200420657406, "grad_norm": 1.7106661385114073, "learning_rate": 9.308771614563645e-07, "loss": 0.2166, "step": 11178 }, { "epoch": 0.1943193867440769, "grad_norm": 3.309292068241464, "learning_rate": 9.308628798881997e-07, "loss": 0.323, "step": 11179 }, { "epoch": 0.19433676928157972, "grad_norm": 1.6728275264552925, "learning_rate": 9.308485969543975e-07, "loss": 0.3244, "step": 11180 }, { "epoch": 0.19435415181908255, "grad_norm": 2.305459237454038, "learning_rate": 9.30834312655003e-07, "loss": 0.5006, "step": 11181 }, { "epoch": 0.19437153435658538, "grad_norm": 3.5099817215273212, "learning_rate": 9.308200269900618e-07, "loss": 0.4055, "step": 11182 }, { "epoch": 0.1943889168940882, "grad_norm": 1.4117005799108533, "learning_rate": 9.30805739959619e-07, "loss": 0.3026, "step": 11183 }, { "epoch": 0.19440629943159102, "grad_norm": 1.8893358206755297, "learning_rate": 9.307914515637199e-07, "loss": 0.4858, "step": 11184 }, { "epoch": 0.19442368196909385, "grad_norm": 1.261535560147396, "learning_rate": 9.307771618024097e-07, "loss": 0.4206, "step": 11185 }, { "epoch": 0.19444106450659668, "grad_norm": 1.69464283484764, "learning_rate": 9.307628706757338e-07, "loss": 0.3456, "step": 11186 }, { "epoch": 0.1944584470440995, "grad_norm": 1.7706601177370822, "learning_rate": 9.307485781837375e-07, "loss": 0.3238, "step": 11187 }, { "epoch": 0.1944758295816023, "grad_norm": 2.4711773136658204, "learning_rate": 9.307342843264661e-07, "loss": 0.3772, "step": 11188 }, { "epoch": 0.19449321211910514, "grad_norm": 1.3410556875800999, "learning_rate": 9.30719989103965e-07, "loss": 0.3318, "step": 11189 }, { "epoch": 0.19451059465660797, "grad_norm": 2.4025453397291723, "learning_rate": 9.307056925162792e-07, "loss": 0.535, "step": 11190 }, { "epoch": 0.1945279771941108, "grad_norm": 1.7432089747809913, "learning_rate": 9.306913945634542e-07, "loss": 0.4816, "step": 11191 }, { "epoch": 0.19454535973161363, "grad_norm": 1.8178806162181582, "learning_rate": 9.306770952455354e-07, "loss": 0.5772, "step": 11192 }, { "epoch": 0.19456274226911643, "grad_norm": 1.8497324345865243, "learning_rate": 9.30662794562568e-07, "loss": 0.3079, "step": 11193 }, { "epoch": 0.19458012480661926, "grad_norm": 2.1710552080289744, "learning_rate": 9.306484925145975e-07, "loss": 0.3779, "step": 11194 }, { "epoch": 0.1945975073441221, "grad_norm": 1.2913913021049812, "learning_rate": 9.306341891016689e-07, "loss": 0.4157, "step": 11195 }, { "epoch": 0.19461488988162493, "grad_norm": 2.886730759323179, "learning_rate": 9.30619884323828e-07, "loss": 0.5035, "step": 11196 }, { "epoch": 0.19463227241912776, "grad_norm": 2.4370077078725023, "learning_rate": 9.306055781811197e-07, "loss": 0.4019, "step": 11197 }, { "epoch": 0.19464965495663056, "grad_norm": 1.776998024486565, "learning_rate": 9.305912706735894e-07, "loss": 0.3678, "step": 11198 }, { "epoch": 0.1946670374941334, "grad_norm": 1.7158617247809271, "learning_rate": 9.305769618012829e-07, "loss": 0.4053, "step": 11199 }, { "epoch": 0.19468442003163622, "grad_norm": 1.63324420273216, "learning_rate": 9.305626515642449e-07, "loss": 0.2593, "step": 11200 }, { "epoch": 0.19470180256913905, "grad_norm": 1.7461500913788202, "learning_rate": 9.305483399625213e-07, "loss": 0.3694, "step": 11201 }, { "epoch": 0.19471918510664188, "grad_norm": 1.5264856195038774, "learning_rate": 9.305340269961571e-07, "loss": 0.2914, "step": 11202 }, { "epoch": 0.19473656764414468, "grad_norm": 1.9384965169173982, "learning_rate": 9.305197126651978e-07, "loss": 0.5072, "step": 11203 }, { "epoch": 0.1947539501816475, "grad_norm": 1.979679676644248, "learning_rate": 9.305053969696889e-07, "loss": 0.3475, "step": 11204 }, { "epoch": 0.19477133271915034, "grad_norm": 1.778830550740145, "learning_rate": 9.304910799096757e-07, "loss": 0.2869, "step": 11205 }, { "epoch": 0.19478871525665317, "grad_norm": 2.7102209384265277, "learning_rate": 9.304767614852033e-07, "loss": 0.4173, "step": 11206 }, { "epoch": 0.194806097794156, "grad_norm": 2.3711643108511136, "learning_rate": 9.304624416963175e-07, "loss": 0.3415, "step": 11207 }, { "epoch": 0.1948234803316588, "grad_norm": 2.8159928665216905, "learning_rate": 9.304481205430634e-07, "loss": 0.501, "step": 11208 }, { "epoch": 0.19484086286916164, "grad_norm": 3.0644379336297676, "learning_rate": 9.304337980254865e-07, "loss": 0.4727, "step": 11209 }, { "epoch": 0.19485824540666447, "grad_norm": 2.1757612458748445, "learning_rate": 9.304194741436322e-07, "loss": 0.432, "step": 11210 }, { "epoch": 0.1948756279441673, "grad_norm": 1.8791825442291803, "learning_rate": 9.304051488975459e-07, "loss": 0.3901, "step": 11211 }, { "epoch": 0.19489301048167013, "grad_norm": 2.591424821890044, "learning_rate": 9.30390822287273e-07, "loss": 0.5936, "step": 11212 }, { "epoch": 0.19491039301917293, "grad_norm": 1.6193689756110903, "learning_rate": 9.303764943128589e-07, "loss": 0.3051, "step": 11213 }, { "epoch": 0.19492777555667576, "grad_norm": 2.486860938718517, "learning_rate": 9.30362164974349e-07, "loss": 0.3285, "step": 11214 }, { "epoch": 0.1949451580941786, "grad_norm": 1.479604516232069, "learning_rate": 9.303478342717886e-07, "loss": 0.2957, "step": 11215 }, { "epoch": 0.19496254063168142, "grad_norm": 3.0640227989935016, "learning_rate": 9.303335022052235e-07, "loss": 0.5416, "step": 11216 }, { "epoch": 0.19497992316918425, "grad_norm": 1.6199210735415812, "learning_rate": 9.303191687746988e-07, "loss": 0.3772, "step": 11217 }, { "epoch": 0.19499730570668705, "grad_norm": 3.3046779964328605, "learning_rate": 9.303048339802599e-07, "loss": 0.3132, "step": 11218 }, { "epoch": 0.19501468824418988, "grad_norm": 1.7825528801954438, "learning_rate": 9.302904978219524e-07, "loss": 0.7027, "step": 11219 }, { "epoch": 0.1950320707816927, "grad_norm": 2.5541697362799654, "learning_rate": 9.302761602998216e-07, "loss": 0.2493, "step": 11220 }, { "epoch": 0.19504945331919554, "grad_norm": 3.525192306010438, "learning_rate": 9.30261821413913e-07, "loss": 0.4134, "step": 11221 }, { "epoch": 0.19506683585669837, "grad_norm": 1.059046460126883, "learning_rate": 9.302474811642722e-07, "loss": 0.4108, "step": 11222 }, { "epoch": 0.19508421839420118, "grad_norm": 1.8839311637762055, "learning_rate": 9.302331395509446e-07, "loss": 0.2236, "step": 11223 }, { "epoch": 0.195101600931704, "grad_norm": 2.9358044035862885, "learning_rate": 9.302187965739754e-07, "loss": 0.4001, "step": 11224 }, { "epoch": 0.19511898346920684, "grad_norm": 2.558853736712658, "learning_rate": 9.302044522334103e-07, "loss": 0.3379, "step": 11225 }, { "epoch": 0.19513636600670967, "grad_norm": 2.0441420380341815, "learning_rate": 9.301901065292947e-07, "loss": 0.4492, "step": 11226 }, { "epoch": 0.1951537485442125, "grad_norm": 1.429268973208484, "learning_rate": 9.301757594616742e-07, "loss": 0.2323, "step": 11227 }, { "epoch": 0.1951711310817153, "grad_norm": 1.8906858131265765, "learning_rate": 9.30161411030594e-07, "loss": 0.3738, "step": 11228 }, { "epoch": 0.19518851361921813, "grad_norm": 2.276481948477419, "learning_rate": 9.301470612360998e-07, "loss": 0.5197, "step": 11229 }, { "epoch": 0.19520589615672096, "grad_norm": 1.9968247194101116, "learning_rate": 9.30132710078237e-07, "loss": 0.2566, "step": 11230 }, { "epoch": 0.1952232786942238, "grad_norm": 1.7852028730798144, "learning_rate": 9.30118357557051e-07, "loss": 0.3324, "step": 11231 }, { "epoch": 0.19524066123172662, "grad_norm": 3.91817410137274, "learning_rate": 9.301040036725876e-07, "loss": 0.5034, "step": 11232 }, { "epoch": 0.19525804376922942, "grad_norm": 1.776818364937805, "learning_rate": 9.300896484248919e-07, "loss": 0.31, "step": 11233 }, { "epoch": 0.19527542630673225, "grad_norm": 3.965395197420796, "learning_rate": 9.300752918140096e-07, "loss": 0.483, "step": 11234 }, { "epoch": 0.19529280884423508, "grad_norm": 1.2023180458154743, "learning_rate": 9.300609338399862e-07, "loss": 0.505, "step": 11235 }, { "epoch": 0.1953101913817379, "grad_norm": 1.8067063952980853, "learning_rate": 9.300465745028672e-07, "loss": 0.3428, "step": 11236 }, { "epoch": 0.19532757391924074, "grad_norm": 3.1151717699337502, "learning_rate": 9.300322138026981e-07, "loss": 0.6149, "step": 11237 }, { "epoch": 0.19534495645674355, "grad_norm": 1.5021110126036363, "learning_rate": 9.300178517395245e-07, "loss": 0.2699, "step": 11238 }, { "epoch": 0.19536233899424638, "grad_norm": 1.9806349730987678, "learning_rate": 9.300034883133919e-07, "loss": 0.3836, "step": 11239 }, { "epoch": 0.1953797215317492, "grad_norm": 1.3576997229546106, "learning_rate": 9.299891235243455e-07, "loss": 0.2645, "step": 11240 }, { "epoch": 0.19539710406925204, "grad_norm": 2.0027838858915272, "learning_rate": 9.299747573724313e-07, "loss": 0.4059, "step": 11241 }, { "epoch": 0.19541448660675487, "grad_norm": 1.2143761008939191, "learning_rate": 9.299603898576945e-07, "loss": 0.2729, "step": 11242 }, { "epoch": 0.19543186914425767, "grad_norm": 1.7672024059982934, "learning_rate": 9.299460209801809e-07, "loss": 0.5826, "step": 11243 }, { "epoch": 0.1954492516817605, "grad_norm": 2.1452119399190206, "learning_rate": 9.299316507399358e-07, "loss": 0.3241, "step": 11244 }, { "epoch": 0.19546663421926333, "grad_norm": 1.5421759638931063, "learning_rate": 9.29917279137005e-07, "loss": 0.3061, "step": 11245 }, { "epoch": 0.19548401675676616, "grad_norm": 1.7192389109213104, "learning_rate": 9.299029061714339e-07, "loss": 0.3405, "step": 11246 }, { "epoch": 0.195501399294269, "grad_norm": 2.4675550392668573, "learning_rate": 9.298885318432679e-07, "loss": 0.3912, "step": 11247 }, { "epoch": 0.1955187818317718, "grad_norm": 1.3002249185588046, "learning_rate": 9.298741561525529e-07, "loss": 0.264, "step": 11248 }, { "epoch": 0.19553616436927462, "grad_norm": 2.169891990272126, "learning_rate": 9.298597790993343e-07, "loss": 0.3526, "step": 11249 }, { "epoch": 0.19555354690677745, "grad_norm": 1.2282175472322716, "learning_rate": 9.298454006836576e-07, "loss": 0.3916, "step": 11250 }, { "epoch": 0.19557092944428028, "grad_norm": 2.3087587038402715, "learning_rate": 9.298310209055684e-07, "loss": 0.3453, "step": 11251 }, { "epoch": 0.19558831198178311, "grad_norm": 1.3012303997929617, "learning_rate": 9.298166397651123e-07, "loss": 0.4248, "step": 11252 }, { "epoch": 0.19560569451928592, "grad_norm": 1.9098188446058846, "learning_rate": 9.29802257262335e-07, "loss": 0.2527, "step": 11253 }, { "epoch": 0.19562307705678875, "grad_norm": 3.0403892644008117, "learning_rate": 9.297878733972819e-07, "loss": 0.5943, "step": 11254 }, { "epoch": 0.19564045959429158, "grad_norm": 4.9762302750388585, "learning_rate": 9.297734881699987e-07, "loss": 0.2347, "step": 11255 }, { "epoch": 0.1956578421317944, "grad_norm": 2.1253373792441788, "learning_rate": 9.297591015805308e-07, "loss": 0.3477, "step": 11256 }, { "epoch": 0.19567522466929724, "grad_norm": 1.468314175432593, "learning_rate": 9.297447136289242e-07, "loss": 0.3781, "step": 11257 }, { "epoch": 0.19569260720680004, "grad_norm": 3.6771207056177855, "learning_rate": 9.297303243152241e-07, "loss": 0.3946, "step": 11258 }, { "epoch": 0.19570998974430287, "grad_norm": 2.930087607695263, "learning_rate": 9.297159336394764e-07, "loss": 0.5053, "step": 11259 }, { "epoch": 0.1957273722818057, "grad_norm": 1.727765303766936, "learning_rate": 9.297015416017266e-07, "loss": 0.3427, "step": 11260 }, { "epoch": 0.19574475481930853, "grad_norm": 1.9091866988323467, "learning_rate": 9.296871482020203e-07, "loss": 0.2869, "step": 11261 }, { "epoch": 0.19576213735681136, "grad_norm": 1.2228861296535765, "learning_rate": 9.296727534404029e-07, "loss": 0.3498, "step": 11262 }, { "epoch": 0.19577951989431416, "grad_norm": 3.020196793531525, "learning_rate": 9.296583573169206e-07, "loss": 0.4122, "step": 11263 }, { "epoch": 0.195796902431817, "grad_norm": 1.8600985657806417, "learning_rate": 9.296439598316184e-07, "loss": 0.3637, "step": 11264 }, { "epoch": 0.19581428496931982, "grad_norm": 1.793018816251824, "learning_rate": 9.296295609845423e-07, "loss": 0.2813, "step": 11265 }, { "epoch": 0.19583166750682265, "grad_norm": 1.8750981890849139, "learning_rate": 9.296151607757379e-07, "loss": 0.3624, "step": 11266 }, { "epoch": 0.19584905004432548, "grad_norm": 1.7173546783684328, "learning_rate": 9.296007592052507e-07, "loss": 0.2511, "step": 11267 }, { "epoch": 0.1958664325818283, "grad_norm": 1.5470704821708132, "learning_rate": 9.295863562731264e-07, "loss": 0.3082, "step": 11268 }, { "epoch": 0.19588381511933112, "grad_norm": 2.179632859610636, "learning_rate": 9.295719519794107e-07, "loss": 0.3255, "step": 11269 }, { "epoch": 0.19590119765683395, "grad_norm": 1.6882053750182713, "learning_rate": 9.295575463241494e-07, "loss": 0.3927, "step": 11270 }, { "epoch": 0.19591858019433678, "grad_norm": 2.245846678205186, "learning_rate": 9.295431393073878e-07, "loss": 0.2909, "step": 11271 }, { "epoch": 0.19593596273183958, "grad_norm": 1.6051667924448445, "learning_rate": 9.295287309291719e-07, "loss": 0.2597, "step": 11272 }, { "epoch": 0.1959533452693424, "grad_norm": 1.070064293795752, "learning_rate": 9.295143211895474e-07, "loss": 0.4638, "step": 11273 }, { "epoch": 0.19597072780684524, "grad_norm": 1.4918030022400628, "learning_rate": 9.294999100885595e-07, "loss": 0.2919, "step": 11274 }, { "epoch": 0.19598811034434807, "grad_norm": 1.7518434798718507, "learning_rate": 9.294854976262543e-07, "loss": 0.3353, "step": 11275 }, { "epoch": 0.1960054928818509, "grad_norm": 2.062233424279188, "learning_rate": 9.294710838026773e-07, "loss": 0.207, "step": 11276 }, { "epoch": 0.1960228754193537, "grad_norm": 1.4013020927204634, "learning_rate": 9.294566686178742e-07, "loss": 0.186, "step": 11277 }, { "epoch": 0.19604025795685653, "grad_norm": 1.6619986391210442, "learning_rate": 9.29442252071891e-07, "loss": 0.4367, "step": 11278 }, { "epoch": 0.19605764049435936, "grad_norm": 1.975794399810536, "learning_rate": 9.294278341647729e-07, "loss": 0.3924, "step": 11279 }, { "epoch": 0.1960750230318622, "grad_norm": 2.0056348470666463, "learning_rate": 9.294134148965657e-07, "loss": 0.3409, "step": 11280 }, { "epoch": 0.19609240556936502, "grad_norm": 2.009630059307441, "learning_rate": 9.293989942673154e-07, "loss": 0.6092, "step": 11281 }, { "epoch": 0.19610978810686783, "grad_norm": 1.773788474462041, "learning_rate": 9.293845722770676e-07, "loss": 0.2498, "step": 11282 }, { "epoch": 0.19612717064437066, "grad_norm": 1.5804625447081413, "learning_rate": 9.293701489258681e-07, "loss": 0.3161, "step": 11283 }, { "epoch": 0.1961445531818735, "grad_norm": 1.1655488729879766, "learning_rate": 9.293557242137622e-07, "loss": 0.3496, "step": 11284 }, { "epoch": 0.19616193571937632, "grad_norm": 1.5838708580226628, "learning_rate": 9.29341298140796e-07, "loss": 0.1264, "step": 11285 }, { "epoch": 0.19617931825687915, "grad_norm": 1.9063116229689105, "learning_rate": 9.29326870707015e-07, "loss": 0.2341, "step": 11286 }, { "epoch": 0.19619670079438195, "grad_norm": 1.7415738891124333, "learning_rate": 9.293124419124651e-07, "loss": 0.3207, "step": 11287 }, { "epoch": 0.19621408333188478, "grad_norm": 2.168237888430282, "learning_rate": 9.292980117571919e-07, "loss": 0.2445, "step": 11288 }, { "epoch": 0.1962314658693876, "grad_norm": 2.2959339774160847, "learning_rate": 9.292835802412413e-07, "loss": 0.3032, "step": 11289 }, { "epoch": 0.19624884840689044, "grad_norm": 1.5477542730443004, "learning_rate": 9.29269147364659e-07, "loss": 0.2808, "step": 11290 }, { "epoch": 0.19626623094439327, "grad_norm": 1.95850751945952, "learning_rate": 9.292547131274906e-07, "loss": 0.4434, "step": 11291 }, { "epoch": 0.19628361348189607, "grad_norm": 2.4901166911876667, "learning_rate": 9.292402775297819e-07, "loss": 0.2482, "step": 11292 }, { "epoch": 0.1963009960193989, "grad_norm": 2.244282501588051, "learning_rate": 9.292258405715787e-07, "loss": 0.3294, "step": 11293 }, { "epoch": 0.19631837855690173, "grad_norm": 3.6009344612013408, "learning_rate": 9.292114022529269e-07, "loss": 0.3582, "step": 11294 }, { "epoch": 0.19633576109440457, "grad_norm": 2.207834459594325, "learning_rate": 9.29196962573872e-07, "loss": 0.2937, "step": 11295 }, { "epoch": 0.1963531436319074, "grad_norm": 1.8114394836022212, "learning_rate": 9.2918252153446e-07, "loss": 0.3507, "step": 11296 }, { "epoch": 0.1963705261694102, "grad_norm": 1.8421730084822474, "learning_rate": 9.291680791347363e-07, "loss": 0.4889, "step": 11297 }, { "epoch": 0.19638790870691303, "grad_norm": 3.316586178429388, "learning_rate": 9.291536353747473e-07, "loss": 0.2326, "step": 11298 }, { "epoch": 0.19640529124441586, "grad_norm": 2.6498455948775717, "learning_rate": 9.291391902545382e-07, "loss": 0.3138, "step": 11299 }, { "epoch": 0.1964226737819187, "grad_norm": 2.3317429241767544, "learning_rate": 9.29124743774155e-07, "loss": 0.3714, "step": 11300 }, { "epoch": 0.19644005631942152, "grad_norm": 2.6988612535105747, "learning_rate": 9.291102959336434e-07, "loss": 0.2832, "step": 11301 }, { "epoch": 0.19645743885692432, "grad_norm": 3.166955713494126, "learning_rate": 9.290958467330494e-07, "loss": 0.4334, "step": 11302 }, { "epoch": 0.19647482139442715, "grad_norm": 2.4069409691854715, "learning_rate": 9.290813961724186e-07, "loss": 0.2319, "step": 11303 }, { "epoch": 0.19649220393192998, "grad_norm": 1.3590835331976039, "learning_rate": 9.29066944251797e-07, "loss": 0.2907, "step": 11304 }, { "epoch": 0.1965095864694328, "grad_norm": 1.328702214149736, "learning_rate": 9.290524909712303e-07, "loss": 0.522, "step": 11305 }, { "epoch": 0.19652696900693564, "grad_norm": 1.9638642748433182, "learning_rate": 9.290380363307642e-07, "loss": 0.5134, "step": 11306 }, { "epoch": 0.19654435154443844, "grad_norm": 2.2531571219398097, "learning_rate": 9.290235803304445e-07, "loss": 0.398, "step": 11307 }, { "epoch": 0.19656173408194128, "grad_norm": 2.4108659480928916, "learning_rate": 9.290091229703173e-07, "loss": 0.6728, "step": 11308 }, { "epoch": 0.1965791166194441, "grad_norm": 2.236794975951296, "learning_rate": 9.289946642504283e-07, "loss": 0.3198, "step": 11309 }, { "epoch": 0.19659649915694694, "grad_norm": 1.601112906480098, "learning_rate": 9.28980204170823e-07, "loss": 0.4096, "step": 11310 }, { "epoch": 0.19661388169444977, "grad_norm": 2.2972715670825825, "learning_rate": 9.289657427315478e-07, "loss": 0.2416, "step": 11311 }, { "epoch": 0.19663126423195257, "grad_norm": 2.293537648705167, "learning_rate": 9.28951279932648e-07, "loss": 0.5041, "step": 11312 }, { "epoch": 0.1966486467694554, "grad_norm": 1.1991434734325443, "learning_rate": 9.2893681577417e-07, "loss": 0.2093, "step": 11313 }, { "epoch": 0.19666602930695823, "grad_norm": 2.1795452898090035, "learning_rate": 9.28922350256159e-07, "loss": 0.3218, "step": 11314 }, { "epoch": 0.19668341184446106, "grad_norm": 4.222262644766866, "learning_rate": 9.289078833786614e-07, "loss": 0.4628, "step": 11315 }, { "epoch": 0.1967007943819639, "grad_norm": 2.234722512209114, "learning_rate": 9.288934151417227e-07, "loss": 0.2925, "step": 11316 }, { "epoch": 0.1967181769194667, "grad_norm": 2.1694178579979684, "learning_rate": 9.288789455453889e-07, "loss": 0.5301, "step": 11317 }, { "epoch": 0.19673555945696952, "grad_norm": 1.5631744478569105, "learning_rate": 9.288644745897059e-07, "loss": 0.4058, "step": 11318 }, { "epoch": 0.19675294199447235, "grad_norm": 1.696723881541683, "learning_rate": 9.288500022747197e-07, "loss": 0.2657, "step": 11319 }, { "epoch": 0.19677032453197518, "grad_norm": 1.7370388025911998, "learning_rate": 9.288355286004757e-07, "loss": 0.413, "step": 11320 }, { "epoch": 0.196787707069478, "grad_norm": 1.4409081857584611, "learning_rate": 9.288210535670202e-07, "loss": 0.2594, "step": 11321 }, { "epoch": 0.19680508960698082, "grad_norm": 1.696748022138104, "learning_rate": 9.288065771743989e-07, "loss": 0.3579, "step": 11322 }, { "epoch": 0.19682247214448365, "grad_norm": 1.6132242783789055, "learning_rate": 9.287920994226578e-07, "loss": 0.3711, "step": 11323 }, { "epoch": 0.19683985468198648, "grad_norm": 2.6253465463546335, "learning_rate": 9.287776203118427e-07, "loss": 0.4828, "step": 11324 }, { "epoch": 0.1968572372194893, "grad_norm": 1.50061746216583, "learning_rate": 9.287631398419995e-07, "loss": 0.2921, "step": 11325 }, { "epoch": 0.19687461975699214, "grad_norm": 1.7558566695828068, "learning_rate": 9.28748658013174e-07, "loss": 0.2775, "step": 11326 }, { "epoch": 0.19689200229449494, "grad_norm": 2.2723304587705067, "learning_rate": 9.287341748254123e-07, "loss": 0.3307, "step": 11327 }, { "epoch": 0.19690938483199777, "grad_norm": 2.4138730036006733, "learning_rate": 9.287196902787603e-07, "loss": 0.3583, "step": 11328 }, { "epoch": 0.1969267673695006, "grad_norm": 1.3048079241007593, "learning_rate": 9.287052043732637e-07, "loss": 0.407, "step": 11329 }, { "epoch": 0.19694414990700343, "grad_norm": 2.299096279117052, "learning_rate": 9.286907171089686e-07, "loss": 0.5638, "step": 11330 }, { "epoch": 0.19696153244450626, "grad_norm": 2.0430784182177866, "learning_rate": 9.286762284859207e-07, "loss": 0.6172, "step": 11331 }, { "epoch": 0.19697891498200906, "grad_norm": 1.537584031483973, "learning_rate": 9.286617385041662e-07, "loss": 0.3657, "step": 11332 }, { "epoch": 0.1969962975195119, "grad_norm": 1.3800364607282756, "learning_rate": 9.28647247163751e-07, "loss": 0.3522, "step": 11333 }, { "epoch": 0.19701368005701472, "grad_norm": 1.4165157989831305, "learning_rate": 9.286327544647208e-07, "loss": 0.4641, "step": 11334 }, { "epoch": 0.19703106259451755, "grad_norm": 2.035096973188818, "learning_rate": 9.286182604071216e-07, "loss": 0.2496, "step": 11335 }, { "epoch": 0.19704844513202038, "grad_norm": 2.2200675401171406, "learning_rate": 9.286037649909994e-07, "loss": 0.4396, "step": 11336 }, { "epoch": 0.19706582766952319, "grad_norm": 1.4688866733277137, "learning_rate": 9.285892682164003e-07, "loss": 0.4047, "step": 11337 }, { "epoch": 0.19708321020702602, "grad_norm": 1.3001261798951402, "learning_rate": 9.2857477008337e-07, "loss": 0.4644, "step": 11338 }, { "epoch": 0.19710059274452885, "grad_norm": 2.2287935390287084, "learning_rate": 9.285602705919547e-07, "loss": 0.4554, "step": 11339 }, { "epoch": 0.19711797528203168, "grad_norm": 1.5305405816194468, "learning_rate": 9.285457697422e-07, "loss": 0.2677, "step": 11340 }, { "epoch": 0.1971353578195345, "grad_norm": 2.6006892261924115, "learning_rate": 9.285312675341521e-07, "loss": 0.3062, "step": 11341 }, { "epoch": 0.1971527403570373, "grad_norm": 1.7648057338738974, "learning_rate": 9.28516763967857e-07, "loss": 0.3503, "step": 11342 }, { "epoch": 0.19717012289454014, "grad_norm": 1.5993026944624964, "learning_rate": 9.285022590433607e-07, "loss": 0.4451, "step": 11343 }, { "epoch": 0.19718750543204297, "grad_norm": 1.5224280827618588, "learning_rate": 9.284877527607089e-07, "loss": 0.3663, "step": 11344 }, { "epoch": 0.1972048879695458, "grad_norm": 2.181704199245366, "learning_rate": 9.284732451199477e-07, "loss": 0.5254, "step": 11345 }, { "epoch": 0.19722227050704863, "grad_norm": 3.4127862126461133, "learning_rate": 9.284587361211234e-07, "loss": 0.3764, "step": 11346 }, { "epoch": 0.19723965304455143, "grad_norm": 2.998821637731378, "learning_rate": 9.284442257642815e-07, "loss": 0.373, "step": 11347 }, { "epoch": 0.19725703558205426, "grad_norm": 2.0738246851447455, "learning_rate": 9.284297140494684e-07, "loss": 0.2531, "step": 11348 }, { "epoch": 0.1972744181195571, "grad_norm": 2.467120153100281, "learning_rate": 9.284152009767297e-07, "loss": 0.3246, "step": 11349 }, { "epoch": 0.19729180065705992, "grad_norm": 1.4636764711707777, "learning_rate": 9.284006865461118e-07, "loss": 0.2262, "step": 11350 }, { "epoch": 0.19730918319456275, "grad_norm": 7.986038025973449, "learning_rate": 9.283861707576604e-07, "loss": 0.2962, "step": 11351 }, { "epoch": 0.19732656573206556, "grad_norm": 2.1823215472929993, "learning_rate": 9.283716536114217e-07, "loss": 0.4547, "step": 11352 }, { "epoch": 0.1973439482695684, "grad_norm": 2.4331097748326167, "learning_rate": 9.283571351074415e-07, "loss": 0.3842, "step": 11353 }, { "epoch": 0.19736133080707122, "grad_norm": 2.2575650184066265, "learning_rate": 9.28342615245766e-07, "loss": 0.3428, "step": 11354 }, { "epoch": 0.19737871334457405, "grad_norm": 2.4970128911562552, "learning_rate": 9.283280940264413e-07, "loss": 0.3055, "step": 11355 }, { "epoch": 0.19739609588207688, "grad_norm": 1.2963618807546322, "learning_rate": 9.283135714495131e-07, "loss": 0.2581, "step": 11356 }, { "epoch": 0.19741347841957968, "grad_norm": 6.19259491777434, "learning_rate": 9.282990475150279e-07, "loss": 0.4644, "step": 11357 }, { "epoch": 0.1974308609570825, "grad_norm": 1.6318831009394705, "learning_rate": 9.282845222230313e-07, "loss": 0.2822, "step": 11358 }, { "epoch": 0.19744824349458534, "grad_norm": 1.6203351156611827, "learning_rate": 9.282699955735696e-07, "loss": 0.269, "step": 11359 }, { "epoch": 0.19746562603208817, "grad_norm": 1.7662632877900977, "learning_rate": 9.282554675666886e-07, "loss": 0.6837, "step": 11360 }, { "epoch": 0.197483008569591, "grad_norm": 2.3472427941184395, "learning_rate": 9.282409382024345e-07, "loss": 0.3252, "step": 11361 }, { "epoch": 0.1975003911070938, "grad_norm": 1.5264505752075566, "learning_rate": 9.282264074808534e-07, "loss": 0.2596, "step": 11362 }, { "epoch": 0.19751777364459663, "grad_norm": 1.181449529160287, "learning_rate": 9.282118754019913e-07, "loss": 0.4383, "step": 11363 }, { "epoch": 0.19753515618209946, "grad_norm": 2.137450109889938, "learning_rate": 9.281973419658943e-07, "loss": 0.266, "step": 11364 }, { "epoch": 0.1975525387196023, "grad_norm": 1.6037417656768087, "learning_rate": 9.281828071726083e-07, "loss": 0.2334, "step": 11365 }, { "epoch": 0.19756992125710512, "grad_norm": 2.074576425950058, "learning_rate": 9.281682710221796e-07, "loss": 0.5152, "step": 11366 }, { "epoch": 0.19758730379460793, "grad_norm": 1.6826164251201579, "learning_rate": 9.281537335146541e-07, "loss": 0.2474, "step": 11367 }, { "epoch": 0.19760468633211076, "grad_norm": 1.9020938537412633, "learning_rate": 9.28139194650078e-07, "loss": 0.2603, "step": 11368 }, { "epoch": 0.1976220688696136, "grad_norm": 1.1168788994813905, "learning_rate": 9.281246544284973e-07, "loss": 0.4546, "step": 11369 }, { "epoch": 0.19763945140711642, "grad_norm": 1.8651321695871768, "learning_rate": 9.281101128499582e-07, "loss": 0.4095, "step": 11370 }, { "epoch": 0.19765683394461925, "grad_norm": 1.8952531545632045, "learning_rate": 9.280955699145065e-07, "loss": 0.3925, "step": 11371 }, { "epoch": 0.19767421648212205, "grad_norm": 2.5326724504050224, "learning_rate": 9.280810256221887e-07, "loss": 0.272, "step": 11372 }, { "epoch": 0.19769159901962488, "grad_norm": 1.5220556269897456, "learning_rate": 9.280664799730506e-07, "loss": 0.1911, "step": 11373 }, { "epoch": 0.1977089815571277, "grad_norm": 5.966942657788954, "learning_rate": 9.280519329671384e-07, "loss": 0.4059, "step": 11374 }, { "epoch": 0.19772636409463054, "grad_norm": 3.278130663632166, "learning_rate": 9.280373846044982e-07, "loss": 0.3108, "step": 11375 }, { "epoch": 0.19774374663213337, "grad_norm": 1.4569049850933669, "learning_rate": 9.280228348851761e-07, "loss": 0.2937, "step": 11376 }, { "epoch": 0.19776112916963617, "grad_norm": 2.163616588388743, "learning_rate": 9.280082838092182e-07, "loss": 0.4684, "step": 11377 }, { "epoch": 0.197778511707139, "grad_norm": 1.7828335948053573, "learning_rate": 9.279937313766708e-07, "loss": 0.4177, "step": 11378 }, { "epoch": 0.19779589424464183, "grad_norm": 1.773038758933421, "learning_rate": 9.279791775875797e-07, "loss": 0.3135, "step": 11379 }, { "epoch": 0.19781327678214466, "grad_norm": 1.3128654810352496, "learning_rate": 9.279646224419913e-07, "loss": 0.2693, "step": 11380 }, { "epoch": 0.1978306593196475, "grad_norm": 2.087297633482606, "learning_rate": 9.279500659399516e-07, "loss": 0.271, "step": 11381 }, { "epoch": 0.1978480418571503, "grad_norm": 2.34675777213758, "learning_rate": 9.279355080815068e-07, "loss": 0.4352, "step": 11382 }, { "epoch": 0.19786542439465313, "grad_norm": 1.647996672714324, "learning_rate": 9.279209488667031e-07, "loss": 0.3799, "step": 11383 }, { "epoch": 0.19788280693215596, "grad_norm": 1.4938861214984351, "learning_rate": 9.279063882955865e-07, "loss": 0.244, "step": 11384 }, { "epoch": 0.1979001894696588, "grad_norm": 2.4693157657158586, "learning_rate": 9.27891826368203e-07, "loss": 0.4088, "step": 11385 }, { "epoch": 0.19791757200716162, "grad_norm": 1.4572696902633744, "learning_rate": 9.278772630845993e-07, "loss": 0.2418, "step": 11386 }, { "epoch": 0.19793495454466442, "grad_norm": 2.0130203639489093, "learning_rate": 9.27862698444821e-07, "loss": 0.5099, "step": 11387 }, { "epoch": 0.19795233708216725, "grad_norm": 2.976289327678911, "learning_rate": 9.278481324489146e-07, "loss": 0.5648, "step": 11388 }, { "epoch": 0.19796971961967008, "grad_norm": 1.4476271381163566, "learning_rate": 9.27833565096926e-07, "loss": 0.4123, "step": 11389 }, { "epoch": 0.1979871021571729, "grad_norm": 2.1346901010705466, "learning_rate": 9.278189963889019e-07, "loss": 0.3754, "step": 11390 }, { "epoch": 0.19800448469467574, "grad_norm": 1.7989956501961857, "learning_rate": 9.278044263248876e-07, "loss": 0.3633, "step": 11391 }, { "epoch": 0.19802186723217854, "grad_norm": 1.0680149788607525, "learning_rate": 9.277898549049302e-07, "loss": 0.2935, "step": 11392 }, { "epoch": 0.19803924976968137, "grad_norm": 1.4105488278598077, "learning_rate": 9.277752821290753e-07, "loss": 0.1746, "step": 11393 }, { "epoch": 0.1980566323071842, "grad_norm": 1.40909190060044, "learning_rate": 9.277607079973692e-07, "loss": 0.2609, "step": 11394 }, { "epoch": 0.19807401484468704, "grad_norm": 1.8910802619890634, "learning_rate": 9.277461325098583e-07, "loss": 0.2891, "step": 11395 }, { "epoch": 0.19809139738218987, "grad_norm": 2.314591735813117, "learning_rate": 9.277315556665886e-07, "loss": 0.2135, "step": 11396 }, { "epoch": 0.19810877991969267, "grad_norm": 2.3584642000935396, "learning_rate": 9.277169774676063e-07, "loss": 0.366, "step": 11397 }, { "epoch": 0.1981261624571955, "grad_norm": 1.3388512791162253, "learning_rate": 9.277023979129577e-07, "loss": 0.333, "step": 11398 }, { "epoch": 0.19814354499469833, "grad_norm": 5.079787212709195, "learning_rate": 9.276878170026889e-07, "loss": 0.5622, "step": 11399 }, { "epoch": 0.19816092753220116, "grad_norm": 1.5856866778885088, "learning_rate": 9.276732347368464e-07, "loss": 0.3458, "step": 11400 }, { "epoch": 0.198178310069704, "grad_norm": 2.178548538752655, "learning_rate": 9.27658651115476e-07, "loss": 0.4792, "step": 11401 }, { "epoch": 0.1981956926072068, "grad_norm": 2.012383400661851, "learning_rate": 9.276440661386241e-07, "loss": 0.3198, "step": 11402 }, { "epoch": 0.19821307514470962, "grad_norm": 2.5494794720411176, "learning_rate": 9.276294798063371e-07, "loss": 0.2821, "step": 11403 }, { "epoch": 0.19823045768221245, "grad_norm": 2.6869819617480277, "learning_rate": 9.27614892118661e-07, "loss": 0.2799, "step": 11404 }, { "epoch": 0.19824784021971528, "grad_norm": 1.5047862566622512, "learning_rate": 9.276003030756421e-07, "loss": 0.472, "step": 11405 }, { "epoch": 0.1982652227572181, "grad_norm": 2.8652542515225154, "learning_rate": 9.275857126773267e-07, "loss": 0.3616, "step": 11406 }, { "epoch": 0.19828260529472092, "grad_norm": 1.7818901312206141, "learning_rate": 9.27571120923761e-07, "loss": 0.6735, "step": 11407 }, { "epoch": 0.19829998783222375, "grad_norm": 1.7950348616488359, "learning_rate": 9.275565278149913e-07, "loss": 0.4077, "step": 11408 }, { "epoch": 0.19831737036972658, "grad_norm": 1.909809455733049, "learning_rate": 9.275419333510637e-07, "loss": 0.3885, "step": 11409 }, { "epoch": 0.1983347529072294, "grad_norm": 1.9517749162464715, "learning_rate": 9.275273375320248e-07, "loss": 0.4893, "step": 11410 }, { "epoch": 0.1983521354447322, "grad_norm": 1.3258472587061165, "learning_rate": 9.275127403579204e-07, "loss": 0.4236, "step": 11411 }, { "epoch": 0.19836951798223504, "grad_norm": 4.3076777539912285, "learning_rate": 9.27498141828797e-07, "loss": 0.4827, "step": 11412 }, { "epoch": 0.19838690051973787, "grad_norm": 1.8507887604985596, "learning_rate": 9.27483541944701e-07, "loss": 0.4214, "step": 11413 }, { "epoch": 0.1984042830572407, "grad_norm": 3.096973305304634, "learning_rate": 9.274689407056784e-07, "loss": 0.5567, "step": 11414 }, { "epoch": 0.19842166559474353, "grad_norm": 1.7195005903914176, "learning_rate": 9.274543381117756e-07, "loss": 0.5159, "step": 11415 }, { "epoch": 0.19843904813224633, "grad_norm": 1.4406608834196963, "learning_rate": 9.274397341630391e-07, "loss": 0.2116, "step": 11416 }, { "epoch": 0.19845643066974916, "grad_norm": 1.8384332006508524, "learning_rate": 9.274251288595149e-07, "loss": 0.5206, "step": 11417 }, { "epoch": 0.198473813207252, "grad_norm": 2.1506609412820072, "learning_rate": 9.274105222012494e-07, "loss": 0.2872, "step": 11418 }, { "epoch": 0.19849119574475482, "grad_norm": 1.0768569653297175, "learning_rate": 9.273959141882888e-07, "loss": 0.2701, "step": 11419 }, { "epoch": 0.19850857828225765, "grad_norm": 3.543922578093515, "learning_rate": 9.273813048206796e-07, "loss": 0.4274, "step": 11420 }, { "epoch": 0.19852596081976046, "grad_norm": 1.9970656558789153, "learning_rate": 9.273666940984679e-07, "loss": 0.4132, "step": 11421 }, { "epoch": 0.19854334335726329, "grad_norm": 1.9739695400656854, "learning_rate": 9.273520820217002e-07, "loss": 0.3038, "step": 11422 }, { "epoch": 0.19856072589476612, "grad_norm": 1.7362798671572315, "learning_rate": 9.273374685904225e-07, "loss": 0.456, "step": 11423 }, { "epoch": 0.19857810843226895, "grad_norm": 1.7318019233630069, "learning_rate": 9.273228538046816e-07, "loss": 0.4128, "step": 11424 }, { "epoch": 0.19859549096977178, "grad_norm": 1.847564035020749, "learning_rate": 9.273082376645234e-07, "loss": 0.4479, "step": 11425 }, { "epoch": 0.19861287350727458, "grad_norm": 1.746287146790137, "learning_rate": 9.272936201699944e-07, "loss": 0.2972, "step": 11426 }, { "epoch": 0.1986302560447774, "grad_norm": 2.763261178138656, "learning_rate": 9.272790013211408e-07, "loss": 0.4205, "step": 11427 }, { "epoch": 0.19864763858228024, "grad_norm": 2.7673134562222317, "learning_rate": 9.272643811180092e-07, "loss": 0.4646, "step": 11428 }, { "epoch": 0.19866502111978307, "grad_norm": 1.6709361977924098, "learning_rate": 9.272497595606457e-07, "loss": 0.3144, "step": 11429 }, { "epoch": 0.1986824036572859, "grad_norm": 1.9091010657229766, "learning_rate": 9.272351366490967e-07, "loss": 0.3152, "step": 11430 }, { "epoch": 0.1986997861947887, "grad_norm": 3.0056788390917526, "learning_rate": 9.272205123834086e-07, "loss": 0.4593, "step": 11431 }, { "epoch": 0.19871716873229153, "grad_norm": 2.7864679984943925, "learning_rate": 9.272058867636278e-07, "loss": 0.2607, "step": 11432 }, { "epoch": 0.19873455126979436, "grad_norm": 2.9061395859446826, "learning_rate": 9.271912597898004e-07, "loss": 0.3538, "step": 11433 }, { "epoch": 0.1987519338072972, "grad_norm": 4.687767341375561, "learning_rate": 9.271766314619731e-07, "loss": 0.4755, "step": 11434 }, { "epoch": 0.19876931634480002, "grad_norm": 2.59906930654753, "learning_rate": 9.271620017801921e-07, "loss": 0.6073, "step": 11435 }, { "epoch": 0.19878669888230283, "grad_norm": 1.402283145632094, "learning_rate": 9.271473707445037e-07, "loss": 0.2886, "step": 11436 }, { "epoch": 0.19880408141980566, "grad_norm": 3.355633476563386, "learning_rate": 9.271327383549544e-07, "loss": 0.342, "step": 11437 }, { "epoch": 0.1988214639573085, "grad_norm": 1.4914557542673632, "learning_rate": 9.271181046115905e-07, "loss": 0.3336, "step": 11438 }, { "epoch": 0.19883884649481132, "grad_norm": 1.9424809072443356, "learning_rate": 9.271034695144585e-07, "loss": 0.359, "step": 11439 }, { "epoch": 0.19885622903231415, "grad_norm": 1.7612550249819987, "learning_rate": 9.270888330636047e-07, "loss": 0.6553, "step": 11440 }, { "epoch": 0.19887361156981695, "grad_norm": 2.5299825037338604, "learning_rate": 9.270741952590753e-07, "loss": 0.3681, "step": 11441 }, { "epoch": 0.19889099410731978, "grad_norm": 2.031964697700437, "learning_rate": 9.270595561009171e-07, "loss": 0.3553, "step": 11442 }, { "epoch": 0.1989083766448226, "grad_norm": 1.7267973787073791, "learning_rate": 9.270449155891762e-07, "loss": 0.3398, "step": 11443 }, { "epoch": 0.19892575918232544, "grad_norm": 2.0350687215677725, "learning_rate": 9.270302737238991e-07, "loss": 0.2816, "step": 11444 }, { "epoch": 0.19894314171982827, "grad_norm": 5.1175494060207205, "learning_rate": 9.27015630505132e-07, "loss": 0.2903, "step": 11445 }, { "epoch": 0.19896052425733107, "grad_norm": 1.630830977081601, "learning_rate": 9.270009859329217e-07, "loss": 0.291, "step": 11446 }, { "epoch": 0.1989779067948339, "grad_norm": 1.416359645752254, "learning_rate": 9.269863400073143e-07, "loss": 0.1941, "step": 11447 }, { "epoch": 0.19899528933233673, "grad_norm": 1.6741638096501514, "learning_rate": 9.269716927283566e-07, "loss": 0.3193, "step": 11448 }, { "epoch": 0.19901267186983956, "grad_norm": 3.1398038555129166, "learning_rate": 9.269570440960945e-07, "loss": 0.3797, "step": 11449 }, { "epoch": 0.1990300544073424, "grad_norm": 1.61260239442617, "learning_rate": 9.26942394110575e-07, "loss": 0.2438, "step": 11450 }, { "epoch": 0.1990474369448452, "grad_norm": 3.0333482460663097, "learning_rate": 9.269277427718439e-07, "loss": 0.1889, "step": 11451 }, { "epoch": 0.19906481948234803, "grad_norm": 0.9447995696410225, "learning_rate": 9.269130900799481e-07, "loss": 0.3931, "step": 11452 }, { "epoch": 0.19908220201985086, "grad_norm": 2.8187957916026694, "learning_rate": 9.268984360349339e-07, "loss": 0.5437, "step": 11453 }, { "epoch": 0.1990995845573537, "grad_norm": 1.3404384815372261, "learning_rate": 9.268837806368478e-07, "loss": 0.2876, "step": 11454 }, { "epoch": 0.19911696709485652, "grad_norm": 2.1100763436970524, "learning_rate": 9.268691238857362e-07, "loss": 0.6272, "step": 11455 }, { "epoch": 0.19913434963235932, "grad_norm": 2.463395212501957, "learning_rate": 9.268544657816455e-07, "loss": 0.3902, "step": 11456 }, { "epoch": 0.19915173216986215, "grad_norm": 1.836791913509367, "learning_rate": 9.268398063246224e-07, "loss": 0.3884, "step": 11457 }, { "epoch": 0.19916911470736498, "grad_norm": 4.068229417950471, "learning_rate": 9.268251455147129e-07, "loss": 0.4121, "step": 11458 }, { "epoch": 0.1991864972448678, "grad_norm": 1.6050578778462326, "learning_rate": 9.26810483351964e-07, "loss": 0.4378, "step": 11459 }, { "epoch": 0.19920387978237064, "grad_norm": 1.4931692482143455, "learning_rate": 9.267958198364219e-07, "loss": 0.2596, "step": 11460 }, { "epoch": 0.19922126231987344, "grad_norm": 1.7080529417840096, "learning_rate": 9.26781154968133e-07, "loss": 0.3066, "step": 11461 }, { "epoch": 0.19923864485737627, "grad_norm": 1.5205168554054858, "learning_rate": 9.267664887471439e-07, "loss": 0.298, "step": 11462 }, { "epoch": 0.1992560273948791, "grad_norm": 1.9686588171674642, "learning_rate": 9.267518211735011e-07, "loss": 0.3001, "step": 11463 }, { "epoch": 0.19927340993238193, "grad_norm": 1.5290275104577487, "learning_rate": 9.267371522472511e-07, "loss": 0.2142, "step": 11464 }, { "epoch": 0.19929079246988476, "grad_norm": 1.620272637512766, "learning_rate": 9.267224819684403e-07, "loss": 0.3447, "step": 11465 }, { "epoch": 0.19930817500738757, "grad_norm": 2.3238190401186163, "learning_rate": 9.267078103371153e-07, "loss": 0.5086, "step": 11466 }, { "epoch": 0.1993255575448904, "grad_norm": 1.1283021403075721, "learning_rate": 9.266931373533226e-07, "loss": 0.3824, "step": 11467 }, { "epoch": 0.19934294008239323, "grad_norm": 1.2887773052319356, "learning_rate": 9.266784630171086e-07, "loss": 0.3162, "step": 11468 }, { "epoch": 0.19936032261989606, "grad_norm": 2.210449108199132, "learning_rate": 9.266637873285198e-07, "loss": 0.3326, "step": 11469 }, { "epoch": 0.1993777051573989, "grad_norm": 2.086994285984133, "learning_rate": 9.266491102876028e-07, "loss": 0.3994, "step": 11470 }, { "epoch": 0.1993950876949017, "grad_norm": 2.3294835590893577, "learning_rate": 9.266344318944041e-07, "loss": 0.5825, "step": 11471 }, { "epoch": 0.19941247023240452, "grad_norm": 1.5862083074151538, "learning_rate": 9.266197521489705e-07, "loss": 0.3008, "step": 11472 }, { "epoch": 0.19942985276990735, "grad_norm": 2.1557150315286977, "learning_rate": 9.26605071051348e-07, "loss": 0.3485, "step": 11473 }, { "epoch": 0.19944723530741018, "grad_norm": 2.06908881348131, "learning_rate": 9.265903886015834e-07, "loss": 0.4404, "step": 11474 }, { "epoch": 0.199464617844913, "grad_norm": 2.7543499756548435, "learning_rate": 9.265757047997233e-07, "loss": 0.3452, "step": 11475 }, { "epoch": 0.19948200038241581, "grad_norm": 2.3785278220515704, "learning_rate": 9.265610196458141e-07, "loss": 0.3777, "step": 11476 }, { "epoch": 0.19949938291991864, "grad_norm": 1.3903172795993053, "learning_rate": 9.265463331399025e-07, "loss": 0.2789, "step": 11477 }, { "epoch": 0.19951676545742147, "grad_norm": 5.379594269282548, "learning_rate": 9.265316452820348e-07, "loss": 0.3202, "step": 11478 }, { "epoch": 0.1995341479949243, "grad_norm": 1.574927819118428, "learning_rate": 9.265169560722579e-07, "loss": 0.2321, "step": 11479 }, { "epoch": 0.19955153053242713, "grad_norm": 2.0393864114873335, "learning_rate": 9.26502265510618e-07, "loss": 0.3739, "step": 11480 }, { "epoch": 0.19956891306992994, "grad_norm": 2.1207924038340384, "learning_rate": 9.26487573597162e-07, "loss": 0.2537, "step": 11481 }, { "epoch": 0.19958629560743277, "grad_norm": 1.6449049480442717, "learning_rate": 9.264728803319364e-07, "loss": 0.2759, "step": 11482 }, { "epoch": 0.1996036781449356, "grad_norm": 1.6311038672037212, "learning_rate": 9.264581857149875e-07, "loss": 0.4222, "step": 11483 }, { "epoch": 0.19962106068243843, "grad_norm": 2.6904947343482855, "learning_rate": 9.26443489746362e-07, "loss": 0.4449, "step": 11484 }, { "epoch": 0.19963844321994126, "grad_norm": 1.9464229671446265, "learning_rate": 9.264287924261066e-07, "loss": 0.4403, "step": 11485 }, { "epoch": 0.19965582575744406, "grad_norm": 2.811740734726163, "learning_rate": 9.264140937542678e-07, "loss": 0.3208, "step": 11486 }, { "epoch": 0.1996732082949469, "grad_norm": 1.5286003846530865, "learning_rate": 9.263993937308923e-07, "loss": 0.3337, "step": 11487 }, { "epoch": 0.19969059083244972, "grad_norm": 1.7097613745322993, "learning_rate": 9.263846923560265e-07, "loss": 0.2893, "step": 11488 }, { "epoch": 0.19970797336995255, "grad_norm": 2.052606317387705, "learning_rate": 9.26369989629717e-07, "loss": 0.3673, "step": 11489 }, { "epoch": 0.19972535590745538, "grad_norm": 2.3813323897369005, "learning_rate": 9.263552855520107e-07, "loss": 0.3566, "step": 11490 }, { "epoch": 0.19974273844495818, "grad_norm": 1.5050400018849504, "learning_rate": 9.263405801229538e-07, "loss": 0.4103, "step": 11491 }, { "epoch": 0.19976012098246101, "grad_norm": 2.4429004546799895, "learning_rate": 9.263258733425931e-07, "loss": 0.4192, "step": 11492 }, { "epoch": 0.19977750351996384, "grad_norm": 1.7503232694405904, "learning_rate": 9.263111652109755e-07, "loss": 0.2863, "step": 11493 }, { "epoch": 0.19979488605746668, "grad_norm": 1.336454847231046, "learning_rate": 9.262964557281471e-07, "loss": 0.2984, "step": 11494 }, { "epoch": 0.1998122685949695, "grad_norm": 1.890857746427102, "learning_rate": 9.262817448941547e-07, "loss": 0.463, "step": 11495 }, { "epoch": 0.1998296511324723, "grad_norm": 1.8997827653275363, "learning_rate": 9.262670327090451e-07, "loss": 0.2651, "step": 11496 }, { "epoch": 0.19984703366997514, "grad_norm": 1.6613095908242586, "learning_rate": 9.26252319172865e-07, "loss": 0.2825, "step": 11497 }, { "epoch": 0.19986441620747797, "grad_norm": 2.4434023693686098, "learning_rate": 9.262376042856605e-07, "loss": 0.3639, "step": 11498 }, { "epoch": 0.1998817987449808, "grad_norm": 1.7470638781396977, "learning_rate": 9.262228880474789e-07, "loss": 0.2635, "step": 11499 }, { "epoch": 0.19989918128248363, "grad_norm": 1.5152117476810003, "learning_rate": 9.262081704583664e-07, "loss": 0.3953, "step": 11500 }, { "epoch": 0.19991656381998643, "grad_norm": 1.8418340833181774, "learning_rate": 9.261934515183697e-07, "loss": 0.2854, "step": 11501 }, { "epoch": 0.19993394635748926, "grad_norm": 1.7839197203720487, "learning_rate": 9.261787312275357e-07, "loss": 0.4261, "step": 11502 }, { "epoch": 0.1999513288949921, "grad_norm": 2.0762828053568567, "learning_rate": 9.261640095859107e-07, "loss": 0.3638, "step": 11503 }, { "epoch": 0.19996871143249492, "grad_norm": 1.9087458391619423, "learning_rate": 9.261492865935417e-07, "loss": 0.419, "step": 11504 }, { "epoch": 0.19998609396999775, "grad_norm": 1.8404386474323522, "learning_rate": 9.261345622504753e-07, "loss": 0.2708, "step": 11505 }, { "epoch": 0.20000347650750055, "grad_norm": 2.3527894455555844, "learning_rate": 9.261198365567578e-07, "loss": 0.2982, "step": 11506 }, { "epoch": 0.20002085904500339, "grad_norm": 1.9751133641220915, "learning_rate": 9.261051095124363e-07, "loss": 0.1406, "step": 11507 }, { "epoch": 0.20003824158250622, "grad_norm": 2.2243259677650524, "learning_rate": 9.260903811175575e-07, "loss": 0.3828, "step": 11508 }, { "epoch": 0.20005562412000905, "grad_norm": 3.4818599819225797, "learning_rate": 9.260756513721677e-07, "loss": 0.6966, "step": 11509 }, { "epoch": 0.20007300665751188, "grad_norm": 9.414226420150634, "learning_rate": 9.260609202763139e-07, "loss": 0.9501, "step": 11510 }, { "epoch": 0.20009038919501468, "grad_norm": 1.5406285629157397, "learning_rate": 9.260461878300428e-07, "loss": 0.4457, "step": 11511 }, { "epoch": 0.2001077717325175, "grad_norm": 1.9727770500948758, "learning_rate": 9.260314540334009e-07, "loss": 0.2812, "step": 11512 }, { "epoch": 0.20012515427002034, "grad_norm": 2.448776869763958, "learning_rate": 9.26016718886435e-07, "loss": 0.3668, "step": 11513 }, { "epoch": 0.20014253680752317, "grad_norm": 2.128323330825185, "learning_rate": 9.260019823891917e-07, "loss": 0.5256, "step": 11514 }, { "epoch": 0.200159919345026, "grad_norm": 3.8598075312114, "learning_rate": 9.259872445417178e-07, "loss": 0.3149, "step": 11515 }, { "epoch": 0.2001773018825288, "grad_norm": 1.3249299437536457, "learning_rate": 9.259725053440602e-07, "loss": 0.2772, "step": 11516 }, { "epoch": 0.20019468442003163, "grad_norm": 2.8324014388396153, "learning_rate": 9.259577647962653e-07, "loss": 0.2296, "step": 11517 }, { "epoch": 0.20021206695753446, "grad_norm": 2.1863052579497966, "learning_rate": 9.2594302289838e-07, "loss": 0.5147, "step": 11518 }, { "epoch": 0.2002294494950373, "grad_norm": 1.5014672639533384, "learning_rate": 9.259282796504511e-07, "loss": 0.2789, "step": 11519 }, { "epoch": 0.20024683203254012, "grad_norm": 2.640853475519399, "learning_rate": 9.259135350525249e-07, "loss": 0.3148, "step": 11520 }, { "epoch": 0.20026421457004293, "grad_norm": 3.024590392108633, "learning_rate": 9.258987891046486e-07, "loss": 0.6415, "step": 11521 }, { "epoch": 0.20028159710754576, "grad_norm": 1.2665709884607228, "learning_rate": 9.25884041806869e-07, "loss": 0.2345, "step": 11522 }, { "epoch": 0.20029897964504859, "grad_norm": 2.106440104356195, "learning_rate": 9.258692931592323e-07, "loss": 0.3884, "step": 11523 }, { "epoch": 0.20031636218255142, "grad_norm": 2.3572581729767546, "learning_rate": 9.258545431617857e-07, "loss": 0.4033, "step": 11524 }, { "epoch": 0.20033374472005425, "grad_norm": 1.4018463048745835, "learning_rate": 9.258397918145758e-07, "loss": 0.2298, "step": 11525 }, { "epoch": 0.20035112725755705, "grad_norm": 1.6868741332600294, "learning_rate": 9.258250391176495e-07, "loss": 0.2871, "step": 11526 }, { "epoch": 0.20036850979505988, "grad_norm": 1.7699607971395024, "learning_rate": 9.258102850710533e-07, "loss": 0.3712, "step": 11527 }, { "epoch": 0.2003858923325627, "grad_norm": 1.9542451377642265, "learning_rate": 9.257955296748341e-07, "loss": 0.5433, "step": 11528 }, { "epoch": 0.20040327487006554, "grad_norm": 2.5079743890017805, "learning_rate": 9.257807729290388e-07, "loss": 0.3341, "step": 11529 }, { "epoch": 0.20042065740756837, "grad_norm": 1.8165525922319417, "learning_rate": 9.257660148337138e-07, "loss": 0.3966, "step": 11530 }, { "epoch": 0.20043803994507117, "grad_norm": 2.978852015926386, "learning_rate": 9.257512553889062e-07, "loss": 0.5454, "step": 11531 }, { "epoch": 0.200455422482574, "grad_norm": 2.5419990923948688, "learning_rate": 9.257364945946629e-07, "loss": 0.4198, "step": 11532 }, { "epoch": 0.20047280502007683, "grad_norm": 1.3769014194076332, "learning_rate": 9.257217324510303e-07, "loss": 0.3375, "step": 11533 }, { "epoch": 0.20049018755757966, "grad_norm": 1.5719938699659575, "learning_rate": 9.257069689580552e-07, "loss": 0.2598, "step": 11534 }, { "epoch": 0.2005075700950825, "grad_norm": 2.139617852062784, "learning_rate": 9.256922041157848e-07, "loss": 0.3041, "step": 11535 }, { "epoch": 0.2005249526325853, "grad_norm": 1.433436478899983, "learning_rate": 9.256774379242656e-07, "loss": 0.2892, "step": 11536 }, { "epoch": 0.20054233517008813, "grad_norm": 8.19548578317528, "learning_rate": 9.256626703835446e-07, "loss": 0.6541, "step": 11537 }, { "epoch": 0.20055971770759096, "grad_norm": 1.702772236915317, "learning_rate": 9.256479014936683e-07, "loss": 0.3541, "step": 11538 }, { "epoch": 0.2005771002450938, "grad_norm": 2.159792739547056, "learning_rate": 9.256331312546836e-07, "loss": 0.2828, "step": 11539 }, { "epoch": 0.20059448278259662, "grad_norm": 1.7693112270319307, "learning_rate": 9.256183596666376e-07, "loss": 0.3059, "step": 11540 }, { "epoch": 0.20061186532009942, "grad_norm": 2.164211250963134, "learning_rate": 9.256035867295769e-07, "loss": 0.2732, "step": 11541 }, { "epoch": 0.20062924785760225, "grad_norm": 2.8281487402760512, "learning_rate": 9.255888124435481e-07, "loss": 0.4135, "step": 11542 }, { "epoch": 0.20064663039510508, "grad_norm": 1.4429712177089542, "learning_rate": 9.255740368085984e-07, "loss": 0.5123, "step": 11543 }, { "epoch": 0.2006640129326079, "grad_norm": 3.884828770118365, "learning_rate": 9.255592598247746e-07, "loss": 0.4582, "step": 11544 }, { "epoch": 0.20068139547011074, "grad_norm": 1.6767376088166754, "learning_rate": 9.255444814921234e-07, "loss": 0.4799, "step": 11545 }, { "epoch": 0.20069877800761354, "grad_norm": 1.7785183915714318, "learning_rate": 9.255297018106916e-07, "loss": 0.3265, "step": 11546 }, { "epoch": 0.20071616054511637, "grad_norm": 12.37224136290864, "learning_rate": 9.255149207805261e-07, "loss": 0.4403, "step": 11547 }, { "epoch": 0.2007335430826192, "grad_norm": 1.8795367910601266, "learning_rate": 9.255001384016739e-07, "loss": 0.3699, "step": 11548 }, { "epoch": 0.20075092562012203, "grad_norm": 1.5978242201339694, "learning_rate": 9.254853546741816e-07, "loss": 0.4071, "step": 11549 }, { "epoch": 0.20076830815762484, "grad_norm": 2.2346420330117156, "learning_rate": 9.254705695980961e-07, "loss": 0.2183, "step": 11550 }, { "epoch": 0.20078569069512767, "grad_norm": 2.3985193642903195, "learning_rate": 9.254557831734644e-07, "loss": 0.3368, "step": 11551 }, { "epoch": 0.2008030732326305, "grad_norm": 1.7471039246957734, "learning_rate": 9.254409954003335e-07, "loss": 0.2599, "step": 11552 }, { "epoch": 0.20082045577013333, "grad_norm": 1.364457199026229, "learning_rate": 9.254262062787499e-07, "loss": 0.306, "step": 11553 }, { "epoch": 0.20083783830763616, "grad_norm": 3.1661241175589323, "learning_rate": 9.254114158087607e-07, "loss": 0.3483, "step": 11554 }, { "epoch": 0.20085522084513896, "grad_norm": 1.402911637223073, "learning_rate": 9.253966239904126e-07, "loss": 0.3901, "step": 11555 }, { "epoch": 0.2008726033826418, "grad_norm": 1.8109380063760323, "learning_rate": 9.253818308237528e-07, "loss": 0.366, "step": 11556 }, { "epoch": 0.20088998592014462, "grad_norm": 2.0363713034445725, "learning_rate": 9.253670363088279e-07, "loss": 0.4935, "step": 11557 }, { "epoch": 0.20090736845764745, "grad_norm": 3.414475430743685, "learning_rate": 9.253522404456848e-07, "loss": 0.3566, "step": 11558 }, { "epoch": 0.20092475099515028, "grad_norm": 1.8962764479871748, "learning_rate": 9.253374432343707e-07, "loss": 0.3288, "step": 11559 }, { "epoch": 0.20094213353265308, "grad_norm": 3.941790211343226, "learning_rate": 9.253226446749322e-07, "loss": 0.3053, "step": 11560 }, { "epoch": 0.2009595160701559, "grad_norm": 5.580832636343818, "learning_rate": 9.253078447674163e-07, "loss": 0.4564, "step": 11561 }, { "epoch": 0.20097689860765874, "grad_norm": 3.4637500192127986, "learning_rate": 9.252930435118699e-07, "loss": 0.3785, "step": 11562 }, { "epoch": 0.20099428114516157, "grad_norm": 2.2811093658476858, "learning_rate": 9.252782409083398e-07, "loss": 0.6339, "step": 11563 }, { "epoch": 0.2010116636826644, "grad_norm": 0.9538605274457985, "learning_rate": 9.252634369568733e-07, "loss": 0.4396, "step": 11564 }, { "epoch": 0.2010290462201672, "grad_norm": 1.1774126503085078, "learning_rate": 9.252486316575168e-07, "loss": 0.3208, "step": 11565 }, { "epoch": 0.20104642875767004, "grad_norm": 1.8590240282299022, "learning_rate": 9.252338250103175e-07, "loss": 0.4298, "step": 11566 }, { "epoch": 0.20106381129517287, "grad_norm": 2.112983124814384, "learning_rate": 9.252190170153224e-07, "loss": 0.2918, "step": 11567 }, { "epoch": 0.2010811938326757, "grad_norm": 1.583591293929276, "learning_rate": 9.252042076725783e-07, "loss": 0.549, "step": 11568 }, { "epoch": 0.20109857637017853, "grad_norm": 1.8908260609829308, "learning_rate": 9.251893969821323e-07, "loss": 0.4074, "step": 11569 }, { "epoch": 0.20111595890768133, "grad_norm": 4.689636888577332, "learning_rate": 9.251745849440311e-07, "loss": 0.7727, "step": 11570 }, { "epoch": 0.20113334144518416, "grad_norm": 1.276198044840711, "learning_rate": 9.251597715583219e-07, "loss": 0.4284, "step": 11571 }, { "epoch": 0.201150723982687, "grad_norm": 1.9086518438389766, "learning_rate": 9.251449568250513e-07, "loss": 0.3069, "step": 11572 }, { "epoch": 0.20116810652018982, "grad_norm": 2.2312327839476636, "learning_rate": 9.251301407442667e-07, "loss": 0.4647, "step": 11573 }, { "epoch": 0.20118548905769265, "grad_norm": 1.2029430200904445, "learning_rate": 9.251153233160147e-07, "loss": 0.251, "step": 11574 }, { "epoch": 0.20120287159519545, "grad_norm": 1.5514389735439185, "learning_rate": 9.251005045403426e-07, "loss": 0.2678, "step": 11575 }, { "epoch": 0.20122025413269828, "grad_norm": 7.839361335288317, "learning_rate": 9.250856844172969e-07, "loss": 0.3353, "step": 11576 }, { "epoch": 0.20123763667020111, "grad_norm": 2.247657775698086, "learning_rate": 9.25070862946925e-07, "loss": 0.6447, "step": 11577 }, { "epoch": 0.20125501920770394, "grad_norm": 1.2309597289745198, "learning_rate": 9.250560401292738e-07, "loss": 0.3814, "step": 11578 }, { "epoch": 0.20127240174520677, "grad_norm": 1.949017185411169, "learning_rate": 9.250412159643901e-07, "loss": 0.3378, "step": 11579 }, { "epoch": 0.20128978428270958, "grad_norm": 1.350666228458955, "learning_rate": 9.25026390452321e-07, "loss": 0.2588, "step": 11580 }, { "epoch": 0.2013071668202124, "grad_norm": 2.7665427139812597, "learning_rate": 9.250115635931136e-07, "loss": 0.4169, "step": 11581 }, { "epoch": 0.20132454935771524, "grad_norm": 2.1830442381675406, "learning_rate": 9.249967353868147e-07, "loss": 0.3058, "step": 11582 }, { "epoch": 0.20134193189521807, "grad_norm": 2.2000770706280615, "learning_rate": 9.249819058334714e-07, "loss": 0.409, "step": 11583 }, { "epoch": 0.2013593144327209, "grad_norm": 4.542130444475102, "learning_rate": 9.249670749331306e-07, "loss": 0.4196, "step": 11584 }, { "epoch": 0.2013766969702237, "grad_norm": 1.6351462177157639, "learning_rate": 9.249522426858394e-07, "loss": 0.5189, "step": 11585 }, { "epoch": 0.20139407950772653, "grad_norm": 1.9549400053666075, "learning_rate": 9.249374090916448e-07, "loss": 0.2301, "step": 11586 }, { "epoch": 0.20141146204522936, "grad_norm": 1.8792924538824864, "learning_rate": 9.24922574150594e-07, "loss": 0.4407, "step": 11587 }, { "epoch": 0.2014288445827322, "grad_norm": 2.4360449157575697, "learning_rate": 9.249077378627336e-07, "loss": 0.5776, "step": 11588 }, { "epoch": 0.20144622712023502, "grad_norm": 3.1009269854539037, "learning_rate": 9.24892900228111e-07, "loss": 0.3168, "step": 11589 }, { "epoch": 0.20146360965773782, "grad_norm": 1.5791938799755196, "learning_rate": 9.248780612467728e-07, "loss": 0.2915, "step": 11590 }, { "epoch": 0.20148099219524065, "grad_norm": 2.0690437136727517, "learning_rate": 9.248632209187666e-07, "loss": 0.4983, "step": 11591 }, { "epoch": 0.20149837473274348, "grad_norm": 1.2262830044838149, "learning_rate": 9.248483792441392e-07, "loss": 0.3223, "step": 11592 }, { "epoch": 0.20151575727024632, "grad_norm": 2.574565498399231, "learning_rate": 9.248335362229374e-07, "loss": 0.4338, "step": 11593 }, { "epoch": 0.20153313980774915, "grad_norm": 2.3755330563100405, "learning_rate": 9.248186918552086e-07, "loss": 0.4277, "step": 11594 }, { "epoch": 0.20155052234525195, "grad_norm": 2.3191166314676033, "learning_rate": 9.248038461409996e-07, "loss": 0.7359, "step": 11595 }, { "epoch": 0.20156790488275478, "grad_norm": 1.4743182137538076, "learning_rate": 9.247889990803575e-07, "loss": 0.3904, "step": 11596 }, { "epoch": 0.2015852874202576, "grad_norm": 1.8510254400355999, "learning_rate": 9.247741506733295e-07, "loss": 0.1675, "step": 11597 }, { "epoch": 0.20160266995776044, "grad_norm": 2.1279796962954216, "learning_rate": 9.247593009199626e-07, "loss": 0.4235, "step": 11598 }, { "epoch": 0.20162005249526327, "grad_norm": 1.2867330400998973, "learning_rate": 9.247444498203038e-07, "loss": 0.3917, "step": 11599 }, { "epoch": 0.20163743503276607, "grad_norm": 2.5579591209283383, "learning_rate": 9.247295973744001e-07, "loss": 0.5376, "step": 11600 }, { "epoch": 0.2016548175702689, "grad_norm": 1.215024130761488, "learning_rate": 9.247147435822989e-07, "loss": 0.302, "step": 11601 }, { "epoch": 0.20167220010777173, "grad_norm": 2.134462051496298, "learning_rate": 9.246998884440469e-07, "loss": 0.3915, "step": 11602 }, { "epoch": 0.20168958264527456, "grad_norm": 1.655521958326663, "learning_rate": 9.246850319596913e-07, "loss": 0.3226, "step": 11603 }, { "epoch": 0.2017069651827774, "grad_norm": 2.2966874056492457, "learning_rate": 9.246701741292793e-07, "loss": 0.4198, "step": 11604 }, { "epoch": 0.2017243477202802, "grad_norm": 1.569416213721018, "learning_rate": 9.24655314952858e-07, "loss": 0.3716, "step": 11605 }, { "epoch": 0.20174173025778303, "grad_norm": 1.9025497706132501, "learning_rate": 9.246404544304742e-07, "loss": 0.2716, "step": 11606 }, { "epoch": 0.20175911279528586, "grad_norm": 2.1154255098998274, "learning_rate": 9.246255925621755e-07, "loss": 0.4306, "step": 11607 }, { "epoch": 0.20177649533278869, "grad_norm": 1.4622052432884627, "learning_rate": 9.246107293480085e-07, "loss": 0.3012, "step": 11608 }, { "epoch": 0.20179387787029152, "grad_norm": 1.3241501429391314, "learning_rate": 9.245958647880207e-07, "loss": 0.403, "step": 11609 }, { "epoch": 0.20181126040779432, "grad_norm": 1.6651751410115812, "learning_rate": 9.24580998882259e-07, "loss": 0.3965, "step": 11610 }, { "epoch": 0.20182864294529715, "grad_norm": 1.7288796197919676, "learning_rate": 9.245661316307706e-07, "loss": 0.575, "step": 11611 }, { "epoch": 0.20184602548279998, "grad_norm": 2.322621111192974, "learning_rate": 9.245512630336026e-07, "loss": 0.3494, "step": 11612 }, { "epoch": 0.2018634080203028, "grad_norm": 1.478839513642253, "learning_rate": 9.24536393090802e-07, "loss": 0.2936, "step": 11613 }, { "epoch": 0.20188079055780564, "grad_norm": 1.501459574312768, "learning_rate": 9.24521521802416e-07, "loss": 0.2905, "step": 11614 }, { "epoch": 0.20189817309530844, "grad_norm": 0.9271301803093371, "learning_rate": 9.245066491684921e-07, "loss": 0.2417, "step": 11615 }, { "epoch": 0.20191555563281127, "grad_norm": 1.1712738598622325, "learning_rate": 9.244917751890768e-07, "loss": 0.3687, "step": 11616 }, { "epoch": 0.2019329381703141, "grad_norm": 1.6336647642382374, "learning_rate": 9.244768998642178e-07, "loss": 0.3929, "step": 11617 }, { "epoch": 0.20195032070781693, "grad_norm": 2.6116809709194864, "learning_rate": 9.24462023193962e-07, "loss": 0.4705, "step": 11618 }, { "epoch": 0.20196770324531976, "grad_norm": 1.4289645594270057, "learning_rate": 9.244471451783565e-07, "loss": 0.3383, "step": 11619 }, { "epoch": 0.20198508578282257, "grad_norm": 1.2099492697029905, "learning_rate": 9.244322658174483e-07, "loss": 0.253, "step": 11620 }, { "epoch": 0.2020024683203254, "grad_norm": 2.249637465147433, "learning_rate": 9.24417385111285e-07, "loss": 0.3975, "step": 11621 }, { "epoch": 0.20201985085782823, "grad_norm": 2.132890665335864, "learning_rate": 9.244025030599137e-07, "loss": 0.4047, "step": 11622 }, { "epoch": 0.20203723339533106, "grad_norm": 1.273774217199445, "learning_rate": 9.243876196633811e-07, "loss": 0.266, "step": 11623 }, { "epoch": 0.2020546159328339, "grad_norm": 1.6595395247527596, "learning_rate": 9.24372734921735e-07, "loss": 0.296, "step": 11624 }, { "epoch": 0.2020719984703367, "grad_norm": 9.269751026020037, "learning_rate": 9.243578488350221e-07, "loss": 0.4188, "step": 11625 }, { "epoch": 0.20208938100783952, "grad_norm": 1.6992891102628882, "learning_rate": 9.243429614032898e-07, "loss": 0.3827, "step": 11626 }, { "epoch": 0.20210676354534235, "grad_norm": 1.8050079905823693, "learning_rate": 9.243280726265852e-07, "loss": 0.2443, "step": 11627 }, { "epoch": 0.20212414608284518, "grad_norm": 1.3031621567473246, "learning_rate": 9.243131825049556e-07, "loss": 0.4179, "step": 11628 }, { "epoch": 0.202141528620348, "grad_norm": 1.673161943893055, "learning_rate": 9.24298291038448e-07, "loss": 0.2858, "step": 11629 }, { "epoch": 0.2021589111578508, "grad_norm": 1.7801876399514378, "learning_rate": 9.242833982271099e-07, "loss": 0.4095, "step": 11630 }, { "epoch": 0.20217629369535364, "grad_norm": 1.6434914491804395, "learning_rate": 9.242685040709882e-07, "loss": 0.2503, "step": 11631 }, { "epoch": 0.20219367623285647, "grad_norm": 1.8574198104142738, "learning_rate": 9.242536085701302e-07, "loss": 0.5565, "step": 11632 }, { "epoch": 0.2022110587703593, "grad_norm": 1.6262974105958923, "learning_rate": 9.242387117245833e-07, "loss": 0.2551, "step": 11633 }, { "epoch": 0.20222844130786213, "grad_norm": 1.8483224869128516, "learning_rate": 9.242238135343945e-07, "loss": 0.384, "step": 11634 }, { "epoch": 0.20224582384536494, "grad_norm": 2.2180268368137757, "learning_rate": 9.24208913999611e-07, "loss": 0.3877, "step": 11635 }, { "epoch": 0.20226320638286777, "grad_norm": 1.2815304478460972, "learning_rate": 9.241940131202801e-07, "loss": 0.2606, "step": 11636 }, { "epoch": 0.2022805889203706, "grad_norm": 1.121533196948857, "learning_rate": 9.241791108964492e-07, "loss": 0.3663, "step": 11637 }, { "epoch": 0.20229797145787343, "grad_norm": 1.4380832091880966, "learning_rate": 9.241642073281653e-07, "loss": 0.5295, "step": 11638 }, { "epoch": 0.20231535399537626, "grad_norm": 1.6983713236827382, "learning_rate": 9.241493024154756e-07, "loss": 0.33, "step": 11639 }, { "epoch": 0.20233273653287906, "grad_norm": 2.4586410286885325, "learning_rate": 9.241343961584276e-07, "loss": 0.4721, "step": 11640 }, { "epoch": 0.2023501190703819, "grad_norm": 2.4338619656630662, "learning_rate": 9.241194885570683e-07, "loss": 0.4842, "step": 11641 }, { "epoch": 0.20236750160788472, "grad_norm": 1.559339163305119, "learning_rate": 9.241045796114451e-07, "loss": 0.3248, "step": 11642 }, { "epoch": 0.20238488414538755, "grad_norm": 4.786451052629984, "learning_rate": 9.240896693216052e-07, "loss": 0.3693, "step": 11643 }, { "epoch": 0.20240226668289038, "grad_norm": 2.16688749732916, "learning_rate": 9.240747576875958e-07, "loss": 0.3379, "step": 11644 }, { "epoch": 0.20241964922039318, "grad_norm": 1.5710047522654393, "learning_rate": 9.240598447094643e-07, "loss": 0.4989, "step": 11645 }, { "epoch": 0.202437031757896, "grad_norm": 1.5876937642542235, "learning_rate": 9.240449303872578e-07, "loss": 0.3166, "step": 11646 }, { "epoch": 0.20245441429539884, "grad_norm": 2.6104357290245814, "learning_rate": 9.240300147210239e-07, "loss": 0.574, "step": 11647 }, { "epoch": 0.20247179683290167, "grad_norm": 1.7648826134749136, "learning_rate": 9.240150977108092e-07, "loss": 0.4453, "step": 11648 }, { "epoch": 0.2024891793704045, "grad_norm": 1.7813432883062306, "learning_rate": 9.240001793566617e-07, "loss": 0.4138, "step": 11649 }, { "epoch": 0.2025065619079073, "grad_norm": 1.8160487612363176, "learning_rate": 9.239852596586284e-07, "loss": 0.441, "step": 11650 }, { "epoch": 0.20252394444541014, "grad_norm": 1.4707182785632806, "learning_rate": 9.239703386167566e-07, "loss": 0.5969, "step": 11651 }, { "epoch": 0.20254132698291297, "grad_norm": 1.5811898130488447, "learning_rate": 9.239554162310935e-07, "loss": 0.2803, "step": 11652 }, { "epoch": 0.2025587095204158, "grad_norm": 1.7942804075947787, "learning_rate": 9.239404925016865e-07, "loss": 0.3103, "step": 11653 }, { "epoch": 0.20257609205791863, "grad_norm": 1.4495302037577853, "learning_rate": 9.239255674285828e-07, "loss": 0.4534, "step": 11654 }, { "epoch": 0.20259347459542143, "grad_norm": 1.1896939236206234, "learning_rate": 9.2391064101183e-07, "loss": 0.3531, "step": 11655 }, { "epoch": 0.20261085713292426, "grad_norm": 2.309130046182539, "learning_rate": 9.23895713251475e-07, "loss": 0.3416, "step": 11656 }, { "epoch": 0.2026282396704271, "grad_norm": 2.0752139627452046, "learning_rate": 9.238807841475656e-07, "loss": 0.3519, "step": 11657 }, { "epoch": 0.20264562220792992, "grad_norm": 1.5534565170988808, "learning_rate": 9.238658537001485e-07, "loss": 0.2983, "step": 11658 }, { "epoch": 0.20266300474543275, "grad_norm": 2.091551114933941, "learning_rate": 9.238509219092713e-07, "loss": 0.4319, "step": 11659 }, { "epoch": 0.20268038728293555, "grad_norm": 1.7737597960772353, "learning_rate": 9.238359887749817e-07, "loss": 0.4012, "step": 11660 }, { "epoch": 0.20269776982043838, "grad_norm": 2.6126429487765903, "learning_rate": 9.238210542973265e-07, "loss": 0.3454, "step": 11661 }, { "epoch": 0.20271515235794121, "grad_norm": 2.2614178602186823, "learning_rate": 9.238061184763533e-07, "loss": 0.6902, "step": 11662 }, { "epoch": 0.20273253489544404, "grad_norm": 1.7162160629157914, "learning_rate": 9.237911813121096e-07, "loss": 0.3861, "step": 11663 }, { "epoch": 0.20274991743294687, "grad_norm": 2.8611692207809454, "learning_rate": 9.237762428046422e-07, "loss": 0.3416, "step": 11664 }, { "epoch": 0.20276729997044968, "grad_norm": 1.973268744070383, "learning_rate": 9.237613029539988e-07, "loss": 0.4339, "step": 11665 }, { "epoch": 0.2027846825079525, "grad_norm": 1.6271948038339714, "learning_rate": 9.237463617602269e-07, "loss": 0.3823, "step": 11666 }, { "epoch": 0.20280206504545534, "grad_norm": 1.8213500773198692, "learning_rate": 9.237314192233736e-07, "loss": 0.4288, "step": 11667 }, { "epoch": 0.20281944758295817, "grad_norm": 1.278161962968589, "learning_rate": 9.237164753434862e-07, "loss": 0.273, "step": 11668 }, { "epoch": 0.202836830120461, "grad_norm": 1.6712998811221378, "learning_rate": 9.237015301206126e-07, "loss": 0.3714, "step": 11669 }, { "epoch": 0.2028542126579638, "grad_norm": 2.480963750857227, "learning_rate": 9.236865835547996e-07, "loss": 0.3815, "step": 11670 }, { "epoch": 0.20287159519546663, "grad_norm": 1.5931364442278368, "learning_rate": 9.236716356460947e-07, "loss": 0.2577, "step": 11671 }, { "epoch": 0.20288897773296946, "grad_norm": 1.902028874375752, "learning_rate": 9.236566863945452e-07, "loss": 0.4085, "step": 11672 }, { "epoch": 0.2029063602704723, "grad_norm": 2.107368843404673, "learning_rate": 9.236417358001989e-07, "loss": 0.2643, "step": 11673 }, { "epoch": 0.20292374280797512, "grad_norm": 1.525244294143979, "learning_rate": 9.236267838631026e-07, "loss": 0.3082, "step": 11674 }, { "epoch": 0.20294112534547792, "grad_norm": 1.568466433495821, "learning_rate": 9.236118305833042e-07, "loss": 0.3232, "step": 11675 }, { "epoch": 0.20295850788298075, "grad_norm": 2.3629613985695923, "learning_rate": 9.235968759608509e-07, "loss": 0.5732, "step": 11676 }, { "epoch": 0.20297589042048358, "grad_norm": 2.0321615830946085, "learning_rate": 9.2358191999579e-07, "loss": 0.3711, "step": 11677 }, { "epoch": 0.20299327295798641, "grad_norm": 1.252739697677916, "learning_rate": 9.23566962688169e-07, "loss": 0.2913, "step": 11678 }, { "epoch": 0.20301065549548925, "grad_norm": 1.1545449127602576, "learning_rate": 9.235520040380354e-07, "loss": 0.3835, "step": 11679 }, { "epoch": 0.20302803803299205, "grad_norm": 1.0358410591096818, "learning_rate": 9.235370440454364e-07, "loss": 0.2957, "step": 11680 }, { "epoch": 0.20304542057049488, "grad_norm": 2.8497837341762855, "learning_rate": 9.235220827104195e-07, "loss": 0.4645, "step": 11681 }, { "epoch": 0.2030628031079977, "grad_norm": 2.0204523257180265, "learning_rate": 9.235071200330322e-07, "loss": 0.4191, "step": 11682 }, { "epoch": 0.20308018564550054, "grad_norm": 2.59938853519938, "learning_rate": 9.23492156013322e-07, "loss": 0.273, "step": 11683 }, { "epoch": 0.20309756818300337, "grad_norm": 5.465373591464911, "learning_rate": 9.234771906513361e-07, "loss": 0.4514, "step": 11684 }, { "epoch": 0.20311495072050617, "grad_norm": 2.941858502341457, "learning_rate": 9.23462223947122e-07, "loss": 0.3375, "step": 11685 }, { "epoch": 0.203132333258009, "grad_norm": 3.7495669049674745, "learning_rate": 9.234472559007271e-07, "loss": 0.3881, "step": 11686 }, { "epoch": 0.20314971579551183, "grad_norm": 3.3246377083778667, "learning_rate": 9.234322865121991e-07, "loss": 0.3276, "step": 11687 }, { "epoch": 0.20316709833301466, "grad_norm": 1.9832666986529266, "learning_rate": 9.234173157815852e-07, "loss": 0.2597, "step": 11688 }, { "epoch": 0.20318448087051746, "grad_norm": 1.6515128394104879, "learning_rate": 9.234023437089329e-07, "loss": 0.2064, "step": 11689 }, { "epoch": 0.2032018634080203, "grad_norm": 1.9475750183989593, "learning_rate": 9.233873702942896e-07, "loss": 0.4442, "step": 11690 }, { "epoch": 0.20321924594552312, "grad_norm": 2.418925536169106, "learning_rate": 9.233723955377028e-07, "loss": 0.3963, "step": 11691 }, { "epoch": 0.20323662848302596, "grad_norm": 2.614570578294604, "learning_rate": 9.233574194392201e-07, "loss": 0.3208, "step": 11692 }, { "epoch": 0.20325401102052879, "grad_norm": 1.31254474132264, "learning_rate": 9.233424419988888e-07, "loss": 0.5033, "step": 11693 }, { "epoch": 0.2032713935580316, "grad_norm": 1.7645896035212376, "learning_rate": 9.233274632167565e-07, "loss": 0.2145, "step": 11694 }, { "epoch": 0.20328877609553442, "grad_norm": 2.4426130562612838, "learning_rate": 9.233124830928705e-07, "loss": 0.2636, "step": 11695 }, { "epoch": 0.20330615863303725, "grad_norm": 2.6768872915336615, "learning_rate": 9.232975016272784e-07, "loss": 0.3062, "step": 11696 }, { "epoch": 0.20332354117054008, "grad_norm": 1.7433310865685074, "learning_rate": 9.232825188200278e-07, "loss": 0.3461, "step": 11697 }, { "epoch": 0.2033409237080429, "grad_norm": 1.6816278731327952, "learning_rate": 9.232675346711658e-07, "loss": 0.2962, "step": 11698 }, { "epoch": 0.2033583062455457, "grad_norm": 1.2098538223470758, "learning_rate": 9.232525491807404e-07, "loss": 0.1922, "step": 11699 }, { "epoch": 0.20337568878304854, "grad_norm": 1.8828887273143782, "learning_rate": 9.232375623487986e-07, "loss": 0.2893, "step": 11700 }, { "epoch": 0.20339307132055137, "grad_norm": 1.9767101663934679, "learning_rate": 9.232225741753883e-07, "loss": 0.3887, "step": 11701 }, { "epoch": 0.2034104538580542, "grad_norm": 1.63689444333908, "learning_rate": 9.232075846605569e-07, "loss": 0.388, "step": 11702 }, { "epoch": 0.20342783639555703, "grad_norm": 2.0030611682967874, "learning_rate": 9.231925938043517e-07, "loss": 0.4739, "step": 11703 }, { "epoch": 0.20344521893305983, "grad_norm": 1.8033620114531914, "learning_rate": 9.231776016068206e-07, "loss": 0.3735, "step": 11704 }, { "epoch": 0.20346260147056267, "grad_norm": 2.1283879145640854, "learning_rate": 9.231626080680107e-07, "loss": 0.4642, "step": 11705 }, { "epoch": 0.2034799840080655, "grad_norm": 2.3270592991572756, "learning_rate": 9.231476131879699e-07, "loss": 0.4599, "step": 11706 }, { "epoch": 0.20349736654556833, "grad_norm": 1.3933395275432137, "learning_rate": 9.231326169667452e-07, "loss": 0.3572, "step": 11707 }, { "epoch": 0.20351474908307116, "grad_norm": 2.242608598789863, "learning_rate": 9.231176194043848e-07, "loss": 0.4866, "step": 11708 }, { "epoch": 0.20353213162057396, "grad_norm": 1.4905247470398777, "learning_rate": 9.231026205009358e-07, "loss": 0.4736, "step": 11709 }, { "epoch": 0.2035495141580768, "grad_norm": 3.3825941992686213, "learning_rate": 9.230876202564459e-07, "loss": 0.3551, "step": 11710 }, { "epoch": 0.20356689669557962, "grad_norm": 1.247645936042837, "learning_rate": 9.230726186709625e-07, "loss": 0.4011, "step": 11711 }, { "epoch": 0.20358427923308245, "grad_norm": 1.724138255195602, "learning_rate": 9.230576157445334e-07, "loss": 0.303, "step": 11712 }, { "epoch": 0.20360166177058528, "grad_norm": 2.0355045549222432, "learning_rate": 9.230426114772058e-07, "loss": 0.3529, "step": 11713 }, { "epoch": 0.20361904430808808, "grad_norm": 1.8535124545492132, "learning_rate": 9.230276058690276e-07, "loss": 0.3368, "step": 11714 }, { "epoch": 0.2036364268455909, "grad_norm": 2.095638263204944, "learning_rate": 9.230125989200462e-07, "loss": 0.3875, "step": 11715 }, { "epoch": 0.20365380938309374, "grad_norm": 1.5361068701216694, "learning_rate": 9.229975906303091e-07, "loss": 0.4382, "step": 11716 }, { "epoch": 0.20367119192059657, "grad_norm": 2.131061425474301, "learning_rate": 9.229825809998641e-07, "loss": 0.3635, "step": 11717 }, { "epoch": 0.2036885744580994, "grad_norm": 1.2122125630611966, "learning_rate": 9.229675700287584e-07, "loss": 0.3748, "step": 11718 }, { "epoch": 0.2037059569956022, "grad_norm": 1.6434901505304822, "learning_rate": 9.229525577170399e-07, "loss": 0.5287, "step": 11719 }, { "epoch": 0.20372333953310504, "grad_norm": 2.1070643580621633, "learning_rate": 9.229375440647562e-07, "loss": 0.3102, "step": 11720 }, { "epoch": 0.20374072207060787, "grad_norm": 2.4222945428570615, "learning_rate": 9.229225290719547e-07, "loss": 0.4741, "step": 11721 }, { "epoch": 0.2037581046081107, "grad_norm": 1.1730595949944547, "learning_rate": 9.22907512738683e-07, "loss": 0.34, "step": 11722 }, { "epoch": 0.20377548714561353, "grad_norm": 2.4910470012589534, "learning_rate": 9.228924950649887e-07, "loss": 0.3214, "step": 11723 }, { "epoch": 0.20379286968311633, "grad_norm": 2.515041169820538, "learning_rate": 9.228774760509195e-07, "loss": 0.3632, "step": 11724 }, { "epoch": 0.20381025222061916, "grad_norm": 1.6965300104196939, "learning_rate": 9.22862455696523e-07, "loss": 0.3436, "step": 11725 }, { "epoch": 0.203827634758122, "grad_norm": 1.7721536912122777, "learning_rate": 9.228474340018467e-07, "loss": 0.2395, "step": 11726 }, { "epoch": 0.20384501729562482, "grad_norm": 1.3156171804335044, "learning_rate": 9.228324109669383e-07, "loss": 0.4279, "step": 11727 }, { "epoch": 0.20386239983312765, "grad_norm": 1.8654553090628292, "learning_rate": 9.228173865918454e-07, "loss": 0.3015, "step": 11728 }, { "epoch": 0.20387978237063045, "grad_norm": 1.2290689598548457, "learning_rate": 9.228023608766155e-07, "loss": 0.3308, "step": 11729 }, { "epoch": 0.20389716490813328, "grad_norm": 1.4926889443712936, "learning_rate": 9.227873338212966e-07, "loss": 0.3084, "step": 11730 }, { "epoch": 0.2039145474456361, "grad_norm": 2.107106130072846, "learning_rate": 9.227723054259359e-07, "loss": 0.5813, "step": 11731 }, { "epoch": 0.20393192998313894, "grad_norm": 2.3080264332474645, "learning_rate": 9.227572756905812e-07, "loss": 0.2872, "step": 11732 }, { "epoch": 0.20394931252064177, "grad_norm": 2.3531726616918136, "learning_rate": 9.227422446152802e-07, "loss": 0.6228, "step": 11733 }, { "epoch": 0.20396669505814458, "grad_norm": 1.9030678666182848, "learning_rate": 9.227272122000804e-07, "loss": 0.3553, "step": 11734 }, { "epoch": 0.2039840775956474, "grad_norm": 1.4523541524513992, "learning_rate": 9.227121784450295e-07, "loss": 0.2924, "step": 11735 }, { "epoch": 0.20400146013315024, "grad_norm": 1.3129628108750737, "learning_rate": 9.226971433501753e-07, "loss": 0.4555, "step": 11736 }, { "epoch": 0.20401884267065307, "grad_norm": 2.2787902813735674, "learning_rate": 9.226821069155652e-07, "loss": 0.4008, "step": 11737 }, { "epoch": 0.2040362252081559, "grad_norm": 3.627142256895427, "learning_rate": 9.22667069141247e-07, "loss": 0.3062, "step": 11738 }, { "epoch": 0.2040536077456587, "grad_norm": 1.3316273491868935, "learning_rate": 9.226520300272685e-07, "loss": 0.2875, "step": 11739 }, { "epoch": 0.20407099028316153, "grad_norm": 2.372364811741525, "learning_rate": 9.22636989573677e-07, "loss": 0.249, "step": 11740 }, { "epoch": 0.20408837282066436, "grad_norm": 1.6735575981894335, "learning_rate": 9.226219477805204e-07, "loss": 0.3141, "step": 11741 }, { "epoch": 0.2041057553581672, "grad_norm": 1.4863241686651552, "learning_rate": 9.226069046478464e-07, "loss": 0.2668, "step": 11742 }, { "epoch": 0.20412313789567002, "grad_norm": 1.8205615141803566, "learning_rate": 9.225918601757028e-07, "loss": 0.2911, "step": 11743 }, { "epoch": 0.20414052043317282, "grad_norm": 3.0463151103564505, "learning_rate": 9.22576814364137e-07, "loss": 0.2581, "step": 11744 }, { "epoch": 0.20415790297067565, "grad_norm": 2.889590260800951, "learning_rate": 9.225617672131968e-07, "loss": 0.2395, "step": 11745 }, { "epoch": 0.20417528550817848, "grad_norm": 1.8773328372494775, "learning_rate": 9.225467187229299e-07, "loss": 0.3942, "step": 11746 }, { "epoch": 0.2041926680456813, "grad_norm": 3.236335922409795, "learning_rate": 9.225316688933839e-07, "loss": 0.3091, "step": 11747 }, { "epoch": 0.20421005058318414, "grad_norm": 2.698221613477664, "learning_rate": 9.225166177246069e-07, "loss": 0.3949, "step": 11748 }, { "epoch": 0.20422743312068695, "grad_norm": 3.326201257264395, "learning_rate": 9.225015652166462e-07, "loss": 0.3495, "step": 11749 }, { "epoch": 0.20424481565818978, "grad_norm": 2.5765580139322712, "learning_rate": 9.224865113695495e-07, "loss": 0.4449, "step": 11750 }, { "epoch": 0.2042621981956926, "grad_norm": 1.4717772454757023, "learning_rate": 9.224714561833646e-07, "loss": 0.3002, "step": 11751 }, { "epoch": 0.20427958073319544, "grad_norm": 1.5115049697547194, "learning_rate": 9.224563996581394e-07, "loss": 0.2364, "step": 11752 }, { "epoch": 0.20429696327069827, "grad_norm": 1.8158942751241764, "learning_rate": 9.224413417939214e-07, "loss": 0.1802, "step": 11753 }, { "epoch": 0.20431434580820107, "grad_norm": 2.367772412186792, "learning_rate": 9.224262825907585e-07, "loss": 0.3536, "step": 11754 }, { "epoch": 0.2043317283457039, "grad_norm": 1.4067460695981135, "learning_rate": 9.224112220486981e-07, "loss": 0.3158, "step": 11755 }, { "epoch": 0.20434911088320673, "grad_norm": 1.2112431465897826, "learning_rate": 9.223961601677884e-07, "loss": 0.5196, "step": 11756 }, { "epoch": 0.20436649342070956, "grad_norm": 1.9081749139313582, "learning_rate": 9.223810969480769e-07, "loss": 0.3482, "step": 11757 }, { "epoch": 0.2043838759582124, "grad_norm": 1.9665426033592524, "learning_rate": 9.223660323896112e-07, "loss": 0.2463, "step": 11758 }, { "epoch": 0.2044012584957152, "grad_norm": 1.4663841524724972, "learning_rate": 9.223509664924394e-07, "loss": 0.227, "step": 11759 }, { "epoch": 0.20441864103321802, "grad_norm": 1.4806769921176617, "learning_rate": 9.223358992566088e-07, "loss": 0.4187, "step": 11760 }, { "epoch": 0.20443602357072085, "grad_norm": 2.2075819947394995, "learning_rate": 9.223208306821675e-07, "loss": 0.4906, "step": 11761 }, { "epoch": 0.20445340610822368, "grad_norm": 1.603466207433121, "learning_rate": 9.223057607691632e-07, "loss": 0.3597, "step": 11762 }, { "epoch": 0.20447078864572651, "grad_norm": 1.9581814273421059, "learning_rate": 9.222906895176437e-07, "loss": 0.2295, "step": 11763 }, { "epoch": 0.20448817118322932, "grad_norm": 2.0785294372193994, "learning_rate": 9.222756169276566e-07, "loss": 0.6041, "step": 11764 }, { "epoch": 0.20450555372073215, "grad_norm": 1.3961852038473042, "learning_rate": 9.222605429992498e-07, "loss": 0.3757, "step": 11765 }, { "epoch": 0.20452293625823498, "grad_norm": 1.2285150516334797, "learning_rate": 9.22245467732471e-07, "loss": 0.2239, "step": 11766 }, { "epoch": 0.2045403187957378, "grad_norm": 2.120759247192923, "learning_rate": 9.222303911273681e-07, "loss": 0.4127, "step": 11767 }, { "epoch": 0.20455770133324064, "grad_norm": 1.6447929461247122, "learning_rate": 9.222153131839887e-07, "loss": 0.3321, "step": 11768 }, { "epoch": 0.20457508387074344, "grad_norm": 1.9569156592136832, "learning_rate": 9.222002339023809e-07, "loss": 0.2994, "step": 11769 }, { "epoch": 0.20459246640824627, "grad_norm": 2.544042789705327, "learning_rate": 9.221851532825922e-07, "loss": 0.3796, "step": 11770 }, { "epoch": 0.2046098489457491, "grad_norm": 1.7092829051552174, "learning_rate": 9.221700713246705e-07, "loss": 0.3897, "step": 11771 }, { "epoch": 0.20462723148325193, "grad_norm": 1.0101368889266542, "learning_rate": 9.221549880286636e-07, "loss": 0.187, "step": 11772 }, { "epoch": 0.20464461402075476, "grad_norm": 2.0947760706949268, "learning_rate": 9.221399033946194e-07, "loss": 0.3187, "step": 11773 }, { "epoch": 0.20466199655825756, "grad_norm": 1.4454637753217514, "learning_rate": 9.221248174225854e-07, "loss": 0.361, "step": 11774 }, { "epoch": 0.2046793790957604, "grad_norm": 1.3028404587615667, "learning_rate": 9.221097301126097e-07, "loss": 0.3603, "step": 11775 }, { "epoch": 0.20469676163326322, "grad_norm": 2.1213305395169404, "learning_rate": 9.220946414647402e-07, "loss": 0.2591, "step": 11776 }, { "epoch": 0.20471414417076605, "grad_norm": 1.7881225519427186, "learning_rate": 9.220795514790245e-07, "loss": 0.4384, "step": 11777 }, { "epoch": 0.20473152670826888, "grad_norm": 1.2379111402013594, "learning_rate": 9.220644601555106e-07, "loss": 0.3314, "step": 11778 }, { "epoch": 0.2047489092457717, "grad_norm": 2.1469317886014596, "learning_rate": 9.22049367494246e-07, "loss": 0.3437, "step": 11779 }, { "epoch": 0.20476629178327452, "grad_norm": 1.7947889084953732, "learning_rate": 9.220342734952791e-07, "loss": 0.3787, "step": 11780 }, { "epoch": 0.20478367432077735, "grad_norm": 2.9579251633980133, "learning_rate": 9.220191781586571e-07, "loss": 0.3222, "step": 11781 }, { "epoch": 0.20480105685828018, "grad_norm": 1.3187362182482132, "learning_rate": 9.220040814844283e-07, "loss": 0.3371, "step": 11782 }, { "epoch": 0.204818439395783, "grad_norm": 2.4258852725508313, "learning_rate": 9.219889834726404e-07, "loss": 0.4963, "step": 11783 }, { "epoch": 0.2048358219332858, "grad_norm": 1.7710758127630628, "learning_rate": 9.219738841233413e-07, "loss": 0.234, "step": 11784 }, { "epoch": 0.20485320447078864, "grad_norm": 1.538701240198988, "learning_rate": 9.21958783436579e-07, "loss": 0.2974, "step": 11785 }, { "epoch": 0.20487058700829147, "grad_norm": 2.6430179697588043, "learning_rate": 9.219436814124009e-07, "loss": 0.3058, "step": 11786 }, { "epoch": 0.2048879695457943, "grad_norm": 2.6735912965565105, "learning_rate": 9.219285780508554e-07, "loss": 0.497, "step": 11787 }, { "epoch": 0.20490535208329713, "grad_norm": 2.0729478940944195, "learning_rate": 9.2191347335199e-07, "loss": 0.5876, "step": 11788 }, { "epoch": 0.20492273462079993, "grad_norm": 1.6670346432711625, "learning_rate": 9.218983673158527e-07, "loss": 0.3155, "step": 11789 }, { "epoch": 0.20494011715830276, "grad_norm": 1.0389069881383797, "learning_rate": 9.218832599424913e-07, "loss": 0.3673, "step": 11790 }, { "epoch": 0.2049574996958056, "grad_norm": 2.949143475923746, "learning_rate": 9.218681512319538e-07, "loss": 0.5456, "step": 11791 }, { "epoch": 0.20497488223330843, "grad_norm": 1.0605593644815074, "learning_rate": 9.218530411842881e-07, "loss": 0.3723, "step": 11792 }, { "epoch": 0.20499226477081126, "grad_norm": 1.5186005519834018, "learning_rate": 9.21837929799542e-07, "loss": 0.5786, "step": 11793 }, { "epoch": 0.20500964730831406, "grad_norm": 1.2504905383044185, "learning_rate": 9.218228170777635e-07, "loss": 0.1582, "step": 11794 }, { "epoch": 0.2050270298458169, "grad_norm": 1.3556263086561136, "learning_rate": 9.218077030190004e-07, "loss": 0.2232, "step": 11795 }, { "epoch": 0.20504441238331972, "grad_norm": 2.210322498095505, "learning_rate": 9.217925876233007e-07, "loss": 0.3945, "step": 11796 }, { "epoch": 0.20506179492082255, "grad_norm": 2.0653467617082133, "learning_rate": 9.217774708907122e-07, "loss": 0.6409, "step": 11797 }, { "epoch": 0.20507917745832538, "grad_norm": 2.3172484191447142, "learning_rate": 9.217623528212829e-07, "loss": 0.2994, "step": 11798 }, { "epoch": 0.20509655999582818, "grad_norm": 1.3671963784382084, "learning_rate": 9.217472334150607e-07, "loss": 0.2716, "step": 11799 }, { "epoch": 0.205113942533331, "grad_norm": 2.081700823677055, "learning_rate": 9.217321126720933e-07, "loss": 0.2813, "step": 11800 }, { "epoch": 0.20513132507083384, "grad_norm": 1.5666810007996956, "learning_rate": 9.217169905924291e-07, "loss": 0.3419, "step": 11801 }, { "epoch": 0.20514870760833667, "grad_norm": 2.4308920758579218, "learning_rate": 9.217018671761156e-07, "loss": 0.3995, "step": 11802 }, { "epoch": 0.2051660901458395, "grad_norm": 1.3942535884798033, "learning_rate": 9.216867424232011e-07, "loss": 0.3484, "step": 11803 }, { "epoch": 0.2051834726833423, "grad_norm": 1.9303470544359487, "learning_rate": 9.216716163337332e-07, "loss": 0.359, "step": 11804 }, { "epoch": 0.20520085522084514, "grad_norm": 1.988082314389408, "learning_rate": 9.216564889077599e-07, "loss": 0.4736, "step": 11805 }, { "epoch": 0.20521823775834797, "grad_norm": 2.076773494309255, "learning_rate": 9.216413601453293e-07, "loss": 0.2864, "step": 11806 }, { "epoch": 0.2052356202958508, "grad_norm": 1.7997330940522205, "learning_rate": 9.216262300464893e-07, "loss": 0.3218, "step": 11807 }, { "epoch": 0.20525300283335363, "grad_norm": 1.9840483266261015, "learning_rate": 9.216110986112877e-07, "loss": 0.3597, "step": 11808 }, { "epoch": 0.20527038537085643, "grad_norm": 1.2946901771619408, "learning_rate": 9.215959658397727e-07, "loss": 0.2319, "step": 11809 }, { "epoch": 0.20528776790835926, "grad_norm": 1.0903658878840081, "learning_rate": 9.215808317319922e-07, "loss": 0.5953, "step": 11810 }, { "epoch": 0.2053051504458621, "grad_norm": 2.10912754853844, "learning_rate": 9.215656962879939e-07, "loss": 0.2989, "step": 11811 }, { "epoch": 0.20532253298336492, "grad_norm": 2.3261776148555384, "learning_rate": 9.215505595078262e-07, "loss": 0.8418, "step": 11812 }, { "epoch": 0.20533991552086775, "grad_norm": 2.2564056732879996, "learning_rate": 9.215354213915368e-07, "loss": 0.3376, "step": 11813 }, { "epoch": 0.20535729805837055, "grad_norm": 1.665347677795682, "learning_rate": 9.215202819391739e-07, "loss": 0.4745, "step": 11814 }, { "epoch": 0.20537468059587338, "grad_norm": 2.864088078417982, "learning_rate": 9.215051411507851e-07, "loss": 0.4942, "step": 11815 }, { "epoch": 0.2053920631333762, "grad_norm": 1.4900896484227075, "learning_rate": 9.214899990264188e-07, "loss": 0.4399, "step": 11816 }, { "epoch": 0.20540944567087904, "grad_norm": 2.1021752882581777, "learning_rate": 9.214748555661227e-07, "loss": 0.5455, "step": 11817 }, { "epoch": 0.20542682820838187, "grad_norm": 4.1799892793653, "learning_rate": 9.21459710769945e-07, "loss": 0.1928, "step": 11818 }, { "epoch": 0.20544421074588468, "grad_norm": 1.8765686016463377, "learning_rate": 9.214445646379336e-07, "loss": 0.4865, "step": 11819 }, { "epoch": 0.2054615932833875, "grad_norm": 1.4421494742524266, "learning_rate": 9.214294171701365e-07, "loss": 0.3478, "step": 11820 }, { "epoch": 0.20547897582089034, "grad_norm": 1.941724964852841, "learning_rate": 9.214142683666017e-07, "loss": 0.4059, "step": 11821 }, { "epoch": 0.20549635835839317, "grad_norm": 2.9058586486501645, "learning_rate": 9.213991182273774e-07, "loss": 0.3777, "step": 11822 }, { "epoch": 0.205513740895896, "grad_norm": 2.190387682319517, "learning_rate": 9.213839667525114e-07, "loss": 0.3683, "step": 11823 }, { "epoch": 0.2055311234333988, "grad_norm": 1.8300975979214555, "learning_rate": 9.213688139420517e-07, "loss": 0.4791, "step": 11824 }, { "epoch": 0.20554850597090163, "grad_norm": 2.655365582413276, "learning_rate": 9.213536597960466e-07, "loss": 0.4843, "step": 11825 }, { "epoch": 0.20556588850840446, "grad_norm": 2.321445429405742, "learning_rate": 9.213385043145438e-07, "loss": 0.4676, "step": 11826 }, { "epoch": 0.2055832710459073, "grad_norm": 1.2744501012827036, "learning_rate": 9.213233474975915e-07, "loss": 0.6235, "step": 11827 }, { "epoch": 0.2056006535834101, "grad_norm": 1.9297744771865988, "learning_rate": 9.213081893452376e-07, "loss": 0.3879, "step": 11828 }, { "epoch": 0.20561803612091292, "grad_norm": 1.982422552034946, "learning_rate": 9.212930298575305e-07, "loss": 0.4898, "step": 11829 }, { "epoch": 0.20563541865841575, "grad_norm": 1.8303705438442879, "learning_rate": 9.21277869034518e-07, "loss": 0.3179, "step": 11830 }, { "epoch": 0.20565280119591858, "grad_norm": 1.936040698287697, "learning_rate": 9.212627068762482e-07, "loss": 0.3907, "step": 11831 }, { "epoch": 0.2056701837334214, "grad_norm": 1.5521180577436782, "learning_rate": 9.21247543382769e-07, "loss": 0.3501, "step": 11832 }, { "epoch": 0.20568756627092422, "grad_norm": 2.033043814480901, "learning_rate": 9.212323785541287e-07, "loss": 0.4059, "step": 11833 }, { "epoch": 0.20570494880842705, "grad_norm": 1.960294882156807, "learning_rate": 9.212172123903752e-07, "loss": 0.5035, "step": 11834 }, { "epoch": 0.20572233134592988, "grad_norm": 1.9798222353124195, "learning_rate": 9.212020448915566e-07, "loss": 0.4056, "step": 11835 }, { "epoch": 0.2057397138834327, "grad_norm": 1.562937267693747, "learning_rate": 9.211868760577211e-07, "loss": 0.4775, "step": 11836 }, { "epoch": 0.20575709642093554, "grad_norm": 2.268921364475125, "learning_rate": 9.211717058889165e-07, "loss": 0.3565, "step": 11837 }, { "epoch": 0.20577447895843834, "grad_norm": 2.0408112557163958, "learning_rate": 9.211565343851912e-07, "loss": 0.2318, "step": 11838 }, { "epoch": 0.20579186149594117, "grad_norm": 1.6059104382241205, "learning_rate": 9.211413615465932e-07, "loss": 0.3647, "step": 11839 }, { "epoch": 0.205809244033444, "grad_norm": 2.363261507393515, "learning_rate": 9.211261873731704e-07, "loss": 0.5868, "step": 11840 }, { "epoch": 0.20582662657094683, "grad_norm": 3.3780878767569256, "learning_rate": 9.211110118649712e-07, "loss": 0.7831, "step": 11841 }, { "epoch": 0.20584400910844966, "grad_norm": 2.137838128586767, "learning_rate": 9.210958350220435e-07, "loss": 0.5798, "step": 11842 }, { "epoch": 0.20586139164595246, "grad_norm": 1.7352625425698827, "learning_rate": 9.210806568444353e-07, "loss": 0.3149, "step": 11843 }, { "epoch": 0.2058787741834553, "grad_norm": 1.2393571993212409, "learning_rate": 9.210654773321949e-07, "loss": 0.236, "step": 11844 }, { "epoch": 0.20589615672095812, "grad_norm": 1.7028745729882322, "learning_rate": 9.210502964853704e-07, "loss": 0.3196, "step": 11845 }, { "epoch": 0.20591353925846095, "grad_norm": 3.198712407567573, "learning_rate": 9.210351143040098e-07, "loss": 0.2994, "step": 11846 }, { "epoch": 0.20593092179596378, "grad_norm": 1.2168613506167683, "learning_rate": 9.210199307881614e-07, "loss": 0.2051, "step": 11847 }, { "epoch": 0.2059483043334666, "grad_norm": 1.6764619792062019, "learning_rate": 9.210047459378731e-07, "loss": 0.4131, "step": 11848 }, { "epoch": 0.20596568687096942, "grad_norm": 2.469059788871904, "learning_rate": 9.209895597531933e-07, "loss": 0.471, "step": 11849 }, { "epoch": 0.20598306940847225, "grad_norm": 2.427669042876339, "learning_rate": 9.209743722341698e-07, "loss": 0.3397, "step": 11850 }, { "epoch": 0.20600045194597508, "grad_norm": 1.319865416972593, "learning_rate": 9.209591833808512e-07, "loss": 0.2676, "step": 11851 }, { "epoch": 0.2060178344834779, "grad_norm": 1.4183340801432767, "learning_rate": 9.209439931932852e-07, "loss": 0.3971, "step": 11852 }, { "epoch": 0.2060352170209807, "grad_norm": 2.3590621226230986, "learning_rate": 9.2092880167152e-07, "loss": 0.3419, "step": 11853 }, { "epoch": 0.20605259955848354, "grad_norm": 1.6511723558417735, "learning_rate": 9.20913608815604e-07, "loss": 0.2395, "step": 11854 }, { "epoch": 0.20606998209598637, "grad_norm": 2.279930983105581, "learning_rate": 9.208984146255852e-07, "loss": 0.4287, "step": 11855 }, { "epoch": 0.2060873646334892, "grad_norm": 1.3486155008603633, "learning_rate": 9.208832191015117e-07, "loss": 0.3186, "step": 11856 }, { "epoch": 0.20610474717099203, "grad_norm": 1.3532951936042632, "learning_rate": 9.208680222434317e-07, "loss": 0.3082, "step": 11857 }, { "epoch": 0.20612212970849483, "grad_norm": 2.0043350495387267, "learning_rate": 9.208528240513935e-07, "loss": 0.3559, "step": 11858 }, { "epoch": 0.20613951224599766, "grad_norm": 1.7079020379454382, "learning_rate": 9.208376245254452e-07, "loss": 0.349, "step": 11859 }, { "epoch": 0.2061568947835005, "grad_norm": 1.5605798297764917, "learning_rate": 9.208224236656349e-07, "loss": 0.4092, "step": 11860 }, { "epoch": 0.20617427732100332, "grad_norm": 1.8345163934380848, "learning_rate": 9.208072214720109e-07, "loss": 0.4528, "step": 11861 }, { "epoch": 0.20619165985850615, "grad_norm": 3.92619659721222, "learning_rate": 9.207920179446212e-07, "loss": 0.3195, "step": 11862 }, { "epoch": 0.20620904239600896, "grad_norm": 1.159744009037652, "learning_rate": 9.207768130835141e-07, "loss": 0.3159, "step": 11863 }, { "epoch": 0.2062264249335118, "grad_norm": 1.1890257573760254, "learning_rate": 9.207616068887379e-07, "loss": 0.373, "step": 11864 }, { "epoch": 0.20624380747101462, "grad_norm": 1.1294103702064042, "learning_rate": 9.207463993603405e-07, "loss": 0.2912, "step": 11865 }, { "epoch": 0.20626119000851745, "grad_norm": 1.8760548926272624, "learning_rate": 9.207311904983704e-07, "loss": 0.4305, "step": 11866 }, { "epoch": 0.20627857254602028, "grad_norm": 1.3933700903060195, "learning_rate": 9.207159803028757e-07, "loss": 0.3798, "step": 11867 }, { "epoch": 0.20629595508352308, "grad_norm": 1.6322910145845002, "learning_rate": 9.207007687739045e-07, "loss": 0.2192, "step": 11868 }, { "epoch": 0.2063133376210259, "grad_norm": 2.377375356125986, "learning_rate": 9.206855559115052e-07, "loss": 0.417, "step": 11869 }, { "epoch": 0.20633072015852874, "grad_norm": 1.2160740805232122, "learning_rate": 9.20670341715726e-07, "loss": 0.3496, "step": 11870 }, { "epoch": 0.20634810269603157, "grad_norm": 1.5230848265531622, "learning_rate": 9.20655126186615e-07, "loss": 0.2496, "step": 11871 }, { "epoch": 0.2063654852335344, "grad_norm": 1.8209422756331672, "learning_rate": 9.206399093242206e-07, "loss": 0.3675, "step": 11872 }, { "epoch": 0.2063828677710372, "grad_norm": 1.6453791743894868, "learning_rate": 9.206246911285906e-07, "loss": 0.3411, "step": 11873 }, { "epoch": 0.20640025030854003, "grad_norm": 1.519751871053541, "learning_rate": 9.206094715997738e-07, "loss": 0.1946, "step": 11874 }, { "epoch": 0.20641763284604286, "grad_norm": 1.2359719875578314, "learning_rate": 9.205942507378181e-07, "loss": 0.2087, "step": 11875 }, { "epoch": 0.2064350153835457, "grad_norm": 2.112703536528976, "learning_rate": 9.205790285427719e-07, "loss": 0.5037, "step": 11876 }, { "epoch": 0.20645239792104852, "grad_norm": 2.0798399909394067, "learning_rate": 9.205638050146833e-07, "loss": 0.2544, "step": 11877 }, { "epoch": 0.20646978045855133, "grad_norm": 2.5150918547842798, "learning_rate": 9.205485801536008e-07, "loss": 0.4287, "step": 11878 }, { "epoch": 0.20648716299605416, "grad_norm": 1.6312703754433293, "learning_rate": 9.205333539595723e-07, "loss": 0.5084, "step": 11879 }, { "epoch": 0.206504545533557, "grad_norm": 1.7597231549366141, "learning_rate": 9.205181264326462e-07, "loss": 0.3108, "step": 11880 }, { "epoch": 0.20652192807105982, "grad_norm": 1.7404417433485813, "learning_rate": 9.20502897572871e-07, "loss": 0.3432, "step": 11881 }, { "epoch": 0.20653931060856265, "grad_norm": 1.2115661566946097, "learning_rate": 9.204876673802948e-07, "loss": 0.2927, "step": 11882 }, { "epoch": 0.20655669314606545, "grad_norm": 1.451259985501338, "learning_rate": 9.204724358549657e-07, "loss": 0.4039, "step": 11883 }, { "epoch": 0.20657407568356828, "grad_norm": 2.0723952554097194, "learning_rate": 9.204572029969321e-07, "loss": 0.3421, "step": 11884 }, { "epoch": 0.2065914582210711, "grad_norm": 1.6993871792147455, "learning_rate": 9.204419688062424e-07, "loss": 0.3827, "step": 11885 }, { "epoch": 0.20660884075857394, "grad_norm": 1.6633687403741535, "learning_rate": 9.204267332829447e-07, "loss": 0.4677, "step": 11886 }, { "epoch": 0.20662622329607677, "grad_norm": 2.1341156496587304, "learning_rate": 9.204114964270876e-07, "loss": 0.3464, "step": 11887 }, { "epoch": 0.20664360583357957, "grad_norm": 3.333497094198522, "learning_rate": 9.203962582387191e-07, "loss": 0.3386, "step": 11888 }, { "epoch": 0.2066609883710824, "grad_norm": 3.1251083708758833, "learning_rate": 9.203810187178874e-07, "loss": 0.3166, "step": 11889 }, { "epoch": 0.20667837090858523, "grad_norm": 1.9613965617685003, "learning_rate": 9.203657778646411e-07, "loss": 0.4907, "step": 11890 }, { "epoch": 0.20669575344608807, "grad_norm": 3.2203640527169224, "learning_rate": 9.203505356790284e-07, "loss": 0.5092, "step": 11891 }, { "epoch": 0.2067131359835909, "grad_norm": 1.299508992353201, "learning_rate": 9.203352921610976e-07, "loss": 0.3941, "step": 11892 }, { "epoch": 0.2067305185210937, "grad_norm": 5.831763617161027, "learning_rate": 9.20320047310897e-07, "loss": 0.348, "step": 11893 }, { "epoch": 0.20674790105859653, "grad_norm": 1.7709524129598924, "learning_rate": 9.203048011284748e-07, "loss": 0.4396, "step": 11894 }, { "epoch": 0.20676528359609936, "grad_norm": 1.1806941834624405, "learning_rate": 9.202895536138795e-07, "loss": 0.3475, "step": 11895 }, { "epoch": 0.2067826661336022, "grad_norm": 2.491753079044431, "learning_rate": 9.202743047671595e-07, "loss": 0.3631, "step": 11896 }, { "epoch": 0.20680004867110502, "grad_norm": 2.1224001333102285, "learning_rate": 9.20259054588363e-07, "loss": 0.3961, "step": 11897 }, { "epoch": 0.20681743120860782, "grad_norm": 1.7573345472103812, "learning_rate": 9.202438030775382e-07, "loss": 0.385, "step": 11898 }, { "epoch": 0.20683481374611065, "grad_norm": 2.764956444302965, "learning_rate": 9.202285502347338e-07, "loss": 0.4387, "step": 11899 }, { "epoch": 0.20685219628361348, "grad_norm": 2.1343058314974086, "learning_rate": 9.202132960599977e-07, "loss": 0.3638, "step": 11900 }, { "epoch": 0.2068695788211163, "grad_norm": 1.8130799069724872, "learning_rate": 9.201980405533787e-07, "loss": 0.342, "step": 11901 }, { "epoch": 0.20688696135861914, "grad_norm": 1.5205677274524687, "learning_rate": 9.201827837149247e-07, "loss": 0.3906, "step": 11902 }, { "epoch": 0.20690434389612194, "grad_norm": 2.2454802423929996, "learning_rate": 9.201675255446846e-07, "loss": 0.4103, "step": 11903 }, { "epoch": 0.20692172643362478, "grad_norm": 3.352978292285407, "learning_rate": 9.201522660427061e-07, "loss": 0.4383, "step": 11904 }, { "epoch": 0.2069391089711276, "grad_norm": 1.50842925748994, "learning_rate": 9.201370052090383e-07, "loss": 0.4101, "step": 11905 }, { "epoch": 0.20695649150863044, "grad_norm": 2.744346279410842, "learning_rate": 9.201217430437289e-07, "loss": 0.521, "step": 11906 }, { "epoch": 0.20697387404613327, "grad_norm": 1.7380836934452266, "learning_rate": 9.201064795468267e-07, "loss": 0.3306, "step": 11907 }, { "epoch": 0.20699125658363607, "grad_norm": 1.3802085035864218, "learning_rate": 9.2009121471838e-07, "loss": 0.2994, "step": 11908 }, { "epoch": 0.2070086391211389, "grad_norm": 1.9575537405477712, "learning_rate": 9.200759485584368e-07, "loss": 0.1863, "step": 11909 }, { "epoch": 0.20702602165864173, "grad_norm": 1.1147441192407888, "learning_rate": 9.200606810670461e-07, "loss": 0.2556, "step": 11910 }, { "epoch": 0.20704340419614456, "grad_norm": 1.7760142139387418, "learning_rate": 9.200454122442559e-07, "loss": 0.447, "step": 11911 }, { "epoch": 0.2070607867336474, "grad_norm": 4.322403036986584, "learning_rate": 9.200301420901147e-07, "loss": 0.244, "step": 11912 }, { "epoch": 0.2070781692711502, "grad_norm": 3.1291256936726253, "learning_rate": 9.20014870604671e-07, "loss": 0.3757, "step": 11913 }, { "epoch": 0.20709555180865302, "grad_norm": 1.835308788486742, "learning_rate": 9.199995977879729e-07, "loss": 0.2416, "step": 11914 }, { "epoch": 0.20711293434615585, "grad_norm": 1.4847592181592122, "learning_rate": 9.199843236400693e-07, "loss": 0.261, "step": 11915 }, { "epoch": 0.20713031688365868, "grad_norm": 2.5515503727489532, "learning_rate": 9.199690481610081e-07, "loss": 0.242, "step": 11916 }, { "epoch": 0.2071476994211615, "grad_norm": 2.0670576653949118, "learning_rate": 9.19953771350838e-07, "loss": 0.3363, "step": 11917 }, { "epoch": 0.20716508195866432, "grad_norm": 1.956792333004336, "learning_rate": 9.199384932096073e-07, "loss": 0.5567, "step": 11918 }, { "epoch": 0.20718246449616715, "grad_norm": 6.818146951841488, "learning_rate": 9.199232137373645e-07, "loss": 0.4066, "step": 11919 }, { "epoch": 0.20719984703366998, "grad_norm": 1.6117221406515863, "learning_rate": 9.199079329341581e-07, "loss": 0.3935, "step": 11920 }, { "epoch": 0.2072172295711728, "grad_norm": 2.800678670154188, "learning_rate": 9.198926508000363e-07, "loss": 0.6727, "step": 11921 }, { "epoch": 0.20723461210867564, "grad_norm": 1.8692397000190941, "learning_rate": 9.198773673350477e-07, "loss": 0.3223, "step": 11922 }, { "epoch": 0.20725199464617844, "grad_norm": 1.8599429073773783, "learning_rate": 9.198620825392408e-07, "loss": 0.2217, "step": 11923 }, { "epoch": 0.20726937718368127, "grad_norm": 1.4734337815731193, "learning_rate": 9.19846796412664e-07, "loss": 0.2423, "step": 11924 }, { "epoch": 0.2072867597211841, "grad_norm": 1.9934135282497558, "learning_rate": 9.198315089553656e-07, "loss": 0.2746, "step": 11925 }, { "epoch": 0.20730414225868693, "grad_norm": 1.3540613751592019, "learning_rate": 9.198162201673943e-07, "loss": 0.3511, "step": 11926 }, { "epoch": 0.20732152479618976, "grad_norm": 2.2666978324028135, "learning_rate": 9.198009300487984e-07, "loss": 0.5709, "step": 11927 }, { "epoch": 0.20733890733369256, "grad_norm": 1.7395191321062085, "learning_rate": 9.197856385996262e-07, "loss": 0.5503, "step": 11928 }, { "epoch": 0.2073562898711954, "grad_norm": 2.456105978250409, "learning_rate": 9.197703458199265e-07, "loss": 0.442, "step": 11929 }, { "epoch": 0.20737367240869822, "grad_norm": 1.5992944029642562, "learning_rate": 9.197550517097476e-07, "loss": 0.2719, "step": 11930 }, { "epoch": 0.20739105494620105, "grad_norm": 1.5457458258753332, "learning_rate": 9.19739756269138e-07, "loss": 0.2831, "step": 11931 }, { "epoch": 0.20740843748370388, "grad_norm": 2.298965998061912, "learning_rate": 9.197244594981464e-07, "loss": 0.3744, "step": 11932 }, { "epoch": 0.20742582002120669, "grad_norm": 3.5309154426994778, "learning_rate": 9.197091613968209e-07, "loss": 0.3283, "step": 11933 }, { "epoch": 0.20744320255870952, "grad_norm": 1.3974023873864754, "learning_rate": 9.196938619652101e-07, "loss": 0.3732, "step": 11934 }, { "epoch": 0.20746058509621235, "grad_norm": 1.9422233504010435, "learning_rate": 9.196785612033627e-07, "loss": 0.3731, "step": 11935 }, { "epoch": 0.20747796763371518, "grad_norm": 1.7056571243403418, "learning_rate": 9.196632591113269e-07, "loss": 0.3414, "step": 11936 }, { "epoch": 0.207495350171218, "grad_norm": 2.1857467535798856, "learning_rate": 9.196479556891514e-07, "loss": 0.4205, "step": 11937 }, { "epoch": 0.2075127327087208, "grad_norm": 1.9745411795627947, "learning_rate": 9.196326509368846e-07, "loss": 0.4293, "step": 11938 }, { "epoch": 0.20753011524622364, "grad_norm": 2.4600490235854386, "learning_rate": 9.196173448545751e-07, "loss": 0.3583, "step": 11939 }, { "epoch": 0.20754749778372647, "grad_norm": 1.1805114181383827, "learning_rate": 9.196020374422714e-07, "loss": 0.3195, "step": 11940 }, { "epoch": 0.2075648803212293, "grad_norm": 2.0237450072670815, "learning_rate": 9.19586728700022e-07, "loss": 0.327, "step": 11941 }, { "epoch": 0.20758226285873213, "grad_norm": 3.905654411387628, "learning_rate": 9.195714186278754e-07, "loss": 0.6254, "step": 11942 }, { "epoch": 0.20759964539623493, "grad_norm": 3.1811743381832414, "learning_rate": 9.1955610722588e-07, "loss": 0.4204, "step": 11943 }, { "epoch": 0.20761702793373776, "grad_norm": 2.40858033130438, "learning_rate": 9.195407944940846e-07, "loss": 0.4411, "step": 11944 }, { "epoch": 0.2076344104712406, "grad_norm": 1.96276042394811, "learning_rate": 9.195254804325376e-07, "loss": 0.2756, "step": 11945 }, { "epoch": 0.20765179300874342, "grad_norm": 1.643907586898494, "learning_rate": 9.195101650412875e-07, "loss": 0.2374, "step": 11946 }, { "epoch": 0.20766917554624625, "grad_norm": 2.1172891083597998, "learning_rate": 9.19494848320383e-07, "loss": 0.5879, "step": 11947 }, { "epoch": 0.20768655808374906, "grad_norm": 5.38060707740061, "learning_rate": 9.194795302698723e-07, "loss": 0.6673, "step": 11948 }, { "epoch": 0.2077039406212519, "grad_norm": 1.249465890310051, "learning_rate": 9.194642108898045e-07, "loss": 0.2683, "step": 11949 }, { "epoch": 0.20772132315875472, "grad_norm": 1.2332306057908957, "learning_rate": 9.194488901802277e-07, "loss": 0.3955, "step": 11950 }, { "epoch": 0.20773870569625755, "grad_norm": 2.762268256263949, "learning_rate": 9.194335681411905e-07, "loss": 0.519, "step": 11951 }, { "epoch": 0.20775608823376038, "grad_norm": 1.5815792045855113, "learning_rate": 9.194182447727416e-07, "loss": 0.3893, "step": 11952 }, { "epoch": 0.20777347077126318, "grad_norm": 1.6702663917677005, "learning_rate": 9.194029200749296e-07, "loss": 0.3885, "step": 11953 }, { "epoch": 0.207790853308766, "grad_norm": 1.818421094411038, "learning_rate": 9.193875940478029e-07, "loss": 0.3944, "step": 11954 }, { "epoch": 0.20780823584626884, "grad_norm": 1.9088192506295276, "learning_rate": 9.193722666914104e-07, "loss": 0.4253, "step": 11955 }, { "epoch": 0.20782561838377167, "grad_norm": 2.3627431357637185, "learning_rate": 9.193569380058002e-07, "loss": 0.4711, "step": 11956 }, { "epoch": 0.2078430009212745, "grad_norm": 1.4878911677463715, "learning_rate": 9.193416079910212e-07, "loss": 0.3821, "step": 11957 }, { "epoch": 0.2078603834587773, "grad_norm": 2.1167507906277567, "learning_rate": 9.193262766471219e-07, "loss": 0.3987, "step": 11958 }, { "epoch": 0.20787776599628013, "grad_norm": 1.2150848499084357, "learning_rate": 9.193109439741511e-07, "loss": 0.4832, "step": 11959 }, { "epoch": 0.20789514853378296, "grad_norm": 1.8901128586992055, "learning_rate": 9.192956099721571e-07, "loss": 0.1837, "step": 11960 }, { "epoch": 0.2079125310712858, "grad_norm": 2.027910779980463, "learning_rate": 9.192802746411885e-07, "loss": 0.5157, "step": 11961 }, { "epoch": 0.20792991360878862, "grad_norm": 2.19330421160669, "learning_rate": 9.192649379812942e-07, "loss": 0.3893, "step": 11962 }, { "epoch": 0.20794729614629143, "grad_norm": 2.5410479102261263, "learning_rate": 9.192495999925227e-07, "loss": 0.6659, "step": 11963 }, { "epoch": 0.20796467868379426, "grad_norm": 1.886232800419757, "learning_rate": 9.192342606749224e-07, "loss": 0.3022, "step": 11964 }, { "epoch": 0.2079820612212971, "grad_norm": 1.8716177083875003, "learning_rate": 9.192189200285423e-07, "loss": 0.2647, "step": 11965 }, { "epoch": 0.20799944375879992, "grad_norm": 2.1111827678020894, "learning_rate": 9.192035780534306e-07, "loss": 0.4036, "step": 11966 }, { "epoch": 0.20801682629630275, "grad_norm": 1.3179789888725835, "learning_rate": 9.191882347496361e-07, "loss": 0.4035, "step": 11967 }, { "epoch": 0.20803420883380555, "grad_norm": 1.3965973798735771, "learning_rate": 9.191728901172076e-07, "loss": 0.3063, "step": 11968 }, { "epoch": 0.20805159137130838, "grad_norm": 3.262362528323739, "learning_rate": 9.191575441561934e-07, "loss": 0.3508, "step": 11969 }, { "epoch": 0.2080689739088112, "grad_norm": 1.6165348823879246, "learning_rate": 9.191421968666425e-07, "loss": 0.4396, "step": 11970 }, { "epoch": 0.20808635644631404, "grad_norm": 2.7281047191734222, "learning_rate": 9.191268482486033e-07, "loss": 0.438, "step": 11971 }, { "epoch": 0.20810373898381684, "grad_norm": 2.2601574327429557, "learning_rate": 9.191114983021245e-07, "loss": 0.2921, "step": 11972 }, { "epoch": 0.20812112152131967, "grad_norm": 2.0494030710740385, "learning_rate": 9.190961470272549e-07, "loss": 0.4726, "step": 11973 }, { "epoch": 0.2081385040588225, "grad_norm": 2.184706073890256, "learning_rate": 9.190807944240428e-07, "loss": 0.4093, "step": 11974 }, { "epoch": 0.20815588659632533, "grad_norm": 1.4682258003671225, "learning_rate": 9.190654404925373e-07, "loss": 0.3192, "step": 11975 }, { "epoch": 0.20817326913382816, "grad_norm": 3.0007834101063398, "learning_rate": 9.190500852327868e-07, "loss": 0.3556, "step": 11976 }, { "epoch": 0.20819065167133097, "grad_norm": 2.3503711385683648, "learning_rate": 9.190347286448401e-07, "loss": 0.3968, "step": 11977 }, { "epoch": 0.2082080342088338, "grad_norm": 2.818657476870955, "learning_rate": 9.190193707287457e-07, "loss": 0.4428, "step": 11978 }, { "epoch": 0.20822541674633663, "grad_norm": 1.875315511734954, "learning_rate": 9.190040114845523e-07, "loss": 0.373, "step": 11979 }, { "epoch": 0.20824279928383946, "grad_norm": 2.0900215852493327, "learning_rate": 9.189886509123088e-07, "loss": 0.3504, "step": 11980 }, { "epoch": 0.2082601818213423, "grad_norm": 1.6907442671777182, "learning_rate": 9.189732890120638e-07, "loss": 0.4336, "step": 11981 }, { "epoch": 0.2082775643588451, "grad_norm": 1.4995658140485684, "learning_rate": 9.189579257838657e-07, "loss": 0.1375, "step": 11982 }, { "epoch": 0.20829494689634792, "grad_norm": 1.6672773926328526, "learning_rate": 9.189425612277636e-07, "loss": 0.4588, "step": 11983 }, { "epoch": 0.20831232943385075, "grad_norm": 1.6377498567379196, "learning_rate": 9.18927195343806e-07, "loss": 0.3321, "step": 11984 }, { "epoch": 0.20832971197135358, "grad_norm": 4.27070141748533, "learning_rate": 9.189118281320416e-07, "loss": 0.3457, "step": 11985 }, { "epoch": 0.2083470945088564, "grad_norm": 2.2128996810119426, "learning_rate": 9.188964595925192e-07, "loss": 0.4322, "step": 11986 }, { "epoch": 0.20836447704635921, "grad_norm": 2.0583515942833603, "learning_rate": 9.188810897252873e-07, "loss": 0.5558, "step": 11987 }, { "epoch": 0.20838185958386204, "grad_norm": 1.685492612452605, "learning_rate": 9.188657185303949e-07, "loss": 0.3327, "step": 11988 }, { "epoch": 0.20839924212136487, "grad_norm": 2.613865612638595, "learning_rate": 9.188503460078906e-07, "loss": 0.456, "step": 11989 }, { "epoch": 0.2084166246588677, "grad_norm": 2.0059794944038822, "learning_rate": 9.18834972157823e-07, "loss": 0.3276, "step": 11990 }, { "epoch": 0.20843400719637054, "grad_norm": 1.9902141978271408, "learning_rate": 9.18819596980241e-07, "loss": 0.3048, "step": 11991 }, { "epoch": 0.20845138973387334, "grad_norm": 1.520708170996301, "learning_rate": 9.188042204751934e-07, "loss": 0.4322, "step": 11992 }, { "epoch": 0.20846877227137617, "grad_norm": 2.9279577771427716, "learning_rate": 9.187888426427286e-07, "loss": 0.3486, "step": 11993 }, { "epoch": 0.208486154808879, "grad_norm": 2.0492140364297544, "learning_rate": 9.187734634828956e-07, "loss": 0.633, "step": 11994 }, { "epoch": 0.20850353734638183, "grad_norm": 1.678890198389439, "learning_rate": 9.187580829957431e-07, "loss": 0.5442, "step": 11995 }, { "epoch": 0.20852091988388466, "grad_norm": 2.0931084030118, "learning_rate": 9.187427011813198e-07, "loss": 0.5468, "step": 11996 }, { "epoch": 0.20853830242138746, "grad_norm": 2.5203687686066987, "learning_rate": 9.187273180396745e-07, "loss": 0.3536, "step": 11997 }, { "epoch": 0.2085556849588903, "grad_norm": 1.781312083688103, "learning_rate": 9.187119335708559e-07, "loss": 0.4743, "step": 11998 }, { "epoch": 0.20857306749639312, "grad_norm": 2.1988134535363475, "learning_rate": 9.18696547774913e-07, "loss": 0.3714, "step": 11999 }, { "epoch": 0.20859045003389595, "grad_norm": 4.924610439002051, "learning_rate": 9.186811606518943e-07, "loss": 0.5081, "step": 12000 }, { "epoch": 0.20860783257139878, "grad_norm": 2.1334481752184353, "learning_rate": 9.186657722018485e-07, "loss": 0.6461, "step": 12001 }, { "epoch": 0.20862521510890158, "grad_norm": 1.0836614016203725, "learning_rate": 9.186503824248246e-07, "loss": 0.4672, "step": 12002 }, { "epoch": 0.20864259764640442, "grad_norm": 1.7221720528648305, "learning_rate": 9.186349913208714e-07, "loss": 0.2552, "step": 12003 }, { "epoch": 0.20865998018390725, "grad_norm": 1.8062786844540515, "learning_rate": 9.186195988900376e-07, "loss": 0.366, "step": 12004 }, { "epoch": 0.20867736272141008, "grad_norm": 1.688155687601391, "learning_rate": 9.186042051323718e-07, "loss": 0.3517, "step": 12005 }, { "epoch": 0.2086947452589129, "grad_norm": 1.6518460321946384, "learning_rate": 9.18588810047923e-07, "loss": 0.4454, "step": 12006 }, { "epoch": 0.2087121277964157, "grad_norm": 2.067377650341737, "learning_rate": 9.1857341363674e-07, "loss": 0.2123, "step": 12007 }, { "epoch": 0.20872951033391854, "grad_norm": 1.7024504170683648, "learning_rate": 9.185580158988717e-07, "loss": 0.2324, "step": 12008 }, { "epoch": 0.20874689287142137, "grad_norm": 1.3904129823963916, "learning_rate": 9.185426168343666e-07, "loss": 0.3454, "step": 12009 }, { "epoch": 0.2087642754089242, "grad_norm": 1.8213269216370938, "learning_rate": 9.185272164432735e-07, "loss": 0.4161, "step": 12010 }, { "epoch": 0.20878165794642703, "grad_norm": 2.369382730824609, "learning_rate": 9.185118147256416e-07, "loss": 0.7164, "step": 12011 }, { "epoch": 0.20879904048392983, "grad_norm": 2.274989019624182, "learning_rate": 9.184964116815195e-07, "loss": 0.5149, "step": 12012 }, { "epoch": 0.20881642302143266, "grad_norm": 1.5412828301310193, "learning_rate": 9.184810073109562e-07, "loss": 0.344, "step": 12013 }, { "epoch": 0.2088338055589355, "grad_norm": 1.7026801362108854, "learning_rate": 9.18465601614e-07, "loss": 0.4761, "step": 12014 }, { "epoch": 0.20885118809643832, "grad_norm": 1.5947009152310434, "learning_rate": 9.184501945907002e-07, "loss": 0.3045, "step": 12015 }, { "epoch": 0.20886857063394115, "grad_norm": 1.5907473995713788, "learning_rate": 9.184347862411054e-07, "loss": 0.3124, "step": 12016 }, { "epoch": 0.20888595317144396, "grad_norm": 1.5976742964358135, "learning_rate": 9.184193765652647e-07, "loss": 0.4003, "step": 12017 }, { "epoch": 0.20890333570894679, "grad_norm": 1.6454010691704646, "learning_rate": 9.184039655632268e-07, "loss": 0.3784, "step": 12018 }, { "epoch": 0.20892071824644962, "grad_norm": 1.3145652629242144, "learning_rate": 9.183885532350404e-07, "loss": 0.4219, "step": 12019 }, { "epoch": 0.20893810078395245, "grad_norm": 1.4654986794201286, "learning_rate": 9.183731395807544e-07, "loss": 0.2842, "step": 12020 }, { "epoch": 0.20895548332145528, "grad_norm": 1.527220957115308, "learning_rate": 9.18357724600418e-07, "loss": 0.3133, "step": 12021 }, { "epoch": 0.20897286585895808, "grad_norm": 2.0374473409106733, "learning_rate": 9.183423082940796e-07, "loss": 0.5616, "step": 12022 }, { "epoch": 0.2089902483964609, "grad_norm": 1.8434634179733167, "learning_rate": 9.183268906617882e-07, "loss": 0.4086, "step": 12023 }, { "epoch": 0.20900763093396374, "grad_norm": 1.3169067380712478, "learning_rate": 9.18311471703593e-07, "loss": 0.3124, "step": 12024 }, { "epoch": 0.20902501347146657, "grad_norm": 1.3715712432350946, "learning_rate": 9.182960514195423e-07, "loss": 0.3329, "step": 12025 }, { "epoch": 0.2090423960089694, "grad_norm": 2.0204298172943354, "learning_rate": 9.182806298096854e-07, "loss": 0.5258, "step": 12026 }, { "epoch": 0.2090597785464722, "grad_norm": 1.8173126867558194, "learning_rate": 9.18265206874071e-07, "loss": 0.4131, "step": 12027 }, { "epoch": 0.20907716108397503, "grad_norm": 2.2770103892475957, "learning_rate": 9.18249782612748e-07, "loss": 0.6973, "step": 12028 }, { "epoch": 0.20909454362147786, "grad_norm": 1.935361233655293, "learning_rate": 9.182343570257653e-07, "loss": 0.5289, "step": 12029 }, { "epoch": 0.2091119261589807, "grad_norm": 1.9122982728149072, "learning_rate": 9.18218930113172e-07, "loss": 1.0348, "step": 12030 }, { "epoch": 0.20912930869648352, "grad_norm": 1.7724701232119064, "learning_rate": 9.182035018750167e-07, "loss": 0.3644, "step": 12031 }, { "epoch": 0.20914669123398633, "grad_norm": 1.518001953349554, "learning_rate": 9.181880723113484e-07, "loss": 0.2643, "step": 12032 }, { "epoch": 0.20916407377148916, "grad_norm": 1.8582409573266383, "learning_rate": 9.18172641422216e-07, "loss": 0.4642, "step": 12033 }, { "epoch": 0.209181456308992, "grad_norm": 1.7011461693944716, "learning_rate": 9.181572092076684e-07, "loss": 0.4314, "step": 12034 }, { "epoch": 0.20919883884649482, "grad_norm": 2.1717882803527733, "learning_rate": 9.181417756677545e-07, "loss": 0.322, "step": 12035 }, { "epoch": 0.20921622138399765, "grad_norm": 3.538800904837884, "learning_rate": 9.181263408025233e-07, "loss": 0.3069, "step": 12036 }, { "epoch": 0.20923360392150045, "grad_norm": 2.190839209536185, "learning_rate": 9.181109046120237e-07, "loss": 0.4308, "step": 12037 }, { "epoch": 0.20925098645900328, "grad_norm": 1.741059521071056, "learning_rate": 9.180954670963046e-07, "loss": 0.1895, "step": 12038 }, { "epoch": 0.2092683689965061, "grad_norm": 3.0561516371528645, "learning_rate": 9.180800282554149e-07, "loss": 0.3565, "step": 12039 }, { "epoch": 0.20928575153400894, "grad_norm": 1.5010178100124583, "learning_rate": 9.180645880894036e-07, "loss": 0.1863, "step": 12040 }, { "epoch": 0.20930313407151177, "grad_norm": 1.988570363612794, "learning_rate": 9.180491465983195e-07, "loss": 0.4702, "step": 12041 }, { "epoch": 0.20932051660901457, "grad_norm": 1.6614143725817794, "learning_rate": 9.180337037822116e-07, "loss": 0.271, "step": 12042 }, { "epoch": 0.2093378991465174, "grad_norm": 2.602919531675777, "learning_rate": 9.180182596411291e-07, "loss": 0.4351, "step": 12043 }, { "epoch": 0.20935528168402023, "grad_norm": 1.7188330885633252, "learning_rate": 9.180028141751205e-07, "loss": 0.2415, "step": 12044 }, { "epoch": 0.20937266422152306, "grad_norm": 2.1274659475326856, "learning_rate": 9.179873673842352e-07, "loss": 0.4943, "step": 12045 }, { "epoch": 0.2093900467590259, "grad_norm": 2.0291722539606365, "learning_rate": 9.179719192685219e-07, "loss": 0.4788, "step": 12046 }, { "epoch": 0.2094074292965287, "grad_norm": 2.4873230733744687, "learning_rate": 9.179564698280295e-07, "loss": 0.3016, "step": 12047 }, { "epoch": 0.20942481183403153, "grad_norm": 2.2556805347833793, "learning_rate": 9.179410190628074e-07, "loss": 0.5473, "step": 12048 }, { "epoch": 0.20944219437153436, "grad_norm": 1.584305666266178, "learning_rate": 9.179255669729039e-07, "loss": 0.4302, "step": 12049 }, { "epoch": 0.2094595769090372, "grad_norm": 2.200456317354315, "learning_rate": 9.179101135583684e-07, "loss": 0.4118, "step": 12050 }, { "epoch": 0.20947695944654002, "grad_norm": 2.1595705026246814, "learning_rate": 9.1789465881925e-07, "loss": 0.3183, "step": 12051 }, { "epoch": 0.20949434198404282, "grad_norm": 1.9254983138758928, "learning_rate": 9.178792027555973e-07, "loss": 0.2931, "step": 12052 }, { "epoch": 0.20951172452154565, "grad_norm": 1.228277367157787, "learning_rate": 9.178637453674595e-07, "loss": 0.1915, "step": 12053 }, { "epoch": 0.20952910705904848, "grad_norm": 2.247409820469615, "learning_rate": 9.178482866548857e-07, "loss": 0.5797, "step": 12054 }, { "epoch": 0.2095464895965513, "grad_norm": 1.3437395241266996, "learning_rate": 9.178328266179247e-07, "loss": 0.2286, "step": 12055 }, { "epoch": 0.20956387213405414, "grad_norm": 2.2740981397921165, "learning_rate": 9.178173652566257e-07, "loss": 0.2718, "step": 12056 }, { "epoch": 0.20958125467155694, "grad_norm": 1.8996547842997826, "learning_rate": 9.178019025710373e-07, "loss": 0.356, "step": 12057 }, { "epoch": 0.20959863720905977, "grad_norm": 2.185902354840472, "learning_rate": 9.17786438561209e-07, "loss": 0.5178, "step": 12058 }, { "epoch": 0.2096160197465626, "grad_norm": 1.7273819365185954, "learning_rate": 9.177709732271897e-07, "loss": 0.396, "step": 12059 }, { "epoch": 0.20963340228406543, "grad_norm": 1.2389598423431423, "learning_rate": 9.177555065690281e-07, "loss": 0.2105, "step": 12060 }, { "epoch": 0.20965078482156826, "grad_norm": 2.290483690544432, "learning_rate": 9.177400385867735e-07, "loss": 0.2379, "step": 12061 }, { "epoch": 0.20966816735907107, "grad_norm": 1.7944524117621758, "learning_rate": 9.177245692804749e-07, "loss": 0.2362, "step": 12062 }, { "epoch": 0.2096855498965739, "grad_norm": 2.4234890198659262, "learning_rate": 9.177090986501813e-07, "loss": 0.4419, "step": 12063 }, { "epoch": 0.20970293243407673, "grad_norm": 1.8436193198117206, "learning_rate": 9.176936266959417e-07, "loss": 0.2752, "step": 12064 }, { "epoch": 0.20972031497157956, "grad_norm": 1.9949805285279385, "learning_rate": 9.176781534178053e-07, "loss": 0.2885, "step": 12065 }, { "epoch": 0.2097376975090824, "grad_norm": 1.4254752964802178, "learning_rate": 9.176626788158209e-07, "loss": 0.219, "step": 12066 }, { "epoch": 0.2097550800465852, "grad_norm": 1.6751339625559554, "learning_rate": 9.176472028900377e-07, "loss": 0.3558, "step": 12067 }, { "epoch": 0.20977246258408802, "grad_norm": 1.2154164475528157, "learning_rate": 9.176317256405047e-07, "loss": 0.2442, "step": 12068 }, { "epoch": 0.20978984512159085, "grad_norm": 2.802177420956646, "learning_rate": 9.17616247067271e-07, "loss": 0.2197, "step": 12069 }, { "epoch": 0.20980722765909368, "grad_norm": 3.7429863758447803, "learning_rate": 9.176007671703856e-07, "loss": 0.6475, "step": 12070 }, { "epoch": 0.2098246101965965, "grad_norm": 1.5939850958541697, "learning_rate": 9.175852859498977e-07, "loss": 0.4075, "step": 12071 }, { "epoch": 0.20984199273409931, "grad_norm": 1.708501110265628, "learning_rate": 9.175698034058561e-07, "loss": 0.4276, "step": 12072 }, { "epoch": 0.20985937527160214, "grad_norm": 2.1783727123661674, "learning_rate": 9.175543195383102e-07, "loss": 0.363, "step": 12073 }, { "epoch": 0.20987675780910497, "grad_norm": 1.5838919660562434, "learning_rate": 9.175388343473089e-07, "loss": 0.246, "step": 12074 }, { "epoch": 0.2098941403466078, "grad_norm": 1.5175391848772766, "learning_rate": 9.175233478329011e-07, "loss": 0.2511, "step": 12075 }, { "epoch": 0.20991152288411064, "grad_norm": 1.6574273671435218, "learning_rate": 9.175078599951363e-07, "loss": 0.3282, "step": 12076 }, { "epoch": 0.20992890542161344, "grad_norm": 2.6451243746976276, "learning_rate": 9.174923708340634e-07, "loss": 0.2803, "step": 12077 }, { "epoch": 0.20994628795911627, "grad_norm": 1.411456142718125, "learning_rate": 9.174768803497312e-07, "loss": 0.4236, "step": 12078 }, { "epoch": 0.2099636704966191, "grad_norm": 3.7252740913445432, "learning_rate": 9.174613885421893e-07, "loss": 0.4013, "step": 12079 }, { "epoch": 0.20998105303412193, "grad_norm": 1.245492514397446, "learning_rate": 9.174458954114865e-07, "loss": 0.3957, "step": 12080 }, { "epoch": 0.20999843557162476, "grad_norm": 2.4402922878265576, "learning_rate": 9.17430400957672e-07, "loss": 0.4281, "step": 12081 }, { "epoch": 0.21001581810912756, "grad_norm": 2.312421489966282, "learning_rate": 9.174149051807949e-07, "loss": 0.6169, "step": 12082 }, { "epoch": 0.2100332006466304, "grad_norm": 1.3340130350755373, "learning_rate": 9.173994080809042e-07, "loss": 0.3515, "step": 12083 }, { "epoch": 0.21005058318413322, "grad_norm": 2.4130549222939126, "learning_rate": 9.173839096580492e-07, "loss": 0.5314, "step": 12084 }, { "epoch": 0.21006796572163605, "grad_norm": 1.8805028852886445, "learning_rate": 9.173684099122788e-07, "loss": 0.3838, "step": 12085 }, { "epoch": 0.21008534825913888, "grad_norm": 1.8267817927152141, "learning_rate": 9.173529088436423e-07, "loss": 0.545, "step": 12086 }, { "epoch": 0.21010273079664168, "grad_norm": 1.8601553894791827, "learning_rate": 9.173374064521889e-07, "loss": 0.3156, "step": 12087 }, { "epoch": 0.21012011333414451, "grad_norm": 1.5426614898268443, "learning_rate": 9.173219027379676e-07, "loss": 0.1967, "step": 12088 }, { "epoch": 0.21013749587164735, "grad_norm": 1.6182565918175016, "learning_rate": 9.173063977010276e-07, "loss": 0.4359, "step": 12089 }, { "epoch": 0.21015487840915018, "grad_norm": 1.880838168526304, "learning_rate": 9.172908913414179e-07, "loss": 0.4133, "step": 12090 }, { "epoch": 0.210172260946653, "grad_norm": 2.3459879765692104, "learning_rate": 9.172753836591878e-07, "loss": 0.5801, "step": 12091 }, { "epoch": 0.2101896434841558, "grad_norm": 2.0845665381704825, "learning_rate": 9.172598746543864e-07, "loss": 0.1654, "step": 12092 }, { "epoch": 0.21020702602165864, "grad_norm": 1.976553698485713, "learning_rate": 9.17244364327063e-07, "loss": 0.5536, "step": 12093 }, { "epoch": 0.21022440855916147, "grad_norm": 1.6918706728579116, "learning_rate": 9.172288526772666e-07, "loss": 0.435, "step": 12094 }, { "epoch": 0.2102417910966643, "grad_norm": 1.610953900072068, "learning_rate": 9.172133397050463e-07, "loss": 0.3657, "step": 12095 }, { "epoch": 0.21025917363416713, "grad_norm": 3.4824770573597883, "learning_rate": 9.171978254104513e-07, "loss": 0.4366, "step": 12096 }, { "epoch": 0.21027655617166993, "grad_norm": 1.5733700057765523, "learning_rate": 9.17182309793531e-07, "loss": 0.3284, "step": 12097 }, { "epoch": 0.21029393870917276, "grad_norm": 1.262418081745454, "learning_rate": 9.171667928543344e-07, "loss": 0.4817, "step": 12098 }, { "epoch": 0.2103113212466756, "grad_norm": 1.394799156549783, "learning_rate": 9.171512745929106e-07, "loss": 0.3331, "step": 12099 }, { "epoch": 0.21032870378417842, "grad_norm": 1.1824782008346384, "learning_rate": 9.171357550093091e-07, "loss": 0.2873, "step": 12100 }, { "epoch": 0.21034608632168125, "grad_norm": 2.252024887878078, "learning_rate": 9.171202341035787e-07, "loss": 0.2824, "step": 12101 }, { "epoch": 0.21036346885918406, "grad_norm": 1.9520287386632746, "learning_rate": 9.171047118757687e-07, "loss": 0.2658, "step": 12102 }, { "epoch": 0.21038085139668689, "grad_norm": 0.7119415234951458, "learning_rate": 9.170891883259284e-07, "loss": 0.2687, "step": 12103 }, { "epoch": 0.21039823393418972, "grad_norm": 1.9092216075142305, "learning_rate": 9.17073663454107e-07, "loss": 0.4347, "step": 12104 }, { "epoch": 0.21041561647169255, "grad_norm": 1.6038713575210122, "learning_rate": 9.170581372603537e-07, "loss": 0.2939, "step": 12105 }, { "epoch": 0.21043299900919538, "grad_norm": 2.4835892247912708, "learning_rate": 9.170426097447177e-07, "loss": 0.5784, "step": 12106 }, { "epoch": 0.21045038154669818, "grad_norm": 2.6592432641615495, "learning_rate": 9.170270809072483e-07, "loss": 0.3133, "step": 12107 }, { "epoch": 0.210467764084201, "grad_norm": 1.8513090381199047, "learning_rate": 9.170115507479943e-07, "loss": 0.4127, "step": 12108 }, { "epoch": 0.21048514662170384, "grad_norm": 1.3276969477443556, "learning_rate": 9.169960192670055e-07, "loss": 0.4007, "step": 12109 }, { "epoch": 0.21050252915920667, "grad_norm": 1.816990060439758, "learning_rate": 9.169804864643308e-07, "loss": 0.2995, "step": 12110 }, { "epoch": 0.21051991169670947, "grad_norm": 1.6755760632860122, "learning_rate": 9.169649523400196e-07, "loss": 0.3795, "step": 12111 }, { "epoch": 0.2105372942342123, "grad_norm": 1.484086844126674, "learning_rate": 9.169494168941208e-07, "loss": 0.3531, "step": 12112 }, { "epoch": 0.21055467677171513, "grad_norm": 1.8321877040176595, "learning_rate": 9.169338801266842e-07, "loss": 0.2654, "step": 12113 }, { "epoch": 0.21057205930921796, "grad_norm": 2.078647664958289, "learning_rate": 9.169183420377584e-07, "loss": 0.6421, "step": 12114 }, { "epoch": 0.2105894418467208, "grad_norm": 1.1865712582336683, "learning_rate": 9.169028026273931e-07, "loss": 0.3357, "step": 12115 }, { "epoch": 0.2106068243842236, "grad_norm": 1.943716786529067, "learning_rate": 9.168872618956376e-07, "loss": 0.3501, "step": 12116 }, { "epoch": 0.21062420692172643, "grad_norm": 1.8841544936059678, "learning_rate": 9.168717198425409e-07, "loss": 0.4393, "step": 12117 }, { "epoch": 0.21064158945922926, "grad_norm": 2.0982870006657963, "learning_rate": 9.168561764681522e-07, "loss": 0.3015, "step": 12118 }, { "epoch": 0.2106589719967321, "grad_norm": 1.472458248325469, "learning_rate": 9.16840631772521e-07, "loss": 0.2314, "step": 12119 }, { "epoch": 0.21067635453423492, "grad_norm": 2.3342830153769585, "learning_rate": 9.168250857556964e-07, "loss": 0.3886, "step": 12120 }, { "epoch": 0.21069373707173772, "grad_norm": 1.7726821419930892, "learning_rate": 9.168095384177279e-07, "loss": 0.4099, "step": 12121 }, { "epoch": 0.21071111960924055, "grad_norm": 1.480267905516409, "learning_rate": 9.167939897586645e-07, "loss": 0.371, "step": 12122 }, { "epoch": 0.21072850214674338, "grad_norm": 3.7716767902217114, "learning_rate": 9.167784397785556e-07, "loss": 0.619, "step": 12123 }, { "epoch": 0.2107458846842462, "grad_norm": 1.7487114587749872, "learning_rate": 9.167628884774506e-07, "loss": 0.2949, "step": 12124 }, { "epoch": 0.21076326722174904, "grad_norm": 1.175194311252089, "learning_rate": 9.167473358553987e-07, "loss": 0.2303, "step": 12125 }, { "epoch": 0.21078064975925184, "grad_norm": 1.7203700819267729, "learning_rate": 9.167317819124491e-07, "loss": 0.4615, "step": 12126 }, { "epoch": 0.21079803229675467, "grad_norm": 3.6130739562248086, "learning_rate": 9.167162266486512e-07, "loss": 0.3815, "step": 12127 }, { "epoch": 0.2108154148342575, "grad_norm": 1.3896313225607733, "learning_rate": 9.167006700640543e-07, "loss": 0.2384, "step": 12128 }, { "epoch": 0.21083279737176033, "grad_norm": 1.7318259433792629, "learning_rate": 9.166851121587075e-07, "loss": 0.2801, "step": 12129 }, { "epoch": 0.21085017990926316, "grad_norm": 1.8896833494746628, "learning_rate": 9.166695529326607e-07, "loss": 0.4085, "step": 12130 }, { "epoch": 0.21086756244676597, "grad_norm": 1.6519797558126148, "learning_rate": 9.166539923859625e-07, "loss": 0.2874, "step": 12131 }, { "epoch": 0.2108849449842688, "grad_norm": 1.712833031693462, "learning_rate": 9.166384305186627e-07, "loss": 0.2527, "step": 12132 }, { "epoch": 0.21090232752177163, "grad_norm": 3.3101004266067835, "learning_rate": 9.166228673308104e-07, "loss": 0.5195, "step": 12133 }, { "epoch": 0.21091971005927446, "grad_norm": 1.158226507476904, "learning_rate": 9.16607302822455e-07, "loss": 0.2843, "step": 12134 }, { "epoch": 0.2109370925967773, "grad_norm": 2.899585501065728, "learning_rate": 9.165917369936459e-07, "loss": 0.4066, "step": 12135 }, { "epoch": 0.2109544751342801, "grad_norm": 1.9108706498252728, "learning_rate": 9.165761698444323e-07, "loss": 0.2434, "step": 12136 }, { "epoch": 0.21097185767178292, "grad_norm": 1.6469250234430268, "learning_rate": 9.165606013748636e-07, "loss": 0.4975, "step": 12137 }, { "epoch": 0.21098924020928575, "grad_norm": 1.8913507489517387, "learning_rate": 9.165450315849891e-07, "loss": 0.432, "step": 12138 }, { "epoch": 0.21100662274678858, "grad_norm": 3.1514300814983773, "learning_rate": 9.165294604748583e-07, "loss": 0.2741, "step": 12139 }, { "epoch": 0.2110240052842914, "grad_norm": 1.664235234909764, "learning_rate": 9.165138880445203e-07, "loss": 0.2949, "step": 12140 }, { "epoch": 0.2110413878217942, "grad_norm": 3.475789525038451, "learning_rate": 9.164983142940247e-07, "loss": 0.4967, "step": 12141 }, { "epoch": 0.21105877035929704, "grad_norm": 1.5808657378209539, "learning_rate": 9.164827392234207e-07, "loss": 0.3015, "step": 12142 }, { "epoch": 0.21107615289679987, "grad_norm": 1.7083826137561622, "learning_rate": 9.164671628327578e-07, "loss": 0.3362, "step": 12143 }, { "epoch": 0.2110935354343027, "grad_norm": 1.5179883084151407, "learning_rate": 9.164515851220854e-07, "loss": 0.6519, "step": 12144 }, { "epoch": 0.21111091797180553, "grad_norm": 1.281538495808173, "learning_rate": 9.164360060914526e-07, "loss": 0.2114, "step": 12145 }, { "epoch": 0.21112830050930834, "grad_norm": 1.6459871501903038, "learning_rate": 9.16420425740909e-07, "loss": 0.3722, "step": 12146 }, { "epoch": 0.21114568304681117, "grad_norm": 3.874793571292897, "learning_rate": 9.16404844070504e-07, "loss": 0.4854, "step": 12147 }, { "epoch": 0.211163065584314, "grad_norm": 1.1207064038932177, "learning_rate": 9.163892610802868e-07, "loss": 0.2188, "step": 12148 }, { "epoch": 0.21118044812181683, "grad_norm": 1.2869338812621665, "learning_rate": 9.16373676770307e-07, "loss": 0.4198, "step": 12149 }, { "epoch": 0.21119783065931966, "grad_norm": 1.9687808209198614, "learning_rate": 9.163580911406139e-07, "loss": 0.3423, "step": 12150 }, { "epoch": 0.21121521319682246, "grad_norm": 1.893137793529505, "learning_rate": 9.16342504191257e-07, "loss": 0.2337, "step": 12151 }, { "epoch": 0.2112325957343253, "grad_norm": 1.2638767591679503, "learning_rate": 9.163269159222855e-07, "loss": 0.3785, "step": 12152 }, { "epoch": 0.21124997827182812, "grad_norm": 1.9859500316243817, "learning_rate": 9.16311326333749e-07, "loss": 0.2151, "step": 12153 }, { "epoch": 0.21126736080933095, "grad_norm": 1.3648261732051252, "learning_rate": 9.162957354256968e-07, "loss": 0.3025, "step": 12154 }, { "epoch": 0.21128474334683378, "grad_norm": 2.9909123932572905, "learning_rate": 9.162801431981784e-07, "loss": 0.4079, "step": 12155 }, { "epoch": 0.21130212588433658, "grad_norm": 1.7799032603925966, "learning_rate": 9.162645496512431e-07, "loss": 0.2651, "step": 12156 }, { "epoch": 0.2113195084218394, "grad_norm": 1.934942120422101, "learning_rate": 9.162489547849404e-07, "loss": 0.3705, "step": 12157 }, { "epoch": 0.21133689095934224, "grad_norm": 1.8827136568081007, "learning_rate": 9.162333585993197e-07, "loss": 0.3294, "step": 12158 }, { "epoch": 0.21135427349684507, "grad_norm": 1.597671855881907, "learning_rate": 9.162177610944304e-07, "loss": 0.2007, "step": 12159 }, { "epoch": 0.2113716560343479, "grad_norm": 2.537454226433867, "learning_rate": 9.162021622703222e-07, "loss": 0.4834, "step": 12160 }, { "epoch": 0.2113890385718507, "grad_norm": 2.872032885204099, "learning_rate": 9.161865621270441e-07, "loss": 0.3449, "step": 12161 }, { "epoch": 0.21140642110935354, "grad_norm": 2.235589118875215, "learning_rate": 9.161709606646459e-07, "loss": 0.5388, "step": 12162 }, { "epoch": 0.21142380364685637, "grad_norm": 3.1957007454964996, "learning_rate": 9.161553578831769e-07, "loss": 0.4544, "step": 12163 }, { "epoch": 0.2114411861843592, "grad_norm": 1.4589798663712301, "learning_rate": 9.161397537826867e-07, "loss": 0.3061, "step": 12164 }, { "epoch": 0.21145856872186203, "grad_norm": 1.8388418672657791, "learning_rate": 9.161241483632245e-07, "loss": 0.2418, "step": 12165 }, { "epoch": 0.21147595125936483, "grad_norm": 1.7839709303188647, "learning_rate": 9.1610854162484e-07, "loss": 0.2286, "step": 12166 }, { "epoch": 0.21149333379686766, "grad_norm": 2.2072416641815, "learning_rate": 9.160929335675825e-07, "loss": 0.3358, "step": 12167 }, { "epoch": 0.2115107163343705, "grad_norm": 5.488096953425497, "learning_rate": 9.160773241915015e-07, "loss": 0.3346, "step": 12168 }, { "epoch": 0.21152809887187332, "grad_norm": 1.7136814960843778, "learning_rate": 9.160617134966465e-07, "loss": 0.522, "step": 12169 }, { "epoch": 0.21154548140937615, "grad_norm": 1.9756332683431768, "learning_rate": 9.160461014830672e-07, "loss": 0.3305, "step": 12170 }, { "epoch": 0.21156286394687895, "grad_norm": 2.786262593196821, "learning_rate": 9.160304881508125e-07, "loss": 0.4233, "step": 12171 }, { "epoch": 0.21158024648438178, "grad_norm": 2.379723339373312, "learning_rate": 9.160148734999326e-07, "loss": 0.2809, "step": 12172 }, { "epoch": 0.21159762902188461, "grad_norm": 1.3279490314742386, "learning_rate": 9.159992575304765e-07, "loss": 0.3198, "step": 12173 }, { "epoch": 0.21161501155938744, "grad_norm": 1.5355305321407264, "learning_rate": 9.159836402424938e-07, "loss": 0.2823, "step": 12174 }, { "epoch": 0.21163239409689028, "grad_norm": 2.268030058928462, "learning_rate": 9.159680216360342e-07, "loss": 0.3324, "step": 12175 }, { "epoch": 0.21164977663439308, "grad_norm": 2.7982519597213242, "learning_rate": 9.159524017111471e-07, "loss": 0.4962, "step": 12176 }, { "epoch": 0.2116671591718959, "grad_norm": 1.5151861608366564, "learning_rate": 9.159367804678817e-07, "loss": 0.3437, "step": 12177 }, { "epoch": 0.21168454170939874, "grad_norm": 2.1020721242197467, "learning_rate": 9.15921157906288e-07, "loss": 0.2865, "step": 12178 }, { "epoch": 0.21170192424690157, "grad_norm": 1.7164554224785875, "learning_rate": 9.159055340264152e-07, "loss": 0.2681, "step": 12179 }, { "epoch": 0.2117193067844044, "grad_norm": 3.7715148652602837, "learning_rate": 9.15889908828313e-07, "loss": 0.463, "step": 12180 }, { "epoch": 0.2117366893219072, "grad_norm": 1.3343709131305141, "learning_rate": 9.158742823120307e-07, "loss": 0.2449, "step": 12181 }, { "epoch": 0.21175407185941003, "grad_norm": 2.443371900555791, "learning_rate": 9.158586544776179e-07, "loss": 0.4599, "step": 12182 }, { "epoch": 0.21177145439691286, "grad_norm": 2.560348047340388, "learning_rate": 9.158430253251244e-07, "loss": 0.5825, "step": 12183 }, { "epoch": 0.2117888369344157, "grad_norm": 3.028888562165552, "learning_rate": 9.158273948545993e-07, "loss": 0.657, "step": 12184 }, { "epoch": 0.21180621947191852, "grad_norm": 1.2176326278414298, "learning_rate": 9.158117630660927e-07, "loss": 0.1748, "step": 12185 }, { "epoch": 0.21182360200942132, "grad_norm": 2.0225998670647685, "learning_rate": 9.157961299596535e-07, "loss": 0.3923, "step": 12186 }, { "epoch": 0.21184098454692415, "grad_norm": 3.456905738317985, "learning_rate": 9.157804955353317e-07, "loss": 0.3678, "step": 12187 }, { "epoch": 0.21185836708442699, "grad_norm": 3.2241353840398492, "learning_rate": 9.157648597931768e-07, "loss": 0.4251, "step": 12188 }, { "epoch": 0.21187574962192982, "grad_norm": 1.4883193104373917, "learning_rate": 9.157492227332382e-07, "loss": 0.3893, "step": 12189 }, { "epoch": 0.21189313215943265, "grad_norm": 2.044974097513903, "learning_rate": 9.157335843555655e-07, "loss": 0.3591, "step": 12190 }, { "epoch": 0.21191051469693545, "grad_norm": 2.4901634021772, "learning_rate": 9.157179446602083e-07, "loss": 0.3025, "step": 12191 }, { "epoch": 0.21192789723443828, "grad_norm": 1.5088876543031053, "learning_rate": 9.157023036472163e-07, "loss": 0.2922, "step": 12192 }, { "epoch": 0.2119452797719411, "grad_norm": 2.875141066864355, "learning_rate": 9.156866613166388e-07, "loss": 0.2634, "step": 12193 }, { "epoch": 0.21196266230944394, "grad_norm": 0.860477863334766, "learning_rate": 9.156710176685256e-07, "loss": 0.2113, "step": 12194 }, { "epoch": 0.21198004484694677, "grad_norm": 3.0399191412937157, "learning_rate": 9.156553727029264e-07, "loss": 0.4523, "step": 12195 }, { "epoch": 0.21199742738444957, "grad_norm": 1.911052562416052, "learning_rate": 9.156397264198903e-07, "loss": 0.3902, "step": 12196 }, { "epoch": 0.2120148099219524, "grad_norm": 1.7156472428330618, "learning_rate": 9.156240788194674e-07, "loss": 0.2597, "step": 12197 }, { "epoch": 0.21203219245945523, "grad_norm": 3.0548450160667304, "learning_rate": 9.156084299017071e-07, "loss": 0.5793, "step": 12198 }, { "epoch": 0.21204957499695806, "grad_norm": 3.3468193820182472, "learning_rate": 9.155927796666589e-07, "loss": 0.5763, "step": 12199 }, { "epoch": 0.2120669575344609, "grad_norm": 1.3012003382598143, "learning_rate": 9.155771281143725e-07, "loss": 0.2031, "step": 12200 }, { "epoch": 0.2120843400719637, "grad_norm": 1.9670145192785922, "learning_rate": 9.155614752448974e-07, "loss": 0.4185, "step": 12201 }, { "epoch": 0.21210172260946653, "grad_norm": 1.192171255162547, "learning_rate": 9.155458210582836e-07, "loss": 0.2909, "step": 12202 }, { "epoch": 0.21211910514696936, "grad_norm": 1.4612455842039216, "learning_rate": 9.155301655545802e-07, "loss": 0.3925, "step": 12203 }, { "epoch": 0.21213648768447219, "grad_norm": 1.5548758571794774, "learning_rate": 9.155145087338372e-07, "loss": 0.2839, "step": 12204 }, { "epoch": 0.21215387022197502, "grad_norm": 2.32627118355983, "learning_rate": 9.15498850596104e-07, "loss": 0.329, "step": 12205 }, { "epoch": 0.21217125275947782, "grad_norm": 1.66289588243764, "learning_rate": 9.154831911414302e-07, "loss": 0.3194, "step": 12206 }, { "epoch": 0.21218863529698065, "grad_norm": 2.784001885084976, "learning_rate": 9.154675303698657e-07, "loss": 0.3614, "step": 12207 }, { "epoch": 0.21220601783448348, "grad_norm": 1.3331176542192986, "learning_rate": 9.1545186828146e-07, "loss": 0.2066, "step": 12208 }, { "epoch": 0.2122234003719863, "grad_norm": 1.5020561169938058, "learning_rate": 9.154362048762627e-07, "loss": 0.2369, "step": 12209 }, { "epoch": 0.21224078290948914, "grad_norm": 2.6926004488664184, "learning_rate": 9.154205401543234e-07, "loss": 0.4163, "step": 12210 }, { "epoch": 0.21225816544699194, "grad_norm": 1.8379999737754529, "learning_rate": 9.154048741156919e-07, "loss": 0.3719, "step": 12211 }, { "epoch": 0.21227554798449477, "grad_norm": 2.4913870074030573, "learning_rate": 9.153892067604177e-07, "loss": 0.3471, "step": 12212 }, { "epoch": 0.2122929305219976, "grad_norm": 1.4386496809964664, "learning_rate": 9.153735380885505e-07, "loss": 0.2371, "step": 12213 }, { "epoch": 0.21231031305950043, "grad_norm": 1.936657335303534, "learning_rate": 9.153578681001401e-07, "loss": 0.2695, "step": 12214 }, { "epoch": 0.21232769559700326, "grad_norm": 1.7705069114437204, "learning_rate": 9.153421967952359e-07, "loss": 0.2926, "step": 12215 }, { "epoch": 0.21234507813450607, "grad_norm": 1.524430031125063, "learning_rate": 9.153265241738879e-07, "loss": 0.2834, "step": 12216 }, { "epoch": 0.2123624606720089, "grad_norm": 1.8675355486377914, "learning_rate": 9.153108502361455e-07, "loss": 0.3131, "step": 12217 }, { "epoch": 0.21237984320951173, "grad_norm": 3.889832330455246, "learning_rate": 9.152951749820586e-07, "loss": 0.5095, "step": 12218 }, { "epoch": 0.21239722574701456, "grad_norm": 1.3187292974216918, "learning_rate": 9.152794984116765e-07, "loss": 0.3724, "step": 12219 }, { "epoch": 0.2124146082845174, "grad_norm": 2.6521144205839917, "learning_rate": 9.152638205250494e-07, "loss": 0.2916, "step": 12220 }, { "epoch": 0.2124319908220202, "grad_norm": 2.091022323221442, "learning_rate": 9.152481413222267e-07, "loss": 0.5942, "step": 12221 }, { "epoch": 0.21244937335952302, "grad_norm": 1.7175261086175246, "learning_rate": 9.15232460803258e-07, "loss": 0.3732, "step": 12222 }, { "epoch": 0.21246675589702585, "grad_norm": 1.880033826612513, "learning_rate": 9.152167789681933e-07, "loss": 0.3961, "step": 12223 }, { "epoch": 0.21248413843452868, "grad_norm": 1.8581335265337842, "learning_rate": 9.152010958170822e-07, "loss": 0.2179, "step": 12224 }, { "epoch": 0.2125015209720315, "grad_norm": 2.6725036635811885, "learning_rate": 9.151854113499742e-07, "loss": 0.601, "step": 12225 }, { "epoch": 0.2125189035095343, "grad_norm": 2.5666217342741118, "learning_rate": 9.151697255669192e-07, "loss": 0.3672, "step": 12226 }, { "epoch": 0.21253628604703714, "grad_norm": 2.086830985894855, "learning_rate": 9.151540384679669e-07, "loss": 0.453, "step": 12227 }, { "epoch": 0.21255366858453997, "grad_norm": 1.490734197417012, "learning_rate": 9.151383500531671e-07, "loss": 0.4037, "step": 12228 }, { "epoch": 0.2125710511220428, "grad_norm": 1.4084940441843012, "learning_rate": 9.151226603225693e-07, "loss": 0.4608, "step": 12229 }, { "epoch": 0.21258843365954563, "grad_norm": 1.6268323851549547, "learning_rate": 9.151069692762233e-07, "loss": 0.3165, "step": 12230 }, { "epoch": 0.21260581619704844, "grad_norm": 3.620646965983163, "learning_rate": 9.15091276914179e-07, "loss": 0.5159, "step": 12231 }, { "epoch": 0.21262319873455127, "grad_norm": 2.0649717202228324, "learning_rate": 9.150755832364861e-07, "loss": 0.367, "step": 12232 }, { "epoch": 0.2126405812720541, "grad_norm": 1.8128142233669509, "learning_rate": 9.150598882431942e-07, "loss": 0.3478, "step": 12233 }, { "epoch": 0.21265796380955693, "grad_norm": 2.2697291260279155, "learning_rate": 9.15044191934353e-07, "loss": 0.4403, "step": 12234 }, { "epoch": 0.21267534634705976, "grad_norm": 1.880148463057666, "learning_rate": 9.150284943100125e-07, "loss": 0.2485, "step": 12235 }, { "epoch": 0.21269272888456256, "grad_norm": 2.4290372208065247, "learning_rate": 9.150127953702224e-07, "loss": 0.4198, "step": 12236 }, { "epoch": 0.2127101114220654, "grad_norm": 1.5332614384942167, "learning_rate": 9.149970951150322e-07, "loss": 0.5574, "step": 12237 }, { "epoch": 0.21272749395956822, "grad_norm": 1.8238597560281322, "learning_rate": 9.149813935444919e-07, "loss": 0.2839, "step": 12238 }, { "epoch": 0.21274487649707105, "grad_norm": 3.450254751682055, "learning_rate": 9.149656906586512e-07, "loss": 0.4374, "step": 12239 }, { "epoch": 0.21276225903457388, "grad_norm": 1.064299592112471, "learning_rate": 9.1494998645756e-07, "loss": 0.3169, "step": 12240 }, { "epoch": 0.21277964157207668, "grad_norm": 1.72309030753502, "learning_rate": 9.149342809412679e-07, "loss": 0.3869, "step": 12241 }, { "epoch": 0.2127970241095795, "grad_norm": 2.11480248360308, "learning_rate": 9.149185741098247e-07, "loss": 0.5376, "step": 12242 }, { "epoch": 0.21281440664708234, "grad_norm": 1.3649110297748643, "learning_rate": 9.149028659632801e-07, "loss": 0.2578, "step": 12243 }, { "epoch": 0.21283178918458517, "grad_norm": 1.5909863534115014, "learning_rate": 9.148871565016842e-07, "loss": 0.526, "step": 12244 }, { "epoch": 0.212849171722088, "grad_norm": 2.272377994657575, "learning_rate": 9.148714457250864e-07, "loss": 0.5404, "step": 12245 }, { "epoch": 0.2128665542595908, "grad_norm": 1.1864864766558927, "learning_rate": 9.148557336335369e-07, "loss": 0.2962, "step": 12246 }, { "epoch": 0.21288393679709364, "grad_norm": 3.041149556494422, "learning_rate": 9.148400202270852e-07, "loss": 0.4015, "step": 12247 }, { "epoch": 0.21290131933459647, "grad_norm": 1.3154244809379112, "learning_rate": 9.148243055057811e-07, "loss": 0.2125, "step": 12248 }, { "epoch": 0.2129187018720993, "grad_norm": 1.37493468347401, "learning_rate": 9.148085894696746e-07, "loss": 0.3821, "step": 12249 }, { "epoch": 0.2129360844096021, "grad_norm": 1.6799469150143187, "learning_rate": 9.147928721188155e-07, "loss": 0.2672, "step": 12250 }, { "epoch": 0.21295346694710493, "grad_norm": 1.569239398962622, "learning_rate": 9.147771534532535e-07, "loss": 0.3406, "step": 12251 }, { "epoch": 0.21297084948460776, "grad_norm": 2.4710839913313754, "learning_rate": 9.147614334730384e-07, "loss": 0.3529, "step": 12252 }, { "epoch": 0.2129882320221106, "grad_norm": 3.2241139052359626, "learning_rate": 9.147457121782201e-07, "loss": 0.4794, "step": 12253 }, { "epoch": 0.21300561455961342, "grad_norm": 1.6834722528440331, "learning_rate": 9.147299895688484e-07, "loss": 0.3494, "step": 12254 }, { "epoch": 0.21302299709711622, "grad_norm": 1.7131553978935607, "learning_rate": 9.147142656449731e-07, "loss": 0.2225, "step": 12255 }, { "epoch": 0.21304037963461905, "grad_norm": 2.080924593497979, "learning_rate": 9.146985404066443e-07, "loss": 0.2627, "step": 12256 }, { "epoch": 0.21305776217212188, "grad_norm": 1.4317028317798315, "learning_rate": 9.146828138539113e-07, "loss": 0.2432, "step": 12257 }, { "epoch": 0.21307514470962471, "grad_norm": 2.9448951611741543, "learning_rate": 9.146670859868245e-07, "loss": 0.5584, "step": 12258 }, { "epoch": 0.21309252724712754, "grad_norm": 2.0657913048364387, "learning_rate": 9.146513568054335e-07, "loss": 0.6221, "step": 12259 }, { "epoch": 0.21310990978463035, "grad_norm": 2.088289473238509, "learning_rate": 9.146356263097881e-07, "loss": 0.4068, "step": 12260 }, { "epoch": 0.21312729232213318, "grad_norm": 3.551753792576381, "learning_rate": 9.146198944999383e-07, "loss": 0.5019, "step": 12261 }, { "epoch": 0.213144674859636, "grad_norm": 1.8851393306662203, "learning_rate": 9.146041613759339e-07, "loss": 0.4996, "step": 12262 }, { "epoch": 0.21316205739713884, "grad_norm": 1.6849113684368837, "learning_rate": 9.145884269378247e-07, "loss": 0.3792, "step": 12263 }, { "epoch": 0.21317943993464167, "grad_norm": 2.2896156752829304, "learning_rate": 9.145726911856606e-07, "loss": 0.449, "step": 12264 }, { "epoch": 0.21319682247214447, "grad_norm": 2.3901388377070663, "learning_rate": 9.145569541194915e-07, "loss": 0.321, "step": 12265 }, { "epoch": 0.2132142050096473, "grad_norm": 1.5427220538356106, "learning_rate": 9.145412157393675e-07, "loss": 0.3168, "step": 12266 }, { "epoch": 0.21323158754715013, "grad_norm": 2.3086709239896357, "learning_rate": 9.14525476045338e-07, "loss": 0.5734, "step": 12267 }, { "epoch": 0.21324897008465296, "grad_norm": 2.08470654595769, "learning_rate": 9.145097350374533e-07, "loss": 0.2466, "step": 12268 }, { "epoch": 0.2132663526221558, "grad_norm": 1.4791274850258889, "learning_rate": 9.144939927157631e-07, "loss": 0.5784, "step": 12269 }, { "epoch": 0.2132837351596586, "grad_norm": 3.0506163841915193, "learning_rate": 9.144782490803173e-07, "loss": 0.3011, "step": 12270 }, { "epoch": 0.21330111769716142, "grad_norm": 1.7367724006683942, "learning_rate": 9.144625041311659e-07, "loss": 0.3961, "step": 12271 }, { "epoch": 0.21331850023466425, "grad_norm": 3.5679889311067905, "learning_rate": 9.144467578683589e-07, "loss": 0.5348, "step": 12272 }, { "epoch": 0.21333588277216708, "grad_norm": 1.8722023461729855, "learning_rate": 9.144310102919459e-07, "loss": 0.3538, "step": 12273 }, { "epoch": 0.21335326530966991, "grad_norm": 2.172467158764988, "learning_rate": 9.14415261401977e-07, "loss": 0.296, "step": 12274 }, { "epoch": 0.21337064784717272, "grad_norm": 1.8436621086272051, "learning_rate": 9.143995111985021e-07, "loss": 0.3259, "step": 12275 }, { "epoch": 0.21338803038467555, "grad_norm": 1.6393939858820317, "learning_rate": 9.143837596815711e-07, "loss": 0.5916, "step": 12276 }, { "epoch": 0.21340541292217838, "grad_norm": 1.8806495609641039, "learning_rate": 9.143680068512339e-07, "loss": 0.287, "step": 12277 }, { "epoch": 0.2134227954596812, "grad_norm": 1.5307310977548432, "learning_rate": 9.143522527075405e-07, "loss": 0.3599, "step": 12278 }, { "epoch": 0.21344017799718404, "grad_norm": 3.0676874573852815, "learning_rate": 9.143364972505407e-07, "loss": 0.5714, "step": 12279 }, { "epoch": 0.21345756053468684, "grad_norm": 1.7743155719508916, "learning_rate": 9.143207404802847e-07, "loss": 0.4101, "step": 12280 }, { "epoch": 0.21347494307218967, "grad_norm": 1.212056582191705, "learning_rate": 9.143049823968222e-07, "loss": 0.167, "step": 12281 }, { "epoch": 0.2134923256096925, "grad_norm": 4.809517493662113, "learning_rate": 9.142892230002032e-07, "loss": 0.4389, "step": 12282 }, { "epoch": 0.21350970814719533, "grad_norm": 2.114797069126888, "learning_rate": 9.142734622904777e-07, "loss": 0.339, "step": 12283 }, { "epoch": 0.21352709068469816, "grad_norm": 2.036840485415805, "learning_rate": 9.142577002676957e-07, "loss": 0.2089, "step": 12284 }, { "epoch": 0.21354447322220096, "grad_norm": 1.4952824824528579, "learning_rate": 9.14241936931907e-07, "loss": 0.2592, "step": 12285 }, { "epoch": 0.2135618557597038, "grad_norm": 2.2962638924101855, "learning_rate": 9.142261722831617e-07, "loss": 0.5345, "step": 12286 }, { "epoch": 0.21357923829720663, "grad_norm": 1.771027888060957, "learning_rate": 9.142104063215096e-07, "loss": 0.4642, "step": 12287 }, { "epoch": 0.21359662083470946, "grad_norm": 1.738040189775095, "learning_rate": 9.141946390470008e-07, "loss": 0.1536, "step": 12288 }, { "epoch": 0.21361400337221229, "grad_norm": 1.36742991807094, "learning_rate": 9.141788704596853e-07, "loss": 0.1983, "step": 12289 }, { "epoch": 0.2136313859097151, "grad_norm": 1.1078714860291656, "learning_rate": 9.141631005596131e-07, "loss": 0.3034, "step": 12290 }, { "epoch": 0.21364876844721792, "grad_norm": 2.3386180930847975, "learning_rate": 9.141473293468341e-07, "loss": 0.3162, "step": 12291 }, { "epoch": 0.21366615098472075, "grad_norm": 1.7054481268526558, "learning_rate": 9.141315568213983e-07, "loss": 0.6423, "step": 12292 }, { "epoch": 0.21368353352222358, "grad_norm": 2.294740382643228, "learning_rate": 9.141157829833556e-07, "loss": 0.4723, "step": 12293 }, { "epoch": 0.2137009160597264, "grad_norm": 1.4491606874864775, "learning_rate": 9.141000078327562e-07, "loss": 0.2425, "step": 12294 }, { "epoch": 0.2137182985972292, "grad_norm": 2.0542479266615077, "learning_rate": 9.1408423136965e-07, "loss": 0.462, "step": 12295 }, { "epoch": 0.21373568113473204, "grad_norm": 1.4769913797002114, "learning_rate": 9.14068453594087e-07, "loss": 0.3696, "step": 12296 }, { "epoch": 0.21375306367223487, "grad_norm": 3.0179502146702943, "learning_rate": 9.140526745061172e-07, "loss": 0.3737, "step": 12297 }, { "epoch": 0.2137704462097377, "grad_norm": 2.2701657845236816, "learning_rate": 9.140368941057906e-07, "loss": 0.3797, "step": 12298 }, { "epoch": 0.21378782874724053, "grad_norm": 1.8517937683760464, "learning_rate": 9.140211123931571e-07, "loss": 0.3243, "step": 12299 }, { "epoch": 0.21380521128474334, "grad_norm": 3.467014294896021, "learning_rate": 9.140053293682672e-07, "loss": 0.3659, "step": 12300 }, { "epoch": 0.21382259382224617, "grad_norm": 4.098425440793207, "learning_rate": 9.139895450311703e-07, "loss": 0.5115, "step": 12301 }, { "epoch": 0.213839976359749, "grad_norm": 2.8499469696261386, "learning_rate": 9.139737593819168e-07, "loss": 0.318, "step": 12302 }, { "epoch": 0.21385735889725183, "grad_norm": 1.1734839373869408, "learning_rate": 9.139579724205566e-07, "loss": 0.2417, "step": 12303 }, { "epoch": 0.21387474143475466, "grad_norm": 1.8994060501771188, "learning_rate": 9.1394218414714e-07, "loss": 0.2392, "step": 12304 }, { "epoch": 0.21389212397225746, "grad_norm": 0.9326504967635504, "learning_rate": 9.139263945617166e-07, "loss": 0.2944, "step": 12305 }, { "epoch": 0.2139095065097603, "grad_norm": 1.9609360328444145, "learning_rate": 9.139106036643367e-07, "loss": 0.4262, "step": 12306 }, { "epoch": 0.21392688904726312, "grad_norm": 1.4096226690973441, "learning_rate": 9.138948114550504e-07, "loss": 0.1531, "step": 12307 }, { "epoch": 0.21394427158476595, "grad_norm": 1.337196068404893, "learning_rate": 9.138790179339076e-07, "loss": 0.4238, "step": 12308 }, { "epoch": 0.21396165412226878, "grad_norm": 2.3142195635226783, "learning_rate": 9.138632231009584e-07, "loss": 0.2573, "step": 12309 }, { "epoch": 0.21397903665977158, "grad_norm": 1.7758526797704204, "learning_rate": 9.13847426956253e-07, "loss": 0.3233, "step": 12310 }, { "epoch": 0.2139964191972744, "grad_norm": 3.26986472932783, "learning_rate": 9.138316294998413e-07, "loss": 0.473, "step": 12311 }, { "epoch": 0.21401380173477724, "grad_norm": 1.5361386229344685, "learning_rate": 9.138158307317734e-07, "loss": 0.2489, "step": 12312 }, { "epoch": 0.21403118427228007, "grad_norm": 2.157211951161018, "learning_rate": 9.138000306520996e-07, "loss": 0.3879, "step": 12313 }, { "epoch": 0.2140485668097829, "grad_norm": 1.8873235144926648, "learning_rate": 9.137842292608695e-07, "loss": 0.3665, "step": 12314 }, { "epoch": 0.2140659493472857, "grad_norm": 2.3275716547489904, "learning_rate": 9.137684265581338e-07, "loss": 0.3507, "step": 12315 }, { "epoch": 0.21408333188478854, "grad_norm": 2.030216144357481, "learning_rate": 9.13752622543942e-07, "loss": 0.4564, "step": 12316 }, { "epoch": 0.21410071442229137, "grad_norm": 2.7969713830098955, "learning_rate": 9.137368172183445e-07, "loss": 0.3691, "step": 12317 }, { "epoch": 0.2141180969597942, "grad_norm": 1.8191992310899106, "learning_rate": 9.137210105813914e-07, "loss": 0.3694, "step": 12318 }, { "epoch": 0.21413547949729703, "grad_norm": 2.9600502881110287, "learning_rate": 9.137052026331326e-07, "loss": 0.3264, "step": 12319 }, { "epoch": 0.21415286203479983, "grad_norm": 2.533721133475548, "learning_rate": 9.136893933736186e-07, "loss": 0.3188, "step": 12320 }, { "epoch": 0.21417024457230266, "grad_norm": 2.1775058400329064, "learning_rate": 9.136735828028991e-07, "loss": 0.4404, "step": 12321 }, { "epoch": 0.2141876271098055, "grad_norm": 2.2008845161031054, "learning_rate": 9.136577709210245e-07, "loss": 0.4394, "step": 12322 }, { "epoch": 0.21420500964730832, "grad_norm": 1.4630786361351045, "learning_rate": 9.136419577280445e-07, "loss": 0.4539, "step": 12323 }, { "epoch": 0.21422239218481115, "grad_norm": 3.446025753215682, "learning_rate": 9.136261432240098e-07, "loss": 0.3776, "step": 12324 }, { "epoch": 0.21423977472231395, "grad_norm": 2.17393722698451, "learning_rate": 9.136103274089701e-07, "loss": 0.4153, "step": 12325 }, { "epoch": 0.21425715725981678, "grad_norm": 2.082539050858742, "learning_rate": 9.135945102829756e-07, "loss": 0.3781, "step": 12326 }, { "epoch": 0.2142745397973196, "grad_norm": 2.865360195744836, "learning_rate": 9.135786918460766e-07, "loss": 0.3235, "step": 12327 }, { "epoch": 0.21429192233482244, "grad_norm": 1.5491467250247821, "learning_rate": 9.135628720983231e-07, "loss": 0.3078, "step": 12328 }, { "epoch": 0.21430930487232527, "grad_norm": 2.253602436479381, "learning_rate": 9.135470510397653e-07, "loss": 0.3317, "step": 12329 }, { "epoch": 0.21432668740982808, "grad_norm": 2.18320509962089, "learning_rate": 9.135312286704533e-07, "loss": 0.2957, "step": 12330 }, { "epoch": 0.2143440699473309, "grad_norm": 1.719745526849906, "learning_rate": 9.135154049904372e-07, "loss": 0.1878, "step": 12331 }, { "epoch": 0.21436145248483374, "grad_norm": 1.1847950044211115, "learning_rate": 9.134995799997673e-07, "loss": 0.1979, "step": 12332 }, { "epoch": 0.21437883502233657, "grad_norm": 1.5508595103597052, "learning_rate": 9.134837536984936e-07, "loss": 0.2555, "step": 12333 }, { "epoch": 0.2143962175598394, "grad_norm": 1.521924096625071, "learning_rate": 9.134679260866663e-07, "loss": 0.3938, "step": 12334 }, { "epoch": 0.2144136000973422, "grad_norm": 1.8128109116064868, "learning_rate": 9.134520971643358e-07, "loss": 0.3053, "step": 12335 }, { "epoch": 0.21443098263484503, "grad_norm": 1.8441404123478726, "learning_rate": 9.134362669315519e-07, "loss": 0.3518, "step": 12336 }, { "epoch": 0.21444836517234786, "grad_norm": 1.9878565830641224, "learning_rate": 9.13420435388365e-07, "loss": 0.4734, "step": 12337 }, { "epoch": 0.2144657477098507, "grad_norm": 4.479517012358033, "learning_rate": 9.134046025348252e-07, "loss": 0.3775, "step": 12338 }, { "epoch": 0.21448313024735352, "grad_norm": 1.632863261202405, "learning_rate": 9.133887683709829e-07, "loss": 0.5251, "step": 12339 }, { "epoch": 0.21450051278485632, "grad_norm": 2.416871190041717, "learning_rate": 9.133729328968878e-07, "loss": 0.3655, "step": 12340 }, { "epoch": 0.21451789532235915, "grad_norm": 1.8169983330603192, "learning_rate": 9.133570961125906e-07, "loss": 0.4528, "step": 12341 }, { "epoch": 0.21453527785986198, "grad_norm": 1.2705458751742094, "learning_rate": 9.133412580181412e-07, "loss": 0.2079, "step": 12342 }, { "epoch": 0.2145526603973648, "grad_norm": 2.6889062941786457, "learning_rate": 9.133254186135899e-07, "loss": 0.4011, "step": 12343 }, { "epoch": 0.21457004293486764, "grad_norm": 6.460366554296136, "learning_rate": 9.13309577898987e-07, "loss": 0.3812, "step": 12344 }, { "epoch": 0.21458742547237045, "grad_norm": 1.2802718190768538, "learning_rate": 9.132937358743824e-07, "loss": 0.2644, "step": 12345 }, { "epoch": 0.21460480800987328, "grad_norm": 1.9628893448122213, "learning_rate": 9.132778925398267e-07, "loss": 0.3176, "step": 12346 }, { "epoch": 0.2146221905473761, "grad_norm": 2.08604003571633, "learning_rate": 9.132620478953699e-07, "loss": 0.4226, "step": 12347 }, { "epoch": 0.21463957308487894, "grad_norm": 1.4477701063209463, "learning_rate": 9.132462019410621e-07, "loss": 0.3759, "step": 12348 }, { "epoch": 0.21465695562238177, "grad_norm": 2.98278279996599, "learning_rate": 9.132303546769537e-07, "loss": 0.4121, "step": 12349 }, { "epoch": 0.21467433815988457, "grad_norm": 1.8829074024258854, "learning_rate": 9.13214506103095e-07, "loss": 0.4487, "step": 12350 }, { "epoch": 0.2146917206973874, "grad_norm": 3.7754750765747254, "learning_rate": 9.131986562195361e-07, "loss": 0.35, "step": 12351 }, { "epoch": 0.21470910323489023, "grad_norm": 1.504947945391435, "learning_rate": 9.131828050263273e-07, "loss": 0.2488, "step": 12352 }, { "epoch": 0.21472648577239306, "grad_norm": 1.570495790391199, "learning_rate": 9.131669525235187e-07, "loss": 0.3842, "step": 12353 }, { "epoch": 0.2147438683098959, "grad_norm": 1.870500054299885, "learning_rate": 9.131510987111607e-07, "loss": 0.4303, "step": 12354 }, { "epoch": 0.2147612508473987, "grad_norm": 2.362293916382035, "learning_rate": 9.131352435893036e-07, "loss": 0.3274, "step": 12355 }, { "epoch": 0.21477863338490152, "grad_norm": 1.5747462270093597, "learning_rate": 9.131193871579974e-07, "loss": 0.2875, "step": 12356 }, { "epoch": 0.21479601592240435, "grad_norm": 4.12562962888153, "learning_rate": 9.131035294172927e-07, "loss": 0.6911, "step": 12357 }, { "epoch": 0.21481339845990718, "grad_norm": 1.529911730662775, "learning_rate": 9.130876703672394e-07, "loss": 0.2738, "step": 12358 }, { "epoch": 0.21483078099741001, "grad_norm": 2.537297167626554, "learning_rate": 9.13071810007888e-07, "loss": 0.4158, "step": 12359 }, { "epoch": 0.21484816353491282, "grad_norm": 1.9705221839979978, "learning_rate": 9.130559483392886e-07, "loss": 0.2464, "step": 12360 }, { "epoch": 0.21486554607241565, "grad_norm": 2.3113141560906594, "learning_rate": 9.130400853614917e-07, "loss": 0.2586, "step": 12361 }, { "epoch": 0.21488292860991848, "grad_norm": 1.0860929610499706, "learning_rate": 9.130242210745475e-07, "loss": 0.3218, "step": 12362 }, { "epoch": 0.2149003111474213, "grad_norm": 2.9407029804811717, "learning_rate": 9.130083554785062e-07, "loss": 0.4688, "step": 12363 }, { "epoch": 0.21491769368492414, "grad_norm": 1.4058598712631818, "learning_rate": 9.129924885734182e-07, "loss": 0.4351, "step": 12364 }, { "epoch": 0.21493507622242694, "grad_norm": 1.5067823808119332, "learning_rate": 9.129766203593337e-07, "loss": 0.486, "step": 12365 }, { "epoch": 0.21495245875992977, "grad_norm": 1.796452202931312, "learning_rate": 9.12960750836303e-07, "loss": 0.4246, "step": 12366 }, { "epoch": 0.2149698412974326, "grad_norm": 2.5575125678300648, "learning_rate": 9.129448800043764e-07, "loss": 0.4109, "step": 12367 }, { "epoch": 0.21498722383493543, "grad_norm": 1.1749860988924477, "learning_rate": 9.129290078636041e-07, "loss": 0.3473, "step": 12368 }, { "epoch": 0.21500460637243826, "grad_norm": 1.6830741631093409, "learning_rate": 9.129131344140369e-07, "loss": 0.4484, "step": 12369 }, { "epoch": 0.21502198890994106, "grad_norm": 1.353993953389372, "learning_rate": 9.128972596557243e-07, "loss": 0.2592, "step": 12370 }, { "epoch": 0.2150393714474439, "grad_norm": 1.4766852538401825, "learning_rate": 9.128813835887172e-07, "loss": 0.3636, "step": 12371 }, { "epoch": 0.21505675398494672, "grad_norm": 1.4951148681837179, "learning_rate": 9.12865506213066e-07, "loss": 0.4269, "step": 12372 }, { "epoch": 0.21507413652244955, "grad_norm": 1.8007562247648996, "learning_rate": 9.128496275288206e-07, "loss": 0.4378, "step": 12373 }, { "epoch": 0.21509151905995239, "grad_norm": 2.0643175107639045, "learning_rate": 9.128337475360314e-07, "loss": 0.2576, "step": 12374 }, { "epoch": 0.2151089015974552, "grad_norm": 2.0623225713029747, "learning_rate": 9.128178662347492e-07, "loss": 0.2487, "step": 12375 }, { "epoch": 0.21512628413495802, "grad_norm": 1.7151328211971995, "learning_rate": 9.128019836250238e-07, "loss": 0.4736, "step": 12376 }, { "epoch": 0.21514366667246085, "grad_norm": 1.7879457148511342, "learning_rate": 9.127860997069057e-07, "loss": 0.6702, "step": 12377 }, { "epoch": 0.21516104920996368, "grad_norm": 0.9480797921444616, "learning_rate": 9.127702144804455e-07, "loss": 0.3152, "step": 12378 }, { "epoch": 0.2151784317474665, "grad_norm": 1.859169469810853, "learning_rate": 9.127543279456931e-07, "loss": 0.3276, "step": 12379 }, { "epoch": 0.2151958142849693, "grad_norm": 1.9888999374450114, "learning_rate": 9.127384401026992e-07, "loss": 0.5051, "step": 12380 }, { "epoch": 0.21521319682247214, "grad_norm": 2.490962375792954, "learning_rate": 9.12722550951514e-07, "loss": 0.4053, "step": 12381 }, { "epoch": 0.21523057935997497, "grad_norm": 1.8495081338945663, "learning_rate": 9.127066604921878e-07, "loss": 0.2203, "step": 12382 }, { "epoch": 0.2152479618974778, "grad_norm": 1.878690688314858, "learning_rate": 9.126907687247711e-07, "loss": 0.3151, "step": 12383 }, { "epoch": 0.21526534443498063, "grad_norm": 1.6901702741083025, "learning_rate": 9.126748756493144e-07, "loss": 0.2397, "step": 12384 }, { "epoch": 0.21528272697248343, "grad_norm": 1.9009656934961976, "learning_rate": 9.126589812658678e-07, "loss": 0.5971, "step": 12385 }, { "epoch": 0.21530010950998626, "grad_norm": 2.891122132501087, "learning_rate": 9.126430855744817e-07, "loss": 0.5214, "step": 12386 }, { "epoch": 0.2153174920474891, "grad_norm": 1.94119380477424, "learning_rate": 9.126271885752068e-07, "loss": 0.5362, "step": 12387 }, { "epoch": 0.21533487458499193, "grad_norm": 1.5505982337280273, "learning_rate": 9.126112902680932e-07, "loss": 0.3614, "step": 12388 }, { "epoch": 0.21535225712249473, "grad_norm": 1.6880126214517313, "learning_rate": 9.125953906531911e-07, "loss": 0.5941, "step": 12389 }, { "epoch": 0.21536963965999756, "grad_norm": 1.353170120322075, "learning_rate": 9.125794897305514e-07, "loss": 0.3836, "step": 12390 }, { "epoch": 0.2153870221975004, "grad_norm": 1.8125988178890062, "learning_rate": 9.125635875002242e-07, "loss": 0.4396, "step": 12391 }, { "epoch": 0.21540440473500322, "grad_norm": 1.3927918128724814, "learning_rate": 9.125476839622599e-07, "loss": 0.375, "step": 12392 }, { "epoch": 0.21542178727250605, "grad_norm": 1.8979181845483013, "learning_rate": 9.12531779116709e-07, "loss": 0.3095, "step": 12393 }, { "epoch": 0.21543916981000885, "grad_norm": 1.6919808178240259, "learning_rate": 9.125158729636218e-07, "loss": 0.2297, "step": 12394 }, { "epoch": 0.21545655234751168, "grad_norm": 1.509553363988479, "learning_rate": 9.124999655030488e-07, "loss": 0.4674, "step": 12395 }, { "epoch": 0.2154739348850145, "grad_norm": 1.4678825926477115, "learning_rate": 9.124840567350404e-07, "loss": 0.2906, "step": 12396 }, { "epoch": 0.21549131742251734, "grad_norm": 1.5816011222106774, "learning_rate": 9.12468146659647e-07, "loss": 0.4493, "step": 12397 }, { "epoch": 0.21550869996002017, "grad_norm": 1.0140345800028103, "learning_rate": 9.124522352769191e-07, "loss": 0.1913, "step": 12398 }, { "epoch": 0.21552608249752297, "grad_norm": 3.051464782426605, "learning_rate": 9.12436322586907e-07, "loss": 0.3032, "step": 12399 }, { "epoch": 0.2155434650350258, "grad_norm": 1.7616468123568796, "learning_rate": 9.124204085896612e-07, "loss": 0.2774, "step": 12400 }, { "epoch": 0.21556084757252864, "grad_norm": 2.3403619190390694, "learning_rate": 9.124044932852322e-07, "loss": 0.5297, "step": 12401 }, { "epoch": 0.21557823011003147, "grad_norm": 1.825325179416844, "learning_rate": 9.123885766736705e-07, "loss": 0.347, "step": 12402 }, { "epoch": 0.2155956126475343, "grad_norm": 9.17213854656076, "learning_rate": 9.123726587550263e-07, "loss": 0.5446, "step": 12403 }, { "epoch": 0.2156129951850371, "grad_norm": 1.7723463752548279, "learning_rate": 9.123567395293503e-07, "loss": 0.3636, "step": 12404 }, { "epoch": 0.21563037772253993, "grad_norm": 1.6792776980356185, "learning_rate": 9.123408189966928e-07, "loss": 0.2509, "step": 12405 }, { "epoch": 0.21564776026004276, "grad_norm": 2.4284864120878744, "learning_rate": 9.123248971571043e-07, "loss": 0.4436, "step": 12406 }, { "epoch": 0.2156651427975456, "grad_norm": 1.7748097868272272, "learning_rate": 9.123089740106353e-07, "loss": 0.3159, "step": 12407 }, { "epoch": 0.21568252533504842, "grad_norm": 1.3875262725298951, "learning_rate": 9.122930495573365e-07, "loss": 0.2892, "step": 12408 }, { "epoch": 0.21569990787255122, "grad_norm": 1.6548142623939242, "learning_rate": 9.122771237972578e-07, "loss": 0.4892, "step": 12409 }, { "epoch": 0.21571729041005405, "grad_norm": 2.0898973567175956, "learning_rate": 9.122611967304501e-07, "loss": 0.4995, "step": 12410 }, { "epoch": 0.21573467294755688, "grad_norm": 1.9237990679927164, "learning_rate": 9.122452683569637e-07, "loss": 0.2643, "step": 12411 }, { "epoch": 0.2157520554850597, "grad_norm": 1.6709297930046354, "learning_rate": 9.122293386768494e-07, "loss": 0.4369, "step": 12412 }, { "epoch": 0.21576943802256254, "grad_norm": 2.2503123795552327, "learning_rate": 9.122134076901572e-07, "loss": 0.4153, "step": 12413 }, { "epoch": 0.21578682056006535, "grad_norm": 1.6813077147523245, "learning_rate": 9.121974753969379e-07, "loss": 0.2829, "step": 12414 }, { "epoch": 0.21580420309756818, "grad_norm": 4.4718501788243, "learning_rate": 9.12181541797242e-07, "loss": 0.3454, "step": 12415 }, { "epoch": 0.215821585635071, "grad_norm": 1.575861708635642, "learning_rate": 9.1216560689112e-07, "loss": 0.4636, "step": 12416 }, { "epoch": 0.21583896817257384, "grad_norm": 4.269187268259764, "learning_rate": 9.121496706786224e-07, "loss": 0.4808, "step": 12417 }, { "epoch": 0.21585635071007667, "grad_norm": 1.3377876053885265, "learning_rate": 9.121337331597995e-07, "loss": 0.271, "step": 12418 }, { "epoch": 0.21587373324757947, "grad_norm": 1.6242896479507984, "learning_rate": 9.121177943347022e-07, "loss": 0.26, "step": 12419 }, { "epoch": 0.2158911157850823, "grad_norm": 1.7867751703257686, "learning_rate": 9.121018542033807e-07, "loss": 0.4211, "step": 12420 }, { "epoch": 0.21590849832258513, "grad_norm": 4.786614056669394, "learning_rate": 9.120859127658856e-07, "loss": 0.26, "step": 12421 }, { "epoch": 0.21592588086008796, "grad_norm": 3.6406935666838156, "learning_rate": 9.120699700222674e-07, "loss": 0.3414, "step": 12422 }, { "epoch": 0.2159432633975908, "grad_norm": 1.6841113077427075, "learning_rate": 9.120540259725768e-07, "loss": 0.2988, "step": 12423 }, { "epoch": 0.2159606459350936, "grad_norm": 2.601129888032987, "learning_rate": 9.120380806168641e-07, "loss": 0.4151, "step": 12424 }, { "epoch": 0.21597802847259642, "grad_norm": 1.8808849580775173, "learning_rate": 9.1202213395518e-07, "loss": 0.3259, "step": 12425 }, { "epoch": 0.21599541101009925, "grad_norm": 2.26946276411212, "learning_rate": 9.120061859875751e-07, "loss": 0.3186, "step": 12426 }, { "epoch": 0.21601279354760208, "grad_norm": 2.4875624792981035, "learning_rate": 9.119902367140997e-07, "loss": 0.2455, "step": 12427 }, { "epoch": 0.2160301760851049, "grad_norm": 1.4959694708592906, "learning_rate": 9.119742861348046e-07, "loss": 0.2571, "step": 12428 }, { "epoch": 0.21604755862260772, "grad_norm": 1.614266803915542, "learning_rate": 9.119583342497403e-07, "loss": 0.1819, "step": 12429 }, { "epoch": 0.21606494116011055, "grad_norm": 1.5555670802711263, "learning_rate": 9.119423810589573e-07, "loss": 0.2057, "step": 12430 }, { "epoch": 0.21608232369761338, "grad_norm": 2.0008796721130837, "learning_rate": 9.11926426562506e-07, "loss": 0.4937, "step": 12431 }, { "epoch": 0.2160997062351162, "grad_norm": 1.9398357367407213, "learning_rate": 9.119104707604374e-07, "loss": 0.3891, "step": 12432 }, { "epoch": 0.21611708877261904, "grad_norm": 1.7026223510637517, "learning_rate": 9.118945136528016e-07, "loss": 0.4445, "step": 12433 }, { "epoch": 0.21613447131012184, "grad_norm": 2.687584396051366, "learning_rate": 9.118785552396496e-07, "loss": 0.5353, "step": 12434 }, { "epoch": 0.21615185384762467, "grad_norm": 1.878047174705088, "learning_rate": 9.118625955210317e-07, "loss": 0.2943, "step": 12435 }, { "epoch": 0.2161692363851275, "grad_norm": 5.16583216312617, "learning_rate": 9.118466344969986e-07, "loss": 0.3517, "step": 12436 }, { "epoch": 0.21618661892263033, "grad_norm": 2.895099772509348, "learning_rate": 9.118306721676008e-07, "loss": 0.4564, "step": 12437 }, { "epoch": 0.21620400146013316, "grad_norm": 2.216110878272301, "learning_rate": 9.11814708532889e-07, "loss": 0.2643, "step": 12438 }, { "epoch": 0.21622138399763596, "grad_norm": 1.5560312990698861, "learning_rate": 9.117987435929137e-07, "loss": 0.4174, "step": 12439 }, { "epoch": 0.2162387665351388, "grad_norm": 3.592787144155526, "learning_rate": 9.117827773477255e-07, "loss": 0.4681, "step": 12440 }, { "epoch": 0.21625614907264162, "grad_norm": 2.498343975451678, "learning_rate": 9.117668097973752e-07, "loss": 0.5587, "step": 12441 }, { "epoch": 0.21627353161014445, "grad_norm": 2.780097834629338, "learning_rate": 9.117508409419131e-07, "loss": 0.4699, "step": 12442 }, { "epoch": 0.21629091414764728, "grad_norm": 1.6297246730320165, "learning_rate": 9.117348707813901e-07, "loss": 0.2813, "step": 12443 }, { "epoch": 0.2163082966851501, "grad_norm": 1.8799143855211347, "learning_rate": 9.117188993158566e-07, "loss": 0.3078, "step": 12444 }, { "epoch": 0.21632567922265292, "grad_norm": 1.7412914467844516, "learning_rate": 9.117029265453636e-07, "loss": 0.3247, "step": 12445 }, { "epoch": 0.21634306176015575, "grad_norm": 2.6843196741503195, "learning_rate": 9.116869524699611e-07, "loss": 0.4468, "step": 12446 }, { "epoch": 0.21636044429765858, "grad_norm": 1.6984504545659722, "learning_rate": 9.116709770897002e-07, "loss": 0.3291, "step": 12447 }, { "epoch": 0.2163778268351614, "grad_norm": 2.6317284673310657, "learning_rate": 9.116550004046313e-07, "loss": 0.3798, "step": 12448 }, { "epoch": 0.2163952093726642, "grad_norm": 1.8605618479341484, "learning_rate": 9.116390224148052e-07, "loss": 0.476, "step": 12449 }, { "epoch": 0.21641259191016704, "grad_norm": 1.9553897400009814, "learning_rate": 9.116230431202726e-07, "loss": 0.3438, "step": 12450 }, { "epoch": 0.21642997444766987, "grad_norm": 1.588843761508407, "learning_rate": 9.11607062521084e-07, "loss": 0.3138, "step": 12451 }, { "epoch": 0.2164473569851727, "grad_norm": 2.3379883206641003, "learning_rate": 9.115910806172901e-07, "loss": 0.2793, "step": 12452 }, { "epoch": 0.21646473952267553, "grad_norm": 1.588428486477059, "learning_rate": 9.115750974089414e-07, "loss": 0.3821, "step": 12453 }, { "epoch": 0.21648212206017833, "grad_norm": 1.53211889727678, "learning_rate": 9.115591128960889e-07, "loss": 0.3934, "step": 12454 }, { "epoch": 0.21649950459768116, "grad_norm": 1.1296897218426452, "learning_rate": 9.115431270787829e-07, "loss": 0.3878, "step": 12455 }, { "epoch": 0.216516887135184, "grad_norm": 2.508007292501069, "learning_rate": 9.115271399570743e-07, "loss": 0.4338, "step": 12456 }, { "epoch": 0.21653426967268682, "grad_norm": 2.189632240229794, "learning_rate": 9.115111515310137e-07, "loss": 0.3428, "step": 12457 }, { "epoch": 0.21655165221018965, "grad_norm": 1.653865808546861, "learning_rate": 9.114951618006519e-07, "loss": 0.3132, "step": 12458 }, { "epoch": 0.21656903474769246, "grad_norm": 2.572997923122955, "learning_rate": 9.114791707660394e-07, "loss": 0.3709, "step": 12459 }, { "epoch": 0.2165864172851953, "grad_norm": 1.2576096325696289, "learning_rate": 9.114631784272268e-07, "loss": 0.3332, "step": 12460 }, { "epoch": 0.21660379982269812, "grad_norm": 1.8208705430925187, "learning_rate": 9.114471847842651e-07, "loss": 0.173, "step": 12461 }, { "epoch": 0.21662118236020095, "grad_norm": 1.5372058262567174, "learning_rate": 9.114311898372046e-07, "loss": 0.4912, "step": 12462 }, { "epoch": 0.21663856489770378, "grad_norm": 1.9822495194401182, "learning_rate": 9.114151935860965e-07, "loss": 0.4115, "step": 12463 }, { "epoch": 0.21665594743520658, "grad_norm": 2.5009073912038025, "learning_rate": 9.113991960309912e-07, "loss": 0.3892, "step": 12464 }, { "epoch": 0.2166733299727094, "grad_norm": 5.455854117857146, "learning_rate": 9.113831971719392e-07, "loss": 0.3073, "step": 12465 }, { "epoch": 0.21669071251021224, "grad_norm": 1.89954618838746, "learning_rate": 9.113671970089917e-07, "loss": 0.4577, "step": 12466 }, { "epoch": 0.21670809504771507, "grad_norm": 1.90745724203348, "learning_rate": 9.113511955421991e-07, "loss": 0.2828, "step": 12467 }, { "epoch": 0.2167254775852179, "grad_norm": 1.6592668952535496, "learning_rate": 9.113351927716121e-07, "loss": 0.6632, "step": 12468 }, { "epoch": 0.2167428601227207, "grad_norm": 1.530131879866986, "learning_rate": 9.113191886972815e-07, "loss": 0.2666, "step": 12469 }, { "epoch": 0.21676024266022353, "grad_norm": 1.6315779657021903, "learning_rate": 9.113031833192579e-07, "loss": 0.358, "step": 12470 }, { "epoch": 0.21677762519772636, "grad_norm": 1.2852955853630987, "learning_rate": 9.112871766375922e-07, "loss": 0.2375, "step": 12471 }, { "epoch": 0.2167950077352292, "grad_norm": 2.0531909772827444, "learning_rate": 9.112711686523353e-07, "loss": 0.4477, "step": 12472 }, { "epoch": 0.21681239027273203, "grad_norm": 1.511492489641878, "learning_rate": 9.112551593635376e-07, "loss": 0.3097, "step": 12473 }, { "epoch": 0.21682977281023483, "grad_norm": 2.0238928362154263, "learning_rate": 9.112391487712498e-07, "loss": 0.4986, "step": 12474 }, { "epoch": 0.21684715534773766, "grad_norm": 1.7040977984130037, "learning_rate": 9.11223136875523e-07, "loss": 0.422, "step": 12475 }, { "epoch": 0.2168645378852405, "grad_norm": 4.256418433643681, "learning_rate": 9.112071236764076e-07, "loss": 0.4444, "step": 12476 }, { "epoch": 0.21688192042274332, "grad_norm": 2.0627512575804507, "learning_rate": 9.111911091739546e-07, "loss": 0.6513, "step": 12477 }, { "epoch": 0.21689930296024615, "grad_norm": 1.5447906236292248, "learning_rate": 9.111750933682148e-07, "loss": 0.2138, "step": 12478 }, { "epoch": 0.21691668549774895, "grad_norm": 2.272011073665112, "learning_rate": 9.111590762592386e-07, "loss": 0.5536, "step": 12479 }, { "epoch": 0.21693406803525178, "grad_norm": 3.1576708286577677, "learning_rate": 9.111430578470772e-07, "loss": 0.4828, "step": 12480 }, { "epoch": 0.2169514505727546, "grad_norm": 1.9592244244475778, "learning_rate": 9.11127038131781e-07, "loss": 0.285, "step": 12481 }, { "epoch": 0.21696883311025744, "grad_norm": 1.5583819021805696, "learning_rate": 9.11111017113401e-07, "loss": 0.351, "step": 12482 }, { "epoch": 0.21698621564776027, "grad_norm": 3.0963559750328007, "learning_rate": 9.110949947919879e-07, "loss": 0.5263, "step": 12483 }, { "epoch": 0.21700359818526307, "grad_norm": 1.3070816531595701, "learning_rate": 9.110789711675926e-07, "loss": 0.2711, "step": 12484 }, { "epoch": 0.2170209807227659, "grad_norm": 2.1599773430995004, "learning_rate": 9.110629462402657e-07, "loss": 0.4106, "step": 12485 }, { "epoch": 0.21703836326026874, "grad_norm": 1.900185410912663, "learning_rate": 9.110469200100581e-07, "loss": 0.2759, "step": 12486 }, { "epoch": 0.21705574579777157, "grad_norm": 1.9255044557641596, "learning_rate": 9.110308924770207e-07, "loss": 0.4592, "step": 12487 }, { "epoch": 0.2170731283352744, "grad_norm": 2.0405449782310265, "learning_rate": 9.11014863641204e-07, "loss": 0.2707, "step": 12488 }, { "epoch": 0.2170905108727772, "grad_norm": 2.8496479383847997, "learning_rate": 9.109988335026591e-07, "loss": 0.5013, "step": 12489 }, { "epoch": 0.21710789341028003, "grad_norm": 1.9839388861300291, "learning_rate": 9.109828020614367e-07, "loss": 0.2793, "step": 12490 }, { "epoch": 0.21712527594778286, "grad_norm": 1.839196340332424, "learning_rate": 9.109667693175876e-07, "loss": 0.3581, "step": 12491 }, { "epoch": 0.2171426584852857, "grad_norm": 2.0817370599401204, "learning_rate": 9.109507352711626e-07, "loss": 0.3526, "step": 12492 }, { "epoch": 0.21716004102278852, "grad_norm": 1.605064525179395, "learning_rate": 9.109346999222125e-07, "loss": 0.3844, "step": 12493 }, { "epoch": 0.21717742356029132, "grad_norm": 2.107546989628836, "learning_rate": 9.109186632707883e-07, "loss": 0.3204, "step": 12494 }, { "epoch": 0.21719480609779415, "grad_norm": 1.8852239987690533, "learning_rate": 9.109026253169406e-07, "loss": 0.7452, "step": 12495 }, { "epoch": 0.21721218863529698, "grad_norm": 1.0981753226627224, "learning_rate": 9.108865860607204e-07, "loss": 0.4741, "step": 12496 }, { "epoch": 0.2172295711727998, "grad_norm": 2.0379402359737653, "learning_rate": 9.108705455021783e-07, "loss": 0.4124, "step": 12497 }, { "epoch": 0.21724695371030264, "grad_norm": 1.247570358365392, "learning_rate": 9.108545036413655e-07, "loss": 0.232, "step": 12498 }, { "epoch": 0.21726433624780545, "grad_norm": 4.5148309142087335, "learning_rate": 9.108384604783325e-07, "loss": 0.5036, "step": 12499 }, { "epoch": 0.21728171878530828, "grad_norm": 2.5036725498852044, "learning_rate": 9.108224160131305e-07, "loss": 0.692, "step": 12500 }, { "epoch": 0.2172991013228111, "grad_norm": 1.7456898514960133, "learning_rate": 9.1080637024581e-07, "loss": 0.2338, "step": 12501 }, { "epoch": 0.21731648386031394, "grad_norm": 2.1184747368760903, "learning_rate": 9.10790323176422e-07, "loss": 0.3517, "step": 12502 }, { "epoch": 0.21733386639781677, "grad_norm": 1.2024522730314269, "learning_rate": 9.107742748050176e-07, "loss": 0.2691, "step": 12503 }, { "epoch": 0.21735124893531957, "grad_norm": 1.4544366493173293, "learning_rate": 9.107582251316472e-07, "loss": 0.3741, "step": 12504 }, { "epoch": 0.2173686314728224, "grad_norm": 1.353457393770262, "learning_rate": 9.10742174156362e-07, "loss": 0.4269, "step": 12505 }, { "epoch": 0.21738601401032523, "grad_norm": 1.3869791603977062, "learning_rate": 9.107261218792128e-07, "loss": 0.2367, "step": 12506 }, { "epoch": 0.21740339654782806, "grad_norm": 3.8312886636862147, "learning_rate": 9.107100683002504e-07, "loss": 0.361, "step": 12507 }, { "epoch": 0.2174207790853309, "grad_norm": 1.7391743974214164, "learning_rate": 9.106940134195258e-07, "loss": 0.2464, "step": 12508 }, { "epoch": 0.2174381616228337, "grad_norm": 1.174446139837465, "learning_rate": 9.106779572370898e-07, "loss": 0.4148, "step": 12509 }, { "epoch": 0.21745554416033652, "grad_norm": 3.580299803922347, "learning_rate": 9.106618997529933e-07, "loss": 0.6768, "step": 12510 }, { "epoch": 0.21747292669783935, "grad_norm": 1.4452190618722611, "learning_rate": 9.106458409672873e-07, "loss": 0.347, "step": 12511 }, { "epoch": 0.21749030923534218, "grad_norm": 0.8969333774754423, "learning_rate": 9.106297808800226e-07, "loss": 0.4322, "step": 12512 }, { "epoch": 0.217507691772845, "grad_norm": 1.7021841240947855, "learning_rate": 9.106137194912502e-07, "loss": 0.4242, "step": 12513 }, { "epoch": 0.21752507431034782, "grad_norm": 2.1679252670411002, "learning_rate": 9.105976568010208e-07, "loss": 0.3778, "step": 12514 }, { "epoch": 0.21754245684785065, "grad_norm": 1.5525465616833465, "learning_rate": 9.105815928093856e-07, "loss": 0.425, "step": 12515 }, { "epoch": 0.21755983938535348, "grad_norm": 1.799143123259103, "learning_rate": 9.105655275163951e-07, "loss": 0.3986, "step": 12516 }, { "epoch": 0.2175772219228563, "grad_norm": 1.5794340125215274, "learning_rate": 9.105494609221007e-07, "loss": 0.248, "step": 12517 }, { "epoch": 0.21759460446035914, "grad_norm": 1.2250130564449746, "learning_rate": 9.10533393026553e-07, "loss": 0.2685, "step": 12518 }, { "epoch": 0.21761198699786194, "grad_norm": 2.3460891381032303, "learning_rate": 9.105173238298031e-07, "loss": 0.3795, "step": 12519 }, { "epoch": 0.21762936953536477, "grad_norm": 4.454894833196951, "learning_rate": 9.105012533319017e-07, "loss": 0.4194, "step": 12520 }, { "epoch": 0.2176467520728676, "grad_norm": 2.0057379001548004, "learning_rate": 9.104851815329e-07, "loss": 0.3332, "step": 12521 }, { "epoch": 0.21766413461037043, "grad_norm": 1.6769002624894225, "learning_rate": 9.104691084328488e-07, "loss": 0.2891, "step": 12522 }, { "epoch": 0.21768151714787326, "grad_norm": 2.295948643225001, "learning_rate": 9.104530340317992e-07, "loss": 0.3552, "step": 12523 }, { "epoch": 0.21769889968537606, "grad_norm": 1.4799308819999892, "learning_rate": 9.104369583298019e-07, "loss": 0.301, "step": 12524 }, { "epoch": 0.2177162822228789, "grad_norm": 1.7669174883634007, "learning_rate": 9.104208813269081e-07, "loss": 0.2484, "step": 12525 }, { "epoch": 0.21773366476038172, "grad_norm": 1.7613051167057594, "learning_rate": 9.104048030231685e-07, "loss": 0.2229, "step": 12526 }, { "epoch": 0.21775104729788455, "grad_norm": 2.3912835756590884, "learning_rate": 9.103887234186342e-07, "loss": 0.331, "step": 12527 }, { "epoch": 0.21776842983538736, "grad_norm": 1.5009215108374185, "learning_rate": 9.103726425133565e-07, "loss": 0.4292, "step": 12528 }, { "epoch": 0.2177858123728902, "grad_norm": 2.419461450586845, "learning_rate": 9.103565603073856e-07, "loss": 0.3784, "step": 12529 }, { "epoch": 0.21780319491039302, "grad_norm": 2.1786409980817227, "learning_rate": 9.103404768007732e-07, "loss": 0.2146, "step": 12530 }, { "epoch": 0.21782057744789585, "grad_norm": 1.4279019633312606, "learning_rate": 9.103243919935697e-07, "loss": 0.2588, "step": 12531 }, { "epoch": 0.21783795998539868, "grad_norm": 1.590863029887738, "learning_rate": 9.103083058858265e-07, "loss": 0.2276, "step": 12532 }, { "epoch": 0.21785534252290148, "grad_norm": 9.074753467709733, "learning_rate": 9.102922184775945e-07, "loss": 0.4924, "step": 12533 }, { "epoch": 0.2178727250604043, "grad_norm": 2.3279314116832754, "learning_rate": 9.102761297689247e-07, "loss": 0.3602, "step": 12534 }, { "epoch": 0.21789010759790714, "grad_norm": 1.5099182357713852, "learning_rate": 9.102600397598678e-07, "loss": 0.1821, "step": 12535 }, { "epoch": 0.21790749013540997, "grad_norm": 1.8686428091666605, "learning_rate": 9.102439484504753e-07, "loss": 0.3747, "step": 12536 }, { "epoch": 0.2179248726729128, "grad_norm": 1.6667218585259749, "learning_rate": 9.102278558407979e-07, "loss": 0.4128, "step": 12537 }, { "epoch": 0.2179422552104156, "grad_norm": 1.7334495441047806, "learning_rate": 9.102117619308864e-07, "loss": 0.4302, "step": 12538 }, { "epoch": 0.21795963774791843, "grad_norm": 2.9369605251866724, "learning_rate": 9.101956667207921e-07, "loss": 0.5128, "step": 12539 }, { "epoch": 0.21797702028542126, "grad_norm": 2.1052834693681595, "learning_rate": 9.101795702105661e-07, "loss": 0.3686, "step": 12540 }, { "epoch": 0.2179944028229241, "grad_norm": 2.035200981821274, "learning_rate": 9.101634724002593e-07, "loss": 0.3668, "step": 12541 }, { "epoch": 0.21801178536042692, "grad_norm": 1.6751961618662776, "learning_rate": 9.101473732899227e-07, "loss": 0.2468, "step": 12542 }, { "epoch": 0.21802916789792973, "grad_norm": 1.3090503032887617, "learning_rate": 9.101312728796073e-07, "loss": 0.2575, "step": 12543 }, { "epoch": 0.21804655043543256, "grad_norm": 4.022241032377252, "learning_rate": 9.101151711693642e-07, "loss": 0.3848, "step": 12544 }, { "epoch": 0.2180639329729354, "grad_norm": 0.848964696109494, "learning_rate": 9.100990681592443e-07, "loss": 0.2904, "step": 12545 }, { "epoch": 0.21808131551043822, "grad_norm": 1.7926087033410605, "learning_rate": 9.100829638492989e-07, "loss": 0.3194, "step": 12546 }, { "epoch": 0.21809869804794105, "grad_norm": 1.8617752812619124, "learning_rate": 9.100668582395787e-07, "loss": 0.3674, "step": 12547 }, { "epoch": 0.21811608058544385, "grad_norm": 1.7646087838840863, "learning_rate": 9.10050751330135e-07, "loss": 0.4324, "step": 12548 }, { "epoch": 0.21813346312294668, "grad_norm": 2.04098580127722, "learning_rate": 9.100346431210187e-07, "loss": 0.3651, "step": 12549 }, { "epoch": 0.2181508456604495, "grad_norm": 1.6458735041282215, "learning_rate": 9.10018533612281e-07, "loss": 0.3039, "step": 12550 }, { "epoch": 0.21816822819795234, "grad_norm": 1.570745259069785, "learning_rate": 9.10002422803973e-07, "loss": 0.2183, "step": 12551 }, { "epoch": 0.21818561073545517, "grad_norm": 3.4956364066055414, "learning_rate": 9.099863106961456e-07, "loss": 0.5025, "step": 12552 }, { "epoch": 0.21820299327295797, "grad_norm": 2.460745781962477, "learning_rate": 9.099701972888498e-07, "loss": 0.4805, "step": 12553 }, { "epoch": 0.2182203758104608, "grad_norm": 3.8836735524208676, "learning_rate": 9.09954082582137e-07, "loss": 0.3295, "step": 12554 }, { "epoch": 0.21823775834796363, "grad_norm": 1.9109912611663256, "learning_rate": 9.099379665760579e-07, "loss": 0.2415, "step": 12555 }, { "epoch": 0.21825514088546646, "grad_norm": 1.5318333075023836, "learning_rate": 9.099218492706639e-07, "loss": 0.3259, "step": 12556 }, { "epoch": 0.2182725234229693, "grad_norm": 2.534035833126464, "learning_rate": 9.099057306660058e-07, "loss": 0.3827, "step": 12557 }, { "epoch": 0.2182899059604721, "grad_norm": 1.8096322272488845, "learning_rate": 9.09889610762135e-07, "loss": 0.3032, "step": 12558 }, { "epoch": 0.21830728849797493, "grad_norm": 1.482620981281378, "learning_rate": 9.098734895591022e-07, "loss": 0.2855, "step": 12559 }, { "epoch": 0.21832467103547776, "grad_norm": 2.6370553136510178, "learning_rate": 9.098573670569589e-07, "loss": 0.2479, "step": 12560 }, { "epoch": 0.2183420535729806, "grad_norm": 1.8239184485820867, "learning_rate": 9.098412432557558e-07, "loss": 0.2718, "step": 12561 }, { "epoch": 0.21835943611048342, "grad_norm": 2.298685036792098, "learning_rate": 9.098251181555445e-07, "loss": 0.4599, "step": 12562 }, { "epoch": 0.21837681864798622, "grad_norm": 2.2171697138404403, "learning_rate": 9.098089917563758e-07, "loss": 0.3616, "step": 12563 }, { "epoch": 0.21839420118548905, "grad_norm": 1.0124340828403409, "learning_rate": 9.097928640583007e-07, "loss": 0.3014, "step": 12564 }, { "epoch": 0.21841158372299188, "grad_norm": 1.8505316864841164, "learning_rate": 9.097767350613706e-07, "loss": 0.2659, "step": 12565 }, { "epoch": 0.2184289662604947, "grad_norm": 3.683558359303915, "learning_rate": 9.097606047656363e-07, "loss": 0.4363, "step": 12566 }, { "epoch": 0.21844634879799754, "grad_norm": 1.2214711939800622, "learning_rate": 9.097444731711492e-07, "loss": 0.4143, "step": 12567 }, { "epoch": 0.21846373133550034, "grad_norm": 1.331547600471252, "learning_rate": 9.097283402779604e-07, "loss": 0.1962, "step": 12568 }, { "epoch": 0.21848111387300317, "grad_norm": 2.2090983038702867, "learning_rate": 9.09712206086121e-07, "loss": 0.306, "step": 12569 }, { "epoch": 0.218498496410506, "grad_norm": 2.3415765796165404, "learning_rate": 9.096960705956819e-07, "loss": 0.4039, "step": 12570 }, { "epoch": 0.21851587894800883, "grad_norm": 2.6016873285570745, "learning_rate": 9.096799338066947e-07, "loss": 0.4337, "step": 12571 }, { "epoch": 0.21853326148551167, "grad_norm": 1.368388740963887, "learning_rate": 9.096637957192102e-07, "loss": 0.432, "step": 12572 }, { "epoch": 0.21855064402301447, "grad_norm": 1.6032979810265455, "learning_rate": 9.096476563332797e-07, "loss": 0.3543, "step": 12573 }, { "epoch": 0.2185680265605173, "grad_norm": 1.50607906328087, "learning_rate": 9.096315156489542e-07, "loss": 0.2475, "step": 12574 }, { "epoch": 0.21858540909802013, "grad_norm": 2.553331928362462, "learning_rate": 9.09615373666285e-07, "loss": 0.502, "step": 12575 }, { "epoch": 0.21860279163552296, "grad_norm": 1.3769607710941711, "learning_rate": 9.095992303853233e-07, "loss": 0.2676, "step": 12576 }, { "epoch": 0.2186201741730258, "grad_norm": 2.3396658324182424, "learning_rate": 9.095830858061202e-07, "loss": 0.2829, "step": 12577 }, { "epoch": 0.2186375567105286, "grad_norm": 1.5970553726150651, "learning_rate": 9.095669399287269e-07, "loss": 0.1878, "step": 12578 }, { "epoch": 0.21865493924803142, "grad_norm": 1.8823232521254083, "learning_rate": 9.095507927531945e-07, "loss": 0.2792, "step": 12579 }, { "epoch": 0.21867232178553425, "grad_norm": 2.0987802955485177, "learning_rate": 9.095346442795741e-07, "loss": 0.2665, "step": 12580 }, { "epoch": 0.21868970432303708, "grad_norm": 1.5503352296027628, "learning_rate": 9.095184945079171e-07, "loss": 0.3342, "step": 12581 }, { "epoch": 0.2187070868605399, "grad_norm": 1.8370812565022345, "learning_rate": 9.095023434382745e-07, "loss": 0.297, "step": 12582 }, { "epoch": 0.21872446939804271, "grad_norm": 1.9324609838376094, "learning_rate": 9.094861910706976e-07, "loss": 0.2328, "step": 12583 }, { "epoch": 0.21874185193554554, "grad_norm": 1.4297265365630913, "learning_rate": 9.094700374052378e-07, "loss": 0.3103, "step": 12584 }, { "epoch": 0.21875923447304838, "grad_norm": 1.6069711945872966, "learning_rate": 9.094538824419458e-07, "loss": 0.3356, "step": 12585 }, { "epoch": 0.2187766170105512, "grad_norm": 1.9193871324992091, "learning_rate": 9.094377261808732e-07, "loss": 0.3874, "step": 12586 }, { "epoch": 0.21879399954805404, "grad_norm": 2.422744559730374, "learning_rate": 9.094215686220711e-07, "loss": 0.4556, "step": 12587 }, { "epoch": 0.21881138208555684, "grad_norm": 1.1778091495937382, "learning_rate": 9.094054097655908e-07, "loss": 0.1817, "step": 12588 }, { "epoch": 0.21882876462305967, "grad_norm": 1.8724761059705985, "learning_rate": 9.093892496114832e-07, "loss": 0.3857, "step": 12589 }, { "epoch": 0.2188461471605625, "grad_norm": 1.2688204121238502, "learning_rate": 9.093730881597999e-07, "loss": 0.3313, "step": 12590 }, { "epoch": 0.21886352969806533, "grad_norm": 2.2187892412943304, "learning_rate": 9.093569254105918e-07, "loss": 0.2333, "step": 12591 }, { "epoch": 0.21888091223556816, "grad_norm": 2.019281643839363, "learning_rate": 9.093407613639103e-07, "loss": 0.2988, "step": 12592 }, { "epoch": 0.21889829477307096, "grad_norm": 2.140457132284549, "learning_rate": 9.093245960198068e-07, "loss": 0.2777, "step": 12593 }, { "epoch": 0.2189156773105738, "grad_norm": 1.5074144043424829, "learning_rate": 9.093084293783321e-07, "loss": 0.3106, "step": 12594 }, { "epoch": 0.21893305984807662, "grad_norm": 3.3892966890432628, "learning_rate": 9.09292261439538e-07, "loss": 0.4796, "step": 12595 }, { "epoch": 0.21895044238557945, "grad_norm": 1.5172171966805486, "learning_rate": 9.092760922034752e-07, "loss": 0.2588, "step": 12596 }, { "epoch": 0.21896782492308228, "grad_norm": 1.4733498563062024, "learning_rate": 9.092599216701953e-07, "loss": 0.319, "step": 12597 }, { "epoch": 0.21898520746058509, "grad_norm": 1.7953693521008232, "learning_rate": 9.092437498397496e-07, "loss": 0.2382, "step": 12598 }, { "epoch": 0.21900258999808792, "grad_norm": 1.7797897293247695, "learning_rate": 9.092275767121891e-07, "loss": 0.4002, "step": 12599 }, { "epoch": 0.21901997253559075, "grad_norm": 1.6653723755880072, "learning_rate": 9.09211402287565e-07, "loss": 0.53, "step": 12600 }, { "epoch": 0.21903735507309358, "grad_norm": 2.2342782932296026, "learning_rate": 9.091952265659288e-07, "loss": 0.3771, "step": 12601 }, { "epoch": 0.2190547376105964, "grad_norm": 3.2552304447200617, "learning_rate": 9.091790495473318e-07, "loss": 0.3708, "step": 12602 }, { "epoch": 0.2190721201480992, "grad_norm": 1.6841637379304788, "learning_rate": 9.091628712318251e-07, "loss": 0.4084, "step": 12603 }, { "epoch": 0.21908950268560204, "grad_norm": 2.3985380779950036, "learning_rate": 9.091466916194602e-07, "loss": 0.2788, "step": 12604 }, { "epoch": 0.21910688522310487, "grad_norm": 2.300296572547339, "learning_rate": 9.09130510710288e-07, "loss": 0.4907, "step": 12605 }, { "epoch": 0.2191242677606077, "grad_norm": 1.910530972232003, "learning_rate": 9.0911432850436e-07, "loss": 0.5529, "step": 12606 }, { "epoch": 0.21914165029811053, "grad_norm": 2.3801754383228446, "learning_rate": 9.090981450017279e-07, "loss": 0.5173, "step": 12607 }, { "epoch": 0.21915903283561333, "grad_norm": 1.5901287740976415, "learning_rate": 9.090819602024423e-07, "loss": 0.2724, "step": 12608 }, { "epoch": 0.21917641537311616, "grad_norm": 2.285093417528508, "learning_rate": 9.090657741065548e-07, "loss": 0.4542, "step": 12609 }, { "epoch": 0.219193797910619, "grad_norm": 2.0434653254292052, "learning_rate": 9.090495867141168e-07, "loss": 0.3993, "step": 12610 }, { "epoch": 0.21921118044812182, "grad_norm": 1.9411657756344063, "learning_rate": 9.090333980251795e-07, "loss": 0.5452, "step": 12611 }, { "epoch": 0.21922856298562465, "grad_norm": 1.4859895330916317, "learning_rate": 9.090172080397941e-07, "loss": 0.2042, "step": 12612 }, { "epoch": 0.21924594552312746, "grad_norm": 2.0782105946287697, "learning_rate": 9.090010167580121e-07, "loss": 0.4188, "step": 12613 }, { "epoch": 0.21926332806063029, "grad_norm": 2.811141610646273, "learning_rate": 9.089848241798847e-07, "loss": 0.4663, "step": 12614 }, { "epoch": 0.21928071059813312, "grad_norm": 2.569866798715728, "learning_rate": 9.089686303054635e-07, "loss": 0.4065, "step": 12615 }, { "epoch": 0.21929809313563595, "grad_norm": 1.6821647106969746, "learning_rate": 9.089524351347995e-07, "loss": 0.2055, "step": 12616 }, { "epoch": 0.21931547567313878, "grad_norm": 1.6888881500406947, "learning_rate": 9.08936238667944e-07, "loss": 0.2961, "step": 12617 }, { "epoch": 0.21933285821064158, "grad_norm": 2.1907366920852485, "learning_rate": 9.089200409049487e-07, "loss": 0.323, "step": 12618 }, { "epoch": 0.2193502407481444, "grad_norm": 1.3838457901820211, "learning_rate": 9.089038418458645e-07, "loss": 0.1908, "step": 12619 }, { "epoch": 0.21936762328564724, "grad_norm": 1.606553035441834, "learning_rate": 9.088876414907432e-07, "loss": 0.3669, "step": 12620 }, { "epoch": 0.21938500582315007, "grad_norm": 2.3824756379007064, "learning_rate": 9.088714398396355e-07, "loss": 0.3231, "step": 12621 }, { "epoch": 0.2194023883606529, "grad_norm": 1.335154520392342, "learning_rate": 9.088552368925935e-07, "loss": 0.186, "step": 12622 }, { "epoch": 0.2194197708981557, "grad_norm": 1.5239888420565255, "learning_rate": 9.088390326496682e-07, "loss": 0.4614, "step": 12623 }, { "epoch": 0.21943715343565853, "grad_norm": 1.847614120806055, "learning_rate": 9.088228271109108e-07, "loss": 0.4351, "step": 12624 }, { "epoch": 0.21945453597316136, "grad_norm": 3.360719923543681, "learning_rate": 9.088066202763729e-07, "loss": 0.4854, "step": 12625 }, { "epoch": 0.2194719185106642, "grad_norm": 1.413808389221648, "learning_rate": 9.087904121461058e-07, "loss": 0.2759, "step": 12626 }, { "epoch": 0.21948930104816702, "grad_norm": 1.630225003391781, "learning_rate": 9.087742027201611e-07, "loss": 0.2772, "step": 12627 }, { "epoch": 0.21950668358566983, "grad_norm": 1.9038729927721167, "learning_rate": 9.087579919985898e-07, "loss": 0.4541, "step": 12628 }, { "epoch": 0.21952406612317266, "grad_norm": 1.614145999103254, "learning_rate": 9.087417799814432e-07, "loss": 0.2799, "step": 12629 }, { "epoch": 0.2195414486606755, "grad_norm": 1.212611966668378, "learning_rate": 9.087255666687732e-07, "loss": 0.291, "step": 12630 }, { "epoch": 0.21955883119817832, "grad_norm": 2.7414006942719746, "learning_rate": 9.087093520606309e-07, "loss": 0.3925, "step": 12631 }, { "epoch": 0.21957621373568115, "grad_norm": 1.1987908974224244, "learning_rate": 9.086931361570675e-07, "loss": 0.2531, "step": 12632 }, { "epoch": 0.21959359627318395, "grad_norm": 0.8929478663669704, "learning_rate": 9.086769189581347e-07, "loss": 0.3667, "step": 12633 }, { "epoch": 0.21961097881068678, "grad_norm": 1.905695927319948, "learning_rate": 9.086607004638839e-07, "loss": 0.3706, "step": 12634 }, { "epoch": 0.2196283613481896, "grad_norm": 1.7137553669676107, "learning_rate": 9.086444806743663e-07, "loss": 0.2799, "step": 12635 }, { "epoch": 0.21964574388569244, "grad_norm": 1.4592426162560683, "learning_rate": 9.086282595896334e-07, "loss": 0.263, "step": 12636 }, { "epoch": 0.21966312642319527, "grad_norm": 1.8656588168896502, "learning_rate": 9.086120372097367e-07, "loss": 0.2887, "step": 12637 }, { "epoch": 0.21968050896069807, "grad_norm": 1.774455212901175, "learning_rate": 9.085958135347274e-07, "loss": 0.2176, "step": 12638 }, { "epoch": 0.2196978914982009, "grad_norm": 1.7538600117983367, "learning_rate": 9.085795885646572e-07, "loss": 0.4929, "step": 12639 }, { "epoch": 0.21971527403570373, "grad_norm": 2.347569350833307, "learning_rate": 9.085633622995774e-07, "loss": 0.3093, "step": 12640 }, { "epoch": 0.21973265657320656, "grad_norm": 1.8797188849281812, "learning_rate": 9.085471347395392e-07, "loss": 0.2172, "step": 12641 }, { "epoch": 0.2197500391107094, "grad_norm": 1.9758542292105978, "learning_rate": 9.085309058845946e-07, "loss": 0.3362, "step": 12642 }, { "epoch": 0.2197674216482122, "grad_norm": 1.5823093999796052, "learning_rate": 9.085146757347945e-07, "loss": 0.2756, "step": 12643 }, { "epoch": 0.21978480418571503, "grad_norm": 3.720038500053135, "learning_rate": 9.084984442901907e-07, "loss": 0.5259, "step": 12644 }, { "epoch": 0.21980218672321786, "grad_norm": 1.2547070081372058, "learning_rate": 9.084822115508341e-07, "loss": 0.2257, "step": 12645 }, { "epoch": 0.2198195692607207, "grad_norm": 1.9367676978267636, "learning_rate": 9.084659775167769e-07, "loss": 0.2926, "step": 12646 }, { "epoch": 0.21983695179822352, "grad_norm": 1.7423054174368233, "learning_rate": 9.0844974218807e-07, "loss": 0.3173, "step": 12647 }, { "epoch": 0.21985433433572632, "grad_norm": 1.2050924719815757, "learning_rate": 9.084335055647652e-07, "loss": 0.3656, "step": 12648 }, { "epoch": 0.21987171687322915, "grad_norm": 1.489144707548487, "learning_rate": 9.084172676469135e-07, "loss": 0.2559, "step": 12649 }, { "epoch": 0.21988909941073198, "grad_norm": 1.5085414231051055, "learning_rate": 9.084010284345668e-07, "loss": 0.3022, "step": 12650 }, { "epoch": 0.2199064819482348, "grad_norm": 1.7248386490467713, "learning_rate": 9.083847879277766e-07, "loss": 0.2865, "step": 12651 }, { "epoch": 0.21992386448573764, "grad_norm": 2.0569099723439637, "learning_rate": 9.083685461265941e-07, "loss": 0.2983, "step": 12652 }, { "epoch": 0.21994124702324044, "grad_norm": 2.331752081103079, "learning_rate": 9.08352303031071e-07, "loss": 0.4002, "step": 12653 }, { "epoch": 0.21995862956074327, "grad_norm": 2.77568194733847, "learning_rate": 9.083360586412588e-07, "loss": 0.3417, "step": 12654 }, { "epoch": 0.2199760120982461, "grad_norm": 2.6183092498595815, "learning_rate": 9.083198129572085e-07, "loss": 0.4021, "step": 12655 }, { "epoch": 0.21999339463574893, "grad_norm": 2.2364475877309067, "learning_rate": 9.083035659789722e-07, "loss": 0.4957, "step": 12656 }, { "epoch": 0.22001077717325176, "grad_norm": 1.6232019466732757, "learning_rate": 9.08287317706601e-07, "loss": 0.3162, "step": 12657 }, { "epoch": 0.22002815971075457, "grad_norm": 4.847928794453749, "learning_rate": 9.082710681401467e-07, "loss": 0.4612, "step": 12658 }, { "epoch": 0.2200455422482574, "grad_norm": 2.3733044208138456, "learning_rate": 9.082548172796607e-07, "loss": 0.3641, "step": 12659 }, { "epoch": 0.22006292478576023, "grad_norm": 1.4401706351737873, "learning_rate": 9.082385651251943e-07, "loss": 0.2803, "step": 12660 }, { "epoch": 0.22008030732326306, "grad_norm": 2.073954913093828, "learning_rate": 9.082223116767993e-07, "loss": 0.3198, "step": 12661 }, { "epoch": 0.2200976898607659, "grad_norm": 1.0924769512859582, "learning_rate": 9.08206056934527e-07, "loss": 0.1096, "step": 12662 }, { "epoch": 0.2201150723982687, "grad_norm": 1.7668277104350163, "learning_rate": 9.081898008984291e-07, "loss": 0.422, "step": 12663 }, { "epoch": 0.22013245493577152, "grad_norm": 1.5037906984308635, "learning_rate": 9.08173543568557e-07, "loss": 0.4853, "step": 12664 }, { "epoch": 0.22014983747327435, "grad_norm": 2.7888862818569486, "learning_rate": 9.081572849449622e-07, "loss": 0.5074, "step": 12665 }, { "epoch": 0.22016722001077718, "grad_norm": 2.6643407300678326, "learning_rate": 9.081410250276964e-07, "loss": 0.3841, "step": 12666 }, { "epoch": 0.22018460254828, "grad_norm": 1.8810546911861243, "learning_rate": 9.08124763816811e-07, "loss": 0.4696, "step": 12667 }, { "epoch": 0.22020198508578281, "grad_norm": 2.5613610845720984, "learning_rate": 9.081085013123577e-07, "loss": 0.3365, "step": 12668 }, { "epoch": 0.22021936762328564, "grad_norm": 2.185689140482373, "learning_rate": 9.080922375143878e-07, "loss": 0.3364, "step": 12669 }, { "epoch": 0.22023675016078847, "grad_norm": 2.4425461375401, "learning_rate": 9.080759724229529e-07, "loss": 0.2538, "step": 12670 }, { "epoch": 0.2202541326982913, "grad_norm": 1.3715091950587663, "learning_rate": 9.080597060381047e-07, "loss": 0.3265, "step": 12671 }, { "epoch": 0.2202715152357941, "grad_norm": 2.8842852519078748, "learning_rate": 9.080434383598947e-07, "loss": 0.4444, "step": 12672 }, { "epoch": 0.22028889777329694, "grad_norm": 2.078433580432042, "learning_rate": 9.080271693883744e-07, "loss": 0.2997, "step": 12673 }, { "epoch": 0.22030628031079977, "grad_norm": 1.5614245949665901, "learning_rate": 9.080108991235953e-07, "loss": 0.3422, "step": 12674 }, { "epoch": 0.2203236628483026, "grad_norm": 2.0079528872898846, "learning_rate": 9.079946275656093e-07, "loss": 0.3399, "step": 12675 }, { "epoch": 0.22034104538580543, "grad_norm": 2.552185496305348, "learning_rate": 9.079783547144676e-07, "loss": 0.3296, "step": 12676 }, { "epoch": 0.22035842792330823, "grad_norm": 3.9486167304149506, "learning_rate": 9.07962080570222e-07, "loss": 0.3283, "step": 12677 }, { "epoch": 0.22037581046081106, "grad_norm": 1.0971628831865883, "learning_rate": 9.07945805132924e-07, "loss": 0.1555, "step": 12678 }, { "epoch": 0.2203931929983139, "grad_norm": 2.4320972240867205, "learning_rate": 9.079295284026251e-07, "loss": 0.4569, "step": 12679 }, { "epoch": 0.22041057553581672, "grad_norm": 2.1196479650050604, "learning_rate": 9.079132503793771e-07, "loss": 0.4173, "step": 12680 }, { "epoch": 0.22042795807331955, "grad_norm": 2.9802591489243087, "learning_rate": 9.078969710632314e-07, "loss": 0.3687, "step": 12681 }, { "epoch": 0.22044534061082235, "grad_norm": 1.7739089293404857, "learning_rate": 9.078806904542397e-07, "loss": 0.2817, "step": 12682 }, { "epoch": 0.22046272314832518, "grad_norm": 1.2545633239005285, "learning_rate": 9.078644085524535e-07, "loss": 0.2045, "step": 12683 }, { "epoch": 0.22048010568582802, "grad_norm": 1.5139664827946606, "learning_rate": 9.078481253579247e-07, "loss": 0.3737, "step": 12684 }, { "epoch": 0.22049748822333085, "grad_norm": 1.6401839916562866, "learning_rate": 9.078318408707044e-07, "loss": 0.3538, "step": 12685 }, { "epoch": 0.22051487076083368, "grad_norm": 2.0266232503935644, "learning_rate": 9.078155550908446e-07, "loss": 0.3664, "step": 12686 }, { "epoch": 0.22053225329833648, "grad_norm": 1.6867903116296443, "learning_rate": 9.07799268018397e-07, "loss": 0.4616, "step": 12687 }, { "epoch": 0.2205496358358393, "grad_norm": 2.2355197668161657, "learning_rate": 9.077829796534129e-07, "loss": 0.2815, "step": 12688 }, { "epoch": 0.22056701837334214, "grad_norm": 2.021977743887627, "learning_rate": 9.077666899959441e-07, "loss": 0.2964, "step": 12689 }, { "epoch": 0.22058440091084497, "grad_norm": 1.60902450852381, "learning_rate": 9.077503990460423e-07, "loss": 0.2239, "step": 12690 }, { "epoch": 0.2206017834483478, "grad_norm": 1.7692196907522846, "learning_rate": 9.077341068037588e-07, "loss": 0.3552, "step": 12691 }, { "epoch": 0.2206191659858506, "grad_norm": 1.4114213531172675, "learning_rate": 9.077178132691456e-07, "loss": 0.4411, "step": 12692 }, { "epoch": 0.22063654852335343, "grad_norm": 2.744459650332732, "learning_rate": 9.077015184422543e-07, "loss": 0.3106, "step": 12693 }, { "epoch": 0.22065393106085626, "grad_norm": 1.706873385113337, "learning_rate": 9.076852223231364e-07, "loss": 0.4049, "step": 12694 }, { "epoch": 0.2206713135983591, "grad_norm": 2.1386948051320096, "learning_rate": 9.076689249118436e-07, "loss": 0.2799, "step": 12695 }, { "epoch": 0.22068869613586192, "grad_norm": 2.3101866497531356, "learning_rate": 9.076526262084276e-07, "loss": 0.4497, "step": 12696 }, { "epoch": 0.22070607867336473, "grad_norm": 24.4537594778201, "learning_rate": 9.076363262129402e-07, "loss": 0.5496, "step": 12697 }, { "epoch": 0.22072346121086756, "grad_norm": 1.368416464414717, "learning_rate": 9.076200249254326e-07, "loss": 0.2108, "step": 12698 }, { "epoch": 0.22074084374837039, "grad_norm": 2.8905079239232285, "learning_rate": 9.07603722345957e-07, "loss": 0.4892, "step": 12699 }, { "epoch": 0.22075822628587322, "grad_norm": 1.5284654215665714, "learning_rate": 9.075874184745648e-07, "loss": 0.1926, "step": 12700 }, { "epoch": 0.22077560882337605, "grad_norm": 3.4518251865176004, "learning_rate": 9.075711133113076e-07, "loss": 0.4527, "step": 12701 }, { "epoch": 0.22079299136087885, "grad_norm": 2.6434138818154915, "learning_rate": 9.075548068562373e-07, "loss": 0.2977, "step": 12702 }, { "epoch": 0.22081037389838168, "grad_norm": 2.4181779354663115, "learning_rate": 9.075384991094055e-07, "loss": 0.4721, "step": 12703 }, { "epoch": 0.2208277564358845, "grad_norm": 2.0685891560664693, "learning_rate": 9.075221900708639e-07, "loss": 0.3628, "step": 12704 }, { "epoch": 0.22084513897338734, "grad_norm": 1.7622567360983115, "learning_rate": 9.07505879740664e-07, "loss": 0.3192, "step": 12705 }, { "epoch": 0.22086252151089017, "grad_norm": 2.042369797125241, "learning_rate": 9.074895681188577e-07, "loss": 0.2789, "step": 12706 }, { "epoch": 0.22087990404839297, "grad_norm": 1.4290250532239335, "learning_rate": 9.074732552054967e-07, "loss": 0.2488, "step": 12707 }, { "epoch": 0.2208972865858958, "grad_norm": 1.889071146618978, "learning_rate": 9.074569410006325e-07, "loss": 0.3931, "step": 12708 }, { "epoch": 0.22091466912339863, "grad_norm": 1.4844386846167053, "learning_rate": 9.074406255043172e-07, "loss": 0.2872, "step": 12709 }, { "epoch": 0.22093205166090146, "grad_norm": 1.7658538868260272, "learning_rate": 9.074243087166021e-07, "loss": 0.3346, "step": 12710 }, { "epoch": 0.2209494341984043, "grad_norm": 2.164363786418397, "learning_rate": 9.074079906375391e-07, "loss": 0.2358, "step": 12711 }, { "epoch": 0.2209668167359071, "grad_norm": 2.3495985717035524, "learning_rate": 9.073916712671801e-07, "loss": 0.4188, "step": 12712 }, { "epoch": 0.22098419927340993, "grad_norm": 2.736697429149727, "learning_rate": 9.073753506055766e-07, "loss": 0.2828, "step": 12713 }, { "epoch": 0.22100158181091276, "grad_norm": 1.4074184423461413, "learning_rate": 9.073590286527801e-07, "loss": 0.2708, "step": 12714 }, { "epoch": 0.2210189643484156, "grad_norm": 3.1901440925106823, "learning_rate": 9.07342705408843e-07, "loss": 0.3594, "step": 12715 }, { "epoch": 0.22103634688591842, "grad_norm": 1.3382195049732326, "learning_rate": 9.073263808738163e-07, "loss": 0.1962, "step": 12716 }, { "epoch": 0.22105372942342122, "grad_norm": 6.193225721281993, "learning_rate": 9.073100550477523e-07, "loss": 0.3574, "step": 12717 }, { "epoch": 0.22107111196092405, "grad_norm": 1.5965176036235018, "learning_rate": 9.072937279307024e-07, "loss": 0.1842, "step": 12718 }, { "epoch": 0.22108849449842688, "grad_norm": 1.4909319972970145, "learning_rate": 9.072773995227187e-07, "loss": 0.3509, "step": 12719 }, { "epoch": 0.2211058770359297, "grad_norm": 1.3029316483603257, "learning_rate": 9.072610698238525e-07, "loss": 0.2667, "step": 12720 }, { "epoch": 0.22112325957343254, "grad_norm": 1.6999160391849226, "learning_rate": 9.072447388341559e-07, "loss": 0.2868, "step": 12721 }, { "epoch": 0.22114064211093534, "grad_norm": 2.1298556350695335, "learning_rate": 9.072284065536803e-07, "loss": 0.4755, "step": 12722 }, { "epoch": 0.22115802464843817, "grad_norm": 1.5807713626008189, "learning_rate": 9.072120729824781e-07, "loss": 0.2835, "step": 12723 }, { "epoch": 0.221175407185941, "grad_norm": 2.541225933952539, "learning_rate": 9.071957381206006e-07, "loss": 0.2575, "step": 12724 }, { "epoch": 0.22119278972344383, "grad_norm": 4.211990468351535, "learning_rate": 9.071794019680995e-07, "loss": 0.2624, "step": 12725 }, { "epoch": 0.22121017226094666, "grad_norm": 1.668129022213025, "learning_rate": 9.071630645250267e-07, "loss": 0.4278, "step": 12726 }, { "epoch": 0.22122755479844947, "grad_norm": 2.952729981770618, "learning_rate": 9.071467257914343e-07, "loss": 0.4892, "step": 12727 }, { "epoch": 0.2212449373359523, "grad_norm": 1.6188079677421345, "learning_rate": 9.071303857673736e-07, "loss": 0.322, "step": 12728 }, { "epoch": 0.22126231987345513, "grad_norm": 1.173688220620258, "learning_rate": 9.071140444528966e-07, "loss": 0.3359, "step": 12729 }, { "epoch": 0.22127970241095796, "grad_norm": 1.7015326508985984, "learning_rate": 9.070977018480551e-07, "loss": 0.2362, "step": 12730 }, { "epoch": 0.2212970849484608, "grad_norm": 1.41848413239014, "learning_rate": 9.070813579529009e-07, "loss": 0.2483, "step": 12731 }, { "epoch": 0.2213144674859636, "grad_norm": 4.25774684479896, "learning_rate": 9.07065012767486e-07, "loss": 0.3083, "step": 12732 }, { "epoch": 0.22133185002346642, "grad_norm": 2.199284488360941, "learning_rate": 9.070486662918617e-07, "loss": 0.2994, "step": 12733 }, { "epoch": 0.22134923256096925, "grad_norm": 1.5062775746434722, "learning_rate": 9.070323185260802e-07, "loss": 0.2927, "step": 12734 }, { "epoch": 0.22136661509847208, "grad_norm": 1.3085127112213064, "learning_rate": 9.070159694701934e-07, "loss": 0.2818, "step": 12735 }, { "epoch": 0.2213839976359749, "grad_norm": 2.2523716643832348, "learning_rate": 9.069996191242527e-07, "loss": 0.4856, "step": 12736 }, { "epoch": 0.2214013801734777, "grad_norm": 1.2392258618111829, "learning_rate": 9.069832674883102e-07, "loss": 0.2536, "step": 12737 }, { "epoch": 0.22141876271098054, "grad_norm": 1.3007182557790142, "learning_rate": 9.069669145624178e-07, "loss": 0.3261, "step": 12738 }, { "epoch": 0.22143614524848337, "grad_norm": 1.3985747497363459, "learning_rate": 9.069505603466272e-07, "loss": 0.3697, "step": 12739 }, { "epoch": 0.2214535277859862, "grad_norm": 2.1049354227993233, "learning_rate": 9.069342048409903e-07, "loss": 0.3491, "step": 12740 }, { "epoch": 0.22147091032348903, "grad_norm": 2.1399841860363793, "learning_rate": 9.069178480455588e-07, "loss": 0.6955, "step": 12741 }, { "epoch": 0.22148829286099184, "grad_norm": 1.980704979347775, "learning_rate": 9.069014899603846e-07, "loss": 0.56, "step": 12742 }, { "epoch": 0.22150567539849467, "grad_norm": 1.879369638424459, "learning_rate": 9.068851305855199e-07, "loss": 0.2019, "step": 12743 }, { "epoch": 0.2215230579359975, "grad_norm": 2.2287367342496354, "learning_rate": 9.068687699210159e-07, "loss": 0.3189, "step": 12744 }, { "epoch": 0.22154044047350033, "grad_norm": 1.4752782095928865, "learning_rate": 9.068524079669248e-07, "loss": 0.1813, "step": 12745 }, { "epoch": 0.22155782301100316, "grad_norm": 2.738999533602266, "learning_rate": 9.068360447232987e-07, "loss": 0.3725, "step": 12746 }, { "epoch": 0.22157520554850596, "grad_norm": 1.6004993266469252, "learning_rate": 9.06819680190189e-07, "loss": 0.3909, "step": 12747 }, { "epoch": 0.2215925880860088, "grad_norm": 1.278009301336801, "learning_rate": 9.06803314367648e-07, "loss": 0.2121, "step": 12748 }, { "epoch": 0.22160997062351162, "grad_norm": 1.816864538189791, "learning_rate": 9.067869472557272e-07, "loss": 0.3418, "step": 12749 }, { "epoch": 0.22162735316101445, "grad_norm": 1.6708081304173428, "learning_rate": 9.067705788544788e-07, "loss": 0.3289, "step": 12750 }, { "epoch": 0.22164473569851728, "grad_norm": 3.1758498460023956, "learning_rate": 9.067542091639544e-07, "loss": 0.3662, "step": 12751 }, { "epoch": 0.22166211823602008, "grad_norm": 2.4345207207202098, "learning_rate": 9.067378381842061e-07, "loss": 0.2779, "step": 12752 }, { "epoch": 0.22167950077352291, "grad_norm": 1.6676408402655265, "learning_rate": 9.067214659152856e-07, "loss": 0.1867, "step": 12753 }, { "epoch": 0.22169688331102574, "grad_norm": 2.5383794606180623, "learning_rate": 9.067050923572449e-07, "loss": 0.2135, "step": 12754 }, { "epoch": 0.22171426584852857, "grad_norm": 2.6674660224252915, "learning_rate": 9.066887175101358e-07, "loss": 0.7507, "step": 12755 }, { "epoch": 0.2217316483860314, "grad_norm": 2.7231221244301986, "learning_rate": 9.066723413740105e-07, "loss": 0.4988, "step": 12756 }, { "epoch": 0.2217490309235342, "grad_norm": 1.4077878379949609, "learning_rate": 9.066559639489206e-07, "loss": 0.1842, "step": 12757 }, { "epoch": 0.22176641346103704, "grad_norm": 1.7637899990006518, "learning_rate": 9.06639585234918e-07, "loss": 0.1988, "step": 12758 }, { "epoch": 0.22178379599853987, "grad_norm": 1.9207285142124917, "learning_rate": 9.066232052320549e-07, "loss": 0.169, "step": 12759 }, { "epoch": 0.2218011785360427, "grad_norm": 2.4861782567310375, "learning_rate": 9.066068239403828e-07, "loss": 0.4522, "step": 12760 }, { "epoch": 0.22181856107354553, "grad_norm": 1.3039961105726616, "learning_rate": 9.065904413599539e-07, "loss": 0.2818, "step": 12761 }, { "epoch": 0.22183594361104833, "grad_norm": 1.3815971618497878, "learning_rate": 9.065740574908201e-07, "loss": 0.3116, "step": 12762 }, { "epoch": 0.22185332614855116, "grad_norm": 1.2772224939057253, "learning_rate": 9.065576723330332e-07, "loss": 0.1621, "step": 12763 }, { "epoch": 0.221870708686054, "grad_norm": 1.1656995968798074, "learning_rate": 9.065412858866455e-07, "loss": 0.3309, "step": 12764 }, { "epoch": 0.22188809122355682, "grad_norm": 2.91870260146852, "learning_rate": 9.065248981517086e-07, "loss": 0.4217, "step": 12765 }, { "epoch": 0.22190547376105965, "grad_norm": 1.6886922900486867, "learning_rate": 9.065085091282743e-07, "loss": 0.5432, "step": 12766 }, { "epoch": 0.22192285629856245, "grad_norm": 2.9134664742091805, "learning_rate": 9.064921188163949e-07, "loss": 0.5385, "step": 12767 }, { "epoch": 0.22194023883606528, "grad_norm": 1.9356041503838715, "learning_rate": 9.064757272161223e-07, "loss": 0.232, "step": 12768 }, { "epoch": 0.22195762137356811, "grad_norm": 1.267375063709476, "learning_rate": 9.064593343275083e-07, "loss": 0.2235, "step": 12769 }, { "epoch": 0.22197500391107094, "grad_norm": 1.6973530436320792, "learning_rate": 9.064429401506048e-07, "loss": 0.303, "step": 12770 }, { "epoch": 0.22199238644857378, "grad_norm": 1.2941929646677917, "learning_rate": 9.064265446854641e-07, "loss": 0.3094, "step": 12771 }, { "epoch": 0.22200976898607658, "grad_norm": 2.2253595157620687, "learning_rate": 9.064101479321378e-07, "loss": 0.3697, "step": 12772 }, { "epoch": 0.2220271515235794, "grad_norm": 1.8153187401134407, "learning_rate": 9.063937498906781e-07, "loss": 0.3584, "step": 12773 }, { "epoch": 0.22204453406108224, "grad_norm": 1.2891509256541733, "learning_rate": 9.063773505611367e-07, "loss": 0.3527, "step": 12774 }, { "epoch": 0.22206191659858507, "grad_norm": 1.9763531245611587, "learning_rate": 9.06360949943566e-07, "loss": 0.3535, "step": 12775 }, { "epoch": 0.2220792991360879, "grad_norm": 1.7971674503346546, "learning_rate": 9.063445480380177e-07, "loss": 0.2867, "step": 12776 }, { "epoch": 0.2220966816735907, "grad_norm": 1.8109158090502617, "learning_rate": 9.063281448445439e-07, "loss": 0.2937, "step": 12777 }, { "epoch": 0.22211406421109353, "grad_norm": 1.460935020063827, "learning_rate": 9.063117403631963e-07, "loss": 0.3026, "step": 12778 }, { "epoch": 0.22213144674859636, "grad_norm": 1.3494400172165428, "learning_rate": 9.062953345940274e-07, "loss": 0.288, "step": 12779 }, { "epoch": 0.2221488292860992, "grad_norm": 2.964426887488585, "learning_rate": 9.062789275370888e-07, "loss": 0.4236, "step": 12780 }, { "epoch": 0.22216621182360202, "grad_norm": 2.6741939675755475, "learning_rate": 9.062625191924326e-07, "loss": 0.3574, "step": 12781 }, { "epoch": 0.22218359436110482, "grad_norm": 1.7114801349122433, "learning_rate": 9.062461095601108e-07, "loss": 0.3626, "step": 12782 }, { "epoch": 0.22220097689860765, "grad_norm": 1.217134574439497, "learning_rate": 9.062296986401756e-07, "loss": 0.1239, "step": 12783 }, { "epoch": 0.22221835943611049, "grad_norm": 4.319514524443684, "learning_rate": 9.062132864326789e-07, "loss": 0.4425, "step": 12784 }, { "epoch": 0.22223574197361332, "grad_norm": 1.6643029251148722, "learning_rate": 9.061968729376726e-07, "loss": 0.2335, "step": 12785 }, { "epoch": 0.22225312451111615, "grad_norm": 2.2212098199329917, "learning_rate": 9.061804581552086e-07, "loss": 0.3398, "step": 12786 }, { "epoch": 0.22227050704861895, "grad_norm": 2.341670408554659, "learning_rate": 9.061640420853393e-07, "loss": 0.305, "step": 12787 }, { "epoch": 0.22228788958612178, "grad_norm": 1.6775192764402587, "learning_rate": 9.061476247281166e-07, "loss": 0.2367, "step": 12788 }, { "epoch": 0.2223052721236246, "grad_norm": 3.7427296112131017, "learning_rate": 9.061312060835925e-07, "loss": 0.3856, "step": 12789 }, { "epoch": 0.22232265466112744, "grad_norm": 2.4915017720405412, "learning_rate": 9.061147861518191e-07, "loss": 0.4504, "step": 12790 }, { "epoch": 0.22234003719863027, "grad_norm": 1.6262929619939401, "learning_rate": 9.060983649328482e-07, "loss": 0.397, "step": 12791 }, { "epoch": 0.22235741973613307, "grad_norm": 2.44951282111718, "learning_rate": 9.06081942426732e-07, "loss": 0.3715, "step": 12792 }, { "epoch": 0.2223748022736359, "grad_norm": 1.5886539234804493, "learning_rate": 9.060655186335228e-07, "loss": 0.3597, "step": 12793 }, { "epoch": 0.22239218481113873, "grad_norm": 1.5821594268958628, "learning_rate": 9.060490935532723e-07, "loss": 0.3026, "step": 12794 }, { "epoch": 0.22240956734864156, "grad_norm": 2.1527629720305743, "learning_rate": 9.060326671860328e-07, "loss": 0.3603, "step": 12795 }, { "epoch": 0.2224269498861444, "grad_norm": 2.3631218984619107, "learning_rate": 9.060162395318561e-07, "loss": 0.4053, "step": 12796 }, { "epoch": 0.2224443324236472, "grad_norm": 2.4169263274455295, "learning_rate": 9.059998105907945e-07, "loss": 0.3938, "step": 12797 }, { "epoch": 0.22246171496115003, "grad_norm": 2.9779972402227646, "learning_rate": 9.059833803629e-07, "loss": 0.3506, "step": 12798 }, { "epoch": 0.22247909749865286, "grad_norm": 2.4900001498733744, "learning_rate": 9.059669488482248e-07, "loss": 0.5589, "step": 12799 }, { "epoch": 0.22249648003615569, "grad_norm": 1.8804338706554355, "learning_rate": 9.059505160468208e-07, "loss": 0.3196, "step": 12800 }, { "epoch": 0.22251386257365852, "grad_norm": 4.884228511748782, "learning_rate": 9.0593408195874e-07, "loss": 0.3686, "step": 12801 }, { "epoch": 0.22253124511116132, "grad_norm": 1.24412936482368, "learning_rate": 9.059176465840348e-07, "loss": 0.4087, "step": 12802 }, { "epoch": 0.22254862764866415, "grad_norm": 1.2296533398289333, "learning_rate": 9.05901209922757e-07, "loss": 0.2105, "step": 12803 }, { "epoch": 0.22256601018616698, "grad_norm": 2.2626335434350766, "learning_rate": 9.058847719749589e-07, "loss": 0.4434, "step": 12804 }, { "epoch": 0.2225833927236698, "grad_norm": 1.77419632397247, "learning_rate": 9.058683327406924e-07, "loss": 0.3125, "step": 12805 }, { "epoch": 0.22260077526117264, "grad_norm": 2.2416610526379923, "learning_rate": 9.058518922200099e-07, "loss": 0.3502, "step": 12806 }, { "epoch": 0.22261815779867544, "grad_norm": 4.869128190013135, "learning_rate": 9.058354504129633e-07, "loss": 0.489, "step": 12807 }, { "epoch": 0.22263554033617827, "grad_norm": 1.3641964812070009, "learning_rate": 9.058190073196046e-07, "loss": 0.3937, "step": 12808 }, { "epoch": 0.2226529228736811, "grad_norm": 2.375249296299405, "learning_rate": 9.058025629399862e-07, "loss": 0.279, "step": 12809 }, { "epoch": 0.22267030541118393, "grad_norm": 1.8697611282288569, "learning_rate": 9.057861172741601e-07, "loss": 0.2718, "step": 12810 }, { "epoch": 0.22268768794868674, "grad_norm": 1.2564531356072033, "learning_rate": 9.057696703221784e-07, "loss": 0.1799, "step": 12811 }, { "epoch": 0.22270507048618957, "grad_norm": 2.1765449926750953, "learning_rate": 9.057532220840931e-07, "loss": 0.3639, "step": 12812 }, { "epoch": 0.2227224530236924, "grad_norm": 1.4046257921553953, "learning_rate": 9.057367725599566e-07, "loss": 0.2717, "step": 12813 }, { "epoch": 0.22273983556119523, "grad_norm": 2.3128195406963674, "learning_rate": 9.057203217498209e-07, "loss": 0.2351, "step": 12814 }, { "epoch": 0.22275721809869806, "grad_norm": 2.050515488761335, "learning_rate": 9.057038696537381e-07, "loss": 0.2886, "step": 12815 }, { "epoch": 0.22277460063620086, "grad_norm": 2.633991626541585, "learning_rate": 9.056874162717605e-07, "loss": 0.7585, "step": 12816 }, { "epoch": 0.2227919831737037, "grad_norm": 2.1294399965547726, "learning_rate": 9.056709616039401e-07, "loss": 0.436, "step": 12817 }, { "epoch": 0.22280936571120652, "grad_norm": 2.2281059321243477, "learning_rate": 9.056545056503291e-07, "loss": 0.2982, "step": 12818 }, { "epoch": 0.22282674824870935, "grad_norm": 1.5103293193790743, "learning_rate": 9.056380484109796e-07, "loss": 0.1993, "step": 12819 }, { "epoch": 0.22284413078621218, "grad_norm": 2.7705866965208266, "learning_rate": 9.056215898859439e-07, "loss": 0.3245, "step": 12820 }, { "epoch": 0.22286151332371498, "grad_norm": 2.0457207461532954, "learning_rate": 9.056051300752741e-07, "loss": 0.3217, "step": 12821 }, { "epoch": 0.2228788958612178, "grad_norm": 0.9067019621272272, "learning_rate": 9.055886689790222e-07, "loss": 0.358, "step": 12822 }, { "epoch": 0.22289627839872064, "grad_norm": 2.036479674451454, "learning_rate": 9.055722065972406e-07, "loss": 0.3789, "step": 12823 }, { "epoch": 0.22291366093622347, "grad_norm": 1.1534347709228874, "learning_rate": 9.055557429299816e-07, "loss": 0.3863, "step": 12824 }, { "epoch": 0.2229310434737263, "grad_norm": 1.6706396932828638, "learning_rate": 9.05539277977297e-07, "loss": 0.3725, "step": 12825 }, { "epoch": 0.2229484260112291, "grad_norm": 1.519031474404575, "learning_rate": 9.055228117392393e-07, "loss": 0.2901, "step": 12826 }, { "epoch": 0.22296580854873194, "grad_norm": 1.369770665914845, "learning_rate": 9.055063442158605e-07, "loss": 0.3514, "step": 12827 }, { "epoch": 0.22298319108623477, "grad_norm": 2.064784671294186, "learning_rate": 9.054898754072127e-07, "loss": 0.2516, "step": 12828 }, { "epoch": 0.2230005736237376, "grad_norm": 2.0966151342192787, "learning_rate": 9.054734053133484e-07, "loss": 0.5026, "step": 12829 }, { "epoch": 0.22301795616124043, "grad_norm": 1.4511314818874999, "learning_rate": 9.054569339343199e-07, "loss": 0.396, "step": 12830 }, { "epoch": 0.22303533869874323, "grad_norm": 1.1101357766844955, "learning_rate": 9.054404612701788e-07, "loss": 0.2924, "step": 12831 }, { "epoch": 0.22305272123624606, "grad_norm": 1.4270480943195516, "learning_rate": 9.054239873209779e-07, "loss": 0.2242, "step": 12832 }, { "epoch": 0.2230701037737489, "grad_norm": 2.270153791832498, "learning_rate": 9.054075120867691e-07, "loss": 0.3846, "step": 12833 }, { "epoch": 0.22308748631125172, "grad_norm": 1.6781513626725586, "learning_rate": 9.053910355676048e-07, "loss": 0.375, "step": 12834 }, { "epoch": 0.22310486884875455, "grad_norm": 1.4936296656202697, "learning_rate": 9.05374557763537e-07, "loss": 0.2939, "step": 12835 }, { "epoch": 0.22312225138625735, "grad_norm": 1.232359773849027, "learning_rate": 9.053580786746182e-07, "loss": 0.1832, "step": 12836 }, { "epoch": 0.22313963392376018, "grad_norm": 2.2618470737588803, "learning_rate": 9.053415983009005e-07, "loss": 0.5524, "step": 12837 }, { "epoch": 0.223157016461263, "grad_norm": 2.2370823759578413, "learning_rate": 9.05325116642436e-07, "loss": 0.6434, "step": 12838 }, { "epoch": 0.22317439899876584, "grad_norm": 2.443133254319765, "learning_rate": 9.053086336992773e-07, "loss": 0.442, "step": 12839 }, { "epoch": 0.22319178153626867, "grad_norm": 1.677801352083759, "learning_rate": 9.052921494714763e-07, "loss": 0.3335, "step": 12840 }, { "epoch": 0.22320916407377148, "grad_norm": 1.7708171809707258, "learning_rate": 9.052756639590853e-07, "loss": 0.6036, "step": 12841 }, { "epoch": 0.2232265466112743, "grad_norm": 2.641546657568151, "learning_rate": 9.052591771621565e-07, "loss": 0.5718, "step": 12842 }, { "epoch": 0.22324392914877714, "grad_norm": 2.25365810811133, "learning_rate": 9.052426890807426e-07, "loss": 0.4351, "step": 12843 }, { "epoch": 0.22326131168627997, "grad_norm": 2.858116934073793, "learning_rate": 9.052261997148954e-07, "loss": 0.3608, "step": 12844 }, { "epoch": 0.2232786942237828, "grad_norm": 3.6728764552941113, "learning_rate": 9.05209709064667e-07, "loss": 0.2706, "step": 12845 }, { "epoch": 0.2232960767612856, "grad_norm": 1.7441856605394712, "learning_rate": 9.051932171301102e-07, "loss": 0.3133, "step": 12846 }, { "epoch": 0.22331345929878843, "grad_norm": 1.562808905393816, "learning_rate": 9.051767239112771e-07, "loss": 0.3232, "step": 12847 }, { "epoch": 0.22333084183629126, "grad_norm": 2.4861873232211935, "learning_rate": 9.051602294082199e-07, "loss": 0.3504, "step": 12848 }, { "epoch": 0.2233482243737941, "grad_norm": 1.1870921350189476, "learning_rate": 9.051437336209908e-07, "loss": 0.3768, "step": 12849 }, { "epoch": 0.22336560691129692, "grad_norm": 1.9721203898425141, "learning_rate": 9.051272365496422e-07, "loss": 0.3779, "step": 12850 }, { "epoch": 0.22338298944879972, "grad_norm": 3.890231449885407, "learning_rate": 9.051107381942264e-07, "loss": 0.3296, "step": 12851 }, { "epoch": 0.22340037198630255, "grad_norm": 1.756923643236456, "learning_rate": 9.050942385547954e-07, "loss": 0.2744, "step": 12852 }, { "epoch": 0.22341775452380538, "grad_norm": 2.488484507841156, "learning_rate": 9.05077737631402e-07, "loss": 0.439, "step": 12853 }, { "epoch": 0.22343513706130821, "grad_norm": 3.6142753464722244, "learning_rate": 9.050612354240983e-07, "loss": 0.4559, "step": 12854 }, { "epoch": 0.22345251959881104, "grad_norm": 1.3456366669549047, "learning_rate": 9.050447319329363e-07, "loss": 0.2862, "step": 12855 }, { "epoch": 0.22346990213631385, "grad_norm": 2.005475025239102, "learning_rate": 9.050282271579689e-07, "loss": 0.4423, "step": 12856 }, { "epoch": 0.22348728467381668, "grad_norm": 2.2860323176762556, "learning_rate": 9.050117210992479e-07, "loss": 0.2876, "step": 12857 }, { "epoch": 0.2235046672113195, "grad_norm": 1.6622613671214048, "learning_rate": 9.049952137568258e-07, "loss": 0.4058, "step": 12858 }, { "epoch": 0.22352204974882234, "grad_norm": 1.275378017095903, "learning_rate": 9.04978705130755e-07, "loss": 0.2732, "step": 12859 }, { "epoch": 0.22353943228632517, "grad_norm": 1.7945282494728343, "learning_rate": 9.049621952210876e-07, "loss": 0.5074, "step": 12860 }, { "epoch": 0.22355681482382797, "grad_norm": 0.9319570567565376, "learning_rate": 9.04945684027876e-07, "loss": 0.2628, "step": 12861 }, { "epoch": 0.2235741973613308, "grad_norm": 4.123796765045951, "learning_rate": 9.049291715511727e-07, "loss": 0.2461, "step": 12862 }, { "epoch": 0.22359157989883363, "grad_norm": 1.9101656365368065, "learning_rate": 9.0491265779103e-07, "loss": 0.3424, "step": 12863 }, { "epoch": 0.22360896243633646, "grad_norm": 3.0618259525575393, "learning_rate": 9.048961427475001e-07, "loss": 0.4249, "step": 12864 }, { "epoch": 0.2236263449738393, "grad_norm": 2.5959151640087743, "learning_rate": 9.048796264206354e-07, "loss": 0.3624, "step": 12865 }, { "epoch": 0.2236437275113421, "grad_norm": 3.472810799932377, "learning_rate": 9.048631088104884e-07, "loss": 0.4714, "step": 12866 }, { "epoch": 0.22366111004884492, "grad_norm": 1.808412847727875, "learning_rate": 9.048465899171112e-07, "loss": 0.5735, "step": 12867 }, { "epoch": 0.22367849258634775, "grad_norm": 2.2205615547633766, "learning_rate": 9.048300697405563e-07, "loss": 0.2829, "step": 12868 }, { "epoch": 0.22369587512385058, "grad_norm": 2.867028489724941, "learning_rate": 9.04813548280876e-07, "loss": 0.4782, "step": 12869 }, { "epoch": 0.22371325766135342, "grad_norm": 1.5422371307497356, "learning_rate": 9.047970255381228e-07, "loss": 0.2774, "step": 12870 }, { "epoch": 0.22373064019885622, "grad_norm": 2.191544141870278, "learning_rate": 9.047805015123489e-07, "loss": 0.369, "step": 12871 }, { "epoch": 0.22374802273635905, "grad_norm": 2.084951128590352, "learning_rate": 9.047639762036068e-07, "loss": 0.3144, "step": 12872 }, { "epoch": 0.22376540527386188, "grad_norm": 2.0771102306661233, "learning_rate": 9.04747449611949e-07, "loss": 0.3955, "step": 12873 }, { "epoch": 0.2237827878113647, "grad_norm": 1.6088577807305633, "learning_rate": 9.047309217374274e-07, "loss": 0.3605, "step": 12874 }, { "epoch": 0.22380017034886754, "grad_norm": 2.0652814092722074, "learning_rate": 9.047143925800949e-07, "loss": 0.2725, "step": 12875 }, { "epoch": 0.22381755288637034, "grad_norm": 1.9691785518460694, "learning_rate": 9.046978621400035e-07, "loss": 0.4158, "step": 12876 }, { "epoch": 0.22383493542387317, "grad_norm": 1.2179989839905605, "learning_rate": 9.04681330417206e-07, "loss": 0.2811, "step": 12877 }, { "epoch": 0.223852317961376, "grad_norm": 1.6331057773749904, "learning_rate": 9.046647974117543e-07, "loss": 0.2466, "step": 12878 }, { "epoch": 0.22386970049887883, "grad_norm": 2.884170903126685, "learning_rate": 9.046482631237014e-07, "loss": 0.4893, "step": 12879 }, { "epoch": 0.22388708303638166, "grad_norm": 2.20619209197269, "learning_rate": 9.046317275530993e-07, "loss": 0.3661, "step": 12880 }, { "epoch": 0.22390446557388446, "grad_norm": 1.6650268814990363, "learning_rate": 9.046151907000003e-07, "loss": 0.2202, "step": 12881 }, { "epoch": 0.2239218481113873, "grad_norm": 1.4044377699205035, "learning_rate": 9.045986525644572e-07, "loss": 0.2838, "step": 12882 }, { "epoch": 0.22393923064889013, "grad_norm": 2.7749163390196268, "learning_rate": 9.045821131465221e-07, "loss": 0.43, "step": 12883 }, { "epoch": 0.22395661318639296, "grad_norm": 1.7314515713900536, "learning_rate": 9.045655724462478e-07, "loss": 0.2986, "step": 12884 }, { "epoch": 0.22397399572389579, "grad_norm": 2.5320494450342377, "learning_rate": 9.045490304636862e-07, "loss": 0.2601, "step": 12885 }, { "epoch": 0.2239913782613986, "grad_norm": 4.561391163020712, "learning_rate": 9.045324871988901e-07, "loss": 0.2508, "step": 12886 }, { "epoch": 0.22400876079890142, "grad_norm": 1.8205145860537202, "learning_rate": 9.045159426519118e-07, "loss": 0.2732, "step": 12887 }, { "epoch": 0.22402614333640425, "grad_norm": 1.2590272122763784, "learning_rate": 9.044993968228039e-07, "loss": 0.3087, "step": 12888 }, { "epoch": 0.22404352587390708, "grad_norm": 3.0637007084996384, "learning_rate": 9.044828497116186e-07, "loss": 0.3595, "step": 12889 }, { "epoch": 0.2240609084114099, "grad_norm": 1.5831378876771383, "learning_rate": 9.044663013184086e-07, "loss": 0.4126, "step": 12890 }, { "epoch": 0.2240782909489127, "grad_norm": 2.123342554568259, "learning_rate": 9.044497516432262e-07, "loss": 0.4699, "step": 12891 }, { "epoch": 0.22409567348641554, "grad_norm": 4.32036519215289, "learning_rate": 9.044332006861237e-07, "loss": 0.53, "step": 12892 }, { "epoch": 0.22411305602391837, "grad_norm": 1.5946189975628426, "learning_rate": 9.044166484471539e-07, "loss": 0.396, "step": 12893 }, { "epoch": 0.2241304385614212, "grad_norm": 3.0522894948843566, "learning_rate": 9.04400094926369e-07, "loss": 0.3257, "step": 12894 }, { "epoch": 0.22414782109892403, "grad_norm": 1.1909574005574997, "learning_rate": 9.043835401238217e-07, "loss": 0.1768, "step": 12895 }, { "epoch": 0.22416520363642684, "grad_norm": 2.178448812160899, "learning_rate": 9.043669840395642e-07, "loss": 0.224, "step": 12896 }, { "epoch": 0.22418258617392967, "grad_norm": 2.222456516711709, "learning_rate": 9.043504266736493e-07, "loss": 0.2459, "step": 12897 }, { "epoch": 0.2241999687114325, "grad_norm": 1.5636060055182444, "learning_rate": 9.043338680261291e-07, "loss": 0.343, "step": 12898 }, { "epoch": 0.22421735124893533, "grad_norm": 2.543566234319488, "learning_rate": 9.043173080970563e-07, "loss": 0.2296, "step": 12899 }, { "epoch": 0.22423473378643816, "grad_norm": 3.746118149333843, "learning_rate": 9.043007468864834e-07, "loss": 0.3641, "step": 12900 }, { "epoch": 0.22425211632394096, "grad_norm": 1.5630593221852787, "learning_rate": 9.042841843944629e-07, "loss": 0.351, "step": 12901 }, { "epoch": 0.2242694988614438, "grad_norm": 3.2868773583838324, "learning_rate": 9.042676206210471e-07, "loss": 0.3609, "step": 12902 }, { "epoch": 0.22428688139894662, "grad_norm": 1.9544681102754697, "learning_rate": 9.042510555662888e-07, "loss": 0.2663, "step": 12903 }, { "epoch": 0.22430426393644945, "grad_norm": 2.0100822409843984, "learning_rate": 9.042344892302404e-07, "loss": 0.3, "step": 12904 }, { "epoch": 0.22432164647395228, "grad_norm": 1.6869474625123053, "learning_rate": 9.042179216129542e-07, "loss": 0.2881, "step": 12905 }, { "epoch": 0.22433902901145508, "grad_norm": 1.302574951482289, "learning_rate": 9.04201352714483e-07, "loss": 0.388, "step": 12906 }, { "epoch": 0.2243564115489579, "grad_norm": 1.2085864417585774, "learning_rate": 9.041847825348791e-07, "loss": 0.2706, "step": 12907 }, { "epoch": 0.22437379408646074, "grad_norm": 1.8182790505159474, "learning_rate": 9.041682110741951e-07, "loss": 0.2335, "step": 12908 }, { "epoch": 0.22439117662396357, "grad_norm": 2.8775452268768724, "learning_rate": 9.041516383324836e-07, "loss": 0.3115, "step": 12909 }, { "epoch": 0.2244085591614664, "grad_norm": 2.3622385443383185, "learning_rate": 9.041350643097969e-07, "loss": 0.4658, "step": 12910 }, { "epoch": 0.2244259416989692, "grad_norm": 1.599335011690066, "learning_rate": 9.041184890061879e-07, "loss": 0.2738, "step": 12911 }, { "epoch": 0.22444332423647204, "grad_norm": 1.5961962718070244, "learning_rate": 9.041019124217089e-07, "loss": 0.2614, "step": 12912 }, { "epoch": 0.22446070677397487, "grad_norm": 2.513045004423379, "learning_rate": 9.040853345564123e-07, "loss": 0.4987, "step": 12913 }, { "epoch": 0.2244780893114777, "grad_norm": 2.449565021115839, "learning_rate": 9.040687554103508e-07, "loss": 0.4383, "step": 12914 }, { "epoch": 0.22449547184898053, "grad_norm": 1.893838831658169, "learning_rate": 9.040521749835772e-07, "loss": 0.324, "step": 12915 }, { "epoch": 0.22451285438648333, "grad_norm": 1.3418123340877253, "learning_rate": 9.040355932761436e-07, "loss": 0.3053, "step": 12916 }, { "epoch": 0.22453023692398616, "grad_norm": 1.9141412866922387, "learning_rate": 9.040190102881029e-07, "loss": 0.2789, "step": 12917 }, { "epoch": 0.224547619461489, "grad_norm": 1.6434648173685884, "learning_rate": 9.040024260195074e-07, "loss": 0.388, "step": 12918 }, { "epoch": 0.22456500199899182, "grad_norm": 1.7712947804350891, "learning_rate": 9.039858404704099e-07, "loss": 0.4348, "step": 12919 }, { "epoch": 0.22458238453649465, "grad_norm": 2.7115780848050672, "learning_rate": 9.039692536408628e-07, "loss": 0.1531, "step": 12920 }, { "epoch": 0.22459976707399745, "grad_norm": 2.537113065245394, "learning_rate": 9.039526655309188e-07, "loss": 0.9095, "step": 12921 }, { "epoch": 0.22461714961150028, "grad_norm": 1.8647529891151042, "learning_rate": 9.039360761406304e-07, "loss": 0.3711, "step": 12922 }, { "epoch": 0.2246345321490031, "grad_norm": 1.7150671771169137, "learning_rate": 9.039194854700502e-07, "loss": 0.2464, "step": 12923 }, { "epoch": 0.22465191468650594, "grad_norm": 2.1021927827055267, "learning_rate": 9.039028935192307e-07, "loss": 0.4662, "step": 12924 }, { "epoch": 0.22466929722400877, "grad_norm": 1.4314089833920356, "learning_rate": 9.038863002882247e-07, "loss": 0.3543, "step": 12925 }, { "epoch": 0.22468667976151158, "grad_norm": 1.5637629240754314, "learning_rate": 9.038697057770846e-07, "loss": 0.293, "step": 12926 }, { "epoch": 0.2247040622990144, "grad_norm": 2.2543252340226094, "learning_rate": 9.038531099858628e-07, "loss": 0.4514, "step": 12927 }, { "epoch": 0.22472144483651724, "grad_norm": 2.8399553890589817, "learning_rate": 9.038365129146125e-07, "loss": 0.4556, "step": 12928 }, { "epoch": 0.22473882737402007, "grad_norm": 1.7226863634613394, "learning_rate": 9.038199145633858e-07, "loss": 0.3936, "step": 12929 }, { "epoch": 0.2247562099115229, "grad_norm": 4.10888133719302, "learning_rate": 9.038033149322354e-07, "loss": 0.4484, "step": 12930 }, { "epoch": 0.2247735924490257, "grad_norm": 1.1175940493066132, "learning_rate": 9.037867140212143e-07, "loss": 0.3501, "step": 12931 }, { "epoch": 0.22479097498652853, "grad_norm": 2.037078604172408, "learning_rate": 9.037701118303746e-07, "loss": 0.3852, "step": 12932 }, { "epoch": 0.22480835752403136, "grad_norm": 2.0976053001525363, "learning_rate": 9.037535083597691e-07, "loss": 0.1776, "step": 12933 }, { "epoch": 0.2248257400615342, "grad_norm": 1.7515888076036004, "learning_rate": 9.037369036094504e-07, "loss": 0.4519, "step": 12934 }, { "epoch": 0.22484312259903702, "grad_norm": 2.246619263574565, "learning_rate": 9.037202975794713e-07, "loss": 0.3134, "step": 12935 }, { "epoch": 0.22486050513653982, "grad_norm": 2.549908763073398, "learning_rate": 9.037036902698842e-07, "loss": 0.3907, "step": 12936 }, { "epoch": 0.22487788767404265, "grad_norm": 1.4319319568625035, "learning_rate": 9.03687081680742e-07, "loss": 0.2439, "step": 12937 }, { "epoch": 0.22489527021154548, "grad_norm": 1.7723795808283611, "learning_rate": 9.036704718120971e-07, "loss": 0.372, "step": 12938 }, { "epoch": 0.22491265274904831, "grad_norm": 1.5944371148125065, "learning_rate": 9.036538606640022e-07, "loss": 0.3204, "step": 12939 }, { "epoch": 0.22493003528655114, "grad_norm": 1.6074382153230855, "learning_rate": 9.0363724823651e-07, "loss": 0.2971, "step": 12940 }, { "epoch": 0.22494741782405395, "grad_norm": 1.7098070818233921, "learning_rate": 9.036206345296731e-07, "loss": 0.1643, "step": 12941 }, { "epoch": 0.22496480036155678, "grad_norm": 2.336743713207906, "learning_rate": 9.036040195435444e-07, "loss": 0.2758, "step": 12942 }, { "epoch": 0.2249821828990596, "grad_norm": 3.160789864865356, "learning_rate": 9.035874032781761e-07, "loss": 0.3314, "step": 12943 }, { "epoch": 0.22499956543656244, "grad_norm": 1.368412436638413, "learning_rate": 9.035707857336212e-07, "loss": 0.3409, "step": 12944 }, { "epoch": 0.22501694797406527, "grad_norm": 1.5792636920449994, "learning_rate": 9.035541669099323e-07, "loss": 0.272, "step": 12945 }, { "epoch": 0.22503433051156807, "grad_norm": 1.666934826109859, "learning_rate": 9.035375468071621e-07, "loss": 0.2103, "step": 12946 }, { "epoch": 0.2250517130490709, "grad_norm": 1.845802353606976, "learning_rate": 9.035209254253633e-07, "loss": 0.2598, "step": 12947 }, { "epoch": 0.22506909558657373, "grad_norm": 1.5508720233661881, "learning_rate": 9.035043027645884e-07, "loss": 0.3392, "step": 12948 }, { "epoch": 0.22508647812407656, "grad_norm": 2.6320045475749927, "learning_rate": 9.034876788248903e-07, "loss": 0.2765, "step": 12949 }, { "epoch": 0.22510386066157936, "grad_norm": 2.0872205910655968, "learning_rate": 9.034710536063217e-07, "loss": 0.528, "step": 12950 }, { "epoch": 0.2251212431990822, "grad_norm": 3.622863408896813, "learning_rate": 9.034544271089351e-07, "loss": 0.3845, "step": 12951 }, { "epoch": 0.22513862573658502, "grad_norm": 2.524316038275975, "learning_rate": 9.034377993327832e-07, "loss": 0.3092, "step": 12952 }, { "epoch": 0.22515600827408785, "grad_norm": 1.9715685409589017, "learning_rate": 9.034211702779189e-07, "loss": 0.2638, "step": 12953 }, { "epoch": 0.22517339081159068, "grad_norm": 2.278129011374911, "learning_rate": 9.034045399443948e-07, "loss": 0.4763, "step": 12954 }, { "epoch": 0.2251907733490935, "grad_norm": 1.7026792115073914, "learning_rate": 9.033879083322636e-07, "loss": 0.3863, "step": 12955 }, { "epoch": 0.22520815588659632, "grad_norm": 1.9793836122618507, "learning_rate": 9.033712754415781e-07, "loss": 0.3859, "step": 12956 }, { "epoch": 0.22522553842409915, "grad_norm": 2.1110605665493964, "learning_rate": 9.033546412723908e-07, "loss": 0.2384, "step": 12957 }, { "epoch": 0.22524292096160198, "grad_norm": 1.492659082771545, "learning_rate": 9.033380058247547e-07, "loss": 0.4429, "step": 12958 }, { "epoch": 0.2252603034991048, "grad_norm": 1.628732121662762, "learning_rate": 9.033213690987223e-07, "loss": 0.3467, "step": 12959 }, { "epoch": 0.2252776860366076, "grad_norm": 2.5023672931169356, "learning_rate": 9.033047310943466e-07, "loss": 0.2486, "step": 12960 }, { "epoch": 0.22529506857411044, "grad_norm": 2.848270025590553, "learning_rate": 9.032880918116801e-07, "loss": 0.3755, "step": 12961 }, { "epoch": 0.22531245111161327, "grad_norm": 1.3380672802965807, "learning_rate": 9.032714512507755e-07, "loss": 0.3211, "step": 12962 }, { "epoch": 0.2253298336491161, "grad_norm": 2.6005957078481594, "learning_rate": 9.032548094116856e-07, "loss": 0.4898, "step": 12963 }, { "epoch": 0.22534721618661893, "grad_norm": 0.99587492049687, "learning_rate": 9.032381662944634e-07, "loss": 0.2506, "step": 12964 }, { "epoch": 0.22536459872412173, "grad_norm": 1.618352519096129, "learning_rate": 9.032215218991615e-07, "loss": 0.2507, "step": 12965 }, { "epoch": 0.22538198126162456, "grad_norm": 4.050849090677531, "learning_rate": 9.032048762258324e-07, "loss": 0.3133, "step": 12966 }, { "epoch": 0.2253993637991274, "grad_norm": 1.6738159208886094, "learning_rate": 9.031882292745291e-07, "loss": 0.2445, "step": 12967 }, { "epoch": 0.22541674633663022, "grad_norm": 2.6203737452520395, "learning_rate": 9.031715810453042e-07, "loss": 0.3953, "step": 12968 }, { "epoch": 0.22543412887413306, "grad_norm": 1.532644847832349, "learning_rate": 9.031549315382107e-07, "loss": 0.2008, "step": 12969 }, { "epoch": 0.22545151141163586, "grad_norm": 1.6947406213003169, "learning_rate": 9.031382807533015e-07, "loss": 0.3657, "step": 12970 }, { "epoch": 0.2254688939491387, "grad_norm": 1.0519487752645742, "learning_rate": 9.031216286906289e-07, "loss": 0.2899, "step": 12971 }, { "epoch": 0.22548627648664152, "grad_norm": 1.8666411525445208, "learning_rate": 9.031049753502459e-07, "loss": 0.2989, "step": 12972 }, { "epoch": 0.22550365902414435, "grad_norm": 2.659520916677732, "learning_rate": 9.030883207322053e-07, "loss": 0.5217, "step": 12973 }, { "epoch": 0.22552104156164718, "grad_norm": 1.854772394950859, "learning_rate": 9.030716648365599e-07, "loss": 0.1854, "step": 12974 }, { "epoch": 0.22553842409914998, "grad_norm": 2.0182906301920562, "learning_rate": 9.030550076633625e-07, "loss": 0.2827, "step": 12975 }, { "epoch": 0.2255558066366528, "grad_norm": 1.5718258780438437, "learning_rate": 9.030383492126659e-07, "loss": 0.3593, "step": 12976 }, { "epoch": 0.22557318917415564, "grad_norm": 1.9409522164907278, "learning_rate": 9.030216894845228e-07, "loss": 0.2227, "step": 12977 }, { "epoch": 0.22559057171165847, "grad_norm": 1.7147301364340097, "learning_rate": 9.030050284789862e-07, "loss": 0.3533, "step": 12978 }, { "epoch": 0.2256079542491613, "grad_norm": 1.6094429854346468, "learning_rate": 9.029883661961086e-07, "loss": 0.3685, "step": 12979 }, { "epoch": 0.2256253367866641, "grad_norm": 1.4810586110402957, "learning_rate": 9.029717026359433e-07, "loss": 0.2615, "step": 12980 }, { "epoch": 0.22564271932416693, "grad_norm": 2.3189323096495253, "learning_rate": 9.029550377985426e-07, "loss": 0.1832, "step": 12981 }, { "epoch": 0.22566010186166977, "grad_norm": 3.5733016581377433, "learning_rate": 9.029383716839595e-07, "loss": 0.5433, "step": 12982 }, { "epoch": 0.2256774843991726, "grad_norm": 1.6008517168973688, "learning_rate": 9.02921704292247e-07, "loss": 0.3762, "step": 12983 }, { "epoch": 0.22569486693667543, "grad_norm": 2.531286628706082, "learning_rate": 9.029050356234576e-07, "loss": 0.5716, "step": 12984 }, { "epoch": 0.22571224947417823, "grad_norm": 1.8692245879202507, "learning_rate": 9.028883656776445e-07, "loss": 0.2391, "step": 12985 }, { "epoch": 0.22572963201168106, "grad_norm": 2.8588971164077024, "learning_rate": 9.028716944548602e-07, "loss": 0.5331, "step": 12986 }, { "epoch": 0.2257470145491839, "grad_norm": 2.3792529019442306, "learning_rate": 9.028550219551578e-07, "loss": 0.2663, "step": 12987 }, { "epoch": 0.22576439708668672, "grad_norm": 1.9992813546041057, "learning_rate": 9.0283834817859e-07, "loss": 0.4591, "step": 12988 }, { "epoch": 0.22578177962418955, "grad_norm": 1.6662391446066458, "learning_rate": 9.028216731252096e-07, "loss": 0.3307, "step": 12989 }, { "epoch": 0.22579916216169235, "grad_norm": 2.321836523542959, "learning_rate": 9.028049967950697e-07, "loss": 0.3094, "step": 12990 }, { "epoch": 0.22581654469919518, "grad_norm": 1.2026967277567933, "learning_rate": 9.027883191882228e-07, "loss": 0.342, "step": 12991 }, { "epoch": 0.225833927236698, "grad_norm": 1.6383139876716906, "learning_rate": 9.027716403047221e-07, "loss": 0.3158, "step": 12992 }, { "epoch": 0.22585130977420084, "grad_norm": 1.2402295099255516, "learning_rate": 9.027549601446202e-07, "loss": 0.2982, "step": 12993 }, { "epoch": 0.22586869231170367, "grad_norm": 2.311734087588492, "learning_rate": 9.027382787079703e-07, "loss": 0.52, "step": 12994 }, { "epoch": 0.22588607484920648, "grad_norm": 3.1532038820371704, "learning_rate": 9.027215959948249e-07, "loss": 0.4965, "step": 12995 }, { "epoch": 0.2259034573867093, "grad_norm": 1.730598813499782, "learning_rate": 9.027049120052371e-07, "loss": 0.2855, "step": 12996 }, { "epoch": 0.22592083992421214, "grad_norm": 3.013350475459809, "learning_rate": 9.026882267392597e-07, "loss": 0.2534, "step": 12997 }, { "epoch": 0.22593822246171497, "grad_norm": 1.4335928192889478, "learning_rate": 9.026715401969456e-07, "loss": 0.2536, "step": 12998 }, { "epoch": 0.2259556049992178, "grad_norm": 2.116265820840554, "learning_rate": 9.026548523783477e-07, "loss": 0.4818, "step": 12999 }, { "epoch": 0.2259729875367206, "grad_norm": 1.8835793047118097, "learning_rate": 9.026381632835189e-07, "loss": 0.3084, "step": 13000 }, { "epoch": 0.22599037007422343, "grad_norm": 2.2885542588838264, "learning_rate": 9.026214729125119e-07, "loss": 0.4682, "step": 13001 }, { "epoch": 0.22600775261172626, "grad_norm": 1.706626057466363, "learning_rate": 9.0260478126538e-07, "loss": 0.2458, "step": 13002 }, { "epoch": 0.2260251351492291, "grad_norm": 1.3642415610811418, "learning_rate": 9.025880883421757e-07, "loss": 0.2543, "step": 13003 }, { "epoch": 0.22604251768673192, "grad_norm": 1.7773114684948015, "learning_rate": 9.025713941429522e-07, "loss": 0.3168, "step": 13004 }, { "epoch": 0.22605990022423472, "grad_norm": 1.4837738197379573, "learning_rate": 9.025546986677624e-07, "loss": 0.2834, "step": 13005 }, { "epoch": 0.22607728276173755, "grad_norm": 2.4202545798063797, "learning_rate": 9.02538001916659e-07, "loss": 0.2558, "step": 13006 }, { "epoch": 0.22609466529924038, "grad_norm": 1.4473211765797116, "learning_rate": 9.025213038896951e-07, "loss": 0.3436, "step": 13007 }, { "epoch": 0.2261120478367432, "grad_norm": 1.3005057351040241, "learning_rate": 9.025046045869235e-07, "loss": 0.3402, "step": 13008 }, { "epoch": 0.22612943037424604, "grad_norm": 2.243486801293493, "learning_rate": 9.024879040083973e-07, "loss": 0.4737, "step": 13009 }, { "epoch": 0.22614681291174885, "grad_norm": 0.9350521909268665, "learning_rate": 9.024712021541692e-07, "loss": 0.1678, "step": 13010 }, { "epoch": 0.22616419544925168, "grad_norm": 1.7875214114705533, "learning_rate": 9.024544990242924e-07, "loss": 0.3711, "step": 13011 }, { "epoch": 0.2261815779867545, "grad_norm": 1.7600624527775532, "learning_rate": 9.024377946188196e-07, "loss": 0.2215, "step": 13012 }, { "epoch": 0.22619896052425734, "grad_norm": 1.7299116224125917, "learning_rate": 9.024210889378038e-07, "loss": 0.3076, "step": 13013 }, { "epoch": 0.22621634306176017, "grad_norm": 4.095976712088668, "learning_rate": 9.024043819812982e-07, "loss": 0.2615, "step": 13014 }, { "epoch": 0.22623372559926297, "grad_norm": 1.6238366262712325, "learning_rate": 9.023876737493554e-07, "loss": 0.3815, "step": 13015 }, { "epoch": 0.2262511081367658, "grad_norm": 2.3086168481603244, "learning_rate": 9.023709642420287e-07, "loss": 0.4323, "step": 13016 }, { "epoch": 0.22626849067426863, "grad_norm": 0.9645423106912284, "learning_rate": 9.023542534593707e-07, "loss": 0.2201, "step": 13017 }, { "epoch": 0.22628587321177146, "grad_norm": 1.7292964611960264, "learning_rate": 9.023375414014347e-07, "loss": 0.3031, "step": 13018 }, { "epoch": 0.2263032557492743, "grad_norm": 1.585369316454644, "learning_rate": 9.023208280682735e-07, "loss": 0.3767, "step": 13019 }, { "epoch": 0.2263206382867771, "grad_norm": 1.8425477979519589, "learning_rate": 9.023041134599399e-07, "loss": 0.3211, "step": 13020 }, { "epoch": 0.22633802082427992, "grad_norm": 1.6129032842376625, "learning_rate": 9.022873975764872e-07, "loss": 0.3203, "step": 13021 }, { "epoch": 0.22635540336178275, "grad_norm": 1.615325649817846, "learning_rate": 9.022706804179682e-07, "loss": 0.3042, "step": 13022 }, { "epoch": 0.22637278589928558, "grad_norm": 2.9474787636867927, "learning_rate": 9.02253961984436e-07, "loss": 0.4397, "step": 13023 }, { "epoch": 0.2263901684367884, "grad_norm": 1.2315047774749919, "learning_rate": 9.022372422759434e-07, "loss": 0.3474, "step": 13024 }, { "epoch": 0.22640755097429122, "grad_norm": 1.5650242130758485, "learning_rate": 9.022205212925436e-07, "loss": 0.1708, "step": 13025 }, { "epoch": 0.22642493351179405, "grad_norm": 1.906409472182066, "learning_rate": 9.022037990342894e-07, "loss": 0.4126, "step": 13026 }, { "epoch": 0.22644231604929688, "grad_norm": 2.235080739371402, "learning_rate": 9.021870755012342e-07, "loss": 0.3134, "step": 13027 }, { "epoch": 0.2264596985867997, "grad_norm": 1.4577228735796495, "learning_rate": 9.021703506934304e-07, "loss": 0.2484, "step": 13028 }, { "epoch": 0.22647708112430254, "grad_norm": 1.3027743469047741, "learning_rate": 9.021536246109314e-07, "loss": 0.1776, "step": 13029 }, { "epoch": 0.22649446366180534, "grad_norm": 2.5402594170896866, "learning_rate": 9.021368972537903e-07, "loss": 0.4804, "step": 13030 }, { "epoch": 0.22651184619930817, "grad_norm": 1.6690434574883546, "learning_rate": 9.021201686220598e-07, "loss": 0.4909, "step": 13031 }, { "epoch": 0.226529228736811, "grad_norm": 1.7089296852719498, "learning_rate": 9.021034387157932e-07, "loss": 0.346, "step": 13032 }, { "epoch": 0.22654661127431383, "grad_norm": 1.630521864594391, "learning_rate": 9.020867075350434e-07, "loss": 0.171, "step": 13033 }, { "epoch": 0.22656399381181666, "grad_norm": 1.8380628130815204, "learning_rate": 9.020699750798633e-07, "loss": 0.4129, "step": 13034 }, { "epoch": 0.22658137634931946, "grad_norm": 1.8103644343214012, "learning_rate": 9.020532413503063e-07, "loss": 0.2816, "step": 13035 }, { "epoch": 0.2265987588868223, "grad_norm": 2.496097286935937, "learning_rate": 9.02036506346425e-07, "loss": 0.2754, "step": 13036 }, { "epoch": 0.22661614142432512, "grad_norm": 1.6036027833923165, "learning_rate": 9.020197700682728e-07, "loss": 0.302, "step": 13037 }, { "epoch": 0.22663352396182795, "grad_norm": 1.692637816212695, "learning_rate": 9.020030325159025e-07, "loss": 0.2253, "step": 13038 }, { "epoch": 0.22665090649933078, "grad_norm": 1.5856018056380206, "learning_rate": 9.019862936893673e-07, "loss": 0.4366, "step": 13039 }, { "epoch": 0.2266682890368336, "grad_norm": 2.6812299535924535, "learning_rate": 9.019695535887202e-07, "loss": 0.375, "step": 13040 }, { "epoch": 0.22668567157433642, "grad_norm": 1.3859205420873086, "learning_rate": 9.019528122140143e-07, "loss": 0.2891, "step": 13041 }, { "epoch": 0.22670305411183925, "grad_norm": 2.696058587285616, "learning_rate": 9.019360695653027e-07, "loss": 0.2238, "step": 13042 }, { "epoch": 0.22672043664934208, "grad_norm": 2.0608655028551413, "learning_rate": 9.019193256426382e-07, "loss": 0.2985, "step": 13043 }, { "epoch": 0.2267378191868449, "grad_norm": 1.4319464101913928, "learning_rate": 9.019025804460742e-07, "loss": 0.2722, "step": 13044 }, { "epoch": 0.2267552017243477, "grad_norm": 2.6767054896731417, "learning_rate": 9.018858339756635e-07, "loss": 0.2667, "step": 13045 }, { "epoch": 0.22677258426185054, "grad_norm": 2.697739891884225, "learning_rate": 9.018690862314596e-07, "loss": 0.3583, "step": 13046 }, { "epoch": 0.22678996679935337, "grad_norm": 2.0758395899320625, "learning_rate": 9.01852337213515e-07, "loss": 0.3456, "step": 13047 }, { "epoch": 0.2268073493368562, "grad_norm": 1.3205789437869506, "learning_rate": 9.018355869218832e-07, "loss": 0.2059, "step": 13048 }, { "epoch": 0.22682473187435903, "grad_norm": 2.30111347145184, "learning_rate": 9.018188353566172e-07, "loss": 0.3157, "step": 13049 }, { "epoch": 0.22684211441186183, "grad_norm": 2.855592532611954, "learning_rate": 9.018020825177699e-07, "loss": 0.4213, "step": 13050 }, { "epoch": 0.22685949694936466, "grad_norm": 1.665919776311069, "learning_rate": 9.017853284053948e-07, "loss": 0.2147, "step": 13051 }, { "epoch": 0.2268768794868675, "grad_norm": 1.9997625699269277, "learning_rate": 9.017685730195447e-07, "loss": 0.4269, "step": 13052 }, { "epoch": 0.22689426202437032, "grad_norm": 1.194460353871571, "learning_rate": 9.017518163602728e-07, "loss": 0.1981, "step": 13053 }, { "epoch": 0.22691164456187315, "grad_norm": 2.048392736316388, "learning_rate": 9.017350584276322e-07, "loss": 0.3511, "step": 13054 }, { "epoch": 0.22692902709937596, "grad_norm": 1.6084020090885893, "learning_rate": 9.017182992216759e-07, "loss": 0.4335, "step": 13055 }, { "epoch": 0.2269464096368788, "grad_norm": 1.8658004312424161, "learning_rate": 9.017015387424571e-07, "loss": 0.4158, "step": 13056 }, { "epoch": 0.22696379217438162, "grad_norm": 2.42350124432592, "learning_rate": 9.016847769900292e-07, "loss": 0.1907, "step": 13057 }, { "epoch": 0.22698117471188445, "grad_norm": 3.049031567368687, "learning_rate": 9.016680139644448e-07, "loss": 0.335, "step": 13058 }, { "epoch": 0.22699855724938728, "grad_norm": 1.5230649465314317, "learning_rate": 9.016512496657576e-07, "loss": 0.5142, "step": 13059 }, { "epoch": 0.22701593978689008, "grad_norm": 2.008802872305414, "learning_rate": 9.016344840940203e-07, "loss": 0.3957, "step": 13060 }, { "epoch": 0.2270333223243929, "grad_norm": 1.2411739334969616, "learning_rate": 9.01617717249286e-07, "loss": 0.3447, "step": 13061 }, { "epoch": 0.22705070486189574, "grad_norm": 1.8533195736120076, "learning_rate": 9.016009491316082e-07, "loss": 0.2487, "step": 13062 }, { "epoch": 0.22706808739939857, "grad_norm": 2.5309777712989203, "learning_rate": 9.015841797410399e-07, "loss": 0.4266, "step": 13063 }, { "epoch": 0.2270854699369014, "grad_norm": 1.7999795811750887, "learning_rate": 9.015674090776342e-07, "loss": 0.3144, "step": 13064 }, { "epoch": 0.2271028524744042, "grad_norm": 1.8049081356422205, "learning_rate": 9.015506371414442e-07, "loss": 0.4462, "step": 13065 }, { "epoch": 0.22712023501190703, "grad_norm": 1.4373985507319416, "learning_rate": 9.015338639325232e-07, "loss": 0.3234, "step": 13066 }, { "epoch": 0.22713761754940986, "grad_norm": 1.6454806089432557, "learning_rate": 9.015170894509243e-07, "loss": 0.3141, "step": 13067 }, { "epoch": 0.2271550000869127, "grad_norm": 1.527563634725082, "learning_rate": 9.015003136967006e-07, "loss": 0.3546, "step": 13068 }, { "epoch": 0.22717238262441553, "grad_norm": 1.514391332071611, "learning_rate": 9.014835366699054e-07, "loss": 0.3813, "step": 13069 }, { "epoch": 0.22718976516191833, "grad_norm": 1.9457663959208096, "learning_rate": 9.014667583705918e-07, "loss": 0.3512, "step": 13070 }, { "epoch": 0.22720714769942116, "grad_norm": 2.156338916014769, "learning_rate": 9.01449978798813e-07, "loss": 0.3765, "step": 13071 }, { "epoch": 0.227224530236924, "grad_norm": 1.4730457564814137, "learning_rate": 9.014331979546222e-07, "loss": 0.2361, "step": 13072 }, { "epoch": 0.22724191277442682, "grad_norm": 2.1075408179061648, "learning_rate": 9.014164158380726e-07, "loss": 0.6713, "step": 13073 }, { "epoch": 0.22725929531192965, "grad_norm": 1.887781151597799, "learning_rate": 9.013996324492173e-07, "loss": 0.2355, "step": 13074 }, { "epoch": 0.22727667784943245, "grad_norm": 1.7161047921948573, "learning_rate": 9.013828477881096e-07, "loss": 0.2804, "step": 13075 }, { "epoch": 0.22729406038693528, "grad_norm": 1.1864750544676645, "learning_rate": 9.013660618548026e-07, "loss": 0.2084, "step": 13076 }, { "epoch": 0.2273114429244381, "grad_norm": 2.0838545283760017, "learning_rate": 9.013492746493495e-07, "loss": 0.1964, "step": 13077 }, { "epoch": 0.22732882546194094, "grad_norm": 3.1603571211409074, "learning_rate": 9.013324861718036e-07, "loss": 0.4264, "step": 13078 }, { "epoch": 0.22734620799944377, "grad_norm": 2.243710694612944, "learning_rate": 9.013156964222182e-07, "loss": 0.3265, "step": 13079 }, { "epoch": 0.22736359053694657, "grad_norm": 1.5052962040268303, "learning_rate": 9.012989054006464e-07, "loss": 0.2495, "step": 13080 }, { "epoch": 0.2273809730744494, "grad_norm": 2.6104535185090803, "learning_rate": 9.012821131071412e-07, "loss": 0.4592, "step": 13081 }, { "epoch": 0.22739835561195224, "grad_norm": 2.4384378386505796, "learning_rate": 9.012653195417561e-07, "loss": 0.3661, "step": 13082 }, { "epoch": 0.22741573814945507, "grad_norm": 1.4879468282650805, "learning_rate": 9.012485247045444e-07, "loss": 0.2487, "step": 13083 }, { "epoch": 0.2274331206869579, "grad_norm": 3.046059818995604, "learning_rate": 9.012317285955591e-07, "loss": 0.553, "step": 13084 }, { "epoch": 0.2274505032244607, "grad_norm": 1.1425027392566587, "learning_rate": 9.012149312148536e-07, "loss": 0.2631, "step": 13085 }, { "epoch": 0.22746788576196353, "grad_norm": 1.6629124579594257, "learning_rate": 9.011981325624809e-07, "loss": 0.4188, "step": 13086 }, { "epoch": 0.22748526829946636, "grad_norm": 1.689005746565399, "learning_rate": 9.011813326384945e-07, "loss": 0.3331, "step": 13087 }, { "epoch": 0.2275026508369692, "grad_norm": 1.3679392468227594, "learning_rate": 9.011645314429477e-07, "loss": 0.2347, "step": 13088 }, { "epoch": 0.227520033374472, "grad_norm": 2.3148906610340947, "learning_rate": 9.011477289758934e-07, "loss": 0.3703, "step": 13089 }, { "epoch": 0.22753741591197482, "grad_norm": 4.047622944043335, "learning_rate": 9.011309252373852e-07, "loss": 0.5998, "step": 13090 }, { "epoch": 0.22755479844947765, "grad_norm": 2.0930411201615073, "learning_rate": 9.011141202274761e-07, "loss": 0.4278, "step": 13091 }, { "epoch": 0.22757218098698048, "grad_norm": 1.4553462924639637, "learning_rate": 9.010973139462197e-07, "loss": 0.3389, "step": 13092 }, { "epoch": 0.2275895635244833, "grad_norm": 1.7134642118749615, "learning_rate": 9.01080506393669e-07, "loss": 0.3438, "step": 13093 }, { "epoch": 0.22760694606198612, "grad_norm": 1.7547319308142355, "learning_rate": 9.010636975698774e-07, "loss": 0.4165, "step": 13094 }, { "epoch": 0.22762432859948895, "grad_norm": 1.8990686538255028, "learning_rate": 9.01046887474898e-07, "loss": 0.3028, "step": 13095 }, { "epoch": 0.22764171113699178, "grad_norm": 1.9985514118422372, "learning_rate": 9.010300761087841e-07, "loss": 0.2984, "step": 13096 }, { "epoch": 0.2276590936744946, "grad_norm": 3.0236582785891173, "learning_rate": 9.010132634715893e-07, "loss": 0.2723, "step": 13097 }, { "epoch": 0.22767647621199744, "grad_norm": 2.170688934086232, "learning_rate": 9.009964495633664e-07, "loss": 0.2528, "step": 13098 }, { "epoch": 0.22769385874950024, "grad_norm": 5.873409970141742, "learning_rate": 9.009796343841691e-07, "loss": 0.6316, "step": 13099 }, { "epoch": 0.22771124128700307, "grad_norm": 1.5465891970186605, "learning_rate": 9.009628179340506e-07, "loss": 0.3052, "step": 13100 }, { "epoch": 0.2277286238245059, "grad_norm": 1.3092589382620536, "learning_rate": 9.009460002130641e-07, "loss": 0.2018, "step": 13101 }, { "epoch": 0.22774600636200873, "grad_norm": 1.6366250549129029, "learning_rate": 9.009291812212629e-07, "loss": 0.3046, "step": 13102 }, { "epoch": 0.22776338889951156, "grad_norm": 1.471049035174285, "learning_rate": 9.009123609587006e-07, "loss": 0.2094, "step": 13103 }, { "epoch": 0.22778077143701436, "grad_norm": 1.5754060386066397, "learning_rate": 9.008955394254301e-07, "loss": 0.308, "step": 13104 }, { "epoch": 0.2277981539745172, "grad_norm": 1.7543831462103634, "learning_rate": 9.008787166215048e-07, "loss": 0.2953, "step": 13105 }, { "epoch": 0.22781553651202002, "grad_norm": 1.9013785656229454, "learning_rate": 9.008618925469784e-07, "loss": 0.387, "step": 13106 }, { "epoch": 0.22783291904952285, "grad_norm": 2.1632312109920004, "learning_rate": 9.008450672019037e-07, "loss": 0.2251, "step": 13107 }, { "epoch": 0.22785030158702568, "grad_norm": 1.4680168329511072, "learning_rate": 9.008282405863343e-07, "loss": 0.309, "step": 13108 }, { "epoch": 0.22786768412452849, "grad_norm": 2.850157502026976, "learning_rate": 9.008114127003237e-07, "loss": 0.4474, "step": 13109 }, { "epoch": 0.22788506666203132, "grad_norm": 1.600965298105251, "learning_rate": 9.007945835439248e-07, "loss": 0.3363, "step": 13110 }, { "epoch": 0.22790244919953415, "grad_norm": 1.7214304005500372, "learning_rate": 9.007777531171912e-07, "loss": 0.2887, "step": 13111 }, { "epoch": 0.22791983173703698, "grad_norm": 1.6492689863117314, "learning_rate": 9.007609214201763e-07, "loss": 0.467, "step": 13112 }, { "epoch": 0.2279372142745398, "grad_norm": 1.8447792048482554, "learning_rate": 9.007440884529334e-07, "loss": 0.2753, "step": 13113 }, { "epoch": 0.2279545968120426, "grad_norm": 2.633024109590185, "learning_rate": 9.007272542155158e-07, "loss": 0.5644, "step": 13114 }, { "epoch": 0.22797197934954544, "grad_norm": 1.929150035225128, "learning_rate": 9.007104187079768e-07, "loss": 0.3275, "step": 13115 }, { "epoch": 0.22798936188704827, "grad_norm": 3.056190148262153, "learning_rate": 9.0069358193037e-07, "loss": 0.316, "step": 13116 }, { "epoch": 0.2280067444245511, "grad_norm": 1.7079082643458083, "learning_rate": 9.006767438827485e-07, "loss": 0.516, "step": 13117 }, { "epoch": 0.22802412696205393, "grad_norm": 1.4397770466518391, "learning_rate": 9.006599045651658e-07, "loss": 0.4466, "step": 13118 }, { "epoch": 0.22804150949955673, "grad_norm": 1.8491583524686703, "learning_rate": 9.006430639776753e-07, "loss": 0.2383, "step": 13119 }, { "epoch": 0.22805889203705956, "grad_norm": 1.2004511527694475, "learning_rate": 9.006262221203303e-07, "loss": 0.369, "step": 13120 }, { "epoch": 0.2280762745745624, "grad_norm": 3.2532766564183375, "learning_rate": 9.006093789931841e-07, "loss": 0.3943, "step": 13121 }, { "epoch": 0.22809365711206522, "grad_norm": 2.085279157435029, "learning_rate": 9.005925345962904e-07, "loss": 0.3084, "step": 13122 }, { "epoch": 0.22811103964956805, "grad_norm": 1.483362063462036, "learning_rate": 9.005756889297022e-07, "loss": 0.273, "step": 13123 }, { "epoch": 0.22812842218707086, "grad_norm": 4.187229389480494, "learning_rate": 9.005588419934731e-07, "loss": 0.3666, "step": 13124 }, { "epoch": 0.2281458047245737, "grad_norm": 1.3743639307521593, "learning_rate": 9.005419937876566e-07, "loss": 0.337, "step": 13125 }, { "epoch": 0.22816318726207652, "grad_norm": 1.757027390941764, "learning_rate": 9.005251443123059e-07, "loss": 0.2279, "step": 13126 }, { "epoch": 0.22818056979957935, "grad_norm": 2.1364001404047928, "learning_rate": 9.005082935674745e-07, "loss": 0.2543, "step": 13127 }, { "epoch": 0.22819795233708218, "grad_norm": 1.777075509347652, "learning_rate": 9.004914415532156e-07, "loss": 0.3057, "step": 13128 }, { "epoch": 0.22821533487458498, "grad_norm": 1.689026393610529, "learning_rate": 9.00474588269583e-07, "loss": 0.2558, "step": 13129 }, { "epoch": 0.2282327174120878, "grad_norm": 0.9352446325656018, "learning_rate": 9.004577337166299e-07, "loss": 0.1991, "step": 13130 }, { "epoch": 0.22825009994959064, "grad_norm": 1.184993004313267, "learning_rate": 9.004408778944097e-07, "loss": 0.316, "step": 13131 }, { "epoch": 0.22826748248709347, "grad_norm": 1.739441817077134, "learning_rate": 9.00424020802976e-07, "loss": 0.2465, "step": 13132 }, { "epoch": 0.2282848650245963, "grad_norm": 2.4268284604428296, "learning_rate": 9.004071624423819e-07, "loss": 0.3247, "step": 13133 }, { "epoch": 0.2283022475620991, "grad_norm": 2.1648181194135563, "learning_rate": 9.003903028126812e-07, "loss": 0.4409, "step": 13134 }, { "epoch": 0.22831963009960193, "grad_norm": 3.296452470412119, "learning_rate": 9.003734419139271e-07, "loss": 0.401, "step": 13135 }, { "epoch": 0.22833701263710476, "grad_norm": 1.8863904760106323, "learning_rate": 9.00356579746173e-07, "loss": 0.4006, "step": 13136 }, { "epoch": 0.2283543951746076, "grad_norm": 1.609146209983781, "learning_rate": 9.003397163094724e-07, "loss": 0.4954, "step": 13137 }, { "epoch": 0.22837177771211042, "grad_norm": 1.9779618020161636, "learning_rate": 9.00322851603879e-07, "loss": 0.3679, "step": 13138 }, { "epoch": 0.22838916024961323, "grad_norm": 2.173744932830024, "learning_rate": 9.00305985629446e-07, "loss": 0.2841, "step": 13139 }, { "epoch": 0.22840654278711606, "grad_norm": 2.3808062966898476, "learning_rate": 9.002891183862269e-07, "loss": 0.3316, "step": 13140 }, { "epoch": 0.2284239253246189, "grad_norm": 3.5370273341336302, "learning_rate": 9.002722498742752e-07, "loss": 0.451, "step": 13141 }, { "epoch": 0.22844130786212172, "grad_norm": 1.6407808615278132, "learning_rate": 9.002553800936444e-07, "loss": 0.373, "step": 13142 }, { "epoch": 0.22845869039962455, "grad_norm": 1.7561247783390013, "learning_rate": 9.002385090443876e-07, "loss": 0.2633, "step": 13143 }, { "epoch": 0.22847607293712735, "grad_norm": 1.9584969063885522, "learning_rate": 9.002216367265589e-07, "loss": 0.3595, "step": 13144 }, { "epoch": 0.22849345547463018, "grad_norm": 1.4427926736022298, "learning_rate": 9.002047631402112e-07, "loss": 0.2894, "step": 13145 }, { "epoch": 0.228510838012133, "grad_norm": 2.318694135252045, "learning_rate": 9.001878882853985e-07, "loss": 0.3953, "step": 13146 }, { "epoch": 0.22852822054963584, "grad_norm": 1.2825457365183082, "learning_rate": 9.001710121621739e-07, "loss": 0.2257, "step": 13147 }, { "epoch": 0.22854560308713867, "grad_norm": 3.182105218877641, "learning_rate": 9.00154134770591e-07, "loss": 0.3756, "step": 13148 }, { "epoch": 0.22856298562464147, "grad_norm": 2.5054348003661295, "learning_rate": 9.001372561107033e-07, "loss": 0.418, "step": 13149 }, { "epoch": 0.2285803681621443, "grad_norm": 1.6450449782746976, "learning_rate": 9.001203761825644e-07, "loss": 0.4836, "step": 13150 }, { "epoch": 0.22859775069964713, "grad_norm": 4.202340596337364, "learning_rate": 9.001034949862276e-07, "loss": 0.2528, "step": 13151 }, { "epoch": 0.22861513323714996, "grad_norm": 1.4434434923067838, "learning_rate": 9.000866125217466e-07, "loss": 0.2514, "step": 13152 }, { "epoch": 0.2286325157746528, "grad_norm": 2.2104775711663667, "learning_rate": 9.000697287891747e-07, "loss": 0.527, "step": 13153 }, { "epoch": 0.2286498983121556, "grad_norm": 1.6326341775146629, "learning_rate": 9.000528437885658e-07, "loss": 0.2869, "step": 13154 }, { "epoch": 0.22866728084965843, "grad_norm": 1.2157136498312384, "learning_rate": 9.000359575199728e-07, "loss": 0.2466, "step": 13155 }, { "epoch": 0.22868466338716126, "grad_norm": 2.1618588408404382, "learning_rate": 9.000190699834498e-07, "loss": 0.3239, "step": 13156 }, { "epoch": 0.2287020459246641, "grad_norm": 1.9052768182354993, "learning_rate": 9.000021811790501e-07, "loss": 0.3714, "step": 13157 }, { "epoch": 0.22871942846216692, "grad_norm": 2.3895491879788557, "learning_rate": 8.999852911068272e-07, "loss": 0.2501, "step": 13158 }, { "epoch": 0.22873681099966972, "grad_norm": 2.1195476161255975, "learning_rate": 8.999683997668346e-07, "loss": 0.3039, "step": 13159 }, { "epoch": 0.22875419353717255, "grad_norm": 2.178638046526397, "learning_rate": 8.99951507159126e-07, "loss": 0.2251, "step": 13160 }, { "epoch": 0.22877157607467538, "grad_norm": 2.1388728162888486, "learning_rate": 8.999346132837547e-07, "loss": 0.336, "step": 13161 }, { "epoch": 0.2287889586121782, "grad_norm": 1.9663590237379287, "learning_rate": 8.999177181407745e-07, "loss": 0.3034, "step": 13162 }, { "epoch": 0.22880634114968104, "grad_norm": 1.4755748482415083, "learning_rate": 8.999008217302388e-07, "loss": 0.3902, "step": 13163 }, { "epoch": 0.22882372368718384, "grad_norm": 5.912936306656423, "learning_rate": 8.998839240522013e-07, "loss": 0.3657, "step": 13164 }, { "epoch": 0.22884110622468667, "grad_norm": 1.5542733324598947, "learning_rate": 8.998670251067154e-07, "loss": 0.264, "step": 13165 }, { "epoch": 0.2288584887621895, "grad_norm": 1.8739194274386657, "learning_rate": 8.998501248938347e-07, "loss": 0.2668, "step": 13166 }, { "epoch": 0.22887587129969234, "grad_norm": 1.4673898058210717, "learning_rate": 8.998332234136128e-07, "loss": 0.2339, "step": 13167 }, { "epoch": 0.22889325383719517, "grad_norm": 2.0166721349530086, "learning_rate": 8.998163206661032e-07, "loss": 0.3526, "step": 13168 }, { "epoch": 0.22891063637469797, "grad_norm": 2.8030875968816544, "learning_rate": 8.997994166513596e-07, "loss": 0.4411, "step": 13169 }, { "epoch": 0.2289280189122008, "grad_norm": 1.6692979229353595, "learning_rate": 8.997825113694354e-07, "loss": 0.3597, "step": 13170 }, { "epoch": 0.22894540144970363, "grad_norm": 2.260565867165268, "learning_rate": 8.997656048203843e-07, "loss": 0.3458, "step": 13171 }, { "epoch": 0.22896278398720646, "grad_norm": 2.3469278003870175, "learning_rate": 8.997486970042599e-07, "loss": 0.1651, "step": 13172 }, { "epoch": 0.2289801665247093, "grad_norm": 1.3822253771287243, "learning_rate": 8.997317879211157e-07, "loss": 0.2801, "step": 13173 }, { "epoch": 0.2289975490622121, "grad_norm": 4.190480655744784, "learning_rate": 8.997148775710055e-07, "loss": 0.3181, "step": 13174 }, { "epoch": 0.22901493159971492, "grad_norm": 1.6785872727430589, "learning_rate": 8.996979659539827e-07, "loss": 0.2573, "step": 13175 }, { "epoch": 0.22903231413721775, "grad_norm": 0.982683589334445, "learning_rate": 8.996810530701009e-07, "loss": 0.2637, "step": 13176 }, { "epoch": 0.22904969667472058, "grad_norm": 1.3800798872008782, "learning_rate": 8.996641389194137e-07, "loss": 0.2128, "step": 13177 }, { "epoch": 0.2290670792122234, "grad_norm": 2.3574192889405596, "learning_rate": 8.996472235019749e-07, "loss": 0.4046, "step": 13178 }, { "epoch": 0.22908446174972621, "grad_norm": 2.791979310835083, "learning_rate": 8.996303068178379e-07, "loss": 0.3818, "step": 13179 }, { "epoch": 0.22910184428722905, "grad_norm": 2.852902174831465, "learning_rate": 8.996133888670565e-07, "loss": 0.6738, "step": 13180 }, { "epoch": 0.22911922682473188, "grad_norm": 2.041010511529909, "learning_rate": 8.995964696496842e-07, "loss": 0.359, "step": 13181 }, { "epoch": 0.2291366093622347, "grad_norm": 1.8874811595534269, "learning_rate": 8.995795491657745e-07, "loss": 0.3888, "step": 13182 }, { "epoch": 0.22915399189973754, "grad_norm": 1.6855563360857324, "learning_rate": 8.995626274153813e-07, "loss": 0.2642, "step": 13183 }, { "epoch": 0.22917137443724034, "grad_norm": 1.9418682647642969, "learning_rate": 8.995457043985581e-07, "loss": 0.4699, "step": 13184 }, { "epoch": 0.22918875697474317, "grad_norm": 1.8669652591882224, "learning_rate": 8.995287801153586e-07, "loss": 0.3378, "step": 13185 }, { "epoch": 0.229206139512246, "grad_norm": 1.3219272823391996, "learning_rate": 8.995118545658365e-07, "loss": 0.3892, "step": 13186 }, { "epoch": 0.22922352204974883, "grad_norm": 1.873201390769359, "learning_rate": 8.994949277500451e-07, "loss": 0.4106, "step": 13187 }, { "epoch": 0.22924090458725166, "grad_norm": 1.7725828574551514, "learning_rate": 8.994779996680384e-07, "loss": 0.4456, "step": 13188 }, { "epoch": 0.22925828712475446, "grad_norm": 1.382814594973277, "learning_rate": 8.9946107031987e-07, "loss": 0.2734, "step": 13189 }, { "epoch": 0.2292756696622573, "grad_norm": 2.3956656441482216, "learning_rate": 8.994441397055934e-07, "loss": 0.3896, "step": 13190 }, { "epoch": 0.22929305219976012, "grad_norm": 1.641166304609303, "learning_rate": 8.994272078252623e-07, "loss": 0.4352, "step": 13191 }, { "epoch": 0.22931043473726295, "grad_norm": 3.9260853825824693, "learning_rate": 8.994102746789307e-07, "loss": 0.2958, "step": 13192 }, { "epoch": 0.22932781727476578, "grad_norm": 4.957350829844851, "learning_rate": 8.993933402666518e-07, "loss": 0.2822, "step": 13193 }, { "epoch": 0.22934519981226859, "grad_norm": 2.505068307551396, "learning_rate": 8.993764045884795e-07, "loss": 0.4383, "step": 13194 }, { "epoch": 0.22936258234977142, "grad_norm": 2.769990339763196, "learning_rate": 8.993594676444676e-07, "loss": 0.3635, "step": 13195 }, { "epoch": 0.22937996488727425, "grad_norm": 1.7438490592199476, "learning_rate": 8.993425294346695e-07, "loss": 0.2245, "step": 13196 }, { "epoch": 0.22939734742477708, "grad_norm": 1.792764623494971, "learning_rate": 8.99325589959139e-07, "loss": 0.2793, "step": 13197 }, { "epoch": 0.2294147299622799, "grad_norm": 2.767535736954476, "learning_rate": 8.993086492179298e-07, "loss": 0.4341, "step": 13198 }, { "epoch": 0.2294321124997827, "grad_norm": 1.9099443042861122, "learning_rate": 8.992917072110958e-07, "loss": 0.2612, "step": 13199 }, { "epoch": 0.22944949503728554, "grad_norm": 1.5659811062474895, "learning_rate": 8.992747639386903e-07, "loss": 0.3055, "step": 13200 }, { "epoch": 0.22946687757478837, "grad_norm": 1.583360896347099, "learning_rate": 8.992578194007673e-07, "loss": 0.1999, "step": 13201 }, { "epoch": 0.2294842601122912, "grad_norm": 1.694912255519501, "learning_rate": 8.992408735973804e-07, "loss": 0.4124, "step": 13202 }, { "epoch": 0.22950164264979403, "grad_norm": 1.317562140562219, "learning_rate": 8.992239265285833e-07, "loss": 0.2508, "step": 13203 }, { "epoch": 0.22951902518729683, "grad_norm": 1.4079816848605162, "learning_rate": 8.992069781944299e-07, "loss": 0.2555, "step": 13204 }, { "epoch": 0.22953640772479966, "grad_norm": 2.301147945904156, "learning_rate": 8.991900285949735e-07, "loss": 0.2729, "step": 13205 }, { "epoch": 0.2295537902623025, "grad_norm": 2.729741823993389, "learning_rate": 8.991730777302681e-07, "loss": 0.3713, "step": 13206 }, { "epoch": 0.22957117279980532, "grad_norm": 1.5235093672481672, "learning_rate": 8.991561256003677e-07, "loss": 0.5268, "step": 13207 }, { "epoch": 0.22958855533730815, "grad_norm": 2.388855011430355, "learning_rate": 8.991391722053254e-07, "loss": 0.2253, "step": 13208 }, { "epoch": 0.22960593787481096, "grad_norm": 2.8580923236914835, "learning_rate": 8.991222175451955e-07, "loss": 0.482, "step": 13209 }, { "epoch": 0.22962332041231379, "grad_norm": 1.4666921381791265, "learning_rate": 8.991052616200314e-07, "loss": 0.2308, "step": 13210 }, { "epoch": 0.22964070294981662, "grad_norm": 1.207815877341397, "learning_rate": 8.990883044298871e-07, "loss": 0.337, "step": 13211 }, { "epoch": 0.22965808548731945, "grad_norm": 1.4916455751162367, "learning_rate": 8.99071345974816e-07, "loss": 0.2847, "step": 13212 }, { "epoch": 0.22967546802482228, "grad_norm": 2.444026436962497, "learning_rate": 8.990543862548722e-07, "loss": 0.3043, "step": 13213 }, { "epoch": 0.22969285056232508, "grad_norm": 1.794022835960796, "learning_rate": 8.990374252701093e-07, "loss": 0.2626, "step": 13214 }, { "epoch": 0.2297102330998279, "grad_norm": 2.6522645196426153, "learning_rate": 8.990204630205811e-07, "loss": 0.4531, "step": 13215 }, { "epoch": 0.22972761563733074, "grad_norm": 2.174915478156142, "learning_rate": 8.990034995063411e-07, "loss": 0.3424, "step": 13216 }, { "epoch": 0.22974499817483357, "grad_norm": 2.4565875737965315, "learning_rate": 8.989865347274435e-07, "loss": 0.2532, "step": 13217 }, { "epoch": 0.2297623807123364, "grad_norm": 1.4991775736780262, "learning_rate": 8.989695686839418e-07, "loss": 0.3084, "step": 13218 }, { "epoch": 0.2297797632498392, "grad_norm": 1.5745775426068591, "learning_rate": 8.989526013758899e-07, "loss": 0.3636, "step": 13219 }, { "epoch": 0.22979714578734203, "grad_norm": 1.9827431310674488, "learning_rate": 8.989356328033415e-07, "loss": 0.3194, "step": 13220 }, { "epoch": 0.22981452832484486, "grad_norm": 3.2282213626106873, "learning_rate": 8.989186629663504e-07, "loss": 0.3966, "step": 13221 }, { "epoch": 0.2298319108623477, "grad_norm": 1.7859497268997937, "learning_rate": 8.989016918649703e-07, "loss": 0.2788, "step": 13222 }, { "epoch": 0.22984929339985052, "grad_norm": 1.4125966582365665, "learning_rate": 8.98884719499255e-07, "loss": 0.3297, "step": 13223 }, { "epoch": 0.22986667593735333, "grad_norm": 1.395275451061736, "learning_rate": 8.988677458692586e-07, "loss": 0.2539, "step": 13224 }, { "epoch": 0.22988405847485616, "grad_norm": 1.8207512828453423, "learning_rate": 8.988507709750346e-07, "loss": 0.3413, "step": 13225 }, { "epoch": 0.229901441012359, "grad_norm": 1.7063397141795689, "learning_rate": 8.988337948166369e-07, "loss": 0.3012, "step": 13226 }, { "epoch": 0.22991882354986182, "grad_norm": 1.5771164349662252, "learning_rate": 8.988168173941191e-07, "loss": 0.6129, "step": 13227 }, { "epoch": 0.22993620608736462, "grad_norm": 1.3644670835799302, "learning_rate": 8.987998387075354e-07, "loss": 0.3482, "step": 13228 }, { "epoch": 0.22995358862486745, "grad_norm": 1.322758879625888, "learning_rate": 8.987828587569393e-07, "loss": 0.1613, "step": 13229 }, { "epoch": 0.22997097116237028, "grad_norm": 2.8057572102432293, "learning_rate": 8.987658775423847e-07, "loss": 0.3869, "step": 13230 }, { "epoch": 0.2299883536998731, "grad_norm": 1.0640484353572464, "learning_rate": 8.987488950639255e-07, "loss": 0.279, "step": 13231 }, { "epoch": 0.23000573623737594, "grad_norm": 1.065830336201543, "learning_rate": 8.987319113216155e-07, "loss": 0.1585, "step": 13232 }, { "epoch": 0.23002311877487874, "grad_norm": 1.1298461312957642, "learning_rate": 8.987149263155085e-07, "loss": 0.2506, "step": 13233 }, { "epoch": 0.23004050131238157, "grad_norm": 1.3839283402707536, "learning_rate": 8.986979400456583e-07, "loss": 0.2178, "step": 13234 }, { "epoch": 0.2300578838498844, "grad_norm": 2.527165617934276, "learning_rate": 8.986809525121189e-07, "loss": 0.3627, "step": 13235 }, { "epoch": 0.23007526638738723, "grad_norm": 2.6800635958082837, "learning_rate": 8.986639637149438e-07, "loss": 0.3575, "step": 13236 }, { "epoch": 0.23009264892489006, "grad_norm": 1.542568422163603, "learning_rate": 8.98646973654187e-07, "loss": 0.1651, "step": 13237 }, { "epoch": 0.23011003146239287, "grad_norm": 1.2034330322759699, "learning_rate": 8.986299823299026e-07, "loss": 0.198, "step": 13238 }, { "epoch": 0.2301274139998957, "grad_norm": 1.5151271560597994, "learning_rate": 8.986129897421444e-07, "loss": 0.2981, "step": 13239 }, { "epoch": 0.23014479653739853, "grad_norm": 3.8863427645322, "learning_rate": 8.985959958909659e-07, "loss": 0.4772, "step": 13240 }, { "epoch": 0.23016217907490136, "grad_norm": 2.552617821176951, "learning_rate": 8.985790007764213e-07, "loss": 0.4248, "step": 13241 }, { "epoch": 0.2301795616124042, "grad_norm": 3.511984471229571, "learning_rate": 8.985620043985645e-07, "loss": 0.3723, "step": 13242 }, { "epoch": 0.230196944149907, "grad_norm": 1.3810669271468323, "learning_rate": 8.985450067574489e-07, "loss": 0.4423, "step": 13243 }, { "epoch": 0.23021432668740982, "grad_norm": 2.486636628301294, "learning_rate": 8.98528007853129e-07, "loss": 0.2716, "step": 13244 }, { "epoch": 0.23023170922491265, "grad_norm": 1.8519530214074604, "learning_rate": 8.985110076856581e-07, "loss": 0.4005, "step": 13245 }, { "epoch": 0.23024909176241548, "grad_norm": 2.254701385561408, "learning_rate": 8.984940062550906e-07, "loss": 0.3831, "step": 13246 }, { "epoch": 0.2302664742999183, "grad_norm": 2.5112046529111183, "learning_rate": 8.984770035614801e-07, "loss": 0.2678, "step": 13247 }, { "epoch": 0.2302838568374211, "grad_norm": 1.857398001027785, "learning_rate": 8.984599996048805e-07, "loss": 0.3615, "step": 13248 }, { "epoch": 0.23030123937492394, "grad_norm": 2.5161283209790897, "learning_rate": 8.984429943853457e-07, "loss": 0.4646, "step": 13249 }, { "epoch": 0.23031862191242677, "grad_norm": 1.774636983536389, "learning_rate": 8.984259879029298e-07, "loss": 0.281, "step": 13250 }, { "epoch": 0.2303360044499296, "grad_norm": 1.6072320168388063, "learning_rate": 8.984089801576864e-07, "loss": 0.3629, "step": 13251 }, { "epoch": 0.23035338698743243, "grad_norm": 2.8512543837705118, "learning_rate": 8.983919711496697e-07, "loss": 0.381, "step": 13252 }, { "epoch": 0.23037076952493524, "grad_norm": 1.8858011641376828, "learning_rate": 8.983749608789332e-07, "loss": 0.3672, "step": 13253 }, { "epoch": 0.23038815206243807, "grad_norm": 5.301755979926544, "learning_rate": 8.983579493455313e-07, "loss": 0.4382, "step": 13254 }, { "epoch": 0.2304055345999409, "grad_norm": 1.4053840577383352, "learning_rate": 8.983409365495176e-07, "loss": 0.3164, "step": 13255 }, { "epoch": 0.23042291713744373, "grad_norm": 1.802448561345003, "learning_rate": 8.983239224909461e-07, "loss": 0.3634, "step": 13256 }, { "epoch": 0.23044029967494656, "grad_norm": 1.8169049205085697, "learning_rate": 8.983069071698708e-07, "loss": 0.281, "step": 13257 }, { "epoch": 0.23045768221244936, "grad_norm": 1.3441748695671376, "learning_rate": 8.982898905863455e-07, "loss": 0.3801, "step": 13258 }, { "epoch": 0.2304750647499522, "grad_norm": 1.9955340740067131, "learning_rate": 8.982728727404243e-07, "loss": 0.2714, "step": 13259 }, { "epoch": 0.23049244728745502, "grad_norm": 2.6459501874576516, "learning_rate": 8.982558536321609e-07, "loss": 0.2888, "step": 13260 }, { "epoch": 0.23050982982495785, "grad_norm": 2.6673731107665106, "learning_rate": 8.982388332616095e-07, "loss": 0.3003, "step": 13261 }, { "epoch": 0.23052721236246068, "grad_norm": 2.0090110770686542, "learning_rate": 8.98221811628824e-07, "loss": 0.4641, "step": 13262 }, { "epoch": 0.23054459489996348, "grad_norm": 1.5352223785929981, "learning_rate": 8.982047887338582e-07, "loss": 0.2966, "step": 13263 }, { "epoch": 0.23056197743746631, "grad_norm": 1.972084247246601, "learning_rate": 8.98187764576766e-07, "loss": 0.3457, "step": 13264 }, { "epoch": 0.23057935997496914, "grad_norm": 2.3357547604349826, "learning_rate": 8.981707391576017e-07, "loss": 0.2804, "step": 13265 }, { "epoch": 0.23059674251247197, "grad_norm": 1.6581305147313006, "learning_rate": 8.981537124764189e-07, "loss": 0.4087, "step": 13266 }, { "epoch": 0.2306141250499748, "grad_norm": 1.7385816700993793, "learning_rate": 8.981366845332718e-07, "loss": 0.2187, "step": 13267 }, { "epoch": 0.2306315075874776, "grad_norm": 2.5933464818271523, "learning_rate": 8.981196553282144e-07, "loss": 0.3452, "step": 13268 }, { "epoch": 0.23064889012498044, "grad_norm": 2.598347628352272, "learning_rate": 8.981026248613005e-07, "loss": 0.255, "step": 13269 }, { "epoch": 0.23066627266248327, "grad_norm": 2.508230108450829, "learning_rate": 8.980855931325841e-07, "loss": 0.3524, "step": 13270 }, { "epoch": 0.2306836551999861, "grad_norm": 1.6225723678267499, "learning_rate": 8.980685601421194e-07, "loss": 0.3706, "step": 13271 }, { "epoch": 0.23070103773748893, "grad_norm": 1.6020408654340577, "learning_rate": 8.980515258899601e-07, "loss": 0.2788, "step": 13272 }, { "epoch": 0.23071842027499173, "grad_norm": 1.7158395493998664, "learning_rate": 8.980344903761603e-07, "loss": 0.3416, "step": 13273 }, { "epoch": 0.23073580281249456, "grad_norm": 2.590003563908808, "learning_rate": 8.980174536007741e-07, "loss": 0.4709, "step": 13274 }, { "epoch": 0.2307531853499974, "grad_norm": 3.180978420430995, "learning_rate": 8.980004155638552e-07, "loss": 0.349, "step": 13275 }, { "epoch": 0.23077056788750022, "grad_norm": 1.784590405583918, "learning_rate": 8.97983376265458e-07, "loss": 0.2331, "step": 13276 }, { "epoch": 0.23078795042500305, "grad_norm": 1.561482031006041, "learning_rate": 8.979663357056361e-07, "loss": 0.2779, "step": 13277 }, { "epoch": 0.23080533296250585, "grad_norm": 1.560279997442174, "learning_rate": 8.97949293884444e-07, "loss": 0.2944, "step": 13278 }, { "epoch": 0.23082271550000868, "grad_norm": 3.271825575219722, "learning_rate": 8.979322508019353e-07, "loss": 0.2688, "step": 13279 }, { "epoch": 0.23084009803751152, "grad_norm": 2.0821353337242896, "learning_rate": 8.979152064581644e-07, "loss": 0.3322, "step": 13280 }, { "epoch": 0.23085748057501435, "grad_norm": 2.3323768995964183, "learning_rate": 8.978981608531847e-07, "loss": 0.3053, "step": 13281 }, { "epoch": 0.23087486311251718, "grad_norm": 2.4984428588386765, "learning_rate": 8.978811139870508e-07, "loss": 0.3891, "step": 13282 }, { "epoch": 0.23089224565001998, "grad_norm": 3.5353647274029925, "learning_rate": 8.978640658598165e-07, "loss": 0.3363, "step": 13283 }, { "epoch": 0.2309096281875228, "grad_norm": 2.207232498026864, "learning_rate": 8.978470164715359e-07, "loss": 0.2021, "step": 13284 }, { "epoch": 0.23092701072502564, "grad_norm": 2.9300167570156654, "learning_rate": 8.97829965822263e-07, "loss": 0.5173, "step": 13285 }, { "epoch": 0.23094439326252847, "grad_norm": 1.9147242251011056, "learning_rate": 8.978129139120519e-07, "loss": 0.3673, "step": 13286 }, { "epoch": 0.2309617758000313, "grad_norm": 2.811527690594554, "learning_rate": 8.977958607409566e-07, "loss": 0.5016, "step": 13287 }, { "epoch": 0.2309791583375341, "grad_norm": 2.2156981359215098, "learning_rate": 8.977788063090311e-07, "loss": 0.4618, "step": 13288 }, { "epoch": 0.23099654087503693, "grad_norm": 1.3897337885040286, "learning_rate": 8.977617506163295e-07, "loss": 0.2537, "step": 13289 }, { "epoch": 0.23101392341253976, "grad_norm": 2.0533293544738935, "learning_rate": 8.977446936629058e-07, "loss": 0.372, "step": 13290 }, { "epoch": 0.2310313059500426, "grad_norm": 1.5701724992721304, "learning_rate": 8.977276354488144e-07, "loss": 0.4016, "step": 13291 }, { "epoch": 0.23104868848754542, "grad_norm": 1.5089563280136542, "learning_rate": 8.977105759741088e-07, "loss": 0.4258, "step": 13292 }, { "epoch": 0.23106607102504823, "grad_norm": 1.7668029771167704, "learning_rate": 8.976935152388436e-07, "loss": 0.2051, "step": 13293 }, { "epoch": 0.23108345356255106, "grad_norm": 2.5590358588676634, "learning_rate": 8.976764532430725e-07, "loss": 0.3958, "step": 13294 }, { "epoch": 0.23110083610005389, "grad_norm": 1.9487970005538122, "learning_rate": 8.976593899868497e-07, "loss": 0.3566, "step": 13295 }, { "epoch": 0.23111821863755672, "grad_norm": 1.4214930106372983, "learning_rate": 8.976423254702293e-07, "loss": 0.4185, "step": 13296 }, { "epoch": 0.23113560117505955, "grad_norm": 2.562396032928058, "learning_rate": 8.976252596932655e-07, "loss": 0.4569, "step": 13297 }, { "epoch": 0.23115298371256235, "grad_norm": 2.4467042214915207, "learning_rate": 8.976081926560121e-07, "loss": 0.3797, "step": 13298 }, { "epoch": 0.23117036625006518, "grad_norm": 1.119988970199888, "learning_rate": 8.975911243585237e-07, "loss": 0.2196, "step": 13299 }, { "epoch": 0.231187748787568, "grad_norm": 3.0354745094540476, "learning_rate": 8.975740548008538e-07, "loss": 0.368, "step": 13300 }, { "epoch": 0.23120513132507084, "grad_norm": 1.9274750249713848, "learning_rate": 8.975569839830568e-07, "loss": 0.2636, "step": 13301 }, { "epoch": 0.23122251386257367, "grad_norm": 1.4802683651602075, "learning_rate": 8.97539911905187e-07, "loss": 0.3023, "step": 13302 }, { "epoch": 0.23123989640007647, "grad_norm": 2.1769947711509734, "learning_rate": 8.97522838567298e-07, "loss": 0.388, "step": 13303 }, { "epoch": 0.2312572789375793, "grad_norm": 1.386227398730135, "learning_rate": 8.975057639694444e-07, "loss": 0.2413, "step": 13304 }, { "epoch": 0.23127466147508213, "grad_norm": 1.571905730251629, "learning_rate": 8.974886881116801e-07, "loss": 0.3608, "step": 13305 }, { "epoch": 0.23129204401258496, "grad_norm": 1.6255918829192355, "learning_rate": 8.974716109940593e-07, "loss": 0.3442, "step": 13306 }, { "epoch": 0.2313094265500878, "grad_norm": 1.2460484209027394, "learning_rate": 8.974545326166359e-07, "loss": 0.2702, "step": 13307 }, { "epoch": 0.2313268090875906, "grad_norm": 1.7485339638863429, "learning_rate": 8.974374529794644e-07, "loss": 0.3291, "step": 13308 }, { "epoch": 0.23134419162509343, "grad_norm": 0.7415700853914785, "learning_rate": 8.974203720825986e-07, "loss": 0.1226, "step": 13309 }, { "epoch": 0.23136157416259626, "grad_norm": 1.9483680374156553, "learning_rate": 8.974032899260928e-07, "loss": 0.2843, "step": 13310 }, { "epoch": 0.2313789567000991, "grad_norm": 1.320243887336185, "learning_rate": 8.973862065100011e-07, "loss": 0.2001, "step": 13311 }, { "epoch": 0.23139633923760192, "grad_norm": 2.7086848317412002, "learning_rate": 8.973691218343777e-07, "loss": 0.4508, "step": 13312 }, { "epoch": 0.23141372177510472, "grad_norm": 1.3189973011922096, "learning_rate": 8.973520358992768e-07, "loss": 0.2853, "step": 13313 }, { "epoch": 0.23143110431260755, "grad_norm": 0.9196284311737091, "learning_rate": 8.973349487047524e-07, "loss": 0.191, "step": 13314 }, { "epoch": 0.23144848685011038, "grad_norm": 2.5436495700754813, "learning_rate": 8.973178602508587e-07, "loss": 0.4119, "step": 13315 }, { "epoch": 0.2314658693876132, "grad_norm": 2.0335406851485316, "learning_rate": 8.973007705376499e-07, "loss": 0.558, "step": 13316 }, { "epoch": 0.23148325192511604, "grad_norm": 1.5942802984763373, "learning_rate": 8.972836795651802e-07, "loss": 0.3392, "step": 13317 }, { "epoch": 0.23150063446261884, "grad_norm": 2.4982661503318693, "learning_rate": 8.972665873335039e-07, "loss": 0.3688, "step": 13318 }, { "epoch": 0.23151801700012167, "grad_norm": 1.4031726176063186, "learning_rate": 8.972494938426748e-07, "loss": 0.2426, "step": 13319 }, { "epoch": 0.2315353995376245, "grad_norm": 1.8324054643203707, "learning_rate": 8.972323990927473e-07, "loss": 0.3077, "step": 13320 }, { "epoch": 0.23155278207512733, "grad_norm": 1.3900649557138836, "learning_rate": 8.972153030837757e-07, "loss": 0.4087, "step": 13321 }, { "epoch": 0.23157016461263016, "grad_norm": 1.6620251957921583, "learning_rate": 8.971982058158139e-07, "loss": 0.2172, "step": 13322 }, { "epoch": 0.23158754715013297, "grad_norm": 1.9311520504141513, "learning_rate": 8.971811072889162e-07, "loss": 0.2539, "step": 13323 }, { "epoch": 0.2316049296876358, "grad_norm": 1.7697492831873927, "learning_rate": 8.97164007503137e-07, "loss": 0.204, "step": 13324 }, { "epoch": 0.23162231222513863, "grad_norm": 2.164313887762796, "learning_rate": 8.971469064585302e-07, "loss": 0.2686, "step": 13325 }, { "epoch": 0.23163969476264146, "grad_norm": 1.512277628134042, "learning_rate": 8.971298041551502e-07, "loss": 0.3095, "step": 13326 }, { "epoch": 0.2316570773001443, "grad_norm": 1.4732594856866026, "learning_rate": 8.971127005930512e-07, "loss": 0.3334, "step": 13327 }, { "epoch": 0.2316744598376471, "grad_norm": 2.0955789036952233, "learning_rate": 8.970955957722874e-07, "loss": 0.2969, "step": 13328 }, { "epoch": 0.23169184237514992, "grad_norm": 2.1366740170605483, "learning_rate": 8.970784896929129e-07, "loss": 0.4241, "step": 13329 }, { "epoch": 0.23170922491265275, "grad_norm": 2.650456995797627, "learning_rate": 8.970613823549819e-07, "loss": 0.3524, "step": 13330 }, { "epoch": 0.23172660745015558, "grad_norm": 1.3724260981065723, "learning_rate": 8.970442737585488e-07, "loss": 0.362, "step": 13331 }, { "epoch": 0.2317439899876584, "grad_norm": 2.1634112698767964, "learning_rate": 8.970271639036677e-07, "loss": 0.397, "step": 13332 }, { "epoch": 0.2317613725251612, "grad_norm": 1.2684014695655272, "learning_rate": 8.97010052790393e-07, "loss": 0.2069, "step": 13333 }, { "epoch": 0.23177875506266404, "grad_norm": 1.1632009540675605, "learning_rate": 8.969929404187787e-07, "loss": 0.2841, "step": 13334 }, { "epoch": 0.23179613760016687, "grad_norm": 1.6080607240935474, "learning_rate": 8.969758267888792e-07, "loss": 0.3139, "step": 13335 }, { "epoch": 0.2318135201376697, "grad_norm": 2.1648274833939567, "learning_rate": 8.969587119007487e-07, "loss": 0.3003, "step": 13336 }, { "epoch": 0.23183090267517253, "grad_norm": 2.0478481880058643, "learning_rate": 8.969415957544413e-07, "loss": 0.3641, "step": 13337 }, { "epoch": 0.23184828521267534, "grad_norm": 1.6885003328426107, "learning_rate": 8.969244783500114e-07, "loss": 0.2183, "step": 13338 }, { "epoch": 0.23186566775017817, "grad_norm": 1.0029694631476531, "learning_rate": 8.969073596875134e-07, "loss": 0.281, "step": 13339 }, { "epoch": 0.231883050287681, "grad_norm": 1.3278447413807037, "learning_rate": 8.968902397670013e-07, "loss": 0.2702, "step": 13340 }, { "epoch": 0.23190043282518383, "grad_norm": 1.1734239821031456, "learning_rate": 8.968731185885294e-07, "loss": 0.23, "step": 13341 }, { "epoch": 0.23191781536268666, "grad_norm": 1.3738137650640652, "learning_rate": 8.96855996152152e-07, "loss": 0.4475, "step": 13342 }, { "epoch": 0.23193519790018946, "grad_norm": 1.4883941093143425, "learning_rate": 8.968388724579235e-07, "loss": 0.313, "step": 13343 }, { "epoch": 0.2319525804376923, "grad_norm": 2.974341973959979, "learning_rate": 8.96821747505898e-07, "loss": 0.3273, "step": 13344 }, { "epoch": 0.23196996297519512, "grad_norm": 1.6607089728012263, "learning_rate": 8.968046212961299e-07, "loss": 0.6034, "step": 13345 }, { "epoch": 0.23198734551269795, "grad_norm": 2.2823477926756497, "learning_rate": 8.967874938286735e-07, "loss": 0.1387, "step": 13346 }, { "epoch": 0.23200472805020078, "grad_norm": 1.4802943084761961, "learning_rate": 8.967703651035827e-07, "loss": 0.2711, "step": 13347 }, { "epoch": 0.23202211058770358, "grad_norm": 2.443736029329323, "learning_rate": 8.967532351209124e-07, "loss": 0.3761, "step": 13348 }, { "epoch": 0.23203949312520641, "grad_norm": 1.0500352518890876, "learning_rate": 8.967361038807165e-07, "loss": 0.223, "step": 13349 }, { "epoch": 0.23205687566270924, "grad_norm": 2.0335302852695887, "learning_rate": 8.967189713830494e-07, "loss": 0.2152, "step": 13350 }, { "epoch": 0.23207425820021207, "grad_norm": 3.5135533386261955, "learning_rate": 8.967018376279654e-07, "loss": 0.4198, "step": 13351 }, { "epoch": 0.2320916407377149, "grad_norm": 1.6276131236868852, "learning_rate": 8.966847026155188e-07, "loss": 0.29, "step": 13352 }, { "epoch": 0.2321090232752177, "grad_norm": 1.586793323408955, "learning_rate": 8.966675663457639e-07, "loss": 0.1986, "step": 13353 }, { "epoch": 0.23212640581272054, "grad_norm": 1.486245438224199, "learning_rate": 8.966504288187551e-07, "loss": 0.3698, "step": 13354 }, { "epoch": 0.23214378835022337, "grad_norm": 1.0476651407261768, "learning_rate": 8.966332900345465e-07, "loss": 0.1929, "step": 13355 }, { "epoch": 0.2321611708877262, "grad_norm": 1.247603513092075, "learning_rate": 8.966161499931928e-07, "loss": 0.2502, "step": 13356 }, { "epoch": 0.23217855342522903, "grad_norm": 4.034494262522193, "learning_rate": 8.965990086947479e-07, "loss": 0.4506, "step": 13357 }, { "epoch": 0.23219593596273183, "grad_norm": 1.0683044587134254, "learning_rate": 8.965818661392665e-07, "loss": 0.3383, "step": 13358 }, { "epoch": 0.23221331850023466, "grad_norm": 3.5425833913816183, "learning_rate": 8.965647223268026e-07, "loss": 0.2317, "step": 13359 }, { "epoch": 0.2322307010377375, "grad_norm": 1.5667740894786903, "learning_rate": 8.965475772574107e-07, "loss": 0.2881, "step": 13360 }, { "epoch": 0.23224808357524032, "grad_norm": 2.0861434899843627, "learning_rate": 8.965304309311452e-07, "loss": 0.3822, "step": 13361 }, { "epoch": 0.23226546611274315, "grad_norm": 2.8217657418597377, "learning_rate": 8.965132833480605e-07, "loss": 0.4232, "step": 13362 }, { "epoch": 0.23228284865024595, "grad_norm": 1.6169868239100984, "learning_rate": 8.964961345082106e-07, "loss": 0.2434, "step": 13363 }, { "epoch": 0.23230023118774878, "grad_norm": 2.1956549161293464, "learning_rate": 8.964789844116503e-07, "loss": 0.2896, "step": 13364 }, { "epoch": 0.23231761372525161, "grad_norm": 1.557267180378972, "learning_rate": 8.964618330584336e-07, "loss": 0.2307, "step": 13365 }, { "epoch": 0.23233499626275445, "grad_norm": 1.4222628416768441, "learning_rate": 8.96444680448615e-07, "loss": 0.3028, "step": 13366 }, { "epoch": 0.23235237880025728, "grad_norm": 1.0974205197342783, "learning_rate": 8.96427526582249e-07, "loss": 0.2191, "step": 13367 }, { "epoch": 0.23236976133776008, "grad_norm": 3.3181383963086875, "learning_rate": 8.964103714593898e-07, "loss": 0.2242, "step": 13368 }, { "epoch": 0.2323871438752629, "grad_norm": 1.8763672974589318, "learning_rate": 8.963932150800917e-07, "loss": 0.275, "step": 13369 }, { "epoch": 0.23240452641276574, "grad_norm": 1.2800881861118032, "learning_rate": 8.963760574444093e-07, "loss": 0.3213, "step": 13370 }, { "epoch": 0.23242190895026857, "grad_norm": 1.4193525805888783, "learning_rate": 8.963588985523969e-07, "loss": 0.4312, "step": 13371 }, { "epoch": 0.23243929148777137, "grad_norm": 1.8858032944908798, "learning_rate": 8.963417384041087e-07, "loss": 0.2169, "step": 13372 }, { "epoch": 0.2324566740252742, "grad_norm": 2.4176532164357285, "learning_rate": 8.963245769995995e-07, "loss": 0.4147, "step": 13373 }, { "epoch": 0.23247405656277703, "grad_norm": 1.887295656663707, "learning_rate": 8.963074143389233e-07, "loss": 0.273, "step": 13374 }, { "epoch": 0.23249143910027986, "grad_norm": 1.8856845522071595, "learning_rate": 8.962902504221346e-07, "loss": 0.3161, "step": 13375 }, { "epoch": 0.2325088216377827, "grad_norm": 1.6113982726444056, "learning_rate": 8.96273085249288e-07, "loss": 0.2382, "step": 13376 }, { "epoch": 0.2325262041752855, "grad_norm": 1.0514144457235262, "learning_rate": 8.962559188204377e-07, "loss": 0.2788, "step": 13377 }, { "epoch": 0.23254358671278832, "grad_norm": 1.224028864684225, "learning_rate": 8.962387511356382e-07, "loss": 0.3097, "step": 13378 }, { "epoch": 0.23256096925029116, "grad_norm": 1.791366031803718, "learning_rate": 8.962215821949437e-07, "loss": 0.2896, "step": 13379 }, { "epoch": 0.23257835178779399, "grad_norm": 1.9450932032831822, "learning_rate": 8.962044119984089e-07, "loss": 0.2847, "step": 13380 }, { "epoch": 0.23259573432529682, "grad_norm": 2.040902045169486, "learning_rate": 8.961872405460881e-07, "loss": 0.1697, "step": 13381 }, { "epoch": 0.23261311686279962, "grad_norm": 2.10139180712178, "learning_rate": 8.961700678380358e-07, "loss": 0.2863, "step": 13382 }, { "epoch": 0.23263049940030245, "grad_norm": 1.8294206041886003, "learning_rate": 8.961528938743063e-07, "loss": 0.3377, "step": 13383 }, { "epoch": 0.23264788193780528, "grad_norm": 2.478757272901217, "learning_rate": 8.961357186549542e-07, "loss": 0.5108, "step": 13384 }, { "epoch": 0.2326652644753081, "grad_norm": 3.1420352133673317, "learning_rate": 8.961185421800337e-07, "loss": 0.4689, "step": 13385 }, { "epoch": 0.23268264701281094, "grad_norm": 2.3065802036530374, "learning_rate": 8.961013644495995e-07, "loss": 0.4676, "step": 13386 }, { "epoch": 0.23270002955031374, "grad_norm": 1.6033549405491256, "learning_rate": 8.960841854637059e-07, "loss": 0.2719, "step": 13387 }, { "epoch": 0.23271741208781657, "grad_norm": 3.0594677576996245, "learning_rate": 8.960670052224073e-07, "loss": 0.5014, "step": 13388 }, { "epoch": 0.2327347946253194, "grad_norm": 2.2180839414053444, "learning_rate": 8.960498237257583e-07, "loss": 0.4297, "step": 13389 }, { "epoch": 0.23275217716282223, "grad_norm": 4.118902596463531, "learning_rate": 8.960326409738133e-07, "loss": 0.3555, "step": 13390 }, { "epoch": 0.23276955970032506, "grad_norm": 1.4892451591330451, "learning_rate": 8.960154569666268e-07, "loss": 0.4288, "step": 13391 }, { "epoch": 0.23278694223782787, "grad_norm": 2.645333717967295, "learning_rate": 8.959982717042531e-07, "loss": 0.3325, "step": 13392 }, { "epoch": 0.2328043247753307, "grad_norm": 2.4993320136201125, "learning_rate": 8.959810851867469e-07, "loss": 0.2222, "step": 13393 }, { "epoch": 0.23282170731283353, "grad_norm": 2.2550935405164303, "learning_rate": 8.959638974141626e-07, "loss": 0.2802, "step": 13394 }, { "epoch": 0.23283908985033636, "grad_norm": 2.867840076692282, "learning_rate": 8.959467083865545e-07, "loss": 0.3029, "step": 13395 }, { "epoch": 0.2328564723878392, "grad_norm": 1.3872973305063925, "learning_rate": 8.959295181039772e-07, "loss": 0.2308, "step": 13396 }, { "epoch": 0.232873854925342, "grad_norm": 1.45259927173393, "learning_rate": 8.959123265664853e-07, "loss": 0.1474, "step": 13397 }, { "epoch": 0.23289123746284482, "grad_norm": 3.4710185315567075, "learning_rate": 8.958951337741332e-07, "loss": 0.3615, "step": 13398 }, { "epoch": 0.23290862000034765, "grad_norm": 1.9849966351100536, "learning_rate": 8.958779397269753e-07, "loss": 0.2493, "step": 13399 }, { "epoch": 0.23292600253785048, "grad_norm": 2.0830503132338762, "learning_rate": 8.958607444250662e-07, "loss": 0.2886, "step": 13400 }, { "epoch": 0.2329433850753533, "grad_norm": 2.405952826837724, "learning_rate": 8.958435478684604e-07, "loss": 0.3047, "step": 13401 }, { "epoch": 0.2329607676128561, "grad_norm": 2.133454667625522, "learning_rate": 8.958263500572125e-07, "loss": 0.4242, "step": 13402 }, { "epoch": 0.23297815015035894, "grad_norm": 1.2546446013305965, "learning_rate": 8.958091509913768e-07, "loss": 0.262, "step": 13403 }, { "epoch": 0.23299553268786177, "grad_norm": 1.4355120990502062, "learning_rate": 8.957919506710077e-07, "loss": 0.3257, "step": 13404 }, { "epoch": 0.2330129152253646, "grad_norm": 2.536362411627977, "learning_rate": 8.957747490961602e-07, "loss": 0.3986, "step": 13405 }, { "epoch": 0.23303029776286743, "grad_norm": 1.9357950190834396, "learning_rate": 8.957575462668884e-07, "loss": 0.4272, "step": 13406 }, { "epoch": 0.23304768030037024, "grad_norm": 2.321571105634721, "learning_rate": 8.957403421832471e-07, "loss": 0.3094, "step": 13407 }, { "epoch": 0.23306506283787307, "grad_norm": 3.39517817058586, "learning_rate": 8.957231368452906e-07, "loss": 0.3606, "step": 13408 }, { "epoch": 0.2330824453753759, "grad_norm": 2.491569162511614, "learning_rate": 8.957059302530736e-07, "loss": 0.2995, "step": 13409 }, { "epoch": 0.23309982791287873, "grad_norm": 2.1024676514693352, "learning_rate": 8.956887224066507e-07, "loss": 0.412, "step": 13410 }, { "epoch": 0.23311721045038156, "grad_norm": 1.3717366316316426, "learning_rate": 8.956715133060762e-07, "loss": 0.3806, "step": 13411 }, { "epoch": 0.23313459298788436, "grad_norm": 1.1441944440200515, "learning_rate": 8.956543029514047e-07, "loss": 0.4207, "step": 13412 }, { "epoch": 0.2331519755253872, "grad_norm": 2.8373092910530087, "learning_rate": 8.956370913426907e-07, "loss": 0.1788, "step": 13413 }, { "epoch": 0.23316935806289002, "grad_norm": 2.2789251261352987, "learning_rate": 8.956198784799891e-07, "loss": 0.3556, "step": 13414 }, { "epoch": 0.23318674060039285, "grad_norm": 1.9484253925852633, "learning_rate": 8.95602664363354e-07, "loss": 0.2912, "step": 13415 }, { "epoch": 0.23320412313789568, "grad_norm": 1.3272914436444465, "learning_rate": 8.955854489928405e-07, "loss": 0.3067, "step": 13416 }, { "epoch": 0.23322150567539848, "grad_norm": 3.147539731293144, "learning_rate": 8.955682323685025e-07, "loss": 0.2822, "step": 13417 }, { "epoch": 0.2332388882129013, "grad_norm": 1.6625757782509543, "learning_rate": 8.955510144903951e-07, "loss": 0.3263, "step": 13418 }, { "epoch": 0.23325627075040414, "grad_norm": 2.0633082724964087, "learning_rate": 8.955337953585727e-07, "loss": 0.303, "step": 13419 }, { "epoch": 0.23327365328790697, "grad_norm": 11.035603979675892, "learning_rate": 8.955165749730898e-07, "loss": 0.4032, "step": 13420 }, { "epoch": 0.2332910358254098, "grad_norm": 2.2146837459531805, "learning_rate": 8.954993533340011e-07, "loss": 0.581, "step": 13421 }, { "epoch": 0.2333084183629126, "grad_norm": 3.5364694374985084, "learning_rate": 8.954821304413611e-07, "loss": 0.3713, "step": 13422 }, { "epoch": 0.23332580090041544, "grad_norm": 1.6587458532418853, "learning_rate": 8.954649062952244e-07, "loss": 0.3611, "step": 13423 }, { "epoch": 0.23334318343791827, "grad_norm": 1.3955644030952818, "learning_rate": 8.954476808956456e-07, "loss": 0.3675, "step": 13424 }, { "epoch": 0.2333605659754211, "grad_norm": 2.805219583950305, "learning_rate": 8.954304542426793e-07, "loss": 0.3053, "step": 13425 }, { "epoch": 0.23337794851292393, "grad_norm": 1.0784688065161903, "learning_rate": 8.9541322633638e-07, "loss": 0.211, "step": 13426 }, { "epoch": 0.23339533105042673, "grad_norm": 2.095133597073398, "learning_rate": 8.953959971768026e-07, "loss": 0.6238, "step": 13427 }, { "epoch": 0.23341271358792956, "grad_norm": 2.17355003216938, "learning_rate": 8.953787667640015e-07, "loss": 0.3552, "step": 13428 }, { "epoch": 0.2334300961254324, "grad_norm": 2.331161855983264, "learning_rate": 8.953615350980312e-07, "loss": 0.3928, "step": 13429 }, { "epoch": 0.23344747866293522, "grad_norm": 2.280073262615612, "learning_rate": 8.953443021789466e-07, "loss": 0.341, "step": 13430 }, { "epoch": 0.23346486120043805, "grad_norm": 2.99895901064012, "learning_rate": 8.953270680068022e-07, "loss": 0.3719, "step": 13431 }, { "epoch": 0.23348224373794085, "grad_norm": 1.7690149769938386, "learning_rate": 8.953098325816524e-07, "loss": 0.3001, "step": 13432 }, { "epoch": 0.23349962627544368, "grad_norm": 1.6476008514161178, "learning_rate": 8.952925959035521e-07, "loss": 0.2711, "step": 13433 }, { "epoch": 0.2335170088129465, "grad_norm": 3.0478634689764914, "learning_rate": 8.952753579725558e-07, "loss": 0.4566, "step": 13434 }, { "epoch": 0.23353439135044934, "grad_norm": 1.5220901422694535, "learning_rate": 8.952581187887183e-07, "loss": 0.1473, "step": 13435 }, { "epoch": 0.23355177388795217, "grad_norm": 1.4838537015645539, "learning_rate": 8.952408783520942e-07, "loss": 0.1501, "step": 13436 }, { "epoch": 0.23356915642545498, "grad_norm": 1.7433185182334692, "learning_rate": 8.952236366627378e-07, "loss": 0.3113, "step": 13437 }, { "epoch": 0.2335865389629578, "grad_norm": 1.2794187734465552, "learning_rate": 8.952063937207041e-07, "loss": 0.2465, "step": 13438 }, { "epoch": 0.23360392150046064, "grad_norm": 1.623332974487524, "learning_rate": 8.951891495260478e-07, "loss": 0.2681, "step": 13439 }, { "epoch": 0.23362130403796347, "grad_norm": 2.4794566096339494, "learning_rate": 8.951719040788235e-07, "loss": 0.4895, "step": 13440 }, { "epoch": 0.2336386865754663, "grad_norm": 2.203689322928179, "learning_rate": 8.951546573790855e-07, "loss": 0.278, "step": 13441 }, { "epoch": 0.2336560691129691, "grad_norm": 1.5764236526666018, "learning_rate": 8.951374094268891e-07, "loss": 0.2845, "step": 13442 }, { "epoch": 0.23367345165047193, "grad_norm": 1.4944935607923064, "learning_rate": 8.951201602222884e-07, "loss": 0.3108, "step": 13443 }, { "epoch": 0.23369083418797476, "grad_norm": 1.7585595371224703, "learning_rate": 8.951029097653384e-07, "loss": 0.21, "step": 13444 }, { "epoch": 0.2337082167254776, "grad_norm": 2.184143156220327, "learning_rate": 8.950856580560935e-07, "loss": 0.3928, "step": 13445 }, { "epoch": 0.23372559926298042, "grad_norm": 1.8552575423042963, "learning_rate": 8.950684050946087e-07, "loss": 0.3419, "step": 13446 }, { "epoch": 0.23374298180048322, "grad_norm": 4.371360941148879, "learning_rate": 8.950511508809387e-07, "loss": 0.2526, "step": 13447 }, { "epoch": 0.23376036433798605, "grad_norm": 4.3012694250113235, "learning_rate": 8.950338954151378e-07, "loss": 0.3695, "step": 13448 }, { "epoch": 0.23377774687548888, "grad_norm": 1.129826914498307, "learning_rate": 8.950166386972608e-07, "loss": 0.2248, "step": 13449 }, { "epoch": 0.23379512941299171, "grad_norm": 1.8668220847581163, "learning_rate": 8.949993807273627e-07, "loss": 0.3524, "step": 13450 }, { "epoch": 0.23381251195049454, "grad_norm": 1.9725115030680642, "learning_rate": 8.94982121505498e-07, "loss": 0.3029, "step": 13451 }, { "epoch": 0.23382989448799735, "grad_norm": 1.678618832092912, "learning_rate": 8.949648610317213e-07, "loss": 0.2226, "step": 13452 }, { "epoch": 0.23384727702550018, "grad_norm": 1.6114864020927484, "learning_rate": 8.949475993060875e-07, "loss": 0.2204, "step": 13453 }, { "epoch": 0.233864659563003, "grad_norm": 2.280973879677144, "learning_rate": 8.949303363286514e-07, "loss": 0.3368, "step": 13454 }, { "epoch": 0.23388204210050584, "grad_norm": 2.447584075730534, "learning_rate": 8.949130720994673e-07, "loss": 0.233, "step": 13455 }, { "epoch": 0.23389942463800867, "grad_norm": 4.06113490852337, "learning_rate": 8.948958066185903e-07, "loss": 0.6027, "step": 13456 }, { "epoch": 0.23391680717551147, "grad_norm": 0.8996439658807832, "learning_rate": 8.94878539886075e-07, "loss": 0.3819, "step": 13457 }, { "epoch": 0.2339341897130143, "grad_norm": 1.9451490749835216, "learning_rate": 8.948612719019761e-07, "loss": 0.522, "step": 13458 }, { "epoch": 0.23395157225051713, "grad_norm": 3.5774835164676846, "learning_rate": 8.948440026663482e-07, "loss": 0.4226, "step": 13459 }, { "epoch": 0.23396895478801996, "grad_norm": 2.5088890965462345, "learning_rate": 8.948267321792463e-07, "loss": 0.6453, "step": 13460 }, { "epoch": 0.2339863373255228, "grad_norm": 1.7742944236084823, "learning_rate": 8.94809460440725e-07, "loss": 0.2669, "step": 13461 }, { "epoch": 0.2340037198630256, "grad_norm": 2.446137340809374, "learning_rate": 8.947921874508391e-07, "loss": 0.5031, "step": 13462 }, { "epoch": 0.23402110240052842, "grad_norm": 2.6020321937954676, "learning_rate": 8.947749132096433e-07, "loss": 0.205, "step": 13463 }, { "epoch": 0.23403848493803125, "grad_norm": 3.343278344474146, "learning_rate": 8.947576377171926e-07, "loss": 0.6195, "step": 13464 }, { "epoch": 0.23405586747553409, "grad_norm": 2.0103312116470304, "learning_rate": 8.947403609735413e-07, "loss": 0.2421, "step": 13465 }, { "epoch": 0.23407325001303692, "grad_norm": 1.4313444300834026, "learning_rate": 8.947230829787443e-07, "loss": 0.4472, "step": 13466 }, { "epoch": 0.23409063255053972, "grad_norm": 1.1597111785577774, "learning_rate": 8.947058037328567e-07, "loss": 0.2488, "step": 13467 }, { "epoch": 0.23410801508804255, "grad_norm": 1.0192973657879514, "learning_rate": 8.946885232359328e-07, "loss": 0.2141, "step": 13468 }, { "epoch": 0.23412539762554538, "grad_norm": 1.7975531424798379, "learning_rate": 8.946712414880276e-07, "loss": 0.276, "step": 13469 }, { "epoch": 0.2341427801630482, "grad_norm": 1.8204754870670934, "learning_rate": 8.946539584891959e-07, "loss": 0.2688, "step": 13470 }, { "epoch": 0.23416016270055104, "grad_norm": 2.369042411700673, "learning_rate": 8.946366742394926e-07, "loss": 0.4702, "step": 13471 }, { "epoch": 0.23417754523805384, "grad_norm": 3.500947209781123, "learning_rate": 8.946193887389721e-07, "loss": 0.2493, "step": 13472 }, { "epoch": 0.23419492777555667, "grad_norm": 1.2769470475449092, "learning_rate": 8.946021019876895e-07, "loss": 0.1842, "step": 13473 }, { "epoch": 0.2342123103130595, "grad_norm": 2.812897623782875, "learning_rate": 8.945848139856995e-07, "loss": 0.3652, "step": 13474 }, { "epoch": 0.23422969285056233, "grad_norm": 3.4182144089773088, "learning_rate": 8.945675247330568e-07, "loss": 0.3742, "step": 13475 }, { "epoch": 0.23424707538806516, "grad_norm": 1.9000173283904993, "learning_rate": 8.945502342298165e-07, "loss": 0.4223, "step": 13476 }, { "epoch": 0.23426445792556796, "grad_norm": 2.0364988544869473, "learning_rate": 8.945329424760331e-07, "loss": 0.2756, "step": 13477 }, { "epoch": 0.2342818404630708, "grad_norm": 1.8509278184857543, "learning_rate": 8.945156494717615e-07, "loss": 0.2612, "step": 13478 }, { "epoch": 0.23429922300057363, "grad_norm": 1.2912725940122596, "learning_rate": 8.944983552170566e-07, "loss": 0.3873, "step": 13479 }, { "epoch": 0.23431660553807646, "grad_norm": 2.0387734954378645, "learning_rate": 8.94481059711973e-07, "loss": 0.5257, "step": 13480 }, { "epoch": 0.23433398807557929, "grad_norm": 1.256305345590814, "learning_rate": 8.944637629565657e-07, "loss": 0.3342, "step": 13481 }, { "epoch": 0.2343513706130821, "grad_norm": 5.3863720149107825, "learning_rate": 8.944464649508894e-07, "loss": 0.2696, "step": 13482 }, { "epoch": 0.23436875315058492, "grad_norm": 2.008389628682176, "learning_rate": 8.944291656949992e-07, "loss": 0.2533, "step": 13483 }, { "epoch": 0.23438613568808775, "grad_norm": 1.654839306012354, "learning_rate": 8.944118651889496e-07, "loss": 0.1976, "step": 13484 }, { "epoch": 0.23440351822559058, "grad_norm": 1.9167920841391684, "learning_rate": 8.943945634327955e-07, "loss": 0.2182, "step": 13485 }, { "epoch": 0.2344209007630934, "grad_norm": 2.5056524415355628, "learning_rate": 8.943772604265918e-07, "loss": 0.3214, "step": 13486 }, { "epoch": 0.2344382833005962, "grad_norm": 2.2837794869329024, "learning_rate": 8.943599561703935e-07, "loss": 0.2487, "step": 13487 }, { "epoch": 0.23445566583809904, "grad_norm": 1.5255711569741797, "learning_rate": 8.943426506642553e-07, "loss": 0.2857, "step": 13488 }, { "epoch": 0.23447304837560187, "grad_norm": 2.0586007135879445, "learning_rate": 8.943253439082319e-07, "loss": 0.256, "step": 13489 }, { "epoch": 0.2344904309131047, "grad_norm": 2.1373927514293896, "learning_rate": 8.943080359023783e-07, "loss": 0.2078, "step": 13490 }, { "epoch": 0.23450781345060753, "grad_norm": 2.1151732099891007, "learning_rate": 8.942907266467494e-07, "loss": 0.3775, "step": 13491 }, { "epoch": 0.23452519598811034, "grad_norm": 2.26874999997884, "learning_rate": 8.942734161414001e-07, "loss": 0.4701, "step": 13492 }, { "epoch": 0.23454257852561317, "grad_norm": 3.080231344540534, "learning_rate": 8.94256104386385e-07, "loss": 0.3746, "step": 13493 }, { "epoch": 0.234559961063116, "grad_norm": 9.062650466119123, "learning_rate": 8.942387913817593e-07, "loss": 0.3282, "step": 13494 }, { "epoch": 0.23457734360061883, "grad_norm": 1.8109992526865435, "learning_rate": 8.942214771275778e-07, "loss": 0.2934, "step": 13495 }, { "epoch": 0.23459472613812166, "grad_norm": 1.0113458930009076, "learning_rate": 8.942041616238951e-07, "loss": 0.2684, "step": 13496 }, { "epoch": 0.23461210867562446, "grad_norm": 1.1842827655133839, "learning_rate": 8.941868448707665e-07, "loss": 0.1929, "step": 13497 }, { "epoch": 0.2346294912131273, "grad_norm": 6.903288599635384, "learning_rate": 8.941695268682465e-07, "loss": 0.3825, "step": 13498 }, { "epoch": 0.23464687375063012, "grad_norm": 2.1808286785991418, "learning_rate": 8.941522076163903e-07, "loss": 0.442, "step": 13499 }, { "epoch": 0.23466425628813295, "grad_norm": 5.412511946958945, "learning_rate": 8.941348871152526e-07, "loss": 0.4109, "step": 13500 }, { "epoch": 0.23468163882563578, "grad_norm": 1.5600031525952394, "learning_rate": 8.941175653648883e-07, "loss": 0.1932, "step": 13501 }, { "epoch": 0.23469902136313858, "grad_norm": 1.6121392117727513, "learning_rate": 8.941002423653523e-07, "loss": 0.2499, "step": 13502 }, { "epoch": 0.2347164039006414, "grad_norm": 1.8452820518939952, "learning_rate": 8.940829181166998e-07, "loss": 0.2863, "step": 13503 }, { "epoch": 0.23473378643814424, "grad_norm": 1.406297689839421, "learning_rate": 8.940655926189853e-07, "loss": 0.2191, "step": 13504 }, { "epoch": 0.23475116897564707, "grad_norm": 2.118716249899402, "learning_rate": 8.94048265872264e-07, "loss": 0.3166, "step": 13505 }, { "epoch": 0.2347685515131499, "grad_norm": 3.8763738762683912, "learning_rate": 8.940309378765907e-07, "loss": 0.3226, "step": 13506 }, { "epoch": 0.2347859340506527, "grad_norm": 2.7254389472834237, "learning_rate": 8.940136086320203e-07, "loss": 0.4045, "step": 13507 }, { "epoch": 0.23480331658815554, "grad_norm": 1.8336425885733956, "learning_rate": 8.939962781386077e-07, "loss": 0.2283, "step": 13508 }, { "epoch": 0.23482069912565837, "grad_norm": 2.315738619278637, "learning_rate": 8.93978946396408e-07, "loss": 0.295, "step": 13509 }, { "epoch": 0.2348380816631612, "grad_norm": 3.0231203032848923, "learning_rate": 8.93961613405476e-07, "loss": 0.2489, "step": 13510 }, { "epoch": 0.234855464200664, "grad_norm": 2.0636752570929384, "learning_rate": 8.939442791658664e-07, "loss": 0.4325, "step": 13511 }, { "epoch": 0.23487284673816683, "grad_norm": 2.030505821993716, "learning_rate": 8.939269436776347e-07, "loss": 0.2604, "step": 13512 }, { "epoch": 0.23489022927566966, "grad_norm": 1.2914154012306218, "learning_rate": 8.939096069408354e-07, "loss": 0.2404, "step": 13513 }, { "epoch": 0.2349076118131725, "grad_norm": 2.0712829180813443, "learning_rate": 8.938922689555237e-07, "loss": 0.2937, "step": 13514 }, { "epoch": 0.23492499435067532, "grad_norm": 1.4254964136395454, "learning_rate": 8.938749297217545e-07, "loss": 0.251, "step": 13515 }, { "epoch": 0.23494237688817812, "grad_norm": 2.111800450737288, "learning_rate": 8.938575892395826e-07, "loss": 0.3634, "step": 13516 }, { "epoch": 0.23495975942568095, "grad_norm": 2.2353357530251956, "learning_rate": 8.93840247509063e-07, "loss": 0.4556, "step": 13517 }, { "epoch": 0.23497714196318378, "grad_norm": 2.2596638761482732, "learning_rate": 8.938229045302508e-07, "loss": 0.328, "step": 13518 }, { "epoch": 0.2349945245006866, "grad_norm": 1.570457560699584, "learning_rate": 8.938055603032009e-07, "loss": 0.2845, "step": 13519 }, { "epoch": 0.23501190703818944, "grad_norm": 2.289595270984926, "learning_rate": 8.937882148279683e-07, "loss": 0.2164, "step": 13520 }, { "epoch": 0.23502928957569225, "grad_norm": 1.5992303003463448, "learning_rate": 8.937708681046078e-07, "loss": 0.2547, "step": 13521 }, { "epoch": 0.23504667211319508, "grad_norm": 1.5558867569046881, "learning_rate": 8.937535201331747e-07, "loss": 0.243, "step": 13522 }, { "epoch": 0.2350640546506979, "grad_norm": 3.101826177331377, "learning_rate": 8.937361709137237e-07, "loss": 0.2904, "step": 13523 }, { "epoch": 0.23508143718820074, "grad_norm": 2.0148957729550787, "learning_rate": 8.9371882044631e-07, "loss": 0.3188, "step": 13524 }, { "epoch": 0.23509881972570357, "grad_norm": 1.3619259005738023, "learning_rate": 8.937014687309885e-07, "loss": 0.2519, "step": 13525 }, { "epoch": 0.23511620226320637, "grad_norm": 1.332169638556724, "learning_rate": 8.936841157678141e-07, "loss": 0.2478, "step": 13526 }, { "epoch": 0.2351335848007092, "grad_norm": 1.6883630065549218, "learning_rate": 8.93666761556842e-07, "loss": 0.296, "step": 13527 }, { "epoch": 0.23515096733821203, "grad_norm": 1.808594476545255, "learning_rate": 8.93649406098127e-07, "loss": 0.3056, "step": 13528 }, { "epoch": 0.23516834987571486, "grad_norm": 1.7042769715621355, "learning_rate": 8.936320493917243e-07, "loss": 0.2869, "step": 13529 }, { "epoch": 0.2351857324132177, "grad_norm": 2.0447587441332775, "learning_rate": 8.936146914376888e-07, "loss": 0.2492, "step": 13530 }, { "epoch": 0.2352031149507205, "grad_norm": 1.8466364448167616, "learning_rate": 8.935973322360755e-07, "loss": 0.2276, "step": 13531 }, { "epoch": 0.23522049748822332, "grad_norm": 1.4423678862618718, "learning_rate": 8.935799717869394e-07, "loss": 0.3162, "step": 13532 }, { "epoch": 0.23523788002572615, "grad_norm": 1.6855819782779868, "learning_rate": 8.935626100903358e-07, "loss": 0.208, "step": 13533 }, { "epoch": 0.23525526256322898, "grad_norm": 1.816453238879296, "learning_rate": 8.935452471463194e-07, "loss": 0.3104, "step": 13534 }, { "epoch": 0.23527264510073181, "grad_norm": 2.045058685741465, "learning_rate": 8.935278829549453e-07, "loss": 0.2681, "step": 13535 }, { "epoch": 0.23529002763823462, "grad_norm": 1.583448094009356, "learning_rate": 8.935105175162687e-07, "loss": 0.3531, "step": 13536 }, { "epoch": 0.23530741017573745, "grad_norm": 1.5379132752456315, "learning_rate": 8.934931508303445e-07, "loss": 0.2198, "step": 13537 }, { "epoch": 0.23532479271324028, "grad_norm": 1.4744334195584006, "learning_rate": 8.934757828972277e-07, "loss": 0.2699, "step": 13538 }, { "epoch": 0.2353421752507431, "grad_norm": 2.0219776848179176, "learning_rate": 8.934584137169734e-07, "loss": 0.4887, "step": 13539 }, { "epoch": 0.23535955778824594, "grad_norm": 1.628685864611313, "learning_rate": 8.934410432896367e-07, "loss": 0.208, "step": 13540 }, { "epoch": 0.23537694032574874, "grad_norm": 1.7880440446544832, "learning_rate": 8.934236716152725e-07, "loss": 0.3998, "step": 13541 }, { "epoch": 0.23539432286325157, "grad_norm": 1.485714813325608, "learning_rate": 8.934062986939363e-07, "loss": 0.2576, "step": 13542 }, { "epoch": 0.2354117054007544, "grad_norm": 1.187543153103171, "learning_rate": 8.933889245256825e-07, "loss": 0.365, "step": 13543 }, { "epoch": 0.23542908793825723, "grad_norm": 1.8295699300507775, "learning_rate": 8.933715491105666e-07, "loss": 0.2751, "step": 13544 }, { "epoch": 0.23544647047576006, "grad_norm": 1.908816483959387, "learning_rate": 8.933541724486438e-07, "loss": 0.3385, "step": 13545 }, { "epoch": 0.23546385301326286, "grad_norm": 1.501209904748058, "learning_rate": 8.933367945399688e-07, "loss": 0.2645, "step": 13546 }, { "epoch": 0.2354812355507657, "grad_norm": 1.9156395580209091, "learning_rate": 8.933194153845969e-07, "loss": 0.2785, "step": 13547 }, { "epoch": 0.23549861808826852, "grad_norm": 3.2181617299731813, "learning_rate": 8.933020349825832e-07, "loss": 0.3869, "step": 13548 }, { "epoch": 0.23551600062577135, "grad_norm": 3.43635314668081, "learning_rate": 8.932846533339827e-07, "loss": 0.4287, "step": 13549 }, { "epoch": 0.23553338316327418, "grad_norm": 1.9834635481172744, "learning_rate": 8.932672704388503e-07, "loss": 0.3695, "step": 13550 }, { "epoch": 0.235550765700777, "grad_norm": 2.1153006508179657, "learning_rate": 8.932498862972414e-07, "loss": 0.2792, "step": 13551 }, { "epoch": 0.23556814823827982, "grad_norm": 1.5062964190144321, "learning_rate": 8.932325009092111e-07, "loss": 0.2045, "step": 13552 }, { "epoch": 0.23558553077578265, "grad_norm": 1.7683078200037736, "learning_rate": 8.932151142748143e-07, "loss": 0.3225, "step": 13553 }, { "epoch": 0.23560291331328548, "grad_norm": 2.4307292786572363, "learning_rate": 8.931977263941063e-07, "loss": 0.2893, "step": 13554 }, { "epoch": 0.2356202958507883, "grad_norm": 2.013070763060864, "learning_rate": 8.93180337267142e-07, "loss": 0.3916, "step": 13555 }, { "epoch": 0.2356376783882911, "grad_norm": 1.1716722184475985, "learning_rate": 8.931629468939767e-07, "loss": 0.2352, "step": 13556 }, { "epoch": 0.23565506092579394, "grad_norm": 1.4800097881095529, "learning_rate": 8.931455552746656e-07, "loss": 0.3732, "step": 13557 }, { "epoch": 0.23567244346329677, "grad_norm": 1.2751821018736729, "learning_rate": 8.931281624092636e-07, "loss": 0.2641, "step": 13558 }, { "epoch": 0.2356898260007996, "grad_norm": 1.4111926860820656, "learning_rate": 8.931107682978259e-07, "loss": 0.1834, "step": 13559 }, { "epoch": 0.23570720853830243, "grad_norm": 1.8471559911818765, "learning_rate": 8.930933729404077e-07, "loss": 0.2734, "step": 13560 }, { "epoch": 0.23572459107580523, "grad_norm": 2.0138110165375016, "learning_rate": 8.930759763370639e-07, "loss": 0.2393, "step": 13561 }, { "epoch": 0.23574197361330806, "grad_norm": 1.5718411748873815, "learning_rate": 8.9305857848785e-07, "loss": 0.1797, "step": 13562 }, { "epoch": 0.2357593561508109, "grad_norm": 1.5660287952571683, "learning_rate": 8.93041179392821e-07, "loss": 0.2564, "step": 13563 }, { "epoch": 0.23577673868831373, "grad_norm": 2.6342360246962184, "learning_rate": 8.930237790520319e-07, "loss": 0.4837, "step": 13564 }, { "epoch": 0.23579412122581656, "grad_norm": 1.9791118549891813, "learning_rate": 8.930063774655379e-07, "loss": 0.2157, "step": 13565 }, { "epoch": 0.23581150376331936, "grad_norm": 1.4237171107254234, "learning_rate": 8.929889746333943e-07, "loss": 0.2433, "step": 13566 }, { "epoch": 0.2358288863008222, "grad_norm": 1.4802479305948282, "learning_rate": 8.929715705556562e-07, "loss": 0.4257, "step": 13567 }, { "epoch": 0.23584626883832502, "grad_norm": 1.2309073010789418, "learning_rate": 8.929541652323787e-07, "loss": 0.1947, "step": 13568 }, { "epoch": 0.23586365137582785, "grad_norm": 1.8793259536906148, "learning_rate": 8.92936758663617e-07, "loss": 0.3338, "step": 13569 }, { "epoch": 0.23588103391333068, "grad_norm": 2.579763208789471, "learning_rate": 8.929193508494263e-07, "loss": 0.3063, "step": 13570 }, { "epoch": 0.23589841645083348, "grad_norm": 1.622423637765071, "learning_rate": 8.929019417898618e-07, "loss": 0.2588, "step": 13571 }, { "epoch": 0.2359157989883363, "grad_norm": 1.8069395140209479, "learning_rate": 8.928845314849784e-07, "loss": 0.3831, "step": 13572 }, { "epoch": 0.23593318152583914, "grad_norm": 2.190586176961195, "learning_rate": 8.928671199348316e-07, "loss": 0.2689, "step": 13573 }, { "epoch": 0.23595056406334197, "grad_norm": 3.3945462202787913, "learning_rate": 8.928497071394767e-07, "loss": 0.2751, "step": 13574 }, { "epoch": 0.2359679466008448, "grad_norm": 1.7830574579704181, "learning_rate": 8.928322930989687e-07, "loss": 0.2395, "step": 13575 }, { "epoch": 0.2359853291383476, "grad_norm": 1.6894391785892056, "learning_rate": 8.928148778133626e-07, "loss": 0.2896, "step": 13576 }, { "epoch": 0.23600271167585044, "grad_norm": 1.8426244042345543, "learning_rate": 8.927974612827138e-07, "loss": 0.2629, "step": 13577 }, { "epoch": 0.23602009421335327, "grad_norm": 1.4031249874071652, "learning_rate": 8.927800435070775e-07, "loss": 0.202, "step": 13578 }, { "epoch": 0.2360374767508561, "grad_norm": 1.4634164952092075, "learning_rate": 8.927626244865088e-07, "loss": 0.2791, "step": 13579 }, { "epoch": 0.23605485928835893, "grad_norm": 1.6607030738452964, "learning_rate": 8.927452042210632e-07, "loss": 0.3046, "step": 13580 }, { "epoch": 0.23607224182586173, "grad_norm": 1.638683327082746, "learning_rate": 8.927277827107956e-07, "loss": 0.2936, "step": 13581 }, { "epoch": 0.23608962436336456, "grad_norm": 1.9306795511464026, "learning_rate": 8.927103599557614e-07, "loss": 0.2177, "step": 13582 }, { "epoch": 0.2361070069008674, "grad_norm": 1.2298607592679358, "learning_rate": 8.926929359560155e-07, "loss": 0.2158, "step": 13583 }, { "epoch": 0.23612438943837022, "grad_norm": 0.9123154964166742, "learning_rate": 8.926755107116137e-07, "loss": 0.3357, "step": 13584 }, { "epoch": 0.23614177197587305, "grad_norm": 2.2587637822354885, "learning_rate": 8.926580842226108e-07, "loss": 0.3196, "step": 13585 }, { "epoch": 0.23615915451337585, "grad_norm": 2.409157775013252, "learning_rate": 8.926406564890621e-07, "loss": 0.2711, "step": 13586 }, { "epoch": 0.23617653705087868, "grad_norm": 1.9017919486241357, "learning_rate": 8.92623227511023e-07, "loss": 0.2223, "step": 13587 }, { "epoch": 0.2361939195883815, "grad_norm": 2.050765806174333, "learning_rate": 8.926057972885485e-07, "loss": 0.269, "step": 13588 }, { "epoch": 0.23621130212588434, "grad_norm": 1.5799278199174445, "learning_rate": 8.925883658216941e-07, "loss": 0.2933, "step": 13589 }, { "epoch": 0.23622868466338717, "grad_norm": 1.2686530836736893, "learning_rate": 8.925709331105149e-07, "loss": 0.2815, "step": 13590 }, { "epoch": 0.23624606720088998, "grad_norm": 1.4416809042665584, "learning_rate": 8.92553499155066e-07, "loss": 0.3784, "step": 13591 }, { "epoch": 0.2362634497383928, "grad_norm": 2.1430787566500022, "learning_rate": 8.92536063955403e-07, "loss": 0.2626, "step": 13592 }, { "epoch": 0.23628083227589564, "grad_norm": 2.167046831750637, "learning_rate": 8.925186275115811e-07, "loss": 0.2841, "step": 13593 }, { "epoch": 0.23629821481339847, "grad_norm": 1.8744083570286576, "learning_rate": 8.925011898236552e-07, "loss": 0.241, "step": 13594 }, { "epoch": 0.2363155973509013, "grad_norm": 2.068093825766211, "learning_rate": 8.92483750891681e-07, "loss": 0.261, "step": 13595 }, { "epoch": 0.2363329798884041, "grad_norm": 1.5765167698112994, "learning_rate": 8.924663107157134e-07, "loss": 0.1967, "step": 13596 }, { "epoch": 0.23635036242590693, "grad_norm": 1.3110267386857248, "learning_rate": 8.924488692958081e-07, "loss": 0.2557, "step": 13597 }, { "epoch": 0.23636774496340976, "grad_norm": 1.5524347029257237, "learning_rate": 8.9243142663202e-07, "loss": 0.4127, "step": 13598 }, { "epoch": 0.2363851275009126, "grad_norm": 2.0265876952626414, "learning_rate": 8.924139827244046e-07, "loss": 0.311, "step": 13599 }, { "epoch": 0.23640251003841542, "grad_norm": 1.9623781226040709, "learning_rate": 8.923965375730171e-07, "loss": 0.3729, "step": 13600 }, { "epoch": 0.23641989257591822, "grad_norm": 1.423181057413475, "learning_rate": 8.92379091177913e-07, "loss": 0.2174, "step": 13601 }, { "epoch": 0.23643727511342105, "grad_norm": 4.458698894391094, "learning_rate": 8.923616435391472e-07, "loss": 0.4385, "step": 13602 }, { "epoch": 0.23645465765092388, "grad_norm": 1.2718449409762416, "learning_rate": 8.923441946567754e-07, "loss": 0.3023, "step": 13603 }, { "epoch": 0.2364720401884267, "grad_norm": 2.2031385906102923, "learning_rate": 8.923267445308526e-07, "loss": 0.4129, "step": 13604 }, { "epoch": 0.23648942272592954, "grad_norm": 1.4303592112807735, "learning_rate": 8.923092931614345e-07, "loss": 0.381, "step": 13605 }, { "epoch": 0.23650680526343235, "grad_norm": 1.8909989875199114, "learning_rate": 8.922918405485759e-07, "loss": 0.3899, "step": 13606 }, { "epoch": 0.23652418780093518, "grad_norm": 1.3488814203650867, "learning_rate": 8.922743866923324e-07, "loss": 0.2404, "step": 13607 }, { "epoch": 0.236541570338438, "grad_norm": 1.5248866529476883, "learning_rate": 8.922569315927593e-07, "loss": 0.2, "step": 13608 }, { "epoch": 0.23655895287594084, "grad_norm": 2.717215482836039, "learning_rate": 8.922394752499119e-07, "loss": 0.4262, "step": 13609 }, { "epoch": 0.23657633541344367, "grad_norm": 1.676973030815725, "learning_rate": 8.922220176638456e-07, "loss": 0.1902, "step": 13610 }, { "epoch": 0.23659371795094647, "grad_norm": 1.7067505809093346, "learning_rate": 8.922045588346157e-07, "loss": 0.2429, "step": 13611 }, { "epoch": 0.2366111004884493, "grad_norm": 1.770640990185776, "learning_rate": 8.921870987622775e-07, "loss": 0.3521, "step": 13612 }, { "epoch": 0.23662848302595213, "grad_norm": 3.301266142704671, "learning_rate": 8.921696374468862e-07, "loss": 0.3732, "step": 13613 }, { "epoch": 0.23664586556345496, "grad_norm": 2.6683421836047794, "learning_rate": 8.921521748884975e-07, "loss": 0.2888, "step": 13614 }, { "epoch": 0.2366632481009578, "grad_norm": 2.2598756272922182, "learning_rate": 8.921347110871664e-07, "loss": 0.4408, "step": 13615 }, { "epoch": 0.2366806306384606, "grad_norm": 2.1865338397412417, "learning_rate": 8.921172460429485e-07, "loss": 0.4083, "step": 13616 }, { "epoch": 0.23669801317596342, "grad_norm": 3.000836440372397, "learning_rate": 8.920997797558989e-07, "loss": 0.3442, "step": 13617 }, { "epoch": 0.23671539571346625, "grad_norm": 2.8427367498782923, "learning_rate": 8.920823122260733e-07, "loss": 0.2442, "step": 13618 }, { "epoch": 0.23673277825096908, "grad_norm": 1.276849905147125, "learning_rate": 8.920648434535268e-07, "loss": 0.1994, "step": 13619 }, { "epoch": 0.23675016078847191, "grad_norm": 2.6169162019927295, "learning_rate": 8.92047373438315e-07, "loss": 0.23, "step": 13620 }, { "epoch": 0.23676754332597472, "grad_norm": 2.5195800553147585, "learning_rate": 8.920299021804929e-07, "loss": 0.3641, "step": 13621 }, { "epoch": 0.23678492586347755, "grad_norm": 2.058798649630399, "learning_rate": 8.920124296801162e-07, "loss": 0.1851, "step": 13622 }, { "epoch": 0.23680230840098038, "grad_norm": 2.9901870980852525, "learning_rate": 8.919949559372401e-07, "loss": 0.3359, "step": 13623 }, { "epoch": 0.2368196909384832, "grad_norm": 1.3889409338270162, "learning_rate": 8.9197748095192e-07, "loss": 0.2873, "step": 13624 }, { "epoch": 0.23683707347598604, "grad_norm": 1.3018124245886762, "learning_rate": 8.919600047242115e-07, "loss": 0.3536, "step": 13625 }, { "epoch": 0.23685445601348884, "grad_norm": 2.2081031902105157, "learning_rate": 8.919425272541697e-07, "loss": 0.307, "step": 13626 }, { "epoch": 0.23687183855099167, "grad_norm": 1.4788063642142053, "learning_rate": 8.919250485418504e-07, "loss": 0.2553, "step": 13627 }, { "epoch": 0.2368892210884945, "grad_norm": 2.320581066239325, "learning_rate": 8.919075685873083e-07, "loss": 0.362, "step": 13628 }, { "epoch": 0.23690660362599733, "grad_norm": 1.862557210394213, "learning_rate": 8.918900873905995e-07, "loss": 0.3432, "step": 13629 }, { "epoch": 0.23692398616350016, "grad_norm": 1.2127251652854787, "learning_rate": 8.918726049517792e-07, "loss": 0.2188, "step": 13630 }, { "epoch": 0.23694136870100296, "grad_norm": 2.7539728803849384, "learning_rate": 8.918551212709027e-07, "loss": 0.321, "step": 13631 }, { "epoch": 0.2369587512385058, "grad_norm": 2.160722747374654, "learning_rate": 8.918376363480254e-07, "loss": 0.2558, "step": 13632 }, { "epoch": 0.23697613377600862, "grad_norm": 1.5306079097686294, "learning_rate": 8.918201501832028e-07, "loss": 0.236, "step": 13633 }, { "epoch": 0.23699351631351145, "grad_norm": 0.7928297120520009, "learning_rate": 8.918026627764905e-07, "loss": 0.2422, "step": 13634 }, { "epoch": 0.23701089885101428, "grad_norm": 1.4487639333917148, "learning_rate": 8.917851741279434e-07, "loss": 0.271, "step": 13635 }, { "epoch": 0.2370282813885171, "grad_norm": 1.5341879443670412, "learning_rate": 8.917676842376175e-07, "loss": 0.6035, "step": 13636 }, { "epoch": 0.23704566392601992, "grad_norm": 1.3023156270235887, "learning_rate": 8.91750193105568e-07, "loss": 0.1723, "step": 13637 }, { "epoch": 0.23706304646352275, "grad_norm": 1.893614792088115, "learning_rate": 8.917327007318502e-07, "loss": 0.3929, "step": 13638 }, { "epoch": 0.23708042900102558, "grad_norm": 1.559811505757339, "learning_rate": 8.917152071165198e-07, "loss": 0.6042, "step": 13639 }, { "epoch": 0.2370978115385284, "grad_norm": 2.134310315147425, "learning_rate": 8.916977122596322e-07, "loss": 0.3691, "step": 13640 }, { "epoch": 0.2371151940760312, "grad_norm": 1.891790900498678, "learning_rate": 8.916802161612427e-07, "loss": 0.4134, "step": 13641 }, { "epoch": 0.23713257661353404, "grad_norm": 3.3444333483987703, "learning_rate": 8.916627188214068e-07, "loss": 0.3273, "step": 13642 }, { "epoch": 0.23714995915103687, "grad_norm": 1.913697936637304, "learning_rate": 8.916452202401801e-07, "loss": 0.1675, "step": 13643 }, { "epoch": 0.2371673416885397, "grad_norm": 1.772469367927224, "learning_rate": 8.916277204176178e-07, "loss": 0.2563, "step": 13644 }, { "epoch": 0.23718472422604253, "grad_norm": 1.7809023079232265, "learning_rate": 8.916102193537756e-07, "loss": 0.2866, "step": 13645 }, { "epoch": 0.23720210676354533, "grad_norm": 1.0993970989980943, "learning_rate": 8.91592717048709e-07, "loss": 0.2081, "step": 13646 }, { "epoch": 0.23721948930104816, "grad_norm": 2.6354304044505965, "learning_rate": 8.915752135024732e-07, "loss": 0.411, "step": 13647 }, { "epoch": 0.237236871838551, "grad_norm": 2.015911950604777, "learning_rate": 8.915577087151239e-07, "loss": 0.2229, "step": 13648 }, { "epoch": 0.23725425437605382, "grad_norm": 2.2944353953646077, "learning_rate": 8.915402026867167e-07, "loss": 0.2722, "step": 13649 }, { "epoch": 0.23727163691355663, "grad_norm": 2.0263632099741873, "learning_rate": 8.915226954173068e-07, "loss": 0.4067, "step": 13650 }, { "epoch": 0.23728901945105946, "grad_norm": 1.8297503896710026, "learning_rate": 8.915051869069497e-07, "loss": 0.4041, "step": 13651 }, { "epoch": 0.2373064019885623, "grad_norm": 1.9355520794068872, "learning_rate": 8.914876771557011e-07, "loss": 0.3704, "step": 13652 }, { "epoch": 0.23732378452606512, "grad_norm": 1.7045380469365554, "learning_rate": 8.914701661636163e-07, "loss": 0.3468, "step": 13653 }, { "epoch": 0.23734116706356795, "grad_norm": 2.693639027930286, "learning_rate": 8.91452653930751e-07, "loss": 0.4089, "step": 13654 }, { "epoch": 0.23735854960107075, "grad_norm": 2.026675462628709, "learning_rate": 8.914351404571607e-07, "loss": 0.4094, "step": 13655 }, { "epoch": 0.23737593213857358, "grad_norm": 2.2743737786300997, "learning_rate": 8.914176257429007e-07, "loss": 0.4597, "step": 13656 }, { "epoch": 0.2373933146760764, "grad_norm": 1.3456536951298075, "learning_rate": 8.914001097880266e-07, "loss": 0.2175, "step": 13657 }, { "epoch": 0.23741069721357924, "grad_norm": 1.4498625520820665, "learning_rate": 8.91382592592594e-07, "loss": 0.3277, "step": 13658 }, { "epoch": 0.23742807975108207, "grad_norm": 1.948919639709805, "learning_rate": 8.913650741566583e-07, "loss": 0.41, "step": 13659 }, { "epoch": 0.23744546228858487, "grad_norm": 1.6365208411213334, "learning_rate": 8.913475544802751e-07, "loss": 0.3668, "step": 13660 }, { "epoch": 0.2374628448260877, "grad_norm": 2.5206208347887125, "learning_rate": 8.913300335634999e-07, "loss": 0.3492, "step": 13661 }, { "epoch": 0.23748022736359053, "grad_norm": 2.0791702368756453, "learning_rate": 8.913125114063884e-07, "loss": 0.2937, "step": 13662 }, { "epoch": 0.23749760990109337, "grad_norm": 2.4357479304282266, "learning_rate": 8.91294988008996e-07, "loss": 0.3452, "step": 13663 }, { "epoch": 0.2375149924385962, "grad_norm": 1.0382619961816768, "learning_rate": 8.91277463371378e-07, "loss": 0.1429, "step": 13664 }, { "epoch": 0.237532374976099, "grad_norm": 2.169010819027545, "learning_rate": 8.912599374935904e-07, "loss": 0.4668, "step": 13665 }, { "epoch": 0.23754975751360183, "grad_norm": 1.5185726088458034, "learning_rate": 8.912424103756884e-07, "loss": 0.3137, "step": 13666 }, { "epoch": 0.23756714005110466, "grad_norm": 2.9863843760915803, "learning_rate": 8.912248820177277e-07, "loss": 0.2928, "step": 13667 }, { "epoch": 0.2375845225886075, "grad_norm": 1.210080410499224, "learning_rate": 8.912073524197639e-07, "loss": 0.2344, "step": 13668 }, { "epoch": 0.23760190512611032, "grad_norm": 2.7339884971788124, "learning_rate": 8.911898215818525e-07, "loss": 0.3616, "step": 13669 }, { "epoch": 0.23761928766361312, "grad_norm": 3.062219239024769, "learning_rate": 8.91172289504049e-07, "loss": 0.2875, "step": 13670 }, { "epoch": 0.23763667020111595, "grad_norm": 3.3372901028534283, "learning_rate": 8.911547561864091e-07, "loss": 0.259, "step": 13671 }, { "epoch": 0.23765405273861878, "grad_norm": 1.855969259433457, "learning_rate": 8.911372216289883e-07, "loss": 0.2721, "step": 13672 }, { "epoch": 0.2376714352761216, "grad_norm": 2.2086468437481996, "learning_rate": 8.911196858318421e-07, "loss": 0.344, "step": 13673 }, { "epoch": 0.23768881781362444, "grad_norm": 1.5469167591452426, "learning_rate": 8.911021487950262e-07, "loss": 0.2627, "step": 13674 }, { "epoch": 0.23770620035112724, "grad_norm": 2.6851814370920977, "learning_rate": 8.910846105185961e-07, "loss": 0.4458, "step": 13675 }, { "epoch": 0.23772358288863008, "grad_norm": 1.4513440211102249, "learning_rate": 8.910670710026076e-07, "loss": 0.3014, "step": 13676 }, { "epoch": 0.2377409654261329, "grad_norm": 1.3567020184636736, "learning_rate": 8.91049530247116e-07, "loss": 0.3072, "step": 13677 }, { "epoch": 0.23775834796363574, "grad_norm": 2.0246807470206636, "learning_rate": 8.910319882521771e-07, "loss": 0.3449, "step": 13678 }, { "epoch": 0.23777573050113857, "grad_norm": 1.5011284168387267, "learning_rate": 8.910144450178463e-07, "loss": 0.2301, "step": 13679 }, { "epoch": 0.23779311303864137, "grad_norm": 2.1945292015448223, "learning_rate": 8.909969005441794e-07, "loss": 0.3186, "step": 13680 }, { "epoch": 0.2378104955761442, "grad_norm": 2.176882671801569, "learning_rate": 8.909793548312319e-07, "loss": 0.4311, "step": 13681 }, { "epoch": 0.23782787811364703, "grad_norm": 3.245554279712909, "learning_rate": 8.909618078790595e-07, "loss": 0.3202, "step": 13682 }, { "epoch": 0.23784526065114986, "grad_norm": 1.9855154544439644, "learning_rate": 8.909442596877176e-07, "loss": 0.3921, "step": 13683 }, { "epoch": 0.2378626431886527, "grad_norm": 2.6035325751704805, "learning_rate": 8.909267102572623e-07, "loss": 0.374, "step": 13684 }, { "epoch": 0.2378800257261555, "grad_norm": 1.6617503742184458, "learning_rate": 8.909091595877487e-07, "loss": 0.3841, "step": 13685 }, { "epoch": 0.23789740826365832, "grad_norm": 2.024478786471243, "learning_rate": 8.908916076792326e-07, "loss": 0.2477, "step": 13686 }, { "epoch": 0.23791479080116115, "grad_norm": 1.4768672843327781, "learning_rate": 8.908740545317697e-07, "loss": 0.2101, "step": 13687 }, { "epoch": 0.23793217333866398, "grad_norm": 1.8017948294548285, "learning_rate": 8.908565001454157e-07, "loss": 0.4002, "step": 13688 }, { "epoch": 0.2379495558761668, "grad_norm": 2.102855290287943, "learning_rate": 8.90838944520226e-07, "loss": 0.2579, "step": 13689 }, { "epoch": 0.23796693841366962, "grad_norm": 1.3975628095582349, "learning_rate": 8.908213876562564e-07, "loss": 0.2966, "step": 13690 }, { "epoch": 0.23798432095117245, "grad_norm": 2.2951204520773354, "learning_rate": 8.908038295535627e-07, "loss": 0.2995, "step": 13691 }, { "epoch": 0.23800170348867528, "grad_norm": 1.030210234307334, "learning_rate": 8.907862702122002e-07, "loss": 0.3198, "step": 13692 }, { "epoch": 0.2380190860261781, "grad_norm": 1.9300715942908615, "learning_rate": 8.907687096322248e-07, "loss": 0.2554, "step": 13693 }, { "epoch": 0.23803646856368094, "grad_norm": 1.7429171534236791, "learning_rate": 8.907511478136921e-07, "loss": 0.289, "step": 13694 }, { "epoch": 0.23805385110118374, "grad_norm": 2.287206681305011, "learning_rate": 8.907335847566577e-07, "loss": 0.2043, "step": 13695 }, { "epoch": 0.23807123363868657, "grad_norm": 1.7367432534303309, "learning_rate": 8.907160204611775e-07, "loss": 0.3509, "step": 13696 }, { "epoch": 0.2380886161761894, "grad_norm": 1.6902901951493146, "learning_rate": 8.906984549273068e-07, "loss": 0.2609, "step": 13697 }, { "epoch": 0.23810599871369223, "grad_norm": 1.6374867316255393, "learning_rate": 8.906808881551016e-07, "loss": 0.2997, "step": 13698 }, { "epoch": 0.23812338125119506, "grad_norm": 1.8336281481089927, "learning_rate": 8.906633201446173e-07, "loss": 0.273, "step": 13699 }, { "epoch": 0.23814076378869786, "grad_norm": 2.047042814560788, "learning_rate": 8.906457508959098e-07, "loss": 0.4816, "step": 13700 }, { "epoch": 0.2381581463262007, "grad_norm": 1.92398465870086, "learning_rate": 8.906281804090348e-07, "loss": 0.2772, "step": 13701 }, { "epoch": 0.23817552886370352, "grad_norm": 1.6314956612477136, "learning_rate": 8.906106086840476e-07, "loss": 0.2873, "step": 13702 }, { "epoch": 0.23819291140120635, "grad_norm": 1.7900945623586495, "learning_rate": 8.905930357210045e-07, "loss": 0.2821, "step": 13703 }, { "epoch": 0.23821029393870918, "grad_norm": 2.9111075941116153, "learning_rate": 8.905754615199609e-07, "loss": 0.3976, "step": 13704 }, { "epoch": 0.23822767647621199, "grad_norm": 2.3729556394464963, "learning_rate": 8.905578860809724e-07, "loss": 0.4489, "step": 13705 }, { "epoch": 0.23824505901371482, "grad_norm": 2.602890045845785, "learning_rate": 8.905403094040948e-07, "loss": 0.5007, "step": 13706 }, { "epoch": 0.23826244155121765, "grad_norm": 1.5743521508309302, "learning_rate": 8.905227314893839e-07, "loss": 0.408, "step": 13707 }, { "epoch": 0.23827982408872048, "grad_norm": 4.364921466821435, "learning_rate": 8.905051523368954e-07, "loss": 0.639, "step": 13708 }, { "epoch": 0.2382972066262233, "grad_norm": 1.7194397027690875, "learning_rate": 8.904875719466849e-07, "loss": 0.1946, "step": 13709 }, { "epoch": 0.2383145891637261, "grad_norm": 1.9085905574138278, "learning_rate": 8.904699903188081e-07, "loss": 0.2874, "step": 13710 }, { "epoch": 0.23833197170122894, "grad_norm": 1.6259333071754698, "learning_rate": 8.904524074533208e-07, "loss": 0.1637, "step": 13711 }, { "epoch": 0.23834935423873177, "grad_norm": 1.5522842907654197, "learning_rate": 8.904348233502787e-07, "loss": 0.2145, "step": 13712 }, { "epoch": 0.2383667367762346, "grad_norm": 1.9155746688356128, "learning_rate": 8.904172380097376e-07, "loss": 0.4418, "step": 13713 }, { "epoch": 0.23838411931373743, "grad_norm": 2.136162958294518, "learning_rate": 8.903996514317533e-07, "loss": 0.4023, "step": 13714 }, { "epoch": 0.23840150185124023, "grad_norm": 2.033343695325209, "learning_rate": 8.903820636163812e-07, "loss": 0.4097, "step": 13715 }, { "epoch": 0.23841888438874306, "grad_norm": 2.6013909952272214, "learning_rate": 8.903644745636776e-07, "loss": 0.4373, "step": 13716 }, { "epoch": 0.2384362669262459, "grad_norm": 2.25117435492178, "learning_rate": 8.903468842736977e-07, "loss": 0.5383, "step": 13717 }, { "epoch": 0.23845364946374872, "grad_norm": 4.611491482619417, "learning_rate": 8.903292927464976e-07, "loss": 0.4742, "step": 13718 }, { "epoch": 0.23847103200125155, "grad_norm": 2.4620293287697668, "learning_rate": 8.90311699982133e-07, "loss": 0.3525, "step": 13719 }, { "epoch": 0.23848841453875436, "grad_norm": 2.5090949906945976, "learning_rate": 8.902941059806595e-07, "loss": 0.3949, "step": 13720 }, { "epoch": 0.2385057970762572, "grad_norm": 2.2335355163309405, "learning_rate": 8.902765107421329e-07, "loss": 0.3067, "step": 13721 }, { "epoch": 0.23852317961376002, "grad_norm": 3.4307913558154435, "learning_rate": 8.902589142666091e-07, "loss": 0.4706, "step": 13722 }, { "epoch": 0.23854056215126285, "grad_norm": 1.630136431884674, "learning_rate": 8.902413165541438e-07, "loss": 0.257, "step": 13723 }, { "epoch": 0.23855794468876568, "grad_norm": 2.150834009730562, "learning_rate": 8.902237176047929e-07, "loss": 0.399, "step": 13724 }, { "epoch": 0.23857532722626848, "grad_norm": 1.9217691328513966, "learning_rate": 8.90206117418612e-07, "loss": 0.4394, "step": 13725 }, { "epoch": 0.2385927097637713, "grad_norm": 1.5261644506960046, "learning_rate": 8.901885159956568e-07, "loss": 0.4496, "step": 13726 }, { "epoch": 0.23861009230127414, "grad_norm": 1.5224421420534706, "learning_rate": 8.901709133359835e-07, "loss": 0.2811, "step": 13727 }, { "epoch": 0.23862747483877697, "grad_norm": 2.662919617931954, "learning_rate": 8.901533094396475e-07, "loss": 0.3593, "step": 13728 }, { "epoch": 0.2386448573762798, "grad_norm": 1.7222928201585488, "learning_rate": 8.901357043067048e-07, "loss": 0.3861, "step": 13729 }, { "epoch": 0.2386622399137826, "grad_norm": 2.9192964998628597, "learning_rate": 8.901180979372109e-07, "loss": 0.459, "step": 13730 }, { "epoch": 0.23867962245128543, "grad_norm": 2.004625051094422, "learning_rate": 8.901004903312221e-07, "loss": 0.2431, "step": 13731 }, { "epoch": 0.23869700498878826, "grad_norm": 2.9642172236251136, "learning_rate": 8.900828814887939e-07, "loss": 0.5279, "step": 13732 }, { "epoch": 0.2387143875262911, "grad_norm": 2.8592178458565756, "learning_rate": 8.900652714099821e-07, "loss": 0.2401, "step": 13733 }, { "epoch": 0.23873177006379392, "grad_norm": 1.5490646235313925, "learning_rate": 8.900476600948425e-07, "loss": 0.3716, "step": 13734 }, { "epoch": 0.23874915260129673, "grad_norm": 2.030421811117792, "learning_rate": 8.900300475434311e-07, "loss": 0.3297, "step": 13735 }, { "epoch": 0.23876653513879956, "grad_norm": 1.754432850249459, "learning_rate": 8.900124337558037e-07, "loss": 0.3142, "step": 13736 }, { "epoch": 0.2387839176763024, "grad_norm": 1.854125202891338, "learning_rate": 8.899948187320158e-07, "loss": 0.2452, "step": 13737 }, { "epoch": 0.23880130021380522, "grad_norm": 1.5432472582640242, "learning_rate": 8.899772024721237e-07, "loss": 0.3462, "step": 13738 }, { "epoch": 0.23881868275130805, "grad_norm": 1.6721783864221789, "learning_rate": 8.899595849761829e-07, "loss": 0.3932, "step": 13739 }, { "epoch": 0.23883606528881085, "grad_norm": 2.4343531161174465, "learning_rate": 8.899419662442493e-07, "loss": 0.2285, "step": 13740 }, { "epoch": 0.23885344782631368, "grad_norm": 1.686785542997272, "learning_rate": 8.899243462763789e-07, "loss": 0.3858, "step": 13741 }, { "epoch": 0.2388708303638165, "grad_norm": 1.8215479829952699, "learning_rate": 8.899067250726274e-07, "loss": 0.4108, "step": 13742 }, { "epoch": 0.23888821290131934, "grad_norm": 1.5473724229732921, "learning_rate": 8.898891026330507e-07, "loss": 0.3442, "step": 13743 }, { "epoch": 0.23890559543882217, "grad_norm": 1.7109957326521308, "learning_rate": 8.898714789577046e-07, "loss": 0.5389, "step": 13744 }, { "epoch": 0.23892297797632497, "grad_norm": 1.972747984629098, "learning_rate": 8.89853854046645e-07, "loss": 0.3032, "step": 13745 }, { "epoch": 0.2389403605138278, "grad_norm": 1.2734221936530226, "learning_rate": 8.898362278999279e-07, "loss": 0.2989, "step": 13746 }, { "epoch": 0.23895774305133063, "grad_norm": 2.5989890297454963, "learning_rate": 8.89818600517609e-07, "loss": 0.3573, "step": 13747 }, { "epoch": 0.23897512558883346, "grad_norm": 2.9131256740579317, "learning_rate": 8.89800971899744e-07, "loss": 0.4487, "step": 13748 }, { "epoch": 0.2389925081263363, "grad_norm": 1.7024085103767097, "learning_rate": 8.897833420463892e-07, "loss": 0.2181, "step": 13749 }, { "epoch": 0.2390098906638391, "grad_norm": 3.2055451389478358, "learning_rate": 8.897657109576001e-07, "loss": 0.3835, "step": 13750 }, { "epoch": 0.23902727320134193, "grad_norm": 1.9974294006360813, "learning_rate": 8.897480786334328e-07, "loss": 0.5239, "step": 13751 }, { "epoch": 0.23904465573884476, "grad_norm": 1.6452553479942842, "learning_rate": 8.897304450739432e-07, "loss": 0.2994, "step": 13752 }, { "epoch": 0.2390620382763476, "grad_norm": 2.5959901070248543, "learning_rate": 8.897128102791871e-07, "loss": 0.3441, "step": 13753 }, { "epoch": 0.23907942081385042, "grad_norm": 2.3625906835171393, "learning_rate": 8.896951742492203e-07, "loss": 0.6506, "step": 13754 }, { "epoch": 0.23909680335135322, "grad_norm": 1.6829626466767436, "learning_rate": 8.896775369840989e-07, "loss": 0.3708, "step": 13755 }, { "epoch": 0.23911418588885605, "grad_norm": 2.0127220694042656, "learning_rate": 8.896598984838787e-07, "loss": 0.3414, "step": 13756 }, { "epoch": 0.23913156842635888, "grad_norm": 1.9071605602900283, "learning_rate": 8.896422587486157e-07, "loss": 0.3273, "step": 13757 }, { "epoch": 0.2391489509638617, "grad_norm": 1.563863950211474, "learning_rate": 8.896246177783655e-07, "loss": 0.3188, "step": 13758 }, { "epoch": 0.23916633350136454, "grad_norm": 2.04605544005151, "learning_rate": 8.896069755731845e-07, "loss": 0.324, "step": 13759 }, { "epoch": 0.23918371603886734, "grad_norm": 1.1391252351588397, "learning_rate": 8.89589332133128e-07, "loss": 0.2067, "step": 13760 }, { "epoch": 0.23920109857637017, "grad_norm": 2.192291508034469, "learning_rate": 8.895716874582526e-07, "loss": 0.4025, "step": 13761 }, { "epoch": 0.239218481113873, "grad_norm": 2.862699993294384, "learning_rate": 8.895540415486138e-07, "loss": 0.298, "step": 13762 }, { "epoch": 0.23923586365137584, "grad_norm": 1.983096800030368, "learning_rate": 8.895363944042678e-07, "loss": 0.3098, "step": 13763 }, { "epoch": 0.23925324618887867, "grad_norm": 1.8887518668912706, "learning_rate": 8.895187460252701e-07, "loss": 0.2763, "step": 13764 }, { "epoch": 0.23927062872638147, "grad_norm": 1.3439791519501032, "learning_rate": 8.89501096411677e-07, "loss": 0.2209, "step": 13765 }, { "epoch": 0.2392880112638843, "grad_norm": 2.9635457470654947, "learning_rate": 8.894834455635445e-07, "loss": 0.4023, "step": 13766 }, { "epoch": 0.23930539380138713, "grad_norm": 1.8129573409973776, "learning_rate": 8.894657934809282e-07, "loss": 0.3239, "step": 13767 }, { "epoch": 0.23932277633888996, "grad_norm": 2.45992453682118, "learning_rate": 8.894481401638843e-07, "loss": 0.3852, "step": 13768 }, { "epoch": 0.2393401588763928, "grad_norm": 2.050306447951174, "learning_rate": 8.894304856124687e-07, "loss": 0.4376, "step": 13769 }, { "epoch": 0.2393575414138956, "grad_norm": 1.5494869288856405, "learning_rate": 8.894128298267374e-07, "loss": 0.4607, "step": 13770 }, { "epoch": 0.23937492395139842, "grad_norm": 1.7618351259680038, "learning_rate": 8.893951728067461e-07, "loss": 0.5658, "step": 13771 }, { "epoch": 0.23939230648890125, "grad_norm": 1.9596049602893044, "learning_rate": 8.893775145525512e-07, "loss": 0.3051, "step": 13772 }, { "epoch": 0.23940968902640408, "grad_norm": 1.0742019828143505, "learning_rate": 8.893598550642085e-07, "loss": 0.1584, "step": 13773 }, { "epoch": 0.2394270715639069, "grad_norm": 1.8647393849419878, "learning_rate": 8.893421943417737e-07, "loss": 0.4291, "step": 13774 }, { "epoch": 0.23944445410140971, "grad_norm": 2.7117751493592555, "learning_rate": 8.893245323853032e-07, "loss": 0.5637, "step": 13775 }, { "epoch": 0.23946183663891255, "grad_norm": 2.9909064928284512, "learning_rate": 8.893068691948526e-07, "loss": 0.2972, "step": 13776 }, { "epoch": 0.23947921917641538, "grad_norm": 1.5211637140684475, "learning_rate": 8.892892047704781e-07, "loss": 0.2979, "step": 13777 }, { "epoch": 0.2394966017139182, "grad_norm": 1.1563301661669256, "learning_rate": 8.892715391122357e-07, "loss": 0.2658, "step": 13778 }, { "epoch": 0.23951398425142104, "grad_norm": 1.8061552519495854, "learning_rate": 8.892538722201812e-07, "loss": 0.2126, "step": 13779 }, { "epoch": 0.23953136678892384, "grad_norm": 1.6510753485111005, "learning_rate": 8.892362040943708e-07, "loss": 0.2771, "step": 13780 }, { "epoch": 0.23954874932642667, "grad_norm": 1.4424610420043071, "learning_rate": 8.892185347348605e-07, "loss": 0.2821, "step": 13781 }, { "epoch": 0.2395661318639295, "grad_norm": 1.2618399007547687, "learning_rate": 8.892008641417062e-07, "loss": 0.2119, "step": 13782 }, { "epoch": 0.23958351440143233, "grad_norm": 1.464456641241929, "learning_rate": 8.891831923149639e-07, "loss": 0.3921, "step": 13783 }, { "epoch": 0.23960089693893516, "grad_norm": 1.4183721196009171, "learning_rate": 8.891655192546898e-07, "loss": 0.2814, "step": 13784 }, { "epoch": 0.23961827947643796, "grad_norm": 2.603333401183753, "learning_rate": 8.891478449609397e-07, "loss": 0.6113, "step": 13785 }, { "epoch": 0.2396356620139408, "grad_norm": 1.776785133109364, "learning_rate": 8.891301694337696e-07, "loss": 0.3092, "step": 13786 }, { "epoch": 0.23965304455144362, "grad_norm": 2.2555208155310105, "learning_rate": 8.891124926732357e-07, "loss": 0.3379, "step": 13787 }, { "epoch": 0.23967042708894645, "grad_norm": 2.465379447766044, "learning_rate": 8.890948146793939e-07, "loss": 0.3322, "step": 13788 }, { "epoch": 0.23968780962644926, "grad_norm": 1.1902128121990216, "learning_rate": 8.890771354523003e-07, "loss": 0.2559, "step": 13789 }, { "epoch": 0.23970519216395209, "grad_norm": 1.9760687717485457, "learning_rate": 8.89059454992011e-07, "loss": 0.4119, "step": 13790 }, { "epoch": 0.23972257470145492, "grad_norm": 5.405177582089053, "learning_rate": 8.890417732985818e-07, "loss": 0.5758, "step": 13791 }, { "epoch": 0.23973995723895775, "grad_norm": 1.8104383329516247, "learning_rate": 8.89024090372069e-07, "loss": 0.3624, "step": 13792 }, { "epoch": 0.23975733977646058, "grad_norm": 1.6870169362212428, "learning_rate": 8.890064062125284e-07, "loss": 0.2301, "step": 13793 }, { "epoch": 0.23977472231396338, "grad_norm": 3.3146333908644667, "learning_rate": 8.889887208200163e-07, "loss": 0.5777, "step": 13794 }, { "epoch": 0.2397921048514662, "grad_norm": 2.39788006900514, "learning_rate": 8.889710341945886e-07, "loss": 0.2406, "step": 13795 }, { "epoch": 0.23980948738896904, "grad_norm": 2.6544735463626155, "learning_rate": 8.889533463363015e-07, "loss": 0.4027, "step": 13796 }, { "epoch": 0.23982686992647187, "grad_norm": 2.0664854523627105, "learning_rate": 8.889356572452108e-07, "loss": 0.36, "step": 13797 }, { "epoch": 0.2398442524639747, "grad_norm": 1.8575879057405862, "learning_rate": 8.889179669213729e-07, "loss": 0.3988, "step": 13798 }, { "epoch": 0.2398616350014775, "grad_norm": 2.093276794129414, "learning_rate": 8.889002753648436e-07, "loss": 0.3675, "step": 13799 }, { "epoch": 0.23987901753898033, "grad_norm": 2.5141974432035865, "learning_rate": 8.88882582575679e-07, "loss": 0.4749, "step": 13800 }, { "epoch": 0.23989640007648316, "grad_norm": 1.4927909719790002, "learning_rate": 8.888648885539354e-07, "loss": 0.3009, "step": 13801 }, { "epoch": 0.239913782613986, "grad_norm": 1.532354513086583, "learning_rate": 8.888471932996687e-07, "loss": 0.2796, "step": 13802 }, { "epoch": 0.23993116515148882, "grad_norm": 1.6848718448738986, "learning_rate": 8.888294968129348e-07, "loss": 0.4403, "step": 13803 }, { "epoch": 0.23994854768899163, "grad_norm": 2.3576298822879864, "learning_rate": 8.888117990937903e-07, "loss": 0.3312, "step": 13804 }, { "epoch": 0.23996593022649446, "grad_norm": 1.1526656686504757, "learning_rate": 8.887941001422908e-07, "loss": 0.236, "step": 13805 }, { "epoch": 0.2399833127639973, "grad_norm": 2.1583286991280164, "learning_rate": 8.887763999584928e-07, "loss": 0.3763, "step": 13806 }, { "epoch": 0.24000069530150012, "grad_norm": 2.830769410184197, "learning_rate": 8.88758698542452e-07, "loss": 0.3889, "step": 13807 }, { "epoch": 0.24001807783900295, "grad_norm": 2.7904405883931935, "learning_rate": 8.887409958942248e-07, "loss": 0.4267, "step": 13808 }, { "epoch": 0.24003546037650575, "grad_norm": 1.3290182753412034, "learning_rate": 8.887232920138672e-07, "loss": 0.2109, "step": 13809 }, { "epoch": 0.24005284291400858, "grad_norm": 2.380919231120101, "learning_rate": 8.887055869014353e-07, "loss": 0.2531, "step": 13810 }, { "epoch": 0.2400702254515114, "grad_norm": 2.1231764404342903, "learning_rate": 8.886878805569853e-07, "loss": 0.3208, "step": 13811 }, { "epoch": 0.24008760798901424, "grad_norm": 1.1363803054572539, "learning_rate": 8.886701729805734e-07, "loss": 0.1642, "step": 13812 }, { "epoch": 0.24010499052651707, "grad_norm": 2.1077186299047272, "learning_rate": 8.886524641722554e-07, "loss": 0.3714, "step": 13813 }, { "epoch": 0.24012237306401987, "grad_norm": 3.8747371314152415, "learning_rate": 8.886347541320876e-07, "loss": 0.2281, "step": 13814 }, { "epoch": 0.2401397556015227, "grad_norm": 1.507860258971039, "learning_rate": 8.886170428601263e-07, "loss": 0.2866, "step": 13815 }, { "epoch": 0.24015713813902553, "grad_norm": 3.49033367576942, "learning_rate": 8.885993303564274e-07, "loss": 0.5871, "step": 13816 }, { "epoch": 0.24017452067652836, "grad_norm": 2.0703488557664294, "learning_rate": 8.885816166210472e-07, "loss": 0.3665, "step": 13817 }, { "epoch": 0.2401919032140312, "grad_norm": 1.3574696324014304, "learning_rate": 8.885639016540417e-07, "loss": 0.3867, "step": 13818 }, { "epoch": 0.240209285751534, "grad_norm": 1.2038197831235697, "learning_rate": 8.88546185455467e-07, "loss": 0.3975, "step": 13819 }, { "epoch": 0.24022666828903683, "grad_norm": 1.9065200253314463, "learning_rate": 8.885284680253796e-07, "loss": 0.3563, "step": 13820 }, { "epoch": 0.24024405082653966, "grad_norm": 2.3386185820062617, "learning_rate": 8.885107493638353e-07, "loss": 0.3668, "step": 13821 }, { "epoch": 0.2402614333640425, "grad_norm": 2.3733001860577416, "learning_rate": 8.884930294708906e-07, "loss": 0.4774, "step": 13822 }, { "epoch": 0.24027881590154532, "grad_norm": 1.660036896000822, "learning_rate": 8.884753083466013e-07, "loss": 0.3647, "step": 13823 }, { "epoch": 0.24029619843904812, "grad_norm": 1.2576761690958382, "learning_rate": 8.884575859910236e-07, "loss": 0.3234, "step": 13824 }, { "epoch": 0.24031358097655095, "grad_norm": 1.3101271709568765, "learning_rate": 8.88439862404214e-07, "loss": 0.2648, "step": 13825 }, { "epoch": 0.24033096351405378, "grad_norm": 0.9850214333379044, "learning_rate": 8.884221375862283e-07, "loss": 0.249, "step": 13826 }, { "epoch": 0.2403483460515566, "grad_norm": 1.2902210155392086, "learning_rate": 8.88404411537123e-07, "loss": 0.2609, "step": 13827 }, { "epoch": 0.24036572858905944, "grad_norm": 1.7891221354599207, "learning_rate": 8.88386684256954e-07, "loss": 0.3063, "step": 13828 }, { "epoch": 0.24038311112656224, "grad_norm": 1.5803411994131278, "learning_rate": 8.883689557457778e-07, "loss": 0.313, "step": 13829 }, { "epoch": 0.24040049366406507, "grad_norm": 3.0735133443082776, "learning_rate": 8.883512260036502e-07, "loss": 0.3773, "step": 13830 }, { "epoch": 0.2404178762015679, "grad_norm": 2.1174947376160014, "learning_rate": 8.883334950306279e-07, "loss": 0.3494, "step": 13831 }, { "epoch": 0.24043525873907073, "grad_norm": 3.025728228787865, "learning_rate": 8.883157628267664e-07, "loss": 0.2685, "step": 13832 }, { "epoch": 0.24045264127657356, "grad_norm": 2.279932885828067, "learning_rate": 8.882980293921227e-07, "loss": 0.4541, "step": 13833 }, { "epoch": 0.24047002381407637, "grad_norm": 2.0753252575644447, "learning_rate": 8.882802947267524e-07, "loss": 0.3723, "step": 13834 }, { "epoch": 0.2404874063515792, "grad_norm": 1.6671222249870008, "learning_rate": 8.882625588307118e-07, "loss": 0.2882, "step": 13835 }, { "epoch": 0.24050478888908203, "grad_norm": 1.3346188617247365, "learning_rate": 8.882448217040575e-07, "loss": 0.2385, "step": 13836 }, { "epoch": 0.24052217142658486, "grad_norm": 1.5347990694741793, "learning_rate": 8.882270833468455e-07, "loss": 0.262, "step": 13837 }, { "epoch": 0.2405395539640877, "grad_norm": 2.2480501283550582, "learning_rate": 8.882093437591318e-07, "loss": 0.4459, "step": 13838 }, { "epoch": 0.2405569365015905, "grad_norm": 2.1360982938898183, "learning_rate": 8.881916029409729e-07, "loss": 0.4147, "step": 13839 }, { "epoch": 0.24057431903909332, "grad_norm": 1.7206385212839734, "learning_rate": 8.881738608924248e-07, "loss": 0.223, "step": 13840 }, { "epoch": 0.24059170157659615, "grad_norm": 1.493172016348665, "learning_rate": 8.881561176135441e-07, "loss": 0.3079, "step": 13841 }, { "epoch": 0.24060908411409898, "grad_norm": 1.4239206774087447, "learning_rate": 8.881383731043865e-07, "loss": 0.4039, "step": 13842 }, { "epoch": 0.2406264666516018, "grad_norm": 2.6265428605528713, "learning_rate": 8.881206273650088e-07, "loss": 0.4189, "step": 13843 }, { "epoch": 0.2406438491891046, "grad_norm": 2.4451581912159908, "learning_rate": 8.88102880395467e-07, "loss": 0.4895, "step": 13844 }, { "epoch": 0.24066123172660744, "grad_norm": 1.9633036468294889, "learning_rate": 8.880851321958173e-07, "loss": 0.4012, "step": 13845 }, { "epoch": 0.24067861426411027, "grad_norm": 1.4896578584203817, "learning_rate": 8.880673827661161e-07, "loss": 0.2647, "step": 13846 }, { "epoch": 0.2406959968016131, "grad_norm": 1.449572150430197, "learning_rate": 8.880496321064193e-07, "loss": 0.3815, "step": 13847 }, { "epoch": 0.24071337933911593, "grad_norm": 1.9150475683070058, "learning_rate": 8.880318802167836e-07, "loss": 0.2974, "step": 13848 }, { "epoch": 0.24073076187661874, "grad_norm": 2.618532604156323, "learning_rate": 8.88014127097265e-07, "loss": 0.3093, "step": 13849 }, { "epoch": 0.24074814441412157, "grad_norm": 2.628485392210695, "learning_rate": 8.8799637274792e-07, "loss": 0.2498, "step": 13850 }, { "epoch": 0.2407655269516244, "grad_norm": 1.873424545937502, "learning_rate": 8.879786171688046e-07, "loss": 0.4096, "step": 13851 }, { "epoch": 0.24078290948912723, "grad_norm": 8.720133428278404, "learning_rate": 8.879608603599753e-07, "loss": 0.6539, "step": 13852 }, { "epoch": 0.24080029202663006, "grad_norm": 1.7398816067029739, "learning_rate": 8.879431023214883e-07, "loss": 0.3033, "step": 13853 }, { "epoch": 0.24081767456413286, "grad_norm": 1.8861553038830023, "learning_rate": 8.879253430533997e-07, "loss": 0.285, "step": 13854 }, { "epoch": 0.2408350571016357, "grad_norm": 1.9363037226150688, "learning_rate": 8.879075825557661e-07, "loss": 0.3888, "step": 13855 }, { "epoch": 0.24085243963913852, "grad_norm": 1.0697652107902393, "learning_rate": 8.878898208286436e-07, "loss": 0.382, "step": 13856 }, { "epoch": 0.24086982217664135, "grad_norm": 1.2864994344245388, "learning_rate": 8.878720578720887e-07, "loss": 0.168, "step": 13857 }, { "epoch": 0.24088720471414418, "grad_norm": 1.72909989970483, "learning_rate": 8.878542936861574e-07, "loss": 0.2101, "step": 13858 }, { "epoch": 0.24090458725164698, "grad_norm": 1.2984327006741372, "learning_rate": 8.878365282709062e-07, "loss": 0.2732, "step": 13859 }, { "epoch": 0.24092196978914981, "grad_norm": 2.001271831084123, "learning_rate": 8.878187616263912e-07, "loss": 0.4314, "step": 13860 }, { "epoch": 0.24093935232665264, "grad_norm": 2.0901512137332534, "learning_rate": 8.878009937526691e-07, "loss": 0.2404, "step": 13861 }, { "epoch": 0.24095673486415548, "grad_norm": 1.1677020748531848, "learning_rate": 8.877832246497959e-07, "loss": 0.286, "step": 13862 }, { "epoch": 0.2409741174016583, "grad_norm": 1.5256884411971328, "learning_rate": 8.877654543178281e-07, "loss": 0.189, "step": 13863 }, { "epoch": 0.2409914999391611, "grad_norm": 1.844685003411712, "learning_rate": 8.877476827568218e-07, "loss": 0.488, "step": 13864 }, { "epoch": 0.24100888247666394, "grad_norm": 1.7261982731989025, "learning_rate": 8.877299099668336e-07, "loss": 0.2675, "step": 13865 }, { "epoch": 0.24102626501416677, "grad_norm": 2.2517634602491254, "learning_rate": 8.877121359479196e-07, "loss": 0.3943, "step": 13866 }, { "epoch": 0.2410436475516696, "grad_norm": 2.065446466137947, "learning_rate": 8.876943607001361e-07, "loss": 0.3199, "step": 13867 }, { "epoch": 0.24106103008917243, "grad_norm": 2.153835033174725, "learning_rate": 8.876765842235399e-07, "loss": 0.4436, "step": 13868 }, { "epoch": 0.24107841262667523, "grad_norm": 1.8289044321080064, "learning_rate": 8.876588065181867e-07, "loss": 0.3096, "step": 13869 }, { "epoch": 0.24109579516417806, "grad_norm": 1.643696613866721, "learning_rate": 8.876410275841332e-07, "loss": 0.2038, "step": 13870 }, { "epoch": 0.2411131777016809, "grad_norm": 2.1350446569587636, "learning_rate": 8.876232474214359e-07, "loss": 0.2715, "step": 13871 }, { "epoch": 0.24113056023918372, "grad_norm": 2.15094576547496, "learning_rate": 8.876054660301509e-07, "loss": 0.2744, "step": 13872 }, { "epoch": 0.24114794277668655, "grad_norm": 1.6079526819703838, "learning_rate": 8.875876834103346e-07, "loss": 0.3842, "step": 13873 }, { "epoch": 0.24116532531418935, "grad_norm": 1.6283083758226946, "learning_rate": 8.875698995620433e-07, "loss": 0.2742, "step": 13874 }, { "epoch": 0.24118270785169219, "grad_norm": 2.0186223890506243, "learning_rate": 8.875521144853335e-07, "loss": 0.3055, "step": 13875 }, { "epoch": 0.24120009038919502, "grad_norm": 1.552368083666725, "learning_rate": 8.875343281802616e-07, "loss": 0.2954, "step": 13876 }, { "epoch": 0.24121747292669785, "grad_norm": 1.5923116067159055, "learning_rate": 8.875165406468839e-07, "loss": 0.2013, "step": 13877 }, { "epoch": 0.24123485546420068, "grad_norm": 2.159267091897164, "learning_rate": 8.874987518852567e-07, "loss": 0.3333, "step": 13878 }, { "epoch": 0.24125223800170348, "grad_norm": 3.169576848825464, "learning_rate": 8.874809618954364e-07, "loss": 0.365, "step": 13879 }, { "epoch": 0.2412696205392063, "grad_norm": 1.7887368779674715, "learning_rate": 8.874631706774796e-07, "loss": 0.3319, "step": 13880 }, { "epoch": 0.24128700307670914, "grad_norm": 2.989319193263295, "learning_rate": 8.874453782314424e-07, "loss": 0.4902, "step": 13881 }, { "epoch": 0.24130438561421197, "grad_norm": 1.0651302555403768, "learning_rate": 8.874275845573814e-07, "loss": 0.2486, "step": 13882 }, { "epoch": 0.2413217681517148, "grad_norm": 1.1241485278944292, "learning_rate": 8.87409789655353e-07, "loss": 0.2456, "step": 13883 }, { "epoch": 0.2413391506892176, "grad_norm": 1.5877742545077733, "learning_rate": 8.873919935254134e-07, "loss": 0.2622, "step": 13884 }, { "epoch": 0.24135653322672043, "grad_norm": 2.691001692157794, "learning_rate": 8.873741961676191e-07, "loss": 0.4684, "step": 13885 }, { "epoch": 0.24137391576422326, "grad_norm": 2.0870457988174307, "learning_rate": 8.873563975820267e-07, "loss": 0.3826, "step": 13886 }, { "epoch": 0.2413912983017261, "grad_norm": 3.1803384682062696, "learning_rate": 8.873385977686923e-07, "loss": 0.339, "step": 13887 }, { "epoch": 0.24140868083922892, "grad_norm": 1.5376997176101037, "learning_rate": 8.873207967276726e-07, "loss": 0.2966, "step": 13888 }, { "epoch": 0.24142606337673173, "grad_norm": 1.1212778395403322, "learning_rate": 8.873029944590239e-07, "loss": 0.2232, "step": 13889 }, { "epoch": 0.24144344591423456, "grad_norm": 1.999314783193979, "learning_rate": 8.872851909628026e-07, "loss": 0.3501, "step": 13890 }, { "epoch": 0.24146082845173739, "grad_norm": 1.6928310418158428, "learning_rate": 8.872673862390651e-07, "loss": 0.3854, "step": 13891 }, { "epoch": 0.24147821098924022, "grad_norm": 1.492337741839628, "learning_rate": 8.872495802878679e-07, "loss": 0.4254, "step": 13892 }, { "epoch": 0.24149559352674305, "grad_norm": 3.104417912845801, "learning_rate": 8.872317731092674e-07, "loss": 0.3844, "step": 13893 }, { "epoch": 0.24151297606424585, "grad_norm": 1.7743025691009313, "learning_rate": 8.872139647033201e-07, "loss": 0.3028, "step": 13894 }, { "epoch": 0.24153035860174868, "grad_norm": 2.101302873830171, "learning_rate": 8.871961550700824e-07, "loss": 0.4167, "step": 13895 }, { "epoch": 0.2415477411392515, "grad_norm": 1.4708917198168627, "learning_rate": 8.871783442096106e-07, "loss": 0.2376, "step": 13896 }, { "epoch": 0.24156512367675434, "grad_norm": 2.100571113253426, "learning_rate": 8.871605321219616e-07, "loss": 0.2135, "step": 13897 }, { "epoch": 0.24158250621425717, "grad_norm": 2.070939406876626, "learning_rate": 8.871427188071914e-07, "loss": 0.3257, "step": 13898 }, { "epoch": 0.24159988875175997, "grad_norm": 1.371973596410289, "learning_rate": 8.871249042653565e-07, "loss": 0.2667, "step": 13899 }, { "epoch": 0.2416172712892628, "grad_norm": 1.8003019203760906, "learning_rate": 8.871070884965136e-07, "loss": 0.36, "step": 13900 }, { "epoch": 0.24163465382676563, "grad_norm": 1.726728476551598, "learning_rate": 8.870892715007191e-07, "loss": 0.3087, "step": 13901 }, { "epoch": 0.24165203636426846, "grad_norm": 1.612706048754316, "learning_rate": 8.870714532780295e-07, "loss": 0.3216, "step": 13902 }, { "epoch": 0.2416694189017713, "grad_norm": 2.2002144558297774, "learning_rate": 8.870536338285009e-07, "loss": 0.3506, "step": 13903 }, { "epoch": 0.2416868014392741, "grad_norm": 3.0875218152003456, "learning_rate": 8.870358131521903e-07, "loss": 0.4275, "step": 13904 }, { "epoch": 0.24170418397677693, "grad_norm": 1.9327017016877326, "learning_rate": 8.870179912491539e-07, "loss": 0.3559, "step": 13905 }, { "epoch": 0.24172156651427976, "grad_norm": 1.6402047053504236, "learning_rate": 8.870001681194483e-07, "loss": 0.3569, "step": 13906 }, { "epoch": 0.2417389490517826, "grad_norm": 1.2593439270519042, "learning_rate": 8.869823437631298e-07, "loss": 0.3376, "step": 13907 }, { "epoch": 0.24175633158928542, "grad_norm": 2.3866048812521816, "learning_rate": 8.869645181802551e-07, "loss": 0.3765, "step": 13908 }, { "epoch": 0.24177371412678822, "grad_norm": 2.0698200020029724, "learning_rate": 8.869466913708808e-07, "loss": 0.3294, "step": 13909 }, { "epoch": 0.24179109666429105, "grad_norm": 1.2678547695454665, "learning_rate": 8.869288633350631e-07, "loss": 0.2742, "step": 13910 }, { "epoch": 0.24180847920179388, "grad_norm": 1.383358017659606, "learning_rate": 8.869110340728587e-07, "loss": 0.3923, "step": 13911 }, { "epoch": 0.2418258617392967, "grad_norm": 2.519551396870569, "learning_rate": 8.868932035843239e-07, "loss": 0.3991, "step": 13912 }, { "epoch": 0.24184324427679954, "grad_norm": 1.8514393518031522, "learning_rate": 8.868753718695156e-07, "loss": 0.2355, "step": 13913 }, { "epoch": 0.24186062681430234, "grad_norm": 1.5668571526545416, "learning_rate": 8.868575389284899e-07, "loss": 0.2815, "step": 13914 }, { "epoch": 0.24187800935180517, "grad_norm": 1.3351841746592756, "learning_rate": 8.868397047613036e-07, "loss": 0.3106, "step": 13915 }, { "epoch": 0.241895391889308, "grad_norm": 1.8595829989832255, "learning_rate": 8.868218693680132e-07, "loss": 0.2107, "step": 13916 }, { "epoch": 0.24191277442681083, "grad_norm": 2.2209824697195613, "learning_rate": 8.86804032748675e-07, "loss": 0.4488, "step": 13917 }, { "epoch": 0.24193015696431366, "grad_norm": 1.2305776329926477, "learning_rate": 8.867861949033457e-07, "loss": 0.3731, "step": 13918 }, { "epoch": 0.24194753950181647, "grad_norm": 1.807954813188481, "learning_rate": 8.86768355832082e-07, "loss": 0.5045, "step": 13919 }, { "epoch": 0.2419649220393193, "grad_norm": 1.7235149481619554, "learning_rate": 8.867505155349401e-07, "loss": 0.2621, "step": 13920 }, { "epoch": 0.24198230457682213, "grad_norm": 2.2471676656647666, "learning_rate": 8.867326740119769e-07, "loss": 0.3049, "step": 13921 }, { "epoch": 0.24199968711432496, "grad_norm": 1.6390985039980435, "learning_rate": 8.867148312632487e-07, "loss": 0.3047, "step": 13922 }, { "epoch": 0.2420170696518278, "grad_norm": 1.8156842370554334, "learning_rate": 8.866969872888122e-07, "loss": 0.3357, "step": 13923 }, { "epoch": 0.2420344521893306, "grad_norm": 1.2027435581452544, "learning_rate": 8.866791420887237e-07, "loss": 0.1925, "step": 13924 }, { "epoch": 0.24205183472683342, "grad_norm": 1.2453194831821417, "learning_rate": 8.8666129566304e-07, "loss": 0.2895, "step": 13925 }, { "epoch": 0.24206921726433625, "grad_norm": 1.3319575482130717, "learning_rate": 8.866434480118177e-07, "loss": 0.2616, "step": 13926 }, { "epoch": 0.24208659980183908, "grad_norm": 1.6584083986012654, "learning_rate": 8.866255991351132e-07, "loss": 0.2806, "step": 13927 }, { "epoch": 0.24210398233934188, "grad_norm": 2.264330017271215, "learning_rate": 8.866077490329833e-07, "loss": 0.4496, "step": 13928 }, { "epoch": 0.2421213648768447, "grad_norm": 2.134361026838545, "learning_rate": 8.865898977054842e-07, "loss": 0.2367, "step": 13929 }, { "epoch": 0.24213874741434754, "grad_norm": 1.460937178345179, "learning_rate": 8.865720451526729e-07, "loss": 0.3693, "step": 13930 }, { "epoch": 0.24215612995185037, "grad_norm": 1.8213692857468955, "learning_rate": 8.865541913746058e-07, "loss": 0.3653, "step": 13931 }, { "epoch": 0.2421735124893532, "grad_norm": 4.967943365977909, "learning_rate": 8.865363363713392e-07, "loss": 0.3249, "step": 13932 }, { "epoch": 0.242190895026856, "grad_norm": 1.6728195292892722, "learning_rate": 8.865184801429303e-07, "loss": 0.407, "step": 13933 }, { "epoch": 0.24220827756435884, "grad_norm": 1.8446729852418293, "learning_rate": 8.865006226894352e-07, "loss": 0.2432, "step": 13934 }, { "epoch": 0.24222566010186167, "grad_norm": 1.9419233162778295, "learning_rate": 8.864827640109107e-07, "loss": 0.4082, "step": 13935 }, { "epoch": 0.2422430426393645, "grad_norm": 1.4995120498485155, "learning_rate": 8.864649041074135e-07, "loss": 0.4149, "step": 13936 }, { "epoch": 0.24226042517686733, "grad_norm": 3.8895376014881062, "learning_rate": 8.864470429789999e-07, "loss": 0.5258, "step": 13937 }, { "epoch": 0.24227780771437013, "grad_norm": 1.7263253728237788, "learning_rate": 8.864291806257267e-07, "loss": 0.3326, "step": 13938 }, { "epoch": 0.24229519025187296, "grad_norm": 1.2764780693846034, "learning_rate": 8.864113170476505e-07, "loss": 0.4115, "step": 13939 }, { "epoch": 0.2423125727893758, "grad_norm": 2.133884642695142, "learning_rate": 8.863934522448282e-07, "loss": 0.2499, "step": 13940 }, { "epoch": 0.24232995532687862, "grad_norm": 1.6625991155090751, "learning_rate": 8.863755862173159e-07, "loss": 0.3781, "step": 13941 }, { "epoch": 0.24234733786438145, "grad_norm": 1.5215630870110874, "learning_rate": 8.863577189651706e-07, "loss": 0.2635, "step": 13942 }, { "epoch": 0.24236472040188425, "grad_norm": 2.1843243946530255, "learning_rate": 8.863398504884488e-07, "loss": 0.4218, "step": 13943 }, { "epoch": 0.24238210293938708, "grad_norm": 5.9849149665471, "learning_rate": 8.863219807872069e-07, "loss": 0.5332, "step": 13944 }, { "epoch": 0.24239948547688991, "grad_norm": 2.3298539142043038, "learning_rate": 8.86304109861502e-07, "loss": 0.2184, "step": 13945 }, { "epoch": 0.24241686801439274, "grad_norm": 1.703962684227191, "learning_rate": 8.862862377113904e-07, "loss": 0.2573, "step": 13946 }, { "epoch": 0.24243425055189557, "grad_norm": 2.1263379224462113, "learning_rate": 8.862683643369291e-07, "loss": 0.2575, "step": 13947 }, { "epoch": 0.24245163308939838, "grad_norm": 1.6704635905778746, "learning_rate": 8.862504897381743e-07, "loss": 0.2964, "step": 13948 }, { "epoch": 0.2424690156269012, "grad_norm": 4.2083413352460735, "learning_rate": 8.86232613915183e-07, "loss": 0.3672, "step": 13949 }, { "epoch": 0.24248639816440404, "grad_norm": 1.2929556355881748, "learning_rate": 8.862147368680117e-07, "loss": 0.414, "step": 13950 }, { "epoch": 0.24250378070190687, "grad_norm": 3.1733179568408754, "learning_rate": 8.861968585967172e-07, "loss": 0.2808, "step": 13951 }, { "epoch": 0.2425211632394097, "grad_norm": 1.8365232505902298, "learning_rate": 8.861789791013558e-07, "loss": 0.2863, "step": 13952 }, { "epoch": 0.2425385457769125, "grad_norm": 1.4731439945715152, "learning_rate": 8.861610983819848e-07, "loss": 0.242, "step": 13953 }, { "epoch": 0.24255592831441533, "grad_norm": 3.0007599791101627, "learning_rate": 8.861432164386602e-07, "loss": 0.3868, "step": 13954 }, { "epoch": 0.24257331085191816, "grad_norm": 1.4408138101011996, "learning_rate": 8.86125333271439e-07, "loss": 0.3061, "step": 13955 }, { "epoch": 0.242590693389421, "grad_norm": 1.2432741798069937, "learning_rate": 8.861074488803779e-07, "loss": 0.3328, "step": 13956 }, { "epoch": 0.24260807592692382, "grad_norm": 2.4434110484779583, "learning_rate": 8.860895632655336e-07, "loss": 0.3117, "step": 13957 }, { "epoch": 0.24262545846442662, "grad_norm": 1.6884419573596492, "learning_rate": 8.860716764269628e-07, "loss": 0.3881, "step": 13958 }, { "epoch": 0.24264284100192945, "grad_norm": 1.6073032562312313, "learning_rate": 8.860537883647221e-07, "loss": 0.3462, "step": 13959 }, { "epoch": 0.24266022353943228, "grad_norm": 1.801498263783913, "learning_rate": 8.860358990788681e-07, "loss": 0.3052, "step": 13960 }, { "epoch": 0.24267760607693512, "grad_norm": 2.938126543819767, "learning_rate": 8.860180085694578e-07, "loss": 0.741, "step": 13961 }, { "epoch": 0.24269498861443795, "grad_norm": 2.3915066569664876, "learning_rate": 8.860001168365476e-07, "loss": 0.3703, "step": 13962 }, { "epoch": 0.24271237115194075, "grad_norm": 1.9418002526363867, "learning_rate": 8.859822238801944e-07, "loss": 0.7105, "step": 13963 }, { "epoch": 0.24272975368944358, "grad_norm": 2.7701480940379155, "learning_rate": 8.859643297004549e-07, "loss": 0.3164, "step": 13964 }, { "epoch": 0.2427471362269464, "grad_norm": 2.3659955627552067, "learning_rate": 8.859464342973857e-07, "loss": 0.5587, "step": 13965 }, { "epoch": 0.24276451876444924, "grad_norm": 2.2746644127128848, "learning_rate": 8.859285376710437e-07, "loss": 0.5049, "step": 13966 }, { "epoch": 0.24278190130195207, "grad_norm": 2.4480433366237873, "learning_rate": 8.859106398214854e-07, "loss": 0.3166, "step": 13967 }, { "epoch": 0.24279928383945487, "grad_norm": 1.5843136626871195, "learning_rate": 8.858927407487677e-07, "loss": 0.3299, "step": 13968 }, { "epoch": 0.2428166663769577, "grad_norm": 2.0705263212722818, "learning_rate": 8.858748404529472e-07, "loss": 0.194, "step": 13969 }, { "epoch": 0.24283404891446053, "grad_norm": 2.4468159342356155, "learning_rate": 8.858569389340809e-07, "loss": 0.494, "step": 13970 }, { "epoch": 0.24285143145196336, "grad_norm": 1.7850868289298942, "learning_rate": 8.858390361922251e-07, "loss": 0.322, "step": 13971 }, { "epoch": 0.2428688139894662, "grad_norm": 1.8767022902402866, "learning_rate": 8.85821132227437e-07, "loss": 0.2024, "step": 13972 }, { "epoch": 0.242886196526969, "grad_norm": 2.1455582932694526, "learning_rate": 8.858032270397731e-07, "loss": 0.3579, "step": 13973 }, { "epoch": 0.24290357906447183, "grad_norm": 1.0621699505967221, "learning_rate": 8.857853206292901e-07, "loss": 0.3091, "step": 13974 }, { "epoch": 0.24292096160197466, "grad_norm": 1.2166695930900664, "learning_rate": 8.857674129960449e-07, "loss": 0.2801, "step": 13975 }, { "epoch": 0.24293834413947749, "grad_norm": 2.799951507173783, "learning_rate": 8.857495041400942e-07, "loss": 0.2947, "step": 13976 }, { "epoch": 0.24295572667698032, "grad_norm": 3.3573379075516128, "learning_rate": 8.857315940614948e-07, "loss": 0.3616, "step": 13977 }, { "epoch": 0.24297310921448312, "grad_norm": 2.2301404475252125, "learning_rate": 8.857136827603035e-07, "loss": 0.3684, "step": 13978 }, { "epoch": 0.24299049175198595, "grad_norm": 2.1513467205711434, "learning_rate": 8.856957702365768e-07, "loss": 0.3087, "step": 13979 }, { "epoch": 0.24300787428948878, "grad_norm": 2.588704875744263, "learning_rate": 8.856778564903718e-07, "loss": 0.3468, "step": 13980 }, { "epoch": 0.2430252568269916, "grad_norm": 1.3462478254081613, "learning_rate": 8.856599415217452e-07, "loss": 0.2039, "step": 13981 }, { "epoch": 0.24304263936449444, "grad_norm": 2.66852735826564, "learning_rate": 8.856420253307537e-07, "loss": 0.1793, "step": 13982 }, { "epoch": 0.24306002190199724, "grad_norm": 1.9469260156775907, "learning_rate": 8.856241079174542e-07, "loss": 0.3171, "step": 13983 }, { "epoch": 0.24307740443950007, "grad_norm": 1.8361037994248028, "learning_rate": 8.856061892819033e-07, "loss": 0.2505, "step": 13984 }, { "epoch": 0.2430947869770029, "grad_norm": 1.7297697043220102, "learning_rate": 8.85588269424158e-07, "loss": 0.2102, "step": 13985 }, { "epoch": 0.24311216951450573, "grad_norm": 2.1887518532501904, "learning_rate": 8.85570348344275e-07, "loss": 0.2617, "step": 13986 }, { "epoch": 0.24312955205200856, "grad_norm": 2.140872670080089, "learning_rate": 8.855524260423111e-07, "loss": 0.3069, "step": 13987 }, { "epoch": 0.24314693458951137, "grad_norm": 2.529778993942274, "learning_rate": 8.855345025183232e-07, "loss": 0.4719, "step": 13988 }, { "epoch": 0.2431643171270142, "grad_norm": 1.821472547967164, "learning_rate": 8.855165777723679e-07, "loss": 0.6156, "step": 13989 }, { "epoch": 0.24318169966451703, "grad_norm": 2.766890296768189, "learning_rate": 8.854986518045023e-07, "loss": 0.422, "step": 13990 }, { "epoch": 0.24319908220201986, "grad_norm": 1.6283998078223647, "learning_rate": 8.854807246147829e-07, "loss": 0.2945, "step": 13991 }, { "epoch": 0.2432164647395227, "grad_norm": 2.7150028104639423, "learning_rate": 8.854627962032666e-07, "loss": 0.2875, "step": 13992 }, { "epoch": 0.2432338472770255, "grad_norm": 1.8452004202815404, "learning_rate": 8.854448665700105e-07, "loss": 0.2654, "step": 13993 }, { "epoch": 0.24325122981452832, "grad_norm": 1.418621451471659, "learning_rate": 8.854269357150712e-07, "loss": 0.3003, "step": 13994 }, { "epoch": 0.24326861235203115, "grad_norm": 2.2184268854346616, "learning_rate": 8.854090036385055e-07, "loss": 0.3522, "step": 13995 }, { "epoch": 0.24328599488953398, "grad_norm": 2.000126961350548, "learning_rate": 8.853910703403702e-07, "loss": 0.3367, "step": 13996 }, { "epoch": 0.2433033774270368, "grad_norm": 1.2014532959354136, "learning_rate": 8.853731358207224e-07, "loss": 0.209, "step": 13997 }, { "epoch": 0.2433207599645396, "grad_norm": 1.555166221362954, "learning_rate": 8.853552000796187e-07, "loss": 0.2287, "step": 13998 }, { "epoch": 0.24333814250204244, "grad_norm": 1.7537511560288994, "learning_rate": 8.85337263117116e-07, "loss": 0.3597, "step": 13999 }, { "epoch": 0.24335552503954527, "grad_norm": 2.082346101915774, "learning_rate": 8.853193249332714e-07, "loss": 0.4054, "step": 14000 }, { "epoch": 0.2433729075770481, "grad_norm": 1.8804904566680976, "learning_rate": 8.853013855281415e-07, "loss": 0.343, "step": 14001 }, { "epoch": 0.24339029011455093, "grad_norm": 1.3011057995974613, "learning_rate": 8.852834449017829e-07, "loss": 0.2189, "step": 14002 }, { "epoch": 0.24340767265205374, "grad_norm": 2.431473179345038, "learning_rate": 8.85265503054253e-07, "loss": 0.3579, "step": 14003 }, { "epoch": 0.24342505518955657, "grad_norm": 1.2792538267240394, "learning_rate": 8.852475599856085e-07, "loss": 0.2513, "step": 14004 }, { "epoch": 0.2434424377270594, "grad_norm": 1.484064622314627, "learning_rate": 8.852296156959061e-07, "loss": 0.4089, "step": 14005 }, { "epoch": 0.24345982026456223, "grad_norm": 2.1059018623256702, "learning_rate": 8.852116701852028e-07, "loss": 0.382, "step": 14006 }, { "epoch": 0.24347720280206506, "grad_norm": 2.5070973446687086, "learning_rate": 8.851937234535554e-07, "loss": 0.3603, "step": 14007 }, { "epoch": 0.24349458533956786, "grad_norm": 1.5197358024074332, "learning_rate": 8.85175775501021e-07, "loss": 0.2016, "step": 14008 }, { "epoch": 0.2435119678770707, "grad_norm": 2.281790441154894, "learning_rate": 8.851578263276562e-07, "loss": 0.3054, "step": 14009 }, { "epoch": 0.24352935041457352, "grad_norm": 1.5037898805631638, "learning_rate": 8.85139875933518e-07, "loss": 0.3298, "step": 14010 }, { "epoch": 0.24354673295207635, "grad_norm": 1.598910760194083, "learning_rate": 8.851219243186633e-07, "loss": 0.2414, "step": 14011 }, { "epoch": 0.24356411548957918, "grad_norm": 1.1484223114308434, "learning_rate": 8.851039714831491e-07, "loss": 0.3302, "step": 14012 }, { "epoch": 0.24358149802708198, "grad_norm": 2.2035096191570744, "learning_rate": 8.850860174270322e-07, "loss": 0.2732, "step": 14013 }, { "epoch": 0.2435988805645848, "grad_norm": 2.140768890466852, "learning_rate": 8.850680621503694e-07, "loss": 0.4481, "step": 14014 }, { "epoch": 0.24361626310208764, "grad_norm": 1.8916563824816874, "learning_rate": 8.850501056532178e-07, "loss": 0.3235, "step": 14015 }, { "epoch": 0.24363364563959047, "grad_norm": 3.057204032234947, "learning_rate": 8.850321479356343e-07, "loss": 0.3905, "step": 14016 }, { "epoch": 0.2436510281770933, "grad_norm": 2.938577616522529, "learning_rate": 8.850141889976758e-07, "loss": 0.2965, "step": 14017 }, { "epoch": 0.2436684107145961, "grad_norm": 1.8497119510676705, "learning_rate": 8.84996228839399e-07, "loss": 0.3747, "step": 14018 }, { "epoch": 0.24368579325209894, "grad_norm": 1.424303945562266, "learning_rate": 8.849782674608611e-07, "loss": 0.2242, "step": 14019 }, { "epoch": 0.24370317578960177, "grad_norm": 1.826932970279434, "learning_rate": 8.84960304862119e-07, "loss": 0.2356, "step": 14020 }, { "epoch": 0.2437205583271046, "grad_norm": 2.4191153376754757, "learning_rate": 8.849423410432295e-07, "loss": 0.365, "step": 14021 }, { "epoch": 0.24373794086460743, "grad_norm": 1.3679372019533704, "learning_rate": 8.849243760042496e-07, "loss": 0.2972, "step": 14022 }, { "epoch": 0.24375532340211023, "grad_norm": 2.4254382360895033, "learning_rate": 8.849064097452364e-07, "loss": 0.3822, "step": 14023 }, { "epoch": 0.24377270593961306, "grad_norm": 1.1828699465291594, "learning_rate": 8.848884422662464e-07, "loss": 0.2723, "step": 14024 }, { "epoch": 0.2437900884771159, "grad_norm": 4.261842913382538, "learning_rate": 8.84870473567337e-07, "loss": 0.2175, "step": 14025 }, { "epoch": 0.24380747101461872, "grad_norm": 2.2429968159544282, "learning_rate": 8.84852503648565e-07, "loss": 0.5528, "step": 14026 }, { "epoch": 0.24382485355212155, "grad_norm": 2.7293646829589755, "learning_rate": 8.848345325099873e-07, "loss": 0.5829, "step": 14027 }, { "epoch": 0.24384223608962435, "grad_norm": 1.6707066389397143, "learning_rate": 8.848165601516611e-07, "loss": 0.4212, "step": 14028 }, { "epoch": 0.24385961862712718, "grad_norm": 3.0517013911045208, "learning_rate": 8.847985865736429e-07, "loss": 0.9336, "step": 14029 }, { "epoch": 0.24387700116463001, "grad_norm": 1.9257300439347809, "learning_rate": 8.847806117759901e-07, "loss": 0.2864, "step": 14030 }, { "epoch": 0.24389438370213284, "grad_norm": 1.9449427645330906, "learning_rate": 8.847626357587594e-07, "loss": 0.2506, "step": 14031 }, { "epoch": 0.24391176623963567, "grad_norm": 1.953645530018235, "learning_rate": 8.847446585220081e-07, "loss": 0.2922, "step": 14032 }, { "epoch": 0.24392914877713848, "grad_norm": 1.1423937929321204, "learning_rate": 8.847266800657928e-07, "loss": 0.2472, "step": 14033 }, { "epoch": 0.2439465313146413, "grad_norm": 1.1990506371098901, "learning_rate": 8.847087003901708e-07, "loss": 0.338, "step": 14034 }, { "epoch": 0.24396391385214414, "grad_norm": 2.6623427219080686, "learning_rate": 8.846907194951987e-07, "loss": 0.3073, "step": 14035 }, { "epoch": 0.24398129638964697, "grad_norm": 1.8809332504019314, "learning_rate": 8.84672737380934e-07, "loss": 0.3578, "step": 14036 }, { "epoch": 0.2439986789271498, "grad_norm": 1.8416755506853664, "learning_rate": 8.846547540474332e-07, "loss": 0.5417, "step": 14037 }, { "epoch": 0.2440160614646526, "grad_norm": 1.3636117681646978, "learning_rate": 8.846367694947537e-07, "loss": 0.3631, "step": 14038 }, { "epoch": 0.24403344400215543, "grad_norm": 1.2474651561681998, "learning_rate": 8.846187837229522e-07, "loss": 0.3643, "step": 14039 }, { "epoch": 0.24405082653965826, "grad_norm": 1.5950478771391614, "learning_rate": 8.846007967320859e-07, "loss": 0.4385, "step": 14040 }, { "epoch": 0.2440682090771611, "grad_norm": 1.9175329649550137, "learning_rate": 8.845828085222118e-07, "loss": 0.2255, "step": 14041 }, { "epoch": 0.24408559161466392, "grad_norm": 1.8426698172351736, "learning_rate": 8.845648190933868e-07, "loss": 0.3433, "step": 14042 }, { "epoch": 0.24410297415216672, "grad_norm": 1.7754564333608531, "learning_rate": 8.845468284456679e-07, "loss": 0.2155, "step": 14043 }, { "epoch": 0.24412035668966955, "grad_norm": 1.1746839866033354, "learning_rate": 8.845288365791123e-07, "loss": 0.2308, "step": 14044 }, { "epoch": 0.24413773922717238, "grad_norm": 1.478871976890415, "learning_rate": 8.845108434937769e-07, "loss": 0.162, "step": 14045 }, { "epoch": 0.24415512176467521, "grad_norm": 1.7356591189750121, "learning_rate": 8.844928491897188e-07, "loss": 0.2296, "step": 14046 }, { "epoch": 0.24417250430217805, "grad_norm": 2.2198224146753556, "learning_rate": 8.844748536669948e-07, "loss": 0.3577, "step": 14047 }, { "epoch": 0.24418988683968085, "grad_norm": 1.923453666283807, "learning_rate": 8.844568569256623e-07, "loss": 0.4202, "step": 14048 }, { "epoch": 0.24420726937718368, "grad_norm": 2.1978323271325695, "learning_rate": 8.844388589657781e-07, "loss": 0.2448, "step": 14049 }, { "epoch": 0.2442246519146865, "grad_norm": 1.71101085463908, "learning_rate": 8.844208597873994e-07, "loss": 0.2375, "step": 14050 }, { "epoch": 0.24424203445218934, "grad_norm": 2.414499105576065, "learning_rate": 8.844028593905831e-07, "loss": 0.3088, "step": 14051 }, { "epoch": 0.24425941698969217, "grad_norm": 2.1660866984701648, "learning_rate": 8.843848577753863e-07, "loss": 0.1903, "step": 14052 }, { "epoch": 0.24427679952719497, "grad_norm": 1.5721101563433537, "learning_rate": 8.843668549418661e-07, "loss": 0.1999, "step": 14053 }, { "epoch": 0.2442941820646978, "grad_norm": 1.9922031794741235, "learning_rate": 8.843488508900796e-07, "loss": 0.331, "step": 14054 }, { "epoch": 0.24431156460220063, "grad_norm": 1.9579909840379461, "learning_rate": 8.843308456200837e-07, "loss": 0.2797, "step": 14055 }, { "epoch": 0.24432894713970346, "grad_norm": 1.9450630652524843, "learning_rate": 8.843128391319357e-07, "loss": 0.3706, "step": 14056 }, { "epoch": 0.2443463296772063, "grad_norm": 1.642933484678586, "learning_rate": 8.842948314256924e-07, "loss": 0.4095, "step": 14057 }, { "epoch": 0.2443637122147091, "grad_norm": 1.873907800327155, "learning_rate": 8.84276822501411e-07, "loss": 0.2971, "step": 14058 }, { "epoch": 0.24438109475221192, "grad_norm": 0.8865653608975934, "learning_rate": 8.842588123591487e-07, "loss": 0.3522, "step": 14059 }, { "epoch": 0.24439847728971476, "grad_norm": 1.448711564622131, "learning_rate": 8.842408009989624e-07, "loss": 0.4467, "step": 14060 }, { "epoch": 0.24441585982721759, "grad_norm": 2.6468197584508273, "learning_rate": 8.842227884209092e-07, "loss": 0.3099, "step": 14061 }, { "epoch": 0.24443324236472042, "grad_norm": 1.5949538498766387, "learning_rate": 8.842047746250465e-07, "loss": 0.2193, "step": 14062 }, { "epoch": 0.24445062490222322, "grad_norm": 1.342607293578934, "learning_rate": 8.841867596114309e-07, "loss": 0.2266, "step": 14063 }, { "epoch": 0.24446800743972605, "grad_norm": 1.4944258928198166, "learning_rate": 8.8416874338012e-07, "loss": 0.1938, "step": 14064 }, { "epoch": 0.24448538997722888, "grad_norm": 1.4559676933960422, "learning_rate": 8.841507259311704e-07, "loss": 0.2569, "step": 14065 }, { "epoch": 0.2445027725147317, "grad_norm": 1.241998786741306, "learning_rate": 8.841327072646397e-07, "loss": 0.3214, "step": 14066 }, { "epoch": 0.24452015505223454, "grad_norm": 1.356179089186154, "learning_rate": 8.841146873805846e-07, "loss": 0.2884, "step": 14067 }, { "epoch": 0.24453753758973734, "grad_norm": 1.349122626710557, "learning_rate": 8.840966662790624e-07, "loss": 0.3878, "step": 14068 }, { "epoch": 0.24455492012724017, "grad_norm": 1.2025566651148647, "learning_rate": 8.840786439601302e-07, "loss": 0.786, "step": 14069 }, { "epoch": 0.244572302664743, "grad_norm": 2.044035687967355, "learning_rate": 8.840606204238452e-07, "loss": 0.3148, "step": 14070 }, { "epoch": 0.24458968520224583, "grad_norm": 1.3342628076041878, "learning_rate": 8.840425956702645e-07, "loss": 0.2631, "step": 14071 }, { "epoch": 0.24460706773974863, "grad_norm": 1.4738867200400085, "learning_rate": 8.840245696994451e-07, "loss": 0.4592, "step": 14072 }, { "epoch": 0.24462445027725147, "grad_norm": 1.4924062389855643, "learning_rate": 8.840065425114443e-07, "loss": 0.2458, "step": 14073 }, { "epoch": 0.2446418328147543, "grad_norm": 2.087786884680433, "learning_rate": 8.83988514106319e-07, "loss": 0.3321, "step": 14074 }, { "epoch": 0.24465921535225713, "grad_norm": 1.5293854234446231, "learning_rate": 8.839704844841267e-07, "loss": 0.2424, "step": 14075 }, { "epoch": 0.24467659788975996, "grad_norm": 3.9190436770110932, "learning_rate": 8.839524536449242e-07, "loss": 0.4147, "step": 14076 }, { "epoch": 0.24469398042726276, "grad_norm": 1.9588859686083624, "learning_rate": 8.839344215887688e-07, "loss": 0.3636, "step": 14077 }, { "epoch": 0.2447113629647656, "grad_norm": 2.996929812557084, "learning_rate": 8.839163883157177e-07, "loss": 0.464, "step": 14078 }, { "epoch": 0.24472874550226842, "grad_norm": 1.2457156748760447, "learning_rate": 8.83898353825828e-07, "loss": 0.2116, "step": 14079 }, { "epoch": 0.24474612803977125, "grad_norm": 2.004548681512942, "learning_rate": 8.83880318119157e-07, "loss": 0.3905, "step": 14080 }, { "epoch": 0.24476351057727408, "grad_norm": 1.1017778904840354, "learning_rate": 8.838622811957615e-07, "loss": 0.3089, "step": 14081 }, { "epoch": 0.24478089311477688, "grad_norm": 2.7940221407797, "learning_rate": 8.83844243055699e-07, "loss": 0.3444, "step": 14082 }, { "epoch": 0.2447982756522797, "grad_norm": 2.590075603923318, "learning_rate": 8.838262036990265e-07, "loss": 0.3905, "step": 14083 }, { "epoch": 0.24481565818978254, "grad_norm": 1.5961389708828677, "learning_rate": 8.838081631258014e-07, "loss": 0.4937, "step": 14084 }, { "epoch": 0.24483304072728537, "grad_norm": 0.9522044958068441, "learning_rate": 8.837901213360806e-07, "loss": 0.3282, "step": 14085 }, { "epoch": 0.2448504232647882, "grad_norm": 1.9755380117276036, "learning_rate": 8.837720783299215e-07, "loss": 0.3516, "step": 14086 }, { "epoch": 0.244867805802291, "grad_norm": 1.1148197931514494, "learning_rate": 8.837540341073812e-07, "loss": 0.4887, "step": 14087 }, { "epoch": 0.24488518833979384, "grad_norm": 3.810782115876008, "learning_rate": 8.837359886685168e-07, "loss": 0.3738, "step": 14088 }, { "epoch": 0.24490257087729667, "grad_norm": 1.401840840135423, "learning_rate": 8.837179420133856e-07, "loss": 0.305, "step": 14089 }, { "epoch": 0.2449199534147995, "grad_norm": 1.6229419892482795, "learning_rate": 8.836998941420449e-07, "loss": 0.3285, "step": 14090 }, { "epoch": 0.24493733595230233, "grad_norm": 1.9699658228154817, "learning_rate": 8.836818450545518e-07, "loss": 0.3784, "step": 14091 }, { "epoch": 0.24495471848980513, "grad_norm": 1.7529911075544462, "learning_rate": 8.836637947509633e-07, "loss": 0.3033, "step": 14092 }, { "epoch": 0.24497210102730796, "grad_norm": 1.2346337176000062, "learning_rate": 8.836457432313371e-07, "loss": 0.2995, "step": 14093 }, { "epoch": 0.2449894835648108, "grad_norm": 2.0433395497048488, "learning_rate": 8.836276904957298e-07, "loss": 0.1842, "step": 14094 }, { "epoch": 0.24500686610231362, "grad_norm": 2.5608515234381493, "learning_rate": 8.836096365441992e-07, "loss": 0.3099, "step": 14095 }, { "epoch": 0.24502424863981645, "grad_norm": 1.4861099181537605, "learning_rate": 8.835915813768022e-07, "loss": 0.2452, "step": 14096 }, { "epoch": 0.24504163117731925, "grad_norm": 2.019343042503292, "learning_rate": 8.835735249935961e-07, "loss": 0.2611, "step": 14097 }, { "epoch": 0.24505901371482208, "grad_norm": 2.17666799442488, "learning_rate": 8.835554673946382e-07, "loss": 0.2561, "step": 14098 }, { "epoch": 0.2450763962523249, "grad_norm": 2.841943101057414, "learning_rate": 8.835374085799855e-07, "loss": 0.2488, "step": 14099 }, { "epoch": 0.24509377878982774, "grad_norm": 2.598387421284097, "learning_rate": 8.835193485496955e-07, "loss": 0.2245, "step": 14100 }, { "epoch": 0.24511116132733057, "grad_norm": 1.6602547041046551, "learning_rate": 8.835012873038252e-07, "loss": 0.2845, "step": 14101 }, { "epoch": 0.24512854386483338, "grad_norm": 1.2557283089010645, "learning_rate": 8.834832248424321e-07, "loss": 0.2533, "step": 14102 }, { "epoch": 0.2451459264023362, "grad_norm": 1.2612742285596719, "learning_rate": 8.834651611655733e-07, "loss": 0.1793, "step": 14103 }, { "epoch": 0.24516330893983904, "grad_norm": 2.7022647875102717, "learning_rate": 8.834470962733061e-07, "loss": 0.3579, "step": 14104 }, { "epoch": 0.24518069147734187, "grad_norm": 2.2510292412806687, "learning_rate": 8.834290301656877e-07, "loss": 0.2406, "step": 14105 }, { "epoch": 0.2451980740148447, "grad_norm": 1.635384694465712, "learning_rate": 8.834109628427755e-07, "loss": 0.3814, "step": 14106 }, { "epoch": 0.2452154565523475, "grad_norm": 2.1131850801558816, "learning_rate": 8.833928943046268e-07, "loss": 0.3549, "step": 14107 }, { "epoch": 0.24523283908985033, "grad_norm": 2.696420748762451, "learning_rate": 8.833748245512984e-07, "loss": 0.2564, "step": 14108 }, { "epoch": 0.24525022162735316, "grad_norm": 2.1770218209770342, "learning_rate": 8.833567535828482e-07, "loss": 0.3755, "step": 14109 }, { "epoch": 0.245267604164856, "grad_norm": 1.2249533748397008, "learning_rate": 8.833386813993329e-07, "loss": 0.2676, "step": 14110 }, { "epoch": 0.24528498670235882, "grad_norm": 2.0656036395174375, "learning_rate": 8.833206080008103e-07, "loss": 0.299, "step": 14111 }, { "epoch": 0.24530236923986162, "grad_norm": 2.0576754171530256, "learning_rate": 8.833025333873374e-07, "loss": 0.4902, "step": 14112 }, { "epoch": 0.24531975177736445, "grad_norm": 2.1975757912918503, "learning_rate": 8.832844575589715e-07, "loss": 0.3026, "step": 14113 }, { "epoch": 0.24533713431486728, "grad_norm": 1.4478623525824088, "learning_rate": 8.832663805157701e-07, "loss": 0.2754, "step": 14114 }, { "epoch": 0.2453545168523701, "grad_norm": 1.515520707841539, "learning_rate": 8.832483022577902e-07, "loss": 0.327, "step": 14115 }, { "epoch": 0.24537189938987294, "grad_norm": 1.7769897644810888, "learning_rate": 8.832302227850892e-07, "loss": 0.1913, "step": 14116 }, { "epoch": 0.24538928192737575, "grad_norm": 4.617077175500059, "learning_rate": 8.832121420977246e-07, "loss": 0.2906, "step": 14117 }, { "epoch": 0.24540666446487858, "grad_norm": 2.8390986575599277, "learning_rate": 8.831940601957534e-07, "loss": 0.2871, "step": 14118 }, { "epoch": 0.2454240470023814, "grad_norm": 1.9715737196770728, "learning_rate": 8.831759770792331e-07, "loss": 0.3619, "step": 14119 }, { "epoch": 0.24544142953988424, "grad_norm": 1.9065083139266252, "learning_rate": 8.83157892748221e-07, "loss": 0.2353, "step": 14120 }, { "epoch": 0.24545881207738707, "grad_norm": 1.0717816955271036, "learning_rate": 8.831398072027742e-07, "loss": 0.2998, "step": 14121 }, { "epoch": 0.24547619461488987, "grad_norm": 1.5503277620814317, "learning_rate": 8.831217204429505e-07, "loss": 0.2579, "step": 14122 }, { "epoch": 0.2454935771523927, "grad_norm": 1.3060192576055019, "learning_rate": 8.831036324688067e-07, "loss": 0.353, "step": 14123 }, { "epoch": 0.24551095968989553, "grad_norm": 2.1909616345180356, "learning_rate": 8.830855432804005e-07, "loss": 0.7506, "step": 14124 }, { "epoch": 0.24552834222739836, "grad_norm": 1.5154911137811984, "learning_rate": 8.830674528777891e-07, "loss": 0.2664, "step": 14125 }, { "epoch": 0.2455457247649012, "grad_norm": 1.9466395581270848, "learning_rate": 8.830493612610297e-07, "loss": 0.2802, "step": 14126 }, { "epoch": 0.245563107302404, "grad_norm": 1.7062372462193947, "learning_rate": 8.8303126843018e-07, "loss": 0.3095, "step": 14127 }, { "epoch": 0.24558048983990682, "grad_norm": 1.1017022056178616, "learning_rate": 8.830131743852969e-07, "loss": 0.2395, "step": 14128 }, { "epoch": 0.24559787237740965, "grad_norm": 2.3611264544345705, "learning_rate": 8.829950791264382e-07, "loss": 0.4617, "step": 14129 }, { "epoch": 0.24561525491491248, "grad_norm": 0.9100255705157521, "learning_rate": 8.829769826536608e-07, "loss": 0.1611, "step": 14130 }, { "epoch": 0.24563263745241531, "grad_norm": 1.6487502384026322, "learning_rate": 8.829588849670225e-07, "loss": 0.3348, "step": 14131 }, { "epoch": 0.24565001998991812, "grad_norm": 2.4627519465949765, "learning_rate": 8.829407860665803e-07, "loss": 0.4087, "step": 14132 }, { "epoch": 0.24566740252742095, "grad_norm": 1.7689647254310166, "learning_rate": 8.829226859523918e-07, "loss": 0.2815, "step": 14133 }, { "epoch": 0.24568478506492378, "grad_norm": 2.7536000847391313, "learning_rate": 8.829045846245144e-07, "loss": 0.4765, "step": 14134 }, { "epoch": 0.2457021676024266, "grad_norm": 2.4254490570608658, "learning_rate": 8.828864820830051e-07, "loss": 0.338, "step": 14135 }, { "epoch": 0.24571955013992944, "grad_norm": 2.122029659402218, "learning_rate": 8.828683783279217e-07, "loss": 0.3851, "step": 14136 }, { "epoch": 0.24573693267743224, "grad_norm": 2.4821275499177196, "learning_rate": 8.828502733593214e-07, "loss": 0.3098, "step": 14137 }, { "epoch": 0.24575431521493507, "grad_norm": 2.0013170566040355, "learning_rate": 8.828321671772616e-07, "loss": 0.2506, "step": 14138 }, { "epoch": 0.2457716977524379, "grad_norm": 1.8274245483003864, "learning_rate": 8.828140597817997e-07, "loss": 0.2828, "step": 14139 }, { "epoch": 0.24578908028994073, "grad_norm": 1.1992920349000291, "learning_rate": 8.82795951172993e-07, "loss": 0.1718, "step": 14140 }, { "epoch": 0.24580646282744356, "grad_norm": 2.935844383348261, "learning_rate": 8.827778413508991e-07, "loss": 0.278, "step": 14141 }, { "epoch": 0.24582384536494636, "grad_norm": 4.905173395378596, "learning_rate": 8.827597303155751e-07, "loss": 0.3799, "step": 14142 }, { "epoch": 0.2458412279024492, "grad_norm": 3.69229544886006, "learning_rate": 8.827416180670787e-07, "loss": 0.2364, "step": 14143 }, { "epoch": 0.24585861043995202, "grad_norm": 2.1646560417558347, "learning_rate": 8.827235046054672e-07, "loss": 0.4391, "step": 14144 }, { "epoch": 0.24587599297745485, "grad_norm": 1.584961381961988, "learning_rate": 8.827053899307979e-07, "loss": 0.4503, "step": 14145 }, { "epoch": 0.24589337551495768, "grad_norm": 1.5950673646811788, "learning_rate": 8.826872740431284e-07, "loss": 0.2492, "step": 14146 }, { "epoch": 0.2459107580524605, "grad_norm": 1.7592415026626576, "learning_rate": 8.82669156942516e-07, "loss": 0.291, "step": 14147 }, { "epoch": 0.24592814058996332, "grad_norm": 1.9467852487930777, "learning_rate": 8.826510386290182e-07, "loss": 0.5623, "step": 14148 }, { "epoch": 0.24594552312746615, "grad_norm": 1.3310884090444959, "learning_rate": 8.826329191026925e-07, "loss": 0.303, "step": 14149 }, { "epoch": 0.24596290566496898, "grad_norm": 1.7685805357421458, "learning_rate": 8.82614798363596e-07, "loss": 0.5137, "step": 14150 }, { "epoch": 0.2459802882024718, "grad_norm": 1.6480766075099087, "learning_rate": 8.825966764117864e-07, "loss": 0.3328, "step": 14151 }, { "epoch": 0.2459976707399746, "grad_norm": 3.79452493814227, "learning_rate": 8.825785532473211e-07, "loss": 0.4825, "step": 14152 }, { "epoch": 0.24601505327747744, "grad_norm": 2.1357947259848786, "learning_rate": 8.825604288702575e-07, "loss": 0.2699, "step": 14153 }, { "epoch": 0.24603243581498027, "grad_norm": 1.476126630206825, "learning_rate": 8.825423032806532e-07, "loss": 0.3206, "step": 14154 }, { "epoch": 0.2460498183524831, "grad_norm": 2.270963080738095, "learning_rate": 8.825241764785654e-07, "loss": 0.3294, "step": 14155 }, { "epoch": 0.24606720088998593, "grad_norm": 1.8781694645361808, "learning_rate": 8.825060484640516e-07, "loss": 0.2359, "step": 14156 }, { "epoch": 0.24608458342748873, "grad_norm": 2.053246860683158, "learning_rate": 8.824879192371695e-07, "loss": 0.4326, "step": 14157 }, { "epoch": 0.24610196596499156, "grad_norm": 2.067443673360214, "learning_rate": 8.824697887979764e-07, "loss": 0.3853, "step": 14158 }, { "epoch": 0.2461193485024944, "grad_norm": 1.5943740107085045, "learning_rate": 8.824516571465297e-07, "loss": 0.2896, "step": 14159 }, { "epoch": 0.24613673103999723, "grad_norm": 2.609235997053633, "learning_rate": 8.824335242828868e-07, "loss": 0.2763, "step": 14160 }, { "epoch": 0.24615411357750006, "grad_norm": 1.7706506884977709, "learning_rate": 8.824153902071056e-07, "loss": 0.4739, "step": 14161 }, { "epoch": 0.24617149611500286, "grad_norm": 1.333616791123361, "learning_rate": 8.823972549192431e-07, "loss": 0.2744, "step": 14162 }, { "epoch": 0.2461888786525057, "grad_norm": 1.4002748513639076, "learning_rate": 8.82379118419357e-07, "loss": 0.2191, "step": 14163 }, { "epoch": 0.24620626119000852, "grad_norm": 1.7896332473311212, "learning_rate": 8.823609807075047e-07, "loss": 0.3205, "step": 14164 }, { "epoch": 0.24622364372751135, "grad_norm": 1.6085528461433998, "learning_rate": 8.823428417837438e-07, "loss": 0.3757, "step": 14165 }, { "epoch": 0.24624102626501418, "grad_norm": 2.224742091965954, "learning_rate": 8.823247016481316e-07, "loss": 0.4033, "step": 14166 }, { "epoch": 0.24625840880251698, "grad_norm": 2.085104241467329, "learning_rate": 8.823065603007258e-07, "loss": 0.357, "step": 14167 }, { "epoch": 0.2462757913400198, "grad_norm": 1.7921738605208988, "learning_rate": 8.822884177415839e-07, "loss": 0.2683, "step": 14168 }, { "epoch": 0.24629317387752264, "grad_norm": 1.7503868605471062, "learning_rate": 8.822702739707632e-07, "loss": 0.2952, "step": 14169 }, { "epoch": 0.24631055641502547, "grad_norm": 2.979460011644161, "learning_rate": 8.822521289883213e-07, "loss": 0.4684, "step": 14170 }, { "epoch": 0.2463279389525283, "grad_norm": 1.6395388494054306, "learning_rate": 8.82233982794316e-07, "loss": 0.1786, "step": 14171 }, { "epoch": 0.2463453214900311, "grad_norm": 1.094777228276167, "learning_rate": 8.822158353888043e-07, "loss": 0.2733, "step": 14172 }, { "epoch": 0.24636270402753394, "grad_norm": 1.4847745119729236, "learning_rate": 8.821976867718441e-07, "loss": 0.2278, "step": 14173 }, { "epoch": 0.24638008656503677, "grad_norm": 1.469414193885575, "learning_rate": 8.821795369434928e-07, "loss": 0.2258, "step": 14174 }, { "epoch": 0.2463974691025396, "grad_norm": 2.146477163080729, "learning_rate": 8.821613859038079e-07, "loss": 0.2887, "step": 14175 }, { "epoch": 0.24641485164004243, "grad_norm": 1.4957288213369289, "learning_rate": 8.82143233652847e-07, "loss": 0.2997, "step": 14176 }, { "epoch": 0.24643223417754523, "grad_norm": 2.348565390163436, "learning_rate": 8.821250801906675e-07, "loss": 0.2154, "step": 14177 }, { "epoch": 0.24644961671504806, "grad_norm": 1.8053758604127537, "learning_rate": 8.82106925517327e-07, "loss": 0.34, "step": 14178 }, { "epoch": 0.2464669992525509, "grad_norm": 1.6220061360070466, "learning_rate": 8.820887696328831e-07, "loss": 0.3666, "step": 14179 }, { "epoch": 0.24648438179005372, "grad_norm": 1.6843335342769226, "learning_rate": 8.820706125373934e-07, "loss": 0.4056, "step": 14180 }, { "epoch": 0.24650176432755655, "grad_norm": 2.306840356892627, "learning_rate": 8.820524542309154e-07, "loss": 0.4647, "step": 14181 }, { "epoch": 0.24651914686505935, "grad_norm": 2.8325046065701334, "learning_rate": 8.820342947135064e-07, "loss": 0.5039, "step": 14182 }, { "epoch": 0.24653652940256218, "grad_norm": 2.5013851355331718, "learning_rate": 8.820161339852243e-07, "loss": 0.3258, "step": 14183 }, { "epoch": 0.246553911940065, "grad_norm": 1.2037503884103178, "learning_rate": 8.819979720461266e-07, "loss": 0.3173, "step": 14184 }, { "epoch": 0.24657129447756784, "grad_norm": 1.6246072687626656, "learning_rate": 8.819798088962709e-07, "loss": 0.3769, "step": 14185 }, { "epoch": 0.24658867701507067, "grad_norm": 2.4985033964872687, "learning_rate": 8.819616445357145e-07, "loss": 0.6105, "step": 14186 }, { "epoch": 0.24660605955257348, "grad_norm": 1.8932800341712992, "learning_rate": 8.819434789645152e-07, "loss": 0.2272, "step": 14187 }, { "epoch": 0.2466234420900763, "grad_norm": 1.3856527692717524, "learning_rate": 8.819253121827305e-07, "loss": 0.226, "step": 14188 }, { "epoch": 0.24664082462757914, "grad_norm": 1.676306383410798, "learning_rate": 8.81907144190418e-07, "loss": 0.4355, "step": 14189 }, { "epoch": 0.24665820716508197, "grad_norm": 1.8433911784115604, "learning_rate": 8.818889749876352e-07, "loss": 0.1773, "step": 14190 }, { "epoch": 0.2466755897025848, "grad_norm": 1.281937586346634, "learning_rate": 8.8187080457444e-07, "loss": 0.2616, "step": 14191 }, { "epoch": 0.2466929722400876, "grad_norm": 1.4456482768241814, "learning_rate": 8.818526329508896e-07, "loss": 0.3315, "step": 14192 }, { "epoch": 0.24671035477759043, "grad_norm": 2.138194246949519, "learning_rate": 8.818344601170418e-07, "loss": 0.4415, "step": 14193 }, { "epoch": 0.24672773731509326, "grad_norm": 1.7686214040915202, "learning_rate": 8.818162860729541e-07, "loss": 0.272, "step": 14194 }, { "epoch": 0.2467451198525961, "grad_norm": 1.7335342560632834, "learning_rate": 8.817981108186842e-07, "loss": 0.3183, "step": 14195 }, { "epoch": 0.24676250239009892, "grad_norm": 1.3494421813495536, "learning_rate": 8.817799343542898e-07, "loss": 0.2192, "step": 14196 }, { "epoch": 0.24677988492760172, "grad_norm": 3.061379238850152, "learning_rate": 8.817617566798284e-07, "loss": 0.3701, "step": 14197 }, { "epoch": 0.24679726746510455, "grad_norm": 1.2109007917599421, "learning_rate": 8.817435777953573e-07, "loss": 0.2044, "step": 14198 }, { "epoch": 0.24681465000260738, "grad_norm": 2.142148467968133, "learning_rate": 8.817253977009347e-07, "loss": 0.3097, "step": 14199 }, { "epoch": 0.2468320325401102, "grad_norm": 1.97954159928267, "learning_rate": 8.817072163966178e-07, "loss": 0.3272, "step": 14200 }, { "epoch": 0.24684941507761304, "grad_norm": 1.837764387019083, "learning_rate": 8.816890338824644e-07, "loss": 0.1917, "step": 14201 }, { "epoch": 0.24686679761511585, "grad_norm": 1.6810962143920805, "learning_rate": 8.816708501585322e-07, "loss": 0.2347, "step": 14202 }, { "epoch": 0.24688418015261868, "grad_norm": 1.5197617552962097, "learning_rate": 8.816526652248786e-07, "loss": 0.4481, "step": 14203 }, { "epoch": 0.2469015626901215, "grad_norm": 1.9043122804483574, "learning_rate": 8.816344790815613e-07, "loss": 0.246, "step": 14204 }, { "epoch": 0.24691894522762434, "grad_norm": 3.050978392258156, "learning_rate": 8.816162917286381e-07, "loss": 0.2926, "step": 14205 }, { "epoch": 0.24693632776512717, "grad_norm": 3.328074540642266, "learning_rate": 8.815981031661665e-07, "loss": 0.4169, "step": 14206 }, { "epoch": 0.24695371030262997, "grad_norm": 2.907539314079655, "learning_rate": 8.815799133942043e-07, "loss": 0.2934, "step": 14207 }, { "epoch": 0.2469710928401328, "grad_norm": 1.6133040785089783, "learning_rate": 8.815617224128091e-07, "loss": 0.2168, "step": 14208 }, { "epoch": 0.24698847537763563, "grad_norm": 1.7485089217757792, "learning_rate": 8.815435302220384e-07, "loss": 0.241, "step": 14209 }, { "epoch": 0.24700585791513846, "grad_norm": 1.8341360498600567, "learning_rate": 8.8152533682195e-07, "loss": 0.3411, "step": 14210 }, { "epoch": 0.24702324045264126, "grad_norm": 3.511406907262538, "learning_rate": 8.815071422126016e-07, "loss": 0.5041, "step": 14211 }, { "epoch": 0.2470406229901441, "grad_norm": 10.41036913816244, "learning_rate": 8.814889463940508e-07, "loss": 0.3493, "step": 14212 }, { "epoch": 0.24705800552764692, "grad_norm": 3.361943205769137, "learning_rate": 8.814707493663552e-07, "loss": 0.3615, "step": 14213 }, { "epoch": 0.24707538806514975, "grad_norm": 1.4681599400586975, "learning_rate": 8.814525511295726e-07, "loss": 0.1852, "step": 14214 }, { "epoch": 0.24709277060265258, "grad_norm": 4.376583322677148, "learning_rate": 8.814343516837607e-07, "loss": 0.4342, "step": 14215 }, { "epoch": 0.2471101531401554, "grad_norm": 1.4399363851345959, "learning_rate": 8.814161510289771e-07, "loss": 0.195, "step": 14216 }, { "epoch": 0.24712753567765822, "grad_norm": 1.9293246162651565, "learning_rate": 8.813979491652794e-07, "loss": 0.3501, "step": 14217 }, { "epoch": 0.24714491821516105, "grad_norm": 1.2208326073584346, "learning_rate": 8.813797460927255e-07, "loss": 0.2998, "step": 14218 }, { "epoch": 0.24716230075266388, "grad_norm": 1.794857343049225, "learning_rate": 8.81361541811373e-07, "loss": 0.3471, "step": 14219 }, { "epoch": 0.2471796832901667, "grad_norm": 2.471209252779605, "learning_rate": 8.813433363212796e-07, "loss": 0.2844, "step": 14220 }, { "epoch": 0.2471970658276695, "grad_norm": 1.9276124107918966, "learning_rate": 8.81325129622503e-07, "loss": 0.3771, "step": 14221 }, { "epoch": 0.24721444836517234, "grad_norm": 2.3074160687094767, "learning_rate": 8.813069217151008e-07, "loss": 0.3408, "step": 14222 }, { "epoch": 0.24723183090267517, "grad_norm": 2.0447163810569937, "learning_rate": 8.812887125991308e-07, "loss": 0.2003, "step": 14223 }, { "epoch": 0.247249213440178, "grad_norm": 2.0808545617479437, "learning_rate": 8.812705022746509e-07, "loss": 0.3996, "step": 14224 }, { "epoch": 0.24726659597768083, "grad_norm": 2.238204653530708, "learning_rate": 8.812522907417186e-07, "loss": 0.3179, "step": 14225 }, { "epoch": 0.24728397851518363, "grad_norm": 1.8522884509467714, "learning_rate": 8.812340780003916e-07, "loss": 0.2189, "step": 14226 }, { "epoch": 0.24730136105268646, "grad_norm": 3.2425689306792433, "learning_rate": 8.812158640507278e-07, "loss": 0.3268, "step": 14227 }, { "epoch": 0.2473187435901893, "grad_norm": 1.2079858878168401, "learning_rate": 8.811976488927848e-07, "loss": 0.1828, "step": 14228 }, { "epoch": 0.24733612612769212, "grad_norm": 1.210362964023777, "learning_rate": 8.811794325266201e-07, "loss": 0.3032, "step": 14229 }, { "epoch": 0.24735350866519495, "grad_norm": 2.994255217688958, "learning_rate": 8.81161214952292e-07, "loss": 0.3373, "step": 14230 }, { "epoch": 0.24737089120269776, "grad_norm": 2.9719731379985506, "learning_rate": 8.81142996169858e-07, "loss": 0.39, "step": 14231 }, { "epoch": 0.2473882737402006, "grad_norm": 1.4482790696876193, "learning_rate": 8.811247761793756e-07, "loss": 0.237, "step": 14232 }, { "epoch": 0.24740565627770342, "grad_norm": 2.258902141943631, "learning_rate": 8.811065549809028e-07, "loss": 0.3852, "step": 14233 }, { "epoch": 0.24742303881520625, "grad_norm": 4.021733814343713, "learning_rate": 8.810883325744973e-07, "loss": 0.4446, "step": 14234 }, { "epoch": 0.24744042135270908, "grad_norm": 5.180229164457368, "learning_rate": 8.810701089602168e-07, "loss": 0.3434, "step": 14235 }, { "epoch": 0.24745780389021188, "grad_norm": 3.5224996093426695, "learning_rate": 8.810518841381192e-07, "loss": 0.4262, "step": 14236 }, { "epoch": 0.2474751864277147, "grad_norm": 2.6615120430054033, "learning_rate": 8.810336581082621e-07, "loss": 0.2737, "step": 14237 }, { "epoch": 0.24749256896521754, "grad_norm": 1.504721487728658, "learning_rate": 8.810154308707032e-07, "loss": 0.2805, "step": 14238 }, { "epoch": 0.24750995150272037, "grad_norm": 1.2327832459726822, "learning_rate": 8.809972024255006e-07, "loss": 0.2441, "step": 14239 }, { "epoch": 0.2475273340402232, "grad_norm": 1.7938980494618417, "learning_rate": 8.809789727727117e-07, "loss": 0.3056, "step": 14240 }, { "epoch": 0.247544716577726, "grad_norm": 1.8337077120994216, "learning_rate": 8.809607419123946e-07, "loss": 0.3043, "step": 14241 }, { "epoch": 0.24756209911522883, "grad_norm": 2.7853161762991916, "learning_rate": 8.809425098446071e-07, "loss": 0.4489, "step": 14242 }, { "epoch": 0.24757948165273166, "grad_norm": 1.6690075517227225, "learning_rate": 8.809242765694064e-07, "loss": 0.2291, "step": 14243 }, { "epoch": 0.2475968641902345, "grad_norm": 2.231764648355886, "learning_rate": 8.809060420868511e-07, "loss": 0.3667, "step": 14244 }, { "epoch": 0.24761424672773732, "grad_norm": 2.5323625818527162, "learning_rate": 8.808878063969985e-07, "loss": 0.3681, "step": 14245 }, { "epoch": 0.24763162926524013, "grad_norm": 2.2872903652762235, "learning_rate": 8.808695694999066e-07, "loss": 0.4091, "step": 14246 }, { "epoch": 0.24764901180274296, "grad_norm": 1.585646009382732, "learning_rate": 8.80851331395633e-07, "loss": 0.3553, "step": 14247 }, { "epoch": 0.2476663943402458, "grad_norm": 2.3731677324367593, "learning_rate": 8.808330920842357e-07, "loss": 0.2456, "step": 14248 }, { "epoch": 0.24768377687774862, "grad_norm": 2.369717773199859, "learning_rate": 8.808148515657724e-07, "loss": 0.3693, "step": 14249 }, { "epoch": 0.24770115941525145, "grad_norm": 2.462769000006347, "learning_rate": 8.80796609840301e-07, "loss": 0.3373, "step": 14250 }, { "epoch": 0.24771854195275425, "grad_norm": 1.4226558608909794, "learning_rate": 8.807783669078792e-07, "loss": 0.3677, "step": 14251 }, { "epoch": 0.24773592449025708, "grad_norm": 1.4726328720423834, "learning_rate": 8.80760122768565e-07, "loss": 0.3545, "step": 14252 }, { "epoch": 0.2477533070277599, "grad_norm": 1.164857151517162, "learning_rate": 8.80741877422416e-07, "loss": 0.1902, "step": 14253 }, { "epoch": 0.24777068956526274, "grad_norm": 1.5715611932964704, "learning_rate": 8.807236308694901e-07, "loss": 0.1691, "step": 14254 }, { "epoch": 0.24778807210276557, "grad_norm": 1.6707824653930925, "learning_rate": 8.807053831098454e-07, "loss": 0.4039, "step": 14255 }, { "epoch": 0.24780545464026837, "grad_norm": 1.9771101554745354, "learning_rate": 8.806871341435394e-07, "loss": 0.2731, "step": 14256 }, { "epoch": 0.2478228371777712, "grad_norm": 2.1457853784475627, "learning_rate": 8.8066888397063e-07, "loss": 0.3219, "step": 14257 }, { "epoch": 0.24784021971527403, "grad_norm": 1.7181577909449675, "learning_rate": 8.806506325911752e-07, "loss": 0.3964, "step": 14258 }, { "epoch": 0.24785760225277687, "grad_norm": 2.0472327142692937, "learning_rate": 8.806323800052327e-07, "loss": 0.2971, "step": 14259 }, { "epoch": 0.2478749847902797, "grad_norm": 1.6299446952995729, "learning_rate": 8.806141262128603e-07, "loss": 0.3419, "step": 14260 }, { "epoch": 0.2478923673277825, "grad_norm": 1.6431585365485397, "learning_rate": 8.805958712141162e-07, "loss": 0.2913, "step": 14261 }, { "epoch": 0.24790974986528533, "grad_norm": 2.0370082390720663, "learning_rate": 8.805776150090578e-07, "loss": 0.3757, "step": 14262 }, { "epoch": 0.24792713240278816, "grad_norm": 1.9058284067131943, "learning_rate": 8.805593575977433e-07, "loss": 0.3173, "step": 14263 }, { "epoch": 0.247944514940291, "grad_norm": 2.8043112025275585, "learning_rate": 8.805410989802304e-07, "loss": 0.3049, "step": 14264 }, { "epoch": 0.24796189747779382, "grad_norm": 1.5240520242476252, "learning_rate": 8.805228391565771e-07, "loss": 0.2671, "step": 14265 }, { "epoch": 0.24797928001529662, "grad_norm": 2.2144112965906193, "learning_rate": 8.805045781268411e-07, "loss": 0.5241, "step": 14266 }, { "epoch": 0.24799666255279945, "grad_norm": 1.2884660031617767, "learning_rate": 8.804863158910805e-07, "loss": 0.3714, "step": 14267 }, { "epoch": 0.24801404509030228, "grad_norm": 1.616511347405332, "learning_rate": 8.804680524493528e-07, "loss": 0.2334, "step": 14268 }, { "epoch": 0.2480314276278051, "grad_norm": 1.5268834511257192, "learning_rate": 8.804497878017164e-07, "loss": 0.3296, "step": 14269 }, { "epoch": 0.24804881016530794, "grad_norm": 2.2044509735024516, "learning_rate": 8.804315219482289e-07, "loss": 0.2461, "step": 14270 }, { "epoch": 0.24806619270281074, "grad_norm": 1.4783316840290466, "learning_rate": 8.804132548889482e-07, "loss": 0.2991, "step": 14271 }, { "epoch": 0.24808357524031358, "grad_norm": 1.9873513654325063, "learning_rate": 8.803949866239322e-07, "loss": 0.2797, "step": 14272 }, { "epoch": 0.2481009577778164, "grad_norm": 3.7339906246401657, "learning_rate": 8.803767171532388e-07, "loss": 0.3414, "step": 14273 }, { "epoch": 0.24811834031531924, "grad_norm": 1.6813424967957, "learning_rate": 8.80358446476926e-07, "loss": 0.2973, "step": 14274 }, { "epoch": 0.24813572285282207, "grad_norm": 1.599234839495625, "learning_rate": 8.803401745950518e-07, "loss": 0.3891, "step": 14275 }, { "epoch": 0.24815310539032487, "grad_norm": 1.9880637465922648, "learning_rate": 8.803219015076737e-07, "loss": 0.3021, "step": 14276 }, { "epoch": 0.2481704879278277, "grad_norm": 1.2942425694530406, "learning_rate": 8.8030362721485e-07, "loss": 0.3206, "step": 14277 }, { "epoch": 0.24818787046533053, "grad_norm": 3.4905151445328517, "learning_rate": 8.802853517166384e-07, "loss": 0.4361, "step": 14278 }, { "epoch": 0.24820525300283336, "grad_norm": 1.5670719144646197, "learning_rate": 8.802670750130971e-07, "loss": 0.1792, "step": 14279 }, { "epoch": 0.2482226355403362, "grad_norm": 1.8992778026880552, "learning_rate": 8.802487971042837e-07, "loss": 0.3491, "step": 14280 }, { "epoch": 0.248240018077839, "grad_norm": 2.05932442929012, "learning_rate": 8.802305179902564e-07, "loss": 0.2906, "step": 14281 }, { "epoch": 0.24825740061534182, "grad_norm": 2.954486716133874, "learning_rate": 8.802122376710728e-07, "loss": 0.3202, "step": 14282 }, { "epoch": 0.24827478315284465, "grad_norm": 1.3669689253775845, "learning_rate": 8.801939561467913e-07, "loss": 0.2439, "step": 14283 }, { "epoch": 0.24829216569034748, "grad_norm": 1.2915484274937523, "learning_rate": 8.801756734174694e-07, "loss": 0.312, "step": 14284 }, { "epoch": 0.2483095482278503, "grad_norm": 1.6054578329564186, "learning_rate": 8.801573894831656e-07, "loss": 0.4511, "step": 14285 }, { "epoch": 0.24832693076535312, "grad_norm": 1.8016276007731375, "learning_rate": 8.801391043439372e-07, "loss": 0.4031, "step": 14286 }, { "epoch": 0.24834431330285595, "grad_norm": 2.117846356534343, "learning_rate": 8.801208179998425e-07, "loss": 0.363, "step": 14287 }, { "epoch": 0.24836169584035878, "grad_norm": 2.4376873119378626, "learning_rate": 8.801025304509395e-07, "loss": 0.4315, "step": 14288 }, { "epoch": 0.2483790783778616, "grad_norm": 2.0163712682512913, "learning_rate": 8.800842416972861e-07, "loss": 0.313, "step": 14289 }, { "epoch": 0.24839646091536444, "grad_norm": 1.7472101176987507, "learning_rate": 8.800659517389402e-07, "loss": 0.4086, "step": 14290 }, { "epoch": 0.24841384345286724, "grad_norm": 2.290833995700077, "learning_rate": 8.800476605759598e-07, "loss": 0.2459, "step": 14291 }, { "epoch": 0.24843122599037007, "grad_norm": 1.070807483179876, "learning_rate": 8.800293682084029e-07, "loss": 0.2319, "step": 14292 }, { "epoch": 0.2484486085278729, "grad_norm": 4.281632297559644, "learning_rate": 8.800110746363276e-07, "loss": 0.3279, "step": 14293 }, { "epoch": 0.24846599106537573, "grad_norm": 1.562624690876912, "learning_rate": 8.799927798597915e-07, "loss": 0.2328, "step": 14294 }, { "epoch": 0.24848337360287856, "grad_norm": 1.4677806636958286, "learning_rate": 8.79974483878853e-07, "loss": 0.3239, "step": 14295 }, { "epoch": 0.24850075614038136, "grad_norm": 1.5232935514629546, "learning_rate": 8.7995618669357e-07, "loss": 0.1992, "step": 14296 }, { "epoch": 0.2485181386778842, "grad_norm": 2.8369229096664115, "learning_rate": 8.799378883040003e-07, "loss": 0.4377, "step": 14297 }, { "epoch": 0.24853552121538702, "grad_norm": 1.974422843443956, "learning_rate": 8.799195887102021e-07, "loss": 0.3281, "step": 14298 }, { "epoch": 0.24855290375288985, "grad_norm": 2.1581701015804677, "learning_rate": 8.799012879122332e-07, "loss": 0.2617, "step": 14299 }, { "epoch": 0.24857028629039268, "grad_norm": 2.3902542554802033, "learning_rate": 8.79882985910152e-07, "loss": 0.5964, "step": 14300 }, { "epoch": 0.24858766882789549, "grad_norm": 2.0335710097931434, "learning_rate": 8.798646827040159e-07, "loss": 0.3594, "step": 14301 }, { "epoch": 0.24860505136539832, "grad_norm": 1.9463254695446925, "learning_rate": 8.798463782938834e-07, "loss": 0.3511, "step": 14302 }, { "epoch": 0.24862243390290115, "grad_norm": 2.5798821333579305, "learning_rate": 8.798280726798126e-07, "loss": 0.3842, "step": 14303 }, { "epoch": 0.24863981644040398, "grad_norm": 1.4792062318876265, "learning_rate": 8.79809765861861e-07, "loss": 0.2711, "step": 14304 }, { "epoch": 0.2486571989779068, "grad_norm": 1.5461549753527712, "learning_rate": 8.797914578400869e-07, "loss": 0.3726, "step": 14305 }, { "epoch": 0.2486745815154096, "grad_norm": 1.7082052138848511, "learning_rate": 8.797731486145486e-07, "loss": 0.4012, "step": 14306 }, { "epoch": 0.24869196405291244, "grad_norm": 1.9607731579824164, "learning_rate": 8.797548381853035e-07, "loss": 0.2948, "step": 14307 }, { "epoch": 0.24870934659041527, "grad_norm": 3.6557019242716695, "learning_rate": 8.797365265524103e-07, "loss": 0.2779, "step": 14308 }, { "epoch": 0.2487267291279181, "grad_norm": 2.3350100091668136, "learning_rate": 8.797182137159267e-07, "loss": 0.4235, "step": 14309 }, { "epoch": 0.24874411166542093, "grad_norm": 2.1533116981366507, "learning_rate": 8.796998996759107e-07, "loss": 0.3671, "step": 14310 }, { "epoch": 0.24876149420292373, "grad_norm": 1.5675930837920076, "learning_rate": 8.796815844324204e-07, "loss": 0.3713, "step": 14311 }, { "epoch": 0.24877887674042656, "grad_norm": 2.2085043299054132, "learning_rate": 8.79663267985514e-07, "loss": 0.5008, "step": 14312 }, { "epoch": 0.2487962592779294, "grad_norm": 3.761226002677616, "learning_rate": 8.796449503352494e-07, "loss": 0.4917, "step": 14313 }, { "epoch": 0.24881364181543222, "grad_norm": 2.3015705478566226, "learning_rate": 8.796266314816847e-07, "loss": 0.3989, "step": 14314 }, { "epoch": 0.24883102435293505, "grad_norm": 1.855250062757222, "learning_rate": 8.79608311424878e-07, "loss": 0.1813, "step": 14315 }, { "epoch": 0.24884840689043786, "grad_norm": 2.0988999645458715, "learning_rate": 8.795899901648874e-07, "loss": 0.2394, "step": 14316 }, { "epoch": 0.2488657894279407, "grad_norm": 1.374168625142059, "learning_rate": 8.795716677017709e-07, "loss": 0.3388, "step": 14317 }, { "epoch": 0.24888317196544352, "grad_norm": 1.6116395940762442, "learning_rate": 8.795533440355864e-07, "loss": 0.2325, "step": 14318 }, { "epoch": 0.24890055450294635, "grad_norm": 2.4573660188701743, "learning_rate": 8.795350191663922e-07, "loss": 0.4068, "step": 14319 }, { "epoch": 0.24891793704044918, "grad_norm": 1.4534675934807977, "learning_rate": 8.795166930942464e-07, "loss": 0.3027, "step": 14320 }, { "epoch": 0.24893531957795198, "grad_norm": 1.9104573808138001, "learning_rate": 8.79498365819207e-07, "loss": 0.1558, "step": 14321 }, { "epoch": 0.2489527021154548, "grad_norm": 1.7247015624658975, "learning_rate": 8.794800373413321e-07, "loss": 0.368, "step": 14322 }, { "epoch": 0.24897008465295764, "grad_norm": 1.641869999982252, "learning_rate": 8.794617076606798e-07, "loss": 0.3769, "step": 14323 }, { "epoch": 0.24898746719046047, "grad_norm": 1.8268469375501857, "learning_rate": 8.794433767773082e-07, "loss": 0.4517, "step": 14324 }, { "epoch": 0.2490048497279633, "grad_norm": 1.9886907887597312, "learning_rate": 8.794250446912753e-07, "loss": 0.4001, "step": 14325 }, { "epoch": 0.2490222322654661, "grad_norm": 1.4294921238925709, "learning_rate": 8.794067114026393e-07, "loss": 0.176, "step": 14326 }, { "epoch": 0.24903961480296893, "grad_norm": 1.4557581627574943, "learning_rate": 8.793883769114585e-07, "loss": 0.2857, "step": 14327 }, { "epoch": 0.24905699734047176, "grad_norm": 1.675384624843593, "learning_rate": 8.793700412177908e-07, "loss": 0.3068, "step": 14328 }, { "epoch": 0.2490743798779746, "grad_norm": 1.6279621538229032, "learning_rate": 8.793517043216943e-07, "loss": 0.2418, "step": 14329 }, { "epoch": 0.24909176241547742, "grad_norm": 1.9171115137351546, "learning_rate": 8.793333662232271e-07, "loss": 0.2165, "step": 14330 }, { "epoch": 0.24910914495298023, "grad_norm": 1.2522677820586507, "learning_rate": 8.793150269224473e-07, "loss": 0.3039, "step": 14331 }, { "epoch": 0.24912652749048306, "grad_norm": 1.461103323655472, "learning_rate": 8.792966864194133e-07, "loss": 0.2746, "step": 14332 }, { "epoch": 0.2491439100279859, "grad_norm": 3.114505012371192, "learning_rate": 8.792783447141829e-07, "loss": 0.3751, "step": 14333 }, { "epoch": 0.24916129256548872, "grad_norm": 2.2682593815623493, "learning_rate": 8.792600018068144e-07, "loss": 0.2868, "step": 14334 }, { "epoch": 0.24917867510299155, "grad_norm": 2.099296703289276, "learning_rate": 8.79241657697366e-07, "loss": 0.4149, "step": 14335 }, { "epoch": 0.24919605764049435, "grad_norm": 1.0131192756558758, "learning_rate": 8.792233123858957e-07, "loss": 0.2769, "step": 14336 }, { "epoch": 0.24921344017799718, "grad_norm": 1.4373635234788147, "learning_rate": 8.792049658724616e-07, "loss": 0.264, "step": 14337 }, { "epoch": 0.2492308227155, "grad_norm": 2.0196124455468514, "learning_rate": 8.791866181571221e-07, "loss": 0.3127, "step": 14338 }, { "epoch": 0.24924820525300284, "grad_norm": 1.4231016394173706, "learning_rate": 8.791682692399352e-07, "loss": 0.2514, "step": 14339 }, { "epoch": 0.24926558779050567, "grad_norm": 2.274307994238357, "learning_rate": 8.79149919120959e-07, "loss": 0.3576, "step": 14340 }, { "epoch": 0.24928297032800847, "grad_norm": 1.5032090784910246, "learning_rate": 8.791315678002517e-07, "loss": 0.2465, "step": 14341 }, { "epoch": 0.2493003528655113, "grad_norm": 2.1288845901221043, "learning_rate": 8.791132152778715e-07, "loss": 0.2595, "step": 14342 }, { "epoch": 0.24931773540301413, "grad_norm": 2.937505866115514, "learning_rate": 8.790948615538765e-07, "loss": 0.3041, "step": 14343 }, { "epoch": 0.24933511794051696, "grad_norm": 1.8191495450312467, "learning_rate": 8.79076506628325e-07, "loss": 0.2133, "step": 14344 }, { "epoch": 0.2493525004780198, "grad_norm": 2.131967466285304, "learning_rate": 8.79058150501275e-07, "loss": 0.3771, "step": 14345 }, { "epoch": 0.2493698830155226, "grad_norm": 1.639497327873702, "learning_rate": 8.790397931727851e-07, "loss": 0.3295, "step": 14346 }, { "epoch": 0.24938726555302543, "grad_norm": 2.0212216646499614, "learning_rate": 8.79021434642913e-07, "loss": 0.2823, "step": 14347 }, { "epoch": 0.24940464809052826, "grad_norm": 1.3759827375582898, "learning_rate": 8.790030749117169e-07, "loss": 0.248, "step": 14348 }, { "epoch": 0.2494220306280311, "grad_norm": 1.516879107980696, "learning_rate": 8.789847139792554e-07, "loss": 0.2752, "step": 14349 }, { "epoch": 0.2494394131655339, "grad_norm": 2.688255088438558, "learning_rate": 8.789663518455863e-07, "loss": 0.2368, "step": 14350 }, { "epoch": 0.24945679570303672, "grad_norm": 2.190053141858686, "learning_rate": 8.78947988510768e-07, "loss": 0.4698, "step": 14351 }, { "epoch": 0.24947417824053955, "grad_norm": 1.9971809807008731, "learning_rate": 8.789296239748587e-07, "loss": 0.3932, "step": 14352 }, { "epoch": 0.24949156077804238, "grad_norm": 2.4238559895603564, "learning_rate": 8.789112582379163e-07, "loss": 0.5615, "step": 14353 }, { "epoch": 0.2495089433155452, "grad_norm": 1.3472638219990054, "learning_rate": 8.788928912999996e-07, "loss": 0.3479, "step": 14354 }, { "epoch": 0.24952632585304801, "grad_norm": 3.020662893046402, "learning_rate": 8.788745231611663e-07, "loss": 0.3891, "step": 14355 }, { "epoch": 0.24954370839055084, "grad_norm": 2.3610638201396394, "learning_rate": 8.788561538214749e-07, "loss": 0.3454, "step": 14356 }, { "epoch": 0.24956109092805367, "grad_norm": 2.2282569440808295, "learning_rate": 8.788377832809835e-07, "loss": 0.4776, "step": 14357 }, { "epoch": 0.2495784734655565, "grad_norm": 1.2704834463038195, "learning_rate": 8.788194115397502e-07, "loss": 0.3443, "step": 14358 }, { "epoch": 0.24959585600305934, "grad_norm": 2.7116471779303915, "learning_rate": 8.788010385978336e-07, "loss": 0.3106, "step": 14359 }, { "epoch": 0.24961323854056214, "grad_norm": 1.4228651756472823, "learning_rate": 8.787826644552916e-07, "loss": 0.2997, "step": 14360 }, { "epoch": 0.24963062107806497, "grad_norm": 2.94236014344725, "learning_rate": 8.787642891121826e-07, "loss": 0.4593, "step": 14361 }, { "epoch": 0.2496480036155678, "grad_norm": 1.1749897693053954, "learning_rate": 8.787459125685648e-07, "loss": 0.2514, "step": 14362 }, { "epoch": 0.24966538615307063, "grad_norm": 1.6352622562854144, "learning_rate": 8.787275348244963e-07, "loss": 0.337, "step": 14363 }, { "epoch": 0.24968276869057346, "grad_norm": 1.7345359416047132, "learning_rate": 8.787091558800357e-07, "loss": 0.492, "step": 14364 }, { "epoch": 0.24970015122807626, "grad_norm": 1.9185351916277706, "learning_rate": 8.786907757352408e-07, "loss": 0.2952, "step": 14365 }, { "epoch": 0.2497175337655791, "grad_norm": 2.7728477896729253, "learning_rate": 8.786723943901703e-07, "loss": 0.2604, "step": 14366 }, { "epoch": 0.24973491630308192, "grad_norm": 1.9981271893649213, "learning_rate": 8.786540118448822e-07, "loss": 0.1819, "step": 14367 }, { "epoch": 0.24975229884058475, "grad_norm": 1.8125786757702285, "learning_rate": 8.786356280994348e-07, "loss": 0.1811, "step": 14368 }, { "epoch": 0.24976968137808758, "grad_norm": 1.6031902280265895, "learning_rate": 8.786172431538864e-07, "loss": 0.3194, "step": 14369 }, { "epoch": 0.24978706391559038, "grad_norm": 1.669655605405129, "learning_rate": 8.785988570082952e-07, "loss": 0.1671, "step": 14370 }, { "epoch": 0.24980444645309322, "grad_norm": 4.131444486110488, "learning_rate": 8.785804696627197e-07, "loss": 0.3813, "step": 14371 }, { "epoch": 0.24982182899059605, "grad_norm": 1.8737432347617013, "learning_rate": 8.785620811172179e-07, "loss": 0.2989, "step": 14372 }, { "epoch": 0.24983921152809888, "grad_norm": 2.5661968777102597, "learning_rate": 8.785436913718482e-07, "loss": 0.288, "step": 14373 }, { "epoch": 0.2498565940656017, "grad_norm": 1.804464509017353, "learning_rate": 8.785253004266688e-07, "loss": 0.3753, "step": 14374 }, { "epoch": 0.2498739766031045, "grad_norm": 2.6865025098692863, "learning_rate": 8.785069082817382e-07, "loss": 0.3727, "step": 14375 }, { "epoch": 0.24989135914060734, "grad_norm": 1.9106075080458087, "learning_rate": 8.784885149371147e-07, "loss": 0.5485, "step": 14376 }, { "epoch": 0.24990874167811017, "grad_norm": 1.6940724274325996, "learning_rate": 8.784701203928562e-07, "loss": 0.3825, "step": 14377 }, { "epoch": 0.249926124215613, "grad_norm": 1.4933521387509878, "learning_rate": 8.784517246490214e-07, "loss": 0.2915, "step": 14378 }, { "epoch": 0.24994350675311583, "grad_norm": 1.5556033782774124, "learning_rate": 8.784333277056686e-07, "loss": 0.2546, "step": 14379 }, { "epoch": 0.24996088929061863, "grad_norm": 2.0731266739215277, "learning_rate": 8.784149295628558e-07, "loss": 0.3218, "step": 14380 }, { "epoch": 0.24997827182812146, "grad_norm": 2.372892371504462, "learning_rate": 8.783965302206415e-07, "loss": 0.3244, "step": 14381 }, { "epoch": 0.2499956543656243, "grad_norm": 1.4186602405142532, "learning_rate": 8.783781296790842e-07, "loss": 0.2813, "step": 14382 }, { "epoch": 0.2500130369031271, "grad_norm": 1.092223917533515, "learning_rate": 8.783597279382418e-07, "loss": 0.3667, "step": 14383 }, { "epoch": 0.2500304194406299, "grad_norm": 2.8419861353872715, "learning_rate": 8.783413249981731e-07, "loss": 0.2905, "step": 14384 }, { "epoch": 0.25004780197813276, "grad_norm": 1.719677205772087, "learning_rate": 8.78322920858936e-07, "loss": 0.1645, "step": 14385 }, { "epoch": 0.2500651845156356, "grad_norm": 3.334695839457956, "learning_rate": 8.783045155205892e-07, "loss": 0.2075, "step": 14386 }, { "epoch": 0.2500825670531384, "grad_norm": 2.0237457771627634, "learning_rate": 8.782861089831908e-07, "loss": 0.2966, "step": 14387 }, { "epoch": 0.25009994959064125, "grad_norm": 1.6327105696619728, "learning_rate": 8.782677012467992e-07, "loss": 0.2807, "step": 14388 }, { "epoch": 0.2501173321281441, "grad_norm": 1.32524450724129, "learning_rate": 8.782492923114727e-07, "loss": 0.2246, "step": 14389 }, { "epoch": 0.2501347146656469, "grad_norm": 3.6001534331410046, "learning_rate": 8.782308821772697e-07, "loss": 0.3941, "step": 14390 }, { "epoch": 0.25015209720314974, "grad_norm": 2.4059297487466824, "learning_rate": 8.782124708442487e-07, "loss": 0.3628, "step": 14391 }, { "epoch": 0.25016947974065257, "grad_norm": 2.238620571614036, "learning_rate": 8.781940583124678e-07, "loss": 0.2659, "step": 14392 }, { "epoch": 0.25018686227815534, "grad_norm": 1.5185040659595326, "learning_rate": 8.781756445819855e-07, "loss": 0.1639, "step": 14393 }, { "epoch": 0.25020424481565817, "grad_norm": 2.092279319595569, "learning_rate": 8.781572296528598e-07, "loss": 0.6173, "step": 14394 }, { "epoch": 0.250221627353161, "grad_norm": 1.9035368865793334, "learning_rate": 8.781388135251497e-07, "loss": 0.1888, "step": 14395 }, { "epoch": 0.25023900989066383, "grad_norm": 2.4204921719768304, "learning_rate": 8.781203961989134e-07, "loss": 0.2604, "step": 14396 }, { "epoch": 0.25025639242816666, "grad_norm": 1.7160490895612859, "learning_rate": 8.781019776742089e-07, "loss": 0.2033, "step": 14397 }, { "epoch": 0.2502737749656695, "grad_norm": 1.4859359503316776, "learning_rate": 8.780835579510949e-07, "loss": 0.3446, "step": 14398 }, { "epoch": 0.2502911575031723, "grad_norm": 1.3126086700628101, "learning_rate": 8.780651370296297e-07, "loss": 0.2533, "step": 14399 }, { "epoch": 0.25030854004067515, "grad_norm": 2.3360934627049676, "learning_rate": 8.780467149098716e-07, "loss": 0.268, "step": 14400 }, { "epoch": 0.250325922578178, "grad_norm": 6.301490634452193, "learning_rate": 8.780282915918792e-07, "loss": 0.4427, "step": 14401 }, { "epoch": 0.2503433051156808, "grad_norm": 1.7955407834437935, "learning_rate": 8.780098670757107e-07, "loss": 0.365, "step": 14402 }, { "epoch": 0.2503606876531836, "grad_norm": 1.4163933965287048, "learning_rate": 8.779914413614247e-07, "loss": 0.2833, "step": 14403 }, { "epoch": 0.2503780701906864, "grad_norm": 1.8572333267109542, "learning_rate": 8.779730144490792e-07, "loss": 0.2667, "step": 14404 }, { "epoch": 0.25039545272818925, "grad_norm": 3.1927161847987757, "learning_rate": 8.779545863387329e-07, "loss": 0.4755, "step": 14405 }, { "epoch": 0.2504128352656921, "grad_norm": 1.9762306454069927, "learning_rate": 8.779361570304444e-07, "loss": 0.3436, "step": 14406 }, { "epoch": 0.2504302178031949, "grad_norm": 2.1656985319744093, "learning_rate": 8.779177265242719e-07, "loss": 0.3031, "step": 14407 }, { "epoch": 0.25044760034069774, "grad_norm": 1.8439373533567256, "learning_rate": 8.778992948202737e-07, "loss": 0.406, "step": 14408 }, { "epoch": 0.25046498287820057, "grad_norm": 1.9924069830619906, "learning_rate": 8.778808619185083e-07, "loss": 0.3601, "step": 14409 }, { "epoch": 0.2504823654157034, "grad_norm": 1.5201418493343488, "learning_rate": 8.778624278190341e-07, "loss": 0.2889, "step": 14410 }, { "epoch": 0.25049974795320623, "grad_norm": 1.5868350968811225, "learning_rate": 8.778439925219096e-07, "loss": 0.2118, "step": 14411 }, { "epoch": 0.25051713049070906, "grad_norm": 1.8246725601575897, "learning_rate": 8.778255560271934e-07, "loss": 0.3813, "step": 14412 }, { "epoch": 0.25053451302821184, "grad_norm": 1.7744963601110404, "learning_rate": 8.778071183349436e-07, "loss": 0.3159, "step": 14413 }, { "epoch": 0.25055189556571467, "grad_norm": 2.6580556111230966, "learning_rate": 8.777886794452187e-07, "loss": 0.3484, "step": 14414 }, { "epoch": 0.2505692781032175, "grad_norm": 2.0063022701921587, "learning_rate": 8.777702393580775e-07, "loss": 0.3112, "step": 14415 }, { "epoch": 0.2505866606407203, "grad_norm": 2.219878135969361, "learning_rate": 8.777517980735779e-07, "loss": 0.3802, "step": 14416 }, { "epoch": 0.25060404317822316, "grad_norm": 1.6258039926171022, "learning_rate": 8.777333555917788e-07, "loss": 0.2879, "step": 14417 }, { "epoch": 0.250621425715726, "grad_norm": 1.9317633063667028, "learning_rate": 8.777149119127384e-07, "loss": 0.4162, "step": 14418 }, { "epoch": 0.2506388082532288, "grad_norm": 1.4328506731808146, "learning_rate": 8.776964670365152e-07, "loss": 0.2215, "step": 14419 }, { "epoch": 0.25065619079073165, "grad_norm": 1.1005622542923532, "learning_rate": 8.776780209631678e-07, "loss": 0.2602, "step": 14420 }, { "epoch": 0.2506735733282345, "grad_norm": 1.1512815606293494, "learning_rate": 8.776595736927544e-07, "loss": 0.1709, "step": 14421 }, { "epoch": 0.25069095586573725, "grad_norm": 1.4541384208361523, "learning_rate": 8.776411252253337e-07, "loss": 0.3069, "step": 14422 }, { "epoch": 0.2507083384032401, "grad_norm": 2.1646619826292732, "learning_rate": 8.776226755609643e-07, "loss": 0.2385, "step": 14423 }, { "epoch": 0.2507257209407429, "grad_norm": 1.584932615646166, "learning_rate": 8.776042246997042e-07, "loss": 0.3879, "step": 14424 }, { "epoch": 0.25074310347824574, "grad_norm": 1.730263055041734, "learning_rate": 8.775857726416123e-07, "loss": 0.3692, "step": 14425 }, { "epoch": 0.2507604860157486, "grad_norm": 1.7182232384064449, "learning_rate": 8.77567319386747e-07, "loss": 0.499, "step": 14426 }, { "epoch": 0.2507778685532514, "grad_norm": 1.425686168385627, "learning_rate": 8.775488649351668e-07, "loss": 0.3401, "step": 14427 }, { "epoch": 0.25079525109075423, "grad_norm": 1.6659426465800355, "learning_rate": 8.775304092869299e-07, "loss": 0.3739, "step": 14428 }, { "epoch": 0.25081263362825706, "grad_norm": 1.6845788733212865, "learning_rate": 8.775119524420951e-07, "loss": 0.2911, "step": 14429 }, { "epoch": 0.2508300161657599, "grad_norm": 1.8036494835185644, "learning_rate": 8.774934944007209e-07, "loss": 0.1459, "step": 14430 }, { "epoch": 0.2508473987032627, "grad_norm": 2.263156786369792, "learning_rate": 8.774750351628657e-07, "loss": 0.4643, "step": 14431 }, { "epoch": 0.2508647812407655, "grad_norm": 1.8595648184305063, "learning_rate": 8.774565747285881e-07, "loss": 0.4628, "step": 14432 }, { "epoch": 0.25088216377826833, "grad_norm": 1.579136668010236, "learning_rate": 8.774381130979465e-07, "loss": 0.3048, "step": 14433 }, { "epoch": 0.25089954631577116, "grad_norm": 2.0262665091021987, "learning_rate": 8.774196502709994e-07, "loss": 0.2408, "step": 14434 }, { "epoch": 0.250916928853274, "grad_norm": 2.112968329977246, "learning_rate": 8.774011862478053e-07, "loss": 0.3462, "step": 14435 }, { "epoch": 0.2509343113907768, "grad_norm": 2.2083296282620544, "learning_rate": 8.77382721028423e-07, "loss": 0.2365, "step": 14436 }, { "epoch": 0.25095169392827965, "grad_norm": 1.7580874612999806, "learning_rate": 8.773642546129107e-07, "loss": 0.4685, "step": 14437 }, { "epoch": 0.2509690764657825, "grad_norm": 1.0977780885925685, "learning_rate": 8.773457870013272e-07, "loss": 0.278, "step": 14438 }, { "epoch": 0.2509864590032853, "grad_norm": 1.2144628919548321, "learning_rate": 8.773273181937309e-07, "loss": 0.3131, "step": 14439 }, { "epoch": 0.25100384154078814, "grad_norm": 1.5490996035308473, "learning_rate": 8.7730884819018e-07, "loss": 0.3647, "step": 14440 }, { "epoch": 0.25102122407829097, "grad_norm": 1.5685770807977855, "learning_rate": 8.772903769907338e-07, "loss": 0.2452, "step": 14441 }, { "epoch": 0.25103860661579375, "grad_norm": 2.8024673668513063, "learning_rate": 8.772719045954502e-07, "loss": 0.5362, "step": 14442 }, { "epoch": 0.2510559891532966, "grad_norm": 2.996102928380925, "learning_rate": 8.77253431004388e-07, "loss": 0.3413, "step": 14443 }, { "epoch": 0.2510733716907994, "grad_norm": 2.048232387169327, "learning_rate": 8.772349562176057e-07, "loss": 0.2866, "step": 14444 }, { "epoch": 0.25109075422830224, "grad_norm": 1.5101766154364906, "learning_rate": 8.77216480235162e-07, "loss": 0.2797, "step": 14445 }, { "epoch": 0.25110813676580507, "grad_norm": 1.7338455120787057, "learning_rate": 8.771980030571151e-07, "loss": 0.2167, "step": 14446 }, { "epoch": 0.2511255193033079, "grad_norm": 3.4645173863854164, "learning_rate": 8.771795246835239e-07, "loss": 0.6713, "step": 14447 }, { "epoch": 0.25114290184081073, "grad_norm": 1.615321858729465, "learning_rate": 8.77161045114447e-07, "loss": 0.3041, "step": 14448 }, { "epoch": 0.25116028437831356, "grad_norm": 1.399402777380606, "learning_rate": 8.771425643499427e-07, "loss": 0.299, "step": 14449 }, { "epoch": 0.2511776669158164, "grad_norm": 1.840438949473303, "learning_rate": 8.771240823900697e-07, "loss": 0.3106, "step": 14450 }, { "epoch": 0.2511950494533192, "grad_norm": 1.5203126374998788, "learning_rate": 8.771055992348867e-07, "loss": 0.2435, "step": 14451 }, { "epoch": 0.251212431990822, "grad_norm": 1.609967232985762, "learning_rate": 8.770871148844521e-07, "loss": 0.3372, "step": 14452 }, { "epoch": 0.2512298145283248, "grad_norm": 2.212391796410376, "learning_rate": 8.770686293388247e-07, "loss": 0.2958, "step": 14453 }, { "epoch": 0.25124719706582765, "grad_norm": 2.1229391335194823, "learning_rate": 8.770501425980628e-07, "loss": 0.3967, "step": 14454 }, { "epoch": 0.2512645796033305, "grad_norm": 1.1702800697788462, "learning_rate": 8.770316546622252e-07, "loss": 0.2135, "step": 14455 }, { "epoch": 0.2512819621408333, "grad_norm": 1.6684232162286754, "learning_rate": 8.770131655313704e-07, "loss": 0.4081, "step": 14456 }, { "epoch": 0.25129934467833615, "grad_norm": 1.4290132623030847, "learning_rate": 8.769946752055571e-07, "loss": 0.3787, "step": 14457 }, { "epoch": 0.251316727215839, "grad_norm": 1.1162148559532108, "learning_rate": 8.76976183684844e-07, "loss": 0.2806, "step": 14458 }, { "epoch": 0.2513341097533418, "grad_norm": 2.0534901240276318, "learning_rate": 8.769576909692893e-07, "loss": 0.2504, "step": 14459 }, { "epoch": 0.25135149229084464, "grad_norm": 2.0761205411486556, "learning_rate": 8.76939197058952e-07, "loss": 0.2935, "step": 14460 }, { "epoch": 0.25136887482834747, "grad_norm": 1.472946800950579, "learning_rate": 8.769207019538906e-07, "loss": 0.282, "step": 14461 }, { "epoch": 0.25138625736585024, "grad_norm": 1.3160714300497685, "learning_rate": 8.769022056541637e-07, "loss": 0.1799, "step": 14462 }, { "epoch": 0.25140363990335307, "grad_norm": 2.7574412369738344, "learning_rate": 8.7688370815983e-07, "loss": 0.4534, "step": 14463 }, { "epoch": 0.2514210224408559, "grad_norm": 2.5086406183246375, "learning_rate": 8.768652094709481e-07, "loss": 0.4464, "step": 14464 }, { "epoch": 0.25143840497835873, "grad_norm": 1.6072316323107525, "learning_rate": 8.768467095875764e-07, "loss": 0.2245, "step": 14465 }, { "epoch": 0.25145578751586156, "grad_norm": 1.114604673713859, "learning_rate": 8.76828208509774e-07, "loss": 0.4514, "step": 14466 }, { "epoch": 0.2514731700533644, "grad_norm": 1.386285072225561, "learning_rate": 8.768097062375992e-07, "loss": 0.3073, "step": 14467 }, { "epoch": 0.2514905525908672, "grad_norm": 2.370928396324391, "learning_rate": 8.767912027711106e-07, "loss": 0.3668, "step": 14468 }, { "epoch": 0.25150793512837005, "grad_norm": 1.2095345708988405, "learning_rate": 8.767726981103671e-07, "loss": 0.2587, "step": 14469 }, { "epoch": 0.2515253176658729, "grad_norm": 1.2414081685433942, "learning_rate": 8.767541922554271e-07, "loss": 0.1614, "step": 14470 }, { "epoch": 0.2515427002033757, "grad_norm": 1.7143237527638475, "learning_rate": 8.767356852063495e-07, "loss": 0.2062, "step": 14471 }, { "epoch": 0.2515600827408785, "grad_norm": 2.1325334079384985, "learning_rate": 8.767171769631928e-07, "loss": 0.3447, "step": 14472 }, { "epoch": 0.2515774652783813, "grad_norm": 1.4062128616641845, "learning_rate": 8.766986675260158e-07, "loss": 0.4469, "step": 14473 }, { "epoch": 0.25159484781588415, "grad_norm": 1.4987380209208425, "learning_rate": 8.766801568948769e-07, "loss": 0.2883, "step": 14474 }, { "epoch": 0.251612230353387, "grad_norm": 1.2839215141290956, "learning_rate": 8.76661645069835e-07, "loss": 0.357, "step": 14475 }, { "epoch": 0.2516296128908898, "grad_norm": 1.9677803344652243, "learning_rate": 8.766431320509487e-07, "loss": 0.2892, "step": 14476 }, { "epoch": 0.25164699542839264, "grad_norm": 3.6061298159609505, "learning_rate": 8.766246178382766e-07, "loss": 0.2909, "step": 14477 }, { "epoch": 0.25166437796589547, "grad_norm": 2.026885995791593, "learning_rate": 8.766061024318776e-07, "loss": 0.4837, "step": 14478 }, { "epoch": 0.2516817605033983, "grad_norm": 4.820571687861041, "learning_rate": 8.765875858318101e-07, "loss": 0.3229, "step": 14479 }, { "epoch": 0.25169914304090113, "grad_norm": 2.354517231196314, "learning_rate": 8.765690680381331e-07, "loss": 0.2705, "step": 14480 }, { "epoch": 0.25171652557840396, "grad_norm": 2.014325927591742, "learning_rate": 8.765505490509051e-07, "loss": 0.3091, "step": 14481 }, { "epoch": 0.25173390811590673, "grad_norm": 1.9870033623763306, "learning_rate": 8.765320288701848e-07, "loss": 0.5071, "step": 14482 }, { "epoch": 0.25175129065340957, "grad_norm": 1.3498540085456505, "learning_rate": 8.76513507496031e-07, "loss": 0.263, "step": 14483 }, { "epoch": 0.2517686731909124, "grad_norm": 1.2447464940736974, "learning_rate": 8.764949849285023e-07, "loss": 0.2151, "step": 14484 }, { "epoch": 0.2517860557284152, "grad_norm": 6.255666494114276, "learning_rate": 8.764764611676574e-07, "loss": 0.398, "step": 14485 }, { "epoch": 0.25180343826591806, "grad_norm": 2.639491522095795, "learning_rate": 8.764579362135551e-07, "loss": 0.2418, "step": 14486 }, { "epoch": 0.2518208208034209, "grad_norm": 1.9261132103536343, "learning_rate": 8.76439410066254e-07, "loss": 0.4028, "step": 14487 }, { "epoch": 0.2518382033409237, "grad_norm": 2.076369947686733, "learning_rate": 8.76420882725813e-07, "loss": 0.3239, "step": 14488 }, { "epoch": 0.25185558587842655, "grad_norm": 2.034409803656384, "learning_rate": 8.764023541922907e-07, "loss": 0.2034, "step": 14489 }, { "epoch": 0.2518729684159294, "grad_norm": 2.260931297995506, "learning_rate": 8.763838244657458e-07, "loss": 0.3967, "step": 14490 }, { "epoch": 0.2518903509534322, "grad_norm": 2.4570770103545985, "learning_rate": 8.763652935462371e-07, "loss": 0.3847, "step": 14491 }, { "epoch": 0.251907733490935, "grad_norm": 1.7085666327470475, "learning_rate": 8.763467614338233e-07, "loss": 0.5383, "step": 14492 }, { "epoch": 0.2519251160284378, "grad_norm": 3.4572682200818643, "learning_rate": 8.763282281285631e-07, "loss": 0.7986, "step": 14493 }, { "epoch": 0.25194249856594064, "grad_norm": 1.5676823712239283, "learning_rate": 8.763096936305154e-07, "loss": 0.2646, "step": 14494 }, { "epoch": 0.2519598811034435, "grad_norm": 1.6423839403604872, "learning_rate": 8.762911579397388e-07, "loss": 0.1802, "step": 14495 }, { "epoch": 0.2519772636409463, "grad_norm": 2.7596595466914886, "learning_rate": 8.76272621056292e-07, "loss": 0.4059, "step": 14496 }, { "epoch": 0.25199464617844913, "grad_norm": 1.7145072919653181, "learning_rate": 8.762540829802338e-07, "loss": 0.3611, "step": 14497 }, { "epoch": 0.25201202871595196, "grad_norm": 1.6175653100780307, "learning_rate": 8.762355437116231e-07, "loss": 0.1981, "step": 14498 }, { "epoch": 0.2520294112534548, "grad_norm": 1.7302902849639532, "learning_rate": 8.762170032505185e-07, "loss": 0.3372, "step": 14499 }, { "epoch": 0.2520467937909576, "grad_norm": 2.507445612662511, "learning_rate": 8.761984615969789e-07, "loss": 0.3053, "step": 14500 }, { "epoch": 0.25206417632846045, "grad_norm": 1.5028195800646598, "learning_rate": 8.76179918751063e-07, "loss": 0.2532, "step": 14501 }, { "epoch": 0.25208155886596323, "grad_norm": 1.710377597551632, "learning_rate": 8.761613747128295e-07, "loss": 0.4388, "step": 14502 }, { "epoch": 0.25209894140346606, "grad_norm": 1.6753990961964835, "learning_rate": 8.761428294823372e-07, "loss": 0.2015, "step": 14503 }, { "epoch": 0.2521163239409689, "grad_norm": 1.9933404076649006, "learning_rate": 8.76124283059645e-07, "loss": 0.3813, "step": 14504 }, { "epoch": 0.2521337064784717, "grad_norm": 1.731341373233642, "learning_rate": 8.761057354448114e-07, "loss": 0.5219, "step": 14505 }, { "epoch": 0.25215108901597455, "grad_norm": 2.289523225054275, "learning_rate": 8.760871866378956e-07, "loss": 0.4131, "step": 14506 }, { "epoch": 0.2521684715534774, "grad_norm": 1.6855580508524504, "learning_rate": 8.760686366389561e-07, "loss": 0.2926, "step": 14507 }, { "epoch": 0.2521858540909802, "grad_norm": 1.5160493257918592, "learning_rate": 8.760500854480519e-07, "loss": 0.4702, "step": 14508 }, { "epoch": 0.25220323662848304, "grad_norm": 2.34745947936119, "learning_rate": 8.760315330652414e-07, "loss": 0.3145, "step": 14509 }, { "epoch": 0.25222061916598587, "grad_norm": 2.9123257688511113, "learning_rate": 8.760129794905839e-07, "loss": 0.3005, "step": 14510 }, { "epoch": 0.2522380017034887, "grad_norm": 2.5251713234567057, "learning_rate": 8.75994424724138e-07, "loss": 0.4606, "step": 14511 }, { "epoch": 0.2522553842409915, "grad_norm": 1.4209494748963405, "learning_rate": 8.759758687659625e-07, "loss": 0.3529, "step": 14512 }, { "epoch": 0.2522727667784943, "grad_norm": 2.1912338968596927, "learning_rate": 8.759573116161161e-07, "loss": 0.3324, "step": 14513 }, { "epoch": 0.25229014931599714, "grad_norm": 1.0062104386481012, "learning_rate": 8.759387532746577e-07, "loss": 0.2955, "step": 14514 }, { "epoch": 0.25230753185349997, "grad_norm": 2.098694984483857, "learning_rate": 8.759201937416462e-07, "loss": 0.4084, "step": 14515 }, { "epoch": 0.2523249143910028, "grad_norm": 2.9278058135773697, "learning_rate": 8.759016330171403e-07, "loss": 0.459, "step": 14516 }, { "epoch": 0.2523422969285056, "grad_norm": 1.4344897358071027, "learning_rate": 8.758830711011989e-07, "loss": 0.3363, "step": 14517 }, { "epoch": 0.25235967946600846, "grad_norm": 1.6592510646831038, "learning_rate": 8.758645079938809e-07, "loss": 0.5265, "step": 14518 }, { "epoch": 0.2523770620035113, "grad_norm": 1.5339174115094079, "learning_rate": 8.75845943695245e-07, "loss": 0.3347, "step": 14519 }, { "epoch": 0.2523944445410141, "grad_norm": 1.8233372690552943, "learning_rate": 8.758273782053503e-07, "loss": 0.473, "step": 14520 }, { "epoch": 0.25241182707851695, "grad_norm": 1.7238027141340238, "learning_rate": 8.758088115242552e-07, "loss": 0.294, "step": 14521 }, { "epoch": 0.2524292096160197, "grad_norm": 2.191306298987002, "learning_rate": 8.757902436520189e-07, "loss": 0.23, "step": 14522 }, { "epoch": 0.25244659215352255, "grad_norm": 1.749215858139695, "learning_rate": 8.757716745887003e-07, "loss": 0.3169, "step": 14523 }, { "epoch": 0.2524639746910254, "grad_norm": 2.8362993825331384, "learning_rate": 8.757531043343579e-07, "loss": 0.3546, "step": 14524 }, { "epoch": 0.2524813572285282, "grad_norm": 3.507428725803616, "learning_rate": 8.757345328890508e-07, "loss": 0.3168, "step": 14525 }, { "epoch": 0.25249873976603104, "grad_norm": 1.6704879342442076, "learning_rate": 8.757159602528379e-07, "loss": 0.3076, "step": 14526 }, { "epoch": 0.2525161223035339, "grad_norm": 1.6053029088254531, "learning_rate": 8.756973864257779e-07, "loss": 0.5589, "step": 14527 }, { "epoch": 0.2525335048410367, "grad_norm": 2.0285363092474538, "learning_rate": 8.756788114079298e-07, "loss": 0.3616, "step": 14528 }, { "epoch": 0.25255088737853953, "grad_norm": 1.8651411109855662, "learning_rate": 8.756602351993525e-07, "loss": 0.2228, "step": 14529 }, { "epoch": 0.25256826991604236, "grad_norm": 1.0155409322407003, "learning_rate": 8.756416578001048e-07, "loss": 0.5053, "step": 14530 }, { "epoch": 0.2525856524535452, "grad_norm": 2.6608592270852207, "learning_rate": 8.756230792102456e-07, "loss": 0.3534, "step": 14531 }, { "epoch": 0.25260303499104797, "grad_norm": 1.8099881359662948, "learning_rate": 8.756044994298337e-07, "loss": 0.5402, "step": 14532 }, { "epoch": 0.2526204175285508, "grad_norm": 2.1189996347412574, "learning_rate": 8.755859184589283e-07, "loss": 0.2828, "step": 14533 }, { "epoch": 0.25263780006605363, "grad_norm": 1.9299716333434502, "learning_rate": 8.755673362975879e-07, "loss": 0.2389, "step": 14534 }, { "epoch": 0.25265518260355646, "grad_norm": 9.000682836006485, "learning_rate": 8.755487529458716e-07, "loss": 0.2882, "step": 14535 }, { "epoch": 0.2526725651410593, "grad_norm": 2.2596492587014216, "learning_rate": 8.755301684038382e-07, "loss": 0.2434, "step": 14536 }, { "epoch": 0.2526899476785621, "grad_norm": 3.592190518503671, "learning_rate": 8.755115826715467e-07, "loss": 0.5285, "step": 14537 }, { "epoch": 0.25270733021606495, "grad_norm": 4.3938981227614775, "learning_rate": 8.75492995749056e-07, "loss": 0.4841, "step": 14538 }, { "epoch": 0.2527247127535678, "grad_norm": 2.7669948176962706, "learning_rate": 8.75474407636425e-07, "loss": 0.3719, "step": 14539 }, { "epoch": 0.2527420952910706, "grad_norm": 6.916569954806547, "learning_rate": 8.754558183337126e-07, "loss": 0.4128, "step": 14540 }, { "epoch": 0.25275947782857344, "grad_norm": 1.3442291818247016, "learning_rate": 8.754372278409778e-07, "loss": 0.2853, "step": 14541 }, { "epoch": 0.2527768603660762, "grad_norm": 2.1816433598931724, "learning_rate": 8.754186361582794e-07, "loss": 0.5117, "step": 14542 }, { "epoch": 0.25279424290357905, "grad_norm": 3.3523288856448263, "learning_rate": 8.754000432856763e-07, "loss": 0.4836, "step": 14543 }, { "epoch": 0.2528116254410819, "grad_norm": 1.716585728646303, "learning_rate": 8.753814492232274e-07, "loss": 0.2252, "step": 14544 }, { "epoch": 0.2528290079785847, "grad_norm": 1.3493233330300414, "learning_rate": 8.753628539709919e-07, "loss": 0.201, "step": 14545 }, { "epoch": 0.25284639051608754, "grad_norm": 2.2686140931844836, "learning_rate": 8.753442575290287e-07, "loss": 0.2451, "step": 14546 }, { "epoch": 0.25286377305359037, "grad_norm": 3.1233006775529097, "learning_rate": 8.753256598973964e-07, "loss": 0.2938, "step": 14547 }, { "epoch": 0.2528811555910932, "grad_norm": 2.8840487808496804, "learning_rate": 8.753070610761544e-07, "loss": 0.3149, "step": 14548 }, { "epoch": 0.25289853812859603, "grad_norm": 1.6639300517533682, "learning_rate": 8.752884610653613e-07, "loss": 0.4061, "step": 14549 }, { "epoch": 0.25291592066609886, "grad_norm": 1.9667091056109458, "learning_rate": 8.752698598650761e-07, "loss": 0.2633, "step": 14550 }, { "epoch": 0.2529333032036017, "grad_norm": 1.4682710424021643, "learning_rate": 8.752512574753579e-07, "loss": 0.3257, "step": 14551 }, { "epoch": 0.25295068574110446, "grad_norm": 1.7359242559725603, "learning_rate": 8.752326538962655e-07, "loss": 0.2252, "step": 14552 }, { "epoch": 0.2529680682786073, "grad_norm": 1.405299156906377, "learning_rate": 8.752140491278581e-07, "loss": 0.3655, "step": 14553 }, { "epoch": 0.2529854508161101, "grad_norm": 1.7394155957042283, "learning_rate": 8.751954431701944e-07, "loss": 0.4658, "step": 14554 }, { "epoch": 0.25300283335361295, "grad_norm": 1.6883418836694146, "learning_rate": 8.751768360233336e-07, "loss": 0.4275, "step": 14555 }, { "epoch": 0.2530202158911158, "grad_norm": 2.103459677500868, "learning_rate": 8.751582276873345e-07, "loss": 0.3093, "step": 14556 }, { "epoch": 0.2530375984286186, "grad_norm": 1.8443833228399737, "learning_rate": 8.751396181622562e-07, "loss": 0.2123, "step": 14557 }, { "epoch": 0.25305498096612145, "grad_norm": 1.4455988041645642, "learning_rate": 8.751210074481577e-07, "loss": 0.3508, "step": 14558 }, { "epoch": 0.2530723635036243, "grad_norm": 4.529849479155053, "learning_rate": 8.751023955450978e-07, "loss": 0.4771, "step": 14559 }, { "epoch": 0.2530897460411271, "grad_norm": 1.8946310623083844, "learning_rate": 8.750837824531357e-07, "loss": 0.3341, "step": 14560 }, { "epoch": 0.2531071285786299, "grad_norm": 1.589213363250633, "learning_rate": 8.750651681723302e-07, "loss": 0.6464, "step": 14561 }, { "epoch": 0.2531245111161327, "grad_norm": 2.5483835628248235, "learning_rate": 8.750465527027404e-07, "loss": 0.3728, "step": 14562 }, { "epoch": 0.25314189365363554, "grad_norm": 1.899314416432837, "learning_rate": 8.750279360444254e-07, "loss": 0.3124, "step": 14563 }, { "epoch": 0.25315927619113837, "grad_norm": 1.5131996848177756, "learning_rate": 8.75009318197444e-07, "loss": 0.2523, "step": 14564 }, { "epoch": 0.2531766587286412, "grad_norm": 1.65482160901347, "learning_rate": 8.749906991618555e-07, "loss": 0.2516, "step": 14565 }, { "epoch": 0.25319404126614403, "grad_norm": 2.195601351016485, "learning_rate": 8.749720789377187e-07, "loss": 0.3823, "step": 14566 }, { "epoch": 0.25321142380364686, "grad_norm": 1.8800078414778016, "learning_rate": 8.749534575250925e-07, "loss": 0.438, "step": 14567 }, { "epoch": 0.2532288063411497, "grad_norm": 1.7826142335548012, "learning_rate": 8.749348349240363e-07, "loss": 0.2516, "step": 14568 }, { "epoch": 0.2532461888786525, "grad_norm": 2.0169865555106288, "learning_rate": 8.749162111346088e-07, "loss": 0.4792, "step": 14569 }, { "epoch": 0.25326357141615535, "grad_norm": 2.062072306398716, "learning_rate": 8.748975861568691e-07, "loss": 0.3562, "step": 14570 }, { "epoch": 0.25328095395365813, "grad_norm": 2.7533138070506062, "learning_rate": 8.748789599908764e-07, "loss": 0.483, "step": 14571 }, { "epoch": 0.25329833649116096, "grad_norm": 2.03845061129145, "learning_rate": 8.748603326366894e-07, "loss": 0.3115, "step": 14572 }, { "epoch": 0.2533157190286638, "grad_norm": 3.7696527246636844, "learning_rate": 8.748417040943675e-07, "loss": 0.2586, "step": 14573 }, { "epoch": 0.2533331015661666, "grad_norm": 2.810425034790945, "learning_rate": 8.748230743639696e-07, "loss": 0.3552, "step": 14574 }, { "epoch": 0.25335048410366945, "grad_norm": 1.8257228289012668, "learning_rate": 8.748044434455546e-07, "loss": 0.3363, "step": 14575 }, { "epoch": 0.2533678666411723, "grad_norm": 3.021865393825803, "learning_rate": 8.747858113391817e-07, "loss": 0.3077, "step": 14576 }, { "epoch": 0.2533852491786751, "grad_norm": 1.5373130267210238, "learning_rate": 8.747671780449099e-07, "loss": 0.2765, "step": 14577 }, { "epoch": 0.25340263171617794, "grad_norm": 3.3209889126510137, "learning_rate": 8.747485435627985e-07, "loss": 0.3766, "step": 14578 }, { "epoch": 0.25342001425368077, "grad_norm": 1.9905644238507074, "learning_rate": 8.747299078929063e-07, "loss": 0.3995, "step": 14579 }, { "epoch": 0.2534373967911836, "grad_norm": 2.06780938546579, "learning_rate": 8.747112710352923e-07, "loss": 0.3114, "step": 14580 }, { "epoch": 0.2534547793286864, "grad_norm": 2.668552997325446, "learning_rate": 8.746926329900157e-07, "loss": 0.8346, "step": 14581 }, { "epoch": 0.2534721618661892, "grad_norm": 1.353126915799177, "learning_rate": 8.746739937571357e-07, "loss": 0.2455, "step": 14582 }, { "epoch": 0.25348954440369204, "grad_norm": 3.095193728905731, "learning_rate": 8.746553533367113e-07, "loss": 0.2561, "step": 14583 }, { "epoch": 0.25350692694119487, "grad_norm": 2.4923188349078664, "learning_rate": 8.746367117288014e-07, "loss": 0.5453, "step": 14584 }, { "epoch": 0.2535243094786977, "grad_norm": 2.8719733934786684, "learning_rate": 8.746180689334652e-07, "loss": 0.3797, "step": 14585 }, { "epoch": 0.2535416920162005, "grad_norm": 1.665804732130568, "learning_rate": 8.745994249507619e-07, "loss": 0.2461, "step": 14586 }, { "epoch": 0.25355907455370336, "grad_norm": 2.5744414200174464, "learning_rate": 8.745807797807505e-07, "loss": 0.2503, "step": 14587 }, { "epoch": 0.2535764570912062, "grad_norm": 1.7391871555914584, "learning_rate": 8.7456213342349e-07, "loss": 0.3609, "step": 14588 }, { "epoch": 0.253593839628709, "grad_norm": 2.024733594248215, "learning_rate": 8.745434858790397e-07, "loss": 0.3524, "step": 14589 }, { "epoch": 0.25361122216621185, "grad_norm": 1.4579757512908953, "learning_rate": 8.745248371474585e-07, "loss": 0.2117, "step": 14590 }, { "epoch": 0.2536286047037146, "grad_norm": 1.3737528882428545, "learning_rate": 8.745061872288058e-07, "loss": 0.1959, "step": 14591 }, { "epoch": 0.25364598724121745, "grad_norm": 1.5550454512937941, "learning_rate": 8.744875361231403e-07, "loss": 0.369, "step": 14592 }, { "epoch": 0.2536633697787203, "grad_norm": 2.0592888630966826, "learning_rate": 8.744688838305215e-07, "loss": 0.3395, "step": 14593 }, { "epoch": 0.2536807523162231, "grad_norm": 1.494329332404774, "learning_rate": 8.744502303510082e-07, "loss": 0.4504, "step": 14594 }, { "epoch": 0.25369813485372594, "grad_norm": 2.350916423352394, "learning_rate": 8.744315756846597e-07, "loss": 0.2935, "step": 14595 }, { "epoch": 0.2537155173912288, "grad_norm": 1.4037662228460925, "learning_rate": 8.744129198315352e-07, "loss": 0.1471, "step": 14596 }, { "epoch": 0.2537328999287316, "grad_norm": 1.8129207392306304, "learning_rate": 8.743942627916937e-07, "loss": 0.45, "step": 14597 }, { "epoch": 0.25375028246623443, "grad_norm": 2.1603256240692925, "learning_rate": 8.743756045651944e-07, "loss": 0.4525, "step": 14598 }, { "epoch": 0.25376766500373726, "grad_norm": 2.447925786220494, "learning_rate": 8.743569451520963e-07, "loss": 0.4161, "step": 14599 }, { "epoch": 0.2537850475412401, "grad_norm": 1.528938545607078, "learning_rate": 8.743382845524587e-07, "loss": 0.3305, "step": 14600 }, { "epoch": 0.25380243007874287, "grad_norm": 1.8960232167543902, "learning_rate": 8.743196227663407e-07, "loss": 0.3786, "step": 14601 }, { "epoch": 0.2538198126162457, "grad_norm": 2.575002349146068, "learning_rate": 8.743009597938016e-07, "loss": 0.2671, "step": 14602 }, { "epoch": 0.25383719515374853, "grad_norm": 2.6468798125247113, "learning_rate": 8.742822956349002e-07, "loss": 0.2456, "step": 14603 }, { "epoch": 0.25385457769125136, "grad_norm": 1.8906162827608228, "learning_rate": 8.74263630289696e-07, "loss": 0.2885, "step": 14604 }, { "epoch": 0.2538719602287542, "grad_norm": 1.4158361439936806, "learning_rate": 8.742449637582479e-07, "loss": 0.4609, "step": 14605 }, { "epoch": 0.253889342766257, "grad_norm": 1.4815662002638155, "learning_rate": 8.74226296040615e-07, "loss": 0.1928, "step": 14606 }, { "epoch": 0.25390672530375985, "grad_norm": 1.1826635358344408, "learning_rate": 8.74207627136857e-07, "loss": 0.1901, "step": 14607 }, { "epoch": 0.2539241078412627, "grad_norm": 1.8143234344868062, "learning_rate": 8.741889570470326e-07, "loss": 0.3281, "step": 14608 }, { "epoch": 0.2539414903787655, "grad_norm": 1.1636845801622482, "learning_rate": 8.74170285771201e-07, "loss": 0.3426, "step": 14609 }, { "epoch": 0.25395887291626834, "grad_norm": 1.5234897680202448, "learning_rate": 8.741516133094215e-07, "loss": 0.5574, "step": 14610 }, { "epoch": 0.2539762554537711, "grad_norm": 1.4019571156591673, "learning_rate": 8.741329396617534e-07, "loss": 0.2238, "step": 14611 }, { "epoch": 0.25399363799127395, "grad_norm": 1.0587916316348904, "learning_rate": 8.741142648282556e-07, "loss": 0.1511, "step": 14612 }, { "epoch": 0.2540110205287768, "grad_norm": 1.4188644945543958, "learning_rate": 8.740955888089874e-07, "loss": 0.3196, "step": 14613 }, { "epoch": 0.2540284030662796, "grad_norm": 3.9942545651790256, "learning_rate": 8.740769116040081e-07, "loss": 0.4341, "step": 14614 }, { "epoch": 0.25404578560378244, "grad_norm": 2.2666901623768116, "learning_rate": 8.740582332133769e-07, "loss": 0.4055, "step": 14615 }, { "epoch": 0.25406316814128527, "grad_norm": 1.4539036870086726, "learning_rate": 8.740395536371528e-07, "loss": 0.2991, "step": 14616 }, { "epoch": 0.2540805506787881, "grad_norm": 1.876346105712818, "learning_rate": 8.740208728753952e-07, "loss": 0.2855, "step": 14617 }, { "epoch": 0.2540979332162909, "grad_norm": 2.072717204027873, "learning_rate": 8.740021909281633e-07, "loss": 0.2152, "step": 14618 }, { "epoch": 0.25411531575379376, "grad_norm": 1.956979646515674, "learning_rate": 8.739835077955162e-07, "loss": 0.2693, "step": 14619 }, { "epoch": 0.2541326982912966, "grad_norm": 1.6162572002148115, "learning_rate": 8.739648234775132e-07, "loss": 0.2477, "step": 14620 }, { "epoch": 0.25415008082879936, "grad_norm": 1.4167224027281788, "learning_rate": 8.739461379742134e-07, "loss": 0.2324, "step": 14621 }, { "epoch": 0.2541674633663022, "grad_norm": 1.8479025642002311, "learning_rate": 8.739274512856762e-07, "loss": 0.2992, "step": 14622 }, { "epoch": 0.254184845903805, "grad_norm": 2.1178265900276196, "learning_rate": 8.739087634119608e-07, "loss": 0.3122, "step": 14623 }, { "epoch": 0.25420222844130785, "grad_norm": 1.507742097532763, "learning_rate": 8.738900743531262e-07, "loss": 0.3359, "step": 14624 }, { "epoch": 0.2542196109788107, "grad_norm": 1.6659953166713302, "learning_rate": 8.73871384109232e-07, "loss": 0.2989, "step": 14625 }, { "epoch": 0.2542369935163135, "grad_norm": 1.9918545185249588, "learning_rate": 8.738526926803371e-07, "loss": 0.2888, "step": 14626 }, { "epoch": 0.25425437605381634, "grad_norm": 2.711906178557227, "learning_rate": 8.73834000066501e-07, "loss": 0.4332, "step": 14627 }, { "epoch": 0.2542717585913192, "grad_norm": 1.9830851504420575, "learning_rate": 8.738153062677828e-07, "loss": 0.3279, "step": 14628 }, { "epoch": 0.254289141128822, "grad_norm": 1.9885091287895182, "learning_rate": 8.737966112842419e-07, "loss": 0.3304, "step": 14629 }, { "epoch": 0.25430652366632484, "grad_norm": 2.3978819576773533, "learning_rate": 8.737779151159375e-07, "loss": 0.2352, "step": 14630 }, { "epoch": 0.2543239062038276, "grad_norm": 2.2365316233738577, "learning_rate": 8.737592177629285e-07, "loss": 0.3582, "step": 14631 }, { "epoch": 0.25434128874133044, "grad_norm": 1.3613881950412405, "learning_rate": 8.737405192252747e-07, "loss": 0.3251, "step": 14632 }, { "epoch": 0.25435867127883327, "grad_norm": 1.2536643139600123, "learning_rate": 8.737218195030351e-07, "loss": 0.3301, "step": 14633 }, { "epoch": 0.2543760538163361, "grad_norm": 1.4286828903539244, "learning_rate": 8.737031185962689e-07, "loss": 0.3092, "step": 14634 }, { "epoch": 0.25439343635383893, "grad_norm": 1.3321757289852485, "learning_rate": 8.736844165050357e-07, "loss": 0.3542, "step": 14635 }, { "epoch": 0.25441081889134176, "grad_norm": 2.6978163188853332, "learning_rate": 8.736657132293943e-07, "loss": 0.3637, "step": 14636 }, { "epoch": 0.2544282014288446, "grad_norm": 1.4275888114996873, "learning_rate": 8.736470087694043e-07, "loss": 0.3045, "step": 14637 }, { "epoch": 0.2544455839663474, "grad_norm": 1.1470643350463339, "learning_rate": 8.73628303125125e-07, "loss": 0.3045, "step": 14638 }, { "epoch": 0.25446296650385025, "grad_norm": 2.625251345977469, "learning_rate": 8.736095962966157e-07, "loss": 0.4724, "step": 14639 }, { "epoch": 0.2544803490413531, "grad_norm": 1.23566114850445, "learning_rate": 8.735908882839356e-07, "loss": 0.1503, "step": 14640 }, { "epoch": 0.25449773157885586, "grad_norm": 2.402566940963857, "learning_rate": 8.735721790871438e-07, "loss": 0.3218, "step": 14641 }, { "epoch": 0.2545151141163587, "grad_norm": 2.3956469397802644, "learning_rate": 8.735534687062999e-07, "loss": 0.296, "step": 14642 }, { "epoch": 0.2545324966538615, "grad_norm": 1.9611747082442537, "learning_rate": 8.73534757141463e-07, "loss": 0.4294, "step": 14643 }, { "epoch": 0.25454987919136435, "grad_norm": 1.4394004496369626, "learning_rate": 8.735160443926928e-07, "loss": 0.1711, "step": 14644 }, { "epoch": 0.2545672617288672, "grad_norm": 1.8269383902501313, "learning_rate": 8.73497330460048e-07, "loss": 0.3518, "step": 14645 }, { "epoch": 0.25458464426637, "grad_norm": 4.295721331796932, "learning_rate": 8.734786153435883e-07, "loss": 0.6247, "step": 14646 }, { "epoch": 0.25460202680387284, "grad_norm": 2.1237303614039007, "learning_rate": 8.734598990433731e-07, "loss": 0.2974, "step": 14647 }, { "epoch": 0.25461940934137567, "grad_norm": 1.3621342004452581, "learning_rate": 8.734411815594614e-07, "loss": 0.1509, "step": 14648 }, { "epoch": 0.2546367918788785, "grad_norm": 1.6432532638775255, "learning_rate": 8.734224628919129e-07, "loss": 0.2135, "step": 14649 }, { "epoch": 0.25465417441638133, "grad_norm": 1.4446899100200743, "learning_rate": 8.734037430407865e-07, "loss": 0.286, "step": 14650 }, { "epoch": 0.2546715569538841, "grad_norm": 1.6113980451221714, "learning_rate": 8.733850220061418e-07, "loss": 0.501, "step": 14651 }, { "epoch": 0.25468893949138693, "grad_norm": 1.725517679696165, "learning_rate": 8.733662997880382e-07, "loss": 0.2735, "step": 14652 }, { "epoch": 0.25470632202888976, "grad_norm": 1.4686665027204062, "learning_rate": 8.73347576386535e-07, "loss": 0.2988, "step": 14653 }, { "epoch": 0.2547237045663926, "grad_norm": 1.8044318942572792, "learning_rate": 8.733288518016915e-07, "loss": 0.2249, "step": 14654 }, { "epoch": 0.2547410871038954, "grad_norm": 1.3005085586280103, "learning_rate": 8.733101260335669e-07, "loss": 0.3375, "step": 14655 }, { "epoch": 0.25475846964139826, "grad_norm": 1.9281699365986225, "learning_rate": 8.732913990822206e-07, "loss": 0.2581, "step": 14656 }, { "epoch": 0.2547758521789011, "grad_norm": 2.0030642303229644, "learning_rate": 8.732726709477122e-07, "loss": 0.5423, "step": 14657 }, { "epoch": 0.2547932347164039, "grad_norm": 1.5725776513919578, "learning_rate": 8.732539416301008e-07, "loss": 0.3846, "step": 14658 }, { "epoch": 0.25481061725390675, "grad_norm": 2.3740710052783474, "learning_rate": 8.73235211129446e-07, "loss": 0.3721, "step": 14659 }, { "epoch": 0.2548279997914096, "grad_norm": 1.6298323765776606, "learning_rate": 8.732164794458069e-07, "loss": 0.24, "step": 14660 }, { "epoch": 0.25484538232891235, "grad_norm": 1.8082782194488956, "learning_rate": 8.731977465792431e-07, "loss": 0.375, "step": 14661 }, { "epoch": 0.2548627648664152, "grad_norm": 1.638175905644655, "learning_rate": 8.731790125298138e-07, "loss": 0.3325, "step": 14662 }, { "epoch": 0.254880147403918, "grad_norm": 1.518188452127092, "learning_rate": 8.731602772975784e-07, "loss": 0.3243, "step": 14663 }, { "epoch": 0.25489752994142084, "grad_norm": 5.617490751643334, "learning_rate": 8.731415408825964e-07, "loss": 0.3682, "step": 14664 }, { "epoch": 0.25491491247892367, "grad_norm": 1.743294192729606, "learning_rate": 8.731228032849271e-07, "loss": 0.3486, "step": 14665 }, { "epoch": 0.2549322950164265, "grad_norm": 2.246457542493181, "learning_rate": 8.7310406450463e-07, "loss": 0.4572, "step": 14666 }, { "epoch": 0.25494967755392933, "grad_norm": 1.559864096393826, "learning_rate": 8.730853245417645e-07, "loss": 0.1987, "step": 14667 }, { "epoch": 0.25496706009143216, "grad_norm": 2.03870240972704, "learning_rate": 8.730665833963897e-07, "loss": 0.4402, "step": 14668 }, { "epoch": 0.254984442628935, "grad_norm": 1.3483720960038994, "learning_rate": 8.730478410685654e-07, "loss": 0.436, "step": 14669 }, { "epoch": 0.2550018251664378, "grad_norm": 2.06354326762147, "learning_rate": 8.730290975583505e-07, "loss": 0.2451, "step": 14670 }, { "epoch": 0.2550192077039406, "grad_norm": 1.7745410061372893, "learning_rate": 8.730103528658048e-07, "loss": 0.325, "step": 14671 }, { "epoch": 0.25503659024144343, "grad_norm": 1.702875874785719, "learning_rate": 8.729916069909878e-07, "loss": 0.3143, "step": 14672 }, { "epoch": 0.25505397277894626, "grad_norm": 2.0064070927583852, "learning_rate": 8.729728599339587e-07, "loss": 0.3904, "step": 14673 }, { "epoch": 0.2550713553164491, "grad_norm": 1.4894847016542714, "learning_rate": 8.72954111694777e-07, "loss": 0.2847, "step": 14674 }, { "epoch": 0.2550887378539519, "grad_norm": 1.691003743933199, "learning_rate": 8.729353622735018e-07, "loss": 0.2665, "step": 14675 }, { "epoch": 0.25510612039145475, "grad_norm": 3.640372336757377, "learning_rate": 8.729166116701932e-07, "loss": 0.3531, "step": 14676 }, { "epoch": 0.2551235029289576, "grad_norm": 1.98330173374766, "learning_rate": 8.7289785988491e-07, "loss": 0.2743, "step": 14677 }, { "epoch": 0.2551408854664604, "grad_norm": 2.006733829220634, "learning_rate": 8.72879106917712e-07, "loss": 0.1903, "step": 14678 }, { "epoch": 0.25515826800396324, "grad_norm": 1.686063192418371, "learning_rate": 8.728603527686585e-07, "loss": 0.2668, "step": 14679 }, { "epoch": 0.25517565054146607, "grad_norm": 2.437679098915895, "learning_rate": 8.728415974378088e-07, "loss": 0.4304, "step": 14680 }, { "epoch": 0.25519303307896885, "grad_norm": 2.6404946675840364, "learning_rate": 8.728228409252228e-07, "loss": 0.375, "step": 14681 }, { "epoch": 0.2552104156164717, "grad_norm": 1.7906646960688741, "learning_rate": 8.728040832309593e-07, "loss": 0.3402, "step": 14682 }, { "epoch": 0.2552277981539745, "grad_norm": 1.4728280467747326, "learning_rate": 8.727853243550784e-07, "loss": 0.2286, "step": 14683 }, { "epoch": 0.25524518069147734, "grad_norm": 2.7028768784439046, "learning_rate": 8.727665642976392e-07, "loss": 0.2943, "step": 14684 }, { "epoch": 0.25526256322898017, "grad_norm": 2.0123104032130765, "learning_rate": 8.727478030587012e-07, "loss": 0.2277, "step": 14685 }, { "epoch": 0.255279945766483, "grad_norm": 1.4330887693443097, "learning_rate": 8.727290406383238e-07, "loss": 0.205, "step": 14686 }, { "epoch": 0.2552973283039858, "grad_norm": 1.638344870322016, "learning_rate": 8.727102770365667e-07, "loss": 0.3207, "step": 14687 }, { "epoch": 0.25531471084148866, "grad_norm": 1.670020777578933, "learning_rate": 8.726915122534891e-07, "loss": 0.364, "step": 14688 }, { "epoch": 0.2553320933789915, "grad_norm": 2.0380501443199783, "learning_rate": 8.726727462891506e-07, "loss": 0.2699, "step": 14689 }, { "epoch": 0.2553494759164943, "grad_norm": 2.2654210202264866, "learning_rate": 8.726539791436108e-07, "loss": 0.2439, "step": 14690 }, { "epoch": 0.2553668584539971, "grad_norm": 1.5286757012075123, "learning_rate": 8.72635210816929e-07, "loss": 0.2688, "step": 14691 }, { "epoch": 0.2553842409914999, "grad_norm": 1.5647635424082518, "learning_rate": 8.726164413091647e-07, "loss": 0.2696, "step": 14692 }, { "epoch": 0.25540162352900275, "grad_norm": 1.692037553074314, "learning_rate": 8.725976706203777e-07, "loss": 0.3045, "step": 14693 }, { "epoch": 0.2554190060665056, "grad_norm": 2.4787820066228994, "learning_rate": 8.725788987506271e-07, "loss": 0.3135, "step": 14694 }, { "epoch": 0.2554363886040084, "grad_norm": 2.1430158999307216, "learning_rate": 8.725601256999724e-07, "loss": 0.3161, "step": 14695 }, { "epoch": 0.25545377114151124, "grad_norm": 2.2647500288768536, "learning_rate": 8.725413514684734e-07, "loss": 0.3225, "step": 14696 }, { "epoch": 0.2554711536790141, "grad_norm": 3.3855493567316897, "learning_rate": 8.725225760561894e-07, "loss": 0.5406, "step": 14697 }, { "epoch": 0.2554885362165169, "grad_norm": 1.7898759158995223, "learning_rate": 8.7250379946318e-07, "loss": 0.2333, "step": 14698 }, { "epoch": 0.25550591875401973, "grad_norm": 1.482404966481098, "learning_rate": 8.724850216895045e-07, "loss": 0.2761, "step": 14699 }, { "epoch": 0.2555233012915225, "grad_norm": 2.1351829115345304, "learning_rate": 8.724662427352229e-07, "loss": 0.242, "step": 14700 }, { "epoch": 0.25554068382902534, "grad_norm": 1.9124678173836203, "learning_rate": 8.724474626003941e-07, "loss": 0.3808, "step": 14701 }, { "epoch": 0.25555806636652817, "grad_norm": 2.163276362669731, "learning_rate": 8.724286812850781e-07, "loss": 0.4523, "step": 14702 }, { "epoch": 0.255575448904031, "grad_norm": 2.375006216834277, "learning_rate": 8.724098987893342e-07, "loss": 0.2126, "step": 14703 }, { "epoch": 0.25559283144153383, "grad_norm": 1.6463333831595375, "learning_rate": 8.72391115113222e-07, "loss": 0.2275, "step": 14704 }, { "epoch": 0.25561021397903666, "grad_norm": 1.8563783979511335, "learning_rate": 8.723723302568011e-07, "loss": 0.3326, "step": 14705 }, { "epoch": 0.2556275965165395, "grad_norm": 2.281074712340521, "learning_rate": 8.723535442201309e-07, "loss": 0.3938, "step": 14706 }, { "epoch": 0.2556449790540423, "grad_norm": 1.6783690780946585, "learning_rate": 8.723347570032711e-07, "loss": 0.3173, "step": 14707 }, { "epoch": 0.25566236159154515, "grad_norm": 1.3973854037936653, "learning_rate": 8.72315968606281e-07, "loss": 0.2911, "step": 14708 }, { "epoch": 0.255679744129048, "grad_norm": 1.325047947380341, "learning_rate": 8.722971790292204e-07, "loss": 0.3626, "step": 14709 }, { "epoch": 0.25569712666655076, "grad_norm": 3.1056252158616537, "learning_rate": 8.722783882721488e-07, "loss": 0.4536, "step": 14710 }, { "epoch": 0.2557145092040536, "grad_norm": 1.8938396088041585, "learning_rate": 8.722595963351256e-07, "loss": 0.3564, "step": 14711 }, { "epoch": 0.2557318917415564, "grad_norm": 1.3353696120710001, "learning_rate": 8.722408032182105e-07, "loss": 0.3678, "step": 14712 }, { "epoch": 0.25574927427905925, "grad_norm": 1.9690496558381145, "learning_rate": 8.722220089214631e-07, "loss": 0.2922, "step": 14713 }, { "epoch": 0.2557666568165621, "grad_norm": 1.8457988874772056, "learning_rate": 8.722032134449428e-07, "loss": 0.3193, "step": 14714 }, { "epoch": 0.2557840393540649, "grad_norm": 1.4230348180170582, "learning_rate": 8.721844167887095e-07, "loss": 0.2696, "step": 14715 }, { "epoch": 0.25580142189156774, "grad_norm": 1.05273373694657, "learning_rate": 8.721656189528224e-07, "loss": 0.2872, "step": 14716 }, { "epoch": 0.25581880442907057, "grad_norm": 1.6563118693758916, "learning_rate": 8.721468199373414e-07, "loss": 0.2195, "step": 14717 }, { "epoch": 0.2558361869665734, "grad_norm": 2.1339233243267723, "learning_rate": 8.721280197423258e-07, "loss": 0.3329, "step": 14718 }, { "epoch": 0.25585356950407623, "grad_norm": 2.073675400757786, "learning_rate": 8.721092183678353e-07, "loss": 0.2873, "step": 14719 }, { "epoch": 0.255870952041579, "grad_norm": 2.3636735154264406, "learning_rate": 8.720904158139296e-07, "loss": 0.3873, "step": 14720 }, { "epoch": 0.25588833457908183, "grad_norm": 1.589425589124683, "learning_rate": 8.720716120806682e-07, "loss": 0.2366, "step": 14721 }, { "epoch": 0.25590571711658466, "grad_norm": 3.2379117211595174, "learning_rate": 8.720528071681106e-07, "loss": 0.4124, "step": 14722 }, { "epoch": 0.2559230996540875, "grad_norm": 1.4569262854140683, "learning_rate": 8.720340010763167e-07, "loss": 0.3207, "step": 14723 }, { "epoch": 0.2559404821915903, "grad_norm": 2.010091419532085, "learning_rate": 8.720151938053457e-07, "loss": 0.2172, "step": 14724 }, { "epoch": 0.25595786472909315, "grad_norm": 1.5502389130929348, "learning_rate": 8.719963853552576e-07, "loss": 0.3903, "step": 14725 }, { "epoch": 0.255975247266596, "grad_norm": 1.2941347321853056, "learning_rate": 8.719775757261117e-07, "loss": 0.2177, "step": 14726 }, { "epoch": 0.2559926298040988, "grad_norm": 2.6219393844127334, "learning_rate": 8.719587649179678e-07, "loss": 0.3322, "step": 14727 }, { "epoch": 0.25601001234160164, "grad_norm": 1.5844259403552046, "learning_rate": 8.719399529308856e-07, "loss": 0.2939, "step": 14728 }, { "epoch": 0.2560273948791045, "grad_norm": 2.152683052415086, "learning_rate": 8.719211397649244e-07, "loss": 0.4526, "step": 14729 }, { "epoch": 0.25604477741660725, "grad_norm": 1.5280010678820155, "learning_rate": 8.719023254201443e-07, "loss": 0.3391, "step": 14730 }, { "epoch": 0.2560621599541101, "grad_norm": 2.2795150054575806, "learning_rate": 8.718835098966045e-07, "loss": 0.3039, "step": 14731 }, { "epoch": 0.2560795424916129, "grad_norm": 1.5371719502838963, "learning_rate": 8.718646931943647e-07, "loss": 0.235, "step": 14732 }, { "epoch": 0.25609692502911574, "grad_norm": 1.0654899692715332, "learning_rate": 8.718458753134847e-07, "loss": 0.2749, "step": 14733 }, { "epoch": 0.25611430756661857, "grad_norm": 4.19558690486878, "learning_rate": 8.718270562540242e-07, "loss": 0.2713, "step": 14734 }, { "epoch": 0.2561316901041214, "grad_norm": 1.7666163395197405, "learning_rate": 8.718082360160427e-07, "loss": 0.3059, "step": 14735 }, { "epoch": 0.25614907264162423, "grad_norm": 1.4551767770856112, "learning_rate": 8.717894145995999e-07, "loss": 0.451, "step": 14736 }, { "epoch": 0.25616645517912706, "grad_norm": 1.7081278202647137, "learning_rate": 8.717705920047555e-07, "loss": 0.2895, "step": 14737 }, { "epoch": 0.2561838377166299, "grad_norm": 3.2508472017307457, "learning_rate": 8.71751768231569e-07, "loss": 0.5073, "step": 14738 }, { "epoch": 0.2562012202541327, "grad_norm": 1.8265971419647935, "learning_rate": 8.717329432801002e-07, "loss": 0.3107, "step": 14739 }, { "epoch": 0.2562186027916355, "grad_norm": 1.916334242577685, "learning_rate": 8.717141171504085e-07, "loss": 0.3514, "step": 14740 }, { "epoch": 0.2562359853291383, "grad_norm": 2.154554356307923, "learning_rate": 8.716952898425541e-07, "loss": 0.2758, "step": 14741 }, { "epoch": 0.25625336786664116, "grad_norm": 1.4420948594151053, "learning_rate": 8.716764613565963e-07, "loss": 0.3703, "step": 14742 }, { "epoch": 0.256270750404144, "grad_norm": 1.5403404287323998, "learning_rate": 8.716576316925948e-07, "loss": 0.1918, "step": 14743 }, { "epoch": 0.2562881329416468, "grad_norm": 2.6617008525134622, "learning_rate": 8.716388008506094e-07, "loss": 0.2505, "step": 14744 }, { "epoch": 0.25630551547914965, "grad_norm": 1.6979387022408723, "learning_rate": 8.716199688306997e-07, "loss": 0.1569, "step": 14745 }, { "epoch": 0.2563228980166525, "grad_norm": 1.3610965618266357, "learning_rate": 8.716011356329254e-07, "loss": 0.2369, "step": 14746 }, { "epoch": 0.2563402805541553, "grad_norm": 1.9042141144991356, "learning_rate": 8.715823012573461e-07, "loss": 0.2786, "step": 14747 }, { "epoch": 0.25635766309165814, "grad_norm": 2.1704719470286573, "learning_rate": 8.715634657040216e-07, "loss": 0.2887, "step": 14748 }, { "epoch": 0.25637504562916097, "grad_norm": 2.439956957783401, "learning_rate": 8.715446289730118e-07, "loss": 0.2696, "step": 14749 }, { "epoch": 0.25639242816666374, "grad_norm": 0.9992354298508962, "learning_rate": 8.715257910643761e-07, "loss": 0.2194, "step": 14750 }, { "epoch": 0.2564098107041666, "grad_norm": 1.7918294608264842, "learning_rate": 8.715069519781741e-07, "loss": 0.2309, "step": 14751 }, { "epoch": 0.2564271932416694, "grad_norm": 1.5853554452694898, "learning_rate": 8.71488111714466e-07, "loss": 0.2963, "step": 14752 }, { "epoch": 0.25644457577917223, "grad_norm": 1.434685183730862, "learning_rate": 8.714692702733111e-07, "loss": 0.2767, "step": 14753 }, { "epoch": 0.25646195831667506, "grad_norm": 1.6591401741714065, "learning_rate": 8.714504276547693e-07, "loss": 0.1984, "step": 14754 }, { "epoch": 0.2564793408541779, "grad_norm": 1.3018060264512297, "learning_rate": 8.714315838589002e-07, "loss": 0.3551, "step": 14755 }, { "epoch": 0.2564967233916807, "grad_norm": 1.8709070970679853, "learning_rate": 8.714127388857637e-07, "loss": 0.1882, "step": 14756 }, { "epoch": 0.25651410592918356, "grad_norm": 1.303152551685495, "learning_rate": 8.713938927354194e-07, "loss": 0.4082, "step": 14757 }, { "epoch": 0.2565314884666864, "grad_norm": 2.2954711623620914, "learning_rate": 8.713750454079272e-07, "loss": 0.3795, "step": 14758 }, { "epoch": 0.2565488710041892, "grad_norm": 1.3831844780312155, "learning_rate": 8.713561969033465e-07, "loss": 0.3134, "step": 14759 }, { "epoch": 0.256566253541692, "grad_norm": 2.615720035360986, "learning_rate": 8.713373472217372e-07, "loss": 0.3202, "step": 14760 }, { "epoch": 0.2565836360791948, "grad_norm": 1.745361490630007, "learning_rate": 8.713184963631592e-07, "loss": 0.2357, "step": 14761 }, { "epoch": 0.25660101861669765, "grad_norm": 1.6304789229549774, "learning_rate": 8.712996443276722e-07, "loss": 0.1956, "step": 14762 }, { "epoch": 0.2566184011542005, "grad_norm": 1.5463129952753458, "learning_rate": 8.712807911153358e-07, "loss": 0.2287, "step": 14763 }, { "epoch": 0.2566357836917033, "grad_norm": 2.414949255660984, "learning_rate": 8.7126193672621e-07, "loss": 0.3634, "step": 14764 }, { "epoch": 0.25665316622920614, "grad_norm": 3.0981347263624013, "learning_rate": 8.712430811603542e-07, "loss": 0.2599, "step": 14765 }, { "epoch": 0.256670548766709, "grad_norm": 2.0786769763178485, "learning_rate": 8.712242244178283e-07, "loss": 0.479, "step": 14766 }, { "epoch": 0.2566879313042118, "grad_norm": 1.4929681414738987, "learning_rate": 8.712053664986923e-07, "loss": 0.246, "step": 14767 }, { "epoch": 0.25670531384171463, "grad_norm": 1.7591316770123342, "learning_rate": 8.711865074030058e-07, "loss": 0.2852, "step": 14768 }, { "epoch": 0.25672269637921746, "grad_norm": 1.674475823388749, "learning_rate": 8.711676471308286e-07, "loss": 0.3072, "step": 14769 }, { "epoch": 0.25674007891672024, "grad_norm": 1.4344470120272488, "learning_rate": 8.711487856822204e-07, "loss": 0.2466, "step": 14770 }, { "epoch": 0.25675746145422307, "grad_norm": 4.129339232933777, "learning_rate": 8.711299230572411e-07, "loss": 0.3652, "step": 14771 }, { "epoch": 0.2567748439917259, "grad_norm": 1.719211824342242, "learning_rate": 8.711110592559503e-07, "loss": 0.3677, "step": 14772 }, { "epoch": 0.25679222652922873, "grad_norm": 1.2304747151931585, "learning_rate": 8.710921942784079e-07, "loss": 0.2207, "step": 14773 }, { "epoch": 0.25680960906673156, "grad_norm": 1.819018075283913, "learning_rate": 8.71073328124674e-07, "loss": 0.1824, "step": 14774 }, { "epoch": 0.2568269916042344, "grad_norm": 1.813449151595761, "learning_rate": 8.710544607948078e-07, "loss": 0.3343, "step": 14775 }, { "epoch": 0.2568443741417372, "grad_norm": 2.3374090775387613, "learning_rate": 8.710355922888696e-07, "loss": 0.294, "step": 14776 }, { "epoch": 0.25686175667924005, "grad_norm": 3.0175129825817875, "learning_rate": 8.710167226069189e-07, "loss": 0.371, "step": 14777 }, { "epoch": 0.2568791392167429, "grad_norm": 1.6979396169994918, "learning_rate": 8.709978517490156e-07, "loss": 0.349, "step": 14778 }, { "epoch": 0.2568965217542457, "grad_norm": 1.6063962842902375, "learning_rate": 8.709789797152196e-07, "loss": 0.4178, "step": 14779 }, { "epoch": 0.2569139042917485, "grad_norm": 1.6295012577729135, "learning_rate": 8.709601065055907e-07, "loss": 0.2736, "step": 14780 }, { "epoch": 0.2569312868292513, "grad_norm": 2.594798719723505, "learning_rate": 8.709412321201885e-07, "loss": 0.3261, "step": 14781 }, { "epoch": 0.25694866936675415, "grad_norm": 1.9695791597254944, "learning_rate": 8.709223565590731e-07, "loss": 0.2308, "step": 14782 }, { "epoch": 0.256966051904257, "grad_norm": 4.504943901528115, "learning_rate": 8.709034798223043e-07, "loss": 0.7234, "step": 14783 }, { "epoch": 0.2569834344417598, "grad_norm": 1.2031487574848432, "learning_rate": 8.708846019099417e-07, "loss": 0.2209, "step": 14784 }, { "epoch": 0.25700081697926264, "grad_norm": 2.0242359113012682, "learning_rate": 8.708657228220453e-07, "loss": 0.3319, "step": 14785 }, { "epoch": 0.25701819951676547, "grad_norm": 1.1035355064165535, "learning_rate": 8.708468425586751e-07, "loss": 0.4187, "step": 14786 }, { "epoch": 0.2570355820542683, "grad_norm": 1.6680223602986715, "learning_rate": 8.708279611198905e-07, "loss": 0.2195, "step": 14787 }, { "epoch": 0.2570529645917711, "grad_norm": 1.9332023958457218, "learning_rate": 8.708090785057518e-07, "loss": 0.3527, "step": 14788 }, { "epoch": 0.25707034712927396, "grad_norm": 1.922356449152829, "learning_rate": 8.707901947163185e-07, "loss": 0.2381, "step": 14789 }, { "epoch": 0.25708772966677673, "grad_norm": 1.7963797168345166, "learning_rate": 8.707713097516508e-07, "loss": 0.2899, "step": 14790 }, { "epoch": 0.25710511220427956, "grad_norm": 2.007735325582474, "learning_rate": 8.707524236118082e-07, "loss": 0.321, "step": 14791 }, { "epoch": 0.2571224947417824, "grad_norm": 1.811866307700911, "learning_rate": 8.707335362968508e-07, "loss": 0.3212, "step": 14792 }, { "epoch": 0.2571398772792852, "grad_norm": 3.143481931354791, "learning_rate": 8.707146478068384e-07, "loss": 0.328, "step": 14793 }, { "epoch": 0.25715725981678805, "grad_norm": 2.1484733667742217, "learning_rate": 8.706957581418308e-07, "loss": 0.282, "step": 14794 }, { "epoch": 0.2571746423542909, "grad_norm": 1.1792344075008654, "learning_rate": 8.706768673018881e-07, "loss": 0.262, "step": 14795 }, { "epoch": 0.2571920248917937, "grad_norm": 1.319095667834195, "learning_rate": 8.706579752870698e-07, "loss": 0.2229, "step": 14796 }, { "epoch": 0.25720940742929654, "grad_norm": 1.8113656840612902, "learning_rate": 8.706390820974361e-07, "loss": 0.2867, "step": 14797 }, { "epoch": 0.2572267899667994, "grad_norm": 1.79152483568258, "learning_rate": 8.706201877330468e-07, "loss": 0.3133, "step": 14798 }, { "epoch": 0.2572441725043022, "grad_norm": 1.7262024232871171, "learning_rate": 8.706012921939617e-07, "loss": 0.3871, "step": 14799 }, { "epoch": 0.257261555041805, "grad_norm": 3.0440534664690873, "learning_rate": 8.705823954802407e-07, "loss": 0.2663, "step": 14800 }, { "epoch": 0.2572789375793078, "grad_norm": 1.783319393227551, "learning_rate": 8.705634975919437e-07, "loss": 0.4199, "step": 14801 }, { "epoch": 0.25729632011681064, "grad_norm": 2.5866247896799828, "learning_rate": 8.705445985291306e-07, "loss": 0.5294, "step": 14802 }, { "epoch": 0.25731370265431347, "grad_norm": 1.609994432554555, "learning_rate": 8.705256982918616e-07, "loss": 0.2572, "step": 14803 }, { "epoch": 0.2573310851918163, "grad_norm": 1.3702954178369544, "learning_rate": 8.705067968801961e-07, "loss": 0.2539, "step": 14804 }, { "epoch": 0.25734846772931913, "grad_norm": 1.5886438663126714, "learning_rate": 8.704878942941943e-07, "loss": 0.3576, "step": 14805 }, { "epoch": 0.25736585026682196, "grad_norm": 0.929987023855183, "learning_rate": 8.704689905339162e-07, "loss": 0.2448, "step": 14806 }, { "epoch": 0.2573832328043248, "grad_norm": 1.157311771281739, "learning_rate": 8.704500855994215e-07, "loss": 0.3321, "step": 14807 }, { "epoch": 0.2574006153418276, "grad_norm": 1.8927643228821187, "learning_rate": 8.704311794907701e-07, "loss": 0.163, "step": 14808 }, { "epoch": 0.25741799787933045, "grad_norm": 1.044008093550626, "learning_rate": 8.704122722080221e-07, "loss": 0.3497, "step": 14809 }, { "epoch": 0.2574353804168332, "grad_norm": 1.7281345599069493, "learning_rate": 8.703933637512373e-07, "loss": 0.3238, "step": 14810 }, { "epoch": 0.25745276295433606, "grad_norm": 1.7946779737386598, "learning_rate": 8.703744541204758e-07, "loss": 0.2567, "step": 14811 }, { "epoch": 0.2574701454918389, "grad_norm": 1.5870625746558447, "learning_rate": 8.703555433157973e-07, "loss": 0.244, "step": 14812 }, { "epoch": 0.2574875280293417, "grad_norm": 2.2129488988036603, "learning_rate": 8.70336631337262e-07, "loss": 0.3158, "step": 14813 }, { "epoch": 0.25750491056684455, "grad_norm": 2.1336068459847444, "learning_rate": 8.703177181849295e-07, "loss": 0.347, "step": 14814 }, { "epoch": 0.2575222931043474, "grad_norm": 1.6226272790261602, "learning_rate": 8.702988038588601e-07, "loss": 0.2863, "step": 14815 }, { "epoch": 0.2575396756418502, "grad_norm": 2.158933206375575, "learning_rate": 8.702798883591135e-07, "loss": 0.4428, "step": 14816 }, { "epoch": 0.25755705817935304, "grad_norm": 1.4739813018788628, "learning_rate": 8.702609716857498e-07, "loss": 0.235, "step": 14817 }, { "epoch": 0.25757444071685587, "grad_norm": 1.8435013559420996, "learning_rate": 8.702420538388288e-07, "loss": 0.5078, "step": 14818 }, { "epoch": 0.2575918232543587, "grad_norm": 2.272628472761876, "learning_rate": 8.702231348184108e-07, "loss": 0.281, "step": 14819 }, { "epoch": 0.2576092057918615, "grad_norm": 1.8050791345478214, "learning_rate": 8.702042146245553e-07, "loss": 0.2375, "step": 14820 }, { "epoch": 0.2576265883293643, "grad_norm": 1.9773699563996934, "learning_rate": 8.701852932573226e-07, "loss": 0.3833, "step": 14821 }, { "epoch": 0.25764397086686713, "grad_norm": 1.4601384240024684, "learning_rate": 8.701663707167725e-07, "loss": 0.2621, "step": 14822 }, { "epoch": 0.25766135340436996, "grad_norm": 2.023877862492863, "learning_rate": 8.70147447002965e-07, "loss": 0.2875, "step": 14823 }, { "epoch": 0.2576787359418728, "grad_norm": 1.7585763078989545, "learning_rate": 8.701285221159604e-07, "loss": 0.3296, "step": 14824 }, { "epoch": 0.2576961184793756, "grad_norm": 1.9272122340903866, "learning_rate": 8.701095960558181e-07, "loss": 0.3161, "step": 14825 }, { "epoch": 0.25771350101687845, "grad_norm": 2.8227684465827347, "learning_rate": 8.700906688225987e-07, "loss": 0.3129, "step": 14826 }, { "epoch": 0.2577308835543813, "grad_norm": 1.1188418167626706, "learning_rate": 8.700717404163616e-07, "loss": 0.3423, "step": 14827 }, { "epoch": 0.2577482660918841, "grad_norm": 2.4812166627125545, "learning_rate": 8.700528108371672e-07, "loss": 0.4101, "step": 14828 }, { "epoch": 0.25776564862938695, "grad_norm": 1.2582350158178748, "learning_rate": 8.700338800850753e-07, "loss": 0.3754, "step": 14829 }, { "epoch": 0.2577830311668897, "grad_norm": 1.6419440892186394, "learning_rate": 8.700149481601461e-07, "loss": 0.3443, "step": 14830 }, { "epoch": 0.25780041370439255, "grad_norm": 1.228603229244853, "learning_rate": 8.699960150624394e-07, "loss": 0.2528, "step": 14831 }, { "epoch": 0.2578177962418954, "grad_norm": 1.5428819270067708, "learning_rate": 8.699770807920154e-07, "loss": 0.3541, "step": 14832 }, { "epoch": 0.2578351787793982, "grad_norm": 2.7105377381360634, "learning_rate": 8.699581453489339e-07, "loss": 0.4284, "step": 14833 }, { "epoch": 0.25785256131690104, "grad_norm": 1.7575921086281854, "learning_rate": 8.699392087332551e-07, "loss": 0.3544, "step": 14834 }, { "epoch": 0.25786994385440387, "grad_norm": 2.149929287289761, "learning_rate": 8.699202709450389e-07, "loss": 0.3915, "step": 14835 }, { "epoch": 0.2578873263919067, "grad_norm": 1.8051556214629612, "learning_rate": 8.699013319843453e-07, "loss": 0.3197, "step": 14836 }, { "epoch": 0.25790470892940953, "grad_norm": 1.8288771068260965, "learning_rate": 8.698823918512343e-07, "loss": 0.324, "step": 14837 }, { "epoch": 0.25792209146691236, "grad_norm": 1.40755745420226, "learning_rate": 8.698634505457663e-07, "loss": 0.2644, "step": 14838 }, { "epoch": 0.25793947400441514, "grad_norm": 1.3192708713418486, "learning_rate": 8.69844508068001e-07, "loss": 0.3275, "step": 14839 }, { "epoch": 0.25795685654191797, "grad_norm": 1.5312198716736458, "learning_rate": 8.698255644179984e-07, "loss": 0.4509, "step": 14840 }, { "epoch": 0.2579742390794208, "grad_norm": 1.7763504298944666, "learning_rate": 8.698066195958186e-07, "loss": 0.4703, "step": 14841 }, { "epoch": 0.2579916216169236, "grad_norm": 2.1569198237092526, "learning_rate": 8.697876736015219e-07, "loss": 0.486, "step": 14842 }, { "epoch": 0.25800900415442646, "grad_norm": 1.1689676310692367, "learning_rate": 8.697687264351681e-07, "loss": 0.2518, "step": 14843 }, { "epoch": 0.2580263866919293, "grad_norm": 1.78241030683595, "learning_rate": 8.697497780968172e-07, "loss": 0.3417, "step": 14844 }, { "epoch": 0.2580437692294321, "grad_norm": 2.082714510052601, "learning_rate": 8.697308285865294e-07, "loss": 0.3704, "step": 14845 }, { "epoch": 0.25806115176693495, "grad_norm": 2.06563742378641, "learning_rate": 8.697118779043646e-07, "loss": 0.3103, "step": 14846 }, { "epoch": 0.2580785343044378, "grad_norm": 2.9192817927389356, "learning_rate": 8.696929260503832e-07, "loss": 0.3453, "step": 14847 }, { "epoch": 0.2580959168419406, "grad_norm": 1.4536302137776957, "learning_rate": 8.69673973024645e-07, "loss": 0.4048, "step": 14848 }, { "epoch": 0.2581132993794434, "grad_norm": 1.6322619073099982, "learning_rate": 8.696550188272099e-07, "loss": 0.5192, "step": 14849 }, { "epoch": 0.2581306819169462, "grad_norm": 1.9539482918995517, "learning_rate": 8.696360634581384e-07, "loss": 0.3579, "step": 14850 }, { "epoch": 0.25814806445444904, "grad_norm": 1.918637781078256, "learning_rate": 8.696171069174902e-07, "loss": 0.2125, "step": 14851 }, { "epoch": 0.2581654469919519, "grad_norm": 1.4043112393924393, "learning_rate": 8.695981492053257e-07, "loss": 0.312, "step": 14852 }, { "epoch": 0.2581828295294547, "grad_norm": 1.1928614197912109, "learning_rate": 8.695791903217049e-07, "loss": 0.2207, "step": 14853 }, { "epoch": 0.25820021206695754, "grad_norm": 1.4793748755366707, "learning_rate": 8.695602302666877e-07, "loss": 0.3714, "step": 14854 }, { "epoch": 0.25821759460446037, "grad_norm": 1.9916728498068388, "learning_rate": 8.695412690403344e-07, "loss": 0.4424, "step": 14855 }, { "epoch": 0.2582349771419632, "grad_norm": 2.1686504526131203, "learning_rate": 8.695223066427051e-07, "loss": 0.3694, "step": 14856 }, { "epoch": 0.258252359679466, "grad_norm": 1.1246477044492567, "learning_rate": 8.695033430738596e-07, "loss": 0.2086, "step": 14857 }, { "epoch": 0.25826974221696886, "grad_norm": 2.001853765092672, "learning_rate": 8.694843783338584e-07, "loss": 0.3809, "step": 14858 }, { "epoch": 0.25828712475447163, "grad_norm": 1.3313410047409027, "learning_rate": 8.694654124227616e-07, "loss": 0.2984, "step": 14859 }, { "epoch": 0.25830450729197446, "grad_norm": 2.772737286205394, "learning_rate": 8.694464453406289e-07, "loss": 0.385, "step": 14860 }, { "epoch": 0.2583218898294773, "grad_norm": 1.9371715162480971, "learning_rate": 8.694274770875208e-07, "loss": 0.3619, "step": 14861 }, { "epoch": 0.2583392723669801, "grad_norm": 1.2898719483825265, "learning_rate": 8.694085076634973e-07, "loss": 0.3028, "step": 14862 }, { "epoch": 0.25835665490448295, "grad_norm": 1.254368016741928, "learning_rate": 8.693895370686184e-07, "loss": 0.2526, "step": 14863 }, { "epoch": 0.2583740374419858, "grad_norm": 1.2242433434341295, "learning_rate": 8.693705653029445e-07, "loss": 0.1646, "step": 14864 }, { "epoch": 0.2583914199794886, "grad_norm": 1.3094008607954208, "learning_rate": 8.693515923665356e-07, "loss": 0.2248, "step": 14865 }, { "epoch": 0.25840880251699144, "grad_norm": 1.6881865917585406, "learning_rate": 8.693326182594517e-07, "loss": 0.4995, "step": 14866 }, { "epoch": 0.2584261850544943, "grad_norm": 2.2472375518387495, "learning_rate": 8.693136429817531e-07, "loss": 0.4429, "step": 14867 }, { "epoch": 0.2584435675919971, "grad_norm": 1.968458592372118, "learning_rate": 8.692946665334999e-07, "loss": 0.3746, "step": 14868 }, { "epoch": 0.2584609501294999, "grad_norm": 1.4337277326077649, "learning_rate": 8.692756889147523e-07, "loss": 0.3644, "step": 14869 }, { "epoch": 0.2584783326670027, "grad_norm": 1.4041744457562721, "learning_rate": 8.692567101255704e-07, "loss": 0.2404, "step": 14870 }, { "epoch": 0.25849571520450554, "grad_norm": 1.4879532880133888, "learning_rate": 8.692377301660142e-07, "loss": 0.2655, "step": 14871 }, { "epoch": 0.25851309774200837, "grad_norm": 1.4437732213710852, "learning_rate": 8.692187490361441e-07, "loss": 0.2981, "step": 14872 }, { "epoch": 0.2585304802795112, "grad_norm": 2.2401941613591183, "learning_rate": 8.691997667360203e-07, "loss": 0.3536, "step": 14873 }, { "epoch": 0.25854786281701403, "grad_norm": 1.625888763953379, "learning_rate": 8.691807832657026e-07, "loss": 0.5479, "step": 14874 }, { "epoch": 0.25856524535451686, "grad_norm": 1.9859772390317856, "learning_rate": 8.691617986252516e-07, "loss": 0.5424, "step": 14875 }, { "epoch": 0.2585826278920197, "grad_norm": 1.7744721493549447, "learning_rate": 8.691428128147272e-07, "loss": 0.3243, "step": 14876 }, { "epoch": 0.2586000104295225, "grad_norm": 1.401397082235429, "learning_rate": 8.691238258341896e-07, "loss": 0.4487, "step": 14877 }, { "epoch": 0.25861739296702535, "grad_norm": 2.1722695104300853, "learning_rate": 8.691048376836991e-07, "loss": 0.3779, "step": 14878 }, { "epoch": 0.2586347755045281, "grad_norm": 1.686489216813169, "learning_rate": 8.690858483633159e-07, "loss": 0.3164, "step": 14879 }, { "epoch": 0.25865215804203096, "grad_norm": 1.2651476193329851, "learning_rate": 8.690668578731e-07, "loss": 0.275, "step": 14880 }, { "epoch": 0.2586695405795338, "grad_norm": 0.9445412393999747, "learning_rate": 8.690478662131116e-07, "loss": 0.2151, "step": 14881 }, { "epoch": 0.2586869231170366, "grad_norm": 1.8239474892150833, "learning_rate": 8.690288733834112e-07, "loss": 0.4267, "step": 14882 }, { "epoch": 0.25870430565453945, "grad_norm": 1.5170469391347405, "learning_rate": 8.690098793840587e-07, "loss": 0.3385, "step": 14883 }, { "epoch": 0.2587216881920423, "grad_norm": 1.785592275205064, "learning_rate": 8.689908842151143e-07, "loss": 0.3168, "step": 14884 }, { "epoch": 0.2587390707295451, "grad_norm": 2.2945075180697043, "learning_rate": 8.689718878766385e-07, "loss": 0.3683, "step": 14885 }, { "epoch": 0.25875645326704794, "grad_norm": 2.0636777740873957, "learning_rate": 8.689528903686912e-07, "loss": 0.6924, "step": 14886 }, { "epoch": 0.25877383580455077, "grad_norm": 1.5150940807543392, "learning_rate": 8.689338916913327e-07, "loss": 0.2314, "step": 14887 }, { "epoch": 0.2587912183420536, "grad_norm": 2.016738405785266, "learning_rate": 8.689148918446232e-07, "loss": 0.3705, "step": 14888 }, { "epoch": 0.25880860087955637, "grad_norm": 1.9048394412194958, "learning_rate": 8.68895890828623e-07, "loss": 0.3373, "step": 14889 }, { "epoch": 0.2588259834170592, "grad_norm": 3.451304607025221, "learning_rate": 8.688768886433922e-07, "loss": 0.3772, "step": 14890 }, { "epoch": 0.25884336595456203, "grad_norm": 2.826271835902315, "learning_rate": 8.688578852889913e-07, "loss": 0.5156, "step": 14891 }, { "epoch": 0.25886074849206486, "grad_norm": 2.768915845328134, "learning_rate": 8.688388807654802e-07, "loss": 0.3702, "step": 14892 }, { "epoch": 0.2588781310295677, "grad_norm": 2.4346387903671243, "learning_rate": 8.688198750729193e-07, "loss": 0.5629, "step": 14893 }, { "epoch": 0.2588955135670705, "grad_norm": 1.9285940618395994, "learning_rate": 8.688008682113687e-07, "loss": 0.3365, "step": 14894 }, { "epoch": 0.25891289610457335, "grad_norm": 2.4057154164505716, "learning_rate": 8.687818601808891e-07, "loss": 0.2435, "step": 14895 }, { "epoch": 0.2589302786420762, "grad_norm": 2.1613657961085986, "learning_rate": 8.687628509815402e-07, "loss": 0.2215, "step": 14896 }, { "epoch": 0.258947661179579, "grad_norm": 2.4020238785779164, "learning_rate": 8.687438406133824e-07, "loss": 0.6124, "step": 14897 }, { "epoch": 0.25896504371708184, "grad_norm": 1.4544470316328038, "learning_rate": 8.687248290764762e-07, "loss": 0.5381, "step": 14898 }, { "epoch": 0.2589824262545846, "grad_norm": 2.2251400738272236, "learning_rate": 8.687058163708814e-07, "loss": 0.507, "step": 14899 }, { "epoch": 0.25899980879208745, "grad_norm": 5.077477385026344, "learning_rate": 8.686868024966587e-07, "loss": 0.4095, "step": 14900 }, { "epoch": 0.2590171913295903, "grad_norm": 2.192455078254991, "learning_rate": 8.686677874538682e-07, "loss": 0.545, "step": 14901 }, { "epoch": 0.2590345738670931, "grad_norm": 1.5613839498119713, "learning_rate": 8.6864877124257e-07, "loss": 0.3293, "step": 14902 }, { "epoch": 0.25905195640459594, "grad_norm": 2.2926177916207817, "learning_rate": 8.686297538628249e-07, "loss": 0.5768, "step": 14903 }, { "epoch": 0.25906933894209877, "grad_norm": 1.958601835117611, "learning_rate": 8.686107353146926e-07, "loss": 0.4127, "step": 14904 }, { "epoch": 0.2590867214796016, "grad_norm": 1.681842998153229, "learning_rate": 8.685917155982335e-07, "loss": 0.1486, "step": 14905 }, { "epoch": 0.25910410401710443, "grad_norm": 2.4208016616253807, "learning_rate": 8.685726947135081e-07, "loss": 0.3896, "step": 14906 }, { "epoch": 0.25912148655460726, "grad_norm": 1.9529021463349931, "learning_rate": 8.685536726605764e-07, "loss": 0.5231, "step": 14907 }, { "epoch": 0.2591388690921101, "grad_norm": 2.895966943329497, "learning_rate": 8.685346494394992e-07, "loss": 0.1947, "step": 14908 }, { "epoch": 0.25915625162961287, "grad_norm": 1.3848355928897527, "learning_rate": 8.685156250503362e-07, "loss": 0.2931, "step": 14909 }, { "epoch": 0.2591736341671157, "grad_norm": 1.4899655101143146, "learning_rate": 8.68496599493148e-07, "loss": 0.4803, "step": 14910 }, { "epoch": 0.2591910167046185, "grad_norm": 1.721489336703808, "learning_rate": 8.684775727679948e-07, "loss": 0.2521, "step": 14911 }, { "epoch": 0.25920839924212136, "grad_norm": 1.734431837574398, "learning_rate": 8.68458544874937e-07, "loss": 0.2476, "step": 14912 }, { "epoch": 0.2592257817796242, "grad_norm": 1.4863540873837309, "learning_rate": 8.68439515814035e-07, "loss": 0.3916, "step": 14913 }, { "epoch": 0.259243164317127, "grad_norm": 1.8362477658949494, "learning_rate": 8.684204855853488e-07, "loss": 0.3198, "step": 14914 }, { "epoch": 0.25926054685462985, "grad_norm": 1.7116959589977594, "learning_rate": 8.684014541889389e-07, "loss": 0.3573, "step": 14915 }, { "epoch": 0.2592779293921327, "grad_norm": 1.429044705399362, "learning_rate": 8.683824216248656e-07, "loss": 0.1921, "step": 14916 }, { "epoch": 0.2592953119296355, "grad_norm": 2.1644845370927626, "learning_rate": 8.683633878931894e-07, "loss": 0.5618, "step": 14917 }, { "epoch": 0.25931269446713834, "grad_norm": 1.375069071085009, "learning_rate": 8.683443529939705e-07, "loss": 0.2432, "step": 14918 }, { "epoch": 0.2593300770046411, "grad_norm": 2.184800620816052, "learning_rate": 8.68325316927269e-07, "loss": 0.3199, "step": 14919 }, { "epoch": 0.25934745954214394, "grad_norm": 1.4600096189019087, "learning_rate": 8.683062796931455e-07, "loss": 0.4149, "step": 14920 }, { "epoch": 0.2593648420796468, "grad_norm": 2.7392644929529912, "learning_rate": 8.682872412916604e-07, "loss": 0.4679, "step": 14921 }, { "epoch": 0.2593822246171496, "grad_norm": 1.6884625253877414, "learning_rate": 8.682682017228738e-07, "loss": 0.2507, "step": 14922 }, { "epoch": 0.25939960715465243, "grad_norm": 1.7018829366977979, "learning_rate": 8.682491609868461e-07, "loss": 0.4339, "step": 14923 }, { "epoch": 0.25941698969215526, "grad_norm": 4.804085688674288, "learning_rate": 8.682301190836378e-07, "loss": 0.4262, "step": 14924 }, { "epoch": 0.2594343722296581, "grad_norm": 4.83353121978088, "learning_rate": 8.682110760133094e-07, "loss": 0.3396, "step": 14925 }, { "epoch": 0.2594517547671609, "grad_norm": 1.332601060360152, "learning_rate": 8.681920317759208e-07, "loss": 0.2205, "step": 14926 }, { "epoch": 0.25946913730466376, "grad_norm": 2.3259306816036145, "learning_rate": 8.681729863715325e-07, "loss": 0.3483, "step": 14927 }, { "epoch": 0.2594865198421666, "grad_norm": 1.7878953219180878, "learning_rate": 8.681539398002052e-07, "loss": 0.4465, "step": 14928 }, { "epoch": 0.25950390237966936, "grad_norm": 2.4392703072102138, "learning_rate": 8.681348920619988e-07, "loss": 0.4927, "step": 14929 }, { "epoch": 0.2595212849171722, "grad_norm": 4.0206552019838995, "learning_rate": 8.681158431569741e-07, "loss": 0.5212, "step": 14930 }, { "epoch": 0.259538667454675, "grad_norm": 1.728636139380755, "learning_rate": 8.680967930851912e-07, "loss": 0.474, "step": 14931 }, { "epoch": 0.25955604999217785, "grad_norm": 1.168689546020446, "learning_rate": 8.680777418467106e-07, "loss": 0.2109, "step": 14932 }, { "epoch": 0.2595734325296807, "grad_norm": 1.2213879947041373, "learning_rate": 8.680586894415925e-07, "loss": 0.354, "step": 14933 }, { "epoch": 0.2595908150671835, "grad_norm": 1.7750811617263542, "learning_rate": 8.680396358698976e-07, "loss": 0.4604, "step": 14934 }, { "epoch": 0.25960819760468634, "grad_norm": 1.9605500520204482, "learning_rate": 8.68020581131686e-07, "loss": 0.3908, "step": 14935 }, { "epoch": 0.25962558014218917, "grad_norm": 8.62079680569622, "learning_rate": 8.680015252270183e-07, "loss": 0.3342, "step": 14936 }, { "epoch": 0.259642962679692, "grad_norm": 3.3418000857121744, "learning_rate": 8.679824681559547e-07, "loss": 0.2716, "step": 14937 }, { "epoch": 0.25966034521719483, "grad_norm": 2.8591169117914466, "learning_rate": 8.679634099185558e-07, "loss": 0.4029, "step": 14938 }, { "epoch": 0.2596777277546976, "grad_norm": 1.8887590833862382, "learning_rate": 8.679443505148818e-07, "loss": 0.3089, "step": 14939 }, { "epoch": 0.25969511029220044, "grad_norm": 1.81527826070546, "learning_rate": 8.679252899449934e-07, "loss": 0.4733, "step": 14940 }, { "epoch": 0.25971249282970327, "grad_norm": 2.528811330575545, "learning_rate": 8.679062282089507e-07, "loss": 0.2829, "step": 14941 }, { "epoch": 0.2597298753672061, "grad_norm": 1.5465237400513807, "learning_rate": 8.678871653068143e-07, "loss": 0.4592, "step": 14942 }, { "epoch": 0.25974725790470893, "grad_norm": 1.9221116898797685, "learning_rate": 8.678681012386446e-07, "loss": 0.1596, "step": 14943 }, { "epoch": 0.25976464044221176, "grad_norm": 1.9128381163034236, "learning_rate": 8.67849036004502e-07, "loss": 0.3432, "step": 14944 }, { "epoch": 0.2597820229797146, "grad_norm": 1.5436347268776478, "learning_rate": 8.678299696044469e-07, "loss": 0.2404, "step": 14945 }, { "epoch": 0.2597994055172174, "grad_norm": 1.7435099985981999, "learning_rate": 8.678109020385398e-07, "loss": 0.2755, "step": 14946 }, { "epoch": 0.25981678805472025, "grad_norm": 1.4809752204265796, "learning_rate": 8.677918333068412e-07, "loss": 0.4102, "step": 14947 }, { "epoch": 0.2598341705922231, "grad_norm": 1.9627325607771013, "learning_rate": 8.677727634094112e-07, "loss": 0.4699, "step": 14948 }, { "epoch": 0.25985155312972585, "grad_norm": 1.4501540965624051, "learning_rate": 8.677536923463107e-07, "loss": 0.3118, "step": 14949 }, { "epoch": 0.2598689356672287, "grad_norm": 1.7485472251369518, "learning_rate": 8.677346201175998e-07, "loss": 0.4164, "step": 14950 }, { "epoch": 0.2598863182047315, "grad_norm": 1.8196492290505983, "learning_rate": 8.677155467233392e-07, "loss": 0.3336, "step": 14951 }, { "epoch": 0.25990370074223434, "grad_norm": 2.1445874919111563, "learning_rate": 8.676964721635891e-07, "loss": 0.3763, "step": 14952 }, { "epoch": 0.2599210832797372, "grad_norm": 2.707507333028153, "learning_rate": 8.676773964384101e-07, "loss": 0.2901, "step": 14953 }, { "epoch": 0.25993846581724, "grad_norm": 1.7925089682642168, "learning_rate": 8.676583195478626e-07, "loss": 0.3316, "step": 14954 }, { "epoch": 0.25995584835474284, "grad_norm": 0.8168428962578569, "learning_rate": 8.676392414920072e-07, "loss": 0.2199, "step": 14955 }, { "epoch": 0.25997323089224567, "grad_norm": 1.9688324120818101, "learning_rate": 8.676201622709043e-07, "loss": 0.2593, "step": 14956 }, { "epoch": 0.2599906134297485, "grad_norm": 1.9707951580089462, "learning_rate": 8.676010818846145e-07, "loss": 0.3571, "step": 14957 }, { "epoch": 0.2600079959672513, "grad_norm": 1.3751002841591211, "learning_rate": 8.67582000333198e-07, "loss": 0.3585, "step": 14958 }, { "epoch": 0.2600253785047541, "grad_norm": 3.1831567132034966, "learning_rate": 8.675629176167152e-07, "loss": 0.3439, "step": 14959 }, { "epoch": 0.26004276104225693, "grad_norm": 1.743866131915298, "learning_rate": 8.67543833735227e-07, "loss": 0.2922, "step": 14960 }, { "epoch": 0.26006014357975976, "grad_norm": 2.2288393501651123, "learning_rate": 8.675247486887938e-07, "loss": 0.3432, "step": 14961 }, { "epoch": 0.2600775261172626, "grad_norm": 2.243472610162608, "learning_rate": 8.675056624774757e-07, "loss": 0.3669, "step": 14962 }, { "epoch": 0.2600949086547654, "grad_norm": 1.9459883683592434, "learning_rate": 8.674865751013336e-07, "loss": 0.371, "step": 14963 }, { "epoch": 0.26011229119226825, "grad_norm": 2.0781574215138092, "learning_rate": 8.674674865604279e-07, "loss": 0.3407, "step": 14964 }, { "epoch": 0.2601296737297711, "grad_norm": 2.03214758857851, "learning_rate": 8.67448396854819e-07, "loss": 0.2547, "step": 14965 }, { "epoch": 0.2601470562672739, "grad_norm": 1.804540121837962, "learning_rate": 8.674293059845676e-07, "loss": 0.2972, "step": 14966 }, { "epoch": 0.26016443880477674, "grad_norm": 1.9010955600990378, "learning_rate": 8.67410213949734e-07, "loss": 0.3855, "step": 14967 }, { "epoch": 0.2601818213422796, "grad_norm": 1.364584421210166, "learning_rate": 8.673911207503788e-07, "loss": 0.3492, "step": 14968 }, { "epoch": 0.26019920387978235, "grad_norm": 2.0764648902092886, "learning_rate": 8.673720263865624e-07, "loss": 0.3258, "step": 14969 }, { "epoch": 0.2602165864172852, "grad_norm": 2.7600725389555083, "learning_rate": 8.673529308583456e-07, "loss": 0.4846, "step": 14970 }, { "epoch": 0.260233968954788, "grad_norm": 1.656826631241086, "learning_rate": 8.673338341657888e-07, "loss": 0.2506, "step": 14971 }, { "epoch": 0.26025135149229084, "grad_norm": 1.801140596125539, "learning_rate": 8.673147363089523e-07, "loss": 0.2105, "step": 14972 }, { "epoch": 0.26026873402979367, "grad_norm": 1.2254168627207873, "learning_rate": 8.67295637287897e-07, "loss": 0.2105, "step": 14973 }, { "epoch": 0.2602861165672965, "grad_norm": 1.2897030982791036, "learning_rate": 8.672765371026831e-07, "loss": 0.2498, "step": 14974 }, { "epoch": 0.26030349910479933, "grad_norm": 1.9363838264799398, "learning_rate": 8.672574357533713e-07, "loss": 0.2917, "step": 14975 }, { "epoch": 0.26032088164230216, "grad_norm": 2.385608194078101, "learning_rate": 8.672383332400222e-07, "loss": 0.4214, "step": 14976 }, { "epoch": 0.260338264179805, "grad_norm": 2.020979373834839, "learning_rate": 8.672192295626963e-07, "loss": 0.3652, "step": 14977 }, { "epoch": 0.26035564671730776, "grad_norm": 1.8427862633792658, "learning_rate": 8.67200124721454e-07, "loss": 0.3015, "step": 14978 }, { "epoch": 0.2603730292548106, "grad_norm": 1.7403220926962168, "learning_rate": 8.671810187163562e-07, "loss": 0.2599, "step": 14979 }, { "epoch": 0.2603904117923134, "grad_norm": 1.2911386857904907, "learning_rate": 8.671619115474631e-07, "loss": 0.3344, "step": 14980 }, { "epoch": 0.26040779432981626, "grad_norm": 1.6087347923118926, "learning_rate": 8.671428032148354e-07, "loss": 0.3695, "step": 14981 }, { "epoch": 0.2604251768673191, "grad_norm": 1.7942829414640222, "learning_rate": 8.671236937185336e-07, "loss": 0.3801, "step": 14982 }, { "epoch": 0.2604425594048219, "grad_norm": 1.6909455306632633, "learning_rate": 8.671045830586184e-07, "loss": 0.3615, "step": 14983 }, { "epoch": 0.26045994194232475, "grad_norm": 2.376337836635525, "learning_rate": 8.670854712351503e-07, "loss": 0.3622, "step": 14984 }, { "epoch": 0.2604773244798276, "grad_norm": 2.1171378831167567, "learning_rate": 8.670663582481898e-07, "loss": 0.2997, "step": 14985 }, { "epoch": 0.2604947070173304, "grad_norm": 1.1520105163483347, "learning_rate": 8.670472440977979e-07, "loss": 0.1874, "step": 14986 }, { "epoch": 0.26051208955483324, "grad_norm": 1.164570622679657, "learning_rate": 8.670281287840344e-07, "loss": 0.2957, "step": 14987 }, { "epoch": 0.260529472092336, "grad_norm": 1.7204939487336677, "learning_rate": 8.670090123069607e-07, "loss": 0.1445, "step": 14988 }, { "epoch": 0.26054685462983884, "grad_norm": 1.4617359304202542, "learning_rate": 8.669898946666367e-07, "loss": 0.2916, "step": 14989 }, { "epoch": 0.2605642371673417, "grad_norm": 1.4422792382815346, "learning_rate": 8.669707758631235e-07, "loss": 0.2343, "step": 14990 }, { "epoch": 0.2605816197048445, "grad_norm": 2.1615266632786825, "learning_rate": 8.669516558964816e-07, "loss": 0.4707, "step": 14991 }, { "epoch": 0.26059900224234733, "grad_norm": 1.381509641865935, "learning_rate": 8.669325347667713e-07, "loss": 0.3412, "step": 14992 }, { "epoch": 0.26061638477985016, "grad_norm": 4.52332556477459, "learning_rate": 8.669134124740536e-07, "loss": 0.3432, "step": 14993 }, { "epoch": 0.260633767317353, "grad_norm": 2.0221723474873996, "learning_rate": 8.668942890183888e-07, "loss": 0.2609, "step": 14994 }, { "epoch": 0.2606511498548558, "grad_norm": 1.3872542140659339, "learning_rate": 8.668751643998377e-07, "loss": 0.2221, "step": 14995 }, { "epoch": 0.26066853239235865, "grad_norm": 1.4318680819558003, "learning_rate": 8.668560386184608e-07, "loss": 0.2774, "step": 14996 }, { "epoch": 0.2606859149298615, "grad_norm": 1.8290176546404062, "learning_rate": 8.668369116743189e-07, "loss": 0.3016, "step": 14997 }, { "epoch": 0.26070329746736426, "grad_norm": 2.084217934595876, "learning_rate": 8.668177835674724e-07, "loss": 0.3274, "step": 14998 }, { "epoch": 0.2607206800048671, "grad_norm": 1.724101427053244, "learning_rate": 8.66798654297982e-07, "loss": 0.4986, "step": 14999 }, { "epoch": 0.2607380625423699, "grad_norm": 2.2049366866248743, "learning_rate": 8.667795238659084e-07, "loss": 0.2767, "step": 15000 }, { "epoch": 0.26075544507987275, "grad_norm": 1.645988484274657, "learning_rate": 8.667603922713123e-07, "loss": 0.1976, "step": 15001 }, { "epoch": 0.2607728276173756, "grad_norm": 1.6424507861174409, "learning_rate": 8.66741259514254e-07, "loss": 0.3179, "step": 15002 }, { "epoch": 0.2607902101548784, "grad_norm": 2.6074538347616754, "learning_rate": 8.667221255947945e-07, "loss": 0.3425, "step": 15003 }, { "epoch": 0.26080759269238124, "grad_norm": 1.7703855811036584, "learning_rate": 8.667029905129943e-07, "loss": 0.3335, "step": 15004 }, { "epoch": 0.26082497522988407, "grad_norm": 1.2502650614296695, "learning_rate": 8.666838542689141e-07, "loss": 0.2633, "step": 15005 }, { "epoch": 0.2608423577673869, "grad_norm": 1.4734986482013022, "learning_rate": 8.666647168626147e-07, "loss": 0.2785, "step": 15006 }, { "epoch": 0.26085974030488973, "grad_norm": 1.3653633235988243, "learning_rate": 8.666455782941563e-07, "loss": 0.2295, "step": 15007 }, { "epoch": 0.2608771228423925, "grad_norm": 1.8139875479901912, "learning_rate": 8.666264385635999e-07, "loss": 0.371, "step": 15008 }, { "epoch": 0.26089450537989534, "grad_norm": 1.322916756278831, "learning_rate": 8.666072976710062e-07, "loss": 0.2993, "step": 15009 }, { "epoch": 0.26091188791739817, "grad_norm": 1.856669432518842, "learning_rate": 8.665881556164356e-07, "loss": 0.3059, "step": 15010 }, { "epoch": 0.260929270454901, "grad_norm": 1.724378057705645, "learning_rate": 8.665690123999492e-07, "loss": 0.3592, "step": 15011 }, { "epoch": 0.2609466529924038, "grad_norm": 1.870749807219213, "learning_rate": 8.665498680216072e-07, "loss": 0.3032, "step": 15012 }, { "epoch": 0.26096403552990666, "grad_norm": 1.4589159987352074, "learning_rate": 8.665307224814706e-07, "loss": 0.2871, "step": 15013 }, { "epoch": 0.2609814180674095, "grad_norm": 1.5133681675252437, "learning_rate": 8.665115757796e-07, "loss": 0.2656, "step": 15014 }, { "epoch": 0.2609988006049123, "grad_norm": 1.8628243061114331, "learning_rate": 8.664924279160559e-07, "loss": 0.303, "step": 15015 }, { "epoch": 0.26101618314241515, "grad_norm": 1.6382184576291636, "learning_rate": 8.664732788908992e-07, "loss": 0.2367, "step": 15016 }, { "epoch": 0.261033565679918, "grad_norm": 1.8767393605161649, "learning_rate": 8.664541287041908e-07, "loss": 0.304, "step": 15017 }, { "epoch": 0.26105094821742075, "grad_norm": 1.8661091932778577, "learning_rate": 8.664349773559909e-07, "loss": 0.3377, "step": 15018 }, { "epoch": 0.2610683307549236, "grad_norm": 2.5101495458864975, "learning_rate": 8.664158248463606e-07, "loss": 0.2462, "step": 15019 }, { "epoch": 0.2610857132924264, "grad_norm": 1.4133762891591335, "learning_rate": 8.663966711753604e-07, "loss": 0.2896, "step": 15020 }, { "epoch": 0.26110309582992924, "grad_norm": 1.6742177586715934, "learning_rate": 8.66377516343051e-07, "loss": 0.2411, "step": 15021 }, { "epoch": 0.2611204783674321, "grad_norm": 1.1779780572041483, "learning_rate": 8.663583603494933e-07, "loss": 0.3241, "step": 15022 }, { "epoch": 0.2611378609049349, "grad_norm": 1.9249277518684675, "learning_rate": 8.663392031947478e-07, "loss": 0.502, "step": 15023 }, { "epoch": 0.26115524344243773, "grad_norm": 1.8893471615560418, "learning_rate": 8.663200448788752e-07, "loss": 0.2842, "step": 15024 }, { "epoch": 0.26117262597994056, "grad_norm": 1.7917371082256708, "learning_rate": 8.663008854019365e-07, "loss": 0.2184, "step": 15025 }, { "epoch": 0.2611900085174434, "grad_norm": 2.0342612793546935, "learning_rate": 8.662817247639922e-07, "loss": 0.2345, "step": 15026 }, { "epoch": 0.2612073910549462, "grad_norm": 1.6032527502268725, "learning_rate": 8.66262562965103e-07, "loss": 0.3694, "step": 15027 }, { "epoch": 0.261224773592449, "grad_norm": 1.7161454505864553, "learning_rate": 8.662434000053299e-07, "loss": 0.2339, "step": 15028 }, { "epoch": 0.26124215612995183, "grad_norm": 1.5737594796114505, "learning_rate": 8.662242358847333e-07, "loss": 0.2407, "step": 15029 }, { "epoch": 0.26125953866745466, "grad_norm": 2.783099796238185, "learning_rate": 8.662050706033742e-07, "loss": 0.2144, "step": 15030 }, { "epoch": 0.2612769212049575, "grad_norm": 2.848681176060789, "learning_rate": 8.661859041613134e-07, "loss": 0.3591, "step": 15031 }, { "epoch": 0.2612943037424603, "grad_norm": 1.5859735616525787, "learning_rate": 8.661667365586111e-07, "loss": 0.39, "step": 15032 }, { "epoch": 0.26131168627996315, "grad_norm": 1.6576285379791411, "learning_rate": 8.661475677953287e-07, "loss": 0.2946, "step": 15033 }, { "epoch": 0.261329068817466, "grad_norm": 2.1068511661007125, "learning_rate": 8.661283978715266e-07, "loss": 0.2438, "step": 15034 }, { "epoch": 0.2613464513549688, "grad_norm": 2.054583682467711, "learning_rate": 8.661092267872658e-07, "loss": 0.3873, "step": 15035 }, { "epoch": 0.26136383389247164, "grad_norm": 2.059397467542361, "learning_rate": 8.660900545426069e-07, "loss": 0.3801, "step": 15036 }, { "epoch": 0.2613812164299745, "grad_norm": 1.7551637475729402, "learning_rate": 8.660708811376107e-07, "loss": 0.2941, "step": 15037 }, { "epoch": 0.26139859896747725, "grad_norm": 1.3802402147134425, "learning_rate": 8.660517065723378e-07, "loss": 0.2567, "step": 15038 }, { "epoch": 0.2614159815049801, "grad_norm": 1.6748454020079233, "learning_rate": 8.660325308468492e-07, "loss": 0.2496, "step": 15039 }, { "epoch": 0.2614333640424829, "grad_norm": 2.088148692468084, "learning_rate": 8.660133539612058e-07, "loss": 0.3889, "step": 15040 }, { "epoch": 0.26145074657998574, "grad_norm": 1.3088660081824235, "learning_rate": 8.659941759154681e-07, "loss": 0.3136, "step": 15041 }, { "epoch": 0.26146812911748857, "grad_norm": 2.2581229605362094, "learning_rate": 8.65974996709697e-07, "loss": 0.3464, "step": 15042 }, { "epoch": 0.2614855116549914, "grad_norm": 1.4960686315166054, "learning_rate": 8.659558163439532e-07, "loss": 0.3738, "step": 15043 }, { "epoch": 0.26150289419249423, "grad_norm": 2.1547082064236056, "learning_rate": 8.659366348182976e-07, "loss": 0.2746, "step": 15044 }, { "epoch": 0.26152027672999706, "grad_norm": 1.2800138305917448, "learning_rate": 8.65917452132791e-07, "loss": 0.2551, "step": 15045 }, { "epoch": 0.2615376592674999, "grad_norm": 2.139182777990164, "learning_rate": 8.658982682874941e-07, "loss": 0.2286, "step": 15046 }, { "epoch": 0.2615550418050027, "grad_norm": 1.858585516651877, "learning_rate": 8.658790832824679e-07, "loss": 0.2578, "step": 15047 }, { "epoch": 0.2615724243425055, "grad_norm": 2.171066831785411, "learning_rate": 8.65859897117773e-07, "loss": 0.2876, "step": 15048 }, { "epoch": 0.2615898068800083, "grad_norm": 1.741422222391463, "learning_rate": 8.658407097934702e-07, "loss": 0.386, "step": 15049 }, { "epoch": 0.26160718941751115, "grad_norm": 1.8447907308190707, "learning_rate": 8.658215213096205e-07, "loss": 0.2504, "step": 15050 }, { "epoch": 0.261624571955014, "grad_norm": 2.1603935656349185, "learning_rate": 8.658023316662846e-07, "loss": 0.4285, "step": 15051 }, { "epoch": 0.2616419544925168, "grad_norm": 2.24574465563647, "learning_rate": 8.657831408635234e-07, "loss": 0.4126, "step": 15052 }, { "epoch": 0.26165933703001965, "grad_norm": 1.6602730248843287, "learning_rate": 8.657639489013976e-07, "loss": 0.2408, "step": 15053 }, { "epoch": 0.2616767195675225, "grad_norm": 2.077566641132435, "learning_rate": 8.657447557799681e-07, "loss": 0.247, "step": 15054 }, { "epoch": 0.2616941021050253, "grad_norm": 2.485280792135483, "learning_rate": 8.657255614992957e-07, "loss": 0.4359, "step": 15055 }, { "epoch": 0.26171148464252814, "grad_norm": 1.2510585898443256, "learning_rate": 8.657063660594414e-07, "loss": 0.2415, "step": 15056 }, { "epoch": 0.26172886718003097, "grad_norm": 4.0547474232118335, "learning_rate": 8.656871694604658e-07, "loss": 0.2687, "step": 15057 }, { "epoch": 0.26174624971753374, "grad_norm": 1.5725789315504695, "learning_rate": 8.656679717024299e-07, "loss": 0.3591, "step": 15058 }, { "epoch": 0.26176363225503657, "grad_norm": 2.4066160497612894, "learning_rate": 8.656487727853945e-07, "loss": 0.5425, "step": 15059 }, { "epoch": 0.2617810147925394, "grad_norm": 3.1821009324804765, "learning_rate": 8.656295727094205e-07, "loss": 0.2804, "step": 15060 }, { "epoch": 0.26179839733004223, "grad_norm": 0.774306654700279, "learning_rate": 8.656103714745686e-07, "loss": 0.2063, "step": 15061 }, { "epoch": 0.26181577986754506, "grad_norm": 2.0859671425239723, "learning_rate": 8.655911690808999e-07, "loss": 0.4906, "step": 15062 }, { "epoch": 0.2618331624050479, "grad_norm": 2.032620887809179, "learning_rate": 8.65571965528475e-07, "loss": 0.2751, "step": 15063 }, { "epoch": 0.2618505449425507, "grad_norm": 1.5392809570859882, "learning_rate": 8.65552760817355e-07, "loss": 0.2682, "step": 15064 }, { "epoch": 0.26186792748005355, "grad_norm": 1.3108084169823817, "learning_rate": 8.655335549476006e-07, "loss": 0.2731, "step": 15065 }, { "epoch": 0.2618853100175564, "grad_norm": 1.129496103133375, "learning_rate": 8.655143479192728e-07, "loss": 0.4634, "step": 15066 }, { "epoch": 0.2619026925550592, "grad_norm": 1.4885062165590985, "learning_rate": 8.654951397324324e-07, "loss": 0.2116, "step": 15067 }, { "epoch": 0.261920075092562, "grad_norm": 1.8718153997270348, "learning_rate": 8.654759303871403e-07, "loss": 0.2487, "step": 15068 }, { "epoch": 0.2619374576300648, "grad_norm": 1.6996546779108146, "learning_rate": 8.654567198834574e-07, "loss": 0.459, "step": 15069 }, { "epoch": 0.26195484016756765, "grad_norm": 1.587560179926627, "learning_rate": 8.654375082214445e-07, "loss": 0.361, "step": 15070 }, { "epoch": 0.2619722227050705, "grad_norm": 1.2926846700528214, "learning_rate": 8.654182954011626e-07, "loss": 0.3779, "step": 15071 }, { "epoch": 0.2619896052425733, "grad_norm": 1.1638419022868618, "learning_rate": 8.653990814226726e-07, "loss": 0.2495, "step": 15072 }, { "epoch": 0.26200698778007614, "grad_norm": 2.213400471871135, "learning_rate": 8.653798662860354e-07, "loss": 0.3148, "step": 15073 }, { "epoch": 0.26202437031757897, "grad_norm": 1.8085072033989467, "learning_rate": 8.653606499913118e-07, "loss": 0.3893, "step": 15074 }, { "epoch": 0.2620417528550818, "grad_norm": 1.622214065545706, "learning_rate": 8.653414325385629e-07, "loss": 0.3994, "step": 15075 }, { "epoch": 0.26205913539258463, "grad_norm": 2.1719854756840395, "learning_rate": 8.653222139278493e-07, "loss": 0.3313, "step": 15076 }, { "epoch": 0.26207651793008746, "grad_norm": 1.9087112158333226, "learning_rate": 8.653029941592321e-07, "loss": 0.2692, "step": 15077 }, { "epoch": 0.26209390046759024, "grad_norm": 2.0723606046364966, "learning_rate": 8.652837732327724e-07, "loss": 0.3272, "step": 15078 }, { "epoch": 0.26211128300509307, "grad_norm": 1.7010249497879613, "learning_rate": 8.652645511485308e-07, "loss": 0.2258, "step": 15079 }, { "epoch": 0.2621286655425959, "grad_norm": 1.292020031009745, "learning_rate": 8.652453279065683e-07, "loss": 0.2441, "step": 15080 }, { "epoch": 0.2621460480800987, "grad_norm": 2.283632077990685, "learning_rate": 8.65226103506946e-07, "loss": 0.3936, "step": 15081 }, { "epoch": 0.26216343061760156, "grad_norm": 2.007695539061109, "learning_rate": 8.652068779497247e-07, "loss": 0.4338, "step": 15082 }, { "epoch": 0.2621808131551044, "grad_norm": 1.8204866085835427, "learning_rate": 8.651876512349654e-07, "loss": 0.3053, "step": 15083 }, { "epoch": 0.2621981956926072, "grad_norm": 1.7747598204013286, "learning_rate": 8.651684233627288e-07, "loss": 0.441, "step": 15084 }, { "epoch": 0.26221557823011005, "grad_norm": 2.0261273036348992, "learning_rate": 8.651491943330764e-07, "loss": 0.3243, "step": 15085 }, { "epoch": 0.2622329607676129, "grad_norm": 1.803958403162482, "learning_rate": 8.651299641460685e-07, "loss": 0.5027, "step": 15086 }, { "epoch": 0.2622503433051157, "grad_norm": 1.5213497010629184, "learning_rate": 8.651107328017664e-07, "loss": 0.2267, "step": 15087 }, { "epoch": 0.2622677258426185, "grad_norm": 2.370283955229125, "learning_rate": 8.65091500300231e-07, "loss": 0.2891, "step": 15088 }, { "epoch": 0.2622851083801213, "grad_norm": 1.931806176439254, "learning_rate": 8.650722666415233e-07, "loss": 0.3291, "step": 15089 }, { "epoch": 0.26230249091762414, "grad_norm": 2.703004130631359, "learning_rate": 8.650530318257042e-07, "loss": 0.3288, "step": 15090 }, { "epoch": 0.262319873455127, "grad_norm": 1.562852375937382, "learning_rate": 8.650337958528347e-07, "loss": 0.3494, "step": 15091 }, { "epoch": 0.2623372559926298, "grad_norm": 1.4508110072581266, "learning_rate": 8.650145587229758e-07, "loss": 0.4098, "step": 15092 }, { "epoch": 0.26235463853013263, "grad_norm": 1.7971373868055858, "learning_rate": 8.649953204361882e-07, "loss": 0.2784, "step": 15093 }, { "epoch": 0.26237202106763546, "grad_norm": 1.9777200583607308, "learning_rate": 8.649760809925332e-07, "loss": 0.4739, "step": 15094 }, { "epoch": 0.2623894036051383, "grad_norm": 3.116751912991328, "learning_rate": 8.649568403920719e-07, "loss": 0.3396, "step": 15095 }, { "epoch": 0.2624067861426411, "grad_norm": 1.529731961922808, "learning_rate": 8.649375986348648e-07, "loss": 0.2109, "step": 15096 }, { "epoch": 0.26242416868014395, "grad_norm": 1.5327740736726478, "learning_rate": 8.649183557209732e-07, "loss": 0.3412, "step": 15097 }, { "epoch": 0.26244155121764673, "grad_norm": 2.5542788464389004, "learning_rate": 8.648991116504581e-07, "loss": 0.4058, "step": 15098 }, { "epoch": 0.26245893375514956, "grad_norm": 1.505625351284673, "learning_rate": 8.648798664233805e-07, "loss": 0.2023, "step": 15099 }, { "epoch": 0.2624763162926524, "grad_norm": 1.480640707606086, "learning_rate": 8.648606200398012e-07, "loss": 0.3048, "step": 15100 }, { "epoch": 0.2624936988301552, "grad_norm": 1.9070974663236302, "learning_rate": 8.648413724997814e-07, "loss": 0.3829, "step": 15101 }, { "epoch": 0.26251108136765805, "grad_norm": 1.8476480053304425, "learning_rate": 8.64822123803382e-07, "loss": 0.2784, "step": 15102 }, { "epoch": 0.2625284639051609, "grad_norm": 2.1732074878768866, "learning_rate": 8.64802873950664e-07, "loss": 0.3044, "step": 15103 }, { "epoch": 0.2625458464426637, "grad_norm": 1.6805014665328735, "learning_rate": 8.647836229416887e-07, "loss": 0.1879, "step": 15104 }, { "epoch": 0.26256322898016654, "grad_norm": 2.1642011592507244, "learning_rate": 8.647643707765168e-07, "loss": 0.333, "step": 15105 }, { "epoch": 0.26258061151766937, "grad_norm": 0.9418123228767157, "learning_rate": 8.647451174552092e-07, "loss": 0.2661, "step": 15106 }, { "epoch": 0.2625979940551722, "grad_norm": 3.18444277984999, "learning_rate": 8.647258629778274e-07, "loss": 0.3762, "step": 15107 }, { "epoch": 0.262615376592675, "grad_norm": 1.5750687767320137, "learning_rate": 8.64706607344432e-07, "loss": 0.2216, "step": 15108 }, { "epoch": 0.2626327591301778, "grad_norm": 1.6583563876105387, "learning_rate": 8.646873505550842e-07, "loss": 0.6174, "step": 15109 }, { "epoch": 0.26265014166768064, "grad_norm": 1.1909938343550701, "learning_rate": 8.646680926098451e-07, "loss": 0.2399, "step": 15110 }, { "epoch": 0.26266752420518347, "grad_norm": 1.103388018175552, "learning_rate": 8.646488335087755e-07, "loss": 0.2517, "step": 15111 }, { "epoch": 0.2626849067426863, "grad_norm": 1.3262995474054269, "learning_rate": 8.646295732519367e-07, "loss": 0.1923, "step": 15112 }, { "epoch": 0.2627022892801891, "grad_norm": 1.6098352239558835, "learning_rate": 8.646103118393897e-07, "loss": 0.2583, "step": 15113 }, { "epoch": 0.26271967181769196, "grad_norm": 1.6554143401051413, "learning_rate": 8.645910492711954e-07, "loss": 0.3497, "step": 15114 }, { "epoch": 0.2627370543551948, "grad_norm": 9.22222306295424, "learning_rate": 8.64571785547415e-07, "loss": 0.3349, "step": 15115 }, { "epoch": 0.2627544368926976, "grad_norm": 1.4474214028503476, "learning_rate": 8.645525206681096e-07, "loss": 0.3302, "step": 15116 }, { "epoch": 0.26277181943020045, "grad_norm": 1.733434498621582, "learning_rate": 8.645332546333401e-07, "loss": 0.3041, "step": 15117 }, { "epoch": 0.2627892019677032, "grad_norm": 1.2811493263443177, "learning_rate": 8.645139874431675e-07, "loss": 0.3289, "step": 15118 }, { "epoch": 0.26280658450520605, "grad_norm": 1.646091666440215, "learning_rate": 8.644947190976531e-07, "loss": 0.2374, "step": 15119 }, { "epoch": 0.2628239670427089, "grad_norm": 1.102957737873179, "learning_rate": 8.64475449596858e-07, "loss": 0.2925, "step": 15120 }, { "epoch": 0.2628413495802117, "grad_norm": 1.4408602259060617, "learning_rate": 8.644561789408431e-07, "loss": 0.4586, "step": 15121 }, { "epoch": 0.26285873211771454, "grad_norm": 1.567053293255855, "learning_rate": 8.644369071296693e-07, "loss": 0.2588, "step": 15122 }, { "epoch": 0.2628761146552174, "grad_norm": 2.1752406041237156, "learning_rate": 8.644176341633982e-07, "loss": 0.3155, "step": 15123 }, { "epoch": 0.2628934971927202, "grad_norm": 1.960744270305587, "learning_rate": 8.643983600420906e-07, "loss": 0.2513, "step": 15124 }, { "epoch": 0.26291087973022303, "grad_norm": 1.4295234312267957, "learning_rate": 8.643790847658073e-07, "loss": 0.2467, "step": 15125 }, { "epoch": 0.26292826226772587, "grad_norm": 1.4087276095082932, "learning_rate": 8.6435980833461e-07, "loss": 0.215, "step": 15126 }, { "epoch": 0.26294564480522864, "grad_norm": 3.0431228265995314, "learning_rate": 8.643405307485593e-07, "loss": 0.4586, "step": 15127 }, { "epoch": 0.26296302734273147, "grad_norm": 2.2871855757906885, "learning_rate": 8.643212520077165e-07, "loss": 0.4628, "step": 15128 }, { "epoch": 0.2629804098802343, "grad_norm": 2.9655638540898943, "learning_rate": 8.643019721121427e-07, "loss": 0.2577, "step": 15129 }, { "epoch": 0.26299779241773713, "grad_norm": 2.1423483435623845, "learning_rate": 8.64282691061899e-07, "loss": 0.7614, "step": 15130 }, { "epoch": 0.26301517495523996, "grad_norm": 2.0854322853741922, "learning_rate": 8.642634088570465e-07, "loss": 0.2829, "step": 15131 }, { "epoch": 0.2630325574927428, "grad_norm": 1.7272483261693494, "learning_rate": 8.642441254976464e-07, "loss": 0.2442, "step": 15132 }, { "epoch": 0.2630499400302456, "grad_norm": 1.9089759305400857, "learning_rate": 8.642248409837595e-07, "loss": 0.3798, "step": 15133 }, { "epoch": 0.26306732256774845, "grad_norm": 1.6985086518154928, "learning_rate": 8.642055553154475e-07, "loss": 0.4523, "step": 15134 }, { "epoch": 0.2630847051052513, "grad_norm": 2.2087933033454603, "learning_rate": 8.64186268492771e-07, "loss": 0.1863, "step": 15135 }, { "epoch": 0.2631020876427541, "grad_norm": 1.7912022043849094, "learning_rate": 8.641669805157914e-07, "loss": 0.3467, "step": 15136 }, { "epoch": 0.2631194701802569, "grad_norm": 1.7401898352191507, "learning_rate": 8.641476913845698e-07, "loss": 0.1946, "step": 15137 }, { "epoch": 0.2631368527177597, "grad_norm": 1.1782892576489257, "learning_rate": 8.641284010991671e-07, "loss": 0.1744, "step": 15138 }, { "epoch": 0.26315423525526255, "grad_norm": 1.8714021189512722, "learning_rate": 8.641091096596448e-07, "loss": 0.2353, "step": 15139 }, { "epoch": 0.2631716177927654, "grad_norm": 1.7986830895752657, "learning_rate": 8.640898170660637e-07, "loss": 0.253, "step": 15140 }, { "epoch": 0.2631890003302682, "grad_norm": 2.9939097311494773, "learning_rate": 8.640705233184853e-07, "loss": 0.4313, "step": 15141 }, { "epoch": 0.26320638286777104, "grad_norm": 1.6085978035796484, "learning_rate": 8.640512284169706e-07, "loss": 0.4405, "step": 15142 }, { "epoch": 0.26322376540527387, "grad_norm": 1.1459811449485175, "learning_rate": 8.640319323615808e-07, "loss": 0.359, "step": 15143 }, { "epoch": 0.2632411479427767, "grad_norm": 1.6750202940159946, "learning_rate": 8.640126351523768e-07, "loss": 0.2203, "step": 15144 }, { "epoch": 0.26325853048027953, "grad_norm": 2.7392821848897193, "learning_rate": 8.6399333678942e-07, "loss": 0.6849, "step": 15145 }, { "epoch": 0.26327591301778236, "grad_norm": 1.5068527737677686, "learning_rate": 8.639740372727716e-07, "loss": 0.3889, "step": 15146 }, { "epoch": 0.26329329555528513, "grad_norm": 2.1172897871881657, "learning_rate": 8.639547366024927e-07, "loss": 0.3264, "step": 15147 }, { "epoch": 0.26331067809278796, "grad_norm": 1.9418598773954066, "learning_rate": 8.639354347786444e-07, "loss": 0.363, "step": 15148 }, { "epoch": 0.2633280606302908, "grad_norm": 1.3050691868887507, "learning_rate": 8.639161318012879e-07, "loss": 0.1861, "step": 15149 }, { "epoch": 0.2633454431677936, "grad_norm": 3.37237858167231, "learning_rate": 8.638968276704845e-07, "loss": 0.3021, "step": 15150 }, { "epoch": 0.26336282570529645, "grad_norm": 2.2529515194821377, "learning_rate": 8.638775223862954e-07, "loss": 0.3377, "step": 15151 }, { "epoch": 0.2633802082427993, "grad_norm": 2.1490750559303406, "learning_rate": 8.638582159487817e-07, "loss": 0.3116, "step": 15152 }, { "epoch": 0.2633975907803021, "grad_norm": 1.7767179173288712, "learning_rate": 8.638389083580044e-07, "loss": 0.4547, "step": 15153 }, { "epoch": 0.26341497331780495, "grad_norm": 1.2924994816099873, "learning_rate": 8.63819599614025e-07, "loss": 0.3869, "step": 15154 }, { "epoch": 0.2634323558553078, "grad_norm": 1.521690910219925, "learning_rate": 8.638002897169047e-07, "loss": 0.2888, "step": 15155 }, { "epoch": 0.2634497383928106, "grad_norm": 1.882853031463564, "learning_rate": 8.637809786667045e-07, "loss": 0.4383, "step": 15156 }, { "epoch": 0.2634671209303134, "grad_norm": 1.6413453587564497, "learning_rate": 8.637616664634857e-07, "loss": 0.365, "step": 15157 }, { "epoch": 0.2634845034678162, "grad_norm": 1.9834875884049425, "learning_rate": 8.637423531073095e-07, "loss": 0.4233, "step": 15158 }, { "epoch": 0.26350188600531904, "grad_norm": 1.8637312863603, "learning_rate": 8.637230385982373e-07, "loss": 0.2791, "step": 15159 }, { "epoch": 0.26351926854282187, "grad_norm": 1.6508585390630632, "learning_rate": 8.6370372293633e-07, "loss": 0.2041, "step": 15160 }, { "epoch": 0.2635366510803247, "grad_norm": 1.4782097182935998, "learning_rate": 8.636844061216489e-07, "loss": 0.2618, "step": 15161 }, { "epoch": 0.26355403361782753, "grad_norm": 1.9362199835718825, "learning_rate": 8.636650881542556e-07, "loss": 0.2356, "step": 15162 }, { "epoch": 0.26357141615533036, "grad_norm": 2.104458188064698, "learning_rate": 8.636457690342107e-07, "loss": 0.3856, "step": 15163 }, { "epoch": 0.2635887986928332, "grad_norm": 1.515787700699021, "learning_rate": 8.636264487615759e-07, "loss": 0.2475, "step": 15164 }, { "epoch": 0.263606181230336, "grad_norm": 1.5608987514608974, "learning_rate": 8.636071273364123e-07, "loss": 0.1956, "step": 15165 }, { "epoch": 0.26362356376783885, "grad_norm": 1.9639248940030256, "learning_rate": 8.63587804758781e-07, "loss": 0.3963, "step": 15166 }, { "epoch": 0.26364094630534163, "grad_norm": 1.9172608656451766, "learning_rate": 8.635684810287434e-07, "loss": 0.1952, "step": 15167 }, { "epoch": 0.26365832884284446, "grad_norm": 2.1818076182971318, "learning_rate": 8.635491561463608e-07, "loss": 0.4203, "step": 15168 }, { "epoch": 0.2636757113803473, "grad_norm": 3.997338881467798, "learning_rate": 8.635298301116945e-07, "loss": 0.3082, "step": 15169 }, { "epoch": 0.2636930939178501, "grad_norm": 2.218182148989185, "learning_rate": 8.635105029248055e-07, "loss": 0.4875, "step": 15170 }, { "epoch": 0.26371047645535295, "grad_norm": 2.0689430027551965, "learning_rate": 8.634911745857552e-07, "loss": 0.6006, "step": 15171 }, { "epoch": 0.2637278589928558, "grad_norm": 1.6209035186756915, "learning_rate": 8.634718450946048e-07, "loss": 0.4972, "step": 15172 }, { "epoch": 0.2637452415303586, "grad_norm": 2.922172889190832, "learning_rate": 8.634525144514156e-07, "loss": 0.2926, "step": 15173 }, { "epoch": 0.26376262406786144, "grad_norm": 1.897564403249886, "learning_rate": 8.63433182656249e-07, "loss": 0.3779, "step": 15174 }, { "epoch": 0.26378000660536427, "grad_norm": 1.0303104745718183, "learning_rate": 8.634138497091661e-07, "loss": 0.2359, "step": 15175 }, { "epoch": 0.2637973891428671, "grad_norm": 1.4311090850379415, "learning_rate": 8.633945156102282e-07, "loss": 0.4358, "step": 15176 }, { "epoch": 0.2638147716803699, "grad_norm": 3.5397305273040773, "learning_rate": 8.633751803594966e-07, "loss": 0.279, "step": 15177 }, { "epoch": 0.2638321542178727, "grad_norm": 1.6638912205900218, "learning_rate": 8.633558439570328e-07, "loss": 0.3413, "step": 15178 }, { "epoch": 0.26384953675537554, "grad_norm": 1.2649581467618893, "learning_rate": 8.633365064028977e-07, "loss": 0.3594, "step": 15179 }, { "epoch": 0.26386691929287837, "grad_norm": 2.782301472632671, "learning_rate": 8.633171676971528e-07, "loss": 0.423, "step": 15180 }, { "epoch": 0.2638843018303812, "grad_norm": 1.9232739342809508, "learning_rate": 8.632978278398593e-07, "loss": 0.3625, "step": 15181 }, { "epoch": 0.263901684367884, "grad_norm": 2.189599394118334, "learning_rate": 8.632784868310787e-07, "loss": 0.3045, "step": 15182 }, { "epoch": 0.26391906690538686, "grad_norm": 2.7922682820147946, "learning_rate": 8.632591446708721e-07, "loss": 0.3417, "step": 15183 }, { "epoch": 0.2639364494428897, "grad_norm": 1.007454608567824, "learning_rate": 8.63239801359301e-07, "loss": 0.3258, "step": 15184 }, { "epoch": 0.2639538319803925, "grad_norm": 1.8690069748981002, "learning_rate": 8.632204568964265e-07, "loss": 0.3701, "step": 15185 }, { "epoch": 0.26397121451789535, "grad_norm": 1.1966784345660515, "learning_rate": 8.6320111128231e-07, "loss": 0.4594, "step": 15186 }, { "epoch": 0.2639885970553981, "grad_norm": 3.517095757222034, "learning_rate": 8.631817645170128e-07, "loss": 0.2912, "step": 15187 }, { "epoch": 0.26400597959290095, "grad_norm": 3.2413965839634993, "learning_rate": 8.631624166005962e-07, "loss": 0.339, "step": 15188 }, { "epoch": 0.2640233621304038, "grad_norm": 1.520592086442288, "learning_rate": 8.631430675331217e-07, "loss": 0.2042, "step": 15189 }, { "epoch": 0.2640407446679066, "grad_norm": 1.5761650005953238, "learning_rate": 8.631237173146504e-07, "loss": 0.3508, "step": 15190 }, { "epoch": 0.26405812720540944, "grad_norm": 2.2043171720646813, "learning_rate": 8.631043659452438e-07, "loss": 0.3937, "step": 15191 }, { "epoch": 0.2640755097429123, "grad_norm": 3.4546319576461046, "learning_rate": 8.63085013424963e-07, "loss": 0.4119, "step": 15192 }, { "epoch": 0.2640928922804151, "grad_norm": 1.851941493951149, "learning_rate": 8.630656597538696e-07, "loss": 0.2938, "step": 15193 }, { "epoch": 0.26411027481791793, "grad_norm": 1.8319730545884556, "learning_rate": 8.63046304932025e-07, "loss": 0.2737, "step": 15194 }, { "epoch": 0.26412765735542076, "grad_norm": 2.8880218780699827, "learning_rate": 8.6302694895949e-07, "loss": 0.5492, "step": 15195 }, { "epoch": 0.2641450398929236, "grad_norm": 1.8606776917480874, "learning_rate": 8.630075918363265e-07, "loss": 0.2641, "step": 15196 }, { "epoch": 0.26416242243042637, "grad_norm": 1.5910816089606348, "learning_rate": 8.629882335625957e-07, "loss": 0.315, "step": 15197 }, { "epoch": 0.2641798049679292, "grad_norm": 3.916003859964725, "learning_rate": 8.62968874138359e-07, "loss": 0.3444, "step": 15198 }, { "epoch": 0.26419718750543203, "grad_norm": 1.4979045074091297, "learning_rate": 8.629495135636777e-07, "loss": 0.412, "step": 15199 }, { "epoch": 0.26421457004293486, "grad_norm": 2.142952924804607, "learning_rate": 8.629301518386132e-07, "loss": 0.2455, "step": 15200 }, { "epoch": 0.2642319525804377, "grad_norm": 1.1430560637002387, "learning_rate": 8.629107889632268e-07, "loss": 0.3251, "step": 15201 }, { "epoch": 0.2642493351179405, "grad_norm": 1.0611893666221561, "learning_rate": 8.628914249375799e-07, "loss": 0.3429, "step": 15202 }, { "epoch": 0.26426671765544335, "grad_norm": 2.192301050878409, "learning_rate": 8.628720597617339e-07, "loss": 0.2714, "step": 15203 }, { "epoch": 0.2642841001929462, "grad_norm": 2.5126289044282957, "learning_rate": 8.628526934357501e-07, "loss": 0.3484, "step": 15204 }, { "epoch": 0.264301482730449, "grad_norm": 1.5788450547147674, "learning_rate": 8.628333259596898e-07, "loss": 0.4119, "step": 15205 }, { "epoch": 0.26431886526795184, "grad_norm": 1.4103623709943585, "learning_rate": 8.628139573336149e-07, "loss": 0.2926, "step": 15206 }, { "epoch": 0.2643362478054546, "grad_norm": 1.7058150226402249, "learning_rate": 8.627945875575862e-07, "loss": 0.3463, "step": 15207 }, { "epoch": 0.26435363034295745, "grad_norm": 1.2043403006283078, "learning_rate": 8.627752166316653e-07, "loss": 0.2828, "step": 15208 }, { "epoch": 0.2643710128804603, "grad_norm": 1.616673722135386, "learning_rate": 8.627558445559136e-07, "loss": 0.3588, "step": 15209 }, { "epoch": 0.2643883954179631, "grad_norm": 2.2824013933951646, "learning_rate": 8.627364713303925e-07, "loss": 0.4493, "step": 15210 }, { "epoch": 0.26440577795546594, "grad_norm": 2.97268803277173, "learning_rate": 8.627170969551635e-07, "loss": 0.3131, "step": 15211 }, { "epoch": 0.26442316049296877, "grad_norm": 0.84577979224521, "learning_rate": 8.626977214302878e-07, "loss": 0.2728, "step": 15212 }, { "epoch": 0.2644405430304716, "grad_norm": 2.1946899366333783, "learning_rate": 8.62678344755827e-07, "loss": 0.5407, "step": 15213 }, { "epoch": 0.26445792556797443, "grad_norm": 2.8939338892886926, "learning_rate": 8.626589669318425e-07, "loss": 0.4676, "step": 15214 }, { "epoch": 0.26447530810547726, "grad_norm": 1.5586543471031316, "learning_rate": 8.626395879583957e-07, "loss": 0.1987, "step": 15215 }, { "epoch": 0.2644926906429801, "grad_norm": 1.8067433024684119, "learning_rate": 8.626202078355478e-07, "loss": 0.3583, "step": 15216 }, { "epoch": 0.26451007318048286, "grad_norm": 1.6404449153848482, "learning_rate": 8.626008265633605e-07, "loss": 0.4494, "step": 15217 }, { "epoch": 0.2645274557179857, "grad_norm": 2.1880752222725453, "learning_rate": 8.625814441418953e-07, "loss": 0.3368, "step": 15218 }, { "epoch": 0.2645448382554885, "grad_norm": 1.8766270948247314, "learning_rate": 8.625620605712133e-07, "loss": 0.2901, "step": 15219 }, { "epoch": 0.26456222079299135, "grad_norm": 4.483318678619149, "learning_rate": 8.625426758513761e-07, "loss": 0.6861, "step": 15220 }, { "epoch": 0.2645796033304942, "grad_norm": 1.2252631766798534, "learning_rate": 8.625232899824452e-07, "loss": 0.3704, "step": 15221 }, { "epoch": 0.264596985867997, "grad_norm": 1.8673499045056112, "learning_rate": 8.625039029644821e-07, "loss": 0.227, "step": 15222 }, { "epoch": 0.26461436840549984, "grad_norm": 3.7949156553162657, "learning_rate": 8.624845147975481e-07, "loss": 0.3285, "step": 15223 }, { "epoch": 0.2646317509430027, "grad_norm": 1.781116326548626, "learning_rate": 8.624651254817046e-07, "loss": 0.3135, "step": 15224 }, { "epoch": 0.2646491334805055, "grad_norm": 1.7938699031878, "learning_rate": 8.624457350170133e-07, "loss": 0.2987, "step": 15225 }, { "epoch": 0.26466651601800834, "grad_norm": 1.487533603813134, "learning_rate": 8.624263434035353e-07, "loss": 0.271, "step": 15226 }, { "epoch": 0.2646838985555111, "grad_norm": 1.5196786621316696, "learning_rate": 8.624069506413325e-07, "loss": 0.3336, "step": 15227 }, { "epoch": 0.26470128109301394, "grad_norm": 1.4870884736663679, "learning_rate": 8.623875567304661e-07, "loss": 0.3596, "step": 15228 }, { "epoch": 0.26471866363051677, "grad_norm": 2.8248189392875247, "learning_rate": 8.623681616709975e-07, "loss": 0.2784, "step": 15229 }, { "epoch": 0.2647360461680196, "grad_norm": 2.0524687253789176, "learning_rate": 8.623487654629884e-07, "loss": 0.396, "step": 15230 }, { "epoch": 0.26475342870552243, "grad_norm": 1.9100586557691819, "learning_rate": 8.623293681065e-07, "loss": 0.318, "step": 15231 }, { "epoch": 0.26477081124302526, "grad_norm": 2.0711304127093304, "learning_rate": 8.623099696015942e-07, "loss": 0.362, "step": 15232 }, { "epoch": 0.2647881937805281, "grad_norm": 2.1411682450394096, "learning_rate": 8.62290569948332e-07, "loss": 0.3911, "step": 15233 }, { "epoch": 0.2648055763180309, "grad_norm": 1.201321606204572, "learning_rate": 8.622711691467753e-07, "loss": 0.4231, "step": 15234 }, { "epoch": 0.26482295885553375, "grad_norm": 1.608609058584641, "learning_rate": 8.622517671969854e-07, "loss": 0.3519, "step": 15235 }, { "epoch": 0.2648403413930366, "grad_norm": 2.1923525459895625, "learning_rate": 8.622323640990236e-07, "loss": 0.2925, "step": 15236 }, { "epoch": 0.26485772393053936, "grad_norm": 2.5448430022022737, "learning_rate": 8.622129598529518e-07, "loss": 0.455, "step": 15237 }, { "epoch": 0.2648751064680422, "grad_norm": 1.2696373517003658, "learning_rate": 8.621935544588313e-07, "loss": 0.2534, "step": 15238 }, { "epoch": 0.264892489005545, "grad_norm": 2.0914572996283844, "learning_rate": 8.621741479167233e-07, "loss": 0.3002, "step": 15239 }, { "epoch": 0.26490987154304785, "grad_norm": 2.399779409682008, "learning_rate": 8.6215474022669e-07, "loss": 0.34, "step": 15240 }, { "epoch": 0.2649272540805507, "grad_norm": 2.5833604064774507, "learning_rate": 8.621353313887924e-07, "loss": 0.3611, "step": 15241 }, { "epoch": 0.2649446366180535, "grad_norm": 2.170861950214398, "learning_rate": 8.621159214030921e-07, "loss": 0.315, "step": 15242 }, { "epoch": 0.26496201915555634, "grad_norm": 1.8793498821474386, "learning_rate": 8.620965102696507e-07, "loss": 0.3653, "step": 15243 }, { "epoch": 0.26497940169305917, "grad_norm": 1.8033990362032435, "learning_rate": 8.620770979885297e-07, "loss": 0.3858, "step": 15244 }, { "epoch": 0.264996784230562, "grad_norm": 1.1792646204664674, "learning_rate": 8.620576845597905e-07, "loss": 0.2335, "step": 15245 }, { "epoch": 0.26501416676806483, "grad_norm": 1.999218578309733, "learning_rate": 8.620382699834949e-07, "loss": 0.309, "step": 15246 }, { "epoch": 0.2650315493055676, "grad_norm": 1.8413833502942367, "learning_rate": 8.620188542597045e-07, "loss": 0.3441, "step": 15247 }, { "epoch": 0.26504893184307043, "grad_norm": 2.132254772286813, "learning_rate": 8.619994373884803e-07, "loss": 0.3531, "step": 15248 }, { "epoch": 0.26506631438057326, "grad_norm": 1.2958594993750778, "learning_rate": 8.619800193698843e-07, "loss": 0.2317, "step": 15249 }, { "epoch": 0.2650836969180761, "grad_norm": 1.512090818530592, "learning_rate": 8.619606002039779e-07, "loss": 0.2878, "step": 15250 }, { "epoch": 0.2651010794555789, "grad_norm": 1.4410128323961517, "learning_rate": 8.619411798908226e-07, "loss": 0.2588, "step": 15251 }, { "epoch": 0.26511846199308176, "grad_norm": 1.128075231444815, "learning_rate": 8.619217584304801e-07, "loss": 0.274, "step": 15252 }, { "epoch": 0.2651358445305846, "grad_norm": 2.150483695136978, "learning_rate": 8.619023358230119e-07, "loss": 0.3374, "step": 15253 }, { "epoch": 0.2651532270680874, "grad_norm": 3.012077672188593, "learning_rate": 8.618829120684795e-07, "loss": 0.3292, "step": 15254 }, { "epoch": 0.26517060960559025, "grad_norm": 1.4490398828046083, "learning_rate": 8.618634871669445e-07, "loss": 0.3861, "step": 15255 }, { "epoch": 0.2651879921430931, "grad_norm": 1.8766409163749072, "learning_rate": 8.618440611184686e-07, "loss": 0.3628, "step": 15256 }, { "epoch": 0.26520537468059585, "grad_norm": 1.7732746588441768, "learning_rate": 8.61824633923113e-07, "loss": 0.2555, "step": 15257 }, { "epoch": 0.2652227572180987, "grad_norm": 2.061770794513398, "learning_rate": 8.618052055809398e-07, "loss": 0.2106, "step": 15258 }, { "epoch": 0.2652401397556015, "grad_norm": 1.6176964199942696, "learning_rate": 8.617857760920099e-07, "loss": 0.3033, "step": 15259 }, { "epoch": 0.26525752229310434, "grad_norm": 2.432315887431242, "learning_rate": 8.617663454563857e-07, "loss": 0.2798, "step": 15260 }, { "epoch": 0.26527490483060717, "grad_norm": 1.638476439780378, "learning_rate": 8.617469136741281e-07, "loss": 0.3162, "step": 15261 }, { "epoch": 0.26529228736811, "grad_norm": 1.5067272815560349, "learning_rate": 8.61727480745299e-07, "loss": 0.2814, "step": 15262 }, { "epoch": 0.26530966990561283, "grad_norm": 1.6342181031439666, "learning_rate": 8.617080466699601e-07, "loss": 0.2477, "step": 15263 }, { "epoch": 0.26532705244311566, "grad_norm": 2.9287383550925123, "learning_rate": 8.616886114481728e-07, "loss": 0.3596, "step": 15264 }, { "epoch": 0.2653444349806185, "grad_norm": 2.048402706236762, "learning_rate": 8.616691750799986e-07, "loss": 0.4149, "step": 15265 }, { "epoch": 0.26536181751812127, "grad_norm": 1.8574791564616637, "learning_rate": 8.616497375654995e-07, "loss": 0.2286, "step": 15266 }, { "epoch": 0.2653792000556241, "grad_norm": 1.8149493096495088, "learning_rate": 8.616302989047366e-07, "loss": 0.267, "step": 15267 }, { "epoch": 0.26539658259312693, "grad_norm": 1.3751947623429708, "learning_rate": 8.616108590977719e-07, "loss": 0.2158, "step": 15268 }, { "epoch": 0.26541396513062976, "grad_norm": 2.305429181071141, "learning_rate": 8.615914181446668e-07, "loss": 0.2794, "step": 15269 }, { "epoch": 0.2654313476681326, "grad_norm": 2.6431464789532257, "learning_rate": 8.615719760454832e-07, "loss": 0.2455, "step": 15270 }, { "epoch": 0.2654487302056354, "grad_norm": 1.8256748449811724, "learning_rate": 8.615525328002822e-07, "loss": 0.3111, "step": 15271 }, { "epoch": 0.26546611274313825, "grad_norm": 1.8436570621195814, "learning_rate": 8.615330884091261e-07, "loss": 0.2013, "step": 15272 }, { "epoch": 0.2654834952806411, "grad_norm": 4.258197981681371, "learning_rate": 8.61513642872076e-07, "loss": 0.6354, "step": 15273 }, { "epoch": 0.2655008778181439, "grad_norm": 1.7407452809071635, "learning_rate": 8.614941961891938e-07, "loss": 0.2347, "step": 15274 }, { "epoch": 0.26551826035564674, "grad_norm": 1.8470995022932055, "learning_rate": 8.614747483605409e-07, "loss": 0.33, "step": 15275 }, { "epoch": 0.2655356428931495, "grad_norm": 1.976072189044419, "learning_rate": 8.614552993861792e-07, "loss": 0.3533, "step": 15276 }, { "epoch": 0.26555302543065235, "grad_norm": 2.4830011592200445, "learning_rate": 8.614358492661703e-07, "loss": 0.3075, "step": 15277 }, { "epoch": 0.2655704079681552, "grad_norm": 1.3221070574141418, "learning_rate": 8.614163980005758e-07, "loss": 0.2878, "step": 15278 }, { "epoch": 0.265587790505658, "grad_norm": 1.2324227312947713, "learning_rate": 8.613969455894572e-07, "loss": 0.4721, "step": 15279 }, { "epoch": 0.26560517304316084, "grad_norm": 2.3172386423144467, "learning_rate": 8.613774920328763e-07, "loss": 0.3901, "step": 15280 }, { "epoch": 0.26562255558066367, "grad_norm": 2.2057662927545416, "learning_rate": 8.613580373308949e-07, "loss": 0.4028, "step": 15281 }, { "epoch": 0.2656399381181665, "grad_norm": 1.8312638850738503, "learning_rate": 8.613385814835744e-07, "loss": 0.3436, "step": 15282 }, { "epoch": 0.2656573206556693, "grad_norm": 1.997050045763711, "learning_rate": 8.613191244909765e-07, "loss": 0.4525, "step": 15283 }, { "epoch": 0.26567470319317216, "grad_norm": 2.7980379926296326, "learning_rate": 8.612996663531632e-07, "loss": 0.4031, "step": 15284 }, { "epoch": 0.265692085730675, "grad_norm": 1.4895892986199122, "learning_rate": 8.612802070701958e-07, "loss": 0.1965, "step": 15285 }, { "epoch": 0.26570946826817776, "grad_norm": 2.0122644937885616, "learning_rate": 8.612607466421361e-07, "loss": 0.2556, "step": 15286 }, { "epoch": 0.2657268508056806, "grad_norm": 1.1527634021625293, "learning_rate": 8.612412850690458e-07, "loss": 0.1817, "step": 15287 }, { "epoch": 0.2657442333431834, "grad_norm": 1.224539418370331, "learning_rate": 8.612218223509866e-07, "loss": 0.4384, "step": 15288 }, { "epoch": 0.26576161588068625, "grad_norm": 4.296905249870421, "learning_rate": 8.6120235848802e-07, "loss": 0.3487, "step": 15289 }, { "epoch": 0.2657789984181891, "grad_norm": 1.635699074498196, "learning_rate": 8.61182893480208e-07, "loss": 0.3694, "step": 15290 }, { "epoch": 0.2657963809556919, "grad_norm": 1.5995276139670669, "learning_rate": 8.61163427327612e-07, "loss": 0.4601, "step": 15291 }, { "epoch": 0.26581376349319474, "grad_norm": 2.0471540900551983, "learning_rate": 8.611439600302941e-07, "loss": 0.174, "step": 15292 }, { "epoch": 0.2658311460306976, "grad_norm": 1.5312320952654976, "learning_rate": 8.611244915883155e-07, "loss": 0.2725, "step": 15293 }, { "epoch": 0.2658485285682004, "grad_norm": 1.9449737369247655, "learning_rate": 8.611050220017382e-07, "loss": 0.4124, "step": 15294 }, { "epoch": 0.26586591110570323, "grad_norm": 3.269944710533651, "learning_rate": 8.610855512706238e-07, "loss": 0.4568, "step": 15295 }, { "epoch": 0.265883293643206, "grad_norm": 1.534621412937951, "learning_rate": 8.610660793950342e-07, "loss": 0.4926, "step": 15296 }, { "epoch": 0.26590067618070884, "grad_norm": 1.7368828221719956, "learning_rate": 8.61046606375031e-07, "loss": 0.4302, "step": 15297 }, { "epoch": 0.26591805871821167, "grad_norm": 3.1549634403699756, "learning_rate": 8.610271322106759e-07, "loss": 0.402, "step": 15298 }, { "epoch": 0.2659354412557145, "grad_norm": 1.432430442698828, "learning_rate": 8.610076569020306e-07, "loss": 0.5414, "step": 15299 }, { "epoch": 0.26595282379321733, "grad_norm": 2.588744308036913, "learning_rate": 8.609881804491568e-07, "loss": 0.3637, "step": 15300 }, { "epoch": 0.26597020633072016, "grad_norm": 1.790782639810534, "learning_rate": 8.609687028521163e-07, "loss": 0.4011, "step": 15301 }, { "epoch": 0.265987588868223, "grad_norm": 6.142930133935351, "learning_rate": 8.609492241109708e-07, "loss": 0.6111, "step": 15302 }, { "epoch": 0.2660049714057258, "grad_norm": 1.8105447783711992, "learning_rate": 8.609297442257821e-07, "loss": 0.4568, "step": 15303 }, { "epoch": 0.26602235394322865, "grad_norm": 1.8377694325472174, "learning_rate": 8.609102631966121e-07, "loss": 0.2325, "step": 15304 }, { "epoch": 0.2660397364807315, "grad_norm": 1.625086324974361, "learning_rate": 8.608907810235222e-07, "loss": 0.3706, "step": 15305 }, { "epoch": 0.26605711901823426, "grad_norm": 1.5598882901015976, "learning_rate": 8.608712977065742e-07, "loss": 0.2432, "step": 15306 }, { "epoch": 0.2660745015557371, "grad_norm": 1.6041841032489859, "learning_rate": 8.608518132458301e-07, "loss": 0.2387, "step": 15307 }, { "epoch": 0.2660918840932399, "grad_norm": 1.497831541636431, "learning_rate": 8.608323276413514e-07, "loss": 0.2362, "step": 15308 }, { "epoch": 0.26610926663074275, "grad_norm": 1.739000618240983, "learning_rate": 8.608128408932e-07, "loss": 0.2535, "step": 15309 }, { "epoch": 0.2661266491682456, "grad_norm": 1.8165344176174059, "learning_rate": 8.607933530014377e-07, "loss": 0.2187, "step": 15310 }, { "epoch": 0.2661440317057484, "grad_norm": 1.9962285977435468, "learning_rate": 8.607738639661262e-07, "loss": 0.2829, "step": 15311 }, { "epoch": 0.26616141424325124, "grad_norm": 2.778377520481958, "learning_rate": 8.607543737873272e-07, "loss": 0.4282, "step": 15312 }, { "epoch": 0.26617879678075407, "grad_norm": 0.9777882385137722, "learning_rate": 8.607348824651027e-07, "loss": 0.2748, "step": 15313 }, { "epoch": 0.2661961793182569, "grad_norm": 2.695772539372489, "learning_rate": 8.607153899995143e-07, "loss": 0.6491, "step": 15314 }, { "epoch": 0.26621356185575973, "grad_norm": 3.062169383624693, "learning_rate": 8.606958963906238e-07, "loss": 0.3597, "step": 15315 }, { "epoch": 0.2662309443932625, "grad_norm": 2.5297687848195127, "learning_rate": 8.606764016384928e-07, "loss": 0.3719, "step": 15316 }, { "epoch": 0.26624832693076533, "grad_norm": 1.6665262434372743, "learning_rate": 8.606569057431837e-07, "loss": 0.2945, "step": 15317 }, { "epoch": 0.26626570946826816, "grad_norm": 1.9610399448793936, "learning_rate": 8.606374087047575e-07, "loss": 0.2823, "step": 15318 }, { "epoch": 0.266283092005771, "grad_norm": 2.67778759143359, "learning_rate": 8.606179105232764e-07, "loss": 0.3637, "step": 15319 }, { "epoch": 0.2663004745432738, "grad_norm": 2.0094943903581433, "learning_rate": 8.605984111988024e-07, "loss": 0.2063, "step": 15320 }, { "epoch": 0.26631785708077665, "grad_norm": 1.3523821709443489, "learning_rate": 8.60578910731397e-07, "loss": 0.2646, "step": 15321 }, { "epoch": 0.2663352396182795, "grad_norm": 1.9380108148033477, "learning_rate": 8.60559409121122e-07, "loss": 0.3089, "step": 15322 }, { "epoch": 0.2663526221557823, "grad_norm": 1.940638705035761, "learning_rate": 8.605399063680394e-07, "loss": 0.3523, "step": 15323 }, { "epoch": 0.26637000469328515, "grad_norm": 1.4368108258972623, "learning_rate": 8.605204024722108e-07, "loss": 0.2928, "step": 15324 }, { "epoch": 0.266387387230788, "grad_norm": 2.0745431377671117, "learning_rate": 8.605008974336983e-07, "loss": 0.4246, "step": 15325 }, { "epoch": 0.26640476976829075, "grad_norm": 1.9181233363582693, "learning_rate": 8.604813912525635e-07, "loss": 0.2662, "step": 15326 }, { "epoch": 0.2664221523057936, "grad_norm": 1.892110997174217, "learning_rate": 8.604618839288682e-07, "loss": 0.3586, "step": 15327 }, { "epoch": 0.2664395348432964, "grad_norm": 1.7919054718536465, "learning_rate": 8.604423754626743e-07, "loss": 0.3277, "step": 15328 }, { "epoch": 0.26645691738079924, "grad_norm": 1.3762227164508267, "learning_rate": 8.604228658540436e-07, "loss": 0.2598, "step": 15329 }, { "epoch": 0.26647429991830207, "grad_norm": 2.5833475082273134, "learning_rate": 8.604033551030382e-07, "loss": 0.3206, "step": 15330 }, { "epoch": 0.2664916824558049, "grad_norm": 1.2506506582919839, "learning_rate": 8.603838432097194e-07, "loss": 0.3722, "step": 15331 }, { "epoch": 0.26650906499330773, "grad_norm": 1.6879761243421019, "learning_rate": 8.603643301741497e-07, "loss": 0.2802, "step": 15332 }, { "epoch": 0.26652644753081056, "grad_norm": 3.022888416896562, "learning_rate": 8.603448159963905e-07, "loss": 0.4093, "step": 15333 }, { "epoch": 0.2665438300683134, "grad_norm": 1.4168990822418046, "learning_rate": 8.603253006765036e-07, "loss": 0.1839, "step": 15334 }, { "epoch": 0.2665612126058162, "grad_norm": 1.9408890956096723, "learning_rate": 8.603057842145513e-07, "loss": 0.31, "step": 15335 }, { "epoch": 0.266578595143319, "grad_norm": 1.4504754973974092, "learning_rate": 8.60286266610595e-07, "loss": 0.2126, "step": 15336 }, { "epoch": 0.2665959776808218, "grad_norm": 2.0948012751588756, "learning_rate": 8.602667478646968e-07, "loss": 0.6074, "step": 15337 }, { "epoch": 0.26661336021832466, "grad_norm": 1.7329401790012038, "learning_rate": 8.602472279769184e-07, "loss": 0.2456, "step": 15338 }, { "epoch": 0.2666307427558275, "grad_norm": 2.2215208475220143, "learning_rate": 8.602277069473218e-07, "loss": 0.3733, "step": 15339 }, { "epoch": 0.2666481252933303, "grad_norm": 1.4813014162006106, "learning_rate": 8.602081847759689e-07, "loss": 0.2969, "step": 15340 }, { "epoch": 0.26666550783083315, "grad_norm": 1.772614060693112, "learning_rate": 8.601886614629215e-07, "loss": 0.3418, "step": 15341 }, { "epoch": 0.266682890368336, "grad_norm": 1.8307670743252207, "learning_rate": 8.601691370082415e-07, "loss": 0.5671, "step": 15342 }, { "epoch": 0.2667002729058388, "grad_norm": 5.0151085823683115, "learning_rate": 8.601496114119908e-07, "loss": 0.4423, "step": 15343 }, { "epoch": 0.26671765544334164, "grad_norm": 2.3001133240877114, "learning_rate": 8.601300846742312e-07, "loss": 0.3463, "step": 15344 }, { "epoch": 0.26673503798084447, "grad_norm": 4.798914635524789, "learning_rate": 8.601105567950249e-07, "loss": 0.209, "step": 15345 }, { "epoch": 0.26675242051834724, "grad_norm": 1.8471337159098884, "learning_rate": 8.600910277744332e-07, "loss": 0.2949, "step": 15346 }, { "epoch": 0.2667698030558501, "grad_norm": 1.7226056493977737, "learning_rate": 8.600714976125185e-07, "loss": 0.2219, "step": 15347 }, { "epoch": 0.2667871855933529, "grad_norm": 2.256850570100223, "learning_rate": 8.600519663093426e-07, "loss": 0.5309, "step": 15348 }, { "epoch": 0.26680456813085573, "grad_norm": 1.9112598014685396, "learning_rate": 8.600324338649673e-07, "loss": 0.3197, "step": 15349 }, { "epoch": 0.26682195066835857, "grad_norm": 1.5657703643141345, "learning_rate": 8.600129002794547e-07, "loss": 0.3608, "step": 15350 }, { "epoch": 0.2668393332058614, "grad_norm": 2.330296957471354, "learning_rate": 8.599933655528665e-07, "loss": 0.384, "step": 15351 }, { "epoch": 0.2668567157433642, "grad_norm": 1.7601062307266468, "learning_rate": 8.599738296852648e-07, "loss": 0.2113, "step": 15352 }, { "epoch": 0.26687409828086706, "grad_norm": 1.590786371501618, "learning_rate": 8.599542926767112e-07, "loss": 0.2857, "step": 15353 }, { "epoch": 0.2668914808183699, "grad_norm": 2.6562092892127906, "learning_rate": 8.599347545272679e-07, "loss": 0.343, "step": 15354 }, { "epoch": 0.2669088633558727, "grad_norm": 2.727916856577434, "learning_rate": 8.599152152369968e-07, "loss": 0.5559, "step": 15355 }, { "epoch": 0.2669262458933755, "grad_norm": 2.3209163205736556, "learning_rate": 8.598956748059599e-07, "loss": 0.3733, "step": 15356 }, { "epoch": 0.2669436284308783, "grad_norm": 3.5110005583578077, "learning_rate": 8.598761332342191e-07, "loss": 0.2856, "step": 15357 }, { "epoch": 0.26696101096838115, "grad_norm": 1.5378716742476415, "learning_rate": 8.59856590521836e-07, "loss": 0.4728, "step": 15358 }, { "epoch": 0.266978393505884, "grad_norm": 1.9010090861207616, "learning_rate": 8.598370466688729e-07, "loss": 0.2681, "step": 15359 }, { "epoch": 0.2669957760433868, "grad_norm": 1.5873755675944943, "learning_rate": 8.598175016753916e-07, "loss": 0.1798, "step": 15360 }, { "epoch": 0.26701315858088964, "grad_norm": 1.178334643750207, "learning_rate": 8.597979555414543e-07, "loss": 0.2402, "step": 15361 }, { "epoch": 0.2670305411183925, "grad_norm": 2.183332002754672, "learning_rate": 8.597784082671227e-07, "loss": 0.3568, "step": 15362 }, { "epoch": 0.2670479236558953, "grad_norm": 1.5514627813084525, "learning_rate": 8.597588598524588e-07, "loss": 0.3291, "step": 15363 }, { "epoch": 0.26706530619339813, "grad_norm": 1.8041455607778645, "learning_rate": 8.597393102975245e-07, "loss": 0.2193, "step": 15364 }, { "epoch": 0.26708268873090096, "grad_norm": 1.6732118815757226, "learning_rate": 8.597197596023819e-07, "loss": 0.3937, "step": 15365 }, { "epoch": 0.26710007126840374, "grad_norm": 2.017699232181219, "learning_rate": 8.59700207767093e-07, "loss": 0.4746, "step": 15366 }, { "epoch": 0.26711745380590657, "grad_norm": 1.7382914282960733, "learning_rate": 8.596806547917195e-07, "loss": 0.4737, "step": 15367 }, { "epoch": 0.2671348363434094, "grad_norm": 2.001651954720737, "learning_rate": 8.596611006763235e-07, "loss": 0.3066, "step": 15368 }, { "epoch": 0.26715221888091223, "grad_norm": 0.9066099721790837, "learning_rate": 8.596415454209673e-07, "loss": 0.2404, "step": 15369 }, { "epoch": 0.26716960141841506, "grad_norm": 1.4905552510594955, "learning_rate": 8.596219890257124e-07, "loss": 0.2315, "step": 15370 }, { "epoch": 0.2671869839559179, "grad_norm": 2.2652322152793296, "learning_rate": 8.59602431490621e-07, "loss": 0.5364, "step": 15371 }, { "epoch": 0.2672043664934207, "grad_norm": 1.4685478444872109, "learning_rate": 8.595828728157551e-07, "loss": 0.221, "step": 15372 }, { "epoch": 0.26722174903092355, "grad_norm": 1.5310097327217496, "learning_rate": 8.595633130011766e-07, "loss": 0.3192, "step": 15373 }, { "epoch": 0.2672391315684264, "grad_norm": 1.3463583327609816, "learning_rate": 8.595437520469477e-07, "loss": 0.1822, "step": 15374 }, { "epoch": 0.2672565141059292, "grad_norm": 1.2313900259261878, "learning_rate": 8.595241899531302e-07, "loss": 0.4231, "step": 15375 }, { "epoch": 0.267273896643432, "grad_norm": 1.7946086629656859, "learning_rate": 8.595046267197863e-07, "loss": 0.5315, "step": 15376 }, { "epoch": 0.2672912791809348, "grad_norm": 1.2873625507961413, "learning_rate": 8.594850623469778e-07, "loss": 0.2746, "step": 15377 }, { "epoch": 0.26730866171843765, "grad_norm": 1.967673599279785, "learning_rate": 8.594654968347667e-07, "loss": 0.1842, "step": 15378 }, { "epoch": 0.2673260442559405, "grad_norm": 4.155709961573289, "learning_rate": 8.594459301832151e-07, "loss": 0.1831, "step": 15379 }, { "epoch": 0.2673434267934433, "grad_norm": 2.127064657455915, "learning_rate": 8.594263623923852e-07, "loss": 0.3004, "step": 15380 }, { "epoch": 0.26736080933094614, "grad_norm": 1.9039167966811719, "learning_rate": 8.594067934623386e-07, "loss": 0.397, "step": 15381 }, { "epoch": 0.26737819186844897, "grad_norm": 2.18047749402417, "learning_rate": 8.593872233931377e-07, "loss": 0.2651, "step": 15382 }, { "epoch": 0.2673955744059518, "grad_norm": 1.7579787125507136, "learning_rate": 8.593676521848445e-07, "loss": 0.2215, "step": 15383 }, { "epoch": 0.2674129569434546, "grad_norm": 2.2476261372168223, "learning_rate": 8.593480798375207e-07, "loss": 0.2384, "step": 15384 }, { "epoch": 0.26743033948095746, "grad_norm": 2.012796618424081, "learning_rate": 8.593285063512287e-07, "loss": 0.1859, "step": 15385 }, { "epoch": 0.26744772201846023, "grad_norm": 1.807319686897137, "learning_rate": 8.593089317260302e-07, "loss": 0.2779, "step": 15386 }, { "epoch": 0.26746510455596306, "grad_norm": 2.0498903073062045, "learning_rate": 8.592893559619876e-07, "loss": 0.2648, "step": 15387 }, { "epoch": 0.2674824870934659, "grad_norm": 1.8812902351217788, "learning_rate": 8.592697790591627e-07, "loss": 0.3361, "step": 15388 }, { "epoch": 0.2674998696309687, "grad_norm": 2.253063595561514, "learning_rate": 8.592502010176178e-07, "loss": 0.3156, "step": 15389 }, { "epoch": 0.26751725216847155, "grad_norm": 2.2744205224115923, "learning_rate": 8.592306218374147e-07, "loss": 0.4506, "step": 15390 }, { "epoch": 0.2675346347059744, "grad_norm": 1.9572712419024703, "learning_rate": 8.592110415186156e-07, "loss": 0.5436, "step": 15391 }, { "epoch": 0.2675520172434772, "grad_norm": 1.647707549526063, "learning_rate": 8.591914600612824e-07, "loss": 0.3817, "step": 15392 }, { "epoch": 0.26756939978098004, "grad_norm": 2.5379681249273993, "learning_rate": 8.591718774654772e-07, "loss": 0.4364, "step": 15393 }, { "epoch": 0.2675867823184829, "grad_norm": 1.7049061812102655, "learning_rate": 8.591522937312623e-07, "loss": 0.4689, "step": 15394 }, { "epoch": 0.2676041648559857, "grad_norm": 1.8920609040199186, "learning_rate": 8.591327088586996e-07, "loss": 0.2103, "step": 15395 }, { "epoch": 0.2676215473934885, "grad_norm": 4.693601105124598, "learning_rate": 8.591131228478512e-07, "loss": 0.248, "step": 15396 }, { "epoch": 0.2676389299309913, "grad_norm": 1.9433008568266157, "learning_rate": 8.590935356987791e-07, "loss": 0.3142, "step": 15397 }, { "epoch": 0.26765631246849414, "grad_norm": 1.7141286384080727, "learning_rate": 8.590739474115454e-07, "loss": 0.318, "step": 15398 }, { "epoch": 0.26767369500599697, "grad_norm": 1.7707939448880698, "learning_rate": 8.590543579862124e-07, "loss": 0.3963, "step": 15399 }, { "epoch": 0.2676910775434998, "grad_norm": 1.4859627069630226, "learning_rate": 8.590347674228418e-07, "loss": 0.2752, "step": 15400 }, { "epoch": 0.26770846008100263, "grad_norm": 2.731680258570529, "learning_rate": 8.59015175721496e-07, "loss": 0.4212, "step": 15401 }, { "epoch": 0.26772584261850546, "grad_norm": 2.2031519579479757, "learning_rate": 8.58995582882237e-07, "loss": 0.3709, "step": 15402 }, { "epoch": 0.2677432251560083, "grad_norm": 1.4249332862781658, "learning_rate": 8.589759889051271e-07, "loss": 0.2544, "step": 15403 }, { "epoch": 0.2677606076935111, "grad_norm": 2.5744991669688155, "learning_rate": 8.58956393790228e-07, "loss": 0.3351, "step": 15404 }, { "epoch": 0.2677779902310139, "grad_norm": 2.036240208329768, "learning_rate": 8.589367975376021e-07, "loss": 0.2123, "step": 15405 }, { "epoch": 0.2677953727685167, "grad_norm": 1.5935890001770139, "learning_rate": 8.589172001473114e-07, "loss": 0.3884, "step": 15406 }, { "epoch": 0.26781275530601956, "grad_norm": 1.8358378735125962, "learning_rate": 8.588976016194181e-07, "loss": 0.2986, "step": 15407 }, { "epoch": 0.2678301378435224, "grad_norm": 1.7437722287367081, "learning_rate": 8.588780019539843e-07, "loss": 0.2616, "step": 15408 }, { "epoch": 0.2678475203810252, "grad_norm": 1.3176181980642376, "learning_rate": 8.58858401151072e-07, "loss": 0.2473, "step": 15409 }, { "epoch": 0.26786490291852805, "grad_norm": 2.5971647090450833, "learning_rate": 8.588387992107435e-07, "loss": 0.3876, "step": 15410 }, { "epoch": 0.2678822854560309, "grad_norm": 1.5481307251704426, "learning_rate": 8.588191961330606e-07, "loss": 0.2382, "step": 15411 }, { "epoch": 0.2678996679935337, "grad_norm": 1.5846179579153092, "learning_rate": 8.587995919180859e-07, "loss": 0.2877, "step": 15412 }, { "epoch": 0.26791705053103654, "grad_norm": 2.349939379548499, "learning_rate": 8.587799865658814e-07, "loss": 0.3003, "step": 15413 }, { "epoch": 0.26793443306853937, "grad_norm": 1.6879061983468933, "learning_rate": 8.587603800765089e-07, "loss": 0.4065, "step": 15414 }, { "epoch": 0.26795181560604214, "grad_norm": 2.984329772751499, "learning_rate": 8.58740772450031e-07, "loss": 0.3606, "step": 15415 }, { "epoch": 0.267969198143545, "grad_norm": 1.9566681850920677, "learning_rate": 8.587211636865094e-07, "loss": 0.2634, "step": 15416 }, { "epoch": 0.2679865806810478, "grad_norm": 1.8569607190797346, "learning_rate": 8.587015537860067e-07, "loss": 0.4471, "step": 15417 }, { "epoch": 0.26800396321855063, "grad_norm": 2.710800724270729, "learning_rate": 8.586819427485848e-07, "loss": 0.1935, "step": 15418 }, { "epoch": 0.26802134575605346, "grad_norm": 2.2190051996785813, "learning_rate": 8.586623305743059e-07, "loss": 0.3663, "step": 15419 }, { "epoch": 0.2680387282935563, "grad_norm": 1.6650865610936145, "learning_rate": 8.586427172632322e-07, "loss": 0.4433, "step": 15420 }, { "epoch": 0.2680561108310591, "grad_norm": 1.7057522285821118, "learning_rate": 8.586231028154258e-07, "loss": 0.1967, "step": 15421 }, { "epoch": 0.26807349336856195, "grad_norm": 2.351807477516839, "learning_rate": 8.586034872309489e-07, "loss": 0.3737, "step": 15422 }, { "epoch": 0.2680908759060648, "grad_norm": 1.4536887085934844, "learning_rate": 8.585838705098638e-07, "loss": 0.1917, "step": 15423 }, { "epoch": 0.2681082584435676, "grad_norm": 1.9507505443822861, "learning_rate": 8.585642526522325e-07, "loss": 0.2129, "step": 15424 }, { "epoch": 0.2681256409810704, "grad_norm": 2.0339735707799917, "learning_rate": 8.58544633658117e-07, "loss": 0.2717, "step": 15425 }, { "epoch": 0.2681430235185732, "grad_norm": 1.17445608540633, "learning_rate": 8.5852501352758e-07, "loss": 0.4302, "step": 15426 }, { "epoch": 0.26816040605607605, "grad_norm": 1.4037494414147784, "learning_rate": 8.585053922606833e-07, "loss": 0.2876, "step": 15427 }, { "epoch": 0.2681777885935789, "grad_norm": 3.8430685669073092, "learning_rate": 8.584857698574891e-07, "loss": 0.3813, "step": 15428 }, { "epoch": 0.2681951711310817, "grad_norm": 2.8669871598207335, "learning_rate": 8.584661463180598e-07, "loss": 0.4119, "step": 15429 }, { "epoch": 0.26821255366858454, "grad_norm": 2.3496541229320984, "learning_rate": 8.584465216424574e-07, "loss": 0.3827, "step": 15430 }, { "epoch": 0.26822993620608737, "grad_norm": 2.7128398110868104, "learning_rate": 8.584268958307442e-07, "loss": 0.2301, "step": 15431 }, { "epoch": 0.2682473187435902, "grad_norm": 2.107900609030329, "learning_rate": 8.584072688829824e-07, "loss": 0.2887, "step": 15432 }, { "epoch": 0.26826470128109303, "grad_norm": 1.8293294108561762, "learning_rate": 8.583876407992343e-07, "loss": 0.3236, "step": 15433 }, { "epoch": 0.26828208381859586, "grad_norm": 1.5792289133008137, "learning_rate": 8.58368011579562e-07, "loss": 0.3425, "step": 15434 }, { "epoch": 0.26829946635609864, "grad_norm": 2.5741857546135667, "learning_rate": 8.583483812240276e-07, "loss": 0.2249, "step": 15435 }, { "epoch": 0.26831684889360147, "grad_norm": 1.0637360513179848, "learning_rate": 8.583287497326935e-07, "loss": 0.4192, "step": 15436 }, { "epoch": 0.2683342314311043, "grad_norm": 1.5146831281466566, "learning_rate": 8.583091171056218e-07, "loss": 0.5215, "step": 15437 }, { "epoch": 0.26835161396860713, "grad_norm": 1.7991989651151261, "learning_rate": 8.582894833428749e-07, "loss": 0.49, "step": 15438 }, { "epoch": 0.26836899650610996, "grad_norm": 1.6430695694253319, "learning_rate": 8.582698484445147e-07, "loss": 0.2163, "step": 15439 }, { "epoch": 0.2683863790436128, "grad_norm": 1.3755030340792125, "learning_rate": 8.582502124106039e-07, "loss": 0.2257, "step": 15440 }, { "epoch": 0.2684037615811156, "grad_norm": 5.716774479266265, "learning_rate": 8.582305752412044e-07, "loss": 0.2186, "step": 15441 }, { "epoch": 0.26842114411861845, "grad_norm": 1.3582346759034958, "learning_rate": 8.582109369363786e-07, "loss": 0.2794, "step": 15442 }, { "epoch": 0.2684385266561213, "grad_norm": 1.3981695605491855, "learning_rate": 8.581912974961887e-07, "loss": 0.4184, "step": 15443 }, { "epoch": 0.2684559091936241, "grad_norm": 1.644699806586135, "learning_rate": 8.581716569206967e-07, "loss": 0.2786, "step": 15444 }, { "epoch": 0.2684732917311269, "grad_norm": 1.4672302435291569, "learning_rate": 8.581520152099653e-07, "loss": 0.2061, "step": 15445 }, { "epoch": 0.2684906742686297, "grad_norm": 1.3620752643438703, "learning_rate": 8.581323723640565e-07, "loss": 0.3652, "step": 15446 }, { "epoch": 0.26850805680613254, "grad_norm": 1.5341015981528483, "learning_rate": 8.581127283830325e-07, "loss": 0.4, "step": 15447 }, { "epoch": 0.2685254393436354, "grad_norm": 1.6825036484190232, "learning_rate": 8.580930832669558e-07, "loss": 0.3804, "step": 15448 }, { "epoch": 0.2685428218811382, "grad_norm": 2.1256555467949028, "learning_rate": 8.580734370158884e-07, "loss": 0.2313, "step": 15449 }, { "epoch": 0.26856020441864104, "grad_norm": 1.593062192216997, "learning_rate": 8.580537896298927e-07, "loss": 0.2278, "step": 15450 }, { "epoch": 0.26857758695614387, "grad_norm": 1.5507235410888804, "learning_rate": 8.58034141109031e-07, "loss": 0.2879, "step": 15451 }, { "epoch": 0.2685949694936467, "grad_norm": 1.0716782328827053, "learning_rate": 8.580144914533654e-07, "loss": 0.4017, "step": 15452 }, { "epoch": 0.2686123520311495, "grad_norm": 3.0548306471122686, "learning_rate": 8.579948406629585e-07, "loss": 0.4556, "step": 15453 }, { "epoch": 0.26862973456865236, "grad_norm": 1.9540229463976302, "learning_rate": 8.579751887378723e-07, "loss": 0.4282, "step": 15454 }, { "epoch": 0.26864711710615513, "grad_norm": 1.735874859607823, "learning_rate": 8.579555356781693e-07, "loss": 0.4468, "step": 15455 }, { "epoch": 0.26866449964365796, "grad_norm": 2.2873305783879054, "learning_rate": 8.579358814839117e-07, "loss": 0.2195, "step": 15456 }, { "epoch": 0.2686818821811608, "grad_norm": 1.215944505972559, "learning_rate": 8.579162261551617e-07, "loss": 0.3249, "step": 15457 }, { "epoch": 0.2686992647186636, "grad_norm": 1.4155494406413294, "learning_rate": 8.578965696919817e-07, "loss": 0.4, "step": 15458 }, { "epoch": 0.26871664725616645, "grad_norm": 2.1600427530586908, "learning_rate": 8.578769120944339e-07, "loss": 0.2411, "step": 15459 }, { "epoch": 0.2687340297936693, "grad_norm": 1.466037017194806, "learning_rate": 8.578572533625807e-07, "loss": 0.3081, "step": 15460 }, { "epoch": 0.2687514123311721, "grad_norm": 1.341688674331784, "learning_rate": 8.578375934964845e-07, "loss": 0.3259, "step": 15461 }, { "epoch": 0.26876879486867494, "grad_norm": 1.4125731210849928, "learning_rate": 8.578179324962074e-07, "loss": 0.2775, "step": 15462 }, { "epoch": 0.2687861774061778, "grad_norm": 4.173988052165958, "learning_rate": 8.577982703618119e-07, "loss": 0.3819, "step": 15463 }, { "epoch": 0.2688035599436806, "grad_norm": 4.0897068213981385, "learning_rate": 8.577786070933602e-07, "loss": 0.3723, "step": 15464 }, { "epoch": 0.2688209424811834, "grad_norm": 1.2381076464466867, "learning_rate": 8.577589426909146e-07, "loss": 0.2455, "step": 15465 }, { "epoch": 0.2688383250186862, "grad_norm": 2.1692141701639374, "learning_rate": 8.577392771545377e-07, "loss": 0.357, "step": 15466 }, { "epoch": 0.26885570755618904, "grad_norm": 1.6786558549024644, "learning_rate": 8.577196104842915e-07, "loss": 0.2958, "step": 15467 }, { "epoch": 0.26887309009369187, "grad_norm": 1.4955272380103, "learning_rate": 8.576999426802385e-07, "loss": 0.3328, "step": 15468 }, { "epoch": 0.2688904726311947, "grad_norm": 1.612813674270069, "learning_rate": 8.576802737424409e-07, "loss": 0.2856, "step": 15469 }, { "epoch": 0.26890785516869753, "grad_norm": 1.8882450532976376, "learning_rate": 8.57660603670961e-07, "loss": 0.2991, "step": 15470 }, { "epoch": 0.26892523770620036, "grad_norm": 3.5759461566187394, "learning_rate": 8.576409324658615e-07, "loss": 0.5192, "step": 15471 }, { "epoch": 0.2689426202437032, "grad_norm": 2.3720898436316893, "learning_rate": 8.576212601272046e-07, "loss": 0.3671, "step": 15472 }, { "epoch": 0.268960002781206, "grad_norm": 1.4808620539307822, "learning_rate": 8.576015866550525e-07, "loss": 0.4145, "step": 15473 }, { "epoch": 0.26897738531870885, "grad_norm": 2.025421312189578, "learning_rate": 8.575819120494677e-07, "loss": 0.3102, "step": 15474 }, { "epoch": 0.2689947678562116, "grad_norm": 1.5110408861149163, "learning_rate": 8.575622363105123e-07, "loss": 0.5516, "step": 15475 }, { "epoch": 0.26901215039371446, "grad_norm": 1.340168765608538, "learning_rate": 8.57542559438249e-07, "loss": 0.4223, "step": 15476 }, { "epoch": 0.2690295329312173, "grad_norm": 1.3792690034785338, "learning_rate": 8.5752288143274e-07, "loss": 0.2991, "step": 15477 }, { "epoch": 0.2690469154687201, "grad_norm": 2.6893119821856803, "learning_rate": 8.575032022940476e-07, "loss": 0.4977, "step": 15478 }, { "epoch": 0.26906429800622295, "grad_norm": 2.613841654706594, "learning_rate": 8.574835220222344e-07, "loss": 0.509, "step": 15479 }, { "epoch": 0.2690816805437258, "grad_norm": 0.9870969631907973, "learning_rate": 8.574638406173627e-07, "loss": 0.3399, "step": 15480 }, { "epoch": 0.2690990630812286, "grad_norm": 3.2397015694578624, "learning_rate": 8.574441580794947e-07, "loss": 0.3766, "step": 15481 }, { "epoch": 0.26911644561873144, "grad_norm": 1.7374809098529203, "learning_rate": 8.57424474408693e-07, "loss": 0.244, "step": 15482 }, { "epoch": 0.26913382815623427, "grad_norm": 1.722508150662353, "learning_rate": 8.574047896050198e-07, "loss": 0.3226, "step": 15483 }, { "epoch": 0.2691512106937371, "grad_norm": 2.0535821209570946, "learning_rate": 8.573851036685377e-07, "loss": 0.5703, "step": 15484 }, { "epoch": 0.26916859323123987, "grad_norm": 2.324598979347345, "learning_rate": 8.573654165993089e-07, "loss": 0.3294, "step": 15485 }, { "epoch": 0.2691859757687427, "grad_norm": 1.3950443871978964, "learning_rate": 8.57345728397396e-07, "loss": 0.3655, "step": 15486 }, { "epoch": 0.26920335830624553, "grad_norm": 2.0419460970860026, "learning_rate": 8.573260390628613e-07, "loss": 0.4509, "step": 15487 }, { "epoch": 0.26922074084374836, "grad_norm": 1.7615181221273415, "learning_rate": 8.57306348595767e-07, "loss": 0.2611, "step": 15488 }, { "epoch": 0.2692381233812512, "grad_norm": 2.314576972135837, "learning_rate": 8.572866569961759e-07, "loss": 0.3182, "step": 15489 }, { "epoch": 0.269255505918754, "grad_norm": 1.1677479828340314, "learning_rate": 8.572669642641501e-07, "loss": 0.1659, "step": 15490 }, { "epoch": 0.26927288845625685, "grad_norm": 1.8928102600190984, "learning_rate": 8.572472703997523e-07, "loss": 0.259, "step": 15491 }, { "epoch": 0.2692902709937597, "grad_norm": 1.5285747907112115, "learning_rate": 8.572275754030446e-07, "loss": 0.2853, "step": 15492 }, { "epoch": 0.2693076535312625, "grad_norm": 1.931664795309387, "learning_rate": 8.572078792740898e-07, "loss": 0.2803, "step": 15493 }, { "epoch": 0.26932503606876534, "grad_norm": 2.1261443371669775, "learning_rate": 8.571881820129498e-07, "loss": 0.2645, "step": 15494 }, { "epoch": 0.2693424186062681, "grad_norm": 2.024688679806446, "learning_rate": 8.571684836196876e-07, "loss": 0.3537, "step": 15495 }, { "epoch": 0.26935980114377095, "grad_norm": 1.7022659567906469, "learning_rate": 8.571487840943652e-07, "loss": 0.3424, "step": 15496 }, { "epoch": 0.2693771836812738, "grad_norm": 1.6018583761740597, "learning_rate": 8.571290834370452e-07, "loss": 0.3637, "step": 15497 }, { "epoch": 0.2693945662187766, "grad_norm": 1.877078774990868, "learning_rate": 8.571093816477903e-07, "loss": 0.4183, "step": 15498 }, { "epoch": 0.26941194875627944, "grad_norm": 1.838001261040852, "learning_rate": 8.570896787266626e-07, "loss": 0.3112, "step": 15499 }, { "epoch": 0.26942933129378227, "grad_norm": 2.4905302923799857, "learning_rate": 8.570699746737245e-07, "loss": 0.3573, "step": 15500 }, { "epoch": 0.2694467138312851, "grad_norm": 2.696333825101123, "learning_rate": 8.570502694890386e-07, "loss": 0.6637, "step": 15501 }, { "epoch": 0.26946409636878793, "grad_norm": 1.892173425696374, "learning_rate": 8.570305631726677e-07, "loss": 0.2355, "step": 15502 }, { "epoch": 0.26948147890629076, "grad_norm": 2.202380358660836, "learning_rate": 8.570108557246735e-07, "loss": 0.4065, "step": 15503 }, { "epoch": 0.2694988614437936, "grad_norm": 1.247094820294662, "learning_rate": 8.569911471451192e-07, "loss": 0.3766, "step": 15504 }, { "epoch": 0.26951624398129637, "grad_norm": 2.1816965207952506, "learning_rate": 8.569714374340669e-07, "loss": 0.3988, "step": 15505 }, { "epoch": 0.2695336265187992, "grad_norm": 1.7734890040389402, "learning_rate": 8.569517265915788e-07, "loss": 0.3824, "step": 15506 }, { "epoch": 0.269551009056302, "grad_norm": 1.441082470653475, "learning_rate": 8.569320146177181e-07, "loss": 0.2893, "step": 15507 }, { "epoch": 0.26956839159380486, "grad_norm": 1.2934589182442682, "learning_rate": 8.569123015125468e-07, "loss": 0.3026, "step": 15508 }, { "epoch": 0.2695857741313077, "grad_norm": 2.400890120894996, "learning_rate": 8.568925872761272e-07, "loss": 0.5615, "step": 15509 }, { "epoch": 0.2696031566688105, "grad_norm": 1.743554499224535, "learning_rate": 8.568728719085223e-07, "loss": 0.3583, "step": 15510 }, { "epoch": 0.26962053920631335, "grad_norm": 1.3943380598169026, "learning_rate": 8.568531554097942e-07, "loss": 0.2019, "step": 15511 }, { "epoch": 0.2696379217438162, "grad_norm": 3.109668011915194, "learning_rate": 8.568334377800056e-07, "loss": 0.3325, "step": 15512 }, { "epoch": 0.269655304281319, "grad_norm": 1.4820604194925862, "learning_rate": 8.56813719019219e-07, "loss": 0.4984, "step": 15513 }, { "epoch": 0.26967268681882184, "grad_norm": 1.1828679216823854, "learning_rate": 8.567939991274966e-07, "loss": 0.343, "step": 15514 }, { "epoch": 0.2696900693563246, "grad_norm": 2.205266779628011, "learning_rate": 8.567742781049012e-07, "loss": 0.5605, "step": 15515 }, { "epoch": 0.26970745189382744, "grad_norm": 1.9622313815987718, "learning_rate": 8.567545559514953e-07, "loss": 0.4106, "step": 15516 }, { "epoch": 0.2697248344313303, "grad_norm": 2.5661263537669807, "learning_rate": 8.567348326673414e-07, "loss": 0.4343, "step": 15517 }, { "epoch": 0.2697422169688331, "grad_norm": 1.8357869679708196, "learning_rate": 8.567151082525017e-07, "loss": 0.3416, "step": 15518 }, { "epoch": 0.26975959950633593, "grad_norm": 2.544321825738386, "learning_rate": 8.566953827070391e-07, "loss": 0.3874, "step": 15519 }, { "epoch": 0.26977698204383876, "grad_norm": 1.042795725351354, "learning_rate": 8.56675656031016e-07, "loss": 0.1708, "step": 15520 }, { "epoch": 0.2697943645813416, "grad_norm": 1.531854198810484, "learning_rate": 8.56655928224495e-07, "loss": 0.343, "step": 15521 }, { "epoch": 0.2698117471188444, "grad_norm": 3.585717766750786, "learning_rate": 8.566361992875384e-07, "loss": 0.4926, "step": 15522 }, { "epoch": 0.26982912965634726, "grad_norm": 1.888934756697558, "learning_rate": 8.566164692202089e-07, "loss": 0.4883, "step": 15523 }, { "epoch": 0.2698465121938501, "grad_norm": 1.3683608006945998, "learning_rate": 8.565967380225692e-07, "loss": 0.3089, "step": 15524 }, { "epoch": 0.26986389473135286, "grad_norm": 0.9857079228870896, "learning_rate": 8.565770056946815e-07, "loss": 0.2457, "step": 15525 }, { "epoch": 0.2698812772688557, "grad_norm": 2.945046339163604, "learning_rate": 8.565572722366084e-07, "loss": 0.4873, "step": 15526 }, { "epoch": 0.2698986598063585, "grad_norm": 1.1905198743937249, "learning_rate": 8.565375376484126e-07, "loss": 0.297, "step": 15527 }, { "epoch": 0.26991604234386135, "grad_norm": 1.4105254637404472, "learning_rate": 8.565178019301567e-07, "loss": 0.3087, "step": 15528 }, { "epoch": 0.2699334248813642, "grad_norm": 1.4890957856297105, "learning_rate": 8.564980650819029e-07, "loss": 0.45, "step": 15529 }, { "epoch": 0.269950807418867, "grad_norm": 1.1955383275120004, "learning_rate": 8.564783271037141e-07, "loss": 0.3302, "step": 15530 }, { "epoch": 0.26996818995636984, "grad_norm": 1.036493438587753, "learning_rate": 8.564585879956528e-07, "loss": 0.2872, "step": 15531 }, { "epoch": 0.26998557249387267, "grad_norm": 0.8809851385807376, "learning_rate": 8.564388477577816e-07, "loss": 0.3001, "step": 15532 }, { "epoch": 0.2700029550313755, "grad_norm": 1.540258448491485, "learning_rate": 8.564191063901629e-07, "loss": 0.459, "step": 15533 }, { "epoch": 0.27002033756887833, "grad_norm": 1.5446348588473795, "learning_rate": 8.563993638928593e-07, "loss": 0.1585, "step": 15534 }, { "epoch": 0.2700377201063811, "grad_norm": 1.658041574579467, "learning_rate": 8.563796202659335e-07, "loss": 0.3146, "step": 15535 }, { "epoch": 0.27005510264388394, "grad_norm": 2.3222011206435753, "learning_rate": 8.56359875509448e-07, "loss": 0.377, "step": 15536 }, { "epoch": 0.27007248518138677, "grad_norm": 1.7915298861022, "learning_rate": 8.563401296234653e-07, "loss": 0.4271, "step": 15537 }, { "epoch": 0.2700898677188896, "grad_norm": 2.7042371939501133, "learning_rate": 8.563203826080483e-07, "loss": 0.3799, "step": 15538 }, { "epoch": 0.27010725025639243, "grad_norm": 3.266688057701843, "learning_rate": 8.563006344632592e-07, "loss": 0.4418, "step": 15539 }, { "epoch": 0.27012463279389526, "grad_norm": 1.204211596539616, "learning_rate": 8.562808851891607e-07, "loss": 0.4309, "step": 15540 }, { "epoch": 0.2701420153313981, "grad_norm": 2.2021426658152246, "learning_rate": 8.562611347858157e-07, "loss": 0.3856, "step": 15541 }, { "epoch": 0.2701593978689009, "grad_norm": 1.5345982695939269, "learning_rate": 8.562413832532863e-07, "loss": 0.3104, "step": 15542 }, { "epoch": 0.27017678040640375, "grad_norm": 1.6052342326420628, "learning_rate": 8.562216305916354e-07, "loss": 0.2629, "step": 15543 }, { "epoch": 0.2701941629439065, "grad_norm": 1.076278499415925, "learning_rate": 8.562018768009259e-07, "loss": 0.2882, "step": 15544 }, { "epoch": 0.27021154548140935, "grad_norm": 1.1096471314326772, "learning_rate": 8.561821218812196e-07, "loss": 0.2303, "step": 15545 }, { "epoch": 0.2702289280189122, "grad_norm": 2.629328571160548, "learning_rate": 8.561623658325798e-07, "loss": 0.535, "step": 15546 }, { "epoch": 0.270246310556415, "grad_norm": 1.8876958983679537, "learning_rate": 8.561426086550689e-07, "loss": 0.3379, "step": 15547 }, { "epoch": 0.27026369309391785, "grad_norm": 1.4423061681828788, "learning_rate": 8.561228503487497e-07, "loss": 0.2912, "step": 15548 }, { "epoch": 0.2702810756314207, "grad_norm": 1.616972006017041, "learning_rate": 8.561030909136844e-07, "loss": 0.5435, "step": 15549 }, { "epoch": 0.2702984581689235, "grad_norm": 1.6496902810768794, "learning_rate": 8.560833303499359e-07, "loss": 0.3456, "step": 15550 }, { "epoch": 0.27031584070642634, "grad_norm": 1.390929962341424, "learning_rate": 8.560635686575671e-07, "loss": 0.3741, "step": 15551 }, { "epoch": 0.27033322324392917, "grad_norm": 2.0397818994165595, "learning_rate": 8.5604380583664e-07, "loss": 0.4544, "step": 15552 }, { "epoch": 0.270350605781432, "grad_norm": 2.003345537008081, "learning_rate": 8.560240418872178e-07, "loss": 0.377, "step": 15553 }, { "epoch": 0.27036798831893477, "grad_norm": 2.463961500250829, "learning_rate": 8.560042768093627e-07, "loss": 0.3222, "step": 15554 }, { "epoch": 0.2703853708564376, "grad_norm": 2.3080894753331056, "learning_rate": 8.559845106031378e-07, "loss": 0.2934, "step": 15555 }, { "epoch": 0.27040275339394043, "grad_norm": 2.355394509592048, "learning_rate": 8.559647432686054e-07, "loss": 0.3609, "step": 15556 }, { "epoch": 0.27042013593144326, "grad_norm": 1.363812003952092, "learning_rate": 8.559449748058285e-07, "loss": 0.2857, "step": 15557 }, { "epoch": 0.2704375184689461, "grad_norm": 2.3127728810779065, "learning_rate": 8.559252052148694e-07, "loss": 0.5306, "step": 15558 }, { "epoch": 0.2704549010064489, "grad_norm": 1.9491529893161095, "learning_rate": 8.559054344957908e-07, "loss": 0.3225, "step": 15559 }, { "epoch": 0.27047228354395175, "grad_norm": 1.6757065923903856, "learning_rate": 8.558856626486555e-07, "loss": 0.3798, "step": 15560 }, { "epoch": 0.2704896660814546, "grad_norm": 2.5681782974755842, "learning_rate": 8.558658896735263e-07, "loss": 0.3093, "step": 15561 }, { "epoch": 0.2705070486189574, "grad_norm": 1.2424560979599764, "learning_rate": 8.558461155704655e-07, "loss": 0.2907, "step": 15562 }, { "epoch": 0.27052443115646024, "grad_norm": 1.709971508122872, "learning_rate": 8.558263403395361e-07, "loss": 0.3441, "step": 15563 }, { "epoch": 0.270541813693963, "grad_norm": 1.5875185565928904, "learning_rate": 8.558065639808005e-07, "loss": 0.3449, "step": 15564 }, { "epoch": 0.27055919623146585, "grad_norm": 1.4378818177861103, "learning_rate": 8.557867864943216e-07, "loss": 0.2105, "step": 15565 }, { "epoch": 0.2705765787689687, "grad_norm": 2.0118217973665704, "learning_rate": 8.557670078801621e-07, "loss": 0.4446, "step": 15566 }, { "epoch": 0.2705939613064715, "grad_norm": 2.3180348183708785, "learning_rate": 8.557472281383846e-07, "loss": 0.1978, "step": 15567 }, { "epoch": 0.27061134384397434, "grad_norm": 1.1448744864914782, "learning_rate": 8.557274472690517e-07, "loss": 0.3747, "step": 15568 }, { "epoch": 0.27062872638147717, "grad_norm": 2.6377405430205014, "learning_rate": 8.557076652722263e-07, "loss": 0.4481, "step": 15569 }, { "epoch": 0.27064610891898, "grad_norm": 2.021365675765283, "learning_rate": 8.556878821479709e-07, "loss": 0.2285, "step": 15570 }, { "epoch": 0.27066349145648283, "grad_norm": 1.329611284026732, "learning_rate": 8.556680978963484e-07, "loss": 0.2176, "step": 15571 }, { "epoch": 0.27068087399398566, "grad_norm": 1.4758852802890619, "learning_rate": 8.556483125174214e-07, "loss": 0.412, "step": 15572 }, { "epoch": 0.2706982565314885, "grad_norm": 1.6232368186241966, "learning_rate": 8.556285260112526e-07, "loss": 0.5332, "step": 15573 }, { "epoch": 0.27071563906899127, "grad_norm": 1.7449912523290152, "learning_rate": 8.556087383779049e-07, "loss": 0.2176, "step": 15574 }, { "epoch": 0.2707330216064941, "grad_norm": 2.1515940475304673, "learning_rate": 8.555889496174406e-07, "loss": 0.2353, "step": 15575 }, { "epoch": 0.2707504041439969, "grad_norm": 2.975087661046948, "learning_rate": 8.555691597299229e-07, "loss": 0.3535, "step": 15576 }, { "epoch": 0.27076778668149976, "grad_norm": 2.075599151754548, "learning_rate": 8.55549368715414e-07, "loss": 0.2758, "step": 15577 }, { "epoch": 0.2707851692190026, "grad_norm": 2.024025541610967, "learning_rate": 8.555295765739772e-07, "loss": 0.3076, "step": 15578 }, { "epoch": 0.2708025517565054, "grad_norm": 1.300755060286191, "learning_rate": 8.555097833056748e-07, "loss": 0.359, "step": 15579 }, { "epoch": 0.27081993429400825, "grad_norm": 1.6058207905914395, "learning_rate": 8.554899889105697e-07, "loss": 0.3744, "step": 15580 }, { "epoch": 0.2708373168315111, "grad_norm": 1.6950841328295807, "learning_rate": 8.554701933887247e-07, "loss": 0.2683, "step": 15581 }, { "epoch": 0.2708546993690139, "grad_norm": 2.4416741926318073, "learning_rate": 8.554503967402025e-07, "loss": 0.4025, "step": 15582 }, { "epoch": 0.27087208190651674, "grad_norm": 1.7238151011549319, "learning_rate": 8.554305989650657e-07, "loss": 0.2453, "step": 15583 }, { "epoch": 0.2708894644440195, "grad_norm": 1.2880227603426302, "learning_rate": 8.554108000633773e-07, "loss": 0.2281, "step": 15584 }, { "epoch": 0.27090684698152234, "grad_norm": 2.229271209878098, "learning_rate": 8.553910000351999e-07, "loss": 0.3676, "step": 15585 }, { "epoch": 0.2709242295190252, "grad_norm": 2.254706679818671, "learning_rate": 8.553711988805963e-07, "loss": 0.4689, "step": 15586 }, { "epoch": 0.270941612056528, "grad_norm": 1.6307498727052252, "learning_rate": 8.553513965996292e-07, "loss": 0.1978, "step": 15587 }, { "epoch": 0.27095899459403083, "grad_norm": 2.1117085727707647, "learning_rate": 8.553315931923614e-07, "loss": 0.3679, "step": 15588 }, { "epoch": 0.27097637713153366, "grad_norm": 2.5218978492543442, "learning_rate": 8.553117886588557e-07, "loss": 0.565, "step": 15589 }, { "epoch": 0.2709937596690365, "grad_norm": 1.3136823601239371, "learning_rate": 8.552919829991748e-07, "loss": 0.2691, "step": 15590 }, { "epoch": 0.2710111422065393, "grad_norm": 3.8446573706451144, "learning_rate": 8.552721762133815e-07, "loss": 0.2754, "step": 15591 }, { "epoch": 0.27102852474404215, "grad_norm": 1.4003086252121228, "learning_rate": 8.552523683015386e-07, "loss": 0.206, "step": 15592 }, { "epoch": 0.271045907281545, "grad_norm": 1.9990185121102844, "learning_rate": 8.55232559263709e-07, "loss": 0.3225, "step": 15593 }, { "epoch": 0.27106328981904776, "grad_norm": 1.8410861137798147, "learning_rate": 8.552127490999555e-07, "loss": 0.3192, "step": 15594 }, { "epoch": 0.2710806723565506, "grad_norm": 3.5994463164662363, "learning_rate": 8.551929378103404e-07, "loss": 0.1758, "step": 15595 }, { "epoch": 0.2710980548940534, "grad_norm": 1.2438439735320925, "learning_rate": 8.55173125394927e-07, "loss": 0.3822, "step": 15596 }, { "epoch": 0.27111543743155625, "grad_norm": 1.7648622089730868, "learning_rate": 8.55153311853778e-07, "loss": 0.3637, "step": 15597 }, { "epoch": 0.2711328199690591, "grad_norm": 1.1286333566321176, "learning_rate": 8.551334971869562e-07, "loss": 0.1934, "step": 15598 }, { "epoch": 0.2711502025065619, "grad_norm": 2.473476811236358, "learning_rate": 8.551136813945241e-07, "loss": 0.3085, "step": 15599 }, { "epoch": 0.27116758504406474, "grad_norm": 1.3823070992424116, "learning_rate": 8.550938644765449e-07, "loss": 0.421, "step": 15600 }, { "epoch": 0.27118496758156757, "grad_norm": 1.2398222789436255, "learning_rate": 8.550740464330814e-07, "loss": 0.3974, "step": 15601 }, { "epoch": 0.2712023501190704, "grad_norm": 1.4145252587565864, "learning_rate": 8.550542272641961e-07, "loss": 0.2789, "step": 15602 }, { "epoch": 0.27121973265657323, "grad_norm": 1.5068170533730973, "learning_rate": 8.550344069699521e-07, "loss": 0.3974, "step": 15603 }, { "epoch": 0.271237115194076, "grad_norm": 1.9110468712776358, "learning_rate": 8.550145855504121e-07, "loss": 0.135, "step": 15604 }, { "epoch": 0.27125449773157884, "grad_norm": 0.9905319958663447, "learning_rate": 8.549947630056389e-07, "loss": 0.4024, "step": 15605 }, { "epoch": 0.27127188026908167, "grad_norm": 1.8158573878005224, "learning_rate": 8.549749393356955e-07, "loss": 0.1939, "step": 15606 }, { "epoch": 0.2712892628065845, "grad_norm": 2.6051256111447225, "learning_rate": 8.549551145406444e-07, "loss": 0.3891, "step": 15607 }, { "epoch": 0.2713066453440873, "grad_norm": 1.565937550131815, "learning_rate": 8.549352886205488e-07, "loss": 0.2502, "step": 15608 }, { "epoch": 0.27132402788159016, "grad_norm": 1.7984212462504974, "learning_rate": 8.549154615754713e-07, "loss": 0.2736, "step": 15609 }, { "epoch": 0.271341410419093, "grad_norm": 2.436453993087268, "learning_rate": 8.548956334054748e-07, "loss": 0.3942, "step": 15610 }, { "epoch": 0.2713587929565958, "grad_norm": 1.2656770105525832, "learning_rate": 8.548758041106223e-07, "loss": 0.292, "step": 15611 }, { "epoch": 0.27137617549409865, "grad_norm": 1.5832783476325163, "learning_rate": 8.548559736909764e-07, "loss": 0.4404, "step": 15612 }, { "epoch": 0.2713935580316015, "grad_norm": 5.904972155088518, "learning_rate": 8.548361421465999e-07, "loss": 0.4554, "step": 15613 }, { "epoch": 0.27141094056910425, "grad_norm": 2.6933031802907177, "learning_rate": 8.548163094775561e-07, "loss": 0.6704, "step": 15614 }, { "epoch": 0.2714283231066071, "grad_norm": 3.127848974767045, "learning_rate": 8.547964756839075e-07, "loss": 0.2918, "step": 15615 }, { "epoch": 0.2714457056441099, "grad_norm": 1.9060657031726516, "learning_rate": 8.547766407657171e-07, "loss": 0.2853, "step": 15616 }, { "epoch": 0.27146308818161274, "grad_norm": 1.5688502357112801, "learning_rate": 8.547568047230478e-07, "loss": 0.2887, "step": 15617 }, { "epoch": 0.2714804707191156, "grad_norm": 1.5803766246664928, "learning_rate": 8.547369675559621e-07, "loss": 0.2771, "step": 15618 }, { "epoch": 0.2714978532566184, "grad_norm": 1.9985352558461467, "learning_rate": 8.547171292645234e-07, "loss": 0.4299, "step": 15619 }, { "epoch": 0.27151523579412123, "grad_norm": 1.407034537989396, "learning_rate": 8.546972898487943e-07, "loss": 0.3311, "step": 15620 }, { "epoch": 0.27153261833162406, "grad_norm": 2.8055312061000057, "learning_rate": 8.546774493088378e-07, "loss": 0.403, "step": 15621 }, { "epoch": 0.2715500008691269, "grad_norm": 1.5561151487939744, "learning_rate": 8.546576076447165e-07, "loss": 0.3868, "step": 15622 }, { "epoch": 0.2715673834066297, "grad_norm": 1.785607835210316, "learning_rate": 8.546377648564936e-07, "loss": 0.3232, "step": 15623 }, { "epoch": 0.2715847659441325, "grad_norm": 2.2467947890888187, "learning_rate": 8.546179209442319e-07, "loss": 0.3775, "step": 15624 }, { "epoch": 0.27160214848163533, "grad_norm": 2.6889010886334446, "learning_rate": 8.545980759079943e-07, "loss": 0.3141, "step": 15625 }, { "epoch": 0.27161953101913816, "grad_norm": 1.6841320578680137, "learning_rate": 8.545782297478437e-07, "loss": 0.2727, "step": 15626 }, { "epoch": 0.271636913556641, "grad_norm": 1.900819076855434, "learning_rate": 8.545583824638429e-07, "loss": 0.4041, "step": 15627 }, { "epoch": 0.2716542960941438, "grad_norm": 2.332475822745106, "learning_rate": 8.545385340560551e-07, "loss": 0.2733, "step": 15628 }, { "epoch": 0.27167167863164665, "grad_norm": 1.7936780760493147, "learning_rate": 8.545186845245429e-07, "loss": 0.3992, "step": 15629 }, { "epoch": 0.2716890611691495, "grad_norm": 2.0140113504325448, "learning_rate": 8.544988338693692e-07, "loss": 0.4095, "step": 15630 }, { "epoch": 0.2717064437066523, "grad_norm": 2.1046176354634865, "learning_rate": 8.544789820905973e-07, "loss": 0.3902, "step": 15631 }, { "epoch": 0.27172382624415514, "grad_norm": 1.1071046611691018, "learning_rate": 8.544591291882897e-07, "loss": 0.5283, "step": 15632 }, { "epoch": 0.271741208781658, "grad_norm": 2.837223913144438, "learning_rate": 8.544392751625095e-07, "loss": 0.3478, "step": 15633 }, { "epoch": 0.27175859131916075, "grad_norm": 2.1012890197230436, "learning_rate": 8.544194200133196e-07, "loss": 0.3957, "step": 15634 }, { "epoch": 0.2717759738566636, "grad_norm": 1.7067631576140565, "learning_rate": 8.54399563740783e-07, "loss": 0.5694, "step": 15635 }, { "epoch": 0.2717933563941664, "grad_norm": 1.5013210858382458, "learning_rate": 8.543797063449625e-07, "loss": 0.5742, "step": 15636 }, { "epoch": 0.27181073893166924, "grad_norm": 1.3698880570222034, "learning_rate": 8.543598478259211e-07, "loss": 0.4988, "step": 15637 }, { "epoch": 0.27182812146917207, "grad_norm": 3.710758452827183, "learning_rate": 8.543399881837219e-07, "loss": 0.2555, "step": 15638 }, { "epoch": 0.2718455040066749, "grad_norm": 2.345461530760939, "learning_rate": 8.543201274184276e-07, "loss": 0.4204, "step": 15639 }, { "epoch": 0.27186288654417773, "grad_norm": 2.9614285669339067, "learning_rate": 8.543002655301013e-07, "loss": 0.3556, "step": 15640 }, { "epoch": 0.27188026908168056, "grad_norm": 1.1965170536010243, "learning_rate": 8.54280402518806e-07, "loss": 0.2897, "step": 15641 }, { "epoch": 0.2718976516191834, "grad_norm": 1.3055848607959992, "learning_rate": 8.542605383846044e-07, "loss": 0.2044, "step": 15642 }, { "epoch": 0.2719150341566862, "grad_norm": 1.7220161867076873, "learning_rate": 8.542406731275595e-07, "loss": 0.4235, "step": 15643 }, { "epoch": 0.271932416694189, "grad_norm": 2.5614349991472722, "learning_rate": 8.542208067477347e-07, "loss": 0.3764, "step": 15644 }, { "epoch": 0.2719497992316918, "grad_norm": 3.679102135271306, "learning_rate": 8.542009392451926e-07, "loss": 0.2279, "step": 15645 }, { "epoch": 0.27196718176919465, "grad_norm": 2.288818899028265, "learning_rate": 8.541810706199961e-07, "loss": 0.3408, "step": 15646 }, { "epoch": 0.2719845643066975, "grad_norm": 5.337851114220915, "learning_rate": 8.541612008722083e-07, "loss": 0.4169, "step": 15647 }, { "epoch": 0.2720019468442003, "grad_norm": 3.4943013979200193, "learning_rate": 8.541413300018924e-07, "loss": 0.4288, "step": 15648 }, { "epoch": 0.27201932938170315, "grad_norm": 1.3032816557473674, "learning_rate": 8.541214580091109e-07, "loss": 0.211, "step": 15649 }, { "epoch": 0.272036711919206, "grad_norm": 3.6782444336520146, "learning_rate": 8.541015848939273e-07, "loss": 0.3755, "step": 15650 }, { "epoch": 0.2720540944567088, "grad_norm": 1.1041880506308015, "learning_rate": 8.540817106564041e-07, "loss": 0.3703, "step": 15651 }, { "epoch": 0.27207147699421164, "grad_norm": 2.08491921743966, "learning_rate": 8.540618352966047e-07, "loss": 0.4243, "step": 15652 }, { "epoch": 0.27208885953171447, "grad_norm": 1.5147347681775472, "learning_rate": 8.540419588145917e-07, "loss": 0.2538, "step": 15653 }, { "epoch": 0.27210624206921724, "grad_norm": 2.007124841770084, "learning_rate": 8.540220812104287e-07, "loss": 0.3715, "step": 15654 }, { "epoch": 0.27212362460672007, "grad_norm": 1.4515332629590103, "learning_rate": 8.540022024841779e-07, "loss": 0.3482, "step": 15655 }, { "epoch": 0.2721410071442229, "grad_norm": 2.1208779485733813, "learning_rate": 8.539823226359031e-07, "loss": 0.2445, "step": 15656 }, { "epoch": 0.27215838968172573, "grad_norm": 3.227492548953487, "learning_rate": 8.539624416656668e-07, "loss": 0.355, "step": 15657 }, { "epoch": 0.27217577221922856, "grad_norm": 1.583487842305404, "learning_rate": 8.539425595735322e-07, "loss": 0.2328, "step": 15658 }, { "epoch": 0.2721931547567314, "grad_norm": 1.6585194682790014, "learning_rate": 8.539226763595621e-07, "loss": 0.322, "step": 15659 }, { "epoch": 0.2722105372942342, "grad_norm": 1.0957764377732553, "learning_rate": 8.539027920238198e-07, "loss": 0.2635, "step": 15660 }, { "epoch": 0.27222791983173705, "grad_norm": 2.132714671701202, "learning_rate": 8.538829065663682e-07, "loss": 0.4857, "step": 15661 }, { "epoch": 0.2722453023692399, "grad_norm": 3.625306912997188, "learning_rate": 8.538630199872704e-07, "loss": 0.4179, "step": 15662 }, { "epoch": 0.2722626849067427, "grad_norm": 1.3400068473766864, "learning_rate": 8.538431322865893e-07, "loss": 0.3551, "step": 15663 }, { "epoch": 0.2722800674442455, "grad_norm": 1.8618227506938014, "learning_rate": 8.538232434643882e-07, "loss": 0.3255, "step": 15664 }, { "epoch": 0.2722974499817483, "grad_norm": 1.4471312823603355, "learning_rate": 8.538033535207298e-07, "loss": 0.3817, "step": 15665 }, { "epoch": 0.27231483251925115, "grad_norm": 1.481135492966845, "learning_rate": 8.537834624556773e-07, "loss": 0.3818, "step": 15666 }, { "epoch": 0.272332215056754, "grad_norm": 1.3314412676851917, "learning_rate": 8.537635702692938e-07, "loss": 0.2981, "step": 15667 }, { "epoch": 0.2723495975942568, "grad_norm": 1.267210472296448, "learning_rate": 8.537436769616421e-07, "loss": 0.2045, "step": 15668 }, { "epoch": 0.27236698013175964, "grad_norm": 1.5626161762910378, "learning_rate": 8.537237825327857e-07, "loss": 0.4785, "step": 15669 }, { "epoch": 0.27238436266926247, "grad_norm": 1.7670382613816, "learning_rate": 8.537038869827871e-07, "loss": 0.261, "step": 15670 }, { "epoch": 0.2724017452067653, "grad_norm": 2.8388775421286385, "learning_rate": 8.5368399031171e-07, "loss": 0.4554, "step": 15671 }, { "epoch": 0.27241912774426813, "grad_norm": 2.9905992771983265, "learning_rate": 8.536640925196169e-07, "loss": 0.3201, "step": 15672 }, { "epoch": 0.27243651028177096, "grad_norm": 1.338022153886852, "learning_rate": 8.536441936065712e-07, "loss": 0.3455, "step": 15673 }, { "epoch": 0.27245389281927374, "grad_norm": 1.9490614402788822, "learning_rate": 8.536242935726357e-07, "loss": 0.3351, "step": 15674 }, { "epoch": 0.27247127535677657, "grad_norm": 2.314358978905989, "learning_rate": 8.536043924178738e-07, "loss": 0.572, "step": 15675 }, { "epoch": 0.2724886578942794, "grad_norm": 2.274101776707184, "learning_rate": 8.535844901423484e-07, "loss": 0.5079, "step": 15676 }, { "epoch": 0.2725060404317822, "grad_norm": 1.7399013723747738, "learning_rate": 8.535645867461225e-07, "loss": 0.2854, "step": 15677 }, { "epoch": 0.27252342296928506, "grad_norm": 1.445197280865613, "learning_rate": 8.535446822292592e-07, "loss": 0.1766, "step": 15678 }, { "epoch": 0.2725408055067879, "grad_norm": 1.5910076356133043, "learning_rate": 8.535247765918218e-07, "loss": 0.4421, "step": 15679 }, { "epoch": 0.2725581880442907, "grad_norm": 2.464610699046676, "learning_rate": 8.535048698338732e-07, "loss": 0.4655, "step": 15680 }, { "epoch": 0.27257557058179355, "grad_norm": 1.707260336716945, "learning_rate": 8.534849619554765e-07, "loss": 0.286, "step": 15681 }, { "epoch": 0.2725929531192964, "grad_norm": 0.9558276147224659, "learning_rate": 8.534650529566949e-07, "loss": 0.4907, "step": 15682 }, { "epoch": 0.27261033565679915, "grad_norm": 1.5743227397358828, "learning_rate": 8.534451428375915e-07, "loss": 0.3118, "step": 15683 }, { "epoch": 0.272627718194302, "grad_norm": 2.984757904469522, "learning_rate": 8.534252315982293e-07, "loss": 0.4148, "step": 15684 }, { "epoch": 0.2726451007318048, "grad_norm": 1.3126330290831851, "learning_rate": 8.534053192386715e-07, "loss": 0.4344, "step": 15685 }, { "epoch": 0.27266248326930764, "grad_norm": 13.381500286601533, "learning_rate": 8.533854057589811e-07, "loss": 0.2318, "step": 15686 }, { "epoch": 0.2726798658068105, "grad_norm": 1.696409091360848, "learning_rate": 8.533654911592214e-07, "loss": 0.2309, "step": 15687 }, { "epoch": 0.2726972483443133, "grad_norm": 1.6935728727425061, "learning_rate": 8.533455754394554e-07, "loss": 0.2435, "step": 15688 }, { "epoch": 0.27271463088181613, "grad_norm": 1.9242955367600147, "learning_rate": 8.533256585997463e-07, "loss": 0.2749, "step": 15689 }, { "epoch": 0.27273201341931896, "grad_norm": 1.658995037036463, "learning_rate": 8.533057406401571e-07, "loss": 0.209, "step": 15690 }, { "epoch": 0.2727493959568218, "grad_norm": 2.0838222629995466, "learning_rate": 8.532858215607509e-07, "loss": 0.4219, "step": 15691 }, { "epoch": 0.2727667784943246, "grad_norm": 1.6981666291876878, "learning_rate": 8.532659013615911e-07, "loss": 0.4125, "step": 15692 }, { "epoch": 0.2727841610318274, "grad_norm": 2.4021753030258024, "learning_rate": 8.532459800427405e-07, "loss": 0.2625, "step": 15693 }, { "epoch": 0.27280154356933023, "grad_norm": 1.7558267719651526, "learning_rate": 8.532260576042625e-07, "loss": 0.3019, "step": 15694 }, { "epoch": 0.27281892610683306, "grad_norm": 1.7706914469317308, "learning_rate": 8.532061340462202e-07, "loss": 0.4493, "step": 15695 }, { "epoch": 0.2728363086443359, "grad_norm": 1.444341313229727, "learning_rate": 8.531862093686766e-07, "loss": 0.4671, "step": 15696 }, { "epoch": 0.2728536911818387, "grad_norm": 2.565764665619778, "learning_rate": 8.531662835716951e-07, "loss": 0.485, "step": 15697 }, { "epoch": 0.27287107371934155, "grad_norm": 1.9418862421507987, "learning_rate": 8.531463566553385e-07, "loss": 0.3432, "step": 15698 }, { "epoch": 0.2728884562568444, "grad_norm": 1.89469510148263, "learning_rate": 8.531264286196703e-07, "loss": 0.2461, "step": 15699 }, { "epoch": 0.2729058387943472, "grad_norm": 2.159701481350206, "learning_rate": 8.531064994647536e-07, "loss": 0.3907, "step": 15700 }, { "epoch": 0.27292322133185004, "grad_norm": 1.8363240697623902, "learning_rate": 8.530865691906515e-07, "loss": 0.3142, "step": 15701 }, { "epoch": 0.27294060386935287, "grad_norm": 1.8739200104492244, "learning_rate": 8.530666377974269e-07, "loss": 0.3877, "step": 15702 }, { "epoch": 0.27295798640685565, "grad_norm": 1.6551539174098242, "learning_rate": 8.530467052851435e-07, "loss": 0.208, "step": 15703 }, { "epoch": 0.2729753689443585, "grad_norm": 1.4661542553189393, "learning_rate": 8.530267716538643e-07, "loss": 0.3896, "step": 15704 }, { "epoch": 0.2729927514818613, "grad_norm": 1.880011328413871, "learning_rate": 8.530068369036523e-07, "loss": 0.3738, "step": 15705 }, { "epoch": 0.27301013401936414, "grad_norm": 1.818923402823048, "learning_rate": 8.529869010345707e-07, "loss": 0.346, "step": 15706 }, { "epoch": 0.27302751655686697, "grad_norm": 0.9292480962510201, "learning_rate": 8.529669640466828e-07, "loss": 0.1898, "step": 15707 }, { "epoch": 0.2730448990943698, "grad_norm": 1.5457913005711434, "learning_rate": 8.529470259400517e-07, "loss": 0.2716, "step": 15708 }, { "epoch": 0.2730622816318726, "grad_norm": 1.4543863262156325, "learning_rate": 8.529270867147409e-07, "loss": 0.3887, "step": 15709 }, { "epoch": 0.27307966416937546, "grad_norm": 4.712275938839561, "learning_rate": 8.52907146370813e-07, "loss": 0.3402, "step": 15710 }, { "epoch": 0.2730970467068783, "grad_norm": 1.4512187194860893, "learning_rate": 8.528872049083319e-07, "loss": 0.325, "step": 15711 }, { "epoch": 0.2731144292443811, "grad_norm": 3.0035162201196703, "learning_rate": 8.528672623273603e-07, "loss": 0.3404, "step": 15712 }, { "epoch": 0.2731318117818839, "grad_norm": 1.5770967650143861, "learning_rate": 8.528473186279617e-07, "loss": 0.3195, "step": 15713 }, { "epoch": 0.2731491943193867, "grad_norm": 1.8474104330208976, "learning_rate": 8.52827373810199e-07, "loss": 0.249, "step": 15714 }, { "epoch": 0.27316657685688955, "grad_norm": 2.1183788575274254, "learning_rate": 8.528074278741356e-07, "loss": 0.3149, "step": 15715 }, { "epoch": 0.2731839593943924, "grad_norm": 3.533058279384391, "learning_rate": 8.527874808198348e-07, "loss": 0.4465, "step": 15716 }, { "epoch": 0.2732013419318952, "grad_norm": 2.9474419581872873, "learning_rate": 8.527675326473598e-07, "loss": 0.3738, "step": 15717 }, { "epoch": 0.27321872446939804, "grad_norm": 1.9148219058082987, "learning_rate": 8.527475833567737e-07, "loss": 0.2506, "step": 15718 }, { "epoch": 0.2732361070069009, "grad_norm": 1.548495568838074, "learning_rate": 8.527276329481399e-07, "loss": 0.4198, "step": 15719 }, { "epoch": 0.2732534895444037, "grad_norm": 1.931272915048495, "learning_rate": 8.527076814215214e-07, "loss": 0.3847, "step": 15720 }, { "epoch": 0.27327087208190654, "grad_norm": 1.9382400484460587, "learning_rate": 8.526877287769816e-07, "loss": 0.4715, "step": 15721 }, { "epoch": 0.27328825461940937, "grad_norm": 1.8116802930222728, "learning_rate": 8.526677750145838e-07, "loss": 0.3034, "step": 15722 }, { "epoch": 0.27330563715691214, "grad_norm": 1.2153755637956534, "learning_rate": 8.526478201343909e-07, "loss": 0.3075, "step": 15723 }, { "epoch": 0.27332301969441497, "grad_norm": 1.5002006135554544, "learning_rate": 8.526278641364667e-07, "loss": 0.4598, "step": 15724 }, { "epoch": 0.2733404022319178, "grad_norm": 1.8838456464268865, "learning_rate": 8.526079070208741e-07, "loss": 0.3138, "step": 15725 }, { "epoch": 0.27335778476942063, "grad_norm": 1.652989798702504, "learning_rate": 8.525879487876764e-07, "loss": 0.3427, "step": 15726 }, { "epoch": 0.27337516730692346, "grad_norm": 2.15522896259875, "learning_rate": 8.525679894369368e-07, "loss": 0.4679, "step": 15727 }, { "epoch": 0.2733925498444263, "grad_norm": 1.7414090318126771, "learning_rate": 8.525480289687186e-07, "loss": 0.258, "step": 15728 }, { "epoch": 0.2734099323819291, "grad_norm": 1.9519411888875111, "learning_rate": 8.525280673830851e-07, "loss": 0.3053, "step": 15729 }, { "epoch": 0.27342731491943195, "grad_norm": 2.92845524629675, "learning_rate": 8.525081046800998e-07, "loss": 0.3612, "step": 15730 }, { "epoch": 0.2734446974569348, "grad_norm": 2.2600982560497425, "learning_rate": 8.524881408598254e-07, "loss": 0.3863, "step": 15731 }, { "epoch": 0.2734620799944376, "grad_norm": 1.951015086174666, "learning_rate": 8.524681759223257e-07, "loss": 0.4273, "step": 15732 }, { "epoch": 0.2734794625319404, "grad_norm": 1.8671506140884337, "learning_rate": 8.524482098676638e-07, "loss": 0.3003, "step": 15733 }, { "epoch": 0.2734968450694432, "grad_norm": 1.8999148574075113, "learning_rate": 8.524282426959028e-07, "loss": 0.2104, "step": 15734 }, { "epoch": 0.27351422760694605, "grad_norm": 3.6414882117134324, "learning_rate": 8.524082744071064e-07, "loss": 0.3272, "step": 15735 }, { "epoch": 0.2735316101444489, "grad_norm": 1.892104126334567, "learning_rate": 8.523883050013375e-07, "loss": 0.3512, "step": 15736 }, { "epoch": 0.2735489926819517, "grad_norm": 2.6299795762120914, "learning_rate": 8.523683344786596e-07, "loss": 0.5905, "step": 15737 }, { "epoch": 0.27356637521945454, "grad_norm": 1.9825746104091195, "learning_rate": 8.523483628391359e-07, "loss": 0.2933, "step": 15738 }, { "epoch": 0.27358375775695737, "grad_norm": 1.9284403197610593, "learning_rate": 8.523283900828297e-07, "loss": 0.3346, "step": 15739 }, { "epoch": 0.2736011402944602, "grad_norm": 1.2325917087096934, "learning_rate": 8.523084162098046e-07, "loss": 0.5653, "step": 15740 }, { "epoch": 0.27361852283196303, "grad_norm": 1.2870824294218204, "learning_rate": 8.522884412201234e-07, "loss": 0.3116, "step": 15741 }, { "epoch": 0.27363590536946586, "grad_norm": 1.8562466824719723, "learning_rate": 8.522684651138498e-07, "loss": 0.3252, "step": 15742 }, { "epoch": 0.27365328790696863, "grad_norm": 1.7534382315408734, "learning_rate": 8.52248487891047e-07, "loss": 0.533, "step": 15743 }, { "epoch": 0.27367067044447146, "grad_norm": 2.329429477209513, "learning_rate": 8.522285095517783e-07, "loss": 0.4087, "step": 15744 }, { "epoch": 0.2736880529819743, "grad_norm": 2.212094542775434, "learning_rate": 8.522085300961068e-07, "loss": 0.3104, "step": 15745 }, { "epoch": 0.2737054355194771, "grad_norm": 1.4885323950361178, "learning_rate": 8.521885495240963e-07, "loss": 0.2289, "step": 15746 }, { "epoch": 0.27372281805697996, "grad_norm": 1.486573983573685, "learning_rate": 8.521685678358098e-07, "loss": 0.225, "step": 15747 }, { "epoch": 0.2737402005944828, "grad_norm": 2.0350149272421896, "learning_rate": 8.521485850313107e-07, "loss": 0.4096, "step": 15748 }, { "epoch": 0.2737575831319856, "grad_norm": 1.7965264316144653, "learning_rate": 8.521286011106623e-07, "loss": 0.2667, "step": 15749 }, { "epoch": 0.27377496566948845, "grad_norm": 1.1648880359612819, "learning_rate": 8.521086160739282e-07, "loss": 0.2085, "step": 15750 }, { "epoch": 0.2737923482069913, "grad_norm": 1.6258280646667278, "learning_rate": 8.520886299211714e-07, "loss": 0.3307, "step": 15751 }, { "epoch": 0.2738097307444941, "grad_norm": 1.4807944359311216, "learning_rate": 8.520686426524552e-07, "loss": 0.3368, "step": 15752 }, { "epoch": 0.2738271132819969, "grad_norm": 2.9848133920146376, "learning_rate": 8.520486542678434e-07, "loss": 0.5278, "step": 15753 }, { "epoch": 0.2738444958194997, "grad_norm": 1.1216078396693168, "learning_rate": 8.52028664767399e-07, "loss": 0.2033, "step": 15754 }, { "epoch": 0.27386187835700254, "grad_norm": 2.2401531762677385, "learning_rate": 8.520086741511854e-07, "loss": 0.3968, "step": 15755 }, { "epoch": 0.27387926089450537, "grad_norm": 1.475639331371103, "learning_rate": 8.51988682419266e-07, "loss": 0.284, "step": 15756 }, { "epoch": 0.2738966434320082, "grad_norm": 10.159965628073337, "learning_rate": 8.519686895717042e-07, "loss": 0.5306, "step": 15757 }, { "epoch": 0.27391402596951103, "grad_norm": 2.0189185481424734, "learning_rate": 8.519486956085633e-07, "loss": 0.289, "step": 15758 }, { "epoch": 0.27393140850701386, "grad_norm": 1.7437060865003047, "learning_rate": 8.519287005299068e-07, "loss": 0.2983, "step": 15759 }, { "epoch": 0.2739487910445167, "grad_norm": 1.671442115936926, "learning_rate": 8.51908704335798e-07, "loss": 0.2377, "step": 15760 }, { "epoch": 0.2739661735820195, "grad_norm": 1.5661810391390378, "learning_rate": 8.518887070263001e-07, "loss": 0.3267, "step": 15761 }, { "epoch": 0.27398355611952235, "grad_norm": 0.9095671494525547, "learning_rate": 8.518687086014767e-07, "loss": 0.1987, "step": 15762 }, { "epoch": 0.27400093865702513, "grad_norm": 1.3849871402951348, "learning_rate": 8.518487090613912e-07, "loss": 0.1705, "step": 15763 }, { "epoch": 0.27401832119452796, "grad_norm": 1.550534418132677, "learning_rate": 8.518287084061069e-07, "loss": 0.3154, "step": 15764 }, { "epoch": 0.2740357037320308, "grad_norm": 1.6456192508783343, "learning_rate": 8.518087066356872e-07, "loss": 0.2798, "step": 15765 }, { "epoch": 0.2740530862695336, "grad_norm": 1.7889965850101752, "learning_rate": 8.517887037501954e-07, "loss": 0.3367, "step": 15766 }, { "epoch": 0.27407046880703645, "grad_norm": 3.4715146004104547, "learning_rate": 8.517686997496951e-07, "loss": 0.3182, "step": 15767 }, { "epoch": 0.2740878513445393, "grad_norm": 1.9425788641119148, "learning_rate": 8.517486946342496e-07, "loss": 0.227, "step": 15768 }, { "epoch": 0.2741052338820421, "grad_norm": 1.8181430863451065, "learning_rate": 8.517286884039223e-07, "loss": 0.3508, "step": 15769 }, { "epoch": 0.27412261641954494, "grad_norm": 1.498899104819038, "learning_rate": 8.517086810587766e-07, "loss": 0.1539, "step": 15770 }, { "epoch": 0.27413999895704777, "grad_norm": 2.9653623364946196, "learning_rate": 8.516886725988761e-07, "loss": 0.3928, "step": 15771 }, { "epoch": 0.2741573814945506, "grad_norm": 1.6240651416530931, "learning_rate": 8.51668663024284e-07, "loss": 0.24, "step": 15772 }, { "epoch": 0.2741747640320534, "grad_norm": 2.0752861499434356, "learning_rate": 8.516486523350637e-07, "loss": 0.22, "step": 15773 }, { "epoch": 0.2741921465695562, "grad_norm": 2.380108972006608, "learning_rate": 8.516286405312787e-07, "loss": 0.5066, "step": 15774 }, { "epoch": 0.27420952910705904, "grad_norm": 2.1424743304881284, "learning_rate": 8.516086276129924e-07, "loss": 0.3343, "step": 15775 }, { "epoch": 0.27422691164456187, "grad_norm": 1.585167125537688, "learning_rate": 8.515886135802684e-07, "loss": 0.314, "step": 15776 }, { "epoch": 0.2742442941820647, "grad_norm": 1.4514689092205468, "learning_rate": 8.515685984331699e-07, "loss": 0.4597, "step": 15777 }, { "epoch": 0.2742616767195675, "grad_norm": 2.4942983702418404, "learning_rate": 8.515485821717605e-07, "loss": 0.4366, "step": 15778 }, { "epoch": 0.27427905925707036, "grad_norm": 2.5196427505842656, "learning_rate": 8.515285647961037e-07, "loss": 0.2713, "step": 15779 }, { "epoch": 0.2742964417945732, "grad_norm": 2.1185486336131705, "learning_rate": 8.515085463062627e-07, "loss": 0.3569, "step": 15780 }, { "epoch": 0.274313824332076, "grad_norm": 1.131959788084234, "learning_rate": 8.514885267023011e-07, "loss": 0.1871, "step": 15781 }, { "epoch": 0.27433120686957885, "grad_norm": 1.7275317065535771, "learning_rate": 8.514685059842824e-07, "loss": 0.1988, "step": 15782 }, { "epoch": 0.2743485894070816, "grad_norm": 1.3837966140780238, "learning_rate": 8.514484841522698e-07, "loss": 0.2463, "step": 15783 }, { "epoch": 0.27436597194458445, "grad_norm": 1.4532867562491314, "learning_rate": 8.51428461206327e-07, "loss": 0.2972, "step": 15784 }, { "epoch": 0.2743833544820873, "grad_norm": 3.059299016832599, "learning_rate": 8.514084371465176e-07, "loss": 0.323, "step": 15785 }, { "epoch": 0.2744007370195901, "grad_norm": 1.5666497999820124, "learning_rate": 8.513884119729046e-07, "loss": 0.2451, "step": 15786 }, { "epoch": 0.27441811955709294, "grad_norm": 1.532848510201822, "learning_rate": 8.513683856855518e-07, "loss": 0.5209, "step": 15787 }, { "epoch": 0.2744355020945958, "grad_norm": 1.6975265743710584, "learning_rate": 8.513483582845228e-07, "loss": 0.2905, "step": 15788 }, { "epoch": 0.2744528846320986, "grad_norm": 1.2776379521313577, "learning_rate": 8.513283297698808e-07, "loss": 0.214, "step": 15789 }, { "epoch": 0.27447026716960143, "grad_norm": 1.262820264986967, "learning_rate": 8.513083001416894e-07, "loss": 0.3445, "step": 15790 }, { "epoch": 0.27448764970710426, "grad_norm": 1.6736459413287543, "learning_rate": 8.512882694000121e-07, "loss": 0.3283, "step": 15791 }, { "epoch": 0.2745050322446071, "grad_norm": 1.5858114141902024, "learning_rate": 8.512682375449123e-07, "loss": 0.5073, "step": 15792 }, { "epoch": 0.27452241478210987, "grad_norm": 3.4947798430401673, "learning_rate": 8.512482045764535e-07, "loss": 0.4244, "step": 15793 }, { "epoch": 0.2745397973196127, "grad_norm": 2.0582677308420574, "learning_rate": 8.512281704946993e-07, "loss": 0.3656, "step": 15794 }, { "epoch": 0.27455717985711553, "grad_norm": 2.8684789072179004, "learning_rate": 8.512081352997132e-07, "loss": 0.3513, "step": 15795 }, { "epoch": 0.27457456239461836, "grad_norm": 2.1955298583512803, "learning_rate": 8.511880989915585e-07, "loss": 0.5277, "step": 15796 }, { "epoch": 0.2745919449321212, "grad_norm": 2.0971400490161636, "learning_rate": 8.51168061570299e-07, "loss": 0.308, "step": 15797 }, { "epoch": 0.274609327469624, "grad_norm": 1.5876247598233992, "learning_rate": 8.51148023035998e-07, "loss": 0.5631, "step": 15798 }, { "epoch": 0.27462671000712685, "grad_norm": 4.550713237851926, "learning_rate": 8.511279833887189e-07, "loss": 0.2879, "step": 15799 }, { "epoch": 0.2746440925446297, "grad_norm": 2.123121014662979, "learning_rate": 8.511079426285256e-07, "loss": 0.3879, "step": 15800 }, { "epoch": 0.2746614750821325, "grad_norm": 1.309959422853848, "learning_rate": 8.510879007554814e-07, "loss": 0.3, "step": 15801 }, { "epoch": 0.27467885761963534, "grad_norm": 2.009169019922444, "learning_rate": 8.510678577696498e-07, "loss": 0.2659, "step": 15802 }, { "epoch": 0.2746962401571381, "grad_norm": 1.7666411184109856, "learning_rate": 8.510478136710942e-07, "loss": 0.4054, "step": 15803 }, { "epoch": 0.27471362269464095, "grad_norm": 1.4157889732910902, "learning_rate": 8.510277684598786e-07, "loss": 0.2642, "step": 15804 }, { "epoch": 0.2747310052321438, "grad_norm": 2.23022734397063, "learning_rate": 8.510077221360659e-07, "loss": 0.3718, "step": 15805 }, { "epoch": 0.2747483877696466, "grad_norm": 2.990960010616363, "learning_rate": 8.509876746997202e-07, "loss": 0.341, "step": 15806 }, { "epoch": 0.27476577030714944, "grad_norm": 1.5714981112619293, "learning_rate": 8.509676261509047e-07, "loss": 0.2804, "step": 15807 }, { "epoch": 0.27478315284465227, "grad_norm": 2.4231883781698573, "learning_rate": 8.509475764896831e-07, "loss": 0.2892, "step": 15808 }, { "epoch": 0.2748005353821551, "grad_norm": 2.4620016252682015, "learning_rate": 8.509275257161188e-07, "loss": 0.4945, "step": 15809 }, { "epoch": 0.27481791791965793, "grad_norm": 1.4753930368991857, "learning_rate": 8.509074738302755e-07, "loss": 0.4246, "step": 15810 }, { "epoch": 0.27483530045716076, "grad_norm": 2.1971897416898334, "learning_rate": 8.508874208322167e-07, "loss": 0.2018, "step": 15811 }, { "epoch": 0.2748526829946636, "grad_norm": 6.00654772848025, "learning_rate": 8.50867366722006e-07, "loss": 0.3907, "step": 15812 }, { "epoch": 0.27487006553216636, "grad_norm": 1.884465561339716, "learning_rate": 8.508473114997069e-07, "loss": 0.2953, "step": 15813 }, { "epoch": 0.2748874480696692, "grad_norm": 1.5544584550750749, "learning_rate": 8.50827255165383e-07, "loss": 0.3685, "step": 15814 }, { "epoch": 0.274904830607172, "grad_norm": 1.724977737678066, "learning_rate": 8.508071977190979e-07, "loss": 0.4154, "step": 15815 }, { "epoch": 0.27492221314467485, "grad_norm": 2.115629138437221, "learning_rate": 8.50787139160915e-07, "loss": 0.3279, "step": 15816 }, { "epoch": 0.2749395956821777, "grad_norm": 1.7527097539211063, "learning_rate": 8.507670794908981e-07, "loss": 0.2324, "step": 15817 }, { "epoch": 0.2749569782196805, "grad_norm": 2.26351370970407, "learning_rate": 8.507470187091106e-07, "loss": 0.2675, "step": 15818 }, { "epoch": 0.27497436075718334, "grad_norm": 2.063102322302327, "learning_rate": 8.507269568156161e-07, "loss": 0.3826, "step": 15819 }, { "epoch": 0.2749917432946862, "grad_norm": 1.8693214071585018, "learning_rate": 8.507068938104786e-07, "loss": 0.4666, "step": 15820 }, { "epoch": 0.275009125832189, "grad_norm": 1.409257876763341, "learning_rate": 8.50686829693761e-07, "loss": 0.1752, "step": 15821 }, { "epoch": 0.2750265083696918, "grad_norm": 4.262447357269181, "learning_rate": 8.506667644655274e-07, "loss": 0.3477, "step": 15822 }, { "epoch": 0.2750438909071946, "grad_norm": 1.0128999854987024, "learning_rate": 8.506466981258412e-07, "loss": 0.3846, "step": 15823 }, { "epoch": 0.27506127344469744, "grad_norm": 1.3492655473999082, "learning_rate": 8.50626630674766e-07, "loss": 0.2345, "step": 15824 }, { "epoch": 0.27507865598220027, "grad_norm": 1.2586841482560989, "learning_rate": 8.506065621123656e-07, "loss": 0.3592, "step": 15825 }, { "epoch": 0.2750960385197031, "grad_norm": 1.8405630536098787, "learning_rate": 8.505864924387034e-07, "loss": 0.3108, "step": 15826 }, { "epoch": 0.27511342105720593, "grad_norm": 1.263462763366692, "learning_rate": 8.50566421653843e-07, "loss": 0.3269, "step": 15827 }, { "epoch": 0.27513080359470876, "grad_norm": 1.6554639207151487, "learning_rate": 8.50546349757848e-07, "loss": 0.2989, "step": 15828 }, { "epoch": 0.2751481861322116, "grad_norm": 1.6662303842587332, "learning_rate": 8.505262767507823e-07, "loss": 0.3362, "step": 15829 }, { "epoch": 0.2751655686697144, "grad_norm": 1.610980315697868, "learning_rate": 8.505062026327093e-07, "loss": 0.4565, "step": 15830 }, { "epoch": 0.27518295120721725, "grad_norm": 2.1898898297465084, "learning_rate": 8.504861274036925e-07, "loss": 0.3697, "step": 15831 }, { "epoch": 0.27520033374472, "grad_norm": 1.151378599701834, "learning_rate": 8.504660510637958e-07, "loss": 0.2843, "step": 15832 }, { "epoch": 0.27521771628222286, "grad_norm": 1.466895796453529, "learning_rate": 8.504459736130826e-07, "loss": 0.2218, "step": 15833 }, { "epoch": 0.2752350988197257, "grad_norm": 2.6584929844971286, "learning_rate": 8.504258950516167e-07, "loss": 0.2357, "step": 15834 }, { "epoch": 0.2752524813572285, "grad_norm": 2.2498108932751197, "learning_rate": 8.504058153794616e-07, "loss": 0.3739, "step": 15835 }, { "epoch": 0.27526986389473135, "grad_norm": 1.2803631280921275, "learning_rate": 8.503857345966812e-07, "loss": 0.2237, "step": 15836 }, { "epoch": 0.2752872464322342, "grad_norm": 1.3296652161385063, "learning_rate": 8.50365652703339e-07, "loss": 0.3051, "step": 15837 }, { "epoch": 0.275304628969737, "grad_norm": 2.0299508792126146, "learning_rate": 8.503455696994983e-07, "loss": 0.3857, "step": 15838 }, { "epoch": 0.27532201150723984, "grad_norm": 2.7776141661021607, "learning_rate": 8.503254855852235e-07, "loss": 0.4395, "step": 15839 }, { "epoch": 0.27533939404474267, "grad_norm": 2.2415455395684454, "learning_rate": 8.503054003605775e-07, "loss": 0.359, "step": 15840 }, { "epoch": 0.2753567765822455, "grad_norm": 2.3063715331591035, "learning_rate": 8.502853140256245e-07, "loss": 0.2641, "step": 15841 }, { "epoch": 0.2753741591197483, "grad_norm": 1.6947545815683527, "learning_rate": 8.502652265804278e-07, "loss": 0.3486, "step": 15842 }, { "epoch": 0.2753915416572511, "grad_norm": 2.768535702532471, "learning_rate": 8.502451380250514e-07, "loss": 0.2003, "step": 15843 }, { "epoch": 0.27540892419475393, "grad_norm": 1.1710281602378885, "learning_rate": 8.502250483595587e-07, "loss": 0.292, "step": 15844 }, { "epoch": 0.27542630673225676, "grad_norm": 1.5780377679094735, "learning_rate": 8.502049575840136e-07, "loss": 0.6613, "step": 15845 }, { "epoch": 0.2754436892697596, "grad_norm": 1.4745387083762291, "learning_rate": 8.501848656984794e-07, "loss": 0.3126, "step": 15846 }, { "epoch": 0.2754610718072624, "grad_norm": 1.8539100284947525, "learning_rate": 8.501647727030202e-07, "loss": 0.2733, "step": 15847 }, { "epoch": 0.27547845434476526, "grad_norm": 2.1139561573153403, "learning_rate": 8.501446785976996e-07, "loss": 0.4236, "step": 15848 }, { "epoch": 0.2754958368822681, "grad_norm": 1.9634707748706064, "learning_rate": 8.501245833825812e-07, "loss": 0.6362, "step": 15849 }, { "epoch": 0.2755132194197709, "grad_norm": 1.2021194256857526, "learning_rate": 8.501044870577286e-07, "loss": 0.566, "step": 15850 }, { "epoch": 0.27553060195727375, "grad_norm": 1.1197252518458987, "learning_rate": 8.500843896232056e-07, "loss": 0.187, "step": 15851 }, { "epoch": 0.2755479844947765, "grad_norm": 2.0618978709325373, "learning_rate": 8.50064291079076e-07, "loss": 0.4047, "step": 15852 }, { "epoch": 0.27556536703227935, "grad_norm": 1.890492299086915, "learning_rate": 8.500441914254033e-07, "loss": 0.3567, "step": 15853 }, { "epoch": 0.2755827495697822, "grad_norm": 2.1428702541159215, "learning_rate": 8.500240906622514e-07, "loss": 0.5048, "step": 15854 }, { "epoch": 0.275600132107285, "grad_norm": 3.873262361206089, "learning_rate": 8.500039887896837e-07, "loss": 0.5561, "step": 15855 }, { "epoch": 0.27561751464478784, "grad_norm": 1.5797309555731252, "learning_rate": 8.499838858077644e-07, "loss": 0.415, "step": 15856 }, { "epoch": 0.2756348971822907, "grad_norm": 1.8122356602983374, "learning_rate": 8.499637817165568e-07, "loss": 0.4038, "step": 15857 }, { "epoch": 0.2756522797197935, "grad_norm": 1.416366417460331, "learning_rate": 8.49943676516125e-07, "loss": 0.4051, "step": 15858 }, { "epoch": 0.27566966225729633, "grad_norm": 2.2264457065787995, "learning_rate": 8.499235702065321e-07, "loss": 0.1928, "step": 15859 }, { "epoch": 0.27568704479479916, "grad_norm": 1.3368457444707813, "learning_rate": 8.499034627878426e-07, "loss": 0.2265, "step": 15860 }, { "epoch": 0.275704427332302, "grad_norm": 1.8124350371868307, "learning_rate": 8.498833542601197e-07, "loss": 0.3551, "step": 15861 }, { "epoch": 0.27572180986980477, "grad_norm": 2.0415679300476275, "learning_rate": 8.498632446234272e-07, "loss": 0.2447, "step": 15862 }, { "epoch": 0.2757391924073076, "grad_norm": 1.5631965803525143, "learning_rate": 8.498431338778291e-07, "loss": 0.4363, "step": 15863 }, { "epoch": 0.27575657494481043, "grad_norm": 1.4060895606556894, "learning_rate": 8.498230220233889e-07, "loss": 0.3826, "step": 15864 }, { "epoch": 0.27577395748231326, "grad_norm": 1.4340440994356811, "learning_rate": 8.498029090601704e-07, "loss": 0.2891, "step": 15865 }, { "epoch": 0.2757913400198161, "grad_norm": 1.5814442813668013, "learning_rate": 8.497827949882373e-07, "loss": 0.2346, "step": 15866 }, { "epoch": 0.2758087225573189, "grad_norm": 1.2937082626079235, "learning_rate": 8.497626798076536e-07, "loss": 0.3861, "step": 15867 }, { "epoch": 0.27582610509482175, "grad_norm": 3.080644200392834, "learning_rate": 8.497425635184828e-07, "loss": 0.6533, "step": 15868 }, { "epoch": 0.2758434876323246, "grad_norm": 2.1740677330486426, "learning_rate": 8.497224461207886e-07, "loss": 0.4437, "step": 15869 }, { "epoch": 0.2758608701698274, "grad_norm": 1.173351707162077, "learning_rate": 8.497023276146351e-07, "loss": 0.2568, "step": 15870 }, { "epoch": 0.27587825270733024, "grad_norm": 1.166253574154572, "learning_rate": 8.496822080000856e-07, "loss": 0.2849, "step": 15871 }, { "epoch": 0.275895635244833, "grad_norm": 1.5811308664921733, "learning_rate": 8.496620872772043e-07, "loss": 0.2773, "step": 15872 }, { "epoch": 0.27591301778233585, "grad_norm": 1.6395922293240681, "learning_rate": 8.496419654460549e-07, "loss": 0.3124, "step": 15873 }, { "epoch": 0.2759304003198387, "grad_norm": 1.392204213480186, "learning_rate": 8.49621842506701e-07, "loss": 0.2026, "step": 15874 }, { "epoch": 0.2759477828573415, "grad_norm": 1.5352728515294385, "learning_rate": 8.496017184592064e-07, "loss": 0.4965, "step": 15875 }, { "epoch": 0.27596516539484434, "grad_norm": 1.921542022609511, "learning_rate": 8.49581593303635e-07, "loss": 0.2036, "step": 15876 }, { "epoch": 0.27598254793234717, "grad_norm": 1.8263047550400022, "learning_rate": 8.495614670400505e-07, "loss": 0.4286, "step": 15877 }, { "epoch": 0.27599993046985, "grad_norm": 2.6215112030416132, "learning_rate": 8.495413396685167e-07, "loss": 0.4715, "step": 15878 }, { "epoch": 0.2760173130073528, "grad_norm": 2.271148083808506, "learning_rate": 8.495212111890975e-07, "loss": 0.2586, "step": 15879 }, { "epoch": 0.27603469554485566, "grad_norm": 1.042624456345236, "learning_rate": 8.495010816018565e-07, "loss": 0.2585, "step": 15880 }, { "epoch": 0.2760520780823585, "grad_norm": 1.4259139349268177, "learning_rate": 8.494809509068576e-07, "loss": 0.284, "step": 15881 }, { "epoch": 0.27606946061986126, "grad_norm": 1.7128628487030602, "learning_rate": 8.494608191041648e-07, "loss": 0.4351, "step": 15882 }, { "epoch": 0.2760868431573641, "grad_norm": 4.540373845491656, "learning_rate": 8.494406861938417e-07, "loss": 0.3467, "step": 15883 }, { "epoch": 0.2761042256948669, "grad_norm": 1.8335439217476963, "learning_rate": 8.494205521759521e-07, "loss": 0.2384, "step": 15884 }, { "epoch": 0.27612160823236975, "grad_norm": 1.4400646213945696, "learning_rate": 8.494004170505598e-07, "loss": 0.4066, "step": 15885 }, { "epoch": 0.2761389907698726, "grad_norm": 1.260669938492114, "learning_rate": 8.493802808177288e-07, "loss": 0.3173, "step": 15886 }, { "epoch": 0.2761563733073754, "grad_norm": 1.685998947491506, "learning_rate": 8.493601434775227e-07, "loss": 0.2931, "step": 15887 }, { "epoch": 0.27617375584487824, "grad_norm": 2.011923720481586, "learning_rate": 8.493400050300054e-07, "loss": 0.2384, "step": 15888 }, { "epoch": 0.2761911383823811, "grad_norm": 1.4740715627579268, "learning_rate": 8.493198654752409e-07, "loss": 0.3699, "step": 15889 }, { "epoch": 0.2762085209198839, "grad_norm": 1.9248488381908282, "learning_rate": 8.492997248132929e-07, "loss": 0.4414, "step": 15890 }, { "epoch": 0.27622590345738673, "grad_norm": 1.3623193422681952, "learning_rate": 8.492795830442251e-07, "loss": 0.2794, "step": 15891 }, { "epoch": 0.2762432859948895, "grad_norm": 2.198236854717295, "learning_rate": 8.492594401681014e-07, "loss": 0.4119, "step": 15892 }, { "epoch": 0.27626066853239234, "grad_norm": 1.2171863098880635, "learning_rate": 8.49239296184986e-07, "loss": 0.3917, "step": 15893 }, { "epoch": 0.27627805106989517, "grad_norm": 2.163081285693062, "learning_rate": 8.492191510949422e-07, "loss": 0.264, "step": 15894 }, { "epoch": 0.276295433607398, "grad_norm": 1.3189199069947166, "learning_rate": 8.491990048980342e-07, "loss": 0.3032, "step": 15895 }, { "epoch": 0.27631281614490083, "grad_norm": 2.5037890639658533, "learning_rate": 8.491788575943258e-07, "loss": 0.5822, "step": 15896 }, { "epoch": 0.27633019868240366, "grad_norm": 1.4065196311994277, "learning_rate": 8.491587091838808e-07, "loss": 0.3036, "step": 15897 }, { "epoch": 0.2763475812199065, "grad_norm": 1.961136985630824, "learning_rate": 8.49138559666763e-07, "loss": 0.452, "step": 15898 }, { "epoch": 0.2763649637574093, "grad_norm": 3.0649757980319094, "learning_rate": 8.491184090430363e-07, "loss": 0.4361, "step": 15899 }, { "epoch": 0.27638234629491215, "grad_norm": 2.2885990530277662, "learning_rate": 8.490982573127648e-07, "loss": 0.2622, "step": 15900 }, { "epoch": 0.276399728832415, "grad_norm": 1.6498977100899, "learning_rate": 8.49078104476012e-07, "loss": 0.2664, "step": 15901 }, { "epoch": 0.27641711136991776, "grad_norm": 1.4892741350781558, "learning_rate": 8.490579505328423e-07, "loss": 0.283, "step": 15902 }, { "epoch": 0.2764344939074206, "grad_norm": 2.356692928236568, "learning_rate": 8.490377954833189e-07, "loss": 0.4249, "step": 15903 }, { "epoch": 0.2764518764449234, "grad_norm": 1.403536873367141, "learning_rate": 8.490176393275061e-07, "loss": 0.274, "step": 15904 }, { "epoch": 0.27646925898242625, "grad_norm": 1.2185661668368817, "learning_rate": 8.489974820654677e-07, "loss": 0.3745, "step": 15905 }, { "epoch": 0.2764866415199291, "grad_norm": 1.0865615651404281, "learning_rate": 8.489773236972677e-07, "loss": 0.2551, "step": 15906 }, { "epoch": 0.2765040240574319, "grad_norm": 4.230685641590928, "learning_rate": 8.489571642229699e-07, "loss": 0.3837, "step": 15907 }, { "epoch": 0.27652140659493474, "grad_norm": 2.027846669913684, "learning_rate": 8.48937003642638e-07, "loss": 0.3247, "step": 15908 }, { "epoch": 0.27653878913243757, "grad_norm": 2.032324677285422, "learning_rate": 8.489168419563362e-07, "loss": 0.2009, "step": 15909 }, { "epoch": 0.2765561716699404, "grad_norm": 1.6498147781706483, "learning_rate": 8.488966791641284e-07, "loss": 0.4168, "step": 15910 }, { "epoch": 0.27657355420744323, "grad_norm": 1.7135716897139048, "learning_rate": 8.488765152660782e-07, "loss": 0.5987, "step": 15911 }, { "epoch": 0.276590936744946, "grad_norm": 2.5504971714272293, "learning_rate": 8.488563502622499e-07, "loss": 0.3494, "step": 15912 }, { "epoch": 0.27660831928244883, "grad_norm": 2.4482855680564937, "learning_rate": 8.488361841527071e-07, "loss": 0.3478, "step": 15913 }, { "epoch": 0.27662570181995166, "grad_norm": 1.3323056823087234, "learning_rate": 8.488160169375139e-07, "loss": 0.2869, "step": 15914 }, { "epoch": 0.2766430843574545, "grad_norm": 1.9687521085691158, "learning_rate": 8.48795848616734e-07, "loss": 0.3692, "step": 15915 }, { "epoch": 0.2766604668949573, "grad_norm": 1.473891213779881, "learning_rate": 8.487756791904316e-07, "loss": 0.392, "step": 15916 }, { "epoch": 0.27667784943246015, "grad_norm": 2.256145217629709, "learning_rate": 8.487555086586706e-07, "loss": 0.3177, "step": 15917 }, { "epoch": 0.276695231969963, "grad_norm": 1.0935686920182814, "learning_rate": 8.487353370215146e-07, "loss": 0.3786, "step": 15918 }, { "epoch": 0.2767126145074658, "grad_norm": 1.6575964979610152, "learning_rate": 8.48715164279028e-07, "loss": 0.2695, "step": 15919 }, { "epoch": 0.27672999704496865, "grad_norm": 1.5097655419828737, "learning_rate": 8.486949904312745e-07, "loss": 0.3142, "step": 15920 }, { "epoch": 0.2767473795824715, "grad_norm": 2.2455301712855884, "learning_rate": 8.486748154783181e-07, "loss": 0.5863, "step": 15921 }, { "epoch": 0.27676476211997425, "grad_norm": 1.0719187798505627, "learning_rate": 8.486546394202225e-07, "loss": 0.1984, "step": 15922 }, { "epoch": 0.2767821446574771, "grad_norm": 1.4173566311760493, "learning_rate": 8.486344622570519e-07, "loss": 0.3039, "step": 15923 }, { "epoch": 0.2767995271949799, "grad_norm": 2.6643265388810735, "learning_rate": 8.486142839888704e-07, "loss": 0.4928, "step": 15924 }, { "epoch": 0.27681690973248274, "grad_norm": 1.6533703388120997, "learning_rate": 8.485941046157415e-07, "loss": 0.3275, "step": 15925 }, { "epoch": 0.27683429226998557, "grad_norm": 2.5338632016983347, "learning_rate": 8.485739241377296e-07, "loss": 0.3248, "step": 15926 }, { "epoch": 0.2768516748074884, "grad_norm": 1.64503512854413, "learning_rate": 8.485537425548985e-07, "loss": 0.2904, "step": 15927 }, { "epoch": 0.27686905734499123, "grad_norm": 1.2793785422149306, "learning_rate": 8.485335598673121e-07, "loss": 0.2334, "step": 15928 }, { "epoch": 0.27688643988249406, "grad_norm": 2.447452095369215, "learning_rate": 8.485133760750342e-07, "loss": 0.3188, "step": 15929 }, { "epoch": 0.2769038224199969, "grad_norm": 1.5564301395137645, "learning_rate": 8.484931911781293e-07, "loss": 0.2876, "step": 15930 }, { "epoch": 0.2769212049574997, "grad_norm": 1.294651821478854, "learning_rate": 8.48473005176661e-07, "loss": 0.2953, "step": 15931 }, { "epoch": 0.2769385874950025, "grad_norm": 2.6886299883902773, "learning_rate": 8.484528180706932e-07, "loss": 0.5351, "step": 15932 }, { "epoch": 0.2769559700325053, "grad_norm": 1.6017349321737642, "learning_rate": 8.484326298602902e-07, "loss": 0.3346, "step": 15933 }, { "epoch": 0.27697335257000816, "grad_norm": 2.8922109522432646, "learning_rate": 8.484124405455157e-07, "loss": 0.7244, "step": 15934 }, { "epoch": 0.276990735107511, "grad_norm": 1.2975713585726165, "learning_rate": 8.483922501264339e-07, "loss": 0.2064, "step": 15935 }, { "epoch": 0.2770081176450138, "grad_norm": 2.411072928936364, "learning_rate": 8.483720586031087e-07, "loss": 0.34, "step": 15936 }, { "epoch": 0.27702550018251665, "grad_norm": 1.5076146910406123, "learning_rate": 8.483518659756042e-07, "loss": 0.4242, "step": 15937 }, { "epoch": 0.2770428827200195, "grad_norm": 2.1947379389425707, "learning_rate": 8.48331672243984e-07, "loss": 0.2654, "step": 15938 }, { "epoch": 0.2770602652575223, "grad_norm": 2.2245999945243735, "learning_rate": 8.483114774083127e-07, "loss": 0.3137, "step": 15939 }, { "epoch": 0.27707764779502514, "grad_norm": 2.572316866089713, "learning_rate": 8.482912814686538e-07, "loss": 0.427, "step": 15940 }, { "epoch": 0.27709503033252797, "grad_norm": 1.690372921167953, "learning_rate": 8.482710844250718e-07, "loss": 0.4048, "step": 15941 }, { "epoch": 0.27711241287003074, "grad_norm": 7.091995935322032, "learning_rate": 8.482508862776301e-07, "loss": 0.3709, "step": 15942 }, { "epoch": 0.2771297954075336, "grad_norm": 2.343275466446323, "learning_rate": 8.482306870263934e-07, "loss": 0.2493, "step": 15943 }, { "epoch": 0.2771471779450364, "grad_norm": 1.0959459216335194, "learning_rate": 8.482104866714251e-07, "loss": 0.3499, "step": 15944 }, { "epoch": 0.27716456048253924, "grad_norm": 1.2003489166909913, "learning_rate": 8.481902852127897e-07, "loss": 0.2744, "step": 15945 }, { "epoch": 0.27718194302004207, "grad_norm": 1.7442874709861, "learning_rate": 8.48170082650551e-07, "loss": 0.3901, "step": 15946 }, { "epoch": 0.2771993255575449, "grad_norm": 1.844169272850491, "learning_rate": 8.48149878984773e-07, "loss": 0.4106, "step": 15947 }, { "epoch": 0.2772167080950477, "grad_norm": 1.1996262858654962, "learning_rate": 8.4812967421552e-07, "loss": 0.3375, "step": 15948 }, { "epoch": 0.27723409063255056, "grad_norm": 1.2499883247850478, "learning_rate": 8.481094683428557e-07, "loss": 0.3243, "step": 15949 }, { "epoch": 0.2772514731700534, "grad_norm": 1.3298599446812984, "learning_rate": 8.480892613668442e-07, "loss": 0.3378, "step": 15950 }, { "epoch": 0.2772688557075562, "grad_norm": 3.091477669334736, "learning_rate": 8.480690532875498e-07, "loss": 0.3868, "step": 15951 }, { "epoch": 0.277286238245059, "grad_norm": 2.1493432596654256, "learning_rate": 8.480488441050364e-07, "loss": 0.2954, "step": 15952 }, { "epoch": 0.2773036207825618, "grad_norm": 1.658213840836232, "learning_rate": 8.48028633819368e-07, "loss": 0.1998, "step": 15953 }, { "epoch": 0.27732100332006465, "grad_norm": 1.495682960108483, "learning_rate": 8.480084224306087e-07, "loss": 0.3933, "step": 15954 }, { "epoch": 0.2773383858575675, "grad_norm": 1.9976257335826706, "learning_rate": 8.479882099388225e-07, "loss": 0.3124, "step": 15955 }, { "epoch": 0.2773557683950703, "grad_norm": 1.783698374346343, "learning_rate": 8.479679963440736e-07, "loss": 0.3357, "step": 15956 }, { "epoch": 0.27737315093257314, "grad_norm": 1.7071727288336425, "learning_rate": 8.47947781646426e-07, "loss": 0.2362, "step": 15957 }, { "epoch": 0.277390533470076, "grad_norm": 1.259675988437506, "learning_rate": 8.479275658459437e-07, "loss": 0.3168, "step": 15958 }, { "epoch": 0.2774079160075788, "grad_norm": 1.246968325063539, "learning_rate": 8.479073489426909e-07, "loss": 0.5543, "step": 15959 }, { "epoch": 0.27742529854508163, "grad_norm": 1.6174687796962581, "learning_rate": 8.478871309367317e-07, "loss": 0.3559, "step": 15960 }, { "epoch": 0.2774426810825844, "grad_norm": 1.6843518175648091, "learning_rate": 8.478669118281298e-07, "loss": 0.2647, "step": 15961 }, { "epoch": 0.27746006362008724, "grad_norm": 1.4990535716366653, "learning_rate": 8.4784669161695e-07, "loss": 0.2132, "step": 15962 }, { "epoch": 0.27747744615759007, "grad_norm": 1.5629118089991376, "learning_rate": 8.478264703032557e-07, "loss": 0.3511, "step": 15963 }, { "epoch": 0.2774948286950929, "grad_norm": 1.4289936083289205, "learning_rate": 8.478062478871113e-07, "loss": 0.3672, "step": 15964 }, { "epoch": 0.27751221123259573, "grad_norm": 1.7481606748279452, "learning_rate": 8.477860243685811e-07, "loss": 0.3769, "step": 15965 }, { "epoch": 0.27752959377009856, "grad_norm": 1.0406788448615254, "learning_rate": 8.477657997477286e-07, "loss": 0.2988, "step": 15966 }, { "epoch": 0.2775469763076014, "grad_norm": 2.0942457958992424, "learning_rate": 8.477455740246185e-07, "loss": 0.5076, "step": 15967 }, { "epoch": 0.2775643588451042, "grad_norm": 2.2810539940596657, "learning_rate": 8.477253471993146e-07, "loss": 0.3843, "step": 15968 }, { "epoch": 0.27758174138260705, "grad_norm": 1.4535670132456253, "learning_rate": 8.47705119271881e-07, "loss": 0.4791, "step": 15969 }, { "epoch": 0.2775991239201099, "grad_norm": 2.336112109376731, "learning_rate": 8.47684890242382e-07, "loss": 0.375, "step": 15970 }, { "epoch": 0.27761650645761266, "grad_norm": 1.643583588683438, "learning_rate": 8.476646601108816e-07, "loss": 0.3021, "step": 15971 }, { "epoch": 0.2776338889951155, "grad_norm": 1.3896838223529024, "learning_rate": 8.476444288774438e-07, "loss": 0.359, "step": 15972 }, { "epoch": 0.2776512715326183, "grad_norm": 1.3617065789181977, "learning_rate": 8.47624196542133e-07, "loss": 0.3914, "step": 15973 }, { "epoch": 0.27766865407012115, "grad_norm": 2.5100952120725624, "learning_rate": 8.476039631050132e-07, "loss": 0.8235, "step": 15974 }, { "epoch": 0.277686036607624, "grad_norm": 1.1591075066142207, "learning_rate": 8.475837285661485e-07, "loss": 0.2476, "step": 15975 }, { "epoch": 0.2777034191451268, "grad_norm": 1.6635076429448101, "learning_rate": 8.47563492925603e-07, "loss": 0.3751, "step": 15976 }, { "epoch": 0.27772080168262964, "grad_norm": 1.7802600543653306, "learning_rate": 8.475432561834408e-07, "loss": 0.2388, "step": 15977 }, { "epoch": 0.27773818422013247, "grad_norm": 1.4529441759941282, "learning_rate": 8.475230183397264e-07, "loss": 0.3901, "step": 15978 }, { "epoch": 0.2777555667576353, "grad_norm": 1.1589012978492463, "learning_rate": 8.475027793945234e-07, "loss": 0.3539, "step": 15979 }, { "epoch": 0.2777729492951381, "grad_norm": 2.1463568269555133, "learning_rate": 8.474825393478964e-07, "loss": 0.3605, "step": 15980 }, { "epoch": 0.2777903318326409, "grad_norm": 1.607877950804641, "learning_rate": 8.474622981999094e-07, "loss": 0.365, "step": 15981 }, { "epoch": 0.27780771437014373, "grad_norm": 1.8490827778799002, "learning_rate": 8.474420559506263e-07, "loss": 0.4121, "step": 15982 }, { "epoch": 0.27782509690764656, "grad_norm": 1.8407572973505402, "learning_rate": 8.474218126001116e-07, "loss": 0.4407, "step": 15983 }, { "epoch": 0.2778424794451494, "grad_norm": 2.1048366317930753, "learning_rate": 8.474015681484293e-07, "loss": 0.4404, "step": 15984 }, { "epoch": 0.2778598619826522, "grad_norm": 1.2294575507685241, "learning_rate": 8.473813225956437e-07, "loss": 0.2722, "step": 15985 }, { "epoch": 0.27787724452015505, "grad_norm": 3.4210294419974594, "learning_rate": 8.473610759418189e-07, "loss": 0.3302, "step": 15986 }, { "epoch": 0.2778946270576579, "grad_norm": 2.5857384587127377, "learning_rate": 8.47340828187019e-07, "loss": 0.2986, "step": 15987 }, { "epoch": 0.2779120095951607, "grad_norm": 1.7257026103819442, "learning_rate": 8.473205793313081e-07, "loss": 0.3229, "step": 15988 }, { "epoch": 0.27792939213266354, "grad_norm": 1.3122396568214805, "learning_rate": 8.473003293747506e-07, "loss": 0.2615, "step": 15989 }, { "epoch": 0.2779467746701664, "grad_norm": 1.6413200420872058, "learning_rate": 8.472800783174107e-07, "loss": 0.407, "step": 15990 }, { "epoch": 0.27796415720766915, "grad_norm": 1.6663013046380528, "learning_rate": 8.472598261593524e-07, "loss": 0.2896, "step": 15991 }, { "epoch": 0.277981539745172, "grad_norm": 2.47138626160169, "learning_rate": 8.472395729006399e-07, "loss": 0.2411, "step": 15992 }, { "epoch": 0.2779989222826748, "grad_norm": 2.5623423121323077, "learning_rate": 8.472193185413377e-07, "loss": 0.4896, "step": 15993 }, { "epoch": 0.27801630482017764, "grad_norm": 2.0543032614661363, "learning_rate": 8.471990630815096e-07, "loss": 0.4146, "step": 15994 }, { "epoch": 0.27803368735768047, "grad_norm": 1.961908446615429, "learning_rate": 8.471788065212197e-07, "loss": 0.3413, "step": 15995 }, { "epoch": 0.2780510698951833, "grad_norm": 1.84865123696473, "learning_rate": 8.471585488605328e-07, "loss": 0.2602, "step": 15996 }, { "epoch": 0.27806845243268613, "grad_norm": 2.0023093654403037, "learning_rate": 8.471382900995128e-07, "loss": 0.2496, "step": 15997 }, { "epoch": 0.27808583497018896, "grad_norm": 1.3840749901313145, "learning_rate": 8.471180302382237e-07, "loss": 0.3428, "step": 15998 }, { "epoch": 0.2781032175076918, "grad_norm": 1.329547614111098, "learning_rate": 8.470977692767301e-07, "loss": 0.1936, "step": 15999 }, { "epoch": 0.2781206000451946, "grad_norm": 2.4372540344718674, "learning_rate": 8.470775072150957e-07, "loss": 0.476, "step": 16000 }, { "epoch": 0.2781379825826974, "grad_norm": 1.3391199721164482, "learning_rate": 8.470572440533853e-07, "loss": 0.37, "step": 16001 }, { "epoch": 0.2781553651202002, "grad_norm": 1.8586770533249442, "learning_rate": 8.470369797916628e-07, "loss": 0.3361, "step": 16002 }, { "epoch": 0.27817274765770306, "grad_norm": 1.389830012389587, "learning_rate": 8.470167144299924e-07, "loss": 0.3143, "step": 16003 }, { "epoch": 0.2781901301952059, "grad_norm": 1.7600651802134444, "learning_rate": 8.469964479684386e-07, "loss": 0.4032, "step": 16004 }, { "epoch": 0.2782075127327087, "grad_norm": 7.367003945559877, "learning_rate": 8.469761804070652e-07, "loss": 0.2857, "step": 16005 }, { "epoch": 0.27822489527021155, "grad_norm": 1.7386706633646114, "learning_rate": 8.469559117459371e-07, "loss": 0.3509, "step": 16006 }, { "epoch": 0.2782422778077144, "grad_norm": 1.7610267956562218, "learning_rate": 8.469356419851176e-07, "loss": 0.2267, "step": 16007 }, { "epoch": 0.2782596603452172, "grad_norm": 2.6047798617029843, "learning_rate": 8.46915371124672e-07, "loss": 0.4553, "step": 16008 }, { "epoch": 0.27827704288272004, "grad_norm": 1.6029414442828565, "learning_rate": 8.468950991646637e-07, "loss": 0.297, "step": 16009 }, { "epoch": 0.27829442542022287, "grad_norm": 9.574982836242134, "learning_rate": 8.468748261051575e-07, "loss": 0.4, "step": 16010 }, { "epoch": 0.27831180795772564, "grad_norm": 1.88507210245867, "learning_rate": 8.468545519462172e-07, "loss": 0.4029, "step": 16011 }, { "epoch": 0.2783291904952285, "grad_norm": 1.4580721297098436, "learning_rate": 8.468342766879076e-07, "loss": 0.2671, "step": 16012 }, { "epoch": 0.2783465730327313, "grad_norm": 2.2023880393285156, "learning_rate": 8.468140003302924e-07, "loss": 0.2577, "step": 16013 }, { "epoch": 0.27836395557023413, "grad_norm": 2.5850567920059024, "learning_rate": 8.467937228734362e-07, "loss": 0.7557, "step": 16014 }, { "epoch": 0.27838133810773696, "grad_norm": 2.1016175759058067, "learning_rate": 8.467734443174033e-07, "loss": 0.3897, "step": 16015 }, { "epoch": 0.2783987206452398, "grad_norm": 1.5307754865563286, "learning_rate": 8.467531646622578e-07, "loss": 0.3002, "step": 16016 }, { "epoch": 0.2784161031827426, "grad_norm": 0.9361672764474983, "learning_rate": 8.46732883908064e-07, "loss": 0.3278, "step": 16017 }, { "epoch": 0.27843348572024545, "grad_norm": 2.8595647365198893, "learning_rate": 8.467126020548863e-07, "loss": 0.488, "step": 16018 }, { "epoch": 0.2784508682577483, "grad_norm": 1.5456516135916658, "learning_rate": 8.46692319102789e-07, "loss": 0.257, "step": 16019 }, { "epoch": 0.2784682507952511, "grad_norm": 1.4099082359670068, "learning_rate": 8.466720350518362e-07, "loss": 0.3454, "step": 16020 }, { "epoch": 0.2784856333327539, "grad_norm": 2.486904165566586, "learning_rate": 8.466517499020922e-07, "loss": 0.4995, "step": 16021 }, { "epoch": 0.2785030158702567, "grad_norm": 1.9191656161040815, "learning_rate": 8.466314636536216e-07, "loss": 0.3938, "step": 16022 }, { "epoch": 0.27852039840775955, "grad_norm": 1.6148510536219356, "learning_rate": 8.466111763064885e-07, "loss": 0.3641, "step": 16023 }, { "epoch": 0.2785377809452624, "grad_norm": 2.3846429399939812, "learning_rate": 8.46590887860757e-07, "loss": 0.3293, "step": 16024 }, { "epoch": 0.2785551634827652, "grad_norm": 1.4105163364389737, "learning_rate": 8.465705983164917e-07, "loss": 0.226, "step": 16025 }, { "epoch": 0.27857254602026804, "grad_norm": 2.030645007020062, "learning_rate": 8.465503076737568e-07, "loss": 0.3355, "step": 16026 }, { "epoch": 0.27858992855777087, "grad_norm": 2.4855216994733462, "learning_rate": 8.465300159326165e-07, "loss": 0.3234, "step": 16027 }, { "epoch": 0.2786073110952737, "grad_norm": 1.5234171959774647, "learning_rate": 8.465097230931355e-07, "loss": 0.3534, "step": 16028 }, { "epoch": 0.27862469363277653, "grad_norm": 2.273464299598014, "learning_rate": 8.464894291553777e-07, "loss": 0.5514, "step": 16029 }, { "epoch": 0.27864207617027936, "grad_norm": 2.946972507146544, "learning_rate": 8.464691341194074e-07, "loss": 0.3349, "step": 16030 }, { "epoch": 0.27865945870778214, "grad_norm": 1.8468589786425973, "learning_rate": 8.464488379852895e-07, "loss": 0.2074, "step": 16031 }, { "epoch": 0.27867684124528497, "grad_norm": 1.9894483099326075, "learning_rate": 8.464285407530876e-07, "loss": 0.4762, "step": 16032 }, { "epoch": 0.2786942237827878, "grad_norm": 2.362933631811185, "learning_rate": 8.464082424228665e-07, "loss": 0.2201, "step": 16033 }, { "epoch": 0.27871160632029063, "grad_norm": 2.0537190153558234, "learning_rate": 8.463879429946903e-07, "loss": 0.3945, "step": 16034 }, { "epoch": 0.27872898885779346, "grad_norm": 1.5577827468780308, "learning_rate": 8.463676424686235e-07, "loss": 0.2881, "step": 16035 }, { "epoch": 0.2787463713952963, "grad_norm": 2.131190499376192, "learning_rate": 8.463473408447303e-07, "loss": 0.4637, "step": 16036 }, { "epoch": 0.2787637539327991, "grad_norm": 1.2680305974491162, "learning_rate": 8.463270381230752e-07, "loss": 0.3244, "step": 16037 }, { "epoch": 0.27878113647030195, "grad_norm": 2.246177456074036, "learning_rate": 8.463067343037225e-07, "loss": 0.3642, "step": 16038 }, { "epoch": 0.2787985190078048, "grad_norm": 1.5423283143581532, "learning_rate": 8.462864293867366e-07, "loss": 0.244, "step": 16039 }, { "epoch": 0.2788159015453076, "grad_norm": 1.5079858390588001, "learning_rate": 8.462661233721817e-07, "loss": 0.6304, "step": 16040 }, { "epoch": 0.2788332840828104, "grad_norm": 2.6511160393200113, "learning_rate": 8.462458162601222e-07, "loss": 0.382, "step": 16041 }, { "epoch": 0.2788506666203132, "grad_norm": 2.991469909461736, "learning_rate": 8.462255080506226e-07, "loss": 0.6394, "step": 16042 }, { "epoch": 0.27886804915781604, "grad_norm": 2.1400276693466305, "learning_rate": 8.462051987437472e-07, "loss": 0.5076, "step": 16043 }, { "epoch": 0.2788854316953189, "grad_norm": 1.643096677607953, "learning_rate": 8.461848883395603e-07, "loss": 0.3912, "step": 16044 }, { "epoch": 0.2789028142328217, "grad_norm": 2.039764552462438, "learning_rate": 8.461645768381264e-07, "loss": 0.3996, "step": 16045 }, { "epoch": 0.27892019677032454, "grad_norm": 3.541227929597624, "learning_rate": 8.461442642395096e-07, "loss": 0.6951, "step": 16046 }, { "epoch": 0.27893757930782737, "grad_norm": 1.2983212590740179, "learning_rate": 8.461239505437747e-07, "loss": 0.2916, "step": 16047 }, { "epoch": 0.2789549618453302, "grad_norm": 1.6141779963649248, "learning_rate": 8.461036357509859e-07, "loss": 0.3026, "step": 16048 }, { "epoch": 0.278972344382833, "grad_norm": 1.599087698999046, "learning_rate": 8.460833198612074e-07, "loss": 0.517, "step": 16049 }, { "epoch": 0.27898972692033586, "grad_norm": 2.0139484995212005, "learning_rate": 8.460630028745038e-07, "loss": 0.3299, "step": 16050 }, { "epoch": 0.27900710945783863, "grad_norm": 1.8146686801874972, "learning_rate": 8.460426847909394e-07, "loss": 0.3213, "step": 16051 }, { "epoch": 0.27902449199534146, "grad_norm": 2.2239986868298796, "learning_rate": 8.460223656105787e-07, "loss": 0.1794, "step": 16052 }, { "epoch": 0.2790418745328443, "grad_norm": 1.9246636079558392, "learning_rate": 8.460020453334862e-07, "loss": 0.4864, "step": 16053 }, { "epoch": 0.2790592570703471, "grad_norm": 1.4337341553018208, "learning_rate": 8.45981723959726e-07, "loss": 0.5087, "step": 16054 }, { "epoch": 0.27907663960784995, "grad_norm": 3.5398388877974876, "learning_rate": 8.459614014893627e-07, "loss": 0.3786, "step": 16055 }, { "epoch": 0.2790940221453528, "grad_norm": 2.1124162995585807, "learning_rate": 8.459410779224607e-07, "loss": 0.5116, "step": 16056 }, { "epoch": 0.2791114046828556, "grad_norm": 2.1436031127442265, "learning_rate": 8.459207532590843e-07, "loss": 0.4153, "step": 16057 }, { "epoch": 0.27912878722035844, "grad_norm": 1.1875530623545862, "learning_rate": 8.459004274992982e-07, "loss": 0.2234, "step": 16058 }, { "epoch": 0.2791461697578613, "grad_norm": 2.2901125847046684, "learning_rate": 8.458801006431664e-07, "loss": 0.3221, "step": 16059 }, { "epoch": 0.2791635522953641, "grad_norm": 1.5736795075312977, "learning_rate": 8.458597726907537e-07, "loss": 0.3803, "step": 16060 }, { "epoch": 0.2791809348328669, "grad_norm": 1.0947390957217724, "learning_rate": 8.458394436421243e-07, "loss": 0.2063, "step": 16061 }, { "epoch": 0.2791983173703697, "grad_norm": 1.3551163550567604, "learning_rate": 8.458191134973429e-07, "loss": 0.3644, "step": 16062 }, { "epoch": 0.27921569990787254, "grad_norm": 1.875357700202727, "learning_rate": 8.457987822564736e-07, "loss": 0.312, "step": 16063 }, { "epoch": 0.27923308244537537, "grad_norm": 1.6559611365712184, "learning_rate": 8.457784499195811e-07, "loss": 0.3175, "step": 16064 }, { "epoch": 0.2792504649828782, "grad_norm": 1.8402139860866356, "learning_rate": 8.457581164867298e-07, "loss": 0.2821, "step": 16065 }, { "epoch": 0.27926784752038103, "grad_norm": 2.066300838865623, "learning_rate": 8.45737781957984e-07, "loss": 0.3943, "step": 16066 }, { "epoch": 0.27928523005788386, "grad_norm": 1.2838492793871297, "learning_rate": 8.457174463334082e-07, "loss": 0.4165, "step": 16067 }, { "epoch": 0.2793026125953867, "grad_norm": 1.2808110238121406, "learning_rate": 8.456971096130671e-07, "loss": 0.2254, "step": 16068 }, { "epoch": 0.2793199951328895, "grad_norm": 1.786223967927548, "learning_rate": 8.456767717970249e-07, "loss": 0.3762, "step": 16069 }, { "epoch": 0.27933737767039235, "grad_norm": 2.2902065441534942, "learning_rate": 8.456564328853461e-07, "loss": 0.376, "step": 16070 }, { "epoch": 0.2793547602078951, "grad_norm": 1.64332253308194, "learning_rate": 8.45636092878095e-07, "loss": 0.3642, "step": 16071 }, { "epoch": 0.27937214274539796, "grad_norm": 2.1007949956435956, "learning_rate": 8.456157517753367e-07, "loss": 0.7273, "step": 16072 }, { "epoch": 0.2793895252829008, "grad_norm": 2.183981913616407, "learning_rate": 8.455954095771348e-07, "loss": 0.2499, "step": 16073 }, { "epoch": 0.2794069078204036, "grad_norm": 1.1455161355536256, "learning_rate": 8.455750662835543e-07, "loss": 0.348, "step": 16074 }, { "epoch": 0.27942429035790645, "grad_norm": 1.8896835273848687, "learning_rate": 8.455547218946597e-07, "loss": 0.3691, "step": 16075 }, { "epoch": 0.2794416728954093, "grad_norm": 2.021851123158364, "learning_rate": 8.455343764105153e-07, "loss": 0.4407, "step": 16076 }, { "epoch": 0.2794590554329121, "grad_norm": 1.9665385938442483, "learning_rate": 8.455140298311856e-07, "loss": 0.5284, "step": 16077 }, { "epoch": 0.27947643797041494, "grad_norm": 1.9499533086552647, "learning_rate": 8.454936821567352e-07, "loss": 0.4305, "step": 16078 }, { "epoch": 0.27949382050791777, "grad_norm": 6.515153909016638, "learning_rate": 8.454733333872286e-07, "loss": 0.7155, "step": 16079 }, { "epoch": 0.2795112030454206, "grad_norm": 1.9409493383609178, "learning_rate": 8.454529835227302e-07, "loss": 0.5119, "step": 16080 }, { "epoch": 0.2795285855829234, "grad_norm": 1.94258547934157, "learning_rate": 8.454326325633044e-07, "loss": 0.5264, "step": 16081 }, { "epoch": 0.2795459681204262, "grad_norm": 1.8961999993307268, "learning_rate": 8.45412280509016e-07, "loss": 0.2212, "step": 16082 }, { "epoch": 0.27956335065792903, "grad_norm": 2.3172878980319327, "learning_rate": 8.453919273599292e-07, "loss": 0.5587, "step": 16083 }, { "epoch": 0.27958073319543186, "grad_norm": 1.2218991106202124, "learning_rate": 8.453715731161087e-07, "loss": 0.3337, "step": 16084 }, { "epoch": 0.2795981157329347, "grad_norm": 1.747031708413198, "learning_rate": 8.45351217777619e-07, "loss": 0.4123, "step": 16085 }, { "epoch": 0.2796154982704375, "grad_norm": 1.0925883956416838, "learning_rate": 8.453308613445247e-07, "loss": 0.3836, "step": 16086 }, { "epoch": 0.27963288080794035, "grad_norm": 1.451415323910419, "learning_rate": 8.4531050381689e-07, "loss": 0.4788, "step": 16087 }, { "epoch": 0.2796502633454432, "grad_norm": 1.477068342475746, "learning_rate": 8.452901451947797e-07, "loss": 0.4161, "step": 16088 }, { "epoch": 0.279667645882946, "grad_norm": 1.27792904083744, "learning_rate": 8.452697854782583e-07, "loss": 0.446, "step": 16089 }, { "epoch": 0.27968502842044884, "grad_norm": 1.8658194547956966, "learning_rate": 8.452494246673902e-07, "loss": 0.2591, "step": 16090 }, { "epoch": 0.2797024109579516, "grad_norm": 1.1446309258262997, "learning_rate": 8.4522906276224e-07, "loss": 0.3945, "step": 16091 }, { "epoch": 0.27971979349545445, "grad_norm": 1.6141175494126818, "learning_rate": 8.452086997628723e-07, "loss": 0.5145, "step": 16092 }, { "epoch": 0.2797371760329573, "grad_norm": 1.617160902013608, "learning_rate": 8.451883356693517e-07, "loss": 0.2118, "step": 16093 }, { "epoch": 0.2797545585704601, "grad_norm": 1.8831065623880408, "learning_rate": 8.451679704817425e-07, "loss": 0.3201, "step": 16094 }, { "epoch": 0.27977194110796294, "grad_norm": 1.604228563942062, "learning_rate": 8.451476042001094e-07, "loss": 0.3851, "step": 16095 }, { "epoch": 0.27978932364546577, "grad_norm": 4.82118833217841, "learning_rate": 8.45127236824517e-07, "loss": 0.3732, "step": 16096 }, { "epoch": 0.2798067061829686, "grad_norm": 1.893578424658333, "learning_rate": 8.451068683550297e-07, "loss": 0.3962, "step": 16097 }, { "epoch": 0.27982408872047143, "grad_norm": 1.189097772483523, "learning_rate": 8.450864987917124e-07, "loss": 0.3027, "step": 16098 }, { "epoch": 0.27984147125797426, "grad_norm": 1.6469636213938115, "learning_rate": 8.450661281346292e-07, "loss": 0.3306, "step": 16099 }, { "epoch": 0.27985885379547704, "grad_norm": 1.1940610118015593, "learning_rate": 8.450457563838449e-07, "loss": 0.4583, "step": 16100 }, { "epoch": 0.27987623633297987, "grad_norm": 1.6558591300628562, "learning_rate": 8.450253835394241e-07, "loss": 0.2741, "step": 16101 }, { "epoch": 0.2798936188704827, "grad_norm": 1.2677188010180147, "learning_rate": 8.450050096014313e-07, "loss": 0.2689, "step": 16102 }, { "epoch": 0.2799110014079855, "grad_norm": 1.4382805475120253, "learning_rate": 8.449846345699311e-07, "loss": 0.5501, "step": 16103 }, { "epoch": 0.27992838394548836, "grad_norm": 1.75690065802799, "learning_rate": 8.449642584449881e-07, "loss": 0.4077, "step": 16104 }, { "epoch": 0.2799457664829912, "grad_norm": 2.1594145297053653, "learning_rate": 8.44943881226667e-07, "loss": 0.3114, "step": 16105 }, { "epoch": 0.279963149020494, "grad_norm": 1.5861877567275096, "learning_rate": 8.449235029150319e-07, "loss": 0.4354, "step": 16106 }, { "epoch": 0.27998053155799685, "grad_norm": 3.7510094789316404, "learning_rate": 8.44903123510148e-07, "loss": 0.6661, "step": 16107 }, { "epoch": 0.2799979140954997, "grad_norm": 1.8593156710604233, "learning_rate": 8.448827430120797e-07, "loss": 0.33, "step": 16108 }, { "epoch": 0.2800152966330025, "grad_norm": 2.2204440871107773, "learning_rate": 8.448623614208914e-07, "loss": 0.2886, "step": 16109 }, { "epoch": 0.2800326791705053, "grad_norm": 2.4618279258800073, "learning_rate": 8.448419787366479e-07, "loss": 0.4442, "step": 16110 }, { "epoch": 0.2800500617080081, "grad_norm": 1.9380768456253976, "learning_rate": 8.448215949594135e-07, "loss": 0.3196, "step": 16111 }, { "epoch": 0.28006744424551094, "grad_norm": 1.8087441948259149, "learning_rate": 8.448012100892533e-07, "loss": 0.4078, "step": 16112 }, { "epoch": 0.2800848267830138, "grad_norm": 1.710182657208666, "learning_rate": 8.447808241262316e-07, "loss": 0.3387, "step": 16113 }, { "epoch": 0.2801022093205166, "grad_norm": 2.558658733505037, "learning_rate": 8.44760437070413e-07, "loss": 0.3515, "step": 16114 }, { "epoch": 0.28011959185801943, "grad_norm": 1.271466655892951, "learning_rate": 8.447400489218623e-07, "loss": 0.1948, "step": 16115 }, { "epoch": 0.28013697439552226, "grad_norm": 4.910099576008505, "learning_rate": 8.447196596806438e-07, "loss": 0.4461, "step": 16116 }, { "epoch": 0.2801543569330251, "grad_norm": 2.103757198780546, "learning_rate": 8.446992693468225e-07, "loss": 0.2807, "step": 16117 }, { "epoch": 0.2801717394705279, "grad_norm": 2.015998323799061, "learning_rate": 8.446788779204628e-07, "loss": 0.3351, "step": 16118 }, { "epoch": 0.28018912200803076, "grad_norm": 1.9688099179842757, "learning_rate": 8.446584854016295e-07, "loss": 0.4255, "step": 16119 }, { "epoch": 0.28020650454553353, "grad_norm": 2.709549539662871, "learning_rate": 8.44638091790387e-07, "loss": 0.7, "step": 16120 }, { "epoch": 0.28022388708303636, "grad_norm": 2.085293512702407, "learning_rate": 8.446176970868002e-07, "loss": 0.279, "step": 16121 }, { "epoch": 0.2802412696205392, "grad_norm": 1.8418138178635748, "learning_rate": 8.445973012909335e-07, "loss": 0.2687, "step": 16122 }, { "epoch": 0.280258652158042, "grad_norm": 1.9940943292808369, "learning_rate": 8.445769044028516e-07, "loss": 0.2614, "step": 16123 }, { "epoch": 0.28027603469554485, "grad_norm": 1.6727823348287407, "learning_rate": 8.445565064226193e-07, "loss": 0.3491, "step": 16124 }, { "epoch": 0.2802934172330477, "grad_norm": 1.5067755476769809, "learning_rate": 8.44536107350301e-07, "loss": 0.1988, "step": 16125 }, { "epoch": 0.2803107997705505, "grad_norm": 1.7925490946768117, "learning_rate": 8.445157071859617e-07, "loss": 0.3351, "step": 16126 }, { "epoch": 0.28032818230805334, "grad_norm": 1.9779363140924897, "learning_rate": 8.444953059296658e-07, "loss": 0.2381, "step": 16127 }, { "epoch": 0.28034556484555617, "grad_norm": 1.4815754907351713, "learning_rate": 8.44474903581478e-07, "loss": 0.5232, "step": 16128 }, { "epoch": 0.280362947383059, "grad_norm": 1.2993803217465523, "learning_rate": 8.44454500141463e-07, "loss": 0.5191, "step": 16129 }, { "epoch": 0.2803803299205618, "grad_norm": 1.9161746332672838, "learning_rate": 8.444340956096853e-07, "loss": 0.3578, "step": 16130 }, { "epoch": 0.2803977124580646, "grad_norm": 1.9577317732511745, "learning_rate": 8.4441368998621e-07, "loss": 0.3236, "step": 16131 }, { "epoch": 0.28041509499556744, "grad_norm": 2.081246784784633, "learning_rate": 8.443932832711015e-07, "loss": 0.3905, "step": 16132 }, { "epoch": 0.28043247753307027, "grad_norm": 2.5254990839568223, "learning_rate": 8.443728754644243e-07, "loss": 0.378, "step": 16133 }, { "epoch": 0.2804498600705731, "grad_norm": 2.527060582518978, "learning_rate": 8.443524665662434e-07, "loss": 0.6878, "step": 16134 }, { "epoch": 0.28046724260807593, "grad_norm": 1.5999538740018036, "learning_rate": 8.443320565766234e-07, "loss": 0.2678, "step": 16135 }, { "epoch": 0.28048462514557876, "grad_norm": 2.1477295213034813, "learning_rate": 8.44311645495629e-07, "loss": 0.3432, "step": 16136 }, { "epoch": 0.2805020076830816, "grad_norm": 2.216869107816448, "learning_rate": 8.442912333233246e-07, "loss": 0.4164, "step": 16137 }, { "epoch": 0.2805193902205844, "grad_norm": 1.849707026515756, "learning_rate": 8.442708200597754e-07, "loss": 0.2737, "step": 16138 }, { "epoch": 0.28053677275808725, "grad_norm": 1.2670443650658578, "learning_rate": 8.442504057050457e-07, "loss": 0.4315, "step": 16139 }, { "epoch": 0.28055415529559, "grad_norm": 1.3064868659865632, "learning_rate": 8.442299902592004e-07, "loss": 0.2324, "step": 16140 }, { "epoch": 0.28057153783309285, "grad_norm": 1.1963109795847238, "learning_rate": 8.442095737223041e-07, "loss": 0.3146, "step": 16141 }, { "epoch": 0.2805889203705957, "grad_norm": 2.136615330677018, "learning_rate": 8.441891560944217e-07, "loss": 0.4356, "step": 16142 }, { "epoch": 0.2806063029080985, "grad_norm": 1.5166619616301145, "learning_rate": 8.441687373756177e-07, "loss": 0.3538, "step": 16143 }, { "epoch": 0.28062368544560135, "grad_norm": 1.285634735595015, "learning_rate": 8.441483175659569e-07, "loss": 0.2988, "step": 16144 }, { "epoch": 0.2806410679831042, "grad_norm": 2.8748550210503754, "learning_rate": 8.44127896665504e-07, "loss": 0.2786, "step": 16145 }, { "epoch": 0.280658450520607, "grad_norm": 1.5492406257072955, "learning_rate": 8.441074746743238e-07, "loss": 0.3328, "step": 16146 }, { "epoch": 0.28067583305810984, "grad_norm": 1.3355811257956889, "learning_rate": 8.440870515924809e-07, "loss": 0.2478, "step": 16147 }, { "epoch": 0.28069321559561267, "grad_norm": 1.2782803715497155, "learning_rate": 8.440666274200401e-07, "loss": 0.2595, "step": 16148 }, { "epoch": 0.2807105981331155, "grad_norm": 5.392694152274675, "learning_rate": 8.440462021570663e-07, "loss": 0.4878, "step": 16149 }, { "epoch": 0.28072798067061827, "grad_norm": 3.070207421364477, "learning_rate": 8.440257758036239e-07, "loss": 0.4437, "step": 16150 }, { "epoch": 0.2807453632081211, "grad_norm": 2.1024060928627932, "learning_rate": 8.44005348359778e-07, "loss": 0.2715, "step": 16151 }, { "epoch": 0.28076274574562393, "grad_norm": 1.9002747589205953, "learning_rate": 8.43984919825593e-07, "loss": 0.427, "step": 16152 }, { "epoch": 0.28078012828312676, "grad_norm": 2.0193051699218185, "learning_rate": 8.439644902011338e-07, "loss": 0.5015, "step": 16153 }, { "epoch": 0.2807975108206296, "grad_norm": 1.2559851200205925, "learning_rate": 8.439440594864653e-07, "loss": 0.2898, "step": 16154 }, { "epoch": 0.2808148933581324, "grad_norm": 1.6112400486125782, "learning_rate": 8.439236276816521e-07, "loss": 0.4253, "step": 16155 }, { "epoch": 0.28083227589563525, "grad_norm": 8.911541160475776, "learning_rate": 8.43903194786759e-07, "loss": 0.3459, "step": 16156 }, { "epoch": 0.2808496584331381, "grad_norm": 1.6373411477207276, "learning_rate": 8.438827608018506e-07, "loss": 0.5836, "step": 16157 }, { "epoch": 0.2808670409706409, "grad_norm": 1.4508606256811976, "learning_rate": 8.43862325726992e-07, "loss": 0.2425, "step": 16158 }, { "epoch": 0.28088442350814374, "grad_norm": 2.2135341908442046, "learning_rate": 8.438418895622476e-07, "loss": 0.4364, "step": 16159 }, { "epoch": 0.2809018060456465, "grad_norm": 0.7653152927907304, "learning_rate": 8.438214523076826e-07, "loss": 0.238, "step": 16160 }, { "epoch": 0.28091918858314935, "grad_norm": 1.4979914935700493, "learning_rate": 8.438010139633614e-07, "loss": 0.1995, "step": 16161 }, { "epoch": 0.2809365711206522, "grad_norm": 2.0761242980741126, "learning_rate": 8.437805745293489e-07, "loss": 0.2864, "step": 16162 }, { "epoch": 0.280953953658155, "grad_norm": 1.5369731550490482, "learning_rate": 8.437601340057098e-07, "loss": 0.2217, "step": 16163 }, { "epoch": 0.28097133619565784, "grad_norm": 2.8752948084416348, "learning_rate": 8.437396923925093e-07, "loss": 0.5582, "step": 16164 }, { "epoch": 0.28098871873316067, "grad_norm": 1.6863509322948864, "learning_rate": 8.437192496898117e-07, "loss": 0.3565, "step": 16165 }, { "epoch": 0.2810061012706635, "grad_norm": 2.6925598992579247, "learning_rate": 8.43698805897682e-07, "loss": 0.3744, "step": 16166 }, { "epoch": 0.28102348380816633, "grad_norm": 3.925741478750214, "learning_rate": 8.436783610161848e-07, "loss": 0.5574, "step": 16167 }, { "epoch": 0.28104086634566916, "grad_norm": 1.2146157979578471, "learning_rate": 8.436579150453853e-07, "loss": 0.4277, "step": 16168 }, { "epoch": 0.281058248883172, "grad_norm": 1.8057148539272807, "learning_rate": 8.436374679853478e-07, "loss": 0.2013, "step": 16169 }, { "epoch": 0.28107563142067477, "grad_norm": 1.403802027811511, "learning_rate": 8.436170198361376e-07, "loss": 0.3064, "step": 16170 }, { "epoch": 0.2810930139581776, "grad_norm": 1.7433872117739033, "learning_rate": 8.435965705978193e-07, "loss": 0.365, "step": 16171 }, { "epoch": 0.2811103964956804, "grad_norm": 2.818705382636673, "learning_rate": 8.435761202704577e-07, "loss": 0.4238, "step": 16172 }, { "epoch": 0.28112777903318326, "grad_norm": 5.166682561591871, "learning_rate": 8.435556688541174e-07, "loss": 1.1089, "step": 16173 }, { "epoch": 0.2811451615706861, "grad_norm": 1.6962102293705978, "learning_rate": 8.435352163488635e-07, "loss": 0.4179, "step": 16174 }, { "epoch": 0.2811625441081889, "grad_norm": 2.476974796491622, "learning_rate": 8.435147627547611e-07, "loss": 0.3644, "step": 16175 }, { "epoch": 0.28117992664569175, "grad_norm": 1.3154426647906692, "learning_rate": 8.434943080718744e-07, "loss": 0.3186, "step": 16176 }, { "epoch": 0.2811973091831946, "grad_norm": 1.355965931555403, "learning_rate": 8.434738523002687e-07, "loss": 0.3727, "step": 16177 }, { "epoch": 0.2812146917206974, "grad_norm": 1.842262350463824, "learning_rate": 8.434533954400085e-07, "loss": 0.4587, "step": 16178 }, { "epoch": 0.28123207425820024, "grad_norm": 1.305440835900865, "learning_rate": 8.434329374911589e-07, "loss": 0.4651, "step": 16179 }, { "epoch": 0.281249456795703, "grad_norm": 2.513325316060671, "learning_rate": 8.434124784537847e-07, "loss": 0.2625, "step": 16180 }, { "epoch": 0.28126683933320584, "grad_norm": 2.6184171453696834, "learning_rate": 8.433920183279506e-07, "loss": 0.4354, "step": 16181 }, { "epoch": 0.2812842218707087, "grad_norm": 3.8069641751929275, "learning_rate": 8.433715571137216e-07, "loss": 0.3815, "step": 16182 }, { "epoch": 0.2813016044082115, "grad_norm": 2.0662464497053437, "learning_rate": 8.433510948111624e-07, "loss": 0.2942, "step": 16183 }, { "epoch": 0.28131898694571433, "grad_norm": 1.565525894477385, "learning_rate": 8.433306314203381e-07, "loss": 0.7115, "step": 16184 }, { "epoch": 0.28133636948321716, "grad_norm": 1.1008843034482834, "learning_rate": 8.433101669413133e-07, "loss": 0.2521, "step": 16185 }, { "epoch": 0.28135375202072, "grad_norm": 1.9293646791124361, "learning_rate": 8.432897013741531e-07, "loss": 0.3705, "step": 16186 }, { "epoch": 0.2813711345582228, "grad_norm": 2.029500438997877, "learning_rate": 8.432692347189221e-07, "loss": 0.3138, "step": 16187 }, { "epoch": 0.28138851709572565, "grad_norm": 2.3244150243018713, "learning_rate": 8.432487669756854e-07, "loss": 0.393, "step": 16188 }, { "epoch": 0.2814058996332285, "grad_norm": 2.507518338255562, "learning_rate": 8.432282981445078e-07, "loss": 0.2686, "step": 16189 }, { "epoch": 0.28142328217073126, "grad_norm": 1.9208858868799958, "learning_rate": 8.43207828225454e-07, "loss": 0.1672, "step": 16190 }, { "epoch": 0.2814406647082341, "grad_norm": 2.1420009634576367, "learning_rate": 8.431873572185892e-07, "loss": 0.2741, "step": 16191 }, { "epoch": 0.2814580472457369, "grad_norm": 2.0248619305457125, "learning_rate": 8.431668851239781e-07, "loss": 0.2309, "step": 16192 }, { "epoch": 0.28147542978323975, "grad_norm": 1.0209837878869683, "learning_rate": 8.431464119416855e-07, "loss": 0.2632, "step": 16193 }, { "epoch": 0.2814928123207426, "grad_norm": 1.2286667210052284, "learning_rate": 8.431259376717765e-07, "loss": 0.3962, "step": 16194 }, { "epoch": 0.2815101948582454, "grad_norm": 4.739098546942611, "learning_rate": 8.431054623143159e-07, "loss": 0.4795, "step": 16195 }, { "epoch": 0.28152757739574824, "grad_norm": 1.6055630819324613, "learning_rate": 8.430849858693686e-07, "loss": 0.1951, "step": 16196 }, { "epoch": 0.28154495993325107, "grad_norm": 1.866317824511263, "learning_rate": 8.430645083369994e-07, "loss": 0.4253, "step": 16197 }, { "epoch": 0.2815623424707539, "grad_norm": 1.5522571863753771, "learning_rate": 8.430440297172734e-07, "loss": 0.2569, "step": 16198 }, { "epoch": 0.28157972500825673, "grad_norm": 1.3415624183347918, "learning_rate": 8.430235500102553e-07, "loss": 0.2924, "step": 16199 }, { "epoch": 0.2815971075457595, "grad_norm": 1.7668710953168156, "learning_rate": 8.430030692160103e-07, "loss": 0.3642, "step": 16200 }, { "epoch": 0.28161449008326234, "grad_norm": 1.9877475616276292, "learning_rate": 8.429825873346031e-07, "loss": 0.4041, "step": 16201 }, { "epoch": 0.28163187262076517, "grad_norm": 1.5009735025925717, "learning_rate": 8.429621043660985e-07, "loss": 0.5675, "step": 16202 }, { "epoch": 0.281649255158268, "grad_norm": 1.6306218584245824, "learning_rate": 8.429416203105617e-07, "loss": 0.4139, "step": 16203 }, { "epoch": 0.2816666376957708, "grad_norm": 1.9056294094879016, "learning_rate": 8.429211351680574e-07, "loss": 0.224, "step": 16204 }, { "epoch": 0.28168402023327366, "grad_norm": 1.0797753191485224, "learning_rate": 8.429006489386507e-07, "loss": 0.2781, "step": 16205 }, { "epoch": 0.2817014027707765, "grad_norm": 1.6507349897670607, "learning_rate": 8.428801616224064e-07, "loss": 0.2033, "step": 16206 }, { "epoch": 0.2817187853082793, "grad_norm": 1.9559159449804004, "learning_rate": 8.428596732193895e-07, "loss": 0.3824, "step": 16207 }, { "epoch": 0.28173616784578215, "grad_norm": 1.9059128318915572, "learning_rate": 8.428391837296651e-07, "loss": 0.4645, "step": 16208 }, { "epoch": 0.281753550383285, "grad_norm": 1.612797030633257, "learning_rate": 8.428186931532978e-07, "loss": 0.3085, "step": 16209 }, { "epoch": 0.28177093292078775, "grad_norm": 4.023330038343123, "learning_rate": 8.427982014903527e-07, "loss": 0.4627, "step": 16210 }, { "epoch": 0.2817883154582906, "grad_norm": 2.7502536424911592, "learning_rate": 8.42777708740895e-07, "loss": 0.6052, "step": 16211 }, { "epoch": 0.2818056979957934, "grad_norm": 1.8439550244699032, "learning_rate": 8.427572149049891e-07, "loss": 0.2791, "step": 16212 }, { "epoch": 0.28182308053329624, "grad_norm": 1.07144536400551, "learning_rate": 8.427367199827004e-07, "loss": 0.3294, "step": 16213 }, { "epoch": 0.2818404630707991, "grad_norm": 2.0004064620616555, "learning_rate": 8.427162239740938e-07, "loss": 0.2495, "step": 16214 }, { "epoch": 0.2818578456083019, "grad_norm": 1.216263010510558, "learning_rate": 8.426957268792343e-07, "loss": 0.2393, "step": 16215 }, { "epoch": 0.28187522814580473, "grad_norm": 0.933133304966608, "learning_rate": 8.426752286981865e-07, "loss": 0.1508, "step": 16216 }, { "epoch": 0.28189261068330757, "grad_norm": 1.4441743946578214, "learning_rate": 8.426547294310158e-07, "loss": 0.3965, "step": 16217 }, { "epoch": 0.2819099932208104, "grad_norm": 2.4773122614387044, "learning_rate": 8.42634229077787e-07, "loss": 0.4508, "step": 16218 }, { "epoch": 0.2819273757583132, "grad_norm": 1.2523787654037375, "learning_rate": 8.426137276385651e-07, "loss": 0.3148, "step": 16219 }, { "epoch": 0.281944758295816, "grad_norm": 1.3190493977895885, "learning_rate": 8.42593225113415e-07, "loss": 0.2492, "step": 16220 }, { "epoch": 0.28196214083331883, "grad_norm": 1.0860904581172868, "learning_rate": 8.425727215024018e-07, "loss": 0.2771, "step": 16221 }, { "epoch": 0.28197952337082166, "grad_norm": 1.515376102165656, "learning_rate": 8.425522168055906e-07, "loss": 0.2654, "step": 16222 }, { "epoch": 0.2819969059083245, "grad_norm": 2.3265423438987782, "learning_rate": 8.425317110230459e-07, "loss": 0.3375, "step": 16223 }, { "epoch": 0.2820142884458273, "grad_norm": 1.5111888412501682, "learning_rate": 8.425112041548332e-07, "loss": 0.2253, "step": 16224 }, { "epoch": 0.28203167098333015, "grad_norm": 2.6250322643961965, "learning_rate": 8.424906962010173e-07, "loss": 0.5007, "step": 16225 }, { "epoch": 0.282049053520833, "grad_norm": 1.1573050191398218, "learning_rate": 8.424701871616633e-07, "loss": 0.3307, "step": 16226 }, { "epoch": 0.2820664360583358, "grad_norm": 1.386997801098809, "learning_rate": 8.424496770368361e-07, "loss": 0.2908, "step": 16227 }, { "epoch": 0.28208381859583864, "grad_norm": 1.7575733571578271, "learning_rate": 8.424291658266005e-07, "loss": 0.3241, "step": 16228 }, { "epoch": 0.2821012011333415, "grad_norm": 3.100069967241339, "learning_rate": 8.42408653531022e-07, "loss": 0.4617, "step": 16229 }, { "epoch": 0.28211858367084425, "grad_norm": 1.7917381440827882, "learning_rate": 8.423881401501653e-07, "loss": 0.3162, "step": 16230 }, { "epoch": 0.2821359662083471, "grad_norm": 2.3488533647467964, "learning_rate": 8.423676256840955e-07, "loss": 0.6654, "step": 16231 }, { "epoch": 0.2821533487458499, "grad_norm": 2.358201517691832, "learning_rate": 8.423471101328774e-07, "loss": 0.3108, "step": 16232 }, { "epoch": 0.28217073128335274, "grad_norm": 1.7423404541717389, "learning_rate": 8.423265934965764e-07, "loss": 0.3391, "step": 16233 }, { "epoch": 0.28218811382085557, "grad_norm": 1.66872260899339, "learning_rate": 8.423060757752574e-07, "loss": 0.3367, "step": 16234 }, { "epoch": 0.2822054963583584, "grad_norm": 1.2123826710483514, "learning_rate": 8.422855569689852e-07, "loss": 0.4042, "step": 16235 }, { "epoch": 0.28222287889586123, "grad_norm": 1.967232773876829, "learning_rate": 8.422650370778253e-07, "loss": 0.2484, "step": 16236 }, { "epoch": 0.28224026143336406, "grad_norm": 1.7073930570763993, "learning_rate": 8.422445161018422e-07, "loss": 0.2349, "step": 16237 }, { "epoch": 0.2822576439708669, "grad_norm": 1.6217016668595192, "learning_rate": 8.422239940411012e-07, "loss": 0.2518, "step": 16238 }, { "epoch": 0.28227502650836966, "grad_norm": 1.2844108539091328, "learning_rate": 8.422034708956674e-07, "loss": 0.3493, "step": 16239 }, { "epoch": 0.2822924090458725, "grad_norm": 1.284616629281043, "learning_rate": 8.421829466656058e-07, "loss": 0.3828, "step": 16240 }, { "epoch": 0.2823097915833753, "grad_norm": 2.5735963891505773, "learning_rate": 8.421624213509816e-07, "loss": 0.6276, "step": 16241 }, { "epoch": 0.28232717412087815, "grad_norm": 1.142815109794732, "learning_rate": 8.421418949518595e-07, "loss": 0.2992, "step": 16242 }, { "epoch": 0.282344556658381, "grad_norm": 2.677563339468851, "learning_rate": 8.421213674683049e-07, "loss": 0.5324, "step": 16243 }, { "epoch": 0.2823619391958838, "grad_norm": 2.440572434985054, "learning_rate": 8.421008389003825e-07, "loss": 0.5365, "step": 16244 }, { "epoch": 0.28237932173338665, "grad_norm": 1.5182808951136046, "learning_rate": 8.420803092481579e-07, "loss": 0.3648, "step": 16245 }, { "epoch": 0.2823967042708895, "grad_norm": 1.2129572405234141, "learning_rate": 8.420597785116956e-07, "loss": 0.3057, "step": 16246 }, { "epoch": 0.2824140868083923, "grad_norm": 2.447771532104514, "learning_rate": 8.42039246691061e-07, "loss": 0.482, "step": 16247 }, { "epoch": 0.28243146934589514, "grad_norm": 1.2907961580186853, "learning_rate": 8.420187137863192e-07, "loss": 0.3343, "step": 16248 }, { "epoch": 0.2824488518833979, "grad_norm": 1.1818970942173774, "learning_rate": 8.419981797975352e-07, "loss": 0.2132, "step": 16249 }, { "epoch": 0.28246623442090074, "grad_norm": 0.9615437943002968, "learning_rate": 8.41977644724774e-07, "loss": 0.1654, "step": 16250 }, { "epoch": 0.28248361695840357, "grad_norm": 1.8462115697606745, "learning_rate": 8.419571085681007e-07, "loss": 0.2935, "step": 16251 }, { "epoch": 0.2825009994959064, "grad_norm": 2.1124841546917894, "learning_rate": 8.419365713275806e-07, "loss": 0.2288, "step": 16252 }, { "epoch": 0.28251838203340923, "grad_norm": 2.9264565299015874, "learning_rate": 8.419160330032784e-07, "loss": 0.2552, "step": 16253 }, { "epoch": 0.28253576457091206, "grad_norm": 2.001558477252921, "learning_rate": 8.418954935952596e-07, "loss": 0.3614, "step": 16254 }, { "epoch": 0.2825531471084149, "grad_norm": 2.4583289677331024, "learning_rate": 8.418749531035891e-07, "loss": 0.7158, "step": 16255 }, { "epoch": 0.2825705296459177, "grad_norm": 1.2639819478165912, "learning_rate": 8.418544115283322e-07, "loss": 0.3976, "step": 16256 }, { "epoch": 0.28258791218342055, "grad_norm": 2.1161431837304194, "learning_rate": 8.418338688695536e-07, "loss": 0.3437, "step": 16257 }, { "epoch": 0.2826052947209234, "grad_norm": 2.205353474206229, "learning_rate": 8.418133251273188e-07, "loss": 0.4705, "step": 16258 }, { "epoch": 0.28262267725842616, "grad_norm": 1.950146361704505, "learning_rate": 8.417927803016927e-07, "loss": 0.3634, "step": 16259 }, { "epoch": 0.282640059795929, "grad_norm": 1.7879845066749989, "learning_rate": 8.417722343927405e-07, "loss": 0.2324, "step": 16260 }, { "epoch": 0.2826574423334318, "grad_norm": 1.6645372929697653, "learning_rate": 8.417516874005274e-07, "loss": 0.4116, "step": 16261 }, { "epoch": 0.28267482487093465, "grad_norm": 1.2944296980282715, "learning_rate": 8.417311393251184e-07, "loss": 0.2203, "step": 16262 }, { "epoch": 0.2826922074084375, "grad_norm": 2.2312466454143, "learning_rate": 8.417105901665787e-07, "loss": 0.2788, "step": 16263 }, { "epoch": 0.2827095899459403, "grad_norm": 10.22024230239693, "learning_rate": 8.416900399249733e-07, "loss": 0.4589, "step": 16264 }, { "epoch": 0.28272697248344314, "grad_norm": 4.871949366075896, "learning_rate": 8.416694886003674e-07, "loss": 0.2338, "step": 16265 }, { "epoch": 0.28274435502094597, "grad_norm": 1.6266920859363379, "learning_rate": 8.416489361928263e-07, "loss": 0.2792, "step": 16266 }, { "epoch": 0.2827617375584488, "grad_norm": 1.458966561925316, "learning_rate": 8.416283827024149e-07, "loss": 0.2368, "step": 16267 }, { "epoch": 0.28277912009595163, "grad_norm": 1.926761124732993, "learning_rate": 8.416078281291987e-07, "loss": 0.2679, "step": 16268 }, { "epoch": 0.2827965026334544, "grad_norm": 1.624480485195243, "learning_rate": 8.415872724732425e-07, "loss": 0.2806, "step": 16269 }, { "epoch": 0.28281388517095724, "grad_norm": 2.1910973574025294, "learning_rate": 8.415667157346113e-07, "loss": 0.3399, "step": 16270 }, { "epoch": 0.28283126770846007, "grad_norm": 3.030827025268631, "learning_rate": 8.415461579133709e-07, "loss": 0.2456, "step": 16271 }, { "epoch": 0.2828486502459629, "grad_norm": 0.968297044195205, "learning_rate": 8.415255990095858e-07, "loss": 0.3516, "step": 16272 }, { "epoch": 0.2828660327834657, "grad_norm": 1.775949493841184, "learning_rate": 8.415050390233215e-07, "loss": 0.3864, "step": 16273 }, { "epoch": 0.28288341532096856, "grad_norm": 2.709861819832299, "learning_rate": 8.41484477954643e-07, "loss": 0.2417, "step": 16274 }, { "epoch": 0.2829007978584714, "grad_norm": 2.696404519267197, "learning_rate": 8.414639158036158e-07, "loss": 0.3152, "step": 16275 }, { "epoch": 0.2829181803959742, "grad_norm": 1.8599964998035354, "learning_rate": 8.414433525703049e-07, "loss": 0.3987, "step": 16276 }, { "epoch": 0.28293556293347705, "grad_norm": 3.415219636791098, "learning_rate": 8.414227882547751e-07, "loss": 0.4295, "step": 16277 }, { "epoch": 0.2829529454709799, "grad_norm": 2.1689689840510704, "learning_rate": 8.41402222857092e-07, "loss": 0.2789, "step": 16278 }, { "epoch": 0.28297032800848265, "grad_norm": 1.323317029496586, "learning_rate": 8.413816563773208e-07, "loss": 0.4046, "step": 16279 }, { "epoch": 0.2829877105459855, "grad_norm": 1.9526410294380598, "learning_rate": 8.413610888155265e-07, "loss": 0.3417, "step": 16280 }, { "epoch": 0.2830050930834883, "grad_norm": 1.3676526627049475, "learning_rate": 8.413405201717745e-07, "loss": 0.4476, "step": 16281 }, { "epoch": 0.28302247562099114, "grad_norm": 3.654309659039849, "learning_rate": 8.413199504461295e-07, "loss": 0.5096, "step": 16282 }, { "epoch": 0.283039858158494, "grad_norm": 3.016727638795261, "learning_rate": 8.412993796386574e-07, "loss": 0.368, "step": 16283 }, { "epoch": 0.2830572406959968, "grad_norm": 0.9804625375357976, "learning_rate": 8.412788077494229e-07, "loss": 0.5422, "step": 16284 }, { "epoch": 0.28307462323349963, "grad_norm": 2.2231035332502507, "learning_rate": 8.412582347784914e-07, "loss": 0.3, "step": 16285 }, { "epoch": 0.28309200577100246, "grad_norm": 1.3441280412731542, "learning_rate": 8.412376607259279e-07, "loss": 0.3916, "step": 16286 }, { "epoch": 0.2831093883085053, "grad_norm": 1.756501423507373, "learning_rate": 8.412170855917979e-07, "loss": 0.2797, "step": 16287 }, { "epoch": 0.2831267708460081, "grad_norm": 1.3662952155882127, "learning_rate": 8.411965093761665e-07, "loss": 0.4, "step": 16288 }, { "epoch": 0.2831441533835109, "grad_norm": 1.7807231529529055, "learning_rate": 8.411759320790987e-07, "loss": 0.3246, "step": 16289 }, { "epoch": 0.28316153592101373, "grad_norm": 2.407506145899154, "learning_rate": 8.411553537006602e-07, "loss": 0.816, "step": 16290 }, { "epoch": 0.28317891845851656, "grad_norm": 1.7751414057407222, "learning_rate": 8.411347742409159e-07, "loss": 0.3129, "step": 16291 }, { "epoch": 0.2831963009960194, "grad_norm": 1.6955735073583338, "learning_rate": 8.411141936999309e-07, "loss": 0.3908, "step": 16292 }, { "epoch": 0.2832136835335222, "grad_norm": 2.10723205180276, "learning_rate": 8.410936120777707e-07, "loss": 0.271, "step": 16293 }, { "epoch": 0.28323106607102505, "grad_norm": 1.9449018476413218, "learning_rate": 8.410730293745003e-07, "loss": 0.3332, "step": 16294 }, { "epoch": 0.2832484486085279, "grad_norm": 1.8610384871449381, "learning_rate": 8.410524455901853e-07, "loss": 0.3237, "step": 16295 }, { "epoch": 0.2832658311460307, "grad_norm": 1.2099013502334808, "learning_rate": 8.410318607248905e-07, "loss": 0.413, "step": 16296 }, { "epoch": 0.28328321368353354, "grad_norm": 1.1398927645684793, "learning_rate": 8.410112747786815e-07, "loss": 0.5563, "step": 16297 }, { "epoch": 0.28330059622103637, "grad_norm": 1.1451124008899636, "learning_rate": 8.409906877516234e-07, "loss": 0.1671, "step": 16298 }, { "epoch": 0.28331797875853915, "grad_norm": 1.1922613650520377, "learning_rate": 8.409700996437814e-07, "loss": 0.2496, "step": 16299 }, { "epoch": 0.283335361296042, "grad_norm": 2.3137864871095677, "learning_rate": 8.409495104552209e-07, "loss": 0.4914, "step": 16300 }, { "epoch": 0.2833527438335448, "grad_norm": 3.5581885742036055, "learning_rate": 8.40928920186007e-07, "loss": 0.381, "step": 16301 }, { "epoch": 0.28337012637104764, "grad_norm": 1.97257678039655, "learning_rate": 8.409083288362051e-07, "loss": 0.286, "step": 16302 }, { "epoch": 0.28338750890855047, "grad_norm": 2.4639273662802377, "learning_rate": 8.408877364058803e-07, "loss": 0.4718, "step": 16303 }, { "epoch": 0.2834048914460533, "grad_norm": 1.6904657754325916, "learning_rate": 8.408671428950979e-07, "loss": 0.3223, "step": 16304 }, { "epoch": 0.28342227398355613, "grad_norm": 1.2129862528026483, "learning_rate": 8.408465483039234e-07, "loss": 0.2475, "step": 16305 }, { "epoch": 0.28343965652105896, "grad_norm": 1.1584208497771111, "learning_rate": 8.40825952632422e-07, "loss": 0.236, "step": 16306 }, { "epoch": 0.2834570390585618, "grad_norm": 1.8289271919614638, "learning_rate": 8.408053558806587e-07, "loss": 0.3702, "step": 16307 }, { "epoch": 0.2834744215960646, "grad_norm": 1.8659854773372253, "learning_rate": 8.40784758048699e-07, "loss": 0.3927, "step": 16308 }, { "epoch": 0.2834918041335674, "grad_norm": 1.7802351004385315, "learning_rate": 8.407641591366083e-07, "loss": 0.3103, "step": 16309 }, { "epoch": 0.2835091866710702, "grad_norm": 1.5361590694302412, "learning_rate": 8.407435591444516e-07, "loss": 0.4666, "step": 16310 }, { "epoch": 0.28352656920857305, "grad_norm": 1.656641745090022, "learning_rate": 8.407229580722943e-07, "loss": 0.3588, "step": 16311 }, { "epoch": 0.2835439517460759, "grad_norm": 1.9156488040794033, "learning_rate": 8.40702355920202e-07, "loss": 0.2991, "step": 16312 }, { "epoch": 0.2835613342835787, "grad_norm": 1.1209689738584883, "learning_rate": 8.406817526882395e-07, "loss": 0.4308, "step": 16313 }, { "epoch": 0.28357871682108154, "grad_norm": 0.8270006266396597, "learning_rate": 8.406611483764723e-07, "loss": 0.2913, "step": 16314 }, { "epoch": 0.2835960993585844, "grad_norm": 1.176068536818037, "learning_rate": 8.406405429849659e-07, "loss": 0.2152, "step": 16315 }, { "epoch": 0.2836134818960872, "grad_norm": 2.645654315063361, "learning_rate": 8.406199365137854e-07, "loss": 0.3574, "step": 16316 }, { "epoch": 0.28363086443359004, "grad_norm": 1.4944706573116484, "learning_rate": 8.405993289629963e-07, "loss": 0.2484, "step": 16317 }, { "epoch": 0.28364824697109287, "grad_norm": 2.0274503188826256, "learning_rate": 8.405787203326636e-07, "loss": 0.5399, "step": 16318 }, { "epoch": 0.28366562950859564, "grad_norm": 1.9912731611284553, "learning_rate": 8.405581106228528e-07, "loss": 0.428, "step": 16319 }, { "epoch": 0.28368301204609847, "grad_norm": 1.9370941791236513, "learning_rate": 8.405374998336293e-07, "loss": 0.6325, "step": 16320 }, { "epoch": 0.2837003945836013, "grad_norm": 1.4046379289831656, "learning_rate": 8.405168879650584e-07, "loss": 0.2972, "step": 16321 }, { "epoch": 0.28371777712110413, "grad_norm": 1.5751500436102088, "learning_rate": 8.404962750172053e-07, "loss": 0.2683, "step": 16322 }, { "epoch": 0.28373515965860696, "grad_norm": 1.3411750448629995, "learning_rate": 8.404756609901354e-07, "loss": 0.2584, "step": 16323 }, { "epoch": 0.2837525421961098, "grad_norm": 2.441686914795182, "learning_rate": 8.404550458839142e-07, "loss": 0.2782, "step": 16324 }, { "epoch": 0.2837699247336126, "grad_norm": 1.618685730224304, "learning_rate": 8.404344296986068e-07, "loss": 0.3735, "step": 16325 }, { "epoch": 0.28378730727111545, "grad_norm": 1.4833605206621758, "learning_rate": 8.404138124342785e-07, "loss": 0.2876, "step": 16326 }, { "epoch": 0.2838046898086183, "grad_norm": 3.9732690780224518, "learning_rate": 8.403931940909951e-07, "loss": 0.5794, "step": 16327 }, { "epoch": 0.2838220723461211, "grad_norm": 1.2774545189091269, "learning_rate": 8.403725746688215e-07, "loss": 0.4106, "step": 16328 }, { "epoch": 0.2838394548836239, "grad_norm": 1.7216288937295734, "learning_rate": 8.403519541678231e-07, "loss": 0.3393, "step": 16329 }, { "epoch": 0.2838568374211267, "grad_norm": 1.2715423752824062, "learning_rate": 8.403313325880654e-07, "loss": 0.3274, "step": 16330 }, { "epoch": 0.28387421995862955, "grad_norm": 1.5876226011688175, "learning_rate": 8.403107099296138e-07, "loss": 0.3054, "step": 16331 }, { "epoch": 0.2838916024961324, "grad_norm": 2.1485712832935624, "learning_rate": 8.402900861925335e-07, "loss": 0.3675, "step": 16332 }, { "epoch": 0.2839089850336352, "grad_norm": 1.0718706492946324, "learning_rate": 8.4026946137689e-07, "loss": 0.2213, "step": 16333 }, { "epoch": 0.28392636757113804, "grad_norm": 1.7561061548632768, "learning_rate": 8.402488354827484e-07, "loss": 0.5081, "step": 16334 }, { "epoch": 0.28394375010864087, "grad_norm": 1.4111005132229324, "learning_rate": 8.402282085101746e-07, "loss": 0.3052, "step": 16335 }, { "epoch": 0.2839611326461437, "grad_norm": 3.0237483044330746, "learning_rate": 8.402075804592334e-07, "loss": 0.2765, "step": 16336 }, { "epoch": 0.28397851518364653, "grad_norm": 3.5362501959226544, "learning_rate": 8.401869513299906e-07, "loss": 0.5623, "step": 16337 }, { "epoch": 0.28399589772114936, "grad_norm": 2.330847613475592, "learning_rate": 8.401663211225114e-07, "loss": 0.2836, "step": 16338 }, { "epoch": 0.28401328025865213, "grad_norm": 2.3743864244072386, "learning_rate": 8.401456898368613e-07, "loss": 0.3499, "step": 16339 }, { "epoch": 0.28403066279615496, "grad_norm": 2.345692299687239, "learning_rate": 8.401250574731054e-07, "loss": 0.1347, "step": 16340 }, { "epoch": 0.2840480453336578, "grad_norm": 2.984790382592072, "learning_rate": 8.401044240313096e-07, "loss": 0.3651, "step": 16341 }, { "epoch": 0.2840654278711606, "grad_norm": 2.461763047461304, "learning_rate": 8.400837895115388e-07, "loss": 0.444, "step": 16342 }, { "epoch": 0.28408281040866346, "grad_norm": 1.302400953918249, "learning_rate": 8.400631539138587e-07, "loss": 0.4356, "step": 16343 }, { "epoch": 0.2841001929461663, "grad_norm": 1.5767104475818488, "learning_rate": 8.400425172383345e-07, "loss": 0.2424, "step": 16344 }, { "epoch": 0.2841175754836691, "grad_norm": 1.6024107758702444, "learning_rate": 8.400218794850319e-07, "loss": 0.3648, "step": 16345 }, { "epoch": 0.28413495802117195, "grad_norm": 1.7900252881159522, "learning_rate": 8.40001240654016e-07, "loss": 0.2727, "step": 16346 }, { "epoch": 0.2841523405586748, "grad_norm": 1.197995635491333, "learning_rate": 8.399806007453524e-07, "loss": 0.5939, "step": 16347 }, { "epoch": 0.2841697230961776, "grad_norm": 1.184397536696379, "learning_rate": 8.399599597591065e-07, "loss": 0.2559, "step": 16348 }, { "epoch": 0.2841871056336804, "grad_norm": 2.6702285961348804, "learning_rate": 8.399393176953435e-07, "loss": 0.59, "step": 16349 }, { "epoch": 0.2842044881711832, "grad_norm": 2.2568147689990226, "learning_rate": 8.399186745541292e-07, "loss": 0.3833, "step": 16350 }, { "epoch": 0.28422187070868604, "grad_norm": 3.021263278331197, "learning_rate": 8.398980303355289e-07, "loss": 0.4045, "step": 16351 }, { "epoch": 0.28423925324618887, "grad_norm": 1.4530683401334659, "learning_rate": 8.398773850396078e-07, "loss": 0.3235, "step": 16352 }, { "epoch": 0.2842566357836917, "grad_norm": 1.6466139616983273, "learning_rate": 8.398567386664317e-07, "loss": 0.2498, "step": 16353 }, { "epoch": 0.28427401832119453, "grad_norm": 1.2662371484509776, "learning_rate": 8.398360912160658e-07, "loss": 0.1799, "step": 16354 }, { "epoch": 0.28429140085869736, "grad_norm": 2.3915471861641517, "learning_rate": 8.398154426885755e-07, "loss": 0.374, "step": 16355 }, { "epoch": 0.2843087833962002, "grad_norm": 2.672747586062847, "learning_rate": 8.397947930840264e-07, "loss": 0.3588, "step": 16356 }, { "epoch": 0.284326165933703, "grad_norm": 1.2034249174967415, "learning_rate": 8.397741424024839e-07, "loss": 0.1977, "step": 16357 }, { "epoch": 0.28434354847120585, "grad_norm": 1.7335805907467492, "learning_rate": 8.397534906440135e-07, "loss": 0.2011, "step": 16358 }, { "epoch": 0.28436093100870863, "grad_norm": 1.2397810360498265, "learning_rate": 8.397328378086805e-07, "loss": 0.2396, "step": 16359 }, { "epoch": 0.28437831354621146, "grad_norm": 2.6671486097333252, "learning_rate": 8.397121838965506e-07, "loss": 0.2903, "step": 16360 }, { "epoch": 0.2843956960837143, "grad_norm": 1.7847316960927277, "learning_rate": 8.396915289076889e-07, "loss": 0.2977, "step": 16361 }, { "epoch": 0.2844130786212171, "grad_norm": 1.3488539347340887, "learning_rate": 8.396708728421613e-07, "loss": 0.3603, "step": 16362 }, { "epoch": 0.28443046115871995, "grad_norm": 1.3864349114943264, "learning_rate": 8.396502157000331e-07, "loss": 0.5267, "step": 16363 }, { "epoch": 0.2844478436962228, "grad_norm": 1.4318728838037444, "learning_rate": 8.396295574813696e-07, "loss": 0.3122, "step": 16364 }, { "epoch": 0.2844652262337256, "grad_norm": 1.2540506924921677, "learning_rate": 8.396088981862364e-07, "loss": 0.3133, "step": 16365 }, { "epoch": 0.28448260877122844, "grad_norm": 1.960281714672657, "learning_rate": 8.395882378146992e-07, "loss": 0.2729, "step": 16366 }, { "epoch": 0.28449999130873127, "grad_norm": 1.8217514408517461, "learning_rate": 8.395675763668231e-07, "loss": 0.4982, "step": 16367 }, { "epoch": 0.2845173738462341, "grad_norm": 1.7395703096442197, "learning_rate": 8.395469138426738e-07, "loss": 0.3441, "step": 16368 }, { "epoch": 0.2845347563837369, "grad_norm": 2.2160841262726083, "learning_rate": 8.395262502423169e-07, "loss": 0.4811, "step": 16369 }, { "epoch": 0.2845521389212397, "grad_norm": 1.7627838945968437, "learning_rate": 8.395055855658175e-07, "loss": 0.2682, "step": 16370 }, { "epoch": 0.28456952145874254, "grad_norm": 1.816661742490224, "learning_rate": 8.394849198132414e-07, "loss": 0.4281, "step": 16371 }, { "epoch": 0.28458690399624537, "grad_norm": 2.4052914081339805, "learning_rate": 8.394642529846542e-07, "loss": 0.2557, "step": 16372 }, { "epoch": 0.2846042865337482, "grad_norm": 1.468058866547357, "learning_rate": 8.394435850801212e-07, "loss": 0.2245, "step": 16373 }, { "epoch": 0.284621669071251, "grad_norm": 4.214649944319591, "learning_rate": 8.394229160997079e-07, "loss": 0.3174, "step": 16374 }, { "epoch": 0.28463905160875386, "grad_norm": 1.5821619256373471, "learning_rate": 8.394022460434798e-07, "loss": 0.4097, "step": 16375 }, { "epoch": 0.2846564341462567, "grad_norm": 2.6429865822838914, "learning_rate": 8.393815749115028e-07, "loss": 0.2676, "step": 16376 }, { "epoch": 0.2846738166837595, "grad_norm": 1.789286405619333, "learning_rate": 8.393609027038417e-07, "loss": 0.245, "step": 16377 }, { "epoch": 0.2846911992212623, "grad_norm": 1.4269559061832529, "learning_rate": 8.393402294205627e-07, "loss": 0.2106, "step": 16378 }, { "epoch": 0.2847085817587651, "grad_norm": 2.576510012986042, "learning_rate": 8.393195550617311e-07, "loss": 0.7057, "step": 16379 }, { "epoch": 0.28472596429626795, "grad_norm": 2.647613418590844, "learning_rate": 8.39298879627412e-07, "loss": 0.2862, "step": 16380 }, { "epoch": 0.2847433468337708, "grad_norm": 1.8213775111007464, "learning_rate": 8.392782031176717e-07, "loss": 0.2461, "step": 16381 }, { "epoch": 0.2847607293712736, "grad_norm": 1.4551684888239436, "learning_rate": 8.392575255325754e-07, "loss": 0.3658, "step": 16382 }, { "epoch": 0.28477811190877644, "grad_norm": 1.9219150092133375, "learning_rate": 8.392368468721883e-07, "loss": 0.37, "step": 16383 }, { "epoch": 0.2847954944462793, "grad_norm": 1.1769914312745142, "learning_rate": 8.392161671365763e-07, "loss": 0.1749, "step": 16384 }, { "epoch": 0.2848128769837821, "grad_norm": 1.6082262102030571, "learning_rate": 8.391954863258051e-07, "loss": 0.473, "step": 16385 }, { "epoch": 0.28483025952128493, "grad_norm": 2.0781106021151783, "learning_rate": 8.391748044399397e-07, "loss": 0.4101, "step": 16386 }, { "epoch": 0.28484764205878776, "grad_norm": 2.170573083022173, "learning_rate": 8.391541214790461e-07, "loss": 0.3121, "step": 16387 }, { "epoch": 0.28486502459629054, "grad_norm": 1.3126238213779813, "learning_rate": 8.391334374431898e-07, "loss": 0.339, "step": 16388 }, { "epoch": 0.28488240713379337, "grad_norm": 1.7472772188721244, "learning_rate": 8.391127523324362e-07, "loss": 0.3205, "step": 16389 }, { "epoch": 0.2848997896712962, "grad_norm": 1.9199660137452867, "learning_rate": 8.39092066146851e-07, "loss": 0.189, "step": 16390 }, { "epoch": 0.28491717220879903, "grad_norm": 1.4924787294151132, "learning_rate": 8.390713788864997e-07, "loss": 0.5025, "step": 16391 }, { "epoch": 0.28493455474630186, "grad_norm": 2.627257966963157, "learning_rate": 8.390506905514479e-07, "loss": 0.2422, "step": 16392 }, { "epoch": 0.2849519372838047, "grad_norm": 1.5224016672257008, "learning_rate": 8.39030001141761e-07, "loss": 0.2141, "step": 16393 }, { "epoch": 0.2849693198213075, "grad_norm": 3.4240221946141425, "learning_rate": 8.390093106575049e-07, "loss": 0.3558, "step": 16394 }, { "epoch": 0.28498670235881035, "grad_norm": 1.5136783185059866, "learning_rate": 8.38988619098745e-07, "loss": 0.3767, "step": 16395 }, { "epoch": 0.2850040848963132, "grad_norm": 1.7556904346428297, "learning_rate": 8.389679264655469e-07, "loss": 0.4564, "step": 16396 }, { "epoch": 0.285021467433816, "grad_norm": 1.3929562409971508, "learning_rate": 8.389472327579762e-07, "loss": 0.3846, "step": 16397 }, { "epoch": 0.2850388499713188, "grad_norm": 1.9325772138880295, "learning_rate": 8.389265379760986e-07, "loss": 0.3306, "step": 16398 }, { "epoch": 0.2850562325088216, "grad_norm": 2.33659094624062, "learning_rate": 8.389058421199793e-07, "loss": 0.2545, "step": 16399 }, { "epoch": 0.28507361504632445, "grad_norm": 1.795652005540743, "learning_rate": 8.388851451896843e-07, "loss": 0.4653, "step": 16400 }, { "epoch": 0.2850909975838273, "grad_norm": 1.510359290620599, "learning_rate": 8.388644471852791e-07, "loss": 0.3619, "step": 16401 }, { "epoch": 0.2851083801213301, "grad_norm": 1.280469851748016, "learning_rate": 8.388437481068291e-07, "loss": 0.2589, "step": 16402 }, { "epoch": 0.28512576265883294, "grad_norm": 2.0657217280951152, "learning_rate": 8.388230479544001e-07, "loss": 0.524, "step": 16403 }, { "epoch": 0.28514314519633577, "grad_norm": 1.8195299093728632, "learning_rate": 8.388023467280578e-07, "loss": 0.3807, "step": 16404 }, { "epoch": 0.2851605277338386, "grad_norm": 2.025364926391661, "learning_rate": 8.387816444278677e-07, "loss": 0.6439, "step": 16405 }, { "epoch": 0.28517791027134143, "grad_norm": 1.7472022547669266, "learning_rate": 8.387609410538952e-07, "loss": 0.5316, "step": 16406 }, { "epoch": 0.28519529280884426, "grad_norm": 1.706972697655139, "learning_rate": 8.387402366062064e-07, "loss": 0.2371, "step": 16407 }, { "epoch": 0.28521267534634703, "grad_norm": 1.6476944458407885, "learning_rate": 8.387195310848665e-07, "loss": 0.2694, "step": 16408 }, { "epoch": 0.28523005788384986, "grad_norm": 1.3600384072568694, "learning_rate": 8.386988244899413e-07, "loss": 0.2504, "step": 16409 }, { "epoch": 0.2852474404213527, "grad_norm": 1.9071421079049633, "learning_rate": 8.386781168214964e-07, "loss": 0.4079, "step": 16410 }, { "epoch": 0.2852648229588555, "grad_norm": 2.2331750902424488, "learning_rate": 8.386574080795974e-07, "loss": 0.4034, "step": 16411 }, { "epoch": 0.28528220549635835, "grad_norm": 1.4437468541248208, "learning_rate": 8.3863669826431e-07, "loss": 0.125, "step": 16412 }, { "epoch": 0.2852995880338612, "grad_norm": 1.8743003664920383, "learning_rate": 8.386159873757001e-07, "loss": 0.321, "step": 16413 }, { "epoch": 0.285316970571364, "grad_norm": 3.1920958894920464, "learning_rate": 8.385952754138328e-07, "loss": 0.3357, "step": 16414 }, { "epoch": 0.28533435310886684, "grad_norm": 2.2706006749830654, "learning_rate": 8.38574562378774e-07, "loss": 0.3748, "step": 16415 }, { "epoch": 0.2853517356463697, "grad_norm": 3.645306588133725, "learning_rate": 8.385538482705894e-07, "loss": 0.5115, "step": 16416 }, { "epoch": 0.2853691181838725, "grad_norm": 2.1460483710151013, "learning_rate": 8.385331330893447e-07, "loss": 0.3344, "step": 16417 }, { "epoch": 0.2853865007213753, "grad_norm": 4.841817685316233, "learning_rate": 8.385124168351054e-07, "loss": 0.3323, "step": 16418 }, { "epoch": 0.2854038832588781, "grad_norm": 2.198390958318137, "learning_rate": 8.384916995079372e-07, "loss": 0.4628, "step": 16419 }, { "epoch": 0.28542126579638094, "grad_norm": 2.1721069484987225, "learning_rate": 8.38470981107906e-07, "loss": 0.2699, "step": 16420 }, { "epoch": 0.28543864833388377, "grad_norm": 1.2900249237452444, "learning_rate": 8.38450261635077e-07, "loss": 0.2836, "step": 16421 }, { "epoch": 0.2854560308713866, "grad_norm": 1.2864496014972668, "learning_rate": 8.384295410895163e-07, "loss": 0.4677, "step": 16422 }, { "epoch": 0.28547341340888943, "grad_norm": 1.649021252738719, "learning_rate": 8.384088194712895e-07, "loss": 0.2483, "step": 16423 }, { "epoch": 0.28549079594639226, "grad_norm": 1.2926903947320691, "learning_rate": 8.38388096780462e-07, "loss": 0.4013, "step": 16424 }, { "epoch": 0.2855081784838951, "grad_norm": 3.2738559637817652, "learning_rate": 8.383673730170997e-07, "loss": 0.3299, "step": 16425 }, { "epoch": 0.2855255610213979, "grad_norm": 2.050476899300312, "learning_rate": 8.383466481812684e-07, "loss": 0.2239, "step": 16426 }, { "epoch": 0.28554294355890075, "grad_norm": 1.869738658618681, "learning_rate": 8.383259222730337e-07, "loss": 0.4678, "step": 16427 }, { "epoch": 0.2855603260964035, "grad_norm": 1.5854890593164623, "learning_rate": 8.38305195292461e-07, "loss": 0.2884, "step": 16428 }, { "epoch": 0.28557770863390636, "grad_norm": 2.6888443381329514, "learning_rate": 8.382844672396165e-07, "loss": 0.3723, "step": 16429 }, { "epoch": 0.2855950911714092, "grad_norm": 2.375860979769825, "learning_rate": 8.382637381145654e-07, "loss": 0.3156, "step": 16430 }, { "epoch": 0.285612473708912, "grad_norm": 1.1943976317762812, "learning_rate": 8.382430079173737e-07, "loss": 0.3519, "step": 16431 }, { "epoch": 0.28562985624641485, "grad_norm": 1.2605750374395588, "learning_rate": 8.38222276648107e-07, "loss": 0.3105, "step": 16432 }, { "epoch": 0.2856472387839177, "grad_norm": 1.5613706389602693, "learning_rate": 8.382015443068311e-07, "loss": 0.3795, "step": 16433 }, { "epoch": 0.2856646213214205, "grad_norm": 2.3684083100139253, "learning_rate": 8.381808108936117e-07, "loss": 0.5196, "step": 16434 }, { "epoch": 0.28568200385892334, "grad_norm": 1.737007871488968, "learning_rate": 8.381600764085143e-07, "loss": 0.2928, "step": 16435 }, { "epoch": 0.28569938639642617, "grad_norm": 2.0901439581151386, "learning_rate": 8.381393408516051e-07, "loss": 0.198, "step": 16436 }, { "epoch": 0.285716768933929, "grad_norm": 1.7365120091085395, "learning_rate": 8.381186042229494e-07, "loss": 0.4555, "step": 16437 }, { "epoch": 0.2857341514714318, "grad_norm": 4.126126611390337, "learning_rate": 8.38097866522613e-07, "loss": 0.5069, "step": 16438 }, { "epoch": 0.2857515340089346, "grad_norm": 2.2900846169327416, "learning_rate": 8.380771277506618e-07, "loss": 0.4142, "step": 16439 }, { "epoch": 0.28576891654643743, "grad_norm": 1.7762943532786113, "learning_rate": 8.380563879071611e-07, "loss": 0.3323, "step": 16440 }, { "epoch": 0.28578629908394027, "grad_norm": 1.8793017629748272, "learning_rate": 8.380356469921771e-07, "loss": 0.178, "step": 16441 }, { "epoch": 0.2858036816214431, "grad_norm": 3.2494681030030974, "learning_rate": 8.380149050057754e-07, "loss": 0.3671, "step": 16442 }, { "epoch": 0.2858210641589459, "grad_norm": 1.8086160811160261, "learning_rate": 8.379941619480218e-07, "loss": 0.4333, "step": 16443 }, { "epoch": 0.28583844669644876, "grad_norm": 1.7481025286173326, "learning_rate": 8.379734178189818e-07, "loss": 0.4148, "step": 16444 }, { "epoch": 0.2858558292339516, "grad_norm": 1.2824748113359974, "learning_rate": 8.379526726187216e-07, "loss": 0.3236, "step": 16445 }, { "epoch": 0.2858732117714544, "grad_norm": 1.8187374078477148, "learning_rate": 8.379319263473063e-07, "loss": 0.2316, "step": 16446 }, { "epoch": 0.28589059430895725, "grad_norm": 1.3579199362405865, "learning_rate": 8.379111790048023e-07, "loss": 0.2792, "step": 16447 }, { "epoch": 0.28590797684646, "grad_norm": 2.9638922328143895, "learning_rate": 8.37890430591275e-07, "loss": 0.3409, "step": 16448 }, { "epoch": 0.28592535938396285, "grad_norm": 1.8403448327391096, "learning_rate": 8.378696811067902e-07, "loss": 0.3005, "step": 16449 }, { "epoch": 0.2859427419214657, "grad_norm": 1.6082971953147662, "learning_rate": 8.378489305514138e-07, "loss": 0.2313, "step": 16450 }, { "epoch": 0.2859601244589685, "grad_norm": 1.5271143493925234, "learning_rate": 8.378281789252115e-07, "loss": 0.4079, "step": 16451 }, { "epoch": 0.28597750699647134, "grad_norm": 1.783739368187776, "learning_rate": 8.378074262282491e-07, "loss": 0.4443, "step": 16452 }, { "epoch": 0.2859948895339742, "grad_norm": 1.816594907588362, "learning_rate": 8.37786672460592e-07, "loss": 0.6954, "step": 16453 }, { "epoch": 0.286012272071477, "grad_norm": 1.8804910657892762, "learning_rate": 8.377659176223066e-07, "loss": 0.3195, "step": 16454 }, { "epoch": 0.28602965460897983, "grad_norm": 1.326475780585455, "learning_rate": 8.377451617134585e-07, "loss": 0.3564, "step": 16455 }, { "epoch": 0.28604703714648266, "grad_norm": 1.8049735120081911, "learning_rate": 8.377244047341132e-07, "loss": 0.6135, "step": 16456 }, { "epoch": 0.2860644196839855, "grad_norm": 3.6303779577526534, "learning_rate": 8.377036466843366e-07, "loss": 0.5514, "step": 16457 }, { "epoch": 0.28608180222148827, "grad_norm": 1.0786363392005276, "learning_rate": 8.376828875641948e-07, "loss": 0.1784, "step": 16458 }, { "epoch": 0.2860991847589911, "grad_norm": 1.3826662324269972, "learning_rate": 8.376621273737532e-07, "loss": 0.4873, "step": 16459 }, { "epoch": 0.28611656729649393, "grad_norm": 1.8494203819776434, "learning_rate": 8.376413661130778e-07, "loss": 0.329, "step": 16460 }, { "epoch": 0.28613394983399676, "grad_norm": 1.4917386173655163, "learning_rate": 8.376206037822344e-07, "loss": 0.5157, "step": 16461 }, { "epoch": 0.2861513323714996, "grad_norm": 1.6460378784585743, "learning_rate": 8.375998403812887e-07, "loss": 0.2986, "step": 16462 }, { "epoch": 0.2861687149090024, "grad_norm": 2.539158645162611, "learning_rate": 8.375790759103068e-07, "loss": 0.3361, "step": 16463 }, { "epoch": 0.28618609744650525, "grad_norm": 4.831085630534791, "learning_rate": 8.37558310369354e-07, "loss": 0.454, "step": 16464 }, { "epoch": 0.2862034799840081, "grad_norm": 2.150024321346679, "learning_rate": 8.375375437584966e-07, "loss": 0.4763, "step": 16465 }, { "epoch": 0.2862208625215109, "grad_norm": 1.7190773377255393, "learning_rate": 8.375167760778001e-07, "loss": 0.3057, "step": 16466 }, { "epoch": 0.28623824505901374, "grad_norm": 2.0972704234252966, "learning_rate": 8.374960073273307e-07, "loss": 0.3027, "step": 16467 }, { "epoch": 0.2862556275965165, "grad_norm": 2.006445715089063, "learning_rate": 8.374752375071539e-07, "loss": 0.2784, "step": 16468 }, { "epoch": 0.28627301013401935, "grad_norm": 1.8515534195401808, "learning_rate": 8.374544666173355e-07, "loss": 0.258, "step": 16469 }, { "epoch": 0.2862903926715222, "grad_norm": 1.7272241568546713, "learning_rate": 8.374336946579416e-07, "loss": 0.1923, "step": 16470 }, { "epoch": 0.286307775209025, "grad_norm": 3.2177168570638943, "learning_rate": 8.374129216290378e-07, "loss": 0.4931, "step": 16471 }, { "epoch": 0.28632515774652784, "grad_norm": 1.4566944284715562, "learning_rate": 8.3739214753069e-07, "loss": 0.3373, "step": 16472 }, { "epoch": 0.28634254028403067, "grad_norm": 1.0864701511357049, "learning_rate": 8.373713723629641e-07, "loss": 0.227, "step": 16473 }, { "epoch": 0.2863599228215335, "grad_norm": 1.7470411278588784, "learning_rate": 8.373505961259259e-07, "loss": 0.2723, "step": 16474 }, { "epoch": 0.2863773053590363, "grad_norm": 1.4532706307935357, "learning_rate": 8.373298188196414e-07, "loss": 0.3575, "step": 16475 }, { "epoch": 0.28639468789653916, "grad_norm": 1.4962168583102298, "learning_rate": 8.373090404441762e-07, "loss": 0.3079, "step": 16476 }, { "epoch": 0.286412070434042, "grad_norm": 2.415816163936458, "learning_rate": 8.372882609995965e-07, "loss": 0.4539, "step": 16477 }, { "epoch": 0.28642945297154476, "grad_norm": 1.1440977551078764, "learning_rate": 8.372674804859678e-07, "loss": 0.3129, "step": 16478 }, { "epoch": 0.2864468355090476, "grad_norm": 1.1126346564777785, "learning_rate": 8.37246698903356e-07, "loss": 0.3153, "step": 16479 }, { "epoch": 0.2864642180465504, "grad_norm": 1.880697493534054, "learning_rate": 8.372259162518273e-07, "loss": 0.3508, "step": 16480 }, { "epoch": 0.28648160058405325, "grad_norm": 1.504155703349962, "learning_rate": 8.372051325314473e-07, "loss": 0.25, "step": 16481 }, { "epoch": 0.2864989831215561, "grad_norm": 2.3595964945968237, "learning_rate": 8.371843477422817e-07, "loss": 0.5392, "step": 16482 }, { "epoch": 0.2865163656590589, "grad_norm": 3.267836901819721, "learning_rate": 8.37163561884397e-07, "loss": 0.4214, "step": 16483 }, { "epoch": 0.28653374819656174, "grad_norm": 1.6882836573263025, "learning_rate": 8.371427749578584e-07, "loss": 0.3813, "step": 16484 }, { "epoch": 0.2865511307340646, "grad_norm": 2.226635941540447, "learning_rate": 8.371219869627322e-07, "loss": 0.3376, "step": 16485 }, { "epoch": 0.2865685132715674, "grad_norm": 1.9206354461055886, "learning_rate": 8.371011978990841e-07, "loss": 0.7646, "step": 16486 }, { "epoch": 0.28658589580907023, "grad_norm": 5.02078561823381, "learning_rate": 8.3708040776698e-07, "loss": 0.3305, "step": 16487 }, { "epoch": 0.286603278346573, "grad_norm": 1.3529608656110472, "learning_rate": 8.370596165664859e-07, "loss": 0.243, "step": 16488 }, { "epoch": 0.28662066088407584, "grad_norm": 2.6466549327388127, "learning_rate": 8.370388242976678e-07, "loss": 0.355, "step": 16489 }, { "epoch": 0.28663804342157867, "grad_norm": 2.142960220320195, "learning_rate": 8.370180309605912e-07, "loss": 0.3666, "step": 16490 }, { "epoch": 0.2866554259590815, "grad_norm": 1.1351695018710695, "learning_rate": 8.369972365553224e-07, "loss": 0.2784, "step": 16491 }, { "epoch": 0.28667280849658433, "grad_norm": 2.257163974528661, "learning_rate": 8.369764410819271e-07, "loss": 0.2747, "step": 16492 }, { "epoch": 0.28669019103408716, "grad_norm": 1.9735040440017757, "learning_rate": 8.369556445404713e-07, "loss": 0.5506, "step": 16493 }, { "epoch": 0.28670757357159, "grad_norm": 2.053147105380071, "learning_rate": 8.36934846931021e-07, "loss": 0.3344, "step": 16494 }, { "epoch": 0.2867249561090928, "grad_norm": 1.3554854971302306, "learning_rate": 8.369140482536419e-07, "loss": 0.2307, "step": 16495 }, { "epoch": 0.28674233864659565, "grad_norm": 1.6517804519982213, "learning_rate": 8.368932485084e-07, "loss": 0.3803, "step": 16496 }, { "epoch": 0.2867597211840985, "grad_norm": 1.4281051441834849, "learning_rate": 8.368724476953614e-07, "loss": 0.278, "step": 16497 }, { "epoch": 0.28677710372160126, "grad_norm": 2.2602955540396303, "learning_rate": 8.368516458145918e-07, "loss": 0.3354, "step": 16498 }, { "epoch": 0.2867944862591041, "grad_norm": 3.561619532499502, "learning_rate": 8.368308428661572e-07, "loss": 0.481, "step": 16499 }, { "epoch": 0.2868118687966069, "grad_norm": 2.8881767975011865, "learning_rate": 8.368100388501236e-07, "loss": 0.2774, "step": 16500 }, { "epoch": 0.28682925133410975, "grad_norm": 1.6368426460400152, "learning_rate": 8.367892337665568e-07, "loss": 0.3103, "step": 16501 }, { "epoch": 0.2868466338716126, "grad_norm": 3.51566907606043, "learning_rate": 8.367684276155229e-07, "loss": 0.306, "step": 16502 }, { "epoch": 0.2868640164091154, "grad_norm": 1.5758030484332735, "learning_rate": 8.367476203970878e-07, "loss": 0.4373, "step": 16503 }, { "epoch": 0.28688139894661824, "grad_norm": 1.7318679450502334, "learning_rate": 8.367268121113173e-07, "loss": 0.2862, "step": 16504 }, { "epoch": 0.28689878148412107, "grad_norm": 2.7616148140155232, "learning_rate": 8.367060027582777e-07, "loss": 0.3885, "step": 16505 }, { "epoch": 0.2869161640216239, "grad_norm": 3.153454838926503, "learning_rate": 8.366851923380347e-07, "loss": 0.435, "step": 16506 }, { "epoch": 0.28693354655912673, "grad_norm": 2.728918120431227, "learning_rate": 8.366643808506542e-07, "loss": 0.4979, "step": 16507 }, { "epoch": 0.2869509290966295, "grad_norm": 3.345279560659731, "learning_rate": 8.366435682962023e-07, "loss": 0.6279, "step": 16508 }, { "epoch": 0.28696831163413233, "grad_norm": 2.012879060749558, "learning_rate": 8.36622754674745e-07, "loss": 0.5996, "step": 16509 }, { "epoch": 0.28698569417163516, "grad_norm": 2.136426713350805, "learning_rate": 8.366019399863481e-07, "loss": 0.3856, "step": 16510 }, { "epoch": 0.287003076709138, "grad_norm": 1.2450112751373505, "learning_rate": 8.365811242310778e-07, "loss": 0.5092, "step": 16511 }, { "epoch": 0.2870204592466408, "grad_norm": 2.3541510918436024, "learning_rate": 8.365603074089998e-07, "loss": 0.4634, "step": 16512 }, { "epoch": 0.28703784178414365, "grad_norm": 2.63279036284512, "learning_rate": 8.365394895201802e-07, "loss": 0.4212, "step": 16513 }, { "epoch": 0.2870552243216465, "grad_norm": 1.400861916424356, "learning_rate": 8.365186705646851e-07, "loss": 0.2344, "step": 16514 }, { "epoch": 0.2870726068591493, "grad_norm": 1.7521443768719993, "learning_rate": 8.364978505425805e-07, "loss": 0.3077, "step": 16515 }, { "epoch": 0.28708998939665215, "grad_norm": 1.9446934729594174, "learning_rate": 8.364770294539322e-07, "loss": 0.2653, "step": 16516 }, { "epoch": 0.2871073719341549, "grad_norm": 2.064201393669419, "learning_rate": 8.364562072988061e-07, "loss": 0.343, "step": 16517 }, { "epoch": 0.28712475447165775, "grad_norm": 1.9352280354562061, "learning_rate": 8.364353840772687e-07, "loss": 0.3861, "step": 16518 }, { "epoch": 0.2871421370091606, "grad_norm": 2.3629014750343873, "learning_rate": 8.364145597893855e-07, "loss": 0.5312, "step": 16519 }, { "epoch": 0.2871595195466634, "grad_norm": 1.7318230506945604, "learning_rate": 8.363937344352228e-07, "loss": 0.2468, "step": 16520 }, { "epoch": 0.28717690208416624, "grad_norm": 1.8850987671828578, "learning_rate": 8.363729080148463e-07, "loss": 0.307, "step": 16521 }, { "epoch": 0.28719428462166907, "grad_norm": 1.9520869892165762, "learning_rate": 8.363520805283223e-07, "loss": 0.3352, "step": 16522 }, { "epoch": 0.2872116671591719, "grad_norm": 3.407958108804254, "learning_rate": 8.363312519757166e-07, "loss": 0.4447, "step": 16523 }, { "epoch": 0.28722904969667473, "grad_norm": 1.6253706775797565, "learning_rate": 8.363104223570956e-07, "loss": 0.4152, "step": 16524 }, { "epoch": 0.28724643223417756, "grad_norm": 2.7212505677941685, "learning_rate": 8.362895916725248e-07, "loss": 0.3775, "step": 16525 }, { "epoch": 0.2872638147716804, "grad_norm": 1.520180562168254, "learning_rate": 8.362687599220705e-07, "loss": 0.3008, "step": 16526 }, { "epoch": 0.28728119730918317, "grad_norm": 2.1604860001227038, "learning_rate": 8.362479271057988e-07, "loss": 0.3139, "step": 16527 }, { "epoch": 0.287298579846686, "grad_norm": 3.054377694874222, "learning_rate": 8.362270932237756e-07, "loss": 0.5054, "step": 16528 }, { "epoch": 0.2873159623841888, "grad_norm": 1.8838251585868466, "learning_rate": 8.36206258276067e-07, "loss": 0.2823, "step": 16529 }, { "epoch": 0.28733334492169166, "grad_norm": 1.7435337397471584, "learning_rate": 8.361854222627389e-07, "loss": 0.2916, "step": 16530 }, { "epoch": 0.2873507274591945, "grad_norm": 1.1557541711991797, "learning_rate": 8.361645851838575e-07, "loss": 0.2246, "step": 16531 }, { "epoch": 0.2873681099966973, "grad_norm": 1.6291343438776704, "learning_rate": 8.361437470394888e-07, "loss": 0.2528, "step": 16532 }, { "epoch": 0.28738549253420015, "grad_norm": 1.9560440814423354, "learning_rate": 8.361229078296989e-07, "loss": 0.5866, "step": 16533 }, { "epoch": 0.287402875071703, "grad_norm": 2.075386629206846, "learning_rate": 8.361020675545535e-07, "loss": 0.2351, "step": 16534 }, { "epoch": 0.2874202576092058, "grad_norm": 2.7646731350861233, "learning_rate": 8.360812262141192e-07, "loss": 0.4454, "step": 16535 }, { "epoch": 0.28743764014670864, "grad_norm": 2.0163184402448557, "learning_rate": 8.360603838084618e-07, "loss": 0.2345, "step": 16536 }, { "epoch": 0.2874550226842114, "grad_norm": 2.8426287626043285, "learning_rate": 8.360395403376473e-07, "loss": 0.5862, "step": 16537 }, { "epoch": 0.28747240522171424, "grad_norm": 1.3692126210944857, "learning_rate": 8.360186958017419e-07, "loss": 0.318, "step": 16538 }, { "epoch": 0.2874897877592171, "grad_norm": 2.712325872004468, "learning_rate": 8.359978502008113e-07, "loss": 0.3349, "step": 16539 }, { "epoch": 0.2875071702967199, "grad_norm": 1.965050705548148, "learning_rate": 8.359770035349223e-07, "loss": 0.3547, "step": 16540 }, { "epoch": 0.28752455283422274, "grad_norm": 2.214902319185344, "learning_rate": 8.359561558041402e-07, "loss": 0.4015, "step": 16541 }, { "epoch": 0.28754193537172557, "grad_norm": 1.5472815415823358, "learning_rate": 8.359353070085315e-07, "loss": 0.3211, "step": 16542 }, { "epoch": 0.2875593179092284, "grad_norm": 2.5378115596713418, "learning_rate": 8.359144571481622e-07, "loss": 0.4218, "step": 16543 }, { "epoch": 0.2875767004467312, "grad_norm": 1.3770684960102986, "learning_rate": 8.358936062230983e-07, "loss": 0.239, "step": 16544 }, { "epoch": 0.28759408298423406, "grad_norm": 1.4405427594033862, "learning_rate": 8.35872754233406e-07, "loss": 0.3956, "step": 16545 }, { "epoch": 0.2876114655217369, "grad_norm": 1.2937670291361965, "learning_rate": 8.358519011791516e-07, "loss": 0.2308, "step": 16546 }, { "epoch": 0.28762884805923966, "grad_norm": 1.696114663126748, "learning_rate": 8.358310470604006e-07, "loss": 0.2957, "step": 16547 }, { "epoch": 0.2876462305967425, "grad_norm": 1.3551677126527346, "learning_rate": 8.358101918772196e-07, "loss": 0.2332, "step": 16548 }, { "epoch": 0.2876636131342453, "grad_norm": 1.8420587601653997, "learning_rate": 8.357893356296746e-07, "loss": 0.4493, "step": 16549 }, { "epoch": 0.28768099567174815, "grad_norm": 1.464523898065853, "learning_rate": 8.357684783178315e-07, "loss": 0.3799, "step": 16550 }, { "epoch": 0.287698378209251, "grad_norm": 1.6773767680797016, "learning_rate": 8.357476199417568e-07, "loss": 0.2295, "step": 16551 }, { "epoch": 0.2877157607467538, "grad_norm": 1.9278908574032971, "learning_rate": 8.357267605015163e-07, "loss": 0.4099, "step": 16552 }, { "epoch": 0.28773314328425664, "grad_norm": 2.9371542310468093, "learning_rate": 8.357058999971761e-07, "loss": 0.3295, "step": 16553 }, { "epoch": 0.2877505258217595, "grad_norm": 2.2156730183598756, "learning_rate": 8.356850384288024e-07, "loss": 0.4224, "step": 16554 }, { "epoch": 0.2877679083592623, "grad_norm": 2.5865732528771996, "learning_rate": 8.356641757964613e-07, "loss": 0.4052, "step": 16555 }, { "epoch": 0.28778529089676513, "grad_norm": 1.4887067894648207, "learning_rate": 8.356433121002191e-07, "loss": 0.4219, "step": 16556 }, { "epoch": 0.2878026734342679, "grad_norm": 1.952563816150538, "learning_rate": 8.356224473401416e-07, "loss": 0.3471, "step": 16557 }, { "epoch": 0.28782005597177074, "grad_norm": 1.2421474437913373, "learning_rate": 8.356015815162954e-07, "loss": 0.3128, "step": 16558 }, { "epoch": 0.28783743850927357, "grad_norm": 1.454766674589769, "learning_rate": 8.355807146287461e-07, "loss": 0.4014, "step": 16559 }, { "epoch": 0.2878548210467764, "grad_norm": 1.6786350997680932, "learning_rate": 8.355598466775602e-07, "loss": 0.3614, "step": 16560 }, { "epoch": 0.28787220358427923, "grad_norm": 4.637154076610779, "learning_rate": 8.355389776628037e-07, "loss": 0.6809, "step": 16561 }, { "epoch": 0.28788958612178206, "grad_norm": 1.3904742268629922, "learning_rate": 8.355181075845428e-07, "loss": 0.3019, "step": 16562 }, { "epoch": 0.2879069686592849, "grad_norm": 2.5715510159486867, "learning_rate": 8.354972364428436e-07, "loss": 0.4434, "step": 16563 }, { "epoch": 0.2879243511967877, "grad_norm": 1.9014669856710986, "learning_rate": 8.354763642377722e-07, "loss": 0.475, "step": 16564 }, { "epoch": 0.28794173373429055, "grad_norm": 2.3090440542794357, "learning_rate": 8.354554909693949e-07, "loss": 0.3029, "step": 16565 }, { "epoch": 0.2879591162717934, "grad_norm": 1.8388112242510675, "learning_rate": 8.354346166377777e-07, "loss": 0.263, "step": 16566 }, { "epoch": 0.28797649880929616, "grad_norm": 1.1243703503234865, "learning_rate": 8.354137412429869e-07, "loss": 0.3052, "step": 16567 }, { "epoch": 0.287993881346799, "grad_norm": 1.8764199898494731, "learning_rate": 8.353928647850887e-07, "loss": 0.3447, "step": 16568 }, { "epoch": 0.2880112638843018, "grad_norm": 1.731512997247577, "learning_rate": 8.353719872641491e-07, "loss": 0.568, "step": 16569 }, { "epoch": 0.28802864642180465, "grad_norm": 1.9561410503044212, "learning_rate": 8.353511086802346e-07, "loss": 0.3058, "step": 16570 }, { "epoch": 0.2880460289593075, "grad_norm": 1.5782304755407666, "learning_rate": 8.353302290334107e-07, "loss": 0.2115, "step": 16571 }, { "epoch": 0.2880634114968103, "grad_norm": 1.8629390517058375, "learning_rate": 8.353093483237444e-07, "loss": 0.5587, "step": 16572 }, { "epoch": 0.28808079403431314, "grad_norm": 1.9854971356211852, "learning_rate": 8.352884665513013e-07, "loss": 0.3954, "step": 16573 }, { "epoch": 0.28809817657181597, "grad_norm": 5.441236306172417, "learning_rate": 8.352675837161478e-07, "loss": 0.3709, "step": 16574 }, { "epoch": 0.2881155591093188, "grad_norm": 1.4716057001385197, "learning_rate": 8.3524669981835e-07, "loss": 0.2757, "step": 16575 }, { "epoch": 0.2881329416468216, "grad_norm": 1.9168477132177664, "learning_rate": 8.352258148579743e-07, "loss": 0.3337, "step": 16576 }, { "epoch": 0.2881503241843244, "grad_norm": 1.8195251598469493, "learning_rate": 8.352049288350867e-07, "loss": 0.2153, "step": 16577 }, { "epoch": 0.28816770672182723, "grad_norm": 4.09121826566688, "learning_rate": 8.351840417497535e-07, "loss": 0.3967, "step": 16578 }, { "epoch": 0.28818508925933006, "grad_norm": 1.115491910398534, "learning_rate": 8.351631536020409e-07, "loss": 0.2806, "step": 16579 }, { "epoch": 0.2882024717968329, "grad_norm": 1.3066478038313394, "learning_rate": 8.351422643920148e-07, "loss": 0.4228, "step": 16580 }, { "epoch": 0.2882198543343357, "grad_norm": 1.787264253008939, "learning_rate": 8.35121374119742e-07, "loss": 0.3637, "step": 16581 }, { "epoch": 0.28823723687183855, "grad_norm": 2.257807363956492, "learning_rate": 8.351004827852881e-07, "loss": 0.306, "step": 16582 }, { "epoch": 0.2882546194093414, "grad_norm": 2.18355039262904, "learning_rate": 8.350795903887198e-07, "loss": 0.2862, "step": 16583 }, { "epoch": 0.2882720019468442, "grad_norm": 2.231439514671589, "learning_rate": 8.35058696930103e-07, "loss": 0.2947, "step": 16584 }, { "epoch": 0.28828938448434704, "grad_norm": 1.7396819846617502, "learning_rate": 8.350378024095042e-07, "loss": 0.3766, "step": 16585 }, { "epoch": 0.2883067670218499, "grad_norm": 1.7198846140107753, "learning_rate": 8.350169068269894e-07, "loss": 0.3127, "step": 16586 }, { "epoch": 0.28832414955935265, "grad_norm": 2.209305066342231, "learning_rate": 8.349960101826247e-07, "loss": 0.4686, "step": 16587 }, { "epoch": 0.2883415320968555, "grad_norm": 1.8828110079001492, "learning_rate": 8.349751124764768e-07, "loss": 0.4086, "step": 16588 }, { "epoch": 0.2883589146343583, "grad_norm": 1.0001949514219692, "learning_rate": 8.349542137086116e-07, "loss": 0.2716, "step": 16589 }, { "epoch": 0.28837629717186114, "grad_norm": 2.905824798071669, "learning_rate": 8.349333138790954e-07, "loss": 0.6806, "step": 16590 }, { "epoch": 0.28839367970936397, "grad_norm": 2.199350194560212, "learning_rate": 8.349124129879944e-07, "loss": 0.6465, "step": 16591 }, { "epoch": 0.2884110622468668, "grad_norm": 2.150972668883728, "learning_rate": 8.348915110353749e-07, "loss": 0.3166, "step": 16592 }, { "epoch": 0.28842844478436963, "grad_norm": 1.5938488430358735, "learning_rate": 8.348706080213032e-07, "loss": 0.444, "step": 16593 }, { "epoch": 0.28844582732187246, "grad_norm": 1.5564484785663157, "learning_rate": 8.348497039458455e-07, "loss": 0.5029, "step": 16594 }, { "epoch": 0.2884632098593753, "grad_norm": 2.43607424979954, "learning_rate": 8.348287988090681e-07, "loss": 0.3725, "step": 16595 }, { "epoch": 0.2884805923968781, "grad_norm": 1.7334040089550844, "learning_rate": 8.348078926110372e-07, "loss": 0.2958, "step": 16596 }, { "epoch": 0.2884979749343809, "grad_norm": 2.4990263220935263, "learning_rate": 8.34786985351819e-07, "loss": 0.639, "step": 16597 }, { "epoch": 0.2885153574718837, "grad_norm": 2.1817162712671463, "learning_rate": 8.347660770314798e-07, "loss": 0.4809, "step": 16598 }, { "epoch": 0.28853274000938656, "grad_norm": 2.1106487557247764, "learning_rate": 8.34745167650086e-07, "loss": 0.3996, "step": 16599 }, { "epoch": 0.2885501225468894, "grad_norm": 2.423615914861627, "learning_rate": 8.347242572077037e-07, "loss": 0.485, "step": 16600 }, { "epoch": 0.2885675050843922, "grad_norm": 1.233434397271677, "learning_rate": 8.347033457043995e-07, "loss": 0.51, "step": 16601 }, { "epoch": 0.28858488762189505, "grad_norm": 4.743290629772411, "learning_rate": 8.346824331402392e-07, "loss": 0.3382, "step": 16602 }, { "epoch": 0.2886022701593979, "grad_norm": 1.4387992851886147, "learning_rate": 8.346615195152895e-07, "loss": 0.2791, "step": 16603 }, { "epoch": 0.2886196526969007, "grad_norm": 1.8840492118458345, "learning_rate": 8.346406048296164e-07, "loss": 0.3221, "step": 16604 }, { "epoch": 0.28863703523440354, "grad_norm": 1.8666870201858323, "learning_rate": 8.346196890832863e-07, "loss": 0.5166, "step": 16605 }, { "epoch": 0.28865441777190637, "grad_norm": 3.390294190833414, "learning_rate": 8.345987722763655e-07, "loss": 0.4927, "step": 16606 }, { "epoch": 0.28867180030940914, "grad_norm": 2.2218118772833213, "learning_rate": 8.345778544089203e-07, "loss": 0.29, "step": 16607 }, { "epoch": 0.288689182846912, "grad_norm": 2.7069427466028175, "learning_rate": 8.345569354810171e-07, "loss": 0.4876, "step": 16608 }, { "epoch": 0.2887065653844148, "grad_norm": 2.1262128119881543, "learning_rate": 8.345360154927219e-07, "loss": 0.5362, "step": 16609 }, { "epoch": 0.28872394792191763, "grad_norm": 1.5457182058567118, "learning_rate": 8.345150944441013e-07, "loss": 0.2738, "step": 16610 }, { "epoch": 0.28874133045942046, "grad_norm": 1.5803318217392948, "learning_rate": 8.344941723352217e-07, "loss": 0.2972, "step": 16611 }, { "epoch": 0.2887587129969233, "grad_norm": 2.4888145807706965, "learning_rate": 8.344732491661491e-07, "loss": 0.3827, "step": 16612 }, { "epoch": 0.2887760955344261, "grad_norm": 2.3563190505843767, "learning_rate": 8.344523249369497e-07, "loss": 0.3587, "step": 16613 }, { "epoch": 0.28879347807192896, "grad_norm": 1.4101307253071937, "learning_rate": 8.344313996476904e-07, "loss": 0.3847, "step": 16614 }, { "epoch": 0.2888108606094318, "grad_norm": 2.0241019749274938, "learning_rate": 8.344104732984369e-07, "loss": 0.3768, "step": 16615 }, { "epoch": 0.2888282431469346, "grad_norm": 1.5071320310648797, "learning_rate": 8.34389545889256e-07, "loss": 0.3192, "step": 16616 }, { "epoch": 0.2888456256844374, "grad_norm": 1.9075182102912254, "learning_rate": 8.343686174202138e-07, "loss": 0.2421, "step": 16617 }, { "epoch": 0.2888630082219402, "grad_norm": 2.338590161096024, "learning_rate": 8.343476878913767e-07, "loss": 0.3714, "step": 16618 }, { "epoch": 0.28888039075944305, "grad_norm": 1.6076811434682496, "learning_rate": 8.343267573028111e-07, "loss": 0.4357, "step": 16619 }, { "epoch": 0.2888977732969459, "grad_norm": 2.7261043058876364, "learning_rate": 8.343058256545832e-07, "loss": 0.3125, "step": 16620 }, { "epoch": 0.2889151558344487, "grad_norm": 1.7282787780441322, "learning_rate": 8.342848929467593e-07, "loss": 0.2538, "step": 16621 }, { "epoch": 0.28893253837195154, "grad_norm": 1.2740844527602242, "learning_rate": 8.342639591794059e-07, "loss": 0.4516, "step": 16622 }, { "epoch": 0.28894992090945437, "grad_norm": 1.4913984021489997, "learning_rate": 8.342430243525893e-07, "loss": 0.2629, "step": 16623 }, { "epoch": 0.2889673034469572, "grad_norm": 1.9461725565417443, "learning_rate": 8.34222088466376e-07, "loss": 0.5082, "step": 16624 }, { "epoch": 0.28898468598446003, "grad_norm": 2.0129131445535084, "learning_rate": 8.34201151520832e-07, "loss": 0.5176, "step": 16625 }, { "epoch": 0.28900206852196286, "grad_norm": 1.4713931674627005, "learning_rate": 8.34180213516024e-07, "loss": 0.3271, "step": 16626 }, { "epoch": 0.28901945105946564, "grad_norm": 1.9179439698049519, "learning_rate": 8.34159274452018e-07, "loss": 0.3057, "step": 16627 }, { "epoch": 0.28903683359696847, "grad_norm": 1.580458166274092, "learning_rate": 8.341383343288809e-07, "loss": 0.2268, "step": 16628 }, { "epoch": 0.2890542161344713, "grad_norm": 1.977176141698844, "learning_rate": 8.341173931466788e-07, "loss": 0.3077, "step": 16629 }, { "epoch": 0.28907159867197413, "grad_norm": 2.736304759883726, "learning_rate": 8.340964509054777e-07, "loss": 0.2211, "step": 16630 }, { "epoch": 0.28908898120947696, "grad_norm": 1.3214551884956998, "learning_rate": 8.340755076053447e-07, "loss": 0.1782, "step": 16631 }, { "epoch": 0.2891063637469798, "grad_norm": 2.3660112964620716, "learning_rate": 8.340545632463455e-07, "loss": 0.2914, "step": 16632 }, { "epoch": 0.2891237462844826, "grad_norm": 2.8603398460380385, "learning_rate": 8.340336178285468e-07, "loss": 0.486, "step": 16633 }, { "epoch": 0.28914112882198545, "grad_norm": 1.4013397607321851, "learning_rate": 8.340126713520151e-07, "loss": 0.1883, "step": 16634 }, { "epoch": 0.2891585113594883, "grad_norm": 1.4415606206530638, "learning_rate": 8.339917238168169e-07, "loss": 0.2569, "step": 16635 }, { "epoch": 0.2891758938969911, "grad_norm": 1.3924015379713792, "learning_rate": 8.339707752230178e-07, "loss": 0.5834, "step": 16636 }, { "epoch": 0.2891932764344939, "grad_norm": 1.2113315283814954, "learning_rate": 8.339498255706851e-07, "loss": 0.2855, "step": 16637 }, { "epoch": 0.2892106589719967, "grad_norm": 2.0692530852264372, "learning_rate": 8.33928874859885e-07, "loss": 0.4072, "step": 16638 }, { "epoch": 0.28922804150949954, "grad_norm": 1.423964008107112, "learning_rate": 8.339079230906835e-07, "loss": 0.2754, "step": 16639 }, { "epoch": 0.2892454240470024, "grad_norm": 2.0668577798630055, "learning_rate": 8.338869702631472e-07, "loss": 0.4147, "step": 16640 }, { "epoch": 0.2892628065845052, "grad_norm": 2.0370077621564673, "learning_rate": 8.338660163773427e-07, "loss": 0.2781, "step": 16641 }, { "epoch": 0.28928018912200804, "grad_norm": 2.2068637391082073, "learning_rate": 8.338450614333364e-07, "loss": 0.4177, "step": 16642 }, { "epoch": 0.28929757165951087, "grad_norm": 1.3581572469355399, "learning_rate": 8.338241054311946e-07, "loss": 0.2978, "step": 16643 }, { "epoch": 0.2893149541970137, "grad_norm": 1.8253751497434727, "learning_rate": 8.338031483709836e-07, "loss": 0.243, "step": 16644 }, { "epoch": 0.2893323367345165, "grad_norm": 1.9014949831721977, "learning_rate": 8.337821902527699e-07, "loss": 0.5561, "step": 16645 }, { "epoch": 0.28934971927201936, "grad_norm": 1.9919615021382029, "learning_rate": 8.337612310766202e-07, "loss": 0.2561, "step": 16646 }, { "epoch": 0.28936710180952213, "grad_norm": 1.783333380689559, "learning_rate": 8.337402708426007e-07, "loss": 0.5984, "step": 16647 }, { "epoch": 0.28938448434702496, "grad_norm": 1.7521608688870778, "learning_rate": 8.337193095507778e-07, "loss": 0.5261, "step": 16648 }, { "epoch": 0.2894018668845278, "grad_norm": 1.7948633792692887, "learning_rate": 8.336983472012178e-07, "loss": 0.2602, "step": 16649 }, { "epoch": 0.2894192494220306, "grad_norm": 2.1165781661248424, "learning_rate": 8.336773837939877e-07, "loss": 0.4796, "step": 16650 }, { "epoch": 0.28943663195953345, "grad_norm": 3.775521085711404, "learning_rate": 8.336564193291532e-07, "loss": 0.4777, "step": 16651 }, { "epoch": 0.2894540144970363, "grad_norm": 1.4132915703006972, "learning_rate": 8.336354538067813e-07, "loss": 0.3526, "step": 16652 }, { "epoch": 0.2894713970345391, "grad_norm": 1.5370650550473761, "learning_rate": 8.336144872269383e-07, "loss": 0.3587, "step": 16653 }, { "epoch": 0.28948877957204194, "grad_norm": 2.550455667201696, "learning_rate": 8.335935195896906e-07, "loss": 0.5951, "step": 16654 }, { "epoch": 0.2895061621095448, "grad_norm": 2.4115764817319896, "learning_rate": 8.335725508951046e-07, "loss": 0.4042, "step": 16655 }, { "epoch": 0.2895235446470476, "grad_norm": 2.4768103035948887, "learning_rate": 8.335515811432468e-07, "loss": 0.4986, "step": 16656 }, { "epoch": 0.2895409271845504, "grad_norm": 1.6774137772784268, "learning_rate": 8.33530610334184e-07, "loss": 0.3521, "step": 16657 }, { "epoch": 0.2895583097220532, "grad_norm": 1.6650198641449903, "learning_rate": 8.335096384679821e-07, "loss": 0.5212, "step": 16658 }, { "epoch": 0.28957569225955604, "grad_norm": 2.1207745429866542, "learning_rate": 8.334886655447077e-07, "loss": 0.5783, "step": 16659 }, { "epoch": 0.28959307479705887, "grad_norm": 2.465481327083439, "learning_rate": 8.334676915644279e-07, "loss": 0.2726, "step": 16660 }, { "epoch": 0.2896104573345617, "grad_norm": 2.1007933033083277, "learning_rate": 8.334467165272083e-07, "loss": 0.5375, "step": 16661 }, { "epoch": 0.28962783987206453, "grad_norm": 1.8339853474117134, "learning_rate": 8.33425740433116e-07, "loss": 0.228, "step": 16662 }, { "epoch": 0.28964522240956736, "grad_norm": 3.0491343760765073, "learning_rate": 8.334047632822172e-07, "loss": 0.3111, "step": 16663 }, { "epoch": 0.2896626049470702, "grad_norm": 2.3493004780810725, "learning_rate": 8.333837850745784e-07, "loss": 0.3988, "step": 16664 }, { "epoch": 0.289679987484573, "grad_norm": 1.19457450893612, "learning_rate": 8.333628058102662e-07, "loss": 0.2993, "step": 16665 }, { "epoch": 0.2896973700220758, "grad_norm": 1.5292056586154201, "learning_rate": 8.333418254893472e-07, "loss": 0.3675, "step": 16666 }, { "epoch": 0.2897147525595786, "grad_norm": 1.6946548874509053, "learning_rate": 8.333208441118875e-07, "loss": 0.342, "step": 16667 }, { "epoch": 0.28973213509708146, "grad_norm": 2.1633977321979247, "learning_rate": 8.332998616779539e-07, "loss": 0.3496, "step": 16668 }, { "epoch": 0.2897495176345843, "grad_norm": 1.6655544534757356, "learning_rate": 8.332788781876128e-07, "loss": 0.2917, "step": 16669 }, { "epoch": 0.2897669001720871, "grad_norm": 5.6660877912607255, "learning_rate": 8.33257893640931e-07, "loss": 0.4315, "step": 16670 }, { "epoch": 0.28978428270958995, "grad_norm": 0.950190028592503, "learning_rate": 8.332369080379745e-07, "loss": 0.4868, "step": 16671 }, { "epoch": 0.2898016652470928, "grad_norm": 2.7656429876615487, "learning_rate": 8.332159213788101e-07, "loss": 0.2533, "step": 16672 }, { "epoch": 0.2898190477845956, "grad_norm": 1.5261008957312485, "learning_rate": 8.331949336635044e-07, "loss": 0.2076, "step": 16673 }, { "epoch": 0.28983643032209844, "grad_norm": 2.6814603695201003, "learning_rate": 8.331739448921236e-07, "loss": 0.278, "step": 16674 }, { "epoch": 0.28985381285960127, "grad_norm": 1.8756693483539553, "learning_rate": 8.331529550647347e-07, "loss": 0.3712, "step": 16675 }, { "epoch": 0.28987119539710404, "grad_norm": 1.425927080274337, "learning_rate": 8.331319641814039e-07, "loss": 0.4571, "step": 16676 }, { "epoch": 0.2898885779346069, "grad_norm": 0.9965549184770962, "learning_rate": 8.331109722421977e-07, "loss": 0.4121, "step": 16677 }, { "epoch": 0.2899059604721097, "grad_norm": 1.427089337092178, "learning_rate": 8.330899792471828e-07, "loss": 0.324, "step": 16678 }, { "epoch": 0.28992334300961253, "grad_norm": 1.6704984988872782, "learning_rate": 8.330689851964257e-07, "loss": 0.3147, "step": 16679 }, { "epoch": 0.28994072554711536, "grad_norm": 1.78635519289518, "learning_rate": 8.330479900899929e-07, "loss": 0.3827, "step": 16680 }, { "epoch": 0.2899581080846182, "grad_norm": 1.5123451856968102, "learning_rate": 8.33026993927951e-07, "loss": 0.2237, "step": 16681 }, { "epoch": 0.289975490622121, "grad_norm": 1.5444086432365352, "learning_rate": 8.330059967103666e-07, "loss": 0.2767, "step": 16682 }, { "epoch": 0.28999287315962385, "grad_norm": 1.6683354318256336, "learning_rate": 8.32984998437306e-07, "loss": 0.3272, "step": 16683 }, { "epoch": 0.2900102556971267, "grad_norm": 1.5977259129284653, "learning_rate": 8.32963999108836e-07, "loss": 0.4151, "step": 16684 }, { "epoch": 0.2900276382346295, "grad_norm": 1.6553887155402456, "learning_rate": 8.32942998725023e-07, "loss": 0.5551, "step": 16685 }, { "epoch": 0.2900450207721323, "grad_norm": 1.5683131575331062, "learning_rate": 8.329219972859336e-07, "loss": 0.1951, "step": 16686 }, { "epoch": 0.2900624033096351, "grad_norm": 0.7979222979222969, "learning_rate": 8.329009947916346e-07, "loss": 0.1944, "step": 16687 }, { "epoch": 0.29007978584713795, "grad_norm": 2.5794195711593937, "learning_rate": 8.328799912421922e-07, "loss": 0.4029, "step": 16688 }, { "epoch": 0.2900971683846408, "grad_norm": 1.2677434405629506, "learning_rate": 8.328589866376732e-07, "loss": 0.4295, "step": 16689 }, { "epoch": 0.2901145509221436, "grad_norm": 1.2727563923861807, "learning_rate": 8.32837980978144e-07, "loss": 0.351, "step": 16690 }, { "epoch": 0.29013193345964644, "grad_norm": 2.0377371652796983, "learning_rate": 8.328169742636715e-07, "loss": 0.3373, "step": 16691 }, { "epoch": 0.29014931599714927, "grad_norm": 1.1248335203021658, "learning_rate": 8.327959664943218e-07, "loss": 0.1054, "step": 16692 }, { "epoch": 0.2901666985346521, "grad_norm": 1.9676308415884196, "learning_rate": 8.327749576701619e-07, "loss": 0.2627, "step": 16693 }, { "epoch": 0.29018408107215493, "grad_norm": 2.4906040791826727, "learning_rate": 8.327539477912583e-07, "loss": 0.4217, "step": 16694 }, { "epoch": 0.29020146360965776, "grad_norm": 3.518539710343157, "learning_rate": 8.327329368576775e-07, "loss": 0.308, "step": 16695 }, { "epoch": 0.29021884614716054, "grad_norm": 1.54965729295212, "learning_rate": 8.327119248694861e-07, "loss": 0.2032, "step": 16696 }, { "epoch": 0.29023622868466337, "grad_norm": 1.2741296198006462, "learning_rate": 8.326909118267509e-07, "loss": 0.2219, "step": 16697 }, { "epoch": 0.2902536112221662, "grad_norm": 1.9707794906173255, "learning_rate": 8.326698977295381e-07, "loss": 0.4927, "step": 16698 }, { "epoch": 0.290270993759669, "grad_norm": 1.4579169909196197, "learning_rate": 8.326488825779148e-07, "loss": 0.3467, "step": 16699 }, { "epoch": 0.29028837629717186, "grad_norm": 1.0586595737820368, "learning_rate": 8.32627866371947e-07, "loss": 0.2093, "step": 16700 }, { "epoch": 0.2903057588346747, "grad_norm": 1.7345455726953145, "learning_rate": 8.32606849111702e-07, "loss": 0.2808, "step": 16701 }, { "epoch": 0.2903231413721775, "grad_norm": 1.1983304956110865, "learning_rate": 8.325858307972459e-07, "loss": 0.3649, "step": 16702 }, { "epoch": 0.29034052390968035, "grad_norm": 3.904421193840745, "learning_rate": 8.325648114286454e-07, "loss": 0.4626, "step": 16703 }, { "epoch": 0.2903579064471832, "grad_norm": 2.537157257652365, "learning_rate": 8.325437910059675e-07, "loss": 0.2494, "step": 16704 }, { "epoch": 0.290375288984686, "grad_norm": 2.0748392870784738, "learning_rate": 8.325227695292784e-07, "loss": 0.2658, "step": 16705 }, { "epoch": 0.2903926715221888, "grad_norm": 1.8457395649484012, "learning_rate": 8.325017469986448e-07, "loss": 0.3312, "step": 16706 }, { "epoch": 0.2904100540596916, "grad_norm": 1.7892737016827864, "learning_rate": 8.324807234141336e-07, "loss": 0.2539, "step": 16707 }, { "epoch": 0.29042743659719444, "grad_norm": 1.6738610290694813, "learning_rate": 8.32459698775811e-07, "loss": 0.4303, "step": 16708 }, { "epoch": 0.2904448191346973, "grad_norm": 1.9259610895455337, "learning_rate": 8.32438673083744e-07, "loss": 0.4041, "step": 16709 }, { "epoch": 0.2904622016722001, "grad_norm": 1.1864036190094045, "learning_rate": 8.324176463379991e-07, "loss": 0.3099, "step": 16710 }, { "epoch": 0.29047958420970293, "grad_norm": 1.4873362793979183, "learning_rate": 8.323966185386428e-07, "loss": 0.281, "step": 16711 }, { "epoch": 0.29049696674720576, "grad_norm": 4.469851581215864, "learning_rate": 8.32375589685742e-07, "loss": 0.3048, "step": 16712 }, { "epoch": 0.2905143492847086, "grad_norm": 1.5752956659962916, "learning_rate": 8.323545597793636e-07, "loss": 0.397, "step": 16713 }, { "epoch": 0.2905317318222114, "grad_norm": 3.0212395740515157, "learning_rate": 8.323335288195734e-07, "loss": 0.4002, "step": 16714 }, { "epoch": 0.29054911435971426, "grad_norm": 1.8639529411925637, "learning_rate": 8.323124968064389e-07, "loss": 0.4241, "step": 16715 }, { "epoch": 0.29056649689721703, "grad_norm": 2.4886986577595565, "learning_rate": 8.322914637400262e-07, "loss": 0.6379, "step": 16716 }, { "epoch": 0.29058387943471986, "grad_norm": 2.116693789375051, "learning_rate": 8.322704296204026e-07, "loss": 0.4683, "step": 16717 }, { "epoch": 0.2906012619722227, "grad_norm": 2.7615388209715976, "learning_rate": 8.32249394447634e-07, "loss": 0.4699, "step": 16718 }, { "epoch": 0.2906186445097255, "grad_norm": 2.2283932012330236, "learning_rate": 8.322283582217875e-07, "loss": 0.4777, "step": 16719 }, { "epoch": 0.29063602704722835, "grad_norm": 1.8936263727997815, "learning_rate": 8.322073209429299e-07, "loss": 0.2691, "step": 16720 }, { "epoch": 0.2906534095847312, "grad_norm": 2.0963829409056327, "learning_rate": 8.321862826111274e-07, "loss": 0.3434, "step": 16721 }, { "epoch": 0.290670792122234, "grad_norm": 1.6000853889218352, "learning_rate": 8.321652432264472e-07, "loss": 0.2941, "step": 16722 }, { "epoch": 0.29068817465973684, "grad_norm": 1.7089978332642337, "learning_rate": 8.321442027889558e-07, "loss": 0.38, "step": 16723 }, { "epoch": 0.2907055571972397, "grad_norm": 3.0506313728580152, "learning_rate": 8.321231612987197e-07, "loss": 0.3244, "step": 16724 }, { "epoch": 0.2907229397347425, "grad_norm": 2.0170594201123935, "learning_rate": 8.321021187558058e-07, "loss": 0.3788, "step": 16725 }, { "epoch": 0.2907403222722453, "grad_norm": 2.560764835509614, "learning_rate": 8.320810751602808e-07, "loss": 0.3055, "step": 16726 }, { "epoch": 0.2907577048097481, "grad_norm": 1.2132296649280536, "learning_rate": 8.320600305122112e-07, "loss": 0.304, "step": 16727 }, { "epoch": 0.29077508734725094, "grad_norm": 2.072835465727506, "learning_rate": 8.32038984811664e-07, "loss": 0.5701, "step": 16728 }, { "epoch": 0.29079246988475377, "grad_norm": 2.027594346784718, "learning_rate": 8.320179380587057e-07, "loss": 0.4802, "step": 16729 }, { "epoch": 0.2908098524222566, "grad_norm": 1.611963992159758, "learning_rate": 8.319968902534029e-07, "loss": 0.3116, "step": 16730 }, { "epoch": 0.29082723495975943, "grad_norm": 2.1691037754752056, "learning_rate": 8.319758413958227e-07, "loss": 0.1877, "step": 16731 }, { "epoch": 0.29084461749726226, "grad_norm": 1.5589239979100646, "learning_rate": 8.319547914860315e-07, "loss": 0.3704, "step": 16732 }, { "epoch": 0.2908620000347651, "grad_norm": 2.0852668834028796, "learning_rate": 8.31933740524096e-07, "loss": 0.5633, "step": 16733 }, { "epoch": 0.2908793825722679, "grad_norm": 1.3294057375544521, "learning_rate": 8.319126885100831e-07, "loss": 0.2969, "step": 16734 }, { "epoch": 0.29089676510977075, "grad_norm": 1.744953134498287, "learning_rate": 8.318916354440594e-07, "loss": 0.3686, "step": 16735 }, { "epoch": 0.2909141476472735, "grad_norm": 1.334444337505894, "learning_rate": 8.318705813260917e-07, "loss": 0.2824, "step": 16736 }, { "epoch": 0.29093153018477635, "grad_norm": 0.9969271736442624, "learning_rate": 8.318495261562468e-07, "loss": 0.2393, "step": 16737 }, { "epoch": 0.2909489127222792, "grad_norm": 1.8329431194258081, "learning_rate": 8.318284699345912e-07, "loss": 0.2749, "step": 16738 }, { "epoch": 0.290966295259782, "grad_norm": 1.6996350819128399, "learning_rate": 8.318074126611918e-07, "loss": 0.4362, "step": 16739 }, { "epoch": 0.29098367779728485, "grad_norm": 1.8821213246977768, "learning_rate": 8.317863543361155e-07, "loss": 0.3128, "step": 16740 }, { "epoch": 0.2910010603347877, "grad_norm": 3.2268647943449467, "learning_rate": 8.317652949594286e-07, "loss": 0.5785, "step": 16741 }, { "epoch": 0.2910184428722905, "grad_norm": 1.9031181180270538, "learning_rate": 8.317442345311983e-07, "loss": 0.3941, "step": 16742 }, { "epoch": 0.29103582540979334, "grad_norm": 1.268437060769732, "learning_rate": 8.317231730514911e-07, "loss": 0.3959, "step": 16743 }, { "epoch": 0.29105320794729617, "grad_norm": 2.4353361369033424, "learning_rate": 8.317021105203738e-07, "loss": 0.4603, "step": 16744 }, { "epoch": 0.291070590484799, "grad_norm": 1.6632220497000243, "learning_rate": 8.316810469379132e-07, "loss": 0.3089, "step": 16745 }, { "epoch": 0.29108797302230177, "grad_norm": 1.3511262914702258, "learning_rate": 8.316599823041761e-07, "loss": 0.4112, "step": 16746 }, { "epoch": 0.2911053555598046, "grad_norm": 3.1516090141300173, "learning_rate": 8.31638916619229e-07, "loss": 0.3737, "step": 16747 }, { "epoch": 0.29112273809730743, "grad_norm": 1.493536474498016, "learning_rate": 8.316178498831392e-07, "loss": 0.3338, "step": 16748 }, { "epoch": 0.29114012063481026, "grad_norm": 2.152288739592492, "learning_rate": 8.315967820959729e-07, "loss": 0.1785, "step": 16749 }, { "epoch": 0.2911575031723131, "grad_norm": 1.926685526691995, "learning_rate": 8.315757132577972e-07, "loss": 0.336, "step": 16750 }, { "epoch": 0.2911748857098159, "grad_norm": 1.3236446067565657, "learning_rate": 8.315546433686788e-07, "loss": 0.3619, "step": 16751 }, { "epoch": 0.29119226824731875, "grad_norm": 1.7385052121091082, "learning_rate": 8.315335724286845e-07, "loss": 0.3135, "step": 16752 }, { "epoch": 0.2912096507848216, "grad_norm": 1.6716109605241167, "learning_rate": 8.315125004378811e-07, "loss": 0.3298, "step": 16753 }, { "epoch": 0.2912270333223244, "grad_norm": 1.4313485187258652, "learning_rate": 8.314914273963354e-07, "loss": 0.3991, "step": 16754 }, { "epoch": 0.29124441585982724, "grad_norm": 1.2731864131853952, "learning_rate": 8.31470353304114e-07, "loss": 0.3471, "step": 16755 }, { "epoch": 0.29126179839733, "grad_norm": 1.845859607147332, "learning_rate": 8.314492781612839e-07, "loss": 0.4281, "step": 16756 }, { "epoch": 0.29127918093483285, "grad_norm": 1.8790781044836213, "learning_rate": 8.314282019679119e-07, "loss": 0.4062, "step": 16757 }, { "epoch": 0.2912965634723357, "grad_norm": 2.3638430495778526, "learning_rate": 8.314071247240647e-07, "loss": 0.4958, "step": 16758 }, { "epoch": 0.2913139460098385, "grad_norm": 1.8791856797521411, "learning_rate": 8.313860464298092e-07, "loss": 0.5036, "step": 16759 }, { "epoch": 0.29133132854734134, "grad_norm": 2.3509644133062486, "learning_rate": 8.313649670852121e-07, "loss": 0.3198, "step": 16760 }, { "epoch": 0.29134871108484417, "grad_norm": 2.179393374178701, "learning_rate": 8.313438866903403e-07, "loss": 0.2952, "step": 16761 }, { "epoch": 0.291366093622347, "grad_norm": 1.2886614782116617, "learning_rate": 8.313228052452605e-07, "loss": 0.2312, "step": 16762 }, { "epoch": 0.29138347615984983, "grad_norm": 1.8027856628921006, "learning_rate": 8.313017227500396e-07, "loss": 0.395, "step": 16763 }, { "epoch": 0.29140085869735266, "grad_norm": 2.0800404263132637, "learning_rate": 8.312806392047447e-07, "loss": 0.3129, "step": 16764 }, { "epoch": 0.2914182412348555, "grad_norm": 1.3983955274454285, "learning_rate": 8.312595546094421e-07, "loss": 0.2917, "step": 16765 }, { "epoch": 0.29143562377235827, "grad_norm": 1.5531123019275235, "learning_rate": 8.31238468964199e-07, "loss": 0.2504, "step": 16766 }, { "epoch": 0.2914530063098611, "grad_norm": 2.544753864885042, "learning_rate": 8.31217382269082e-07, "loss": 0.2779, "step": 16767 }, { "epoch": 0.2914703888473639, "grad_norm": 1.6593399818162269, "learning_rate": 8.31196294524158e-07, "loss": 0.3413, "step": 16768 }, { "epoch": 0.29148777138486676, "grad_norm": 1.2935668018628073, "learning_rate": 8.31175205729494e-07, "loss": 0.2236, "step": 16769 }, { "epoch": 0.2915051539223696, "grad_norm": 1.7973785933875985, "learning_rate": 8.311541158851567e-07, "loss": 0.3173, "step": 16770 }, { "epoch": 0.2915225364598724, "grad_norm": 1.289479376590155, "learning_rate": 8.31133024991213e-07, "loss": 0.2747, "step": 16771 }, { "epoch": 0.29153991899737525, "grad_norm": 2.471864222352114, "learning_rate": 8.311119330477296e-07, "loss": 0.3885, "step": 16772 }, { "epoch": 0.2915573015348781, "grad_norm": 1.7852472666145849, "learning_rate": 8.310908400547738e-07, "loss": 0.2647, "step": 16773 }, { "epoch": 0.2915746840723809, "grad_norm": 2.1334532511709896, "learning_rate": 8.310697460124119e-07, "loss": 0.4598, "step": 16774 }, { "epoch": 0.29159206660988374, "grad_norm": 1.5173095250852615, "learning_rate": 8.310486509207109e-07, "loss": 0.2344, "step": 16775 }, { "epoch": 0.2916094491473865, "grad_norm": 1.4569883984223069, "learning_rate": 8.310275547797378e-07, "loss": 0.4718, "step": 16776 }, { "epoch": 0.29162683168488934, "grad_norm": 1.2379992911620643, "learning_rate": 8.310064575895595e-07, "loss": 0.3268, "step": 16777 }, { "epoch": 0.2916442142223922, "grad_norm": 1.297545928003635, "learning_rate": 8.309853593502427e-07, "loss": 0.3769, "step": 16778 }, { "epoch": 0.291661596759895, "grad_norm": 4.088516755645884, "learning_rate": 8.309642600618544e-07, "loss": 0.1822, "step": 16779 }, { "epoch": 0.29167897929739783, "grad_norm": 1.335738516896259, "learning_rate": 8.309431597244614e-07, "loss": 0.3207, "step": 16780 }, { "epoch": 0.29169636183490066, "grad_norm": 1.3107528692673522, "learning_rate": 8.309220583381307e-07, "loss": 0.2847, "step": 16781 }, { "epoch": 0.2917137443724035, "grad_norm": 2.4454384846579584, "learning_rate": 8.30900955902929e-07, "loss": 0.3191, "step": 16782 }, { "epoch": 0.2917311269099063, "grad_norm": 1.5169713460973842, "learning_rate": 8.308798524189233e-07, "loss": 0.2892, "step": 16783 }, { "epoch": 0.29174850944740915, "grad_norm": 1.3700283364838033, "learning_rate": 8.308587478861804e-07, "loss": 0.2667, "step": 16784 }, { "epoch": 0.291765891984912, "grad_norm": 1.4867286941744227, "learning_rate": 8.308376423047671e-07, "loss": 0.2381, "step": 16785 }, { "epoch": 0.29178327452241476, "grad_norm": 1.5111934861518, "learning_rate": 8.308165356747508e-07, "loss": 0.4371, "step": 16786 }, { "epoch": 0.2918006570599176, "grad_norm": 1.9741977089321794, "learning_rate": 8.307954279961979e-07, "loss": 0.4034, "step": 16787 }, { "epoch": 0.2918180395974204, "grad_norm": 1.0926409420375196, "learning_rate": 8.307743192691754e-07, "loss": 0.3185, "step": 16788 }, { "epoch": 0.29183542213492325, "grad_norm": 2.8467596566035853, "learning_rate": 8.307532094937504e-07, "loss": 0.3577, "step": 16789 }, { "epoch": 0.2918528046724261, "grad_norm": 2.4126615869392536, "learning_rate": 8.307320986699895e-07, "loss": 0.4418, "step": 16790 }, { "epoch": 0.2918701872099289, "grad_norm": 1.585247741167506, "learning_rate": 8.307109867979599e-07, "loss": 0.2578, "step": 16791 }, { "epoch": 0.29188756974743174, "grad_norm": 1.7483171667194866, "learning_rate": 8.306898738777282e-07, "loss": 0.3346, "step": 16792 }, { "epoch": 0.29190495228493457, "grad_norm": 1.6521455713174131, "learning_rate": 8.306687599093616e-07, "loss": 0.4305, "step": 16793 }, { "epoch": 0.2919223348224374, "grad_norm": 2.251258302775206, "learning_rate": 8.30647644892927e-07, "loss": 0.2485, "step": 16794 }, { "epoch": 0.29193971735994023, "grad_norm": 2.122024835810601, "learning_rate": 8.30626528828491e-07, "loss": 0.3446, "step": 16795 }, { "epoch": 0.291957099897443, "grad_norm": 1.3947680873498594, "learning_rate": 8.306054117161212e-07, "loss": 0.1945, "step": 16796 }, { "epoch": 0.29197448243494584, "grad_norm": 1.7353994299959348, "learning_rate": 8.305842935558838e-07, "loss": 0.263, "step": 16797 }, { "epoch": 0.29199186497244867, "grad_norm": 1.5469861715175106, "learning_rate": 8.30563174347846e-07, "loss": 0.2362, "step": 16798 }, { "epoch": 0.2920092475099515, "grad_norm": 1.639721531825677, "learning_rate": 8.305420540920751e-07, "loss": 0.4475, "step": 16799 }, { "epoch": 0.2920266300474543, "grad_norm": 2.071529584578182, "learning_rate": 8.305209327886376e-07, "loss": 0.2083, "step": 16800 }, { "epoch": 0.29204401258495716, "grad_norm": 1.7308966369336138, "learning_rate": 8.304998104376004e-07, "loss": 0.3479, "step": 16801 }, { "epoch": 0.29206139512246, "grad_norm": 2.1563528320713514, "learning_rate": 8.304786870390308e-07, "loss": 0.4497, "step": 16802 }, { "epoch": 0.2920787776599628, "grad_norm": 1.930217540047658, "learning_rate": 8.304575625929955e-07, "loss": 0.2923, "step": 16803 }, { "epoch": 0.29209616019746565, "grad_norm": 3.573942316117953, "learning_rate": 8.304364370995616e-07, "loss": 0.5935, "step": 16804 }, { "epoch": 0.2921135427349684, "grad_norm": 1.4970651856954664, "learning_rate": 8.304153105587959e-07, "loss": 0.2143, "step": 16805 }, { "epoch": 0.29213092527247125, "grad_norm": 2.1213578664811568, "learning_rate": 8.303941829707655e-07, "loss": 0.5399, "step": 16806 }, { "epoch": 0.2921483078099741, "grad_norm": 1.1616983127752156, "learning_rate": 8.303730543355373e-07, "loss": 0.4664, "step": 16807 }, { "epoch": 0.2921656903474769, "grad_norm": 1.8313659934482363, "learning_rate": 8.303519246531782e-07, "loss": 0.3127, "step": 16808 }, { "epoch": 0.29218307288497974, "grad_norm": 1.4933654668190426, "learning_rate": 8.303307939237554e-07, "loss": 0.3034, "step": 16809 }, { "epoch": 0.2922004554224826, "grad_norm": 2.0754945079907405, "learning_rate": 8.303096621473356e-07, "loss": 0.244, "step": 16810 }, { "epoch": 0.2922178379599854, "grad_norm": 1.5197311159049902, "learning_rate": 8.30288529323986e-07, "loss": 0.4613, "step": 16811 }, { "epoch": 0.29223522049748824, "grad_norm": 1.854662193917037, "learning_rate": 8.302673954537733e-07, "loss": 0.5906, "step": 16812 }, { "epoch": 0.29225260303499107, "grad_norm": 1.991869088391996, "learning_rate": 8.302462605367649e-07, "loss": 0.4272, "step": 16813 }, { "epoch": 0.2922699855724939, "grad_norm": 1.863192759542293, "learning_rate": 8.302251245730274e-07, "loss": 0.2888, "step": 16814 }, { "epoch": 0.29228736810999667, "grad_norm": 1.7856643936705345, "learning_rate": 8.302039875626279e-07, "loss": 0.2892, "step": 16815 }, { "epoch": 0.2923047506474995, "grad_norm": 2.293824086886931, "learning_rate": 8.301828495056335e-07, "loss": 0.206, "step": 16816 }, { "epoch": 0.29232213318500233, "grad_norm": 1.2234980283273917, "learning_rate": 8.301617104021112e-07, "loss": 0.4356, "step": 16817 }, { "epoch": 0.29233951572250516, "grad_norm": 1.7945781029278103, "learning_rate": 8.30140570252128e-07, "loss": 0.6567, "step": 16818 }, { "epoch": 0.292356898260008, "grad_norm": 1.2328619705942914, "learning_rate": 8.301194290557507e-07, "loss": 0.3891, "step": 16819 }, { "epoch": 0.2923742807975108, "grad_norm": 1.4730011073137539, "learning_rate": 8.300982868130466e-07, "loss": 0.5683, "step": 16820 }, { "epoch": 0.29239166333501365, "grad_norm": 1.8660232284062286, "learning_rate": 8.300771435240826e-07, "loss": 0.2551, "step": 16821 }, { "epoch": 0.2924090458725165, "grad_norm": 2.7829520871009636, "learning_rate": 8.300559991889256e-07, "loss": 0.3842, "step": 16822 }, { "epoch": 0.2924264284100193, "grad_norm": 1.577555985473696, "learning_rate": 8.300348538076425e-07, "loss": 0.3427, "step": 16823 }, { "epoch": 0.29244381094752214, "grad_norm": 2.533696146695875, "learning_rate": 8.300137073803008e-07, "loss": 0.3708, "step": 16824 }, { "epoch": 0.2924611934850249, "grad_norm": 1.8871741896120393, "learning_rate": 8.29992559906967e-07, "loss": 0.2532, "step": 16825 }, { "epoch": 0.29247857602252775, "grad_norm": 2.243052132571248, "learning_rate": 8.299714113877085e-07, "loss": 0.4917, "step": 16826 }, { "epoch": 0.2924959585600306, "grad_norm": 1.1061038911414267, "learning_rate": 8.299502618225923e-07, "loss": 0.2997, "step": 16827 }, { "epoch": 0.2925133410975334, "grad_norm": 1.5486569692439474, "learning_rate": 8.299291112116853e-07, "loss": 0.3051, "step": 16828 }, { "epoch": 0.29253072363503624, "grad_norm": 2.170449307091497, "learning_rate": 8.299079595550544e-07, "loss": 0.2981, "step": 16829 }, { "epoch": 0.29254810617253907, "grad_norm": 1.919474078039382, "learning_rate": 8.29886806852767e-07, "loss": 0.2299, "step": 16830 }, { "epoch": 0.2925654887100419, "grad_norm": 1.5689250923263072, "learning_rate": 8.298656531048899e-07, "loss": 0.2336, "step": 16831 }, { "epoch": 0.29258287124754473, "grad_norm": 1.2561308625037986, "learning_rate": 8.298444983114902e-07, "loss": 0.3981, "step": 16832 }, { "epoch": 0.29260025378504756, "grad_norm": 2.33742563916191, "learning_rate": 8.29823342472635e-07, "loss": 0.2963, "step": 16833 }, { "epoch": 0.2926176363225504, "grad_norm": 1.1826838266691486, "learning_rate": 8.298021855883912e-07, "loss": 0.2708, "step": 16834 }, { "epoch": 0.29263501886005316, "grad_norm": 2.346328503832596, "learning_rate": 8.297810276588261e-07, "loss": 0.3469, "step": 16835 }, { "epoch": 0.292652401397556, "grad_norm": 3.2729126790716645, "learning_rate": 8.297598686840065e-07, "loss": 0.4708, "step": 16836 }, { "epoch": 0.2926697839350588, "grad_norm": 2.9704861653012795, "learning_rate": 8.297387086639997e-07, "loss": 0.4075, "step": 16837 }, { "epoch": 0.29268716647256166, "grad_norm": 1.4213197287572203, "learning_rate": 8.297175475988727e-07, "loss": 0.4447, "step": 16838 }, { "epoch": 0.2927045490100645, "grad_norm": 4.0359359763038185, "learning_rate": 8.296963854886924e-07, "loss": 0.3252, "step": 16839 }, { "epoch": 0.2927219315475673, "grad_norm": 1.8420736190381666, "learning_rate": 8.296752223335261e-07, "loss": 0.1567, "step": 16840 }, { "epoch": 0.29273931408507015, "grad_norm": 1.1325066319486445, "learning_rate": 8.296540581334408e-07, "loss": 0.4207, "step": 16841 }, { "epoch": 0.292756696622573, "grad_norm": 2.247224590929644, "learning_rate": 8.296328928885034e-07, "loss": 0.2948, "step": 16842 }, { "epoch": 0.2927740791600758, "grad_norm": 1.4114536806925486, "learning_rate": 8.296117265987813e-07, "loss": 0.3417, "step": 16843 }, { "epoch": 0.29279146169757864, "grad_norm": 1.7296039668907977, "learning_rate": 8.295905592643414e-07, "loss": 0.4019, "step": 16844 }, { "epoch": 0.2928088442350814, "grad_norm": 2.1442130169808, "learning_rate": 8.295693908852509e-07, "loss": 0.4077, "step": 16845 }, { "epoch": 0.29282622677258424, "grad_norm": 1.3971279232278961, "learning_rate": 8.295482214615768e-07, "loss": 0.1852, "step": 16846 }, { "epoch": 0.29284360931008707, "grad_norm": 2.1727683967596625, "learning_rate": 8.295270509933861e-07, "loss": 0.4142, "step": 16847 }, { "epoch": 0.2928609918475899, "grad_norm": 1.05013946908688, "learning_rate": 8.295058794807461e-07, "loss": 0.2529, "step": 16848 }, { "epoch": 0.29287837438509273, "grad_norm": 1.2765470955708587, "learning_rate": 8.294847069237239e-07, "loss": 0.3469, "step": 16849 }, { "epoch": 0.29289575692259556, "grad_norm": 1.1174210207766018, "learning_rate": 8.294635333223865e-07, "loss": 0.19, "step": 16850 }, { "epoch": 0.2929131394600984, "grad_norm": 1.285243886991725, "learning_rate": 8.294423586768009e-07, "loss": 0.2232, "step": 16851 }, { "epoch": 0.2929305219976012, "grad_norm": 2.0811275641530353, "learning_rate": 8.294211829870346e-07, "loss": 0.2355, "step": 16852 }, { "epoch": 0.29294790453510405, "grad_norm": 4.362891973870475, "learning_rate": 8.294000062531543e-07, "loss": 0.4856, "step": 16853 }, { "epoch": 0.2929652870726069, "grad_norm": 1.7659183723993996, "learning_rate": 8.293788284752272e-07, "loss": 0.3522, "step": 16854 }, { "epoch": 0.29298266961010966, "grad_norm": 2.0848345561687593, "learning_rate": 8.293576496533207e-07, "loss": 0.5686, "step": 16855 }, { "epoch": 0.2930000521476125, "grad_norm": 1.8040576071842875, "learning_rate": 8.293364697875017e-07, "loss": 0.2649, "step": 16856 }, { "epoch": 0.2930174346851153, "grad_norm": 1.9068333187249755, "learning_rate": 8.293152888778374e-07, "loss": 0.4333, "step": 16857 }, { "epoch": 0.29303481722261815, "grad_norm": 1.5846046071057869, "learning_rate": 8.292941069243949e-07, "loss": 0.2975, "step": 16858 }, { "epoch": 0.293052199760121, "grad_norm": 1.8222534258612222, "learning_rate": 8.292729239272413e-07, "loss": 0.5528, "step": 16859 }, { "epoch": 0.2930695822976238, "grad_norm": 1.6881905443027487, "learning_rate": 8.292517398864438e-07, "loss": 0.5174, "step": 16860 }, { "epoch": 0.29308696483512664, "grad_norm": 1.6205903693270673, "learning_rate": 8.292305548020695e-07, "loss": 0.3992, "step": 16861 }, { "epoch": 0.29310434737262947, "grad_norm": 1.8013044318045075, "learning_rate": 8.292093686741856e-07, "loss": 0.212, "step": 16862 }, { "epoch": 0.2931217299101323, "grad_norm": 2.0285072278876215, "learning_rate": 8.291881815028593e-07, "loss": 0.258, "step": 16863 }, { "epoch": 0.29313911244763513, "grad_norm": 1.9234817304112064, "learning_rate": 8.291669932881575e-07, "loss": 0.4946, "step": 16864 }, { "epoch": 0.2931564949851379, "grad_norm": 2.0460696174560513, "learning_rate": 8.291458040301478e-07, "loss": 0.4112, "step": 16865 }, { "epoch": 0.29317387752264074, "grad_norm": 2.3924732844698955, "learning_rate": 8.29124613728897e-07, "loss": 0.6268, "step": 16866 }, { "epoch": 0.29319126006014357, "grad_norm": 1.862870858103543, "learning_rate": 8.291034223844723e-07, "loss": 0.468, "step": 16867 }, { "epoch": 0.2932086425976464, "grad_norm": 1.9009345991295292, "learning_rate": 8.290822299969409e-07, "loss": 0.4475, "step": 16868 }, { "epoch": 0.2932260251351492, "grad_norm": 2.1931174271515217, "learning_rate": 8.290610365663702e-07, "loss": 0.538, "step": 16869 }, { "epoch": 0.29324340767265206, "grad_norm": 2.0321556212081986, "learning_rate": 8.290398420928269e-07, "loss": 0.2035, "step": 16870 }, { "epoch": 0.2932607902101549, "grad_norm": 2.9134558633851326, "learning_rate": 8.290186465763786e-07, "loss": 0.2916, "step": 16871 }, { "epoch": 0.2932781727476577, "grad_norm": 2.4223832984471625, "learning_rate": 8.289974500170922e-07, "loss": 0.3275, "step": 16872 }, { "epoch": 0.29329555528516055, "grad_norm": 1.9198847888397121, "learning_rate": 8.289762524150352e-07, "loss": 0.2235, "step": 16873 }, { "epoch": 0.2933129378226634, "grad_norm": 2.001727664847527, "learning_rate": 8.289550537702745e-07, "loss": 0.3339, "step": 16874 }, { "epoch": 0.29333032036016615, "grad_norm": 2.0400301264116982, "learning_rate": 8.289338540828774e-07, "loss": 0.4281, "step": 16875 }, { "epoch": 0.293347702897669, "grad_norm": 1.4433541140338608, "learning_rate": 8.289126533529112e-07, "loss": 0.3772, "step": 16876 }, { "epoch": 0.2933650854351718, "grad_norm": 1.4381447719054072, "learning_rate": 8.288914515804428e-07, "loss": 0.3844, "step": 16877 }, { "epoch": 0.29338246797267464, "grad_norm": 2.411171332695292, "learning_rate": 8.288702487655397e-07, "loss": 0.4236, "step": 16878 }, { "epoch": 0.2933998505101775, "grad_norm": 2.5929988702922904, "learning_rate": 8.288490449082688e-07, "loss": 0.3025, "step": 16879 }, { "epoch": 0.2934172330476803, "grad_norm": 1.8717975131339022, "learning_rate": 8.288278400086976e-07, "loss": 0.4862, "step": 16880 }, { "epoch": 0.29343461558518313, "grad_norm": 3.1329007797769695, "learning_rate": 8.288066340668933e-07, "loss": 0.2098, "step": 16881 }, { "epoch": 0.29345199812268596, "grad_norm": 1.4860605244822522, "learning_rate": 8.287854270829229e-07, "loss": 0.2669, "step": 16882 }, { "epoch": 0.2934693806601888, "grad_norm": 1.4675856958586198, "learning_rate": 8.287642190568538e-07, "loss": 0.2935, "step": 16883 }, { "epoch": 0.2934867631976916, "grad_norm": 1.0040940745430313, "learning_rate": 8.287430099887532e-07, "loss": 0.3673, "step": 16884 }, { "epoch": 0.2935041457351944, "grad_norm": 2.432254295232852, "learning_rate": 8.287217998786879e-07, "loss": 0.5173, "step": 16885 }, { "epoch": 0.29352152827269723, "grad_norm": 1.630849280657747, "learning_rate": 8.287005887267259e-07, "loss": 0.4008, "step": 16886 }, { "epoch": 0.29353891081020006, "grad_norm": 1.5018982951317128, "learning_rate": 8.28679376532934e-07, "loss": 0.181, "step": 16887 }, { "epoch": 0.2935562933477029, "grad_norm": 1.4747169948639176, "learning_rate": 8.286581632973793e-07, "loss": 0.3203, "step": 16888 }, { "epoch": 0.2935736758852057, "grad_norm": 1.5111008500781513, "learning_rate": 8.286369490201293e-07, "loss": 0.2766, "step": 16889 }, { "epoch": 0.29359105842270855, "grad_norm": 1.4958336516029584, "learning_rate": 8.286157337012511e-07, "loss": 0.5445, "step": 16890 }, { "epoch": 0.2936084409602114, "grad_norm": 1.6617621903054731, "learning_rate": 8.285945173408119e-07, "loss": 0.3176, "step": 16891 }, { "epoch": 0.2936258234977142, "grad_norm": 1.3340081071381926, "learning_rate": 8.285732999388791e-07, "loss": 0.2936, "step": 16892 }, { "epoch": 0.29364320603521704, "grad_norm": 1.6950566010329622, "learning_rate": 8.285520814955198e-07, "loss": 0.2362, "step": 16893 }, { "epoch": 0.29366058857271987, "grad_norm": 2.064288581897418, "learning_rate": 8.285308620108014e-07, "loss": 0.3985, "step": 16894 }, { "epoch": 0.29367797111022265, "grad_norm": 1.549486784661113, "learning_rate": 8.285096414847911e-07, "loss": 0.3835, "step": 16895 }, { "epoch": 0.2936953536477255, "grad_norm": 2.363967750640226, "learning_rate": 8.284884199175561e-07, "loss": 0.2641, "step": 16896 }, { "epoch": 0.2937127361852283, "grad_norm": 8.284412477890145, "learning_rate": 8.284671973091637e-07, "loss": 0.1962, "step": 16897 }, { "epoch": 0.29373011872273114, "grad_norm": 1.221314206987975, "learning_rate": 8.284459736596812e-07, "loss": 0.365, "step": 16898 }, { "epoch": 0.29374750126023397, "grad_norm": 1.7104162276763577, "learning_rate": 8.284247489691758e-07, "loss": 0.2879, "step": 16899 }, { "epoch": 0.2937648837977368, "grad_norm": 1.563629550803125, "learning_rate": 8.284035232377149e-07, "loss": 0.37, "step": 16900 }, { "epoch": 0.29378226633523963, "grad_norm": 1.192551603569412, "learning_rate": 8.283822964653655e-07, "loss": 0.3855, "step": 16901 }, { "epoch": 0.29379964887274246, "grad_norm": 3.930466658168036, "learning_rate": 8.283610686521952e-07, "loss": 0.4242, "step": 16902 }, { "epoch": 0.2938170314102453, "grad_norm": 1.840013503601525, "learning_rate": 8.283398397982711e-07, "loss": 0.3574, "step": 16903 }, { "epoch": 0.2938344139477481, "grad_norm": 1.753552726755324, "learning_rate": 8.283186099036606e-07, "loss": 0.3799, "step": 16904 }, { "epoch": 0.2938517964852509, "grad_norm": 2.2014198741215982, "learning_rate": 8.282973789684309e-07, "loss": 0.3219, "step": 16905 }, { "epoch": 0.2938691790227537, "grad_norm": 1.569064527218423, "learning_rate": 8.282761469926494e-07, "loss": 0.3542, "step": 16906 }, { "epoch": 0.29388656156025655, "grad_norm": 2.441119490040245, "learning_rate": 8.282549139763832e-07, "loss": 0.4273, "step": 16907 }, { "epoch": 0.2939039440977594, "grad_norm": 1.059715170372428, "learning_rate": 8.282336799196996e-07, "loss": 0.3836, "step": 16908 }, { "epoch": 0.2939213266352622, "grad_norm": 2.073894219058512, "learning_rate": 8.282124448226662e-07, "loss": 0.2594, "step": 16909 }, { "epoch": 0.29393870917276504, "grad_norm": 1.7435981272622025, "learning_rate": 8.281912086853502e-07, "loss": 0.4537, "step": 16910 }, { "epoch": 0.2939560917102679, "grad_norm": 1.2737250330798133, "learning_rate": 8.281699715078186e-07, "loss": 0.2568, "step": 16911 }, { "epoch": 0.2939734742477707, "grad_norm": 2.895050389063755, "learning_rate": 8.281487332901391e-07, "loss": 0.549, "step": 16912 }, { "epoch": 0.29399085678527354, "grad_norm": 1.3781571866030577, "learning_rate": 8.281274940323788e-07, "loss": 0.6023, "step": 16913 }, { "epoch": 0.29400823932277637, "grad_norm": 1.8665803150524125, "learning_rate": 8.281062537346051e-07, "loss": 0.5008, "step": 16914 }, { "epoch": 0.29402562186027914, "grad_norm": 3.0050470449478, "learning_rate": 8.280850123968851e-07, "loss": 0.3187, "step": 16915 }, { "epoch": 0.29404300439778197, "grad_norm": 2.7999269744751776, "learning_rate": 8.280637700192866e-07, "loss": 0.2114, "step": 16916 }, { "epoch": 0.2940603869352848, "grad_norm": 1.22419333495539, "learning_rate": 8.280425266018765e-07, "loss": 0.1924, "step": 16917 }, { "epoch": 0.29407776947278763, "grad_norm": 1.5476548081603088, "learning_rate": 8.280212821447222e-07, "loss": 0.2289, "step": 16918 }, { "epoch": 0.29409515201029046, "grad_norm": 1.9322493579814526, "learning_rate": 8.280000366478913e-07, "loss": 0.3554, "step": 16919 }, { "epoch": 0.2941125345477933, "grad_norm": 1.449397505300079, "learning_rate": 8.279787901114509e-07, "loss": 0.4489, "step": 16920 }, { "epoch": 0.2941299170852961, "grad_norm": 1.3648352248617188, "learning_rate": 8.279575425354684e-07, "loss": 0.2182, "step": 16921 }, { "epoch": 0.29414729962279895, "grad_norm": 1.5830115649819134, "learning_rate": 8.279362939200112e-07, "loss": 0.2453, "step": 16922 }, { "epoch": 0.2941646821603018, "grad_norm": 1.9375434552638284, "learning_rate": 8.279150442651464e-07, "loss": 0.287, "step": 16923 }, { "epoch": 0.2941820646978046, "grad_norm": 1.670466604990845, "learning_rate": 8.278937935709416e-07, "loss": 0.3735, "step": 16924 }, { "epoch": 0.2941994472353074, "grad_norm": 1.2289008336776985, "learning_rate": 8.278725418374641e-07, "loss": 0.3622, "step": 16925 }, { "epoch": 0.2942168297728102, "grad_norm": 2.507404311925415, "learning_rate": 8.278512890647815e-07, "loss": 0.3573, "step": 16926 }, { "epoch": 0.29423421231031305, "grad_norm": 2.107038694005412, "learning_rate": 8.278300352529606e-07, "loss": 0.4504, "step": 16927 }, { "epoch": 0.2942515948478159, "grad_norm": 1.937934197389449, "learning_rate": 8.278087804020693e-07, "loss": 0.4131, "step": 16928 }, { "epoch": 0.2942689773853187, "grad_norm": 1.5166516615455317, "learning_rate": 8.277875245121746e-07, "loss": 0.351, "step": 16929 }, { "epoch": 0.29428635992282154, "grad_norm": 2.0919626245311616, "learning_rate": 8.27766267583344e-07, "loss": 0.4441, "step": 16930 }, { "epoch": 0.29430374246032437, "grad_norm": 1.3997327938613948, "learning_rate": 8.27745009615645e-07, "loss": 0.2556, "step": 16931 }, { "epoch": 0.2943211249978272, "grad_norm": 1.698008435695997, "learning_rate": 8.277237506091448e-07, "loss": 0.3094, "step": 16932 }, { "epoch": 0.29433850753533003, "grad_norm": 1.0991386376703596, "learning_rate": 8.277024905639109e-07, "loss": 0.1966, "step": 16933 }, { "epoch": 0.29435589007283286, "grad_norm": 2.1234862642857784, "learning_rate": 8.276812294800106e-07, "loss": 0.4066, "step": 16934 }, { "epoch": 0.29437327261033563, "grad_norm": 2.3474649227201887, "learning_rate": 8.276599673575113e-07, "loss": 0.56, "step": 16935 }, { "epoch": 0.29439065514783846, "grad_norm": 1.5554366647728948, "learning_rate": 8.276387041964805e-07, "loss": 0.3829, "step": 16936 }, { "epoch": 0.2944080376853413, "grad_norm": 2.3508382334557383, "learning_rate": 8.276174399969855e-07, "loss": 0.3131, "step": 16937 }, { "epoch": 0.2944254202228441, "grad_norm": 1.820238526921001, "learning_rate": 8.275961747590937e-07, "loss": 0.3745, "step": 16938 }, { "epoch": 0.29444280276034696, "grad_norm": 1.6029063987986518, "learning_rate": 8.275749084828725e-07, "loss": 0.5656, "step": 16939 }, { "epoch": 0.2944601852978498, "grad_norm": 2.192504235886157, "learning_rate": 8.275536411683893e-07, "loss": 0.2154, "step": 16940 }, { "epoch": 0.2944775678353526, "grad_norm": 1.3970115613701175, "learning_rate": 8.275323728157116e-07, "loss": 0.2686, "step": 16941 }, { "epoch": 0.29449495037285545, "grad_norm": 2.3994119457790384, "learning_rate": 8.275111034249067e-07, "loss": 0.4277, "step": 16942 }, { "epoch": 0.2945123329103583, "grad_norm": 1.8447361937789521, "learning_rate": 8.27489832996042e-07, "loss": 0.4035, "step": 16943 }, { "epoch": 0.29452971544786105, "grad_norm": 1.8300938127733455, "learning_rate": 8.27468561529185e-07, "loss": 0.6259, "step": 16944 }, { "epoch": 0.2945470979853639, "grad_norm": 1.2447089239534983, "learning_rate": 8.27447289024403e-07, "loss": 0.2205, "step": 16945 }, { "epoch": 0.2945644805228667, "grad_norm": 1.7078769720822995, "learning_rate": 8.274260154817636e-07, "loss": 0.3589, "step": 16946 }, { "epoch": 0.29458186306036954, "grad_norm": 2.2950525781286886, "learning_rate": 8.274047409013343e-07, "loss": 0.6386, "step": 16947 }, { "epoch": 0.2945992455978724, "grad_norm": 1.9340657516589612, "learning_rate": 8.273834652831821e-07, "loss": 0.2287, "step": 16948 }, { "epoch": 0.2946166281353752, "grad_norm": 1.9768888132152336, "learning_rate": 8.273621886273747e-07, "loss": 0.4065, "step": 16949 }, { "epoch": 0.29463401067287803, "grad_norm": 1.8003695402585802, "learning_rate": 8.273409109339798e-07, "loss": 0.3118, "step": 16950 }, { "epoch": 0.29465139321038086, "grad_norm": 1.8783620108623154, "learning_rate": 8.273196322030645e-07, "loss": 0.5451, "step": 16951 }, { "epoch": 0.2946687757478837, "grad_norm": 1.465044718116341, "learning_rate": 8.272983524346961e-07, "loss": 0.3358, "step": 16952 }, { "epoch": 0.2946861582853865, "grad_norm": 2.504361137239967, "learning_rate": 8.272770716289426e-07, "loss": 0.5673, "step": 16953 }, { "epoch": 0.2947035408228893, "grad_norm": 3.162494150148147, "learning_rate": 8.272557897858707e-07, "loss": 0.386, "step": 16954 }, { "epoch": 0.29472092336039213, "grad_norm": 2.286599815082972, "learning_rate": 8.272345069055486e-07, "loss": 0.3016, "step": 16955 }, { "epoch": 0.29473830589789496, "grad_norm": 1.2151248811420836, "learning_rate": 8.272132229880433e-07, "loss": 0.2679, "step": 16956 }, { "epoch": 0.2947556884353978, "grad_norm": 1.8440226760923786, "learning_rate": 8.271919380334225e-07, "loss": 0.4643, "step": 16957 }, { "epoch": 0.2947730709729006, "grad_norm": 1.6724900435022836, "learning_rate": 8.271706520417535e-07, "loss": 0.2819, "step": 16958 }, { "epoch": 0.29479045351040345, "grad_norm": 2.189851439815259, "learning_rate": 8.271493650131038e-07, "loss": 0.3222, "step": 16959 }, { "epoch": 0.2948078360479063, "grad_norm": 2.0211548053534414, "learning_rate": 8.271280769475409e-07, "loss": 0.4671, "step": 16960 }, { "epoch": 0.2948252185854091, "grad_norm": 2.109978364737387, "learning_rate": 8.271067878451324e-07, "loss": 0.2931, "step": 16961 }, { "epoch": 0.29484260112291194, "grad_norm": 3.209256595777487, "learning_rate": 8.270854977059454e-07, "loss": 0.4074, "step": 16962 }, { "epoch": 0.29485998366041477, "grad_norm": 1.2634062360194669, "learning_rate": 8.270642065300478e-07, "loss": 0.3971, "step": 16963 }, { "epoch": 0.29487736619791755, "grad_norm": 1.9833742135937722, "learning_rate": 8.270429143175068e-07, "loss": 0.2862, "step": 16964 }, { "epoch": 0.2948947487354204, "grad_norm": 2.9964554030839254, "learning_rate": 8.2702162106839e-07, "loss": 0.2585, "step": 16965 }, { "epoch": 0.2949121312729232, "grad_norm": 1.7819083316872175, "learning_rate": 8.27000326782765e-07, "loss": 0.4501, "step": 16966 }, { "epoch": 0.29492951381042604, "grad_norm": 1.8691145798441295, "learning_rate": 8.26979031460699e-07, "loss": 0.443, "step": 16967 }, { "epoch": 0.29494689634792887, "grad_norm": 2.64671889689276, "learning_rate": 8.269577351022598e-07, "loss": 0.2442, "step": 16968 }, { "epoch": 0.2949642788854317, "grad_norm": 1.6416160939923015, "learning_rate": 8.269364377075149e-07, "loss": 0.301, "step": 16969 }, { "epoch": 0.2949816614229345, "grad_norm": 2.379413417924674, "learning_rate": 8.269151392765314e-07, "loss": 0.316, "step": 16970 }, { "epoch": 0.29499904396043736, "grad_norm": 1.375407088990122, "learning_rate": 8.268938398093772e-07, "loss": 0.5431, "step": 16971 }, { "epoch": 0.2950164264979402, "grad_norm": 3.1605547548197963, "learning_rate": 8.268725393061198e-07, "loss": 0.3283, "step": 16972 }, { "epoch": 0.295033809035443, "grad_norm": 4.230180614957315, "learning_rate": 8.268512377668265e-07, "loss": 0.3976, "step": 16973 }, { "epoch": 0.2950511915729458, "grad_norm": 1.7345024101107631, "learning_rate": 8.268299351915649e-07, "loss": 0.3192, "step": 16974 }, { "epoch": 0.2950685741104486, "grad_norm": 2.1489148664924738, "learning_rate": 8.268086315804025e-07, "loss": 0.2955, "step": 16975 }, { "epoch": 0.29508595664795145, "grad_norm": 2.000000005261736, "learning_rate": 8.26787326933407e-07, "loss": 0.2729, "step": 16976 }, { "epoch": 0.2951033391854543, "grad_norm": 1.5622064083932434, "learning_rate": 8.267660212506456e-07, "loss": 0.4223, "step": 16977 }, { "epoch": 0.2951207217229571, "grad_norm": 1.3162933613981325, "learning_rate": 8.267447145321862e-07, "loss": 0.3823, "step": 16978 }, { "epoch": 0.29513810426045994, "grad_norm": 1.4126869249218144, "learning_rate": 8.267234067780961e-07, "loss": 0.2772, "step": 16979 }, { "epoch": 0.2951554867979628, "grad_norm": 2.2883686283001485, "learning_rate": 8.267020979884427e-07, "loss": 0.3674, "step": 16980 }, { "epoch": 0.2951728693354656, "grad_norm": 1.755556603303896, "learning_rate": 8.26680788163294e-07, "loss": 0.2968, "step": 16981 }, { "epoch": 0.29519025187296843, "grad_norm": 1.50929551534526, "learning_rate": 8.266594773027171e-07, "loss": 0.3967, "step": 16982 }, { "epoch": 0.29520763441047126, "grad_norm": 2.278295386359167, "learning_rate": 8.266381654067797e-07, "loss": 0.2835, "step": 16983 }, { "epoch": 0.29522501694797404, "grad_norm": 2.1503740149971167, "learning_rate": 8.266168524755495e-07, "loss": 0.4024, "step": 16984 }, { "epoch": 0.29524239948547687, "grad_norm": 3.83030209916494, "learning_rate": 8.265955385090938e-07, "loss": 0.327, "step": 16985 }, { "epoch": 0.2952597820229797, "grad_norm": 1.6167921415887552, "learning_rate": 8.265742235074802e-07, "loss": 0.286, "step": 16986 }, { "epoch": 0.29527716456048253, "grad_norm": 2.3696066632366612, "learning_rate": 8.265529074707766e-07, "loss": 0.5088, "step": 16987 }, { "epoch": 0.29529454709798536, "grad_norm": 2.094089548773314, "learning_rate": 8.265315903990501e-07, "loss": 0.1393, "step": 16988 }, { "epoch": 0.2953119296354882, "grad_norm": 1.610023381057982, "learning_rate": 8.265102722923685e-07, "loss": 0.2201, "step": 16989 }, { "epoch": 0.295329312172991, "grad_norm": 1.6883027365908914, "learning_rate": 8.264889531507993e-07, "loss": 0.3698, "step": 16990 }, { "epoch": 0.29534669471049385, "grad_norm": 1.3014174539260714, "learning_rate": 8.2646763297441e-07, "loss": 0.2943, "step": 16991 }, { "epoch": 0.2953640772479967, "grad_norm": 2.6463758576441094, "learning_rate": 8.264463117632683e-07, "loss": 0.4409, "step": 16992 }, { "epoch": 0.2953814597854995, "grad_norm": 1.309555406177434, "learning_rate": 8.264249895174418e-07, "loss": 0.2675, "step": 16993 }, { "epoch": 0.2953988423230023, "grad_norm": 1.771180452062267, "learning_rate": 8.264036662369981e-07, "loss": 0.2276, "step": 16994 }, { "epoch": 0.2954162248605051, "grad_norm": 1.9280684261255785, "learning_rate": 8.263823419220046e-07, "loss": 0.3827, "step": 16995 }, { "epoch": 0.29543360739800795, "grad_norm": 1.5183958123532255, "learning_rate": 8.263610165725291e-07, "loss": 0.3218, "step": 16996 }, { "epoch": 0.2954509899355108, "grad_norm": 2.6436029255960847, "learning_rate": 8.26339690188639e-07, "loss": 0.3405, "step": 16997 }, { "epoch": 0.2954683724730136, "grad_norm": 2.710342705246588, "learning_rate": 8.263183627704021e-07, "loss": 0.5361, "step": 16998 }, { "epoch": 0.29548575501051644, "grad_norm": 1.6263898298126416, "learning_rate": 8.262970343178859e-07, "loss": 0.476, "step": 16999 }, { "epoch": 0.29550313754801927, "grad_norm": 2.067605377402114, "learning_rate": 8.262757048311578e-07, "loss": 0.4486, "step": 17000 }, { "epoch": 0.2955205200855221, "grad_norm": 1.8129043977701778, "learning_rate": 8.262543743102859e-07, "loss": 0.4205, "step": 17001 }, { "epoch": 0.29553790262302493, "grad_norm": 2.173720371786245, "learning_rate": 8.262330427553372e-07, "loss": 0.6123, "step": 17002 }, { "epoch": 0.29555528516052776, "grad_norm": 1.7552210733340416, "learning_rate": 8.262117101663797e-07, "loss": 0.314, "step": 17003 }, { "epoch": 0.29557266769803053, "grad_norm": 1.6168658230413016, "learning_rate": 8.26190376543481e-07, "loss": 0.2599, "step": 17004 }, { "epoch": 0.29559005023553336, "grad_norm": 1.156985798935738, "learning_rate": 8.261690418867087e-07, "loss": 0.3771, "step": 17005 }, { "epoch": 0.2956074327730362, "grad_norm": 1.5793375650468662, "learning_rate": 8.261477061961301e-07, "loss": 0.403, "step": 17006 }, { "epoch": 0.295624815310539, "grad_norm": 2.461572611121268, "learning_rate": 8.261263694718135e-07, "loss": 0.4887, "step": 17007 }, { "epoch": 0.29564219784804185, "grad_norm": 2.4486859443151774, "learning_rate": 8.261050317138259e-07, "loss": 0.3945, "step": 17008 }, { "epoch": 0.2956595803855447, "grad_norm": 2.438365013214251, "learning_rate": 8.26083692922235e-07, "loss": 0.4125, "step": 17009 }, { "epoch": 0.2956769629230475, "grad_norm": 1.130187826594589, "learning_rate": 8.260623530971087e-07, "loss": 0.2728, "step": 17010 }, { "epoch": 0.29569434546055035, "grad_norm": 1.2073518745084348, "learning_rate": 8.260410122385147e-07, "loss": 0.339, "step": 17011 }, { "epoch": 0.2957117279980532, "grad_norm": 1.8114185384587937, "learning_rate": 8.260196703465201e-07, "loss": 0.4543, "step": 17012 }, { "epoch": 0.295729110535556, "grad_norm": 1.742993111483255, "learning_rate": 8.259983274211933e-07, "loss": 0.3477, "step": 17013 }, { "epoch": 0.2957464930730588, "grad_norm": 1.3236423093503409, "learning_rate": 8.259769834626013e-07, "loss": 0.3837, "step": 17014 }, { "epoch": 0.2957638756105616, "grad_norm": 1.519169284804457, "learning_rate": 8.25955638470812e-07, "loss": 0.2164, "step": 17015 }, { "epoch": 0.29578125814806444, "grad_norm": 3.2127176031616544, "learning_rate": 8.259342924458931e-07, "loss": 0.5551, "step": 17016 }, { "epoch": 0.29579864068556727, "grad_norm": 1.501666287251266, "learning_rate": 8.259129453879122e-07, "loss": 0.2777, "step": 17017 }, { "epoch": 0.2958160232230701, "grad_norm": 1.4229789806090547, "learning_rate": 8.258915972969372e-07, "loss": 0.327, "step": 17018 }, { "epoch": 0.29583340576057293, "grad_norm": 1.8286487140166205, "learning_rate": 8.258702481730352e-07, "loss": 0.3747, "step": 17019 }, { "epoch": 0.29585078829807576, "grad_norm": 1.5991951474166173, "learning_rate": 8.258488980162743e-07, "loss": 0.4053, "step": 17020 }, { "epoch": 0.2958681708355786, "grad_norm": 0.9060124974931978, "learning_rate": 8.258275468267221e-07, "loss": 0.279, "step": 17021 }, { "epoch": 0.2958855533730814, "grad_norm": 2.791220494403194, "learning_rate": 8.258061946044463e-07, "loss": 0.3229, "step": 17022 }, { "epoch": 0.29590293591058425, "grad_norm": 1.1557758483828071, "learning_rate": 8.257848413495147e-07, "loss": 0.2162, "step": 17023 }, { "epoch": 0.295920318448087, "grad_norm": 0.9684349649570966, "learning_rate": 8.257634870619946e-07, "loss": 0.1745, "step": 17024 }, { "epoch": 0.29593770098558986, "grad_norm": 2.286022457535784, "learning_rate": 8.257421317419538e-07, "loss": 0.4252, "step": 17025 }, { "epoch": 0.2959550835230927, "grad_norm": 1.1785788518637341, "learning_rate": 8.257207753894603e-07, "loss": 0.3292, "step": 17026 }, { "epoch": 0.2959724660605955, "grad_norm": 1.429776963042106, "learning_rate": 8.256994180045813e-07, "loss": 0.3528, "step": 17027 }, { "epoch": 0.29598984859809835, "grad_norm": 2.7012875065115933, "learning_rate": 8.256780595873849e-07, "loss": 0.3451, "step": 17028 }, { "epoch": 0.2960072311356012, "grad_norm": 1.334814720285688, "learning_rate": 8.256567001379387e-07, "loss": 0.3263, "step": 17029 }, { "epoch": 0.296024613673104, "grad_norm": 1.6556123459337204, "learning_rate": 8.256353396563104e-07, "loss": 0.3137, "step": 17030 }, { "epoch": 0.29604199621060684, "grad_norm": 1.5747691590560136, "learning_rate": 8.256139781425675e-07, "loss": 0.2742, "step": 17031 }, { "epoch": 0.29605937874810967, "grad_norm": 2.001536745086766, "learning_rate": 8.255926155967781e-07, "loss": 0.3524, "step": 17032 }, { "epoch": 0.2960767612856125, "grad_norm": 1.051406595695274, "learning_rate": 8.255712520190094e-07, "loss": 0.1724, "step": 17033 }, { "epoch": 0.2960941438231153, "grad_norm": 2.0665148229863792, "learning_rate": 8.255498874093295e-07, "loss": 0.3745, "step": 17034 }, { "epoch": 0.2961115263606181, "grad_norm": 2.231295094787616, "learning_rate": 8.255285217678059e-07, "loss": 0.3284, "step": 17035 }, { "epoch": 0.29612890889812093, "grad_norm": 0.9138140982645524, "learning_rate": 8.255071550945066e-07, "loss": 0.2781, "step": 17036 }, { "epoch": 0.29614629143562377, "grad_norm": 1.8125824313609515, "learning_rate": 8.254857873894992e-07, "loss": 0.5421, "step": 17037 }, { "epoch": 0.2961636739731266, "grad_norm": 1.4602070684385875, "learning_rate": 8.254644186528511e-07, "loss": 0.3383, "step": 17038 }, { "epoch": 0.2961810565106294, "grad_norm": 1.5736665033403263, "learning_rate": 8.254430488846305e-07, "loss": 0.466, "step": 17039 }, { "epoch": 0.29619843904813226, "grad_norm": 2.5456363660523507, "learning_rate": 8.254216780849048e-07, "loss": 0.4186, "step": 17040 }, { "epoch": 0.2962158215856351, "grad_norm": 2.8138329854043627, "learning_rate": 8.25400306253742e-07, "loss": 0.3257, "step": 17041 }, { "epoch": 0.2962332041231379, "grad_norm": 1.9467735361910183, "learning_rate": 8.253789333912097e-07, "loss": 0.3883, "step": 17042 }, { "epoch": 0.29625058666064075, "grad_norm": 1.503542349911322, "learning_rate": 8.253575594973755e-07, "loss": 0.4049, "step": 17043 }, { "epoch": 0.2962679691981435, "grad_norm": 1.404629996542531, "learning_rate": 8.253361845723074e-07, "loss": 0.3436, "step": 17044 }, { "epoch": 0.29628535173564635, "grad_norm": 1.3290221831276054, "learning_rate": 8.253148086160731e-07, "loss": 0.346, "step": 17045 }, { "epoch": 0.2963027342731492, "grad_norm": 1.7521811183602196, "learning_rate": 8.252934316287401e-07, "loss": 0.3213, "step": 17046 }, { "epoch": 0.296320116810652, "grad_norm": 2.813154505156216, "learning_rate": 8.252720536103765e-07, "loss": 0.5687, "step": 17047 }, { "epoch": 0.29633749934815484, "grad_norm": 1.4666251270834922, "learning_rate": 8.252506745610499e-07, "loss": 0.2348, "step": 17048 }, { "epoch": 0.2963548818856577, "grad_norm": 1.3749233732990493, "learning_rate": 8.25229294480828e-07, "loss": 0.733, "step": 17049 }, { "epoch": 0.2963722644231605, "grad_norm": 2.129569072221918, "learning_rate": 8.252079133697787e-07, "loss": 0.3757, "step": 17050 }, { "epoch": 0.29638964696066333, "grad_norm": 2.790884879239051, "learning_rate": 8.251865312279698e-07, "loss": 0.4765, "step": 17051 }, { "epoch": 0.29640702949816616, "grad_norm": 1.4708060121284459, "learning_rate": 8.251651480554686e-07, "loss": 0.347, "step": 17052 }, { "epoch": 0.296424412035669, "grad_norm": 1.2557303157711452, "learning_rate": 8.251437638523436e-07, "loss": 0.4297, "step": 17053 }, { "epoch": 0.29644179457317177, "grad_norm": 2.556699274797837, "learning_rate": 8.251223786186621e-07, "loss": 0.3564, "step": 17054 }, { "epoch": 0.2964591771106746, "grad_norm": 1.5825916738957824, "learning_rate": 8.251009923544921e-07, "loss": 0.5515, "step": 17055 }, { "epoch": 0.29647655964817743, "grad_norm": 1.5509023714354075, "learning_rate": 8.250796050599013e-07, "loss": 0.4012, "step": 17056 }, { "epoch": 0.29649394218568026, "grad_norm": 2.445117567612291, "learning_rate": 8.250582167349574e-07, "loss": 0.3913, "step": 17057 }, { "epoch": 0.2965113247231831, "grad_norm": 1.864245871951242, "learning_rate": 8.250368273797284e-07, "loss": 0.3099, "step": 17058 }, { "epoch": 0.2965287072606859, "grad_norm": 3.4461466024648337, "learning_rate": 8.250154369942817e-07, "loss": 0.3113, "step": 17059 }, { "epoch": 0.29654608979818875, "grad_norm": 1.417380626563419, "learning_rate": 8.249940455786854e-07, "loss": 0.2884, "step": 17060 }, { "epoch": 0.2965634723356916, "grad_norm": 1.5006728616932732, "learning_rate": 8.249726531330075e-07, "loss": 0.3784, "step": 17061 }, { "epoch": 0.2965808548731944, "grad_norm": 2.749431181283856, "learning_rate": 8.249512596573155e-07, "loss": 0.3551, "step": 17062 }, { "epoch": 0.29659823741069724, "grad_norm": 1.949983332770497, "learning_rate": 8.249298651516772e-07, "loss": 0.4218, "step": 17063 }, { "epoch": 0.2966156199482, "grad_norm": 1.3387527052376078, "learning_rate": 8.249084696161605e-07, "loss": 0.1964, "step": 17064 }, { "epoch": 0.29663300248570285, "grad_norm": 1.3775172313269943, "learning_rate": 8.248870730508332e-07, "loss": 0.2954, "step": 17065 }, { "epoch": 0.2966503850232057, "grad_norm": 1.308163922250774, "learning_rate": 8.248656754557633e-07, "loss": 0.389, "step": 17066 }, { "epoch": 0.2966677675607085, "grad_norm": 2.045307788257537, "learning_rate": 8.248442768310182e-07, "loss": 0.5597, "step": 17067 }, { "epoch": 0.29668515009821134, "grad_norm": 1.246095983559002, "learning_rate": 8.248228771766661e-07, "loss": 0.3559, "step": 17068 }, { "epoch": 0.29670253263571417, "grad_norm": 1.316582378254072, "learning_rate": 8.248014764927746e-07, "loss": 0.4442, "step": 17069 }, { "epoch": 0.296719915173217, "grad_norm": 2.2459557839971382, "learning_rate": 8.247800747794117e-07, "loss": 0.4124, "step": 17070 }, { "epoch": 0.2967372977107198, "grad_norm": 1.1645759200119532, "learning_rate": 8.247586720366451e-07, "loss": 0.2727, "step": 17071 }, { "epoch": 0.29675468024822266, "grad_norm": 1.590016560111671, "learning_rate": 8.247372682645426e-07, "loss": 0.4022, "step": 17072 }, { "epoch": 0.2967720627857255, "grad_norm": 1.5971647219532417, "learning_rate": 8.247158634631723e-07, "loss": 0.3081, "step": 17073 }, { "epoch": 0.29678944532322826, "grad_norm": 2.006167630103996, "learning_rate": 8.246944576326018e-07, "loss": 0.2668, "step": 17074 }, { "epoch": 0.2968068278607311, "grad_norm": 1.6035554341379195, "learning_rate": 8.24673050772899e-07, "loss": 0.4094, "step": 17075 }, { "epoch": 0.2968242103982339, "grad_norm": 1.5331657849730909, "learning_rate": 8.246516428841319e-07, "loss": 0.2621, "step": 17076 }, { "epoch": 0.29684159293573675, "grad_norm": 1.2661561288921643, "learning_rate": 8.246302339663681e-07, "loss": 0.3408, "step": 17077 }, { "epoch": 0.2968589754732396, "grad_norm": 4.003757893369433, "learning_rate": 8.246088240196756e-07, "loss": 0.5761, "step": 17078 }, { "epoch": 0.2968763580107424, "grad_norm": 1.4205103741713694, "learning_rate": 8.245874130441223e-07, "loss": 0.2603, "step": 17079 }, { "epoch": 0.29689374054824524, "grad_norm": 1.4102929420961614, "learning_rate": 8.24566001039776e-07, "loss": 0.5902, "step": 17080 }, { "epoch": 0.2969111230857481, "grad_norm": 1.5906090845107232, "learning_rate": 8.245445880067044e-07, "loss": 0.3816, "step": 17081 }, { "epoch": 0.2969285056232509, "grad_norm": 1.5148778006077974, "learning_rate": 8.245231739449756e-07, "loss": 0.3944, "step": 17082 }, { "epoch": 0.2969458881607537, "grad_norm": 1.9299466769207054, "learning_rate": 8.245017588546576e-07, "loss": 0.3507, "step": 17083 }, { "epoch": 0.2969632706982565, "grad_norm": 2.2795906320290578, "learning_rate": 8.24480342735818e-07, "loss": 0.4175, "step": 17084 }, { "epoch": 0.29698065323575934, "grad_norm": 1.0866202764243549, "learning_rate": 8.244589255885246e-07, "loss": 0.2246, "step": 17085 }, { "epoch": 0.29699803577326217, "grad_norm": 1.6235369307393588, "learning_rate": 8.244375074128458e-07, "loss": 0.4033, "step": 17086 }, { "epoch": 0.297015418310765, "grad_norm": 1.9161275258037989, "learning_rate": 8.244160882088488e-07, "loss": 0.3755, "step": 17087 }, { "epoch": 0.29703280084826783, "grad_norm": 2.2303707274582734, "learning_rate": 8.243946679766019e-07, "loss": 0.3172, "step": 17088 }, { "epoch": 0.29705018338577066, "grad_norm": 1.2493757898914355, "learning_rate": 8.243732467161731e-07, "loss": 0.2548, "step": 17089 }, { "epoch": 0.2970675659232735, "grad_norm": 1.1793846555063134, "learning_rate": 8.243518244276299e-07, "loss": 0.2501, "step": 17090 }, { "epoch": 0.2970849484607763, "grad_norm": 2.171915066995042, "learning_rate": 8.243304011110405e-07, "loss": 0.47, "step": 17091 }, { "epoch": 0.29710233099827915, "grad_norm": 1.7818535626024028, "learning_rate": 8.243089767664727e-07, "loss": 0.3275, "step": 17092 }, { "epoch": 0.2971197135357819, "grad_norm": 3.5539317025327417, "learning_rate": 8.242875513939945e-07, "loss": 0.4179, "step": 17093 }, { "epoch": 0.29713709607328476, "grad_norm": 2.2112397802698878, "learning_rate": 8.242661249936735e-07, "loss": 0.3667, "step": 17094 }, { "epoch": 0.2971544786107876, "grad_norm": 1.2961119642838195, "learning_rate": 8.24244697565578e-07, "loss": 0.4302, "step": 17095 }, { "epoch": 0.2971718611482904, "grad_norm": 2.202787316189723, "learning_rate": 8.242232691097758e-07, "loss": 0.521, "step": 17096 }, { "epoch": 0.29718924368579325, "grad_norm": 1.2626465486322245, "learning_rate": 8.242018396263347e-07, "loss": 0.4605, "step": 17097 }, { "epoch": 0.2972066262232961, "grad_norm": 1.5602085067889608, "learning_rate": 8.241804091153226e-07, "loss": 0.4329, "step": 17098 }, { "epoch": 0.2972240087607989, "grad_norm": 1.697625671062639, "learning_rate": 8.241589775768078e-07, "loss": 0.4939, "step": 17099 }, { "epoch": 0.29724139129830174, "grad_norm": 1.1920340619776348, "learning_rate": 8.241375450108576e-07, "loss": 0.2354, "step": 17100 }, { "epoch": 0.29725877383580457, "grad_norm": 1.8697639175973986, "learning_rate": 8.241161114175405e-07, "loss": 0.4264, "step": 17101 }, { "epoch": 0.2972761563733074, "grad_norm": 2.34070803037989, "learning_rate": 8.240946767969242e-07, "loss": 0.2878, "step": 17102 }, { "epoch": 0.2972935389108102, "grad_norm": 1.6322638591164527, "learning_rate": 8.240732411490767e-07, "loss": 0.3877, "step": 17103 }, { "epoch": 0.297310921448313, "grad_norm": 2.3936263074425486, "learning_rate": 8.240518044740658e-07, "loss": 0.3538, "step": 17104 }, { "epoch": 0.29732830398581583, "grad_norm": 2.6389208672934044, "learning_rate": 8.240303667719595e-07, "loss": 0.4222, "step": 17105 }, { "epoch": 0.29734568652331866, "grad_norm": 1.1731975461416124, "learning_rate": 8.240089280428258e-07, "loss": 0.4229, "step": 17106 }, { "epoch": 0.2973630690608215, "grad_norm": 1.385062018314087, "learning_rate": 8.239874882867327e-07, "loss": 0.1747, "step": 17107 }, { "epoch": 0.2973804515983243, "grad_norm": 2.5085650970452327, "learning_rate": 8.239660475037481e-07, "loss": 0.3398, "step": 17108 }, { "epoch": 0.29739783413582715, "grad_norm": 2.072163387427937, "learning_rate": 8.239446056939399e-07, "loss": 0.3197, "step": 17109 }, { "epoch": 0.29741521667333, "grad_norm": 2.262881941070196, "learning_rate": 8.239231628573761e-07, "loss": 0.291, "step": 17110 }, { "epoch": 0.2974325992108328, "grad_norm": 2.140018358652208, "learning_rate": 8.239017189941246e-07, "loss": 0.4341, "step": 17111 }, { "epoch": 0.29744998174833565, "grad_norm": 1.1619758451365179, "learning_rate": 8.238802741042536e-07, "loss": 0.4758, "step": 17112 }, { "epoch": 0.2974673642858384, "grad_norm": 1.6059658174363256, "learning_rate": 8.238588281878308e-07, "loss": 0.2273, "step": 17113 }, { "epoch": 0.29748474682334125, "grad_norm": 2.233675132659116, "learning_rate": 8.238373812449243e-07, "loss": 0.2587, "step": 17114 }, { "epoch": 0.2975021293608441, "grad_norm": 0.9208715187852576, "learning_rate": 8.238159332756021e-07, "loss": 0.2853, "step": 17115 }, { "epoch": 0.2975195118983469, "grad_norm": 1.712227702079774, "learning_rate": 8.237944842799322e-07, "loss": 0.3359, "step": 17116 }, { "epoch": 0.29753689443584974, "grad_norm": 1.2089850321093063, "learning_rate": 8.237730342579824e-07, "loss": 0.32, "step": 17117 }, { "epoch": 0.29755427697335257, "grad_norm": 1.0917344016408945, "learning_rate": 8.237515832098209e-07, "loss": 0.3473, "step": 17118 }, { "epoch": 0.2975716595108554, "grad_norm": 1.376190475577975, "learning_rate": 8.237301311355156e-07, "loss": 0.3016, "step": 17119 }, { "epoch": 0.29758904204835823, "grad_norm": 1.2461035794985018, "learning_rate": 8.237086780351344e-07, "loss": 0.377, "step": 17120 }, { "epoch": 0.29760642458586106, "grad_norm": 1.3755383538521049, "learning_rate": 8.236872239087454e-07, "loss": 0.2932, "step": 17121 }, { "epoch": 0.2976238071233639, "grad_norm": 1.2880438195796928, "learning_rate": 8.236657687564166e-07, "loss": 0.376, "step": 17122 }, { "epoch": 0.29764118966086667, "grad_norm": 1.8230566597351872, "learning_rate": 8.23644312578216e-07, "loss": 0.3084, "step": 17123 }, { "epoch": 0.2976585721983695, "grad_norm": 1.1457221221136447, "learning_rate": 8.236228553742117e-07, "loss": 0.4801, "step": 17124 }, { "epoch": 0.29767595473587233, "grad_norm": 1.6801859016287743, "learning_rate": 8.236013971444716e-07, "loss": 0.2947, "step": 17125 }, { "epoch": 0.29769333727337516, "grad_norm": 1.814979605164024, "learning_rate": 8.235799378890636e-07, "loss": 0.2436, "step": 17126 }, { "epoch": 0.297710719810878, "grad_norm": 2.6444877106843423, "learning_rate": 8.235584776080559e-07, "loss": 0.5798, "step": 17127 }, { "epoch": 0.2977281023483808, "grad_norm": 1.7472693096703409, "learning_rate": 8.235370163015164e-07, "loss": 0.4081, "step": 17128 }, { "epoch": 0.29774548488588365, "grad_norm": 1.118950900147506, "learning_rate": 8.235155539695132e-07, "loss": 0.2809, "step": 17129 }, { "epoch": 0.2977628674233865, "grad_norm": 1.682379499528744, "learning_rate": 8.234940906121144e-07, "loss": 0.4234, "step": 17130 }, { "epoch": 0.2977802499608893, "grad_norm": 2.0327188741927595, "learning_rate": 8.234726262293878e-07, "loss": 0.5111, "step": 17131 }, { "epoch": 0.29779763249839214, "grad_norm": 2.3031112603985506, "learning_rate": 8.234511608214016e-07, "loss": 0.6943, "step": 17132 }, { "epoch": 0.2978150150358949, "grad_norm": 1.366478482645011, "learning_rate": 8.23429694388224e-07, "loss": 0.2767, "step": 17133 }, { "epoch": 0.29783239757339774, "grad_norm": 2.47904386900521, "learning_rate": 8.234082269299225e-07, "loss": 0.3304, "step": 17134 }, { "epoch": 0.2978497801109006, "grad_norm": 1.6102405425032174, "learning_rate": 8.233867584465657e-07, "loss": 0.3588, "step": 17135 }, { "epoch": 0.2978671626484034, "grad_norm": 2.9170185101958483, "learning_rate": 8.233652889382213e-07, "loss": 0.4336, "step": 17136 }, { "epoch": 0.29788454518590624, "grad_norm": 1.994376913412094, "learning_rate": 8.233438184049577e-07, "loss": 0.2958, "step": 17137 }, { "epoch": 0.29790192772340907, "grad_norm": 1.49828343988122, "learning_rate": 8.233223468468424e-07, "loss": 0.3005, "step": 17138 }, { "epoch": 0.2979193102609119, "grad_norm": 1.7989669963328625, "learning_rate": 8.233008742639441e-07, "loss": 0.2944, "step": 17139 }, { "epoch": 0.2979366927984147, "grad_norm": 2.1433080317400726, "learning_rate": 8.232794006563304e-07, "loss": 0.5986, "step": 17140 }, { "epoch": 0.29795407533591756, "grad_norm": 1.5482847735516228, "learning_rate": 8.232579260240695e-07, "loss": 0.3331, "step": 17141 }, { "epoch": 0.2979714578734204, "grad_norm": 1.3728458007005064, "learning_rate": 8.232364503672294e-07, "loss": 0.472, "step": 17142 }, { "epoch": 0.29798884041092316, "grad_norm": 1.9671304648100396, "learning_rate": 8.232149736858785e-07, "loss": 0.4185, "step": 17143 }, { "epoch": 0.298006222948426, "grad_norm": 1.551084799194148, "learning_rate": 8.231934959800844e-07, "loss": 0.4142, "step": 17144 }, { "epoch": 0.2980236054859288, "grad_norm": 0.8670180611304537, "learning_rate": 8.231720172499152e-07, "loss": 0.2079, "step": 17145 }, { "epoch": 0.29804098802343165, "grad_norm": 2.311369796311749, "learning_rate": 8.231505374954395e-07, "loss": 0.5329, "step": 17146 }, { "epoch": 0.2980583705609345, "grad_norm": 2.065096075440962, "learning_rate": 8.231290567167248e-07, "loss": 0.3674, "step": 17147 }, { "epoch": 0.2980757530984373, "grad_norm": 2.295852514622249, "learning_rate": 8.231075749138396e-07, "loss": 0.2189, "step": 17148 }, { "epoch": 0.29809313563594014, "grad_norm": 2.0577460831305827, "learning_rate": 8.230860920868516e-07, "loss": 0.2616, "step": 17149 }, { "epoch": 0.298110518173443, "grad_norm": 1.4737618736723401, "learning_rate": 8.230646082358294e-07, "loss": 0.3766, "step": 17150 }, { "epoch": 0.2981279007109458, "grad_norm": 3.634498463229105, "learning_rate": 8.230431233608406e-07, "loss": 0.3559, "step": 17151 }, { "epoch": 0.29814528324844863, "grad_norm": 1.195927540685713, "learning_rate": 8.230216374619535e-07, "loss": 0.1391, "step": 17152 }, { "epoch": 0.2981626657859514, "grad_norm": 1.4733856896583093, "learning_rate": 8.230001505392362e-07, "loss": 0.3917, "step": 17153 }, { "epoch": 0.29818004832345424, "grad_norm": 1.4197194167346723, "learning_rate": 8.229786625927568e-07, "loss": 0.5468, "step": 17154 }, { "epoch": 0.29819743086095707, "grad_norm": 2.349262383482504, "learning_rate": 8.229571736225834e-07, "loss": 0.3305, "step": 17155 }, { "epoch": 0.2982148133984599, "grad_norm": 1.3864402854627742, "learning_rate": 8.229356836287842e-07, "loss": 0.4367, "step": 17156 }, { "epoch": 0.29823219593596273, "grad_norm": 1.287033259351527, "learning_rate": 8.229141926114271e-07, "loss": 0.3658, "step": 17157 }, { "epoch": 0.29824957847346556, "grad_norm": 1.238248458553577, "learning_rate": 8.228927005705803e-07, "loss": 0.2946, "step": 17158 }, { "epoch": 0.2982669610109684, "grad_norm": 1.5995370584556194, "learning_rate": 8.228712075063122e-07, "loss": 0.2738, "step": 17159 }, { "epoch": 0.2982843435484712, "grad_norm": 1.6214017126330584, "learning_rate": 8.228497134186905e-07, "loss": 0.2475, "step": 17160 }, { "epoch": 0.29830172608597405, "grad_norm": 2.2570168390069134, "learning_rate": 8.228282183077835e-07, "loss": 0.3975, "step": 17161 }, { "epoch": 0.2983191086234769, "grad_norm": 1.426477078530641, "learning_rate": 8.228067221736595e-07, "loss": 0.192, "step": 17162 }, { "epoch": 0.29833649116097966, "grad_norm": 1.354395340595838, "learning_rate": 8.227852250163863e-07, "loss": 0.3989, "step": 17163 }, { "epoch": 0.2983538736984825, "grad_norm": 1.6115039958243136, "learning_rate": 8.227637268360323e-07, "loss": 0.273, "step": 17164 }, { "epoch": 0.2983712562359853, "grad_norm": 4.6788153425526655, "learning_rate": 8.227422276326657e-07, "loss": 0.3873, "step": 17165 }, { "epoch": 0.29838863877348815, "grad_norm": 1.8179522922219435, "learning_rate": 8.227207274063542e-07, "loss": 0.3457, "step": 17166 }, { "epoch": 0.298406021310991, "grad_norm": 2.179875039334153, "learning_rate": 8.226992261571664e-07, "loss": 0.3031, "step": 17167 }, { "epoch": 0.2984234038484938, "grad_norm": 1.992428217408846, "learning_rate": 8.226777238851703e-07, "loss": 0.3251, "step": 17168 }, { "epoch": 0.29844078638599664, "grad_norm": 2.679821797035134, "learning_rate": 8.226562205904339e-07, "loss": 0.4742, "step": 17169 }, { "epoch": 0.29845816892349947, "grad_norm": 1.5544226139546717, "learning_rate": 8.226347162730256e-07, "loss": 0.4411, "step": 17170 }, { "epoch": 0.2984755514610023, "grad_norm": 1.6676088343940876, "learning_rate": 8.226132109330136e-07, "loss": 0.4463, "step": 17171 }, { "epoch": 0.29849293399850513, "grad_norm": 3.4966723035502185, "learning_rate": 8.225917045704658e-07, "loss": 0.5585, "step": 17172 }, { "epoch": 0.2985103165360079, "grad_norm": 2.0585094237768713, "learning_rate": 8.225701971854504e-07, "loss": 0.5216, "step": 17173 }, { "epoch": 0.29852769907351073, "grad_norm": 1.8045010209132328, "learning_rate": 8.225486887780357e-07, "loss": 0.3524, "step": 17174 }, { "epoch": 0.29854508161101356, "grad_norm": 1.8057962624466686, "learning_rate": 8.2252717934829e-07, "loss": 0.5769, "step": 17175 }, { "epoch": 0.2985624641485164, "grad_norm": 1.8663300468623427, "learning_rate": 8.22505668896281e-07, "loss": 0.457, "step": 17176 }, { "epoch": 0.2985798466860192, "grad_norm": 1.5475710319958156, "learning_rate": 8.224841574220773e-07, "loss": 0.3836, "step": 17177 }, { "epoch": 0.29859722922352205, "grad_norm": 1.153580408216792, "learning_rate": 8.224626449257471e-07, "loss": 0.3264, "step": 17178 }, { "epoch": 0.2986146117610249, "grad_norm": 2.4455652402602195, "learning_rate": 8.224411314073582e-07, "loss": 0.5292, "step": 17179 }, { "epoch": 0.2986319942985277, "grad_norm": 2.3499421725351204, "learning_rate": 8.224196168669792e-07, "loss": 0.645, "step": 17180 }, { "epoch": 0.29864937683603054, "grad_norm": 1.3757277371226382, "learning_rate": 8.223981013046781e-07, "loss": 0.1686, "step": 17181 }, { "epoch": 0.2986667593735334, "grad_norm": 1.6301321124266175, "learning_rate": 8.223765847205231e-07, "loss": 0.3881, "step": 17182 }, { "epoch": 0.29868414191103615, "grad_norm": 2.0426467764840366, "learning_rate": 8.223550671145823e-07, "loss": 0.1993, "step": 17183 }, { "epoch": 0.298701524448539, "grad_norm": 1.3858343666386463, "learning_rate": 8.223335484869242e-07, "loss": 0.3337, "step": 17184 }, { "epoch": 0.2987189069860418, "grad_norm": 1.3615596808227048, "learning_rate": 8.223120288376166e-07, "loss": 0.5667, "step": 17185 }, { "epoch": 0.29873628952354464, "grad_norm": 1.6302808259806425, "learning_rate": 8.222905081667282e-07, "loss": 0.4508, "step": 17186 }, { "epoch": 0.29875367206104747, "grad_norm": 1.8219840601600916, "learning_rate": 8.222689864743269e-07, "loss": 0.275, "step": 17187 }, { "epoch": 0.2987710545985503, "grad_norm": 1.9524451723211562, "learning_rate": 8.222474637604807e-07, "loss": 0.4176, "step": 17188 }, { "epoch": 0.29878843713605313, "grad_norm": 1.7264533590105438, "learning_rate": 8.222259400252582e-07, "loss": 0.3714, "step": 17189 }, { "epoch": 0.29880581967355596, "grad_norm": 1.7521154212944112, "learning_rate": 8.222044152687275e-07, "loss": 0.4621, "step": 17190 }, { "epoch": 0.2988232022110588, "grad_norm": 3.0283749374131395, "learning_rate": 8.221828894909568e-07, "loss": 0.4233, "step": 17191 }, { "epoch": 0.2988405847485616, "grad_norm": 2.0888516966788684, "learning_rate": 8.221613626920143e-07, "loss": 0.3566, "step": 17192 }, { "epoch": 0.2988579672860644, "grad_norm": 1.9468752996219558, "learning_rate": 8.221398348719683e-07, "loss": 0.3033, "step": 17193 }, { "epoch": 0.2988753498235672, "grad_norm": 2.707052816809514, "learning_rate": 8.221183060308872e-07, "loss": 0.5953, "step": 17194 }, { "epoch": 0.29889273236107006, "grad_norm": 1.4759117942437525, "learning_rate": 8.220967761688387e-07, "loss": 0.2446, "step": 17195 }, { "epoch": 0.2989101148985729, "grad_norm": 1.523276332845191, "learning_rate": 8.220752452858916e-07, "loss": 0.2284, "step": 17196 }, { "epoch": 0.2989274974360757, "grad_norm": 1.8286765318348461, "learning_rate": 8.22053713382114e-07, "loss": 0.2865, "step": 17197 }, { "epoch": 0.29894487997357855, "grad_norm": 1.926573372257284, "learning_rate": 8.220321804575738e-07, "loss": 0.8534, "step": 17198 }, { "epoch": 0.2989622625110814, "grad_norm": 1.3830877052952932, "learning_rate": 8.220106465123396e-07, "loss": 0.3043, "step": 17199 }, { "epoch": 0.2989796450485842, "grad_norm": 1.897907556598351, "learning_rate": 8.219891115464796e-07, "loss": 0.5617, "step": 17200 }, { "epoch": 0.29899702758608704, "grad_norm": 1.1610436840547296, "learning_rate": 8.219675755600621e-07, "loss": 0.4533, "step": 17201 }, { "epoch": 0.29901441012358987, "grad_norm": 1.9139172355723333, "learning_rate": 8.219460385531552e-07, "loss": 0.4283, "step": 17202 }, { "epoch": 0.29903179266109264, "grad_norm": 1.6641364545971393, "learning_rate": 8.219245005258274e-07, "loss": 0.3881, "step": 17203 }, { "epoch": 0.2990491751985955, "grad_norm": 2.9297535956623, "learning_rate": 8.219029614781467e-07, "loss": 0.3229, "step": 17204 }, { "epoch": 0.2990665577360983, "grad_norm": 1.8581660458633182, "learning_rate": 8.218814214101815e-07, "loss": 0.3686, "step": 17205 }, { "epoch": 0.29908394027360113, "grad_norm": 1.735732878330304, "learning_rate": 8.218598803220001e-07, "loss": 0.2752, "step": 17206 }, { "epoch": 0.29910132281110396, "grad_norm": 2.0538330711732926, "learning_rate": 8.218383382136706e-07, "loss": 0.3167, "step": 17207 }, { "epoch": 0.2991187053486068, "grad_norm": 2.016453257914618, "learning_rate": 8.218167950852613e-07, "loss": 0.4354, "step": 17208 }, { "epoch": 0.2991360878861096, "grad_norm": 2.427669349433351, "learning_rate": 8.217952509368409e-07, "loss": 0.4499, "step": 17209 }, { "epoch": 0.29915347042361246, "grad_norm": 1.3532826104152953, "learning_rate": 8.217737057684772e-07, "loss": 0.4734, "step": 17210 }, { "epoch": 0.2991708529611153, "grad_norm": 1.5793073767165247, "learning_rate": 8.217521595802386e-07, "loss": 0.349, "step": 17211 }, { "epoch": 0.2991882354986181, "grad_norm": 2.112314345158656, "learning_rate": 8.217306123721937e-07, "loss": 0.2574, "step": 17212 }, { "epoch": 0.2992056180361209, "grad_norm": 1.387772489965661, "learning_rate": 8.217090641444104e-07, "loss": 0.2102, "step": 17213 }, { "epoch": 0.2992230005736237, "grad_norm": 3.630635747458699, "learning_rate": 8.216875148969571e-07, "loss": 0.4187, "step": 17214 }, { "epoch": 0.29924038311112655, "grad_norm": 1.537224584117345, "learning_rate": 8.216659646299022e-07, "loss": 0.381, "step": 17215 }, { "epoch": 0.2992577656486294, "grad_norm": 1.8474733692677459, "learning_rate": 8.216444133433141e-07, "loss": 0.3647, "step": 17216 }, { "epoch": 0.2992751481861322, "grad_norm": 1.6738231419508542, "learning_rate": 8.216228610372607e-07, "loss": 0.456, "step": 17217 }, { "epoch": 0.29929253072363504, "grad_norm": 1.7712285624151254, "learning_rate": 8.216013077118107e-07, "loss": 0.3164, "step": 17218 }, { "epoch": 0.29930991326113787, "grad_norm": 1.5912937339389095, "learning_rate": 8.215797533670322e-07, "loss": 0.4478, "step": 17219 }, { "epoch": 0.2993272957986407, "grad_norm": 3.1452476691033144, "learning_rate": 8.215581980029937e-07, "loss": 0.4554, "step": 17220 }, { "epoch": 0.29934467833614353, "grad_norm": 2.166057307056998, "learning_rate": 8.215366416197633e-07, "loss": 0.6218, "step": 17221 }, { "epoch": 0.2993620608736463, "grad_norm": 1.8671074282631406, "learning_rate": 8.215150842174097e-07, "loss": 0.4412, "step": 17222 }, { "epoch": 0.29937944341114914, "grad_norm": 1.345214658277165, "learning_rate": 8.214935257960007e-07, "loss": 0.4813, "step": 17223 }, { "epoch": 0.29939682594865197, "grad_norm": 1.7169664574977606, "learning_rate": 8.214719663556051e-07, "loss": 0.5759, "step": 17224 }, { "epoch": 0.2994142084861548, "grad_norm": 1.1912756389197645, "learning_rate": 8.21450405896291e-07, "loss": 0.2028, "step": 17225 }, { "epoch": 0.29943159102365763, "grad_norm": 1.914458705127286, "learning_rate": 8.214288444181268e-07, "loss": 0.4103, "step": 17226 }, { "epoch": 0.29944897356116046, "grad_norm": 2.952788406641129, "learning_rate": 8.214072819211805e-07, "loss": 0.3723, "step": 17227 }, { "epoch": 0.2994663560986633, "grad_norm": 1.5026789365033255, "learning_rate": 8.213857184055213e-07, "loss": 0.4148, "step": 17228 }, { "epoch": 0.2994837386361661, "grad_norm": 2.2297378085429735, "learning_rate": 8.213641538712166e-07, "loss": 0.2785, "step": 17229 }, { "epoch": 0.29950112117366895, "grad_norm": 2.882834179016329, "learning_rate": 8.213425883183353e-07, "loss": 0.2196, "step": 17230 }, { "epoch": 0.2995185037111718, "grad_norm": 1.841912921576882, "learning_rate": 8.213210217469456e-07, "loss": 0.347, "step": 17231 }, { "epoch": 0.29953588624867455, "grad_norm": 2.158589100036687, "learning_rate": 8.212994541571158e-07, "loss": 0.5893, "step": 17232 }, { "epoch": 0.2995532687861774, "grad_norm": 2.7220636941089564, "learning_rate": 8.212778855489144e-07, "loss": 0.3208, "step": 17233 }, { "epoch": 0.2995706513236802, "grad_norm": 1.8911366879123026, "learning_rate": 8.212563159224097e-07, "loss": 0.2977, "step": 17234 }, { "epoch": 0.29958803386118305, "grad_norm": 4.563412486160317, "learning_rate": 8.2123474527767e-07, "loss": 0.3143, "step": 17235 }, { "epoch": 0.2996054163986859, "grad_norm": 1.9852208540835579, "learning_rate": 8.212131736147636e-07, "loss": 0.3055, "step": 17236 }, { "epoch": 0.2996227989361887, "grad_norm": 1.6038582080289006, "learning_rate": 8.211916009337592e-07, "loss": 0.2859, "step": 17237 }, { "epoch": 0.29964018147369154, "grad_norm": 1.2434563651568973, "learning_rate": 8.21170027234725e-07, "loss": 0.2999, "step": 17238 }, { "epoch": 0.29965756401119437, "grad_norm": 2.9732068398963447, "learning_rate": 8.211484525177292e-07, "loss": 0.4141, "step": 17239 }, { "epoch": 0.2996749465486972, "grad_norm": 1.0457403086001, "learning_rate": 8.211268767828403e-07, "loss": 0.3368, "step": 17240 }, { "epoch": 0.2996923290862, "grad_norm": 1.6721279626410872, "learning_rate": 8.211053000301268e-07, "loss": 0.4132, "step": 17241 }, { "epoch": 0.2997097116237028, "grad_norm": 1.5105952980199178, "learning_rate": 8.21083722259657e-07, "loss": 0.3199, "step": 17242 }, { "epoch": 0.29972709416120563, "grad_norm": 1.0265366005202767, "learning_rate": 8.210621434714993e-07, "loss": 0.3825, "step": 17243 }, { "epoch": 0.29974447669870846, "grad_norm": 1.0057264760297266, "learning_rate": 8.210405636657221e-07, "loss": 0.1967, "step": 17244 }, { "epoch": 0.2997618592362113, "grad_norm": 2.4642406369633667, "learning_rate": 8.210189828423936e-07, "loss": 0.3287, "step": 17245 }, { "epoch": 0.2997792417737141, "grad_norm": 2.8728945507263006, "learning_rate": 8.209974010015825e-07, "loss": 0.6027, "step": 17246 }, { "epoch": 0.29979662431121695, "grad_norm": 14.99939553220502, "learning_rate": 8.209758181433573e-07, "loss": 0.8174, "step": 17247 }, { "epoch": 0.2998140068487198, "grad_norm": 1.678145309740121, "learning_rate": 8.20954234267786e-07, "loss": 0.24, "step": 17248 }, { "epoch": 0.2998313893862226, "grad_norm": 2.4978787846206547, "learning_rate": 8.209326493749372e-07, "loss": 0.4298, "step": 17249 }, { "epoch": 0.29984877192372544, "grad_norm": 2.429657315631291, "learning_rate": 8.209110634648795e-07, "loss": 0.3598, "step": 17250 }, { "epoch": 0.2998661544612283, "grad_norm": 1.8821116857589986, "learning_rate": 8.208894765376809e-07, "loss": 0.2534, "step": 17251 }, { "epoch": 0.29988353699873105, "grad_norm": 1.9925272233915936, "learning_rate": 8.208678885934101e-07, "loss": 0.4983, "step": 17252 }, { "epoch": 0.2999009195362339, "grad_norm": 1.289799365248304, "learning_rate": 8.208462996321357e-07, "loss": 0.2327, "step": 17253 }, { "epoch": 0.2999183020737367, "grad_norm": 1.5260965280307874, "learning_rate": 8.208247096539258e-07, "loss": 0.2387, "step": 17254 }, { "epoch": 0.29993568461123954, "grad_norm": 1.160875937029117, "learning_rate": 8.208031186588489e-07, "loss": 0.2115, "step": 17255 }, { "epoch": 0.29995306714874237, "grad_norm": 1.051579484462361, "learning_rate": 8.207815266469736e-07, "loss": 0.1831, "step": 17256 }, { "epoch": 0.2999704496862452, "grad_norm": 1.8014336051151552, "learning_rate": 8.20759933618368e-07, "loss": 0.4296, "step": 17257 }, { "epoch": 0.29998783222374803, "grad_norm": 1.280792537304688, "learning_rate": 8.207383395731009e-07, "loss": 0.3454, "step": 17258 }, { "epoch": 0.30000521476125086, "grad_norm": 4.632615085575519, "learning_rate": 8.207167445112405e-07, "loss": 0.6597, "step": 17259 }, { "epoch": 0.3000225972987537, "grad_norm": 1.4960642102353952, "learning_rate": 8.206951484328554e-07, "loss": 0.328, "step": 17260 }, { "epoch": 0.3000399798362565, "grad_norm": 1.072364258090667, "learning_rate": 8.206735513380141e-07, "loss": 0.3423, "step": 17261 }, { "epoch": 0.3000573623737593, "grad_norm": 4.852680337479431, "learning_rate": 8.206519532267848e-07, "loss": 0.4052, "step": 17262 }, { "epoch": 0.3000747449112621, "grad_norm": 1.6511736049047328, "learning_rate": 8.206303540992363e-07, "loss": 0.3804, "step": 17263 }, { "epoch": 0.30009212744876496, "grad_norm": 1.5756353738245537, "learning_rate": 8.206087539554367e-07, "loss": 0.4222, "step": 17264 }, { "epoch": 0.3001095099862678, "grad_norm": 1.960419065173843, "learning_rate": 8.205871527954547e-07, "loss": 0.4295, "step": 17265 }, { "epoch": 0.3001268925237706, "grad_norm": 1.8200333060227551, "learning_rate": 8.205655506193586e-07, "loss": 0.4005, "step": 17266 }, { "epoch": 0.30014427506127345, "grad_norm": 1.9344456160864458, "learning_rate": 8.20543947427217e-07, "loss": 0.3037, "step": 17267 }, { "epoch": 0.3001616575987763, "grad_norm": 1.3449826886565364, "learning_rate": 8.205223432190983e-07, "loss": 0.3332, "step": 17268 }, { "epoch": 0.3001790401362791, "grad_norm": 1.8635462102294391, "learning_rate": 8.205007379950712e-07, "loss": 0.4898, "step": 17269 }, { "epoch": 0.30019642267378194, "grad_norm": 1.6528052491940333, "learning_rate": 8.204791317552038e-07, "loss": 0.4544, "step": 17270 }, { "epoch": 0.30021380521128477, "grad_norm": 2.2970615432511963, "learning_rate": 8.204575244995648e-07, "loss": 0.3324, "step": 17271 }, { "epoch": 0.30023118774878754, "grad_norm": 1.5316745900209867, "learning_rate": 8.204359162282226e-07, "loss": 0.3469, "step": 17272 }, { "epoch": 0.3002485702862904, "grad_norm": 1.7123521231059537, "learning_rate": 8.204143069412458e-07, "loss": 0.3929, "step": 17273 }, { "epoch": 0.3002659528237932, "grad_norm": 1.4161316816492293, "learning_rate": 8.203926966387029e-07, "loss": 0.2394, "step": 17274 }, { "epoch": 0.30028333536129603, "grad_norm": 1.8153236746164665, "learning_rate": 8.203710853206622e-07, "loss": 0.423, "step": 17275 }, { "epoch": 0.30030071789879886, "grad_norm": 1.906321241651612, "learning_rate": 8.203494729871923e-07, "loss": 0.2781, "step": 17276 }, { "epoch": 0.3003181004363017, "grad_norm": 1.1833018425009392, "learning_rate": 8.203278596383618e-07, "loss": 0.2763, "step": 17277 }, { "epoch": 0.3003354829738045, "grad_norm": 1.8757102234511869, "learning_rate": 8.203062452742392e-07, "loss": 0.4489, "step": 17278 }, { "epoch": 0.30035286551130735, "grad_norm": 1.1556645763212068, "learning_rate": 8.202846298948929e-07, "loss": 0.2301, "step": 17279 }, { "epoch": 0.3003702480488102, "grad_norm": 2.919584107437806, "learning_rate": 8.202630135003914e-07, "loss": 0.4513, "step": 17280 }, { "epoch": 0.300387630586313, "grad_norm": 2.4783564547263794, "learning_rate": 8.202413960908033e-07, "loss": 0.3757, "step": 17281 }, { "epoch": 0.3004050131238158, "grad_norm": 1.412310595875409, "learning_rate": 8.202197776661971e-07, "loss": 0.3443, "step": 17282 }, { "epoch": 0.3004223956613186, "grad_norm": 2.560512942467743, "learning_rate": 8.201981582266413e-07, "loss": 0.3679, "step": 17283 }, { "epoch": 0.30043977819882145, "grad_norm": 2.19577666225708, "learning_rate": 8.201765377722043e-07, "loss": 0.3038, "step": 17284 }, { "epoch": 0.3004571607363243, "grad_norm": 1.671972625018858, "learning_rate": 8.201549163029549e-07, "loss": 0.3561, "step": 17285 }, { "epoch": 0.3004745432738271, "grad_norm": 2.092153264221219, "learning_rate": 8.201332938189616e-07, "loss": 0.3577, "step": 17286 }, { "epoch": 0.30049192581132994, "grad_norm": 2.2498200961854122, "learning_rate": 8.201116703202926e-07, "loss": 0.5774, "step": 17287 }, { "epoch": 0.30050930834883277, "grad_norm": 3.058910647803683, "learning_rate": 8.200900458070167e-07, "loss": 0.4098, "step": 17288 }, { "epoch": 0.3005266908863356, "grad_norm": 2.4334977711441, "learning_rate": 8.200684202792023e-07, "loss": 0.2549, "step": 17289 }, { "epoch": 0.30054407342383843, "grad_norm": 1.4538018003485385, "learning_rate": 8.200467937369182e-07, "loss": 0.443, "step": 17290 }, { "epoch": 0.30056145596134126, "grad_norm": 2.261715031850589, "learning_rate": 8.200251661802327e-07, "loss": 0.32, "step": 17291 }, { "epoch": 0.30057883849884404, "grad_norm": 1.6306667081025432, "learning_rate": 8.200035376092145e-07, "loss": 0.3045, "step": 17292 }, { "epoch": 0.30059622103634687, "grad_norm": 2.058181294797858, "learning_rate": 8.199819080239321e-07, "loss": 0.4724, "step": 17293 }, { "epoch": 0.3006136035738497, "grad_norm": 1.6581417259531217, "learning_rate": 8.199602774244538e-07, "loss": 0.3048, "step": 17294 }, { "epoch": 0.3006309861113525, "grad_norm": 1.3640531577023591, "learning_rate": 8.199386458108486e-07, "loss": 0.4926, "step": 17295 }, { "epoch": 0.30064836864885536, "grad_norm": 2.459185141046664, "learning_rate": 8.199170131831849e-07, "loss": 0.6334, "step": 17296 }, { "epoch": 0.3006657511863582, "grad_norm": 1.6006389870082076, "learning_rate": 8.198953795415311e-07, "loss": 0.5293, "step": 17297 }, { "epoch": 0.300683133723861, "grad_norm": 2.0366956542391756, "learning_rate": 8.19873744885956e-07, "loss": 0.4175, "step": 17298 }, { "epoch": 0.30070051626136385, "grad_norm": 2.690797144173001, "learning_rate": 8.19852109216528e-07, "loss": 0.462, "step": 17299 }, { "epoch": 0.3007178987988667, "grad_norm": 1.6879889123550653, "learning_rate": 8.198304725333158e-07, "loss": 0.2839, "step": 17300 }, { "epoch": 0.3007352813363695, "grad_norm": 1.335140716291269, "learning_rate": 8.198088348363879e-07, "loss": 0.5259, "step": 17301 }, { "epoch": 0.3007526638738723, "grad_norm": 2.60301628607069, "learning_rate": 8.197871961258128e-07, "loss": 0.2369, "step": 17302 }, { "epoch": 0.3007700464113751, "grad_norm": 2.6213459658581133, "learning_rate": 8.197655564016593e-07, "loss": 0.3138, "step": 17303 }, { "epoch": 0.30078742894887794, "grad_norm": 1.4547898288568222, "learning_rate": 8.197439156639958e-07, "loss": 0.1799, "step": 17304 }, { "epoch": 0.3008048114863808, "grad_norm": 1.0410116122276798, "learning_rate": 8.197222739128911e-07, "loss": 0.2342, "step": 17305 }, { "epoch": 0.3008221940238836, "grad_norm": 1.2997244482228598, "learning_rate": 8.197006311484135e-07, "loss": 0.3595, "step": 17306 }, { "epoch": 0.30083957656138643, "grad_norm": 1.6060133677733337, "learning_rate": 8.196789873706318e-07, "loss": 0.342, "step": 17307 }, { "epoch": 0.30085695909888927, "grad_norm": 2.5106219216089367, "learning_rate": 8.196573425796146e-07, "loss": 0.5462, "step": 17308 }, { "epoch": 0.3008743416363921, "grad_norm": 1.741717034780264, "learning_rate": 8.196356967754306e-07, "loss": 0.5099, "step": 17309 }, { "epoch": 0.3008917241738949, "grad_norm": 1.7625704225893335, "learning_rate": 8.196140499581481e-07, "loss": 0.3775, "step": 17310 }, { "epoch": 0.30090910671139776, "grad_norm": 2.2205670438799383, "learning_rate": 8.195924021278357e-07, "loss": 0.5599, "step": 17311 }, { "epoch": 0.30092648924890053, "grad_norm": 2.066268309427526, "learning_rate": 8.195707532845624e-07, "loss": 0.3174, "step": 17312 }, { "epoch": 0.30094387178640336, "grad_norm": 1.859561905678552, "learning_rate": 8.195491034283967e-07, "loss": 0.2509, "step": 17313 }, { "epoch": 0.3009612543239062, "grad_norm": 2.0909247445057373, "learning_rate": 8.19527452559407e-07, "loss": 0.4205, "step": 17314 }, { "epoch": 0.300978636861409, "grad_norm": 2.825478730322528, "learning_rate": 8.195058006776621e-07, "loss": 0.3952, "step": 17315 }, { "epoch": 0.30099601939891185, "grad_norm": 1.4732260988932417, "learning_rate": 8.194841477832307e-07, "loss": 0.346, "step": 17316 }, { "epoch": 0.3010134019364147, "grad_norm": 2.0019916570729928, "learning_rate": 8.194624938761811e-07, "loss": 0.2037, "step": 17317 }, { "epoch": 0.3010307844739175, "grad_norm": 1.1115936397777324, "learning_rate": 8.194408389565821e-07, "loss": 0.279, "step": 17318 }, { "epoch": 0.30104816701142034, "grad_norm": 1.4082065430956328, "learning_rate": 8.194191830245025e-07, "loss": 0.3346, "step": 17319 }, { "epoch": 0.3010655495489232, "grad_norm": 1.843704154424298, "learning_rate": 8.19397526080011e-07, "loss": 0.3396, "step": 17320 }, { "epoch": 0.301082932086426, "grad_norm": 2.3735393215995617, "learning_rate": 8.193758681231757e-07, "loss": 0.504, "step": 17321 }, { "epoch": 0.3011003146239288, "grad_norm": 1.750580383736594, "learning_rate": 8.193542091540658e-07, "loss": 0.4092, "step": 17322 }, { "epoch": 0.3011176971614316, "grad_norm": 1.572808842217171, "learning_rate": 8.193325491727499e-07, "loss": 0.236, "step": 17323 }, { "epoch": 0.30113507969893444, "grad_norm": 1.657881771275469, "learning_rate": 8.193108881792963e-07, "loss": 0.4996, "step": 17324 }, { "epoch": 0.30115246223643727, "grad_norm": 1.6422972072576232, "learning_rate": 8.192892261737738e-07, "loss": 0.4438, "step": 17325 }, { "epoch": 0.3011698447739401, "grad_norm": 1.7582527364043061, "learning_rate": 8.192675631562513e-07, "loss": 0.3393, "step": 17326 }, { "epoch": 0.30118722731144293, "grad_norm": 1.8080879645916044, "learning_rate": 8.192458991267972e-07, "loss": 0.3916, "step": 17327 }, { "epoch": 0.30120460984894576, "grad_norm": 5.2239777037139365, "learning_rate": 8.192242340854804e-07, "loss": 0.4127, "step": 17328 }, { "epoch": 0.3012219923864486, "grad_norm": 2.4732719152232225, "learning_rate": 8.192025680323693e-07, "loss": 0.2916, "step": 17329 }, { "epoch": 0.3012393749239514, "grad_norm": 1.3564887472026588, "learning_rate": 8.191809009675326e-07, "loss": 0.3091, "step": 17330 }, { "epoch": 0.30125675746145425, "grad_norm": 1.6628961463680128, "learning_rate": 8.191592328910391e-07, "loss": 0.4137, "step": 17331 }, { "epoch": 0.301274139998957, "grad_norm": 1.0119812477763404, "learning_rate": 8.191375638029576e-07, "loss": 0.2216, "step": 17332 }, { "epoch": 0.30129152253645985, "grad_norm": 1.6222949349548481, "learning_rate": 8.191158937033564e-07, "loss": 0.2753, "step": 17333 }, { "epoch": 0.3013089050739627, "grad_norm": 1.5020668903898866, "learning_rate": 8.190942225923045e-07, "loss": 0.4249, "step": 17334 }, { "epoch": 0.3013262876114655, "grad_norm": 1.6138618193788492, "learning_rate": 8.190725504698705e-07, "loss": 0.4036, "step": 17335 }, { "epoch": 0.30134367014896835, "grad_norm": 3.266076210771373, "learning_rate": 8.190508773361231e-07, "loss": 0.4297, "step": 17336 }, { "epoch": 0.3013610526864712, "grad_norm": 1.331440926110825, "learning_rate": 8.19029203191131e-07, "loss": 0.4224, "step": 17337 }, { "epoch": 0.301378435223974, "grad_norm": 1.2392010939103613, "learning_rate": 8.190075280349629e-07, "loss": 0.2653, "step": 17338 }, { "epoch": 0.30139581776147684, "grad_norm": 1.5333643256816465, "learning_rate": 8.189858518676875e-07, "loss": 0.2763, "step": 17339 }, { "epoch": 0.30141320029897967, "grad_norm": 1.621734954790707, "learning_rate": 8.189641746893734e-07, "loss": 0.4842, "step": 17340 }, { "epoch": 0.3014305828364825, "grad_norm": 1.2547150873883288, "learning_rate": 8.189424965000893e-07, "loss": 0.4596, "step": 17341 }, { "epoch": 0.30144796537398527, "grad_norm": 1.650721180396545, "learning_rate": 8.189208172999041e-07, "loss": 0.3982, "step": 17342 }, { "epoch": 0.3014653479114881, "grad_norm": 1.6340192414111037, "learning_rate": 8.188991370888864e-07, "loss": 0.3398, "step": 17343 }, { "epoch": 0.30148273044899093, "grad_norm": 1.2905304456987907, "learning_rate": 8.188774558671048e-07, "loss": 0.2985, "step": 17344 }, { "epoch": 0.30150011298649376, "grad_norm": 1.7079100327344998, "learning_rate": 8.188557736346284e-07, "loss": 0.2681, "step": 17345 }, { "epoch": 0.3015174955239966, "grad_norm": 1.9015186052715143, "learning_rate": 8.188340903915256e-07, "loss": 0.2957, "step": 17346 }, { "epoch": 0.3015348780614994, "grad_norm": 1.6626038562003844, "learning_rate": 8.188124061378651e-07, "loss": 0.3675, "step": 17347 }, { "epoch": 0.30155226059900225, "grad_norm": 2.6748345304505734, "learning_rate": 8.187907208737157e-07, "loss": 0.3546, "step": 17348 }, { "epoch": 0.3015696431365051, "grad_norm": 1.504401468033724, "learning_rate": 8.187690345991462e-07, "loss": 0.3324, "step": 17349 }, { "epoch": 0.3015870256740079, "grad_norm": 1.7170351628874303, "learning_rate": 8.187473473142253e-07, "loss": 0.5188, "step": 17350 }, { "epoch": 0.30160440821151074, "grad_norm": 1.8979849162459177, "learning_rate": 8.187256590190218e-07, "loss": 0.2415, "step": 17351 }, { "epoch": 0.3016217907490135, "grad_norm": 2.435377970016718, "learning_rate": 8.187039697136042e-07, "loss": 0.6061, "step": 17352 }, { "epoch": 0.30163917328651635, "grad_norm": 1.502542810912357, "learning_rate": 8.186822793980416e-07, "loss": 0.2961, "step": 17353 }, { "epoch": 0.3016565558240192, "grad_norm": 1.4436104227771382, "learning_rate": 8.186605880724024e-07, "loss": 0.4918, "step": 17354 }, { "epoch": 0.301673938361522, "grad_norm": 2.672339409638482, "learning_rate": 8.186388957367557e-07, "loss": 0.1931, "step": 17355 }, { "epoch": 0.30169132089902484, "grad_norm": 1.4341265535446823, "learning_rate": 8.186172023911699e-07, "loss": 0.2467, "step": 17356 }, { "epoch": 0.30170870343652767, "grad_norm": 1.763265049882832, "learning_rate": 8.185955080357141e-07, "loss": 0.3824, "step": 17357 }, { "epoch": 0.3017260859740305, "grad_norm": 1.4257784217534137, "learning_rate": 8.185738126704568e-07, "loss": 0.1928, "step": 17358 }, { "epoch": 0.30174346851153333, "grad_norm": 1.441959760999034, "learning_rate": 8.185521162954667e-07, "loss": 0.3896, "step": 17359 }, { "epoch": 0.30176085104903616, "grad_norm": 1.382372656494153, "learning_rate": 8.185304189108128e-07, "loss": 0.2892, "step": 17360 }, { "epoch": 0.30177823358653894, "grad_norm": 1.6538149128225963, "learning_rate": 8.18508720516564e-07, "loss": 0.405, "step": 17361 }, { "epoch": 0.30179561612404177, "grad_norm": 2.907640404629921, "learning_rate": 8.184870211127888e-07, "loss": 0.385, "step": 17362 }, { "epoch": 0.3018129986615446, "grad_norm": 1.1593100271459835, "learning_rate": 8.184653206995559e-07, "loss": 0.2056, "step": 17363 }, { "epoch": 0.3018303811990474, "grad_norm": 1.9737831245251525, "learning_rate": 8.184436192769343e-07, "loss": 0.283, "step": 17364 }, { "epoch": 0.30184776373655026, "grad_norm": 1.7552712860272113, "learning_rate": 8.184219168449927e-07, "loss": 0.2904, "step": 17365 }, { "epoch": 0.3018651462740531, "grad_norm": 2.2328660708672827, "learning_rate": 8.184002134038e-07, "loss": 0.4828, "step": 17366 }, { "epoch": 0.3018825288115559, "grad_norm": 1.096400276604519, "learning_rate": 8.183785089534248e-07, "loss": 0.5526, "step": 17367 }, { "epoch": 0.30189991134905875, "grad_norm": 1.9166842212686441, "learning_rate": 8.183568034939359e-07, "loss": 0.2805, "step": 17368 }, { "epoch": 0.3019172938865616, "grad_norm": 1.5780423573490814, "learning_rate": 8.183350970254021e-07, "loss": 0.2574, "step": 17369 }, { "epoch": 0.3019346764240644, "grad_norm": 1.3101181714114856, "learning_rate": 8.183133895478925e-07, "loss": 0.2455, "step": 17370 }, { "epoch": 0.3019520589615672, "grad_norm": 2.187481649242006, "learning_rate": 8.182916810614755e-07, "loss": 0.243, "step": 17371 }, { "epoch": 0.30196944149907, "grad_norm": 1.593813802853195, "learning_rate": 8.182699715662202e-07, "loss": 0.3733, "step": 17372 }, { "epoch": 0.30198682403657284, "grad_norm": 2.406520096325903, "learning_rate": 8.182482610621952e-07, "loss": 0.3819, "step": 17373 }, { "epoch": 0.3020042065740757, "grad_norm": 1.8056402494957888, "learning_rate": 8.182265495494695e-07, "loss": 0.2755, "step": 17374 }, { "epoch": 0.3020215891115785, "grad_norm": 2.520440945168315, "learning_rate": 8.182048370281118e-07, "loss": 0.5745, "step": 17375 }, { "epoch": 0.30203897164908133, "grad_norm": 8.324833626741158, "learning_rate": 8.181831234981908e-07, "loss": 0.6405, "step": 17376 }, { "epoch": 0.30205635418658416, "grad_norm": 1.77288618852493, "learning_rate": 8.181614089597757e-07, "loss": 0.4496, "step": 17377 }, { "epoch": 0.302073736724087, "grad_norm": 1.0774313784168301, "learning_rate": 8.18139693412935e-07, "loss": 0.1418, "step": 17378 }, { "epoch": 0.3020911192615898, "grad_norm": 6.617402235819881, "learning_rate": 8.181179768577376e-07, "loss": 0.3702, "step": 17379 }, { "epoch": 0.30210850179909265, "grad_norm": 2.981453341392496, "learning_rate": 8.180962592942523e-07, "loss": 0.5271, "step": 17380 }, { "epoch": 0.30212588433659543, "grad_norm": 1.7177785722155863, "learning_rate": 8.18074540722548e-07, "loss": 0.297, "step": 17381 }, { "epoch": 0.30214326687409826, "grad_norm": 1.8002403378312848, "learning_rate": 8.180528211426935e-07, "loss": 0.4599, "step": 17382 }, { "epoch": 0.3021606494116011, "grad_norm": 1.7233283491993976, "learning_rate": 8.180311005547578e-07, "loss": 0.3853, "step": 17383 }, { "epoch": 0.3021780319491039, "grad_norm": 1.4955067146255148, "learning_rate": 8.180093789588094e-07, "loss": 0.3521, "step": 17384 }, { "epoch": 0.30219541448660675, "grad_norm": 2.3362086790256416, "learning_rate": 8.179876563549174e-07, "loss": 0.2578, "step": 17385 }, { "epoch": 0.3022127970241096, "grad_norm": 1.7337744258164531, "learning_rate": 8.179659327431507e-07, "loss": 0.3207, "step": 17386 }, { "epoch": 0.3022301795616124, "grad_norm": 2.3569478552336545, "learning_rate": 8.179442081235779e-07, "loss": 0.3216, "step": 17387 }, { "epoch": 0.30224756209911524, "grad_norm": 2.646192111074962, "learning_rate": 8.179224824962681e-07, "loss": 0.3253, "step": 17388 }, { "epoch": 0.30226494463661807, "grad_norm": 1.377412359390321, "learning_rate": 8.179007558612901e-07, "loss": 0.276, "step": 17389 }, { "epoch": 0.3022823271741209, "grad_norm": 2.1568074926839267, "learning_rate": 8.178790282187127e-07, "loss": 0.4001, "step": 17390 }, { "epoch": 0.3022997097116237, "grad_norm": 1.942285867720172, "learning_rate": 8.178572995686048e-07, "loss": 0.3145, "step": 17391 }, { "epoch": 0.3023170922491265, "grad_norm": 1.8117918054242834, "learning_rate": 8.178355699110353e-07, "loss": 0.1786, "step": 17392 }, { "epoch": 0.30233447478662934, "grad_norm": 1.8773235312941505, "learning_rate": 8.178138392460731e-07, "loss": 0.5475, "step": 17393 }, { "epoch": 0.30235185732413217, "grad_norm": 2.2218502770001023, "learning_rate": 8.177921075737868e-07, "loss": 0.2588, "step": 17394 }, { "epoch": 0.302369239861635, "grad_norm": 1.9197409448596217, "learning_rate": 8.177703748942457e-07, "loss": 0.2823, "step": 17395 }, { "epoch": 0.3023866223991378, "grad_norm": 1.719335123820871, "learning_rate": 8.177486412075184e-07, "loss": 0.4616, "step": 17396 }, { "epoch": 0.30240400493664066, "grad_norm": 1.691257924897149, "learning_rate": 8.177269065136739e-07, "loss": 0.6795, "step": 17397 }, { "epoch": 0.3024213874741435, "grad_norm": 2.528233869827739, "learning_rate": 8.177051708127811e-07, "loss": 0.4475, "step": 17398 }, { "epoch": 0.3024387700116463, "grad_norm": 1.2090719819176847, "learning_rate": 8.176834341049088e-07, "loss": 0.3781, "step": 17399 }, { "epoch": 0.30245615254914915, "grad_norm": 1.1372559118231949, "learning_rate": 8.176616963901259e-07, "loss": 0.2056, "step": 17400 }, { "epoch": 0.3024735350866519, "grad_norm": 1.7237519432534278, "learning_rate": 8.176399576685014e-07, "loss": 0.269, "step": 17401 }, { "epoch": 0.30249091762415475, "grad_norm": 2.5074226985673724, "learning_rate": 8.176182179401042e-07, "loss": 0.4476, "step": 17402 }, { "epoch": 0.3025083001616576, "grad_norm": 1.6186321515996376, "learning_rate": 8.17596477205003e-07, "loss": 0.3349, "step": 17403 }, { "epoch": 0.3025256826991604, "grad_norm": 1.6680035580211638, "learning_rate": 8.175747354632669e-07, "loss": 0.2349, "step": 17404 }, { "epoch": 0.30254306523666324, "grad_norm": 1.3542525235580063, "learning_rate": 8.175529927149649e-07, "loss": 0.3141, "step": 17405 }, { "epoch": 0.3025604477741661, "grad_norm": 1.41286885203675, "learning_rate": 8.175312489601658e-07, "loss": 0.2935, "step": 17406 }, { "epoch": 0.3025778303116689, "grad_norm": 1.5292245396404454, "learning_rate": 8.175095041989384e-07, "loss": 0.2825, "step": 17407 }, { "epoch": 0.30259521284917174, "grad_norm": 1.9147242129154833, "learning_rate": 8.174877584313518e-07, "loss": 0.3724, "step": 17408 }, { "epoch": 0.30261259538667457, "grad_norm": 1.4998061662417752, "learning_rate": 8.174660116574748e-07, "loss": 0.2212, "step": 17409 }, { "epoch": 0.3026299779241774, "grad_norm": 1.8027324139708567, "learning_rate": 8.174442638773762e-07, "loss": 0.2759, "step": 17410 }, { "epoch": 0.30264736046168017, "grad_norm": 2.2126401086008345, "learning_rate": 8.174225150911253e-07, "loss": 0.4435, "step": 17411 }, { "epoch": 0.302664742999183, "grad_norm": 1.6253588204872134, "learning_rate": 8.174007652987909e-07, "loss": 0.3666, "step": 17412 }, { "epoch": 0.30268212553668583, "grad_norm": 2.366281535067592, "learning_rate": 8.173790145004418e-07, "loss": 0.3895, "step": 17413 }, { "epoch": 0.30269950807418866, "grad_norm": 1.507709134791391, "learning_rate": 8.173572626961469e-07, "loss": 0.3821, "step": 17414 }, { "epoch": 0.3027168906116915, "grad_norm": 4.522994859734877, "learning_rate": 8.173355098859753e-07, "loss": 0.4284, "step": 17415 }, { "epoch": 0.3027342731491943, "grad_norm": 2.0383961177777006, "learning_rate": 8.173137560699961e-07, "loss": 0.3342, "step": 17416 }, { "epoch": 0.30275165568669715, "grad_norm": 1.6194901085888231, "learning_rate": 8.172920012482778e-07, "loss": 0.3199, "step": 17417 }, { "epoch": 0.3027690382242, "grad_norm": 2.124142122445181, "learning_rate": 8.172702454208898e-07, "loss": 0.2193, "step": 17418 }, { "epoch": 0.3027864207617028, "grad_norm": 2.3463055213222255, "learning_rate": 8.172484885879007e-07, "loss": 0.3447, "step": 17419 }, { "epoch": 0.30280380329920564, "grad_norm": 2.152242868110952, "learning_rate": 8.172267307493798e-07, "loss": 0.5877, "step": 17420 }, { "epoch": 0.3028211858367084, "grad_norm": 2.73071519934409, "learning_rate": 8.172049719053958e-07, "loss": 0.408, "step": 17421 }, { "epoch": 0.30283856837421125, "grad_norm": 1.7208538576104664, "learning_rate": 8.171832120560178e-07, "loss": 0.3092, "step": 17422 }, { "epoch": 0.3028559509117141, "grad_norm": 1.5659219896798862, "learning_rate": 8.171614512013145e-07, "loss": 0.3175, "step": 17423 }, { "epoch": 0.3028733334492169, "grad_norm": 2.494802304785384, "learning_rate": 8.171396893413554e-07, "loss": 0.4919, "step": 17424 }, { "epoch": 0.30289071598671974, "grad_norm": 1.6408070778855655, "learning_rate": 8.171179264762089e-07, "loss": 0.2885, "step": 17425 }, { "epoch": 0.30290809852422257, "grad_norm": 1.3496590444603622, "learning_rate": 8.170961626059445e-07, "loss": 0.2949, "step": 17426 }, { "epoch": 0.3029254810617254, "grad_norm": 2.013996507270413, "learning_rate": 8.170743977306307e-07, "loss": 0.4299, "step": 17427 }, { "epoch": 0.30294286359922823, "grad_norm": 1.8201314685738332, "learning_rate": 8.170526318503368e-07, "loss": 0.2989, "step": 17428 }, { "epoch": 0.30296024613673106, "grad_norm": 2.2471141582376903, "learning_rate": 8.170308649651316e-07, "loss": 0.3275, "step": 17429 }, { "epoch": 0.3029776286742339, "grad_norm": 2.1149938651561233, "learning_rate": 8.170090970750844e-07, "loss": 0.318, "step": 17430 }, { "epoch": 0.30299501121173666, "grad_norm": 2.7525970689678165, "learning_rate": 8.169873281802639e-07, "loss": 0.6784, "step": 17431 }, { "epoch": 0.3030123937492395, "grad_norm": 1.8449260922094501, "learning_rate": 8.169655582807391e-07, "loss": 0.5084, "step": 17432 }, { "epoch": 0.3030297762867423, "grad_norm": 1.5653729453416816, "learning_rate": 8.169437873765792e-07, "loss": 0.4312, "step": 17433 }, { "epoch": 0.30304715882424516, "grad_norm": 1.6114392232500088, "learning_rate": 8.169220154678531e-07, "loss": 0.3715, "step": 17434 }, { "epoch": 0.303064541361748, "grad_norm": 1.8057754640949353, "learning_rate": 8.169002425546297e-07, "loss": 0.3611, "step": 17435 }, { "epoch": 0.3030819238992508, "grad_norm": 1.6766643991460541, "learning_rate": 8.168784686369781e-07, "loss": 0.2464, "step": 17436 }, { "epoch": 0.30309930643675365, "grad_norm": 2.1390125705286684, "learning_rate": 8.168566937149675e-07, "loss": 0.453, "step": 17437 }, { "epoch": 0.3031166889742565, "grad_norm": 0.8614523608525219, "learning_rate": 8.168349177886664e-07, "loss": 0.2301, "step": 17438 }, { "epoch": 0.3031340715117593, "grad_norm": 2.4514044462591857, "learning_rate": 8.168131408581443e-07, "loss": 0.4444, "step": 17439 }, { "epoch": 0.30315145404926214, "grad_norm": 3.2275909184307823, "learning_rate": 8.167913629234702e-07, "loss": 0.2758, "step": 17440 }, { "epoch": 0.3031688365867649, "grad_norm": 0.8726017767737854, "learning_rate": 8.167695839847128e-07, "loss": 0.2314, "step": 17441 }, { "epoch": 0.30318621912426774, "grad_norm": 1.5792918314159652, "learning_rate": 8.167478040419415e-07, "loss": 0.3236, "step": 17442 }, { "epoch": 0.30320360166177057, "grad_norm": 1.1850483600501405, "learning_rate": 8.16726023095225e-07, "loss": 0.2874, "step": 17443 }, { "epoch": 0.3032209841992734, "grad_norm": 1.4601440605458793, "learning_rate": 8.167042411446328e-07, "loss": 0.2772, "step": 17444 }, { "epoch": 0.30323836673677623, "grad_norm": 1.3845646378414838, "learning_rate": 8.166824581902334e-07, "loss": 0.3777, "step": 17445 }, { "epoch": 0.30325574927427906, "grad_norm": 1.1746407501932796, "learning_rate": 8.16660674232096e-07, "loss": 0.4383, "step": 17446 }, { "epoch": 0.3032731318117819, "grad_norm": 2.6838949663038614, "learning_rate": 8.1663888927029e-07, "loss": 0.4586, "step": 17447 }, { "epoch": 0.3032905143492847, "grad_norm": 1.2811769266165396, "learning_rate": 8.166171033048838e-07, "loss": 0.5495, "step": 17448 }, { "epoch": 0.30330789688678755, "grad_norm": 2.0309899527522712, "learning_rate": 8.165953163359472e-07, "loss": 0.2654, "step": 17449 }, { "epoch": 0.3033252794242904, "grad_norm": 1.1949082257162091, "learning_rate": 8.165735283635486e-07, "loss": 0.2941, "step": 17450 }, { "epoch": 0.30334266196179316, "grad_norm": 1.4350377566990202, "learning_rate": 8.165517393877576e-07, "loss": 0.2118, "step": 17451 }, { "epoch": 0.303360044499296, "grad_norm": 3.3419858430138727, "learning_rate": 8.165299494086428e-07, "loss": 0.412, "step": 17452 }, { "epoch": 0.3033774270367988, "grad_norm": 1.6948828641532383, "learning_rate": 8.165081584262736e-07, "loss": 0.2891, "step": 17453 }, { "epoch": 0.30339480957430165, "grad_norm": 1.3601916210981453, "learning_rate": 8.164863664407188e-07, "loss": 0.3649, "step": 17454 }, { "epoch": 0.3034121921118045, "grad_norm": 1.3856744831840113, "learning_rate": 8.164645734520477e-07, "loss": 0.3088, "step": 17455 }, { "epoch": 0.3034295746493073, "grad_norm": 1.4391679632774241, "learning_rate": 8.164427794603292e-07, "loss": 0.258, "step": 17456 }, { "epoch": 0.30344695718681014, "grad_norm": 1.7309165997628522, "learning_rate": 8.164209844656325e-07, "loss": 0.4629, "step": 17457 }, { "epoch": 0.30346433972431297, "grad_norm": 1.629365210488776, "learning_rate": 8.163991884680266e-07, "loss": 0.3197, "step": 17458 }, { "epoch": 0.3034817222618158, "grad_norm": 1.5010307664203142, "learning_rate": 8.163773914675806e-07, "loss": 0.2778, "step": 17459 }, { "epoch": 0.30349910479931863, "grad_norm": 2.3661604391269817, "learning_rate": 8.163555934643636e-07, "loss": 0.2898, "step": 17460 }, { "epoch": 0.3035164873368214, "grad_norm": 1.8115883026986435, "learning_rate": 8.163337944584446e-07, "loss": 0.3137, "step": 17461 }, { "epoch": 0.30353386987432424, "grad_norm": 0.7727939423591558, "learning_rate": 8.16311994449893e-07, "loss": 0.2906, "step": 17462 }, { "epoch": 0.30355125241182707, "grad_norm": 1.7204410466723115, "learning_rate": 8.162901934387776e-07, "loss": 0.5441, "step": 17463 }, { "epoch": 0.3035686349493299, "grad_norm": 1.3147604003147402, "learning_rate": 8.162683914251675e-07, "loss": 0.3051, "step": 17464 }, { "epoch": 0.3035860174868327, "grad_norm": 1.2190283802374293, "learning_rate": 8.16246588409132e-07, "loss": 0.1774, "step": 17465 }, { "epoch": 0.30360340002433556, "grad_norm": 1.0092255642710084, "learning_rate": 8.162247843907399e-07, "loss": 0.27, "step": 17466 }, { "epoch": 0.3036207825618384, "grad_norm": 2.9487127768285117, "learning_rate": 8.162029793700607e-07, "loss": 0.2841, "step": 17467 }, { "epoch": 0.3036381650993412, "grad_norm": 2.0345966392905823, "learning_rate": 8.161811733471631e-07, "loss": 0.2894, "step": 17468 }, { "epoch": 0.30365554763684405, "grad_norm": 1.5475742607374547, "learning_rate": 8.161593663221167e-07, "loss": 0.4542, "step": 17469 }, { "epoch": 0.3036729301743469, "grad_norm": 2.517181991856236, "learning_rate": 8.161375582949901e-07, "loss": 0.3516, "step": 17470 }, { "epoch": 0.30369031271184965, "grad_norm": 1.1152414600039704, "learning_rate": 8.161157492658528e-07, "loss": 0.3892, "step": 17471 }, { "epoch": 0.3037076952493525, "grad_norm": 1.6110611151022125, "learning_rate": 8.160939392347736e-07, "loss": 0.3552, "step": 17472 }, { "epoch": 0.3037250777868553, "grad_norm": 1.7662902810909853, "learning_rate": 8.16072128201822e-07, "loss": 0.4324, "step": 17473 }, { "epoch": 0.30374246032435814, "grad_norm": 2.2838738734017334, "learning_rate": 8.160503161670669e-07, "loss": 0.2244, "step": 17474 }, { "epoch": 0.303759842861861, "grad_norm": 1.8010935754857027, "learning_rate": 8.160285031305776e-07, "loss": 0.366, "step": 17475 }, { "epoch": 0.3037772253993638, "grad_norm": 3.143026424219717, "learning_rate": 8.160066890924229e-07, "loss": 0.2563, "step": 17476 }, { "epoch": 0.30379460793686663, "grad_norm": 1.7358784725889698, "learning_rate": 8.159848740526723e-07, "loss": 0.3299, "step": 17477 }, { "epoch": 0.30381199047436946, "grad_norm": 1.689261151303331, "learning_rate": 8.159630580113947e-07, "loss": 0.2452, "step": 17478 }, { "epoch": 0.3038293730118723, "grad_norm": 2.983778479872127, "learning_rate": 8.159412409686593e-07, "loss": 0.4944, "step": 17479 }, { "epoch": 0.3038467555493751, "grad_norm": 1.1715286883334397, "learning_rate": 8.159194229245354e-07, "loss": 0.2313, "step": 17480 }, { "epoch": 0.3038641380868779, "grad_norm": 2.44539004440575, "learning_rate": 8.158976038790922e-07, "loss": 0.2726, "step": 17481 }, { "epoch": 0.30388152062438073, "grad_norm": 1.2795663345354369, "learning_rate": 8.158757838323984e-07, "loss": 0.2866, "step": 17482 }, { "epoch": 0.30389890316188356, "grad_norm": 1.3638426429325592, "learning_rate": 8.158539627845236e-07, "loss": 0.2067, "step": 17483 }, { "epoch": 0.3039162856993864, "grad_norm": 2.2197941850178666, "learning_rate": 8.158321407355368e-07, "loss": 0.2179, "step": 17484 }, { "epoch": 0.3039336682368892, "grad_norm": 1.0513871287099326, "learning_rate": 8.158103176855073e-07, "loss": 0.2611, "step": 17485 }, { "epoch": 0.30395105077439205, "grad_norm": 1.4790149656731946, "learning_rate": 8.157884936345041e-07, "loss": 0.3399, "step": 17486 }, { "epoch": 0.3039684333118949, "grad_norm": 1.312987404001043, "learning_rate": 8.157666685825966e-07, "loss": 0.3928, "step": 17487 }, { "epoch": 0.3039858158493977, "grad_norm": 1.667459172860381, "learning_rate": 8.157448425298535e-07, "loss": 0.2481, "step": 17488 }, { "epoch": 0.30400319838690054, "grad_norm": 1.5015073182530176, "learning_rate": 8.157230154763444e-07, "loss": 0.2525, "step": 17489 }, { "epoch": 0.30402058092440337, "grad_norm": 1.4204715424818286, "learning_rate": 8.157011874221384e-07, "loss": 0.2112, "step": 17490 }, { "epoch": 0.30403796346190615, "grad_norm": 1.5715479453806651, "learning_rate": 8.156793583673047e-07, "loss": 0.2338, "step": 17491 }, { "epoch": 0.304055345999409, "grad_norm": 1.6460252026814697, "learning_rate": 8.156575283119124e-07, "loss": 0.2417, "step": 17492 }, { "epoch": 0.3040727285369118, "grad_norm": 1.3454667778038154, "learning_rate": 8.156356972560309e-07, "loss": 0.3758, "step": 17493 }, { "epoch": 0.30409011107441464, "grad_norm": 2.159677702115238, "learning_rate": 8.156138651997291e-07, "loss": 0.2625, "step": 17494 }, { "epoch": 0.30410749361191747, "grad_norm": 2.4031255439968255, "learning_rate": 8.155920321430762e-07, "loss": 0.4116, "step": 17495 }, { "epoch": 0.3041248761494203, "grad_norm": 1.5873121196970645, "learning_rate": 8.155701980861416e-07, "loss": 0.1942, "step": 17496 }, { "epoch": 0.30414225868692313, "grad_norm": 2.6018020427812383, "learning_rate": 8.155483630289945e-07, "loss": 0.5082, "step": 17497 }, { "epoch": 0.30415964122442596, "grad_norm": 2.7618138352909267, "learning_rate": 8.155265269717041e-07, "loss": 0.4288, "step": 17498 }, { "epoch": 0.3041770237619288, "grad_norm": 1.2531876780839666, "learning_rate": 8.155046899143394e-07, "loss": 0.3521, "step": 17499 }, { "epoch": 0.30419440629943156, "grad_norm": 1.8775236254125274, "learning_rate": 8.154828518569698e-07, "loss": 0.2501, "step": 17500 }, { "epoch": 0.3042117888369344, "grad_norm": 1.7097828862168187, "learning_rate": 8.154610127996646e-07, "loss": 0.2645, "step": 17501 }, { "epoch": 0.3042291713744372, "grad_norm": 4.0192173030498575, "learning_rate": 8.154391727424927e-07, "loss": 0.4874, "step": 17502 }, { "epoch": 0.30424655391194005, "grad_norm": 1.1085378366039926, "learning_rate": 8.154173316855237e-07, "loss": 0.2757, "step": 17503 }, { "epoch": 0.3042639364494429, "grad_norm": 2.880123650049361, "learning_rate": 8.153954896288267e-07, "loss": 0.5754, "step": 17504 }, { "epoch": 0.3042813189869457, "grad_norm": 1.0725039358785913, "learning_rate": 8.153736465724707e-07, "loss": 0.6223, "step": 17505 }, { "epoch": 0.30429870152444854, "grad_norm": 1.5043472356099588, "learning_rate": 8.153518025165252e-07, "loss": 0.3205, "step": 17506 }, { "epoch": 0.3043160840619514, "grad_norm": 1.6120512181856355, "learning_rate": 8.153299574610593e-07, "loss": 0.3941, "step": 17507 }, { "epoch": 0.3043334665994542, "grad_norm": 1.9589523115129497, "learning_rate": 8.153081114061423e-07, "loss": 0.267, "step": 17508 }, { "epoch": 0.30435084913695704, "grad_norm": 1.9571350009419435, "learning_rate": 8.152862643518434e-07, "loss": 0.297, "step": 17509 }, { "epoch": 0.3043682316744598, "grad_norm": 2.4835618362630045, "learning_rate": 8.15264416298232e-07, "loss": 0.309, "step": 17510 }, { "epoch": 0.30438561421196264, "grad_norm": 1.2363870410015487, "learning_rate": 8.152425672453771e-07, "loss": 0.2635, "step": 17511 }, { "epoch": 0.30440299674946547, "grad_norm": 2.5962258082491685, "learning_rate": 8.152207171933481e-07, "loss": 0.5709, "step": 17512 }, { "epoch": 0.3044203792869683, "grad_norm": 3.943900126001599, "learning_rate": 8.151988661422142e-07, "loss": 0.3683, "step": 17513 }, { "epoch": 0.30443776182447113, "grad_norm": 1.8126069657177282, "learning_rate": 8.151770140920446e-07, "loss": 0.5581, "step": 17514 }, { "epoch": 0.30445514436197396, "grad_norm": 1.1610999896991248, "learning_rate": 8.151551610429088e-07, "loss": 0.2277, "step": 17515 }, { "epoch": 0.3044725268994768, "grad_norm": 1.996757442932722, "learning_rate": 8.151333069948759e-07, "loss": 0.2927, "step": 17516 }, { "epoch": 0.3044899094369796, "grad_norm": 1.5586633810654014, "learning_rate": 8.151114519480151e-07, "loss": 0.273, "step": 17517 }, { "epoch": 0.30450729197448245, "grad_norm": 1.9277516215369654, "learning_rate": 8.150895959023958e-07, "loss": 0.1865, "step": 17518 }, { "epoch": 0.3045246745119853, "grad_norm": 1.6398762435574634, "learning_rate": 8.150677388580874e-07, "loss": 0.3312, "step": 17519 }, { "epoch": 0.30454205704948806, "grad_norm": 1.2535070350140989, "learning_rate": 8.150458808151587e-07, "loss": 0.3775, "step": 17520 }, { "epoch": 0.3045594395869909, "grad_norm": 1.273615513448383, "learning_rate": 8.150240217736794e-07, "loss": 0.3343, "step": 17521 }, { "epoch": 0.3045768221244937, "grad_norm": 1.499297772621557, "learning_rate": 8.150021617337186e-07, "loss": 0.2796, "step": 17522 }, { "epoch": 0.30459420466199655, "grad_norm": 2.3308608906715516, "learning_rate": 8.149803006953457e-07, "loss": 0.4247, "step": 17523 }, { "epoch": 0.3046115871994994, "grad_norm": 1.3298242617680498, "learning_rate": 8.149584386586299e-07, "loss": 0.393, "step": 17524 }, { "epoch": 0.3046289697370022, "grad_norm": 1.3375718001618964, "learning_rate": 8.149365756236406e-07, "loss": 0.346, "step": 17525 }, { "epoch": 0.30464635227450504, "grad_norm": 3.555521914268149, "learning_rate": 8.149147115904471e-07, "loss": 0.484, "step": 17526 }, { "epoch": 0.30466373481200787, "grad_norm": 2.0545518541157093, "learning_rate": 8.148928465591185e-07, "loss": 0.4488, "step": 17527 }, { "epoch": 0.3046811173495107, "grad_norm": 1.7775165935865798, "learning_rate": 8.148709805297242e-07, "loss": 0.3388, "step": 17528 }, { "epoch": 0.30469849988701353, "grad_norm": 4.115723790873331, "learning_rate": 8.148491135023336e-07, "loss": 0.3536, "step": 17529 }, { "epoch": 0.3047158824245163, "grad_norm": 1.6705987903848032, "learning_rate": 8.148272454770159e-07, "loss": 0.2657, "step": 17530 }, { "epoch": 0.30473326496201913, "grad_norm": 1.8755740467490076, "learning_rate": 8.148053764538405e-07, "loss": 0.2919, "step": 17531 }, { "epoch": 0.30475064749952196, "grad_norm": 1.6710704618895367, "learning_rate": 8.147835064328765e-07, "loss": 0.26, "step": 17532 }, { "epoch": 0.3047680300370248, "grad_norm": 2.7391725242190703, "learning_rate": 8.147616354141935e-07, "loss": 0.4329, "step": 17533 }, { "epoch": 0.3047854125745276, "grad_norm": 1.486656368973733, "learning_rate": 8.147397633978607e-07, "loss": 0.2717, "step": 17534 }, { "epoch": 0.30480279511203046, "grad_norm": 1.0729549485191725, "learning_rate": 8.147178903839475e-07, "loss": 0.3488, "step": 17535 }, { "epoch": 0.3048201776495333, "grad_norm": 2.3475188394879867, "learning_rate": 8.146960163725231e-07, "loss": 0.4551, "step": 17536 }, { "epoch": 0.3048375601870361, "grad_norm": 2.387951280186894, "learning_rate": 8.146741413636569e-07, "loss": 0.445, "step": 17537 }, { "epoch": 0.30485494272453895, "grad_norm": 1.1726522651962434, "learning_rate": 8.146522653574183e-07, "loss": 0.2141, "step": 17538 }, { "epoch": 0.3048723252620418, "grad_norm": 2.0421857458977146, "learning_rate": 8.146303883538765e-07, "loss": 0.5485, "step": 17539 }, { "epoch": 0.30488970779954455, "grad_norm": 2.087144988331542, "learning_rate": 8.146085103531009e-07, "loss": 0.2434, "step": 17540 }, { "epoch": 0.3049070903370474, "grad_norm": 1.2413739027730377, "learning_rate": 8.145866313551608e-07, "loss": 0.5624, "step": 17541 }, { "epoch": 0.3049244728745502, "grad_norm": 2.3626314611122994, "learning_rate": 8.145647513601255e-07, "loss": 0.4444, "step": 17542 }, { "epoch": 0.30494185541205304, "grad_norm": 2.147155759400617, "learning_rate": 8.145428703680646e-07, "loss": 0.4489, "step": 17543 }, { "epoch": 0.3049592379495559, "grad_norm": 1.5730193273980357, "learning_rate": 8.145209883790471e-07, "loss": 0.3059, "step": 17544 }, { "epoch": 0.3049766204870587, "grad_norm": 2.138005370838884, "learning_rate": 8.144991053931427e-07, "loss": 0.392, "step": 17545 }, { "epoch": 0.30499400302456153, "grad_norm": 2.9189099629709423, "learning_rate": 8.144772214104207e-07, "loss": 0.3508, "step": 17546 }, { "epoch": 0.30501138556206436, "grad_norm": 3.20260986385519, "learning_rate": 8.144553364309503e-07, "loss": 0.6516, "step": 17547 }, { "epoch": 0.3050287680995672, "grad_norm": 1.8867594073057028, "learning_rate": 8.144334504548007e-07, "loss": 0.3382, "step": 17548 }, { "epoch": 0.30504615063707, "grad_norm": 3.563999508193992, "learning_rate": 8.144115634820418e-07, "loss": 0.3948, "step": 17549 }, { "epoch": 0.3050635331745728, "grad_norm": 1.3706730858690899, "learning_rate": 8.143896755127426e-07, "loss": 0.4587, "step": 17550 }, { "epoch": 0.30508091571207563, "grad_norm": 4.224522072939263, "learning_rate": 8.143677865469725e-07, "loss": 0.4126, "step": 17551 }, { "epoch": 0.30509829824957846, "grad_norm": 2.6337059203005664, "learning_rate": 8.143458965848009e-07, "loss": 0.2295, "step": 17552 }, { "epoch": 0.3051156807870813, "grad_norm": 1.7624516816143252, "learning_rate": 8.143240056262972e-07, "loss": 0.342, "step": 17553 }, { "epoch": 0.3051330633245841, "grad_norm": 2.2355037536352276, "learning_rate": 8.143021136715309e-07, "loss": 0.3018, "step": 17554 }, { "epoch": 0.30515044586208695, "grad_norm": 1.4312381846119662, "learning_rate": 8.142802207205712e-07, "loss": 0.1893, "step": 17555 }, { "epoch": 0.3051678283995898, "grad_norm": 2.1596049373635506, "learning_rate": 8.142583267734875e-07, "loss": 0.3645, "step": 17556 }, { "epoch": 0.3051852109370926, "grad_norm": 1.118423796102977, "learning_rate": 8.142364318303494e-07, "loss": 0.365, "step": 17557 }, { "epoch": 0.30520259347459544, "grad_norm": 1.5772376625911928, "learning_rate": 8.14214535891226e-07, "loss": 0.3982, "step": 17558 }, { "epoch": 0.30521997601209827, "grad_norm": 1.47237975771906, "learning_rate": 8.14192638956187e-07, "loss": 0.3183, "step": 17559 }, { "epoch": 0.30523735854960105, "grad_norm": 1.4118221252107082, "learning_rate": 8.141707410253015e-07, "loss": 0.2363, "step": 17560 }, { "epoch": 0.3052547410871039, "grad_norm": 2.445595507379666, "learning_rate": 8.141488420986393e-07, "loss": 0.3762, "step": 17561 }, { "epoch": 0.3052721236246067, "grad_norm": 1.9685399527070258, "learning_rate": 8.141269421762694e-07, "loss": 0.375, "step": 17562 }, { "epoch": 0.30528950616210954, "grad_norm": 1.7960110169825643, "learning_rate": 8.141050412582615e-07, "loss": 0.4216, "step": 17563 }, { "epoch": 0.30530688869961237, "grad_norm": 6.343238425049955, "learning_rate": 8.140831393446849e-07, "loss": 0.3859, "step": 17564 }, { "epoch": 0.3053242712371152, "grad_norm": 2.7982783567975167, "learning_rate": 8.14061236435609e-07, "loss": 0.3894, "step": 17565 }, { "epoch": 0.305341653774618, "grad_norm": 1.895567657590476, "learning_rate": 8.140393325311032e-07, "loss": 0.3065, "step": 17566 }, { "epoch": 0.30535903631212086, "grad_norm": 2.1202953290463915, "learning_rate": 8.140174276312371e-07, "loss": 0.4111, "step": 17567 }, { "epoch": 0.3053764188496237, "grad_norm": 1.6484514154183516, "learning_rate": 8.139955217360799e-07, "loss": 0.3288, "step": 17568 }, { "epoch": 0.3053938013871265, "grad_norm": 1.8936822063670624, "learning_rate": 8.13973614845701e-07, "loss": 0.2134, "step": 17569 }, { "epoch": 0.3054111839246293, "grad_norm": 1.559997869355503, "learning_rate": 8.139517069601702e-07, "loss": 0.4847, "step": 17570 }, { "epoch": 0.3054285664621321, "grad_norm": 2.2587577639345167, "learning_rate": 8.139297980795566e-07, "loss": 0.3554, "step": 17571 }, { "epoch": 0.30544594899963495, "grad_norm": 0.9323660293761139, "learning_rate": 8.139078882039298e-07, "loss": 0.3164, "step": 17572 }, { "epoch": 0.3054633315371378, "grad_norm": 1.734970271811862, "learning_rate": 8.13885977333359e-07, "loss": 0.2067, "step": 17573 }, { "epoch": 0.3054807140746406, "grad_norm": 1.4761595786565083, "learning_rate": 8.138640654679141e-07, "loss": 0.583, "step": 17574 }, { "epoch": 0.30549809661214344, "grad_norm": 1.0142552217098688, "learning_rate": 8.138421526076643e-07, "loss": 0.4118, "step": 17575 }, { "epoch": 0.3055154791496463, "grad_norm": 1.6715640424148144, "learning_rate": 8.13820238752679e-07, "loss": 0.2918, "step": 17576 }, { "epoch": 0.3055328616871491, "grad_norm": 1.5883842861747681, "learning_rate": 8.137983239030275e-07, "loss": 0.3126, "step": 17577 }, { "epoch": 0.30555024422465193, "grad_norm": 2.2732739155307, "learning_rate": 8.137764080587796e-07, "loss": 0.5921, "step": 17578 }, { "epoch": 0.30556762676215476, "grad_norm": 1.799050384168052, "learning_rate": 8.137544912200047e-07, "loss": 0.2003, "step": 17579 }, { "epoch": 0.30558500929965754, "grad_norm": 1.1755350366084711, "learning_rate": 8.13732573386772e-07, "loss": 0.2693, "step": 17580 }, { "epoch": 0.30560239183716037, "grad_norm": 1.4904609490425818, "learning_rate": 8.137106545591513e-07, "loss": 0.2255, "step": 17581 }, { "epoch": 0.3056197743746632, "grad_norm": 1.7999635143318966, "learning_rate": 8.136887347372119e-07, "loss": 0.4866, "step": 17582 }, { "epoch": 0.30563715691216603, "grad_norm": 1.6764150854801085, "learning_rate": 8.136668139210233e-07, "loss": 0.3067, "step": 17583 }, { "epoch": 0.30565453944966886, "grad_norm": 1.7644499923777348, "learning_rate": 8.136448921106549e-07, "loss": 0.3016, "step": 17584 }, { "epoch": 0.3056719219871717, "grad_norm": 1.65286556838711, "learning_rate": 8.136229693061765e-07, "loss": 0.3952, "step": 17585 }, { "epoch": 0.3056893045246745, "grad_norm": 2.0646605083494163, "learning_rate": 8.136010455076571e-07, "loss": 0.1829, "step": 17586 }, { "epoch": 0.30570668706217735, "grad_norm": 2.702859147401581, "learning_rate": 8.135791207151665e-07, "loss": 0.4677, "step": 17587 }, { "epoch": 0.3057240695996802, "grad_norm": 2.287406230403166, "learning_rate": 8.135571949287742e-07, "loss": 0.2989, "step": 17588 }, { "epoch": 0.305741452137183, "grad_norm": 1.7799079125743136, "learning_rate": 8.135352681485497e-07, "loss": 0.3614, "step": 17589 }, { "epoch": 0.3057588346746858, "grad_norm": 1.723633329011506, "learning_rate": 8.135133403745624e-07, "loss": 0.3095, "step": 17590 }, { "epoch": 0.3057762172121886, "grad_norm": 2.015459649796367, "learning_rate": 8.134914116068818e-07, "loss": 0.365, "step": 17591 }, { "epoch": 0.30579359974969145, "grad_norm": 1.3622293147335953, "learning_rate": 8.134694818455774e-07, "loss": 0.2919, "step": 17592 }, { "epoch": 0.3058109822871943, "grad_norm": 1.2969520934287264, "learning_rate": 8.134475510907187e-07, "loss": 0.3007, "step": 17593 }, { "epoch": 0.3058283648246971, "grad_norm": 1.6337312448380015, "learning_rate": 8.134256193423753e-07, "loss": 0.1769, "step": 17594 }, { "epoch": 0.30584574736219994, "grad_norm": 1.4880577263219936, "learning_rate": 8.134036866006166e-07, "loss": 0.4156, "step": 17595 }, { "epoch": 0.30586312989970277, "grad_norm": 1.990238282913446, "learning_rate": 8.133817528655122e-07, "loss": 0.4038, "step": 17596 }, { "epoch": 0.3058805124372056, "grad_norm": 1.4971081950989722, "learning_rate": 8.133598181371317e-07, "loss": 0.3334, "step": 17597 }, { "epoch": 0.30589789497470843, "grad_norm": 2.8296267375302544, "learning_rate": 8.133378824155446e-07, "loss": 0.4946, "step": 17598 }, { "epoch": 0.30591527751221126, "grad_norm": 1.6724432031966976, "learning_rate": 8.133159457008201e-07, "loss": 0.4138, "step": 17599 }, { "epoch": 0.30593266004971403, "grad_norm": 2.7173201023951714, "learning_rate": 8.13294007993028e-07, "loss": 0.4016, "step": 17600 }, { "epoch": 0.30595004258721686, "grad_norm": 2.0358630826345205, "learning_rate": 8.13272069292238e-07, "loss": 0.4323, "step": 17601 }, { "epoch": 0.3059674251247197, "grad_norm": 2.329438313361681, "learning_rate": 8.132501295985192e-07, "loss": 0.4163, "step": 17602 }, { "epoch": 0.3059848076622225, "grad_norm": 1.3616796117160073, "learning_rate": 8.132281889119417e-07, "loss": 0.2576, "step": 17603 }, { "epoch": 0.30600219019972535, "grad_norm": 1.523568496499957, "learning_rate": 8.132062472325745e-07, "loss": 0.3635, "step": 17604 }, { "epoch": 0.3060195727372282, "grad_norm": 1.1961738589718274, "learning_rate": 8.131843045604873e-07, "loss": 0.2459, "step": 17605 }, { "epoch": 0.306036955274731, "grad_norm": 1.9426581603166322, "learning_rate": 8.131623608957499e-07, "loss": 0.3561, "step": 17606 }, { "epoch": 0.30605433781223385, "grad_norm": 2.499762979133298, "learning_rate": 8.131404162384317e-07, "loss": 0.2629, "step": 17607 }, { "epoch": 0.3060717203497367, "grad_norm": 1.31693585393284, "learning_rate": 8.131184705886021e-07, "loss": 0.3812, "step": 17608 }, { "epoch": 0.3060891028872395, "grad_norm": 1.7167160098117533, "learning_rate": 8.130965239463308e-07, "loss": 0.2642, "step": 17609 }, { "epoch": 0.3061064854247423, "grad_norm": 3.1758726727531394, "learning_rate": 8.130745763116874e-07, "loss": 0.4312, "step": 17610 }, { "epoch": 0.3061238679622451, "grad_norm": 1.053072747071902, "learning_rate": 8.130526276847413e-07, "loss": 0.1809, "step": 17611 }, { "epoch": 0.30614125049974794, "grad_norm": 2.265708250267223, "learning_rate": 8.130306780655622e-07, "loss": 0.3863, "step": 17612 }, { "epoch": 0.30615863303725077, "grad_norm": 1.6658106980820933, "learning_rate": 8.130087274542199e-07, "loss": 0.9532, "step": 17613 }, { "epoch": 0.3061760155747536, "grad_norm": 1.3913317133622938, "learning_rate": 8.129867758507835e-07, "loss": 0.2807, "step": 17614 }, { "epoch": 0.30619339811225643, "grad_norm": 1.4173800379404011, "learning_rate": 8.129648232553228e-07, "loss": 0.4659, "step": 17615 }, { "epoch": 0.30621078064975926, "grad_norm": 1.3098842198612706, "learning_rate": 8.129428696679072e-07, "loss": 0.3257, "step": 17616 }, { "epoch": 0.3062281631872621, "grad_norm": 1.4358016221272596, "learning_rate": 8.129209150886067e-07, "loss": 0.2927, "step": 17617 }, { "epoch": 0.3062455457247649, "grad_norm": 1.9551975199584042, "learning_rate": 8.128989595174907e-07, "loss": 0.2354, "step": 17618 }, { "epoch": 0.30626292826226775, "grad_norm": 1.0635368247923433, "learning_rate": 8.128770029546286e-07, "loss": 0.2892, "step": 17619 }, { "epoch": 0.3062803107997705, "grad_norm": 1.3952148241384426, "learning_rate": 8.1285504540009e-07, "loss": 0.2842, "step": 17620 }, { "epoch": 0.30629769333727336, "grad_norm": 2.0280146059935853, "learning_rate": 8.128330868539448e-07, "loss": 0.4371, "step": 17621 }, { "epoch": 0.3063150758747762, "grad_norm": 1.7782401538270607, "learning_rate": 8.128111273162623e-07, "loss": 0.6021, "step": 17622 }, { "epoch": 0.306332458412279, "grad_norm": 2.1811289759728045, "learning_rate": 8.127891667871125e-07, "loss": 0.3694, "step": 17623 }, { "epoch": 0.30634984094978185, "grad_norm": 1.7440172604919368, "learning_rate": 8.127672052665644e-07, "loss": 0.3932, "step": 17624 }, { "epoch": 0.3063672234872847, "grad_norm": 1.9553002488032216, "learning_rate": 8.127452427546881e-07, "loss": 0.3188, "step": 17625 }, { "epoch": 0.3063846060247875, "grad_norm": 1.4519334124608032, "learning_rate": 8.12723279251553e-07, "loss": 0.3523, "step": 17626 }, { "epoch": 0.30640198856229034, "grad_norm": 1.6275713577959745, "learning_rate": 8.127013147572288e-07, "loss": 0.4036, "step": 17627 }, { "epoch": 0.30641937109979317, "grad_norm": 2.9584406589511, "learning_rate": 8.126793492717849e-07, "loss": 0.3597, "step": 17628 }, { "epoch": 0.306436753637296, "grad_norm": 2.1106561387600005, "learning_rate": 8.126573827952914e-07, "loss": 0.3969, "step": 17629 }, { "epoch": 0.3064541361747988, "grad_norm": 1.441646751227614, "learning_rate": 8.126354153278173e-07, "loss": 0.2322, "step": 17630 }, { "epoch": 0.3064715187123016, "grad_norm": 1.1348265956786663, "learning_rate": 8.126134468694328e-07, "loss": 0.228, "step": 17631 }, { "epoch": 0.30648890124980444, "grad_norm": 1.8393391720472991, "learning_rate": 8.125914774202072e-07, "loss": 0.5518, "step": 17632 }, { "epoch": 0.30650628378730727, "grad_norm": 1.3460778418660744, "learning_rate": 8.125695069802101e-07, "loss": 0.3761, "step": 17633 }, { "epoch": 0.3065236663248101, "grad_norm": 3.0632240308339584, "learning_rate": 8.125475355495113e-07, "loss": 0.2409, "step": 17634 }, { "epoch": 0.3065410488623129, "grad_norm": 2.400285520848231, "learning_rate": 8.125255631281805e-07, "loss": 0.3748, "step": 17635 }, { "epoch": 0.30655843139981576, "grad_norm": 1.9789125800689986, "learning_rate": 8.125035897162871e-07, "loss": 0.3784, "step": 17636 }, { "epoch": 0.3065758139373186, "grad_norm": 1.4414917649850547, "learning_rate": 8.12481615313901e-07, "loss": 0.2608, "step": 17637 }, { "epoch": 0.3065931964748214, "grad_norm": 1.8153626101223772, "learning_rate": 8.124596399210914e-07, "loss": 0.4208, "step": 17638 }, { "epoch": 0.3066105790123242, "grad_norm": 1.9207558074370588, "learning_rate": 8.124376635379287e-07, "loss": 0.3517, "step": 17639 }, { "epoch": 0.306627961549827, "grad_norm": 1.5369172777790874, "learning_rate": 8.124156861644818e-07, "loss": 0.3904, "step": 17640 }, { "epoch": 0.30664534408732985, "grad_norm": 1.1075980376405667, "learning_rate": 8.123937078008208e-07, "loss": 0.4787, "step": 17641 }, { "epoch": 0.3066627266248327, "grad_norm": 2.648549364991294, "learning_rate": 8.123717284470153e-07, "loss": 0.2756, "step": 17642 }, { "epoch": 0.3066801091623355, "grad_norm": 1.169693806745076, "learning_rate": 8.123497481031349e-07, "loss": 0.3964, "step": 17643 }, { "epoch": 0.30669749169983834, "grad_norm": 2.700212906962697, "learning_rate": 8.123277667692492e-07, "loss": 0.2891, "step": 17644 }, { "epoch": 0.3067148742373412, "grad_norm": 2.0537788500855987, "learning_rate": 8.123057844454282e-07, "loss": 0.313, "step": 17645 }, { "epoch": 0.306732256774844, "grad_norm": 1.6021700366168883, "learning_rate": 8.12283801131741e-07, "loss": 0.3383, "step": 17646 }, { "epoch": 0.30674963931234683, "grad_norm": 2.11436260182604, "learning_rate": 8.122618168282577e-07, "loss": 0.2824, "step": 17647 }, { "epoch": 0.30676702184984966, "grad_norm": 1.231756876390548, "learning_rate": 8.12239831535048e-07, "loss": 0.3532, "step": 17648 }, { "epoch": 0.30678440438735244, "grad_norm": 2.036156463725859, "learning_rate": 8.122178452521814e-07, "loss": 0.3741, "step": 17649 }, { "epoch": 0.30680178692485527, "grad_norm": 2.3280775862121033, "learning_rate": 8.121958579797277e-07, "loss": 0.4459, "step": 17650 }, { "epoch": 0.3068191694623581, "grad_norm": 2.197098625101907, "learning_rate": 8.121738697177565e-07, "loss": 0.4777, "step": 17651 }, { "epoch": 0.30683655199986093, "grad_norm": 3.4911215721640634, "learning_rate": 8.121518804663375e-07, "loss": 0.5241, "step": 17652 }, { "epoch": 0.30685393453736376, "grad_norm": 2.1972292046445436, "learning_rate": 8.121298902255403e-07, "loss": 0.4069, "step": 17653 }, { "epoch": 0.3068713170748666, "grad_norm": 1.4701206680392676, "learning_rate": 8.121078989954349e-07, "loss": 0.2018, "step": 17654 }, { "epoch": 0.3068886996123694, "grad_norm": 2.128924982708613, "learning_rate": 8.120859067760908e-07, "loss": 0.2986, "step": 17655 }, { "epoch": 0.30690608214987225, "grad_norm": 1.9715854225496479, "learning_rate": 8.120639135675777e-07, "loss": 0.251, "step": 17656 }, { "epoch": 0.3069234646873751, "grad_norm": 2.636993799431585, "learning_rate": 8.120419193699655e-07, "loss": 0.4531, "step": 17657 }, { "epoch": 0.3069408472248779, "grad_norm": 2.0725715567003875, "learning_rate": 8.120199241833236e-07, "loss": 0.4013, "step": 17658 }, { "epoch": 0.3069582297623807, "grad_norm": 1.841261258084639, "learning_rate": 8.119979280077218e-07, "loss": 0.7482, "step": 17659 }, { "epoch": 0.3069756122998835, "grad_norm": 1.8857898585094504, "learning_rate": 8.119759308432301e-07, "loss": 0.3054, "step": 17660 }, { "epoch": 0.30699299483738635, "grad_norm": 1.5861305606398317, "learning_rate": 8.119539326899178e-07, "loss": 0.1608, "step": 17661 }, { "epoch": 0.3070103773748892, "grad_norm": 2.940540049707509, "learning_rate": 8.119319335478551e-07, "loss": 0.3447, "step": 17662 }, { "epoch": 0.307027759912392, "grad_norm": 2.219517782684505, "learning_rate": 8.119099334171113e-07, "loss": 0.4481, "step": 17663 }, { "epoch": 0.30704514244989484, "grad_norm": 2.1305451522784296, "learning_rate": 8.118879322977563e-07, "loss": 0.4087, "step": 17664 }, { "epoch": 0.30706252498739767, "grad_norm": 1.158409694860336, "learning_rate": 8.118659301898599e-07, "loss": 0.2558, "step": 17665 }, { "epoch": 0.3070799075249005, "grad_norm": 2.116017767951768, "learning_rate": 8.118439270934916e-07, "loss": 0.3577, "step": 17666 }, { "epoch": 0.3070972900624033, "grad_norm": 1.5968104159748946, "learning_rate": 8.118219230087213e-07, "loss": 0.4052, "step": 17667 }, { "epoch": 0.30711467259990616, "grad_norm": 1.8795300964278892, "learning_rate": 8.117999179356188e-07, "loss": 0.2513, "step": 17668 }, { "epoch": 0.30713205513740893, "grad_norm": 2.03491463654115, "learning_rate": 8.117779118742539e-07, "loss": 0.504, "step": 17669 }, { "epoch": 0.30714943767491176, "grad_norm": 1.8177756821132236, "learning_rate": 8.117559048246962e-07, "loss": 0.4346, "step": 17670 }, { "epoch": 0.3071668202124146, "grad_norm": 1.373167211212132, "learning_rate": 8.117338967870153e-07, "loss": 0.5146, "step": 17671 }, { "epoch": 0.3071842027499174, "grad_norm": 1.182437966399215, "learning_rate": 8.117118877612813e-07, "loss": 0.3085, "step": 17672 }, { "epoch": 0.30720158528742025, "grad_norm": 2.788145981107664, "learning_rate": 8.116898777475638e-07, "loss": 0.251, "step": 17673 }, { "epoch": 0.3072189678249231, "grad_norm": 4.044531955017383, "learning_rate": 8.116678667459326e-07, "loss": 0.4215, "step": 17674 }, { "epoch": 0.3072363503624259, "grad_norm": 1.4413890524415878, "learning_rate": 8.116458547564573e-07, "loss": 0.3263, "step": 17675 }, { "epoch": 0.30725373289992874, "grad_norm": 1.337695494475171, "learning_rate": 8.116238417792078e-07, "loss": 0.3844, "step": 17676 }, { "epoch": 0.3072711154374316, "grad_norm": 2.3554459839148496, "learning_rate": 8.116018278142541e-07, "loss": 0.5929, "step": 17677 }, { "epoch": 0.3072884979749344, "grad_norm": 1.5819435552280647, "learning_rate": 8.115798128616656e-07, "loss": 0.2676, "step": 17678 }, { "epoch": 0.3073058805124372, "grad_norm": 3.7948429658908998, "learning_rate": 8.115577969215121e-07, "loss": 0.4205, "step": 17679 }, { "epoch": 0.30732326304994, "grad_norm": 1.765861826077237, "learning_rate": 8.115357799938638e-07, "loss": 0.2533, "step": 17680 }, { "epoch": 0.30734064558744284, "grad_norm": 1.8865679241205158, "learning_rate": 8.1151376207879e-07, "loss": 0.2482, "step": 17681 }, { "epoch": 0.30735802812494567, "grad_norm": 1.798213254997382, "learning_rate": 8.114917431763606e-07, "loss": 0.1792, "step": 17682 }, { "epoch": 0.3073754106624485, "grad_norm": 2.4236070594510526, "learning_rate": 8.114697232866456e-07, "loss": 0.4123, "step": 17683 }, { "epoch": 0.30739279319995133, "grad_norm": 2.129054042444826, "learning_rate": 8.114477024097146e-07, "loss": 0.4133, "step": 17684 }, { "epoch": 0.30741017573745416, "grad_norm": 1.8515122415997896, "learning_rate": 8.114256805456374e-07, "loss": 0.4341, "step": 17685 }, { "epoch": 0.307427558274957, "grad_norm": 1.8824433695371372, "learning_rate": 8.114036576944839e-07, "loss": 0.2695, "step": 17686 }, { "epoch": 0.3074449408124598, "grad_norm": 2.1916346985911903, "learning_rate": 8.113816338563239e-07, "loss": 0.3484, "step": 17687 }, { "epoch": 0.30746232334996265, "grad_norm": 2.08603154094967, "learning_rate": 8.113596090312271e-07, "loss": 0.432, "step": 17688 }, { "epoch": 0.3074797058874654, "grad_norm": 1.817154310667261, "learning_rate": 8.113375832192634e-07, "loss": 0.4257, "step": 17689 }, { "epoch": 0.30749708842496826, "grad_norm": 1.7530191475375383, "learning_rate": 8.113155564205026e-07, "loss": 0.3114, "step": 17690 }, { "epoch": 0.3075144709624711, "grad_norm": 1.145631764286416, "learning_rate": 8.112935286350143e-07, "loss": 0.4563, "step": 17691 }, { "epoch": 0.3075318534999739, "grad_norm": 4.402735710716045, "learning_rate": 8.112714998628687e-07, "loss": 0.4902, "step": 17692 }, { "epoch": 0.30754923603747675, "grad_norm": 3.21698585219037, "learning_rate": 8.112494701041354e-07, "loss": 0.5034, "step": 17693 }, { "epoch": 0.3075666185749796, "grad_norm": 1.4615003840817986, "learning_rate": 8.112274393588842e-07, "loss": 0.3997, "step": 17694 }, { "epoch": 0.3075840011124824, "grad_norm": 1.8809805357803122, "learning_rate": 8.11205407627185e-07, "loss": 0.3071, "step": 17695 }, { "epoch": 0.30760138364998524, "grad_norm": 1.4209740928370223, "learning_rate": 8.111833749091078e-07, "loss": 0.3874, "step": 17696 }, { "epoch": 0.30761876618748807, "grad_norm": 3.931208025877319, "learning_rate": 8.11161341204722e-07, "loss": 0.4149, "step": 17697 }, { "epoch": 0.3076361487249909, "grad_norm": 1.8359173831601991, "learning_rate": 8.111393065140977e-07, "loss": 0.3765, "step": 17698 }, { "epoch": 0.3076535312624937, "grad_norm": 1.2380541608848359, "learning_rate": 8.111172708373049e-07, "loss": 0.2483, "step": 17699 }, { "epoch": 0.3076709137999965, "grad_norm": 1.8654400331654402, "learning_rate": 8.11095234174413e-07, "loss": 0.2357, "step": 17700 }, { "epoch": 0.30768829633749933, "grad_norm": 1.6394163689438706, "learning_rate": 8.110731965254925e-07, "loss": 0.3558, "step": 17701 }, { "epoch": 0.30770567887500216, "grad_norm": 1.7161046914894749, "learning_rate": 8.110511578906125e-07, "loss": 0.2135, "step": 17702 }, { "epoch": 0.307723061412505, "grad_norm": 1.9050209668013933, "learning_rate": 8.110291182698434e-07, "loss": 0.1952, "step": 17703 }, { "epoch": 0.3077404439500078, "grad_norm": 1.6160211116359402, "learning_rate": 8.110070776632547e-07, "loss": 0.396, "step": 17704 }, { "epoch": 0.30775782648751066, "grad_norm": 1.6458951529516879, "learning_rate": 8.109850360709168e-07, "loss": 0.2371, "step": 17705 }, { "epoch": 0.3077752090250135, "grad_norm": 1.8744717241138913, "learning_rate": 8.109629934928988e-07, "loss": 0.4211, "step": 17706 }, { "epoch": 0.3077925915625163, "grad_norm": 2.888381961279332, "learning_rate": 8.109409499292711e-07, "loss": 0.5578, "step": 17707 }, { "epoch": 0.30780997410001915, "grad_norm": 2.2202208034570576, "learning_rate": 8.109189053801035e-07, "loss": 0.4524, "step": 17708 }, { "epoch": 0.3078273566375219, "grad_norm": 1.456803069214406, "learning_rate": 8.108968598454657e-07, "loss": 0.2905, "step": 17709 }, { "epoch": 0.30784473917502475, "grad_norm": 1.8790104316335112, "learning_rate": 8.108748133254277e-07, "loss": 0.2588, "step": 17710 }, { "epoch": 0.3078621217125276, "grad_norm": 2.7614714487623355, "learning_rate": 8.108527658200592e-07, "loss": 0.2962, "step": 17711 }, { "epoch": 0.3078795042500304, "grad_norm": 1.5766090808639437, "learning_rate": 8.108307173294305e-07, "loss": 0.3341, "step": 17712 }, { "epoch": 0.30789688678753324, "grad_norm": 3.1499730940478328, "learning_rate": 8.10808667853611e-07, "loss": 0.2801, "step": 17713 }, { "epoch": 0.30791426932503607, "grad_norm": 2.055443793614889, "learning_rate": 8.107866173926708e-07, "loss": 0.3654, "step": 17714 }, { "epoch": 0.3079316518625389, "grad_norm": 1.6298220162900243, "learning_rate": 8.107645659466799e-07, "loss": 0.3367, "step": 17715 }, { "epoch": 0.30794903440004173, "grad_norm": 2.121976418577231, "learning_rate": 8.10742513515708e-07, "loss": 0.319, "step": 17716 }, { "epoch": 0.30796641693754456, "grad_norm": 1.3812590074920879, "learning_rate": 8.107204600998251e-07, "loss": 0.3216, "step": 17717 }, { "epoch": 0.3079837994750474, "grad_norm": 1.6487790560360855, "learning_rate": 8.10698405699101e-07, "loss": 0.2737, "step": 17718 }, { "epoch": 0.30800118201255017, "grad_norm": 1.7190743724419668, "learning_rate": 8.106763503136058e-07, "loss": 0.3973, "step": 17719 }, { "epoch": 0.308018564550053, "grad_norm": 1.8229122231539647, "learning_rate": 8.106542939434093e-07, "loss": 0.2681, "step": 17720 }, { "epoch": 0.30803594708755583, "grad_norm": 1.590416589854775, "learning_rate": 8.106322365885813e-07, "loss": 0.3662, "step": 17721 }, { "epoch": 0.30805332962505866, "grad_norm": 2.3829562901175523, "learning_rate": 8.106101782491919e-07, "loss": 0.5231, "step": 17722 }, { "epoch": 0.3080707121625615, "grad_norm": 2.269916152298638, "learning_rate": 8.105881189253108e-07, "loss": 0.3252, "step": 17723 }, { "epoch": 0.3080880947000643, "grad_norm": 2.481555307982157, "learning_rate": 8.105660586170082e-07, "loss": 0.308, "step": 17724 }, { "epoch": 0.30810547723756715, "grad_norm": 1.2987716074966213, "learning_rate": 8.105439973243536e-07, "loss": 0.5003, "step": 17725 }, { "epoch": 0.30812285977507, "grad_norm": 2.089870196150797, "learning_rate": 8.105219350474173e-07, "loss": 0.4821, "step": 17726 }, { "epoch": 0.3081402423125728, "grad_norm": 2.2810032487981577, "learning_rate": 8.104998717862692e-07, "loss": 0.3083, "step": 17727 }, { "epoch": 0.30815762485007564, "grad_norm": 1.7991579481705207, "learning_rate": 8.104778075409791e-07, "loss": 0.4581, "step": 17728 }, { "epoch": 0.3081750073875784, "grad_norm": 2.0829137623110605, "learning_rate": 8.104557423116169e-07, "loss": 0.3421, "step": 17729 }, { "epoch": 0.30819238992508124, "grad_norm": 3.0983337894615963, "learning_rate": 8.104336760982526e-07, "loss": 0.4212, "step": 17730 }, { "epoch": 0.3082097724625841, "grad_norm": 1.2674106439202677, "learning_rate": 8.104116089009563e-07, "loss": 0.2095, "step": 17731 }, { "epoch": 0.3082271550000869, "grad_norm": 1.1790677754942376, "learning_rate": 8.103895407197976e-07, "loss": 0.443, "step": 17732 }, { "epoch": 0.30824453753758974, "grad_norm": 0.9901764776917829, "learning_rate": 8.103674715548469e-07, "loss": 0.4194, "step": 17733 }, { "epoch": 0.30826192007509257, "grad_norm": 1.6241159111151329, "learning_rate": 8.103454014061737e-07, "loss": 0.3038, "step": 17734 }, { "epoch": 0.3082793026125954, "grad_norm": 1.9933945264353432, "learning_rate": 8.10323330273848e-07, "loss": 0.2686, "step": 17735 }, { "epoch": 0.3082966851500982, "grad_norm": 1.578004719214672, "learning_rate": 8.1030125815794e-07, "loss": 0.31, "step": 17736 }, { "epoch": 0.30831406768760106, "grad_norm": 1.7413080758535338, "learning_rate": 8.102791850585198e-07, "loss": 0.3075, "step": 17737 }, { "epoch": 0.3083314502251039, "grad_norm": 1.8032470910439533, "learning_rate": 8.102571109756569e-07, "loss": 0.597, "step": 17738 }, { "epoch": 0.30834883276260666, "grad_norm": 2.126168746018277, "learning_rate": 8.102350359094214e-07, "loss": 0.448, "step": 17739 }, { "epoch": 0.3083662153001095, "grad_norm": 1.792980358611739, "learning_rate": 8.102129598598835e-07, "loss": 0.2011, "step": 17740 }, { "epoch": 0.3083835978376123, "grad_norm": 1.5984960513295583, "learning_rate": 8.101908828271129e-07, "loss": 0.3004, "step": 17741 }, { "epoch": 0.30840098037511515, "grad_norm": 2.418111369516662, "learning_rate": 8.101688048111796e-07, "loss": 0.3999, "step": 17742 }, { "epoch": 0.308418362912618, "grad_norm": 1.5973260255798654, "learning_rate": 8.101467258121539e-07, "loss": 0.3598, "step": 17743 }, { "epoch": 0.3084357454501208, "grad_norm": 1.260577342556962, "learning_rate": 8.101246458301055e-07, "loss": 0.2923, "step": 17744 }, { "epoch": 0.30845312798762364, "grad_norm": 2.2508687618263257, "learning_rate": 8.101025648651045e-07, "loss": 0.7039, "step": 17745 }, { "epoch": 0.3084705105251265, "grad_norm": 1.7521718324063937, "learning_rate": 8.100804829172207e-07, "loss": 0.345, "step": 17746 }, { "epoch": 0.3084878930626293, "grad_norm": 1.613630529571933, "learning_rate": 8.100583999865242e-07, "loss": 0.4721, "step": 17747 }, { "epoch": 0.30850527560013213, "grad_norm": 1.0440397358218187, "learning_rate": 8.10036316073085e-07, "loss": 0.3659, "step": 17748 }, { "epoch": 0.3085226581376349, "grad_norm": 2.390882197752967, "learning_rate": 8.100142311769731e-07, "loss": 0.258, "step": 17749 }, { "epoch": 0.30854004067513774, "grad_norm": 1.646433264288883, "learning_rate": 8.099921452982586e-07, "loss": 0.3413, "step": 17750 }, { "epoch": 0.30855742321264057, "grad_norm": 1.837445453113103, "learning_rate": 8.099700584370113e-07, "loss": 0.2574, "step": 17751 }, { "epoch": 0.3085748057501434, "grad_norm": 1.5609988025107844, "learning_rate": 8.099479705933012e-07, "loss": 0.3647, "step": 17752 }, { "epoch": 0.30859218828764623, "grad_norm": 1.5362239703898317, "learning_rate": 8.099258817671985e-07, "loss": 0.3362, "step": 17753 }, { "epoch": 0.30860957082514906, "grad_norm": 1.5835023703460405, "learning_rate": 8.099037919587733e-07, "loss": 0.3621, "step": 17754 }, { "epoch": 0.3086269533626519, "grad_norm": 2.305603980690995, "learning_rate": 8.098817011680951e-07, "loss": 0.3666, "step": 17755 }, { "epoch": 0.3086443359001547, "grad_norm": 3.0113906288231638, "learning_rate": 8.098596093952346e-07, "loss": 0.3396, "step": 17756 }, { "epoch": 0.30866171843765755, "grad_norm": 2.112739252538052, "learning_rate": 8.098375166402613e-07, "loss": 0.307, "step": 17757 }, { "epoch": 0.3086791009751604, "grad_norm": 1.549494122287298, "learning_rate": 8.098154229032454e-07, "loss": 0.226, "step": 17758 }, { "epoch": 0.30869648351266316, "grad_norm": 2.0714352028041767, "learning_rate": 8.097933281842571e-07, "loss": 0.2744, "step": 17759 }, { "epoch": 0.308713866050166, "grad_norm": 1.5034404890837194, "learning_rate": 8.09771232483366e-07, "loss": 0.3119, "step": 17760 }, { "epoch": 0.3087312485876688, "grad_norm": 1.924573482602788, "learning_rate": 8.097491358006425e-07, "loss": 0.2892, "step": 17761 }, { "epoch": 0.30874863112517165, "grad_norm": 1.3786207538978406, "learning_rate": 8.097270381361565e-07, "loss": 0.2947, "step": 17762 }, { "epoch": 0.3087660136626745, "grad_norm": 1.6889373470053257, "learning_rate": 8.09704939489978e-07, "loss": 0.2032, "step": 17763 }, { "epoch": 0.3087833962001773, "grad_norm": 1.4164583502743338, "learning_rate": 8.096828398621773e-07, "loss": 0.2923, "step": 17764 }, { "epoch": 0.30880077873768014, "grad_norm": 2.612770983835519, "learning_rate": 8.096607392528242e-07, "loss": 0.4668, "step": 17765 }, { "epoch": 0.30881816127518297, "grad_norm": 2.0394667995455915, "learning_rate": 8.096386376619888e-07, "loss": 0.2559, "step": 17766 }, { "epoch": 0.3088355438126858, "grad_norm": 1.9294846489019934, "learning_rate": 8.09616535089741e-07, "loss": 0.2905, "step": 17767 }, { "epoch": 0.30885292635018863, "grad_norm": 1.5547277699853017, "learning_rate": 8.095944315361512e-07, "loss": 0.5468, "step": 17768 }, { "epoch": 0.3088703088876914, "grad_norm": 1.9847847624541457, "learning_rate": 8.095723270012892e-07, "loss": 0.3729, "step": 17769 }, { "epoch": 0.30888769142519423, "grad_norm": 1.7514890591905437, "learning_rate": 8.09550221485225e-07, "loss": 0.3971, "step": 17770 }, { "epoch": 0.30890507396269706, "grad_norm": 1.1341404092031797, "learning_rate": 8.09528114988029e-07, "loss": 0.2449, "step": 17771 }, { "epoch": 0.3089224565001999, "grad_norm": 1.1729986256515885, "learning_rate": 8.09506007509771e-07, "loss": 0.5253, "step": 17772 }, { "epoch": 0.3089398390377027, "grad_norm": 1.517599343137429, "learning_rate": 8.09483899050521e-07, "loss": 0.2197, "step": 17773 }, { "epoch": 0.30895722157520555, "grad_norm": 2.0400325063672953, "learning_rate": 8.094617896103493e-07, "loss": 0.3736, "step": 17774 }, { "epoch": 0.3089746041127084, "grad_norm": 3.1410881836155067, "learning_rate": 8.094396791893259e-07, "loss": 0.4565, "step": 17775 }, { "epoch": 0.3089919866502112, "grad_norm": 2.857233657147405, "learning_rate": 8.094175677875209e-07, "loss": 0.391, "step": 17776 }, { "epoch": 0.30900936918771404, "grad_norm": 1.8364814259680031, "learning_rate": 8.093954554050043e-07, "loss": 0.3668, "step": 17777 }, { "epoch": 0.3090267517252168, "grad_norm": 1.2108530608783044, "learning_rate": 8.093733420418463e-07, "loss": 0.262, "step": 17778 }, { "epoch": 0.30904413426271965, "grad_norm": 5.063801601217987, "learning_rate": 8.093512276981166e-07, "loss": 0.391, "step": 17779 }, { "epoch": 0.3090615168002225, "grad_norm": 1.1087902992773209, "learning_rate": 8.09329112373886e-07, "loss": 0.2621, "step": 17780 }, { "epoch": 0.3090788993377253, "grad_norm": 1.5121558575983518, "learning_rate": 8.093069960692242e-07, "loss": 0.3815, "step": 17781 }, { "epoch": 0.30909628187522814, "grad_norm": 1.959723865150373, "learning_rate": 8.09284878784201e-07, "loss": 0.4086, "step": 17782 }, { "epoch": 0.30911366441273097, "grad_norm": 2.544426742139182, "learning_rate": 8.092627605188871e-07, "loss": 0.3388, "step": 17783 }, { "epoch": 0.3091310469502338, "grad_norm": 2.7314028915112845, "learning_rate": 8.092406412733522e-07, "loss": 0.3612, "step": 17784 }, { "epoch": 0.30914842948773663, "grad_norm": 2.7393732334767567, "learning_rate": 8.092185210476665e-07, "loss": 0.4582, "step": 17785 }, { "epoch": 0.30916581202523946, "grad_norm": 3.2121937690960096, "learning_rate": 8.091963998419001e-07, "loss": 0.3512, "step": 17786 }, { "epoch": 0.3091831945627423, "grad_norm": 3.528184041919849, "learning_rate": 8.091742776561233e-07, "loss": 0.5606, "step": 17787 }, { "epoch": 0.30920057710024507, "grad_norm": 1.3238061064278488, "learning_rate": 8.091521544904058e-07, "loss": 0.2143, "step": 17788 }, { "epoch": 0.3092179596377479, "grad_norm": 1.7834423547380354, "learning_rate": 8.091300303448183e-07, "loss": 0.1857, "step": 17789 }, { "epoch": 0.3092353421752507, "grad_norm": 2.021899190394317, "learning_rate": 8.091079052194305e-07, "loss": 0.2844, "step": 17790 }, { "epoch": 0.30925272471275356, "grad_norm": 2.207115523370197, "learning_rate": 8.090857791143125e-07, "loss": 0.3911, "step": 17791 }, { "epoch": 0.3092701072502564, "grad_norm": 0.8870551658124026, "learning_rate": 8.090636520295346e-07, "loss": 0.2096, "step": 17792 }, { "epoch": 0.3092874897877592, "grad_norm": 1.4994726212858007, "learning_rate": 8.09041523965167e-07, "loss": 0.1835, "step": 17793 }, { "epoch": 0.30930487232526205, "grad_norm": 1.4518441307976357, "learning_rate": 8.090193949212797e-07, "loss": 0.1731, "step": 17794 }, { "epoch": 0.3093222548627649, "grad_norm": 1.8373155051913883, "learning_rate": 8.089972648979428e-07, "loss": 0.363, "step": 17795 }, { "epoch": 0.3093396374002677, "grad_norm": 1.8396584449063866, "learning_rate": 8.089751338952265e-07, "loss": 0.3106, "step": 17796 }, { "epoch": 0.30935701993777054, "grad_norm": 1.804577971220447, "learning_rate": 8.089530019132011e-07, "loss": 0.3264, "step": 17797 }, { "epoch": 0.3093744024752733, "grad_norm": 2.284588831188306, "learning_rate": 8.089308689519366e-07, "loss": 0.5091, "step": 17798 }, { "epoch": 0.30939178501277614, "grad_norm": 1.5274389423289885, "learning_rate": 8.089087350115029e-07, "loss": 0.6862, "step": 17799 }, { "epoch": 0.309409167550279, "grad_norm": 2.302537726911645, "learning_rate": 8.088866000919707e-07, "loss": 0.2292, "step": 17800 }, { "epoch": 0.3094265500877818, "grad_norm": 2.050514138807078, "learning_rate": 8.088644641934096e-07, "loss": 0.4513, "step": 17801 }, { "epoch": 0.30944393262528463, "grad_norm": 1.490211474759529, "learning_rate": 8.088423273158901e-07, "loss": 0.3937, "step": 17802 }, { "epoch": 0.30946131516278746, "grad_norm": 2.1627534495841028, "learning_rate": 8.088201894594826e-07, "loss": 0.3878, "step": 17803 }, { "epoch": 0.3094786977002903, "grad_norm": 1.8010882737465357, "learning_rate": 8.087980506242566e-07, "loss": 0.3092, "step": 17804 }, { "epoch": 0.3094960802377931, "grad_norm": 1.3723662650261492, "learning_rate": 8.087759108102829e-07, "loss": 0.3887, "step": 17805 }, { "epoch": 0.30951346277529596, "grad_norm": 1.4463355639678035, "learning_rate": 8.087537700176312e-07, "loss": 0.1495, "step": 17806 }, { "epoch": 0.3095308453127988, "grad_norm": 1.7702778209209888, "learning_rate": 8.087316282463719e-07, "loss": 0.4385, "step": 17807 }, { "epoch": 0.30954822785030156, "grad_norm": 1.9702892010961144, "learning_rate": 8.087094854965753e-07, "loss": 0.2922, "step": 17808 }, { "epoch": 0.3095656103878044, "grad_norm": 1.7474707429488292, "learning_rate": 8.086873417683113e-07, "loss": 0.3489, "step": 17809 }, { "epoch": 0.3095829929253072, "grad_norm": 2.4232763541037268, "learning_rate": 8.086651970616503e-07, "loss": 0.4799, "step": 17810 }, { "epoch": 0.30960037546281005, "grad_norm": 1.4043251064994242, "learning_rate": 8.086430513766623e-07, "loss": 0.3497, "step": 17811 }, { "epoch": 0.3096177580003129, "grad_norm": 1.9655763596968094, "learning_rate": 8.086209047134178e-07, "loss": 0.2374, "step": 17812 }, { "epoch": 0.3096351405378157, "grad_norm": 1.6133233546488879, "learning_rate": 8.085987570719867e-07, "loss": 0.3949, "step": 17813 }, { "epoch": 0.30965252307531854, "grad_norm": 1.4043362807593687, "learning_rate": 8.085766084524393e-07, "loss": 0.3777, "step": 17814 }, { "epoch": 0.3096699056128214, "grad_norm": 2.023678896172252, "learning_rate": 8.085544588548456e-07, "loss": 0.3862, "step": 17815 }, { "epoch": 0.3096872881503242, "grad_norm": 1.25590043748383, "learning_rate": 8.085323082792764e-07, "loss": 0.2398, "step": 17816 }, { "epoch": 0.30970467068782703, "grad_norm": 4.690781034948331, "learning_rate": 8.085101567258012e-07, "loss": 0.2905, "step": 17817 }, { "epoch": 0.3097220532253298, "grad_norm": 2.0275568513757287, "learning_rate": 8.084880041944906e-07, "loss": 0.3095, "step": 17818 }, { "epoch": 0.30973943576283264, "grad_norm": 2.9101008433419793, "learning_rate": 8.084658506854148e-07, "loss": 0.2821, "step": 17819 }, { "epoch": 0.30975681830033547, "grad_norm": 2.2379063591141564, "learning_rate": 8.084436961986438e-07, "loss": 0.3015, "step": 17820 }, { "epoch": 0.3097742008378383, "grad_norm": 2.337110247684271, "learning_rate": 8.084215407342481e-07, "loss": 0.2629, "step": 17821 }, { "epoch": 0.30979158337534113, "grad_norm": 1.4753393198814053, "learning_rate": 8.083993842922978e-07, "loss": 0.295, "step": 17822 }, { "epoch": 0.30980896591284396, "grad_norm": 3.065597316666093, "learning_rate": 8.083772268728631e-07, "loss": 0.3423, "step": 17823 }, { "epoch": 0.3098263484503468, "grad_norm": 1.4717807309430089, "learning_rate": 8.083550684760143e-07, "loss": 0.3693, "step": 17824 }, { "epoch": 0.3098437309878496, "grad_norm": 1.6852252816045021, "learning_rate": 8.083329091018216e-07, "loss": 0.1884, "step": 17825 }, { "epoch": 0.30986111352535245, "grad_norm": 0.8412445752295873, "learning_rate": 8.083107487503551e-07, "loss": 0.3222, "step": 17826 }, { "epoch": 0.3098784960628553, "grad_norm": 1.5818131369251693, "learning_rate": 8.082885874216851e-07, "loss": 0.3941, "step": 17827 }, { "epoch": 0.30989587860035805, "grad_norm": 1.9399212051681625, "learning_rate": 8.082664251158821e-07, "loss": 0.2803, "step": 17828 }, { "epoch": 0.3099132611378609, "grad_norm": 2.466417037731291, "learning_rate": 8.08244261833016e-07, "loss": 0.2266, "step": 17829 }, { "epoch": 0.3099306436753637, "grad_norm": 1.200523903929317, "learning_rate": 8.082220975731572e-07, "loss": 0.2459, "step": 17830 }, { "epoch": 0.30994802621286655, "grad_norm": 1.5845387500861055, "learning_rate": 8.08199932336376e-07, "loss": 0.3501, "step": 17831 }, { "epoch": 0.3099654087503694, "grad_norm": 1.7586541420309953, "learning_rate": 8.081777661227425e-07, "loss": 0.207, "step": 17832 }, { "epoch": 0.3099827912878722, "grad_norm": 1.5843707766615618, "learning_rate": 8.08155598932327e-07, "loss": 0.336, "step": 17833 }, { "epoch": 0.31000017382537504, "grad_norm": 1.2809121834029855, "learning_rate": 8.081334307651999e-07, "loss": 0.4039, "step": 17834 }, { "epoch": 0.31001755636287787, "grad_norm": 1.8577563470508625, "learning_rate": 8.081112616214314e-07, "loss": 0.287, "step": 17835 }, { "epoch": 0.3100349389003807, "grad_norm": 1.5021757232935098, "learning_rate": 8.080890915010916e-07, "loss": 0.3844, "step": 17836 }, { "epoch": 0.3100523214378835, "grad_norm": 1.949506784538083, "learning_rate": 8.080669204042509e-07, "loss": 0.3513, "step": 17837 }, { "epoch": 0.3100697039753863, "grad_norm": 1.47384264664884, "learning_rate": 8.080447483309798e-07, "loss": 0.2044, "step": 17838 }, { "epoch": 0.31008708651288913, "grad_norm": 1.3029629665789788, "learning_rate": 8.080225752813482e-07, "loss": 0.2379, "step": 17839 }, { "epoch": 0.31010446905039196, "grad_norm": 2.150414168086796, "learning_rate": 8.080004012554266e-07, "loss": 0.3657, "step": 17840 }, { "epoch": 0.3101218515878948, "grad_norm": 1.1333979146213706, "learning_rate": 8.079782262532851e-07, "loss": 0.3013, "step": 17841 }, { "epoch": 0.3101392341253976, "grad_norm": 1.5847085687109588, "learning_rate": 8.079560502749943e-07, "loss": 0.3038, "step": 17842 }, { "epoch": 0.31015661666290045, "grad_norm": 1.6816663344986, "learning_rate": 8.07933873320624e-07, "loss": 0.4168, "step": 17843 }, { "epoch": 0.3101739992004033, "grad_norm": 2.0043550430065475, "learning_rate": 8.079116953902449e-07, "loss": 0.337, "step": 17844 }, { "epoch": 0.3101913817379061, "grad_norm": 1.9834055803987667, "learning_rate": 8.078895164839272e-07, "loss": 0.2526, "step": 17845 }, { "epoch": 0.31020876427540894, "grad_norm": 1.6549165618340145, "learning_rate": 8.078673366017412e-07, "loss": 0.4398, "step": 17846 }, { "epoch": 0.3102261468129118, "grad_norm": 1.6485382814544156, "learning_rate": 8.078451557437572e-07, "loss": 0.2674, "step": 17847 }, { "epoch": 0.31024352935041455, "grad_norm": 2.6240520355053745, "learning_rate": 8.078229739100452e-07, "loss": 0.3069, "step": 17848 }, { "epoch": 0.3102609118879174, "grad_norm": 1.5143806848023813, "learning_rate": 8.078007911006761e-07, "loss": 0.3606, "step": 17849 }, { "epoch": 0.3102782944254202, "grad_norm": 2.2071116727068367, "learning_rate": 8.077786073157198e-07, "loss": 0.1637, "step": 17850 }, { "epoch": 0.31029567696292304, "grad_norm": 1.7872910602780216, "learning_rate": 8.077564225552466e-07, "loss": 0.2843, "step": 17851 }, { "epoch": 0.31031305950042587, "grad_norm": 1.3514327933727244, "learning_rate": 8.077342368193269e-07, "loss": 0.1821, "step": 17852 }, { "epoch": 0.3103304420379287, "grad_norm": 1.674775379750952, "learning_rate": 8.077120501080312e-07, "loss": 0.3656, "step": 17853 }, { "epoch": 0.31034782457543153, "grad_norm": 1.2603792471514204, "learning_rate": 8.076898624214296e-07, "loss": 0.3423, "step": 17854 }, { "epoch": 0.31036520711293436, "grad_norm": 1.7132119298373756, "learning_rate": 8.076676737595923e-07, "loss": 0.4115, "step": 17855 }, { "epoch": 0.3103825896504372, "grad_norm": 1.4501894040014722, "learning_rate": 8.0764548412259e-07, "loss": 0.2725, "step": 17856 }, { "epoch": 0.31039997218794, "grad_norm": 1.5832510646666935, "learning_rate": 8.076232935104928e-07, "loss": 0.1706, "step": 17857 }, { "epoch": 0.3104173547254428, "grad_norm": 2.51170750849857, "learning_rate": 8.076011019233709e-07, "loss": 0.3287, "step": 17858 }, { "epoch": 0.3104347372629456, "grad_norm": 1.1102182752783383, "learning_rate": 8.075789093612951e-07, "loss": 0.1789, "step": 17859 }, { "epoch": 0.31045211980044846, "grad_norm": 1.332494190951401, "learning_rate": 8.075567158243351e-07, "loss": 0.2794, "step": 17860 }, { "epoch": 0.3104695023379513, "grad_norm": 2.114305207010144, "learning_rate": 8.075345213125619e-07, "loss": 0.339, "step": 17861 }, { "epoch": 0.3104868848754541, "grad_norm": 2.01069929786421, "learning_rate": 8.075123258260453e-07, "loss": 0.4351, "step": 17862 }, { "epoch": 0.31050426741295695, "grad_norm": 1.6028607350602635, "learning_rate": 8.07490129364856e-07, "loss": 0.5016, "step": 17863 }, { "epoch": 0.3105216499504598, "grad_norm": 1.3110433249090676, "learning_rate": 8.074679319290643e-07, "loss": 0.3388, "step": 17864 }, { "epoch": 0.3105390324879626, "grad_norm": 2.1068776543025303, "learning_rate": 8.074457335187404e-07, "loss": 0.1888, "step": 17865 }, { "epoch": 0.31055641502546544, "grad_norm": 2.732603217640135, "learning_rate": 8.074235341339548e-07, "loss": 0.3167, "step": 17866 }, { "epoch": 0.31057379756296827, "grad_norm": 1.2079415386368468, "learning_rate": 8.074013337747778e-07, "loss": 0.224, "step": 17867 }, { "epoch": 0.31059118010047104, "grad_norm": 1.6640965806586132, "learning_rate": 8.073791324412797e-07, "loss": 0.2851, "step": 17868 }, { "epoch": 0.3106085626379739, "grad_norm": 1.463840549841865, "learning_rate": 8.073569301335313e-07, "loss": 0.2568, "step": 17869 }, { "epoch": 0.3106259451754767, "grad_norm": 1.390256920591899, "learning_rate": 8.073347268516022e-07, "loss": 0.3907, "step": 17870 }, { "epoch": 0.31064332771297953, "grad_norm": 1.342275568762291, "learning_rate": 8.073125225955632e-07, "loss": 0.4128, "step": 17871 }, { "epoch": 0.31066071025048236, "grad_norm": 1.1552607977688683, "learning_rate": 8.07290317365485e-07, "loss": 0.3801, "step": 17872 }, { "epoch": 0.3106780927879852, "grad_norm": 1.698880626238417, "learning_rate": 8.072681111614374e-07, "loss": 0.4097, "step": 17873 }, { "epoch": 0.310695475325488, "grad_norm": 1.840401990236514, "learning_rate": 8.072459039834912e-07, "loss": 0.421, "step": 17874 }, { "epoch": 0.31071285786299085, "grad_norm": 4.17990870832379, "learning_rate": 8.072236958317164e-07, "loss": 0.7472, "step": 17875 }, { "epoch": 0.3107302404004937, "grad_norm": 1.3551645523152538, "learning_rate": 8.072014867061838e-07, "loss": 0.323, "step": 17876 }, { "epoch": 0.3107476229379965, "grad_norm": 3.009310909738352, "learning_rate": 8.071792766069636e-07, "loss": 0.4097, "step": 17877 }, { "epoch": 0.3107650054754993, "grad_norm": 3.5988327302115315, "learning_rate": 8.07157065534126e-07, "loss": 0.4153, "step": 17878 }, { "epoch": 0.3107823880130021, "grad_norm": 1.1545387068370039, "learning_rate": 8.071348534877418e-07, "loss": 0.3209, "step": 17879 }, { "epoch": 0.31079977055050495, "grad_norm": 2.418856218035994, "learning_rate": 8.071126404678812e-07, "loss": 0.4308, "step": 17880 }, { "epoch": 0.3108171530880078, "grad_norm": 1.728699576848346, "learning_rate": 8.070904264746145e-07, "loss": 0.3763, "step": 17881 }, { "epoch": 0.3108345356255106, "grad_norm": 1.3069444971433397, "learning_rate": 8.070682115080123e-07, "loss": 0.2661, "step": 17882 }, { "epoch": 0.31085191816301344, "grad_norm": 1.4569286478612102, "learning_rate": 8.070459955681449e-07, "loss": 0.3442, "step": 17883 }, { "epoch": 0.31086930070051627, "grad_norm": 1.9978949838448357, "learning_rate": 8.070237786550826e-07, "loss": 0.2613, "step": 17884 }, { "epoch": 0.3108866832380191, "grad_norm": 1.5528486556422614, "learning_rate": 8.070015607688963e-07, "loss": 0.3929, "step": 17885 }, { "epoch": 0.31090406577552193, "grad_norm": 1.2070368349350005, "learning_rate": 8.069793419096558e-07, "loss": 0.2062, "step": 17886 }, { "epoch": 0.31092144831302476, "grad_norm": 1.970066701718806, "learning_rate": 8.069571220774318e-07, "loss": 0.3254, "step": 17887 }, { "epoch": 0.31093883085052754, "grad_norm": 1.9985285570848124, "learning_rate": 8.069349012722948e-07, "loss": 0.27, "step": 17888 }, { "epoch": 0.31095621338803037, "grad_norm": 2.274335226722726, "learning_rate": 8.069126794943151e-07, "loss": 0.4112, "step": 17889 }, { "epoch": 0.3109735959255332, "grad_norm": 1.7251554284821726, "learning_rate": 8.068904567435633e-07, "loss": 0.4127, "step": 17890 }, { "epoch": 0.310990978463036, "grad_norm": 1.768987182621973, "learning_rate": 8.068682330201096e-07, "loss": 0.4852, "step": 17891 }, { "epoch": 0.31100836100053886, "grad_norm": 1.8086375623090107, "learning_rate": 8.068460083240247e-07, "loss": 0.2647, "step": 17892 }, { "epoch": 0.3110257435380417, "grad_norm": 1.9983809765868596, "learning_rate": 8.068237826553787e-07, "loss": 0.2267, "step": 17893 }, { "epoch": 0.3110431260755445, "grad_norm": 2.668297750169834, "learning_rate": 8.068015560142425e-07, "loss": 0.3012, "step": 17894 }, { "epoch": 0.31106050861304735, "grad_norm": 1.3331327548890823, "learning_rate": 8.067793284006861e-07, "loss": 0.4707, "step": 17895 }, { "epoch": 0.3110778911505502, "grad_norm": 2.6514966651323397, "learning_rate": 8.067570998147802e-07, "loss": 0.4214, "step": 17896 }, { "epoch": 0.311095273688053, "grad_norm": 2.8175712169185054, "learning_rate": 8.067348702565953e-07, "loss": 0.5667, "step": 17897 }, { "epoch": 0.3111126562255558, "grad_norm": 10.975356978484635, "learning_rate": 8.067126397262017e-07, "loss": 0.3164, "step": 17898 }, { "epoch": 0.3111300387630586, "grad_norm": 1.8709300036417318, "learning_rate": 8.066904082236699e-07, "loss": 0.5581, "step": 17899 }, { "epoch": 0.31114742130056144, "grad_norm": 1.8331481437110875, "learning_rate": 8.066681757490705e-07, "loss": 0.3168, "step": 17900 }, { "epoch": 0.3111648038380643, "grad_norm": 1.6808870114814847, "learning_rate": 8.066459423024738e-07, "loss": 0.3336, "step": 17901 }, { "epoch": 0.3111821863755671, "grad_norm": 1.6544202449947434, "learning_rate": 8.066237078839503e-07, "loss": 0.3863, "step": 17902 }, { "epoch": 0.31119956891306993, "grad_norm": 1.449697126603293, "learning_rate": 8.066014724935703e-07, "loss": 0.2206, "step": 17903 }, { "epoch": 0.31121695145057277, "grad_norm": 1.4191822230203002, "learning_rate": 8.065792361314049e-07, "loss": 0.3091, "step": 17904 }, { "epoch": 0.3112343339880756, "grad_norm": 1.8902382512306057, "learning_rate": 8.065569987975238e-07, "loss": 0.3263, "step": 17905 }, { "epoch": 0.3112517165255784, "grad_norm": 2.4006222183600383, "learning_rate": 8.065347604919979e-07, "loss": 0.4374, "step": 17906 }, { "epoch": 0.31126909906308126, "grad_norm": 1.821194755241953, "learning_rate": 8.065125212148978e-07, "loss": 0.3193, "step": 17907 }, { "epoch": 0.31128648160058403, "grad_norm": 1.2137167029738387, "learning_rate": 8.064902809662937e-07, "loss": 0.2778, "step": 17908 }, { "epoch": 0.31130386413808686, "grad_norm": 2.841848567312171, "learning_rate": 8.064680397462562e-07, "loss": 0.3125, "step": 17909 }, { "epoch": 0.3113212466755897, "grad_norm": 1.4501623202154292, "learning_rate": 8.064457975548557e-07, "loss": 0.3827, "step": 17910 }, { "epoch": 0.3113386292130925, "grad_norm": 2.2993906363918257, "learning_rate": 8.064235543921629e-07, "loss": 0.2729, "step": 17911 }, { "epoch": 0.31135601175059535, "grad_norm": 1.5327794119955838, "learning_rate": 8.064013102582481e-07, "loss": 0.414, "step": 17912 }, { "epoch": 0.3113733942880982, "grad_norm": 1.8353212155450107, "learning_rate": 8.063790651531821e-07, "loss": 0.5822, "step": 17913 }, { "epoch": 0.311390776825601, "grad_norm": 1.5969391279264193, "learning_rate": 8.06356819077035e-07, "loss": 0.2823, "step": 17914 }, { "epoch": 0.31140815936310384, "grad_norm": 1.7409292169374375, "learning_rate": 8.063345720298776e-07, "loss": 0.2762, "step": 17915 }, { "epoch": 0.3114255419006067, "grad_norm": 1.2920795129858407, "learning_rate": 8.063123240117801e-07, "loss": 0.2457, "step": 17916 }, { "epoch": 0.31144292443810945, "grad_norm": 1.6471974436045498, "learning_rate": 8.062900750228137e-07, "loss": 0.1553, "step": 17917 }, { "epoch": 0.3114603069756123, "grad_norm": 5.019677996540689, "learning_rate": 8.062678250630481e-07, "loss": 0.315, "step": 17918 }, { "epoch": 0.3114776895131151, "grad_norm": 1.6501361437453674, "learning_rate": 8.062455741325542e-07, "loss": 0.1316, "step": 17919 }, { "epoch": 0.31149507205061794, "grad_norm": 1.3946224746338425, "learning_rate": 8.062233222314026e-07, "loss": 0.1917, "step": 17920 }, { "epoch": 0.31151245458812077, "grad_norm": 4.166771905688307, "learning_rate": 8.062010693596638e-07, "loss": 0.2936, "step": 17921 }, { "epoch": 0.3115298371256236, "grad_norm": 2.362339942716433, "learning_rate": 8.061788155174082e-07, "loss": 0.3489, "step": 17922 }, { "epoch": 0.31154721966312643, "grad_norm": 1.6771918112992958, "learning_rate": 8.061565607047064e-07, "loss": 0.2537, "step": 17923 }, { "epoch": 0.31156460220062926, "grad_norm": 1.4187269547149295, "learning_rate": 8.061343049216289e-07, "loss": 0.2873, "step": 17924 }, { "epoch": 0.3115819847381321, "grad_norm": 1.03833816088251, "learning_rate": 8.061120481682463e-07, "loss": 0.2245, "step": 17925 }, { "epoch": 0.3115993672756349, "grad_norm": 1.0419024698197559, "learning_rate": 8.060897904446292e-07, "loss": 0.3305, "step": 17926 }, { "epoch": 0.3116167498131377, "grad_norm": 1.1499637799202342, "learning_rate": 8.06067531750848e-07, "loss": 0.3367, "step": 17927 }, { "epoch": 0.3116341323506405, "grad_norm": 0.9572721073231473, "learning_rate": 8.060452720869732e-07, "loss": 0.3778, "step": 17928 }, { "epoch": 0.31165151488814336, "grad_norm": 2.1105911286185193, "learning_rate": 8.060230114530757e-07, "loss": 0.3828, "step": 17929 }, { "epoch": 0.3116688974256462, "grad_norm": 3.164912203427802, "learning_rate": 8.060007498492259e-07, "loss": 0.3609, "step": 17930 }, { "epoch": 0.311686279963149, "grad_norm": 2.041904121834836, "learning_rate": 8.05978487275494e-07, "loss": 0.4466, "step": 17931 }, { "epoch": 0.31170366250065185, "grad_norm": 1.7483397205644513, "learning_rate": 8.05956223731951e-07, "loss": 0.4173, "step": 17932 }, { "epoch": 0.3117210450381547, "grad_norm": 1.7746663010279693, "learning_rate": 8.059339592186672e-07, "loss": 0.3252, "step": 17933 }, { "epoch": 0.3117384275756575, "grad_norm": 1.1421674227230374, "learning_rate": 8.059116937357133e-07, "loss": 0.3294, "step": 17934 }, { "epoch": 0.31175581011316034, "grad_norm": 1.460280661390583, "learning_rate": 8.0588942728316e-07, "loss": 0.2412, "step": 17935 }, { "epoch": 0.31177319265066317, "grad_norm": 1.1830836313093442, "learning_rate": 8.058671598610777e-07, "loss": 0.4138, "step": 17936 }, { "epoch": 0.31179057518816594, "grad_norm": 1.8883104795210128, "learning_rate": 8.058448914695369e-07, "loss": 0.34, "step": 17937 }, { "epoch": 0.31180795772566877, "grad_norm": 1.6018030084922672, "learning_rate": 8.058226221086085e-07, "loss": 0.3087, "step": 17938 }, { "epoch": 0.3118253402631716, "grad_norm": 2.0125569646765373, "learning_rate": 8.058003517783627e-07, "loss": 0.4818, "step": 17939 }, { "epoch": 0.31184272280067443, "grad_norm": 1.5333862570909926, "learning_rate": 8.057780804788702e-07, "loss": 0.3668, "step": 17940 }, { "epoch": 0.31186010533817726, "grad_norm": 2.020792502874011, "learning_rate": 8.057558082102016e-07, "loss": 0.2965, "step": 17941 }, { "epoch": 0.3118774878756801, "grad_norm": 8.748691400876838, "learning_rate": 8.057335349724278e-07, "loss": 0.7037, "step": 17942 }, { "epoch": 0.3118948704131829, "grad_norm": 1.342276797905799, "learning_rate": 8.05711260765619e-07, "loss": 0.2222, "step": 17943 }, { "epoch": 0.31191225295068575, "grad_norm": 2.154905144740873, "learning_rate": 8.056889855898459e-07, "loss": 0.2444, "step": 17944 }, { "epoch": 0.3119296354881886, "grad_norm": 1.5687958277453096, "learning_rate": 8.056667094451792e-07, "loss": 0.2969, "step": 17945 }, { "epoch": 0.3119470180256914, "grad_norm": 1.5843206679960022, "learning_rate": 8.056444323316894e-07, "loss": 0.4136, "step": 17946 }, { "epoch": 0.3119644005631942, "grad_norm": 1.606994786596992, "learning_rate": 8.056221542494472e-07, "loss": 0.3088, "step": 17947 }, { "epoch": 0.311981783100697, "grad_norm": 1.2305319256828127, "learning_rate": 8.055998751985232e-07, "loss": 0.1991, "step": 17948 }, { "epoch": 0.31199916563819985, "grad_norm": 2.104976392412091, "learning_rate": 8.055775951789878e-07, "loss": 0.4001, "step": 17949 }, { "epoch": 0.3120165481757027, "grad_norm": 0.8542939367040551, "learning_rate": 8.05555314190912e-07, "loss": 0.2954, "step": 17950 }, { "epoch": 0.3120339307132055, "grad_norm": 3.1618795766032153, "learning_rate": 8.055330322343661e-07, "loss": 0.3278, "step": 17951 }, { "epoch": 0.31205131325070834, "grad_norm": 2.432231510990752, "learning_rate": 8.055107493094209e-07, "loss": 0.5346, "step": 17952 }, { "epoch": 0.31206869578821117, "grad_norm": 1.9353063466533413, "learning_rate": 8.054884654161468e-07, "loss": 0.3096, "step": 17953 }, { "epoch": 0.312086078325714, "grad_norm": 1.302988653306546, "learning_rate": 8.054661805546148e-07, "loss": 0.2996, "step": 17954 }, { "epoch": 0.31210346086321683, "grad_norm": 1.4773069177571638, "learning_rate": 8.054438947248952e-07, "loss": 0.3183, "step": 17955 }, { "epoch": 0.31212084340071966, "grad_norm": 1.3669320510904295, "learning_rate": 8.054216079270588e-07, "loss": 0.2548, "step": 17956 }, { "epoch": 0.31213822593822244, "grad_norm": 1.2973515470420258, "learning_rate": 8.053993201611762e-07, "loss": 0.3375, "step": 17957 }, { "epoch": 0.31215560847572527, "grad_norm": 1.0532391733218307, "learning_rate": 8.05377031427318e-07, "loss": 0.4757, "step": 17958 }, { "epoch": 0.3121729910132281, "grad_norm": 1.2321961873960752, "learning_rate": 8.053547417255551e-07, "loss": 0.4862, "step": 17959 }, { "epoch": 0.3121903735507309, "grad_norm": 1.8379849718639674, "learning_rate": 8.053324510559576e-07, "loss": 0.3348, "step": 17960 }, { "epoch": 0.31220775608823376, "grad_norm": 1.628640572230921, "learning_rate": 8.053101594185967e-07, "loss": 0.3378, "step": 17961 }, { "epoch": 0.3122251386257366, "grad_norm": 1.4707791042929563, "learning_rate": 8.052878668135429e-07, "loss": 0.2804, "step": 17962 }, { "epoch": 0.3122425211632394, "grad_norm": 1.764171460413168, "learning_rate": 8.052655732408666e-07, "loss": 0.3146, "step": 17963 }, { "epoch": 0.31225990370074225, "grad_norm": 2.719576845161218, "learning_rate": 8.052432787006389e-07, "loss": 0.2457, "step": 17964 }, { "epoch": 0.3122772862382451, "grad_norm": 1.7825092499549873, "learning_rate": 8.0522098319293e-07, "loss": 0.3477, "step": 17965 }, { "epoch": 0.3122946687757479, "grad_norm": 1.5204709242187984, "learning_rate": 8.051986867178109e-07, "loss": 0.3739, "step": 17966 }, { "epoch": 0.3123120513132507, "grad_norm": 1.6595005811518555, "learning_rate": 8.051763892753522e-07, "loss": 0.5711, "step": 17967 }, { "epoch": 0.3123294338507535, "grad_norm": 1.5627017243579895, "learning_rate": 8.051540908656245e-07, "loss": 0.3496, "step": 17968 }, { "epoch": 0.31234681638825634, "grad_norm": 1.4823635724773194, "learning_rate": 8.051317914886984e-07, "loss": 0.2225, "step": 17969 }, { "epoch": 0.3123641989257592, "grad_norm": 2.4953527252568874, "learning_rate": 8.051094911446448e-07, "loss": 0.2647, "step": 17970 }, { "epoch": 0.312381581463262, "grad_norm": 1.8573181511436174, "learning_rate": 8.050871898335342e-07, "loss": 0.3853, "step": 17971 }, { "epoch": 0.31239896400076483, "grad_norm": 1.3572656819851918, "learning_rate": 8.050648875554374e-07, "loss": 0.3451, "step": 17972 }, { "epoch": 0.31241634653826766, "grad_norm": 1.2258713730844222, "learning_rate": 8.050425843104252e-07, "loss": 0.2399, "step": 17973 }, { "epoch": 0.3124337290757705, "grad_norm": 1.2711986125111752, "learning_rate": 8.05020280098568e-07, "loss": 0.197, "step": 17974 }, { "epoch": 0.3124511116132733, "grad_norm": 1.544768351502644, "learning_rate": 8.049979749199365e-07, "loss": 0.385, "step": 17975 }, { "epoch": 0.31246849415077615, "grad_norm": 1.8638947680201057, "learning_rate": 8.049756687746015e-07, "loss": 0.2895, "step": 17976 }, { "epoch": 0.31248587668827893, "grad_norm": 0.8893585251933699, "learning_rate": 8.049533616626338e-07, "loss": 0.2413, "step": 17977 }, { "epoch": 0.31250325922578176, "grad_norm": 2.097002490796411, "learning_rate": 8.049310535841042e-07, "loss": 0.2114, "step": 17978 }, { "epoch": 0.3125206417632846, "grad_norm": 2.523436230228059, "learning_rate": 8.049087445390831e-07, "loss": 0.3966, "step": 17979 }, { "epoch": 0.3125380243007874, "grad_norm": 2.369171270457426, "learning_rate": 8.048864345276414e-07, "loss": 0.3514, "step": 17980 }, { "epoch": 0.31255540683829025, "grad_norm": 0.9626273738227733, "learning_rate": 8.048641235498496e-07, "loss": 0.259, "step": 17981 }, { "epoch": 0.3125727893757931, "grad_norm": 1.7290230866830134, "learning_rate": 8.048418116057786e-07, "loss": 0.3324, "step": 17982 }, { "epoch": 0.3125901719132959, "grad_norm": 2.13841926637113, "learning_rate": 8.048194986954993e-07, "loss": 0.31, "step": 17983 }, { "epoch": 0.31260755445079874, "grad_norm": 2.7936242594639884, "learning_rate": 8.04797184819082e-07, "loss": 0.3169, "step": 17984 }, { "epoch": 0.31262493698830157, "grad_norm": 1.5527824392692617, "learning_rate": 8.047748699765978e-07, "loss": 0.4803, "step": 17985 }, { "epoch": 0.3126423195258044, "grad_norm": 2.093827531338291, "learning_rate": 8.047525541681172e-07, "loss": 0.284, "step": 17986 }, { "epoch": 0.3126597020633072, "grad_norm": 2.459140738704952, "learning_rate": 8.04730237393711e-07, "loss": 0.3113, "step": 17987 }, { "epoch": 0.31267708460081, "grad_norm": 1.7493812989309185, "learning_rate": 8.047079196534498e-07, "loss": 0.2744, "step": 17988 }, { "epoch": 0.31269446713831284, "grad_norm": 3.338859340319963, "learning_rate": 8.046856009474047e-07, "loss": 0.3484, "step": 17989 }, { "epoch": 0.31271184967581567, "grad_norm": 1.7369935289731497, "learning_rate": 8.04663281275646e-07, "loss": 0.32, "step": 17990 }, { "epoch": 0.3127292322133185, "grad_norm": 1.9171207527032843, "learning_rate": 8.046409606382448e-07, "loss": 0.3411, "step": 17991 }, { "epoch": 0.31274661475082133, "grad_norm": 3.5989657787911025, "learning_rate": 8.046186390352716e-07, "loss": 0.2719, "step": 17992 }, { "epoch": 0.31276399728832416, "grad_norm": 1.9356849917191374, "learning_rate": 8.045963164667973e-07, "loss": 0.3326, "step": 17993 }, { "epoch": 0.312781379825827, "grad_norm": 1.6399308121167002, "learning_rate": 8.045739929328925e-07, "loss": 0.4928, "step": 17994 }, { "epoch": 0.3127987623633298, "grad_norm": 2.5790663225265416, "learning_rate": 8.045516684336279e-07, "loss": 0.5226, "step": 17995 }, { "epoch": 0.31281614490083265, "grad_norm": 2.5962257780420277, "learning_rate": 8.045293429690748e-07, "loss": 0.1936, "step": 17996 }, { "epoch": 0.3128335274383354, "grad_norm": 1.9514117865726968, "learning_rate": 8.045070165393035e-07, "loss": 0.3861, "step": 17997 }, { "epoch": 0.31285090997583825, "grad_norm": 2.2170255721676653, "learning_rate": 8.044846891443846e-07, "loss": 0.3056, "step": 17998 }, { "epoch": 0.3128682925133411, "grad_norm": 1.8173501303532396, "learning_rate": 8.044623607843894e-07, "loss": 0.2127, "step": 17999 }, { "epoch": 0.3128856750508439, "grad_norm": 1.3602241097101893, "learning_rate": 8.044400314593881e-07, "loss": 0.5111, "step": 18000 }, { "epoch": 0.31290305758834674, "grad_norm": 1.586520642930325, "learning_rate": 8.044177011694518e-07, "loss": 0.3545, "step": 18001 }, { "epoch": 0.3129204401258496, "grad_norm": 1.5576149834151174, "learning_rate": 8.043953699146514e-07, "loss": 0.3285, "step": 18002 }, { "epoch": 0.3129378226633524, "grad_norm": 1.6320841467056022, "learning_rate": 8.043730376950573e-07, "loss": 0.426, "step": 18003 }, { "epoch": 0.31295520520085524, "grad_norm": 2.7075304510348226, "learning_rate": 8.043507045107406e-07, "loss": 0.1591, "step": 18004 }, { "epoch": 0.31297258773835807, "grad_norm": 1.9054518488593826, "learning_rate": 8.043283703617721e-07, "loss": 0.4205, "step": 18005 }, { "epoch": 0.3129899702758609, "grad_norm": 1.6275707669444763, "learning_rate": 8.043060352482223e-07, "loss": 0.2793, "step": 18006 }, { "epoch": 0.31300735281336367, "grad_norm": 3.030560318372994, "learning_rate": 8.042836991701621e-07, "loss": 0.2294, "step": 18007 }, { "epoch": 0.3130247353508665, "grad_norm": 1.7147485454931466, "learning_rate": 8.042613621276624e-07, "loss": 0.3813, "step": 18008 }, { "epoch": 0.31304211788836933, "grad_norm": 1.9273942685360705, "learning_rate": 8.04239024120794e-07, "loss": 0.4873, "step": 18009 }, { "epoch": 0.31305950042587216, "grad_norm": 2.4307374143909164, "learning_rate": 8.042166851496274e-07, "loss": 0.2941, "step": 18010 }, { "epoch": 0.313076882963375, "grad_norm": 2.741319495124637, "learning_rate": 8.041943452142341e-07, "loss": 0.6864, "step": 18011 }, { "epoch": 0.3130942655008778, "grad_norm": 1.3381842436067548, "learning_rate": 8.041720043146842e-07, "loss": 0.2202, "step": 18012 }, { "epoch": 0.31311164803838065, "grad_norm": 2.5890028684833184, "learning_rate": 8.041496624510488e-07, "loss": 0.5219, "step": 18013 }, { "epoch": 0.3131290305758835, "grad_norm": 1.7701553362447118, "learning_rate": 8.041273196233986e-07, "loss": 0.3839, "step": 18014 }, { "epoch": 0.3131464131133863, "grad_norm": 1.420105148140966, "learning_rate": 8.041049758318047e-07, "loss": 0.1551, "step": 18015 }, { "epoch": 0.31316379565088914, "grad_norm": 2.5576904702172163, "learning_rate": 8.040826310763375e-07, "loss": 0.3225, "step": 18016 }, { "epoch": 0.3131811781883919, "grad_norm": 2.664881267847775, "learning_rate": 8.040602853570681e-07, "loss": 0.3959, "step": 18017 }, { "epoch": 0.31319856072589475, "grad_norm": 2.1100487511621555, "learning_rate": 8.040379386740675e-07, "loss": 0.3535, "step": 18018 }, { "epoch": 0.3132159432633976, "grad_norm": 1.2265810337175538, "learning_rate": 8.040155910274061e-07, "loss": 0.4024, "step": 18019 }, { "epoch": 0.3132333258009004, "grad_norm": 1.454369272874833, "learning_rate": 8.039932424171549e-07, "loss": 0.2723, "step": 18020 }, { "epoch": 0.31325070833840324, "grad_norm": 1.5395838615099044, "learning_rate": 8.039708928433849e-07, "loss": 0.1682, "step": 18021 }, { "epoch": 0.31326809087590607, "grad_norm": 1.936272084966422, "learning_rate": 8.039485423061667e-07, "loss": 0.3042, "step": 18022 }, { "epoch": 0.3132854734134089, "grad_norm": 1.4253530777909313, "learning_rate": 8.039261908055712e-07, "loss": 0.2669, "step": 18023 }, { "epoch": 0.31330285595091173, "grad_norm": 3.142453166662352, "learning_rate": 8.039038383416694e-07, "loss": 0.335, "step": 18024 }, { "epoch": 0.31332023848841456, "grad_norm": 1.7354708794917062, "learning_rate": 8.038814849145321e-07, "loss": 0.274, "step": 18025 }, { "epoch": 0.3133376210259174, "grad_norm": 1.1455881120173654, "learning_rate": 8.0385913052423e-07, "loss": 0.3391, "step": 18026 }, { "epoch": 0.31335500356342016, "grad_norm": 1.9836286565524537, "learning_rate": 8.03836775170834e-07, "loss": 0.4188, "step": 18027 }, { "epoch": 0.313372386100923, "grad_norm": 1.7926621903720823, "learning_rate": 8.038144188544149e-07, "loss": 0.2887, "step": 18028 }, { "epoch": 0.3133897686384258, "grad_norm": 1.5136200141983536, "learning_rate": 8.037920615750437e-07, "loss": 0.2425, "step": 18029 }, { "epoch": 0.31340715117592866, "grad_norm": 2.7306475224786064, "learning_rate": 8.037697033327913e-07, "loss": 0.2251, "step": 18030 }, { "epoch": 0.3134245337134315, "grad_norm": 1.166528143890425, "learning_rate": 8.037473441277285e-07, "loss": 0.3165, "step": 18031 }, { "epoch": 0.3134419162509343, "grad_norm": 1.6383740620869045, "learning_rate": 8.03724983959926e-07, "loss": 0.2543, "step": 18032 }, { "epoch": 0.31345929878843715, "grad_norm": 3.2385543918977233, "learning_rate": 8.03702622829455e-07, "loss": 0.3372, "step": 18033 }, { "epoch": 0.31347668132594, "grad_norm": 1.9122495017550516, "learning_rate": 8.036802607363862e-07, "loss": 0.2489, "step": 18034 }, { "epoch": 0.3134940638634428, "grad_norm": 1.6424933240671955, "learning_rate": 8.036578976807904e-07, "loss": 0.3743, "step": 18035 }, { "epoch": 0.31351144640094564, "grad_norm": 1.6385609338390812, "learning_rate": 8.036355336627385e-07, "loss": 0.1839, "step": 18036 }, { "epoch": 0.3135288289384484, "grad_norm": 13.87463466806516, "learning_rate": 8.036131686823015e-07, "loss": 0.4352, "step": 18037 }, { "epoch": 0.31354621147595124, "grad_norm": 2.174611776647175, "learning_rate": 8.035908027395502e-07, "loss": 0.3116, "step": 18038 }, { "epoch": 0.31356359401345407, "grad_norm": 1.3513714084926431, "learning_rate": 8.035684358345555e-07, "loss": 0.3516, "step": 18039 }, { "epoch": 0.3135809765509569, "grad_norm": 2.6088410816706706, "learning_rate": 8.035460679673883e-07, "loss": 0.3862, "step": 18040 }, { "epoch": 0.31359835908845973, "grad_norm": 2.8874659603621327, "learning_rate": 8.035236991381195e-07, "loss": 0.462, "step": 18041 }, { "epoch": 0.31361574162596256, "grad_norm": 1.8046764908333195, "learning_rate": 8.0350132934682e-07, "loss": 0.2244, "step": 18042 }, { "epoch": 0.3136331241634654, "grad_norm": 1.5950680270514157, "learning_rate": 8.034789585935609e-07, "loss": 0.2465, "step": 18043 }, { "epoch": 0.3136505067009682, "grad_norm": 1.6277168810906484, "learning_rate": 8.034565868784126e-07, "loss": 0.3225, "step": 18044 }, { "epoch": 0.31366788923847105, "grad_norm": 2.8766463914230114, "learning_rate": 8.034342142014465e-07, "loss": 0.2368, "step": 18045 }, { "epoch": 0.3136852717759739, "grad_norm": 2.598100481298613, "learning_rate": 8.034118405627333e-07, "loss": 0.3199, "step": 18046 }, { "epoch": 0.31370265431347666, "grad_norm": 2.0826827676174977, "learning_rate": 8.033894659623439e-07, "loss": 0.3168, "step": 18047 }, { "epoch": 0.3137200368509795, "grad_norm": 1.4697461575259159, "learning_rate": 8.033670904003492e-07, "loss": 0.3736, "step": 18048 }, { "epoch": 0.3137374193884823, "grad_norm": 1.035330364056431, "learning_rate": 8.033447138768203e-07, "loss": 0.2867, "step": 18049 }, { "epoch": 0.31375480192598515, "grad_norm": 1.5818855768510387, "learning_rate": 8.03322336391828e-07, "loss": 0.2158, "step": 18050 }, { "epoch": 0.313772184463488, "grad_norm": 1.4512018507151672, "learning_rate": 8.032999579454431e-07, "loss": 0.2626, "step": 18051 }, { "epoch": 0.3137895670009908, "grad_norm": 2.1827804594459415, "learning_rate": 8.032775785377368e-07, "loss": 0.3126, "step": 18052 }, { "epoch": 0.31380694953849364, "grad_norm": 1.5075805497167534, "learning_rate": 8.032551981687799e-07, "loss": 0.4308, "step": 18053 }, { "epoch": 0.31382433207599647, "grad_norm": 2.0284993823485196, "learning_rate": 8.032328168386433e-07, "loss": 0.2402, "step": 18054 }, { "epoch": 0.3138417146134993, "grad_norm": 2.3298565969439333, "learning_rate": 8.032104345473978e-07, "loss": 0.3027, "step": 18055 }, { "epoch": 0.31385909715100213, "grad_norm": 1.5570054267617772, "learning_rate": 8.031880512951148e-07, "loss": 0.254, "step": 18056 }, { "epoch": 0.3138764796885049, "grad_norm": 1.3091192575913762, "learning_rate": 8.031656670818647e-07, "loss": 0.3448, "step": 18057 }, { "epoch": 0.31389386222600774, "grad_norm": 1.3889279702359147, "learning_rate": 8.031432819077188e-07, "loss": 0.2254, "step": 18058 }, { "epoch": 0.31391124476351057, "grad_norm": 2.3459956130303783, "learning_rate": 8.03120895772748e-07, "loss": 0.2613, "step": 18059 }, { "epoch": 0.3139286273010134, "grad_norm": 1.5820478754760243, "learning_rate": 8.03098508677023e-07, "loss": 0.1857, "step": 18060 }, { "epoch": 0.3139460098385162, "grad_norm": 1.4129795158150051, "learning_rate": 8.030761206206152e-07, "loss": 0.2629, "step": 18061 }, { "epoch": 0.31396339237601906, "grad_norm": 1.5063004799613597, "learning_rate": 8.030537316035952e-07, "loss": 0.2015, "step": 18062 }, { "epoch": 0.3139807749135219, "grad_norm": 1.5276041086748287, "learning_rate": 8.030313416260342e-07, "loss": 0.3857, "step": 18063 }, { "epoch": 0.3139981574510247, "grad_norm": 1.1556899600042554, "learning_rate": 8.030089506880029e-07, "loss": 0.3065, "step": 18064 }, { "epoch": 0.31401553998852755, "grad_norm": 2.1593155832135897, "learning_rate": 8.029865587895724e-07, "loss": 0.3132, "step": 18065 }, { "epoch": 0.3140329225260303, "grad_norm": 1.8204226799130145, "learning_rate": 8.029641659308137e-07, "loss": 0.7305, "step": 18066 }, { "epoch": 0.31405030506353315, "grad_norm": 2.3666748759056446, "learning_rate": 8.02941772111798e-07, "loss": 0.4471, "step": 18067 }, { "epoch": 0.314067687601036, "grad_norm": 2.1012300493177567, "learning_rate": 8.029193773325957e-07, "loss": 0.3405, "step": 18068 }, { "epoch": 0.3140850701385388, "grad_norm": 1.7409877505101368, "learning_rate": 8.028969815932782e-07, "loss": 0.2935, "step": 18069 }, { "epoch": 0.31410245267604164, "grad_norm": 1.8959881641199807, "learning_rate": 8.028745848939165e-07, "loss": 0.2897, "step": 18070 }, { "epoch": 0.3141198352135445, "grad_norm": 3.0814866067068825, "learning_rate": 8.028521872345817e-07, "loss": 0.432, "step": 18071 }, { "epoch": 0.3141372177510473, "grad_norm": 1.6221849315913628, "learning_rate": 8.028297886153444e-07, "loss": 0.3183, "step": 18072 }, { "epoch": 0.31415460028855013, "grad_norm": 1.9917768556564486, "learning_rate": 8.028073890362756e-07, "loss": 0.5039, "step": 18073 }, { "epoch": 0.31417198282605296, "grad_norm": 2.280114456502323, "learning_rate": 8.027849884974466e-07, "loss": 0.3419, "step": 18074 }, { "epoch": 0.3141893653635558, "grad_norm": 1.319057278883308, "learning_rate": 8.027625869989283e-07, "loss": 0.3169, "step": 18075 }, { "epoch": 0.31420674790105857, "grad_norm": 1.5133497557486648, "learning_rate": 8.027401845407917e-07, "loss": 0.3328, "step": 18076 }, { "epoch": 0.3142241304385614, "grad_norm": 1.2148989847518887, "learning_rate": 8.027177811231077e-07, "loss": 0.2633, "step": 18077 }, { "epoch": 0.31424151297606423, "grad_norm": 1.5935785378750844, "learning_rate": 8.026953767459475e-07, "loss": 0.3602, "step": 18078 }, { "epoch": 0.31425889551356706, "grad_norm": 2.825300883331971, "learning_rate": 8.026729714093821e-07, "loss": 0.4325, "step": 18079 }, { "epoch": 0.3142762780510699, "grad_norm": 1.139298439236647, "learning_rate": 8.026505651134821e-07, "loss": 0.3334, "step": 18080 }, { "epoch": 0.3142936605885727, "grad_norm": 1.3707106497445949, "learning_rate": 8.026281578583192e-07, "loss": 0.4602, "step": 18081 }, { "epoch": 0.31431104312607555, "grad_norm": 1.597599803753597, "learning_rate": 8.026057496439637e-07, "loss": 0.3511, "step": 18082 }, { "epoch": 0.3143284256635784, "grad_norm": 0.9998365068627323, "learning_rate": 8.025833404704872e-07, "loss": 0.1589, "step": 18083 }, { "epoch": 0.3143458082010812, "grad_norm": 2.1394295366973064, "learning_rate": 8.025609303379604e-07, "loss": 0.241, "step": 18084 }, { "epoch": 0.31436319073858404, "grad_norm": 1.8183975423377288, "learning_rate": 8.025385192464544e-07, "loss": 0.3661, "step": 18085 }, { "epoch": 0.3143805732760868, "grad_norm": 1.584500151183503, "learning_rate": 8.025161071960405e-07, "loss": 0.1904, "step": 18086 }, { "epoch": 0.31439795581358965, "grad_norm": 1.5291788825149344, "learning_rate": 8.024936941867893e-07, "loss": 0.3198, "step": 18087 }, { "epoch": 0.3144153383510925, "grad_norm": 1.4233769595724433, "learning_rate": 8.02471280218772e-07, "loss": 0.3254, "step": 18088 }, { "epoch": 0.3144327208885953, "grad_norm": 1.4541730149732677, "learning_rate": 8.024488652920599e-07, "loss": 0.2755, "step": 18089 }, { "epoch": 0.31445010342609814, "grad_norm": 1.9055593953501537, "learning_rate": 8.024264494067237e-07, "loss": 0.3778, "step": 18090 }, { "epoch": 0.31446748596360097, "grad_norm": 1.644718018633169, "learning_rate": 8.024040325628346e-07, "loss": 0.4231, "step": 18091 }, { "epoch": 0.3144848685011038, "grad_norm": 3.207951119648357, "learning_rate": 8.023816147604636e-07, "loss": 0.3763, "step": 18092 }, { "epoch": 0.31450225103860663, "grad_norm": 1.3240978844274454, "learning_rate": 8.023591959996819e-07, "loss": 0.3788, "step": 18093 }, { "epoch": 0.31451963357610946, "grad_norm": 1.7211180415078837, "learning_rate": 8.023367762805603e-07, "loss": 0.2983, "step": 18094 }, { "epoch": 0.3145370161136123, "grad_norm": 1.385744046464738, "learning_rate": 8.023143556031701e-07, "loss": 0.3997, "step": 18095 }, { "epoch": 0.31455439865111506, "grad_norm": 1.4696198567337788, "learning_rate": 8.022919339675822e-07, "loss": 0.201, "step": 18096 }, { "epoch": 0.3145717811886179, "grad_norm": 1.6518808441391435, "learning_rate": 8.022695113738678e-07, "loss": 0.2526, "step": 18097 }, { "epoch": 0.3145891637261207, "grad_norm": 1.5498901688965632, "learning_rate": 8.022470878220977e-07, "loss": 0.3484, "step": 18098 }, { "epoch": 0.31460654626362355, "grad_norm": 3.411078547798269, "learning_rate": 8.022246633123434e-07, "loss": 0.4825, "step": 18099 }, { "epoch": 0.3146239288011264, "grad_norm": 1.5288079883431207, "learning_rate": 8.022022378446756e-07, "loss": 0.2904, "step": 18100 }, { "epoch": 0.3146413113386292, "grad_norm": 1.7020322830993533, "learning_rate": 8.021798114191655e-07, "loss": 0.4691, "step": 18101 }, { "epoch": 0.31465869387613205, "grad_norm": 1.3926875719720266, "learning_rate": 8.021573840358844e-07, "loss": 0.4021, "step": 18102 }, { "epoch": 0.3146760764136349, "grad_norm": 1.9512093374961055, "learning_rate": 8.02134955694903e-07, "loss": 0.3723, "step": 18103 }, { "epoch": 0.3146934589511377, "grad_norm": 1.683056704849884, "learning_rate": 8.021125263962928e-07, "loss": 0.4189, "step": 18104 }, { "epoch": 0.31471084148864054, "grad_norm": 1.4933515156336563, "learning_rate": 8.020900961401244e-07, "loss": 0.3147, "step": 18105 }, { "epoch": 0.3147282240261433, "grad_norm": 1.541539102855854, "learning_rate": 8.020676649264695e-07, "loss": 0.5582, "step": 18106 }, { "epoch": 0.31474560656364614, "grad_norm": 2.3657507365586077, "learning_rate": 8.020452327553986e-07, "loss": 0.4081, "step": 18107 }, { "epoch": 0.31476298910114897, "grad_norm": 2.0130940598652822, "learning_rate": 8.020227996269831e-07, "loss": 0.2845, "step": 18108 }, { "epoch": 0.3147803716386518, "grad_norm": 1.7252872038085352, "learning_rate": 8.020003655412941e-07, "loss": 0.2456, "step": 18109 }, { "epoch": 0.31479775417615463, "grad_norm": 2.0554130881455572, "learning_rate": 8.019779304984027e-07, "loss": 0.3599, "step": 18110 }, { "epoch": 0.31481513671365746, "grad_norm": 3.9613692368544347, "learning_rate": 8.019554944983799e-07, "loss": 0.5264, "step": 18111 }, { "epoch": 0.3148325192511603, "grad_norm": 1.2141166568429327, "learning_rate": 8.01933057541297e-07, "loss": 0.3499, "step": 18112 }, { "epoch": 0.3148499017886631, "grad_norm": 1.8449424011759232, "learning_rate": 8.019106196272249e-07, "loss": 0.6241, "step": 18113 }, { "epoch": 0.31486728432616595, "grad_norm": 2.139985295912581, "learning_rate": 8.018881807562348e-07, "loss": 0.3926, "step": 18114 }, { "epoch": 0.3148846668636688, "grad_norm": 1.7128701788913587, "learning_rate": 8.01865740928398e-07, "loss": 0.4407, "step": 18115 }, { "epoch": 0.31490204940117156, "grad_norm": 1.5652776690877024, "learning_rate": 8.018433001437855e-07, "loss": 0.498, "step": 18116 }, { "epoch": 0.3149194319386744, "grad_norm": 2.365174802238394, "learning_rate": 8.018208584024682e-07, "loss": 0.5117, "step": 18117 }, { "epoch": 0.3149368144761772, "grad_norm": 1.9254440996827322, "learning_rate": 8.017984157045174e-07, "loss": 0.347, "step": 18118 }, { "epoch": 0.31495419701368005, "grad_norm": 0.870440191450284, "learning_rate": 8.017759720500043e-07, "loss": 0.1987, "step": 18119 }, { "epoch": 0.3149715795511829, "grad_norm": 1.3958847292808347, "learning_rate": 8.017535274390001e-07, "loss": 0.2376, "step": 18120 }, { "epoch": 0.3149889620886857, "grad_norm": 7.664638698205915, "learning_rate": 8.017310818715757e-07, "loss": 0.6354, "step": 18121 }, { "epoch": 0.31500634462618854, "grad_norm": 2.082960978480528, "learning_rate": 8.017086353478024e-07, "loss": 0.4446, "step": 18122 }, { "epoch": 0.31502372716369137, "grad_norm": 1.6511946206973076, "learning_rate": 8.016861878677514e-07, "loss": 0.675, "step": 18123 }, { "epoch": 0.3150411097011942, "grad_norm": 2.1286883669665926, "learning_rate": 8.016637394314938e-07, "loss": 0.4375, "step": 18124 }, { "epoch": 0.31505849223869703, "grad_norm": 1.9910831851989312, "learning_rate": 8.016412900391007e-07, "loss": 0.4136, "step": 18125 }, { "epoch": 0.3150758747761998, "grad_norm": 1.7301000254851444, "learning_rate": 8.016188396906432e-07, "loss": 0.444, "step": 18126 }, { "epoch": 0.31509325731370263, "grad_norm": 1.2155249065403828, "learning_rate": 8.015963883861925e-07, "loss": 0.5446, "step": 18127 }, { "epoch": 0.31511063985120547, "grad_norm": 0.990907391634576, "learning_rate": 8.015739361258198e-07, "loss": 0.268, "step": 18128 }, { "epoch": 0.3151280223887083, "grad_norm": 1.4775042499055269, "learning_rate": 8.015514829095963e-07, "loss": 0.2571, "step": 18129 }, { "epoch": 0.3151454049262111, "grad_norm": 1.876882888924195, "learning_rate": 8.015290287375931e-07, "loss": 0.3759, "step": 18130 }, { "epoch": 0.31516278746371396, "grad_norm": 1.2799827763993943, "learning_rate": 8.015065736098814e-07, "loss": 0.2226, "step": 18131 }, { "epoch": 0.3151801700012168, "grad_norm": 1.4265911063505976, "learning_rate": 8.014841175265324e-07, "loss": 0.314, "step": 18132 }, { "epoch": 0.3151975525387196, "grad_norm": 2.0495919644546987, "learning_rate": 8.014616604876172e-07, "loss": 0.4195, "step": 18133 }, { "epoch": 0.31521493507622245, "grad_norm": 2.0105045937602615, "learning_rate": 8.01439202493207e-07, "loss": 0.3982, "step": 18134 }, { "epoch": 0.3152323176137253, "grad_norm": 14.167552498505374, "learning_rate": 8.01416743543373e-07, "loss": 0.634, "step": 18135 }, { "epoch": 0.31524970015122805, "grad_norm": 1.7796058455913195, "learning_rate": 8.013942836381864e-07, "loss": 0.276, "step": 18136 }, { "epoch": 0.3152670826887309, "grad_norm": 1.5743070874017946, "learning_rate": 8.013718227777185e-07, "loss": 0.4979, "step": 18137 }, { "epoch": 0.3152844652262337, "grad_norm": 3.4898481481905876, "learning_rate": 8.013493609620402e-07, "loss": 1.0394, "step": 18138 }, { "epoch": 0.31530184776373654, "grad_norm": 2.0347335143746648, "learning_rate": 8.013268981912229e-07, "loss": 0.195, "step": 18139 }, { "epoch": 0.3153192303012394, "grad_norm": 1.504387623768372, "learning_rate": 8.013044344653377e-07, "loss": 0.48, "step": 18140 }, { "epoch": 0.3153366128387422, "grad_norm": 1.835218776113259, "learning_rate": 8.012819697844559e-07, "loss": 0.3696, "step": 18141 }, { "epoch": 0.31535399537624503, "grad_norm": 1.4587497373424745, "learning_rate": 8.012595041486486e-07, "loss": 0.3234, "step": 18142 }, { "epoch": 0.31537137791374786, "grad_norm": 1.235349899312109, "learning_rate": 8.012370375579871e-07, "loss": 0.2745, "step": 18143 }, { "epoch": 0.3153887604512507, "grad_norm": 1.5970646720237691, "learning_rate": 8.012145700125426e-07, "loss": 0.3082, "step": 18144 }, { "epoch": 0.3154061429887535, "grad_norm": 1.7995281417053244, "learning_rate": 8.011921015123862e-07, "loss": 0.4244, "step": 18145 }, { "epoch": 0.3154235255262563, "grad_norm": 1.7156936469228736, "learning_rate": 8.011696320575894e-07, "loss": 0.2861, "step": 18146 }, { "epoch": 0.31544090806375913, "grad_norm": 1.5139363848779348, "learning_rate": 8.01147161648223e-07, "loss": 0.2657, "step": 18147 }, { "epoch": 0.31545829060126196, "grad_norm": 1.8527230005397317, "learning_rate": 8.011246902843585e-07, "loss": 0.6771, "step": 18148 }, { "epoch": 0.3154756731387648, "grad_norm": 1.9831563518708457, "learning_rate": 8.01102217966067e-07, "loss": 0.2936, "step": 18149 }, { "epoch": 0.3154930556762676, "grad_norm": 1.5211170689180904, "learning_rate": 8.010797446934199e-07, "loss": 0.2212, "step": 18150 }, { "epoch": 0.31551043821377045, "grad_norm": 1.4448036368946127, "learning_rate": 8.010572704664882e-07, "loss": 0.2257, "step": 18151 }, { "epoch": 0.3155278207512733, "grad_norm": 1.8997714109910906, "learning_rate": 8.010347952853433e-07, "loss": 0.3311, "step": 18152 }, { "epoch": 0.3155452032887761, "grad_norm": 1.0969268815188549, "learning_rate": 8.010123191500564e-07, "loss": 0.2553, "step": 18153 }, { "epoch": 0.31556258582627894, "grad_norm": 1.63189112780071, "learning_rate": 8.009898420606986e-07, "loss": 0.2188, "step": 18154 }, { "epoch": 0.31557996836378177, "grad_norm": 1.6628273680465808, "learning_rate": 8.009673640173413e-07, "loss": 0.2221, "step": 18155 }, { "epoch": 0.31559735090128455, "grad_norm": 1.8134951283530678, "learning_rate": 8.009448850200558e-07, "loss": 0.3487, "step": 18156 }, { "epoch": 0.3156147334387874, "grad_norm": 3.1952728350741597, "learning_rate": 8.009224050689133e-07, "loss": 0.4657, "step": 18157 }, { "epoch": 0.3156321159762902, "grad_norm": 2.102871352868498, "learning_rate": 8.008999241639849e-07, "loss": 0.3387, "step": 18158 }, { "epoch": 0.31564949851379304, "grad_norm": 1.2458657833194666, "learning_rate": 8.00877442305342e-07, "loss": 0.1965, "step": 18159 }, { "epoch": 0.31566688105129587, "grad_norm": 2.824068490533246, "learning_rate": 8.008549594930558e-07, "loss": 0.213, "step": 18160 }, { "epoch": 0.3156842635887987, "grad_norm": 1.3491454226997308, "learning_rate": 8.008324757271976e-07, "loss": 0.3017, "step": 18161 }, { "epoch": 0.3157016461263015, "grad_norm": 2.4827627786777375, "learning_rate": 8.008099910078386e-07, "loss": 0.2634, "step": 18162 }, { "epoch": 0.31571902866380436, "grad_norm": 1.3310588466371585, "learning_rate": 8.007875053350504e-07, "loss": 0.2896, "step": 18163 }, { "epoch": 0.3157364112013072, "grad_norm": 2.1597156112610456, "learning_rate": 8.007650187089035e-07, "loss": 0.3326, "step": 18164 }, { "epoch": 0.31575379373881, "grad_norm": 2.1781999036925694, "learning_rate": 8.0074253112947e-07, "loss": 0.2995, "step": 18165 }, { "epoch": 0.3157711762763128, "grad_norm": 2.743096232029985, "learning_rate": 8.007200425968207e-07, "loss": 0.2606, "step": 18166 }, { "epoch": 0.3157885588138156, "grad_norm": 1.3537601968864508, "learning_rate": 8.006975531110269e-07, "loss": 0.3109, "step": 18167 }, { "epoch": 0.31580594135131845, "grad_norm": 2.128226522146539, "learning_rate": 8.006750626721601e-07, "loss": 0.4375, "step": 18168 }, { "epoch": 0.3158233238888213, "grad_norm": 2.769397795366465, "learning_rate": 8.006525712802916e-07, "loss": 0.5497, "step": 18169 }, { "epoch": 0.3158407064263241, "grad_norm": 1.5318727780239634, "learning_rate": 8.006300789354924e-07, "loss": 0.3208, "step": 18170 }, { "epoch": 0.31585808896382694, "grad_norm": 1.9069862776733417, "learning_rate": 8.006075856378339e-07, "loss": 0.2859, "step": 18171 }, { "epoch": 0.3158754715013298, "grad_norm": 1.6751217561243885, "learning_rate": 8.005850913873876e-07, "loss": 0.3545, "step": 18172 }, { "epoch": 0.3158928540388326, "grad_norm": 1.949811182284522, "learning_rate": 8.005625961842246e-07, "loss": 0.3474, "step": 18173 }, { "epoch": 0.31591023657633543, "grad_norm": 1.8784088590977086, "learning_rate": 8.005401000284161e-07, "loss": 0.3258, "step": 18174 }, { "epoch": 0.31592761911383827, "grad_norm": 1.8763211094922296, "learning_rate": 8.005176029200335e-07, "loss": 0.3974, "step": 18175 }, { "epoch": 0.31594500165134104, "grad_norm": 1.8151009919814127, "learning_rate": 8.004951048591484e-07, "loss": 0.3251, "step": 18176 }, { "epoch": 0.31596238418884387, "grad_norm": 4.088670465310474, "learning_rate": 8.004726058458318e-07, "loss": 0.2736, "step": 18177 }, { "epoch": 0.3159797667263467, "grad_norm": 1.7153064750562246, "learning_rate": 8.00450105880155e-07, "loss": 0.1791, "step": 18178 }, { "epoch": 0.31599714926384953, "grad_norm": 1.3862223042450077, "learning_rate": 8.004276049621893e-07, "loss": 0.1807, "step": 18179 }, { "epoch": 0.31601453180135236, "grad_norm": 1.3096929715340666, "learning_rate": 8.004051030920063e-07, "loss": 0.1822, "step": 18180 }, { "epoch": 0.3160319143388552, "grad_norm": 2.9630292372953124, "learning_rate": 8.003826002696768e-07, "loss": 0.3609, "step": 18181 }, { "epoch": 0.316049296876358, "grad_norm": 2.4081887171498635, "learning_rate": 8.003600964952727e-07, "loss": 0.4958, "step": 18182 }, { "epoch": 0.31606667941386085, "grad_norm": 1.4951191708814346, "learning_rate": 8.003375917688649e-07, "loss": 0.3479, "step": 18183 }, { "epoch": 0.3160840619513637, "grad_norm": 2.0016552422959175, "learning_rate": 8.00315086090525e-07, "loss": 0.3831, "step": 18184 }, { "epoch": 0.3161014444888665, "grad_norm": 2.117231303195477, "learning_rate": 8.002925794603244e-07, "loss": 0.4368, "step": 18185 }, { "epoch": 0.3161188270263693, "grad_norm": 2.026348749354301, "learning_rate": 8.002700718783339e-07, "loss": 0.3176, "step": 18186 }, { "epoch": 0.3161362095638721, "grad_norm": 2.079930109690084, "learning_rate": 8.002475633446256e-07, "loss": 0.4391, "step": 18187 }, { "epoch": 0.31615359210137495, "grad_norm": 1.5640348913928181, "learning_rate": 8.002250538592702e-07, "loss": 0.2231, "step": 18188 }, { "epoch": 0.3161709746388778, "grad_norm": 1.3960237111905718, "learning_rate": 8.002025434223393e-07, "loss": 0.2598, "step": 18189 }, { "epoch": 0.3161883571763806, "grad_norm": 2.872759408957802, "learning_rate": 8.001800320339043e-07, "loss": 0.515, "step": 18190 }, { "epoch": 0.31620573971388344, "grad_norm": 1.0877407379713033, "learning_rate": 8.001575196940364e-07, "loss": 0.2603, "step": 18191 }, { "epoch": 0.31622312225138627, "grad_norm": 1.6579900066145878, "learning_rate": 8.001350064028072e-07, "loss": 0.2129, "step": 18192 }, { "epoch": 0.3162405047888891, "grad_norm": 1.516775106476744, "learning_rate": 8.001124921602877e-07, "loss": 0.3587, "step": 18193 }, { "epoch": 0.31625788732639193, "grad_norm": 2.9196327884605098, "learning_rate": 8.000899769665495e-07, "loss": 0.3469, "step": 18194 }, { "epoch": 0.31627526986389476, "grad_norm": 1.5110283493005645, "learning_rate": 8.000674608216641e-07, "loss": 0.2486, "step": 18195 }, { "epoch": 0.31629265240139753, "grad_norm": 3.6681948189397913, "learning_rate": 8.000449437257026e-07, "loss": 0.2162, "step": 18196 }, { "epoch": 0.31631003493890036, "grad_norm": 1.0734130795409758, "learning_rate": 8.000224256787365e-07, "loss": 0.4444, "step": 18197 }, { "epoch": 0.3163274174764032, "grad_norm": 1.974429404195834, "learning_rate": 7.99999906680837e-07, "loss": 0.3535, "step": 18198 }, { "epoch": 0.316344800013906, "grad_norm": 1.556791089146818, "learning_rate": 7.999773867320756e-07, "loss": 0.4078, "step": 18199 }, { "epoch": 0.31636218255140885, "grad_norm": 2.6365079203992146, "learning_rate": 7.999548658325238e-07, "loss": 0.3882, "step": 18200 }, { "epoch": 0.3163795650889117, "grad_norm": 2.288246983238289, "learning_rate": 7.999323439822528e-07, "loss": 0.2922, "step": 18201 }, { "epoch": 0.3163969476264145, "grad_norm": 2.009815306809749, "learning_rate": 7.999098211813341e-07, "loss": 0.2761, "step": 18202 }, { "epoch": 0.31641433016391735, "grad_norm": 1.034282622960368, "learning_rate": 7.99887297429839e-07, "loss": 0.2323, "step": 18203 }, { "epoch": 0.3164317127014202, "grad_norm": 4.439706856990631, "learning_rate": 7.99864772727839e-07, "loss": 0.2708, "step": 18204 }, { "epoch": 0.31644909523892295, "grad_norm": 2.407910542936446, "learning_rate": 7.998422470754053e-07, "loss": 0.368, "step": 18205 }, { "epoch": 0.3164664777764258, "grad_norm": 1.4298750730020642, "learning_rate": 7.998197204726094e-07, "loss": 0.2791, "step": 18206 }, { "epoch": 0.3164838603139286, "grad_norm": 1.5394408708582832, "learning_rate": 7.997971929195229e-07, "loss": 0.34, "step": 18207 }, { "epoch": 0.31650124285143144, "grad_norm": 3.5266082685024673, "learning_rate": 7.997746644162168e-07, "loss": 0.3104, "step": 18208 }, { "epoch": 0.31651862538893427, "grad_norm": 2.5559988426887235, "learning_rate": 7.997521349627627e-07, "loss": 0.1971, "step": 18209 }, { "epoch": 0.3165360079264371, "grad_norm": 1.328210680401675, "learning_rate": 7.997296045592322e-07, "loss": 0.2388, "step": 18210 }, { "epoch": 0.31655339046393993, "grad_norm": 1.311014996067989, "learning_rate": 7.997070732056965e-07, "loss": 0.2665, "step": 18211 }, { "epoch": 0.31657077300144276, "grad_norm": 1.6533970143498453, "learning_rate": 7.996845409022271e-07, "loss": 0.252, "step": 18212 }, { "epoch": 0.3165881555389456, "grad_norm": 2.2365954185816257, "learning_rate": 7.996620076488951e-07, "loss": 0.4262, "step": 18213 }, { "epoch": 0.3166055380764484, "grad_norm": 3.235891388147595, "learning_rate": 7.996394734457724e-07, "loss": 0.4432, "step": 18214 }, { "epoch": 0.3166229206139512, "grad_norm": 1.2583405786666606, "learning_rate": 7.996169382929302e-07, "loss": 0.3696, "step": 18215 }, { "epoch": 0.31664030315145403, "grad_norm": 1.4692647583025036, "learning_rate": 7.9959440219044e-07, "loss": 0.3912, "step": 18216 }, { "epoch": 0.31665768568895686, "grad_norm": 1.585809138999463, "learning_rate": 7.99571865138373e-07, "loss": 0.2998, "step": 18217 }, { "epoch": 0.3166750682264597, "grad_norm": 1.5367355714239674, "learning_rate": 7.995493271368009e-07, "loss": 0.4673, "step": 18218 }, { "epoch": 0.3166924507639625, "grad_norm": 1.342398301071369, "learning_rate": 7.99526788185795e-07, "loss": 0.422, "step": 18219 }, { "epoch": 0.31670983330146535, "grad_norm": 3.6711442841084834, "learning_rate": 7.995042482854268e-07, "loss": 0.4661, "step": 18220 }, { "epoch": 0.3167272158389682, "grad_norm": 1.2302675120028086, "learning_rate": 7.994817074357678e-07, "loss": 0.2896, "step": 18221 }, { "epoch": 0.316744598376471, "grad_norm": 1.88725638824991, "learning_rate": 7.994591656368892e-07, "loss": 0.4547, "step": 18222 }, { "epoch": 0.31676198091397384, "grad_norm": 1.637285403808391, "learning_rate": 7.994366228888627e-07, "loss": 0.3196, "step": 18223 }, { "epoch": 0.31677936345147667, "grad_norm": 1.5869341234431635, "learning_rate": 7.994140791917596e-07, "loss": 0.2221, "step": 18224 }, { "epoch": 0.31679674598897944, "grad_norm": 1.164554556480664, "learning_rate": 7.993915345456514e-07, "loss": 0.4051, "step": 18225 }, { "epoch": 0.3168141285264823, "grad_norm": 0.9685329276008511, "learning_rate": 7.993689889506097e-07, "loss": 0.2804, "step": 18226 }, { "epoch": 0.3168315110639851, "grad_norm": 2.0475797821420954, "learning_rate": 7.993464424067058e-07, "loss": 0.3828, "step": 18227 }, { "epoch": 0.31684889360148794, "grad_norm": 1.366969868964469, "learning_rate": 7.993238949140111e-07, "loss": 0.1516, "step": 18228 }, { "epoch": 0.31686627613899077, "grad_norm": 1.5198228167602088, "learning_rate": 7.993013464725972e-07, "loss": 0.541, "step": 18229 }, { "epoch": 0.3168836586764936, "grad_norm": 1.5099438183113862, "learning_rate": 7.992787970825354e-07, "loss": 0.2352, "step": 18230 }, { "epoch": 0.3169010412139964, "grad_norm": 2.2613712950129874, "learning_rate": 7.992562467438975e-07, "loss": 0.4207, "step": 18231 }, { "epoch": 0.31691842375149926, "grad_norm": 1.5742768768521105, "learning_rate": 7.992336954567545e-07, "loss": 0.4809, "step": 18232 }, { "epoch": 0.3169358062890021, "grad_norm": 1.5321241957643978, "learning_rate": 7.992111432211783e-07, "loss": 0.394, "step": 18233 }, { "epoch": 0.3169531888265049, "grad_norm": 1.2414581004346807, "learning_rate": 7.991885900372401e-07, "loss": 0.5124, "step": 18234 }, { "epoch": 0.3169705713640077, "grad_norm": 1.7900686301601467, "learning_rate": 7.991660359050117e-07, "loss": 0.3391, "step": 18235 }, { "epoch": 0.3169879539015105, "grad_norm": 2.8819133114199835, "learning_rate": 7.991434808245644e-07, "loss": 0.2907, "step": 18236 }, { "epoch": 0.31700533643901335, "grad_norm": 1.3572626348366361, "learning_rate": 7.991209247959696e-07, "loss": 0.2884, "step": 18237 }, { "epoch": 0.3170227189765162, "grad_norm": 2.1600680027745103, "learning_rate": 7.990983678192989e-07, "loss": 0.3022, "step": 18238 }, { "epoch": 0.317040101514019, "grad_norm": 1.818724984402337, "learning_rate": 7.990758098946236e-07, "loss": 0.3557, "step": 18239 }, { "epoch": 0.31705748405152184, "grad_norm": 2.044184882223623, "learning_rate": 7.990532510220156e-07, "loss": 0.4852, "step": 18240 }, { "epoch": 0.3170748665890247, "grad_norm": 2.3488795726561382, "learning_rate": 7.99030691201546e-07, "loss": 0.3478, "step": 18241 }, { "epoch": 0.3170922491265275, "grad_norm": 1.5897722896337008, "learning_rate": 7.990081304332865e-07, "loss": 0.4254, "step": 18242 }, { "epoch": 0.31710963166403033, "grad_norm": 2.308886853719339, "learning_rate": 7.989855687173086e-07, "loss": 0.5752, "step": 18243 }, { "epoch": 0.31712701420153316, "grad_norm": 1.525551943339993, "learning_rate": 7.989630060536838e-07, "loss": 0.1841, "step": 18244 }, { "epoch": 0.31714439673903594, "grad_norm": 1.4704650711003875, "learning_rate": 7.989404424424837e-07, "loss": 0.2157, "step": 18245 }, { "epoch": 0.31716177927653877, "grad_norm": 1.4186409085100573, "learning_rate": 7.989178778837795e-07, "loss": 0.2946, "step": 18246 }, { "epoch": 0.3171791618140416, "grad_norm": 1.0847361800084878, "learning_rate": 7.988953123776431e-07, "loss": 0.2224, "step": 18247 }, { "epoch": 0.31719654435154443, "grad_norm": 3.781658621319293, "learning_rate": 7.988727459241459e-07, "loss": 0.4537, "step": 18248 }, { "epoch": 0.31721392688904726, "grad_norm": 2.5057119610374596, "learning_rate": 7.988501785233592e-07, "loss": 0.2546, "step": 18249 }, { "epoch": 0.3172313094265501, "grad_norm": 1.512285838410987, "learning_rate": 7.988276101753549e-07, "loss": 0.3183, "step": 18250 }, { "epoch": 0.3172486919640529, "grad_norm": 1.575031619543415, "learning_rate": 7.988050408802043e-07, "loss": 0.3252, "step": 18251 }, { "epoch": 0.31726607450155575, "grad_norm": 1.5501754626341366, "learning_rate": 7.987824706379789e-07, "loss": 0.2958, "step": 18252 }, { "epoch": 0.3172834570390586, "grad_norm": 1.6414976294812247, "learning_rate": 7.987598994487503e-07, "loss": 0.3792, "step": 18253 }, { "epoch": 0.3173008395765614, "grad_norm": 1.8044562144636096, "learning_rate": 7.987373273125902e-07, "loss": 0.5673, "step": 18254 }, { "epoch": 0.3173182221140642, "grad_norm": 2.0838529982213925, "learning_rate": 7.9871475422957e-07, "loss": 0.2262, "step": 18255 }, { "epoch": 0.317335604651567, "grad_norm": 1.6613428080777595, "learning_rate": 7.986921801997612e-07, "loss": 0.2835, "step": 18256 }, { "epoch": 0.31735298718906985, "grad_norm": 1.6727731429864439, "learning_rate": 7.986696052232354e-07, "loss": 0.248, "step": 18257 }, { "epoch": 0.3173703697265727, "grad_norm": 1.2362071774089396, "learning_rate": 7.98647029300064e-07, "loss": 0.2276, "step": 18258 }, { "epoch": 0.3173877522640755, "grad_norm": 1.1413785402772545, "learning_rate": 7.986244524303188e-07, "loss": 0.2882, "step": 18259 }, { "epoch": 0.31740513480157834, "grad_norm": 3.8887428296448454, "learning_rate": 7.986018746140712e-07, "loss": 0.5856, "step": 18260 }, { "epoch": 0.31742251733908117, "grad_norm": 1.030501685095244, "learning_rate": 7.985792958513931e-07, "loss": 0.3095, "step": 18261 }, { "epoch": 0.317439899876584, "grad_norm": 1.5677992755655692, "learning_rate": 7.985567161423556e-07, "loss": 0.3374, "step": 18262 }, { "epoch": 0.3174572824140868, "grad_norm": 1.3780521914163715, "learning_rate": 7.985341354870304e-07, "loss": 0.4069, "step": 18263 }, { "epoch": 0.31747466495158966, "grad_norm": 1.6250877324163708, "learning_rate": 7.985115538854892e-07, "loss": 0.5964, "step": 18264 }, { "epoch": 0.31749204748909243, "grad_norm": 1.5694862548540525, "learning_rate": 7.984889713378036e-07, "loss": 0.4003, "step": 18265 }, { "epoch": 0.31750943002659526, "grad_norm": 1.6817715167676588, "learning_rate": 7.984663878440448e-07, "loss": 0.2786, "step": 18266 }, { "epoch": 0.3175268125640981, "grad_norm": 2.1460051408855567, "learning_rate": 7.984438034042848e-07, "loss": 0.3715, "step": 18267 }, { "epoch": 0.3175441951016009, "grad_norm": 1.8916739528947035, "learning_rate": 7.984212180185952e-07, "loss": 0.4432, "step": 18268 }, { "epoch": 0.31756157763910375, "grad_norm": 2.5053962621028756, "learning_rate": 7.983986316870472e-07, "loss": 0.5676, "step": 18269 }, { "epoch": 0.3175789601766066, "grad_norm": 2.1715764949870002, "learning_rate": 7.983760444097128e-07, "loss": 0.4495, "step": 18270 }, { "epoch": 0.3175963427141094, "grad_norm": 1.2526512064700572, "learning_rate": 7.983534561866632e-07, "loss": 0.5749, "step": 18271 }, { "epoch": 0.31761372525161224, "grad_norm": 1.5950575709948358, "learning_rate": 7.983308670179702e-07, "loss": 0.2374, "step": 18272 }, { "epoch": 0.3176311077891151, "grad_norm": 1.648425201113937, "learning_rate": 7.983082769037055e-07, "loss": 0.3382, "step": 18273 }, { "epoch": 0.3176484903266179, "grad_norm": 1.295834455398326, "learning_rate": 7.982856858439406e-07, "loss": 0.3329, "step": 18274 }, { "epoch": 0.3176658728641207, "grad_norm": 1.2396485822187624, "learning_rate": 7.982630938387471e-07, "loss": 0.2606, "step": 18275 }, { "epoch": 0.3176832554016235, "grad_norm": 1.4545875026109227, "learning_rate": 7.982405008881965e-07, "loss": 0.3834, "step": 18276 }, { "epoch": 0.31770063793912634, "grad_norm": 1.6933182104215072, "learning_rate": 7.982179069923607e-07, "loss": 0.2262, "step": 18277 }, { "epoch": 0.31771802047662917, "grad_norm": 1.6193688607570484, "learning_rate": 7.981953121513109e-07, "loss": 0.3303, "step": 18278 }, { "epoch": 0.317735403014132, "grad_norm": 1.2890840252235047, "learning_rate": 7.981727163651191e-07, "loss": 0.3328, "step": 18279 }, { "epoch": 0.31775278555163483, "grad_norm": 2.2545054926218167, "learning_rate": 7.981501196338567e-07, "loss": 0.4619, "step": 18280 }, { "epoch": 0.31777016808913766, "grad_norm": 2.5251299985467197, "learning_rate": 7.981275219575953e-07, "loss": 0.3901, "step": 18281 }, { "epoch": 0.3177875506266405, "grad_norm": 2.3685447005996974, "learning_rate": 7.981049233364068e-07, "loss": 0.4993, "step": 18282 }, { "epoch": 0.3178049331641433, "grad_norm": 1.8669214362595759, "learning_rate": 7.980823237703625e-07, "loss": 0.1913, "step": 18283 }, { "epoch": 0.31782231570164615, "grad_norm": 1.3352258496112213, "learning_rate": 7.980597232595341e-07, "loss": 0.2717, "step": 18284 }, { "epoch": 0.3178396982391489, "grad_norm": 1.4076980853742, "learning_rate": 7.980371218039934e-07, "loss": 0.2219, "step": 18285 }, { "epoch": 0.31785708077665176, "grad_norm": 1.679631143271834, "learning_rate": 7.980145194038119e-07, "loss": 0.3801, "step": 18286 }, { "epoch": 0.3178744633141546, "grad_norm": 2.203924453896016, "learning_rate": 7.979919160590613e-07, "loss": 0.4313, "step": 18287 }, { "epoch": 0.3178918458516574, "grad_norm": 1.9012343027052632, "learning_rate": 7.979693117698132e-07, "loss": 0.2904, "step": 18288 }, { "epoch": 0.31790922838916025, "grad_norm": 1.6803268194802936, "learning_rate": 7.979467065361393e-07, "loss": 0.3343, "step": 18289 }, { "epoch": 0.3179266109266631, "grad_norm": 2.2348670604007257, "learning_rate": 7.979241003581111e-07, "loss": 0.4922, "step": 18290 }, { "epoch": 0.3179439934641659, "grad_norm": 3.9741290390362973, "learning_rate": 7.979014932358003e-07, "loss": 0.6185, "step": 18291 }, { "epoch": 0.31796137600166874, "grad_norm": 2.489143505922354, "learning_rate": 7.978788851692788e-07, "loss": 0.5473, "step": 18292 }, { "epoch": 0.31797875853917157, "grad_norm": 2.1785694750471616, "learning_rate": 7.97856276158618e-07, "loss": 0.3413, "step": 18293 }, { "epoch": 0.3179961410766744, "grad_norm": 1.937061039193654, "learning_rate": 7.978336662038896e-07, "loss": 0.2763, "step": 18294 }, { "epoch": 0.3180135236141772, "grad_norm": 1.4726943100177023, "learning_rate": 7.978110553051653e-07, "loss": 0.3555, "step": 18295 }, { "epoch": 0.31803090615168, "grad_norm": 1.4406724006317209, "learning_rate": 7.977884434625169e-07, "loss": 0.2765, "step": 18296 }, { "epoch": 0.31804828868918283, "grad_norm": 1.6485550947494576, "learning_rate": 7.977658306760158e-07, "loss": 0.3237, "step": 18297 }, { "epoch": 0.31806567122668566, "grad_norm": 2.3683249352008655, "learning_rate": 7.977432169457336e-07, "loss": 0.5279, "step": 18298 }, { "epoch": 0.3180830537641885, "grad_norm": 2.887827855446993, "learning_rate": 7.977206022717424e-07, "loss": 0.3773, "step": 18299 }, { "epoch": 0.3181004363016913, "grad_norm": 1.0333597344128351, "learning_rate": 7.976979866541137e-07, "loss": 0.3888, "step": 18300 }, { "epoch": 0.31811781883919416, "grad_norm": 0.9741041634274491, "learning_rate": 7.976753700929191e-07, "loss": 0.1925, "step": 18301 }, { "epoch": 0.318135201376697, "grad_norm": 1.9972856528917122, "learning_rate": 7.976527525882302e-07, "loss": 0.3963, "step": 18302 }, { "epoch": 0.3181525839141998, "grad_norm": 1.8029544501233277, "learning_rate": 7.976301341401188e-07, "loss": 0.3196, "step": 18303 }, { "epoch": 0.31816996645170265, "grad_norm": 1.578432101058934, "learning_rate": 7.976075147486567e-07, "loss": 0.3274, "step": 18304 }, { "epoch": 0.3181873489892054, "grad_norm": 1.6478987347985756, "learning_rate": 7.975848944139156e-07, "loss": 0.2848, "step": 18305 }, { "epoch": 0.31820473152670825, "grad_norm": 2.1879810092322343, "learning_rate": 7.975622731359668e-07, "loss": 0.4554, "step": 18306 }, { "epoch": 0.3182221140642111, "grad_norm": 1.7190116708409429, "learning_rate": 7.975396509148824e-07, "loss": 0.3747, "step": 18307 }, { "epoch": 0.3182394966017139, "grad_norm": 2.1858289518349627, "learning_rate": 7.975170277507341e-07, "loss": 0.2984, "step": 18308 }, { "epoch": 0.31825687913921674, "grad_norm": 4.6867450654595375, "learning_rate": 7.974944036435934e-07, "loss": 0.4017, "step": 18309 }, { "epoch": 0.31827426167671957, "grad_norm": 2.976226737934896, "learning_rate": 7.974717785935321e-07, "loss": 0.5527, "step": 18310 }, { "epoch": 0.3182916442142224, "grad_norm": 1.1636737260756067, "learning_rate": 7.974491526006219e-07, "loss": 0.2308, "step": 18311 }, { "epoch": 0.31830902675172523, "grad_norm": 2.1853408423045257, "learning_rate": 7.974265256649346e-07, "loss": 0.1869, "step": 18312 }, { "epoch": 0.31832640928922806, "grad_norm": 2.7462706551832805, "learning_rate": 7.974038977865417e-07, "loss": 0.3932, "step": 18313 }, { "epoch": 0.3183437918267309, "grad_norm": 0.8637394040078121, "learning_rate": 7.973812689655151e-07, "loss": 0.296, "step": 18314 }, { "epoch": 0.31836117436423367, "grad_norm": 1.2582681234768618, "learning_rate": 7.973586392019266e-07, "loss": 0.3549, "step": 18315 }, { "epoch": 0.3183785569017365, "grad_norm": 2.0249392212909987, "learning_rate": 7.973360084958477e-07, "loss": 0.3846, "step": 18316 }, { "epoch": 0.31839593943923933, "grad_norm": 2.0754028864060494, "learning_rate": 7.973133768473503e-07, "loss": 0.3709, "step": 18317 }, { "epoch": 0.31841332197674216, "grad_norm": 1.324028068990691, "learning_rate": 7.97290744256506e-07, "loss": 0.1964, "step": 18318 }, { "epoch": 0.318430704514245, "grad_norm": 2.092831243512672, "learning_rate": 7.972681107233867e-07, "loss": 0.387, "step": 18319 }, { "epoch": 0.3184480870517478, "grad_norm": 1.355234930082363, "learning_rate": 7.972454762480639e-07, "loss": 0.3937, "step": 18320 }, { "epoch": 0.31846546958925065, "grad_norm": 1.5218618782196351, "learning_rate": 7.972228408306096e-07, "loss": 0.3054, "step": 18321 }, { "epoch": 0.3184828521267535, "grad_norm": 1.537258098781938, "learning_rate": 7.972002044710954e-07, "loss": 0.2157, "step": 18322 }, { "epoch": 0.3185002346642563, "grad_norm": 2.2227703795543743, "learning_rate": 7.971775671695931e-07, "loss": 0.3084, "step": 18323 }, { "epoch": 0.31851761720175914, "grad_norm": 2.0503676523608383, "learning_rate": 7.971549289261744e-07, "loss": 0.2617, "step": 18324 }, { "epoch": 0.3185349997392619, "grad_norm": 1.550750725362829, "learning_rate": 7.97132289740911e-07, "loss": 0.1874, "step": 18325 }, { "epoch": 0.31855238227676475, "grad_norm": 1.699116032836355, "learning_rate": 7.971096496138749e-07, "loss": 0.2791, "step": 18326 }, { "epoch": 0.3185697648142676, "grad_norm": 2.268103892892599, "learning_rate": 7.970870085451375e-07, "loss": 0.2691, "step": 18327 }, { "epoch": 0.3185871473517704, "grad_norm": 1.1448819145614648, "learning_rate": 7.970643665347709e-07, "loss": 0.1917, "step": 18328 }, { "epoch": 0.31860452988927324, "grad_norm": 1.653118450865476, "learning_rate": 7.970417235828467e-07, "loss": 0.4109, "step": 18329 }, { "epoch": 0.31862191242677607, "grad_norm": 1.2173734790078623, "learning_rate": 7.970190796894366e-07, "loss": 0.4324, "step": 18330 }, { "epoch": 0.3186392949642789, "grad_norm": 1.7833764591604575, "learning_rate": 7.969964348546126e-07, "loss": 0.3891, "step": 18331 }, { "epoch": 0.3186566775017817, "grad_norm": 2.08179349784727, "learning_rate": 7.96973789078446e-07, "loss": 0.3812, "step": 18332 }, { "epoch": 0.31867406003928456, "grad_norm": 1.937010835273206, "learning_rate": 7.969511423610093e-07, "loss": 0.2994, "step": 18333 }, { "epoch": 0.3186914425767874, "grad_norm": 1.1268031152277207, "learning_rate": 7.969284947023738e-07, "loss": 0.2852, "step": 18334 }, { "epoch": 0.31870882511429016, "grad_norm": 2.4152604901371397, "learning_rate": 7.969058461026112e-07, "loss": 0.4458, "step": 18335 }, { "epoch": 0.318726207651793, "grad_norm": 2.736896780418576, "learning_rate": 7.968831965617935e-07, "loss": 0.5998, "step": 18336 }, { "epoch": 0.3187435901892958, "grad_norm": 1.419387313533623, "learning_rate": 7.968605460799926e-07, "loss": 0.3739, "step": 18337 }, { "epoch": 0.31876097272679865, "grad_norm": 2.171437544092366, "learning_rate": 7.968378946572801e-07, "loss": 0.4065, "step": 18338 }, { "epoch": 0.3187783552643015, "grad_norm": 2.035093648103071, "learning_rate": 7.968152422937277e-07, "loss": 0.518, "step": 18339 }, { "epoch": 0.3187957378018043, "grad_norm": 2.8218473467964786, "learning_rate": 7.967925889894074e-07, "loss": 0.5471, "step": 18340 }, { "epoch": 0.31881312033930714, "grad_norm": 1.6636487055408689, "learning_rate": 7.967699347443909e-07, "loss": 0.2642, "step": 18341 }, { "epoch": 0.31883050287681, "grad_norm": 3.4318228362446686, "learning_rate": 7.9674727955875e-07, "loss": 0.5087, "step": 18342 }, { "epoch": 0.3188478854143128, "grad_norm": 1.655293223693293, "learning_rate": 7.967246234325566e-07, "loss": 0.26, "step": 18343 }, { "epoch": 0.3188652679518156, "grad_norm": 1.7548622655675323, "learning_rate": 7.967019663658825e-07, "loss": 0.3744, "step": 18344 }, { "epoch": 0.3188826504893184, "grad_norm": 2.03437910522626, "learning_rate": 7.966793083587994e-07, "loss": 0.3727, "step": 18345 }, { "epoch": 0.31890003302682124, "grad_norm": 3.373312696600443, "learning_rate": 7.966566494113792e-07, "loss": 0.5399, "step": 18346 }, { "epoch": 0.31891741556432407, "grad_norm": 2.1063542348611115, "learning_rate": 7.966339895236937e-07, "loss": 0.5283, "step": 18347 }, { "epoch": 0.3189347981018269, "grad_norm": 2.2450935986118976, "learning_rate": 7.966113286958146e-07, "loss": 0.4233, "step": 18348 }, { "epoch": 0.31895218063932973, "grad_norm": 3.165942669535759, "learning_rate": 7.96588666927814e-07, "loss": 0.5556, "step": 18349 }, { "epoch": 0.31896956317683256, "grad_norm": 1.6891732266659854, "learning_rate": 7.965660042197635e-07, "loss": 0.5374, "step": 18350 }, { "epoch": 0.3189869457143354, "grad_norm": 1.8868196416399923, "learning_rate": 7.96543340571735e-07, "loss": 0.3473, "step": 18351 }, { "epoch": 0.3190043282518382, "grad_norm": 1.8616852461640394, "learning_rate": 7.965206759838003e-07, "loss": 0.2114, "step": 18352 }, { "epoch": 0.31902171078934105, "grad_norm": 1.6547903793223844, "learning_rate": 7.964980104560313e-07, "loss": 0.2626, "step": 18353 }, { "epoch": 0.3190390933268438, "grad_norm": 1.752601737710212, "learning_rate": 7.964753439884997e-07, "loss": 0.1999, "step": 18354 }, { "epoch": 0.31905647586434666, "grad_norm": 1.5988399026767426, "learning_rate": 7.964526765812776e-07, "loss": 0.4393, "step": 18355 }, { "epoch": 0.3190738584018495, "grad_norm": 1.7292194717009917, "learning_rate": 7.964300082344366e-07, "loss": 0.3281, "step": 18356 }, { "epoch": 0.3190912409393523, "grad_norm": 1.7077335008149728, "learning_rate": 7.964073389480488e-07, "loss": 0.1565, "step": 18357 }, { "epoch": 0.31910862347685515, "grad_norm": 1.6620899418782207, "learning_rate": 7.963846687221857e-07, "loss": 0.315, "step": 18358 }, { "epoch": 0.319126006014358, "grad_norm": 2.12369147025792, "learning_rate": 7.963619975569195e-07, "loss": 0.4675, "step": 18359 }, { "epoch": 0.3191433885518608, "grad_norm": 3.138251800154057, "learning_rate": 7.963393254523218e-07, "loss": 0.5636, "step": 18360 }, { "epoch": 0.31916077108936364, "grad_norm": 1.4239066831909482, "learning_rate": 7.963166524084645e-07, "loss": 0.3581, "step": 18361 }, { "epoch": 0.31917815362686647, "grad_norm": 1.607172219772907, "learning_rate": 7.962939784254197e-07, "loss": 0.2764, "step": 18362 }, { "epoch": 0.3191955361643693, "grad_norm": 2.191824398105234, "learning_rate": 7.96271303503259e-07, "loss": 0.3321, "step": 18363 }, { "epoch": 0.3192129187018721, "grad_norm": 1.704132627238613, "learning_rate": 7.962486276420544e-07, "loss": 0.2238, "step": 18364 }, { "epoch": 0.3192303012393749, "grad_norm": 1.7541449955039503, "learning_rate": 7.962259508418778e-07, "loss": 0.4111, "step": 18365 }, { "epoch": 0.31924768377687773, "grad_norm": 1.5501113071430381, "learning_rate": 7.962032731028008e-07, "loss": 0.261, "step": 18366 }, { "epoch": 0.31926506631438056, "grad_norm": 1.7947133852882855, "learning_rate": 7.961805944248956e-07, "loss": 0.2256, "step": 18367 }, { "epoch": 0.3192824488518834, "grad_norm": 1.3714779649308009, "learning_rate": 7.96157914808234e-07, "loss": 0.2086, "step": 18368 }, { "epoch": 0.3192998313893862, "grad_norm": 1.1554990339420392, "learning_rate": 7.961352342528879e-07, "loss": 0.2803, "step": 18369 }, { "epoch": 0.31931721392688905, "grad_norm": 1.285080949911851, "learning_rate": 7.961125527589291e-07, "loss": 0.3907, "step": 18370 }, { "epoch": 0.3193345964643919, "grad_norm": 1.5135786823703334, "learning_rate": 7.960898703264296e-07, "loss": 0.2273, "step": 18371 }, { "epoch": 0.3193519790018947, "grad_norm": 1.6851917525751865, "learning_rate": 7.960671869554611e-07, "loss": 0.1552, "step": 18372 }, { "epoch": 0.31936936153939754, "grad_norm": 1.9215147751133987, "learning_rate": 7.960445026460956e-07, "loss": 0.5271, "step": 18373 }, { "epoch": 0.3193867440769003, "grad_norm": 3.4533936605534383, "learning_rate": 7.96021817398405e-07, "loss": 0.4286, "step": 18374 }, { "epoch": 0.31940412661440315, "grad_norm": 3.2455170670520985, "learning_rate": 7.959991312124614e-07, "loss": 0.3878, "step": 18375 }, { "epoch": 0.319421509151906, "grad_norm": 2.068624797972228, "learning_rate": 7.959764440883364e-07, "loss": 0.3392, "step": 18376 }, { "epoch": 0.3194388916894088, "grad_norm": 1.8304104640044012, "learning_rate": 7.95953756026102e-07, "loss": 0.2747, "step": 18377 }, { "epoch": 0.31945627422691164, "grad_norm": 2.211465585626285, "learning_rate": 7.959310670258304e-07, "loss": 0.4404, "step": 18378 }, { "epoch": 0.31947365676441447, "grad_norm": 2.1476888226572926, "learning_rate": 7.959083770875929e-07, "loss": 0.3679, "step": 18379 }, { "epoch": 0.3194910393019173, "grad_norm": 1.5858410496696753, "learning_rate": 7.95885686211462e-07, "loss": 0.4396, "step": 18380 }, { "epoch": 0.31950842183942013, "grad_norm": 2.1311718058415012, "learning_rate": 7.958629943975093e-07, "loss": 0.4865, "step": 18381 }, { "epoch": 0.31952580437692296, "grad_norm": 1.6541438003939362, "learning_rate": 7.958403016458069e-07, "loss": 0.2566, "step": 18382 }, { "epoch": 0.3195431869144258, "grad_norm": 1.450206013476985, "learning_rate": 7.958176079564265e-07, "loss": 0.2035, "step": 18383 }, { "epoch": 0.31956056945192857, "grad_norm": 1.990680616919911, "learning_rate": 7.957949133294403e-07, "loss": 0.4422, "step": 18384 }, { "epoch": 0.3195779519894314, "grad_norm": 1.7157761728428607, "learning_rate": 7.9577221776492e-07, "loss": 0.4019, "step": 18385 }, { "epoch": 0.3195953345269342, "grad_norm": 0.8431611701935217, "learning_rate": 7.957495212629377e-07, "loss": 0.2222, "step": 18386 }, { "epoch": 0.31961271706443706, "grad_norm": 2.941054230649584, "learning_rate": 7.957268238235652e-07, "loss": 0.3227, "step": 18387 }, { "epoch": 0.3196300996019399, "grad_norm": 1.3571138575712707, "learning_rate": 7.957041254468746e-07, "loss": 0.4363, "step": 18388 }, { "epoch": 0.3196474821394427, "grad_norm": 2.4911817858533962, "learning_rate": 7.956814261329376e-07, "loss": 0.3379, "step": 18389 }, { "epoch": 0.31966486467694555, "grad_norm": 1.3633719966227675, "learning_rate": 7.956587258818266e-07, "loss": 0.3614, "step": 18390 }, { "epoch": 0.3196822472144484, "grad_norm": 1.5232057898505853, "learning_rate": 7.956360246936131e-07, "loss": 0.2784, "step": 18391 }, { "epoch": 0.3196996297519512, "grad_norm": 2.439567799111087, "learning_rate": 7.956133225683693e-07, "loss": 0.2211, "step": 18392 }, { "epoch": 0.31971701228945404, "grad_norm": 1.4641720075499567, "learning_rate": 7.955906195061669e-07, "loss": 0.4616, "step": 18393 }, { "epoch": 0.3197343948269568, "grad_norm": 1.9513940684071711, "learning_rate": 7.95567915507078e-07, "loss": 0.4174, "step": 18394 }, { "epoch": 0.31975177736445964, "grad_norm": 3.363177363584408, "learning_rate": 7.955452105711746e-07, "loss": 0.3834, "step": 18395 }, { "epoch": 0.3197691599019625, "grad_norm": 2.200580938609855, "learning_rate": 7.955225046985287e-07, "loss": 0.4871, "step": 18396 }, { "epoch": 0.3197865424394653, "grad_norm": 1.8863481568688059, "learning_rate": 7.954997978892124e-07, "loss": 0.1994, "step": 18397 }, { "epoch": 0.31980392497696813, "grad_norm": 2.9825215639368965, "learning_rate": 7.954770901432972e-07, "loss": 0.5079, "step": 18398 }, { "epoch": 0.31982130751447096, "grad_norm": 1.60145325007334, "learning_rate": 7.954543814608554e-07, "loss": 0.2656, "step": 18399 }, { "epoch": 0.3198386900519738, "grad_norm": 1.4679516301015054, "learning_rate": 7.954316718419592e-07, "loss": 0.5362, "step": 18400 }, { "epoch": 0.3198560725894766, "grad_norm": 2.1762622887966154, "learning_rate": 7.954089612866799e-07, "loss": 0.3885, "step": 18401 }, { "epoch": 0.31987345512697946, "grad_norm": 1.7435095489576147, "learning_rate": 7.953862497950901e-07, "loss": 0.4222, "step": 18402 }, { "epoch": 0.3198908376644823, "grad_norm": 1.7466648959191953, "learning_rate": 7.953635373672615e-07, "loss": 0.292, "step": 18403 }, { "epoch": 0.31990822020198506, "grad_norm": 2.0328010995578785, "learning_rate": 7.953408240032664e-07, "loss": 0.3159, "step": 18404 }, { "epoch": 0.3199256027394879, "grad_norm": 1.7560507784836403, "learning_rate": 7.953181097031762e-07, "loss": 0.5699, "step": 18405 }, { "epoch": 0.3199429852769907, "grad_norm": 2.0910574525831067, "learning_rate": 7.952953944670637e-07, "loss": 0.4504, "step": 18406 }, { "epoch": 0.31996036781449355, "grad_norm": 1.7573550812205416, "learning_rate": 7.952726782950001e-07, "loss": 0.3874, "step": 18407 }, { "epoch": 0.3199777503519964, "grad_norm": 2.14164182701624, "learning_rate": 7.952499611870578e-07, "loss": 0.3879, "step": 18408 }, { "epoch": 0.3199951328894992, "grad_norm": 1.6407884042126464, "learning_rate": 7.952272431433088e-07, "loss": 0.2326, "step": 18409 }, { "epoch": 0.32001251542700204, "grad_norm": 2.341968897449756, "learning_rate": 7.95204524163825e-07, "loss": 0.3563, "step": 18410 }, { "epoch": 0.3200298979645049, "grad_norm": 3.268477729846454, "learning_rate": 7.951818042486784e-07, "loss": 0.5589, "step": 18411 }, { "epoch": 0.3200472805020077, "grad_norm": 1.7511926049980486, "learning_rate": 7.951590833979413e-07, "loss": 0.2969, "step": 18412 }, { "epoch": 0.32006466303951053, "grad_norm": 1.826031654813051, "learning_rate": 7.951363616116853e-07, "loss": 0.3061, "step": 18413 }, { "epoch": 0.3200820455770133, "grad_norm": 1.544560442696792, "learning_rate": 7.951136388899828e-07, "loss": 0.6048, "step": 18414 }, { "epoch": 0.32009942811451614, "grad_norm": 1.038341807100245, "learning_rate": 7.950909152329054e-07, "loss": 0.2834, "step": 18415 }, { "epoch": 0.32011681065201897, "grad_norm": 0.9675270413470564, "learning_rate": 7.950681906405254e-07, "loss": 0.3467, "step": 18416 }, { "epoch": 0.3201341931895218, "grad_norm": 2.506156046491027, "learning_rate": 7.95045465112915e-07, "loss": 0.4638, "step": 18417 }, { "epoch": 0.32015157572702463, "grad_norm": 1.7133198553567988, "learning_rate": 7.950227386501458e-07, "loss": 0.4167, "step": 18418 }, { "epoch": 0.32016895826452746, "grad_norm": 1.871636840089528, "learning_rate": 7.950000112522901e-07, "loss": 0.3002, "step": 18419 }, { "epoch": 0.3201863408020303, "grad_norm": 1.255184507402427, "learning_rate": 7.949772829194198e-07, "loss": 0.1406, "step": 18420 }, { "epoch": 0.3202037233395331, "grad_norm": 1.796164008085599, "learning_rate": 7.949545536516071e-07, "loss": 0.2249, "step": 18421 }, { "epoch": 0.32022110587703595, "grad_norm": 1.9318283890143562, "learning_rate": 7.94931823448924e-07, "loss": 0.5972, "step": 18422 }, { "epoch": 0.3202384884145388, "grad_norm": 4.208103997191449, "learning_rate": 7.949090923114423e-07, "loss": 0.3644, "step": 18423 }, { "epoch": 0.32025587095204155, "grad_norm": 1.8908529175635613, "learning_rate": 7.948863602392344e-07, "loss": 0.4183, "step": 18424 }, { "epoch": 0.3202732534895444, "grad_norm": 3.183275860759233, "learning_rate": 7.948636272323724e-07, "loss": 0.4061, "step": 18425 }, { "epoch": 0.3202906360270472, "grad_norm": 1.4903927317936922, "learning_rate": 7.948408932909278e-07, "loss": 0.2096, "step": 18426 }, { "epoch": 0.32030801856455005, "grad_norm": 1.372839830893233, "learning_rate": 7.948181584149731e-07, "loss": 0.6004, "step": 18427 }, { "epoch": 0.3203254011020529, "grad_norm": 1.9825441079930395, "learning_rate": 7.947954226045804e-07, "loss": 0.3912, "step": 18428 }, { "epoch": 0.3203427836395557, "grad_norm": 1.9155075019231773, "learning_rate": 7.947726858598214e-07, "loss": 0.2223, "step": 18429 }, { "epoch": 0.32036016617705854, "grad_norm": 1.936968853909737, "learning_rate": 7.947499481807685e-07, "loss": 0.2944, "step": 18430 }, { "epoch": 0.32037754871456137, "grad_norm": 1.2601625001904737, "learning_rate": 7.947272095674938e-07, "loss": 0.3057, "step": 18431 }, { "epoch": 0.3203949312520642, "grad_norm": 1.5059290782630592, "learning_rate": 7.947044700200692e-07, "loss": 0.3293, "step": 18432 }, { "epoch": 0.320412313789567, "grad_norm": 1.311814522940398, "learning_rate": 7.946817295385666e-07, "loss": 0.2852, "step": 18433 }, { "epoch": 0.3204296963270698, "grad_norm": 1.4111214135739976, "learning_rate": 7.946589881230583e-07, "loss": 0.2563, "step": 18434 }, { "epoch": 0.32044707886457263, "grad_norm": 1.1842992778505517, "learning_rate": 7.946362457736166e-07, "loss": 0.2843, "step": 18435 }, { "epoch": 0.32046446140207546, "grad_norm": 1.6128290478201155, "learning_rate": 7.946135024903131e-07, "loss": 0.3138, "step": 18436 }, { "epoch": 0.3204818439395783, "grad_norm": 1.536575107418957, "learning_rate": 7.945907582732203e-07, "loss": 0.3964, "step": 18437 }, { "epoch": 0.3204992264770811, "grad_norm": 3.2392793363297945, "learning_rate": 7.945680131224101e-07, "loss": 0.4107, "step": 18438 }, { "epoch": 0.32051660901458395, "grad_norm": 1.599129216316756, "learning_rate": 7.945452670379546e-07, "loss": 0.3078, "step": 18439 }, { "epoch": 0.3205339915520868, "grad_norm": 1.272648301309987, "learning_rate": 7.945225200199258e-07, "loss": 0.3369, "step": 18440 }, { "epoch": 0.3205513740895896, "grad_norm": 2.029357440352772, "learning_rate": 7.94499772068396e-07, "loss": 0.2894, "step": 18441 }, { "epoch": 0.32056875662709244, "grad_norm": 2.014070656174478, "learning_rate": 7.944770231834372e-07, "loss": 0.3539, "step": 18442 }, { "epoch": 0.3205861391645953, "grad_norm": 1.6079982469435916, "learning_rate": 7.944542733651216e-07, "loss": 0.4107, "step": 18443 }, { "epoch": 0.32060352170209805, "grad_norm": 3.4509963989396852, "learning_rate": 7.944315226135211e-07, "loss": 0.2861, "step": 18444 }, { "epoch": 0.3206209042396009, "grad_norm": 1.3751672391314416, "learning_rate": 7.944087709287079e-07, "loss": 0.3156, "step": 18445 }, { "epoch": 0.3206382867771037, "grad_norm": 1.5408626936489216, "learning_rate": 7.943860183107542e-07, "loss": 0.3106, "step": 18446 }, { "epoch": 0.32065566931460654, "grad_norm": 1.347264732499644, "learning_rate": 7.94363264759732e-07, "loss": 0.3693, "step": 18447 }, { "epoch": 0.32067305185210937, "grad_norm": 1.9104042635434935, "learning_rate": 7.943405102757135e-07, "loss": 0.5219, "step": 18448 }, { "epoch": 0.3206904343896122, "grad_norm": 1.854330608763087, "learning_rate": 7.943177548587707e-07, "loss": 0.4256, "step": 18449 }, { "epoch": 0.32070781692711503, "grad_norm": 1.5288132608146807, "learning_rate": 7.94294998508976e-07, "loss": 0.3371, "step": 18450 }, { "epoch": 0.32072519946461786, "grad_norm": 1.212526056683252, "learning_rate": 7.942722412264013e-07, "loss": 0.2998, "step": 18451 }, { "epoch": 0.3207425820021207, "grad_norm": 1.5052067695793894, "learning_rate": 7.942494830111186e-07, "loss": 0.4119, "step": 18452 }, { "epoch": 0.3207599645396235, "grad_norm": 3.8166386117805557, "learning_rate": 7.942267238632005e-07, "loss": 0.4065, "step": 18453 }, { "epoch": 0.3207773470771263, "grad_norm": 1.6950996566963417, "learning_rate": 7.942039637827186e-07, "loss": 0.2045, "step": 18454 }, { "epoch": 0.3207947296146291, "grad_norm": 2.365563168755791, "learning_rate": 7.941812027697453e-07, "loss": 0.4715, "step": 18455 }, { "epoch": 0.32081211215213196, "grad_norm": 1.7526542961194753, "learning_rate": 7.941584408243527e-07, "loss": 0.5551, "step": 18456 }, { "epoch": 0.3208294946896348, "grad_norm": 1.9233999838133906, "learning_rate": 7.941356779466132e-07, "loss": 0.3318, "step": 18457 }, { "epoch": 0.3208468772271376, "grad_norm": 2.1650135146192735, "learning_rate": 7.941129141365986e-07, "loss": 0.4559, "step": 18458 }, { "epoch": 0.32086425976464045, "grad_norm": 1.2093028019742116, "learning_rate": 7.94090149394381e-07, "loss": 0.4196, "step": 18459 }, { "epoch": 0.3208816423021433, "grad_norm": 1.5113430879349057, "learning_rate": 7.940673837200328e-07, "loss": 0.1953, "step": 18460 }, { "epoch": 0.3208990248396461, "grad_norm": 1.4638209255935744, "learning_rate": 7.940446171136262e-07, "loss": 0.2752, "step": 18461 }, { "epoch": 0.32091640737714894, "grad_norm": 3.648015450401277, "learning_rate": 7.940218495752331e-07, "loss": 0.4683, "step": 18462 }, { "epoch": 0.32093378991465177, "grad_norm": 1.3764560272600177, "learning_rate": 7.93999081104926e-07, "loss": 0.3941, "step": 18463 }, { "epoch": 0.32095117245215454, "grad_norm": 1.889045511246833, "learning_rate": 7.939763117027767e-07, "loss": 0.2536, "step": 18464 }, { "epoch": 0.3209685549896574, "grad_norm": 1.7959275338781746, "learning_rate": 7.939535413688575e-07, "loss": 0.2883, "step": 18465 }, { "epoch": 0.3209859375271602, "grad_norm": 1.8286820256757481, "learning_rate": 7.939307701032408e-07, "loss": 0.5585, "step": 18466 }, { "epoch": 0.32100332006466303, "grad_norm": 1.9659977799685127, "learning_rate": 7.939079979059983e-07, "loss": 0.2507, "step": 18467 }, { "epoch": 0.32102070260216586, "grad_norm": 1.686704619223284, "learning_rate": 7.938852247772025e-07, "loss": 0.2931, "step": 18468 }, { "epoch": 0.3210380851396687, "grad_norm": 2.155965674077778, "learning_rate": 7.938624507169258e-07, "loss": 0.4703, "step": 18469 }, { "epoch": 0.3210554676771715, "grad_norm": 1.2674870920709296, "learning_rate": 7.938396757252399e-07, "loss": 0.3254, "step": 18470 }, { "epoch": 0.32107285021467435, "grad_norm": 2.026335964996635, "learning_rate": 7.938168998022172e-07, "loss": 0.5768, "step": 18471 }, { "epoch": 0.3210902327521772, "grad_norm": 2.226605629256807, "learning_rate": 7.9379412294793e-07, "loss": 0.3638, "step": 18472 }, { "epoch": 0.32110761528968, "grad_norm": 2.2302331829351947, "learning_rate": 7.937713451624504e-07, "loss": 0.3612, "step": 18473 }, { "epoch": 0.3211249978271828, "grad_norm": 1.606852531297844, "learning_rate": 7.937485664458505e-07, "loss": 0.2768, "step": 18474 }, { "epoch": 0.3211423803646856, "grad_norm": 3.9472106805185114, "learning_rate": 7.937257867982025e-07, "loss": 0.5401, "step": 18475 }, { "epoch": 0.32115976290218845, "grad_norm": 1.387264177194097, "learning_rate": 7.937030062195789e-07, "loss": 0.1751, "step": 18476 }, { "epoch": 0.3211771454396913, "grad_norm": 2.3692942486866517, "learning_rate": 7.936802247100516e-07, "loss": 0.47, "step": 18477 }, { "epoch": 0.3211945279771941, "grad_norm": 3.089805887607939, "learning_rate": 7.936574422696929e-07, "loss": 0.4574, "step": 18478 }, { "epoch": 0.32121191051469694, "grad_norm": 2.0276130440985805, "learning_rate": 7.93634658898575e-07, "loss": 0.399, "step": 18479 }, { "epoch": 0.32122929305219977, "grad_norm": 2.0614731365733636, "learning_rate": 7.936118745967701e-07, "loss": 0.197, "step": 18480 }, { "epoch": 0.3212466755897026, "grad_norm": 2.0773840857661012, "learning_rate": 7.935890893643503e-07, "loss": 0.4383, "step": 18481 }, { "epoch": 0.32126405812720543, "grad_norm": 2.313276367725461, "learning_rate": 7.935663032013882e-07, "loss": 0.3382, "step": 18482 }, { "epoch": 0.3212814406647082, "grad_norm": 1.6655996962923878, "learning_rate": 7.935435161079555e-07, "loss": 0.371, "step": 18483 }, { "epoch": 0.32129882320221104, "grad_norm": 1.772963879635668, "learning_rate": 7.93520728084125e-07, "loss": 0.4365, "step": 18484 }, { "epoch": 0.32131620573971387, "grad_norm": 1.712633047030205, "learning_rate": 7.934979391299685e-07, "loss": 0.29, "step": 18485 }, { "epoch": 0.3213335882772167, "grad_norm": 1.1428796159574854, "learning_rate": 7.934751492455582e-07, "loss": 0.2681, "step": 18486 }, { "epoch": 0.3213509708147195, "grad_norm": 1.9173266511696219, "learning_rate": 7.934523584309667e-07, "loss": 0.3588, "step": 18487 }, { "epoch": 0.32136835335222236, "grad_norm": 2.3829711004972816, "learning_rate": 7.93429566686266e-07, "loss": 0.3231, "step": 18488 }, { "epoch": 0.3213857358897252, "grad_norm": 0.9144749717465745, "learning_rate": 7.934067740115282e-07, "loss": 0.3467, "step": 18489 }, { "epoch": 0.321403118427228, "grad_norm": 2.7577926222081475, "learning_rate": 7.933839804068258e-07, "loss": 0.3787, "step": 18490 }, { "epoch": 0.32142050096473085, "grad_norm": 1.5191625716673522, "learning_rate": 7.93361185872231e-07, "loss": 0.2714, "step": 18491 }, { "epoch": 0.3214378835022337, "grad_norm": 1.8945147756458594, "learning_rate": 7.933383904078159e-07, "loss": 0.3823, "step": 18492 }, { "epoch": 0.32145526603973645, "grad_norm": 1.5406469837296826, "learning_rate": 7.933155940136529e-07, "loss": 0.2692, "step": 18493 }, { "epoch": 0.3214726485772393, "grad_norm": 3.2498064709898196, "learning_rate": 7.932927966898142e-07, "loss": 0.3022, "step": 18494 }, { "epoch": 0.3214900311147421, "grad_norm": 1.4267731692459393, "learning_rate": 7.932699984363722e-07, "loss": 0.1812, "step": 18495 }, { "epoch": 0.32150741365224494, "grad_norm": 1.2512275600815634, "learning_rate": 7.932471992533989e-07, "loss": 0.4236, "step": 18496 }, { "epoch": 0.3215247961897478, "grad_norm": 1.9188359963130568, "learning_rate": 7.932243991409666e-07, "loss": 0.4746, "step": 18497 }, { "epoch": 0.3215421787272506, "grad_norm": 2.9408186718606952, "learning_rate": 7.932015980991478e-07, "loss": 0.243, "step": 18498 }, { "epoch": 0.32155956126475344, "grad_norm": 4.895064626816682, "learning_rate": 7.931787961280146e-07, "loss": 0.3417, "step": 18499 }, { "epoch": 0.32157694380225627, "grad_norm": 2.3197606989053945, "learning_rate": 7.931559932276392e-07, "loss": 0.2547, "step": 18500 }, { "epoch": 0.3215943263397591, "grad_norm": 1.5736751915663536, "learning_rate": 7.931331893980939e-07, "loss": 0.181, "step": 18501 }, { "epoch": 0.3216117088772619, "grad_norm": 1.8579812648264575, "learning_rate": 7.931103846394513e-07, "loss": 0.289, "step": 18502 }, { "epoch": 0.3216290914147647, "grad_norm": 1.815815051372958, "learning_rate": 7.930875789517832e-07, "loss": 0.4424, "step": 18503 }, { "epoch": 0.32164647395226753, "grad_norm": 5.061545853778798, "learning_rate": 7.930647723351622e-07, "loss": 0.4793, "step": 18504 }, { "epoch": 0.32166385648977036, "grad_norm": 2.1524815831294823, "learning_rate": 7.930419647896605e-07, "loss": 0.3589, "step": 18505 }, { "epoch": 0.3216812390272732, "grad_norm": 3.1607387593923195, "learning_rate": 7.930191563153504e-07, "loss": 0.3185, "step": 18506 }, { "epoch": 0.321698621564776, "grad_norm": 2.3342136899507633, "learning_rate": 7.929963469123041e-07, "loss": 0.401, "step": 18507 }, { "epoch": 0.32171600410227885, "grad_norm": 1.021233091694278, "learning_rate": 7.929735365805941e-07, "loss": 0.3759, "step": 18508 }, { "epoch": 0.3217333866397817, "grad_norm": 1.6006092212976568, "learning_rate": 7.929507253202924e-07, "loss": 0.3764, "step": 18509 }, { "epoch": 0.3217507691772845, "grad_norm": 1.4431791951291593, "learning_rate": 7.929279131314717e-07, "loss": 0.2341, "step": 18510 }, { "epoch": 0.32176815171478734, "grad_norm": 1.8413482965863466, "learning_rate": 7.929051000142037e-07, "loss": 0.3219, "step": 18511 }, { "epoch": 0.3217855342522902, "grad_norm": 1.1380917633695842, "learning_rate": 7.928822859685613e-07, "loss": 0.4276, "step": 18512 }, { "epoch": 0.32180291678979295, "grad_norm": 1.846848764646243, "learning_rate": 7.928594709946168e-07, "loss": 0.411, "step": 18513 }, { "epoch": 0.3218202993272958, "grad_norm": 1.6127746150979518, "learning_rate": 7.928366550924421e-07, "loss": 0.3775, "step": 18514 }, { "epoch": 0.3218376818647986, "grad_norm": 1.590814533629673, "learning_rate": 7.928138382621096e-07, "loss": 0.3338, "step": 18515 }, { "epoch": 0.32185506440230144, "grad_norm": 2.121943377535782, "learning_rate": 7.927910205036918e-07, "loss": 0.3688, "step": 18516 }, { "epoch": 0.32187244693980427, "grad_norm": 2.1133216453121486, "learning_rate": 7.927682018172612e-07, "loss": 0.2561, "step": 18517 }, { "epoch": 0.3218898294773071, "grad_norm": 1.9264807385783278, "learning_rate": 7.927453822028897e-07, "loss": 0.3245, "step": 18518 }, { "epoch": 0.32190721201480993, "grad_norm": 2.3378453122652942, "learning_rate": 7.927225616606499e-07, "loss": 0.3804, "step": 18519 }, { "epoch": 0.32192459455231276, "grad_norm": 4.334369058563931, "learning_rate": 7.926997401906139e-07, "loss": 0.6045, "step": 18520 }, { "epoch": 0.3219419770898156, "grad_norm": 1.6464050292179768, "learning_rate": 7.926769177928542e-07, "loss": 0.282, "step": 18521 }, { "epoch": 0.3219593596273184, "grad_norm": 2.9203490661318146, "learning_rate": 7.926540944674433e-07, "loss": 0.3299, "step": 18522 }, { "epoch": 0.3219767421648212, "grad_norm": 1.4117231955324203, "learning_rate": 7.926312702144533e-07, "loss": 0.5704, "step": 18523 }, { "epoch": 0.321994124702324, "grad_norm": 1.2825167232002284, "learning_rate": 7.926084450339566e-07, "loss": 0.2087, "step": 18524 }, { "epoch": 0.32201150723982686, "grad_norm": 1.5067676901018017, "learning_rate": 7.925856189260254e-07, "loss": 0.3109, "step": 18525 }, { "epoch": 0.3220288897773297, "grad_norm": 1.317426571383912, "learning_rate": 7.925627918907323e-07, "loss": 0.2945, "step": 18526 }, { "epoch": 0.3220462723148325, "grad_norm": 2.4098833364562386, "learning_rate": 7.925399639281493e-07, "loss": 0.5684, "step": 18527 }, { "epoch": 0.32206365485233535, "grad_norm": 1.6754377704075443, "learning_rate": 7.925171350383492e-07, "loss": 0.4834, "step": 18528 }, { "epoch": 0.3220810373898382, "grad_norm": 1.5580399889446281, "learning_rate": 7.924943052214044e-07, "loss": 0.3162, "step": 18529 }, { "epoch": 0.322098419927341, "grad_norm": 1.5378598874355234, "learning_rate": 7.924714744773866e-07, "loss": 0.36, "step": 18530 }, { "epoch": 0.32211580246484384, "grad_norm": 1.2550766568064493, "learning_rate": 7.924486428063688e-07, "loss": 0.2669, "step": 18531 }, { "epoch": 0.32213318500234667, "grad_norm": 3.0861316026104606, "learning_rate": 7.924258102084231e-07, "loss": 0.3422, "step": 18532 }, { "epoch": 0.32215056753984944, "grad_norm": 1.2813291254450292, "learning_rate": 7.924029766836219e-07, "loss": 0.2775, "step": 18533 }, { "epoch": 0.32216795007735227, "grad_norm": 1.678765881030754, "learning_rate": 7.923801422320376e-07, "loss": 0.3833, "step": 18534 }, { "epoch": 0.3221853326148551, "grad_norm": 2.045426388348507, "learning_rate": 7.923573068537426e-07, "loss": 0.4807, "step": 18535 }, { "epoch": 0.32220271515235793, "grad_norm": 2.8321674567811366, "learning_rate": 7.923344705488091e-07, "loss": 0.428, "step": 18536 }, { "epoch": 0.32222009768986076, "grad_norm": 2.041944803794113, "learning_rate": 7.923116333173096e-07, "loss": 0.2402, "step": 18537 }, { "epoch": 0.3222374802273636, "grad_norm": 1.3384166564497644, "learning_rate": 7.922887951593167e-07, "loss": 0.2609, "step": 18538 }, { "epoch": 0.3222548627648664, "grad_norm": 0.9444968000638535, "learning_rate": 7.922659560749026e-07, "loss": 0.3733, "step": 18539 }, { "epoch": 0.32227224530236925, "grad_norm": 2.535369765428172, "learning_rate": 7.922431160641394e-07, "loss": 0.4398, "step": 18540 }, { "epoch": 0.3222896278398721, "grad_norm": 2.8656810218340363, "learning_rate": 7.922202751270999e-07, "loss": 0.5648, "step": 18541 }, { "epoch": 0.3223070103773749, "grad_norm": 1.4418573816545868, "learning_rate": 7.921974332638564e-07, "loss": 0.2729, "step": 18542 }, { "epoch": 0.3223243929148777, "grad_norm": 1.8941833285815461, "learning_rate": 7.921745904744812e-07, "loss": 0.2241, "step": 18543 }, { "epoch": 0.3223417754523805, "grad_norm": 2.1258666642244264, "learning_rate": 7.921517467590469e-07, "loss": 0.2379, "step": 18544 }, { "epoch": 0.32235915798988335, "grad_norm": 1.5230171603740739, "learning_rate": 7.921289021176256e-07, "loss": 0.4127, "step": 18545 }, { "epoch": 0.3223765405273862, "grad_norm": 1.8200996398640616, "learning_rate": 7.921060565502899e-07, "loss": 0.2827, "step": 18546 }, { "epoch": 0.322393923064889, "grad_norm": 1.8165045876233707, "learning_rate": 7.920832100571122e-07, "loss": 0.3447, "step": 18547 }, { "epoch": 0.32241130560239184, "grad_norm": 1.8162756010104928, "learning_rate": 7.92060362638165e-07, "loss": 0.1947, "step": 18548 }, { "epoch": 0.32242868813989467, "grad_norm": 1.4786914072052042, "learning_rate": 7.920375142935206e-07, "loss": 0.5467, "step": 18549 }, { "epoch": 0.3224460706773975, "grad_norm": 2.2438278713519213, "learning_rate": 7.920146650232513e-07, "loss": 0.3963, "step": 18550 }, { "epoch": 0.32246345321490033, "grad_norm": 1.347063524833143, "learning_rate": 7.919918148274298e-07, "loss": 0.3492, "step": 18551 }, { "epoch": 0.32248083575240316, "grad_norm": 1.5057908566900373, "learning_rate": 7.919689637061282e-07, "loss": 0.3627, "step": 18552 }, { "epoch": 0.32249821828990594, "grad_norm": 1.3492163809239868, "learning_rate": 7.919461116594192e-07, "loss": 0.1785, "step": 18553 }, { "epoch": 0.32251560082740877, "grad_norm": 1.652152693564835, "learning_rate": 7.919232586873752e-07, "loss": 0.3586, "step": 18554 }, { "epoch": 0.3225329833649116, "grad_norm": 1.7497234961494097, "learning_rate": 7.919004047900686e-07, "loss": 0.37, "step": 18555 }, { "epoch": 0.3225503659024144, "grad_norm": 1.8192888600538624, "learning_rate": 7.918775499675716e-07, "loss": 0.5761, "step": 18556 }, { "epoch": 0.32256774843991726, "grad_norm": 1.3972092401196199, "learning_rate": 7.918546942199568e-07, "loss": 0.2292, "step": 18557 }, { "epoch": 0.3225851309774201, "grad_norm": 2.1965852154006216, "learning_rate": 7.91831837547297e-07, "loss": 0.3419, "step": 18558 }, { "epoch": 0.3226025135149229, "grad_norm": 2.4724980546302544, "learning_rate": 7.918089799496643e-07, "loss": 0.4704, "step": 18559 }, { "epoch": 0.32261989605242575, "grad_norm": 2.5303697505560163, "learning_rate": 7.917861214271309e-07, "loss": 0.2859, "step": 18560 }, { "epoch": 0.3226372785899286, "grad_norm": 1.8501246433455092, "learning_rate": 7.917632619797697e-07, "loss": 0.3774, "step": 18561 }, { "epoch": 0.3226546611274314, "grad_norm": 1.1259249063277006, "learning_rate": 7.91740401607653e-07, "loss": 0.5098, "step": 18562 }, { "epoch": 0.3226720436649342, "grad_norm": 1.8297660518766272, "learning_rate": 7.917175403108531e-07, "loss": 0.4491, "step": 18563 }, { "epoch": 0.322689426202437, "grad_norm": 2.147151373864707, "learning_rate": 7.916946780894429e-07, "loss": 0.2066, "step": 18564 }, { "epoch": 0.32270680873993984, "grad_norm": 1.8170854072803801, "learning_rate": 7.916718149434942e-07, "loss": 0.3416, "step": 18565 }, { "epoch": 0.3227241912774427, "grad_norm": 1.849300024301097, "learning_rate": 7.9164895087308e-07, "loss": 0.2358, "step": 18566 }, { "epoch": 0.3227415738149455, "grad_norm": 1.8822829061685316, "learning_rate": 7.916260858782727e-07, "loss": 0.3498, "step": 18567 }, { "epoch": 0.32275895635244833, "grad_norm": 1.565479919374, "learning_rate": 7.916032199591446e-07, "loss": 0.2686, "step": 18568 }, { "epoch": 0.32277633888995116, "grad_norm": 1.0219539366089585, "learning_rate": 7.915803531157681e-07, "loss": 0.1252, "step": 18569 }, { "epoch": 0.322793721427454, "grad_norm": 1.1767509116584538, "learning_rate": 7.915574853482161e-07, "loss": 0.2745, "step": 18570 }, { "epoch": 0.3228111039649568, "grad_norm": 1.7507161853465298, "learning_rate": 7.915346166565606e-07, "loss": 0.3849, "step": 18571 }, { "epoch": 0.32282848650245966, "grad_norm": 1.9206305010595563, "learning_rate": 7.915117470408743e-07, "loss": 0.4101, "step": 18572 }, { "epoch": 0.32284586903996243, "grad_norm": 3.7589404945384803, "learning_rate": 7.914888765012298e-07, "loss": 0.5723, "step": 18573 }, { "epoch": 0.32286325157746526, "grad_norm": 1.4564827411305687, "learning_rate": 7.914660050376993e-07, "loss": 0.2928, "step": 18574 }, { "epoch": 0.3228806341149681, "grad_norm": 1.977639167898253, "learning_rate": 7.914431326503554e-07, "loss": 0.4799, "step": 18575 }, { "epoch": 0.3228980166524709, "grad_norm": 1.4676534945927062, "learning_rate": 7.914202593392708e-07, "loss": 0.5691, "step": 18576 }, { "epoch": 0.32291539918997375, "grad_norm": 2.0717782585133295, "learning_rate": 7.913973851045179e-07, "loss": 0.3033, "step": 18577 }, { "epoch": 0.3229327817274766, "grad_norm": 2.2109442063635436, "learning_rate": 7.91374509946169e-07, "loss": 0.3631, "step": 18578 }, { "epoch": 0.3229501642649794, "grad_norm": 1.30208250271846, "learning_rate": 7.913516338642967e-07, "loss": 0.4279, "step": 18579 }, { "epoch": 0.32296754680248224, "grad_norm": 2.182925498191642, "learning_rate": 7.913287568589737e-07, "loss": 0.3233, "step": 18580 }, { "epoch": 0.32298492933998507, "grad_norm": 1.6466955641263064, "learning_rate": 7.913058789302723e-07, "loss": 0.3517, "step": 18581 }, { "epoch": 0.3230023118774879, "grad_norm": 1.59413690929767, "learning_rate": 7.912830000782651e-07, "loss": 0.3552, "step": 18582 }, { "epoch": 0.3230196944149907, "grad_norm": 1.6250189080843, "learning_rate": 7.912601203030247e-07, "loss": 0.6136, "step": 18583 }, { "epoch": 0.3230370769524935, "grad_norm": 1.8195218612321877, "learning_rate": 7.912372396046234e-07, "loss": 0.565, "step": 18584 }, { "epoch": 0.32305445948999634, "grad_norm": 1.1751142228152718, "learning_rate": 7.912143579831338e-07, "loss": 0.3957, "step": 18585 }, { "epoch": 0.32307184202749917, "grad_norm": 2.8430447431720953, "learning_rate": 7.911914754386285e-07, "loss": 0.346, "step": 18586 }, { "epoch": 0.323089224565002, "grad_norm": 2.2204466933161897, "learning_rate": 7.911685919711801e-07, "loss": 0.5626, "step": 18587 }, { "epoch": 0.32310660710250483, "grad_norm": 2.338307202040023, "learning_rate": 7.911457075808607e-07, "loss": 0.3819, "step": 18588 }, { "epoch": 0.32312398964000766, "grad_norm": 2.808945176079028, "learning_rate": 7.911228222677435e-07, "loss": 0.3954, "step": 18589 }, { "epoch": 0.3231413721775105, "grad_norm": 2.470723200689698, "learning_rate": 7.910999360319004e-07, "loss": 0.3828, "step": 18590 }, { "epoch": 0.3231587547150133, "grad_norm": 1.7262606981140804, "learning_rate": 7.910770488734045e-07, "loss": 0.3976, "step": 18591 }, { "epoch": 0.32317613725251615, "grad_norm": 1.7872169796898048, "learning_rate": 7.910541607923278e-07, "loss": 0.3856, "step": 18592 }, { "epoch": 0.3231935197900189, "grad_norm": 1.2020026602024045, "learning_rate": 7.910312717887433e-07, "loss": 0.5695, "step": 18593 }, { "epoch": 0.32321090232752175, "grad_norm": 1.5772545394621411, "learning_rate": 7.910083818627232e-07, "loss": 0.5714, "step": 18594 }, { "epoch": 0.3232282848650246, "grad_norm": 1.3140674435418707, "learning_rate": 7.909854910143403e-07, "loss": 0.3987, "step": 18595 }, { "epoch": 0.3232456674025274, "grad_norm": 1.8694876978632375, "learning_rate": 7.90962599243667e-07, "loss": 0.2428, "step": 18596 }, { "epoch": 0.32326304994003024, "grad_norm": 1.1928704908735663, "learning_rate": 7.909397065507761e-07, "loss": 0.4607, "step": 18597 }, { "epoch": 0.3232804324775331, "grad_norm": 1.829751779897285, "learning_rate": 7.909168129357398e-07, "loss": 0.3768, "step": 18598 }, { "epoch": 0.3232978150150359, "grad_norm": 1.7073720117343796, "learning_rate": 7.908939183986309e-07, "loss": 0.2895, "step": 18599 }, { "epoch": 0.32331519755253874, "grad_norm": 2.7174502314648983, "learning_rate": 7.908710229395219e-07, "loss": 1.0687, "step": 18600 }, { "epoch": 0.32333258009004157, "grad_norm": 1.3849883523044983, "learning_rate": 7.908481265584855e-07, "loss": 0.4436, "step": 18601 }, { "epoch": 0.3233499626275444, "grad_norm": 1.462460954892751, "learning_rate": 7.908252292555939e-07, "loss": 0.4039, "step": 18602 }, { "epoch": 0.32336734516504717, "grad_norm": 1.9213878464791714, "learning_rate": 7.9080233103092e-07, "loss": 0.3264, "step": 18603 }, { "epoch": 0.32338472770255, "grad_norm": 1.6049267009694497, "learning_rate": 7.907794318845363e-07, "loss": 0.2679, "step": 18604 }, { "epoch": 0.32340211024005283, "grad_norm": 6.546554228062859, "learning_rate": 7.907565318165155e-07, "loss": 0.4252, "step": 18605 }, { "epoch": 0.32341949277755566, "grad_norm": 1.758869999050025, "learning_rate": 7.907336308269299e-07, "loss": 0.3142, "step": 18606 }, { "epoch": 0.3234368753150585, "grad_norm": 2.1294749527032497, "learning_rate": 7.907107289158523e-07, "loss": 0.3834, "step": 18607 }, { "epoch": 0.3234542578525613, "grad_norm": 2.7601000241423392, "learning_rate": 7.906878260833552e-07, "loss": 0.3891, "step": 18608 }, { "epoch": 0.32347164039006415, "grad_norm": 1.5689066202282507, "learning_rate": 7.906649223295113e-07, "loss": 0.2668, "step": 18609 }, { "epoch": 0.323489022927567, "grad_norm": 2.0647626190065065, "learning_rate": 7.90642017654393e-07, "loss": 0.3206, "step": 18610 }, { "epoch": 0.3235064054650698, "grad_norm": 1.7063147292806067, "learning_rate": 7.906191120580732e-07, "loss": 0.3062, "step": 18611 }, { "epoch": 0.32352378800257264, "grad_norm": 2.7939387087590553, "learning_rate": 7.905962055406241e-07, "loss": 0.5863, "step": 18612 }, { "epoch": 0.3235411705400754, "grad_norm": 3.0207115658151054, "learning_rate": 7.905732981021187e-07, "loss": 0.4347, "step": 18613 }, { "epoch": 0.32355855307757825, "grad_norm": 1.76428713919049, "learning_rate": 7.905503897426293e-07, "loss": 0.2876, "step": 18614 }, { "epoch": 0.3235759356150811, "grad_norm": 2.708490606427588, "learning_rate": 7.905274804622286e-07, "loss": 0.4044, "step": 18615 }, { "epoch": 0.3235933181525839, "grad_norm": 1.7403039284104431, "learning_rate": 7.905045702609893e-07, "loss": 0.2273, "step": 18616 }, { "epoch": 0.32361070069008674, "grad_norm": 2.899675672197759, "learning_rate": 7.90481659138984e-07, "loss": 0.3502, "step": 18617 }, { "epoch": 0.32362808322758957, "grad_norm": 2.90558930758297, "learning_rate": 7.904587470962854e-07, "loss": 0.3065, "step": 18618 }, { "epoch": 0.3236454657650924, "grad_norm": 1.4762286501946928, "learning_rate": 7.904358341329658e-07, "loss": 0.3841, "step": 18619 }, { "epoch": 0.32366284830259523, "grad_norm": 1.03192945410033, "learning_rate": 7.90412920249098e-07, "loss": 0.336, "step": 18620 }, { "epoch": 0.32368023084009806, "grad_norm": 1.5734357198810978, "learning_rate": 7.903900054447548e-07, "loss": 0.5748, "step": 18621 }, { "epoch": 0.32369761337760083, "grad_norm": 1.8655702949794393, "learning_rate": 7.903670897200085e-07, "loss": 0.3494, "step": 18622 }, { "epoch": 0.32371499591510366, "grad_norm": 1.0532395186201897, "learning_rate": 7.903441730749319e-07, "loss": 0.3423, "step": 18623 }, { "epoch": 0.3237323784526065, "grad_norm": 2.482401013659871, "learning_rate": 7.903212555095979e-07, "loss": 0.3951, "step": 18624 }, { "epoch": 0.3237497609901093, "grad_norm": 2.297736593714424, "learning_rate": 7.902983370240787e-07, "loss": 0.4848, "step": 18625 }, { "epoch": 0.32376714352761216, "grad_norm": 1.9140642169517368, "learning_rate": 7.902754176184471e-07, "loss": 0.4267, "step": 18626 }, { "epoch": 0.323784526065115, "grad_norm": 1.348899193248259, "learning_rate": 7.902524972927759e-07, "loss": 0.4771, "step": 18627 }, { "epoch": 0.3238019086026178, "grad_norm": 1.5045520107938515, "learning_rate": 7.902295760471375e-07, "loss": 0.3397, "step": 18628 }, { "epoch": 0.32381929114012065, "grad_norm": 1.710573480799242, "learning_rate": 7.902066538816047e-07, "loss": 0.4767, "step": 18629 }, { "epoch": 0.3238366736776235, "grad_norm": 2.0291238804646596, "learning_rate": 7.901837307962501e-07, "loss": 0.4367, "step": 18630 }, { "epoch": 0.3238540562151263, "grad_norm": 1.994265022642585, "learning_rate": 7.901608067911463e-07, "loss": 0.2896, "step": 18631 }, { "epoch": 0.3238714387526291, "grad_norm": 0.741219303691937, "learning_rate": 7.901378818663662e-07, "loss": 0.2401, "step": 18632 }, { "epoch": 0.3238888212901319, "grad_norm": 3.533412963120907, "learning_rate": 7.901149560219822e-07, "loss": 0.3922, "step": 18633 }, { "epoch": 0.32390620382763474, "grad_norm": 2.2980449791942, "learning_rate": 7.900920292580671e-07, "loss": 0.4496, "step": 18634 }, { "epoch": 0.3239235863651376, "grad_norm": 0.9982275009105862, "learning_rate": 7.900691015746934e-07, "loss": 0.323, "step": 18635 }, { "epoch": 0.3239409689026404, "grad_norm": 1.6244283255824434, "learning_rate": 7.900461729719341e-07, "loss": 0.5351, "step": 18636 }, { "epoch": 0.32395835144014323, "grad_norm": 0.8818232324909229, "learning_rate": 7.900232434498615e-07, "loss": 0.2894, "step": 18637 }, { "epoch": 0.32397573397764606, "grad_norm": 2.4943612084822946, "learning_rate": 7.900003130085485e-07, "loss": 0.4762, "step": 18638 }, { "epoch": 0.3239931165151489, "grad_norm": 1.374920143013177, "learning_rate": 7.899773816480677e-07, "loss": 0.1773, "step": 18639 }, { "epoch": 0.3240104990526517, "grad_norm": 1.9002175230143397, "learning_rate": 7.899544493684919e-07, "loss": 0.3782, "step": 18640 }, { "epoch": 0.32402788159015455, "grad_norm": 1.9034662537674185, "learning_rate": 7.899315161698937e-07, "loss": 0.5433, "step": 18641 }, { "epoch": 0.32404526412765733, "grad_norm": 1.7386897559035257, "learning_rate": 7.899085820523456e-07, "loss": 0.8363, "step": 18642 }, { "epoch": 0.32406264666516016, "grad_norm": 5.310463957568594, "learning_rate": 7.898856470159206e-07, "loss": 0.6549, "step": 18643 }, { "epoch": 0.324080029202663, "grad_norm": 1.1591161384860498, "learning_rate": 7.898627110606913e-07, "loss": 0.4392, "step": 18644 }, { "epoch": 0.3240974117401658, "grad_norm": 2.1105348530795807, "learning_rate": 7.898397741867302e-07, "loss": 0.314, "step": 18645 }, { "epoch": 0.32411479427766865, "grad_norm": 3.48712177930973, "learning_rate": 7.898168363941103e-07, "loss": 0.5369, "step": 18646 }, { "epoch": 0.3241321768151715, "grad_norm": 2.333046106833145, "learning_rate": 7.89793897682904e-07, "loss": 0.4612, "step": 18647 }, { "epoch": 0.3241495593526743, "grad_norm": 1.1993957369699306, "learning_rate": 7.897709580531844e-07, "loss": 0.3513, "step": 18648 }, { "epoch": 0.32416694189017714, "grad_norm": 4.219499242148293, "learning_rate": 7.897480175050237e-07, "loss": 0.4413, "step": 18649 }, { "epoch": 0.32418432442767997, "grad_norm": 2.9719816937934835, "learning_rate": 7.89725076038495e-07, "loss": 0.2311, "step": 18650 }, { "epoch": 0.3242017069651828, "grad_norm": 2.5378808268187205, "learning_rate": 7.897021336536709e-07, "loss": 0.6058, "step": 18651 }, { "epoch": 0.3242190895026856, "grad_norm": 2.2930394568245642, "learning_rate": 7.896791903506242e-07, "loss": 0.2915, "step": 18652 }, { "epoch": 0.3242364720401884, "grad_norm": 1.6995687347201138, "learning_rate": 7.896562461294275e-07, "loss": 0.4542, "step": 18653 }, { "epoch": 0.32425385457769124, "grad_norm": 1.2191464827670682, "learning_rate": 7.896333009901535e-07, "loss": 0.4241, "step": 18654 }, { "epoch": 0.32427123711519407, "grad_norm": 2.604977232070443, "learning_rate": 7.89610354932875e-07, "loss": 0.3611, "step": 18655 }, { "epoch": 0.3242886196526969, "grad_norm": 1.6351254985231187, "learning_rate": 7.895874079576647e-07, "loss": 0.3167, "step": 18656 }, { "epoch": 0.3243060021901997, "grad_norm": 1.8965682878634427, "learning_rate": 7.895644600645954e-07, "loss": 0.3105, "step": 18657 }, { "epoch": 0.32432338472770256, "grad_norm": 0.9199669992640791, "learning_rate": 7.895415112537398e-07, "loss": 0.1449, "step": 18658 }, { "epoch": 0.3243407672652054, "grad_norm": 1.9105176808883644, "learning_rate": 7.895185615251705e-07, "loss": 0.2358, "step": 18659 }, { "epoch": 0.3243581498027082, "grad_norm": 1.2317280395922556, "learning_rate": 7.894956108789605e-07, "loss": 0.5486, "step": 18660 }, { "epoch": 0.32437553234021105, "grad_norm": 1.2802121801327375, "learning_rate": 7.894726593151822e-07, "loss": 0.2346, "step": 18661 }, { "epoch": 0.3243929148777138, "grad_norm": 1.5417024186312707, "learning_rate": 7.894497068339088e-07, "loss": 0.3789, "step": 18662 }, { "epoch": 0.32441029741521665, "grad_norm": 1.960226551437297, "learning_rate": 7.894267534352126e-07, "loss": 0.7302, "step": 18663 }, { "epoch": 0.3244276799527195, "grad_norm": 1.9412372883097848, "learning_rate": 7.894037991191664e-07, "loss": 0.4142, "step": 18664 }, { "epoch": 0.3244450624902223, "grad_norm": 2.392633748319697, "learning_rate": 7.893808438858433e-07, "loss": 0.3857, "step": 18665 }, { "epoch": 0.32446244502772514, "grad_norm": 1.775408569007323, "learning_rate": 7.89357887735316e-07, "loss": 0.3597, "step": 18666 }, { "epoch": 0.324479827565228, "grad_norm": 1.5248207258211353, "learning_rate": 7.893349306676569e-07, "loss": 0.2988, "step": 18667 }, { "epoch": 0.3244972101027308, "grad_norm": 2.601555685856361, "learning_rate": 7.893119726829391e-07, "loss": 0.6945, "step": 18668 }, { "epoch": 0.32451459264023363, "grad_norm": 1.2634928689336875, "learning_rate": 7.892890137812352e-07, "loss": 0.4894, "step": 18669 }, { "epoch": 0.32453197517773646, "grad_norm": 4.567836108899066, "learning_rate": 7.892660539626179e-07, "loss": 0.3619, "step": 18670 }, { "epoch": 0.3245493577152393, "grad_norm": 2.299610435467523, "learning_rate": 7.892430932271601e-07, "loss": 0.3108, "step": 18671 }, { "epoch": 0.32456674025274207, "grad_norm": 1.5424729805691988, "learning_rate": 7.892201315749347e-07, "loss": 0.1785, "step": 18672 }, { "epoch": 0.3245841227902449, "grad_norm": 1.1848145456654098, "learning_rate": 7.891971690060141e-07, "loss": 0.3343, "step": 18673 }, { "epoch": 0.32460150532774773, "grad_norm": 1.6532709661949934, "learning_rate": 7.891742055204716e-07, "loss": 0.4427, "step": 18674 }, { "epoch": 0.32461888786525056, "grad_norm": 1.8190372822879206, "learning_rate": 7.891512411183796e-07, "loss": 0.3585, "step": 18675 }, { "epoch": 0.3246362704027534, "grad_norm": 1.9525912849829306, "learning_rate": 7.89128275799811e-07, "loss": 0.3525, "step": 18676 }, { "epoch": 0.3246536529402562, "grad_norm": 1.3066440327961777, "learning_rate": 7.891053095648386e-07, "loss": 0.2819, "step": 18677 }, { "epoch": 0.32467103547775905, "grad_norm": 2.059686812751038, "learning_rate": 7.890823424135352e-07, "loss": 0.2646, "step": 18678 }, { "epoch": 0.3246884180152619, "grad_norm": 1.9774189496672356, "learning_rate": 7.890593743459735e-07, "loss": 0.4547, "step": 18679 }, { "epoch": 0.3247058005527647, "grad_norm": 2.1718361214631994, "learning_rate": 7.890364053622263e-07, "loss": 0.3576, "step": 18680 }, { "epoch": 0.32472318309026754, "grad_norm": 1.1129741679691831, "learning_rate": 7.890134354623666e-07, "loss": 0.3634, "step": 18681 }, { "epoch": 0.3247405656277703, "grad_norm": 1.4332993314076583, "learning_rate": 7.889904646464671e-07, "loss": 0.6002, "step": 18682 }, { "epoch": 0.32475794816527315, "grad_norm": 3.128683795531801, "learning_rate": 7.889674929146005e-07, "loss": 0.5631, "step": 18683 }, { "epoch": 0.324775330702776, "grad_norm": 2.0915962396513783, "learning_rate": 7.889445202668396e-07, "loss": 0.2711, "step": 18684 }, { "epoch": 0.3247927132402788, "grad_norm": 1.214119190728347, "learning_rate": 7.889215467032575e-07, "loss": 0.2179, "step": 18685 }, { "epoch": 0.32481009577778164, "grad_norm": 2.803447227910529, "learning_rate": 7.888985722239266e-07, "loss": 0.3854, "step": 18686 }, { "epoch": 0.32482747831528447, "grad_norm": 1.5733003924395843, "learning_rate": 7.888755968289202e-07, "loss": 0.4417, "step": 18687 }, { "epoch": 0.3248448608527873, "grad_norm": 2.67794375706496, "learning_rate": 7.888526205183106e-07, "loss": 0.3492, "step": 18688 }, { "epoch": 0.32486224339029013, "grad_norm": 2.710060125767839, "learning_rate": 7.888296432921709e-07, "loss": 0.4885, "step": 18689 }, { "epoch": 0.32487962592779296, "grad_norm": 2.063888091347547, "learning_rate": 7.88806665150574e-07, "loss": 0.4413, "step": 18690 }, { "epoch": 0.3248970084652958, "grad_norm": 1.3269319240740658, "learning_rate": 7.887836860935927e-07, "loss": 0.2357, "step": 18691 }, { "epoch": 0.32491439100279856, "grad_norm": 2.148131141431237, "learning_rate": 7.887607061212997e-07, "loss": 0.4038, "step": 18692 }, { "epoch": 0.3249317735403014, "grad_norm": 2.505542490217353, "learning_rate": 7.887377252337679e-07, "loss": 0.3283, "step": 18693 }, { "epoch": 0.3249491560778042, "grad_norm": 1.226146076813301, "learning_rate": 7.887147434310702e-07, "loss": 0.2947, "step": 18694 }, { "epoch": 0.32496653861530705, "grad_norm": 2.8080846089564577, "learning_rate": 7.886917607132793e-07, "loss": 0.5098, "step": 18695 }, { "epoch": 0.3249839211528099, "grad_norm": 1.5393890408810051, "learning_rate": 7.886687770804681e-07, "loss": 0.408, "step": 18696 }, { "epoch": 0.3250013036903127, "grad_norm": 2.131429633045553, "learning_rate": 7.886457925327097e-07, "loss": 0.6914, "step": 18697 }, { "epoch": 0.32501868622781555, "grad_norm": 1.4252925181781204, "learning_rate": 7.886228070700764e-07, "loss": 0.3924, "step": 18698 }, { "epoch": 0.3250360687653184, "grad_norm": 1.3677937496015715, "learning_rate": 7.885998206926416e-07, "loss": 0.3892, "step": 18699 }, { "epoch": 0.3250534513028212, "grad_norm": 1.3617424559084885, "learning_rate": 7.88576833400478e-07, "loss": 0.5421, "step": 18700 }, { "epoch": 0.32507083384032404, "grad_norm": 1.5968919157154562, "learning_rate": 7.885538451936583e-07, "loss": 0.4177, "step": 18701 }, { "epoch": 0.3250882163778268, "grad_norm": 2.0181732389239833, "learning_rate": 7.885308560722555e-07, "loss": 0.5218, "step": 18702 }, { "epoch": 0.32510559891532964, "grad_norm": 1.9043156329917503, "learning_rate": 7.885078660363424e-07, "loss": 0.3968, "step": 18703 }, { "epoch": 0.32512298145283247, "grad_norm": 4.0196766081812285, "learning_rate": 7.884848750859918e-07, "loss": 0.3738, "step": 18704 }, { "epoch": 0.3251403639903353, "grad_norm": 1.6274789372699214, "learning_rate": 7.884618832212768e-07, "loss": 0.4658, "step": 18705 }, { "epoch": 0.32515774652783813, "grad_norm": 2.545269131186822, "learning_rate": 7.884388904422701e-07, "loss": 0.285, "step": 18706 }, { "epoch": 0.32517512906534096, "grad_norm": 2.703552107845521, "learning_rate": 7.884158967490446e-07, "loss": 0.3666, "step": 18707 }, { "epoch": 0.3251925116028438, "grad_norm": 1.7413276847749317, "learning_rate": 7.883929021416732e-07, "loss": 0.6396, "step": 18708 }, { "epoch": 0.3252098941403466, "grad_norm": 1.4793249794523595, "learning_rate": 7.883699066202287e-07, "loss": 0.4394, "step": 18709 }, { "epoch": 0.32522727667784945, "grad_norm": 1.3022599789956166, "learning_rate": 7.883469101847842e-07, "loss": 0.312, "step": 18710 }, { "epoch": 0.3252446592153523, "grad_norm": 1.1659134013471877, "learning_rate": 7.883239128354125e-07, "loss": 0.4099, "step": 18711 }, { "epoch": 0.32526204175285506, "grad_norm": 1.4043579918672922, "learning_rate": 7.883009145721863e-07, "loss": 0.3814, "step": 18712 }, { "epoch": 0.3252794242903579, "grad_norm": 2.2520698653964257, "learning_rate": 7.882779153951786e-07, "loss": 0.4333, "step": 18713 }, { "epoch": 0.3252968068278607, "grad_norm": 2.19236367131778, "learning_rate": 7.882549153044625e-07, "loss": 0.2435, "step": 18714 }, { "epoch": 0.32531418936536355, "grad_norm": 2.0482375656016782, "learning_rate": 7.882319143001105e-07, "loss": 0.371, "step": 18715 }, { "epoch": 0.3253315719028664, "grad_norm": 1.9515398149813756, "learning_rate": 7.88208912382196e-07, "loss": 0.3283, "step": 18716 }, { "epoch": 0.3253489544403692, "grad_norm": 1.482464989499248, "learning_rate": 7.881859095507915e-07, "loss": 0.2077, "step": 18717 }, { "epoch": 0.32536633697787204, "grad_norm": 1.8973431560868805, "learning_rate": 7.8816290580597e-07, "loss": 0.4013, "step": 18718 }, { "epoch": 0.32538371951537487, "grad_norm": 2.283272168305041, "learning_rate": 7.881399011478045e-07, "loss": 0.4284, "step": 18719 }, { "epoch": 0.3254011020528777, "grad_norm": 2.791368775551634, "learning_rate": 7.881168955763679e-07, "loss": 0.488, "step": 18720 }, { "epoch": 0.32541848459038053, "grad_norm": 1.9844126069208041, "learning_rate": 7.88093889091733e-07, "loss": 0.3102, "step": 18721 }, { "epoch": 0.3254358671278833, "grad_norm": 3.2138561925724347, "learning_rate": 7.88070881693973e-07, "loss": 0.4623, "step": 18722 }, { "epoch": 0.32545324966538614, "grad_norm": 1.3953379464668794, "learning_rate": 7.880478733831605e-07, "loss": 0.2327, "step": 18723 }, { "epoch": 0.32547063220288897, "grad_norm": 1.7559683358176748, "learning_rate": 7.880248641593685e-07, "loss": 0.2902, "step": 18724 }, { "epoch": 0.3254880147403918, "grad_norm": 0.9810920005215471, "learning_rate": 7.880018540226703e-07, "loss": 0.2927, "step": 18725 }, { "epoch": 0.3255053972778946, "grad_norm": 2.4042950549870343, "learning_rate": 7.879788429731382e-07, "loss": 0.3559, "step": 18726 }, { "epoch": 0.32552277981539746, "grad_norm": 1.2365312788243132, "learning_rate": 7.879558310108455e-07, "loss": 0.2115, "step": 18727 }, { "epoch": 0.3255401623529003, "grad_norm": 1.8446845116592523, "learning_rate": 7.879328181358652e-07, "loss": 0.2978, "step": 18728 }, { "epoch": 0.3255575448904031, "grad_norm": 1.7197945986689624, "learning_rate": 7.879098043482701e-07, "loss": 0.3003, "step": 18729 }, { "epoch": 0.32557492742790595, "grad_norm": 3.122128573071549, "learning_rate": 7.87886789648133e-07, "loss": 0.2123, "step": 18730 }, { "epoch": 0.3255923099654088, "grad_norm": 1.1912113455668465, "learning_rate": 7.878637740355272e-07, "loss": 0.179, "step": 18731 }, { "epoch": 0.32560969250291155, "grad_norm": 1.2829646713183618, "learning_rate": 7.878407575105254e-07, "loss": 0.47, "step": 18732 }, { "epoch": 0.3256270750404144, "grad_norm": 1.652746686351323, "learning_rate": 7.878177400732007e-07, "loss": 0.4533, "step": 18733 }, { "epoch": 0.3256444575779172, "grad_norm": 1.4458364911527233, "learning_rate": 7.87794721723626e-07, "loss": 0.4675, "step": 18734 }, { "epoch": 0.32566184011542004, "grad_norm": 2.201537470448003, "learning_rate": 7.877717024618742e-07, "loss": 0.259, "step": 18735 }, { "epoch": 0.3256792226529229, "grad_norm": 2.1185830654411615, "learning_rate": 7.877486822880182e-07, "loss": 0.3459, "step": 18736 }, { "epoch": 0.3256966051904257, "grad_norm": 1.6527803723436623, "learning_rate": 7.877256612021311e-07, "loss": 0.3169, "step": 18737 }, { "epoch": 0.32571398772792853, "grad_norm": 1.6123176744627212, "learning_rate": 7.87702639204286e-07, "loss": 0.4205, "step": 18738 }, { "epoch": 0.32573137026543136, "grad_norm": 1.9961477503849827, "learning_rate": 7.876796162945553e-07, "loss": 0.4605, "step": 18739 }, { "epoch": 0.3257487528029342, "grad_norm": 2.539026026453417, "learning_rate": 7.876565924730127e-07, "loss": 0.4646, "step": 18740 }, { "epoch": 0.325766135340437, "grad_norm": 3.1277717422209395, "learning_rate": 7.876335677397308e-07, "loss": 0.3949, "step": 18741 }, { "epoch": 0.3257835178779398, "grad_norm": 1.8440117012594408, "learning_rate": 7.876105420947825e-07, "loss": 0.3881, "step": 18742 }, { "epoch": 0.32580090041544263, "grad_norm": 1.5137703942653706, "learning_rate": 7.87587515538241e-07, "loss": 0.6674, "step": 18743 }, { "epoch": 0.32581828295294546, "grad_norm": 1.9788096602439844, "learning_rate": 7.875644880701792e-07, "loss": 0.4989, "step": 18744 }, { "epoch": 0.3258356654904483, "grad_norm": 2.0216982602035767, "learning_rate": 7.875414596906699e-07, "loss": 0.3216, "step": 18745 }, { "epoch": 0.3258530480279511, "grad_norm": 1.8210388555140233, "learning_rate": 7.875184303997863e-07, "loss": 0.5208, "step": 18746 }, { "epoch": 0.32587043056545395, "grad_norm": 1.7518987583218033, "learning_rate": 7.874954001976016e-07, "loss": 0.2662, "step": 18747 }, { "epoch": 0.3258878131029568, "grad_norm": 2.6771291041334835, "learning_rate": 7.874723690841884e-07, "loss": 0.3166, "step": 18748 }, { "epoch": 0.3259051956404596, "grad_norm": 3.4217168917591456, "learning_rate": 7.874493370596198e-07, "loss": 0.5049, "step": 18749 }, { "epoch": 0.32592257817796244, "grad_norm": 2.0960517448823266, "learning_rate": 7.874263041239688e-07, "loss": 0.2954, "step": 18750 }, { "epoch": 0.32593996071546527, "grad_norm": 1.7745529985429693, "learning_rate": 7.874032702773086e-07, "loss": 0.4016, "step": 18751 }, { "epoch": 0.32595734325296805, "grad_norm": 2.3276004054826473, "learning_rate": 7.873802355197118e-07, "loss": 0.6294, "step": 18752 }, { "epoch": 0.3259747257904709, "grad_norm": 1.4030892748471622, "learning_rate": 7.87357199851252e-07, "loss": 0.2441, "step": 18753 }, { "epoch": 0.3259921083279737, "grad_norm": 2.4432360498251775, "learning_rate": 7.873341632720017e-07, "loss": 0.2711, "step": 18754 }, { "epoch": 0.32600949086547654, "grad_norm": 1.5763872392181157, "learning_rate": 7.87311125782034e-07, "loss": 0.2995, "step": 18755 }, { "epoch": 0.32602687340297937, "grad_norm": 1.3008935708180136, "learning_rate": 7.872880873814222e-07, "loss": 0.4578, "step": 18756 }, { "epoch": 0.3260442559404822, "grad_norm": 0.8598814108196527, "learning_rate": 7.872650480702391e-07, "loss": 0.2437, "step": 18757 }, { "epoch": 0.326061638477985, "grad_norm": 2.2365126167432594, "learning_rate": 7.872420078485577e-07, "loss": 0.276, "step": 18758 }, { "epoch": 0.32607902101548786, "grad_norm": 1.4002900108734113, "learning_rate": 7.87218966716451e-07, "loss": 0.2676, "step": 18759 }, { "epoch": 0.3260964035529907, "grad_norm": 1.651798372369448, "learning_rate": 7.871959246739924e-07, "loss": 0.3432, "step": 18760 }, { "epoch": 0.32611378609049346, "grad_norm": 1.7669815571915775, "learning_rate": 7.871728817212543e-07, "loss": 0.3701, "step": 18761 }, { "epoch": 0.3261311686279963, "grad_norm": 1.8760367863093608, "learning_rate": 7.871498378583101e-07, "loss": 0.3444, "step": 18762 }, { "epoch": 0.3261485511654991, "grad_norm": 3.205945966186617, "learning_rate": 7.87126793085233e-07, "loss": 0.5973, "step": 18763 }, { "epoch": 0.32616593370300195, "grad_norm": 1.8012898431520326, "learning_rate": 7.871037474020958e-07, "loss": 0.352, "step": 18764 }, { "epoch": 0.3261833162405048, "grad_norm": 1.4042268442740504, "learning_rate": 7.870807008089717e-07, "loss": 0.2561, "step": 18765 }, { "epoch": 0.3262006987780076, "grad_norm": 1.5159486848383257, "learning_rate": 7.870576533059335e-07, "loss": 0.4707, "step": 18766 }, { "epoch": 0.32621808131551044, "grad_norm": 1.3776674654386214, "learning_rate": 7.870346048930545e-07, "loss": 0.2582, "step": 18767 }, { "epoch": 0.3262354638530133, "grad_norm": 1.528795455805768, "learning_rate": 7.870115555704076e-07, "loss": 0.4251, "step": 18768 }, { "epoch": 0.3262528463905161, "grad_norm": 2.207012026303677, "learning_rate": 7.869885053380661e-07, "loss": 0.4612, "step": 18769 }, { "epoch": 0.32627022892801893, "grad_norm": 1.9939544833764085, "learning_rate": 7.869654541961026e-07, "loss": 0.23, "step": 18770 }, { "epoch": 0.3262876114655217, "grad_norm": 1.393544706369717, "learning_rate": 7.869424021445905e-07, "loss": 0.2772, "step": 18771 }, { "epoch": 0.32630499400302454, "grad_norm": 1.5446590257563844, "learning_rate": 7.869193491836028e-07, "loss": 0.2787, "step": 18772 }, { "epoch": 0.32632237654052737, "grad_norm": 3.452885157658387, "learning_rate": 7.868962953132126e-07, "loss": 0.4838, "step": 18773 }, { "epoch": 0.3263397590780302, "grad_norm": 1.3827722064699253, "learning_rate": 7.86873240533493e-07, "loss": 0.3627, "step": 18774 }, { "epoch": 0.32635714161553303, "grad_norm": 1.6087546041495533, "learning_rate": 7.868501848445168e-07, "loss": 0.4629, "step": 18775 }, { "epoch": 0.32637452415303586, "grad_norm": 1.2609291468671586, "learning_rate": 7.868271282463575e-07, "loss": 0.2745, "step": 18776 }, { "epoch": 0.3263919066905387, "grad_norm": 2.8827456349903446, "learning_rate": 7.86804070739088e-07, "loss": 0.5075, "step": 18777 }, { "epoch": 0.3264092892280415, "grad_norm": 2.9363899731404803, "learning_rate": 7.86781012322781e-07, "loss": 0.4485, "step": 18778 }, { "epoch": 0.32642667176554435, "grad_norm": 2.499239775742073, "learning_rate": 7.867579529975103e-07, "loss": 0.305, "step": 18779 }, { "epoch": 0.3264440543030472, "grad_norm": 1.2015125897329475, "learning_rate": 7.867348927633484e-07, "loss": 0.3955, "step": 18780 }, { "epoch": 0.32646143684054996, "grad_norm": 0.9882287993377579, "learning_rate": 7.867118316203687e-07, "loss": 0.4405, "step": 18781 }, { "epoch": 0.3264788193780528, "grad_norm": 1.5806240704226695, "learning_rate": 7.866887695686442e-07, "loss": 0.1969, "step": 18782 }, { "epoch": 0.3264962019155556, "grad_norm": 2.138136676054754, "learning_rate": 7.866657066082479e-07, "loss": 0.3778, "step": 18783 }, { "epoch": 0.32651358445305845, "grad_norm": 1.845352050003401, "learning_rate": 7.86642642739253e-07, "loss": 0.4822, "step": 18784 }, { "epoch": 0.3265309669905613, "grad_norm": 2.0346506915717453, "learning_rate": 7.866195779617327e-07, "loss": 0.4909, "step": 18785 }, { "epoch": 0.3265483495280641, "grad_norm": 2.812149896558739, "learning_rate": 7.8659651227576e-07, "loss": 0.3314, "step": 18786 }, { "epoch": 0.32656573206556694, "grad_norm": 2.801926061511623, "learning_rate": 7.86573445681408e-07, "loss": 0.4216, "step": 18787 }, { "epoch": 0.32658311460306977, "grad_norm": 3.0859886529184113, "learning_rate": 7.865503781787497e-07, "loss": 0.3096, "step": 18788 }, { "epoch": 0.3266004971405726, "grad_norm": 0.9573772108335998, "learning_rate": 7.865273097678584e-07, "loss": 0.2999, "step": 18789 }, { "epoch": 0.32661787967807543, "grad_norm": 1.845259077809497, "learning_rate": 7.865042404488071e-07, "loss": 0.3011, "step": 18790 }, { "epoch": 0.3266352622155782, "grad_norm": 2.7380181029216524, "learning_rate": 7.864811702216692e-07, "loss": 0.4981, "step": 18791 }, { "epoch": 0.32665264475308103, "grad_norm": 2.045560708094498, "learning_rate": 7.864580990865173e-07, "loss": 0.338, "step": 18792 }, { "epoch": 0.32667002729058386, "grad_norm": 1.698343220450798, "learning_rate": 7.86435027043425e-07, "loss": 0.5221, "step": 18793 }, { "epoch": 0.3266874098280867, "grad_norm": 2.81830857756976, "learning_rate": 7.86411954092465e-07, "loss": 0.3467, "step": 18794 }, { "epoch": 0.3267047923655895, "grad_norm": 2.388428253080511, "learning_rate": 7.863888802337109e-07, "loss": 0.4354, "step": 18795 }, { "epoch": 0.32672217490309235, "grad_norm": 1.959104684312011, "learning_rate": 7.863658054672355e-07, "loss": 0.4164, "step": 18796 }, { "epoch": 0.3267395574405952, "grad_norm": 3.493943475075917, "learning_rate": 7.863427297931121e-07, "loss": 0.4903, "step": 18797 }, { "epoch": 0.326756939978098, "grad_norm": 1.1218711113016218, "learning_rate": 7.863196532114138e-07, "loss": 0.2777, "step": 18798 }, { "epoch": 0.32677432251560085, "grad_norm": 1.4583070316230518, "learning_rate": 7.862965757222137e-07, "loss": 0.4903, "step": 18799 }, { "epoch": 0.3267917050531037, "grad_norm": 1.8327808916587103, "learning_rate": 7.86273497325585e-07, "loss": 0.4803, "step": 18800 }, { "epoch": 0.32680908759060645, "grad_norm": 1.7502461126826114, "learning_rate": 7.862504180216007e-07, "loss": 0.4368, "step": 18801 }, { "epoch": 0.3268264701281093, "grad_norm": 2.4359685347678566, "learning_rate": 7.862273378103341e-07, "loss": 0.3786, "step": 18802 }, { "epoch": 0.3268438526656121, "grad_norm": 1.1955044828470782, "learning_rate": 7.862042566918584e-07, "loss": 0.3074, "step": 18803 }, { "epoch": 0.32686123520311494, "grad_norm": 2.75747626787951, "learning_rate": 7.861811746662468e-07, "loss": 0.3627, "step": 18804 }, { "epoch": 0.32687861774061777, "grad_norm": 1.1825213874623384, "learning_rate": 7.86158091733572e-07, "loss": 0.3827, "step": 18805 }, { "epoch": 0.3268960002781206, "grad_norm": 2.294404779210857, "learning_rate": 7.861350078939078e-07, "loss": 0.2619, "step": 18806 }, { "epoch": 0.32691338281562343, "grad_norm": 1.464711662211549, "learning_rate": 7.86111923147327e-07, "loss": 0.25, "step": 18807 }, { "epoch": 0.32693076535312626, "grad_norm": 1.467247494559363, "learning_rate": 7.860888374939027e-07, "loss": 0.1803, "step": 18808 }, { "epoch": 0.3269481478906291, "grad_norm": 2.385202782683804, "learning_rate": 7.860657509337083e-07, "loss": 0.2033, "step": 18809 }, { "epoch": 0.3269655304281319, "grad_norm": 1.939390068469025, "learning_rate": 7.860426634668169e-07, "loss": 0.3843, "step": 18810 }, { "epoch": 0.3269829129656347, "grad_norm": 3.4641485206225804, "learning_rate": 7.860195750933017e-07, "loss": 0.487, "step": 18811 }, { "epoch": 0.32700029550313753, "grad_norm": 2.5263199428845518, "learning_rate": 7.859964858132355e-07, "loss": 0.3108, "step": 18812 }, { "epoch": 0.32701767804064036, "grad_norm": 2.0770760512655553, "learning_rate": 7.859733956266923e-07, "loss": 0.4295, "step": 18813 }, { "epoch": 0.3270350605781432, "grad_norm": 1.4030799377147127, "learning_rate": 7.859503045337445e-07, "loss": 0.259, "step": 18814 }, { "epoch": 0.327052443115646, "grad_norm": 1.5246203854567675, "learning_rate": 7.859272125344656e-07, "loss": 0.2312, "step": 18815 }, { "epoch": 0.32706982565314885, "grad_norm": 1.8683725389235686, "learning_rate": 7.859041196289288e-07, "loss": 0.3518, "step": 18816 }, { "epoch": 0.3270872081906517, "grad_norm": 1.5821906266768901, "learning_rate": 7.858810258172074e-07, "loss": 0.3032, "step": 18817 }, { "epoch": 0.3271045907281545, "grad_norm": 1.3485041981371226, "learning_rate": 7.858579310993743e-07, "loss": 0.3893, "step": 18818 }, { "epoch": 0.32712197326565734, "grad_norm": 1.3771134711059252, "learning_rate": 7.858348354755029e-07, "loss": 0.2174, "step": 18819 }, { "epoch": 0.32713935580316017, "grad_norm": 1.844364708145331, "learning_rate": 7.858117389456664e-07, "loss": 0.2389, "step": 18820 }, { "epoch": 0.32715673834066294, "grad_norm": 3.0499061691561735, "learning_rate": 7.857886415099381e-07, "loss": 0.7193, "step": 18821 }, { "epoch": 0.3271741208781658, "grad_norm": 1.3812313591671026, "learning_rate": 7.857655431683909e-07, "loss": 0.2916, "step": 18822 }, { "epoch": 0.3271915034156686, "grad_norm": 2.0807982119400434, "learning_rate": 7.857424439210984e-07, "loss": 0.4601, "step": 18823 }, { "epoch": 0.32720888595317144, "grad_norm": 2.5633261006464774, "learning_rate": 7.857193437681334e-07, "loss": 0.5626, "step": 18824 }, { "epoch": 0.32722626849067427, "grad_norm": 1.2658610442087346, "learning_rate": 7.856962427095694e-07, "loss": 0.2273, "step": 18825 }, { "epoch": 0.3272436510281771, "grad_norm": 1.6750204047880577, "learning_rate": 7.856731407454798e-07, "loss": 0.4536, "step": 18826 }, { "epoch": 0.3272610335656799, "grad_norm": 3.608783449197374, "learning_rate": 7.856500378759372e-07, "loss": 0.469, "step": 18827 }, { "epoch": 0.32727841610318276, "grad_norm": 1.876898811775216, "learning_rate": 7.856269341010154e-07, "loss": 0.3575, "step": 18828 }, { "epoch": 0.3272957986406856, "grad_norm": 1.856324038962773, "learning_rate": 7.856038294207874e-07, "loss": 0.2818, "step": 18829 }, { "epoch": 0.3273131811781884, "grad_norm": 1.2978960106105988, "learning_rate": 7.855807238353263e-07, "loss": 0.3275, "step": 18830 }, { "epoch": 0.3273305637156912, "grad_norm": 2.473017763953956, "learning_rate": 7.855576173447056e-07, "loss": 0.4387, "step": 18831 }, { "epoch": 0.327347946253194, "grad_norm": 1.6282199060030282, "learning_rate": 7.855345099489985e-07, "loss": 0.3196, "step": 18832 }, { "epoch": 0.32736532879069685, "grad_norm": 1.1410110122369495, "learning_rate": 7.855114016482783e-07, "loss": 0.3631, "step": 18833 }, { "epoch": 0.3273827113281997, "grad_norm": 1.5263480782433179, "learning_rate": 7.854882924426178e-07, "loss": 0.3218, "step": 18834 }, { "epoch": 0.3274000938657025, "grad_norm": 1.2966634911826365, "learning_rate": 7.854651823320905e-07, "loss": 0.2061, "step": 18835 }, { "epoch": 0.32741747640320534, "grad_norm": 2.115767773326864, "learning_rate": 7.8544207131677e-07, "loss": 0.2815, "step": 18836 }, { "epoch": 0.3274348589407082, "grad_norm": 2.0789543801874317, "learning_rate": 7.85418959396729e-07, "loss": 0.3121, "step": 18837 }, { "epoch": 0.327452241478211, "grad_norm": 1.5336373959702778, "learning_rate": 7.853958465720413e-07, "loss": 0.3897, "step": 18838 }, { "epoch": 0.32746962401571383, "grad_norm": 2.995212053231688, "learning_rate": 7.853727328427797e-07, "loss": 0.5324, "step": 18839 }, { "epoch": 0.32748700655321666, "grad_norm": 2.5751494753315756, "learning_rate": 7.853496182090177e-07, "loss": 0.3509, "step": 18840 }, { "epoch": 0.32750438909071944, "grad_norm": 2.239634354924575, "learning_rate": 7.853265026708284e-07, "loss": 0.422, "step": 18841 }, { "epoch": 0.32752177162822227, "grad_norm": 1.6950128620837188, "learning_rate": 7.853033862282851e-07, "loss": 0.2816, "step": 18842 }, { "epoch": 0.3275391541657251, "grad_norm": 1.9020861949939856, "learning_rate": 7.852802688814613e-07, "loss": 0.3507, "step": 18843 }, { "epoch": 0.32755653670322793, "grad_norm": 2.4125120750834337, "learning_rate": 7.8525715063043e-07, "loss": 0.4086, "step": 18844 }, { "epoch": 0.32757391924073076, "grad_norm": 1.6665832663450186, "learning_rate": 7.852340314752646e-07, "loss": 0.486, "step": 18845 }, { "epoch": 0.3275913017782336, "grad_norm": 1.6476109799262404, "learning_rate": 7.852109114160383e-07, "loss": 0.2031, "step": 18846 }, { "epoch": 0.3276086843157364, "grad_norm": 1.551808045417586, "learning_rate": 7.851877904528243e-07, "loss": 0.3275, "step": 18847 }, { "epoch": 0.32762606685323925, "grad_norm": 1.6709663297478137, "learning_rate": 7.851646685856962e-07, "loss": 0.3568, "step": 18848 }, { "epoch": 0.3276434493907421, "grad_norm": 2.030125451820611, "learning_rate": 7.851415458147272e-07, "loss": 0.3526, "step": 18849 }, { "epoch": 0.3276608319282449, "grad_norm": 1.634962351870516, "learning_rate": 7.851184221399902e-07, "loss": 0.5101, "step": 18850 }, { "epoch": 0.3276782144657477, "grad_norm": 3.761594133984062, "learning_rate": 7.85095297561559e-07, "loss": 0.4442, "step": 18851 }, { "epoch": 0.3276955970032505, "grad_norm": 1.3177987472828714, "learning_rate": 7.850721720795066e-07, "loss": 0.2186, "step": 18852 }, { "epoch": 0.32771297954075335, "grad_norm": 1.899535743506899, "learning_rate": 7.850490456939063e-07, "loss": 0.3043, "step": 18853 }, { "epoch": 0.3277303620782562, "grad_norm": 1.3514915366967375, "learning_rate": 7.850259184048315e-07, "loss": 0.5341, "step": 18854 }, { "epoch": 0.327747744615759, "grad_norm": 1.609716568557276, "learning_rate": 7.850027902123555e-07, "loss": 0.3859, "step": 18855 }, { "epoch": 0.32776512715326184, "grad_norm": 1.7712300406300319, "learning_rate": 7.849796611165514e-07, "loss": 0.5119, "step": 18856 }, { "epoch": 0.32778250969076467, "grad_norm": 2.4563131511136396, "learning_rate": 7.849565311174929e-07, "loss": 0.6579, "step": 18857 }, { "epoch": 0.3277998922282675, "grad_norm": 3.3868403161803657, "learning_rate": 7.84933400215253e-07, "loss": 0.5188, "step": 18858 }, { "epoch": 0.32781727476577033, "grad_norm": 1.532419993634547, "learning_rate": 7.849102684099052e-07, "loss": 0.3336, "step": 18859 }, { "epoch": 0.32783465730327316, "grad_norm": 2.4842563877097725, "learning_rate": 7.848871357015226e-07, "loss": 0.2492, "step": 18860 }, { "epoch": 0.32785203984077593, "grad_norm": 4.056291910362199, "learning_rate": 7.848640020901788e-07, "loss": 0.3806, "step": 18861 }, { "epoch": 0.32786942237827876, "grad_norm": 1.3048596048362027, "learning_rate": 7.848408675759468e-07, "loss": 0.2887, "step": 18862 }, { "epoch": 0.3278868049157816, "grad_norm": 2.713669919119856, "learning_rate": 7.848177321589002e-07, "loss": 0.3612, "step": 18863 }, { "epoch": 0.3279041874532844, "grad_norm": 1.7024926762911496, "learning_rate": 7.847945958391122e-07, "loss": 0.3164, "step": 18864 }, { "epoch": 0.32792156999078725, "grad_norm": 1.4791558864008696, "learning_rate": 7.847714586166562e-07, "loss": 0.2806, "step": 18865 }, { "epoch": 0.3279389525282901, "grad_norm": 2.2263688121323275, "learning_rate": 7.847483204916055e-07, "loss": 0.3432, "step": 18866 }, { "epoch": 0.3279563350657929, "grad_norm": 2.1421960771622732, "learning_rate": 7.847251814640333e-07, "loss": 0.4157, "step": 18867 }, { "epoch": 0.32797371760329574, "grad_norm": 1.401522005511642, "learning_rate": 7.847020415340131e-07, "loss": 0.2647, "step": 18868 }, { "epoch": 0.3279911001407986, "grad_norm": 2.166817385610449, "learning_rate": 7.846789007016182e-07, "loss": 0.4507, "step": 18869 }, { "epoch": 0.3280084826783014, "grad_norm": 1.4908226171572652, "learning_rate": 7.84655758966922e-07, "loss": 0.441, "step": 18870 }, { "epoch": 0.3280258652158042, "grad_norm": 1.7210902086137392, "learning_rate": 7.846326163299979e-07, "loss": 0.5158, "step": 18871 }, { "epoch": 0.328043247753307, "grad_norm": 2.101220995506634, "learning_rate": 7.846094727909189e-07, "loss": 0.4984, "step": 18872 }, { "epoch": 0.32806063029080984, "grad_norm": 1.6172066088011225, "learning_rate": 7.845863283497589e-07, "loss": 0.3098, "step": 18873 }, { "epoch": 0.32807801282831267, "grad_norm": 1.4556187212155516, "learning_rate": 7.845631830065909e-07, "loss": 0.3283, "step": 18874 }, { "epoch": 0.3280953953658155, "grad_norm": 1.2138155124268446, "learning_rate": 7.845400367614882e-07, "loss": 0.2955, "step": 18875 }, { "epoch": 0.32811277790331833, "grad_norm": 2.34218518130454, "learning_rate": 7.845168896145242e-07, "loss": 0.2893, "step": 18876 }, { "epoch": 0.32813016044082116, "grad_norm": 1.4052057870363241, "learning_rate": 7.844937415657726e-07, "loss": 0.2223, "step": 18877 }, { "epoch": 0.328147542978324, "grad_norm": 2.407578466225476, "learning_rate": 7.844705926153066e-07, "loss": 0.4967, "step": 18878 }, { "epoch": 0.3281649255158268, "grad_norm": 3.2199718156688912, "learning_rate": 7.844474427631992e-07, "loss": 0.3578, "step": 18879 }, { "epoch": 0.32818230805332965, "grad_norm": 2.4268429272396617, "learning_rate": 7.844242920095242e-07, "loss": 0.278, "step": 18880 }, { "epoch": 0.3281996905908324, "grad_norm": 1.6814776497793231, "learning_rate": 7.844011403543549e-07, "loss": 0.3233, "step": 18881 }, { "epoch": 0.32821707312833526, "grad_norm": 1.6616802502503352, "learning_rate": 7.843779877977646e-07, "loss": 0.3909, "step": 18882 }, { "epoch": 0.3282344556658381, "grad_norm": 1.95459788286306, "learning_rate": 7.843548343398267e-07, "loss": 0.4554, "step": 18883 }, { "epoch": 0.3282518382033409, "grad_norm": 1.9722367410327943, "learning_rate": 7.843316799806146e-07, "loss": 0.297, "step": 18884 }, { "epoch": 0.32826922074084375, "grad_norm": 1.769192890583172, "learning_rate": 7.843085247202015e-07, "loss": 0.375, "step": 18885 }, { "epoch": 0.3282866032783466, "grad_norm": 4.610175864985294, "learning_rate": 7.842853685586612e-07, "loss": 0.2531, "step": 18886 }, { "epoch": 0.3283039858158494, "grad_norm": 1.7100761276698477, "learning_rate": 7.842622114960668e-07, "loss": 0.3525, "step": 18887 }, { "epoch": 0.32832136835335224, "grad_norm": 1.5537541553047394, "learning_rate": 7.842390535324919e-07, "loss": 0.4671, "step": 18888 }, { "epoch": 0.32833875089085507, "grad_norm": 1.7310855034818993, "learning_rate": 7.842158946680097e-07, "loss": 0.3685, "step": 18889 }, { "epoch": 0.3283561334283579, "grad_norm": 2.3002599121009335, "learning_rate": 7.841927349026935e-07, "loss": 0.3597, "step": 18890 }, { "epoch": 0.3283735159658607, "grad_norm": 2.4852174150338846, "learning_rate": 7.84169574236617e-07, "loss": 0.3601, "step": 18891 }, { "epoch": 0.3283908985033635, "grad_norm": 1.8579923299903167, "learning_rate": 7.841464126698536e-07, "loss": 0.4244, "step": 18892 }, { "epoch": 0.32840828104086633, "grad_norm": 1.3725759207829165, "learning_rate": 7.841232502024764e-07, "loss": 0.218, "step": 18893 }, { "epoch": 0.32842566357836916, "grad_norm": 1.5098334233254525, "learning_rate": 7.84100086834559e-07, "loss": 0.1938, "step": 18894 }, { "epoch": 0.328443046115872, "grad_norm": 1.2182409204242983, "learning_rate": 7.840769225661751e-07, "loss": 0.3959, "step": 18895 }, { "epoch": 0.3284604286533748, "grad_norm": 2.709380213066734, "learning_rate": 7.840537573973978e-07, "loss": 0.3559, "step": 18896 }, { "epoch": 0.32847781119087766, "grad_norm": 1.4234516703864406, "learning_rate": 7.840305913283003e-07, "loss": 0.379, "step": 18897 }, { "epoch": 0.3284951937283805, "grad_norm": 1.6000598913318684, "learning_rate": 7.840074243589565e-07, "loss": 0.2037, "step": 18898 }, { "epoch": 0.3285125762658833, "grad_norm": 1.4498749928466148, "learning_rate": 7.839842564894397e-07, "loss": 0.2455, "step": 18899 }, { "epoch": 0.3285299588033861, "grad_norm": 2.532085047986628, "learning_rate": 7.83961087719823e-07, "loss": 0.2812, "step": 18900 }, { "epoch": 0.3285473413408889, "grad_norm": 1.3386989627247081, "learning_rate": 7.839379180501802e-07, "loss": 0.322, "step": 18901 }, { "epoch": 0.32856472387839175, "grad_norm": 1.8158609721411993, "learning_rate": 7.839147474805847e-07, "loss": 0.2052, "step": 18902 }, { "epoch": 0.3285821064158946, "grad_norm": 1.550535778720957, "learning_rate": 7.838915760111099e-07, "loss": 0.2853, "step": 18903 }, { "epoch": 0.3285994889533974, "grad_norm": 2.849615765759585, "learning_rate": 7.83868403641829e-07, "loss": 0.3446, "step": 18904 }, { "epoch": 0.32861687149090024, "grad_norm": 2.9613506055385255, "learning_rate": 7.83845230372816e-07, "loss": 0.2819, "step": 18905 }, { "epoch": 0.32863425402840307, "grad_norm": 3.494792542041347, "learning_rate": 7.838220562041438e-07, "loss": 0.2939, "step": 18906 }, { "epoch": 0.3286516365659059, "grad_norm": 0.9835896714273574, "learning_rate": 7.83798881135886e-07, "loss": 0.3939, "step": 18907 }, { "epoch": 0.32866901910340873, "grad_norm": 1.1555159903608543, "learning_rate": 7.837757051681162e-07, "loss": 0.334, "step": 18908 }, { "epoch": 0.32868640164091156, "grad_norm": 1.8431180229311197, "learning_rate": 7.837525283009078e-07, "loss": 0.3403, "step": 18909 }, { "epoch": 0.32870378417841434, "grad_norm": 1.882913509352012, "learning_rate": 7.837293505343342e-07, "loss": 0.4669, "step": 18910 }, { "epoch": 0.32872116671591717, "grad_norm": 1.167963339036121, "learning_rate": 7.837061718684689e-07, "loss": 0.2768, "step": 18911 }, { "epoch": 0.32873854925342, "grad_norm": 1.5805220570881415, "learning_rate": 7.836829923033853e-07, "loss": 0.3212, "step": 18912 }, { "epoch": 0.32875593179092283, "grad_norm": 1.6192104404819554, "learning_rate": 7.836598118391568e-07, "loss": 0.2921, "step": 18913 }, { "epoch": 0.32877331432842566, "grad_norm": 2.8858357728959825, "learning_rate": 7.836366304758573e-07, "loss": 0.4282, "step": 18914 }, { "epoch": 0.3287906968659285, "grad_norm": 2.0629008794747326, "learning_rate": 7.836134482135599e-07, "loss": 0.4083, "step": 18915 }, { "epoch": 0.3288080794034313, "grad_norm": 2.968164568184417, "learning_rate": 7.83590265052338e-07, "loss": 0.3362, "step": 18916 }, { "epoch": 0.32882546194093415, "grad_norm": 1.7822902098924664, "learning_rate": 7.835670809922654e-07, "loss": 0.2795, "step": 18917 }, { "epoch": 0.328842844478437, "grad_norm": 2.273392931698539, "learning_rate": 7.835438960334153e-07, "loss": 0.4994, "step": 18918 }, { "epoch": 0.3288602270159398, "grad_norm": 2.1069900986986982, "learning_rate": 7.835207101758615e-07, "loss": 0.3683, "step": 18919 }, { "epoch": 0.3288776095534426, "grad_norm": 1.3197108680263039, "learning_rate": 7.83497523419677e-07, "loss": 0.4771, "step": 18920 }, { "epoch": 0.3288949920909454, "grad_norm": 1.5561661310151393, "learning_rate": 7.834743357649357e-07, "loss": 0.1966, "step": 18921 }, { "epoch": 0.32891237462844825, "grad_norm": 2.608845360202673, "learning_rate": 7.834511472117109e-07, "loss": 0.3722, "step": 18922 }, { "epoch": 0.3289297571659511, "grad_norm": 1.753740316496587, "learning_rate": 7.834279577600763e-07, "loss": 0.3199, "step": 18923 }, { "epoch": 0.3289471397034539, "grad_norm": 1.4899466909377856, "learning_rate": 7.834047674101052e-07, "loss": 0.3154, "step": 18924 }, { "epoch": 0.32896452224095674, "grad_norm": 4.029995513028457, "learning_rate": 7.833815761618711e-07, "loss": 0.3187, "step": 18925 }, { "epoch": 0.32898190477845957, "grad_norm": 2.7463928662771435, "learning_rate": 7.833583840154479e-07, "loss": 0.474, "step": 18926 }, { "epoch": 0.3289992873159624, "grad_norm": 2.304719238816844, "learning_rate": 7.833351909709085e-07, "loss": 0.2838, "step": 18927 }, { "epoch": 0.3290166698534652, "grad_norm": 3.257214875932645, "learning_rate": 7.833119970283268e-07, "loss": 0.4503, "step": 18928 }, { "epoch": 0.32903405239096806, "grad_norm": 1.591830931753142, "learning_rate": 7.83288802187776e-07, "loss": 0.2987, "step": 18929 }, { "epoch": 0.32905143492847083, "grad_norm": 1.6893098892100973, "learning_rate": 7.832656064493302e-07, "loss": 0.3557, "step": 18930 }, { "epoch": 0.32906881746597366, "grad_norm": 2.0530420788768367, "learning_rate": 7.832424098130622e-07, "loss": 0.3306, "step": 18931 }, { "epoch": 0.3290862000034765, "grad_norm": 1.224289865763637, "learning_rate": 7.83219212279046e-07, "loss": 0.1443, "step": 18932 }, { "epoch": 0.3291035825409793, "grad_norm": 1.7439500447324183, "learning_rate": 7.83196013847355e-07, "loss": 0.432, "step": 18933 }, { "epoch": 0.32912096507848215, "grad_norm": 1.7051523276989475, "learning_rate": 7.831728145180627e-07, "loss": 0.2756, "step": 18934 }, { "epoch": 0.329138347615985, "grad_norm": 2.1199437509447656, "learning_rate": 7.831496142912428e-07, "loss": 0.2266, "step": 18935 }, { "epoch": 0.3291557301534878, "grad_norm": 2.4749158674496603, "learning_rate": 7.831264131669686e-07, "loss": 0.532, "step": 18936 }, { "epoch": 0.32917311269099064, "grad_norm": 1.7352274206337137, "learning_rate": 7.831032111453137e-07, "loss": 0.4177, "step": 18937 }, { "epoch": 0.3291904952284935, "grad_norm": 1.4698897814905538, "learning_rate": 7.830800082263516e-07, "loss": 0.5866, "step": 18938 }, { "epoch": 0.3292078777659963, "grad_norm": 3.18647881355054, "learning_rate": 7.83056804410156e-07, "loss": 0.4717, "step": 18939 }, { "epoch": 0.3292252603034991, "grad_norm": 1.188726390047473, "learning_rate": 7.830335996968004e-07, "loss": 0.2177, "step": 18940 }, { "epoch": 0.3292426428410019, "grad_norm": 2.6341366654462806, "learning_rate": 7.830103940863582e-07, "loss": 0.3854, "step": 18941 }, { "epoch": 0.32926002537850474, "grad_norm": 1.1509446793223603, "learning_rate": 7.829871875789031e-07, "loss": 0.3249, "step": 18942 }, { "epoch": 0.32927740791600757, "grad_norm": 1.688689382944416, "learning_rate": 7.829639801745087e-07, "loss": 0.3556, "step": 18943 }, { "epoch": 0.3292947904535104, "grad_norm": 1.759622862102842, "learning_rate": 7.829407718732483e-07, "loss": 0.3329, "step": 18944 }, { "epoch": 0.32931217299101323, "grad_norm": 1.4158386128321694, "learning_rate": 7.829175626751957e-07, "loss": 0.3415, "step": 18945 }, { "epoch": 0.32932955552851606, "grad_norm": 1.8955807463193866, "learning_rate": 7.828943525804245e-07, "loss": 0.3163, "step": 18946 }, { "epoch": 0.3293469380660189, "grad_norm": 1.5022942663624215, "learning_rate": 7.82871141589008e-07, "loss": 0.2513, "step": 18947 }, { "epoch": 0.3293643206035217, "grad_norm": 1.678055364708261, "learning_rate": 7.8284792970102e-07, "loss": 0.4433, "step": 18948 }, { "epoch": 0.32938170314102455, "grad_norm": 1.6696461835205136, "learning_rate": 7.828247169165339e-07, "loss": 0.2864, "step": 18949 }, { "epoch": 0.3293990856785273, "grad_norm": 1.453185042791218, "learning_rate": 7.828015032356235e-07, "loss": 0.2451, "step": 18950 }, { "epoch": 0.32941646821603016, "grad_norm": 2.6867161065172867, "learning_rate": 7.82778288658362e-07, "loss": 0.518, "step": 18951 }, { "epoch": 0.329433850753533, "grad_norm": 1.193738097130419, "learning_rate": 7.827550731848236e-07, "loss": 0.3135, "step": 18952 }, { "epoch": 0.3294512332910358, "grad_norm": 1.2651398426011387, "learning_rate": 7.827318568150813e-07, "loss": 0.3448, "step": 18953 }, { "epoch": 0.32946861582853865, "grad_norm": 1.7638891274315098, "learning_rate": 7.827086395492089e-07, "loss": 0.3725, "step": 18954 }, { "epoch": 0.3294859983660415, "grad_norm": 2.1005078605981913, "learning_rate": 7.826854213872798e-07, "loss": 0.4081, "step": 18955 }, { "epoch": 0.3295033809035443, "grad_norm": 1.9197622775398837, "learning_rate": 7.826622023293681e-07, "loss": 0.4665, "step": 18956 }, { "epoch": 0.32952076344104714, "grad_norm": 1.5453671469658863, "learning_rate": 7.826389823755469e-07, "loss": 0.3228, "step": 18957 }, { "epoch": 0.32953814597854997, "grad_norm": 1.6708303200392636, "learning_rate": 7.826157615258899e-07, "loss": 0.2957, "step": 18958 }, { "epoch": 0.3295555285160528, "grad_norm": 2.7759975351901325, "learning_rate": 7.825925397804708e-07, "loss": 0.3856, "step": 18959 }, { "epoch": 0.3295729110535556, "grad_norm": 1.6606173480313693, "learning_rate": 7.825693171393633e-07, "loss": 0.2772, "step": 18960 }, { "epoch": 0.3295902935910584, "grad_norm": 1.4413166230666392, "learning_rate": 7.825460936026406e-07, "loss": 0.4429, "step": 18961 }, { "epoch": 0.32960767612856123, "grad_norm": 2.0742002588310426, "learning_rate": 7.825228691703768e-07, "loss": 0.3521, "step": 18962 }, { "epoch": 0.32962505866606406, "grad_norm": 2.2267904758616206, "learning_rate": 7.824996438426452e-07, "loss": 0.36, "step": 18963 }, { "epoch": 0.3296424412035669, "grad_norm": 0.8978994082443199, "learning_rate": 7.824764176195194e-07, "loss": 0.5367, "step": 18964 }, { "epoch": 0.3296598237410697, "grad_norm": 1.5057502014328574, "learning_rate": 7.824531905010733e-07, "loss": 0.3082, "step": 18965 }, { "epoch": 0.32967720627857255, "grad_norm": 1.5761820043610577, "learning_rate": 7.824299624873803e-07, "loss": 0.3171, "step": 18966 }, { "epoch": 0.3296945888160754, "grad_norm": 1.3098597535076386, "learning_rate": 7.82406733578514e-07, "loss": 0.2965, "step": 18967 }, { "epoch": 0.3297119713535782, "grad_norm": 3.76752415403293, "learning_rate": 7.823835037745481e-07, "loss": 0.2113, "step": 18968 }, { "epoch": 0.32972935389108105, "grad_norm": 1.5795740235679352, "learning_rate": 7.823602730755561e-07, "loss": 0.2369, "step": 18969 }, { "epoch": 0.3297467364285838, "grad_norm": 3.0703590080755165, "learning_rate": 7.823370414816119e-07, "loss": 0.4981, "step": 18970 }, { "epoch": 0.32976411896608665, "grad_norm": 1.3488621199803825, "learning_rate": 7.82313808992789e-07, "loss": 0.2696, "step": 18971 }, { "epoch": 0.3297815015035895, "grad_norm": 2.4722206251453676, "learning_rate": 7.822905756091608e-07, "loss": 0.2922, "step": 18972 }, { "epoch": 0.3297988840410923, "grad_norm": 1.3714168764094732, "learning_rate": 7.822673413308012e-07, "loss": 0.322, "step": 18973 }, { "epoch": 0.32981626657859514, "grad_norm": 1.0924000241260874, "learning_rate": 7.822441061577839e-07, "loss": 0.545, "step": 18974 }, { "epoch": 0.32983364911609797, "grad_norm": 1.8429425846164798, "learning_rate": 7.822208700901824e-07, "loss": 0.297, "step": 18975 }, { "epoch": 0.3298510316536008, "grad_norm": 3.128875138464711, "learning_rate": 7.821976331280704e-07, "loss": 0.4549, "step": 18976 }, { "epoch": 0.32986841419110363, "grad_norm": 1.355519706079662, "learning_rate": 7.821743952715215e-07, "loss": 0.2601, "step": 18977 }, { "epoch": 0.32988579672860646, "grad_norm": 1.762256919928789, "learning_rate": 7.821511565206095e-07, "loss": 0.2845, "step": 18978 }, { "epoch": 0.3299031792661093, "grad_norm": 2.177807735726304, "learning_rate": 7.821279168754078e-07, "loss": 0.6195, "step": 18979 }, { "epoch": 0.32992056180361207, "grad_norm": 1.4771113423597038, "learning_rate": 7.821046763359902e-07, "loss": 0.2593, "step": 18980 }, { "epoch": 0.3299379443411149, "grad_norm": 1.6383773970219988, "learning_rate": 7.820814349024305e-07, "loss": 0.1626, "step": 18981 }, { "epoch": 0.3299553268786177, "grad_norm": 2.2641155703532654, "learning_rate": 7.820581925748021e-07, "loss": 0.3051, "step": 18982 }, { "epoch": 0.32997270941612056, "grad_norm": 1.7643728422169898, "learning_rate": 7.820349493531788e-07, "loss": 0.2492, "step": 18983 }, { "epoch": 0.3299900919536234, "grad_norm": 1.4257323099189327, "learning_rate": 7.820117052376344e-07, "loss": 0.2444, "step": 18984 }, { "epoch": 0.3300074744911262, "grad_norm": 1.3057233199628293, "learning_rate": 7.819884602282423e-07, "loss": 0.2076, "step": 18985 }, { "epoch": 0.33002485702862905, "grad_norm": 1.4646350385892537, "learning_rate": 7.819652143250763e-07, "loss": 0.3122, "step": 18986 }, { "epoch": 0.3300422395661319, "grad_norm": 2.517086336512277, "learning_rate": 7.819419675282102e-07, "loss": 0.2717, "step": 18987 }, { "epoch": 0.3300596221036347, "grad_norm": 1.8580612664744045, "learning_rate": 7.819187198377176e-07, "loss": 0.3218, "step": 18988 }, { "epoch": 0.33007700464113754, "grad_norm": 1.7353949891173344, "learning_rate": 7.81895471253672e-07, "loss": 0.472, "step": 18989 }, { "epoch": 0.3300943871786403, "grad_norm": 2.130575333417961, "learning_rate": 7.818722217761475e-07, "loss": 0.3474, "step": 18990 }, { "epoch": 0.33011176971614314, "grad_norm": 1.606376728437736, "learning_rate": 7.818489714052173e-07, "loss": 0.1558, "step": 18991 }, { "epoch": 0.330129152253646, "grad_norm": 0.8834079337079532, "learning_rate": 7.818257201409555e-07, "loss": 0.1761, "step": 18992 }, { "epoch": 0.3301465347911488, "grad_norm": 1.3163173440565763, "learning_rate": 7.818024679834355e-07, "loss": 0.4019, "step": 18993 }, { "epoch": 0.33016391732865163, "grad_norm": 1.5575360151219668, "learning_rate": 7.817792149327311e-07, "loss": 0.5518, "step": 18994 }, { "epoch": 0.33018129986615447, "grad_norm": 1.1684627371819305, "learning_rate": 7.817559609889161e-07, "loss": 0.3057, "step": 18995 }, { "epoch": 0.3301986824036573, "grad_norm": 1.8173939155760572, "learning_rate": 7.817327061520642e-07, "loss": 0.3823, "step": 18996 }, { "epoch": 0.3302160649411601, "grad_norm": 1.560785322915115, "learning_rate": 7.817094504222489e-07, "loss": 0.2504, "step": 18997 }, { "epoch": 0.33023344747866296, "grad_norm": 1.3919534915400684, "learning_rate": 7.81686193799544e-07, "loss": 0.3754, "step": 18998 }, { "epoch": 0.3302508300161658, "grad_norm": 1.8133664018716176, "learning_rate": 7.816629362840235e-07, "loss": 0.4848, "step": 18999 }, { "epoch": 0.33026821255366856, "grad_norm": 1.951771253487975, "learning_rate": 7.816396778757608e-07, "loss": 0.5786, "step": 19000 }, { "epoch": 0.3302855950911714, "grad_norm": 1.2021732507912668, "learning_rate": 7.816164185748296e-07, "loss": 0.1232, "step": 19001 }, { "epoch": 0.3303029776286742, "grad_norm": 3.9182690374717954, "learning_rate": 7.815931583813038e-07, "loss": 0.2492, "step": 19002 }, { "epoch": 0.33032036016617705, "grad_norm": 3.1159450350016957, "learning_rate": 7.81569897295257e-07, "loss": 0.4729, "step": 19003 }, { "epoch": 0.3303377427036799, "grad_norm": 1.6611633688062248, "learning_rate": 7.81546635316763e-07, "loss": 0.4135, "step": 19004 }, { "epoch": 0.3303551252411827, "grad_norm": 2.1465537121974667, "learning_rate": 7.815233724458954e-07, "loss": 0.3775, "step": 19005 }, { "epoch": 0.33037250777868554, "grad_norm": 1.051792457761707, "learning_rate": 7.815001086827282e-07, "loss": 0.3257, "step": 19006 }, { "epoch": 0.3303898903161884, "grad_norm": 1.729202445394111, "learning_rate": 7.814768440273348e-07, "loss": 0.3704, "step": 19007 }, { "epoch": 0.3304072728536912, "grad_norm": 0.9283889303877815, "learning_rate": 7.814535784797891e-07, "loss": 0.2336, "step": 19008 }, { "epoch": 0.33042465539119403, "grad_norm": 2.0589596569985873, "learning_rate": 7.81430312040165e-07, "loss": 0.3657, "step": 19009 }, { "epoch": 0.3304420379286968, "grad_norm": 2.2724367131580467, "learning_rate": 7.81407044708536e-07, "loss": 0.3895, "step": 19010 }, { "epoch": 0.33045942046619964, "grad_norm": 1.8367799006053702, "learning_rate": 7.81383776484976e-07, "loss": 0.4484, "step": 19011 }, { "epoch": 0.33047680300370247, "grad_norm": 1.770301209000209, "learning_rate": 7.813605073695586e-07, "loss": 0.1139, "step": 19012 }, { "epoch": 0.3304941855412053, "grad_norm": 0.9812041297171852, "learning_rate": 7.813372373623576e-07, "loss": 0.3414, "step": 19013 }, { "epoch": 0.33051156807870813, "grad_norm": 1.7270571130689758, "learning_rate": 7.81313966463447e-07, "loss": 0.4554, "step": 19014 }, { "epoch": 0.33052895061621096, "grad_norm": 1.6968989887555561, "learning_rate": 7.812906946729001e-07, "loss": 0.3334, "step": 19015 }, { "epoch": 0.3305463331537138, "grad_norm": 2.0257878202510575, "learning_rate": 7.812674219907911e-07, "loss": 0.3116, "step": 19016 }, { "epoch": 0.3305637156912166, "grad_norm": 2.6410899168333866, "learning_rate": 7.812441484171935e-07, "loss": 0.4266, "step": 19017 }, { "epoch": 0.33058109822871945, "grad_norm": 2.166548635908431, "learning_rate": 7.81220873952181e-07, "loss": 0.2323, "step": 19018 }, { "epoch": 0.3305984807662223, "grad_norm": 1.006762718947616, "learning_rate": 7.811975985958279e-07, "loss": 0.2523, "step": 19019 }, { "epoch": 0.33061586330372505, "grad_norm": 2.9694583604939635, "learning_rate": 7.811743223482073e-07, "loss": 0.4347, "step": 19020 }, { "epoch": 0.3306332458412279, "grad_norm": 2.2598953775881396, "learning_rate": 7.811510452093932e-07, "loss": 0.719, "step": 19021 }, { "epoch": 0.3306506283787307, "grad_norm": 1.67576172289266, "learning_rate": 7.811277671794597e-07, "loss": 0.3578, "step": 19022 }, { "epoch": 0.33066801091623355, "grad_norm": 1.1017297886364623, "learning_rate": 7.8110448825848e-07, "loss": 0.3041, "step": 19023 }, { "epoch": 0.3306853934537364, "grad_norm": 2.1612631882859787, "learning_rate": 7.810812084465284e-07, "loss": 0.4304, "step": 19024 }, { "epoch": 0.3307027759912392, "grad_norm": 2.1962814257836767, "learning_rate": 7.810579277436784e-07, "loss": 0.4189, "step": 19025 }, { "epoch": 0.33072015852874204, "grad_norm": 2.1688137730386376, "learning_rate": 7.810346461500039e-07, "loss": 0.3465, "step": 19026 }, { "epoch": 0.33073754106624487, "grad_norm": 1.4545701713612573, "learning_rate": 7.810113636655787e-07, "loss": 0.4858, "step": 19027 }, { "epoch": 0.3307549236037477, "grad_norm": 1.2547720732325784, "learning_rate": 7.809880802904767e-07, "loss": 0.386, "step": 19028 }, { "epoch": 0.3307723061412505, "grad_norm": 1.178575895969217, "learning_rate": 7.809647960247713e-07, "loss": 0.2416, "step": 19029 }, { "epoch": 0.3307896886787533, "grad_norm": 1.7753001227853749, "learning_rate": 7.809415108685367e-07, "loss": 0.313, "step": 19030 }, { "epoch": 0.33080707121625613, "grad_norm": 2.1912525033745247, "learning_rate": 7.809182248218466e-07, "loss": 0.401, "step": 19031 }, { "epoch": 0.33082445375375896, "grad_norm": 2.6958797593718575, "learning_rate": 7.808949378847747e-07, "loss": 0.2419, "step": 19032 }, { "epoch": 0.3308418362912618, "grad_norm": 1.519483878508797, "learning_rate": 7.808716500573948e-07, "loss": 0.2529, "step": 19033 }, { "epoch": 0.3308592188287646, "grad_norm": 2.3095056229723654, "learning_rate": 7.808483613397808e-07, "loss": 0.324, "step": 19034 }, { "epoch": 0.33087660136626745, "grad_norm": 1.8010046756032865, "learning_rate": 7.808250717320066e-07, "loss": 0.3432, "step": 19035 }, { "epoch": 0.3308939839037703, "grad_norm": 1.988046669535785, "learning_rate": 7.80801781234146e-07, "loss": 0.4405, "step": 19036 }, { "epoch": 0.3309113664412731, "grad_norm": 2.376345923701517, "learning_rate": 7.807784898462726e-07, "loss": 0.3973, "step": 19037 }, { "epoch": 0.33092874897877594, "grad_norm": 1.7960149061893347, "learning_rate": 7.807551975684605e-07, "loss": 0.2694, "step": 19038 }, { "epoch": 0.3309461315162787, "grad_norm": 1.2874172235454453, "learning_rate": 7.807319044007832e-07, "loss": 0.3009, "step": 19039 }, { "epoch": 0.33096351405378155, "grad_norm": 1.9483522390775454, "learning_rate": 7.807086103433149e-07, "loss": 0.3157, "step": 19040 }, { "epoch": 0.3309808965912844, "grad_norm": 1.0321048792369214, "learning_rate": 7.806853153961292e-07, "loss": 0.345, "step": 19041 }, { "epoch": 0.3309982791287872, "grad_norm": 2.377898089808868, "learning_rate": 7.806620195592998e-07, "loss": 0.2972, "step": 19042 }, { "epoch": 0.33101566166629004, "grad_norm": 2.3271615139595188, "learning_rate": 7.80638722832901e-07, "loss": 0.4124, "step": 19043 }, { "epoch": 0.33103304420379287, "grad_norm": 2.6680717386856925, "learning_rate": 7.806154252170063e-07, "loss": 0.5324, "step": 19044 }, { "epoch": 0.3310504267412957, "grad_norm": 2.2851404251176626, "learning_rate": 7.805921267116895e-07, "loss": 0.5006, "step": 19045 }, { "epoch": 0.33106780927879853, "grad_norm": 1.4415685205282531, "learning_rate": 7.805688273170246e-07, "loss": 0.3155, "step": 19046 }, { "epoch": 0.33108519181630136, "grad_norm": 2.4820370367774944, "learning_rate": 7.805455270330856e-07, "loss": 0.2957, "step": 19047 }, { "epoch": 0.3311025743538042, "grad_norm": 2.5554994787119485, "learning_rate": 7.805222258599459e-07, "loss": 0.8444, "step": 19048 }, { "epoch": 0.33111995689130697, "grad_norm": 2.206857080094355, "learning_rate": 7.804989237976796e-07, "loss": 0.2001, "step": 19049 }, { "epoch": 0.3311373394288098, "grad_norm": 1.7025695891211927, "learning_rate": 7.804756208463608e-07, "loss": 0.2384, "step": 19050 }, { "epoch": 0.3311547219663126, "grad_norm": 1.8725933576131137, "learning_rate": 7.804523170060629e-07, "loss": 0.5593, "step": 19051 }, { "epoch": 0.33117210450381546, "grad_norm": 2.2636500191438143, "learning_rate": 7.8042901227686e-07, "loss": 0.3076, "step": 19052 }, { "epoch": 0.3311894870413183, "grad_norm": 1.2937133269859162, "learning_rate": 7.80405706658826e-07, "loss": 0.266, "step": 19053 }, { "epoch": 0.3312068695788211, "grad_norm": 1.5601869389229372, "learning_rate": 7.803824001520348e-07, "loss": 0.3455, "step": 19054 }, { "epoch": 0.33122425211632395, "grad_norm": 1.1170732827410068, "learning_rate": 7.8035909275656e-07, "loss": 0.3599, "step": 19055 }, { "epoch": 0.3312416346538268, "grad_norm": 1.6179597018582852, "learning_rate": 7.803357844724757e-07, "loss": 0.459, "step": 19056 }, { "epoch": 0.3312590171913296, "grad_norm": 2.042982991744269, "learning_rate": 7.803124752998558e-07, "loss": 0.2681, "step": 19057 }, { "epoch": 0.33127639972883244, "grad_norm": 4.051369924138713, "learning_rate": 7.802891652387741e-07, "loss": 0.3965, "step": 19058 }, { "epoch": 0.3312937822663352, "grad_norm": 1.4820379544659021, "learning_rate": 7.802658542893046e-07, "loss": 0.1701, "step": 19059 }, { "epoch": 0.33131116480383804, "grad_norm": 2.289397269799612, "learning_rate": 7.80242542451521e-07, "loss": 0.3124, "step": 19060 }, { "epoch": 0.3313285473413409, "grad_norm": 1.3978068038788953, "learning_rate": 7.802192297254972e-07, "loss": 0.3391, "step": 19061 }, { "epoch": 0.3313459298788437, "grad_norm": 1.5269117085658563, "learning_rate": 7.801959161113072e-07, "loss": 0.2768, "step": 19062 }, { "epoch": 0.33136331241634653, "grad_norm": 1.407431609731691, "learning_rate": 7.801726016090249e-07, "loss": 0.4428, "step": 19063 }, { "epoch": 0.33138069495384936, "grad_norm": 2.4793121998252734, "learning_rate": 7.801492862187242e-07, "loss": 0.3186, "step": 19064 }, { "epoch": 0.3313980774913522, "grad_norm": 2.6132543981669194, "learning_rate": 7.801259699404789e-07, "loss": 0.5094, "step": 19065 }, { "epoch": 0.331415460028855, "grad_norm": 1.688435320883508, "learning_rate": 7.801026527743631e-07, "loss": 0.3538, "step": 19066 }, { "epoch": 0.33143284256635785, "grad_norm": 1.7287546587051787, "learning_rate": 7.800793347204503e-07, "loss": 0.331, "step": 19067 }, { "epoch": 0.3314502251038607, "grad_norm": 1.786961748364509, "learning_rate": 7.800560157788148e-07, "loss": 0.2389, "step": 19068 }, { "epoch": 0.33146760764136346, "grad_norm": 1.07940804058249, "learning_rate": 7.800326959495304e-07, "loss": 0.3696, "step": 19069 }, { "epoch": 0.3314849901788663, "grad_norm": 1.8858061670853532, "learning_rate": 7.800093752326708e-07, "loss": 0.3509, "step": 19070 }, { "epoch": 0.3315023727163691, "grad_norm": 1.6775305794698083, "learning_rate": 7.799860536283103e-07, "loss": 0.3152, "step": 19071 }, { "epoch": 0.33151975525387195, "grad_norm": 2.6068110712662804, "learning_rate": 7.799627311365227e-07, "loss": 0.3621, "step": 19072 }, { "epoch": 0.3315371377913748, "grad_norm": 1.8000728751584707, "learning_rate": 7.799394077573818e-07, "loss": 0.3729, "step": 19073 }, { "epoch": 0.3315545203288776, "grad_norm": 1.4979702033272115, "learning_rate": 7.799160834909613e-07, "loss": 0.1727, "step": 19074 }, { "epoch": 0.33157190286638044, "grad_norm": 1.3086318021795151, "learning_rate": 7.798927583373356e-07, "loss": 0.3989, "step": 19075 }, { "epoch": 0.33158928540388327, "grad_norm": 1.314111480386219, "learning_rate": 7.798694322965785e-07, "loss": 0.2223, "step": 19076 }, { "epoch": 0.3316066679413861, "grad_norm": 1.125038067712233, "learning_rate": 7.798461053687637e-07, "loss": 0.3803, "step": 19077 }, { "epoch": 0.33162405047888893, "grad_norm": 2.548121919175546, "learning_rate": 7.798227775539654e-07, "loss": 0.279, "step": 19078 }, { "epoch": 0.3316414330163917, "grad_norm": 2.146847350006676, "learning_rate": 7.797994488522575e-07, "loss": 0.3286, "step": 19079 }, { "epoch": 0.33165881555389454, "grad_norm": 1.410227223852166, "learning_rate": 7.797761192637138e-07, "loss": 0.3515, "step": 19080 }, { "epoch": 0.33167619809139737, "grad_norm": 1.1480857099473871, "learning_rate": 7.797527887884083e-07, "loss": 0.3017, "step": 19081 }, { "epoch": 0.3316935806289002, "grad_norm": 2.9818754056719223, "learning_rate": 7.797294574264151e-07, "loss": 0.3333, "step": 19082 }, { "epoch": 0.33171096316640303, "grad_norm": 1.1641708353929285, "learning_rate": 7.797061251778079e-07, "loss": 0.309, "step": 19083 }, { "epoch": 0.33172834570390586, "grad_norm": 1.5700036539370685, "learning_rate": 7.796827920426607e-07, "loss": 0.5276, "step": 19084 }, { "epoch": 0.3317457282414087, "grad_norm": 2.5401335090434056, "learning_rate": 7.796594580210476e-07, "loss": 0.5457, "step": 19085 }, { "epoch": 0.3317631107789115, "grad_norm": 1.468223028046766, "learning_rate": 7.796361231130423e-07, "loss": 0.2747, "step": 19086 }, { "epoch": 0.33178049331641435, "grad_norm": 2.644224307466279, "learning_rate": 7.796127873187193e-07, "loss": 0.244, "step": 19087 }, { "epoch": 0.3317978758539172, "grad_norm": 1.283086233496241, "learning_rate": 7.79589450638152e-07, "loss": 0.4335, "step": 19088 }, { "epoch": 0.33181525839141995, "grad_norm": 1.4750768414947861, "learning_rate": 7.795661130714146e-07, "loss": 0.3874, "step": 19089 }, { "epoch": 0.3318326409289228, "grad_norm": 1.7954636946023972, "learning_rate": 7.79542774618581e-07, "loss": 0.2839, "step": 19090 }, { "epoch": 0.3318500234664256, "grad_norm": 1.651361202338528, "learning_rate": 7.795194352797254e-07, "loss": 0.5106, "step": 19091 }, { "epoch": 0.33186740600392844, "grad_norm": 2.108397963721567, "learning_rate": 7.794960950549214e-07, "loss": 0.2966, "step": 19092 }, { "epoch": 0.3318847885414313, "grad_norm": 1.7316551231913904, "learning_rate": 7.794727539442432e-07, "loss": 0.3322, "step": 19093 }, { "epoch": 0.3319021710789341, "grad_norm": 1.884967119185523, "learning_rate": 7.794494119477647e-07, "loss": 0.4512, "step": 19094 }, { "epoch": 0.33191955361643694, "grad_norm": 1.9347283295372286, "learning_rate": 7.7942606906556e-07, "loss": 0.3053, "step": 19095 }, { "epoch": 0.33193693615393977, "grad_norm": 1.0940280346580737, "learning_rate": 7.794027252977031e-07, "loss": 0.2774, "step": 19096 }, { "epoch": 0.3319543186914426, "grad_norm": 2.752626477704401, "learning_rate": 7.793793806442678e-07, "loss": 0.2062, "step": 19097 }, { "epoch": 0.3319717012289454, "grad_norm": 1.87541930349283, "learning_rate": 7.793560351053283e-07, "loss": 0.3229, "step": 19098 }, { "epoch": 0.3319890837664482, "grad_norm": 1.162636771855706, "learning_rate": 7.793326886809584e-07, "loss": 0.3964, "step": 19099 }, { "epoch": 0.33200646630395103, "grad_norm": 2.8228407167710494, "learning_rate": 7.793093413712322e-07, "loss": 0.3683, "step": 19100 }, { "epoch": 0.33202384884145386, "grad_norm": 3.4518949066343385, "learning_rate": 7.792859931762236e-07, "loss": 0.3829, "step": 19101 }, { "epoch": 0.3320412313789567, "grad_norm": 1.1427151453192255, "learning_rate": 7.79262644096007e-07, "loss": 0.2782, "step": 19102 }, { "epoch": 0.3320586139164595, "grad_norm": 1.6173055036789181, "learning_rate": 7.792392941306558e-07, "loss": 0.4516, "step": 19103 }, { "epoch": 0.33207599645396235, "grad_norm": 1.7839949327041444, "learning_rate": 7.792159432802446e-07, "loss": 0.3043, "step": 19104 }, { "epoch": 0.3320933789914652, "grad_norm": 1.3225878183063793, "learning_rate": 7.791925915448468e-07, "loss": 0.3369, "step": 19105 }, { "epoch": 0.332110761528968, "grad_norm": 1.2668598249233645, "learning_rate": 7.791692389245368e-07, "loss": 0.3328, "step": 19106 }, { "epoch": 0.33212814406647084, "grad_norm": 2.036103004783525, "learning_rate": 7.791458854193888e-07, "loss": 0.35, "step": 19107 }, { "epoch": 0.3321455266039737, "grad_norm": 1.9994093611996446, "learning_rate": 7.791225310294763e-07, "loss": 0.5235, "step": 19108 }, { "epoch": 0.33216290914147645, "grad_norm": 1.9918160057320005, "learning_rate": 7.790991757548737e-07, "loss": 0.262, "step": 19109 }, { "epoch": 0.3321802916789793, "grad_norm": 3.253015151652129, "learning_rate": 7.790758195956549e-07, "loss": 0.277, "step": 19110 }, { "epoch": 0.3321976742164821, "grad_norm": 2.1111055445651696, "learning_rate": 7.79052462551894e-07, "loss": 0.4979, "step": 19111 }, { "epoch": 0.33221505675398494, "grad_norm": 2.2306648630037453, "learning_rate": 7.790291046236648e-07, "loss": 0.5911, "step": 19112 }, { "epoch": 0.33223243929148777, "grad_norm": 1.4053839132810078, "learning_rate": 7.790057458110417e-07, "loss": 0.2758, "step": 19113 }, { "epoch": 0.3322498218289906, "grad_norm": 1.016375049302853, "learning_rate": 7.789823861140985e-07, "loss": 0.6858, "step": 19114 }, { "epoch": 0.33226720436649343, "grad_norm": 1.001429955133739, "learning_rate": 7.789590255329093e-07, "loss": 0.2513, "step": 19115 }, { "epoch": 0.33228458690399626, "grad_norm": 1.5363345240733415, "learning_rate": 7.789356640675482e-07, "loss": 0.381, "step": 19116 }, { "epoch": 0.3323019694414991, "grad_norm": 2.345240048678782, "learning_rate": 7.789123017180892e-07, "loss": 0.3622, "step": 19117 }, { "epoch": 0.3323193519790019, "grad_norm": 4.413719833173038, "learning_rate": 7.788889384846062e-07, "loss": 0.492, "step": 19118 }, { "epoch": 0.3323367345165047, "grad_norm": 1.4340120543095207, "learning_rate": 7.788655743671732e-07, "loss": 0.4483, "step": 19119 }, { "epoch": 0.3323541170540075, "grad_norm": 1.4204374682625118, "learning_rate": 7.788422093658648e-07, "loss": 0.2748, "step": 19120 }, { "epoch": 0.33237149959151036, "grad_norm": 0.9679037441450011, "learning_rate": 7.788188434807545e-07, "loss": 0.3587, "step": 19121 }, { "epoch": 0.3323888821290132, "grad_norm": 1.7188545664536452, "learning_rate": 7.787954767119164e-07, "loss": 0.4472, "step": 19122 }, { "epoch": 0.332406264666516, "grad_norm": 1.302503537897341, "learning_rate": 7.78772109059425e-07, "loss": 0.4825, "step": 19123 }, { "epoch": 0.33242364720401885, "grad_norm": 1.8100043443352496, "learning_rate": 7.78748740523354e-07, "loss": 0.2807, "step": 19124 }, { "epoch": 0.3324410297415217, "grad_norm": 3.05693953046974, "learning_rate": 7.787253711037775e-07, "loss": 0.4513, "step": 19125 }, { "epoch": 0.3324584122790245, "grad_norm": 1.6288180029265755, "learning_rate": 7.787020008007696e-07, "loss": 0.3149, "step": 19126 }, { "epoch": 0.33247579481652734, "grad_norm": 1.7795329194536589, "learning_rate": 7.786786296144044e-07, "loss": 0.2843, "step": 19127 }, { "epoch": 0.33249317735403017, "grad_norm": 1.3692571763748747, "learning_rate": 7.78655257544756e-07, "loss": 0.2453, "step": 19128 }, { "epoch": 0.33251055989153294, "grad_norm": 2.5346595639653544, "learning_rate": 7.786318845918983e-07, "loss": 0.2444, "step": 19129 }, { "epoch": 0.33252794242903577, "grad_norm": 1.5383082539350748, "learning_rate": 7.786085107559057e-07, "loss": 0.3578, "step": 19130 }, { "epoch": 0.3325453249665386, "grad_norm": 1.2636395839059045, "learning_rate": 7.785851360368518e-07, "loss": 0.5422, "step": 19131 }, { "epoch": 0.33256270750404143, "grad_norm": 1.3282257708814567, "learning_rate": 7.785617604348114e-07, "loss": 0.4266, "step": 19132 }, { "epoch": 0.33258009004154426, "grad_norm": 1.6184985006030128, "learning_rate": 7.785383839498578e-07, "loss": 0.2929, "step": 19133 }, { "epoch": 0.3325974725790471, "grad_norm": 1.320417111302898, "learning_rate": 7.785150065820657e-07, "loss": 0.2364, "step": 19134 }, { "epoch": 0.3326148551165499, "grad_norm": 2.617816978016333, "learning_rate": 7.784916283315089e-07, "loss": 0.6574, "step": 19135 }, { "epoch": 0.33263223765405275, "grad_norm": 2.025070738819664, "learning_rate": 7.784682491982616e-07, "loss": 0.3115, "step": 19136 }, { "epoch": 0.3326496201915556, "grad_norm": 1.235442863325852, "learning_rate": 7.784448691823978e-07, "loss": 0.5086, "step": 19137 }, { "epoch": 0.3326670027290584, "grad_norm": 1.262583844662836, "learning_rate": 7.784214882839917e-07, "loss": 0.3027, "step": 19138 }, { "epoch": 0.3326843852665612, "grad_norm": 1.8918616652808908, "learning_rate": 7.783981065031174e-07, "loss": 0.3735, "step": 19139 }, { "epoch": 0.332701767804064, "grad_norm": 2.3139095593561096, "learning_rate": 7.783747238398491e-07, "loss": 0.3521, "step": 19140 }, { "epoch": 0.33271915034156685, "grad_norm": 1.4823226927312987, "learning_rate": 7.783513402942605e-07, "loss": 0.2693, "step": 19141 }, { "epoch": 0.3327365328790697, "grad_norm": 1.4284715299813928, "learning_rate": 7.783279558664263e-07, "loss": 0.3658, "step": 19142 }, { "epoch": 0.3327539154165725, "grad_norm": 1.8892801968880863, "learning_rate": 7.783045705564202e-07, "loss": 0.2846, "step": 19143 }, { "epoch": 0.33277129795407534, "grad_norm": 1.982890376424179, "learning_rate": 7.782811843643165e-07, "loss": 0.2873, "step": 19144 }, { "epoch": 0.33278868049157817, "grad_norm": 1.6622617095568628, "learning_rate": 7.782577972901894e-07, "loss": 0.303, "step": 19145 }, { "epoch": 0.332806063029081, "grad_norm": 1.1677217413450234, "learning_rate": 7.782344093341127e-07, "loss": 0.2143, "step": 19146 }, { "epoch": 0.33282344556658383, "grad_norm": 1.8195517520613045, "learning_rate": 7.782110204961607e-07, "loss": 0.3195, "step": 19147 }, { "epoch": 0.33284082810408666, "grad_norm": 2.6424834688878946, "learning_rate": 7.781876307764077e-07, "loss": 0.5054, "step": 19148 }, { "epoch": 0.33285821064158944, "grad_norm": 1.4936155072976252, "learning_rate": 7.781642401749277e-07, "loss": 0.2435, "step": 19149 }, { "epoch": 0.33287559317909227, "grad_norm": 1.6160130603798009, "learning_rate": 7.781408486917947e-07, "loss": 0.3593, "step": 19150 }, { "epoch": 0.3328929757165951, "grad_norm": 1.794038703984659, "learning_rate": 7.781174563270832e-07, "loss": 0.6505, "step": 19151 }, { "epoch": 0.3329103582540979, "grad_norm": 1.9607069369033268, "learning_rate": 7.78094063080867e-07, "loss": 0.3033, "step": 19152 }, { "epoch": 0.33292774079160076, "grad_norm": 1.3293208187081533, "learning_rate": 7.780706689532203e-07, "loss": 0.4414, "step": 19153 }, { "epoch": 0.3329451233291036, "grad_norm": 2.0770095111665476, "learning_rate": 7.780472739442173e-07, "loss": 0.393, "step": 19154 }, { "epoch": 0.3329625058666064, "grad_norm": 4.476354651513663, "learning_rate": 7.780238780539323e-07, "loss": 0.3664, "step": 19155 }, { "epoch": 0.33297988840410925, "grad_norm": 1.3821399339946434, "learning_rate": 7.780004812824393e-07, "loss": 0.4633, "step": 19156 }, { "epoch": 0.3329972709416121, "grad_norm": 1.488464165547168, "learning_rate": 7.779770836298124e-07, "loss": 0.2514, "step": 19157 }, { "epoch": 0.3330146534791149, "grad_norm": 1.1978059366234732, "learning_rate": 7.779536850961259e-07, "loss": 0.2982, "step": 19158 }, { "epoch": 0.3330320360166177, "grad_norm": 1.8881792062507508, "learning_rate": 7.77930285681454e-07, "loss": 0.4866, "step": 19159 }, { "epoch": 0.3330494185541205, "grad_norm": 1.606298553639476, "learning_rate": 7.779068853858706e-07, "loss": 1.2967, "step": 19160 }, { "epoch": 0.33306680109162334, "grad_norm": 1.3885975796111913, "learning_rate": 7.778834842094501e-07, "loss": 0.4787, "step": 19161 }, { "epoch": 0.3330841836291262, "grad_norm": 1.688388049873089, "learning_rate": 7.778600821522665e-07, "loss": 0.3411, "step": 19162 }, { "epoch": 0.333101566166629, "grad_norm": 1.731831436691954, "learning_rate": 7.778366792143943e-07, "loss": 0.3423, "step": 19163 }, { "epoch": 0.33311894870413183, "grad_norm": 4.059604491283955, "learning_rate": 7.778132753959072e-07, "loss": 0.4742, "step": 19164 }, { "epoch": 0.33313633124163466, "grad_norm": 2.016145092652587, "learning_rate": 7.777898706968797e-07, "loss": 0.3714, "step": 19165 }, { "epoch": 0.3331537137791375, "grad_norm": 1.5053215698394697, "learning_rate": 7.77766465117386e-07, "loss": 0.4257, "step": 19166 }, { "epoch": 0.3331710963166403, "grad_norm": 1.5798176839899485, "learning_rate": 7.777430586575002e-07, "loss": 0.2383, "step": 19167 }, { "epoch": 0.33318847885414316, "grad_norm": 1.581193585902516, "learning_rate": 7.777196513172964e-07, "loss": 0.3103, "step": 19168 }, { "epoch": 0.33320586139164593, "grad_norm": 1.5637958129740395, "learning_rate": 7.77696243096849e-07, "loss": 0.1668, "step": 19169 }, { "epoch": 0.33322324392914876, "grad_norm": 1.3211229102287871, "learning_rate": 7.77672833996232e-07, "loss": 0.2516, "step": 19170 }, { "epoch": 0.3332406264666516, "grad_norm": 5.379411576514362, "learning_rate": 7.776494240155196e-07, "loss": 0.57, "step": 19171 }, { "epoch": 0.3332580090041544, "grad_norm": 1.2355304808182344, "learning_rate": 7.776260131547862e-07, "loss": 0.261, "step": 19172 }, { "epoch": 0.33327539154165725, "grad_norm": 1.2509992208801854, "learning_rate": 7.776026014141058e-07, "loss": 0.2859, "step": 19173 }, { "epoch": 0.3332927740791601, "grad_norm": 1.4913339799510053, "learning_rate": 7.775791887935526e-07, "loss": 0.3027, "step": 19174 }, { "epoch": 0.3333101566166629, "grad_norm": 2.400235248056115, "learning_rate": 7.775557752932009e-07, "loss": 0.6437, "step": 19175 }, { "epoch": 0.33332753915416574, "grad_norm": 2.005766806137525, "learning_rate": 7.775323609131251e-07, "loss": 0.4517, "step": 19176 }, { "epoch": 0.33334492169166857, "grad_norm": 2.4687358999988165, "learning_rate": 7.775089456533991e-07, "loss": 0.5055, "step": 19177 }, { "epoch": 0.33336230422917135, "grad_norm": 1.2640306744271175, "learning_rate": 7.774855295140972e-07, "loss": 0.2447, "step": 19178 }, { "epoch": 0.3333796867666742, "grad_norm": 1.4702895428730687, "learning_rate": 7.774621124952936e-07, "loss": 0.4174, "step": 19179 }, { "epoch": 0.333397069304177, "grad_norm": 1.9691548980988152, "learning_rate": 7.774386945970625e-07, "loss": 0.3285, "step": 19180 }, { "epoch": 0.33341445184167984, "grad_norm": 3.22023254390411, "learning_rate": 7.774152758194783e-07, "loss": 0.6499, "step": 19181 }, { "epoch": 0.33343183437918267, "grad_norm": 2.159288517939125, "learning_rate": 7.77391856162615e-07, "loss": 0.468, "step": 19182 }, { "epoch": 0.3334492169166855, "grad_norm": 2.149505517212201, "learning_rate": 7.773684356265471e-07, "loss": 0.5147, "step": 19183 }, { "epoch": 0.33346659945418833, "grad_norm": 1.3464894472537727, "learning_rate": 7.773450142113486e-07, "loss": 0.411, "step": 19184 }, { "epoch": 0.33348398199169116, "grad_norm": 1.589869419751186, "learning_rate": 7.773215919170937e-07, "loss": 0.4238, "step": 19185 }, { "epoch": 0.333501364529194, "grad_norm": 1.89897849089959, "learning_rate": 7.772981687438569e-07, "loss": 0.4213, "step": 19186 }, { "epoch": 0.3335187470666968, "grad_norm": 2.7106496655090457, "learning_rate": 7.772747446917123e-07, "loss": 0.3544, "step": 19187 }, { "epoch": 0.3335361296041996, "grad_norm": 0.8938160557943269, "learning_rate": 7.77251319760734e-07, "loss": 0.3024, "step": 19188 }, { "epoch": 0.3335535121417024, "grad_norm": 1.1003676668221154, "learning_rate": 7.772278939509964e-07, "loss": 0.2545, "step": 19189 }, { "epoch": 0.33357089467920525, "grad_norm": 2.30050931062813, "learning_rate": 7.772044672625737e-07, "loss": 0.3624, "step": 19190 }, { "epoch": 0.3335882772167081, "grad_norm": 2.6254650016617487, "learning_rate": 7.771810396955402e-07, "loss": 0.3637, "step": 19191 }, { "epoch": 0.3336056597542109, "grad_norm": 1.260006764202933, "learning_rate": 7.771576112499702e-07, "loss": 0.3056, "step": 19192 }, { "epoch": 0.33362304229171375, "grad_norm": 3.058032207083951, "learning_rate": 7.771341819259378e-07, "loss": 0.1819, "step": 19193 }, { "epoch": 0.3336404248292166, "grad_norm": 2.6475393042794533, "learning_rate": 7.771107517235174e-07, "loss": 0.5432, "step": 19194 }, { "epoch": 0.3336578073667194, "grad_norm": 1.6106486939016875, "learning_rate": 7.770873206427832e-07, "loss": 0.3436, "step": 19195 }, { "epoch": 0.33367518990422224, "grad_norm": 2.0560541589545673, "learning_rate": 7.770638886838096e-07, "loss": 0.5124, "step": 19196 }, { "epoch": 0.33369257244172507, "grad_norm": 2.014061545402419, "learning_rate": 7.770404558466706e-07, "loss": 0.3813, "step": 19197 }, { "epoch": 0.33370995497922784, "grad_norm": 1.5494667610752313, "learning_rate": 7.770170221314405e-07, "loss": 0.2376, "step": 19198 }, { "epoch": 0.33372733751673067, "grad_norm": 2.359859836820385, "learning_rate": 7.769935875381939e-07, "loss": 0.3749, "step": 19199 }, { "epoch": 0.3337447200542335, "grad_norm": 1.4821800809146408, "learning_rate": 7.769701520670049e-07, "loss": 0.2991, "step": 19200 }, { "epoch": 0.33376210259173633, "grad_norm": 1.5330224340290561, "learning_rate": 7.769467157179475e-07, "loss": 0.3238, "step": 19201 }, { "epoch": 0.33377948512923916, "grad_norm": 2.009249039640611, "learning_rate": 7.769232784910964e-07, "loss": 0.3655, "step": 19202 }, { "epoch": 0.333796867666742, "grad_norm": 1.8983754922166378, "learning_rate": 7.768998403865257e-07, "loss": 0.3902, "step": 19203 }, { "epoch": 0.3338142502042448, "grad_norm": 0.6950195455134052, "learning_rate": 7.768764014043097e-07, "loss": 0.152, "step": 19204 }, { "epoch": 0.33383163274174765, "grad_norm": 2.0014107622273625, "learning_rate": 7.768529615445227e-07, "loss": 0.6094, "step": 19205 }, { "epoch": 0.3338490152792505, "grad_norm": 2.208005451522093, "learning_rate": 7.76829520807239e-07, "loss": 0.3474, "step": 19206 }, { "epoch": 0.3338663978167533, "grad_norm": 1.3328266393122514, "learning_rate": 7.768060791925329e-07, "loss": 0.3855, "step": 19207 }, { "epoch": 0.3338837803542561, "grad_norm": 1.2975613155199037, "learning_rate": 7.767826367004786e-07, "loss": 0.2993, "step": 19208 }, { "epoch": 0.3339011628917589, "grad_norm": 1.7144441951381393, "learning_rate": 7.767591933311506e-07, "loss": 0.2605, "step": 19209 }, { "epoch": 0.33391854542926175, "grad_norm": 1.3448248184836562, "learning_rate": 7.767357490846229e-07, "loss": 0.3531, "step": 19210 }, { "epoch": 0.3339359279667646, "grad_norm": 2.0276681274424257, "learning_rate": 7.767123039609702e-07, "loss": 0.3263, "step": 19211 }, { "epoch": 0.3339533105042674, "grad_norm": 2.3224158769391727, "learning_rate": 7.766888579602665e-07, "loss": 0.3546, "step": 19212 }, { "epoch": 0.33397069304177024, "grad_norm": 1.309854162000049, "learning_rate": 7.766654110825863e-07, "loss": 0.2944, "step": 19213 }, { "epoch": 0.33398807557927307, "grad_norm": 1.80896241743001, "learning_rate": 7.766419633280037e-07, "loss": 0.2685, "step": 19214 }, { "epoch": 0.3340054581167759, "grad_norm": 1.745293599105943, "learning_rate": 7.766185146965934e-07, "loss": 0.3661, "step": 19215 }, { "epoch": 0.33402284065427873, "grad_norm": 2.336795624057712, "learning_rate": 7.765950651884293e-07, "loss": 0.3873, "step": 19216 }, { "epoch": 0.33404022319178156, "grad_norm": 1.4118051519477268, "learning_rate": 7.765716148035857e-07, "loss": 0.4513, "step": 19217 }, { "epoch": 0.33405760572928433, "grad_norm": 1.232906428020558, "learning_rate": 7.765481635421375e-07, "loss": 0.2684, "step": 19218 }, { "epoch": 0.33407498826678717, "grad_norm": 1.7845557150684026, "learning_rate": 7.765247114041584e-07, "loss": 0.2256, "step": 19219 }, { "epoch": 0.33409237080429, "grad_norm": 1.8048003535645711, "learning_rate": 7.765012583897232e-07, "loss": 0.2044, "step": 19220 }, { "epoch": 0.3341097533417928, "grad_norm": 1.9430147370155206, "learning_rate": 7.764778044989059e-07, "loss": 0.4245, "step": 19221 }, { "epoch": 0.33412713587929566, "grad_norm": 2.52080518467599, "learning_rate": 7.764543497317809e-07, "loss": 0.3236, "step": 19222 }, { "epoch": 0.3341445184167985, "grad_norm": 2.1991407241603493, "learning_rate": 7.764308940884228e-07, "loss": 0.3212, "step": 19223 }, { "epoch": 0.3341619009543013, "grad_norm": 1.7438142624648656, "learning_rate": 7.764074375689056e-07, "loss": 0.2321, "step": 19224 }, { "epoch": 0.33417928349180415, "grad_norm": 1.5695708992804671, "learning_rate": 7.763839801733039e-07, "loss": 0.349, "step": 19225 }, { "epoch": 0.334196666029307, "grad_norm": 1.3247906753695453, "learning_rate": 7.763605219016918e-07, "loss": 0.382, "step": 19226 }, { "epoch": 0.3342140485668098, "grad_norm": 2.496268311111751, "learning_rate": 7.763370627541438e-07, "loss": 0.4949, "step": 19227 }, { "epoch": 0.3342314311043126, "grad_norm": 1.784830574830534, "learning_rate": 7.763136027307345e-07, "loss": 0.3123, "step": 19228 }, { "epoch": 0.3342488136418154, "grad_norm": 2.5626109175017553, "learning_rate": 7.762901418315377e-07, "loss": 0.3037, "step": 19229 }, { "epoch": 0.33426619617931824, "grad_norm": 2.7009410880718834, "learning_rate": 7.762666800566282e-07, "loss": 0.4317, "step": 19230 }, { "epoch": 0.3342835787168211, "grad_norm": 2.1838069979285235, "learning_rate": 7.762432174060801e-07, "loss": 0.3225, "step": 19231 }, { "epoch": 0.3343009612543239, "grad_norm": 2.0327793566395327, "learning_rate": 7.76219753879968e-07, "loss": 0.3033, "step": 19232 }, { "epoch": 0.33431834379182673, "grad_norm": 2.41416506636464, "learning_rate": 7.761962894783663e-07, "loss": 0.2502, "step": 19233 }, { "epoch": 0.33433572632932956, "grad_norm": 4.23369270963297, "learning_rate": 7.76172824201349e-07, "loss": 0.413, "step": 19234 }, { "epoch": 0.3343531088668324, "grad_norm": 1.9914320257502185, "learning_rate": 7.761493580489909e-07, "loss": 0.4313, "step": 19235 }, { "epoch": 0.3343704914043352, "grad_norm": 1.1087140025779902, "learning_rate": 7.761258910213661e-07, "loss": 0.2579, "step": 19236 }, { "epoch": 0.33438787394183805, "grad_norm": 1.8738237078533821, "learning_rate": 7.761024231185491e-07, "loss": 0.3496, "step": 19237 }, { "epoch": 0.33440525647934083, "grad_norm": 3.06764469988611, "learning_rate": 7.760789543406143e-07, "loss": 0.4594, "step": 19238 }, { "epoch": 0.33442263901684366, "grad_norm": 1.4183767537158403, "learning_rate": 7.760554846876359e-07, "loss": 0.3179, "step": 19239 }, { "epoch": 0.3344400215543465, "grad_norm": 2.384998724479894, "learning_rate": 7.760320141596885e-07, "loss": 0.2449, "step": 19240 }, { "epoch": 0.3344574040918493, "grad_norm": 2.065909181283047, "learning_rate": 7.760085427568463e-07, "loss": 0.3528, "step": 19241 }, { "epoch": 0.33447478662935215, "grad_norm": 1.7883257301982973, "learning_rate": 7.759850704791839e-07, "loss": 0.3425, "step": 19242 }, { "epoch": 0.334492169166855, "grad_norm": 2.301488508514002, "learning_rate": 7.759615973267757e-07, "loss": 0.3685, "step": 19243 }, { "epoch": 0.3345095517043578, "grad_norm": 3.2370953218921468, "learning_rate": 7.75938123299696e-07, "loss": 0.5431, "step": 19244 }, { "epoch": 0.33452693424186064, "grad_norm": 1.7660787902551671, "learning_rate": 7.75914648398019e-07, "loss": 0.3953, "step": 19245 }, { "epoch": 0.33454431677936347, "grad_norm": 1.9066827783766334, "learning_rate": 7.758911726218196e-07, "loss": 0.5587, "step": 19246 }, { "epoch": 0.3345616993168663, "grad_norm": 1.8453768136754407, "learning_rate": 7.758676959711716e-07, "loss": 0.3028, "step": 19247 }, { "epoch": 0.3345790818543691, "grad_norm": 1.4638452991630884, "learning_rate": 7.758442184461499e-07, "loss": 0.3975, "step": 19248 }, { "epoch": 0.3345964643918719, "grad_norm": 1.167871076700697, "learning_rate": 7.758207400468289e-07, "loss": 0.1736, "step": 19249 }, { "epoch": 0.33461384692937474, "grad_norm": 1.4708467549671984, "learning_rate": 7.757972607732826e-07, "loss": 0.2303, "step": 19250 }, { "epoch": 0.33463122946687757, "grad_norm": 3.6268429863729414, "learning_rate": 7.757737806255857e-07, "loss": 0.2237, "step": 19251 }, { "epoch": 0.3346486120043804, "grad_norm": 2.3499302012042187, "learning_rate": 7.757502996038127e-07, "loss": 0.422, "step": 19252 }, { "epoch": 0.3346659945418832, "grad_norm": 1.8803723286713934, "learning_rate": 7.757268177080378e-07, "loss": 0.5014, "step": 19253 }, { "epoch": 0.33468337707938606, "grad_norm": 2.1891201657910506, "learning_rate": 7.757033349383355e-07, "loss": 0.4057, "step": 19254 }, { "epoch": 0.3347007596168889, "grad_norm": 2.7290557628046024, "learning_rate": 7.756798512947804e-07, "loss": 0.4473, "step": 19255 }, { "epoch": 0.3347181421543917, "grad_norm": 2.301386431259523, "learning_rate": 7.756563667774468e-07, "loss": 0.3192, "step": 19256 }, { "epoch": 0.33473552469189455, "grad_norm": 1.1997705784070156, "learning_rate": 7.756328813864092e-07, "loss": 0.4156, "step": 19257 }, { "epoch": 0.3347529072293973, "grad_norm": 1.9136545112294483, "learning_rate": 7.75609395121742e-07, "loss": 0.3051, "step": 19258 }, { "epoch": 0.33477028976690015, "grad_norm": 1.149281582739903, "learning_rate": 7.755859079835194e-07, "loss": 0.4298, "step": 19259 }, { "epoch": 0.334787672304403, "grad_norm": 1.6504280481925282, "learning_rate": 7.755624199718162e-07, "loss": 0.2858, "step": 19260 }, { "epoch": 0.3348050548419058, "grad_norm": 1.1380306598469758, "learning_rate": 7.755389310867067e-07, "loss": 0.2439, "step": 19261 }, { "epoch": 0.33482243737940864, "grad_norm": 1.5207500741274707, "learning_rate": 7.755154413282654e-07, "loss": 0.277, "step": 19262 }, { "epoch": 0.3348398199169115, "grad_norm": 2.4003074458473823, "learning_rate": 7.754919506965667e-07, "loss": 0.4013, "step": 19263 }, { "epoch": 0.3348572024544143, "grad_norm": 2.2100175152248616, "learning_rate": 7.754684591916851e-07, "loss": 0.3378, "step": 19264 }, { "epoch": 0.33487458499191713, "grad_norm": 2.0374925905251606, "learning_rate": 7.754449668136951e-07, "loss": 0.454, "step": 19265 }, { "epoch": 0.33489196752941996, "grad_norm": 2.3742733085644994, "learning_rate": 7.754214735626708e-07, "loss": 0.373, "step": 19266 }, { "epoch": 0.3349093500669228, "grad_norm": 1.3725215440886152, "learning_rate": 7.753979794386871e-07, "loss": 0.3103, "step": 19267 }, { "epoch": 0.33492673260442557, "grad_norm": 1.1841436535221952, "learning_rate": 7.753744844418184e-07, "loss": 0.2569, "step": 19268 }, { "epoch": 0.3349441151419284, "grad_norm": 1.367096636013782, "learning_rate": 7.75350988572139e-07, "loss": 0.3325, "step": 19269 }, { "epoch": 0.33496149767943123, "grad_norm": 1.838221059924421, "learning_rate": 7.753274918297234e-07, "loss": 0.3174, "step": 19270 }, { "epoch": 0.33497888021693406, "grad_norm": 2.3052139405136223, "learning_rate": 7.753039942146462e-07, "loss": 0.4618, "step": 19271 }, { "epoch": 0.3349962627544369, "grad_norm": 1.8314996841080562, "learning_rate": 7.752804957269818e-07, "loss": 0.3089, "step": 19272 }, { "epoch": 0.3350136452919397, "grad_norm": 1.9513736651696472, "learning_rate": 7.752569963668047e-07, "loss": 0.4145, "step": 19273 }, { "epoch": 0.33503102782944255, "grad_norm": 1.7617707811701648, "learning_rate": 7.752334961341894e-07, "loss": 0.2783, "step": 19274 }, { "epoch": 0.3350484103669454, "grad_norm": 1.3814150794254132, "learning_rate": 7.752099950292103e-07, "loss": 0.3224, "step": 19275 }, { "epoch": 0.3350657929044482, "grad_norm": 10.071924423795874, "learning_rate": 7.751864930519418e-07, "loss": 0.3051, "step": 19276 }, { "epoch": 0.33508317544195104, "grad_norm": 4.175264129814906, "learning_rate": 7.751629902024587e-07, "loss": 0.535, "step": 19277 }, { "epoch": 0.3351005579794538, "grad_norm": 1.6902121619062327, "learning_rate": 7.751394864808353e-07, "loss": 0.2714, "step": 19278 }, { "epoch": 0.33511794051695665, "grad_norm": 2.273483731482913, "learning_rate": 7.75115981887146e-07, "loss": 0.3558, "step": 19279 }, { "epoch": 0.3351353230544595, "grad_norm": 1.473815482751718, "learning_rate": 7.750924764214655e-07, "loss": 0.3332, "step": 19280 }, { "epoch": 0.3351527055919623, "grad_norm": 1.6592250658799907, "learning_rate": 7.750689700838683e-07, "loss": 0.2773, "step": 19281 }, { "epoch": 0.33517008812946514, "grad_norm": 3.4651023046075964, "learning_rate": 7.750454628744287e-07, "loss": 0.2575, "step": 19282 }, { "epoch": 0.33518747066696797, "grad_norm": 1.2935852156621224, "learning_rate": 7.750219547932213e-07, "loss": 0.212, "step": 19283 }, { "epoch": 0.3352048532044708, "grad_norm": 1.754976693049038, "learning_rate": 7.749984458403207e-07, "loss": 0.3262, "step": 19284 }, { "epoch": 0.33522223574197363, "grad_norm": 1.792826890968923, "learning_rate": 7.749749360158014e-07, "loss": 0.3671, "step": 19285 }, { "epoch": 0.33523961827947646, "grad_norm": 2.1682062716050674, "learning_rate": 7.749514253197378e-07, "loss": 0.359, "step": 19286 }, { "epoch": 0.3352570008169793, "grad_norm": 1.9791552652125992, "learning_rate": 7.749279137522046e-07, "loss": 0.374, "step": 19287 }, { "epoch": 0.33527438335448206, "grad_norm": 1.3130540848516723, "learning_rate": 7.74904401313276e-07, "loss": 0.336, "step": 19288 }, { "epoch": 0.3352917658919849, "grad_norm": 1.6806292884264498, "learning_rate": 7.748808880030269e-07, "loss": 0.3552, "step": 19289 }, { "epoch": 0.3353091484294877, "grad_norm": 1.435122534977769, "learning_rate": 7.748573738215316e-07, "loss": 0.3832, "step": 19290 }, { "epoch": 0.33532653096699055, "grad_norm": 1.8325006343384005, "learning_rate": 7.748338587688646e-07, "loss": 0.3774, "step": 19291 }, { "epoch": 0.3353439135044934, "grad_norm": 2.018285873657275, "learning_rate": 7.748103428451006e-07, "loss": 0.2702, "step": 19292 }, { "epoch": 0.3353612960419962, "grad_norm": 2.410490713335969, "learning_rate": 7.747868260503141e-07, "loss": 0.4366, "step": 19293 }, { "epoch": 0.33537867857949905, "grad_norm": 1.372648535441964, "learning_rate": 7.747633083845795e-07, "loss": 0.3404, "step": 19294 }, { "epoch": 0.3353960611170019, "grad_norm": 1.6858784866981578, "learning_rate": 7.747397898479716e-07, "loss": 0.343, "step": 19295 }, { "epoch": 0.3354134436545047, "grad_norm": 2.1033812920824437, "learning_rate": 7.747162704405645e-07, "loss": 0.3775, "step": 19296 }, { "epoch": 0.33543082619200754, "grad_norm": 1.6024706771812178, "learning_rate": 7.746927501624332e-07, "loss": 0.3014, "step": 19297 }, { "epoch": 0.3354482087295103, "grad_norm": 1.5943350214272658, "learning_rate": 7.74669229013652e-07, "loss": 0.2677, "step": 19298 }, { "epoch": 0.33546559126701314, "grad_norm": 4.033254011595343, "learning_rate": 7.746457069942955e-07, "loss": 0.4188, "step": 19299 }, { "epoch": 0.33548297380451597, "grad_norm": 1.4322559833537565, "learning_rate": 7.746221841044384e-07, "loss": 0.1591, "step": 19300 }, { "epoch": 0.3355003563420188, "grad_norm": 2.799735249929021, "learning_rate": 7.74598660344155e-07, "loss": 0.5445, "step": 19301 }, { "epoch": 0.33551773887952163, "grad_norm": 1.6171226531495801, "learning_rate": 7.7457513571352e-07, "loss": 0.2528, "step": 19302 }, { "epoch": 0.33553512141702446, "grad_norm": 1.7351831492411163, "learning_rate": 7.74551610212608e-07, "loss": 0.3299, "step": 19303 }, { "epoch": 0.3355525039545273, "grad_norm": 1.0957027719794183, "learning_rate": 7.745280838414934e-07, "loss": 0.3372, "step": 19304 }, { "epoch": 0.3355698864920301, "grad_norm": 1.5544758850224139, "learning_rate": 7.745045566002509e-07, "loss": 0.3468, "step": 19305 }, { "epoch": 0.33558726902953295, "grad_norm": 2.5262856355868584, "learning_rate": 7.744810284889552e-07, "loss": 0.4415, "step": 19306 }, { "epoch": 0.3356046515670358, "grad_norm": 2.059502408644461, "learning_rate": 7.744574995076807e-07, "loss": 0.3669, "step": 19307 }, { "epoch": 0.33562203410453856, "grad_norm": 2.9491851027838005, "learning_rate": 7.744339696565019e-07, "loss": 0.3274, "step": 19308 }, { "epoch": 0.3356394166420414, "grad_norm": 1.275653870136547, "learning_rate": 7.744104389354935e-07, "loss": 0.3299, "step": 19309 }, { "epoch": 0.3356567991795442, "grad_norm": 1.3661739500305061, "learning_rate": 7.743869073447301e-07, "loss": 0.3707, "step": 19310 }, { "epoch": 0.33567418171704705, "grad_norm": 1.7174962697743625, "learning_rate": 7.743633748842862e-07, "loss": 0.2844, "step": 19311 }, { "epoch": 0.3356915642545499, "grad_norm": 3.920699481988243, "learning_rate": 7.743398415542365e-07, "loss": 0.3313, "step": 19312 }, { "epoch": 0.3357089467920527, "grad_norm": 1.6504731141165698, "learning_rate": 7.743163073546555e-07, "loss": 0.3806, "step": 19313 }, { "epoch": 0.33572632932955554, "grad_norm": 1.2792648821138846, "learning_rate": 7.742927722856178e-07, "loss": 0.3136, "step": 19314 }, { "epoch": 0.33574371186705837, "grad_norm": 2.443157561374875, "learning_rate": 7.74269236347198e-07, "loss": 0.3647, "step": 19315 }, { "epoch": 0.3357610944045612, "grad_norm": 1.7894274252957727, "learning_rate": 7.742456995394707e-07, "loss": 0.3726, "step": 19316 }, { "epoch": 0.335778476942064, "grad_norm": 2.246358302197246, "learning_rate": 7.742221618625106e-07, "loss": 0.2711, "step": 19317 }, { "epoch": 0.3357958594795668, "grad_norm": 2.17705806848416, "learning_rate": 7.741986233163922e-07, "loss": 0.3626, "step": 19318 }, { "epoch": 0.33581324201706964, "grad_norm": 2.087317834844718, "learning_rate": 7.7417508390119e-07, "loss": 0.3053, "step": 19319 }, { "epoch": 0.33583062455457247, "grad_norm": 1.8224651397911624, "learning_rate": 7.741515436169787e-07, "loss": 0.367, "step": 19320 }, { "epoch": 0.3358480070920753, "grad_norm": 1.7515065950225481, "learning_rate": 7.741280024638332e-07, "loss": 0.2731, "step": 19321 }, { "epoch": 0.3358653896295781, "grad_norm": 1.3447959932049256, "learning_rate": 7.741044604418276e-07, "loss": 0.3486, "step": 19322 }, { "epoch": 0.33588277216708096, "grad_norm": 2.673186835684852, "learning_rate": 7.74080917551037e-07, "loss": 0.1773, "step": 19323 }, { "epoch": 0.3359001547045838, "grad_norm": 1.631283604528156, "learning_rate": 7.740573737915355e-07, "loss": 0.4077, "step": 19324 }, { "epoch": 0.3359175372420866, "grad_norm": 2.0426841403704348, "learning_rate": 7.740338291633983e-07, "loss": 0.5018, "step": 19325 }, { "epoch": 0.33593491977958945, "grad_norm": 2.220126317401802, "learning_rate": 7.740102836666996e-07, "loss": 0.3971, "step": 19326 }, { "epoch": 0.3359523023170922, "grad_norm": 3.0215436124931707, "learning_rate": 7.739867373015142e-07, "loss": 0.3403, "step": 19327 }, { "epoch": 0.33596968485459505, "grad_norm": 2.573765810865967, "learning_rate": 7.739631900679167e-07, "loss": 0.2947, "step": 19328 }, { "epoch": 0.3359870673920979, "grad_norm": 2.268213937566143, "learning_rate": 7.739396419659818e-07, "loss": 0.4327, "step": 19329 }, { "epoch": 0.3360044499296007, "grad_norm": 1.8277720188653543, "learning_rate": 7.739160929957839e-07, "loss": 0.4275, "step": 19330 }, { "epoch": 0.33602183246710354, "grad_norm": 1.7152588623029363, "learning_rate": 7.738925431573981e-07, "loss": 0.4482, "step": 19331 }, { "epoch": 0.3360392150046064, "grad_norm": 2.636403226695056, "learning_rate": 7.738689924508986e-07, "loss": 0.3277, "step": 19332 }, { "epoch": 0.3360565975421092, "grad_norm": 1.9308577817648238, "learning_rate": 7.7384544087636e-07, "loss": 0.3676, "step": 19333 }, { "epoch": 0.33607398007961203, "grad_norm": 1.294080521083563, "learning_rate": 7.738218884338574e-07, "loss": 0.3215, "step": 19334 }, { "epoch": 0.33609136261711486, "grad_norm": 2.4172426265844043, "learning_rate": 7.737983351234651e-07, "loss": 0.4458, "step": 19335 }, { "epoch": 0.3361087451546177, "grad_norm": 1.1132693542031344, "learning_rate": 7.737747809452579e-07, "loss": 0.291, "step": 19336 }, { "epoch": 0.33612612769212047, "grad_norm": 2.3250461577049277, "learning_rate": 7.737512258993103e-07, "loss": 0.4702, "step": 19337 }, { "epoch": 0.3361435102296233, "grad_norm": 1.6075368107480001, "learning_rate": 7.737276699856972e-07, "loss": 0.2663, "step": 19338 }, { "epoch": 0.33616089276712613, "grad_norm": 1.2329985104772874, "learning_rate": 7.73704113204493e-07, "loss": 0.4152, "step": 19339 }, { "epoch": 0.33617827530462896, "grad_norm": 1.3488335657276618, "learning_rate": 7.736805555557726e-07, "loss": 0.3806, "step": 19340 }, { "epoch": 0.3361956578421318, "grad_norm": 1.2072391553302677, "learning_rate": 7.736569970396105e-07, "loss": 0.2146, "step": 19341 }, { "epoch": 0.3362130403796346, "grad_norm": 3.5155169639056254, "learning_rate": 7.736334376560815e-07, "loss": 0.3634, "step": 19342 }, { "epoch": 0.33623042291713745, "grad_norm": 1.564998993588287, "learning_rate": 7.736098774052601e-07, "loss": 0.5234, "step": 19343 }, { "epoch": 0.3362478054546403, "grad_norm": 1.8792740330608935, "learning_rate": 7.735863162872212e-07, "loss": 0.2777, "step": 19344 }, { "epoch": 0.3362651879921431, "grad_norm": 1.6346180979727072, "learning_rate": 7.735627543020391e-07, "loss": 0.3902, "step": 19345 }, { "epoch": 0.33628257052964594, "grad_norm": 1.2254367700459157, "learning_rate": 7.73539191449789e-07, "loss": 0.2497, "step": 19346 }, { "epoch": 0.3362999530671487, "grad_norm": 2.352140994695768, "learning_rate": 7.735156277305452e-07, "loss": 0.3354, "step": 19347 }, { "epoch": 0.33631733560465155, "grad_norm": 2.696130704361601, "learning_rate": 7.734920631443825e-07, "loss": 0.792, "step": 19348 }, { "epoch": 0.3363347181421544, "grad_norm": 1.8613742195201621, "learning_rate": 7.734684976913756e-07, "loss": 0.5982, "step": 19349 }, { "epoch": 0.3363521006796572, "grad_norm": 2.884229012102631, "learning_rate": 7.734449313715992e-07, "loss": 0.6821, "step": 19350 }, { "epoch": 0.33636948321716004, "grad_norm": 1.6805309220279872, "learning_rate": 7.734213641851279e-07, "loss": 0.2617, "step": 19351 }, { "epoch": 0.33638686575466287, "grad_norm": 1.4262066048718491, "learning_rate": 7.733977961320366e-07, "loss": 0.2414, "step": 19352 }, { "epoch": 0.3364042482921657, "grad_norm": 2.911669813632113, "learning_rate": 7.733742272123997e-07, "loss": 0.3635, "step": 19353 }, { "epoch": 0.3364216308296685, "grad_norm": 1.8131690416823587, "learning_rate": 7.733506574262923e-07, "loss": 0.2572, "step": 19354 }, { "epoch": 0.33643901336717136, "grad_norm": 1.4414077809360295, "learning_rate": 7.733270867737889e-07, "loss": 0.2556, "step": 19355 }, { "epoch": 0.3364563959046742, "grad_norm": 1.6544205384373107, "learning_rate": 7.733035152549639e-07, "loss": 0.3509, "step": 19356 }, { "epoch": 0.33647377844217696, "grad_norm": 2.0478896325746527, "learning_rate": 7.732799428698926e-07, "loss": 0.3445, "step": 19357 }, { "epoch": 0.3364911609796798, "grad_norm": 1.201818611637633, "learning_rate": 7.732563696186492e-07, "loss": 0.1921, "step": 19358 }, { "epoch": 0.3365085435171826, "grad_norm": 1.9396819198795079, "learning_rate": 7.732327955013087e-07, "loss": 0.4578, "step": 19359 }, { "epoch": 0.33652592605468545, "grad_norm": 1.813970404924263, "learning_rate": 7.732092205179459e-07, "loss": 0.2584, "step": 19360 }, { "epoch": 0.3365433085921883, "grad_norm": 2.137463391033553, "learning_rate": 7.731856446686352e-07, "loss": 0.4048, "step": 19361 }, { "epoch": 0.3365606911296911, "grad_norm": 1.6173794126675354, "learning_rate": 7.731620679534516e-07, "loss": 0.3418, "step": 19362 }, { "epoch": 0.33657807366719394, "grad_norm": 1.9775112908930752, "learning_rate": 7.731384903724697e-07, "loss": 0.3118, "step": 19363 }, { "epoch": 0.3365954562046968, "grad_norm": 1.4346863240418573, "learning_rate": 7.731149119257643e-07, "loss": 0.3779, "step": 19364 }, { "epoch": 0.3366128387421996, "grad_norm": 1.13069399068012, "learning_rate": 7.730913326134099e-07, "loss": 0.3456, "step": 19365 }, { "epoch": 0.33663022127970244, "grad_norm": 1.29631907778278, "learning_rate": 7.730677524354816e-07, "loss": 0.238, "step": 19366 }, { "epoch": 0.3366476038172052, "grad_norm": 1.6997007124116164, "learning_rate": 7.730441713920539e-07, "loss": 0.4306, "step": 19367 }, { "epoch": 0.33666498635470804, "grad_norm": 2.3376390007972603, "learning_rate": 7.730205894832016e-07, "loss": 0.3955, "step": 19368 }, { "epoch": 0.33668236889221087, "grad_norm": 1.5666161406124917, "learning_rate": 7.729970067089995e-07, "loss": 0.4498, "step": 19369 }, { "epoch": 0.3366997514297137, "grad_norm": 1.2901162302908946, "learning_rate": 7.729734230695221e-07, "loss": 0.3319, "step": 19370 }, { "epoch": 0.33671713396721653, "grad_norm": 1.4681203449729168, "learning_rate": 7.729498385648445e-07, "loss": 0.2905, "step": 19371 }, { "epoch": 0.33673451650471936, "grad_norm": 2.7667454699404774, "learning_rate": 7.729262531950414e-07, "loss": 0.571, "step": 19372 }, { "epoch": 0.3367518990422222, "grad_norm": 9.080419564042282, "learning_rate": 7.729026669601872e-07, "loss": 0.7176, "step": 19373 }, { "epoch": 0.336769281579725, "grad_norm": 1.4262288214252403, "learning_rate": 7.728790798603571e-07, "loss": 0.4279, "step": 19374 }, { "epoch": 0.33678666411722785, "grad_norm": 1.6916296252553213, "learning_rate": 7.728554918956256e-07, "loss": 0.2, "step": 19375 }, { "epoch": 0.3368040466547307, "grad_norm": 1.8251668125961313, "learning_rate": 7.728319030660674e-07, "loss": 0.3287, "step": 19376 }, { "epoch": 0.33682142919223346, "grad_norm": 2.055504914317338, "learning_rate": 7.728083133717576e-07, "loss": 0.513, "step": 19377 }, { "epoch": 0.3368388117297363, "grad_norm": 1.8014521787075524, "learning_rate": 7.727847228127708e-07, "loss": 0.3338, "step": 19378 }, { "epoch": 0.3368561942672391, "grad_norm": 3.403710643994052, "learning_rate": 7.727611313891815e-07, "loss": 0.4315, "step": 19379 }, { "epoch": 0.33687357680474195, "grad_norm": 2.284191622962934, "learning_rate": 7.727375391010648e-07, "loss": 0.2187, "step": 19380 }, { "epoch": 0.3368909593422448, "grad_norm": 2.372236235467507, "learning_rate": 7.727139459484954e-07, "loss": 0.2791, "step": 19381 }, { "epoch": 0.3369083418797476, "grad_norm": 1.159849312277928, "learning_rate": 7.726903519315482e-07, "loss": 0.4411, "step": 19382 }, { "epoch": 0.33692572441725044, "grad_norm": 1.7572535693890774, "learning_rate": 7.726667570502978e-07, "loss": 0.4132, "step": 19383 }, { "epoch": 0.33694310695475327, "grad_norm": 1.384434204555978, "learning_rate": 7.72643161304819e-07, "loss": 0.2564, "step": 19384 }, { "epoch": 0.3369604894922561, "grad_norm": 1.3289404466343466, "learning_rate": 7.726195646951866e-07, "loss": 0.2567, "step": 19385 }, { "epoch": 0.33697787202975893, "grad_norm": 1.6419548075669808, "learning_rate": 7.725959672214754e-07, "loss": 0.377, "step": 19386 }, { "epoch": 0.3369952545672617, "grad_norm": 1.7702562535716548, "learning_rate": 7.725723688837602e-07, "loss": 0.3875, "step": 19387 }, { "epoch": 0.33701263710476453, "grad_norm": 1.652046837303962, "learning_rate": 7.725487696821158e-07, "loss": 0.36, "step": 19388 }, { "epoch": 0.33703001964226736, "grad_norm": 1.3109473507126126, "learning_rate": 7.725251696166171e-07, "loss": 0.6643, "step": 19389 }, { "epoch": 0.3370474021797702, "grad_norm": 1.1597308484277427, "learning_rate": 7.725015686873386e-07, "loss": 0.2248, "step": 19390 }, { "epoch": 0.337064784717273, "grad_norm": 1.4526073725453919, "learning_rate": 7.724779668943555e-07, "loss": 0.3835, "step": 19391 }, { "epoch": 0.33708216725477586, "grad_norm": 3.2369748301145282, "learning_rate": 7.724543642377422e-07, "loss": 0.4202, "step": 19392 }, { "epoch": 0.3370995497922787, "grad_norm": 1.459348683019979, "learning_rate": 7.724307607175738e-07, "loss": 0.3043, "step": 19393 }, { "epoch": 0.3371169323297815, "grad_norm": 1.637937985345483, "learning_rate": 7.724071563339252e-07, "loss": 0.4612, "step": 19394 }, { "epoch": 0.33713431486728435, "grad_norm": 5.291390893920302, "learning_rate": 7.723835510868709e-07, "loss": 0.5182, "step": 19395 }, { "epoch": 0.3371516974047872, "grad_norm": 2.214289466664952, "learning_rate": 7.723599449764859e-07, "loss": 0.248, "step": 19396 }, { "epoch": 0.33716907994228995, "grad_norm": 1.1197559533442731, "learning_rate": 7.72336338002845e-07, "loss": 0.3673, "step": 19397 }, { "epoch": 0.3371864624797928, "grad_norm": 1.4363399839732223, "learning_rate": 7.72312730166023e-07, "loss": 0.3472, "step": 19398 }, { "epoch": 0.3372038450172956, "grad_norm": 1.6546209655196862, "learning_rate": 7.722891214660947e-07, "loss": 0.4914, "step": 19399 }, { "epoch": 0.33722122755479844, "grad_norm": 2.037031293185174, "learning_rate": 7.722655119031349e-07, "loss": 0.4143, "step": 19400 }, { "epoch": 0.33723861009230127, "grad_norm": 1.3857635519544296, "learning_rate": 7.722419014772185e-07, "loss": 0.2835, "step": 19401 }, { "epoch": 0.3372559926298041, "grad_norm": 1.5548603501209477, "learning_rate": 7.722182901884205e-07, "loss": 0.305, "step": 19402 }, { "epoch": 0.33727337516730693, "grad_norm": 2.5080693306929542, "learning_rate": 7.721946780368156e-07, "loss": 0.3188, "step": 19403 }, { "epoch": 0.33729075770480976, "grad_norm": 2.5295924926790376, "learning_rate": 7.721710650224784e-07, "loss": 0.4162, "step": 19404 }, { "epoch": 0.3373081402423126, "grad_norm": 2.4081665268420482, "learning_rate": 7.72147451145484e-07, "loss": 0.4182, "step": 19405 }, { "epoch": 0.3373255227798154, "grad_norm": 2.3133273386371083, "learning_rate": 7.721238364059072e-07, "loss": 0.4969, "step": 19406 }, { "epoch": 0.3373429053173182, "grad_norm": 4.3180140476893865, "learning_rate": 7.72100220803823e-07, "loss": 0.4801, "step": 19407 }, { "epoch": 0.33736028785482103, "grad_norm": 2.0508646911159913, "learning_rate": 7.720766043393059e-07, "loss": 0.3239, "step": 19408 }, { "epoch": 0.33737767039232386, "grad_norm": 1.663222210227553, "learning_rate": 7.720529870124311e-07, "loss": 0.2857, "step": 19409 }, { "epoch": 0.3373950529298267, "grad_norm": 4.207708549345403, "learning_rate": 7.720293688232731e-07, "loss": 0.4296, "step": 19410 }, { "epoch": 0.3374124354673295, "grad_norm": 2.115351095813245, "learning_rate": 7.720057497719071e-07, "loss": 0.2076, "step": 19411 }, { "epoch": 0.33742981800483235, "grad_norm": 1.6136378584488766, "learning_rate": 7.719821298584077e-07, "loss": 0.2825, "step": 19412 }, { "epoch": 0.3374472005423352, "grad_norm": 1.9742633781868875, "learning_rate": 7.7195850908285e-07, "loss": 0.2209, "step": 19413 }, { "epoch": 0.337464583079838, "grad_norm": 1.6931049257371613, "learning_rate": 7.719348874453088e-07, "loss": 0.4446, "step": 19414 }, { "epoch": 0.33748196561734084, "grad_norm": 6.385735038410361, "learning_rate": 7.719112649458589e-07, "loss": 0.2309, "step": 19415 }, { "epoch": 0.33749934815484367, "grad_norm": 1.9041583655180598, "learning_rate": 7.718876415845752e-07, "loss": 0.3183, "step": 19416 }, { "epoch": 0.33751673069234644, "grad_norm": 2.5411275549042864, "learning_rate": 7.718640173615325e-07, "loss": 0.3344, "step": 19417 }, { "epoch": 0.3375341132298493, "grad_norm": 3.2193158928269883, "learning_rate": 7.718403922768058e-07, "loss": 0.4287, "step": 19418 }, { "epoch": 0.3375514957673521, "grad_norm": 1.586617655029579, "learning_rate": 7.718167663304698e-07, "loss": 0.2796, "step": 19419 }, { "epoch": 0.33756887830485494, "grad_norm": 3.7235934418771035, "learning_rate": 7.717931395225997e-07, "loss": 0.3698, "step": 19420 }, { "epoch": 0.33758626084235777, "grad_norm": 1.3810655370415559, "learning_rate": 7.717695118532701e-07, "loss": 0.1844, "step": 19421 }, { "epoch": 0.3376036433798606, "grad_norm": 2.7820927120343466, "learning_rate": 7.717458833225559e-07, "loss": 0.7194, "step": 19422 }, { "epoch": 0.3376210259173634, "grad_norm": 2.435844145922754, "learning_rate": 7.717222539305324e-07, "loss": 0.3562, "step": 19423 }, { "epoch": 0.33763840845486626, "grad_norm": 3.557283551380795, "learning_rate": 7.716986236772739e-07, "loss": 0.331, "step": 19424 }, { "epoch": 0.3376557909923691, "grad_norm": 1.3485252659724332, "learning_rate": 7.716749925628556e-07, "loss": 0.5305, "step": 19425 }, { "epoch": 0.3376731735298719, "grad_norm": 2.3726532027322955, "learning_rate": 7.716513605873525e-07, "loss": 0.2447, "step": 19426 }, { "epoch": 0.3376905560673747, "grad_norm": 1.3601463153965794, "learning_rate": 7.716277277508392e-07, "loss": 0.2588, "step": 19427 }, { "epoch": 0.3377079386048775, "grad_norm": 1.8293872001535125, "learning_rate": 7.716040940533908e-07, "loss": 0.4716, "step": 19428 }, { "epoch": 0.33772532114238035, "grad_norm": 2.3197987805254043, "learning_rate": 7.715804594950823e-07, "loss": 0.292, "step": 19429 }, { "epoch": 0.3377427036798832, "grad_norm": 1.8603474086285166, "learning_rate": 7.715568240759884e-07, "loss": 0.5185, "step": 19430 }, { "epoch": 0.337760086217386, "grad_norm": 2.6017974851508483, "learning_rate": 7.715331877961841e-07, "loss": 0.3371, "step": 19431 }, { "epoch": 0.33777746875488884, "grad_norm": 3.6068569891140894, "learning_rate": 7.715095506557445e-07, "loss": 0.2838, "step": 19432 }, { "epoch": 0.3377948512923917, "grad_norm": 1.7712157256258056, "learning_rate": 7.714859126547442e-07, "loss": 0.4155, "step": 19433 }, { "epoch": 0.3378122338298945, "grad_norm": 1.8199382091734737, "learning_rate": 7.714622737932583e-07, "loss": 0.3335, "step": 19434 }, { "epoch": 0.33782961636739733, "grad_norm": 1.6942692026570614, "learning_rate": 7.714386340713617e-07, "loss": 0.3045, "step": 19435 }, { "epoch": 0.33784699890490016, "grad_norm": 0.9552061004753191, "learning_rate": 7.714149934891292e-07, "loss": 0.362, "step": 19436 }, { "epoch": 0.33786438144240294, "grad_norm": 0.8914883045728607, "learning_rate": 7.71391352046636e-07, "loss": 0.1667, "step": 19437 }, { "epoch": 0.33788176397990577, "grad_norm": 2.448304445937917, "learning_rate": 7.713677097439568e-07, "loss": 0.4762, "step": 19438 }, { "epoch": 0.3378991465174086, "grad_norm": 2.0519325538186197, "learning_rate": 7.713440665811667e-07, "loss": 0.48, "step": 19439 }, { "epoch": 0.33791652905491143, "grad_norm": 2.0665895374111845, "learning_rate": 7.713204225583405e-07, "loss": 0.379, "step": 19440 }, { "epoch": 0.33793391159241426, "grad_norm": 1.836249951232277, "learning_rate": 7.712967776755531e-07, "loss": 0.5228, "step": 19441 }, { "epoch": 0.3379512941299171, "grad_norm": 1.5076149927861244, "learning_rate": 7.712731319328797e-07, "loss": 0.2886, "step": 19442 }, { "epoch": 0.3379686766674199, "grad_norm": 1.1439523130982454, "learning_rate": 7.71249485330395e-07, "loss": 0.2587, "step": 19443 }, { "epoch": 0.33798605920492275, "grad_norm": 1.103414428192897, "learning_rate": 7.71225837868174e-07, "loss": 0.2347, "step": 19444 }, { "epoch": 0.3380034417424256, "grad_norm": 2.1061173991385598, "learning_rate": 7.712021895462918e-07, "loss": 0.4031, "step": 19445 }, { "epoch": 0.3380208242799284, "grad_norm": 1.2139568470804023, "learning_rate": 7.711785403648232e-07, "loss": 0.2585, "step": 19446 }, { "epoch": 0.3380382068174312, "grad_norm": 2.642465964996828, "learning_rate": 7.711548903238432e-07, "loss": 0.355, "step": 19447 }, { "epoch": 0.338055589354934, "grad_norm": 1.2499429404761215, "learning_rate": 7.711312394234267e-07, "loss": 0.3003, "step": 19448 }, { "epoch": 0.33807297189243685, "grad_norm": 2.1406725234595143, "learning_rate": 7.711075876636487e-07, "loss": 0.4502, "step": 19449 }, { "epoch": 0.3380903544299397, "grad_norm": 1.5079666400593703, "learning_rate": 7.710839350445843e-07, "loss": 0.3777, "step": 19450 }, { "epoch": 0.3381077369674425, "grad_norm": 1.755647690036942, "learning_rate": 7.710602815663083e-07, "loss": 0.2888, "step": 19451 }, { "epoch": 0.33812511950494534, "grad_norm": 1.8333938533177212, "learning_rate": 7.710366272288956e-07, "loss": 0.2312, "step": 19452 }, { "epoch": 0.33814250204244817, "grad_norm": 1.4927563906105683, "learning_rate": 7.710129720324214e-07, "loss": 0.2327, "step": 19453 }, { "epoch": 0.338159884579951, "grad_norm": 1.9802414007014928, "learning_rate": 7.709893159769605e-07, "loss": 0.3401, "step": 19454 }, { "epoch": 0.33817726711745383, "grad_norm": 1.7231253099310737, "learning_rate": 7.709656590625882e-07, "loss": 0.3901, "step": 19455 }, { "epoch": 0.33819464965495666, "grad_norm": 2.0023063141555157, "learning_rate": 7.709420012893791e-07, "loss": 0.3969, "step": 19456 }, { "epoch": 0.33821203219245943, "grad_norm": 3.2203069048405313, "learning_rate": 7.709183426574081e-07, "loss": 0.5846, "step": 19457 }, { "epoch": 0.33822941472996226, "grad_norm": 1.6290067127253953, "learning_rate": 7.708946831667507e-07, "loss": 0.3112, "step": 19458 }, { "epoch": 0.3382467972674651, "grad_norm": 1.611254366678111, "learning_rate": 7.708710228174814e-07, "loss": 0.4765, "step": 19459 }, { "epoch": 0.3382641798049679, "grad_norm": 1.4968255017463372, "learning_rate": 7.708473616096753e-07, "loss": 0.3687, "step": 19460 }, { "epoch": 0.33828156234247075, "grad_norm": 1.7992946662301397, "learning_rate": 7.708236995434077e-07, "loss": 0.491, "step": 19461 }, { "epoch": 0.3382989448799736, "grad_norm": 6.733952448641987, "learning_rate": 7.708000366187535e-07, "loss": 0.4322, "step": 19462 }, { "epoch": 0.3383163274174764, "grad_norm": 1.3855252018303519, "learning_rate": 7.707763728357873e-07, "loss": 0.2244, "step": 19463 }, { "epoch": 0.33833370995497924, "grad_norm": 1.6827040437377676, "learning_rate": 7.707527081945844e-07, "loss": 0.3624, "step": 19464 }, { "epoch": 0.3383510924924821, "grad_norm": 4.121244041385162, "learning_rate": 7.707290426952199e-07, "loss": 0.456, "step": 19465 }, { "epoch": 0.33836847502998485, "grad_norm": 1.189746540585577, "learning_rate": 7.707053763377687e-07, "loss": 0.2228, "step": 19466 }, { "epoch": 0.3383858575674877, "grad_norm": 1.269476882416195, "learning_rate": 7.706817091223057e-07, "loss": 0.4098, "step": 19467 }, { "epoch": 0.3384032401049905, "grad_norm": 1.865858215714478, "learning_rate": 7.706580410489061e-07, "loss": 0.3606, "step": 19468 }, { "epoch": 0.33842062264249334, "grad_norm": 1.6431370839173047, "learning_rate": 7.706343721176448e-07, "loss": 0.3448, "step": 19469 }, { "epoch": 0.33843800517999617, "grad_norm": 0.7995416456149264, "learning_rate": 7.706107023285971e-07, "loss": 0.3517, "step": 19470 }, { "epoch": 0.338455387717499, "grad_norm": 2.6618771436320414, "learning_rate": 7.705870316818374e-07, "loss": 0.4812, "step": 19471 }, { "epoch": 0.33847277025500183, "grad_norm": 1.9941769942189937, "learning_rate": 7.705633601774413e-07, "loss": 0.2053, "step": 19472 }, { "epoch": 0.33849015279250466, "grad_norm": 2.372601558263148, "learning_rate": 7.705396878154836e-07, "loss": 0.4471, "step": 19473 }, { "epoch": 0.3385075353300075, "grad_norm": 1.4328796979795937, "learning_rate": 7.705160145960394e-07, "loss": 0.132, "step": 19474 }, { "epoch": 0.3385249178675103, "grad_norm": 1.7413823289347365, "learning_rate": 7.704923405191837e-07, "loss": 0.3478, "step": 19475 }, { "epoch": 0.3385423004050131, "grad_norm": 1.721326516207997, "learning_rate": 7.704686655849915e-07, "loss": 0.4596, "step": 19476 }, { "epoch": 0.3385596829425159, "grad_norm": 1.6227672931930082, "learning_rate": 7.704449897935378e-07, "loss": 0.3502, "step": 19477 }, { "epoch": 0.33857706548001876, "grad_norm": 1.9552686754024924, "learning_rate": 7.704213131448979e-07, "loss": 0.3668, "step": 19478 }, { "epoch": 0.3385944480175216, "grad_norm": 1.4541013174132364, "learning_rate": 7.703976356391465e-07, "loss": 0.305, "step": 19479 }, { "epoch": 0.3386118305550244, "grad_norm": 1.4720118906752073, "learning_rate": 7.70373957276359e-07, "loss": 0.435, "step": 19480 }, { "epoch": 0.33862921309252725, "grad_norm": 2.2319715681457706, "learning_rate": 7.7035027805661e-07, "loss": 0.3644, "step": 19481 }, { "epoch": 0.3386465956300301, "grad_norm": 1.8132915500156699, "learning_rate": 7.703265979799749e-07, "loss": 0.44, "step": 19482 }, { "epoch": 0.3386639781675329, "grad_norm": 1.899213360729536, "learning_rate": 7.703029170465287e-07, "loss": 0.3521, "step": 19483 }, { "epoch": 0.33868136070503574, "grad_norm": 1.0661249382058327, "learning_rate": 7.702792352563465e-07, "loss": 0.3422, "step": 19484 }, { "epoch": 0.33869874324253857, "grad_norm": 1.4305066868760574, "learning_rate": 7.702555526095032e-07, "loss": 0.1983, "step": 19485 }, { "epoch": 0.33871612578004134, "grad_norm": 1.7179607768033658, "learning_rate": 7.70231869106074e-07, "loss": 0.2669, "step": 19486 }, { "epoch": 0.3387335083175442, "grad_norm": 1.767124589528582, "learning_rate": 7.702081847461337e-07, "loss": 0.4513, "step": 19487 }, { "epoch": 0.338750890855047, "grad_norm": 2.3934494700886617, "learning_rate": 7.701844995297577e-07, "loss": 0.1564, "step": 19488 }, { "epoch": 0.33876827339254983, "grad_norm": 1.1939959601650996, "learning_rate": 7.70160813457021e-07, "loss": 0.3104, "step": 19489 }, { "epoch": 0.33878565593005266, "grad_norm": 2.671897253311817, "learning_rate": 7.701371265279986e-07, "loss": 0.2757, "step": 19490 }, { "epoch": 0.3388030384675555, "grad_norm": 1.1024942889911529, "learning_rate": 7.701134387427657e-07, "loss": 0.3219, "step": 19491 }, { "epoch": 0.3388204210050583, "grad_norm": 2.555969583717188, "learning_rate": 7.700897501013971e-07, "loss": 0.4766, "step": 19492 }, { "epoch": 0.33883780354256116, "grad_norm": 1.191537619534331, "learning_rate": 7.700660606039681e-07, "loss": 0.1928, "step": 19493 }, { "epoch": 0.338855186080064, "grad_norm": 1.3494046657085494, "learning_rate": 7.700423702505537e-07, "loss": 0.4573, "step": 19494 }, { "epoch": 0.3388725686175668, "grad_norm": 3.0220777535595107, "learning_rate": 7.700186790412292e-07, "loss": 0.3367, "step": 19495 }, { "epoch": 0.3388899511550696, "grad_norm": 2.7351690483458517, "learning_rate": 7.699949869760695e-07, "loss": 0.5846, "step": 19496 }, { "epoch": 0.3389073336925724, "grad_norm": 2.0796049039117, "learning_rate": 7.699712940551497e-07, "loss": 0.4454, "step": 19497 }, { "epoch": 0.33892471623007525, "grad_norm": 1.2328505346866274, "learning_rate": 7.699476002785448e-07, "loss": 0.2752, "step": 19498 }, { "epoch": 0.3389420987675781, "grad_norm": 2.2543428691092644, "learning_rate": 7.699239056463301e-07, "loss": 0.533, "step": 19499 }, { "epoch": 0.3389594813050809, "grad_norm": 1.8961779782736734, "learning_rate": 7.699002101585805e-07, "loss": 0.3339, "step": 19500 }, { "epoch": 0.33897686384258374, "grad_norm": 1.185547387851178, "learning_rate": 7.698765138153713e-07, "loss": 0.3347, "step": 19501 }, { "epoch": 0.3389942463800866, "grad_norm": 1.8733765631499908, "learning_rate": 7.698528166167776e-07, "loss": 0.3608, "step": 19502 }, { "epoch": 0.3390116289175894, "grad_norm": 1.801665688608942, "learning_rate": 7.698291185628743e-07, "loss": 0.3954, "step": 19503 }, { "epoch": 0.33902901145509223, "grad_norm": 1.4942963890848986, "learning_rate": 7.698054196537367e-07, "loss": 0.5912, "step": 19504 }, { "epoch": 0.33904639399259506, "grad_norm": 1.2279185860075306, "learning_rate": 7.697817198894399e-07, "loss": 0.2126, "step": 19505 }, { "epoch": 0.33906377653009784, "grad_norm": 2.580887041822142, "learning_rate": 7.69758019270059e-07, "loss": 0.3786, "step": 19506 }, { "epoch": 0.33908115906760067, "grad_norm": 1.8451384256820988, "learning_rate": 7.69734317795669e-07, "loss": 0.4713, "step": 19507 }, { "epoch": 0.3390985416051035, "grad_norm": 1.7763620718919917, "learning_rate": 7.697106154663452e-07, "loss": 0.4101, "step": 19508 }, { "epoch": 0.33911592414260633, "grad_norm": 2.048676445264716, "learning_rate": 7.696869122821625e-07, "loss": 0.4004, "step": 19509 }, { "epoch": 0.33913330668010916, "grad_norm": 2.374568143306169, "learning_rate": 7.696632082431961e-07, "loss": 0.2464, "step": 19510 }, { "epoch": 0.339150689217612, "grad_norm": 2.740136120171756, "learning_rate": 7.696395033495215e-07, "loss": 0.5469, "step": 19511 }, { "epoch": 0.3391680717551148, "grad_norm": 1.2069646395668232, "learning_rate": 7.696157976012134e-07, "loss": 0.3782, "step": 19512 }, { "epoch": 0.33918545429261765, "grad_norm": 2.018147138335983, "learning_rate": 7.695920909983471e-07, "loss": 0.5258, "step": 19513 }, { "epoch": 0.3392028368301205, "grad_norm": 4.998878948843858, "learning_rate": 7.695683835409976e-07, "loss": 0.2702, "step": 19514 }, { "epoch": 0.3392202193676233, "grad_norm": 1.9587912881426377, "learning_rate": 7.695446752292404e-07, "loss": 0.3356, "step": 19515 }, { "epoch": 0.3392376019051261, "grad_norm": 2.891479887529154, "learning_rate": 7.695209660631502e-07, "loss": 0.4419, "step": 19516 }, { "epoch": 0.3392549844426289, "grad_norm": 0.9092150863400928, "learning_rate": 7.694972560428024e-07, "loss": 0.4271, "step": 19517 }, { "epoch": 0.33927236698013175, "grad_norm": 2.6988597878839053, "learning_rate": 7.694735451682722e-07, "loss": 0.5611, "step": 19518 }, { "epoch": 0.3392897495176346, "grad_norm": 1.6912937853801961, "learning_rate": 7.694498334396345e-07, "loss": 0.3253, "step": 19519 }, { "epoch": 0.3393071320551374, "grad_norm": 1.2594046525388207, "learning_rate": 7.694261208569646e-07, "loss": 0.4351, "step": 19520 }, { "epoch": 0.33932451459264024, "grad_norm": 1.579826269506443, "learning_rate": 7.694024074203377e-07, "loss": 0.3157, "step": 19521 }, { "epoch": 0.33934189713014307, "grad_norm": 2.7490021701390246, "learning_rate": 7.69378693129829e-07, "loss": 0.2485, "step": 19522 }, { "epoch": 0.3393592796676459, "grad_norm": 1.6833569427689266, "learning_rate": 7.693549779855134e-07, "loss": 0.426, "step": 19523 }, { "epoch": 0.3393766622051487, "grad_norm": 1.6163991598760703, "learning_rate": 7.693312619874663e-07, "loss": 0.2712, "step": 19524 }, { "epoch": 0.33939404474265156, "grad_norm": 1.4227410726862944, "learning_rate": 7.693075451357629e-07, "loss": 0.5063, "step": 19525 }, { "epoch": 0.33941142728015433, "grad_norm": 1.7352875276871782, "learning_rate": 7.692838274304782e-07, "loss": 0.3801, "step": 19526 }, { "epoch": 0.33942880981765716, "grad_norm": 1.6293695309892011, "learning_rate": 7.692601088716877e-07, "loss": 0.3987, "step": 19527 }, { "epoch": 0.33944619235516, "grad_norm": 1.6199026229274591, "learning_rate": 7.692363894594661e-07, "loss": 0.297, "step": 19528 }, { "epoch": 0.3394635748926628, "grad_norm": 1.8814808552557467, "learning_rate": 7.692126691938888e-07, "loss": 0.3866, "step": 19529 }, { "epoch": 0.33948095743016565, "grad_norm": 1.6139775908398468, "learning_rate": 7.691889480750311e-07, "loss": 0.3203, "step": 19530 }, { "epoch": 0.3394983399676685, "grad_norm": 2.2950226036646337, "learning_rate": 7.691652261029682e-07, "loss": 0.4536, "step": 19531 }, { "epoch": 0.3395157225051713, "grad_norm": 1.6890823399921284, "learning_rate": 7.691415032777751e-07, "loss": 0.3778, "step": 19532 }, { "epoch": 0.33953310504267414, "grad_norm": 5.298709712422678, "learning_rate": 7.691177795995269e-07, "loss": 0.5594, "step": 19533 }, { "epoch": 0.339550487580177, "grad_norm": 1.7683278050083424, "learning_rate": 7.690940550682991e-07, "loss": 0.2897, "step": 19534 }, { "epoch": 0.3395678701176798, "grad_norm": 1.6819830053376394, "learning_rate": 7.690703296841668e-07, "loss": 0.2317, "step": 19535 }, { "epoch": 0.3395852526551826, "grad_norm": 1.7906076914973486, "learning_rate": 7.690466034472052e-07, "loss": 0.3287, "step": 19536 }, { "epoch": 0.3396026351926854, "grad_norm": 1.0583486581851904, "learning_rate": 7.690228763574893e-07, "loss": 0.2917, "step": 19537 }, { "epoch": 0.33962001773018824, "grad_norm": 1.466705591224614, "learning_rate": 7.689991484150945e-07, "loss": 0.3704, "step": 19538 }, { "epoch": 0.33963740026769107, "grad_norm": 2.007028429946318, "learning_rate": 7.68975419620096e-07, "loss": 0.2529, "step": 19539 }, { "epoch": 0.3396547828051939, "grad_norm": 2.644995961026911, "learning_rate": 7.68951689972569e-07, "loss": 0.6014, "step": 19540 }, { "epoch": 0.33967216534269673, "grad_norm": 1.5064661244320734, "learning_rate": 7.689279594725887e-07, "loss": 0.4261, "step": 19541 }, { "epoch": 0.33968954788019956, "grad_norm": 2.2530676782524535, "learning_rate": 7.689042281202302e-07, "loss": 0.3614, "step": 19542 }, { "epoch": 0.3397069304177024, "grad_norm": 1.358379281869405, "learning_rate": 7.688804959155691e-07, "loss": 0.2987, "step": 19543 }, { "epoch": 0.3397243129552052, "grad_norm": 1.450224505000836, "learning_rate": 7.6885676285868e-07, "loss": 0.3789, "step": 19544 }, { "epoch": 0.33974169549270805, "grad_norm": 3.022468034515652, "learning_rate": 7.688330289496385e-07, "loss": 0.4522, "step": 19545 }, { "epoch": 0.3397590780302108, "grad_norm": 1.3291766452520517, "learning_rate": 7.6880929418852e-07, "loss": 0.4972, "step": 19546 }, { "epoch": 0.33977646056771366, "grad_norm": 2.0306409010609103, "learning_rate": 7.687855585753994e-07, "loss": 0.2985, "step": 19547 }, { "epoch": 0.3397938431052165, "grad_norm": 2.0184717754882944, "learning_rate": 7.68761822110352e-07, "loss": 0.2709, "step": 19548 }, { "epoch": 0.3398112256427193, "grad_norm": 2.0721017942607705, "learning_rate": 7.687380847934533e-07, "loss": 0.2949, "step": 19549 }, { "epoch": 0.33982860818022215, "grad_norm": 1.1221780379012491, "learning_rate": 7.687143466247781e-07, "loss": 0.4618, "step": 19550 }, { "epoch": 0.339845990717725, "grad_norm": 1.769763525588353, "learning_rate": 7.686906076044018e-07, "loss": 0.4553, "step": 19551 }, { "epoch": 0.3398633732552278, "grad_norm": 1.3549262804320483, "learning_rate": 7.686668677323999e-07, "loss": 0.424, "step": 19552 }, { "epoch": 0.33988075579273064, "grad_norm": 1.6075372968109845, "learning_rate": 7.686431270088474e-07, "loss": 0.4241, "step": 19553 }, { "epoch": 0.33989813833023347, "grad_norm": 1.2850853553218817, "learning_rate": 7.686193854338194e-07, "loss": 0.2907, "step": 19554 }, { "epoch": 0.3399155208677363, "grad_norm": 1.275215235140932, "learning_rate": 7.685956430073915e-07, "loss": 0.4041, "step": 19555 }, { "epoch": 0.3399329034052391, "grad_norm": 1.2411173448435542, "learning_rate": 7.685718997296389e-07, "loss": 0.3542, "step": 19556 }, { "epoch": 0.3399502859427419, "grad_norm": 1.9447172333526193, "learning_rate": 7.685481556006366e-07, "loss": 0.3303, "step": 19557 }, { "epoch": 0.33996766848024473, "grad_norm": 1.8462800268295563, "learning_rate": 7.685244106204601e-07, "loss": 0.4155, "step": 19558 }, { "epoch": 0.33998505101774756, "grad_norm": 2.6290942767086873, "learning_rate": 7.685006647891845e-07, "loss": 0.4674, "step": 19559 }, { "epoch": 0.3400024335552504, "grad_norm": 4.376273339377818, "learning_rate": 7.684769181068851e-07, "loss": 0.3274, "step": 19560 }, { "epoch": 0.3400198160927532, "grad_norm": 1.42821851708948, "learning_rate": 7.684531705736373e-07, "loss": 0.4687, "step": 19561 }, { "epoch": 0.34003719863025605, "grad_norm": 2.640128014569205, "learning_rate": 7.684294221895161e-07, "loss": 0.6226, "step": 19562 }, { "epoch": 0.3400545811677589, "grad_norm": 1.3285683199299934, "learning_rate": 7.684056729545971e-07, "loss": 0.3313, "step": 19563 }, { "epoch": 0.3400719637052617, "grad_norm": 1.894671223156811, "learning_rate": 7.683819228689553e-07, "loss": 0.311, "step": 19564 }, { "epoch": 0.34008934624276455, "grad_norm": 1.9019727940469193, "learning_rate": 7.683581719326662e-07, "loss": 0.2896, "step": 19565 }, { "epoch": 0.3401067287802673, "grad_norm": 1.9542565043476126, "learning_rate": 7.683344201458048e-07, "loss": 0.301, "step": 19566 }, { "epoch": 0.34012411131777015, "grad_norm": 1.8827513380337142, "learning_rate": 7.683106675084467e-07, "loss": 0.3346, "step": 19567 }, { "epoch": 0.340141493855273, "grad_norm": 2.2245347103805493, "learning_rate": 7.682869140206672e-07, "loss": 0.3074, "step": 19568 }, { "epoch": 0.3401588763927758, "grad_norm": 3.170532298706332, "learning_rate": 7.68263159682541e-07, "loss": 0.4082, "step": 19569 }, { "epoch": 0.34017625893027864, "grad_norm": 1.2364032651336423, "learning_rate": 7.68239404494144e-07, "loss": 0.231, "step": 19570 }, { "epoch": 0.34019364146778147, "grad_norm": 1.0478550967929998, "learning_rate": 7.682156484555515e-07, "loss": 0.2885, "step": 19571 }, { "epoch": 0.3402110240052843, "grad_norm": 2.158765908563475, "learning_rate": 7.681918915668384e-07, "loss": 0.3494, "step": 19572 }, { "epoch": 0.34022840654278713, "grad_norm": 1.1536325497236943, "learning_rate": 7.681681338280802e-07, "loss": 0.2032, "step": 19573 }, { "epoch": 0.34024578908028996, "grad_norm": 1.375759149987217, "learning_rate": 7.681443752393523e-07, "loss": 0.4532, "step": 19574 }, { "epoch": 0.3402631716177928, "grad_norm": 1.6186587925444071, "learning_rate": 7.681206158007298e-07, "loss": 0.4017, "step": 19575 }, { "epoch": 0.34028055415529557, "grad_norm": 1.9241881276683683, "learning_rate": 7.680968555122882e-07, "loss": 0.445, "step": 19576 }, { "epoch": 0.3402979366927984, "grad_norm": 1.4147141930312073, "learning_rate": 7.680730943741027e-07, "loss": 0.2862, "step": 19577 }, { "epoch": 0.3403153192303012, "grad_norm": 2.2046927261628744, "learning_rate": 7.680493323862487e-07, "loss": 0.3987, "step": 19578 }, { "epoch": 0.34033270176780406, "grad_norm": 2.7582092527326796, "learning_rate": 7.680255695488013e-07, "loss": 0.4905, "step": 19579 }, { "epoch": 0.3403500843053069, "grad_norm": 1.657050711646727, "learning_rate": 7.680018058618361e-07, "loss": 0.4489, "step": 19580 }, { "epoch": 0.3403674668428097, "grad_norm": 1.8104157497028925, "learning_rate": 7.679780413254282e-07, "loss": 0.6196, "step": 19581 }, { "epoch": 0.34038484938031255, "grad_norm": 2.017715869239017, "learning_rate": 7.679542759396531e-07, "loss": 0.3876, "step": 19582 }, { "epoch": 0.3404022319178154, "grad_norm": 1.5413117705387338, "learning_rate": 7.679305097045858e-07, "loss": 0.4475, "step": 19583 }, { "epoch": 0.3404196144553182, "grad_norm": 1.4668817130081233, "learning_rate": 7.679067426203022e-07, "loss": 0.2765, "step": 19584 }, { "epoch": 0.34043699699282104, "grad_norm": 2.842588718036679, "learning_rate": 7.678829746868771e-07, "loss": 0.2598, "step": 19585 }, { "epoch": 0.3404543795303238, "grad_norm": 1.483701195351799, "learning_rate": 7.67859205904386e-07, "loss": 0.2135, "step": 19586 }, { "epoch": 0.34047176206782664, "grad_norm": 1.559669555921345, "learning_rate": 7.678354362729044e-07, "loss": 0.412, "step": 19587 }, { "epoch": 0.3404891446053295, "grad_norm": 1.4919602342318514, "learning_rate": 7.678116657925074e-07, "loss": 0.4669, "step": 19588 }, { "epoch": 0.3405065271428323, "grad_norm": 2.096758519059262, "learning_rate": 7.677878944632703e-07, "loss": 0.3294, "step": 19589 }, { "epoch": 0.34052390968033514, "grad_norm": 1.780916360027266, "learning_rate": 7.677641222852689e-07, "loss": 0.6381, "step": 19590 }, { "epoch": 0.34054129221783797, "grad_norm": 1.3837899829168188, "learning_rate": 7.67740349258578e-07, "loss": 0.3537, "step": 19591 }, { "epoch": 0.3405586747553408, "grad_norm": 1.4061292493454873, "learning_rate": 7.677165753832733e-07, "loss": 0.3766, "step": 19592 }, { "epoch": 0.3405760572928436, "grad_norm": 1.4252582922793164, "learning_rate": 7.676928006594299e-07, "loss": 0.4865, "step": 19593 }, { "epoch": 0.34059343983034646, "grad_norm": 8.85218332950556, "learning_rate": 7.676690250871234e-07, "loss": 0.6083, "step": 19594 }, { "epoch": 0.3406108223678493, "grad_norm": 1.8591978365717576, "learning_rate": 7.676452486664291e-07, "loss": 0.4766, "step": 19595 }, { "epoch": 0.34062820490535206, "grad_norm": 1.349114546068376, "learning_rate": 7.676214713974222e-07, "loss": 0.2726, "step": 19596 }, { "epoch": 0.3406455874428549, "grad_norm": 1.296451468640727, "learning_rate": 7.675976932801782e-07, "loss": 0.2278, "step": 19597 }, { "epoch": 0.3406629699803577, "grad_norm": 2.2930234908884692, "learning_rate": 7.675739143147724e-07, "loss": 0.315, "step": 19598 }, { "epoch": 0.34068035251786055, "grad_norm": 1.4463187944080602, "learning_rate": 7.675501345012803e-07, "loss": 0.277, "step": 19599 }, { "epoch": 0.3406977350553634, "grad_norm": 1.1783970926929697, "learning_rate": 7.675263538397771e-07, "loss": 0.3398, "step": 19600 }, { "epoch": 0.3407151175928662, "grad_norm": 3.5558600973012977, "learning_rate": 7.675025723303384e-07, "loss": 0.4265, "step": 19601 }, { "epoch": 0.34073250013036904, "grad_norm": 1.5575396365214285, "learning_rate": 7.67478789973039e-07, "loss": 0.2103, "step": 19602 }, { "epoch": 0.3407498826678719, "grad_norm": 1.6353201623815854, "learning_rate": 7.674550067679552e-07, "loss": 0.3769, "step": 19603 }, { "epoch": 0.3407672652053747, "grad_norm": 1.9508728980728824, "learning_rate": 7.674312227151616e-07, "loss": 0.369, "step": 19604 }, { "epoch": 0.3407846477428775, "grad_norm": 1.171281789445397, "learning_rate": 7.674074378147339e-07, "loss": 0.3955, "step": 19605 }, { "epoch": 0.3408020302803803, "grad_norm": 0.9758946603267465, "learning_rate": 7.673836520667476e-07, "loss": 0.3265, "step": 19606 }, { "epoch": 0.34081941281788314, "grad_norm": 1.4942022634556753, "learning_rate": 7.67359865471278e-07, "loss": 0.2613, "step": 19607 }, { "epoch": 0.34083679535538597, "grad_norm": 2.2911440155618568, "learning_rate": 7.673360780284001e-07, "loss": 0.3726, "step": 19608 }, { "epoch": 0.3408541778928888, "grad_norm": 2.670710021965827, "learning_rate": 7.673122897381901e-07, "loss": 0.3253, "step": 19609 }, { "epoch": 0.34087156043039163, "grad_norm": 1.327936179221845, "learning_rate": 7.672885006007226e-07, "loss": 0.2724, "step": 19610 }, { "epoch": 0.34088894296789446, "grad_norm": 1.8487982548633342, "learning_rate": 7.672647106160735e-07, "loss": 0.3012, "step": 19611 }, { "epoch": 0.3409063255053973, "grad_norm": 1.101156313881172, "learning_rate": 7.672409197843179e-07, "loss": 0.3269, "step": 19612 }, { "epoch": 0.3409237080429001, "grad_norm": 1.904103997671542, "learning_rate": 7.672171281055314e-07, "loss": 0.2405, "step": 19613 }, { "epoch": 0.34094109058040295, "grad_norm": 1.5603395957691077, "learning_rate": 7.671933355797893e-07, "loss": 0.3631, "step": 19614 }, { "epoch": 0.3409584731179057, "grad_norm": 1.3971420835872426, "learning_rate": 7.671695422071671e-07, "loss": 0.2681, "step": 19615 }, { "epoch": 0.34097585565540856, "grad_norm": 1.421928224340739, "learning_rate": 7.671457479877402e-07, "loss": 0.3862, "step": 19616 }, { "epoch": 0.3409932381929114, "grad_norm": 1.4414577343759425, "learning_rate": 7.67121952921584e-07, "loss": 0.3152, "step": 19617 }, { "epoch": 0.3410106207304142, "grad_norm": 1.6093690198615782, "learning_rate": 7.670981570087739e-07, "loss": 0.2342, "step": 19618 }, { "epoch": 0.34102800326791705, "grad_norm": 1.1463237209607895, "learning_rate": 7.670743602493853e-07, "loss": 0.2789, "step": 19619 }, { "epoch": 0.3410453858054199, "grad_norm": 2.0942879812389346, "learning_rate": 7.670505626434937e-07, "loss": 0.242, "step": 19620 }, { "epoch": 0.3410627683429227, "grad_norm": 1.7409901103519192, "learning_rate": 7.670267641911745e-07, "loss": 0.2527, "step": 19621 }, { "epoch": 0.34108015088042554, "grad_norm": 1.6241778024013431, "learning_rate": 7.670029648925031e-07, "loss": 0.2071, "step": 19622 }, { "epoch": 0.34109753341792837, "grad_norm": 1.8057780953362066, "learning_rate": 7.66979164747555e-07, "loss": 0.3678, "step": 19623 }, { "epoch": 0.3411149159554312, "grad_norm": 1.4694269313332318, "learning_rate": 7.669553637564056e-07, "loss": 0.2693, "step": 19624 }, { "epoch": 0.34113229849293397, "grad_norm": 2.603309922538404, "learning_rate": 7.669315619191303e-07, "loss": 0.3363, "step": 19625 }, { "epoch": 0.3411496810304368, "grad_norm": 1.3393197213051145, "learning_rate": 7.669077592358047e-07, "loss": 0.2542, "step": 19626 }, { "epoch": 0.34116706356793963, "grad_norm": 1.9319433974049256, "learning_rate": 7.668839557065038e-07, "loss": 0.3143, "step": 19627 }, { "epoch": 0.34118444610544246, "grad_norm": 1.4032462455674313, "learning_rate": 7.668601513313036e-07, "loss": 0.4099, "step": 19628 }, { "epoch": 0.3412018286429453, "grad_norm": 1.27987895480881, "learning_rate": 7.668363461102792e-07, "loss": 0.5008, "step": 19629 }, { "epoch": 0.3412192111804481, "grad_norm": 3.927246457059153, "learning_rate": 7.668125400435061e-07, "loss": 0.5043, "step": 19630 }, { "epoch": 0.34123659371795095, "grad_norm": 1.9337169536569816, "learning_rate": 7.667887331310599e-07, "loss": 0.2165, "step": 19631 }, { "epoch": 0.3412539762554538, "grad_norm": 1.0591601306234706, "learning_rate": 7.66764925373016e-07, "loss": 0.4078, "step": 19632 }, { "epoch": 0.3412713587929566, "grad_norm": 2.2310410159009875, "learning_rate": 7.667411167694498e-07, "loss": 0.3288, "step": 19633 }, { "epoch": 0.34128874133045944, "grad_norm": 1.593708771085445, "learning_rate": 7.667173073204367e-07, "loss": 0.2819, "step": 19634 }, { "epoch": 0.3413061238679622, "grad_norm": 1.1768241554236505, "learning_rate": 7.666934970260523e-07, "loss": 0.4334, "step": 19635 }, { "epoch": 0.34132350640546505, "grad_norm": 1.166398373582742, "learning_rate": 7.666696858863722e-07, "loss": 0.2939, "step": 19636 }, { "epoch": 0.3413408889429679, "grad_norm": 1.4202855307729494, "learning_rate": 7.666458739014714e-07, "loss": 0.4319, "step": 19637 }, { "epoch": 0.3413582714804707, "grad_norm": 2.216653017770443, "learning_rate": 7.66622061071426e-07, "loss": 0.4789, "step": 19638 }, { "epoch": 0.34137565401797354, "grad_norm": 1.9429165298064857, "learning_rate": 7.665982473963108e-07, "loss": 0.4403, "step": 19639 }, { "epoch": 0.34139303655547637, "grad_norm": 2.4635089600753086, "learning_rate": 7.665744328762018e-07, "loss": 0.4401, "step": 19640 }, { "epoch": 0.3414104190929792, "grad_norm": 1.5633493017225193, "learning_rate": 7.665506175111744e-07, "loss": 0.1906, "step": 19641 }, { "epoch": 0.34142780163048203, "grad_norm": 1.280895615949621, "learning_rate": 7.665268013013038e-07, "loss": 0.1495, "step": 19642 }, { "epoch": 0.34144518416798486, "grad_norm": 1.4547239429142864, "learning_rate": 7.665029842466657e-07, "loss": 0.2059, "step": 19643 }, { "epoch": 0.3414625667054877, "grad_norm": 2.061594274964614, "learning_rate": 7.664791663473358e-07, "loss": 0.377, "step": 19644 }, { "epoch": 0.34147994924299047, "grad_norm": 5.38502573323209, "learning_rate": 7.66455347603389e-07, "loss": 0.2276, "step": 19645 }, { "epoch": 0.3414973317804933, "grad_norm": 1.3945605127500134, "learning_rate": 7.664315280149014e-07, "loss": 0.2443, "step": 19646 }, { "epoch": 0.3415147143179961, "grad_norm": 2.0098963340115223, "learning_rate": 7.664077075819481e-07, "loss": 0.2657, "step": 19647 }, { "epoch": 0.34153209685549896, "grad_norm": 1.2314839385443312, "learning_rate": 7.663838863046048e-07, "loss": 0.2575, "step": 19648 }, { "epoch": 0.3415494793930018, "grad_norm": 1.0957062457090292, "learning_rate": 7.66360064182947e-07, "loss": 0.2547, "step": 19649 }, { "epoch": 0.3415668619305046, "grad_norm": 1.6234839130443628, "learning_rate": 7.663362412170501e-07, "loss": 0.3092, "step": 19650 }, { "epoch": 0.34158424446800745, "grad_norm": 1.4902717051165302, "learning_rate": 7.663124174069897e-07, "loss": 0.3286, "step": 19651 }, { "epoch": 0.3416016270055103, "grad_norm": 1.8775599142188377, "learning_rate": 7.662885927528412e-07, "loss": 0.3532, "step": 19652 }, { "epoch": 0.3416190095430131, "grad_norm": 1.8328785615280154, "learning_rate": 7.662647672546801e-07, "loss": 0.3047, "step": 19653 }, { "epoch": 0.34163639208051594, "grad_norm": 1.8661780757195203, "learning_rate": 7.662409409125822e-07, "loss": 0.2862, "step": 19654 }, { "epoch": 0.3416537746180187, "grad_norm": 2.9914532310193933, "learning_rate": 7.662171137266227e-07, "loss": 0.4648, "step": 19655 }, { "epoch": 0.34167115715552154, "grad_norm": 1.9189314869603642, "learning_rate": 7.661932856968774e-07, "loss": 0.2518, "step": 19656 }, { "epoch": 0.3416885396930244, "grad_norm": 2.7150611495670485, "learning_rate": 7.661694568234214e-07, "loss": 0.4856, "step": 19657 }, { "epoch": 0.3417059222305272, "grad_norm": 1.5354122436232667, "learning_rate": 7.661456271063307e-07, "loss": 0.3013, "step": 19658 }, { "epoch": 0.34172330476803003, "grad_norm": 1.8889109046587589, "learning_rate": 7.661217965456806e-07, "loss": 0.3424, "step": 19659 }, { "epoch": 0.34174068730553286, "grad_norm": 1.4876369506365268, "learning_rate": 7.660979651415465e-07, "loss": 0.2736, "step": 19660 }, { "epoch": 0.3417580698430357, "grad_norm": 1.8664744579362953, "learning_rate": 7.660741328940041e-07, "loss": 0.4985, "step": 19661 }, { "epoch": 0.3417754523805385, "grad_norm": 2.5283769768618427, "learning_rate": 7.660502998031291e-07, "loss": 0.3946, "step": 19662 }, { "epoch": 0.34179283491804135, "grad_norm": 2.020968880285199, "learning_rate": 7.660264658689967e-07, "loss": 0.3164, "step": 19663 }, { "epoch": 0.3418102174555442, "grad_norm": 1.5500865608065377, "learning_rate": 7.660026310916827e-07, "loss": 0.2481, "step": 19664 }, { "epoch": 0.34182759999304696, "grad_norm": 3.488837329441912, "learning_rate": 7.659787954712626e-07, "loss": 0.7256, "step": 19665 }, { "epoch": 0.3418449825305498, "grad_norm": 1.5106518223446528, "learning_rate": 7.659549590078118e-07, "loss": 0.4948, "step": 19666 }, { "epoch": 0.3418623650680526, "grad_norm": 1.8671173832639403, "learning_rate": 7.659311217014059e-07, "loss": 0.3632, "step": 19667 }, { "epoch": 0.34187974760555545, "grad_norm": 1.2722453195198633, "learning_rate": 7.659072835521205e-07, "loss": 0.4021, "step": 19668 }, { "epoch": 0.3418971301430583, "grad_norm": 2.041768456223988, "learning_rate": 7.658834445600314e-07, "loss": 0.3977, "step": 19669 }, { "epoch": 0.3419145126805611, "grad_norm": 2.5044280644939363, "learning_rate": 7.658596047252137e-07, "loss": 0.3116, "step": 19670 }, { "epoch": 0.34193189521806394, "grad_norm": 1.568661392935703, "learning_rate": 7.658357640477433e-07, "loss": 0.4665, "step": 19671 }, { "epoch": 0.34194927775556677, "grad_norm": 1.8168926720484655, "learning_rate": 7.658119225276956e-07, "loss": 0.3068, "step": 19672 }, { "epoch": 0.3419666602930696, "grad_norm": 1.9148695843482357, "learning_rate": 7.657880801651462e-07, "loss": 0.2441, "step": 19673 }, { "epoch": 0.34198404283057243, "grad_norm": 2.7574268983018593, "learning_rate": 7.657642369601704e-07, "loss": 0.33, "step": 19674 }, { "epoch": 0.3420014253680752, "grad_norm": 2.635533907299186, "learning_rate": 7.657403929128445e-07, "loss": 0.268, "step": 19675 }, { "epoch": 0.34201880790557804, "grad_norm": 1.9812375446429273, "learning_rate": 7.657165480232434e-07, "loss": 0.3001, "step": 19676 }, { "epoch": 0.34203619044308087, "grad_norm": 2.0465055475951077, "learning_rate": 7.65692702291443e-07, "loss": 0.227, "step": 19677 }, { "epoch": 0.3420535729805837, "grad_norm": 2.6688376077457874, "learning_rate": 7.656688557175186e-07, "loss": 0.4412, "step": 19678 }, { "epoch": 0.34207095551808653, "grad_norm": 2.485145243228997, "learning_rate": 7.656450083015461e-07, "loss": 0.1956, "step": 19679 }, { "epoch": 0.34208833805558936, "grad_norm": 1.960834215755008, "learning_rate": 7.656211600436009e-07, "loss": 0.3471, "step": 19680 }, { "epoch": 0.3421057205930922, "grad_norm": 1.362524976736706, "learning_rate": 7.655973109437588e-07, "loss": 0.3139, "step": 19681 }, { "epoch": 0.342123103130595, "grad_norm": 1.645995369926662, "learning_rate": 7.65573461002095e-07, "loss": 0.2814, "step": 19682 }, { "epoch": 0.34214048566809785, "grad_norm": 2.2803237493963016, "learning_rate": 7.655496102186855e-07, "loss": 0.3082, "step": 19683 }, { "epoch": 0.3421578682056007, "grad_norm": 1.6439661861827406, "learning_rate": 7.655257585936057e-07, "loss": 0.2867, "step": 19684 }, { "epoch": 0.34217525074310345, "grad_norm": 1.2112599174022285, "learning_rate": 7.655019061269311e-07, "loss": 0.514, "step": 19685 }, { "epoch": 0.3421926332806063, "grad_norm": 1.0463710293320954, "learning_rate": 7.654780528187374e-07, "loss": 0.2104, "step": 19686 }, { "epoch": 0.3422100158181091, "grad_norm": 1.6877609411784762, "learning_rate": 7.654541986691003e-07, "loss": 0.2084, "step": 19687 }, { "epoch": 0.34222739835561194, "grad_norm": 1.99698006579131, "learning_rate": 7.654303436780956e-07, "loss": 0.2425, "step": 19688 }, { "epoch": 0.3422447808931148, "grad_norm": 2.6907756841639596, "learning_rate": 7.654064878457982e-07, "loss": 0.2259, "step": 19689 }, { "epoch": 0.3422621634306176, "grad_norm": 1.577149381562771, "learning_rate": 7.653826311722844e-07, "loss": 0.2916, "step": 19690 }, { "epoch": 0.34227954596812044, "grad_norm": 2.063516145449584, "learning_rate": 7.653587736576295e-07, "loss": 0.1848, "step": 19691 }, { "epoch": 0.34229692850562327, "grad_norm": 1.5425465862363918, "learning_rate": 7.653349153019091e-07, "loss": 0.4089, "step": 19692 }, { "epoch": 0.3423143110431261, "grad_norm": 1.281796442805186, "learning_rate": 7.653110561051989e-07, "loss": 0.2592, "step": 19693 }, { "epoch": 0.3423316935806289, "grad_norm": 1.634446057484223, "learning_rate": 7.652871960675747e-07, "loss": 0.3613, "step": 19694 }, { "epoch": 0.3423490761181317, "grad_norm": 1.7885708336444177, "learning_rate": 7.65263335189112e-07, "loss": 0.2346, "step": 19695 }, { "epoch": 0.34236645865563453, "grad_norm": 1.8902820714089965, "learning_rate": 7.652394734698862e-07, "loss": 0.2932, "step": 19696 }, { "epoch": 0.34238384119313736, "grad_norm": 1.1667960570646991, "learning_rate": 7.65215610909973e-07, "loss": 0.2032, "step": 19697 }, { "epoch": 0.3424012237306402, "grad_norm": 1.753623596311469, "learning_rate": 7.651917475094485e-07, "loss": 0.3594, "step": 19698 }, { "epoch": 0.342418606268143, "grad_norm": 2.043519922114633, "learning_rate": 7.651678832683876e-07, "loss": 0.2819, "step": 19699 }, { "epoch": 0.34243598880564585, "grad_norm": 1.4061069595905957, "learning_rate": 7.651440181868666e-07, "loss": 0.3809, "step": 19700 }, { "epoch": 0.3424533713431487, "grad_norm": 2.3916197097677006, "learning_rate": 7.651201522649607e-07, "loss": 0.1922, "step": 19701 }, { "epoch": 0.3424707538806515, "grad_norm": 1.84664556213199, "learning_rate": 7.650962855027459e-07, "loss": 0.3267, "step": 19702 }, { "epoch": 0.34248813641815434, "grad_norm": 1.2007100622866302, "learning_rate": 7.650724179002974e-07, "loss": 0.3482, "step": 19703 }, { "epoch": 0.3425055189556572, "grad_norm": 1.0719419460154769, "learning_rate": 7.650485494576913e-07, "loss": 0.2694, "step": 19704 }, { "epoch": 0.34252290149315995, "grad_norm": 2.1205019812119765, "learning_rate": 7.650246801750029e-07, "loss": 0.2849, "step": 19705 }, { "epoch": 0.3425402840306628, "grad_norm": 1.7996004783501067, "learning_rate": 7.650008100523079e-07, "loss": 0.4034, "step": 19706 }, { "epoch": 0.3425576665681656, "grad_norm": 1.6475797323413321, "learning_rate": 7.649769390896823e-07, "loss": 0.3862, "step": 19707 }, { "epoch": 0.34257504910566844, "grad_norm": 1.6633496510499872, "learning_rate": 7.649530672872015e-07, "loss": 0.436, "step": 19708 }, { "epoch": 0.34259243164317127, "grad_norm": 4.001411094453573, "learning_rate": 7.649291946449412e-07, "loss": 0.4209, "step": 19709 }, { "epoch": 0.3426098141806741, "grad_norm": 3.8246815293194416, "learning_rate": 7.649053211629769e-07, "loss": 0.4421, "step": 19710 }, { "epoch": 0.34262719671817693, "grad_norm": 1.8790930987484438, "learning_rate": 7.648814468413845e-07, "loss": 0.4481, "step": 19711 }, { "epoch": 0.34264457925567976, "grad_norm": 3.4178420307991186, "learning_rate": 7.648575716802396e-07, "loss": 0.4173, "step": 19712 }, { "epoch": 0.3426619617931826, "grad_norm": 2.8220275121514184, "learning_rate": 7.648336956796178e-07, "loss": 0.3378, "step": 19713 }, { "epoch": 0.3426793443306854, "grad_norm": 1.9053353574243435, "learning_rate": 7.64809818839595e-07, "loss": 0.3822, "step": 19714 }, { "epoch": 0.3426967268681882, "grad_norm": 0.8096370183682065, "learning_rate": 7.647859411602467e-07, "loss": 0.2174, "step": 19715 }, { "epoch": 0.342714109405691, "grad_norm": 3.1798096096351434, "learning_rate": 7.647620626416483e-07, "loss": 0.3281, "step": 19716 }, { "epoch": 0.34273149194319386, "grad_norm": 1.2343469585824463, "learning_rate": 7.647381832838762e-07, "loss": 0.3001, "step": 19717 }, { "epoch": 0.3427488744806967, "grad_norm": 2.012118909987736, "learning_rate": 7.647143030870054e-07, "loss": 0.3672, "step": 19718 }, { "epoch": 0.3427662570181995, "grad_norm": 1.6490873166212767, "learning_rate": 7.646904220511119e-07, "loss": 0.4267, "step": 19719 }, { "epoch": 0.34278363955570235, "grad_norm": 3.2710088254188423, "learning_rate": 7.646665401762714e-07, "loss": 0.3942, "step": 19720 }, { "epoch": 0.3428010220932052, "grad_norm": 3.294031611693121, "learning_rate": 7.646426574625595e-07, "loss": 0.2908, "step": 19721 }, { "epoch": 0.342818404630708, "grad_norm": 1.8262024257930063, "learning_rate": 7.646187739100521e-07, "loss": 0.2943, "step": 19722 }, { "epoch": 0.34283578716821084, "grad_norm": 2.068162141717532, "learning_rate": 7.645948895188246e-07, "loss": 0.2711, "step": 19723 }, { "epoch": 0.34285316970571367, "grad_norm": 2.1770400367270124, "learning_rate": 7.645710042889528e-07, "loss": 0.2751, "step": 19724 }, { "epoch": 0.34287055224321644, "grad_norm": 1.5532685469695198, "learning_rate": 7.645471182205125e-07, "loss": 0.2485, "step": 19725 }, { "epoch": 0.3428879347807193, "grad_norm": 4.229682301957043, "learning_rate": 7.645232313135795e-07, "loss": 0.3121, "step": 19726 }, { "epoch": 0.3429053173182221, "grad_norm": 1.093744973815309, "learning_rate": 7.644993435682292e-07, "loss": 0.268, "step": 19727 }, { "epoch": 0.34292269985572493, "grad_norm": 2.805396726889193, "learning_rate": 7.644754549845374e-07, "loss": 0.2712, "step": 19728 }, { "epoch": 0.34294008239322776, "grad_norm": 1.8274456558902088, "learning_rate": 7.6445156556258e-07, "loss": 0.4128, "step": 19729 }, { "epoch": 0.3429574649307306, "grad_norm": 1.8313815206401471, "learning_rate": 7.644276753024325e-07, "loss": 0.329, "step": 19730 }, { "epoch": 0.3429748474682334, "grad_norm": 1.8401367548883296, "learning_rate": 7.64403784204171e-07, "loss": 0.3966, "step": 19731 }, { "epoch": 0.34299223000573625, "grad_norm": 3.1480690460165706, "learning_rate": 7.643798922678708e-07, "loss": 0.2973, "step": 19732 }, { "epoch": 0.3430096125432391, "grad_norm": 2.387460214419491, "learning_rate": 7.643559994936079e-07, "loss": 0.3997, "step": 19733 }, { "epoch": 0.3430269950807419, "grad_norm": 1.9095235423860382, "learning_rate": 7.643321058814578e-07, "loss": 0.3631, "step": 19734 }, { "epoch": 0.3430443776182447, "grad_norm": 1.6868489601617735, "learning_rate": 7.643082114314963e-07, "loss": 0.2812, "step": 19735 }, { "epoch": 0.3430617601557475, "grad_norm": 1.6090914267144625, "learning_rate": 7.642843161437993e-07, "loss": 0.2722, "step": 19736 }, { "epoch": 0.34307914269325035, "grad_norm": 1.8204480763376178, "learning_rate": 7.642604200184425e-07, "loss": 0.3789, "step": 19737 }, { "epoch": 0.3430965252307532, "grad_norm": 2.3167969455109625, "learning_rate": 7.642365230555014e-07, "loss": 0.4534, "step": 19738 }, { "epoch": 0.343113907768256, "grad_norm": 2.335884500955751, "learning_rate": 7.64212625255052e-07, "loss": 0.1436, "step": 19739 }, { "epoch": 0.34313129030575884, "grad_norm": 1.262830720705692, "learning_rate": 7.641887266171698e-07, "loss": 0.3355, "step": 19740 }, { "epoch": 0.34314867284326167, "grad_norm": 4.650322022334829, "learning_rate": 7.641648271419308e-07, "loss": 0.3046, "step": 19741 }, { "epoch": 0.3431660553807645, "grad_norm": 1.6329164493645765, "learning_rate": 7.641409268294107e-07, "loss": 0.2744, "step": 19742 }, { "epoch": 0.34318343791826733, "grad_norm": 2.1765694664755766, "learning_rate": 7.64117025679685e-07, "loss": 0.2073, "step": 19743 }, { "epoch": 0.3432008204557701, "grad_norm": 1.583162380168251, "learning_rate": 7.640931236928298e-07, "loss": 0.3531, "step": 19744 }, { "epoch": 0.34321820299327294, "grad_norm": 2.086460055784697, "learning_rate": 7.640692208689208e-07, "loss": 0.7385, "step": 19745 }, { "epoch": 0.34323558553077577, "grad_norm": 3.6725478838157692, "learning_rate": 7.640453172080336e-07, "loss": 0.7539, "step": 19746 }, { "epoch": 0.3432529680682786, "grad_norm": 1.680895778247381, "learning_rate": 7.640214127102439e-07, "loss": 0.2393, "step": 19747 }, { "epoch": 0.3432703506057814, "grad_norm": 1.6651973657600208, "learning_rate": 7.639975073756278e-07, "loss": 0.3479, "step": 19748 }, { "epoch": 0.34328773314328426, "grad_norm": 2.544883932552933, "learning_rate": 7.639736012042608e-07, "loss": 0.3721, "step": 19749 }, { "epoch": 0.3433051156807871, "grad_norm": 6.48565278011771, "learning_rate": 7.639496941962186e-07, "loss": 0.4318, "step": 19750 }, { "epoch": 0.3433224982182899, "grad_norm": 1.2911308207982242, "learning_rate": 7.639257863515774e-07, "loss": 0.2069, "step": 19751 }, { "epoch": 0.34333988075579275, "grad_norm": 1.0518910088780746, "learning_rate": 7.639018776704124e-07, "loss": 0.2578, "step": 19752 }, { "epoch": 0.3433572632932956, "grad_norm": 1.1733794234833739, "learning_rate": 7.638779681527998e-07, "loss": 0.1141, "step": 19753 }, { "epoch": 0.34337464583079835, "grad_norm": 2.2832300031079225, "learning_rate": 7.638540577988153e-07, "loss": 0.5572, "step": 19754 }, { "epoch": 0.3433920283683012, "grad_norm": 3.8341792912382764, "learning_rate": 7.638301466085346e-07, "loss": 0.2983, "step": 19755 }, { "epoch": 0.343409410905804, "grad_norm": 3.1164623571898344, "learning_rate": 7.638062345820337e-07, "loss": 0.4709, "step": 19756 }, { "epoch": 0.34342679344330684, "grad_norm": 1.2203359285990596, "learning_rate": 7.63782321719388e-07, "loss": 0.3181, "step": 19757 }, { "epoch": 0.3434441759808097, "grad_norm": 2.0006001013703636, "learning_rate": 7.637584080206736e-07, "loss": 0.253, "step": 19758 }, { "epoch": 0.3434615585183125, "grad_norm": 2.2983785709366176, "learning_rate": 7.637344934859662e-07, "loss": 0.2877, "step": 19759 }, { "epoch": 0.34347894105581533, "grad_norm": 1.333631166531939, "learning_rate": 7.637105781153415e-07, "loss": 0.3144, "step": 19760 }, { "epoch": 0.34349632359331816, "grad_norm": 2.9769580804082043, "learning_rate": 7.636866619088756e-07, "loss": 0.4678, "step": 19761 }, { "epoch": 0.343513706130821, "grad_norm": 2.701501081781166, "learning_rate": 7.636627448666439e-07, "loss": 0.5377, "step": 19762 }, { "epoch": 0.3435310886683238, "grad_norm": 2.3321728842006606, "learning_rate": 7.636388269887225e-07, "loss": 0.3008, "step": 19763 }, { "epoch": 0.3435484712058266, "grad_norm": 1.7450595508691942, "learning_rate": 7.636149082751872e-07, "loss": 0.3858, "step": 19764 }, { "epoch": 0.34356585374332943, "grad_norm": 2.0785608849853645, "learning_rate": 7.635909887261136e-07, "loss": 0.4527, "step": 19765 }, { "epoch": 0.34358323628083226, "grad_norm": 1.3895539480689398, "learning_rate": 7.635670683415778e-07, "loss": 0.4538, "step": 19766 }, { "epoch": 0.3436006188183351, "grad_norm": 1.7682270626825838, "learning_rate": 7.635431471216553e-07, "loss": 0.3949, "step": 19767 }, { "epoch": 0.3436180013558379, "grad_norm": 1.357780556088812, "learning_rate": 7.635192250664221e-07, "loss": 0.3111, "step": 19768 }, { "epoch": 0.34363538389334075, "grad_norm": 2.77889246993019, "learning_rate": 7.634953021759542e-07, "loss": 0.3987, "step": 19769 }, { "epoch": 0.3436527664308436, "grad_norm": 1.6107585474977466, "learning_rate": 7.634713784503272e-07, "loss": 0.3713, "step": 19770 }, { "epoch": 0.3436701489683464, "grad_norm": 2.2744152969263074, "learning_rate": 7.634474538896169e-07, "loss": 0.4088, "step": 19771 }, { "epoch": 0.34368753150584924, "grad_norm": 1.6455848169210818, "learning_rate": 7.634235284938991e-07, "loss": 0.36, "step": 19772 }, { "epoch": 0.34370491404335207, "grad_norm": 2.591983248782257, "learning_rate": 7.633996022632498e-07, "loss": 0.2787, "step": 19773 }, { "epoch": 0.34372229658085485, "grad_norm": 3.293616958723001, "learning_rate": 7.633756751977447e-07, "loss": 0.6524, "step": 19774 }, { "epoch": 0.3437396791183577, "grad_norm": 1.3428466193775577, "learning_rate": 7.633517472974598e-07, "loss": 0.3862, "step": 19775 }, { "epoch": 0.3437570616558605, "grad_norm": 1.8999791012708667, "learning_rate": 7.633278185624707e-07, "loss": 0.437, "step": 19776 }, { "epoch": 0.34377444419336334, "grad_norm": 0.9564072088591771, "learning_rate": 7.633038889928536e-07, "loss": 0.2394, "step": 19777 }, { "epoch": 0.34379182673086617, "grad_norm": 2.0890067975976043, "learning_rate": 7.632799585886839e-07, "loss": 0.2862, "step": 19778 }, { "epoch": 0.343809209268369, "grad_norm": 1.5538972597588043, "learning_rate": 7.632560273500376e-07, "loss": 0.4136, "step": 19779 }, { "epoch": 0.34382659180587183, "grad_norm": 1.7713080762805837, "learning_rate": 7.632320952769908e-07, "loss": 0.3588, "step": 19780 }, { "epoch": 0.34384397434337466, "grad_norm": 1.509948251354754, "learning_rate": 7.632081623696193e-07, "loss": 0.272, "step": 19781 }, { "epoch": 0.3438613568808775, "grad_norm": 1.183628396942966, "learning_rate": 7.631842286279986e-07, "loss": 0.246, "step": 19782 }, { "epoch": 0.3438787394183803, "grad_norm": 1.4182112454950309, "learning_rate": 7.631602940522051e-07, "loss": 0.4648, "step": 19783 }, { "epoch": 0.3438961219558831, "grad_norm": 1.0164035035442, "learning_rate": 7.631363586423139e-07, "loss": 0.1898, "step": 19784 }, { "epoch": 0.3439135044933859, "grad_norm": 1.9098580997865728, "learning_rate": 7.631124223984015e-07, "loss": 0.6734, "step": 19785 }, { "epoch": 0.34393088703088875, "grad_norm": 1.7037211914784502, "learning_rate": 7.630884853205437e-07, "loss": 0.3733, "step": 19786 }, { "epoch": 0.3439482695683916, "grad_norm": 1.5036318068530332, "learning_rate": 7.630645474088162e-07, "loss": 0.2041, "step": 19787 }, { "epoch": 0.3439656521058944, "grad_norm": 1.3793412032008188, "learning_rate": 7.630406086632948e-07, "loss": 0.2816, "step": 19788 }, { "epoch": 0.34398303464339725, "grad_norm": 2.563940437863127, "learning_rate": 7.630166690840558e-07, "loss": 0.2183, "step": 19789 }, { "epoch": 0.3440004171809001, "grad_norm": 2.975191288354481, "learning_rate": 7.629927286711746e-07, "loss": 0.3347, "step": 19790 }, { "epoch": 0.3440177997184029, "grad_norm": 1.6054101151127143, "learning_rate": 7.629687874247272e-07, "loss": 0.2952, "step": 19791 }, { "epoch": 0.34403518225590574, "grad_norm": 4.049762644642282, "learning_rate": 7.629448453447895e-07, "loss": 0.3933, "step": 19792 }, { "epoch": 0.34405256479340857, "grad_norm": 4.332347251671337, "learning_rate": 7.629209024314375e-07, "loss": 0.569, "step": 19793 }, { "epoch": 0.34406994733091134, "grad_norm": 3.012130821431385, "learning_rate": 7.628969586847472e-07, "loss": 0.5346, "step": 19794 }, { "epoch": 0.34408732986841417, "grad_norm": 2.7347706439752453, "learning_rate": 7.628730141047942e-07, "loss": 0.4605, "step": 19795 }, { "epoch": 0.344104712405917, "grad_norm": 0.8492756899175297, "learning_rate": 7.628490686916543e-07, "loss": 0.3362, "step": 19796 }, { "epoch": 0.34412209494341983, "grad_norm": 1.3584031671201862, "learning_rate": 7.628251224454039e-07, "loss": 0.2738, "step": 19797 }, { "epoch": 0.34413947748092266, "grad_norm": 2.666957509041285, "learning_rate": 7.628011753661183e-07, "loss": 0.3717, "step": 19798 }, { "epoch": 0.3441568600184255, "grad_norm": 2.25758456788346, "learning_rate": 7.627772274538737e-07, "loss": 0.3969, "step": 19799 }, { "epoch": 0.3441742425559283, "grad_norm": 1.2615632756337136, "learning_rate": 7.627532787087463e-07, "loss": 0.4942, "step": 19800 }, { "epoch": 0.34419162509343115, "grad_norm": 1.4483321301623209, "learning_rate": 7.627293291308114e-07, "loss": 0.4278, "step": 19801 }, { "epoch": 0.344209007630934, "grad_norm": 1.8633615884594223, "learning_rate": 7.627053787201454e-07, "loss": 0.3383, "step": 19802 }, { "epoch": 0.3442263901684368, "grad_norm": 2.198716932059298, "learning_rate": 7.62681427476824e-07, "loss": 0.3167, "step": 19803 }, { "epoch": 0.3442437727059396, "grad_norm": 1.9594490517693224, "learning_rate": 7.626574754009231e-07, "loss": 0.2794, "step": 19804 }, { "epoch": 0.3442611552434424, "grad_norm": 1.9333454946114226, "learning_rate": 7.626335224925187e-07, "loss": 0.4641, "step": 19805 }, { "epoch": 0.34427853778094525, "grad_norm": 1.925519602224282, "learning_rate": 7.626095687516867e-07, "loss": 0.2442, "step": 19806 }, { "epoch": 0.3442959203184481, "grad_norm": 1.1548656795274013, "learning_rate": 7.625856141785028e-07, "loss": 0.416, "step": 19807 }, { "epoch": 0.3443133028559509, "grad_norm": 1.5572036294657368, "learning_rate": 7.625616587730435e-07, "loss": 0.4957, "step": 19808 }, { "epoch": 0.34433068539345374, "grad_norm": 1.8304670724190693, "learning_rate": 7.625377025353839e-07, "loss": 0.2095, "step": 19809 }, { "epoch": 0.34434806793095657, "grad_norm": 2.2835883812010254, "learning_rate": 7.625137454656007e-07, "loss": 0.3981, "step": 19810 }, { "epoch": 0.3443654504684594, "grad_norm": 1.7464830640650746, "learning_rate": 7.624897875637695e-07, "loss": 0.2776, "step": 19811 }, { "epoch": 0.34438283300596223, "grad_norm": 1.1974027038656416, "learning_rate": 7.624658288299661e-07, "loss": 0.3903, "step": 19812 }, { "epoch": 0.34440021554346506, "grad_norm": 2.038328344102874, "learning_rate": 7.624418692642666e-07, "loss": 0.4157, "step": 19813 }, { "epoch": 0.34441759808096784, "grad_norm": 1.76438552677966, "learning_rate": 7.624179088667471e-07, "loss": 0.2663, "step": 19814 }, { "epoch": 0.34443498061847067, "grad_norm": 2.8876303930555567, "learning_rate": 7.623939476374832e-07, "loss": 0.4203, "step": 19815 }, { "epoch": 0.3444523631559735, "grad_norm": 1.333343619871712, "learning_rate": 7.62369985576551e-07, "loss": 0.3165, "step": 19816 }, { "epoch": 0.3444697456934763, "grad_norm": 1.5451938864545356, "learning_rate": 7.623460226840266e-07, "loss": 0.2889, "step": 19817 }, { "epoch": 0.34448712823097916, "grad_norm": 1.7049867789233262, "learning_rate": 7.623220589599859e-07, "loss": 0.3296, "step": 19818 }, { "epoch": 0.344504510768482, "grad_norm": 1.169838159395109, "learning_rate": 7.622980944045046e-07, "loss": 0.2869, "step": 19819 }, { "epoch": 0.3445218933059848, "grad_norm": 1.6066451634104684, "learning_rate": 7.622741290176589e-07, "loss": 0.2923, "step": 19820 }, { "epoch": 0.34453927584348765, "grad_norm": 1.6391811880949434, "learning_rate": 7.622501627995246e-07, "loss": 0.362, "step": 19821 }, { "epoch": 0.3445566583809905, "grad_norm": 1.6378214341322213, "learning_rate": 7.622261957501778e-07, "loss": 0.2445, "step": 19822 }, { "epoch": 0.3445740409184933, "grad_norm": 1.2875120803651667, "learning_rate": 7.622022278696945e-07, "loss": 0.394, "step": 19823 }, { "epoch": 0.3445914234559961, "grad_norm": 7.736980022267987, "learning_rate": 7.621782591581505e-07, "loss": 0.8638, "step": 19824 }, { "epoch": 0.3446088059934989, "grad_norm": 1.4768589729969928, "learning_rate": 7.621542896156219e-07, "loss": 0.328, "step": 19825 }, { "epoch": 0.34462618853100174, "grad_norm": 1.8928784684930922, "learning_rate": 7.621303192421847e-07, "loss": 0.427, "step": 19826 }, { "epoch": 0.3446435710685046, "grad_norm": 2.2923562659443837, "learning_rate": 7.621063480379146e-07, "loss": 0.202, "step": 19827 }, { "epoch": 0.3446609536060074, "grad_norm": 1.7373646575654227, "learning_rate": 7.620823760028878e-07, "loss": 0.3351, "step": 19828 }, { "epoch": 0.34467833614351023, "grad_norm": 1.9093493314574872, "learning_rate": 7.620584031371804e-07, "loss": 0.3472, "step": 19829 }, { "epoch": 0.34469571868101306, "grad_norm": 2.7131839016964863, "learning_rate": 7.620344294408683e-07, "loss": 0.3438, "step": 19830 }, { "epoch": 0.3447131012185159, "grad_norm": 2.0327902452972673, "learning_rate": 7.620104549140272e-07, "loss": 0.5004, "step": 19831 }, { "epoch": 0.3447304837560187, "grad_norm": 1.3335877290686615, "learning_rate": 7.619864795567334e-07, "loss": 0.2152, "step": 19832 }, { "epoch": 0.34474786629352155, "grad_norm": 1.537277279429699, "learning_rate": 7.619625033690628e-07, "loss": 0.3394, "step": 19833 }, { "epoch": 0.34476524883102433, "grad_norm": 2.688618538903499, "learning_rate": 7.619385263510915e-07, "loss": 0.2093, "step": 19834 }, { "epoch": 0.34478263136852716, "grad_norm": 1.9533546712027547, "learning_rate": 7.619145485028952e-07, "loss": 0.2206, "step": 19835 }, { "epoch": 0.34480001390603, "grad_norm": 1.9831112901677483, "learning_rate": 7.618905698245503e-07, "loss": 0.2169, "step": 19836 }, { "epoch": 0.3448173964435328, "grad_norm": 1.587635002951052, "learning_rate": 7.618665903161325e-07, "loss": 0.3546, "step": 19837 }, { "epoch": 0.34483477898103565, "grad_norm": 1.360487361499169, "learning_rate": 7.61842609977718e-07, "loss": 0.3297, "step": 19838 }, { "epoch": 0.3448521615185385, "grad_norm": 1.3181992558181503, "learning_rate": 7.618186288093826e-07, "loss": 0.5218, "step": 19839 }, { "epoch": 0.3448695440560413, "grad_norm": 8.149957509497424, "learning_rate": 7.617946468112024e-07, "loss": 0.5165, "step": 19840 }, { "epoch": 0.34488692659354414, "grad_norm": 1.6564954285255806, "learning_rate": 7.617706639832535e-07, "loss": 0.2686, "step": 19841 }, { "epoch": 0.34490430913104697, "grad_norm": 1.4687180256929828, "learning_rate": 7.617466803256118e-07, "loss": 0.2477, "step": 19842 }, { "epoch": 0.3449216916685498, "grad_norm": 2.802514973383736, "learning_rate": 7.617226958383535e-07, "loss": 0.5551, "step": 19843 }, { "epoch": 0.3449390742060526, "grad_norm": 1.4737381326480559, "learning_rate": 7.616987105215542e-07, "loss": 0.2397, "step": 19844 }, { "epoch": 0.3449564567435554, "grad_norm": 3.263665432742347, "learning_rate": 7.616747243752905e-07, "loss": 0.2694, "step": 19845 }, { "epoch": 0.34497383928105824, "grad_norm": 1.577308095526832, "learning_rate": 7.61650737399638e-07, "loss": 0.3605, "step": 19846 }, { "epoch": 0.34499122181856107, "grad_norm": 2.615656264240559, "learning_rate": 7.616267495946729e-07, "loss": 0.3294, "step": 19847 }, { "epoch": 0.3450086043560639, "grad_norm": 1.6525905346485483, "learning_rate": 7.61602760960471e-07, "loss": 0.3341, "step": 19848 }, { "epoch": 0.3450259868935667, "grad_norm": 1.9322637710148312, "learning_rate": 7.615787714971087e-07, "loss": 0.3285, "step": 19849 }, { "epoch": 0.34504336943106956, "grad_norm": 1.277228332311835, "learning_rate": 7.615547812046617e-07, "loss": 0.3933, "step": 19850 }, { "epoch": 0.3450607519685724, "grad_norm": 1.2060746113043574, "learning_rate": 7.615307900832063e-07, "loss": 0.2678, "step": 19851 }, { "epoch": 0.3450781345060752, "grad_norm": 0.9090493231445553, "learning_rate": 7.615067981328185e-07, "loss": 0.2572, "step": 19852 }, { "epoch": 0.34509551704357805, "grad_norm": 1.636531770291304, "learning_rate": 7.61482805353574e-07, "loss": 0.3737, "step": 19853 }, { "epoch": 0.3451128995810808, "grad_norm": 2.1392228292414757, "learning_rate": 7.614588117455494e-07, "loss": 0.29, "step": 19854 }, { "epoch": 0.34513028211858365, "grad_norm": 2.29338794207899, "learning_rate": 7.614348173088204e-07, "loss": 0.3668, "step": 19855 }, { "epoch": 0.3451476646560865, "grad_norm": 1.4220946558220504, "learning_rate": 7.614108220434632e-07, "loss": 0.2167, "step": 19856 }, { "epoch": 0.3451650471935893, "grad_norm": 3.1947496956326775, "learning_rate": 7.613868259495536e-07, "loss": 0.21, "step": 19857 }, { "epoch": 0.34518242973109214, "grad_norm": 3.0772756917816566, "learning_rate": 7.613628290271678e-07, "loss": 0.3923, "step": 19858 }, { "epoch": 0.345199812268595, "grad_norm": 2.1009934315983876, "learning_rate": 7.613388312763823e-07, "loss": 0.2858, "step": 19859 }, { "epoch": 0.3452171948060978, "grad_norm": 2.287116480527283, "learning_rate": 7.613148326972724e-07, "loss": 0.3002, "step": 19860 }, { "epoch": 0.34523457734360063, "grad_norm": 2.2690700118436613, "learning_rate": 7.612908332899144e-07, "loss": 0.3133, "step": 19861 }, { "epoch": 0.34525195988110347, "grad_norm": 1.1964490659926976, "learning_rate": 7.612668330543848e-07, "loss": 0.3001, "step": 19862 }, { "epoch": 0.3452693424186063, "grad_norm": 1.8386730527876598, "learning_rate": 7.612428319907592e-07, "loss": 0.3609, "step": 19863 }, { "epoch": 0.34528672495610907, "grad_norm": 1.591167202552087, "learning_rate": 7.612188300991138e-07, "loss": 0.3105, "step": 19864 }, { "epoch": 0.3453041074936119, "grad_norm": 2.8917781228364907, "learning_rate": 7.611948273795247e-07, "loss": 0.3191, "step": 19865 }, { "epoch": 0.34532149003111473, "grad_norm": 0.9536029815125328, "learning_rate": 7.611708238320683e-07, "loss": 0.2082, "step": 19866 }, { "epoch": 0.34533887256861756, "grad_norm": 1.0069694005272583, "learning_rate": 7.611468194568199e-07, "loss": 0.2076, "step": 19867 }, { "epoch": 0.3453562551061204, "grad_norm": 1.7503974251236643, "learning_rate": 7.611228142538564e-07, "loss": 0.2511, "step": 19868 }, { "epoch": 0.3453736376436232, "grad_norm": 2.238499710170119, "learning_rate": 7.610988082232532e-07, "loss": 0.4056, "step": 19869 }, { "epoch": 0.34539102018112605, "grad_norm": 1.6657105457899286, "learning_rate": 7.610748013650869e-07, "loss": 0.3789, "step": 19870 }, { "epoch": 0.3454084027186289, "grad_norm": 1.523143587972829, "learning_rate": 7.610507936794335e-07, "loss": 0.2251, "step": 19871 }, { "epoch": 0.3454257852561317, "grad_norm": 2.25684106995953, "learning_rate": 7.61026785166369e-07, "loss": 0.3591, "step": 19872 }, { "epoch": 0.34544316779363454, "grad_norm": 0.9712135199153733, "learning_rate": 7.610027758259693e-07, "loss": 0.2824, "step": 19873 }, { "epoch": 0.3454605503311373, "grad_norm": 1.9704032892461247, "learning_rate": 7.609787656583108e-07, "loss": 0.4075, "step": 19874 }, { "epoch": 0.34547793286864015, "grad_norm": 1.681260782771081, "learning_rate": 7.609547546634695e-07, "loss": 0.2446, "step": 19875 }, { "epoch": 0.345495315406143, "grad_norm": 1.7331351834222348, "learning_rate": 7.609307428415214e-07, "loss": 0.2583, "step": 19876 }, { "epoch": 0.3455126979436458, "grad_norm": 2.4226588848590165, "learning_rate": 7.609067301925428e-07, "loss": 0.3599, "step": 19877 }, { "epoch": 0.34553008048114864, "grad_norm": 1.3908484779962547, "learning_rate": 7.608827167166099e-07, "loss": 0.4089, "step": 19878 }, { "epoch": 0.34554746301865147, "grad_norm": 1.4308005518631204, "learning_rate": 7.608587024137983e-07, "loss": 0.2992, "step": 19879 }, { "epoch": 0.3455648455561543, "grad_norm": 2.3176343678717175, "learning_rate": 7.608346872841847e-07, "loss": 0.2895, "step": 19880 }, { "epoch": 0.34558222809365713, "grad_norm": 0.924471775092277, "learning_rate": 7.608106713278447e-07, "loss": 0.195, "step": 19881 }, { "epoch": 0.34559961063115996, "grad_norm": 1.5647203158214962, "learning_rate": 7.60786654544855e-07, "loss": 0.3852, "step": 19882 }, { "epoch": 0.34561699316866273, "grad_norm": 1.8861047703049567, "learning_rate": 7.607626369352911e-07, "loss": 0.2939, "step": 19883 }, { "epoch": 0.34563437570616556, "grad_norm": 1.3144963535587277, "learning_rate": 7.607386184992295e-07, "loss": 0.1809, "step": 19884 }, { "epoch": 0.3456517582436684, "grad_norm": 1.5054393433699391, "learning_rate": 7.607145992367463e-07, "loss": 0.2588, "step": 19885 }, { "epoch": 0.3456691407811712, "grad_norm": 1.3305907077110917, "learning_rate": 7.606905791479176e-07, "loss": 0.469, "step": 19886 }, { "epoch": 0.34568652331867405, "grad_norm": 2.1313519616752887, "learning_rate": 7.606665582328196e-07, "loss": 0.2937, "step": 19887 }, { "epoch": 0.3457039058561769, "grad_norm": 1.036326618164662, "learning_rate": 7.606425364915281e-07, "loss": 0.3689, "step": 19888 }, { "epoch": 0.3457212883936797, "grad_norm": 2.3832661862700095, "learning_rate": 7.606185139241197e-07, "loss": 0.3106, "step": 19889 }, { "epoch": 0.34573867093118255, "grad_norm": 2.268810472895139, "learning_rate": 7.605944905306703e-07, "loss": 0.365, "step": 19890 }, { "epoch": 0.3457560534686854, "grad_norm": 1.1407883288387481, "learning_rate": 7.60570466311256e-07, "loss": 0.4263, "step": 19891 }, { "epoch": 0.3457734360061882, "grad_norm": 1.4129730321424676, "learning_rate": 7.605464412659531e-07, "loss": 0.2792, "step": 19892 }, { "epoch": 0.345790818543691, "grad_norm": 1.5095872257312357, "learning_rate": 7.605224153948375e-07, "loss": 0.3329, "step": 19893 }, { "epoch": 0.3458082010811938, "grad_norm": 1.811092402567197, "learning_rate": 7.604983886979857e-07, "loss": 0.6945, "step": 19894 }, { "epoch": 0.34582558361869664, "grad_norm": 2.202434013356376, "learning_rate": 7.604743611754736e-07, "loss": 0.2645, "step": 19895 }, { "epoch": 0.34584296615619947, "grad_norm": 1.902779580111484, "learning_rate": 7.604503328273772e-07, "loss": 0.4034, "step": 19896 }, { "epoch": 0.3458603486937023, "grad_norm": 1.004436507495115, "learning_rate": 7.604263036537734e-07, "loss": 0.2491, "step": 19897 }, { "epoch": 0.34587773123120513, "grad_norm": 1.4697817324936386, "learning_rate": 7.604022736547376e-07, "loss": 0.3869, "step": 19898 }, { "epoch": 0.34589511376870796, "grad_norm": 2.0679511069418055, "learning_rate": 7.60378242830346e-07, "loss": 0.2309, "step": 19899 }, { "epoch": 0.3459124963062108, "grad_norm": 1.4000353992667052, "learning_rate": 7.603542111806752e-07, "loss": 0.174, "step": 19900 }, { "epoch": 0.3459298788437136, "grad_norm": 2.238684764099561, "learning_rate": 7.603301787058012e-07, "loss": 0.2595, "step": 19901 }, { "epoch": 0.34594726138121645, "grad_norm": 2.2155269930855286, "learning_rate": 7.603061454057999e-07, "loss": 0.3263, "step": 19902 }, { "epoch": 0.34596464391871923, "grad_norm": 1.230752135023902, "learning_rate": 7.60282111280748e-07, "loss": 0.169, "step": 19903 }, { "epoch": 0.34598202645622206, "grad_norm": 3.0070196595758882, "learning_rate": 7.602580763307212e-07, "loss": 0.6818, "step": 19904 }, { "epoch": 0.3459994089937249, "grad_norm": 1.7427525381776214, "learning_rate": 7.602340405557957e-07, "loss": 0.3557, "step": 19905 }, { "epoch": 0.3460167915312277, "grad_norm": 1.7083304496789953, "learning_rate": 7.602100039560479e-07, "loss": 0.4266, "step": 19906 }, { "epoch": 0.34603417406873055, "grad_norm": 1.8602433372225773, "learning_rate": 7.601859665315542e-07, "loss": 0.3736, "step": 19907 }, { "epoch": 0.3460515566062334, "grad_norm": 2.807374954482196, "learning_rate": 7.601619282823901e-07, "loss": 0.2839, "step": 19908 }, { "epoch": 0.3460689391437362, "grad_norm": 2.646067026748069, "learning_rate": 7.601378892086325e-07, "loss": 0.3984, "step": 19909 }, { "epoch": 0.34608632168123904, "grad_norm": 1.8850157703538344, "learning_rate": 7.601138493103572e-07, "loss": 0.3603, "step": 19910 }, { "epoch": 0.34610370421874187, "grad_norm": 1.1843560134410656, "learning_rate": 7.600898085876403e-07, "loss": 0.2436, "step": 19911 }, { "epoch": 0.3461210867562447, "grad_norm": 1.4800793332082738, "learning_rate": 7.600657670405585e-07, "loss": 0.3407, "step": 19912 }, { "epoch": 0.3461384692937475, "grad_norm": 1.8879666699293653, "learning_rate": 7.600417246691874e-07, "loss": 0.4862, "step": 19913 }, { "epoch": 0.3461558518312503, "grad_norm": 1.45988853285, "learning_rate": 7.600176814736037e-07, "loss": 0.2502, "step": 19914 }, { "epoch": 0.34617323436875314, "grad_norm": 2.3270671174371773, "learning_rate": 7.599936374538834e-07, "loss": 0.4723, "step": 19915 }, { "epoch": 0.34619061690625597, "grad_norm": 2.1091162187169616, "learning_rate": 7.599695926101026e-07, "loss": 0.2342, "step": 19916 }, { "epoch": 0.3462079994437588, "grad_norm": 1.7396339345735343, "learning_rate": 7.599455469423375e-07, "loss": 0.3782, "step": 19917 }, { "epoch": 0.3462253819812616, "grad_norm": 3.3219764299776364, "learning_rate": 7.599215004506646e-07, "loss": 0.4906, "step": 19918 }, { "epoch": 0.34624276451876446, "grad_norm": 2.040890443380777, "learning_rate": 7.598974531351599e-07, "loss": 0.3406, "step": 19919 }, { "epoch": 0.3462601470562673, "grad_norm": 1.847473098226627, "learning_rate": 7.598734049958997e-07, "loss": 0.3804, "step": 19920 }, { "epoch": 0.3462775295937701, "grad_norm": 1.69010668406405, "learning_rate": 7.5984935603296e-07, "loss": 0.3864, "step": 19921 }, { "epoch": 0.34629491213127295, "grad_norm": 2.352437839504331, "learning_rate": 7.598253062464175e-07, "loss": 0.4768, "step": 19922 }, { "epoch": 0.3463122946687757, "grad_norm": 2.2112912035222503, "learning_rate": 7.598012556363479e-07, "loss": 0.3376, "step": 19923 }, { "epoch": 0.34632967720627855, "grad_norm": 1.9226656717473627, "learning_rate": 7.597772042028278e-07, "loss": 0.4675, "step": 19924 }, { "epoch": 0.3463470597437814, "grad_norm": 2.203377795173358, "learning_rate": 7.597531519459333e-07, "loss": 0.2887, "step": 19925 }, { "epoch": 0.3463644422812842, "grad_norm": 2.9336960234750418, "learning_rate": 7.597290988657405e-07, "loss": 0.4687, "step": 19926 }, { "epoch": 0.34638182481878704, "grad_norm": 1.5197887407976458, "learning_rate": 7.597050449623257e-07, "loss": 0.2897, "step": 19927 }, { "epoch": 0.3463992073562899, "grad_norm": 2.19833908783823, "learning_rate": 7.596809902357655e-07, "loss": 0.3391, "step": 19928 }, { "epoch": 0.3464165898937927, "grad_norm": 2.805969372744065, "learning_rate": 7.596569346861356e-07, "loss": 0.3038, "step": 19929 }, { "epoch": 0.34643397243129553, "grad_norm": 0.9648455808732438, "learning_rate": 7.596328783135127e-07, "loss": 0.27, "step": 19930 }, { "epoch": 0.34645135496879836, "grad_norm": 2.132495040233605, "learning_rate": 7.596088211179725e-07, "loss": 0.364, "step": 19931 }, { "epoch": 0.3464687375063012, "grad_norm": 1.4458669981059942, "learning_rate": 7.59584763099592e-07, "loss": 0.3683, "step": 19932 }, { "epoch": 0.34648612004380397, "grad_norm": 1.4726268878905298, "learning_rate": 7.595607042584467e-07, "loss": 0.3085, "step": 19933 }, { "epoch": 0.3465035025813068, "grad_norm": 2.4126240050033867, "learning_rate": 7.595366445946135e-07, "loss": 0.4467, "step": 19934 }, { "epoch": 0.34652088511880963, "grad_norm": 1.1769201758953889, "learning_rate": 7.595125841081681e-07, "loss": 0.1959, "step": 19935 }, { "epoch": 0.34653826765631246, "grad_norm": 1.7079560537616993, "learning_rate": 7.594885227991872e-07, "loss": 0.3756, "step": 19936 }, { "epoch": 0.3465556501938153, "grad_norm": 2.855027009395727, "learning_rate": 7.594644606677467e-07, "loss": 0.3039, "step": 19937 }, { "epoch": 0.3465730327313181, "grad_norm": 1.4842090569970652, "learning_rate": 7.594403977139232e-07, "loss": 0.3223, "step": 19938 }, { "epoch": 0.34659041526882095, "grad_norm": 2.1352870213506874, "learning_rate": 7.594163339377928e-07, "loss": 0.3776, "step": 19939 }, { "epoch": 0.3466077978063238, "grad_norm": 2.3233926119315913, "learning_rate": 7.593922693394317e-07, "loss": 0.3732, "step": 19940 }, { "epoch": 0.3466251803438266, "grad_norm": 1.5650077332524162, "learning_rate": 7.593682039189164e-07, "loss": 0.3427, "step": 19941 }, { "epoch": 0.34664256288132944, "grad_norm": 1.3319767631870507, "learning_rate": 7.59344137676323e-07, "loss": 0.2627, "step": 19942 }, { "epoch": 0.3466599454188322, "grad_norm": 2.184961231497698, "learning_rate": 7.593200706117278e-07, "loss": 0.4453, "step": 19943 }, { "epoch": 0.34667732795633505, "grad_norm": 1.954180249463255, "learning_rate": 7.59296002725207e-07, "loss": 0.3656, "step": 19944 }, { "epoch": 0.3466947104938379, "grad_norm": 2.753148886853228, "learning_rate": 7.59271934016837e-07, "loss": 0.3945, "step": 19945 }, { "epoch": 0.3467120930313407, "grad_norm": 1.731478991221578, "learning_rate": 7.592478644866941e-07, "loss": 0.3859, "step": 19946 }, { "epoch": 0.34672947556884354, "grad_norm": 2.403108763578456, "learning_rate": 7.592237941348547e-07, "loss": 0.275, "step": 19947 }, { "epoch": 0.34674685810634637, "grad_norm": 1.4375708662100655, "learning_rate": 7.591997229613948e-07, "loss": 0.1911, "step": 19948 }, { "epoch": 0.3467642406438492, "grad_norm": 2.2859367729241877, "learning_rate": 7.59175650966391e-07, "loss": 0.6971, "step": 19949 }, { "epoch": 0.34678162318135203, "grad_norm": 1.3286303269565953, "learning_rate": 7.591515781499192e-07, "loss": 0.3744, "step": 19950 }, { "epoch": 0.34679900571885486, "grad_norm": 1.4282470582299753, "learning_rate": 7.591275045120561e-07, "loss": 0.6111, "step": 19951 }, { "epoch": 0.3468163882563577, "grad_norm": 3.300713249565417, "learning_rate": 7.591034300528778e-07, "loss": 0.1977, "step": 19952 }, { "epoch": 0.34683377079386046, "grad_norm": 1.606746150775651, "learning_rate": 7.590793547724605e-07, "loss": 0.3121, "step": 19953 }, { "epoch": 0.3468511533313633, "grad_norm": 3.042916011248988, "learning_rate": 7.59055278670881e-07, "loss": 0.2741, "step": 19954 }, { "epoch": 0.3468685358688661, "grad_norm": 1.667804495108739, "learning_rate": 7.59031201748215e-07, "loss": 0.3676, "step": 19955 }, { "epoch": 0.34688591840636895, "grad_norm": 2.1511688410603775, "learning_rate": 7.59007124004539e-07, "loss": 0.4404, "step": 19956 }, { "epoch": 0.3469033009438718, "grad_norm": 1.254481137345316, "learning_rate": 7.589830454399297e-07, "loss": 0.1901, "step": 19957 }, { "epoch": 0.3469206834813746, "grad_norm": 1.6178795479687138, "learning_rate": 7.589589660544629e-07, "loss": 0.3248, "step": 19958 }, { "epoch": 0.34693806601887744, "grad_norm": 2.2584264549870277, "learning_rate": 7.589348858482153e-07, "loss": 0.2652, "step": 19959 }, { "epoch": 0.3469554485563803, "grad_norm": 1.5134757613336083, "learning_rate": 7.589108048212629e-07, "loss": 0.54, "step": 19960 }, { "epoch": 0.3469728310938831, "grad_norm": 1.4062817489402686, "learning_rate": 7.588867229736824e-07, "loss": 0.2045, "step": 19961 }, { "epoch": 0.34699021363138594, "grad_norm": 1.2714198877039515, "learning_rate": 7.588626403055496e-07, "loss": 0.2694, "step": 19962 }, { "epoch": 0.3470075961688887, "grad_norm": 1.3529001106793785, "learning_rate": 7.588385568169414e-07, "loss": 0.299, "step": 19963 }, { "epoch": 0.34702497870639154, "grad_norm": 2.526520425344351, "learning_rate": 7.588144725079337e-07, "loss": 0.3497, "step": 19964 }, { "epoch": 0.34704236124389437, "grad_norm": 2.1916567402595826, "learning_rate": 7.587903873786031e-07, "loss": 0.245, "step": 19965 }, { "epoch": 0.3470597437813972, "grad_norm": 1.4952367245709073, "learning_rate": 7.587663014290259e-07, "loss": 0.253, "step": 19966 }, { "epoch": 0.34707712631890003, "grad_norm": 1.2515410190004184, "learning_rate": 7.587422146592783e-07, "loss": 0.4249, "step": 19967 }, { "epoch": 0.34709450885640286, "grad_norm": 1.6522493071559725, "learning_rate": 7.587181270694367e-07, "loss": 0.288, "step": 19968 }, { "epoch": 0.3471118913939057, "grad_norm": 2.7105142541917515, "learning_rate": 7.586940386595776e-07, "loss": 0.336, "step": 19969 }, { "epoch": 0.3471292739314085, "grad_norm": 1.5159149634970952, "learning_rate": 7.586699494297773e-07, "loss": 0.3275, "step": 19970 }, { "epoch": 0.34714665646891135, "grad_norm": 3.714170811555087, "learning_rate": 7.586458593801118e-07, "loss": 0.52, "step": 19971 }, { "epoch": 0.3471640390064142, "grad_norm": 2.0268543198395865, "learning_rate": 7.58621768510658e-07, "loss": 0.4319, "step": 19972 }, { "epoch": 0.34718142154391696, "grad_norm": 1.6263597822965714, "learning_rate": 7.585976768214918e-07, "loss": 0.4711, "step": 19973 }, { "epoch": 0.3471988040814198, "grad_norm": 1.367514911392323, "learning_rate": 7.5857358431269e-07, "loss": 0.3074, "step": 19974 }, { "epoch": 0.3472161866189226, "grad_norm": 1.5994135613683367, "learning_rate": 7.585494909843284e-07, "loss": 0.2051, "step": 19975 }, { "epoch": 0.34723356915642545, "grad_norm": 1.923133392311644, "learning_rate": 7.585253968364838e-07, "loss": 0.2496, "step": 19976 }, { "epoch": 0.3472509516939283, "grad_norm": 1.64493774454803, "learning_rate": 7.585013018692326e-07, "loss": 0.2715, "step": 19977 }, { "epoch": 0.3472683342314311, "grad_norm": 2.1116072879091354, "learning_rate": 7.584772060826508e-07, "loss": 0.3993, "step": 19978 }, { "epoch": 0.34728571676893394, "grad_norm": 1.9966451855704315, "learning_rate": 7.584531094768151e-07, "loss": 0.4517, "step": 19979 }, { "epoch": 0.34730309930643677, "grad_norm": 1.6832547958976851, "learning_rate": 7.584290120518018e-07, "loss": 0.4714, "step": 19980 }, { "epoch": 0.3473204818439396, "grad_norm": 2.812246134788162, "learning_rate": 7.584049138076872e-07, "loss": 0.173, "step": 19981 }, { "epoch": 0.34733786438144243, "grad_norm": 1.4003809217787277, "learning_rate": 7.583808147445479e-07, "loss": 0.2764, "step": 19982 }, { "epoch": 0.3473552469189452, "grad_norm": 1.9188249673079074, "learning_rate": 7.583567148624599e-07, "loss": 0.1861, "step": 19983 }, { "epoch": 0.34737262945644803, "grad_norm": 1.7932734285208478, "learning_rate": 7.583326141614999e-07, "loss": 0.3364, "step": 19984 }, { "epoch": 0.34739001199395086, "grad_norm": 1.2656524119417643, "learning_rate": 7.58308512641744e-07, "loss": 0.2881, "step": 19985 }, { "epoch": 0.3474073945314537, "grad_norm": 1.1221454427432382, "learning_rate": 7.582844103032688e-07, "loss": 0.2881, "step": 19986 }, { "epoch": 0.3474247770689565, "grad_norm": 1.6615794264867987, "learning_rate": 7.582603071461509e-07, "loss": 0.325, "step": 19987 }, { "epoch": 0.34744215960645936, "grad_norm": 1.4513844180670474, "learning_rate": 7.582362031704664e-07, "loss": 0.2807, "step": 19988 }, { "epoch": 0.3474595421439622, "grad_norm": 2.557680221222515, "learning_rate": 7.582120983762915e-07, "loss": 0.32, "step": 19989 }, { "epoch": 0.347476924681465, "grad_norm": 1.2538140969479534, "learning_rate": 7.581879927637032e-07, "loss": 0.2137, "step": 19990 }, { "epoch": 0.34749430721896785, "grad_norm": 1.711160440671005, "learning_rate": 7.581638863327773e-07, "loss": 0.3469, "step": 19991 }, { "epoch": 0.3475116897564707, "grad_norm": 1.6121191938935642, "learning_rate": 7.581397790835906e-07, "loss": 0.3425, "step": 19992 }, { "epoch": 0.34752907229397345, "grad_norm": 1.5348719518024745, "learning_rate": 7.581156710162193e-07, "loss": 0.2545, "step": 19993 }, { "epoch": 0.3475464548314763, "grad_norm": 2.1899123689359508, "learning_rate": 7.580915621307399e-07, "loss": 0.3428, "step": 19994 }, { "epoch": 0.3475638373689791, "grad_norm": 1.9953840028201224, "learning_rate": 7.58067452427229e-07, "loss": 0.4877, "step": 19995 }, { "epoch": 0.34758121990648194, "grad_norm": 1.609909601951204, "learning_rate": 7.580433419057626e-07, "loss": 0.3167, "step": 19996 }, { "epoch": 0.34759860244398477, "grad_norm": 2.305535058415688, "learning_rate": 7.580192305664174e-07, "loss": 0.4437, "step": 19997 }, { "epoch": 0.3476159849814876, "grad_norm": 1.394196133869208, "learning_rate": 7.579951184092698e-07, "loss": 0.3601, "step": 19998 }, { "epoch": 0.34763336751899043, "grad_norm": 1.6038134889189846, "learning_rate": 7.579710054343961e-07, "loss": 0.2156, "step": 19999 }, { "epoch": 0.34765075005649326, "grad_norm": 1.772534227182582, "learning_rate": 7.57946891641873e-07, "loss": 0.2782, "step": 20000 }, { "epoch": 0.3476681325939961, "grad_norm": 3.613209555944558, "learning_rate": 7.579227770317767e-07, "loss": 0.2113, "step": 20001 }, { "epoch": 0.3476855151314989, "grad_norm": 1.776468550766881, "learning_rate": 7.578986616041835e-07, "loss": 0.1566, "step": 20002 }, { "epoch": 0.3477028976690017, "grad_norm": 1.9917384793285344, "learning_rate": 7.578745453591702e-07, "loss": 0.2117, "step": 20003 }, { "epoch": 0.34772028020650453, "grad_norm": 2.399956671139659, "learning_rate": 7.578504282968129e-07, "loss": 0.3857, "step": 20004 }, { "epoch": 0.34773766274400736, "grad_norm": 3.0492797495227864, "learning_rate": 7.578263104171883e-07, "loss": 0.3983, "step": 20005 }, { "epoch": 0.3477550452815102, "grad_norm": 1.778899258247132, "learning_rate": 7.578021917203725e-07, "loss": 0.3893, "step": 20006 }, { "epoch": 0.347772427819013, "grad_norm": 1.3458345682808543, "learning_rate": 7.577780722064426e-07, "loss": 0.2806, "step": 20007 }, { "epoch": 0.34778981035651585, "grad_norm": 1.699812415064338, "learning_rate": 7.577539518754743e-07, "loss": 0.301, "step": 20008 }, { "epoch": 0.3478071928940187, "grad_norm": 2.4866299760244854, "learning_rate": 7.577298307275445e-07, "loss": 0.2765, "step": 20009 }, { "epoch": 0.3478245754315215, "grad_norm": 1.9112988268879265, "learning_rate": 7.577057087627295e-07, "loss": 0.2927, "step": 20010 }, { "epoch": 0.34784195796902434, "grad_norm": 2.5074931828457268, "learning_rate": 7.576815859811057e-07, "loss": 0.5399, "step": 20011 }, { "epoch": 0.34785934050652717, "grad_norm": 1.8036840931959701, "learning_rate": 7.576574623827497e-07, "loss": 0.3168, "step": 20012 }, { "epoch": 0.34787672304402995, "grad_norm": 3.2953433767089253, "learning_rate": 7.57633337967738e-07, "loss": 0.2653, "step": 20013 }, { "epoch": 0.3478941055815328, "grad_norm": 1.474307179427554, "learning_rate": 7.576092127361468e-07, "loss": 0.1886, "step": 20014 }, { "epoch": 0.3479114881190356, "grad_norm": 2.148996397623551, "learning_rate": 7.575850866880529e-07, "loss": 0.3654, "step": 20015 }, { "epoch": 0.34792887065653844, "grad_norm": 1.825181679094195, "learning_rate": 7.575609598235324e-07, "loss": 0.2133, "step": 20016 }, { "epoch": 0.34794625319404127, "grad_norm": 2.5154478503314532, "learning_rate": 7.575368321426623e-07, "loss": 0.3457, "step": 20017 }, { "epoch": 0.3479636357315441, "grad_norm": 2.336920929823802, "learning_rate": 7.575127036455185e-07, "loss": 0.3279, "step": 20018 }, { "epoch": 0.3479810182690469, "grad_norm": 2.6142287952584162, "learning_rate": 7.574885743321777e-07, "loss": 0.5159, "step": 20019 }, { "epoch": 0.34799840080654976, "grad_norm": 2.746637635291162, "learning_rate": 7.574644442027165e-07, "loss": 0.2541, "step": 20020 }, { "epoch": 0.3480157833440526, "grad_norm": 2.0065346552663237, "learning_rate": 7.574403132572111e-07, "loss": 0.3413, "step": 20021 }, { "epoch": 0.34803316588155536, "grad_norm": 2.070901342366349, "learning_rate": 7.574161814957383e-07, "loss": 0.3286, "step": 20022 }, { "epoch": 0.3480505484190582, "grad_norm": 1.9424511774348965, "learning_rate": 7.573920489183746e-07, "loss": 0.2981, "step": 20023 }, { "epoch": 0.348067930956561, "grad_norm": 1.1802555406043507, "learning_rate": 7.573679155251962e-07, "loss": 0.49, "step": 20024 }, { "epoch": 0.34808531349406385, "grad_norm": 2.271515438008759, "learning_rate": 7.573437813162796e-07, "loss": 0.3702, "step": 20025 }, { "epoch": 0.3481026960315667, "grad_norm": 1.2119693749266212, "learning_rate": 7.573196462917017e-07, "loss": 0.1925, "step": 20026 }, { "epoch": 0.3481200785690695, "grad_norm": 1.4248686405588553, "learning_rate": 7.572955104515385e-07, "loss": 0.3008, "step": 20027 }, { "epoch": 0.34813746110657234, "grad_norm": 1.435805015454888, "learning_rate": 7.572713737958667e-07, "loss": 0.3826, "step": 20028 }, { "epoch": 0.3481548436440752, "grad_norm": 1.6517320552391603, "learning_rate": 7.572472363247631e-07, "loss": 0.3492, "step": 20029 }, { "epoch": 0.348172226181578, "grad_norm": 1.9135183610933173, "learning_rate": 7.572230980383036e-07, "loss": 0.2339, "step": 20030 }, { "epoch": 0.34818960871908083, "grad_norm": 1.2390259216777313, "learning_rate": 7.571989589365652e-07, "loss": 0.4247, "step": 20031 }, { "epoch": 0.3482069912565836, "grad_norm": 1.7892013475148136, "learning_rate": 7.571748190196243e-07, "loss": 0.2806, "step": 20032 }, { "epoch": 0.34822437379408644, "grad_norm": 1.3813355701643983, "learning_rate": 7.571506782875573e-07, "loss": 0.3509, "step": 20033 }, { "epoch": 0.34824175633158927, "grad_norm": 1.5993268172596136, "learning_rate": 7.571265367404408e-07, "loss": 0.3613, "step": 20034 }, { "epoch": 0.3482591388690921, "grad_norm": 1.874949862304232, "learning_rate": 7.571023943783512e-07, "loss": 0.3031, "step": 20035 }, { "epoch": 0.34827652140659493, "grad_norm": 1.6838687130149763, "learning_rate": 7.570782512013653e-07, "loss": 0.4203, "step": 20036 }, { "epoch": 0.34829390394409776, "grad_norm": 2.032414536266118, "learning_rate": 7.570541072095593e-07, "loss": 0.2856, "step": 20037 }, { "epoch": 0.3483112864816006, "grad_norm": 2.2915639542009405, "learning_rate": 7.570299624030099e-07, "loss": 0.3077, "step": 20038 }, { "epoch": 0.3483286690191034, "grad_norm": 0.917942508549032, "learning_rate": 7.570058167817936e-07, "loss": 0.212, "step": 20039 }, { "epoch": 0.34834605155660625, "grad_norm": 2.1172230835562136, "learning_rate": 7.569816703459868e-07, "loss": 0.3893, "step": 20040 }, { "epoch": 0.3483634340941091, "grad_norm": 3.1142861587396253, "learning_rate": 7.569575230956662e-07, "loss": 0.6385, "step": 20041 }, { "epoch": 0.34838081663161186, "grad_norm": 1.6270614486173156, "learning_rate": 7.569333750309084e-07, "loss": 0.2904, "step": 20042 }, { "epoch": 0.3483981991691147, "grad_norm": 2.3386204545166187, "learning_rate": 7.569092261517898e-07, "loss": 0.3553, "step": 20043 }, { "epoch": 0.3484155817066175, "grad_norm": 1.834047878073653, "learning_rate": 7.56885076458387e-07, "loss": 0.4064, "step": 20044 }, { "epoch": 0.34843296424412035, "grad_norm": 1.802252933540628, "learning_rate": 7.568609259507763e-07, "loss": 0.2654, "step": 20045 }, { "epoch": 0.3484503467816232, "grad_norm": 1.3447149450191551, "learning_rate": 7.568367746290347e-07, "loss": 0.4107, "step": 20046 }, { "epoch": 0.348467729319126, "grad_norm": 0.8813044110256546, "learning_rate": 7.568126224932383e-07, "loss": 0.2661, "step": 20047 }, { "epoch": 0.34848511185662884, "grad_norm": 1.544591148969232, "learning_rate": 7.567884695434641e-07, "loss": 0.2766, "step": 20048 }, { "epoch": 0.34850249439413167, "grad_norm": 1.4027362724233505, "learning_rate": 7.567643157797882e-07, "loss": 0.3662, "step": 20049 }, { "epoch": 0.3485198769316345, "grad_norm": 2.777385676601301, "learning_rate": 7.567401612022874e-07, "loss": 0.3539, "step": 20050 }, { "epoch": 0.34853725946913733, "grad_norm": 1.4480371509490475, "learning_rate": 7.567160058110382e-07, "loss": 0.2752, "step": 20051 }, { "epoch": 0.3485546420066401, "grad_norm": 1.8331375860661296, "learning_rate": 7.566918496061174e-07, "loss": 0.2418, "step": 20052 }, { "epoch": 0.34857202454414293, "grad_norm": 1.740987616896336, "learning_rate": 7.56667692587601e-07, "loss": 0.3497, "step": 20053 }, { "epoch": 0.34858940708164576, "grad_norm": 2.063912872538911, "learning_rate": 7.566435347555662e-07, "loss": 0.3506, "step": 20054 }, { "epoch": 0.3486067896191486, "grad_norm": 1.9102619887311811, "learning_rate": 7.566193761100892e-07, "loss": 0.3775, "step": 20055 }, { "epoch": 0.3486241721566514, "grad_norm": 1.7446747906903135, "learning_rate": 7.565952166512467e-07, "loss": 0.38, "step": 20056 }, { "epoch": 0.34864155469415425, "grad_norm": 2.3259232121563396, "learning_rate": 7.565710563791151e-07, "loss": 0.2118, "step": 20057 }, { "epoch": 0.3486589372316571, "grad_norm": 1.8494870634856562, "learning_rate": 7.565468952937713e-07, "loss": 0.6072, "step": 20058 }, { "epoch": 0.3486763197691599, "grad_norm": 6.371280762344761, "learning_rate": 7.565227333952915e-07, "loss": 0.3753, "step": 20059 }, { "epoch": 0.34869370230666275, "grad_norm": 2.9440066366631576, "learning_rate": 7.564985706837527e-07, "loss": 0.4517, "step": 20060 }, { "epoch": 0.3487110848441656, "grad_norm": 2.0666860615972014, "learning_rate": 7.564744071592311e-07, "loss": 0.3599, "step": 20061 }, { "epoch": 0.34872846738166835, "grad_norm": 2.7547510198663634, "learning_rate": 7.564502428218033e-07, "loss": 0.2527, "step": 20062 }, { "epoch": 0.3487458499191712, "grad_norm": 2.425198586015947, "learning_rate": 7.564260776715463e-07, "loss": 0.2513, "step": 20063 }, { "epoch": 0.348763232456674, "grad_norm": 1.0822054808130144, "learning_rate": 7.564019117085363e-07, "loss": 0.2704, "step": 20064 }, { "epoch": 0.34878061499417684, "grad_norm": 1.749112319998744, "learning_rate": 7.563777449328501e-07, "loss": 0.2174, "step": 20065 }, { "epoch": 0.34879799753167967, "grad_norm": 1.351267770775904, "learning_rate": 7.563535773445641e-07, "loss": 0.2065, "step": 20066 }, { "epoch": 0.3488153800691825, "grad_norm": 3.7444763085284882, "learning_rate": 7.563294089437553e-07, "loss": 0.237, "step": 20067 }, { "epoch": 0.34883276260668533, "grad_norm": 1.6537638308019793, "learning_rate": 7.563052397304997e-07, "loss": 0.1766, "step": 20068 }, { "epoch": 0.34885014514418816, "grad_norm": 1.7628339643599098, "learning_rate": 7.562810697048742e-07, "loss": 0.3495, "step": 20069 }, { "epoch": 0.348867527681691, "grad_norm": 1.4561204882021395, "learning_rate": 7.562568988669558e-07, "loss": 0.3972, "step": 20070 }, { "epoch": 0.3488849102191938, "grad_norm": 1.5353976873880635, "learning_rate": 7.562327272168205e-07, "loss": 0.4085, "step": 20071 }, { "epoch": 0.3489022927566966, "grad_norm": 1.9741115825235422, "learning_rate": 7.562085547545451e-07, "loss": 0.2814, "step": 20072 }, { "epoch": 0.3489196752941994, "grad_norm": 1.0843505777251008, "learning_rate": 7.561843814802063e-07, "loss": 0.3362, "step": 20073 }, { "epoch": 0.34893705783170226, "grad_norm": 1.4971868200074705, "learning_rate": 7.561602073938808e-07, "loss": 0.2485, "step": 20074 }, { "epoch": 0.3489544403692051, "grad_norm": 1.2022037710920115, "learning_rate": 7.56136032495645e-07, "loss": 0.4796, "step": 20075 }, { "epoch": 0.3489718229067079, "grad_norm": 1.3441659165966708, "learning_rate": 7.561118567855758e-07, "loss": 0.406, "step": 20076 }, { "epoch": 0.34898920544421075, "grad_norm": 2.926848543987757, "learning_rate": 7.560876802637495e-07, "loss": 0.3399, "step": 20077 }, { "epoch": 0.3490065879817136, "grad_norm": 1.527209738824346, "learning_rate": 7.560635029302429e-07, "loss": 0.2461, "step": 20078 }, { "epoch": 0.3490239705192164, "grad_norm": 1.1220618335705483, "learning_rate": 7.560393247851327e-07, "loss": 0.5069, "step": 20079 }, { "epoch": 0.34904135305671924, "grad_norm": 1.6742683345847114, "learning_rate": 7.560151458284955e-07, "loss": 0.2618, "step": 20080 }, { "epoch": 0.34905873559422207, "grad_norm": 2.2798351010414657, "learning_rate": 7.559909660604078e-07, "loss": 0.3157, "step": 20081 }, { "epoch": 0.34907611813172484, "grad_norm": 1.8448046900711053, "learning_rate": 7.559667854809462e-07, "loss": 0.2761, "step": 20082 }, { "epoch": 0.3490935006692277, "grad_norm": 2.519630839087223, "learning_rate": 7.559426040901877e-07, "loss": 0.2295, "step": 20083 }, { "epoch": 0.3491108832067305, "grad_norm": 1.4628178402584961, "learning_rate": 7.559184218882086e-07, "loss": 0.3125, "step": 20084 }, { "epoch": 0.34912826574423333, "grad_norm": 0.9436939434613355, "learning_rate": 7.558942388750857e-07, "loss": 0.282, "step": 20085 }, { "epoch": 0.34914564828173617, "grad_norm": 1.3433970822934256, "learning_rate": 7.558700550508956e-07, "loss": 0.3353, "step": 20086 }, { "epoch": 0.349163030819239, "grad_norm": 1.3854676878873387, "learning_rate": 7.558458704157149e-07, "loss": 0.3005, "step": 20087 }, { "epoch": 0.3491804133567418, "grad_norm": 1.5905624884412028, "learning_rate": 7.558216849696204e-07, "loss": 0.3728, "step": 20088 }, { "epoch": 0.34919779589424466, "grad_norm": 1.1772107182855553, "learning_rate": 7.557974987126887e-07, "loss": 0.305, "step": 20089 }, { "epoch": 0.3492151784317475, "grad_norm": 1.3212295357122226, "learning_rate": 7.557733116449965e-07, "loss": 0.3767, "step": 20090 }, { "epoch": 0.3492325609692503, "grad_norm": 1.6969906752082353, "learning_rate": 7.557491237666201e-07, "loss": 0.4421, "step": 20091 }, { "epoch": 0.3492499435067531, "grad_norm": 1.5347251886345834, "learning_rate": 7.557249350776368e-07, "loss": 0.2701, "step": 20092 }, { "epoch": 0.3492673260442559, "grad_norm": 1.8142066153911292, "learning_rate": 7.557007455781227e-07, "loss": 0.1986, "step": 20093 }, { "epoch": 0.34928470858175875, "grad_norm": 2.2657509360127186, "learning_rate": 7.556765552681547e-07, "loss": 0.2814, "step": 20094 }, { "epoch": 0.3493020911192616, "grad_norm": 1.6640150813916974, "learning_rate": 7.556523641478095e-07, "loss": 0.2818, "step": 20095 }, { "epoch": 0.3493194736567644, "grad_norm": 1.7181100754075413, "learning_rate": 7.55628172217164e-07, "loss": 0.3157, "step": 20096 }, { "epoch": 0.34933685619426724, "grad_norm": 2.426645052727398, "learning_rate": 7.556039794762943e-07, "loss": 0.2279, "step": 20097 }, { "epoch": 0.3493542387317701, "grad_norm": 1.8579554457949838, "learning_rate": 7.555797859252775e-07, "loss": 0.3015, "step": 20098 }, { "epoch": 0.3493716212692729, "grad_norm": 1.5258832669489457, "learning_rate": 7.555555915641903e-07, "loss": 0.3703, "step": 20099 }, { "epoch": 0.34938900380677573, "grad_norm": 4.304561521562061, "learning_rate": 7.55531396393109e-07, "loss": 0.3917, "step": 20100 }, { "epoch": 0.34940638634427856, "grad_norm": 1.3843841039743117, "learning_rate": 7.555072004121106e-07, "loss": 0.3157, "step": 20101 }, { "epoch": 0.34942376888178134, "grad_norm": 3.1567143171499605, "learning_rate": 7.554830036212721e-07, "loss": 0.4415, "step": 20102 }, { "epoch": 0.34944115141928417, "grad_norm": 1.2756596975421264, "learning_rate": 7.554588060206695e-07, "loss": 0.2877, "step": 20103 }, { "epoch": 0.349458533956787, "grad_norm": 2.065029320382043, "learning_rate": 7.5543460761038e-07, "loss": 0.2221, "step": 20104 }, { "epoch": 0.34947591649428983, "grad_norm": 1.1765690841940237, "learning_rate": 7.5541040839048e-07, "loss": 0.2403, "step": 20105 }, { "epoch": 0.34949329903179266, "grad_norm": 2.4106594772583065, "learning_rate": 7.553862083610463e-07, "loss": 0.3163, "step": 20106 }, { "epoch": 0.3495106815692955, "grad_norm": 1.3640632884783488, "learning_rate": 7.553620075221556e-07, "loss": 0.3595, "step": 20107 }, { "epoch": 0.3495280641067983, "grad_norm": 1.7762020871955864, "learning_rate": 7.553378058738849e-07, "loss": 0.3087, "step": 20108 }, { "epoch": 0.34954544664430115, "grad_norm": 1.625084014067758, "learning_rate": 7.553136034163104e-07, "loss": 0.5148, "step": 20109 }, { "epoch": 0.349562829181804, "grad_norm": 1.3649128475309462, "learning_rate": 7.552894001495092e-07, "loss": 0.416, "step": 20110 }, { "epoch": 0.3495802117193068, "grad_norm": 1.4291502690341626, "learning_rate": 7.552651960735579e-07, "loss": 0.4753, "step": 20111 }, { "epoch": 0.3495975942568096, "grad_norm": 1.846576954281714, "learning_rate": 7.552409911885331e-07, "loss": 0.236, "step": 20112 }, { "epoch": 0.3496149767943124, "grad_norm": 1.0354675947385585, "learning_rate": 7.552167854945115e-07, "loss": 0.4173, "step": 20113 }, { "epoch": 0.34963235933181525, "grad_norm": 1.6837317789495427, "learning_rate": 7.551925789915701e-07, "loss": 0.4246, "step": 20114 }, { "epoch": 0.3496497418693181, "grad_norm": 2.366429253743045, "learning_rate": 7.551683716797855e-07, "loss": 0.2657, "step": 20115 }, { "epoch": 0.3496671244068209, "grad_norm": 1.4736153853781928, "learning_rate": 7.551441635592342e-07, "loss": 0.2113, "step": 20116 }, { "epoch": 0.34968450694432374, "grad_norm": 2.287672302215322, "learning_rate": 7.551199546299931e-07, "loss": 0.3563, "step": 20117 }, { "epoch": 0.34970188948182657, "grad_norm": 5.524332715543395, "learning_rate": 7.550957448921391e-07, "loss": 0.3724, "step": 20118 }, { "epoch": 0.3497192720193294, "grad_norm": 2.513723899530762, "learning_rate": 7.550715343457487e-07, "loss": 0.2875, "step": 20119 }, { "epoch": 0.3497366545568322, "grad_norm": 1.836349739964267, "learning_rate": 7.550473229908986e-07, "loss": 0.4195, "step": 20120 }, { "epoch": 0.34975403709433506, "grad_norm": 4.050742805399545, "learning_rate": 7.550231108276659e-07, "loss": 0.2955, "step": 20121 }, { "epoch": 0.34977141963183783, "grad_norm": 1.5224399316945993, "learning_rate": 7.549988978561268e-07, "loss": 0.4595, "step": 20122 }, { "epoch": 0.34978880216934066, "grad_norm": 1.1701519697752572, "learning_rate": 7.549746840763585e-07, "loss": 0.3437, "step": 20123 }, { "epoch": 0.3498061847068435, "grad_norm": 1.4279679058556902, "learning_rate": 7.549504694884376e-07, "loss": 0.2081, "step": 20124 }, { "epoch": 0.3498235672443463, "grad_norm": 1.815611162728282, "learning_rate": 7.549262540924408e-07, "loss": 0.2104, "step": 20125 }, { "epoch": 0.34984094978184915, "grad_norm": 1.4508709242014826, "learning_rate": 7.549020378884448e-07, "loss": 0.3526, "step": 20126 }, { "epoch": 0.349858332319352, "grad_norm": 2.4112147584820223, "learning_rate": 7.548778208765265e-07, "loss": 0.3838, "step": 20127 }, { "epoch": 0.3498757148568548, "grad_norm": 2.557658683549927, "learning_rate": 7.548536030567624e-07, "loss": 0.3207, "step": 20128 }, { "epoch": 0.34989309739435764, "grad_norm": 1.3126874254464718, "learning_rate": 7.548293844292297e-07, "loss": 0.3611, "step": 20129 }, { "epoch": 0.3499104799318605, "grad_norm": 2.750745363349842, "learning_rate": 7.548051649940049e-07, "loss": 0.2637, "step": 20130 }, { "epoch": 0.3499278624693633, "grad_norm": 1.5369653755527448, "learning_rate": 7.547809447511647e-07, "loss": 0.2467, "step": 20131 }, { "epoch": 0.3499452450068661, "grad_norm": 2.9595055791288374, "learning_rate": 7.547567237007859e-07, "loss": 0.2793, "step": 20132 }, { "epoch": 0.3499626275443689, "grad_norm": 1.1642527523810915, "learning_rate": 7.547325018429454e-07, "loss": 0.2592, "step": 20133 }, { "epoch": 0.34998001008187174, "grad_norm": 0.7061220922431513, "learning_rate": 7.547082791777197e-07, "loss": 0.2149, "step": 20134 }, { "epoch": 0.34999739261937457, "grad_norm": 2.249847329090189, "learning_rate": 7.54684055705186e-07, "loss": 0.5898, "step": 20135 }, { "epoch": 0.3500147751568774, "grad_norm": 1.4901440195102478, "learning_rate": 7.546598314254206e-07, "loss": 0.4047, "step": 20136 }, { "epoch": 0.35003215769438023, "grad_norm": 2.1398015022130425, "learning_rate": 7.546356063385009e-07, "loss": 0.4094, "step": 20137 }, { "epoch": 0.35004954023188306, "grad_norm": 1.7887961553860423, "learning_rate": 7.546113804445029e-07, "loss": 0.2752, "step": 20138 }, { "epoch": 0.3500669227693859, "grad_norm": 1.7652757510641337, "learning_rate": 7.54587153743504e-07, "loss": 0.2212, "step": 20139 }, { "epoch": 0.3500843053068887, "grad_norm": 2.1369295161085726, "learning_rate": 7.545629262355806e-07, "loss": 0.5278, "step": 20140 }, { "epoch": 0.35010168784439155, "grad_norm": 1.9761432072627731, "learning_rate": 7.545386979208099e-07, "loss": 0.2865, "step": 20141 }, { "epoch": 0.3501190703818943, "grad_norm": 1.9056311269117199, "learning_rate": 7.545144687992681e-07, "loss": 0.4679, "step": 20142 }, { "epoch": 0.35013645291939716, "grad_norm": 1.557365588455497, "learning_rate": 7.544902388710326e-07, "loss": 0.4855, "step": 20143 }, { "epoch": 0.3501538354569, "grad_norm": 7.208681741232518, "learning_rate": 7.5446600813618e-07, "loss": 0.483, "step": 20144 }, { "epoch": 0.3501712179944028, "grad_norm": 1.9787873029542695, "learning_rate": 7.544417765947869e-07, "loss": 0.3298, "step": 20145 }, { "epoch": 0.35018860053190565, "grad_norm": 1.3186140390659502, "learning_rate": 7.544175442469303e-07, "loss": 0.3384, "step": 20146 }, { "epoch": 0.3502059830694085, "grad_norm": 1.7887021397143126, "learning_rate": 7.543933110926869e-07, "loss": 0.3173, "step": 20147 }, { "epoch": 0.3502233656069113, "grad_norm": 1.8702087655933022, "learning_rate": 7.543690771321336e-07, "loss": 0.3513, "step": 20148 }, { "epoch": 0.35024074814441414, "grad_norm": 1.1195052704498594, "learning_rate": 7.543448423653473e-07, "loss": 0.3453, "step": 20149 }, { "epoch": 0.35025813068191697, "grad_norm": 2.4900401807164347, "learning_rate": 7.543206067924046e-07, "loss": 0.3583, "step": 20150 }, { "epoch": 0.3502755132194198, "grad_norm": 2.7349967900932026, "learning_rate": 7.542963704133822e-07, "loss": 0.4081, "step": 20151 }, { "epoch": 0.3502928957569226, "grad_norm": 3.2636001819366838, "learning_rate": 7.542721332283575e-07, "loss": 0.7236, "step": 20152 }, { "epoch": 0.3503102782944254, "grad_norm": 1.4411389874531069, "learning_rate": 7.542478952374066e-07, "loss": 0.5097, "step": 20153 }, { "epoch": 0.35032766083192823, "grad_norm": 1.4815304877711462, "learning_rate": 7.542236564406068e-07, "loss": 0.342, "step": 20154 }, { "epoch": 0.35034504336943106, "grad_norm": 1.954358462506115, "learning_rate": 7.541994168380348e-07, "loss": 0.3197, "step": 20155 }, { "epoch": 0.3503624259069339, "grad_norm": 2.5596739049165813, "learning_rate": 7.541751764297674e-07, "loss": 0.2986, "step": 20156 }, { "epoch": 0.3503798084444367, "grad_norm": 2.856002775594988, "learning_rate": 7.541509352158815e-07, "loss": 0.5047, "step": 20157 }, { "epoch": 0.35039719098193955, "grad_norm": 2.2614976616609743, "learning_rate": 7.541266931964538e-07, "loss": 0.3595, "step": 20158 }, { "epoch": 0.3504145735194424, "grad_norm": 1.7331429726680867, "learning_rate": 7.541024503715613e-07, "loss": 0.4241, "step": 20159 }, { "epoch": 0.3504319560569452, "grad_norm": 1.6378888620882592, "learning_rate": 7.540782067412807e-07, "loss": 0.3798, "step": 20160 }, { "epoch": 0.350449338594448, "grad_norm": 2.8515096510560807, "learning_rate": 7.540539623056889e-07, "loss": 0.5534, "step": 20161 }, { "epoch": 0.3504667211319508, "grad_norm": 1.8408177138402018, "learning_rate": 7.540297170648628e-07, "loss": 0.1881, "step": 20162 }, { "epoch": 0.35048410366945365, "grad_norm": 2.5523045250843843, "learning_rate": 7.540054710188791e-07, "loss": 0.5674, "step": 20163 }, { "epoch": 0.3505014862069565, "grad_norm": 2.0625043796081672, "learning_rate": 7.539812241678148e-07, "loss": 0.3511, "step": 20164 }, { "epoch": 0.3505188687444593, "grad_norm": 1.644760508151232, "learning_rate": 7.539569765117467e-07, "loss": 0.3767, "step": 20165 }, { "epoch": 0.35053625128196214, "grad_norm": 2.835928034726276, "learning_rate": 7.539327280507517e-07, "loss": 0.285, "step": 20166 }, { "epoch": 0.35055363381946497, "grad_norm": 1.3428710994648778, "learning_rate": 7.539084787849064e-07, "loss": 0.2573, "step": 20167 }, { "epoch": 0.3505710163569678, "grad_norm": 2.8186520512930855, "learning_rate": 7.538842287142881e-07, "loss": 0.4659, "step": 20168 }, { "epoch": 0.35058839889447063, "grad_norm": 4.327372870841648, "learning_rate": 7.538599778389732e-07, "loss": 0.4969, "step": 20169 }, { "epoch": 0.35060578143197346, "grad_norm": 2.504610897341375, "learning_rate": 7.538357261590389e-07, "loss": 0.4657, "step": 20170 }, { "epoch": 0.35062316396947624, "grad_norm": 2.1888550858033398, "learning_rate": 7.53811473674562e-07, "loss": 0.4349, "step": 20171 }, { "epoch": 0.35064054650697907, "grad_norm": 1.6443710099300788, "learning_rate": 7.537872203856193e-07, "loss": 0.2159, "step": 20172 }, { "epoch": 0.3506579290444819, "grad_norm": 1.8106502753913867, "learning_rate": 7.537629662922876e-07, "loss": 0.2886, "step": 20173 }, { "epoch": 0.3506753115819847, "grad_norm": 3.254415705125116, "learning_rate": 7.537387113946439e-07, "loss": 0.3237, "step": 20174 }, { "epoch": 0.35069269411948756, "grad_norm": 1.8781357757776551, "learning_rate": 7.53714455692765e-07, "loss": 0.3645, "step": 20175 }, { "epoch": 0.3507100766569904, "grad_norm": 2.1367381020109755, "learning_rate": 7.536901991867278e-07, "loss": 0.454, "step": 20176 }, { "epoch": 0.3507274591944932, "grad_norm": 1.0830192884731467, "learning_rate": 7.536659418766092e-07, "loss": 0.4015, "step": 20177 }, { "epoch": 0.35074484173199605, "grad_norm": 2.204819537174489, "learning_rate": 7.536416837624861e-07, "loss": 0.3254, "step": 20178 }, { "epoch": 0.3507622242694989, "grad_norm": 1.729847463567546, "learning_rate": 7.536174248444356e-07, "loss": 0.3976, "step": 20179 }, { "epoch": 0.3507796068070017, "grad_norm": 1.755300514076765, "learning_rate": 7.535931651225342e-07, "loss": 0.3058, "step": 20180 }, { "epoch": 0.3507969893445045, "grad_norm": 2.949839843900119, "learning_rate": 7.535689045968589e-07, "loss": 0.3019, "step": 20181 }, { "epoch": 0.3508143718820073, "grad_norm": 1.922582419460384, "learning_rate": 7.535446432674868e-07, "loss": 0.4525, "step": 20182 }, { "epoch": 0.35083175441951014, "grad_norm": 2.7104993741468717, "learning_rate": 7.535203811344945e-07, "loss": 0.3822, "step": 20183 }, { "epoch": 0.350849136957013, "grad_norm": 1.9156621473531819, "learning_rate": 7.534961181979591e-07, "loss": 0.5576, "step": 20184 }, { "epoch": 0.3508665194945158, "grad_norm": 1.3918792398998279, "learning_rate": 7.534718544579575e-07, "loss": 0.2376, "step": 20185 }, { "epoch": 0.35088390203201864, "grad_norm": 2.100042313242143, "learning_rate": 7.534475899145665e-07, "loss": 0.4259, "step": 20186 }, { "epoch": 0.35090128456952147, "grad_norm": 1.9114546908025845, "learning_rate": 7.534233245678631e-07, "loss": 0.3254, "step": 20187 }, { "epoch": 0.3509186671070243, "grad_norm": 1.2798789005763418, "learning_rate": 7.533990584179242e-07, "loss": 0.3108, "step": 20188 }, { "epoch": 0.3509360496445271, "grad_norm": 1.0450496171195702, "learning_rate": 7.533747914648266e-07, "loss": 0.2566, "step": 20189 }, { "epoch": 0.35095343218202996, "grad_norm": 1.218643795025975, "learning_rate": 7.533505237086475e-07, "loss": 0.2909, "step": 20190 }, { "epoch": 0.35097081471953273, "grad_norm": 1.9457980433246553, "learning_rate": 7.533262551494634e-07, "loss": 0.3684, "step": 20191 }, { "epoch": 0.35098819725703556, "grad_norm": 1.24105227551163, "learning_rate": 7.533019857873515e-07, "loss": 0.3106, "step": 20192 }, { "epoch": 0.3510055797945384, "grad_norm": 1.6829062934350332, "learning_rate": 7.532777156223886e-07, "loss": 0.3314, "step": 20193 }, { "epoch": 0.3510229623320412, "grad_norm": 1.6431039601855542, "learning_rate": 7.532534446546518e-07, "loss": 0.3769, "step": 20194 }, { "epoch": 0.35104034486954405, "grad_norm": 1.984554354855379, "learning_rate": 7.532291728842179e-07, "loss": 0.4167, "step": 20195 }, { "epoch": 0.3510577274070469, "grad_norm": 1.5545395887828208, "learning_rate": 7.532049003111638e-07, "loss": 0.2677, "step": 20196 }, { "epoch": 0.3510751099445497, "grad_norm": 1.9165368116630812, "learning_rate": 7.531806269355665e-07, "loss": 0.2768, "step": 20197 }, { "epoch": 0.35109249248205254, "grad_norm": 1.3912210744251619, "learning_rate": 7.531563527575028e-07, "loss": 0.3602, "step": 20198 }, { "epoch": 0.3511098750195554, "grad_norm": 2.935718645350658, "learning_rate": 7.531320777770501e-07, "loss": 0.239, "step": 20199 }, { "epoch": 0.3511272575570582, "grad_norm": 1.6008197889470608, "learning_rate": 7.531078019942847e-07, "loss": 0.2165, "step": 20200 }, { "epoch": 0.351144640094561, "grad_norm": 4.572184092743291, "learning_rate": 7.530835254092839e-07, "loss": 0.2725, "step": 20201 }, { "epoch": 0.3511620226320638, "grad_norm": 0.9465397799166574, "learning_rate": 7.530592480221245e-07, "loss": 0.295, "step": 20202 }, { "epoch": 0.35117940516956664, "grad_norm": 1.473825032282182, "learning_rate": 7.530349698328836e-07, "loss": 0.2806, "step": 20203 }, { "epoch": 0.35119678770706947, "grad_norm": 1.668024370180064, "learning_rate": 7.530106908416381e-07, "loss": 0.2838, "step": 20204 }, { "epoch": 0.3512141702445723, "grad_norm": 1.9098851545668343, "learning_rate": 7.52986411048465e-07, "loss": 0.2444, "step": 20205 }, { "epoch": 0.35123155278207513, "grad_norm": 3.2983470739332548, "learning_rate": 7.529621304534411e-07, "loss": 0.3251, "step": 20206 }, { "epoch": 0.35124893531957796, "grad_norm": 1.3503323942772105, "learning_rate": 7.529378490566434e-07, "loss": 0.3414, "step": 20207 }, { "epoch": 0.3512663178570808, "grad_norm": 1.9880472505447893, "learning_rate": 7.52913566858149e-07, "loss": 0.3382, "step": 20208 }, { "epoch": 0.3512837003945836, "grad_norm": 1.5388064667339085, "learning_rate": 7.528892838580347e-07, "loss": 0.2279, "step": 20209 }, { "epoch": 0.35130108293208645, "grad_norm": 2.6819901154401724, "learning_rate": 7.528650000563777e-07, "loss": 0.2222, "step": 20210 }, { "epoch": 0.3513184654695892, "grad_norm": 1.5244007461783575, "learning_rate": 7.528407154532545e-07, "loss": 0.4441, "step": 20211 }, { "epoch": 0.35133584800709206, "grad_norm": 1.5551160475871046, "learning_rate": 7.528164300487427e-07, "loss": 0.2108, "step": 20212 }, { "epoch": 0.3513532305445949, "grad_norm": 1.7185127924485082, "learning_rate": 7.527921438429188e-07, "loss": 0.2789, "step": 20213 }, { "epoch": 0.3513706130820977, "grad_norm": 2.5308156698094755, "learning_rate": 7.527678568358599e-07, "loss": 0.3147, "step": 20214 }, { "epoch": 0.35138799561960055, "grad_norm": 1.7170424216309097, "learning_rate": 7.527435690276429e-07, "loss": 0.324, "step": 20215 }, { "epoch": 0.3514053781571034, "grad_norm": 2.573369182733816, "learning_rate": 7.527192804183452e-07, "loss": 0.2385, "step": 20216 }, { "epoch": 0.3514227606946062, "grad_norm": 1.95656597878792, "learning_rate": 7.526949910080433e-07, "loss": 0.2416, "step": 20217 }, { "epoch": 0.35144014323210904, "grad_norm": 2.8338952751683215, "learning_rate": 7.526707007968143e-07, "loss": 0.5497, "step": 20218 }, { "epoch": 0.35145752576961187, "grad_norm": 3.9942226566768215, "learning_rate": 7.526464097847354e-07, "loss": 0.3197, "step": 20219 }, { "epoch": 0.3514749083071147, "grad_norm": 1.8057200319680857, "learning_rate": 7.526221179718832e-07, "loss": 0.2903, "step": 20220 }, { "epoch": 0.35149229084461747, "grad_norm": 1.0967588828335193, "learning_rate": 7.525978253583351e-07, "loss": 0.5748, "step": 20221 }, { "epoch": 0.3515096733821203, "grad_norm": 1.6827815100580144, "learning_rate": 7.52573531944168e-07, "loss": 0.2434, "step": 20222 }, { "epoch": 0.35152705591962313, "grad_norm": 2.588559381638618, "learning_rate": 7.525492377294586e-07, "loss": 0.3148, "step": 20223 }, { "epoch": 0.35154443845712596, "grad_norm": 1.4320498934143062, "learning_rate": 7.525249427142843e-07, "loss": 0.3573, "step": 20224 }, { "epoch": 0.3515618209946288, "grad_norm": 2.1940227397883243, "learning_rate": 7.52500646898722e-07, "loss": 0.3728, "step": 20225 }, { "epoch": 0.3515792035321316, "grad_norm": 1.2514596525526607, "learning_rate": 7.524763502828485e-07, "loss": 0.3542, "step": 20226 }, { "epoch": 0.35159658606963445, "grad_norm": 3.304283264957804, "learning_rate": 7.524520528667409e-07, "loss": 0.5445, "step": 20227 }, { "epoch": 0.3516139686071373, "grad_norm": 1.6989399342927713, "learning_rate": 7.524277546504764e-07, "loss": 0.3327, "step": 20228 }, { "epoch": 0.3516313511446401, "grad_norm": 1.5756754314732557, "learning_rate": 7.524034556341319e-07, "loss": 0.3191, "step": 20229 }, { "epoch": 0.35164873368214294, "grad_norm": 1.552060462799576, "learning_rate": 7.523791558177842e-07, "loss": 0.3091, "step": 20230 }, { "epoch": 0.3516661162196457, "grad_norm": 1.520131288772482, "learning_rate": 7.523548552015106e-07, "loss": 0.2779, "step": 20231 }, { "epoch": 0.35168349875714855, "grad_norm": 1.2804060791554173, "learning_rate": 7.52330553785388e-07, "loss": 0.1946, "step": 20232 }, { "epoch": 0.3517008812946514, "grad_norm": 1.693942089424945, "learning_rate": 7.523062515694936e-07, "loss": 0.3141, "step": 20233 }, { "epoch": 0.3517182638321542, "grad_norm": 1.2174267529035097, "learning_rate": 7.522819485539041e-07, "loss": 0.4721, "step": 20234 }, { "epoch": 0.35173564636965704, "grad_norm": 2.547856699880351, "learning_rate": 7.522576447386968e-07, "loss": 0.4455, "step": 20235 }, { "epoch": 0.35175302890715987, "grad_norm": 1.1503296952094157, "learning_rate": 7.522333401239486e-07, "loss": 0.2813, "step": 20236 }, { "epoch": 0.3517704114446627, "grad_norm": 2.227873161910559, "learning_rate": 7.522090347097367e-07, "loss": 0.2486, "step": 20237 }, { "epoch": 0.35178779398216553, "grad_norm": 2.0195996220617674, "learning_rate": 7.521847284961377e-07, "loss": 0.2223, "step": 20238 }, { "epoch": 0.35180517651966836, "grad_norm": 1.638133103004885, "learning_rate": 7.521604214832293e-07, "loss": 0.3695, "step": 20239 }, { "epoch": 0.3518225590571712, "grad_norm": 2.1760607153560105, "learning_rate": 7.521361136710881e-07, "loss": 0.4741, "step": 20240 }, { "epoch": 0.35183994159467397, "grad_norm": 1.9567544620780026, "learning_rate": 7.521118050597913e-07, "loss": 0.3312, "step": 20241 }, { "epoch": 0.3518573241321768, "grad_norm": 2.3214694178978745, "learning_rate": 7.520874956494157e-07, "loss": 0.2805, "step": 20242 }, { "epoch": 0.3518747066696796, "grad_norm": 2.7417491218518655, "learning_rate": 7.520631854400387e-07, "loss": 0.3825, "step": 20243 }, { "epoch": 0.35189208920718246, "grad_norm": 1.9176204642987253, "learning_rate": 7.520388744317372e-07, "loss": 0.2691, "step": 20244 }, { "epoch": 0.3519094717446853, "grad_norm": 1.7782076312624449, "learning_rate": 7.520145626245882e-07, "loss": 0.4142, "step": 20245 }, { "epoch": 0.3519268542821881, "grad_norm": 2.2749183448259727, "learning_rate": 7.519902500186687e-07, "loss": 0.5241, "step": 20246 }, { "epoch": 0.35194423681969095, "grad_norm": 2.733603074563969, "learning_rate": 7.519659366140561e-07, "loss": 0.4261, "step": 20247 }, { "epoch": 0.3519616193571938, "grad_norm": 1.538699816278058, "learning_rate": 7.519416224108269e-07, "loss": 0.3358, "step": 20248 }, { "epoch": 0.3519790018946966, "grad_norm": 1.2664442218088576, "learning_rate": 7.519173074090588e-07, "loss": 0.5377, "step": 20249 }, { "epoch": 0.35199638443219944, "grad_norm": 2.931243293728761, "learning_rate": 7.518929916088284e-07, "loss": 0.2874, "step": 20250 }, { "epoch": 0.3520137669697022, "grad_norm": 0.8770861685925443, "learning_rate": 7.518686750102129e-07, "loss": 0.1894, "step": 20251 }, { "epoch": 0.35203114950720504, "grad_norm": 1.6172897359011025, "learning_rate": 7.518443576132894e-07, "loss": 0.2216, "step": 20252 }, { "epoch": 0.3520485320447079, "grad_norm": 1.5710343558443836, "learning_rate": 7.518200394181351e-07, "loss": 0.2741, "step": 20253 }, { "epoch": 0.3520659145822107, "grad_norm": 1.3110626668997973, "learning_rate": 7.517957204248268e-07, "loss": 0.182, "step": 20254 }, { "epoch": 0.35208329711971353, "grad_norm": 3.2976926260151846, "learning_rate": 7.517714006334419e-07, "loss": 0.3597, "step": 20255 }, { "epoch": 0.35210067965721636, "grad_norm": 1.5430383616719918, "learning_rate": 7.517470800440571e-07, "loss": 0.2717, "step": 20256 }, { "epoch": 0.3521180621947192, "grad_norm": 1.2375601952859148, "learning_rate": 7.517227586567499e-07, "loss": 0.3605, "step": 20257 }, { "epoch": 0.352135444732222, "grad_norm": 1.2341146743303995, "learning_rate": 7.51698436471597e-07, "loss": 0.4422, "step": 20258 }, { "epoch": 0.35215282726972486, "grad_norm": 1.259797155688236, "learning_rate": 7.516741134886758e-07, "loss": 0.4615, "step": 20259 }, { "epoch": 0.3521702098072277, "grad_norm": 1.4125393187167588, "learning_rate": 7.516497897080633e-07, "loss": 0.2405, "step": 20260 }, { "epoch": 0.35218759234473046, "grad_norm": 2.1780584867795914, "learning_rate": 7.516254651298366e-07, "loss": 0.4036, "step": 20261 }, { "epoch": 0.3522049748822333, "grad_norm": 1.4169997068863014, "learning_rate": 7.516011397540725e-07, "loss": 0.2679, "step": 20262 }, { "epoch": 0.3522223574197361, "grad_norm": 3.215913503545858, "learning_rate": 7.515768135808487e-07, "loss": 0.5714, "step": 20263 }, { "epoch": 0.35223973995723895, "grad_norm": 2.5713487441546405, "learning_rate": 7.515524866102418e-07, "loss": 0.3629, "step": 20264 }, { "epoch": 0.3522571224947418, "grad_norm": 2.1264763137098255, "learning_rate": 7.51528158842329e-07, "loss": 0.3986, "step": 20265 }, { "epoch": 0.3522745050322446, "grad_norm": 1.0160090014697984, "learning_rate": 7.515038302771877e-07, "loss": 0.3064, "step": 20266 }, { "epoch": 0.35229188756974744, "grad_norm": 2.8647083737384533, "learning_rate": 7.514795009148945e-07, "loss": 0.2613, "step": 20267 }, { "epoch": 0.35230927010725027, "grad_norm": 1.6395788865373355, "learning_rate": 7.51455170755527e-07, "loss": 0.2304, "step": 20268 }, { "epoch": 0.3523266526447531, "grad_norm": 1.558445138553439, "learning_rate": 7.51430839799162e-07, "loss": 0.2394, "step": 20269 }, { "epoch": 0.35234403518225593, "grad_norm": 1.7361902560713314, "learning_rate": 7.514065080458769e-07, "loss": 0.3542, "step": 20270 }, { "epoch": 0.3523614177197587, "grad_norm": 1.2539921617261802, "learning_rate": 7.513821754957484e-07, "loss": 0.412, "step": 20271 }, { "epoch": 0.35237880025726154, "grad_norm": 1.3819359417740684, "learning_rate": 7.513578421488543e-07, "loss": 0.28, "step": 20272 }, { "epoch": 0.35239618279476437, "grad_norm": 1.045216849135855, "learning_rate": 7.513335080052709e-07, "loss": 0.3534, "step": 20273 }, { "epoch": 0.3524135653322672, "grad_norm": 1.688525884924069, "learning_rate": 7.513091730650758e-07, "loss": 0.4617, "step": 20274 }, { "epoch": 0.35243094786977003, "grad_norm": 2.8909927362690255, "learning_rate": 7.512848373283461e-07, "loss": 0.4259, "step": 20275 }, { "epoch": 0.35244833040727286, "grad_norm": 1.3180437436105037, "learning_rate": 7.512605007951591e-07, "loss": 0.1678, "step": 20276 }, { "epoch": 0.3524657129447757, "grad_norm": 1.4330920756672867, "learning_rate": 7.512361634655915e-07, "loss": 0.2883, "step": 20277 }, { "epoch": 0.3524830954822785, "grad_norm": 2.1345338322398173, "learning_rate": 7.512118253397207e-07, "loss": 0.2064, "step": 20278 }, { "epoch": 0.35250047801978135, "grad_norm": 2.1610248616773853, "learning_rate": 7.511874864176239e-07, "loss": 0.2739, "step": 20279 }, { "epoch": 0.3525178605572842, "grad_norm": 2.213031259442596, "learning_rate": 7.511631466993781e-07, "loss": 0.3152, "step": 20280 }, { "epoch": 0.35253524309478695, "grad_norm": 4.808455326396735, "learning_rate": 7.511388061850604e-07, "loss": 0.3228, "step": 20281 }, { "epoch": 0.3525526256322898, "grad_norm": 0.9843373878775921, "learning_rate": 7.511144648747482e-07, "loss": 0.3209, "step": 20282 }, { "epoch": 0.3525700081697926, "grad_norm": 2.466779210080547, "learning_rate": 7.510901227685184e-07, "loss": 0.2514, "step": 20283 }, { "epoch": 0.35258739070729544, "grad_norm": 1.4280914094258796, "learning_rate": 7.510657798664484e-07, "loss": 0.2433, "step": 20284 }, { "epoch": 0.3526047732447983, "grad_norm": 2.7115817528288195, "learning_rate": 7.510414361686152e-07, "loss": 0.4309, "step": 20285 }, { "epoch": 0.3526221557823011, "grad_norm": 1.9385620552680793, "learning_rate": 7.510170916750958e-07, "loss": 0.2588, "step": 20286 }, { "epoch": 0.35263953831980394, "grad_norm": 3.394127223115445, "learning_rate": 7.509927463859675e-07, "loss": 0.31, "step": 20287 }, { "epoch": 0.35265692085730677, "grad_norm": 1.8150654045280006, "learning_rate": 7.509684003013078e-07, "loss": 0.2873, "step": 20288 }, { "epoch": 0.3526743033948096, "grad_norm": 1.8548940267930614, "learning_rate": 7.509440534211933e-07, "loss": 0.3851, "step": 20289 }, { "epoch": 0.3526916859323124, "grad_norm": 1.1261979279519874, "learning_rate": 7.509197057457014e-07, "loss": 0.1618, "step": 20290 }, { "epoch": 0.3527090684698152, "grad_norm": 1.4584845506274338, "learning_rate": 7.508953572749095e-07, "loss": 0.2597, "step": 20291 }, { "epoch": 0.35272645100731803, "grad_norm": 3.0841630637413875, "learning_rate": 7.508710080088942e-07, "loss": 0.4122, "step": 20292 }, { "epoch": 0.35274383354482086, "grad_norm": 1.1804558659321593, "learning_rate": 7.508466579477334e-07, "loss": 0.3898, "step": 20293 }, { "epoch": 0.3527612160823237, "grad_norm": 1.04867037955397, "learning_rate": 7.508223070915038e-07, "loss": 0.2657, "step": 20294 }, { "epoch": 0.3527785986198265, "grad_norm": 1.3233456791204592, "learning_rate": 7.507979554402828e-07, "loss": 0.6173, "step": 20295 }, { "epoch": 0.35279598115732935, "grad_norm": 1.7604694034816228, "learning_rate": 7.507736029941473e-07, "loss": 0.2792, "step": 20296 }, { "epoch": 0.3528133636948322, "grad_norm": 2.337666914970272, "learning_rate": 7.507492497531748e-07, "loss": 0.4205, "step": 20297 }, { "epoch": 0.352830746232335, "grad_norm": 1.71386937644282, "learning_rate": 7.507248957174424e-07, "loss": 0.5758, "step": 20298 }, { "epoch": 0.35284812876983784, "grad_norm": 1.829379123575058, "learning_rate": 7.507005408870272e-07, "loss": 0.3398, "step": 20299 }, { "epoch": 0.3528655113073406, "grad_norm": 1.9499791079974174, "learning_rate": 7.506761852620062e-07, "loss": 0.2253, "step": 20300 }, { "epoch": 0.35288289384484345, "grad_norm": 1.7574546903379207, "learning_rate": 7.506518288424573e-07, "loss": 0.339, "step": 20301 }, { "epoch": 0.3529002763823463, "grad_norm": 2.0390186040571128, "learning_rate": 7.506274716284569e-07, "loss": 0.2984, "step": 20302 }, { "epoch": 0.3529176589198491, "grad_norm": 1.2087274616921646, "learning_rate": 7.506031136200828e-07, "loss": 0.4883, "step": 20303 }, { "epoch": 0.35293504145735194, "grad_norm": 2.7419966153064683, "learning_rate": 7.505787548174119e-07, "loss": 0.3234, "step": 20304 }, { "epoch": 0.35295242399485477, "grad_norm": 1.3770725358843083, "learning_rate": 7.505543952205213e-07, "loss": 0.2931, "step": 20305 }, { "epoch": 0.3529698065323576, "grad_norm": 2.7383343052894227, "learning_rate": 7.505300348294883e-07, "loss": 0.3793, "step": 20306 }, { "epoch": 0.35298718906986043, "grad_norm": 1.9698202978679737, "learning_rate": 7.505056736443905e-07, "loss": 0.5069, "step": 20307 }, { "epoch": 0.35300457160736326, "grad_norm": 1.7647159326177995, "learning_rate": 7.504813116653045e-07, "loss": 0.2156, "step": 20308 }, { "epoch": 0.3530219541448661, "grad_norm": 1.2183309483187748, "learning_rate": 7.50456948892308e-07, "loss": 0.2683, "step": 20309 }, { "epoch": 0.35303933668236887, "grad_norm": 1.2391492039375047, "learning_rate": 7.504325853254778e-07, "loss": 0.1608, "step": 20310 }, { "epoch": 0.3530567192198717, "grad_norm": 1.6138400964185304, "learning_rate": 7.504082209648916e-07, "loss": 0.5488, "step": 20311 }, { "epoch": 0.3530741017573745, "grad_norm": 1.3354468328454614, "learning_rate": 7.503838558106261e-07, "loss": 0.406, "step": 20312 }, { "epoch": 0.35309148429487736, "grad_norm": 2.574190209071736, "learning_rate": 7.50359489862759e-07, "loss": 0.4517, "step": 20313 }, { "epoch": 0.3531088668323802, "grad_norm": 1.7409778747621147, "learning_rate": 7.503351231213673e-07, "loss": 0.4877, "step": 20314 }, { "epoch": 0.353126249369883, "grad_norm": 2.5414081408492724, "learning_rate": 7.503107555865283e-07, "loss": 0.5382, "step": 20315 }, { "epoch": 0.35314363190738585, "grad_norm": 1.1693172839665062, "learning_rate": 7.502863872583189e-07, "loss": 0.2923, "step": 20316 }, { "epoch": 0.3531610144448887, "grad_norm": 1.7595711809787804, "learning_rate": 7.502620181368169e-07, "loss": 0.2943, "step": 20317 }, { "epoch": 0.3531783969823915, "grad_norm": 1.3959995032488908, "learning_rate": 7.502376482220992e-07, "loss": 0.2647, "step": 20318 }, { "epoch": 0.35319577951989434, "grad_norm": 1.8992710967452644, "learning_rate": 7.50213277514243e-07, "loss": 0.2919, "step": 20319 }, { "epoch": 0.3532131620573971, "grad_norm": 2.19330281705123, "learning_rate": 7.501889060133258e-07, "loss": 0.3932, "step": 20320 }, { "epoch": 0.35323054459489994, "grad_norm": 1.868988122474579, "learning_rate": 7.501645337194246e-07, "loss": 0.1769, "step": 20321 }, { "epoch": 0.3532479271324028, "grad_norm": 5.36256244558664, "learning_rate": 7.501401606326167e-07, "loss": 0.402, "step": 20322 }, { "epoch": 0.3532653096699056, "grad_norm": 1.5959761734347713, "learning_rate": 7.501157867529795e-07, "loss": 0.2792, "step": 20323 }, { "epoch": 0.35328269220740843, "grad_norm": 2.4075919344641994, "learning_rate": 7.500914120805901e-07, "loss": 0.474, "step": 20324 }, { "epoch": 0.35330007474491126, "grad_norm": 2.5394596660254862, "learning_rate": 7.500670366155258e-07, "loss": 0.2341, "step": 20325 }, { "epoch": 0.3533174572824141, "grad_norm": 2.487890490604783, "learning_rate": 7.50042660357864e-07, "loss": 0.3145, "step": 20326 }, { "epoch": 0.3533348398199169, "grad_norm": 4.813443697423719, "learning_rate": 7.500182833076817e-07, "loss": 0.3361, "step": 20327 }, { "epoch": 0.35335222235741975, "grad_norm": 2.019126477354871, "learning_rate": 7.499939054650564e-07, "loss": 0.4783, "step": 20328 }, { "epoch": 0.3533696048949226, "grad_norm": 1.2707525108568063, "learning_rate": 7.499695268300651e-07, "loss": 0.2148, "step": 20329 }, { "epoch": 0.35338698743242536, "grad_norm": 1.7646307944878954, "learning_rate": 7.499451474027853e-07, "loss": 0.2843, "step": 20330 }, { "epoch": 0.3534043699699282, "grad_norm": 1.7609245217168492, "learning_rate": 7.499207671832941e-07, "loss": 0.2678, "step": 20331 }, { "epoch": 0.353421752507431, "grad_norm": 1.328179316231259, "learning_rate": 7.498963861716692e-07, "loss": 0.3722, "step": 20332 }, { "epoch": 0.35343913504493385, "grad_norm": 1.8034176017011054, "learning_rate": 7.498720043679871e-07, "loss": 0.2926, "step": 20333 }, { "epoch": 0.3534565175824367, "grad_norm": 1.5670025360803748, "learning_rate": 7.498476217723258e-07, "loss": 0.3216, "step": 20334 }, { "epoch": 0.3534739001199395, "grad_norm": 1.7933377827235881, "learning_rate": 7.498232383847622e-07, "loss": 0.2821, "step": 20335 }, { "epoch": 0.35349128265744234, "grad_norm": 1.024863388873851, "learning_rate": 7.497988542053738e-07, "loss": 0.3746, "step": 20336 }, { "epoch": 0.35350866519494517, "grad_norm": 1.8185106527806014, "learning_rate": 7.497744692342377e-07, "loss": 0.369, "step": 20337 }, { "epoch": 0.353526047732448, "grad_norm": 1.428292204699256, "learning_rate": 7.497500834714313e-07, "loss": 0.2888, "step": 20338 }, { "epoch": 0.35354343026995083, "grad_norm": 1.9772472812972561, "learning_rate": 7.497256969170319e-07, "loss": 0.3632, "step": 20339 }, { "epoch": 0.3535608128074536, "grad_norm": 1.3832202335275274, "learning_rate": 7.497013095711167e-07, "loss": 0.3032, "step": 20340 }, { "epoch": 0.35357819534495644, "grad_norm": 1.964907942116128, "learning_rate": 7.496769214337631e-07, "loss": 0.3638, "step": 20341 }, { "epoch": 0.35359557788245927, "grad_norm": 1.4616024363932472, "learning_rate": 7.496525325050483e-07, "loss": 0.2761, "step": 20342 }, { "epoch": 0.3536129604199621, "grad_norm": 1.478224758429492, "learning_rate": 7.496281427850496e-07, "loss": 0.2179, "step": 20343 }, { "epoch": 0.3536303429574649, "grad_norm": 1.5757076531514114, "learning_rate": 7.496037522738445e-07, "loss": 0.3728, "step": 20344 }, { "epoch": 0.35364772549496776, "grad_norm": 1.523066940874813, "learning_rate": 7.495793609715101e-07, "loss": 0.2926, "step": 20345 }, { "epoch": 0.3536651080324706, "grad_norm": 1.669405296772395, "learning_rate": 7.495549688781237e-07, "loss": 0.3105, "step": 20346 }, { "epoch": 0.3536824905699734, "grad_norm": 1.9116724440644295, "learning_rate": 7.495305759937627e-07, "loss": 0.3971, "step": 20347 }, { "epoch": 0.35369987310747625, "grad_norm": 1.6409314454348038, "learning_rate": 7.495061823185045e-07, "loss": 0.2998, "step": 20348 }, { "epoch": 0.3537172556449791, "grad_norm": 1.6337280074475151, "learning_rate": 7.494817878524263e-07, "loss": 0.1888, "step": 20349 }, { "epoch": 0.35373463818248185, "grad_norm": 2.212284771002353, "learning_rate": 7.494573925956054e-07, "loss": 0.2313, "step": 20350 }, { "epoch": 0.3537520207199847, "grad_norm": 1.746706836652196, "learning_rate": 7.494329965481192e-07, "loss": 0.3463, "step": 20351 }, { "epoch": 0.3537694032574875, "grad_norm": 1.4101528704672228, "learning_rate": 7.494085997100448e-07, "loss": 0.3972, "step": 20352 }, { "epoch": 0.35378678579499034, "grad_norm": 4.558087885479449, "learning_rate": 7.4938420208146e-07, "loss": 0.392, "step": 20353 }, { "epoch": 0.3538041683324932, "grad_norm": 2.0907700116790027, "learning_rate": 7.493598036624416e-07, "loss": 0.3804, "step": 20354 }, { "epoch": 0.353821550869996, "grad_norm": 1.9927675727116092, "learning_rate": 7.493354044530673e-07, "loss": 0.3375, "step": 20355 }, { "epoch": 0.35383893340749883, "grad_norm": 2.323612509722575, "learning_rate": 7.493110044534141e-07, "loss": 0.6196, "step": 20356 }, { "epoch": 0.35385631594500166, "grad_norm": 1.442255001783197, "learning_rate": 7.492866036635598e-07, "loss": 0.3098, "step": 20357 }, { "epoch": 0.3538736984825045, "grad_norm": 1.866138599954307, "learning_rate": 7.492622020835814e-07, "loss": 0.4686, "step": 20358 }, { "epoch": 0.3538910810200073, "grad_norm": 0.8822886799534551, "learning_rate": 7.492377997135563e-07, "loss": 0.2961, "step": 20359 }, { "epoch": 0.3539084635575101, "grad_norm": 1.7468112807517708, "learning_rate": 7.492133965535617e-07, "loss": 0.3183, "step": 20360 }, { "epoch": 0.35392584609501293, "grad_norm": 1.360470185233566, "learning_rate": 7.491889926036755e-07, "loss": 0.3122, "step": 20361 }, { "epoch": 0.35394322863251576, "grad_norm": 1.481674916152155, "learning_rate": 7.491645878639743e-07, "loss": 0.1994, "step": 20362 }, { "epoch": 0.3539606111700186, "grad_norm": 2.0043631219653264, "learning_rate": 7.49140182334536e-07, "loss": 0.3515, "step": 20363 }, { "epoch": 0.3539779937075214, "grad_norm": 1.3104243881988045, "learning_rate": 7.491157760154376e-07, "loss": 0.446, "step": 20364 }, { "epoch": 0.35399537624502425, "grad_norm": 1.8725336865248385, "learning_rate": 7.490913689067568e-07, "loss": 0.3135, "step": 20365 }, { "epoch": 0.3540127587825271, "grad_norm": 1.369148052152719, "learning_rate": 7.490669610085707e-07, "loss": 0.2615, "step": 20366 }, { "epoch": 0.3540301413200299, "grad_norm": 1.4751754934692676, "learning_rate": 7.490425523209569e-07, "loss": 0.3083, "step": 20367 }, { "epoch": 0.35404752385753274, "grad_norm": 2.479061186552394, "learning_rate": 7.490181428439923e-07, "loss": 0.5505, "step": 20368 }, { "epoch": 0.3540649063950356, "grad_norm": 1.562668486005132, "learning_rate": 7.489937325777549e-07, "loss": 0.37, "step": 20369 }, { "epoch": 0.35408228893253835, "grad_norm": 1.2799970198784358, "learning_rate": 7.489693215223216e-07, "loss": 0.2308, "step": 20370 }, { "epoch": 0.3540996714700412, "grad_norm": 2.2248510934720205, "learning_rate": 7.489449096777699e-07, "loss": 0.2021, "step": 20371 }, { "epoch": 0.354117054007544, "grad_norm": 1.4003220522244446, "learning_rate": 7.489204970441772e-07, "loss": 0.2837, "step": 20372 }, { "epoch": 0.35413443654504684, "grad_norm": 1.0861019094746294, "learning_rate": 7.488960836216208e-07, "loss": 0.3083, "step": 20373 }, { "epoch": 0.35415181908254967, "grad_norm": 2.9792373278061395, "learning_rate": 7.488716694101782e-07, "loss": 0.2821, "step": 20374 }, { "epoch": 0.3541692016200525, "grad_norm": 1.0941214681639715, "learning_rate": 7.488472544099269e-07, "loss": 0.2173, "step": 20375 }, { "epoch": 0.35418658415755533, "grad_norm": 3.0169002785304406, "learning_rate": 7.488228386209438e-07, "loss": 0.3659, "step": 20376 }, { "epoch": 0.35420396669505816, "grad_norm": 1.219948196107491, "learning_rate": 7.487984220433068e-07, "loss": 0.2586, "step": 20377 }, { "epoch": 0.354221349232561, "grad_norm": 1.7491718991405532, "learning_rate": 7.487740046770932e-07, "loss": 0.2969, "step": 20378 }, { "epoch": 0.3542387317700638, "grad_norm": 1.389626474796159, "learning_rate": 7.487495865223801e-07, "loss": 0.1748, "step": 20379 }, { "epoch": 0.3542561143075666, "grad_norm": 1.125202745408312, "learning_rate": 7.487251675792452e-07, "loss": 0.2928, "step": 20380 }, { "epoch": 0.3542734968450694, "grad_norm": 2.5601397175758787, "learning_rate": 7.487007478477656e-07, "loss": 0.3297, "step": 20381 }, { "epoch": 0.35429087938257225, "grad_norm": 1.9612590404360053, "learning_rate": 7.48676327328019e-07, "loss": 0.4209, "step": 20382 }, { "epoch": 0.3543082619200751, "grad_norm": 1.047088438144979, "learning_rate": 7.486519060200827e-07, "loss": 0.388, "step": 20383 }, { "epoch": 0.3543256444575779, "grad_norm": 1.7927766487699452, "learning_rate": 7.486274839240339e-07, "loss": 0.4147, "step": 20384 }, { "epoch": 0.35434302699508075, "grad_norm": 2.072694199399717, "learning_rate": 7.486030610399503e-07, "loss": 0.3166, "step": 20385 }, { "epoch": 0.3543604095325836, "grad_norm": 2.0599603695344753, "learning_rate": 7.485786373679093e-07, "loss": 0.4494, "step": 20386 }, { "epoch": 0.3543777920700864, "grad_norm": 3.3066657446617644, "learning_rate": 7.48554212907988e-07, "loss": 0.3794, "step": 20387 }, { "epoch": 0.35439517460758924, "grad_norm": 3.076391966976447, "learning_rate": 7.485297876602642e-07, "loss": 0.3454, "step": 20388 }, { "epoch": 0.35441255714509207, "grad_norm": 2.468214092387636, "learning_rate": 7.485053616248151e-07, "loss": 0.3178, "step": 20389 }, { "epoch": 0.35442993968259484, "grad_norm": 1.4613472860636538, "learning_rate": 7.484809348017182e-07, "loss": 0.3264, "step": 20390 }, { "epoch": 0.35444732222009767, "grad_norm": 1.4356896022868053, "learning_rate": 7.484565071910507e-07, "loss": 0.4608, "step": 20391 }, { "epoch": 0.3544647047576005, "grad_norm": 2.4385819173957843, "learning_rate": 7.484320787928904e-07, "loss": 0.2405, "step": 20392 }, { "epoch": 0.35448208729510333, "grad_norm": 1.698420633893265, "learning_rate": 7.484076496073144e-07, "loss": 0.3181, "step": 20393 }, { "epoch": 0.35449946983260616, "grad_norm": 1.5580562544155128, "learning_rate": 7.483832196344004e-07, "loss": 0.3424, "step": 20394 }, { "epoch": 0.354516852370109, "grad_norm": 3.3696468533192157, "learning_rate": 7.483587888742255e-07, "loss": 0.4364, "step": 20395 }, { "epoch": 0.3545342349076118, "grad_norm": 1.2968191913255078, "learning_rate": 7.483343573268675e-07, "loss": 0.3383, "step": 20396 }, { "epoch": 0.35455161744511465, "grad_norm": 1.8358161232641372, "learning_rate": 7.483099249924034e-07, "loss": 0.201, "step": 20397 }, { "epoch": 0.3545689999826175, "grad_norm": 2.4217595930724043, "learning_rate": 7.482854918709112e-07, "loss": 0.2679, "step": 20398 }, { "epoch": 0.3545863825201203, "grad_norm": 2.592540997802985, "learning_rate": 7.48261057962468e-07, "loss": 0.3199, "step": 20399 }, { "epoch": 0.3546037650576231, "grad_norm": 1.4035746369944173, "learning_rate": 7.482366232671512e-07, "loss": 0.2657, "step": 20400 }, { "epoch": 0.3546211475951259, "grad_norm": 2.960299258182841, "learning_rate": 7.482121877850382e-07, "loss": 0.5891, "step": 20401 }, { "epoch": 0.35463853013262875, "grad_norm": 2.0471057022327095, "learning_rate": 7.48187751516207e-07, "loss": 0.256, "step": 20402 }, { "epoch": 0.3546559126701316, "grad_norm": 2.328945667817917, "learning_rate": 7.481633144607343e-07, "loss": 0.2858, "step": 20403 }, { "epoch": 0.3546732952076344, "grad_norm": 1.435348213559874, "learning_rate": 7.481388766186979e-07, "loss": 0.2562, "step": 20404 }, { "epoch": 0.35469067774513724, "grad_norm": 1.5293870791372368, "learning_rate": 7.481144379901754e-07, "loss": 0.1994, "step": 20405 }, { "epoch": 0.35470806028264007, "grad_norm": 1.2642746989444706, "learning_rate": 7.48089998575244e-07, "loss": 0.2252, "step": 20406 }, { "epoch": 0.3547254428201429, "grad_norm": 2.1141830417726233, "learning_rate": 7.480655583739812e-07, "loss": 0.2997, "step": 20407 }, { "epoch": 0.35474282535764573, "grad_norm": 0.8604978512668265, "learning_rate": 7.480411173864645e-07, "loss": 0.2852, "step": 20408 }, { "epoch": 0.35476020789514856, "grad_norm": 1.9873872644310693, "learning_rate": 7.480166756127716e-07, "loss": 0.3272, "step": 20409 }, { "epoch": 0.35477759043265134, "grad_norm": 1.763187707141596, "learning_rate": 7.479922330529798e-07, "loss": 0.352, "step": 20410 }, { "epoch": 0.35479497297015417, "grad_norm": 2.3649223380389808, "learning_rate": 7.479677897071663e-07, "loss": 0.3912, "step": 20411 }, { "epoch": 0.354812355507657, "grad_norm": 1.5570635018815224, "learning_rate": 7.479433455754089e-07, "loss": 0.2536, "step": 20412 }, { "epoch": 0.3548297380451598, "grad_norm": 3.1117786309527826, "learning_rate": 7.479189006577851e-07, "loss": 0.4647, "step": 20413 }, { "epoch": 0.35484712058266266, "grad_norm": 1.1706172804319654, "learning_rate": 7.478944549543721e-07, "loss": 0.1785, "step": 20414 }, { "epoch": 0.3548645031201655, "grad_norm": 1.5483764433040021, "learning_rate": 7.478700084652477e-07, "loss": 0.2996, "step": 20415 }, { "epoch": 0.3548818856576683, "grad_norm": 2.398206518113281, "learning_rate": 7.478455611904891e-07, "loss": 0.3878, "step": 20416 }, { "epoch": 0.35489926819517115, "grad_norm": 2.9439960952306135, "learning_rate": 7.478211131301741e-07, "loss": 0.538, "step": 20417 }, { "epoch": 0.354916650732674, "grad_norm": 1.6669081593241433, "learning_rate": 7.477966642843799e-07, "loss": 0.2938, "step": 20418 }, { "epoch": 0.3549340332701768, "grad_norm": 1.6780125996869297, "learning_rate": 7.477722146531842e-07, "loss": 0.5076, "step": 20419 }, { "epoch": 0.3549514158076796, "grad_norm": 1.2967117200209735, "learning_rate": 7.477477642366642e-07, "loss": 0.217, "step": 20420 }, { "epoch": 0.3549687983451824, "grad_norm": 4.42800840453807, "learning_rate": 7.477233130348979e-07, "loss": 0.3983, "step": 20421 }, { "epoch": 0.35498618088268524, "grad_norm": 1.3548010868751996, "learning_rate": 7.476988610479622e-07, "loss": 0.4298, "step": 20422 }, { "epoch": 0.3550035634201881, "grad_norm": 1.8035117194390418, "learning_rate": 7.476744082759349e-07, "loss": 0.2306, "step": 20423 }, { "epoch": 0.3550209459576909, "grad_norm": 2.336815237389557, "learning_rate": 7.476499547188937e-07, "loss": 0.7021, "step": 20424 }, { "epoch": 0.35503832849519373, "grad_norm": 1.787098298999632, "learning_rate": 7.476255003769158e-07, "loss": 0.3769, "step": 20425 }, { "epoch": 0.35505571103269656, "grad_norm": 1.4964959929748658, "learning_rate": 7.476010452500787e-07, "loss": 0.3785, "step": 20426 }, { "epoch": 0.3550730935701994, "grad_norm": 2.126440105759838, "learning_rate": 7.475765893384602e-07, "loss": 0.3546, "step": 20427 }, { "epoch": 0.3550904761077022, "grad_norm": 1.6030065930319701, "learning_rate": 7.475521326421375e-07, "loss": 0.2187, "step": 20428 }, { "epoch": 0.35510785864520505, "grad_norm": 0.913333298813292, "learning_rate": 7.475276751611884e-07, "loss": 0.4507, "step": 20429 }, { "epoch": 0.35512524118270783, "grad_norm": 1.7616761363657074, "learning_rate": 7.475032168956903e-07, "loss": 0.3999, "step": 20430 }, { "epoch": 0.35514262372021066, "grad_norm": 1.3899661469781575, "learning_rate": 7.474787578457205e-07, "loss": 0.2486, "step": 20431 }, { "epoch": 0.3551600062577135, "grad_norm": 1.7468955987092352, "learning_rate": 7.474542980113569e-07, "loss": 0.4733, "step": 20432 }, { "epoch": 0.3551773887952163, "grad_norm": 2.0755119791741956, "learning_rate": 7.474298373926767e-07, "loss": 0.1803, "step": 20433 }, { "epoch": 0.35519477133271915, "grad_norm": 1.482237902964956, "learning_rate": 7.474053759897577e-07, "loss": 0.2847, "step": 20434 }, { "epoch": 0.355212153870222, "grad_norm": 0.9629023629898584, "learning_rate": 7.473809138026772e-07, "loss": 0.1608, "step": 20435 }, { "epoch": 0.3552295364077248, "grad_norm": 2.16519004844724, "learning_rate": 7.473564508315129e-07, "loss": 0.4098, "step": 20436 }, { "epoch": 0.35524691894522764, "grad_norm": 2.170712457690171, "learning_rate": 7.473319870763422e-07, "loss": 0.3008, "step": 20437 }, { "epoch": 0.35526430148273047, "grad_norm": 2.0980874968560914, "learning_rate": 7.473075225372428e-07, "loss": 0.3486, "step": 20438 }, { "epoch": 0.35528168402023325, "grad_norm": 1.1011741947124118, "learning_rate": 7.472830572142922e-07, "loss": 0.2589, "step": 20439 }, { "epoch": 0.3552990665577361, "grad_norm": 1.7742014135316573, "learning_rate": 7.47258591107568e-07, "loss": 0.3337, "step": 20440 }, { "epoch": 0.3553164490952389, "grad_norm": 2.5733712871045213, "learning_rate": 7.472341242171473e-07, "loss": 0.3956, "step": 20441 }, { "epoch": 0.35533383163274174, "grad_norm": 2.079632450671131, "learning_rate": 7.472096565431082e-07, "loss": 0.322, "step": 20442 }, { "epoch": 0.35535121417024457, "grad_norm": 1.6382802908382454, "learning_rate": 7.47185188085528e-07, "loss": 0.3904, "step": 20443 }, { "epoch": 0.3553685967077474, "grad_norm": 1.6339284039337962, "learning_rate": 7.471607188444844e-07, "loss": 0.559, "step": 20444 }, { "epoch": 0.3553859792452502, "grad_norm": 2.2666574569867857, "learning_rate": 7.471362488200547e-07, "loss": 0.2994, "step": 20445 }, { "epoch": 0.35540336178275306, "grad_norm": 1.5117831240548418, "learning_rate": 7.471117780123169e-07, "loss": 0.2112, "step": 20446 }, { "epoch": 0.3554207443202559, "grad_norm": 1.528944309246711, "learning_rate": 7.47087306421348e-07, "loss": 0.6028, "step": 20447 }, { "epoch": 0.3554381268577587, "grad_norm": 2.289210134600363, "learning_rate": 7.470628340472259e-07, "loss": 0.3055, "step": 20448 }, { "epoch": 0.3554555093952615, "grad_norm": 1.4851088786034479, "learning_rate": 7.470383608900282e-07, "loss": 0.2565, "step": 20449 }, { "epoch": 0.3554728919327643, "grad_norm": 1.159268170401875, "learning_rate": 7.470138869498323e-07, "loss": 0.4361, "step": 20450 }, { "epoch": 0.35549027447026715, "grad_norm": 1.5911601240697124, "learning_rate": 7.469894122267157e-07, "loss": 0.3676, "step": 20451 }, { "epoch": 0.35550765700777, "grad_norm": 1.997672938810471, "learning_rate": 7.469649367207564e-07, "loss": 0.4477, "step": 20452 }, { "epoch": 0.3555250395452728, "grad_norm": 2.517861689152946, "learning_rate": 7.469404604320316e-07, "loss": 0.2842, "step": 20453 }, { "epoch": 0.35554242208277564, "grad_norm": 1.5146203532134206, "learning_rate": 7.46915983360619e-07, "loss": 0.416, "step": 20454 }, { "epoch": 0.3555598046202785, "grad_norm": 2.267864261904118, "learning_rate": 7.46891505506596e-07, "loss": 0.2548, "step": 20455 }, { "epoch": 0.3555771871577813, "grad_norm": 1.2327402479363039, "learning_rate": 7.468670268700405e-07, "loss": 0.508, "step": 20456 }, { "epoch": 0.35559456969528414, "grad_norm": 1.378694729051629, "learning_rate": 7.468425474510299e-07, "loss": 0.6504, "step": 20457 }, { "epoch": 0.35561195223278697, "grad_norm": 0.9817468010785352, "learning_rate": 7.468180672496418e-07, "loss": 0.3001, "step": 20458 }, { "epoch": 0.35562933477028974, "grad_norm": 1.4701972000127728, "learning_rate": 7.467935862659538e-07, "loss": 0.5337, "step": 20459 }, { "epoch": 0.35564671730779257, "grad_norm": 1.9941035716352702, "learning_rate": 7.467691045000435e-07, "loss": 0.302, "step": 20460 }, { "epoch": 0.3556640998452954, "grad_norm": 1.4156477442083022, "learning_rate": 7.467446219519884e-07, "loss": 0.3723, "step": 20461 }, { "epoch": 0.35568148238279823, "grad_norm": 2.246999432527864, "learning_rate": 7.467201386218664e-07, "loss": 0.1855, "step": 20462 }, { "epoch": 0.35569886492030106, "grad_norm": 2.0532360694873617, "learning_rate": 7.466956545097547e-07, "loss": 0.3947, "step": 20463 }, { "epoch": 0.3557162474578039, "grad_norm": 1.6710592775274766, "learning_rate": 7.466711696157313e-07, "loss": 0.2545, "step": 20464 }, { "epoch": 0.3557336299953067, "grad_norm": 1.062881966856144, "learning_rate": 7.466466839398734e-07, "loss": 0.3564, "step": 20465 }, { "epoch": 0.35575101253280955, "grad_norm": 1.4795421313299726, "learning_rate": 7.466221974822589e-07, "loss": 0.3712, "step": 20466 }, { "epoch": 0.3557683950703124, "grad_norm": 2.2662362156935347, "learning_rate": 7.465977102429653e-07, "loss": 0.2777, "step": 20467 }, { "epoch": 0.3557857776078152, "grad_norm": 2.257270221824258, "learning_rate": 7.465732222220702e-07, "loss": 0.3932, "step": 20468 }, { "epoch": 0.355803160145318, "grad_norm": 1.4175119766662678, "learning_rate": 7.465487334196514e-07, "loss": 0.2762, "step": 20469 }, { "epoch": 0.3558205426828208, "grad_norm": 2.220968705841793, "learning_rate": 7.465242438357862e-07, "loss": 0.4409, "step": 20470 }, { "epoch": 0.35583792522032365, "grad_norm": 1.4670020160828923, "learning_rate": 7.464997534705526e-07, "loss": 0.1463, "step": 20471 }, { "epoch": 0.3558553077578265, "grad_norm": 0.9235869906471228, "learning_rate": 7.464752623240278e-07, "loss": 0.2195, "step": 20472 }, { "epoch": 0.3558726902953293, "grad_norm": 1.2897298876721979, "learning_rate": 7.464507703962897e-07, "loss": 0.3013, "step": 20473 }, { "epoch": 0.35589007283283214, "grad_norm": 2.426124828252974, "learning_rate": 7.464262776874158e-07, "loss": 0.3462, "step": 20474 }, { "epoch": 0.35590745537033497, "grad_norm": 4.01134205245458, "learning_rate": 7.46401784197484e-07, "loss": 0.6666, "step": 20475 }, { "epoch": 0.3559248379078378, "grad_norm": 2.914147919652162, "learning_rate": 7.463772899265715e-07, "loss": 0.5143, "step": 20476 }, { "epoch": 0.35594222044534063, "grad_norm": 1.568826634313368, "learning_rate": 7.463527948747564e-07, "loss": 0.2225, "step": 20477 }, { "epoch": 0.35595960298284346, "grad_norm": 2.3867996931381055, "learning_rate": 7.463282990421161e-07, "loss": 0.2674, "step": 20478 }, { "epoch": 0.35597698552034623, "grad_norm": 1.4930607930593123, "learning_rate": 7.46303802428728e-07, "loss": 0.3328, "step": 20479 }, { "epoch": 0.35599436805784906, "grad_norm": 2.4646840730652215, "learning_rate": 7.462793050346701e-07, "loss": 0.3012, "step": 20480 }, { "epoch": 0.3560117505953519, "grad_norm": 3.205741876228356, "learning_rate": 7.462548068600201e-07, "loss": 0.2252, "step": 20481 }, { "epoch": 0.3560291331328547, "grad_norm": 1.7166632319651085, "learning_rate": 7.462303079048552e-07, "loss": 0.2635, "step": 20482 }, { "epoch": 0.35604651567035756, "grad_norm": 1.4359904285057123, "learning_rate": 7.462058081692535e-07, "loss": 0.1982, "step": 20483 }, { "epoch": 0.3560638982078604, "grad_norm": 3.4239514396026163, "learning_rate": 7.461813076532926e-07, "loss": 0.2562, "step": 20484 }, { "epoch": 0.3560812807453632, "grad_norm": 2.20319913251444, "learning_rate": 7.461568063570498e-07, "loss": 0.2888, "step": 20485 }, { "epoch": 0.35609866328286605, "grad_norm": 2.0637719563621912, "learning_rate": 7.461323042806031e-07, "loss": 0.2492, "step": 20486 }, { "epoch": 0.3561160458203689, "grad_norm": 1.4660748350571586, "learning_rate": 7.461078014240301e-07, "loss": 0.3397, "step": 20487 }, { "epoch": 0.3561334283578717, "grad_norm": 1.9623519060342436, "learning_rate": 7.460832977874083e-07, "loss": 0.3278, "step": 20488 }, { "epoch": 0.3561508108953745, "grad_norm": 1.3527899982327956, "learning_rate": 7.460587933708155e-07, "loss": 0.2486, "step": 20489 }, { "epoch": 0.3561681934328773, "grad_norm": 1.3202915946157456, "learning_rate": 7.460342881743296e-07, "loss": 0.234, "step": 20490 }, { "epoch": 0.35618557597038014, "grad_norm": 1.4740239320236306, "learning_rate": 7.460097821980278e-07, "loss": 0.198, "step": 20491 }, { "epoch": 0.35620295850788297, "grad_norm": 1.3572156966761149, "learning_rate": 7.45985275441988e-07, "loss": 0.2337, "step": 20492 }, { "epoch": 0.3562203410453858, "grad_norm": 1.9271711456707776, "learning_rate": 7.459607679062879e-07, "loss": 0.2824, "step": 20493 }, { "epoch": 0.35623772358288863, "grad_norm": 1.775763227130448, "learning_rate": 7.459362595910052e-07, "loss": 0.2239, "step": 20494 }, { "epoch": 0.35625510612039146, "grad_norm": 2.717401842734854, "learning_rate": 7.459117504962176e-07, "loss": 0.4318, "step": 20495 }, { "epoch": 0.3562724886578943, "grad_norm": 1.964915831027908, "learning_rate": 7.458872406220025e-07, "loss": 0.2708, "step": 20496 }, { "epoch": 0.3562898711953971, "grad_norm": 1.7470311999435841, "learning_rate": 7.458627299684379e-07, "loss": 0.2981, "step": 20497 }, { "epoch": 0.35630725373289995, "grad_norm": 2.3137704900111866, "learning_rate": 7.458382185356014e-07, "loss": 0.4471, "step": 20498 }, { "epoch": 0.35632463627040273, "grad_norm": 1.8904509579902393, "learning_rate": 7.458137063235708e-07, "loss": 0.3079, "step": 20499 }, { "epoch": 0.35634201880790556, "grad_norm": 1.6067800885425767, "learning_rate": 7.457891933324235e-07, "loss": 0.2368, "step": 20500 }, { "epoch": 0.3563594013454084, "grad_norm": 2.9769071879511912, "learning_rate": 7.457646795622374e-07, "loss": 0.4412, "step": 20501 }, { "epoch": 0.3563767838829112, "grad_norm": 1.7765637767545623, "learning_rate": 7.457401650130901e-07, "loss": 0.2515, "step": 20502 }, { "epoch": 0.35639416642041405, "grad_norm": 1.79747531079376, "learning_rate": 7.457156496850595e-07, "loss": 0.4788, "step": 20503 }, { "epoch": 0.3564115489579169, "grad_norm": 1.450924785697175, "learning_rate": 7.456911335782232e-07, "loss": 0.3977, "step": 20504 }, { "epoch": 0.3564289314954197, "grad_norm": 2.1661485324618677, "learning_rate": 7.456666166926586e-07, "loss": 0.3307, "step": 20505 }, { "epoch": 0.35644631403292254, "grad_norm": 2.0427055830623546, "learning_rate": 7.45642099028444e-07, "loss": 0.2423, "step": 20506 }, { "epoch": 0.35646369657042537, "grad_norm": 2.1159467347917222, "learning_rate": 7.456175805856566e-07, "loss": 0.1727, "step": 20507 }, { "epoch": 0.3564810791079282, "grad_norm": 1.6245030634782025, "learning_rate": 7.455930613643742e-07, "loss": 0.3226, "step": 20508 }, { "epoch": 0.356498461645431, "grad_norm": 2.3343819884895955, "learning_rate": 7.455685413646748e-07, "loss": 0.3812, "step": 20509 }, { "epoch": 0.3565158441829338, "grad_norm": 1.7396936339619733, "learning_rate": 7.455440205866359e-07, "loss": 0.4239, "step": 20510 }, { "epoch": 0.35653322672043664, "grad_norm": 1.316289476606966, "learning_rate": 7.455194990303352e-07, "loss": 0.2305, "step": 20511 }, { "epoch": 0.35655060925793947, "grad_norm": 1.049177475438901, "learning_rate": 7.454949766958505e-07, "loss": 0.4062, "step": 20512 }, { "epoch": 0.3565679917954423, "grad_norm": 1.711071967945476, "learning_rate": 7.454704535832594e-07, "loss": 0.2665, "step": 20513 }, { "epoch": 0.3565853743329451, "grad_norm": 1.6601976985174318, "learning_rate": 7.454459296926399e-07, "loss": 0.3319, "step": 20514 }, { "epoch": 0.35660275687044796, "grad_norm": 1.861011793377532, "learning_rate": 7.454214050240693e-07, "loss": 0.2052, "step": 20515 }, { "epoch": 0.3566201394079508, "grad_norm": 3.374633757340628, "learning_rate": 7.453968795776259e-07, "loss": 0.335, "step": 20516 }, { "epoch": 0.3566375219454536, "grad_norm": 1.6788098315649966, "learning_rate": 7.453723533533869e-07, "loss": 0.1984, "step": 20517 }, { "epoch": 0.35665490448295645, "grad_norm": 2.8477813247287, "learning_rate": 7.453478263514302e-07, "loss": 0.4593, "step": 20518 }, { "epoch": 0.3566722870204592, "grad_norm": 1.5076264109561912, "learning_rate": 7.453232985718339e-07, "loss": 0.2425, "step": 20519 }, { "epoch": 0.35668966955796205, "grad_norm": 1.797607099898415, "learning_rate": 7.452987700146751e-07, "loss": 0.2541, "step": 20520 }, { "epoch": 0.3567070520954649, "grad_norm": 1.2443075765822649, "learning_rate": 7.45274240680032e-07, "loss": 0.488, "step": 20521 }, { "epoch": 0.3567244346329677, "grad_norm": 1.8640944485495654, "learning_rate": 7.452497105679823e-07, "loss": 0.4228, "step": 20522 }, { "epoch": 0.35674181717047054, "grad_norm": 1.3392315775894559, "learning_rate": 7.452251796786036e-07, "loss": 0.2447, "step": 20523 }, { "epoch": 0.3567591997079734, "grad_norm": 1.7613539809024128, "learning_rate": 7.452006480119737e-07, "loss": 0.3647, "step": 20524 }, { "epoch": 0.3567765822454762, "grad_norm": 1.6131372337538392, "learning_rate": 7.451761155681705e-07, "loss": 0.354, "step": 20525 }, { "epoch": 0.35679396478297903, "grad_norm": 1.443741033698003, "learning_rate": 7.451515823472715e-07, "loss": 0.3463, "step": 20526 }, { "epoch": 0.35681134732048186, "grad_norm": 1.8736582098715044, "learning_rate": 7.451270483493547e-07, "loss": 0.3768, "step": 20527 }, { "epoch": 0.3568287298579847, "grad_norm": 1.3536236354126912, "learning_rate": 7.451025135744977e-07, "loss": 0.2273, "step": 20528 }, { "epoch": 0.35684611239548747, "grad_norm": 3.2814384510512986, "learning_rate": 7.450779780227784e-07, "loss": 0.4791, "step": 20529 }, { "epoch": 0.3568634949329903, "grad_norm": 1.7866174401169013, "learning_rate": 7.450534416942743e-07, "loss": 0.2695, "step": 20530 }, { "epoch": 0.35688087747049313, "grad_norm": 1.5520148216000014, "learning_rate": 7.450289045890637e-07, "loss": 0.3083, "step": 20531 }, { "epoch": 0.35689826000799596, "grad_norm": 1.1385607700011542, "learning_rate": 7.450043667072236e-07, "loss": 0.27, "step": 20532 }, { "epoch": 0.3569156425454988, "grad_norm": 1.9971106515401746, "learning_rate": 7.449798280488326e-07, "loss": 0.3248, "step": 20533 }, { "epoch": 0.3569330250830016, "grad_norm": 3.0885137264957794, "learning_rate": 7.449552886139677e-07, "loss": 0.4436, "step": 20534 }, { "epoch": 0.35695040762050445, "grad_norm": 1.4440786453246284, "learning_rate": 7.449307484027074e-07, "loss": 0.3854, "step": 20535 }, { "epoch": 0.3569677901580073, "grad_norm": 1.1562222588224822, "learning_rate": 7.44906207415129e-07, "loss": 0.2751, "step": 20536 }, { "epoch": 0.3569851726955101, "grad_norm": 2.642310184282134, "learning_rate": 7.448816656513104e-07, "loss": 0.3163, "step": 20537 }, { "epoch": 0.35700255523301294, "grad_norm": 1.7639759796763481, "learning_rate": 7.448571231113294e-07, "loss": 0.6199, "step": 20538 }, { "epoch": 0.3570199377705157, "grad_norm": 2.7434768411157786, "learning_rate": 7.44832579795264e-07, "loss": 0.3916, "step": 20539 }, { "epoch": 0.35703732030801855, "grad_norm": 3.627804884388318, "learning_rate": 7.448080357031914e-07, "loss": 0.4066, "step": 20540 }, { "epoch": 0.3570547028455214, "grad_norm": 2.007292358883743, "learning_rate": 7.447834908351902e-07, "loss": 0.4042, "step": 20541 }, { "epoch": 0.3570720853830242, "grad_norm": 1.5113958285478506, "learning_rate": 7.447589451913374e-07, "loss": 0.1774, "step": 20542 }, { "epoch": 0.35708946792052704, "grad_norm": 1.7589013066086674, "learning_rate": 7.447343987717114e-07, "loss": 0.3043, "step": 20543 }, { "epoch": 0.35710685045802987, "grad_norm": 1.814150770939906, "learning_rate": 7.447098515763899e-07, "loss": 0.3366, "step": 20544 }, { "epoch": 0.3571242329955327, "grad_norm": 1.3490515476667184, "learning_rate": 7.446853036054504e-07, "loss": 0.2366, "step": 20545 }, { "epoch": 0.35714161553303553, "grad_norm": 1.4724606098852946, "learning_rate": 7.446607548589707e-07, "loss": 0.6093, "step": 20546 }, { "epoch": 0.35715899807053836, "grad_norm": 1.9912071154103657, "learning_rate": 7.446362053370292e-07, "loss": 0.2568, "step": 20547 }, { "epoch": 0.3571763806080412, "grad_norm": 1.7613812956781445, "learning_rate": 7.44611655039703e-07, "loss": 0.1687, "step": 20548 }, { "epoch": 0.35719376314554396, "grad_norm": 1.6996640056134205, "learning_rate": 7.445871039670704e-07, "loss": 0.373, "step": 20549 }, { "epoch": 0.3572111456830468, "grad_norm": 1.298702925056266, "learning_rate": 7.445625521192089e-07, "loss": 0.2438, "step": 20550 }, { "epoch": 0.3572285282205496, "grad_norm": 1.737577476312871, "learning_rate": 7.445379994961966e-07, "loss": 0.4053, "step": 20551 }, { "epoch": 0.35724591075805245, "grad_norm": 1.7988898160375173, "learning_rate": 7.44513446098111e-07, "loss": 0.2346, "step": 20552 }, { "epoch": 0.3572632932955553, "grad_norm": 1.8552308455438202, "learning_rate": 7.444888919250301e-07, "loss": 0.3767, "step": 20553 }, { "epoch": 0.3572806758330581, "grad_norm": 4.052180048516096, "learning_rate": 7.444643369770319e-07, "loss": 0.4019, "step": 20554 }, { "epoch": 0.35729805837056094, "grad_norm": 2.9244248977725573, "learning_rate": 7.444397812541939e-07, "loss": 0.2304, "step": 20555 }, { "epoch": 0.3573154409080638, "grad_norm": 2.253336301810008, "learning_rate": 7.444152247565942e-07, "loss": 0.572, "step": 20556 }, { "epoch": 0.3573328234455666, "grad_norm": 1.6625279673951534, "learning_rate": 7.443906674843103e-07, "loss": 0.2506, "step": 20557 }, { "epoch": 0.35735020598306944, "grad_norm": 0.9290269604576966, "learning_rate": 7.443661094374205e-07, "loss": 0.3616, "step": 20558 }, { "epoch": 0.3573675885205722, "grad_norm": 2.2673384631755797, "learning_rate": 7.443415506160022e-07, "loss": 0.5124, "step": 20559 }, { "epoch": 0.35738497105807504, "grad_norm": 1.243387715673594, "learning_rate": 7.443169910201337e-07, "loss": 0.3309, "step": 20560 }, { "epoch": 0.35740235359557787, "grad_norm": 1.9742012871986465, "learning_rate": 7.442924306498923e-07, "loss": 0.3589, "step": 20561 }, { "epoch": 0.3574197361330807, "grad_norm": 1.37957608022654, "learning_rate": 7.442678695053561e-07, "loss": 0.3302, "step": 20562 }, { "epoch": 0.35743711867058353, "grad_norm": 1.3061195718840426, "learning_rate": 7.442433075866031e-07, "loss": 0.3629, "step": 20563 }, { "epoch": 0.35745450120808636, "grad_norm": 1.4759584062644915, "learning_rate": 7.44218744893711e-07, "loss": 0.3295, "step": 20564 }, { "epoch": 0.3574718837455892, "grad_norm": 2.437726870548271, "learning_rate": 7.441941814267575e-07, "loss": 0.1821, "step": 20565 }, { "epoch": 0.357489266283092, "grad_norm": 3.1854429424537574, "learning_rate": 7.441696171858209e-07, "loss": 0.3782, "step": 20566 }, { "epoch": 0.35750664882059485, "grad_norm": 2.7464695973487423, "learning_rate": 7.441450521709785e-07, "loss": 0.4927, "step": 20567 }, { "epoch": 0.3575240313580977, "grad_norm": 1.509877457114961, "learning_rate": 7.441204863823086e-07, "loss": 0.2215, "step": 20568 }, { "epoch": 0.35754141389560046, "grad_norm": 1.9634791275988837, "learning_rate": 7.440959198198888e-07, "loss": 0.1871, "step": 20569 }, { "epoch": 0.3575587964331033, "grad_norm": 2.200826679257239, "learning_rate": 7.440713524837971e-07, "loss": 0.3798, "step": 20570 }, { "epoch": 0.3575761789706061, "grad_norm": 2.6909471792794424, "learning_rate": 7.440467843741113e-07, "loss": 0.3641, "step": 20571 }, { "epoch": 0.35759356150810895, "grad_norm": 2.777256952958701, "learning_rate": 7.440222154909095e-07, "loss": 0.3213, "step": 20572 }, { "epoch": 0.3576109440456118, "grad_norm": 1.5496781972833613, "learning_rate": 7.439976458342691e-07, "loss": 0.45, "step": 20573 }, { "epoch": 0.3576283265831146, "grad_norm": 1.4242946532122804, "learning_rate": 7.439730754042684e-07, "loss": 0.2219, "step": 20574 }, { "epoch": 0.35764570912061744, "grad_norm": 1.5987272076510657, "learning_rate": 7.43948504200985e-07, "loss": 0.2844, "step": 20575 }, { "epoch": 0.35766309165812027, "grad_norm": 10.089190833416627, "learning_rate": 7.439239322244972e-07, "loss": 0.445, "step": 20576 }, { "epoch": 0.3576804741956231, "grad_norm": 1.5588048758812467, "learning_rate": 7.438993594748822e-07, "loss": 0.2885, "step": 20577 }, { "epoch": 0.3576978567331259, "grad_norm": 2.579353723873827, "learning_rate": 7.438747859522186e-07, "loss": 0.3979, "step": 20578 }, { "epoch": 0.3577152392706287, "grad_norm": 1.8375718687885305, "learning_rate": 7.438502116565836e-07, "loss": 0.3615, "step": 20579 }, { "epoch": 0.35773262180813153, "grad_norm": 2.2309082078189024, "learning_rate": 7.438256365880557e-07, "loss": 0.328, "step": 20580 }, { "epoch": 0.35775000434563436, "grad_norm": 1.2292621747942978, "learning_rate": 7.438010607467125e-07, "loss": 0.4158, "step": 20581 }, { "epoch": 0.3577673868831372, "grad_norm": 1.9152274219758043, "learning_rate": 7.437764841326319e-07, "loss": 0.358, "step": 20582 }, { "epoch": 0.35778476942064, "grad_norm": 1.646866979754213, "learning_rate": 7.437519067458918e-07, "loss": 0.3456, "step": 20583 }, { "epoch": 0.35780215195814286, "grad_norm": 1.2298881596806834, "learning_rate": 7.437273285865702e-07, "loss": 0.4771, "step": 20584 }, { "epoch": 0.3578195344956457, "grad_norm": 1.5855955174499243, "learning_rate": 7.43702749654745e-07, "loss": 0.3044, "step": 20585 }, { "epoch": 0.3578369170331485, "grad_norm": 1.3240713296014277, "learning_rate": 7.436781699504939e-07, "loss": 0.2638, "step": 20586 }, { "epoch": 0.35785429957065135, "grad_norm": 1.1357375679433885, "learning_rate": 7.436535894738949e-07, "loss": 0.307, "step": 20587 }, { "epoch": 0.3578716821081541, "grad_norm": 1.9289151629607164, "learning_rate": 7.436290082250259e-07, "loss": 0.3571, "step": 20588 }, { "epoch": 0.35788906464565695, "grad_norm": 2.7908480370687294, "learning_rate": 7.43604426203965e-07, "loss": 0.4138, "step": 20589 }, { "epoch": 0.3579064471831598, "grad_norm": 1.5071812244703864, "learning_rate": 7.435798434107899e-07, "loss": 0.4501, "step": 20590 }, { "epoch": 0.3579238297206626, "grad_norm": 1.485806912188264, "learning_rate": 7.435552598455788e-07, "loss": 0.2141, "step": 20591 }, { "epoch": 0.35794121225816544, "grad_norm": 2.227556960523557, "learning_rate": 7.435306755084091e-07, "loss": 0.3193, "step": 20592 }, { "epoch": 0.3579585947956683, "grad_norm": 1.0029763694228686, "learning_rate": 7.435060903993592e-07, "loss": 0.3316, "step": 20593 }, { "epoch": 0.3579759773331711, "grad_norm": 1.5199038038972195, "learning_rate": 7.434815045185067e-07, "loss": 0.2068, "step": 20594 }, { "epoch": 0.35799335987067393, "grad_norm": 1.5250018102911684, "learning_rate": 7.434569178659299e-07, "loss": 0.1909, "step": 20595 }, { "epoch": 0.35801074240817676, "grad_norm": 1.3678172164955082, "learning_rate": 7.434323304417063e-07, "loss": 0.3146, "step": 20596 }, { "epoch": 0.3580281249456796, "grad_norm": 1.663896476274226, "learning_rate": 7.434077422459142e-07, "loss": 0.2873, "step": 20597 }, { "epoch": 0.35804550748318237, "grad_norm": 4.014023063677692, "learning_rate": 7.433831532786312e-07, "loss": 0.3573, "step": 20598 }, { "epoch": 0.3580628900206852, "grad_norm": 1.7290626810535217, "learning_rate": 7.433585635399355e-07, "loss": 0.4159, "step": 20599 }, { "epoch": 0.35808027255818803, "grad_norm": 2.099312927502048, "learning_rate": 7.43333973029905e-07, "loss": 0.2088, "step": 20600 }, { "epoch": 0.35809765509569086, "grad_norm": 1.0871479436684892, "learning_rate": 7.433093817486175e-07, "loss": 0.2085, "step": 20601 }, { "epoch": 0.3581150376331937, "grad_norm": 1.9002744292004154, "learning_rate": 7.43284789696151e-07, "loss": 0.3831, "step": 20602 }, { "epoch": 0.3581324201706965, "grad_norm": 1.7827356147142488, "learning_rate": 7.432601968725836e-07, "loss": 0.2022, "step": 20603 }, { "epoch": 0.35814980270819935, "grad_norm": 1.5054101723800564, "learning_rate": 7.43235603277993e-07, "loss": 0.2548, "step": 20604 }, { "epoch": 0.3581671852457022, "grad_norm": 1.4221859869903466, "learning_rate": 7.432110089124574e-07, "loss": 0.3265, "step": 20605 }, { "epoch": 0.358184567783205, "grad_norm": 1.1705971153574213, "learning_rate": 7.431864137760544e-07, "loss": 0.2226, "step": 20606 }, { "epoch": 0.35820195032070784, "grad_norm": 1.4042621994569222, "learning_rate": 7.431618178688624e-07, "loss": 0.3085, "step": 20607 }, { "epoch": 0.3582193328582106, "grad_norm": 1.4809816410667078, "learning_rate": 7.43137221190959e-07, "loss": 0.2938, "step": 20608 }, { "epoch": 0.35823671539571345, "grad_norm": 1.135345828558022, "learning_rate": 7.431126237424224e-07, "loss": 0.2324, "step": 20609 }, { "epoch": 0.3582540979332163, "grad_norm": 1.6204062367501173, "learning_rate": 7.430880255233305e-07, "loss": 0.338, "step": 20610 }, { "epoch": 0.3582714804707191, "grad_norm": 1.0129571958214916, "learning_rate": 7.43063426533761e-07, "loss": 0.3303, "step": 20611 }, { "epoch": 0.35828886300822194, "grad_norm": 2.2652189737211543, "learning_rate": 7.430388267737923e-07, "loss": 0.3428, "step": 20612 }, { "epoch": 0.35830624554572477, "grad_norm": 1.3478034580443123, "learning_rate": 7.43014226243502e-07, "loss": 0.2991, "step": 20613 }, { "epoch": 0.3583236280832276, "grad_norm": 1.4447376713715256, "learning_rate": 7.429896249429685e-07, "loss": 0.22, "step": 20614 }, { "epoch": 0.3583410106207304, "grad_norm": 1.7405915074947622, "learning_rate": 7.429650228722692e-07, "loss": 0.3732, "step": 20615 }, { "epoch": 0.35835839315823326, "grad_norm": 2.7170246718797317, "learning_rate": 7.429404200314825e-07, "loss": 0.4788, "step": 20616 }, { "epoch": 0.3583757756957361, "grad_norm": 1.793235879554155, "learning_rate": 7.429158164206863e-07, "loss": 0.2694, "step": 20617 }, { "epoch": 0.35839315823323886, "grad_norm": 1.609992109951071, "learning_rate": 7.428912120399587e-07, "loss": 0.2797, "step": 20618 }, { "epoch": 0.3584105407707417, "grad_norm": 1.4600752877161234, "learning_rate": 7.428666068893772e-07, "loss": 0.3473, "step": 20619 }, { "epoch": 0.3584279233082445, "grad_norm": 1.3738401903953126, "learning_rate": 7.428420009690204e-07, "loss": 0.1757, "step": 20620 }, { "epoch": 0.35844530584574735, "grad_norm": 2.4025631601840582, "learning_rate": 7.428173942789659e-07, "loss": 0.3004, "step": 20621 }, { "epoch": 0.3584626883832502, "grad_norm": 2.9915501539108056, "learning_rate": 7.427927868192917e-07, "loss": 0.2723, "step": 20622 }, { "epoch": 0.358480070920753, "grad_norm": 2.467377875519878, "learning_rate": 7.427681785900761e-07, "loss": 0.2172, "step": 20623 }, { "epoch": 0.35849745345825584, "grad_norm": 1.7768403958876064, "learning_rate": 7.427435695913967e-07, "loss": 0.3241, "step": 20624 }, { "epoch": 0.3585148359957587, "grad_norm": 1.0437607409979934, "learning_rate": 7.427189598233318e-07, "loss": 0.1222, "step": 20625 }, { "epoch": 0.3585322185332615, "grad_norm": 2.5505248247037438, "learning_rate": 7.426943492859593e-07, "loss": 0.2909, "step": 20626 }, { "epoch": 0.35854960107076433, "grad_norm": 4.031152135456963, "learning_rate": 7.426697379793572e-07, "loss": 0.3854, "step": 20627 }, { "epoch": 0.3585669836082671, "grad_norm": 1.7056177396732093, "learning_rate": 7.426451259036034e-07, "loss": 0.2948, "step": 20628 }, { "epoch": 0.35858436614576994, "grad_norm": 1.6122617971837454, "learning_rate": 7.426205130587761e-07, "loss": 0.2833, "step": 20629 }, { "epoch": 0.35860174868327277, "grad_norm": 2.1270464043032407, "learning_rate": 7.425958994449532e-07, "loss": 0.2315, "step": 20630 }, { "epoch": 0.3586191312207756, "grad_norm": 2.122793023556594, "learning_rate": 7.425712850622128e-07, "loss": 0.3354, "step": 20631 }, { "epoch": 0.35863651375827843, "grad_norm": 2.0763046004362264, "learning_rate": 7.425466699106328e-07, "loss": 0.2755, "step": 20632 }, { "epoch": 0.35865389629578126, "grad_norm": 2.1989364123593345, "learning_rate": 7.425220539902912e-07, "loss": 0.324, "step": 20633 }, { "epoch": 0.3586712788332841, "grad_norm": 2.00112984352762, "learning_rate": 7.424974373012662e-07, "loss": 0.2631, "step": 20634 }, { "epoch": 0.3586886613707869, "grad_norm": 2.300223745845396, "learning_rate": 7.424728198436357e-07, "loss": 0.2956, "step": 20635 }, { "epoch": 0.35870604390828975, "grad_norm": 2.9080228086711006, "learning_rate": 7.424482016174778e-07, "loss": 0.3058, "step": 20636 }, { "epoch": 0.3587234264457926, "grad_norm": 1.3316176853042399, "learning_rate": 7.424235826228703e-07, "loss": 0.1974, "step": 20637 }, { "epoch": 0.35874080898329536, "grad_norm": 1.7994511889463354, "learning_rate": 7.423989628598916e-07, "loss": 0.2022, "step": 20638 }, { "epoch": 0.3587581915207982, "grad_norm": 1.5991318885828136, "learning_rate": 7.423743423286194e-07, "loss": 0.5095, "step": 20639 }, { "epoch": 0.358775574058301, "grad_norm": 2.284159380746949, "learning_rate": 7.423497210291319e-07, "loss": 0.2799, "step": 20640 }, { "epoch": 0.35879295659580385, "grad_norm": 1.4752319127218785, "learning_rate": 7.423250989615071e-07, "loss": 0.3328, "step": 20641 }, { "epoch": 0.3588103391333067, "grad_norm": 1.2544963814822114, "learning_rate": 7.423004761258231e-07, "loss": 0.247, "step": 20642 }, { "epoch": 0.3588277216708095, "grad_norm": 1.9069374644383865, "learning_rate": 7.42275852522158e-07, "loss": 0.3722, "step": 20643 }, { "epoch": 0.35884510420831234, "grad_norm": 1.5207187853898825, "learning_rate": 7.422512281505895e-07, "loss": 0.1838, "step": 20644 }, { "epoch": 0.35886248674581517, "grad_norm": 0.9101480484847692, "learning_rate": 7.422266030111961e-07, "loss": 0.1951, "step": 20645 }, { "epoch": 0.358879869283318, "grad_norm": 1.337505197717355, "learning_rate": 7.422019771040554e-07, "loss": 0.4016, "step": 20646 }, { "epoch": 0.35889725182082083, "grad_norm": 1.5160184389475673, "learning_rate": 7.421773504292459e-07, "loss": 0.2722, "step": 20647 }, { "epoch": 0.3589146343583236, "grad_norm": 1.8633937488455126, "learning_rate": 7.421527229868454e-07, "loss": 0.3377, "step": 20648 }, { "epoch": 0.35893201689582643, "grad_norm": 1.5487006986835752, "learning_rate": 7.42128094776932e-07, "loss": 0.2829, "step": 20649 }, { "epoch": 0.35894939943332926, "grad_norm": 2.0429689327082263, "learning_rate": 7.421034657995837e-07, "loss": 0.2442, "step": 20650 }, { "epoch": 0.3589667819708321, "grad_norm": 1.9027095080476448, "learning_rate": 7.420788360548788e-07, "loss": 0.5146, "step": 20651 }, { "epoch": 0.3589841645083349, "grad_norm": 1.4873577918724747, "learning_rate": 7.420542055428949e-07, "loss": 0.2391, "step": 20652 }, { "epoch": 0.35900154704583775, "grad_norm": 2.002746908739454, "learning_rate": 7.420295742637106e-07, "loss": 0.4436, "step": 20653 }, { "epoch": 0.3590189295833406, "grad_norm": 1.790747604210245, "learning_rate": 7.420049422174035e-07, "loss": 0.3083, "step": 20654 }, { "epoch": 0.3590363121208434, "grad_norm": 1.0699465290826573, "learning_rate": 7.419803094040522e-07, "loss": 0.2205, "step": 20655 }, { "epoch": 0.35905369465834625, "grad_norm": 1.7396654598661494, "learning_rate": 7.419556758237342e-07, "loss": 0.2818, "step": 20656 }, { "epoch": 0.3590710771958491, "grad_norm": 1.7147603797105626, "learning_rate": 7.41931041476528e-07, "loss": 0.3998, "step": 20657 }, { "epoch": 0.35908845973335185, "grad_norm": 1.9281271059087408, "learning_rate": 7.419064063625116e-07, "loss": 0.3092, "step": 20658 }, { "epoch": 0.3591058422708547, "grad_norm": 1.4416811116828414, "learning_rate": 7.418817704817629e-07, "loss": 0.3692, "step": 20659 }, { "epoch": 0.3591232248083575, "grad_norm": 1.9847042730406008, "learning_rate": 7.418571338343601e-07, "loss": 0.4632, "step": 20660 }, { "epoch": 0.35914060734586034, "grad_norm": 1.031146044446429, "learning_rate": 7.418324964203815e-07, "loss": 0.2045, "step": 20661 }, { "epoch": 0.35915798988336317, "grad_norm": 1.743293388506168, "learning_rate": 7.418078582399047e-07, "loss": 0.3113, "step": 20662 }, { "epoch": 0.359175372420866, "grad_norm": 2.0654803422010537, "learning_rate": 7.417832192930081e-07, "loss": 0.2914, "step": 20663 }, { "epoch": 0.35919275495836883, "grad_norm": 1.9804353764218359, "learning_rate": 7.417585795797698e-07, "loss": 0.3329, "step": 20664 }, { "epoch": 0.35921013749587166, "grad_norm": 1.7571457500091718, "learning_rate": 7.41733939100268e-07, "loss": 0.3631, "step": 20665 }, { "epoch": 0.3592275200333745, "grad_norm": 1.1357729619861616, "learning_rate": 7.417092978545804e-07, "loss": 0.2673, "step": 20666 }, { "epoch": 0.3592449025708773, "grad_norm": 0.9185467237016292, "learning_rate": 7.416846558427855e-07, "loss": 0.2917, "step": 20667 }, { "epoch": 0.3592622851083801, "grad_norm": 0.8500179725112869, "learning_rate": 7.416600130649613e-07, "loss": 0.2268, "step": 20668 }, { "epoch": 0.3592796676458829, "grad_norm": 1.431458664392397, "learning_rate": 7.416353695211859e-07, "loss": 0.289, "step": 20669 }, { "epoch": 0.35929705018338576, "grad_norm": 2.008548934503604, "learning_rate": 7.416107252115372e-07, "loss": 0.2906, "step": 20670 }, { "epoch": 0.3593144327208886, "grad_norm": 1.281328107661935, "learning_rate": 7.415860801360936e-07, "loss": 0.4377, "step": 20671 }, { "epoch": 0.3593318152583914, "grad_norm": 1.2633151916227385, "learning_rate": 7.41561434294933e-07, "loss": 0.3022, "step": 20672 }, { "epoch": 0.35934919779589425, "grad_norm": 1.667380995373857, "learning_rate": 7.415367876881337e-07, "loss": 0.4145, "step": 20673 }, { "epoch": 0.3593665803333971, "grad_norm": 1.4735065438470245, "learning_rate": 7.415121403157738e-07, "loss": 0.2858, "step": 20674 }, { "epoch": 0.3593839628708999, "grad_norm": 2.7096496244492885, "learning_rate": 7.414874921779313e-07, "loss": 0.3557, "step": 20675 }, { "epoch": 0.35940134540840274, "grad_norm": 1.5816072634721718, "learning_rate": 7.414628432746843e-07, "loss": 0.1712, "step": 20676 }, { "epoch": 0.35941872794590557, "grad_norm": 1.3934596580161702, "learning_rate": 7.414381936061111e-07, "loss": 0.2336, "step": 20677 }, { "epoch": 0.35943611048340834, "grad_norm": 2.7694614115127454, "learning_rate": 7.414135431722898e-07, "loss": 0.2565, "step": 20678 }, { "epoch": 0.3594534930209112, "grad_norm": 1.4577522013873452, "learning_rate": 7.413888919732982e-07, "loss": 0.2303, "step": 20679 }, { "epoch": 0.359470875558414, "grad_norm": 2.245550974576297, "learning_rate": 7.413642400092151e-07, "loss": 0.2936, "step": 20680 }, { "epoch": 0.35948825809591683, "grad_norm": 1.578773959130523, "learning_rate": 7.41339587280118e-07, "loss": 0.3076, "step": 20681 }, { "epoch": 0.35950564063341967, "grad_norm": 1.2679287600041629, "learning_rate": 7.413149337860852e-07, "loss": 0.2373, "step": 20682 }, { "epoch": 0.3595230231709225, "grad_norm": 1.6889002492568748, "learning_rate": 7.41290279527195e-07, "loss": 0.3578, "step": 20683 }, { "epoch": 0.3595404057084253, "grad_norm": 1.880313869472012, "learning_rate": 7.412656245035254e-07, "loss": 0.3374, "step": 20684 }, { "epoch": 0.35955778824592816, "grad_norm": 1.7413618265903426, "learning_rate": 7.412409687151546e-07, "loss": 0.2641, "step": 20685 }, { "epoch": 0.359575170783431, "grad_norm": 2.275155489071651, "learning_rate": 7.412163121621609e-07, "loss": 0.2125, "step": 20686 }, { "epoch": 0.3595925533209338, "grad_norm": 1.7837867084302725, "learning_rate": 7.411916548446222e-07, "loss": 0.2035, "step": 20687 }, { "epoch": 0.3596099358584366, "grad_norm": 1.7659082126408212, "learning_rate": 7.411669967626167e-07, "loss": 0.3484, "step": 20688 }, { "epoch": 0.3596273183959394, "grad_norm": 1.9227538614365045, "learning_rate": 7.411423379162228e-07, "loss": 0.2405, "step": 20689 }, { "epoch": 0.35964470093344225, "grad_norm": 2.9150462682278984, "learning_rate": 7.411176783055182e-07, "loss": 0.4184, "step": 20690 }, { "epoch": 0.3596620834709451, "grad_norm": 1.149317609749205, "learning_rate": 7.410930179305813e-07, "loss": 0.1653, "step": 20691 }, { "epoch": 0.3596794660084479, "grad_norm": 1.1339598386255405, "learning_rate": 7.410683567914906e-07, "loss": 0.2292, "step": 20692 }, { "epoch": 0.35969684854595074, "grad_norm": 1.6084535412618928, "learning_rate": 7.410436948883237e-07, "loss": 0.2646, "step": 20693 }, { "epoch": 0.3597142310834536, "grad_norm": 1.5104128122138336, "learning_rate": 7.41019032221159e-07, "loss": 0.2591, "step": 20694 }, { "epoch": 0.3597316136209564, "grad_norm": 1.330279714057412, "learning_rate": 7.409943687900747e-07, "loss": 0.3901, "step": 20695 }, { "epoch": 0.35974899615845923, "grad_norm": 2.060865399097961, "learning_rate": 7.409697045951489e-07, "loss": 0.2599, "step": 20696 }, { "epoch": 0.35976637869596206, "grad_norm": 1.3471875519841792, "learning_rate": 7.4094503963646e-07, "loss": 0.2762, "step": 20697 }, { "epoch": 0.35978376123346484, "grad_norm": 1.972762182110827, "learning_rate": 7.409203739140861e-07, "loss": 0.2917, "step": 20698 }, { "epoch": 0.35980114377096767, "grad_norm": 2.4189474329370615, "learning_rate": 7.40895707428105e-07, "loss": 0.4183, "step": 20699 }, { "epoch": 0.3598185263084705, "grad_norm": 1.950681198203365, "learning_rate": 7.408710401785953e-07, "loss": 0.3478, "step": 20700 }, { "epoch": 0.35983590884597333, "grad_norm": 1.703776491104297, "learning_rate": 7.408463721656349e-07, "loss": 0.2958, "step": 20701 }, { "epoch": 0.35985329138347616, "grad_norm": 1.4583823581392166, "learning_rate": 7.408217033893024e-07, "loss": 0.3862, "step": 20702 }, { "epoch": 0.359870673920979, "grad_norm": 2.180633924285642, "learning_rate": 7.407970338496756e-07, "loss": 0.3252, "step": 20703 }, { "epoch": 0.3598880564584818, "grad_norm": 1.4617944959833813, "learning_rate": 7.407723635468327e-07, "loss": 0.2964, "step": 20704 }, { "epoch": 0.35990543899598465, "grad_norm": 1.3537655744652461, "learning_rate": 7.407476924808522e-07, "loss": 0.4996, "step": 20705 }, { "epoch": 0.3599228215334875, "grad_norm": 1.217446067381113, "learning_rate": 7.40723020651812e-07, "loss": 0.2414, "step": 20706 }, { "epoch": 0.3599402040709903, "grad_norm": 2.300117269924373, "learning_rate": 7.406983480597905e-07, "loss": 0.3368, "step": 20707 }, { "epoch": 0.3599575866084931, "grad_norm": 2.8028418115996874, "learning_rate": 7.406736747048656e-07, "loss": 0.2804, "step": 20708 }, { "epoch": 0.3599749691459959, "grad_norm": 1.8157246037504002, "learning_rate": 7.406490005871159e-07, "loss": 0.4485, "step": 20709 }, { "epoch": 0.35999235168349875, "grad_norm": 2.237504883332191, "learning_rate": 7.406243257066193e-07, "loss": 0.551, "step": 20710 }, { "epoch": 0.3600097342210016, "grad_norm": 1.5699440445772284, "learning_rate": 7.405996500634544e-07, "loss": 0.2673, "step": 20711 }, { "epoch": 0.3600271167585044, "grad_norm": 1.7463342450018835, "learning_rate": 7.405749736576988e-07, "loss": 0.4263, "step": 20712 }, { "epoch": 0.36004449929600724, "grad_norm": 1.4900467275452955, "learning_rate": 7.405502964894312e-07, "loss": 0.5193, "step": 20713 }, { "epoch": 0.36006188183351007, "grad_norm": 2.126682532624992, "learning_rate": 7.405256185587294e-07, "loss": 0.3394, "step": 20714 }, { "epoch": 0.3600792643710129, "grad_norm": 2.1228595276229782, "learning_rate": 7.405009398656723e-07, "loss": 0.285, "step": 20715 }, { "epoch": 0.3600966469085157, "grad_norm": 1.1963040764233852, "learning_rate": 7.404762604103374e-07, "loss": 0.1788, "step": 20716 }, { "epoch": 0.3601140294460185, "grad_norm": 1.0125372075107817, "learning_rate": 7.404515801928033e-07, "loss": 0.2039, "step": 20717 }, { "epoch": 0.36013141198352133, "grad_norm": 3.305754778118193, "learning_rate": 7.404268992131482e-07, "loss": 0.1947, "step": 20718 }, { "epoch": 0.36014879452102416, "grad_norm": 3.075416576959099, "learning_rate": 7.404022174714501e-07, "loss": 0.2819, "step": 20719 }, { "epoch": 0.360166177058527, "grad_norm": 0.9419293443781299, "learning_rate": 7.403775349677875e-07, "loss": 0.2478, "step": 20720 }, { "epoch": 0.3601835595960298, "grad_norm": 1.3777546523121496, "learning_rate": 7.403528517022387e-07, "loss": 0.266, "step": 20721 }, { "epoch": 0.36020094213353265, "grad_norm": 1.6627566043206956, "learning_rate": 7.403281676748816e-07, "loss": 0.2566, "step": 20722 }, { "epoch": 0.3602183246710355, "grad_norm": 3.0420206115286557, "learning_rate": 7.403034828857947e-07, "loss": 0.3005, "step": 20723 }, { "epoch": 0.3602357072085383, "grad_norm": 1.2503793040491973, "learning_rate": 7.40278797335056e-07, "loss": 0.3133, "step": 20724 }, { "epoch": 0.36025308974604114, "grad_norm": 3.5317648930377694, "learning_rate": 7.40254111022744e-07, "loss": 0.5146, "step": 20725 }, { "epoch": 0.360270472283544, "grad_norm": 2.621197521342041, "learning_rate": 7.402294239489369e-07, "loss": 0.2199, "step": 20726 }, { "epoch": 0.36028785482104675, "grad_norm": 1.871908642570686, "learning_rate": 7.402047361137127e-07, "loss": 0.409, "step": 20727 }, { "epoch": 0.3603052373585496, "grad_norm": 1.2748304226043388, "learning_rate": 7.401800475171499e-07, "loss": 0.2399, "step": 20728 }, { "epoch": 0.3603226198960524, "grad_norm": 1.1757020391728779, "learning_rate": 7.401553581593268e-07, "loss": 0.4143, "step": 20729 }, { "epoch": 0.36034000243355524, "grad_norm": 1.2431188321892552, "learning_rate": 7.401306680403213e-07, "loss": 0.199, "step": 20730 }, { "epoch": 0.36035738497105807, "grad_norm": 2.0976671844720287, "learning_rate": 7.401059771602119e-07, "loss": 0.2476, "step": 20731 }, { "epoch": 0.3603747675085609, "grad_norm": 2.8725723695012455, "learning_rate": 7.40081285519077e-07, "loss": 0.2727, "step": 20732 }, { "epoch": 0.36039215004606373, "grad_norm": 1.5419592330104632, "learning_rate": 7.400565931169947e-07, "loss": 0.274, "step": 20733 }, { "epoch": 0.36040953258356656, "grad_norm": 2.5187081295457543, "learning_rate": 7.400318999540434e-07, "loss": 0.3321, "step": 20734 }, { "epoch": 0.3604269151210694, "grad_norm": 1.6372569142562974, "learning_rate": 7.400072060303009e-07, "loss": 0.3623, "step": 20735 }, { "epoch": 0.3604442976585722, "grad_norm": 1.571854747210566, "learning_rate": 7.39982511345846e-07, "loss": 0.3707, "step": 20736 }, { "epoch": 0.360461680196075, "grad_norm": 1.100937251982044, "learning_rate": 7.399578159007568e-07, "loss": 0.2616, "step": 20737 }, { "epoch": 0.3604790627335778, "grad_norm": 1.8457558432483303, "learning_rate": 7.399331196951115e-07, "loss": 0.3427, "step": 20738 }, { "epoch": 0.36049644527108066, "grad_norm": 1.5428670443791284, "learning_rate": 7.399084227289884e-07, "loss": 0.421, "step": 20739 }, { "epoch": 0.3605138278085835, "grad_norm": 1.848414458318529, "learning_rate": 7.39883725002466e-07, "loss": 0.284, "step": 20740 }, { "epoch": 0.3605312103460863, "grad_norm": 1.7614566406348193, "learning_rate": 7.398590265156221e-07, "loss": 0.3178, "step": 20741 }, { "epoch": 0.36054859288358915, "grad_norm": 1.7953996581189493, "learning_rate": 7.398343272685355e-07, "loss": 0.3088, "step": 20742 }, { "epoch": 0.360565975421092, "grad_norm": 0.9577664419937046, "learning_rate": 7.398096272612841e-07, "loss": 0.2233, "step": 20743 }, { "epoch": 0.3605833579585948, "grad_norm": 1.2587599294943257, "learning_rate": 7.397849264939464e-07, "loss": 0.4231, "step": 20744 }, { "epoch": 0.36060074049609764, "grad_norm": 1.684112307379599, "learning_rate": 7.397602249666006e-07, "loss": 0.2699, "step": 20745 }, { "epoch": 0.36061812303360047, "grad_norm": 1.5278349684630825, "learning_rate": 7.397355226793252e-07, "loss": 0.4186, "step": 20746 }, { "epoch": 0.36063550557110324, "grad_norm": 1.842624537755523, "learning_rate": 7.397108196321979e-07, "loss": 0.2096, "step": 20747 }, { "epoch": 0.3606528881086061, "grad_norm": 1.7529887578894754, "learning_rate": 7.396861158252978e-07, "loss": 0.3431, "step": 20748 }, { "epoch": 0.3606702706461089, "grad_norm": 1.708625372841474, "learning_rate": 7.396614112587028e-07, "loss": 0.2829, "step": 20749 }, { "epoch": 0.36068765318361173, "grad_norm": 3.9305458740501016, "learning_rate": 7.396367059324911e-07, "loss": 0.3936, "step": 20750 }, { "epoch": 0.36070503572111456, "grad_norm": 1.7856442446425143, "learning_rate": 7.39611999846741e-07, "loss": 0.1878, "step": 20751 }, { "epoch": 0.3607224182586174, "grad_norm": 1.959398016737584, "learning_rate": 7.395872930015311e-07, "loss": 0.4052, "step": 20752 }, { "epoch": 0.3607398007961202, "grad_norm": 1.3311969377787907, "learning_rate": 7.395625853969396e-07, "loss": 0.185, "step": 20753 }, { "epoch": 0.36075718333362305, "grad_norm": 1.8564880026990909, "learning_rate": 7.395378770330446e-07, "loss": 0.3264, "step": 20754 }, { "epoch": 0.3607745658711259, "grad_norm": 1.5457416312024466, "learning_rate": 7.395131679099246e-07, "loss": 0.2977, "step": 20755 }, { "epoch": 0.3607919484086287, "grad_norm": 2.122931185040081, "learning_rate": 7.394884580276579e-07, "loss": 0.7343, "step": 20756 }, { "epoch": 0.3608093309461315, "grad_norm": 2.067049860683891, "learning_rate": 7.394637473863228e-07, "loss": 0.4072, "step": 20757 }, { "epoch": 0.3608267134836343, "grad_norm": 1.9475774308018639, "learning_rate": 7.394390359859977e-07, "loss": 0.2377, "step": 20758 }, { "epoch": 0.36084409602113715, "grad_norm": 1.4950930037427919, "learning_rate": 7.394143238267607e-07, "loss": 0.3205, "step": 20759 }, { "epoch": 0.36086147855864, "grad_norm": 2.243988960816153, "learning_rate": 7.393896109086903e-07, "loss": 0.3639, "step": 20760 }, { "epoch": 0.3608788610961428, "grad_norm": 1.2769183019990138, "learning_rate": 7.393648972318649e-07, "loss": 0.2899, "step": 20761 }, { "epoch": 0.36089624363364564, "grad_norm": 3.996645799535903, "learning_rate": 7.393401827963627e-07, "loss": 0.4776, "step": 20762 }, { "epoch": 0.36091362617114847, "grad_norm": 1.1618730657015466, "learning_rate": 7.393154676022621e-07, "loss": 0.157, "step": 20763 }, { "epoch": 0.3609310087086513, "grad_norm": 1.4877477949647546, "learning_rate": 7.392907516496412e-07, "loss": 0.2126, "step": 20764 }, { "epoch": 0.36094839124615413, "grad_norm": 1.5130849103423827, "learning_rate": 7.392660349385788e-07, "loss": 0.2311, "step": 20765 }, { "epoch": 0.36096577378365696, "grad_norm": 1.3564852961878506, "learning_rate": 7.392413174691527e-07, "loss": 0.1705, "step": 20766 }, { "epoch": 0.36098315632115974, "grad_norm": 1.41191701704679, "learning_rate": 7.392165992414418e-07, "loss": 0.2385, "step": 20767 }, { "epoch": 0.36100053885866257, "grad_norm": 2.3720362409995577, "learning_rate": 7.39191880255524e-07, "loss": 0.3276, "step": 20768 }, { "epoch": 0.3610179213961654, "grad_norm": 2.525296939994284, "learning_rate": 7.391671605114779e-07, "loss": 0.4342, "step": 20769 }, { "epoch": 0.36103530393366823, "grad_norm": 1.6303595579030656, "learning_rate": 7.391424400093817e-07, "loss": 0.2716, "step": 20770 }, { "epoch": 0.36105268647117106, "grad_norm": 1.7403500500506333, "learning_rate": 7.391177187493139e-07, "loss": 0.3183, "step": 20771 }, { "epoch": 0.3610700690086739, "grad_norm": 1.9349723238480188, "learning_rate": 7.390929967313527e-07, "loss": 0.5118, "step": 20772 }, { "epoch": 0.3610874515461767, "grad_norm": 2.0882944158052354, "learning_rate": 7.390682739555765e-07, "loss": 0.5096, "step": 20773 }, { "epoch": 0.36110483408367955, "grad_norm": 1.8392347635395596, "learning_rate": 7.390435504220636e-07, "loss": 0.2055, "step": 20774 }, { "epoch": 0.3611222166211824, "grad_norm": 1.8046356243568478, "learning_rate": 7.390188261308927e-07, "loss": 0.4946, "step": 20775 }, { "epoch": 0.3611395991586852, "grad_norm": 1.37082677999476, "learning_rate": 7.389941010821418e-07, "loss": 0.3256, "step": 20776 }, { "epoch": 0.361156981696188, "grad_norm": 1.1973524967000932, "learning_rate": 7.389693752758894e-07, "loss": 0.3626, "step": 20777 }, { "epoch": 0.3611743642336908, "grad_norm": 2.7405940049621846, "learning_rate": 7.389446487122139e-07, "loss": 0.2652, "step": 20778 }, { "epoch": 0.36119174677119364, "grad_norm": 1.321357333848307, "learning_rate": 7.389199213911936e-07, "loss": 0.3955, "step": 20779 }, { "epoch": 0.3612091293086965, "grad_norm": 1.0753760973364337, "learning_rate": 7.388951933129067e-07, "loss": 0.3162, "step": 20780 }, { "epoch": 0.3612265118461993, "grad_norm": 1.4941930533923158, "learning_rate": 7.388704644774322e-07, "loss": 0.3606, "step": 20781 }, { "epoch": 0.36124389438370214, "grad_norm": 1.5435541039886889, "learning_rate": 7.388457348848476e-07, "loss": 0.2822, "step": 20782 }, { "epoch": 0.36126127692120497, "grad_norm": 1.5452761490645073, "learning_rate": 7.38821004535232e-07, "loss": 0.3188, "step": 20783 }, { "epoch": 0.3612786594587078, "grad_norm": 2.6440026214525005, "learning_rate": 7.387962734286634e-07, "loss": 0.3449, "step": 20784 }, { "epoch": 0.3612960419962106, "grad_norm": 1.8231245966024352, "learning_rate": 7.387715415652204e-07, "loss": 0.2261, "step": 20785 }, { "epoch": 0.36131342453371346, "grad_norm": 2.9918745627241754, "learning_rate": 7.387468089449813e-07, "loss": 0.3348, "step": 20786 }, { "epoch": 0.36133080707121623, "grad_norm": 3.351196914920892, "learning_rate": 7.387220755680245e-07, "loss": 0.4104, "step": 20787 }, { "epoch": 0.36134818960871906, "grad_norm": 1.8738056805011474, "learning_rate": 7.386973414344283e-07, "loss": 0.4377, "step": 20788 }, { "epoch": 0.3613655721462219, "grad_norm": 2.2806848896191547, "learning_rate": 7.386726065442711e-07, "loss": 0.2059, "step": 20789 }, { "epoch": 0.3613829546837247, "grad_norm": 1.51929508847721, "learning_rate": 7.386478708976315e-07, "loss": 0.3442, "step": 20790 }, { "epoch": 0.36140033722122755, "grad_norm": 1.7123791249744358, "learning_rate": 7.386231344945878e-07, "loss": 0.3541, "step": 20791 }, { "epoch": 0.3614177197587304, "grad_norm": 1.1030410373472292, "learning_rate": 7.385983973352184e-07, "loss": 0.6447, "step": 20792 }, { "epoch": 0.3614351022962332, "grad_norm": 1.2940139047127546, "learning_rate": 7.385736594196014e-07, "loss": 0.3227, "step": 20793 }, { "epoch": 0.36145248483373604, "grad_norm": 1.724023170628462, "learning_rate": 7.38548920747816e-07, "loss": 0.309, "step": 20794 }, { "epoch": 0.3614698673712389, "grad_norm": 2.0214696664667335, "learning_rate": 7.385241813199396e-07, "loss": 0.3137, "step": 20795 }, { "epoch": 0.3614872499087417, "grad_norm": 2.513454721315062, "learning_rate": 7.384994411360515e-07, "loss": 0.391, "step": 20796 }, { "epoch": 0.3615046324462445, "grad_norm": 2.326755514083111, "learning_rate": 7.384747001962295e-07, "loss": 0.4472, "step": 20797 }, { "epoch": 0.3615220149837473, "grad_norm": 2.351760426711152, "learning_rate": 7.384499585005524e-07, "loss": 0.2795, "step": 20798 }, { "epoch": 0.36153939752125014, "grad_norm": 2.057115108178673, "learning_rate": 7.384252160490983e-07, "loss": 0.3465, "step": 20799 }, { "epoch": 0.36155678005875297, "grad_norm": 2.5975330551810307, "learning_rate": 7.384004728419459e-07, "loss": 0.4274, "step": 20800 }, { "epoch": 0.3615741625962558, "grad_norm": 2.8090029875338, "learning_rate": 7.383757288791734e-07, "loss": 0.3149, "step": 20801 }, { "epoch": 0.36159154513375863, "grad_norm": 1.5582647594921895, "learning_rate": 7.383509841608594e-07, "loss": 0.2159, "step": 20802 }, { "epoch": 0.36160892767126146, "grad_norm": 1.854315137877038, "learning_rate": 7.383262386870823e-07, "loss": 0.267, "step": 20803 }, { "epoch": 0.3616263102087643, "grad_norm": 1.809253172267412, "learning_rate": 7.383014924579205e-07, "loss": 0.5813, "step": 20804 }, { "epoch": 0.3616436927462671, "grad_norm": 1.6016732343014113, "learning_rate": 7.382767454734524e-07, "loss": 0.3177, "step": 20805 }, { "epoch": 0.36166107528376995, "grad_norm": 1.5925412984445246, "learning_rate": 7.382519977337566e-07, "loss": 0.3307, "step": 20806 }, { "epoch": 0.3616784578212727, "grad_norm": 2.1411481286670653, "learning_rate": 7.382272492389112e-07, "loss": 0.4022, "step": 20807 }, { "epoch": 0.36169584035877556, "grad_norm": 1.5611072547476432, "learning_rate": 7.38202499988995e-07, "loss": 0.3446, "step": 20808 }, { "epoch": 0.3617132228962784, "grad_norm": 2.269530592876313, "learning_rate": 7.381777499840862e-07, "loss": 0.2606, "step": 20809 }, { "epoch": 0.3617306054337812, "grad_norm": 1.6529831766512766, "learning_rate": 7.381529992242635e-07, "loss": 0.3857, "step": 20810 }, { "epoch": 0.36174798797128405, "grad_norm": 1.6907208540142558, "learning_rate": 7.381282477096049e-07, "loss": 0.3352, "step": 20811 }, { "epoch": 0.3617653705087869, "grad_norm": 1.9260843533618566, "learning_rate": 7.381034954401894e-07, "loss": 0.3496, "step": 20812 }, { "epoch": 0.3617827530462897, "grad_norm": 2.1350755480475385, "learning_rate": 7.380787424160951e-07, "loss": 0.1756, "step": 20813 }, { "epoch": 0.36180013558379254, "grad_norm": 1.252473573641436, "learning_rate": 7.380539886374007e-07, "loss": 0.2178, "step": 20814 }, { "epoch": 0.36181751812129537, "grad_norm": 1.6171088843488564, "learning_rate": 7.380292341041842e-07, "loss": 0.2044, "step": 20815 }, { "epoch": 0.3618349006587982, "grad_norm": 0.9653683222574155, "learning_rate": 7.380044788165246e-07, "loss": 0.3085, "step": 20816 }, { "epoch": 0.361852283196301, "grad_norm": 1.3625990683155642, "learning_rate": 7.379797227745001e-07, "loss": 0.2595, "step": 20817 }, { "epoch": 0.3618696657338038, "grad_norm": 1.7647100345536237, "learning_rate": 7.379549659781892e-07, "loss": 0.2933, "step": 20818 }, { "epoch": 0.36188704827130663, "grad_norm": 1.5177351794995328, "learning_rate": 7.379302084276704e-07, "loss": 0.2854, "step": 20819 }, { "epoch": 0.36190443080880946, "grad_norm": 1.982127404257349, "learning_rate": 7.379054501230219e-07, "loss": 0.3967, "step": 20820 }, { "epoch": 0.3619218133463123, "grad_norm": 2.9960471000655113, "learning_rate": 7.378806910643227e-07, "loss": 0.4546, "step": 20821 }, { "epoch": 0.3619391958838151, "grad_norm": 2.3474722155103733, "learning_rate": 7.378559312516509e-07, "loss": 0.2875, "step": 20822 }, { "epoch": 0.36195657842131795, "grad_norm": 2.5302154532221857, "learning_rate": 7.378311706850851e-07, "loss": 0.4611, "step": 20823 }, { "epoch": 0.3619739609588208, "grad_norm": 1.9617719906546707, "learning_rate": 7.378064093647035e-07, "loss": 0.2553, "step": 20824 }, { "epoch": 0.3619913434963236, "grad_norm": 1.634909324670165, "learning_rate": 7.377816472905851e-07, "loss": 0.1694, "step": 20825 }, { "epoch": 0.36200872603382644, "grad_norm": 1.4444971135115083, "learning_rate": 7.377568844628079e-07, "loss": 0.3944, "step": 20826 }, { "epoch": 0.3620261085713292, "grad_norm": 1.8454462124080038, "learning_rate": 7.377321208814507e-07, "loss": 0.3339, "step": 20827 }, { "epoch": 0.36204349110883205, "grad_norm": 1.5269691391658995, "learning_rate": 7.37707356546592e-07, "loss": 0.1776, "step": 20828 }, { "epoch": 0.3620608736463349, "grad_norm": 1.6354414333905092, "learning_rate": 7.3768259145831e-07, "loss": 0.409, "step": 20829 }, { "epoch": 0.3620782561838377, "grad_norm": 1.5955539906068998, "learning_rate": 7.376578256166834e-07, "loss": 0.4028, "step": 20830 }, { "epoch": 0.36209563872134054, "grad_norm": 1.4949748332715262, "learning_rate": 7.376330590217908e-07, "loss": 0.2194, "step": 20831 }, { "epoch": 0.36211302125884337, "grad_norm": 1.8785071702340213, "learning_rate": 7.376082916737103e-07, "loss": 0.3447, "step": 20832 }, { "epoch": 0.3621304037963462, "grad_norm": 1.4265911277404961, "learning_rate": 7.375835235725209e-07, "loss": 0.3484, "step": 20833 }, { "epoch": 0.36214778633384903, "grad_norm": 2.709411020914087, "learning_rate": 7.375587547183007e-07, "loss": 0.3715, "step": 20834 }, { "epoch": 0.36216516887135186, "grad_norm": 1.7514233797552354, "learning_rate": 7.375339851111286e-07, "loss": 0.2492, "step": 20835 }, { "epoch": 0.3621825514088547, "grad_norm": 2.0221612388518273, "learning_rate": 7.375092147510826e-07, "loss": 0.2523, "step": 20836 }, { "epoch": 0.36219993394635747, "grad_norm": 1.2224276593325574, "learning_rate": 7.374844436382417e-07, "loss": 0.3465, "step": 20837 }, { "epoch": 0.3622173164838603, "grad_norm": 4.952971131497963, "learning_rate": 7.374596717726841e-07, "loss": 0.301, "step": 20838 }, { "epoch": 0.3622346990213631, "grad_norm": 1.7204993876495058, "learning_rate": 7.374348991544884e-07, "loss": 0.3543, "step": 20839 }, { "epoch": 0.36225208155886596, "grad_norm": 1.38275663061073, "learning_rate": 7.374101257837332e-07, "loss": 0.1796, "step": 20840 }, { "epoch": 0.3622694640963688, "grad_norm": 2.5663041832017215, "learning_rate": 7.373853516604968e-07, "loss": 0.3862, "step": 20841 }, { "epoch": 0.3622868466338716, "grad_norm": 3.491841925304712, "learning_rate": 7.373605767848581e-07, "loss": 0.5224, "step": 20842 }, { "epoch": 0.36230422917137445, "grad_norm": 2.1266822494847037, "learning_rate": 7.373358011568955e-07, "loss": 0.3863, "step": 20843 }, { "epoch": 0.3623216117088773, "grad_norm": 2.3899152088470728, "learning_rate": 7.373110247766872e-07, "loss": 0.3137, "step": 20844 }, { "epoch": 0.3623389942463801, "grad_norm": 1.4282973032712438, "learning_rate": 7.372862476443119e-07, "loss": 0.3609, "step": 20845 }, { "epoch": 0.36235637678388294, "grad_norm": 1.9896928326644228, "learning_rate": 7.372614697598484e-07, "loss": 0.2207, "step": 20846 }, { "epoch": 0.3623737593213857, "grad_norm": 1.5835661245404329, "learning_rate": 7.372366911233749e-07, "loss": 0.2615, "step": 20847 }, { "epoch": 0.36239114185888854, "grad_norm": 1.9266146138229723, "learning_rate": 7.372119117349702e-07, "loss": 0.247, "step": 20848 }, { "epoch": 0.3624085243963914, "grad_norm": 1.5492661762919258, "learning_rate": 7.371871315947125e-07, "loss": 0.3654, "step": 20849 }, { "epoch": 0.3624259069338942, "grad_norm": 1.5609267722691695, "learning_rate": 7.371623507026809e-07, "loss": 0.3124, "step": 20850 }, { "epoch": 0.36244328947139703, "grad_norm": 2.004016279262057, "learning_rate": 7.371375690589532e-07, "loss": 0.1771, "step": 20851 }, { "epoch": 0.36246067200889986, "grad_norm": 1.559825863903541, "learning_rate": 7.371127866636085e-07, "loss": 0.3064, "step": 20852 }, { "epoch": 0.3624780545464027, "grad_norm": 2.2052129478901397, "learning_rate": 7.370880035167251e-07, "loss": 0.25, "step": 20853 }, { "epoch": 0.3624954370839055, "grad_norm": 2.125983547278548, "learning_rate": 7.370632196183819e-07, "loss": 0.2968, "step": 20854 }, { "epoch": 0.36251281962140836, "grad_norm": 1.829362276133226, "learning_rate": 7.370384349686569e-07, "loss": 0.3809, "step": 20855 }, { "epoch": 0.3625302021589112, "grad_norm": 1.6211406001025048, "learning_rate": 7.370136495676291e-07, "loss": 0.2857, "step": 20856 }, { "epoch": 0.36254758469641396, "grad_norm": 3.2693070727513844, "learning_rate": 7.36988863415377e-07, "loss": 0.4156, "step": 20857 }, { "epoch": 0.3625649672339168, "grad_norm": 1.9202564154493533, "learning_rate": 7.369640765119789e-07, "loss": 0.5002, "step": 20858 }, { "epoch": 0.3625823497714196, "grad_norm": 2.674527870674111, "learning_rate": 7.369392888575135e-07, "loss": 0.2815, "step": 20859 }, { "epoch": 0.36259973230892245, "grad_norm": 1.5725097095395373, "learning_rate": 7.369145004520596e-07, "loss": 0.3316, "step": 20860 }, { "epoch": 0.3626171148464253, "grad_norm": 1.4894221462219928, "learning_rate": 7.368897112956953e-07, "loss": 0.4094, "step": 20861 }, { "epoch": 0.3626344973839281, "grad_norm": 1.8957207892312198, "learning_rate": 7.368649213884996e-07, "loss": 0.4125, "step": 20862 }, { "epoch": 0.36265187992143094, "grad_norm": 1.4095733811693751, "learning_rate": 7.368401307305508e-07, "loss": 0.24, "step": 20863 }, { "epoch": 0.36266926245893377, "grad_norm": 1.564669383595854, "learning_rate": 7.368153393219277e-07, "loss": 0.2915, "step": 20864 }, { "epoch": 0.3626866449964366, "grad_norm": 1.4858939790391743, "learning_rate": 7.367905471627087e-07, "loss": 0.235, "step": 20865 }, { "epoch": 0.3627040275339394, "grad_norm": 2.06401160792482, "learning_rate": 7.367657542529726e-07, "loss": 0.2338, "step": 20866 }, { "epoch": 0.3627214100714422, "grad_norm": 1.7049465861539015, "learning_rate": 7.367409605927976e-07, "loss": 0.285, "step": 20867 }, { "epoch": 0.36273879260894504, "grad_norm": 2.7194574240366864, "learning_rate": 7.367161661822625e-07, "loss": 0.3216, "step": 20868 }, { "epoch": 0.36275617514644787, "grad_norm": 2.536391881331255, "learning_rate": 7.366913710214459e-07, "loss": 0.349, "step": 20869 }, { "epoch": 0.3627735576839507, "grad_norm": 1.6829555197525616, "learning_rate": 7.366665751104266e-07, "loss": 0.3292, "step": 20870 }, { "epoch": 0.36279094022145353, "grad_norm": 1.4723611591840569, "learning_rate": 7.366417784492827e-07, "loss": 0.3005, "step": 20871 }, { "epoch": 0.36280832275895636, "grad_norm": 0.9436046643363916, "learning_rate": 7.366169810380932e-07, "loss": 0.1726, "step": 20872 }, { "epoch": 0.3628257052964592, "grad_norm": 1.4803861857489082, "learning_rate": 7.365921828769366e-07, "loss": 0.1755, "step": 20873 }, { "epoch": 0.362843087833962, "grad_norm": 1.487083282455412, "learning_rate": 7.365673839658915e-07, "loss": 0.3902, "step": 20874 }, { "epoch": 0.36286047037146485, "grad_norm": 1.3887424167722442, "learning_rate": 7.365425843050362e-07, "loss": 0.3807, "step": 20875 }, { "epoch": 0.3628778529089676, "grad_norm": 1.4655469545136388, "learning_rate": 7.365177838944497e-07, "loss": 0.3692, "step": 20876 }, { "epoch": 0.36289523544647045, "grad_norm": 1.5736481934572752, "learning_rate": 7.364929827342106e-07, "loss": 0.3661, "step": 20877 }, { "epoch": 0.3629126179839733, "grad_norm": 1.9153661242570583, "learning_rate": 7.364681808243973e-07, "loss": 0.2828, "step": 20878 }, { "epoch": 0.3629300005214761, "grad_norm": 2.2883083811375706, "learning_rate": 7.364433781650885e-07, "loss": 0.5147, "step": 20879 }, { "epoch": 0.36294738305897895, "grad_norm": 1.6970115427279389, "learning_rate": 7.364185747563627e-07, "loss": 0.3941, "step": 20880 }, { "epoch": 0.3629647655964818, "grad_norm": 1.1374287123331364, "learning_rate": 7.363937705982988e-07, "loss": 0.2249, "step": 20881 }, { "epoch": 0.3629821481339846, "grad_norm": 3.7356509069758665, "learning_rate": 7.363689656909751e-07, "loss": 0.2433, "step": 20882 }, { "epoch": 0.36299953067148744, "grad_norm": 1.9217061212513176, "learning_rate": 7.363441600344704e-07, "loss": 0.4613, "step": 20883 }, { "epoch": 0.36301691320899027, "grad_norm": 1.6737323594500815, "learning_rate": 7.363193536288633e-07, "loss": 0.4163, "step": 20884 }, { "epoch": 0.3630342957464931, "grad_norm": 1.9687943530487997, "learning_rate": 7.362945464742324e-07, "loss": 0.2874, "step": 20885 }, { "epoch": 0.36305167828399587, "grad_norm": 1.7238433200249796, "learning_rate": 7.362697385706562e-07, "loss": 0.3497, "step": 20886 }, { "epoch": 0.3630690608214987, "grad_norm": 1.0591213428871276, "learning_rate": 7.362449299182136e-07, "loss": 0.3594, "step": 20887 }, { "epoch": 0.36308644335900153, "grad_norm": 3.272371246111885, "learning_rate": 7.362201205169831e-07, "loss": 0.5261, "step": 20888 }, { "epoch": 0.36310382589650436, "grad_norm": 1.4698526176061042, "learning_rate": 7.361953103670434e-07, "loss": 0.358, "step": 20889 }, { "epoch": 0.3631212084340072, "grad_norm": 1.3314891096215222, "learning_rate": 7.361704994684729e-07, "loss": 0.4624, "step": 20890 }, { "epoch": 0.36313859097151, "grad_norm": 1.7108016761945886, "learning_rate": 7.361456878213505e-07, "loss": 0.3342, "step": 20891 }, { "epoch": 0.36315597350901285, "grad_norm": 1.7845491748149043, "learning_rate": 7.361208754257547e-07, "loss": 0.6189, "step": 20892 }, { "epoch": 0.3631733560465157, "grad_norm": 1.4556200097321765, "learning_rate": 7.360960622817643e-07, "loss": 0.2299, "step": 20893 }, { "epoch": 0.3631907385840185, "grad_norm": 1.686039391758269, "learning_rate": 7.360712483894575e-07, "loss": 0.4074, "step": 20894 }, { "epoch": 0.36320812112152134, "grad_norm": 4.1319667302202765, "learning_rate": 7.360464337489137e-07, "loss": 0.3326, "step": 20895 }, { "epoch": 0.3632255036590241, "grad_norm": 2.3581370154268155, "learning_rate": 7.360216183602109e-07, "loss": 0.3707, "step": 20896 }, { "epoch": 0.36324288619652695, "grad_norm": 0.8940229117700265, "learning_rate": 7.359968022234282e-07, "loss": 0.2183, "step": 20897 }, { "epoch": 0.3632602687340298, "grad_norm": 1.3005938794909755, "learning_rate": 7.359719853386439e-07, "loss": 0.3186, "step": 20898 }, { "epoch": 0.3632776512715326, "grad_norm": 1.5833706656746327, "learning_rate": 7.359471677059368e-07, "loss": 0.3973, "step": 20899 }, { "epoch": 0.36329503380903544, "grad_norm": 2.544576305232489, "learning_rate": 7.359223493253855e-07, "loss": 0.9248, "step": 20900 }, { "epoch": 0.36331241634653827, "grad_norm": 2.549576627466687, "learning_rate": 7.358975301970688e-07, "loss": 0.4509, "step": 20901 }, { "epoch": 0.3633297988840411, "grad_norm": 1.9787490755274901, "learning_rate": 7.358727103210653e-07, "loss": 0.1937, "step": 20902 }, { "epoch": 0.36334718142154393, "grad_norm": 1.763196652525551, "learning_rate": 7.358478896974536e-07, "loss": 0.3567, "step": 20903 }, { "epoch": 0.36336456395904676, "grad_norm": 1.5264286806173109, "learning_rate": 7.358230683263126e-07, "loss": 0.3273, "step": 20904 }, { "epoch": 0.3633819464965496, "grad_norm": 1.2789380501419332, "learning_rate": 7.357982462077205e-07, "loss": 0.3374, "step": 20905 }, { "epoch": 0.36339932903405237, "grad_norm": 1.9821163651605824, "learning_rate": 7.357734233417565e-07, "loss": 0.374, "step": 20906 }, { "epoch": 0.3634167115715552, "grad_norm": 1.3545974339364282, "learning_rate": 7.357485997284992e-07, "loss": 0.312, "step": 20907 }, { "epoch": 0.363434094109058, "grad_norm": 1.3256017175395083, "learning_rate": 7.357237753680268e-07, "loss": 0.2165, "step": 20908 }, { "epoch": 0.36345147664656086, "grad_norm": 1.4266876925263132, "learning_rate": 7.356989502604184e-07, "loss": 0.2675, "step": 20909 }, { "epoch": 0.3634688591840637, "grad_norm": 1.8565233670305603, "learning_rate": 7.356741244057528e-07, "loss": 0.3823, "step": 20910 }, { "epoch": 0.3634862417215665, "grad_norm": 1.720381557139761, "learning_rate": 7.356492978041083e-07, "loss": 0.2102, "step": 20911 }, { "epoch": 0.36350362425906935, "grad_norm": 1.124481543191527, "learning_rate": 7.356244704555638e-07, "loss": 0.2405, "step": 20912 }, { "epoch": 0.3635210067965722, "grad_norm": 2.370789253214771, "learning_rate": 7.355996423601979e-07, "loss": 0.389, "step": 20913 }, { "epoch": 0.363538389334075, "grad_norm": 1.7114899297500694, "learning_rate": 7.355748135180895e-07, "loss": 0.4401, "step": 20914 }, { "epoch": 0.36355577187157784, "grad_norm": 1.2062253826363698, "learning_rate": 7.35549983929317e-07, "loss": 0.3398, "step": 20915 }, { "epoch": 0.3635731544090806, "grad_norm": 2.0310259268769424, "learning_rate": 7.355251535939594e-07, "loss": 0.2899, "step": 20916 }, { "epoch": 0.36359053694658344, "grad_norm": 1.488681720389705, "learning_rate": 7.355003225120953e-07, "loss": 0.2618, "step": 20917 }, { "epoch": 0.3636079194840863, "grad_norm": 1.969599938189534, "learning_rate": 7.354754906838033e-07, "loss": 0.3457, "step": 20918 }, { "epoch": 0.3636253020215891, "grad_norm": 1.860768459921512, "learning_rate": 7.354506581091619e-07, "loss": 0.4078, "step": 20919 }, { "epoch": 0.36364268455909193, "grad_norm": 1.1677449821619958, "learning_rate": 7.354258247882504e-07, "loss": 0.3421, "step": 20920 }, { "epoch": 0.36366006709659476, "grad_norm": 1.2518577149649948, "learning_rate": 7.35400990721147e-07, "loss": 0.3866, "step": 20921 }, { "epoch": 0.3636774496340976, "grad_norm": 2.4381861545262975, "learning_rate": 7.353761559079308e-07, "loss": 0.6094, "step": 20922 }, { "epoch": 0.3636948321716004, "grad_norm": 1.4447868887110398, "learning_rate": 7.353513203486801e-07, "loss": 0.4277, "step": 20923 }, { "epoch": 0.36371221470910325, "grad_norm": 1.7524329205518319, "learning_rate": 7.35326484043474e-07, "loss": 0.2921, "step": 20924 }, { "epoch": 0.3637295972466061, "grad_norm": 2.318377575906978, "learning_rate": 7.353016469923908e-07, "loss": 0.5058, "step": 20925 }, { "epoch": 0.36374697978410886, "grad_norm": 1.3565955360736206, "learning_rate": 7.352768091955097e-07, "loss": 0.1664, "step": 20926 }, { "epoch": 0.3637643623216117, "grad_norm": 2.182580856520277, "learning_rate": 7.352519706529091e-07, "loss": 0.3796, "step": 20927 }, { "epoch": 0.3637817448591145, "grad_norm": 4.671090359358912, "learning_rate": 7.352271313646678e-07, "loss": 0.4894, "step": 20928 }, { "epoch": 0.36379912739661735, "grad_norm": 1.9215535197458424, "learning_rate": 7.352022913308646e-07, "loss": 0.4112, "step": 20929 }, { "epoch": 0.3638165099341202, "grad_norm": 1.9705731254777588, "learning_rate": 7.351774505515781e-07, "loss": 0.3136, "step": 20930 }, { "epoch": 0.363833892471623, "grad_norm": 0.8136619332802592, "learning_rate": 7.351526090268873e-07, "loss": 0.2895, "step": 20931 }, { "epoch": 0.36385127500912584, "grad_norm": 2.6413044302659094, "learning_rate": 7.351277667568705e-07, "loss": 0.3109, "step": 20932 }, { "epoch": 0.36386865754662867, "grad_norm": 3.570478229421702, "learning_rate": 7.351029237416069e-07, "loss": 0.2556, "step": 20933 }, { "epoch": 0.3638860400841315, "grad_norm": 1.5810488667778075, "learning_rate": 7.350780799811749e-07, "loss": 0.1896, "step": 20934 }, { "epoch": 0.36390342262163433, "grad_norm": 5.346909401840704, "learning_rate": 7.350532354756533e-07, "loss": 0.5126, "step": 20935 }, { "epoch": 0.3639208051591371, "grad_norm": 1.9783279016790047, "learning_rate": 7.35028390225121e-07, "loss": 0.275, "step": 20936 }, { "epoch": 0.36393818769663994, "grad_norm": 2.9235056859163286, "learning_rate": 7.350035442296568e-07, "loss": 0.4886, "step": 20937 }, { "epoch": 0.36395557023414277, "grad_norm": 1.453099622311526, "learning_rate": 7.349786974893392e-07, "loss": 0.3068, "step": 20938 }, { "epoch": 0.3639729527716456, "grad_norm": 3.4037277713057787, "learning_rate": 7.349538500042472e-07, "loss": 0.5508, "step": 20939 }, { "epoch": 0.3639903353091484, "grad_norm": 1.4113056527065042, "learning_rate": 7.349290017744592e-07, "loss": 0.2614, "step": 20940 }, { "epoch": 0.36400771784665126, "grad_norm": 1.722491428497411, "learning_rate": 7.349041528000542e-07, "loss": 0.3438, "step": 20941 }, { "epoch": 0.3640251003841541, "grad_norm": 2.0092150359407763, "learning_rate": 7.348793030811112e-07, "loss": 0.2886, "step": 20942 }, { "epoch": 0.3640424829216569, "grad_norm": 2.5283933605806994, "learning_rate": 7.348544526177085e-07, "loss": 0.3025, "step": 20943 }, { "epoch": 0.36405986545915975, "grad_norm": 1.732206807247176, "learning_rate": 7.34829601409925e-07, "loss": 0.4753, "step": 20944 }, { "epoch": 0.3640772479966626, "grad_norm": 2.3110059978246826, "learning_rate": 7.348047494578397e-07, "loss": 0.4757, "step": 20945 }, { "epoch": 0.36409463053416535, "grad_norm": 1.3778913010909315, "learning_rate": 7.347798967615311e-07, "loss": 0.2153, "step": 20946 }, { "epoch": 0.3641120130716682, "grad_norm": 3.5079103624893264, "learning_rate": 7.347550433210782e-07, "loss": 0.5885, "step": 20947 }, { "epoch": 0.364129395609171, "grad_norm": 1.3282929899388216, "learning_rate": 7.347301891365596e-07, "loss": 0.2415, "step": 20948 }, { "epoch": 0.36414677814667384, "grad_norm": 2.166202666290947, "learning_rate": 7.347053342080541e-07, "loss": 0.3285, "step": 20949 }, { "epoch": 0.3641641606841767, "grad_norm": 2.580518456841735, "learning_rate": 7.346804785356404e-07, "loss": 0.4067, "step": 20950 }, { "epoch": 0.3641815432216795, "grad_norm": 1.696991283147273, "learning_rate": 7.346556221193975e-07, "loss": 0.33, "step": 20951 }, { "epoch": 0.36419892575918233, "grad_norm": 2.821135741915717, "learning_rate": 7.34630764959404e-07, "loss": 0.3194, "step": 20952 }, { "epoch": 0.36421630829668517, "grad_norm": 2.8892000436598693, "learning_rate": 7.346059070557389e-07, "loss": 0.3887, "step": 20953 }, { "epoch": 0.364233690834188, "grad_norm": 0.8392730886461546, "learning_rate": 7.345810484084807e-07, "loss": 0.3291, "step": 20954 }, { "epoch": 0.3642510733716908, "grad_norm": 1.286415284987151, "learning_rate": 7.345561890177084e-07, "loss": 0.2753, "step": 20955 }, { "epoch": 0.3642684559091936, "grad_norm": 1.9300945405453536, "learning_rate": 7.345313288835006e-07, "loss": 0.2814, "step": 20956 }, { "epoch": 0.36428583844669643, "grad_norm": 1.8990295529775636, "learning_rate": 7.345064680059365e-07, "loss": 0.5017, "step": 20957 }, { "epoch": 0.36430322098419926, "grad_norm": 1.6856288803394333, "learning_rate": 7.344816063850945e-07, "loss": 0.3317, "step": 20958 }, { "epoch": 0.3643206035217021, "grad_norm": 1.0748792685831678, "learning_rate": 7.344567440210534e-07, "loss": 0.1796, "step": 20959 }, { "epoch": 0.3643379860592049, "grad_norm": 1.6011277413012717, "learning_rate": 7.344318809138922e-07, "loss": 0.3389, "step": 20960 }, { "epoch": 0.36435536859670775, "grad_norm": 1.0811453215354045, "learning_rate": 7.344070170636895e-07, "loss": 0.2124, "step": 20961 }, { "epoch": 0.3643727511342106, "grad_norm": 1.0750790007724584, "learning_rate": 7.343821524705244e-07, "loss": 0.3293, "step": 20962 }, { "epoch": 0.3643901336717134, "grad_norm": 1.2105607256292195, "learning_rate": 7.343572871344757e-07, "loss": 0.3724, "step": 20963 }, { "epoch": 0.36440751620921624, "grad_norm": 2.204207859062239, "learning_rate": 7.343324210556218e-07, "loss": 0.3782, "step": 20964 }, { "epoch": 0.3644248987467191, "grad_norm": 1.3100640250103894, "learning_rate": 7.343075542340418e-07, "loss": 0.2413, "step": 20965 }, { "epoch": 0.36444228128422185, "grad_norm": 1.1995825542691798, "learning_rate": 7.342826866698144e-07, "loss": 0.4161, "step": 20966 }, { "epoch": 0.3644596638217247, "grad_norm": 1.3874559547676668, "learning_rate": 7.342578183630187e-07, "loss": 0.2499, "step": 20967 }, { "epoch": 0.3644770463592275, "grad_norm": 2.3856126054463602, "learning_rate": 7.342329493137333e-07, "loss": 0.3556, "step": 20968 }, { "epoch": 0.36449442889673034, "grad_norm": 2.8614179976907836, "learning_rate": 7.342080795220369e-07, "loss": 0.452, "step": 20969 }, { "epoch": 0.36451181143423317, "grad_norm": 1.8774007590882742, "learning_rate": 7.341832089880086e-07, "loss": 0.2687, "step": 20970 }, { "epoch": 0.364529193971736, "grad_norm": 2.1576388858170894, "learning_rate": 7.341583377117271e-07, "loss": 0.3454, "step": 20971 }, { "epoch": 0.36454657650923883, "grad_norm": 3.2724093657168765, "learning_rate": 7.341334656932712e-07, "loss": 0.7876, "step": 20972 }, { "epoch": 0.36456395904674166, "grad_norm": 1.6800351827126894, "learning_rate": 7.341085929327197e-07, "loss": 0.4019, "step": 20973 }, { "epoch": 0.3645813415842445, "grad_norm": 2.0732666235362105, "learning_rate": 7.340837194301517e-07, "loss": 0.3678, "step": 20974 }, { "epoch": 0.3645987241217473, "grad_norm": 3.5079888561347494, "learning_rate": 7.340588451856456e-07, "loss": 0.5653, "step": 20975 }, { "epoch": 0.3646161066592501, "grad_norm": 1.5956046366727206, "learning_rate": 7.340339701992806e-07, "loss": 0.2547, "step": 20976 }, { "epoch": 0.3646334891967529, "grad_norm": 2.1172082474957317, "learning_rate": 7.340090944711353e-07, "loss": 0.3602, "step": 20977 }, { "epoch": 0.36465087173425575, "grad_norm": 2.0043998344169425, "learning_rate": 7.339842180012889e-07, "loss": 0.4632, "step": 20978 }, { "epoch": 0.3646682542717586, "grad_norm": 1.8248163032413967, "learning_rate": 7.339593407898198e-07, "loss": 0.3463, "step": 20979 }, { "epoch": 0.3646856368092614, "grad_norm": 2.669376587036776, "learning_rate": 7.339344628368073e-07, "loss": 0.3004, "step": 20980 }, { "epoch": 0.36470301934676425, "grad_norm": 1.6115421338812652, "learning_rate": 7.339095841423297e-07, "loss": 0.245, "step": 20981 }, { "epoch": 0.3647204018842671, "grad_norm": 1.3086393555424358, "learning_rate": 7.338847047064663e-07, "loss": 0.3037, "step": 20982 }, { "epoch": 0.3647377844217699, "grad_norm": 2.6012510945936724, "learning_rate": 7.33859824529296e-07, "loss": 0.3241, "step": 20983 }, { "epoch": 0.36475516695927274, "grad_norm": 1.2306186874067333, "learning_rate": 7.338349436108973e-07, "loss": 0.4762, "step": 20984 }, { "epoch": 0.36477254949677557, "grad_norm": 1.7643029875792609, "learning_rate": 7.338100619513492e-07, "loss": 0.4178, "step": 20985 }, { "epoch": 0.36478993203427834, "grad_norm": 2.81762124697349, "learning_rate": 7.337851795507308e-07, "loss": 0.3339, "step": 20986 }, { "epoch": 0.36480731457178117, "grad_norm": 1.6202986134875832, "learning_rate": 7.337602964091206e-07, "loss": 0.2071, "step": 20987 }, { "epoch": 0.364824697109284, "grad_norm": 1.5661011161500413, "learning_rate": 7.337354125265976e-07, "loss": 0.2384, "step": 20988 }, { "epoch": 0.36484207964678683, "grad_norm": 1.2354324832050856, "learning_rate": 7.337105279032409e-07, "loss": 0.1999, "step": 20989 }, { "epoch": 0.36485946218428966, "grad_norm": 1.9204365924270783, "learning_rate": 7.33685642539129e-07, "loss": 0.428, "step": 20990 }, { "epoch": 0.3648768447217925, "grad_norm": 1.5748579540345107, "learning_rate": 7.336607564343411e-07, "loss": 0.2919, "step": 20991 }, { "epoch": 0.3648942272592953, "grad_norm": 1.5748715249322796, "learning_rate": 7.336358695889559e-07, "loss": 0.3428, "step": 20992 }, { "epoch": 0.36491160979679815, "grad_norm": 1.9354082049128516, "learning_rate": 7.336109820030523e-07, "loss": 0.3251, "step": 20993 }, { "epoch": 0.364928992334301, "grad_norm": 2.5709632403501628, "learning_rate": 7.335860936767091e-07, "loss": 0.2679, "step": 20994 }, { "epoch": 0.3649463748718038, "grad_norm": 1.1909886524636293, "learning_rate": 7.335612046100054e-07, "loss": 0.3875, "step": 20995 }, { "epoch": 0.3649637574093066, "grad_norm": 1.520581200167911, "learning_rate": 7.3353631480302e-07, "loss": 0.3018, "step": 20996 }, { "epoch": 0.3649811399468094, "grad_norm": 1.7021614845072124, "learning_rate": 7.335114242558318e-07, "loss": 0.2517, "step": 20997 }, { "epoch": 0.36499852248431225, "grad_norm": 1.3495192696532472, "learning_rate": 7.334865329685195e-07, "loss": 0.1792, "step": 20998 }, { "epoch": 0.3650159050218151, "grad_norm": 2.1362707553783493, "learning_rate": 7.334616409411623e-07, "loss": 0.3663, "step": 20999 }, { "epoch": 0.3650332875593179, "grad_norm": 1.1445687245747371, "learning_rate": 7.334367481738388e-07, "loss": 0.212, "step": 21000 }, { "epoch": 0.36505067009682074, "grad_norm": 1.7466923241892771, "learning_rate": 7.334118546666281e-07, "loss": 0.3653, "step": 21001 }, { "epoch": 0.36506805263432357, "grad_norm": 1.8815030776368227, "learning_rate": 7.33386960419609e-07, "loss": 0.3766, "step": 21002 }, { "epoch": 0.3650854351718264, "grad_norm": 1.8277169382065213, "learning_rate": 7.333620654328606e-07, "loss": 0.2857, "step": 21003 }, { "epoch": 0.36510281770932923, "grad_norm": 1.694574966567419, "learning_rate": 7.333371697064615e-07, "loss": 0.472, "step": 21004 }, { "epoch": 0.365120200246832, "grad_norm": 3.3161042775411094, "learning_rate": 7.333122732404908e-07, "loss": 0.3146, "step": 21005 }, { "epoch": 0.36513758278433484, "grad_norm": 1.745786512994212, "learning_rate": 7.332873760350273e-07, "loss": 0.3932, "step": 21006 }, { "epoch": 0.36515496532183767, "grad_norm": 1.0960579316241414, "learning_rate": 7.332624780901501e-07, "loss": 0.2559, "step": 21007 }, { "epoch": 0.3651723478593405, "grad_norm": 1.8432460717303745, "learning_rate": 7.33237579405938e-07, "loss": 0.4062, "step": 21008 }, { "epoch": 0.3651897303968433, "grad_norm": 2.294842692286516, "learning_rate": 7.332126799824699e-07, "loss": 0.364, "step": 21009 }, { "epoch": 0.36520711293434616, "grad_norm": 2.30241060147846, "learning_rate": 7.331877798198245e-07, "loss": 0.4141, "step": 21010 }, { "epoch": 0.365224495471849, "grad_norm": 1.6407856988288343, "learning_rate": 7.331628789180813e-07, "loss": 0.2208, "step": 21011 }, { "epoch": 0.3652418780093518, "grad_norm": 2.0124223829898775, "learning_rate": 7.331379772773186e-07, "loss": 0.2459, "step": 21012 }, { "epoch": 0.36525926054685465, "grad_norm": 1.3402250124891397, "learning_rate": 7.331130748976158e-07, "loss": 0.1971, "step": 21013 }, { "epoch": 0.3652766430843575, "grad_norm": 1.0638646144439803, "learning_rate": 7.330881717790515e-07, "loss": 0.2728, "step": 21014 }, { "epoch": 0.36529402562186025, "grad_norm": 2.1742122044401517, "learning_rate": 7.330632679217049e-07, "loss": 0.3748, "step": 21015 }, { "epoch": 0.3653114081593631, "grad_norm": 2.050656296075338, "learning_rate": 7.330383633256547e-07, "loss": 0.2536, "step": 21016 }, { "epoch": 0.3653287906968659, "grad_norm": 1.3626307126034354, "learning_rate": 7.330134579909799e-07, "loss": 0.6162, "step": 21017 }, { "epoch": 0.36534617323436874, "grad_norm": 1.3583235673418708, "learning_rate": 7.329885519177597e-07, "loss": 0.4282, "step": 21018 }, { "epoch": 0.3653635557718716, "grad_norm": 2.697961513265112, "learning_rate": 7.329636451060725e-07, "loss": 0.4464, "step": 21019 }, { "epoch": 0.3653809383093744, "grad_norm": 2.435768044784512, "learning_rate": 7.329387375559977e-07, "loss": 0.3196, "step": 21020 }, { "epoch": 0.36539832084687723, "grad_norm": 1.443489634570751, "learning_rate": 7.329138292676142e-07, "loss": 0.2619, "step": 21021 }, { "epoch": 0.36541570338438006, "grad_norm": 2.7985447803510577, "learning_rate": 7.328889202410008e-07, "loss": 0.217, "step": 21022 }, { "epoch": 0.3654330859218829, "grad_norm": 2.1995610464264863, "learning_rate": 7.328640104762364e-07, "loss": 0.2317, "step": 21023 }, { "epoch": 0.3654504684593857, "grad_norm": 1.9310148672152114, "learning_rate": 7.328390999734004e-07, "loss": 0.2942, "step": 21024 }, { "epoch": 0.3654678509968885, "grad_norm": 2.524191578439492, "learning_rate": 7.32814188732571e-07, "loss": 0.5384, "step": 21025 }, { "epoch": 0.36548523353439133, "grad_norm": 2.0931765535193496, "learning_rate": 7.327892767538277e-07, "loss": 0.4739, "step": 21026 }, { "epoch": 0.36550261607189416, "grad_norm": 1.7930510052103965, "learning_rate": 7.327643640372494e-07, "loss": 0.3218, "step": 21027 }, { "epoch": 0.365519998609397, "grad_norm": 1.9769612350787085, "learning_rate": 7.32739450582915e-07, "loss": 0.3132, "step": 21028 }, { "epoch": 0.3655373811468998, "grad_norm": 2.9895558034926113, "learning_rate": 7.327145363909032e-07, "loss": 0.3206, "step": 21029 }, { "epoch": 0.36555476368440265, "grad_norm": 1.925589238161227, "learning_rate": 7.326896214612935e-07, "loss": 0.4492, "step": 21030 }, { "epoch": 0.3655721462219055, "grad_norm": 2.1356499324026896, "learning_rate": 7.326647057941645e-07, "loss": 0.4507, "step": 21031 }, { "epoch": 0.3655895287594083, "grad_norm": 1.6999673783601004, "learning_rate": 7.326397893895952e-07, "loss": 0.5695, "step": 21032 }, { "epoch": 0.36560691129691114, "grad_norm": 2.0011227691795774, "learning_rate": 7.326148722476646e-07, "loss": 0.4544, "step": 21033 }, { "epoch": 0.36562429383441397, "grad_norm": 1.699108304937792, "learning_rate": 7.32589954368452e-07, "loss": 0.236, "step": 21034 }, { "epoch": 0.36564167637191675, "grad_norm": 1.1204194330601875, "learning_rate": 7.325650357520358e-07, "loss": 0.2486, "step": 21035 }, { "epoch": 0.3656590589094196, "grad_norm": 1.779326063776424, "learning_rate": 7.325401163984955e-07, "loss": 0.1815, "step": 21036 }, { "epoch": 0.3656764414469224, "grad_norm": 1.785365737725476, "learning_rate": 7.325151963079095e-07, "loss": 0.3303, "step": 21037 }, { "epoch": 0.36569382398442524, "grad_norm": 2.19556549085081, "learning_rate": 7.324902754803574e-07, "loss": 0.2721, "step": 21038 }, { "epoch": 0.36571120652192807, "grad_norm": 1.7801939112728504, "learning_rate": 7.324653539159179e-07, "loss": 0.3255, "step": 21039 }, { "epoch": 0.3657285890594309, "grad_norm": 1.485999743264577, "learning_rate": 7.324404316146701e-07, "loss": 0.2121, "step": 21040 }, { "epoch": 0.3657459715969337, "grad_norm": 1.6224016900524603, "learning_rate": 7.324155085766926e-07, "loss": 0.2189, "step": 21041 }, { "epoch": 0.36576335413443656, "grad_norm": 2.8724505084803784, "learning_rate": 7.323905848020649e-07, "loss": 0.3182, "step": 21042 }, { "epoch": 0.3657807366719394, "grad_norm": 1.435941873419387, "learning_rate": 7.323656602908658e-07, "loss": 0.3642, "step": 21043 }, { "epoch": 0.3657981192094422, "grad_norm": 1.0495766404228415, "learning_rate": 7.323407350431744e-07, "loss": 0.2911, "step": 21044 }, { "epoch": 0.365815501746945, "grad_norm": 1.295877515392994, "learning_rate": 7.323158090590695e-07, "loss": 0.3703, "step": 21045 }, { "epoch": 0.3658328842844478, "grad_norm": 1.8914791428799291, "learning_rate": 7.322908823386301e-07, "loss": 0.5691, "step": 21046 }, { "epoch": 0.36585026682195065, "grad_norm": 1.4334070269655173, "learning_rate": 7.322659548819354e-07, "loss": 0.245, "step": 21047 }, { "epoch": 0.3658676493594535, "grad_norm": 1.6575630423705332, "learning_rate": 7.322410266890644e-07, "loss": 0.2183, "step": 21048 }, { "epoch": 0.3658850318969563, "grad_norm": 1.8283526180209795, "learning_rate": 7.322160977600959e-07, "loss": 0.2868, "step": 21049 }, { "epoch": 0.36590241443445914, "grad_norm": 3.1458115822480193, "learning_rate": 7.321911680951091e-07, "loss": 0.4563, "step": 21050 }, { "epoch": 0.365919796971962, "grad_norm": 1.2431423272590245, "learning_rate": 7.321662376941829e-07, "loss": 0.2636, "step": 21051 }, { "epoch": 0.3659371795094648, "grad_norm": 1.2059637249037274, "learning_rate": 7.321413065573965e-07, "loss": 0.3013, "step": 21052 }, { "epoch": 0.36595456204696764, "grad_norm": 1.8431893890296365, "learning_rate": 7.321163746848286e-07, "loss": 0.4186, "step": 21053 }, { "epoch": 0.36597194458447047, "grad_norm": 1.5956676608752447, "learning_rate": 7.320914420765585e-07, "loss": 0.3178, "step": 21054 }, { "epoch": 0.36598932712197324, "grad_norm": 1.3593005369333082, "learning_rate": 7.320665087326652e-07, "loss": 0.3751, "step": 21055 }, { "epoch": 0.36600670965947607, "grad_norm": 1.713218256403362, "learning_rate": 7.320415746532275e-07, "loss": 0.5275, "step": 21056 }, { "epoch": 0.3660240921969789, "grad_norm": 1.8245585140759644, "learning_rate": 7.32016639838325e-07, "loss": 0.3505, "step": 21057 }, { "epoch": 0.36604147473448173, "grad_norm": 1.573366759874251, "learning_rate": 7.319917042880359e-07, "loss": 0.6783, "step": 21058 }, { "epoch": 0.36605885727198456, "grad_norm": 2.361968094047355, "learning_rate": 7.3196676800244e-07, "loss": 0.4056, "step": 21059 }, { "epoch": 0.3660762398094874, "grad_norm": 1.8428459279364005, "learning_rate": 7.319418309816156e-07, "loss": 0.2456, "step": 21060 }, { "epoch": 0.3660936223469902, "grad_norm": 1.5125531267791532, "learning_rate": 7.319168932256425e-07, "loss": 0.3104, "step": 21061 }, { "epoch": 0.36611100488449305, "grad_norm": 1.9869504460922474, "learning_rate": 7.318919547345993e-07, "loss": 0.2836, "step": 21062 }, { "epoch": 0.3661283874219959, "grad_norm": 2.1355485060165207, "learning_rate": 7.318670155085651e-07, "loss": 0.2256, "step": 21063 }, { "epoch": 0.3661457699594987, "grad_norm": 1.7197681376668204, "learning_rate": 7.318420755476188e-07, "loss": 0.3386, "step": 21064 }, { "epoch": 0.3661631524970015, "grad_norm": 3.4730817216209986, "learning_rate": 7.318171348518399e-07, "loss": 0.7496, "step": 21065 }, { "epoch": 0.3661805350345043, "grad_norm": 2.410969514091057, "learning_rate": 7.317921934213071e-07, "loss": 0.5515, "step": 21066 }, { "epoch": 0.36619791757200715, "grad_norm": 1.5957228013648612, "learning_rate": 7.317672512560994e-07, "loss": 0.1778, "step": 21067 }, { "epoch": 0.36621530010951, "grad_norm": 3.5890917742602206, "learning_rate": 7.317423083562961e-07, "loss": 0.6194, "step": 21068 }, { "epoch": 0.3662326826470128, "grad_norm": 1.4227253246217408, "learning_rate": 7.317173647219761e-07, "loss": 0.3172, "step": 21069 }, { "epoch": 0.36625006518451564, "grad_norm": 1.7867894023392137, "learning_rate": 7.316924203532184e-07, "loss": 0.4203, "step": 21070 }, { "epoch": 0.36626744772201847, "grad_norm": 1.1762710887355352, "learning_rate": 7.316674752501025e-07, "loss": 0.3483, "step": 21071 }, { "epoch": 0.3662848302595213, "grad_norm": 1.6943402152116913, "learning_rate": 7.316425294127067e-07, "loss": 0.493, "step": 21072 }, { "epoch": 0.36630221279702413, "grad_norm": 1.727421828492045, "learning_rate": 7.316175828411107e-07, "loss": 0.2015, "step": 21073 }, { "epoch": 0.36631959533452696, "grad_norm": 1.2945409112903354, "learning_rate": 7.315926355353932e-07, "loss": 0.2497, "step": 21074 }, { "epoch": 0.36633697787202973, "grad_norm": 1.2718750667964396, "learning_rate": 7.315676874956336e-07, "loss": 0.1613, "step": 21075 }, { "epoch": 0.36635436040953256, "grad_norm": 2.467512260628203, "learning_rate": 7.315427387219108e-07, "loss": 0.2579, "step": 21076 }, { "epoch": 0.3663717429470354, "grad_norm": 2.082981554080944, "learning_rate": 7.315177892143037e-07, "loss": 0.2518, "step": 21077 }, { "epoch": 0.3663891254845382, "grad_norm": 1.7236088116197457, "learning_rate": 7.314928389728918e-07, "loss": 0.5189, "step": 21078 }, { "epoch": 0.36640650802204106, "grad_norm": 1.5061251436648997, "learning_rate": 7.314678879977538e-07, "loss": 0.3988, "step": 21079 }, { "epoch": 0.3664238905595439, "grad_norm": 1.466478553710202, "learning_rate": 7.314429362889689e-07, "loss": 0.2826, "step": 21080 }, { "epoch": 0.3664412730970467, "grad_norm": 1.880171341363904, "learning_rate": 7.314179838466162e-07, "loss": 0.2763, "step": 21081 }, { "epoch": 0.36645865563454955, "grad_norm": 1.457849881344676, "learning_rate": 7.313930306707749e-07, "loss": 0.3216, "step": 21082 }, { "epoch": 0.3664760381720524, "grad_norm": 1.8699414600810167, "learning_rate": 7.313680767615239e-07, "loss": 0.2694, "step": 21083 }, { "epoch": 0.3664934207095552, "grad_norm": 2.3020676086696286, "learning_rate": 7.313431221189425e-07, "loss": 0.2958, "step": 21084 }, { "epoch": 0.366510803247058, "grad_norm": 5.187387533458205, "learning_rate": 7.313181667431096e-07, "loss": 0.4033, "step": 21085 }, { "epoch": 0.3665281857845608, "grad_norm": 1.8305060684370247, "learning_rate": 7.312932106341044e-07, "loss": 0.2695, "step": 21086 }, { "epoch": 0.36654556832206364, "grad_norm": 2.1556278498632224, "learning_rate": 7.312682537920059e-07, "loss": 0.3333, "step": 21087 }, { "epoch": 0.36656295085956647, "grad_norm": 1.568430944220351, "learning_rate": 7.312432962168933e-07, "loss": 0.2902, "step": 21088 }, { "epoch": 0.3665803333970693, "grad_norm": 1.810273355982853, "learning_rate": 7.312183379088456e-07, "loss": 0.3212, "step": 21089 }, { "epoch": 0.36659771593457213, "grad_norm": 1.2834584077143145, "learning_rate": 7.311933788679422e-07, "loss": 0.1744, "step": 21090 }, { "epoch": 0.36661509847207496, "grad_norm": 1.5429971354230618, "learning_rate": 7.311684190942617e-07, "loss": 0.2758, "step": 21091 }, { "epoch": 0.3666324810095778, "grad_norm": 1.8256746725979582, "learning_rate": 7.311434585878837e-07, "loss": 0.4352, "step": 21092 }, { "epoch": 0.3666498635470806, "grad_norm": 2.3274971248501872, "learning_rate": 7.31118497348887e-07, "loss": 0.3619, "step": 21093 }, { "epoch": 0.36666724608458345, "grad_norm": 1.9872611048338715, "learning_rate": 7.310935353773511e-07, "loss": 0.2812, "step": 21094 }, { "epoch": 0.36668462862208623, "grad_norm": 1.5774666162085986, "learning_rate": 7.310685726733546e-07, "loss": 0.2783, "step": 21095 }, { "epoch": 0.36670201115958906, "grad_norm": 2.5533734256914653, "learning_rate": 7.310436092369768e-07, "loss": 0.254, "step": 21096 }, { "epoch": 0.3667193936970919, "grad_norm": 1.4500868961122046, "learning_rate": 7.310186450682971e-07, "loss": 0.2778, "step": 21097 }, { "epoch": 0.3667367762345947, "grad_norm": 2.0184844092892114, "learning_rate": 7.309936801673943e-07, "loss": 0.2476, "step": 21098 }, { "epoch": 0.36675415877209755, "grad_norm": 1.0374250039093542, "learning_rate": 7.309687145343476e-07, "loss": 0.2182, "step": 21099 }, { "epoch": 0.3667715413096004, "grad_norm": 1.4946125813461266, "learning_rate": 7.309437481692363e-07, "loss": 0.2325, "step": 21100 }, { "epoch": 0.3667889238471032, "grad_norm": 1.2880963716700573, "learning_rate": 7.309187810721394e-07, "loss": 0.2433, "step": 21101 }, { "epoch": 0.36680630638460604, "grad_norm": 2.904670494951325, "learning_rate": 7.308938132431359e-07, "loss": 0.2975, "step": 21102 }, { "epoch": 0.36682368892210887, "grad_norm": 1.4592902312447875, "learning_rate": 7.308688446823053e-07, "loss": 0.2583, "step": 21103 }, { "epoch": 0.3668410714596117, "grad_norm": 2.135911493636813, "learning_rate": 7.308438753897264e-07, "loss": 0.5283, "step": 21104 }, { "epoch": 0.3668584539971145, "grad_norm": 1.7677299646417717, "learning_rate": 7.308189053654783e-07, "loss": 0.3965, "step": 21105 }, { "epoch": 0.3668758365346173, "grad_norm": 1.855642073417788, "learning_rate": 7.307939346096403e-07, "loss": 0.269, "step": 21106 }, { "epoch": 0.36689321907212014, "grad_norm": 1.8458366502389905, "learning_rate": 7.307689631222918e-07, "loss": 0.2145, "step": 21107 }, { "epoch": 0.36691060160962297, "grad_norm": 2.2317530375043244, "learning_rate": 7.307439909035116e-07, "loss": 0.3837, "step": 21108 }, { "epoch": 0.3669279841471258, "grad_norm": 1.2851266500319618, "learning_rate": 7.307190179533789e-07, "loss": 0.2556, "step": 21109 }, { "epoch": 0.3669453666846286, "grad_norm": 2.4760489151608387, "learning_rate": 7.306940442719729e-07, "loss": 0.3894, "step": 21110 }, { "epoch": 0.36696274922213146, "grad_norm": 2.6701857124432067, "learning_rate": 7.306690698593727e-07, "loss": 0.2767, "step": 21111 }, { "epoch": 0.3669801317596343, "grad_norm": 1.9061324246126603, "learning_rate": 7.306440947156576e-07, "loss": 0.3452, "step": 21112 }, { "epoch": 0.3669975142971371, "grad_norm": 1.2835475601782937, "learning_rate": 7.306191188409066e-07, "loss": 0.2722, "step": 21113 }, { "epoch": 0.36701489683463995, "grad_norm": 1.0913239832723969, "learning_rate": 7.30594142235199e-07, "loss": 0.3317, "step": 21114 }, { "epoch": 0.3670322793721427, "grad_norm": 1.690370154473228, "learning_rate": 7.305691648986139e-07, "loss": 0.5681, "step": 21115 }, { "epoch": 0.36704966190964555, "grad_norm": 1.7246797445589124, "learning_rate": 7.305441868312304e-07, "loss": 0.2971, "step": 21116 }, { "epoch": 0.3670670444471484, "grad_norm": 1.5373136609165787, "learning_rate": 7.305192080331277e-07, "loss": 0.3761, "step": 21117 }, { "epoch": 0.3670844269846512, "grad_norm": 1.4482385948790182, "learning_rate": 7.304942285043851e-07, "loss": 0.531, "step": 21118 }, { "epoch": 0.36710180952215404, "grad_norm": 1.6374577363435692, "learning_rate": 7.304692482450818e-07, "loss": 0.3388, "step": 21119 }, { "epoch": 0.3671191920596569, "grad_norm": 0.8932654875363338, "learning_rate": 7.304442672552965e-07, "loss": 0.1625, "step": 21120 }, { "epoch": 0.3671365745971597, "grad_norm": 1.6407226318736816, "learning_rate": 7.30419285535109e-07, "loss": 0.3662, "step": 21121 }, { "epoch": 0.36715395713466253, "grad_norm": 2.0491728386488126, "learning_rate": 7.303943030845982e-07, "loss": 0.4209, "step": 21122 }, { "epoch": 0.36717133967216536, "grad_norm": 2.07503891656375, "learning_rate": 7.303693199038432e-07, "loss": 0.3871, "step": 21123 }, { "epoch": 0.3671887222096682, "grad_norm": 1.5403160629774024, "learning_rate": 7.303443359929233e-07, "loss": 0.2333, "step": 21124 }, { "epoch": 0.36720610474717097, "grad_norm": 1.3747865134581192, "learning_rate": 7.303193513519176e-07, "loss": 0.3369, "step": 21125 }, { "epoch": 0.3672234872846738, "grad_norm": 2.230294414738982, "learning_rate": 7.302943659809055e-07, "loss": 0.2188, "step": 21126 }, { "epoch": 0.36724086982217663, "grad_norm": 1.6899656640402965, "learning_rate": 7.302693798799659e-07, "loss": 0.2741, "step": 21127 }, { "epoch": 0.36725825235967946, "grad_norm": 1.9199087776052504, "learning_rate": 7.302443930491783e-07, "loss": 0.3122, "step": 21128 }, { "epoch": 0.3672756348971823, "grad_norm": 2.383695985220191, "learning_rate": 7.302194054886218e-07, "loss": 0.3156, "step": 21129 }, { "epoch": 0.3672930174346851, "grad_norm": 1.4942641540432715, "learning_rate": 7.301944171983752e-07, "loss": 0.5338, "step": 21130 }, { "epoch": 0.36731039997218795, "grad_norm": 1.6955203776719816, "learning_rate": 7.301694281785182e-07, "loss": 0.33, "step": 21131 }, { "epoch": 0.3673277825096908, "grad_norm": 1.8185811033538946, "learning_rate": 7.3014443842913e-07, "loss": 0.3796, "step": 21132 }, { "epoch": 0.3673451650471936, "grad_norm": 1.9101652810252752, "learning_rate": 7.301194479502895e-07, "loss": 0.3039, "step": 21133 }, { "epoch": 0.36736254758469644, "grad_norm": 2.694088179726988, "learning_rate": 7.300944567420759e-07, "loss": 0.471, "step": 21134 }, { "epoch": 0.3673799301221992, "grad_norm": 2.7538154398429415, "learning_rate": 7.300694648045688e-07, "loss": 0.2376, "step": 21135 }, { "epoch": 0.36739731265970205, "grad_norm": 2.3991735124429727, "learning_rate": 7.300444721378473e-07, "loss": 0.3075, "step": 21136 }, { "epoch": 0.3674146951972049, "grad_norm": 1.921947684430092, "learning_rate": 7.300194787419903e-07, "loss": 0.2848, "step": 21137 }, { "epoch": 0.3674320777347077, "grad_norm": 1.064019556981159, "learning_rate": 7.299944846170773e-07, "loss": 0.1714, "step": 21138 }, { "epoch": 0.36744946027221054, "grad_norm": 1.5909293054753353, "learning_rate": 7.299694897631872e-07, "loss": 0.3332, "step": 21139 }, { "epoch": 0.36746684280971337, "grad_norm": 1.0631093423531344, "learning_rate": 7.299444941803997e-07, "loss": 0.3966, "step": 21140 }, { "epoch": 0.3674842253472162, "grad_norm": 1.1734664629179186, "learning_rate": 7.299194978687937e-07, "loss": 0.2287, "step": 21141 }, { "epoch": 0.36750160788471903, "grad_norm": 1.8247986214559673, "learning_rate": 7.298945008284486e-07, "loss": 0.3466, "step": 21142 }, { "epoch": 0.36751899042222186, "grad_norm": 1.116425428703728, "learning_rate": 7.298695030594434e-07, "loss": 0.2511, "step": 21143 }, { "epoch": 0.36753637295972463, "grad_norm": 1.403381195517229, "learning_rate": 7.298445045618577e-07, "loss": 0.2011, "step": 21144 }, { "epoch": 0.36755375549722746, "grad_norm": 1.901120130315319, "learning_rate": 7.298195053357702e-07, "loss": 0.2116, "step": 21145 }, { "epoch": 0.3675711380347303, "grad_norm": 3.8384002220348736, "learning_rate": 7.297945053812606e-07, "loss": 0.2474, "step": 21146 }, { "epoch": 0.3675885205722331, "grad_norm": 1.137556290111869, "learning_rate": 7.29769504698408e-07, "loss": 0.3842, "step": 21147 }, { "epoch": 0.36760590310973595, "grad_norm": 1.2171253024288204, "learning_rate": 7.297445032872915e-07, "loss": 0.2335, "step": 21148 }, { "epoch": 0.3676232856472388, "grad_norm": 3.3135005552751458, "learning_rate": 7.297195011479905e-07, "loss": 0.2793, "step": 21149 }, { "epoch": 0.3676406681847416, "grad_norm": 1.148219358821535, "learning_rate": 7.296944982805844e-07, "loss": 0.2213, "step": 21150 }, { "epoch": 0.36765805072224444, "grad_norm": 1.8894748677256146, "learning_rate": 7.29669494685152e-07, "loss": 0.2347, "step": 21151 }, { "epoch": 0.3676754332597473, "grad_norm": 0.8814706773930157, "learning_rate": 7.296444903617729e-07, "loss": 0.5126, "step": 21152 }, { "epoch": 0.3676928157972501, "grad_norm": 1.2564596537705648, "learning_rate": 7.296194853105262e-07, "loss": 0.1394, "step": 21153 }, { "epoch": 0.3677101983347529, "grad_norm": 1.0816333115937522, "learning_rate": 7.295944795314914e-07, "loss": 0.2679, "step": 21154 }, { "epoch": 0.3677275808722557, "grad_norm": 1.9156370093624295, "learning_rate": 7.295694730247473e-07, "loss": 0.2653, "step": 21155 }, { "epoch": 0.36774496340975854, "grad_norm": 2.9062825780142174, "learning_rate": 7.295444657903736e-07, "loss": 0.437, "step": 21156 }, { "epoch": 0.36776234594726137, "grad_norm": 1.3019786884127358, "learning_rate": 7.295194578284494e-07, "loss": 0.1898, "step": 21157 }, { "epoch": 0.3677797284847642, "grad_norm": 1.2959037817318355, "learning_rate": 7.294944491390538e-07, "loss": 0.1387, "step": 21158 }, { "epoch": 0.36779711102226703, "grad_norm": 1.4670726349152317, "learning_rate": 7.294694397222662e-07, "loss": 0.1906, "step": 21159 }, { "epoch": 0.36781449355976986, "grad_norm": 1.0039647007135801, "learning_rate": 7.294444295781662e-07, "loss": 0.1949, "step": 21160 }, { "epoch": 0.3678318760972727, "grad_norm": 2.3577005381017506, "learning_rate": 7.294194187068326e-07, "loss": 0.3971, "step": 21161 }, { "epoch": 0.3678492586347755, "grad_norm": 1.366623828475586, "learning_rate": 7.293944071083447e-07, "loss": 0.207, "step": 21162 }, { "epoch": 0.36786664117227835, "grad_norm": 2.104573747670922, "learning_rate": 7.29369394782782e-07, "loss": 0.2643, "step": 21163 }, { "epoch": 0.3678840237097811, "grad_norm": 2.9467973728245327, "learning_rate": 7.293443817302237e-07, "loss": 0.2122, "step": 21164 }, { "epoch": 0.36790140624728396, "grad_norm": 1.247393502628811, "learning_rate": 7.29319367950749e-07, "loss": 0.2515, "step": 21165 }, { "epoch": 0.3679187887847868, "grad_norm": 2.3859428657012813, "learning_rate": 7.292943534444372e-07, "loss": 0.4702, "step": 21166 }, { "epoch": 0.3679361713222896, "grad_norm": 1.3614418438698825, "learning_rate": 7.292693382113679e-07, "loss": 0.2123, "step": 21167 }, { "epoch": 0.36795355385979245, "grad_norm": 1.651658067760149, "learning_rate": 7.292443222516198e-07, "loss": 0.2332, "step": 21168 }, { "epoch": 0.3679709363972953, "grad_norm": 1.1404577898860624, "learning_rate": 7.292193055652727e-07, "loss": 0.3245, "step": 21169 }, { "epoch": 0.3679883189347981, "grad_norm": 1.647060391799067, "learning_rate": 7.291942881524056e-07, "loss": 0.3439, "step": 21170 }, { "epoch": 0.36800570147230094, "grad_norm": 1.7147320763208986, "learning_rate": 7.291692700130978e-07, "loss": 0.3615, "step": 21171 }, { "epoch": 0.36802308400980377, "grad_norm": 2.8131530102213618, "learning_rate": 7.291442511474289e-07, "loss": 0.4844, "step": 21172 }, { "epoch": 0.3680404665473066, "grad_norm": 1.5195986655678113, "learning_rate": 7.29119231555478e-07, "loss": 0.224, "step": 21173 }, { "epoch": 0.3680578490848094, "grad_norm": 1.8948604442668682, "learning_rate": 7.29094211237324e-07, "loss": 0.2846, "step": 21174 }, { "epoch": 0.3680752316223122, "grad_norm": 1.4483759349296912, "learning_rate": 7.290691901930469e-07, "loss": 0.1674, "step": 21175 }, { "epoch": 0.36809261415981503, "grad_norm": 1.7052625135265929, "learning_rate": 7.290441684227257e-07, "loss": 0.3026, "step": 21176 }, { "epoch": 0.36810999669731786, "grad_norm": 2.425733339869328, "learning_rate": 7.290191459264397e-07, "loss": 0.2152, "step": 21177 }, { "epoch": 0.3681273792348207, "grad_norm": 1.3666593648375844, "learning_rate": 7.289941227042681e-07, "loss": 0.3939, "step": 21178 }, { "epoch": 0.3681447617723235, "grad_norm": 2.1250898795578244, "learning_rate": 7.289690987562905e-07, "loss": 0.2266, "step": 21179 }, { "epoch": 0.36816214430982636, "grad_norm": 2.29440793316841, "learning_rate": 7.289440740825858e-07, "loss": 0.3504, "step": 21180 }, { "epoch": 0.3681795268473292, "grad_norm": 1.292440435425982, "learning_rate": 7.289190486832337e-07, "loss": 0.2665, "step": 21181 }, { "epoch": 0.368196909384832, "grad_norm": 2.0702870600772982, "learning_rate": 7.288940225583133e-07, "loss": 0.2993, "step": 21182 }, { "epoch": 0.36821429192233485, "grad_norm": 1.0076881735256937, "learning_rate": 7.288689957079042e-07, "loss": 0.1784, "step": 21183 }, { "epoch": 0.3682316744598376, "grad_norm": 1.8754080768608519, "learning_rate": 7.288439681320851e-07, "loss": 0.3512, "step": 21184 }, { "epoch": 0.36824905699734045, "grad_norm": 1.2131151683937167, "learning_rate": 7.288189398309361e-07, "loss": 0.3249, "step": 21185 }, { "epoch": 0.3682664395348433, "grad_norm": 1.9368518534675967, "learning_rate": 7.287939108045361e-07, "loss": 0.3169, "step": 21186 }, { "epoch": 0.3682838220723461, "grad_norm": 2.3853241989836795, "learning_rate": 7.287688810529644e-07, "loss": 0.1985, "step": 21187 }, { "epoch": 0.36830120460984894, "grad_norm": 1.8951082300715192, "learning_rate": 7.287438505763006e-07, "loss": 0.215, "step": 21188 }, { "epoch": 0.3683185871473518, "grad_norm": 3.169479524090285, "learning_rate": 7.287188193746238e-07, "loss": 0.4378, "step": 21189 }, { "epoch": 0.3683359696848546, "grad_norm": 1.059464866797615, "learning_rate": 7.286937874480132e-07, "loss": 0.2156, "step": 21190 }, { "epoch": 0.36835335222235743, "grad_norm": 1.8919390209308053, "learning_rate": 7.286687547965487e-07, "loss": 0.2727, "step": 21191 }, { "epoch": 0.36837073475986026, "grad_norm": 3.056958502604698, "learning_rate": 7.286437214203091e-07, "loss": 0.4111, "step": 21192 }, { "epoch": 0.3683881172973631, "grad_norm": 1.3406018884051802, "learning_rate": 7.286186873193739e-07, "loss": 0.3027, "step": 21193 }, { "epoch": 0.36840549983486587, "grad_norm": 1.4198362127375879, "learning_rate": 7.285936524938226e-07, "loss": 0.1728, "step": 21194 }, { "epoch": 0.3684228823723687, "grad_norm": 1.7651997949490865, "learning_rate": 7.285686169437343e-07, "loss": 0.1625, "step": 21195 }, { "epoch": 0.36844026490987153, "grad_norm": 2.105197827465123, "learning_rate": 7.285435806691885e-07, "loss": 0.3387, "step": 21196 }, { "epoch": 0.36845764744737436, "grad_norm": 1.2443018330709492, "learning_rate": 7.285185436702647e-07, "loss": 0.2959, "step": 21197 }, { "epoch": 0.3684750299848772, "grad_norm": 2.2099231159137336, "learning_rate": 7.28493505947042e-07, "loss": 0.3377, "step": 21198 }, { "epoch": 0.36849241252238, "grad_norm": 1.7506947894353313, "learning_rate": 7.284684674995997e-07, "loss": 0.4016, "step": 21199 }, { "epoch": 0.36850979505988285, "grad_norm": 1.8010900078763852, "learning_rate": 7.284434283280174e-07, "loss": 0.2855, "step": 21200 }, { "epoch": 0.3685271775973857, "grad_norm": 1.1745208752877692, "learning_rate": 7.284183884323745e-07, "loss": 0.341, "step": 21201 }, { "epoch": 0.3685445601348885, "grad_norm": 0.9864585212940988, "learning_rate": 7.283933478127501e-07, "loss": 0.2573, "step": 21202 }, { "epoch": 0.36856194267239134, "grad_norm": 1.2181213424619288, "learning_rate": 7.283683064692236e-07, "loss": 0.1736, "step": 21203 }, { "epoch": 0.3685793252098941, "grad_norm": 4.542356546441697, "learning_rate": 7.283432644018748e-07, "loss": 0.4175, "step": 21204 }, { "epoch": 0.36859670774739695, "grad_norm": 1.1626809494434307, "learning_rate": 7.283182216107823e-07, "loss": 0.3096, "step": 21205 }, { "epoch": 0.3686140902848998, "grad_norm": 1.7791836496535889, "learning_rate": 7.282931780960263e-07, "loss": 0.2931, "step": 21206 }, { "epoch": 0.3686314728224026, "grad_norm": 1.1880420400944467, "learning_rate": 7.282681338576857e-07, "loss": 0.4848, "step": 21207 }, { "epoch": 0.36864885535990544, "grad_norm": 2.056330312058357, "learning_rate": 7.282430888958399e-07, "loss": 0.4775, "step": 21208 }, { "epoch": 0.36866623789740827, "grad_norm": 1.1059364974086014, "learning_rate": 7.282180432105684e-07, "loss": 0.2058, "step": 21209 }, { "epoch": 0.3686836204349111, "grad_norm": 1.005857111775349, "learning_rate": 7.281929968019505e-07, "loss": 0.2658, "step": 21210 }, { "epoch": 0.3687010029724139, "grad_norm": 2.1920648666851683, "learning_rate": 7.281679496700657e-07, "loss": 0.4618, "step": 21211 }, { "epoch": 0.36871838550991676, "grad_norm": 1.1209394392001473, "learning_rate": 7.281429018149931e-07, "loss": 0.3912, "step": 21212 }, { "epoch": 0.3687357680474196, "grad_norm": 1.325020203056493, "learning_rate": 7.281178532368125e-07, "loss": 0.2711, "step": 21213 }, { "epoch": 0.36875315058492236, "grad_norm": 1.7738346363181452, "learning_rate": 7.280928039356032e-07, "loss": 0.3857, "step": 21214 }, { "epoch": 0.3687705331224252, "grad_norm": 1.4939439081966615, "learning_rate": 7.280677539114443e-07, "loss": 0.4719, "step": 21215 }, { "epoch": 0.368787915659928, "grad_norm": 1.6146405109387822, "learning_rate": 7.280427031644154e-07, "loss": 0.2387, "step": 21216 }, { "epoch": 0.36880529819743085, "grad_norm": 1.2946181714523681, "learning_rate": 7.28017651694596e-07, "loss": 0.2067, "step": 21217 }, { "epoch": 0.3688226807349337, "grad_norm": 3.422025268169641, "learning_rate": 7.279925995020652e-07, "loss": 0.4864, "step": 21218 }, { "epoch": 0.3688400632724365, "grad_norm": 4.342208265434769, "learning_rate": 7.279675465869026e-07, "loss": 0.6182, "step": 21219 }, { "epoch": 0.36885744580993934, "grad_norm": 1.3691607570265907, "learning_rate": 7.279424929491878e-07, "loss": 0.2913, "step": 21220 }, { "epoch": 0.3688748283474422, "grad_norm": 1.587200200460555, "learning_rate": 7.279174385889999e-07, "loss": 0.3158, "step": 21221 }, { "epoch": 0.368892210884945, "grad_norm": 2.1958088710673835, "learning_rate": 7.278923835064184e-07, "loss": 0.3888, "step": 21222 }, { "epoch": 0.36890959342244783, "grad_norm": 2.7165021390117694, "learning_rate": 7.278673277015229e-07, "loss": 0.3021, "step": 21223 }, { "epoch": 0.3689269759599506, "grad_norm": 1.7495743072933123, "learning_rate": 7.278422711743922e-07, "loss": 0.2844, "step": 21224 }, { "epoch": 0.36894435849745344, "grad_norm": 1.9428459508705145, "learning_rate": 7.278172139251065e-07, "loss": 0.4133, "step": 21225 }, { "epoch": 0.36896174103495627, "grad_norm": 2.7274256858762027, "learning_rate": 7.277921559537449e-07, "loss": 0.5585, "step": 21226 }, { "epoch": 0.3689791235724591, "grad_norm": 1.8750611821296819, "learning_rate": 7.277670972603867e-07, "loss": 0.3309, "step": 21227 }, { "epoch": 0.36899650610996193, "grad_norm": 2.0903915758286056, "learning_rate": 7.277420378451114e-07, "loss": 0.3176, "step": 21228 }, { "epoch": 0.36901388864746476, "grad_norm": 1.632814166978004, "learning_rate": 7.277169777079986e-07, "loss": 0.1971, "step": 21229 }, { "epoch": 0.3690312711849676, "grad_norm": 1.6256840208861345, "learning_rate": 7.276919168491275e-07, "loss": 0.3448, "step": 21230 }, { "epoch": 0.3690486537224704, "grad_norm": 1.7282059278437403, "learning_rate": 7.276668552685776e-07, "loss": 0.2498, "step": 21231 }, { "epoch": 0.36906603625997325, "grad_norm": 2.3299490129833567, "learning_rate": 7.276417929664285e-07, "loss": 0.5288, "step": 21232 }, { "epoch": 0.3690834187974761, "grad_norm": 1.8095473042501835, "learning_rate": 7.276167299427594e-07, "loss": 0.2756, "step": 21233 }, { "epoch": 0.36910080133497886, "grad_norm": 1.8829303044575416, "learning_rate": 7.275916661976496e-07, "loss": 0.259, "step": 21234 }, { "epoch": 0.3691181838724817, "grad_norm": 1.4735000070701336, "learning_rate": 7.27566601731179e-07, "loss": 0.6341, "step": 21235 }, { "epoch": 0.3691355664099845, "grad_norm": 0.994525949609566, "learning_rate": 7.275415365434267e-07, "loss": 0.2251, "step": 21236 }, { "epoch": 0.36915294894748735, "grad_norm": 1.1574269199186236, "learning_rate": 7.275164706344724e-07, "loss": 0.2766, "step": 21237 }, { "epoch": 0.3691703314849902, "grad_norm": 0.9140569701340593, "learning_rate": 7.274914040043952e-07, "loss": 0.2225, "step": 21238 }, { "epoch": 0.369187714022493, "grad_norm": 1.171307196923086, "learning_rate": 7.27466336653275e-07, "loss": 0.3108, "step": 21239 }, { "epoch": 0.36920509655999584, "grad_norm": 1.9902038747241304, "learning_rate": 7.274412685811908e-07, "loss": 0.3053, "step": 21240 }, { "epoch": 0.36922247909749867, "grad_norm": 2.159559633604048, "learning_rate": 7.274161997882223e-07, "loss": 0.3708, "step": 21241 }, { "epoch": 0.3692398616350015, "grad_norm": 1.5184225673667002, "learning_rate": 7.27391130274449e-07, "loss": 0.2728, "step": 21242 }, { "epoch": 0.36925724417250433, "grad_norm": 1.4345206548399332, "learning_rate": 7.273660600399501e-07, "loss": 0.2333, "step": 21243 }, { "epoch": 0.3692746267100071, "grad_norm": 1.870167097485996, "learning_rate": 7.273409890848053e-07, "loss": 0.2651, "step": 21244 }, { "epoch": 0.36929200924750993, "grad_norm": 2.0552790827819374, "learning_rate": 7.273159174090942e-07, "loss": 0.3818, "step": 21245 }, { "epoch": 0.36930939178501276, "grad_norm": 2.007406725083056, "learning_rate": 7.272908450128958e-07, "loss": 0.2066, "step": 21246 }, { "epoch": 0.3693267743225156, "grad_norm": 1.1660499636287345, "learning_rate": 7.2726577189629e-07, "loss": 0.2707, "step": 21247 }, { "epoch": 0.3693441568600184, "grad_norm": 2.4782843985476264, "learning_rate": 7.272406980593561e-07, "loss": 0.3219, "step": 21248 }, { "epoch": 0.36936153939752125, "grad_norm": 3.213299575484899, "learning_rate": 7.272156235021735e-07, "loss": 0.2207, "step": 21249 }, { "epoch": 0.3693789219350241, "grad_norm": 1.4269532451814635, "learning_rate": 7.271905482248216e-07, "loss": 0.2965, "step": 21250 }, { "epoch": 0.3693963044725269, "grad_norm": 1.3316390377683813, "learning_rate": 7.271654722273802e-07, "loss": 0.307, "step": 21251 }, { "epoch": 0.36941368701002975, "grad_norm": 8.282013236046545, "learning_rate": 7.271403955099286e-07, "loss": 0.2749, "step": 21252 }, { "epoch": 0.3694310695475326, "grad_norm": 1.633108504923287, "learning_rate": 7.271153180725462e-07, "loss": 0.2088, "step": 21253 }, { "epoch": 0.36944845208503535, "grad_norm": 1.4483809778576888, "learning_rate": 7.270902399153128e-07, "loss": 0.2769, "step": 21254 }, { "epoch": 0.3694658346225382, "grad_norm": 1.4094394573491602, "learning_rate": 7.270651610383074e-07, "loss": 0.2201, "step": 21255 }, { "epoch": 0.369483217160041, "grad_norm": 2.1036136007185116, "learning_rate": 7.270400814416097e-07, "loss": 0.3904, "step": 21256 }, { "epoch": 0.36950059969754384, "grad_norm": 2.4678520670618114, "learning_rate": 7.270150011252993e-07, "loss": 0.5419, "step": 21257 }, { "epoch": 0.36951798223504667, "grad_norm": 1.475235012563466, "learning_rate": 7.269899200894559e-07, "loss": 0.2117, "step": 21258 }, { "epoch": 0.3695353647725495, "grad_norm": 1.7205791008230513, "learning_rate": 7.269648383341584e-07, "loss": 0.3238, "step": 21259 }, { "epoch": 0.36955274731005233, "grad_norm": 1.644742797879065, "learning_rate": 7.269397558594869e-07, "loss": 0.3001, "step": 21260 }, { "epoch": 0.36957012984755516, "grad_norm": 2.58632126180165, "learning_rate": 7.269146726655204e-07, "loss": 0.3806, "step": 21261 }, { "epoch": 0.369587512385058, "grad_norm": 3.5149830672128015, "learning_rate": 7.268895887523387e-07, "loss": 0.3506, "step": 21262 }, { "epoch": 0.3696048949225608, "grad_norm": 3.5602078480702586, "learning_rate": 7.268645041200213e-07, "loss": 0.3755, "step": 21263 }, { "epoch": 0.3696222774600636, "grad_norm": 2.719665337987454, "learning_rate": 7.268394187686475e-07, "loss": 0.5098, "step": 21264 }, { "epoch": 0.3696396599975664, "grad_norm": 2.661311129830193, "learning_rate": 7.268143326982969e-07, "loss": 0.3178, "step": 21265 }, { "epoch": 0.36965704253506926, "grad_norm": 1.587657969475124, "learning_rate": 7.267892459090492e-07, "loss": 0.2578, "step": 21266 }, { "epoch": 0.3696744250725721, "grad_norm": 1.487536183805002, "learning_rate": 7.267641584009838e-07, "loss": 0.4209, "step": 21267 }, { "epoch": 0.3696918076100749, "grad_norm": 1.280266863559717, "learning_rate": 7.267390701741802e-07, "loss": 0.2897, "step": 21268 }, { "epoch": 0.36970919014757775, "grad_norm": 1.4145226457869031, "learning_rate": 7.267139812287176e-07, "loss": 0.3325, "step": 21269 }, { "epoch": 0.3697265726850806, "grad_norm": 1.3522958423580869, "learning_rate": 7.266888915646762e-07, "loss": 0.2771, "step": 21270 }, { "epoch": 0.3697439552225834, "grad_norm": 2.532373230027517, "learning_rate": 7.266638011821349e-07, "loss": 0.2453, "step": 21271 }, { "epoch": 0.36976133776008624, "grad_norm": 3.040290849617133, "learning_rate": 7.266387100811735e-07, "loss": 0.3684, "step": 21272 }, { "epoch": 0.36977872029758907, "grad_norm": 1.3568480724044392, "learning_rate": 7.266136182618715e-07, "loss": 0.1829, "step": 21273 }, { "epoch": 0.36979610283509184, "grad_norm": 1.5980070778205164, "learning_rate": 7.265885257243086e-07, "loss": 0.2414, "step": 21274 }, { "epoch": 0.3698134853725947, "grad_norm": 2.0745014699100772, "learning_rate": 7.26563432468564e-07, "loss": 0.3532, "step": 21275 }, { "epoch": 0.3698308679100975, "grad_norm": 3.5965523190813298, "learning_rate": 7.265383384947174e-07, "loss": 0.236, "step": 21276 }, { "epoch": 0.36984825044760034, "grad_norm": 1.5004685372522037, "learning_rate": 7.265132438028484e-07, "loss": 0.3797, "step": 21277 }, { "epoch": 0.36986563298510317, "grad_norm": 1.589534944079413, "learning_rate": 7.264881483930364e-07, "loss": 0.2475, "step": 21278 }, { "epoch": 0.369883015522606, "grad_norm": 1.9446521735827955, "learning_rate": 7.26463052265361e-07, "loss": 0.6046, "step": 21279 }, { "epoch": 0.3699003980601088, "grad_norm": 3.4421973526814016, "learning_rate": 7.264379554199018e-07, "loss": 0.5225, "step": 21280 }, { "epoch": 0.36991778059761166, "grad_norm": 1.9269143402286344, "learning_rate": 7.264128578567383e-07, "loss": 0.4498, "step": 21281 }, { "epoch": 0.3699351631351145, "grad_norm": 3.60895823964104, "learning_rate": 7.2638775957595e-07, "loss": 0.2707, "step": 21282 }, { "epoch": 0.36995254567261726, "grad_norm": 2.6719671043592608, "learning_rate": 7.263626605776167e-07, "loss": 0.2546, "step": 21283 }, { "epoch": 0.3699699282101201, "grad_norm": 1.222527022363767, "learning_rate": 7.263375608618174e-07, "loss": 0.2901, "step": 21284 }, { "epoch": 0.3699873107476229, "grad_norm": 1.7358390058039004, "learning_rate": 7.263124604286323e-07, "loss": 0.2909, "step": 21285 }, { "epoch": 0.37000469328512575, "grad_norm": 1.490902547991947, "learning_rate": 7.262873592781404e-07, "loss": 0.3073, "step": 21286 }, { "epoch": 0.3700220758226286, "grad_norm": 1.823722488742103, "learning_rate": 7.262622574104218e-07, "loss": 0.3448, "step": 21287 }, { "epoch": 0.3700394583601314, "grad_norm": 1.2289874680911923, "learning_rate": 7.262371548255555e-07, "loss": 0.2126, "step": 21288 }, { "epoch": 0.37005684089763424, "grad_norm": 1.324694862885361, "learning_rate": 7.262120515236216e-07, "loss": 0.2333, "step": 21289 }, { "epoch": 0.3700742234351371, "grad_norm": 0.9268863940490503, "learning_rate": 7.261869475046992e-07, "loss": 0.1383, "step": 21290 }, { "epoch": 0.3700916059726399, "grad_norm": 1.6697383921767381, "learning_rate": 7.261618427688683e-07, "loss": 0.2555, "step": 21291 }, { "epoch": 0.37010898851014273, "grad_norm": 0.8947643991864936, "learning_rate": 7.261367373162081e-07, "loss": 0.2732, "step": 21292 }, { "epoch": 0.3701263710476455, "grad_norm": 2.765710528937721, "learning_rate": 7.261116311467983e-07, "loss": 0.3359, "step": 21293 }, { "epoch": 0.37014375358514834, "grad_norm": 1.5255715225236175, "learning_rate": 7.260865242607185e-07, "loss": 0.3316, "step": 21294 }, { "epoch": 0.37016113612265117, "grad_norm": 1.4392850849224281, "learning_rate": 7.260614166580483e-07, "loss": 0.2981, "step": 21295 }, { "epoch": 0.370178518660154, "grad_norm": 1.8595344638274587, "learning_rate": 7.260363083388673e-07, "loss": 0.2558, "step": 21296 }, { "epoch": 0.37019590119765683, "grad_norm": 2.8642133615664855, "learning_rate": 7.26011199303255e-07, "loss": 0.4088, "step": 21297 }, { "epoch": 0.37021328373515966, "grad_norm": 2.4583246960621223, "learning_rate": 7.25986089551291e-07, "loss": 0.5648, "step": 21298 }, { "epoch": 0.3702306662726625, "grad_norm": 1.991293380884054, "learning_rate": 7.259609790830551e-07, "loss": 0.3066, "step": 21299 }, { "epoch": 0.3702480488101653, "grad_norm": 1.9871428573505272, "learning_rate": 7.259358678986263e-07, "loss": 0.2103, "step": 21300 }, { "epoch": 0.37026543134766815, "grad_norm": 1.189455871695571, "learning_rate": 7.259107559980849e-07, "loss": 0.3142, "step": 21301 }, { "epoch": 0.370282813885171, "grad_norm": 1.1084748850443813, "learning_rate": 7.258856433815101e-07, "loss": 0.2635, "step": 21302 }, { "epoch": 0.37030019642267376, "grad_norm": 1.293603574046328, "learning_rate": 7.258605300489817e-07, "loss": 0.275, "step": 21303 }, { "epoch": 0.3703175789601766, "grad_norm": 1.897534276457499, "learning_rate": 7.258354160005789e-07, "loss": 0.2899, "step": 21304 }, { "epoch": 0.3703349614976794, "grad_norm": 1.5692956010659067, "learning_rate": 7.258103012363818e-07, "loss": 0.3743, "step": 21305 }, { "epoch": 0.37035234403518225, "grad_norm": 1.6302902137658237, "learning_rate": 7.257851857564696e-07, "loss": 0.4297, "step": 21306 }, { "epoch": 0.3703697265726851, "grad_norm": 1.4485182379618373, "learning_rate": 7.257600695609222e-07, "loss": 0.3056, "step": 21307 }, { "epoch": 0.3703871091101879, "grad_norm": 2.34246133100362, "learning_rate": 7.257349526498189e-07, "loss": 0.4308, "step": 21308 }, { "epoch": 0.37040449164769074, "grad_norm": 1.3581483880841787, "learning_rate": 7.257098350232397e-07, "loss": 0.4324, "step": 21309 }, { "epoch": 0.37042187418519357, "grad_norm": 1.36470557660345, "learning_rate": 7.25684716681264e-07, "loss": 0.3069, "step": 21310 }, { "epoch": 0.3704392567226964, "grad_norm": 1.2505526075120277, "learning_rate": 7.256595976239713e-07, "loss": 0.2996, "step": 21311 }, { "epoch": 0.3704566392601992, "grad_norm": 2.5076424544109903, "learning_rate": 7.256344778514413e-07, "loss": 0.2742, "step": 21312 }, { "epoch": 0.370474021797702, "grad_norm": 1.3890200561109411, "learning_rate": 7.256093573637538e-07, "loss": 0.3319, "step": 21313 }, { "epoch": 0.37049140433520483, "grad_norm": 1.964907534832822, "learning_rate": 7.255842361609883e-07, "loss": 0.1567, "step": 21314 }, { "epoch": 0.37050878687270766, "grad_norm": 2.1908205379145578, "learning_rate": 7.255591142432242e-07, "loss": 0.4901, "step": 21315 }, { "epoch": 0.3705261694102105, "grad_norm": 2.6315251415679697, "learning_rate": 7.255339916105415e-07, "loss": 0.2566, "step": 21316 }, { "epoch": 0.3705435519477133, "grad_norm": 2.005070616716073, "learning_rate": 7.255088682630195e-07, "loss": 0.4768, "step": 21317 }, { "epoch": 0.37056093448521615, "grad_norm": 1.4686147014867512, "learning_rate": 7.254837442007382e-07, "loss": 0.4181, "step": 21318 }, { "epoch": 0.370578317022719, "grad_norm": 1.9429750395408658, "learning_rate": 7.254586194237768e-07, "loss": 0.2557, "step": 21319 }, { "epoch": 0.3705956995602218, "grad_norm": 1.5307174502671692, "learning_rate": 7.254334939322153e-07, "loss": 0.2724, "step": 21320 }, { "epoch": 0.37061308209772464, "grad_norm": 2.2550254691992446, "learning_rate": 7.254083677261331e-07, "loss": 0.3178, "step": 21321 }, { "epoch": 0.3706304646352275, "grad_norm": 2.218417700211011, "learning_rate": 7.2538324080561e-07, "loss": 0.2365, "step": 21322 }, { "epoch": 0.37064784717273025, "grad_norm": 1.5191398406589005, "learning_rate": 7.253581131707254e-07, "loss": 0.2599, "step": 21323 }, { "epoch": 0.3706652297102331, "grad_norm": 1.671880358486324, "learning_rate": 7.253329848215592e-07, "loss": 0.413, "step": 21324 }, { "epoch": 0.3706826122477359, "grad_norm": 1.2216674619131818, "learning_rate": 7.253078557581909e-07, "loss": 0.3921, "step": 21325 }, { "epoch": 0.37069999478523874, "grad_norm": 1.9521087537916944, "learning_rate": 7.252827259807001e-07, "loss": 0.3352, "step": 21326 }, { "epoch": 0.37071737732274157, "grad_norm": 1.0563137384202281, "learning_rate": 7.252575954891668e-07, "loss": 0.4126, "step": 21327 }, { "epoch": 0.3707347598602444, "grad_norm": 2.5074563277170103, "learning_rate": 7.252324642836703e-07, "loss": 0.2255, "step": 21328 }, { "epoch": 0.37075214239774723, "grad_norm": 1.283013166021454, "learning_rate": 7.252073323642902e-07, "loss": 0.2, "step": 21329 }, { "epoch": 0.37076952493525006, "grad_norm": 1.4559507023233396, "learning_rate": 7.251821997311065e-07, "loss": 0.2514, "step": 21330 }, { "epoch": 0.3707869074727529, "grad_norm": 1.8001940014919529, "learning_rate": 7.251570663841986e-07, "loss": 0.2467, "step": 21331 }, { "epoch": 0.3708042900102557, "grad_norm": 2.1058283636875306, "learning_rate": 7.251319323236463e-07, "loss": 0.2902, "step": 21332 }, { "epoch": 0.3708216725477585, "grad_norm": 1.4330873549652867, "learning_rate": 7.25106797549529e-07, "loss": 0.2106, "step": 21333 }, { "epoch": 0.3708390550852613, "grad_norm": 2.088084107809829, "learning_rate": 7.250816620619267e-07, "loss": 0.3517, "step": 21334 }, { "epoch": 0.37085643762276416, "grad_norm": 1.8489655001134957, "learning_rate": 7.250565258609188e-07, "loss": 0.1966, "step": 21335 }, { "epoch": 0.370873820160267, "grad_norm": 1.7513916166681307, "learning_rate": 7.250313889465852e-07, "loss": 0.3325, "step": 21336 }, { "epoch": 0.3708912026977698, "grad_norm": 1.20324978558462, "learning_rate": 7.250062513190055e-07, "loss": 0.1633, "step": 21337 }, { "epoch": 0.37090858523527265, "grad_norm": 1.6056796981909183, "learning_rate": 7.249811129782593e-07, "loss": 0.3615, "step": 21338 }, { "epoch": 0.3709259677727755, "grad_norm": 1.4662126712977706, "learning_rate": 7.249559739244264e-07, "loss": 0.6487, "step": 21339 }, { "epoch": 0.3709433503102783, "grad_norm": 2.289260994391392, "learning_rate": 7.249308341575861e-07, "loss": 0.246, "step": 21340 }, { "epoch": 0.37096073284778114, "grad_norm": 1.803067262594519, "learning_rate": 7.249056936778187e-07, "loss": 0.289, "step": 21341 }, { "epoch": 0.37097811538528397, "grad_norm": 2.406474189943209, "learning_rate": 7.248805524852036e-07, "loss": 0.3126, "step": 21342 }, { "epoch": 0.37099549792278674, "grad_norm": 2.183553428454331, "learning_rate": 7.248554105798203e-07, "loss": 0.3377, "step": 21343 }, { "epoch": 0.3710128804602896, "grad_norm": 0.9190428279782257, "learning_rate": 7.248302679617486e-07, "loss": 0.3617, "step": 21344 }, { "epoch": 0.3710302629977924, "grad_norm": 1.484146289498276, "learning_rate": 7.248051246310682e-07, "loss": 0.3031, "step": 21345 }, { "epoch": 0.37104764553529523, "grad_norm": 2.4180264973716348, "learning_rate": 7.247799805878589e-07, "loss": 0.3445, "step": 21346 }, { "epoch": 0.37106502807279806, "grad_norm": 1.3778114201950622, "learning_rate": 7.247548358322004e-07, "loss": 0.2903, "step": 21347 }, { "epoch": 0.3710824106103009, "grad_norm": 2.3335411418798966, "learning_rate": 7.247296903641722e-07, "loss": 0.4209, "step": 21348 }, { "epoch": 0.3710997931478037, "grad_norm": 2.27956909921297, "learning_rate": 7.247045441838542e-07, "loss": 0.3444, "step": 21349 }, { "epoch": 0.37111717568530656, "grad_norm": 1.3312572071813145, "learning_rate": 7.246793972913259e-07, "loss": 0.2862, "step": 21350 }, { "epoch": 0.3711345582228094, "grad_norm": 1.162021655135682, "learning_rate": 7.246542496866672e-07, "loss": 0.2887, "step": 21351 }, { "epoch": 0.3711519407603122, "grad_norm": 1.2015048830567376, "learning_rate": 7.246291013699576e-07, "loss": 0.1963, "step": 21352 }, { "epoch": 0.371169323297815, "grad_norm": 1.742334923157743, "learning_rate": 7.246039523412773e-07, "loss": 0.3381, "step": 21353 }, { "epoch": 0.3711867058353178, "grad_norm": 1.7210470015486923, "learning_rate": 7.245788026007052e-07, "loss": 0.327, "step": 21354 }, { "epoch": 0.37120408837282065, "grad_norm": 1.760076569456137, "learning_rate": 7.245536521483216e-07, "loss": 0.2891, "step": 21355 }, { "epoch": 0.3712214709103235, "grad_norm": 1.3621844932224287, "learning_rate": 7.245285009842062e-07, "loss": 0.3366, "step": 21356 }, { "epoch": 0.3712388534478263, "grad_norm": 5.453821636482698, "learning_rate": 7.245033491084386e-07, "loss": 0.4412, "step": 21357 }, { "epoch": 0.37125623598532914, "grad_norm": 1.5971904218255333, "learning_rate": 7.244781965210983e-07, "loss": 0.3311, "step": 21358 }, { "epoch": 0.37127361852283197, "grad_norm": 7.233573322298037, "learning_rate": 7.244530432222655e-07, "loss": 0.576, "step": 21359 }, { "epoch": 0.3712910010603348, "grad_norm": 0.9750048286041597, "learning_rate": 7.244278892120195e-07, "loss": 0.1966, "step": 21360 }, { "epoch": 0.37130838359783763, "grad_norm": 2.32691049542158, "learning_rate": 7.244027344904402e-07, "loss": 0.2737, "step": 21361 }, { "epoch": 0.37132576613534046, "grad_norm": 2.350234698332525, "learning_rate": 7.243775790576072e-07, "loss": 0.2023, "step": 21362 }, { "epoch": 0.37134314867284324, "grad_norm": 1.3078200392405817, "learning_rate": 7.243524229136005e-07, "loss": 0.3685, "step": 21363 }, { "epoch": 0.37136053121034607, "grad_norm": 1.2028492061854787, "learning_rate": 7.243272660584996e-07, "loss": 0.2143, "step": 21364 }, { "epoch": 0.3713779137478489, "grad_norm": 1.6905773924718728, "learning_rate": 7.243021084923844e-07, "loss": 0.2769, "step": 21365 }, { "epoch": 0.37139529628535173, "grad_norm": 2.445055115095321, "learning_rate": 7.242769502153345e-07, "loss": 0.6342, "step": 21366 }, { "epoch": 0.37141267882285456, "grad_norm": 1.1364955773864756, "learning_rate": 7.242517912274298e-07, "loss": 0.2471, "step": 21367 }, { "epoch": 0.3714300613603574, "grad_norm": 1.1165901091261716, "learning_rate": 7.242266315287498e-07, "loss": 0.2605, "step": 21368 }, { "epoch": 0.3714474438978602, "grad_norm": 1.019183652954981, "learning_rate": 7.242014711193744e-07, "loss": 0.1817, "step": 21369 }, { "epoch": 0.37146482643536305, "grad_norm": 1.0383032111578376, "learning_rate": 7.241763099993833e-07, "loss": 0.3407, "step": 21370 }, { "epoch": 0.3714822089728659, "grad_norm": 1.8873435861633783, "learning_rate": 7.241511481688564e-07, "loss": 0.4066, "step": 21371 }, { "epoch": 0.3714995915103687, "grad_norm": 1.9453591761990878, "learning_rate": 7.241259856278732e-07, "loss": 0.3674, "step": 21372 }, { "epoch": 0.3715169740478715, "grad_norm": 1.370359670226489, "learning_rate": 7.241008223765134e-07, "loss": 0.2758, "step": 21373 }, { "epoch": 0.3715343565853743, "grad_norm": 1.9208120049726949, "learning_rate": 7.240756584148573e-07, "loss": 0.2727, "step": 21374 }, { "epoch": 0.37155173912287714, "grad_norm": 2.384517458608598, "learning_rate": 7.240504937429842e-07, "loss": 0.497, "step": 21375 }, { "epoch": 0.37156912166038, "grad_norm": 1.7041117752859476, "learning_rate": 7.240253283609738e-07, "loss": 0.2462, "step": 21376 }, { "epoch": 0.3715865041978828, "grad_norm": 2.1414874747139776, "learning_rate": 7.240001622689061e-07, "loss": 0.463, "step": 21377 }, { "epoch": 0.37160388673538564, "grad_norm": 1.4669005964349229, "learning_rate": 7.239749954668611e-07, "loss": 0.2827, "step": 21378 }, { "epoch": 0.37162126927288847, "grad_norm": 1.2824492858993104, "learning_rate": 7.239498279549178e-07, "loss": 0.458, "step": 21379 }, { "epoch": 0.3716386518103913, "grad_norm": 3.532657002575077, "learning_rate": 7.239246597331565e-07, "loss": 0.2793, "step": 21380 }, { "epoch": 0.3716560343478941, "grad_norm": 2.8252608482008403, "learning_rate": 7.238994908016571e-07, "loss": 0.556, "step": 21381 }, { "epoch": 0.37167341688539696, "grad_norm": 2.2971161396280384, "learning_rate": 7.23874321160499e-07, "loss": 0.3913, "step": 21382 }, { "epoch": 0.37169079942289973, "grad_norm": 1.7209956419344767, "learning_rate": 7.238491508097622e-07, "loss": 0.2175, "step": 21383 }, { "epoch": 0.37170818196040256, "grad_norm": 1.5680467386552872, "learning_rate": 7.238239797495264e-07, "loss": 0.2732, "step": 21384 }, { "epoch": 0.3717255644979054, "grad_norm": 2.2640791835226617, "learning_rate": 7.237988079798714e-07, "loss": 0.4695, "step": 21385 }, { "epoch": 0.3717429470354082, "grad_norm": 1.3239787653190558, "learning_rate": 7.23773635500877e-07, "loss": 0.1485, "step": 21386 }, { "epoch": 0.37176032957291105, "grad_norm": 2.211565187380298, "learning_rate": 7.237484623126229e-07, "loss": 0.2492, "step": 21387 }, { "epoch": 0.3717777121104139, "grad_norm": 2.4106002113622846, "learning_rate": 7.23723288415189e-07, "loss": 0.4009, "step": 21388 }, { "epoch": 0.3717950946479167, "grad_norm": 2.100533950218723, "learning_rate": 7.23698113808655e-07, "loss": 0.3967, "step": 21389 }, { "epoch": 0.37181247718541954, "grad_norm": 1.640787327261889, "learning_rate": 7.23672938493101e-07, "loss": 0.3873, "step": 21390 }, { "epoch": 0.3718298597229224, "grad_norm": 2.4072099291216342, "learning_rate": 7.236477624686061e-07, "loss": 0.242, "step": 21391 }, { "epoch": 0.3718472422604252, "grad_norm": 1.530273972602374, "learning_rate": 7.236225857352508e-07, "loss": 0.2304, "step": 21392 }, { "epoch": 0.371864624797928, "grad_norm": 2.680048344795951, "learning_rate": 7.235974082931145e-07, "loss": 0.3099, "step": 21393 }, { "epoch": 0.3718820073354308, "grad_norm": 1.3609526070302986, "learning_rate": 7.235722301422773e-07, "loss": 0.3266, "step": 21394 }, { "epoch": 0.37189938987293364, "grad_norm": 1.863115283472341, "learning_rate": 7.235470512828188e-07, "loss": 0.3234, "step": 21395 }, { "epoch": 0.37191677241043647, "grad_norm": 1.7953250401149736, "learning_rate": 7.235218717148187e-07, "loss": 0.2026, "step": 21396 }, { "epoch": 0.3719341549479393, "grad_norm": 2.6183558869838803, "learning_rate": 7.234966914383571e-07, "loss": 0.2207, "step": 21397 }, { "epoch": 0.37195153748544213, "grad_norm": 1.4820734835596612, "learning_rate": 7.234715104535135e-07, "loss": 0.1876, "step": 21398 }, { "epoch": 0.37196892002294496, "grad_norm": 1.163969813170297, "learning_rate": 7.23446328760368e-07, "loss": 0.4409, "step": 21399 }, { "epoch": 0.3719863025604478, "grad_norm": 1.415912430278517, "learning_rate": 7.234211463590001e-07, "loss": 0.2633, "step": 21400 }, { "epoch": 0.3720036850979506, "grad_norm": 1.7355245216671296, "learning_rate": 7.2339596324949e-07, "loss": 0.3588, "step": 21401 }, { "epoch": 0.37202106763545345, "grad_norm": 2.8769951471823627, "learning_rate": 7.233707794319172e-07, "loss": 0.4538, "step": 21402 }, { "epoch": 0.3720384501729562, "grad_norm": 1.040387233518834, "learning_rate": 7.233455949063617e-07, "loss": 0.3921, "step": 21403 }, { "epoch": 0.37205583271045906, "grad_norm": 1.469887786686961, "learning_rate": 7.233204096729032e-07, "loss": 0.355, "step": 21404 }, { "epoch": 0.3720732152479619, "grad_norm": 1.341975979448708, "learning_rate": 7.232952237316217e-07, "loss": 0.2363, "step": 21405 }, { "epoch": 0.3720905977854647, "grad_norm": 2.238668046558668, "learning_rate": 7.232700370825968e-07, "loss": 0.1308, "step": 21406 }, { "epoch": 0.37210798032296755, "grad_norm": 1.7548132892148103, "learning_rate": 7.232448497259085e-07, "loss": 0.5183, "step": 21407 }, { "epoch": 0.3721253628604704, "grad_norm": 2.795237446598427, "learning_rate": 7.232196616616364e-07, "loss": 0.3484, "step": 21408 }, { "epoch": 0.3721427453979732, "grad_norm": 1.6648735562233188, "learning_rate": 7.231944728898608e-07, "loss": 0.2791, "step": 21409 }, { "epoch": 0.37216012793547604, "grad_norm": 0.9328415452917052, "learning_rate": 7.23169283410661e-07, "loss": 0.1788, "step": 21410 }, { "epoch": 0.37217751047297887, "grad_norm": 3.0850070540173973, "learning_rate": 7.231440932241173e-07, "loss": 0.2979, "step": 21411 }, { "epoch": 0.3721948930104817, "grad_norm": 0.9531941151731929, "learning_rate": 7.231189023303093e-07, "loss": 0.313, "step": 21412 }, { "epoch": 0.3722122755479845, "grad_norm": 2.397696985120346, "learning_rate": 7.230937107293167e-07, "loss": 0.4329, "step": 21413 }, { "epoch": 0.3722296580854873, "grad_norm": 1.7007318541943863, "learning_rate": 7.230685184212195e-07, "loss": 0.234, "step": 21414 }, { "epoch": 0.37224704062299013, "grad_norm": 1.3688790066540908, "learning_rate": 7.230433254060977e-07, "loss": 0.3123, "step": 21415 }, { "epoch": 0.37226442316049296, "grad_norm": 1.2370448974571941, "learning_rate": 7.230181316840311e-07, "loss": 0.2621, "step": 21416 }, { "epoch": 0.3722818056979958, "grad_norm": 2.6656381883673297, "learning_rate": 7.229929372550993e-07, "loss": 0.4894, "step": 21417 }, { "epoch": 0.3722991882354986, "grad_norm": 1.3540236627628066, "learning_rate": 7.229677421193824e-07, "loss": 0.2555, "step": 21418 }, { "epoch": 0.37231657077300145, "grad_norm": 1.9080213394345378, "learning_rate": 7.229425462769603e-07, "loss": 0.5193, "step": 21419 }, { "epoch": 0.3723339533105043, "grad_norm": 3.2227280214032543, "learning_rate": 7.229173497279125e-07, "loss": 0.5333, "step": 21420 }, { "epoch": 0.3723513358480071, "grad_norm": 3.262357221564392, "learning_rate": 7.228921524723192e-07, "loss": 0.4621, "step": 21421 }, { "epoch": 0.3723687183855099, "grad_norm": 2.1164351245678374, "learning_rate": 7.228669545102602e-07, "loss": 0.6325, "step": 21422 }, { "epoch": 0.3723861009230127, "grad_norm": 1.3451621523549386, "learning_rate": 7.228417558418154e-07, "loss": 0.2964, "step": 21423 }, { "epoch": 0.37240348346051555, "grad_norm": 1.1682717991331935, "learning_rate": 7.228165564670645e-07, "loss": 0.228, "step": 21424 }, { "epoch": 0.3724208659980184, "grad_norm": 1.5170675336752992, "learning_rate": 7.227913563860873e-07, "loss": 0.345, "step": 21425 }, { "epoch": 0.3724382485355212, "grad_norm": 2.5528032535097047, "learning_rate": 7.227661555989641e-07, "loss": 0.3469, "step": 21426 }, { "epoch": 0.37245563107302404, "grad_norm": 1.6555820097118301, "learning_rate": 7.227409541057745e-07, "loss": 0.1389, "step": 21427 }, { "epoch": 0.37247301361052687, "grad_norm": 2.110090516770214, "learning_rate": 7.227157519065983e-07, "loss": 0.6534, "step": 21428 }, { "epoch": 0.3724903961480297, "grad_norm": 1.7839856771800735, "learning_rate": 7.226905490015153e-07, "loss": 0.202, "step": 21429 }, { "epoch": 0.37250777868553253, "grad_norm": 2.076419886779278, "learning_rate": 7.22665345390606e-07, "loss": 0.3667, "step": 21430 }, { "epoch": 0.37252516122303536, "grad_norm": 1.1828701977644596, "learning_rate": 7.226401410739496e-07, "loss": 0.2624, "step": 21431 }, { "epoch": 0.37254254376053814, "grad_norm": 2.439458902389637, "learning_rate": 7.226149360516262e-07, "loss": 0.3272, "step": 21432 }, { "epoch": 0.37255992629804097, "grad_norm": 1.5302191451587333, "learning_rate": 7.225897303237157e-07, "loss": 0.2, "step": 21433 }, { "epoch": 0.3725773088355438, "grad_norm": 1.6757153176944912, "learning_rate": 7.225645238902979e-07, "loss": 0.3857, "step": 21434 }, { "epoch": 0.3725946913730466, "grad_norm": 2.5528804568736314, "learning_rate": 7.225393167514531e-07, "loss": 0.3695, "step": 21435 }, { "epoch": 0.37261207391054946, "grad_norm": 1.7062795018825514, "learning_rate": 7.225141089072607e-07, "loss": 0.368, "step": 21436 }, { "epoch": 0.3726294564480523, "grad_norm": 1.1978354899476011, "learning_rate": 7.224889003578007e-07, "loss": 0.1965, "step": 21437 }, { "epoch": 0.3726468389855551, "grad_norm": 1.2758360374470852, "learning_rate": 7.224636911031534e-07, "loss": 0.2231, "step": 21438 }, { "epoch": 0.37266422152305795, "grad_norm": 1.6169606902743292, "learning_rate": 7.224384811433981e-07, "loss": 0.3325, "step": 21439 }, { "epoch": 0.3726816040605608, "grad_norm": 2.288876941520542, "learning_rate": 7.224132704786151e-07, "loss": 0.2845, "step": 21440 }, { "epoch": 0.3726989865980636, "grad_norm": 1.3828802100094053, "learning_rate": 7.223880591088842e-07, "loss": 0.3484, "step": 21441 }, { "epoch": 0.3727163691355664, "grad_norm": 1.4919533731642682, "learning_rate": 7.223628470342853e-07, "loss": 0.3812, "step": 21442 }, { "epoch": 0.3727337516730692, "grad_norm": 1.5066173793566906, "learning_rate": 7.223376342548984e-07, "loss": 0.1945, "step": 21443 }, { "epoch": 0.37275113421057204, "grad_norm": 1.4663332122615735, "learning_rate": 7.223124207708034e-07, "loss": 0.2402, "step": 21444 }, { "epoch": 0.3727685167480749, "grad_norm": 1.899088266531911, "learning_rate": 7.222872065820798e-07, "loss": 0.3751, "step": 21445 }, { "epoch": 0.3727858992855777, "grad_norm": 1.6025830841550248, "learning_rate": 7.222619916888081e-07, "loss": 0.3325, "step": 21446 }, { "epoch": 0.37280328182308053, "grad_norm": 1.5610846056634762, "learning_rate": 7.222367760910679e-07, "loss": 0.2142, "step": 21447 }, { "epoch": 0.37282066436058336, "grad_norm": 1.3015796573631313, "learning_rate": 7.222115597889393e-07, "loss": 0.2345, "step": 21448 }, { "epoch": 0.3728380468980862, "grad_norm": 1.612109525395713, "learning_rate": 7.221863427825021e-07, "loss": 0.27, "step": 21449 }, { "epoch": 0.372855429435589, "grad_norm": 1.3115431190701876, "learning_rate": 7.221611250718363e-07, "loss": 0.2259, "step": 21450 }, { "epoch": 0.37287281197309186, "grad_norm": 1.2981353189200342, "learning_rate": 7.221359066570217e-07, "loss": 0.2042, "step": 21451 }, { "epoch": 0.37289019451059463, "grad_norm": 2.1343226168433125, "learning_rate": 7.221106875381384e-07, "loss": 0.3954, "step": 21452 }, { "epoch": 0.37290757704809746, "grad_norm": 1.8545018387529237, "learning_rate": 7.220854677152661e-07, "loss": 0.259, "step": 21453 }, { "epoch": 0.3729249595856003, "grad_norm": 1.7850395662381278, "learning_rate": 7.22060247188485e-07, "loss": 0.3127, "step": 21454 }, { "epoch": 0.3729423421231031, "grad_norm": 1.2629675970562524, "learning_rate": 7.220350259578749e-07, "loss": 0.2548, "step": 21455 }, { "epoch": 0.37295972466060595, "grad_norm": 2.7035183447394515, "learning_rate": 7.220098040235159e-07, "loss": 0.2982, "step": 21456 }, { "epoch": 0.3729771071981088, "grad_norm": 2.6182418756112438, "learning_rate": 7.219845813854877e-07, "loss": 0.3196, "step": 21457 }, { "epoch": 0.3729944897356116, "grad_norm": 1.063339194674508, "learning_rate": 7.219593580438702e-07, "loss": 0.2448, "step": 21458 }, { "epoch": 0.37301187227311444, "grad_norm": 3.153557423027736, "learning_rate": 7.219341339987436e-07, "loss": 0.3115, "step": 21459 }, { "epoch": 0.3730292548106173, "grad_norm": 1.4201760949482813, "learning_rate": 7.219089092501877e-07, "loss": 0.1861, "step": 21460 }, { "epoch": 0.3730466373481201, "grad_norm": 1.8800771587109668, "learning_rate": 7.218836837982827e-07, "loss": 0.2786, "step": 21461 }, { "epoch": 0.3730640198856229, "grad_norm": 1.2909568347363585, "learning_rate": 7.218584576431082e-07, "loss": 0.3116, "step": 21462 }, { "epoch": 0.3730814024231257, "grad_norm": 1.1529191981684528, "learning_rate": 7.218332307847443e-07, "loss": 0.3044, "step": 21463 }, { "epoch": 0.37309878496062854, "grad_norm": 1.3669457579019704, "learning_rate": 7.218080032232709e-07, "loss": 0.2832, "step": 21464 }, { "epoch": 0.37311616749813137, "grad_norm": 1.7703434088267052, "learning_rate": 7.217827749587681e-07, "loss": 0.3055, "step": 21465 }, { "epoch": 0.3731335500356342, "grad_norm": 2.6717350125297887, "learning_rate": 7.217575459913159e-07, "loss": 0.29, "step": 21466 }, { "epoch": 0.37315093257313703, "grad_norm": 2.610180494178929, "learning_rate": 7.21732316320994e-07, "loss": 0.4064, "step": 21467 }, { "epoch": 0.37316831511063986, "grad_norm": 2.054137551814575, "learning_rate": 7.217070859478825e-07, "loss": 0.4756, "step": 21468 }, { "epoch": 0.3731856976481427, "grad_norm": 2.361484195445931, "learning_rate": 7.216818548720614e-07, "loss": 0.3037, "step": 21469 }, { "epoch": 0.3732030801856455, "grad_norm": 1.820568089420986, "learning_rate": 7.216566230936107e-07, "loss": 0.4681, "step": 21470 }, { "epoch": 0.37322046272314835, "grad_norm": 2.4810289437794517, "learning_rate": 7.216313906126103e-07, "loss": 0.3226, "step": 21471 }, { "epoch": 0.3732378452606511, "grad_norm": 1.6852900514218765, "learning_rate": 7.216061574291401e-07, "loss": 0.2718, "step": 21472 }, { "epoch": 0.37325522779815395, "grad_norm": 2.053658345534576, "learning_rate": 7.215809235432804e-07, "loss": 0.2164, "step": 21473 }, { "epoch": 0.3732726103356568, "grad_norm": 1.8554723911148994, "learning_rate": 7.215556889551106e-07, "loss": 0.3877, "step": 21474 }, { "epoch": 0.3732899928731596, "grad_norm": 1.3549620154400224, "learning_rate": 7.215304536647114e-07, "loss": 0.3537, "step": 21475 }, { "epoch": 0.37330737541066245, "grad_norm": 1.2396318013698024, "learning_rate": 7.215052176721623e-07, "loss": 0.3697, "step": 21476 }, { "epoch": 0.3733247579481653, "grad_norm": 1.3564797371952493, "learning_rate": 7.214799809775434e-07, "loss": 0.2648, "step": 21477 }, { "epoch": 0.3733421404856681, "grad_norm": 1.504477581047614, "learning_rate": 7.214547435809346e-07, "loss": 0.3023, "step": 21478 }, { "epoch": 0.37335952302317094, "grad_norm": 1.5933045366651108, "learning_rate": 7.214295054824163e-07, "loss": 0.3859, "step": 21479 }, { "epoch": 0.37337690556067377, "grad_norm": 1.8345680326626403, "learning_rate": 7.214042666820679e-07, "loss": 0.3553, "step": 21480 }, { "epoch": 0.3733942880981766, "grad_norm": 1.837805126431306, "learning_rate": 7.213790271799698e-07, "loss": 0.2404, "step": 21481 }, { "epoch": 0.37341167063567937, "grad_norm": 1.0680076163158057, "learning_rate": 7.213537869762018e-07, "loss": 0.2756, "step": 21482 }, { "epoch": 0.3734290531731822, "grad_norm": 2.292914490868411, "learning_rate": 7.21328546070844e-07, "loss": 0.2595, "step": 21483 }, { "epoch": 0.37344643571068503, "grad_norm": 1.957511214698267, "learning_rate": 7.213033044639764e-07, "loss": 0.26, "step": 21484 }, { "epoch": 0.37346381824818786, "grad_norm": 1.464044579689926, "learning_rate": 7.212780621556789e-07, "loss": 0.2614, "step": 21485 }, { "epoch": 0.3734812007856907, "grad_norm": 1.3251353628900602, "learning_rate": 7.212528191460318e-07, "loss": 0.2459, "step": 21486 }, { "epoch": 0.3734985833231935, "grad_norm": 1.5829322233607745, "learning_rate": 7.212275754351147e-07, "loss": 0.2074, "step": 21487 }, { "epoch": 0.37351596586069635, "grad_norm": 3.2032399834752656, "learning_rate": 7.212023310230079e-07, "loss": 0.2873, "step": 21488 }, { "epoch": 0.3735333483981992, "grad_norm": 1.9902088393447726, "learning_rate": 7.211770859097912e-07, "loss": 0.1537, "step": 21489 }, { "epoch": 0.373550730935702, "grad_norm": 2.2813235557743807, "learning_rate": 7.211518400955449e-07, "loss": 0.2333, "step": 21490 }, { "epoch": 0.37356811347320484, "grad_norm": 1.2065284090842137, "learning_rate": 7.211265935803487e-07, "loss": 0.3209, "step": 21491 }, { "epoch": 0.3735854960107076, "grad_norm": 2.9427248319134747, "learning_rate": 7.21101346364283e-07, "loss": 0.3399, "step": 21492 }, { "epoch": 0.37360287854821045, "grad_norm": 1.993491443945503, "learning_rate": 7.210760984474273e-07, "loss": 0.2283, "step": 21493 }, { "epoch": 0.3736202610857133, "grad_norm": 1.7295295261265993, "learning_rate": 7.210508498298622e-07, "loss": 0.2409, "step": 21494 }, { "epoch": 0.3736376436232161, "grad_norm": 2.3757844638998615, "learning_rate": 7.210256005116673e-07, "loss": 0.3862, "step": 21495 }, { "epoch": 0.37365502616071894, "grad_norm": 1.7670989993384434, "learning_rate": 7.210003504929228e-07, "loss": 0.3174, "step": 21496 }, { "epoch": 0.37367240869822177, "grad_norm": 1.579198637920712, "learning_rate": 7.209750997737086e-07, "loss": 0.3264, "step": 21497 }, { "epoch": 0.3736897912357246, "grad_norm": 2.5008712576704557, "learning_rate": 7.209498483541051e-07, "loss": 0.3118, "step": 21498 }, { "epoch": 0.37370717377322743, "grad_norm": 1.6107556976447577, "learning_rate": 7.209245962341919e-07, "loss": 0.4404, "step": 21499 }, { "epoch": 0.37372455631073026, "grad_norm": 1.6234934253354847, "learning_rate": 7.208993434140492e-07, "loss": 0.3548, "step": 21500 }, { "epoch": 0.3737419388482331, "grad_norm": 1.4922902730398533, "learning_rate": 7.208740898937572e-07, "loss": 0.4853, "step": 21501 }, { "epoch": 0.37375932138573587, "grad_norm": 1.766830090040511, "learning_rate": 7.208488356733957e-07, "loss": 0.3778, "step": 21502 }, { "epoch": 0.3737767039232387, "grad_norm": 1.4529787823371663, "learning_rate": 7.208235807530447e-07, "loss": 0.4709, "step": 21503 }, { "epoch": 0.3737940864607415, "grad_norm": 1.559723816998759, "learning_rate": 7.207983251327847e-07, "loss": 0.2425, "step": 21504 }, { "epoch": 0.37381146899824436, "grad_norm": 1.4436666933644848, "learning_rate": 7.207730688126952e-07, "loss": 0.2872, "step": 21505 }, { "epoch": 0.3738288515357472, "grad_norm": 1.4719539683348999, "learning_rate": 7.207478117928566e-07, "loss": 0.2809, "step": 21506 }, { "epoch": 0.37384623407325, "grad_norm": 2.885143617518588, "learning_rate": 7.207225540733488e-07, "loss": 0.3128, "step": 21507 }, { "epoch": 0.37386361661075285, "grad_norm": 1.4288301357127842, "learning_rate": 7.206972956542518e-07, "loss": 0.366, "step": 21508 }, { "epoch": 0.3738809991482557, "grad_norm": 1.6218632158343405, "learning_rate": 7.206720365356459e-07, "loss": 0.3791, "step": 21509 }, { "epoch": 0.3738983816857585, "grad_norm": 1.998490599633021, "learning_rate": 7.20646776717611e-07, "loss": 0.2973, "step": 21510 }, { "epoch": 0.37391576422326134, "grad_norm": 1.8135137248283622, "learning_rate": 7.20621516200227e-07, "loss": 0.5285, "step": 21511 }, { "epoch": 0.3739331467607641, "grad_norm": 1.4536505481265671, "learning_rate": 7.205962549835743e-07, "loss": 0.35, "step": 21512 }, { "epoch": 0.37395052929826694, "grad_norm": 1.9150011587587137, "learning_rate": 7.205709930677327e-07, "loss": 0.2102, "step": 21513 }, { "epoch": 0.3739679118357698, "grad_norm": 1.6628212826317177, "learning_rate": 7.205457304527825e-07, "loss": 0.2526, "step": 21514 }, { "epoch": 0.3739852943732726, "grad_norm": 1.6199406653340982, "learning_rate": 7.205204671388037e-07, "loss": 0.2621, "step": 21515 }, { "epoch": 0.37400267691077543, "grad_norm": 1.3894586929616541, "learning_rate": 7.204952031258762e-07, "loss": 0.2308, "step": 21516 }, { "epoch": 0.37402005944827826, "grad_norm": 2.0353426762833373, "learning_rate": 7.204699384140802e-07, "loss": 0.2769, "step": 21517 }, { "epoch": 0.3740374419857811, "grad_norm": 1.2616931361329278, "learning_rate": 7.204446730034957e-07, "loss": 0.1501, "step": 21518 }, { "epoch": 0.3740548245232839, "grad_norm": 2.1011560805533342, "learning_rate": 7.204194068942028e-07, "loss": 0.2839, "step": 21519 }, { "epoch": 0.37407220706078675, "grad_norm": 0.9612504589935292, "learning_rate": 7.203941400862819e-07, "loss": 0.501, "step": 21520 }, { "epoch": 0.3740895895982896, "grad_norm": 3.4685651321748567, "learning_rate": 7.203688725798128e-07, "loss": 0.3952, "step": 21521 }, { "epoch": 0.37410697213579236, "grad_norm": 1.556102052610059, "learning_rate": 7.203436043748753e-07, "loss": 0.2979, "step": 21522 }, { "epoch": 0.3741243546732952, "grad_norm": 1.3366932632946624, "learning_rate": 7.2031833547155e-07, "loss": 0.2619, "step": 21523 }, { "epoch": 0.374141737210798, "grad_norm": 2.4395678112372488, "learning_rate": 7.202930658699167e-07, "loss": 0.2945, "step": 21524 }, { "epoch": 0.37415911974830085, "grad_norm": 2.7975934556445807, "learning_rate": 7.202677955700557e-07, "loss": 0.5023, "step": 21525 }, { "epoch": 0.3741765022858037, "grad_norm": 1.3388890180405042, "learning_rate": 7.20242524572047e-07, "loss": 0.2766, "step": 21526 }, { "epoch": 0.3741938848233065, "grad_norm": 5.374277704124614, "learning_rate": 7.202172528759705e-07, "loss": 0.4806, "step": 21527 }, { "epoch": 0.37421126736080934, "grad_norm": 1.602935845291659, "learning_rate": 7.201919804819064e-07, "loss": 0.2691, "step": 21528 }, { "epoch": 0.37422864989831217, "grad_norm": 1.509703293246514, "learning_rate": 7.201667073899352e-07, "loss": 0.3254, "step": 21529 }, { "epoch": 0.374246032435815, "grad_norm": 2.1240741035022457, "learning_rate": 7.201414336001363e-07, "loss": 0.3467, "step": 21530 }, { "epoch": 0.37426341497331783, "grad_norm": 1.343698664322207, "learning_rate": 7.201161591125904e-07, "loss": 0.3686, "step": 21531 }, { "epoch": 0.3742807975108206, "grad_norm": 1.6753624323910745, "learning_rate": 7.200908839273774e-07, "loss": 0.1668, "step": 21532 }, { "epoch": 0.37429818004832344, "grad_norm": 2.1755808379339645, "learning_rate": 7.200656080445773e-07, "loss": 0.3884, "step": 21533 }, { "epoch": 0.37431556258582627, "grad_norm": 1.5587582975504606, "learning_rate": 7.200403314642704e-07, "loss": 0.2744, "step": 21534 }, { "epoch": 0.3743329451233291, "grad_norm": 1.3594476108714928, "learning_rate": 7.200150541865366e-07, "loss": 0.325, "step": 21535 }, { "epoch": 0.3743503276608319, "grad_norm": 2.224039404588474, "learning_rate": 7.199897762114563e-07, "loss": 0.2958, "step": 21536 }, { "epoch": 0.37436771019833476, "grad_norm": 1.9233987644169899, "learning_rate": 7.199644975391094e-07, "loss": 0.2449, "step": 21537 }, { "epoch": 0.3743850927358376, "grad_norm": 3.070046261724854, "learning_rate": 7.19939218169576e-07, "loss": 0.3192, "step": 21538 }, { "epoch": 0.3744024752733404, "grad_norm": 1.089444497374934, "learning_rate": 7.199139381029365e-07, "loss": 0.4336, "step": 21539 }, { "epoch": 0.37441985781084325, "grad_norm": 1.708053714171903, "learning_rate": 7.198886573392706e-07, "loss": 0.2386, "step": 21540 }, { "epoch": 0.3744372403483461, "grad_norm": 3.6954185153849033, "learning_rate": 7.198633758786588e-07, "loss": 0.2935, "step": 21541 }, { "epoch": 0.37445462288584885, "grad_norm": 1.2929042666058792, "learning_rate": 7.198380937211812e-07, "loss": 0.4079, "step": 21542 }, { "epoch": 0.3744720054233517, "grad_norm": 1.9355251369701736, "learning_rate": 7.198128108669175e-07, "loss": 0.4098, "step": 21543 }, { "epoch": 0.3744893879608545, "grad_norm": 1.6684090781134389, "learning_rate": 7.197875273159485e-07, "loss": 0.3488, "step": 21544 }, { "epoch": 0.37450677049835734, "grad_norm": 2.2024358242259425, "learning_rate": 7.197622430683539e-07, "loss": 0.2313, "step": 21545 }, { "epoch": 0.3745241530358602, "grad_norm": 1.564753278700655, "learning_rate": 7.197369581242139e-07, "loss": 0.3081, "step": 21546 }, { "epoch": 0.374541535573363, "grad_norm": 1.7198193147364993, "learning_rate": 7.197116724836087e-07, "loss": 0.3809, "step": 21547 }, { "epoch": 0.37455891811086583, "grad_norm": 1.6010770451833714, "learning_rate": 7.196863861466185e-07, "loss": 0.4103, "step": 21548 }, { "epoch": 0.37457630064836867, "grad_norm": 1.1848697680501472, "learning_rate": 7.196610991133232e-07, "loss": 0.202, "step": 21549 }, { "epoch": 0.3745936831858715, "grad_norm": 1.6813965602362007, "learning_rate": 7.196358113838033e-07, "loss": 0.23, "step": 21550 }, { "epoch": 0.3746110657233743, "grad_norm": 3.361243071553059, "learning_rate": 7.196105229581386e-07, "loss": 0.6408, "step": 21551 }, { "epoch": 0.3746284482608771, "grad_norm": 1.6983153559145996, "learning_rate": 7.195852338364095e-07, "loss": 0.2618, "step": 21552 }, { "epoch": 0.37464583079837993, "grad_norm": 3.106883014856877, "learning_rate": 7.195599440186962e-07, "loss": 0.3823, "step": 21553 }, { "epoch": 0.37466321333588276, "grad_norm": 1.4223913035919216, "learning_rate": 7.195346535050786e-07, "loss": 0.3432, "step": 21554 }, { "epoch": 0.3746805958733856, "grad_norm": 1.4583698002800616, "learning_rate": 7.19509362295637e-07, "loss": 0.2775, "step": 21555 }, { "epoch": 0.3746979784108884, "grad_norm": 1.6440408807514524, "learning_rate": 7.194840703904516e-07, "loss": 0.1617, "step": 21556 }, { "epoch": 0.37471536094839125, "grad_norm": 1.243107032484278, "learning_rate": 7.194587777896024e-07, "loss": 0.2857, "step": 21557 }, { "epoch": 0.3747327434858941, "grad_norm": 1.1083075985105588, "learning_rate": 7.194334844931699e-07, "loss": 0.4487, "step": 21558 }, { "epoch": 0.3747501260233969, "grad_norm": 1.2527262401019885, "learning_rate": 7.194081905012338e-07, "loss": 0.4091, "step": 21559 }, { "epoch": 0.37476750856089974, "grad_norm": 1.3704262748079499, "learning_rate": 7.193828958138747e-07, "loss": 0.301, "step": 21560 }, { "epoch": 0.3747848910984025, "grad_norm": 1.8460869395218578, "learning_rate": 7.193576004311726e-07, "loss": 0.4563, "step": 21561 }, { "epoch": 0.37480227363590535, "grad_norm": 2.6466851767487545, "learning_rate": 7.193323043532076e-07, "loss": 0.3947, "step": 21562 }, { "epoch": 0.3748196561734082, "grad_norm": 2.536510170617122, "learning_rate": 7.193070075800599e-07, "loss": 0.4282, "step": 21563 }, { "epoch": 0.374837038710911, "grad_norm": 2.390275100759926, "learning_rate": 7.192817101118099e-07, "loss": 0.2526, "step": 21564 }, { "epoch": 0.37485442124841384, "grad_norm": 2.2874147593730334, "learning_rate": 7.192564119485375e-07, "loss": 0.4934, "step": 21565 }, { "epoch": 0.37487180378591667, "grad_norm": 1.6867061179424963, "learning_rate": 7.192311130903231e-07, "loss": 0.289, "step": 21566 }, { "epoch": 0.3748891863234195, "grad_norm": 1.5503766717499565, "learning_rate": 7.192058135372465e-07, "loss": 0.439, "step": 21567 }, { "epoch": 0.37490656886092233, "grad_norm": 1.1263786864460994, "learning_rate": 7.191805132893885e-07, "loss": 0.2557, "step": 21568 }, { "epoch": 0.37492395139842516, "grad_norm": 1.981121506325166, "learning_rate": 7.191552123468286e-07, "loss": 0.3154, "step": 21569 }, { "epoch": 0.374941333935928, "grad_norm": 1.5621162528261505, "learning_rate": 7.191299107096476e-07, "loss": 0.2194, "step": 21570 }, { "epoch": 0.37495871647343076, "grad_norm": 1.3116172932036236, "learning_rate": 7.191046083779254e-07, "loss": 0.2582, "step": 21571 }, { "epoch": 0.3749760990109336, "grad_norm": 1.2485102628627196, "learning_rate": 7.190793053517423e-07, "loss": 0.1766, "step": 21572 }, { "epoch": 0.3749934815484364, "grad_norm": 1.3526667653577669, "learning_rate": 7.190540016311783e-07, "loss": 0.4001, "step": 21573 }, { "epoch": 0.37501086408593926, "grad_norm": 1.5039822526774826, "learning_rate": 7.190286972163137e-07, "loss": 0.2339, "step": 21574 }, { "epoch": 0.3750282466234421, "grad_norm": 1.2885221757473755, "learning_rate": 7.190033921072289e-07, "loss": 0.364, "step": 21575 }, { "epoch": 0.3750456291609449, "grad_norm": 1.9879965245058857, "learning_rate": 7.18978086304004e-07, "loss": 0.1921, "step": 21576 }, { "epoch": 0.37506301169844775, "grad_norm": 1.8969290486713704, "learning_rate": 7.189527798067191e-07, "loss": 0.3329, "step": 21577 }, { "epoch": 0.3750803942359506, "grad_norm": 1.0472258815911795, "learning_rate": 7.189274726154544e-07, "loss": 0.1775, "step": 21578 }, { "epoch": 0.3750977767734534, "grad_norm": 1.3181815071247613, "learning_rate": 7.189021647302902e-07, "loss": 0.2403, "step": 21579 }, { "epoch": 0.37511515931095624, "grad_norm": 2.129226884380437, "learning_rate": 7.188768561513067e-07, "loss": 0.3879, "step": 21580 }, { "epoch": 0.375132541848459, "grad_norm": 1.3366318231597187, "learning_rate": 7.188515468785841e-07, "loss": 0.4238, "step": 21581 }, { "epoch": 0.37514992438596184, "grad_norm": 1.580499409423799, "learning_rate": 7.188262369122026e-07, "loss": 0.3582, "step": 21582 }, { "epoch": 0.37516730692346467, "grad_norm": 1.6269957006469404, "learning_rate": 7.188009262522426e-07, "loss": 0.2112, "step": 21583 }, { "epoch": 0.3751846894609675, "grad_norm": 2.341556175517927, "learning_rate": 7.187756148987839e-07, "loss": 0.3584, "step": 21584 }, { "epoch": 0.37520207199847033, "grad_norm": 1.7661004363877142, "learning_rate": 7.187503028519072e-07, "loss": 0.391, "step": 21585 }, { "epoch": 0.37521945453597316, "grad_norm": 2.6780523456502743, "learning_rate": 7.187249901116925e-07, "loss": 0.3423, "step": 21586 }, { "epoch": 0.375236837073476, "grad_norm": 1.7182650201127694, "learning_rate": 7.186996766782201e-07, "loss": 0.3505, "step": 21587 }, { "epoch": 0.3752542196109788, "grad_norm": 1.708620707422303, "learning_rate": 7.1867436255157e-07, "loss": 0.2634, "step": 21588 }, { "epoch": 0.37527160214848165, "grad_norm": 2.248367003203141, "learning_rate": 7.186490477318228e-07, "loss": 0.1917, "step": 21589 }, { "epoch": 0.3752889846859845, "grad_norm": 2.586698393827919, "learning_rate": 7.186237322190585e-07, "loss": 0.2852, "step": 21590 }, { "epoch": 0.37530636722348726, "grad_norm": 3.0359027814532924, "learning_rate": 7.185984160133575e-07, "loss": 0.3729, "step": 21591 }, { "epoch": 0.3753237497609901, "grad_norm": 1.8245238865609015, "learning_rate": 7.185730991147998e-07, "loss": 0.2834, "step": 21592 }, { "epoch": 0.3753411322984929, "grad_norm": 1.7532165865683615, "learning_rate": 7.185477815234658e-07, "loss": 0.2431, "step": 21593 }, { "epoch": 0.37535851483599575, "grad_norm": 1.4831161875978303, "learning_rate": 7.185224632394356e-07, "loss": 0.2409, "step": 21594 }, { "epoch": 0.3753758973734986, "grad_norm": 1.9179020720107212, "learning_rate": 7.184971442627899e-07, "loss": 0.3261, "step": 21595 }, { "epoch": 0.3753932799110014, "grad_norm": 1.3558052556373932, "learning_rate": 7.184718245936084e-07, "loss": 0.2516, "step": 21596 }, { "epoch": 0.37541066244850424, "grad_norm": 3.834453759282364, "learning_rate": 7.184465042319716e-07, "loss": 0.3323, "step": 21597 }, { "epoch": 0.37542804498600707, "grad_norm": 1.7915340479077317, "learning_rate": 7.184211831779597e-07, "loss": 0.3295, "step": 21598 }, { "epoch": 0.3754454275235099, "grad_norm": 1.309392017096206, "learning_rate": 7.18395861431653e-07, "loss": 0.4857, "step": 21599 }, { "epoch": 0.37546281006101273, "grad_norm": 1.636311537239303, "learning_rate": 7.183705389931319e-07, "loss": 0.277, "step": 21600 }, { "epoch": 0.3754801925985155, "grad_norm": 1.3503942406160208, "learning_rate": 7.183452158624763e-07, "loss": 0.31, "step": 21601 }, { "epoch": 0.37549757513601834, "grad_norm": 1.512261186438229, "learning_rate": 7.183198920397667e-07, "loss": 0.3052, "step": 21602 }, { "epoch": 0.37551495767352117, "grad_norm": 1.806647025416616, "learning_rate": 7.182945675250833e-07, "loss": 0.2438, "step": 21603 }, { "epoch": 0.375532340211024, "grad_norm": 1.866618877708946, "learning_rate": 7.182692423185065e-07, "loss": 0.2923, "step": 21604 }, { "epoch": 0.3755497227485268, "grad_norm": 1.395380930153076, "learning_rate": 7.182439164201165e-07, "loss": 0.2484, "step": 21605 }, { "epoch": 0.37556710528602966, "grad_norm": 1.6088253269482082, "learning_rate": 7.182185898299936e-07, "loss": 0.3081, "step": 21606 }, { "epoch": 0.3755844878235325, "grad_norm": 2.434925456613801, "learning_rate": 7.181932625482177e-07, "loss": 0.1907, "step": 21607 }, { "epoch": 0.3756018703610353, "grad_norm": 3.212416214196124, "learning_rate": 7.181679345748697e-07, "loss": 0.4385, "step": 21608 }, { "epoch": 0.37561925289853815, "grad_norm": 1.483437331783999, "learning_rate": 7.181426059100292e-07, "loss": 0.3216, "step": 21609 }, { "epoch": 0.375636635436041, "grad_norm": 1.6958107576074908, "learning_rate": 7.181172765537772e-07, "loss": 0.2875, "step": 21610 }, { "epoch": 0.37565401797354375, "grad_norm": 1.0696827509346993, "learning_rate": 7.180919465061935e-07, "loss": 0.4569, "step": 21611 }, { "epoch": 0.3756714005110466, "grad_norm": 2.2473849252174096, "learning_rate": 7.180666157673585e-07, "loss": 0.2274, "step": 21612 }, { "epoch": 0.3756887830485494, "grad_norm": 1.5553659571229195, "learning_rate": 7.180412843373525e-07, "loss": 0.1725, "step": 21613 }, { "epoch": 0.37570616558605224, "grad_norm": 2.725021534187965, "learning_rate": 7.180159522162558e-07, "loss": 0.2482, "step": 21614 }, { "epoch": 0.3757235481235551, "grad_norm": 1.4984317952573696, "learning_rate": 7.179906194041488e-07, "loss": 0.1985, "step": 21615 }, { "epoch": 0.3757409306610579, "grad_norm": 1.2593202967633452, "learning_rate": 7.179652859011115e-07, "loss": 0.3815, "step": 21616 }, { "epoch": 0.37575831319856073, "grad_norm": 1.2731374442115306, "learning_rate": 7.179399517072244e-07, "loss": 0.1991, "step": 21617 }, { "epoch": 0.37577569573606356, "grad_norm": 1.4669387429288623, "learning_rate": 7.179146168225678e-07, "loss": 0.2775, "step": 21618 }, { "epoch": 0.3757930782735664, "grad_norm": 2.380195567008891, "learning_rate": 7.178892812472219e-07, "loss": 0.3994, "step": 21619 }, { "epoch": 0.3758104608110692, "grad_norm": 1.3025431270400254, "learning_rate": 7.178639449812671e-07, "loss": 0.256, "step": 21620 }, { "epoch": 0.375827843348572, "grad_norm": 4.61360917157277, "learning_rate": 7.178386080247838e-07, "loss": 0.3899, "step": 21621 }, { "epoch": 0.37584522588607483, "grad_norm": 2.1146398863240097, "learning_rate": 7.178132703778521e-07, "loss": 0.278, "step": 21622 }, { "epoch": 0.37586260842357766, "grad_norm": 2.4377338641752924, "learning_rate": 7.177879320405523e-07, "loss": 0.3839, "step": 21623 }, { "epoch": 0.3758799909610805, "grad_norm": 1.7910421225809674, "learning_rate": 7.17762593012965e-07, "loss": 0.3618, "step": 21624 }, { "epoch": 0.3758973734985833, "grad_norm": 1.6404693650906619, "learning_rate": 7.177372532951701e-07, "loss": 0.2632, "step": 21625 }, { "epoch": 0.37591475603608615, "grad_norm": 2.0573655949733913, "learning_rate": 7.177119128872482e-07, "loss": 0.3213, "step": 21626 }, { "epoch": 0.375932138573589, "grad_norm": 2.2673171339117664, "learning_rate": 7.176865717892796e-07, "loss": 0.2839, "step": 21627 }, { "epoch": 0.3759495211110918, "grad_norm": 2.1077909202269143, "learning_rate": 7.176612300013445e-07, "loss": 0.2031, "step": 21628 }, { "epoch": 0.37596690364859464, "grad_norm": 1.5278741504452182, "learning_rate": 7.176358875235232e-07, "loss": 0.2039, "step": 21629 }, { "epoch": 0.37598428618609747, "grad_norm": 2.118903676716614, "learning_rate": 7.176105443558963e-07, "loss": 0.4249, "step": 21630 }, { "epoch": 0.37600166872360025, "grad_norm": 1.0272768711263547, "learning_rate": 7.175852004985439e-07, "loss": 0.3242, "step": 21631 }, { "epoch": 0.3760190512611031, "grad_norm": 1.353654545234001, "learning_rate": 7.175598559515463e-07, "loss": 0.2237, "step": 21632 }, { "epoch": 0.3760364337986059, "grad_norm": 1.5813493796768576, "learning_rate": 7.175345107149838e-07, "loss": 0.2099, "step": 21633 }, { "epoch": 0.37605381633610874, "grad_norm": 3.1098864665490003, "learning_rate": 7.17509164788937e-07, "loss": 0.3911, "step": 21634 }, { "epoch": 0.37607119887361157, "grad_norm": 1.9886820534930563, "learning_rate": 7.17483818173486e-07, "loss": 0.366, "step": 21635 }, { "epoch": 0.3760885814111144, "grad_norm": 2.0299230312235332, "learning_rate": 7.174584708687111e-07, "loss": 0.2351, "step": 21636 }, { "epoch": 0.37610596394861723, "grad_norm": 2.057708397550516, "learning_rate": 7.17433122874693e-07, "loss": 0.3091, "step": 21637 }, { "epoch": 0.37612334648612006, "grad_norm": 1.6522960301632412, "learning_rate": 7.174077741915115e-07, "loss": 0.3881, "step": 21638 }, { "epoch": 0.3761407290236229, "grad_norm": 1.522643852380421, "learning_rate": 7.173824248192474e-07, "loss": 0.4439, "step": 21639 }, { "epoch": 0.3761581115611257, "grad_norm": 4.131403888591163, "learning_rate": 7.173570747579808e-07, "loss": 0.327, "step": 21640 }, { "epoch": 0.3761754940986285, "grad_norm": 1.4198150882783378, "learning_rate": 7.173317240077922e-07, "loss": 0.288, "step": 21641 }, { "epoch": 0.3761928766361313, "grad_norm": 1.9421241038027437, "learning_rate": 7.173063725687617e-07, "loss": 0.3254, "step": 21642 }, { "epoch": 0.37621025917363415, "grad_norm": 1.4058341500947207, "learning_rate": 7.172810204409701e-07, "loss": 0.5431, "step": 21643 }, { "epoch": 0.376227641711137, "grad_norm": 1.850562791913525, "learning_rate": 7.172556676244972e-07, "loss": 0.2377, "step": 21644 }, { "epoch": 0.3762450242486398, "grad_norm": 1.9392175159518126, "learning_rate": 7.172303141194237e-07, "loss": 0.3111, "step": 21645 }, { "epoch": 0.37626240678614264, "grad_norm": 1.5969219113222137, "learning_rate": 7.1720495992583e-07, "loss": 0.3141, "step": 21646 }, { "epoch": 0.3762797893236455, "grad_norm": 0.8333919655266981, "learning_rate": 7.171796050437963e-07, "loss": 0.2685, "step": 21647 }, { "epoch": 0.3762971718611483, "grad_norm": 1.1223631035625392, "learning_rate": 7.171542494734028e-07, "loss": 0.3604, "step": 21648 }, { "epoch": 0.37631455439865114, "grad_norm": 2.2074089106750887, "learning_rate": 7.171288932147304e-07, "loss": 0.1607, "step": 21649 }, { "epoch": 0.37633193693615397, "grad_norm": 3.0350631309117486, "learning_rate": 7.17103536267859e-07, "loss": 0.6824, "step": 21650 }, { "epoch": 0.37634931947365674, "grad_norm": 1.5384735960118938, "learning_rate": 7.170781786328693e-07, "loss": 0.2804, "step": 21651 }, { "epoch": 0.37636670201115957, "grad_norm": 1.8941673959680436, "learning_rate": 7.170528203098413e-07, "loss": 0.4172, "step": 21652 }, { "epoch": 0.3763840845486624, "grad_norm": 2.169811226441304, "learning_rate": 7.170274612988556e-07, "loss": 0.2335, "step": 21653 }, { "epoch": 0.37640146708616523, "grad_norm": 2.1530140666837667, "learning_rate": 7.170021015999925e-07, "loss": 0.3552, "step": 21654 }, { "epoch": 0.37641884962366806, "grad_norm": 1.788721338590401, "learning_rate": 7.169767412133324e-07, "loss": 0.2711, "step": 21655 }, { "epoch": 0.3764362321611709, "grad_norm": 3.4437612469409946, "learning_rate": 7.169513801389559e-07, "loss": 0.2468, "step": 21656 }, { "epoch": 0.3764536146986737, "grad_norm": 1.6924958915809891, "learning_rate": 7.16926018376943e-07, "loss": 0.254, "step": 21657 }, { "epoch": 0.37647099723617655, "grad_norm": 1.6402915969775327, "learning_rate": 7.169006559273742e-07, "loss": 0.3099, "step": 21658 }, { "epoch": 0.3764883797736794, "grad_norm": 2.1259693691603134, "learning_rate": 7.168752927903301e-07, "loss": 0.385, "step": 21659 }, { "epoch": 0.3765057623111822, "grad_norm": 1.60603068042684, "learning_rate": 7.168499289658911e-07, "loss": 0.1883, "step": 21660 }, { "epoch": 0.376523144848685, "grad_norm": 1.7911262742205336, "learning_rate": 7.168245644541372e-07, "loss": 0.3312, "step": 21661 }, { "epoch": 0.3765405273861878, "grad_norm": 2.0888567879185143, "learning_rate": 7.167991992551491e-07, "loss": 0.2656, "step": 21662 }, { "epoch": 0.37655790992369065, "grad_norm": 1.2848977688588112, "learning_rate": 7.16773833369007e-07, "loss": 0.6189, "step": 21663 }, { "epoch": 0.3765752924611935, "grad_norm": 1.7125557986447988, "learning_rate": 7.167484667957916e-07, "loss": 0.3574, "step": 21664 }, { "epoch": 0.3765926749986963, "grad_norm": 1.352986431573245, "learning_rate": 7.167230995355831e-07, "loss": 0.2936, "step": 21665 }, { "epoch": 0.37661005753619914, "grad_norm": 0.9484578031388642, "learning_rate": 7.166977315884619e-07, "loss": 0.2032, "step": 21666 }, { "epoch": 0.37662744007370197, "grad_norm": 2.307304752982079, "learning_rate": 7.166723629545084e-07, "loss": 0.3799, "step": 21667 }, { "epoch": 0.3766448226112048, "grad_norm": 1.1046719088503698, "learning_rate": 7.166469936338031e-07, "loss": 0.4808, "step": 21668 }, { "epoch": 0.37666220514870763, "grad_norm": 1.5535700534494938, "learning_rate": 7.166216236264263e-07, "loss": 0.2461, "step": 21669 }, { "epoch": 0.37667958768621046, "grad_norm": 1.6407630860904967, "learning_rate": 7.165962529324586e-07, "loss": 0.2773, "step": 21670 }, { "epoch": 0.37669697022371323, "grad_norm": 1.6984422151319274, "learning_rate": 7.165708815519801e-07, "loss": 0.3742, "step": 21671 }, { "epoch": 0.37671435276121606, "grad_norm": 1.1334849119463335, "learning_rate": 7.165455094850714e-07, "loss": 0.2662, "step": 21672 }, { "epoch": 0.3767317352987189, "grad_norm": 2.6366083243350116, "learning_rate": 7.16520136731813e-07, "loss": 0.2758, "step": 21673 }, { "epoch": 0.3767491178362217, "grad_norm": 3.10317711977197, "learning_rate": 7.164947632922851e-07, "loss": 0.4943, "step": 21674 }, { "epoch": 0.37676650037372456, "grad_norm": 3.6703459334693402, "learning_rate": 7.164693891665684e-07, "loss": 0.4621, "step": 21675 }, { "epoch": 0.3767838829112274, "grad_norm": 3.1749287941062025, "learning_rate": 7.164440143547431e-07, "loss": 0.492, "step": 21676 }, { "epoch": 0.3768012654487302, "grad_norm": 1.6275707555814487, "learning_rate": 7.164186388568896e-07, "loss": 0.3376, "step": 21677 }, { "epoch": 0.37681864798623305, "grad_norm": 1.4775042396954654, "learning_rate": 7.163932626730887e-07, "loss": 0.4496, "step": 21678 }, { "epoch": 0.3768360305237359, "grad_norm": 1.0521568834006039, "learning_rate": 7.163678858034203e-07, "loss": 0.3642, "step": 21679 }, { "epoch": 0.3768534130612387, "grad_norm": 1.6918529895744758, "learning_rate": 7.163425082479653e-07, "loss": 0.3322, "step": 21680 }, { "epoch": 0.3768707955987415, "grad_norm": 1.4734419212427434, "learning_rate": 7.163171300068038e-07, "loss": 0.1802, "step": 21681 }, { "epoch": 0.3768881781362443, "grad_norm": 1.514249800343383, "learning_rate": 7.162917510800164e-07, "loss": 0.4969, "step": 21682 }, { "epoch": 0.37690556067374714, "grad_norm": 1.3170316922825183, "learning_rate": 7.162663714676834e-07, "loss": 0.3984, "step": 21683 }, { "epoch": 0.37692294321125, "grad_norm": 1.6488861740231633, "learning_rate": 7.162409911698854e-07, "loss": 0.2756, "step": 21684 }, { "epoch": 0.3769403257487528, "grad_norm": 1.3551763160073496, "learning_rate": 7.16215610186703e-07, "loss": 0.3604, "step": 21685 }, { "epoch": 0.37695770828625563, "grad_norm": 1.0847520714029637, "learning_rate": 7.161902285182162e-07, "loss": 0.4117, "step": 21686 }, { "epoch": 0.37697509082375846, "grad_norm": 2.3462260974547995, "learning_rate": 7.161648461645057e-07, "loss": 0.339, "step": 21687 }, { "epoch": 0.3769924733612613, "grad_norm": 2.241067534870187, "learning_rate": 7.161394631256521e-07, "loss": 0.3283, "step": 21688 }, { "epoch": 0.3770098558987641, "grad_norm": 1.4812074602876002, "learning_rate": 7.161140794017355e-07, "loss": 0.4461, "step": 21689 }, { "epoch": 0.37702723843626695, "grad_norm": 2.2854337338270647, "learning_rate": 7.160886949928366e-07, "loss": 0.2147, "step": 21690 }, { "epoch": 0.37704462097376973, "grad_norm": 1.0003410713686358, "learning_rate": 7.16063309899036e-07, "loss": 0.208, "step": 21691 }, { "epoch": 0.37706200351127256, "grad_norm": 0.8366819057254966, "learning_rate": 7.160379241204136e-07, "loss": 0.153, "step": 21692 }, { "epoch": 0.3770793860487754, "grad_norm": 1.1953044573988996, "learning_rate": 7.160125376570505e-07, "loss": 0.4263, "step": 21693 }, { "epoch": 0.3770967685862782, "grad_norm": 3.078497202859091, "learning_rate": 7.159871505090267e-07, "loss": 0.2081, "step": 21694 }, { "epoch": 0.37711415112378105, "grad_norm": 1.861633483099383, "learning_rate": 7.159617626764229e-07, "loss": 0.2372, "step": 21695 }, { "epoch": 0.3771315336612839, "grad_norm": 2.0842482601587484, "learning_rate": 7.159363741593196e-07, "loss": 0.3795, "step": 21696 }, { "epoch": 0.3771489161987867, "grad_norm": 1.4807086642600529, "learning_rate": 7.159109849577973e-07, "loss": 0.2982, "step": 21697 }, { "epoch": 0.37716629873628954, "grad_norm": 3.171777816479971, "learning_rate": 7.15885595071936e-07, "loss": 0.3543, "step": 21698 }, { "epoch": 0.37718368127379237, "grad_norm": 1.5692500348338896, "learning_rate": 7.158602045018168e-07, "loss": 0.4207, "step": 21699 }, { "epoch": 0.37720106381129515, "grad_norm": 1.5654808650222705, "learning_rate": 7.158348132475198e-07, "loss": 0.3348, "step": 21700 }, { "epoch": 0.377218446348798, "grad_norm": 2.5351749015627214, "learning_rate": 7.158094213091257e-07, "loss": 0.3011, "step": 21701 }, { "epoch": 0.3772358288863008, "grad_norm": 1.080898221520057, "learning_rate": 7.157840286867147e-07, "loss": 0.3256, "step": 21702 }, { "epoch": 0.37725321142380364, "grad_norm": 1.4221678725612614, "learning_rate": 7.157586353803677e-07, "loss": 0.2512, "step": 21703 }, { "epoch": 0.37727059396130647, "grad_norm": 0.8844535012095246, "learning_rate": 7.157332413901646e-07, "loss": 0.4601, "step": 21704 }, { "epoch": 0.3772879764988093, "grad_norm": 1.836452673542342, "learning_rate": 7.157078467161865e-07, "loss": 0.4738, "step": 21705 }, { "epoch": 0.3773053590363121, "grad_norm": 1.5347110666800126, "learning_rate": 7.156824513585135e-07, "loss": 0.2977, "step": 21706 }, { "epoch": 0.37732274157381496, "grad_norm": 1.8508201842908951, "learning_rate": 7.156570553172263e-07, "loss": 0.3092, "step": 21707 }, { "epoch": 0.3773401241113178, "grad_norm": 2.6387537756620767, "learning_rate": 7.156316585924051e-07, "loss": 0.5367, "step": 21708 }, { "epoch": 0.3773575066488206, "grad_norm": 2.3094231172692568, "learning_rate": 7.156062611841309e-07, "loss": 0.4948, "step": 21709 }, { "epoch": 0.3773748891863234, "grad_norm": 2.2578838635397545, "learning_rate": 7.155808630924836e-07, "loss": 0.3544, "step": 21710 }, { "epoch": 0.3773922717238262, "grad_norm": 1.742818407448595, "learning_rate": 7.155554643175442e-07, "loss": 0.3451, "step": 21711 }, { "epoch": 0.37740965426132905, "grad_norm": 2.305657312920774, "learning_rate": 7.155300648593929e-07, "loss": 0.2391, "step": 21712 }, { "epoch": 0.3774270367988319, "grad_norm": 1.554404874673244, "learning_rate": 7.155046647181103e-07, "loss": 0.3537, "step": 21713 }, { "epoch": 0.3774444193363347, "grad_norm": 1.324660375873423, "learning_rate": 7.154792638937769e-07, "loss": 0.2877, "step": 21714 }, { "epoch": 0.37746180187383754, "grad_norm": 2.6785007907595144, "learning_rate": 7.154538623864732e-07, "loss": 0.3085, "step": 21715 }, { "epoch": 0.3774791844113404, "grad_norm": 1.235539313008289, "learning_rate": 7.154284601962798e-07, "loss": 0.2739, "step": 21716 }, { "epoch": 0.3774965669488432, "grad_norm": 1.4673486480099123, "learning_rate": 7.154030573232773e-07, "loss": 0.295, "step": 21717 }, { "epoch": 0.37751394948634603, "grad_norm": 1.747602457543597, "learning_rate": 7.153776537675457e-07, "loss": 0.3512, "step": 21718 }, { "epoch": 0.37753133202384886, "grad_norm": 1.4090593843591566, "learning_rate": 7.153522495291659e-07, "loss": 0.3638, "step": 21719 }, { "epoch": 0.37754871456135164, "grad_norm": 1.4768353275921426, "learning_rate": 7.153268446082187e-07, "loss": 0.3605, "step": 21720 }, { "epoch": 0.37756609709885447, "grad_norm": 2.2954605216934425, "learning_rate": 7.153014390047841e-07, "loss": 0.4138, "step": 21721 }, { "epoch": 0.3775834796363573, "grad_norm": 1.9836373167895103, "learning_rate": 7.152760327189429e-07, "loss": 0.2649, "step": 21722 }, { "epoch": 0.37760086217386013, "grad_norm": 3.1999050723369686, "learning_rate": 7.152506257507756e-07, "loss": 1.0055, "step": 21723 }, { "epoch": 0.37761824471136296, "grad_norm": 1.1514582722804154, "learning_rate": 7.152252181003625e-07, "loss": 0.2386, "step": 21724 }, { "epoch": 0.3776356272488658, "grad_norm": 2.5504974965820457, "learning_rate": 7.151998097677845e-07, "loss": 0.2633, "step": 21725 }, { "epoch": 0.3776530097863686, "grad_norm": 1.9244804353684501, "learning_rate": 7.151744007531219e-07, "loss": 0.1883, "step": 21726 }, { "epoch": 0.37767039232387145, "grad_norm": 3.378614635010369, "learning_rate": 7.151489910564554e-07, "loss": 0.2438, "step": 21727 }, { "epoch": 0.3776877748613743, "grad_norm": 1.814822598208948, "learning_rate": 7.151235806778654e-07, "loss": 0.3184, "step": 21728 }, { "epoch": 0.3777051573988771, "grad_norm": 1.153744644831902, "learning_rate": 7.150981696174324e-07, "loss": 0.31, "step": 21729 }, { "epoch": 0.3777225399363799, "grad_norm": 1.331048201765287, "learning_rate": 7.150727578752369e-07, "loss": 0.2291, "step": 21730 }, { "epoch": 0.3777399224738827, "grad_norm": 1.6360326820622368, "learning_rate": 7.150473454513598e-07, "loss": 0.2191, "step": 21731 }, { "epoch": 0.37775730501138555, "grad_norm": 1.4194806570442429, "learning_rate": 7.150219323458812e-07, "loss": 0.1713, "step": 21732 }, { "epoch": 0.3777746875488884, "grad_norm": 1.3277246168261219, "learning_rate": 7.14996518558882e-07, "loss": 0.342, "step": 21733 }, { "epoch": 0.3777920700863912, "grad_norm": 1.5622024377095458, "learning_rate": 7.149711040904425e-07, "loss": 0.3143, "step": 21734 }, { "epoch": 0.37780945262389404, "grad_norm": 2.1077544608226515, "learning_rate": 7.149456889406434e-07, "loss": 0.2902, "step": 21735 }, { "epoch": 0.37782683516139687, "grad_norm": 1.8623959678267619, "learning_rate": 7.149202731095652e-07, "loss": 0.3369, "step": 21736 }, { "epoch": 0.3778442176988997, "grad_norm": 1.6442818811620312, "learning_rate": 7.148948565972884e-07, "loss": 0.2301, "step": 21737 }, { "epoch": 0.37786160023640253, "grad_norm": 3.1298001332905523, "learning_rate": 7.148694394038937e-07, "loss": 0.3688, "step": 21738 }, { "epoch": 0.37787898277390536, "grad_norm": 2.2981542768004606, "learning_rate": 7.148440215294615e-07, "loss": 0.2321, "step": 21739 }, { "epoch": 0.37789636531140813, "grad_norm": 1.8779466814109014, "learning_rate": 7.148186029740725e-07, "loss": 0.2893, "step": 21740 }, { "epoch": 0.37791374784891096, "grad_norm": 1.3304890617857335, "learning_rate": 7.147931837378072e-07, "loss": 0.2276, "step": 21741 }, { "epoch": 0.3779311303864138, "grad_norm": 1.7908022921657647, "learning_rate": 7.147677638207461e-07, "loss": 0.3137, "step": 21742 }, { "epoch": 0.3779485129239166, "grad_norm": 1.1328605976947697, "learning_rate": 7.147423432229699e-07, "loss": 0.3145, "step": 21743 }, { "epoch": 0.37796589546141945, "grad_norm": 1.690811733925559, "learning_rate": 7.147169219445591e-07, "loss": 0.4522, "step": 21744 }, { "epoch": 0.3779832779989223, "grad_norm": 1.4372376404229963, "learning_rate": 7.146914999855944e-07, "loss": 0.1748, "step": 21745 }, { "epoch": 0.3780006605364251, "grad_norm": 1.294992583887631, "learning_rate": 7.146660773461561e-07, "loss": 0.2945, "step": 21746 }, { "epoch": 0.37801804307392795, "grad_norm": 1.4005332014416392, "learning_rate": 7.146406540263251e-07, "loss": 0.2003, "step": 21747 }, { "epoch": 0.3780354256114308, "grad_norm": 1.3021412139887456, "learning_rate": 7.146152300261818e-07, "loss": 0.1936, "step": 21748 }, { "epoch": 0.3780528081489336, "grad_norm": 3.052032315842805, "learning_rate": 7.145898053458067e-07, "loss": 0.2484, "step": 21749 }, { "epoch": 0.3780701906864364, "grad_norm": 4.185404308519507, "learning_rate": 7.145643799852805e-07, "loss": 0.5786, "step": 21750 }, { "epoch": 0.3780875732239392, "grad_norm": 1.6912058146749762, "learning_rate": 7.145389539446838e-07, "loss": 0.1976, "step": 21751 }, { "epoch": 0.37810495576144204, "grad_norm": 1.9693350475367795, "learning_rate": 7.145135272240972e-07, "loss": 0.2285, "step": 21752 }, { "epoch": 0.37812233829894487, "grad_norm": 1.8778778296480427, "learning_rate": 7.144880998236014e-07, "loss": 0.1948, "step": 21753 }, { "epoch": 0.3781397208364477, "grad_norm": 1.1757770249966626, "learning_rate": 7.144626717432767e-07, "loss": 0.2424, "step": 21754 }, { "epoch": 0.37815710337395053, "grad_norm": 1.6068578550411539, "learning_rate": 7.144372429832037e-07, "loss": 0.2784, "step": 21755 }, { "epoch": 0.37817448591145336, "grad_norm": 1.5205788880971867, "learning_rate": 7.144118135434634e-07, "loss": 0.3533, "step": 21756 }, { "epoch": 0.3781918684489562, "grad_norm": 1.7684265475500411, "learning_rate": 7.14386383424136e-07, "loss": 0.2986, "step": 21757 }, { "epoch": 0.378209250986459, "grad_norm": 1.5420990660317109, "learning_rate": 7.143609526253023e-07, "loss": 0.3012, "step": 21758 }, { "epoch": 0.37822663352396185, "grad_norm": 1.4868009619327218, "learning_rate": 7.143355211470429e-07, "loss": 0.2454, "step": 21759 }, { "epoch": 0.3782440160614646, "grad_norm": 1.3011753388801648, "learning_rate": 7.143100889894382e-07, "loss": 0.2742, "step": 21760 }, { "epoch": 0.37826139859896746, "grad_norm": 2.8491313510342593, "learning_rate": 7.142846561525692e-07, "loss": 0.512, "step": 21761 }, { "epoch": 0.3782787811364703, "grad_norm": 2.0347897711116207, "learning_rate": 7.142592226365159e-07, "loss": 0.2898, "step": 21762 }, { "epoch": 0.3782961636739731, "grad_norm": 1.2817645631565178, "learning_rate": 7.142337884413597e-07, "loss": 0.3394, "step": 21763 }, { "epoch": 0.37831354621147595, "grad_norm": 2.0549579228170254, "learning_rate": 7.142083535671806e-07, "loss": 0.4711, "step": 21764 }, { "epoch": 0.3783309287489788, "grad_norm": 1.1005737924168137, "learning_rate": 7.141829180140594e-07, "loss": 0.2158, "step": 21765 }, { "epoch": 0.3783483112864816, "grad_norm": 1.482633725304819, "learning_rate": 7.141574817820767e-07, "loss": 0.3821, "step": 21766 }, { "epoch": 0.37836569382398444, "grad_norm": 1.6049390190268902, "learning_rate": 7.141320448713133e-07, "loss": 0.2102, "step": 21767 }, { "epoch": 0.37838307636148727, "grad_norm": 1.8216047564704378, "learning_rate": 7.141066072818496e-07, "loss": 0.3374, "step": 21768 }, { "epoch": 0.3784004588989901, "grad_norm": 1.400624679019641, "learning_rate": 7.140811690137665e-07, "loss": 0.1565, "step": 21769 }, { "epoch": 0.3784178414364929, "grad_norm": 1.3414824686459312, "learning_rate": 7.140557300671442e-07, "loss": 0.3781, "step": 21770 }, { "epoch": 0.3784352239739957, "grad_norm": 1.5717800322370463, "learning_rate": 7.140302904420636e-07, "loss": 0.3629, "step": 21771 }, { "epoch": 0.37845260651149853, "grad_norm": 2.4531083106790885, "learning_rate": 7.140048501386054e-07, "loss": 0.4055, "step": 21772 }, { "epoch": 0.37846998904900137, "grad_norm": 2.222498237353762, "learning_rate": 7.1397940915685e-07, "loss": 0.4158, "step": 21773 }, { "epoch": 0.3784873715865042, "grad_norm": 1.7139886412923604, "learning_rate": 7.13953967496878e-07, "loss": 0.3762, "step": 21774 }, { "epoch": 0.378504754124007, "grad_norm": 1.460491447687719, "learning_rate": 7.139285251587706e-07, "loss": 0.2352, "step": 21775 }, { "epoch": 0.37852213666150986, "grad_norm": 1.7962659672047134, "learning_rate": 7.139030821426079e-07, "loss": 0.3082, "step": 21776 }, { "epoch": 0.3785395191990127, "grad_norm": 2.021580602109622, "learning_rate": 7.138776384484705e-07, "loss": 0.2438, "step": 21777 }, { "epoch": 0.3785569017365155, "grad_norm": 1.8785842477975512, "learning_rate": 7.138521940764395e-07, "loss": 0.7153, "step": 21778 }, { "epoch": 0.37857428427401835, "grad_norm": 2.797234122474101, "learning_rate": 7.138267490265951e-07, "loss": 0.3914, "step": 21779 }, { "epoch": 0.3785916668115211, "grad_norm": 2.4894412355730307, "learning_rate": 7.138013032990182e-07, "loss": 0.427, "step": 21780 }, { "epoch": 0.37860904934902395, "grad_norm": 1.704975092868796, "learning_rate": 7.137758568937893e-07, "loss": 0.5886, "step": 21781 }, { "epoch": 0.3786264318865268, "grad_norm": 1.5029288475616192, "learning_rate": 7.137504098109893e-07, "loss": 0.335, "step": 21782 }, { "epoch": 0.3786438144240296, "grad_norm": 2.591474998260965, "learning_rate": 7.137249620506987e-07, "loss": 0.3417, "step": 21783 }, { "epoch": 0.37866119696153244, "grad_norm": 1.7638065730108519, "learning_rate": 7.136995136129979e-07, "loss": 0.3931, "step": 21784 }, { "epoch": 0.3786785794990353, "grad_norm": 4.470179691395946, "learning_rate": 7.136740644979681e-07, "loss": 0.4676, "step": 21785 }, { "epoch": 0.3786959620365381, "grad_norm": 1.0675833938839543, "learning_rate": 7.136486147056895e-07, "loss": 0.2923, "step": 21786 }, { "epoch": 0.37871334457404093, "grad_norm": 2.033486934433258, "learning_rate": 7.136231642362429e-07, "loss": 0.2737, "step": 21787 }, { "epoch": 0.37873072711154376, "grad_norm": 2.099795310525852, "learning_rate": 7.135977130897092e-07, "loss": 0.3325, "step": 21788 }, { "epoch": 0.3787481096490466, "grad_norm": 5.908215031657407, "learning_rate": 7.135722612661687e-07, "loss": 0.3943, "step": 21789 }, { "epoch": 0.37876549218654937, "grad_norm": 1.7259409886918253, "learning_rate": 7.135468087657022e-07, "loss": 0.2418, "step": 21790 }, { "epoch": 0.3787828747240522, "grad_norm": 1.7937584778977835, "learning_rate": 7.135213555883906e-07, "loss": 0.1693, "step": 21791 }, { "epoch": 0.37880025726155503, "grad_norm": 1.4104633380434106, "learning_rate": 7.134959017343141e-07, "loss": 0.379, "step": 21792 }, { "epoch": 0.37881763979905786, "grad_norm": 1.4581383326103592, "learning_rate": 7.134704472035539e-07, "loss": 0.1734, "step": 21793 }, { "epoch": 0.3788350223365607, "grad_norm": 2.0260670163864547, "learning_rate": 7.134449919961903e-07, "loss": 0.5997, "step": 21794 }, { "epoch": 0.3788524048740635, "grad_norm": 1.2319176968533234, "learning_rate": 7.134195361123043e-07, "loss": 0.3574, "step": 21795 }, { "epoch": 0.37886978741156635, "grad_norm": 1.0713072381000537, "learning_rate": 7.133940795519763e-07, "loss": 0.3676, "step": 21796 }, { "epoch": 0.3788871699490692, "grad_norm": 1.4298113822832301, "learning_rate": 7.133686223152871e-07, "loss": 0.185, "step": 21797 }, { "epoch": 0.378904552486572, "grad_norm": 1.443814930835322, "learning_rate": 7.133431644023174e-07, "loss": 0.2117, "step": 21798 }, { "epoch": 0.37892193502407484, "grad_norm": 1.2141152871801215, "learning_rate": 7.133177058131478e-07, "loss": 0.2387, "step": 21799 }, { "epoch": 0.3789393175615776, "grad_norm": 2.5095568743708, "learning_rate": 7.132922465478592e-07, "loss": 0.4703, "step": 21800 }, { "epoch": 0.37895670009908045, "grad_norm": 3.4333270618010334, "learning_rate": 7.13266786606532e-07, "loss": 0.1553, "step": 21801 }, { "epoch": 0.3789740826365833, "grad_norm": 2.793034407513569, "learning_rate": 7.132413259892471e-07, "loss": 0.2823, "step": 21802 }, { "epoch": 0.3789914651740861, "grad_norm": 2.244260788396915, "learning_rate": 7.132158646960851e-07, "loss": 0.2819, "step": 21803 }, { "epoch": 0.37900884771158894, "grad_norm": 1.964357725028576, "learning_rate": 7.13190402727127e-07, "loss": 0.1941, "step": 21804 }, { "epoch": 0.37902623024909177, "grad_norm": 4.141382079032219, "learning_rate": 7.13164940082453e-07, "loss": 0.344, "step": 21805 }, { "epoch": 0.3790436127865946, "grad_norm": 2.054547795012995, "learning_rate": 7.131394767621441e-07, "loss": 0.2387, "step": 21806 }, { "epoch": 0.3790609953240974, "grad_norm": 2.1278564950722876, "learning_rate": 7.131140127662812e-07, "loss": 0.2634, "step": 21807 }, { "epoch": 0.37907837786160026, "grad_norm": 1.47860983388111, "learning_rate": 7.130885480949444e-07, "loss": 0.3368, "step": 21808 }, { "epoch": 0.3790957603991031, "grad_norm": 1.3673923369093444, "learning_rate": 7.130630827482149e-07, "loss": 0.5514, "step": 21809 }, { "epoch": 0.37911314293660586, "grad_norm": 2.669497925161416, "learning_rate": 7.130376167261735e-07, "loss": 0.2593, "step": 21810 }, { "epoch": 0.3791305254741087, "grad_norm": 1.605026988159259, "learning_rate": 7.130121500289006e-07, "loss": 0.256, "step": 21811 }, { "epoch": 0.3791479080116115, "grad_norm": 1.8017227158917635, "learning_rate": 7.12986682656477e-07, "loss": 0.1934, "step": 21812 }, { "epoch": 0.37916529054911435, "grad_norm": 1.528971105614672, "learning_rate": 7.129612146089833e-07, "loss": 0.5952, "step": 21813 }, { "epoch": 0.3791826730866172, "grad_norm": 1.4705863354516886, "learning_rate": 7.129357458865007e-07, "loss": 0.2105, "step": 21814 }, { "epoch": 0.37920005562412, "grad_norm": 2.0971170839881212, "learning_rate": 7.129102764891095e-07, "loss": 0.3479, "step": 21815 }, { "epoch": 0.37921743816162284, "grad_norm": 1.409562429227992, "learning_rate": 7.128848064168904e-07, "loss": 0.1998, "step": 21816 }, { "epoch": 0.3792348206991257, "grad_norm": 2.0073618937659568, "learning_rate": 7.128593356699244e-07, "loss": 0.3122, "step": 21817 }, { "epoch": 0.3792522032366285, "grad_norm": 2.3588604605949217, "learning_rate": 7.128338642482919e-07, "loss": 0.2648, "step": 21818 }, { "epoch": 0.37926958577413133, "grad_norm": 1.6828891038360683, "learning_rate": 7.12808392152074e-07, "loss": 0.2784, "step": 21819 }, { "epoch": 0.3792869683116341, "grad_norm": 1.0870475007367435, "learning_rate": 7.127829193813512e-07, "loss": 0.3162, "step": 21820 }, { "epoch": 0.37930435084913694, "grad_norm": 2.404653537949974, "learning_rate": 7.127574459362043e-07, "loss": 0.4189, "step": 21821 }, { "epoch": 0.37932173338663977, "grad_norm": 1.7978677237055118, "learning_rate": 7.127319718167139e-07, "loss": 0.3646, "step": 21822 }, { "epoch": 0.3793391159241426, "grad_norm": 2.091365976449638, "learning_rate": 7.127064970229612e-07, "loss": 0.3015, "step": 21823 }, { "epoch": 0.37935649846164543, "grad_norm": 1.1825793875574737, "learning_rate": 7.126810215550263e-07, "loss": 0.3632, "step": 21824 }, { "epoch": 0.37937388099914826, "grad_norm": 1.845671256635531, "learning_rate": 7.126555454129904e-07, "loss": 0.3135, "step": 21825 }, { "epoch": 0.3793912635366511, "grad_norm": 3.048804628716869, "learning_rate": 7.126300685969341e-07, "loss": 0.2711, "step": 21826 }, { "epoch": 0.3794086460741539, "grad_norm": 1.5489552718758433, "learning_rate": 7.12604591106938e-07, "loss": 0.3002, "step": 21827 }, { "epoch": 0.37942602861165675, "grad_norm": 3.462829219574419, "learning_rate": 7.125791129430832e-07, "loss": 0.212, "step": 21828 }, { "epoch": 0.3794434111491596, "grad_norm": 3.2433917545192683, "learning_rate": 7.125536341054501e-07, "loss": 0.5344, "step": 21829 }, { "epoch": 0.37946079368666236, "grad_norm": 2.444886640569239, "learning_rate": 7.125281545941197e-07, "loss": 0.7217, "step": 21830 }, { "epoch": 0.3794781762241652, "grad_norm": 1.416761208348779, "learning_rate": 7.125026744091727e-07, "loss": 0.3386, "step": 21831 }, { "epoch": 0.379495558761668, "grad_norm": 2.86051101186911, "learning_rate": 7.124771935506899e-07, "loss": 0.2848, "step": 21832 }, { "epoch": 0.37951294129917085, "grad_norm": 1.502538329762857, "learning_rate": 7.124517120187518e-07, "loss": 0.3005, "step": 21833 }, { "epoch": 0.3795303238366737, "grad_norm": 1.6748437984252271, "learning_rate": 7.124262298134396e-07, "loss": 0.2016, "step": 21834 }, { "epoch": 0.3795477063741765, "grad_norm": 1.4890815882914548, "learning_rate": 7.124007469348337e-07, "loss": 0.2341, "step": 21835 }, { "epoch": 0.37956508891167934, "grad_norm": 1.2109750346092538, "learning_rate": 7.123752633830152e-07, "loss": 0.247, "step": 21836 }, { "epoch": 0.37958247144918217, "grad_norm": 3.4340974814560905, "learning_rate": 7.123497791580643e-07, "loss": 0.3708, "step": 21837 }, { "epoch": 0.379599853986685, "grad_norm": 1.4897556637213067, "learning_rate": 7.123242942600624e-07, "loss": 0.1913, "step": 21838 }, { "epoch": 0.3796172365241878, "grad_norm": 1.8364912893117702, "learning_rate": 7.1229880868909e-07, "loss": 0.4099, "step": 21839 }, { "epoch": 0.3796346190616906, "grad_norm": 2.382309921647534, "learning_rate": 7.122733224452279e-07, "loss": 0.4094, "step": 21840 }, { "epoch": 0.37965200159919343, "grad_norm": 1.8694760309806333, "learning_rate": 7.122478355285567e-07, "loss": 0.2432, "step": 21841 }, { "epoch": 0.37966938413669626, "grad_norm": 1.1784773052271618, "learning_rate": 7.122223479391576e-07, "loss": 0.2792, "step": 21842 }, { "epoch": 0.3796867666741991, "grad_norm": 1.6185558111813387, "learning_rate": 7.121968596771109e-07, "loss": 0.2489, "step": 21843 }, { "epoch": 0.3797041492117019, "grad_norm": 1.941208207816492, "learning_rate": 7.121713707424979e-07, "loss": 0.1962, "step": 21844 }, { "epoch": 0.37972153174920475, "grad_norm": 1.8779447142819832, "learning_rate": 7.12145881135399e-07, "loss": 0.4087, "step": 21845 }, { "epoch": 0.3797389142867076, "grad_norm": 2.5548150146483906, "learning_rate": 7.12120390855895e-07, "loss": 0.3089, "step": 21846 }, { "epoch": 0.3797562968242104, "grad_norm": 2.0112212516073082, "learning_rate": 7.120948999040669e-07, "loss": 0.1792, "step": 21847 }, { "epoch": 0.37977367936171325, "grad_norm": 1.4211052964209712, "learning_rate": 7.120694082799955e-07, "loss": 0.2793, "step": 21848 }, { "epoch": 0.379791061899216, "grad_norm": 1.9023968129200879, "learning_rate": 7.120439159837613e-07, "loss": 0.3459, "step": 21849 }, { "epoch": 0.37980844443671885, "grad_norm": 1.2029664509374283, "learning_rate": 7.120184230154455e-07, "loss": 0.2653, "step": 21850 }, { "epoch": 0.3798258269742217, "grad_norm": 1.139333977045664, "learning_rate": 7.119929293751285e-07, "loss": 0.1886, "step": 21851 }, { "epoch": 0.3798432095117245, "grad_norm": 1.2916010738244836, "learning_rate": 7.119674350628915e-07, "loss": 0.2464, "step": 21852 }, { "epoch": 0.37986059204922734, "grad_norm": 1.7661830307679725, "learning_rate": 7.119419400788148e-07, "loss": 0.2271, "step": 21853 }, { "epoch": 0.37987797458673017, "grad_norm": 4.020587871379777, "learning_rate": 7.119164444229797e-07, "loss": 0.3863, "step": 21854 }, { "epoch": 0.379895357124233, "grad_norm": 1.3769369978105335, "learning_rate": 7.118909480954668e-07, "loss": 0.2283, "step": 21855 }, { "epoch": 0.37991273966173583, "grad_norm": 3.184561083078475, "learning_rate": 7.11865451096357e-07, "loss": 0.4456, "step": 21856 }, { "epoch": 0.37993012219923866, "grad_norm": 1.4817771086755462, "learning_rate": 7.118399534257309e-07, "loss": 0.325, "step": 21857 }, { "epoch": 0.3799475047367415, "grad_norm": 1.671831317845915, "learning_rate": 7.118144550836695e-07, "loss": 0.2635, "step": 21858 }, { "epoch": 0.37996488727424427, "grad_norm": 1.722403482234795, "learning_rate": 7.117889560702537e-07, "loss": 0.2683, "step": 21859 }, { "epoch": 0.3799822698117471, "grad_norm": 1.7151401577112584, "learning_rate": 7.117634563855641e-07, "loss": 0.2941, "step": 21860 }, { "epoch": 0.37999965234924993, "grad_norm": 1.860137327899349, "learning_rate": 7.117379560296816e-07, "loss": 0.3355, "step": 21861 }, { "epoch": 0.38001703488675276, "grad_norm": 1.1514545395719546, "learning_rate": 7.117124550026871e-07, "loss": 0.3487, "step": 21862 }, { "epoch": 0.3800344174242556, "grad_norm": 2.6127520560604665, "learning_rate": 7.116869533046612e-07, "loss": 0.4539, "step": 21863 }, { "epoch": 0.3800517999617584, "grad_norm": 2.4034118281105443, "learning_rate": 7.11661450935685e-07, "loss": 0.2178, "step": 21864 }, { "epoch": 0.38006918249926125, "grad_norm": 1.5648714111491269, "learning_rate": 7.116359478958394e-07, "loss": 0.3523, "step": 21865 }, { "epoch": 0.3800865650367641, "grad_norm": 1.8031419108226598, "learning_rate": 7.116104441852049e-07, "loss": 0.2937, "step": 21866 }, { "epoch": 0.3801039475742669, "grad_norm": 2.472523923319504, "learning_rate": 7.115849398038626e-07, "loss": 0.4063, "step": 21867 }, { "epoch": 0.38012133011176974, "grad_norm": 3.4765751439039874, "learning_rate": 7.115594347518931e-07, "loss": 0.3997, "step": 21868 }, { "epoch": 0.3801387126492725, "grad_norm": 1.0953541687903925, "learning_rate": 7.115339290293774e-07, "loss": 0.261, "step": 21869 }, { "epoch": 0.38015609518677534, "grad_norm": 2.5817077610441337, "learning_rate": 7.115084226363963e-07, "loss": 0.4166, "step": 21870 }, { "epoch": 0.3801734777242782, "grad_norm": 1.29340836238836, "learning_rate": 7.114829155730309e-07, "loss": 0.2917, "step": 21871 }, { "epoch": 0.380190860261781, "grad_norm": 1.6101292331149621, "learning_rate": 7.114574078393613e-07, "loss": 0.297, "step": 21872 }, { "epoch": 0.38020824279928384, "grad_norm": 3.1497234538228893, "learning_rate": 7.114318994354693e-07, "loss": 0.2837, "step": 21873 }, { "epoch": 0.38022562533678667, "grad_norm": 2.5128905134580366, "learning_rate": 7.114063903614353e-07, "loss": 0.4605, "step": 21874 }, { "epoch": 0.3802430078742895, "grad_norm": 1.3649808273246444, "learning_rate": 7.113808806173399e-07, "loss": 0.1533, "step": 21875 }, { "epoch": 0.3802603904117923, "grad_norm": 1.8263313992185626, "learning_rate": 7.113553702032643e-07, "loss": 0.4815, "step": 21876 }, { "epoch": 0.38027777294929516, "grad_norm": 1.8630452914503497, "learning_rate": 7.113298591192894e-07, "loss": 0.3609, "step": 21877 }, { "epoch": 0.380295155486798, "grad_norm": 1.814803814436019, "learning_rate": 7.113043473654958e-07, "loss": 0.3227, "step": 21878 }, { "epoch": 0.38031253802430076, "grad_norm": 2.0359438002274306, "learning_rate": 7.112788349419645e-07, "loss": 0.1978, "step": 21879 }, { "epoch": 0.3803299205618036, "grad_norm": 1.8503742576864413, "learning_rate": 7.112533218487764e-07, "loss": 0.331, "step": 21880 }, { "epoch": 0.3803473030993064, "grad_norm": 1.6910930635354346, "learning_rate": 7.112278080860122e-07, "loss": 0.3209, "step": 21881 }, { "epoch": 0.38036468563680925, "grad_norm": 2.2523134574488823, "learning_rate": 7.11202293653753e-07, "loss": 0.4096, "step": 21882 }, { "epoch": 0.3803820681743121, "grad_norm": 1.574114253032355, "learning_rate": 7.111767785520795e-07, "loss": 0.1888, "step": 21883 }, { "epoch": 0.3803994507118149, "grad_norm": 1.4711045870640327, "learning_rate": 7.111512627810725e-07, "loss": 0.1446, "step": 21884 }, { "epoch": 0.38041683324931774, "grad_norm": 2.2222114798974215, "learning_rate": 7.111257463408131e-07, "loss": 0.3072, "step": 21885 }, { "epoch": 0.3804342157868206, "grad_norm": 3.2984467699355347, "learning_rate": 7.111002292313821e-07, "loss": 0.4879, "step": 21886 }, { "epoch": 0.3804515983243234, "grad_norm": 1.6790259420809812, "learning_rate": 7.110747114528604e-07, "loss": 0.3747, "step": 21887 }, { "epoch": 0.38046898086182623, "grad_norm": 1.672888569182328, "learning_rate": 7.110491930053286e-07, "loss": 0.3259, "step": 21888 }, { "epoch": 0.380486363399329, "grad_norm": 1.9513567044148024, "learning_rate": 7.11023673888868e-07, "loss": 0.4021, "step": 21889 }, { "epoch": 0.38050374593683184, "grad_norm": 2.45603684877098, "learning_rate": 7.109981541035592e-07, "loss": 0.3804, "step": 21890 }, { "epoch": 0.38052112847433467, "grad_norm": 1.534982651996488, "learning_rate": 7.109726336494833e-07, "loss": 0.4353, "step": 21891 }, { "epoch": 0.3805385110118375, "grad_norm": 1.6241818977817704, "learning_rate": 7.10947112526721e-07, "loss": 0.3676, "step": 21892 }, { "epoch": 0.38055589354934033, "grad_norm": 1.02349215265517, "learning_rate": 7.109215907353531e-07, "loss": 0.2955, "step": 21893 }, { "epoch": 0.38057327608684316, "grad_norm": 2.4088804443956735, "learning_rate": 7.108960682754608e-07, "loss": 0.3083, "step": 21894 }, { "epoch": 0.380590658624346, "grad_norm": 1.943074667679847, "learning_rate": 7.108705451471249e-07, "loss": 0.2783, "step": 21895 }, { "epoch": 0.3806080411618488, "grad_norm": 2.3533417030538097, "learning_rate": 7.108450213504261e-07, "loss": 0.387, "step": 21896 }, { "epoch": 0.38062542369935165, "grad_norm": 2.623615514066962, "learning_rate": 7.108194968854455e-07, "loss": 0.4207, "step": 21897 }, { "epoch": 0.3806428062368545, "grad_norm": 1.6872789263016892, "learning_rate": 7.107939717522639e-07, "loss": 0.4178, "step": 21898 }, { "epoch": 0.38066018877435726, "grad_norm": 1.895390723748145, "learning_rate": 7.107684459509623e-07, "loss": 0.3401, "step": 21899 }, { "epoch": 0.3806775713118601, "grad_norm": 2.3854432557844794, "learning_rate": 7.107429194816214e-07, "loss": 0.2349, "step": 21900 }, { "epoch": 0.3806949538493629, "grad_norm": 1.522013797630257, "learning_rate": 7.107173923443224e-07, "loss": 0.1976, "step": 21901 }, { "epoch": 0.38071233638686575, "grad_norm": 1.3304542758997842, "learning_rate": 7.106918645391462e-07, "loss": 0.2496, "step": 21902 }, { "epoch": 0.3807297189243686, "grad_norm": 1.9826584710261652, "learning_rate": 7.106663360661733e-07, "loss": 0.4321, "step": 21903 }, { "epoch": 0.3807471014618714, "grad_norm": 1.1341858456337204, "learning_rate": 7.10640806925485e-07, "loss": 0.2385, "step": 21904 }, { "epoch": 0.38076448399937424, "grad_norm": 1.201629347965681, "learning_rate": 7.106152771171621e-07, "loss": 0.2276, "step": 21905 }, { "epoch": 0.38078186653687707, "grad_norm": 1.2365949593036978, "learning_rate": 7.105897466412856e-07, "loss": 0.3303, "step": 21906 }, { "epoch": 0.3807992490743799, "grad_norm": 1.7745373641755937, "learning_rate": 7.105642154979361e-07, "loss": 0.4081, "step": 21907 }, { "epoch": 0.3808166316118827, "grad_norm": 1.8658512481638252, "learning_rate": 7.10538683687195e-07, "loss": 0.3118, "step": 21908 }, { "epoch": 0.3808340141493855, "grad_norm": 1.5254323484599703, "learning_rate": 7.105131512091429e-07, "loss": 0.2749, "step": 21909 }, { "epoch": 0.38085139668688833, "grad_norm": 1.3171553916430239, "learning_rate": 7.104876180638608e-07, "loss": 0.154, "step": 21910 }, { "epoch": 0.38086877922439116, "grad_norm": 2.545348215042533, "learning_rate": 7.104620842514298e-07, "loss": 0.2211, "step": 21911 }, { "epoch": 0.380886161761894, "grad_norm": 1.263623389127292, "learning_rate": 7.104365497719305e-07, "loss": 0.2584, "step": 21912 }, { "epoch": 0.3809035442993968, "grad_norm": 1.3967600506512963, "learning_rate": 7.104110146254439e-07, "loss": 0.482, "step": 21913 }, { "epoch": 0.38092092683689965, "grad_norm": 2.654153724852816, "learning_rate": 7.103854788120512e-07, "loss": 0.3412, "step": 21914 }, { "epoch": 0.3809383093744025, "grad_norm": 2.061306397594474, "learning_rate": 7.103599423318332e-07, "loss": 0.3313, "step": 21915 }, { "epoch": 0.3809556919119053, "grad_norm": 1.3533354786210707, "learning_rate": 7.103344051848706e-07, "loss": 0.4297, "step": 21916 }, { "epoch": 0.38097307444940814, "grad_norm": 1.7727165401149656, "learning_rate": 7.103088673712448e-07, "loss": 0.3504, "step": 21917 }, { "epoch": 0.380990456986911, "grad_norm": 1.9402580977333508, "learning_rate": 7.102833288910364e-07, "loss": 0.3583, "step": 21918 }, { "epoch": 0.38100783952441375, "grad_norm": 1.7244562166091089, "learning_rate": 7.102577897443265e-07, "loss": 0.2175, "step": 21919 }, { "epoch": 0.3810252220619166, "grad_norm": 1.5018323939053722, "learning_rate": 7.102322499311959e-07, "loss": 0.288, "step": 21920 }, { "epoch": 0.3810426045994194, "grad_norm": 1.727303810735244, "learning_rate": 7.102067094517257e-07, "loss": 0.4081, "step": 21921 }, { "epoch": 0.38105998713692224, "grad_norm": 2.15471229801019, "learning_rate": 7.101811683059968e-07, "loss": 0.3583, "step": 21922 }, { "epoch": 0.38107736967442507, "grad_norm": 1.3528394125102978, "learning_rate": 7.1015562649409e-07, "loss": 0.3478, "step": 21923 }, { "epoch": 0.3810947522119279, "grad_norm": 1.6016362472176484, "learning_rate": 7.101300840160865e-07, "loss": 0.4136, "step": 21924 }, { "epoch": 0.38111213474943073, "grad_norm": 1.7474082273092493, "learning_rate": 7.101045408720673e-07, "loss": 0.2385, "step": 21925 }, { "epoch": 0.38112951728693356, "grad_norm": 1.1976297247332164, "learning_rate": 7.100789970621131e-07, "loss": 0.311, "step": 21926 }, { "epoch": 0.3811468998244364, "grad_norm": 1.6774996400548194, "learning_rate": 7.10053452586305e-07, "loss": 0.2977, "step": 21927 }, { "epoch": 0.3811642823619392, "grad_norm": 2.04197992602238, "learning_rate": 7.100279074447238e-07, "loss": 0.2642, "step": 21928 }, { "epoch": 0.381181664899442, "grad_norm": 0.9636259741541303, "learning_rate": 7.100023616374509e-07, "loss": 0.3682, "step": 21929 }, { "epoch": 0.3811990474369448, "grad_norm": 1.6204453772524139, "learning_rate": 7.099768151645668e-07, "loss": 0.3576, "step": 21930 }, { "epoch": 0.38121642997444766, "grad_norm": 0.9557468783470803, "learning_rate": 7.099512680261527e-07, "loss": 0.2041, "step": 21931 }, { "epoch": 0.3812338125119505, "grad_norm": 2.4813760312814117, "learning_rate": 7.099257202222894e-07, "loss": 0.3975, "step": 21932 }, { "epoch": 0.3812511950494533, "grad_norm": 2.371120164092832, "learning_rate": 7.099001717530582e-07, "loss": 0.4687, "step": 21933 }, { "epoch": 0.38126857758695615, "grad_norm": 3.6619900186328205, "learning_rate": 7.098746226185397e-07, "loss": 0.4473, "step": 21934 }, { "epoch": 0.381285960124459, "grad_norm": 1.6153925021098743, "learning_rate": 7.098490728188151e-07, "loss": 0.1572, "step": 21935 }, { "epoch": 0.3813033426619618, "grad_norm": 2.569953083507849, "learning_rate": 7.098235223539654e-07, "loss": 0.2794, "step": 21936 }, { "epoch": 0.38132072519946464, "grad_norm": 1.8464005149763523, "learning_rate": 7.097979712240716e-07, "loss": 0.2231, "step": 21937 }, { "epoch": 0.38133810773696747, "grad_norm": 1.5346381189596447, "learning_rate": 7.097724194292144e-07, "loss": 0.2953, "step": 21938 }, { "epoch": 0.38135549027447024, "grad_norm": 2.286483781539661, "learning_rate": 7.097468669694751e-07, "loss": 0.309, "step": 21939 }, { "epoch": 0.3813728728119731, "grad_norm": 1.4931338066583404, "learning_rate": 7.097213138449345e-07, "loss": 0.4028, "step": 21940 }, { "epoch": 0.3813902553494759, "grad_norm": 1.7811769367953887, "learning_rate": 7.096957600556738e-07, "loss": 0.4565, "step": 21941 }, { "epoch": 0.38140763788697873, "grad_norm": 1.5016597547062887, "learning_rate": 7.096702056017737e-07, "loss": 0.3426, "step": 21942 }, { "epoch": 0.38142502042448156, "grad_norm": 1.5637326044432205, "learning_rate": 7.096446504833155e-07, "loss": 0.4518, "step": 21943 }, { "epoch": 0.3814424029619844, "grad_norm": 1.7442559992208246, "learning_rate": 7.096190947003801e-07, "loss": 0.3688, "step": 21944 }, { "epoch": 0.3814597854994872, "grad_norm": 8.616351626607306, "learning_rate": 7.095935382530484e-07, "loss": 0.3373, "step": 21945 }, { "epoch": 0.38147716803699006, "grad_norm": 1.868233394666494, "learning_rate": 7.095679811414015e-07, "loss": 0.2469, "step": 21946 }, { "epoch": 0.3814945505744929, "grad_norm": 1.0209165751389158, "learning_rate": 7.095424233655203e-07, "loss": 0.214, "step": 21947 }, { "epoch": 0.3815119331119957, "grad_norm": 2.566752994543443, "learning_rate": 7.095168649254858e-07, "loss": 0.2545, "step": 21948 }, { "epoch": 0.3815293156494985, "grad_norm": 1.6969785285125176, "learning_rate": 7.094913058213791e-07, "loss": 0.2862, "step": 21949 }, { "epoch": 0.3815466981870013, "grad_norm": 3.879127729213847, "learning_rate": 7.094657460532813e-07, "loss": 0.3798, "step": 21950 }, { "epoch": 0.38156408072450415, "grad_norm": 1.4300315374894832, "learning_rate": 7.094401856212734e-07, "loss": 0.4869, "step": 21951 }, { "epoch": 0.381581463262007, "grad_norm": 1.5177040476734147, "learning_rate": 7.094146245254362e-07, "loss": 0.3126, "step": 21952 }, { "epoch": 0.3815988457995098, "grad_norm": 1.2428199226129029, "learning_rate": 7.093890627658507e-07, "loss": 0.3516, "step": 21953 }, { "epoch": 0.38161622833701264, "grad_norm": 1.322176452706473, "learning_rate": 7.093635003425981e-07, "loss": 0.3549, "step": 21954 }, { "epoch": 0.38163361087451547, "grad_norm": 3.2775226915734614, "learning_rate": 7.093379372557596e-07, "loss": 0.4062, "step": 21955 }, { "epoch": 0.3816509934120183, "grad_norm": 1.9781638993150605, "learning_rate": 7.093123735054159e-07, "loss": 0.3661, "step": 21956 }, { "epoch": 0.38166837594952113, "grad_norm": 1.8622499889344128, "learning_rate": 7.09286809091648e-07, "loss": 0.165, "step": 21957 }, { "epoch": 0.38168575848702396, "grad_norm": 4.65279614897631, "learning_rate": 7.092612440145371e-07, "loss": 0.4817, "step": 21958 }, { "epoch": 0.38170314102452674, "grad_norm": 2.262581029034686, "learning_rate": 7.092356782741643e-07, "loss": 0.3028, "step": 21959 }, { "epoch": 0.38172052356202957, "grad_norm": 1.4627982947824645, "learning_rate": 7.092101118706105e-07, "loss": 0.3006, "step": 21960 }, { "epoch": 0.3817379060995324, "grad_norm": 1.9060951535112487, "learning_rate": 7.091845448039566e-07, "loss": 0.2975, "step": 21961 }, { "epoch": 0.38175528863703523, "grad_norm": 2.060500760770523, "learning_rate": 7.09158977074284e-07, "loss": 0.4128, "step": 21962 }, { "epoch": 0.38177267117453806, "grad_norm": 1.4186598793236533, "learning_rate": 7.091334086816733e-07, "loss": 0.2757, "step": 21963 }, { "epoch": 0.3817900537120409, "grad_norm": 1.5412815614600843, "learning_rate": 7.091078396262059e-07, "loss": 0.3476, "step": 21964 }, { "epoch": 0.3818074362495437, "grad_norm": 1.4609929107530077, "learning_rate": 7.090822699079627e-07, "loss": 0.2477, "step": 21965 }, { "epoch": 0.38182481878704655, "grad_norm": 2.031560896203684, "learning_rate": 7.090566995270247e-07, "loss": 0.3215, "step": 21966 }, { "epoch": 0.3818422013245494, "grad_norm": 1.122458440363512, "learning_rate": 7.09031128483473e-07, "loss": 0.3544, "step": 21967 }, { "epoch": 0.3818595838620522, "grad_norm": 3.5469958574753786, "learning_rate": 7.090055567773887e-07, "loss": 0.3367, "step": 21968 }, { "epoch": 0.381876966399555, "grad_norm": 1.3385571145505144, "learning_rate": 7.089799844088528e-07, "loss": 0.3205, "step": 21969 }, { "epoch": 0.3818943489370578, "grad_norm": 3.306723301148323, "learning_rate": 7.089544113779462e-07, "loss": 0.3703, "step": 21970 }, { "epoch": 0.38191173147456065, "grad_norm": 1.2330556725345574, "learning_rate": 7.089288376847502e-07, "loss": 0.2032, "step": 21971 }, { "epoch": 0.3819291140120635, "grad_norm": 1.9338657058749438, "learning_rate": 7.089032633293458e-07, "loss": 0.3819, "step": 21972 }, { "epoch": 0.3819464965495663, "grad_norm": 1.7526781117858148, "learning_rate": 7.08877688311814e-07, "loss": 0.2498, "step": 21973 }, { "epoch": 0.38196387908706914, "grad_norm": 1.3633682383101806, "learning_rate": 7.088521126322358e-07, "loss": 0.1372, "step": 21974 }, { "epoch": 0.38198126162457197, "grad_norm": 1.4162361818558067, "learning_rate": 7.088265362906924e-07, "loss": 0.2808, "step": 21975 }, { "epoch": 0.3819986441620748, "grad_norm": 1.7358504598287536, "learning_rate": 7.088009592872649e-07, "loss": 0.3665, "step": 21976 }, { "epoch": 0.3820160266995776, "grad_norm": 1.5880555345144676, "learning_rate": 7.08775381622034e-07, "loss": 0.2106, "step": 21977 }, { "epoch": 0.3820334092370804, "grad_norm": 1.6726537233107437, "learning_rate": 7.087498032950813e-07, "loss": 0.2823, "step": 21978 }, { "epoch": 0.38205079177458323, "grad_norm": 1.2255129903438502, "learning_rate": 7.087242243064875e-07, "loss": 0.335, "step": 21979 }, { "epoch": 0.38206817431208606, "grad_norm": 1.4463240402544522, "learning_rate": 7.086986446563339e-07, "loss": 0.2565, "step": 21980 }, { "epoch": 0.3820855568495889, "grad_norm": 1.5588882162246038, "learning_rate": 7.086730643447014e-07, "loss": 0.2539, "step": 21981 }, { "epoch": 0.3821029393870917, "grad_norm": 2.296250042282756, "learning_rate": 7.08647483371671e-07, "loss": 0.3073, "step": 21982 }, { "epoch": 0.38212032192459455, "grad_norm": 1.3331043411890864, "learning_rate": 7.086219017373241e-07, "loss": 0.3066, "step": 21983 }, { "epoch": 0.3821377044620974, "grad_norm": 1.8893027411085357, "learning_rate": 7.085963194417416e-07, "loss": 0.3616, "step": 21984 }, { "epoch": 0.3821550869996002, "grad_norm": 1.190234419885498, "learning_rate": 7.085707364850045e-07, "loss": 0.3424, "step": 21985 }, { "epoch": 0.38217246953710304, "grad_norm": 1.633056461083445, "learning_rate": 7.08545152867194e-07, "loss": 0.2354, "step": 21986 }, { "epoch": 0.3821898520746059, "grad_norm": 1.7055621817982207, "learning_rate": 7.085195685883913e-07, "loss": 0.3149, "step": 21987 }, { "epoch": 0.38220723461210865, "grad_norm": 1.6211282614674087, "learning_rate": 7.084939836486772e-07, "loss": 0.3536, "step": 21988 }, { "epoch": 0.3822246171496115, "grad_norm": 1.4668528016080757, "learning_rate": 7.08468398048133e-07, "loss": 0.2473, "step": 21989 }, { "epoch": 0.3822419996871143, "grad_norm": 2.5777071461497862, "learning_rate": 7.084428117868398e-07, "loss": 0.2709, "step": 21990 }, { "epoch": 0.38225938222461714, "grad_norm": 2.305302953912681, "learning_rate": 7.084172248648785e-07, "loss": 0.3128, "step": 21991 }, { "epoch": 0.38227676476211997, "grad_norm": 6.306099938471412, "learning_rate": 7.083916372823303e-07, "loss": 0.5431, "step": 21992 }, { "epoch": 0.3822941472996228, "grad_norm": 1.468314388761878, "learning_rate": 7.083660490392764e-07, "loss": 0.3546, "step": 21993 }, { "epoch": 0.38231152983712563, "grad_norm": 1.735562164897764, "learning_rate": 7.08340460135798e-07, "loss": 0.3472, "step": 21994 }, { "epoch": 0.38232891237462846, "grad_norm": 2.2877016658148746, "learning_rate": 7.083148705719759e-07, "loss": 0.3178, "step": 21995 }, { "epoch": 0.3823462949121313, "grad_norm": 1.5905583657585103, "learning_rate": 7.082892803478913e-07, "loss": 0.2095, "step": 21996 }, { "epoch": 0.3823636774496341, "grad_norm": 1.6178082202252981, "learning_rate": 7.082636894636256e-07, "loss": 0.2397, "step": 21997 }, { "epoch": 0.3823810599871369, "grad_norm": 2.011606467920473, "learning_rate": 7.082380979192594e-07, "loss": 0.3057, "step": 21998 }, { "epoch": 0.3823984425246397, "grad_norm": 1.767548119195186, "learning_rate": 7.082125057148742e-07, "loss": 0.307, "step": 21999 }, { "epoch": 0.38241582506214256, "grad_norm": 1.9449554607456738, "learning_rate": 7.081869128505511e-07, "loss": 0.1881, "step": 22000 }, { "epoch": 0.3824332075996454, "grad_norm": 2.443155873661491, "learning_rate": 7.081613193263709e-07, "loss": 0.2378, "step": 22001 }, { "epoch": 0.3824505901371482, "grad_norm": 1.3168482234617307, "learning_rate": 7.081357251424151e-07, "loss": 0.3018, "step": 22002 }, { "epoch": 0.38246797267465105, "grad_norm": 1.8480295870286427, "learning_rate": 7.081101302987647e-07, "loss": 0.2286, "step": 22003 }, { "epoch": 0.3824853552121539, "grad_norm": 3.750740110455405, "learning_rate": 7.080845347955006e-07, "loss": 0.2122, "step": 22004 }, { "epoch": 0.3825027377496567, "grad_norm": 1.0529137960717876, "learning_rate": 7.080589386327042e-07, "loss": 0.3391, "step": 22005 }, { "epoch": 0.38252012028715954, "grad_norm": 2.6787802841146386, "learning_rate": 7.080333418104565e-07, "loss": 0.4403, "step": 22006 }, { "epoch": 0.38253750282466237, "grad_norm": 1.4327104018430041, "learning_rate": 7.080077443288388e-07, "loss": 0.2995, "step": 22007 }, { "epoch": 0.38255488536216514, "grad_norm": 1.9006304189884116, "learning_rate": 7.07982146187932e-07, "loss": 0.2, "step": 22008 }, { "epoch": 0.382572267899668, "grad_norm": 1.6975864316078741, "learning_rate": 7.079565473878174e-07, "loss": 0.2829, "step": 22009 }, { "epoch": 0.3825896504371708, "grad_norm": 1.531666174412261, "learning_rate": 7.07930947928576e-07, "loss": 0.1807, "step": 22010 }, { "epoch": 0.38260703297467363, "grad_norm": 1.7081696223340321, "learning_rate": 7.07905347810289e-07, "loss": 0.2179, "step": 22011 }, { "epoch": 0.38262441551217646, "grad_norm": 1.1607347992583388, "learning_rate": 7.078797470330377e-07, "loss": 0.2303, "step": 22012 }, { "epoch": 0.3826417980496793, "grad_norm": 1.5223006981497387, "learning_rate": 7.078541455969029e-07, "loss": 0.2908, "step": 22013 }, { "epoch": 0.3826591805871821, "grad_norm": 1.6101305658643292, "learning_rate": 7.07828543501966e-07, "loss": 0.2856, "step": 22014 }, { "epoch": 0.38267656312468495, "grad_norm": 3.42330600582876, "learning_rate": 7.078029407483081e-07, "loss": 0.5115, "step": 22015 }, { "epoch": 0.3826939456621878, "grad_norm": 2.635353451471561, "learning_rate": 7.077773373360104e-07, "loss": 0.2984, "step": 22016 }, { "epoch": 0.3827113281996906, "grad_norm": 0.9805292403824537, "learning_rate": 7.077517332651539e-07, "loss": 0.3588, "step": 22017 }, { "epoch": 0.3827287107371934, "grad_norm": 1.3884729212046847, "learning_rate": 7.0772612853582e-07, "loss": 0.2716, "step": 22018 }, { "epoch": 0.3827460932746962, "grad_norm": 2.1512056459520914, "learning_rate": 7.077005231480894e-07, "loss": 0.2205, "step": 22019 }, { "epoch": 0.38276347581219905, "grad_norm": 1.8985520554434967, "learning_rate": 7.076749171020437e-07, "loss": 0.3296, "step": 22020 }, { "epoch": 0.3827808583497019, "grad_norm": 1.286476258309686, "learning_rate": 7.07649310397764e-07, "loss": 0.3297, "step": 22021 }, { "epoch": 0.3827982408872047, "grad_norm": 2.028451182675732, "learning_rate": 7.076237030353314e-07, "loss": 0.305, "step": 22022 }, { "epoch": 0.38281562342470754, "grad_norm": 1.984409061051083, "learning_rate": 7.075980950148268e-07, "loss": 0.5998, "step": 22023 }, { "epoch": 0.38283300596221037, "grad_norm": 2.4875469695203103, "learning_rate": 7.075724863363318e-07, "loss": 0.2691, "step": 22024 }, { "epoch": 0.3828503884997132, "grad_norm": 0.6800474242628466, "learning_rate": 7.075468769999274e-07, "loss": 0.2765, "step": 22025 }, { "epoch": 0.38286777103721603, "grad_norm": 1.3266241134519285, "learning_rate": 7.075212670056946e-07, "loss": 0.2823, "step": 22026 }, { "epoch": 0.38288515357471886, "grad_norm": 1.767814395099273, "learning_rate": 7.074956563537146e-07, "loss": 0.2976, "step": 22027 }, { "epoch": 0.38290253611222164, "grad_norm": 2.2146215191268706, "learning_rate": 7.074700450440691e-07, "loss": 0.5956, "step": 22028 }, { "epoch": 0.38291991864972447, "grad_norm": 3.11022901660445, "learning_rate": 7.074444330768384e-07, "loss": 0.5878, "step": 22029 }, { "epoch": 0.3829373011872273, "grad_norm": 1.1876495022683426, "learning_rate": 7.074188204521044e-07, "loss": 0.2882, "step": 22030 }, { "epoch": 0.3829546837247301, "grad_norm": 1.4469055815926548, "learning_rate": 7.07393207169948e-07, "loss": 0.2036, "step": 22031 }, { "epoch": 0.38297206626223296, "grad_norm": 1.5597672101163185, "learning_rate": 7.073675932304504e-07, "loss": 0.2956, "step": 22032 }, { "epoch": 0.3829894487997358, "grad_norm": 1.303344018091465, "learning_rate": 7.073419786336926e-07, "loss": 0.216, "step": 22033 }, { "epoch": 0.3830068313372386, "grad_norm": 1.3558058371371364, "learning_rate": 7.073163633797562e-07, "loss": 0.3123, "step": 22034 }, { "epoch": 0.38302421387474145, "grad_norm": 1.5625281180821864, "learning_rate": 7.072907474687221e-07, "loss": 0.2141, "step": 22035 }, { "epoch": 0.3830415964122443, "grad_norm": 2.6331199167612285, "learning_rate": 7.072651309006715e-07, "loss": 0.2546, "step": 22036 }, { "epoch": 0.3830589789497471, "grad_norm": 2.259862083507905, "learning_rate": 7.072395136756857e-07, "loss": 0.4225, "step": 22037 }, { "epoch": 0.3830763614872499, "grad_norm": 3.2865400373013203, "learning_rate": 7.072138957938458e-07, "loss": 0.5249, "step": 22038 }, { "epoch": 0.3830937440247527, "grad_norm": 1.444140528174094, "learning_rate": 7.071882772552331e-07, "loss": 0.3706, "step": 22039 }, { "epoch": 0.38311112656225554, "grad_norm": 1.006621554472835, "learning_rate": 7.071626580599287e-07, "loss": 0.1681, "step": 22040 }, { "epoch": 0.3831285090997584, "grad_norm": 1.6494732753264125, "learning_rate": 7.071370382080139e-07, "loss": 0.255, "step": 22041 }, { "epoch": 0.3831458916372612, "grad_norm": 1.7411710358977959, "learning_rate": 7.071114176995696e-07, "loss": 0.337, "step": 22042 }, { "epoch": 0.38316327417476403, "grad_norm": 1.4871544566919317, "learning_rate": 7.070857965346775e-07, "loss": 0.3139, "step": 22043 }, { "epoch": 0.38318065671226686, "grad_norm": 2.3050373437829994, "learning_rate": 7.070601747134184e-07, "loss": 0.409, "step": 22044 }, { "epoch": 0.3831980392497697, "grad_norm": 2.9121706453410083, "learning_rate": 7.070345522358738e-07, "loss": 0.4834, "step": 22045 }, { "epoch": 0.3832154217872725, "grad_norm": 1.453075529813246, "learning_rate": 7.070089291021246e-07, "loss": 0.3458, "step": 22046 }, { "epoch": 0.38323280432477536, "grad_norm": 1.9738447296071018, "learning_rate": 7.069833053122525e-07, "loss": 0.3337, "step": 22047 }, { "epoch": 0.38325018686227813, "grad_norm": 1.8942971158663404, "learning_rate": 7.06957680866338e-07, "loss": 0.3486, "step": 22048 }, { "epoch": 0.38326756939978096, "grad_norm": 2.8500893289777824, "learning_rate": 7.069320557644629e-07, "loss": 0.3159, "step": 22049 }, { "epoch": 0.3832849519372838, "grad_norm": 1.2726097429836316, "learning_rate": 7.069064300067084e-07, "loss": 0.2512, "step": 22050 }, { "epoch": 0.3833023344747866, "grad_norm": 1.5177948738839768, "learning_rate": 7.068808035931554e-07, "loss": 0.2762, "step": 22051 }, { "epoch": 0.38331971701228945, "grad_norm": 1.4927427174326724, "learning_rate": 7.068551765238851e-07, "loss": 0.2121, "step": 22052 }, { "epoch": 0.3833370995497923, "grad_norm": 1.2595640897218479, "learning_rate": 7.068295487989793e-07, "loss": 0.306, "step": 22053 }, { "epoch": 0.3833544820872951, "grad_norm": 1.452750327472052, "learning_rate": 7.068039204185185e-07, "loss": 0.2976, "step": 22054 }, { "epoch": 0.38337186462479794, "grad_norm": 1.702730963224338, "learning_rate": 7.067782913825845e-07, "loss": 0.4609, "step": 22055 }, { "epoch": 0.3833892471623008, "grad_norm": 1.6147399941527487, "learning_rate": 7.067526616912582e-07, "loss": 0.305, "step": 22056 }, { "epoch": 0.3834066296998036, "grad_norm": 1.0182741106898396, "learning_rate": 7.067270313446211e-07, "loss": 0.2643, "step": 22057 }, { "epoch": 0.3834240122373064, "grad_norm": 3.7632703381926365, "learning_rate": 7.06701400342754e-07, "loss": 0.3706, "step": 22058 }, { "epoch": 0.3834413947748092, "grad_norm": 1.186803992725805, "learning_rate": 7.066757686857385e-07, "loss": 0.3003, "step": 22059 }, { "epoch": 0.38345877731231204, "grad_norm": 1.469899117614202, "learning_rate": 7.066501363736559e-07, "loss": 0.3966, "step": 22060 }, { "epoch": 0.38347615984981487, "grad_norm": 1.8733847888498876, "learning_rate": 7.066245034065872e-07, "loss": 0.2131, "step": 22061 }, { "epoch": 0.3834935423873177, "grad_norm": 1.05339943125225, "learning_rate": 7.065988697846137e-07, "loss": 0.2615, "step": 22062 }, { "epoch": 0.38351092492482053, "grad_norm": 1.5458828930422712, "learning_rate": 7.065732355078166e-07, "loss": 0.2276, "step": 22063 }, { "epoch": 0.38352830746232336, "grad_norm": 3.1416775799393455, "learning_rate": 7.065476005762775e-07, "loss": 0.4933, "step": 22064 }, { "epoch": 0.3835456899998262, "grad_norm": 1.3968976779812523, "learning_rate": 7.065219649900772e-07, "loss": 0.4533, "step": 22065 }, { "epoch": 0.383563072537329, "grad_norm": 1.1874387220790736, "learning_rate": 7.064963287492973e-07, "loss": 0.2686, "step": 22066 }, { "epoch": 0.38358045507483185, "grad_norm": 1.563812738499157, "learning_rate": 7.064706918540187e-07, "loss": 0.2894, "step": 22067 }, { "epoch": 0.3835978376123346, "grad_norm": 1.3801969958340452, "learning_rate": 7.064450543043229e-07, "loss": 0.3209, "step": 22068 }, { "epoch": 0.38361522014983745, "grad_norm": 2.238131100305057, "learning_rate": 7.064194161002911e-07, "loss": 0.4042, "step": 22069 }, { "epoch": 0.3836326026873403, "grad_norm": 1.6935421112096143, "learning_rate": 7.063937772420046e-07, "loss": 0.3364, "step": 22070 }, { "epoch": 0.3836499852248431, "grad_norm": 1.9781402458265018, "learning_rate": 7.063681377295447e-07, "loss": 0.4072, "step": 22071 }, { "epoch": 0.38366736776234595, "grad_norm": 1.1969125074627267, "learning_rate": 7.063424975629925e-07, "loss": 0.3018, "step": 22072 }, { "epoch": 0.3836847502998488, "grad_norm": 1.7647503361241226, "learning_rate": 7.063168567424294e-07, "loss": 0.2625, "step": 22073 }, { "epoch": 0.3837021328373516, "grad_norm": 2.971513552145639, "learning_rate": 7.062912152679367e-07, "loss": 0.1954, "step": 22074 }, { "epoch": 0.38371951537485444, "grad_norm": 1.2436871167171974, "learning_rate": 7.062655731395954e-07, "loss": 0.4437, "step": 22075 }, { "epoch": 0.38373689791235727, "grad_norm": 2.3076661843831276, "learning_rate": 7.062399303574873e-07, "loss": 0.3007, "step": 22076 }, { "epoch": 0.3837542804498601, "grad_norm": 1.755485101585612, "learning_rate": 7.062142869216931e-07, "loss": 0.3015, "step": 22077 }, { "epoch": 0.38377166298736287, "grad_norm": 3.0824096496633864, "learning_rate": 7.061886428322945e-07, "loss": 0.5276, "step": 22078 }, { "epoch": 0.3837890455248657, "grad_norm": 1.4557596376674131, "learning_rate": 7.061629980893726e-07, "loss": 0.1832, "step": 22079 }, { "epoch": 0.38380642806236853, "grad_norm": 0.9810374897932215, "learning_rate": 7.061373526930088e-07, "loss": 0.2824, "step": 22080 }, { "epoch": 0.38382381059987136, "grad_norm": 1.3014524074952714, "learning_rate": 7.06111706643284e-07, "loss": 0.4321, "step": 22081 }, { "epoch": 0.3838411931373742, "grad_norm": 4.269108120595503, "learning_rate": 7.0608605994028e-07, "loss": 0.3406, "step": 22082 }, { "epoch": 0.383858575674877, "grad_norm": 1.3955628141701595, "learning_rate": 7.060604125840777e-07, "loss": 0.3216, "step": 22083 }, { "epoch": 0.38387595821237985, "grad_norm": 2.053254332720304, "learning_rate": 7.060347645747587e-07, "loss": 0.4525, "step": 22084 }, { "epoch": 0.3838933407498827, "grad_norm": 2.404723886615577, "learning_rate": 7.06009115912404e-07, "loss": 0.563, "step": 22085 }, { "epoch": 0.3839107232873855, "grad_norm": 2.6912577299385356, "learning_rate": 7.059834665970952e-07, "loss": 0.4308, "step": 22086 }, { "epoch": 0.38392810582488834, "grad_norm": 1.8577138069921064, "learning_rate": 7.059578166289131e-07, "loss": 0.3653, "step": 22087 }, { "epoch": 0.3839454883623911, "grad_norm": 1.2773197943300298, "learning_rate": 7.059321660079398e-07, "loss": 0.208, "step": 22088 }, { "epoch": 0.38396287089989395, "grad_norm": 2.5221445933869733, "learning_rate": 7.059065147342558e-07, "loss": 0.2917, "step": 22089 }, { "epoch": 0.3839802534373968, "grad_norm": 2.0961844829698615, "learning_rate": 7.058808628079429e-07, "loss": 0.3988, "step": 22090 }, { "epoch": 0.3839976359748996, "grad_norm": 1.3932875130401094, "learning_rate": 7.058552102290821e-07, "loss": 0.2397, "step": 22091 }, { "epoch": 0.38401501851240244, "grad_norm": 1.7293289758351178, "learning_rate": 7.058295569977549e-07, "loss": 0.5384, "step": 22092 }, { "epoch": 0.38403240104990527, "grad_norm": 2.965906052477317, "learning_rate": 7.058039031140426e-07, "loss": 0.4505, "step": 22093 }, { "epoch": 0.3840497835874081, "grad_norm": 2.428315815820465, "learning_rate": 7.057782485780264e-07, "loss": 0.3204, "step": 22094 }, { "epoch": 0.38406716612491093, "grad_norm": 1.2136069218537782, "learning_rate": 7.057525933897878e-07, "loss": 0.2066, "step": 22095 }, { "epoch": 0.38408454866241376, "grad_norm": 1.8971325784777862, "learning_rate": 7.057269375494079e-07, "loss": 0.2796, "step": 22096 }, { "epoch": 0.3841019311999166, "grad_norm": 1.3092474213814798, "learning_rate": 7.05701281056968e-07, "loss": 0.3544, "step": 22097 }, { "epoch": 0.38411931373741937, "grad_norm": 1.3242998197411422, "learning_rate": 7.056756239125496e-07, "loss": 0.3708, "step": 22098 }, { "epoch": 0.3841366962749222, "grad_norm": 2.311619454947979, "learning_rate": 7.05649966116234e-07, "loss": 0.3061, "step": 22099 }, { "epoch": 0.384154078812425, "grad_norm": 21.283887412377105, "learning_rate": 7.056243076681026e-07, "loss": 1.3702, "step": 22100 }, { "epoch": 0.38417146134992786, "grad_norm": 2.120899178486373, "learning_rate": 7.055986485682364e-07, "loss": 0.5276, "step": 22101 }, { "epoch": 0.3841888438874307, "grad_norm": 2.0926692832324267, "learning_rate": 7.05572988816717e-07, "loss": 0.4178, "step": 22102 }, { "epoch": 0.3842062264249335, "grad_norm": 2.9475911904717886, "learning_rate": 7.055473284136257e-07, "loss": 0.3277, "step": 22103 }, { "epoch": 0.38422360896243635, "grad_norm": 4.850521775700734, "learning_rate": 7.055216673590437e-07, "loss": 0.3516, "step": 22104 }, { "epoch": 0.3842409914999392, "grad_norm": 1.3667843387809908, "learning_rate": 7.054960056530525e-07, "loss": 0.2632, "step": 22105 }, { "epoch": 0.384258374037442, "grad_norm": 1.0940130742712781, "learning_rate": 7.054703432957332e-07, "loss": 0.2146, "step": 22106 }, { "epoch": 0.38427575657494484, "grad_norm": 1.692351152879455, "learning_rate": 7.054446802871676e-07, "loss": 0.3043, "step": 22107 }, { "epoch": 0.3842931391124476, "grad_norm": 1.7766240107934306, "learning_rate": 7.054190166274365e-07, "loss": 0.4351, "step": 22108 }, { "epoch": 0.38431052164995044, "grad_norm": 1.3861923065375865, "learning_rate": 7.053933523166215e-07, "loss": 0.4498, "step": 22109 }, { "epoch": 0.3843279041874533, "grad_norm": 1.330332986933902, "learning_rate": 7.05367687354804e-07, "loss": 0.2961, "step": 22110 }, { "epoch": 0.3843452867249561, "grad_norm": 2.0287383236211003, "learning_rate": 7.053420217420654e-07, "loss": 0.4057, "step": 22111 }, { "epoch": 0.38436266926245893, "grad_norm": 1.677215523479279, "learning_rate": 7.053163554784867e-07, "loss": 0.2169, "step": 22112 }, { "epoch": 0.38438005179996176, "grad_norm": 1.7397534649201631, "learning_rate": 7.052906885641494e-07, "loss": 0.3565, "step": 22113 }, { "epoch": 0.3843974343374646, "grad_norm": 1.7412703545784884, "learning_rate": 7.052650209991351e-07, "loss": 0.2366, "step": 22114 }, { "epoch": 0.3844148168749674, "grad_norm": 1.1011547182336618, "learning_rate": 7.05239352783525e-07, "loss": 0.2557, "step": 22115 }, { "epoch": 0.38443219941247025, "grad_norm": 1.7610696361125586, "learning_rate": 7.052136839174002e-07, "loss": 0.5247, "step": 22116 }, { "epoch": 0.38444958194997303, "grad_norm": 1.8509811539361087, "learning_rate": 7.051880144008426e-07, "loss": 0.4318, "step": 22117 }, { "epoch": 0.38446696448747586, "grad_norm": 1.4750194726986279, "learning_rate": 7.05162344233933e-07, "loss": 0.1488, "step": 22118 }, { "epoch": 0.3844843470249787, "grad_norm": 1.5768292304950944, "learning_rate": 7.051366734167531e-07, "loss": 0.3898, "step": 22119 }, { "epoch": 0.3845017295624815, "grad_norm": 1.6686804821566132, "learning_rate": 7.051110019493842e-07, "loss": 0.3481, "step": 22120 }, { "epoch": 0.38451911209998435, "grad_norm": 2.0702609461988684, "learning_rate": 7.050853298319078e-07, "loss": 0.2496, "step": 22121 }, { "epoch": 0.3845364946374872, "grad_norm": 1.6048549515815285, "learning_rate": 7.050596570644047e-07, "loss": 0.2755, "step": 22122 }, { "epoch": 0.38455387717499, "grad_norm": 2.1672722779512688, "learning_rate": 7.050339836469569e-07, "loss": 0.4685, "step": 22123 }, { "epoch": 0.38457125971249284, "grad_norm": 1.6513342417737076, "learning_rate": 7.050083095796456e-07, "loss": 0.2156, "step": 22124 }, { "epoch": 0.38458864224999567, "grad_norm": 2.15257257757046, "learning_rate": 7.049826348625522e-07, "loss": 0.4288, "step": 22125 }, { "epoch": 0.3846060247874985, "grad_norm": 2.4534382509737105, "learning_rate": 7.049569594957579e-07, "loss": 0.3196, "step": 22126 }, { "epoch": 0.3846234073250013, "grad_norm": 1.5430776025484032, "learning_rate": 7.049312834793442e-07, "loss": 0.476, "step": 22127 }, { "epoch": 0.3846407898625041, "grad_norm": 1.2656709826780206, "learning_rate": 7.049056068133924e-07, "loss": 0.2553, "step": 22128 }, { "epoch": 0.38465817240000694, "grad_norm": 1.4184785636129955, "learning_rate": 7.048799294979841e-07, "loss": 0.1708, "step": 22129 }, { "epoch": 0.38467555493750977, "grad_norm": 2.6224003737790755, "learning_rate": 7.048542515332004e-07, "loss": 0.2359, "step": 22130 }, { "epoch": 0.3846929374750126, "grad_norm": 1.6513408029542727, "learning_rate": 7.048285729191229e-07, "loss": 0.5445, "step": 22131 }, { "epoch": 0.3847103200125154, "grad_norm": 1.9224612079582615, "learning_rate": 7.04802893655833e-07, "loss": 0.3413, "step": 22132 }, { "epoch": 0.38472770255001826, "grad_norm": 2.1414619230424234, "learning_rate": 7.047772137434119e-07, "loss": 0.2778, "step": 22133 }, { "epoch": 0.3847450850875211, "grad_norm": 2.44778851547847, "learning_rate": 7.04751533181941e-07, "loss": 0.3289, "step": 22134 }, { "epoch": 0.3847624676250239, "grad_norm": 2.5440927554985984, "learning_rate": 7.047258519715018e-07, "loss": 0.4617, "step": 22135 }, { "epoch": 0.38477985016252675, "grad_norm": 2.007985449205867, "learning_rate": 7.047001701121759e-07, "loss": 0.3489, "step": 22136 }, { "epoch": 0.3847972327000295, "grad_norm": 1.530434502449081, "learning_rate": 7.046744876040444e-07, "loss": 0.1914, "step": 22137 }, { "epoch": 0.38481461523753235, "grad_norm": 1.4947987847834414, "learning_rate": 7.046488044471888e-07, "loss": 0.3081, "step": 22138 }, { "epoch": 0.3848319977750352, "grad_norm": 2.17166612540297, "learning_rate": 7.046231206416905e-07, "loss": 0.4225, "step": 22139 }, { "epoch": 0.384849380312538, "grad_norm": 2.956480281185174, "learning_rate": 7.045974361876309e-07, "loss": 0.2744, "step": 22140 }, { "epoch": 0.38486676285004084, "grad_norm": 1.0355877064908314, "learning_rate": 7.045717510850913e-07, "loss": 0.3029, "step": 22141 }, { "epoch": 0.3848841453875437, "grad_norm": 1.9573142042978027, "learning_rate": 7.045460653341536e-07, "loss": 0.4552, "step": 22142 }, { "epoch": 0.3849015279250465, "grad_norm": 2.481787344274389, "learning_rate": 7.045203789348983e-07, "loss": 0.3894, "step": 22143 }, { "epoch": 0.38491891046254934, "grad_norm": 1.8288678594489098, "learning_rate": 7.044946918874076e-07, "loss": 0.3719, "step": 22144 }, { "epoch": 0.38493629300005217, "grad_norm": 1.6572013536845658, "learning_rate": 7.044690041917626e-07, "loss": 0.4395, "step": 22145 }, { "epoch": 0.384953675537555, "grad_norm": 1.5810471379162723, "learning_rate": 7.044433158480449e-07, "loss": 0.2645, "step": 22146 }, { "epoch": 0.38497105807505777, "grad_norm": 1.7488413217916168, "learning_rate": 7.044176268563358e-07, "loss": 0.2528, "step": 22147 }, { "epoch": 0.3849884406125606, "grad_norm": 0.9098839134151157, "learning_rate": 7.043919372167167e-07, "loss": 0.2618, "step": 22148 }, { "epoch": 0.38500582315006343, "grad_norm": 1.11946840481085, "learning_rate": 7.043662469292691e-07, "loss": 0.3259, "step": 22149 }, { "epoch": 0.38502320568756626, "grad_norm": 1.759444498928195, "learning_rate": 7.043405559940742e-07, "loss": 0.3717, "step": 22150 }, { "epoch": 0.3850405882250691, "grad_norm": 1.6956010963944, "learning_rate": 7.043148644112136e-07, "loss": 0.6236, "step": 22151 }, { "epoch": 0.3850579707625719, "grad_norm": 2.7745500996109924, "learning_rate": 7.042891721807689e-07, "loss": 0.3575, "step": 22152 }, { "epoch": 0.38507535330007475, "grad_norm": 1.1494929864536672, "learning_rate": 7.042634793028212e-07, "loss": 0.292, "step": 22153 }, { "epoch": 0.3850927358375776, "grad_norm": 1.3005774390145075, "learning_rate": 7.042377857774523e-07, "loss": 0.2323, "step": 22154 }, { "epoch": 0.3851101183750804, "grad_norm": 1.2554965908657585, "learning_rate": 7.042120916047434e-07, "loss": 0.1969, "step": 22155 }, { "epoch": 0.38512750091258324, "grad_norm": 1.09727782430414, "learning_rate": 7.041863967847759e-07, "loss": 0.3123, "step": 22156 }, { "epoch": 0.385144883450086, "grad_norm": 1.486592824603413, "learning_rate": 7.041607013176312e-07, "loss": 0.3289, "step": 22157 }, { "epoch": 0.38516226598758885, "grad_norm": 1.585617542989075, "learning_rate": 7.041350052033911e-07, "loss": 0.3179, "step": 22158 }, { "epoch": 0.3851796485250917, "grad_norm": 1.039081211384107, "learning_rate": 7.041093084421366e-07, "loss": 0.2325, "step": 22159 }, { "epoch": 0.3851970310625945, "grad_norm": 2.7134328998887685, "learning_rate": 7.040836110339494e-07, "loss": 0.2792, "step": 22160 }, { "epoch": 0.38521441360009734, "grad_norm": 1.764581480852271, "learning_rate": 7.040579129789112e-07, "loss": 0.3766, "step": 22161 }, { "epoch": 0.38523179613760017, "grad_norm": 3.712525423165001, "learning_rate": 7.040322142771028e-07, "loss": 0.3642, "step": 22162 }, { "epoch": 0.385249178675103, "grad_norm": 1.3747555101746087, "learning_rate": 7.040065149286061e-07, "loss": 0.2549, "step": 22163 }, { "epoch": 0.38526656121260583, "grad_norm": 1.23842818989873, "learning_rate": 7.039808149335024e-07, "loss": 0.2604, "step": 22164 }, { "epoch": 0.38528394375010866, "grad_norm": 1.848702225312972, "learning_rate": 7.039551142918733e-07, "loss": 0.5696, "step": 22165 }, { "epoch": 0.3853013262876115, "grad_norm": 1.1314642933869867, "learning_rate": 7.039294130038002e-07, "loss": 0.1252, "step": 22166 }, { "epoch": 0.38531870882511426, "grad_norm": 1.9794710807432543, "learning_rate": 7.039037110693647e-07, "loss": 0.4139, "step": 22167 }, { "epoch": 0.3853360913626171, "grad_norm": 1.61260204964658, "learning_rate": 7.038780084886478e-07, "loss": 0.2815, "step": 22168 }, { "epoch": 0.3853534739001199, "grad_norm": 1.7644577390644, "learning_rate": 7.038523052617313e-07, "loss": 0.2077, "step": 22169 }, { "epoch": 0.38537085643762276, "grad_norm": 1.535484492739695, "learning_rate": 7.038266013886967e-07, "loss": 0.4927, "step": 22170 }, { "epoch": 0.3853882389751256, "grad_norm": 0.949750754012915, "learning_rate": 7.038008968696255e-07, "loss": 0.2292, "step": 22171 }, { "epoch": 0.3854056215126284, "grad_norm": 2.2715584258098045, "learning_rate": 7.037751917045989e-07, "loss": 0.2224, "step": 22172 }, { "epoch": 0.38542300405013125, "grad_norm": 1.262689192357741, "learning_rate": 7.037494858936987e-07, "loss": 0.2508, "step": 22173 }, { "epoch": 0.3854403865876341, "grad_norm": 1.9752128150840864, "learning_rate": 7.037237794370061e-07, "loss": 0.4494, "step": 22174 }, { "epoch": 0.3854577691251369, "grad_norm": 0.9016990860138493, "learning_rate": 7.036980723346029e-07, "loss": 0.212, "step": 22175 }, { "epoch": 0.38547515166263974, "grad_norm": 0.9972314275312462, "learning_rate": 7.0367236458657e-07, "loss": 0.2646, "step": 22176 }, { "epoch": 0.3854925342001425, "grad_norm": 1.5343375061257631, "learning_rate": 7.036466561929898e-07, "loss": 0.3117, "step": 22177 }, { "epoch": 0.38550991673764534, "grad_norm": 2.2133979968830464, "learning_rate": 7.036209471539428e-07, "loss": 0.2089, "step": 22178 }, { "epoch": 0.38552729927514817, "grad_norm": 1.5334561094130739, "learning_rate": 7.035952374695111e-07, "loss": 0.2377, "step": 22179 }, { "epoch": 0.385544681812651, "grad_norm": 2.7781921423683764, "learning_rate": 7.035695271397761e-07, "loss": 0.3762, "step": 22180 }, { "epoch": 0.38556206435015383, "grad_norm": 1.7799499061888926, "learning_rate": 7.035438161648191e-07, "loss": 0.2891, "step": 22181 }, { "epoch": 0.38557944688765666, "grad_norm": 1.6180330835021846, "learning_rate": 7.035181045447217e-07, "loss": 0.2544, "step": 22182 }, { "epoch": 0.3855968294251595, "grad_norm": 2.070989324413613, "learning_rate": 7.034923922795655e-07, "loss": 0.4308, "step": 22183 }, { "epoch": 0.3856142119626623, "grad_norm": 2.332714205661073, "learning_rate": 7.034666793694317e-07, "loss": 0.3559, "step": 22184 }, { "epoch": 0.38563159450016515, "grad_norm": 1.3080406310373416, "learning_rate": 7.034409658144021e-07, "loss": 0.1769, "step": 22185 }, { "epoch": 0.385648977037668, "grad_norm": 1.936628747857695, "learning_rate": 7.034152516145582e-07, "loss": 0.2466, "step": 22186 }, { "epoch": 0.38566635957517076, "grad_norm": 2.283146249840709, "learning_rate": 7.033895367699813e-07, "loss": 0.3029, "step": 22187 }, { "epoch": 0.3856837421126736, "grad_norm": 0.7754217041454426, "learning_rate": 7.033638212807528e-07, "loss": 0.2896, "step": 22188 }, { "epoch": 0.3857011246501764, "grad_norm": 1.4767091549265838, "learning_rate": 7.033381051469547e-07, "loss": 0.5375, "step": 22189 }, { "epoch": 0.38571850718767925, "grad_norm": 2.392672493166635, "learning_rate": 7.033123883686683e-07, "loss": 0.4314, "step": 22190 }, { "epoch": 0.3857358897251821, "grad_norm": 1.6397777028611424, "learning_rate": 7.032866709459746e-07, "loss": 0.4589, "step": 22191 }, { "epoch": 0.3857532722626849, "grad_norm": 2.143231342252255, "learning_rate": 7.03260952878956e-07, "loss": 0.3071, "step": 22192 }, { "epoch": 0.38577065480018774, "grad_norm": 1.639209598645575, "learning_rate": 7.032352341676932e-07, "loss": 0.3714, "step": 22193 }, { "epoch": 0.38578803733769057, "grad_norm": 2.015544076641261, "learning_rate": 7.032095148122681e-07, "loss": 0.3934, "step": 22194 }, { "epoch": 0.3858054198751934, "grad_norm": 1.269604385027478, "learning_rate": 7.031837948127623e-07, "loss": 0.3161, "step": 22195 }, { "epoch": 0.38582280241269623, "grad_norm": 1.717610618588051, "learning_rate": 7.031580741692573e-07, "loss": 0.2634, "step": 22196 }, { "epoch": 0.385840184950199, "grad_norm": 1.6216555713305707, "learning_rate": 7.031323528818343e-07, "loss": 0.2399, "step": 22197 }, { "epoch": 0.38585756748770184, "grad_norm": 1.9618655644947864, "learning_rate": 7.031066309505751e-07, "loss": 0.3607, "step": 22198 }, { "epoch": 0.38587495002520467, "grad_norm": 1.8199463852018654, "learning_rate": 7.030809083755614e-07, "loss": 0.3673, "step": 22199 }, { "epoch": 0.3858923325627075, "grad_norm": 1.3390509135327366, "learning_rate": 7.030551851568743e-07, "loss": 0.2417, "step": 22200 }, { "epoch": 0.3859097151002103, "grad_norm": 1.4534150638298657, "learning_rate": 7.030294612945955e-07, "loss": 0.3091, "step": 22201 }, { "epoch": 0.38592709763771316, "grad_norm": 1.6866153827937316, "learning_rate": 7.030037367888067e-07, "loss": 0.3403, "step": 22202 }, { "epoch": 0.385944480175216, "grad_norm": 1.7085718674221777, "learning_rate": 7.029780116395891e-07, "loss": 0.2712, "step": 22203 }, { "epoch": 0.3859618627127188, "grad_norm": 2.195317297109144, "learning_rate": 7.029522858470247e-07, "loss": 0.2368, "step": 22204 }, { "epoch": 0.38597924525022165, "grad_norm": 2.896317688672192, "learning_rate": 7.029265594111948e-07, "loss": 0.2913, "step": 22205 }, { "epoch": 0.3859966277877245, "grad_norm": 1.2441130522726926, "learning_rate": 7.029008323321808e-07, "loss": 0.4634, "step": 22206 }, { "epoch": 0.38601401032522725, "grad_norm": 0.9471572125683334, "learning_rate": 7.028751046100642e-07, "loss": 0.3365, "step": 22207 }, { "epoch": 0.3860313928627301, "grad_norm": 1.5360933202971387, "learning_rate": 7.02849376244927e-07, "loss": 0.3984, "step": 22208 }, { "epoch": 0.3860487754002329, "grad_norm": 1.1270490396325483, "learning_rate": 7.028236472368503e-07, "loss": 0.312, "step": 22209 }, { "epoch": 0.38606615793773574, "grad_norm": 2.451911375620888, "learning_rate": 7.027979175859159e-07, "loss": 0.3745, "step": 22210 }, { "epoch": 0.3860835404752386, "grad_norm": 1.5395493042742272, "learning_rate": 7.027721872922052e-07, "loss": 0.2383, "step": 22211 }, { "epoch": 0.3861009230127414, "grad_norm": 1.0413290463890097, "learning_rate": 7.027464563557998e-07, "loss": 0.1838, "step": 22212 }, { "epoch": 0.38611830555024423, "grad_norm": 1.7176971401278107, "learning_rate": 7.027207247767813e-07, "loss": 0.2586, "step": 22213 }, { "epoch": 0.38613568808774706, "grad_norm": 2.220589414757046, "learning_rate": 7.026949925552313e-07, "loss": 0.2713, "step": 22214 }, { "epoch": 0.3861530706252499, "grad_norm": 1.9734032246845494, "learning_rate": 7.026692596912311e-07, "loss": 0.3003, "step": 22215 }, { "epoch": 0.3861704531627527, "grad_norm": 2.5269027794276715, "learning_rate": 7.026435261848625e-07, "loss": 0.4483, "step": 22216 }, { "epoch": 0.3861878357002555, "grad_norm": 2.3655725595352206, "learning_rate": 7.026177920362071e-07, "loss": 0.3515, "step": 22217 }, { "epoch": 0.38620521823775833, "grad_norm": 1.3165617651054211, "learning_rate": 7.025920572453463e-07, "loss": 0.2801, "step": 22218 }, { "epoch": 0.38622260077526116, "grad_norm": 2.263047884084265, "learning_rate": 7.025663218123618e-07, "loss": 0.3728, "step": 22219 }, { "epoch": 0.386239983312764, "grad_norm": 2.458081759517083, "learning_rate": 7.02540585737335e-07, "loss": 0.5403, "step": 22220 }, { "epoch": 0.3862573658502668, "grad_norm": 1.8093548322219342, "learning_rate": 7.025148490203479e-07, "loss": 0.2235, "step": 22221 }, { "epoch": 0.38627474838776965, "grad_norm": 1.5183057990523918, "learning_rate": 7.024891116614814e-07, "loss": 0.3881, "step": 22222 }, { "epoch": 0.3862921309252725, "grad_norm": 1.6418560000143285, "learning_rate": 7.024633736608176e-07, "loss": 0.2394, "step": 22223 }, { "epoch": 0.3863095134627753, "grad_norm": 2.0856149488900937, "learning_rate": 7.024376350184378e-07, "loss": 0.3237, "step": 22224 }, { "epoch": 0.38632689600027814, "grad_norm": 1.8249399931502561, "learning_rate": 7.024118957344238e-07, "loss": 0.2572, "step": 22225 }, { "epoch": 0.38634427853778097, "grad_norm": 1.798594385163105, "learning_rate": 7.02386155808857e-07, "loss": 0.2284, "step": 22226 }, { "epoch": 0.38636166107528375, "grad_norm": 1.7236000121205421, "learning_rate": 7.023604152418191e-07, "loss": 0.2016, "step": 22227 }, { "epoch": 0.3863790436127866, "grad_norm": 2.3418585510317245, "learning_rate": 7.023346740333916e-07, "loss": 0.2869, "step": 22228 }, { "epoch": 0.3863964261502894, "grad_norm": 2.044095762398598, "learning_rate": 7.023089321836562e-07, "loss": 0.2413, "step": 22229 }, { "epoch": 0.38641380868779224, "grad_norm": 3.489072758741299, "learning_rate": 7.022831896926943e-07, "loss": 0.378, "step": 22230 }, { "epoch": 0.38643119122529507, "grad_norm": 1.2834703810000472, "learning_rate": 7.022574465605877e-07, "loss": 0.387, "step": 22231 }, { "epoch": 0.3864485737627979, "grad_norm": 1.9676371683671483, "learning_rate": 7.022317027874179e-07, "loss": 0.2934, "step": 22232 }, { "epoch": 0.38646595630030073, "grad_norm": 1.6133631411097573, "learning_rate": 7.022059583732665e-07, "loss": 0.2058, "step": 22233 }, { "epoch": 0.38648333883780356, "grad_norm": 2.085178326033548, "learning_rate": 7.02180213318215e-07, "loss": 0.3839, "step": 22234 }, { "epoch": 0.3865007213753064, "grad_norm": 1.564046225096482, "learning_rate": 7.021544676223452e-07, "loss": 0.3369, "step": 22235 }, { "epoch": 0.3865181039128092, "grad_norm": 1.943690303288505, "learning_rate": 7.021287212857385e-07, "loss": 0.2574, "step": 22236 }, { "epoch": 0.386535486450312, "grad_norm": 1.1914512199003178, "learning_rate": 7.021029743084768e-07, "loss": 0.3136, "step": 22237 }, { "epoch": 0.3865528689878148, "grad_norm": 1.358157395140309, "learning_rate": 7.020772266906413e-07, "loss": 0.2048, "step": 22238 }, { "epoch": 0.38657025152531765, "grad_norm": 3.1199588558014013, "learning_rate": 7.02051478432314e-07, "loss": 0.3065, "step": 22239 }, { "epoch": 0.3865876340628205, "grad_norm": 2.281963361547768, "learning_rate": 7.020257295335762e-07, "loss": 0.4657, "step": 22240 }, { "epoch": 0.3866050166003233, "grad_norm": 1.3999583304298173, "learning_rate": 7.019999799945097e-07, "loss": 0.3033, "step": 22241 }, { "epoch": 0.38662239913782614, "grad_norm": 1.62598867671488, "learning_rate": 7.01974229815196e-07, "loss": 0.2753, "step": 22242 }, { "epoch": 0.386639781675329, "grad_norm": 1.5539837625344342, "learning_rate": 7.019484789957168e-07, "loss": 0.3309, "step": 22243 }, { "epoch": 0.3866571642128318, "grad_norm": 3.1116528709263753, "learning_rate": 7.019227275361537e-07, "loss": 0.4492, "step": 22244 }, { "epoch": 0.38667454675033464, "grad_norm": 1.7989520481531431, "learning_rate": 7.018969754365882e-07, "loss": 0.1874, "step": 22245 }, { "epoch": 0.38669192928783747, "grad_norm": 1.4641889814769131, "learning_rate": 7.018712226971022e-07, "loss": 0.3146, "step": 22246 }, { "epoch": 0.38670931182534024, "grad_norm": 1.3207159038827023, "learning_rate": 7.01845469317777e-07, "loss": 0.1533, "step": 22247 }, { "epoch": 0.38672669436284307, "grad_norm": 1.567779286668537, "learning_rate": 7.018197152986946e-07, "loss": 0.4014, "step": 22248 }, { "epoch": 0.3867440769003459, "grad_norm": 2.344917146970279, "learning_rate": 7.017939606399363e-07, "loss": 0.2184, "step": 22249 }, { "epoch": 0.38676145943784873, "grad_norm": 2.772999932119293, "learning_rate": 7.017682053415838e-07, "loss": 0.4093, "step": 22250 }, { "epoch": 0.38677884197535156, "grad_norm": 1.3960948393075605, "learning_rate": 7.017424494037186e-07, "loss": 0.2798, "step": 22251 }, { "epoch": 0.3867962245128544, "grad_norm": 1.5079870534352762, "learning_rate": 7.017166928264229e-07, "loss": 0.1549, "step": 22252 }, { "epoch": 0.3868136070503572, "grad_norm": 1.5286081071138657, "learning_rate": 7.016909356097777e-07, "loss": 0.3767, "step": 22253 }, { "epoch": 0.38683098958786005, "grad_norm": 1.8453054064073149, "learning_rate": 7.016651777538649e-07, "loss": 0.3133, "step": 22254 }, { "epoch": 0.3868483721253629, "grad_norm": 2.127440562982153, "learning_rate": 7.016394192587661e-07, "loss": 0.3088, "step": 22255 }, { "epoch": 0.38686575466286566, "grad_norm": 2.3538954458587877, "learning_rate": 7.016136601245632e-07, "loss": 0.2404, "step": 22256 }, { "epoch": 0.3868831372003685, "grad_norm": 1.729648210438686, "learning_rate": 7.015879003513373e-07, "loss": 0.3995, "step": 22257 }, { "epoch": 0.3869005197378713, "grad_norm": 1.4361480884854516, "learning_rate": 7.015621399391705e-07, "loss": 0.2618, "step": 22258 }, { "epoch": 0.38691790227537415, "grad_norm": 1.2258637051195351, "learning_rate": 7.015363788881443e-07, "loss": 0.2113, "step": 22259 }, { "epoch": 0.386935284812877, "grad_norm": 1.3003792802373602, "learning_rate": 7.015106171983403e-07, "loss": 0.2905, "step": 22260 }, { "epoch": 0.3869526673503798, "grad_norm": 1.617672032623062, "learning_rate": 7.014848548698402e-07, "loss": 0.1661, "step": 22261 }, { "epoch": 0.38697004988788264, "grad_norm": 1.186359006535634, "learning_rate": 7.014590919027259e-07, "loss": 0.2032, "step": 22262 }, { "epoch": 0.38698743242538547, "grad_norm": 1.22877167994484, "learning_rate": 7.014333282970785e-07, "loss": 0.3017, "step": 22263 }, { "epoch": 0.3870048149628883, "grad_norm": 1.6195525615884934, "learning_rate": 7.014075640529802e-07, "loss": 0.2161, "step": 22264 }, { "epoch": 0.38702219750039113, "grad_norm": 2.2114563580106195, "learning_rate": 7.013817991705124e-07, "loss": 0.4372, "step": 22265 }, { "epoch": 0.3870395800378939, "grad_norm": 2.1911313091791245, "learning_rate": 7.013560336497568e-07, "loss": 0.3339, "step": 22266 }, { "epoch": 0.38705696257539673, "grad_norm": 0.9821164915438393, "learning_rate": 7.013302674907951e-07, "loss": 0.1828, "step": 22267 }, { "epoch": 0.38707434511289956, "grad_norm": 3.730951626371675, "learning_rate": 7.013045006937088e-07, "loss": 0.3526, "step": 22268 }, { "epoch": 0.3870917276504024, "grad_norm": 1.5009269026146235, "learning_rate": 7.012787332585798e-07, "loss": 0.316, "step": 22269 }, { "epoch": 0.3871091101879052, "grad_norm": 1.3326315544076808, "learning_rate": 7.012529651854898e-07, "loss": 0.2654, "step": 22270 }, { "epoch": 0.38712649272540806, "grad_norm": 2.073529341695658, "learning_rate": 7.012271964745201e-07, "loss": 0.3221, "step": 22271 }, { "epoch": 0.3871438752629109, "grad_norm": 1.5536885212693854, "learning_rate": 7.012014271257527e-07, "loss": 0.2261, "step": 22272 }, { "epoch": 0.3871612578004137, "grad_norm": 1.247971444886906, "learning_rate": 7.011756571392692e-07, "loss": 0.2417, "step": 22273 }, { "epoch": 0.38717864033791655, "grad_norm": 2.0596823197233367, "learning_rate": 7.011498865151513e-07, "loss": 0.3441, "step": 22274 }, { "epoch": 0.3871960228754194, "grad_norm": 1.3985891204658931, "learning_rate": 7.011241152534807e-07, "loss": 0.4193, "step": 22275 }, { "epoch": 0.38721340541292215, "grad_norm": 1.5568637733850188, "learning_rate": 7.010983433543389e-07, "loss": 0.2993, "step": 22276 }, { "epoch": 0.387230787950425, "grad_norm": 1.5267989481479578, "learning_rate": 7.01072570817808e-07, "loss": 0.2279, "step": 22277 }, { "epoch": 0.3872481704879278, "grad_norm": 1.6681290887236795, "learning_rate": 7.010467976439692e-07, "loss": 0.2916, "step": 22278 }, { "epoch": 0.38726555302543064, "grad_norm": 1.8637361429792851, "learning_rate": 7.010210238329045e-07, "loss": 0.2418, "step": 22279 }, { "epoch": 0.3872829355629335, "grad_norm": 2.4040117003770387, "learning_rate": 7.009952493846953e-07, "loss": 0.538, "step": 22280 }, { "epoch": 0.3873003181004363, "grad_norm": 2.2842256204810014, "learning_rate": 7.009694742994239e-07, "loss": 0.365, "step": 22281 }, { "epoch": 0.38731770063793913, "grad_norm": 1.601375065122134, "learning_rate": 7.009436985771712e-07, "loss": 0.2972, "step": 22282 }, { "epoch": 0.38733508317544196, "grad_norm": 1.582156413073508, "learning_rate": 7.009179222180193e-07, "loss": 0.3709, "step": 22283 }, { "epoch": 0.3873524657129448, "grad_norm": 1.6227596390758883, "learning_rate": 7.008921452220501e-07, "loss": 0.2315, "step": 22284 }, { "epoch": 0.3873698482504476, "grad_norm": 1.4445479035338697, "learning_rate": 7.00866367589345e-07, "loss": 0.2905, "step": 22285 }, { "epoch": 0.3873872307879504, "grad_norm": 1.7374592934543696, "learning_rate": 7.008405893199856e-07, "loss": 0.3945, "step": 22286 }, { "epoch": 0.38740461332545323, "grad_norm": 1.7423085860151537, "learning_rate": 7.008148104140542e-07, "loss": 0.2138, "step": 22287 }, { "epoch": 0.38742199586295606, "grad_norm": 1.461866550239813, "learning_rate": 7.007890308716318e-07, "loss": 0.2399, "step": 22288 }, { "epoch": 0.3874393784004589, "grad_norm": 3.1825853460237186, "learning_rate": 7.007632506928004e-07, "loss": 0.317, "step": 22289 }, { "epoch": 0.3874567609379617, "grad_norm": 2.934972715355306, "learning_rate": 7.007374698776418e-07, "loss": 0.3396, "step": 22290 }, { "epoch": 0.38747414347546455, "grad_norm": 1.794006351759341, "learning_rate": 7.007116884262376e-07, "loss": 0.3435, "step": 22291 }, { "epoch": 0.3874915260129674, "grad_norm": 1.6346339688785774, "learning_rate": 7.006859063386695e-07, "loss": 0.2292, "step": 22292 }, { "epoch": 0.3875089085504702, "grad_norm": 2.0187958202016034, "learning_rate": 7.006601236150194e-07, "loss": 0.3179, "step": 22293 }, { "epoch": 0.38752629108797304, "grad_norm": 6.107871173729468, "learning_rate": 7.006343402553689e-07, "loss": 0.3675, "step": 22294 }, { "epoch": 0.38754367362547587, "grad_norm": 2.0166941848701274, "learning_rate": 7.006085562597995e-07, "loss": 0.3961, "step": 22295 }, { "epoch": 0.38756105616297865, "grad_norm": 3.6571214543255337, "learning_rate": 7.005827716283932e-07, "loss": 0.4603, "step": 22296 }, { "epoch": 0.3875784387004815, "grad_norm": 1.5087172833465616, "learning_rate": 7.005569863612318e-07, "loss": 0.2512, "step": 22297 }, { "epoch": 0.3875958212379843, "grad_norm": 1.624530156296245, "learning_rate": 7.005312004583968e-07, "loss": 0.3241, "step": 22298 }, { "epoch": 0.38761320377548714, "grad_norm": 1.4675978544173094, "learning_rate": 7.005054139199698e-07, "loss": 0.2246, "step": 22299 }, { "epoch": 0.38763058631298997, "grad_norm": 1.0255360843190675, "learning_rate": 7.00479626746033e-07, "loss": 0.2993, "step": 22300 }, { "epoch": 0.3876479688504928, "grad_norm": 1.700927159281281, "learning_rate": 7.004538389366677e-07, "loss": 0.3753, "step": 22301 }, { "epoch": 0.3876653513879956, "grad_norm": 1.7025360798952847, "learning_rate": 7.00428050491956e-07, "loss": 0.3166, "step": 22302 }, { "epoch": 0.38768273392549846, "grad_norm": 1.4484834831315323, "learning_rate": 7.004022614119793e-07, "loss": 0.4423, "step": 22303 }, { "epoch": 0.3877001164630013, "grad_norm": 1.4198781070127051, "learning_rate": 7.003764716968197e-07, "loss": 0.4238, "step": 22304 }, { "epoch": 0.3877174990005041, "grad_norm": 2.195083290872691, "learning_rate": 7.003506813465585e-07, "loss": 0.3742, "step": 22305 }, { "epoch": 0.3877348815380069, "grad_norm": 1.6989506123512534, "learning_rate": 7.003248903612778e-07, "loss": 0.2819, "step": 22306 }, { "epoch": 0.3877522640755097, "grad_norm": 1.3679587894929561, "learning_rate": 7.002990987410591e-07, "loss": 0.2783, "step": 22307 }, { "epoch": 0.38776964661301255, "grad_norm": 1.0709556258944233, "learning_rate": 7.002733064859843e-07, "loss": 0.2291, "step": 22308 }, { "epoch": 0.3877870291505154, "grad_norm": 1.3333842764906254, "learning_rate": 7.002475135961352e-07, "loss": 0.2769, "step": 22309 }, { "epoch": 0.3878044116880182, "grad_norm": 1.633657511424073, "learning_rate": 7.002217200715936e-07, "loss": 0.3616, "step": 22310 }, { "epoch": 0.38782179422552104, "grad_norm": 1.362966494520835, "learning_rate": 7.001959259124409e-07, "loss": 0.2289, "step": 22311 }, { "epoch": 0.3878391767630239, "grad_norm": 1.2148129224470356, "learning_rate": 7.001701311187592e-07, "loss": 0.3917, "step": 22312 }, { "epoch": 0.3878565593005267, "grad_norm": 1.5836912702975359, "learning_rate": 7.001443356906299e-07, "loss": 0.2094, "step": 22313 }, { "epoch": 0.38787394183802953, "grad_norm": 1.752576306358721, "learning_rate": 7.001185396281352e-07, "loss": 0.33, "step": 22314 }, { "epoch": 0.38789132437553236, "grad_norm": 1.583175112045496, "learning_rate": 7.000927429313566e-07, "loss": 0.2726, "step": 22315 }, { "epoch": 0.38790870691303514, "grad_norm": 1.4884920883530115, "learning_rate": 7.000669456003759e-07, "loss": 0.2904, "step": 22316 }, { "epoch": 0.38792608945053797, "grad_norm": 1.063072689557763, "learning_rate": 7.000411476352749e-07, "loss": 0.2109, "step": 22317 }, { "epoch": 0.3879434719880408, "grad_norm": 2.149149251789564, "learning_rate": 7.000153490361353e-07, "loss": 0.3823, "step": 22318 }, { "epoch": 0.38796085452554363, "grad_norm": 2.1388543616508775, "learning_rate": 6.999895498030391e-07, "loss": 0.2902, "step": 22319 }, { "epoch": 0.38797823706304646, "grad_norm": 1.4295727597058427, "learning_rate": 6.999637499360677e-07, "loss": 0.2278, "step": 22320 }, { "epoch": 0.3879956196005493, "grad_norm": 2.0518179267269048, "learning_rate": 6.999379494353031e-07, "loss": 0.2357, "step": 22321 }, { "epoch": 0.3880130021380521, "grad_norm": 1.283762230773102, "learning_rate": 6.999121483008273e-07, "loss": 0.3635, "step": 22322 }, { "epoch": 0.38803038467555495, "grad_norm": 1.9090208393938077, "learning_rate": 6.998863465327216e-07, "loss": 0.2963, "step": 22323 }, { "epoch": 0.3880477672130578, "grad_norm": 2.0284167778013735, "learning_rate": 6.99860544131068e-07, "loss": 0.6628, "step": 22324 }, { "epoch": 0.3880651497505606, "grad_norm": 3.0448964921376866, "learning_rate": 6.998347410959483e-07, "loss": 0.3219, "step": 22325 }, { "epoch": 0.3880825322880634, "grad_norm": 1.7604793126359954, "learning_rate": 6.998089374274442e-07, "loss": 0.4699, "step": 22326 }, { "epoch": 0.3880999148255662, "grad_norm": 1.3572909426515427, "learning_rate": 6.997831331256376e-07, "loss": 0.2081, "step": 22327 }, { "epoch": 0.38811729736306905, "grad_norm": 1.581674241970135, "learning_rate": 6.997573281906104e-07, "loss": 0.3679, "step": 22328 }, { "epoch": 0.3881346799005719, "grad_norm": 3.300953637189705, "learning_rate": 6.99731522622444e-07, "loss": 0.4179, "step": 22329 }, { "epoch": 0.3881520624380747, "grad_norm": 1.1230340130641747, "learning_rate": 6.997057164212205e-07, "loss": 0.2331, "step": 22330 }, { "epoch": 0.38816944497557754, "grad_norm": 2.181013769808501, "learning_rate": 6.996799095870218e-07, "loss": 0.4853, "step": 22331 }, { "epoch": 0.38818682751308037, "grad_norm": 2.2419984086107387, "learning_rate": 6.996541021199293e-07, "loss": 0.5051, "step": 22332 }, { "epoch": 0.3882042100505832, "grad_norm": 3.470092156199089, "learning_rate": 6.996282940200251e-07, "loss": 0.3349, "step": 22333 }, { "epoch": 0.38822159258808603, "grad_norm": 1.2952540711903644, "learning_rate": 6.996024852873908e-07, "loss": 0.3183, "step": 22334 }, { "epoch": 0.38823897512558886, "grad_norm": 2.104007864416731, "learning_rate": 6.995766759221085e-07, "loss": 0.2884, "step": 22335 }, { "epoch": 0.38825635766309163, "grad_norm": 1.435138705341726, "learning_rate": 6.995508659242595e-07, "loss": 0.1663, "step": 22336 }, { "epoch": 0.38827374020059446, "grad_norm": 2.35907494228655, "learning_rate": 6.995250552939262e-07, "loss": 0.4925, "step": 22337 }, { "epoch": 0.3882911227380973, "grad_norm": 1.756432917981135, "learning_rate": 6.994992440311901e-07, "loss": 0.275, "step": 22338 }, { "epoch": 0.3883085052756001, "grad_norm": 2.2346166839079644, "learning_rate": 6.994734321361329e-07, "loss": 0.2498, "step": 22339 }, { "epoch": 0.38832588781310295, "grad_norm": 1.282704009443977, "learning_rate": 6.994476196088366e-07, "loss": 0.1903, "step": 22340 }, { "epoch": 0.3883432703506058, "grad_norm": 2.565910907456849, "learning_rate": 6.994218064493831e-07, "loss": 0.4128, "step": 22341 }, { "epoch": 0.3883606528881086, "grad_norm": 2.3023692478387217, "learning_rate": 6.99395992657854e-07, "loss": 0.4867, "step": 22342 }, { "epoch": 0.38837803542561145, "grad_norm": 3.125220545457204, "learning_rate": 6.993701782343311e-07, "loss": 0.416, "step": 22343 }, { "epoch": 0.3883954179631143, "grad_norm": 2.853561136110794, "learning_rate": 6.993443631788964e-07, "loss": 0.2682, "step": 22344 }, { "epoch": 0.3884128005006171, "grad_norm": 2.4324519853829587, "learning_rate": 6.993185474916316e-07, "loss": 0.3586, "step": 22345 }, { "epoch": 0.3884301830381199, "grad_norm": 1.3060277270052043, "learning_rate": 6.992927311726187e-07, "loss": 0.2571, "step": 22346 }, { "epoch": 0.3884475655756227, "grad_norm": 1.9765531155508278, "learning_rate": 6.992669142219394e-07, "loss": 0.293, "step": 22347 }, { "epoch": 0.38846494811312554, "grad_norm": 1.3233174141068105, "learning_rate": 6.992410966396754e-07, "loss": 0.2122, "step": 22348 }, { "epoch": 0.38848233065062837, "grad_norm": 1.0944443012167475, "learning_rate": 6.992152784259086e-07, "loss": 0.1954, "step": 22349 }, { "epoch": 0.3884997131881312, "grad_norm": 1.0771546526473375, "learning_rate": 6.991894595807209e-07, "loss": 0.265, "step": 22350 }, { "epoch": 0.38851709572563403, "grad_norm": 1.5222965554201315, "learning_rate": 6.991636401041943e-07, "loss": 0.3471, "step": 22351 }, { "epoch": 0.38853447826313686, "grad_norm": 1.6749244384118571, "learning_rate": 6.9913781999641e-07, "loss": 0.2208, "step": 22352 }, { "epoch": 0.3885518608006397, "grad_norm": 2.7397737802102604, "learning_rate": 6.991119992574508e-07, "loss": 0.294, "step": 22353 }, { "epoch": 0.3885692433381425, "grad_norm": 1.8212980669242422, "learning_rate": 6.990861778873977e-07, "loss": 0.2445, "step": 22354 }, { "epoch": 0.38858662587564535, "grad_norm": 1.4106105554135226, "learning_rate": 6.99060355886333e-07, "loss": 0.3331, "step": 22355 }, { "epoch": 0.3886040084131481, "grad_norm": 1.3073906156465906, "learning_rate": 6.990345332543384e-07, "loss": 0.2602, "step": 22356 }, { "epoch": 0.38862139095065096, "grad_norm": 1.400871236541735, "learning_rate": 6.990087099914957e-07, "loss": 0.2489, "step": 22357 }, { "epoch": 0.3886387734881538, "grad_norm": 1.6250159793500187, "learning_rate": 6.989828860978869e-07, "loss": 0.2378, "step": 22358 }, { "epoch": 0.3886561560256566, "grad_norm": 1.3135609448737906, "learning_rate": 6.989570615735938e-07, "loss": 0.3639, "step": 22359 }, { "epoch": 0.38867353856315945, "grad_norm": 1.4895975642203574, "learning_rate": 6.989312364186981e-07, "loss": 0.2208, "step": 22360 }, { "epoch": 0.3886909211006623, "grad_norm": 3.071644632608494, "learning_rate": 6.989054106332817e-07, "loss": 0.3593, "step": 22361 }, { "epoch": 0.3887083036381651, "grad_norm": 1.627406856209761, "learning_rate": 6.988795842174266e-07, "loss": 0.2986, "step": 22362 }, { "epoch": 0.38872568617566794, "grad_norm": 1.8686329140390856, "learning_rate": 6.988537571712146e-07, "loss": 0.325, "step": 22363 }, { "epoch": 0.38874306871317077, "grad_norm": 3.0104673854781914, "learning_rate": 6.988279294947276e-07, "loss": 0.3966, "step": 22364 }, { "epoch": 0.3887604512506736, "grad_norm": 7.6820365149735865, "learning_rate": 6.988021011880471e-07, "loss": 0.9604, "step": 22365 }, { "epoch": 0.3887778337881764, "grad_norm": 1.6718940756574778, "learning_rate": 6.987762722512557e-07, "loss": 0.2231, "step": 22366 }, { "epoch": 0.3887952163256792, "grad_norm": 2.1027968708151756, "learning_rate": 6.987504426844345e-07, "loss": 0.3592, "step": 22367 }, { "epoch": 0.38881259886318204, "grad_norm": 0.8778384172937205, "learning_rate": 6.987246124876658e-07, "loss": 0.225, "step": 22368 }, { "epoch": 0.38882998140068487, "grad_norm": 1.126171901647531, "learning_rate": 6.986987816610315e-07, "loss": 0.1552, "step": 22369 }, { "epoch": 0.3888473639381877, "grad_norm": 1.9019326150807145, "learning_rate": 6.986729502046131e-07, "loss": 0.2574, "step": 22370 }, { "epoch": 0.3888647464756905, "grad_norm": 1.3222247910806133, "learning_rate": 6.986471181184927e-07, "loss": 0.3008, "step": 22371 }, { "epoch": 0.38888212901319336, "grad_norm": 1.5753440287805316, "learning_rate": 6.986212854027524e-07, "loss": 0.3701, "step": 22372 }, { "epoch": 0.3888995115506962, "grad_norm": 1.5175736121350278, "learning_rate": 6.985954520574737e-07, "loss": 0.2973, "step": 22373 }, { "epoch": 0.388916894088199, "grad_norm": 3.3512575512233695, "learning_rate": 6.985696180827388e-07, "loss": 0.447, "step": 22374 }, { "epoch": 0.38893427662570185, "grad_norm": 2.1370346736134898, "learning_rate": 6.985437834786293e-07, "loss": 0.2359, "step": 22375 }, { "epoch": 0.3889516591632046, "grad_norm": 1.1787704234226883, "learning_rate": 6.985179482452273e-07, "loss": 0.2463, "step": 22376 }, { "epoch": 0.38896904170070745, "grad_norm": 1.953018836041331, "learning_rate": 6.984921123826146e-07, "loss": 0.3343, "step": 22377 }, { "epoch": 0.3889864242382103, "grad_norm": 1.9923373265576416, "learning_rate": 6.98466275890873e-07, "loss": 0.4287, "step": 22378 }, { "epoch": 0.3890038067757131, "grad_norm": 1.6300507318532038, "learning_rate": 6.984404387700845e-07, "loss": 0.3568, "step": 22379 }, { "epoch": 0.38902118931321594, "grad_norm": 1.481930807839226, "learning_rate": 6.984146010203309e-07, "loss": 0.404, "step": 22380 }, { "epoch": 0.3890385718507188, "grad_norm": 3.3965744535504854, "learning_rate": 6.983887626416942e-07, "loss": 0.1962, "step": 22381 }, { "epoch": 0.3890559543882216, "grad_norm": 1.5202826564732939, "learning_rate": 6.983629236342563e-07, "loss": 0.3099, "step": 22382 }, { "epoch": 0.38907333692572443, "grad_norm": 1.441980572355698, "learning_rate": 6.983370839980989e-07, "loss": 0.2783, "step": 22383 }, { "epoch": 0.38909071946322726, "grad_norm": 2.6686744485513323, "learning_rate": 6.983112437333043e-07, "loss": 0.3703, "step": 22384 }, { "epoch": 0.3891081020007301, "grad_norm": 1.2546019293836803, "learning_rate": 6.982854028399541e-07, "loss": 0.3212, "step": 22385 }, { "epoch": 0.38912548453823287, "grad_norm": 1.4279474848787752, "learning_rate": 6.9825956131813e-07, "loss": 0.453, "step": 22386 }, { "epoch": 0.3891428670757357, "grad_norm": 1.7629457093575371, "learning_rate": 6.982337191679144e-07, "loss": 0.222, "step": 22387 }, { "epoch": 0.38916024961323853, "grad_norm": 1.8557587845335741, "learning_rate": 6.982078763893889e-07, "loss": 0.1564, "step": 22388 }, { "epoch": 0.38917763215074136, "grad_norm": 1.239937026889628, "learning_rate": 6.981820329826356e-07, "loss": 0.3192, "step": 22389 }, { "epoch": 0.3891950146882442, "grad_norm": 1.9005066769387573, "learning_rate": 6.981561889477361e-07, "loss": 0.4584, "step": 22390 }, { "epoch": 0.389212397225747, "grad_norm": 2.820847391317294, "learning_rate": 6.981303442847728e-07, "loss": 0.4882, "step": 22391 }, { "epoch": 0.38922977976324985, "grad_norm": 1.64624227349185, "learning_rate": 6.98104498993827e-07, "loss": 0.2329, "step": 22392 }, { "epoch": 0.3892471623007527, "grad_norm": 4.446610144017836, "learning_rate": 6.980786530749812e-07, "loss": 0.3412, "step": 22393 }, { "epoch": 0.3892645448382555, "grad_norm": 1.335134690956563, "learning_rate": 6.980528065283166e-07, "loss": 0.2363, "step": 22394 }, { "epoch": 0.38928192737575834, "grad_norm": 1.587178759221344, "learning_rate": 6.980269593539162e-07, "loss": 0.3896, "step": 22395 }, { "epoch": 0.3892993099132611, "grad_norm": 3.2455122147711335, "learning_rate": 6.98001111551861e-07, "loss": 0.329, "step": 22396 }, { "epoch": 0.38931669245076395, "grad_norm": 2.003425110537578, "learning_rate": 6.979752631222331e-07, "loss": 0.3334, "step": 22397 }, { "epoch": 0.3893340749882668, "grad_norm": 3.928620221828414, "learning_rate": 6.979494140651147e-07, "loss": 0.5766, "step": 22398 }, { "epoch": 0.3893514575257696, "grad_norm": 1.7283509202781557, "learning_rate": 6.979235643805876e-07, "loss": 0.2339, "step": 22399 }, { "epoch": 0.38936884006327244, "grad_norm": 2.367020499483926, "learning_rate": 6.978977140687336e-07, "loss": 0.4359, "step": 22400 }, { "epoch": 0.38938622260077527, "grad_norm": 4.308748213901456, "learning_rate": 6.978718631296351e-07, "loss": 0.3186, "step": 22401 }, { "epoch": 0.3894036051382781, "grad_norm": 1.9032222352084984, "learning_rate": 6.978460115633734e-07, "loss": 0.3609, "step": 22402 }, { "epoch": 0.3894209876757809, "grad_norm": 0.9637120792685602, "learning_rate": 6.978201593700308e-07, "loss": 0.2998, "step": 22403 }, { "epoch": 0.38943837021328376, "grad_norm": 1.1213398396211633, "learning_rate": 6.977943065496891e-07, "loss": 0.3725, "step": 22404 }, { "epoch": 0.38945575275078653, "grad_norm": 1.3844065598890964, "learning_rate": 6.977684531024304e-07, "loss": 0.3671, "step": 22405 }, { "epoch": 0.38947313528828936, "grad_norm": 2.0479846762532414, "learning_rate": 6.977425990283364e-07, "loss": 0.2646, "step": 22406 }, { "epoch": 0.3894905178257922, "grad_norm": 2.220237323711858, "learning_rate": 6.977167443274894e-07, "loss": 0.4097, "step": 22407 }, { "epoch": 0.389507900363295, "grad_norm": 2.0697573190030636, "learning_rate": 6.976908889999711e-07, "loss": 0.2675, "step": 22408 }, { "epoch": 0.38952528290079785, "grad_norm": 1.2205902482356867, "learning_rate": 6.976650330458633e-07, "loss": 0.3897, "step": 22409 }, { "epoch": 0.3895426654383007, "grad_norm": 1.8272105845550117, "learning_rate": 6.976391764652484e-07, "loss": 0.6651, "step": 22410 }, { "epoch": 0.3895600479758035, "grad_norm": 1.3305109272517617, "learning_rate": 6.97613319258208e-07, "loss": 0.1911, "step": 22411 }, { "epoch": 0.38957743051330634, "grad_norm": 1.556516955600292, "learning_rate": 6.97587461424824e-07, "loss": 0.2066, "step": 22412 }, { "epoch": 0.3895948130508092, "grad_norm": 2.1614025423677323, "learning_rate": 6.975616029651787e-07, "loss": 0.4658, "step": 22413 }, { "epoch": 0.389612195588312, "grad_norm": 0.9181311366979679, "learning_rate": 6.975357438793539e-07, "loss": 0.3715, "step": 22414 }, { "epoch": 0.3896295781258148, "grad_norm": 2.3179511749959905, "learning_rate": 6.975098841674314e-07, "loss": 0.3262, "step": 22415 }, { "epoch": 0.3896469606633176, "grad_norm": 1.5624045000504903, "learning_rate": 6.974840238294932e-07, "loss": 0.3416, "step": 22416 }, { "epoch": 0.38966434320082044, "grad_norm": 1.3977856424440456, "learning_rate": 6.974581628656217e-07, "loss": 0.3668, "step": 22417 }, { "epoch": 0.38968172573832327, "grad_norm": 1.5841678919768318, "learning_rate": 6.974323012758983e-07, "loss": 0.3337, "step": 22418 }, { "epoch": 0.3896991082758261, "grad_norm": 1.9285418598469524, "learning_rate": 6.974064390604051e-07, "loss": 0.1925, "step": 22419 }, { "epoch": 0.38971649081332893, "grad_norm": 1.320414464241658, "learning_rate": 6.973805762192244e-07, "loss": 0.1985, "step": 22420 }, { "epoch": 0.38973387335083176, "grad_norm": 2.7188884158969935, "learning_rate": 6.973547127524378e-07, "loss": 0.327, "step": 22421 }, { "epoch": 0.3897512558883346, "grad_norm": 1.3022589612103121, "learning_rate": 6.973288486601274e-07, "loss": 0.2756, "step": 22422 }, { "epoch": 0.3897686384258374, "grad_norm": 1.7027169428462752, "learning_rate": 6.973029839423754e-07, "loss": 0.3617, "step": 22423 }, { "epoch": 0.38978602096334025, "grad_norm": 1.5458568504831849, "learning_rate": 6.972771185992634e-07, "loss": 0.2602, "step": 22424 }, { "epoch": 0.389803403500843, "grad_norm": 1.0056341536695412, "learning_rate": 6.972512526308736e-07, "loss": 0.5374, "step": 22425 }, { "epoch": 0.38982078603834586, "grad_norm": 2.9478404953693915, "learning_rate": 6.97225386037288e-07, "loss": 0.4054, "step": 22426 }, { "epoch": 0.3898381685758487, "grad_norm": 1.5309497271725234, "learning_rate": 6.971995188185884e-07, "loss": 0.1845, "step": 22427 }, { "epoch": 0.3898555511133515, "grad_norm": 1.5313839324127674, "learning_rate": 6.971736509748569e-07, "loss": 0.2081, "step": 22428 }, { "epoch": 0.38987293365085435, "grad_norm": 4.077240911972166, "learning_rate": 6.971477825061755e-07, "loss": 0.2773, "step": 22429 }, { "epoch": 0.3898903161883572, "grad_norm": 1.8023254193138092, "learning_rate": 6.971219134126264e-07, "loss": 0.1873, "step": 22430 }, { "epoch": 0.38990769872586, "grad_norm": 1.4025306923399878, "learning_rate": 6.970960436942912e-07, "loss": 0.392, "step": 22431 }, { "epoch": 0.38992508126336284, "grad_norm": 2.3200030862326044, "learning_rate": 6.970701733512521e-07, "loss": 0.4291, "step": 22432 }, { "epoch": 0.38994246380086567, "grad_norm": 2.351558974811057, "learning_rate": 6.970443023835911e-07, "loss": 0.5356, "step": 22433 }, { "epoch": 0.3899598463383685, "grad_norm": 1.4609747271877855, "learning_rate": 6.970184307913902e-07, "loss": 0.216, "step": 22434 }, { "epoch": 0.3899772288758713, "grad_norm": 1.7324245584087863, "learning_rate": 6.969925585747313e-07, "loss": 0.3974, "step": 22435 }, { "epoch": 0.3899946114133741, "grad_norm": 1.8526203129430043, "learning_rate": 6.969666857336967e-07, "loss": 0.404, "step": 22436 }, { "epoch": 0.39001199395087693, "grad_norm": 1.515126118031149, "learning_rate": 6.969408122683679e-07, "loss": 0.4314, "step": 22437 }, { "epoch": 0.39002937648837976, "grad_norm": 1.867711901116628, "learning_rate": 6.969149381788273e-07, "loss": 0.3566, "step": 22438 }, { "epoch": 0.3900467590258826, "grad_norm": 1.8502876654520035, "learning_rate": 6.968890634651569e-07, "loss": 0.2186, "step": 22439 }, { "epoch": 0.3900641415633854, "grad_norm": 3.411748837816952, "learning_rate": 6.968631881274385e-07, "loss": 0.2736, "step": 22440 }, { "epoch": 0.39008152410088826, "grad_norm": 1.210980395697349, "learning_rate": 6.968373121657542e-07, "loss": 0.5048, "step": 22441 }, { "epoch": 0.3900989066383911, "grad_norm": 1.3721924243533992, "learning_rate": 6.96811435580186e-07, "loss": 0.4794, "step": 22442 }, { "epoch": 0.3901162891758939, "grad_norm": 2.4629211238954887, "learning_rate": 6.967855583708161e-07, "loss": 0.313, "step": 22443 }, { "epoch": 0.39013367171339675, "grad_norm": 1.800052442137092, "learning_rate": 6.967596805377264e-07, "loss": 0.248, "step": 22444 }, { "epoch": 0.3901510542508995, "grad_norm": 1.29271777080908, "learning_rate": 6.967338020809987e-07, "loss": 0.1645, "step": 22445 }, { "epoch": 0.39016843678840235, "grad_norm": 1.6636018087791127, "learning_rate": 6.967079230007153e-07, "loss": 0.3458, "step": 22446 }, { "epoch": 0.3901858193259052, "grad_norm": 1.143847563070172, "learning_rate": 6.966820432969582e-07, "loss": 0.2308, "step": 22447 }, { "epoch": 0.390203201863408, "grad_norm": 2.5912126725165052, "learning_rate": 6.966561629698095e-07, "loss": 0.2394, "step": 22448 }, { "epoch": 0.39022058440091084, "grad_norm": 2.4468695233578086, "learning_rate": 6.96630282019351e-07, "loss": 0.3158, "step": 22449 }, { "epoch": 0.39023796693841367, "grad_norm": 2.091001992286618, "learning_rate": 6.966044004456646e-07, "loss": 0.3878, "step": 22450 }, { "epoch": 0.3902553494759165, "grad_norm": 3.5448394889799872, "learning_rate": 6.965785182488328e-07, "loss": 0.321, "step": 22451 }, { "epoch": 0.39027273201341933, "grad_norm": 1.1885467443715934, "learning_rate": 6.965526354289372e-07, "loss": 0.1888, "step": 22452 }, { "epoch": 0.39029011455092216, "grad_norm": 1.997879444781803, "learning_rate": 6.965267519860604e-07, "loss": 0.6728, "step": 22453 }, { "epoch": 0.390307497088425, "grad_norm": 1.2339346638133522, "learning_rate": 6.965008679202837e-07, "loss": 0.4236, "step": 22454 }, { "epoch": 0.39032487962592777, "grad_norm": 2.7241381431434095, "learning_rate": 6.964749832316899e-07, "loss": 0.3116, "step": 22455 }, { "epoch": 0.3903422621634306, "grad_norm": 3.707669244292522, "learning_rate": 6.964490979203602e-07, "loss": 0.3594, "step": 22456 }, { "epoch": 0.39035964470093343, "grad_norm": 1.8422755972276152, "learning_rate": 6.964232119863773e-07, "loss": 0.2226, "step": 22457 }, { "epoch": 0.39037702723843626, "grad_norm": 3.393646968073697, "learning_rate": 6.963973254298232e-07, "loss": 0.4878, "step": 22458 }, { "epoch": 0.3903944097759391, "grad_norm": 3.5074048144164287, "learning_rate": 6.963714382507797e-07, "loss": 0.3023, "step": 22459 }, { "epoch": 0.3904117923134419, "grad_norm": 1.960559653376063, "learning_rate": 6.963455504493287e-07, "loss": 0.4076, "step": 22460 }, { "epoch": 0.39042917485094475, "grad_norm": 2.1795518595986394, "learning_rate": 6.963196620255529e-07, "loss": 0.3501, "step": 22461 }, { "epoch": 0.3904465573884476, "grad_norm": 1.229888665486972, "learning_rate": 6.962937729795338e-07, "loss": 0.3033, "step": 22462 }, { "epoch": 0.3904639399259504, "grad_norm": 1.5015759588585833, "learning_rate": 6.962678833113535e-07, "loss": 0.3977, "step": 22463 }, { "epoch": 0.39048132246345324, "grad_norm": 1.649676208503469, "learning_rate": 6.962419930210944e-07, "loss": 0.2746, "step": 22464 }, { "epoch": 0.390498705000956, "grad_norm": 1.8665169883908836, "learning_rate": 6.962161021088381e-07, "loss": 0.2177, "step": 22465 }, { "epoch": 0.39051608753845884, "grad_norm": 1.8756029567335526, "learning_rate": 6.961902105746669e-07, "loss": 0.3092, "step": 22466 }, { "epoch": 0.3905334700759617, "grad_norm": 1.2181718468084617, "learning_rate": 6.961643184186629e-07, "loss": 0.2059, "step": 22467 }, { "epoch": 0.3905508526134645, "grad_norm": 2.2666717223142303, "learning_rate": 6.961384256409082e-07, "loss": 0.3693, "step": 22468 }, { "epoch": 0.39056823515096734, "grad_norm": 1.5815428221261545, "learning_rate": 6.961125322414847e-07, "loss": 0.1936, "step": 22469 }, { "epoch": 0.39058561768847017, "grad_norm": 1.562945477939867, "learning_rate": 6.960866382204746e-07, "loss": 0.43, "step": 22470 }, { "epoch": 0.390603000225973, "grad_norm": 4.951057966617933, "learning_rate": 6.9606074357796e-07, "loss": 0.2762, "step": 22471 }, { "epoch": 0.3906203827634758, "grad_norm": 3.597099012444742, "learning_rate": 6.960348483140226e-07, "loss": 0.4564, "step": 22472 }, { "epoch": 0.39063776530097866, "grad_norm": 2.0001193033997358, "learning_rate": 6.960089524287452e-07, "loss": 0.4098, "step": 22473 }, { "epoch": 0.3906551478384815, "grad_norm": 1.6198403948080407, "learning_rate": 6.959830559222092e-07, "loss": 0.4235, "step": 22474 }, { "epoch": 0.39067253037598426, "grad_norm": 1.683668771109246, "learning_rate": 6.95957158794497e-07, "loss": 0.3022, "step": 22475 }, { "epoch": 0.3906899129134871, "grad_norm": 1.903056257587201, "learning_rate": 6.959312610456905e-07, "loss": 0.2846, "step": 22476 }, { "epoch": 0.3907072954509899, "grad_norm": 1.5083169020633311, "learning_rate": 6.959053626758721e-07, "loss": 0.4154, "step": 22477 }, { "epoch": 0.39072467798849275, "grad_norm": 1.1878235947945686, "learning_rate": 6.958794636851237e-07, "loss": 0.2886, "step": 22478 }, { "epoch": 0.3907420605259956, "grad_norm": 1.2891296303328308, "learning_rate": 6.958535640735271e-07, "loss": 0.2335, "step": 22479 }, { "epoch": 0.3907594430634984, "grad_norm": 2.7146779225827578, "learning_rate": 6.958276638411651e-07, "loss": 0.5041, "step": 22480 }, { "epoch": 0.39077682560100124, "grad_norm": 2.4975279325624755, "learning_rate": 6.95801762988119e-07, "loss": 0.2709, "step": 22481 }, { "epoch": 0.3907942081385041, "grad_norm": 4.171256942329614, "learning_rate": 6.957758615144715e-07, "loss": 0.3169, "step": 22482 }, { "epoch": 0.3908115906760069, "grad_norm": 1.3616821234679648, "learning_rate": 6.957499594203042e-07, "loss": 0.2662, "step": 22483 }, { "epoch": 0.39082897321350973, "grad_norm": 1.5835743179425328, "learning_rate": 6.957240567056997e-07, "loss": 0.3976, "step": 22484 }, { "epoch": 0.3908463557510125, "grad_norm": 1.5091391862797037, "learning_rate": 6.956981533707397e-07, "loss": 0.4307, "step": 22485 }, { "epoch": 0.39086373828851534, "grad_norm": 1.6600233635843027, "learning_rate": 6.956722494155066e-07, "loss": 0.218, "step": 22486 }, { "epoch": 0.39088112082601817, "grad_norm": 1.449171143415077, "learning_rate": 6.956463448400821e-07, "loss": 0.2138, "step": 22487 }, { "epoch": 0.390898503363521, "grad_norm": 1.085445951612814, "learning_rate": 6.956204396445488e-07, "loss": 0.4025, "step": 22488 }, { "epoch": 0.39091588590102383, "grad_norm": 1.2520849580662563, "learning_rate": 6.955945338289884e-07, "loss": 0.6155, "step": 22489 }, { "epoch": 0.39093326843852666, "grad_norm": 1.3660288868004793, "learning_rate": 6.955686273934833e-07, "loss": 0.2774, "step": 22490 }, { "epoch": 0.3909506509760295, "grad_norm": 1.1594951962941138, "learning_rate": 6.955427203381153e-07, "loss": 0.4165, "step": 22491 }, { "epoch": 0.3909680335135323, "grad_norm": 1.8133372470645137, "learning_rate": 6.95516812662967e-07, "loss": 0.2358, "step": 22492 }, { "epoch": 0.39098541605103515, "grad_norm": 1.596969158066721, "learning_rate": 6.9549090436812e-07, "loss": 0.222, "step": 22493 }, { "epoch": 0.391002798588538, "grad_norm": 1.4354268839383457, "learning_rate": 6.954649954536567e-07, "loss": 0.286, "step": 22494 }, { "epoch": 0.39102018112604076, "grad_norm": 1.500745472495046, "learning_rate": 6.954390859196591e-07, "loss": 0.2361, "step": 22495 }, { "epoch": 0.3910375636635436, "grad_norm": 1.4227903405073443, "learning_rate": 6.954131757662095e-07, "loss": 0.2757, "step": 22496 }, { "epoch": 0.3910549462010464, "grad_norm": 2.755114181058091, "learning_rate": 6.953872649933898e-07, "loss": 0.3792, "step": 22497 }, { "epoch": 0.39107232873854925, "grad_norm": 2.4342991796955182, "learning_rate": 6.953613536012823e-07, "loss": 0.2919, "step": 22498 }, { "epoch": 0.3910897112760521, "grad_norm": 1.4853869967844446, "learning_rate": 6.953354415899689e-07, "loss": 0.2768, "step": 22499 }, { "epoch": 0.3911070938135549, "grad_norm": 2.9641447803053422, "learning_rate": 6.95309528959532e-07, "loss": 0.4475, "step": 22500 }, { "epoch": 0.39112447635105774, "grad_norm": 1.5005224844744034, "learning_rate": 6.952836157100534e-07, "loss": 0.2213, "step": 22501 }, { "epoch": 0.39114185888856057, "grad_norm": 2.0618316965606724, "learning_rate": 6.952577018416157e-07, "loss": 0.3764, "step": 22502 }, { "epoch": 0.3911592414260634, "grad_norm": 2.030564226653077, "learning_rate": 6.952317873543008e-07, "loss": 0.3323, "step": 22503 }, { "epoch": 0.39117662396356623, "grad_norm": 1.2072711863827053, "learning_rate": 6.952058722481908e-07, "loss": 0.3914, "step": 22504 }, { "epoch": 0.391194006501069, "grad_norm": 1.847844477683507, "learning_rate": 6.951799565233677e-07, "loss": 0.3321, "step": 22505 }, { "epoch": 0.39121138903857183, "grad_norm": 1.2631814803792996, "learning_rate": 6.951540401799139e-07, "loss": 0.3678, "step": 22506 }, { "epoch": 0.39122877157607466, "grad_norm": 1.4604639106746384, "learning_rate": 6.951281232179114e-07, "loss": 0.3393, "step": 22507 }, { "epoch": 0.3912461541135775, "grad_norm": 1.8610525353482887, "learning_rate": 6.951022056374423e-07, "loss": 0.3422, "step": 22508 }, { "epoch": 0.3912635366510803, "grad_norm": 2.13041324587579, "learning_rate": 6.950762874385889e-07, "loss": 0.5504, "step": 22509 }, { "epoch": 0.39128091918858315, "grad_norm": 1.9611577016064725, "learning_rate": 6.950503686214333e-07, "loss": 0.416, "step": 22510 }, { "epoch": 0.391298301726086, "grad_norm": 1.6120135207889792, "learning_rate": 6.950244491860578e-07, "loss": 0.3611, "step": 22511 }, { "epoch": 0.3913156842635888, "grad_norm": 1.2681355986842178, "learning_rate": 6.949985291325441e-07, "loss": 0.3734, "step": 22512 }, { "epoch": 0.39133306680109164, "grad_norm": 1.471615280410076, "learning_rate": 6.949726084609748e-07, "loss": 0.305, "step": 22513 }, { "epoch": 0.3913504493385945, "grad_norm": 1.4360445703862597, "learning_rate": 6.949466871714318e-07, "loss": 0.3065, "step": 22514 }, { "epoch": 0.39136783187609725, "grad_norm": 2.041237932096445, "learning_rate": 6.949207652639975e-07, "loss": 0.3056, "step": 22515 }, { "epoch": 0.3913852144136001, "grad_norm": 2.1989781305732747, "learning_rate": 6.948948427387538e-07, "loss": 0.2332, "step": 22516 }, { "epoch": 0.3914025969511029, "grad_norm": 1.4548701185561468, "learning_rate": 6.948689195957829e-07, "loss": 0.3928, "step": 22517 }, { "epoch": 0.39141997948860574, "grad_norm": 1.7925106534256612, "learning_rate": 6.948429958351672e-07, "loss": 0.3215, "step": 22518 }, { "epoch": 0.39143736202610857, "grad_norm": 5.331255995351745, "learning_rate": 6.948170714569887e-07, "loss": 0.2534, "step": 22519 }, { "epoch": 0.3914547445636114, "grad_norm": 1.2790574781349624, "learning_rate": 6.947911464613294e-07, "loss": 0.2402, "step": 22520 }, { "epoch": 0.39147212710111423, "grad_norm": 2.255306637348407, "learning_rate": 6.947652208482718e-07, "loss": 0.262, "step": 22521 }, { "epoch": 0.39148950963861706, "grad_norm": 2.585933339705155, "learning_rate": 6.947392946178978e-07, "loss": 0.2803, "step": 22522 }, { "epoch": 0.3915068921761199, "grad_norm": 1.4035846573873736, "learning_rate": 6.947133677702898e-07, "loss": 0.4255, "step": 22523 }, { "epoch": 0.3915242747136227, "grad_norm": 3.1139651800188908, "learning_rate": 6.946874403055298e-07, "loss": 0.5667, "step": 22524 }, { "epoch": 0.3915416572511255, "grad_norm": 2.5763547708550805, "learning_rate": 6.946615122237002e-07, "loss": 0.3632, "step": 22525 }, { "epoch": 0.3915590397886283, "grad_norm": 1.5740902254816538, "learning_rate": 6.946355835248828e-07, "loss": 0.2828, "step": 22526 }, { "epoch": 0.39157642232613116, "grad_norm": 1.325056867477946, "learning_rate": 6.946096542091601e-07, "loss": 0.2275, "step": 22527 }, { "epoch": 0.391593804863634, "grad_norm": 1.648254882407001, "learning_rate": 6.945837242766143e-07, "loss": 0.3241, "step": 22528 }, { "epoch": 0.3916111874011368, "grad_norm": 1.9895395680849732, "learning_rate": 6.945577937273274e-07, "loss": 0.2462, "step": 22529 }, { "epoch": 0.39162856993863965, "grad_norm": 1.8862564537763775, "learning_rate": 6.945318625613816e-07, "loss": 0.3288, "step": 22530 }, { "epoch": 0.3916459524761425, "grad_norm": 1.3249619371624215, "learning_rate": 6.945059307788592e-07, "loss": 0.3254, "step": 22531 }, { "epoch": 0.3916633350136453, "grad_norm": 1.9201430535632467, "learning_rate": 6.944799983798423e-07, "loss": 0.2868, "step": 22532 }, { "epoch": 0.39168071755114814, "grad_norm": 1.5962995583265018, "learning_rate": 6.944540653644133e-07, "loss": 0.2481, "step": 22533 }, { "epoch": 0.39169810008865097, "grad_norm": 2.00054559914281, "learning_rate": 6.944281317326541e-07, "loss": 0.4776, "step": 22534 }, { "epoch": 0.39171548262615374, "grad_norm": 1.3953528459100257, "learning_rate": 6.944021974846471e-07, "loss": 0.2935, "step": 22535 }, { "epoch": 0.3917328651636566, "grad_norm": 1.968352253765911, "learning_rate": 6.943762626204744e-07, "loss": 0.3359, "step": 22536 }, { "epoch": 0.3917502477011594, "grad_norm": 3.584507912544465, "learning_rate": 6.943503271402182e-07, "loss": 0.1942, "step": 22537 }, { "epoch": 0.39176763023866223, "grad_norm": 1.2311217839259438, "learning_rate": 6.943243910439609e-07, "loss": 0.2641, "step": 22538 }, { "epoch": 0.39178501277616506, "grad_norm": 2.4531850092815564, "learning_rate": 6.942984543317844e-07, "loss": 0.292, "step": 22539 }, { "epoch": 0.3918023953136679, "grad_norm": 1.237648743390242, "learning_rate": 6.942725170037711e-07, "loss": 0.2465, "step": 22540 }, { "epoch": 0.3918197778511707, "grad_norm": 2.567587492488314, "learning_rate": 6.942465790600031e-07, "loss": 0.2752, "step": 22541 }, { "epoch": 0.39183716038867356, "grad_norm": 0.9472279718772152, "learning_rate": 6.942206405005628e-07, "loss": 0.2313, "step": 22542 }, { "epoch": 0.3918545429261764, "grad_norm": 1.7805571048592956, "learning_rate": 6.941947013255321e-07, "loss": 0.2582, "step": 22543 }, { "epoch": 0.39187192546367916, "grad_norm": 0.8695357425939525, "learning_rate": 6.941687615349936e-07, "loss": 0.2943, "step": 22544 }, { "epoch": 0.391889308001182, "grad_norm": 1.5101099559135693, "learning_rate": 6.941428211290292e-07, "loss": 0.3357, "step": 22545 }, { "epoch": 0.3919066905386848, "grad_norm": 1.5594375581127538, "learning_rate": 6.941168801077213e-07, "loss": 0.3638, "step": 22546 }, { "epoch": 0.39192407307618765, "grad_norm": 1.8849012761545674, "learning_rate": 6.940909384711519e-07, "loss": 0.2861, "step": 22547 }, { "epoch": 0.3919414556136905, "grad_norm": 1.3609503157061522, "learning_rate": 6.940649962194036e-07, "loss": 0.3569, "step": 22548 }, { "epoch": 0.3919588381511933, "grad_norm": 1.6861428973546568, "learning_rate": 6.940390533525585e-07, "loss": 0.1823, "step": 22549 }, { "epoch": 0.39197622068869614, "grad_norm": 1.7265307529193752, "learning_rate": 6.940131098706985e-07, "loss": 0.5447, "step": 22550 }, { "epoch": 0.39199360322619897, "grad_norm": 1.7548586464767237, "learning_rate": 6.93987165773906e-07, "loss": 0.334, "step": 22551 }, { "epoch": 0.3920109857637018, "grad_norm": 1.1080817930312725, "learning_rate": 6.939612210622635e-07, "loss": 0.2939, "step": 22552 }, { "epoch": 0.39202836830120463, "grad_norm": 1.2930006791373525, "learning_rate": 6.939352757358529e-07, "loss": 0.2476, "step": 22553 }, { "epoch": 0.3920457508387074, "grad_norm": 1.896506566321177, "learning_rate": 6.939093297947567e-07, "loss": 0.3231, "step": 22554 }, { "epoch": 0.39206313337621024, "grad_norm": 1.9069902702234451, "learning_rate": 6.938833832390567e-07, "loss": 0.4146, "step": 22555 }, { "epoch": 0.39208051591371307, "grad_norm": 1.1620249279091597, "learning_rate": 6.938574360688358e-07, "loss": 0.1791, "step": 22556 }, { "epoch": 0.3920978984512159, "grad_norm": 1.9025559581607614, "learning_rate": 6.938314882841758e-07, "loss": 0.4296, "step": 22557 }, { "epoch": 0.39211528098871873, "grad_norm": 1.4264223122331525, "learning_rate": 6.938055398851588e-07, "loss": 0.2561, "step": 22558 }, { "epoch": 0.39213266352622156, "grad_norm": 2.1854766350279093, "learning_rate": 6.937795908718674e-07, "loss": 0.3557, "step": 22559 }, { "epoch": 0.3921500460637244, "grad_norm": 1.3892923160681738, "learning_rate": 6.937536412443838e-07, "loss": 0.3111, "step": 22560 }, { "epoch": 0.3921674286012272, "grad_norm": 1.9502223149648723, "learning_rate": 6.9372769100279e-07, "loss": 0.2513, "step": 22561 }, { "epoch": 0.39218481113873005, "grad_norm": 1.674754944010236, "learning_rate": 6.937017401471686e-07, "loss": 0.2877, "step": 22562 }, { "epoch": 0.3922021936762329, "grad_norm": 1.462901225220164, "learning_rate": 6.936757886776014e-07, "loss": 0.3327, "step": 22563 }, { "epoch": 0.39221957621373565, "grad_norm": 2.138411088773547, "learning_rate": 6.936498365941712e-07, "loss": 0.6028, "step": 22564 }, { "epoch": 0.3922369587512385, "grad_norm": 1.1254504595388526, "learning_rate": 6.936238838969599e-07, "loss": 0.2904, "step": 22565 }, { "epoch": 0.3922543412887413, "grad_norm": 1.5727745890508924, "learning_rate": 6.935979305860496e-07, "loss": 0.3761, "step": 22566 }, { "epoch": 0.39227172382624415, "grad_norm": 1.5781299291465452, "learning_rate": 6.93571976661523e-07, "loss": 0.2656, "step": 22567 }, { "epoch": 0.392289106363747, "grad_norm": 1.9524470188890701, "learning_rate": 6.935460221234621e-07, "loss": 0.6097, "step": 22568 }, { "epoch": 0.3923064889012498, "grad_norm": 1.575679326107845, "learning_rate": 6.935200669719493e-07, "loss": 0.4861, "step": 22569 }, { "epoch": 0.39232387143875264, "grad_norm": 3.291951843639347, "learning_rate": 6.934941112070667e-07, "loss": 0.4731, "step": 22570 }, { "epoch": 0.39234125397625547, "grad_norm": 1.4684904084162036, "learning_rate": 6.934681548288967e-07, "loss": 0.2521, "step": 22571 }, { "epoch": 0.3923586365137583, "grad_norm": 1.534679581103908, "learning_rate": 6.934421978375216e-07, "loss": 0.2369, "step": 22572 }, { "epoch": 0.3923760190512611, "grad_norm": 1.351861968031401, "learning_rate": 6.934162402330234e-07, "loss": 0.3136, "step": 22573 }, { "epoch": 0.3923934015887639, "grad_norm": 1.9427560496252023, "learning_rate": 6.933902820154845e-07, "loss": 0.175, "step": 22574 }, { "epoch": 0.39241078412626673, "grad_norm": 2.0065529748300377, "learning_rate": 6.933643231849876e-07, "loss": 0.348, "step": 22575 }, { "epoch": 0.39242816666376956, "grad_norm": 4.941481126283863, "learning_rate": 6.933383637416143e-07, "loss": 0.2907, "step": 22576 }, { "epoch": 0.3924455492012724, "grad_norm": 1.7795646565727479, "learning_rate": 6.933124036854474e-07, "loss": 0.4504, "step": 22577 }, { "epoch": 0.3924629317387752, "grad_norm": 2.172184062735905, "learning_rate": 6.932864430165689e-07, "loss": 0.356, "step": 22578 }, { "epoch": 0.39248031427627805, "grad_norm": 1.5683508632652756, "learning_rate": 6.932604817350611e-07, "loss": 0.1592, "step": 22579 }, { "epoch": 0.3924976968137809, "grad_norm": 2.232289430319463, "learning_rate": 6.932345198410065e-07, "loss": 0.2272, "step": 22580 }, { "epoch": 0.3925150793512837, "grad_norm": 1.157292277936587, "learning_rate": 6.932085573344873e-07, "loss": 0.4885, "step": 22581 }, { "epoch": 0.39253246188878654, "grad_norm": 1.4777832202454366, "learning_rate": 6.931825942155853e-07, "loss": 0.4742, "step": 22582 }, { "epoch": 0.3925498444262894, "grad_norm": 1.7613630331037349, "learning_rate": 6.931566304843835e-07, "loss": 0.3352, "step": 22583 }, { "epoch": 0.39256722696379215, "grad_norm": 1.48788456259286, "learning_rate": 6.93130666140964e-07, "loss": 0.4132, "step": 22584 }, { "epoch": 0.392584609501295, "grad_norm": 1.893205980954809, "learning_rate": 6.931047011854089e-07, "loss": 0.3552, "step": 22585 }, { "epoch": 0.3926019920387978, "grad_norm": 1.664881187325467, "learning_rate": 6.930787356178006e-07, "loss": 0.3824, "step": 22586 }, { "epoch": 0.39261937457630064, "grad_norm": 2.3576454778920466, "learning_rate": 6.930527694382214e-07, "loss": 0.2889, "step": 22587 }, { "epoch": 0.39263675711380347, "grad_norm": 1.8864497541807255, "learning_rate": 6.930268026467536e-07, "loss": 0.2373, "step": 22588 }, { "epoch": 0.3926541396513063, "grad_norm": 2.4508464328994126, "learning_rate": 6.930008352434796e-07, "loss": 0.5765, "step": 22589 }, { "epoch": 0.39267152218880913, "grad_norm": 1.3211092096969588, "learning_rate": 6.929748672284815e-07, "loss": 0.3727, "step": 22590 }, { "epoch": 0.39268890472631196, "grad_norm": 12.177788029530252, "learning_rate": 6.929488986018417e-07, "loss": 0.3337, "step": 22591 }, { "epoch": 0.3927062872638148, "grad_norm": 2.1459021535907548, "learning_rate": 6.929229293636426e-07, "loss": 0.2329, "step": 22592 }, { "epoch": 0.3927236698013176, "grad_norm": 2.8186643155277764, "learning_rate": 6.928969595139664e-07, "loss": 0.4133, "step": 22593 }, { "epoch": 0.3927410523388204, "grad_norm": 2.2362752647941413, "learning_rate": 6.928709890528954e-07, "loss": 0.3328, "step": 22594 }, { "epoch": 0.3927584348763232, "grad_norm": 2.265767198407863, "learning_rate": 6.92845017980512e-07, "loss": 0.2836, "step": 22595 }, { "epoch": 0.39277581741382606, "grad_norm": 1.577976562898424, "learning_rate": 6.928190462968986e-07, "loss": 0.3358, "step": 22596 }, { "epoch": 0.3927931999513289, "grad_norm": 1.6820396603766667, "learning_rate": 6.927930740021373e-07, "loss": 0.2565, "step": 22597 }, { "epoch": 0.3928105824888317, "grad_norm": 2.043205338234514, "learning_rate": 6.927671010963104e-07, "loss": 0.4313, "step": 22598 }, { "epoch": 0.39282796502633455, "grad_norm": 1.512326595865332, "learning_rate": 6.927411275795004e-07, "loss": 0.3069, "step": 22599 }, { "epoch": 0.3928453475638374, "grad_norm": 1.1543204538961813, "learning_rate": 6.927151534517898e-07, "loss": 0.2123, "step": 22600 }, { "epoch": 0.3928627301013402, "grad_norm": 1.2950320883939246, "learning_rate": 6.926891787132604e-07, "loss": 0.3103, "step": 22601 }, { "epoch": 0.39288011263884304, "grad_norm": 2.008111545673254, "learning_rate": 6.92663203363995e-07, "loss": 0.2539, "step": 22602 }, { "epoch": 0.39289749517634587, "grad_norm": 2.5052781708341745, "learning_rate": 6.926372274040756e-07, "loss": 0.2705, "step": 22603 }, { "epoch": 0.39291487771384864, "grad_norm": 1.646286738792529, "learning_rate": 6.926112508335847e-07, "loss": 0.3653, "step": 22604 }, { "epoch": 0.3929322602513515, "grad_norm": 1.4103054697654078, "learning_rate": 6.925852736526047e-07, "loss": 0.3049, "step": 22605 }, { "epoch": 0.3929496427888543, "grad_norm": 1.2675340752126434, "learning_rate": 6.925592958612179e-07, "loss": 0.3084, "step": 22606 }, { "epoch": 0.39296702532635713, "grad_norm": 1.167753176421444, "learning_rate": 6.925333174595064e-07, "loss": 0.2475, "step": 22607 }, { "epoch": 0.39298440786385996, "grad_norm": 1.438397553660142, "learning_rate": 6.925073384475528e-07, "loss": 0.2822, "step": 22608 }, { "epoch": 0.3930017904013628, "grad_norm": 1.7922573419910235, "learning_rate": 6.924813588254395e-07, "loss": 0.3809, "step": 22609 }, { "epoch": 0.3930191729388656, "grad_norm": 1.310324214032731, "learning_rate": 6.924553785932485e-07, "loss": 0.2561, "step": 22610 }, { "epoch": 0.39303655547636845, "grad_norm": 2.1079554772412203, "learning_rate": 6.924293977510625e-07, "loss": 0.386, "step": 22611 }, { "epoch": 0.3930539380138713, "grad_norm": 2.80019660402875, "learning_rate": 6.924034162989637e-07, "loss": 0.3354, "step": 22612 }, { "epoch": 0.3930713205513741, "grad_norm": 4.735291762926382, "learning_rate": 6.923774342370344e-07, "loss": 0.328, "step": 22613 }, { "epoch": 0.3930887030888769, "grad_norm": 1.089530912508913, "learning_rate": 6.92351451565357e-07, "loss": 0.2726, "step": 22614 }, { "epoch": 0.3931060856263797, "grad_norm": 3.1283216891222465, "learning_rate": 6.923254682840138e-07, "loss": 0.538, "step": 22615 }, { "epoch": 0.39312346816388255, "grad_norm": 1.3384507073999559, "learning_rate": 6.922994843930874e-07, "loss": 0.3117, "step": 22616 }, { "epoch": 0.3931408507013854, "grad_norm": 1.2687708771317703, "learning_rate": 6.922734998926598e-07, "loss": 0.3422, "step": 22617 }, { "epoch": 0.3931582332388882, "grad_norm": 1.0837651032293911, "learning_rate": 6.922475147828136e-07, "loss": 0.3469, "step": 22618 }, { "epoch": 0.39317561577639104, "grad_norm": 1.9068044637664243, "learning_rate": 6.922215290636311e-07, "loss": 0.4173, "step": 22619 }, { "epoch": 0.39319299831389387, "grad_norm": 1.6390309014837785, "learning_rate": 6.921955427351946e-07, "loss": 0.2297, "step": 22620 }, { "epoch": 0.3932103808513967, "grad_norm": 1.2592915689838011, "learning_rate": 6.921695557975864e-07, "loss": 0.2242, "step": 22621 }, { "epoch": 0.39322776338889953, "grad_norm": 0.9198892762357931, "learning_rate": 6.921435682508891e-07, "loss": 0.1702, "step": 22622 }, { "epoch": 0.39324514592640236, "grad_norm": 2.140611225358611, "learning_rate": 6.92117580095185e-07, "loss": 0.3504, "step": 22623 }, { "epoch": 0.39326252846390514, "grad_norm": 1.1736125004180207, "learning_rate": 6.920915913305565e-07, "loss": 0.4073, "step": 22624 }, { "epoch": 0.39327991100140797, "grad_norm": 2.2568802360258697, "learning_rate": 6.920656019570859e-07, "loss": 0.3109, "step": 22625 }, { "epoch": 0.3932972935389108, "grad_norm": 2.5825783670078044, "learning_rate": 6.920396119748553e-07, "loss": 0.5893, "step": 22626 }, { "epoch": 0.3933146760764136, "grad_norm": 1.4259231521585343, "learning_rate": 6.920136213839476e-07, "loss": 0.2457, "step": 22627 }, { "epoch": 0.39333205861391646, "grad_norm": 1.580162538596304, "learning_rate": 6.919876301844448e-07, "loss": 0.2924, "step": 22628 }, { "epoch": 0.3933494411514193, "grad_norm": 1.6469101537484696, "learning_rate": 6.919616383764296e-07, "loss": 0.2441, "step": 22629 }, { "epoch": 0.3933668236889221, "grad_norm": 1.4330245665465766, "learning_rate": 6.919356459599839e-07, "loss": 0.1801, "step": 22630 }, { "epoch": 0.39338420622642495, "grad_norm": 1.6470591321527654, "learning_rate": 6.919096529351905e-07, "loss": 0.472, "step": 22631 }, { "epoch": 0.3934015887639278, "grad_norm": 1.113252262708642, "learning_rate": 6.918836593021317e-07, "loss": 0.3031, "step": 22632 }, { "epoch": 0.3934189713014306, "grad_norm": 1.3155930365130168, "learning_rate": 6.918576650608898e-07, "loss": 0.2941, "step": 22633 }, { "epoch": 0.3934363538389334, "grad_norm": 1.1567285879819988, "learning_rate": 6.918316702115471e-07, "loss": 0.3395, "step": 22634 }, { "epoch": 0.3934537363764362, "grad_norm": 1.0165845277894758, "learning_rate": 6.918056747541864e-07, "loss": 0.3636, "step": 22635 }, { "epoch": 0.39347111891393904, "grad_norm": 2.3504465455211307, "learning_rate": 6.917796786888896e-07, "loss": 0.3371, "step": 22636 }, { "epoch": 0.3934885014514419, "grad_norm": 1.4577815207478533, "learning_rate": 6.917536820157394e-07, "loss": 0.3426, "step": 22637 }, { "epoch": 0.3935058839889447, "grad_norm": 1.7107758429078388, "learning_rate": 6.917276847348182e-07, "loss": 0.3903, "step": 22638 }, { "epoch": 0.39352326652644753, "grad_norm": 2.953715109297842, "learning_rate": 6.917016868462082e-07, "loss": 0.2898, "step": 22639 }, { "epoch": 0.39354064906395037, "grad_norm": 1.1389629483592787, "learning_rate": 6.916756883499918e-07, "loss": 0.2956, "step": 22640 }, { "epoch": 0.3935580316014532, "grad_norm": 0.988749192511169, "learning_rate": 6.916496892462518e-07, "loss": 0.2299, "step": 22641 }, { "epoch": 0.393575414138956, "grad_norm": 1.3008203420606699, "learning_rate": 6.9162368953507e-07, "loss": 0.2739, "step": 22642 }, { "epoch": 0.39359279667645886, "grad_norm": 1.7529842553203479, "learning_rate": 6.915976892165294e-07, "loss": 0.3001, "step": 22643 }, { "epoch": 0.39361017921396163, "grad_norm": 1.9243115821550545, "learning_rate": 6.915716882907121e-07, "loss": 0.3183, "step": 22644 }, { "epoch": 0.39362756175146446, "grad_norm": 1.4352571175465856, "learning_rate": 6.915456867577004e-07, "loss": 0.51, "step": 22645 }, { "epoch": 0.3936449442889673, "grad_norm": 6.815725833653884, "learning_rate": 6.915196846175769e-07, "loss": 0.4271, "step": 22646 }, { "epoch": 0.3936623268264701, "grad_norm": 2.7405984158750902, "learning_rate": 6.91493681870424e-07, "loss": 0.2773, "step": 22647 }, { "epoch": 0.39367970936397295, "grad_norm": 2.252077034136188, "learning_rate": 6.914676785163241e-07, "loss": 0.3144, "step": 22648 }, { "epoch": 0.3936970919014758, "grad_norm": 1.672094109222854, "learning_rate": 6.914416745553597e-07, "loss": 0.3667, "step": 22649 }, { "epoch": 0.3937144744389786, "grad_norm": 1.1142796883794828, "learning_rate": 6.914156699876131e-07, "loss": 0.4407, "step": 22650 }, { "epoch": 0.39373185697648144, "grad_norm": 1.9317486960274328, "learning_rate": 6.913896648131666e-07, "loss": 0.5018, "step": 22651 }, { "epoch": 0.3937492395139843, "grad_norm": 1.8644846026914785, "learning_rate": 6.91363659032103e-07, "loss": 0.4793, "step": 22652 }, { "epoch": 0.3937666220514871, "grad_norm": 3.890812774119019, "learning_rate": 6.913376526445044e-07, "loss": 0.3671, "step": 22653 }, { "epoch": 0.3937840045889899, "grad_norm": 4.49549607017622, "learning_rate": 6.913116456504533e-07, "loss": 0.5961, "step": 22654 }, { "epoch": 0.3938013871264927, "grad_norm": 1.9855126754191115, "learning_rate": 6.912856380500322e-07, "loss": 0.3224, "step": 22655 }, { "epoch": 0.39381876966399554, "grad_norm": 3.3183363834222868, "learning_rate": 6.912596298433235e-07, "loss": 0.4349, "step": 22656 }, { "epoch": 0.39383615220149837, "grad_norm": 1.7046953479416556, "learning_rate": 6.912336210304096e-07, "loss": 0.2018, "step": 22657 }, { "epoch": 0.3938535347390012, "grad_norm": 1.7700868122457947, "learning_rate": 6.91207611611373e-07, "loss": 0.3054, "step": 22658 }, { "epoch": 0.39387091727650403, "grad_norm": 3.8905471290241733, "learning_rate": 6.911816015862961e-07, "loss": 0.4189, "step": 22659 }, { "epoch": 0.39388829981400686, "grad_norm": 2.853018224826184, "learning_rate": 6.911555909552614e-07, "loss": 0.3324, "step": 22660 }, { "epoch": 0.3939056823515097, "grad_norm": 1.567056594289207, "learning_rate": 6.911295797183511e-07, "loss": 0.249, "step": 22661 }, { "epoch": 0.3939230648890125, "grad_norm": 2.067688167053953, "learning_rate": 6.91103567875648e-07, "loss": 0.262, "step": 22662 }, { "epoch": 0.39394044742651535, "grad_norm": 1.3082652973726654, "learning_rate": 6.910775554272345e-07, "loss": 0.2668, "step": 22663 }, { "epoch": 0.3939578299640181, "grad_norm": 1.4348917352155182, "learning_rate": 6.910515423731928e-07, "loss": 0.4363, "step": 22664 }, { "epoch": 0.39397521250152095, "grad_norm": 1.4220702726528363, "learning_rate": 6.910255287136053e-07, "loss": 0.4704, "step": 22665 }, { "epoch": 0.3939925950390238, "grad_norm": 1.608976846918555, "learning_rate": 6.909995144485548e-07, "loss": 0.2561, "step": 22666 }, { "epoch": 0.3940099775765266, "grad_norm": 2.7913286765492327, "learning_rate": 6.909734995781235e-07, "loss": 0.4418, "step": 22667 }, { "epoch": 0.39402736011402945, "grad_norm": 1.115384988689657, "learning_rate": 6.90947484102394e-07, "loss": 0.3516, "step": 22668 }, { "epoch": 0.3940447426515323, "grad_norm": 2.032783446847047, "learning_rate": 6.909214680214486e-07, "loss": 0.6046, "step": 22669 }, { "epoch": 0.3940621251890351, "grad_norm": 1.7417553929328795, "learning_rate": 6.908954513353699e-07, "loss": 0.269, "step": 22670 }, { "epoch": 0.39407950772653794, "grad_norm": 1.9200885636423555, "learning_rate": 6.908694340442403e-07, "loss": 0.5056, "step": 22671 }, { "epoch": 0.39409689026404077, "grad_norm": 2.356462545203462, "learning_rate": 6.908434161481423e-07, "loss": 0.4008, "step": 22672 }, { "epoch": 0.3941142728015436, "grad_norm": 2.062512111548569, "learning_rate": 6.908173976471584e-07, "loss": 0.2544, "step": 22673 }, { "epoch": 0.39413165533904637, "grad_norm": 1.77564311844718, "learning_rate": 6.907913785413709e-07, "loss": 0.4905, "step": 22674 }, { "epoch": 0.3941490378765492, "grad_norm": 1.5378159965814318, "learning_rate": 6.907653588308624e-07, "loss": 0.2281, "step": 22675 }, { "epoch": 0.39416642041405203, "grad_norm": 1.6945152888648056, "learning_rate": 6.907393385157154e-07, "loss": 0.346, "step": 22676 }, { "epoch": 0.39418380295155486, "grad_norm": 5.037320227512969, "learning_rate": 6.907133175960121e-07, "loss": 0.3327, "step": 22677 }, { "epoch": 0.3942011854890577, "grad_norm": 1.6266846617777648, "learning_rate": 6.906872960718355e-07, "loss": 0.5084, "step": 22678 }, { "epoch": 0.3942185680265605, "grad_norm": 1.8505933773358776, "learning_rate": 6.906612739432676e-07, "loss": 0.2535, "step": 22679 }, { "epoch": 0.39423595056406335, "grad_norm": 1.1136716022460762, "learning_rate": 6.90635251210391e-07, "loss": 0.3542, "step": 22680 }, { "epoch": 0.3942533331015662, "grad_norm": 1.5687701904722529, "learning_rate": 6.906092278732882e-07, "loss": 0.2744, "step": 22681 }, { "epoch": 0.394270715639069, "grad_norm": 1.827942345933901, "learning_rate": 6.905832039320419e-07, "loss": 0.2411, "step": 22682 }, { "epoch": 0.3942880981765718, "grad_norm": 1.8699040887834901, "learning_rate": 6.905571793867343e-07, "loss": 0.3391, "step": 22683 }, { "epoch": 0.3943054807140746, "grad_norm": 1.7567929947792378, "learning_rate": 6.905311542374479e-07, "loss": 0.3507, "step": 22684 }, { "epoch": 0.39432286325157745, "grad_norm": 1.91327281137563, "learning_rate": 6.905051284842653e-07, "loss": 0.3117, "step": 22685 }, { "epoch": 0.3943402457890803, "grad_norm": 1.340535342847405, "learning_rate": 6.90479102127269e-07, "loss": 0.2679, "step": 22686 }, { "epoch": 0.3943576283265831, "grad_norm": 2.380051186930645, "learning_rate": 6.904530751665414e-07, "loss": 0.3649, "step": 22687 }, { "epoch": 0.39437501086408594, "grad_norm": 2.178243374267932, "learning_rate": 6.904270476021651e-07, "loss": 0.295, "step": 22688 }, { "epoch": 0.39439239340158877, "grad_norm": 1.354874426468334, "learning_rate": 6.904010194342224e-07, "loss": 0.1888, "step": 22689 }, { "epoch": 0.3944097759390916, "grad_norm": 1.2022955194241525, "learning_rate": 6.90374990662796e-07, "loss": 0.3136, "step": 22690 }, { "epoch": 0.39442715847659443, "grad_norm": 1.7664649382756383, "learning_rate": 6.903489612879685e-07, "loss": 0.1575, "step": 22691 }, { "epoch": 0.39444454101409726, "grad_norm": 1.4585002443705513, "learning_rate": 6.90322931309822e-07, "loss": 0.4617, "step": 22692 }, { "epoch": 0.39446192355160004, "grad_norm": 2.7811249186783256, "learning_rate": 6.902969007284394e-07, "loss": 0.434, "step": 22693 }, { "epoch": 0.39447930608910287, "grad_norm": 1.3574144808088182, "learning_rate": 6.902708695439028e-07, "loss": 0.188, "step": 22694 }, { "epoch": 0.3944966886266057, "grad_norm": 1.3505625273952448, "learning_rate": 6.902448377562953e-07, "loss": 0.2998, "step": 22695 }, { "epoch": 0.3945140711641085, "grad_norm": 3.6818777225987622, "learning_rate": 6.902188053656987e-07, "loss": 0.5279, "step": 22696 }, { "epoch": 0.39453145370161136, "grad_norm": 1.4427239786781778, "learning_rate": 6.901927723721961e-07, "loss": 0.3724, "step": 22697 }, { "epoch": 0.3945488362391142, "grad_norm": 1.034772713743062, "learning_rate": 6.901667387758697e-07, "loss": 0.2356, "step": 22698 }, { "epoch": 0.394566218776617, "grad_norm": 1.4524306210871667, "learning_rate": 6.901407045768021e-07, "loss": 0.2807, "step": 22699 }, { "epoch": 0.39458360131411985, "grad_norm": 2.4873513391572803, "learning_rate": 6.901146697750758e-07, "loss": 0.4224, "step": 22700 }, { "epoch": 0.3946009838516227, "grad_norm": 1.0997621578976697, "learning_rate": 6.900886343707735e-07, "loss": 0.1814, "step": 22701 }, { "epoch": 0.3946183663891255, "grad_norm": 1.773713648120858, "learning_rate": 6.900625983639773e-07, "loss": 0.2781, "step": 22702 }, { "epoch": 0.3946357489266283, "grad_norm": 1.4135291346154761, "learning_rate": 6.900365617547701e-07, "loss": 0.2974, "step": 22703 }, { "epoch": 0.3946531314641311, "grad_norm": 1.1911109711713401, "learning_rate": 6.900105245432341e-07, "loss": 0.262, "step": 22704 }, { "epoch": 0.39467051400163394, "grad_norm": 1.9280552233539172, "learning_rate": 6.899844867294523e-07, "loss": 0.2412, "step": 22705 }, { "epoch": 0.3946878965391368, "grad_norm": 1.2326923627857045, "learning_rate": 6.899584483135068e-07, "loss": 0.2903, "step": 22706 }, { "epoch": 0.3947052790766396, "grad_norm": 1.0391825361905944, "learning_rate": 6.899324092954802e-07, "loss": 0.3827, "step": 22707 }, { "epoch": 0.39472266161414243, "grad_norm": 1.898200603508885, "learning_rate": 6.899063696754554e-07, "loss": 0.2707, "step": 22708 }, { "epoch": 0.39474004415164526, "grad_norm": 5.210105420571947, "learning_rate": 6.898803294535144e-07, "loss": 0.4174, "step": 22709 }, { "epoch": 0.3947574266891481, "grad_norm": 1.451117616654633, "learning_rate": 6.8985428862974e-07, "loss": 0.2437, "step": 22710 }, { "epoch": 0.3947748092266509, "grad_norm": 1.6065962156724565, "learning_rate": 6.898282472042147e-07, "loss": 0.2842, "step": 22711 }, { "epoch": 0.39479219176415375, "grad_norm": 3.3453419332749736, "learning_rate": 6.898022051770211e-07, "loss": 0.2886, "step": 22712 }, { "epoch": 0.39480957430165653, "grad_norm": 0.7617967055171493, "learning_rate": 6.897761625482416e-07, "loss": 0.3012, "step": 22713 }, { "epoch": 0.39482695683915936, "grad_norm": 1.5105933620932062, "learning_rate": 6.897501193179592e-07, "loss": 0.2575, "step": 22714 }, { "epoch": 0.3948443393766622, "grad_norm": 1.7977447949346312, "learning_rate": 6.897240754862557e-07, "loss": 0.3514, "step": 22715 }, { "epoch": 0.394861721914165, "grad_norm": 1.1845922707719914, "learning_rate": 6.896980310532141e-07, "loss": 0.3023, "step": 22716 }, { "epoch": 0.39487910445166785, "grad_norm": 1.3526108131134695, "learning_rate": 6.896719860189171e-07, "loss": 0.2342, "step": 22717 }, { "epoch": 0.3948964869891707, "grad_norm": 1.5825621962543657, "learning_rate": 6.896459403834468e-07, "loss": 0.2138, "step": 22718 }, { "epoch": 0.3949138695266735, "grad_norm": 1.463536050945666, "learning_rate": 6.896198941468861e-07, "loss": 0.2119, "step": 22719 }, { "epoch": 0.39493125206417634, "grad_norm": 1.714759652579299, "learning_rate": 6.895938473093174e-07, "loss": 0.3452, "step": 22720 }, { "epoch": 0.39494863460167917, "grad_norm": 1.9886305156509738, "learning_rate": 6.895677998708233e-07, "loss": 0.3594, "step": 22721 }, { "epoch": 0.394966017139182, "grad_norm": 2.6282116895319083, "learning_rate": 6.895417518314864e-07, "loss": 0.2891, "step": 22722 }, { "epoch": 0.3949833996766848, "grad_norm": 2.1862225105431663, "learning_rate": 6.895157031913892e-07, "loss": 0.561, "step": 22723 }, { "epoch": 0.3950007822141876, "grad_norm": 2.601017259880847, "learning_rate": 6.894896539506143e-07, "loss": 0.3719, "step": 22724 }, { "epoch": 0.39501816475169044, "grad_norm": 2.4109303675616314, "learning_rate": 6.894636041092441e-07, "loss": 0.3248, "step": 22725 }, { "epoch": 0.39503554728919327, "grad_norm": 2.33807955644253, "learning_rate": 6.894375536673615e-07, "loss": 0.2796, "step": 22726 }, { "epoch": 0.3950529298266961, "grad_norm": 0.8217042536180159, "learning_rate": 6.894115026250486e-07, "loss": 0.1354, "step": 22727 }, { "epoch": 0.39507031236419893, "grad_norm": 1.860872668310684, "learning_rate": 6.893854509823885e-07, "loss": 0.2159, "step": 22728 }, { "epoch": 0.39508769490170176, "grad_norm": 1.9081551656927234, "learning_rate": 6.893593987394635e-07, "loss": 0.2367, "step": 22729 }, { "epoch": 0.3951050774392046, "grad_norm": 2.575965156390154, "learning_rate": 6.89333345896356e-07, "loss": 0.3277, "step": 22730 }, { "epoch": 0.3951224599767074, "grad_norm": 1.877963450548673, "learning_rate": 6.893072924531489e-07, "loss": 0.3727, "step": 22731 }, { "epoch": 0.39513984251421025, "grad_norm": 1.9338704845651002, "learning_rate": 6.892812384099247e-07, "loss": 0.3803, "step": 22732 }, { "epoch": 0.395157225051713, "grad_norm": 3.613391501604744, "learning_rate": 6.892551837667658e-07, "loss": 0.4163, "step": 22733 }, { "epoch": 0.39517460758921585, "grad_norm": 1.9071858647289897, "learning_rate": 6.89229128523755e-07, "loss": 0.342, "step": 22734 }, { "epoch": 0.3951919901267187, "grad_norm": 1.511082622140259, "learning_rate": 6.892030726809747e-07, "loss": 0.2097, "step": 22735 }, { "epoch": 0.3952093726642215, "grad_norm": 4.9268241368186905, "learning_rate": 6.891770162385077e-07, "loss": 0.5273, "step": 22736 }, { "epoch": 0.39522675520172434, "grad_norm": 2.9859418238325293, "learning_rate": 6.891509591964362e-07, "loss": 0.302, "step": 22737 }, { "epoch": 0.3952441377392272, "grad_norm": 1.9220340921973726, "learning_rate": 6.891249015548433e-07, "loss": 0.2537, "step": 22738 }, { "epoch": 0.39526152027673, "grad_norm": 1.6213705912172631, "learning_rate": 6.890988433138114e-07, "loss": 0.4687, "step": 22739 }, { "epoch": 0.39527890281423284, "grad_norm": 2.4836156148097794, "learning_rate": 6.890727844734228e-07, "loss": 0.3312, "step": 22740 }, { "epoch": 0.39529628535173567, "grad_norm": 1.9475744792907548, "learning_rate": 6.890467250337604e-07, "loss": 0.276, "step": 22741 }, { "epoch": 0.3953136678892385, "grad_norm": 1.615724779727948, "learning_rate": 6.890206649949069e-07, "loss": 0.3238, "step": 22742 }, { "epoch": 0.39533105042674127, "grad_norm": 1.7735882863630557, "learning_rate": 6.889946043569445e-07, "loss": 0.2798, "step": 22743 }, { "epoch": 0.3953484329642441, "grad_norm": 1.4736696287377167, "learning_rate": 6.88968543119956e-07, "loss": 0.2464, "step": 22744 }, { "epoch": 0.39536581550174693, "grad_norm": 1.3897281713927299, "learning_rate": 6.889424812840243e-07, "loss": 0.2914, "step": 22745 }, { "epoch": 0.39538319803924976, "grad_norm": 2.2180865108662116, "learning_rate": 6.889164188492313e-07, "loss": 0.2267, "step": 22746 }, { "epoch": 0.3954005805767526, "grad_norm": 1.4715615974291358, "learning_rate": 6.888903558156605e-07, "loss": 0.2587, "step": 22747 }, { "epoch": 0.3954179631142554, "grad_norm": 1.8208142921836754, "learning_rate": 6.888642921833937e-07, "loss": 0.2118, "step": 22748 }, { "epoch": 0.39543534565175825, "grad_norm": 1.9619268932170937, "learning_rate": 6.88838227952514e-07, "loss": 0.2453, "step": 22749 }, { "epoch": 0.3954527281892611, "grad_norm": 1.6411575403495366, "learning_rate": 6.888121631231038e-07, "loss": 0.4031, "step": 22750 }, { "epoch": 0.3954701107267639, "grad_norm": 2.5250979668413365, "learning_rate": 6.887860976952459e-07, "loss": 0.2705, "step": 22751 }, { "epoch": 0.39548749326426674, "grad_norm": 1.340234185080883, "learning_rate": 6.887600316690227e-07, "loss": 0.3871, "step": 22752 }, { "epoch": 0.3955048758017695, "grad_norm": 2.4214010201076297, "learning_rate": 6.887339650445169e-07, "loss": 0.2784, "step": 22753 }, { "epoch": 0.39552225833927235, "grad_norm": 1.9962202209304745, "learning_rate": 6.887078978218111e-07, "loss": 0.248, "step": 22754 }, { "epoch": 0.3955396408767752, "grad_norm": 1.6857555124676886, "learning_rate": 6.88681830000988e-07, "loss": 0.239, "step": 22755 }, { "epoch": 0.395557023414278, "grad_norm": 1.3239779282183026, "learning_rate": 6.8865576158213e-07, "loss": 0.2136, "step": 22756 }, { "epoch": 0.39557440595178084, "grad_norm": 4.563440599662926, "learning_rate": 6.886296925653202e-07, "loss": 0.4062, "step": 22757 }, { "epoch": 0.39559178848928367, "grad_norm": 1.22891765343203, "learning_rate": 6.886036229506408e-07, "loss": 0.3332, "step": 22758 }, { "epoch": 0.3956091710267865, "grad_norm": 1.464556787717666, "learning_rate": 6.885775527381746e-07, "loss": 0.3207, "step": 22759 }, { "epoch": 0.39562655356428933, "grad_norm": 1.6341818896491005, "learning_rate": 6.885514819280039e-07, "loss": 0.2561, "step": 22760 }, { "epoch": 0.39564393610179216, "grad_norm": 1.3864662433973387, "learning_rate": 6.885254105202119e-07, "loss": 0.2956, "step": 22761 }, { "epoch": 0.395661318639295, "grad_norm": 1.7171806109867387, "learning_rate": 6.884993385148808e-07, "loss": 0.2704, "step": 22762 }, { "epoch": 0.39567870117679776, "grad_norm": 2.637292548732252, "learning_rate": 6.884732659120935e-07, "loss": 0.3531, "step": 22763 }, { "epoch": 0.3956960837143006, "grad_norm": 5.331082080208237, "learning_rate": 6.884471927119326e-07, "loss": 0.532, "step": 22764 }, { "epoch": 0.3957134662518034, "grad_norm": 0.9332945033663831, "learning_rate": 6.884211189144804e-07, "loss": 0.194, "step": 22765 }, { "epoch": 0.39573084878930626, "grad_norm": 2.0913105405249746, "learning_rate": 6.8839504451982e-07, "loss": 0.472, "step": 22766 }, { "epoch": 0.3957482313268091, "grad_norm": 2.0279612145691965, "learning_rate": 6.883689695280339e-07, "loss": 0.3431, "step": 22767 }, { "epoch": 0.3957656138643119, "grad_norm": 2.074922235369288, "learning_rate": 6.883428939392046e-07, "loss": 0.2136, "step": 22768 }, { "epoch": 0.39578299640181475, "grad_norm": 2.0173218882166473, "learning_rate": 6.883168177534148e-07, "loss": 0.2265, "step": 22769 }, { "epoch": 0.3958003789393176, "grad_norm": 3.718291426677514, "learning_rate": 6.882907409707473e-07, "loss": 0.3289, "step": 22770 }, { "epoch": 0.3958177614768204, "grad_norm": 1.3951703110887506, "learning_rate": 6.882646635912846e-07, "loss": 0.4484, "step": 22771 }, { "epoch": 0.39583514401432324, "grad_norm": 3.6105660607696684, "learning_rate": 6.882385856151094e-07, "loss": 0.3509, "step": 22772 }, { "epoch": 0.395852526551826, "grad_norm": 1.9079912975764235, "learning_rate": 6.882125070423042e-07, "loss": 0.3079, "step": 22773 }, { "epoch": 0.39586990908932884, "grad_norm": 2.1388430797763114, "learning_rate": 6.881864278729521e-07, "loss": 0.2304, "step": 22774 }, { "epoch": 0.39588729162683167, "grad_norm": 1.0939015453147396, "learning_rate": 6.881603481071351e-07, "loss": 0.2503, "step": 22775 }, { "epoch": 0.3959046741643345, "grad_norm": 1.4606010647649859, "learning_rate": 6.881342677449367e-07, "loss": 0.4798, "step": 22776 }, { "epoch": 0.39592205670183733, "grad_norm": 2.2760432796933467, "learning_rate": 6.881081867864388e-07, "loss": 0.1984, "step": 22777 }, { "epoch": 0.39593943923934016, "grad_norm": 2.912925015180208, "learning_rate": 6.880821052317244e-07, "loss": 0.3391, "step": 22778 }, { "epoch": 0.395956821776843, "grad_norm": 1.2192179633506328, "learning_rate": 6.88056023080876e-07, "loss": 0.2821, "step": 22779 }, { "epoch": 0.3959742043143458, "grad_norm": 1.6091475928366812, "learning_rate": 6.880299403339766e-07, "loss": 0.3123, "step": 22780 }, { "epoch": 0.39599158685184865, "grad_norm": 1.6882932494351877, "learning_rate": 6.880038569911086e-07, "loss": 0.259, "step": 22781 }, { "epoch": 0.3960089693893515, "grad_norm": 1.079786332102122, "learning_rate": 6.879777730523548e-07, "loss": 0.2223, "step": 22782 }, { "epoch": 0.39602635192685426, "grad_norm": 0.9090795599280226, "learning_rate": 6.879516885177976e-07, "loss": 0.4457, "step": 22783 }, { "epoch": 0.3960437344643571, "grad_norm": 1.9895691828208617, "learning_rate": 6.8792560338752e-07, "loss": 0.4345, "step": 22784 }, { "epoch": 0.3960611170018599, "grad_norm": 2.0395130004233404, "learning_rate": 6.878995176616046e-07, "loss": 0.6587, "step": 22785 }, { "epoch": 0.39607849953936275, "grad_norm": 1.7498227476477828, "learning_rate": 6.878734313401342e-07, "loss": 0.3122, "step": 22786 }, { "epoch": 0.3960958820768656, "grad_norm": 1.7302207102679816, "learning_rate": 6.87847344423191e-07, "loss": 0.331, "step": 22787 }, { "epoch": 0.3961132646143684, "grad_norm": 2.7070251788948285, "learning_rate": 6.878212569108581e-07, "loss": 0.4429, "step": 22788 }, { "epoch": 0.39613064715187124, "grad_norm": 1.4229874004537446, "learning_rate": 6.877951688032182e-07, "loss": 0.3069, "step": 22789 }, { "epoch": 0.39614802968937407, "grad_norm": 2.6323369598975352, "learning_rate": 6.87769080100354e-07, "loss": 0.3839, "step": 22790 }, { "epoch": 0.3961654122268769, "grad_norm": 2.67896962284385, "learning_rate": 6.877429908023478e-07, "loss": 0.4606, "step": 22791 }, { "epoch": 0.39618279476437973, "grad_norm": 1.6897970265346398, "learning_rate": 6.877169009092826e-07, "loss": 0.3786, "step": 22792 }, { "epoch": 0.3962001773018825, "grad_norm": 5.581489806400531, "learning_rate": 6.876908104212411e-07, "loss": 0.7347, "step": 22793 }, { "epoch": 0.39621755983938534, "grad_norm": 1.358027557768744, "learning_rate": 6.87664719338306e-07, "loss": 0.2321, "step": 22794 }, { "epoch": 0.39623494237688817, "grad_norm": 2.3043363666628114, "learning_rate": 6.876386276605598e-07, "loss": 0.261, "step": 22795 }, { "epoch": 0.396252324914391, "grad_norm": 1.6668782306546623, "learning_rate": 6.876125353880855e-07, "loss": 0.2986, "step": 22796 }, { "epoch": 0.3962697074518938, "grad_norm": 2.0893221652363816, "learning_rate": 6.875864425209656e-07, "loss": 0.4806, "step": 22797 }, { "epoch": 0.39628708998939666, "grad_norm": 2.680276068094249, "learning_rate": 6.875603490592828e-07, "loss": 0.2934, "step": 22798 }, { "epoch": 0.3963044725268995, "grad_norm": 2.1202965351188556, "learning_rate": 6.8753425500312e-07, "loss": 0.3582, "step": 22799 }, { "epoch": 0.3963218550644023, "grad_norm": 1.57231828798639, "learning_rate": 6.875081603525595e-07, "loss": 0.2984, "step": 22800 }, { "epoch": 0.39633923760190515, "grad_norm": 1.2910642691834082, "learning_rate": 6.874820651076845e-07, "loss": 0.3281, "step": 22801 }, { "epoch": 0.396356620139408, "grad_norm": 2.671458316356171, "learning_rate": 6.874559692685775e-07, "loss": 0.4275, "step": 22802 }, { "epoch": 0.39637400267691075, "grad_norm": 1.4665347020244108, "learning_rate": 6.87429872835321e-07, "loss": 0.2565, "step": 22803 }, { "epoch": 0.3963913852144136, "grad_norm": 1.949901227544633, "learning_rate": 6.874037758079979e-07, "loss": 0.4052, "step": 22804 }, { "epoch": 0.3964087677519164, "grad_norm": 0.961274540359345, "learning_rate": 6.873776781866911e-07, "loss": 0.2671, "step": 22805 }, { "epoch": 0.39642615028941924, "grad_norm": 2.912207112350565, "learning_rate": 6.873515799714829e-07, "loss": 0.4109, "step": 22806 }, { "epoch": 0.3964435328269221, "grad_norm": 1.3950213149893635, "learning_rate": 6.873254811624564e-07, "loss": 0.3495, "step": 22807 }, { "epoch": 0.3964609153644249, "grad_norm": 1.4992413873816155, "learning_rate": 6.872993817596943e-07, "loss": 0.3694, "step": 22808 }, { "epoch": 0.39647829790192773, "grad_norm": 2.3318358071392375, "learning_rate": 6.87273281763279e-07, "loss": 0.2547, "step": 22809 }, { "epoch": 0.39649568043943056, "grad_norm": 1.4564925295861944, "learning_rate": 6.872471811732935e-07, "loss": 0.3056, "step": 22810 }, { "epoch": 0.3965130629769334, "grad_norm": 1.4271599093527012, "learning_rate": 6.872210799898206e-07, "loss": 0.26, "step": 22811 }, { "epoch": 0.3965304455144362, "grad_norm": 1.1276561085980537, "learning_rate": 6.871949782129426e-07, "loss": 0.2093, "step": 22812 }, { "epoch": 0.396547828051939, "grad_norm": 1.687485755450226, "learning_rate": 6.871688758427426e-07, "loss": 0.3235, "step": 22813 }, { "epoch": 0.39656521058944183, "grad_norm": 1.1183161309043994, "learning_rate": 6.871427728793032e-07, "loss": 0.311, "step": 22814 }, { "epoch": 0.39658259312694466, "grad_norm": 1.0695717539481524, "learning_rate": 6.871166693227074e-07, "loss": 0.2317, "step": 22815 }, { "epoch": 0.3965999756644475, "grad_norm": 1.8432812730288073, "learning_rate": 6.870905651730374e-07, "loss": 0.4666, "step": 22816 }, { "epoch": 0.3966173582019503, "grad_norm": 1.1685300377245635, "learning_rate": 6.870644604303765e-07, "loss": 0.3661, "step": 22817 }, { "epoch": 0.39663474073945315, "grad_norm": 2.212862935062646, "learning_rate": 6.870383550948071e-07, "loss": 0.2647, "step": 22818 }, { "epoch": 0.396652123276956, "grad_norm": 1.5226410246742283, "learning_rate": 6.87012249166412e-07, "loss": 0.3014, "step": 22819 }, { "epoch": 0.3966695058144588, "grad_norm": 2.791500061957802, "learning_rate": 6.869861426452739e-07, "loss": 0.2471, "step": 22820 }, { "epoch": 0.39668688835196164, "grad_norm": 1.6840472908193682, "learning_rate": 6.869600355314757e-07, "loss": 0.2332, "step": 22821 }, { "epoch": 0.3967042708894644, "grad_norm": 3.936223678659345, "learning_rate": 6.869339278251002e-07, "loss": 0.4031, "step": 22822 }, { "epoch": 0.39672165342696725, "grad_norm": 1.3413419634343788, "learning_rate": 6.869078195262299e-07, "loss": 0.3535, "step": 22823 }, { "epoch": 0.3967390359644701, "grad_norm": 3.7083256961388407, "learning_rate": 6.868817106349478e-07, "loss": 0.4125, "step": 22824 }, { "epoch": 0.3967564185019729, "grad_norm": 1.460825025899594, "learning_rate": 6.868556011513362e-07, "loss": 0.1982, "step": 22825 }, { "epoch": 0.39677380103947574, "grad_norm": 1.0764139473995795, "learning_rate": 6.868294910754785e-07, "loss": 0.2781, "step": 22826 }, { "epoch": 0.39679118357697857, "grad_norm": 1.897058609577138, "learning_rate": 6.86803380407457e-07, "loss": 0.2594, "step": 22827 }, { "epoch": 0.3968085661144814, "grad_norm": 2.6580190779661717, "learning_rate": 6.867772691473547e-07, "loss": 0.3075, "step": 22828 }, { "epoch": 0.39682594865198423, "grad_norm": 1.2625521543898957, "learning_rate": 6.86751157295254e-07, "loss": 0.2973, "step": 22829 }, { "epoch": 0.39684333118948706, "grad_norm": 2.428737996555264, "learning_rate": 6.867250448512383e-07, "loss": 0.2633, "step": 22830 }, { "epoch": 0.3968607137269899, "grad_norm": 1.6079788009935525, "learning_rate": 6.866989318153898e-07, "loss": 0.314, "step": 22831 }, { "epoch": 0.39687809626449266, "grad_norm": 2.7546999042953364, "learning_rate": 6.866728181877913e-07, "loss": 0.2835, "step": 22832 }, { "epoch": 0.3968954788019955, "grad_norm": 1.6215887786741943, "learning_rate": 6.866467039685258e-07, "loss": 0.2242, "step": 22833 }, { "epoch": 0.3969128613394983, "grad_norm": 2.1558620261220285, "learning_rate": 6.866205891576762e-07, "loss": 0.4469, "step": 22834 }, { "epoch": 0.39693024387700115, "grad_norm": 1.0275740493849304, "learning_rate": 6.865944737553248e-07, "loss": 0.1968, "step": 22835 }, { "epoch": 0.396947626414504, "grad_norm": 1.6152547654937173, "learning_rate": 6.865683577615546e-07, "loss": 0.2553, "step": 22836 }, { "epoch": 0.3969650089520068, "grad_norm": 1.6275106113218254, "learning_rate": 6.865422411764486e-07, "loss": 0.5076, "step": 22837 }, { "epoch": 0.39698239148950965, "grad_norm": 4.077768358238537, "learning_rate": 6.865161240000894e-07, "loss": 0.256, "step": 22838 }, { "epoch": 0.3969997740270125, "grad_norm": 1.2694074687081203, "learning_rate": 6.864900062325596e-07, "loss": 0.3796, "step": 22839 }, { "epoch": 0.3970171565645153, "grad_norm": 1.834790268223108, "learning_rate": 6.864638878739423e-07, "loss": 0.2462, "step": 22840 }, { "epoch": 0.39703453910201814, "grad_norm": 1.6936102363717542, "learning_rate": 6.864377689243199e-07, "loss": 0.3767, "step": 22841 }, { "epoch": 0.3970519216395209, "grad_norm": 2.625043014381551, "learning_rate": 6.864116493837757e-07, "loss": 0.4464, "step": 22842 }, { "epoch": 0.39706930417702374, "grad_norm": 1.9716753638944586, "learning_rate": 6.863855292523921e-07, "loss": 0.204, "step": 22843 }, { "epoch": 0.39708668671452657, "grad_norm": 1.5760319536718854, "learning_rate": 6.86359408530252e-07, "loss": 0.2214, "step": 22844 }, { "epoch": 0.3971040692520294, "grad_norm": 2.1617620948933505, "learning_rate": 6.86333287217438e-07, "loss": 0.4384, "step": 22845 }, { "epoch": 0.39712145178953223, "grad_norm": 1.4447690134695688, "learning_rate": 6.863071653140333e-07, "loss": 0.2952, "step": 22846 }, { "epoch": 0.39713883432703506, "grad_norm": 1.1110565701475756, "learning_rate": 6.862810428201203e-07, "loss": 0.1693, "step": 22847 }, { "epoch": 0.3971562168645379, "grad_norm": 2.145826589021067, "learning_rate": 6.862549197357821e-07, "loss": 0.3424, "step": 22848 }, { "epoch": 0.3971735994020407, "grad_norm": 1.6577026550133, "learning_rate": 6.862287960611014e-07, "loss": 0.2295, "step": 22849 }, { "epoch": 0.39719098193954355, "grad_norm": 1.4147492555556638, "learning_rate": 6.862026717961608e-07, "loss": 0.3155, "step": 22850 }, { "epoch": 0.3972083644770464, "grad_norm": 1.6154189526261353, "learning_rate": 6.861765469410434e-07, "loss": 0.524, "step": 22851 }, { "epoch": 0.39722574701454916, "grad_norm": 1.3557680367326512, "learning_rate": 6.861504214958318e-07, "loss": 0.1639, "step": 22852 }, { "epoch": 0.397243129552052, "grad_norm": 1.340001590853541, "learning_rate": 6.861242954606089e-07, "loss": 0.4906, "step": 22853 }, { "epoch": 0.3972605120895548, "grad_norm": 1.4636744728068007, "learning_rate": 6.860981688354573e-07, "loss": 0.2944, "step": 22854 }, { "epoch": 0.39727789462705765, "grad_norm": 1.932463248077652, "learning_rate": 6.860720416204602e-07, "loss": 0.4077, "step": 22855 }, { "epoch": 0.3972952771645605, "grad_norm": 1.6383922115244591, "learning_rate": 6.860459138157002e-07, "loss": 0.4124, "step": 22856 }, { "epoch": 0.3973126597020633, "grad_norm": 2.2715726823359685, "learning_rate": 6.860197854212601e-07, "loss": 0.4168, "step": 22857 }, { "epoch": 0.39733004223956614, "grad_norm": 1.2929567078942426, "learning_rate": 6.859936564372227e-07, "loss": 0.261, "step": 22858 }, { "epoch": 0.39734742477706897, "grad_norm": 2.4049427486717514, "learning_rate": 6.859675268636709e-07, "loss": 0.4084, "step": 22859 }, { "epoch": 0.3973648073145718, "grad_norm": 2.1288114984356996, "learning_rate": 6.859413967006872e-07, "loss": 0.2809, "step": 22860 }, { "epoch": 0.39738218985207463, "grad_norm": 1.418093067737265, "learning_rate": 6.859152659483551e-07, "loss": 0.3407, "step": 22861 }, { "epoch": 0.3973995723895774, "grad_norm": 1.4438963034308239, "learning_rate": 6.858891346067567e-07, "loss": 0.2522, "step": 22862 }, { "epoch": 0.39741695492708023, "grad_norm": 1.8325995803695947, "learning_rate": 6.858630026759754e-07, "loss": 0.3017, "step": 22863 }, { "epoch": 0.39743433746458307, "grad_norm": 1.013932058359089, "learning_rate": 6.858368701560934e-07, "loss": 0.2845, "step": 22864 }, { "epoch": 0.3974517200020859, "grad_norm": 1.5130734972386541, "learning_rate": 6.858107370471944e-07, "loss": 0.1855, "step": 22865 }, { "epoch": 0.3974691025395887, "grad_norm": 3.379873530848785, "learning_rate": 6.857846033493602e-07, "loss": 0.3011, "step": 22866 }, { "epoch": 0.39748648507709156, "grad_norm": 1.204134089122565, "learning_rate": 6.857584690626746e-07, "loss": 0.5126, "step": 22867 }, { "epoch": 0.3975038676145944, "grad_norm": 2.473334388741976, "learning_rate": 6.857323341872196e-07, "loss": 0.3248, "step": 22868 }, { "epoch": 0.3975212501520972, "grad_norm": 2.097998155008043, "learning_rate": 6.857061987230787e-07, "loss": 0.3672, "step": 22869 }, { "epoch": 0.39753863268960005, "grad_norm": 2.0448870511420427, "learning_rate": 6.856800626703342e-07, "loss": 0.3001, "step": 22870 }, { "epoch": 0.3975560152271029, "grad_norm": 2.231942621946872, "learning_rate": 6.856539260290694e-07, "loss": 0.3844, "step": 22871 }, { "epoch": 0.39757339776460565, "grad_norm": 2.978474761364071, "learning_rate": 6.856277887993668e-07, "loss": 0.354, "step": 22872 }, { "epoch": 0.3975907803021085, "grad_norm": 3.0566239973441207, "learning_rate": 6.856016509813094e-07, "loss": 0.3571, "step": 22873 }, { "epoch": 0.3976081628396113, "grad_norm": 2.044316393805308, "learning_rate": 6.8557551257498e-07, "loss": 0.4809, "step": 22874 }, { "epoch": 0.39762554537711414, "grad_norm": 1.1804215758287635, "learning_rate": 6.855493735804617e-07, "loss": 0.2933, "step": 22875 }, { "epoch": 0.397642927914617, "grad_norm": 1.5469643332379717, "learning_rate": 6.855232339978368e-07, "loss": 0.2636, "step": 22876 }, { "epoch": 0.3976603104521198, "grad_norm": 1.8186473015204785, "learning_rate": 6.854970938271886e-07, "loss": 0.3434, "step": 22877 }, { "epoch": 0.39767769298962263, "grad_norm": 1.7373689220677173, "learning_rate": 6.854709530686001e-07, "loss": 0.1475, "step": 22878 }, { "epoch": 0.39769507552712546, "grad_norm": 1.1753103540611534, "learning_rate": 6.854448117221536e-07, "loss": 0.1627, "step": 22879 }, { "epoch": 0.3977124580646283, "grad_norm": 2.364661119060331, "learning_rate": 6.854186697879322e-07, "loss": 0.2501, "step": 22880 }, { "epoch": 0.3977298406021311, "grad_norm": 1.4472070823328842, "learning_rate": 6.853925272660188e-07, "loss": 0.2742, "step": 22881 }, { "epoch": 0.3977472231396339, "grad_norm": 1.9649852463015813, "learning_rate": 6.853663841564963e-07, "loss": 0.2357, "step": 22882 }, { "epoch": 0.39776460567713673, "grad_norm": 1.1553097909310226, "learning_rate": 6.853402404594475e-07, "loss": 0.2357, "step": 22883 }, { "epoch": 0.39778198821463956, "grad_norm": 1.461405888110056, "learning_rate": 6.853140961749554e-07, "loss": 0.2673, "step": 22884 }, { "epoch": 0.3977993707521424, "grad_norm": 1.4929535835861636, "learning_rate": 6.852879513031024e-07, "loss": 0.3346, "step": 22885 }, { "epoch": 0.3978167532896452, "grad_norm": 2.1388348841080282, "learning_rate": 6.852618058439721e-07, "loss": 0.3917, "step": 22886 }, { "epoch": 0.39783413582714805, "grad_norm": 2.649336321706976, "learning_rate": 6.852356597976469e-07, "loss": 0.3141, "step": 22887 }, { "epoch": 0.3978515183646509, "grad_norm": 2.6371993087126038, "learning_rate": 6.852095131642096e-07, "loss": 0.4747, "step": 22888 }, { "epoch": 0.3978689009021537, "grad_norm": 3.914735964392278, "learning_rate": 6.851833659437432e-07, "loss": 0.2965, "step": 22889 }, { "epoch": 0.39788628343965654, "grad_norm": 1.5340438492203732, "learning_rate": 6.851572181363307e-07, "loss": 0.4147, "step": 22890 }, { "epoch": 0.39790366597715937, "grad_norm": 1.8918850755542516, "learning_rate": 6.851310697420549e-07, "loss": 0.283, "step": 22891 }, { "epoch": 0.39792104851466215, "grad_norm": 1.5382195795666473, "learning_rate": 6.851049207609987e-07, "loss": 0.2953, "step": 22892 }, { "epoch": 0.397938431052165, "grad_norm": 2.067114328176123, "learning_rate": 6.850787711932446e-07, "loss": 0.3257, "step": 22893 }, { "epoch": 0.3979558135896678, "grad_norm": 2.327901879287499, "learning_rate": 6.850526210388763e-07, "loss": 0.4276, "step": 22894 }, { "epoch": 0.39797319612717064, "grad_norm": 1.9616312741574926, "learning_rate": 6.850264702979758e-07, "loss": 0.3157, "step": 22895 }, { "epoch": 0.39799057866467347, "grad_norm": 2.4994616090255812, "learning_rate": 6.850003189706266e-07, "loss": 0.2254, "step": 22896 }, { "epoch": 0.3980079612021763, "grad_norm": 2.4457351704514774, "learning_rate": 6.849741670569114e-07, "loss": 0.4035, "step": 22897 }, { "epoch": 0.3980253437396791, "grad_norm": 2.1102430994687915, "learning_rate": 6.849480145569128e-07, "loss": 0.3805, "step": 22898 }, { "epoch": 0.39804272627718196, "grad_norm": 2.2070024231141128, "learning_rate": 6.849218614707142e-07, "loss": 0.3022, "step": 22899 }, { "epoch": 0.3980601088146848, "grad_norm": 1.514904466143881, "learning_rate": 6.848957077983982e-07, "loss": 0.2074, "step": 22900 }, { "epoch": 0.3980774913521876, "grad_norm": 1.6737275563164196, "learning_rate": 6.848695535400478e-07, "loss": 0.4295, "step": 22901 }, { "epoch": 0.3980948738896904, "grad_norm": 1.6698078066806996, "learning_rate": 6.848433986957456e-07, "loss": 0.5657, "step": 22902 }, { "epoch": 0.3981122564271932, "grad_norm": 2.5857483723924224, "learning_rate": 6.848172432655749e-07, "loss": 0.3422, "step": 22903 }, { "epoch": 0.39812963896469605, "grad_norm": 1.463381859170009, "learning_rate": 6.847910872496184e-07, "loss": 0.236, "step": 22904 }, { "epoch": 0.3981470215021989, "grad_norm": 1.6575861160811034, "learning_rate": 6.847649306479591e-07, "loss": 0.2871, "step": 22905 }, { "epoch": 0.3981644040397017, "grad_norm": 1.8777908120996183, "learning_rate": 6.847387734606798e-07, "loss": 0.2021, "step": 22906 }, { "epoch": 0.39818178657720454, "grad_norm": 1.3495930393192648, "learning_rate": 6.847126156878634e-07, "loss": 0.1901, "step": 22907 }, { "epoch": 0.3981991691147074, "grad_norm": 1.082720363654407, "learning_rate": 6.84686457329593e-07, "loss": 0.1863, "step": 22908 }, { "epoch": 0.3982165516522102, "grad_norm": 1.4887655378445848, "learning_rate": 6.846602983859513e-07, "loss": 0.3127, "step": 22909 }, { "epoch": 0.39823393418971303, "grad_norm": 2.030748194772408, "learning_rate": 6.846341388570212e-07, "loss": 0.3328, "step": 22910 }, { "epoch": 0.39825131672721586, "grad_norm": 2.076718795337802, "learning_rate": 6.846079787428857e-07, "loss": 0.305, "step": 22911 }, { "epoch": 0.39826869926471864, "grad_norm": 1.953649542869046, "learning_rate": 6.845818180436278e-07, "loss": 0.3421, "step": 22912 }, { "epoch": 0.39828608180222147, "grad_norm": 2.530144651197389, "learning_rate": 6.845556567593303e-07, "loss": 0.3292, "step": 22913 }, { "epoch": 0.3983034643397243, "grad_norm": 1.8059479495114492, "learning_rate": 6.84529494890076e-07, "loss": 0.3122, "step": 22914 }, { "epoch": 0.39832084687722713, "grad_norm": 2.1890436748739592, "learning_rate": 6.845033324359481e-07, "loss": 0.3828, "step": 22915 }, { "epoch": 0.39833822941472996, "grad_norm": 1.4585948680880843, "learning_rate": 6.844771693970295e-07, "loss": 0.2647, "step": 22916 }, { "epoch": 0.3983556119522328, "grad_norm": 2.251520031353153, "learning_rate": 6.844510057734028e-07, "loss": 0.2457, "step": 22917 }, { "epoch": 0.3983729944897356, "grad_norm": 2.0487887893092838, "learning_rate": 6.844248415651513e-07, "loss": 0.408, "step": 22918 }, { "epoch": 0.39839037702723845, "grad_norm": 1.315858643775942, "learning_rate": 6.843986767723577e-07, "loss": 0.2552, "step": 22919 }, { "epoch": 0.3984077595647413, "grad_norm": 1.6169000854255124, "learning_rate": 6.84372511395105e-07, "loss": 0.2518, "step": 22920 }, { "epoch": 0.3984251421022441, "grad_norm": 1.9772086336173145, "learning_rate": 6.843463454334761e-07, "loss": 0.3643, "step": 22921 }, { "epoch": 0.3984425246397469, "grad_norm": 1.4687314767166204, "learning_rate": 6.843201788875541e-07, "loss": 0.3348, "step": 22922 }, { "epoch": 0.3984599071772497, "grad_norm": 1.8064635030536327, "learning_rate": 6.842940117574218e-07, "loss": 0.3114, "step": 22923 }, { "epoch": 0.39847728971475255, "grad_norm": 1.5269045784219484, "learning_rate": 6.842678440431619e-07, "loss": 0.3731, "step": 22924 }, { "epoch": 0.3984946722522554, "grad_norm": 2.1810023146783935, "learning_rate": 6.842416757448579e-07, "loss": 0.3801, "step": 22925 }, { "epoch": 0.3985120547897582, "grad_norm": 1.2871013437727445, "learning_rate": 6.842155068625921e-07, "loss": 0.2741, "step": 22926 }, { "epoch": 0.39852943732726104, "grad_norm": 1.1348104023798178, "learning_rate": 6.841893373964481e-07, "loss": 0.1837, "step": 22927 }, { "epoch": 0.39854681986476387, "grad_norm": 1.6118912032374215, "learning_rate": 6.841631673465082e-07, "loss": 0.2734, "step": 22928 }, { "epoch": 0.3985642024022667, "grad_norm": 1.6810512336758916, "learning_rate": 6.841369967128558e-07, "loss": 0.2272, "step": 22929 }, { "epoch": 0.39858158493976953, "grad_norm": 1.572505479538049, "learning_rate": 6.841108254955736e-07, "loss": 0.2982, "step": 22930 }, { "epoch": 0.39859896747727236, "grad_norm": 1.9138435158172984, "learning_rate": 6.84084653694745e-07, "loss": 0.2811, "step": 22931 }, { "epoch": 0.39861635001477513, "grad_norm": 1.7569134184332418, "learning_rate": 6.840584813104523e-07, "loss": 0.444, "step": 22932 }, { "epoch": 0.39863373255227796, "grad_norm": 1.2801490705962821, "learning_rate": 6.840323083427788e-07, "loss": 0.1631, "step": 22933 }, { "epoch": 0.3986511150897808, "grad_norm": 2.3277661698023064, "learning_rate": 6.840061347918074e-07, "loss": 0.2966, "step": 22934 }, { "epoch": 0.3986684976272836, "grad_norm": 1.9911056585713907, "learning_rate": 6.839799606576212e-07, "loss": 0.1905, "step": 22935 }, { "epoch": 0.39868588016478645, "grad_norm": 0.8562632805781438, "learning_rate": 6.839537859403028e-07, "loss": 0.2202, "step": 22936 }, { "epoch": 0.3987032627022893, "grad_norm": 1.3402402565623412, "learning_rate": 6.839276106399356e-07, "loss": 0.3556, "step": 22937 }, { "epoch": 0.3987206452397921, "grad_norm": 0.917198011958542, "learning_rate": 6.839014347566025e-07, "loss": 0.2347, "step": 22938 }, { "epoch": 0.39873802777729495, "grad_norm": 1.8544308272955077, "learning_rate": 6.838752582903861e-07, "loss": 0.2658, "step": 22939 }, { "epoch": 0.3987554103147978, "grad_norm": 1.6857280939648778, "learning_rate": 6.838490812413695e-07, "loss": 0.328, "step": 22940 }, { "epoch": 0.3987727928523006, "grad_norm": 1.7275194699240566, "learning_rate": 6.83822903609636e-07, "loss": 0.3071, "step": 22941 }, { "epoch": 0.3987901753898034, "grad_norm": 1.4055794985338121, "learning_rate": 6.837967253952683e-07, "loss": 0.2205, "step": 22942 }, { "epoch": 0.3988075579273062, "grad_norm": 2.0969646163405997, "learning_rate": 6.837705465983494e-07, "loss": 0.3267, "step": 22943 }, { "epoch": 0.39882494046480904, "grad_norm": 1.5727779551236836, "learning_rate": 6.837443672189623e-07, "loss": 0.3171, "step": 22944 }, { "epoch": 0.39884232300231187, "grad_norm": 2.1485228671924093, "learning_rate": 6.837181872571897e-07, "loss": 0.4198, "step": 22945 }, { "epoch": 0.3988597055398147, "grad_norm": 1.4589058597275097, "learning_rate": 6.836920067131152e-07, "loss": 0.2676, "step": 22946 }, { "epoch": 0.39887708807731753, "grad_norm": 1.6551671263941057, "learning_rate": 6.836658255868213e-07, "loss": 0.2898, "step": 22947 }, { "epoch": 0.39889447061482036, "grad_norm": 2.3733187515346055, "learning_rate": 6.836396438783911e-07, "loss": 0.568, "step": 22948 }, { "epoch": 0.3989118531523232, "grad_norm": 2.4715574652303474, "learning_rate": 6.836134615879075e-07, "loss": 0.2912, "step": 22949 }, { "epoch": 0.398929235689826, "grad_norm": 2.5124868348880907, "learning_rate": 6.835872787154537e-07, "loss": 0.3054, "step": 22950 }, { "epoch": 0.39894661822732885, "grad_norm": 1.053769539640012, "learning_rate": 6.835610952611124e-07, "loss": 0.174, "step": 22951 }, { "epoch": 0.39896400076483163, "grad_norm": 0.9439382870516475, "learning_rate": 6.835349112249669e-07, "loss": 0.3074, "step": 22952 }, { "epoch": 0.39898138330233446, "grad_norm": 2.4310763384707315, "learning_rate": 6.835087266071e-07, "loss": 0.3578, "step": 22953 }, { "epoch": 0.3989987658398373, "grad_norm": 2.5741517309500166, "learning_rate": 6.834825414075947e-07, "loss": 0.2475, "step": 22954 }, { "epoch": 0.3990161483773401, "grad_norm": 1.8674464728923814, "learning_rate": 6.83456355626534e-07, "loss": 0.3033, "step": 22955 }, { "epoch": 0.39903353091484295, "grad_norm": 3.619997936336624, "learning_rate": 6.83430169264001e-07, "loss": 0.3068, "step": 22956 }, { "epoch": 0.3990509134523458, "grad_norm": 1.9607632507045718, "learning_rate": 6.834039823200785e-07, "loss": 0.3438, "step": 22957 }, { "epoch": 0.3990682959898486, "grad_norm": 1.3981008342834396, "learning_rate": 6.833777947948498e-07, "loss": 0.284, "step": 22958 }, { "epoch": 0.39908567852735144, "grad_norm": 2.076897843251011, "learning_rate": 6.833516066883975e-07, "loss": 0.3412, "step": 22959 }, { "epoch": 0.39910306106485427, "grad_norm": 2.1436980244567936, "learning_rate": 6.833254180008053e-07, "loss": 0.3377, "step": 22960 }, { "epoch": 0.39912044360235704, "grad_norm": 1.4436361592210587, "learning_rate": 6.832992287321552e-07, "loss": 0.37, "step": 22961 }, { "epoch": 0.3991378261398599, "grad_norm": 2.6108958166907623, "learning_rate": 6.832730388825309e-07, "loss": 0.3828, "step": 22962 }, { "epoch": 0.3991552086773627, "grad_norm": 1.6949105414293606, "learning_rate": 6.832468484520154e-07, "loss": 0.2659, "step": 22963 }, { "epoch": 0.39917259121486554, "grad_norm": 1.6231540738805563, "learning_rate": 6.832206574406916e-07, "loss": 0.2863, "step": 22964 }, { "epoch": 0.39918997375236837, "grad_norm": 1.5107268032716588, "learning_rate": 6.831944658486421e-07, "loss": 0.3265, "step": 22965 }, { "epoch": 0.3992073562898712, "grad_norm": 1.2841853338273725, "learning_rate": 6.831682736759508e-07, "loss": 0.2743, "step": 22966 }, { "epoch": 0.399224738827374, "grad_norm": 3.1831145158627896, "learning_rate": 6.831420809226998e-07, "loss": 0.5333, "step": 22967 }, { "epoch": 0.39924212136487686, "grad_norm": 0.9863731794956917, "learning_rate": 6.831158875889728e-07, "loss": 0.2439, "step": 22968 }, { "epoch": 0.3992595039023797, "grad_norm": 1.9380532571248752, "learning_rate": 6.830896936748525e-07, "loss": 0.3701, "step": 22969 }, { "epoch": 0.3992768864398825, "grad_norm": 1.633300299482637, "learning_rate": 6.830634991804218e-07, "loss": 0.3489, "step": 22970 }, { "epoch": 0.3992942689773853, "grad_norm": 1.528091261233047, "learning_rate": 6.830373041057641e-07, "loss": 0.2917, "step": 22971 }, { "epoch": 0.3993116515148881, "grad_norm": 2.6460172969143203, "learning_rate": 6.830111084509622e-07, "loss": 0.3876, "step": 22972 }, { "epoch": 0.39932903405239095, "grad_norm": 1.908732609921517, "learning_rate": 6.829849122160991e-07, "loss": 0.32, "step": 22973 }, { "epoch": 0.3993464165898938, "grad_norm": 1.9117699754565844, "learning_rate": 6.829587154012578e-07, "loss": 0.4122, "step": 22974 }, { "epoch": 0.3993637991273966, "grad_norm": 1.793281001654135, "learning_rate": 6.829325180065215e-07, "loss": 0.3119, "step": 22975 }, { "epoch": 0.39938118166489944, "grad_norm": 1.0415617554825018, "learning_rate": 6.829063200319733e-07, "loss": 0.3338, "step": 22976 }, { "epoch": 0.3993985642024023, "grad_norm": 1.9068574755592795, "learning_rate": 6.828801214776959e-07, "loss": 0.4765, "step": 22977 }, { "epoch": 0.3994159467399051, "grad_norm": 1.9555299169186335, "learning_rate": 6.828539223437724e-07, "loss": 0.4647, "step": 22978 }, { "epoch": 0.39943332927740793, "grad_norm": 1.599486176185909, "learning_rate": 6.828277226302864e-07, "loss": 0.4556, "step": 22979 }, { "epoch": 0.39945071181491076, "grad_norm": 1.5980931607314386, "learning_rate": 6.828015223373201e-07, "loss": 0.2543, "step": 22980 }, { "epoch": 0.39946809435241354, "grad_norm": 2.9180050313153045, "learning_rate": 6.827753214649572e-07, "loss": 0.4347, "step": 22981 }, { "epoch": 0.39948547688991637, "grad_norm": 1.8181087343818407, "learning_rate": 6.827491200132803e-07, "loss": 0.2945, "step": 22982 }, { "epoch": 0.3995028594274192, "grad_norm": 2.112023584801398, "learning_rate": 6.827229179823728e-07, "loss": 0.4218, "step": 22983 }, { "epoch": 0.39952024196492203, "grad_norm": 0.7591349557733535, "learning_rate": 6.826967153723174e-07, "loss": 0.2133, "step": 22984 }, { "epoch": 0.39953762450242486, "grad_norm": 2.1778926523374182, "learning_rate": 6.826705121831976e-07, "loss": 0.3792, "step": 22985 }, { "epoch": 0.3995550070399277, "grad_norm": 1.2772589808352648, "learning_rate": 6.82644308415096e-07, "loss": 0.5442, "step": 22986 }, { "epoch": 0.3995723895774305, "grad_norm": 1.3932500588085972, "learning_rate": 6.826181040680958e-07, "loss": 0.3335, "step": 22987 }, { "epoch": 0.39958977211493335, "grad_norm": 1.149071900967392, "learning_rate": 6.825918991422802e-07, "loss": 0.3573, "step": 22988 }, { "epoch": 0.3996071546524362, "grad_norm": 1.5792379587343515, "learning_rate": 6.825656936377322e-07, "loss": 0.3445, "step": 22989 }, { "epoch": 0.399624537189939, "grad_norm": 0.9787105939616713, "learning_rate": 6.825394875545346e-07, "loss": 0.313, "step": 22990 }, { "epoch": 0.3996419197274418, "grad_norm": 1.2344145418071006, "learning_rate": 6.82513280892771e-07, "loss": 0.2123, "step": 22991 }, { "epoch": 0.3996593022649446, "grad_norm": 2.4572112449619543, "learning_rate": 6.824870736525237e-07, "loss": 0.4317, "step": 22992 }, { "epoch": 0.39967668480244745, "grad_norm": 1.0605613683955843, "learning_rate": 6.824608658338767e-07, "loss": 0.2898, "step": 22993 }, { "epoch": 0.3996940673399503, "grad_norm": 1.3782486191544845, "learning_rate": 6.82434657436912e-07, "loss": 0.3054, "step": 22994 }, { "epoch": 0.3997114498774531, "grad_norm": 2.5109683168705446, "learning_rate": 6.824084484617137e-07, "loss": 0.454, "step": 22995 }, { "epoch": 0.39972883241495594, "grad_norm": 1.6847610199096228, "learning_rate": 6.823822389083643e-07, "loss": 0.3413, "step": 22996 }, { "epoch": 0.39974621495245877, "grad_norm": 1.4798338771255732, "learning_rate": 6.82356028776947e-07, "loss": 0.3724, "step": 22997 }, { "epoch": 0.3997635974899616, "grad_norm": 2.537015688211453, "learning_rate": 6.823298180675448e-07, "loss": 0.2295, "step": 22998 }, { "epoch": 0.3997809800274644, "grad_norm": 2.1555921682662635, "learning_rate": 6.823036067802408e-07, "loss": 0.2097, "step": 22999 }, { "epoch": 0.39979836256496726, "grad_norm": 2.283671112107743, "learning_rate": 6.822773949151182e-07, "loss": 0.4065, "step": 23000 }, { "epoch": 0.39981574510247003, "grad_norm": 2.332137186828745, "learning_rate": 6.822511824722599e-07, "loss": 0.3606, "step": 23001 }, { "epoch": 0.39983312763997286, "grad_norm": 2.2740997851890494, "learning_rate": 6.822249694517492e-07, "loss": 0.4885, "step": 23002 }, { "epoch": 0.3998505101774757, "grad_norm": 1.7861760513226193, "learning_rate": 6.821987558536689e-07, "loss": 0.3554, "step": 23003 }, { "epoch": 0.3998678927149785, "grad_norm": 2.067767644315535, "learning_rate": 6.821725416781023e-07, "loss": 0.2654, "step": 23004 }, { "epoch": 0.39988527525248135, "grad_norm": 1.730445107071489, "learning_rate": 6.821463269251324e-07, "loss": 0.5347, "step": 23005 }, { "epoch": 0.3999026577899842, "grad_norm": 2.8266022628971488, "learning_rate": 6.821201115948424e-07, "loss": 0.3636, "step": 23006 }, { "epoch": 0.399920040327487, "grad_norm": 2.5544803761712513, "learning_rate": 6.820938956873153e-07, "loss": 0.2066, "step": 23007 }, { "epoch": 0.39993742286498984, "grad_norm": 2.5424070675251955, "learning_rate": 6.820676792026342e-07, "loss": 0.4603, "step": 23008 }, { "epoch": 0.3999548054024927, "grad_norm": 1.8585111143096216, "learning_rate": 6.820414621408821e-07, "loss": 0.387, "step": 23009 }, { "epoch": 0.3999721879399955, "grad_norm": 1.43234000868215, "learning_rate": 6.820152445021423e-07, "loss": 0.3573, "step": 23010 }, { "epoch": 0.3999895704774983, "grad_norm": 3.057617589462543, "learning_rate": 6.819890262864977e-07, "loss": 0.3249, "step": 23011 }, { "epoch": 0.4000069530150011, "grad_norm": 1.5679396115843198, "learning_rate": 6.819628074940315e-07, "loss": 0.3198, "step": 23012 }, { "epoch": 0.40002433555250394, "grad_norm": 1.9445635671337715, "learning_rate": 6.819365881248267e-07, "loss": 0.3003, "step": 23013 }, { "epoch": 0.40004171809000677, "grad_norm": 1.0361514539961039, "learning_rate": 6.819103681789667e-07, "loss": 0.2951, "step": 23014 }, { "epoch": 0.4000591006275096, "grad_norm": 2.2561927747320785, "learning_rate": 6.818841476565343e-07, "loss": 0.4303, "step": 23015 }, { "epoch": 0.40007648316501243, "grad_norm": 2.102742051753905, "learning_rate": 6.818579265576126e-07, "loss": 0.2924, "step": 23016 }, { "epoch": 0.40009386570251526, "grad_norm": 1.8875756161801491, "learning_rate": 6.818317048822849e-07, "loss": 0.4011, "step": 23017 }, { "epoch": 0.4001112482400181, "grad_norm": 1.219813528220049, "learning_rate": 6.818054826306343e-07, "loss": 0.28, "step": 23018 }, { "epoch": 0.4001286307775209, "grad_norm": 2.29046901640223, "learning_rate": 6.817792598027436e-07, "loss": 0.3869, "step": 23019 }, { "epoch": 0.40014601331502375, "grad_norm": 1.2209626550937485, "learning_rate": 6.817530363986965e-07, "loss": 0.3114, "step": 23020 }, { "epoch": 0.4001633958525265, "grad_norm": 1.8136098948632404, "learning_rate": 6.817268124185754e-07, "loss": 0.2647, "step": 23021 }, { "epoch": 0.40018077839002936, "grad_norm": 1.925569264198168, "learning_rate": 6.81700587862464e-07, "loss": 0.3054, "step": 23022 }, { "epoch": 0.4001981609275322, "grad_norm": 1.6531581304162617, "learning_rate": 6.816743627304453e-07, "loss": 0.4193, "step": 23023 }, { "epoch": 0.400215543465035, "grad_norm": 1.3104600433588818, "learning_rate": 6.816481370226021e-07, "loss": 0.2033, "step": 23024 }, { "epoch": 0.40023292600253785, "grad_norm": 1.907873604177292, "learning_rate": 6.816219107390177e-07, "loss": 0.3004, "step": 23025 }, { "epoch": 0.4002503085400407, "grad_norm": 1.8224730464014631, "learning_rate": 6.815956838797754e-07, "loss": 0.4287, "step": 23026 }, { "epoch": 0.4002676910775435, "grad_norm": 1.3232797054107266, "learning_rate": 6.815694564449583e-07, "loss": 0.2186, "step": 23027 }, { "epoch": 0.40028507361504634, "grad_norm": 1.4923406078712478, "learning_rate": 6.815432284346493e-07, "loss": 0.4162, "step": 23028 }, { "epoch": 0.40030245615254917, "grad_norm": 2.363089482373223, "learning_rate": 6.815169998489318e-07, "loss": 0.3706, "step": 23029 }, { "epoch": 0.400319838690052, "grad_norm": 1.6549995574890994, "learning_rate": 6.814907706878885e-07, "loss": 0.463, "step": 23030 }, { "epoch": 0.4003372212275548, "grad_norm": 1.2015105476499337, "learning_rate": 6.814645409516032e-07, "loss": 0.3034, "step": 23031 }, { "epoch": 0.4003546037650576, "grad_norm": 1.4770276868372918, "learning_rate": 6.814383106401583e-07, "loss": 0.2313, "step": 23032 }, { "epoch": 0.40037198630256043, "grad_norm": 2.1145052646823026, "learning_rate": 6.814120797536375e-07, "loss": 0.3136, "step": 23033 }, { "epoch": 0.40038936884006326, "grad_norm": 1.4334613361363346, "learning_rate": 6.813858482921236e-07, "loss": 0.2314, "step": 23034 }, { "epoch": 0.4004067513775661, "grad_norm": 1.886690034093795, "learning_rate": 6.813596162557001e-07, "loss": 0.2299, "step": 23035 }, { "epoch": 0.4004241339150689, "grad_norm": 1.9596801247218691, "learning_rate": 6.813333836444498e-07, "loss": 0.4716, "step": 23036 }, { "epoch": 0.40044151645257176, "grad_norm": 1.8518896742635589, "learning_rate": 6.81307150458456e-07, "loss": 0.3424, "step": 23037 }, { "epoch": 0.4004588989900746, "grad_norm": 2.026824936046776, "learning_rate": 6.812809166978018e-07, "loss": 0.2532, "step": 23038 }, { "epoch": 0.4004762815275774, "grad_norm": 1.7633006649159502, "learning_rate": 6.812546823625704e-07, "loss": 0.2157, "step": 23039 }, { "epoch": 0.40049366406508025, "grad_norm": 9.063192730841667, "learning_rate": 6.812284474528449e-07, "loss": 0.7053, "step": 23040 }, { "epoch": 0.400511046602583, "grad_norm": 1.6500965180513163, "learning_rate": 6.812022119687084e-07, "loss": 0.3664, "step": 23041 }, { "epoch": 0.40052842914008585, "grad_norm": 5.38480732700678, "learning_rate": 6.811759759102442e-07, "loss": 0.4292, "step": 23042 }, { "epoch": 0.4005458116775887, "grad_norm": 1.4980400350067644, "learning_rate": 6.811497392775355e-07, "loss": 0.3131, "step": 23043 }, { "epoch": 0.4005631942150915, "grad_norm": 1.4435780871432375, "learning_rate": 6.811235020706652e-07, "loss": 0.1691, "step": 23044 }, { "epoch": 0.40058057675259434, "grad_norm": 1.3156630784044818, "learning_rate": 6.810972642897167e-07, "loss": 0.2906, "step": 23045 }, { "epoch": 0.40059795929009717, "grad_norm": 1.5982988657353678, "learning_rate": 6.810710259347728e-07, "loss": 0.3613, "step": 23046 }, { "epoch": 0.4006153418276, "grad_norm": 1.082372720385112, "learning_rate": 6.810447870059171e-07, "loss": 0.3701, "step": 23047 }, { "epoch": 0.40063272436510283, "grad_norm": 1.9056013059541197, "learning_rate": 6.810185475032327e-07, "loss": 0.2534, "step": 23048 }, { "epoch": 0.40065010690260566, "grad_norm": 1.9848879131638146, "learning_rate": 6.809923074268026e-07, "loss": 0.22, "step": 23049 }, { "epoch": 0.4006674894401085, "grad_norm": 1.6108801426395747, "learning_rate": 6.809660667767099e-07, "loss": 0.4132, "step": 23050 }, { "epoch": 0.40068487197761127, "grad_norm": 1.5083927858622526, "learning_rate": 6.809398255530379e-07, "loss": 0.3536, "step": 23051 }, { "epoch": 0.4007022545151141, "grad_norm": 2.0772900710506583, "learning_rate": 6.809135837558699e-07, "loss": 0.3316, "step": 23052 }, { "epoch": 0.40071963705261693, "grad_norm": 1.5488877059975756, "learning_rate": 6.808873413852889e-07, "loss": 0.2371, "step": 23053 }, { "epoch": 0.40073701959011976, "grad_norm": 1.453240122759256, "learning_rate": 6.808610984413781e-07, "loss": 0.3645, "step": 23054 }, { "epoch": 0.4007544021276226, "grad_norm": 1.5744915466300091, "learning_rate": 6.808348549242206e-07, "loss": 0.3078, "step": 23055 }, { "epoch": 0.4007717846651254, "grad_norm": 2.1615024741749576, "learning_rate": 6.808086108338999e-07, "loss": 0.1596, "step": 23056 }, { "epoch": 0.40078916720262825, "grad_norm": 1.697314717065392, "learning_rate": 6.807823661704989e-07, "loss": 0.3392, "step": 23057 }, { "epoch": 0.4008065497401311, "grad_norm": 1.7324156301653992, "learning_rate": 6.807561209341008e-07, "loss": 0.2332, "step": 23058 }, { "epoch": 0.4008239322776339, "grad_norm": 1.3900320941357902, "learning_rate": 6.807298751247888e-07, "loss": 0.1715, "step": 23059 }, { "epoch": 0.40084131481513674, "grad_norm": 1.1716054456808258, "learning_rate": 6.807036287426461e-07, "loss": 0.2428, "step": 23060 }, { "epoch": 0.4008586973526395, "grad_norm": 1.0162718875926722, "learning_rate": 6.806773817877559e-07, "loss": 0.1653, "step": 23061 }, { "epoch": 0.40087607989014234, "grad_norm": 1.2420634718170562, "learning_rate": 6.806511342602015e-07, "loss": 0.5499, "step": 23062 }, { "epoch": 0.4008934624276452, "grad_norm": 1.9459310768172524, "learning_rate": 6.806248861600659e-07, "loss": 0.4457, "step": 23063 }, { "epoch": 0.400910844965148, "grad_norm": 1.2963798879191561, "learning_rate": 6.805986374874324e-07, "loss": 0.3357, "step": 23064 }, { "epoch": 0.40092822750265084, "grad_norm": 1.131207663854763, "learning_rate": 6.805723882423842e-07, "loss": 0.3129, "step": 23065 }, { "epoch": 0.40094561004015367, "grad_norm": 8.047484653926894, "learning_rate": 6.805461384250045e-07, "loss": 0.4104, "step": 23066 }, { "epoch": 0.4009629925776565, "grad_norm": 1.9459968246482457, "learning_rate": 6.805198880353765e-07, "loss": 0.3894, "step": 23067 }, { "epoch": 0.4009803751151593, "grad_norm": 4.351163806130342, "learning_rate": 6.804936370735832e-07, "loss": 0.5544, "step": 23068 }, { "epoch": 0.40099775765266216, "grad_norm": 1.369315431978553, "learning_rate": 6.804673855397081e-07, "loss": 0.2381, "step": 23069 }, { "epoch": 0.401015140190165, "grad_norm": 3.175801911106102, "learning_rate": 6.804411334338345e-07, "loss": 0.4021, "step": 23070 }, { "epoch": 0.40103252272766776, "grad_norm": 1.9183115448353796, "learning_rate": 6.80414880756045e-07, "loss": 0.3305, "step": 23071 }, { "epoch": 0.4010499052651706, "grad_norm": 1.8423020682057951, "learning_rate": 6.803886275064234e-07, "loss": 0.4145, "step": 23072 }, { "epoch": 0.4010672878026734, "grad_norm": 1.5343382704153654, "learning_rate": 6.803623736850527e-07, "loss": 0.1861, "step": 23073 }, { "epoch": 0.40108467034017625, "grad_norm": 0.9668912045673419, "learning_rate": 6.803361192920162e-07, "loss": 0.3833, "step": 23074 }, { "epoch": 0.4011020528776791, "grad_norm": 1.4454083335917327, "learning_rate": 6.803098643273969e-07, "loss": 0.374, "step": 23075 }, { "epoch": 0.4011194354151819, "grad_norm": 1.3066010838827824, "learning_rate": 6.802836087912782e-07, "loss": 0.2696, "step": 23076 }, { "epoch": 0.40113681795268474, "grad_norm": 2.0569069410232292, "learning_rate": 6.802573526837434e-07, "loss": 0.3547, "step": 23077 }, { "epoch": 0.4011542004901876, "grad_norm": 2.246477532533801, "learning_rate": 6.802310960048755e-07, "loss": 0.1581, "step": 23078 }, { "epoch": 0.4011715830276904, "grad_norm": 1.9716145025292033, "learning_rate": 6.802048387547578e-07, "loss": 0.4437, "step": 23079 }, { "epoch": 0.40118896556519323, "grad_norm": 1.40077330965191, "learning_rate": 6.801785809334737e-07, "loss": 0.2948, "step": 23080 }, { "epoch": 0.401206348102696, "grad_norm": 1.8738607222758805, "learning_rate": 6.801523225411059e-07, "loss": 0.3506, "step": 23081 }, { "epoch": 0.40122373064019884, "grad_norm": 1.5846171015851895, "learning_rate": 6.801260635777384e-07, "loss": 0.3026, "step": 23082 }, { "epoch": 0.40124111317770167, "grad_norm": 1.157872820744963, "learning_rate": 6.800998040434538e-07, "loss": 0.205, "step": 23083 }, { "epoch": 0.4012584957152045, "grad_norm": 2.024807793352917, "learning_rate": 6.800735439383356e-07, "loss": 0.2995, "step": 23084 }, { "epoch": 0.40127587825270733, "grad_norm": 1.584640145007681, "learning_rate": 6.800472832624669e-07, "loss": 0.2762, "step": 23085 }, { "epoch": 0.40129326079021016, "grad_norm": 1.640540809722089, "learning_rate": 6.800210220159312e-07, "loss": 0.4354, "step": 23086 }, { "epoch": 0.401310643327713, "grad_norm": 2.2689608394452314, "learning_rate": 6.799947601988114e-07, "loss": 0.4376, "step": 23087 }, { "epoch": 0.4013280258652158, "grad_norm": 2.4922503251240324, "learning_rate": 6.799684978111908e-07, "loss": 0.3602, "step": 23088 }, { "epoch": 0.40134540840271865, "grad_norm": 1.3268173808251125, "learning_rate": 6.79942234853153e-07, "loss": 0.5061, "step": 23089 }, { "epoch": 0.4013627909402215, "grad_norm": 1.857341021528479, "learning_rate": 6.799159713247809e-07, "loss": 0.3093, "step": 23090 }, { "epoch": 0.40138017347772426, "grad_norm": 1.2769826852992279, "learning_rate": 6.798897072261577e-07, "loss": 0.3187, "step": 23091 }, { "epoch": 0.4013975560152271, "grad_norm": 1.7971730776750456, "learning_rate": 6.798634425573668e-07, "loss": 0.3076, "step": 23092 }, { "epoch": 0.4014149385527299, "grad_norm": 2.0992068820260235, "learning_rate": 6.798371773184916e-07, "loss": 0.2311, "step": 23093 }, { "epoch": 0.40143232109023275, "grad_norm": 2.414395325363116, "learning_rate": 6.798109115096149e-07, "loss": 0.3998, "step": 23094 }, { "epoch": 0.4014497036277356, "grad_norm": 1.6045398019308088, "learning_rate": 6.797846451308202e-07, "loss": 0.2544, "step": 23095 }, { "epoch": 0.4014670861652384, "grad_norm": 2.0321977161341422, "learning_rate": 6.79758378182191e-07, "loss": 0.4379, "step": 23096 }, { "epoch": 0.40148446870274124, "grad_norm": 1.5633466282324138, "learning_rate": 6.797321106638102e-07, "loss": 0.301, "step": 23097 }, { "epoch": 0.40150185124024407, "grad_norm": 2.8531376060856153, "learning_rate": 6.797058425757611e-07, "loss": 0.4363, "step": 23098 }, { "epoch": 0.4015192337777469, "grad_norm": 1.6771213988155975, "learning_rate": 6.796795739181271e-07, "loss": 0.3093, "step": 23099 }, { "epoch": 0.4015366163152497, "grad_norm": 1.9376982341906306, "learning_rate": 6.796533046909913e-07, "loss": 0.2797, "step": 23100 }, { "epoch": 0.4015539988527525, "grad_norm": 1.5439243516071635, "learning_rate": 6.796270348944372e-07, "loss": 0.2379, "step": 23101 }, { "epoch": 0.40157138139025533, "grad_norm": 1.5763248340136078, "learning_rate": 6.796007645285479e-07, "loss": 0.3506, "step": 23102 }, { "epoch": 0.40158876392775816, "grad_norm": 1.139470062692028, "learning_rate": 6.795744935934065e-07, "loss": 0.2595, "step": 23103 }, { "epoch": 0.401606146465261, "grad_norm": 1.6729869737177325, "learning_rate": 6.795482220890966e-07, "loss": 0.3145, "step": 23104 }, { "epoch": 0.4016235290027638, "grad_norm": 1.3247700718984166, "learning_rate": 6.795219500157014e-07, "loss": 0.2062, "step": 23105 }, { "epoch": 0.40164091154026665, "grad_norm": 1.2351549629693768, "learning_rate": 6.794956773733038e-07, "loss": 0.2103, "step": 23106 }, { "epoch": 0.4016582940777695, "grad_norm": 3.1846318369756887, "learning_rate": 6.794694041619876e-07, "loss": 0.6903, "step": 23107 }, { "epoch": 0.4016756766152723, "grad_norm": 2.4392138373612977, "learning_rate": 6.794431303818358e-07, "loss": 0.3848, "step": 23108 }, { "epoch": 0.40169305915277514, "grad_norm": 1.4660734665868767, "learning_rate": 6.794168560329316e-07, "loss": 0.5971, "step": 23109 }, { "epoch": 0.4017104416902779, "grad_norm": 2.5505626782726853, "learning_rate": 6.793905811153584e-07, "loss": 0.2851, "step": 23110 }, { "epoch": 0.40172782422778075, "grad_norm": 1.4888162548328179, "learning_rate": 6.793643056291996e-07, "loss": 0.2486, "step": 23111 }, { "epoch": 0.4017452067652836, "grad_norm": 1.1762443296645435, "learning_rate": 6.793380295745381e-07, "loss": 0.3028, "step": 23112 }, { "epoch": 0.4017625893027864, "grad_norm": 1.5443538293185697, "learning_rate": 6.793117529514577e-07, "loss": 0.238, "step": 23113 }, { "epoch": 0.40177997184028924, "grad_norm": 1.1918137051019226, "learning_rate": 6.792854757600411e-07, "loss": 0.2937, "step": 23114 }, { "epoch": 0.40179735437779207, "grad_norm": 1.6339114888533146, "learning_rate": 6.792591980003722e-07, "loss": 0.2374, "step": 23115 }, { "epoch": 0.4018147369152949, "grad_norm": 1.6452262046272854, "learning_rate": 6.792329196725339e-07, "loss": 0.4499, "step": 23116 }, { "epoch": 0.40183211945279773, "grad_norm": 1.984364553934108, "learning_rate": 6.792066407766095e-07, "loss": 0.2659, "step": 23117 }, { "epoch": 0.40184950199030056, "grad_norm": 1.794125098014128, "learning_rate": 6.791803613126825e-07, "loss": 0.3871, "step": 23118 }, { "epoch": 0.4018668845278034, "grad_norm": 1.3724766499923307, "learning_rate": 6.791540812808359e-07, "loss": 0.3547, "step": 23119 }, { "epoch": 0.40188426706530617, "grad_norm": 1.9217507776672471, "learning_rate": 6.791278006811531e-07, "loss": 0.3307, "step": 23120 }, { "epoch": 0.401901649602809, "grad_norm": 2.2289549970824347, "learning_rate": 6.791015195137177e-07, "loss": 0.3825, "step": 23121 }, { "epoch": 0.4019190321403118, "grad_norm": 3.6491515077062284, "learning_rate": 6.790752377786127e-07, "loss": 0.4149, "step": 23122 }, { "epoch": 0.40193641467781466, "grad_norm": 1.3366501734006908, "learning_rate": 6.790489554759213e-07, "loss": 0.332, "step": 23123 }, { "epoch": 0.4019537972153175, "grad_norm": 1.5039833387520654, "learning_rate": 6.790226726057272e-07, "loss": 0.4284, "step": 23124 }, { "epoch": 0.4019711797528203, "grad_norm": 1.3752119019634417, "learning_rate": 6.789963891681133e-07, "loss": 0.1433, "step": 23125 }, { "epoch": 0.40198856229032315, "grad_norm": 1.5264870975955758, "learning_rate": 6.789701051631631e-07, "loss": 0.5157, "step": 23126 }, { "epoch": 0.402005944827826, "grad_norm": 1.3211111880665851, "learning_rate": 6.789438205909599e-07, "loss": 0.3541, "step": 23127 }, { "epoch": 0.4020233273653288, "grad_norm": 1.6483088833479074, "learning_rate": 6.78917535451587e-07, "loss": 0.4439, "step": 23128 }, { "epoch": 0.40204070990283164, "grad_norm": 1.5497539435197039, "learning_rate": 6.788912497451276e-07, "loss": 0.2211, "step": 23129 }, { "epoch": 0.4020580924403344, "grad_norm": 1.6771206034599582, "learning_rate": 6.788649634716654e-07, "loss": 0.3521, "step": 23130 }, { "epoch": 0.40207547497783724, "grad_norm": 1.6570319509706475, "learning_rate": 6.78838676631283e-07, "loss": 0.6007, "step": 23131 }, { "epoch": 0.4020928575153401, "grad_norm": 2.102094198697681, "learning_rate": 6.788123892240645e-07, "loss": 0.3441, "step": 23132 }, { "epoch": 0.4021102400528429, "grad_norm": 0.8470383682338493, "learning_rate": 6.787861012500926e-07, "loss": 0.2834, "step": 23133 }, { "epoch": 0.40212762259034573, "grad_norm": 2.8784881515219265, "learning_rate": 6.787598127094512e-07, "loss": 0.4272, "step": 23134 }, { "epoch": 0.40214500512784856, "grad_norm": 3.5534980697980947, "learning_rate": 6.787335236022229e-07, "loss": 0.4795, "step": 23135 }, { "epoch": 0.4021623876653514, "grad_norm": 1.732350543057809, "learning_rate": 6.787072339284917e-07, "loss": 0.2082, "step": 23136 }, { "epoch": 0.4021797702028542, "grad_norm": 3.1217541181788553, "learning_rate": 6.786809436883407e-07, "loss": 0.8973, "step": 23137 }, { "epoch": 0.40219715274035706, "grad_norm": 1.4840183347395488, "learning_rate": 6.786546528818531e-07, "loss": 0.3949, "step": 23138 }, { "epoch": 0.4022145352778599, "grad_norm": 1.753598682309473, "learning_rate": 6.786283615091121e-07, "loss": 0.3416, "step": 23139 }, { "epoch": 0.40223191781536266, "grad_norm": 1.7143542452963703, "learning_rate": 6.786020695702017e-07, "loss": 0.458, "step": 23140 }, { "epoch": 0.4022493003528655, "grad_norm": 1.3824421462855145, "learning_rate": 6.785757770652046e-07, "loss": 0.276, "step": 23141 }, { "epoch": 0.4022666828903683, "grad_norm": 1.8447273471794945, "learning_rate": 6.785494839942042e-07, "loss": 0.4447, "step": 23142 }, { "epoch": 0.40228406542787115, "grad_norm": 1.1730212870379266, "learning_rate": 6.78523190357284e-07, "loss": 0.2971, "step": 23143 }, { "epoch": 0.402301447965374, "grad_norm": 1.7554297337685065, "learning_rate": 6.784968961545272e-07, "loss": 0.3367, "step": 23144 }, { "epoch": 0.4023188305028768, "grad_norm": 3.409377301486621, "learning_rate": 6.784706013860174e-07, "loss": 0.3674, "step": 23145 }, { "epoch": 0.40233621304037964, "grad_norm": 2.2697051178447136, "learning_rate": 6.784443060518378e-07, "loss": 0.3799, "step": 23146 }, { "epoch": 0.4023535955778825, "grad_norm": 2.0844219532752253, "learning_rate": 6.784180101520717e-07, "loss": 0.4388, "step": 23147 }, { "epoch": 0.4023709781153853, "grad_norm": 1.5786044456396917, "learning_rate": 6.783917136868022e-07, "loss": 0.2582, "step": 23148 }, { "epoch": 0.40238836065288813, "grad_norm": 1.857273319460697, "learning_rate": 6.783654166561134e-07, "loss": 0.4228, "step": 23149 }, { "epoch": 0.4024057431903909, "grad_norm": 1.3967927355810679, "learning_rate": 6.783391190600878e-07, "loss": 0.3869, "step": 23150 }, { "epoch": 0.40242312572789374, "grad_norm": 1.2028778500065525, "learning_rate": 6.783128208988092e-07, "loss": 0.2829, "step": 23151 }, { "epoch": 0.40244050826539657, "grad_norm": 1.9613002707661016, "learning_rate": 6.782865221723608e-07, "loss": 0.3373, "step": 23152 }, { "epoch": 0.4024578908028994, "grad_norm": 2.125867682613775, "learning_rate": 6.782602228808262e-07, "loss": 0.2544, "step": 23153 }, { "epoch": 0.40247527334040223, "grad_norm": 2.0238357857778646, "learning_rate": 6.782339230242884e-07, "loss": 0.207, "step": 23154 }, { "epoch": 0.40249265587790506, "grad_norm": 1.5160892657838725, "learning_rate": 6.782076226028311e-07, "loss": 0.2998, "step": 23155 }, { "epoch": 0.4025100384154079, "grad_norm": 1.9535621133256873, "learning_rate": 6.781813216165373e-07, "loss": 0.5124, "step": 23156 }, { "epoch": 0.4025274209529107, "grad_norm": 1.7174054987485814, "learning_rate": 6.781550200654907e-07, "loss": 0.6251, "step": 23157 }, { "epoch": 0.40254480349041355, "grad_norm": 1.3420143327467808, "learning_rate": 6.781287179497744e-07, "loss": 0.2362, "step": 23158 }, { "epoch": 0.4025621860279164, "grad_norm": 2.1454008876214496, "learning_rate": 6.781024152694722e-07, "loss": 0.712, "step": 23159 }, { "epoch": 0.40257956856541915, "grad_norm": 1.161948166241078, "learning_rate": 6.780761120246668e-07, "loss": 0.1552, "step": 23160 }, { "epoch": 0.402596951102922, "grad_norm": 1.304822205567054, "learning_rate": 6.780498082154421e-07, "loss": 0.1552, "step": 23161 }, { "epoch": 0.4026143336404248, "grad_norm": 4.72142722087624, "learning_rate": 6.780235038418813e-07, "loss": 0.4718, "step": 23162 }, { "epoch": 0.40263171617792765, "grad_norm": 1.0813451416338389, "learning_rate": 6.779971989040678e-07, "loss": 0.3089, "step": 23163 }, { "epoch": 0.4026490987154305, "grad_norm": 3.523341385106048, "learning_rate": 6.779708934020848e-07, "loss": 0.3355, "step": 23164 }, { "epoch": 0.4026664812529333, "grad_norm": 1.4796999910004647, "learning_rate": 6.77944587336016e-07, "loss": 0.3243, "step": 23165 }, { "epoch": 0.40268386379043614, "grad_norm": 2.4939133032369436, "learning_rate": 6.779182807059445e-07, "loss": 0.3306, "step": 23166 }, { "epoch": 0.40270124632793897, "grad_norm": 1.4049892465365117, "learning_rate": 6.778919735119537e-07, "loss": 0.3285, "step": 23167 }, { "epoch": 0.4027186288654418, "grad_norm": 1.411583553666655, "learning_rate": 6.778656657541273e-07, "loss": 0.3528, "step": 23168 }, { "epoch": 0.4027360114029446, "grad_norm": 1.0929974108936693, "learning_rate": 6.778393574325483e-07, "loss": 0.2285, "step": 23169 }, { "epoch": 0.4027533939404474, "grad_norm": 1.4838966315276347, "learning_rate": 6.778130485473001e-07, "loss": 0.3351, "step": 23170 }, { "epoch": 0.40277077647795023, "grad_norm": 3.575143650988191, "learning_rate": 6.777867390984663e-07, "loss": 0.3511, "step": 23171 }, { "epoch": 0.40278815901545306, "grad_norm": 2.0173439544742897, "learning_rate": 6.777604290861303e-07, "loss": 0.1761, "step": 23172 }, { "epoch": 0.4028055415529559, "grad_norm": 1.8549787976051404, "learning_rate": 6.777341185103753e-07, "loss": 0.4689, "step": 23173 }, { "epoch": 0.4028229240904587, "grad_norm": 2.204263056535418, "learning_rate": 6.777078073712848e-07, "loss": 0.3034, "step": 23174 }, { "epoch": 0.40284030662796155, "grad_norm": 1.6099959954151386, "learning_rate": 6.776814956689422e-07, "loss": 0.3456, "step": 23175 }, { "epoch": 0.4028576891654644, "grad_norm": 2.591885329099998, "learning_rate": 6.776551834034309e-07, "loss": 0.3299, "step": 23176 }, { "epoch": 0.4028750717029672, "grad_norm": 2.1169113524378758, "learning_rate": 6.776288705748343e-07, "loss": 0.2754, "step": 23177 }, { "epoch": 0.40289245424047004, "grad_norm": 2.961830135071458, "learning_rate": 6.776025571832358e-07, "loss": 0.2314, "step": 23178 }, { "epoch": 0.4029098367779729, "grad_norm": 3.9044817445381783, "learning_rate": 6.775762432287187e-07, "loss": 0.3191, "step": 23179 }, { "epoch": 0.40292721931547565, "grad_norm": 1.59967751421102, "learning_rate": 6.775499287113664e-07, "loss": 0.2396, "step": 23180 }, { "epoch": 0.4029446018529785, "grad_norm": 1.312607847473622, "learning_rate": 6.775236136312625e-07, "loss": 0.1885, "step": 23181 }, { "epoch": 0.4029619843904813, "grad_norm": 2.3211557048990366, "learning_rate": 6.774972979884903e-07, "loss": 0.3208, "step": 23182 }, { "epoch": 0.40297936692798414, "grad_norm": 2.6615686499260036, "learning_rate": 6.774709817831332e-07, "loss": 0.3711, "step": 23183 }, { "epoch": 0.40299674946548697, "grad_norm": 1.6723076342593932, "learning_rate": 6.774446650152746e-07, "loss": 0.1933, "step": 23184 }, { "epoch": 0.4030141320029898, "grad_norm": 1.150182098241587, "learning_rate": 6.774183476849979e-07, "loss": 0.3321, "step": 23185 }, { "epoch": 0.40303151454049263, "grad_norm": 2.2284839200898925, "learning_rate": 6.773920297923866e-07, "loss": 0.3662, "step": 23186 }, { "epoch": 0.40304889707799546, "grad_norm": 1.223508076366578, "learning_rate": 6.77365711337524e-07, "loss": 0.4017, "step": 23187 }, { "epoch": 0.4030662796154983, "grad_norm": 1.5862884696722357, "learning_rate": 6.773393923204936e-07, "loss": 0.3765, "step": 23188 }, { "epoch": 0.4030836621530011, "grad_norm": 2.3588766298487815, "learning_rate": 6.773130727413788e-07, "loss": 0.4374, "step": 23189 }, { "epoch": 0.4031010446905039, "grad_norm": 3.0622694887934014, "learning_rate": 6.77286752600263e-07, "loss": 0.6138, "step": 23190 }, { "epoch": 0.4031184272280067, "grad_norm": 1.6518386629977526, "learning_rate": 6.772604318972295e-07, "loss": 0.2018, "step": 23191 }, { "epoch": 0.40313580976550956, "grad_norm": 1.3893855850511925, "learning_rate": 6.77234110632362e-07, "loss": 0.2321, "step": 23192 }, { "epoch": 0.4031531923030124, "grad_norm": 1.3135975970496172, "learning_rate": 6.772077888057437e-07, "loss": 0.3603, "step": 23193 }, { "epoch": 0.4031705748405152, "grad_norm": 2.432251433585325, "learning_rate": 6.771814664174582e-07, "loss": 0.233, "step": 23194 }, { "epoch": 0.40318795737801805, "grad_norm": 1.7797233680351867, "learning_rate": 6.771551434675887e-07, "loss": 0.3684, "step": 23195 }, { "epoch": 0.4032053399155209, "grad_norm": 1.2964516984069083, "learning_rate": 6.77128819956219e-07, "loss": 0.3031, "step": 23196 }, { "epoch": 0.4032227224530237, "grad_norm": 2.3533205314317156, "learning_rate": 6.771024958834322e-07, "loss": 0.2539, "step": 23197 }, { "epoch": 0.40324010499052654, "grad_norm": 2.061828084658886, "learning_rate": 6.770761712493119e-07, "loss": 0.5622, "step": 23198 }, { "epoch": 0.40325748752802937, "grad_norm": 0.9075057685329259, "learning_rate": 6.770498460539412e-07, "loss": 0.3639, "step": 23199 }, { "epoch": 0.40327487006553214, "grad_norm": 2.015591247392726, "learning_rate": 6.770235202974041e-07, "loss": 0.3839, "step": 23200 }, { "epoch": 0.403292252603035, "grad_norm": 2.5687399830605053, "learning_rate": 6.769971939797836e-07, "loss": 0.551, "step": 23201 }, { "epoch": 0.4033096351405378, "grad_norm": 1.8766747921488347, "learning_rate": 6.769708671011635e-07, "loss": 0.2534, "step": 23202 }, { "epoch": 0.40332701767804063, "grad_norm": 1.4339468504591946, "learning_rate": 6.769445396616269e-07, "loss": 0.2277, "step": 23203 }, { "epoch": 0.40334440021554346, "grad_norm": 1.6520985604660834, "learning_rate": 6.769182116612573e-07, "loss": 0.2336, "step": 23204 }, { "epoch": 0.4033617827530463, "grad_norm": 1.2949205171589848, "learning_rate": 6.768918831001385e-07, "loss": 0.2692, "step": 23205 }, { "epoch": 0.4033791652905491, "grad_norm": 2.655851528462846, "learning_rate": 6.768655539783535e-07, "loss": 0.3848, "step": 23206 }, { "epoch": 0.40339654782805195, "grad_norm": 1.2411336276272962, "learning_rate": 6.768392242959859e-07, "loss": 0.2928, "step": 23207 }, { "epoch": 0.4034139303655548, "grad_norm": 1.849817067229902, "learning_rate": 6.768128940531192e-07, "loss": 0.318, "step": 23208 }, { "epoch": 0.4034313129030576, "grad_norm": 1.7948810685052596, "learning_rate": 6.767865632498371e-07, "loss": 0.2769, "step": 23209 }, { "epoch": 0.4034486954405604, "grad_norm": 2.651928560732617, "learning_rate": 6.767602318862226e-07, "loss": 0.4627, "step": 23210 }, { "epoch": 0.4034660779780632, "grad_norm": 1.555113208409995, "learning_rate": 6.767338999623594e-07, "loss": 0.3325, "step": 23211 }, { "epoch": 0.40348346051556605, "grad_norm": 2.6789751610751504, "learning_rate": 6.767075674783309e-07, "loss": 0.3911, "step": 23212 }, { "epoch": 0.4035008430530689, "grad_norm": 1.835138424129218, "learning_rate": 6.766812344342207e-07, "loss": 0.3091, "step": 23213 }, { "epoch": 0.4035182255905717, "grad_norm": 2.0805722229271515, "learning_rate": 6.766549008301119e-07, "loss": 0.3958, "step": 23214 }, { "epoch": 0.40353560812807454, "grad_norm": 1.7091858742465305, "learning_rate": 6.766285666660884e-07, "loss": 0.3696, "step": 23215 }, { "epoch": 0.40355299066557737, "grad_norm": 1.2653440931860018, "learning_rate": 6.766022319422335e-07, "loss": 0.2515, "step": 23216 }, { "epoch": 0.4035703732030802, "grad_norm": 1.4130981684675592, "learning_rate": 6.765758966586306e-07, "loss": 0.1549, "step": 23217 }, { "epoch": 0.40358775574058303, "grad_norm": 3.285579799118674, "learning_rate": 6.765495608153631e-07, "loss": 0.5421, "step": 23218 }, { "epoch": 0.40360513827808586, "grad_norm": 3.2386912426785, "learning_rate": 6.765232244125148e-07, "loss": 0.3744, "step": 23219 }, { "epoch": 0.40362252081558864, "grad_norm": 1.9528582131480767, "learning_rate": 6.764968874501687e-07, "loss": 0.3443, "step": 23220 }, { "epoch": 0.40363990335309147, "grad_norm": 1.5825038460528995, "learning_rate": 6.764705499284088e-07, "loss": 0.3435, "step": 23221 }, { "epoch": 0.4036572858905943, "grad_norm": 1.1691070799130734, "learning_rate": 6.764442118473183e-07, "loss": 0.2216, "step": 23222 }, { "epoch": 0.4036746684280971, "grad_norm": 1.1718540608650196, "learning_rate": 6.764178732069805e-07, "loss": 0.2126, "step": 23223 }, { "epoch": 0.40369205096559996, "grad_norm": 1.2091352897937686, "learning_rate": 6.763915340074792e-07, "loss": 0.3085, "step": 23224 }, { "epoch": 0.4037094335031028, "grad_norm": 1.0681617981388634, "learning_rate": 6.763651942488979e-07, "loss": 0.2058, "step": 23225 }, { "epoch": 0.4037268160406056, "grad_norm": 1.4628009123503292, "learning_rate": 6.763388539313197e-07, "loss": 0.259, "step": 23226 }, { "epoch": 0.40374419857810845, "grad_norm": 2.9837180275629778, "learning_rate": 6.763125130548285e-07, "loss": 0.5918, "step": 23227 }, { "epoch": 0.4037615811156113, "grad_norm": 1.4242536700080777, "learning_rate": 6.762861716195076e-07, "loss": 0.4332, "step": 23228 }, { "epoch": 0.4037789636531141, "grad_norm": 1.4659434626002563, "learning_rate": 6.762598296254405e-07, "loss": 0.3062, "step": 23229 }, { "epoch": 0.4037963461906169, "grad_norm": 2.021024934948198, "learning_rate": 6.762334870727107e-07, "loss": 0.4596, "step": 23230 }, { "epoch": 0.4038137287281197, "grad_norm": 3.9452519689553807, "learning_rate": 6.762071439614016e-07, "loss": 0.4282, "step": 23231 }, { "epoch": 0.40383111126562254, "grad_norm": 3.6854949408895306, "learning_rate": 6.761808002915971e-07, "loss": 0.4945, "step": 23232 }, { "epoch": 0.4038484938031254, "grad_norm": 0.9147322460282051, "learning_rate": 6.7615445606338e-07, "loss": 0.3643, "step": 23233 }, { "epoch": 0.4038658763406282, "grad_norm": 3.4198228132361987, "learning_rate": 6.761281112768345e-07, "loss": 0.3904, "step": 23234 }, { "epoch": 0.40388325887813104, "grad_norm": 1.9433462671630193, "learning_rate": 6.761017659320437e-07, "loss": 0.3676, "step": 23235 }, { "epoch": 0.40390064141563387, "grad_norm": 1.4124960618414533, "learning_rate": 6.760754200290912e-07, "loss": 0.4374, "step": 23236 }, { "epoch": 0.4039180239531367, "grad_norm": 1.0008806160937473, "learning_rate": 6.760490735680604e-07, "loss": 0.3093, "step": 23237 }, { "epoch": 0.4039354064906395, "grad_norm": 1.8489632426781148, "learning_rate": 6.760227265490351e-07, "loss": 0.4579, "step": 23238 }, { "epoch": 0.4039527890281423, "grad_norm": 0.9033843905177753, "learning_rate": 6.759963789720984e-07, "loss": 0.4978, "step": 23239 }, { "epoch": 0.40397017156564513, "grad_norm": 1.9877956392914709, "learning_rate": 6.759700308373342e-07, "loss": 0.3653, "step": 23240 }, { "epoch": 0.40398755410314796, "grad_norm": 1.63410708269376, "learning_rate": 6.759436821448258e-07, "loss": 0.3424, "step": 23241 }, { "epoch": 0.4040049366406508, "grad_norm": 3.1647181016253803, "learning_rate": 6.759173328946567e-07, "loss": 0.4648, "step": 23242 }, { "epoch": 0.4040223191781536, "grad_norm": 1.53692731745001, "learning_rate": 6.758909830869104e-07, "loss": 0.3369, "step": 23243 }, { "epoch": 0.40403970171565645, "grad_norm": 1.3092089482780505, "learning_rate": 6.758646327216707e-07, "loss": 0.2314, "step": 23244 }, { "epoch": 0.4040570842531593, "grad_norm": 2.075922509830861, "learning_rate": 6.758382817990207e-07, "loss": 0.2602, "step": 23245 }, { "epoch": 0.4040744667906621, "grad_norm": 1.1011188559758818, "learning_rate": 6.758119303190442e-07, "loss": 0.3532, "step": 23246 }, { "epoch": 0.40409184932816494, "grad_norm": 1.4724290042356374, "learning_rate": 6.757855782818246e-07, "loss": 0.277, "step": 23247 }, { "epoch": 0.4041092318656678, "grad_norm": 1.4005122504122578, "learning_rate": 6.757592256874455e-07, "loss": 0.4667, "step": 23248 }, { "epoch": 0.40412661440317055, "grad_norm": 1.7305925262754651, "learning_rate": 6.757328725359903e-07, "loss": 0.2825, "step": 23249 }, { "epoch": 0.4041439969406734, "grad_norm": 3.0922276390111545, "learning_rate": 6.757065188275428e-07, "loss": 0.2604, "step": 23250 }, { "epoch": 0.4041613794781762, "grad_norm": 1.456269745372191, "learning_rate": 6.756801645621862e-07, "loss": 0.286, "step": 23251 }, { "epoch": 0.40417876201567904, "grad_norm": 1.1109034196841634, "learning_rate": 6.756538097400043e-07, "loss": 0.2724, "step": 23252 }, { "epoch": 0.40419614455318187, "grad_norm": 1.383517230553344, "learning_rate": 6.756274543610803e-07, "loss": 0.2383, "step": 23253 }, { "epoch": 0.4042135270906847, "grad_norm": 3.147936393246124, "learning_rate": 6.756010984254983e-07, "loss": 0.3241, "step": 23254 }, { "epoch": 0.40423090962818753, "grad_norm": 0.9824861695811309, "learning_rate": 6.75574741933341e-07, "loss": 0.2335, "step": 23255 }, { "epoch": 0.40424829216569036, "grad_norm": 2.006112675498293, "learning_rate": 6.755483848846928e-07, "loss": 0.3233, "step": 23256 }, { "epoch": 0.4042656747031932, "grad_norm": 1.1123443856923951, "learning_rate": 6.755220272796368e-07, "loss": 0.3488, "step": 23257 }, { "epoch": 0.404283057240696, "grad_norm": 1.9666381575363816, "learning_rate": 6.754956691182566e-07, "loss": 0.3082, "step": 23258 }, { "epoch": 0.4043004397781988, "grad_norm": 1.8253231075281309, "learning_rate": 6.754693104006356e-07, "loss": 0.3922, "step": 23259 }, { "epoch": 0.4043178223157016, "grad_norm": 1.384174820549043, "learning_rate": 6.754429511268576e-07, "loss": 0.2697, "step": 23260 }, { "epoch": 0.40433520485320446, "grad_norm": 1.9650815292064485, "learning_rate": 6.75416591297006e-07, "loss": 0.4201, "step": 23261 }, { "epoch": 0.4043525873907073, "grad_norm": 1.7938167823766527, "learning_rate": 6.753902309111645e-07, "loss": 0.3774, "step": 23262 }, { "epoch": 0.4043699699282101, "grad_norm": 2.3445411705341903, "learning_rate": 6.753638699694164e-07, "loss": 0.3988, "step": 23263 }, { "epoch": 0.40438735246571295, "grad_norm": 1.5342348846643603, "learning_rate": 6.753375084718453e-07, "loss": 0.2806, "step": 23264 }, { "epoch": 0.4044047350032158, "grad_norm": 1.9434340165398523, "learning_rate": 6.753111464185351e-07, "loss": 0.2293, "step": 23265 }, { "epoch": 0.4044221175407186, "grad_norm": 1.7704865348836105, "learning_rate": 6.75284783809569e-07, "loss": 0.2257, "step": 23266 }, { "epoch": 0.40443950007822144, "grad_norm": 1.1868461667976564, "learning_rate": 6.752584206450307e-07, "loss": 0.2518, "step": 23267 }, { "epoch": 0.40445688261572427, "grad_norm": 1.2154192833684114, "learning_rate": 6.752320569250035e-07, "loss": 0.2306, "step": 23268 }, { "epoch": 0.40447426515322704, "grad_norm": 2.710019885754403, "learning_rate": 6.752056926495715e-07, "loss": 0.3525, "step": 23269 }, { "epoch": 0.40449164769072987, "grad_norm": 2.4793048144203977, "learning_rate": 6.751793278188176e-07, "loss": 0.284, "step": 23270 }, { "epoch": 0.4045090302282327, "grad_norm": 1.9894351284446266, "learning_rate": 6.751529624328258e-07, "loss": 0.2999, "step": 23271 }, { "epoch": 0.40452641276573553, "grad_norm": 1.7477247277727672, "learning_rate": 6.751265964916797e-07, "loss": 0.3021, "step": 23272 }, { "epoch": 0.40454379530323836, "grad_norm": 2.979639628508922, "learning_rate": 6.751002299954626e-07, "loss": 0.5503, "step": 23273 }, { "epoch": 0.4045611778407412, "grad_norm": 3.272992051858819, "learning_rate": 6.750738629442583e-07, "loss": 0.36, "step": 23274 }, { "epoch": 0.404578560378244, "grad_norm": 1.7593595004975213, "learning_rate": 6.750474953381502e-07, "loss": 0.3108, "step": 23275 }, { "epoch": 0.40459594291574685, "grad_norm": 1.8523771486940377, "learning_rate": 6.75021127177222e-07, "loss": 0.4327, "step": 23276 }, { "epoch": 0.4046133254532497, "grad_norm": 1.644329094518855, "learning_rate": 6.749947584615573e-07, "loss": 0.4217, "step": 23277 }, { "epoch": 0.4046307079907525, "grad_norm": 2.5422411959191256, "learning_rate": 6.749683891912394e-07, "loss": 0.2203, "step": 23278 }, { "epoch": 0.4046480905282553, "grad_norm": 2.2114727970355434, "learning_rate": 6.749420193663523e-07, "loss": 0.3689, "step": 23279 }, { "epoch": 0.4046654730657581, "grad_norm": 3.9629484704795304, "learning_rate": 6.749156489869792e-07, "loss": 0.2242, "step": 23280 }, { "epoch": 0.40468285560326095, "grad_norm": 1.4065058122136262, "learning_rate": 6.74889278053204e-07, "loss": 0.2723, "step": 23281 }, { "epoch": 0.4047002381407638, "grad_norm": 1.2448698211750637, "learning_rate": 6.7486290656511e-07, "loss": 0.2243, "step": 23282 }, { "epoch": 0.4047176206782666, "grad_norm": 1.3545672452129347, "learning_rate": 6.748365345227808e-07, "loss": 0.2361, "step": 23283 }, { "epoch": 0.40473500321576944, "grad_norm": 1.516731587721716, "learning_rate": 6.748101619263003e-07, "loss": 0.2295, "step": 23284 }, { "epoch": 0.40475238575327227, "grad_norm": 1.4171859696019091, "learning_rate": 6.747837887757518e-07, "loss": 0.3499, "step": 23285 }, { "epoch": 0.4047697682907751, "grad_norm": 2.208326019459312, "learning_rate": 6.74757415071219e-07, "loss": 0.236, "step": 23286 }, { "epoch": 0.40478715082827793, "grad_norm": 1.2683648124258686, "learning_rate": 6.747310408127855e-07, "loss": 0.2183, "step": 23287 }, { "epoch": 0.40480453336578076, "grad_norm": 1.6400141077928074, "learning_rate": 6.747046660005349e-07, "loss": 0.271, "step": 23288 }, { "epoch": 0.40482191590328354, "grad_norm": 1.148409203862909, "learning_rate": 6.746782906345506e-07, "loss": 0.4017, "step": 23289 }, { "epoch": 0.40483929844078637, "grad_norm": 2.3722239339016706, "learning_rate": 6.746519147149166e-07, "loss": 0.309, "step": 23290 }, { "epoch": 0.4048566809782892, "grad_norm": 1.380532669291218, "learning_rate": 6.746255382417161e-07, "loss": 0.3162, "step": 23291 }, { "epoch": 0.404874063515792, "grad_norm": 1.353968534201491, "learning_rate": 6.745991612150328e-07, "loss": 0.2239, "step": 23292 }, { "epoch": 0.40489144605329486, "grad_norm": 3.073173681009045, "learning_rate": 6.745727836349504e-07, "loss": 0.6044, "step": 23293 }, { "epoch": 0.4049088285907977, "grad_norm": 2.153177732580026, "learning_rate": 6.745464055015525e-07, "loss": 0.3504, "step": 23294 }, { "epoch": 0.4049262111283005, "grad_norm": 2.116801032258964, "learning_rate": 6.745200268149227e-07, "loss": 0.2237, "step": 23295 }, { "epoch": 0.40494359366580335, "grad_norm": 1.6933671773502337, "learning_rate": 6.744936475751446e-07, "loss": 0.4648, "step": 23296 }, { "epoch": 0.4049609762033062, "grad_norm": 2.4634499431964385, "learning_rate": 6.744672677823018e-07, "loss": 0.326, "step": 23297 }, { "epoch": 0.404978358740809, "grad_norm": 1.9303393115026575, "learning_rate": 6.74440887436478e-07, "loss": 0.312, "step": 23298 }, { "epoch": 0.4049957412783118, "grad_norm": 1.5402735744159706, "learning_rate": 6.744145065377564e-07, "loss": 0.564, "step": 23299 }, { "epoch": 0.4050131238158146, "grad_norm": 1.3938629260229753, "learning_rate": 6.743881250862211e-07, "loss": 0.5628, "step": 23300 }, { "epoch": 0.40503050635331744, "grad_norm": 2.0853104638186855, "learning_rate": 6.743617430819557e-07, "loss": 0.2307, "step": 23301 }, { "epoch": 0.4050478888908203, "grad_norm": 2.0560203465277844, "learning_rate": 6.743353605250434e-07, "loss": 0.3426, "step": 23302 }, { "epoch": 0.4050652714283231, "grad_norm": 0.9922963209141868, "learning_rate": 6.743089774155682e-07, "loss": 0.231, "step": 23303 }, { "epoch": 0.40508265396582593, "grad_norm": 2.16379670868369, "learning_rate": 6.742825937536138e-07, "loss": 0.6655, "step": 23304 }, { "epoch": 0.40510003650332876, "grad_norm": 2.4170166540537172, "learning_rate": 6.742562095392634e-07, "loss": 0.3713, "step": 23305 }, { "epoch": 0.4051174190408316, "grad_norm": 2.1733030429504785, "learning_rate": 6.742298247726009e-07, "loss": 0.2064, "step": 23306 }, { "epoch": 0.4051348015783344, "grad_norm": 0.9345285773341764, "learning_rate": 6.7420343945371e-07, "loss": 0.2652, "step": 23307 }, { "epoch": 0.40515218411583725, "grad_norm": 2.5869788243571783, "learning_rate": 6.741770535826742e-07, "loss": 0.5862, "step": 23308 }, { "epoch": 0.40516956665334003, "grad_norm": 1.559748938420036, "learning_rate": 6.74150667159577e-07, "loss": 0.3839, "step": 23309 }, { "epoch": 0.40518694919084286, "grad_norm": 2.1014223161730703, "learning_rate": 6.741242801845024e-07, "loss": 0.3692, "step": 23310 }, { "epoch": 0.4052043317283457, "grad_norm": 0.9672912897809232, "learning_rate": 6.740978926575336e-07, "loss": 0.2436, "step": 23311 }, { "epoch": 0.4052217142658485, "grad_norm": 2.01414732300709, "learning_rate": 6.740715045787546e-07, "loss": 0.3164, "step": 23312 }, { "epoch": 0.40523909680335135, "grad_norm": 2.35426882959417, "learning_rate": 6.740451159482487e-07, "loss": 0.2994, "step": 23313 }, { "epoch": 0.4052564793408542, "grad_norm": 1.8016261890161422, "learning_rate": 6.740187267661001e-07, "loss": 0.391, "step": 23314 }, { "epoch": 0.405273861878357, "grad_norm": 1.8003069838933872, "learning_rate": 6.739923370323918e-07, "loss": 0.6497, "step": 23315 }, { "epoch": 0.40529124441585984, "grad_norm": 1.6018652549418229, "learning_rate": 6.739659467472077e-07, "loss": 0.312, "step": 23316 }, { "epoch": 0.40530862695336267, "grad_norm": 1.7422491294906113, "learning_rate": 6.739395559106316e-07, "loss": 0.423, "step": 23317 }, { "epoch": 0.4053260094908655, "grad_norm": 2.263299819832799, "learning_rate": 6.739131645227469e-07, "loss": 0.4227, "step": 23318 }, { "epoch": 0.4053433920283683, "grad_norm": 2.4457120486542845, "learning_rate": 6.738867725836374e-07, "loss": 0.2659, "step": 23319 }, { "epoch": 0.4053607745658711, "grad_norm": 1.5647785217976826, "learning_rate": 6.738603800933867e-07, "loss": 0.1663, "step": 23320 }, { "epoch": 0.40537815710337394, "grad_norm": 1.4336409263751841, "learning_rate": 6.738339870520785e-07, "loss": 0.2912, "step": 23321 }, { "epoch": 0.40539553964087677, "grad_norm": 1.6089759827147208, "learning_rate": 6.738075934597963e-07, "loss": 0.3151, "step": 23322 }, { "epoch": 0.4054129221783796, "grad_norm": 1.7377265624104468, "learning_rate": 6.73781199316624e-07, "loss": 0.2466, "step": 23323 }, { "epoch": 0.40543030471588243, "grad_norm": 1.450256294381521, "learning_rate": 6.737548046226451e-07, "loss": 0.1483, "step": 23324 }, { "epoch": 0.40544768725338526, "grad_norm": 1.4840780632675294, "learning_rate": 6.737284093779433e-07, "loss": 0.1417, "step": 23325 }, { "epoch": 0.4054650697908881, "grad_norm": 1.2877274202337654, "learning_rate": 6.737020135826022e-07, "loss": 0.1629, "step": 23326 }, { "epoch": 0.4054824523283909, "grad_norm": 1.4254729803665585, "learning_rate": 6.736756172367055e-07, "loss": 0.4006, "step": 23327 }, { "epoch": 0.40549983486589375, "grad_norm": 1.7118181871581684, "learning_rate": 6.736492203403369e-07, "loss": 0.2825, "step": 23328 }, { "epoch": 0.4055172174033965, "grad_norm": 1.5408626994675674, "learning_rate": 6.7362282289358e-07, "loss": 0.4684, "step": 23329 }, { "epoch": 0.40553459994089935, "grad_norm": 1.6563465381775038, "learning_rate": 6.735964248965186e-07, "loss": 0.2952, "step": 23330 }, { "epoch": 0.4055519824784022, "grad_norm": 1.5067848809982496, "learning_rate": 6.735700263492362e-07, "loss": 0.3951, "step": 23331 }, { "epoch": 0.405569365015905, "grad_norm": 1.3108237613492872, "learning_rate": 6.735436272518166e-07, "loss": 0.1776, "step": 23332 }, { "epoch": 0.40558674755340784, "grad_norm": 2.5935970513135187, "learning_rate": 6.735172276043434e-07, "loss": 0.3186, "step": 23333 }, { "epoch": 0.4056041300909107, "grad_norm": 1.704702682144677, "learning_rate": 6.734908274069002e-07, "loss": 0.4358, "step": 23334 }, { "epoch": 0.4056215126284135, "grad_norm": 1.2582553148310525, "learning_rate": 6.734644266595708e-07, "loss": 0.3331, "step": 23335 }, { "epoch": 0.40563889516591634, "grad_norm": 2.1468309347122525, "learning_rate": 6.73438025362439e-07, "loss": 0.3485, "step": 23336 }, { "epoch": 0.40565627770341917, "grad_norm": 2.1573211796691414, "learning_rate": 6.734116235155882e-07, "loss": 0.3167, "step": 23337 }, { "epoch": 0.405673660240922, "grad_norm": 2.310380210451911, "learning_rate": 6.733852211191022e-07, "loss": 0.6111, "step": 23338 }, { "epoch": 0.40569104277842477, "grad_norm": 2.3867941126044205, "learning_rate": 6.733588181730649e-07, "loss": 0.2711, "step": 23339 }, { "epoch": 0.4057084253159276, "grad_norm": 1.7826675329485255, "learning_rate": 6.733324146775595e-07, "loss": 0.3006, "step": 23340 }, { "epoch": 0.40572580785343043, "grad_norm": 2.541997212227418, "learning_rate": 6.733060106326702e-07, "loss": 0.2615, "step": 23341 }, { "epoch": 0.40574319039093326, "grad_norm": 1.5518168399270402, "learning_rate": 6.732796060384803e-07, "loss": 0.2826, "step": 23342 }, { "epoch": 0.4057605729284361, "grad_norm": 1.5556522638772055, "learning_rate": 6.732532008950737e-07, "loss": 0.3254, "step": 23343 }, { "epoch": 0.4057779554659389, "grad_norm": 1.3668313132012766, "learning_rate": 6.73226795202534e-07, "loss": 0.3239, "step": 23344 }, { "epoch": 0.40579533800344175, "grad_norm": 1.120647653345887, "learning_rate": 6.73200388960945e-07, "loss": 0.1385, "step": 23345 }, { "epoch": 0.4058127205409446, "grad_norm": 2.5467287244090695, "learning_rate": 6.731739821703903e-07, "loss": 0.5695, "step": 23346 }, { "epoch": 0.4058301030784474, "grad_norm": 1.4690115677424005, "learning_rate": 6.731475748309537e-07, "loss": 0.3448, "step": 23347 }, { "epoch": 0.40584748561595024, "grad_norm": 1.8286746114759558, "learning_rate": 6.731211669427187e-07, "loss": 0.2245, "step": 23348 }, { "epoch": 0.405864868153453, "grad_norm": 1.8767310856291546, "learning_rate": 6.730947585057691e-07, "loss": 0.2838, "step": 23349 }, { "epoch": 0.40588225069095585, "grad_norm": 3.156301659947399, "learning_rate": 6.730683495201888e-07, "loss": 0.3911, "step": 23350 }, { "epoch": 0.4058996332284587, "grad_norm": 1.8945177802409572, "learning_rate": 6.730419399860612e-07, "loss": 0.201, "step": 23351 }, { "epoch": 0.4059170157659615, "grad_norm": 1.6132020146135533, "learning_rate": 6.730155299034701e-07, "loss": 0.3048, "step": 23352 }, { "epoch": 0.40593439830346434, "grad_norm": 4.426096328610078, "learning_rate": 6.729891192724993e-07, "loss": 0.3248, "step": 23353 }, { "epoch": 0.40595178084096717, "grad_norm": 1.334257406298415, "learning_rate": 6.729627080932325e-07, "loss": 0.2351, "step": 23354 }, { "epoch": 0.40596916337847, "grad_norm": 1.1567892270238205, "learning_rate": 6.729362963657534e-07, "loss": 0.1669, "step": 23355 }, { "epoch": 0.40598654591597283, "grad_norm": 1.3054156504061918, "learning_rate": 6.729098840901456e-07, "loss": 0.201, "step": 23356 }, { "epoch": 0.40600392845347566, "grad_norm": 1.6058112437859298, "learning_rate": 6.728834712664928e-07, "loss": 0.2298, "step": 23357 }, { "epoch": 0.4060213109909785, "grad_norm": 2.0986616355686123, "learning_rate": 6.728570578948791e-07, "loss": 0.3582, "step": 23358 }, { "epoch": 0.40603869352848126, "grad_norm": 1.459640073765343, "learning_rate": 6.728306439753877e-07, "loss": 0.1774, "step": 23359 }, { "epoch": 0.4060560760659841, "grad_norm": 1.7833968249534895, "learning_rate": 6.728042295081027e-07, "loss": 0.483, "step": 23360 }, { "epoch": 0.4060734586034869, "grad_norm": 2.4207199395259753, "learning_rate": 6.727778144931076e-07, "loss": 0.289, "step": 23361 }, { "epoch": 0.40609084114098976, "grad_norm": 1.4837931563535576, "learning_rate": 6.727513989304862e-07, "loss": 0.3548, "step": 23362 }, { "epoch": 0.4061082236784926, "grad_norm": 1.2097167234322084, "learning_rate": 6.727249828203221e-07, "loss": 0.1836, "step": 23363 }, { "epoch": 0.4061256062159954, "grad_norm": 1.6552148272802747, "learning_rate": 6.726985661626994e-07, "loss": 0.239, "step": 23364 }, { "epoch": 0.40614298875349825, "grad_norm": 1.9253477555743652, "learning_rate": 6.726721489577013e-07, "loss": 0.3469, "step": 23365 }, { "epoch": 0.4061603712910011, "grad_norm": 1.774455155559557, "learning_rate": 6.72645731205412e-07, "loss": 0.2202, "step": 23366 }, { "epoch": 0.4061777538285039, "grad_norm": 1.4542796149286648, "learning_rate": 6.72619312905915e-07, "loss": 0.374, "step": 23367 }, { "epoch": 0.40619513636600674, "grad_norm": 2.5016537957314227, "learning_rate": 6.72592894059294e-07, "loss": 0.2968, "step": 23368 }, { "epoch": 0.4062125189035095, "grad_norm": 1.873154503117392, "learning_rate": 6.725664746656328e-07, "loss": 0.3559, "step": 23369 }, { "epoch": 0.40622990144101234, "grad_norm": 2.2184649320996304, "learning_rate": 6.725400547250151e-07, "loss": 0.3742, "step": 23370 }, { "epoch": 0.4062472839785152, "grad_norm": 1.5018773091286746, "learning_rate": 6.725136342375249e-07, "loss": 0.4837, "step": 23371 }, { "epoch": 0.406264666516018, "grad_norm": 2.511752172032468, "learning_rate": 6.724872132032457e-07, "loss": 0.3684, "step": 23372 }, { "epoch": 0.40628204905352083, "grad_norm": 1.6054765275408327, "learning_rate": 6.724607916222611e-07, "loss": 0.3661, "step": 23373 }, { "epoch": 0.40629943159102366, "grad_norm": 2.830760715335336, "learning_rate": 6.72434369494655e-07, "loss": 0.5622, "step": 23374 }, { "epoch": 0.4063168141285265, "grad_norm": 0.9826301658100371, "learning_rate": 6.724079468205114e-07, "loss": 0.1213, "step": 23375 }, { "epoch": 0.4063341966660293, "grad_norm": 2.5026892554762354, "learning_rate": 6.723815235999136e-07, "loss": 0.4102, "step": 23376 }, { "epoch": 0.40635157920353215, "grad_norm": 5.54777925468887, "learning_rate": 6.723550998329457e-07, "loss": 0.3147, "step": 23377 }, { "epoch": 0.40636896174103493, "grad_norm": 1.4167947737365953, "learning_rate": 6.723286755196911e-07, "loss": 0.2772, "step": 23378 }, { "epoch": 0.40638634427853776, "grad_norm": 1.2958848255400517, "learning_rate": 6.723022506602339e-07, "loss": 0.2822, "step": 23379 }, { "epoch": 0.4064037268160406, "grad_norm": 4.053126015310323, "learning_rate": 6.722758252546578e-07, "loss": 0.3933, "step": 23380 }, { "epoch": 0.4064211093535434, "grad_norm": 1.4197348165592456, "learning_rate": 6.722493993030464e-07, "loss": 0.4137, "step": 23381 }, { "epoch": 0.40643849189104625, "grad_norm": 1.5213049754533772, "learning_rate": 6.722229728054833e-07, "loss": 0.2418, "step": 23382 }, { "epoch": 0.4064558744285491, "grad_norm": 1.809877063271499, "learning_rate": 6.721965457620528e-07, "loss": 0.2671, "step": 23383 }, { "epoch": 0.4064732569660519, "grad_norm": 2.609633890145011, "learning_rate": 6.721701181728382e-07, "loss": 0.2501, "step": 23384 }, { "epoch": 0.40649063950355474, "grad_norm": 1.287651053491306, "learning_rate": 6.721436900379234e-07, "loss": 0.3844, "step": 23385 }, { "epoch": 0.40650802204105757, "grad_norm": 1.943226511500285, "learning_rate": 6.721172613573923e-07, "loss": 0.3302, "step": 23386 }, { "epoch": 0.4065254045785604, "grad_norm": 1.4719786574234728, "learning_rate": 6.720908321313285e-07, "loss": 0.2398, "step": 23387 }, { "epoch": 0.4065427871160632, "grad_norm": 3.88812408199199, "learning_rate": 6.720644023598158e-07, "loss": 0.4646, "step": 23388 }, { "epoch": 0.406560169653566, "grad_norm": 4.994059946097469, "learning_rate": 6.72037972042938e-07, "loss": 0.2294, "step": 23389 }, { "epoch": 0.40657755219106884, "grad_norm": 5.457169672902555, "learning_rate": 6.720115411807788e-07, "loss": 0.3317, "step": 23390 }, { "epoch": 0.40659493472857167, "grad_norm": 1.5073556414457108, "learning_rate": 6.71985109773422e-07, "loss": 0.2969, "step": 23391 }, { "epoch": 0.4066123172660745, "grad_norm": 1.2874603034914949, "learning_rate": 6.719586778209515e-07, "loss": 0.2602, "step": 23392 }, { "epoch": 0.4066296998035773, "grad_norm": 1.3739641056149179, "learning_rate": 6.71932245323451e-07, "loss": 0.3547, "step": 23393 }, { "epoch": 0.40664708234108016, "grad_norm": 2.3208014431838326, "learning_rate": 6.719058122810042e-07, "loss": 0.4408, "step": 23394 }, { "epoch": 0.406664464878583, "grad_norm": 1.178743664150764, "learning_rate": 6.718793786936949e-07, "loss": 0.2325, "step": 23395 }, { "epoch": 0.4066818474160858, "grad_norm": 1.6905533394247667, "learning_rate": 6.71852944561607e-07, "loss": 0.2077, "step": 23396 }, { "epoch": 0.40669922995358865, "grad_norm": 1.8247878965701376, "learning_rate": 6.718265098848242e-07, "loss": 0.3693, "step": 23397 }, { "epoch": 0.4067166124910914, "grad_norm": 1.3553900649009327, "learning_rate": 6.718000746634302e-07, "loss": 0.2402, "step": 23398 }, { "epoch": 0.40673399502859425, "grad_norm": 1.996152329143813, "learning_rate": 6.717736388975091e-07, "loss": 0.3423, "step": 23399 }, { "epoch": 0.4067513775660971, "grad_norm": 1.5507072846037544, "learning_rate": 6.717472025871443e-07, "loss": 0.1862, "step": 23400 }, { "epoch": 0.4067687601035999, "grad_norm": 2.9012593744285082, "learning_rate": 6.717207657324198e-07, "loss": 0.5559, "step": 23401 }, { "epoch": 0.40678614264110274, "grad_norm": 1.2632072697978782, "learning_rate": 6.716943283334193e-07, "loss": 0.3793, "step": 23402 }, { "epoch": 0.4068035251786056, "grad_norm": 2.6439845735458802, "learning_rate": 6.716678903902268e-07, "loss": 0.3056, "step": 23403 }, { "epoch": 0.4068209077161084, "grad_norm": 1.7116016373720448, "learning_rate": 6.716414519029257e-07, "loss": 0.6202, "step": 23404 }, { "epoch": 0.40683829025361123, "grad_norm": 1.8772042736145924, "learning_rate": 6.716150128716003e-07, "loss": 0.2399, "step": 23405 }, { "epoch": 0.40685567279111406, "grad_norm": 1.2133843897077754, "learning_rate": 6.71588573296334e-07, "loss": 0.24, "step": 23406 }, { "epoch": 0.4068730553286169, "grad_norm": 1.5051367518944758, "learning_rate": 6.715621331772107e-07, "loss": 0.3731, "step": 23407 }, { "epoch": 0.40689043786611967, "grad_norm": 2.2392000641541427, "learning_rate": 6.715356925143144e-07, "loss": 0.282, "step": 23408 }, { "epoch": 0.4069078204036225, "grad_norm": 1.6370922236626018, "learning_rate": 6.715092513077286e-07, "loss": 0.2411, "step": 23409 }, { "epoch": 0.40692520294112533, "grad_norm": 1.7445021064651314, "learning_rate": 6.714828095575374e-07, "loss": 0.2721, "step": 23410 }, { "epoch": 0.40694258547862816, "grad_norm": 1.486717977666096, "learning_rate": 6.714563672638244e-07, "loss": 0.2449, "step": 23411 }, { "epoch": 0.406959968016131, "grad_norm": 1.15025342503296, "learning_rate": 6.714299244266734e-07, "loss": 0.323, "step": 23412 }, { "epoch": 0.4069773505536338, "grad_norm": 2.724108464486301, "learning_rate": 6.714034810461682e-07, "loss": 0.3464, "step": 23413 }, { "epoch": 0.40699473309113665, "grad_norm": 1.2820308574044577, "learning_rate": 6.71377037122393e-07, "loss": 0.1897, "step": 23414 }, { "epoch": 0.4070121156286395, "grad_norm": 1.2081836325295805, "learning_rate": 6.713505926554313e-07, "loss": 0.1961, "step": 23415 }, { "epoch": 0.4070294981661423, "grad_norm": 4.329220816261513, "learning_rate": 6.713241476453668e-07, "loss": 0.2859, "step": 23416 }, { "epoch": 0.40704688070364514, "grad_norm": 1.6011485752007304, "learning_rate": 6.712977020922834e-07, "loss": 0.3275, "step": 23417 }, { "epoch": 0.4070642632411479, "grad_norm": 1.7673261155576487, "learning_rate": 6.712712559962651e-07, "loss": 0.2564, "step": 23418 }, { "epoch": 0.40708164577865075, "grad_norm": 1.5434070717701123, "learning_rate": 6.712448093573954e-07, "loss": 0.2687, "step": 23419 }, { "epoch": 0.4070990283161536, "grad_norm": 1.0161234703937199, "learning_rate": 6.712183621757585e-07, "loss": 0.2262, "step": 23420 }, { "epoch": 0.4071164108536564, "grad_norm": 2.1517272945687225, "learning_rate": 6.71191914451438e-07, "loss": 0.2999, "step": 23421 }, { "epoch": 0.40713379339115924, "grad_norm": 2.0368753405152074, "learning_rate": 6.711654661845178e-07, "loss": 0.3256, "step": 23422 }, { "epoch": 0.40715117592866207, "grad_norm": 2.472452860966976, "learning_rate": 6.711390173750816e-07, "loss": 0.2551, "step": 23423 }, { "epoch": 0.4071685584661649, "grad_norm": 2.1468028528101364, "learning_rate": 6.711125680232134e-07, "loss": 0.1949, "step": 23424 }, { "epoch": 0.40718594100366773, "grad_norm": 1.7434851381283538, "learning_rate": 6.710861181289969e-07, "loss": 0.3795, "step": 23425 }, { "epoch": 0.40720332354117056, "grad_norm": 1.9989632062535219, "learning_rate": 6.710596676925161e-07, "loss": 0.2999, "step": 23426 }, { "epoch": 0.4072207060786734, "grad_norm": 1.3654604341810659, "learning_rate": 6.710332167138547e-07, "loss": 0.1495, "step": 23427 }, { "epoch": 0.40723808861617616, "grad_norm": 2.4339613595145324, "learning_rate": 6.710067651930966e-07, "loss": 0.3432, "step": 23428 }, { "epoch": 0.407255471153679, "grad_norm": 2.121481721433898, "learning_rate": 6.709803131303254e-07, "loss": 0.3557, "step": 23429 }, { "epoch": 0.4072728536911818, "grad_norm": 1.7059022850475276, "learning_rate": 6.709538605256254e-07, "loss": 0.2566, "step": 23430 }, { "epoch": 0.40729023622868465, "grad_norm": 2.961476948073681, "learning_rate": 6.709274073790801e-07, "loss": 0.3668, "step": 23431 }, { "epoch": 0.4073076187661875, "grad_norm": 1.0187302980606554, "learning_rate": 6.709009536907734e-07, "loss": 0.3013, "step": 23432 }, { "epoch": 0.4073250013036903, "grad_norm": 1.4612435918619884, "learning_rate": 6.708744994607892e-07, "loss": 0.3414, "step": 23433 }, { "epoch": 0.40734238384119315, "grad_norm": 1.3290273378970705, "learning_rate": 6.708480446892114e-07, "loss": 0.2519, "step": 23434 }, { "epoch": 0.407359766378696, "grad_norm": 2.109867031797, "learning_rate": 6.708215893761238e-07, "loss": 0.2905, "step": 23435 }, { "epoch": 0.4073771489161988, "grad_norm": 1.8079391124041957, "learning_rate": 6.707951335216101e-07, "loss": 0.2245, "step": 23436 }, { "epoch": 0.40739453145370164, "grad_norm": 2.048104077144987, "learning_rate": 6.707686771257543e-07, "loss": 0.3756, "step": 23437 }, { "epoch": 0.4074119139912044, "grad_norm": 1.3336957461915377, "learning_rate": 6.707422201886402e-07, "loss": 0.3428, "step": 23438 }, { "epoch": 0.40742929652870724, "grad_norm": 1.9222322507961613, "learning_rate": 6.707157627103519e-07, "loss": 0.2028, "step": 23439 }, { "epoch": 0.40744667906621007, "grad_norm": 2.4539106598312976, "learning_rate": 6.70689304690973e-07, "loss": 0.3843, "step": 23440 }, { "epoch": 0.4074640616037129, "grad_norm": 1.6776580777161934, "learning_rate": 6.706628461305872e-07, "loss": 0.3964, "step": 23441 }, { "epoch": 0.40748144414121573, "grad_norm": 1.611541188726594, "learning_rate": 6.706363870292787e-07, "loss": 0.3662, "step": 23442 }, { "epoch": 0.40749882667871856, "grad_norm": 1.0953415590493436, "learning_rate": 6.706099273871314e-07, "loss": 0.1497, "step": 23443 }, { "epoch": 0.4075162092162214, "grad_norm": 1.499636281769491, "learning_rate": 6.705834672042288e-07, "loss": 0.2521, "step": 23444 }, { "epoch": 0.4075335917537242, "grad_norm": 1.6391354511845142, "learning_rate": 6.70557006480655e-07, "loss": 0.2842, "step": 23445 }, { "epoch": 0.40755097429122705, "grad_norm": 1.3614476642836435, "learning_rate": 6.705305452164939e-07, "loss": 0.5359, "step": 23446 }, { "epoch": 0.4075683568287299, "grad_norm": 2.59895179447522, "learning_rate": 6.705040834118292e-07, "loss": 0.4055, "step": 23447 }, { "epoch": 0.40758573936623266, "grad_norm": 1.149784936282214, "learning_rate": 6.704776210667449e-07, "loss": 0.3359, "step": 23448 }, { "epoch": 0.4076031219037355, "grad_norm": 1.1900434679335994, "learning_rate": 6.70451158181325e-07, "loss": 0.2709, "step": 23449 }, { "epoch": 0.4076205044412383, "grad_norm": 3.8998621171980123, "learning_rate": 6.704246947556531e-07, "loss": 0.3208, "step": 23450 }, { "epoch": 0.40763788697874115, "grad_norm": 2.6729303068773906, "learning_rate": 6.703982307898132e-07, "loss": 0.3706, "step": 23451 }, { "epoch": 0.407655269516244, "grad_norm": 1.6032898376028513, "learning_rate": 6.703717662838893e-07, "loss": 0.2951, "step": 23452 }, { "epoch": 0.4076726520537468, "grad_norm": 2.3733691695453984, "learning_rate": 6.703453012379651e-07, "loss": 0.254, "step": 23453 }, { "epoch": 0.40769003459124964, "grad_norm": 1.5986209241347977, "learning_rate": 6.703188356521243e-07, "loss": 0.2125, "step": 23454 }, { "epoch": 0.40770741712875247, "grad_norm": 5.235492269307744, "learning_rate": 6.702923695264513e-07, "loss": 0.2225, "step": 23455 }, { "epoch": 0.4077247996662553, "grad_norm": 2.4562711658137455, "learning_rate": 6.702659028610297e-07, "loss": 0.3046, "step": 23456 }, { "epoch": 0.40774218220375813, "grad_norm": 2.2756195330296993, "learning_rate": 6.702394356559434e-07, "loss": 0.3226, "step": 23457 }, { "epoch": 0.4077595647412609, "grad_norm": 1.7682529234249844, "learning_rate": 6.702129679112762e-07, "loss": 0.278, "step": 23458 }, { "epoch": 0.40777694727876374, "grad_norm": 2.120301761737604, "learning_rate": 6.701864996271123e-07, "loss": 0.2995, "step": 23459 }, { "epoch": 0.40779432981626657, "grad_norm": 1.9038869715307807, "learning_rate": 6.701600308035351e-07, "loss": 0.6155, "step": 23460 }, { "epoch": 0.4078117123537694, "grad_norm": 1.8214251314498549, "learning_rate": 6.701335614406289e-07, "loss": 0.5559, "step": 23461 }, { "epoch": 0.4078290948912722, "grad_norm": 1.7408575181826818, "learning_rate": 6.701070915384774e-07, "loss": 0.2696, "step": 23462 }, { "epoch": 0.40784647742877506, "grad_norm": 1.0681210577329054, "learning_rate": 6.700806210971646e-07, "loss": 0.2412, "step": 23463 }, { "epoch": 0.4078638599662779, "grad_norm": 1.2070278094571885, "learning_rate": 6.700541501167744e-07, "loss": 0.2944, "step": 23464 }, { "epoch": 0.4078812425037807, "grad_norm": 1.6054890260237695, "learning_rate": 6.700276785973906e-07, "loss": 0.2456, "step": 23465 }, { "epoch": 0.40789862504128355, "grad_norm": 1.4510257105916442, "learning_rate": 6.700012065390972e-07, "loss": 0.4756, "step": 23466 }, { "epoch": 0.4079160075787864, "grad_norm": 1.6007299790340794, "learning_rate": 6.699747339419779e-07, "loss": 0.553, "step": 23467 }, { "epoch": 0.40793339011628915, "grad_norm": 2.3797611063967095, "learning_rate": 6.699482608061171e-07, "loss": 0.2763, "step": 23468 }, { "epoch": 0.407950772653792, "grad_norm": 1.2132676101445017, "learning_rate": 6.699217871315981e-07, "loss": 0.216, "step": 23469 }, { "epoch": 0.4079681551912948, "grad_norm": 1.9704167744093286, "learning_rate": 6.698953129185052e-07, "loss": 0.2425, "step": 23470 }, { "epoch": 0.40798553772879764, "grad_norm": 1.3127977104676547, "learning_rate": 6.698688381669222e-07, "loss": 0.2606, "step": 23471 }, { "epoch": 0.4080029202663005, "grad_norm": 2.659002644094079, "learning_rate": 6.698423628769331e-07, "loss": 0.256, "step": 23472 }, { "epoch": 0.4080203028038033, "grad_norm": 1.6184095743239562, "learning_rate": 6.698158870486216e-07, "loss": 0.2298, "step": 23473 }, { "epoch": 0.40803768534130613, "grad_norm": 1.5872807680900323, "learning_rate": 6.697894106820717e-07, "loss": 0.2794, "step": 23474 }, { "epoch": 0.40805506787880896, "grad_norm": 0.8680460792534562, "learning_rate": 6.697629337773676e-07, "loss": 0.1268, "step": 23475 }, { "epoch": 0.4080724504163118, "grad_norm": 2.000994339751868, "learning_rate": 6.697364563345928e-07, "loss": 0.2737, "step": 23476 }, { "epoch": 0.4080898329538146, "grad_norm": 1.7249100358637925, "learning_rate": 6.697099783538314e-07, "loss": 0.1851, "step": 23477 }, { "epoch": 0.4081072154913174, "grad_norm": 2.7848993539738474, "learning_rate": 6.696834998351675e-07, "loss": 0.34, "step": 23478 }, { "epoch": 0.40812459802882023, "grad_norm": 2.8755110864008535, "learning_rate": 6.696570207786846e-07, "loss": 0.2059, "step": 23479 }, { "epoch": 0.40814198056632306, "grad_norm": 1.5620350241953722, "learning_rate": 6.696305411844669e-07, "loss": 0.2593, "step": 23480 }, { "epoch": 0.4081593631038259, "grad_norm": 2.233621125760487, "learning_rate": 6.696040610525984e-07, "loss": 0.6772, "step": 23481 }, { "epoch": 0.4081767456413287, "grad_norm": 1.444662205849405, "learning_rate": 6.695775803831631e-07, "loss": 0.2151, "step": 23482 }, { "epoch": 0.40819412817883155, "grad_norm": 1.825780922295306, "learning_rate": 6.695510991762446e-07, "loss": 0.2497, "step": 23483 }, { "epoch": 0.4082115107163344, "grad_norm": 1.3071864705080398, "learning_rate": 6.69524617431927e-07, "loss": 0.2498, "step": 23484 }, { "epoch": 0.4082288932538372, "grad_norm": 1.0907567021135967, "learning_rate": 6.694981351502942e-07, "loss": 0.1393, "step": 23485 }, { "epoch": 0.40824627579134004, "grad_norm": 1.035074865177617, "learning_rate": 6.694716523314302e-07, "loss": 0.1459, "step": 23486 }, { "epoch": 0.40826365832884287, "grad_norm": 1.102912842597895, "learning_rate": 6.694451689754189e-07, "loss": 0.1884, "step": 23487 }, { "epoch": 0.40828104086634565, "grad_norm": 1.4710174921455885, "learning_rate": 6.694186850823443e-07, "loss": 0.2563, "step": 23488 }, { "epoch": 0.4082984234038485, "grad_norm": 1.4538815016090412, "learning_rate": 6.693922006522902e-07, "loss": 0.2897, "step": 23489 }, { "epoch": 0.4083158059413513, "grad_norm": 1.2879726848559045, "learning_rate": 6.693657156853407e-07, "loss": 0.2385, "step": 23490 }, { "epoch": 0.40833318847885414, "grad_norm": 2.6421959673584965, "learning_rate": 6.693392301815797e-07, "loss": 0.3369, "step": 23491 }, { "epoch": 0.40835057101635697, "grad_norm": 2.526674570880716, "learning_rate": 6.693127441410912e-07, "loss": 0.3315, "step": 23492 }, { "epoch": 0.4083679535538598, "grad_norm": 2.221995996837103, "learning_rate": 6.692862575639589e-07, "loss": 0.326, "step": 23493 }, { "epoch": 0.4083853360913626, "grad_norm": 1.336091208866188, "learning_rate": 6.69259770450267e-07, "loss": 0.2253, "step": 23494 }, { "epoch": 0.40840271862886546, "grad_norm": 2.1953131591921315, "learning_rate": 6.692332828000995e-07, "loss": 0.3647, "step": 23495 }, { "epoch": 0.4084201011663683, "grad_norm": 0.9749332770840613, "learning_rate": 6.692067946135401e-07, "loss": 0.3738, "step": 23496 }, { "epoch": 0.4084374837038711, "grad_norm": 1.1880541761139731, "learning_rate": 6.691803058906728e-07, "loss": 0.201, "step": 23497 }, { "epoch": 0.4084548662413739, "grad_norm": 1.2072253083519353, "learning_rate": 6.691538166315818e-07, "loss": 0.2919, "step": 23498 }, { "epoch": 0.4084722487788767, "grad_norm": 1.7596492321680055, "learning_rate": 6.691273268363509e-07, "loss": 0.3103, "step": 23499 }, { "epoch": 0.40848963131637955, "grad_norm": 1.3449654833129014, "learning_rate": 6.691008365050641e-07, "loss": 0.3008, "step": 23500 }, { "epoch": 0.4085070138538824, "grad_norm": 6.790980772549619, "learning_rate": 6.690743456378052e-07, "loss": 0.2771, "step": 23501 }, { "epoch": 0.4085243963913852, "grad_norm": 1.1482728226158003, "learning_rate": 6.690478542346582e-07, "loss": 0.2329, "step": 23502 }, { "epoch": 0.40854177892888804, "grad_norm": 1.535141098335668, "learning_rate": 6.690213622957075e-07, "loss": 0.1605, "step": 23503 }, { "epoch": 0.4085591614663909, "grad_norm": 1.5331382794255317, "learning_rate": 6.689948698210364e-07, "loss": 0.2066, "step": 23504 }, { "epoch": 0.4085765440038937, "grad_norm": 1.9752929916977342, "learning_rate": 6.689683768107295e-07, "loss": 0.3267, "step": 23505 }, { "epoch": 0.40859392654139653, "grad_norm": 2.393619450651532, "learning_rate": 6.689418832648702e-07, "loss": 0.2856, "step": 23506 }, { "epoch": 0.40861130907889937, "grad_norm": 1.3088108545894421, "learning_rate": 6.689153891835428e-07, "loss": 0.243, "step": 23507 }, { "epoch": 0.40862869161640214, "grad_norm": 1.205728610158164, "learning_rate": 6.688888945668312e-07, "loss": 0.2981, "step": 23508 }, { "epoch": 0.40864607415390497, "grad_norm": 1.6901912006804822, "learning_rate": 6.688623994148196e-07, "loss": 0.262, "step": 23509 }, { "epoch": 0.4086634566914078, "grad_norm": 4.356394996356647, "learning_rate": 6.688359037275914e-07, "loss": 0.366, "step": 23510 }, { "epoch": 0.40868083922891063, "grad_norm": 1.2975957907646103, "learning_rate": 6.688094075052312e-07, "loss": 0.1919, "step": 23511 }, { "epoch": 0.40869822176641346, "grad_norm": 2.5888721703171376, "learning_rate": 6.687829107478226e-07, "loss": 0.3073, "step": 23512 }, { "epoch": 0.4087156043039163, "grad_norm": 2.58890870662003, "learning_rate": 6.687564134554499e-07, "loss": 0.2359, "step": 23513 }, { "epoch": 0.4087329868414191, "grad_norm": 1.2379529307546846, "learning_rate": 6.687299156281966e-07, "loss": 0.2711, "step": 23514 }, { "epoch": 0.40875036937892195, "grad_norm": 1.1673026798106438, "learning_rate": 6.687034172661472e-07, "loss": 0.2647, "step": 23515 }, { "epoch": 0.4087677519164248, "grad_norm": 1.2083484452485311, "learning_rate": 6.686769183693853e-07, "loss": 0.2634, "step": 23516 }, { "epoch": 0.40878513445392756, "grad_norm": 2.133478402542685, "learning_rate": 6.686504189379953e-07, "loss": 0.2245, "step": 23517 }, { "epoch": 0.4088025169914304, "grad_norm": 1.3799945557559925, "learning_rate": 6.686239189720606e-07, "loss": 0.208, "step": 23518 }, { "epoch": 0.4088198995289332, "grad_norm": 1.248131570823602, "learning_rate": 6.685974184716657e-07, "loss": 0.2476, "step": 23519 }, { "epoch": 0.40883728206643605, "grad_norm": 2.173569326308693, "learning_rate": 6.685709174368945e-07, "loss": 0.3078, "step": 23520 }, { "epoch": 0.4088546646039389, "grad_norm": 1.4152431157812162, "learning_rate": 6.685444158678308e-07, "loss": 0.2794, "step": 23521 }, { "epoch": 0.4088720471414417, "grad_norm": 2.0594787754458523, "learning_rate": 6.685179137645589e-07, "loss": 0.3568, "step": 23522 }, { "epoch": 0.40888942967894454, "grad_norm": 1.3743725118371428, "learning_rate": 6.684914111271624e-07, "loss": 0.1653, "step": 23523 }, { "epoch": 0.40890681221644737, "grad_norm": 1.304291195184631, "learning_rate": 6.684649079557256e-07, "loss": 0.1778, "step": 23524 }, { "epoch": 0.4089241947539502, "grad_norm": 1.9885855748297254, "learning_rate": 6.684384042503326e-07, "loss": 0.2751, "step": 23525 }, { "epoch": 0.40894157729145303, "grad_norm": 1.2857969271818859, "learning_rate": 6.68411900011067e-07, "loss": 0.1941, "step": 23526 }, { "epoch": 0.4089589598289558, "grad_norm": 1.6967173559613822, "learning_rate": 6.683853952380132e-07, "loss": 0.2227, "step": 23527 }, { "epoch": 0.40897634236645863, "grad_norm": 1.3979165933415896, "learning_rate": 6.683588899312551e-07, "loss": 0.2377, "step": 23528 }, { "epoch": 0.40899372490396146, "grad_norm": 1.211505044482453, "learning_rate": 6.683323840908764e-07, "loss": 0.277, "step": 23529 }, { "epoch": 0.4090111074414643, "grad_norm": 1.150923735526181, "learning_rate": 6.683058777169616e-07, "loss": 0.1471, "step": 23530 }, { "epoch": 0.4090284899789671, "grad_norm": 2.817135604928338, "learning_rate": 6.682793708095945e-07, "loss": 0.2522, "step": 23531 }, { "epoch": 0.40904587251646995, "grad_norm": 1.5226321707003194, "learning_rate": 6.682528633688588e-07, "loss": 0.2395, "step": 23532 }, { "epoch": 0.4090632550539728, "grad_norm": 1.6825491480667774, "learning_rate": 6.682263553948391e-07, "loss": 0.1504, "step": 23533 }, { "epoch": 0.4090806375914756, "grad_norm": 2.903837079372242, "learning_rate": 6.681998468876189e-07, "loss": 0.2126, "step": 23534 }, { "epoch": 0.40909802012897845, "grad_norm": 2.3725110372492355, "learning_rate": 6.681733378472827e-07, "loss": 0.2417, "step": 23535 }, { "epoch": 0.4091154026664813, "grad_norm": 1.614080015805308, "learning_rate": 6.681468282739142e-07, "loss": 0.3103, "step": 23536 }, { "epoch": 0.40913278520398405, "grad_norm": 1.7983924033294052, "learning_rate": 6.681203181675973e-07, "loss": 0.2092, "step": 23537 }, { "epoch": 0.4091501677414869, "grad_norm": 1.7282896130624645, "learning_rate": 6.680938075284166e-07, "loss": 0.2374, "step": 23538 }, { "epoch": 0.4091675502789897, "grad_norm": 1.4640619663978853, "learning_rate": 6.680672963564554e-07, "loss": 0.1803, "step": 23539 }, { "epoch": 0.40918493281649254, "grad_norm": 2.5648623866037306, "learning_rate": 6.680407846517981e-07, "loss": 0.4227, "step": 23540 }, { "epoch": 0.40920231535399537, "grad_norm": 1.6326493340203638, "learning_rate": 6.680142724145289e-07, "loss": 0.4193, "step": 23541 }, { "epoch": 0.4092196978914982, "grad_norm": 1.715584333853731, "learning_rate": 6.679877596447316e-07, "loss": 0.2794, "step": 23542 }, { "epoch": 0.40923708042900103, "grad_norm": 1.3494091829383723, "learning_rate": 6.679612463424901e-07, "loss": 0.3345, "step": 23543 }, { "epoch": 0.40925446296650386, "grad_norm": 1.8844055316072374, "learning_rate": 6.679347325078888e-07, "loss": 0.1965, "step": 23544 }, { "epoch": 0.4092718455040067, "grad_norm": 13.851517353712127, "learning_rate": 6.679082181410113e-07, "loss": 0.2868, "step": 23545 }, { "epoch": 0.4092892280415095, "grad_norm": 1.9764537264527566, "learning_rate": 6.678817032419422e-07, "loss": 0.4587, "step": 23546 }, { "epoch": 0.4093066105790123, "grad_norm": 1.139306578529614, "learning_rate": 6.67855187810765e-07, "loss": 0.3278, "step": 23547 }, { "epoch": 0.40932399311651513, "grad_norm": 1.8084225567474566, "learning_rate": 6.678286718475639e-07, "loss": 0.3296, "step": 23548 }, { "epoch": 0.40934137565401796, "grad_norm": 1.6316849105293811, "learning_rate": 6.678021553524232e-07, "loss": 0.1991, "step": 23549 }, { "epoch": 0.4093587581915208, "grad_norm": 1.4493782173649212, "learning_rate": 6.677756383254266e-07, "loss": 0.1746, "step": 23550 }, { "epoch": 0.4093761407290236, "grad_norm": 1.9109655930556786, "learning_rate": 6.677491207666584e-07, "loss": 0.3968, "step": 23551 }, { "epoch": 0.40939352326652645, "grad_norm": 1.9298019580163568, "learning_rate": 6.677226026762025e-07, "loss": 0.2541, "step": 23552 }, { "epoch": 0.4094109058040293, "grad_norm": 1.6584940243364603, "learning_rate": 6.676960840541429e-07, "loss": 0.2443, "step": 23553 }, { "epoch": 0.4094282883415321, "grad_norm": 2.857641566052578, "learning_rate": 6.67669564900564e-07, "loss": 0.4025, "step": 23554 }, { "epoch": 0.40944567087903494, "grad_norm": 2.184096424890012, "learning_rate": 6.676430452155494e-07, "loss": 0.3265, "step": 23555 }, { "epoch": 0.40946305341653777, "grad_norm": 3.2691516172677493, "learning_rate": 6.676165249991832e-07, "loss": 0.273, "step": 23556 }, { "epoch": 0.40948043595404054, "grad_norm": 1.454402559079621, "learning_rate": 6.6759000425155e-07, "loss": 0.2399, "step": 23557 }, { "epoch": 0.4094978184915434, "grad_norm": 3.98018452277394, "learning_rate": 6.675634829727332e-07, "loss": 0.3773, "step": 23558 }, { "epoch": 0.4095152010290462, "grad_norm": 3.5184035199863524, "learning_rate": 6.675369611628172e-07, "loss": 0.2748, "step": 23559 }, { "epoch": 0.40953258356654904, "grad_norm": 2.4318587302248686, "learning_rate": 6.67510438821886e-07, "loss": 0.1978, "step": 23560 }, { "epoch": 0.40954996610405187, "grad_norm": 1.4139873858573364, "learning_rate": 6.674839159500237e-07, "loss": 0.2475, "step": 23561 }, { "epoch": 0.4095673486415547, "grad_norm": 1.787477852986966, "learning_rate": 6.674573925473142e-07, "loss": 0.2156, "step": 23562 }, { "epoch": 0.4095847311790575, "grad_norm": 1.6372481357889714, "learning_rate": 6.674308686138419e-07, "loss": 0.2337, "step": 23563 }, { "epoch": 0.40960211371656036, "grad_norm": 3.1832694008201106, "learning_rate": 6.674043441496905e-07, "loss": 0.3262, "step": 23564 }, { "epoch": 0.4096194962540632, "grad_norm": 1.1377562652580158, "learning_rate": 6.673778191549443e-07, "loss": 0.1818, "step": 23565 }, { "epoch": 0.409636878791566, "grad_norm": 1.628920254065024, "learning_rate": 6.673512936296872e-07, "loss": 0.214, "step": 23566 }, { "epoch": 0.4096542613290688, "grad_norm": 1.451731384670988, "learning_rate": 6.673247675740034e-07, "loss": 0.2296, "step": 23567 }, { "epoch": 0.4096716438665716, "grad_norm": 1.895466787578006, "learning_rate": 6.67298240987977e-07, "loss": 0.3107, "step": 23568 }, { "epoch": 0.40968902640407445, "grad_norm": 2.258064954866359, "learning_rate": 6.672717138716921e-07, "loss": 0.2875, "step": 23569 }, { "epoch": 0.4097064089415773, "grad_norm": 0.9337464468098596, "learning_rate": 6.672451862252326e-07, "loss": 0.3106, "step": 23570 }, { "epoch": 0.4097237914790801, "grad_norm": 2.7768941865630423, "learning_rate": 6.672186580486828e-07, "loss": 0.324, "step": 23571 }, { "epoch": 0.40974117401658294, "grad_norm": 1.635978421280759, "learning_rate": 6.671921293421265e-07, "loss": 0.1819, "step": 23572 }, { "epoch": 0.4097585565540858, "grad_norm": 2.4931804168198712, "learning_rate": 6.671656001056481e-07, "loss": 0.3195, "step": 23573 }, { "epoch": 0.4097759390915886, "grad_norm": 1.3154869189655856, "learning_rate": 6.671390703393313e-07, "loss": 0.2748, "step": 23574 }, { "epoch": 0.40979332162909143, "grad_norm": 1.5620769755713433, "learning_rate": 6.671125400432607e-07, "loss": 0.3037, "step": 23575 }, { "epoch": 0.40981070416659426, "grad_norm": 2.1095251407344455, "learning_rate": 6.670860092175199e-07, "loss": 0.371, "step": 23576 }, { "epoch": 0.40982808670409704, "grad_norm": 1.6477656420779887, "learning_rate": 6.670594778621935e-07, "loss": 0.4667, "step": 23577 }, { "epoch": 0.40984546924159987, "grad_norm": 3.9877896764714724, "learning_rate": 6.67032945977365e-07, "loss": 0.2582, "step": 23578 }, { "epoch": 0.4098628517791027, "grad_norm": 1.5931518289827986, "learning_rate": 6.670064135631188e-07, "loss": 0.2784, "step": 23579 }, { "epoch": 0.40988023431660553, "grad_norm": 1.6840541888746, "learning_rate": 6.669798806195391e-07, "loss": 0.267, "step": 23580 }, { "epoch": 0.40989761685410836, "grad_norm": 2.993036195536984, "learning_rate": 6.669533471467098e-07, "loss": 0.5628, "step": 23581 }, { "epoch": 0.4099149993916112, "grad_norm": 1.047215270345979, "learning_rate": 6.669268131447152e-07, "loss": 0.2015, "step": 23582 }, { "epoch": 0.409932381929114, "grad_norm": 1.4686986676998632, "learning_rate": 6.66900278613639e-07, "loss": 0.2664, "step": 23583 }, { "epoch": 0.40994976446661685, "grad_norm": 2.098271148394464, "learning_rate": 6.66873743553566e-07, "loss": 0.4472, "step": 23584 }, { "epoch": 0.4099671470041197, "grad_norm": 3.5852079460809936, "learning_rate": 6.668472079645797e-07, "loss": 0.4764, "step": 23585 }, { "epoch": 0.4099845295416225, "grad_norm": 1.9342934561280718, "learning_rate": 6.668206718467644e-07, "loss": 0.2964, "step": 23586 }, { "epoch": 0.4100019120791253, "grad_norm": 1.814763213370313, "learning_rate": 6.667941352002041e-07, "loss": 0.1866, "step": 23587 }, { "epoch": 0.4100192946166281, "grad_norm": 1.4968537162974858, "learning_rate": 6.667675980249831e-07, "loss": 0.3208, "step": 23588 }, { "epoch": 0.41003667715413095, "grad_norm": 1.1656451619959205, "learning_rate": 6.667410603211853e-07, "loss": 0.2121, "step": 23589 }, { "epoch": 0.4100540596916338, "grad_norm": 1.443635721420218, "learning_rate": 6.667145220888952e-07, "loss": 0.2113, "step": 23590 }, { "epoch": 0.4100714422291366, "grad_norm": 3.85047453790149, "learning_rate": 6.666879833281965e-07, "loss": 0.3699, "step": 23591 }, { "epoch": 0.41008882476663944, "grad_norm": 1.8014200979661223, "learning_rate": 6.666614440391736e-07, "loss": 0.3086, "step": 23592 }, { "epoch": 0.41010620730414227, "grad_norm": 1.6550769124313927, "learning_rate": 6.666349042219101e-07, "loss": 0.2386, "step": 23593 }, { "epoch": 0.4101235898416451, "grad_norm": 1.2801962251858026, "learning_rate": 6.666083638764908e-07, "loss": 0.3442, "step": 23594 }, { "epoch": 0.41014097237914793, "grad_norm": 1.7539575674412826, "learning_rate": 6.665818230029996e-07, "loss": 0.2387, "step": 23595 }, { "epoch": 0.41015835491665076, "grad_norm": 1.4353925520497395, "learning_rate": 6.665552816015203e-07, "loss": 0.1989, "step": 23596 }, { "epoch": 0.41017573745415353, "grad_norm": 1.7485108746511033, "learning_rate": 6.665287396721373e-07, "loss": 0.2719, "step": 23597 }, { "epoch": 0.41019311999165636, "grad_norm": 1.7166047363905679, "learning_rate": 6.66502197214935e-07, "loss": 0.2511, "step": 23598 }, { "epoch": 0.4102105025291592, "grad_norm": 1.461834921291137, "learning_rate": 6.664756542299968e-07, "loss": 0.2423, "step": 23599 }, { "epoch": 0.410227885066662, "grad_norm": 1.192077063807658, "learning_rate": 6.664491107174075e-07, "loss": 0.4351, "step": 23600 }, { "epoch": 0.41024526760416485, "grad_norm": 1.9610015939014926, "learning_rate": 6.664225666772509e-07, "loss": 0.1577, "step": 23601 }, { "epoch": 0.4102626501416677, "grad_norm": 3.617351584372914, "learning_rate": 6.663960221096112e-07, "loss": 0.324, "step": 23602 }, { "epoch": 0.4102800326791705, "grad_norm": 1.7251766588936563, "learning_rate": 6.663694770145726e-07, "loss": 0.3301, "step": 23603 }, { "epoch": 0.41029741521667334, "grad_norm": 3.1099614846697063, "learning_rate": 6.663429313922191e-07, "loss": 0.3342, "step": 23604 }, { "epoch": 0.4103147977541762, "grad_norm": 1.671254631139579, "learning_rate": 6.66316385242635e-07, "loss": 0.3221, "step": 23605 }, { "epoch": 0.410332180291679, "grad_norm": 1.7096389108629666, "learning_rate": 6.662898385659044e-07, "loss": 0.153, "step": 23606 }, { "epoch": 0.4103495628291818, "grad_norm": 1.1831664676049567, "learning_rate": 6.662632913621114e-07, "loss": 0.2848, "step": 23607 }, { "epoch": 0.4103669453666846, "grad_norm": 1.7146381577834953, "learning_rate": 6.662367436313399e-07, "loss": 0.2766, "step": 23608 }, { "epoch": 0.41038432790418744, "grad_norm": 1.6116353539908652, "learning_rate": 6.662101953736745e-07, "loss": 0.2359, "step": 23609 }, { "epoch": 0.41040171044169027, "grad_norm": 1.315728170912388, "learning_rate": 6.66183646589199e-07, "loss": 0.2146, "step": 23610 }, { "epoch": 0.4104190929791931, "grad_norm": 2.281604787685711, "learning_rate": 6.661570972779979e-07, "loss": 0.2201, "step": 23611 }, { "epoch": 0.41043647551669593, "grad_norm": 1.5998979599719005, "learning_rate": 6.661305474401549e-07, "loss": 0.4446, "step": 23612 }, { "epoch": 0.41045385805419876, "grad_norm": 1.3080428253958887, "learning_rate": 6.661039970757545e-07, "loss": 0.1712, "step": 23613 }, { "epoch": 0.4104712405917016, "grad_norm": 2.927028137876629, "learning_rate": 6.660774461848807e-07, "loss": 0.3504, "step": 23614 }, { "epoch": 0.4104886231292044, "grad_norm": 1.5539715436270083, "learning_rate": 6.660508947676177e-07, "loss": 0.1988, "step": 23615 }, { "epoch": 0.41050600566670725, "grad_norm": 1.2987572615201324, "learning_rate": 6.660243428240495e-07, "loss": 0.1692, "step": 23616 }, { "epoch": 0.41052338820421, "grad_norm": 2.0096676559126925, "learning_rate": 6.659977903542607e-07, "loss": 0.2449, "step": 23617 }, { "epoch": 0.41054077074171286, "grad_norm": 1.621751548081886, "learning_rate": 6.65971237358335e-07, "loss": 0.2028, "step": 23618 }, { "epoch": 0.4105581532792157, "grad_norm": 1.5730501703577295, "learning_rate": 6.659446838363566e-07, "loss": 0.3121, "step": 23619 }, { "epoch": 0.4105755358167185, "grad_norm": 1.5951636271077136, "learning_rate": 6.6591812978841e-07, "loss": 0.3654, "step": 23620 }, { "epoch": 0.41059291835422135, "grad_norm": 2.590094226269665, "learning_rate": 6.65891575214579e-07, "loss": 0.1997, "step": 23621 }, { "epoch": 0.4106103008917242, "grad_norm": 1.223468907532896, "learning_rate": 6.658650201149478e-07, "loss": 0.2927, "step": 23622 }, { "epoch": 0.410627683429227, "grad_norm": 1.525439822962529, "learning_rate": 6.658384644896011e-07, "loss": 0.2414, "step": 23623 }, { "epoch": 0.41064506596672984, "grad_norm": 2.1760287926691784, "learning_rate": 6.658119083386222e-07, "loss": 0.2699, "step": 23624 }, { "epoch": 0.41066244850423267, "grad_norm": 1.685223155867045, "learning_rate": 6.65785351662096e-07, "loss": 0.2948, "step": 23625 }, { "epoch": 0.4106798310417355, "grad_norm": 2.8636400080455546, "learning_rate": 6.657587944601061e-07, "loss": 0.2361, "step": 23626 }, { "epoch": 0.4106972135792383, "grad_norm": 2.094654017387538, "learning_rate": 6.657322367327373e-07, "loss": 0.2378, "step": 23627 }, { "epoch": 0.4107145961167411, "grad_norm": 1.8248503629470876, "learning_rate": 6.657056784800732e-07, "loss": 0.3174, "step": 23628 }, { "epoch": 0.41073197865424393, "grad_norm": 1.3621028262596475, "learning_rate": 6.656791197021984e-07, "loss": 0.1629, "step": 23629 }, { "epoch": 0.41074936119174676, "grad_norm": 1.6942105880591798, "learning_rate": 6.656525603991967e-07, "loss": 0.2667, "step": 23630 }, { "epoch": 0.4107667437292496, "grad_norm": 3.5216393189095467, "learning_rate": 6.656260005711526e-07, "loss": 0.2771, "step": 23631 }, { "epoch": 0.4107841262667524, "grad_norm": 1.4882811449099078, "learning_rate": 6.655994402181501e-07, "loss": 0.4354, "step": 23632 }, { "epoch": 0.41080150880425526, "grad_norm": 1.4788465191681872, "learning_rate": 6.655728793402735e-07, "loss": 0.1856, "step": 23633 }, { "epoch": 0.4108188913417581, "grad_norm": 0.9276816865964892, "learning_rate": 6.655463179376068e-07, "loss": 0.1478, "step": 23634 }, { "epoch": 0.4108362738792609, "grad_norm": 1.1481703594202057, "learning_rate": 6.655197560102346e-07, "loss": 0.2624, "step": 23635 }, { "epoch": 0.41085365641676375, "grad_norm": 1.4941850957480174, "learning_rate": 6.654931935582407e-07, "loss": 0.1859, "step": 23636 }, { "epoch": 0.4108710389542665, "grad_norm": 1.1802271165604838, "learning_rate": 6.654666305817092e-07, "loss": 0.2704, "step": 23637 }, { "epoch": 0.41088842149176935, "grad_norm": 1.1480687525731703, "learning_rate": 6.654400670807246e-07, "loss": 0.2015, "step": 23638 }, { "epoch": 0.4109058040292722, "grad_norm": 1.6128467340505719, "learning_rate": 6.65413503055371e-07, "loss": 0.2327, "step": 23639 }, { "epoch": 0.410923186566775, "grad_norm": 1.512129714244679, "learning_rate": 6.653869385057325e-07, "loss": 0.1945, "step": 23640 }, { "epoch": 0.41094056910427784, "grad_norm": 1.2567874456704657, "learning_rate": 6.653603734318936e-07, "loss": 0.2161, "step": 23641 }, { "epoch": 0.41095795164178067, "grad_norm": 2.212817513024917, "learning_rate": 6.653338078339381e-07, "loss": 0.347, "step": 23642 }, { "epoch": 0.4109753341792835, "grad_norm": 1.4397329842892077, "learning_rate": 6.653072417119504e-07, "loss": 0.2427, "step": 23643 }, { "epoch": 0.41099271671678633, "grad_norm": 2.056502958716638, "learning_rate": 6.652806750660147e-07, "loss": 0.2873, "step": 23644 }, { "epoch": 0.41101009925428916, "grad_norm": 1.1748479151480693, "learning_rate": 6.652541078962153e-07, "loss": 0.2072, "step": 23645 }, { "epoch": 0.411027481791792, "grad_norm": 1.684338045042145, "learning_rate": 6.652275402026362e-07, "loss": 0.1739, "step": 23646 }, { "epoch": 0.41104486432929477, "grad_norm": 2.427850208301372, "learning_rate": 6.652009719853615e-07, "loss": 0.2445, "step": 23647 }, { "epoch": 0.4110622468667976, "grad_norm": 2.261634942223534, "learning_rate": 6.651744032444759e-07, "loss": 0.2394, "step": 23648 }, { "epoch": 0.41107962940430043, "grad_norm": 1.5789372802109571, "learning_rate": 6.651478339800631e-07, "loss": 0.2347, "step": 23649 }, { "epoch": 0.41109701194180326, "grad_norm": 2.0115773847973593, "learning_rate": 6.651212641922077e-07, "loss": 0.1895, "step": 23650 }, { "epoch": 0.4111143944793061, "grad_norm": 2.1445851406747773, "learning_rate": 6.650946938809936e-07, "loss": 0.4398, "step": 23651 }, { "epoch": 0.4111317770168089, "grad_norm": 1.2300945870322437, "learning_rate": 6.650681230465053e-07, "loss": 0.1618, "step": 23652 }, { "epoch": 0.41114915955431175, "grad_norm": 2.5526852415316146, "learning_rate": 6.650415516888267e-07, "loss": 0.3463, "step": 23653 }, { "epoch": 0.4111665420918146, "grad_norm": 0.9813704092417146, "learning_rate": 6.650149798080423e-07, "loss": 0.1632, "step": 23654 }, { "epoch": 0.4111839246293174, "grad_norm": 1.5721173433933786, "learning_rate": 6.649884074042363e-07, "loss": 0.2173, "step": 23655 }, { "epoch": 0.4112013071668202, "grad_norm": 1.756598932030452, "learning_rate": 6.649618344774927e-07, "loss": 0.214, "step": 23656 }, { "epoch": 0.411218689704323, "grad_norm": 1.4430833381111543, "learning_rate": 6.649352610278957e-07, "loss": 0.4006, "step": 23657 }, { "epoch": 0.41123607224182585, "grad_norm": 1.4969248541494464, "learning_rate": 6.649086870555301e-07, "loss": 0.2676, "step": 23658 }, { "epoch": 0.4112534547793287, "grad_norm": 1.509074058505074, "learning_rate": 6.648821125604796e-07, "loss": 0.1275, "step": 23659 }, { "epoch": 0.4112708373168315, "grad_norm": 1.5535483945032462, "learning_rate": 6.648555375428284e-07, "loss": 0.1827, "step": 23660 }, { "epoch": 0.41128821985433434, "grad_norm": 1.910455486311627, "learning_rate": 6.64828962002661e-07, "loss": 0.2702, "step": 23661 }, { "epoch": 0.41130560239183717, "grad_norm": 1.6310572625191615, "learning_rate": 6.648023859400614e-07, "loss": 0.1831, "step": 23662 }, { "epoch": 0.41132298492934, "grad_norm": 1.385884165401222, "learning_rate": 6.647758093551139e-07, "loss": 0.1428, "step": 23663 }, { "epoch": 0.4113403674668428, "grad_norm": 1.707422462427432, "learning_rate": 6.647492322479028e-07, "loss": 0.2051, "step": 23664 }, { "epoch": 0.41135775000434566, "grad_norm": 2.886816154375566, "learning_rate": 6.647226546185124e-07, "loss": 0.2526, "step": 23665 }, { "epoch": 0.41137513254184843, "grad_norm": 2.7271980162378746, "learning_rate": 6.646960764670269e-07, "loss": 0.2112, "step": 23666 }, { "epoch": 0.41139251507935126, "grad_norm": 4.5547530206208195, "learning_rate": 6.646694977935304e-07, "loss": 0.3317, "step": 23667 }, { "epoch": 0.4114098976168541, "grad_norm": 2.1531622506257975, "learning_rate": 6.646429185981072e-07, "loss": 0.2692, "step": 23668 }, { "epoch": 0.4114272801543569, "grad_norm": 3.589093469127524, "learning_rate": 6.646163388808417e-07, "loss": 0.3289, "step": 23669 }, { "epoch": 0.41144466269185975, "grad_norm": 2.9281494974382793, "learning_rate": 6.645897586418179e-07, "loss": 0.3229, "step": 23670 }, { "epoch": 0.4114620452293626, "grad_norm": 2.047314412498895, "learning_rate": 6.645631778811203e-07, "loss": 0.2503, "step": 23671 }, { "epoch": 0.4114794277668654, "grad_norm": 2.026067555636481, "learning_rate": 6.645365965988327e-07, "loss": 0.2151, "step": 23672 }, { "epoch": 0.41149681030436824, "grad_norm": 1.5906572658833718, "learning_rate": 6.6451001479504e-07, "loss": 0.1855, "step": 23673 }, { "epoch": 0.4115141928418711, "grad_norm": 1.8347074779533785, "learning_rate": 6.644834324698261e-07, "loss": 0.2289, "step": 23674 }, { "epoch": 0.4115315753793739, "grad_norm": 0.9001460803962651, "learning_rate": 6.644568496232752e-07, "loss": 0.2439, "step": 23675 }, { "epoch": 0.4115489579168767, "grad_norm": 1.47570674320209, "learning_rate": 6.644302662554716e-07, "loss": 0.1843, "step": 23676 }, { "epoch": 0.4115663404543795, "grad_norm": 1.3627439742859542, "learning_rate": 6.644036823664998e-07, "loss": 0.3311, "step": 23677 }, { "epoch": 0.41158372299188234, "grad_norm": 1.4638013340470313, "learning_rate": 6.643770979564436e-07, "loss": 0.2907, "step": 23678 }, { "epoch": 0.41160110552938517, "grad_norm": 2.1051156649361347, "learning_rate": 6.643505130253876e-07, "loss": 0.2591, "step": 23679 }, { "epoch": 0.411618488066888, "grad_norm": 4.641048360165728, "learning_rate": 6.643239275734159e-07, "loss": 0.3168, "step": 23680 }, { "epoch": 0.41163587060439083, "grad_norm": 1.2365902755989702, "learning_rate": 6.642973416006129e-07, "loss": 0.2575, "step": 23681 }, { "epoch": 0.41165325314189366, "grad_norm": 2.365494550388795, "learning_rate": 6.642707551070628e-07, "loss": 0.4433, "step": 23682 }, { "epoch": 0.4116706356793965, "grad_norm": 4.203988698082765, "learning_rate": 6.642441680928499e-07, "loss": 0.4379, "step": 23683 }, { "epoch": 0.4116880182168993, "grad_norm": 2.3154735084385987, "learning_rate": 6.642175805580583e-07, "loss": 0.2689, "step": 23684 }, { "epoch": 0.41170540075440215, "grad_norm": 1.194411749610986, "learning_rate": 6.641909925027726e-07, "loss": 0.1564, "step": 23685 }, { "epoch": 0.4117227832919049, "grad_norm": 1.7701371566474624, "learning_rate": 6.641644039270767e-07, "loss": 0.2365, "step": 23686 }, { "epoch": 0.41174016582940776, "grad_norm": 1.282829790348083, "learning_rate": 6.641378148310552e-07, "loss": 0.3614, "step": 23687 }, { "epoch": 0.4117575483669106, "grad_norm": 1.1533765249937147, "learning_rate": 6.64111225214792e-07, "loss": 0.2193, "step": 23688 }, { "epoch": 0.4117749309044134, "grad_norm": 1.6032689938234248, "learning_rate": 6.64084635078372e-07, "loss": 0.1719, "step": 23689 }, { "epoch": 0.41179231344191625, "grad_norm": 1.3795636639810887, "learning_rate": 6.640580444218787e-07, "loss": 0.3615, "step": 23690 }, { "epoch": 0.4118096959794191, "grad_norm": 1.1226590513806354, "learning_rate": 6.640314532453969e-07, "loss": 0.1538, "step": 23691 }, { "epoch": 0.4118270785169219, "grad_norm": 1.042264647568049, "learning_rate": 6.640048615490107e-07, "loss": 0.3148, "step": 23692 }, { "epoch": 0.41184446105442474, "grad_norm": 1.608280077404991, "learning_rate": 6.639782693328045e-07, "loss": 0.1965, "step": 23693 }, { "epoch": 0.41186184359192757, "grad_norm": 1.0638430907978258, "learning_rate": 6.639516765968625e-07, "loss": 0.1836, "step": 23694 }, { "epoch": 0.4118792261294304, "grad_norm": 2.01136226106762, "learning_rate": 6.63925083341269e-07, "loss": 0.1635, "step": 23695 }, { "epoch": 0.4118966086669332, "grad_norm": 0.7996320090387116, "learning_rate": 6.638984895661083e-07, "loss": 0.3389, "step": 23696 }, { "epoch": 0.411913991204436, "grad_norm": 1.931494147702584, "learning_rate": 6.638718952714644e-07, "loss": 0.2665, "step": 23697 }, { "epoch": 0.41193137374193883, "grad_norm": 1.8720435491642131, "learning_rate": 6.638453004574223e-07, "loss": 0.2181, "step": 23698 }, { "epoch": 0.41194875627944166, "grad_norm": 3.306473258916337, "learning_rate": 6.638187051240655e-07, "loss": 0.3279, "step": 23699 }, { "epoch": 0.4119661388169445, "grad_norm": 4.717676657876376, "learning_rate": 6.63792109271479e-07, "loss": 0.3661, "step": 23700 }, { "epoch": 0.4119835213544473, "grad_norm": 2.099585484689424, "learning_rate": 6.637655128997465e-07, "loss": 0.2897, "step": 23701 }, { "epoch": 0.41200090389195015, "grad_norm": 1.1244012631197067, "learning_rate": 6.637389160089527e-07, "loss": 0.174, "step": 23702 }, { "epoch": 0.412018286429453, "grad_norm": 1.2721949918937545, "learning_rate": 6.637123185991817e-07, "loss": 0.1343, "step": 23703 }, { "epoch": 0.4120356689669558, "grad_norm": 1.4259848205457881, "learning_rate": 6.636857206705177e-07, "loss": 0.2357, "step": 23704 }, { "epoch": 0.41205305150445865, "grad_norm": 2.308430422247057, "learning_rate": 6.636591222230454e-07, "loss": 0.4016, "step": 23705 }, { "epoch": 0.4120704340419614, "grad_norm": 1.9890716836150721, "learning_rate": 6.636325232568487e-07, "loss": 0.2004, "step": 23706 }, { "epoch": 0.41208781657946425, "grad_norm": 2.0658541406967776, "learning_rate": 6.636059237720121e-07, "loss": 0.2781, "step": 23707 }, { "epoch": 0.4121051991169671, "grad_norm": 1.914004699724728, "learning_rate": 6.6357932376862e-07, "loss": 0.241, "step": 23708 }, { "epoch": 0.4121225816544699, "grad_norm": 1.4256353048289498, "learning_rate": 6.635527232467565e-07, "loss": 0.1879, "step": 23709 }, { "epoch": 0.41213996419197274, "grad_norm": 2.3929061036867783, "learning_rate": 6.63526122206506e-07, "loss": 0.2176, "step": 23710 }, { "epoch": 0.41215734672947557, "grad_norm": 1.3265705359389453, "learning_rate": 6.634995206479528e-07, "loss": 0.2125, "step": 23711 }, { "epoch": 0.4121747292669784, "grad_norm": 1.0661779118675727, "learning_rate": 6.634729185711811e-07, "loss": 0.1993, "step": 23712 }, { "epoch": 0.41219211180448123, "grad_norm": 1.1252692606249206, "learning_rate": 6.634463159762755e-07, "loss": 0.3209, "step": 23713 }, { "epoch": 0.41220949434198406, "grad_norm": 1.817650669067555, "learning_rate": 6.634197128633201e-07, "loss": 0.2334, "step": 23714 }, { "epoch": 0.4122268768794869, "grad_norm": 0.975301396041042, "learning_rate": 6.633931092323993e-07, "loss": 0.1855, "step": 23715 }, { "epoch": 0.41224425941698967, "grad_norm": 1.4760193490685591, "learning_rate": 6.633665050835975e-07, "loss": 0.2108, "step": 23716 }, { "epoch": 0.4122616419544925, "grad_norm": 2.060183461202143, "learning_rate": 6.633399004169987e-07, "loss": 0.2419, "step": 23717 }, { "epoch": 0.4122790244919953, "grad_norm": 1.5616834110593873, "learning_rate": 6.633132952326876e-07, "loss": 0.1917, "step": 23718 }, { "epoch": 0.41229640702949816, "grad_norm": 2.506287893283375, "learning_rate": 6.632866895307483e-07, "loss": 0.2632, "step": 23719 }, { "epoch": 0.412313789567001, "grad_norm": 1.4353165367130754, "learning_rate": 6.632600833112653e-07, "loss": 0.28, "step": 23720 }, { "epoch": 0.4123311721045038, "grad_norm": 1.9104267398728692, "learning_rate": 6.632334765743228e-07, "loss": 0.2128, "step": 23721 }, { "epoch": 0.41234855464200665, "grad_norm": 2.9486821986569693, "learning_rate": 6.632068693200052e-07, "loss": 0.2706, "step": 23722 }, { "epoch": 0.4123659371795095, "grad_norm": 1.7714248981357665, "learning_rate": 6.631802615483966e-07, "loss": 0.1695, "step": 23723 }, { "epoch": 0.4123833197170123, "grad_norm": 2.005796948419827, "learning_rate": 6.631536532595817e-07, "loss": 0.3525, "step": 23724 }, { "epoch": 0.41240070225451514, "grad_norm": 1.605803766570661, "learning_rate": 6.631270444536446e-07, "loss": 0.2321, "step": 23725 }, { "epoch": 0.4124180847920179, "grad_norm": 0.9628272595344969, "learning_rate": 6.631004351306698e-07, "loss": 0.1462, "step": 23726 }, { "epoch": 0.41243546732952074, "grad_norm": 1.5715425446741322, "learning_rate": 6.630738252907415e-07, "loss": 0.2377, "step": 23727 }, { "epoch": 0.4124528498670236, "grad_norm": 8.078448516437165, "learning_rate": 6.63047214933944e-07, "loss": 0.2288, "step": 23728 }, { "epoch": 0.4124702324045264, "grad_norm": 2.0117980856824866, "learning_rate": 6.630206040603618e-07, "loss": 0.1732, "step": 23729 }, { "epoch": 0.41248761494202923, "grad_norm": 1.916940676590586, "learning_rate": 6.629939926700791e-07, "loss": 0.2722, "step": 23730 }, { "epoch": 0.41250499747953207, "grad_norm": 0.9906945782334997, "learning_rate": 6.629673807631805e-07, "loss": 0.156, "step": 23731 }, { "epoch": 0.4125223800170349, "grad_norm": 1.6314968603216409, "learning_rate": 6.629407683397499e-07, "loss": 0.213, "step": 23732 }, { "epoch": 0.4125397625545377, "grad_norm": 1.4809353376427996, "learning_rate": 6.62914155399872e-07, "loss": 0.2334, "step": 23733 }, { "epoch": 0.41255714509204056, "grad_norm": 1.7446944395523252, "learning_rate": 6.628875419436313e-07, "loss": 0.275, "step": 23734 }, { "epoch": 0.4125745276295434, "grad_norm": 1.4454651671610914, "learning_rate": 6.628609279711117e-07, "loss": 0.2302, "step": 23735 }, { "epoch": 0.41259191016704616, "grad_norm": 1.9761901007835263, "learning_rate": 6.628343134823977e-07, "loss": 0.2423, "step": 23736 }, { "epoch": 0.412609292704549, "grad_norm": 2.076194658156931, "learning_rate": 6.62807698477574e-07, "loss": 0.191, "step": 23737 }, { "epoch": 0.4126266752420518, "grad_norm": 1.099114041180255, "learning_rate": 6.627810829567244e-07, "loss": 0.2389, "step": 23738 }, { "epoch": 0.41264405777955465, "grad_norm": 2.0119615010026113, "learning_rate": 6.627544669199336e-07, "loss": 0.249, "step": 23739 }, { "epoch": 0.4126614403170575, "grad_norm": 2.7842102185380817, "learning_rate": 6.62727850367286e-07, "loss": 0.2857, "step": 23740 }, { "epoch": 0.4126788228545603, "grad_norm": 2.3563916178414734, "learning_rate": 6.62701233298866e-07, "loss": 0.237, "step": 23741 }, { "epoch": 0.41269620539206314, "grad_norm": 3.4548370566662343, "learning_rate": 6.626746157147575e-07, "loss": 0.3854, "step": 23742 }, { "epoch": 0.412713587929566, "grad_norm": 1.6397146290971572, "learning_rate": 6.626479976150454e-07, "loss": 0.263, "step": 23743 }, { "epoch": 0.4127309704670688, "grad_norm": 1.3580166807301528, "learning_rate": 6.626213789998137e-07, "loss": 0.3479, "step": 23744 }, { "epoch": 0.41274835300457163, "grad_norm": 1.7455711502774958, "learning_rate": 6.625947598691471e-07, "loss": 0.1943, "step": 23745 }, { "epoch": 0.4127657355420744, "grad_norm": 4.911579151000255, "learning_rate": 6.625681402231298e-07, "loss": 0.3766, "step": 23746 }, { "epoch": 0.41278311807957724, "grad_norm": 1.3912807015362114, "learning_rate": 6.625415200618462e-07, "loss": 0.1812, "step": 23747 }, { "epoch": 0.41280050061708007, "grad_norm": 2.383080756830647, "learning_rate": 6.625148993853805e-07, "loss": 0.3161, "step": 23748 }, { "epoch": 0.4128178831545829, "grad_norm": 0.9247734820475983, "learning_rate": 6.624882781938173e-07, "loss": 0.2763, "step": 23749 }, { "epoch": 0.41283526569208573, "grad_norm": 2.980342796955173, "learning_rate": 6.624616564872408e-07, "loss": 0.4842, "step": 23750 }, { "epoch": 0.41285264822958856, "grad_norm": 1.8123006668469477, "learning_rate": 6.624350342657357e-07, "loss": 0.1859, "step": 23751 }, { "epoch": 0.4128700307670914, "grad_norm": 1.8495631185553334, "learning_rate": 6.624084115293859e-07, "loss": 0.1245, "step": 23752 }, { "epoch": 0.4128874133045942, "grad_norm": 1.4415662155264737, "learning_rate": 6.623817882782761e-07, "loss": 0.2191, "step": 23753 }, { "epoch": 0.41290479584209705, "grad_norm": 2.939692973317437, "learning_rate": 6.623551645124907e-07, "loss": 0.3362, "step": 23754 }, { "epoch": 0.4129221783795999, "grad_norm": 1.7967235947410223, "learning_rate": 6.62328540232114e-07, "loss": 0.242, "step": 23755 }, { "epoch": 0.41293956091710265, "grad_norm": 1.2982205672724592, "learning_rate": 6.623019154372304e-07, "loss": 0.1546, "step": 23756 }, { "epoch": 0.4129569434546055, "grad_norm": 2.0431357261284973, "learning_rate": 6.622752901279242e-07, "loss": 0.3718, "step": 23757 }, { "epoch": 0.4129743259921083, "grad_norm": 4.894947583158471, "learning_rate": 6.6224866430428e-07, "loss": 0.3539, "step": 23758 }, { "epoch": 0.41299170852961115, "grad_norm": 1.5152214364337424, "learning_rate": 6.62222037966382e-07, "loss": 0.1887, "step": 23759 }, { "epoch": 0.413009091067114, "grad_norm": 1.731200998259161, "learning_rate": 6.621954111143147e-07, "loss": 0.2389, "step": 23760 }, { "epoch": 0.4130264736046168, "grad_norm": 1.2793781938304887, "learning_rate": 6.621687837481623e-07, "loss": 0.2133, "step": 23761 }, { "epoch": 0.41304385614211964, "grad_norm": 1.5001399124510568, "learning_rate": 6.621421558680096e-07, "loss": 0.2392, "step": 23762 }, { "epoch": 0.41306123867962247, "grad_norm": 1.1850501880057884, "learning_rate": 6.621155274739405e-07, "loss": 0.2036, "step": 23763 }, { "epoch": 0.4130786212171253, "grad_norm": 3.2396277962185884, "learning_rate": 6.620888985660399e-07, "loss": 0.3797, "step": 23764 }, { "epoch": 0.4130960037546281, "grad_norm": 1.1804075065760267, "learning_rate": 6.620622691443918e-07, "loss": 0.2167, "step": 23765 }, { "epoch": 0.4131133862921309, "grad_norm": 1.288252810744547, "learning_rate": 6.620356392090807e-07, "loss": 0.1877, "step": 23766 }, { "epoch": 0.41313076882963373, "grad_norm": 1.3014748724638514, "learning_rate": 6.62009008760191e-07, "loss": 0.2947, "step": 23767 }, { "epoch": 0.41314815136713656, "grad_norm": 1.2165253240766545, "learning_rate": 6.619823777978075e-07, "loss": 0.1082, "step": 23768 }, { "epoch": 0.4131655339046394, "grad_norm": 1.4563707360433715, "learning_rate": 6.61955746322014e-07, "loss": 0.4024, "step": 23769 }, { "epoch": 0.4131829164421422, "grad_norm": 1.6842049195838993, "learning_rate": 6.619291143328952e-07, "loss": 0.1694, "step": 23770 }, { "epoch": 0.41320029897964505, "grad_norm": 2.0632643728547015, "learning_rate": 6.619024818305357e-07, "loss": 0.2808, "step": 23771 }, { "epoch": 0.4132176815171479, "grad_norm": 1.5517643711029114, "learning_rate": 6.618758488150195e-07, "loss": 0.1757, "step": 23772 }, { "epoch": 0.4132350640546507, "grad_norm": 0.9921335644963309, "learning_rate": 6.618492152864312e-07, "loss": 0.3673, "step": 23773 }, { "epoch": 0.41325244659215354, "grad_norm": 2.445009945445287, "learning_rate": 6.618225812448552e-07, "loss": 0.1918, "step": 23774 }, { "epoch": 0.4132698291296564, "grad_norm": 1.5839244077252432, "learning_rate": 6.617959466903761e-07, "loss": 0.275, "step": 23775 }, { "epoch": 0.41328721166715915, "grad_norm": 2.1890123369037595, "learning_rate": 6.617693116230781e-07, "loss": 0.2438, "step": 23776 }, { "epoch": 0.413304594204662, "grad_norm": 1.522593369714885, "learning_rate": 6.617426760430456e-07, "loss": 0.3038, "step": 23777 }, { "epoch": 0.4133219767421648, "grad_norm": 1.717666633264158, "learning_rate": 6.617160399503635e-07, "loss": 0.2034, "step": 23778 }, { "epoch": 0.41333935927966764, "grad_norm": 1.8690442428034497, "learning_rate": 6.616894033451154e-07, "loss": 0.2704, "step": 23779 }, { "epoch": 0.41335674181717047, "grad_norm": 1.9286492616959723, "learning_rate": 6.616627662273864e-07, "loss": 0.4421, "step": 23780 }, { "epoch": 0.4133741243546733, "grad_norm": 2.5067634899885864, "learning_rate": 6.616361285972606e-07, "loss": 0.2387, "step": 23781 }, { "epoch": 0.41339150689217613, "grad_norm": 1.2476208266152946, "learning_rate": 6.616094904548226e-07, "loss": 0.282, "step": 23782 }, { "epoch": 0.41340888942967896, "grad_norm": 1.2837108273505418, "learning_rate": 6.615828518001565e-07, "loss": 0.3701, "step": 23783 }, { "epoch": 0.4134262719671818, "grad_norm": 2.824560328875168, "learning_rate": 6.615562126333473e-07, "loss": 0.2633, "step": 23784 }, { "epoch": 0.4134436545046846, "grad_norm": 1.4425548316123942, "learning_rate": 6.61529572954479e-07, "loss": 0.2158, "step": 23785 }, { "epoch": 0.4134610370421874, "grad_norm": 1.7092372782637084, "learning_rate": 6.615029327636361e-07, "loss": 0.1065, "step": 23786 }, { "epoch": 0.4134784195796902, "grad_norm": 2.3034509208605582, "learning_rate": 6.614762920609032e-07, "loss": 0.1944, "step": 23787 }, { "epoch": 0.41349580211719306, "grad_norm": 1.0076231610827684, "learning_rate": 6.614496508463645e-07, "loss": 0.1425, "step": 23788 }, { "epoch": 0.4135131846546959, "grad_norm": 1.2470943309783777, "learning_rate": 6.614230091201046e-07, "loss": 0.1958, "step": 23789 }, { "epoch": 0.4135305671921987, "grad_norm": 1.820623464150151, "learning_rate": 6.61396366882208e-07, "loss": 0.3134, "step": 23790 }, { "epoch": 0.41354794972970155, "grad_norm": 1.9666920718982481, "learning_rate": 6.613697241327591e-07, "loss": 0.1733, "step": 23791 }, { "epoch": 0.4135653322672044, "grad_norm": 1.729667173033853, "learning_rate": 6.613430808718421e-07, "loss": 0.2993, "step": 23792 }, { "epoch": 0.4135827148047072, "grad_norm": 1.5807338090354393, "learning_rate": 6.613164370995418e-07, "loss": 0.2074, "step": 23793 }, { "epoch": 0.41360009734221004, "grad_norm": 2.6158263992796478, "learning_rate": 6.612897928159424e-07, "loss": 0.1799, "step": 23794 }, { "epoch": 0.41361747987971287, "grad_norm": 1.6389610217203536, "learning_rate": 6.612631480211286e-07, "loss": 0.2427, "step": 23795 }, { "epoch": 0.41363486241721564, "grad_norm": 1.6340569120787987, "learning_rate": 6.612365027151845e-07, "loss": 0.2861, "step": 23796 }, { "epoch": 0.4136522449547185, "grad_norm": 3.1189728600618194, "learning_rate": 6.61209856898195e-07, "loss": 0.2492, "step": 23797 }, { "epoch": 0.4136696274922213, "grad_norm": 1.7452455435524188, "learning_rate": 6.611832105702441e-07, "loss": 0.2342, "step": 23798 }, { "epoch": 0.41368701002972413, "grad_norm": 2.2093412748555274, "learning_rate": 6.611565637314164e-07, "loss": 0.3216, "step": 23799 }, { "epoch": 0.41370439256722696, "grad_norm": 1.2568779420148766, "learning_rate": 6.611299163817967e-07, "loss": 0.2496, "step": 23800 }, { "epoch": 0.4137217751047298, "grad_norm": 2.3923204403006477, "learning_rate": 6.611032685214691e-07, "loss": 0.2605, "step": 23801 }, { "epoch": 0.4137391576422326, "grad_norm": 1.7199884219149502, "learning_rate": 6.610766201505179e-07, "loss": 0.2398, "step": 23802 }, { "epoch": 0.41375654017973545, "grad_norm": 1.533395246606031, "learning_rate": 6.610499712690282e-07, "loss": 0.2366, "step": 23803 }, { "epoch": 0.4137739227172383, "grad_norm": 1.8720805842595354, "learning_rate": 6.610233218770838e-07, "loss": 0.3178, "step": 23804 }, { "epoch": 0.41379130525474106, "grad_norm": 2.9049396409195403, "learning_rate": 6.609966719747695e-07, "loss": 0.226, "step": 23805 }, { "epoch": 0.4138086877922439, "grad_norm": 1.4233781843851314, "learning_rate": 6.609700215621696e-07, "loss": 0.2849, "step": 23806 }, { "epoch": 0.4138260703297467, "grad_norm": 0.9558288783261408, "learning_rate": 6.609433706393688e-07, "loss": 0.2119, "step": 23807 }, { "epoch": 0.41384345286724955, "grad_norm": 2.2738352958671735, "learning_rate": 6.609167192064513e-07, "loss": 0.3426, "step": 23808 }, { "epoch": 0.4138608354047524, "grad_norm": 1.517157380878785, "learning_rate": 6.60890067263502e-07, "loss": 0.2109, "step": 23809 }, { "epoch": 0.4138782179422552, "grad_norm": 1.649211294602165, "learning_rate": 6.60863414810605e-07, "loss": 0.4813, "step": 23810 }, { "epoch": 0.41389560047975804, "grad_norm": 1.25851892297169, "learning_rate": 6.608367618478448e-07, "loss": 0.1808, "step": 23811 }, { "epoch": 0.41391298301726087, "grad_norm": 0.693598102471401, "learning_rate": 6.608101083753058e-07, "loss": 0.1489, "step": 23812 }, { "epoch": 0.4139303655547637, "grad_norm": 1.5182949119021871, "learning_rate": 6.607834543930728e-07, "loss": 0.2951, "step": 23813 }, { "epoch": 0.41394774809226653, "grad_norm": 1.2766778581615168, "learning_rate": 6.607567999012301e-07, "loss": 0.2287, "step": 23814 }, { "epoch": 0.4139651306297693, "grad_norm": 2.0202088198577615, "learning_rate": 6.607301448998621e-07, "loss": 0.2683, "step": 23815 }, { "epoch": 0.41398251316727214, "grad_norm": 3.6450365627336128, "learning_rate": 6.607034893890535e-07, "loss": 0.5908, "step": 23816 }, { "epoch": 0.41399989570477497, "grad_norm": 2.711392040360982, "learning_rate": 6.606768333688886e-07, "loss": 0.2967, "step": 23817 }, { "epoch": 0.4140172782422778, "grad_norm": 1.4122864030426285, "learning_rate": 6.606501768394519e-07, "loss": 0.2659, "step": 23818 }, { "epoch": 0.41403466077978063, "grad_norm": 1.635897149314028, "learning_rate": 6.606235198008281e-07, "loss": 0.1983, "step": 23819 }, { "epoch": 0.41405204331728346, "grad_norm": 1.5841542063397172, "learning_rate": 6.605968622531014e-07, "loss": 0.2583, "step": 23820 }, { "epoch": 0.4140694258547863, "grad_norm": 0.9224657138735755, "learning_rate": 6.605702041963563e-07, "loss": 0.1181, "step": 23821 }, { "epoch": 0.4140868083922891, "grad_norm": 1.4220998606520512, "learning_rate": 6.605435456306777e-07, "loss": 0.3121, "step": 23822 }, { "epoch": 0.41410419092979195, "grad_norm": 1.4923765354338485, "learning_rate": 6.605168865561497e-07, "loss": 0.2461, "step": 23823 }, { "epoch": 0.4141215734672948, "grad_norm": 1.6537139572887551, "learning_rate": 6.604902269728568e-07, "loss": 0.2156, "step": 23824 }, { "epoch": 0.41413895600479755, "grad_norm": 2.1963104302808882, "learning_rate": 6.604635668808837e-07, "loss": 0.3095, "step": 23825 }, { "epoch": 0.4141563385423004, "grad_norm": 3.1282950404651917, "learning_rate": 6.604369062803148e-07, "loss": 0.3809, "step": 23826 }, { "epoch": 0.4141737210798032, "grad_norm": 1.4768565700877276, "learning_rate": 6.604102451712344e-07, "loss": 0.2345, "step": 23827 }, { "epoch": 0.41419110361730604, "grad_norm": 2.982891450044437, "learning_rate": 6.603835835537276e-07, "loss": 0.2976, "step": 23828 }, { "epoch": 0.4142084861548089, "grad_norm": 1.39658076564566, "learning_rate": 6.603569214278781e-07, "loss": 0.2986, "step": 23829 }, { "epoch": 0.4142258686923117, "grad_norm": 1.5107209943426716, "learning_rate": 6.603302587937711e-07, "loss": 0.3522, "step": 23830 }, { "epoch": 0.41424325122981454, "grad_norm": 1.644164806664549, "learning_rate": 6.603035956514908e-07, "loss": 0.3547, "step": 23831 }, { "epoch": 0.41426063376731737, "grad_norm": 1.030935961566625, "learning_rate": 6.602769320011217e-07, "loss": 0.2757, "step": 23832 }, { "epoch": 0.4142780163048202, "grad_norm": 2.0838575148934626, "learning_rate": 6.602502678427483e-07, "loss": 0.2054, "step": 23833 }, { "epoch": 0.414295398842323, "grad_norm": 1.9475531737851621, "learning_rate": 6.602236031764552e-07, "loss": 0.2044, "step": 23834 }, { "epoch": 0.4143127813798258, "grad_norm": 2.5591702943207864, "learning_rate": 6.601969380023269e-07, "loss": 0.3633, "step": 23835 }, { "epoch": 0.41433016391732863, "grad_norm": 1.4645524147986564, "learning_rate": 6.601702723204479e-07, "loss": 0.1342, "step": 23836 }, { "epoch": 0.41434754645483146, "grad_norm": 2.0048902342109747, "learning_rate": 6.601436061309026e-07, "loss": 0.2294, "step": 23837 }, { "epoch": 0.4143649289923343, "grad_norm": 1.3361583188128177, "learning_rate": 6.601169394337758e-07, "loss": 0.2349, "step": 23838 }, { "epoch": 0.4143823115298371, "grad_norm": 1.536908353780687, "learning_rate": 6.600902722291518e-07, "loss": 0.1465, "step": 23839 }, { "epoch": 0.41439969406733995, "grad_norm": 2.156239789272362, "learning_rate": 6.60063604517115e-07, "loss": 0.4134, "step": 23840 }, { "epoch": 0.4144170766048428, "grad_norm": 2.946069149265025, "learning_rate": 6.600369362977504e-07, "loss": 0.3937, "step": 23841 }, { "epoch": 0.4144344591423456, "grad_norm": 1.4254340351949302, "learning_rate": 6.600102675711419e-07, "loss": 0.4555, "step": 23842 }, { "epoch": 0.41445184167984844, "grad_norm": 2.261385141516446, "learning_rate": 6.599835983373747e-07, "loss": 0.3614, "step": 23843 }, { "epoch": 0.4144692242173513, "grad_norm": 1.066299756982982, "learning_rate": 6.599569285965328e-07, "loss": 0.1974, "step": 23844 }, { "epoch": 0.41448660675485405, "grad_norm": 2.3684221017682825, "learning_rate": 6.59930258348701e-07, "loss": 0.2448, "step": 23845 }, { "epoch": 0.4145039892923569, "grad_norm": 1.2345892194479133, "learning_rate": 6.599035875939636e-07, "loss": 0.1676, "step": 23846 }, { "epoch": 0.4145213718298597, "grad_norm": 1.6173069573087442, "learning_rate": 6.598769163324054e-07, "loss": 0.1718, "step": 23847 }, { "epoch": 0.41453875436736254, "grad_norm": 1.0992982549977814, "learning_rate": 6.598502445641108e-07, "loss": 0.1884, "step": 23848 }, { "epoch": 0.41455613690486537, "grad_norm": 1.487110542479972, "learning_rate": 6.598235722891643e-07, "loss": 0.2295, "step": 23849 }, { "epoch": 0.4145735194423682, "grad_norm": 4.165813641833725, "learning_rate": 6.597968995076504e-07, "loss": 0.3657, "step": 23850 }, { "epoch": 0.41459090197987103, "grad_norm": 2.675519767905813, "learning_rate": 6.597702262196541e-07, "loss": 0.2494, "step": 23851 }, { "epoch": 0.41460828451737386, "grad_norm": 1.4918384283390684, "learning_rate": 6.597435524252592e-07, "loss": 0.1683, "step": 23852 }, { "epoch": 0.4146256670548767, "grad_norm": 1.3670426724325386, "learning_rate": 6.597168781245508e-07, "loss": 0.2311, "step": 23853 }, { "epoch": 0.4146430495923795, "grad_norm": 1.124328938385426, "learning_rate": 6.596902033176132e-07, "loss": 0.1678, "step": 23854 }, { "epoch": 0.4146604321298823, "grad_norm": 2.6719849021100632, "learning_rate": 6.59663528004531e-07, "loss": 0.3421, "step": 23855 }, { "epoch": 0.4146778146673851, "grad_norm": 1.486931426343789, "learning_rate": 6.596368521853888e-07, "loss": 0.2858, "step": 23856 }, { "epoch": 0.41469519720488796, "grad_norm": 1.3625936077043386, "learning_rate": 6.596101758602711e-07, "loss": 0.2383, "step": 23857 }, { "epoch": 0.4147125797423908, "grad_norm": 1.0412285279537643, "learning_rate": 6.595834990292623e-07, "loss": 0.1821, "step": 23858 }, { "epoch": 0.4147299622798936, "grad_norm": 1.1244910773304406, "learning_rate": 6.595568216924474e-07, "loss": 0.1679, "step": 23859 }, { "epoch": 0.41474734481739645, "grad_norm": 2.419783676724355, "learning_rate": 6.595301438499106e-07, "loss": 0.3221, "step": 23860 }, { "epoch": 0.4147647273548993, "grad_norm": 2.63927018145607, "learning_rate": 6.595034655017364e-07, "loss": 0.2693, "step": 23861 }, { "epoch": 0.4147821098924021, "grad_norm": 1.4674892118431293, "learning_rate": 6.594767866480095e-07, "loss": 0.2565, "step": 23862 }, { "epoch": 0.41479949242990494, "grad_norm": 1.3103824087476523, "learning_rate": 6.594501072888147e-07, "loss": 0.1896, "step": 23863 }, { "epoch": 0.41481687496740777, "grad_norm": 2.0113993140972677, "learning_rate": 6.59423427424236e-07, "loss": 0.3152, "step": 23864 }, { "epoch": 0.41483425750491054, "grad_norm": 1.0186124643377346, "learning_rate": 6.593967470543584e-07, "loss": 0.1416, "step": 23865 }, { "epoch": 0.41485164004241337, "grad_norm": 1.8824303576995745, "learning_rate": 6.593700661792663e-07, "loss": 0.188, "step": 23866 }, { "epoch": 0.4148690225799162, "grad_norm": 0.9231801249843145, "learning_rate": 6.593433847990443e-07, "loss": 0.1669, "step": 23867 }, { "epoch": 0.41488640511741903, "grad_norm": 1.484107319589595, "learning_rate": 6.593167029137769e-07, "loss": 0.1662, "step": 23868 }, { "epoch": 0.41490378765492186, "grad_norm": 1.5859418204795976, "learning_rate": 6.592900205235487e-07, "loss": 0.3163, "step": 23869 }, { "epoch": 0.4149211701924247, "grad_norm": 2.118991312725819, "learning_rate": 6.592633376284445e-07, "loss": 0.4979, "step": 23870 }, { "epoch": 0.4149385527299275, "grad_norm": 2.167598873332712, "learning_rate": 6.592366542285487e-07, "loss": 0.3943, "step": 23871 }, { "epoch": 0.41495593526743035, "grad_norm": 1.1796181241973431, "learning_rate": 6.592099703239457e-07, "loss": 0.2542, "step": 23872 }, { "epoch": 0.4149733178049332, "grad_norm": 2.1825981316767593, "learning_rate": 6.591832859147202e-07, "loss": 0.3108, "step": 23873 }, { "epoch": 0.414990700342436, "grad_norm": 1.5801807503355831, "learning_rate": 6.591566010009569e-07, "loss": 0.2199, "step": 23874 }, { "epoch": 0.4150080828799388, "grad_norm": 1.2292987348630495, "learning_rate": 6.591299155827403e-07, "loss": 0.3017, "step": 23875 }, { "epoch": 0.4150254654174416, "grad_norm": 1.5699405938365192, "learning_rate": 6.59103229660155e-07, "loss": 0.2091, "step": 23876 }, { "epoch": 0.41504284795494445, "grad_norm": 1.1728292479980713, "learning_rate": 6.590765432332855e-07, "loss": 0.2063, "step": 23877 }, { "epoch": 0.4150602304924473, "grad_norm": 1.2152931439714794, "learning_rate": 6.590498563022163e-07, "loss": 0.3161, "step": 23878 }, { "epoch": 0.4150776130299501, "grad_norm": 1.6951511862879167, "learning_rate": 6.590231688670323e-07, "loss": 0.2026, "step": 23879 }, { "epoch": 0.41509499556745294, "grad_norm": 4.305545466061261, "learning_rate": 6.589964809278178e-07, "loss": 0.3962, "step": 23880 }, { "epoch": 0.41511237810495577, "grad_norm": 2.6233871630119854, "learning_rate": 6.589697924846575e-07, "loss": 0.2943, "step": 23881 }, { "epoch": 0.4151297606424586, "grad_norm": 1.8751666958678173, "learning_rate": 6.589431035376361e-07, "loss": 0.3603, "step": 23882 }, { "epoch": 0.41514714317996143, "grad_norm": 2.0970048992802095, "learning_rate": 6.589164140868378e-07, "loss": 0.2077, "step": 23883 }, { "epoch": 0.41516452571746426, "grad_norm": 1.4421480307664776, "learning_rate": 6.588897241323478e-07, "loss": 0.2462, "step": 23884 }, { "epoch": 0.41518190825496704, "grad_norm": 1.363920828405145, "learning_rate": 6.588630336742502e-07, "loss": 0.5146, "step": 23885 }, { "epoch": 0.41519929079246987, "grad_norm": 1.5423287870558875, "learning_rate": 6.588363427126297e-07, "loss": 0.2175, "step": 23886 }, { "epoch": 0.4152166733299727, "grad_norm": 2.8327933208002816, "learning_rate": 6.58809651247571e-07, "loss": 0.2884, "step": 23887 }, { "epoch": 0.4152340558674755, "grad_norm": 1.4522188359341281, "learning_rate": 6.587829592791588e-07, "loss": 0.3521, "step": 23888 }, { "epoch": 0.41525143840497836, "grad_norm": 1.4689453959912306, "learning_rate": 6.587562668074775e-07, "loss": 0.2693, "step": 23889 }, { "epoch": 0.4152688209424812, "grad_norm": 1.9502500287318156, "learning_rate": 6.587295738326116e-07, "loss": 0.2894, "step": 23890 }, { "epoch": 0.415286203479984, "grad_norm": 0.9964490806370668, "learning_rate": 6.587028803546459e-07, "loss": 0.2342, "step": 23891 }, { "epoch": 0.41530358601748685, "grad_norm": 2.050368530990478, "learning_rate": 6.586761863736651e-07, "loss": 0.3138, "step": 23892 }, { "epoch": 0.4153209685549897, "grad_norm": 1.650741724446755, "learning_rate": 6.586494918897536e-07, "loss": 0.1665, "step": 23893 }, { "epoch": 0.4153383510924925, "grad_norm": 1.658951026478543, "learning_rate": 6.586227969029961e-07, "loss": 0.3087, "step": 23894 }, { "epoch": 0.4153557336299953, "grad_norm": 2.469253847738165, "learning_rate": 6.585961014134772e-07, "loss": 0.224, "step": 23895 }, { "epoch": 0.4153731161674981, "grad_norm": 1.5801585623944845, "learning_rate": 6.585694054212816e-07, "loss": 0.2743, "step": 23896 }, { "epoch": 0.41539049870500094, "grad_norm": 1.4304639922813622, "learning_rate": 6.585427089264937e-07, "loss": 0.1789, "step": 23897 }, { "epoch": 0.4154078812425038, "grad_norm": 1.5822562308378505, "learning_rate": 6.585160119291983e-07, "loss": 0.2395, "step": 23898 }, { "epoch": 0.4154252637800066, "grad_norm": 1.4798224469107, "learning_rate": 6.584893144294798e-07, "loss": 0.2089, "step": 23899 }, { "epoch": 0.41544264631750943, "grad_norm": 1.9160235197772635, "learning_rate": 6.584626164274232e-07, "loss": 0.2188, "step": 23900 }, { "epoch": 0.41546002885501226, "grad_norm": 1.7792438806858137, "learning_rate": 6.584359179231129e-07, "loss": 0.2492, "step": 23901 }, { "epoch": 0.4154774113925151, "grad_norm": 1.2076978143037262, "learning_rate": 6.584092189166333e-07, "loss": 0.1854, "step": 23902 }, { "epoch": 0.4154947939300179, "grad_norm": 1.4817406323793172, "learning_rate": 6.583825194080695e-07, "loss": 0.3136, "step": 23903 }, { "epoch": 0.41551217646752076, "grad_norm": 1.6612611608088073, "learning_rate": 6.583558193975057e-07, "loss": 0.4049, "step": 23904 }, { "epoch": 0.41552955900502353, "grad_norm": 2.4447715680404682, "learning_rate": 6.583291188850269e-07, "loss": 0.2152, "step": 23905 }, { "epoch": 0.41554694154252636, "grad_norm": 1.3743519946721456, "learning_rate": 6.583024178707171e-07, "loss": 0.1408, "step": 23906 }, { "epoch": 0.4155643240800292, "grad_norm": 1.4459903641205234, "learning_rate": 6.582757163546618e-07, "loss": 0.1474, "step": 23907 }, { "epoch": 0.415581706617532, "grad_norm": 1.7327288089527084, "learning_rate": 6.58249014336945e-07, "loss": 0.1605, "step": 23908 }, { "epoch": 0.41559908915503485, "grad_norm": 1.1318580947987467, "learning_rate": 6.582223118176515e-07, "loss": 0.1039, "step": 23909 }, { "epoch": 0.4156164716925377, "grad_norm": 1.5176064577304107, "learning_rate": 6.58195608796866e-07, "loss": 0.1842, "step": 23910 }, { "epoch": 0.4156338542300405, "grad_norm": 1.872219830716592, "learning_rate": 6.581689052746731e-07, "loss": 0.231, "step": 23911 }, { "epoch": 0.41565123676754334, "grad_norm": 2.2795047638900727, "learning_rate": 6.581422012511573e-07, "loss": 0.452, "step": 23912 }, { "epoch": 0.41566861930504617, "grad_norm": 1.5076585344498568, "learning_rate": 6.581154967264036e-07, "loss": 0.3729, "step": 23913 }, { "epoch": 0.415686001842549, "grad_norm": 1.0621838328401854, "learning_rate": 6.580887917004961e-07, "loss": 0.1016, "step": 23914 }, { "epoch": 0.4157033843800518, "grad_norm": 0.8934719757616689, "learning_rate": 6.5806208617352e-07, "loss": 0.17, "step": 23915 }, { "epoch": 0.4157207669175546, "grad_norm": 3.8195859570647555, "learning_rate": 6.580353801455595e-07, "loss": 0.1884, "step": 23916 }, { "epoch": 0.41573814945505744, "grad_norm": 1.1282173537317508, "learning_rate": 6.580086736166998e-07, "loss": 0.2086, "step": 23917 }, { "epoch": 0.41575553199256027, "grad_norm": 1.8335195896934424, "learning_rate": 6.579819665870248e-07, "loss": 0.2595, "step": 23918 }, { "epoch": 0.4157729145300631, "grad_norm": 1.428137177881423, "learning_rate": 6.579552590566197e-07, "loss": 0.2023, "step": 23919 }, { "epoch": 0.41579029706756593, "grad_norm": 1.3357102395031242, "learning_rate": 6.57928551025569e-07, "loss": 0.1943, "step": 23920 }, { "epoch": 0.41580767960506876, "grad_norm": 1.4251329581589622, "learning_rate": 6.579018424939573e-07, "loss": 0.3874, "step": 23921 }, { "epoch": 0.4158250621425716, "grad_norm": 2.2806808418511055, "learning_rate": 6.578751334618692e-07, "loss": 0.3578, "step": 23922 }, { "epoch": 0.4158424446800744, "grad_norm": 2.931809369093822, "learning_rate": 6.578484239293896e-07, "loss": 0.3767, "step": 23923 }, { "epoch": 0.41585982721757725, "grad_norm": 1.8017114426621081, "learning_rate": 6.578217138966029e-07, "loss": 0.2702, "step": 23924 }, { "epoch": 0.41587720975508, "grad_norm": 1.6353408722468503, "learning_rate": 6.577950033635939e-07, "loss": 0.1475, "step": 23925 }, { "epoch": 0.41589459229258285, "grad_norm": 1.6376225505899378, "learning_rate": 6.577682923304474e-07, "loss": 0.3069, "step": 23926 }, { "epoch": 0.4159119748300857, "grad_norm": 2.086338489294833, "learning_rate": 6.577415807972478e-07, "loss": 0.3521, "step": 23927 }, { "epoch": 0.4159293573675885, "grad_norm": 1.403018058321958, "learning_rate": 6.577148687640796e-07, "loss": 0.293, "step": 23928 }, { "epoch": 0.41594673990509134, "grad_norm": 1.5322517839933827, "learning_rate": 6.576881562310279e-07, "loss": 0.3838, "step": 23929 }, { "epoch": 0.4159641224425942, "grad_norm": 2.7349419838264875, "learning_rate": 6.576614431981773e-07, "loss": 0.3859, "step": 23930 }, { "epoch": 0.415981504980097, "grad_norm": 1.7926176594613226, "learning_rate": 6.576347296656121e-07, "loss": 0.1802, "step": 23931 }, { "epoch": 0.41599888751759984, "grad_norm": 1.1870676576478743, "learning_rate": 6.576080156334174e-07, "loss": 0.3766, "step": 23932 }, { "epoch": 0.41601627005510267, "grad_norm": 1.075606701535423, "learning_rate": 6.575813011016775e-07, "loss": 0.2153, "step": 23933 }, { "epoch": 0.4160336525926055, "grad_norm": 1.3646872230977847, "learning_rate": 6.575545860704775e-07, "loss": 0.303, "step": 23934 }, { "epoch": 0.41605103513010827, "grad_norm": 1.0790039857341498, "learning_rate": 6.575278705399017e-07, "loss": 0.3279, "step": 23935 }, { "epoch": 0.4160684176676111, "grad_norm": 1.4955392638592477, "learning_rate": 6.575011545100351e-07, "loss": 0.28, "step": 23936 }, { "epoch": 0.41608580020511393, "grad_norm": 1.796879767081114, "learning_rate": 6.574744379809619e-07, "loss": 0.2237, "step": 23937 }, { "epoch": 0.41610318274261676, "grad_norm": 1.7641750621754957, "learning_rate": 6.574477209527672e-07, "loss": 0.2269, "step": 23938 }, { "epoch": 0.4161205652801196, "grad_norm": 1.5339983721603767, "learning_rate": 6.574210034255355e-07, "loss": 0.2062, "step": 23939 }, { "epoch": 0.4161379478176224, "grad_norm": 1.1279169833928582, "learning_rate": 6.573942853993518e-07, "loss": 0.21, "step": 23940 }, { "epoch": 0.41615533035512525, "grad_norm": 2.5801315418543598, "learning_rate": 6.573675668743e-07, "loss": 0.3352, "step": 23941 }, { "epoch": 0.4161727128926281, "grad_norm": 1.5618082306105476, "learning_rate": 6.573408478504658e-07, "loss": 0.1749, "step": 23942 }, { "epoch": 0.4161900954301309, "grad_norm": 3.9308945798105226, "learning_rate": 6.573141283279331e-07, "loss": 0.563, "step": 23943 }, { "epoch": 0.4162074779676337, "grad_norm": 0.9315878237676164, "learning_rate": 6.57287408306787e-07, "loss": 0.371, "step": 23944 }, { "epoch": 0.4162248605051365, "grad_norm": 1.5429798307363998, "learning_rate": 6.572606877871121e-07, "loss": 0.1838, "step": 23945 }, { "epoch": 0.41624224304263935, "grad_norm": 1.0602637458842499, "learning_rate": 6.57233966768993e-07, "loss": 0.1516, "step": 23946 }, { "epoch": 0.4162596255801422, "grad_norm": 2.1062879877873026, "learning_rate": 6.572072452525143e-07, "loss": 0.5036, "step": 23947 }, { "epoch": 0.416277008117645, "grad_norm": 1.1398265326873531, "learning_rate": 6.571805232377612e-07, "loss": 0.2614, "step": 23948 }, { "epoch": 0.41629439065514784, "grad_norm": 2.5409112840238626, "learning_rate": 6.571538007248178e-07, "loss": 0.3854, "step": 23949 }, { "epoch": 0.41631177319265067, "grad_norm": 1.8486480835029195, "learning_rate": 6.571270777137689e-07, "loss": 0.2472, "step": 23950 }, { "epoch": 0.4163291557301535, "grad_norm": 1.5412138982332046, "learning_rate": 6.571003542046996e-07, "loss": 0.2085, "step": 23951 }, { "epoch": 0.41634653826765633, "grad_norm": 2.2453480503323786, "learning_rate": 6.570736301976944e-07, "loss": 0.4036, "step": 23952 }, { "epoch": 0.41636392080515916, "grad_norm": 1.2027892672084188, "learning_rate": 6.570469056928377e-07, "loss": 0.1529, "step": 23953 }, { "epoch": 0.41638130334266193, "grad_norm": 2.3271754441145363, "learning_rate": 6.570201806902146e-07, "loss": 0.3566, "step": 23954 }, { "epoch": 0.41639868588016477, "grad_norm": 1.3998770298596945, "learning_rate": 6.569934551899097e-07, "loss": 0.3053, "step": 23955 }, { "epoch": 0.4164160684176676, "grad_norm": 1.2913797444656439, "learning_rate": 6.569667291920076e-07, "loss": 0.1602, "step": 23956 }, { "epoch": 0.4164334509551704, "grad_norm": 1.8465764529733715, "learning_rate": 6.569400026965929e-07, "loss": 0.2312, "step": 23957 }, { "epoch": 0.41645083349267326, "grad_norm": 1.6363104246377744, "learning_rate": 6.569132757037506e-07, "loss": 0.2866, "step": 23958 }, { "epoch": 0.4164682160301761, "grad_norm": 1.9024556814297793, "learning_rate": 6.568865482135655e-07, "loss": 0.2193, "step": 23959 }, { "epoch": 0.4164855985676789, "grad_norm": 1.94137478870335, "learning_rate": 6.56859820226122e-07, "loss": 0.2693, "step": 23960 }, { "epoch": 0.41650298110518175, "grad_norm": 1.4766196904178754, "learning_rate": 6.568330917415048e-07, "loss": 0.1741, "step": 23961 }, { "epoch": 0.4165203636426846, "grad_norm": 1.5772925578546333, "learning_rate": 6.568063627597987e-07, "loss": 0.2115, "step": 23962 }, { "epoch": 0.4165377461801874, "grad_norm": 1.1150600693839496, "learning_rate": 6.567796332810887e-07, "loss": 0.3238, "step": 23963 }, { "epoch": 0.4165551287176902, "grad_norm": 1.8778212276308701, "learning_rate": 6.567529033054591e-07, "loss": 0.2405, "step": 23964 }, { "epoch": 0.416572511255193, "grad_norm": 1.6809402185553357, "learning_rate": 6.567261728329949e-07, "loss": 0.3479, "step": 23965 }, { "epoch": 0.41658989379269584, "grad_norm": 2.6331981742975814, "learning_rate": 6.566994418637806e-07, "loss": 0.3024, "step": 23966 }, { "epoch": 0.4166072763301987, "grad_norm": 1.4396913662381294, "learning_rate": 6.566727103979012e-07, "loss": 0.3428, "step": 23967 }, { "epoch": 0.4166246588677015, "grad_norm": 1.168794194863943, "learning_rate": 6.566459784354412e-07, "loss": 0.2781, "step": 23968 }, { "epoch": 0.41664204140520433, "grad_norm": 2.5375978316259555, "learning_rate": 6.566192459764854e-07, "loss": 0.4403, "step": 23969 }, { "epoch": 0.41665942394270716, "grad_norm": 1.7383625708698702, "learning_rate": 6.565925130211186e-07, "loss": 0.1814, "step": 23970 }, { "epoch": 0.41667680648021, "grad_norm": 1.5311887650268186, "learning_rate": 6.565657795694255e-07, "loss": 0.1435, "step": 23971 }, { "epoch": 0.4166941890177128, "grad_norm": 1.4599839652181876, "learning_rate": 6.565390456214906e-07, "loss": 0.2667, "step": 23972 }, { "epoch": 0.41671157155521565, "grad_norm": 2.5831864523093024, "learning_rate": 6.565123111773989e-07, "loss": 0.2784, "step": 23973 }, { "epoch": 0.41672895409271843, "grad_norm": 1.482708578815297, "learning_rate": 6.564855762372353e-07, "loss": 0.2544, "step": 23974 }, { "epoch": 0.41674633663022126, "grad_norm": 1.6184684455479734, "learning_rate": 6.56458840801084e-07, "loss": 0.4661, "step": 23975 }, { "epoch": 0.4167637191677241, "grad_norm": 1.1539737160581314, "learning_rate": 6.564321048690303e-07, "loss": 0.5176, "step": 23976 }, { "epoch": 0.4167811017052269, "grad_norm": 1.9255840486037086, "learning_rate": 6.564053684411585e-07, "loss": 0.2863, "step": 23977 }, { "epoch": 0.41679848424272975, "grad_norm": 1.5090395628717295, "learning_rate": 6.563786315175536e-07, "loss": 0.2395, "step": 23978 }, { "epoch": 0.4168158667802326, "grad_norm": 1.479477990940007, "learning_rate": 6.563518940983003e-07, "loss": 0.2105, "step": 23979 }, { "epoch": 0.4168332493177354, "grad_norm": 1.9002090871983492, "learning_rate": 6.563251561834833e-07, "loss": 0.3412, "step": 23980 }, { "epoch": 0.41685063185523824, "grad_norm": 2.508702335380885, "learning_rate": 6.562984177731873e-07, "loss": 0.3653, "step": 23981 }, { "epoch": 0.41686801439274107, "grad_norm": 1.261791492435298, "learning_rate": 6.562716788674971e-07, "loss": 0.3144, "step": 23982 }, { "epoch": 0.4168853969302439, "grad_norm": 3.370633387939071, "learning_rate": 6.562449394664975e-07, "loss": 0.3733, "step": 23983 }, { "epoch": 0.4169027794677467, "grad_norm": 2.554864889718467, "learning_rate": 6.562181995702733e-07, "loss": 0.3266, "step": 23984 }, { "epoch": 0.4169201620052495, "grad_norm": 1.2146404291953883, "learning_rate": 6.561914591789091e-07, "loss": 0.2087, "step": 23985 }, { "epoch": 0.41693754454275234, "grad_norm": 1.225024530232701, "learning_rate": 6.561647182924898e-07, "loss": 0.2779, "step": 23986 }, { "epoch": 0.41695492708025517, "grad_norm": 2.2465074038864334, "learning_rate": 6.561379769110999e-07, "loss": 0.2355, "step": 23987 }, { "epoch": 0.416972309617758, "grad_norm": 1.2285682850155006, "learning_rate": 6.561112350348244e-07, "loss": 0.277, "step": 23988 }, { "epoch": 0.4169896921552608, "grad_norm": 1.546817307740688, "learning_rate": 6.560844926637481e-07, "loss": 0.1922, "step": 23989 }, { "epoch": 0.41700707469276366, "grad_norm": 1.3691024118392325, "learning_rate": 6.560577497979556e-07, "loss": 0.3053, "step": 23990 }, { "epoch": 0.4170244572302665, "grad_norm": 1.7488153657015844, "learning_rate": 6.560310064375318e-07, "loss": 0.2526, "step": 23991 }, { "epoch": 0.4170418397677693, "grad_norm": 1.0675505914416956, "learning_rate": 6.560042625825613e-07, "loss": 0.2111, "step": 23992 }, { "epoch": 0.41705922230527215, "grad_norm": 4.300690872475537, "learning_rate": 6.559775182331289e-07, "loss": 0.2303, "step": 23993 }, { "epoch": 0.4170766048427749, "grad_norm": 3.9251984617111724, "learning_rate": 6.559507733893195e-07, "loss": 0.4359, "step": 23994 }, { "epoch": 0.41709398738027775, "grad_norm": 2.4541840221819884, "learning_rate": 6.559240280512177e-07, "loss": 0.2726, "step": 23995 }, { "epoch": 0.4171113699177806, "grad_norm": 2.1197235440315563, "learning_rate": 6.558972822189085e-07, "loss": 0.2948, "step": 23996 }, { "epoch": 0.4171287524552834, "grad_norm": 1.1270793514025579, "learning_rate": 6.558705358924765e-07, "loss": 0.2146, "step": 23997 }, { "epoch": 0.41714613499278624, "grad_norm": 2.6186293499162954, "learning_rate": 6.558437890720063e-07, "loss": 0.3646, "step": 23998 }, { "epoch": 0.4171635175302891, "grad_norm": 1.6048374802516332, "learning_rate": 6.558170417575831e-07, "loss": 0.2188, "step": 23999 }, { "epoch": 0.4171809000677919, "grad_norm": 1.3323749567569179, "learning_rate": 6.557902939492914e-07, "loss": 0.1949, "step": 24000 }, { "epoch": 0.41719828260529473, "grad_norm": 1.9547197835066012, "learning_rate": 6.557635456472159e-07, "loss": 0.2377, "step": 24001 }, { "epoch": 0.41721566514279756, "grad_norm": 1.425138172548669, "learning_rate": 6.557367968514417e-07, "loss": 0.1636, "step": 24002 }, { "epoch": 0.4172330476803004, "grad_norm": 1.327309280247217, "learning_rate": 6.557100475620534e-07, "loss": 0.2805, "step": 24003 }, { "epoch": 0.41725043021780317, "grad_norm": 1.5664840418331647, "learning_rate": 6.556832977791356e-07, "loss": 0.2733, "step": 24004 }, { "epoch": 0.417267812755306, "grad_norm": 1.5110742047253505, "learning_rate": 6.556565475027734e-07, "loss": 0.2644, "step": 24005 }, { "epoch": 0.41728519529280883, "grad_norm": 2.8895833640390625, "learning_rate": 6.556297967330514e-07, "loss": 0.4074, "step": 24006 }, { "epoch": 0.41730257783031166, "grad_norm": 1.5301084816055497, "learning_rate": 6.556030454700544e-07, "loss": 0.4446, "step": 24007 }, { "epoch": 0.4173199603678145, "grad_norm": 1.5053719642842813, "learning_rate": 6.555762937138674e-07, "loss": 0.2912, "step": 24008 }, { "epoch": 0.4173373429053173, "grad_norm": 1.4224379243749528, "learning_rate": 6.555495414645747e-07, "loss": 0.1722, "step": 24009 }, { "epoch": 0.41735472544282015, "grad_norm": 2.1202275289651586, "learning_rate": 6.555227887222616e-07, "loss": 0.3453, "step": 24010 }, { "epoch": 0.417372107980323, "grad_norm": 1.410489703876877, "learning_rate": 6.554960354870128e-07, "loss": 0.1922, "step": 24011 }, { "epoch": 0.4173894905178258, "grad_norm": 1.6792438847154758, "learning_rate": 6.554692817589129e-07, "loss": 0.3428, "step": 24012 }, { "epoch": 0.41740687305532864, "grad_norm": 1.4198885228950715, "learning_rate": 6.554425275380467e-07, "loss": 0.2261, "step": 24013 }, { "epoch": 0.4174242555928314, "grad_norm": 4.094380554675608, "learning_rate": 6.554157728244992e-07, "loss": 0.5275, "step": 24014 }, { "epoch": 0.41744163813033425, "grad_norm": 0.8582587676589611, "learning_rate": 6.553890176183551e-07, "loss": 0.1534, "step": 24015 }, { "epoch": 0.4174590206678371, "grad_norm": 1.5116587405430262, "learning_rate": 6.553622619196992e-07, "loss": 0.2465, "step": 24016 }, { "epoch": 0.4174764032053399, "grad_norm": 2.3708915926923546, "learning_rate": 6.553355057286161e-07, "loss": 0.6995, "step": 24017 }, { "epoch": 0.41749378574284274, "grad_norm": 1.522308409169985, "learning_rate": 6.55308749045191e-07, "loss": 0.3815, "step": 24018 }, { "epoch": 0.41751116828034557, "grad_norm": 2.228522935688658, "learning_rate": 6.552819918695085e-07, "loss": 0.347, "step": 24019 }, { "epoch": 0.4175285508178484, "grad_norm": 1.1978657066382483, "learning_rate": 6.552552342016533e-07, "loss": 0.2154, "step": 24020 }, { "epoch": 0.41754593335535123, "grad_norm": 1.22975310724228, "learning_rate": 6.552284760417105e-07, "loss": 0.1858, "step": 24021 }, { "epoch": 0.41756331589285406, "grad_norm": 1.1614275399398932, "learning_rate": 6.552017173897645e-07, "loss": 0.366, "step": 24022 }, { "epoch": 0.4175806984303569, "grad_norm": 1.908636898953158, "learning_rate": 6.551749582459005e-07, "loss": 0.2838, "step": 24023 }, { "epoch": 0.41759808096785966, "grad_norm": 2.079327009747381, "learning_rate": 6.551481986102032e-07, "loss": 0.2743, "step": 24024 }, { "epoch": 0.4176154635053625, "grad_norm": 1.767482363750194, "learning_rate": 6.551214384827574e-07, "loss": 0.3009, "step": 24025 }, { "epoch": 0.4176328460428653, "grad_norm": 1.4068028107549577, "learning_rate": 6.550946778636477e-07, "loss": 0.3234, "step": 24026 }, { "epoch": 0.41765022858036815, "grad_norm": 1.2988570258261727, "learning_rate": 6.550679167529593e-07, "loss": 0.2232, "step": 24027 }, { "epoch": 0.417667611117871, "grad_norm": 1.986535535458339, "learning_rate": 6.550411551507768e-07, "loss": 0.3445, "step": 24028 }, { "epoch": 0.4176849936553738, "grad_norm": 1.2546300188590616, "learning_rate": 6.550143930571851e-07, "loss": 0.1641, "step": 24029 }, { "epoch": 0.41770237619287665, "grad_norm": 2.3308773965022622, "learning_rate": 6.549876304722689e-07, "loss": 0.3153, "step": 24030 }, { "epoch": 0.4177197587303795, "grad_norm": 1.5422512752585273, "learning_rate": 6.549608673961132e-07, "loss": 0.1834, "step": 24031 }, { "epoch": 0.4177371412678823, "grad_norm": 1.248256110665168, "learning_rate": 6.549341038288026e-07, "loss": 0.1903, "step": 24032 }, { "epoch": 0.41775452380538514, "grad_norm": 1.9996892756534992, "learning_rate": 6.549073397704221e-07, "loss": 0.2896, "step": 24033 }, { "epoch": 0.4177719063428879, "grad_norm": 1.345169470239571, "learning_rate": 6.548805752210565e-07, "loss": 0.3616, "step": 24034 }, { "epoch": 0.41778928888039074, "grad_norm": 1.9615094349575297, "learning_rate": 6.548538101807906e-07, "loss": 0.3385, "step": 24035 }, { "epoch": 0.41780667141789357, "grad_norm": 1.2214629460902513, "learning_rate": 6.548270446497092e-07, "loss": 0.2237, "step": 24036 }, { "epoch": 0.4178240539553964, "grad_norm": 1.7134669108996055, "learning_rate": 6.548002786278974e-07, "loss": 0.2054, "step": 24037 }, { "epoch": 0.41784143649289923, "grad_norm": 2.3812583898591293, "learning_rate": 6.547735121154397e-07, "loss": 0.4306, "step": 24038 }, { "epoch": 0.41785881903040206, "grad_norm": 1.8815517957793628, "learning_rate": 6.547467451124211e-07, "loss": 0.2598, "step": 24039 }, { "epoch": 0.4178762015679049, "grad_norm": 1.544934567382538, "learning_rate": 6.547199776189263e-07, "loss": 0.2569, "step": 24040 }, { "epoch": 0.4178935841054077, "grad_norm": 1.101339443697221, "learning_rate": 6.546932096350405e-07, "loss": 0.3156, "step": 24041 }, { "epoch": 0.41791096664291055, "grad_norm": 1.3438245429832951, "learning_rate": 6.54666441160848e-07, "loss": 0.1826, "step": 24042 }, { "epoch": 0.4179283491804134, "grad_norm": 1.3617302085754046, "learning_rate": 6.54639672196434e-07, "loss": 0.226, "step": 24043 }, { "epoch": 0.41794573171791616, "grad_norm": 1.5642043066055218, "learning_rate": 6.546129027418834e-07, "loss": 0.2779, "step": 24044 }, { "epoch": 0.417963114255419, "grad_norm": 1.850064318747663, "learning_rate": 6.545861327972808e-07, "loss": 0.3565, "step": 24045 }, { "epoch": 0.4179804967929218, "grad_norm": 2.8149387600222324, "learning_rate": 6.545593623627113e-07, "loss": 0.5286, "step": 24046 }, { "epoch": 0.41799787933042465, "grad_norm": 1.4889478165736132, "learning_rate": 6.545325914382593e-07, "loss": 0.2144, "step": 24047 }, { "epoch": 0.4180152618679275, "grad_norm": 1.2248353150968894, "learning_rate": 6.545058200240103e-07, "loss": 0.3849, "step": 24048 }, { "epoch": 0.4180326444054303, "grad_norm": 3.0452169706939567, "learning_rate": 6.544790481200488e-07, "loss": 0.3078, "step": 24049 }, { "epoch": 0.41805002694293314, "grad_norm": 3.108612207779283, "learning_rate": 6.544522757264595e-07, "loss": 0.413, "step": 24050 }, { "epoch": 0.41806740948043597, "grad_norm": 2.031919747840985, "learning_rate": 6.544255028433275e-07, "loss": 0.474, "step": 24051 }, { "epoch": 0.4180847920179388, "grad_norm": 2.119307659632333, "learning_rate": 6.543987294707377e-07, "loss": 0.2736, "step": 24052 }, { "epoch": 0.41810217455544163, "grad_norm": 4.917279494836371, "learning_rate": 6.543719556087748e-07, "loss": 0.2465, "step": 24053 }, { "epoch": 0.4181195570929444, "grad_norm": 1.2404728134696117, "learning_rate": 6.543451812575237e-07, "loss": 0.2357, "step": 24054 }, { "epoch": 0.41813693963044724, "grad_norm": 2.454055336225717, "learning_rate": 6.543184064170692e-07, "loss": 0.3001, "step": 24055 }, { "epoch": 0.41815432216795007, "grad_norm": 4.348278102897182, "learning_rate": 6.542916310874964e-07, "loss": 0.3929, "step": 24056 }, { "epoch": 0.4181717047054529, "grad_norm": 1.3178396602218125, "learning_rate": 6.542648552688899e-07, "loss": 0.1681, "step": 24057 }, { "epoch": 0.4181890872429557, "grad_norm": 1.3800748372867138, "learning_rate": 6.542380789613346e-07, "loss": 0.363, "step": 24058 }, { "epoch": 0.41820646978045856, "grad_norm": 2.0185700441509495, "learning_rate": 6.542113021649156e-07, "loss": 0.2397, "step": 24059 }, { "epoch": 0.4182238523179614, "grad_norm": 1.6471970331622918, "learning_rate": 6.541845248797175e-07, "loss": 0.2424, "step": 24060 }, { "epoch": 0.4182412348554642, "grad_norm": 2.0249816455626473, "learning_rate": 6.541577471058253e-07, "loss": 0.2202, "step": 24061 }, { "epoch": 0.41825861739296705, "grad_norm": 3.3239662666560217, "learning_rate": 6.54130968843324e-07, "loss": 0.4418, "step": 24062 }, { "epoch": 0.4182759999304699, "grad_norm": 2.13520128555239, "learning_rate": 6.541041900922981e-07, "loss": 0.3751, "step": 24063 }, { "epoch": 0.41829338246797265, "grad_norm": 2.378301065502975, "learning_rate": 6.540774108528328e-07, "loss": 0.4294, "step": 24064 }, { "epoch": 0.4183107650054755, "grad_norm": 1.9091196370866779, "learning_rate": 6.54050631125013e-07, "loss": 0.2865, "step": 24065 }, { "epoch": 0.4183281475429783, "grad_norm": 1.9880907472084988, "learning_rate": 6.540238509089233e-07, "loss": 0.2966, "step": 24066 }, { "epoch": 0.41834553008048114, "grad_norm": 2.7505175310262113, "learning_rate": 6.539970702046487e-07, "loss": 0.3321, "step": 24067 }, { "epoch": 0.418362912617984, "grad_norm": 2.040615929183673, "learning_rate": 6.539702890122745e-07, "loss": 0.3734, "step": 24068 }, { "epoch": 0.4183802951554868, "grad_norm": 2.302870796386575, "learning_rate": 6.539435073318848e-07, "loss": 0.2571, "step": 24069 }, { "epoch": 0.41839767769298963, "grad_norm": 1.7078099868324608, "learning_rate": 6.539167251635651e-07, "loss": 0.1889, "step": 24070 }, { "epoch": 0.41841506023049246, "grad_norm": 2.3087153509899503, "learning_rate": 6.538899425074e-07, "loss": 0.1764, "step": 24071 }, { "epoch": 0.4184324427679953, "grad_norm": 1.4845862258014484, "learning_rate": 6.538631593634745e-07, "loss": 0.3786, "step": 24072 }, { "epoch": 0.4184498253054981, "grad_norm": 1.6854678627858577, "learning_rate": 6.538363757318734e-07, "loss": 0.2123, "step": 24073 }, { "epoch": 0.4184672078430009, "grad_norm": 1.5122999307072362, "learning_rate": 6.538095916126817e-07, "loss": 0.2658, "step": 24074 }, { "epoch": 0.41848459038050373, "grad_norm": 1.1951702022982382, "learning_rate": 6.537828070059844e-07, "loss": 0.1994, "step": 24075 }, { "epoch": 0.41850197291800656, "grad_norm": 2.108126015260258, "learning_rate": 6.53756021911866e-07, "loss": 0.2673, "step": 24076 }, { "epoch": 0.4185193554555094, "grad_norm": 1.8462158607768078, "learning_rate": 6.537292363304118e-07, "loss": 0.2317, "step": 24077 }, { "epoch": 0.4185367379930122, "grad_norm": 5.725123095511023, "learning_rate": 6.537024502617066e-07, "loss": 0.3598, "step": 24078 }, { "epoch": 0.41855412053051505, "grad_norm": 2.7331861146297958, "learning_rate": 6.536756637058352e-07, "loss": 0.3488, "step": 24079 }, { "epoch": 0.4185715030680179, "grad_norm": 1.081603280414719, "learning_rate": 6.536488766628823e-07, "loss": 0.2722, "step": 24080 }, { "epoch": 0.4185888856055207, "grad_norm": 1.9730374132124024, "learning_rate": 6.536220891329334e-07, "loss": 0.2011, "step": 24081 }, { "epoch": 0.41860626814302354, "grad_norm": 1.2600933412348045, "learning_rate": 6.535953011160726e-07, "loss": 0.1459, "step": 24082 }, { "epoch": 0.4186236506805263, "grad_norm": 1.1173803750580096, "learning_rate": 6.535685126123857e-07, "loss": 0.2223, "step": 24083 }, { "epoch": 0.41864103321802915, "grad_norm": 1.3698513701689718, "learning_rate": 6.535417236219569e-07, "loss": 0.2657, "step": 24084 }, { "epoch": 0.418658415755532, "grad_norm": 1.2801852364405732, "learning_rate": 6.535149341448715e-07, "loss": 0.1523, "step": 24085 }, { "epoch": 0.4186757982930348, "grad_norm": 2.7732491054913933, "learning_rate": 6.534881441812141e-07, "loss": 0.2557, "step": 24086 }, { "epoch": 0.41869318083053764, "grad_norm": 2.2345751314432234, "learning_rate": 6.534613537310701e-07, "loss": 0.2656, "step": 24087 }, { "epoch": 0.41871056336804047, "grad_norm": 1.496168060247896, "learning_rate": 6.534345627945238e-07, "loss": 0.2615, "step": 24088 }, { "epoch": 0.4187279459055433, "grad_norm": 2.395312081404994, "learning_rate": 6.534077713716606e-07, "loss": 0.2794, "step": 24089 }, { "epoch": 0.4187453284430461, "grad_norm": 1.0422533060150247, "learning_rate": 6.533809794625651e-07, "loss": 0.3799, "step": 24090 }, { "epoch": 0.41876271098054896, "grad_norm": 1.7626985126908574, "learning_rate": 6.533541870673225e-07, "loss": 0.216, "step": 24091 }, { "epoch": 0.4187800935180518, "grad_norm": 1.2889576031379182, "learning_rate": 6.533273941860173e-07, "loss": 0.2034, "step": 24092 }, { "epoch": 0.41879747605555456, "grad_norm": 1.3983946672442393, "learning_rate": 6.53300600818735e-07, "loss": 0.1874, "step": 24093 }, { "epoch": 0.4188148585930574, "grad_norm": 1.6766831676483223, "learning_rate": 6.532738069655601e-07, "loss": 0.2333, "step": 24094 }, { "epoch": 0.4188322411305602, "grad_norm": 1.399016117613664, "learning_rate": 6.532470126265777e-07, "loss": 0.3904, "step": 24095 }, { "epoch": 0.41884962366806305, "grad_norm": 1.7174357743762474, "learning_rate": 6.532202178018724e-07, "loss": 0.3233, "step": 24096 }, { "epoch": 0.4188670062055659, "grad_norm": 2.202647060630428, "learning_rate": 6.531934224915297e-07, "loss": 0.3863, "step": 24097 }, { "epoch": 0.4188843887430687, "grad_norm": 3.4326233493367404, "learning_rate": 6.531666266956341e-07, "loss": 0.4648, "step": 24098 }, { "epoch": 0.41890177128057154, "grad_norm": 5.647044161306445, "learning_rate": 6.531398304142707e-07, "loss": 0.3807, "step": 24099 }, { "epoch": 0.4189191538180744, "grad_norm": 2.8855911967316783, "learning_rate": 6.531130336475244e-07, "loss": 0.499, "step": 24100 }, { "epoch": 0.4189365363555772, "grad_norm": 1.1111976427614891, "learning_rate": 6.530862363954801e-07, "loss": 0.2267, "step": 24101 }, { "epoch": 0.41895391889308004, "grad_norm": 2.051581074676864, "learning_rate": 6.530594386582225e-07, "loss": 0.2607, "step": 24102 }, { "epoch": 0.4189713014305828, "grad_norm": 1.3581560282564376, "learning_rate": 6.530326404358371e-07, "loss": 0.2076, "step": 24103 }, { "epoch": 0.41898868396808564, "grad_norm": 1.7617341892986633, "learning_rate": 6.530058417284085e-07, "loss": 0.2549, "step": 24104 }, { "epoch": 0.41900606650558847, "grad_norm": 1.0534197999612072, "learning_rate": 6.529790425360215e-07, "loss": 0.3225, "step": 24105 }, { "epoch": 0.4190234490430913, "grad_norm": 1.8684113545375551, "learning_rate": 6.529522428587615e-07, "loss": 0.1971, "step": 24106 }, { "epoch": 0.41904083158059413, "grad_norm": 1.8849738625956074, "learning_rate": 6.529254426967128e-07, "loss": 0.3154, "step": 24107 }, { "epoch": 0.41905821411809696, "grad_norm": 1.071614957224642, "learning_rate": 6.528986420499609e-07, "loss": 0.1718, "step": 24108 }, { "epoch": 0.4190755966555998, "grad_norm": 1.25092569516761, "learning_rate": 6.528718409185904e-07, "loss": 0.2463, "step": 24109 }, { "epoch": 0.4190929791931026, "grad_norm": 2.131038850509023, "learning_rate": 6.528450393026866e-07, "loss": 0.2992, "step": 24110 }, { "epoch": 0.41911036173060545, "grad_norm": 1.388898334287284, "learning_rate": 6.52818237202334e-07, "loss": 0.3753, "step": 24111 }, { "epoch": 0.4191277442681083, "grad_norm": 2.2308074748903435, "learning_rate": 6.52791434617618e-07, "loss": 0.1818, "step": 24112 }, { "epoch": 0.41914512680561106, "grad_norm": 1.738270435865852, "learning_rate": 6.527646315486233e-07, "loss": 0.2319, "step": 24113 }, { "epoch": 0.4191625093431139, "grad_norm": 1.9358208549562645, "learning_rate": 6.527378279954348e-07, "loss": 0.3702, "step": 24114 }, { "epoch": 0.4191798918806167, "grad_norm": 1.4334544969011405, "learning_rate": 6.527110239581375e-07, "loss": 0.1829, "step": 24115 }, { "epoch": 0.41919727441811955, "grad_norm": 3.8184520439478487, "learning_rate": 6.526842194368167e-07, "loss": 0.3298, "step": 24116 }, { "epoch": 0.4192146569556224, "grad_norm": 1.3493692419701493, "learning_rate": 6.526574144315567e-07, "loss": 0.3396, "step": 24117 }, { "epoch": 0.4192320394931252, "grad_norm": 1.5213956702884606, "learning_rate": 6.52630608942443e-07, "loss": 0.2952, "step": 24118 }, { "epoch": 0.41924942203062804, "grad_norm": 1.8052625056301261, "learning_rate": 6.526038029695604e-07, "loss": 0.1828, "step": 24119 }, { "epoch": 0.41926680456813087, "grad_norm": 1.6782272888769765, "learning_rate": 6.52576996512994e-07, "loss": 0.2808, "step": 24120 }, { "epoch": 0.4192841871056337, "grad_norm": 3.6534845524827886, "learning_rate": 6.525501895728281e-07, "loss": 0.2502, "step": 24121 }, { "epoch": 0.41930156964313653, "grad_norm": 1.3597132660043687, "learning_rate": 6.525233821491487e-07, "loss": 0.194, "step": 24122 }, { "epoch": 0.4193189521806393, "grad_norm": 1.3287066465830428, "learning_rate": 6.524965742420399e-07, "loss": 0.2445, "step": 24123 }, { "epoch": 0.41933633471814213, "grad_norm": 2.3005178694547284, "learning_rate": 6.524697658515872e-07, "loss": 0.3394, "step": 24124 }, { "epoch": 0.41935371725564496, "grad_norm": 8.872211032197205, "learning_rate": 6.524429569778754e-07, "loss": 0.3435, "step": 24125 }, { "epoch": 0.4193710997931478, "grad_norm": 1.849938349797167, "learning_rate": 6.524161476209893e-07, "loss": 0.1677, "step": 24126 }, { "epoch": 0.4193884823306506, "grad_norm": 2.8513383766483384, "learning_rate": 6.523893377810141e-07, "loss": 0.3871, "step": 24127 }, { "epoch": 0.41940586486815346, "grad_norm": 1.753775352933946, "learning_rate": 6.523625274580347e-07, "loss": 0.348, "step": 24128 }, { "epoch": 0.4194232474056563, "grad_norm": 2.4623383033278907, "learning_rate": 6.523357166521361e-07, "loss": 0.4246, "step": 24129 }, { "epoch": 0.4194406299431591, "grad_norm": 1.5390979641758007, "learning_rate": 6.523089053634032e-07, "loss": 0.4409, "step": 24130 }, { "epoch": 0.41945801248066195, "grad_norm": 1.6567990176274565, "learning_rate": 6.522820935919211e-07, "loss": 0.3688, "step": 24131 }, { "epoch": 0.4194753950181648, "grad_norm": 1.2181592685350966, "learning_rate": 6.522552813377746e-07, "loss": 0.3543, "step": 24132 }, { "epoch": 0.41949277755566755, "grad_norm": 1.645357071243362, "learning_rate": 6.52228468601049e-07, "loss": 0.3255, "step": 24133 }, { "epoch": 0.4195101600931704, "grad_norm": 1.7501235123833885, "learning_rate": 6.52201655381829e-07, "loss": 0.3234, "step": 24134 }, { "epoch": 0.4195275426306732, "grad_norm": 1.0920499984181227, "learning_rate": 6.521748416801995e-07, "loss": 0.2045, "step": 24135 }, { "epoch": 0.41954492516817604, "grad_norm": 2.411320994338236, "learning_rate": 6.521480274962459e-07, "loss": 0.4457, "step": 24136 }, { "epoch": 0.41956230770567887, "grad_norm": 1.4359285567925721, "learning_rate": 6.521212128300528e-07, "loss": 0.1927, "step": 24137 }, { "epoch": 0.4195796902431817, "grad_norm": 1.7017354288627369, "learning_rate": 6.520943976817053e-07, "loss": 0.2059, "step": 24138 }, { "epoch": 0.41959707278068453, "grad_norm": 2.6674941157123677, "learning_rate": 6.520675820512887e-07, "loss": 0.5614, "step": 24139 }, { "epoch": 0.41961445531818736, "grad_norm": 2.262039387106646, "learning_rate": 6.520407659388873e-07, "loss": 0.3566, "step": 24140 }, { "epoch": 0.4196318378556902, "grad_norm": 1.5206353291209584, "learning_rate": 6.520139493445868e-07, "loss": 0.1739, "step": 24141 }, { "epoch": 0.419649220393193, "grad_norm": 1.6844929912383424, "learning_rate": 6.519871322684718e-07, "loss": 0.2598, "step": 24142 }, { "epoch": 0.4196666029306958, "grad_norm": 2.0659950201901283, "learning_rate": 6.519603147106275e-07, "loss": 0.1871, "step": 24143 }, { "epoch": 0.41968398546819863, "grad_norm": 1.4332722259080397, "learning_rate": 6.519334966711387e-07, "loss": 0.258, "step": 24144 }, { "epoch": 0.41970136800570146, "grad_norm": 1.499349564455838, "learning_rate": 6.519066781500906e-07, "loss": 0.3615, "step": 24145 }, { "epoch": 0.4197187505432043, "grad_norm": 1.9000023343157355, "learning_rate": 6.51879859147568e-07, "loss": 0.3209, "step": 24146 }, { "epoch": 0.4197361330807071, "grad_norm": 1.8237999914670593, "learning_rate": 6.518530396636562e-07, "loss": 0.4252, "step": 24147 }, { "epoch": 0.41975351561820995, "grad_norm": 1.516735303783133, "learning_rate": 6.518262196984398e-07, "loss": 0.2377, "step": 24148 }, { "epoch": 0.4197708981557128, "grad_norm": 1.1985763052146199, "learning_rate": 6.517993992520043e-07, "loss": 0.1883, "step": 24149 }, { "epoch": 0.4197882806932156, "grad_norm": 1.3868407480219316, "learning_rate": 6.517725783244343e-07, "loss": 0.219, "step": 24150 }, { "epoch": 0.41980566323071844, "grad_norm": 1.9337755864844723, "learning_rate": 6.517457569158149e-07, "loss": 0.288, "step": 24151 }, { "epoch": 0.41982304576822127, "grad_norm": 1.4461527513739834, "learning_rate": 6.517189350262311e-07, "loss": 0.3217, "step": 24152 }, { "epoch": 0.41984042830572404, "grad_norm": 1.7390690381558345, "learning_rate": 6.51692112655768e-07, "loss": 0.2263, "step": 24153 }, { "epoch": 0.4198578108432269, "grad_norm": 1.6966136265627962, "learning_rate": 6.516652898045106e-07, "loss": 0.2651, "step": 24154 }, { "epoch": 0.4198751933807297, "grad_norm": 1.5473569761870574, "learning_rate": 6.51638466472544e-07, "loss": 0.614, "step": 24155 }, { "epoch": 0.41989257591823254, "grad_norm": 1.7448822315690222, "learning_rate": 6.516116426599529e-07, "loss": 0.3716, "step": 24156 }, { "epoch": 0.41990995845573537, "grad_norm": 1.8437734913569492, "learning_rate": 6.515848183668229e-07, "loss": 0.2862, "step": 24157 }, { "epoch": 0.4199273409932382, "grad_norm": 2.5024308828128037, "learning_rate": 6.515579935932383e-07, "loss": 0.2419, "step": 24158 }, { "epoch": 0.419944723530741, "grad_norm": 1.9350000004907906, "learning_rate": 6.515311683392847e-07, "loss": 0.2519, "step": 24159 }, { "epoch": 0.41996210606824386, "grad_norm": 1.7832232041643188, "learning_rate": 6.515043426050469e-07, "loss": 0.2783, "step": 24160 }, { "epoch": 0.4199794886057467, "grad_norm": 1.3881460842625553, "learning_rate": 6.514775163906098e-07, "loss": 0.2065, "step": 24161 }, { "epoch": 0.4199968711432495, "grad_norm": 1.6082686125773897, "learning_rate": 6.514506896960585e-07, "loss": 0.2111, "step": 24162 }, { "epoch": 0.4200142536807523, "grad_norm": 1.7230180167951166, "learning_rate": 6.514238625214783e-07, "loss": 0.3268, "step": 24163 }, { "epoch": 0.4200316362182551, "grad_norm": 4.696752963624687, "learning_rate": 6.513970348669538e-07, "loss": 0.3631, "step": 24164 }, { "epoch": 0.42004901875575795, "grad_norm": 1.168851391415623, "learning_rate": 6.513702067325704e-07, "loss": 0.1554, "step": 24165 }, { "epoch": 0.4200664012932608, "grad_norm": 1.4687361639777972, "learning_rate": 6.51343378118413e-07, "loss": 0.2509, "step": 24166 }, { "epoch": 0.4200837838307636, "grad_norm": 1.2278434186222615, "learning_rate": 6.513165490245664e-07, "loss": 0.3656, "step": 24167 }, { "epoch": 0.42010116636826644, "grad_norm": 2.0359555419972732, "learning_rate": 6.512897194511161e-07, "loss": 0.1869, "step": 24168 }, { "epoch": 0.4201185489057693, "grad_norm": 1.7824771197822908, "learning_rate": 6.512628893981467e-07, "loss": 0.2555, "step": 24169 }, { "epoch": 0.4201359314432721, "grad_norm": 1.8644934506935897, "learning_rate": 6.512360588657436e-07, "loss": 0.2596, "step": 24170 }, { "epoch": 0.42015331398077493, "grad_norm": 1.8383298838949675, "learning_rate": 6.512092278539914e-07, "loss": 0.2587, "step": 24171 }, { "epoch": 0.42017069651827776, "grad_norm": 0.8895536579596972, "learning_rate": 6.511823963629756e-07, "loss": 0.1631, "step": 24172 }, { "epoch": 0.42018807905578054, "grad_norm": 1.430185175074384, "learning_rate": 6.511555643927811e-07, "loss": 0.3283, "step": 24173 }, { "epoch": 0.42020546159328337, "grad_norm": 1.6356862445660876, "learning_rate": 6.511287319434928e-07, "loss": 0.1994, "step": 24174 }, { "epoch": 0.4202228441307862, "grad_norm": 1.170776733588934, "learning_rate": 6.511018990151959e-07, "loss": 0.3057, "step": 24175 }, { "epoch": 0.42024022666828903, "grad_norm": 2.540634332902874, "learning_rate": 6.510750656079754e-07, "loss": 0.298, "step": 24176 }, { "epoch": 0.42025760920579186, "grad_norm": 1.6710752071311519, "learning_rate": 6.510482317219163e-07, "loss": 0.2129, "step": 24177 }, { "epoch": 0.4202749917432947, "grad_norm": 1.477668005220011, "learning_rate": 6.510213973571037e-07, "loss": 0.1992, "step": 24178 }, { "epoch": 0.4202923742807975, "grad_norm": 2.2796805163121943, "learning_rate": 6.509945625136226e-07, "loss": 0.3077, "step": 24179 }, { "epoch": 0.42030975681830035, "grad_norm": 1.386117966421969, "learning_rate": 6.509677271915581e-07, "loss": 0.3929, "step": 24180 }, { "epoch": 0.4203271393558032, "grad_norm": 1.827690470149989, "learning_rate": 6.509408913909953e-07, "loss": 0.2712, "step": 24181 }, { "epoch": 0.420344521893306, "grad_norm": 2.5751470527955034, "learning_rate": 6.509140551120194e-07, "loss": 0.3571, "step": 24182 }, { "epoch": 0.4203619044308088, "grad_norm": 2.362663130498982, "learning_rate": 6.508872183547149e-07, "loss": 0.298, "step": 24183 }, { "epoch": 0.4203792869683116, "grad_norm": 5.170653743699998, "learning_rate": 6.508603811191674e-07, "loss": 0.3263, "step": 24184 }, { "epoch": 0.42039666950581445, "grad_norm": 1.8044187049221732, "learning_rate": 6.508335434054619e-07, "loss": 0.3264, "step": 24185 }, { "epoch": 0.4204140520433173, "grad_norm": 2.1312557837138013, "learning_rate": 6.508067052136832e-07, "loss": 0.2552, "step": 24186 }, { "epoch": 0.4204314345808201, "grad_norm": 1.4446953764249773, "learning_rate": 6.507798665439165e-07, "loss": 0.2666, "step": 24187 }, { "epoch": 0.42044881711832294, "grad_norm": 1.7140570989806672, "learning_rate": 6.507530273962469e-07, "loss": 0.2176, "step": 24188 }, { "epoch": 0.42046619965582577, "grad_norm": 1.4864888720630307, "learning_rate": 6.507261877707595e-07, "loss": 0.1751, "step": 24189 }, { "epoch": 0.4204835821933286, "grad_norm": 1.7972644872544943, "learning_rate": 6.506993476675394e-07, "loss": 0.3422, "step": 24190 }, { "epoch": 0.42050096473083143, "grad_norm": 2.098931903984409, "learning_rate": 6.506725070866716e-07, "loss": 0.2872, "step": 24191 }, { "epoch": 0.42051834726833426, "grad_norm": 1.5875396285805465, "learning_rate": 6.506456660282408e-07, "loss": 0.3093, "step": 24192 }, { "epoch": 0.42053572980583703, "grad_norm": 2.6192725105411454, "learning_rate": 6.506188244923329e-07, "loss": 0.2553, "step": 24193 }, { "epoch": 0.42055311234333986, "grad_norm": 1.2508920573901505, "learning_rate": 6.505919824790324e-07, "loss": 0.1735, "step": 24194 }, { "epoch": 0.4205704948808427, "grad_norm": 2.3756840972670674, "learning_rate": 6.505651399884245e-07, "loss": 0.3803, "step": 24195 }, { "epoch": 0.4205878774183455, "grad_norm": 2.281248599053332, "learning_rate": 6.505382970205941e-07, "loss": 0.2303, "step": 24196 }, { "epoch": 0.42060525995584835, "grad_norm": 2.759405253811513, "learning_rate": 6.505114535756264e-07, "loss": 0.4515, "step": 24197 }, { "epoch": 0.4206226424933512, "grad_norm": 1.1274553753286611, "learning_rate": 6.504846096536068e-07, "loss": 0.3209, "step": 24198 }, { "epoch": 0.420640025030854, "grad_norm": 1.2095621367958385, "learning_rate": 6.5045776525462e-07, "loss": 0.3284, "step": 24199 }, { "epoch": 0.42065740756835684, "grad_norm": 1.7831421846505662, "learning_rate": 6.504309203787511e-07, "loss": 0.2236, "step": 24200 }, { "epoch": 0.4206747901058597, "grad_norm": 1.3702184031519407, "learning_rate": 6.504040750260856e-07, "loss": 0.1912, "step": 24201 }, { "epoch": 0.4206921726433625, "grad_norm": 2.5727147911138672, "learning_rate": 6.503772291967079e-07, "loss": 0.3128, "step": 24202 }, { "epoch": 0.4207095551808653, "grad_norm": 1.7695017243378337, "learning_rate": 6.503503828907037e-07, "loss": 0.3962, "step": 24203 }, { "epoch": 0.4207269377183681, "grad_norm": 1.6246682553756742, "learning_rate": 6.503235361081577e-07, "loss": 0.2767, "step": 24204 }, { "epoch": 0.42074432025587094, "grad_norm": 1.4541262979854641, "learning_rate": 6.502966888491552e-07, "loss": 0.2554, "step": 24205 }, { "epoch": 0.42076170279337377, "grad_norm": 0.9859407050254375, "learning_rate": 6.502698411137812e-07, "loss": 0.2556, "step": 24206 }, { "epoch": 0.4207790853308766, "grad_norm": 1.313913711983226, "learning_rate": 6.502429929021209e-07, "loss": 0.2132, "step": 24207 }, { "epoch": 0.42079646786837943, "grad_norm": 1.4541261105643108, "learning_rate": 6.502161442142593e-07, "loss": 0.3493, "step": 24208 }, { "epoch": 0.42081385040588226, "grad_norm": 1.5650675556378635, "learning_rate": 6.501892950502815e-07, "loss": 0.3661, "step": 24209 }, { "epoch": 0.4208312329433851, "grad_norm": 1.3674021487338215, "learning_rate": 6.501624454102728e-07, "loss": 0.1946, "step": 24210 }, { "epoch": 0.4208486154808879, "grad_norm": 2.1382390528095296, "learning_rate": 6.50135595294318e-07, "loss": 0.3872, "step": 24211 }, { "epoch": 0.42086599801839075, "grad_norm": 1.1585114471138498, "learning_rate": 6.501087447025022e-07, "loss": 0.2927, "step": 24212 }, { "epoch": 0.4208833805558935, "grad_norm": 1.3968794687105561, "learning_rate": 6.500818936349107e-07, "loss": 0.3146, "step": 24213 }, { "epoch": 0.42090076309339636, "grad_norm": 1.192505462550813, "learning_rate": 6.500550420916286e-07, "loss": 0.2402, "step": 24214 }, { "epoch": 0.4209181456308992, "grad_norm": 1.4988730237450167, "learning_rate": 6.50028190072741e-07, "loss": 0.2775, "step": 24215 }, { "epoch": 0.420935528168402, "grad_norm": 2.51624543523801, "learning_rate": 6.500013375783328e-07, "loss": 0.3657, "step": 24216 }, { "epoch": 0.42095291070590485, "grad_norm": 1.299278236071569, "learning_rate": 6.499744846084893e-07, "loss": 0.4108, "step": 24217 }, { "epoch": 0.4209702932434077, "grad_norm": 1.2578520818605534, "learning_rate": 6.499476311632958e-07, "loss": 0.3168, "step": 24218 }, { "epoch": 0.4209876757809105, "grad_norm": 2.4495592764252763, "learning_rate": 6.49920777242837e-07, "loss": 0.2655, "step": 24219 }, { "epoch": 0.42100505831841334, "grad_norm": 1.7441776986498831, "learning_rate": 6.498939228471982e-07, "loss": 0.2407, "step": 24220 }, { "epoch": 0.42102244085591617, "grad_norm": 1.6717399779793622, "learning_rate": 6.498670679764645e-07, "loss": 0.3216, "step": 24221 }, { "epoch": 0.42103982339341894, "grad_norm": 1.71818450443905, "learning_rate": 6.49840212630721e-07, "loss": 0.2591, "step": 24222 }, { "epoch": 0.4210572059309218, "grad_norm": 1.367065862133117, "learning_rate": 6.49813356810053e-07, "loss": 0.2378, "step": 24223 }, { "epoch": 0.4210745884684246, "grad_norm": 1.6642981891007314, "learning_rate": 6.497865005145456e-07, "loss": 0.2849, "step": 24224 }, { "epoch": 0.42109197100592743, "grad_norm": 1.3334589170225717, "learning_rate": 6.497596437442833e-07, "loss": 0.166, "step": 24225 }, { "epoch": 0.42110935354343026, "grad_norm": 1.0713414058581505, "learning_rate": 6.497327864993522e-07, "loss": 0.2462, "step": 24226 }, { "epoch": 0.4211267360809331, "grad_norm": 1.8764569373844069, "learning_rate": 6.497059287798368e-07, "loss": 0.2079, "step": 24227 }, { "epoch": 0.4211441186184359, "grad_norm": 1.0458682611741987, "learning_rate": 6.496790705858224e-07, "loss": 0.334, "step": 24228 }, { "epoch": 0.42116150115593876, "grad_norm": 2.425870754199086, "learning_rate": 6.49652211917394e-07, "loss": 0.2286, "step": 24229 }, { "epoch": 0.4211788836934416, "grad_norm": 1.5669279701924033, "learning_rate": 6.496253527746369e-07, "loss": 0.313, "step": 24230 }, { "epoch": 0.4211962662309444, "grad_norm": 0.9917028019957157, "learning_rate": 6.495984931576361e-07, "loss": 0.2835, "step": 24231 }, { "epoch": 0.4212136487684472, "grad_norm": 1.2024519001405591, "learning_rate": 6.495716330664768e-07, "loss": 0.3321, "step": 24232 }, { "epoch": 0.42123103130595, "grad_norm": 1.2393255759004516, "learning_rate": 6.495447725012444e-07, "loss": 0.2912, "step": 24233 }, { "epoch": 0.42124841384345285, "grad_norm": 1.3273353882843018, "learning_rate": 6.495179114620235e-07, "loss": 0.2189, "step": 24234 }, { "epoch": 0.4212657963809557, "grad_norm": 1.0702971268854893, "learning_rate": 6.494910499488995e-07, "loss": 0.1994, "step": 24235 }, { "epoch": 0.4212831789184585, "grad_norm": 1.5808251039419867, "learning_rate": 6.494641879619576e-07, "loss": 0.2548, "step": 24236 }, { "epoch": 0.42130056145596134, "grad_norm": 2.272409270358073, "learning_rate": 6.49437325501283e-07, "loss": 0.3793, "step": 24237 }, { "epoch": 0.4213179439934642, "grad_norm": 2.2093766811536355, "learning_rate": 6.494104625669606e-07, "loss": 0.3814, "step": 24238 }, { "epoch": 0.421335326530967, "grad_norm": 3.0554824468722095, "learning_rate": 6.493835991590757e-07, "loss": 0.3215, "step": 24239 }, { "epoch": 0.42135270906846983, "grad_norm": 1.4533570798011477, "learning_rate": 6.493567352777134e-07, "loss": 0.2865, "step": 24240 }, { "epoch": 0.42137009160597266, "grad_norm": 1.5938350781350799, "learning_rate": 6.493298709229587e-07, "loss": 0.2869, "step": 24241 }, { "epoch": 0.42138747414347544, "grad_norm": 3.032201956306088, "learning_rate": 6.493030060948972e-07, "loss": 0.2202, "step": 24242 }, { "epoch": 0.42140485668097827, "grad_norm": 1.393720249730968, "learning_rate": 6.492761407936135e-07, "loss": 0.3989, "step": 24243 }, { "epoch": 0.4214222392184811, "grad_norm": 1.5376675106999829, "learning_rate": 6.492492750191931e-07, "loss": 0.2767, "step": 24244 }, { "epoch": 0.42143962175598393, "grad_norm": 1.7858398697595004, "learning_rate": 6.492224087717211e-07, "loss": 0.2531, "step": 24245 }, { "epoch": 0.42145700429348676, "grad_norm": 1.8949449832518188, "learning_rate": 6.491955420512826e-07, "loss": 0.2995, "step": 24246 }, { "epoch": 0.4214743868309896, "grad_norm": 1.7076690318610197, "learning_rate": 6.491686748579627e-07, "loss": 0.1709, "step": 24247 }, { "epoch": 0.4214917693684924, "grad_norm": 1.7110088286225666, "learning_rate": 6.491418071918466e-07, "loss": 0.3087, "step": 24248 }, { "epoch": 0.42150915190599525, "grad_norm": 2.0846199145181115, "learning_rate": 6.491149390530196e-07, "loss": 0.1972, "step": 24249 }, { "epoch": 0.4215265344434981, "grad_norm": 2.599360481892833, "learning_rate": 6.490880704415667e-07, "loss": 0.3234, "step": 24250 }, { "epoch": 0.4215439169810009, "grad_norm": 1.8586268128976733, "learning_rate": 6.490612013575732e-07, "loss": 0.2994, "step": 24251 }, { "epoch": 0.4215612995185037, "grad_norm": 2.2634076237322085, "learning_rate": 6.490343318011239e-07, "loss": 0.1604, "step": 24252 }, { "epoch": 0.4215786820560065, "grad_norm": 1.583029458222498, "learning_rate": 6.490074617723044e-07, "loss": 0.2077, "step": 24253 }, { "epoch": 0.42159606459350935, "grad_norm": 4.0663078212640995, "learning_rate": 6.489805912711998e-07, "loss": 0.1501, "step": 24254 }, { "epoch": 0.4216134471310122, "grad_norm": 2.1216297771036556, "learning_rate": 6.48953720297895e-07, "loss": 0.3428, "step": 24255 }, { "epoch": 0.421630829668515, "grad_norm": 1.742180148144301, "learning_rate": 6.489268488524753e-07, "loss": 0.2846, "step": 24256 }, { "epoch": 0.42164821220601784, "grad_norm": 0.989486847958109, "learning_rate": 6.488999769350261e-07, "loss": 0.2036, "step": 24257 }, { "epoch": 0.42166559474352067, "grad_norm": 1.47569635996435, "learning_rate": 6.488731045456322e-07, "loss": 0.2921, "step": 24258 }, { "epoch": 0.4216829772810235, "grad_norm": 1.239962741560915, "learning_rate": 6.48846231684379e-07, "loss": 0.2675, "step": 24259 }, { "epoch": 0.4217003598185263, "grad_norm": 0.8492925687174725, "learning_rate": 6.488193583513514e-07, "loss": 0.271, "step": 24260 }, { "epoch": 0.42171774235602916, "grad_norm": 2.441840167346006, "learning_rate": 6.487924845466352e-07, "loss": 0.2013, "step": 24261 }, { "epoch": 0.42173512489353193, "grad_norm": 2.0584960233680047, "learning_rate": 6.487656102703149e-07, "loss": 0.4412, "step": 24262 }, { "epoch": 0.42175250743103476, "grad_norm": 1.852639137169476, "learning_rate": 6.487387355224761e-07, "loss": 0.2662, "step": 24263 }, { "epoch": 0.4217698899685376, "grad_norm": 2.477764575690975, "learning_rate": 6.487118603032038e-07, "loss": 0.4077, "step": 24264 }, { "epoch": 0.4217872725060404, "grad_norm": 1.4402562454725225, "learning_rate": 6.486849846125832e-07, "loss": 0.2266, "step": 24265 }, { "epoch": 0.42180465504354325, "grad_norm": 1.5181035180569453, "learning_rate": 6.486581084506995e-07, "loss": 0.2805, "step": 24266 }, { "epoch": 0.4218220375810461, "grad_norm": 1.6453849467325108, "learning_rate": 6.486312318176379e-07, "loss": 0.372, "step": 24267 }, { "epoch": 0.4218394201185489, "grad_norm": 1.025369718289277, "learning_rate": 6.486043547134834e-07, "loss": 0.2285, "step": 24268 }, { "epoch": 0.42185680265605174, "grad_norm": 3.601161225126368, "learning_rate": 6.485774771383216e-07, "loss": 0.2901, "step": 24269 }, { "epoch": 0.4218741851935546, "grad_norm": 1.4120734121878695, "learning_rate": 6.485505990922373e-07, "loss": 0.2173, "step": 24270 }, { "epoch": 0.4218915677310574, "grad_norm": 1.587700305750027, "learning_rate": 6.48523720575316e-07, "loss": 0.39, "step": 24271 }, { "epoch": 0.4219089502685602, "grad_norm": 2.2318960265986427, "learning_rate": 6.484968415876425e-07, "loss": 0.3404, "step": 24272 }, { "epoch": 0.421926332806063, "grad_norm": 2.4269691171349215, "learning_rate": 6.484699621293023e-07, "loss": 0.3532, "step": 24273 }, { "epoch": 0.42194371534356584, "grad_norm": 1.3445873265755082, "learning_rate": 6.484430822003806e-07, "loss": 0.1793, "step": 24274 }, { "epoch": 0.42196109788106867, "grad_norm": 1.70822947060988, "learning_rate": 6.484162018009625e-07, "loss": 0.2657, "step": 24275 }, { "epoch": 0.4219784804185715, "grad_norm": 1.2037639883873614, "learning_rate": 6.483893209311332e-07, "loss": 0.2565, "step": 24276 }, { "epoch": 0.42199586295607433, "grad_norm": 1.654933488738396, "learning_rate": 6.483624395909779e-07, "loss": 0.3394, "step": 24277 }, { "epoch": 0.42201324549357716, "grad_norm": 1.0857875013483442, "learning_rate": 6.483355577805818e-07, "loss": 0.469, "step": 24278 }, { "epoch": 0.42203062803108, "grad_norm": 3.088340583095649, "learning_rate": 6.483086755000303e-07, "loss": 0.3232, "step": 24279 }, { "epoch": 0.4220480105685828, "grad_norm": 1.024547136431226, "learning_rate": 6.482817927494082e-07, "loss": 0.479, "step": 24280 }, { "epoch": 0.42206539310608565, "grad_norm": 1.2259093991231018, "learning_rate": 6.48254909528801e-07, "loss": 0.1602, "step": 24281 }, { "epoch": 0.4220827756435884, "grad_norm": 1.2909556448095267, "learning_rate": 6.482280258382938e-07, "loss": 0.3205, "step": 24282 }, { "epoch": 0.42210015818109126, "grad_norm": 3.246896399868264, "learning_rate": 6.482011416779721e-07, "loss": 0.579, "step": 24283 }, { "epoch": 0.4221175407185941, "grad_norm": 1.3656206373621818, "learning_rate": 6.481742570479205e-07, "loss": 0.4692, "step": 24284 }, { "epoch": 0.4221349232560969, "grad_norm": 1.5348417605451645, "learning_rate": 6.481473719482247e-07, "loss": 0.2923, "step": 24285 }, { "epoch": 0.42215230579359975, "grad_norm": 3.0592519530158495, "learning_rate": 6.4812048637897e-07, "loss": 0.246, "step": 24286 }, { "epoch": 0.4221696883311026, "grad_norm": 1.2732240264627301, "learning_rate": 6.48093600340241e-07, "loss": 0.2872, "step": 24287 }, { "epoch": 0.4221870708686054, "grad_norm": 1.8089561414194675, "learning_rate": 6.480667138321236e-07, "loss": 0.2812, "step": 24288 }, { "epoch": 0.42220445340610824, "grad_norm": 1.8789439110752677, "learning_rate": 6.480398268547028e-07, "loss": 0.2699, "step": 24289 }, { "epoch": 0.42222183594361107, "grad_norm": 3.300140961471128, "learning_rate": 6.480129394080635e-07, "loss": 0.4389, "step": 24290 }, { "epoch": 0.4222392184811139, "grad_norm": 2.1309295900071237, "learning_rate": 6.479860514922912e-07, "loss": 0.243, "step": 24291 }, { "epoch": 0.4222566010186167, "grad_norm": 3.1152591288138303, "learning_rate": 6.479591631074712e-07, "loss": 0.227, "step": 24292 }, { "epoch": 0.4222739835561195, "grad_norm": 1.2680613219380679, "learning_rate": 6.479322742536886e-07, "loss": 0.2035, "step": 24293 }, { "epoch": 0.42229136609362233, "grad_norm": 1.7837524207211448, "learning_rate": 6.479053849310288e-07, "loss": 0.3305, "step": 24294 }, { "epoch": 0.42230874863112516, "grad_norm": 1.7608068397980312, "learning_rate": 6.478784951395766e-07, "loss": 0.2272, "step": 24295 }, { "epoch": 0.422326131168628, "grad_norm": 1.5023390041553035, "learning_rate": 6.478516048794178e-07, "loss": 0.1441, "step": 24296 }, { "epoch": 0.4223435137061308, "grad_norm": 1.0225935349674193, "learning_rate": 6.47824714150637e-07, "loss": 0.2349, "step": 24297 }, { "epoch": 0.42236089624363365, "grad_norm": 1.0378075049637743, "learning_rate": 6.4779782295332e-07, "loss": 0.3201, "step": 24298 }, { "epoch": 0.4223782787811365, "grad_norm": 1.6055810500306342, "learning_rate": 6.477709312875517e-07, "loss": 0.2561, "step": 24299 }, { "epoch": 0.4223956613186393, "grad_norm": 1.648638460047262, "learning_rate": 6.477440391534175e-07, "loss": 0.3438, "step": 24300 }, { "epoch": 0.42241304385614215, "grad_norm": 3.0674673851653247, "learning_rate": 6.477171465510025e-07, "loss": 0.4898, "step": 24301 }, { "epoch": 0.4224304263936449, "grad_norm": 2.369137582424834, "learning_rate": 6.47690253480392e-07, "loss": 0.2836, "step": 24302 }, { "epoch": 0.42244780893114775, "grad_norm": 2.046514559561441, "learning_rate": 6.476633599416712e-07, "loss": 0.3006, "step": 24303 }, { "epoch": 0.4224651914686506, "grad_norm": 1.8842555722819374, "learning_rate": 6.476364659349254e-07, "loss": 0.2273, "step": 24304 }, { "epoch": 0.4224825740061534, "grad_norm": 2.236150876288824, "learning_rate": 6.476095714602398e-07, "loss": 0.4885, "step": 24305 }, { "epoch": 0.42249995654365624, "grad_norm": 2.577591144063293, "learning_rate": 6.475826765176999e-07, "loss": 0.3713, "step": 24306 }, { "epoch": 0.42251733908115907, "grad_norm": 2.821811963394924, "learning_rate": 6.475557811073904e-07, "loss": 0.3427, "step": 24307 }, { "epoch": 0.4225347216186619, "grad_norm": 1.7001100785509873, "learning_rate": 6.47528885229397e-07, "loss": 0.2206, "step": 24308 }, { "epoch": 0.42255210415616473, "grad_norm": 1.3942328452527037, "learning_rate": 6.475019888838047e-07, "loss": 0.1442, "step": 24309 }, { "epoch": 0.42256948669366756, "grad_norm": 1.6789644465750198, "learning_rate": 6.474750920706989e-07, "loss": 0.1757, "step": 24310 }, { "epoch": 0.4225868692311704, "grad_norm": 2.172693800034309, "learning_rate": 6.47448194790165e-07, "loss": 0.3638, "step": 24311 }, { "epoch": 0.42260425176867317, "grad_norm": 1.7765389668673017, "learning_rate": 6.474212970422878e-07, "loss": 0.211, "step": 24312 }, { "epoch": 0.422621634306176, "grad_norm": 1.2456112166837159, "learning_rate": 6.473943988271528e-07, "loss": 0.3234, "step": 24313 }, { "epoch": 0.4226390168436788, "grad_norm": 1.8874065383019873, "learning_rate": 6.473675001448453e-07, "loss": 0.2255, "step": 24314 }, { "epoch": 0.42265639938118166, "grad_norm": 2.000766129912464, "learning_rate": 6.473406009954507e-07, "loss": 0.218, "step": 24315 }, { "epoch": 0.4226737819186845, "grad_norm": 7.846962281032164, "learning_rate": 6.47313701379054e-07, "loss": 0.1456, "step": 24316 }, { "epoch": 0.4226911644561873, "grad_norm": 2.2145132627181328, "learning_rate": 6.472868012957404e-07, "loss": 0.2284, "step": 24317 }, { "epoch": 0.42270854699369015, "grad_norm": 1.7111062859513635, "learning_rate": 6.472599007455954e-07, "loss": 0.2078, "step": 24318 }, { "epoch": 0.422725929531193, "grad_norm": 2.8873240578028683, "learning_rate": 6.472329997287042e-07, "loss": 0.3259, "step": 24319 }, { "epoch": 0.4227433120686958, "grad_norm": 1.0764404805767176, "learning_rate": 6.472060982451519e-07, "loss": 0.2489, "step": 24320 }, { "epoch": 0.42276069460619864, "grad_norm": 1.3562227922357741, "learning_rate": 6.471791962950242e-07, "loss": 0.3887, "step": 24321 }, { "epoch": 0.4227780771437014, "grad_norm": 1.3267659306761292, "learning_rate": 6.471522938784056e-07, "loss": 0.3212, "step": 24322 }, { "epoch": 0.42279545968120424, "grad_norm": 1.3896147016787352, "learning_rate": 6.471253909953821e-07, "loss": 0.2707, "step": 24323 }, { "epoch": 0.4228128422187071, "grad_norm": 1.6114350239151851, "learning_rate": 6.470984876460387e-07, "loss": 0.2169, "step": 24324 }, { "epoch": 0.4228302247562099, "grad_norm": 1.1337933074044781, "learning_rate": 6.470715838304606e-07, "loss": 0.2408, "step": 24325 }, { "epoch": 0.42284760729371274, "grad_norm": 1.4708531452264657, "learning_rate": 6.470446795487331e-07, "loss": 0.2752, "step": 24326 }, { "epoch": 0.42286498983121557, "grad_norm": 1.3004189012786427, "learning_rate": 6.470177748009417e-07, "loss": 0.231, "step": 24327 }, { "epoch": 0.4228823723687184, "grad_norm": 2.8789726787672025, "learning_rate": 6.469908695871712e-07, "loss": 0.3178, "step": 24328 }, { "epoch": 0.4228997549062212, "grad_norm": 1.749003197203179, "learning_rate": 6.469639639075073e-07, "loss": 0.2283, "step": 24329 }, { "epoch": 0.42291713744372406, "grad_norm": 2.3264012094200064, "learning_rate": 6.469370577620353e-07, "loss": 0.3111, "step": 24330 }, { "epoch": 0.4229345199812269, "grad_norm": 1.7893118783505835, "learning_rate": 6.469101511508401e-07, "loss": 0.3361, "step": 24331 }, { "epoch": 0.42295190251872966, "grad_norm": 2.139682645559034, "learning_rate": 6.468832440740072e-07, "loss": 0.6442, "step": 24332 }, { "epoch": 0.4229692850562325, "grad_norm": 1.097168706107658, "learning_rate": 6.468563365316219e-07, "loss": 0.3594, "step": 24333 }, { "epoch": 0.4229866675937353, "grad_norm": 1.7200492399933618, "learning_rate": 6.468294285237696e-07, "loss": 0.2314, "step": 24334 }, { "epoch": 0.42300405013123815, "grad_norm": 1.6357720666433186, "learning_rate": 6.468025200505355e-07, "loss": 0.7324, "step": 24335 }, { "epoch": 0.423021432668741, "grad_norm": 1.1972888378470015, "learning_rate": 6.467756111120046e-07, "loss": 0.3818, "step": 24336 }, { "epoch": 0.4230388152062438, "grad_norm": 1.1882898788289062, "learning_rate": 6.467487017082626e-07, "loss": 0.1972, "step": 24337 }, { "epoch": 0.42305619774374664, "grad_norm": 1.2209088244901858, "learning_rate": 6.467217918393948e-07, "loss": 0.4097, "step": 24338 }, { "epoch": 0.4230735802812495, "grad_norm": 1.7531899868834604, "learning_rate": 6.466948815054861e-07, "loss": 0.3676, "step": 24339 }, { "epoch": 0.4230909628187523, "grad_norm": 1.2002171025951822, "learning_rate": 6.46667970706622e-07, "loss": 0.24, "step": 24340 }, { "epoch": 0.42310834535625513, "grad_norm": 1.676628460072826, "learning_rate": 6.466410594428879e-07, "loss": 0.3376, "step": 24341 }, { "epoch": 0.4231257278937579, "grad_norm": 1.1427643310379667, "learning_rate": 6.466141477143689e-07, "loss": 0.2206, "step": 24342 }, { "epoch": 0.42314311043126074, "grad_norm": 1.5774304273357012, "learning_rate": 6.465872355211505e-07, "loss": 0.2159, "step": 24343 }, { "epoch": 0.42316049296876357, "grad_norm": 1.8374179898932343, "learning_rate": 6.465603228633178e-07, "loss": 0.2674, "step": 24344 }, { "epoch": 0.4231778755062664, "grad_norm": 1.742566295858071, "learning_rate": 6.465334097409563e-07, "loss": 0.3428, "step": 24345 }, { "epoch": 0.42319525804376923, "grad_norm": 1.6231112760749107, "learning_rate": 6.465064961541512e-07, "loss": 0.2119, "step": 24346 }, { "epoch": 0.42321264058127206, "grad_norm": 2.6391794922398355, "learning_rate": 6.464795821029877e-07, "loss": 0.3432, "step": 24347 }, { "epoch": 0.4232300231187749, "grad_norm": 1.4364585828219285, "learning_rate": 6.464526675875512e-07, "loss": 0.23, "step": 24348 }, { "epoch": 0.4232474056562777, "grad_norm": 1.8966829701230674, "learning_rate": 6.464257526079272e-07, "loss": 0.2539, "step": 24349 }, { "epoch": 0.42326478819378055, "grad_norm": 1.5055392039951232, "learning_rate": 6.463988371642007e-07, "loss": 0.3571, "step": 24350 }, { "epoch": 0.4232821707312834, "grad_norm": 1.4821959857439242, "learning_rate": 6.463719212564571e-07, "loss": 0.3163, "step": 24351 }, { "epoch": 0.42329955326878616, "grad_norm": 1.7985158676768003, "learning_rate": 6.463450048847819e-07, "loss": 0.296, "step": 24352 }, { "epoch": 0.423316935806289, "grad_norm": 1.7036357637390682, "learning_rate": 6.463180880492602e-07, "loss": 0.3057, "step": 24353 }, { "epoch": 0.4233343183437918, "grad_norm": 1.194692315898994, "learning_rate": 6.462911707499773e-07, "loss": 0.2386, "step": 24354 }, { "epoch": 0.42335170088129465, "grad_norm": 2.175991589782064, "learning_rate": 6.462642529870186e-07, "loss": 0.2972, "step": 24355 }, { "epoch": 0.4233690834187975, "grad_norm": 1.8146552893321315, "learning_rate": 6.462373347604695e-07, "loss": 0.433, "step": 24356 }, { "epoch": 0.4233864659563003, "grad_norm": 2.9491685633320994, "learning_rate": 6.462104160704152e-07, "loss": 0.4632, "step": 24357 }, { "epoch": 0.42340384849380314, "grad_norm": 2.3910460704032874, "learning_rate": 6.461834969169409e-07, "loss": 0.3692, "step": 24358 }, { "epoch": 0.42342123103130597, "grad_norm": 1.5674251503590457, "learning_rate": 6.461565773001322e-07, "loss": 0.2576, "step": 24359 }, { "epoch": 0.4234386135688088, "grad_norm": 4.926939448913876, "learning_rate": 6.461296572200743e-07, "loss": 0.2796, "step": 24360 }, { "epoch": 0.42345599610631157, "grad_norm": 0.8852732858976052, "learning_rate": 6.461027366768524e-07, "loss": 0.2585, "step": 24361 }, { "epoch": 0.4234733786438144, "grad_norm": 1.0870932892563965, "learning_rate": 6.46075815670552e-07, "loss": 0.1756, "step": 24362 }, { "epoch": 0.42349076118131723, "grad_norm": 3.00450955825044, "learning_rate": 6.460488942012585e-07, "loss": 0.5901, "step": 24363 }, { "epoch": 0.42350814371882006, "grad_norm": 2.6502202793180163, "learning_rate": 6.46021972269057e-07, "loss": 0.5561, "step": 24364 }, { "epoch": 0.4235255262563229, "grad_norm": 1.9706523474272724, "learning_rate": 6.459950498740328e-07, "loss": 0.3911, "step": 24365 }, { "epoch": 0.4235429087938257, "grad_norm": 1.5762015837741699, "learning_rate": 6.459681270162714e-07, "loss": 0.3814, "step": 24366 }, { "epoch": 0.42356029133132855, "grad_norm": 1.4558742255759554, "learning_rate": 6.459412036958582e-07, "loss": 0.2388, "step": 24367 }, { "epoch": 0.4235776738688314, "grad_norm": 1.5548956122770476, "learning_rate": 6.459142799128784e-07, "loss": 0.1778, "step": 24368 }, { "epoch": 0.4235950564063342, "grad_norm": 1.5083436213088413, "learning_rate": 6.458873556674174e-07, "loss": 0.2442, "step": 24369 }, { "epoch": 0.42361243894383704, "grad_norm": 2.2041605645972617, "learning_rate": 6.458604309595603e-07, "loss": 0.3621, "step": 24370 }, { "epoch": 0.4236298214813398, "grad_norm": 1.5138226674528397, "learning_rate": 6.458335057893929e-07, "loss": 0.172, "step": 24371 }, { "epoch": 0.42364720401884265, "grad_norm": 2.4784009503246374, "learning_rate": 6.458065801570001e-07, "loss": 0.3143, "step": 24372 }, { "epoch": 0.4236645865563455, "grad_norm": 1.765045169446055, "learning_rate": 6.457796540624673e-07, "loss": 0.2536, "step": 24373 }, { "epoch": 0.4236819690938483, "grad_norm": 1.9450143089243068, "learning_rate": 6.457527275058801e-07, "loss": 0.3006, "step": 24374 }, { "epoch": 0.42369935163135114, "grad_norm": 1.4766845211985933, "learning_rate": 6.457258004873239e-07, "loss": 0.3703, "step": 24375 }, { "epoch": 0.42371673416885397, "grad_norm": 1.007090557502837, "learning_rate": 6.456988730068837e-07, "loss": 0.3517, "step": 24376 }, { "epoch": 0.4237341167063568, "grad_norm": 1.5518696421582079, "learning_rate": 6.456719450646449e-07, "loss": 0.3371, "step": 24377 }, { "epoch": 0.42375149924385963, "grad_norm": 1.5472940117756555, "learning_rate": 6.45645016660693e-07, "loss": 0.245, "step": 24378 }, { "epoch": 0.42376888178136246, "grad_norm": 1.1647914039715215, "learning_rate": 6.456180877951134e-07, "loss": 0.2417, "step": 24379 }, { "epoch": 0.4237862643188653, "grad_norm": 2.331214255495142, "learning_rate": 6.455911584679913e-07, "loss": 0.2292, "step": 24380 }, { "epoch": 0.42380364685636807, "grad_norm": 1.3901991593070724, "learning_rate": 6.455642286794122e-07, "loss": 0.2671, "step": 24381 }, { "epoch": 0.4238210293938709, "grad_norm": 1.1752585187898998, "learning_rate": 6.455372984294612e-07, "loss": 0.2881, "step": 24382 }, { "epoch": 0.4238384119313737, "grad_norm": 2.067329124152228, "learning_rate": 6.455103677182238e-07, "loss": 0.243, "step": 24383 }, { "epoch": 0.42385579446887656, "grad_norm": 2.104055013605514, "learning_rate": 6.454834365457855e-07, "loss": 0.3348, "step": 24384 }, { "epoch": 0.4238731770063794, "grad_norm": 1.542224436598521, "learning_rate": 6.454565049122316e-07, "loss": 0.3333, "step": 24385 }, { "epoch": 0.4238905595438822, "grad_norm": 1.3327376854917845, "learning_rate": 6.454295728176471e-07, "loss": 0.2155, "step": 24386 }, { "epoch": 0.42390794208138505, "grad_norm": 2.7265051353342153, "learning_rate": 6.45402640262118e-07, "loss": 0.2941, "step": 24387 }, { "epoch": 0.4239253246188879, "grad_norm": 1.9018596052149292, "learning_rate": 6.453757072457291e-07, "loss": 0.2378, "step": 24388 }, { "epoch": 0.4239427071563907, "grad_norm": 1.1073220168972358, "learning_rate": 6.453487737685661e-07, "loss": 0.3256, "step": 24389 }, { "epoch": 0.42396008969389354, "grad_norm": 1.961874616223589, "learning_rate": 6.453218398307142e-07, "loss": 0.2627, "step": 24390 }, { "epoch": 0.4239774722313963, "grad_norm": 1.0426263102646545, "learning_rate": 6.452949054322588e-07, "loss": 0.3646, "step": 24391 }, { "epoch": 0.42399485476889914, "grad_norm": 1.8421026372146019, "learning_rate": 6.452679705732853e-07, "loss": 0.2286, "step": 24392 }, { "epoch": 0.424012237306402, "grad_norm": 1.272301148598292, "learning_rate": 6.45241035253879e-07, "loss": 0.509, "step": 24393 }, { "epoch": 0.4240296198439048, "grad_norm": 1.454046304426167, "learning_rate": 6.452140994741255e-07, "loss": 0.1537, "step": 24394 }, { "epoch": 0.42404700238140763, "grad_norm": 2.0973628691866546, "learning_rate": 6.451871632341101e-07, "loss": 0.2748, "step": 24395 }, { "epoch": 0.42406438491891046, "grad_norm": 1.2484217011971028, "learning_rate": 6.451602265339178e-07, "loss": 0.509, "step": 24396 }, { "epoch": 0.4240817674564133, "grad_norm": 1.5157233569020367, "learning_rate": 6.451332893736343e-07, "loss": 0.3605, "step": 24397 }, { "epoch": 0.4240991499939161, "grad_norm": 1.0009591395230562, "learning_rate": 6.45106351753345e-07, "loss": 0.3297, "step": 24398 }, { "epoch": 0.42411653253141895, "grad_norm": 1.3019895516708686, "learning_rate": 6.450794136731352e-07, "loss": 0.2284, "step": 24399 }, { "epoch": 0.4241339150689218, "grad_norm": 5.97211995971424, "learning_rate": 6.450524751330905e-07, "loss": 0.3783, "step": 24400 }, { "epoch": 0.42415129760642456, "grad_norm": 1.8245650364836112, "learning_rate": 6.450255361332959e-07, "loss": 0.3353, "step": 24401 }, { "epoch": 0.4241686801439274, "grad_norm": 1.630275144619816, "learning_rate": 6.44998596673837e-07, "loss": 0.28, "step": 24402 }, { "epoch": 0.4241860626814302, "grad_norm": 1.7615935492643913, "learning_rate": 6.449716567547992e-07, "loss": 0.2451, "step": 24403 }, { "epoch": 0.42420344521893305, "grad_norm": 1.572240518936654, "learning_rate": 6.449447163762678e-07, "loss": 0.3333, "step": 24404 }, { "epoch": 0.4242208277564359, "grad_norm": 3.059756923211234, "learning_rate": 6.449177755383281e-07, "loss": 0.3827, "step": 24405 }, { "epoch": 0.4242382102939387, "grad_norm": 1.3032617767167136, "learning_rate": 6.44890834241066e-07, "loss": 0.4719, "step": 24406 }, { "epoch": 0.42425559283144154, "grad_norm": 1.6743908940035297, "learning_rate": 6.448638924845662e-07, "loss": 0.3038, "step": 24407 }, { "epoch": 0.42427297536894437, "grad_norm": 1.337122525061675, "learning_rate": 6.448369502689145e-07, "loss": 0.3398, "step": 24408 }, { "epoch": 0.4242903579064472, "grad_norm": 2.4641360602446882, "learning_rate": 6.448100075941961e-07, "loss": 0.2037, "step": 24409 }, { "epoch": 0.42430774044395003, "grad_norm": 1.5491598874359243, "learning_rate": 6.447830644604967e-07, "loss": 0.205, "step": 24410 }, { "epoch": 0.4243251229814528, "grad_norm": 1.3594457241462659, "learning_rate": 6.447561208679012e-07, "loss": 0.2654, "step": 24411 }, { "epoch": 0.42434250551895564, "grad_norm": 1.2892772894087232, "learning_rate": 6.447291768164955e-07, "loss": 0.1725, "step": 24412 }, { "epoch": 0.42435988805645847, "grad_norm": 1.792520497759461, "learning_rate": 6.447022323063648e-07, "loss": 0.3773, "step": 24413 }, { "epoch": 0.4243772705939613, "grad_norm": 1.8571777987009754, "learning_rate": 6.446752873375945e-07, "loss": 0.2259, "step": 24414 }, { "epoch": 0.42439465313146413, "grad_norm": 1.0408536922056106, "learning_rate": 6.446483419102698e-07, "loss": 0.2544, "step": 24415 }, { "epoch": 0.42441203566896696, "grad_norm": 2.7179110777340867, "learning_rate": 6.446213960244767e-07, "loss": 0.2585, "step": 24416 }, { "epoch": 0.4244294182064698, "grad_norm": 1.5318688171920125, "learning_rate": 6.445944496802997e-07, "loss": 0.3165, "step": 24417 }, { "epoch": 0.4244468007439726, "grad_norm": 2.53385437168114, "learning_rate": 6.44567502877825e-07, "loss": 0.2863, "step": 24418 }, { "epoch": 0.42446418328147545, "grad_norm": 1.7990485023770881, "learning_rate": 6.445405556171377e-07, "loss": 0.1786, "step": 24419 }, { "epoch": 0.4244815658189783, "grad_norm": 3.340032848931756, "learning_rate": 6.445136078983233e-07, "loss": 0.3216, "step": 24420 }, { "epoch": 0.42449894835648105, "grad_norm": 2.0047440617097103, "learning_rate": 6.44486659721467e-07, "loss": 0.2387, "step": 24421 }, { "epoch": 0.4245163308939839, "grad_norm": 1.8836209177736347, "learning_rate": 6.444597110866544e-07, "loss": 0.2757, "step": 24422 }, { "epoch": 0.4245337134314867, "grad_norm": 1.470631338072666, "learning_rate": 6.44432761993971e-07, "loss": 0.2693, "step": 24423 }, { "epoch": 0.42455109596898954, "grad_norm": 2.9475563743576036, "learning_rate": 6.44405812443502e-07, "loss": 0.257, "step": 24424 }, { "epoch": 0.4245684785064924, "grad_norm": 2.355471412147524, "learning_rate": 6.44378862435333e-07, "loss": 0.2523, "step": 24425 }, { "epoch": 0.4245858610439952, "grad_norm": 2.834162495609251, "learning_rate": 6.44351911969549e-07, "loss": 0.5542, "step": 24426 }, { "epoch": 0.42460324358149804, "grad_norm": 1.8346788002033327, "learning_rate": 6.443249610462362e-07, "loss": 0.3896, "step": 24427 }, { "epoch": 0.42462062611900087, "grad_norm": 1.4522933541890397, "learning_rate": 6.442980096654793e-07, "loss": 0.3733, "step": 24428 }, { "epoch": 0.4246380086565037, "grad_norm": 1.9247746139849424, "learning_rate": 6.442710578273641e-07, "loss": 0.1913, "step": 24429 }, { "epoch": 0.4246553911940065, "grad_norm": 1.7117135759770608, "learning_rate": 6.442441055319757e-07, "loss": 0.2078, "step": 24430 }, { "epoch": 0.4246727737315093, "grad_norm": 1.9098029091836815, "learning_rate": 6.442171527794001e-07, "loss": 0.4642, "step": 24431 }, { "epoch": 0.42469015626901213, "grad_norm": 0.948399408730196, "learning_rate": 6.441901995697221e-07, "loss": 0.2601, "step": 24432 }, { "epoch": 0.42470753880651496, "grad_norm": 1.5898542271064378, "learning_rate": 6.441632459030275e-07, "loss": 0.2576, "step": 24433 }, { "epoch": 0.4247249213440178, "grad_norm": 1.4800081758645314, "learning_rate": 6.441362917794015e-07, "loss": 0.1799, "step": 24434 }, { "epoch": 0.4247423038815206, "grad_norm": 1.0077762541135833, "learning_rate": 6.4410933719893e-07, "loss": 0.1624, "step": 24435 }, { "epoch": 0.42475968641902345, "grad_norm": 1.8006954011252125, "learning_rate": 6.440823821616979e-07, "loss": 0.5076, "step": 24436 }, { "epoch": 0.4247770689565263, "grad_norm": 3.9814431832979458, "learning_rate": 6.440554266677908e-07, "loss": 0.2038, "step": 24437 }, { "epoch": 0.4247944514940291, "grad_norm": 2.949009804566065, "learning_rate": 6.440284707172941e-07, "loss": 0.3734, "step": 24438 }, { "epoch": 0.42481183403153194, "grad_norm": 1.247386733068981, "learning_rate": 6.440015143102935e-07, "loss": 0.274, "step": 24439 }, { "epoch": 0.4248292165690348, "grad_norm": 2.172337096711688, "learning_rate": 6.439745574468741e-07, "loss": 0.3406, "step": 24440 }, { "epoch": 0.42484659910653755, "grad_norm": 1.4548486796287927, "learning_rate": 6.439476001271217e-07, "loss": 0.2726, "step": 24441 }, { "epoch": 0.4248639816440404, "grad_norm": 2.1702298401748834, "learning_rate": 6.439206423511213e-07, "loss": 0.3776, "step": 24442 }, { "epoch": 0.4248813641815432, "grad_norm": 1.78984924140276, "learning_rate": 6.438936841189586e-07, "loss": 0.3458, "step": 24443 }, { "epoch": 0.42489874671904604, "grad_norm": 1.3715620045057846, "learning_rate": 6.438667254307192e-07, "loss": 0.2215, "step": 24444 }, { "epoch": 0.42491612925654887, "grad_norm": 1.9856712229523643, "learning_rate": 6.438397662864883e-07, "loss": 0.1941, "step": 24445 }, { "epoch": 0.4249335117940517, "grad_norm": 6.512172498670354, "learning_rate": 6.438128066863512e-07, "loss": 0.3615, "step": 24446 }, { "epoch": 0.42495089433155453, "grad_norm": 1.2588803428242608, "learning_rate": 6.437858466303938e-07, "loss": 0.3404, "step": 24447 }, { "epoch": 0.42496827686905736, "grad_norm": 1.8139093001970783, "learning_rate": 6.437588861187012e-07, "loss": 0.2868, "step": 24448 }, { "epoch": 0.4249856594065602, "grad_norm": 2.143007716141753, "learning_rate": 6.43731925151359e-07, "loss": 0.2285, "step": 24449 }, { "epoch": 0.425003041944063, "grad_norm": 1.99082471463333, "learning_rate": 6.437049637284527e-07, "loss": 0.1666, "step": 24450 }, { "epoch": 0.4250204244815658, "grad_norm": 3.8652560536342846, "learning_rate": 6.436780018500674e-07, "loss": 0.1584, "step": 24451 }, { "epoch": 0.4250378070190686, "grad_norm": 1.4169946885351912, "learning_rate": 6.436510395162892e-07, "loss": 0.2306, "step": 24452 }, { "epoch": 0.42505518955657146, "grad_norm": 1.2273115599907438, "learning_rate": 6.436240767272029e-07, "loss": 0.2819, "step": 24453 }, { "epoch": 0.4250725720940743, "grad_norm": 1.5101623104287534, "learning_rate": 6.435971134828946e-07, "loss": 0.2902, "step": 24454 }, { "epoch": 0.4250899546315771, "grad_norm": 1.6830280409068887, "learning_rate": 6.43570149783449e-07, "loss": 0.3819, "step": 24455 }, { "epoch": 0.42510733716907995, "grad_norm": 1.3094817626760153, "learning_rate": 6.435431856289523e-07, "loss": 0.2606, "step": 24456 }, { "epoch": 0.4251247197065828, "grad_norm": 1.7216605552954238, "learning_rate": 6.435162210194895e-07, "loss": 0.286, "step": 24457 }, { "epoch": 0.4251421022440856, "grad_norm": 1.0550234042361961, "learning_rate": 6.434892559551463e-07, "loss": 0.2365, "step": 24458 }, { "epoch": 0.42515948478158844, "grad_norm": 1.0975222332056134, "learning_rate": 6.434622904360078e-07, "loss": 0.2655, "step": 24459 }, { "epoch": 0.42517686731909127, "grad_norm": 2.0410633417308457, "learning_rate": 6.434353244621601e-07, "loss": 0.3912, "step": 24460 }, { "epoch": 0.42519424985659404, "grad_norm": 1.2847602634644981, "learning_rate": 6.434083580336881e-07, "loss": 0.2491, "step": 24461 }, { "epoch": 0.4252116323940969, "grad_norm": 1.6531815979888829, "learning_rate": 6.433813911506775e-07, "loss": 0.2673, "step": 24462 }, { "epoch": 0.4252290149315997, "grad_norm": 1.1862508138457164, "learning_rate": 6.433544238132138e-07, "loss": 0.1442, "step": 24463 }, { "epoch": 0.42524639746910253, "grad_norm": 1.4315278191971226, "learning_rate": 6.433274560213824e-07, "loss": 0.3162, "step": 24464 }, { "epoch": 0.42526378000660536, "grad_norm": 2.1899388197265637, "learning_rate": 6.433004877752688e-07, "loss": 0.1868, "step": 24465 }, { "epoch": 0.4252811625441082, "grad_norm": 1.0702212503233353, "learning_rate": 6.432735190749586e-07, "loss": 0.312, "step": 24466 }, { "epoch": 0.425298545081611, "grad_norm": 1.4193773936735947, "learning_rate": 6.432465499205369e-07, "loss": 0.1637, "step": 24467 }, { "epoch": 0.42531592761911385, "grad_norm": 1.8893540895212588, "learning_rate": 6.432195803120896e-07, "loss": 0.3544, "step": 24468 }, { "epoch": 0.4253333101566167, "grad_norm": 0.8436943188979439, "learning_rate": 6.43192610249702e-07, "loss": 0.2057, "step": 24469 }, { "epoch": 0.4253506926941195, "grad_norm": 1.9079788791785324, "learning_rate": 6.431656397334597e-07, "loss": 0.3056, "step": 24470 }, { "epoch": 0.4253680752316223, "grad_norm": 4.525645903241845, "learning_rate": 6.43138668763448e-07, "loss": 0.2908, "step": 24471 }, { "epoch": 0.4253854577691251, "grad_norm": 1.199065618662895, "learning_rate": 6.431116973397524e-07, "loss": 0.228, "step": 24472 }, { "epoch": 0.42540284030662795, "grad_norm": 1.5289340249096248, "learning_rate": 6.430847254624586e-07, "loss": 0.3525, "step": 24473 }, { "epoch": 0.4254202228441308, "grad_norm": 1.2590402505857428, "learning_rate": 6.430577531316519e-07, "loss": 0.2051, "step": 24474 }, { "epoch": 0.4254376053816336, "grad_norm": 1.4137743167330596, "learning_rate": 6.430307803474177e-07, "loss": 0.2045, "step": 24475 }, { "epoch": 0.42545498791913644, "grad_norm": 2.5371350324005064, "learning_rate": 6.430038071098419e-07, "loss": 0.2409, "step": 24476 }, { "epoch": 0.42547237045663927, "grad_norm": 2.251366124924898, "learning_rate": 6.429768334190096e-07, "loss": 0.2696, "step": 24477 }, { "epoch": 0.4254897529941421, "grad_norm": 1.3208953511500077, "learning_rate": 6.429498592750063e-07, "loss": 0.1749, "step": 24478 }, { "epoch": 0.42550713553164493, "grad_norm": 1.4256578625860907, "learning_rate": 6.429228846779178e-07, "loss": 0.4822, "step": 24479 }, { "epoch": 0.42552451806914776, "grad_norm": 1.8844919573783183, "learning_rate": 6.428959096278293e-07, "loss": 0.3423, "step": 24480 }, { "epoch": 0.42554190060665054, "grad_norm": 1.7349377490549542, "learning_rate": 6.428689341248264e-07, "loss": 0.233, "step": 24481 }, { "epoch": 0.42555928314415337, "grad_norm": 1.4261349006801924, "learning_rate": 6.428419581689946e-07, "loss": 0.2017, "step": 24482 }, { "epoch": 0.4255766656816562, "grad_norm": 1.2106198497331138, "learning_rate": 6.428149817604194e-07, "loss": 0.2161, "step": 24483 }, { "epoch": 0.425594048219159, "grad_norm": 2.2905674921265438, "learning_rate": 6.427880048991864e-07, "loss": 0.2493, "step": 24484 }, { "epoch": 0.42561143075666186, "grad_norm": 3.7495065702960693, "learning_rate": 6.42761027585381e-07, "loss": 0.2317, "step": 24485 }, { "epoch": 0.4256288132941647, "grad_norm": 1.3569201153105996, "learning_rate": 6.427340498190885e-07, "loss": 0.2741, "step": 24486 }, { "epoch": 0.4256461958316675, "grad_norm": 1.6263104169918932, "learning_rate": 6.427070716003949e-07, "loss": 0.3587, "step": 24487 }, { "epoch": 0.42566357836917035, "grad_norm": 1.33912916497327, "learning_rate": 6.426800929293854e-07, "loss": 0.2718, "step": 24488 }, { "epoch": 0.4256809609066732, "grad_norm": 1.8293903632896629, "learning_rate": 6.426531138061455e-07, "loss": 0.2875, "step": 24489 }, { "epoch": 0.425698343444176, "grad_norm": 2.429616469462945, "learning_rate": 6.426261342307606e-07, "loss": 0.3792, "step": 24490 }, { "epoch": 0.4257157259816788, "grad_norm": 2.68132093422006, "learning_rate": 6.425991542033167e-07, "loss": 0.3664, "step": 24491 }, { "epoch": 0.4257331085191816, "grad_norm": 1.6572175893320142, "learning_rate": 6.425721737238987e-07, "loss": 0.3141, "step": 24492 }, { "epoch": 0.42575049105668444, "grad_norm": 9.76497887468417, "learning_rate": 6.425451927925926e-07, "loss": 0.2065, "step": 24493 }, { "epoch": 0.4257678735941873, "grad_norm": 2.055408480940583, "learning_rate": 6.425182114094834e-07, "loss": 0.3256, "step": 24494 }, { "epoch": 0.4257852561316901, "grad_norm": 1.8087310923592734, "learning_rate": 6.424912295746574e-07, "loss": 0.2894, "step": 24495 }, { "epoch": 0.42580263866919293, "grad_norm": 1.513476995141112, "learning_rate": 6.424642472881994e-07, "loss": 0.2135, "step": 24496 }, { "epoch": 0.42582002120669576, "grad_norm": 1.9459076113379628, "learning_rate": 6.424372645501952e-07, "loss": 0.4095, "step": 24497 }, { "epoch": 0.4258374037441986, "grad_norm": 1.2629557621182712, "learning_rate": 6.424102813607303e-07, "loss": 0.2102, "step": 24498 }, { "epoch": 0.4258547862817014, "grad_norm": 1.8264655641984535, "learning_rate": 6.423832977198902e-07, "loss": 0.2624, "step": 24499 }, { "epoch": 0.4258721688192042, "grad_norm": 1.8237383607495101, "learning_rate": 6.423563136277605e-07, "loss": 0.3164, "step": 24500 }, { "epoch": 0.42588955135670703, "grad_norm": 1.821985439014792, "learning_rate": 6.423293290844267e-07, "loss": 0.3932, "step": 24501 }, { "epoch": 0.42590693389420986, "grad_norm": 1.6288443499840772, "learning_rate": 6.423023440899742e-07, "loss": 0.1415, "step": 24502 }, { "epoch": 0.4259243164317127, "grad_norm": 3.058281393481339, "learning_rate": 6.422753586444887e-07, "loss": 0.2957, "step": 24503 }, { "epoch": 0.4259416989692155, "grad_norm": 1.0909891112182584, "learning_rate": 6.422483727480557e-07, "loss": 0.3859, "step": 24504 }, { "epoch": 0.42595908150671835, "grad_norm": 1.3819587359809085, "learning_rate": 6.422213864007608e-07, "loss": 0.2446, "step": 24505 }, { "epoch": 0.4259764640442212, "grad_norm": 1.8628290355626755, "learning_rate": 6.421943996026892e-07, "loss": 0.3523, "step": 24506 }, { "epoch": 0.425993846581724, "grad_norm": 3.833686276949747, "learning_rate": 6.421674123539268e-07, "loss": 0.2468, "step": 24507 }, { "epoch": 0.42601122911922684, "grad_norm": 2.0543540032831125, "learning_rate": 6.42140424654559e-07, "loss": 0.4572, "step": 24508 }, { "epoch": 0.42602861165672967, "grad_norm": 1.4310768981588977, "learning_rate": 6.421134365046713e-07, "loss": 0.2948, "step": 24509 }, { "epoch": 0.42604599419423245, "grad_norm": 1.99768945325649, "learning_rate": 6.420864479043494e-07, "loss": 0.2651, "step": 24510 }, { "epoch": 0.4260633767317353, "grad_norm": 1.5552743755879228, "learning_rate": 6.420594588536785e-07, "loss": 0.2301, "step": 24511 }, { "epoch": 0.4260807592692381, "grad_norm": 2.6873976008413387, "learning_rate": 6.420324693527445e-07, "loss": 0.2607, "step": 24512 }, { "epoch": 0.42609814180674094, "grad_norm": 1.6587012705773114, "learning_rate": 6.42005479401633e-07, "loss": 0.3386, "step": 24513 }, { "epoch": 0.42611552434424377, "grad_norm": 1.19946886180232, "learning_rate": 6.419784890004292e-07, "loss": 0.2218, "step": 24514 }, { "epoch": 0.4261329068817466, "grad_norm": 1.9958536233920545, "learning_rate": 6.419514981492188e-07, "loss": 0.3777, "step": 24515 }, { "epoch": 0.42615028941924943, "grad_norm": 1.5923188181731853, "learning_rate": 6.419245068480874e-07, "loss": 0.2785, "step": 24516 }, { "epoch": 0.42616767195675226, "grad_norm": 1.7371366785137876, "learning_rate": 6.418975150971204e-07, "loss": 0.2773, "step": 24517 }, { "epoch": 0.4261850544942551, "grad_norm": 3.490112938850784, "learning_rate": 6.418705228964037e-07, "loss": 0.3061, "step": 24518 }, { "epoch": 0.4262024370317579, "grad_norm": 1.5072354597245694, "learning_rate": 6.418435302460223e-07, "loss": 0.346, "step": 24519 }, { "epoch": 0.4262198195692607, "grad_norm": 1.6550120754703816, "learning_rate": 6.418165371460625e-07, "loss": 0.2616, "step": 24520 }, { "epoch": 0.4262372021067635, "grad_norm": 1.9535315557528097, "learning_rate": 6.417895435966089e-07, "loss": 0.3462, "step": 24521 }, { "epoch": 0.42625458464426635, "grad_norm": 0.875748965037223, "learning_rate": 6.417625495977479e-07, "loss": 0.3328, "step": 24522 }, { "epoch": 0.4262719671817692, "grad_norm": 1.6867366889525546, "learning_rate": 6.417355551495647e-07, "loss": 0.3438, "step": 24523 }, { "epoch": 0.426289349719272, "grad_norm": 1.4838428268033321, "learning_rate": 6.417085602521448e-07, "loss": 0.2063, "step": 24524 }, { "epoch": 0.42630673225677485, "grad_norm": 1.7060641758368895, "learning_rate": 6.416815649055739e-07, "loss": 0.4206, "step": 24525 }, { "epoch": 0.4263241147942777, "grad_norm": 1.5784513197669636, "learning_rate": 6.416545691099377e-07, "loss": 0.1926, "step": 24526 }, { "epoch": 0.4263414973317805, "grad_norm": 4.477563043355783, "learning_rate": 6.416275728653213e-07, "loss": 0.2301, "step": 24527 }, { "epoch": 0.42635887986928334, "grad_norm": 0.8684337739991301, "learning_rate": 6.416005761718108e-07, "loss": 0.2738, "step": 24528 }, { "epoch": 0.42637626240678617, "grad_norm": 2.4139593410149387, "learning_rate": 6.415735790294914e-07, "loss": 0.4467, "step": 24529 }, { "epoch": 0.42639364494428894, "grad_norm": 2.3767677839972476, "learning_rate": 6.415465814384487e-07, "loss": 0.4411, "step": 24530 }, { "epoch": 0.42641102748179177, "grad_norm": 1.976315217082728, "learning_rate": 6.415195833987684e-07, "loss": 0.2709, "step": 24531 }, { "epoch": 0.4264284100192946, "grad_norm": 1.1368738193650143, "learning_rate": 6.414925849105362e-07, "loss": 0.328, "step": 24532 }, { "epoch": 0.42644579255679743, "grad_norm": 2.3336024557632524, "learning_rate": 6.414655859738374e-07, "loss": 0.4191, "step": 24533 }, { "epoch": 0.42646317509430026, "grad_norm": 1.2488631848541456, "learning_rate": 6.414385865887576e-07, "loss": 0.2747, "step": 24534 }, { "epoch": 0.4264805576318031, "grad_norm": 1.8699761977838907, "learning_rate": 6.414115867553824e-07, "loss": 0.2514, "step": 24535 }, { "epoch": 0.4264979401693059, "grad_norm": 4.668598831198376, "learning_rate": 6.413845864737976e-07, "loss": 0.323, "step": 24536 }, { "epoch": 0.42651532270680875, "grad_norm": 1.0745227095161947, "learning_rate": 6.413575857440886e-07, "loss": 0.3148, "step": 24537 }, { "epoch": 0.4265327052443116, "grad_norm": 2.2700686437926882, "learning_rate": 6.413305845663409e-07, "loss": 0.2243, "step": 24538 }, { "epoch": 0.4265500877818144, "grad_norm": 1.2361344861603958, "learning_rate": 6.413035829406402e-07, "loss": 0.3993, "step": 24539 }, { "epoch": 0.4265674703193172, "grad_norm": 6.292223584711593, "learning_rate": 6.412765808670721e-07, "loss": 0.4929, "step": 24540 }, { "epoch": 0.42658485285682, "grad_norm": 2.1088296957281836, "learning_rate": 6.412495783457221e-07, "loss": 0.5225, "step": 24541 }, { "epoch": 0.42660223539432285, "grad_norm": 2.5619816339195167, "learning_rate": 6.412225753766758e-07, "loss": 0.3472, "step": 24542 }, { "epoch": 0.4266196179318257, "grad_norm": 2.8442568969904793, "learning_rate": 6.411955719600188e-07, "loss": 0.4347, "step": 24543 }, { "epoch": 0.4266370004693285, "grad_norm": 1.8324766151327803, "learning_rate": 6.411685680958366e-07, "loss": 0.5006, "step": 24544 }, { "epoch": 0.42665438300683134, "grad_norm": 1.731861251521884, "learning_rate": 6.411415637842153e-07, "loss": 0.2548, "step": 24545 }, { "epoch": 0.42667176554433417, "grad_norm": 1.876617222795309, "learning_rate": 6.411145590252395e-07, "loss": 0.2057, "step": 24546 }, { "epoch": 0.426689148081837, "grad_norm": 1.3270591747197569, "learning_rate": 6.410875538189958e-07, "loss": 0.2564, "step": 24547 }, { "epoch": 0.42670653061933983, "grad_norm": 1.8563872132678099, "learning_rate": 6.410605481655692e-07, "loss": 0.3865, "step": 24548 }, { "epoch": 0.42672391315684266, "grad_norm": 1.7039844450675417, "learning_rate": 6.410335420650455e-07, "loss": 0.3395, "step": 24549 }, { "epoch": 0.42674129569434543, "grad_norm": 1.42651904748567, "learning_rate": 6.410065355175102e-07, "loss": 0.2177, "step": 24550 }, { "epoch": 0.42675867823184827, "grad_norm": 3.500968409438002, "learning_rate": 6.40979528523049e-07, "loss": 0.3549, "step": 24551 }, { "epoch": 0.4267760607693511, "grad_norm": 1.99219684603964, "learning_rate": 6.409525210817475e-07, "loss": 0.3034, "step": 24552 }, { "epoch": 0.4267934433068539, "grad_norm": 1.3590381420555473, "learning_rate": 6.409255131936911e-07, "loss": 0.334, "step": 24553 }, { "epoch": 0.42681082584435676, "grad_norm": 1.6924814955088838, "learning_rate": 6.408985048589657e-07, "loss": 0.2427, "step": 24554 }, { "epoch": 0.4268282083818596, "grad_norm": 1.7889936543697869, "learning_rate": 6.408714960776567e-07, "loss": 0.2135, "step": 24555 }, { "epoch": 0.4268455909193624, "grad_norm": 1.8001681538249084, "learning_rate": 6.408444868498498e-07, "loss": 0.3183, "step": 24556 }, { "epoch": 0.42686297345686525, "grad_norm": 1.9297867818111312, "learning_rate": 6.408174771756306e-07, "loss": 0.3033, "step": 24557 }, { "epoch": 0.4268803559943681, "grad_norm": 1.2869525323442101, "learning_rate": 6.407904670550847e-07, "loss": 0.1591, "step": 24558 }, { "epoch": 0.4268977385318709, "grad_norm": 1.8672001871941393, "learning_rate": 6.407634564882976e-07, "loss": 0.4639, "step": 24559 }, { "epoch": 0.4269151210693737, "grad_norm": 1.5358238820898744, "learning_rate": 6.40736445475355e-07, "loss": 0.2364, "step": 24560 }, { "epoch": 0.4269325036068765, "grad_norm": 1.7729431985390807, "learning_rate": 6.407094340163427e-07, "loss": 0.5076, "step": 24561 }, { "epoch": 0.42694988614437934, "grad_norm": 3.832993118954601, "learning_rate": 6.406824221113459e-07, "loss": 0.355, "step": 24562 }, { "epoch": 0.4269672686818822, "grad_norm": 1.861001646489623, "learning_rate": 6.406554097604506e-07, "loss": 0.267, "step": 24563 }, { "epoch": 0.426984651219385, "grad_norm": 1.0014388297339676, "learning_rate": 6.406283969637423e-07, "loss": 0.2031, "step": 24564 }, { "epoch": 0.42700203375688783, "grad_norm": 3.0375966648323014, "learning_rate": 6.406013837213065e-07, "loss": 0.3004, "step": 24565 }, { "epoch": 0.42701941629439066, "grad_norm": 1.6591288086648808, "learning_rate": 6.405743700332286e-07, "loss": 0.1899, "step": 24566 }, { "epoch": 0.4270367988318935, "grad_norm": 2.622344724888702, "learning_rate": 6.405473558995949e-07, "loss": 0.3386, "step": 24567 }, { "epoch": 0.4270541813693963, "grad_norm": 1.483155182355507, "learning_rate": 6.405203413204907e-07, "loss": 0.2657, "step": 24568 }, { "epoch": 0.42707156390689915, "grad_norm": 1.9850553692657178, "learning_rate": 6.404933262960014e-07, "loss": 0.2437, "step": 24569 }, { "epoch": 0.42708894644440193, "grad_norm": 1.9043715137062107, "learning_rate": 6.404663108262128e-07, "loss": 0.4389, "step": 24570 }, { "epoch": 0.42710632898190476, "grad_norm": 1.3796709902075726, "learning_rate": 6.404392949112105e-07, "loss": 0.3182, "step": 24571 }, { "epoch": 0.4271237115194076, "grad_norm": 1.4210644552906175, "learning_rate": 6.404122785510802e-07, "loss": 0.1886, "step": 24572 }, { "epoch": 0.4271410940569104, "grad_norm": 1.1488183654295987, "learning_rate": 6.403852617459074e-07, "loss": 0.2364, "step": 24573 }, { "epoch": 0.42715847659441325, "grad_norm": 0.8676662811927199, "learning_rate": 6.403582444957779e-07, "loss": 0.2932, "step": 24574 }, { "epoch": 0.4271758591319161, "grad_norm": 1.244613281247258, "learning_rate": 6.403312268007772e-07, "loss": 0.3197, "step": 24575 }, { "epoch": 0.4271932416694189, "grad_norm": 2.002345822881881, "learning_rate": 6.40304208660991e-07, "loss": 0.3186, "step": 24576 }, { "epoch": 0.42721062420692174, "grad_norm": 2.709271024051157, "learning_rate": 6.40277190076505e-07, "loss": 0.2809, "step": 24577 }, { "epoch": 0.42722800674442457, "grad_norm": 1.2567057570919549, "learning_rate": 6.402501710474045e-07, "loss": 0.2093, "step": 24578 }, { "epoch": 0.4272453892819274, "grad_norm": 1.3929853967750538, "learning_rate": 6.402231515737755e-07, "loss": 0.2048, "step": 24579 }, { "epoch": 0.4272627718194302, "grad_norm": 1.3342118583975613, "learning_rate": 6.401961316557036e-07, "loss": 0.3973, "step": 24580 }, { "epoch": 0.427280154356933, "grad_norm": 2.8371161371783264, "learning_rate": 6.401691112932744e-07, "loss": 0.5734, "step": 24581 }, { "epoch": 0.42729753689443584, "grad_norm": 3.2417266259900694, "learning_rate": 6.401420904865733e-07, "loss": 0.2339, "step": 24582 }, { "epoch": 0.42731491943193867, "grad_norm": 2.564001103214619, "learning_rate": 6.401150692356862e-07, "loss": 0.3492, "step": 24583 }, { "epoch": 0.4273323019694415, "grad_norm": 1.3460952037949205, "learning_rate": 6.400880475406989e-07, "loss": 0.2709, "step": 24584 }, { "epoch": 0.4273496845069443, "grad_norm": 2.793567976882604, "learning_rate": 6.400610254016966e-07, "loss": 0.3264, "step": 24585 }, { "epoch": 0.42736706704444716, "grad_norm": 2.4457723640671376, "learning_rate": 6.400340028187655e-07, "loss": 0.2873, "step": 24586 }, { "epoch": 0.42738444958195, "grad_norm": 1.525150424933882, "learning_rate": 6.400069797919904e-07, "loss": 0.373, "step": 24587 }, { "epoch": 0.4274018321194528, "grad_norm": 1.7414456208042242, "learning_rate": 6.399799563214579e-07, "loss": 0.1718, "step": 24588 }, { "epoch": 0.42741921465695565, "grad_norm": 2.0588925404705796, "learning_rate": 6.399529324072532e-07, "loss": 0.4281, "step": 24589 }, { "epoch": 0.4274365971944584, "grad_norm": 1.4713442138092712, "learning_rate": 6.39925908049462e-07, "loss": 0.1961, "step": 24590 }, { "epoch": 0.42745397973196125, "grad_norm": 1.3316242450310407, "learning_rate": 6.398988832481697e-07, "loss": 0.3445, "step": 24591 }, { "epoch": 0.4274713622694641, "grad_norm": 1.237443270228202, "learning_rate": 6.398718580034626e-07, "loss": 0.2001, "step": 24592 }, { "epoch": 0.4274887448069669, "grad_norm": 1.9737615438492524, "learning_rate": 6.398448323154257e-07, "loss": 0.3579, "step": 24593 }, { "epoch": 0.42750612734446974, "grad_norm": 1.4391300969142937, "learning_rate": 6.398178061841451e-07, "loss": 0.3004, "step": 24594 }, { "epoch": 0.4275235098819726, "grad_norm": 1.0576818572446558, "learning_rate": 6.39790779609706e-07, "loss": 0.2415, "step": 24595 }, { "epoch": 0.4275408924194754, "grad_norm": 2.991458814913885, "learning_rate": 6.397637525921945e-07, "loss": 0.4357, "step": 24596 }, { "epoch": 0.42755827495697823, "grad_norm": 1.9817869877198988, "learning_rate": 6.397367251316961e-07, "loss": 0.5387, "step": 24597 }, { "epoch": 0.42757565749448107, "grad_norm": 0.8959493060815801, "learning_rate": 6.397096972282965e-07, "loss": 0.201, "step": 24598 }, { "epoch": 0.4275930400319839, "grad_norm": 0.957271589792606, "learning_rate": 6.396826688820814e-07, "loss": 0.293, "step": 24599 }, { "epoch": 0.42761042256948667, "grad_norm": 1.5278102483610643, "learning_rate": 6.396556400931363e-07, "loss": 0.295, "step": 24600 }, { "epoch": 0.4276278051069895, "grad_norm": 2.8427450481789958, "learning_rate": 6.39628610861547e-07, "loss": 0.1822, "step": 24601 }, { "epoch": 0.42764518764449233, "grad_norm": 1.5700392265377354, "learning_rate": 6.39601581187399e-07, "loss": 0.3173, "step": 24602 }, { "epoch": 0.42766257018199516, "grad_norm": 1.1371026417210648, "learning_rate": 6.395745510707784e-07, "loss": 0.3332, "step": 24603 }, { "epoch": 0.427679952719498, "grad_norm": 2.3624695324298806, "learning_rate": 6.395475205117703e-07, "loss": 0.3481, "step": 24604 }, { "epoch": 0.4276973352570008, "grad_norm": 1.0042697714868545, "learning_rate": 6.395204895104609e-07, "loss": 0.4239, "step": 24605 }, { "epoch": 0.42771471779450365, "grad_norm": 1.2548381967590094, "learning_rate": 6.394934580669356e-07, "loss": 0.2073, "step": 24606 }, { "epoch": 0.4277321003320065, "grad_norm": 0.9973280067890518, "learning_rate": 6.3946642618128e-07, "loss": 0.2251, "step": 24607 }, { "epoch": 0.4277494828695093, "grad_norm": 1.6673990809961625, "learning_rate": 6.394393938535799e-07, "loss": 0.1992, "step": 24608 }, { "epoch": 0.42776686540701214, "grad_norm": 2.3416959710690537, "learning_rate": 6.394123610839211e-07, "loss": 0.2339, "step": 24609 }, { "epoch": 0.4277842479445149, "grad_norm": 1.762213110963132, "learning_rate": 6.393853278723891e-07, "loss": 0.2094, "step": 24610 }, { "epoch": 0.42780163048201775, "grad_norm": 2.2727934048995047, "learning_rate": 6.393582942190696e-07, "loss": 0.3916, "step": 24611 }, { "epoch": 0.4278190130195206, "grad_norm": 0.8934750977817079, "learning_rate": 6.393312601240484e-07, "loss": 0.2735, "step": 24612 }, { "epoch": 0.4278363955570234, "grad_norm": 2.9591900198112997, "learning_rate": 6.39304225587411e-07, "loss": 0.2976, "step": 24613 }, { "epoch": 0.42785377809452624, "grad_norm": 1.1639832295925183, "learning_rate": 6.392771906092431e-07, "loss": 0.3199, "step": 24614 }, { "epoch": 0.42787116063202907, "grad_norm": 1.724127779543848, "learning_rate": 6.392501551896309e-07, "loss": 0.3438, "step": 24615 }, { "epoch": 0.4278885431695319, "grad_norm": 1.418308411518823, "learning_rate": 6.392231193286592e-07, "loss": 0.2787, "step": 24616 }, { "epoch": 0.42790592570703473, "grad_norm": 1.2660691323994917, "learning_rate": 6.391960830264144e-07, "loss": 0.2178, "step": 24617 }, { "epoch": 0.42792330824453756, "grad_norm": 1.5028082328067025, "learning_rate": 6.391690462829821e-07, "loss": 0.2649, "step": 24618 }, { "epoch": 0.4279406907820404, "grad_norm": 1.3404768938330616, "learning_rate": 6.391420090984476e-07, "loss": 0.2623, "step": 24619 }, { "epoch": 0.42795807331954316, "grad_norm": 12.158252943639285, "learning_rate": 6.39114971472897e-07, "loss": 0.3034, "step": 24620 }, { "epoch": 0.427975455857046, "grad_norm": 1.0376460665660774, "learning_rate": 6.390879334064158e-07, "loss": 0.3293, "step": 24621 }, { "epoch": 0.4279928383945488, "grad_norm": 1.717003674354596, "learning_rate": 6.390608948990897e-07, "loss": 0.3115, "step": 24622 }, { "epoch": 0.42801022093205165, "grad_norm": 1.4871261571225458, "learning_rate": 6.390338559510045e-07, "loss": 0.3448, "step": 24623 }, { "epoch": 0.4280276034695545, "grad_norm": 1.7093774883389785, "learning_rate": 6.390068165622457e-07, "loss": 0.4078, "step": 24624 }, { "epoch": 0.4280449860070573, "grad_norm": 2.5993561662975053, "learning_rate": 6.389797767328992e-07, "loss": 0.3331, "step": 24625 }, { "epoch": 0.42806236854456015, "grad_norm": 2.0806982387569963, "learning_rate": 6.389527364630507e-07, "loss": 0.5339, "step": 24626 }, { "epoch": 0.428079751082063, "grad_norm": 2.004112399568438, "learning_rate": 6.38925695752786e-07, "loss": 0.2731, "step": 24627 }, { "epoch": 0.4280971336195658, "grad_norm": 1.2915033171914796, "learning_rate": 6.388986546021905e-07, "loss": 0.357, "step": 24628 }, { "epoch": 0.42811451615706864, "grad_norm": 3.034431292015236, "learning_rate": 6.388716130113501e-07, "loss": 0.3807, "step": 24629 }, { "epoch": 0.4281318986945714, "grad_norm": 1.1977601433421938, "learning_rate": 6.388445709803505e-07, "loss": 0.4014, "step": 24630 }, { "epoch": 0.42814928123207424, "grad_norm": 1.4370701750851882, "learning_rate": 6.388175285092773e-07, "loss": 0.4138, "step": 24631 }, { "epoch": 0.42816666376957707, "grad_norm": 1.2531354841526081, "learning_rate": 6.387904855982163e-07, "loss": 0.1888, "step": 24632 }, { "epoch": 0.4281840463070799, "grad_norm": 3.2561766716548655, "learning_rate": 6.387634422472532e-07, "loss": 0.318, "step": 24633 }, { "epoch": 0.42820142884458273, "grad_norm": 2.1567042356828523, "learning_rate": 6.38736398456474e-07, "loss": 0.3416, "step": 24634 }, { "epoch": 0.42821881138208556, "grad_norm": 1.3190329007556412, "learning_rate": 6.387093542259638e-07, "loss": 0.3281, "step": 24635 }, { "epoch": 0.4282361939195884, "grad_norm": 1.1092830814347214, "learning_rate": 6.38682309555809e-07, "loss": 0.2726, "step": 24636 }, { "epoch": 0.4282535764570912, "grad_norm": 1.2424499924899157, "learning_rate": 6.386552644460947e-07, "loss": 0.2729, "step": 24637 }, { "epoch": 0.42827095899459405, "grad_norm": 1.2299322017311218, "learning_rate": 6.386282188969071e-07, "loss": 0.2201, "step": 24638 }, { "epoch": 0.42828834153209683, "grad_norm": 1.7293344701274398, "learning_rate": 6.386011729083315e-07, "loss": 0.2774, "step": 24639 }, { "epoch": 0.42830572406959966, "grad_norm": 0.9797621235554159, "learning_rate": 6.385741264804542e-07, "loss": 0.1559, "step": 24640 }, { "epoch": 0.4283231066071025, "grad_norm": 1.7119578991865825, "learning_rate": 6.385470796133602e-07, "loss": 0.3573, "step": 24641 }, { "epoch": 0.4283404891446053, "grad_norm": 3.528774539108254, "learning_rate": 6.385200323071358e-07, "loss": 0.5822, "step": 24642 }, { "epoch": 0.42835787168210815, "grad_norm": 1.5764319495686163, "learning_rate": 6.384929845618665e-07, "loss": 0.1994, "step": 24643 }, { "epoch": 0.428375254219611, "grad_norm": 1.2062032795000739, "learning_rate": 6.384659363776381e-07, "loss": 0.2599, "step": 24644 }, { "epoch": 0.4283926367571138, "grad_norm": 1.7674863360150619, "learning_rate": 6.384388877545361e-07, "loss": 0.3719, "step": 24645 }, { "epoch": 0.42841001929461664, "grad_norm": 1.6608760307420976, "learning_rate": 6.384118386926467e-07, "loss": 0.1878, "step": 24646 }, { "epoch": 0.42842740183211947, "grad_norm": 1.570507481457133, "learning_rate": 6.383847891920551e-07, "loss": 0.2292, "step": 24647 }, { "epoch": 0.4284447843696223, "grad_norm": 1.6644798810848078, "learning_rate": 6.383577392528474e-07, "loss": 0.3154, "step": 24648 }, { "epoch": 0.4284621669071251, "grad_norm": 2.174953071893698, "learning_rate": 6.383306888751094e-07, "loss": 0.368, "step": 24649 }, { "epoch": 0.4284795494446279, "grad_norm": 0.9014515029971377, "learning_rate": 6.383036380589264e-07, "loss": 0.3614, "step": 24650 }, { "epoch": 0.42849693198213074, "grad_norm": 1.352948733874397, "learning_rate": 6.382765868043845e-07, "loss": 0.3546, "step": 24651 }, { "epoch": 0.42851431451963357, "grad_norm": 2.1510033397856123, "learning_rate": 6.382495351115692e-07, "loss": 0.3978, "step": 24652 }, { "epoch": 0.4285316970571364, "grad_norm": 1.8195386457524256, "learning_rate": 6.382224829805665e-07, "loss": 0.4409, "step": 24653 }, { "epoch": 0.4285490795946392, "grad_norm": 3.385762708032948, "learning_rate": 6.381954304114621e-07, "loss": 0.5342, "step": 24654 }, { "epoch": 0.42856646213214206, "grad_norm": 2.4337922488098576, "learning_rate": 6.381683774043415e-07, "loss": 0.3921, "step": 24655 }, { "epoch": 0.4285838446696449, "grad_norm": 1.628104052574595, "learning_rate": 6.381413239592908e-07, "loss": 0.2662, "step": 24656 }, { "epoch": 0.4286012272071477, "grad_norm": 2.2442458344994125, "learning_rate": 6.381142700763953e-07, "loss": 0.3464, "step": 24657 }, { "epoch": 0.42861860974465055, "grad_norm": 1.7795019201300109, "learning_rate": 6.380872157557413e-07, "loss": 0.1932, "step": 24658 }, { "epoch": 0.4286359922821533, "grad_norm": 2.01177800264193, "learning_rate": 6.380601609974141e-07, "loss": 0.1642, "step": 24659 }, { "epoch": 0.42865337481965615, "grad_norm": 1.0187076457124071, "learning_rate": 6.380331058014996e-07, "loss": 0.3128, "step": 24660 }, { "epoch": 0.428670757357159, "grad_norm": 1.8550705551139066, "learning_rate": 6.380060501680836e-07, "loss": 0.5119, "step": 24661 }, { "epoch": 0.4286881398946618, "grad_norm": 2.747860899194332, "learning_rate": 6.379789940972519e-07, "loss": 0.2799, "step": 24662 }, { "epoch": 0.42870552243216464, "grad_norm": 1.5433276132415228, "learning_rate": 6.3795193758909e-07, "loss": 0.3885, "step": 24663 }, { "epoch": 0.4287229049696675, "grad_norm": 1.2572349930778313, "learning_rate": 6.379248806436838e-07, "loss": 0.2924, "step": 24664 }, { "epoch": 0.4287402875071703, "grad_norm": 2.5460016683032434, "learning_rate": 6.378978232611194e-07, "loss": 0.2856, "step": 24665 }, { "epoch": 0.42875767004467313, "grad_norm": 4.101512257728745, "learning_rate": 6.37870765441482e-07, "loss": 0.3388, "step": 24666 }, { "epoch": 0.42877505258217596, "grad_norm": 1.2431124422991857, "learning_rate": 6.378437071848577e-07, "loss": 0.3337, "step": 24667 }, { "epoch": 0.4287924351196788, "grad_norm": 1.8761594771053722, "learning_rate": 6.378166484913321e-07, "loss": 0.3686, "step": 24668 }, { "epoch": 0.42880981765718157, "grad_norm": 1.3918344791394757, "learning_rate": 6.37789589360991e-07, "loss": 0.2616, "step": 24669 }, { "epoch": 0.4288272001946844, "grad_norm": 1.7366908462475796, "learning_rate": 6.377625297939203e-07, "loss": 0.2676, "step": 24670 }, { "epoch": 0.42884458273218723, "grad_norm": 1.612303639870587, "learning_rate": 6.377354697902057e-07, "loss": 0.2717, "step": 24671 }, { "epoch": 0.42886196526969006, "grad_norm": 1.5849205425051065, "learning_rate": 6.377084093499328e-07, "loss": 0.2168, "step": 24672 }, { "epoch": 0.4288793478071929, "grad_norm": 1.1579533469590677, "learning_rate": 6.376813484731875e-07, "loss": 0.3821, "step": 24673 }, { "epoch": 0.4288967303446957, "grad_norm": 1.8020771890286245, "learning_rate": 6.376542871600555e-07, "loss": 0.3065, "step": 24674 }, { "epoch": 0.42891411288219855, "grad_norm": 2.3509384072711157, "learning_rate": 6.376272254106229e-07, "loss": 0.3873, "step": 24675 }, { "epoch": 0.4289314954197014, "grad_norm": 2.563415800887792, "learning_rate": 6.37600163224975e-07, "loss": 0.2835, "step": 24676 }, { "epoch": 0.4289488779572042, "grad_norm": 1.8411355220613748, "learning_rate": 6.375731006031979e-07, "loss": 0.3842, "step": 24677 }, { "epoch": 0.42896626049470704, "grad_norm": 2.4016021529362335, "learning_rate": 6.375460375453772e-07, "loss": 0.2835, "step": 24678 }, { "epoch": 0.4289836430322098, "grad_norm": 2.935775368468537, "learning_rate": 6.375189740515989e-07, "loss": 0.3286, "step": 24679 }, { "epoch": 0.42900102556971265, "grad_norm": 1.897617932402734, "learning_rate": 6.374919101219484e-07, "loss": 0.369, "step": 24680 }, { "epoch": 0.4290184081072155, "grad_norm": 1.244548081864427, "learning_rate": 6.374648457565118e-07, "loss": 0.2834, "step": 24681 }, { "epoch": 0.4290357906447183, "grad_norm": 2.0210526205108796, "learning_rate": 6.374377809553748e-07, "loss": 0.2379, "step": 24682 }, { "epoch": 0.42905317318222114, "grad_norm": 1.397582513478188, "learning_rate": 6.374107157186231e-07, "loss": 0.4281, "step": 24683 }, { "epoch": 0.42907055571972397, "grad_norm": 1.4089214763195437, "learning_rate": 6.373836500463427e-07, "loss": 0.2036, "step": 24684 }, { "epoch": 0.4290879382572268, "grad_norm": 0.9039671404899462, "learning_rate": 6.37356583938619e-07, "loss": 0.2011, "step": 24685 }, { "epoch": 0.4291053207947296, "grad_norm": 2.4014614604844047, "learning_rate": 6.373295173955382e-07, "loss": 0.3139, "step": 24686 }, { "epoch": 0.42912270333223246, "grad_norm": 10.689898542803881, "learning_rate": 6.37302450417186e-07, "loss": 0.2349, "step": 24687 }, { "epoch": 0.4291400858697353, "grad_norm": 1.4474407641164866, "learning_rate": 6.372753830036479e-07, "loss": 0.3521, "step": 24688 }, { "epoch": 0.42915746840723806, "grad_norm": 1.2563228133848847, "learning_rate": 6.372483151550099e-07, "loss": 0.2361, "step": 24689 }, { "epoch": 0.4291748509447409, "grad_norm": 1.361258637257332, "learning_rate": 6.37221246871358e-07, "loss": 0.3114, "step": 24690 }, { "epoch": 0.4291922334822437, "grad_norm": 1.362278463413182, "learning_rate": 6.371941781527776e-07, "loss": 0.3449, "step": 24691 }, { "epoch": 0.42920961601974655, "grad_norm": 1.4112467126967403, "learning_rate": 6.371671089993548e-07, "loss": 0.359, "step": 24692 }, { "epoch": 0.4292269985572494, "grad_norm": 2.040840258092438, "learning_rate": 6.371400394111751e-07, "loss": 0.3329, "step": 24693 }, { "epoch": 0.4292443810947522, "grad_norm": 1.749848158245015, "learning_rate": 6.371129693883247e-07, "loss": 0.2581, "step": 24694 }, { "epoch": 0.42926176363225504, "grad_norm": 0.7021341443970716, "learning_rate": 6.370858989308889e-07, "loss": 0.1084, "step": 24695 }, { "epoch": 0.4292791461697579, "grad_norm": 2.0468201298285154, "learning_rate": 6.370588280389539e-07, "loss": 0.2726, "step": 24696 }, { "epoch": 0.4292965287072607, "grad_norm": 1.6224031177757934, "learning_rate": 6.370317567126055e-07, "loss": 0.2147, "step": 24697 }, { "epoch": 0.42931391124476354, "grad_norm": 1.5136778360465557, "learning_rate": 6.370046849519292e-07, "loss": 0.209, "step": 24698 }, { "epoch": 0.4293312937822663, "grad_norm": 2.214001015268133, "learning_rate": 6.369776127570111e-07, "loss": 0.3964, "step": 24699 }, { "epoch": 0.42934867631976914, "grad_norm": 1.3970996619548481, "learning_rate": 6.36950540127937e-07, "loss": 0.2275, "step": 24700 }, { "epoch": 0.42936605885727197, "grad_norm": 0.9626352468969279, "learning_rate": 6.369234670647923e-07, "loss": 0.229, "step": 24701 }, { "epoch": 0.4293834413947748, "grad_norm": 2.1647273034563863, "learning_rate": 6.368963935676633e-07, "loss": 0.5055, "step": 24702 }, { "epoch": 0.42940082393227763, "grad_norm": 1.3194839314065476, "learning_rate": 6.368693196366356e-07, "loss": 0.1664, "step": 24703 }, { "epoch": 0.42941820646978046, "grad_norm": 5.522645848459437, "learning_rate": 6.36842245271795e-07, "loss": 0.4078, "step": 24704 }, { "epoch": 0.4294355890072833, "grad_norm": 1.1574048444342555, "learning_rate": 6.368151704732273e-07, "loss": 0.2928, "step": 24705 }, { "epoch": 0.4294529715447861, "grad_norm": 1.2927235344166095, "learning_rate": 6.367880952410185e-07, "loss": 0.2151, "step": 24706 }, { "epoch": 0.42947035408228895, "grad_norm": 1.7011930984325812, "learning_rate": 6.367610195752541e-07, "loss": 0.3008, "step": 24707 }, { "epoch": 0.4294877366197918, "grad_norm": 1.6812453539485455, "learning_rate": 6.367339434760202e-07, "loss": 0.301, "step": 24708 }, { "epoch": 0.42950511915729456, "grad_norm": 1.5857467040224198, "learning_rate": 6.367068669434024e-07, "loss": 0.1929, "step": 24709 }, { "epoch": 0.4295225016947974, "grad_norm": 1.3020365754323469, "learning_rate": 6.366797899774868e-07, "loss": 0.2924, "step": 24710 }, { "epoch": 0.4295398842323002, "grad_norm": 1.924318464048845, "learning_rate": 6.366527125783588e-07, "loss": 0.2174, "step": 24711 }, { "epoch": 0.42955726676980305, "grad_norm": 1.1637769130050113, "learning_rate": 6.366256347461047e-07, "loss": 0.173, "step": 24712 }, { "epoch": 0.4295746493073059, "grad_norm": 2.4177803884788567, "learning_rate": 6.3659855648081e-07, "loss": 0.2311, "step": 24713 }, { "epoch": 0.4295920318448087, "grad_norm": 1.2947261661202436, "learning_rate": 6.365714777825607e-07, "loss": 0.2058, "step": 24714 }, { "epoch": 0.42960941438231154, "grad_norm": 2.090682283882141, "learning_rate": 6.365443986514425e-07, "loss": 0.2529, "step": 24715 }, { "epoch": 0.42962679691981437, "grad_norm": 3.733149545366319, "learning_rate": 6.365173190875413e-07, "loss": 0.3057, "step": 24716 }, { "epoch": 0.4296441794573172, "grad_norm": 1.4060469714923785, "learning_rate": 6.364902390909429e-07, "loss": 0.6046, "step": 24717 }, { "epoch": 0.42966156199482003, "grad_norm": 1.4414970372088955, "learning_rate": 6.364631586617331e-07, "loss": 0.4539, "step": 24718 }, { "epoch": 0.4296789445323228, "grad_norm": 1.8434626932073819, "learning_rate": 6.364360777999979e-07, "loss": 0.2103, "step": 24719 }, { "epoch": 0.42969632706982563, "grad_norm": 1.5366971852290723, "learning_rate": 6.364089965058229e-07, "loss": 0.3049, "step": 24720 }, { "epoch": 0.42971370960732846, "grad_norm": 2.1603685566529616, "learning_rate": 6.363819147792939e-07, "loss": 0.1813, "step": 24721 }, { "epoch": 0.4297310921448313, "grad_norm": 1.0755539266250498, "learning_rate": 6.363548326204971e-07, "loss": 0.2488, "step": 24722 }, { "epoch": 0.4297484746823341, "grad_norm": 1.8725416672138164, "learning_rate": 6.363277500295181e-07, "loss": 0.1819, "step": 24723 }, { "epoch": 0.42976585721983696, "grad_norm": 2.0221243202089827, "learning_rate": 6.363006670064426e-07, "loss": 0.4935, "step": 24724 }, { "epoch": 0.4297832397573398, "grad_norm": 1.8870093208898384, "learning_rate": 6.362735835513569e-07, "loss": 0.2742, "step": 24725 }, { "epoch": 0.4298006222948426, "grad_norm": 2.7324643734198832, "learning_rate": 6.362464996643462e-07, "loss": 0.455, "step": 24726 }, { "epoch": 0.42981800483234545, "grad_norm": 2.4434911606523486, "learning_rate": 6.362194153454969e-07, "loss": 0.3027, "step": 24727 }, { "epoch": 0.4298353873698483, "grad_norm": 2.0615576022677193, "learning_rate": 6.361923305948945e-07, "loss": 0.2616, "step": 24728 }, { "epoch": 0.42985276990735105, "grad_norm": 2.2247549900916774, "learning_rate": 6.361652454126251e-07, "loss": 0.3567, "step": 24729 }, { "epoch": 0.4298701524448539, "grad_norm": 2.2293368178268476, "learning_rate": 6.361381597987743e-07, "loss": 0.4506, "step": 24730 }, { "epoch": 0.4298875349823567, "grad_norm": 1.0673350984430343, "learning_rate": 6.361110737534282e-07, "loss": 0.2145, "step": 24731 }, { "epoch": 0.42990491751985954, "grad_norm": 1.3749645405163937, "learning_rate": 6.360839872766724e-07, "loss": 0.2378, "step": 24732 }, { "epoch": 0.42992230005736237, "grad_norm": 1.1014814008300116, "learning_rate": 6.360569003685928e-07, "loss": 0.2723, "step": 24733 }, { "epoch": 0.4299396825948652, "grad_norm": 1.0488256194756036, "learning_rate": 6.360298130292754e-07, "loss": 0.3218, "step": 24734 }, { "epoch": 0.42995706513236803, "grad_norm": 1.5631318651159485, "learning_rate": 6.360027252588061e-07, "loss": 0.1985, "step": 24735 }, { "epoch": 0.42997444766987086, "grad_norm": 1.7704340477603981, "learning_rate": 6.359756370572705e-07, "loss": 0.3951, "step": 24736 }, { "epoch": 0.4299918302073737, "grad_norm": 2.4377529221221708, "learning_rate": 6.359485484247547e-07, "loss": 0.2536, "step": 24737 }, { "epoch": 0.4300092127448765, "grad_norm": 1.7030936538064305, "learning_rate": 6.359214593613444e-07, "loss": 0.2524, "step": 24738 }, { "epoch": 0.4300265952823793, "grad_norm": 1.3723914205608612, "learning_rate": 6.358943698671256e-07, "loss": 0.2135, "step": 24739 }, { "epoch": 0.43004397781988213, "grad_norm": 0.9150887751012929, "learning_rate": 6.358672799421838e-07, "loss": 0.3532, "step": 24740 }, { "epoch": 0.43006136035738496, "grad_norm": 1.866130701546492, "learning_rate": 6.358401895866054e-07, "loss": 0.2218, "step": 24741 }, { "epoch": 0.4300787428948878, "grad_norm": 1.9009545064720834, "learning_rate": 6.358130988004759e-07, "loss": 0.31, "step": 24742 }, { "epoch": 0.4300961254323906, "grad_norm": 1.7141386600808357, "learning_rate": 6.357860075838813e-07, "loss": 0.4007, "step": 24743 }, { "epoch": 0.43011350796989345, "grad_norm": 1.5399675547853755, "learning_rate": 6.357589159369074e-07, "loss": 0.2692, "step": 24744 }, { "epoch": 0.4301308905073963, "grad_norm": 1.6839837823520005, "learning_rate": 6.357318238596401e-07, "loss": 0.25, "step": 24745 }, { "epoch": 0.4301482730448991, "grad_norm": 1.186819550610519, "learning_rate": 6.357047313521653e-07, "loss": 0.2064, "step": 24746 }, { "epoch": 0.43016565558240194, "grad_norm": 2.0276634397986633, "learning_rate": 6.356776384145688e-07, "loss": 0.284, "step": 24747 }, { "epoch": 0.43018303811990477, "grad_norm": 1.2421617188594911, "learning_rate": 6.356505450469365e-07, "loss": 0.3219, "step": 24748 }, { "epoch": 0.43020042065740755, "grad_norm": 2.3239102296393126, "learning_rate": 6.356234512493543e-07, "loss": 0.3299, "step": 24749 }, { "epoch": 0.4302178031949104, "grad_norm": 2.173398594338154, "learning_rate": 6.355963570219082e-07, "loss": 0.2471, "step": 24750 }, { "epoch": 0.4302351857324132, "grad_norm": 1.334391087104435, "learning_rate": 6.355692623646837e-07, "loss": 0.2209, "step": 24751 }, { "epoch": 0.43025256826991604, "grad_norm": 1.7380590849244593, "learning_rate": 6.355421672777669e-07, "loss": 0.311, "step": 24752 }, { "epoch": 0.43026995080741887, "grad_norm": 3.3127777416134196, "learning_rate": 6.355150717612439e-07, "loss": 0.3944, "step": 24753 }, { "epoch": 0.4302873333449217, "grad_norm": 1.2091773815075808, "learning_rate": 6.354879758152003e-07, "loss": 0.2755, "step": 24754 }, { "epoch": 0.4303047158824245, "grad_norm": 3.294260293611614, "learning_rate": 6.354608794397221e-07, "loss": 0.3304, "step": 24755 }, { "epoch": 0.43032209841992736, "grad_norm": 1.630496443757035, "learning_rate": 6.35433782634895e-07, "loss": 0.2904, "step": 24756 }, { "epoch": 0.4303394809574302, "grad_norm": 2.250655072101635, "learning_rate": 6.354066854008051e-07, "loss": 0.2831, "step": 24757 }, { "epoch": 0.430356863494933, "grad_norm": 2.5286489088996005, "learning_rate": 6.353795877375383e-07, "loss": 0.3651, "step": 24758 }, { "epoch": 0.4303742460324358, "grad_norm": 1.3779870058440185, "learning_rate": 6.353524896451801e-07, "loss": 0.2913, "step": 24759 }, { "epoch": 0.4303916285699386, "grad_norm": 2.2887611912517047, "learning_rate": 6.353253911238171e-07, "loss": 0.2529, "step": 24760 }, { "epoch": 0.43040901110744145, "grad_norm": 1.3469281138727927, "learning_rate": 6.352982921735345e-07, "loss": 0.5607, "step": 24761 }, { "epoch": 0.4304263936449443, "grad_norm": 1.085805178769465, "learning_rate": 6.352711927944185e-07, "loss": 0.2679, "step": 24762 }, { "epoch": 0.4304437761824471, "grad_norm": 1.6257969798345067, "learning_rate": 6.35244092986555e-07, "loss": 0.2767, "step": 24763 }, { "epoch": 0.43046115871994994, "grad_norm": 2.0301400493646167, "learning_rate": 6.352169927500299e-07, "loss": 0.251, "step": 24764 }, { "epoch": 0.4304785412574528, "grad_norm": 1.9578186906787658, "learning_rate": 6.351898920849288e-07, "loss": 0.3868, "step": 24765 }, { "epoch": 0.4304959237949556, "grad_norm": 3.250292222604848, "learning_rate": 6.351627909913382e-07, "loss": 0.4425, "step": 24766 }, { "epoch": 0.43051330633245843, "grad_norm": 1.2205134546174399, "learning_rate": 6.351356894693433e-07, "loss": 0.3877, "step": 24767 }, { "epoch": 0.43053068886996126, "grad_norm": 1.6550073550841202, "learning_rate": 6.351085875190306e-07, "loss": 0.1794, "step": 24768 }, { "epoch": 0.43054807140746404, "grad_norm": 1.1681659656671144, "learning_rate": 6.350814851404856e-07, "loss": 0.2233, "step": 24769 }, { "epoch": 0.43056545394496687, "grad_norm": 1.0889559046152972, "learning_rate": 6.350543823337945e-07, "loss": 0.4997, "step": 24770 }, { "epoch": 0.4305828364824697, "grad_norm": 1.5850585920083855, "learning_rate": 6.350272790990428e-07, "loss": 0.3999, "step": 24771 }, { "epoch": 0.43060021901997253, "grad_norm": 1.9120394084909416, "learning_rate": 6.350001754363169e-07, "loss": 0.3703, "step": 24772 }, { "epoch": 0.43061760155747536, "grad_norm": 1.7183191690245005, "learning_rate": 6.349730713457023e-07, "loss": 0.2619, "step": 24773 }, { "epoch": 0.4306349840949782, "grad_norm": 2.6921622799893217, "learning_rate": 6.349459668272852e-07, "loss": 0.2432, "step": 24774 }, { "epoch": 0.430652366632481, "grad_norm": 1.8919365317304027, "learning_rate": 6.349188618811512e-07, "loss": 0.2776, "step": 24775 }, { "epoch": 0.43066974916998385, "grad_norm": 2.678087305809909, "learning_rate": 6.348917565073865e-07, "loss": 0.4114, "step": 24776 }, { "epoch": 0.4306871317074867, "grad_norm": 1.8695084487443598, "learning_rate": 6.348646507060769e-07, "loss": 0.2876, "step": 24777 }, { "epoch": 0.43070451424498946, "grad_norm": 2.1530207294439303, "learning_rate": 6.348375444773082e-07, "loss": 0.2791, "step": 24778 }, { "epoch": 0.4307218967824923, "grad_norm": 2.7554522256970158, "learning_rate": 6.348104378211665e-07, "loss": 0.2242, "step": 24779 }, { "epoch": 0.4307392793199951, "grad_norm": 1.4028980070479729, "learning_rate": 6.347833307377376e-07, "loss": 0.2041, "step": 24780 }, { "epoch": 0.43075666185749795, "grad_norm": 1.88062934436485, "learning_rate": 6.347562232271075e-07, "loss": 0.3486, "step": 24781 }, { "epoch": 0.4307740443950008, "grad_norm": 1.4305264844319672, "learning_rate": 6.347291152893621e-07, "loss": 0.4231, "step": 24782 }, { "epoch": 0.4307914269325036, "grad_norm": 1.7701064847399042, "learning_rate": 6.347020069245873e-07, "loss": 0.4575, "step": 24783 }, { "epoch": 0.43080880947000644, "grad_norm": 2.3476772279296263, "learning_rate": 6.346748981328689e-07, "loss": 0.2855, "step": 24784 }, { "epoch": 0.43082619200750927, "grad_norm": 2.937874724871622, "learning_rate": 6.346477889142931e-07, "loss": 0.3791, "step": 24785 }, { "epoch": 0.4308435745450121, "grad_norm": 1.363403618041421, "learning_rate": 6.346206792689455e-07, "loss": 0.2997, "step": 24786 }, { "epoch": 0.43086095708251493, "grad_norm": 2.0878897406057897, "learning_rate": 6.345935691969123e-07, "loss": 0.2999, "step": 24787 }, { "epoch": 0.4308783396200177, "grad_norm": 2.1633276560252663, "learning_rate": 6.345664586982793e-07, "loss": 0.4282, "step": 24788 }, { "epoch": 0.43089572215752053, "grad_norm": 1.2611354518631586, "learning_rate": 6.345393477731324e-07, "loss": 0.38, "step": 24789 }, { "epoch": 0.43091310469502336, "grad_norm": 2.394391444262467, "learning_rate": 6.345122364215577e-07, "loss": 0.286, "step": 24790 }, { "epoch": 0.4309304872325262, "grad_norm": 1.6741318686146314, "learning_rate": 6.344851246436409e-07, "loss": 0.2613, "step": 24791 }, { "epoch": 0.430947869770029, "grad_norm": 1.885954890253172, "learning_rate": 6.34458012439468e-07, "loss": 0.2844, "step": 24792 }, { "epoch": 0.43096525230753185, "grad_norm": 2.3591247377592395, "learning_rate": 6.344308998091249e-07, "loss": 0.4154, "step": 24793 }, { "epoch": 0.4309826348450347, "grad_norm": 2.0795162038166346, "learning_rate": 6.344037867526977e-07, "loss": 0.507, "step": 24794 }, { "epoch": 0.4310000173825375, "grad_norm": 1.4941950765960899, "learning_rate": 6.343766732702722e-07, "loss": 0.2754, "step": 24795 }, { "epoch": 0.43101739992004034, "grad_norm": 1.4582627273570197, "learning_rate": 6.343495593619343e-07, "loss": 0.3705, "step": 24796 }, { "epoch": 0.4310347824575432, "grad_norm": 4.485940329359727, "learning_rate": 6.343224450277702e-07, "loss": 0.2706, "step": 24797 }, { "epoch": 0.43105216499504595, "grad_norm": 1.6461985425349361, "learning_rate": 6.342953302678655e-07, "loss": 0.3213, "step": 24798 }, { "epoch": 0.4310695475325488, "grad_norm": 2.9184099797889886, "learning_rate": 6.342682150823064e-07, "loss": 0.3143, "step": 24799 }, { "epoch": 0.4310869300700516, "grad_norm": 1.525238794639728, "learning_rate": 6.342410994711786e-07, "loss": 0.4328, "step": 24800 }, { "epoch": 0.43110431260755444, "grad_norm": 1.944564608223499, "learning_rate": 6.342139834345684e-07, "loss": 0.3112, "step": 24801 }, { "epoch": 0.43112169514505727, "grad_norm": 1.2900297042025197, "learning_rate": 6.341868669725614e-07, "loss": 0.2175, "step": 24802 }, { "epoch": 0.4311390776825601, "grad_norm": 1.6582005052915583, "learning_rate": 6.341597500852436e-07, "loss": 0.2695, "step": 24803 }, { "epoch": 0.43115646022006293, "grad_norm": 1.582609078529135, "learning_rate": 6.341326327727011e-07, "loss": 0.1964, "step": 24804 }, { "epoch": 0.43117384275756576, "grad_norm": 1.755024895160546, "learning_rate": 6.341055150350197e-07, "loss": 0.2317, "step": 24805 }, { "epoch": 0.4311912252950686, "grad_norm": 1.5940279532225186, "learning_rate": 6.340783968722856e-07, "loss": 0.3529, "step": 24806 }, { "epoch": 0.4312086078325714, "grad_norm": 1.5750357861605997, "learning_rate": 6.340512782845844e-07, "loss": 0.2744, "step": 24807 }, { "epoch": 0.4312259903700742, "grad_norm": 1.229437948605978, "learning_rate": 6.340241592720022e-07, "loss": 0.4069, "step": 24808 }, { "epoch": 0.431243372907577, "grad_norm": 2.3314731300224514, "learning_rate": 6.33997039834625e-07, "loss": 0.3824, "step": 24809 }, { "epoch": 0.43126075544507986, "grad_norm": 1.4275711214146771, "learning_rate": 6.33969919972539e-07, "loss": 0.314, "step": 24810 }, { "epoch": 0.4312781379825827, "grad_norm": 2.5892894764178815, "learning_rate": 6.339427996858296e-07, "loss": 0.5055, "step": 24811 }, { "epoch": 0.4312955205200855, "grad_norm": 1.1572534505442515, "learning_rate": 6.339156789745832e-07, "loss": 0.2158, "step": 24812 }, { "epoch": 0.43131290305758835, "grad_norm": 1.7647468719635286, "learning_rate": 6.338885578388854e-07, "loss": 0.2285, "step": 24813 }, { "epoch": 0.4313302855950912, "grad_norm": 1.8801151151861804, "learning_rate": 6.338614362788226e-07, "loss": 0.4034, "step": 24814 }, { "epoch": 0.431347668132594, "grad_norm": 1.0847657673329067, "learning_rate": 6.338343142944805e-07, "loss": 0.2206, "step": 24815 }, { "epoch": 0.43136505067009684, "grad_norm": 1.8532597884121933, "learning_rate": 6.33807191885945e-07, "loss": 0.2368, "step": 24816 }, { "epoch": 0.43138243320759967, "grad_norm": 1.3073287585134716, "learning_rate": 6.337800690533023e-07, "loss": 0.2947, "step": 24817 }, { "epoch": 0.43139981574510244, "grad_norm": 1.3346425657280079, "learning_rate": 6.337529457966381e-07, "loss": 0.262, "step": 24818 }, { "epoch": 0.4314171982826053, "grad_norm": 1.7637313742244456, "learning_rate": 6.337258221160386e-07, "loss": 0.2485, "step": 24819 }, { "epoch": 0.4314345808201081, "grad_norm": 1.761945171322367, "learning_rate": 6.336986980115897e-07, "loss": 0.4723, "step": 24820 }, { "epoch": 0.43145196335761093, "grad_norm": 1.4153325830933023, "learning_rate": 6.336715734833773e-07, "loss": 0.4017, "step": 24821 }, { "epoch": 0.43146934589511377, "grad_norm": 1.5705001950063973, "learning_rate": 6.336444485314874e-07, "loss": 0.2767, "step": 24822 }, { "epoch": 0.4314867284326166, "grad_norm": 2.5311003530027603, "learning_rate": 6.33617323156006e-07, "loss": 0.4548, "step": 24823 }, { "epoch": 0.4315041109701194, "grad_norm": 1.031775953297245, "learning_rate": 6.335901973570191e-07, "loss": 0.3882, "step": 24824 }, { "epoch": 0.43152149350762226, "grad_norm": 1.3226353632242793, "learning_rate": 6.335630711346125e-07, "loss": 0.2663, "step": 24825 }, { "epoch": 0.4315388760451251, "grad_norm": 1.5881067786815226, "learning_rate": 6.335359444888724e-07, "loss": 0.3062, "step": 24826 }, { "epoch": 0.4315562585826279, "grad_norm": 1.1385101288948283, "learning_rate": 6.335088174198848e-07, "loss": 0.2864, "step": 24827 }, { "epoch": 0.4315736411201307, "grad_norm": 1.7350327268573917, "learning_rate": 6.334816899277355e-07, "loss": 0.2797, "step": 24828 }, { "epoch": 0.4315910236576335, "grad_norm": 2.2965459895744065, "learning_rate": 6.334545620125105e-07, "loss": 0.3563, "step": 24829 }, { "epoch": 0.43160840619513635, "grad_norm": 1.454432916506509, "learning_rate": 6.334274336742959e-07, "loss": 0.2236, "step": 24830 }, { "epoch": 0.4316257887326392, "grad_norm": 1.5291353300423935, "learning_rate": 6.334003049131775e-07, "loss": 0.3089, "step": 24831 }, { "epoch": 0.431643171270142, "grad_norm": 1.3132004895863447, "learning_rate": 6.333731757292416e-07, "loss": 0.2031, "step": 24832 }, { "epoch": 0.43166055380764484, "grad_norm": 2.9651547812778567, "learning_rate": 6.33346046122574e-07, "loss": 0.3159, "step": 24833 }, { "epoch": 0.4316779363451477, "grad_norm": 1.8858811620032059, "learning_rate": 6.333189160932606e-07, "loss": 0.2124, "step": 24834 }, { "epoch": 0.4316953188826505, "grad_norm": 1.4530399288048668, "learning_rate": 6.332917856413875e-07, "loss": 0.346, "step": 24835 }, { "epoch": 0.43171270142015333, "grad_norm": 2.104204278771513, "learning_rate": 6.332646547670406e-07, "loss": 0.3802, "step": 24836 }, { "epoch": 0.43173008395765616, "grad_norm": 1.69397779801848, "learning_rate": 6.332375234703061e-07, "loss": 0.3952, "step": 24837 }, { "epoch": 0.43174746649515894, "grad_norm": 1.222889028688466, "learning_rate": 6.332103917512697e-07, "loss": 0.4201, "step": 24838 }, { "epoch": 0.43176484903266177, "grad_norm": 1.4670531833617482, "learning_rate": 6.331832596100177e-07, "loss": 0.2945, "step": 24839 }, { "epoch": 0.4317822315701646, "grad_norm": 1.8072608974845392, "learning_rate": 6.331561270466359e-07, "loss": 0.3478, "step": 24840 }, { "epoch": 0.43179961410766743, "grad_norm": 1.3687108544542266, "learning_rate": 6.331289940612104e-07, "loss": 0.3769, "step": 24841 }, { "epoch": 0.43181699664517026, "grad_norm": 1.7341500711407596, "learning_rate": 6.33101860653827e-07, "loss": 0.2954, "step": 24842 }, { "epoch": 0.4318343791826731, "grad_norm": 2.0318205001317433, "learning_rate": 6.33074726824572e-07, "loss": 0.3265, "step": 24843 }, { "epoch": 0.4318517617201759, "grad_norm": 2.0558374995270725, "learning_rate": 6.330475925735311e-07, "loss": 0.2912, "step": 24844 }, { "epoch": 0.43186914425767875, "grad_norm": 1.8077456070136848, "learning_rate": 6.330204579007905e-07, "loss": 0.3327, "step": 24845 }, { "epoch": 0.4318865267951816, "grad_norm": 1.1320623441930437, "learning_rate": 6.329933228064362e-07, "loss": 0.4105, "step": 24846 }, { "epoch": 0.4319039093326844, "grad_norm": 1.8701767777320046, "learning_rate": 6.329661872905541e-07, "loss": 0.2534, "step": 24847 }, { "epoch": 0.4319212918701872, "grad_norm": 1.8292655103403441, "learning_rate": 6.329390513532302e-07, "loss": 0.2607, "step": 24848 }, { "epoch": 0.43193867440769, "grad_norm": 2.486386543152556, "learning_rate": 6.329119149945508e-07, "loss": 0.2649, "step": 24849 }, { "epoch": 0.43195605694519285, "grad_norm": 2.69416854075226, "learning_rate": 6.328847782146014e-07, "loss": 0.473, "step": 24850 }, { "epoch": 0.4319734394826957, "grad_norm": 1.582472786349594, "learning_rate": 6.328576410134685e-07, "loss": 0.3438, "step": 24851 }, { "epoch": 0.4319908220201985, "grad_norm": 1.26578972430804, "learning_rate": 6.328305033912377e-07, "loss": 0.2996, "step": 24852 }, { "epoch": 0.43200820455770134, "grad_norm": 1.2776226164853268, "learning_rate": 6.328033653479955e-07, "loss": 0.3709, "step": 24853 }, { "epoch": 0.43202558709520417, "grad_norm": 1.4490741247628829, "learning_rate": 6.327762268838275e-07, "loss": 0.4372, "step": 24854 }, { "epoch": 0.432042969632707, "grad_norm": 1.6034490125167324, "learning_rate": 6.327490879988198e-07, "loss": 0.3276, "step": 24855 }, { "epoch": 0.4320603521702098, "grad_norm": 2.5275222217142552, "learning_rate": 6.327219486930587e-07, "loss": 0.3235, "step": 24856 }, { "epoch": 0.43207773470771266, "grad_norm": 1.336724001133552, "learning_rate": 6.326948089666298e-07, "loss": 0.3061, "step": 24857 }, { "epoch": 0.43209511724521543, "grad_norm": 2.898013421267752, "learning_rate": 6.326676688196193e-07, "loss": 0.2689, "step": 24858 }, { "epoch": 0.43211249978271826, "grad_norm": 1.7098270849296668, "learning_rate": 6.326405282521133e-07, "loss": 0.2093, "step": 24859 }, { "epoch": 0.4321298823202211, "grad_norm": 1.4781457027832028, "learning_rate": 6.326133872641976e-07, "loss": 0.2875, "step": 24860 }, { "epoch": 0.4321472648577239, "grad_norm": 1.7233754717539842, "learning_rate": 6.325862458559584e-07, "loss": 0.232, "step": 24861 }, { "epoch": 0.43216464739522675, "grad_norm": 1.398011857098433, "learning_rate": 6.32559104027482e-07, "loss": 0.1926, "step": 24862 }, { "epoch": 0.4321820299327296, "grad_norm": 1.2050929324603616, "learning_rate": 6.325319617788539e-07, "loss": 0.1589, "step": 24863 }, { "epoch": 0.4321994124702324, "grad_norm": 0.915189619946028, "learning_rate": 6.325048191101604e-07, "loss": 0.2002, "step": 24864 }, { "epoch": 0.43221679500773524, "grad_norm": 1.7804007140893645, "learning_rate": 6.324776760214874e-07, "loss": 0.2066, "step": 24865 }, { "epoch": 0.4322341775452381, "grad_norm": 1.6959947054028244, "learning_rate": 6.324505325129211e-07, "loss": 0.4182, "step": 24866 }, { "epoch": 0.4322515600827409, "grad_norm": 1.2028010754621297, "learning_rate": 6.324233885845475e-07, "loss": 0.4162, "step": 24867 }, { "epoch": 0.4322689426202437, "grad_norm": 1.3133323242698096, "learning_rate": 6.323962442364525e-07, "loss": 0.3961, "step": 24868 }, { "epoch": 0.4322863251577465, "grad_norm": 1.3917053286189764, "learning_rate": 6.323690994687223e-07, "loss": 0.3152, "step": 24869 }, { "epoch": 0.43230370769524934, "grad_norm": 1.8438165274606886, "learning_rate": 6.323419542814429e-07, "loss": 0.3635, "step": 24870 }, { "epoch": 0.43232109023275217, "grad_norm": 2.612926605323628, "learning_rate": 6.323148086747001e-07, "loss": 0.5895, "step": 24871 }, { "epoch": 0.432338472770255, "grad_norm": 1.6893820093777057, "learning_rate": 6.322876626485803e-07, "loss": 0.259, "step": 24872 }, { "epoch": 0.43235585530775783, "grad_norm": 2.4645485185953464, "learning_rate": 6.322605162031693e-07, "loss": 0.3109, "step": 24873 }, { "epoch": 0.43237323784526066, "grad_norm": 2.225693304621403, "learning_rate": 6.322333693385534e-07, "loss": 0.454, "step": 24874 }, { "epoch": 0.4323906203827635, "grad_norm": 1.3828877820462204, "learning_rate": 6.322062220548184e-07, "loss": 0.2126, "step": 24875 }, { "epoch": 0.4324080029202663, "grad_norm": 1.1545275177465524, "learning_rate": 6.321790743520501e-07, "loss": 0.2641, "step": 24876 }, { "epoch": 0.43242538545776915, "grad_norm": 1.5289105696035972, "learning_rate": 6.321519262303352e-07, "loss": 0.2832, "step": 24877 }, { "epoch": 0.4324427679952719, "grad_norm": 1.1810083445202775, "learning_rate": 6.321247776897594e-07, "loss": 0.2717, "step": 24878 }, { "epoch": 0.43246015053277476, "grad_norm": 0.8741244359418202, "learning_rate": 6.320976287304085e-07, "loss": 0.3106, "step": 24879 }, { "epoch": 0.4324775330702776, "grad_norm": 1.860119298102825, "learning_rate": 6.32070479352369e-07, "loss": 0.2974, "step": 24880 }, { "epoch": 0.4324949156077804, "grad_norm": 1.0391241309861288, "learning_rate": 6.320433295557266e-07, "loss": 0.2385, "step": 24881 }, { "epoch": 0.43251229814528325, "grad_norm": 2.4009464859282788, "learning_rate": 6.320161793405676e-07, "loss": 0.4712, "step": 24882 }, { "epoch": 0.4325296806827861, "grad_norm": 1.4940241805902408, "learning_rate": 6.31989028706978e-07, "loss": 0.368, "step": 24883 }, { "epoch": 0.4325470632202889, "grad_norm": 1.5415316309677256, "learning_rate": 6.319618776550439e-07, "loss": 0.2516, "step": 24884 }, { "epoch": 0.43256444575779174, "grad_norm": 1.9620733062448938, "learning_rate": 6.319347261848509e-07, "loss": 0.3516, "step": 24885 }, { "epoch": 0.43258182829529457, "grad_norm": 1.5708059128401561, "learning_rate": 6.319075742964857e-07, "loss": 0.4893, "step": 24886 }, { "epoch": 0.4325992108327974, "grad_norm": 1.2543987188196242, "learning_rate": 6.31880421990034e-07, "loss": 0.2585, "step": 24887 }, { "epoch": 0.4326165933703002, "grad_norm": 2.991343267468824, "learning_rate": 6.31853269265582e-07, "loss": 0.2872, "step": 24888 }, { "epoch": 0.432633975907803, "grad_norm": 1.8780587594589877, "learning_rate": 6.318261161232157e-07, "loss": 0.2377, "step": 24889 }, { "epoch": 0.43265135844530583, "grad_norm": 1.8779477622297756, "learning_rate": 6.31798962563021e-07, "loss": 0.3143, "step": 24890 }, { "epoch": 0.43266874098280866, "grad_norm": 1.152557487163517, "learning_rate": 6.317718085850842e-07, "loss": 0.2585, "step": 24891 }, { "epoch": 0.4326861235203115, "grad_norm": 2.2394183368409553, "learning_rate": 6.317446541894916e-07, "loss": 0.2168, "step": 24892 }, { "epoch": 0.4327035060578143, "grad_norm": 1.9445718021005272, "learning_rate": 6.317174993763287e-07, "loss": 0.2885, "step": 24893 }, { "epoch": 0.43272088859531715, "grad_norm": 2.7737054018256675, "learning_rate": 6.316903441456818e-07, "loss": 0.4851, "step": 24894 }, { "epoch": 0.43273827113282, "grad_norm": 4.139654483965299, "learning_rate": 6.316631884976372e-07, "loss": 0.2893, "step": 24895 }, { "epoch": 0.4327556536703228, "grad_norm": 1.608457685679893, "learning_rate": 6.316360324322805e-07, "loss": 0.2729, "step": 24896 }, { "epoch": 0.43277303620782565, "grad_norm": 1.9640443431495667, "learning_rate": 6.316088759496982e-07, "loss": 0.251, "step": 24897 }, { "epoch": 0.4327904187453284, "grad_norm": 2.3831824648942748, "learning_rate": 6.315817190499762e-07, "loss": 0.2816, "step": 24898 }, { "epoch": 0.43280780128283125, "grad_norm": 2.1134613947629837, "learning_rate": 6.315545617332007e-07, "loss": 0.3121, "step": 24899 }, { "epoch": 0.4328251838203341, "grad_norm": 1.6674548935758802, "learning_rate": 6.315274039994575e-07, "loss": 0.2088, "step": 24900 }, { "epoch": 0.4328425663578369, "grad_norm": 1.579049173620045, "learning_rate": 6.315002458488328e-07, "loss": 0.3781, "step": 24901 }, { "epoch": 0.43285994889533974, "grad_norm": 1.6855607421531118, "learning_rate": 6.314730872814129e-07, "loss": 0.2641, "step": 24902 }, { "epoch": 0.43287733143284257, "grad_norm": 1.3871555349309912, "learning_rate": 6.314459282972837e-07, "loss": 0.2799, "step": 24903 }, { "epoch": 0.4328947139703454, "grad_norm": 1.2849425713932965, "learning_rate": 6.314187688965312e-07, "loss": 0.3826, "step": 24904 }, { "epoch": 0.43291209650784823, "grad_norm": 2.233022909083884, "learning_rate": 6.313916090792417e-07, "loss": 0.2519, "step": 24905 }, { "epoch": 0.43292947904535106, "grad_norm": 2.2805347625549985, "learning_rate": 6.31364448845501e-07, "loss": 0.2709, "step": 24906 }, { "epoch": 0.4329468615828539, "grad_norm": 2.560071807808518, "learning_rate": 6.313372881953954e-07, "loss": 0.3318, "step": 24907 }, { "epoch": 0.43296424412035667, "grad_norm": 1.834365224524014, "learning_rate": 6.313101271290109e-07, "loss": 0.346, "step": 24908 }, { "epoch": 0.4329816266578595, "grad_norm": 1.8056146410291154, "learning_rate": 6.312829656464338e-07, "loss": 0.3622, "step": 24909 }, { "epoch": 0.4329990091953623, "grad_norm": 1.5322260659371338, "learning_rate": 6.312558037477498e-07, "loss": 0.3396, "step": 24910 }, { "epoch": 0.43301639173286516, "grad_norm": 1.5176252608568335, "learning_rate": 6.312286414330453e-07, "loss": 0.3661, "step": 24911 }, { "epoch": 0.433033774270368, "grad_norm": 0.9636589525021947, "learning_rate": 6.312014787024063e-07, "loss": 0.1751, "step": 24912 }, { "epoch": 0.4330511568078708, "grad_norm": 1.4759524380637126, "learning_rate": 6.311743155559188e-07, "loss": 0.2577, "step": 24913 }, { "epoch": 0.43306853934537365, "grad_norm": 1.9071073400741607, "learning_rate": 6.311471519936689e-07, "loss": 0.4439, "step": 24914 }, { "epoch": 0.4330859218828765, "grad_norm": 1.5831120580790832, "learning_rate": 6.311199880157429e-07, "loss": 0.302, "step": 24915 }, { "epoch": 0.4331033044203793, "grad_norm": 1.1731480220722272, "learning_rate": 6.310928236222267e-07, "loss": 0.2355, "step": 24916 }, { "epoch": 0.4331206869578821, "grad_norm": 1.4261771426080647, "learning_rate": 6.310656588132066e-07, "loss": 0.3869, "step": 24917 }, { "epoch": 0.4331380694953849, "grad_norm": 1.3315094274148573, "learning_rate": 6.310384935887685e-07, "loss": 0.3513, "step": 24918 }, { "epoch": 0.43315545203288774, "grad_norm": 1.6289048361393486, "learning_rate": 6.310113279489984e-07, "loss": 0.3602, "step": 24919 }, { "epoch": 0.4331728345703906, "grad_norm": 1.1830530713441376, "learning_rate": 6.309841618939827e-07, "loss": 0.1338, "step": 24920 }, { "epoch": 0.4331902171078934, "grad_norm": 1.5608995179425407, "learning_rate": 6.309569954238075e-07, "loss": 0.411, "step": 24921 }, { "epoch": 0.43320759964539624, "grad_norm": 3.719971800303364, "learning_rate": 6.309298285385586e-07, "loss": 0.5346, "step": 24922 }, { "epoch": 0.43322498218289907, "grad_norm": 1.5028031865426492, "learning_rate": 6.309026612383223e-07, "loss": 0.3519, "step": 24923 }, { "epoch": 0.4332423647204019, "grad_norm": 1.6333668766172447, "learning_rate": 6.30875493523185e-07, "loss": 0.1574, "step": 24924 }, { "epoch": 0.4332597472579047, "grad_norm": 1.6021159689001718, "learning_rate": 6.308483253932322e-07, "loss": 0.1491, "step": 24925 }, { "epoch": 0.43327712979540756, "grad_norm": 1.9709603840828138, "learning_rate": 6.308211568485505e-07, "loss": 0.3282, "step": 24926 }, { "epoch": 0.43329451233291033, "grad_norm": 1.571888515403844, "learning_rate": 6.307939878892255e-07, "loss": 0.2408, "step": 24927 }, { "epoch": 0.43331189487041316, "grad_norm": 1.075337967609021, "learning_rate": 6.307668185153439e-07, "loss": 0.256, "step": 24928 }, { "epoch": 0.433329277407916, "grad_norm": 1.6739924601210452, "learning_rate": 6.307396487269915e-07, "loss": 0.2763, "step": 24929 }, { "epoch": 0.4333466599454188, "grad_norm": 2.985687171568561, "learning_rate": 6.307124785242545e-07, "loss": 0.4021, "step": 24930 }, { "epoch": 0.43336404248292165, "grad_norm": 1.4943434845018047, "learning_rate": 6.30685307907219e-07, "loss": 0.2625, "step": 24931 }, { "epoch": 0.4333814250204245, "grad_norm": 2.5002068365850243, "learning_rate": 6.30658136875971e-07, "loss": 0.379, "step": 24932 }, { "epoch": 0.4333988075579273, "grad_norm": 1.9718943798327964, "learning_rate": 6.306309654305968e-07, "loss": 0.3957, "step": 24933 }, { "epoch": 0.43341619009543014, "grad_norm": 1.7516735164727308, "learning_rate": 6.306037935711825e-07, "loss": 0.1243, "step": 24934 }, { "epoch": 0.433433572632933, "grad_norm": 2.934297610126448, "learning_rate": 6.305766212978141e-07, "loss": 0.3487, "step": 24935 }, { "epoch": 0.4334509551704358, "grad_norm": 3.488583719684521, "learning_rate": 6.305494486105778e-07, "loss": 0.3353, "step": 24936 }, { "epoch": 0.4334683377079386, "grad_norm": 1.8420952457623816, "learning_rate": 6.305222755095597e-07, "loss": 0.4786, "step": 24937 }, { "epoch": 0.4334857202454414, "grad_norm": 2.155231030492281, "learning_rate": 6.30495101994846e-07, "loss": 0.3322, "step": 24938 }, { "epoch": 0.43350310278294424, "grad_norm": 1.4126269539411491, "learning_rate": 6.304679280665226e-07, "loss": 0.5182, "step": 24939 }, { "epoch": 0.43352048532044707, "grad_norm": 1.6003626800463702, "learning_rate": 6.304407537246761e-07, "loss": 0.3852, "step": 24940 }, { "epoch": 0.4335378678579499, "grad_norm": 1.7383564269292944, "learning_rate": 6.304135789693921e-07, "loss": 0.3766, "step": 24941 }, { "epoch": 0.43355525039545273, "grad_norm": 1.993684522656088, "learning_rate": 6.30386403800757e-07, "loss": 0.4145, "step": 24942 }, { "epoch": 0.43357263293295556, "grad_norm": 1.193370783568519, "learning_rate": 6.303592282188571e-07, "loss": 0.4135, "step": 24943 }, { "epoch": 0.4335900154704584, "grad_norm": 1.4673092503074725, "learning_rate": 6.303320522237781e-07, "loss": 0.2471, "step": 24944 }, { "epoch": 0.4336073980079612, "grad_norm": 1.914583196635693, "learning_rate": 6.303048758156063e-07, "loss": 0.4449, "step": 24945 }, { "epoch": 0.43362478054546405, "grad_norm": 1.3237144455645298, "learning_rate": 6.30277698994428e-07, "loss": 0.1618, "step": 24946 }, { "epoch": 0.4336421630829668, "grad_norm": 1.4345875172805478, "learning_rate": 6.302505217603293e-07, "loss": 0.2665, "step": 24947 }, { "epoch": 0.43365954562046966, "grad_norm": 1.2508148143262476, "learning_rate": 6.302233441133962e-07, "loss": 0.2641, "step": 24948 }, { "epoch": 0.4336769281579725, "grad_norm": 1.42668158916673, "learning_rate": 6.30196166053715e-07, "loss": 0.245, "step": 24949 }, { "epoch": 0.4336943106954753, "grad_norm": 1.6936452370945538, "learning_rate": 6.301689875813716e-07, "loss": 0.2771, "step": 24950 }, { "epoch": 0.43371169323297815, "grad_norm": 1.4772752999086798, "learning_rate": 6.301418086964525e-07, "loss": 0.3767, "step": 24951 }, { "epoch": 0.433729075770481, "grad_norm": 2.0797111918526254, "learning_rate": 6.301146293990436e-07, "loss": 0.233, "step": 24952 }, { "epoch": 0.4337464583079838, "grad_norm": 2.5560690232645427, "learning_rate": 6.30087449689231e-07, "loss": 0.3822, "step": 24953 }, { "epoch": 0.43376384084548664, "grad_norm": 1.5048835050809104, "learning_rate": 6.300602695671009e-07, "loss": 0.3827, "step": 24954 }, { "epoch": 0.43378122338298947, "grad_norm": 1.9660842789616593, "learning_rate": 6.300330890327397e-07, "loss": 0.3741, "step": 24955 }, { "epoch": 0.4337986059204923, "grad_norm": 1.3454271079136995, "learning_rate": 6.300059080862332e-07, "loss": 0.4199, "step": 24956 }, { "epoch": 0.43381598845799507, "grad_norm": 1.859176747504903, "learning_rate": 6.299787267276677e-07, "loss": 0.3389, "step": 24957 }, { "epoch": 0.4338333709954979, "grad_norm": 1.9530188561351023, "learning_rate": 6.299515449571293e-07, "loss": 0.3278, "step": 24958 }, { "epoch": 0.43385075353300073, "grad_norm": 1.518222292638861, "learning_rate": 6.299243627747044e-07, "loss": 0.2212, "step": 24959 }, { "epoch": 0.43386813607050356, "grad_norm": 1.1324444762218677, "learning_rate": 6.298971801804787e-07, "loss": 0.1734, "step": 24960 }, { "epoch": 0.4338855186080064, "grad_norm": 1.1042195016635699, "learning_rate": 6.298699971745388e-07, "loss": 0.2485, "step": 24961 }, { "epoch": 0.4339029011455092, "grad_norm": 1.4746619673936743, "learning_rate": 6.298428137569705e-07, "loss": 0.2949, "step": 24962 }, { "epoch": 0.43392028368301205, "grad_norm": 2.6094027770331083, "learning_rate": 6.298156299278603e-07, "loss": 0.4295, "step": 24963 }, { "epoch": 0.4339376662205149, "grad_norm": 1.733923348513291, "learning_rate": 6.297884456872941e-07, "loss": 0.2636, "step": 24964 }, { "epoch": 0.4339550487580177, "grad_norm": 1.4254472162523768, "learning_rate": 6.297612610353582e-07, "loss": 0.3006, "step": 24965 }, { "epoch": 0.43397243129552054, "grad_norm": 1.5586591254834041, "learning_rate": 6.297340759721386e-07, "loss": 0.2287, "step": 24966 }, { "epoch": 0.4339898138330233, "grad_norm": 1.4158224749139836, "learning_rate": 6.297068904977216e-07, "loss": 0.2861, "step": 24967 }, { "epoch": 0.43400719637052615, "grad_norm": 1.5585593149804842, "learning_rate": 6.296797046121934e-07, "loss": 0.161, "step": 24968 }, { "epoch": 0.434024578908029, "grad_norm": 1.395129257471285, "learning_rate": 6.296525183156402e-07, "loss": 0.1794, "step": 24969 }, { "epoch": 0.4340419614455318, "grad_norm": 1.07152558370828, "learning_rate": 6.296253316081478e-07, "loss": 0.1965, "step": 24970 }, { "epoch": 0.43405934398303464, "grad_norm": 0.8747141061021239, "learning_rate": 6.295981444898029e-07, "loss": 0.2063, "step": 24971 }, { "epoch": 0.43407672652053747, "grad_norm": 2.201604909304473, "learning_rate": 6.295709569606913e-07, "loss": 0.4549, "step": 24972 }, { "epoch": 0.4340941090580403, "grad_norm": 1.502867539181999, "learning_rate": 6.295437690208993e-07, "loss": 0.1434, "step": 24973 }, { "epoch": 0.43411149159554313, "grad_norm": 3.215905768389144, "learning_rate": 6.295165806705129e-07, "loss": 0.3301, "step": 24974 }, { "epoch": 0.43412887413304596, "grad_norm": 2.1726915508537323, "learning_rate": 6.294893919096186e-07, "loss": 0.3546, "step": 24975 }, { "epoch": 0.4341462566705488, "grad_norm": 3.1103420608005385, "learning_rate": 6.294622027383026e-07, "loss": 0.3902, "step": 24976 }, { "epoch": 0.43416363920805157, "grad_norm": 2.435214260208648, "learning_rate": 6.294350131566506e-07, "loss": 0.3057, "step": 24977 }, { "epoch": 0.4341810217455544, "grad_norm": 1.3826992452022857, "learning_rate": 6.294078231647491e-07, "loss": 0.3377, "step": 24978 }, { "epoch": 0.4341984042830572, "grad_norm": 1.426381007896424, "learning_rate": 6.293806327626842e-07, "loss": 0.3098, "step": 24979 }, { "epoch": 0.43421578682056006, "grad_norm": 1.8670100854053178, "learning_rate": 6.293534419505423e-07, "loss": 0.317, "step": 24980 }, { "epoch": 0.4342331693580629, "grad_norm": 1.85149191393729, "learning_rate": 6.293262507284094e-07, "loss": 0.2834, "step": 24981 }, { "epoch": 0.4342505518955657, "grad_norm": 1.4404586604849274, "learning_rate": 6.292990590963716e-07, "loss": 0.3195, "step": 24982 }, { "epoch": 0.43426793443306855, "grad_norm": 3.299923929625325, "learning_rate": 6.292718670545152e-07, "loss": 0.5908, "step": 24983 }, { "epoch": 0.4342853169705714, "grad_norm": 1.4438605868993917, "learning_rate": 6.292446746029265e-07, "loss": 0.5032, "step": 24984 }, { "epoch": 0.4343026995080742, "grad_norm": 3.3579096540101356, "learning_rate": 6.292174817416913e-07, "loss": 0.2926, "step": 24985 }, { "epoch": 0.43432008204557704, "grad_norm": 1.524367221941564, "learning_rate": 6.291902884708962e-07, "loss": 0.3165, "step": 24986 }, { "epoch": 0.4343374645830798, "grad_norm": 2.335639298912454, "learning_rate": 6.291630947906272e-07, "loss": 0.5679, "step": 24987 }, { "epoch": 0.43435484712058264, "grad_norm": 1.3275184819335077, "learning_rate": 6.291359007009705e-07, "loss": 0.408, "step": 24988 }, { "epoch": 0.4343722296580855, "grad_norm": 1.7049499083539401, "learning_rate": 6.291087062020122e-07, "loss": 0.2398, "step": 24989 }, { "epoch": 0.4343896121955883, "grad_norm": 1.7476027608858198, "learning_rate": 6.29081511293839e-07, "loss": 0.5228, "step": 24990 }, { "epoch": 0.43440699473309113, "grad_norm": 1.9365650390541835, "learning_rate": 6.290543159765363e-07, "loss": 0.3185, "step": 24991 }, { "epoch": 0.43442437727059396, "grad_norm": 1.518329215222565, "learning_rate": 6.290271202501909e-07, "loss": 0.3367, "step": 24992 }, { "epoch": 0.4344417598080968, "grad_norm": 2.6854551378899347, "learning_rate": 6.289999241148887e-07, "loss": 0.46, "step": 24993 }, { "epoch": 0.4344591423455996, "grad_norm": 1.5530211800309353, "learning_rate": 6.289727275707161e-07, "loss": 0.4873, "step": 24994 }, { "epoch": 0.43447652488310246, "grad_norm": 1.297951706383599, "learning_rate": 6.289455306177591e-07, "loss": 0.4893, "step": 24995 }, { "epoch": 0.4344939074206053, "grad_norm": 1.27038840286328, "learning_rate": 6.289183332561039e-07, "loss": 0.1657, "step": 24996 }, { "epoch": 0.43451128995810806, "grad_norm": 1.3119181107321387, "learning_rate": 6.288911354858371e-07, "loss": 0.2839, "step": 24997 }, { "epoch": 0.4345286724956109, "grad_norm": 2.41909468765178, "learning_rate": 6.288639373070444e-07, "loss": 0.4911, "step": 24998 }, { "epoch": 0.4345460550331137, "grad_norm": 1.414409827763085, "learning_rate": 6.288367387198123e-07, "loss": 0.2321, "step": 24999 }, { "epoch": 0.43456343757061655, "grad_norm": 3.1160538344501956, "learning_rate": 6.28809539724227e-07, "loss": 0.4225, "step": 25000 }, { "epoch": 0.4345808201081194, "grad_norm": 2.602699419092834, "learning_rate": 6.287823403203743e-07, "loss": 0.251, "step": 25001 }, { "epoch": 0.4345982026456222, "grad_norm": 1.4300687677593211, "learning_rate": 6.287551405083411e-07, "loss": 0.2409, "step": 25002 }, { "epoch": 0.43461558518312504, "grad_norm": 1.7082733700665182, "learning_rate": 6.287279402882131e-07, "loss": 0.3206, "step": 25003 }, { "epoch": 0.43463296772062787, "grad_norm": 2.3050820194471737, "learning_rate": 6.287007396600767e-07, "loss": 0.2053, "step": 25004 }, { "epoch": 0.4346503502581307, "grad_norm": 1.547146545948534, "learning_rate": 6.286735386240179e-07, "loss": 0.2793, "step": 25005 }, { "epoch": 0.43466773279563353, "grad_norm": 2.2369594099108205, "learning_rate": 6.286463371801233e-07, "loss": 0.3441, "step": 25006 }, { "epoch": 0.4346851153331363, "grad_norm": 1.437136707916672, "learning_rate": 6.286191353284789e-07, "loss": 0.2994, "step": 25007 }, { "epoch": 0.43470249787063914, "grad_norm": 1.5651496983488884, "learning_rate": 6.285919330691709e-07, "loss": 0.2958, "step": 25008 }, { "epoch": 0.43471988040814197, "grad_norm": 1.344886537069666, "learning_rate": 6.285647304022856e-07, "loss": 0.2662, "step": 25009 }, { "epoch": 0.4347372629456448, "grad_norm": 3.953770369053182, "learning_rate": 6.285375273279089e-07, "loss": 0.2584, "step": 25010 }, { "epoch": 0.43475464548314763, "grad_norm": 1.581829062245652, "learning_rate": 6.285103238461274e-07, "loss": 0.2155, "step": 25011 }, { "epoch": 0.43477202802065046, "grad_norm": 1.4946818476204355, "learning_rate": 6.284831199570273e-07, "loss": 0.2601, "step": 25012 }, { "epoch": 0.4347894105581533, "grad_norm": 2.2393216377773006, "learning_rate": 6.284559156606948e-07, "loss": 0.4877, "step": 25013 }, { "epoch": 0.4348067930956561, "grad_norm": 2.7471211666167905, "learning_rate": 6.284287109572158e-07, "loss": 0.3473, "step": 25014 }, { "epoch": 0.43482417563315895, "grad_norm": 1.572079918578393, "learning_rate": 6.284015058466769e-07, "loss": 0.2545, "step": 25015 }, { "epoch": 0.4348415581706618, "grad_norm": 1.2220295655457405, "learning_rate": 6.283743003291642e-07, "loss": 0.2915, "step": 25016 }, { "epoch": 0.43485894070816455, "grad_norm": 1.658598503857807, "learning_rate": 6.28347094404764e-07, "loss": 0.1699, "step": 25017 }, { "epoch": 0.4348763232456674, "grad_norm": 3.396503324883687, "learning_rate": 6.283198880735625e-07, "loss": 0.5489, "step": 25018 }, { "epoch": 0.4348937057831702, "grad_norm": 1.8778893266742291, "learning_rate": 6.282926813356459e-07, "loss": 0.2802, "step": 25019 }, { "epoch": 0.43491108832067304, "grad_norm": 1.186526038721182, "learning_rate": 6.282654741911004e-07, "loss": 0.3158, "step": 25020 }, { "epoch": 0.4349284708581759, "grad_norm": 2.953140373640361, "learning_rate": 6.282382666400122e-07, "loss": 0.3276, "step": 25021 }, { "epoch": 0.4349458533956787, "grad_norm": 1.8640490184422864, "learning_rate": 6.282110586824676e-07, "loss": 0.2587, "step": 25022 }, { "epoch": 0.43496323593318154, "grad_norm": 1.9048782202205814, "learning_rate": 6.28183850318553e-07, "loss": 0.289, "step": 25023 }, { "epoch": 0.43498061847068437, "grad_norm": 2.188305012917987, "learning_rate": 6.281566415483542e-07, "loss": 0.3145, "step": 25024 }, { "epoch": 0.4349980010081872, "grad_norm": 1.245578546635488, "learning_rate": 6.28129432371958e-07, "loss": 0.281, "step": 25025 }, { "epoch": 0.43501538354569, "grad_norm": 1.8558189920879293, "learning_rate": 6.281022227894503e-07, "loss": 0.3827, "step": 25026 }, { "epoch": 0.4350327660831928, "grad_norm": 3.025139047687879, "learning_rate": 6.280750128009173e-07, "loss": 0.1821, "step": 25027 }, { "epoch": 0.43505014862069563, "grad_norm": 1.417058741906042, "learning_rate": 6.280478024064454e-07, "loss": 0.2189, "step": 25028 }, { "epoch": 0.43506753115819846, "grad_norm": 2.1153590572331415, "learning_rate": 6.280205916061208e-07, "loss": 0.4075, "step": 25029 }, { "epoch": 0.4350849136957013, "grad_norm": 2.3261472352089623, "learning_rate": 6.279933804000296e-07, "loss": 0.3768, "step": 25030 }, { "epoch": 0.4351022962332041, "grad_norm": 1.0531388386999292, "learning_rate": 6.279661687882582e-07, "loss": 0.2034, "step": 25031 }, { "epoch": 0.43511967877070695, "grad_norm": 1.6759580335782518, "learning_rate": 6.279389567708931e-07, "loss": 0.4092, "step": 25032 }, { "epoch": 0.4351370613082098, "grad_norm": 2.438828012753596, "learning_rate": 6.2791174434802e-07, "loss": 0.4576, "step": 25033 }, { "epoch": 0.4351544438457126, "grad_norm": 1.9673946434295912, "learning_rate": 6.278845315197254e-07, "loss": 0.2853, "step": 25034 }, { "epoch": 0.43517182638321544, "grad_norm": 2.785041184239682, "learning_rate": 6.278573182860958e-07, "loss": 0.4456, "step": 25035 }, { "epoch": 0.4351892089207183, "grad_norm": 1.530261160158503, "learning_rate": 6.278301046472171e-07, "loss": 0.2596, "step": 25036 }, { "epoch": 0.43520659145822105, "grad_norm": 1.0752990775964648, "learning_rate": 6.278028906031757e-07, "loss": 0.1529, "step": 25037 }, { "epoch": 0.4352239739957239, "grad_norm": 1.1665066832091104, "learning_rate": 6.277756761540579e-07, "loss": 0.1848, "step": 25038 }, { "epoch": 0.4352413565332267, "grad_norm": 2.3923367910737148, "learning_rate": 6.277484612999497e-07, "loss": 0.2792, "step": 25039 }, { "epoch": 0.43525873907072954, "grad_norm": 1.033016194335994, "learning_rate": 6.277212460409378e-07, "loss": 0.275, "step": 25040 }, { "epoch": 0.43527612160823237, "grad_norm": 1.5053244558168852, "learning_rate": 6.276940303771082e-07, "loss": 0.4852, "step": 25041 }, { "epoch": 0.4352935041457352, "grad_norm": 3.571769655471154, "learning_rate": 6.27666814308547e-07, "loss": 0.446, "step": 25042 }, { "epoch": 0.43531088668323803, "grad_norm": 1.2394278158214695, "learning_rate": 6.276395978353408e-07, "loss": 0.2055, "step": 25043 }, { "epoch": 0.43532826922074086, "grad_norm": 1.6793682185604253, "learning_rate": 6.276123809575758e-07, "loss": 0.3632, "step": 25044 }, { "epoch": 0.4353456517582437, "grad_norm": 2.5532901718639076, "learning_rate": 6.27585163675338e-07, "loss": 0.1706, "step": 25045 }, { "epoch": 0.4353630342957465, "grad_norm": 2.0425033965787307, "learning_rate": 6.275579459887139e-07, "loss": 0.3878, "step": 25046 }, { "epoch": 0.4353804168332493, "grad_norm": 1.4898927073935313, "learning_rate": 6.275307278977897e-07, "loss": 0.3111, "step": 25047 }, { "epoch": 0.4353977993707521, "grad_norm": 2.145320734762864, "learning_rate": 6.275035094026517e-07, "loss": 0.3026, "step": 25048 }, { "epoch": 0.43541518190825496, "grad_norm": 1.2070107007842275, "learning_rate": 6.274762905033861e-07, "loss": 0.1979, "step": 25049 }, { "epoch": 0.4354325644457578, "grad_norm": 2.1914697337453144, "learning_rate": 6.274490712000793e-07, "loss": 0.3067, "step": 25050 }, { "epoch": 0.4354499469832606, "grad_norm": 2.5819033289163658, "learning_rate": 6.274218514928173e-07, "loss": 0.4543, "step": 25051 }, { "epoch": 0.43546732952076345, "grad_norm": 1.9580014020045078, "learning_rate": 6.273946313816867e-07, "loss": 0.4024, "step": 25052 }, { "epoch": 0.4354847120582663, "grad_norm": 4.137824828839886, "learning_rate": 6.273674108667734e-07, "loss": 0.4118, "step": 25053 }, { "epoch": 0.4355020945957691, "grad_norm": 1.9968178190399994, "learning_rate": 6.273401899481644e-07, "loss": 0.5617, "step": 25054 }, { "epoch": 0.43551947713327194, "grad_norm": 2.9573364294400437, "learning_rate": 6.273129686259451e-07, "loss": 0.6154, "step": 25055 }, { "epoch": 0.4355368596707747, "grad_norm": 1.1017080940866961, "learning_rate": 6.272857469002025e-07, "loss": 0.305, "step": 25056 }, { "epoch": 0.43555424220827754, "grad_norm": 1.2035023804544374, "learning_rate": 6.272585247710222e-07, "loss": 0.2213, "step": 25057 }, { "epoch": 0.4355716247457804, "grad_norm": 3.0577356703733827, "learning_rate": 6.272313022384911e-07, "loss": 0.3292, "step": 25058 }, { "epoch": 0.4355890072832832, "grad_norm": 1.4987577548839173, "learning_rate": 6.272040793026949e-07, "loss": 0.3717, "step": 25059 }, { "epoch": 0.43560638982078603, "grad_norm": 1.5835833471956662, "learning_rate": 6.271768559637205e-07, "loss": 0.3628, "step": 25060 }, { "epoch": 0.43562377235828886, "grad_norm": 1.5007983220193581, "learning_rate": 6.271496322216538e-07, "loss": 0.6106, "step": 25061 }, { "epoch": 0.4356411548957917, "grad_norm": 2.563749878302057, "learning_rate": 6.271224080765813e-07, "loss": 0.3618, "step": 25062 }, { "epoch": 0.4356585374332945, "grad_norm": 1.861339832218604, "learning_rate": 6.270951835285889e-07, "loss": 0.2399, "step": 25063 }, { "epoch": 0.43567591997079735, "grad_norm": 1.8218267679630853, "learning_rate": 6.270679585777632e-07, "loss": 0.2838, "step": 25064 }, { "epoch": 0.4356933025083002, "grad_norm": 1.7903521682325567, "learning_rate": 6.270407332241906e-07, "loss": 0.4944, "step": 25065 }, { "epoch": 0.43571068504580296, "grad_norm": 2.5019659759615336, "learning_rate": 6.270135074679572e-07, "loss": 0.2575, "step": 25066 }, { "epoch": 0.4357280675833058, "grad_norm": 1.9404537796722827, "learning_rate": 6.269862813091493e-07, "loss": 0.3682, "step": 25067 }, { "epoch": 0.4357454501208086, "grad_norm": 1.1605397305864498, "learning_rate": 6.269590547478531e-07, "loss": 0.1815, "step": 25068 }, { "epoch": 0.43576283265831145, "grad_norm": 2.018882039996602, "learning_rate": 6.269318277841552e-07, "loss": 0.269, "step": 25069 }, { "epoch": 0.4357802151958143, "grad_norm": 1.1045560952196543, "learning_rate": 6.269046004181416e-07, "loss": 0.2241, "step": 25070 }, { "epoch": 0.4357975977333171, "grad_norm": 1.8653316323120672, "learning_rate": 6.268773726498988e-07, "loss": 0.4755, "step": 25071 }, { "epoch": 0.43581498027081994, "grad_norm": 1.71967294438534, "learning_rate": 6.268501444795128e-07, "loss": 0.2125, "step": 25072 }, { "epoch": 0.43583236280832277, "grad_norm": 1.3934575923571635, "learning_rate": 6.268229159070704e-07, "loss": 0.489, "step": 25073 }, { "epoch": 0.4358497453458256, "grad_norm": 1.4140841956619472, "learning_rate": 6.267956869326574e-07, "loss": 0.2044, "step": 25074 }, { "epoch": 0.43586712788332843, "grad_norm": 2.0599504844126613, "learning_rate": 6.267684575563605e-07, "loss": 0.3237, "step": 25075 }, { "epoch": 0.4358845104208312, "grad_norm": 1.436784967351842, "learning_rate": 6.267412277782656e-07, "loss": 0.3376, "step": 25076 }, { "epoch": 0.43590189295833404, "grad_norm": 1.1736655311855433, "learning_rate": 6.267139975984594e-07, "loss": 0.2172, "step": 25077 }, { "epoch": 0.43591927549583687, "grad_norm": 1.7138381412277361, "learning_rate": 6.266867670170279e-07, "loss": 0.2412, "step": 25078 }, { "epoch": 0.4359366580333397, "grad_norm": 1.1424674965163502, "learning_rate": 6.266595360340579e-07, "loss": 0.3415, "step": 25079 }, { "epoch": 0.4359540405708425, "grad_norm": 0.8651491141515467, "learning_rate": 6.266323046496349e-07, "loss": 0.2179, "step": 25080 }, { "epoch": 0.43597142310834536, "grad_norm": 9.931336517944299, "learning_rate": 6.266050728638459e-07, "loss": 0.3447, "step": 25081 }, { "epoch": 0.4359888056458482, "grad_norm": 1.7135535781606839, "learning_rate": 6.26577840676777e-07, "loss": 0.5634, "step": 25082 }, { "epoch": 0.436006188183351, "grad_norm": 2.0481510128153197, "learning_rate": 6.265506080885144e-07, "loss": 0.2348, "step": 25083 }, { "epoch": 0.43602357072085385, "grad_norm": 1.0407651920850665, "learning_rate": 6.265233750991444e-07, "loss": 0.2724, "step": 25084 }, { "epoch": 0.4360409532583567, "grad_norm": 1.2129590772660603, "learning_rate": 6.264961417087537e-07, "loss": 0.3033, "step": 25085 }, { "epoch": 0.43605833579585945, "grad_norm": 2.403263548894835, "learning_rate": 6.264689079174281e-07, "loss": 0.3787, "step": 25086 }, { "epoch": 0.4360757183333623, "grad_norm": 2.3951902480719776, "learning_rate": 6.264416737252542e-07, "loss": 0.2426, "step": 25087 }, { "epoch": 0.4360931008708651, "grad_norm": 1.310399359783383, "learning_rate": 6.264144391323184e-07, "loss": 0.3654, "step": 25088 }, { "epoch": 0.43611048340836794, "grad_norm": 2.410537157721225, "learning_rate": 6.263872041387068e-07, "loss": 0.4962, "step": 25089 }, { "epoch": 0.4361278659458708, "grad_norm": 1.6321280716486382, "learning_rate": 6.263599687445059e-07, "loss": 0.206, "step": 25090 }, { "epoch": 0.4361452484833736, "grad_norm": 1.401435613349004, "learning_rate": 6.263327329498017e-07, "loss": 0.1867, "step": 25091 }, { "epoch": 0.43616263102087643, "grad_norm": 1.9705155145450435, "learning_rate": 6.26305496754681e-07, "loss": 0.3188, "step": 25092 }, { "epoch": 0.43618001355837926, "grad_norm": 1.2407797079595448, "learning_rate": 6.262782601592299e-07, "loss": 0.2753, "step": 25093 }, { "epoch": 0.4361973960958821, "grad_norm": 1.06412807520488, "learning_rate": 6.262510231635345e-07, "loss": 0.1942, "step": 25094 }, { "epoch": 0.4362147786333849, "grad_norm": 2.239727338224968, "learning_rate": 6.262237857676815e-07, "loss": 0.4402, "step": 25095 }, { "epoch": 0.4362321611708877, "grad_norm": 2.8777772595607845, "learning_rate": 6.261965479717573e-07, "loss": 0.4133, "step": 25096 }, { "epoch": 0.43624954370839053, "grad_norm": 1.7972776875975787, "learning_rate": 6.261693097758476e-07, "loss": 0.2103, "step": 25097 }, { "epoch": 0.43626692624589336, "grad_norm": 1.4050431314482308, "learning_rate": 6.261420711800395e-07, "loss": 0.2105, "step": 25098 }, { "epoch": 0.4362843087833962, "grad_norm": 1.4381425176721232, "learning_rate": 6.261148321844187e-07, "loss": 0.2089, "step": 25099 }, { "epoch": 0.436301691320899, "grad_norm": 1.2165165776200335, "learning_rate": 6.260875927890721e-07, "loss": 0.19, "step": 25100 }, { "epoch": 0.43631907385840185, "grad_norm": 1.639778124602088, "learning_rate": 6.260603529940856e-07, "loss": 0.2163, "step": 25101 }, { "epoch": 0.4363364563959047, "grad_norm": 2.67071690658013, "learning_rate": 6.260331127995456e-07, "loss": 0.3087, "step": 25102 }, { "epoch": 0.4363538389334075, "grad_norm": 3.2070685581644436, "learning_rate": 6.260058722055386e-07, "loss": 0.2842, "step": 25103 }, { "epoch": 0.43637122147091034, "grad_norm": 1.8357133858239942, "learning_rate": 6.25978631212151e-07, "loss": 0.2462, "step": 25104 }, { "epoch": 0.4363886040084132, "grad_norm": 1.642688915104502, "learning_rate": 6.259513898194688e-07, "loss": 0.2991, "step": 25105 }, { "epoch": 0.43640598654591595, "grad_norm": 2.8264250531216466, "learning_rate": 6.259241480275787e-07, "loss": 0.3728, "step": 25106 }, { "epoch": 0.4364233690834188, "grad_norm": 1.3936548056618405, "learning_rate": 6.258969058365668e-07, "loss": 0.3067, "step": 25107 }, { "epoch": 0.4364407516209216, "grad_norm": 2.0499295765993146, "learning_rate": 6.258696632465197e-07, "loss": 0.2452, "step": 25108 }, { "epoch": 0.43645813415842444, "grad_norm": 1.5697541699617477, "learning_rate": 6.258424202575234e-07, "loss": 0.2954, "step": 25109 }, { "epoch": 0.43647551669592727, "grad_norm": 1.1990691060751049, "learning_rate": 6.258151768696646e-07, "loss": 0.1594, "step": 25110 }, { "epoch": 0.4364928992334301, "grad_norm": 1.3908832170366046, "learning_rate": 6.257879330830292e-07, "loss": 0.1909, "step": 25111 }, { "epoch": 0.43651028177093293, "grad_norm": 2.0037918671960373, "learning_rate": 6.257606888977041e-07, "loss": 0.3329, "step": 25112 }, { "epoch": 0.43652766430843576, "grad_norm": 1.0729101220549717, "learning_rate": 6.257334443137752e-07, "loss": 0.2491, "step": 25113 }, { "epoch": 0.4365450468459386, "grad_norm": 1.5360624466771051, "learning_rate": 6.257061993313293e-07, "loss": 0.2795, "step": 25114 }, { "epoch": 0.4365624293834414, "grad_norm": 1.4677702420615848, "learning_rate": 6.256789539504523e-07, "loss": 0.2229, "step": 25115 }, { "epoch": 0.4365798119209442, "grad_norm": 3.023632613684631, "learning_rate": 6.256517081712307e-07, "loss": 0.2861, "step": 25116 }, { "epoch": 0.436597194458447, "grad_norm": 1.6659040161840284, "learning_rate": 6.256244619937511e-07, "loss": 0.2638, "step": 25117 }, { "epoch": 0.43661457699594985, "grad_norm": 2.486488360810175, "learning_rate": 6.255972154180995e-07, "loss": 0.3511, "step": 25118 }, { "epoch": 0.4366319595334527, "grad_norm": 2.243113525667485, "learning_rate": 6.255699684443624e-07, "loss": 0.1782, "step": 25119 }, { "epoch": 0.4366493420709555, "grad_norm": 0.9832179675061777, "learning_rate": 6.255427210726262e-07, "loss": 0.162, "step": 25120 }, { "epoch": 0.43666672460845835, "grad_norm": 7.078431200461401, "learning_rate": 6.255154733029774e-07, "loss": 0.5114, "step": 25121 }, { "epoch": 0.4366841071459612, "grad_norm": 1.6205620433292156, "learning_rate": 6.254882251355021e-07, "loss": 0.2135, "step": 25122 }, { "epoch": 0.436701489683464, "grad_norm": 3.990468972866897, "learning_rate": 6.254609765702868e-07, "loss": 0.4744, "step": 25123 }, { "epoch": 0.43671887222096684, "grad_norm": 1.0908008231277009, "learning_rate": 6.254337276074177e-07, "loss": 0.1324, "step": 25124 }, { "epoch": 0.43673625475846967, "grad_norm": 2.05416258981886, "learning_rate": 6.254064782469814e-07, "loss": 0.2852, "step": 25125 }, { "epoch": 0.43675363729597244, "grad_norm": 2.275526621319508, "learning_rate": 6.253792284890643e-07, "loss": 0.29, "step": 25126 }, { "epoch": 0.43677101983347527, "grad_norm": 0.966225161831023, "learning_rate": 6.253519783337525e-07, "loss": 0.1831, "step": 25127 }, { "epoch": 0.4367884023709781, "grad_norm": 1.4251442478135492, "learning_rate": 6.253247277811325e-07, "loss": 0.2067, "step": 25128 }, { "epoch": 0.43680578490848093, "grad_norm": 2.295297206837819, "learning_rate": 6.252974768312909e-07, "loss": 0.4743, "step": 25129 }, { "epoch": 0.43682316744598376, "grad_norm": 2.206900229388848, "learning_rate": 6.252702254843135e-07, "loss": 0.264, "step": 25130 }, { "epoch": 0.4368405499834866, "grad_norm": 2.4394334044347015, "learning_rate": 6.252429737402873e-07, "loss": 0.2834, "step": 25131 }, { "epoch": 0.4368579325209894, "grad_norm": 1.005748343656011, "learning_rate": 6.252157215992982e-07, "loss": 0.23, "step": 25132 }, { "epoch": 0.43687531505849225, "grad_norm": 2.6604282708233233, "learning_rate": 6.25188469061433e-07, "loss": 0.2477, "step": 25133 }, { "epoch": 0.4368926975959951, "grad_norm": 1.9570956623522018, "learning_rate": 6.251612161267776e-07, "loss": 0.2352, "step": 25134 }, { "epoch": 0.4369100801334979, "grad_norm": 1.7577055710343996, "learning_rate": 6.251339627954189e-07, "loss": 0.2365, "step": 25135 }, { "epoch": 0.4369274626710007, "grad_norm": 1.4794600051142588, "learning_rate": 6.251067090674429e-07, "loss": 0.1735, "step": 25136 }, { "epoch": 0.4369448452085035, "grad_norm": 1.0504167487305707, "learning_rate": 6.250794549429361e-07, "loss": 0.1934, "step": 25137 }, { "epoch": 0.43696222774600635, "grad_norm": 2.0536935437012875, "learning_rate": 6.250522004219849e-07, "loss": 0.4089, "step": 25138 }, { "epoch": 0.4369796102835092, "grad_norm": 1.365579320386028, "learning_rate": 6.250249455046758e-07, "loss": 0.237, "step": 25139 }, { "epoch": 0.436996992821012, "grad_norm": 2.3946419163184545, "learning_rate": 6.249976901910948e-07, "loss": 0.3191, "step": 25140 }, { "epoch": 0.43701437535851484, "grad_norm": 1.4854333560774442, "learning_rate": 6.249704344813286e-07, "loss": 0.1793, "step": 25141 }, { "epoch": 0.43703175789601767, "grad_norm": 1.2986298258090485, "learning_rate": 6.249431783754637e-07, "loss": 0.3769, "step": 25142 }, { "epoch": 0.4370491404335205, "grad_norm": 1.1673397325986388, "learning_rate": 6.249159218735863e-07, "loss": 0.2698, "step": 25143 }, { "epoch": 0.43706652297102333, "grad_norm": 2.0951788016994644, "learning_rate": 6.248886649757826e-07, "loss": 0.1697, "step": 25144 }, { "epoch": 0.43708390550852616, "grad_norm": 1.3806235467047339, "learning_rate": 6.248614076821395e-07, "loss": 0.2516, "step": 25145 }, { "epoch": 0.43710128804602894, "grad_norm": 1.440875874323269, "learning_rate": 6.248341499927428e-07, "loss": 0.195, "step": 25146 }, { "epoch": 0.43711867058353177, "grad_norm": 2.9508864735022913, "learning_rate": 6.248068919076793e-07, "loss": 0.188, "step": 25147 }, { "epoch": 0.4371360531210346, "grad_norm": 1.8811975950710034, "learning_rate": 6.247796334270353e-07, "loss": 0.2413, "step": 25148 }, { "epoch": 0.4371534356585374, "grad_norm": 1.8095933825779908, "learning_rate": 6.247523745508973e-07, "loss": 0.3656, "step": 25149 }, { "epoch": 0.43717081819604026, "grad_norm": 1.470655972144724, "learning_rate": 6.247251152793512e-07, "loss": 0.1452, "step": 25150 }, { "epoch": 0.4371882007335431, "grad_norm": 2.074949701151115, "learning_rate": 6.246978556124841e-07, "loss": 0.249, "step": 25151 }, { "epoch": 0.4372055832710459, "grad_norm": 4.000205841604862, "learning_rate": 6.246705955503821e-07, "loss": 0.5871, "step": 25152 }, { "epoch": 0.43722296580854875, "grad_norm": 1.1499713799600944, "learning_rate": 6.246433350931314e-07, "loss": 0.1246, "step": 25153 }, { "epoch": 0.4372403483460516, "grad_norm": 2.1470165407291355, "learning_rate": 6.246160742408187e-07, "loss": 0.282, "step": 25154 }, { "epoch": 0.4372577308835544, "grad_norm": 1.9771466605198607, "learning_rate": 6.245888129935302e-07, "loss": 0.2495, "step": 25155 }, { "epoch": 0.4372751134210572, "grad_norm": 2.4337039276999297, "learning_rate": 6.245615513513524e-07, "loss": 0.3138, "step": 25156 }, { "epoch": 0.43729249595856, "grad_norm": 2.4391280854769515, "learning_rate": 6.245342893143718e-07, "loss": 0.1775, "step": 25157 }, { "epoch": 0.43730987849606284, "grad_norm": 1.591086011062477, "learning_rate": 6.245070268826748e-07, "loss": 0.2408, "step": 25158 }, { "epoch": 0.4373272610335657, "grad_norm": 1.1747656726946218, "learning_rate": 6.244797640563475e-07, "loss": 0.1717, "step": 25159 }, { "epoch": 0.4373446435710685, "grad_norm": 1.166121770157699, "learning_rate": 6.244525008354766e-07, "loss": 0.1771, "step": 25160 }, { "epoch": 0.43736202610857133, "grad_norm": 1.7879458759134073, "learning_rate": 6.244252372201485e-07, "loss": 0.2224, "step": 25161 }, { "epoch": 0.43737940864607416, "grad_norm": 2.1472149021222937, "learning_rate": 6.243979732104495e-07, "loss": 0.229, "step": 25162 }, { "epoch": 0.437396791183577, "grad_norm": 1.479843315260428, "learning_rate": 6.24370708806466e-07, "loss": 0.1875, "step": 25163 }, { "epoch": 0.4374141737210798, "grad_norm": 1.3897004326954436, "learning_rate": 6.243434440082848e-07, "loss": 0.1906, "step": 25164 }, { "epoch": 0.43743155625858265, "grad_norm": 2.128917217296745, "learning_rate": 6.243161788159917e-07, "loss": 0.4332, "step": 25165 }, { "epoch": 0.43744893879608543, "grad_norm": 1.4825254978712874, "learning_rate": 6.242889132296733e-07, "loss": 0.2051, "step": 25166 }, { "epoch": 0.43746632133358826, "grad_norm": 1.2664173841817363, "learning_rate": 6.242616472494165e-07, "loss": 0.2687, "step": 25167 }, { "epoch": 0.4374837038710911, "grad_norm": 1.385684324118636, "learning_rate": 6.242343808753071e-07, "loss": 0.1881, "step": 25168 }, { "epoch": 0.4375010864085939, "grad_norm": 2.253696463488444, "learning_rate": 6.242071141074319e-07, "loss": 0.2736, "step": 25169 }, { "epoch": 0.43751846894609675, "grad_norm": 2.3031201103143157, "learning_rate": 6.241798469458772e-07, "loss": 0.2955, "step": 25170 }, { "epoch": 0.4375358514835996, "grad_norm": 2.2094035029142978, "learning_rate": 6.241525793907293e-07, "loss": 0.5225, "step": 25171 }, { "epoch": 0.4375532340211024, "grad_norm": 1.3827440420274257, "learning_rate": 6.24125311442075e-07, "loss": 0.2742, "step": 25172 }, { "epoch": 0.43757061655860524, "grad_norm": 1.1403072677033599, "learning_rate": 6.240980431000002e-07, "loss": 0.1653, "step": 25173 }, { "epoch": 0.43758799909610807, "grad_norm": 0.9940714914291969, "learning_rate": 6.240707743645917e-07, "loss": 0.2167, "step": 25174 }, { "epoch": 0.4376053816336109, "grad_norm": 2.4124003619934866, "learning_rate": 6.240435052359359e-07, "loss": 0.2773, "step": 25175 }, { "epoch": 0.4376227641711137, "grad_norm": 1.9919244141583134, "learning_rate": 6.240162357141192e-07, "loss": 0.2049, "step": 25176 }, { "epoch": 0.4376401467086165, "grad_norm": 1.3589210042936386, "learning_rate": 6.239889657992279e-07, "loss": 0.2421, "step": 25177 }, { "epoch": 0.43765752924611934, "grad_norm": 1.2532636075671422, "learning_rate": 6.239616954913487e-07, "loss": 0.1812, "step": 25178 }, { "epoch": 0.43767491178362217, "grad_norm": 2.0749763858851273, "learning_rate": 6.239344247905676e-07, "loss": 0.2319, "step": 25179 }, { "epoch": 0.437692294321125, "grad_norm": 1.8692518408010481, "learning_rate": 6.239071536969715e-07, "loss": 0.2541, "step": 25180 }, { "epoch": 0.4377096768586278, "grad_norm": 1.8736673314392602, "learning_rate": 6.238798822106466e-07, "loss": 0.3165, "step": 25181 }, { "epoch": 0.43772705939613066, "grad_norm": 1.7600741219261884, "learning_rate": 6.238526103316795e-07, "loss": 0.279, "step": 25182 }, { "epoch": 0.4377444419336335, "grad_norm": 1.0604937007472262, "learning_rate": 6.238253380601564e-07, "loss": 0.1726, "step": 25183 }, { "epoch": 0.4377618244711363, "grad_norm": 1.4616882001001406, "learning_rate": 6.237980653961636e-07, "loss": 0.2862, "step": 25184 }, { "epoch": 0.43777920700863915, "grad_norm": 1.8234752518077626, "learning_rate": 6.237707923397881e-07, "loss": 0.2346, "step": 25185 }, { "epoch": 0.4377965895461419, "grad_norm": 1.206539991796874, "learning_rate": 6.237435188911161e-07, "loss": 0.1635, "step": 25186 }, { "epoch": 0.43781397208364475, "grad_norm": 2.0163193056156237, "learning_rate": 6.237162450502339e-07, "loss": 0.2119, "step": 25187 }, { "epoch": 0.4378313546211476, "grad_norm": 1.773592159708108, "learning_rate": 6.236889708172279e-07, "loss": 0.3131, "step": 25188 }, { "epoch": 0.4378487371586504, "grad_norm": 1.8584738609770808, "learning_rate": 6.23661696192185e-07, "loss": 0.3929, "step": 25189 }, { "epoch": 0.43786611969615324, "grad_norm": 2.127635589200006, "learning_rate": 6.236344211751911e-07, "loss": 0.2593, "step": 25190 }, { "epoch": 0.4378835022336561, "grad_norm": 1.5167962473321057, "learning_rate": 6.236071457663329e-07, "loss": 0.1707, "step": 25191 }, { "epoch": 0.4379008847711589, "grad_norm": 1.7445654310902294, "learning_rate": 6.235798699656966e-07, "loss": 0.2632, "step": 25192 }, { "epoch": 0.43791826730866174, "grad_norm": 1.0530062381010301, "learning_rate": 6.235525937733694e-07, "loss": 0.3077, "step": 25193 }, { "epoch": 0.43793564984616457, "grad_norm": 1.943342494875787, "learning_rate": 6.235253171894368e-07, "loss": 0.2847, "step": 25194 }, { "epoch": 0.4379530323836674, "grad_norm": 1.3162305797130815, "learning_rate": 6.234980402139859e-07, "loss": 0.171, "step": 25195 }, { "epoch": 0.43797041492117017, "grad_norm": 1.8220805630259251, "learning_rate": 6.234707628471029e-07, "loss": 0.5113, "step": 25196 }, { "epoch": 0.437987797458673, "grad_norm": 1.1084040631883616, "learning_rate": 6.234434850888742e-07, "loss": 0.2043, "step": 25197 }, { "epoch": 0.43800517999617583, "grad_norm": 0.9156396851141589, "learning_rate": 6.234162069393865e-07, "loss": 0.1913, "step": 25198 }, { "epoch": 0.43802256253367866, "grad_norm": 1.0446607518667166, "learning_rate": 6.233889283987261e-07, "loss": 0.1841, "step": 25199 }, { "epoch": 0.4380399450711815, "grad_norm": 1.7691836315232974, "learning_rate": 6.233616494669794e-07, "loss": 0.3173, "step": 25200 }, { "epoch": 0.4380573276086843, "grad_norm": 1.2920682472911547, "learning_rate": 6.23334370144233e-07, "loss": 0.2341, "step": 25201 }, { "epoch": 0.43807471014618715, "grad_norm": 1.8580907052688345, "learning_rate": 6.233070904305732e-07, "loss": 0.1714, "step": 25202 }, { "epoch": 0.43809209268369, "grad_norm": 2.179260596881214, "learning_rate": 6.232798103260867e-07, "loss": 0.3336, "step": 25203 }, { "epoch": 0.4381094752211928, "grad_norm": 2.2155613710157174, "learning_rate": 6.232525298308598e-07, "loss": 0.3418, "step": 25204 }, { "epoch": 0.4381268577586956, "grad_norm": 1.4796009878775513, "learning_rate": 6.232252489449789e-07, "loss": 0.3298, "step": 25205 }, { "epoch": 0.4381442402961984, "grad_norm": 1.019013013442586, "learning_rate": 6.231979676685307e-07, "loss": 0.1413, "step": 25206 }, { "epoch": 0.43816162283370125, "grad_norm": 1.3986752444783015, "learning_rate": 6.231706860016014e-07, "loss": 0.2845, "step": 25207 }, { "epoch": 0.4381790053712041, "grad_norm": 2.1815811984471685, "learning_rate": 6.231434039442777e-07, "loss": 0.3234, "step": 25208 }, { "epoch": 0.4381963879087069, "grad_norm": 0.9218453481913699, "learning_rate": 6.231161214966459e-07, "loss": 0.2255, "step": 25209 }, { "epoch": 0.43821377044620974, "grad_norm": 1.8200683703910177, "learning_rate": 6.230888386587926e-07, "loss": 0.3745, "step": 25210 }, { "epoch": 0.43823115298371257, "grad_norm": 1.8360417698768259, "learning_rate": 6.230615554308043e-07, "loss": 0.1266, "step": 25211 }, { "epoch": 0.4382485355212154, "grad_norm": 3.3999656641658027, "learning_rate": 6.230342718127672e-07, "loss": 0.3385, "step": 25212 }, { "epoch": 0.43826591805871823, "grad_norm": 1.685033305822736, "learning_rate": 6.230069878047682e-07, "loss": 0.2196, "step": 25213 }, { "epoch": 0.43828330059622106, "grad_norm": 3.4416961512363278, "learning_rate": 6.229797034068934e-07, "loss": 0.41, "step": 25214 }, { "epoch": 0.43830068313372383, "grad_norm": 1.3333703097597391, "learning_rate": 6.229524186192296e-07, "loss": 0.308, "step": 25215 }, { "epoch": 0.43831806567122666, "grad_norm": 1.991813321608724, "learning_rate": 6.229251334418629e-07, "loss": 0.1957, "step": 25216 }, { "epoch": 0.4383354482087295, "grad_norm": 1.2280410465851366, "learning_rate": 6.2289784787488e-07, "loss": 0.3071, "step": 25217 }, { "epoch": 0.4383528307462323, "grad_norm": 1.2088266751953751, "learning_rate": 6.228705619183677e-07, "loss": 0.154, "step": 25218 }, { "epoch": 0.43837021328373516, "grad_norm": 1.2906929917558325, "learning_rate": 6.228432755724118e-07, "loss": 0.2877, "step": 25219 }, { "epoch": 0.438387595821238, "grad_norm": 2.710878611562827, "learning_rate": 6.228159888370993e-07, "loss": 0.4279, "step": 25220 }, { "epoch": 0.4384049783587408, "grad_norm": 1.3010033410339519, "learning_rate": 6.227887017125165e-07, "loss": 0.198, "step": 25221 }, { "epoch": 0.43842236089624365, "grad_norm": 1.1674067191993867, "learning_rate": 6.227614141987499e-07, "loss": 0.2487, "step": 25222 }, { "epoch": 0.4384397434337465, "grad_norm": 1.9002844589719499, "learning_rate": 6.22734126295886e-07, "loss": 0.3381, "step": 25223 }, { "epoch": 0.4384571259712493, "grad_norm": 1.6690811706157898, "learning_rate": 6.227068380040115e-07, "loss": 0.3124, "step": 25224 }, { "epoch": 0.4384745085087521, "grad_norm": 1.432810823432707, "learning_rate": 6.226795493232124e-07, "loss": 0.208, "step": 25225 }, { "epoch": 0.4384918910462549, "grad_norm": 4.593401319252496, "learning_rate": 6.226522602535756e-07, "loss": 0.5966, "step": 25226 }, { "epoch": 0.43850927358375774, "grad_norm": 2.4109773187285546, "learning_rate": 6.226249707951875e-07, "loss": 0.2896, "step": 25227 }, { "epoch": 0.43852665612126057, "grad_norm": 1.5607465670268803, "learning_rate": 6.225976809481346e-07, "loss": 0.4309, "step": 25228 }, { "epoch": 0.4385440386587634, "grad_norm": 2.38362174587726, "learning_rate": 6.225703907125033e-07, "loss": 0.3232, "step": 25229 }, { "epoch": 0.43856142119626623, "grad_norm": 1.5536540831293228, "learning_rate": 6.225431000883803e-07, "loss": 0.2668, "step": 25230 }, { "epoch": 0.43857880373376906, "grad_norm": 2.3044191651646777, "learning_rate": 6.225158090758517e-07, "loss": 0.4416, "step": 25231 }, { "epoch": 0.4385961862712719, "grad_norm": 1.7690495563871056, "learning_rate": 6.224885176750045e-07, "loss": 0.2304, "step": 25232 }, { "epoch": 0.4386135688087747, "grad_norm": 1.2827667598801809, "learning_rate": 6.224612258859248e-07, "loss": 0.2679, "step": 25233 }, { "epoch": 0.43863095134627755, "grad_norm": 1.0757705913816984, "learning_rate": 6.224339337086995e-07, "loss": 0.29, "step": 25234 }, { "epoch": 0.43864833388378033, "grad_norm": 1.5745820663532908, "learning_rate": 6.224066411434146e-07, "loss": 0.3606, "step": 25235 }, { "epoch": 0.43866571642128316, "grad_norm": 1.5332590142055893, "learning_rate": 6.22379348190157e-07, "loss": 0.2079, "step": 25236 }, { "epoch": 0.438683098958786, "grad_norm": 1.6306774374696156, "learning_rate": 6.223520548490131e-07, "loss": 0.2331, "step": 25237 }, { "epoch": 0.4387004814962888, "grad_norm": 1.8997821787077491, "learning_rate": 6.223247611200695e-07, "loss": 0.239, "step": 25238 }, { "epoch": 0.43871786403379165, "grad_norm": 1.5890830037064456, "learning_rate": 6.222974670034124e-07, "loss": 0.3178, "step": 25239 }, { "epoch": 0.4387352465712945, "grad_norm": 2.7704560822843183, "learning_rate": 6.222701724991285e-07, "loss": 0.3021, "step": 25240 }, { "epoch": 0.4387526291087973, "grad_norm": 1.434808122173559, "learning_rate": 6.222428776073045e-07, "loss": 0.3272, "step": 25241 }, { "epoch": 0.43877001164630014, "grad_norm": 1.724363585295567, "learning_rate": 6.222155823280267e-07, "loss": 0.3972, "step": 25242 }, { "epoch": 0.43878739418380297, "grad_norm": 1.7385889930863645, "learning_rate": 6.221882866613816e-07, "loss": 0.2024, "step": 25243 }, { "epoch": 0.4388047767213058, "grad_norm": 1.3863449163928174, "learning_rate": 6.221609906074556e-07, "loss": 0.2986, "step": 25244 }, { "epoch": 0.4388221592588086, "grad_norm": 1.2678234480249275, "learning_rate": 6.221336941663355e-07, "loss": 0.2362, "step": 25245 }, { "epoch": 0.4388395417963114, "grad_norm": 1.3388039897202586, "learning_rate": 6.221063973381078e-07, "loss": 0.349, "step": 25246 }, { "epoch": 0.43885692433381424, "grad_norm": 1.3380842119848126, "learning_rate": 6.220791001228588e-07, "loss": 0.2014, "step": 25247 }, { "epoch": 0.43887430687131707, "grad_norm": 1.6330794124155774, "learning_rate": 6.22051802520675e-07, "loss": 0.2073, "step": 25248 }, { "epoch": 0.4388916894088199, "grad_norm": 1.8576102856611565, "learning_rate": 6.220245045316434e-07, "loss": 0.1774, "step": 25249 }, { "epoch": 0.4389090719463227, "grad_norm": 1.4277079850468113, "learning_rate": 6.219972061558498e-07, "loss": 0.1897, "step": 25250 }, { "epoch": 0.43892645448382556, "grad_norm": 2.2028300933513005, "learning_rate": 6.219699073933814e-07, "loss": 0.2845, "step": 25251 }, { "epoch": 0.4389438370213284, "grad_norm": 1.4091281105125375, "learning_rate": 6.219426082443242e-07, "loss": 0.3402, "step": 25252 }, { "epoch": 0.4389612195588312, "grad_norm": 1.9437510858942535, "learning_rate": 6.219153087087652e-07, "loss": 0.2086, "step": 25253 }, { "epoch": 0.43897860209633405, "grad_norm": 1.2911130349244315, "learning_rate": 6.218880087867904e-07, "loss": 0.3772, "step": 25254 }, { "epoch": 0.4389959846338368, "grad_norm": 1.7676640939170185, "learning_rate": 6.218607084784867e-07, "loss": 0.1832, "step": 25255 }, { "epoch": 0.43901336717133965, "grad_norm": 1.3052176969529183, "learning_rate": 6.218334077839406e-07, "loss": 0.2503, "step": 25256 }, { "epoch": 0.4390307497088425, "grad_norm": 2.8013185836271797, "learning_rate": 6.218061067032386e-07, "loss": 0.2344, "step": 25257 }, { "epoch": 0.4390481322463453, "grad_norm": 1.5604110404402076, "learning_rate": 6.217788052364669e-07, "loss": 0.1964, "step": 25258 }, { "epoch": 0.43906551478384814, "grad_norm": 1.4092499843015265, "learning_rate": 6.217515033837128e-07, "loss": 0.2497, "step": 25259 }, { "epoch": 0.439082897321351, "grad_norm": 2.832016144663917, "learning_rate": 6.217242011450619e-07, "loss": 0.4481, "step": 25260 }, { "epoch": 0.4391002798588538, "grad_norm": 1.271727296942401, "learning_rate": 6.216968985206015e-07, "loss": 0.2514, "step": 25261 }, { "epoch": 0.43911766239635663, "grad_norm": 1.331359888934502, "learning_rate": 6.216695955104178e-07, "loss": 0.2905, "step": 25262 }, { "epoch": 0.43913504493385946, "grad_norm": 2.399229689833015, "learning_rate": 6.216422921145973e-07, "loss": 0.3453, "step": 25263 }, { "epoch": 0.4391524274713623, "grad_norm": 1.6658636939218296, "learning_rate": 6.216149883332264e-07, "loss": 0.2757, "step": 25264 }, { "epoch": 0.43916981000886507, "grad_norm": 4.472640494862112, "learning_rate": 6.215876841663922e-07, "loss": 0.3271, "step": 25265 }, { "epoch": 0.4391871925463679, "grad_norm": 1.7737307516711835, "learning_rate": 6.215603796141808e-07, "loss": 0.5464, "step": 25266 }, { "epoch": 0.43920457508387073, "grad_norm": 1.1049769907792775, "learning_rate": 6.215330746766789e-07, "loss": 0.3817, "step": 25267 }, { "epoch": 0.43922195762137356, "grad_norm": 2.2105977698507866, "learning_rate": 6.215057693539728e-07, "loss": 0.3028, "step": 25268 }, { "epoch": 0.4392393401588764, "grad_norm": 1.719450035843597, "learning_rate": 6.214784636461493e-07, "loss": 0.3138, "step": 25269 }, { "epoch": 0.4392567226963792, "grad_norm": 1.2059787721410027, "learning_rate": 6.214511575532948e-07, "loss": 0.2187, "step": 25270 }, { "epoch": 0.43927410523388205, "grad_norm": 0.8904099035454583, "learning_rate": 6.214238510754962e-07, "loss": 0.2329, "step": 25271 }, { "epoch": 0.4392914877713849, "grad_norm": 1.4084809645374232, "learning_rate": 6.213965442128395e-07, "loss": 0.3016, "step": 25272 }, { "epoch": 0.4393088703088877, "grad_norm": 1.1545720131173538, "learning_rate": 6.213692369654114e-07, "loss": 0.2851, "step": 25273 }, { "epoch": 0.43932625284639054, "grad_norm": 0.9911074131983927, "learning_rate": 6.213419293332989e-07, "loss": 0.2916, "step": 25274 }, { "epoch": 0.4393436353838933, "grad_norm": 1.9058002762315336, "learning_rate": 6.213146213165881e-07, "loss": 0.3047, "step": 25275 }, { "epoch": 0.43936101792139615, "grad_norm": 1.2808963693300286, "learning_rate": 6.212873129153657e-07, "loss": 0.4347, "step": 25276 }, { "epoch": 0.439378400458899, "grad_norm": 2.8253523231715274, "learning_rate": 6.21260004129718e-07, "loss": 0.3092, "step": 25277 }, { "epoch": 0.4393957829964018, "grad_norm": 2.6139582782840733, "learning_rate": 6.212326949597322e-07, "loss": 0.2735, "step": 25278 }, { "epoch": 0.43941316553390464, "grad_norm": 2.0635947725651165, "learning_rate": 6.212053854054942e-07, "loss": 0.2925, "step": 25279 }, { "epoch": 0.43943054807140747, "grad_norm": 1.81379850484468, "learning_rate": 6.211780754670908e-07, "loss": 0.3678, "step": 25280 }, { "epoch": 0.4394479306089103, "grad_norm": 1.6592091443600963, "learning_rate": 6.211507651446086e-07, "loss": 0.2322, "step": 25281 }, { "epoch": 0.43946531314641313, "grad_norm": 2.0913710185270795, "learning_rate": 6.211234544381342e-07, "loss": 0.2459, "step": 25282 }, { "epoch": 0.43948269568391596, "grad_norm": 1.648804065358589, "learning_rate": 6.21096143347754e-07, "loss": 0.3971, "step": 25283 }, { "epoch": 0.4395000782214188, "grad_norm": 2.7872091093275433, "learning_rate": 6.210688318735547e-07, "loss": 0.3262, "step": 25284 }, { "epoch": 0.43951746075892156, "grad_norm": 1.6102638087747247, "learning_rate": 6.210415200156227e-07, "loss": 0.2977, "step": 25285 }, { "epoch": 0.4395348432964244, "grad_norm": 1.4439668041583746, "learning_rate": 6.210142077740448e-07, "loss": 0.3967, "step": 25286 }, { "epoch": 0.4395522258339272, "grad_norm": 1.7637412505442096, "learning_rate": 6.209868951489074e-07, "loss": 0.1727, "step": 25287 }, { "epoch": 0.43956960837143005, "grad_norm": 2.2160853331224395, "learning_rate": 6.209595821402973e-07, "loss": 0.2161, "step": 25288 }, { "epoch": 0.4395869909089329, "grad_norm": 1.6373024826880875, "learning_rate": 6.209322687483006e-07, "loss": 0.2156, "step": 25289 }, { "epoch": 0.4396043734464357, "grad_norm": 3.520737986836524, "learning_rate": 6.209049549730045e-07, "loss": 0.2638, "step": 25290 }, { "epoch": 0.43962175598393854, "grad_norm": 1.2140098691072945, "learning_rate": 6.208776408144949e-07, "loss": 0.2126, "step": 25291 }, { "epoch": 0.4396391385214414, "grad_norm": 1.8690283432515509, "learning_rate": 6.208503262728588e-07, "loss": 0.1844, "step": 25292 }, { "epoch": 0.4396565210589442, "grad_norm": 1.6793014108627387, "learning_rate": 6.208230113481826e-07, "loss": 0.4017, "step": 25293 }, { "epoch": 0.43967390359644704, "grad_norm": 2.1002412399955683, "learning_rate": 6.207956960405532e-07, "loss": 0.2222, "step": 25294 }, { "epoch": 0.4396912861339498, "grad_norm": 1.9054123544518273, "learning_rate": 6.207683803500567e-07, "loss": 0.2494, "step": 25295 }, { "epoch": 0.43970866867145264, "grad_norm": 1.3740667341023078, "learning_rate": 6.207410642767801e-07, "loss": 0.2674, "step": 25296 }, { "epoch": 0.43972605120895547, "grad_norm": 1.4211944990067036, "learning_rate": 6.207137478208098e-07, "loss": 0.3008, "step": 25297 }, { "epoch": 0.4397434337464583, "grad_norm": 1.2958962952914495, "learning_rate": 6.206864309822321e-07, "loss": 0.2376, "step": 25298 }, { "epoch": 0.43976081628396113, "grad_norm": 1.7166637567487697, "learning_rate": 6.20659113761134e-07, "loss": 0.2614, "step": 25299 }, { "epoch": 0.43977819882146396, "grad_norm": 1.478814700630533, "learning_rate": 6.20631796157602e-07, "loss": 0.2114, "step": 25300 }, { "epoch": 0.4397955813589668, "grad_norm": 1.048650962914884, "learning_rate": 6.206044781717226e-07, "loss": 0.3268, "step": 25301 }, { "epoch": 0.4398129638964696, "grad_norm": 1.086758919208916, "learning_rate": 6.205771598035823e-07, "loss": 0.1998, "step": 25302 }, { "epoch": 0.43983034643397245, "grad_norm": 1.0088946465230288, "learning_rate": 6.20549841053268e-07, "loss": 0.2565, "step": 25303 }, { "epoch": 0.4398477289714753, "grad_norm": 1.1075356261475866, "learning_rate": 6.205225219208658e-07, "loss": 0.2472, "step": 25304 }, { "epoch": 0.43986511150897806, "grad_norm": 1.7743177032360895, "learning_rate": 6.204952024064628e-07, "loss": 0.1991, "step": 25305 }, { "epoch": 0.4398824940464809, "grad_norm": 1.4367365853995062, "learning_rate": 6.204678825101454e-07, "loss": 0.1748, "step": 25306 }, { "epoch": 0.4398998765839837, "grad_norm": 3.256485630060306, "learning_rate": 6.20440562232e-07, "loss": 0.3778, "step": 25307 }, { "epoch": 0.43991725912148655, "grad_norm": 1.6901936445333057, "learning_rate": 6.204132415721132e-07, "loss": 0.2316, "step": 25308 }, { "epoch": 0.4399346416589894, "grad_norm": 2.3320228659206097, "learning_rate": 6.203859205305721e-07, "loss": 0.2199, "step": 25309 }, { "epoch": 0.4399520241964922, "grad_norm": 2.9360024655851698, "learning_rate": 6.203585991074627e-07, "loss": 0.3654, "step": 25310 }, { "epoch": 0.43996940673399504, "grad_norm": 2.812582979226758, "learning_rate": 6.20331277302872e-07, "loss": 0.3015, "step": 25311 }, { "epoch": 0.43998678927149787, "grad_norm": 2.372289749839988, "learning_rate": 6.203039551168862e-07, "loss": 0.3751, "step": 25312 }, { "epoch": 0.4400041718090007, "grad_norm": 2.6662667095317785, "learning_rate": 6.202766325495923e-07, "loss": 0.4271, "step": 25313 }, { "epoch": 0.44002155434650353, "grad_norm": 1.6966235536448733, "learning_rate": 6.202493096010765e-07, "loss": 0.3468, "step": 25314 }, { "epoch": 0.4400389368840063, "grad_norm": 1.2814138020664154, "learning_rate": 6.202219862714258e-07, "loss": 0.3374, "step": 25315 }, { "epoch": 0.44005631942150913, "grad_norm": 2.7837179131824343, "learning_rate": 6.201946625607267e-07, "loss": 0.3376, "step": 25316 }, { "epoch": 0.44007370195901196, "grad_norm": 1.708118224482445, "learning_rate": 6.201673384690657e-07, "loss": 0.2357, "step": 25317 }, { "epoch": 0.4400910844965148, "grad_norm": 1.3979463483042591, "learning_rate": 6.201400139965293e-07, "loss": 0.3053, "step": 25318 }, { "epoch": 0.4401084670340176, "grad_norm": 1.1924406828784893, "learning_rate": 6.201126891432043e-07, "loss": 0.1691, "step": 25319 }, { "epoch": 0.44012584957152046, "grad_norm": 1.4222607767653679, "learning_rate": 6.200853639091773e-07, "loss": 0.3592, "step": 25320 }, { "epoch": 0.4401432321090233, "grad_norm": 2.1521208655635147, "learning_rate": 6.200580382945349e-07, "loss": 0.3524, "step": 25321 }, { "epoch": 0.4401606146465261, "grad_norm": 1.1142965118489816, "learning_rate": 6.200307122993636e-07, "loss": 0.3029, "step": 25322 }, { "epoch": 0.44017799718402895, "grad_norm": 1.040937743120559, "learning_rate": 6.200033859237501e-07, "loss": 0.1647, "step": 25323 }, { "epoch": 0.4401953797215318, "grad_norm": 2.852756237421275, "learning_rate": 6.199760591677809e-07, "loss": 0.3288, "step": 25324 }, { "epoch": 0.44021276225903455, "grad_norm": 3.494108533645746, "learning_rate": 6.199487320315427e-07, "loss": 0.2985, "step": 25325 }, { "epoch": 0.4402301447965374, "grad_norm": 1.8418301191545927, "learning_rate": 6.199214045151224e-07, "loss": 0.3913, "step": 25326 }, { "epoch": 0.4402475273340402, "grad_norm": 1.6711184051255255, "learning_rate": 6.19894076618606e-07, "loss": 0.2834, "step": 25327 }, { "epoch": 0.44026490987154304, "grad_norm": 0.8930196743043173, "learning_rate": 6.198667483420807e-07, "loss": 0.2291, "step": 25328 }, { "epoch": 0.4402822924090459, "grad_norm": 1.4066391171834256, "learning_rate": 6.198394196856325e-07, "loss": 0.2108, "step": 25329 }, { "epoch": 0.4402996749465487, "grad_norm": 1.7635146243378719, "learning_rate": 6.198120906493488e-07, "loss": 0.2203, "step": 25330 }, { "epoch": 0.44031705748405153, "grad_norm": 2.038131370866922, "learning_rate": 6.197847612333157e-07, "loss": 0.5165, "step": 25331 }, { "epoch": 0.44033444002155436, "grad_norm": 1.2775908680140997, "learning_rate": 6.197574314376199e-07, "loss": 0.2623, "step": 25332 }, { "epoch": 0.4403518225590572, "grad_norm": 1.4456474434757232, "learning_rate": 6.197301012623479e-07, "loss": 0.2279, "step": 25333 }, { "epoch": 0.44036920509656, "grad_norm": 1.1895372943460107, "learning_rate": 6.197027707075867e-07, "loss": 0.3144, "step": 25334 }, { "epoch": 0.4403865876340628, "grad_norm": 1.1362461847310168, "learning_rate": 6.196754397734226e-07, "loss": 0.2343, "step": 25335 }, { "epoch": 0.44040397017156563, "grad_norm": 1.8639612208063372, "learning_rate": 6.196481084599424e-07, "loss": 0.2591, "step": 25336 }, { "epoch": 0.44042135270906846, "grad_norm": 3.122128345027845, "learning_rate": 6.196207767672325e-07, "loss": 0.3769, "step": 25337 }, { "epoch": 0.4404387352465713, "grad_norm": 1.347512574095937, "learning_rate": 6.1959344469538e-07, "loss": 0.4483, "step": 25338 }, { "epoch": 0.4404561177840741, "grad_norm": 1.4777739072611211, "learning_rate": 6.195661122444708e-07, "loss": 0.4236, "step": 25339 }, { "epoch": 0.44047350032157695, "grad_norm": 2.155376446530714, "learning_rate": 6.195387794145924e-07, "loss": 0.4235, "step": 25340 }, { "epoch": 0.4404908828590798, "grad_norm": 3.6243892891759915, "learning_rate": 6.195114462058307e-07, "loss": 0.5551, "step": 25341 }, { "epoch": 0.4405082653965826, "grad_norm": 1.1520426609976817, "learning_rate": 6.194841126182726e-07, "loss": 0.4427, "step": 25342 }, { "epoch": 0.44052564793408544, "grad_norm": 1.081843471226706, "learning_rate": 6.194567786520048e-07, "loss": 0.258, "step": 25343 }, { "epoch": 0.4405430304715882, "grad_norm": 2.8452528542724287, "learning_rate": 6.19429444307114e-07, "loss": 0.2766, "step": 25344 }, { "epoch": 0.44056041300909105, "grad_norm": 2.9896632408220403, "learning_rate": 6.194021095836866e-07, "loss": 0.2899, "step": 25345 }, { "epoch": 0.4405777955465939, "grad_norm": 1.5269410019679965, "learning_rate": 6.193747744818093e-07, "loss": 0.4106, "step": 25346 }, { "epoch": 0.4405951780840967, "grad_norm": 1.6526387013848973, "learning_rate": 6.193474390015689e-07, "loss": 0.2816, "step": 25347 }, { "epoch": 0.44061256062159954, "grad_norm": 1.7571178829902807, "learning_rate": 6.193201031430519e-07, "loss": 0.3394, "step": 25348 }, { "epoch": 0.44062994315910237, "grad_norm": 1.9286309040441438, "learning_rate": 6.19292766906345e-07, "loss": 0.2494, "step": 25349 }, { "epoch": 0.4406473256966052, "grad_norm": 2.3584037115986787, "learning_rate": 6.192654302915348e-07, "loss": 0.2883, "step": 25350 }, { "epoch": 0.440664708234108, "grad_norm": 1.5932763079180365, "learning_rate": 6.19238093298708e-07, "loss": 0.2602, "step": 25351 }, { "epoch": 0.44068209077161086, "grad_norm": 2.0760809270978804, "learning_rate": 6.192107559279513e-07, "loss": 0.2552, "step": 25352 }, { "epoch": 0.4406994733091137, "grad_norm": 1.3497689681390905, "learning_rate": 6.19183418179351e-07, "loss": 0.2874, "step": 25353 }, { "epoch": 0.44071685584661646, "grad_norm": 2.42255417820512, "learning_rate": 6.191560800529944e-07, "loss": 0.2829, "step": 25354 }, { "epoch": 0.4407342383841193, "grad_norm": 2.093696911297525, "learning_rate": 6.191287415489676e-07, "loss": 0.1967, "step": 25355 }, { "epoch": 0.4407516209216221, "grad_norm": 1.8916943656942653, "learning_rate": 6.191014026673573e-07, "loss": 0.2427, "step": 25356 }, { "epoch": 0.44076900345912495, "grad_norm": 1.3633548594057334, "learning_rate": 6.190740634082503e-07, "loss": 0.2594, "step": 25357 }, { "epoch": 0.4407863859966278, "grad_norm": 1.6153680177850998, "learning_rate": 6.190467237717332e-07, "loss": 0.1731, "step": 25358 }, { "epoch": 0.4408037685341306, "grad_norm": 1.3817104737626695, "learning_rate": 6.190193837578927e-07, "loss": 0.2821, "step": 25359 }, { "epoch": 0.44082115107163344, "grad_norm": 1.6017616903944574, "learning_rate": 6.189920433668155e-07, "loss": 0.3326, "step": 25360 }, { "epoch": 0.4408385336091363, "grad_norm": 2.4856188047303918, "learning_rate": 6.189647025985882e-07, "loss": 0.3774, "step": 25361 }, { "epoch": 0.4408559161466391, "grad_norm": 2.1875042720503255, "learning_rate": 6.189373614532973e-07, "loss": 0.221, "step": 25362 }, { "epoch": 0.44087329868414193, "grad_norm": 2.2232449475462426, "learning_rate": 6.189100199310298e-07, "loss": 0.363, "step": 25363 }, { "epoch": 0.4408906812216447, "grad_norm": 2.556339460550214, "learning_rate": 6.18882678031872e-07, "loss": 0.1773, "step": 25364 }, { "epoch": 0.44090806375914754, "grad_norm": 1.163380663661052, "learning_rate": 6.188553357559107e-07, "loss": 0.4106, "step": 25365 }, { "epoch": 0.44092544629665037, "grad_norm": 2.6741838881129008, "learning_rate": 6.188279931032328e-07, "loss": 0.2679, "step": 25366 }, { "epoch": 0.4409428288341532, "grad_norm": 1.669299395310737, "learning_rate": 6.188006500739245e-07, "loss": 0.3334, "step": 25367 }, { "epoch": 0.44096021137165603, "grad_norm": 0.9841566352382548, "learning_rate": 6.187733066680727e-07, "loss": 0.3231, "step": 25368 }, { "epoch": 0.44097759390915886, "grad_norm": 1.222782894718432, "learning_rate": 6.187459628857643e-07, "loss": 0.3269, "step": 25369 }, { "epoch": 0.4409949764466617, "grad_norm": 2.56804821479187, "learning_rate": 6.187186187270857e-07, "loss": 0.3426, "step": 25370 }, { "epoch": 0.4410123589841645, "grad_norm": 1.7759214809004227, "learning_rate": 6.186912741921234e-07, "loss": 0.3208, "step": 25371 }, { "epoch": 0.44102974152166735, "grad_norm": 2.527620005639039, "learning_rate": 6.186639292809643e-07, "loss": 0.326, "step": 25372 }, { "epoch": 0.4410471240591702, "grad_norm": 4.622130960351787, "learning_rate": 6.186365839936954e-07, "loss": 0.3735, "step": 25373 }, { "epoch": 0.44106450659667296, "grad_norm": 1.626192686097679, "learning_rate": 6.186092383304027e-07, "loss": 0.2356, "step": 25374 }, { "epoch": 0.4410818891341758, "grad_norm": 1.4849511088441807, "learning_rate": 6.185818922911734e-07, "loss": 0.2404, "step": 25375 }, { "epoch": 0.4410992716716786, "grad_norm": 1.8513561693659701, "learning_rate": 6.185545458760938e-07, "loss": 0.2726, "step": 25376 }, { "epoch": 0.44111665420918145, "grad_norm": 1.772891567301623, "learning_rate": 6.185271990852509e-07, "loss": 0.203, "step": 25377 }, { "epoch": 0.4411340367466843, "grad_norm": 1.596786954078171, "learning_rate": 6.18499851918731e-07, "loss": 0.2197, "step": 25378 }, { "epoch": 0.4411514192841871, "grad_norm": 1.594262175034182, "learning_rate": 6.184725043766212e-07, "loss": 0.2822, "step": 25379 }, { "epoch": 0.44116880182168994, "grad_norm": 2.0792488900434596, "learning_rate": 6.18445156459008e-07, "loss": 0.3494, "step": 25380 }, { "epoch": 0.44118618435919277, "grad_norm": 1.5939204193076217, "learning_rate": 6.18417808165978e-07, "loss": 0.1959, "step": 25381 }, { "epoch": 0.4412035668966956, "grad_norm": 1.1811629939044699, "learning_rate": 6.183904594976179e-07, "loss": 0.1419, "step": 25382 }, { "epoch": 0.44122094943419843, "grad_norm": 2.0903085739645304, "learning_rate": 6.183631104540145e-07, "loss": 0.3427, "step": 25383 }, { "epoch": 0.4412383319717012, "grad_norm": 1.8152688781690483, "learning_rate": 6.183357610352544e-07, "loss": 0.391, "step": 25384 }, { "epoch": 0.44125571450920403, "grad_norm": 1.5236436535166173, "learning_rate": 6.183084112414242e-07, "loss": 0.3192, "step": 25385 }, { "epoch": 0.44127309704670686, "grad_norm": 1.3822238275352459, "learning_rate": 6.182810610726108e-07, "loss": 0.3097, "step": 25386 }, { "epoch": 0.4412904795842097, "grad_norm": 1.4635252753537222, "learning_rate": 6.182537105289006e-07, "loss": 0.3958, "step": 25387 }, { "epoch": 0.4413078621217125, "grad_norm": 1.7185911541388998, "learning_rate": 6.182263596103807e-07, "loss": 0.3271, "step": 25388 }, { "epoch": 0.44132524465921535, "grad_norm": 1.278222705436491, "learning_rate": 6.181990083171375e-07, "loss": 0.2815, "step": 25389 }, { "epoch": 0.4413426271967182, "grad_norm": 1.7389238356224304, "learning_rate": 6.181716566492576e-07, "loss": 0.4503, "step": 25390 }, { "epoch": 0.441360009734221, "grad_norm": 1.4822944485273664, "learning_rate": 6.18144304606828e-07, "loss": 0.2659, "step": 25391 }, { "epoch": 0.44137739227172385, "grad_norm": 1.9221647889081312, "learning_rate": 6.181169521899351e-07, "loss": 0.299, "step": 25392 }, { "epoch": 0.4413947748092267, "grad_norm": 1.7431253697832636, "learning_rate": 6.180895993986657e-07, "loss": 0.324, "step": 25393 }, { "epoch": 0.44141215734672945, "grad_norm": 1.3705831428716615, "learning_rate": 6.180622462331065e-07, "loss": 0.2488, "step": 25394 }, { "epoch": 0.4414295398842323, "grad_norm": 2.843560692011934, "learning_rate": 6.180348926933444e-07, "loss": 0.4094, "step": 25395 }, { "epoch": 0.4414469224217351, "grad_norm": 1.4895461806989716, "learning_rate": 6.180075387794657e-07, "loss": 0.3483, "step": 25396 }, { "epoch": 0.44146430495923794, "grad_norm": 2.132151354995489, "learning_rate": 6.179801844915574e-07, "loss": 0.2963, "step": 25397 }, { "epoch": 0.44148168749674077, "grad_norm": 1.132149679886709, "learning_rate": 6.179528298297063e-07, "loss": 0.1791, "step": 25398 }, { "epoch": 0.4414990700342436, "grad_norm": 4.431350983745916, "learning_rate": 6.179254747939985e-07, "loss": 0.5953, "step": 25399 }, { "epoch": 0.44151645257174643, "grad_norm": 1.810818746474565, "learning_rate": 6.178981193845215e-07, "loss": 0.2741, "step": 25400 }, { "epoch": 0.44153383510924926, "grad_norm": 1.2706434019445285, "learning_rate": 6.178707636013615e-07, "loss": 0.23, "step": 25401 }, { "epoch": 0.4415512176467521, "grad_norm": 1.1116185822923164, "learning_rate": 6.178434074446053e-07, "loss": 0.2207, "step": 25402 }, { "epoch": 0.4415686001842549, "grad_norm": 1.3909937161350465, "learning_rate": 6.178160509143395e-07, "loss": 0.3381, "step": 25403 }, { "epoch": 0.4415859827217577, "grad_norm": 2.584637649432344, "learning_rate": 6.177886940106511e-07, "loss": 0.3156, "step": 25404 }, { "epoch": 0.4416033652592605, "grad_norm": 1.4780758429789185, "learning_rate": 6.177613367336266e-07, "loss": 0.303, "step": 25405 }, { "epoch": 0.44162074779676336, "grad_norm": 1.6176974245512823, "learning_rate": 6.177339790833527e-07, "loss": 0.2965, "step": 25406 }, { "epoch": 0.4416381303342662, "grad_norm": 1.1669419178684448, "learning_rate": 6.177066210599164e-07, "loss": 0.2107, "step": 25407 }, { "epoch": 0.441655512871769, "grad_norm": 1.9648623067748785, "learning_rate": 6.17679262663404e-07, "loss": 0.3795, "step": 25408 }, { "epoch": 0.44167289540927185, "grad_norm": 0.9183678489382391, "learning_rate": 6.176519038939023e-07, "loss": 0.1752, "step": 25409 }, { "epoch": 0.4416902779467747, "grad_norm": 1.1953374363999059, "learning_rate": 6.176245447514982e-07, "loss": 0.1901, "step": 25410 }, { "epoch": 0.4417076604842775, "grad_norm": 1.6039031146846805, "learning_rate": 6.175971852362784e-07, "loss": 0.4601, "step": 25411 }, { "epoch": 0.44172504302178034, "grad_norm": 1.3455381670059845, "learning_rate": 6.175698253483295e-07, "loss": 0.2333, "step": 25412 }, { "epoch": 0.44174242555928317, "grad_norm": 1.5954881828707836, "learning_rate": 6.175424650877382e-07, "loss": 0.2203, "step": 25413 }, { "epoch": 0.44175980809678594, "grad_norm": 2.358514615537797, "learning_rate": 6.175151044545913e-07, "loss": 0.3765, "step": 25414 }, { "epoch": 0.4417771906342888, "grad_norm": 1.4098232090923928, "learning_rate": 6.174877434489756e-07, "loss": 0.3898, "step": 25415 }, { "epoch": 0.4417945731717916, "grad_norm": 1.3922992046711413, "learning_rate": 6.174603820709776e-07, "loss": 0.2163, "step": 25416 }, { "epoch": 0.44181195570929443, "grad_norm": 1.1231651431275556, "learning_rate": 6.174330203206844e-07, "loss": 0.2356, "step": 25417 }, { "epoch": 0.44182933824679727, "grad_norm": 1.4095087878622792, "learning_rate": 6.174056581981821e-07, "loss": 0.3082, "step": 25418 }, { "epoch": 0.4418467207843001, "grad_norm": 2.8081687679497946, "learning_rate": 6.173782957035579e-07, "loss": 0.3058, "step": 25419 }, { "epoch": 0.4418641033218029, "grad_norm": 1.2372476146688154, "learning_rate": 6.173509328368985e-07, "loss": 0.2189, "step": 25420 }, { "epoch": 0.44188148585930576, "grad_norm": 1.3866913168514674, "learning_rate": 6.173235695982906e-07, "loss": 0.2023, "step": 25421 }, { "epoch": 0.4418988683968086, "grad_norm": 2.7474253511340576, "learning_rate": 6.172962059878207e-07, "loss": 0.3637, "step": 25422 }, { "epoch": 0.4419162509343114, "grad_norm": 1.3616635976081373, "learning_rate": 6.172688420055761e-07, "loss": 0.2255, "step": 25423 }, { "epoch": 0.4419336334718142, "grad_norm": 1.5590120386031756, "learning_rate": 6.172414776516427e-07, "loss": 0.392, "step": 25424 }, { "epoch": 0.441951016009317, "grad_norm": 2.754452662397719, "learning_rate": 6.172141129261079e-07, "loss": 0.2212, "step": 25425 }, { "epoch": 0.44196839854681985, "grad_norm": 2.366556824599601, "learning_rate": 6.171867478290582e-07, "loss": 0.34, "step": 25426 }, { "epoch": 0.4419857810843227, "grad_norm": 1.443625733779573, "learning_rate": 6.171593823605802e-07, "loss": 0.2629, "step": 25427 }, { "epoch": 0.4420031636218255, "grad_norm": 1.3452053820838141, "learning_rate": 6.171320165207607e-07, "loss": 0.1977, "step": 25428 }, { "epoch": 0.44202054615932834, "grad_norm": 1.2135678273873955, "learning_rate": 6.171046503096868e-07, "loss": 0.188, "step": 25429 }, { "epoch": 0.4420379286968312, "grad_norm": 2.2757738075650518, "learning_rate": 6.170772837274449e-07, "loss": 0.3863, "step": 25430 }, { "epoch": 0.442055311234334, "grad_norm": 2.864516302795742, "learning_rate": 6.170499167741216e-07, "loss": 0.3461, "step": 25431 }, { "epoch": 0.44207269377183683, "grad_norm": 1.550773668935331, "learning_rate": 6.17022549449804e-07, "loss": 0.271, "step": 25432 }, { "epoch": 0.44209007630933966, "grad_norm": 2.553921896322513, "learning_rate": 6.169951817545786e-07, "loss": 0.3158, "step": 25433 }, { "epoch": 0.44210745884684244, "grad_norm": 1.8717159961057195, "learning_rate": 6.169678136885322e-07, "loss": 0.3247, "step": 25434 }, { "epoch": 0.44212484138434527, "grad_norm": 1.917726282818157, "learning_rate": 6.169404452517516e-07, "loss": 0.3126, "step": 25435 }, { "epoch": 0.4421422239218481, "grad_norm": 2.2992512308896598, "learning_rate": 6.169130764443236e-07, "loss": 0.2714, "step": 25436 }, { "epoch": 0.44215960645935093, "grad_norm": 2.838603478475086, "learning_rate": 6.168857072663347e-07, "loss": 0.1719, "step": 25437 }, { "epoch": 0.44217698899685376, "grad_norm": 1.098118146185717, "learning_rate": 6.168583377178718e-07, "loss": 0.2343, "step": 25438 }, { "epoch": 0.4421943715343566, "grad_norm": 1.945010224256296, "learning_rate": 6.168309677990218e-07, "loss": 0.2968, "step": 25439 }, { "epoch": 0.4422117540718594, "grad_norm": 1.8129984040749227, "learning_rate": 6.16803597509871e-07, "loss": 0.3713, "step": 25440 }, { "epoch": 0.44222913660936225, "grad_norm": 1.1893083158690307, "learning_rate": 6.167762268505068e-07, "loss": 0.206, "step": 25441 }, { "epoch": 0.4422465191468651, "grad_norm": 2.638636400050909, "learning_rate": 6.167488558210155e-07, "loss": 0.4776, "step": 25442 }, { "epoch": 0.4422639016843679, "grad_norm": 2.037220267003303, "learning_rate": 6.167214844214839e-07, "loss": 0.3391, "step": 25443 }, { "epoch": 0.4422812842218707, "grad_norm": 2.5006110775641557, "learning_rate": 6.166941126519987e-07, "loss": 0.1564, "step": 25444 }, { "epoch": 0.4422986667593735, "grad_norm": 1.5986357573536745, "learning_rate": 6.16666740512647e-07, "loss": 0.3282, "step": 25445 }, { "epoch": 0.44231604929687635, "grad_norm": 1.9257713949326962, "learning_rate": 6.166393680035152e-07, "loss": 0.2988, "step": 25446 }, { "epoch": 0.4423334318343792, "grad_norm": 1.6874398247128826, "learning_rate": 6.166119951246902e-07, "loss": 0.2752, "step": 25447 }, { "epoch": 0.442350814371882, "grad_norm": 1.7553241945499716, "learning_rate": 6.165846218762588e-07, "loss": 0.3548, "step": 25448 }, { "epoch": 0.44236819690938484, "grad_norm": 2.9389025143311627, "learning_rate": 6.165572482583077e-07, "loss": 0.6256, "step": 25449 }, { "epoch": 0.44238557944688767, "grad_norm": 1.9124034126059344, "learning_rate": 6.165298742709235e-07, "loss": 0.3846, "step": 25450 }, { "epoch": 0.4424029619843905, "grad_norm": 1.8020661499107449, "learning_rate": 6.165024999141933e-07, "loss": 0.2754, "step": 25451 }, { "epoch": 0.4424203445218933, "grad_norm": 1.6540547069067495, "learning_rate": 6.164751251882036e-07, "loss": 0.3348, "step": 25452 }, { "epoch": 0.44243772705939616, "grad_norm": 2.3513904252626023, "learning_rate": 6.164477500930413e-07, "loss": 0.2105, "step": 25453 }, { "epoch": 0.44245510959689893, "grad_norm": 2.2409032562059505, "learning_rate": 6.164203746287932e-07, "loss": 0.2057, "step": 25454 }, { "epoch": 0.44247249213440176, "grad_norm": 1.487402619263033, "learning_rate": 6.163929987955459e-07, "loss": 0.2423, "step": 25455 }, { "epoch": 0.4424898746719046, "grad_norm": 1.07905330423968, "learning_rate": 6.163656225933863e-07, "loss": 0.1668, "step": 25456 }, { "epoch": 0.4425072572094074, "grad_norm": 1.9666860081197521, "learning_rate": 6.16338246022401e-07, "loss": 0.2335, "step": 25457 }, { "epoch": 0.44252463974691025, "grad_norm": 1.5144913549131354, "learning_rate": 6.163108690826772e-07, "loss": 0.2264, "step": 25458 }, { "epoch": 0.4425420222844131, "grad_norm": 1.447881364491844, "learning_rate": 6.162834917743011e-07, "loss": 0.4044, "step": 25459 }, { "epoch": 0.4425594048219159, "grad_norm": 1.9762262693845487, "learning_rate": 6.162561140973597e-07, "loss": 0.3376, "step": 25460 }, { "epoch": 0.44257678735941874, "grad_norm": 1.8037998428190418, "learning_rate": 6.1622873605194e-07, "loss": 0.2328, "step": 25461 }, { "epoch": 0.4425941698969216, "grad_norm": 1.5965529165585293, "learning_rate": 6.162013576381287e-07, "loss": 0.1699, "step": 25462 }, { "epoch": 0.4426115524344244, "grad_norm": 1.7307216578791247, "learning_rate": 6.161739788560121e-07, "loss": 0.2633, "step": 25463 }, { "epoch": 0.4426289349719272, "grad_norm": 1.0913429655915294, "learning_rate": 6.161465997056777e-07, "loss": 0.1944, "step": 25464 }, { "epoch": 0.44264631750943, "grad_norm": 1.651771595868082, "learning_rate": 6.161192201872117e-07, "loss": 0.2952, "step": 25465 }, { "epoch": 0.44266370004693284, "grad_norm": 1.962812545887632, "learning_rate": 6.160918403007013e-07, "loss": 0.3455, "step": 25466 }, { "epoch": 0.44268108258443567, "grad_norm": 1.4824480166957537, "learning_rate": 6.16064460046233e-07, "loss": 0.1948, "step": 25467 }, { "epoch": 0.4426984651219385, "grad_norm": 1.27818869054192, "learning_rate": 6.160370794238937e-07, "loss": 0.165, "step": 25468 }, { "epoch": 0.44271584765944133, "grad_norm": 1.7706017363278383, "learning_rate": 6.160096984337701e-07, "loss": 0.3195, "step": 25469 }, { "epoch": 0.44273323019694416, "grad_norm": 1.1488547063099657, "learning_rate": 6.15982317075949e-07, "loss": 0.1377, "step": 25470 }, { "epoch": 0.442750612734447, "grad_norm": 1.3110625436171364, "learning_rate": 6.159549353505175e-07, "loss": 0.2781, "step": 25471 }, { "epoch": 0.4427679952719498, "grad_norm": 1.399871163402042, "learning_rate": 6.15927553257562e-07, "loss": 0.2563, "step": 25472 }, { "epoch": 0.44278537780945265, "grad_norm": 2.494447988824346, "learning_rate": 6.159001707971692e-07, "loss": 0.2869, "step": 25473 }, { "epoch": 0.4428027603469554, "grad_norm": 1.234053668550827, "learning_rate": 6.158727879694263e-07, "loss": 0.3465, "step": 25474 }, { "epoch": 0.44282014288445826, "grad_norm": 1.649100715195197, "learning_rate": 6.158454047744198e-07, "loss": 0.2528, "step": 25475 }, { "epoch": 0.4428375254219611, "grad_norm": 1.4758317768073188, "learning_rate": 6.158180212122367e-07, "loss": 0.6085, "step": 25476 }, { "epoch": 0.4428549079594639, "grad_norm": 1.5639967525229794, "learning_rate": 6.157906372829637e-07, "loss": 0.3784, "step": 25477 }, { "epoch": 0.44287229049696675, "grad_norm": 2.233724161069836, "learning_rate": 6.157632529866873e-07, "loss": 0.2614, "step": 25478 }, { "epoch": 0.4428896730344696, "grad_norm": 1.6119538895731733, "learning_rate": 6.157358683234948e-07, "loss": 0.3469, "step": 25479 }, { "epoch": 0.4429070555719724, "grad_norm": 1.5677854222256495, "learning_rate": 6.157084832934727e-07, "loss": 0.3194, "step": 25480 }, { "epoch": 0.44292443810947524, "grad_norm": 1.887273401898783, "learning_rate": 6.156810978967079e-07, "loss": 0.3576, "step": 25481 }, { "epoch": 0.44294182064697807, "grad_norm": 1.7254293269015515, "learning_rate": 6.15653712133287e-07, "loss": 0.2935, "step": 25482 }, { "epoch": 0.44295920318448084, "grad_norm": 1.4195914173606752, "learning_rate": 6.156263260032973e-07, "loss": 0.3522, "step": 25483 }, { "epoch": 0.4429765857219837, "grad_norm": 0.9828516965110511, "learning_rate": 6.155989395068249e-07, "loss": 0.144, "step": 25484 }, { "epoch": 0.4429939682594865, "grad_norm": 1.9763781930650692, "learning_rate": 6.155715526439571e-07, "loss": 0.3163, "step": 25485 }, { "epoch": 0.44301135079698933, "grad_norm": 1.1249730811849599, "learning_rate": 6.155441654147806e-07, "loss": 0.1736, "step": 25486 }, { "epoch": 0.44302873333449216, "grad_norm": 1.2346962387319362, "learning_rate": 6.155167778193821e-07, "loss": 0.2519, "step": 25487 }, { "epoch": 0.443046115871995, "grad_norm": 1.7606489205549694, "learning_rate": 6.154893898578485e-07, "loss": 0.3374, "step": 25488 }, { "epoch": 0.4430634984094978, "grad_norm": 1.5021357758379916, "learning_rate": 6.154620015302668e-07, "loss": 0.459, "step": 25489 }, { "epoch": 0.44308088094700065, "grad_norm": 1.3241352193610998, "learning_rate": 6.154346128367232e-07, "loss": 0.3781, "step": 25490 }, { "epoch": 0.4430982634845035, "grad_norm": 1.8755967131316218, "learning_rate": 6.154072237773051e-07, "loss": 0.216, "step": 25491 }, { "epoch": 0.4431156460220063, "grad_norm": 1.2781467434484874, "learning_rate": 6.153798343520992e-07, "loss": 0.2592, "step": 25492 }, { "epoch": 0.4431330285595091, "grad_norm": 2.629376046703899, "learning_rate": 6.153524445611922e-07, "loss": 0.3141, "step": 25493 }, { "epoch": 0.4431504110970119, "grad_norm": 1.1352547327899623, "learning_rate": 6.153250544046708e-07, "loss": 0.2888, "step": 25494 }, { "epoch": 0.44316779363451475, "grad_norm": 1.7594006925555965, "learning_rate": 6.15297663882622e-07, "loss": 0.2445, "step": 25495 }, { "epoch": 0.4431851761720176, "grad_norm": 1.4449838085942206, "learning_rate": 6.152702729951326e-07, "loss": 0.3337, "step": 25496 }, { "epoch": 0.4432025587095204, "grad_norm": 2.0161661521611207, "learning_rate": 6.152428817422896e-07, "loss": 0.2884, "step": 25497 }, { "epoch": 0.44321994124702324, "grad_norm": 1.6553239759128628, "learning_rate": 6.152154901241792e-07, "loss": 0.2773, "step": 25498 }, { "epoch": 0.44323732378452607, "grad_norm": 1.7697026246647338, "learning_rate": 6.151880981408889e-07, "loss": 0.2472, "step": 25499 }, { "epoch": 0.4432547063220289, "grad_norm": 1.9717709676158885, "learning_rate": 6.151607057925051e-07, "loss": 0.2963, "step": 25500 }, { "epoch": 0.44327208885953173, "grad_norm": 2.806722022302103, "learning_rate": 6.151333130791149e-07, "loss": 0.3964, "step": 25501 }, { "epoch": 0.44328947139703456, "grad_norm": 1.3565843723379398, "learning_rate": 6.151059200008049e-07, "loss": 0.23, "step": 25502 }, { "epoch": 0.44330685393453734, "grad_norm": 1.4147058858536667, "learning_rate": 6.150785265576619e-07, "loss": 0.4854, "step": 25503 }, { "epoch": 0.44332423647204017, "grad_norm": 1.882249601234817, "learning_rate": 6.15051132749773e-07, "loss": 0.2834, "step": 25504 }, { "epoch": 0.443341619009543, "grad_norm": 2.2354975570416977, "learning_rate": 6.150237385772249e-07, "loss": 0.3871, "step": 25505 }, { "epoch": 0.44335900154704583, "grad_norm": 2.040938633734325, "learning_rate": 6.149963440401043e-07, "loss": 0.3548, "step": 25506 }, { "epoch": 0.44337638408454866, "grad_norm": 1.4729796832307862, "learning_rate": 6.14968949138498e-07, "loss": 0.2067, "step": 25507 }, { "epoch": 0.4433937666220515, "grad_norm": 1.5928247646212508, "learning_rate": 6.149415538724932e-07, "loss": 0.3242, "step": 25508 }, { "epoch": 0.4434111491595543, "grad_norm": 1.9112584006408861, "learning_rate": 6.149141582421762e-07, "loss": 0.2677, "step": 25509 }, { "epoch": 0.44342853169705715, "grad_norm": 4.362741322440547, "learning_rate": 6.148867622476343e-07, "loss": 0.4301, "step": 25510 }, { "epoch": 0.44344591423456, "grad_norm": 1.9052307017338186, "learning_rate": 6.14859365888954e-07, "loss": 0.2668, "step": 25511 }, { "epoch": 0.4434632967720628, "grad_norm": 1.5589787040493333, "learning_rate": 6.148319691662224e-07, "loss": 0.1187, "step": 25512 }, { "epoch": 0.4434806793095656, "grad_norm": 1.440367073425211, "learning_rate": 6.148045720795261e-07, "loss": 0.3419, "step": 25513 }, { "epoch": 0.4434980618470684, "grad_norm": 1.3112546745929632, "learning_rate": 6.147771746289522e-07, "loss": 0.2548, "step": 25514 }, { "epoch": 0.44351544438457124, "grad_norm": 1.8498024354054614, "learning_rate": 6.147497768145873e-07, "loss": 0.2776, "step": 25515 }, { "epoch": 0.4435328269220741, "grad_norm": 1.44591255802784, "learning_rate": 6.147223786365182e-07, "loss": 0.2277, "step": 25516 }, { "epoch": 0.4435502094595769, "grad_norm": 2.818453970273117, "learning_rate": 6.146949800948319e-07, "loss": 0.2391, "step": 25517 }, { "epoch": 0.44356759199707974, "grad_norm": 1.911850008901015, "learning_rate": 6.146675811896154e-07, "loss": 0.2444, "step": 25518 }, { "epoch": 0.44358497453458257, "grad_norm": 0.6733187425829311, "learning_rate": 6.146401819209551e-07, "loss": 0.3185, "step": 25519 }, { "epoch": 0.4436023570720854, "grad_norm": 1.4984582500392556, "learning_rate": 6.146127822889382e-07, "loss": 0.2734, "step": 25520 }, { "epoch": 0.4436197396095882, "grad_norm": 1.5944444607210149, "learning_rate": 6.145853822936514e-07, "loss": 0.3096, "step": 25521 }, { "epoch": 0.44363712214709106, "grad_norm": 1.4566277991127021, "learning_rate": 6.145579819351817e-07, "loss": 0.3686, "step": 25522 }, { "epoch": 0.44365450468459383, "grad_norm": 3.033851223876614, "learning_rate": 6.145305812136155e-07, "loss": 0.1982, "step": 25523 }, { "epoch": 0.44367188722209666, "grad_norm": 1.9413125489098004, "learning_rate": 6.145031801290404e-07, "loss": 0.3631, "step": 25524 }, { "epoch": 0.4436892697595995, "grad_norm": 1.324920182085654, "learning_rate": 6.144757786815424e-07, "loss": 0.3019, "step": 25525 }, { "epoch": 0.4437066522971023, "grad_norm": 1.6540625764970474, "learning_rate": 6.144483768712091e-07, "loss": 0.2897, "step": 25526 }, { "epoch": 0.44372403483460515, "grad_norm": 1.9365424221276857, "learning_rate": 6.144209746981268e-07, "loss": 0.2327, "step": 25527 }, { "epoch": 0.443741417372108, "grad_norm": 1.0700609758426538, "learning_rate": 6.143935721623826e-07, "loss": 0.2571, "step": 25528 }, { "epoch": 0.4437587999096108, "grad_norm": 1.907730151035221, "learning_rate": 6.143661692640634e-07, "loss": 0.4181, "step": 25529 }, { "epoch": 0.44377618244711364, "grad_norm": 1.6106532217479426, "learning_rate": 6.14338766003256e-07, "loss": 0.3679, "step": 25530 }, { "epoch": 0.4437935649846165, "grad_norm": 1.0628727715664188, "learning_rate": 6.143113623800472e-07, "loss": 0.3482, "step": 25531 }, { "epoch": 0.4438109475221193, "grad_norm": 1.809388685623453, "learning_rate": 6.142839583945238e-07, "loss": 0.5124, "step": 25532 }, { "epoch": 0.4438283300596221, "grad_norm": 1.4904918043001112, "learning_rate": 6.142565540467729e-07, "loss": 0.2425, "step": 25533 }, { "epoch": 0.4438457125971249, "grad_norm": 1.5188020440239605, "learning_rate": 6.14229149336881e-07, "loss": 0.5761, "step": 25534 }, { "epoch": 0.44386309513462774, "grad_norm": 3.6683396044925867, "learning_rate": 6.142017442649354e-07, "loss": 0.3152, "step": 25535 }, { "epoch": 0.44388047767213057, "grad_norm": 2.9160346404605337, "learning_rate": 6.141743388310225e-07, "loss": 0.4246, "step": 25536 }, { "epoch": 0.4438978602096334, "grad_norm": 1.8959163775263361, "learning_rate": 6.141469330352298e-07, "loss": 0.3097, "step": 25537 }, { "epoch": 0.44391524274713623, "grad_norm": 1.1592370555902227, "learning_rate": 6.141195268776433e-07, "loss": 0.2397, "step": 25538 }, { "epoch": 0.44393262528463906, "grad_norm": 2.0400235128256647, "learning_rate": 6.140921203583506e-07, "loss": 0.3512, "step": 25539 }, { "epoch": 0.4439500078221419, "grad_norm": 2.5957724018706676, "learning_rate": 6.140647134774381e-07, "loss": 0.2728, "step": 25540 }, { "epoch": 0.4439673903596447, "grad_norm": 1.5393833215626425, "learning_rate": 6.140373062349931e-07, "loss": 0.3451, "step": 25541 }, { "epoch": 0.44398477289714755, "grad_norm": 2.655384364796522, "learning_rate": 6.14009898631102e-07, "loss": 0.5821, "step": 25542 }, { "epoch": 0.4440021554346503, "grad_norm": 1.5332359282643255, "learning_rate": 6.13982490665852e-07, "loss": 0.2159, "step": 25543 }, { "epoch": 0.44401953797215316, "grad_norm": 2.820314551287503, "learning_rate": 6.139550823393298e-07, "loss": 0.3042, "step": 25544 }, { "epoch": 0.444036920509656, "grad_norm": 1.2387509177383986, "learning_rate": 6.139276736516223e-07, "loss": 0.2123, "step": 25545 }, { "epoch": 0.4440543030471588, "grad_norm": 1.2091500437657348, "learning_rate": 6.139002646028166e-07, "loss": 0.3326, "step": 25546 }, { "epoch": 0.44407168558466165, "grad_norm": 1.626951477244659, "learning_rate": 6.138728551929993e-07, "loss": 0.2311, "step": 25547 }, { "epoch": 0.4440890681221645, "grad_norm": 1.290158194961676, "learning_rate": 6.138454454222572e-07, "loss": 0.2686, "step": 25548 }, { "epoch": 0.4441064506596673, "grad_norm": 1.1626106627515904, "learning_rate": 6.138180352906776e-07, "loss": 0.4034, "step": 25549 }, { "epoch": 0.44412383319717014, "grad_norm": 1.4662660191349313, "learning_rate": 6.137906247983469e-07, "loss": 0.2804, "step": 25550 }, { "epoch": 0.44414121573467297, "grad_norm": 2.0735185170555863, "learning_rate": 6.137632139453522e-07, "loss": 0.2754, "step": 25551 }, { "epoch": 0.4441585982721758, "grad_norm": 1.6308089524532219, "learning_rate": 6.137358027317804e-07, "loss": 0.2424, "step": 25552 }, { "epoch": 0.44417598080967857, "grad_norm": 1.9060178778129475, "learning_rate": 6.137083911577186e-07, "loss": 0.2568, "step": 25553 }, { "epoch": 0.4441933633471814, "grad_norm": 1.9939259663223283, "learning_rate": 6.13680979223253e-07, "loss": 0.3472, "step": 25554 }, { "epoch": 0.44421074588468423, "grad_norm": 1.1802231812611321, "learning_rate": 6.136535669284711e-07, "loss": 0.3385, "step": 25555 }, { "epoch": 0.44422812842218706, "grad_norm": 1.4152245683081497, "learning_rate": 6.136261542734598e-07, "loss": 0.316, "step": 25556 }, { "epoch": 0.4442455109596899, "grad_norm": 2.1008373284797464, "learning_rate": 6.135987412583056e-07, "loss": 0.22, "step": 25557 }, { "epoch": 0.4442628934971927, "grad_norm": 3.095917149109336, "learning_rate": 6.135713278830954e-07, "loss": 0.3727, "step": 25558 }, { "epoch": 0.44428027603469555, "grad_norm": 1.43476167344546, "learning_rate": 6.135439141479165e-07, "loss": 0.3346, "step": 25559 }, { "epoch": 0.4442976585721984, "grad_norm": 1.8578296902615257, "learning_rate": 6.135165000528555e-07, "loss": 0.2275, "step": 25560 }, { "epoch": 0.4443150411097012, "grad_norm": 2.7509849359171965, "learning_rate": 6.134890855979994e-07, "loss": 0.3804, "step": 25561 }, { "epoch": 0.44433242364720404, "grad_norm": 1.9052614984081298, "learning_rate": 6.134616707834351e-07, "loss": 0.253, "step": 25562 }, { "epoch": 0.4443498061847068, "grad_norm": 1.8286898448477455, "learning_rate": 6.134342556092491e-07, "loss": 0.324, "step": 25563 }, { "epoch": 0.44436718872220965, "grad_norm": 1.7744002374762353, "learning_rate": 6.134068400755288e-07, "loss": 0.2891, "step": 25564 }, { "epoch": 0.4443845712597125, "grad_norm": 2.163618867844693, "learning_rate": 6.13379424182361e-07, "loss": 0.1504, "step": 25565 }, { "epoch": 0.4444019537972153, "grad_norm": 2.035979002472784, "learning_rate": 6.133520079298324e-07, "loss": 0.2884, "step": 25566 }, { "epoch": 0.44441933633471814, "grad_norm": 1.8579358748869632, "learning_rate": 6.133245913180299e-07, "loss": 0.3684, "step": 25567 }, { "epoch": 0.44443671887222097, "grad_norm": 1.9856416591787796, "learning_rate": 6.132971743470409e-07, "loss": 0.3178, "step": 25568 }, { "epoch": 0.4444541014097238, "grad_norm": 2.1531662721969536, "learning_rate": 6.132697570169515e-07, "loss": 0.387, "step": 25569 }, { "epoch": 0.44447148394722663, "grad_norm": 1.72559534212955, "learning_rate": 6.132423393278491e-07, "loss": 0.5224, "step": 25570 }, { "epoch": 0.44448886648472946, "grad_norm": 1.6668224479958431, "learning_rate": 6.132149212798206e-07, "loss": 0.2389, "step": 25571 }, { "epoch": 0.4445062490222323, "grad_norm": 1.5221654452653643, "learning_rate": 6.131875028729529e-07, "loss": 0.2571, "step": 25572 }, { "epoch": 0.44452363155973507, "grad_norm": 3.080199692825623, "learning_rate": 6.131600841073325e-07, "loss": 0.3659, "step": 25573 }, { "epoch": 0.4445410140972379, "grad_norm": 3.052299058106799, "learning_rate": 6.131326649830469e-07, "loss": 0.2044, "step": 25574 }, { "epoch": 0.4445583966347407, "grad_norm": 2.721217994990492, "learning_rate": 6.131052455001825e-07, "loss": 0.4276, "step": 25575 }, { "epoch": 0.44457577917224356, "grad_norm": 2.3291476636035053, "learning_rate": 6.130778256588265e-07, "loss": 0.2458, "step": 25576 }, { "epoch": 0.4445931617097464, "grad_norm": 2.3115279014140433, "learning_rate": 6.130504054590657e-07, "loss": 0.3526, "step": 25577 }, { "epoch": 0.4446105442472492, "grad_norm": 1.8029366888119973, "learning_rate": 6.130229849009873e-07, "loss": 0.3565, "step": 25578 }, { "epoch": 0.44462792678475205, "grad_norm": 5.386208666285501, "learning_rate": 6.129955639846777e-07, "loss": 0.3667, "step": 25579 }, { "epoch": 0.4446453093222549, "grad_norm": 1.4845221186530355, "learning_rate": 6.129681427102242e-07, "loss": 0.1751, "step": 25580 }, { "epoch": 0.4446626918597577, "grad_norm": 1.860809915831955, "learning_rate": 6.129407210777135e-07, "loss": 0.371, "step": 25581 }, { "epoch": 0.44468007439726054, "grad_norm": 1.2571102348419783, "learning_rate": 6.129132990872326e-07, "loss": 0.3154, "step": 25582 }, { "epoch": 0.4446974569347633, "grad_norm": 1.4492569148693093, "learning_rate": 6.128858767388682e-07, "loss": 0.146, "step": 25583 }, { "epoch": 0.44471483947226614, "grad_norm": 1.6037631617302266, "learning_rate": 6.128584540327077e-07, "loss": 0.3495, "step": 25584 }, { "epoch": 0.444732222009769, "grad_norm": 3.121695170092249, "learning_rate": 6.128310309688377e-07, "loss": 0.4599, "step": 25585 }, { "epoch": 0.4447496045472718, "grad_norm": 1.569377299805918, "learning_rate": 6.128036075473452e-07, "loss": 0.6619, "step": 25586 }, { "epoch": 0.44476698708477463, "grad_norm": 1.7188725545586507, "learning_rate": 6.127761837683171e-07, "loss": 0.4433, "step": 25587 }, { "epoch": 0.44478436962227746, "grad_norm": 1.3961007541961714, "learning_rate": 6.127487596318401e-07, "loss": 0.1814, "step": 25588 }, { "epoch": 0.4448017521597803, "grad_norm": 2.0212306450433695, "learning_rate": 6.127213351380015e-07, "loss": 0.4042, "step": 25589 }, { "epoch": 0.4448191346972831, "grad_norm": 1.5292521364763023, "learning_rate": 6.12693910286888e-07, "loss": 0.249, "step": 25590 }, { "epoch": 0.44483651723478596, "grad_norm": 1.6829132661075796, "learning_rate": 6.126664850785866e-07, "loss": 0.3015, "step": 25591 }, { "epoch": 0.4448538997722888, "grad_norm": 1.7209485519806555, "learning_rate": 6.126390595131841e-07, "loss": 0.3025, "step": 25592 }, { "epoch": 0.44487128230979156, "grad_norm": 1.3799302601682484, "learning_rate": 6.126116335907676e-07, "loss": 0.2889, "step": 25593 }, { "epoch": 0.4448886648472944, "grad_norm": 1.7408936250432248, "learning_rate": 6.12584207311424e-07, "loss": 0.2462, "step": 25594 }, { "epoch": 0.4449060473847972, "grad_norm": 1.4085699630971962, "learning_rate": 6.125567806752402e-07, "loss": 0.3502, "step": 25595 }, { "epoch": 0.44492342992230005, "grad_norm": 1.5551429358084683, "learning_rate": 6.125293536823029e-07, "loss": 0.2309, "step": 25596 }, { "epoch": 0.4449408124598029, "grad_norm": 1.45130476572308, "learning_rate": 6.125019263326995e-07, "loss": 0.264, "step": 25597 }, { "epoch": 0.4449581949973057, "grad_norm": 1.2319999499685625, "learning_rate": 6.124744986265165e-07, "loss": 0.2857, "step": 25598 }, { "epoch": 0.44497557753480854, "grad_norm": 1.3370600666234604, "learning_rate": 6.124470705638411e-07, "loss": 0.2052, "step": 25599 }, { "epoch": 0.44499296007231137, "grad_norm": 1.8628497838556548, "learning_rate": 6.124196421447601e-07, "loss": 0.2869, "step": 25600 }, { "epoch": 0.4450103426098142, "grad_norm": 1.7980344414246625, "learning_rate": 6.123922133693604e-07, "loss": 0.2955, "step": 25601 }, { "epoch": 0.44502772514731703, "grad_norm": 2.396906167292486, "learning_rate": 6.123647842377291e-07, "loss": 0.5062, "step": 25602 }, { "epoch": 0.4450451076848198, "grad_norm": 1.537462739071424, "learning_rate": 6.123373547499532e-07, "loss": 0.307, "step": 25603 }, { "epoch": 0.44506249022232264, "grad_norm": 1.7252713327463969, "learning_rate": 6.123099249061193e-07, "loss": 0.4293, "step": 25604 }, { "epoch": 0.44507987275982547, "grad_norm": 1.419642904975254, "learning_rate": 6.122824947063145e-07, "loss": 0.2557, "step": 25605 }, { "epoch": 0.4450972552973283, "grad_norm": 2.0523454255336904, "learning_rate": 6.122550641506257e-07, "loss": 0.4216, "step": 25606 }, { "epoch": 0.44511463783483113, "grad_norm": 1.6334616373849644, "learning_rate": 6.122276332391402e-07, "loss": 0.2846, "step": 25607 }, { "epoch": 0.44513202037233396, "grad_norm": 1.3971924951466812, "learning_rate": 6.122002019719443e-07, "loss": 0.3388, "step": 25608 }, { "epoch": 0.4451494029098368, "grad_norm": 1.803443675827729, "learning_rate": 6.121727703491258e-07, "loss": 0.1905, "step": 25609 }, { "epoch": 0.4451667854473396, "grad_norm": 1.6435698123866493, "learning_rate": 6.121453383707706e-07, "loss": 0.2764, "step": 25610 }, { "epoch": 0.44518416798484245, "grad_norm": 1.731740799767608, "learning_rate": 6.121179060369664e-07, "loss": 0.307, "step": 25611 }, { "epoch": 0.4452015505223453, "grad_norm": 2.0327291496178974, "learning_rate": 6.120904733478e-07, "loss": 0.2194, "step": 25612 }, { "epoch": 0.44521893305984805, "grad_norm": 1.1022276344685185, "learning_rate": 6.120630403033584e-07, "loss": 0.3423, "step": 25613 }, { "epoch": 0.4452363155973509, "grad_norm": 1.4265990043966896, "learning_rate": 6.120356069037283e-07, "loss": 0.2743, "step": 25614 }, { "epoch": 0.4452536981348537, "grad_norm": 1.1789160755196175, "learning_rate": 6.120081731489966e-07, "loss": 0.3906, "step": 25615 }, { "epoch": 0.44527108067235655, "grad_norm": 3.7559885133320465, "learning_rate": 6.119807390392507e-07, "loss": 0.3183, "step": 25616 }, { "epoch": 0.4452884632098594, "grad_norm": 2.3058600091743666, "learning_rate": 6.119533045745773e-07, "loss": 0.3414, "step": 25617 }, { "epoch": 0.4453058457473622, "grad_norm": 1.348096545739589, "learning_rate": 6.119258697550632e-07, "loss": 0.1849, "step": 25618 }, { "epoch": 0.44532322828486504, "grad_norm": 2.986459817879661, "learning_rate": 6.118984345807956e-07, "loss": 0.4538, "step": 25619 }, { "epoch": 0.44534061082236787, "grad_norm": 8.630601186609727, "learning_rate": 6.118709990518613e-07, "loss": 0.4422, "step": 25620 }, { "epoch": 0.4453579933598707, "grad_norm": 1.5982967258877159, "learning_rate": 6.118435631683474e-07, "loss": 0.1332, "step": 25621 }, { "epoch": 0.44537537589737347, "grad_norm": 2.620538880481662, "learning_rate": 6.118161269303409e-07, "loss": 0.3301, "step": 25622 }, { "epoch": 0.4453927584348763, "grad_norm": 1.4956721087178275, "learning_rate": 6.117886903379283e-07, "loss": 0.4359, "step": 25623 }, { "epoch": 0.44541014097237913, "grad_norm": 3.2251497137081846, "learning_rate": 6.117612533911973e-07, "loss": 0.2598, "step": 25624 }, { "epoch": 0.44542752350988196, "grad_norm": 1.3689617643308798, "learning_rate": 6.117338160902343e-07, "loss": 0.5659, "step": 25625 }, { "epoch": 0.4454449060473848, "grad_norm": 1.654134433580657, "learning_rate": 6.117063784351263e-07, "loss": 0.351, "step": 25626 }, { "epoch": 0.4454622885848876, "grad_norm": 1.1382647289117704, "learning_rate": 6.116789404259604e-07, "loss": 0.2499, "step": 25627 }, { "epoch": 0.44547967112239045, "grad_norm": 2.3460140236558757, "learning_rate": 6.116515020628237e-07, "loss": 0.329, "step": 25628 }, { "epoch": 0.4454970536598933, "grad_norm": 1.3626757225811268, "learning_rate": 6.116240633458029e-07, "loss": 0.2711, "step": 25629 }, { "epoch": 0.4455144361973961, "grad_norm": 3.3035924805130494, "learning_rate": 6.115966242749853e-07, "loss": 0.5331, "step": 25630 }, { "epoch": 0.44553181873489894, "grad_norm": 1.3516401361686123, "learning_rate": 6.115691848504576e-07, "loss": 0.3336, "step": 25631 }, { "epoch": 0.4455492012724017, "grad_norm": 1.582518881760563, "learning_rate": 6.115417450723069e-07, "loss": 0.1996, "step": 25632 }, { "epoch": 0.44556658380990455, "grad_norm": 1.2504828631755622, "learning_rate": 6.115143049406197e-07, "loss": 0.2569, "step": 25633 }, { "epoch": 0.4455839663474074, "grad_norm": 1.3427535841594074, "learning_rate": 6.114868644554838e-07, "loss": 0.2431, "step": 25634 }, { "epoch": 0.4456013488849102, "grad_norm": 1.8917938302808086, "learning_rate": 6.114594236169855e-07, "loss": 0.1271, "step": 25635 }, { "epoch": 0.44561873142241304, "grad_norm": 2.002707170458496, "learning_rate": 6.114319824252122e-07, "loss": 0.485, "step": 25636 }, { "epoch": 0.44563611395991587, "grad_norm": 1.7525074955248636, "learning_rate": 6.114045408802507e-07, "loss": 0.3953, "step": 25637 }, { "epoch": 0.4456534964974187, "grad_norm": 1.5253033796526516, "learning_rate": 6.113770989821879e-07, "loss": 0.2785, "step": 25638 }, { "epoch": 0.44567087903492153, "grad_norm": 2.069522990130631, "learning_rate": 6.113496567311109e-07, "loss": 0.342, "step": 25639 }, { "epoch": 0.44568826157242436, "grad_norm": 2.18205774421206, "learning_rate": 6.113222141271066e-07, "loss": 0.4561, "step": 25640 }, { "epoch": 0.4457056441099272, "grad_norm": 1.256430062719546, "learning_rate": 6.11294771170262e-07, "loss": 0.3304, "step": 25641 }, { "epoch": 0.44572302664742997, "grad_norm": 2.9059675217666556, "learning_rate": 6.112673278606643e-07, "loss": 0.4309, "step": 25642 }, { "epoch": 0.4457404091849328, "grad_norm": 2.0165261342128082, "learning_rate": 6.112398841984e-07, "loss": 0.2769, "step": 25643 }, { "epoch": 0.4457577917224356, "grad_norm": 1.8140710864820915, "learning_rate": 6.112124401835565e-07, "loss": 0.2144, "step": 25644 }, { "epoch": 0.44577517425993846, "grad_norm": 1.5862842317321104, "learning_rate": 6.111849958162206e-07, "loss": 0.3152, "step": 25645 }, { "epoch": 0.4457925567974413, "grad_norm": 1.3663807367056853, "learning_rate": 6.111575510964795e-07, "loss": 0.2494, "step": 25646 }, { "epoch": 0.4458099393349441, "grad_norm": 1.728349520959388, "learning_rate": 6.111301060244199e-07, "loss": 0.3771, "step": 25647 }, { "epoch": 0.44582732187244695, "grad_norm": 1.8932589552258832, "learning_rate": 6.111026606001289e-07, "loss": 0.2909, "step": 25648 }, { "epoch": 0.4458447044099498, "grad_norm": 1.922387871461032, "learning_rate": 6.110752148236934e-07, "loss": 0.2856, "step": 25649 }, { "epoch": 0.4458620869474526, "grad_norm": 2.0385776676236413, "learning_rate": 6.110477686952007e-07, "loss": 0.5104, "step": 25650 }, { "epoch": 0.44587946948495544, "grad_norm": 1.3758638748521193, "learning_rate": 6.110203222147374e-07, "loss": 0.1837, "step": 25651 }, { "epoch": 0.4458968520224582, "grad_norm": 1.9421307705752744, "learning_rate": 6.109928753823907e-07, "loss": 0.3261, "step": 25652 }, { "epoch": 0.44591423455996104, "grad_norm": 1.4316331998513643, "learning_rate": 6.109654281982478e-07, "loss": 0.2289, "step": 25653 }, { "epoch": 0.4459316170974639, "grad_norm": 1.746548340063433, "learning_rate": 6.109379806623952e-07, "loss": 0.3318, "step": 25654 }, { "epoch": 0.4459489996349667, "grad_norm": 1.849867566376886, "learning_rate": 6.109105327749203e-07, "loss": 0.218, "step": 25655 }, { "epoch": 0.44596638217246953, "grad_norm": 1.7900109873561918, "learning_rate": 6.108830845359099e-07, "loss": 0.3803, "step": 25656 }, { "epoch": 0.44598376470997236, "grad_norm": 2.801821203983367, "learning_rate": 6.108556359454512e-07, "loss": 0.3008, "step": 25657 }, { "epoch": 0.4460011472474752, "grad_norm": 3.5966070495596325, "learning_rate": 6.108281870036308e-07, "loss": 0.3683, "step": 25658 }, { "epoch": 0.446018529784978, "grad_norm": 4.171886457134733, "learning_rate": 6.108007377105362e-07, "loss": 0.3931, "step": 25659 }, { "epoch": 0.44603591232248085, "grad_norm": 1.501727051220171, "learning_rate": 6.107732880662541e-07, "loss": 0.2347, "step": 25660 }, { "epoch": 0.4460532948599837, "grad_norm": 1.4193508220129778, "learning_rate": 6.107458380708715e-07, "loss": 0.4066, "step": 25661 }, { "epoch": 0.44607067739748646, "grad_norm": 1.6748978339030711, "learning_rate": 6.107183877244754e-07, "loss": 0.3131, "step": 25662 }, { "epoch": 0.4460880599349893, "grad_norm": 2.004618020748145, "learning_rate": 6.106909370271531e-07, "loss": 0.3976, "step": 25663 }, { "epoch": 0.4461054424724921, "grad_norm": 3.579495982980916, "learning_rate": 6.106634859789912e-07, "loss": 0.3842, "step": 25664 }, { "epoch": 0.44612282500999495, "grad_norm": 1.1028139739759184, "learning_rate": 6.106360345800769e-07, "loss": 0.2349, "step": 25665 }, { "epoch": 0.4461402075474978, "grad_norm": 1.2475444420591966, "learning_rate": 6.106085828304972e-07, "loss": 0.2369, "step": 25666 }, { "epoch": 0.4461575900850006, "grad_norm": 2.294017671637838, "learning_rate": 6.105811307303391e-07, "loss": 0.3937, "step": 25667 }, { "epoch": 0.44617497262250344, "grad_norm": 1.5086509772942254, "learning_rate": 6.105536782796897e-07, "loss": 0.1969, "step": 25668 }, { "epoch": 0.44619235516000627, "grad_norm": 1.6516396856262923, "learning_rate": 6.105262254786357e-07, "loss": 0.2681, "step": 25669 }, { "epoch": 0.4462097376975091, "grad_norm": 1.1971947827356908, "learning_rate": 6.104987723272645e-07, "loss": 0.2382, "step": 25670 }, { "epoch": 0.44622712023501193, "grad_norm": 0.9458506052611522, "learning_rate": 6.104713188256631e-07, "loss": 0.2094, "step": 25671 }, { "epoch": 0.4462445027725147, "grad_norm": 1.1784734792355456, "learning_rate": 6.10443864973918e-07, "loss": 0.1685, "step": 25672 }, { "epoch": 0.44626188531001754, "grad_norm": 1.807152843745391, "learning_rate": 6.104164107721169e-07, "loss": 0.3297, "step": 25673 }, { "epoch": 0.44627926784752037, "grad_norm": 1.187270785789986, "learning_rate": 6.103889562203464e-07, "loss": 0.1987, "step": 25674 }, { "epoch": 0.4462966503850232, "grad_norm": 1.1990587595609246, "learning_rate": 6.103615013186936e-07, "loss": 0.1552, "step": 25675 }, { "epoch": 0.446314032922526, "grad_norm": 1.2268189789506312, "learning_rate": 6.103340460672456e-07, "loss": 0.2234, "step": 25676 }, { "epoch": 0.44633141546002886, "grad_norm": 1.3451021781647545, "learning_rate": 6.103065904660891e-07, "loss": 0.3856, "step": 25677 }, { "epoch": 0.4463487979975317, "grad_norm": 2.230332356041407, "learning_rate": 6.102791345153117e-07, "loss": 0.2796, "step": 25678 }, { "epoch": 0.4463661805350345, "grad_norm": 2.6506490990543807, "learning_rate": 6.10251678215e-07, "loss": 0.3553, "step": 25679 }, { "epoch": 0.44638356307253735, "grad_norm": 3.5775995905833953, "learning_rate": 6.102242215652412e-07, "loss": 0.288, "step": 25680 }, { "epoch": 0.4464009456100402, "grad_norm": 1.4321556868940983, "learning_rate": 6.101967645661219e-07, "loss": 0.693, "step": 25681 }, { "epoch": 0.44641832814754295, "grad_norm": 1.5393916143192357, "learning_rate": 6.1016930721773e-07, "loss": 0.248, "step": 25682 }, { "epoch": 0.4464357106850458, "grad_norm": 1.1197562493770132, "learning_rate": 6.101418495201516e-07, "loss": 0.2986, "step": 25683 }, { "epoch": 0.4464530932225486, "grad_norm": 2.575868246562598, "learning_rate": 6.101143914734743e-07, "loss": 0.278, "step": 25684 }, { "epoch": 0.44647047576005144, "grad_norm": 3.6738137547318948, "learning_rate": 6.10086933077785e-07, "loss": 0.4143, "step": 25685 }, { "epoch": 0.4464878582975543, "grad_norm": 3.5669657467150513, "learning_rate": 6.100594743331705e-07, "loss": 0.4946, "step": 25686 }, { "epoch": 0.4465052408350571, "grad_norm": 1.8001499787897715, "learning_rate": 6.100320152397181e-07, "loss": 0.3371, "step": 25687 }, { "epoch": 0.44652262337255993, "grad_norm": 1.6667728817257068, "learning_rate": 6.10004555797515e-07, "loss": 0.2484, "step": 25688 }, { "epoch": 0.44654000591006276, "grad_norm": 1.3230957434898016, "learning_rate": 6.099770960066476e-07, "loss": 0.2129, "step": 25689 }, { "epoch": 0.4465573884475656, "grad_norm": 1.1123105279607535, "learning_rate": 6.099496358672036e-07, "loss": 0.1864, "step": 25690 }, { "epoch": 0.4465747709850684, "grad_norm": 1.1720827495615314, "learning_rate": 6.099221753792696e-07, "loss": 0.2764, "step": 25691 }, { "epoch": 0.4465921535225712, "grad_norm": 1.791379995441989, "learning_rate": 6.098947145429329e-07, "loss": 0.1671, "step": 25692 }, { "epoch": 0.44660953606007403, "grad_norm": 1.0072136008275718, "learning_rate": 6.098672533582804e-07, "loss": 0.3115, "step": 25693 }, { "epoch": 0.44662691859757686, "grad_norm": 1.7708944459990883, "learning_rate": 6.098397918253992e-07, "loss": 0.2469, "step": 25694 }, { "epoch": 0.4466443011350797, "grad_norm": 1.1390279227456768, "learning_rate": 6.098123299443762e-07, "loss": 0.2558, "step": 25695 }, { "epoch": 0.4466616836725825, "grad_norm": 1.1862846757147174, "learning_rate": 6.097848677152988e-07, "loss": 0.2796, "step": 25696 }, { "epoch": 0.44667906621008535, "grad_norm": 2.1685157040970293, "learning_rate": 6.097574051382536e-07, "loss": 0.3757, "step": 25697 }, { "epoch": 0.4466964487475882, "grad_norm": 1.4717734469867114, "learning_rate": 6.097299422133279e-07, "loss": 0.2747, "step": 25698 }, { "epoch": 0.446713831285091, "grad_norm": 1.7218421550885465, "learning_rate": 6.097024789406086e-07, "loss": 0.4175, "step": 25699 }, { "epoch": 0.44673121382259384, "grad_norm": 1.70109624505769, "learning_rate": 6.096750153201831e-07, "loss": 0.2318, "step": 25700 }, { "epoch": 0.4467485963600967, "grad_norm": 3.651391765150428, "learning_rate": 6.096475513521379e-07, "loss": 0.5935, "step": 25701 }, { "epoch": 0.44676597889759945, "grad_norm": 1.9484977330094113, "learning_rate": 6.096200870365606e-07, "loss": 0.2325, "step": 25702 }, { "epoch": 0.4467833614351023, "grad_norm": 2.5248540608801435, "learning_rate": 6.095926223735377e-07, "loss": 0.4813, "step": 25703 }, { "epoch": 0.4468007439726051, "grad_norm": 0.9244677013550079, "learning_rate": 6.095651573631566e-07, "loss": 0.3376, "step": 25704 }, { "epoch": 0.44681812651010794, "grad_norm": 1.1922492244601584, "learning_rate": 6.095376920055044e-07, "loss": 0.4791, "step": 25705 }, { "epoch": 0.44683550904761077, "grad_norm": 2.337665492093883, "learning_rate": 6.09510226300668e-07, "loss": 0.2375, "step": 25706 }, { "epoch": 0.4468528915851136, "grad_norm": 1.4649204719340998, "learning_rate": 6.094827602487345e-07, "loss": 0.3112, "step": 25707 }, { "epoch": 0.44687027412261643, "grad_norm": 1.22501154296578, "learning_rate": 6.094552938497908e-07, "loss": 0.2902, "step": 25708 }, { "epoch": 0.44688765666011926, "grad_norm": 0.8777316486161948, "learning_rate": 6.094278271039241e-07, "loss": 0.1546, "step": 25709 }, { "epoch": 0.4469050391976221, "grad_norm": 1.447484120451948, "learning_rate": 6.094003600112217e-07, "loss": 0.2668, "step": 25710 }, { "epoch": 0.4469224217351249, "grad_norm": 3.7006370444086953, "learning_rate": 6.093728925717703e-07, "loss": 0.2611, "step": 25711 }, { "epoch": 0.4469398042726277, "grad_norm": 1.1204928881151766, "learning_rate": 6.093454247856569e-07, "loss": 0.3743, "step": 25712 }, { "epoch": 0.4469571868101305, "grad_norm": 4.497440937864408, "learning_rate": 6.09317956652969e-07, "loss": 0.4317, "step": 25713 }, { "epoch": 0.44697456934763335, "grad_norm": 1.1157185159306537, "learning_rate": 6.092904881737931e-07, "loss": 0.2566, "step": 25714 }, { "epoch": 0.4469919518851362, "grad_norm": 3.6125099387038793, "learning_rate": 6.092630193482168e-07, "loss": 0.401, "step": 25715 }, { "epoch": 0.447009334422639, "grad_norm": 2.5671539329625284, "learning_rate": 6.092355501763268e-07, "loss": 0.4338, "step": 25716 }, { "epoch": 0.44702671696014185, "grad_norm": 1.8726638269660565, "learning_rate": 6.092080806582104e-07, "loss": 0.2923, "step": 25717 }, { "epoch": 0.4470440994976447, "grad_norm": 2.0454527516409007, "learning_rate": 6.091806107939544e-07, "loss": 0.3055, "step": 25718 }, { "epoch": 0.4470614820351475, "grad_norm": 1.6658078845191022, "learning_rate": 6.091531405836461e-07, "loss": 0.2115, "step": 25719 }, { "epoch": 0.44707886457265034, "grad_norm": 1.3930238427828288, "learning_rate": 6.091256700273725e-07, "loss": 0.2235, "step": 25720 }, { "epoch": 0.44709624711015317, "grad_norm": 1.0218884237250694, "learning_rate": 6.090981991252206e-07, "loss": 0.3973, "step": 25721 }, { "epoch": 0.44711362964765594, "grad_norm": 1.5221151350548352, "learning_rate": 6.090707278772776e-07, "loss": 0.2143, "step": 25722 }, { "epoch": 0.44713101218515877, "grad_norm": 2.2140373332009786, "learning_rate": 6.090432562836305e-07, "loss": 0.3773, "step": 25723 }, { "epoch": 0.4471483947226616, "grad_norm": 2.131945162572198, "learning_rate": 6.090157843443662e-07, "loss": 0.3291, "step": 25724 }, { "epoch": 0.44716577726016443, "grad_norm": 1.7476868945556747, "learning_rate": 6.08988312059572e-07, "loss": 0.3561, "step": 25725 }, { "epoch": 0.44718315979766726, "grad_norm": 2.3251746394193846, "learning_rate": 6.089608394293351e-07, "loss": 0.2097, "step": 25726 }, { "epoch": 0.4472005423351701, "grad_norm": 1.292782515241699, "learning_rate": 6.089333664537422e-07, "loss": 0.3503, "step": 25727 }, { "epoch": 0.4472179248726729, "grad_norm": 1.3766488492863846, "learning_rate": 6.089058931328805e-07, "loss": 0.2599, "step": 25728 }, { "epoch": 0.44723530741017575, "grad_norm": 1.5633185446320421, "learning_rate": 6.088784194668374e-07, "loss": 0.4327, "step": 25729 }, { "epoch": 0.4472526899476786, "grad_norm": 1.4499964852957519, "learning_rate": 6.088509454556996e-07, "loss": 0.2665, "step": 25730 }, { "epoch": 0.4472700724851814, "grad_norm": 1.1360558665929195, "learning_rate": 6.088234710995543e-07, "loss": 0.2184, "step": 25731 }, { "epoch": 0.4472874550226842, "grad_norm": 1.4545856963394235, "learning_rate": 6.087959963984885e-07, "loss": 0.195, "step": 25732 }, { "epoch": 0.447304837560187, "grad_norm": 1.6052843251587319, "learning_rate": 6.087685213525895e-07, "loss": 0.2481, "step": 25733 }, { "epoch": 0.44732222009768985, "grad_norm": 1.403752744133443, "learning_rate": 6.087410459619443e-07, "loss": 0.3811, "step": 25734 }, { "epoch": 0.4473396026351927, "grad_norm": 1.5977999875073805, "learning_rate": 6.087135702266398e-07, "loss": 0.2702, "step": 25735 }, { "epoch": 0.4473569851726955, "grad_norm": 1.3485699191672396, "learning_rate": 6.086860941467633e-07, "loss": 0.2496, "step": 25736 }, { "epoch": 0.44737436771019834, "grad_norm": 1.1701382955171236, "learning_rate": 6.086586177224018e-07, "loss": 0.2386, "step": 25737 }, { "epoch": 0.44739175024770117, "grad_norm": 3.579221493671556, "learning_rate": 6.086311409536423e-07, "loss": 0.3298, "step": 25738 }, { "epoch": 0.447409132785204, "grad_norm": 1.81852357336786, "learning_rate": 6.086036638405721e-07, "loss": 0.3241, "step": 25739 }, { "epoch": 0.44742651532270683, "grad_norm": 1.5320569469163166, "learning_rate": 6.085761863832782e-07, "loss": 0.372, "step": 25740 }, { "epoch": 0.44744389786020966, "grad_norm": 1.8733233223237555, "learning_rate": 6.085487085818475e-07, "loss": 0.3169, "step": 25741 }, { "epoch": 0.44746128039771244, "grad_norm": 2.5941080039116295, "learning_rate": 6.085212304363675e-07, "loss": 0.2833, "step": 25742 }, { "epoch": 0.44747866293521527, "grad_norm": 3.159127983580715, "learning_rate": 6.084937519469248e-07, "loss": 0.3202, "step": 25743 }, { "epoch": 0.4474960454727181, "grad_norm": 2.3242083938021914, "learning_rate": 6.084662731136068e-07, "loss": 0.4967, "step": 25744 }, { "epoch": 0.4475134280102209, "grad_norm": 1.0781241519326257, "learning_rate": 6.084387939365007e-07, "loss": 0.2405, "step": 25745 }, { "epoch": 0.44753081054772376, "grad_norm": 1.8122541322768801, "learning_rate": 6.084113144156933e-07, "loss": 0.2627, "step": 25746 }, { "epoch": 0.4475481930852266, "grad_norm": 1.6293345015062088, "learning_rate": 6.083838345512717e-07, "loss": 0.2268, "step": 25747 }, { "epoch": 0.4475655756227294, "grad_norm": 3.023323826921086, "learning_rate": 6.083563543433235e-07, "loss": 0.2615, "step": 25748 }, { "epoch": 0.44758295816023225, "grad_norm": 2.0031025354270753, "learning_rate": 6.083288737919351e-07, "loss": 0.3458, "step": 25749 }, { "epoch": 0.4476003406977351, "grad_norm": 3.362150944108998, "learning_rate": 6.083013928971941e-07, "loss": 0.3204, "step": 25750 }, { "epoch": 0.4476177232352379, "grad_norm": 1.4183336262276904, "learning_rate": 6.082739116591874e-07, "loss": 0.2399, "step": 25751 }, { "epoch": 0.4476351057727407, "grad_norm": 1.7977302370889139, "learning_rate": 6.082464300780021e-07, "loss": 0.2392, "step": 25752 }, { "epoch": 0.4476524883102435, "grad_norm": 2.858378101262348, "learning_rate": 6.082189481537253e-07, "loss": 0.2802, "step": 25753 }, { "epoch": 0.44766987084774634, "grad_norm": 1.7643167233456367, "learning_rate": 6.081914658864443e-07, "loss": 0.2504, "step": 25754 }, { "epoch": 0.4476872533852492, "grad_norm": 1.8885071251467873, "learning_rate": 6.08163983276246e-07, "loss": 0.3456, "step": 25755 }, { "epoch": 0.447704635922752, "grad_norm": 2.0405135113232635, "learning_rate": 6.081365003232175e-07, "loss": 0.2845, "step": 25756 }, { "epoch": 0.44772201846025483, "grad_norm": 3.2969083574070384, "learning_rate": 6.081090170274459e-07, "loss": 0.4089, "step": 25757 }, { "epoch": 0.44773940099775766, "grad_norm": 1.475461464628877, "learning_rate": 6.080815333890186e-07, "loss": 0.2908, "step": 25758 }, { "epoch": 0.4477567835352605, "grad_norm": 1.1132543680797338, "learning_rate": 6.080540494080222e-07, "loss": 0.3758, "step": 25759 }, { "epoch": 0.4477741660727633, "grad_norm": 2.2317174673289646, "learning_rate": 6.080265650845444e-07, "loss": 0.1978, "step": 25760 }, { "epoch": 0.4477915486102661, "grad_norm": 1.6685653451095777, "learning_rate": 6.079990804186719e-07, "loss": 0.2549, "step": 25761 }, { "epoch": 0.44780893114776893, "grad_norm": 1.4970094215738938, "learning_rate": 6.079715954104918e-07, "loss": 0.2933, "step": 25762 }, { "epoch": 0.44782631368527176, "grad_norm": 3.1837029666725285, "learning_rate": 6.079441100600914e-07, "loss": 0.5004, "step": 25763 }, { "epoch": 0.4478436962227746, "grad_norm": 2.210038581184001, "learning_rate": 6.079166243675579e-07, "loss": 0.3608, "step": 25764 }, { "epoch": 0.4478610787602774, "grad_norm": 1.6916505339033618, "learning_rate": 6.078891383329781e-07, "loss": 0.2064, "step": 25765 }, { "epoch": 0.44787846129778025, "grad_norm": 1.901022942543262, "learning_rate": 6.078616519564393e-07, "loss": 0.2344, "step": 25766 }, { "epoch": 0.4478958438352831, "grad_norm": 1.8039112925938297, "learning_rate": 6.078341652380287e-07, "loss": 0.2587, "step": 25767 }, { "epoch": 0.4479132263727859, "grad_norm": 1.1994919272069136, "learning_rate": 6.078066781778333e-07, "loss": 0.1578, "step": 25768 }, { "epoch": 0.44793060891028874, "grad_norm": 1.4603412843829626, "learning_rate": 6.077791907759403e-07, "loss": 0.2426, "step": 25769 }, { "epoch": 0.44794799144779157, "grad_norm": 2.6596514929151334, "learning_rate": 6.077517030324368e-07, "loss": 0.2478, "step": 25770 }, { "epoch": 0.44796537398529435, "grad_norm": 1.44620426692878, "learning_rate": 6.077242149474098e-07, "loss": 0.3549, "step": 25771 }, { "epoch": 0.4479827565227972, "grad_norm": 2.2924372638256125, "learning_rate": 6.076967265209464e-07, "loss": 0.3965, "step": 25772 }, { "epoch": 0.4480001390603, "grad_norm": 1.676100142968654, "learning_rate": 6.076692377531341e-07, "loss": 0.2458, "step": 25773 }, { "epoch": 0.44801752159780284, "grad_norm": 1.5269443112642864, "learning_rate": 6.076417486440595e-07, "loss": 0.2395, "step": 25774 }, { "epoch": 0.44803490413530567, "grad_norm": 2.6190641271991177, "learning_rate": 6.076142591938104e-07, "loss": 0.3377, "step": 25775 }, { "epoch": 0.4480522866728085, "grad_norm": 1.4185222768527281, "learning_rate": 6.075867694024731e-07, "loss": 0.304, "step": 25776 }, { "epoch": 0.4480696692103113, "grad_norm": 1.7380380654772263, "learning_rate": 6.075592792701355e-07, "loss": 0.2722, "step": 25777 }, { "epoch": 0.44808705174781416, "grad_norm": 1.1574017636251557, "learning_rate": 6.075317887968843e-07, "loss": 0.2109, "step": 25778 }, { "epoch": 0.448104434285317, "grad_norm": 1.3598526062136704, "learning_rate": 6.075042979828066e-07, "loss": 0.3424, "step": 25779 }, { "epoch": 0.4481218168228198, "grad_norm": 1.2861714677234866, "learning_rate": 6.074768068279899e-07, "loss": 0.3235, "step": 25780 }, { "epoch": 0.4481391993603226, "grad_norm": 1.5667829340320778, "learning_rate": 6.074493153325208e-07, "loss": 0.2407, "step": 25781 }, { "epoch": 0.4481565818978254, "grad_norm": 2.1446924968606984, "learning_rate": 6.074218234964868e-07, "loss": 0.232, "step": 25782 }, { "epoch": 0.44817396443532825, "grad_norm": 1.2272536623724304, "learning_rate": 6.073943313199752e-07, "loss": 0.404, "step": 25783 }, { "epoch": 0.4481913469728311, "grad_norm": 1.6194978100959407, "learning_rate": 6.073668388030726e-07, "loss": 0.2279, "step": 25784 }, { "epoch": 0.4482087295103339, "grad_norm": 1.8918119731685825, "learning_rate": 6.073393459458666e-07, "loss": 0.198, "step": 25785 }, { "epoch": 0.44822611204783674, "grad_norm": 1.5783498752782943, "learning_rate": 6.073118527484442e-07, "loss": 0.3317, "step": 25786 }, { "epoch": 0.4482434945853396, "grad_norm": 2.6304145917840778, "learning_rate": 6.072843592108925e-07, "loss": 0.2764, "step": 25787 }, { "epoch": 0.4482608771228424, "grad_norm": 2.6400043957343042, "learning_rate": 6.072568653332986e-07, "loss": 0.2551, "step": 25788 }, { "epoch": 0.44827825966034524, "grad_norm": 1.2005394041011044, "learning_rate": 6.072293711157498e-07, "loss": 0.2362, "step": 25789 }, { "epoch": 0.44829564219784807, "grad_norm": 1.8332054930938175, "learning_rate": 6.072018765583331e-07, "loss": 0.2813, "step": 25790 }, { "epoch": 0.44831302473535084, "grad_norm": 1.734696578880557, "learning_rate": 6.071743816611357e-07, "loss": 0.2516, "step": 25791 }, { "epoch": 0.44833040727285367, "grad_norm": 1.0586297963076758, "learning_rate": 6.071468864242447e-07, "loss": 0.2993, "step": 25792 }, { "epoch": 0.4483477898103565, "grad_norm": 1.4226971160029518, "learning_rate": 6.071193908477473e-07, "loss": 0.2708, "step": 25793 }, { "epoch": 0.44836517234785933, "grad_norm": 1.660628870698756, "learning_rate": 6.070918949317307e-07, "loss": 0.3052, "step": 25794 }, { "epoch": 0.44838255488536216, "grad_norm": 1.9692336741324534, "learning_rate": 6.07064398676282e-07, "loss": 0.426, "step": 25795 }, { "epoch": 0.448399937422865, "grad_norm": 1.1504622139494196, "learning_rate": 6.070369020814883e-07, "loss": 0.2413, "step": 25796 }, { "epoch": 0.4484173199603678, "grad_norm": 1.2048388934025267, "learning_rate": 6.070094051474367e-07, "loss": 0.325, "step": 25797 }, { "epoch": 0.44843470249787065, "grad_norm": 1.3186486920672473, "learning_rate": 6.069819078742145e-07, "loss": 0.1679, "step": 25798 }, { "epoch": 0.4484520850353735, "grad_norm": 1.425576592556271, "learning_rate": 6.069544102619088e-07, "loss": 0.2082, "step": 25799 }, { "epoch": 0.4484694675728763, "grad_norm": 1.7760798586872732, "learning_rate": 6.069269123106068e-07, "loss": 0.3157, "step": 25800 }, { "epoch": 0.4484868501103791, "grad_norm": 1.8677316015969363, "learning_rate": 6.068994140203956e-07, "loss": 0.2667, "step": 25801 }, { "epoch": 0.4485042326478819, "grad_norm": 1.8402387084633192, "learning_rate": 6.068719153913624e-07, "loss": 0.3783, "step": 25802 }, { "epoch": 0.44852161518538475, "grad_norm": 2.5065881188027235, "learning_rate": 6.068444164235942e-07, "loss": 0.2352, "step": 25803 }, { "epoch": 0.4485389977228876, "grad_norm": 1.4816068531185038, "learning_rate": 6.068169171171784e-07, "loss": 0.166, "step": 25804 }, { "epoch": 0.4485563802603904, "grad_norm": 1.9164783855511385, "learning_rate": 6.067894174722019e-07, "loss": 0.2652, "step": 25805 }, { "epoch": 0.44857376279789324, "grad_norm": 2.066567931431911, "learning_rate": 6.067619174887522e-07, "loss": 0.3098, "step": 25806 }, { "epoch": 0.44859114533539607, "grad_norm": 1.7577298448718384, "learning_rate": 6.06734417166916e-07, "loss": 0.1972, "step": 25807 }, { "epoch": 0.4486085278728989, "grad_norm": 1.1397566221829585, "learning_rate": 6.06706916506781e-07, "loss": 0.2802, "step": 25808 }, { "epoch": 0.44862591041040173, "grad_norm": 1.564295052297422, "learning_rate": 6.066794155084339e-07, "loss": 0.2542, "step": 25809 }, { "epoch": 0.44864329294790456, "grad_norm": 2.2964974366560282, "learning_rate": 6.066519141719622e-07, "loss": 0.2009, "step": 25810 }, { "epoch": 0.44866067548540733, "grad_norm": 1.8543747402567439, "learning_rate": 6.066244124974528e-07, "loss": 0.2826, "step": 25811 }, { "epoch": 0.44867805802291016, "grad_norm": 1.4858214739305975, "learning_rate": 6.06596910484993e-07, "loss": 0.2329, "step": 25812 }, { "epoch": 0.448695440560413, "grad_norm": 1.2427182271555846, "learning_rate": 6.065694081346699e-07, "loss": 0.2218, "step": 25813 }, { "epoch": 0.4487128230979158, "grad_norm": 1.6146165573311884, "learning_rate": 6.065419054465709e-07, "loss": 0.1837, "step": 25814 }, { "epoch": 0.44873020563541866, "grad_norm": 1.6958816960096426, "learning_rate": 6.06514402420783e-07, "loss": 0.3695, "step": 25815 }, { "epoch": 0.4487475881729215, "grad_norm": 1.5275719801035357, "learning_rate": 6.064868990573932e-07, "loss": 0.2579, "step": 25816 }, { "epoch": 0.4487649707104243, "grad_norm": 2.1568599654039136, "learning_rate": 6.06459395356489e-07, "loss": 0.3188, "step": 25817 }, { "epoch": 0.44878235324792715, "grad_norm": 0.9374613650674679, "learning_rate": 6.064318913181573e-07, "loss": 0.3564, "step": 25818 }, { "epoch": 0.44879973578543, "grad_norm": 1.3143897119857766, "learning_rate": 6.064043869424856e-07, "loss": 0.1702, "step": 25819 }, { "epoch": 0.4488171183229328, "grad_norm": 2.1450324002858525, "learning_rate": 6.063768822295608e-07, "loss": 0.4347, "step": 25820 }, { "epoch": 0.4488345008604356, "grad_norm": 1.8799858365308952, "learning_rate": 6.0634937717947e-07, "loss": 0.2992, "step": 25821 }, { "epoch": 0.4488518833979384, "grad_norm": 1.5610085437828947, "learning_rate": 6.063218717923006e-07, "loss": 0.4591, "step": 25822 }, { "epoch": 0.44886926593544124, "grad_norm": 1.1270506892616754, "learning_rate": 6.062943660681397e-07, "loss": 0.1793, "step": 25823 }, { "epoch": 0.44888664847294407, "grad_norm": 2.0552812846117052, "learning_rate": 6.062668600070745e-07, "loss": 0.2636, "step": 25824 }, { "epoch": 0.4489040310104469, "grad_norm": 1.6042712842838374, "learning_rate": 6.062393536091922e-07, "loss": 0.29, "step": 25825 }, { "epoch": 0.44892141354794973, "grad_norm": 1.2403047009185324, "learning_rate": 6.062118468745798e-07, "loss": 0.225, "step": 25826 }, { "epoch": 0.44893879608545256, "grad_norm": 1.1064527367570054, "learning_rate": 6.061843398033249e-07, "loss": 0.3011, "step": 25827 }, { "epoch": 0.4489561786229554, "grad_norm": 2.0528232666618016, "learning_rate": 6.061568323955142e-07, "loss": 0.2314, "step": 25828 }, { "epoch": 0.4489735611604582, "grad_norm": 1.4129820494210068, "learning_rate": 6.061293246512352e-07, "loss": 0.2343, "step": 25829 }, { "epoch": 0.44899094369796105, "grad_norm": 2.251527596210698, "learning_rate": 6.06101816570575e-07, "loss": 0.3205, "step": 25830 }, { "epoch": 0.44900832623546383, "grad_norm": 1.5166771125015763, "learning_rate": 6.060743081536208e-07, "loss": 0.2128, "step": 25831 }, { "epoch": 0.44902570877296666, "grad_norm": 2.3878382798181845, "learning_rate": 6.060467994004596e-07, "loss": 0.3032, "step": 25832 }, { "epoch": 0.4490430913104695, "grad_norm": 1.4323626439838744, "learning_rate": 6.060192903111789e-07, "loss": 0.18, "step": 25833 }, { "epoch": 0.4490604738479723, "grad_norm": 1.531271211968035, "learning_rate": 6.059917808858657e-07, "loss": 0.3201, "step": 25834 }, { "epoch": 0.44907785638547515, "grad_norm": 2.8526384822665825, "learning_rate": 6.059642711246074e-07, "loss": 0.4111, "step": 25835 }, { "epoch": 0.449095238922978, "grad_norm": 1.2930621546524708, "learning_rate": 6.059367610274908e-07, "loss": 0.1961, "step": 25836 }, { "epoch": 0.4491126214604808, "grad_norm": 1.590176740354892, "learning_rate": 6.059092505946037e-07, "loss": 0.2929, "step": 25837 }, { "epoch": 0.44913000399798364, "grad_norm": 2.614837492313436, "learning_rate": 6.058817398260325e-07, "loss": 0.5124, "step": 25838 }, { "epoch": 0.44914738653548647, "grad_norm": 1.933328663259544, "learning_rate": 6.058542287218651e-07, "loss": 0.321, "step": 25839 }, { "epoch": 0.4491647690729893, "grad_norm": 1.8904604254418658, "learning_rate": 6.058267172821883e-07, "loss": 0.2608, "step": 25840 }, { "epoch": 0.4491821516104921, "grad_norm": 1.0690178862520288, "learning_rate": 6.057992055070894e-07, "loss": 0.2434, "step": 25841 }, { "epoch": 0.4491995341479949, "grad_norm": 3.432762331372119, "learning_rate": 6.057716933966556e-07, "loss": 0.3362, "step": 25842 }, { "epoch": 0.44921691668549774, "grad_norm": 1.6915892635366663, "learning_rate": 6.057441809509743e-07, "loss": 0.2256, "step": 25843 }, { "epoch": 0.44923429922300057, "grad_norm": 1.3361072527380735, "learning_rate": 6.057166681701324e-07, "loss": 0.1581, "step": 25844 }, { "epoch": 0.4492516817605034, "grad_norm": 1.783292720431729, "learning_rate": 6.056891550542171e-07, "loss": 0.3233, "step": 25845 }, { "epoch": 0.4492690642980062, "grad_norm": 1.5321781129893959, "learning_rate": 6.05661641603316e-07, "loss": 0.212, "step": 25846 }, { "epoch": 0.44928644683550906, "grad_norm": 1.8610978764390687, "learning_rate": 6.056341278175159e-07, "loss": 0.3495, "step": 25847 }, { "epoch": 0.4493038293730119, "grad_norm": 2.2065398108535264, "learning_rate": 6.05606613696904e-07, "loss": 0.3251, "step": 25848 }, { "epoch": 0.4493212119105147, "grad_norm": 2.008232980921601, "learning_rate": 6.055790992415678e-07, "loss": 0.1608, "step": 25849 }, { "epoch": 0.44933859444801755, "grad_norm": 1.7923888803697505, "learning_rate": 6.055515844515943e-07, "loss": 0.3587, "step": 25850 }, { "epoch": 0.4493559769855203, "grad_norm": 2.2030527912166433, "learning_rate": 6.055240693270709e-07, "loss": 0.5443, "step": 25851 }, { "epoch": 0.44937335952302315, "grad_norm": 2.6454831058222363, "learning_rate": 6.054965538680844e-07, "loss": 0.2088, "step": 25852 }, { "epoch": 0.449390742060526, "grad_norm": 1.262518540241656, "learning_rate": 6.054690380747225e-07, "loss": 0.2787, "step": 25853 }, { "epoch": 0.4494081245980288, "grad_norm": 1.0067238509438536, "learning_rate": 6.054415219470721e-07, "loss": 0.1397, "step": 25854 }, { "epoch": 0.44942550713553164, "grad_norm": 3.353316379214037, "learning_rate": 6.054140054852204e-07, "loss": 0.3493, "step": 25855 }, { "epoch": 0.4494428896730345, "grad_norm": 1.1225341243212876, "learning_rate": 6.053864886892551e-07, "loss": 0.1738, "step": 25856 }, { "epoch": 0.4494602722105373, "grad_norm": 2.261491164404519, "learning_rate": 6.053589715592627e-07, "loss": 0.2085, "step": 25857 }, { "epoch": 0.44947765474804013, "grad_norm": 2.4141699899593125, "learning_rate": 6.053314540953309e-07, "loss": 0.3731, "step": 25858 }, { "epoch": 0.44949503728554296, "grad_norm": 2.151268544670083, "learning_rate": 6.053039362975468e-07, "loss": 0.3016, "step": 25859 }, { "epoch": 0.4495124198230458, "grad_norm": 2.1323889023400664, "learning_rate": 6.052764181659977e-07, "loss": 0.2372, "step": 25860 }, { "epoch": 0.44952980236054857, "grad_norm": 1.3483831908745274, "learning_rate": 6.052488997007704e-07, "loss": 0.1701, "step": 25861 }, { "epoch": 0.4495471848980514, "grad_norm": 2.421134371571744, "learning_rate": 6.052213809019528e-07, "loss": 0.3124, "step": 25862 }, { "epoch": 0.44956456743555423, "grad_norm": 1.6521797146345654, "learning_rate": 6.051938617696314e-07, "loss": 0.2994, "step": 25863 }, { "epoch": 0.44958194997305706, "grad_norm": 2.6646307351823197, "learning_rate": 6.05166342303894e-07, "loss": 0.4214, "step": 25864 }, { "epoch": 0.4495993325105599, "grad_norm": 1.3854609268019291, "learning_rate": 6.051388225048276e-07, "loss": 0.1985, "step": 25865 }, { "epoch": 0.4496167150480627, "grad_norm": 1.2825592300864768, "learning_rate": 6.051113023725194e-07, "loss": 0.2668, "step": 25866 }, { "epoch": 0.44963409758556555, "grad_norm": 2.207613006197621, "learning_rate": 6.050837819070564e-07, "loss": 0.2927, "step": 25867 }, { "epoch": 0.4496514801230684, "grad_norm": 1.502945354045448, "learning_rate": 6.050562611085265e-07, "loss": 0.277, "step": 25868 }, { "epoch": 0.4496688626605712, "grad_norm": 2.112589420077549, "learning_rate": 6.050287399770162e-07, "loss": 0.3677, "step": 25869 }, { "epoch": 0.44968624519807404, "grad_norm": 1.706149043947625, "learning_rate": 6.050012185126132e-07, "loss": 0.3882, "step": 25870 }, { "epoch": 0.4497036277355768, "grad_norm": 1.9951007972293517, "learning_rate": 6.049736967154046e-07, "loss": 0.3673, "step": 25871 }, { "epoch": 0.44972101027307965, "grad_norm": 0.7932673085050341, "learning_rate": 6.049461745854776e-07, "loss": 0.3007, "step": 25872 }, { "epoch": 0.4497383928105825, "grad_norm": 1.4245725062149444, "learning_rate": 6.049186521229192e-07, "loss": 0.188, "step": 25873 }, { "epoch": 0.4497557753480853, "grad_norm": 1.1459151754171646, "learning_rate": 6.04891129327817e-07, "loss": 0.2234, "step": 25874 }, { "epoch": 0.44977315788558814, "grad_norm": 1.3617822203443002, "learning_rate": 6.048636062002582e-07, "loss": 0.1948, "step": 25875 }, { "epoch": 0.44979054042309097, "grad_norm": 1.8732060116453109, "learning_rate": 6.048360827403299e-07, "loss": 0.2652, "step": 25876 }, { "epoch": 0.4498079229605938, "grad_norm": 2.180648282553704, "learning_rate": 6.048085589481194e-07, "loss": 0.3018, "step": 25877 }, { "epoch": 0.44982530549809663, "grad_norm": 2.9995899922092564, "learning_rate": 6.047810348237137e-07, "loss": 0.2413, "step": 25878 }, { "epoch": 0.44984268803559946, "grad_norm": 2.143840073116104, "learning_rate": 6.047535103672006e-07, "loss": 0.3525, "step": 25879 }, { "epoch": 0.4498600705731023, "grad_norm": 1.4718951022010625, "learning_rate": 6.047259855786668e-07, "loss": 0.3656, "step": 25880 }, { "epoch": 0.44987745311060506, "grad_norm": 1.68206782163045, "learning_rate": 6.046984604581998e-07, "loss": 0.2819, "step": 25881 }, { "epoch": 0.4498948356481079, "grad_norm": 1.448090808887437, "learning_rate": 6.046709350058866e-07, "loss": 0.2409, "step": 25882 }, { "epoch": 0.4499122181856107, "grad_norm": 1.6050480657885828, "learning_rate": 6.046434092218148e-07, "loss": 0.4061, "step": 25883 }, { "epoch": 0.44992960072311355, "grad_norm": 3.0173953011326655, "learning_rate": 6.046158831060714e-07, "loss": 0.3181, "step": 25884 }, { "epoch": 0.4499469832606164, "grad_norm": 1.2365789432839216, "learning_rate": 6.045883566587438e-07, "loss": 0.172, "step": 25885 }, { "epoch": 0.4499643657981192, "grad_norm": 1.3016158232828081, "learning_rate": 6.04560829879919e-07, "loss": 0.1941, "step": 25886 }, { "epoch": 0.44998174833562204, "grad_norm": 1.8744843581435113, "learning_rate": 6.045333027696845e-07, "loss": 0.2243, "step": 25887 }, { "epoch": 0.4499991308731249, "grad_norm": 1.3494043751135139, "learning_rate": 6.045057753281273e-07, "loss": 0.225, "step": 25888 }, { "epoch": 0.4500165134106277, "grad_norm": 1.4104123652267124, "learning_rate": 6.04478247555335e-07, "loss": 0.2504, "step": 25889 }, { "epoch": 0.45003389594813054, "grad_norm": 1.6641093678901904, "learning_rate": 6.044507194513946e-07, "loss": 0.2633, "step": 25890 }, { "epoch": 0.4500512784856333, "grad_norm": 1.7685098904178624, "learning_rate": 6.044231910163933e-07, "loss": 0.1945, "step": 25891 }, { "epoch": 0.45006866102313614, "grad_norm": 1.7599957649114228, "learning_rate": 6.043956622504185e-07, "loss": 0.3085, "step": 25892 }, { "epoch": 0.45008604356063897, "grad_norm": 1.428011394387235, "learning_rate": 6.043681331535573e-07, "loss": 0.2155, "step": 25893 }, { "epoch": 0.4501034260981418, "grad_norm": 1.4988637704992998, "learning_rate": 6.043406037258974e-07, "loss": 0.1949, "step": 25894 }, { "epoch": 0.45012080863564463, "grad_norm": 1.6256438220373541, "learning_rate": 6.043130739675254e-07, "loss": 0.2961, "step": 25895 }, { "epoch": 0.45013819117314746, "grad_norm": 2.5511545906156714, "learning_rate": 6.04285543878529e-07, "loss": 0.2765, "step": 25896 }, { "epoch": 0.4501555737106503, "grad_norm": 1.1849896960361113, "learning_rate": 6.042580134589953e-07, "loss": 0.2514, "step": 25897 }, { "epoch": 0.4501729562481531, "grad_norm": 1.7992110075473107, "learning_rate": 6.042304827090116e-07, "loss": 0.2282, "step": 25898 }, { "epoch": 0.45019033878565595, "grad_norm": 1.7660114474827917, "learning_rate": 6.042029516286651e-07, "loss": 0.2916, "step": 25899 }, { "epoch": 0.4502077213231587, "grad_norm": 1.5404243097717891, "learning_rate": 6.041754202180432e-07, "loss": 0.1997, "step": 25900 }, { "epoch": 0.45022510386066156, "grad_norm": 2.251106733602732, "learning_rate": 6.04147888477233e-07, "loss": 0.3005, "step": 25901 }, { "epoch": 0.4502424863981644, "grad_norm": 2.0091496307498335, "learning_rate": 6.041203564063217e-07, "loss": 0.3223, "step": 25902 }, { "epoch": 0.4502598689356672, "grad_norm": 1.9854053612017697, "learning_rate": 6.04092824005397e-07, "loss": 0.2933, "step": 25903 }, { "epoch": 0.45027725147317005, "grad_norm": 2.7498344504600394, "learning_rate": 6.040652912745457e-07, "loss": 0.3884, "step": 25904 }, { "epoch": 0.4502946340106729, "grad_norm": 3.3616374342205004, "learning_rate": 6.040377582138553e-07, "loss": 0.3598, "step": 25905 }, { "epoch": 0.4503120165481757, "grad_norm": 1.4535115692152556, "learning_rate": 6.040102248234129e-07, "loss": 0.2267, "step": 25906 }, { "epoch": 0.45032939908567854, "grad_norm": 1.3709765911577865, "learning_rate": 6.039826911033058e-07, "loss": 0.3578, "step": 25907 }, { "epoch": 0.45034678162318137, "grad_norm": 1.1085290271558825, "learning_rate": 6.039551570536216e-07, "loss": 0.2583, "step": 25908 }, { "epoch": 0.4503641641606842, "grad_norm": 4.15055965985481, "learning_rate": 6.039276226744471e-07, "loss": 0.6104, "step": 25909 }, { "epoch": 0.450381546698187, "grad_norm": 1.4793030898550783, "learning_rate": 6.039000879658698e-07, "loss": 0.1145, "step": 25910 }, { "epoch": 0.4503989292356898, "grad_norm": 1.5009361336487659, "learning_rate": 6.038725529279769e-07, "loss": 0.2539, "step": 25911 }, { "epoch": 0.45041631177319263, "grad_norm": 1.4113189838189717, "learning_rate": 6.038450175608559e-07, "loss": 0.2537, "step": 25912 }, { "epoch": 0.45043369431069546, "grad_norm": 2.44182122811798, "learning_rate": 6.038174818645938e-07, "loss": 0.4273, "step": 25913 }, { "epoch": 0.4504510768481983, "grad_norm": 1.7017571314050466, "learning_rate": 6.03789945839278e-07, "loss": 0.1748, "step": 25914 }, { "epoch": 0.4504684593857011, "grad_norm": 2.016838712609918, "learning_rate": 6.037624094849956e-07, "loss": 0.3101, "step": 25915 }, { "epoch": 0.45048584192320396, "grad_norm": 1.2510748194914472, "learning_rate": 6.037348728018343e-07, "loss": 0.2892, "step": 25916 }, { "epoch": 0.4505032244607068, "grad_norm": 1.035907501540611, "learning_rate": 6.037073357898808e-07, "loss": 0.1827, "step": 25917 }, { "epoch": 0.4505206069982096, "grad_norm": 2.262739931145926, "learning_rate": 6.036797984492229e-07, "loss": 0.4717, "step": 25918 }, { "epoch": 0.45053798953571245, "grad_norm": 1.6285478331018257, "learning_rate": 6.036522607799476e-07, "loss": 0.2034, "step": 25919 }, { "epoch": 0.4505553720732152, "grad_norm": 1.7460181418829497, "learning_rate": 6.036247227821423e-07, "loss": 0.1838, "step": 25920 }, { "epoch": 0.45057275461071805, "grad_norm": 1.9397348369176513, "learning_rate": 6.035971844558941e-07, "loss": 0.2357, "step": 25921 }, { "epoch": 0.4505901371482209, "grad_norm": 4.1023778788811995, "learning_rate": 6.035696458012906e-07, "loss": 0.3487, "step": 25922 }, { "epoch": 0.4506075196857237, "grad_norm": 1.697484603036124, "learning_rate": 6.035421068184187e-07, "loss": 0.1655, "step": 25923 }, { "epoch": 0.45062490222322654, "grad_norm": 2.742794608600567, "learning_rate": 6.03514567507366e-07, "loss": 0.3249, "step": 25924 }, { "epoch": 0.4506422847607294, "grad_norm": 1.1779526437557157, "learning_rate": 6.034870278682197e-07, "loss": 0.1771, "step": 25925 }, { "epoch": 0.4506596672982322, "grad_norm": 1.4230185914400466, "learning_rate": 6.03459487901067e-07, "loss": 0.3813, "step": 25926 }, { "epoch": 0.45067704983573503, "grad_norm": 1.7952375520055224, "learning_rate": 6.034319476059952e-07, "loss": 0.5008, "step": 25927 }, { "epoch": 0.45069443237323786, "grad_norm": 1.4286925293712982, "learning_rate": 6.034044069830918e-07, "loss": 0.256, "step": 25928 }, { "epoch": 0.4507118149107407, "grad_norm": 1.5824041888803129, "learning_rate": 6.033768660324438e-07, "loss": 0.243, "step": 25929 }, { "epoch": 0.45072919744824347, "grad_norm": 1.3114342806622759, "learning_rate": 6.033493247541386e-07, "loss": 0.3864, "step": 25930 }, { "epoch": 0.4507465799857463, "grad_norm": 2.1156642351506307, "learning_rate": 6.033217831482635e-07, "loss": 0.3355, "step": 25931 }, { "epoch": 0.45076396252324913, "grad_norm": 1.2918041568875194, "learning_rate": 6.03294241214906e-07, "loss": 0.1622, "step": 25932 }, { "epoch": 0.45078134506075196, "grad_norm": 1.8183494091771741, "learning_rate": 6.03266698954153e-07, "loss": 0.2837, "step": 25933 }, { "epoch": 0.4507987275982548, "grad_norm": 2.0893226117218644, "learning_rate": 6.03239156366092e-07, "loss": 0.3414, "step": 25934 }, { "epoch": 0.4508161101357576, "grad_norm": 1.8651800729241277, "learning_rate": 6.032116134508105e-07, "loss": 0.2703, "step": 25935 }, { "epoch": 0.45083349267326045, "grad_norm": 2.9901780621208087, "learning_rate": 6.031840702083954e-07, "loss": 0.3647, "step": 25936 }, { "epoch": 0.4508508752107633, "grad_norm": 1.6399604986096052, "learning_rate": 6.031565266389343e-07, "loss": 0.4529, "step": 25937 }, { "epoch": 0.4508682577482661, "grad_norm": 1.5924245783116446, "learning_rate": 6.031289827425144e-07, "loss": 0.2691, "step": 25938 }, { "epoch": 0.45088564028576894, "grad_norm": 1.3382247699602234, "learning_rate": 6.031014385192229e-07, "loss": 0.2487, "step": 25939 }, { "epoch": 0.4509030228232717, "grad_norm": 1.328087849986485, "learning_rate": 6.030738939691473e-07, "loss": 0.2255, "step": 25940 }, { "epoch": 0.45092040536077455, "grad_norm": 1.9131041423393302, "learning_rate": 6.030463490923747e-07, "loss": 0.4184, "step": 25941 }, { "epoch": 0.4509377878982774, "grad_norm": 2.5155228741657596, "learning_rate": 6.030188038889925e-07, "loss": 0.2994, "step": 25942 }, { "epoch": 0.4509551704357802, "grad_norm": 1.5358969254188224, "learning_rate": 6.029912583590881e-07, "loss": 0.2668, "step": 25943 }, { "epoch": 0.45097255297328304, "grad_norm": 1.225025544294306, "learning_rate": 6.029637125027488e-07, "loss": 0.2546, "step": 25944 }, { "epoch": 0.45098993551078587, "grad_norm": 1.012831730416182, "learning_rate": 6.029361663200619e-07, "loss": 0.2656, "step": 25945 }, { "epoch": 0.4510073180482887, "grad_norm": 1.4849832253953326, "learning_rate": 6.029086198111143e-07, "loss": 0.2131, "step": 25946 }, { "epoch": 0.4510247005857915, "grad_norm": 0.9276186359817372, "learning_rate": 6.028810729759939e-07, "loss": 0.2459, "step": 25947 }, { "epoch": 0.45104208312329436, "grad_norm": 2.241552463351288, "learning_rate": 6.028535258147876e-07, "loss": 0.3612, "step": 25948 }, { "epoch": 0.4510594656607972, "grad_norm": 3.392919370138132, "learning_rate": 6.02825978327583e-07, "loss": 0.2014, "step": 25949 }, { "epoch": 0.45107684819829996, "grad_norm": 1.5068200627362396, "learning_rate": 6.027984305144674e-07, "loss": 0.2031, "step": 25950 }, { "epoch": 0.4510942307358028, "grad_norm": 1.6100643665648624, "learning_rate": 6.027708823755277e-07, "loss": 0.1831, "step": 25951 }, { "epoch": 0.4511116132733056, "grad_norm": 2.1780654109297344, "learning_rate": 6.027433339108516e-07, "loss": 0.3374, "step": 25952 }, { "epoch": 0.45112899581080845, "grad_norm": 1.77624685883519, "learning_rate": 6.027157851205265e-07, "loss": 0.308, "step": 25953 }, { "epoch": 0.4511463783483113, "grad_norm": 1.89527596869101, "learning_rate": 6.026882360046394e-07, "loss": 0.2316, "step": 25954 }, { "epoch": 0.4511637608858141, "grad_norm": 1.024318811999869, "learning_rate": 6.026606865632777e-07, "loss": 0.1351, "step": 25955 }, { "epoch": 0.45118114342331694, "grad_norm": 2.242378063271234, "learning_rate": 6.026331367965288e-07, "loss": 0.375, "step": 25956 }, { "epoch": 0.4511985259608198, "grad_norm": 1.1473354007279604, "learning_rate": 6.026055867044803e-07, "loss": 0.2623, "step": 25957 }, { "epoch": 0.4512159084983226, "grad_norm": 1.4596177548439875, "learning_rate": 6.025780362872188e-07, "loss": 0.2043, "step": 25958 }, { "epoch": 0.45123329103582543, "grad_norm": 1.5566771783764213, "learning_rate": 6.025504855448324e-07, "loss": 0.2057, "step": 25959 }, { "epoch": 0.4512506735733282, "grad_norm": 1.7766162231307627, "learning_rate": 6.025229344774079e-07, "loss": 0.269, "step": 25960 }, { "epoch": 0.45126805611083104, "grad_norm": 2.738173727225742, "learning_rate": 6.024953830850328e-07, "loss": 0.4888, "step": 25961 }, { "epoch": 0.45128543864833387, "grad_norm": 1.1179613314001082, "learning_rate": 6.024678313677942e-07, "loss": 0.1281, "step": 25962 }, { "epoch": 0.4513028211858367, "grad_norm": 1.699840616412599, "learning_rate": 6.024402793257799e-07, "loss": 0.2288, "step": 25963 }, { "epoch": 0.45132020372333953, "grad_norm": 1.0179957982584482, "learning_rate": 6.02412726959077e-07, "loss": 0.252, "step": 25964 }, { "epoch": 0.45133758626084236, "grad_norm": 2.0598594261885075, "learning_rate": 6.023851742677728e-07, "loss": 0.3658, "step": 25965 }, { "epoch": 0.4513549687983452, "grad_norm": 1.4342558615537373, "learning_rate": 6.023576212519546e-07, "loss": 0.2493, "step": 25966 }, { "epoch": 0.451372351335848, "grad_norm": 1.739306776814015, "learning_rate": 6.023300679117096e-07, "loss": 0.3801, "step": 25967 }, { "epoch": 0.45138973387335085, "grad_norm": 1.509305968036375, "learning_rate": 6.023025142471254e-07, "loss": 0.3841, "step": 25968 }, { "epoch": 0.4514071164108537, "grad_norm": 1.9037150835570535, "learning_rate": 6.022749602582893e-07, "loss": 0.4644, "step": 25969 }, { "epoch": 0.45142449894835646, "grad_norm": 3.487833143450221, "learning_rate": 6.022474059452886e-07, "loss": 0.3976, "step": 25970 }, { "epoch": 0.4514418814858593, "grad_norm": 1.4256569016166833, "learning_rate": 6.022198513082102e-07, "loss": 0.3708, "step": 25971 }, { "epoch": 0.4514592640233621, "grad_norm": 1.7573881085860512, "learning_rate": 6.021922963471422e-07, "loss": 0.1682, "step": 25972 }, { "epoch": 0.45147664656086495, "grad_norm": 1.5524365069252628, "learning_rate": 6.021647410621715e-07, "loss": 0.2665, "step": 25973 }, { "epoch": 0.4514940290983678, "grad_norm": 1.4771614638997235, "learning_rate": 6.021371854533855e-07, "loss": 0.2804, "step": 25974 }, { "epoch": 0.4515114116358706, "grad_norm": 1.9047374418662626, "learning_rate": 6.021096295208715e-07, "loss": 0.2915, "step": 25975 }, { "epoch": 0.45152879417337344, "grad_norm": 2.026817736576075, "learning_rate": 6.020820732647171e-07, "loss": 0.3724, "step": 25976 }, { "epoch": 0.45154617671087627, "grad_norm": 1.5243671807270296, "learning_rate": 6.020545166850091e-07, "loss": 0.4818, "step": 25977 }, { "epoch": 0.4515635592483791, "grad_norm": 1.4307463407458667, "learning_rate": 6.020269597818352e-07, "loss": 0.382, "step": 25978 }, { "epoch": 0.45158094178588193, "grad_norm": 1.7987266788052345, "learning_rate": 6.019994025552828e-07, "loss": 0.3142, "step": 25979 }, { "epoch": 0.4515983243233847, "grad_norm": 2.899423880186467, "learning_rate": 6.019718450054392e-07, "loss": 0.2686, "step": 25980 }, { "epoch": 0.45161570686088753, "grad_norm": 0.8753397755589911, "learning_rate": 6.019442871323915e-07, "loss": 0.2784, "step": 25981 }, { "epoch": 0.45163308939839036, "grad_norm": 1.2100309670980085, "learning_rate": 6.019167289362275e-07, "loss": 0.2176, "step": 25982 }, { "epoch": 0.4516504719358932, "grad_norm": 1.5544403908737254, "learning_rate": 6.018891704170341e-07, "loss": 0.1968, "step": 25983 }, { "epoch": 0.451667854473396, "grad_norm": 1.3292942160077899, "learning_rate": 6.018616115748989e-07, "loss": 0.1676, "step": 25984 }, { "epoch": 0.45168523701089885, "grad_norm": 2.2248259897519733, "learning_rate": 6.018340524099093e-07, "loss": 0.3341, "step": 25985 }, { "epoch": 0.4517026195484017, "grad_norm": 1.4637086773302914, "learning_rate": 6.018064929221523e-07, "loss": 0.4403, "step": 25986 }, { "epoch": 0.4517200020859045, "grad_norm": 1.3037429202726252, "learning_rate": 6.017789331117155e-07, "loss": 0.1838, "step": 25987 }, { "epoch": 0.45173738462340735, "grad_norm": 2.6433151823753303, "learning_rate": 6.017513729786865e-07, "loss": 0.4848, "step": 25988 }, { "epoch": 0.4517547671609102, "grad_norm": 1.914675663857382, "learning_rate": 6.017238125231521e-07, "loss": 0.2964, "step": 25989 }, { "epoch": 0.45177214969841295, "grad_norm": 2.086291842817897, "learning_rate": 6.016962517452002e-07, "loss": 0.3412, "step": 25990 }, { "epoch": 0.4517895322359158, "grad_norm": 1.4741506638885589, "learning_rate": 6.016686906449177e-07, "loss": 0.2252, "step": 25991 }, { "epoch": 0.4518069147734186, "grad_norm": 1.3131162269510872, "learning_rate": 6.016411292223922e-07, "loss": 0.4014, "step": 25992 }, { "epoch": 0.45182429731092144, "grad_norm": 1.4689287442321701, "learning_rate": 6.01613567477711e-07, "loss": 0.2646, "step": 25993 }, { "epoch": 0.45184167984842427, "grad_norm": 1.67601558584537, "learning_rate": 6.015860054109614e-07, "loss": 0.2501, "step": 25994 }, { "epoch": 0.4518590623859271, "grad_norm": 1.9135208701883075, "learning_rate": 6.015584430222309e-07, "loss": 0.2173, "step": 25995 }, { "epoch": 0.45187644492342993, "grad_norm": 1.4261444618138983, "learning_rate": 6.015308803116068e-07, "loss": 0.1418, "step": 25996 }, { "epoch": 0.45189382746093276, "grad_norm": 2.652949637036898, "learning_rate": 6.015033172791765e-07, "loss": 0.3856, "step": 25997 }, { "epoch": 0.4519112099984356, "grad_norm": 1.216933504067421, "learning_rate": 6.014757539250272e-07, "loss": 0.2253, "step": 25998 }, { "epoch": 0.4519285925359384, "grad_norm": 1.5708313995428609, "learning_rate": 6.014481902492465e-07, "loss": 0.3571, "step": 25999 }, { "epoch": 0.4519459750734412, "grad_norm": 1.9946607833904386, "learning_rate": 6.014206262519215e-07, "loss": 0.2024, "step": 26000 }, { "epoch": 0.451963357610944, "grad_norm": 2.7858310746710937, "learning_rate": 6.013930619331399e-07, "loss": 0.2013, "step": 26001 }, { "epoch": 0.45198074014844686, "grad_norm": 1.5547084880884463, "learning_rate": 6.013654972929887e-07, "loss": 0.2182, "step": 26002 }, { "epoch": 0.4519981226859497, "grad_norm": 1.3090523458433985, "learning_rate": 6.013379323315555e-07, "loss": 0.2109, "step": 26003 }, { "epoch": 0.4520155052234525, "grad_norm": 1.314097480776179, "learning_rate": 6.013103670489276e-07, "loss": 0.1659, "step": 26004 }, { "epoch": 0.45203288776095535, "grad_norm": 1.5849947386641907, "learning_rate": 6.012828014451923e-07, "loss": 0.1586, "step": 26005 }, { "epoch": 0.4520502702984582, "grad_norm": 3.9371784441537345, "learning_rate": 6.012552355204369e-07, "loss": 0.2669, "step": 26006 }, { "epoch": 0.452067652835961, "grad_norm": 1.6182827088892957, "learning_rate": 6.012276692747495e-07, "loss": 0.2273, "step": 26007 }, { "epoch": 0.45208503537346384, "grad_norm": 1.5783970904347466, "learning_rate": 6.012001027082163e-07, "loss": 0.4518, "step": 26008 }, { "epoch": 0.45210241791096667, "grad_norm": 1.6940711719692239, "learning_rate": 6.011725358209256e-07, "loss": 0.2565, "step": 26009 }, { "epoch": 0.45211980044846944, "grad_norm": 1.4287846677085816, "learning_rate": 6.011449686129643e-07, "loss": 0.2357, "step": 26010 }, { "epoch": 0.4521371829859723, "grad_norm": 5.27254254335842, "learning_rate": 6.011174010844199e-07, "loss": 0.2736, "step": 26011 }, { "epoch": 0.4521545655234751, "grad_norm": 2.8896193887657913, "learning_rate": 6.010898332353798e-07, "loss": 0.2561, "step": 26012 }, { "epoch": 0.45217194806097794, "grad_norm": 1.2481800148836062, "learning_rate": 6.010622650659313e-07, "loss": 0.2577, "step": 26013 }, { "epoch": 0.45218933059848077, "grad_norm": 3.1662820656134394, "learning_rate": 6.010346965761619e-07, "loss": 0.4707, "step": 26014 }, { "epoch": 0.4522067131359836, "grad_norm": 1.3458653989051046, "learning_rate": 6.01007127766159e-07, "loss": 0.2497, "step": 26015 }, { "epoch": 0.4522240956734864, "grad_norm": 1.769387188920621, "learning_rate": 6.009795586360098e-07, "loss": 0.3239, "step": 26016 }, { "epoch": 0.45224147821098926, "grad_norm": 1.8351959678292988, "learning_rate": 6.009519891858019e-07, "loss": 0.2242, "step": 26017 }, { "epoch": 0.4522588607484921, "grad_norm": 1.7256414973994907, "learning_rate": 6.009244194156224e-07, "loss": 0.3214, "step": 26018 }, { "epoch": 0.4522762432859949, "grad_norm": 1.708784179098881, "learning_rate": 6.008968493255591e-07, "loss": 0.4125, "step": 26019 }, { "epoch": 0.4522936258234977, "grad_norm": 1.9002242228567985, "learning_rate": 6.008692789156989e-07, "loss": 0.2002, "step": 26020 }, { "epoch": 0.4523110083610005, "grad_norm": 2.911548014348485, "learning_rate": 6.008417081861296e-07, "loss": 0.2249, "step": 26021 }, { "epoch": 0.45232839089850335, "grad_norm": 1.9054956835636232, "learning_rate": 6.008141371369382e-07, "loss": 0.2231, "step": 26022 }, { "epoch": 0.4523457734360062, "grad_norm": 2.6793375089177327, "learning_rate": 6.007865657682124e-07, "loss": 0.2838, "step": 26023 }, { "epoch": 0.452363155973509, "grad_norm": 1.1315587383376142, "learning_rate": 6.007589940800395e-07, "loss": 0.2524, "step": 26024 }, { "epoch": 0.45238053851101184, "grad_norm": 1.9441082522792519, "learning_rate": 6.00731422072507e-07, "loss": 0.3228, "step": 26025 }, { "epoch": 0.4523979210485147, "grad_norm": 1.5705085318009302, "learning_rate": 6.00703849745702e-07, "loss": 0.3665, "step": 26026 }, { "epoch": 0.4524153035860175, "grad_norm": 2.539447137821281, "learning_rate": 6.00676277099712e-07, "loss": 0.2097, "step": 26027 }, { "epoch": 0.45243268612352033, "grad_norm": 1.6311092710952668, "learning_rate": 6.006487041346245e-07, "loss": 0.3294, "step": 26028 }, { "epoch": 0.45245006866102316, "grad_norm": 1.546717660156369, "learning_rate": 6.006211308505269e-07, "loss": 0.2769, "step": 26029 }, { "epoch": 0.45246745119852594, "grad_norm": 2.3401984119403174, "learning_rate": 6.005935572475066e-07, "loss": 0.2641, "step": 26030 }, { "epoch": 0.45248483373602877, "grad_norm": 1.376221056682981, "learning_rate": 6.005659833256508e-07, "loss": 0.1848, "step": 26031 }, { "epoch": 0.4525022162735316, "grad_norm": 1.5475743967278501, "learning_rate": 6.005384090850471e-07, "loss": 0.2694, "step": 26032 }, { "epoch": 0.45251959881103443, "grad_norm": 1.3326438891083583, "learning_rate": 6.005108345257828e-07, "loss": 0.4105, "step": 26033 }, { "epoch": 0.45253698134853726, "grad_norm": 1.3065638325793156, "learning_rate": 6.004832596479453e-07, "loss": 0.2624, "step": 26034 }, { "epoch": 0.4525543638860401, "grad_norm": 1.9283480977902767, "learning_rate": 6.00455684451622e-07, "loss": 0.2977, "step": 26035 }, { "epoch": 0.4525717464235429, "grad_norm": 1.4847039642072173, "learning_rate": 6.004281089369005e-07, "loss": 0.2897, "step": 26036 }, { "epoch": 0.45258912896104575, "grad_norm": 1.5173564738245577, "learning_rate": 6.004005331038679e-07, "loss": 0.2548, "step": 26037 }, { "epoch": 0.4526065114985486, "grad_norm": 0.9683775126499699, "learning_rate": 6.003729569526117e-07, "loss": 0.207, "step": 26038 }, { "epoch": 0.45262389403605136, "grad_norm": 2.3434405826322497, "learning_rate": 6.003453804832193e-07, "loss": 0.2206, "step": 26039 }, { "epoch": 0.4526412765735542, "grad_norm": 1.5464936598760384, "learning_rate": 6.003178036957784e-07, "loss": 0.2124, "step": 26040 }, { "epoch": 0.452658659111057, "grad_norm": 1.657247018817, "learning_rate": 6.002902265903757e-07, "loss": 0.4365, "step": 26041 }, { "epoch": 0.45267604164855985, "grad_norm": 2.4560222690075584, "learning_rate": 6.002626491670995e-07, "loss": 0.3219, "step": 26042 }, { "epoch": 0.4526934241860627, "grad_norm": 1.6295223100940386, "learning_rate": 6.002350714260364e-07, "loss": 0.2162, "step": 26043 }, { "epoch": 0.4527108067235655, "grad_norm": 1.036146759050175, "learning_rate": 6.002074933672745e-07, "loss": 0.1621, "step": 26044 }, { "epoch": 0.45272818926106834, "grad_norm": 1.2674513799092704, "learning_rate": 6.001799149909007e-07, "loss": 0.32, "step": 26045 }, { "epoch": 0.45274557179857117, "grad_norm": 1.9957318060959, "learning_rate": 6.001523362970027e-07, "loss": 0.3397, "step": 26046 }, { "epoch": 0.452762954336074, "grad_norm": 2.392866685652925, "learning_rate": 6.001247572856676e-07, "loss": 0.2673, "step": 26047 }, { "epoch": 0.4527803368735768, "grad_norm": 3.254993532570801, "learning_rate": 6.000971779569831e-07, "loss": 0.3671, "step": 26048 }, { "epoch": 0.4527977194110796, "grad_norm": 1.0106261672110814, "learning_rate": 6.000695983110366e-07, "loss": 0.3742, "step": 26049 }, { "epoch": 0.45281510194858243, "grad_norm": 1.3493636531882853, "learning_rate": 6.000420183479153e-07, "loss": 0.2326, "step": 26050 }, { "epoch": 0.45283248448608526, "grad_norm": 2.5514529248312035, "learning_rate": 6.000144380677069e-07, "loss": 0.3264, "step": 26051 }, { "epoch": 0.4528498670235881, "grad_norm": 1.845821290660654, "learning_rate": 5.999868574704985e-07, "loss": 0.2668, "step": 26052 }, { "epoch": 0.4528672495610909, "grad_norm": 2.114859235175918, "learning_rate": 5.999592765563779e-07, "loss": 0.3399, "step": 26053 }, { "epoch": 0.45288463209859375, "grad_norm": 1.4265537878666625, "learning_rate": 5.999316953254322e-07, "loss": 0.322, "step": 26054 }, { "epoch": 0.4529020146360966, "grad_norm": 4.431288064320123, "learning_rate": 5.99904113777749e-07, "loss": 0.3089, "step": 26055 }, { "epoch": 0.4529193971735994, "grad_norm": 2.007031031412658, "learning_rate": 5.998765319134155e-07, "loss": 0.2982, "step": 26056 }, { "epoch": 0.45293677971110224, "grad_norm": 3.1706254783401793, "learning_rate": 5.998489497325195e-07, "loss": 0.3867, "step": 26057 }, { "epoch": 0.4529541622486051, "grad_norm": 1.2114494835797278, "learning_rate": 5.99821367235148e-07, "loss": 0.2242, "step": 26058 }, { "epoch": 0.45297154478610785, "grad_norm": 4.854187027910215, "learning_rate": 5.997937844213887e-07, "loss": 0.2883, "step": 26059 }, { "epoch": 0.4529889273236107, "grad_norm": 1.0130018499519502, "learning_rate": 5.997662012913288e-07, "loss": 0.123, "step": 26060 }, { "epoch": 0.4530063098611135, "grad_norm": 2.455431247518237, "learning_rate": 5.997386178450562e-07, "loss": 0.2116, "step": 26061 }, { "epoch": 0.45302369239861634, "grad_norm": 1.4284609081856219, "learning_rate": 5.997110340826577e-07, "loss": 0.2832, "step": 26062 }, { "epoch": 0.45304107493611917, "grad_norm": 1.5893029618643273, "learning_rate": 5.996834500042211e-07, "loss": 0.2177, "step": 26063 }, { "epoch": 0.453058457473622, "grad_norm": 1.5391730858430992, "learning_rate": 5.996558656098338e-07, "loss": 0.38, "step": 26064 }, { "epoch": 0.45307584001112483, "grad_norm": 1.9213301492887531, "learning_rate": 5.996282808995831e-07, "loss": 0.3863, "step": 26065 }, { "epoch": 0.45309322254862766, "grad_norm": 1.6878989348336813, "learning_rate": 5.996006958735566e-07, "loss": 0.2335, "step": 26066 }, { "epoch": 0.4531106050861305, "grad_norm": 1.0842491167673611, "learning_rate": 5.995731105318417e-07, "loss": 0.1804, "step": 26067 }, { "epoch": 0.4531279876236333, "grad_norm": 2.181839865337748, "learning_rate": 5.995455248745257e-07, "loss": 0.29, "step": 26068 }, { "epoch": 0.4531453701611361, "grad_norm": 1.853064316264154, "learning_rate": 5.995179389016961e-07, "loss": 0.188, "step": 26069 }, { "epoch": 0.4531627526986389, "grad_norm": 1.4938196313352703, "learning_rate": 5.994903526134404e-07, "loss": 0.4278, "step": 26070 }, { "epoch": 0.45318013523614176, "grad_norm": 2.70107009945821, "learning_rate": 5.99462766009846e-07, "loss": 0.3298, "step": 26071 }, { "epoch": 0.4531975177736446, "grad_norm": 1.7185866626220403, "learning_rate": 5.994351790910002e-07, "loss": 0.4076, "step": 26072 }, { "epoch": 0.4532149003111474, "grad_norm": 1.7135861732156903, "learning_rate": 5.994075918569906e-07, "loss": 0.3758, "step": 26073 }, { "epoch": 0.45323228284865025, "grad_norm": 1.6309168252814799, "learning_rate": 5.993800043079047e-07, "loss": 0.2791, "step": 26074 }, { "epoch": 0.4532496653861531, "grad_norm": 1.4680914845518276, "learning_rate": 5.993524164438297e-07, "loss": 0.2654, "step": 26075 }, { "epoch": 0.4532670479236559, "grad_norm": 1.486582932025478, "learning_rate": 5.993248282648532e-07, "loss": 0.2494, "step": 26076 }, { "epoch": 0.45328443046115874, "grad_norm": 2.0761158995625575, "learning_rate": 5.992972397710628e-07, "loss": 0.3139, "step": 26077 }, { "epoch": 0.45330181299866157, "grad_norm": 1.659420126169153, "learning_rate": 5.992696509625455e-07, "loss": 0.3154, "step": 26078 }, { "epoch": 0.45331919553616434, "grad_norm": 2.9255039511643672, "learning_rate": 5.992420618393892e-07, "loss": 0.1746, "step": 26079 }, { "epoch": 0.4533365780736672, "grad_norm": 1.4619156392415729, "learning_rate": 5.992144724016812e-07, "loss": 0.245, "step": 26080 }, { "epoch": 0.45335396061117, "grad_norm": 1.8382190297644991, "learning_rate": 5.991868826495088e-07, "loss": 0.2882, "step": 26081 }, { "epoch": 0.45337134314867283, "grad_norm": 1.5996363975894252, "learning_rate": 5.991592925829594e-07, "loss": 0.1453, "step": 26082 }, { "epoch": 0.45338872568617566, "grad_norm": 1.0311512700681624, "learning_rate": 5.991317022021207e-07, "loss": 0.1869, "step": 26083 }, { "epoch": 0.4534061082236785, "grad_norm": 1.7695049588064098, "learning_rate": 5.991041115070802e-07, "loss": 0.2237, "step": 26084 }, { "epoch": 0.4534234907611813, "grad_norm": 1.1026821738399366, "learning_rate": 5.99076520497925e-07, "loss": 0.3161, "step": 26085 }, { "epoch": 0.45344087329868416, "grad_norm": 2.9420421351793262, "learning_rate": 5.990489291747428e-07, "loss": 0.3492, "step": 26086 }, { "epoch": 0.453458255836187, "grad_norm": 1.2857943433788388, "learning_rate": 5.990213375376209e-07, "loss": 0.2383, "step": 26087 }, { "epoch": 0.4534756383736898, "grad_norm": 1.9509151870246688, "learning_rate": 5.98993745586647e-07, "loss": 0.3554, "step": 26088 }, { "epoch": 0.4534930209111926, "grad_norm": 1.3767165339586176, "learning_rate": 5.989661533219084e-07, "loss": 0.2656, "step": 26089 }, { "epoch": 0.4535104034486954, "grad_norm": 1.335856751566074, "learning_rate": 5.989385607434925e-07, "loss": 0.4653, "step": 26090 }, { "epoch": 0.45352778598619825, "grad_norm": 1.715420978251398, "learning_rate": 5.989109678514867e-07, "loss": 0.2096, "step": 26091 }, { "epoch": 0.4535451685237011, "grad_norm": 1.8954504033592334, "learning_rate": 5.988833746459787e-07, "loss": 0.3429, "step": 26092 }, { "epoch": 0.4535625510612039, "grad_norm": 3.2947492071926763, "learning_rate": 5.988557811270558e-07, "loss": 0.5983, "step": 26093 }, { "epoch": 0.45357993359870674, "grad_norm": 2.6225132739578605, "learning_rate": 5.988281872948055e-07, "loss": 0.2278, "step": 26094 }, { "epoch": 0.45359731613620957, "grad_norm": 3.353173094297831, "learning_rate": 5.988005931493152e-07, "loss": 0.2208, "step": 26095 }, { "epoch": 0.4536146986737124, "grad_norm": 3.174341086977462, "learning_rate": 5.987729986906725e-07, "loss": 0.3907, "step": 26096 }, { "epoch": 0.45363208121121523, "grad_norm": 1.7269794524784021, "learning_rate": 5.987454039189647e-07, "loss": 0.4135, "step": 26097 }, { "epoch": 0.45364946374871806, "grad_norm": 1.1984595326759346, "learning_rate": 5.987178088342794e-07, "loss": 0.3408, "step": 26098 }, { "epoch": 0.45366684628622084, "grad_norm": 0.8654385251668317, "learning_rate": 5.986902134367039e-07, "loss": 0.1712, "step": 26099 }, { "epoch": 0.45368422882372367, "grad_norm": 1.349707178353407, "learning_rate": 5.986626177263258e-07, "loss": 0.2173, "step": 26100 }, { "epoch": 0.4537016113612265, "grad_norm": 1.885825866150273, "learning_rate": 5.986350217032324e-07, "loss": 0.274, "step": 26101 }, { "epoch": 0.45371899389872933, "grad_norm": 1.5755450320840911, "learning_rate": 5.986074253675115e-07, "loss": 0.2704, "step": 26102 }, { "epoch": 0.45373637643623216, "grad_norm": 1.5507295102047474, "learning_rate": 5.985798287192502e-07, "loss": 0.211, "step": 26103 }, { "epoch": 0.453753758973735, "grad_norm": 1.1782065609696444, "learning_rate": 5.985522317585362e-07, "loss": 0.2556, "step": 26104 }, { "epoch": 0.4537711415112378, "grad_norm": 1.878908450689087, "learning_rate": 5.985246344854569e-07, "loss": 0.2267, "step": 26105 }, { "epoch": 0.45378852404874065, "grad_norm": 2.0966283886178254, "learning_rate": 5.984970369000998e-07, "loss": 0.4058, "step": 26106 }, { "epoch": 0.4538059065862435, "grad_norm": 1.7211938456301927, "learning_rate": 5.984694390025522e-07, "loss": 0.2093, "step": 26107 }, { "epoch": 0.4538232891237463, "grad_norm": 2.9173800881726786, "learning_rate": 5.984418407929018e-07, "loss": 0.3797, "step": 26108 }, { "epoch": 0.4538406716612491, "grad_norm": 1.80966701024711, "learning_rate": 5.98414242271236e-07, "loss": 0.2632, "step": 26109 }, { "epoch": 0.4538580541987519, "grad_norm": 1.512022753979146, "learning_rate": 5.983866434376423e-07, "loss": 0.3587, "step": 26110 }, { "epoch": 0.45387543673625474, "grad_norm": 1.8845992168380417, "learning_rate": 5.983590442922081e-07, "loss": 0.2534, "step": 26111 }, { "epoch": 0.4538928192737576, "grad_norm": 1.6531882085472942, "learning_rate": 5.983314448350207e-07, "loss": 0.2773, "step": 26112 }, { "epoch": 0.4539102018112604, "grad_norm": 1.6399786712541014, "learning_rate": 5.98303845066168e-07, "loss": 0.299, "step": 26113 }, { "epoch": 0.45392758434876324, "grad_norm": 1.1082484018052958, "learning_rate": 5.982762449857374e-07, "loss": 0.1982, "step": 26114 }, { "epoch": 0.45394496688626607, "grad_norm": 2.025291272363811, "learning_rate": 5.98248644593816e-07, "loss": 0.2457, "step": 26115 }, { "epoch": 0.4539623494237689, "grad_norm": 1.492573719315102, "learning_rate": 5.982210438904915e-07, "loss": 0.1841, "step": 26116 }, { "epoch": 0.4539797319612717, "grad_norm": 2.6604176938456052, "learning_rate": 5.981934428758516e-07, "loss": 0.2555, "step": 26117 }, { "epoch": 0.45399711449877456, "grad_norm": 4.864629066033277, "learning_rate": 5.981658415499836e-07, "loss": 0.5571, "step": 26118 }, { "epoch": 0.45401449703627733, "grad_norm": 1.1838489284462324, "learning_rate": 5.981382399129749e-07, "loss": 0.1789, "step": 26119 }, { "epoch": 0.45403187957378016, "grad_norm": 1.9368149677860218, "learning_rate": 5.98110637964913e-07, "loss": 0.3233, "step": 26120 }, { "epoch": 0.454049262111283, "grad_norm": 2.65961483461155, "learning_rate": 5.980830357058857e-07, "loss": 0.3174, "step": 26121 }, { "epoch": 0.4540666446487858, "grad_norm": 1.912613995164332, "learning_rate": 5.9805543313598e-07, "loss": 0.2162, "step": 26122 }, { "epoch": 0.45408402718628865, "grad_norm": 0.9883426414510553, "learning_rate": 5.980278302552835e-07, "loss": 0.2775, "step": 26123 }, { "epoch": 0.4541014097237915, "grad_norm": 1.3851832357098455, "learning_rate": 5.980002270638841e-07, "loss": 0.2238, "step": 26124 }, { "epoch": 0.4541187922612943, "grad_norm": 1.3045205279715857, "learning_rate": 5.979726235618689e-07, "loss": 0.2151, "step": 26125 }, { "epoch": 0.45413617479879714, "grad_norm": 2.3736279047795237, "learning_rate": 5.979450197493253e-07, "loss": 0.2739, "step": 26126 }, { "epoch": 0.4541535573363, "grad_norm": 3.5585013735597593, "learning_rate": 5.979174156263413e-07, "loss": 0.4192, "step": 26127 }, { "epoch": 0.4541709398738028, "grad_norm": 1.5814455126620544, "learning_rate": 5.978898111930038e-07, "loss": 0.1759, "step": 26128 }, { "epoch": 0.4541883224113056, "grad_norm": 2.8635041081614756, "learning_rate": 5.978622064494007e-07, "loss": 0.3231, "step": 26129 }, { "epoch": 0.4542057049488084, "grad_norm": 1.6169008232959834, "learning_rate": 5.978346013956194e-07, "loss": 0.572, "step": 26130 }, { "epoch": 0.45422308748631124, "grad_norm": 2.421850826612837, "learning_rate": 5.978069960317472e-07, "loss": 0.3038, "step": 26131 }, { "epoch": 0.45424047002381407, "grad_norm": 1.4559266061597458, "learning_rate": 5.977793903578719e-07, "loss": 0.218, "step": 26132 }, { "epoch": 0.4542578525613169, "grad_norm": 3.7350360340073085, "learning_rate": 5.977517843740807e-07, "loss": 0.1567, "step": 26133 }, { "epoch": 0.45427523509881973, "grad_norm": 1.6460245131744926, "learning_rate": 5.977241780804614e-07, "loss": 0.3311, "step": 26134 }, { "epoch": 0.45429261763632256, "grad_norm": 1.9189557608792986, "learning_rate": 5.976965714771013e-07, "loss": 0.3044, "step": 26135 }, { "epoch": 0.4543100001738254, "grad_norm": 1.6403811892717848, "learning_rate": 5.976689645640878e-07, "loss": 0.2719, "step": 26136 }, { "epoch": 0.4543273827113282, "grad_norm": 2.7780779985368227, "learning_rate": 5.976413573415086e-07, "loss": 0.335, "step": 26137 }, { "epoch": 0.45434476524883105, "grad_norm": 1.6793737557078494, "learning_rate": 5.976137498094514e-07, "loss": 0.4032, "step": 26138 }, { "epoch": 0.4543621477863338, "grad_norm": 1.7637638644171643, "learning_rate": 5.975861419680033e-07, "loss": 0.2158, "step": 26139 }, { "epoch": 0.45437953032383666, "grad_norm": 1.5702156861914187, "learning_rate": 5.975585338172519e-07, "loss": 0.1897, "step": 26140 }, { "epoch": 0.4543969128613395, "grad_norm": 1.7552251239374403, "learning_rate": 5.975309253572848e-07, "loss": 0.2238, "step": 26141 }, { "epoch": 0.4544142953988423, "grad_norm": 1.4920015586390063, "learning_rate": 5.975033165881894e-07, "loss": 0.2807, "step": 26142 }, { "epoch": 0.45443167793634515, "grad_norm": 1.0077212155779218, "learning_rate": 5.974757075100533e-07, "loss": 0.2132, "step": 26143 }, { "epoch": 0.454449060473848, "grad_norm": 1.7224978199012495, "learning_rate": 5.974480981229642e-07, "loss": 0.1966, "step": 26144 }, { "epoch": 0.4544664430113508, "grad_norm": 2.1461753888006565, "learning_rate": 5.974204884270091e-07, "loss": 0.4042, "step": 26145 }, { "epoch": 0.45448382554885364, "grad_norm": 1.2305322443719622, "learning_rate": 5.973928784222761e-07, "loss": 0.2212, "step": 26146 }, { "epoch": 0.45450120808635647, "grad_norm": 2.2381864573179318, "learning_rate": 5.97365268108852e-07, "loss": 0.2278, "step": 26147 }, { "epoch": 0.4545185906238593, "grad_norm": 2.455483402085337, "learning_rate": 5.973376574868251e-07, "loss": 0.2155, "step": 26148 }, { "epoch": 0.4545359731613621, "grad_norm": 4.616186376722502, "learning_rate": 5.973100465562823e-07, "loss": 0.3245, "step": 26149 }, { "epoch": 0.4545533556988649, "grad_norm": 1.5947564512300023, "learning_rate": 5.972824353173116e-07, "loss": 0.3781, "step": 26150 }, { "epoch": 0.45457073823636773, "grad_norm": 1.453700057539345, "learning_rate": 5.972548237700001e-07, "loss": 0.2137, "step": 26151 }, { "epoch": 0.45458812077387056, "grad_norm": 1.3362991809333928, "learning_rate": 5.972272119144354e-07, "loss": 0.2164, "step": 26152 }, { "epoch": 0.4546055033113734, "grad_norm": 2.240569810609084, "learning_rate": 5.971995997507052e-07, "loss": 0.3156, "step": 26153 }, { "epoch": 0.4546228858488762, "grad_norm": 1.6170539527190824, "learning_rate": 5.971719872788969e-07, "loss": 0.3557, "step": 26154 }, { "epoch": 0.45464026838637905, "grad_norm": 1.1483355446175667, "learning_rate": 5.971443744990981e-07, "loss": 0.1729, "step": 26155 }, { "epoch": 0.4546576509238819, "grad_norm": 1.1870155779504747, "learning_rate": 5.971167614113964e-07, "loss": 0.3205, "step": 26156 }, { "epoch": 0.4546750334613847, "grad_norm": 1.6368870574187213, "learning_rate": 5.970891480158789e-07, "loss": 0.2689, "step": 26157 }, { "epoch": 0.45469241599888754, "grad_norm": 1.9221178283125393, "learning_rate": 5.970615343126334e-07, "loss": 0.3248, "step": 26158 }, { "epoch": 0.4547097985363903, "grad_norm": 1.0581653873633166, "learning_rate": 5.970339203017476e-07, "loss": 0.2191, "step": 26159 }, { "epoch": 0.45472718107389315, "grad_norm": 2.0381643192492964, "learning_rate": 5.970063059833089e-07, "loss": 0.1985, "step": 26160 }, { "epoch": 0.454744563611396, "grad_norm": 1.9437162160919486, "learning_rate": 5.969786913574044e-07, "loss": 0.2245, "step": 26161 }, { "epoch": 0.4547619461488988, "grad_norm": 1.5748011766310548, "learning_rate": 5.969510764241224e-07, "loss": 0.3917, "step": 26162 }, { "epoch": 0.45477932868640164, "grad_norm": 2.2955259534936157, "learning_rate": 5.969234611835497e-07, "loss": 0.5025, "step": 26163 }, { "epoch": 0.45479671122390447, "grad_norm": 2.4197579336582837, "learning_rate": 5.968958456357745e-07, "loss": 0.3171, "step": 26164 }, { "epoch": 0.4548140937614073, "grad_norm": 1.359047187332719, "learning_rate": 5.968682297808837e-07, "loss": 0.1969, "step": 26165 }, { "epoch": 0.45483147629891013, "grad_norm": 1.3744888180586439, "learning_rate": 5.968406136189652e-07, "loss": 0.1653, "step": 26166 }, { "epoch": 0.45484885883641296, "grad_norm": 1.781106592414853, "learning_rate": 5.968129971501064e-07, "loss": 0.2707, "step": 26167 }, { "epoch": 0.4548662413739158, "grad_norm": 1.3273206477921753, "learning_rate": 5.96785380374395e-07, "loss": 0.1435, "step": 26168 }, { "epoch": 0.45488362391141857, "grad_norm": 2.3351780715153208, "learning_rate": 5.967577632919183e-07, "loss": 0.3753, "step": 26169 }, { "epoch": 0.4549010064489214, "grad_norm": 1.232375449519919, "learning_rate": 5.967301459027639e-07, "loss": 0.2893, "step": 26170 }, { "epoch": 0.4549183889864242, "grad_norm": 1.9304915848628195, "learning_rate": 5.967025282070195e-07, "loss": 0.2155, "step": 26171 }, { "epoch": 0.45493577152392706, "grad_norm": 1.5134137170081612, "learning_rate": 5.966749102047723e-07, "loss": 0.4051, "step": 26172 }, { "epoch": 0.4549531540614299, "grad_norm": 2.007555558598777, "learning_rate": 5.966472918961102e-07, "loss": 0.3006, "step": 26173 }, { "epoch": 0.4549705365989327, "grad_norm": 1.461917682354608, "learning_rate": 5.966196732811206e-07, "loss": 0.3349, "step": 26174 }, { "epoch": 0.45498791913643555, "grad_norm": 1.6486450687695504, "learning_rate": 5.96592054359891e-07, "loss": 0.1882, "step": 26175 }, { "epoch": 0.4550053016739384, "grad_norm": 1.6577669503534775, "learning_rate": 5.965644351325088e-07, "loss": 0.2353, "step": 26176 }, { "epoch": 0.4550226842114412, "grad_norm": 2.4275361349080575, "learning_rate": 5.965368155990619e-07, "loss": 0.1982, "step": 26177 }, { "epoch": 0.455040066748944, "grad_norm": 3.124527534567132, "learning_rate": 5.965091957596375e-07, "loss": 0.2813, "step": 26178 }, { "epoch": 0.4550574492864468, "grad_norm": 1.8009761925913503, "learning_rate": 5.964815756143235e-07, "loss": 0.2714, "step": 26179 }, { "epoch": 0.45507483182394964, "grad_norm": 2.0930564525519384, "learning_rate": 5.964539551632069e-07, "loss": 0.2095, "step": 26180 }, { "epoch": 0.4550922143614525, "grad_norm": 1.4892822286805383, "learning_rate": 5.96426334406376e-07, "loss": 0.2985, "step": 26181 }, { "epoch": 0.4551095968989553, "grad_norm": 1.7180140873058134, "learning_rate": 5.963987133439174e-07, "loss": 0.2247, "step": 26182 }, { "epoch": 0.45512697943645813, "grad_norm": 1.8159297041821905, "learning_rate": 5.963710919759195e-07, "loss": 0.6044, "step": 26183 }, { "epoch": 0.45514436197396096, "grad_norm": 1.3982785324081335, "learning_rate": 5.963434703024695e-07, "loss": 0.1898, "step": 26184 }, { "epoch": 0.4551617445114638, "grad_norm": 2.60139694002768, "learning_rate": 5.963158483236549e-07, "loss": 0.432, "step": 26185 }, { "epoch": 0.4551791270489666, "grad_norm": 1.8013051277011416, "learning_rate": 5.962882260395632e-07, "loss": 0.2421, "step": 26186 }, { "epoch": 0.45519650958646946, "grad_norm": 1.5129816301393435, "learning_rate": 5.962606034502823e-07, "loss": 0.2719, "step": 26187 }, { "epoch": 0.45521389212397223, "grad_norm": 1.5642442173824473, "learning_rate": 5.962329805558992e-07, "loss": 0.445, "step": 26188 }, { "epoch": 0.45523127466147506, "grad_norm": 1.9766366162827524, "learning_rate": 5.962053573565019e-07, "loss": 0.2711, "step": 26189 }, { "epoch": 0.4552486571989779, "grad_norm": 1.2692176671098696, "learning_rate": 5.961777338521778e-07, "loss": 0.2484, "step": 26190 }, { "epoch": 0.4552660397364807, "grad_norm": 2.128479322947186, "learning_rate": 5.961501100430146e-07, "loss": 0.4921, "step": 26191 }, { "epoch": 0.45528342227398355, "grad_norm": 1.506374179577158, "learning_rate": 5.961224859290996e-07, "loss": 0.3692, "step": 26192 }, { "epoch": 0.4553008048114864, "grad_norm": 4.423538060280702, "learning_rate": 5.960948615105203e-07, "loss": 0.3579, "step": 26193 }, { "epoch": 0.4553181873489892, "grad_norm": 1.3353118535663333, "learning_rate": 5.960672367873647e-07, "loss": 0.2396, "step": 26194 }, { "epoch": 0.45533556988649204, "grad_norm": 4.221161512036903, "learning_rate": 5.9603961175972e-07, "loss": 0.2306, "step": 26195 }, { "epoch": 0.4553529524239949, "grad_norm": 2.0443358054450784, "learning_rate": 5.960119864276738e-07, "loss": 0.4413, "step": 26196 }, { "epoch": 0.4553703349614977, "grad_norm": 1.5456440754933125, "learning_rate": 5.959843607913138e-07, "loss": 0.1786, "step": 26197 }, { "epoch": 0.4553877174990005, "grad_norm": 1.4562156310352228, "learning_rate": 5.959567348507273e-07, "loss": 0.3227, "step": 26198 }, { "epoch": 0.4554051000365033, "grad_norm": 1.6814622523456946, "learning_rate": 5.959291086060023e-07, "loss": 0.2514, "step": 26199 }, { "epoch": 0.45542248257400614, "grad_norm": 1.6186598024211736, "learning_rate": 5.959014820572259e-07, "loss": 0.1743, "step": 26200 }, { "epoch": 0.45543986511150897, "grad_norm": 1.426132427746257, "learning_rate": 5.958738552044858e-07, "loss": 0.1974, "step": 26201 }, { "epoch": 0.4554572476490118, "grad_norm": 2.7658064056568175, "learning_rate": 5.958462280478698e-07, "loss": 0.5643, "step": 26202 }, { "epoch": 0.45547463018651463, "grad_norm": 1.4244464168243105, "learning_rate": 5.958186005874652e-07, "loss": 0.2855, "step": 26203 }, { "epoch": 0.45549201272401746, "grad_norm": 1.8514402084680763, "learning_rate": 5.957909728233597e-07, "loss": 0.2639, "step": 26204 }, { "epoch": 0.4555093952615203, "grad_norm": 2.452715657192354, "learning_rate": 5.957633447556407e-07, "loss": 0.3892, "step": 26205 }, { "epoch": 0.4555267777990231, "grad_norm": 1.731738566314502, "learning_rate": 5.957357163843961e-07, "loss": 0.4122, "step": 26206 }, { "epoch": 0.45554416033652595, "grad_norm": 2.2377293472659106, "learning_rate": 5.95708087709713e-07, "loss": 0.2236, "step": 26207 }, { "epoch": 0.4555615428740287, "grad_norm": 1.6715287793916251, "learning_rate": 5.956804587316794e-07, "loss": 0.2476, "step": 26208 }, { "epoch": 0.45557892541153155, "grad_norm": 1.3631012866611845, "learning_rate": 5.956528294503826e-07, "loss": 0.2195, "step": 26209 }, { "epoch": 0.4555963079490344, "grad_norm": 2.434638330626294, "learning_rate": 5.956251998659103e-07, "loss": 0.3504, "step": 26210 }, { "epoch": 0.4556136904865372, "grad_norm": 5.009382229171013, "learning_rate": 5.9559756997835e-07, "loss": 0.4242, "step": 26211 }, { "epoch": 0.45563107302404005, "grad_norm": 3.5566003976152745, "learning_rate": 5.955699397877893e-07, "loss": 0.3293, "step": 26212 }, { "epoch": 0.4556484555615429, "grad_norm": 0.9380039144430169, "learning_rate": 5.95542309294316e-07, "loss": 0.1523, "step": 26213 }, { "epoch": 0.4556658380990457, "grad_norm": 1.6500756062036532, "learning_rate": 5.955146784980174e-07, "loss": 0.2506, "step": 26214 }, { "epoch": 0.45568322063654854, "grad_norm": 1.5655642128220275, "learning_rate": 5.95487047398981e-07, "loss": 0.2119, "step": 26215 }, { "epoch": 0.45570060317405137, "grad_norm": 1.7090161386495315, "learning_rate": 5.954594159972947e-07, "loss": 0.1692, "step": 26216 }, { "epoch": 0.4557179857115542, "grad_norm": 1.6125072551985078, "learning_rate": 5.954317842930456e-07, "loss": 0.2352, "step": 26217 }, { "epoch": 0.45573536824905697, "grad_norm": 1.8706409480416273, "learning_rate": 5.954041522863219e-07, "loss": 0.3024, "step": 26218 }, { "epoch": 0.4557527507865598, "grad_norm": 1.5060820398865877, "learning_rate": 5.953765199772107e-07, "loss": 0.2345, "step": 26219 }, { "epoch": 0.45577013332406263, "grad_norm": 1.2208404482080293, "learning_rate": 5.953488873657998e-07, "loss": 0.149, "step": 26220 }, { "epoch": 0.45578751586156546, "grad_norm": 2.1603589487649786, "learning_rate": 5.953212544521766e-07, "loss": 0.3736, "step": 26221 }, { "epoch": 0.4558048983990683, "grad_norm": 1.8897090327696824, "learning_rate": 5.952936212364291e-07, "loss": 0.2412, "step": 26222 }, { "epoch": 0.4558222809365711, "grad_norm": 1.7295731989098713, "learning_rate": 5.952659877186444e-07, "loss": 0.2244, "step": 26223 }, { "epoch": 0.45583966347407395, "grad_norm": 1.540030401284462, "learning_rate": 5.952383538989103e-07, "loss": 0.2169, "step": 26224 }, { "epoch": 0.4558570460115768, "grad_norm": 1.7849124310953584, "learning_rate": 5.952107197773144e-07, "loss": 0.2243, "step": 26225 }, { "epoch": 0.4558744285490796, "grad_norm": 2.3809676974523177, "learning_rate": 5.951830853539442e-07, "loss": 0.2738, "step": 26226 }, { "epoch": 0.45589181108658244, "grad_norm": 1.6755139472481688, "learning_rate": 5.951554506288872e-07, "loss": 0.1918, "step": 26227 }, { "epoch": 0.4559091936240852, "grad_norm": 2.6480522314298898, "learning_rate": 5.951278156022314e-07, "loss": 0.411, "step": 26228 }, { "epoch": 0.45592657616158805, "grad_norm": 1.5337065401255525, "learning_rate": 5.95100180274064e-07, "loss": 0.1764, "step": 26229 }, { "epoch": 0.4559439586990909, "grad_norm": 1.9404150047809703, "learning_rate": 5.950725446444726e-07, "loss": 0.192, "step": 26230 }, { "epoch": 0.4559613412365937, "grad_norm": 1.9895935270012486, "learning_rate": 5.950449087135453e-07, "loss": 0.2116, "step": 26231 }, { "epoch": 0.45597872377409654, "grad_norm": 1.0714618054499216, "learning_rate": 5.950172724813689e-07, "loss": 0.1795, "step": 26232 }, { "epoch": 0.45599610631159937, "grad_norm": 2.6352655068159985, "learning_rate": 5.949896359480316e-07, "loss": 0.397, "step": 26233 }, { "epoch": 0.4560134888491022, "grad_norm": 2.0659054824969028, "learning_rate": 5.949619991136207e-07, "loss": 0.2655, "step": 26234 }, { "epoch": 0.45603087138660503, "grad_norm": 1.9760472964627507, "learning_rate": 5.94934361978224e-07, "loss": 0.3045, "step": 26235 }, { "epoch": 0.45604825392410786, "grad_norm": 1.5459128122000387, "learning_rate": 5.949067245419288e-07, "loss": 0.2099, "step": 26236 }, { "epoch": 0.4560656364616107, "grad_norm": 1.3638698239579656, "learning_rate": 5.948790868048229e-07, "loss": 0.2818, "step": 26237 }, { "epoch": 0.45608301899911347, "grad_norm": 1.5169526761473002, "learning_rate": 5.94851448766994e-07, "loss": 0.3445, "step": 26238 }, { "epoch": 0.4561004015366163, "grad_norm": 1.384780300767386, "learning_rate": 5.948238104285296e-07, "loss": 0.2077, "step": 26239 }, { "epoch": 0.4561177840741191, "grad_norm": 1.1889637607000447, "learning_rate": 5.947961717895172e-07, "loss": 0.1966, "step": 26240 }, { "epoch": 0.45613516661162196, "grad_norm": 2.2242565961740883, "learning_rate": 5.947685328500445e-07, "loss": 0.4, "step": 26241 }, { "epoch": 0.4561525491491248, "grad_norm": 1.4022062791718046, "learning_rate": 5.947408936101989e-07, "loss": 0.3355, "step": 26242 }, { "epoch": 0.4561699316866276, "grad_norm": 3.7263358702970866, "learning_rate": 5.947132540700685e-07, "loss": 0.3703, "step": 26243 }, { "epoch": 0.45618731422413045, "grad_norm": 1.1628224234452555, "learning_rate": 5.946856142297404e-07, "loss": 0.3945, "step": 26244 }, { "epoch": 0.4562046967616333, "grad_norm": 0.912615613445995, "learning_rate": 5.946579740893025e-07, "loss": 0.1966, "step": 26245 }, { "epoch": 0.4562220792991361, "grad_norm": 1.4585462125965656, "learning_rate": 5.946303336488422e-07, "loss": 0.2289, "step": 26246 }, { "epoch": 0.45623946183663894, "grad_norm": 3.395958998367463, "learning_rate": 5.946026929084473e-07, "loss": 0.2904, "step": 26247 }, { "epoch": 0.4562568443741417, "grad_norm": 5.956128343260339, "learning_rate": 5.945750518682052e-07, "loss": 0.2351, "step": 26248 }, { "epoch": 0.45627422691164454, "grad_norm": 2.519117331813542, "learning_rate": 5.945474105282038e-07, "loss": 0.3312, "step": 26249 }, { "epoch": 0.4562916094491474, "grad_norm": 0.9635990609969235, "learning_rate": 5.945197688885305e-07, "loss": 0.1687, "step": 26250 }, { "epoch": 0.4563089919866502, "grad_norm": 1.6490855924470844, "learning_rate": 5.944921269492729e-07, "loss": 0.2178, "step": 26251 }, { "epoch": 0.45632637452415303, "grad_norm": 2.221966432942424, "learning_rate": 5.944644847105186e-07, "loss": 0.3624, "step": 26252 }, { "epoch": 0.45634375706165586, "grad_norm": 1.1871307407966598, "learning_rate": 5.944368421723553e-07, "loss": 0.2766, "step": 26253 }, { "epoch": 0.4563611395991587, "grad_norm": 2.9109057830035745, "learning_rate": 5.944091993348708e-07, "loss": 0.2954, "step": 26254 }, { "epoch": 0.4563785221366615, "grad_norm": 1.1062577017532962, "learning_rate": 5.943815561981522e-07, "loss": 0.241, "step": 26255 }, { "epoch": 0.45639590467416435, "grad_norm": 0.8201401913114027, "learning_rate": 5.943539127622875e-07, "loss": 0.2949, "step": 26256 }, { "epoch": 0.4564132872116672, "grad_norm": 1.27156413968651, "learning_rate": 5.943262690273644e-07, "loss": 0.4258, "step": 26257 }, { "epoch": 0.45643066974916996, "grad_norm": 3.318636206309795, "learning_rate": 5.942986249934703e-07, "loss": 0.3366, "step": 26258 }, { "epoch": 0.4564480522866728, "grad_norm": 2.2201493034168105, "learning_rate": 5.942709806606928e-07, "loss": 0.2226, "step": 26259 }, { "epoch": 0.4564654348241756, "grad_norm": 1.777180859217488, "learning_rate": 5.942433360291197e-07, "loss": 0.2771, "step": 26260 }, { "epoch": 0.45648281736167845, "grad_norm": 1.6035475974931324, "learning_rate": 5.942156910988383e-07, "loss": 0.2724, "step": 26261 }, { "epoch": 0.4565001998991813, "grad_norm": 2.8792009603189075, "learning_rate": 5.941880458699366e-07, "loss": 0.1501, "step": 26262 }, { "epoch": 0.4565175824366841, "grad_norm": 3.9547820182296034, "learning_rate": 5.941604003425021e-07, "loss": 0.456, "step": 26263 }, { "epoch": 0.45653496497418694, "grad_norm": 1.2608663460618081, "learning_rate": 5.941327545166224e-07, "loss": 0.1985, "step": 26264 }, { "epoch": 0.45655234751168977, "grad_norm": 1.3567397602985658, "learning_rate": 5.941051083923849e-07, "loss": 0.3383, "step": 26265 }, { "epoch": 0.4565697300491926, "grad_norm": 1.121688291539729, "learning_rate": 5.940774619698777e-07, "loss": 0.233, "step": 26266 }, { "epoch": 0.45658711258669543, "grad_norm": 1.3730487632701442, "learning_rate": 5.94049815249188e-07, "loss": 0.2114, "step": 26267 }, { "epoch": 0.4566044951241982, "grad_norm": 3.8332060892236037, "learning_rate": 5.940221682304037e-07, "loss": 0.3568, "step": 26268 }, { "epoch": 0.45662187766170104, "grad_norm": 1.8594996967165311, "learning_rate": 5.939945209136123e-07, "loss": 0.2254, "step": 26269 }, { "epoch": 0.45663926019920387, "grad_norm": 1.791864938724235, "learning_rate": 5.939668732989014e-07, "loss": 0.2091, "step": 26270 }, { "epoch": 0.4566566427367067, "grad_norm": 1.3145113869928284, "learning_rate": 5.939392253863585e-07, "loss": 0.304, "step": 26271 }, { "epoch": 0.4566740252742095, "grad_norm": 1.665984808738784, "learning_rate": 5.939115771760717e-07, "loss": 0.2089, "step": 26272 }, { "epoch": 0.45669140781171236, "grad_norm": 1.234243614543716, "learning_rate": 5.938839286681282e-07, "loss": 0.3256, "step": 26273 }, { "epoch": 0.4567087903492152, "grad_norm": 1.2921639906162534, "learning_rate": 5.938562798626158e-07, "loss": 0.2886, "step": 26274 }, { "epoch": 0.456726172886718, "grad_norm": 1.235767632706419, "learning_rate": 5.938286307596219e-07, "loss": 0.367, "step": 26275 }, { "epoch": 0.45674355542422085, "grad_norm": 1.9371134863263406, "learning_rate": 5.938009813592347e-07, "loss": 0.2159, "step": 26276 }, { "epoch": 0.4567609379617237, "grad_norm": 1.5300255877861502, "learning_rate": 5.937733316615413e-07, "loss": 0.2107, "step": 26277 }, { "epoch": 0.45677832049922645, "grad_norm": 1.6179428310121, "learning_rate": 5.937456816666296e-07, "loss": 0.2531, "step": 26278 }, { "epoch": 0.4567957030367293, "grad_norm": 1.464466237632097, "learning_rate": 5.937180313745871e-07, "loss": 0.4307, "step": 26279 }, { "epoch": 0.4568130855742321, "grad_norm": 1.9965778708215045, "learning_rate": 5.936903807855014e-07, "loss": 0.2799, "step": 26280 }, { "epoch": 0.45683046811173494, "grad_norm": 2.626228015188466, "learning_rate": 5.936627298994602e-07, "loss": 0.2576, "step": 26281 }, { "epoch": 0.4568478506492378, "grad_norm": 1.4207856086869388, "learning_rate": 5.936350787165513e-07, "loss": 0.256, "step": 26282 }, { "epoch": 0.4568652331867406, "grad_norm": 2.1036799829787194, "learning_rate": 5.936074272368624e-07, "loss": 0.2909, "step": 26283 }, { "epoch": 0.45688261572424343, "grad_norm": 1.7094228974167491, "learning_rate": 5.935797754604807e-07, "loss": 0.3374, "step": 26284 }, { "epoch": 0.45689999826174627, "grad_norm": 2.3911856672463876, "learning_rate": 5.935521233874941e-07, "loss": 0.3223, "step": 26285 }, { "epoch": 0.4569173807992491, "grad_norm": 2.123680721031571, "learning_rate": 5.935244710179902e-07, "loss": 0.3405, "step": 26286 }, { "epoch": 0.4569347633367519, "grad_norm": 1.8053728297671736, "learning_rate": 5.934968183520568e-07, "loss": 0.2452, "step": 26287 }, { "epoch": 0.4569521458742547, "grad_norm": 1.0563787665037216, "learning_rate": 5.934691653897814e-07, "loss": 0.3212, "step": 26288 }, { "epoch": 0.45696952841175753, "grad_norm": 4.712899365457666, "learning_rate": 5.934415121312517e-07, "loss": 0.4013, "step": 26289 }, { "epoch": 0.45698691094926036, "grad_norm": 2.040525337893982, "learning_rate": 5.934138585765552e-07, "loss": 0.2928, "step": 26290 }, { "epoch": 0.4570042934867632, "grad_norm": 1.1498860428268003, "learning_rate": 5.933862047257799e-07, "loss": 0.1204, "step": 26291 }, { "epoch": 0.457021676024266, "grad_norm": 2.7660921671490057, "learning_rate": 5.933585505790131e-07, "loss": 0.3072, "step": 26292 }, { "epoch": 0.45703905856176885, "grad_norm": 1.7167853543767928, "learning_rate": 5.933308961363427e-07, "loss": 0.1857, "step": 26293 }, { "epoch": 0.4570564410992717, "grad_norm": 1.1212516839776014, "learning_rate": 5.93303241397856e-07, "loss": 0.2474, "step": 26294 }, { "epoch": 0.4570738236367745, "grad_norm": 2.037793189622231, "learning_rate": 5.932755863636411e-07, "loss": 0.2182, "step": 26295 }, { "epoch": 0.45709120617427734, "grad_norm": 1.7463585578693839, "learning_rate": 5.932479310337853e-07, "loss": 0.2426, "step": 26296 }, { "epoch": 0.4571085887117802, "grad_norm": 1.1331388931810862, "learning_rate": 5.932202754083765e-07, "loss": 0.2382, "step": 26297 }, { "epoch": 0.45712597124928295, "grad_norm": 2.4977865497395784, "learning_rate": 5.931926194875022e-07, "loss": 0.3366, "step": 26298 }, { "epoch": 0.4571433537867858, "grad_norm": 1.3085674050933347, "learning_rate": 5.931649632712502e-07, "loss": 0.278, "step": 26299 }, { "epoch": 0.4571607363242886, "grad_norm": 2.046313788408386, "learning_rate": 5.931373067597079e-07, "loss": 0.3315, "step": 26300 }, { "epoch": 0.45717811886179144, "grad_norm": 0.9867908446470061, "learning_rate": 5.931096499529634e-07, "loss": 0.2569, "step": 26301 }, { "epoch": 0.45719550139929427, "grad_norm": 1.4456783420550168, "learning_rate": 5.930819928511037e-07, "loss": 0.2921, "step": 26302 }, { "epoch": 0.4572128839367971, "grad_norm": 1.9464582134837436, "learning_rate": 5.93054335454217e-07, "loss": 0.3809, "step": 26303 }, { "epoch": 0.45723026647429993, "grad_norm": 1.410096427249911, "learning_rate": 5.930266777623909e-07, "loss": 0.3536, "step": 26304 }, { "epoch": 0.45724764901180276, "grad_norm": 1.67556006981343, "learning_rate": 5.929990197757129e-07, "loss": 0.2854, "step": 26305 }, { "epoch": 0.4572650315493056, "grad_norm": 0.9910865931378811, "learning_rate": 5.929713614942707e-07, "loss": 0.1626, "step": 26306 }, { "epoch": 0.4572824140868084, "grad_norm": 1.4468573802345144, "learning_rate": 5.929437029181521e-07, "loss": 0.2479, "step": 26307 }, { "epoch": 0.4572997966243112, "grad_norm": 2.4683741360812967, "learning_rate": 5.929160440474445e-07, "loss": 0.2379, "step": 26308 }, { "epoch": 0.457317179161814, "grad_norm": 2.9120360471710405, "learning_rate": 5.928883848822357e-07, "loss": 0.2643, "step": 26309 }, { "epoch": 0.45733456169931685, "grad_norm": 2.6107947256103863, "learning_rate": 5.928607254226135e-07, "loss": 0.2602, "step": 26310 }, { "epoch": 0.4573519442368197, "grad_norm": 1.5400332189108352, "learning_rate": 5.928330656686654e-07, "loss": 0.2305, "step": 26311 }, { "epoch": 0.4573693267743225, "grad_norm": 3.4022930319179387, "learning_rate": 5.928054056204791e-07, "loss": 0.3515, "step": 26312 }, { "epoch": 0.45738670931182535, "grad_norm": 0.6326444188032783, "learning_rate": 5.927777452781423e-07, "loss": 0.2045, "step": 26313 }, { "epoch": 0.4574040918493282, "grad_norm": 1.8923052111472247, "learning_rate": 5.927500846417428e-07, "loss": 0.2833, "step": 26314 }, { "epoch": 0.457421474386831, "grad_norm": 1.2265185174120652, "learning_rate": 5.927224237113682e-07, "loss": 0.3312, "step": 26315 }, { "epoch": 0.45743885692433384, "grad_norm": 1.3688518672321064, "learning_rate": 5.926947624871058e-07, "loss": 0.1828, "step": 26316 }, { "epoch": 0.4574562394618366, "grad_norm": 1.7267418061489974, "learning_rate": 5.926671009690438e-07, "loss": 0.2573, "step": 26317 }, { "epoch": 0.45747362199933944, "grad_norm": 1.6480626643810865, "learning_rate": 5.926394391572695e-07, "loss": 0.2322, "step": 26318 }, { "epoch": 0.45749100453684227, "grad_norm": 1.9311736237370944, "learning_rate": 5.926117770518709e-07, "loss": 0.45, "step": 26319 }, { "epoch": 0.4575083870743451, "grad_norm": 0.9869392362563953, "learning_rate": 5.925841146529356e-07, "loss": 0.321, "step": 26320 }, { "epoch": 0.45752576961184793, "grad_norm": 3.550244504408645, "learning_rate": 5.925564519605509e-07, "loss": 0.277, "step": 26321 }, { "epoch": 0.45754315214935076, "grad_norm": 1.3881359875659054, "learning_rate": 5.925287889748049e-07, "loss": 0.2345, "step": 26322 }, { "epoch": 0.4575605346868536, "grad_norm": 1.3492513809785458, "learning_rate": 5.925011256957852e-07, "loss": 0.2214, "step": 26323 }, { "epoch": 0.4575779172243564, "grad_norm": 1.7241378298752996, "learning_rate": 5.924734621235794e-07, "loss": 0.1902, "step": 26324 }, { "epoch": 0.45759529976185925, "grad_norm": 2.5119757809682843, "learning_rate": 5.92445798258275e-07, "loss": 0.2619, "step": 26325 }, { "epoch": 0.4576126822993621, "grad_norm": 1.6162588992457125, "learning_rate": 5.924181340999602e-07, "loss": 0.2822, "step": 26326 }, { "epoch": 0.45763006483686486, "grad_norm": 1.600892965086787, "learning_rate": 5.923904696487222e-07, "loss": 0.2787, "step": 26327 }, { "epoch": 0.4576474473743677, "grad_norm": 2.6650861414089344, "learning_rate": 5.923628049046489e-07, "loss": 0.276, "step": 26328 }, { "epoch": 0.4576648299118705, "grad_norm": 2.132474188395218, "learning_rate": 5.923351398678279e-07, "loss": 0.3758, "step": 26329 }, { "epoch": 0.45768221244937335, "grad_norm": 1.966321404048438, "learning_rate": 5.92307474538347e-07, "loss": 0.2129, "step": 26330 }, { "epoch": 0.4576995949868762, "grad_norm": 1.723903305413468, "learning_rate": 5.922798089162936e-07, "loss": 0.2581, "step": 26331 }, { "epoch": 0.457716977524379, "grad_norm": 1.7257110954225412, "learning_rate": 5.922521430017558e-07, "loss": 0.2737, "step": 26332 }, { "epoch": 0.45773436006188184, "grad_norm": 1.5153450946693592, "learning_rate": 5.92224476794821e-07, "loss": 0.2054, "step": 26333 }, { "epoch": 0.45775174259938467, "grad_norm": 1.698033316316351, "learning_rate": 5.92196810295577e-07, "loss": 0.1909, "step": 26334 }, { "epoch": 0.4577691251368875, "grad_norm": 1.262963383059093, "learning_rate": 5.921691435041114e-07, "loss": 0.2285, "step": 26335 }, { "epoch": 0.45778650767439033, "grad_norm": 1.3396068261590677, "learning_rate": 5.921414764205122e-07, "loss": 0.2794, "step": 26336 }, { "epoch": 0.4578038902118931, "grad_norm": 2.1507938495102192, "learning_rate": 5.921138090448665e-07, "loss": 0.2416, "step": 26337 }, { "epoch": 0.45782127274939594, "grad_norm": 1.4668331925144238, "learning_rate": 5.920861413772626e-07, "loss": 0.3463, "step": 26338 }, { "epoch": 0.45783865528689877, "grad_norm": 1.2665754969356395, "learning_rate": 5.920584734177878e-07, "loss": 0.3768, "step": 26339 }, { "epoch": 0.4578560378244016, "grad_norm": 2.080154190678572, "learning_rate": 5.920308051665299e-07, "loss": 0.2944, "step": 26340 }, { "epoch": 0.4578734203619044, "grad_norm": 1.3976303319930121, "learning_rate": 5.920031366235766e-07, "loss": 0.257, "step": 26341 }, { "epoch": 0.45789080289940726, "grad_norm": 1.827123459686996, "learning_rate": 5.919754677890157e-07, "loss": 0.4131, "step": 26342 }, { "epoch": 0.4579081854369101, "grad_norm": 1.7527636557541304, "learning_rate": 5.919477986629348e-07, "loss": 0.4007, "step": 26343 }, { "epoch": 0.4579255679744129, "grad_norm": 1.4300165764088764, "learning_rate": 5.919201292454217e-07, "loss": 0.3013, "step": 26344 }, { "epoch": 0.45794295051191575, "grad_norm": 1.122642520372171, "learning_rate": 5.91892459536564e-07, "loss": 0.3231, "step": 26345 }, { "epoch": 0.4579603330494186, "grad_norm": 2.585465186409972, "learning_rate": 5.918647895364492e-07, "loss": 0.3001, "step": 26346 }, { "epoch": 0.45797771558692135, "grad_norm": 1.573536865700823, "learning_rate": 5.918371192451654e-07, "loss": 0.3122, "step": 26347 }, { "epoch": 0.4579950981244242, "grad_norm": 1.8534223582025917, "learning_rate": 5.918094486628003e-07, "loss": 0.2132, "step": 26348 }, { "epoch": 0.458012480661927, "grad_norm": 0.9202992769551731, "learning_rate": 5.917817777894411e-07, "loss": 0.1809, "step": 26349 }, { "epoch": 0.45802986319942984, "grad_norm": 4.192053165049477, "learning_rate": 5.917541066251759e-07, "loss": 0.395, "step": 26350 }, { "epoch": 0.4580472457369327, "grad_norm": 1.2584202076765199, "learning_rate": 5.917264351700924e-07, "loss": 0.1853, "step": 26351 }, { "epoch": 0.4580646282744355, "grad_norm": 1.5959892373706048, "learning_rate": 5.916987634242782e-07, "loss": 0.2847, "step": 26352 }, { "epoch": 0.45808201081193833, "grad_norm": 2.5838261506184925, "learning_rate": 5.916710913878212e-07, "loss": 0.2148, "step": 26353 }, { "epoch": 0.45809939334944116, "grad_norm": 2.3157195514835567, "learning_rate": 5.916434190608086e-07, "loss": 0.4314, "step": 26354 }, { "epoch": 0.458116775886944, "grad_norm": 2.847014340437538, "learning_rate": 5.916157464433288e-07, "loss": 0.5271, "step": 26355 }, { "epoch": 0.4581341584244468, "grad_norm": 1.4863061983281032, "learning_rate": 5.915880735354689e-07, "loss": 0.2093, "step": 26356 }, { "epoch": 0.4581515409619496, "grad_norm": 1.5499029854941386, "learning_rate": 5.915604003373171e-07, "loss": 0.2025, "step": 26357 }, { "epoch": 0.45816892349945243, "grad_norm": 1.1217245054963985, "learning_rate": 5.915327268489608e-07, "loss": 0.1343, "step": 26358 }, { "epoch": 0.45818630603695526, "grad_norm": 1.7092639717393816, "learning_rate": 5.915050530704878e-07, "loss": 0.2191, "step": 26359 }, { "epoch": 0.4582036885744581, "grad_norm": 1.3141461969051644, "learning_rate": 5.914773790019858e-07, "loss": 0.2223, "step": 26360 }, { "epoch": 0.4582210711119609, "grad_norm": 1.90961942634281, "learning_rate": 5.914497046435425e-07, "loss": 0.297, "step": 26361 }, { "epoch": 0.45823845364946375, "grad_norm": 1.4670260772873924, "learning_rate": 5.914220299952458e-07, "loss": 0.3003, "step": 26362 }, { "epoch": 0.4582558361869666, "grad_norm": 2.532635193311557, "learning_rate": 5.913943550571831e-07, "loss": 0.2458, "step": 26363 }, { "epoch": 0.4582732187244694, "grad_norm": 1.8387540692351951, "learning_rate": 5.913666798294423e-07, "loss": 0.354, "step": 26364 }, { "epoch": 0.45829060126197224, "grad_norm": 1.419736062181933, "learning_rate": 5.913390043121113e-07, "loss": 0.4365, "step": 26365 }, { "epoch": 0.45830798379947507, "grad_norm": 1.7908315222306286, "learning_rate": 5.913113285052774e-07, "loss": 0.2877, "step": 26366 }, { "epoch": 0.45832536633697785, "grad_norm": 3.371106688392222, "learning_rate": 5.912836524090286e-07, "loss": 0.4403, "step": 26367 }, { "epoch": 0.4583427488744807, "grad_norm": 1.639315731372839, "learning_rate": 5.912559760234525e-07, "loss": 0.3891, "step": 26368 }, { "epoch": 0.4583601314119835, "grad_norm": 3.7882017306025593, "learning_rate": 5.912282993486369e-07, "loss": 0.3108, "step": 26369 }, { "epoch": 0.45837751394948634, "grad_norm": 1.8076134700746074, "learning_rate": 5.912006223846695e-07, "loss": 0.1922, "step": 26370 }, { "epoch": 0.45839489648698917, "grad_norm": 1.2162025841587936, "learning_rate": 5.91172945131638e-07, "loss": 0.4472, "step": 26371 }, { "epoch": 0.458412279024492, "grad_norm": 2.339401779783636, "learning_rate": 5.911452675896301e-07, "loss": 0.5406, "step": 26372 }, { "epoch": 0.45842966156199483, "grad_norm": 1.666198931340018, "learning_rate": 5.911175897587337e-07, "loss": 0.2473, "step": 26373 }, { "epoch": 0.45844704409949766, "grad_norm": 1.809100356785054, "learning_rate": 5.910899116390364e-07, "loss": 0.1987, "step": 26374 }, { "epoch": 0.4584644266370005, "grad_norm": 1.8814253871225943, "learning_rate": 5.910622332306256e-07, "loss": 0.2872, "step": 26375 }, { "epoch": 0.4584818091745033, "grad_norm": 2.4209332298232344, "learning_rate": 5.910345545335897e-07, "loss": 0.2863, "step": 26376 }, { "epoch": 0.4584991917120061, "grad_norm": 17.017610684535494, "learning_rate": 5.910068755480161e-07, "loss": 0.3477, "step": 26377 }, { "epoch": 0.4585165742495089, "grad_norm": 1.644288360429491, "learning_rate": 5.909791962739924e-07, "loss": 0.2569, "step": 26378 }, { "epoch": 0.45853395678701175, "grad_norm": 1.4763594963164193, "learning_rate": 5.909515167116064e-07, "loss": 0.2868, "step": 26379 }, { "epoch": 0.4585513393245146, "grad_norm": 1.2873145719504788, "learning_rate": 5.90923836860946e-07, "loss": 0.3128, "step": 26380 }, { "epoch": 0.4585687218620174, "grad_norm": 1.8916296522137244, "learning_rate": 5.908961567220986e-07, "loss": 0.3267, "step": 26381 }, { "epoch": 0.45858610439952024, "grad_norm": 1.8347081935257257, "learning_rate": 5.908684762951523e-07, "loss": 0.3434, "step": 26382 }, { "epoch": 0.4586034869370231, "grad_norm": 2.3717138912507494, "learning_rate": 5.908407955801946e-07, "loss": 0.2637, "step": 26383 }, { "epoch": 0.4586208694745259, "grad_norm": 1.5752529654660499, "learning_rate": 5.908131145773135e-07, "loss": 0.254, "step": 26384 }, { "epoch": 0.45863825201202874, "grad_norm": 1.9424062706541283, "learning_rate": 5.907854332865962e-07, "loss": 0.2366, "step": 26385 }, { "epoch": 0.45865563454953157, "grad_norm": 1.4605733404829493, "learning_rate": 5.907577517081311e-07, "loss": 0.1989, "step": 26386 }, { "epoch": 0.45867301708703434, "grad_norm": 1.5201767078631319, "learning_rate": 5.907300698420053e-07, "loss": 0.3167, "step": 26387 }, { "epoch": 0.45869039962453717, "grad_norm": 1.0794971344748094, "learning_rate": 5.90702387688307e-07, "loss": 0.2822, "step": 26388 }, { "epoch": 0.45870778216204, "grad_norm": 3.7736801535609925, "learning_rate": 5.906747052471239e-07, "loss": 0.2836, "step": 26389 }, { "epoch": 0.45872516469954283, "grad_norm": 2.5705786215992523, "learning_rate": 5.906470225185436e-07, "loss": 0.2818, "step": 26390 }, { "epoch": 0.45874254723704566, "grad_norm": 1.9823513413935776, "learning_rate": 5.906193395026538e-07, "loss": 0.4638, "step": 26391 }, { "epoch": 0.4587599297745485, "grad_norm": 1.3996368594777266, "learning_rate": 5.905916561995423e-07, "loss": 0.1529, "step": 26392 }, { "epoch": 0.4587773123120513, "grad_norm": 2.2723315781049, "learning_rate": 5.90563972609297e-07, "loss": 0.326, "step": 26393 }, { "epoch": 0.45879469484955415, "grad_norm": 2.1051711269745668, "learning_rate": 5.905362887320055e-07, "loss": 0.3136, "step": 26394 }, { "epoch": 0.458812077387057, "grad_norm": 1.297868186421592, "learning_rate": 5.905086045677555e-07, "loss": 0.2189, "step": 26395 }, { "epoch": 0.4588294599245598, "grad_norm": 2.28435074636327, "learning_rate": 5.904809201166348e-07, "loss": 0.2358, "step": 26396 }, { "epoch": 0.4588468424620626, "grad_norm": 0.9779332604404781, "learning_rate": 5.904532353787311e-07, "loss": 0.3389, "step": 26397 }, { "epoch": 0.4588642249995654, "grad_norm": 1.3364393937748282, "learning_rate": 5.904255503541322e-07, "loss": 0.235, "step": 26398 }, { "epoch": 0.45888160753706825, "grad_norm": 1.3488611892667497, "learning_rate": 5.903978650429259e-07, "loss": 0.2476, "step": 26399 }, { "epoch": 0.4588989900745711, "grad_norm": 2.744982828662158, "learning_rate": 5.903701794451998e-07, "loss": 0.3074, "step": 26400 }, { "epoch": 0.4589163726120739, "grad_norm": 1.4935600056431895, "learning_rate": 5.903424935610418e-07, "loss": 0.2676, "step": 26401 }, { "epoch": 0.45893375514957674, "grad_norm": 1.1445416264357342, "learning_rate": 5.903148073905396e-07, "loss": 0.2635, "step": 26402 }, { "epoch": 0.45895113768707957, "grad_norm": 3.0225776625765906, "learning_rate": 5.902871209337809e-07, "loss": 0.2768, "step": 26403 }, { "epoch": 0.4589685202245824, "grad_norm": 1.2780339096309838, "learning_rate": 5.902594341908537e-07, "loss": 0.1977, "step": 26404 }, { "epoch": 0.45898590276208523, "grad_norm": 2.562111561943896, "learning_rate": 5.902317471618454e-07, "loss": 0.272, "step": 26405 }, { "epoch": 0.45900328529958806, "grad_norm": 1.989665461358035, "learning_rate": 5.902040598468438e-07, "loss": 0.4557, "step": 26406 }, { "epoch": 0.45902066783709083, "grad_norm": 1.4925026107658863, "learning_rate": 5.90176372245937e-07, "loss": 0.3096, "step": 26407 }, { "epoch": 0.45903805037459366, "grad_norm": 1.0657705503471724, "learning_rate": 5.901486843592123e-07, "loss": 0.1474, "step": 26408 }, { "epoch": 0.4590554329120965, "grad_norm": 1.7199219166888768, "learning_rate": 5.90120996186758e-07, "loss": 0.2523, "step": 26409 }, { "epoch": 0.4590728154495993, "grad_norm": 1.5641832652553567, "learning_rate": 5.900933077286612e-07, "loss": 0.1622, "step": 26410 }, { "epoch": 0.45909019798710216, "grad_norm": 1.4058113977604934, "learning_rate": 5.900656189850101e-07, "loss": 0.2658, "step": 26411 }, { "epoch": 0.459107580524605, "grad_norm": 1.4663319963891042, "learning_rate": 5.900379299558924e-07, "loss": 0.2964, "step": 26412 }, { "epoch": 0.4591249630621078, "grad_norm": 2.2820842454531487, "learning_rate": 5.900102406413958e-07, "loss": 0.2691, "step": 26413 }, { "epoch": 0.45914234559961065, "grad_norm": 1.49510812594912, "learning_rate": 5.899825510416081e-07, "loss": 0.3014, "step": 26414 }, { "epoch": 0.4591597281371135, "grad_norm": 1.708225451061845, "learning_rate": 5.899548611566171e-07, "loss": 0.2721, "step": 26415 }, { "epoch": 0.4591771106746163, "grad_norm": 2.4954625413158493, "learning_rate": 5.899271709865104e-07, "loss": 0.2654, "step": 26416 }, { "epoch": 0.4591944932121191, "grad_norm": 1.6860326512220885, "learning_rate": 5.898994805313759e-07, "loss": 0.2848, "step": 26417 }, { "epoch": 0.4592118757496219, "grad_norm": 1.27254774496632, "learning_rate": 5.898717897913015e-07, "loss": 0.2745, "step": 26418 }, { "epoch": 0.45922925828712474, "grad_norm": 2.7351178895628028, "learning_rate": 5.898440987663747e-07, "loss": 0.2908, "step": 26419 }, { "epoch": 0.45924664082462757, "grad_norm": 1.2186979409642367, "learning_rate": 5.898164074566833e-07, "loss": 0.2095, "step": 26420 }, { "epoch": 0.4592640233621304, "grad_norm": 1.9640478837804551, "learning_rate": 5.897887158623154e-07, "loss": 0.2281, "step": 26421 }, { "epoch": 0.45928140589963323, "grad_norm": 1.618974062947724, "learning_rate": 5.897610239833581e-07, "loss": 0.2771, "step": 26422 }, { "epoch": 0.45929878843713606, "grad_norm": 1.8889629026058627, "learning_rate": 5.897333318198999e-07, "loss": 0.3138, "step": 26423 }, { "epoch": 0.4593161709746389, "grad_norm": 1.8829406373584394, "learning_rate": 5.897056393720283e-07, "loss": 0.2653, "step": 26424 }, { "epoch": 0.4593335535121417, "grad_norm": 1.5877780849638414, "learning_rate": 5.896779466398309e-07, "loss": 0.2012, "step": 26425 }, { "epoch": 0.45935093604964455, "grad_norm": 1.4000590569985008, "learning_rate": 5.896502536233955e-07, "loss": 0.2753, "step": 26426 }, { "epoch": 0.45936831858714733, "grad_norm": 1.7716334851114066, "learning_rate": 5.896225603228101e-07, "loss": 0.2668, "step": 26427 }, { "epoch": 0.45938570112465016, "grad_norm": 1.4431476507750758, "learning_rate": 5.895948667381623e-07, "loss": 0.4072, "step": 26428 }, { "epoch": 0.459403083662153, "grad_norm": 1.0360733679406535, "learning_rate": 5.895671728695401e-07, "loss": 0.2639, "step": 26429 }, { "epoch": 0.4594204661996558, "grad_norm": 1.4578750993564884, "learning_rate": 5.89539478717031e-07, "loss": 0.2459, "step": 26430 }, { "epoch": 0.45943784873715865, "grad_norm": 2.284668027422458, "learning_rate": 5.895117842807227e-07, "loss": 0.5859, "step": 26431 }, { "epoch": 0.4594552312746615, "grad_norm": 1.5789472604856651, "learning_rate": 5.894840895607034e-07, "loss": 0.1858, "step": 26432 }, { "epoch": 0.4594726138121643, "grad_norm": 2.8309300204042374, "learning_rate": 5.894563945570606e-07, "loss": 0.2278, "step": 26433 }, { "epoch": 0.45948999634966714, "grad_norm": 1.2399794148863763, "learning_rate": 5.89428699269882e-07, "loss": 0.3164, "step": 26434 }, { "epoch": 0.45950737888716997, "grad_norm": 2.6107081865199895, "learning_rate": 5.894010036992555e-07, "loss": 0.2192, "step": 26435 }, { "epoch": 0.4595247614246728, "grad_norm": 2.1111642044366934, "learning_rate": 5.893733078452691e-07, "loss": 0.175, "step": 26436 }, { "epoch": 0.4595421439621756, "grad_norm": 1.3897513241561241, "learning_rate": 5.893456117080103e-07, "loss": 0.1998, "step": 26437 }, { "epoch": 0.4595595264996784, "grad_norm": 2.091928844551146, "learning_rate": 5.893179152875668e-07, "loss": 0.3851, "step": 26438 }, { "epoch": 0.45957690903718124, "grad_norm": 1.9190204022596558, "learning_rate": 5.892902185840266e-07, "loss": 0.2333, "step": 26439 }, { "epoch": 0.45959429157468407, "grad_norm": 1.3364454281393767, "learning_rate": 5.892625215974776e-07, "loss": 0.3203, "step": 26440 }, { "epoch": 0.4596116741121869, "grad_norm": 2.5490465894692718, "learning_rate": 5.892348243280072e-07, "loss": 0.3507, "step": 26441 }, { "epoch": 0.4596290566496897, "grad_norm": 5.243448539397252, "learning_rate": 5.892071267757034e-07, "loss": 0.3344, "step": 26442 }, { "epoch": 0.45964643918719256, "grad_norm": 1.33833886748476, "learning_rate": 5.89179428940654e-07, "loss": 0.6681, "step": 26443 }, { "epoch": 0.4596638217246954, "grad_norm": 1.7757193432785243, "learning_rate": 5.89151730822947e-07, "loss": 0.2877, "step": 26444 }, { "epoch": 0.4596812042621982, "grad_norm": 1.5192781308610714, "learning_rate": 5.891240324226696e-07, "loss": 0.2668, "step": 26445 }, { "epoch": 0.45969858679970105, "grad_norm": 3.163776794552893, "learning_rate": 5.890963337399103e-07, "loss": 0.2245, "step": 26446 }, { "epoch": 0.4597159693372038, "grad_norm": 1.9558203486925934, "learning_rate": 5.890686347747563e-07, "loss": 0.2371, "step": 26447 }, { "epoch": 0.45973335187470665, "grad_norm": 1.395684951894795, "learning_rate": 5.890409355272958e-07, "loss": 0.2712, "step": 26448 }, { "epoch": 0.4597507344122095, "grad_norm": 1.9661079634631828, "learning_rate": 5.890132359976164e-07, "loss": 0.4503, "step": 26449 }, { "epoch": 0.4597681169497123, "grad_norm": 2.830768619724647, "learning_rate": 5.889855361858059e-07, "loss": 0.3707, "step": 26450 }, { "epoch": 0.45978549948721514, "grad_norm": 1.2165239432502468, "learning_rate": 5.889578360919521e-07, "loss": 0.1228, "step": 26451 }, { "epoch": 0.459802882024718, "grad_norm": 2.20985131052786, "learning_rate": 5.889301357161429e-07, "loss": 0.2697, "step": 26452 }, { "epoch": 0.4598202645622208, "grad_norm": 0.951575521102987, "learning_rate": 5.889024350584661e-07, "loss": 0.257, "step": 26453 }, { "epoch": 0.45983764709972363, "grad_norm": 1.5635126864246087, "learning_rate": 5.888747341190092e-07, "loss": 0.2269, "step": 26454 }, { "epoch": 0.45985502963722646, "grad_norm": 1.49012417134291, "learning_rate": 5.888470328978603e-07, "loss": 0.297, "step": 26455 }, { "epoch": 0.45987241217472924, "grad_norm": 1.4062059330858407, "learning_rate": 5.888193313951072e-07, "loss": 0.2284, "step": 26456 }, { "epoch": 0.45988979471223207, "grad_norm": 1.6545058722024806, "learning_rate": 5.887916296108375e-07, "loss": 0.2354, "step": 26457 }, { "epoch": 0.4599071772497349, "grad_norm": 1.8999240079278907, "learning_rate": 5.887639275451393e-07, "loss": 0.233, "step": 26458 }, { "epoch": 0.45992455978723773, "grad_norm": 4.262311522970991, "learning_rate": 5.887362251981e-07, "loss": 0.2496, "step": 26459 }, { "epoch": 0.45994194232474056, "grad_norm": 1.704427850030625, "learning_rate": 5.887085225698078e-07, "loss": 0.2427, "step": 26460 }, { "epoch": 0.4599593248622434, "grad_norm": 1.7377712274451547, "learning_rate": 5.886808196603502e-07, "loss": 0.6015, "step": 26461 }, { "epoch": 0.4599767073997462, "grad_norm": 1.232379580882722, "learning_rate": 5.886531164698152e-07, "loss": 0.1643, "step": 26462 }, { "epoch": 0.45999408993724905, "grad_norm": 1.3640183326468929, "learning_rate": 5.886254129982906e-07, "loss": 0.3575, "step": 26463 }, { "epoch": 0.4600114724747519, "grad_norm": 2.0224914654903476, "learning_rate": 5.88597709245864e-07, "loss": 0.4055, "step": 26464 }, { "epoch": 0.4600288550122547, "grad_norm": 0.9878997721437082, "learning_rate": 5.885700052126236e-07, "loss": 0.1973, "step": 26465 }, { "epoch": 0.4600462375497575, "grad_norm": 1.6049952796682598, "learning_rate": 5.885423008986567e-07, "loss": 0.3856, "step": 26466 }, { "epoch": 0.4600636200872603, "grad_norm": 1.4939544692408186, "learning_rate": 5.885145963040515e-07, "loss": 0.2927, "step": 26467 }, { "epoch": 0.46008100262476315, "grad_norm": 1.5977103390825604, "learning_rate": 5.884868914288958e-07, "loss": 0.3163, "step": 26468 }, { "epoch": 0.460098385162266, "grad_norm": 1.7575566288081224, "learning_rate": 5.884591862732772e-07, "loss": 0.3158, "step": 26469 }, { "epoch": 0.4601157676997688, "grad_norm": 2.7570622420159077, "learning_rate": 5.884314808372836e-07, "loss": 0.2841, "step": 26470 }, { "epoch": 0.46013315023727164, "grad_norm": 1.3149859581515986, "learning_rate": 5.884037751210029e-07, "loss": 0.2669, "step": 26471 }, { "epoch": 0.46015053277477447, "grad_norm": 2.897600009631542, "learning_rate": 5.883760691245228e-07, "loss": 0.3555, "step": 26472 }, { "epoch": 0.4601679153122773, "grad_norm": 1.3174256162050897, "learning_rate": 5.88348362847931e-07, "loss": 0.2126, "step": 26473 }, { "epoch": 0.46018529784978013, "grad_norm": 1.863299130833989, "learning_rate": 5.883206562913157e-07, "loss": 0.2593, "step": 26474 }, { "epoch": 0.46020268038728296, "grad_norm": 1.3653490341063452, "learning_rate": 5.882929494547645e-07, "loss": 0.1375, "step": 26475 }, { "epoch": 0.46022006292478573, "grad_norm": 1.6582221372540413, "learning_rate": 5.88265242338365e-07, "loss": 0.1947, "step": 26476 }, { "epoch": 0.46023744546228856, "grad_norm": 2.172841015707328, "learning_rate": 5.882375349422054e-07, "loss": 0.2754, "step": 26477 }, { "epoch": 0.4602548279997914, "grad_norm": 1.6886893484910266, "learning_rate": 5.882098272663732e-07, "loss": 0.2949, "step": 26478 }, { "epoch": 0.4602722105372942, "grad_norm": 2.9688856459318065, "learning_rate": 5.881821193109565e-07, "loss": 0.2752, "step": 26479 }, { "epoch": 0.46028959307479705, "grad_norm": 1.8135362976555958, "learning_rate": 5.88154411076043e-07, "loss": 0.2515, "step": 26480 }, { "epoch": 0.4603069756122999, "grad_norm": 1.7232365551949314, "learning_rate": 5.881267025617205e-07, "loss": 0.3455, "step": 26481 }, { "epoch": 0.4603243581498027, "grad_norm": 2.0225899532291662, "learning_rate": 5.880989937680767e-07, "loss": 0.2245, "step": 26482 }, { "epoch": 0.46034174068730555, "grad_norm": 3.687158880197694, "learning_rate": 5.880712846951997e-07, "loss": 0.3008, "step": 26483 }, { "epoch": 0.4603591232248084, "grad_norm": 1.4931249379982687, "learning_rate": 5.880435753431772e-07, "loss": 0.1666, "step": 26484 }, { "epoch": 0.4603765057623112, "grad_norm": 3.000241063858883, "learning_rate": 5.88015865712097e-07, "loss": 0.4068, "step": 26485 }, { "epoch": 0.460393888299814, "grad_norm": 1.622536196182434, "learning_rate": 5.879881558020468e-07, "loss": 0.2402, "step": 26486 }, { "epoch": 0.4604112708373168, "grad_norm": 1.8578627556494363, "learning_rate": 5.879604456131146e-07, "loss": 0.3331, "step": 26487 }, { "epoch": 0.46042865337481964, "grad_norm": 2.009630058584765, "learning_rate": 5.879327351453882e-07, "loss": 0.4092, "step": 26488 }, { "epoch": 0.46044603591232247, "grad_norm": 1.7849938347901564, "learning_rate": 5.879050243989556e-07, "loss": 0.362, "step": 26489 }, { "epoch": 0.4604634184498253, "grad_norm": 1.745435491218055, "learning_rate": 5.878773133739044e-07, "loss": 0.284, "step": 26490 }, { "epoch": 0.46048080098732813, "grad_norm": 1.4444549986200366, "learning_rate": 5.878496020703222e-07, "loss": 0.4006, "step": 26491 }, { "epoch": 0.46049818352483096, "grad_norm": 1.722084583384539, "learning_rate": 5.878218904882973e-07, "loss": 0.2315, "step": 26492 }, { "epoch": 0.4605155660623338, "grad_norm": 1.3779714931455669, "learning_rate": 5.877941786279174e-07, "loss": 0.3172, "step": 26493 }, { "epoch": 0.4605329485998366, "grad_norm": 1.0518876067814409, "learning_rate": 5.877664664892703e-07, "loss": 0.4042, "step": 26494 }, { "epoch": 0.46055033113733945, "grad_norm": 1.495280472175416, "learning_rate": 5.877387540724436e-07, "loss": 0.2485, "step": 26495 }, { "epoch": 0.4605677136748422, "grad_norm": 1.920795778664621, "learning_rate": 5.877110413775257e-07, "loss": 0.2158, "step": 26496 }, { "epoch": 0.46058509621234506, "grad_norm": 1.8007003060668387, "learning_rate": 5.876833284046039e-07, "loss": 0.2858, "step": 26497 }, { "epoch": 0.4606024787498479, "grad_norm": 1.6750024656885985, "learning_rate": 5.876556151537663e-07, "loss": 0.2477, "step": 26498 }, { "epoch": 0.4606198612873507, "grad_norm": 2.956715680557669, "learning_rate": 5.876279016251005e-07, "loss": 0.4013, "step": 26499 }, { "epoch": 0.46063724382485355, "grad_norm": 2.4622393687172806, "learning_rate": 5.876001878186947e-07, "loss": 0.3285, "step": 26500 }, { "epoch": 0.4606546263623564, "grad_norm": 2.004449736720932, "learning_rate": 5.875724737346363e-07, "loss": 0.29, "step": 26501 }, { "epoch": 0.4606720088998592, "grad_norm": 1.765956392711538, "learning_rate": 5.875447593730137e-07, "loss": 0.309, "step": 26502 }, { "epoch": 0.46068939143736204, "grad_norm": 1.0864289464591776, "learning_rate": 5.875170447339143e-07, "loss": 0.1473, "step": 26503 }, { "epoch": 0.46070677397486487, "grad_norm": 2.16111552543964, "learning_rate": 5.87489329817426e-07, "loss": 0.3179, "step": 26504 }, { "epoch": 0.4607241565123677, "grad_norm": 2.769871971831673, "learning_rate": 5.874616146236367e-07, "loss": 0.2807, "step": 26505 }, { "epoch": 0.4607415390498705, "grad_norm": 2.4890864728146918, "learning_rate": 5.874338991526344e-07, "loss": 0.2193, "step": 26506 }, { "epoch": 0.4607589215873733, "grad_norm": 2.331358101097265, "learning_rate": 5.874061834045067e-07, "loss": 0.3759, "step": 26507 }, { "epoch": 0.46077630412487613, "grad_norm": 1.5761665454630696, "learning_rate": 5.873784673793415e-07, "loss": 0.3218, "step": 26508 }, { "epoch": 0.46079368666237897, "grad_norm": 1.3760090845265223, "learning_rate": 5.873507510772269e-07, "loss": 0.2718, "step": 26509 }, { "epoch": 0.4608110691998818, "grad_norm": 1.1528018631059114, "learning_rate": 5.873230344982504e-07, "loss": 0.2484, "step": 26510 }, { "epoch": 0.4608284517373846, "grad_norm": 1.0451838742160184, "learning_rate": 5.872953176424998e-07, "loss": 0.2144, "step": 26511 }, { "epoch": 0.46084583427488746, "grad_norm": 1.6247390307245864, "learning_rate": 5.872676005100634e-07, "loss": 0.2754, "step": 26512 }, { "epoch": 0.4608632168123903, "grad_norm": 1.301115874263988, "learning_rate": 5.872398831010288e-07, "loss": 0.2245, "step": 26513 }, { "epoch": 0.4608805993498931, "grad_norm": 2.821565376017061, "learning_rate": 5.872121654154838e-07, "loss": 0.4011, "step": 26514 }, { "epoch": 0.46089798188739595, "grad_norm": 1.3154034168116195, "learning_rate": 5.871844474535161e-07, "loss": 0.2758, "step": 26515 }, { "epoch": 0.4609153644248987, "grad_norm": 0.8952692372747992, "learning_rate": 5.871567292152139e-07, "loss": 0.1339, "step": 26516 }, { "epoch": 0.46093274696240155, "grad_norm": 1.3034593641161187, "learning_rate": 5.871290107006651e-07, "loss": 0.1822, "step": 26517 }, { "epoch": 0.4609501294999044, "grad_norm": 1.412679310208093, "learning_rate": 5.871012919099572e-07, "loss": 0.208, "step": 26518 }, { "epoch": 0.4609675120374072, "grad_norm": 0.7825864533004521, "learning_rate": 5.870735728431782e-07, "loss": 0.2459, "step": 26519 }, { "epoch": 0.46098489457491004, "grad_norm": 2.014922193520548, "learning_rate": 5.870458535004159e-07, "loss": 0.3637, "step": 26520 }, { "epoch": 0.4610022771124129, "grad_norm": 3.8136015700378456, "learning_rate": 5.870181338817583e-07, "loss": 0.4543, "step": 26521 }, { "epoch": 0.4610196596499157, "grad_norm": 1.4311794483903715, "learning_rate": 5.869904139872934e-07, "loss": 0.3132, "step": 26522 }, { "epoch": 0.46103704218741853, "grad_norm": 1.7327335321751873, "learning_rate": 5.869626938171086e-07, "loss": 0.4485, "step": 26523 }, { "epoch": 0.46105442472492136, "grad_norm": 1.6532069274348924, "learning_rate": 5.86934973371292e-07, "loss": 0.3419, "step": 26524 }, { "epoch": 0.4610718072624242, "grad_norm": 1.7891387676594033, "learning_rate": 5.869072526499315e-07, "loss": 0.221, "step": 26525 }, { "epoch": 0.46108918979992697, "grad_norm": 2.1798333133765575, "learning_rate": 5.86879531653115e-07, "loss": 0.3192, "step": 26526 }, { "epoch": 0.4611065723374298, "grad_norm": 1.804276010784019, "learning_rate": 5.868518103809302e-07, "loss": 0.2985, "step": 26527 }, { "epoch": 0.46112395487493263, "grad_norm": 1.0425613448180784, "learning_rate": 5.868240888334652e-07, "loss": 0.2162, "step": 26528 }, { "epoch": 0.46114133741243546, "grad_norm": 1.6729592175283587, "learning_rate": 5.867963670108076e-07, "loss": 0.2068, "step": 26529 }, { "epoch": 0.4611587199499383, "grad_norm": 4.287517381434391, "learning_rate": 5.867686449130453e-07, "loss": 0.2606, "step": 26530 }, { "epoch": 0.4611761024874411, "grad_norm": 1.176429696433118, "learning_rate": 5.867409225402667e-07, "loss": 0.1802, "step": 26531 }, { "epoch": 0.46119348502494395, "grad_norm": 1.6953248718428264, "learning_rate": 5.867131998925587e-07, "loss": 0.2576, "step": 26532 }, { "epoch": 0.4612108675624468, "grad_norm": 1.5030116572043093, "learning_rate": 5.866854769700099e-07, "loss": 0.1807, "step": 26533 }, { "epoch": 0.4612282500999496, "grad_norm": 1.915695722259425, "learning_rate": 5.866577537727079e-07, "loss": 0.387, "step": 26534 }, { "epoch": 0.46124563263745244, "grad_norm": 1.8361891698803396, "learning_rate": 5.866300303007408e-07, "loss": 0.2368, "step": 26535 }, { "epoch": 0.4612630151749552, "grad_norm": 1.7975956959583976, "learning_rate": 5.866023065541961e-07, "loss": 0.2804, "step": 26536 }, { "epoch": 0.46128039771245805, "grad_norm": 1.4288966755765746, "learning_rate": 5.865745825331619e-07, "loss": 0.2947, "step": 26537 }, { "epoch": 0.4612977802499609, "grad_norm": 3.3873424222544846, "learning_rate": 5.865468582377261e-07, "loss": 0.566, "step": 26538 }, { "epoch": 0.4613151627874637, "grad_norm": 1.4144008922212061, "learning_rate": 5.865191336679765e-07, "loss": 0.185, "step": 26539 }, { "epoch": 0.46133254532496654, "grad_norm": 3.0165907091491966, "learning_rate": 5.864914088240009e-07, "loss": 0.4264, "step": 26540 }, { "epoch": 0.46134992786246937, "grad_norm": 1.579179920362727, "learning_rate": 5.864636837058875e-07, "loss": 0.3071, "step": 26541 }, { "epoch": 0.4613673103999722, "grad_norm": 1.8705532970561969, "learning_rate": 5.864359583137237e-07, "loss": 0.2769, "step": 26542 }, { "epoch": 0.461384692937475, "grad_norm": 2.128161800850299, "learning_rate": 5.864082326475978e-07, "loss": 0.3046, "step": 26543 }, { "epoch": 0.46140207547497786, "grad_norm": 1.946147178187594, "learning_rate": 5.863805067075973e-07, "loss": 0.2443, "step": 26544 }, { "epoch": 0.4614194580124807, "grad_norm": 2.846558707177778, "learning_rate": 5.863527804938104e-07, "loss": 0.3222, "step": 26545 }, { "epoch": 0.46143684054998346, "grad_norm": 0.8971885626193601, "learning_rate": 5.863250540063248e-07, "loss": 0.3254, "step": 26546 }, { "epoch": 0.4614542230874863, "grad_norm": 1.5251596455554437, "learning_rate": 5.862973272452285e-07, "loss": 0.1987, "step": 26547 }, { "epoch": 0.4614716056249891, "grad_norm": 2.287321623411649, "learning_rate": 5.862696002106093e-07, "loss": 0.2833, "step": 26548 }, { "epoch": 0.46148898816249195, "grad_norm": 1.3693759775745546, "learning_rate": 5.862418729025552e-07, "loss": 0.1517, "step": 26549 }, { "epoch": 0.4615063706999948, "grad_norm": 1.867173233624368, "learning_rate": 5.862141453211538e-07, "loss": 0.2345, "step": 26550 }, { "epoch": 0.4615237532374976, "grad_norm": 2.075435890430402, "learning_rate": 5.861864174664931e-07, "loss": 0.2704, "step": 26551 }, { "epoch": 0.46154113577500044, "grad_norm": 1.8752925729821546, "learning_rate": 5.861586893386612e-07, "loss": 0.3953, "step": 26552 }, { "epoch": 0.4615585183125033, "grad_norm": 1.6072990208225277, "learning_rate": 5.861309609377458e-07, "loss": 0.2939, "step": 26553 }, { "epoch": 0.4615759008500061, "grad_norm": 1.102003772962679, "learning_rate": 5.861032322638348e-07, "loss": 0.2927, "step": 26554 }, { "epoch": 0.46159328338750893, "grad_norm": 1.6042483300017825, "learning_rate": 5.86075503317016e-07, "loss": 0.2221, "step": 26555 }, { "epoch": 0.4616106659250117, "grad_norm": 1.5905216535698627, "learning_rate": 5.860477740973776e-07, "loss": 0.4875, "step": 26556 }, { "epoch": 0.46162804846251454, "grad_norm": 4.9519652382574835, "learning_rate": 5.860200446050072e-07, "loss": 0.4198, "step": 26557 }, { "epoch": 0.46164543100001737, "grad_norm": 2.1713127216218586, "learning_rate": 5.859923148399928e-07, "loss": 0.308, "step": 26558 }, { "epoch": 0.4616628135375202, "grad_norm": 1.2718552202258073, "learning_rate": 5.859645848024222e-07, "loss": 0.2819, "step": 26559 }, { "epoch": 0.46168019607502303, "grad_norm": 1.0955197392293172, "learning_rate": 5.859368544923835e-07, "loss": 0.2471, "step": 26560 }, { "epoch": 0.46169757861252586, "grad_norm": 1.3851889490659626, "learning_rate": 5.859091239099642e-07, "loss": 0.2558, "step": 26561 }, { "epoch": 0.4617149611500287, "grad_norm": 2.2561582160580773, "learning_rate": 5.858813930552526e-07, "loss": 0.2549, "step": 26562 }, { "epoch": 0.4617323436875315, "grad_norm": 1.113723871706443, "learning_rate": 5.858536619283365e-07, "loss": 0.1965, "step": 26563 }, { "epoch": 0.46174972622503435, "grad_norm": 1.4343519485931648, "learning_rate": 5.858259305293036e-07, "loss": 0.2174, "step": 26564 }, { "epoch": 0.4617671087625372, "grad_norm": 1.2663242710715146, "learning_rate": 5.85798198858242e-07, "loss": 0.2057, "step": 26565 }, { "epoch": 0.46178449130003996, "grad_norm": 1.207872690249312, "learning_rate": 5.857704669152396e-07, "loss": 0.2926, "step": 26566 }, { "epoch": 0.4618018738375428, "grad_norm": 1.7243804083068899, "learning_rate": 5.857427347003841e-07, "loss": 0.1487, "step": 26567 }, { "epoch": 0.4618192563750456, "grad_norm": 1.5175359886836775, "learning_rate": 5.857150022137634e-07, "loss": 0.2593, "step": 26568 }, { "epoch": 0.46183663891254845, "grad_norm": 2.4328090065623575, "learning_rate": 5.856872694554658e-07, "loss": 0.3083, "step": 26569 }, { "epoch": 0.4618540214500513, "grad_norm": 1.1106771256551373, "learning_rate": 5.856595364255787e-07, "loss": 0.2661, "step": 26570 }, { "epoch": 0.4618714039875541, "grad_norm": 1.4332732230201084, "learning_rate": 5.856318031241903e-07, "loss": 0.2347, "step": 26571 }, { "epoch": 0.46188878652505694, "grad_norm": 3.634766135903864, "learning_rate": 5.856040695513883e-07, "loss": 0.3198, "step": 26572 }, { "epoch": 0.46190616906255977, "grad_norm": 1.4308026025164085, "learning_rate": 5.855763357072608e-07, "loss": 0.2642, "step": 26573 }, { "epoch": 0.4619235516000626, "grad_norm": 0.9577091942723248, "learning_rate": 5.855486015918958e-07, "loss": 0.2269, "step": 26574 }, { "epoch": 0.46194093413756543, "grad_norm": 1.2086376875058222, "learning_rate": 5.855208672053808e-07, "loss": 0.1311, "step": 26575 }, { "epoch": 0.4619583166750682, "grad_norm": 4.389490682074555, "learning_rate": 5.85493132547804e-07, "loss": 0.3529, "step": 26576 }, { "epoch": 0.46197569921257103, "grad_norm": 1.16085063750297, "learning_rate": 5.854653976192533e-07, "loss": 0.2156, "step": 26577 }, { "epoch": 0.46199308175007386, "grad_norm": 2.399873067784337, "learning_rate": 5.854376624198165e-07, "loss": 0.3967, "step": 26578 }, { "epoch": 0.4620104642875767, "grad_norm": 1.2717495981983773, "learning_rate": 5.854099269495816e-07, "loss": 0.2401, "step": 26579 }, { "epoch": 0.4620278468250795, "grad_norm": 1.4268198186688485, "learning_rate": 5.853821912086363e-07, "loss": 0.2708, "step": 26580 }, { "epoch": 0.46204522936258235, "grad_norm": 1.6598847322458985, "learning_rate": 5.853544551970689e-07, "loss": 0.2753, "step": 26581 }, { "epoch": 0.4620626119000852, "grad_norm": 1.5726871081622311, "learning_rate": 5.85326718914967e-07, "loss": 0.324, "step": 26582 }, { "epoch": 0.462079994437588, "grad_norm": 1.5162791610957214, "learning_rate": 5.852989823624186e-07, "loss": 0.1824, "step": 26583 }, { "epoch": 0.46209737697509085, "grad_norm": 0.9904439200293796, "learning_rate": 5.852712455395115e-07, "loss": 0.1532, "step": 26584 }, { "epoch": 0.4621147595125937, "grad_norm": 1.8574605443905845, "learning_rate": 5.852435084463339e-07, "loss": 0.3188, "step": 26585 }, { "epoch": 0.46213214205009645, "grad_norm": 1.0629037017553107, "learning_rate": 5.852157710829735e-07, "loss": 0.1674, "step": 26586 }, { "epoch": 0.4621495245875993, "grad_norm": 1.1668413238040498, "learning_rate": 5.851880334495182e-07, "loss": 0.3246, "step": 26587 }, { "epoch": 0.4621669071251021, "grad_norm": 2.6213252703248964, "learning_rate": 5.85160295546056e-07, "loss": 0.4208, "step": 26588 }, { "epoch": 0.46218428966260494, "grad_norm": 2.406407720122519, "learning_rate": 5.851325573726748e-07, "loss": 0.3437, "step": 26589 }, { "epoch": 0.46220167220010777, "grad_norm": 1.5256719654168434, "learning_rate": 5.851048189294623e-07, "loss": 0.2444, "step": 26590 }, { "epoch": 0.4622190547376106, "grad_norm": 3.9064315762987794, "learning_rate": 5.850770802165069e-07, "loss": 0.3873, "step": 26591 }, { "epoch": 0.46223643727511343, "grad_norm": 1.7904171894091638, "learning_rate": 5.85049341233896e-07, "loss": 0.3451, "step": 26592 }, { "epoch": 0.46225381981261626, "grad_norm": 2.526389722115914, "learning_rate": 5.850216019817179e-07, "loss": 0.2122, "step": 26593 }, { "epoch": 0.4622712023501191, "grad_norm": 1.6318987578055435, "learning_rate": 5.849938624600603e-07, "loss": 0.3498, "step": 26594 }, { "epoch": 0.46228858488762187, "grad_norm": 2.464927786503512, "learning_rate": 5.849661226690113e-07, "loss": 0.3474, "step": 26595 }, { "epoch": 0.4623059674251247, "grad_norm": 1.9396959495470316, "learning_rate": 5.849383826086587e-07, "loss": 0.3075, "step": 26596 }, { "epoch": 0.46232334996262753, "grad_norm": 1.920549343431229, "learning_rate": 5.849106422790903e-07, "loss": 0.2464, "step": 26597 }, { "epoch": 0.46234073250013036, "grad_norm": 3.35590036001307, "learning_rate": 5.848829016803945e-07, "loss": 0.338, "step": 26598 }, { "epoch": 0.4623581150376332, "grad_norm": 1.8171756056981248, "learning_rate": 5.848551608126586e-07, "loss": 0.4446, "step": 26599 }, { "epoch": 0.462375497575136, "grad_norm": 1.0324499661174995, "learning_rate": 5.848274196759709e-07, "loss": 0.2178, "step": 26600 }, { "epoch": 0.46239288011263885, "grad_norm": 2.877155688549184, "learning_rate": 5.847996782704192e-07, "loss": 0.3353, "step": 26601 }, { "epoch": 0.4624102626501417, "grad_norm": 1.4187850971242515, "learning_rate": 5.847719365960914e-07, "loss": 0.2351, "step": 26602 }, { "epoch": 0.4624276451876445, "grad_norm": 1.438373293231767, "learning_rate": 5.847441946530757e-07, "loss": 0.1962, "step": 26603 }, { "epoch": 0.46244502772514734, "grad_norm": 1.6104529620911767, "learning_rate": 5.847164524414599e-07, "loss": 0.1699, "step": 26604 }, { "epoch": 0.4624624102626501, "grad_norm": 2.1636076980292547, "learning_rate": 5.846887099613316e-07, "loss": 0.2768, "step": 26605 }, { "epoch": 0.46247979280015294, "grad_norm": 1.3828349256977157, "learning_rate": 5.84660967212779e-07, "loss": 0.2799, "step": 26606 }, { "epoch": 0.4624971753376558, "grad_norm": 1.9650738138890471, "learning_rate": 5.846332241958902e-07, "loss": 0.1713, "step": 26607 }, { "epoch": 0.4625145578751586, "grad_norm": 1.3448869763391125, "learning_rate": 5.846054809107529e-07, "loss": 0.1613, "step": 26608 }, { "epoch": 0.46253194041266144, "grad_norm": 2.5749329896532838, "learning_rate": 5.84577737357455e-07, "loss": 0.3691, "step": 26609 }, { "epoch": 0.46254932295016427, "grad_norm": 1.0228641689819877, "learning_rate": 5.845499935360847e-07, "loss": 0.1621, "step": 26610 }, { "epoch": 0.4625667054876671, "grad_norm": 2.1487655922862636, "learning_rate": 5.845222494467295e-07, "loss": 0.138, "step": 26611 }, { "epoch": 0.4625840880251699, "grad_norm": 1.314173186985485, "learning_rate": 5.844945050894779e-07, "loss": 0.1584, "step": 26612 }, { "epoch": 0.46260147056267276, "grad_norm": 1.5310417237806175, "learning_rate": 5.844667604644173e-07, "loss": 0.1746, "step": 26613 }, { "epoch": 0.4626188531001756, "grad_norm": 1.7601518578241981, "learning_rate": 5.84439015571636e-07, "loss": 0.3893, "step": 26614 }, { "epoch": 0.46263623563767836, "grad_norm": 1.6416937453007028, "learning_rate": 5.844112704112218e-07, "loss": 0.2372, "step": 26615 }, { "epoch": 0.4626536181751812, "grad_norm": 1.500255737306103, "learning_rate": 5.843835249832624e-07, "loss": 0.3387, "step": 26616 }, { "epoch": 0.462671000712684, "grad_norm": 1.1905443639704467, "learning_rate": 5.843557792878463e-07, "loss": 0.2576, "step": 26617 }, { "epoch": 0.46268838325018685, "grad_norm": 1.0441955063701447, "learning_rate": 5.843280333250611e-07, "loss": 0.158, "step": 26618 }, { "epoch": 0.4627057657876897, "grad_norm": 1.6876089802394172, "learning_rate": 5.843002870949946e-07, "loss": 0.2823, "step": 26619 }, { "epoch": 0.4627231483251925, "grad_norm": 2.0433602486295404, "learning_rate": 5.842725405977351e-07, "loss": 0.2981, "step": 26620 }, { "epoch": 0.46274053086269534, "grad_norm": 1.0938283005418525, "learning_rate": 5.842447938333702e-07, "loss": 0.2448, "step": 26621 }, { "epoch": 0.4627579134001982, "grad_norm": 1.8934166904214171, "learning_rate": 5.842170468019881e-07, "loss": 0.2637, "step": 26622 }, { "epoch": 0.462775295937701, "grad_norm": 1.013008340338314, "learning_rate": 5.841892995036767e-07, "loss": 0.2157, "step": 26623 }, { "epoch": 0.46279267847520383, "grad_norm": 1.882229280495517, "learning_rate": 5.841615519385238e-07, "loss": 0.3635, "step": 26624 }, { "epoch": 0.4628100610127066, "grad_norm": 2.388967769475353, "learning_rate": 5.841338041066174e-07, "loss": 0.2296, "step": 26625 }, { "epoch": 0.46282744355020944, "grad_norm": 1.8797019124910748, "learning_rate": 5.841060560080456e-07, "loss": 0.3012, "step": 26626 }, { "epoch": 0.46284482608771227, "grad_norm": 1.9141644004741662, "learning_rate": 5.840783076428961e-07, "loss": 0.1396, "step": 26627 }, { "epoch": 0.4628622086252151, "grad_norm": 1.5935678367744222, "learning_rate": 5.840505590112571e-07, "loss": 0.3168, "step": 26628 }, { "epoch": 0.46287959116271793, "grad_norm": 1.289471502982439, "learning_rate": 5.840228101132165e-07, "loss": 0.2372, "step": 26629 }, { "epoch": 0.46289697370022076, "grad_norm": 1.3290681045282875, "learning_rate": 5.839950609488621e-07, "loss": 0.3108, "step": 26630 }, { "epoch": 0.4629143562377236, "grad_norm": 2.11057272225569, "learning_rate": 5.839673115182818e-07, "loss": 0.3238, "step": 26631 }, { "epoch": 0.4629317387752264, "grad_norm": 1.4277963019037572, "learning_rate": 5.839395618215639e-07, "loss": 0.2886, "step": 26632 }, { "epoch": 0.46294912131272925, "grad_norm": 2.7523704647004896, "learning_rate": 5.839118118587961e-07, "loss": 0.3037, "step": 26633 }, { "epoch": 0.4629665038502321, "grad_norm": 1.3379083900528461, "learning_rate": 5.838840616300664e-07, "loss": 0.292, "step": 26634 }, { "epoch": 0.46298388638773486, "grad_norm": 1.1883423940417106, "learning_rate": 5.838563111354626e-07, "loss": 0.2997, "step": 26635 }, { "epoch": 0.4630012689252377, "grad_norm": 1.4008225061981412, "learning_rate": 5.83828560375073e-07, "loss": 0.3406, "step": 26636 }, { "epoch": 0.4630186514627405, "grad_norm": 1.1283401486954645, "learning_rate": 5.838008093489853e-07, "loss": 0.153, "step": 26637 }, { "epoch": 0.46303603400024335, "grad_norm": 1.381023082009869, "learning_rate": 5.837730580572876e-07, "loss": 0.4377, "step": 26638 }, { "epoch": 0.4630534165377462, "grad_norm": 1.8968011025574785, "learning_rate": 5.837453065000679e-07, "loss": 0.1956, "step": 26639 }, { "epoch": 0.463070799075249, "grad_norm": 4.187360137582329, "learning_rate": 5.837175546774137e-07, "loss": 0.2845, "step": 26640 }, { "epoch": 0.46308818161275184, "grad_norm": 1.2447108487900884, "learning_rate": 5.836898025894136e-07, "loss": 0.4499, "step": 26641 }, { "epoch": 0.46310556415025467, "grad_norm": 2.6294085359321513, "learning_rate": 5.836620502361551e-07, "loss": 0.4571, "step": 26642 }, { "epoch": 0.4631229466877575, "grad_norm": 1.7435177759388891, "learning_rate": 5.836342976177264e-07, "loss": 0.2605, "step": 26643 }, { "epoch": 0.4631403292252603, "grad_norm": 1.3566546855563084, "learning_rate": 5.836065447342153e-07, "loss": 0.4799, "step": 26644 }, { "epoch": 0.4631577117627631, "grad_norm": 1.1961912967608277, "learning_rate": 5.835787915857101e-07, "loss": 0.2546, "step": 26645 }, { "epoch": 0.46317509430026593, "grad_norm": 1.6567707321910945, "learning_rate": 5.835510381722983e-07, "loss": 0.2006, "step": 26646 }, { "epoch": 0.46319247683776876, "grad_norm": 1.260978629081699, "learning_rate": 5.835232844940682e-07, "loss": 0.2598, "step": 26647 }, { "epoch": 0.4632098593752716, "grad_norm": 1.7944895761215676, "learning_rate": 5.834955305511077e-07, "loss": 0.2137, "step": 26648 }, { "epoch": 0.4632272419127744, "grad_norm": 1.7403723895313457, "learning_rate": 5.834677763435047e-07, "loss": 0.3268, "step": 26649 }, { "epoch": 0.46324462445027725, "grad_norm": 1.24074623577968, "learning_rate": 5.83440021871347e-07, "loss": 0.3825, "step": 26650 }, { "epoch": 0.4632620069877801, "grad_norm": 3.948643029235827, "learning_rate": 5.83412267134723e-07, "loss": 0.3727, "step": 26651 }, { "epoch": 0.4632793895252829, "grad_norm": 2.017470097280574, "learning_rate": 5.833845121337202e-07, "loss": 0.3013, "step": 26652 }, { "epoch": 0.46329677206278574, "grad_norm": 1.083869321985646, "learning_rate": 5.83356756868427e-07, "loss": 0.187, "step": 26653 }, { "epoch": 0.4633141546002886, "grad_norm": 1.7522466846942304, "learning_rate": 5.833290013389311e-07, "loss": 0.2116, "step": 26654 }, { "epoch": 0.46333153713779135, "grad_norm": 1.9707256674001397, "learning_rate": 5.833012455453206e-07, "loss": 0.369, "step": 26655 }, { "epoch": 0.4633489196752942, "grad_norm": 2.0905688706038172, "learning_rate": 5.832734894876833e-07, "loss": 0.3332, "step": 26656 }, { "epoch": 0.463366302212797, "grad_norm": 1.5147115025730074, "learning_rate": 5.832457331661074e-07, "loss": 0.2036, "step": 26657 }, { "epoch": 0.46338368475029984, "grad_norm": 1.688845084216806, "learning_rate": 5.832179765806807e-07, "loss": 0.2095, "step": 26658 }, { "epoch": 0.46340106728780267, "grad_norm": 4.647414751793059, "learning_rate": 5.831902197314913e-07, "loss": 0.2902, "step": 26659 }, { "epoch": 0.4634184498253055, "grad_norm": 3.76445714714321, "learning_rate": 5.831624626186269e-07, "loss": 0.3123, "step": 26660 }, { "epoch": 0.46343583236280833, "grad_norm": 1.9308136595332397, "learning_rate": 5.831347052421759e-07, "loss": 0.3327, "step": 26661 }, { "epoch": 0.46345321490031116, "grad_norm": 2.1349646476843858, "learning_rate": 5.831069476022261e-07, "loss": 0.2704, "step": 26662 }, { "epoch": 0.463470597437814, "grad_norm": 1.7908936056046383, "learning_rate": 5.830791896988654e-07, "loss": 0.3424, "step": 26663 }, { "epoch": 0.4634879799753168, "grad_norm": 1.4980187111952643, "learning_rate": 5.830514315321819e-07, "loss": 0.2621, "step": 26664 }, { "epoch": 0.4635053625128196, "grad_norm": 1.759430526089289, "learning_rate": 5.830236731022633e-07, "loss": 0.3813, "step": 26665 }, { "epoch": 0.4635227450503224, "grad_norm": 1.5261515904353655, "learning_rate": 5.829959144091979e-07, "loss": 0.2137, "step": 26666 }, { "epoch": 0.46354012758782526, "grad_norm": 1.4891021455972848, "learning_rate": 5.829681554530737e-07, "loss": 0.3072, "step": 26667 }, { "epoch": 0.4635575101253281, "grad_norm": 2.190439788313272, "learning_rate": 5.829403962339785e-07, "loss": 0.3133, "step": 26668 }, { "epoch": 0.4635748926628309, "grad_norm": 3.4611887795729337, "learning_rate": 5.829126367520002e-07, "loss": 0.4274, "step": 26669 }, { "epoch": 0.46359227520033375, "grad_norm": 1.2078715277330214, "learning_rate": 5.828848770072273e-07, "loss": 0.2862, "step": 26670 }, { "epoch": 0.4636096577378366, "grad_norm": 3.3600412239274005, "learning_rate": 5.828571169997469e-07, "loss": 0.3892, "step": 26671 }, { "epoch": 0.4636270402753394, "grad_norm": 2.4261059334348403, "learning_rate": 5.828293567296479e-07, "loss": 0.5018, "step": 26672 }, { "epoch": 0.46364442281284224, "grad_norm": 1.2601747348391938, "learning_rate": 5.828015961970177e-07, "loss": 0.3391, "step": 26673 }, { "epoch": 0.46366180535034507, "grad_norm": 3.405104691801296, "learning_rate": 5.827738354019446e-07, "loss": 0.3635, "step": 26674 }, { "epoch": 0.46367918788784784, "grad_norm": 2.5212552096496075, "learning_rate": 5.827460743445163e-07, "loss": 0.4263, "step": 26675 }, { "epoch": 0.4636965704253507, "grad_norm": 2.848723866218342, "learning_rate": 5.82718313024821e-07, "loss": 0.2718, "step": 26676 }, { "epoch": 0.4637139529628535, "grad_norm": 1.8313449958625845, "learning_rate": 5.826905514429467e-07, "loss": 0.1988, "step": 26677 }, { "epoch": 0.46373133550035633, "grad_norm": 1.949631014711816, "learning_rate": 5.826627895989814e-07, "loss": 0.3419, "step": 26678 }, { "epoch": 0.46374871803785916, "grad_norm": 1.5415781639212802, "learning_rate": 5.826350274930128e-07, "loss": 0.1862, "step": 26679 }, { "epoch": 0.463766100575362, "grad_norm": 2.3187221453773446, "learning_rate": 5.826072651251295e-07, "loss": 0.4226, "step": 26680 }, { "epoch": 0.4637834831128648, "grad_norm": 1.7226541807661457, "learning_rate": 5.825795024954188e-07, "loss": 0.2431, "step": 26681 }, { "epoch": 0.46380086565036766, "grad_norm": 2.4518079941865203, "learning_rate": 5.825517396039691e-07, "loss": 0.334, "step": 26682 }, { "epoch": 0.4638182481878705, "grad_norm": 1.0725728807756179, "learning_rate": 5.825239764508683e-07, "loss": 0.3254, "step": 26683 }, { "epoch": 0.4638356307253733, "grad_norm": 2.369025096167747, "learning_rate": 5.824962130362045e-07, "loss": 0.2194, "step": 26684 }, { "epoch": 0.4638530132628761, "grad_norm": 1.2904382333261535, "learning_rate": 5.824684493600654e-07, "loss": 0.3637, "step": 26685 }, { "epoch": 0.4638703958003789, "grad_norm": 2.4024853143176643, "learning_rate": 5.824406854225394e-07, "loss": 0.2991, "step": 26686 }, { "epoch": 0.46388777833788175, "grad_norm": 2.370938028452232, "learning_rate": 5.824129212237141e-07, "loss": 0.4262, "step": 26687 }, { "epoch": 0.4639051608753846, "grad_norm": 1.214550750924537, "learning_rate": 5.823851567636779e-07, "loss": 0.2595, "step": 26688 }, { "epoch": 0.4639225434128874, "grad_norm": 5.102448324939238, "learning_rate": 5.823573920425185e-07, "loss": 0.2532, "step": 26689 }, { "epoch": 0.46393992595039024, "grad_norm": 1.6518012366807941, "learning_rate": 5.823296270603239e-07, "loss": 0.2685, "step": 26690 }, { "epoch": 0.46395730848789307, "grad_norm": 1.5073503199366125, "learning_rate": 5.823018618171823e-07, "loss": 0.2311, "step": 26691 }, { "epoch": 0.4639746910253959, "grad_norm": 1.628219629116848, "learning_rate": 5.822740963131817e-07, "loss": 0.3273, "step": 26692 }, { "epoch": 0.46399207356289873, "grad_norm": 1.1994291713705707, "learning_rate": 5.822463305484098e-07, "loss": 0.345, "step": 26693 }, { "epoch": 0.46400945610040156, "grad_norm": 1.1776427705344152, "learning_rate": 5.822185645229551e-07, "loss": 0.351, "step": 26694 }, { "epoch": 0.46402683863790434, "grad_norm": 1.5296218601283835, "learning_rate": 5.821907982369049e-07, "loss": 0.3936, "step": 26695 }, { "epoch": 0.46404422117540717, "grad_norm": 1.8239877052853586, "learning_rate": 5.82163031690348e-07, "loss": 0.1936, "step": 26696 }, { "epoch": 0.46406160371291, "grad_norm": 1.5332439598274836, "learning_rate": 5.82135264883372e-07, "loss": 0.3219, "step": 26697 }, { "epoch": 0.46407898625041283, "grad_norm": 1.23689070905339, "learning_rate": 5.821074978160647e-07, "loss": 0.1895, "step": 26698 }, { "epoch": 0.46409636878791566, "grad_norm": 1.5202904025091144, "learning_rate": 5.820797304885146e-07, "loss": 0.2949, "step": 26699 }, { "epoch": 0.4641137513254185, "grad_norm": 1.9399948022264373, "learning_rate": 5.820519629008093e-07, "loss": 0.2756, "step": 26700 }, { "epoch": 0.4641311338629213, "grad_norm": 2.3476274657432255, "learning_rate": 5.820241950530371e-07, "loss": 0.2702, "step": 26701 }, { "epoch": 0.46414851640042415, "grad_norm": 1.5918422640539456, "learning_rate": 5.819964269452858e-07, "loss": 0.196, "step": 26702 }, { "epoch": 0.464165898937927, "grad_norm": 2.287423413984146, "learning_rate": 5.819686585776435e-07, "loss": 0.2861, "step": 26703 }, { "epoch": 0.4641832814754298, "grad_norm": 1.0396312071089386, "learning_rate": 5.81940889950198e-07, "loss": 0.3089, "step": 26704 }, { "epoch": 0.4642006640129326, "grad_norm": 4.100110365801915, "learning_rate": 5.819131210630379e-07, "loss": 0.4526, "step": 26705 }, { "epoch": 0.4642180465504354, "grad_norm": 1.9654915481873698, "learning_rate": 5.818853519162506e-07, "loss": 0.3345, "step": 26706 }, { "epoch": 0.46423542908793825, "grad_norm": 1.7752760449678697, "learning_rate": 5.818575825099245e-07, "loss": 0.3965, "step": 26707 }, { "epoch": 0.4642528116254411, "grad_norm": 2.2080305494101755, "learning_rate": 5.818298128441473e-07, "loss": 0.2971, "step": 26708 }, { "epoch": 0.4642701941629439, "grad_norm": 1.6021927006010943, "learning_rate": 5.818020429190072e-07, "loss": 0.3387, "step": 26709 }, { "epoch": 0.46428757670044674, "grad_norm": 2.249428820531725, "learning_rate": 5.817742727345922e-07, "loss": 0.2966, "step": 26710 }, { "epoch": 0.46430495923794957, "grad_norm": 3.0900800447780523, "learning_rate": 5.817465022909905e-07, "loss": 0.3128, "step": 26711 }, { "epoch": 0.4643223417754524, "grad_norm": 2.0369987361959856, "learning_rate": 5.817187315882897e-07, "loss": 0.4076, "step": 26712 }, { "epoch": 0.4643397243129552, "grad_norm": 3.0275439170715392, "learning_rate": 5.816909606265783e-07, "loss": 0.2607, "step": 26713 }, { "epoch": 0.46435710685045806, "grad_norm": 1.2635464565468948, "learning_rate": 5.816631894059439e-07, "loss": 0.2678, "step": 26714 }, { "epoch": 0.46437448938796083, "grad_norm": 1.7581423701247556, "learning_rate": 5.816354179264749e-07, "loss": 0.4858, "step": 26715 }, { "epoch": 0.46439187192546366, "grad_norm": 0.9400748761081332, "learning_rate": 5.816076461882589e-07, "loss": 0.1832, "step": 26716 }, { "epoch": 0.4644092544629665, "grad_norm": 1.2291254495319652, "learning_rate": 5.815798741913843e-07, "loss": 0.167, "step": 26717 }, { "epoch": 0.4644266370004693, "grad_norm": 2.1922952153972535, "learning_rate": 5.815521019359389e-07, "loss": 0.2779, "step": 26718 }, { "epoch": 0.46444401953797215, "grad_norm": 2.450172654164206, "learning_rate": 5.815243294220107e-07, "loss": 0.3733, "step": 26719 }, { "epoch": 0.464461402075475, "grad_norm": 1.9436251791957035, "learning_rate": 5.814965566496879e-07, "loss": 0.512, "step": 26720 }, { "epoch": 0.4644787846129778, "grad_norm": 4.653237771409232, "learning_rate": 5.814687836190585e-07, "loss": 0.3087, "step": 26721 }, { "epoch": 0.46449616715048064, "grad_norm": 1.5982715110796588, "learning_rate": 5.814410103302105e-07, "loss": 0.2726, "step": 26722 }, { "epoch": 0.4645135496879835, "grad_norm": 0.9671045918730236, "learning_rate": 5.814132367832319e-07, "loss": 0.1696, "step": 26723 }, { "epoch": 0.4645309322254863, "grad_norm": 1.1755917206661446, "learning_rate": 5.813854629782107e-07, "loss": 0.3195, "step": 26724 }, { "epoch": 0.4645483147629891, "grad_norm": 1.4674080401320673, "learning_rate": 5.813576889152349e-07, "loss": 0.2908, "step": 26725 }, { "epoch": 0.4645656973004919, "grad_norm": 1.6428425114147562, "learning_rate": 5.813299145943926e-07, "loss": 0.198, "step": 26726 }, { "epoch": 0.46458307983799474, "grad_norm": 1.3024572741844602, "learning_rate": 5.813021400157719e-07, "loss": 0.1671, "step": 26727 }, { "epoch": 0.46460046237549757, "grad_norm": 2.0430814704287696, "learning_rate": 5.812743651794608e-07, "loss": 0.1519, "step": 26728 }, { "epoch": 0.4646178449130004, "grad_norm": 1.2750541824286954, "learning_rate": 5.812465900855472e-07, "loss": 0.2094, "step": 26729 }, { "epoch": 0.46463522745050323, "grad_norm": 1.5523063866418991, "learning_rate": 5.812188147341195e-07, "loss": 0.2258, "step": 26730 }, { "epoch": 0.46465260998800606, "grad_norm": 1.5726873834375577, "learning_rate": 5.811910391252651e-07, "loss": 0.3413, "step": 26731 }, { "epoch": 0.4646699925255089, "grad_norm": 1.5411931667320282, "learning_rate": 5.811632632590726e-07, "loss": 0.3, "step": 26732 }, { "epoch": 0.4646873750630117, "grad_norm": 1.7138345557116117, "learning_rate": 5.811354871356297e-07, "loss": 0.2112, "step": 26733 }, { "epoch": 0.46470475760051455, "grad_norm": 1.6715913609046842, "learning_rate": 5.811077107550249e-07, "loss": 0.2945, "step": 26734 }, { "epoch": 0.4647221401380173, "grad_norm": 1.1471134971425565, "learning_rate": 5.810799341173455e-07, "loss": 0.1578, "step": 26735 }, { "epoch": 0.46473952267552016, "grad_norm": 1.0620817435113403, "learning_rate": 5.810521572226802e-07, "loss": 0.2257, "step": 26736 }, { "epoch": 0.464756905213023, "grad_norm": 1.5242890895444772, "learning_rate": 5.810243800711167e-07, "loss": 0.2548, "step": 26737 }, { "epoch": 0.4647742877505258, "grad_norm": 1.9229397891706372, "learning_rate": 5.809966026627432e-07, "loss": 0.2156, "step": 26738 }, { "epoch": 0.46479167028802865, "grad_norm": 1.3330610413079216, "learning_rate": 5.809688249976475e-07, "loss": 0.197, "step": 26739 }, { "epoch": 0.4648090528255315, "grad_norm": 1.6958600640511425, "learning_rate": 5.809410470759181e-07, "loss": 0.1985, "step": 26740 }, { "epoch": 0.4648264353630343, "grad_norm": 2.57987904999333, "learning_rate": 5.809132688976426e-07, "loss": 0.379, "step": 26741 }, { "epoch": 0.46484381790053714, "grad_norm": 1.7951568258351076, "learning_rate": 5.808854904629092e-07, "loss": 0.3541, "step": 26742 }, { "epoch": 0.46486120043803997, "grad_norm": 1.4475875916289715, "learning_rate": 5.808577117718061e-07, "loss": 0.273, "step": 26743 }, { "epoch": 0.46487858297554274, "grad_norm": 1.1840849935461222, "learning_rate": 5.80829932824421e-07, "loss": 0.2265, "step": 26744 }, { "epoch": 0.4648959655130456, "grad_norm": 1.9801259584967665, "learning_rate": 5.808021536208423e-07, "loss": 0.2943, "step": 26745 }, { "epoch": 0.4649133480505484, "grad_norm": 1.99161465417648, "learning_rate": 5.807743741611578e-07, "loss": 0.2638, "step": 26746 }, { "epoch": 0.46493073058805123, "grad_norm": 2.4477050295334477, "learning_rate": 5.807465944454557e-07, "loss": 0.3251, "step": 26747 }, { "epoch": 0.46494811312555406, "grad_norm": 2.3211590106616913, "learning_rate": 5.807188144738239e-07, "loss": 0.2496, "step": 26748 }, { "epoch": 0.4649654956630569, "grad_norm": 0.7012494774728902, "learning_rate": 5.806910342463507e-07, "loss": 0.2968, "step": 26749 }, { "epoch": 0.4649828782005597, "grad_norm": 1.5843794878862056, "learning_rate": 5.806632537631238e-07, "loss": 0.2357, "step": 26750 }, { "epoch": 0.46500026073806255, "grad_norm": 1.447307087695898, "learning_rate": 5.806354730242314e-07, "loss": 0.1808, "step": 26751 }, { "epoch": 0.4650176432755654, "grad_norm": 1.6557308259082006, "learning_rate": 5.806076920297618e-07, "loss": 0.3609, "step": 26752 }, { "epoch": 0.4650350258130682, "grad_norm": 1.9283301185990633, "learning_rate": 5.805799107798027e-07, "loss": 0.2027, "step": 26753 }, { "epoch": 0.465052408350571, "grad_norm": 1.489725248253482, "learning_rate": 5.805521292744423e-07, "loss": 0.192, "step": 26754 }, { "epoch": 0.4650697908880738, "grad_norm": 1.181947274127117, "learning_rate": 5.805243475137687e-07, "loss": 0.2867, "step": 26755 }, { "epoch": 0.46508717342557665, "grad_norm": 2.3610242764489255, "learning_rate": 5.8049656549787e-07, "loss": 0.2907, "step": 26756 }, { "epoch": 0.4651045559630795, "grad_norm": 1.5271613792832834, "learning_rate": 5.80468783226834e-07, "loss": 0.2093, "step": 26757 }, { "epoch": 0.4651219385005823, "grad_norm": 1.4926412778833442, "learning_rate": 5.804410007007488e-07, "loss": 0.2663, "step": 26758 }, { "epoch": 0.46513932103808514, "grad_norm": 1.627388358007812, "learning_rate": 5.804132179197029e-07, "loss": 0.311, "step": 26759 }, { "epoch": 0.46515670357558797, "grad_norm": 3.108171004123734, "learning_rate": 5.803854348837839e-07, "loss": 0.4351, "step": 26760 }, { "epoch": 0.4651740861130908, "grad_norm": 3.5481325285874172, "learning_rate": 5.803576515930801e-07, "loss": 0.2254, "step": 26761 }, { "epoch": 0.46519146865059363, "grad_norm": 2.2290406898641075, "learning_rate": 5.803298680476794e-07, "loss": 0.2014, "step": 26762 }, { "epoch": 0.46520885118809646, "grad_norm": 1.817495943631983, "learning_rate": 5.803020842476698e-07, "loss": 0.3034, "step": 26763 }, { "epoch": 0.46522623372559924, "grad_norm": 1.4801736905871197, "learning_rate": 5.802743001931395e-07, "loss": 0.3965, "step": 26764 }, { "epoch": 0.46524361626310207, "grad_norm": 1.485306558253211, "learning_rate": 5.802465158841768e-07, "loss": 0.2155, "step": 26765 }, { "epoch": 0.4652609988006049, "grad_norm": 1.7210191698749504, "learning_rate": 5.802187313208691e-07, "loss": 0.2069, "step": 26766 }, { "epoch": 0.4652783813381077, "grad_norm": 1.608659946968346, "learning_rate": 5.801909465033051e-07, "loss": 0.2156, "step": 26767 }, { "epoch": 0.46529576387561056, "grad_norm": 2.724008566742711, "learning_rate": 5.801631614315728e-07, "loss": 0.3392, "step": 26768 }, { "epoch": 0.4653131464131134, "grad_norm": 1.3839964884850537, "learning_rate": 5.801353761057599e-07, "loss": 0.2018, "step": 26769 }, { "epoch": 0.4653305289506162, "grad_norm": 4.569383182576875, "learning_rate": 5.801075905259546e-07, "loss": 0.3971, "step": 26770 }, { "epoch": 0.46534791148811905, "grad_norm": 1.6293143095785265, "learning_rate": 5.800798046922453e-07, "loss": 0.1981, "step": 26771 }, { "epoch": 0.4653652940256219, "grad_norm": 1.9360203341752944, "learning_rate": 5.800520186047195e-07, "loss": 0.3016, "step": 26772 }, { "epoch": 0.4653826765631247, "grad_norm": 3.734197619708022, "learning_rate": 5.800242322634657e-07, "loss": 0.2094, "step": 26773 }, { "epoch": 0.4654000591006275, "grad_norm": 1.5440330029406981, "learning_rate": 5.799964456685718e-07, "loss": 0.1858, "step": 26774 }, { "epoch": 0.4654174416381303, "grad_norm": 3.6255702821403655, "learning_rate": 5.799686588201261e-07, "loss": 0.3773, "step": 26775 }, { "epoch": 0.46543482417563314, "grad_norm": 3.4864346114242224, "learning_rate": 5.799408717182162e-07, "loss": 0.4005, "step": 26776 }, { "epoch": 0.465452206713136, "grad_norm": 1.8583711434313832, "learning_rate": 5.799130843629306e-07, "loss": 0.1808, "step": 26777 }, { "epoch": 0.4654695892506388, "grad_norm": 2.4788080962461327, "learning_rate": 5.798852967543572e-07, "loss": 0.261, "step": 26778 }, { "epoch": 0.46548697178814163, "grad_norm": 1.5785742864796022, "learning_rate": 5.798575088925842e-07, "loss": 0.3767, "step": 26779 }, { "epoch": 0.46550435432564446, "grad_norm": 2.0107911123132323, "learning_rate": 5.798297207776996e-07, "loss": 0.209, "step": 26780 }, { "epoch": 0.4655217368631473, "grad_norm": 1.3187643258376287, "learning_rate": 5.798019324097913e-07, "loss": 0.3136, "step": 26781 }, { "epoch": 0.4655391194006501, "grad_norm": 1.334271456095452, "learning_rate": 5.797741437889476e-07, "loss": 0.3902, "step": 26782 }, { "epoch": 0.46555650193815296, "grad_norm": 1.9827525506827481, "learning_rate": 5.797463549152566e-07, "loss": 0.3062, "step": 26783 }, { "epoch": 0.46557388447565573, "grad_norm": 5.083097085402375, "learning_rate": 5.797185657888061e-07, "loss": 0.2752, "step": 26784 }, { "epoch": 0.46559126701315856, "grad_norm": 1.1146755430173512, "learning_rate": 5.796907764096846e-07, "loss": 0.2435, "step": 26785 }, { "epoch": 0.4656086495506614, "grad_norm": 2.8779274460918645, "learning_rate": 5.796629867779798e-07, "loss": 0.3408, "step": 26786 }, { "epoch": 0.4656260320881642, "grad_norm": 2.5526483183741955, "learning_rate": 5.7963519689378e-07, "loss": 0.4493, "step": 26787 }, { "epoch": 0.46564341462566705, "grad_norm": 1.300097420987972, "learning_rate": 5.796074067571731e-07, "loss": 0.2889, "step": 26788 }, { "epoch": 0.4656607971631699, "grad_norm": 1.9136817241397879, "learning_rate": 5.795796163682472e-07, "loss": 0.2499, "step": 26789 }, { "epoch": 0.4656781797006727, "grad_norm": 1.8792997231465034, "learning_rate": 5.795518257270908e-07, "loss": 0.3015, "step": 26790 }, { "epoch": 0.46569556223817554, "grad_norm": 1.0480144814621655, "learning_rate": 5.795240348337915e-07, "loss": 0.3066, "step": 26791 }, { "epoch": 0.4657129447756784, "grad_norm": 7.087871532770876, "learning_rate": 5.794962436884374e-07, "loss": 0.2424, "step": 26792 }, { "epoch": 0.4657303273131812, "grad_norm": 1.3571345547444174, "learning_rate": 5.794684522911168e-07, "loss": 0.1759, "step": 26793 }, { "epoch": 0.465747709850684, "grad_norm": 1.5218413375989102, "learning_rate": 5.794406606419179e-07, "loss": 0.1925, "step": 26794 }, { "epoch": 0.4657650923881868, "grad_norm": 2.4114332988461378, "learning_rate": 5.794128687409283e-07, "loss": 0.3039, "step": 26795 }, { "epoch": 0.46578247492568964, "grad_norm": 1.6525230693374402, "learning_rate": 5.793850765882365e-07, "loss": 0.2337, "step": 26796 }, { "epoch": 0.46579985746319247, "grad_norm": 2.4852120029515317, "learning_rate": 5.793572841839306e-07, "loss": 0.3797, "step": 26797 }, { "epoch": 0.4658172400006953, "grad_norm": 1.289603321151926, "learning_rate": 5.793294915280985e-07, "loss": 0.2717, "step": 26798 }, { "epoch": 0.46583462253819813, "grad_norm": 2.2711823141317113, "learning_rate": 5.793016986208283e-07, "loss": 0.3148, "step": 26799 }, { "epoch": 0.46585200507570096, "grad_norm": 2.3206920893792953, "learning_rate": 5.792739054622082e-07, "loss": 0.2242, "step": 26800 }, { "epoch": 0.4658693876132038, "grad_norm": 2.079599126188846, "learning_rate": 5.792461120523261e-07, "loss": 0.3026, "step": 26801 }, { "epoch": 0.4658867701507066, "grad_norm": 2.1838518550292902, "learning_rate": 5.792183183912704e-07, "loss": 0.381, "step": 26802 }, { "epoch": 0.46590415268820945, "grad_norm": 2.430921275668216, "learning_rate": 5.79190524479129e-07, "loss": 0.2651, "step": 26803 }, { "epoch": 0.4659215352257122, "grad_norm": 1.5667959497901969, "learning_rate": 5.7916273031599e-07, "loss": 0.2443, "step": 26804 }, { "epoch": 0.46593891776321505, "grad_norm": 2.1170565175824905, "learning_rate": 5.791349359019413e-07, "loss": 0.3317, "step": 26805 }, { "epoch": 0.4659563003007179, "grad_norm": 1.2536388951414674, "learning_rate": 5.791071412370713e-07, "loss": 0.1837, "step": 26806 }, { "epoch": 0.4659736828382207, "grad_norm": 2.1024513445267208, "learning_rate": 5.790793463214683e-07, "loss": 0.2577, "step": 26807 }, { "epoch": 0.46599106537572355, "grad_norm": 1.2041537123697519, "learning_rate": 5.790515511552198e-07, "loss": 0.2712, "step": 26808 }, { "epoch": 0.4660084479132264, "grad_norm": 1.356639330437796, "learning_rate": 5.790237557384143e-07, "loss": 0.3059, "step": 26809 }, { "epoch": 0.4660258304507292, "grad_norm": 1.4040059113914671, "learning_rate": 5.789959600711397e-07, "loss": 0.2997, "step": 26810 }, { "epoch": 0.46604321298823204, "grad_norm": 1.9102254745004423, "learning_rate": 5.789681641534843e-07, "loss": 0.2688, "step": 26811 }, { "epoch": 0.46606059552573487, "grad_norm": 0.8788872965704694, "learning_rate": 5.78940367985536e-07, "loss": 0.2726, "step": 26812 }, { "epoch": 0.4660779780632377, "grad_norm": 0.9259943503628737, "learning_rate": 5.789125715673832e-07, "loss": 0.2329, "step": 26813 }, { "epoch": 0.46609536060074047, "grad_norm": 1.5396792183365395, "learning_rate": 5.788847748991135e-07, "loss": 0.3562, "step": 26814 }, { "epoch": 0.4661127431382433, "grad_norm": 2.4201267087353737, "learning_rate": 5.788569779808154e-07, "loss": 0.3554, "step": 26815 }, { "epoch": 0.46613012567574613, "grad_norm": 1.6780344261461664, "learning_rate": 5.788291808125771e-07, "loss": 0.3096, "step": 26816 }, { "epoch": 0.46614750821324896, "grad_norm": 2.3328824030072965, "learning_rate": 5.788013833944863e-07, "loss": 0.3478, "step": 26817 }, { "epoch": 0.4661648907507518, "grad_norm": 1.4219311799141596, "learning_rate": 5.787735857266313e-07, "loss": 0.214, "step": 26818 }, { "epoch": 0.4661822732882546, "grad_norm": 1.065372917587264, "learning_rate": 5.787457878091004e-07, "loss": 0.1778, "step": 26819 }, { "epoch": 0.46619965582575745, "grad_norm": 1.5314467271189938, "learning_rate": 5.787179896419814e-07, "loss": 0.1462, "step": 26820 }, { "epoch": 0.4662170383632603, "grad_norm": 1.025385727632242, "learning_rate": 5.786901912253626e-07, "loss": 0.2475, "step": 26821 }, { "epoch": 0.4662344209007631, "grad_norm": 2.3641267206070555, "learning_rate": 5.786623925593319e-07, "loss": 0.2505, "step": 26822 }, { "epoch": 0.46625180343826594, "grad_norm": 2.705999594826184, "learning_rate": 5.786345936439777e-07, "loss": 0.3249, "step": 26823 }, { "epoch": 0.4662691859757687, "grad_norm": 2.0240289137315868, "learning_rate": 5.786067944793879e-07, "loss": 0.2581, "step": 26824 }, { "epoch": 0.46628656851327155, "grad_norm": 1.630662917313121, "learning_rate": 5.785789950656508e-07, "loss": 0.2662, "step": 26825 }, { "epoch": 0.4663039510507744, "grad_norm": 1.3581016202120493, "learning_rate": 5.785511954028541e-07, "loss": 0.2701, "step": 26826 }, { "epoch": 0.4663213335882772, "grad_norm": 1.290664527133295, "learning_rate": 5.785233954910864e-07, "loss": 0.26, "step": 26827 }, { "epoch": 0.46633871612578004, "grad_norm": 2.054284984815644, "learning_rate": 5.784955953304356e-07, "loss": 0.3478, "step": 26828 }, { "epoch": 0.46635609866328287, "grad_norm": 1.250152228138056, "learning_rate": 5.784677949209897e-07, "loss": 0.183, "step": 26829 }, { "epoch": 0.4663734812007857, "grad_norm": 1.309318621314866, "learning_rate": 5.784399942628369e-07, "loss": 0.254, "step": 26830 }, { "epoch": 0.46639086373828853, "grad_norm": 1.1556252660499955, "learning_rate": 5.784121933560658e-07, "loss": 0.2199, "step": 26831 }, { "epoch": 0.46640824627579136, "grad_norm": 2.2065870883147176, "learning_rate": 5.783843922007635e-07, "loss": 0.3088, "step": 26832 }, { "epoch": 0.4664256288132942, "grad_norm": 1.2904476792255568, "learning_rate": 5.78356590797019e-07, "loss": 0.2502, "step": 26833 }, { "epoch": 0.46644301135079697, "grad_norm": 1.978691399773181, "learning_rate": 5.783287891449199e-07, "loss": 0.2982, "step": 26834 }, { "epoch": 0.4664603938882998, "grad_norm": 1.2135925368606195, "learning_rate": 5.783009872445547e-07, "loss": 0.2226, "step": 26835 }, { "epoch": 0.4664777764258026, "grad_norm": 1.1656305017304534, "learning_rate": 5.782731850960111e-07, "loss": 0.4164, "step": 26836 }, { "epoch": 0.46649515896330546, "grad_norm": 2.1431174078532083, "learning_rate": 5.782453826993779e-07, "loss": 0.3335, "step": 26837 }, { "epoch": 0.4665125415008083, "grad_norm": 1.1820282690563764, "learning_rate": 5.782175800547424e-07, "loss": 0.2125, "step": 26838 }, { "epoch": 0.4665299240383111, "grad_norm": 2.020243021927716, "learning_rate": 5.781897771621932e-07, "loss": 0.3121, "step": 26839 }, { "epoch": 0.46654730657581395, "grad_norm": 1.1447784233079306, "learning_rate": 5.781619740218183e-07, "loss": 0.1853, "step": 26840 }, { "epoch": 0.4665646891133168, "grad_norm": 1.4893154767001164, "learning_rate": 5.78134170633706e-07, "loss": 0.2376, "step": 26841 }, { "epoch": 0.4665820716508196, "grad_norm": 1.9957178474122015, "learning_rate": 5.781063669979441e-07, "loss": 0.3049, "step": 26842 }, { "epoch": 0.46659945418832244, "grad_norm": 1.546334712479968, "learning_rate": 5.780785631146209e-07, "loss": 0.2713, "step": 26843 }, { "epoch": 0.4666168367258252, "grad_norm": 1.8822377147113778, "learning_rate": 5.780507589838247e-07, "loss": 0.2848, "step": 26844 }, { "epoch": 0.46663421926332804, "grad_norm": 1.2031217369943394, "learning_rate": 5.780229546056432e-07, "loss": 0.2445, "step": 26845 }, { "epoch": 0.4666516018008309, "grad_norm": 2.3508121798040214, "learning_rate": 5.779951499801649e-07, "loss": 0.1971, "step": 26846 }, { "epoch": 0.4666689843383337, "grad_norm": 1.6683601898112956, "learning_rate": 5.779673451074778e-07, "loss": 0.2788, "step": 26847 }, { "epoch": 0.46668636687583653, "grad_norm": 1.8432775543496474, "learning_rate": 5.779395399876701e-07, "loss": 0.2339, "step": 26848 }, { "epoch": 0.46670374941333936, "grad_norm": 1.1923473936567894, "learning_rate": 5.779117346208296e-07, "loss": 0.3141, "step": 26849 }, { "epoch": 0.4667211319508422, "grad_norm": 1.6533375961368, "learning_rate": 5.77883929007045e-07, "loss": 0.3551, "step": 26850 }, { "epoch": 0.466738514488345, "grad_norm": 1.6045339273160908, "learning_rate": 5.77856123146404e-07, "loss": 0.3583, "step": 26851 }, { "epoch": 0.46675589702584785, "grad_norm": 3.2392901703924566, "learning_rate": 5.77828317038995e-07, "loss": 0.2889, "step": 26852 }, { "epoch": 0.4667732795633507, "grad_norm": 1.2763637253278481, "learning_rate": 5.778005106849057e-07, "loss": 0.2632, "step": 26853 }, { "epoch": 0.46679066210085346, "grad_norm": 0.8818597528659307, "learning_rate": 5.777727040842247e-07, "loss": 0.2435, "step": 26854 }, { "epoch": 0.4668080446383563, "grad_norm": 1.9130587228314964, "learning_rate": 5.777448972370399e-07, "loss": 0.3386, "step": 26855 }, { "epoch": 0.4668254271758591, "grad_norm": 1.5035448985563322, "learning_rate": 5.777170901434395e-07, "loss": 0.2244, "step": 26856 }, { "epoch": 0.46684280971336195, "grad_norm": 2.892371332161466, "learning_rate": 5.776892828035116e-07, "loss": 0.233, "step": 26857 }, { "epoch": 0.4668601922508648, "grad_norm": 2.1198063810441043, "learning_rate": 5.776614752173444e-07, "loss": 0.3685, "step": 26858 }, { "epoch": 0.4668775747883676, "grad_norm": 1.7197413100075414, "learning_rate": 5.77633667385026e-07, "loss": 0.1868, "step": 26859 }, { "epoch": 0.46689495732587044, "grad_norm": 2.6467906691567444, "learning_rate": 5.776058593066446e-07, "loss": 0.404, "step": 26860 }, { "epoch": 0.46691233986337327, "grad_norm": 1.8782198401193975, "learning_rate": 5.775780509822881e-07, "loss": 0.3164, "step": 26861 }, { "epoch": 0.4669297224008761, "grad_norm": 1.8118144600871962, "learning_rate": 5.77550242412045e-07, "loss": 0.2457, "step": 26862 }, { "epoch": 0.46694710493837893, "grad_norm": 1.473110503465948, "learning_rate": 5.775224335960031e-07, "loss": 0.2848, "step": 26863 }, { "epoch": 0.4669644874758817, "grad_norm": 2.292044852702435, "learning_rate": 5.774946245342508e-07, "loss": 0.3989, "step": 26864 }, { "epoch": 0.46698187001338454, "grad_norm": 0.7846082821457129, "learning_rate": 5.774668152268761e-07, "loss": 0.1912, "step": 26865 }, { "epoch": 0.46699925255088737, "grad_norm": 1.760212944655063, "learning_rate": 5.774390056739672e-07, "loss": 0.2755, "step": 26866 }, { "epoch": 0.4670166350883902, "grad_norm": 1.4944011288555588, "learning_rate": 5.774111958756122e-07, "loss": 0.1746, "step": 26867 }, { "epoch": 0.467034017625893, "grad_norm": 2.0141859448179193, "learning_rate": 5.773833858318993e-07, "loss": 0.438, "step": 26868 }, { "epoch": 0.46705140016339586, "grad_norm": 2.7518226663992764, "learning_rate": 5.773555755429165e-07, "loss": 0.4654, "step": 26869 }, { "epoch": 0.4670687827008987, "grad_norm": 2.59843907345693, "learning_rate": 5.773277650087523e-07, "loss": 0.3602, "step": 26870 }, { "epoch": 0.4670861652384015, "grad_norm": 2.0527886982804633, "learning_rate": 5.772999542294944e-07, "loss": 0.3684, "step": 26871 }, { "epoch": 0.46710354777590435, "grad_norm": 2.163067268093091, "learning_rate": 5.772721432052312e-07, "loss": 0.2069, "step": 26872 }, { "epoch": 0.4671209303134072, "grad_norm": 1.884149662066992, "learning_rate": 5.772443319360509e-07, "loss": 0.2711, "step": 26873 }, { "epoch": 0.46713831285090995, "grad_norm": 1.177896302565109, "learning_rate": 5.772165204220413e-07, "loss": 0.3391, "step": 26874 }, { "epoch": 0.4671556953884128, "grad_norm": 0.892070606534098, "learning_rate": 5.771887086632909e-07, "loss": 0.2889, "step": 26875 }, { "epoch": 0.4671730779259156, "grad_norm": 3.8429545141550845, "learning_rate": 5.77160896659888e-07, "loss": 0.2703, "step": 26876 }, { "epoch": 0.46719046046341844, "grad_norm": 2.4163215380800414, "learning_rate": 5.771330844119203e-07, "loss": 0.265, "step": 26877 }, { "epoch": 0.4672078430009213, "grad_norm": 2.4754436432733824, "learning_rate": 5.771052719194761e-07, "loss": 0.2575, "step": 26878 }, { "epoch": 0.4672252255384241, "grad_norm": 1.1373777707370225, "learning_rate": 5.770774591826438e-07, "loss": 0.2235, "step": 26879 }, { "epoch": 0.46724260807592694, "grad_norm": 1.357554982694306, "learning_rate": 5.770496462015111e-07, "loss": 0.4019, "step": 26880 }, { "epoch": 0.46725999061342977, "grad_norm": 1.8192003571061888, "learning_rate": 5.770218329761667e-07, "loss": 0.2179, "step": 26881 }, { "epoch": 0.4672773731509326, "grad_norm": 2.2718580734110065, "learning_rate": 5.769940195066982e-07, "loss": 0.3275, "step": 26882 }, { "epoch": 0.46729475568843537, "grad_norm": 1.415911941091758, "learning_rate": 5.769662057931942e-07, "loss": 0.281, "step": 26883 }, { "epoch": 0.4673121382259382, "grad_norm": 1.4839533168770052, "learning_rate": 5.769383918357427e-07, "loss": 0.1603, "step": 26884 }, { "epoch": 0.46732952076344103, "grad_norm": 2.3253615125812606, "learning_rate": 5.769105776344319e-07, "loss": 0.3813, "step": 26885 }, { "epoch": 0.46734690330094386, "grad_norm": 1.7916156674934263, "learning_rate": 5.768827631893495e-07, "loss": 0.2768, "step": 26886 }, { "epoch": 0.4673642858384467, "grad_norm": 2.3004915295300354, "learning_rate": 5.768549485005845e-07, "loss": 0.2845, "step": 26887 }, { "epoch": 0.4673816683759495, "grad_norm": 2.0511677042407626, "learning_rate": 5.768271335682244e-07, "loss": 0.2974, "step": 26888 }, { "epoch": 0.46739905091345235, "grad_norm": 2.429334722811212, "learning_rate": 5.767993183923577e-07, "loss": 0.154, "step": 26889 }, { "epoch": 0.4674164334509552, "grad_norm": 1.3116895872814345, "learning_rate": 5.767715029730723e-07, "loss": 0.5709, "step": 26890 }, { "epoch": 0.467433815988458, "grad_norm": 1.3114805711059108, "learning_rate": 5.767436873104565e-07, "loss": 0.1729, "step": 26891 }, { "epoch": 0.46745119852596084, "grad_norm": 1.5455583592157447, "learning_rate": 5.767158714045986e-07, "loss": 0.3111, "step": 26892 }, { "epoch": 0.4674685810634636, "grad_norm": 1.172644096646345, "learning_rate": 5.766880552555866e-07, "loss": 0.2319, "step": 26893 }, { "epoch": 0.46748596360096645, "grad_norm": 1.3109668865566637, "learning_rate": 5.766602388635086e-07, "loss": 0.3908, "step": 26894 }, { "epoch": 0.4675033461384693, "grad_norm": 1.4804867586639547, "learning_rate": 5.766324222284528e-07, "loss": 0.478, "step": 26895 }, { "epoch": 0.4675207286759721, "grad_norm": 1.2546722289269632, "learning_rate": 5.766046053505077e-07, "loss": 0.2098, "step": 26896 }, { "epoch": 0.46753811121347494, "grad_norm": 2.1922519160432397, "learning_rate": 5.76576788229761e-07, "loss": 0.3194, "step": 26897 }, { "epoch": 0.46755549375097777, "grad_norm": 2.1426159760958385, "learning_rate": 5.765489708663011e-07, "loss": 0.3232, "step": 26898 }, { "epoch": 0.4675728762884806, "grad_norm": 1.4419362276217595, "learning_rate": 5.765211532602161e-07, "loss": 0.2477, "step": 26899 }, { "epoch": 0.46759025882598343, "grad_norm": 1.2627261456213914, "learning_rate": 5.764933354115941e-07, "loss": 0.1956, "step": 26900 }, { "epoch": 0.46760764136348626, "grad_norm": 1.3828885344985526, "learning_rate": 5.764655173205236e-07, "loss": 0.2103, "step": 26901 }, { "epoch": 0.4676250239009891, "grad_norm": 1.6052495164465244, "learning_rate": 5.764376989870923e-07, "loss": 0.2719, "step": 26902 }, { "epoch": 0.46764240643849186, "grad_norm": 1.5466246349939634, "learning_rate": 5.764098804113887e-07, "loss": 0.1984, "step": 26903 }, { "epoch": 0.4676597889759947, "grad_norm": 1.7484615866606452, "learning_rate": 5.763820615935011e-07, "loss": 0.23, "step": 26904 }, { "epoch": 0.4676771715134975, "grad_norm": 1.2580212853010342, "learning_rate": 5.763542425335172e-07, "loss": 0.2327, "step": 26905 }, { "epoch": 0.46769455405100036, "grad_norm": 1.3850152206820956, "learning_rate": 5.763264232315255e-07, "loss": 0.217, "step": 26906 }, { "epoch": 0.4677119365885032, "grad_norm": 2.8513639545898495, "learning_rate": 5.76298603687614e-07, "loss": 0.2606, "step": 26907 }, { "epoch": 0.467729319126006, "grad_norm": 1.2032643795176843, "learning_rate": 5.762707839018711e-07, "loss": 0.1115, "step": 26908 }, { "epoch": 0.46774670166350885, "grad_norm": 1.3948340637033043, "learning_rate": 5.762429638743847e-07, "loss": 0.1974, "step": 26909 }, { "epoch": 0.4677640842010117, "grad_norm": 1.212213596386518, "learning_rate": 5.762151436052433e-07, "loss": 0.2242, "step": 26910 }, { "epoch": 0.4677814667385145, "grad_norm": 1.9192272353493043, "learning_rate": 5.761873230945348e-07, "loss": 0.5793, "step": 26911 }, { "epoch": 0.46779884927601734, "grad_norm": 2.0880222249939453, "learning_rate": 5.761595023423476e-07, "loss": 0.2378, "step": 26912 }, { "epoch": 0.4678162318135201, "grad_norm": 1.9121782529177433, "learning_rate": 5.761316813487695e-07, "loss": 0.2326, "step": 26913 }, { "epoch": 0.46783361435102294, "grad_norm": 1.3971605117517023, "learning_rate": 5.761038601138892e-07, "loss": 0.4535, "step": 26914 }, { "epoch": 0.46785099688852577, "grad_norm": 1.1937376911813598, "learning_rate": 5.760760386377945e-07, "loss": 0.2225, "step": 26915 }, { "epoch": 0.4678683794260286, "grad_norm": 3.6892556410418265, "learning_rate": 5.760482169205738e-07, "loss": 0.4614, "step": 26916 }, { "epoch": 0.46788576196353143, "grad_norm": 1.1491438164008998, "learning_rate": 5.760203949623151e-07, "loss": 0.2464, "step": 26917 }, { "epoch": 0.46790314450103426, "grad_norm": 1.1419889597477693, "learning_rate": 5.759925727631067e-07, "loss": 0.2243, "step": 26918 }, { "epoch": 0.4679205270385371, "grad_norm": 1.0011867660174543, "learning_rate": 5.759647503230367e-07, "loss": 0.2098, "step": 26919 }, { "epoch": 0.4679379095760399, "grad_norm": 2.520377889217922, "learning_rate": 5.759369276421935e-07, "loss": 0.3719, "step": 26920 }, { "epoch": 0.46795529211354275, "grad_norm": 2.602679452068042, "learning_rate": 5.75909104720665e-07, "loss": 0.3418, "step": 26921 }, { "epoch": 0.4679726746510456, "grad_norm": 1.5044416961398364, "learning_rate": 5.758812815585394e-07, "loss": 0.2447, "step": 26922 }, { "epoch": 0.46799005718854836, "grad_norm": 1.4984389037565515, "learning_rate": 5.758534581559052e-07, "loss": 0.4776, "step": 26923 }, { "epoch": 0.4680074397260512, "grad_norm": 2.0989336747380323, "learning_rate": 5.758256345128504e-07, "loss": 0.2735, "step": 26924 }, { "epoch": 0.468024822263554, "grad_norm": 1.602978322878712, "learning_rate": 5.75797810629463e-07, "loss": 0.2502, "step": 26925 }, { "epoch": 0.46804220480105685, "grad_norm": 1.2479202346689247, "learning_rate": 5.757699865058314e-07, "loss": 0.3901, "step": 26926 }, { "epoch": 0.4680595873385597, "grad_norm": 1.574050074489435, "learning_rate": 5.757421621420437e-07, "loss": 0.2729, "step": 26927 }, { "epoch": 0.4680769698760625, "grad_norm": 1.504120632598372, "learning_rate": 5.757143375381883e-07, "loss": 0.1413, "step": 26928 }, { "epoch": 0.46809435241356534, "grad_norm": 1.7363916720123644, "learning_rate": 5.756865126943532e-07, "loss": 0.2056, "step": 26929 }, { "epoch": 0.46811173495106817, "grad_norm": 1.739437295788733, "learning_rate": 5.756586876106264e-07, "loss": 0.2086, "step": 26930 }, { "epoch": 0.468129117488571, "grad_norm": 2.0122486063107843, "learning_rate": 5.756308622870966e-07, "loss": 0.2707, "step": 26931 }, { "epoch": 0.46814650002607383, "grad_norm": 2.9663487605085774, "learning_rate": 5.756030367238514e-07, "loss": 0.3248, "step": 26932 }, { "epoch": 0.4681638825635766, "grad_norm": 1.4313425652638556, "learning_rate": 5.755752109209797e-07, "loss": 0.1897, "step": 26933 }, { "epoch": 0.46818126510107944, "grad_norm": 4.277014245981488, "learning_rate": 5.755473848785689e-07, "loss": 0.2865, "step": 26934 }, { "epoch": 0.46819864763858227, "grad_norm": 1.6533235476055994, "learning_rate": 5.755195585967079e-07, "loss": 0.4415, "step": 26935 }, { "epoch": 0.4682160301760851, "grad_norm": 1.460982202589727, "learning_rate": 5.754917320754844e-07, "loss": 0.2721, "step": 26936 }, { "epoch": 0.4682334127135879, "grad_norm": 2.3244620225958528, "learning_rate": 5.754639053149869e-07, "loss": 0.2672, "step": 26937 }, { "epoch": 0.46825079525109076, "grad_norm": 1.2715864524369516, "learning_rate": 5.754360783153034e-07, "loss": 0.2498, "step": 26938 }, { "epoch": 0.4682681777885936, "grad_norm": 1.4406210414525178, "learning_rate": 5.754082510765223e-07, "loss": 0.1921, "step": 26939 }, { "epoch": 0.4682855603260964, "grad_norm": 1.6483268237483155, "learning_rate": 5.753804235987317e-07, "loss": 0.2807, "step": 26940 }, { "epoch": 0.46830294286359925, "grad_norm": 1.3786178727179925, "learning_rate": 5.753525958820196e-07, "loss": 0.241, "step": 26941 }, { "epoch": 0.4683203254011021, "grad_norm": 1.5289799399722028, "learning_rate": 5.753247679264746e-07, "loss": 0.2977, "step": 26942 }, { "epoch": 0.46833770793860485, "grad_norm": 1.4145362612367287, "learning_rate": 5.752969397321845e-07, "loss": 0.4065, "step": 26943 }, { "epoch": 0.4683550904761077, "grad_norm": 2.413548130767112, "learning_rate": 5.752691112992376e-07, "loss": 0.2817, "step": 26944 }, { "epoch": 0.4683724730136105, "grad_norm": 4.7305649767805775, "learning_rate": 5.752412826277225e-07, "loss": 0.3122, "step": 26945 }, { "epoch": 0.46838985555111334, "grad_norm": 1.5532947730802373, "learning_rate": 5.752134537177269e-07, "loss": 0.2377, "step": 26946 }, { "epoch": 0.4684072380886162, "grad_norm": 1.546330701527244, "learning_rate": 5.751856245693393e-07, "loss": 0.2466, "step": 26947 }, { "epoch": 0.468424620626119, "grad_norm": 2.0212498883858694, "learning_rate": 5.751577951826477e-07, "loss": 0.2427, "step": 26948 }, { "epoch": 0.46844200316362183, "grad_norm": 1.3413588009596902, "learning_rate": 5.751299655577406e-07, "loss": 0.2957, "step": 26949 }, { "epoch": 0.46845938570112466, "grad_norm": 1.210023347932399, "learning_rate": 5.751021356947057e-07, "loss": 0.2112, "step": 26950 }, { "epoch": 0.4684767682386275, "grad_norm": 1.613124378331602, "learning_rate": 5.750743055936318e-07, "loss": 0.208, "step": 26951 }, { "epoch": 0.4684941507761303, "grad_norm": 1.0045770602726114, "learning_rate": 5.750464752546068e-07, "loss": 0.2594, "step": 26952 }, { "epoch": 0.4685115333136331, "grad_norm": 1.9156336548970896, "learning_rate": 5.750186446777189e-07, "loss": 0.3527, "step": 26953 }, { "epoch": 0.46852891585113593, "grad_norm": 0.879112974913016, "learning_rate": 5.749908138630563e-07, "loss": 0.1746, "step": 26954 }, { "epoch": 0.46854629838863876, "grad_norm": 1.8936831988340177, "learning_rate": 5.749629828107073e-07, "loss": 0.2614, "step": 26955 }, { "epoch": 0.4685636809261416, "grad_norm": 1.1100442123773566, "learning_rate": 5.749351515207601e-07, "loss": 0.1686, "step": 26956 }, { "epoch": 0.4685810634636444, "grad_norm": 1.6897679151708693, "learning_rate": 5.74907319993303e-07, "loss": 0.2378, "step": 26957 }, { "epoch": 0.46859844600114725, "grad_norm": 1.5523686741624236, "learning_rate": 5.74879488228424e-07, "loss": 0.2661, "step": 26958 }, { "epoch": 0.4686158285386501, "grad_norm": 1.6757196149563893, "learning_rate": 5.748516562262113e-07, "loss": 0.2013, "step": 26959 }, { "epoch": 0.4686332110761529, "grad_norm": 2.448712520842166, "learning_rate": 5.748238239867535e-07, "loss": 0.3805, "step": 26960 }, { "epoch": 0.46865059361365574, "grad_norm": 1.9525752880560592, "learning_rate": 5.747959915101384e-07, "loss": 0.2539, "step": 26961 }, { "epoch": 0.46866797615115857, "grad_norm": 1.6935375231682324, "learning_rate": 5.747681587964544e-07, "loss": 0.2394, "step": 26962 }, { "epoch": 0.46868535868866135, "grad_norm": 1.3786140012745127, "learning_rate": 5.747403258457895e-07, "loss": 0.2563, "step": 26963 }, { "epoch": 0.4687027412261642, "grad_norm": 4.1105419102022225, "learning_rate": 5.747124926582325e-07, "loss": 0.1984, "step": 26964 }, { "epoch": 0.468720123763667, "grad_norm": 1.1580068556958858, "learning_rate": 5.746846592338708e-07, "loss": 0.2099, "step": 26965 }, { "epoch": 0.46873750630116984, "grad_norm": 1.548854750382798, "learning_rate": 5.746568255727933e-07, "loss": 0.427, "step": 26966 }, { "epoch": 0.46875488883867267, "grad_norm": 2.151455656426176, "learning_rate": 5.746289916750878e-07, "loss": 0.3206, "step": 26967 }, { "epoch": 0.4687722713761755, "grad_norm": 2.0229324740694974, "learning_rate": 5.746011575408429e-07, "loss": 0.3563, "step": 26968 }, { "epoch": 0.46878965391367833, "grad_norm": 1.5859138219041409, "learning_rate": 5.745733231701462e-07, "loss": 0.2096, "step": 26969 }, { "epoch": 0.46880703645118116, "grad_norm": 1.8010118063463314, "learning_rate": 5.745454885630869e-07, "loss": 0.1866, "step": 26970 }, { "epoch": 0.468824418988684, "grad_norm": 1.1356431774902964, "learning_rate": 5.745176537197521e-07, "loss": 0.3075, "step": 26971 }, { "epoch": 0.4688418015261868, "grad_norm": 1.7482725650165623, "learning_rate": 5.744898186402308e-07, "loss": 0.2487, "step": 26972 }, { "epoch": 0.4688591840636896, "grad_norm": 1.1115974203746388, "learning_rate": 5.744619833246109e-07, "loss": 0.3177, "step": 26973 }, { "epoch": 0.4688765666011924, "grad_norm": 1.126009081717694, "learning_rate": 5.744341477729809e-07, "loss": 0.257, "step": 26974 }, { "epoch": 0.46889394913869525, "grad_norm": 1.2781728031140134, "learning_rate": 5.744063119854287e-07, "loss": 0.2954, "step": 26975 }, { "epoch": 0.4689113316761981, "grad_norm": 1.085722703102691, "learning_rate": 5.743784759620427e-07, "loss": 0.2359, "step": 26976 }, { "epoch": 0.4689287142137009, "grad_norm": 1.124139301754486, "learning_rate": 5.743506397029112e-07, "loss": 0.2187, "step": 26977 }, { "epoch": 0.46894609675120374, "grad_norm": 2.4453039734256774, "learning_rate": 5.743228032081222e-07, "loss": 0.3091, "step": 26978 }, { "epoch": 0.4689634792887066, "grad_norm": 1.38153598900819, "learning_rate": 5.74294966477764e-07, "loss": 0.2252, "step": 26979 }, { "epoch": 0.4689808618262094, "grad_norm": 1.9608279852000507, "learning_rate": 5.74267129511925e-07, "loss": 0.1719, "step": 26980 }, { "epoch": 0.46899824436371224, "grad_norm": 1.5479351964886412, "learning_rate": 5.742392923106933e-07, "loss": 0.2279, "step": 26981 }, { "epoch": 0.46901562690121507, "grad_norm": 1.0682025683143266, "learning_rate": 5.742114548741573e-07, "loss": 0.1943, "step": 26982 }, { "epoch": 0.46903300943871784, "grad_norm": 1.9275239384949843, "learning_rate": 5.741836172024048e-07, "loss": 0.3096, "step": 26983 }, { "epoch": 0.46905039197622067, "grad_norm": 2.3717835035647905, "learning_rate": 5.741557792955243e-07, "loss": 0.3264, "step": 26984 }, { "epoch": 0.4690677745137235, "grad_norm": 1.0745574436548297, "learning_rate": 5.741279411536042e-07, "loss": 0.187, "step": 26985 }, { "epoch": 0.46908515705122633, "grad_norm": 1.4294321071127973, "learning_rate": 5.741001027767326e-07, "loss": 0.1874, "step": 26986 }, { "epoch": 0.46910253958872916, "grad_norm": 2.3016262910287075, "learning_rate": 5.740722641649977e-07, "loss": 0.3055, "step": 26987 }, { "epoch": 0.469119922126232, "grad_norm": 1.8937330732552413, "learning_rate": 5.740444253184876e-07, "loss": 0.241, "step": 26988 }, { "epoch": 0.4691373046637348, "grad_norm": 1.6824323630721214, "learning_rate": 5.740165862372908e-07, "loss": 0.2534, "step": 26989 }, { "epoch": 0.46915468720123765, "grad_norm": 2.476867758752621, "learning_rate": 5.739887469214955e-07, "loss": 0.279, "step": 26990 }, { "epoch": 0.4691720697387405, "grad_norm": 2.6745563802861883, "learning_rate": 5.739609073711897e-07, "loss": 0.3471, "step": 26991 }, { "epoch": 0.4691894522762433, "grad_norm": 2.2810781004908876, "learning_rate": 5.739330675864618e-07, "loss": 0.2619, "step": 26992 }, { "epoch": 0.4692068348137461, "grad_norm": 1.7470423501185715, "learning_rate": 5.739052275674002e-07, "loss": 0.2861, "step": 26993 }, { "epoch": 0.4692242173512489, "grad_norm": 1.9901427328707098, "learning_rate": 5.738773873140929e-07, "loss": 0.3311, "step": 26994 }, { "epoch": 0.46924159988875175, "grad_norm": 1.6694675695811216, "learning_rate": 5.738495468266282e-07, "loss": 0.2033, "step": 26995 }, { "epoch": 0.4692589824262546, "grad_norm": 1.2929189045230194, "learning_rate": 5.738217061050944e-07, "loss": 0.3456, "step": 26996 }, { "epoch": 0.4692763649637574, "grad_norm": 1.3663734344715097, "learning_rate": 5.737938651495798e-07, "loss": 0.2555, "step": 26997 }, { "epoch": 0.46929374750126024, "grad_norm": 0.962514684070748, "learning_rate": 5.737660239601721e-07, "loss": 0.2373, "step": 26998 }, { "epoch": 0.46931113003876307, "grad_norm": 1.1870966241421699, "learning_rate": 5.737381825369605e-07, "loss": 0.2818, "step": 26999 }, { "epoch": 0.4693285125762659, "grad_norm": 1.627293293799294, "learning_rate": 5.737103408800324e-07, "loss": 0.329, "step": 27000 }, { "epoch": 0.46934589511376873, "grad_norm": 1.7989447361089788, "learning_rate": 5.736824989894766e-07, "loss": 0.2265, "step": 27001 }, { "epoch": 0.46936327765127156, "grad_norm": 1.175882053568091, "learning_rate": 5.73654656865381e-07, "loss": 0.2545, "step": 27002 }, { "epoch": 0.46938066018877433, "grad_norm": 1.612964346992511, "learning_rate": 5.736268145078341e-07, "loss": 0.1935, "step": 27003 }, { "epoch": 0.46939804272627716, "grad_norm": 1.6261200392380002, "learning_rate": 5.735989719169238e-07, "loss": 0.1799, "step": 27004 }, { "epoch": 0.46941542526378, "grad_norm": 2.5119431614281433, "learning_rate": 5.735711290927389e-07, "loss": 0.3524, "step": 27005 }, { "epoch": 0.4694328078012828, "grad_norm": 1.1667049309262758, "learning_rate": 5.73543286035367e-07, "loss": 0.1983, "step": 27006 }, { "epoch": 0.46945019033878566, "grad_norm": 1.4152192380416937, "learning_rate": 5.735154427448969e-07, "loss": 0.2536, "step": 27007 }, { "epoch": 0.4694675728762885, "grad_norm": 1.1870811141026383, "learning_rate": 5.734875992214164e-07, "loss": 0.3938, "step": 27008 }, { "epoch": 0.4694849554137913, "grad_norm": 1.6415499578490347, "learning_rate": 5.734597554650141e-07, "loss": 0.3531, "step": 27009 }, { "epoch": 0.46950233795129415, "grad_norm": 1.4055913489243028, "learning_rate": 5.734319114757781e-07, "loss": 0.2309, "step": 27010 }, { "epoch": 0.469519720488797, "grad_norm": 1.1352485891261745, "learning_rate": 5.734040672537966e-07, "loss": 0.166, "step": 27011 }, { "epoch": 0.4695371030262998, "grad_norm": 4.374751827271638, "learning_rate": 5.733762227991581e-07, "loss": 0.8057, "step": 27012 }, { "epoch": 0.4695544855638026, "grad_norm": 1.4733810205787852, "learning_rate": 5.733483781119506e-07, "loss": 0.243, "step": 27013 }, { "epoch": 0.4695718681013054, "grad_norm": 1.4879499644056484, "learning_rate": 5.733205331922622e-07, "loss": 0.3148, "step": 27014 }, { "epoch": 0.46958925063880824, "grad_norm": 2.3420437757423165, "learning_rate": 5.732926880401816e-07, "loss": 0.2467, "step": 27015 }, { "epoch": 0.4696066331763111, "grad_norm": 1.86899747084955, "learning_rate": 5.732648426557969e-07, "loss": 0.2488, "step": 27016 }, { "epoch": 0.4696240157138139, "grad_norm": 1.7728661305230236, "learning_rate": 5.732369970391963e-07, "loss": 0.2729, "step": 27017 }, { "epoch": 0.46964139825131673, "grad_norm": 1.2870203456645095, "learning_rate": 5.73209151190468e-07, "loss": 0.1586, "step": 27018 }, { "epoch": 0.46965878078881956, "grad_norm": 3.3885151271408187, "learning_rate": 5.731813051097002e-07, "loss": 0.1966, "step": 27019 }, { "epoch": 0.4696761633263224, "grad_norm": 1.7704121414695406, "learning_rate": 5.731534587969815e-07, "loss": 0.3106, "step": 27020 }, { "epoch": 0.4696935458638252, "grad_norm": 1.6576160753682023, "learning_rate": 5.731256122523998e-07, "loss": 0.1812, "step": 27021 }, { "epoch": 0.469710928401328, "grad_norm": 1.457516498472338, "learning_rate": 5.730977654760436e-07, "loss": 0.2692, "step": 27022 }, { "epoch": 0.46972831093883083, "grad_norm": 2.0090199239287276, "learning_rate": 5.73069918468001e-07, "loss": 0.2721, "step": 27023 }, { "epoch": 0.46974569347633366, "grad_norm": 1.9085253952466996, "learning_rate": 5.730420712283604e-07, "loss": 0.3153, "step": 27024 }, { "epoch": 0.4697630760138365, "grad_norm": 2.299308697274579, "learning_rate": 5.730142237572097e-07, "loss": 0.4097, "step": 27025 }, { "epoch": 0.4697804585513393, "grad_norm": 1.3661211585362667, "learning_rate": 5.729863760546378e-07, "loss": 0.2238, "step": 27026 }, { "epoch": 0.46979784108884215, "grad_norm": 1.7203046040342944, "learning_rate": 5.729585281207326e-07, "loss": 0.2338, "step": 27027 }, { "epoch": 0.469815223626345, "grad_norm": 1.7629099422055552, "learning_rate": 5.729306799555822e-07, "loss": 0.2579, "step": 27028 }, { "epoch": 0.4698326061638478, "grad_norm": 1.7627863958120862, "learning_rate": 5.729028315592749e-07, "loss": 0.3602, "step": 27029 }, { "epoch": 0.46984998870135064, "grad_norm": 2.9610819482230486, "learning_rate": 5.728749829318995e-07, "loss": 0.2979, "step": 27030 }, { "epoch": 0.46986737123885347, "grad_norm": 1.718092768507697, "learning_rate": 5.728471340735437e-07, "loss": 0.197, "step": 27031 }, { "epoch": 0.46988475377635625, "grad_norm": 1.6693835619106496, "learning_rate": 5.72819284984296e-07, "loss": 0.2838, "step": 27032 }, { "epoch": 0.4699021363138591, "grad_norm": 1.427230208409386, "learning_rate": 5.727914356642445e-07, "loss": 0.2339, "step": 27033 }, { "epoch": 0.4699195188513619, "grad_norm": 1.5320923782129754, "learning_rate": 5.727635861134777e-07, "loss": 0.3967, "step": 27034 }, { "epoch": 0.46993690138886474, "grad_norm": 2.380758874276315, "learning_rate": 5.727357363320836e-07, "loss": 0.2234, "step": 27035 }, { "epoch": 0.46995428392636757, "grad_norm": 1.6228915916337938, "learning_rate": 5.727078863201508e-07, "loss": 0.3152, "step": 27036 }, { "epoch": 0.4699716664638704, "grad_norm": 1.884224505373245, "learning_rate": 5.726800360777674e-07, "loss": 0.3532, "step": 27037 }, { "epoch": 0.4699890490013732, "grad_norm": 0.9280004638635293, "learning_rate": 5.726521856050216e-07, "loss": 0.186, "step": 27038 }, { "epoch": 0.47000643153887606, "grad_norm": 1.034169531216416, "learning_rate": 5.726243349020017e-07, "loss": 0.1899, "step": 27039 }, { "epoch": 0.4700238140763789, "grad_norm": 3.407112814686495, "learning_rate": 5.72596483968796e-07, "loss": 0.2135, "step": 27040 }, { "epoch": 0.4700411966138817, "grad_norm": 1.180581280855816, "learning_rate": 5.725686328054929e-07, "loss": 0.2745, "step": 27041 }, { "epoch": 0.4700585791513845, "grad_norm": 2.1621497932277234, "learning_rate": 5.725407814121807e-07, "loss": 0.4491, "step": 27042 }, { "epoch": 0.4700759616888873, "grad_norm": 1.9644717722647416, "learning_rate": 5.725129297889473e-07, "loss": 0.1769, "step": 27043 }, { "epoch": 0.47009334422639015, "grad_norm": 0.995025396679843, "learning_rate": 5.724850779358812e-07, "loss": 0.1798, "step": 27044 }, { "epoch": 0.470110726763893, "grad_norm": 2.588374316471301, "learning_rate": 5.724572258530709e-07, "loss": 0.3119, "step": 27045 }, { "epoch": 0.4701281093013958, "grad_norm": 1.9551263273280408, "learning_rate": 5.724293735406044e-07, "loss": 0.4431, "step": 27046 }, { "epoch": 0.47014549183889864, "grad_norm": 2.147854119323297, "learning_rate": 5.7240152099857e-07, "loss": 0.2346, "step": 27047 }, { "epoch": 0.4701628743764015, "grad_norm": 1.9145148998180366, "learning_rate": 5.72373668227056e-07, "loss": 0.3736, "step": 27048 }, { "epoch": 0.4701802569139043, "grad_norm": 1.3359151645838534, "learning_rate": 5.723458152261509e-07, "loss": 0.2532, "step": 27049 }, { "epoch": 0.47019763945140713, "grad_norm": 1.699380424398418, "learning_rate": 5.723179619959427e-07, "loss": 0.2335, "step": 27050 }, { "epoch": 0.47021502198890996, "grad_norm": 1.5603133029829026, "learning_rate": 5.722901085365197e-07, "loss": 0.2613, "step": 27051 }, { "epoch": 0.47023240452641274, "grad_norm": 2.150910997572758, "learning_rate": 5.722622548479703e-07, "loss": 0.2746, "step": 27052 }, { "epoch": 0.47024978706391557, "grad_norm": 2.583419531041279, "learning_rate": 5.722344009303829e-07, "loss": 0.2539, "step": 27053 }, { "epoch": 0.4702671696014184, "grad_norm": 1.6529851259501767, "learning_rate": 5.722065467838455e-07, "loss": 0.2807, "step": 27054 }, { "epoch": 0.47028455213892123, "grad_norm": 1.5756399012908557, "learning_rate": 5.721786924084465e-07, "loss": 0.2732, "step": 27055 }, { "epoch": 0.47030193467642406, "grad_norm": 1.7923370399080782, "learning_rate": 5.721508378042743e-07, "loss": 0.3165, "step": 27056 }, { "epoch": 0.4703193172139269, "grad_norm": 2.4334409679659963, "learning_rate": 5.72122982971417e-07, "loss": 0.2064, "step": 27057 }, { "epoch": 0.4703366997514297, "grad_norm": 2.2206912384527504, "learning_rate": 5.720951279099628e-07, "loss": 0.3798, "step": 27058 }, { "epoch": 0.47035408228893255, "grad_norm": 1.4712973762161246, "learning_rate": 5.720672726200007e-07, "loss": 0.3753, "step": 27059 }, { "epoch": 0.4703714648264354, "grad_norm": 2.2722425192048172, "learning_rate": 5.720394171016178e-07, "loss": 0.2356, "step": 27060 }, { "epoch": 0.4703888473639382, "grad_norm": 1.0577802590983634, "learning_rate": 5.720115613549035e-07, "loss": 0.2857, "step": 27061 }, { "epoch": 0.470406229901441, "grad_norm": 1.5769113894357438, "learning_rate": 5.719837053799455e-07, "loss": 0.3505, "step": 27062 }, { "epoch": 0.4704236124389438, "grad_norm": 2.0173875289985874, "learning_rate": 5.719558491768321e-07, "loss": 0.2904, "step": 27063 }, { "epoch": 0.47044099497644665, "grad_norm": 2.172477519665697, "learning_rate": 5.719279927456519e-07, "loss": 0.2494, "step": 27064 }, { "epoch": 0.4704583775139495, "grad_norm": 1.30499925274826, "learning_rate": 5.71900136086493e-07, "loss": 0.338, "step": 27065 }, { "epoch": 0.4704757600514523, "grad_norm": 2.9352864559168763, "learning_rate": 5.718722791994434e-07, "loss": 0.4864, "step": 27066 }, { "epoch": 0.47049314258895514, "grad_norm": 1.184964275588984, "learning_rate": 5.718444220845921e-07, "loss": 0.3431, "step": 27067 }, { "epoch": 0.47051052512645797, "grad_norm": 1.2213054184339296, "learning_rate": 5.718165647420268e-07, "loss": 0.2447, "step": 27068 }, { "epoch": 0.4705279076639608, "grad_norm": 1.983933451478524, "learning_rate": 5.71788707171836e-07, "loss": 0.3233, "step": 27069 }, { "epoch": 0.47054529020146363, "grad_norm": 1.7285792956239068, "learning_rate": 5.717608493741077e-07, "loss": 0.208, "step": 27070 }, { "epoch": 0.47056267273896646, "grad_norm": 1.2529624830773354, "learning_rate": 5.717329913489309e-07, "loss": 0.2495, "step": 27071 }, { "epoch": 0.47058005527646923, "grad_norm": 1.9510073765413485, "learning_rate": 5.717051330963933e-07, "loss": 0.2957, "step": 27072 }, { "epoch": 0.47059743781397206, "grad_norm": 1.3844302089559886, "learning_rate": 5.716772746165832e-07, "loss": 0.2333, "step": 27073 }, { "epoch": 0.4706148203514749, "grad_norm": 1.589764968901329, "learning_rate": 5.716494159095891e-07, "loss": 0.2472, "step": 27074 }, { "epoch": 0.4706322028889777, "grad_norm": 1.6486215301682436, "learning_rate": 5.716215569754995e-07, "loss": 0.1853, "step": 27075 }, { "epoch": 0.47064958542648055, "grad_norm": 1.3958457361114545, "learning_rate": 5.715936978144023e-07, "loss": 0.1768, "step": 27076 }, { "epoch": 0.4706669679639834, "grad_norm": 1.5109125370307979, "learning_rate": 5.715658384263859e-07, "loss": 0.3344, "step": 27077 }, { "epoch": 0.4706843505014862, "grad_norm": 1.4411258467622343, "learning_rate": 5.71537978811539e-07, "loss": 0.3504, "step": 27078 }, { "epoch": 0.47070173303898905, "grad_norm": 2.6440594538590387, "learning_rate": 5.715101189699493e-07, "loss": 0.3731, "step": 27079 }, { "epoch": 0.4707191155764919, "grad_norm": 1.5882187855587047, "learning_rate": 5.714822589017054e-07, "loss": 0.1767, "step": 27080 }, { "epoch": 0.4707364981139947, "grad_norm": 1.7088776209764873, "learning_rate": 5.714543986068956e-07, "loss": 0.2661, "step": 27081 }, { "epoch": 0.4707538806514975, "grad_norm": 1.2365122550799077, "learning_rate": 5.714265380856081e-07, "loss": 0.2107, "step": 27082 }, { "epoch": 0.4707712631890003, "grad_norm": 1.0615924006987014, "learning_rate": 5.713986773379313e-07, "loss": 0.1412, "step": 27083 }, { "epoch": 0.47078864572650314, "grad_norm": 1.4151610453799248, "learning_rate": 5.713708163639537e-07, "loss": 0.3619, "step": 27084 }, { "epoch": 0.47080602826400597, "grad_norm": 2.5542553639008134, "learning_rate": 5.713429551637632e-07, "loss": 0.3202, "step": 27085 }, { "epoch": 0.4708234108015088, "grad_norm": 2.496573837283287, "learning_rate": 5.713150937374483e-07, "loss": 0.2567, "step": 27086 }, { "epoch": 0.47084079333901163, "grad_norm": 1.3849691810972435, "learning_rate": 5.712872320850975e-07, "loss": 0.2041, "step": 27087 }, { "epoch": 0.47085817587651446, "grad_norm": 1.7153210941240842, "learning_rate": 5.712593702067987e-07, "loss": 0.2304, "step": 27088 }, { "epoch": 0.4708755584140173, "grad_norm": 1.1631623273571985, "learning_rate": 5.712315081026405e-07, "loss": 0.2173, "step": 27089 }, { "epoch": 0.4708929409515201, "grad_norm": 1.4831159010988388, "learning_rate": 5.712036457727113e-07, "loss": 0.2173, "step": 27090 }, { "epoch": 0.47091032348902295, "grad_norm": 1.72488251936239, "learning_rate": 5.71175783217099e-07, "loss": 0.387, "step": 27091 }, { "epoch": 0.4709277060265257, "grad_norm": 1.8924061364724976, "learning_rate": 5.711479204358923e-07, "loss": 0.324, "step": 27092 }, { "epoch": 0.47094508856402856, "grad_norm": 1.255452342273168, "learning_rate": 5.711200574291792e-07, "loss": 0.3391, "step": 27093 }, { "epoch": 0.4709624711015314, "grad_norm": 3.344307998159487, "learning_rate": 5.710921941970484e-07, "loss": 0.34, "step": 27094 }, { "epoch": 0.4709798536390342, "grad_norm": 1.4092602951338131, "learning_rate": 5.710643307395879e-07, "loss": 0.3428, "step": 27095 }, { "epoch": 0.47099723617653705, "grad_norm": 1.904296444700125, "learning_rate": 5.710364670568863e-07, "loss": 0.2455, "step": 27096 }, { "epoch": 0.4710146187140399, "grad_norm": 1.0061141125354183, "learning_rate": 5.710086031490315e-07, "loss": 0.3317, "step": 27097 }, { "epoch": 0.4710320012515427, "grad_norm": 2.05847328178842, "learning_rate": 5.709807390161121e-07, "loss": 0.2975, "step": 27098 }, { "epoch": 0.47104938378904554, "grad_norm": 2.2133628039390385, "learning_rate": 5.709528746582164e-07, "loss": 0.2686, "step": 27099 }, { "epoch": 0.47106676632654837, "grad_norm": 2.021035746216807, "learning_rate": 5.709250100754326e-07, "loss": 0.2442, "step": 27100 }, { "epoch": 0.4710841488640512, "grad_norm": 2.069344201945611, "learning_rate": 5.708971452678493e-07, "loss": 0.2277, "step": 27101 }, { "epoch": 0.471101531401554, "grad_norm": 1.6245970124504217, "learning_rate": 5.708692802355543e-07, "loss": 0.2327, "step": 27102 }, { "epoch": 0.4711189139390568, "grad_norm": 1.3579604639019014, "learning_rate": 5.708414149786366e-07, "loss": 0.2283, "step": 27103 }, { "epoch": 0.47113629647655964, "grad_norm": 1.2574404091660818, "learning_rate": 5.708135494971839e-07, "loss": 0.2144, "step": 27104 }, { "epoch": 0.47115367901406247, "grad_norm": 1.2543647698165215, "learning_rate": 5.707856837912848e-07, "loss": 0.1571, "step": 27105 }, { "epoch": 0.4711710615515653, "grad_norm": 1.3085702348793518, "learning_rate": 5.707578178610277e-07, "loss": 0.2053, "step": 27106 }, { "epoch": 0.4711884440890681, "grad_norm": 1.1714333147219396, "learning_rate": 5.707299517065008e-07, "loss": 0.3188, "step": 27107 }, { "epoch": 0.47120582662657096, "grad_norm": 2.1017698697410725, "learning_rate": 5.707020853277923e-07, "loss": 0.2905, "step": 27108 }, { "epoch": 0.4712232091640738, "grad_norm": 1.115474072530375, "learning_rate": 5.706742187249909e-07, "loss": 0.306, "step": 27109 }, { "epoch": 0.4712405917015766, "grad_norm": 1.263519627943334, "learning_rate": 5.706463518981845e-07, "loss": 0.2694, "step": 27110 }, { "epoch": 0.47125797423907945, "grad_norm": 3.173020877808428, "learning_rate": 5.706184848474618e-07, "loss": 0.3805, "step": 27111 }, { "epoch": 0.4712753567765822, "grad_norm": 1.2486797182069345, "learning_rate": 5.705906175729107e-07, "loss": 0.3168, "step": 27112 }, { "epoch": 0.47129273931408505, "grad_norm": 2.342031812683583, "learning_rate": 5.705627500746201e-07, "loss": 0.3959, "step": 27113 }, { "epoch": 0.4713101218515879, "grad_norm": 1.9985376653012836, "learning_rate": 5.705348823526776e-07, "loss": 0.2314, "step": 27114 }, { "epoch": 0.4713275043890907, "grad_norm": 1.2246703302958826, "learning_rate": 5.705070144071722e-07, "loss": 0.3482, "step": 27115 }, { "epoch": 0.47134488692659354, "grad_norm": 1.6436664833397867, "learning_rate": 5.70479146238192e-07, "loss": 0.355, "step": 27116 }, { "epoch": 0.4713622694640964, "grad_norm": 1.7186250164793637, "learning_rate": 5.70451277845825e-07, "loss": 0.2831, "step": 27117 }, { "epoch": 0.4713796520015992, "grad_norm": 1.080231214245076, "learning_rate": 5.7042340923016e-07, "loss": 0.1675, "step": 27118 }, { "epoch": 0.47139703453910203, "grad_norm": 1.5403239409119278, "learning_rate": 5.703955403912853e-07, "loss": 0.2336, "step": 27119 }, { "epoch": 0.47141441707660486, "grad_norm": 2.016615264225638, "learning_rate": 5.703676713292886e-07, "loss": 0.2558, "step": 27120 }, { "epoch": 0.4714317996141077, "grad_norm": 2.0317666201762483, "learning_rate": 5.703398020442591e-07, "loss": 0.2334, "step": 27121 }, { "epoch": 0.47144918215161047, "grad_norm": 1.2029016702029611, "learning_rate": 5.703119325362847e-07, "loss": 0.3911, "step": 27122 }, { "epoch": 0.4714665646891133, "grad_norm": 1.7140635043715053, "learning_rate": 5.702840628054538e-07, "loss": 0.2746, "step": 27123 }, { "epoch": 0.47148394722661613, "grad_norm": 1.6228679770256451, "learning_rate": 5.702561928518544e-07, "loss": 0.277, "step": 27124 }, { "epoch": 0.47150132976411896, "grad_norm": 4.211510247102385, "learning_rate": 5.702283226755754e-07, "loss": 0.2239, "step": 27125 }, { "epoch": 0.4715187123016218, "grad_norm": 1.5969227279783382, "learning_rate": 5.702004522767049e-07, "loss": 0.2192, "step": 27126 }, { "epoch": 0.4715360948391246, "grad_norm": 1.261413183819597, "learning_rate": 5.701725816553311e-07, "loss": 0.1674, "step": 27127 }, { "epoch": 0.47155347737662745, "grad_norm": 1.2862494713923398, "learning_rate": 5.701447108115425e-07, "loss": 0.1964, "step": 27128 }, { "epoch": 0.4715708599141303, "grad_norm": 1.7510602320135578, "learning_rate": 5.701168397454274e-07, "loss": 0.2123, "step": 27129 }, { "epoch": 0.4715882424516331, "grad_norm": 1.7076402140246958, "learning_rate": 5.70088968457074e-07, "loss": 0.28, "step": 27130 }, { "epoch": 0.47160562498913594, "grad_norm": 0.9638626323054348, "learning_rate": 5.700610969465709e-07, "loss": 0.2775, "step": 27131 }, { "epoch": 0.4716230075266387, "grad_norm": 1.1763512914808572, "learning_rate": 5.700332252140063e-07, "loss": 0.1533, "step": 27132 }, { "epoch": 0.47164039006414155, "grad_norm": 1.7442182888521114, "learning_rate": 5.700053532594685e-07, "loss": 0.2181, "step": 27133 }, { "epoch": 0.4716577726016444, "grad_norm": 1.2550128212948044, "learning_rate": 5.699774810830459e-07, "loss": 0.207, "step": 27134 }, { "epoch": 0.4716751551391472, "grad_norm": 1.7981406070937986, "learning_rate": 5.699496086848269e-07, "loss": 0.2483, "step": 27135 }, { "epoch": 0.47169253767665004, "grad_norm": 1.4888944130651451, "learning_rate": 5.699217360648998e-07, "loss": 0.2655, "step": 27136 }, { "epoch": 0.47170992021415287, "grad_norm": 0.9199589085573509, "learning_rate": 5.698938632233526e-07, "loss": 0.5464, "step": 27137 }, { "epoch": 0.4717273027516557, "grad_norm": 1.3216434417260292, "learning_rate": 5.698659901602744e-07, "loss": 0.2137, "step": 27138 }, { "epoch": 0.4717446852891585, "grad_norm": 0.9598962561060216, "learning_rate": 5.698381168757529e-07, "loss": 0.1815, "step": 27139 }, { "epoch": 0.47176206782666136, "grad_norm": 9.169653229693271, "learning_rate": 5.698102433698766e-07, "loss": 0.3136, "step": 27140 }, { "epoch": 0.4717794503641642, "grad_norm": 5.318827073046733, "learning_rate": 5.697823696427339e-07, "loss": 0.4143, "step": 27141 }, { "epoch": 0.47179683290166696, "grad_norm": 1.705672039505925, "learning_rate": 5.697544956944132e-07, "loss": 0.3202, "step": 27142 }, { "epoch": 0.4718142154391698, "grad_norm": 2.09523908926845, "learning_rate": 5.697266215250028e-07, "loss": 0.234, "step": 27143 }, { "epoch": 0.4718315979766726, "grad_norm": 1.5575654974218978, "learning_rate": 5.696987471345911e-07, "loss": 0.2673, "step": 27144 }, { "epoch": 0.47184898051417545, "grad_norm": 1.473566233558622, "learning_rate": 5.696708725232663e-07, "loss": 0.1809, "step": 27145 }, { "epoch": 0.4718663630516783, "grad_norm": 1.3507765647564403, "learning_rate": 5.696429976911168e-07, "loss": 0.2222, "step": 27146 }, { "epoch": 0.4718837455891811, "grad_norm": 1.9114825484948739, "learning_rate": 5.696151226382311e-07, "loss": 0.2264, "step": 27147 }, { "epoch": 0.47190112812668394, "grad_norm": 2.030762987718959, "learning_rate": 5.695872473646975e-07, "loss": 0.4375, "step": 27148 }, { "epoch": 0.4719185106641868, "grad_norm": 1.187005564017008, "learning_rate": 5.69559371870604e-07, "loss": 0.1942, "step": 27149 }, { "epoch": 0.4719358932016896, "grad_norm": 1.4653946617055675, "learning_rate": 5.695314961560396e-07, "loss": 0.2318, "step": 27150 }, { "epoch": 0.47195327573919243, "grad_norm": 1.5185875535737543, "learning_rate": 5.69503620221092e-07, "loss": 0.1793, "step": 27151 }, { "epoch": 0.4719706582766952, "grad_norm": 1.3491447590433383, "learning_rate": 5.694757440658499e-07, "loss": 0.2492, "step": 27152 }, { "epoch": 0.47198804081419804, "grad_norm": 2.2227573236142297, "learning_rate": 5.694478676904016e-07, "loss": 0.2569, "step": 27153 }, { "epoch": 0.47200542335170087, "grad_norm": 1.9812681026211159, "learning_rate": 5.694199910948356e-07, "loss": 0.2618, "step": 27154 }, { "epoch": 0.4720228058892037, "grad_norm": 1.3811081702973214, "learning_rate": 5.693921142792401e-07, "loss": 0.1721, "step": 27155 }, { "epoch": 0.47204018842670653, "grad_norm": 1.063600977325549, "learning_rate": 5.693642372437034e-07, "loss": 0.1914, "step": 27156 }, { "epoch": 0.47205757096420936, "grad_norm": 1.2206958536191868, "learning_rate": 5.69336359988314e-07, "loss": 0.2856, "step": 27157 }, { "epoch": 0.4720749535017122, "grad_norm": 1.9372143097470982, "learning_rate": 5.693084825131601e-07, "loss": 0.3261, "step": 27158 }, { "epoch": 0.472092336039215, "grad_norm": 1.7164972747480562, "learning_rate": 5.692806048183301e-07, "loss": 0.2759, "step": 27159 }, { "epoch": 0.47210971857671785, "grad_norm": 1.5490776918845472, "learning_rate": 5.692527269039126e-07, "loss": 0.264, "step": 27160 }, { "epoch": 0.4721271011142206, "grad_norm": 1.7512631680327568, "learning_rate": 5.692248487699956e-07, "loss": 0.2527, "step": 27161 }, { "epoch": 0.47214448365172346, "grad_norm": 1.198355668544873, "learning_rate": 5.691969704166677e-07, "loss": 0.3355, "step": 27162 }, { "epoch": 0.4721618661892263, "grad_norm": 1.8348046822800994, "learning_rate": 5.691690918440173e-07, "loss": 0.1646, "step": 27163 }, { "epoch": 0.4721792487267291, "grad_norm": 8.358248680970357, "learning_rate": 5.691412130521323e-07, "loss": 0.5709, "step": 27164 }, { "epoch": 0.47219663126423195, "grad_norm": 5.651688000461567, "learning_rate": 5.691133340411017e-07, "loss": 0.492, "step": 27165 }, { "epoch": 0.4722140138017348, "grad_norm": 1.77810467568183, "learning_rate": 5.690854548110137e-07, "loss": 0.2347, "step": 27166 }, { "epoch": 0.4722313963392376, "grad_norm": 1.6016809894151451, "learning_rate": 5.690575753619563e-07, "loss": 0.2636, "step": 27167 }, { "epoch": 0.47224877887674044, "grad_norm": 2.4413146609771874, "learning_rate": 5.690296956940182e-07, "loss": 0.3269, "step": 27168 }, { "epoch": 0.47226616141424327, "grad_norm": 1.2962496887733352, "learning_rate": 5.690018158072877e-07, "loss": 0.1925, "step": 27169 }, { "epoch": 0.4722835439517461, "grad_norm": 1.4951356422238926, "learning_rate": 5.689739357018531e-07, "loss": 0.2074, "step": 27170 }, { "epoch": 0.4723009264892489, "grad_norm": 1.3026997293215665, "learning_rate": 5.689460553778027e-07, "loss": 0.3299, "step": 27171 }, { "epoch": 0.4723183090267517, "grad_norm": 1.2955543833393965, "learning_rate": 5.689181748352251e-07, "loss": 0.3112, "step": 27172 }, { "epoch": 0.47233569156425453, "grad_norm": 0.8691093474181453, "learning_rate": 5.688902940742087e-07, "loss": 0.4824, "step": 27173 }, { "epoch": 0.47235307410175736, "grad_norm": 1.9320986702170464, "learning_rate": 5.688624130948414e-07, "loss": 0.2128, "step": 27174 }, { "epoch": 0.4723704566392602, "grad_norm": 1.7852810966973833, "learning_rate": 5.68834531897212e-07, "loss": 0.3983, "step": 27175 }, { "epoch": 0.472387839176763, "grad_norm": 1.975653692135405, "learning_rate": 5.688066504814089e-07, "loss": 0.2268, "step": 27176 }, { "epoch": 0.47240522171426585, "grad_norm": 2.197326327642761, "learning_rate": 5.687787688475201e-07, "loss": 0.2855, "step": 27177 }, { "epoch": 0.4724226042517687, "grad_norm": 1.3153648378889167, "learning_rate": 5.687508869956342e-07, "loss": 0.1945, "step": 27178 }, { "epoch": 0.4724399867892715, "grad_norm": 1.2156289527514363, "learning_rate": 5.687230049258398e-07, "loss": 0.3277, "step": 27179 }, { "epoch": 0.47245736932677435, "grad_norm": 1.755002499447355, "learning_rate": 5.686951226382249e-07, "loss": 0.223, "step": 27180 }, { "epoch": 0.4724747518642771, "grad_norm": 1.3055668006451222, "learning_rate": 5.68667240132878e-07, "loss": 0.3004, "step": 27181 }, { "epoch": 0.47249213440177995, "grad_norm": 1.6862460842995797, "learning_rate": 5.686393574098876e-07, "loss": 0.3491, "step": 27182 }, { "epoch": 0.4725095169392828, "grad_norm": 1.4342352046954416, "learning_rate": 5.686114744693419e-07, "loss": 0.2818, "step": 27183 }, { "epoch": 0.4725268994767856, "grad_norm": 2.628017728828638, "learning_rate": 5.685835913113293e-07, "loss": 0.2503, "step": 27184 }, { "epoch": 0.47254428201428844, "grad_norm": 0.8820502490489203, "learning_rate": 5.685557079359383e-07, "loss": 0.1613, "step": 27185 }, { "epoch": 0.47256166455179127, "grad_norm": 1.4454774688620693, "learning_rate": 5.685278243432571e-07, "loss": 0.2277, "step": 27186 }, { "epoch": 0.4725790470892941, "grad_norm": 2.9122667581302877, "learning_rate": 5.684999405333742e-07, "loss": 0.1853, "step": 27187 }, { "epoch": 0.47259642962679693, "grad_norm": 2.0290128096376336, "learning_rate": 5.68472056506378e-07, "loss": 0.2596, "step": 27188 }, { "epoch": 0.47261381216429976, "grad_norm": 1.965314654380487, "learning_rate": 5.684441722623568e-07, "loss": 0.2097, "step": 27189 }, { "epoch": 0.4726311947018026, "grad_norm": 0.9675041483140275, "learning_rate": 5.68416287801399e-07, "loss": 0.2628, "step": 27190 }, { "epoch": 0.47264857723930537, "grad_norm": 1.81624726796488, "learning_rate": 5.68388403123593e-07, "loss": 0.2759, "step": 27191 }, { "epoch": 0.4726659597768082, "grad_norm": 1.8901156578550375, "learning_rate": 5.683605182290272e-07, "loss": 0.2782, "step": 27192 }, { "epoch": 0.47268334231431103, "grad_norm": 2.136483460818091, "learning_rate": 5.683326331177899e-07, "loss": 0.3443, "step": 27193 }, { "epoch": 0.47270072485181386, "grad_norm": 1.315238742074667, "learning_rate": 5.683047477899696e-07, "loss": 0.2376, "step": 27194 }, { "epoch": 0.4727181073893167, "grad_norm": 1.2580317247882324, "learning_rate": 5.682768622456548e-07, "loss": 0.2618, "step": 27195 }, { "epoch": 0.4727354899268195, "grad_norm": 1.5060684448293962, "learning_rate": 5.682489764849335e-07, "loss": 0.307, "step": 27196 }, { "epoch": 0.47275287246432235, "grad_norm": 7.14232125570778, "learning_rate": 5.682210905078943e-07, "loss": 0.3319, "step": 27197 }, { "epoch": 0.4727702550018252, "grad_norm": 1.2827723931053219, "learning_rate": 5.681932043146257e-07, "loss": 0.3, "step": 27198 }, { "epoch": 0.472787637539328, "grad_norm": 1.811889602701432, "learning_rate": 5.681653179052157e-07, "loss": 0.2823, "step": 27199 }, { "epoch": 0.47280502007683084, "grad_norm": 1.3376516933180684, "learning_rate": 5.681374312797532e-07, "loss": 0.2192, "step": 27200 }, { "epoch": 0.4728224026143336, "grad_norm": 1.0448306715505398, "learning_rate": 5.681095444383263e-07, "loss": 0.2058, "step": 27201 }, { "epoch": 0.47283978515183644, "grad_norm": 1.424816695222842, "learning_rate": 5.680816573810234e-07, "loss": 0.168, "step": 27202 }, { "epoch": 0.4728571676893393, "grad_norm": 1.607037091478346, "learning_rate": 5.680537701079329e-07, "loss": 0.2419, "step": 27203 }, { "epoch": 0.4728745502268421, "grad_norm": 1.9975871333261164, "learning_rate": 5.680258826191434e-07, "loss": 0.2787, "step": 27204 }, { "epoch": 0.47289193276434494, "grad_norm": 2.0900820640569875, "learning_rate": 5.679979949147429e-07, "loss": 0.243, "step": 27205 }, { "epoch": 0.47290931530184777, "grad_norm": 1.7775931339046753, "learning_rate": 5.6797010699482e-07, "loss": 0.272, "step": 27206 }, { "epoch": 0.4729266978393506, "grad_norm": 1.319944156568024, "learning_rate": 5.679422188594631e-07, "loss": 0.3123, "step": 27207 }, { "epoch": 0.4729440803768534, "grad_norm": 2.3710679780875736, "learning_rate": 5.679143305087607e-07, "loss": 0.3077, "step": 27208 }, { "epoch": 0.47296146291435626, "grad_norm": 2.0892291869715267, "learning_rate": 5.678864419428009e-07, "loss": 0.2105, "step": 27209 }, { "epoch": 0.4729788454518591, "grad_norm": 1.4499948289214803, "learning_rate": 5.678585531616724e-07, "loss": 0.3689, "step": 27210 }, { "epoch": 0.47299622798936186, "grad_norm": 1.5982326556335387, "learning_rate": 5.678306641654633e-07, "loss": 0.2839, "step": 27211 }, { "epoch": 0.4730136105268647, "grad_norm": 1.303305470015023, "learning_rate": 5.678027749542623e-07, "loss": 0.3388, "step": 27212 }, { "epoch": 0.4730309930643675, "grad_norm": 2.425379620900524, "learning_rate": 5.677748855281575e-07, "loss": 0.2869, "step": 27213 }, { "epoch": 0.47304837560187035, "grad_norm": 2.4541181132296384, "learning_rate": 5.677469958872375e-07, "loss": 0.2307, "step": 27214 }, { "epoch": 0.4730657581393732, "grad_norm": 1.0001629235339378, "learning_rate": 5.677191060315908e-07, "loss": 0.1507, "step": 27215 }, { "epoch": 0.473083140676876, "grad_norm": 1.3213183152131227, "learning_rate": 5.676912159613055e-07, "loss": 0.3594, "step": 27216 }, { "epoch": 0.47310052321437884, "grad_norm": 1.4306478109185512, "learning_rate": 5.676633256764701e-07, "loss": 0.3282, "step": 27217 }, { "epoch": 0.4731179057518817, "grad_norm": 2.4932573048557916, "learning_rate": 5.67635435177173e-07, "loss": 0.4463, "step": 27218 }, { "epoch": 0.4731352882893845, "grad_norm": 2.216572301220561, "learning_rate": 5.676075444635029e-07, "loss": 0.3429, "step": 27219 }, { "epoch": 0.47315267082688733, "grad_norm": 3.877171463686115, "learning_rate": 5.675796535355477e-07, "loss": 0.6436, "step": 27220 }, { "epoch": 0.4731700533643901, "grad_norm": 1.0869848913161204, "learning_rate": 5.675517623933962e-07, "loss": 0.2602, "step": 27221 }, { "epoch": 0.47318743590189294, "grad_norm": 1.2550272464749879, "learning_rate": 5.675238710371365e-07, "loss": 0.2612, "step": 27222 }, { "epoch": 0.47320481843939577, "grad_norm": 1.4239305260742503, "learning_rate": 5.674959794668573e-07, "loss": 0.1879, "step": 27223 }, { "epoch": 0.4732222009768986, "grad_norm": 3.5829974400503644, "learning_rate": 5.674680876826468e-07, "loss": 0.5249, "step": 27224 }, { "epoch": 0.47323958351440143, "grad_norm": 1.7040745016281988, "learning_rate": 5.674401956845934e-07, "loss": 0.3098, "step": 27225 }, { "epoch": 0.47325696605190426, "grad_norm": 1.5087381220789222, "learning_rate": 5.674123034727855e-07, "loss": 0.2275, "step": 27226 }, { "epoch": 0.4732743485894071, "grad_norm": 1.1339688350038566, "learning_rate": 5.673844110473117e-07, "loss": 0.3714, "step": 27227 }, { "epoch": 0.4732917311269099, "grad_norm": 2.0190392840049634, "learning_rate": 5.673565184082601e-07, "loss": 0.2274, "step": 27228 }, { "epoch": 0.47330911366441275, "grad_norm": 1.343195454800116, "learning_rate": 5.673286255557196e-07, "loss": 0.2783, "step": 27229 }, { "epoch": 0.4733264962019156, "grad_norm": 2.203089020587572, "learning_rate": 5.673007324897779e-07, "loss": 0.3505, "step": 27230 }, { "epoch": 0.47334387873941836, "grad_norm": 1.61535321716328, "learning_rate": 5.67272839210524e-07, "loss": 0.2505, "step": 27231 }, { "epoch": 0.4733612612769212, "grad_norm": 1.7272008644165389, "learning_rate": 5.672449457180461e-07, "loss": 0.2069, "step": 27232 }, { "epoch": 0.473378643814424, "grad_norm": 1.1854297760453523, "learning_rate": 5.672170520124326e-07, "loss": 0.2179, "step": 27233 }, { "epoch": 0.47339602635192685, "grad_norm": 1.8943122356023294, "learning_rate": 5.671891580937718e-07, "loss": 0.3938, "step": 27234 }, { "epoch": 0.4734134088894297, "grad_norm": 1.873275381150366, "learning_rate": 5.671612639621525e-07, "loss": 0.5134, "step": 27235 }, { "epoch": 0.4734307914269325, "grad_norm": 2.2847910029132223, "learning_rate": 5.671333696176626e-07, "loss": 0.2631, "step": 27236 }, { "epoch": 0.47344817396443534, "grad_norm": 1.5340869765868164, "learning_rate": 5.671054750603909e-07, "loss": 0.2118, "step": 27237 }, { "epoch": 0.47346555650193817, "grad_norm": 1.8790033511735078, "learning_rate": 5.670775802904255e-07, "loss": 0.5268, "step": 27238 }, { "epoch": 0.473482939039441, "grad_norm": 1.4688325775683173, "learning_rate": 5.670496853078552e-07, "loss": 0.292, "step": 27239 }, { "epoch": 0.47350032157694383, "grad_norm": 1.1185494924656536, "learning_rate": 5.67021790112768e-07, "loss": 0.3345, "step": 27240 }, { "epoch": 0.4735177041144466, "grad_norm": 1.8333702676427164, "learning_rate": 5.669938947052527e-07, "loss": 0.3981, "step": 27241 }, { "epoch": 0.47353508665194943, "grad_norm": 2.701668335911127, "learning_rate": 5.669659990853975e-07, "loss": 0.2695, "step": 27242 }, { "epoch": 0.47355246918945226, "grad_norm": 1.7536369580443958, "learning_rate": 5.669381032532908e-07, "loss": 0.3559, "step": 27243 }, { "epoch": 0.4735698517269551, "grad_norm": 2.3398186640210534, "learning_rate": 5.669102072090208e-07, "loss": 0.4023, "step": 27244 }, { "epoch": 0.4735872342644579, "grad_norm": 1.3022652332789544, "learning_rate": 5.668823109526766e-07, "loss": 0.2048, "step": 27245 }, { "epoch": 0.47360461680196075, "grad_norm": 0.7558780159705231, "learning_rate": 5.668544144843459e-07, "loss": 0.2595, "step": 27246 }, { "epoch": 0.4736219993394636, "grad_norm": 1.4900398391803704, "learning_rate": 5.668265178041176e-07, "loss": 0.1699, "step": 27247 }, { "epoch": 0.4736393818769664, "grad_norm": 2.5646524896285112, "learning_rate": 5.667986209120799e-07, "loss": 0.283, "step": 27248 }, { "epoch": 0.47365676441446924, "grad_norm": 1.0049503112481906, "learning_rate": 5.667707238083211e-07, "loss": 0.269, "step": 27249 }, { "epoch": 0.4736741469519721, "grad_norm": 2.1729445235260183, "learning_rate": 5.667428264929299e-07, "loss": 0.3965, "step": 27250 }, { "epoch": 0.47369152948947485, "grad_norm": 1.249089054465225, "learning_rate": 5.667149289659948e-07, "loss": 0.2219, "step": 27251 }, { "epoch": 0.4737089120269777, "grad_norm": 2.2229756473796036, "learning_rate": 5.666870312276038e-07, "loss": 0.1275, "step": 27252 }, { "epoch": 0.4737262945644805, "grad_norm": 2.583912075988855, "learning_rate": 5.666591332778454e-07, "loss": 0.2823, "step": 27253 }, { "epoch": 0.47374367710198334, "grad_norm": 2.559220240496619, "learning_rate": 5.666312351168083e-07, "loss": 0.318, "step": 27254 }, { "epoch": 0.47376105963948617, "grad_norm": 1.7678269839273193, "learning_rate": 5.66603336744581e-07, "loss": 0.1765, "step": 27255 }, { "epoch": 0.473778442176989, "grad_norm": 5.628240193193475, "learning_rate": 5.665754381612515e-07, "loss": 0.238, "step": 27256 }, { "epoch": 0.47379582471449183, "grad_norm": 2.1521974918255027, "learning_rate": 5.665475393669084e-07, "loss": 0.2219, "step": 27257 }, { "epoch": 0.47381320725199466, "grad_norm": 1.1113846393092637, "learning_rate": 5.665196403616404e-07, "loss": 0.1543, "step": 27258 }, { "epoch": 0.4738305897894975, "grad_norm": 11.212671299839672, "learning_rate": 5.664917411455354e-07, "loss": 0.4702, "step": 27259 }, { "epoch": 0.4738479723270003, "grad_norm": 1.8143761979863795, "learning_rate": 5.664638417186824e-07, "loss": 0.2657, "step": 27260 }, { "epoch": 0.4738653548645031, "grad_norm": 2.4620842643747056, "learning_rate": 5.664359420811694e-07, "loss": 0.2606, "step": 27261 }, { "epoch": 0.4738827374020059, "grad_norm": 1.2625638109364798, "learning_rate": 5.66408042233085e-07, "loss": 0.191, "step": 27262 }, { "epoch": 0.47390011993950876, "grad_norm": 1.100471625795334, "learning_rate": 5.663801421745175e-07, "loss": 0.1769, "step": 27263 }, { "epoch": 0.4739175024770116, "grad_norm": 2.4403054041574137, "learning_rate": 5.663522419055558e-07, "loss": 0.2445, "step": 27264 }, { "epoch": 0.4739348850145144, "grad_norm": 1.117963019461427, "learning_rate": 5.663243414262876e-07, "loss": 0.4475, "step": 27265 }, { "epoch": 0.47395226755201725, "grad_norm": 2.367077922818268, "learning_rate": 5.66296440736802e-07, "loss": 0.259, "step": 27266 }, { "epoch": 0.4739696500895201, "grad_norm": 1.0753837369542367, "learning_rate": 5.662685398371869e-07, "loss": 0.154, "step": 27267 }, { "epoch": 0.4739870326270229, "grad_norm": 1.8300873871528318, "learning_rate": 5.66240638727531e-07, "loss": 0.1776, "step": 27268 }, { "epoch": 0.47400441516452574, "grad_norm": 2.4041397977547154, "learning_rate": 5.662127374079227e-07, "loss": 0.421, "step": 27269 }, { "epoch": 0.47402179770202857, "grad_norm": 1.8941036247848604, "learning_rate": 5.661848358784506e-07, "loss": 0.2446, "step": 27270 }, { "epoch": 0.47403918023953134, "grad_norm": 2.532519811942911, "learning_rate": 5.661569341392028e-07, "loss": 0.1853, "step": 27271 }, { "epoch": 0.4740565627770342, "grad_norm": 1.5298030029727636, "learning_rate": 5.661290321902681e-07, "loss": 0.3195, "step": 27272 }, { "epoch": 0.474073945314537, "grad_norm": 2.8113037165857033, "learning_rate": 5.661011300317344e-07, "loss": 0.6125, "step": 27273 }, { "epoch": 0.47409132785203983, "grad_norm": 1.7364363968493972, "learning_rate": 5.660732276636908e-07, "loss": 0.1525, "step": 27274 }, { "epoch": 0.47410871038954266, "grad_norm": 3.10216111760809, "learning_rate": 5.660453250862254e-07, "loss": 0.3781, "step": 27275 }, { "epoch": 0.4741260929270455, "grad_norm": 1.8827642368595399, "learning_rate": 5.660174222994265e-07, "loss": 0.2259, "step": 27276 }, { "epoch": 0.4741434754645483, "grad_norm": 1.3662459588613094, "learning_rate": 5.659895193033828e-07, "loss": 0.2526, "step": 27277 }, { "epoch": 0.47416085800205116, "grad_norm": 1.2782018027564481, "learning_rate": 5.659616160981826e-07, "loss": 0.2167, "step": 27278 }, { "epoch": 0.474178240539554, "grad_norm": 1.9539164450069875, "learning_rate": 5.659337126839145e-07, "loss": 0.2747, "step": 27279 }, { "epoch": 0.4741956230770568, "grad_norm": 1.3568026034312455, "learning_rate": 5.659058090606667e-07, "loss": 0.2224, "step": 27280 }, { "epoch": 0.4742130056145596, "grad_norm": 1.3628709944096906, "learning_rate": 5.658779052285277e-07, "loss": 0.3356, "step": 27281 }, { "epoch": 0.4742303881520624, "grad_norm": 1.1807739692666581, "learning_rate": 5.65850001187586e-07, "loss": 0.3451, "step": 27282 }, { "epoch": 0.47424777068956525, "grad_norm": 1.2561816714567144, "learning_rate": 5.658220969379302e-07, "loss": 0.2628, "step": 27283 }, { "epoch": 0.4742651532270681, "grad_norm": 1.9377005889721557, "learning_rate": 5.657941924796484e-07, "loss": 0.1697, "step": 27284 }, { "epoch": 0.4742825357645709, "grad_norm": 1.4604136958116531, "learning_rate": 5.657662878128294e-07, "loss": 0.2027, "step": 27285 }, { "epoch": 0.47429991830207374, "grad_norm": 1.6374105045124907, "learning_rate": 5.657383829375614e-07, "loss": 0.1806, "step": 27286 }, { "epoch": 0.47431730083957657, "grad_norm": 1.5972752212354417, "learning_rate": 5.65710477853933e-07, "loss": 0.2672, "step": 27287 }, { "epoch": 0.4743346833770794, "grad_norm": 2.146134717738109, "learning_rate": 5.656825725620324e-07, "loss": 0.2055, "step": 27288 }, { "epoch": 0.47435206591458223, "grad_norm": 2.837645143275963, "learning_rate": 5.656546670619485e-07, "loss": 0.3507, "step": 27289 }, { "epoch": 0.47436944845208506, "grad_norm": 2.284829811693404, "learning_rate": 5.656267613537692e-07, "loss": 0.1745, "step": 27290 }, { "epoch": 0.47438683098958784, "grad_norm": 1.8229595406502015, "learning_rate": 5.655988554375834e-07, "loss": 0.2418, "step": 27291 }, { "epoch": 0.47440421352709067, "grad_norm": 1.2483013508893184, "learning_rate": 5.655709493134792e-07, "loss": 0.1651, "step": 27292 }, { "epoch": 0.4744215960645935, "grad_norm": 2.140404845840826, "learning_rate": 5.655430429815453e-07, "loss": 0.3806, "step": 27293 }, { "epoch": 0.47443897860209633, "grad_norm": 4.351914405794759, "learning_rate": 5.6551513644187e-07, "loss": 0.2257, "step": 27294 }, { "epoch": 0.47445636113959916, "grad_norm": 1.509067648745924, "learning_rate": 5.654872296945419e-07, "loss": 0.2195, "step": 27295 }, { "epoch": 0.474473743677102, "grad_norm": 1.4055001040059274, "learning_rate": 5.654593227396494e-07, "loss": 0.2264, "step": 27296 }, { "epoch": 0.4744911262146048, "grad_norm": 1.712360162439456, "learning_rate": 5.654314155772809e-07, "loss": 0.494, "step": 27297 }, { "epoch": 0.47450850875210765, "grad_norm": 1.8098321290845214, "learning_rate": 5.654035082075246e-07, "loss": 0.2867, "step": 27298 }, { "epoch": 0.4745258912896105, "grad_norm": 1.6295800097970463, "learning_rate": 5.653756006304695e-07, "loss": 0.2699, "step": 27299 }, { "epoch": 0.47454327382711325, "grad_norm": 2.087686564409745, "learning_rate": 5.653476928462036e-07, "loss": 0.2868, "step": 27300 }, { "epoch": 0.4745606563646161, "grad_norm": 1.9619050307600474, "learning_rate": 5.653197848548157e-07, "loss": 0.601, "step": 27301 }, { "epoch": 0.4745780389021189, "grad_norm": 1.520828017466583, "learning_rate": 5.652918766563941e-07, "loss": 0.3191, "step": 27302 }, { "epoch": 0.47459542143962175, "grad_norm": 1.4139509085618005, "learning_rate": 5.652639682510272e-07, "loss": 0.1339, "step": 27303 }, { "epoch": 0.4746128039771246, "grad_norm": 1.9197104703094872, "learning_rate": 5.652360596388034e-07, "loss": 0.2657, "step": 27304 }, { "epoch": 0.4746301865146274, "grad_norm": 1.0289619454142591, "learning_rate": 5.652081508198113e-07, "loss": 0.218, "step": 27305 }, { "epoch": 0.47464756905213024, "grad_norm": 3.002376712001298, "learning_rate": 5.651802417941393e-07, "loss": 0.3604, "step": 27306 }, { "epoch": 0.47466495158963307, "grad_norm": 1.4799526153697724, "learning_rate": 5.651523325618759e-07, "loss": 0.2442, "step": 27307 }, { "epoch": 0.4746823341271359, "grad_norm": 1.5534574450867837, "learning_rate": 5.651244231231098e-07, "loss": 0.2374, "step": 27308 }, { "epoch": 0.4746997166646387, "grad_norm": 1.725787222821104, "learning_rate": 5.650965134779288e-07, "loss": 0.2969, "step": 27309 }, { "epoch": 0.4747170992021415, "grad_norm": 1.4577891253897692, "learning_rate": 5.650686036264219e-07, "loss": 0.2165, "step": 27310 }, { "epoch": 0.47473448173964433, "grad_norm": 1.3270070872983957, "learning_rate": 5.650406935686774e-07, "loss": 0.3634, "step": 27311 }, { "epoch": 0.47475186427714716, "grad_norm": 2.1253828421485057, "learning_rate": 5.650127833047838e-07, "loss": 0.2722, "step": 27312 }, { "epoch": 0.47476924681465, "grad_norm": 2.8218813333273327, "learning_rate": 5.649848728348294e-07, "loss": 0.284, "step": 27313 }, { "epoch": 0.4747866293521528, "grad_norm": 6.080708064168131, "learning_rate": 5.64956962158903e-07, "loss": 0.4202, "step": 27314 }, { "epoch": 0.47480401188965565, "grad_norm": 2.1024283755060846, "learning_rate": 5.649290512770929e-07, "loss": 0.2434, "step": 27315 }, { "epoch": 0.4748213944271585, "grad_norm": 1.6251819551308206, "learning_rate": 5.649011401894874e-07, "loss": 0.2538, "step": 27316 }, { "epoch": 0.4748387769646613, "grad_norm": 1.663838193459263, "learning_rate": 5.648732288961751e-07, "loss": 0.2096, "step": 27317 }, { "epoch": 0.47485615950216414, "grad_norm": 1.0144276397484286, "learning_rate": 5.648453173972446e-07, "loss": 0.1643, "step": 27318 }, { "epoch": 0.474873542039667, "grad_norm": 2.1688811155441767, "learning_rate": 5.648174056927841e-07, "loss": 0.2253, "step": 27319 }, { "epoch": 0.47489092457716975, "grad_norm": 1.1117350240528587, "learning_rate": 5.647894937828822e-07, "loss": 0.2378, "step": 27320 }, { "epoch": 0.4749083071146726, "grad_norm": 2.1660217709809464, "learning_rate": 5.647615816676274e-07, "loss": 0.3016, "step": 27321 }, { "epoch": 0.4749256896521754, "grad_norm": 2.007635325653253, "learning_rate": 5.647336693471082e-07, "loss": 0.1576, "step": 27322 }, { "epoch": 0.47494307218967824, "grad_norm": 1.3488342213205113, "learning_rate": 5.647057568214128e-07, "loss": 0.2389, "step": 27323 }, { "epoch": 0.47496045472718107, "grad_norm": 2.2155234149676013, "learning_rate": 5.646778440906303e-07, "loss": 0.301, "step": 27324 }, { "epoch": 0.4749778372646839, "grad_norm": 1.6696833703692133, "learning_rate": 5.646499311548482e-07, "loss": 0.248, "step": 27325 }, { "epoch": 0.47499521980218673, "grad_norm": 2.434080596086667, "learning_rate": 5.646220180141558e-07, "loss": 0.216, "step": 27326 }, { "epoch": 0.47501260233968956, "grad_norm": 1.6917037367810694, "learning_rate": 5.645941046686414e-07, "loss": 0.2026, "step": 27327 }, { "epoch": 0.4750299848771924, "grad_norm": 1.7843156824580735, "learning_rate": 5.645661911183932e-07, "loss": 0.2533, "step": 27328 }, { "epoch": 0.4750473674146952, "grad_norm": 1.2646231067936764, "learning_rate": 5.645382773634998e-07, "loss": 0.2069, "step": 27329 }, { "epoch": 0.475064749952198, "grad_norm": 1.1724708124047365, "learning_rate": 5.645103634040498e-07, "loss": 0.219, "step": 27330 }, { "epoch": 0.4750821324897008, "grad_norm": 1.635074822448079, "learning_rate": 5.644824492401315e-07, "loss": 0.2758, "step": 27331 }, { "epoch": 0.47509951502720366, "grad_norm": 1.9735736195982116, "learning_rate": 5.644545348718336e-07, "loss": 0.3974, "step": 27332 }, { "epoch": 0.4751168975647065, "grad_norm": 1.4304071111865984, "learning_rate": 5.644266202992443e-07, "loss": 0.2178, "step": 27333 }, { "epoch": 0.4751342801022093, "grad_norm": 1.8286320822315494, "learning_rate": 5.643987055224523e-07, "loss": 0.214, "step": 27334 }, { "epoch": 0.47515166263971215, "grad_norm": 1.592440067888825, "learning_rate": 5.643707905415459e-07, "loss": 0.2285, "step": 27335 }, { "epoch": 0.475169045177215, "grad_norm": 1.6801266738789988, "learning_rate": 5.643428753566137e-07, "loss": 0.2941, "step": 27336 }, { "epoch": 0.4751864277147178, "grad_norm": 1.2444645777105718, "learning_rate": 5.643149599677441e-07, "loss": 0.3045, "step": 27337 }, { "epoch": 0.47520381025222064, "grad_norm": 1.3274330382828345, "learning_rate": 5.642870443750256e-07, "loss": 0.2447, "step": 27338 }, { "epoch": 0.47522119278972347, "grad_norm": 1.533242763725623, "learning_rate": 5.642591285785468e-07, "loss": 0.2587, "step": 27339 }, { "epoch": 0.47523857532722624, "grad_norm": 1.4836375532417672, "learning_rate": 5.642312125783961e-07, "loss": 0.2731, "step": 27340 }, { "epoch": 0.4752559578647291, "grad_norm": 1.3336597326020865, "learning_rate": 5.642032963746619e-07, "loss": 0.2819, "step": 27341 }, { "epoch": 0.4752733404022319, "grad_norm": 1.3710089433891124, "learning_rate": 5.641753799674327e-07, "loss": 0.2897, "step": 27342 }, { "epoch": 0.47529072293973473, "grad_norm": 2.205443838683746, "learning_rate": 5.641474633567972e-07, "loss": 0.2709, "step": 27343 }, { "epoch": 0.47530810547723756, "grad_norm": 1.5496658244425532, "learning_rate": 5.641195465428434e-07, "loss": 0.2345, "step": 27344 }, { "epoch": 0.4753254880147404, "grad_norm": 2.5743259272397645, "learning_rate": 5.640916295256603e-07, "loss": 0.3095, "step": 27345 }, { "epoch": 0.4753428705522432, "grad_norm": 2.7848972010473845, "learning_rate": 5.640637123053362e-07, "loss": 0.2647, "step": 27346 }, { "epoch": 0.47536025308974605, "grad_norm": 3.165688793846874, "learning_rate": 5.640357948819596e-07, "loss": 0.3123, "step": 27347 }, { "epoch": 0.4753776356272489, "grad_norm": 4.165710721272424, "learning_rate": 5.640078772556187e-07, "loss": 0.2281, "step": 27348 }, { "epoch": 0.4753950181647517, "grad_norm": 1.5129803567766145, "learning_rate": 5.639799594264025e-07, "loss": 0.3678, "step": 27349 }, { "epoch": 0.4754124007022545, "grad_norm": 1.3221385501961513, "learning_rate": 5.63952041394399e-07, "loss": 0.1919, "step": 27350 }, { "epoch": 0.4754297832397573, "grad_norm": 1.2595893145099701, "learning_rate": 5.639241231596971e-07, "loss": 0.254, "step": 27351 }, { "epoch": 0.47544716577726015, "grad_norm": 2.5827401812536097, "learning_rate": 5.638962047223849e-07, "loss": 0.3494, "step": 27352 }, { "epoch": 0.475464548314763, "grad_norm": 1.0970325544416288, "learning_rate": 5.638682860825514e-07, "loss": 0.2568, "step": 27353 }, { "epoch": 0.4754819308522658, "grad_norm": 1.109934307896243, "learning_rate": 5.638403672402844e-07, "loss": 0.2698, "step": 27354 }, { "epoch": 0.47549931338976864, "grad_norm": 1.65013443406654, "learning_rate": 5.638124481956729e-07, "loss": 0.2217, "step": 27355 }, { "epoch": 0.47551669592727147, "grad_norm": 2.535668779410883, "learning_rate": 5.637845289488053e-07, "loss": 0.3109, "step": 27356 }, { "epoch": 0.4755340784647743, "grad_norm": 1.797602293331048, "learning_rate": 5.637566094997701e-07, "loss": 0.2263, "step": 27357 }, { "epoch": 0.47555146100227713, "grad_norm": 3.644868511124146, "learning_rate": 5.637286898486555e-07, "loss": 0.4096, "step": 27358 }, { "epoch": 0.47556884353977996, "grad_norm": 1.453555645030786, "learning_rate": 5.637007699955504e-07, "loss": 0.3256, "step": 27359 }, { "epoch": 0.47558622607728274, "grad_norm": 2.0988723316402274, "learning_rate": 5.636728499405431e-07, "loss": 0.2377, "step": 27360 }, { "epoch": 0.47560360861478557, "grad_norm": 2.298480375900343, "learning_rate": 5.636449296837223e-07, "loss": 0.4709, "step": 27361 }, { "epoch": 0.4756209911522884, "grad_norm": 1.64380667057878, "learning_rate": 5.636170092251759e-07, "loss": 0.2753, "step": 27362 }, { "epoch": 0.4756383736897912, "grad_norm": 1.1595516840923825, "learning_rate": 5.635890885649931e-07, "loss": 0.3331, "step": 27363 }, { "epoch": 0.47565575622729406, "grad_norm": 2.4802660539983132, "learning_rate": 5.63561167703262e-07, "loss": 0.3174, "step": 27364 }, { "epoch": 0.4756731387647969, "grad_norm": 1.9112075405846676, "learning_rate": 5.635332466400713e-07, "loss": 0.1901, "step": 27365 }, { "epoch": 0.4756905213022997, "grad_norm": 1.3244906278836581, "learning_rate": 5.635053253755093e-07, "loss": 0.3065, "step": 27366 }, { "epoch": 0.47570790383980255, "grad_norm": 2.0005305158982902, "learning_rate": 5.634774039096644e-07, "loss": 0.326, "step": 27367 }, { "epoch": 0.4757252863773054, "grad_norm": 1.3758701140383351, "learning_rate": 5.634494822426257e-07, "loss": 0.3099, "step": 27368 }, { "epoch": 0.4757426689148082, "grad_norm": 1.278346962687453, "learning_rate": 5.634215603744809e-07, "loss": 0.1875, "step": 27369 }, { "epoch": 0.475760051452311, "grad_norm": 1.5593699885441175, "learning_rate": 5.633936383053193e-07, "loss": 0.2639, "step": 27370 }, { "epoch": 0.4757774339898138, "grad_norm": 1.2309591304595076, "learning_rate": 5.633657160352286e-07, "loss": 0.2986, "step": 27371 }, { "epoch": 0.47579481652731664, "grad_norm": 1.094227576026168, "learning_rate": 5.63337793564298e-07, "loss": 0.2102, "step": 27372 }, { "epoch": 0.4758121990648195, "grad_norm": 1.6087282424917662, "learning_rate": 5.633098708926154e-07, "loss": 0.1662, "step": 27373 }, { "epoch": 0.4758295816023223, "grad_norm": 1.51025417504033, "learning_rate": 5.632819480202697e-07, "loss": 0.251, "step": 27374 }, { "epoch": 0.47584696413982513, "grad_norm": 2.9964275815007695, "learning_rate": 5.632540249473494e-07, "loss": 0.9322, "step": 27375 }, { "epoch": 0.47586434667732797, "grad_norm": 2.462562265271402, "learning_rate": 5.632261016739428e-07, "loss": 0.2778, "step": 27376 }, { "epoch": 0.4758817292148308, "grad_norm": 1.9503429756112924, "learning_rate": 5.631981782001384e-07, "loss": 0.3386, "step": 27377 }, { "epoch": 0.4758991117523336, "grad_norm": 1.9825734282135712, "learning_rate": 5.631702545260251e-07, "loss": 0.205, "step": 27378 }, { "epoch": 0.47591649428983646, "grad_norm": 1.4754944270789077, "learning_rate": 5.631423306516909e-07, "loss": 0.3363, "step": 27379 }, { "epoch": 0.47593387682733923, "grad_norm": 2.807289845830057, "learning_rate": 5.631144065772245e-07, "loss": 0.5258, "step": 27380 }, { "epoch": 0.47595125936484206, "grad_norm": 1.830567954622318, "learning_rate": 5.630864823027145e-07, "loss": 0.2661, "step": 27381 }, { "epoch": 0.4759686419023449, "grad_norm": 1.248393068232126, "learning_rate": 5.630585578282494e-07, "loss": 0.3018, "step": 27382 }, { "epoch": 0.4759860244398477, "grad_norm": 1.8448391515382896, "learning_rate": 5.630306331539174e-07, "loss": 0.219, "step": 27383 }, { "epoch": 0.47600340697735055, "grad_norm": 4.049548340998832, "learning_rate": 5.630027082798075e-07, "loss": 0.2514, "step": 27384 }, { "epoch": 0.4760207895148534, "grad_norm": 1.9670324764502627, "learning_rate": 5.629747832060078e-07, "loss": 0.1985, "step": 27385 }, { "epoch": 0.4760381720523562, "grad_norm": 1.156955103362629, "learning_rate": 5.62946857932607e-07, "loss": 0.2112, "step": 27386 }, { "epoch": 0.47605555458985904, "grad_norm": 1.5081331212469204, "learning_rate": 5.629189324596937e-07, "loss": 0.3374, "step": 27387 }, { "epoch": 0.4760729371273619, "grad_norm": 2.78402038043115, "learning_rate": 5.628910067873561e-07, "loss": 0.227, "step": 27388 }, { "epoch": 0.4760903196648647, "grad_norm": 2.494684545421721, "learning_rate": 5.628630809156829e-07, "loss": 0.1423, "step": 27389 }, { "epoch": 0.4761077022023675, "grad_norm": 0.8927972332512114, "learning_rate": 5.628351548447628e-07, "loss": 0.3377, "step": 27390 }, { "epoch": 0.4761250847398703, "grad_norm": 1.992787886868524, "learning_rate": 5.62807228574684e-07, "loss": 0.3508, "step": 27391 }, { "epoch": 0.47614246727737314, "grad_norm": 1.55250600986332, "learning_rate": 5.627793021055351e-07, "loss": 0.2314, "step": 27392 }, { "epoch": 0.47615984981487597, "grad_norm": 1.9998935171072991, "learning_rate": 5.627513754374046e-07, "loss": 0.2937, "step": 27393 }, { "epoch": 0.4761772323523788, "grad_norm": 1.685758918659515, "learning_rate": 5.627234485703813e-07, "loss": 0.2358, "step": 27394 }, { "epoch": 0.47619461488988163, "grad_norm": 1.4805246538945909, "learning_rate": 5.626955215045533e-07, "loss": 0.3717, "step": 27395 }, { "epoch": 0.47621199742738446, "grad_norm": 2.306576001414532, "learning_rate": 5.626675942400093e-07, "loss": 0.303, "step": 27396 }, { "epoch": 0.4762293799648873, "grad_norm": 1.5194620864492892, "learning_rate": 5.62639666776838e-07, "loss": 0.3206, "step": 27397 }, { "epoch": 0.4762467625023901, "grad_norm": 2.303268756933412, "learning_rate": 5.626117391151274e-07, "loss": 0.3922, "step": 27398 }, { "epoch": 0.47626414503989295, "grad_norm": 1.4574320444687505, "learning_rate": 5.625838112549666e-07, "loss": 0.1916, "step": 27399 }, { "epoch": 0.4762815275773957, "grad_norm": 1.8836168640368616, "learning_rate": 5.625558831964437e-07, "loss": 0.3131, "step": 27400 }, { "epoch": 0.47629891011489855, "grad_norm": 2.0137015816638906, "learning_rate": 5.625279549396475e-07, "loss": 0.199, "step": 27401 }, { "epoch": 0.4763162926524014, "grad_norm": 1.405109107978571, "learning_rate": 5.625000264846664e-07, "loss": 0.2503, "step": 27402 }, { "epoch": 0.4763336751899042, "grad_norm": 2.4008300751331646, "learning_rate": 5.62472097831589e-07, "loss": 0.3903, "step": 27403 }, { "epoch": 0.47635105772740705, "grad_norm": 1.6481186070281042, "learning_rate": 5.624441689805037e-07, "loss": 0.2205, "step": 27404 }, { "epoch": 0.4763684402649099, "grad_norm": 1.4165425395592939, "learning_rate": 5.62416239931499e-07, "loss": 0.2706, "step": 27405 }, { "epoch": 0.4763858228024127, "grad_norm": 1.862885990914223, "learning_rate": 5.623883106846636e-07, "loss": 0.2567, "step": 27406 }, { "epoch": 0.47640320533991554, "grad_norm": 2.8278104030381757, "learning_rate": 5.623603812400857e-07, "loss": 0.2447, "step": 27407 }, { "epoch": 0.47642058787741837, "grad_norm": 1.2126811214507145, "learning_rate": 5.623324515978541e-07, "loss": 0.1935, "step": 27408 }, { "epoch": 0.4764379704149212, "grad_norm": 1.2715423461350237, "learning_rate": 5.623045217580575e-07, "loss": 0.2747, "step": 27409 }, { "epoch": 0.47645535295242397, "grad_norm": 2.3103004498950894, "learning_rate": 5.622765917207838e-07, "loss": 0.2221, "step": 27410 }, { "epoch": 0.4764727354899268, "grad_norm": 1.488018339914199, "learning_rate": 5.622486614861223e-07, "loss": 0.224, "step": 27411 }, { "epoch": 0.47649011802742963, "grad_norm": 1.3082561251980345, "learning_rate": 5.622207310541609e-07, "loss": 0.2081, "step": 27412 }, { "epoch": 0.47650750056493246, "grad_norm": 2.6667350774936973, "learning_rate": 5.621928004249885e-07, "loss": 0.6177, "step": 27413 }, { "epoch": 0.4765248831024353, "grad_norm": 2.938093224180512, "learning_rate": 5.621648695986934e-07, "loss": 0.3101, "step": 27414 }, { "epoch": 0.4765422656399381, "grad_norm": 1.8349435261952343, "learning_rate": 5.621369385753642e-07, "loss": 0.2808, "step": 27415 }, { "epoch": 0.47655964817744095, "grad_norm": 1.2997977322501055, "learning_rate": 5.621090073550896e-07, "loss": 0.217, "step": 27416 }, { "epoch": 0.4765770307149438, "grad_norm": 1.3125369148796766, "learning_rate": 5.620810759379579e-07, "loss": 0.1999, "step": 27417 }, { "epoch": 0.4765944132524466, "grad_norm": 1.966410274921974, "learning_rate": 5.620531443240576e-07, "loss": 0.3111, "step": 27418 }, { "epoch": 0.47661179578994944, "grad_norm": 1.3304580587977188, "learning_rate": 5.620252125134775e-07, "loss": 0.1516, "step": 27419 }, { "epoch": 0.4766291783274522, "grad_norm": 1.2769206890903693, "learning_rate": 5.619972805063057e-07, "loss": 0.1686, "step": 27420 }, { "epoch": 0.47664656086495505, "grad_norm": 1.659741241619862, "learning_rate": 5.619693483026313e-07, "loss": 0.4759, "step": 27421 }, { "epoch": 0.4766639434024579, "grad_norm": 1.5299190019925677, "learning_rate": 5.619414159025424e-07, "loss": 0.2414, "step": 27422 }, { "epoch": 0.4766813259399607, "grad_norm": 1.377101046460172, "learning_rate": 5.619134833061276e-07, "loss": 0.2046, "step": 27423 }, { "epoch": 0.47669870847746354, "grad_norm": 1.9440793556736116, "learning_rate": 5.618855505134757e-07, "loss": 0.3697, "step": 27424 }, { "epoch": 0.47671609101496637, "grad_norm": 1.1671892483807975, "learning_rate": 5.618576175246748e-07, "loss": 0.1928, "step": 27425 }, { "epoch": 0.4767334735524692, "grad_norm": 2.7166684780885384, "learning_rate": 5.618296843398139e-07, "loss": 0.3954, "step": 27426 }, { "epoch": 0.47675085608997203, "grad_norm": 1.9749111048466346, "learning_rate": 5.618017509589811e-07, "loss": 0.3639, "step": 27427 }, { "epoch": 0.47676823862747486, "grad_norm": 2.5094636948159907, "learning_rate": 5.617738173822654e-07, "loss": 0.2241, "step": 27428 }, { "epoch": 0.4767856211649777, "grad_norm": 1.209280362061735, "learning_rate": 5.617458836097547e-07, "loss": 0.3194, "step": 27429 }, { "epoch": 0.47680300370248047, "grad_norm": 2.1695875252954298, "learning_rate": 5.617179496415381e-07, "loss": 0.2002, "step": 27430 }, { "epoch": 0.4768203862399833, "grad_norm": 1.8679139313944757, "learning_rate": 5.616900154777041e-07, "loss": 0.2751, "step": 27431 }, { "epoch": 0.4768377687774861, "grad_norm": 1.4345679199771229, "learning_rate": 5.616620811183409e-07, "loss": 0.4061, "step": 27432 }, { "epoch": 0.47685515131498896, "grad_norm": 2.6986229914958235, "learning_rate": 5.616341465635372e-07, "loss": 0.2871, "step": 27433 }, { "epoch": 0.4768725338524918, "grad_norm": 1.924990507328109, "learning_rate": 5.616062118133817e-07, "loss": 0.2255, "step": 27434 }, { "epoch": 0.4768899163899946, "grad_norm": 2.2759816022696118, "learning_rate": 5.615782768679627e-07, "loss": 0.3416, "step": 27435 }, { "epoch": 0.47690729892749745, "grad_norm": 1.3778199256225816, "learning_rate": 5.61550341727369e-07, "loss": 0.2185, "step": 27436 }, { "epoch": 0.4769246814650003, "grad_norm": 1.3255443829602607, "learning_rate": 5.615224063916888e-07, "loss": 0.2187, "step": 27437 }, { "epoch": 0.4769420640025031, "grad_norm": 3.4518373683092642, "learning_rate": 5.614944708610111e-07, "loss": 0.3771, "step": 27438 }, { "epoch": 0.4769594465400059, "grad_norm": 2.638216047794368, "learning_rate": 5.614665351354238e-07, "loss": 0.3843, "step": 27439 }, { "epoch": 0.4769768290775087, "grad_norm": 1.6606596290850313, "learning_rate": 5.614385992150161e-07, "loss": 0.2425, "step": 27440 }, { "epoch": 0.47699421161501154, "grad_norm": 1.5070987280643198, "learning_rate": 5.614106630998763e-07, "loss": 0.2675, "step": 27441 }, { "epoch": 0.4770115941525144, "grad_norm": 1.4387020082570248, "learning_rate": 5.613827267900928e-07, "loss": 0.2462, "step": 27442 }, { "epoch": 0.4770289766900172, "grad_norm": 2.1781278606300525, "learning_rate": 5.613547902857541e-07, "loss": 0.2696, "step": 27443 }, { "epoch": 0.47704635922752003, "grad_norm": 1.5518846344996797, "learning_rate": 5.613268535869492e-07, "loss": 0.2537, "step": 27444 }, { "epoch": 0.47706374176502286, "grad_norm": 1.9368182155397364, "learning_rate": 5.612989166937661e-07, "loss": 0.268, "step": 27445 }, { "epoch": 0.4770811243025257, "grad_norm": 2.0129874553977745, "learning_rate": 5.612709796062937e-07, "loss": 0.323, "step": 27446 }, { "epoch": 0.4770985068400285, "grad_norm": 1.5161440714739096, "learning_rate": 5.612430423246205e-07, "loss": 0.3687, "step": 27447 }, { "epoch": 0.47711588937753135, "grad_norm": 2.217201362221075, "learning_rate": 5.612151048488348e-07, "loss": 0.3073, "step": 27448 }, { "epoch": 0.47713327191503413, "grad_norm": 2.9728428356324286, "learning_rate": 5.611871671790254e-07, "loss": 0.6276, "step": 27449 }, { "epoch": 0.47715065445253696, "grad_norm": 1.1838977372097237, "learning_rate": 5.61159229315281e-07, "loss": 0.1832, "step": 27450 }, { "epoch": 0.4771680369900398, "grad_norm": 1.7791993550664569, "learning_rate": 5.611312912576897e-07, "loss": 0.301, "step": 27451 }, { "epoch": 0.4771854195275426, "grad_norm": 1.2408437024147767, "learning_rate": 5.611033530063403e-07, "loss": 0.1891, "step": 27452 }, { "epoch": 0.47720280206504545, "grad_norm": 3.0383289105600797, "learning_rate": 5.610754145613213e-07, "loss": 0.3586, "step": 27453 }, { "epoch": 0.4772201846025483, "grad_norm": 1.2611167685689433, "learning_rate": 5.610474759227215e-07, "loss": 0.3252, "step": 27454 }, { "epoch": 0.4772375671400511, "grad_norm": 1.65843262371881, "learning_rate": 5.610195370906292e-07, "loss": 0.2355, "step": 27455 }, { "epoch": 0.47725494967755394, "grad_norm": 1.9582491666662494, "learning_rate": 5.609915980651328e-07, "loss": 0.3079, "step": 27456 }, { "epoch": 0.47727233221505677, "grad_norm": 1.8067846852207428, "learning_rate": 5.609636588463213e-07, "loss": 0.2956, "step": 27457 }, { "epoch": 0.4772897147525596, "grad_norm": 1.1652990880121117, "learning_rate": 5.609357194342829e-07, "loss": 0.1721, "step": 27458 }, { "epoch": 0.4773070972900624, "grad_norm": 1.3368843288373502, "learning_rate": 5.609077798291062e-07, "loss": 0.2731, "step": 27459 }, { "epoch": 0.4773244798275652, "grad_norm": 1.071839882876858, "learning_rate": 5.608798400308799e-07, "loss": 0.1377, "step": 27460 }, { "epoch": 0.47734186236506804, "grad_norm": 1.7623921235443738, "learning_rate": 5.608519000396925e-07, "loss": 0.3031, "step": 27461 }, { "epoch": 0.47735924490257087, "grad_norm": 2.518575603781736, "learning_rate": 5.608239598556323e-07, "loss": 0.2007, "step": 27462 }, { "epoch": 0.4773766274400737, "grad_norm": 1.691779845795692, "learning_rate": 5.607960194787884e-07, "loss": 0.3099, "step": 27463 }, { "epoch": 0.47739400997757653, "grad_norm": 3.1266257611535613, "learning_rate": 5.607680789092488e-07, "loss": 0.4449, "step": 27464 }, { "epoch": 0.47741139251507936, "grad_norm": 2.240825215137326, "learning_rate": 5.607401381471025e-07, "loss": 0.2981, "step": 27465 }, { "epoch": 0.4774287750525822, "grad_norm": 3.6781972015417974, "learning_rate": 5.607121971924378e-07, "loss": 0.5313, "step": 27466 }, { "epoch": 0.477446157590085, "grad_norm": 1.935275966950052, "learning_rate": 5.606842560453433e-07, "loss": 0.32, "step": 27467 }, { "epoch": 0.47746354012758785, "grad_norm": 2.076674814311423, "learning_rate": 5.606563147059074e-07, "loss": 0.3691, "step": 27468 }, { "epoch": 0.4774809226650906, "grad_norm": 1.733620631457309, "learning_rate": 5.606283731742193e-07, "loss": 0.3085, "step": 27469 }, { "epoch": 0.47749830520259345, "grad_norm": 1.8582690295943007, "learning_rate": 5.606004314503667e-07, "loss": 0.3439, "step": 27470 }, { "epoch": 0.4775156877400963, "grad_norm": 2.58944284381, "learning_rate": 5.605724895344387e-07, "loss": 0.3645, "step": 27471 }, { "epoch": 0.4775330702775991, "grad_norm": 1.8006326670962203, "learning_rate": 5.605445474265236e-07, "loss": 0.2205, "step": 27472 }, { "epoch": 0.47755045281510194, "grad_norm": 2.200803622805541, "learning_rate": 5.605166051267105e-07, "loss": 0.2533, "step": 27473 }, { "epoch": 0.4775678353526048, "grad_norm": 1.3960945360317298, "learning_rate": 5.604886626350872e-07, "loss": 0.1422, "step": 27474 }, { "epoch": 0.4775852178901076, "grad_norm": 2.1494800937787932, "learning_rate": 5.604607199517427e-07, "loss": 0.342, "step": 27475 }, { "epoch": 0.47760260042761044, "grad_norm": 1.4868022205595357, "learning_rate": 5.604327770767655e-07, "loss": 0.2668, "step": 27476 }, { "epoch": 0.47761998296511327, "grad_norm": 1.7203886474036398, "learning_rate": 5.604048340102441e-07, "loss": 0.1613, "step": 27477 }, { "epoch": 0.4776373655026161, "grad_norm": 6.059682727544574, "learning_rate": 5.603768907522672e-07, "loss": 0.3135, "step": 27478 }, { "epoch": 0.47765474804011887, "grad_norm": 2.6790650222871526, "learning_rate": 5.603489473029234e-07, "loss": 0.3575, "step": 27479 }, { "epoch": 0.4776721305776217, "grad_norm": 0.9935293072588784, "learning_rate": 5.603210036623009e-07, "loss": 0.1464, "step": 27480 }, { "epoch": 0.47768951311512453, "grad_norm": 1.449690385937907, "learning_rate": 5.602930598304888e-07, "loss": 0.3052, "step": 27481 }, { "epoch": 0.47770689565262736, "grad_norm": 1.257766969922316, "learning_rate": 5.602651158075753e-07, "loss": 0.2029, "step": 27482 }, { "epoch": 0.4777242781901302, "grad_norm": 1.2195822641721061, "learning_rate": 5.602371715936489e-07, "loss": 0.1366, "step": 27483 }, { "epoch": 0.477741660727633, "grad_norm": 0.9925410033983789, "learning_rate": 5.602092271887985e-07, "loss": 0.4022, "step": 27484 }, { "epoch": 0.47775904326513585, "grad_norm": 1.0734075795799678, "learning_rate": 5.601812825931125e-07, "loss": 0.1606, "step": 27485 }, { "epoch": 0.4777764258026387, "grad_norm": 1.8570148960397461, "learning_rate": 5.601533378066795e-07, "loss": 0.3271, "step": 27486 }, { "epoch": 0.4777938083401415, "grad_norm": 1.5405787699289126, "learning_rate": 5.601253928295879e-07, "loss": 0.2154, "step": 27487 }, { "epoch": 0.47781119087764434, "grad_norm": 1.3137582230513234, "learning_rate": 5.600974476619267e-07, "loss": 0.2579, "step": 27488 }, { "epoch": 0.4778285734151471, "grad_norm": 2.514156662389722, "learning_rate": 5.600695023037839e-07, "loss": 0.3071, "step": 27489 }, { "epoch": 0.47784595595264995, "grad_norm": 2.104947941080544, "learning_rate": 5.600415567552486e-07, "loss": 0.236, "step": 27490 }, { "epoch": 0.4778633384901528, "grad_norm": 2.537548802699499, "learning_rate": 5.60013611016409e-07, "loss": 0.8385, "step": 27491 }, { "epoch": 0.4778807210276556, "grad_norm": 2.099701211180308, "learning_rate": 5.599856650873539e-07, "loss": 0.2446, "step": 27492 }, { "epoch": 0.47789810356515844, "grad_norm": 1.5829022275707012, "learning_rate": 5.599577189681717e-07, "loss": 0.2501, "step": 27493 }, { "epoch": 0.47791548610266127, "grad_norm": 1.4203610923637353, "learning_rate": 5.599297726589511e-07, "loss": 0.2031, "step": 27494 }, { "epoch": 0.4779328686401641, "grad_norm": 1.5278632256006892, "learning_rate": 5.599018261597808e-07, "loss": 0.1947, "step": 27495 }, { "epoch": 0.47795025117766693, "grad_norm": 1.5714075125117788, "learning_rate": 5.598738794707492e-07, "loss": 0.3215, "step": 27496 }, { "epoch": 0.47796763371516976, "grad_norm": 1.3185168037365205, "learning_rate": 5.598459325919447e-07, "loss": 0.1667, "step": 27497 }, { "epoch": 0.4779850162526726, "grad_norm": 2.140492174859947, "learning_rate": 5.598179855234562e-07, "loss": 0.3648, "step": 27498 }, { "epoch": 0.47800239879017536, "grad_norm": 2.0865956086336324, "learning_rate": 5.597900382653722e-07, "loss": 0.3038, "step": 27499 }, { "epoch": 0.4780197813276782, "grad_norm": 1.4616468717022875, "learning_rate": 5.597620908177812e-07, "loss": 0.4415, "step": 27500 }, { "epoch": 0.478037163865181, "grad_norm": 1.4859449997550014, "learning_rate": 5.597341431807719e-07, "loss": 0.2944, "step": 27501 }, { "epoch": 0.47805454640268386, "grad_norm": 2.5434664077560214, "learning_rate": 5.597061953544327e-07, "loss": 0.5039, "step": 27502 }, { "epoch": 0.4780719289401867, "grad_norm": 2.1094106902751384, "learning_rate": 5.596782473388522e-07, "loss": 0.2491, "step": 27503 }, { "epoch": 0.4780893114776895, "grad_norm": 2.2362176332698973, "learning_rate": 5.596502991341193e-07, "loss": 0.223, "step": 27504 }, { "epoch": 0.47810669401519235, "grad_norm": 5.1704793529086235, "learning_rate": 5.596223507403222e-07, "loss": 0.5784, "step": 27505 }, { "epoch": 0.4781240765526952, "grad_norm": 2.1285724466289095, "learning_rate": 5.595944021575499e-07, "loss": 0.3329, "step": 27506 }, { "epoch": 0.478141459090198, "grad_norm": 1.485648384504594, "learning_rate": 5.595664533858905e-07, "loss": 0.4674, "step": 27507 }, { "epoch": 0.47815884162770084, "grad_norm": 1.4752983380355984, "learning_rate": 5.595385044254327e-07, "loss": 0.4641, "step": 27508 }, { "epoch": 0.4781762241652036, "grad_norm": 1.6194774879346379, "learning_rate": 5.595105552762654e-07, "loss": 0.2852, "step": 27509 }, { "epoch": 0.47819360670270644, "grad_norm": 3.0906600331049003, "learning_rate": 5.594826059384768e-07, "loss": 0.2634, "step": 27510 }, { "epoch": 0.47821098924020927, "grad_norm": 1.6174920726202289, "learning_rate": 5.594546564121557e-07, "loss": 0.2012, "step": 27511 }, { "epoch": 0.4782283717777121, "grad_norm": 1.6899471774813664, "learning_rate": 5.594267066973907e-07, "loss": 0.481, "step": 27512 }, { "epoch": 0.47824575431521493, "grad_norm": 1.419437309240068, "learning_rate": 5.593987567942705e-07, "loss": 0.3094, "step": 27513 }, { "epoch": 0.47826313685271776, "grad_norm": 2.4728385752514916, "learning_rate": 5.593708067028833e-07, "loss": 0.5106, "step": 27514 }, { "epoch": 0.4782805193902206, "grad_norm": 1.6283702406615463, "learning_rate": 5.593428564233179e-07, "loss": 0.1974, "step": 27515 }, { "epoch": 0.4782979019277234, "grad_norm": 1.558570323959377, "learning_rate": 5.593149059556629e-07, "loss": 0.4425, "step": 27516 }, { "epoch": 0.47831528446522625, "grad_norm": 1.3614251148086738, "learning_rate": 5.592869553000071e-07, "loss": 0.3581, "step": 27517 }, { "epoch": 0.4783326670027291, "grad_norm": 2.883402073278947, "learning_rate": 5.592590044564387e-07, "loss": 0.3217, "step": 27518 }, { "epoch": 0.47835004954023186, "grad_norm": 1.7509405535287361, "learning_rate": 5.592310534250466e-07, "loss": 0.3355, "step": 27519 }, { "epoch": 0.4783674320777347, "grad_norm": 2.1882650646702895, "learning_rate": 5.592031022059192e-07, "loss": 0.3032, "step": 27520 }, { "epoch": 0.4783848146152375, "grad_norm": 1.314559368874534, "learning_rate": 5.591751507991452e-07, "loss": 0.291, "step": 27521 }, { "epoch": 0.47840219715274035, "grad_norm": 4.7042185755817965, "learning_rate": 5.59147199204813e-07, "loss": 0.3736, "step": 27522 }, { "epoch": 0.4784195796902432, "grad_norm": 1.9176294073087392, "learning_rate": 5.591192474230117e-07, "loss": 0.3379, "step": 27523 }, { "epoch": 0.478436962227746, "grad_norm": 2.0475994540175826, "learning_rate": 5.590912954538292e-07, "loss": 0.2745, "step": 27524 }, { "epoch": 0.47845434476524884, "grad_norm": 1.4267285203903397, "learning_rate": 5.590633432973546e-07, "loss": 0.3596, "step": 27525 }, { "epoch": 0.47847172730275167, "grad_norm": 1.4550678592368498, "learning_rate": 5.590353909536762e-07, "loss": 0.216, "step": 27526 }, { "epoch": 0.4784891098402545, "grad_norm": 2.2849293884125563, "learning_rate": 5.590074384228829e-07, "loss": 0.4229, "step": 27527 }, { "epoch": 0.47850649237775733, "grad_norm": 1.7164862333290585, "learning_rate": 5.589794857050629e-07, "loss": 0.3957, "step": 27528 }, { "epoch": 0.4785238749152601, "grad_norm": 1.5202330078579196, "learning_rate": 5.589515328003054e-07, "loss": 0.1732, "step": 27529 }, { "epoch": 0.47854125745276294, "grad_norm": 1.8324851779867168, "learning_rate": 5.589235797086983e-07, "loss": 0.2839, "step": 27530 }, { "epoch": 0.47855863999026577, "grad_norm": 1.9271046934985723, "learning_rate": 5.588956264303306e-07, "loss": 0.2513, "step": 27531 }, { "epoch": 0.4785760225277686, "grad_norm": 1.424977176032216, "learning_rate": 5.588676729652908e-07, "loss": 0.2048, "step": 27532 }, { "epoch": 0.4785934050652714, "grad_norm": 1.3228417668790438, "learning_rate": 5.588397193136677e-07, "loss": 0.1852, "step": 27533 }, { "epoch": 0.47861078760277426, "grad_norm": 1.6637703048592078, "learning_rate": 5.588117654755495e-07, "loss": 0.197, "step": 27534 }, { "epoch": 0.4786281701402771, "grad_norm": 1.5527138143966568, "learning_rate": 5.58783811451025e-07, "loss": 0.4358, "step": 27535 }, { "epoch": 0.4786455526777799, "grad_norm": 1.4670858109114013, "learning_rate": 5.587558572401829e-07, "loss": 0.2051, "step": 27536 }, { "epoch": 0.47866293521528275, "grad_norm": 3.0928536265579143, "learning_rate": 5.587279028431118e-07, "loss": 0.326, "step": 27537 }, { "epoch": 0.4786803177527856, "grad_norm": 1.706085012426851, "learning_rate": 5.586999482599001e-07, "loss": 0.3705, "step": 27538 }, { "epoch": 0.47869770029028835, "grad_norm": 1.639618219722602, "learning_rate": 5.586719934906365e-07, "loss": 0.2364, "step": 27539 }, { "epoch": 0.4787150828277912, "grad_norm": 1.7000622021023284, "learning_rate": 5.586440385354099e-07, "loss": 0.2069, "step": 27540 }, { "epoch": 0.478732465365294, "grad_norm": 1.8324490173791614, "learning_rate": 5.586160833943083e-07, "loss": 0.3004, "step": 27541 }, { "epoch": 0.47874984790279684, "grad_norm": 1.8115536896720354, "learning_rate": 5.585881280674209e-07, "loss": 0.1886, "step": 27542 }, { "epoch": 0.4787672304402997, "grad_norm": 1.1320280848118467, "learning_rate": 5.585601725548359e-07, "loss": 0.1908, "step": 27543 }, { "epoch": 0.4787846129778025, "grad_norm": 1.8352522965236098, "learning_rate": 5.585322168566421e-07, "loss": 0.2319, "step": 27544 }, { "epoch": 0.47880199551530533, "grad_norm": 1.4717890006836285, "learning_rate": 5.58504260972928e-07, "loss": 0.2654, "step": 27545 }, { "epoch": 0.47881937805280816, "grad_norm": 1.0829005093002109, "learning_rate": 5.584763049037824e-07, "loss": 0.2018, "step": 27546 }, { "epoch": 0.478836760590311, "grad_norm": 3.2873903564728506, "learning_rate": 5.584483486492936e-07, "loss": 0.3479, "step": 27547 }, { "epoch": 0.4788541431278138, "grad_norm": 1.0515541040115852, "learning_rate": 5.584203922095506e-07, "loss": 0.3302, "step": 27548 }, { "epoch": 0.4788715256653166, "grad_norm": 2.189251099361208, "learning_rate": 5.583924355846415e-07, "loss": 0.4891, "step": 27549 }, { "epoch": 0.47888890820281943, "grad_norm": 1.5276004631617548, "learning_rate": 5.583644787746555e-07, "loss": 0.2649, "step": 27550 }, { "epoch": 0.47890629074032226, "grad_norm": 1.0366939556675103, "learning_rate": 5.583365217796808e-07, "loss": 0.215, "step": 27551 }, { "epoch": 0.4789236732778251, "grad_norm": 1.1109928247456964, "learning_rate": 5.583085645998061e-07, "loss": 0.2238, "step": 27552 }, { "epoch": 0.4789410558153279, "grad_norm": 1.2554217766721392, "learning_rate": 5.5828060723512e-07, "loss": 0.3937, "step": 27553 }, { "epoch": 0.47895843835283075, "grad_norm": 2.2598202276393695, "learning_rate": 5.582526496857112e-07, "loss": 0.2618, "step": 27554 }, { "epoch": 0.4789758208903336, "grad_norm": 1.1317150164490564, "learning_rate": 5.582246919516683e-07, "loss": 0.2018, "step": 27555 }, { "epoch": 0.4789932034278364, "grad_norm": 2.283658599961421, "learning_rate": 5.581967340330798e-07, "loss": 0.2379, "step": 27556 }, { "epoch": 0.47901058596533924, "grad_norm": 1.5454656217554308, "learning_rate": 5.581687759300343e-07, "loss": 0.2461, "step": 27557 }, { "epoch": 0.47902796850284207, "grad_norm": 0.9307746591211037, "learning_rate": 5.581408176426208e-07, "loss": 0.2059, "step": 27558 }, { "epoch": 0.47904535104034485, "grad_norm": 1.1551682154086562, "learning_rate": 5.581128591709273e-07, "loss": 0.2874, "step": 27559 }, { "epoch": 0.4790627335778477, "grad_norm": 2.169381300040994, "learning_rate": 5.58084900515043e-07, "loss": 0.18, "step": 27560 }, { "epoch": 0.4790801161153505, "grad_norm": 2.0471430312734946, "learning_rate": 5.58056941675056e-07, "loss": 0.2914, "step": 27561 }, { "epoch": 0.47909749865285334, "grad_norm": 1.1850024129944057, "learning_rate": 5.580289826510553e-07, "loss": 0.2178, "step": 27562 }, { "epoch": 0.47911488119035617, "grad_norm": 1.7590013985534554, "learning_rate": 5.580010234431293e-07, "loss": 0.2399, "step": 27563 }, { "epoch": 0.479132263727859, "grad_norm": 1.3930787465637882, "learning_rate": 5.579730640513668e-07, "loss": 0.2569, "step": 27564 }, { "epoch": 0.47914964626536183, "grad_norm": 2.3166750027119325, "learning_rate": 5.579451044758564e-07, "loss": 0.2885, "step": 27565 }, { "epoch": 0.47916702880286466, "grad_norm": 0.9508459344211309, "learning_rate": 5.579171447166864e-07, "loss": 0.2283, "step": 27566 }, { "epoch": 0.4791844113403675, "grad_norm": 2.428735838680089, "learning_rate": 5.578891847739458e-07, "loss": 0.7372, "step": 27567 }, { "epoch": 0.4792017938778703, "grad_norm": 1.19372417725515, "learning_rate": 5.57861224647723e-07, "loss": 0.3916, "step": 27568 }, { "epoch": 0.4792191764153731, "grad_norm": 1.3976653333239075, "learning_rate": 5.578332643381067e-07, "loss": 0.4349, "step": 27569 }, { "epoch": 0.4792365589528759, "grad_norm": 2.2596177257557173, "learning_rate": 5.578053038451855e-07, "loss": 0.3892, "step": 27570 }, { "epoch": 0.47925394149037875, "grad_norm": 2.742953042994408, "learning_rate": 5.577773431690482e-07, "loss": 0.2502, "step": 27571 }, { "epoch": 0.4792713240278816, "grad_norm": 2.479965492155619, "learning_rate": 5.577493823097831e-07, "loss": 0.2456, "step": 27572 }, { "epoch": 0.4792887065653844, "grad_norm": 1.2705277913022468, "learning_rate": 5.57721421267479e-07, "loss": 0.4574, "step": 27573 }, { "epoch": 0.47930608910288725, "grad_norm": 3.655090433564935, "learning_rate": 5.576934600422247e-07, "loss": 0.4558, "step": 27574 }, { "epoch": 0.4793234716403901, "grad_norm": 1.1368917868876043, "learning_rate": 5.576654986341084e-07, "loss": 0.317, "step": 27575 }, { "epoch": 0.4793408541778929, "grad_norm": 2.271661004324875, "learning_rate": 5.576375370432191e-07, "loss": 0.2694, "step": 27576 }, { "epoch": 0.47935823671539574, "grad_norm": 1.6298171993219048, "learning_rate": 5.576095752696453e-07, "loss": 0.2826, "step": 27577 }, { "epoch": 0.4793756192528985, "grad_norm": 1.3100984449902344, "learning_rate": 5.575816133134754e-07, "loss": 0.2912, "step": 27578 }, { "epoch": 0.47939300179040134, "grad_norm": 2.143983397465383, "learning_rate": 5.575536511747983e-07, "loss": 0.3101, "step": 27579 }, { "epoch": 0.47941038432790417, "grad_norm": 1.7672277049209086, "learning_rate": 5.575256888537028e-07, "loss": 0.1607, "step": 27580 }, { "epoch": 0.479427766865407, "grad_norm": 1.641111232795513, "learning_rate": 5.574977263502771e-07, "loss": 0.307, "step": 27581 }, { "epoch": 0.47944514940290983, "grad_norm": 2.0499647078136793, "learning_rate": 5.5746976366461e-07, "loss": 0.2559, "step": 27582 }, { "epoch": 0.47946253194041266, "grad_norm": 1.754840998740952, "learning_rate": 5.574418007967903e-07, "loss": 0.4063, "step": 27583 }, { "epoch": 0.4794799144779155, "grad_norm": 1.5606599726829504, "learning_rate": 5.574138377469063e-07, "loss": 0.3211, "step": 27584 }, { "epoch": 0.4794972970154183, "grad_norm": 1.7431634045044224, "learning_rate": 5.573858745150469e-07, "loss": 0.3162, "step": 27585 }, { "epoch": 0.47951467955292115, "grad_norm": 2.0849158096403215, "learning_rate": 5.573579111013007e-07, "loss": 0.246, "step": 27586 }, { "epoch": 0.479532062090424, "grad_norm": 1.0269811709006342, "learning_rate": 5.573299475057561e-07, "loss": 0.2936, "step": 27587 }, { "epoch": 0.47954944462792676, "grad_norm": 2.040053728929737, "learning_rate": 5.573019837285021e-07, "loss": 0.6139, "step": 27588 }, { "epoch": 0.4795668271654296, "grad_norm": 1.8917504564560903, "learning_rate": 5.572740197696271e-07, "loss": 0.3698, "step": 27589 }, { "epoch": 0.4795842097029324, "grad_norm": 1.1564151013774335, "learning_rate": 5.572460556292195e-07, "loss": 0.3028, "step": 27590 }, { "epoch": 0.47960159224043525, "grad_norm": 1.2065002730680985, "learning_rate": 5.572180913073685e-07, "loss": 0.2157, "step": 27591 }, { "epoch": 0.4796189747779381, "grad_norm": 2.6808348682438403, "learning_rate": 5.571901268041622e-07, "loss": 0.3174, "step": 27592 }, { "epoch": 0.4796363573154409, "grad_norm": 1.180798558166482, "learning_rate": 5.571621621196897e-07, "loss": 0.2629, "step": 27593 }, { "epoch": 0.47965373985294374, "grad_norm": 3.5521057435315435, "learning_rate": 5.571341972540394e-07, "loss": 0.304, "step": 27594 }, { "epoch": 0.47967112239044657, "grad_norm": 3.3654946135700805, "learning_rate": 5.571062322072996e-07, "loss": 0.3577, "step": 27595 }, { "epoch": 0.4796885049279494, "grad_norm": 1.9853666966027645, "learning_rate": 5.570782669795597e-07, "loss": 0.1812, "step": 27596 }, { "epoch": 0.47970588746545223, "grad_norm": 1.443257582779265, "learning_rate": 5.570503015709076e-07, "loss": 0.3168, "step": 27597 }, { "epoch": 0.479723270002955, "grad_norm": 1.7612139249681091, "learning_rate": 5.570223359814325e-07, "loss": 0.2792, "step": 27598 }, { "epoch": 0.47974065254045783, "grad_norm": 1.3485628436896253, "learning_rate": 5.569943702112226e-07, "loss": 0.1771, "step": 27599 }, { "epoch": 0.47975803507796067, "grad_norm": 1.7028485090202918, "learning_rate": 5.569664042603669e-07, "loss": 0.2888, "step": 27600 }, { "epoch": 0.4797754176154635, "grad_norm": 1.3818733663405929, "learning_rate": 5.569384381289537e-07, "loss": 0.2145, "step": 27601 }, { "epoch": 0.4797928001529663, "grad_norm": 1.557407703531683, "learning_rate": 5.56910471817072e-07, "loss": 0.2514, "step": 27602 }, { "epoch": 0.47981018269046916, "grad_norm": 1.0855972619002234, "learning_rate": 5.5688250532481e-07, "loss": 0.1905, "step": 27603 }, { "epoch": 0.479827565227972, "grad_norm": 1.5426181290611187, "learning_rate": 5.568545386522567e-07, "loss": 0.2094, "step": 27604 }, { "epoch": 0.4798449477654748, "grad_norm": 2.3491579989661884, "learning_rate": 5.568265717995007e-07, "loss": 0.2684, "step": 27605 }, { "epoch": 0.47986233030297765, "grad_norm": 1.7636700920821677, "learning_rate": 5.567986047666305e-07, "loss": 0.4564, "step": 27606 }, { "epoch": 0.4798797128404805, "grad_norm": 1.5735153180647832, "learning_rate": 5.567706375537348e-07, "loss": 0.3269, "step": 27607 }, { "epoch": 0.47989709537798325, "grad_norm": 0.9886431298592363, "learning_rate": 5.567426701609024e-07, "loss": 0.1714, "step": 27608 }, { "epoch": 0.4799144779154861, "grad_norm": 1.381038233424999, "learning_rate": 5.567147025882216e-07, "loss": 0.2199, "step": 27609 }, { "epoch": 0.4799318604529889, "grad_norm": 1.7094733048443806, "learning_rate": 5.566867348357813e-07, "loss": 0.2371, "step": 27610 }, { "epoch": 0.47994924299049174, "grad_norm": 1.3314577006010953, "learning_rate": 5.566587669036702e-07, "loss": 0.1954, "step": 27611 }, { "epoch": 0.4799666255279946, "grad_norm": 1.518158315944515, "learning_rate": 5.566307987919768e-07, "loss": 0.5015, "step": 27612 }, { "epoch": 0.4799840080654974, "grad_norm": 1.6151112985678198, "learning_rate": 5.566028305007896e-07, "loss": 0.2409, "step": 27613 }, { "epoch": 0.48000139060300023, "grad_norm": 1.0528302656922184, "learning_rate": 5.565748620301975e-07, "loss": 0.2138, "step": 27614 }, { "epoch": 0.48001877314050306, "grad_norm": 2.0364147167584172, "learning_rate": 5.565468933802893e-07, "loss": 0.2322, "step": 27615 }, { "epoch": 0.4800361556780059, "grad_norm": 2.198844844312185, "learning_rate": 5.565189245511532e-07, "loss": 0.4314, "step": 27616 }, { "epoch": 0.4800535382155087, "grad_norm": 1.2214016352840855, "learning_rate": 5.56490955542878e-07, "loss": 0.2518, "step": 27617 }, { "epoch": 0.4800709207530115, "grad_norm": 0.9581164576692275, "learning_rate": 5.564629863555527e-07, "loss": 0.2144, "step": 27618 }, { "epoch": 0.48008830329051433, "grad_norm": 4.734986903666196, "learning_rate": 5.564350169892655e-07, "loss": 0.3335, "step": 27619 }, { "epoch": 0.48010568582801716, "grad_norm": 1.3879413591572844, "learning_rate": 5.564070474441052e-07, "loss": 0.3111, "step": 27620 }, { "epoch": 0.48012306836552, "grad_norm": 1.3503415762271165, "learning_rate": 5.563790777201605e-07, "loss": 0.3561, "step": 27621 }, { "epoch": 0.4801404509030228, "grad_norm": 2.616281805290846, "learning_rate": 5.563511078175202e-07, "loss": 0.2765, "step": 27622 }, { "epoch": 0.48015783344052565, "grad_norm": 2.4725587873715975, "learning_rate": 5.563231377362725e-07, "loss": 0.2972, "step": 27623 }, { "epoch": 0.4801752159780285, "grad_norm": 1.1927331968363182, "learning_rate": 5.562951674765066e-07, "loss": 0.2653, "step": 27624 }, { "epoch": 0.4801925985155313, "grad_norm": 1.9191205513966352, "learning_rate": 5.562671970383105e-07, "loss": 0.1474, "step": 27625 }, { "epoch": 0.48020998105303414, "grad_norm": 2.232864835493292, "learning_rate": 5.562392264217736e-07, "loss": 0.431, "step": 27626 }, { "epoch": 0.48022736359053697, "grad_norm": 1.5745156963983566, "learning_rate": 5.56211255626984e-07, "loss": 0.3519, "step": 27627 }, { "epoch": 0.48024474612803975, "grad_norm": 1.448594227761472, "learning_rate": 5.561832846540306e-07, "loss": 0.3905, "step": 27628 }, { "epoch": 0.4802621286655426, "grad_norm": 1.82268568573167, "learning_rate": 5.561553135030019e-07, "loss": 0.2987, "step": 27629 }, { "epoch": 0.4802795112030454, "grad_norm": 1.05499640230691, "learning_rate": 5.561273421739869e-07, "loss": 0.2112, "step": 27630 }, { "epoch": 0.48029689374054824, "grad_norm": 1.7540343451675264, "learning_rate": 5.560993706670738e-07, "loss": 0.7642, "step": 27631 }, { "epoch": 0.48031427627805107, "grad_norm": 1.4069553532276498, "learning_rate": 5.560713989823513e-07, "loss": 0.3352, "step": 27632 }, { "epoch": 0.4803316588155539, "grad_norm": 1.2067376259147395, "learning_rate": 5.560434271199085e-07, "loss": 0.2556, "step": 27633 }, { "epoch": 0.4803490413530567, "grad_norm": 1.4630674591577912, "learning_rate": 5.560154550798337e-07, "loss": 0.2015, "step": 27634 }, { "epoch": 0.48036642389055956, "grad_norm": 1.7167023690508665, "learning_rate": 5.559874828622157e-07, "loss": 0.4195, "step": 27635 }, { "epoch": 0.4803838064280624, "grad_norm": 1.4187817248935357, "learning_rate": 5.559595104671429e-07, "loss": 0.2408, "step": 27636 }, { "epoch": 0.4804011889655652, "grad_norm": 2.08312219777414, "learning_rate": 5.559315378947045e-07, "loss": 0.253, "step": 27637 }, { "epoch": 0.480418571503068, "grad_norm": 1.415855660614989, "learning_rate": 5.559035651449885e-07, "loss": 0.3449, "step": 27638 }, { "epoch": 0.4804359540405708, "grad_norm": 2.2107551588016, "learning_rate": 5.55875592218084e-07, "loss": 0.332, "step": 27639 }, { "epoch": 0.48045333657807365, "grad_norm": 1.917019061597455, "learning_rate": 5.558476191140795e-07, "loss": 0.2473, "step": 27640 }, { "epoch": 0.4804707191155765, "grad_norm": 2.3534972082936867, "learning_rate": 5.558196458330638e-07, "loss": 0.8278, "step": 27641 }, { "epoch": 0.4804881016530793, "grad_norm": 1.0835876090172556, "learning_rate": 5.557916723751254e-07, "loss": 0.2718, "step": 27642 }, { "epoch": 0.48050548419058214, "grad_norm": 2.1035021656023587, "learning_rate": 5.557636987403531e-07, "loss": 0.3281, "step": 27643 }, { "epoch": 0.480522866728085, "grad_norm": 1.6842221900616197, "learning_rate": 5.557357249288354e-07, "loss": 0.3353, "step": 27644 }, { "epoch": 0.4805402492655878, "grad_norm": 1.4285947273741426, "learning_rate": 5.55707750940661e-07, "loss": 0.2236, "step": 27645 }, { "epoch": 0.48055763180309063, "grad_norm": 1.0016417820243544, "learning_rate": 5.556797767759188e-07, "loss": 0.2116, "step": 27646 }, { "epoch": 0.48057501434059346, "grad_norm": 1.5022627470757972, "learning_rate": 5.556518024346971e-07, "loss": 0.2919, "step": 27647 }, { "epoch": 0.48059239687809624, "grad_norm": 1.3689955858429974, "learning_rate": 5.556238279170847e-07, "loss": 0.3305, "step": 27648 }, { "epoch": 0.48060977941559907, "grad_norm": 1.7569579715235053, "learning_rate": 5.555958532231706e-07, "loss": 0.1448, "step": 27649 }, { "epoch": 0.4806271619531019, "grad_norm": 1.6520788295808786, "learning_rate": 5.55567878353043e-07, "loss": 0.3816, "step": 27650 }, { "epoch": 0.48064454449060473, "grad_norm": 2.5594347427848985, "learning_rate": 5.555399033067908e-07, "loss": 0.4867, "step": 27651 }, { "epoch": 0.48066192702810756, "grad_norm": 1.01071195228119, "learning_rate": 5.555119280845025e-07, "loss": 0.1947, "step": 27652 }, { "epoch": 0.4806793095656104, "grad_norm": 1.3634067672685997, "learning_rate": 5.55483952686267e-07, "loss": 0.2612, "step": 27653 }, { "epoch": 0.4806966921031132, "grad_norm": 2.211528894944562, "learning_rate": 5.554559771121729e-07, "loss": 0.3584, "step": 27654 }, { "epoch": 0.48071407464061605, "grad_norm": 2.0834552142653306, "learning_rate": 5.554280013623088e-07, "loss": 0.2863, "step": 27655 }, { "epoch": 0.4807314571781189, "grad_norm": 2.635329412079097, "learning_rate": 5.554000254367633e-07, "loss": 0.4182, "step": 27656 }, { "epoch": 0.4807488397156217, "grad_norm": 1.6467114465531478, "learning_rate": 5.553720493356252e-07, "loss": 0.2235, "step": 27657 }, { "epoch": 0.4807662222531245, "grad_norm": 1.3969218014805136, "learning_rate": 5.553440730589832e-07, "loss": 0.2159, "step": 27658 }, { "epoch": 0.4807836047906273, "grad_norm": 1.6415232195157878, "learning_rate": 5.553160966069259e-07, "loss": 0.4234, "step": 27659 }, { "epoch": 0.48080098732813015, "grad_norm": 1.8731914862736962, "learning_rate": 5.55288119979542e-07, "loss": 0.2873, "step": 27660 }, { "epoch": 0.480818369865633, "grad_norm": 1.4305037717977787, "learning_rate": 5.5526014317692e-07, "loss": 0.2196, "step": 27661 }, { "epoch": 0.4808357524031358, "grad_norm": 1.7588672638893867, "learning_rate": 5.55232166199149e-07, "loss": 0.1839, "step": 27662 }, { "epoch": 0.48085313494063864, "grad_norm": 1.1092277390520686, "learning_rate": 5.552041890463173e-07, "loss": 0.3506, "step": 27663 }, { "epoch": 0.48087051747814147, "grad_norm": 1.4320185470521092, "learning_rate": 5.551762117185136e-07, "loss": 0.2039, "step": 27664 }, { "epoch": 0.4808879000156443, "grad_norm": 1.9629120752168812, "learning_rate": 5.551482342158267e-07, "loss": 0.3751, "step": 27665 }, { "epoch": 0.48090528255314713, "grad_norm": 1.6634358715437245, "learning_rate": 5.551202565383452e-07, "loss": 0.2638, "step": 27666 }, { "epoch": 0.48092266509064996, "grad_norm": 1.2066829351835668, "learning_rate": 5.550922786861578e-07, "loss": 0.291, "step": 27667 }, { "epoch": 0.48094004762815273, "grad_norm": 1.5213602720430106, "learning_rate": 5.550643006593534e-07, "loss": 0.2406, "step": 27668 }, { "epoch": 0.48095743016565556, "grad_norm": 1.261658803550321, "learning_rate": 5.550363224580202e-07, "loss": 0.1699, "step": 27669 }, { "epoch": 0.4809748127031584, "grad_norm": 1.3216069547320612, "learning_rate": 5.550083440822472e-07, "loss": 0.2609, "step": 27670 }, { "epoch": 0.4809921952406612, "grad_norm": 1.5731667877806446, "learning_rate": 5.54980365532123e-07, "loss": 0.5656, "step": 27671 }, { "epoch": 0.48100957777816405, "grad_norm": 2.5863468585321807, "learning_rate": 5.549523868077364e-07, "loss": 0.2407, "step": 27672 }, { "epoch": 0.4810269603156669, "grad_norm": 1.9511741272967853, "learning_rate": 5.549244079091758e-07, "loss": 0.4615, "step": 27673 }, { "epoch": 0.4810443428531697, "grad_norm": 1.4689804353633578, "learning_rate": 5.548964288365302e-07, "loss": 0.1831, "step": 27674 }, { "epoch": 0.48106172539067255, "grad_norm": 4.243206143323034, "learning_rate": 5.548684495898881e-07, "loss": 0.5493, "step": 27675 }, { "epoch": 0.4810791079281754, "grad_norm": 1.3683654106326497, "learning_rate": 5.548404701693383e-07, "loss": 0.1705, "step": 27676 }, { "epoch": 0.4810964904656782, "grad_norm": 2.1093571019138286, "learning_rate": 5.548124905749693e-07, "loss": 0.3664, "step": 27677 }, { "epoch": 0.481113873003181, "grad_norm": 1.5467627027043647, "learning_rate": 5.547845108068699e-07, "loss": 0.2441, "step": 27678 }, { "epoch": 0.4811312555406838, "grad_norm": 1.8795552120922425, "learning_rate": 5.547565308651288e-07, "loss": 0.2628, "step": 27679 }, { "epoch": 0.48114863807818664, "grad_norm": 2.3212217822313996, "learning_rate": 5.547285507498346e-07, "loss": 0.2594, "step": 27680 }, { "epoch": 0.48116602061568947, "grad_norm": 1.365478448461808, "learning_rate": 5.547005704610762e-07, "loss": 0.5527, "step": 27681 }, { "epoch": 0.4811834031531923, "grad_norm": 1.642409569408012, "learning_rate": 5.54672589998942e-07, "loss": 0.2719, "step": 27682 }, { "epoch": 0.48120078569069513, "grad_norm": 1.4803328775405464, "learning_rate": 5.546446093635207e-07, "loss": 0.3552, "step": 27683 }, { "epoch": 0.48121816822819796, "grad_norm": 1.0881173842158987, "learning_rate": 5.54616628554901e-07, "loss": 0.254, "step": 27684 }, { "epoch": 0.4812355507657008, "grad_norm": 1.0822092563573966, "learning_rate": 5.54588647573172e-07, "loss": 0.202, "step": 27685 }, { "epoch": 0.4812529333032036, "grad_norm": 1.1054312785150604, "learning_rate": 5.545606664184218e-07, "loss": 0.2728, "step": 27686 }, { "epoch": 0.48127031584070645, "grad_norm": 1.200128736134619, "learning_rate": 5.545326850907396e-07, "loss": 0.2686, "step": 27687 }, { "epoch": 0.4812876983782092, "grad_norm": 1.9039583385998262, "learning_rate": 5.545047035902136e-07, "loss": 0.2309, "step": 27688 }, { "epoch": 0.48130508091571206, "grad_norm": 1.623511351807803, "learning_rate": 5.544767219169328e-07, "loss": 0.1761, "step": 27689 }, { "epoch": 0.4813224634532149, "grad_norm": 0.7142311408250402, "learning_rate": 5.544487400709859e-07, "loss": 0.3146, "step": 27690 }, { "epoch": 0.4813398459907177, "grad_norm": 0.9077108176881824, "learning_rate": 5.544207580524615e-07, "loss": 0.353, "step": 27691 }, { "epoch": 0.48135722852822055, "grad_norm": 1.1350909360807622, "learning_rate": 5.543927758614482e-07, "loss": 0.4121, "step": 27692 }, { "epoch": 0.4813746110657234, "grad_norm": 1.3138957279681303, "learning_rate": 5.543647934980349e-07, "loss": 0.3184, "step": 27693 }, { "epoch": 0.4813919936032262, "grad_norm": 1.3992228041585415, "learning_rate": 5.543368109623102e-07, "loss": 0.2528, "step": 27694 }, { "epoch": 0.48140937614072904, "grad_norm": 0.8600837881079776, "learning_rate": 5.543088282543625e-07, "loss": 0.2732, "step": 27695 }, { "epoch": 0.48142675867823187, "grad_norm": 1.9692824879525601, "learning_rate": 5.542808453742809e-07, "loss": 0.2197, "step": 27696 }, { "epoch": 0.4814441412157347, "grad_norm": 1.7714389654009453, "learning_rate": 5.542528623221542e-07, "loss": 0.1264, "step": 27697 }, { "epoch": 0.4814615237532375, "grad_norm": 2.0978967878378247, "learning_rate": 5.542248790980705e-07, "loss": 0.2591, "step": 27698 }, { "epoch": 0.4814789062907403, "grad_norm": 2.184200539396766, "learning_rate": 5.54196895702119e-07, "loss": 0.4547, "step": 27699 }, { "epoch": 0.48149628882824314, "grad_norm": 2.309850418084387, "learning_rate": 5.541689121343884e-07, "loss": 0.3015, "step": 27700 }, { "epoch": 0.48151367136574597, "grad_norm": 1.1705325371375734, "learning_rate": 5.54140928394967e-07, "loss": 0.1943, "step": 27701 }, { "epoch": 0.4815310539032488, "grad_norm": 2.701118051287729, "learning_rate": 5.541129444839437e-07, "loss": 0.2247, "step": 27702 }, { "epoch": 0.4815484364407516, "grad_norm": 2.4368104196572284, "learning_rate": 5.540849604014074e-07, "loss": 0.5166, "step": 27703 }, { "epoch": 0.48156581897825446, "grad_norm": 1.3084665305455043, "learning_rate": 5.540569761474464e-07, "loss": 0.3786, "step": 27704 }, { "epoch": 0.4815832015157573, "grad_norm": 1.2317572353459731, "learning_rate": 5.540289917221498e-07, "loss": 0.6073, "step": 27705 }, { "epoch": 0.4816005840532601, "grad_norm": 23.33042397822098, "learning_rate": 5.54001007125606e-07, "loss": 0.5426, "step": 27706 }, { "epoch": 0.48161796659076295, "grad_norm": 1.2755499926827247, "learning_rate": 5.53973022357904e-07, "loss": 0.3293, "step": 27707 }, { "epoch": 0.4816353491282657, "grad_norm": 1.3572758819718476, "learning_rate": 5.539450374191321e-07, "loss": 0.2237, "step": 27708 }, { "epoch": 0.48165273166576855, "grad_norm": 1.105812376656063, "learning_rate": 5.539170523093793e-07, "loss": 0.2821, "step": 27709 }, { "epoch": 0.4816701142032714, "grad_norm": 1.0347876124334057, "learning_rate": 5.538890670287343e-07, "loss": 0.1387, "step": 27710 }, { "epoch": 0.4816874967407742, "grad_norm": 2.3769244100341376, "learning_rate": 5.538610815772856e-07, "loss": 0.2931, "step": 27711 }, { "epoch": 0.48170487927827704, "grad_norm": 1.6858054104243694, "learning_rate": 5.53833095955122e-07, "loss": 0.4476, "step": 27712 }, { "epoch": 0.4817222618157799, "grad_norm": 1.127324062940121, "learning_rate": 5.538051101623322e-07, "loss": 0.2602, "step": 27713 }, { "epoch": 0.4817396443532827, "grad_norm": 1.139626343193698, "learning_rate": 5.53777124199005e-07, "loss": 0.3856, "step": 27714 }, { "epoch": 0.48175702689078553, "grad_norm": 1.7781252702518988, "learning_rate": 5.537491380652291e-07, "loss": 0.3683, "step": 27715 }, { "epoch": 0.48177440942828836, "grad_norm": 2.1347599358353864, "learning_rate": 5.537211517610929e-07, "loss": 0.2949, "step": 27716 }, { "epoch": 0.48179179196579114, "grad_norm": 1.545528165088617, "learning_rate": 5.536931652866854e-07, "loss": 0.2277, "step": 27717 }, { "epoch": 0.48180917450329397, "grad_norm": 4.197977019047274, "learning_rate": 5.536651786420953e-07, "loss": 0.4764, "step": 27718 }, { "epoch": 0.4818265570407968, "grad_norm": 1.6160829539664447, "learning_rate": 5.536371918274112e-07, "loss": 0.2632, "step": 27719 }, { "epoch": 0.48184393957829963, "grad_norm": 1.2839065078232264, "learning_rate": 5.536092048427219e-07, "loss": 0.1715, "step": 27720 }, { "epoch": 0.48186132211580246, "grad_norm": 1.7304632011072651, "learning_rate": 5.535812176881158e-07, "loss": 0.1979, "step": 27721 }, { "epoch": 0.4818787046533053, "grad_norm": 1.34994302863998, "learning_rate": 5.535532303636823e-07, "loss": 0.2456, "step": 27722 }, { "epoch": 0.4818960871908081, "grad_norm": 2.9942687994198423, "learning_rate": 5.535252428695093e-07, "loss": 0.242, "step": 27723 }, { "epoch": 0.48191346972831095, "grad_norm": 1.4686802191200783, "learning_rate": 5.534972552056859e-07, "loss": 0.3375, "step": 27724 }, { "epoch": 0.4819308522658138, "grad_norm": 1.5696269889119343, "learning_rate": 5.534692673723009e-07, "loss": 0.2868, "step": 27725 }, { "epoch": 0.4819482348033166, "grad_norm": 2.662504479627418, "learning_rate": 5.534412793694428e-07, "loss": 0.3878, "step": 27726 }, { "epoch": 0.4819656173408194, "grad_norm": 1.4082945243113056, "learning_rate": 5.534132911972004e-07, "loss": 0.2442, "step": 27727 }, { "epoch": 0.4819829998783222, "grad_norm": 1.6191184448259683, "learning_rate": 5.533853028556625e-07, "loss": 0.4784, "step": 27728 }, { "epoch": 0.48200038241582505, "grad_norm": 2.254221022252782, "learning_rate": 5.533573143449177e-07, "loss": 0.3619, "step": 27729 }, { "epoch": 0.4820177649533279, "grad_norm": 0.8038887907072387, "learning_rate": 5.533293256650546e-07, "loss": 0.2312, "step": 27730 }, { "epoch": 0.4820351474908307, "grad_norm": 2.290217558813445, "learning_rate": 5.533013368161621e-07, "loss": 0.3323, "step": 27731 }, { "epoch": 0.48205253002833354, "grad_norm": 2.7763400868926755, "learning_rate": 5.532733477983289e-07, "loss": 0.4355, "step": 27732 }, { "epoch": 0.48206991256583637, "grad_norm": 1.2156369803034401, "learning_rate": 5.532453586116435e-07, "loss": 0.3237, "step": 27733 }, { "epoch": 0.4820872951033392, "grad_norm": 1.8477947861419013, "learning_rate": 5.532173692561949e-07, "loss": 0.2351, "step": 27734 }, { "epoch": 0.482104677640842, "grad_norm": 2.137452098385277, "learning_rate": 5.531893797320717e-07, "loss": 0.3266, "step": 27735 }, { "epoch": 0.48212206017834486, "grad_norm": 4.303063767131718, "learning_rate": 5.531613900393625e-07, "loss": 0.3658, "step": 27736 }, { "epoch": 0.48213944271584763, "grad_norm": 1.4742718481541845, "learning_rate": 5.531334001781561e-07, "loss": 0.2013, "step": 27737 }, { "epoch": 0.48215682525335046, "grad_norm": 2.696108021873387, "learning_rate": 5.531054101485414e-07, "loss": 0.2869, "step": 27738 }, { "epoch": 0.4821742077908533, "grad_norm": 5.638626232912037, "learning_rate": 5.530774199506068e-07, "loss": 0.4529, "step": 27739 }, { "epoch": 0.4821915903283561, "grad_norm": 1.6311573967353394, "learning_rate": 5.530494295844412e-07, "loss": 0.3574, "step": 27740 }, { "epoch": 0.48220897286585895, "grad_norm": 1.403326598041074, "learning_rate": 5.530214390501334e-07, "loss": 0.2842, "step": 27741 }, { "epoch": 0.4822263554033618, "grad_norm": 1.77900715513673, "learning_rate": 5.529934483477718e-07, "loss": 0.2357, "step": 27742 }, { "epoch": 0.4822437379408646, "grad_norm": 1.0878869480423317, "learning_rate": 5.529654574774454e-07, "loss": 0.2488, "step": 27743 }, { "epoch": 0.48226112047836744, "grad_norm": 2.500169049787395, "learning_rate": 5.529374664392429e-07, "loss": 0.2667, "step": 27744 }, { "epoch": 0.4822785030158703, "grad_norm": 1.3451823744870886, "learning_rate": 5.529094752332529e-07, "loss": 0.3421, "step": 27745 }, { "epoch": 0.4822958855533731, "grad_norm": 4.177242887401261, "learning_rate": 5.52881483859564e-07, "loss": 0.3364, "step": 27746 }, { "epoch": 0.4823132680908759, "grad_norm": 1.70754149690317, "learning_rate": 5.528534923182654e-07, "loss": 0.2, "step": 27747 }, { "epoch": 0.4823306506283787, "grad_norm": 2.8719266868849247, "learning_rate": 5.528255006094452e-07, "loss": 0.466, "step": 27748 }, { "epoch": 0.48234803316588154, "grad_norm": 2.5925875773004843, "learning_rate": 5.527975087331927e-07, "loss": 0.3259, "step": 27749 }, { "epoch": 0.48236541570338437, "grad_norm": 2.3001142431498787, "learning_rate": 5.527695166895962e-07, "loss": 0.2157, "step": 27750 }, { "epoch": 0.4823827982408872, "grad_norm": 1.4520630649343, "learning_rate": 5.527415244787446e-07, "loss": 0.2609, "step": 27751 }, { "epoch": 0.48240018077839003, "grad_norm": 1.3419048052583393, "learning_rate": 5.527135321007265e-07, "loss": 0.2427, "step": 27752 }, { "epoch": 0.48241756331589286, "grad_norm": 2.389187766046803, "learning_rate": 5.526855395556308e-07, "loss": 0.2735, "step": 27753 }, { "epoch": 0.4824349458533957, "grad_norm": 1.6696700658578087, "learning_rate": 5.526575468435461e-07, "loss": 0.291, "step": 27754 }, { "epoch": 0.4824523283908985, "grad_norm": 1.9162341711534963, "learning_rate": 5.526295539645613e-07, "loss": 0.1975, "step": 27755 }, { "epoch": 0.48246971092840135, "grad_norm": 1.876922835756263, "learning_rate": 5.526015609187647e-07, "loss": 0.3215, "step": 27756 }, { "epoch": 0.4824870934659041, "grad_norm": 2.4888229743951418, "learning_rate": 5.525735677062457e-07, "loss": 0.2112, "step": 27757 }, { "epoch": 0.48250447600340696, "grad_norm": 1.3727858026861073, "learning_rate": 5.525455743270924e-07, "loss": 0.1998, "step": 27758 }, { "epoch": 0.4825218585409098, "grad_norm": 1.9785644475008437, "learning_rate": 5.525175807813938e-07, "loss": 0.2245, "step": 27759 }, { "epoch": 0.4825392410784126, "grad_norm": 2.68998535376514, "learning_rate": 5.524895870692386e-07, "loss": 0.1318, "step": 27760 }, { "epoch": 0.48255662361591545, "grad_norm": 1.6205147343279098, "learning_rate": 5.524615931907156e-07, "loss": 0.3137, "step": 27761 }, { "epoch": 0.4825740061534183, "grad_norm": 1.7588229311370234, "learning_rate": 5.524335991459133e-07, "loss": 0.2988, "step": 27762 }, { "epoch": 0.4825913886909211, "grad_norm": 0.7883083953735318, "learning_rate": 5.524056049349209e-07, "loss": 0.3142, "step": 27763 }, { "epoch": 0.48260877122842394, "grad_norm": 1.5243389183332352, "learning_rate": 5.523776105578263e-07, "loss": 0.3119, "step": 27764 }, { "epoch": 0.48262615376592677, "grad_norm": 1.7008423884640733, "learning_rate": 5.523496160147191e-07, "loss": 0.1932, "step": 27765 }, { "epoch": 0.4826435363034296, "grad_norm": 1.5966358619447634, "learning_rate": 5.523216213056876e-07, "loss": 0.3291, "step": 27766 }, { "epoch": 0.4826609188409324, "grad_norm": 1.3645405253618055, "learning_rate": 5.522936264308206e-07, "loss": 0.2708, "step": 27767 }, { "epoch": 0.4826783013784352, "grad_norm": 2.371416825738735, "learning_rate": 5.522656313902066e-07, "loss": 0.2465, "step": 27768 }, { "epoch": 0.48269568391593803, "grad_norm": 1.7414108291981516, "learning_rate": 5.522376361839348e-07, "loss": 0.3907, "step": 27769 }, { "epoch": 0.48271306645344086, "grad_norm": 1.5200518790142148, "learning_rate": 5.522096408120937e-07, "loss": 0.3037, "step": 27770 }, { "epoch": 0.4827304489909437, "grad_norm": 1.7586222926650317, "learning_rate": 5.52181645274772e-07, "loss": 0.2212, "step": 27771 }, { "epoch": 0.4827478315284465, "grad_norm": 2.7730966049548904, "learning_rate": 5.521536495720582e-07, "loss": 0.3007, "step": 27772 }, { "epoch": 0.48276521406594936, "grad_norm": 1.4993370406921975, "learning_rate": 5.521256537040417e-07, "loss": 0.2892, "step": 27773 }, { "epoch": 0.4827825966034522, "grad_norm": 1.7176912716042547, "learning_rate": 5.520976576708105e-07, "loss": 0.3285, "step": 27774 }, { "epoch": 0.482799979140955, "grad_norm": 3.550891062088649, "learning_rate": 5.520696614724537e-07, "loss": 0.4675, "step": 27775 }, { "epoch": 0.48281736167845785, "grad_norm": 1.4292836198714143, "learning_rate": 5.520416651090603e-07, "loss": 0.1406, "step": 27776 }, { "epoch": 0.4828347442159606, "grad_norm": 2.200464511761747, "learning_rate": 5.520136685807183e-07, "loss": 0.272, "step": 27777 }, { "epoch": 0.48285212675346345, "grad_norm": 2.3138249527666255, "learning_rate": 5.519856718875171e-07, "loss": 0.3575, "step": 27778 }, { "epoch": 0.4828695092909663, "grad_norm": 1.7426467606647322, "learning_rate": 5.519576750295452e-07, "loss": 0.292, "step": 27779 }, { "epoch": 0.4828868918284691, "grad_norm": 2.3807519176450076, "learning_rate": 5.519296780068913e-07, "loss": 0.2132, "step": 27780 }, { "epoch": 0.48290427436597194, "grad_norm": 1.397141306295281, "learning_rate": 5.519016808196442e-07, "loss": 0.2797, "step": 27781 }, { "epoch": 0.48292165690347477, "grad_norm": 2.108960742569696, "learning_rate": 5.518736834678926e-07, "loss": 0.45, "step": 27782 }, { "epoch": 0.4829390394409776, "grad_norm": 1.0794913082692223, "learning_rate": 5.518456859517252e-07, "loss": 0.1833, "step": 27783 }, { "epoch": 0.48295642197848043, "grad_norm": 0.9686342854815699, "learning_rate": 5.518176882712309e-07, "loss": 0.1599, "step": 27784 }, { "epoch": 0.48297380451598326, "grad_norm": 2.0941569054277775, "learning_rate": 5.517896904264983e-07, "loss": 0.3448, "step": 27785 }, { "epoch": 0.4829911870534861, "grad_norm": 1.2817205031639378, "learning_rate": 5.51761692417616e-07, "loss": 0.3384, "step": 27786 }, { "epoch": 0.48300856959098887, "grad_norm": 1.8978507797786723, "learning_rate": 5.517336942446731e-07, "loss": 0.2468, "step": 27787 }, { "epoch": 0.4830259521284917, "grad_norm": 3.0918294107236863, "learning_rate": 5.517056959077582e-07, "loss": 0.2464, "step": 27788 }, { "epoch": 0.48304333466599453, "grad_norm": 1.3569157619438357, "learning_rate": 5.516776974069598e-07, "loss": 0.3701, "step": 27789 }, { "epoch": 0.48306071720349736, "grad_norm": 1.4662818522706864, "learning_rate": 5.516496987423669e-07, "loss": 0.2563, "step": 27790 }, { "epoch": 0.4830780997410002, "grad_norm": 1.1237946899571811, "learning_rate": 5.516216999140683e-07, "loss": 0.3192, "step": 27791 }, { "epoch": 0.483095482278503, "grad_norm": 0.9834701327486958, "learning_rate": 5.515937009221525e-07, "loss": 0.1676, "step": 27792 }, { "epoch": 0.48311286481600585, "grad_norm": 1.5114551893511363, "learning_rate": 5.515657017667083e-07, "loss": 0.4479, "step": 27793 }, { "epoch": 0.4831302473535087, "grad_norm": 2.3178187698726522, "learning_rate": 5.515377024478247e-07, "loss": 0.3867, "step": 27794 }, { "epoch": 0.4831476298910115, "grad_norm": 1.7926052702287247, "learning_rate": 5.515097029655902e-07, "loss": 0.2873, "step": 27795 }, { "epoch": 0.48316501242851434, "grad_norm": 1.4838364638024857, "learning_rate": 5.514817033200936e-07, "loss": 0.3341, "step": 27796 }, { "epoch": 0.4831823949660171, "grad_norm": 2.098254743206317, "learning_rate": 5.514537035114235e-07, "loss": 0.2694, "step": 27797 }, { "epoch": 0.48319977750351994, "grad_norm": 1.1051580375313508, "learning_rate": 5.51425703539669e-07, "loss": 0.2093, "step": 27798 }, { "epoch": 0.4832171600410228, "grad_norm": 1.6040269921426031, "learning_rate": 5.513977034049185e-07, "loss": 0.277, "step": 27799 }, { "epoch": 0.4832345425785256, "grad_norm": 2.5278427624442403, "learning_rate": 5.513697031072611e-07, "loss": 0.3066, "step": 27800 }, { "epoch": 0.48325192511602844, "grad_norm": 2.254208503917057, "learning_rate": 5.513417026467851e-07, "loss": 0.2745, "step": 27801 }, { "epoch": 0.48326930765353127, "grad_norm": 1.7397707940608296, "learning_rate": 5.513137020235796e-07, "loss": 0.2632, "step": 27802 }, { "epoch": 0.4832866901910341, "grad_norm": 1.815130209989812, "learning_rate": 5.512857012377332e-07, "loss": 0.376, "step": 27803 }, { "epoch": 0.4833040727285369, "grad_norm": 1.5440017751839454, "learning_rate": 5.512577002893347e-07, "loss": 0.2901, "step": 27804 }, { "epoch": 0.48332145526603976, "grad_norm": 1.1170016548187034, "learning_rate": 5.512296991784729e-07, "loss": 0.2781, "step": 27805 }, { "epoch": 0.4833388378035426, "grad_norm": 2.1920126921606977, "learning_rate": 5.512016979052363e-07, "loss": 0.3481, "step": 27806 }, { "epoch": 0.48335622034104536, "grad_norm": 1.3873370874789561, "learning_rate": 5.511736964697142e-07, "loss": 0.2218, "step": 27807 }, { "epoch": 0.4833736028785482, "grad_norm": 3.166009756386516, "learning_rate": 5.511456948719945e-07, "loss": 0.412, "step": 27808 }, { "epoch": 0.483390985416051, "grad_norm": 2.2431899828459576, "learning_rate": 5.511176931121668e-07, "loss": 0.3946, "step": 27809 }, { "epoch": 0.48340836795355385, "grad_norm": 1.470620571292695, "learning_rate": 5.510896911903195e-07, "loss": 0.2613, "step": 27810 }, { "epoch": 0.4834257504910567, "grad_norm": 1.7834053050001835, "learning_rate": 5.510616891065411e-07, "loss": 0.4019, "step": 27811 }, { "epoch": 0.4834431330285595, "grad_norm": 1.8134162162758634, "learning_rate": 5.510336868609208e-07, "loss": 0.2821, "step": 27812 }, { "epoch": 0.48346051556606234, "grad_norm": 1.6104787917039616, "learning_rate": 5.510056844535471e-07, "loss": 0.2768, "step": 27813 }, { "epoch": 0.4834778981035652, "grad_norm": 1.6443680510220309, "learning_rate": 5.509776818845088e-07, "loss": 0.1566, "step": 27814 }, { "epoch": 0.483495280641068, "grad_norm": 1.1834396309013093, "learning_rate": 5.509496791538947e-07, "loss": 0.2145, "step": 27815 }, { "epoch": 0.48351266317857083, "grad_norm": 2.921851701536686, "learning_rate": 5.509216762617934e-07, "loss": 0.323, "step": 27816 }, { "epoch": 0.4835300457160736, "grad_norm": 2.062150828813395, "learning_rate": 5.508936732082941e-07, "loss": 0.3312, "step": 27817 }, { "epoch": 0.48354742825357644, "grad_norm": 2.3590454666647473, "learning_rate": 5.508656699934848e-07, "loss": 0.25, "step": 27818 }, { "epoch": 0.48356481079107927, "grad_norm": 1.5698220630596909, "learning_rate": 5.508376666174549e-07, "loss": 0.3871, "step": 27819 }, { "epoch": 0.4835821933285821, "grad_norm": 1.481620015407935, "learning_rate": 5.50809663080293e-07, "loss": 0.3308, "step": 27820 }, { "epoch": 0.48359957586608493, "grad_norm": 0.9286929103952188, "learning_rate": 5.507816593820877e-07, "loss": 0.2258, "step": 27821 }, { "epoch": 0.48361695840358776, "grad_norm": 1.373858467389296, "learning_rate": 5.50753655522928e-07, "loss": 0.2849, "step": 27822 }, { "epoch": 0.4836343409410906, "grad_norm": 1.7653120162450102, "learning_rate": 5.507256515029025e-07, "loss": 0.2612, "step": 27823 }, { "epoch": 0.4836517234785934, "grad_norm": 2.228020799228965, "learning_rate": 5.506976473220999e-07, "loss": 0.3812, "step": 27824 }, { "epoch": 0.48366910601609625, "grad_norm": 3.114244873053561, "learning_rate": 5.506696429806091e-07, "loss": 0.3995, "step": 27825 }, { "epoch": 0.4836864885535991, "grad_norm": 1.255505209699232, "learning_rate": 5.506416384785188e-07, "loss": 0.2889, "step": 27826 }, { "epoch": 0.48370387109110186, "grad_norm": 1.5239232208183795, "learning_rate": 5.506136338159176e-07, "loss": 0.256, "step": 27827 }, { "epoch": 0.4837212536286047, "grad_norm": 2.737310770128015, "learning_rate": 5.505856289928946e-07, "loss": 0.2806, "step": 27828 }, { "epoch": 0.4837386361661075, "grad_norm": 2.149261200843508, "learning_rate": 5.505576240095384e-07, "loss": 0.3587, "step": 27829 }, { "epoch": 0.48375601870361035, "grad_norm": 4.019772068002464, "learning_rate": 5.505296188659378e-07, "loss": 0.4699, "step": 27830 }, { "epoch": 0.4837734012411132, "grad_norm": 1.534772432839603, "learning_rate": 5.505016135621814e-07, "loss": 0.2261, "step": 27831 }, { "epoch": 0.483790783778616, "grad_norm": 1.3649094091344847, "learning_rate": 5.504736080983581e-07, "loss": 0.4568, "step": 27832 }, { "epoch": 0.48380816631611884, "grad_norm": 1.3704283813134837, "learning_rate": 5.504456024745568e-07, "loss": 0.3158, "step": 27833 }, { "epoch": 0.48382554885362167, "grad_norm": 1.703816385545077, "learning_rate": 5.50417596690866e-07, "loss": 0.1949, "step": 27834 }, { "epoch": 0.4838429313911245, "grad_norm": 2.1176431246462823, "learning_rate": 5.503895907473744e-07, "loss": 0.1848, "step": 27835 }, { "epoch": 0.48386031392862733, "grad_norm": 1.8027354609875599, "learning_rate": 5.503615846441714e-07, "loss": 0.2442, "step": 27836 }, { "epoch": 0.4838776964661301, "grad_norm": 1.8318271860322732, "learning_rate": 5.503335783813449e-07, "loss": 0.277, "step": 27837 }, { "epoch": 0.48389507900363293, "grad_norm": 1.561600203768944, "learning_rate": 5.503055719589842e-07, "loss": 0.2662, "step": 27838 }, { "epoch": 0.48391246154113576, "grad_norm": 1.7017028701069252, "learning_rate": 5.502775653771779e-07, "loss": 0.3017, "step": 27839 }, { "epoch": 0.4839298440786386, "grad_norm": 1.8166105159292354, "learning_rate": 5.502495586360149e-07, "loss": 0.2205, "step": 27840 }, { "epoch": 0.4839472266161414, "grad_norm": 2.3013262472684817, "learning_rate": 5.502215517355838e-07, "loss": 0.2743, "step": 27841 }, { "epoch": 0.48396460915364425, "grad_norm": 1.2213149891478607, "learning_rate": 5.501935446759737e-07, "loss": 0.4327, "step": 27842 }, { "epoch": 0.4839819916911471, "grad_norm": 1.4904368498378628, "learning_rate": 5.501655374572728e-07, "loss": 0.3241, "step": 27843 }, { "epoch": 0.4839993742286499, "grad_norm": 1.6094272444116875, "learning_rate": 5.501375300795704e-07, "loss": 0.2833, "step": 27844 }, { "epoch": 0.48401675676615274, "grad_norm": 3.8720969422263996, "learning_rate": 5.50109522542955e-07, "loss": 0.2917, "step": 27845 }, { "epoch": 0.4840341393036556, "grad_norm": 1.2728494234327954, "learning_rate": 5.500815148475154e-07, "loss": 0.2007, "step": 27846 }, { "epoch": 0.48405152184115835, "grad_norm": 1.5483363423491947, "learning_rate": 5.500535069933403e-07, "loss": 0.3484, "step": 27847 }, { "epoch": 0.4840689043786612, "grad_norm": 1.1263411553994545, "learning_rate": 5.500254989805189e-07, "loss": 0.107, "step": 27848 }, { "epoch": 0.484086286916164, "grad_norm": 1.8699192842543289, "learning_rate": 5.499974908091393e-07, "loss": 0.3787, "step": 27849 }, { "epoch": 0.48410366945366684, "grad_norm": 1.7262252987278353, "learning_rate": 5.499694824792908e-07, "loss": 0.2954, "step": 27850 }, { "epoch": 0.48412105199116967, "grad_norm": 1.7726285036524074, "learning_rate": 5.499414739910619e-07, "loss": 0.3608, "step": 27851 }, { "epoch": 0.4841384345286725, "grad_norm": 1.4627216410789552, "learning_rate": 5.499134653445416e-07, "loss": 0.2741, "step": 27852 }, { "epoch": 0.48415581706617533, "grad_norm": 1.2651722965715053, "learning_rate": 5.498854565398183e-07, "loss": 0.2465, "step": 27853 }, { "epoch": 0.48417319960367816, "grad_norm": 2.034625957497641, "learning_rate": 5.498574475769813e-07, "loss": 0.2487, "step": 27854 }, { "epoch": 0.484190582141181, "grad_norm": 1.6143079126315365, "learning_rate": 5.49829438456119e-07, "loss": 0.3941, "step": 27855 }, { "epoch": 0.48420796467868377, "grad_norm": 2.374109781150863, "learning_rate": 5.498014291773202e-07, "loss": 0.1977, "step": 27856 }, { "epoch": 0.4842253472161866, "grad_norm": 1.6268886368519773, "learning_rate": 5.497734197406737e-07, "loss": 0.3572, "step": 27857 }, { "epoch": 0.4842427297536894, "grad_norm": 1.9597005199376116, "learning_rate": 5.497454101462685e-07, "loss": 0.3566, "step": 27858 }, { "epoch": 0.48426011229119226, "grad_norm": 0.9880182343918266, "learning_rate": 5.497174003941932e-07, "loss": 0.1831, "step": 27859 }, { "epoch": 0.4842774948286951, "grad_norm": 1.7022963385002028, "learning_rate": 5.496893904845365e-07, "loss": 0.3629, "step": 27860 }, { "epoch": 0.4842948773661979, "grad_norm": 1.8483916660592246, "learning_rate": 5.496613804173872e-07, "loss": 0.2003, "step": 27861 }, { "epoch": 0.48431225990370075, "grad_norm": 2.664982771274736, "learning_rate": 5.496333701928342e-07, "loss": 0.3552, "step": 27862 }, { "epoch": 0.4843296424412036, "grad_norm": 3.313917633075997, "learning_rate": 5.496053598109661e-07, "loss": 0.2647, "step": 27863 }, { "epoch": 0.4843470249787064, "grad_norm": 3.5612540553858536, "learning_rate": 5.49577349271872e-07, "loss": 0.3316, "step": 27864 }, { "epoch": 0.48436440751620924, "grad_norm": 1.311420540622991, "learning_rate": 5.495493385756403e-07, "loss": 0.1708, "step": 27865 }, { "epoch": 0.484381790053712, "grad_norm": 0.7256056323949235, "learning_rate": 5.4952132772236e-07, "loss": 0.1411, "step": 27866 }, { "epoch": 0.48439917259121484, "grad_norm": 1.7896615644722262, "learning_rate": 5.494933167121201e-07, "loss": 0.256, "step": 27867 }, { "epoch": 0.4844165551287177, "grad_norm": 3.2372206082282906, "learning_rate": 5.494653055450087e-07, "loss": 0.4003, "step": 27868 }, { "epoch": 0.4844339376662205, "grad_norm": 1.5757388718467842, "learning_rate": 5.494372942211153e-07, "loss": 0.3001, "step": 27869 }, { "epoch": 0.48445132020372333, "grad_norm": 1.5528361635863306, "learning_rate": 5.494092827405284e-07, "loss": 0.2034, "step": 27870 }, { "epoch": 0.48446870274122616, "grad_norm": 1.9299421994656285, "learning_rate": 5.493812711033366e-07, "loss": 0.4001, "step": 27871 }, { "epoch": 0.484486085278729, "grad_norm": 2.9197622755328347, "learning_rate": 5.493532593096288e-07, "loss": 0.3099, "step": 27872 }, { "epoch": 0.4845034678162318, "grad_norm": 1.2192802277070807, "learning_rate": 5.493252473594939e-07, "loss": 0.2708, "step": 27873 }, { "epoch": 0.48452085035373466, "grad_norm": 2.600638122953853, "learning_rate": 5.492972352530207e-07, "loss": 0.2046, "step": 27874 }, { "epoch": 0.4845382328912375, "grad_norm": 1.3021926359953544, "learning_rate": 5.49269222990298e-07, "loss": 0.1812, "step": 27875 }, { "epoch": 0.48455561542874026, "grad_norm": 1.1941913498541663, "learning_rate": 5.492412105714143e-07, "loss": 0.2237, "step": 27876 }, { "epoch": 0.4845729979662431, "grad_norm": 1.696076923526635, "learning_rate": 5.492131979964588e-07, "loss": 0.2107, "step": 27877 }, { "epoch": 0.4845903805037459, "grad_norm": 1.9132925254306326, "learning_rate": 5.491851852655199e-07, "loss": 0.2121, "step": 27878 }, { "epoch": 0.48460776304124875, "grad_norm": 1.5161983195657298, "learning_rate": 5.491571723786866e-07, "loss": 0.2635, "step": 27879 }, { "epoch": 0.4846251455787516, "grad_norm": 2.0904931823100883, "learning_rate": 5.491291593360476e-07, "loss": 0.3016, "step": 27880 }, { "epoch": 0.4846425281162544, "grad_norm": 1.1540028543945835, "learning_rate": 5.491011461376918e-07, "loss": 0.2497, "step": 27881 }, { "epoch": 0.48465991065375724, "grad_norm": 1.9912085166676767, "learning_rate": 5.490731327837078e-07, "loss": 0.2532, "step": 27882 }, { "epoch": 0.4846772931912601, "grad_norm": 0.9749260700195443, "learning_rate": 5.490451192741846e-07, "loss": 0.1652, "step": 27883 }, { "epoch": 0.4846946757287629, "grad_norm": 1.417412371756652, "learning_rate": 5.490171056092109e-07, "loss": 0.2965, "step": 27884 }, { "epoch": 0.48471205826626573, "grad_norm": 1.3782692847592533, "learning_rate": 5.489890917888755e-07, "loss": 0.306, "step": 27885 }, { "epoch": 0.4847294408037685, "grad_norm": 1.4952360887134397, "learning_rate": 5.489610778132673e-07, "loss": 0.3381, "step": 27886 }, { "epoch": 0.48474682334127134, "grad_norm": 2.1688254885515956, "learning_rate": 5.489330636824747e-07, "loss": 0.3673, "step": 27887 }, { "epoch": 0.48476420587877417, "grad_norm": 1.459066798096618, "learning_rate": 5.489050493965869e-07, "loss": 0.2464, "step": 27888 }, { "epoch": 0.484781588416277, "grad_norm": 1.759134909271298, "learning_rate": 5.488770349556926e-07, "loss": 0.2985, "step": 27889 }, { "epoch": 0.48479897095377983, "grad_norm": 1.2199070116261492, "learning_rate": 5.488490203598806e-07, "loss": 0.3043, "step": 27890 }, { "epoch": 0.48481635349128266, "grad_norm": 1.618723633929862, "learning_rate": 5.488210056092395e-07, "loss": 0.3832, "step": 27891 }, { "epoch": 0.4848337360287855, "grad_norm": 1.1491291990264054, "learning_rate": 5.487929907038584e-07, "loss": 0.2935, "step": 27892 }, { "epoch": 0.4848511185662883, "grad_norm": 1.297491181975725, "learning_rate": 5.487649756438258e-07, "loss": 0.2756, "step": 27893 }, { "epoch": 0.48486850110379115, "grad_norm": 1.5079841015471789, "learning_rate": 5.487369604292307e-07, "loss": 0.2815, "step": 27894 }, { "epoch": 0.484885883641294, "grad_norm": 1.339119716741394, "learning_rate": 5.487089450601617e-07, "loss": 0.1284, "step": 27895 }, { "epoch": 0.48490326617879675, "grad_norm": 2.275805185497061, "learning_rate": 5.486809295367078e-07, "loss": 0.4803, "step": 27896 }, { "epoch": 0.4849206487162996, "grad_norm": 1.8562603183725561, "learning_rate": 5.486529138589577e-07, "loss": 0.3357, "step": 27897 }, { "epoch": 0.4849380312538024, "grad_norm": 3.3399351148335983, "learning_rate": 5.486248980270003e-07, "loss": 0.3343, "step": 27898 }, { "epoch": 0.48495541379130525, "grad_norm": 2.418502706647077, "learning_rate": 5.485968820409242e-07, "loss": 0.3008, "step": 27899 }, { "epoch": 0.4849727963288081, "grad_norm": 2.4399009430161582, "learning_rate": 5.485688659008185e-07, "loss": 0.3777, "step": 27900 }, { "epoch": 0.4849901788663109, "grad_norm": 1.00136790312389, "learning_rate": 5.485408496067716e-07, "loss": 0.2129, "step": 27901 }, { "epoch": 0.48500756140381374, "grad_norm": 2.4226826799166257, "learning_rate": 5.485128331588727e-07, "loss": 0.3161, "step": 27902 }, { "epoch": 0.48502494394131657, "grad_norm": 1.600487074458783, "learning_rate": 5.484848165572101e-07, "loss": 0.2678, "step": 27903 }, { "epoch": 0.4850423264788194, "grad_norm": 1.6801068099882532, "learning_rate": 5.484567998018731e-07, "loss": 0.3072, "step": 27904 }, { "epoch": 0.4850597090163222, "grad_norm": 1.3581000966676113, "learning_rate": 5.484287828929503e-07, "loss": 0.1579, "step": 27905 }, { "epoch": 0.485077091553825, "grad_norm": 2.321635357590552, "learning_rate": 5.484007658305306e-07, "loss": 0.38, "step": 27906 }, { "epoch": 0.48509447409132783, "grad_norm": 1.1383104389645664, "learning_rate": 5.483727486147026e-07, "loss": 0.2657, "step": 27907 }, { "epoch": 0.48511185662883066, "grad_norm": 1.0981182002025411, "learning_rate": 5.483447312455551e-07, "loss": 0.17, "step": 27908 }, { "epoch": 0.4851292391663335, "grad_norm": 3.1018600466080075, "learning_rate": 5.483167137231771e-07, "loss": 0.2191, "step": 27909 }, { "epoch": 0.4851466217038363, "grad_norm": 1.6110760046380697, "learning_rate": 5.482886960476573e-07, "loss": 0.4804, "step": 27910 }, { "epoch": 0.48516400424133915, "grad_norm": 1.0556991354731988, "learning_rate": 5.482606782190846e-07, "loss": 0.2306, "step": 27911 }, { "epoch": 0.485181386778842, "grad_norm": 1.7642340297992856, "learning_rate": 5.482326602375476e-07, "loss": 0.3846, "step": 27912 }, { "epoch": 0.4851987693163448, "grad_norm": 2.624129196974326, "learning_rate": 5.482046421031354e-07, "loss": 0.305, "step": 27913 }, { "epoch": 0.48521615185384764, "grad_norm": 2.6855462785282724, "learning_rate": 5.481766238159365e-07, "loss": 0.3869, "step": 27914 }, { "epoch": 0.4852335343913505, "grad_norm": 2.1078589471436557, "learning_rate": 5.481486053760398e-07, "loss": 0.17, "step": 27915 }, { "epoch": 0.48525091692885325, "grad_norm": 1.8288416677218948, "learning_rate": 5.481205867835341e-07, "loss": 0.3683, "step": 27916 }, { "epoch": 0.4852682994663561, "grad_norm": 1.3034723136757973, "learning_rate": 5.480925680385084e-07, "loss": 0.2605, "step": 27917 }, { "epoch": 0.4852856820038589, "grad_norm": 1.5750252794778163, "learning_rate": 5.480645491410511e-07, "loss": 0.3133, "step": 27918 }, { "epoch": 0.48530306454136174, "grad_norm": 1.2211661617543463, "learning_rate": 5.480365300912515e-07, "loss": 0.2428, "step": 27919 }, { "epoch": 0.48532044707886457, "grad_norm": 1.258144749155896, "learning_rate": 5.480085108891981e-07, "loss": 0.2374, "step": 27920 }, { "epoch": 0.4853378296163674, "grad_norm": 2.301990449629315, "learning_rate": 5.479804915349798e-07, "loss": 0.2879, "step": 27921 }, { "epoch": 0.48535521215387023, "grad_norm": 2.520450128230739, "learning_rate": 5.479524720286854e-07, "loss": 0.2933, "step": 27922 }, { "epoch": 0.48537259469137306, "grad_norm": 1.403744135337919, "learning_rate": 5.479244523704036e-07, "loss": 0.2442, "step": 27923 }, { "epoch": 0.4853899772288759, "grad_norm": 1.6125069528391782, "learning_rate": 5.478964325602233e-07, "loss": 0.3987, "step": 27924 }, { "epoch": 0.4854073597663787, "grad_norm": 1.8368368470678162, "learning_rate": 5.478684125982335e-07, "loss": 0.3644, "step": 27925 }, { "epoch": 0.4854247423038815, "grad_norm": 1.740416810490042, "learning_rate": 5.478403924845225e-07, "loss": 0.3372, "step": 27926 }, { "epoch": 0.4854421248413843, "grad_norm": 1.6554246835041602, "learning_rate": 5.478123722191798e-07, "loss": 0.3236, "step": 27927 }, { "epoch": 0.48545950737888716, "grad_norm": 3.937130977533721, "learning_rate": 5.477843518022936e-07, "loss": 0.4065, "step": 27928 }, { "epoch": 0.48547688991639, "grad_norm": 1.448933720227512, "learning_rate": 5.47756331233953e-07, "loss": 0.1952, "step": 27929 }, { "epoch": 0.4854942724538928, "grad_norm": 1.3405470404794155, "learning_rate": 5.477283105142469e-07, "loss": 0.2546, "step": 27930 }, { "epoch": 0.48551165499139565, "grad_norm": 2.2343384228695244, "learning_rate": 5.477002896432638e-07, "loss": 0.2124, "step": 27931 }, { "epoch": 0.4855290375288985, "grad_norm": 1.8608226236199836, "learning_rate": 5.476722686210926e-07, "loss": 0.2164, "step": 27932 }, { "epoch": 0.4855464200664013, "grad_norm": 1.573895914368763, "learning_rate": 5.476442474478225e-07, "loss": 0.1235, "step": 27933 }, { "epoch": 0.48556380260390414, "grad_norm": 2.3871088544804677, "learning_rate": 5.476162261235418e-07, "loss": 0.168, "step": 27934 }, { "epoch": 0.48558118514140697, "grad_norm": 1.9668814572313456, "learning_rate": 5.475882046483397e-07, "loss": 0.3709, "step": 27935 }, { "epoch": 0.48559856767890974, "grad_norm": 2.7084250775489553, "learning_rate": 5.475601830223046e-07, "loss": 0.4542, "step": 27936 }, { "epoch": 0.4856159502164126, "grad_norm": 2.3970472144335546, "learning_rate": 5.475321612455259e-07, "loss": 0.2637, "step": 27937 }, { "epoch": 0.4856333327539154, "grad_norm": 1.8942021163851792, "learning_rate": 5.475041393180918e-07, "loss": 0.2634, "step": 27938 }, { "epoch": 0.48565071529141823, "grad_norm": 2.0274250114990005, "learning_rate": 5.474761172400916e-07, "loss": 0.3469, "step": 27939 }, { "epoch": 0.48566809782892106, "grad_norm": 2.1223226726932753, "learning_rate": 5.474480950116138e-07, "loss": 0.4264, "step": 27940 }, { "epoch": 0.4856854803664239, "grad_norm": 8.51934295165155, "learning_rate": 5.474200726327474e-07, "loss": 0.5725, "step": 27941 }, { "epoch": 0.4857028629039267, "grad_norm": 3.3750177132607218, "learning_rate": 5.47392050103581e-07, "loss": 0.369, "step": 27942 }, { "epoch": 0.48572024544142955, "grad_norm": 1.5721153183199599, "learning_rate": 5.473640274242037e-07, "loss": 0.1754, "step": 27943 }, { "epoch": 0.4857376279789324, "grad_norm": 1.258088519488312, "learning_rate": 5.473360045947043e-07, "loss": 0.349, "step": 27944 }, { "epoch": 0.4857550105164352, "grad_norm": 1.6153168320703561, "learning_rate": 5.473079816151713e-07, "loss": 0.2667, "step": 27945 }, { "epoch": 0.485772393053938, "grad_norm": 1.1574755097765212, "learning_rate": 5.47279958485694e-07, "loss": 0.195, "step": 27946 }, { "epoch": 0.4857897755914408, "grad_norm": 1.6863361509296673, "learning_rate": 5.472519352063606e-07, "loss": 0.3298, "step": 27947 }, { "epoch": 0.48580715812894365, "grad_norm": 1.8810492628189406, "learning_rate": 5.472239117772605e-07, "loss": 0.2363, "step": 27948 }, { "epoch": 0.4858245406664465, "grad_norm": 0.969540098562535, "learning_rate": 5.471958881984823e-07, "loss": 0.2984, "step": 27949 }, { "epoch": 0.4858419232039493, "grad_norm": 1.6571219001603048, "learning_rate": 5.471678644701147e-07, "loss": 0.2582, "step": 27950 }, { "epoch": 0.48585930574145214, "grad_norm": 1.3931894978256267, "learning_rate": 5.471398405922466e-07, "loss": 0.2006, "step": 27951 }, { "epoch": 0.48587668827895497, "grad_norm": 1.3326323649050564, "learning_rate": 5.47111816564967e-07, "loss": 0.242, "step": 27952 }, { "epoch": 0.4858940708164578, "grad_norm": 2.0499423018224214, "learning_rate": 5.470837923883646e-07, "loss": 0.2075, "step": 27953 }, { "epoch": 0.48591145335396063, "grad_norm": 1.8500463486878203, "learning_rate": 5.470557680625281e-07, "loss": 0.3921, "step": 27954 }, { "epoch": 0.48592883589146346, "grad_norm": 1.3161350143697512, "learning_rate": 5.470277435875464e-07, "loss": 0.2005, "step": 27955 }, { "epoch": 0.48594621842896624, "grad_norm": 1.243860149319101, "learning_rate": 5.469997189635085e-07, "loss": 0.3317, "step": 27956 }, { "epoch": 0.48596360096646907, "grad_norm": 2.048493748428673, "learning_rate": 5.469716941905029e-07, "loss": 0.2873, "step": 27957 }, { "epoch": 0.4859809835039719, "grad_norm": 1.754560190154252, "learning_rate": 5.469436692686188e-07, "loss": 0.2288, "step": 27958 }, { "epoch": 0.4859983660414747, "grad_norm": 1.8564751887993374, "learning_rate": 5.469156441979448e-07, "loss": 0.5123, "step": 27959 }, { "epoch": 0.48601574857897756, "grad_norm": 1.8288986898283668, "learning_rate": 5.468876189785697e-07, "loss": 0.3496, "step": 27960 }, { "epoch": 0.4860331311164804, "grad_norm": 1.1693110020260338, "learning_rate": 5.468595936105823e-07, "loss": 0.2417, "step": 27961 }, { "epoch": 0.4860505136539832, "grad_norm": 1.6818052456421253, "learning_rate": 5.468315680940717e-07, "loss": 0.2976, "step": 27962 }, { "epoch": 0.48606789619148605, "grad_norm": 1.5097865978637837, "learning_rate": 5.468035424291264e-07, "loss": 0.3246, "step": 27963 }, { "epoch": 0.4860852787289889, "grad_norm": 1.2920271914462536, "learning_rate": 5.467755166158354e-07, "loss": 0.3144, "step": 27964 }, { "epoch": 0.4861026612664917, "grad_norm": 2.0431429764362643, "learning_rate": 5.467474906542875e-07, "loss": 0.2463, "step": 27965 }, { "epoch": 0.4861200438039945, "grad_norm": 2.9628227804847866, "learning_rate": 5.467194645445716e-07, "loss": 0.3713, "step": 27966 }, { "epoch": 0.4861374263414973, "grad_norm": 1.2492575711647678, "learning_rate": 5.466914382867763e-07, "loss": 0.1963, "step": 27967 }, { "epoch": 0.48615480887900014, "grad_norm": 1.3652559762724343, "learning_rate": 5.466634118809907e-07, "loss": 0.2364, "step": 27968 }, { "epoch": 0.486172191416503, "grad_norm": 2.7792512466830517, "learning_rate": 5.466353853273036e-07, "loss": 0.2836, "step": 27969 }, { "epoch": 0.4861895739540058, "grad_norm": 2.1846055812764025, "learning_rate": 5.466073586258036e-07, "loss": 0.2617, "step": 27970 }, { "epoch": 0.48620695649150864, "grad_norm": 1.5656557549724788, "learning_rate": 5.465793317765798e-07, "loss": 0.2501, "step": 27971 }, { "epoch": 0.48622433902901147, "grad_norm": 2.065165150649489, "learning_rate": 5.465513047797208e-07, "loss": 0.375, "step": 27972 }, { "epoch": 0.4862417215665143, "grad_norm": 1.8046372783913727, "learning_rate": 5.465232776353156e-07, "loss": 0.5002, "step": 27973 }, { "epoch": 0.4862591041040171, "grad_norm": 1.2296026396311275, "learning_rate": 5.46495250343453e-07, "loss": 0.3127, "step": 27974 }, { "epoch": 0.48627648664151996, "grad_norm": 1.958611712771365, "learning_rate": 5.464672229042218e-07, "loss": 0.3169, "step": 27975 }, { "epoch": 0.48629386917902273, "grad_norm": 1.8364839163437263, "learning_rate": 5.464391953177108e-07, "loss": 0.4786, "step": 27976 }, { "epoch": 0.48631125171652556, "grad_norm": 1.3677833299219684, "learning_rate": 5.464111675840089e-07, "loss": 0.2457, "step": 27977 }, { "epoch": 0.4863286342540284, "grad_norm": 1.4586126143222518, "learning_rate": 5.463831397032051e-07, "loss": 0.2694, "step": 27978 }, { "epoch": 0.4863460167915312, "grad_norm": 1.381512057295125, "learning_rate": 5.463551116753878e-07, "loss": 0.4152, "step": 27979 }, { "epoch": 0.48636339932903405, "grad_norm": 1.6855682138679389, "learning_rate": 5.463270835006462e-07, "loss": 0.1991, "step": 27980 }, { "epoch": 0.4863807818665369, "grad_norm": 2.6070428278724544, "learning_rate": 5.462990551790691e-07, "loss": 0.337, "step": 27981 }, { "epoch": 0.4863981644040397, "grad_norm": 1.3510352144137818, "learning_rate": 5.462710267107451e-07, "loss": 0.2569, "step": 27982 }, { "epoch": 0.48641554694154254, "grad_norm": 1.4502058469308177, "learning_rate": 5.462429980957634e-07, "loss": 0.2519, "step": 27983 }, { "epoch": 0.4864329294790454, "grad_norm": 1.592284685519785, "learning_rate": 5.462149693342126e-07, "loss": 0.3782, "step": 27984 }, { "epoch": 0.4864503120165482, "grad_norm": 0.9986220479901017, "learning_rate": 5.461869404261814e-07, "loss": 0.2424, "step": 27985 }, { "epoch": 0.486467694554051, "grad_norm": 2.3586307108975157, "learning_rate": 5.461589113717588e-07, "loss": 0.2427, "step": 27986 }, { "epoch": 0.4864850770915538, "grad_norm": 3.43382933521194, "learning_rate": 5.46130882171034e-07, "loss": 0.3591, "step": 27987 }, { "epoch": 0.48650245962905664, "grad_norm": 1.8008248087045362, "learning_rate": 5.461028528240951e-07, "loss": 0.2348, "step": 27988 }, { "epoch": 0.48651984216655947, "grad_norm": 1.2696267175899185, "learning_rate": 5.460748233310315e-07, "loss": 0.3028, "step": 27989 }, { "epoch": 0.4865372247040623, "grad_norm": 1.6421919521782975, "learning_rate": 5.46046793691932e-07, "loss": 0.2941, "step": 27990 }, { "epoch": 0.48655460724156513, "grad_norm": 0.8623809392622477, "learning_rate": 5.460187639068852e-07, "loss": 0.2554, "step": 27991 }, { "epoch": 0.48657198977906796, "grad_norm": 2.120445505456068, "learning_rate": 5.459907339759799e-07, "loss": 0.3382, "step": 27992 }, { "epoch": 0.4865893723165708, "grad_norm": 1.4532603747549193, "learning_rate": 5.459627038993053e-07, "loss": 0.2463, "step": 27993 }, { "epoch": 0.4866067548540736, "grad_norm": 1.0730707658491943, "learning_rate": 5.4593467367695e-07, "loss": 0.3408, "step": 27994 }, { "epoch": 0.4866241373915764, "grad_norm": 2.3956882330598632, "learning_rate": 5.459066433090029e-07, "loss": 0.2683, "step": 27995 }, { "epoch": 0.4866415199290792, "grad_norm": 1.9140414123176852, "learning_rate": 5.458786127955527e-07, "loss": 0.2605, "step": 27996 }, { "epoch": 0.48665890246658206, "grad_norm": 1.7487140311597555, "learning_rate": 5.458505821366887e-07, "loss": 0.1923, "step": 27997 }, { "epoch": 0.4866762850040849, "grad_norm": 5.135889445521608, "learning_rate": 5.45822551332499e-07, "loss": 0.3878, "step": 27998 }, { "epoch": 0.4866936675415877, "grad_norm": 1.1955890463884495, "learning_rate": 5.45794520383073e-07, "loss": 0.206, "step": 27999 }, { "epoch": 0.48671105007909055, "grad_norm": 1.8248786674401734, "learning_rate": 5.457664892884995e-07, "loss": 0.4885, "step": 28000 }, { "epoch": 0.4867284326165934, "grad_norm": 1.3699513905738774, "learning_rate": 5.457384580488673e-07, "loss": 0.225, "step": 28001 }, { "epoch": 0.4867458151540962, "grad_norm": 1.367522703031663, "learning_rate": 5.45710426664265e-07, "loss": 0.1931, "step": 28002 }, { "epoch": 0.48676319769159904, "grad_norm": 1.721481553812199, "learning_rate": 5.456823951347819e-07, "loss": 0.4021, "step": 28003 }, { "epoch": 0.48678058022910187, "grad_norm": 1.3586274110313221, "learning_rate": 5.456543634605064e-07, "loss": 0.221, "step": 28004 }, { "epoch": 0.48679796276660464, "grad_norm": 1.2435583831674792, "learning_rate": 5.456263316415276e-07, "loss": 0.2477, "step": 28005 }, { "epoch": 0.48681534530410747, "grad_norm": 1.619434834975303, "learning_rate": 5.455982996779342e-07, "loss": 0.2837, "step": 28006 }, { "epoch": 0.4868327278416103, "grad_norm": 1.4886426444645, "learning_rate": 5.455702675698153e-07, "loss": 0.3148, "step": 28007 }, { "epoch": 0.48685011037911313, "grad_norm": 1.0843258631382195, "learning_rate": 5.455422353172594e-07, "loss": 0.2827, "step": 28008 }, { "epoch": 0.48686749291661596, "grad_norm": 1.544240952019019, "learning_rate": 5.455142029203557e-07, "loss": 0.176, "step": 28009 }, { "epoch": 0.4868848754541188, "grad_norm": 2.1583078660937316, "learning_rate": 5.454861703791929e-07, "loss": 0.4028, "step": 28010 }, { "epoch": 0.4869022579916216, "grad_norm": 1.5382599413737534, "learning_rate": 5.454581376938596e-07, "loss": 0.2061, "step": 28011 }, { "epoch": 0.48691964052912445, "grad_norm": 2.272529479451306, "learning_rate": 5.45430104864445e-07, "loss": 0.1551, "step": 28012 }, { "epoch": 0.4869370230666273, "grad_norm": 0.917761536882124, "learning_rate": 5.454020718910379e-07, "loss": 0.3041, "step": 28013 }, { "epoch": 0.4869544056041301, "grad_norm": 1.08658127762934, "learning_rate": 5.453740387737271e-07, "loss": 0.1792, "step": 28014 }, { "epoch": 0.4869717881416329, "grad_norm": 1.351037589402541, "learning_rate": 5.453460055126014e-07, "loss": 0.2122, "step": 28015 }, { "epoch": 0.4869891706791357, "grad_norm": 3.173996353076249, "learning_rate": 5.453179721077498e-07, "loss": 0.2493, "step": 28016 }, { "epoch": 0.48700655321663855, "grad_norm": 2.8863196425877296, "learning_rate": 5.452899385592608e-07, "loss": 0.3048, "step": 28017 }, { "epoch": 0.4870239357541414, "grad_norm": 2.0947593091832206, "learning_rate": 5.452619048672236e-07, "loss": 0.4323, "step": 28018 }, { "epoch": 0.4870413182916442, "grad_norm": 2.752764219140891, "learning_rate": 5.452338710317271e-07, "loss": 0.2187, "step": 28019 }, { "epoch": 0.48705870082914704, "grad_norm": 1.4642733471499747, "learning_rate": 5.452058370528599e-07, "loss": 0.2766, "step": 28020 }, { "epoch": 0.48707608336664987, "grad_norm": 1.571395460936225, "learning_rate": 5.451778029307109e-07, "loss": 0.2221, "step": 28021 }, { "epoch": 0.4870934659041527, "grad_norm": 1.6727269731112455, "learning_rate": 5.451497686653692e-07, "loss": 0.3573, "step": 28022 }, { "epoch": 0.48711084844165553, "grad_norm": 1.5276736157371953, "learning_rate": 5.451217342569233e-07, "loss": 0.3146, "step": 28023 }, { "epoch": 0.48712823097915836, "grad_norm": 1.8856140401957266, "learning_rate": 5.450936997054623e-07, "loss": 0.2389, "step": 28024 }, { "epoch": 0.48714561351666114, "grad_norm": 1.0881819193692894, "learning_rate": 5.45065665011075e-07, "loss": 0.2706, "step": 28025 }, { "epoch": 0.48716299605416397, "grad_norm": 0.919679051952417, "learning_rate": 5.450376301738502e-07, "loss": 0.2148, "step": 28026 }, { "epoch": 0.4871803785916668, "grad_norm": 1.8383671842535292, "learning_rate": 5.450095951938768e-07, "loss": 0.2737, "step": 28027 }, { "epoch": 0.4871977611291696, "grad_norm": 1.55562473307787, "learning_rate": 5.449815600712436e-07, "loss": 0.3831, "step": 28028 }, { "epoch": 0.48721514366667246, "grad_norm": 1.6708547784935033, "learning_rate": 5.449535248060397e-07, "loss": 0.2938, "step": 28029 }, { "epoch": 0.4872325262041753, "grad_norm": 1.156938583876251, "learning_rate": 5.449254893983537e-07, "loss": 0.2637, "step": 28030 }, { "epoch": 0.4872499087416781, "grad_norm": 1.7411526501896128, "learning_rate": 5.448974538482743e-07, "loss": 0.281, "step": 28031 }, { "epoch": 0.48726729127918095, "grad_norm": 1.4278093273006025, "learning_rate": 5.448694181558908e-07, "loss": 0.2033, "step": 28032 }, { "epoch": 0.4872846738166838, "grad_norm": 2.3540246060855434, "learning_rate": 5.448413823212919e-07, "loss": 0.17, "step": 28033 }, { "epoch": 0.4873020563541866, "grad_norm": 1.463921123653659, "learning_rate": 5.448133463445664e-07, "loss": 0.3812, "step": 28034 }, { "epoch": 0.4873194388916894, "grad_norm": 4.953646913127709, "learning_rate": 5.447853102258032e-07, "loss": 0.328, "step": 28035 }, { "epoch": 0.4873368214291922, "grad_norm": 3.2631284838558114, "learning_rate": 5.44757273965091e-07, "loss": 0.4248, "step": 28036 }, { "epoch": 0.48735420396669504, "grad_norm": 1.542699981698056, "learning_rate": 5.447292375625188e-07, "loss": 0.3698, "step": 28037 }, { "epoch": 0.4873715865041979, "grad_norm": 0.9950259013311838, "learning_rate": 5.447012010181757e-07, "loss": 0.2484, "step": 28038 }, { "epoch": 0.4873889690417007, "grad_norm": 1.9042431695260245, "learning_rate": 5.446731643321501e-07, "loss": 0.1805, "step": 28039 }, { "epoch": 0.48740635157920353, "grad_norm": 2.0074746368998913, "learning_rate": 5.446451275045312e-07, "loss": 0.3163, "step": 28040 }, { "epoch": 0.48742373411670636, "grad_norm": 2.9107584856143007, "learning_rate": 5.446170905354078e-07, "loss": 0.3405, "step": 28041 }, { "epoch": 0.4874411166542092, "grad_norm": 4.2808639317803685, "learning_rate": 5.445890534248686e-07, "loss": 0.4578, "step": 28042 }, { "epoch": 0.487458499191712, "grad_norm": 2.0526557363984432, "learning_rate": 5.445610161730026e-07, "loss": 0.2013, "step": 28043 }, { "epoch": 0.48747588172921485, "grad_norm": 2.0966064427480453, "learning_rate": 5.445329787798987e-07, "loss": 0.2815, "step": 28044 }, { "epoch": 0.48749326426671763, "grad_norm": 1.6244923598927576, "learning_rate": 5.445049412456458e-07, "loss": 0.2985, "step": 28045 }, { "epoch": 0.48751064680422046, "grad_norm": 2.700750216929279, "learning_rate": 5.444769035703324e-07, "loss": 0.2969, "step": 28046 }, { "epoch": 0.4875280293417233, "grad_norm": 1.6357465206149147, "learning_rate": 5.44448865754048e-07, "loss": 0.2077, "step": 28047 }, { "epoch": 0.4875454118792261, "grad_norm": 1.5163003582777883, "learning_rate": 5.444208277968809e-07, "loss": 0.1874, "step": 28048 }, { "epoch": 0.48756279441672895, "grad_norm": 1.600389103720109, "learning_rate": 5.443927896989202e-07, "loss": 0.356, "step": 28049 }, { "epoch": 0.4875801769542318, "grad_norm": 1.4480629732107893, "learning_rate": 5.443647514602547e-07, "loss": 0.1457, "step": 28050 }, { "epoch": 0.4875975594917346, "grad_norm": 3.43022859311199, "learning_rate": 5.443367130809734e-07, "loss": 0.3372, "step": 28051 }, { "epoch": 0.48761494202923744, "grad_norm": 1.573887963758922, "learning_rate": 5.44308674561165e-07, "loss": 0.3196, "step": 28052 }, { "epoch": 0.48763232456674027, "grad_norm": 1.997802831212957, "learning_rate": 5.442806359009185e-07, "loss": 0.431, "step": 28053 }, { "epoch": 0.4876497071042431, "grad_norm": 2.0634277283103835, "learning_rate": 5.442525971003228e-07, "loss": 0.4835, "step": 28054 }, { "epoch": 0.4876670896417459, "grad_norm": 1.2952339895518103, "learning_rate": 5.442245581594667e-07, "loss": 0.2751, "step": 28055 }, { "epoch": 0.4876844721792487, "grad_norm": 1.5284544388032555, "learning_rate": 5.441965190784388e-07, "loss": 0.4669, "step": 28056 }, { "epoch": 0.48770185471675154, "grad_norm": 1.4838260853878302, "learning_rate": 5.441684798573287e-07, "loss": 0.4083, "step": 28057 }, { "epoch": 0.48771923725425437, "grad_norm": 1.8178486326350218, "learning_rate": 5.441404404962245e-07, "loss": 0.2023, "step": 28058 }, { "epoch": 0.4877366197917572, "grad_norm": 1.2317695055957665, "learning_rate": 5.441124009952154e-07, "loss": 0.4292, "step": 28059 }, { "epoch": 0.48775400232926003, "grad_norm": 3.7389080364834415, "learning_rate": 5.440843613543902e-07, "loss": 0.177, "step": 28060 }, { "epoch": 0.48777138486676286, "grad_norm": 1.8714428191970955, "learning_rate": 5.440563215738379e-07, "loss": 0.2964, "step": 28061 }, { "epoch": 0.4877887674042657, "grad_norm": 1.7351100998415308, "learning_rate": 5.440282816536474e-07, "loss": 0.1489, "step": 28062 }, { "epoch": 0.4878061499417685, "grad_norm": 1.3667799327296373, "learning_rate": 5.440002415939073e-07, "loss": 0.2632, "step": 28063 }, { "epoch": 0.48782353247927135, "grad_norm": 1.4176124188247923, "learning_rate": 5.439722013947068e-07, "loss": 0.3546, "step": 28064 }, { "epoch": 0.4878409150167741, "grad_norm": 4.3255852868330456, "learning_rate": 5.439441610561343e-07, "loss": 0.2242, "step": 28065 }, { "epoch": 0.48785829755427695, "grad_norm": 1.2938937804457704, "learning_rate": 5.439161205782795e-07, "loss": 0.2501, "step": 28066 }, { "epoch": 0.4878756800917798, "grad_norm": 2.0775309048747364, "learning_rate": 5.438880799612304e-07, "loss": 0.3598, "step": 28067 }, { "epoch": 0.4878930626292826, "grad_norm": 1.1116769635740877, "learning_rate": 5.438600392050764e-07, "loss": 0.2169, "step": 28068 }, { "epoch": 0.48791044516678544, "grad_norm": 3.5290483780571518, "learning_rate": 5.438319983099062e-07, "loss": 0.2314, "step": 28069 }, { "epoch": 0.4879278277042883, "grad_norm": 2.400195205052644, "learning_rate": 5.438039572758086e-07, "loss": 0.4331, "step": 28070 }, { "epoch": 0.4879452102417911, "grad_norm": 2.1625528170928496, "learning_rate": 5.437759161028727e-07, "loss": 0.3307, "step": 28071 }, { "epoch": 0.48796259277929394, "grad_norm": 1.9102165482907918, "learning_rate": 5.437478747911872e-07, "loss": 0.2588, "step": 28072 }, { "epoch": 0.48797997531679677, "grad_norm": 1.0258561332513323, "learning_rate": 5.43719833340841e-07, "loss": 0.3279, "step": 28073 }, { "epoch": 0.4879973578542996, "grad_norm": 0.9459529686956579, "learning_rate": 5.436917917519232e-07, "loss": 0.1946, "step": 28074 }, { "epoch": 0.48801474039180237, "grad_norm": 1.8956864997822749, "learning_rate": 5.436637500245221e-07, "loss": 0.3439, "step": 28075 }, { "epoch": 0.4880321229293052, "grad_norm": 3.2747630413888933, "learning_rate": 5.436357081587274e-07, "loss": 0.2106, "step": 28076 }, { "epoch": 0.48804950546680803, "grad_norm": 1.9474563296155796, "learning_rate": 5.436076661546273e-07, "loss": 0.2947, "step": 28077 }, { "epoch": 0.48806688800431086, "grad_norm": 1.6453801173591047, "learning_rate": 5.435796240123111e-07, "loss": 0.4819, "step": 28078 }, { "epoch": 0.4880842705418137, "grad_norm": 1.8307181926106384, "learning_rate": 5.435515817318674e-07, "loss": 0.3345, "step": 28079 }, { "epoch": 0.4881016530793165, "grad_norm": 1.4037324164208675, "learning_rate": 5.435235393133853e-07, "loss": 0.2703, "step": 28080 }, { "epoch": 0.48811903561681935, "grad_norm": 1.502708088810976, "learning_rate": 5.434954967569533e-07, "loss": 0.3886, "step": 28081 }, { "epoch": 0.4881364181543222, "grad_norm": 3.0551639059083664, "learning_rate": 5.434674540626608e-07, "loss": 0.2209, "step": 28082 }, { "epoch": 0.488153800691825, "grad_norm": 1.2742960836254498, "learning_rate": 5.434394112305963e-07, "loss": 0.2975, "step": 28083 }, { "epoch": 0.48817118322932784, "grad_norm": 2.582986306967921, "learning_rate": 5.434113682608489e-07, "loss": 0.267, "step": 28084 }, { "epoch": 0.4881885657668306, "grad_norm": 1.715977349309519, "learning_rate": 5.433833251535075e-07, "loss": 0.3273, "step": 28085 }, { "epoch": 0.48820594830433345, "grad_norm": 1.5346662093120558, "learning_rate": 5.433552819086608e-07, "loss": 0.1586, "step": 28086 }, { "epoch": 0.4882233308418363, "grad_norm": 1.3896433714925673, "learning_rate": 5.433272385263976e-07, "loss": 0.2676, "step": 28087 }, { "epoch": 0.4882407133793391, "grad_norm": 2.8628073124314986, "learning_rate": 5.432991950068071e-07, "loss": 0.3288, "step": 28088 }, { "epoch": 0.48825809591684194, "grad_norm": 1.1308864428651633, "learning_rate": 5.432711513499781e-07, "loss": 0.1461, "step": 28089 }, { "epoch": 0.48827547845434477, "grad_norm": 2.165142155395997, "learning_rate": 5.432431075559994e-07, "loss": 0.339, "step": 28090 }, { "epoch": 0.4882928609918476, "grad_norm": 2.181504077161625, "learning_rate": 5.432150636249598e-07, "loss": 0.4093, "step": 28091 }, { "epoch": 0.48831024352935043, "grad_norm": 1.4214842234928613, "learning_rate": 5.431870195569485e-07, "loss": 0.1526, "step": 28092 }, { "epoch": 0.48832762606685326, "grad_norm": 1.641161809025056, "learning_rate": 5.43158975352054e-07, "loss": 0.3066, "step": 28093 }, { "epoch": 0.4883450086043561, "grad_norm": 0.9575718466742281, "learning_rate": 5.431309310103656e-07, "loss": 0.2981, "step": 28094 }, { "epoch": 0.48836239114185886, "grad_norm": 1.424563116037578, "learning_rate": 5.431028865319718e-07, "loss": 0.2823, "step": 28095 }, { "epoch": 0.4883797736793617, "grad_norm": 1.7291425972772738, "learning_rate": 5.430748419169615e-07, "loss": 0.398, "step": 28096 }, { "epoch": 0.4883971562168645, "grad_norm": 1.569613819717553, "learning_rate": 5.430467971654239e-07, "loss": 0.2748, "step": 28097 }, { "epoch": 0.48841453875436736, "grad_norm": 1.517922935876132, "learning_rate": 5.430187522774477e-07, "loss": 0.3635, "step": 28098 }, { "epoch": 0.4884319212918702, "grad_norm": 1.3630338813895406, "learning_rate": 5.429907072531219e-07, "loss": 0.3513, "step": 28099 }, { "epoch": 0.488449303829373, "grad_norm": 1.530052263030963, "learning_rate": 5.429626620925352e-07, "loss": 0.4072, "step": 28100 }, { "epoch": 0.48846668636687585, "grad_norm": 1.6540806929606942, "learning_rate": 5.429346167957768e-07, "loss": 0.1753, "step": 28101 }, { "epoch": 0.4884840689043787, "grad_norm": 1.3096412736320806, "learning_rate": 5.429065713629351e-07, "loss": 0.2095, "step": 28102 }, { "epoch": 0.4885014514418815, "grad_norm": 1.5112198478029324, "learning_rate": 5.428785257940995e-07, "loss": 0.2839, "step": 28103 }, { "epoch": 0.48851883397938434, "grad_norm": 1.5960098274522312, "learning_rate": 5.428504800893585e-07, "loss": 0.26, "step": 28104 }, { "epoch": 0.4885362165168871, "grad_norm": 2.0053406787898354, "learning_rate": 5.428224342488014e-07, "loss": 0.2393, "step": 28105 }, { "epoch": 0.48855359905438994, "grad_norm": 1.1007381185543128, "learning_rate": 5.427943882725165e-07, "loss": 0.2543, "step": 28106 }, { "epoch": 0.4885709815918928, "grad_norm": 1.598028464219437, "learning_rate": 5.427663421605934e-07, "loss": 0.3208, "step": 28107 }, { "epoch": 0.4885883641293956, "grad_norm": 1.9410659955532124, "learning_rate": 5.427382959131205e-07, "loss": 0.3235, "step": 28108 }, { "epoch": 0.48860574666689843, "grad_norm": 2.0673572622265817, "learning_rate": 5.427102495301867e-07, "loss": 0.2988, "step": 28109 }, { "epoch": 0.48862312920440126, "grad_norm": 2.55291235273244, "learning_rate": 5.426822030118813e-07, "loss": 0.3924, "step": 28110 }, { "epoch": 0.4886405117419041, "grad_norm": 1.2593250842519257, "learning_rate": 5.426541563582927e-07, "loss": 0.2749, "step": 28111 }, { "epoch": 0.4886578942794069, "grad_norm": 2.0689480411304473, "learning_rate": 5.426261095695101e-07, "loss": 0.2238, "step": 28112 }, { "epoch": 0.48867527681690975, "grad_norm": 1.904838775643178, "learning_rate": 5.425980626456222e-07, "loss": 0.3086, "step": 28113 }, { "epoch": 0.4886926593544126, "grad_norm": 3.334130838902571, "learning_rate": 5.425700155867182e-07, "loss": 0.3964, "step": 28114 }, { "epoch": 0.48871004189191536, "grad_norm": 2.1997391376618323, "learning_rate": 5.425419683928868e-07, "loss": 0.2307, "step": 28115 }, { "epoch": 0.4887274244294182, "grad_norm": 1.6364039125265148, "learning_rate": 5.425139210642166e-07, "loss": 0.3791, "step": 28116 }, { "epoch": 0.488744806966921, "grad_norm": 1.8690282662935687, "learning_rate": 5.424858736007971e-07, "loss": 0.3322, "step": 28117 }, { "epoch": 0.48876218950442385, "grad_norm": 2.1427940871979203, "learning_rate": 5.424578260027169e-07, "loss": 0.2615, "step": 28118 }, { "epoch": 0.4887795720419267, "grad_norm": 1.4955001594918191, "learning_rate": 5.424297782700647e-07, "loss": 0.3569, "step": 28119 }, { "epoch": 0.4887969545794295, "grad_norm": 3.545309059734654, "learning_rate": 5.424017304029298e-07, "loss": 0.3429, "step": 28120 }, { "epoch": 0.48881433711693234, "grad_norm": 0.8140483678979571, "learning_rate": 5.423736824014007e-07, "loss": 0.1391, "step": 28121 }, { "epoch": 0.48883171965443517, "grad_norm": 1.671904698482551, "learning_rate": 5.423456342655667e-07, "loss": 0.1557, "step": 28122 }, { "epoch": 0.488849102191938, "grad_norm": 1.4251820879078883, "learning_rate": 5.423175859955164e-07, "loss": 0.1158, "step": 28123 }, { "epoch": 0.48886648472944083, "grad_norm": 1.8258481931211856, "learning_rate": 5.422895375913387e-07, "loss": 0.3078, "step": 28124 }, { "epoch": 0.4888838672669436, "grad_norm": 2.5805038286617523, "learning_rate": 5.422614890531227e-07, "loss": 0.3759, "step": 28125 }, { "epoch": 0.48890124980444644, "grad_norm": 1.571022655553866, "learning_rate": 5.422334403809572e-07, "loss": 0.5907, "step": 28126 }, { "epoch": 0.48891863234194927, "grad_norm": 2.3013842208996227, "learning_rate": 5.42205391574931e-07, "loss": 0.2687, "step": 28127 }, { "epoch": 0.4889360148794521, "grad_norm": 1.231971754317882, "learning_rate": 5.421773426351332e-07, "loss": 0.3892, "step": 28128 }, { "epoch": 0.4889533974169549, "grad_norm": 1.3335634712319473, "learning_rate": 5.421492935616526e-07, "loss": 0.2821, "step": 28129 }, { "epoch": 0.48897077995445776, "grad_norm": 1.1879895509863367, "learning_rate": 5.42121244354578e-07, "loss": 0.6644, "step": 28130 }, { "epoch": 0.4889881624919606, "grad_norm": 3.2122124742303786, "learning_rate": 5.420931950139984e-07, "loss": 0.4526, "step": 28131 }, { "epoch": 0.4890055450294634, "grad_norm": 1.0244850949398356, "learning_rate": 5.420651455400027e-07, "loss": 0.176, "step": 28132 }, { "epoch": 0.48902292756696625, "grad_norm": 2.095472358441349, "learning_rate": 5.4203709593268e-07, "loss": 0.2664, "step": 28133 }, { "epoch": 0.4890403101044691, "grad_norm": 1.5353684198826631, "learning_rate": 5.420090461921189e-07, "loss": 0.1635, "step": 28134 }, { "epoch": 0.48905769264197185, "grad_norm": 1.3661750807265105, "learning_rate": 5.419809963184083e-07, "loss": 0.1656, "step": 28135 }, { "epoch": 0.4890750751794747, "grad_norm": 2.0735052970688326, "learning_rate": 5.419529463116374e-07, "loss": 0.2388, "step": 28136 }, { "epoch": 0.4890924577169775, "grad_norm": 1.5472192578552786, "learning_rate": 5.419248961718949e-07, "loss": 0.3402, "step": 28137 }, { "epoch": 0.48910984025448034, "grad_norm": 1.5247650608218795, "learning_rate": 5.418968458992696e-07, "loss": 0.1703, "step": 28138 }, { "epoch": 0.4891272227919832, "grad_norm": 2.074052039875203, "learning_rate": 5.418687954938507e-07, "loss": 0.1277, "step": 28139 }, { "epoch": 0.489144605329486, "grad_norm": 3.630419487985763, "learning_rate": 5.41840744955727e-07, "loss": 0.5277, "step": 28140 }, { "epoch": 0.48916198786698883, "grad_norm": 2.0551042971461864, "learning_rate": 5.418126942849871e-07, "loss": 0.3676, "step": 28141 }, { "epoch": 0.48917937040449166, "grad_norm": 2.664460030905204, "learning_rate": 5.417846434817205e-07, "loss": 0.2825, "step": 28142 }, { "epoch": 0.4891967529419945, "grad_norm": 2.114850147851283, "learning_rate": 5.417565925460155e-07, "loss": 0.2559, "step": 28143 }, { "epoch": 0.48921413547949727, "grad_norm": 1.2640403593674605, "learning_rate": 5.417285414779614e-07, "loss": 0.2796, "step": 28144 }, { "epoch": 0.4892315180170001, "grad_norm": 1.9447807800607262, "learning_rate": 5.417004902776469e-07, "loss": 0.3814, "step": 28145 }, { "epoch": 0.48924890055450293, "grad_norm": 1.7732659091679752, "learning_rate": 5.416724389451612e-07, "loss": 0.1842, "step": 28146 }, { "epoch": 0.48926628309200576, "grad_norm": 1.6970468129188105, "learning_rate": 5.416443874805928e-07, "loss": 0.278, "step": 28147 }, { "epoch": 0.4892836656295086, "grad_norm": 3.006438678799647, "learning_rate": 5.41616335884031e-07, "loss": 0.3369, "step": 28148 }, { "epoch": 0.4893010481670114, "grad_norm": 2.160956366409816, "learning_rate": 5.415882841555645e-07, "loss": 0.2713, "step": 28149 }, { "epoch": 0.48931843070451425, "grad_norm": 2.4808957756014265, "learning_rate": 5.415602322952821e-07, "loss": 0.6283, "step": 28150 }, { "epoch": 0.4893358132420171, "grad_norm": 1.8179541501143444, "learning_rate": 5.415321803032728e-07, "loss": 0.2937, "step": 28151 }, { "epoch": 0.4893531957795199, "grad_norm": 1.045916471039126, "learning_rate": 5.415041281796258e-07, "loss": 0.1612, "step": 28152 }, { "epoch": 0.48937057831702274, "grad_norm": 1.5118978921824302, "learning_rate": 5.414760759244297e-07, "loss": 0.2106, "step": 28153 }, { "epoch": 0.4893879608545255, "grad_norm": 1.06182322404786, "learning_rate": 5.414480235377734e-07, "loss": 0.2178, "step": 28154 }, { "epoch": 0.48940534339202835, "grad_norm": 1.7204661571556106, "learning_rate": 5.414199710197461e-07, "loss": 0.3735, "step": 28155 }, { "epoch": 0.4894227259295312, "grad_norm": 2.0531759411897164, "learning_rate": 5.413919183704363e-07, "loss": 0.475, "step": 28156 }, { "epoch": 0.489440108467034, "grad_norm": 1.4931195110869007, "learning_rate": 5.413638655899333e-07, "loss": 0.3335, "step": 28157 }, { "epoch": 0.48945749100453684, "grad_norm": 1.2830344636432158, "learning_rate": 5.413358126783258e-07, "loss": 0.2698, "step": 28158 }, { "epoch": 0.48947487354203967, "grad_norm": 2.3105462242211368, "learning_rate": 5.413077596357028e-07, "loss": 0.3036, "step": 28159 }, { "epoch": 0.4894922560795425, "grad_norm": 1.5295101630966763, "learning_rate": 5.41279706462153e-07, "loss": 0.2743, "step": 28160 }, { "epoch": 0.48950963861704533, "grad_norm": 1.5853439869625934, "learning_rate": 5.412516531577658e-07, "loss": 0.2663, "step": 28161 }, { "epoch": 0.48952702115454816, "grad_norm": 1.5691208474383576, "learning_rate": 5.412235997226294e-07, "loss": 0.3412, "step": 28162 }, { "epoch": 0.489544403692051, "grad_norm": 1.0961637820347292, "learning_rate": 5.411955461568335e-07, "loss": 0.182, "step": 28163 }, { "epoch": 0.48956178622955376, "grad_norm": 1.9474099051314429, "learning_rate": 5.411674924604664e-07, "loss": 0.1703, "step": 28164 }, { "epoch": 0.4895791687670566, "grad_norm": 1.548007681071078, "learning_rate": 5.411394386336174e-07, "loss": 0.3771, "step": 28165 }, { "epoch": 0.4895965513045594, "grad_norm": 2.012967590780855, "learning_rate": 5.411113846763752e-07, "loss": 0.2272, "step": 28166 }, { "epoch": 0.48961393384206225, "grad_norm": 1.2491310578345425, "learning_rate": 5.410833305888289e-07, "loss": 0.3475, "step": 28167 }, { "epoch": 0.4896313163795651, "grad_norm": 1.7987479702553582, "learning_rate": 5.410552763710672e-07, "loss": 0.2201, "step": 28168 }, { "epoch": 0.4896486989170679, "grad_norm": 1.147948518933678, "learning_rate": 5.410272220231792e-07, "loss": 0.2722, "step": 28169 }, { "epoch": 0.48966608145457075, "grad_norm": 2.2045550630977275, "learning_rate": 5.409991675452538e-07, "loss": 0.2259, "step": 28170 }, { "epoch": 0.4896834639920736, "grad_norm": 1.7552900027647285, "learning_rate": 5.409711129373797e-07, "loss": 0.1811, "step": 28171 }, { "epoch": 0.4897008465295764, "grad_norm": 1.0775427284813752, "learning_rate": 5.40943058199646e-07, "loss": 0.3339, "step": 28172 }, { "epoch": 0.48971822906707924, "grad_norm": 1.297926879657809, "learning_rate": 5.409150033321418e-07, "loss": 0.3692, "step": 28173 }, { "epoch": 0.489735611604582, "grad_norm": 1.8386661783596612, "learning_rate": 5.408869483349558e-07, "loss": 0.2768, "step": 28174 }, { "epoch": 0.48975299414208484, "grad_norm": 1.2219317669463081, "learning_rate": 5.408588932081768e-07, "loss": 0.1781, "step": 28175 }, { "epoch": 0.48977037667958767, "grad_norm": 1.1846532469119948, "learning_rate": 5.408308379518939e-07, "loss": 0.1917, "step": 28176 }, { "epoch": 0.4897877592170905, "grad_norm": 1.557469850279392, "learning_rate": 5.408027825661961e-07, "loss": 0.2461, "step": 28177 }, { "epoch": 0.48980514175459333, "grad_norm": 1.5097488070658998, "learning_rate": 5.407747270511722e-07, "loss": 0.2916, "step": 28178 }, { "epoch": 0.48982252429209616, "grad_norm": 2.0180053898837795, "learning_rate": 5.407466714069112e-07, "loss": 0.5319, "step": 28179 }, { "epoch": 0.489839906829599, "grad_norm": 1.331178409308073, "learning_rate": 5.407186156335019e-07, "loss": 0.2536, "step": 28180 }, { "epoch": 0.4898572893671018, "grad_norm": 1.2129411742729035, "learning_rate": 5.406905597310332e-07, "loss": 0.4148, "step": 28181 }, { "epoch": 0.48987467190460465, "grad_norm": 2.0351716347610074, "learning_rate": 5.406625036995942e-07, "loss": 0.2686, "step": 28182 }, { "epoch": 0.4898920544421075, "grad_norm": 1.4895999325221783, "learning_rate": 5.406344475392739e-07, "loss": 0.4722, "step": 28183 }, { "epoch": 0.48990943697961026, "grad_norm": 4.272457184154703, "learning_rate": 5.40606391250161e-07, "loss": 0.399, "step": 28184 }, { "epoch": 0.4899268195171131, "grad_norm": 2.1914618771471983, "learning_rate": 5.405783348323443e-07, "loss": 0.4487, "step": 28185 }, { "epoch": 0.4899442020546159, "grad_norm": 2.2749664974822124, "learning_rate": 5.40550278285913e-07, "loss": 0.1967, "step": 28186 }, { "epoch": 0.48996158459211875, "grad_norm": 2.0510476342166943, "learning_rate": 5.40522221610956e-07, "loss": 0.2226, "step": 28187 }, { "epoch": 0.4899789671296216, "grad_norm": 1.3362148472151663, "learning_rate": 5.404941648075621e-07, "loss": 0.1616, "step": 28188 }, { "epoch": 0.4899963496671244, "grad_norm": 2.465336291357568, "learning_rate": 5.404661078758205e-07, "loss": 0.2578, "step": 28189 }, { "epoch": 0.49001373220462724, "grad_norm": 1.7783502853720181, "learning_rate": 5.404380508158197e-07, "loss": 0.4001, "step": 28190 }, { "epoch": 0.49003111474213007, "grad_norm": 1.2214163218802807, "learning_rate": 5.40409993627649e-07, "loss": 0.332, "step": 28191 }, { "epoch": 0.4900484972796329, "grad_norm": 1.4902442707016277, "learning_rate": 5.40381936311397e-07, "loss": 0.276, "step": 28192 }, { "epoch": 0.49006587981713573, "grad_norm": 1.6214667017131223, "learning_rate": 5.40353878867153e-07, "loss": 0.1705, "step": 28193 }, { "epoch": 0.4900832623546385, "grad_norm": 2.01728219483251, "learning_rate": 5.403258212950057e-07, "loss": 0.2443, "step": 28194 }, { "epoch": 0.49010064489214133, "grad_norm": 4.610490619142734, "learning_rate": 5.40297763595044e-07, "loss": 0.2969, "step": 28195 }, { "epoch": 0.49011802742964417, "grad_norm": 1.449185248679429, "learning_rate": 5.402697057673569e-07, "loss": 0.1628, "step": 28196 }, { "epoch": 0.490135409967147, "grad_norm": 1.5531674094365255, "learning_rate": 5.402416478120335e-07, "loss": 0.2513, "step": 28197 }, { "epoch": 0.4901527925046498, "grad_norm": 1.393046740958327, "learning_rate": 5.402135897291623e-07, "loss": 0.3296, "step": 28198 }, { "epoch": 0.49017017504215266, "grad_norm": 1.6233724005033228, "learning_rate": 5.401855315188327e-07, "loss": 0.3138, "step": 28199 }, { "epoch": 0.4901875575796555, "grad_norm": 1.5602518157652339, "learning_rate": 5.401574731811334e-07, "loss": 0.2053, "step": 28200 }, { "epoch": 0.4902049401171583, "grad_norm": 1.2505557263405287, "learning_rate": 5.401294147161532e-07, "loss": 0.1963, "step": 28201 }, { "epoch": 0.49022232265466115, "grad_norm": 1.3988863966931635, "learning_rate": 5.401013561239813e-07, "loss": 0.3043, "step": 28202 }, { "epoch": 0.490239705192164, "grad_norm": 1.4250875997617924, "learning_rate": 5.400732974047066e-07, "loss": 0.2516, "step": 28203 }, { "epoch": 0.49025708772966675, "grad_norm": 2.0317761139822514, "learning_rate": 5.400452385584179e-07, "loss": 0.2824, "step": 28204 }, { "epoch": 0.4902744702671696, "grad_norm": 1.5244220713985963, "learning_rate": 5.400171795852042e-07, "loss": 0.3799, "step": 28205 }, { "epoch": 0.4902918528046724, "grad_norm": 4.263562901512797, "learning_rate": 5.399891204851543e-07, "loss": 0.4128, "step": 28206 }, { "epoch": 0.49030923534217524, "grad_norm": 1.7945472925799717, "learning_rate": 5.399610612583572e-07, "loss": 0.2108, "step": 28207 }, { "epoch": 0.4903266178796781, "grad_norm": 1.8380881460623117, "learning_rate": 5.399330019049022e-07, "loss": 0.209, "step": 28208 }, { "epoch": 0.4903440004171809, "grad_norm": 1.0270708830218371, "learning_rate": 5.399049424248778e-07, "loss": 0.3248, "step": 28209 }, { "epoch": 0.49036138295468373, "grad_norm": 1.1801438082552858, "learning_rate": 5.39876882818373e-07, "loss": 0.3322, "step": 28210 }, { "epoch": 0.49037876549218656, "grad_norm": 1.299181725574605, "learning_rate": 5.398488230854767e-07, "loss": 0.2606, "step": 28211 }, { "epoch": 0.4903961480296894, "grad_norm": 1.5026597656887748, "learning_rate": 5.398207632262781e-07, "loss": 0.1985, "step": 28212 }, { "epoch": 0.4904135305671922, "grad_norm": 2.919752511670923, "learning_rate": 5.39792703240866e-07, "loss": 0.4156, "step": 28213 }, { "epoch": 0.490430913104695, "grad_norm": 1.4449750809105668, "learning_rate": 5.397646431293291e-07, "loss": 0.2379, "step": 28214 }, { "epoch": 0.49044829564219783, "grad_norm": 0.9145687591140427, "learning_rate": 5.39736582891757e-07, "loss": 0.3951, "step": 28215 }, { "epoch": 0.49046567817970066, "grad_norm": 2.3590800675005643, "learning_rate": 5.397085225282377e-07, "loss": 0.3622, "step": 28216 }, { "epoch": 0.4904830607172035, "grad_norm": 1.7767240084145666, "learning_rate": 5.396804620388609e-07, "loss": 0.3094, "step": 28217 }, { "epoch": 0.4905004432547063, "grad_norm": 1.3759520568093728, "learning_rate": 5.396524014237151e-07, "loss": 0.4582, "step": 28218 }, { "epoch": 0.49051782579220915, "grad_norm": 1.8024038582560833, "learning_rate": 5.396243406828896e-07, "loss": 0.2266, "step": 28219 }, { "epoch": 0.490535208329712, "grad_norm": 1.5169763087231503, "learning_rate": 5.395962798164728e-07, "loss": 0.2298, "step": 28220 }, { "epoch": 0.4905525908672148, "grad_norm": 1.5286656723997452, "learning_rate": 5.395682188245544e-07, "loss": 0.2648, "step": 28221 }, { "epoch": 0.49056997340471764, "grad_norm": 3.7983868535705176, "learning_rate": 5.395401577072226e-07, "loss": 0.3191, "step": 28222 }, { "epoch": 0.49058735594222047, "grad_norm": 0.8922259854854419, "learning_rate": 5.395120964645668e-07, "loss": 0.175, "step": 28223 }, { "epoch": 0.49060473847972325, "grad_norm": 1.5083806254605903, "learning_rate": 5.394840350966759e-07, "loss": 0.3238, "step": 28224 }, { "epoch": 0.4906221210172261, "grad_norm": 1.1335964501071338, "learning_rate": 5.394559736036386e-07, "loss": 0.1766, "step": 28225 }, { "epoch": 0.4906395035547289, "grad_norm": 1.2476221389428932, "learning_rate": 5.394279119855439e-07, "loss": 0.3585, "step": 28226 }, { "epoch": 0.49065688609223174, "grad_norm": 2.111606239295702, "learning_rate": 5.39399850242481e-07, "loss": 0.2117, "step": 28227 }, { "epoch": 0.49067426862973457, "grad_norm": 1.3218270200989104, "learning_rate": 5.393717883745386e-07, "loss": 0.315, "step": 28228 }, { "epoch": 0.4906916511672374, "grad_norm": 0.9524993097064711, "learning_rate": 5.393437263818058e-07, "loss": 0.2989, "step": 28229 }, { "epoch": 0.4907090337047402, "grad_norm": 1.7305115330999898, "learning_rate": 5.393156642643714e-07, "loss": 0.2037, "step": 28230 }, { "epoch": 0.49072641624224306, "grad_norm": 1.527639321387658, "learning_rate": 5.392876020223244e-07, "loss": 0.2108, "step": 28231 }, { "epoch": 0.4907437987797459, "grad_norm": 1.717254347230922, "learning_rate": 5.392595396557537e-07, "loss": 0.348, "step": 28232 }, { "epoch": 0.4907611813172487, "grad_norm": 2.034973505297518, "learning_rate": 5.392314771647483e-07, "loss": 0.2557, "step": 28233 }, { "epoch": 0.4907785638547515, "grad_norm": 1.8421077258915968, "learning_rate": 5.392034145493972e-07, "loss": 0.2327, "step": 28234 }, { "epoch": 0.4907959463922543, "grad_norm": 1.6163758701656554, "learning_rate": 5.391753518097893e-07, "loss": 0.2715, "step": 28235 }, { "epoch": 0.49081332892975715, "grad_norm": 1.3918575953022108, "learning_rate": 5.391472889460135e-07, "loss": 0.2849, "step": 28236 }, { "epoch": 0.49083071146726, "grad_norm": 1.4734483377878578, "learning_rate": 5.391192259581587e-07, "loss": 0.2286, "step": 28237 }, { "epoch": 0.4908480940047628, "grad_norm": 1.3086298655482163, "learning_rate": 5.390911628463141e-07, "loss": 0.184, "step": 28238 }, { "epoch": 0.49086547654226564, "grad_norm": 1.7574258893779175, "learning_rate": 5.390630996105684e-07, "loss": 0.2542, "step": 28239 }, { "epoch": 0.4908828590797685, "grad_norm": 1.5859647882341281, "learning_rate": 5.390350362510107e-07, "loss": 0.2807, "step": 28240 }, { "epoch": 0.4909002416172713, "grad_norm": 1.2710201255156917, "learning_rate": 5.390069727677295e-07, "loss": 0.2914, "step": 28241 }, { "epoch": 0.49091762415477413, "grad_norm": 1.3007369942246436, "learning_rate": 5.389789091608145e-07, "loss": 0.1948, "step": 28242 }, { "epoch": 0.49093500669227697, "grad_norm": 1.2150752308436799, "learning_rate": 5.389508454303542e-07, "loss": 0.1982, "step": 28243 }, { "epoch": 0.49095238922977974, "grad_norm": 1.8284805079744926, "learning_rate": 5.389227815764376e-07, "loss": 0.3342, "step": 28244 }, { "epoch": 0.49096977176728257, "grad_norm": 1.5088352637596443, "learning_rate": 5.388947175991533e-07, "loss": 0.2449, "step": 28245 }, { "epoch": 0.4909871543047854, "grad_norm": 1.48986493989215, "learning_rate": 5.388666534985912e-07, "loss": 0.3425, "step": 28246 }, { "epoch": 0.49100453684228823, "grad_norm": 1.3299281326074353, "learning_rate": 5.388385892748393e-07, "loss": 0.3443, "step": 28247 }, { "epoch": 0.49102191937979106, "grad_norm": 1.902281010725901, "learning_rate": 5.388105249279869e-07, "loss": 0.3361, "step": 28248 }, { "epoch": 0.4910393019172939, "grad_norm": 1.4232887500287104, "learning_rate": 5.387824604581231e-07, "loss": 0.2208, "step": 28249 }, { "epoch": 0.4910566844547967, "grad_norm": 1.424280618357807, "learning_rate": 5.387543958653368e-07, "loss": 0.3336, "step": 28250 }, { "epoch": 0.49107406699229955, "grad_norm": 2.018221007350257, "learning_rate": 5.387263311497167e-07, "loss": 0.292, "step": 28251 }, { "epoch": 0.4910914495298024, "grad_norm": 2.596530077427715, "learning_rate": 5.386982663113519e-07, "loss": 0.196, "step": 28252 }, { "epoch": 0.4911088320673052, "grad_norm": 1.4220805575414557, "learning_rate": 5.386702013503315e-07, "loss": 0.2501, "step": 28253 }, { "epoch": 0.491126214604808, "grad_norm": 2.0007524291859733, "learning_rate": 5.386421362667443e-07, "loss": 0.3017, "step": 28254 }, { "epoch": 0.4911435971423108, "grad_norm": 1.3387473602675095, "learning_rate": 5.386140710606791e-07, "loss": 0.2847, "step": 28255 }, { "epoch": 0.49116097967981365, "grad_norm": 1.3539759889718197, "learning_rate": 5.385860057322253e-07, "loss": 0.292, "step": 28256 }, { "epoch": 0.4911783622173165, "grad_norm": 1.3380963838347022, "learning_rate": 5.385579402814714e-07, "loss": 0.3544, "step": 28257 }, { "epoch": 0.4911957447548193, "grad_norm": 0.8564126668934773, "learning_rate": 5.385298747085066e-07, "loss": 0.19, "step": 28258 }, { "epoch": 0.49121312729232214, "grad_norm": 3.7588164992538604, "learning_rate": 5.385018090134199e-07, "loss": 0.3189, "step": 28259 }, { "epoch": 0.49123050982982497, "grad_norm": 1.7621918227213145, "learning_rate": 5.384737431963e-07, "loss": 0.3802, "step": 28260 }, { "epoch": 0.4912478923673278, "grad_norm": 1.4650143480989573, "learning_rate": 5.38445677257236e-07, "loss": 0.2491, "step": 28261 }, { "epoch": 0.49126527490483063, "grad_norm": 1.1621821255249887, "learning_rate": 5.384176111963169e-07, "loss": 0.3206, "step": 28262 }, { "epoch": 0.49128265744233346, "grad_norm": 4.780270605933461, "learning_rate": 5.383895450136316e-07, "loss": 0.2097, "step": 28263 }, { "epoch": 0.49130003997983623, "grad_norm": 1.834306180061948, "learning_rate": 5.383614787092692e-07, "loss": 0.2866, "step": 28264 }, { "epoch": 0.49131742251733906, "grad_norm": 1.447215897818297, "learning_rate": 5.383334122833185e-07, "loss": 0.2741, "step": 28265 }, { "epoch": 0.4913348050548419, "grad_norm": 1.7175874906825523, "learning_rate": 5.383053457358684e-07, "loss": 0.3875, "step": 28266 }, { "epoch": 0.4913521875923447, "grad_norm": 1.2312720068094263, "learning_rate": 5.382772790670081e-07, "loss": 0.2529, "step": 28267 }, { "epoch": 0.49136957012984755, "grad_norm": 2.6320807738180636, "learning_rate": 5.382492122768261e-07, "loss": 0.2905, "step": 28268 }, { "epoch": 0.4913869526673504, "grad_norm": 1.8128262023428139, "learning_rate": 5.38221145365412e-07, "loss": 0.3931, "step": 28269 }, { "epoch": 0.4914043352048532, "grad_norm": 1.8032993628406673, "learning_rate": 5.381930783328542e-07, "loss": 0.3029, "step": 28270 }, { "epoch": 0.49142171774235605, "grad_norm": 3.553994889439591, "learning_rate": 5.38165011179242e-07, "loss": 0.3352, "step": 28271 }, { "epoch": 0.4914391002798589, "grad_norm": 1.6442476680940625, "learning_rate": 5.381369439046643e-07, "loss": 0.3304, "step": 28272 }, { "epoch": 0.4914564828173617, "grad_norm": 1.727451404046187, "learning_rate": 5.381088765092098e-07, "loss": 0.2446, "step": 28273 }, { "epoch": 0.4914738653548645, "grad_norm": 1.327470628118267, "learning_rate": 5.380808089929679e-07, "loss": 0.1658, "step": 28274 }, { "epoch": 0.4914912478923673, "grad_norm": 1.2197453030232672, "learning_rate": 5.380527413560273e-07, "loss": 0.2275, "step": 28275 }, { "epoch": 0.49150863042987014, "grad_norm": 2.2302969320010866, "learning_rate": 5.380246735984769e-07, "loss": 0.3328, "step": 28276 }, { "epoch": 0.49152601296737297, "grad_norm": 1.6894373120007877, "learning_rate": 5.379966057204057e-07, "loss": 0.2402, "step": 28277 }, { "epoch": 0.4915433955048758, "grad_norm": 2.4164982551000644, "learning_rate": 5.379685377219029e-07, "loss": 0.2156, "step": 28278 }, { "epoch": 0.49156077804237863, "grad_norm": 2.484220954491417, "learning_rate": 5.379404696030572e-07, "loss": 0.2134, "step": 28279 }, { "epoch": 0.49157816057988146, "grad_norm": 1.975139446461467, "learning_rate": 5.379124013639574e-07, "loss": 0.3856, "step": 28280 }, { "epoch": 0.4915955431173843, "grad_norm": 2.3259059521149554, "learning_rate": 5.378843330046931e-07, "loss": 0.4346, "step": 28281 }, { "epoch": 0.4916129256548871, "grad_norm": 2.6536910251064905, "learning_rate": 5.378562645253526e-07, "loss": 0.2189, "step": 28282 }, { "epoch": 0.4916303081923899, "grad_norm": 1.2737368475932238, "learning_rate": 5.378281959260253e-07, "loss": 0.3052, "step": 28283 }, { "epoch": 0.49164769072989273, "grad_norm": 2.738497624827148, "learning_rate": 5.378001272067998e-07, "loss": 0.381, "step": 28284 }, { "epoch": 0.49166507326739556, "grad_norm": 1.8726969129676994, "learning_rate": 5.377720583677654e-07, "loss": 0.2826, "step": 28285 }, { "epoch": 0.4916824558048984, "grad_norm": 1.0522499130480605, "learning_rate": 5.377439894090108e-07, "loss": 0.2086, "step": 28286 }, { "epoch": 0.4916998383424012, "grad_norm": 2.215172505752952, "learning_rate": 5.377159203306253e-07, "loss": 0.3401, "step": 28287 }, { "epoch": 0.49171722087990405, "grad_norm": 2.9157431241872267, "learning_rate": 5.376878511326975e-07, "loss": 0.3251, "step": 28288 }, { "epoch": 0.4917346034174069, "grad_norm": 0.9880603348442305, "learning_rate": 5.376597818153166e-07, "loss": 0.215, "step": 28289 }, { "epoch": 0.4917519859549097, "grad_norm": 1.3180282446918976, "learning_rate": 5.376317123785714e-07, "loss": 0.4458, "step": 28290 }, { "epoch": 0.49176936849241254, "grad_norm": 0.899376605507099, "learning_rate": 5.376036428225511e-07, "loss": 0.1912, "step": 28291 }, { "epoch": 0.49178675102991537, "grad_norm": 1.3705415062630426, "learning_rate": 5.375755731473443e-07, "loss": 0.356, "step": 28292 }, { "epoch": 0.49180413356741814, "grad_norm": 1.0459492612467118, "learning_rate": 5.375475033530404e-07, "loss": 0.2149, "step": 28293 }, { "epoch": 0.491821516104921, "grad_norm": 2.463593791273127, "learning_rate": 5.375194334397281e-07, "loss": 0.3549, "step": 28294 }, { "epoch": 0.4918388986424238, "grad_norm": 1.5368871992906104, "learning_rate": 5.374913634074964e-07, "loss": 0.2231, "step": 28295 }, { "epoch": 0.49185628117992664, "grad_norm": 1.3506831167430189, "learning_rate": 5.374632932564342e-07, "loss": 0.2909, "step": 28296 }, { "epoch": 0.49187366371742947, "grad_norm": 1.870931370108669, "learning_rate": 5.374352229866308e-07, "loss": 0.2506, "step": 28297 }, { "epoch": 0.4918910462549323, "grad_norm": 2.1408851103920807, "learning_rate": 5.374071525981748e-07, "loss": 0.2581, "step": 28298 }, { "epoch": 0.4919084287924351, "grad_norm": 1.908944559459355, "learning_rate": 5.373790820911552e-07, "loss": 0.2936, "step": 28299 }, { "epoch": 0.49192581132993796, "grad_norm": 1.3048547003040971, "learning_rate": 5.373510114656614e-07, "loss": 0.4639, "step": 28300 }, { "epoch": 0.4919431938674408, "grad_norm": 2.2757740385368366, "learning_rate": 5.373229407217819e-07, "loss": 0.2287, "step": 28301 }, { "epoch": 0.4919605764049436, "grad_norm": 2.218301086514016, "learning_rate": 5.372948698596057e-07, "loss": 0.2469, "step": 28302 }, { "epoch": 0.4919779589424464, "grad_norm": 2.7813019960311314, "learning_rate": 5.372667988792221e-07, "loss": 0.3489, "step": 28303 }, { "epoch": 0.4919953414799492, "grad_norm": 1.7753212412144022, "learning_rate": 5.372387277807198e-07, "loss": 0.3946, "step": 28304 }, { "epoch": 0.49201272401745205, "grad_norm": 2.067866381725988, "learning_rate": 5.372106565641876e-07, "loss": 0.3941, "step": 28305 }, { "epoch": 0.4920301065549549, "grad_norm": 1.073202124070988, "learning_rate": 5.371825852297151e-07, "loss": 0.261, "step": 28306 }, { "epoch": 0.4920474890924577, "grad_norm": 2.7813122546451017, "learning_rate": 5.371545137773906e-07, "loss": 0.3171, "step": 28307 }, { "epoch": 0.49206487162996054, "grad_norm": 1.192026068087303, "learning_rate": 5.371264422073035e-07, "loss": 0.2236, "step": 28308 }, { "epoch": 0.4920822541674634, "grad_norm": 1.7234186743195408, "learning_rate": 5.370983705195425e-07, "loss": 0.257, "step": 28309 }, { "epoch": 0.4920996367049662, "grad_norm": 1.2369825586892338, "learning_rate": 5.370702987141968e-07, "loss": 0.2332, "step": 28310 }, { "epoch": 0.49211701924246903, "grad_norm": 1.0342636541215493, "learning_rate": 5.370422267913552e-07, "loss": 0.2317, "step": 28311 }, { "epoch": 0.49213440177997186, "grad_norm": 1.843229242364998, "learning_rate": 5.370141547511069e-07, "loss": 0.1849, "step": 28312 }, { "epoch": 0.49215178431747464, "grad_norm": 4.804894360701224, "learning_rate": 5.369860825935407e-07, "loss": 0.3315, "step": 28313 }, { "epoch": 0.49216916685497747, "grad_norm": 2.287262593964432, "learning_rate": 5.369580103187456e-07, "loss": 0.4468, "step": 28314 }, { "epoch": 0.4921865493924803, "grad_norm": 1.9945691327755264, "learning_rate": 5.369299379268105e-07, "loss": 0.3274, "step": 28315 }, { "epoch": 0.49220393192998313, "grad_norm": 1.1405094231259798, "learning_rate": 5.369018654178247e-07, "loss": 0.2949, "step": 28316 }, { "epoch": 0.49222131446748596, "grad_norm": 8.985689373960463, "learning_rate": 5.368737927918766e-07, "loss": 0.5026, "step": 28317 }, { "epoch": 0.4922386970049888, "grad_norm": 2.861933685497373, "learning_rate": 5.368457200490557e-07, "loss": 0.4489, "step": 28318 }, { "epoch": 0.4922560795424916, "grad_norm": 1.4208592981535406, "learning_rate": 5.368176471894507e-07, "loss": 0.3314, "step": 28319 }, { "epoch": 0.49227346207999445, "grad_norm": 0.9210064955280005, "learning_rate": 5.367895742131508e-07, "loss": 0.3946, "step": 28320 }, { "epoch": 0.4922908446174973, "grad_norm": 2.3556974806099715, "learning_rate": 5.367615011202448e-07, "loss": 0.3249, "step": 28321 }, { "epoch": 0.4923082271550001, "grad_norm": 1.3185117369995651, "learning_rate": 5.367334279108216e-07, "loss": 0.3164, "step": 28322 }, { "epoch": 0.4923256096925029, "grad_norm": 2.1038683624287042, "learning_rate": 5.367053545849705e-07, "loss": 0.196, "step": 28323 }, { "epoch": 0.4923429922300057, "grad_norm": 1.7742127154819416, "learning_rate": 5.366772811427803e-07, "loss": 0.2382, "step": 28324 }, { "epoch": 0.49236037476750855, "grad_norm": 0.9419835067817127, "learning_rate": 5.366492075843398e-07, "loss": 0.1798, "step": 28325 }, { "epoch": 0.4923777573050114, "grad_norm": 1.9610325414872471, "learning_rate": 5.366211339097381e-07, "loss": 0.3709, "step": 28326 }, { "epoch": 0.4923951398425142, "grad_norm": 1.2573416846379832, "learning_rate": 5.365930601190645e-07, "loss": 0.3512, "step": 28327 }, { "epoch": 0.49241252238001704, "grad_norm": 2.1265928041547495, "learning_rate": 5.365649862124075e-07, "loss": 0.3196, "step": 28328 }, { "epoch": 0.49242990491751987, "grad_norm": 2.1252272040233233, "learning_rate": 5.365369121898564e-07, "loss": 0.4285, "step": 28329 }, { "epoch": 0.4924472874550227, "grad_norm": 1.826868771719507, "learning_rate": 5.365088380515e-07, "loss": 0.2891, "step": 28330 }, { "epoch": 0.49246466999252553, "grad_norm": 1.869418350000474, "learning_rate": 5.364807637974274e-07, "loss": 0.2317, "step": 28331 }, { "epoch": 0.49248205253002836, "grad_norm": 1.4627490241670964, "learning_rate": 5.364526894277275e-07, "loss": 0.383, "step": 28332 }, { "epoch": 0.49249943506753113, "grad_norm": 1.58916469301458, "learning_rate": 5.364246149424892e-07, "loss": 0.1416, "step": 28333 }, { "epoch": 0.49251681760503396, "grad_norm": 1.8982632132673827, "learning_rate": 5.363965403418017e-07, "loss": 0.3021, "step": 28334 }, { "epoch": 0.4925342001425368, "grad_norm": 2.131143474471617, "learning_rate": 5.363684656257541e-07, "loss": 0.2989, "step": 28335 }, { "epoch": 0.4925515826800396, "grad_norm": 1.7420083941701996, "learning_rate": 5.36340390794435e-07, "loss": 0.4624, "step": 28336 }, { "epoch": 0.49256896521754245, "grad_norm": 1.4442758029918734, "learning_rate": 5.363123158479335e-07, "loss": 0.3609, "step": 28337 }, { "epoch": 0.4925863477550453, "grad_norm": 2.062287330519532, "learning_rate": 5.362842407863388e-07, "loss": 0.2496, "step": 28338 }, { "epoch": 0.4926037302925481, "grad_norm": 1.4068169497819576, "learning_rate": 5.362561656097395e-07, "loss": 0.2269, "step": 28339 }, { "epoch": 0.49262111283005094, "grad_norm": 1.7248514839788975, "learning_rate": 5.362280903182248e-07, "loss": 0.1848, "step": 28340 }, { "epoch": 0.4926384953675538, "grad_norm": 1.6740525434201263, "learning_rate": 5.362000149118839e-07, "loss": 0.1844, "step": 28341 }, { "epoch": 0.4926558779050566, "grad_norm": 1.9450153142106792, "learning_rate": 5.361719393908056e-07, "loss": 0.352, "step": 28342 }, { "epoch": 0.4926732604425594, "grad_norm": 1.5104762526809847, "learning_rate": 5.361438637550786e-07, "loss": 0.2854, "step": 28343 }, { "epoch": 0.4926906429800622, "grad_norm": 1.0569546437576764, "learning_rate": 5.361157880047924e-07, "loss": 0.1627, "step": 28344 }, { "epoch": 0.49270802551756504, "grad_norm": 1.4587948737954413, "learning_rate": 5.360877121400357e-07, "loss": 0.2735, "step": 28345 }, { "epoch": 0.49272540805506787, "grad_norm": 3.516305363400994, "learning_rate": 5.360596361608975e-07, "loss": 0.1483, "step": 28346 }, { "epoch": 0.4927427905925707, "grad_norm": 1.7097464254898034, "learning_rate": 5.360315600674668e-07, "loss": 0.2357, "step": 28347 }, { "epoch": 0.49276017313007353, "grad_norm": 1.7396063568224325, "learning_rate": 5.360034838598325e-07, "loss": 0.2667, "step": 28348 }, { "epoch": 0.49277755566757636, "grad_norm": 1.1556568307276893, "learning_rate": 5.359754075380839e-07, "loss": 0.2378, "step": 28349 }, { "epoch": 0.4927949382050792, "grad_norm": 1.6965475922492528, "learning_rate": 5.359473311023097e-07, "loss": 0.3095, "step": 28350 }, { "epoch": 0.492812320742582, "grad_norm": 0.9163735985301041, "learning_rate": 5.359192545525989e-07, "loss": 0.2487, "step": 28351 }, { "epoch": 0.49282970328008485, "grad_norm": 0.738665577129182, "learning_rate": 5.358911778890406e-07, "loss": 0.3045, "step": 28352 }, { "epoch": 0.4928470858175876, "grad_norm": 2.683563572882384, "learning_rate": 5.358631011117239e-07, "loss": 0.3422, "step": 28353 }, { "epoch": 0.49286446835509046, "grad_norm": 1.8286561606924228, "learning_rate": 5.358350242207374e-07, "loss": 0.2842, "step": 28354 }, { "epoch": 0.4928818508925933, "grad_norm": 1.8246046271873924, "learning_rate": 5.358069472161705e-07, "loss": 0.2862, "step": 28355 }, { "epoch": 0.4928992334300961, "grad_norm": 1.992258353468463, "learning_rate": 5.35778870098112e-07, "loss": 0.256, "step": 28356 }, { "epoch": 0.49291661596759895, "grad_norm": 1.4688663087313278, "learning_rate": 5.357507928666509e-07, "loss": 0.34, "step": 28357 }, { "epoch": 0.4929339985051018, "grad_norm": 1.8668710252895122, "learning_rate": 5.357227155218762e-07, "loss": 0.2081, "step": 28358 }, { "epoch": 0.4929513810426046, "grad_norm": 1.8868384907264677, "learning_rate": 5.356946380638768e-07, "loss": 0.2816, "step": 28359 }, { "epoch": 0.49296876358010744, "grad_norm": 2.636482886626145, "learning_rate": 5.356665604927421e-07, "loss": 0.1909, "step": 28360 }, { "epoch": 0.49298614611761027, "grad_norm": 1.5868322794684153, "learning_rate": 5.356384828085605e-07, "loss": 0.2443, "step": 28361 }, { "epoch": 0.4930035286551131, "grad_norm": 1.771723289317482, "learning_rate": 5.356104050114214e-07, "loss": 0.1992, "step": 28362 }, { "epoch": 0.4930209111926159, "grad_norm": 1.4820107878322013, "learning_rate": 5.355823271014137e-07, "loss": 0.1957, "step": 28363 }, { "epoch": 0.4930382937301187, "grad_norm": 0.9525297546735936, "learning_rate": 5.355542490786263e-07, "loss": 0.2204, "step": 28364 }, { "epoch": 0.49305567626762153, "grad_norm": 1.6810971884979875, "learning_rate": 5.355261709431483e-07, "loss": 0.2201, "step": 28365 }, { "epoch": 0.49307305880512436, "grad_norm": 1.4532272409090332, "learning_rate": 5.354980926950686e-07, "loss": 0.1951, "step": 28366 }, { "epoch": 0.4930904413426272, "grad_norm": 1.3422039447301184, "learning_rate": 5.354700143344762e-07, "loss": 0.456, "step": 28367 }, { "epoch": 0.49310782388013, "grad_norm": 1.147733091130314, "learning_rate": 5.354419358614603e-07, "loss": 0.166, "step": 28368 }, { "epoch": 0.49312520641763286, "grad_norm": 1.0939844191893795, "learning_rate": 5.354138572761097e-07, "loss": 0.2886, "step": 28369 }, { "epoch": 0.4931425889551357, "grad_norm": 1.0068494923846816, "learning_rate": 5.353857785785135e-07, "loss": 0.1837, "step": 28370 }, { "epoch": 0.4931599714926385, "grad_norm": 2.0403615311984966, "learning_rate": 5.353576997687604e-07, "loss": 0.4073, "step": 28371 }, { "epoch": 0.49317735403014135, "grad_norm": 1.9941636946194856, "learning_rate": 5.353296208469398e-07, "loss": 0.2339, "step": 28372 }, { "epoch": 0.4931947365676441, "grad_norm": 1.584299662350842, "learning_rate": 5.353015418131406e-07, "loss": 0.2754, "step": 28373 }, { "epoch": 0.49321211910514695, "grad_norm": 3.324298118231886, "learning_rate": 5.352734626674517e-07, "loss": 0.2405, "step": 28374 }, { "epoch": 0.4932295016426498, "grad_norm": 1.2798094516912486, "learning_rate": 5.35245383409962e-07, "loss": 0.2917, "step": 28375 }, { "epoch": 0.4932468841801526, "grad_norm": 2.2733039798434582, "learning_rate": 5.352173040407608e-07, "loss": 0.2919, "step": 28376 }, { "epoch": 0.49326426671765544, "grad_norm": 2.224973177740839, "learning_rate": 5.351892245599368e-07, "loss": 0.2974, "step": 28377 }, { "epoch": 0.49328164925515827, "grad_norm": 2.240662307109668, "learning_rate": 5.351611449675791e-07, "loss": 0.2204, "step": 28378 }, { "epoch": 0.4932990317926611, "grad_norm": 2.762592864750671, "learning_rate": 5.351330652637769e-07, "loss": 0.4341, "step": 28379 }, { "epoch": 0.49331641433016393, "grad_norm": 1.2923913333118546, "learning_rate": 5.351049854486188e-07, "loss": 0.2643, "step": 28380 }, { "epoch": 0.49333379686766676, "grad_norm": 2.117207036181721, "learning_rate": 5.350769055221941e-07, "loss": 0.3015, "step": 28381 }, { "epoch": 0.4933511794051696, "grad_norm": 1.3682382149167867, "learning_rate": 5.350488254845917e-07, "loss": 0.3262, "step": 28382 }, { "epoch": 0.49336856194267237, "grad_norm": 1.3823418776477576, "learning_rate": 5.350207453359007e-07, "loss": 0.1779, "step": 28383 }, { "epoch": 0.4933859444801752, "grad_norm": 2.3067983888022368, "learning_rate": 5.3499266507621e-07, "loss": 0.4023, "step": 28384 }, { "epoch": 0.49340332701767803, "grad_norm": 1.555886031751471, "learning_rate": 5.349645847056086e-07, "loss": 0.2967, "step": 28385 }, { "epoch": 0.49342070955518086, "grad_norm": 2.3729288909699973, "learning_rate": 5.349365042241856e-07, "loss": 0.2779, "step": 28386 }, { "epoch": 0.4934380920926837, "grad_norm": 1.1308544273509493, "learning_rate": 5.349084236320298e-07, "loss": 0.2916, "step": 28387 }, { "epoch": 0.4934554746301865, "grad_norm": 1.50562156685814, "learning_rate": 5.348803429292304e-07, "loss": 0.449, "step": 28388 }, { "epoch": 0.49347285716768935, "grad_norm": 1.9922324888866731, "learning_rate": 5.348522621158764e-07, "loss": 0.3233, "step": 28389 }, { "epoch": 0.4934902397051922, "grad_norm": 2.2210243145213306, "learning_rate": 5.348241811920567e-07, "loss": 0.3616, "step": 28390 }, { "epoch": 0.493507622242695, "grad_norm": 2.4469873646580003, "learning_rate": 5.347961001578602e-07, "loss": 0.3591, "step": 28391 }, { "epoch": 0.49352500478019784, "grad_norm": 1.4175303619622548, "learning_rate": 5.347680190133762e-07, "loss": 0.3032, "step": 28392 }, { "epoch": 0.4935423873177006, "grad_norm": 1.10553002039142, "learning_rate": 5.347399377586936e-07, "loss": 0.2006, "step": 28393 }, { "epoch": 0.49355976985520345, "grad_norm": 1.7715049782026122, "learning_rate": 5.347118563939012e-07, "loss": 0.1649, "step": 28394 }, { "epoch": 0.4935771523927063, "grad_norm": 1.6921893774841266, "learning_rate": 5.346837749190883e-07, "loss": 0.3718, "step": 28395 }, { "epoch": 0.4935945349302091, "grad_norm": 3.1417391709907934, "learning_rate": 5.346556933343436e-07, "loss": 0.4172, "step": 28396 }, { "epoch": 0.49361191746771194, "grad_norm": 1.239806473744123, "learning_rate": 5.346276116397565e-07, "loss": 0.2519, "step": 28397 }, { "epoch": 0.49362930000521477, "grad_norm": 2.099323263644829, "learning_rate": 5.345995298354157e-07, "loss": 0.2664, "step": 28398 }, { "epoch": 0.4936466825427176, "grad_norm": 2.52099425580837, "learning_rate": 5.345714479214102e-07, "loss": 0.3639, "step": 28399 }, { "epoch": 0.4936640650802204, "grad_norm": 2.0066264554587647, "learning_rate": 5.34543365897829e-07, "loss": 0.4941, "step": 28400 }, { "epoch": 0.49368144761772326, "grad_norm": 1.3222042041998736, "learning_rate": 5.345152837647615e-07, "loss": 0.2075, "step": 28401 }, { "epoch": 0.4936988301552261, "grad_norm": 1.8564014466859862, "learning_rate": 5.344872015222961e-07, "loss": 0.2407, "step": 28402 }, { "epoch": 0.49371621269272886, "grad_norm": 2.61752068089977, "learning_rate": 5.344591191705223e-07, "loss": 0.2524, "step": 28403 }, { "epoch": 0.4937335952302317, "grad_norm": 3.3753390440583058, "learning_rate": 5.344310367095288e-07, "loss": 0.2705, "step": 28404 }, { "epoch": 0.4937509777677345, "grad_norm": 1.033284786061185, "learning_rate": 5.344029541394048e-07, "loss": 0.2566, "step": 28405 }, { "epoch": 0.49376836030523735, "grad_norm": 1.7340401307016264, "learning_rate": 5.343748714602392e-07, "loss": 0.4046, "step": 28406 }, { "epoch": 0.4937857428427402, "grad_norm": 1.6725540601189455, "learning_rate": 5.343467886721211e-07, "loss": 0.3504, "step": 28407 }, { "epoch": 0.493803125380243, "grad_norm": 1.5717345889818568, "learning_rate": 5.343187057751395e-07, "loss": 0.3868, "step": 28408 }, { "epoch": 0.49382050791774584, "grad_norm": 1.2034540241192262, "learning_rate": 5.342906227693833e-07, "loss": 0.3737, "step": 28409 }, { "epoch": 0.4938378904552487, "grad_norm": 1.5063201767849224, "learning_rate": 5.342625396549416e-07, "loss": 0.2203, "step": 28410 }, { "epoch": 0.4938552729927515, "grad_norm": 0.9103480063211511, "learning_rate": 5.342344564319033e-07, "loss": 0.1835, "step": 28411 }, { "epoch": 0.49387265553025433, "grad_norm": 1.445520373857817, "learning_rate": 5.342063731003577e-07, "loss": 0.3243, "step": 28412 }, { "epoch": 0.4938900380677571, "grad_norm": 1.372161167026911, "learning_rate": 5.341782896603935e-07, "loss": 0.1833, "step": 28413 }, { "epoch": 0.49390742060525994, "grad_norm": 1.4815712792733327, "learning_rate": 5.341502061120997e-07, "loss": 0.3482, "step": 28414 }, { "epoch": 0.49392480314276277, "grad_norm": 1.2999480404869461, "learning_rate": 5.341221224555654e-07, "loss": 0.3657, "step": 28415 }, { "epoch": 0.4939421856802656, "grad_norm": 2.5121605852644344, "learning_rate": 5.340940386908799e-07, "loss": 0.2656, "step": 28416 }, { "epoch": 0.49395956821776843, "grad_norm": 2.4488032669865305, "learning_rate": 5.340659548181319e-07, "loss": 0.3578, "step": 28417 }, { "epoch": 0.49397695075527126, "grad_norm": 2.048700377701947, "learning_rate": 5.340378708374104e-07, "loss": 0.3208, "step": 28418 }, { "epoch": 0.4939943332927741, "grad_norm": 2.1410874923048144, "learning_rate": 5.340097867488044e-07, "loss": 0.2193, "step": 28419 }, { "epoch": 0.4940117158302769, "grad_norm": 1.569046476137522, "learning_rate": 5.339817025524032e-07, "loss": 0.1769, "step": 28420 }, { "epoch": 0.49402909836777975, "grad_norm": 2.851000586341893, "learning_rate": 5.339536182482954e-07, "loss": 0.3398, "step": 28421 }, { "epoch": 0.4940464809052825, "grad_norm": 2.00942148725565, "learning_rate": 5.339255338365705e-07, "loss": 0.3066, "step": 28422 }, { "epoch": 0.49406386344278536, "grad_norm": 1.1550536732340178, "learning_rate": 5.33897449317317e-07, "loss": 0.2511, "step": 28423 }, { "epoch": 0.4940812459802882, "grad_norm": 4.54556645225121, "learning_rate": 5.338693646906243e-07, "loss": 0.3521, "step": 28424 }, { "epoch": 0.494098628517791, "grad_norm": 1.4065317185178319, "learning_rate": 5.338412799565811e-07, "loss": 0.1992, "step": 28425 }, { "epoch": 0.49411601105529385, "grad_norm": 1.0866024355028752, "learning_rate": 5.338131951152768e-07, "loss": 0.2044, "step": 28426 }, { "epoch": 0.4941333935927967, "grad_norm": 1.347422719630559, "learning_rate": 5.337851101668e-07, "loss": 0.2109, "step": 28427 }, { "epoch": 0.4941507761302995, "grad_norm": 6.554269661072282, "learning_rate": 5.337570251112401e-07, "loss": 0.3476, "step": 28428 }, { "epoch": 0.49416815866780234, "grad_norm": 1.4192029979152052, "learning_rate": 5.337289399486858e-07, "loss": 0.392, "step": 28429 }, { "epoch": 0.49418554120530517, "grad_norm": 1.24930030088769, "learning_rate": 5.337008546792264e-07, "loss": 0.2958, "step": 28430 }, { "epoch": 0.494202923742808, "grad_norm": 1.6800501190920027, "learning_rate": 5.336727693029506e-07, "loss": 0.3127, "step": 28431 }, { "epoch": 0.4942203062803108, "grad_norm": 2.2298643979576966, "learning_rate": 5.336446838199476e-07, "loss": 0.3871, "step": 28432 }, { "epoch": 0.4942376888178136, "grad_norm": 1.9274531996653106, "learning_rate": 5.336165982303066e-07, "loss": 0.2492, "step": 28433 }, { "epoch": 0.49425507135531643, "grad_norm": 2.089177972668513, "learning_rate": 5.335885125341163e-07, "loss": 0.3512, "step": 28434 }, { "epoch": 0.49427245389281926, "grad_norm": 1.7187144574164728, "learning_rate": 5.335604267314658e-07, "loss": 0.4272, "step": 28435 }, { "epoch": 0.4942898364303221, "grad_norm": 2.9932466313600767, "learning_rate": 5.335323408224443e-07, "loss": 0.2783, "step": 28436 }, { "epoch": 0.4943072189678249, "grad_norm": 1.2317088793781876, "learning_rate": 5.335042548071406e-07, "loss": 0.1839, "step": 28437 }, { "epoch": 0.49432460150532775, "grad_norm": 2.0128379367490035, "learning_rate": 5.334761686856437e-07, "loss": 0.3711, "step": 28438 }, { "epoch": 0.4943419840428306, "grad_norm": 1.1259967591471238, "learning_rate": 5.33448082458043e-07, "loss": 0.1715, "step": 28439 }, { "epoch": 0.4943593665803334, "grad_norm": 1.620945495962976, "learning_rate": 5.334199961244269e-07, "loss": 0.1889, "step": 28440 }, { "epoch": 0.49437674911783624, "grad_norm": 2.2040674670190614, "learning_rate": 5.333919096848849e-07, "loss": 0.3393, "step": 28441 }, { "epoch": 0.494394131655339, "grad_norm": 1.2830891741027208, "learning_rate": 5.33363823139506e-07, "loss": 0.2862, "step": 28442 }, { "epoch": 0.49441151419284185, "grad_norm": 1.6377806561007051, "learning_rate": 5.333357364883791e-07, "loss": 0.4872, "step": 28443 }, { "epoch": 0.4944288967303447, "grad_norm": 1.0548915566634935, "learning_rate": 5.33307649731593e-07, "loss": 0.2853, "step": 28444 }, { "epoch": 0.4944462792678475, "grad_norm": 2.5109990688852366, "learning_rate": 5.332795628692373e-07, "loss": 0.3106, "step": 28445 }, { "epoch": 0.49446366180535034, "grad_norm": 5.394910095474295, "learning_rate": 5.332514759014003e-07, "loss": 0.5056, "step": 28446 }, { "epoch": 0.49448104434285317, "grad_norm": 1.1944883918614444, "learning_rate": 5.332233888281716e-07, "loss": 0.3015, "step": 28447 }, { "epoch": 0.494498426880356, "grad_norm": 2.3517510446895082, "learning_rate": 5.3319530164964e-07, "loss": 0.4209, "step": 28448 }, { "epoch": 0.49451580941785883, "grad_norm": 1.6840140620906423, "learning_rate": 5.331672143658946e-07, "loss": 0.7075, "step": 28449 }, { "epoch": 0.49453319195536166, "grad_norm": 1.8576930383582504, "learning_rate": 5.331391269770242e-07, "loss": 0.2883, "step": 28450 }, { "epoch": 0.4945505744928645, "grad_norm": 0.7789722256067939, "learning_rate": 5.331110394831181e-07, "loss": 0.2532, "step": 28451 }, { "epoch": 0.49456795703036727, "grad_norm": 1.0766550114569984, "learning_rate": 5.330829518842653e-07, "loss": 0.1867, "step": 28452 }, { "epoch": 0.4945853395678701, "grad_norm": 1.9354552522877357, "learning_rate": 5.330548641805546e-07, "loss": 0.3008, "step": 28453 }, { "epoch": 0.4946027221053729, "grad_norm": 1.3358061670020296, "learning_rate": 5.330267763720752e-07, "loss": 0.1976, "step": 28454 }, { "epoch": 0.49462010464287576, "grad_norm": 1.8600964427032096, "learning_rate": 5.329986884589161e-07, "loss": 0.2728, "step": 28455 }, { "epoch": 0.4946374871803786, "grad_norm": 1.7282354182306685, "learning_rate": 5.329706004411663e-07, "loss": 0.3177, "step": 28456 }, { "epoch": 0.4946548697178814, "grad_norm": 2.345018870583917, "learning_rate": 5.329425123189149e-07, "loss": 0.2813, "step": 28457 }, { "epoch": 0.49467225225538425, "grad_norm": 1.1815131299368917, "learning_rate": 5.329144240922508e-07, "loss": 0.1966, "step": 28458 }, { "epoch": 0.4946896347928871, "grad_norm": 1.781411373052243, "learning_rate": 5.328863357612632e-07, "loss": 0.1897, "step": 28459 }, { "epoch": 0.4947070173303899, "grad_norm": 1.1438536988077115, "learning_rate": 5.328582473260408e-07, "loss": 0.3169, "step": 28460 }, { "epoch": 0.49472439986789274, "grad_norm": 2.3029409621035435, "learning_rate": 5.328301587866731e-07, "loss": 0.3341, "step": 28461 }, { "epoch": 0.4947417824053955, "grad_norm": 2.151014871139252, "learning_rate": 5.328020701432487e-07, "loss": 0.192, "step": 28462 }, { "epoch": 0.49475916494289834, "grad_norm": 1.4811734111862167, "learning_rate": 5.327739813958569e-07, "loss": 0.4707, "step": 28463 }, { "epoch": 0.4947765474804012, "grad_norm": 1.4533865053966126, "learning_rate": 5.327458925445867e-07, "loss": 0.3117, "step": 28464 }, { "epoch": 0.494793930017904, "grad_norm": 1.1769687419952697, "learning_rate": 5.327178035895269e-07, "loss": 0.2851, "step": 28465 }, { "epoch": 0.49481131255540683, "grad_norm": 1.9976415788378654, "learning_rate": 5.326897145307667e-07, "loss": 0.2948, "step": 28466 }, { "epoch": 0.49482869509290967, "grad_norm": 1.9639774914503232, "learning_rate": 5.326616253683952e-07, "loss": 0.2487, "step": 28467 }, { "epoch": 0.4948460776304125, "grad_norm": 1.747368121968681, "learning_rate": 5.326335361025014e-07, "loss": 0.4384, "step": 28468 }, { "epoch": 0.4948634601679153, "grad_norm": 1.759579087274414, "learning_rate": 5.326054467331742e-07, "loss": 0.1828, "step": 28469 }, { "epoch": 0.49488084270541816, "grad_norm": 1.8165001832192431, "learning_rate": 5.325773572605028e-07, "loss": 0.3555, "step": 28470 }, { "epoch": 0.494898225242921, "grad_norm": 0.9462528640625382, "learning_rate": 5.32549267684576e-07, "loss": 0.185, "step": 28471 }, { "epoch": 0.49491560778042376, "grad_norm": 1.5458309533381107, "learning_rate": 5.32521178005483e-07, "loss": 0.2109, "step": 28472 }, { "epoch": 0.4949329903179266, "grad_norm": 0.9471931111930694, "learning_rate": 5.324930882233128e-07, "loss": 0.3307, "step": 28473 }, { "epoch": 0.4949503728554294, "grad_norm": 0.9863748906360271, "learning_rate": 5.324649983381546e-07, "loss": 0.1952, "step": 28474 }, { "epoch": 0.49496775539293225, "grad_norm": 2.0079299763347205, "learning_rate": 5.324369083500971e-07, "loss": 0.3826, "step": 28475 }, { "epoch": 0.4949851379304351, "grad_norm": 2.217576419949659, "learning_rate": 5.324088182592296e-07, "loss": 0.2119, "step": 28476 }, { "epoch": 0.4950025204679379, "grad_norm": 1.395227102368588, "learning_rate": 5.32380728065641e-07, "loss": 0.3096, "step": 28477 }, { "epoch": 0.49501990300544074, "grad_norm": 1.655289789918663, "learning_rate": 5.323526377694203e-07, "loss": 0.2934, "step": 28478 }, { "epoch": 0.4950372855429436, "grad_norm": 1.313970345984973, "learning_rate": 5.323245473706566e-07, "loss": 0.1772, "step": 28479 }, { "epoch": 0.4950546680804464, "grad_norm": 2.3526468353706433, "learning_rate": 5.322964568694391e-07, "loss": 0.4842, "step": 28480 }, { "epoch": 0.49507205061794923, "grad_norm": 1.2083578073443313, "learning_rate": 5.322683662658564e-07, "loss": 0.2623, "step": 28481 }, { "epoch": 0.495089433155452, "grad_norm": 2.19537363028146, "learning_rate": 5.322402755599981e-07, "loss": 0.3193, "step": 28482 }, { "epoch": 0.49510681569295484, "grad_norm": 1.16236916088767, "learning_rate": 5.322121847519529e-07, "loss": 0.1619, "step": 28483 }, { "epoch": 0.49512419823045767, "grad_norm": 1.1813352611155867, "learning_rate": 5.321840938418097e-07, "loss": 0.2874, "step": 28484 }, { "epoch": 0.4951415807679605, "grad_norm": 1.3664979126179921, "learning_rate": 5.321560028296577e-07, "loss": 0.3477, "step": 28485 }, { "epoch": 0.49515896330546333, "grad_norm": 3.7441467243971935, "learning_rate": 5.321279117155861e-07, "loss": 0.3383, "step": 28486 }, { "epoch": 0.49517634584296616, "grad_norm": 1.4650360021761417, "learning_rate": 5.320998204996836e-07, "loss": 0.4721, "step": 28487 }, { "epoch": 0.495193728380469, "grad_norm": 1.6745864551338874, "learning_rate": 5.320717291820396e-07, "loss": 0.2419, "step": 28488 }, { "epoch": 0.4952111109179718, "grad_norm": 1.24160498777101, "learning_rate": 5.320436377627428e-07, "loss": 0.2534, "step": 28489 }, { "epoch": 0.49522849345547465, "grad_norm": 1.5440851128882505, "learning_rate": 5.320155462418824e-07, "loss": 0.2693, "step": 28490 }, { "epoch": 0.4952458759929775, "grad_norm": 1.5662694218181952, "learning_rate": 5.319874546195475e-07, "loss": 0.357, "step": 28491 }, { "epoch": 0.49526325853048025, "grad_norm": 1.828561535189507, "learning_rate": 5.31959362895827e-07, "loss": 0.2179, "step": 28492 }, { "epoch": 0.4952806410679831, "grad_norm": 1.3861481156187379, "learning_rate": 5.319312710708101e-07, "loss": 0.3295, "step": 28493 }, { "epoch": 0.4952980236054859, "grad_norm": 2.070573474864365, "learning_rate": 5.319031791445857e-07, "loss": 0.2611, "step": 28494 }, { "epoch": 0.49531540614298875, "grad_norm": 1.5495142099988208, "learning_rate": 5.318750871172428e-07, "loss": 0.2658, "step": 28495 }, { "epoch": 0.4953327886804916, "grad_norm": 1.2419171418050512, "learning_rate": 5.318469949888706e-07, "loss": 0.3197, "step": 28496 }, { "epoch": 0.4953501712179944, "grad_norm": 2.856483948284823, "learning_rate": 5.318189027595579e-07, "loss": 0.3022, "step": 28497 }, { "epoch": 0.49536755375549724, "grad_norm": 2.394490842556548, "learning_rate": 5.317908104293941e-07, "loss": 0.3402, "step": 28498 }, { "epoch": 0.49538493629300007, "grad_norm": 1.6407879497219253, "learning_rate": 5.31762717998468e-07, "loss": 0.301, "step": 28499 }, { "epoch": 0.4954023188305029, "grad_norm": 2.2864223796958325, "learning_rate": 5.317346254668685e-07, "loss": 0.2229, "step": 28500 }, { "epoch": 0.4954197013680057, "grad_norm": 2.80869278288409, "learning_rate": 5.31706532834685e-07, "loss": 0.3828, "step": 28501 }, { "epoch": 0.4954370839055085, "grad_norm": 2.6432232029275387, "learning_rate": 5.316784401020063e-07, "loss": 0.3622, "step": 28502 }, { "epoch": 0.49545446644301133, "grad_norm": 1.8696328003770242, "learning_rate": 5.316503472689215e-07, "loss": 0.3589, "step": 28503 }, { "epoch": 0.49547184898051416, "grad_norm": 1.5553707255067541, "learning_rate": 5.316222543355196e-07, "loss": 0.2112, "step": 28504 }, { "epoch": 0.495489231518017, "grad_norm": 0.9371530553996296, "learning_rate": 5.315941613018899e-07, "loss": 0.1397, "step": 28505 }, { "epoch": 0.4955066140555198, "grad_norm": 1.883977016189158, "learning_rate": 5.315660681681209e-07, "loss": 0.3794, "step": 28506 }, { "epoch": 0.49552399659302265, "grad_norm": 1.609251451995658, "learning_rate": 5.315379749343021e-07, "loss": 0.2024, "step": 28507 }, { "epoch": 0.4955413791305255, "grad_norm": 1.9204556921259888, "learning_rate": 5.315098816005225e-07, "loss": 0.3239, "step": 28508 }, { "epoch": 0.4955587616680283, "grad_norm": 1.4137239372609338, "learning_rate": 5.314817881668709e-07, "loss": 0.2109, "step": 28509 }, { "epoch": 0.49557614420553114, "grad_norm": 2.085397354928034, "learning_rate": 5.314536946334365e-07, "loss": 0.3171, "step": 28510 }, { "epoch": 0.495593526743034, "grad_norm": 1.3167921135625347, "learning_rate": 5.314256010003084e-07, "loss": 0.2615, "step": 28511 }, { "epoch": 0.49561090928053675, "grad_norm": 2.0574587690795507, "learning_rate": 5.313975072675756e-07, "loss": 0.2801, "step": 28512 }, { "epoch": 0.4956282918180396, "grad_norm": 2.3004556402768332, "learning_rate": 5.313694134353271e-07, "loss": 0.4255, "step": 28513 }, { "epoch": 0.4956456743555424, "grad_norm": 1.5630708281988415, "learning_rate": 5.313413195036518e-07, "loss": 0.1492, "step": 28514 }, { "epoch": 0.49566305689304524, "grad_norm": 1.7101501188959733, "learning_rate": 5.313132254726392e-07, "loss": 0.2263, "step": 28515 }, { "epoch": 0.49568043943054807, "grad_norm": 1.0326686052360305, "learning_rate": 5.312851313423779e-07, "loss": 0.3369, "step": 28516 }, { "epoch": 0.4956978219680509, "grad_norm": 1.5109413181258917, "learning_rate": 5.312570371129572e-07, "loss": 0.2699, "step": 28517 }, { "epoch": 0.49571520450555373, "grad_norm": 1.8645645374232178, "learning_rate": 5.31228942784466e-07, "loss": 0.4476, "step": 28518 }, { "epoch": 0.49573258704305656, "grad_norm": 1.8500035200131102, "learning_rate": 5.312008483569934e-07, "loss": 0.1887, "step": 28519 }, { "epoch": 0.4957499695805594, "grad_norm": 1.1842690621659024, "learning_rate": 5.311727538306283e-07, "loss": 0.2472, "step": 28520 }, { "epoch": 0.4957673521180622, "grad_norm": 2.7564093183824276, "learning_rate": 5.311446592054601e-07, "loss": 0.3072, "step": 28521 }, { "epoch": 0.495784734655565, "grad_norm": 1.643892546055954, "learning_rate": 5.311165644815774e-07, "loss": 0.3965, "step": 28522 }, { "epoch": 0.4958021171930678, "grad_norm": 2.4501075923872166, "learning_rate": 5.310884696590699e-07, "loss": 0.4567, "step": 28523 }, { "epoch": 0.49581949973057066, "grad_norm": 2.215878476263906, "learning_rate": 5.310603747380259e-07, "loss": 0.2643, "step": 28524 }, { "epoch": 0.4958368822680735, "grad_norm": 1.7874823466067282, "learning_rate": 5.310322797185349e-07, "loss": 0.2317, "step": 28525 }, { "epoch": 0.4958542648055763, "grad_norm": 3.789044349589576, "learning_rate": 5.310041846006856e-07, "loss": 0.8324, "step": 28526 }, { "epoch": 0.49587164734307915, "grad_norm": 1.4068819606545562, "learning_rate": 5.309760893845675e-07, "loss": 0.228, "step": 28527 }, { "epoch": 0.495889029880582, "grad_norm": 1.4211656374039239, "learning_rate": 5.309479940702694e-07, "loss": 0.2193, "step": 28528 }, { "epoch": 0.4959064124180848, "grad_norm": 2.413882205300523, "learning_rate": 5.309198986578804e-07, "loss": 0.2246, "step": 28529 }, { "epoch": 0.49592379495558764, "grad_norm": 1.4430998757582663, "learning_rate": 5.308918031474895e-07, "loss": 0.4438, "step": 28530 }, { "epoch": 0.49594117749309047, "grad_norm": 1.7202345758324018, "learning_rate": 5.308637075391857e-07, "loss": 0.1705, "step": 28531 }, { "epoch": 0.49595856003059324, "grad_norm": 3.5012315655840665, "learning_rate": 5.308356118330582e-07, "loss": 0.3401, "step": 28532 }, { "epoch": 0.4959759425680961, "grad_norm": 1.3154289734536306, "learning_rate": 5.308075160291959e-07, "loss": 0.2279, "step": 28533 }, { "epoch": 0.4959933251055989, "grad_norm": 2.1669356138179254, "learning_rate": 5.30779420127688e-07, "loss": 0.1813, "step": 28534 }, { "epoch": 0.49601070764310173, "grad_norm": 5.493517339305617, "learning_rate": 5.307513241286234e-07, "loss": 0.5738, "step": 28535 }, { "epoch": 0.49602809018060456, "grad_norm": 1.662159536145315, "learning_rate": 5.307232280320914e-07, "loss": 0.2807, "step": 28536 }, { "epoch": 0.4960454727181074, "grad_norm": 1.9101391487991444, "learning_rate": 5.306951318381807e-07, "loss": 0.2056, "step": 28537 }, { "epoch": 0.4960628552556102, "grad_norm": 1.9441018657528781, "learning_rate": 5.306670355469806e-07, "loss": 0.2657, "step": 28538 }, { "epoch": 0.49608023779311305, "grad_norm": 2.3540661783428245, "learning_rate": 5.306389391585801e-07, "loss": 0.3709, "step": 28539 }, { "epoch": 0.4960976203306159, "grad_norm": 1.2477293840234942, "learning_rate": 5.306108426730684e-07, "loss": 0.259, "step": 28540 }, { "epoch": 0.4961150028681187, "grad_norm": 2.6347494556883824, "learning_rate": 5.305827460905341e-07, "loss": 0.3846, "step": 28541 }, { "epoch": 0.4961323854056215, "grad_norm": 1.9802282313194652, "learning_rate": 5.305546494110667e-07, "loss": 0.2186, "step": 28542 }, { "epoch": 0.4961497679431243, "grad_norm": 1.369212738605174, "learning_rate": 5.30526552634755e-07, "loss": 0.1867, "step": 28543 }, { "epoch": 0.49616715048062715, "grad_norm": 1.644585141583607, "learning_rate": 5.304984557616883e-07, "loss": 0.1955, "step": 28544 }, { "epoch": 0.49618453301813, "grad_norm": 3.2331026454499545, "learning_rate": 5.304703587919553e-07, "loss": 0.2445, "step": 28545 }, { "epoch": 0.4962019155556328, "grad_norm": 1.5855056961450436, "learning_rate": 5.304422617256454e-07, "loss": 0.1793, "step": 28546 }, { "epoch": 0.49621929809313564, "grad_norm": 0.8881183316342023, "learning_rate": 5.304141645628475e-07, "loss": 0.149, "step": 28547 }, { "epoch": 0.49623668063063847, "grad_norm": 1.4675568268457706, "learning_rate": 5.303860673036506e-07, "loss": 0.1787, "step": 28548 }, { "epoch": 0.4962540631681413, "grad_norm": 1.6602295618916987, "learning_rate": 5.303579699481439e-07, "loss": 0.1787, "step": 28549 }, { "epoch": 0.49627144570564413, "grad_norm": 1.6347928445538595, "learning_rate": 5.303298724964164e-07, "loss": 0.1599, "step": 28550 }, { "epoch": 0.49628882824314696, "grad_norm": 2.1475397411684596, "learning_rate": 5.303017749485569e-07, "loss": 0.302, "step": 28551 }, { "epoch": 0.49630621078064974, "grad_norm": 1.9473572479776466, "learning_rate": 5.30273677304655e-07, "loss": 0.3351, "step": 28552 }, { "epoch": 0.49632359331815257, "grad_norm": 1.9983585652067446, "learning_rate": 5.302455795647994e-07, "loss": 0.2661, "step": 28553 }, { "epoch": 0.4963409758556554, "grad_norm": 1.2459345615719446, "learning_rate": 5.302174817290792e-07, "loss": 0.1561, "step": 28554 }, { "epoch": 0.4963583583931582, "grad_norm": 1.6857666334745878, "learning_rate": 5.301893837975833e-07, "loss": 0.1566, "step": 28555 }, { "epoch": 0.49637574093066106, "grad_norm": 3.0064964485442767, "learning_rate": 5.301612857704011e-07, "loss": 0.4228, "step": 28556 }, { "epoch": 0.4963931234681639, "grad_norm": 1.3725405423528934, "learning_rate": 5.301331876476214e-07, "loss": 0.1863, "step": 28557 }, { "epoch": 0.4964105060056667, "grad_norm": 1.3595377524188714, "learning_rate": 5.301050894293333e-07, "loss": 0.1695, "step": 28558 }, { "epoch": 0.49642788854316955, "grad_norm": 1.6800129025386983, "learning_rate": 5.30076991115626e-07, "loss": 0.1948, "step": 28559 }, { "epoch": 0.4964452710806724, "grad_norm": 0.9958782758505924, "learning_rate": 5.300488927065882e-07, "loss": 0.2313, "step": 28560 }, { "epoch": 0.49646265361817515, "grad_norm": 1.2711694706169556, "learning_rate": 5.300207942023095e-07, "loss": 0.2913, "step": 28561 }, { "epoch": 0.496480036155678, "grad_norm": 1.585878036225063, "learning_rate": 5.299926956028786e-07, "loss": 0.3358, "step": 28562 }, { "epoch": 0.4964974186931808, "grad_norm": 1.0050648956094623, "learning_rate": 5.299645969083847e-07, "loss": 0.3165, "step": 28563 }, { "epoch": 0.49651480123068364, "grad_norm": 1.8581522555787462, "learning_rate": 5.299364981189166e-07, "loss": 0.2855, "step": 28564 }, { "epoch": 0.4965321837681865, "grad_norm": 1.172907445739784, "learning_rate": 5.299083992345637e-07, "loss": 0.2451, "step": 28565 }, { "epoch": 0.4965495663056893, "grad_norm": 1.8371510526058359, "learning_rate": 5.298803002554149e-07, "loss": 0.3183, "step": 28566 }, { "epoch": 0.49656694884319214, "grad_norm": 1.6480373478935268, "learning_rate": 5.298522011815592e-07, "loss": 0.3698, "step": 28567 }, { "epoch": 0.49658433138069497, "grad_norm": 1.5551452099994625, "learning_rate": 5.298241020130858e-07, "loss": 0.4948, "step": 28568 }, { "epoch": 0.4966017139181978, "grad_norm": 2.1637267744200273, "learning_rate": 5.297960027500838e-07, "loss": 0.2183, "step": 28569 }, { "epoch": 0.4966190964557006, "grad_norm": 1.9055919798021093, "learning_rate": 5.297679033926418e-07, "loss": 0.2594, "step": 28570 }, { "epoch": 0.4966364789932034, "grad_norm": 1.4569364382691823, "learning_rate": 5.297398039408496e-07, "loss": 0.3865, "step": 28571 }, { "epoch": 0.49665386153070623, "grad_norm": 1.1358526723954863, "learning_rate": 5.297117043947957e-07, "loss": 0.2667, "step": 28572 }, { "epoch": 0.49667124406820906, "grad_norm": 1.1391267281398876, "learning_rate": 5.296836047545694e-07, "loss": 0.385, "step": 28573 }, { "epoch": 0.4966886266057119, "grad_norm": 1.5641706764797467, "learning_rate": 5.296555050202597e-07, "loss": 0.2062, "step": 28574 }, { "epoch": 0.4967060091432147, "grad_norm": 2.811904626180726, "learning_rate": 5.296274051919558e-07, "loss": 0.3911, "step": 28575 }, { "epoch": 0.49672339168071755, "grad_norm": 1.5620863155389473, "learning_rate": 5.295993052697464e-07, "loss": 0.2126, "step": 28576 }, { "epoch": 0.4967407742182204, "grad_norm": 1.5708339649110619, "learning_rate": 5.295712052537209e-07, "loss": 0.2969, "step": 28577 }, { "epoch": 0.4967581567557232, "grad_norm": 2.0636099656041478, "learning_rate": 5.295431051439682e-07, "loss": 0.1893, "step": 28578 }, { "epoch": 0.49677553929322604, "grad_norm": 1.2447026563568493, "learning_rate": 5.295150049405775e-07, "loss": 0.2435, "step": 28579 }, { "epoch": 0.4967929218307289, "grad_norm": 1.5628543988742813, "learning_rate": 5.294869046436378e-07, "loss": 0.3756, "step": 28580 }, { "epoch": 0.49681030436823165, "grad_norm": 1.4118829567597917, "learning_rate": 5.294588042532382e-07, "loss": 0.2435, "step": 28581 }, { "epoch": 0.4968276869057345, "grad_norm": 1.1205392484182797, "learning_rate": 5.294307037694677e-07, "loss": 0.293, "step": 28582 }, { "epoch": 0.4968450694432373, "grad_norm": 1.4737413881630619, "learning_rate": 5.294026031924153e-07, "loss": 0.3364, "step": 28583 }, { "epoch": 0.49686245198074014, "grad_norm": 1.7520248250822432, "learning_rate": 5.293745025221702e-07, "loss": 0.2192, "step": 28584 }, { "epoch": 0.49687983451824297, "grad_norm": 1.0175803360823923, "learning_rate": 5.293464017588214e-07, "loss": 0.2059, "step": 28585 }, { "epoch": 0.4968972170557458, "grad_norm": 2.0474257713379287, "learning_rate": 5.29318300902458e-07, "loss": 0.4266, "step": 28586 }, { "epoch": 0.49691459959324863, "grad_norm": 1.7623943133527213, "learning_rate": 5.292901999531691e-07, "loss": 0.3697, "step": 28587 }, { "epoch": 0.49693198213075146, "grad_norm": 1.5686110797510517, "learning_rate": 5.292620989110437e-07, "loss": 0.3691, "step": 28588 }, { "epoch": 0.4969493646682543, "grad_norm": 1.0903929620774127, "learning_rate": 5.292339977761708e-07, "loss": 0.285, "step": 28589 }, { "epoch": 0.4969667472057571, "grad_norm": 1.4132945303053, "learning_rate": 5.292058965486398e-07, "loss": 0.3248, "step": 28590 }, { "epoch": 0.4969841297432599, "grad_norm": 1.7824714611050505, "learning_rate": 5.291777952285392e-07, "loss": 0.3122, "step": 28591 }, { "epoch": 0.4970015122807627, "grad_norm": 2.806218498048571, "learning_rate": 5.291496938159587e-07, "loss": 0.2614, "step": 28592 }, { "epoch": 0.49701889481826556, "grad_norm": 2.4167567260914575, "learning_rate": 5.291215923109868e-07, "loss": 0.3564, "step": 28593 }, { "epoch": 0.4970362773557684, "grad_norm": 1.356217175791144, "learning_rate": 5.29093490713713e-07, "loss": 0.1775, "step": 28594 }, { "epoch": 0.4970536598932712, "grad_norm": 1.2718166930002106, "learning_rate": 5.290653890242261e-07, "loss": 0.3346, "step": 28595 }, { "epoch": 0.49707104243077405, "grad_norm": 2.22676777584462, "learning_rate": 5.290372872426153e-07, "loss": 0.1863, "step": 28596 }, { "epoch": 0.4970884249682769, "grad_norm": 1.140622364796374, "learning_rate": 5.290091853689697e-07, "loss": 0.1396, "step": 28597 }, { "epoch": 0.4971058075057797, "grad_norm": 1.244254687342998, "learning_rate": 5.289810834033782e-07, "loss": 0.4546, "step": 28598 }, { "epoch": 0.49712319004328254, "grad_norm": 1.4012483284188213, "learning_rate": 5.2895298134593e-07, "loss": 0.3294, "step": 28599 }, { "epoch": 0.49714057258078537, "grad_norm": 1.2708868534580968, "learning_rate": 5.289248791967143e-07, "loss": 0.1743, "step": 28600 }, { "epoch": 0.49715795511828814, "grad_norm": 1.6096150105007387, "learning_rate": 5.288967769558198e-07, "loss": 0.236, "step": 28601 }, { "epoch": 0.49717533765579097, "grad_norm": 0.9397671991286456, "learning_rate": 5.28868674623336e-07, "loss": 0.2774, "step": 28602 }, { "epoch": 0.4971927201932938, "grad_norm": 2.4690968295861313, "learning_rate": 5.288405721993517e-07, "loss": 0.4625, "step": 28603 }, { "epoch": 0.49721010273079663, "grad_norm": 1.8168710411566567, "learning_rate": 5.288124696839559e-07, "loss": 0.4464, "step": 28604 }, { "epoch": 0.49722748526829946, "grad_norm": 1.5278422769367852, "learning_rate": 5.287843670772378e-07, "loss": 0.1852, "step": 28605 }, { "epoch": 0.4972448678058023, "grad_norm": 1.061070226322947, "learning_rate": 5.287562643792868e-07, "loss": 0.3064, "step": 28606 }, { "epoch": 0.4972622503433051, "grad_norm": 3.477318670761419, "learning_rate": 5.287281615901913e-07, "loss": 0.2232, "step": 28607 }, { "epoch": 0.49727963288080795, "grad_norm": 2.8808602392457874, "learning_rate": 5.287000587100408e-07, "loss": 0.2032, "step": 28608 }, { "epoch": 0.4972970154183108, "grad_norm": 1.3221453290260294, "learning_rate": 5.286719557389245e-07, "loss": 0.3991, "step": 28609 }, { "epoch": 0.4973143979558136, "grad_norm": 1.463008610151665, "learning_rate": 5.28643852676931e-07, "loss": 0.2846, "step": 28610 }, { "epoch": 0.4973317804933164, "grad_norm": 2.8202077055134294, "learning_rate": 5.286157495241498e-07, "loss": 0.2124, "step": 28611 }, { "epoch": 0.4973491630308192, "grad_norm": 2.701827454251134, "learning_rate": 5.285876462806697e-07, "loss": 0.3675, "step": 28612 }, { "epoch": 0.49736654556832205, "grad_norm": 1.574365441810564, "learning_rate": 5.2855954294658e-07, "loss": 0.2611, "step": 28613 }, { "epoch": 0.4973839281058249, "grad_norm": 2.0903035698892496, "learning_rate": 5.285314395219697e-07, "loss": 0.3287, "step": 28614 }, { "epoch": 0.4974013106433277, "grad_norm": 1.5432254182824088, "learning_rate": 5.285033360069276e-07, "loss": 0.2105, "step": 28615 }, { "epoch": 0.49741869318083054, "grad_norm": 1.0847497629680156, "learning_rate": 5.284752324015433e-07, "loss": 0.1893, "step": 28616 }, { "epoch": 0.49743607571833337, "grad_norm": 1.1418319040647458, "learning_rate": 5.284471287059056e-07, "loss": 0.2404, "step": 28617 }, { "epoch": 0.4974534582558362, "grad_norm": 1.3857184313243212, "learning_rate": 5.284190249201035e-07, "loss": 0.2496, "step": 28618 }, { "epoch": 0.49747084079333903, "grad_norm": 2.62198327900056, "learning_rate": 5.283909210442261e-07, "loss": 0.2092, "step": 28619 }, { "epoch": 0.49748822333084186, "grad_norm": 1.0790920571516276, "learning_rate": 5.283628170783624e-07, "loss": 0.2554, "step": 28620 }, { "epoch": 0.49750560586834464, "grad_norm": 1.6408346507504454, "learning_rate": 5.283347130226018e-07, "loss": 0.2792, "step": 28621 }, { "epoch": 0.49752298840584747, "grad_norm": 2.0814060224562843, "learning_rate": 5.283066088770332e-07, "loss": 0.4447, "step": 28622 }, { "epoch": 0.4975403709433503, "grad_norm": 2.0665731926561848, "learning_rate": 5.282785046417456e-07, "loss": 0.2916, "step": 28623 }, { "epoch": 0.4975577534808531, "grad_norm": 0.9709432183176123, "learning_rate": 5.282504003168279e-07, "loss": 0.2084, "step": 28624 }, { "epoch": 0.49757513601835596, "grad_norm": 1.4088046272284844, "learning_rate": 5.282222959023698e-07, "loss": 0.3377, "step": 28625 }, { "epoch": 0.4975925185558588, "grad_norm": 2.4753671929378274, "learning_rate": 5.281941913984596e-07, "loss": 0.2172, "step": 28626 }, { "epoch": 0.4976099010933616, "grad_norm": 1.3909795164223353, "learning_rate": 5.281660868051871e-07, "loss": 0.2767, "step": 28627 }, { "epoch": 0.49762728363086445, "grad_norm": 0.9899033295453834, "learning_rate": 5.281379821226408e-07, "loss": 0.2595, "step": 28628 }, { "epoch": 0.4976446661683673, "grad_norm": 1.6383109926999377, "learning_rate": 5.281098773509102e-07, "loss": 0.2676, "step": 28629 }, { "epoch": 0.4976620487058701, "grad_norm": 1.563288853857564, "learning_rate": 5.28081772490084e-07, "loss": 0.3222, "step": 28630 }, { "epoch": 0.4976794312433729, "grad_norm": 1.1383108301220763, "learning_rate": 5.280536675402516e-07, "loss": 0.4677, "step": 28631 }, { "epoch": 0.4976968137808757, "grad_norm": 2.151544058117886, "learning_rate": 5.28025562501502e-07, "loss": 0.3034, "step": 28632 }, { "epoch": 0.49771419631837854, "grad_norm": 1.6017424588774944, "learning_rate": 5.279974573739242e-07, "loss": 0.3194, "step": 28633 }, { "epoch": 0.4977315788558814, "grad_norm": 2.01297676039745, "learning_rate": 5.279693521576073e-07, "loss": 0.2707, "step": 28634 }, { "epoch": 0.4977489613933842, "grad_norm": 1.6704900005893244, "learning_rate": 5.279412468526405e-07, "loss": 0.3044, "step": 28635 }, { "epoch": 0.49776634393088703, "grad_norm": 2.2302647964348465, "learning_rate": 5.279131414591127e-07, "loss": 0.2447, "step": 28636 }, { "epoch": 0.49778372646838986, "grad_norm": 1.2034688961756994, "learning_rate": 5.278850359771131e-07, "loss": 0.4189, "step": 28637 }, { "epoch": 0.4978011090058927, "grad_norm": 1.5704417558422124, "learning_rate": 5.278569304067307e-07, "loss": 0.2909, "step": 28638 }, { "epoch": 0.4978184915433955, "grad_norm": 1.2440489563021462, "learning_rate": 5.278288247480547e-07, "loss": 0.2282, "step": 28639 }, { "epoch": 0.49783587408089836, "grad_norm": 1.7805544610838202, "learning_rate": 5.27800719001174e-07, "loss": 0.2378, "step": 28640 }, { "epoch": 0.49785325661840113, "grad_norm": 1.2853445981480327, "learning_rate": 5.277726131661778e-07, "loss": 0.149, "step": 28641 }, { "epoch": 0.49787063915590396, "grad_norm": 2.0842041534591464, "learning_rate": 5.277445072431552e-07, "loss": 0.2418, "step": 28642 }, { "epoch": 0.4978880216934068, "grad_norm": 1.1337246627767195, "learning_rate": 5.277164012321953e-07, "loss": 0.1849, "step": 28643 }, { "epoch": 0.4979054042309096, "grad_norm": 2.624114648920317, "learning_rate": 5.276882951333872e-07, "loss": 0.2762, "step": 28644 }, { "epoch": 0.49792278676841245, "grad_norm": 1.8571646036934422, "learning_rate": 5.276601889468197e-07, "loss": 0.2755, "step": 28645 }, { "epoch": 0.4979401693059153, "grad_norm": 0.9757329527549594, "learning_rate": 5.276320826725823e-07, "loss": 0.2518, "step": 28646 }, { "epoch": 0.4979575518434181, "grad_norm": 1.3609401945577193, "learning_rate": 5.276039763107639e-07, "loss": 0.2054, "step": 28647 }, { "epoch": 0.49797493438092094, "grad_norm": 2.5142540974406096, "learning_rate": 5.275758698614535e-07, "loss": 0.3121, "step": 28648 }, { "epoch": 0.49799231691842377, "grad_norm": 1.6735205042486674, "learning_rate": 5.275477633247401e-07, "loss": 0.2047, "step": 28649 }, { "epoch": 0.4980096994559266, "grad_norm": 3.721126329055903, "learning_rate": 5.275196567007133e-07, "loss": 0.3189, "step": 28650 }, { "epoch": 0.4980270819934294, "grad_norm": 1.3190384731126212, "learning_rate": 5.274915499894615e-07, "loss": 0.2032, "step": 28651 }, { "epoch": 0.4980444645309322, "grad_norm": 3.7698132501983106, "learning_rate": 5.274634431910744e-07, "loss": 0.2338, "step": 28652 }, { "epoch": 0.49806184706843504, "grad_norm": 1.6114661702029907, "learning_rate": 5.274353363056405e-07, "loss": 0.4155, "step": 28653 }, { "epoch": 0.49807922960593787, "grad_norm": 1.2361232552715413, "learning_rate": 5.274072293332495e-07, "loss": 0.4258, "step": 28654 }, { "epoch": 0.4980966121434407, "grad_norm": 1.6229394119096912, "learning_rate": 5.2737912227399e-07, "loss": 0.213, "step": 28655 }, { "epoch": 0.49811399468094353, "grad_norm": 1.174834466010113, "learning_rate": 5.273510151279512e-07, "loss": 0.2701, "step": 28656 }, { "epoch": 0.49813137721844636, "grad_norm": 1.416129606481697, "learning_rate": 5.273229078952223e-07, "loss": 0.2714, "step": 28657 }, { "epoch": 0.4981487597559492, "grad_norm": 1.1436284978652738, "learning_rate": 5.272948005758925e-07, "loss": 0.2123, "step": 28658 }, { "epoch": 0.498166142293452, "grad_norm": 5.096102877901049, "learning_rate": 5.272666931700504e-07, "loss": 0.2279, "step": 28659 }, { "epoch": 0.49818352483095485, "grad_norm": 0.8920190543472551, "learning_rate": 5.272385856777858e-07, "loss": 0.2157, "step": 28660 }, { "epoch": 0.4982009073684576, "grad_norm": 0.9980165631372517, "learning_rate": 5.27210478099187e-07, "loss": 0.282, "step": 28661 }, { "epoch": 0.49821828990596045, "grad_norm": 2.434064789532527, "learning_rate": 5.271823704343438e-07, "loss": 0.2576, "step": 28662 }, { "epoch": 0.4982356724434633, "grad_norm": 1.7546273398214778, "learning_rate": 5.271542626833448e-07, "loss": 0.1438, "step": 28663 }, { "epoch": 0.4982530549809661, "grad_norm": 1.8440821555689746, "learning_rate": 5.271261548462794e-07, "loss": 0.4724, "step": 28664 }, { "epoch": 0.49827043751846894, "grad_norm": 0.7482424136338963, "learning_rate": 5.270980469232364e-07, "loss": 0.1499, "step": 28665 }, { "epoch": 0.4982878200559718, "grad_norm": 2.706742508271483, "learning_rate": 5.270699389143052e-07, "loss": 0.4356, "step": 28666 }, { "epoch": 0.4983052025934746, "grad_norm": 1.558273690415427, "learning_rate": 5.270418308195746e-07, "loss": 0.2536, "step": 28667 }, { "epoch": 0.49832258513097744, "grad_norm": 2.1518900277288155, "learning_rate": 5.270137226391338e-07, "loss": 0.4558, "step": 28668 }, { "epoch": 0.49833996766848027, "grad_norm": 1.495363309719929, "learning_rate": 5.269856143730719e-07, "loss": 0.2055, "step": 28669 }, { "epoch": 0.4983573502059831, "grad_norm": 1.5143516766954246, "learning_rate": 5.269575060214779e-07, "loss": 0.1448, "step": 28670 }, { "epoch": 0.49837473274348587, "grad_norm": 2.169034711701727, "learning_rate": 5.269293975844412e-07, "loss": 0.2935, "step": 28671 }, { "epoch": 0.4983921152809887, "grad_norm": 1.8141082183639965, "learning_rate": 5.269012890620507e-07, "loss": 0.2042, "step": 28672 }, { "epoch": 0.49840949781849153, "grad_norm": 1.4258679653222066, "learning_rate": 5.268731804543954e-07, "loss": 0.1424, "step": 28673 }, { "epoch": 0.49842688035599436, "grad_norm": 1.954426400273956, "learning_rate": 5.268450717615644e-07, "loss": 0.2772, "step": 28674 }, { "epoch": 0.4984442628934972, "grad_norm": 1.6426984198248655, "learning_rate": 5.268169629836469e-07, "loss": 0.2055, "step": 28675 }, { "epoch": 0.498461645431, "grad_norm": 1.9332500235061107, "learning_rate": 5.267888541207319e-07, "loss": 0.3209, "step": 28676 }, { "epoch": 0.49847902796850285, "grad_norm": 1.3435493446700997, "learning_rate": 5.267607451729085e-07, "loss": 0.2214, "step": 28677 }, { "epoch": 0.4984964105060057, "grad_norm": 1.3893033742375407, "learning_rate": 5.26732636140266e-07, "loss": 0.2836, "step": 28678 }, { "epoch": 0.4985137930435085, "grad_norm": 1.89368953994798, "learning_rate": 5.267045270228934e-07, "loss": 0.3231, "step": 28679 }, { "epoch": 0.49853117558101134, "grad_norm": 1.7733515841654865, "learning_rate": 5.266764178208793e-07, "loss": 0.3411, "step": 28680 }, { "epoch": 0.4985485581185141, "grad_norm": 1.2657314303686715, "learning_rate": 5.266483085343136e-07, "loss": 0.2923, "step": 28681 }, { "epoch": 0.49856594065601695, "grad_norm": 0.8922278250915616, "learning_rate": 5.266201991632849e-07, "loss": 0.3879, "step": 28682 }, { "epoch": 0.4985833231935198, "grad_norm": 1.5275992489081425, "learning_rate": 5.265920897078823e-07, "loss": 0.238, "step": 28683 }, { "epoch": 0.4986007057310226, "grad_norm": 1.1207134739037008, "learning_rate": 5.265639801681951e-07, "loss": 0.279, "step": 28684 }, { "epoch": 0.49861808826852544, "grad_norm": 2.56272517675181, "learning_rate": 5.265358705443123e-07, "loss": 0.4039, "step": 28685 }, { "epoch": 0.49863547080602827, "grad_norm": 1.9552736994126687, "learning_rate": 5.265077608363229e-07, "loss": 0.2342, "step": 28686 }, { "epoch": 0.4986528533435311, "grad_norm": 1.9515155035859348, "learning_rate": 5.264796510443161e-07, "loss": 0.3842, "step": 28687 }, { "epoch": 0.49867023588103393, "grad_norm": 1.6073822320860314, "learning_rate": 5.26451541168381e-07, "loss": 0.23, "step": 28688 }, { "epoch": 0.49868761841853676, "grad_norm": 1.9895414368792694, "learning_rate": 5.264234312086067e-07, "loss": 0.2618, "step": 28689 }, { "epoch": 0.4987050009560396, "grad_norm": 1.3759104841671075, "learning_rate": 5.26395321165082e-07, "loss": 0.3523, "step": 28690 }, { "epoch": 0.49872238349354236, "grad_norm": 1.332326959164369, "learning_rate": 5.263672110378965e-07, "loss": 0.3333, "step": 28691 }, { "epoch": 0.4987397660310452, "grad_norm": 1.7947235853223258, "learning_rate": 5.263391008271391e-07, "loss": 0.1521, "step": 28692 }, { "epoch": 0.498757148568548, "grad_norm": 1.9220996807145752, "learning_rate": 5.263109905328987e-07, "loss": 0.3059, "step": 28693 }, { "epoch": 0.49877453110605086, "grad_norm": 2.622963570807758, "learning_rate": 5.262828801552644e-07, "loss": 0.3104, "step": 28694 }, { "epoch": 0.4987919136435537, "grad_norm": 1.9972922169092213, "learning_rate": 5.262547696943259e-07, "loss": 0.5852, "step": 28695 }, { "epoch": 0.4988092961810565, "grad_norm": 1.7894957151548823, "learning_rate": 5.262266591501715e-07, "loss": 0.2343, "step": 28696 }, { "epoch": 0.49882667871855935, "grad_norm": 1.3668574791760952, "learning_rate": 5.261985485228907e-07, "loss": 0.3651, "step": 28697 }, { "epoch": 0.4988440612560622, "grad_norm": 1.574887472908054, "learning_rate": 5.261704378125726e-07, "loss": 0.2019, "step": 28698 }, { "epoch": 0.498861443793565, "grad_norm": 1.8389149513194398, "learning_rate": 5.261423270193062e-07, "loss": 0.2268, "step": 28699 }, { "epoch": 0.4988788263310678, "grad_norm": 3.8259144044811686, "learning_rate": 5.261142161431805e-07, "loss": 0.334, "step": 28700 }, { "epoch": 0.4988962088685706, "grad_norm": 1.9594074826595347, "learning_rate": 5.260861051842849e-07, "loss": 0.2682, "step": 28701 }, { "epoch": 0.49891359140607344, "grad_norm": 1.9380874008700415, "learning_rate": 5.260579941427083e-07, "loss": 0.2767, "step": 28702 }, { "epoch": 0.4989309739435763, "grad_norm": 1.6232090108584467, "learning_rate": 5.260298830185399e-07, "loss": 0.2686, "step": 28703 }, { "epoch": 0.4989483564810791, "grad_norm": 1.4029650374698428, "learning_rate": 5.260017718118687e-07, "loss": 0.2913, "step": 28704 }, { "epoch": 0.49896573901858193, "grad_norm": 1.2806045952456815, "learning_rate": 5.259736605227836e-07, "loss": 0.1489, "step": 28705 }, { "epoch": 0.49898312155608476, "grad_norm": 1.1150761100975253, "learning_rate": 5.259455491513741e-07, "loss": 0.2149, "step": 28706 }, { "epoch": 0.4990005040935876, "grad_norm": 1.7079071538494983, "learning_rate": 5.259174376977292e-07, "loss": 0.268, "step": 28707 }, { "epoch": 0.4990178866310904, "grad_norm": 2.849611819549461, "learning_rate": 5.258893261619378e-07, "loss": 0.2467, "step": 28708 }, { "epoch": 0.49903526916859325, "grad_norm": 1.281858071856923, "learning_rate": 5.258612145440891e-07, "loss": 0.2807, "step": 28709 }, { "epoch": 0.49905265170609603, "grad_norm": 1.0700237058809678, "learning_rate": 5.258331028442725e-07, "loss": 0.2584, "step": 28710 }, { "epoch": 0.49907003424359886, "grad_norm": 1.2035210428447216, "learning_rate": 5.258049910625765e-07, "loss": 0.306, "step": 28711 }, { "epoch": 0.4990874167811017, "grad_norm": 1.4387765427035872, "learning_rate": 5.257768791990907e-07, "loss": 0.2724, "step": 28712 }, { "epoch": 0.4991047993186045, "grad_norm": 1.3025792278412969, "learning_rate": 5.257487672539039e-07, "loss": 0.2192, "step": 28713 }, { "epoch": 0.49912218185610735, "grad_norm": 1.6426558185761326, "learning_rate": 5.257206552271056e-07, "loss": 0.2627, "step": 28714 }, { "epoch": 0.4991395643936102, "grad_norm": 1.7796705687001593, "learning_rate": 5.256925431187845e-07, "loss": 0.3839, "step": 28715 }, { "epoch": 0.499156946931113, "grad_norm": 1.0917331406738322, "learning_rate": 5.256644309290298e-07, "loss": 0.2144, "step": 28716 }, { "epoch": 0.49917432946861584, "grad_norm": 1.4902016715809798, "learning_rate": 5.256363186579306e-07, "loss": 0.3173, "step": 28717 }, { "epoch": 0.49919171200611867, "grad_norm": 1.6062945384498293, "learning_rate": 5.256082063055761e-07, "loss": 0.2286, "step": 28718 }, { "epoch": 0.4992090945436215, "grad_norm": 1.5408082704735695, "learning_rate": 5.255800938720554e-07, "loss": 0.4168, "step": 28719 }, { "epoch": 0.4992264770811243, "grad_norm": 2.0335184148550365, "learning_rate": 5.255519813574576e-07, "loss": 0.3686, "step": 28720 }, { "epoch": 0.4992438596186271, "grad_norm": 1.187491019407298, "learning_rate": 5.255238687618716e-07, "loss": 0.1758, "step": 28721 }, { "epoch": 0.49926124215612994, "grad_norm": 3.644699360110773, "learning_rate": 5.254957560853867e-07, "loss": 0.9802, "step": 28722 }, { "epoch": 0.49927862469363277, "grad_norm": 1.8191720808277698, "learning_rate": 5.25467643328092e-07, "loss": 0.2564, "step": 28723 }, { "epoch": 0.4992960072311356, "grad_norm": 1.8676582012749354, "learning_rate": 5.254395304900767e-07, "loss": 0.1725, "step": 28724 }, { "epoch": 0.4993133897686384, "grad_norm": 1.3638924007289632, "learning_rate": 5.254114175714295e-07, "loss": 0.2823, "step": 28725 }, { "epoch": 0.49933077230614126, "grad_norm": 1.1127493730990223, "learning_rate": 5.253833045722399e-07, "loss": 0.2534, "step": 28726 }, { "epoch": 0.4993481548436441, "grad_norm": 2.5399199344132, "learning_rate": 5.253551914925968e-07, "loss": 0.2338, "step": 28727 }, { "epoch": 0.4993655373811469, "grad_norm": 1.3224578510102212, "learning_rate": 5.253270783325897e-07, "loss": 0.4975, "step": 28728 }, { "epoch": 0.49938291991864975, "grad_norm": 1.3393303590712795, "learning_rate": 5.252989650923072e-07, "loss": 0.2521, "step": 28729 }, { "epoch": 0.4994003024561525, "grad_norm": 1.8175067823131763, "learning_rate": 5.252708517718385e-07, "loss": 0.2443, "step": 28730 }, { "epoch": 0.49941768499365535, "grad_norm": 2.602214085475679, "learning_rate": 5.252427383712728e-07, "loss": 0.2864, "step": 28731 }, { "epoch": 0.4994350675311582, "grad_norm": 2.586018031553919, "learning_rate": 5.252146248906995e-07, "loss": 0.3721, "step": 28732 }, { "epoch": 0.499452450068661, "grad_norm": 1.0986129463322118, "learning_rate": 5.251865113302072e-07, "loss": 0.2749, "step": 28733 }, { "epoch": 0.49946983260616384, "grad_norm": 0.9865529124077345, "learning_rate": 5.251583976898851e-07, "loss": 0.2392, "step": 28734 }, { "epoch": 0.4994872151436667, "grad_norm": 1.488935830028381, "learning_rate": 5.251302839698227e-07, "loss": 0.1968, "step": 28735 }, { "epoch": 0.4995045976811695, "grad_norm": 1.1270164161512797, "learning_rate": 5.251021701701087e-07, "loss": 0.3018, "step": 28736 }, { "epoch": 0.49952198021867233, "grad_norm": 1.9391595521875795, "learning_rate": 5.250740562908324e-07, "loss": 0.2534, "step": 28737 }, { "epoch": 0.49953936275617516, "grad_norm": 1.6424797906500588, "learning_rate": 5.250459423320828e-07, "loss": 0.3766, "step": 28738 }, { "epoch": 0.499556745293678, "grad_norm": 0.9992407854497987, "learning_rate": 5.250178282939493e-07, "loss": 0.2304, "step": 28739 }, { "epoch": 0.49957412783118077, "grad_norm": 3.019974188395179, "learning_rate": 5.249897141765205e-07, "loss": 0.4172, "step": 28740 }, { "epoch": 0.4995915103686836, "grad_norm": 1.2197811403488887, "learning_rate": 5.249615999798859e-07, "loss": 0.1416, "step": 28741 }, { "epoch": 0.49960889290618643, "grad_norm": 1.6023836116145176, "learning_rate": 5.249334857041345e-07, "loss": 0.4969, "step": 28742 }, { "epoch": 0.49962627544368926, "grad_norm": 3.2483604441061202, "learning_rate": 5.249053713493554e-07, "loss": 0.3004, "step": 28743 }, { "epoch": 0.4996436579811921, "grad_norm": 1.026267284229681, "learning_rate": 5.248772569156376e-07, "loss": 0.2631, "step": 28744 }, { "epoch": 0.4996610405186949, "grad_norm": 1.0981019032246235, "learning_rate": 5.248491424030706e-07, "loss": 0.2713, "step": 28745 }, { "epoch": 0.49967842305619775, "grad_norm": 1.3432242241100145, "learning_rate": 5.248210278117429e-07, "loss": 0.2679, "step": 28746 }, { "epoch": 0.4996958055937006, "grad_norm": 1.5856494088850956, "learning_rate": 5.247929131417441e-07, "loss": 0.2807, "step": 28747 }, { "epoch": 0.4997131881312034, "grad_norm": 1.8047941691134648, "learning_rate": 5.247647983931632e-07, "loss": 0.2846, "step": 28748 }, { "epoch": 0.49973057066870624, "grad_norm": 2.0203236996867315, "learning_rate": 5.247366835660893e-07, "loss": 0.2786, "step": 28749 }, { "epoch": 0.499747953206209, "grad_norm": 1.8162113861299785, "learning_rate": 5.247085686606114e-07, "loss": 0.1936, "step": 28750 }, { "epoch": 0.49976533574371185, "grad_norm": 2.146530411003323, "learning_rate": 5.246804536768186e-07, "loss": 0.3835, "step": 28751 }, { "epoch": 0.4997827182812147, "grad_norm": 1.4894291543674614, "learning_rate": 5.246523386148002e-07, "loss": 0.4982, "step": 28752 }, { "epoch": 0.4998001008187175, "grad_norm": 2.2007044464862573, "learning_rate": 5.246242234746452e-07, "loss": 0.3955, "step": 28753 }, { "epoch": 0.49981748335622034, "grad_norm": 2.6542258018108407, "learning_rate": 5.245961082564426e-07, "loss": 0.2248, "step": 28754 }, { "epoch": 0.49983486589372317, "grad_norm": 1.548755568426033, "learning_rate": 5.245679929602818e-07, "loss": 0.1735, "step": 28755 }, { "epoch": 0.499852248431226, "grad_norm": 1.0548537425345894, "learning_rate": 5.245398775862518e-07, "loss": 0.1627, "step": 28756 }, { "epoch": 0.49986963096872883, "grad_norm": 1.9369498749628051, "learning_rate": 5.245117621344415e-07, "loss": 0.2309, "step": 28757 }, { "epoch": 0.49988701350623166, "grad_norm": 1.1290146658095794, "learning_rate": 5.244836466049402e-07, "loss": 0.4371, "step": 28758 }, { "epoch": 0.4999043960437345, "grad_norm": 1.276649692969688, "learning_rate": 5.244555309978371e-07, "loss": 0.2187, "step": 28759 }, { "epoch": 0.49992177858123726, "grad_norm": 1.3712122650847844, "learning_rate": 5.24427415313221e-07, "loss": 0.3036, "step": 28760 }, { "epoch": 0.4999391611187401, "grad_norm": 3.240425505085216, "learning_rate": 5.243992995511813e-07, "loss": 0.3718, "step": 28761 }, { "epoch": 0.4999565436562429, "grad_norm": 2.052277062452371, "learning_rate": 5.243711837118071e-07, "loss": 0.1909, "step": 28762 }, { "epoch": 0.49997392619374575, "grad_norm": 2.5095764418283153, "learning_rate": 5.243430677951873e-07, "loss": 0.2575, "step": 28763 }, { "epoch": 0.4999913087312486, "grad_norm": 1.894231178809735, "learning_rate": 5.243149518014115e-07, "loss": 0.2184, "step": 28764 }, { "epoch": 0.5000086912687514, "grad_norm": 1.4183101206709798, "learning_rate": 5.242868357305681e-07, "loss": 0.1712, "step": 28765 }, { "epoch": 0.5000260738062542, "grad_norm": 1.4384334272097472, "learning_rate": 5.242587195827467e-07, "loss": 0.2463, "step": 28766 }, { "epoch": 0.500043456343757, "grad_norm": 1.8227546003961692, "learning_rate": 5.242306033580363e-07, "loss": 0.217, "step": 28767 }, { "epoch": 0.5000608388812599, "grad_norm": 1.5758270820925828, "learning_rate": 5.242024870565261e-07, "loss": 0.335, "step": 28768 }, { "epoch": 0.5000782214187627, "grad_norm": 3.718772894843757, "learning_rate": 5.24174370678305e-07, "loss": 0.5015, "step": 28769 }, { "epoch": 0.5000956039562655, "grad_norm": 2.156562945226949, "learning_rate": 5.241462542234625e-07, "loss": 0.3497, "step": 28770 }, { "epoch": 0.5001129864937683, "grad_norm": 1.5236098518861685, "learning_rate": 5.241181376920873e-07, "loss": 0.2457, "step": 28771 }, { "epoch": 0.5001303690312712, "grad_norm": 4.409501171418477, "learning_rate": 5.240900210842686e-07, "loss": 0.2743, "step": 28772 }, { "epoch": 0.500147751568774, "grad_norm": 0.9100752692644745, "learning_rate": 5.240619044000956e-07, "loss": 0.2391, "step": 28773 }, { "epoch": 0.5001651341062768, "grad_norm": 1.9838048503235397, "learning_rate": 5.240337876396575e-07, "loss": 0.3772, "step": 28774 }, { "epoch": 0.5001825166437797, "grad_norm": 1.6559187758780116, "learning_rate": 5.240056708030434e-07, "loss": 0.2081, "step": 28775 }, { "epoch": 0.5001998991812825, "grad_norm": 3.568514431083875, "learning_rate": 5.239775538903422e-07, "loss": 0.3162, "step": 28776 }, { "epoch": 0.5002172817187853, "grad_norm": 1.474682400676385, "learning_rate": 5.239494369016433e-07, "loss": 0.1529, "step": 28777 }, { "epoch": 0.5002346642562882, "grad_norm": 2.721791099748913, "learning_rate": 5.239213198370356e-07, "loss": 0.4372, "step": 28778 }, { "epoch": 0.500252046793791, "grad_norm": 3.5317865136534854, "learning_rate": 5.238932026966083e-07, "loss": 0.2931, "step": 28779 }, { "epoch": 0.5002694293312938, "grad_norm": 3.0175729606050226, "learning_rate": 5.238650854804507e-07, "loss": 0.3123, "step": 28780 }, { "epoch": 0.5002868118687966, "grad_norm": 2.509470389251532, "learning_rate": 5.238369681886515e-07, "loss": 0.1739, "step": 28781 }, { "epoch": 0.5003041944062995, "grad_norm": 1.8388295174632407, "learning_rate": 5.238088508213002e-07, "loss": 0.2976, "step": 28782 }, { "epoch": 0.5003215769438023, "grad_norm": 1.2015374985720884, "learning_rate": 5.237807333784857e-07, "loss": 0.2855, "step": 28783 }, { "epoch": 0.5003389594813051, "grad_norm": 1.132755262219855, "learning_rate": 5.237526158602973e-07, "loss": 0.2605, "step": 28784 }, { "epoch": 0.5003563420188079, "grad_norm": 2.195570985710407, "learning_rate": 5.237244982668238e-07, "loss": 0.2613, "step": 28785 }, { "epoch": 0.5003737245563107, "grad_norm": 1.4644068288900256, "learning_rate": 5.236963805981548e-07, "loss": 0.2444, "step": 28786 }, { "epoch": 0.5003911070938135, "grad_norm": 1.4962365491376286, "learning_rate": 5.236682628543791e-07, "loss": 0.2424, "step": 28787 }, { "epoch": 0.5004084896313163, "grad_norm": 1.6943183391508452, "learning_rate": 5.236401450355859e-07, "loss": 0.3276, "step": 28788 }, { "epoch": 0.5004258721688192, "grad_norm": 2.1652263421023843, "learning_rate": 5.236120271418641e-07, "loss": 0.2915, "step": 28789 }, { "epoch": 0.500443254706322, "grad_norm": 2.16267265628211, "learning_rate": 5.235839091733031e-07, "loss": 0.2195, "step": 28790 }, { "epoch": 0.5004606372438248, "grad_norm": 1.0098475518075447, "learning_rate": 5.23555791129992e-07, "loss": 0.2061, "step": 28791 }, { "epoch": 0.5004780197813277, "grad_norm": 1.919401834738975, "learning_rate": 5.2352767301202e-07, "loss": 0.3782, "step": 28792 }, { "epoch": 0.5004954023188305, "grad_norm": 2.1416901117422684, "learning_rate": 5.234995548194759e-07, "loss": 0.3885, "step": 28793 }, { "epoch": 0.5005127848563333, "grad_norm": 2.151609851775531, "learning_rate": 5.23471436552449e-07, "loss": 0.37, "step": 28794 }, { "epoch": 0.5005301673938362, "grad_norm": 0.900854997978319, "learning_rate": 5.234433182110285e-07, "loss": 0.2999, "step": 28795 }, { "epoch": 0.500547549931339, "grad_norm": 1.9947828284660267, "learning_rate": 5.234151997953034e-07, "loss": 0.1844, "step": 28796 }, { "epoch": 0.5005649324688418, "grad_norm": 1.313124864742513, "learning_rate": 5.233870813053629e-07, "loss": 0.2551, "step": 28797 }, { "epoch": 0.5005823150063446, "grad_norm": 2.443347165881505, "learning_rate": 5.23358962741296e-07, "loss": 0.389, "step": 28798 }, { "epoch": 0.5005996975438475, "grad_norm": 2.9014751907529677, "learning_rate": 5.233308441031921e-07, "loss": 0.1747, "step": 28799 }, { "epoch": 0.5006170800813503, "grad_norm": 4.604704918999288, "learning_rate": 5.233027253911401e-07, "loss": 0.4266, "step": 28800 }, { "epoch": 0.5006344626188531, "grad_norm": 2.1838022556703316, "learning_rate": 5.23274606605229e-07, "loss": 0.204, "step": 28801 }, { "epoch": 0.500651845156356, "grad_norm": 1.8137602685670675, "learning_rate": 5.232464877455483e-07, "loss": 0.2767, "step": 28802 }, { "epoch": 0.5006692276938588, "grad_norm": 1.1200773297047462, "learning_rate": 5.232183688121868e-07, "loss": 0.1917, "step": 28803 }, { "epoch": 0.5006866102313616, "grad_norm": 1.7898430063071724, "learning_rate": 5.231902498052337e-07, "loss": 0.2193, "step": 28804 }, { "epoch": 0.5007039927688643, "grad_norm": 1.3268501569815243, "learning_rate": 5.231621307247783e-07, "loss": 0.2491, "step": 28805 }, { "epoch": 0.5007213753063672, "grad_norm": 1.6431211215614645, "learning_rate": 5.231340115709096e-07, "loss": 0.318, "step": 28806 }, { "epoch": 0.50073875784387, "grad_norm": 1.6194930505678975, "learning_rate": 5.231058923437164e-07, "loss": 0.2114, "step": 28807 }, { "epoch": 0.5007561403813728, "grad_norm": 1.3770178963590205, "learning_rate": 5.230777730432885e-07, "loss": 0.2404, "step": 28808 }, { "epoch": 0.5007735229188757, "grad_norm": 1.6617129605822656, "learning_rate": 5.230496536697146e-07, "loss": 0.191, "step": 28809 }, { "epoch": 0.5007909054563785, "grad_norm": 1.2240288732651468, "learning_rate": 5.230215342230836e-07, "loss": 0.3185, "step": 28810 }, { "epoch": 0.5008082879938813, "grad_norm": 1.9994620176344815, "learning_rate": 5.229934147034852e-07, "loss": 0.2778, "step": 28811 }, { "epoch": 0.5008256705313842, "grad_norm": 1.0423978013938393, "learning_rate": 5.229652951110081e-07, "loss": 0.2152, "step": 28812 }, { "epoch": 0.500843053068887, "grad_norm": 1.1921585746568995, "learning_rate": 5.229371754457416e-07, "loss": 0.2938, "step": 28813 }, { "epoch": 0.5008604356063898, "grad_norm": 2.2193316080398526, "learning_rate": 5.229090557077747e-07, "loss": 0.2321, "step": 28814 }, { "epoch": 0.5008778181438927, "grad_norm": 1.5314215378015406, "learning_rate": 5.228809358971969e-07, "loss": 0.1759, "step": 28815 }, { "epoch": 0.5008952006813955, "grad_norm": 2.1401924899228284, "learning_rate": 5.228528160140968e-07, "loss": 0.42, "step": 28816 }, { "epoch": 0.5009125832188983, "grad_norm": 1.7625472663223338, "learning_rate": 5.228246960585639e-07, "loss": 0.2314, "step": 28817 }, { "epoch": 0.5009299657564011, "grad_norm": 1.7959711672799703, "learning_rate": 5.22796576030687e-07, "loss": 0.2986, "step": 28818 }, { "epoch": 0.500947348293904, "grad_norm": 1.2696958334535748, "learning_rate": 5.227684559305555e-07, "loss": 0.1969, "step": 28819 }, { "epoch": 0.5009647308314068, "grad_norm": 1.1636440854344625, "learning_rate": 5.227403357582586e-07, "loss": 0.2196, "step": 28820 }, { "epoch": 0.5009821133689096, "grad_norm": 1.4895740888976623, "learning_rate": 5.227122155138852e-07, "loss": 0.1643, "step": 28821 }, { "epoch": 0.5009994959064125, "grad_norm": 2.967422023899234, "learning_rate": 5.226840951975246e-07, "loss": 0.477, "step": 28822 }, { "epoch": 0.5010168784439153, "grad_norm": 1.3274405882058586, "learning_rate": 5.226559748092656e-07, "loss": 0.1455, "step": 28823 }, { "epoch": 0.5010342609814181, "grad_norm": 0.9337050943316387, "learning_rate": 5.226278543491977e-07, "loss": 0.3083, "step": 28824 }, { "epoch": 0.5010516435189208, "grad_norm": 1.6704011399408194, "learning_rate": 5.2259973381741e-07, "loss": 0.1711, "step": 28825 }, { "epoch": 0.5010690260564237, "grad_norm": 2.272775805532916, "learning_rate": 5.225716132139914e-07, "loss": 0.3483, "step": 28826 }, { "epoch": 0.5010864085939265, "grad_norm": 1.6639524288380725, "learning_rate": 5.225434925390312e-07, "loss": 0.2184, "step": 28827 }, { "epoch": 0.5011037911314293, "grad_norm": 1.389436512724788, "learning_rate": 5.225153717926185e-07, "loss": 0.2623, "step": 28828 }, { "epoch": 0.5011211736689322, "grad_norm": 4.064178681642374, "learning_rate": 5.224872509748423e-07, "loss": 0.4024, "step": 28829 }, { "epoch": 0.501138556206435, "grad_norm": 1.575586326505788, "learning_rate": 5.22459130085792e-07, "loss": 0.2842, "step": 28830 }, { "epoch": 0.5011559387439378, "grad_norm": 1.2914313185156123, "learning_rate": 5.224310091255563e-07, "loss": 0.3925, "step": 28831 }, { "epoch": 0.5011733212814407, "grad_norm": 1.5572940128213884, "learning_rate": 5.22402888094225e-07, "loss": 0.1762, "step": 28832 }, { "epoch": 0.5011907038189435, "grad_norm": 1.3897012746149344, "learning_rate": 5.223747669918865e-07, "loss": 0.252, "step": 28833 }, { "epoch": 0.5012080863564463, "grad_norm": 1.3634641039416096, "learning_rate": 5.223466458186305e-07, "loss": 0.2843, "step": 28834 }, { "epoch": 0.5012254688939491, "grad_norm": 1.7482407679482546, "learning_rate": 5.223185245745458e-07, "loss": 0.2699, "step": 28835 }, { "epoch": 0.501242851431452, "grad_norm": 2.1509614338897034, "learning_rate": 5.222904032597217e-07, "loss": 0.3311, "step": 28836 }, { "epoch": 0.5012602339689548, "grad_norm": 1.2527790516181274, "learning_rate": 5.222622818742472e-07, "loss": 0.2359, "step": 28837 }, { "epoch": 0.5012776165064576, "grad_norm": 2.9434644905678753, "learning_rate": 5.222341604182115e-07, "loss": 0.2973, "step": 28838 }, { "epoch": 0.5012949990439605, "grad_norm": 1.5025319285020082, "learning_rate": 5.222060388917036e-07, "loss": 0.1942, "step": 28839 }, { "epoch": 0.5013123815814633, "grad_norm": 1.1806774517871723, "learning_rate": 5.22177917294813e-07, "loss": 0.3295, "step": 28840 }, { "epoch": 0.5013297641189661, "grad_norm": 1.2570219683058161, "learning_rate": 5.221497956276283e-07, "loss": 0.2524, "step": 28841 }, { "epoch": 0.501347146656469, "grad_norm": 2.514865628287869, "learning_rate": 5.22121673890239e-07, "loss": 0.3224, "step": 28842 }, { "epoch": 0.5013645291939718, "grad_norm": 2.3233403333366076, "learning_rate": 5.220935520827344e-07, "loss": 0.1562, "step": 28843 }, { "epoch": 0.5013819117314745, "grad_norm": 2.0821415272425887, "learning_rate": 5.220654302052033e-07, "loss": 0.3328, "step": 28844 }, { "epoch": 0.5013992942689773, "grad_norm": 2.7226662951275267, "learning_rate": 5.220373082577347e-07, "loss": 0.384, "step": 28845 }, { "epoch": 0.5014166768064802, "grad_norm": 2.633076420957569, "learning_rate": 5.220091862404181e-07, "loss": 0.2524, "step": 28846 }, { "epoch": 0.501434059343983, "grad_norm": 1.136672006630832, "learning_rate": 5.219810641533426e-07, "loss": 0.1474, "step": 28847 }, { "epoch": 0.5014514418814858, "grad_norm": 1.7165157279067762, "learning_rate": 5.219529419965972e-07, "loss": 0.2262, "step": 28848 }, { "epoch": 0.5014688244189887, "grad_norm": 3.9442281848528356, "learning_rate": 5.219248197702709e-07, "loss": 0.3589, "step": 28849 }, { "epoch": 0.5014862069564915, "grad_norm": 3.6460451583157814, "learning_rate": 5.218966974744529e-07, "loss": 0.2988, "step": 28850 }, { "epoch": 0.5015035894939943, "grad_norm": 1.0085316800547548, "learning_rate": 5.218685751092327e-07, "loss": 0.3061, "step": 28851 }, { "epoch": 0.5015209720314971, "grad_norm": 1.677757152775986, "learning_rate": 5.218404526746991e-07, "loss": 0.1798, "step": 28852 }, { "epoch": 0.501538354569, "grad_norm": 1.3506557326606405, "learning_rate": 5.218123301709413e-07, "loss": 0.1733, "step": 28853 }, { "epoch": 0.5015557371065028, "grad_norm": 1.96376688816517, "learning_rate": 5.217842075980482e-07, "loss": 0.219, "step": 28854 }, { "epoch": 0.5015731196440056, "grad_norm": 1.0430167816681508, "learning_rate": 5.217560849561095e-07, "loss": 0.2959, "step": 28855 }, { "epoch": 0.5015905021815085, "grad_norm": 1.757033509881419, "learning_rate": 5.217279622452137e-07, "loss": 0.213, "step": 28856 }, { "epoch": 0.5016078847190113, "grad_norm": 2.115734991399976, "learning_rate": 5.216998394654505e-07, "loss": 0.2791, "step": 28857 }, { "epoch": 0.5016252672565141, "grad_norm": 1.4212605580070288, "learning_rate": 5.216717166169087e-07, "loss": 0.3238, "step": 28858 }, { "epoch": 0.501642649794017, "grad_norm": 0.9792744979736738, "learning_rate": 5.216435936996776e-07, "loss": 0.1383, "step": 28859 }, { "epoch": 0.5016600323315198, "grad_norm": 1.40869564631213, "learning_rate": 5.21615470713846e-07, "loss": 0.1991, "step": 28860 }, { "epoch": 0.5016774148690226, "grad_norm": 1.7049020379541047, "learning_rate": 5.215873476595035e-07, "loss": 0.4114, "step": 28861 }, { "epoch": 0.5016947974065255, "grad_norm": 1.1902892949205894, "learning_rate": 5.21559224536739e-07, "loss": 0.2346, "step": 28862 }, { "epoch": 0.5017121799440283, "grad_norm": 2.202997215286702, "learning_rate": 5.215311013456416e-07, "loss": 0.3552, "step": 28863 }, { "epoch": 0.501729562481531, "grad_norm": 1.1621762931149233, "learning_rate": 5.215029780863004e-07, "loss": 0.166, "step": 28864 }, { "epoch": 0.5017469450190338, "grad_norm": 1.4094905382030052, "learning_rate": 5.214748547588049e-07, "loss": 0.3103, "step": 28865 }, { "epoch": 0.5017643275565367, "grad_norm": 1.6598593031661852, "learning_rate": 5.214467313632438e-07, "loss": 0.2813, "step": 28866 }, { "epoch": 0.5017817100940395, "grad_norm": 1.3122361982527622, "learning_rate": 5.214186078997064e-07, "loss": 0.2191, "step": 28867 }, { "epoch": 0.5017990926315423, "grad_norm": 1.1425497671424008, "learning_rate": 5.213904843682819e-07, "loss": 0.2464, "step": 28868 }, { "epoch": 0.5018164751690452, "grad_norm": 1.4725960970283232, "learning_rate": 5.213623607690595e-07, "loss": 0.2151, "step": 28869 }, { "epoch": 0.501833857706548, "grad_norm": 1.8110156395965096, "learning_rate": 5.21334237102128e-07, "loss": 0.3959, "step": 28870 }, { "epoch": 0.5018512402440508, "grad_norm": 1.5575297822580707, "learning_rate": 5.213061133675769e-07, "loss": 0.215, "step": 28871 }, { "epoch": 0.5018686227815536, "grad_norm": 1.267413584085647, "learning_rate": 5.212779895654952e-07, "loss": 0.201, "step": 28872 }, { "epoch": 0.5018860053190565, "grad_norm": 1.6566793756047025, "learning_rate": 5.212498656959721e-07, "loss": 0.2352, "step": 28873 }, { "epoch": 0.5019033878565593, "grad_norm": 1.8222677327317884, "learning_rate": 5.212217417590965e-07, "loss": 0.2538, "step": 28874 }, { "epoch": 0.5019207703940621, "grad_norm": 2.0667967059311234, "learning_rate": 5.211936177549578e-07, "loss": 0.3091, "step": 28875 }, { "epoch": 0.501938152931565, "grad_norm": 1.0945856350587926, "learning_rate": 5.211654936836451e-07, "loss": 0.2195, "step": 28876 }, { "epoch": 0.5019555354690678, "grad_norm": 1.8598976246243097, "learning_rate": 5.211373695452477e-07, "loss": 0.2257, "step": 28877 }, { "epoch": 0.5019729180065706, "grad_norm": 2.1145416419605976, "learning_rate": 5.211092453398543e-07, "loss": 0.3273, "step": 28878 }, { "epoch": 0.5019903005440735, "grad_norm": 1.8545180520674565, "learning_rate": 5.210811210675543e-07, "loss": 0.1878, "step": 28879 }, { "epoch": 0.5020076830815763, "grad_norm": 2.4823350807901248, "learning_rate": 5.210529967284369e-07, "loss": 0.3372, "step": 28880 }, { "epoch": 0.5020250656190791, "grad_norm": 1.1978976844646503, "learning_rate": 5.210248723225912e-07, "loss": 0.2444, "step": 28881 }, { "epoch": 0.5020424481565819, "grad_norm": 1.160293022123592, "learning_rate": 5.209967478501064e-07, "loss": 0.3132, "step": 28882 }, { "epoch": 0.5020598306940848, "grad_norm": 2.094843791193603, "learning_rate": 5.209686233110712e-07, "loss": 0.3223, "step": 28883 }, { "epoch": 0.5020772132315875, "grad_norm": 2.1315818671116022, "learning_rate": 5.209404987055756e-07, "loss": 0.3273, "step": 28884 }, { "epoch": 0.5020945957690903, "grad_norm": 1.99713871885999, "learning_rate": 5.209123740337078e-07, "loss": 0.2363, "step": 28885 }, { "epoch": 0.5021119783065932, "grad_norm": 1.5048210487687754, "learning_rate": 5.208842492955576e-07, "loss": 0.2599, "step": 28886 }, { "epoch": 0.502129360844096, "grad_norm": 1.4349462015929966, "learning_rate": 5.208561244912138e-07, "loss": 0.3021, "step": 28887 }, { "epoch": 0.5021467433815988, "grad_norm": 1.954174062802682, "learning_rate": 5.208279996207659e-07, "loss": 0.2228, "step": 28888 }, { "epoch": 0.5021641259191016, "grad_norm": 2.7791038374193593, "learning_rate": 5.207998746843025e-07, "loss": 0.3297, "step": 28889 }, { "epoch": 0.5021815084566045, "grad_norm": 1.4468393295241686, "learning_rate": 5.207717496819134e-07, "loss": 0.206, "step": 28890 }, { "epoch": 0.5021988909941073, "grad_norm": 1.5647561609955654, "learning_rate": 5.20743624613687e-07, "loss": 0.2884, "step": 28891 }, { "epoch": 0.5022162735316101, "grad_norm": 1.4117973407831093, "learning_rate": 5.207154994797131e-07, "loss": 0.4546, "step": 28892 }, { "epoch": 0.502233656069113, "grad_norm": 1.8615878191816029, "learning_rate": 5.206873742800804e-07, "loss": 0.2395, "step": 28893 }, { "epoch": 0.5022510386066158, "grad_norm": 2.176594596057618, "learning_rate": 5.206592490148785e-07, "loss": 0.3409, "step": 28894 }, { "epoch": 0.5022684211441186, "grad_norm": 1.288868134452642, "learning_rate": 5.20631123684196e-07, "loss": 0.2335, "step": 28895 }, { "epoch": 0.5022858036816215, "grad_norm": 1.574750866935438, "learning_rate": 5.206029982881225e-07, "loss": 0.2061, "step": 28896 }, { "epoch": 0.5023031862191243, "grad_norm": 1.5968479766558608, "learning_rate": 5.20574872826747e-07, "loss": 0.3127, "step": 28897 }, { "epoch": 0.5023205687566271, "grad_norm": 2.0832175192246813, "learning_rate": 5.205467473001585e-07, "loss": 0.2215, "step": 28898 }, { "epoch": 0.50233795129413, "grad_norm": 2.709415381458841, "learning_rate": 5.205186217084461e-07, "loss": 0.2714, "step": 28899 }, { "epoch": 0.5023553338316328, "grad_norm": 2.466940562924626, "learning_rate": 5.204904960516994e-07, "loss": 0.2239, "step": 28900 }, { "epoch": 0.5023727163691356, "grad_norm": 1.425644068064918, "learning_rate": 5.204623703300069e-07, "loss": 0.1808, "step": 28901 }, { "epoch": 0.5023900989066384, "grad_norm": 2.1463809934884623, "learning_rate": 5.204342445434583e-07, "loss": 0.2813, "step": 28902 }, { "epoch": 0.5024074814441413, "grad_norm": 2.2875539078045337, "learning_rate": 5.204061186921426e-07, "loss": 0.3603, "step": 28903 }, { "epoch": 0.502424863981644, "grad_norm": 1.3765160003781158, "learning_rate": 5.203779927761486e-07, "loss": 0.248, "step": 28904 }, { "epoch": 0.5024422465191468, "grad_norm": 1.2916545483348234, "learning_rate": 5.203498667955659e-07, "loss": 0.1759, "step": 28905 }, { "epoch": 0.5024596290566496, "grad_norm": 2.1713865411389803, "learning_rate": 5.203217407504835e-07, "loss": 0.281, "step": 28906 }, { "epoch": 0.5024770115941525, "grad_norm": 1.0536260517862517, "learning_rate": 5.202936146409905e-07, "loss": 0.4577, "step": 28907 }, { "epoch": 0.5024943941316553, "grad_norm": 1.9744130071418997, "learning_rate": 5.20265488467176e-07, "loss": 0.3442, "step": 28908 }, { "epoch": 0.5025117766691581, "grad_norm": 1.4137428956025095, "learning_rate": 5.202373622291294e-07, "loss": 0.2818, "step": 28909 }, { "epoch": 0.502529159206661, "grad_norm": 2.0947491661283015, "learning_rate": 5.202092359269394e-07, "loss": 0.2367, "step": 28910 }, { "epoch": 0.5025465417441638, "grad_norm": 2.199919909818338, "learning_rate": 5.201811095606955e-07, "loss": 0.3017, "step": 28911 }, { "epoch": 0.5025639242816666, "grad_norm": 1.530495228172547, "learning_rate": 5.201529831304867e-07, "loss": 0.2662, "step": 28912 }, { "epoch": 0.5025813068191695, "grad_norm": 2.2795718612834976, "learning_rate": 5.201248566364024e-07, "loss": 0.2192, "step": 28913 }, { "epoch": 0.5025986893566723, "grad_norm": 1.001255724310217, "learning_rate": 5.200967300785314e-07, "loss": 0.3155, "step": 28914 }, { "epoch": 0.5026160718941751, "grad_norm": 1.9351734568481487, "learning_rate": 5.20068603456963e-07, "loss": 0.5002, "step": 28915 }, { "epoch": 0.502633454431678, "grad_norm": 1.01752588484198, "learning_rate": 5.200404767717864e-07, "loss": 0.2209, "step": 28916 }, { "epoch": 0.5026508369691808, "grad_norm": 1.921467190697613, "learning_rate": 5.200123500230907e-07, "loss": 0.2502, "step": 28917 }, { "epoch": 0.5026682195066836, "grad_norm": 1.6008299096881147, "learning_rate": 5.199842232109649e-07, "loss": 0.2056, "step": 28918 }, { "epoch": 0.5026856020441864, "grad_norm": 1.4575237530469711, "learning_rate": 5.199560963354985e-07, "loss": 0.2745, "step": 28919 }, { "epoch": 0.5027029845816893, "grad_norm": 1.6731271244418986, "learning_rate": 5.199279693967803e-07, "loss": 0.2742, "step": 28920 }, { "epoch": 0.5027203671191921, "grad_norm": 1.4250077850883953, "learning_rate": 5.198998423948996e-07, "loss": 0.3009, "step": 28921 }, { "epoch": 0.5027377496566949, "grad_norm": 1.6308375869967564, "learning_rate": 5.198717153299456e-07, "loss": 0.3203, "step": 28922 }, { "epoch": 0.5027551321941978, "grad_norm": 0.7220545278889222, "learning_rate": 5.198435882020073e-07, "loss": 0.2053, "step": 28923 }, { "epoch": 0.5027725147317005, "grad_norm": 1.2723342856425142, "learning_rate": 5.19815461011174e-07, "loss": 0.2764, "step": 28924 }, { "epoch": 0.5027898972692033, "grad_norm": 2.948452923621434, "learning_rate": 5.197873337575349e-07, "loss": 0.2284, "step": 28925 }, { "epoch": 0.5028072798067061, "grad_norm": 1.6142036360400627, "learning_rate": 5.197592064411788e-07, "loss": 0.3525, "step": 28926 }, { "epoch": 0.502824662344209, "grad_norm": 1.4921692219717437, "learning_rate": 5.197310790621954e-07, "loss": 0.2544, "step": 28927 }, { "epoch": 0.5028420448817118, "grad_norm": 1.9824607544490407, "learning_rate": 5.197029516206733e-07, "loss": 0.2823, "step": 28928 }, { "epoch": 0.5028594274192146, "grad_norm": 3.0055524639859224, "learning_rate": 5.19674824116702e-07, "loss": 0.4511, "step": 28929 }, { "epoch": 0.5028768099567175, "grad_norm": 1.3167207346286163, "learning_rate": 5.196466965503704e-07, "loss": 0.2218, "step": 28930 }, { "epoch": 0.5028941924942203, "grad_norm": 2.072611355202257, "learning_rate": 5.196185689217679e-07, "loss": 0.2243, "step": 28931 }, { "epoch": 0.5029115750317231, "grad_norm": 1.1492629395440608, "learning_rate": 5.195904412309836e-07, "loss": 0.1909, "step": 28932 }, { "epoch": 0.502928957569226, "grad_norm": 1.8409608299212368, "learning_rate": 5.195623134781066e-07, "loss": 0.328, "step": 28933 }, { "epoch": 0.5029463401067288, "grad_norm": 2.4057311088138973, "learning_rate": 5.195341856632259e-07, "loss": 0.3527, "step": 28934 }, { "epoch": 0.5029637226442316, "grad_norm": 1.8511256498873239, "learning_rate": 5.195060577864309e-07, "loss": 0.2955, "step": 28935 }, { "epoch": 0.5029811051817344, "grad_norm": 3.3308532130705037, "learning_rate": 5.194779298478108e-07, "loss": 0.558, "step": 28936 }, { "epoch": 0.5029984877192373, "grad_norm": 2.1514658605007635, "learning_rate": 5.194498018474544e-07, "loss": 0.2606, "step": 28937 }, { "epoch": 0.5030158702567401, "grad_norm": 2.0465576719954885, "learning_rate": 5.194216737854512e-07, "loss": 0.2381, "step": 28938 }, { "epoch": 0.5030332527942429, "grad_norm": 1.0220509191184175, "learning_rate": 5.193935456618901e-07, "loss": 0.2077, "step": 28939 }, { "epoch": 0.5030506353317458, "grad_norm": 1.1725156931389766, "learning_rate": 5.193654174768605e-07, "loss": 0.2986, "step": 28940 }, { "epoch": 0.5030680178692486, "grad_norm": 1.632862507494442, "learning_rate": 5.193372892304513e-07, "loss": 0.2641, "step": 28941 }, { "epoch": 0.5030854004067514, "grad_norm": 1.0797606566618743, "learning_rate": 5.193091609227519e-07, "loss": 0.379, "step": 28942 }, { "epoch": 0.5031027829442543, "grad_norm": 1.8544264707282057, "learning_rate": 5.192810325538511e-07, "loss": 0.3063, "step": 28943 }, { "epoch": 0.503120165481757, "grad_norm": 1.9769752542920644, "learning_rate": 5.192529041238386e-07, "loss": 0.2894, "step": 28944 }, { "epoch": 0.5031375480192598, "grad_norm": 1.593799408616977, "learning_rate": 5.19224775632803e-07, "loss": 0.3291, "step": 28945 }, { "epoch": 0.5031549305567626, "grad_norm": 1.18261396113767, "learning_rate": 5.191966470808338e-07, "loss": 0.1838, "step": 28946 }, { "epoch": 0.5031723130942655, "grad_norm": 1.0696621616431026, "learning_rate": 5.1916851846802e-07, "loss": 0.1577, "step": 28947 }, { "epoch": 0.5031896956317683, "grad_norm": 1.580486154103332, "learning_rate": 5.191403897944508e-07, "loss": 0.1595, "step": 28948 }, { "epoch": 0.5032070781692711, "grad_norm": 1.3745489703512552, "learning_rate": 5.191122610602153e-07, "loss": 0.291, "step": 28949 }, { "epoch": 0.503224460706774, "grad_norm": 1.669647670298514, "learning_rate": 5.190841322654029e-07, "loss": 0.3284, "step": 28950 }, { "epoch": 0.5032418432442768, "grad_norm": 2.5170604091180224, "learning_rate": 5.190560034101024e-07, "loss": 0.3492, "step": 28951 }, { "epoch": 0.5032592257817796, "grad_norm": 1.9257941840153203, "learning_rate": 5.190278744944032e-07, "loss": 0.2294, "step": 28952 }, { "epoch": 0.5032766083192824, "grad_norm": 3.5436847530895985, "learning_rate": 5.189997455183943e-07, "loss": 0.4179, "step": 28953 }, { "epoch": 0.5032939908567853, "grad_norm": 1.5654110056014037, "learning_rate": 5.189716164821651e-07, "loss": 0.2116, "step": 28954 }, { "epoch": 0.5033113733942881, "grad_norm": 1.6659229641989992, "learning_rate": 5.189434873858044e-07, "loss": 0.2291, "step": 28955 }, { "epoch": 0.5033287559317909, "grad_norm": 1.6034379341090768, "learning_rate": 5.189153582294017e-07, "loss": 0.2191, "step": 28956 }, { "epoch": 0.5033461384692938, "grad_norm": 2.2209617426026154, "learning_rate": 5.18887229013046e-07, "loss": 0.2268, "step": 28957 }, { "epoch": 0.5033635210067966, "grad_norm": 1.3620789640931863, "learning_rate": 5.188590997368263e-07, "loss": 0.3286, "step": 28958 }, { "epoch": 0.5033809035442994, "grad_norm": 2.1556082170705926, "learning_rate": 5.188309704008319e-07, "loss": 0.2717, "step": 28959 }, { "epoch": 0.5033982860818023, "grad_norm": 1.04696347349992, "learning_rate": 5.188028410051521e-07, "loss": 0.1994, "step": 28960 }, { "epoch": 0.5034156686193051, "grad_norm": 2.017218570993422, "learning_rate": 5.18774711549876e-07, "loss": 0.3151, "step": 28961 }, { "epoch": 0.5034330511568079, "grad_norm": 1.3241589443482324, "learning_rate": 5.187465820350926e-07, "loss": 0.1978, "step": 28962 }, { "epoch": 0.5034504336943108, "grad_norm": 1.3914352496765334, "learning_rate": 5.187184524608912e-07, "loss": 0.1727, "step": 28963 }, { "epoch": 0.5034678162318135, "grad_norm": 1.4148015915867964, "learning_rate": 5.186903228273607e-07, "loss": 0.3768, "step": 28964 }, { "epoch": 0.5034851987693163, "grad_norm": 2.052066722244154, "learning_rate": 5.186621931345907e-07, "loss": 0.3527, "step": 28965 }, { "epoch": 0.5035025813068191, "grad_norm": 1.8107292356611302, "learning_rate": 5.1863406338267e-07, "loss": 0.4032, "step": 28966 }, { "epoch": 0.503519963844322, "grad_norm": 2.1928742513013986, "learning_rate": 5.186059335716879e-07, "loss": 0.2458, "step": 28967 }, { "epoch": 0.5035373463818248, "grad_norm": 1.8520327365989468, "learning_rate": 5.185778037017335e-07, "loss": 0.2379, "step": 28968 }, { "epoch": 0.5035547289193276, "grad_norm": 1.2407961813362371, "learning_rate": 5.185496737728963e-07, "loss": 0.215, "step": 28969 }, { "epoch": 0.5035721114568305, "grad_norm": 1.076878457468445, "learning_rate": 5.185215437852647e-07, "loss": 0.2774, "step": 28970 }, { "epoch": 0.5035894939943333, "grad_norm": 1.078330785429807, "learning_rate": 5.184934137389285e-07, "loss": 0.1282, "step": 28971 }, { "epoch": 0.5036068765318361, "grad_norm": 1.2660469697889813, "learning_rate": 5.184652836339767e-07, "loss": 0.1552, "step": 28972 }, { "epoch": 0.5036242590693389, "grad_norm": 1.4784364409159523, "learning_rate": 5.184371534704985e-07, "loss": 0.1843, "step": 28973 }, { "epoch": 0.5036416416068418, "grad_norm": 2.325627045983602, "learning_rate": 5.184090232485828e-07, "loss": 0.451, "step": 28974 }, { "epoch": 0.5036590241443446, "grad_norm": 4.903525934555364, "learning_rate": 5.183808929683191e-07, "loss": 0.4767, "step": 28975 }, { "epoch": 0.5036764066818474, "grad_norm": 1.061716298015623, "learning_rate": 5.183527626297965e-07, "loss": 0.1918, "step": 28976 }, { "epoch": 0.5036937892193503, "grad_norm": 1.4224775331776993, "learning_rate": 5.18324632233104e-07, "loss": 0.2192, "step": 28977 }, { "epoch": 0.5037111717568531, "grad_norm": 1.8823359026854756, "learning_rate": 5.182965017783306e-07, "loss": 0.3166, "step": 28978 }, { "epoch": 0.5037285542943559, "grad_norm": 1.2084532037420015, "learning_rate": 5.18268371265566e-07, "loss": 0.1981, "step": 28979 }, { "epoch": 0.5037459368318588, "grad_norm": 0.8129383753871373, "learning_rate": 5.182402406948989e-07, "loss": 0.2436, "step": 28980 }, { "epoch": 0.5037633193693616, "grad_norm": 0.8782255603751226, "learning_rate": 5.182121100664187e-07, "loss": 0.2136, "step": 28981 }, { "epoch": 0.5037807019068644, "grad_norm": 2.021184325557611, "learning_rate": 5.181839793802144e-07, "loss": 0.3449, "step": 28982 }, { "epoch": 0.5037980844443671, "grad_norm": 1.2628088638787474, "learning_rate": 5.181558486363753e-07, "loss": 0.2338, "step": 28983 }, { "epoch": 0.50381546698187, "grad_norm": 1.9789886454862857, "learning_rate": 5.181277178349904e-07, "loss": 0.2726, "step": 28984 }, { "epoch": 0.5038328495193728, "grad_norm": 1.602732138524459, "learning_rate": 5.180995869761492e-07, "loss": 0.303, "step": 28985 }, { "epoch": 0.5038502320568756, "grad_norm": 1.71438776724569, "learning_rate": 5.180714560599403e-07, "loss": 0.1605, "step": 28986 }, { "epoch": 0.5038676145943785, "grad_norm": 1.7683953346819061, "learning_rate": 5.180433250864535e-07, "loss": 0.2645, "step": 28987 }, { "epoch": 0.5038849971318813, "grad_norm": 2.7455175549292044, "learning_rate": 5.180151940557774e-07, "loss": 0.3531, "step": 28988 }, { "epoch": 0.5039023796693841, "grad_norm": 1.6272507497220923, "learning_rate": 5.179870629680016e-07, "loss": 0.2394, "step": 28989 }, { "epoch": 0.503919762206887, "grad_norm": 2.0016418423374858, "learning_rate": 5.179589318232149e-07, "loss": 0.2352, "step": 28990 }, { "epoch": 0.5039371447443898, "grad_norm": 1.7381041439094034, "learning_rate": 5.179308006215066e-07, "loss": 0.234, "step": 28991 }, { "epoch": 0.5039545272818926, "grad_norm": 1.3885590518836077, "learning_rate": 5.17902669362966e-07, "loss": 0.2275, "step": 28992 }, { "epoch": 0.5039719098193954, "grad_norm": 1.9815117685174073, "learning_rate": 5.178745380476823e-07, "loss": 0.2425, "step": 28993 }, { "epoch": 0.5039892923568983, "grad_norm": 1.803556288726, "learning_rate": 5.178464066757442e-07, "loss": 0.2847, "step": 28994 }, { "epoch": 0.5040066748944011, "grad_norm": 1.3342374294885586, "learning_rate": 5.178182752472415e-07, "loss": 0.163, "step": 28995 }, { "epoch": 0.5040240574319039, "grad_norm": 1.7697592621790035, "learning_rate": 5.177901437622629e-07, "loss": 0.2392, "step": 28996 }, { "epoch": 0.5040414399694068, "grad_norm": 1.5538369595450916, "learning_rate": 5.177620122208976e-07, "loss": 0.2478, "step": 28997 }, { "epoch": 0.5040588225069096, "grad_norm": 1.6591701728760275, "learning_rate": 5.177338806232352e-07, "loss": 0.2239, "step": 28998 }, { "epoch": 0.5040762050444124, "grad_norm": 1.1534755563877028, "learning_rate": 5.177057489693643e-07, "loss": 0.3234, "step": 28999 }, { "epoch": 0.5040935875819152, "grad_norm": 2.057030991252524, "learning_rate": 5.176776172593745e-07, "loss": 0.1831, "step": 29000 }, { "epoch": 0.5041109701194181, "grad_norm": 2.5861998621482276, "learning_rate": 5.176494854933546e-07, "loss": 0.2773, "step": 29001 }, { "epoch": 0.5041283526569209, "grad_norm": 1.915262298276359, "learning_rate": 5.176213536713941e-07, "loss": 0.1976, "step": 29002 }, { "epoch": 0.5041457351944236, "grad_norm": 1.9348572760124367, "learning_rate": 5.175932217935818e-07, "loss": 0.2482, "step": 29003 }, { "epoch": 0.5041631177319265, "grad_norm": 1.519414916309889, "learning_rate": 5.175650898600073e-07, "loss": 0.2367, "step": 29004 }, { "epoch": 0.5041805002694293, "grad_norm": 2.151757945405034, "learning_rate": 5.175369578707592e-07, "loss": 0.2922, "step": 29005 }, { "epoch": 0.5041978828069321, "grad_norm": 1.4059037867099373, "learning_rate": 5.175088258259273e-07, "loss": 0.2057, "step": 29006 }, { "epoch": 0.504215265344435, "grad_norm": 1.6163802639991793, "learning_rate": 5.174806937256004e-07, "loss": 0.3241, "step": 29007 }, { "epoch": 0.5042326478819378, "grad_norm": 1.8108225084576623, "learning_rate": 5.174525615698678e-07, "loss": 0.1226, "step": 29008 }, { "epoch": 0.5042500304194406, "grad_norm": 1.3441790135312122, "learning_rate": 5.174244293588182e-07, "loss": 0.2327, "step": 29009 }, { "epoch": 0.5042674129569434, "grad_norm": 1.5667867839432537, "learning_rate": 5.173962970925418e-07, "loss": 0.2213, "step": 29010 }, { "epoch": 0.5042847954944463, "grad_norm": 1.6072302655125108, "learning_rate": 5.173681647711266e-07, "loss": 0.2856, "step": 29011 }, { "epoch": 0.5043021780319491, "grad_norm": 1.249376821921838, "learning_rate": 5.173400323946625e-07, "loss": 0.381, "step": 29012 }, { "epoch": 0.5043195605694519, "grad_norm": 1.9444908797701925, "learning_rate": 5.173118999632383e-07, "loss": 0.3655, "step": 29013 }, { "epoch": 0.5043369431069548, "grad_norm": 1.7348967799637411, "learning_rate": 5.172837674769437e-07, "loss": 0.3036, "step": 29014 }, { "epoch": 0.5043543256444576, "grad_norm": 1.3167797796185319, "learning_rate": 5.172556349358671e-07, "loss": 0.2598, "step": 29015 }, { "epoch": 0.5043717081819604, "grad_norm": 1.421931965107121, "learning_rate": 5.172275023400983e-07, "loss": 0.1913, "step": 29016 }, { "epoch": 0.5043890907194633, "grad_norm": 1.228426931406683, "learning_rate": 5.171993696897263e-07, "loss": 0.2841, "step": 29017 }, { "epoch": 0.5044064732569661, "grad_norm": 1.380703767358366, "learning_rate": 5.1717123698484e-07, "loss": 0.3412, "step": 29018 }, { "epoch": 0.5044238557944689, "grad_norm": 2.117259981569083, "learning_rate": 5.171431042255287e-07, "loss": 0.2668, "step": 29019 }, { "epoch": 0.5044412383319717, "grad_norm": 2.049712188288479, "learning_rate": 5.171149714118817e-07, "loss": 0.4922, "step": 29020 }, { "epoch": 0.5044586208694746, "grad_norm": 1.4423581811467305, "learning_rate": 5.170868385439883e-07, "loss": 0.3139, "step": 29021 }, { "epoch": 0.5044760034069774, "grad_norm": 1.9451708625622672, "learning_rate": 5.170587056219373e-07, "loss": 0.2751, "step": 29022 }, { "epoch": 0.5044933859444801, "grad_norm": 1.4012435467592592, "learning_rate": 5.170305726458182e-07, "loss": 0.4794, "step": 29023 }, { "epoch": 0.504510768481983, "grad_norm": 1.0190763806523713, "learning_rate": 5.170024396157197e-07, "loss": 0.2151, "step": 29024 }, { "epoch": 0.5045281510194858, "grad_norm": 1.5441825278886498, "learning_rate": 5.169743065317316e-07, "loss": 0.172, "step": 29025 }, { "epoch": 0.5045455335569886, "grad_norm": 1.6839157242960554, "learning_rate": 5.169461733939426e-07, "loss": 0.251, "step": 29026 }, { "epoch": 0.5045629160944914, "grad_norm": 2.357270749765053, "learning_rate": 5.16918040202442e-07, "loss": 0.354, "step": 29027 }, { "epoch": 0.5045802986319943, "grad_norm": 2.102404223446304, "learning_rate": 5.16889906957319e-07, "loss": 0.2331, "step": 29028 }, { "epoch": 0.5045976811694971, "grad_norm": 1.3374511237324591, "learning_rate": 5.168617736586629e-07, "loss": 0.2104, "step": 29029 }, { "epoch": 0.5046150637069999, "grad_norm": 1.3796459417479165, "learning_rate": 5.168336403065624e-07, "loss": 0.3539, "step": 29030 }, { "epoch": 0.5046324462445028, "grad_norm": 1.5131558680848771, "learning_rate": 5.168055069011072e-07, "loss": 0.335, "step": 29031 }, { "epoch": 0.5046498287820056, "grad_norm": 1.677770114421334, "learning_rate": 5.167773734423863e-07, "loss": 0.2569, "step": 29032 }, { "epoch": 0.5046672113195084, "grad_norm": 1.6604932025596497, "learning_rate": 5.167492399304888e-07, "loss": 0.1494, "step": 29033 }, { "epoch": 0.5046845938570113, "grad_norm": 1.7050789987102963, "learning_rate": 5.167211063655038e-07, "loss": 0.2437, "step": 29034 }, { "epoch": 0.5047019763945141, "grad_norm": 1.5621622480850899, "learning_rate": 5.166929727475206e-07, "loss": 0.2543, "step": 29035 }, { "epoch": 0.5047193589320169, "grad_norm": 2.3788050867560124, "learning_rate": 5.166648390766285e-07, "loss": 0.3295, "step": 29036 }, { "epoch": 0.5047367414695197, "grad_norm": 1.2570801366086342, "learning_rate": 5.166367053529165e-07, "loss": 0.2115, "step": 29037 }, { "epoch": 0.5047541240070226, "grad_norm": 2.2860918694346677, "learning_rate": 5.166085715764735e-07, "loss": 0.5018, "step": 29038 }, { "epoch": 0.5047715065445254, "grad_norm": 1.553554350091683, "learning_rate": 5.165804377473894e-07, "loss": 0.3034, "step": 29039 }, { "epoch": 0.5047888890820282, "grad_norm": 2.89308644767263, "learning_rate": 5.165523038657525e-07, "loss": 0.2728, "step": 29040 }, { "epoch": 0.5048062716195311, "grad_norm": 2.3567636137165744, "learning_rate": 5.165241699316525e-07, "loss": 0.0914, "step": 29041 }, { "epoch": 0.5048236541570339, "grad_norm": 1.361651400172944, "learning_rate": 5.164960359451787e-07, "loss": 0.2524, "step": 29042 }, { "epoch": 0.5048410366945366, "grad_norm": 1.767740574883168, "learning_rate": 5.1646790190642e-07, "loss": 0.251, "step": 29043 }, { "epoch": 0.5048584192320394, "grad_norm": 1.6906267250225, "learning_rate": 5.164397678154654e-07, "loss": 0.187, "step": 29044 }, { "epoch": 0.5048758017695423, "grad_norm": 1.684545429492083, "learning_rate": 5.164116336724044e-07, "loss": 0.3107, "step": 29045 }, { "epoch": 0.5048931843070451, "grad_norm": 1.8469761751810925, "learning_rate": 5.16383499477326e-07, "loss": 0.2714, "step": 29046 }, { "epoch": 0.5049105668445479, "grad_norm": 0.9421067696442169, "learning_rate": 5.163553652303196e-07, "loss": 0.1508, "step": 29047 }, { "epoch": 0.5049279493820508, "grad_norm": 1.6470188222433289, "learning_rate": 5.163272309314741e-07, "loss": 0.3441, "step": 29048 }, { "epoch": 0.5049453319195536, "grad_norm": 1.2529677170847657, "learning_rate": 5.162990965808787e-07, "loss": 0.2433, "step": 29049 }, { "epoch": 0.5049627144570564, "grad_norm": 1.6849546298257843, "learning_rate": 5.162709621786228e-07, "loss": 0.2256, "step": 29050 }, { "epoch": 0.5049800969945593, "grad_norm": 1.705004182977997, "learning_rate": 5.162428277247954e-07, "loss": 0.442, "step": 29051 }, { "epoch": 0.5049974795320621, "grad_norm": 1.505805781794164, "learning_rate": 5.162146932194856e-07, "loss": 0.3888, "step": 29052 }, { "epoch": 0.5050148620695649, "grad_norm": 1.134106524300745, "learning_rate": 5.161865586627826e-07, "loss": 0.2133, "step": 29053 }, { "epoch": 0.5050322446070677, "grad_norm": 1.591503473880234, "learning_rate": 5.161584240547758e-07, "loss": 0.2772, "step": 29054 }, { "epoch": 0.5050496271445706, "grad_norm": 2.3672048085240416, "learning_rate": 5.161302893955542e-07, "loss": 0.5353, "step": 29055 }, { "epoch": 0.5050670096820734, "grad_norm": 1.4416063176347882, "learning_rate": 5.16102154685207e-07, "loss": 0.2506, "step": 29056 }, { "epoch": 0.5050843922195762, "grad_norm": 1.2285072816657732, "learning_rate": 5.160740199238233e-07, "loss": 0.2335, "step": 29057 }, { "epoch": 0.5051017747570791, "grad_norm": 2.7133409967340216, "learning_rate": 5.160458851114925e-07, "loss": 0.4759, "step": 29058 }, { "epoch": 0.5051191572945819, "grad_norm": 2.6367674615748733, "learning_rate": 5.160177502483034e-07, "loss": 0.2594, "step": 29059 }, { "epoch": 0.5051365398320847, "grad_norm": 1.4837433352736948, "learning_rate": 5.159896153343456e-07, "loss": 0.2841, "step": 29060 }, { "epoch": 0.5051539223695876, "grad_norm": 1.5072183891286353, "learning_rate": 5.159614803697079e-07, "loss": 0.2629, "step": 29061 }, { "epoch": 0.5051713049070904, "grad_norm": 2.6963675477758087, "learning_rate": 5.159333453544797e-07, "loss": 0.305, "step": 29062 }, { "epoch": 0.5051886874445931, "grad_norm": 1.4475131497987943, "learning_rate": 5.159052102887501e-07, "loss": 0.3441, "step": 29063 }, { "epoch": 0.5052060699820959, "grad_norm": 1.1512593722755635, "learning_rate": 5.158770751726083e-07, "loss": 0.2453, "step": 29064 }, { "epoch": 0.5052234525195988, "grad_norm": 0.8715391786991604, "learning_rate": 5.158489400061434e-07, "loss": 0.1802, "step": 29065 }, { "epoch": 0.5052408350571016, "grad_norm": 1.3204454484737485, "learning_rate": 5.158208047894447e-07, "loss": 0.3394, "step": 29066 }, { "epoch": 0.5052582175946044, "grad_norm": 1.6137592488687411, "learning_rate": 5.157926695226015e-07, "loss": 0.2378, "step": 29067 }, { "epoch": 0.5052756001321073, "grad_norm": 1.829998210405731, "learning_rate": 5.157645342057026e-07, "loss": 0.3047, "step": 29068 }, { "epoch": 0.5052929826696101, "grad_norm": 2.0274205399982774, "learning_rate": 5.157363988388373e-07, "loss": 0.2313, "step": 29069 }, { "epoch": 0.5053103652071129, "grad_norm": 1.7238099388029517, "learning_rate": 5.15708263422095e-07, "loss": 0.1673, "step": 29070 }, { "epoch": 0.5053277477446158, "grad_norm": 5.243563479860259, "learning_rate": 5.156801279555646e-07, "loss": 0.3293, "step": 29071 }, { "epoch": 0.5053451302821186, "grad_norm": 2.080523903184277, "learning_rate": 5.156519924393355e-07, "loss": 0.178, "step": 29072 }, { "epoch": 0.5053625128196214, "grad_norm": 1.6724200451338453, "learning_rate": 5.156238568734965e-07, "loss": 0.287, "step": 29073 }, { "epoch": 0.5053798953571242, "grad_norm": 2.1107568555965046, "learning_rate": 5.155957212581374e-07, "loss": 0.3099, "step": 29074 }, { "epoch": 0.5053972778946271, "grad_norm": 1.2784187613891815, "learning_rate": 5.155675855933469e-07, "loss": 0.2401, "step": 29075 }, { "epoch": 0.5054146604321299, "grad_norm": 1.279044054738046, "learning_rate": 5.155394498792143e-07, "loss": 0.2898, "step": 29076 }, { "epoch": 0.5054320429696327, "grad_norm": 2.415918242755772, "learning_rate": 5.155113141158288e-07, "loss": 0.1896, "step": 29077 }, { "epoch": 0.5054494255071356, "grad_norm": 1.3879832601661246, "learning_rate": 5.154831783032796e-07, "loss": 0.2566, "step": 29078 }, { "epoch": 0.5054668080446384, "grad_norm": 2.037128944870446, "learning_rate": 5.154550424416557e-07, "loss": 0.5087, "step": 29079 }, { "epoch": 0.5054841905821412, "grad_norm": 1.3097008643769203, "learning_rate": 5.154269065310465e-07, "loss": 0.4668, "step": 29080 }, { "epoch": 0.505501573119644, "grad_norm": 1.8111161246198006, "learning_rate": 5.153987705715412e-07, "loss": 0.3107, "step": 29081 }, { "epoch": 0.5055189556571469, "grad_norm": 1.7100049122436338, "learning_rate": 5.153706345632287e-07, "loss": 0.2692, "step": 29082 }, { "epoch": 0.5055363381946496, "grad_norm": 2.074125540612886, "learning_rate": 5.153424985061984e-07, "loss": 0.2333, "step": 29083 }, { "epoch": 0.5055537207321524, "grad_norm": 2.7801986672642123, "learning_rate": 5.153143624005394e-07, "loss": 0.3976, "step": 29084 }, { "epoch": 0.5055711032696553, "grad_norm": 2.533422452835207, "learning_rate": 5.152862262463409e-07, "loss": 0.2517, "step": 29085 }, { "epoch": 0.5055884858071581, "grad_norm": 1.5349926326486556, "learning_rate": 5.152580900436922e-07, "loss": 0.2766, "step": 29086 }, { "epoch": 0.5056058683446609, "grad_norm": 2.096330630934324, "learning_rate": 5.152299537926823e-07, "loss": 0.2218, "step": 29087 }, { "epoch": 0.5056232508821638, "grad_norm": 1.67843136308642, "learning_rate": 5.152018174934003e-07, "loss": 0.2812, "step": 29088 }, { "epoch": 0.5056406334196666, "grad_norm": 1.9240067333836581, "learning_rate": 5.151736811459358e-07, "loss": 0.3, "step": 29089 }, { "epoch": 0.5056580159571694, "grad_norm": 2.176118838690021, "learning_rate": 5.151455447503774e-07, "loss": 0.3359, "step": 29090 }, { "epoch": 0.5056753984946722, "grad_norm": 2.229043729262326, "learning_rate": 5.151174083068148e-07, "loss": 0.2765, "step": 29091 }, { "epoch": 0.5056927810321751, "grad_norm": 2.1005342546258876, "learning_rate": 5.150892718153368e-07, "loss": 0.2959, "step": 29092 }, { "epoch": 0.5057101635696779, "grad_norm": 2.1464660064088075, "learning_rate": 5.15061135276033e-07, "loss": 0.2166, "step": 29093 }, { "epoch": 0.5057275461071807, "grad_norm": 1.5605549088150314, "learning_rate": 5.150329986889921e-07, "loss": 0.3877, "step": 29094 }, { "epoch": 0.5057449286446836, "grad_norm": 2.922718812852979, "learning_rate": 5.150048620543035e-07, "loss": 0.2884, "step": 29095 }, { "epoch": 0.5057623111821864, "grad_norm": 1.0321517701057015, "learning_rate": 5.149767253720565e-07, "loss": 0.1919, "step": 29096 }, { "epoch": 0.5057796937196892, "grad_norm": 2.434332479874753, "learning_rate": 5.149485886423401e-07, "loss": 0.2094, "step": 29097 }, { "epoch": 0.5057970762571921, "grad_norm": 1.2155240293082052, "learning_rate": 5.149204518652434e-07, "loss": 0.3771, "step": 29098 }, { "epoch": 0.5058144587946949, "grad_norm": 1.4746825802664134, "learning_rate": 5.148923150408561e-07, "loss": 0.3076, "step": 29099 }, { "epoch": 0.5058318413321977, "grad_norm": 1.4483610625625798, "learning_rate": 5.148641781692666e-07, "loss": 0.1682, "step": 29100 }, { "epoch": 0.5058492238697005, "grad_norm": 1.6593430777933844, "learning_rate": 5.148360412505646e-07, "loss": 0.1714, "step": 29101 }, { "epoch": 0.5058666064072034, "grad_norm": 1.5962120655071, "learning_rate": 5.148079042848393e-07, "loss": 0.2614, "step": 29102 }, { "epoch": 0.5058839889447061, "grad_norm": 1.7631767188229968, "learning_rate": 5.147797672721796e-07, "loss": 0.2703, "step": 29103 }, { "epoch": 0.5059013714822089, "grad_norm": 1.2664753277654313, "learning_rate": 5.147516302126748e-07, "loss": 0.1846, "step": 29104 }, { "epoch": 0.5059187540197118, "grad_norm": 2.4251887732058326, "learning_rate": 5.147234931064142e-07, "loss": 0.2825, "step": 29105 }, { "epoch": 0.5059361365572146, "grad_norm": 1.5868704658264332, "learning_rate": 5.14695355953487e-07, "loss": 0.2328, "step": 29106 }, { "epoch": 0.5059535190947174, "grad_norm": 1.3308153865774004, "learning_rate": 5.146672187539822e-07, "loss": 0.1979, "step": 29107 }, { "epoch": 0.5059709016322202, "grad_norm": 2.8711922769188156, "learning_rate": 5.14639081507989e-07, "loss": 0.5045, "step": 29108 }, { "epoch": 0.5059882841697231, "grad_norm": 1.5330854474792317, "learning_rate": 5.146109442155965e-07, "loss": 0.2472, "step": 29109 }, { "epoch": 0.5060056667072259, "grad_norm": 2.444254756904412, "learning_rate": 5.145828068768941e-07, "loss": 0.3087, "step": 29110 }, { "epoch": 0.5060230492447287, "grad_norm": 2.2480256559552645, "learning_rate": 5.145546694919711e-07, "loss": 0.2543, "step": 29111 }, { "epoch": 0.5060404317822316, "grad_norm": 1.7044487911452952, "learning_rate": 5.145265320609163e-07, "loss": 0.3636, "step": 29112 }, { "epoch": 0.5060578143197344, "grad_norm": 2.4090364451979642, "learning_rate": 5.14498394583819e-07, "loss": 0.2905, "step": 29113 }, { "epoch": 0.5060751968572372, "grad_norm": 1.9674521128670344, "learning_rate": 5.144702570607686e-07, "loss": 0.3533, "step": 29114 }, { "epoch": 0.5060925793947401, "grad_norm": 1.188847275567493, "learning_rate": 5.144421194918541e-07, "loss": 0.2269, "step": 29115 }, { "epoch": 0.5061099619322429, "grad_norm": 1.6168917008552848, "learning_rate": 5.144139818771648e-07, "loss": 0.3036, "step": 29116 }, { "epoch": 0.5061273444697457, "grad_norm": 1.7935031000734678, "learning_rate": 5.143858442167896e-07, "loss": 0.1983, "step": 29117 }, { "epoch": 0.5061447270072486, "grad_norm": 0.8892926922619905, "learning_rate": 5.143577065108181e-07, "loss": 0.2664, "step": 29118 }, { "epoch": 0.5061621095447514, "grad_norm": 1.0594082666561926, "learning_rate": 5.143295687593391e-07, "loss": 0.2432, "step": 29119 }, { "epoch": 0.5061794920822542, "grad_norm": 1.0691933401235614, "learning_rate": 5.14301430962442e-07, "loss": 0.1862, "step": 29120 }, { "epoch": 0.506196874619757, "grad_norm": 2.223639314740833, "learning_rate": 5.14273293120216e-07, "loss": 0.3459, "step": 29121 }, { "epoch": 0.5062142571572598, "grad_norm": 1.3658519047927338, "learning_rate": 5.142451552327501e-07, "loss": 0.2519, "step": 29122 }, { "epoch": 0.5062316396947626, "grad_norm": 1.244953873395419, "learning_rate": 5.142170173001335e-07, "loss": 0.3262, "step": 29123 }, { "epoch": 0.5062490222322654, "grad_norm": 1.1874701897801498, "learning_rate": 5.141888793224559e-07, "loss": 0.2317, "step": 29124 }, { "epoch": 0.5062664047697683, "grad_norm": 1.6335959547339216, "learning_rate": 5.141607412998057e-07, "loss": 0.3081, "step": 29125 }, { "epoch": 0.5062837873072711, "grad_norm": 2.60725571755364, "learning_rate": 5.141326032322726e-07, "loss": 0.3956, "step": 29126 }, { "epoch": 0.5063011698447739, "grad_norm": 1.3363992790598525, "learning_rate": 5.141044651199457e-07, "loss": 0.1862, "step": 29127 }, { "epoch": 0.5063185523822767, "grad_norm": 2.2992208545073565, "learning_rate": 5.14076326962914e-07, "loss": 0.2356, "step": 29128 }, { "epoch": 0.5063359349197796, "grad_norm": 1.1380727397803876, "learning_rate": 5.140481887612668e-07, "loss": 0.3209, "step": 29129 }, { "epoch": 0.5063533174572824, "grad_norm": 1.622399458366892, "learning_rate": 5.140200505150934e-07, "loss": 0.2911, "step": 29130 }, { "epoch": 0.5063706999947852, "grad_norm": 1.661457989498564, "learning_rate": 5.139919122244826e-07, "loss": 0.2259, "step": 29131 }, { "epoch": 0.5063880825322881, "grad_norm": 1.3193474633274032, "learning_rate": 5.139637738895243e-07, "loss": 0.2751, "step": 29132 }, { "epoch": 0.5064054650697909, "grad_norm": 2.373824356593333, "learning_rate": 5.139356355103068e-07, "loss": 0.1948, "step": 29133 }, { "epoch": 0.5064228476072937, "grad_norm": 1.3419387776075145, "learning_rate": 5.139074970869201e-07, "loss": 0.2672, "step": 29134 }, { "epoch": 0.5064402301447966, "grad_norm": 1.5660447466142335, "learning_rate": 5.138793586194528e-07, "loss": 0.148, "step": 29135 }, { "epoch": 0.5064576126822994, "grad_norm": 2.116579677571877, "learning_rate": 5.138512201079945e-07, "loss": 0.2221, "step": 29136 }, { "epoch": 0.5064749952198022, "grad_norm": 1.7157405299303559, "learning_rate": 5.138230815526341e-07, "loss": 0.2302, "step": 29137 }, { "epoch": 0.506492377757305, "grad_norm": 2.1872384609457884, "learning_rate": 5.137949429534609e-07, "loss": 0.2428, "step": 29138 }, { "epoch": 0.5065097602948079, "grad_norm": 1.902641261584541, "learning_rate": 5.137668043105639e-07, "loss": 0.2598, "step": 29139 }, { "epoch": 0.5065271428323107, "grad_norm": 1.5441535503959385, "learning_rate": 5.137386656240327e-07, "loss": 0.3352, "step": 29140 }, { "epoch": 0.5065445253698135, "grad_norm": 1.6462781325353733, "learning_rate": 5.137105268939562e-07, "loss": 0.252, "step": 29141 }, { "epoch": 0.5065619079073163, "grad_norm": 2.216794242570494, "learning_rate": 5.136823881204234e-07, "loss": 0.2924, "step": 29142 }, { "epoch": 0.5065792904448191, "grad_norm": 1.740842755616802, "learning_rate": 5.136542493035241e-07, "loss": 0.4136, "step": 29143 }, { "epoch": 0.5065966729823219, "grad_norm": 2.2060946721974153, "learning_rate": 5.136261104433467e-07, "loss": 0.2582, "step": 29144 }, { "epoch": 0.5066140555198247, "grad_norm": 1.9153570757158567, "learning_rate": 5.135979715399811e-07, "loss": 0.2573, "step": 29145 }, { "epoch": 0.5066314380573276, "grad_norm": 3.105036877607163, "learning_rate": 5.135698325935161e-07, "loss": 0.3707, "step": 29146 }, { "epoch": 0.5066488205948304, "grad_norm": 2.6454859626665783, "learning_rate": 5.135416936040409e-07, "loss": 0.292, "step": 29147 }, { "epoch": 0.5066662031323332, "grad_norm": 1.9256482522793157, "learning_rate": 5.135135545716446e-07, "loss": 0.2387, "step": 29148 }, { "epoch": 0.5066835856698361, "grad_norm": 1.4653364026323041, "learning_rate": 5.134854154964169e-07, "loss": 0.2035, "step": 29149 }, { "epoch": 0.5067009682073389, "grad_norm": 2.812141690872598, "learning_rate": 5.134572763784462e-07, "loss": 0.2722, "step": 29150 }, { "epoch": 0.5067183507448417, "grad_norm": 1.9438286980222035, "learning_rate": 5.134291372178224e-07, "loss": 0.3625, "step": 29151 }, { "epoch": 0.5067357332823446, "grad_norm": 1.0782227464696392, "learning_rate": 5.134009980146343e-07, "loss": 0.2124, "step": 29152 }, { "epoch": 0.5067531158198474, "grad_norm": 1.5755423039301204, "learning_rate": 5.133728587689714e-07, "loss": 0.1927, "step": 29153 }, { "epoch": 0.5067704983573502, "grad_norm": 1.1937024678619161, "learning_rate": 5.133447194809225e-07, "loss": 0.2307, "step": 29154 }, { "epoch": 0.506787880894853, "grad_norm": 1.5211836044618952, "learning_rate": 5.13316580150577e-07, "loss": 0.2973, "step": 29155 }, { "epoch": 0.5068052634323559, "grad_norm": 1.3876377533532769, "learning_rate": 5.13288440778024e-07, "loss": 0.2325, "step": 29156 }, { "epoch": 0.5068226459698587, "grad_norm": 1.6195251910436759, "learning_rate": 5.132603013633529e-07, "loss": 0.2995, "step": 29157 }, { "epoch": 0.5068400285073615, "grad_norm": 1.779865962676094, "learning_rate": 5.132321619066525e-07, "loss": 0.2103, "step": 29158 }, { "epoch": 0.5068574110448644, "grad_norm": 1.0688931797865282, "learning_rate": 5.132040224080125e-07, "loss": 0.2223, "step": 29159 }, { "epoch": 0.5068747935823672, "grad_norm": 1.4632693742208376, "learning_rate": 5.131758828675216e-07, "loss": 0.2664, "step": 29160 }, { "epoch": 0.50689217611987, "grad_norm": 1.6618338566215738, "learning_rate": 5.131477432852693e-07, "loss": 0.202, "step": 29161 }, { "epoch": 0.5069095586573727, "grad_norm": 1.9324911658063466, "learning_rate": 5.131196036613447e-07, "loss": 0.2399, "step": 29162 }, { "epoch": 0.5069269411948756, "grad_norm": 1.5066836110590618, "learning_rate": 5.13091463995837e-07, "loss": 0.2372, "step": 29163 }, { "epoch": 0.5069443237323784, "grad_norm": 2.2557311527182353, "learning_rate": 5.130633242888352e-07, "loss": 0.3311, "step": 29164 }, { "epoch": 0.5069617062698812, "grad_norm": 1.3833828298084574, "learning_rate": 5.130351845404288e-07, "loss": 0.2961, "step": 29165 }, { "epoch": 0.5069790888073841, "grad_norm": 1.5400553885785517, "learning_rate": 5.130070447507069e-07, "loss": 0.3077, "step": 29166 }, { "epoch": 0.5069964713448869, "grad_norm": 1.4258311628202094, "learning_rate": 5.129789049197586e-07, "loss": 0.2228, "step": 29167 }, { "epoch": 0.5070138538823897, "grad_norm": 1.2541684026471525, "learning_rate": 5.12950765047673e-07, "loss": 0.168, "step": 29168 }, { "epoch": 0.5070312364198926, "grad_norm": 1.7606072530350199, "learning_rate": 5.129226251345397e-07, "loss": 0.3358, "step": 29169 }, { "epoch": 0.5070486189573954, "grad_norm": 1.9865503392816777, "learning_rate": 5.128944851804475e-07, "loss": 0.269, "step": 29170 }, { "epoch": 0.5070660014948982, "grad_norm": 1.6959019687152628, "learning_rate": 5.128663451854856e-07, "loss": 0.2962, "step": 29171 }, { "epoch": 0.507083384032401, "grad_norm": 1.4041880513768064, "learning_rate": 5.128382051497434e-07, "loss": 0.3333, "step": 29172 }, { "epoch": 0.5071007665699039, "grad_norm": 1.440225606747224, "learning_rate": 5.128100650733098e-07, "loss": 0.2993, "step": 29173 }, { "epoch": 0.5071181491074067, "grad_norm": 1.2442863951433456, "learning_rate": 5.127819249562743e-07, "loss": 0.2679, "step": 29174 }, { "epoch": 0.5071355316449095, "grad_norm": 1.165262436458121, "learning_rate": 5.127537847987261e-07, "loss": 0.3876, "step": 29175 }, { "epoch": 0.5071529141824124, "grad_norm": 2.051759835725094, "learning_rate": 5.127256446007541e-07, "loss": 0.2591, "step": 29176 }, { "epoch": 0.5071702967199152, "grad_norm": 1.8567938373516943, "learning_rate": 5.126975043624477e-07, "loss": 0.3189, "step": 29177 }, { "epoch": 0.507187679257418, "grad_norm": 2.756443464872469, "learning_rate": 5.126693640838962e-07, "loss": 0.2353, "step": 29178 }, { "epoch": 0.5072050617949209, "grad_norm": 1.8699909836073765, "learning_rate": 5.126412237651884e-07, "loss": 0.2291, "step": 29179 }, { "epoch": 0.5072224443324237, "grad_norm": 2.0028512924786632, "learning_rate": 5.126130834064138e-07, "loss": 0.2941, "step": 29180 }, { "epoch": 0.5072398268699265, "grad_norm": 1.4531576518257387, "learning_rate": 5.125849430076615e-07, "loss": 0.3111, "step": 29181 }, { "epoch": 0.5072572094074292, "grad_norm": 1.3503836344959765, "learning_rate": 5.125568025690208e-07, "loss": 0.2023, "step": 29182 }, { "epoch": 0.5072745919449321, "grad_norm": 1.7131008370168868, "learning_rate": 5.125286620905806e-07, "loss": 0.1689, "step": 29183 }, { "epoch": 0.5072919744824349, "grad_norm": 0.9083713556513289, "learning_rate": 5.125005215724306e-07, "loss": 0.2239, "step": 29184 }, { "epoch": 0.5073093570199377, "grad_norm": 1.3331995198460584, "learning_rate": 5.124723810146595e-07, "loss": 0.1232, "step": 29185 }, { "epoch": 0.5073267395574406, "grad_norm": 1.2366875521853566, "learning_rate": 5.124442404173568e-07, "loss": 0.3517, "step": 29186 }, { "epoch": 0.5073441220949434, "grad_norm": 1.4266338323850365, "learning_rate": 5.124160997806115e-07, "loss": 0.3835, "step": 29187 }, { "epoch": 0.5073615046324462, "grad_norm": 1.404496276561377, "learning_rate": 5.123879591045129e-07, "loss": 0.3365, "step": 29188 }, { "epoch": 0.507378887169949, "grad_norm": 2.7418353398842616, "learning_rate": 5.1235981838915e-07, "loss": 0.2963, "step": 29189 }, { "epoch": 0.5073962697074519, "grad_norm": 1.895119382600286, "learning_rate": 5.123316776346123e-07, "loss": 0.3118, "step": 29190 }, { "epoch": 0.5074136522449547, "grad_norm": 1.9026486792179522, "learning_rate": 5.123035368409888e-07, "loss": 0.2858, "step": 29191 }, { "epoch": 0.5074310347824575, "grad_norm": 1.0534187996975284, "learning_rate": 5.122753960083687e-07, "loss": 0.1451, "step": 29192 }, { "epoch": 0.5074484173199604, "grad_norm": 2.714534546798066, "learning_rate": 5.122472551368413e-07, "loss": 0.4149, "step": 29193 }, { "epoch": 0.5074657998574632, "grad_norm": 2.4148104607387144, "learning_rate": 5.122191142264958e-07, "loss": 0.2841, "step": 29194 }, { "epoch": 0.507483182394966, "grad_norm": 1.3729221412142938, "learning_rate": 5.121909732774212e-07, "loss": 0.1569, "step": 29195 }, { "epoch": 0.5075005649324689, "grad_norm": 1.3253375416357531, "learning_rate": 5.121628322897069e-07, "loss": 0.2508, "step": 29196 }, { "epoch": 0.5075179474699717, "grad_norm": 1.3036416852229595, "learning_rate": 5.12134691263442e-07, "loss": 0.1762, "step": 29197 }, { "epoch": 0.5075353300074745, "grad_norm": 0.846984962804173, "learning_rate": 5.121065501987155e-07, "loss": 0.2269, "step": 29198 }, { "epoch": 0.5075527125449774, "grad_norm": 2.6988651982418443, "learning_rate": 5.120784090956171e-07, "loss": 0.2109, "step": 29199 }, { "epoch": 0.5075700950824802, "grad_norm": 1.506374916885261, "learning_rate": 5.120502679542356e-07, "loss": 0.299, "step": 29200 }, { "epoch": 0.507587477619983, "grad_norm": 1.1815503422907478, "learning_rate": 5.120221267746602e-07, "loss": 0.2671, "step": 29201 }, { "epoch": 0.5076048601574857, "grad_norm": 1.5677278212574262, "learning_rate": 5.119939855569802e-07, "loss": 0.2049, "step": 29202 }, { "epoch": 0.5076222426949886, "grad_norm": 2.0894846147783155, "learning_rate": 5.119658443012849e-07, "loss": 0.1929, "step": 29203 }, { "epoch": 0.5076396252324914, "grad_norm": 2.3029676220019435, "learning_rate": 5.119377030076631e-07, "loss": 0.3255, "step": 29204 }, { "epoch": 0.5076570077699942, "grad_norm": 1.9565445214579344, "learning_rate": 5.119095616762045e-07, "loss": 0.2511, "step": 29205 }, { "epoch": 0.5076743903074971, "grad_norm": 1.3656023814734468, "learning_rate": 5.11881420306998e-07, "loss": 0.3801, "step": 29206 }, { "epoch": 0.5076917728449999, "grad_norm": 2.8047256417993043, "learning_rate": 5.118532789001329e-07, "loss": 0.2907, "step": 29207 }, { "epoch": 0.5077091553825027, "grad_norm": 1.4665133724820836, "learning_rate": 5.118251374556983e-07, "loss": 0.2425, "step": 29208 }, { "epoch": 0.5077265379200055, "grad_norm": 1.8613604298237603, "learning_rate": 5.117969959737834e-07, "loss": 0.314, "step": 29209 }, { "epoch": 0.5077439204575084, "grad_norm": 1.6120028274926872, "learning_rate": 5.117688544544774e-07, "loss": 0.3022, "step": 29210 }, { "epoch": 0.5077613029950112, "grad_norm": 3.3334386496200095, "learning_rate": 5.117407128978697e-07, "loss": 0.4464, "step": 29211 }, { "epoch": 0.507778685532514, "grad_norm": 1.2611121744878633, "learning_rate": 5.117125713040492e-07, "loss": 0.1865, "step": 29212 }, { "epoch": 0.5077960680700169, "grad_norm": 1.246258799081584, "learning_rate": 5.116844296731054e-07, "loss": 0.1409, "step": 29213 }, { "epoch": 0.5078134506075197, "grad_norm": 1.11913948767483, "learning_rate": 5.116562880051271e-07, "loss": 0.1841, "step": 29214 }, { "epoch": 0.5078308331450225, "grad_norm": 2.478793443909114, "learning_rate": 5.116281463002039e-07, "loss": 0.2026, "step": 29215 }, { "epoch": 0.5078482156825254, "grad_norm": 1.3471303546459636, "learning_rate": 5.116000045584248e-07, "loss": 0.2337, "step": 29216 }, { "epoch": 0.5078655982200282, "grad_norm": 1.4449796329763143, "learning_rate": 5.11571862779879e-07, "loss": 0.1466, "step": 29217 }, { "epoch": 0.507882980757531, "grad_norm": 1.976449456121464, "learning_rate": 5.115437209646555e-07, "loss": 0.3122, "step": 29218 }, { "epoch": 0.5079003632950339, "grad_norm": 2.59275471870993, "learning_rate": 5.11515579112844e-07, "loss": 0.2295, "step": 29219 }, { "epoch": 0.5079177458325367, "grad_norm": 1.5355511235904902, "learning_rate": 5.114874372245332e-07, "loss": 0.2318, "step": 29220 }, { "epoch": 0.5079351283700395, "grad_norm": 2.287111525902115, "learning_rate": 5.114592952998126e-07, "loss": 0.2817, "step": 29221 }, { "epoch": 0.5079525109075422, "grad_norm": 2.5353442934413284, "learning_rate": 5.114311533387714e-07, "loss": 0.4505, "step": 29222 }, { "epoch": 0.5079698934450451, "grad_norm": 1.111726506080265, "learning_rate": 5.114030113414986e-07, "loss": 0.2095, "step": 29223 }, { "epoch": 0.5079872759825479, "grad_norm": 1.650406807978459, "learning_rate": 5.113748693080833e-07, "loss": 0.1694, "step": 29224 }, { "epoch": 0.5080046585200507, "grad_norm": 2.452974493303507, "learning_rate": 5.113467272386152e-07, "loss": 0.2951, "step": 29225 }, { "epoch": 0.5080220410575536, "grad_norm": 2.1502554552938964, "learning_rate": 5.113185851331831e-07, "loss": 0.2381, "step": 29226 }, { "epoch": 0.5080394235950564, "grad_norm": 2.1198612487544533, "learning_rate": 5.112904429918761e-07, "loss": 0.1722, "step": 29227 }, { "epoch": 0.5080568061325592, "grad_norm": 2.141433210167404, "learning_rate": 5.112623008147839e-07, "loss": 0.2299, "step": 29228 }, { "epoch": 0.508074188670062, "grad_norm": 0.8704056620559916, "learning_rate": 5.11234158601995e-07, "loss": 0.1275, "step": 29229 }, { "epoch": 0.5080915712075649, "grad_norm": 5.651750372319129, "learning_rate": 5.112060163535993e-07, "loss": 0.1444, "step": 29230 }, { "epoch": 0.5081089537450677, "grad_norm": 1.1076095582256704, "learning_rate": 5.111778740696854e-07, "loss": 0.1345, "step": 29231 }, { "epoch": 0.5081263362825705, "grad_norm": 1.9045540219983437, "learning_rate": 5.11149731750343e-07, "loss": 0.3476, "step": 29232 }, { "epoch": 0.5081437188200734, "grad_norm": 2.490450391776256, "learning_rate": 5.11121589395661e-07, "loss": 0.4044, "step": 29233 }, { "epoch": 0.5081611013575762, "grad_norm": 1.5916430096352725, "learning_rate": 5.110934470057286e-07, "loss": 0.182, "step": 29234 }, { "epoch": 0.508178483895079, "grad_norm": 1.988005631586468, "learning_rate": 5.110653045806353e-07, "loss": 0.3043, "step": 29235 }, { "epoch": 0.5081958664325819, "grad_norm": 2.1788654987516405, "learning_rate": 5.110371621204699e-07, "loss": 0.1468, "step": 29236 }, { "epoch": 0.5082132489700847, "grad_norm": 1.2978593768092643, "learning_rate": 5.110090196253216e-07, "loss": 0.3952, "step": 29237 }, { "epoch": 0.5082306315075875, "grad_norm": 1.468450403392825, "learning_rate": 5.1098087709528e-07, "loss": 0.1836, "step": 29238 }, { "epoch": 0.5082480140450903, "grad_norm": 2.6675890782911527, "learning_rate": 5.10952734530434e-07, "loss": 0.3932, "step": 29239 }, { "epoch": 0.5082653965825932, "grad_norm": 1.950161299462533, "learning_rate": 5.109245919308729e-07, "loss": 0.2896, "step": 29240 }, { "epoch": 0.508282779120096, "grad_norm": 1.2248666680706568, "learning_rate": 5.108964492966859e-07, "loss": 0.1629, "step": 29241 }, { "epoch": 0.5083001616575987, "grad_norm": 1.3867825258991298, "learning_rate": 5.10868306627962e-07, "loss": 0.2257, "step": 29242 }, { "epoch": 0.5083175441951016, "grad_norm": 2.3483667352935935, "learning_rate": 5.108401639247906e-07, "loss": 0.2023, "step": 29243 }, { "epoch": 0.5083349267326044, "grad_norm": 1.6496111456232299, "learning_rate": 5.108120211872611e-07, "loss": 0.239, "step": 29244 }, { "epoch": 0.5083523092701072, "grad_norm": 1.4566503724802344, "learning_rate": 5.107838784154622e-07, "loss": 0.1672, "step": 29245 }, { "epoch": 0.50836969180761, "grad_norm": 1.302693416489045, "learning_rate": 5.107557356094834e-07, "loss": 0.191, "step": 29246 }, { "epoch": 0.5083870743451129, "grad_norm": 1.053903462862614, "learning_rate": 5.107275927694139e-07, "loss": 0.3402, "step": 29247 }, { "epoch": 0.5084044568826157, "grad_norm": 1.2377887193172399, "learning_rate": 5.106994498953429e-07, "loss": 0.2853, "step": 29248 }, { "epoch": 0.5084218394201185, "grad_norm": 1.8796485144683757, "learning_rate": 5.106713069873592e-07, "loss": 0.1339, "step": 29249 }, { "epoch": 0.5084392219576214, "grad_norm": 1.7586807117819983, "learning_rate": 5.106431640455528e-07, "loss": 0.2359, "step": 29250 }, { "epoch": 0.5084566044951242, "grad_norm": 1.9669802052193146, "learning_rate": 5.106150210700123e-07, "loss": 0.2861, "step": 29251 }, { "epoch": 0.508473987032627, "grad_norm": 1.317094866157186, "learning_rate": 5.105868780608271e-07, "loss": 0.215, "step": 29252 }, { "epoch": 0.5084913695701299, "grad_norm": 1.7050508286017465, "learning_rate": 5.105587350180863e-07, "loss": 0.2528, "step": 29253 }, { "epoch": 0.5085087521076327, "grad_norm": 1.0706109673142958, "learning_rate": 5.105305919418792e-07, "loss": 0.1836, "step": 29254 }, { "epoch": 0.5085261346451355, "grad_norm": 1.3509336272124475, "learning_rate": 5.10502448832295e-07, "loss": 0.1965, "step": 29255 }, { "epoch": 0.5085435171826383, "grad_norm": 1.651491909768358, "learning_rate": 5.104743056894229e-07, "loss": 0.2565, "step": 29256 }, { "epoch": 0.5085608997201412, "grad_norm": 1.2427523855934832, "learning_rate": 5.104461625133519e-07, "loss": 0.3315, "step": 29257 }, { "epoch": 0.508578282257644, "grad_norm": 3.683087379832841, "learning_rate": 5.104180193041714e-07, "loss": 0.371, "step": 29258 }, { "epoch": 0.5085956647951468, "grad_norm": 0.9646775311100385, "learning_rate": 5.103898760619706e-07, "loss": 0.3023, "step": 29259 }, { "epoch": 0.5086130473326497, "grad_norm": 1.6794431567517163, "learning_rate": 5.103617327868387e-07, "loss": 0.3066, "step": 29260 }, { "epoch": 0.5086304298701524, "grad_norm": 1.5939553094107315, "learning_rate": 5.103335894788649e-07, "loss": 0.2725, "step": 29261 }, { "epoch": 0.5086478124076552, "grad_norm": 1.659583987971866, "learning_rate": 5.103054461381382e-07, "loss": 0.2938, "step": 29262 }, { "epoch": 0.508665194945158, "grad_norm": 1.5453669988947347, "learning_rate": 5.102773027647482e-07, "loss": 0.1673, "step": 29263 }, { "epoch": 0.5086825774826609, "grad_norm": 1.5862304917328591, "learning_rate": 5.102491593587837e-07, "loss": 0.174, "step": 29264 }, { "epoch": 0.5086999600201637, "grad_norm": 1.5020676784996523, "learning_rate": 5.102210159203342e-07, "loss": 0.2178, "step": 29265 }, { "epoch": 0.5087173425576665, "grad_norm": 3.0910440526514247, "learning_rate": 5.101928724494887e-07, "loss": 0.3275, "step": 29266 }, { "epoch": 0.5087347250951694, "grad_norm": 1.9735447007976767, "learning_rate": 5.101647289463364e-07, "loss": 0.26, "step": 29267 }, { "epoch": 0.5087521076326722, "grad_norm": 1.4087269075250994, "learning_rate": 5.101365854109666e-07, "loss": 0.344, "step": 29268 }, { "epoch": 0.508769490170175, "grad_norm": 1.6061247930506968, "learning_rate": 5.101084418434686e-07, "loss": 0.2913, "step": 29269 }, { "epoch": 0.5087868727076779, "grad_norm": 1.7494030903023665, "learning_rate": 5.100802982439314e-07, "loss": 0.1751, "step": 29270 }, { "epoch": 0.5088042552451807, "grad_norm": 1.754210596561605, "learning_rate": 5.100521546124443e-07, "loss": 0.1759, "step": 29271 }, { "epoch": 0.5088216377826835, "grad_norm": 2.6419280125373374, "learning_rate": 5.100240109490964e-07, "loss": 0.2618, "step": 29272 }, { "epoch": 0.5088390203201864, "grad_norm": 1.7592338066739954, "learning_rate": 5.099958672539772e-07, "loss": 0.2624, "step": 29273 }, { "epoch": 0.5088564028576892, "grad_norm": 1.348175365992965, "learning_rate": 5.099677235271755e-07, "loss": 0.1966, "step": 29274 }, { "epoch": 0.508873785395192, "grad_norm": 2.0900729402867477, "learning_rate": 5.099395797687808e-07, "loss": 0.343, "step": 29275 }, { "epoch": 0.5088911679326948, "grad_norm": 1.6443346415333457, "learning_rate": 5.099114359788822e-07, "loss": 0.2107, "step": 29276 }, { "epoch": 0.5089085504701977, "grad_norm": 1.6657132642842567, "learning_rate": 5.09883292157569e-07, "loss": 0.2893, "step": 29277 }, { "epoch": 0.5089259330077005, "grad_norm": 1.6644038952401772, "learning_rate": 5.098551483049301e-07, "loss": 0.2932, "step": 29278 }, { "epoch": 0.5089433155452033, "grad_norm": 2.185838077729093, "learning_rate": 5.098270044210549e-07, "loss": 0.1502, "step": 29279 }, { "epoch": 0.5089606980827062, "grad_norm": 3.03734114080501, "learning_rate": 5.097988605060327e-07, "loss": 0.3966, "step": 29280 }, { "epoch": 0.5089780806202089, "grad_norm": 2.364662347837453, "learning_rate": 5.097707165599527e-07, "loss": 0.2237, "step": 29281 }, { "epoch": 0.5089954631577117, "grad_norm": 2.4061261552691278, "learning_rate": 5.097425725829039e-07, "loss": 0.18, "step": 29282 }, { "epoch": 0.5090128456952145, "grad_norm": 1.6307743389650733, "learning_rate": 5.097144285749755e-07, "loss": 0.4449, "step": 29283 }, { "epoch": 0.5090302282327174, "grad_norm": 1.6018174468874742, "learning_rate": 5.096862845362571e-07, "loss": 0.2694, "step": 29284 }, { "epoch": 0.5090476107702202, "grad_norm": 1.0257065976353723, "learning_rate": 5.096581404668374e-07, "loss": 0.1866, "step": 29285 }, { "epoch": 0.509064993307723, "grad_norm": 2.2510132243418357, "learning_rate": 5.09629996366806e-07, "loss": 0.5179, "step": 29286 }, { "epoch": 0.5090823758452259, "grad_norm": 1.1722243868457092, "learning_rate": 5.096018522362518e-07, "loss": 0.1281, "step": 29287 }, { "epoch": 0.5090997583827287, "grad_norm": 1.539763888836999, "learning_rate": 5.095737080752643e-07, "loss": 0.2992, "step": 29288 }, { "epoch": 0.5091171409202315, "grad_norm": 1.9012872638274738, "learning_rate": 5.095455638839323e-07, "loss": 0.2881, "step": 29289 }, { "epoch": 0.5091345234577344, "grad_norm": 1.3797941812385321, "learning_rate": 5.095174196623454e-07, "loss": 0.2038, "step": 29290 }, { "epoch": 0.5091519059952372, "grad_norm": 1.9702558472996923, "learning_rate": 5.094892754105926e-07, "loss": 0.2743, "step": 29291 }, { "epoch": 0.50916928853274, "grad_norm": 1.5249640602634835, "learning_rate": 5.094611311287633e-07, "loss": 0.1855, "step": 29292 }, { "epoch": 0.5091866710702428, "grad_norm": 2.6736009405000227, "learning_rate": 5.094329868169464e-07, "loss": 0.2713, "step": 29293 }, { "epoch": 0.5092040536077457, "grad_norm": 1.3095165495872931, "learning_rate": 5.094048424752313e-07, "loss": 0.2793, "step": 29294 }, { "epoch": 0.5092214361452485, "grad_norm": 1.942321268117895, "learning_rate": 5.093766981037072e-07, "loss": 0.1918, "step": 29295 }, { "epoch": 0.5092388186827513, "grad_norm": 1.6338517892830888, "learning_rate": 5.093485537024634e-07, "loss": 0.2979, "step": 29296 }, { "epoch": 0.5092562012202542, "grad_norm": 1.6787192010695238, "learning_rate": 5.093204092715887e-07, "loss": 0.4167, "step": 29297 }, { "epoch": 0.509273583757757, "grad_norm": 1.4687682192789848, "learning_rate": 5.092922648111728e-07, "loss": 0.4151, "step": 29298 }, { "epoch": 0.5092909662952598, "grad_norm": 1.3857344736140786, "learning_rate": 5.092641203213045e-07, "loss": 0.2331, "step": 29299 }, { "epoch": 0.5093083488327627, "grad_norm": 1.9609875987714833, "learning_rate": 5.092359758020734e-07, "loss": 0.2494, "step": 29300 }, { "epoch": 0.5093257313702654, "grad_norm": 1.6263963961860344, "learning_rate": 5.092078312535684e-07, "loss": 0.2029, "step": 29301 }, { "epoch": 0.5093431139077682, "grad_norm": 1.208337298332584, "learning_rate": 5.091796866758789e-07, "loss": 0.1977, "step": 29302 }, { "epoch": 0.509360496445271, "grad_norm": 2.51379188444979, "learning_rate": 5.091515420690939e-07, "loss": 0.1915, "step": 29303 }, { "epoch": 0.5093778789827739, "grad_norm": 1.4844947428123545, "learning_rate": 5.091233974333028e-07, "loss": 0.2521, "step": 29304 }, { "epoch": 0.5093952615202767, "grad_norm": 1.4423385882474984, "learning_rate": 5.090952527685946e-07, "loss": 0.2865, "step": 29305 }, { "epoch": 0.5094126440577795, "grad_norm": 1.4166150416544434, "learning_rate": 5.090671080750586e-07, "loss": 0.2205, "step": 29306 }, { "epoch": 0.5094300265952824, "grad_norm": 1.7209618704423317, "learning_rate": 5.090389633527841e-07, "loss": 0.1925, "step": 29307 }, { "epoch": 0.5094474091327852, "grad_norm": 1.0784722623338903, "learning_rate": 5.090108186018603e-07, "loss": 0.2548, "step": 29308 }, { "epoch": 0.509464791670288, "grad_norm": 1.2249584233069348, "learning_rate": 5.089826738223763e-07, "loss": 0.2257, "step": 29309 }, { "epoch": 0.5094821742077908, "grad_norm": 2.5631993907362207, "learning_rate": 5.089545290144214e-07, "loss": 0.3862, "step": 29310 }, { "epoch": 0.5094995567452937, "grad_norm": 1.6808033782592746, "learning_rate": 5.089263841780847e-07, "loss": 0.2141, "step": 29311 }, { "epoch": 0.5095169392827965, "grad_norm": 1.8697243857205474, "learning_rate": 5.088982393134554e-07, "loss": 0.1614, "step": 29312 }, { "epoch": 0.5095343218202993, "grad_norm": 1.39161425390274, "learning_rate": 5.088700944206227e-07, "loss": 0.2765, "step": 29313 }, { "epoch": 0.5095517043578022, "grad_norm": 4.3172126742162344, "learning_rate": 5.088419494996762e-07, "loss": 0.2677, "step": 29314 }, { "epoch": 0.509569086895305, "grad_norm": 1.1777543510236825, "learning_rate": 5.088138045507045e-07, "loss": 0.2496, "step": 29315 }, { "epoch": 0.5095864694328078, "grad_norm": 1.5547130195833632, "learning_rate": 5.087856595737972e-07, "loss": 0.2039, "step": 29316 }, { "epoch": 0.5096038519703107, "grad_norm": 1.109357093981899, "learning_rate": 5.087575145690434e-07, "loss": 0.1733, "step": 29317 }, { "epoch": 0.5096212345078135, "grad_norm": 1.311764758702359, "learning_rate": 5.087293695365323e-07, "loss": 0.3116, "step": 29318 }, { "epoch": 0.5096386170453163, "grad_norm": 2.4626300112529607, "learning_rate": 5.087012244763531e-07, "loss": 0.3392, "step": 29319 }, { "epoch": 0.5096559995828192, "grad_norm": 1.8900652464608807, "learning_rate": 5.086730793885951e-07, "loss": 0.2284, "step": 29320 }, { "epoch": 0.5096733821203219, "grad_norm": 2.730196595178322, "learning_rate": 5.086449342733473e-07, "loss": 0.2366, "step": 29321 }, { "epoch": 0.5096907646578247, "grad_norm": 1.2051720750800734, "learning_rate": 5.08616789130699e-07, "loss": 0.3147, "step": 29322 }, { "epoch": 0.5097081471953275, "grad_norm": 2.2716245499445824, "learning_rate": 5.085886439607395e-07, "loss": 0.2345, "step": 29323 }, { "epoch": 0.5097255297328304, "grad_norm": 1.3935227041723124, "learning_rate": 5.085604987635579e-07, "loss": 0.1579, "step": 29324 }, { "epoch": 0.5097429122703332, "grad_norm": 1.9949601687015566, "learning_rate": 5.085323535392435e-07, "loss": 0.1642, "step": 29325 }, { "epoch": 0.509760294807836, "grad_norm": 1.2511070775696642, "learning_rate": 5.085042082878855e-07, "loss": 0.1851, "step": 29326 }, { "epoch": 0.5097776773453389, "grad_norm": 1.5661238085714952, "learning_rate": 5.08476063009573e-07, "loss": 0.1823, "step": 29327 }, { "epoch": 0.5097950598828417, "grad_norm": 1.0637861181165102, "learning_rate": 5.084479177043952e-07, "loss": 0.2087, "step": 29328 }, { "epoch": 0.5098124424203445, "grad_norm": 2.4486914899335255, "learning_rate": 5.084197723724416e-07, "loss": 0.3277, "step": 29329 }, { "epoch": 0.5098298249578473, "grad_norm": 1.1609835931559702, "learning_rate": 5.08391627013801e-07, "loss": 0.3271, "step": 29330 }, { "epoch": 0.5098472074953502, "grad_norm": 1.7863171644372087, "learning_rate": 5.083634816285629e-07, "loss": 0.2319, "step": 29331 }, { "epoch": 0.509864590032853, "grad_norm": 1.9607756440217343, "learning_rate": 5.083353362168162e-07, "loss": 0.3477, "step": 29332 }, { "epoch": 0.5098819725703558, "grad_norm": 2.365012459774717, "learning_rate": 5.083071907786507e-07, "loss": 0.3561, "step": 29333 }, { "epoch": 0.5098993551078587, "grad_norm": 1.4540781260233033, "learning_rate": 5.082790453141549e-07, "loss": 0.1843, "step": 29334 }, { "epoch": 0.5099167376453615, "grad_norm": 2.3772739481317497, "learning_rate": 5.082508998234184e-07, "loss": 0.2852, "step": 29335 }, { "epoch": 0.5099341201828643, "grad_norm": 1.8088333693563372, "learning_rate": 5.082227543065303e-07, "loss": 0.2751, "step": 29336 }, { "epoch": 0.5099515027203672, "grad_norm": 2.566804075164566, "learning_rate": 5.0819460876358e-07, "loss": 0.2719, "step": 29337 }, { "epoch": 0.50996888525787, "grad_norm": 1.5525313735501152, "learning_rate": 5.081664631946564e-07, "loss": 0.2186, "step": 29338 }, { "epoch": 0.5099862677953728, "grad_norm": 1.145855239028597, "learning_rate": 5.08138317599849e-07, "loss": 0.2222, "step": 29339 }, { "epoch": 0.5100036503328756, "grad_norm": 1.3998156485706288, "learning_rate": 5.081101719792467e-07, "loss": 0.238, "step": 29340 }, { "epoch": 0.5100210328703784, "grad_norm": 1.7987568878670015, "learning_rate": 5.080820263329391e-07, "loss": 0.2381, "step": 29341 }, { "epoch": 0.5100384154078812, "grad_norm": 1.8369341755747197, "learning_rate": 5.08053880661015e-07, "loss": 0.4867, "step": 29342 }, { "epoch": 0.510055797945384, "grad_norm": 1.9332192781517414, "learning_rate": 5.080257349635638e-07, "loss": 0.2509, "step": 29343 }, { "epoch": 0.5100731804828869, "grad_norm": 1.5298035948493505, "learning_rate": 5.079975892406749e-07, "loss": 0.2733, "step": 29344 }, { "epoch": 0.5100905630203897, "grad_norm": 1.3648508413553035, "learning_rate": 5.079694434924372e-07, "loss": 0.2905, "step": 29345 }, { "epoch": 0.5101079455578925, "grad_norm": 1.3006191061141092, "learning_rate": 5.0794129771894e-07, "loss": 0.4175, "step": 29346 }, { "epoch": 0.5101253280953953, "grad_norm": 1.1699458822775621, "learning_rate": 5.079131519202725e-07, "loss": 0.2432, "step": 29347 }, { "epoch": 0.5101427106328982, "grad_norm": 1.268424800164492, "learning_rate": 5.07885006096524e-07, "loss": 0.1887, "step": 29348 }, { "epoch": 0.510160093170401, "grad_norm": 2.2565030566804882, "learning_rate": 5.078568602477836e-07, "loss": 0.3252, "step": 29349 }, { "epoch": 0.5101774757079038, "grad_norm": 1.839946343189043, "learning_rate": 5.078287143741406e-07, "loss": 0.2777, "step": 29350 }, { "epoch": 0.5101948582454067, "grad_norm": 1.3722139459102312, "learning_rate": 5.078005684756842e-07, "loss": 0.2676, "step": 29351 }, { "epoch": 0.5102122407829095, "grad_norm": 1.9922536646718547, "learning_rate": 5.077724225525037e-07, "loss": 0.3348, "step": 29352 }, { "epoch": 0.5102296233204123, "grad_norm": 1.2683706093918132, "learning_rate": 5.077442766046878e-07, "loss": 0.3838, "step": 29353 }, { "epoch": 0.5102470058579152, "grad_norm": 1.5328903141527592, "learning_rate": 5.077161306323265e-07, "loss": 0.2124, "step": 29354 }, { "epoch": 0.510264388395418, "grad_norm": 1.9220817897022355, "learning_rate": 5.076879846355084e-07, "loss": 0.2807, "step": 29355 }, { "epoch": 0.5102817709329208, "grad_norm": 1.5159821447801953, "learning_rate": 5.076598386143231e-07, "loss": 0.4996, "step": 29356 }, { "epoch": 0.5102991534704236, "grad_norm": 1.8517897531609528, "learning_rate": 5.076316925688594e-07, "loss": 0.2977, "step": 29357 }, { "epoch": 0.5103165360079265, "grad_norm": 1.2571059677462806, "learning_rate": 5.07603546499207e-07, "loss": 0.2149, "step": 29358 }, { "epoch": 0.5103339185454293, "grad_norm": 1.1416944204252817, "learning_rate": 5.075754004054547e-07, "loss": 0.2494, "step": 29359 }, { "epoch": 0.5103513010829321, "grad_norm": 1.6235671825357312, "learning_rate": 5.075472542876918e-07, "loss": 0.2448, "step": 29360 }, { "epoch": 0.5103686836204349, "grad_norm": 1.3324461135100976, "learning_rate": 5.075191081460076e-07, "loss": 0.2321, "step": 29361 }, { "epoch": 0.5103860661579377, "grad_norm": 1.920110966092791, "learning_rate": 5.074909619804914e-07, "loss": 0.1916, "step": 29362 }, { "epoch": 0.5104034486954405, "grad_norm": 1.5041463155020645, "learning_rate": 5.07462815791232e-07, "loss": 0.1645, "step": 29363 }, { "epoch": 0.5104208312329434, "grad_norm": 1.1594981236749233, "learning_rate": 5.074346695783193e-07, "loss": 0.1735, "step": 29364 }, { "epoch": 0.5104382137704462, "grad_norm": 1.867325361780696, "learning_rate": 5.074065233418419e-07, "loss": 0.2075, "step": 29365 }, { "epoch": 0.510455596307949, "grad_norm": 1.145472202357925, "learning_rate": 5.073783770818893e-07, "loss": 0.2251, "step": 29366 }, { "epoch": 0.5104729788454518, "grad_norm": 2.1404241624136944, "learning_rate": 5.073502307985506e-07, "loss": 0.2513, "step": 29367 }, { "epoch": 0.5104903613829547, "grad_norm": 2.026988435621227, "learning_rate": 5.07322084491915e-07, "loss": 0.2962, "step": 29368 }, { "epoch": 0.5105077439204575, "grad_norm": 1.7861210429354843, "learning_rate": 5.072939381620716e-07, "loss": 0.3213, "step": 29369 }, { "epoch": 0.5105251264579603, "grad_norm": 3.841557662835426, "learning_rate": 5.0726579180911e-07, "loss": 0.1982, "step": 29370 }, { "epoch": 0.5105425089954632, "grad_norm": 1.8988266115959793, "learning_rate": 5.072376454331192e-07, "loss": 0.2427, "step": 29371 }, { "epoch": 0.510559891532966, "grad_norm": 0.9782333627148515, "learning_rate": 5.072094990341882e-07, "loss": 0.2032, "step": 29372 }, { "epoch": 0.5105772740704688, "grad_norm": 3.126470841611029, "learning_rate": 5.071813526124066e-07, "loss": 0.3717, "step": 29373 }, { "epoch": 0.5105946566079717, "grad_norm": 2.2365230573677994, "learning_rate": 5.071532061678633e-07, "loss": 0.1867, "step": 29374 }, { "epoch": 0.5106120391454745, "grad_norm": 1.7417123844788045, "learning_rate": 5.071250597006477e-07, "loss": 0.2435, "step": 29375 }, { "epoch": 0.5106294216829773, "grad_norm": 1.1038461815832135, "learning_rate": 5.070969132108488e-07, "loss": 0.1788, "step": 29376 }, { "epoch": 0.5106468042204801, "grad_norm": 2.0540763580062853, "learning_rate": 5.070687666985561e-07, "loss": 0.2838, "step": 29377 }, { "epoch": 0.510664186757983, "grad_norm": 3.2104444153714238, "learning_rate": 5.070406201638585e-07, "loss": 0.2987, "step": 29378 }, { "epoch": 0.5106815692954858, "grad_norm": 2.9903241418170627, "learning_rate": 5.070124736068455e-07, "loss": 0.4396, "step": 29379 }, { "epoch": 0.5106989518329886, "grad_norm": 1.3343339212495433, "learning_rate": 5.06984327027606e-07, "loss": 0.2043, "step": 29380 }, { "epoch": 0.5107163343704914, "grad_norm": 1.7356907792522842, "learning_rate": 5.069561804262297e-07, "loss": 0.2203, "step": 29381 }, { "epoch": 0.5107337169079942, "grad_norm": 1.2100802335068865, "learning_rate": 5.069280338028052e-07, "loss": 0.1643, "step": 29382 }, { "epoch": 0.510751099445497, "grad_norm": 1.6866464508388832, "learning_rate": 5.068998871574222e-07, "loss": 0.1779, "step": 29383 }, { "epoch": 0.5107684819829998, "grad_norm": 3.710796625052539, "learning_rate": 5.068717404901697e-07, "loss": 0.2626, "step": 29384 }, { "epoch": 0.5107858645205027, "grad_norm": 1.28638820113019, "learning_rate": 5.068435938011368e-07, "loss": 0.1999, "step": 29385 }, { "epoch": 0.5108032470580055, "grad_norm": 0.9136432975534544, "learning_rate": 5.06815447090413e-07, "loss": 0.2346, "step": 29386 }, { "epoch": 0.5108206295955083, "grad_norm": 1.4913772165749486, "learning_rate": 5.067873003580874e-07, "loss": 0.2103, "step": 29387 }, { "epoch": 0.5108380121330112, "grad_norm": 2.08537950619159, "learning_rate": 5.067591536042491e-07, "loss": 0.3654, "step": 29388 }, { "epoch": 0.510855394670514, "grad_norm": 2.091743368804147, "learning_rate": 5.067310068289874e-07, "loss": 0.2249, "step": 29389 }, { "epoch": 0.5108727772080168, "grad_norm": 2.189194560719618, "learning_rate": 5.067028600323913e-07, "loss": 0.2124, "step": 29390 }, { "epoch": 0.5108901597455197, "grad_norm": 1.1573501174500191, "learning_rate": 5.066747132145506e-07, "loss": 0.2908, "step": 29391 }, { "epoch": 0.5109075422830225, "grad_norm": 1.4469231489607683, "learning_rate": 5.066465663755539e-07, "loss": 0.2497, "step": 29392 }, { "epoch": 0.5109249248205253, "grad_norm": 1.367960571620894, "learning_rate": 5.066184195154906e-07, "loss": 0.353, "step": 29393 }, { "epoch": 0.5109423073580281, "grad_norm": 1.5543467756653937, "learning_rate": 5.065902726344499e-07, "loss": 0.1847, "step": 29394 }, { "epoch": 0.510959689895531, "grad_norm": 1.743171185467377, "learning_rate": 5.065621257325213e-07, "loss": 0.3174, "step": 29395 }, { "epoch": 0.5109770724330338, "grad_norm": 1.6299477433538585, "learning_rate": 5.065339788097936e-07, "loss": 0.2148, "step": 29396 }, { "epoch": 0.5109944549705366, "grad_norm": 1.9438451755749413, "learning_rate": 5.065058318663563e-07, "loss": 0.2418, "step": 29397 }, { "epoch": 0.5110118375080395, "grad_norm": 1.6479613029402211, "learning_rate": 5.064776849022983e-07, "loss": 0.2873, "step": 29398 }, { "epoch": 0.5110292200455423, "grad_norm": 2.313528600862567, "learning_rate": 5.064495379177093e-07, "loss": 0.3537, "step": 29399 }, { "epoch": 0.511046602583045, "grad_norm": 2.8344502467382537, "learning_rate": 5.064213909126781e-07, "loss": 0.3436, "step": 29400 }, { "epoch": 0.5110639851205478, "grad_norm": 2.124598694445666, "learning_rate": 5.06393243887294e-07, "loss": 0.2305, "step": 29401 }, { "epoch": 0.5110813676580507, "grad_norm": 1.6802419414798522, "learning_rate": 5.063650968416463e-07, "loss": 0.202, "step": 29402 }, { "epoch": 0.5110987501955535, "grad_norm": 1.7668832246169226, "learning_rate": 5.063369497758241e-07, "loss": 0.2855, "step": 29403 }, { "epoch": 0.5111161327330563, "grad_norm": 2.4500583152710496, "learning_rate": 5.063088026899167e-07, "loss": 0.2736, "step": 29404 }, { "epoch": 0.5111335152705592, "grad_norm": 2.1906959792419585, "learning_rate": 5.062806555840134e-07, "loss": 0.2437, "step": 29405 }, { "epoch": 0.511150897808062, "grad_norm": 1.3383787415959991, "learning_rate": 5.062525084582032e-07, "loss": 0.3905, "step": 29406 }, { "epoch": 0.5111682803455648, "grad_norm": 1.4526612540510828, "learning_rate": 5.062243613125753e-07, "loss": 0.1546, "step": 29407 }, { "epoch": 0.5111856628830677, "grad_norm": 2.0499112933914314, "learning_rate": 5.061962141472194e-07, "loss": 0.213, "step": 29408 }, { "epoch": 0.5112030454205705, "grad_norm": 1.3883475788062334, "learning_rate": 5.06168066962224e-07, "loss": 0.4536, "step": 29409 }, { "epoch": 0.5112204279580733, "grad_norm": 1.3905967058282267, "learning_rate": 5.061399197576788e-07, "loss": 0.3069, "step": 29410 }, { "epoch": 0.5112378104955762, "grad_norm": 2.1219389183902906, "learning_rate": 5.061117725336727e-07, "loss": 0.3218, "step": 29411 }, { "epoch": 0.511255193033079, "grad_norm": 2.6567415812795923, "learning_rate": 5.060836252902955e-07, "loss": 0.2892, "step": 29412 }, { "epoch": 0.5112725755705818, "grad_norm": 1.6978303241509243, "learning_rate": 5.060554780276356e-07, "loss": 0.2303, "step": 29413 }, { "epoch": 0.5112899581080846, "grad_norm": 1.3594647030613989, "learning_rate": 5.060273307457829e-07, "loss": 0.3203, "step": 29414 }, { "epoch": 0.5113073406455875, "grad_norm": 2.3514080041998304, "learning_rate": 5.059991834448261e-07, "loss": 0.2332, "step": 29415 }, { "epoch": 0.5113247231830903, "grad_norm": 1.4049644509638013, "learning_rate": 5.059710361248548e-07, "loss": 0.2263, "step": 29416 }, { "epoch": 0.5113421057205931, "grad_norm": 1.3972970116500492, "learning_rate": 5.059428887859579e-07, "loss": 0.2019, "step": 29417 }, { "epoch": 0.511359488258096, "grad_norm": 1.5979997155667571, "learning_rate": 5.05914741428225e-07, "loss": 0.3283, "step": 29418 }, { "epoch": 0.5113768707955988, "grad_norm": 2.3866923714243864, "learning_rate": 5.058865940517448e-07, "loss": 0.2221, "step": 29419 }, { "epoch": 0.5113942533331015, "grad_norm": 1.691953824708455, "learning_rate": 5.058584466566071e-07, "loss": 0.3073, "step": 29420 }, { "epoch": 0.5114116358706043, "grad_norm": 3.412327335148728, "learning_rate": 5.058302992429007e-07, "loss": 0.3629, "step": 29421 }, { "epoch": 0.5114290184081072, "grad_norm": 1.2396819650621334, "learning_rate": 5.058021518107149e-07, "loss": 0.2605, "step": 29422 }, { "epoch": 0.51144640094561, "grad_norm": 1.521062073468625, "learning_rate": 5.057740043601388e-07, "loss": 0.2279, "step": 29423 }, { "epoch": 0.5114637834831128, "grad_norm": 2.1235582867419684, "learning_rate": 5.05745856891262e-07, "loss": 0.4523, "step": 29424 }, { "epoch": 0.5114811660206157, "grad_norm": 3.466484395399217, "learning_rate": 5.057177094041734e-07, "loss": 0.2239, "step": 29425 }, { "epoch": 0.5114985485581185, "grad_norm": 1.3081209189727487, "learning_rate": 5.056895618989622e-07, "loss": 0.2915, "step": 29426 }, { "epoch": 0.5115159310956213, "grad_norm": 2.423839531147731, "learning_rate": 5.056614143757179e-07, "loss": 0.2784, "step": 29427 }, { "epoch": 0.5115333136331242, "grad_norm": 1.8912367838562456, "learning_rate": 5.056332668345292e-07, "loss": 0.3545, "step": 29428 }, { "epoch": 0.511550696170627, "grad_norm": 1.982955844812248, "learning_rate": 5.056051192754858e-07, "loss": 0.3829, "step": 29429 }, { "epoch": 0.5115680787081298, "grad_norm": 2.3378548787944675, "learning_rate": 5.055769716986769e-07, "loss": 0.42, "step": 29430 }, { "epoch": 0.5115854612456326, "grad_norm": 2.4442309542628893, "learning_rate": 5.055488241041915e-07, "loss": 0.4099, "step": 29431 }, { "epoch": 0.5116028437831355, "grad_norm": 1.98655258854138, "learning_rate": 5.055206764921187e-07, "loss": 0.2886, "step": 29432 }, { "epoch": 0.5116202263206383, "grad_norm": 1.157216658458861, "learning_rate": 5.05492528862548e-07, "loss": 0.2376, "step": 29433 }, { "epoch": 0.5116376088581411, "grad_norm": 1.5617349718945777, "learning_rate": 5.054643812155685e-07, "loss": 0.4443, "step": 29434 }, { "epoch": 0.511654991395644, "grad_norm": 1.0165402969051742, "learning_rate": 5.054362335512695e-07, "loss": 0.3053, "step": 29435 }, { "epoch": 0.5116723739331468, "grad_norm": 1.1204835716609431, "learning_rate": 5.0540808586974e-07, "loss": 0.2308, "step": 29436 }, { "epoch": 0.5116897564706496, "grad_norm": 1.1677380467579503, "learning_rate": 5.053799381710695e-07, "loss": 0.2197, "step": 29437 }, { "epoch": 0.5117071390081525, "grad_norm": 2.3559232544169086, "learning_rate": 5.053517904553469e-07, "loss": 0.5984, "step": 29438 }, { "epoch": 0.5117245215456553, "grad_norm": 1.2645735429685918, "learning_rate": 5.053236427226617e-07, "loss": 0.2012, "step": 29439 }, { "epoch": 0.511741904083158, "grad_norm": 2.1589482447994013, "learning_rate": 5.05295494973103e-07, "loss": 0.2565, "step": 29440 }, { "epoch": 0.5117592866206608, "grad_norm": 1.3699605226818161, "learning_rate": 5.052673472067599e-07, "loss": 0.4666, "step": 29441 }, { "epoch": 0.5117766691581637, "grad_norm": 1.4922075923925329, "learning_rate": 5.052391994237217e-07, "loss": 0.2789, "step": 29442 }, { "epoch": 0.5117940516956665, "grad_norm": 1.6945621893624228, "learning_rate": 5.05211051624078e-07, "loss": 0.1795, "step": 29443 }, { "epoch": 0.5118114342331693, "grad_norm": 1.8956699712561746, "learning_rate": 5.051829038079173e-07, "loss": 0.2315, "step": 29444 }, { "epoch": 0.5118288167706722, "grad_norm": 1.024145623359146, "learning_rate": 5.051547559753293e-07, "loss": 0.229, "step": 29445 }, { "epoch": 0.511846199308175, "grad_norm": 1.423176769215731, "learning_rate": 5.05126608126403e-07, "loss": 0.2187, "step": 29446 }, { "epoch": 0.5118635818456778, "grad_norm": 1.5885495765806494, "learning_rate": 5.050984602612279e-07, "loss": 0.2993, "step": 29447 }, { "epoch": 0.5118809643831806, "grad_norm": 1.6968392473527352, "learning_rate": 5.050703123798928e-07, "loss": 0.2912, "step": 29448 }, { "epoch": 0.5118983469206835, "grad_norm": 2.0325444021754824, "learning_rate": 5.050421644824874e-07, "loss": 0.3621, "step": 29449 }, { "epoch": 0.5119157294581863, "grad_norm": 1.39598604998683, "learning_rate": 5.050140165691003e-07, "loss": 0.2192, "step": 29450 }, { "epoch": 0.5119331119956891, "grad_norm": 1.2748199122933197, "learning_rate": 5.049858686398214e-07, "loss": 0.2923, "step": 29451 }, { "epoch": 0.511950494533192, "grad_norm": 2.1297087166337056, "learning_rate": 5.049577206947394e-07, "loss": 0.1806, "step": 29452 }, { "epoch": 0.5119678770706948, "grad_norm": 1.3493976487883708, "learning_rate": 5.049295727339438e-07, "loss": 0.3005, "step": 29453 }, { "epoch": 0.5119852596081976, "grad_norm": 2.2038465041845074, "learning_rate": 5.049014247575236e-07, "loss": 0.2373, "step": 29454 }, { "epoch": 0.5120026421457005, "grad_norm": 1.2068544573138955, "learning_rate": 5.048732767655682e-07, "loss": 0.1833, "step": 29455 }, { "epoch": 0.5120200246832033, "grad_norm": 1.4025157771337118, "learning_rate": 5.048451287581668e-07, "loss": 0.1601, "step": 29456 }, { "epoch": 0.5120374072207061, "grad_norm": 0.9356850782141165, "learning_rate": 5.048169807354086e-07, "loss": 0.2068, "step": 29457 }, { "epoch": 0.512054789758209, "grad_norm": 1.316789489331231, "learning_rate": 5.047888326973826e-07, "loss": 0.2343, "step": 29458 }, { "epoch": 0.5120721722957118, "grad_norm": 2.1281781072660406, "learning_rate": 5.047606846441784e-07, "loss": 0.25, "step": 29459 }, { "epoch": 0.5120895548332145, "grad_norm": 1.4916259461224197, "learning_rate": 5.04732536575885e-07, "loss": 0.2769, "step": 29460 }, { "epoch": 0.5121069373707173, "grad_norm": 1.165639399686519, "learning_rate": 5.047043884925915e-07, "loss": 0.2273, "step": 29461 }, { "epoch": 0.5121243199082202, "grad_norm": 1.8234446301789426, "learning_rate": 5.046762403943873e-07, "loss": 0.3314, "step": 29462 }, { "epoch": 0.512141702445723, "grad_norm": 1.1847875230405978, "learning_rate": 5.046480922813616e-07, "loss": 0.233, "step": 29463 }, { "epoch": 0.5121590849832258, "grad_norm": 2.104060597586672, "learning_rate": 5.046199441536035e-07, "loss": 0.3374, "step": 29464 }, { "epoch": 0.5121764675207287, "grad_norm": 1.029857002420712, "learning_rate": 5.045917960112024e-07, "loss": 0.2904, "step": 29465 }, { "epoch": 0.5121938500582315, "grad_norm": 1.4335732146518223, "learning_rate": 5.045636478542474e-07, "loss": 0.1409, "step": 29466 }, { "epoch": 0.5122112325957343, "grad_norm": 1.0888644375292544, "learning_rate": 5.045354996828277e-07, "loss": 0.1374, "step": 29467 }, { "epoch": 0.5122286151332371, "grad_norm": 2.289618951083894, "learning_rate": 5.045073514970327e-07, "loss": 0.4289, "step": 29468 }, { "epoch": 0.51224599767074, "grad_norm": 2.308441609571229, "learning_rate": 5.044792032969512e-07, "loss": 0.2624, "step": 29469 }, { "epoch": 0.5122633802082428, "grad_norm": 1.7979239567754888, "learning_rate": 5.044510550826729e-07, "loss": 0.3678, "step": 29470 }, { "epoch": 0.5122807627457456, "grad_norm": 1.4878782882885575, "learning_rate": 5.044229068542866e-07, "loss": 0.3727, "step": 29471 }, { "epoch": 0.5122981452832485, "grad_norm": 2.1404226652199525, "learning_rate": 5.043947586118819e-07, "loss": 0.2801, "step": 29472 }, { "epoch": 0.5123155278207513, "grad_norm": 1.4168270961233305, "learning_rate": 5.043666103555477e-07, "loss": 0.5075, "step": 29473 }, { "epoch": 0.5123329103582541, "grad_norm": 2.2104042158338326, "learning_rate": 5.043384620853735e-07, "loss": 0.2926, "step": 29474 }, { "epoch": 0.512350292895757, "grad_norm": 1.4526986650889253, "learning_rate": 5.043103138014483e-07, "loss": 0.2225, "step": 29475 }, { "epoch": 0.5123676754332598, "grad_norm": 2.7539985983388684, "learning_rate": 5.042821655038613e-07, "loss": 0.4373, "step": 29476 }, { "epoch": 0.5123850579707626, "grad_norm": 2.018628624716166, "learning_rate": 5.042540171927017e-07, "loss": 0.245, "step": 29477 }, { "epoch": 0.5124024405082654, "grad_norm": 1.8221673708891886, "learning_rate": 5.042258688680592e-07, "loss": 0.2541, "step": 29478 }, { "epoch": 0.5124198230457683, "grad_norm": 1.5779839874320791, "learning_rate": 5.041977205300224e-07, "loss": 0.2393, "step": 29479 }, { "epoch": 0.512437205583271, "grad_norm": 8.467252801423536, "learning_rate": 5.041695721786807e-07, "loss": 0.3317, "step": 29480 }, { "epoch": 0.5124545881207738, "grad_norm": 3.0126029458449777, "learning_rate": 5.041414238141235e-07, "loss": 0.3404, "step": 29481 }, { "epoch": 0.5124719706582767, "grad_norm": 1.586946165156064, "learning_rate": 5.041132754364399e-07, "loss": 0.342, "step": 29482 }, { "epoch": 0.5124893531957795, "grad_norm": 1.5156781123554632, "learning_rate": 5.040851270457189e-07, "loss": 0.256, "step": 29483 }, { "epoch": 0.5125067357332823, "grad_norm": 2.1065219488953195, "learning_rate": 5.040569786420502e-07, "loss": 0.3912, "step": 29484 }, { "epoch": 0.5125241182707851, "grad_norm": 3.215694445172965, "learning_rate": 5.040288302255226e-07, "loss": 0.3068, "step": 29485 }, { "epoch": 0.512541500808288, "grad_norm": 2.080652245230866, "learning_rate": 5.040006817962255e-07, "loss": 0.3751, "step": 29486 }, { "epoch": 0.5125588833457908, "grad_norm": 1.9424915746109832, "learning_rate": 5.039725333542481e-07, "loss": 0.3857, "step": 29487 }, { "epoch": 0.5125762658832936, "grad_norm": 2.233150536655361, "learning_rate": 5.039443848996793e-07, "loss": 0.1603, "step": 29488 }, { "epoch": 0.5125936484207965, "grad_norm": 1.450569694587939, "learning_rate": 5.039162364326089e-07, "loss": 0.3693, "step": 29489 }, { "epoch": 0.5126110309582993, "grad_norm": 1.9525037961172518, "learning_rate": 5.038880879531259e-07, "loss": 0.2089, "step": 29490 }, { "epoch": 0.5126284134958021, "grad_norm": 1.7520267411661337, "learning_rate": 5.038599394613193e-07, "loss": 0.2359, "step": 29491 }, { "epoch": 0.512645796033305, "grad_norm": 1.4223805299534187, "learning_rate": 5.038317909572785e-07, "loss": 0.3811, "step": 29492 }, { "epoch": 0.5126631785708078, "grad_norm": 3.956792894253957, "learning_rate": 5.038036424410926e-07, "loss": 0.3616, "step": 29493 }, { "epoch": 0.5126805611083106, "grad_norm": 1.5734054240646045, "learning_rate": 5.03775493912851e-07, "loss": 0.2414, "step": 29494 }, { "epoch": 0.5126979436458134, "grad_norm": 1.3880357154595124, "learning_rate": 5.037473453726429e-07, "loss": 0.2336, "step": 29495 }, { "epoch": 0.5127153261833163, "grad_norm": 1.2694023745365992, "learning_rate": 5.037191968205571e-07, "loss": 0.2045, "step": 29496 }, { "epoch": 0.5127327087208191, "grad_norm": 1.3853103444928778, "learning_rate": 5.036910482566837e-07, "loss": 0.4076, "step": 29497 }, { "epoch": 0.5127500912583219, "grad_norm": 2.5424785622156953, "learning_rate": 5.036628996811109e-07, "loss": 0.2232, "step": 29498 }, { "epoch": 0.5127674737958248, "grad_norm": 1.006432087342187, "learning_rate": 5.036347510939286e-07, "loss": 0.3297, "step": 29499 }, { "epoch": 0.5127848563333275, "grad_norm": 1.5261290330666566, "learning_rate": 5.036066024952258e-07, "loss": 0.2176, "step": 29500 }, { "epoch": 0.5128022388708303, "grad_norm": 1.2544692862011395, "learning_rate": 5.035784538850916e-07, "loss": 0.2247, "step": 29501 }, { "epoch": 0.5128196214083331, "grad_norm": 1.3612153514569156, "learning_rate": 5.035503052636154e-07, "loss": 0.2171, "step": 29502 }, { "epoch": 0.512837003945836, "grad_norm": 1.4912577463006742, "learning_rate": 5.035221566308866e-07, "loss": 0.3467, "step": 29503 }, { "epoch": 0.5128543864833388, "grad_norm": 1.40772751499544, "learning_rate": 5.034940079869938e-07, "loss": 0.1684, "step": 29504 }, { "epoch": 0.5128717690208416, "grad_norm": 1.8326955817936292, "learning_rate": 5.034658593320268e-07, "loss": 0.2519, "step": 29505 }, { "epoch": 0.5128891515583445, "grad_norm": 2.5052600541142445, "learning_rate": 5.034377106660746e-07, "loss": 0.2124, "step": 29506 }, { "epoch": 0.5129065340958473, "grad_norm": 2.777341934597995, "learning_rate": 5.034095619892265e-07, "loss": 0.2501, "step": 29507 }, { "epoch": 0.5129239166333501, "grad_norm": 2.29955607944648, "learning_rate": 5.033814133015715e-07, "loss": 0.2675, "step": 29508 }, { "epoch": 0.512941299170853, "grad_norm": 1.8344231149705446, "learning_rate": 5.033532646031992e-07, "loss": 0.1779, "step": 29509 }, { "epoch": 0.5129586817083558, "grad_norm": 1.1244261829510025, "learning_rate": 5.033251158941983e-07, "loss": 0.1634, "step": 29510 }, { "epoch": 0.5129760642458586, "grad_norm": 1.4285989767101446, "learning_rate": 5.032969671746585e-07, "loss": 0.2357, "step": 29511 }, { "epoch": 0.5129934467833615, "grad_norm": 3.4475304039998655, "learning_rate": 5.032688184446688e-07, "loss": 0.3098, "step": 29512 }, { "epoch": 0.5130108293208643, "grad_norm": 1.1665855854649492, "learning_rate": 5.032406697043184e-07, "loss": 0.2547, "step": 29513 }, { "epoch": 0.5130282118583671, "grad_norm": 1.9780102323444129, "learning_rate": 5.032125209536966e-07, "loss": 0.2728, "step": 29514 }, { "epoch": 0.5130455943958699, "grad_norm": 1.8425756239207671, "learning_rate": 5.031843721928927e-07, "loss": 0.2466, "step": 29515 }, { "epoch": 0.5130629769333728, "grad_norm": 3.9930640638123402, "learning_rate": 5.031562234219955e-07, "loss": 0.3464, "step": 29516 }, { "epoch": 0.5130803594708756, "grad_norm": 1.724620208472128, "learning_rate": 5.031280746410948e-07, "loss": 0.3977, "step": 29517 }, { "epoch": 0.5130977420083784, "grad_norm": 1.5584258662366628, "learning_rate": 5.030999258502795e-07, "loss": 0.334, "step": 29518 }, { "epoch": 0.5131151245458813, "grad_norm": 1.4423353803295909, "learning_rate": 5.030717770496388e-07, "loss": 0.344, "step": 29519 }, { "epoch": 0.513132507083384, "grad_norm": 1.5533127807133615, "learning_rate": 5.03043628239262e-07, "loss": 0.2303, "step": 29520 }, { "epoch": 0.5131498896208868, "grad_norm": 1.6296439438071835, "learning_rate": 5.030154794192381e-07, "loss": 0.1655, "step": 29521 }, { "epoch": 0.5131672721583896, "grad_norm": 3.503826208009362, "learning_rate": 5.029873305896569e-07, "loss": 0.2778, "step": 29522 }, { "epoch": 0.5131846546958925, "grad_norm": 2.1061838267893704, "learning_rate": 5.029591817506069e-07, "loss": 0.1605, "step": 29523 }, { "epoch": 0.5132020372333953, "grad_norm": 2.3489365968114337, "learning_rate": 5.029310329021779e-07, "loss": 0.1811, "step": 29524 }, { "epoch": 0.5132194197708981, "grad_norm": 2.176765769187167, "learning_rate": 5.029028840444588e-07, "loss": 0.3272, "step": 29525 }, { "epoch": 0.513236802308401, "grad_norm": 1.8086789182105951, "learning_rate": 5.028747351775389e-07, "loss": 0.3711, "step": 29526 }, { "epoch": 0.5132541848459038, "grad_norm": 1.732510490641179, "learning_rate": 5.028465863015072e-07, "loss": 0.2083, "step": 29527 }, { "epoch": 0.5132715673834066, "grad_norm": 0.8351496483581969, "learning_rate": 5.028184374164534e-07, "loss": 0.4863, "step": 29528 }, { "epoch": 0.5132889499209095, "grad_norm": 2.119418135789267, "learning_rate": 5.027902885224663e-07, "loss": 0.3445, "step": 29529 }, { "epoch": 0.5133063324584123, "grad_norm": 2.5534585573223643, "learning_rate": 5.027621396196353e-07, "loss": 0.2144, "step": 29530 }, { "epoch": 0.5133237149959151, "grad_norm": 1.1820428406310177, "learning_rate": 5.027339907080495e-07, "loss": 0.3397, "step": 29531 }, { "epoch": 0.513341097533418, "grad_norm": 2.0712791422125405, "learning_rate": 5.027058417877984e-07, "loss": 0.2537, "step": 29532 }, { "epoch": 0.5133584800709208, "grad_norm": 1.6525111550076024, "learning_rate": 5.026776928589709e-07, "loss": 0.2121, "step": 29533 }, { "epoch": 0.5133758626084236, "grad_norm": 0.9241102141233644, "learning_rate": 5.026495439216563e-07, "loss": 0.1734, "step": 29534 }, { "epoch": 0.5133932451459264, "grad_norm": 1.7445082007076467, "learning_rate": 5.02621394975944e-07, "loss": 0.2202, "step": 29535 }, { "epoch": 0.5134106276834293, "grad_norm": 1.7515936499897893, "learning_rate": 5.02593246021923e-07, "loss": 0.2619, "step": 29536 }, { "epoch": 0.5134280102209321, "grad_norm": 2.1615521749929694, "learning_rate": 5.025650970596826e-07, "loss": 0.3212, "step": 29537 }, { "epoch": 0.5134453927584349, "grad_norm": 1.247481590225461, "learning_rate": 5.025369480893122e-07, "loss": 0.2382, "step": 29538 }, { "epoch": 0.5134627752959376, "grad_norm": 2.480821801212661, "learning_rate": 5.025087991109005e-07, "loss": 0.2873, "step": 29539 }, { "epoch": 0.5134801578334405, "grad_norm": 1.692820273582608, "learning_rate": 5.024806501245372e-07, "loss": 0.4939, "step": 29540 }, { "epoch": 0.5134975403709433, "grad_norm": 1.1616063727229362, "learning_rate": 5.024525011303115e-07, "loss": 0.3392, "step": 29541 }, { "epoch": 0.5135149229084461, "grad_norm": 1.425607888130535, "learning_rate": 5.024243521283123e-07, "loss": 0.1792, "step": 29542 }, { "epoch": 0.513532305445949, "grad_norm": 1.6405263433526658, "learning_rate": 5.023962031186292e-07, "loss": 0.3262, "step": 29543 }, { "epoch": 0.5135496879834518, "grad_norm": 1.101185572186013, "learning_rate": 5.023680541013511e-07, "loss": 0.4724, "step": 29544 }, { "epoch": 0.5135670705209546, "grad_norm": 1.079583495519754, "learning_rate": 5.023399050765674e-07, "loss": 0.2556, "step": 29545 }, { "epoch": 0.5135844530584575, "grad_norm": 1.5545231774054054, "learning_rate": 5.023117560443674e-07, "loss": 0.3158, "step": 29546 }, { "epoch": 0.5136018355959603, "grad_norm": 0.9871016500098933, "learning_rate": 5.0228360700484e-07, "loss": 0.1996, "step": 29547 }, { "epoch": 0.5136192181334631, "grad_norm": 1.4601568351339327, "learning_rate": 5.022554579580747e-07, "loss": 0.3519, "step": 29548 }, { "epoch": 0.513636600670966, "grad_norm": 1.0158268388986795, "learning_rate": 5.022273089041607e-07, "loss": 0.1495, "step": 29549 }, { "epoch": 0.5136539832084688, "grad_norm": 2.730427324153641, "learning_rate": 5.02199159843187e-07, "loss": 0.2744, "step": 29550 }, { "epoch": 0.5136713657459716, "grad_norm": 1.8220127959657122, "learning_rate": 5.021710107752431e-07, "loss": 0.2234, "step": 29551 }, { "epoch": 0.5136887482834744, "grad_norm": 2.194737813603383, "learning_rate": 5.021428617004181e-07, "loss": 0.3398, "step": 29552 }, { "epoch": 0.5137061308209773, "grad_norm": 1.7053509685827195, "learning_rate": 5.021147126188011e-07, "loss": 0.2732, "step": 29553 }, { "epoch": 0.5137235133584801, "grad_norm": 1.357189367241192, "learning_rate": 5.020865635304815e-07, "loss": 0.3128, "step": 29554 }, { "epoch": 0.5137408958959829, "grad_norm": 2.3520628935409316, "learning_rate": 5.020584144355485e-07, "loss": 0.5846, "step": 29555 }, { "epoch": 0.5137582784334858, "grad_norm": 1.2615369570348896, "learning_rate": 5.020302653340912e-07, "loss": 0.2913, "step": 29556 }, { "epoch": 0.5137756609709886, "grad_norm": 1.9280911840254324, "learning_rate": 5.020021162261991e-07, "loss": 0.2419, "step": 29557 }, { "epoch": 0.5137930435084914, "grad_norm": 1.3885301953079447, "learning_rate": 5.019739671119608e-07, "loss": 0.1893, "step": 29558 }, { "epoch": 0.5138104260459941, "grad_norm": 2.7114913224233934, "learning_rate": 5.019458179914663e-07, "loss": 0.3568, "step": 29559 }, { "epoch": 0.513827808583497, "grad_norm": 1.090472526214375, "learning_rate": 5.019176688648043e-07, "loss": 0.2226, "step": 29560 }, { "epoch": 0.5138451911209998, "grad_norm": 1.4395699082443452, "learning_rate": 5.018895197320642e-07, "loss": 0.2903, "step": 29561 }, { "epoch": 0.5138625736585026, "grad_norm": 2.0757712505931587, "learning_rate": 5.018613705933351e-07, "loss": 0.2479, "step": 29562 }, { "epoch": 0.5138799561960055, "grad_norm": 1.3752675381525414, "learning_rate": 5.018332214487065e-07, "loss": 0.2563, "step": 29563 }, { "epoch": 0.5138973387335083, "grad_norm": 2.364366199914022, "learning_rate": 5.018050722982672e-07, "loss": 0.2764, "step": 29564 }, { "epoch": 0.5139147212710111, "grad_norm": 3.4400506661246437, "learning_rate": 5.017769231421069e-07, "loss": 0.2262, "step": 29565 }, { "epoch": 0.513932103808514, "grad_norm": 0.9913370268402767, "learning_rate": 5.017487739803143e-07, "loss": 0.1495, "step": 29566 }, { "epoch": 0.5139494863460168, "grad_norm": 1.827423792707896, "learning_rate": 5.01720624812979e-07, "loss": 0.2481, "step": 29567 }, { "epoch": 0.5139668688835196, "grad_norm": 1.7060795246608922, "learning_rate": 5.016924756401902e-07, "loss": 0.3439, "step": 29568 }, { "epoch": 0.5139842514210224, "grad_norm": 1.0039404986085798, "learning_rate": 5.01664326462037e-07, "loss": 0.197, "step": 29569 }, { "epoch": 0.5140016339585253, "grad_norm": 2.040417164870012, "learning_rate": 5.016361772786086e-07, "loss": 0.2259, "step": 29570 }, { "epoch": 0.5140190164960281, "grad_norm": 1.5239165885525703, "learning_rate": 5.016080280899943e-07, "loss": 0.5121, "step": 29571 }, { "epoch": 0.5140363990335309, "grad_norm": 2.9640481279730464, "learning_rate": 5.015798788962832e-07, "loss": 0.2873, "step": 29572 }, { "epoch": 0.5140537815710338, "grad_norm": 1.575576017968315, "learning_rate": 5.015517296975647e-07, "loss": 0.1847, "step": 29573 }, { "epoch": 0.5140711641085366, "grad_norm": 1.1511534922858646, "learning_rate": 5.015235804939279e-07, "loss": 0.1733, "step": 29574 }, { "epoch": 0.5140885466460394, "grad_norm": 1.1620754412009995, "learning_rate": 5.014954312854621e-07, "loss": 0.2337, "step": 29575 }, { "epoch": 0.5141059291835423, "grad_norm": 1.4278497778855856, "learning_rate": 5.014672820722565e-07, "loss": 0.3009, "step": 29576 }, { "epoch": 0.5141233117210451, "grad_norm": 1.5060393324248298, "learning_rate": 5.014391328544001e-07, "loss": 0.2235, "step": 29577 }, { "epoch": 0.5141406942585479, "grad_norm": 1.8559385372876003, "learning_rate": 5.014109836319824e-07, "loss": 0.3151, "step": 29578 }, { "epoch": 0.5141580767960506, "grad_norm": 1.719931110593856, "learning_rate": 5.013828344050925e-07, "loss": 0.2686, "step": 29579 }, { "epoch": 0.5141754593335535, "grad_norm": 1.6027551280150385, "learning_rate": 5.013546851738199e-07, "loss": 0.3121, "step": 29580 }, { "epoch": 0.5141928418710563, "grad_norm": 1.6956741598243228, "learning_rate": 5.013265359382533e-07, "loss": 0.1815, "step": 29581 }, { "epoch": 0.5142102244085591, "grad_norm": 1.5612896760400732, "learning_rate": 5.012983866984824e-07, "loss": 0.2039, "step": 29582 }, { "epoch": 0.514227606946062, "grad_norm": 2.874337168621168, "learning_rate": 5.012702374545959e-07, "loss": 0.3548, "step": 29583 }, { "epoch": 0.5142449894835648, "grad_norm": 1.0805762328716464, "learning_rate": 5.012420882066835e-07, "loss": 0.1397, "step": 29584 }, { "epoch": 0.5142623720210676, "grad_norm": 1.1372976424383046, "learning_rate": 5.012139389548343e-07, "loss": 0.1514, "step": 29585 }, { "epoch": 0.5142797545585704, "grad_norm": 2.3956066303390458, "learning_rate": 5.011857896991375e-07, "loss": 0.1926, "step": 29586 }, { "epoch": 0.5142971370960733, "grad_norm": 1.238528368638967, "learning_rate": 5.011576404396821e-07, "loss": 0.2622, "step": 29587 }, { "epoch": 0.5143145196335761, "grad_norm": 2.3982227119094817, "learning_rate": 5.011294911765578e-07, "loss": 0.2003, "step": 29588 }, { "epoch": 0.5143319021710789, "grad_norm": 1.5046189334327638, "learning_rate": 5.011013419098533e-07, "loss": 0.2386, "step": 29589 }, { "epoch": 0.5143492847085818, "grad_norm": 1.5331360742554172, "learning_rate": 5.010731926396581e-07, "loss": 0.2121, "step": 29590 }, { "epoch": 0.5143666672460846, "grad_norm": 1.3929899361488827, "learning_rate": 5.010450433660614e-07, "loss": 0.1325, "step": 29591 }, { "epoch": 0.5143840497835874, "grad_norm": 2.250790798193136, "learning_rate": 5.010168940891525e-07, "loss": 0.3061, "step": 29592 }, { "epoch": 0.5144014323210903, "grad_norm": 1.9611381835379031, "learning_rate": 5.009887448090203e-07, "loss": 0.2285, "step": 29593 }, { "epoch": 0.5144188148585931, "grad_norm": 2.7916758689331673, "learning_rate": 5.009605955257544e-07, "loss": 0.5582, "step": 29594 }, { "epoch": 0.5144361973960959, "grad_norm": 1.5701140971049354, "learning_rate": 5.009324462394437e-07, "loss": 0.1606, "step": 29595 }, { "epoch": 0.5144535799335987, "grad_norm": 0.8313817318566107, "learning_rate": 5.009042969501777e-07, "loss": 0.2508, "step": 29596 }, { "epoch": 0.5144709624711016, "grad_norm": 1.8662874311786437, "learning_rate": 5.008761476580454e-07, "loss": 0.2595, "step": 29597 }, { "epoch": 0.5144883450086044, "grad_norm": 1.3714111313094837, "learning_rate": 5.008479983631363e-07, "loss": 0.2635, "step": 29598 }, { "epoch": 0.5145057275461071, "grad_norm": 1.8686691817872985, "learning_rate": 5.008198490655392e-07, "loss": 0.221, "step": 29599 }, { "epoch": 0.51452311008361, "grad_norm": 1.7021862242238313, "learning_rate": 5.007916997653436e-07, "loss": 0.3011, "step": 29600 }, { "epoch": 0.5145404926211128, "grad_norm": 1.4834017272007685, "learning_rate": 5.007635504626389e-07, "loss": 0.248, "step": 29601 }, { "epoch": 0.5145578751586156, "grad_norm": 1.7575883486270425, "learning_rate": 5.007354011575139e-07, "loss": 0.3161, "step": 29602 }, { "epoch": 0.5145752576961184, "grad_norm": 2.0977692485538757, "learning_rate": 5.007072518500579e-07, "loss": 0.3606, "step": 29603 }, { "epoch": 0.5145926402336213, "grad_norm": 1.2272791424419405, "learning_rate": 5.006791025403604e-07, "loss": 0.1584, "step": 29604 }, { "epoch": 0.5146100227711241, "grad_norm": 1.9202407826374681, "learning_rate": 5.006509532285105e-07, "loss": 0.3406, "step": 29605 }, { "epoch": 0.5146274053086269, "grad_norm": 1.6647867646012098, "learning_rate": 5.006228039145972e-07, "loss": 0.3089, "step": 29606 }, { "epoch": 0.5146447878461298, "grad_norm": 1.3133896622336336, "learning_rate": 5.005946545987102e-07, "loss": 0.225, "step": 29607 }, { "epoch": 0.5146621703836326, "grad_norm": 2.1458360443491533, "learning_rate": 5.005665052809381e-07, "loss": 0.2712, "step": 29608 }, { "epoch": 0.5146795529211354, "grad_norm": 5.292066001627045, "learning_rate": 5.005383559613707e-07, "loss": 0.4016, "step": 29609 }, { "epoch": 0.5146969354586383, "grad_norm": 2.148098360397886, "learning_rate": 5.005102066400968e-07, "loss": 0.2959, "step": 29610 }, { "epoch": 0.5147143179961411, "grad_norm": 1.990668359593652, "learning_rate": 5.004820573172058e-07, "loss": 0.2495, "step": 29611 }, { "epoch": 0.5147317005336439, "grad_norm": 1.5678214758637516, "learning_rate": 5.004539079927869e-07, "loss": 0.2045, "step": 29612 }, { "epoch": 0.5147490830711468, "grad_norm": 0.969422831661011, "learning_rate": 5.004257586669292e-07, "loss": 0.1568, "step": 29613 }, { "epoch": 0.5147664656086496, "grad_norm": 1.7305180451059894, "learning_rate": 5.003976093397223e-07, "loss": 0.2207, "step": 29614 }, { "epoch": 0.5147838481461524, "grad_norm": 1.2521125890068359, "learning_rate": 5.00369460011255e-07, "loss": 0.2481, "step": 29615 }, { "epoch": 0.5148012306836552, "grad_norm": 3.3919985993498996, "learning_rate": 5.003413106816167e-07, "loss": 0.3764, "step": 29616 }, { "epoch": 0.5148186132211581, "grad_norm": 2.054344261220042, "learning_rate": 5.003131613508966e-07, "loss": 0.2349, "step": 29617 }, { "epoch": 0.5148359957586609, "grad_norm": 1.8420104166632398, "learning_rate": 5.00285012019184e-07, "loss": 0.2223, "step": 29618 }, { "epoch": 0.5148533782961636, "grad_norm": 1.3325677678762051, "learning_rate": 5.00256862686568e-07, "loss": 0.1984, "step": 29619 }, { "epoch": 0.5148707608336665, "grad_norm": 2.0659259181184733, "learning_rate": 5.002287133531377e-07, "loss": 0.1653, "step": 29620 }, { "epoch": 0.5148881433711693, "grad_norm": 1.003526177640329, "learning_rate": 5.002005640189827e-07, "loss": 0.3106, "step": 29621 }, { "epoch": 0.5149055259086721, "grad_norm": 1.3789869530117662, "learning_rate": 5.001724146841919e-07, "loss": 0.223, "step": 29622 }, { "epoch": 0.5149229084461749, "grad_norm": 1.004192063343062, "learning_rate": 5.001442653488548e-07, "loss": 0.2588, "step": 29623 }, { "epoch": 0.5149402909836778, "grad_norm": 2.747989237825969, "learning_rate": 5.001161160130602e-07, "loss": 0.3249, "step": 29624 }, { "epoch": 0.5149576735211806, "grad_norm": 2.241431188181605, "learning_rate": 5.000879666768977e-07, "loss": 0.2428, "step": 29625 }, { "epoch": 0.5149750560586834, "grad_norm": 2.0685601684916417, "learning_rate": 5.000598173404563e-07, "loss": 0.1854, "step": 29626 }, { "epoch": 0.5149924385961863, "grad_norm": 1.4401155878252319, "learning_rate": 5.000316680038255e-07, "loss": 0.2961, "step": 29627 }, { "epoch": 0.5150098211336891, "grad_norm": 1.8179315864455476, "learning_rate": 5.00003518667094e-07, "loss": 0.33, "step": 29628 }, { "epoch": 0.5150272036711919, "grad_norm": 1.875522632434629, "learning_rate": 4.999753693303515e-07, "loss": 0.2661, "step": 29629 }, { "epoch": 0.5150445862086948, "grad_norm": 1.3551651759512613, "learning_rate": 4.999472199936871e-07, "loss": 0.2669, "step": 29630 }, { "epoch": 0.5150619687461976, "grad_norm": 1.4274530299244603, "learning_rate": 4.999190706571901e-07, "loss": 0.1902, "step": 29631 }, { "epoch": 0.5150793512837004, "grad_norm": 1.3760268271310532, "learning_rate": 4.998909213209494e-07, "loss": 0.3086, "step": 29632 }, { "epoch": 0.5150967338212032, "grad_norm": 1.6448043532584777, "learning_rate": 4.998627719850546e-07, "loss": 0.2631, "step": 29633 }, { "epoch": 0.5151141163587061, "grad_norm": 1.6101514428989585, "learning_rate": 4.998346226495947e-07, "loss": 0.3425, "step": 29634 }, { "epoch": 0.5151314988962089, "grad_norm": 1.0876277395354625, "learning_rate": 4.998064733146589e-07, "loss": 0.267, "step": 29635 }, { "epoch": 0.5151488814337117, "grad_norm": 1.6011098562571489, "learning_rate": 4.997783239803366e-07, "loss": 0.2876, "step": 29636 }, { "epoch": 0.5151662639712146, "grad_norm": 1.3171197225828226, "learning_rate": 4.997501746467168e-07, "loss": 0.3994, "step": 29637 }, { "epoch": 0.5151836465087174, "grad_norm": 2.3398876113308384, "learning_rate": 4.997220253138888e-07, "loss": 0.3682, "step": 29638 }, { "epoch": 0.5152010290462201, "grad_norm": 2.6373187455692957, "learning_rate": 4.99693875981942e-07, "loss": 0.2195, "step": 29639 }, { "epoch": 0.515218411583723, "grad_norm": 1.49576696496083, "learning_rate": 4.996657266509655e-07, "loss": 0.2964, "step": 29640 }, { "epoch": 0.5152357941212258, "grad_norm": 1.3021326757865184, "learning_rate": 4.996375773210483e-07, "loss": 0.2279, "step": 29641 }, { "epoch": 0.5152531766587286, "grad_norm": 1.809136860845113, "learning_rate": 4.9960942799228e-07, "loss": 0.1991, "step": 29642 }, { "epoch": 0.5152705591962314, "grad_norm": 1.2084889691334146, "learning_rate": 4.995812786647495e-07, "loss": 0.2319, "step": 29643 }, { "epoch": 0.5152879417337343, "grad_norm": 1.4843182460937205, "learning_rate": 4.995531293385461e-07, "loss": 0.1712, "step": 29644 }, { "epoch": 0.5153053242712371, "grad_norm": 1.182679499934844, "learning_rate": 4.995249800137591e-07, "loss": 0.239, "step": 29645 }, { "epoch": 0.5153227068087399, "grad_norm": 2.44761439166538, "learning_rate": 4.994968306904778e-07, "loss": 0.322, "step": 29646 }, { "epoch": 0.5153400893462428, "grad_norm": 1.9188032625297538, "learning_rate": 4.994686813687914e-07, "loss": 0.2248, "step": 29647 }, { "epoch": 0.5153574718837456, "grad_norm": 1.5101373467453725, "learning_rate": 4.99440532048789e-07, "loss": 0.2699, "step": 29648 }, { "epoch": 0.5153748544212484, "grad_norm": 4.792555427141873, "learning_rate": 4.994123827305597e-07, "loss": 0.3944, "step": 29649 }, { "epoch": 0.5153922369587512, "grad_norm": 1.0416380065320971, "learning_rate": 4.99384233414193e-07, "loss": 0.2641, "step": 29650 }, { "epoch": 0.5154096194962541, "grad_norm": 2.0282370869669974, "learning_rate": 4.993560840997778e-07, "loss": 0.1834, "step": 29651 }, { "epoch": 0.5154270020337569, "grad_norm": 2.05983815672163, "learning_rate": 4.993279347874037e-07, "loss": 0.2834, "step": 29652 }, { "epoch": 0.5154443845712597, "grad_norm": 2.7668864792528884, "learning_rate": 4.992997854771598e-07, "loss": 0.2672, "step": 29653 }, { "epoch": 0.5154617671087626, "grad_norm": 1.4703442857564921, "learning_rate": 4.992716361691351e-07, "loss": 0.3538, "step": 29654 }, { "epoch": 0.5154791496462654, "grad_norm": 2.499219309877319, "learning_rate": 4.992434868634191e-07, "loss": 0.1775, "step": 29655 }, { "epoch": 0.5154965321837682, "grad_norm": 2.013130093855796, "learning_rate": 4.992153375601008e-07, "loss": 0.4377, "step": 29656 }, { "epoch": 0.5155139147212711, "grad_norm": 1.3527087252954573, "learning_rate": 4.991871882592695e-07, "loss": 0.2088, "step": 29657 }, { "epoch": 0.5155312972587739, "grad_norm": 1.384511927774183, "learning_rate": 4.991590389610146e-07, "loss": 0.1648, "step": 29658 }, { "epoch": 0.5155486797962766, "grad_norm": 2.4611686384465727, "learning_rate": 4.991308896654252e-07, "loss": 0.3027, "step": 29659 }, { "epoch": 0.5155660623337794, "grad_norm": 2.302532463778169, "learning_rate": 4.991027403725902e-07, "loss": 0.2932, "step": 29660 }, { "epoch": 0.5155834448712823, "grad_norm": 1.3620363882451911, "learning_rate": 4.990745910825992e-07, "loss": 0.3653, "step": 29661 }, { "epoch": 0.5156008274087851, "grad_norm": 1.7241501224429396, "learning_rate": 4.990464417955413e-07, "loss": 0.1957, "step": 29662 }, { "epoch": 0.5156182099462879, "grad_norm": 1.641530328710984, "learning_rate": 4.990182925115058e-07, "loss": 0.2385, "step": 29663 }, { "epoch": 0.5156355924837908, "grad_norm": 2.12248157589681, "learning_rate": 4.98990143230582e-07, "loss": 0.4165, "step": 29664 }, { "epoch": 0.5156529750212936, "grad_norm": 1.3279911906854167, "learning_rate": 4.989619939528587e-07, "loss": 0.1872, "step": 29665 }, { "epoch": 0.5156703575587964, "grad_norm": 1.3093878923178548, "learning_rate": 4.989338446784257e-07, "loss": 0.1678, "step": 29666 }, { "epoch": 0.5156877400962993, "grad_norm": 1.1969693214767183, "learning_rate": 4.989056954073718e-07, "loss": 0.2446, "step": 29667 }, { "epoch": 0.5157051226338021, "grad_norm": 1.6673610473688893, "learning_rate": 4.988775461397862e-07, "loss": 0.1976, "step": 29668 }, { "epoch": 0.5157225051713049, "grad_norm": 1.09785378607298, "learning_rate": 4.988493968757584e-07, "loss": 0.1405, "step": 29669 }, { "epoch": 0.5157398877088077, "grad_norm": 1.9002965376096208, "learning_rate": 4.988212476153774e-07, "loss": 0.1893, "step": 29670 }, { "epoch": 0.5157572702463106, "grad_norm": 1.264056947319329, "learning_rate": 4.987930983587326e-07, "loss": 0.2479, "step": 29671 }, { "epoch": 0.5157746527838134, "grad_norm": 1.3698707269234982, "learning_rate": 4.987649491059132e-07, "loss": 0.3692, "step": 29672 }, { "epoch": 0.5157920353213162, "grad_norm": 2.004534787501563, "learning_rate": 4.987367998570083e-07, "loss": 0.2941, "step": 29673 }, { "epoch": 0.5158094178588191, "grad_norm": 1.5715212912552896, "learning_rate": 4.98708650612107e-07, "loss": 0.2795, "step": 29674 }, { "epoch": 0.5158268003963219, "grad_norm": 1.306571839228691, "learning_rate": 4.986805013712988e-07, "loss": 0.2204, "step": 29675 }, { "epoch": 0.5158441829338247, "grad_norm": 1.4072636358332702, "learning_rate": 4.986523521346727e-07, "loss": 0.1675, "step": 29676 }, { "epoch": 0.5158615654713276, "grad_norm": 2.0080460673879563, "learning_rate": 4.986242029023181e-07, "loss": 0.2387, "step": 29677 }, { "epoch": 0.5158789480088303, "grad_norm": 2.1777747603563244, "learning_rate": 4.985960536743244e-07, "loss": 0.3091, "step": 29678 }, { "epoch": 0.5158963305463331, "grad_norm": 1.6246599426264747, "learning_rate": 4.985679044507803e-07, "loss": 0.1498, "step": 29679 }, { "epoch": 0.5159137130838359, "grad_norm": 2.2722171696882794, "learning_rate": 4.985397552317752e-07, "loss": 0.3855, "step": 29680 }, { "epoch": 0.5159310956213388, "grad_norm": 2.255785527877391, "learning_rate": 4.985116060173985e-07, "loss": 0.2732, "step": 29681 }, { "epoch": 0.5159484781588416, "grad_norm": 1.343206396248261, "learning_rate": 4.984834568077393e-07, "loss": 0.223, "step": 29682 }, { "epoch": 0.5159658606963444, "grad_norm": 1.7997980764825008, "learning_rate": 4.984553076028869e-07, "loss": 0.1974, "step": 29683 }, { "epoch": 0.5159832432338473, "grad_norm": 1.2873049575846414, "learning_rate": 4.984271584029304e-07, "loss": 0.2258, "step": 29684 }, { "epoch": 0.5160006257713501, "grad_norm": 1.1483740870510695, "learning_rate": 4.983990092079592e-07, "loss": 0.3703, "step": 29685 }, { "epoch": 0.5160180083088529, "grad_norm": 1.222982666720199, "learning_rate": 4.983708600180624e-07, "loss": 0.2793, "step": 29686 }, { "epoch": 0.5160353908463557, "grad_norm": 1.6649966834784424, "learning_rate": 4.983427108333292e-07, "loss": 0.2555, "step": 29687 }, { "epoch": 0.5160527733838586, "grad_norm": 1.7112093703101547, "learning_rate": 4.983145616538487e-07, "loss": 0.2567, "step": 29688 }, { "epoch": 0.5160701559213614, "grad_norm": 1.193241318288714, "learning_rate": 4.982864124797103e-07, "loss": 0.3302, "step": 29689 }, { "epoch": 0.5160875384588642, "grad_norm": 2.2006129163314254, "learning_rate": 4.982582633110032e-07, "loss": 0.2605, "step": 29690 }, { "epoch": 0.5161049209963671, "grad_norm": 1.6954080127090956, "learning_rate": 4.982301141478168e-07, "loss": 0.229, "step": 29691 }, { "epoch": 0.5161223035338699, "grad_norm": 1.9689080941433859, "learning_rate": 4.9820196499024e-07, "loss": 0.4717, "step": 29692 }, { "epoch": 0.5161396860713727, "grad_norm": 1.768545412309525, "learning_rate": 4.981738158383621e-07, "loss": 0.2457, "step": 29693 }, { "epoch": 0.5161570686088756, "grad_norm": 1.4623603690635771, "learning_rate": 4.981456666922724e-07, "loss": 0.2445, "step": 29694 }, { "epoch": 0.5161744511463784, "grad_norm": 1.4110769668737482, "learning_rate": 4.981175175520601e-07, "loss": 0.2529, "step": 29695 }, { "epoch": 0.5161918336838812, "grad_norm": 1.3086105728106159, "learning_rate": 4.980893684178144e-07, "loss": 0.2411, "step": 29696 }, { "epoch": 0.516209216221384, "grad_norm": 2.3504381253345352, "learning_rate": 4.980612192896246e-07, "loss": 0.2762, "step": 29697 }, { "epoch": 0.5162265987588868, "grad_norm": 3.657522958897346, "learning_rate": 4.980330701675798e-07, "loss": 0.2734, "step": 29698 }, { "epoch": 0.5162439812963896, "grad_norm": 1.267416373740959, "learning_rate": 4.980049210517691e-07, "loss": 0.2391, "step": 29699 }, { "epoch": 0.5162613638338924, "grad_norm": 1.2785292576936704, "learning_rate": 4.979767719422821e-07, "loss": 0.1805, "step": 29700 }, { "epoch": 0.5162787463713953, "grad_norm": 2.239233573372564, "learning_rate": 4.979486228392075e-07, "loss": 0.3736, "step": 29701 }, { "epoch": 0.5162961289088981, "grad_norm": 1.92697000436097, "learning_rate": 4.979204737426352e-07, "loss": 0.2972, "step": 29702 }, { "epoch": 0.5163135114464009, "grad_norm": 1.760495336300468, "learning_rate": 4.978923246526537e-07, "loss": 0.3033, "step": 29703 }, { "epoch": 0.5163308939839037, "grad_norm": 1.2997818439698448, "learning_rate": 4.97864175569353e-07, "loss": 0.233, "step": 29704 }, { "epoch": 0.5163482765214066, "grad_norm": 1.5792061273123712, "learning_rate": 4.978360264928215e-07, "loss": 0.3178, "step": 29705 }, { "epoch": 0.5163656590589094, "grad_norm": 1.810334774636297, "learning_rate": 4.97807877423149e-07, "loss": 0.3004, "step": 29706 }, { "epoch": 0.5163830415964122, "grad_norm": 1.954244113428707, "learning_rate": 4.977797283604243e-07, "loss": 0.2964, "step": 29707 }, { "epoch": 0.5164004241339151, "grad_norm": 1.3698266631344014, "learning_rate": 4.977515793047371e-07, "loss": 0.2657, "step": 29708 }, { "epoch": 0.5164178066714179, "grad_norm": 3.07223378884589, "learning_rate": 4.977234302561761e-07, "loss": 0.2595, "step": 29709 }, { "epoch": 0.5164351892089207, "grad_norm": 1.153032163325965, "learning_rate": 4.976952812148311e-07, "loss": 0.2563, "step": 29710 }, { "epoch": 0.5164525717464236, "grad_norm": 1.9840796462625643, "learning_rate": 4.976671321807908e-07, "loss": 0.3692, "step": 29711 }, { "epoch": 0.5164699542839264, "grad_norm": 1.4127302650632338, "learning_rate": 4.976389831541447e-07, "loss": 0.2746, "step": 29712 }, { "epoch": 0.5164873368214292, "grad_norm": 0.8963091410761286, "learning_rate": 4.976108341349818e-07, "loss": 0.1672, "step": 29713 }, { "epoch": 0.516504719358932, "grad_norm": 1.3410389464614347, "learning_rate": 4.975826851233916e-07, "loss": 0.3615, "step": 29714 }, { "epoch": 0.5165221018964349, "grad_norm": 1.130205481117603, "learning_rate": 4.975545361194629e-07, "loss": 0.1614, "step": 29715 }, { "epoch": 0.5165394844339377, "grad_norm": 1.4139502390290353, "learning_rate": 4.975263871232857e-07, "loss": 0.2311, "step": 29716 }, { "epoch": 0.5165568669714405, "grad_norm": 2.524349204540286, "learning_rate": 4.974982381349484e-07, "loss": 0.3828, "step": 29717 }, { "epoch": 0.5165742495089433, "grad_norm": 1.5372938695634475, "learning_rate": 4.974700891545404e-07, "loss": 0.1784, "step": 29718 }, { "epoch": 0.5165916320464461, "grad_norm": 1.2552994844674032, "learning_rate": 4.974419401821513e-07, "loss": 0.2896, "step": 29719 }, { "epoch": 0.5166090145839489, "grad_norm": 0.984175577488161, "learning_rate": 4.9741379121787e-07, "loss": 0.2931, "step": 29720 }, { "epoch": 0.5166263971214518, "grad_norm": 1.7792746430620616, "learning_rate": 4.973856422617858e-07, "loss": 0.2208, "step": 29721 }, { "epoch": 0.5166437796589546, "grad_norm": 1.0702645030977136, "learning_rate": 4.973574933139879e-07, "loss": 0.4386, "step": 29722 }, { "epoch": 0.5166611621964574, "grad_norm": 1.1249958152693127, "learning_rate": 4.973293443745656e-07, "loss": 0.2438, "step": 29723 }, { "epoch": 0.5166785447339602, "grad_norm": 1.4367602599157658, "learning_rate": 4.97301195443608e-07, "loss": 0.2316, "step": 29724 }, { "epoch": 0.5166959272714631, "grad_norm": 1.4864531650884147, "learning_rate": 4.972730465212043e-07, "loss": 0.1836, "step": 29725 }, { "epoch": 0.5167133098089659, "grad_norm": 1.518492294398321, "learning_rate": 4.972448976074439e-07, "loss": 0.2196, "step": 29726 }, { "epoch": 0.5167306923464687, "grad_norm": 1.472504062495169, "learning_rate": 4.972167487024158e-07, "loss": 0.177, "step": 29727 }, { "epoch": 0.5167480748839716, "grad_norm": 1.134567793206668, "learning_rate": 4.971885998062094e-07, "loss": 0.1954, "step": 29728 }, { "epoch": 0.5167654574214744, "grad_norm": 2.103319271627692, "learning_rate": 4.97160450918914e-07, "loss": 0.2797, "step": 29729 }, { "epoch": 0.5167828399589772, "grad_norm": 3.0650472705234346, "learning_rate": 4.971323020406185e-07, "loss": 0.1966, "step": 29730 }, { "epoch": 0.51680022249648, "grad_norm": 1.8659950897277062, "learning_rate": 4.971041531714122e-07, "loss": 0.2977, "step": 29731 }, { "epoch": 0.5168176050339829, "grad_norm": 1.1788591799496577, "learning_rate": 4.970760043113844e-07, "loss": 0.2435, "step": 29732 }, { "epoch": 0.5168349875714857, "grad_norm": 1.3450957736057239, "learning_rate": 4.970478554606245e-07, "loss": 0.2833, "step": 29733 }, { "epoch": 0.5168523701089885, "grad_norm": 1.840066772665251, "learning_rate": 4.970197066192213e-07, "loss": 0.2459, "step": 29734 }, { "epoch": 0.5168697526464914, "grad_norm": 1.271745044871979, "learning_rate": 4.969915577872648e-07, "loss": 0.1975, "step": 29735 }, { "epoch": 0.5168871351839942, "grad_norm": 1.325587274787231, "learning_rate": 4.969634089648432e-07, "loss": 0.3758, "step": 29736 }, { "epoch": 0.516904517721497, "grad_norm": 1.6843528853513672, "learning_rate": 4.969352601520462e-07, "loss": 0.1968, "step": 29737 }, { "epoch": 0.5169219002589998, "grad_norm": 2.1930133095017847, "learning_rate": 4.969071113489631e-07, "loss": 0.2135, "step": 29738 }, { "epoch": 0.5169392827965026, "grad_norm": 4.8127073481189395, "learning_rate": 4.96878962555683e-07, "loss": 0.4572, "step": 29739 }, { "epoch": 0.5169566653340054, "grad_norm": 1.546910620698271, "learning_rate": 4.968508137722951e-07, "loss": 0.1512, "step": 29740 }, { "epoch": 0.5169740478715082, "grad_norm": 2.834597179948184, "learning_rate": 4.968226649988889e-07, "loss": 0.3144, "step": 29741 }, { "epoch": 0.5169914304090111, "grad_norm": 1.0233904466447512, "learning_rate": 4.967945162355534e-07, "loss": 0.2361, "step": 29742 }, { "epoch": 0.5170088129465139, "grad_norm": 3.6782268478680877, "learning_rate": 4.967663674823777e-07, "loss": 0.3019, "step": 29743 }, { "epoch": 0.5170261954840167, "grad_norm": 2.0123628544258403, "learning_rate": 4.96738218739451e-07, "loss": 0.285, "step": 29744 }, { "epoch": 0.5170435780215196, "grad_norm": 1.1841698470816433, "learning_rate": 4.967100700068628e-07, "loss": 0.1701, "step": 29745 }, { "epoch": 0.5170609605590224, "grad_norm": 2.2219991633753278, "learning_rate": 4.96681921284702e-07, "loss": 0.2411, "step": 29746 }, { "epoch": 0.5170783430965252, "grad_norm": 1.2622144275705656, "learning_rate": 4.966537725730581e-07, "loss": 0.2112, "step": 29747 }, { "epoch": 0.5170957256340281, "grad_norm": 1.6542396367271177, "learning_rate": 4.966256238720204e-07, "loss": 0.345, "step": 29748 }, { "epoch": 0.5171131081715309, "grad_norm": 1.15339534217273, "learning_rate": 4.965974751816776e-07, "loss": 0.1318, "step": 29749 }, { "epoch": 0.5171304907090337, "grad_norm": 1.512522375454583, "learning_rate": 4.965693265021194e-07, "loss": 0.2376, "step": 29750 }, { "epoch": 0.5171478732465365, "grad_norm": 2.330306642731134, "learning_rate": 4.965411778334348e-07, "loss": 0.2833, "step": 29751 }, { "epoch": 0.5171652557840394, "grad_norm": 1.2547260042483466, "learning_rate": 4.965130291757131e-07, "loss": 0.326, "step": 29752 }, { "epoch": 0.5171826383215422, "grad_norm": 1.1924057364142062, "learning_rate": 4.964848805290436e-07, "loss": 0.1662, "step": 29753 }, { "epoch": 0.517200020859045, "grad_norm": 5.18223777191515, "learning_rate": 4.964567318935155e-07, "loss": 0.242, "step": 29754 }, { "epoch": 0.5172174033965479, "grad_norm": 2.176883148400139, "learning_rate": 4.964285832692176e-07, "loss": 0.3009, "step": 29755 }, { "epoch": 0.5172347859340507, "grad_norm": 2.8613272032452386, "learning_rate": 4.964004346562396e-07, "loss": 0.5429, "step": 29756 }, { "epoch": 0.5172521684715535, "grad_norm": 2.1465025580055013, "learning_rate": 4.963722860546705e-07, "loss": 0.3488, "step": 29757 }, { "epoch": 0.5172695510090562, "grad_norm": 3.368362439950742, "learning_rate": 4.963441374645997e-07, "loss": 0.307, "step": 29758 }, { "epoch": 0.5172869335465591, "grad_norm": 1.130123571408656, "learning_rate": 4.963159888861162e-07, "loss": 0.1588, "step": 29759 }, { "epoch": 0.5173043160840619, "grad_norm": 1.5886784915867231, "learning_rate": 4.962878403193097e-07, "loss": 0.3153, "step": 29760 }, { "epoch": 0.5173216986215647, "grad_norm": 1.8466973846655081, "learning_rate": 4.962596917642685e-07, "loss": 0.2268, "step": 29761 }, { "epoch": 0.5173390811590676, "grad_norm": 2.0768168274371104, "learning_rate": 4.962315432210827e-07, "loss": 0.2554, "step": 29762 }, { "epoch": 0.5173564636965704, "grad_norm": 2.170730394044674, "learning_rate": 4.962033946898409e-07, "loss": 0.2609, "step": 29763 }, { "epoch": 0.5173738462340732, "grad_norm": 1.2542823358513244, "learning_rate": 4.961752461706329e-07, "loss": 0.2125, "step": 29764 }, { "epoch": 0.5173912287715761, "grad_norm": 1.232447623969714, "learning_rate": 4.961470976635474e-07, "loss": 0.27, "step": 29765 }, { "epoch": 0.5174086113090789, "grad_norm": 1.9228007303728778, "learning_rate": 4.961189491686738e-07, "loss": 0.3144, "step": 29766 }, { "epoch": 0.5174259938465817, "grad_norm": 1.2703858531786787, "learning_rate": 4.960908006861016e-07, "loss": 0.2081, "step": 29767 }, { "epoch": 0.5174433763840846, "grad_norm": 1.3351256231721955, "learning_rate": 4.960626522159197e-07, "loss": 0.2451, "step": 29768 }, { "epoch": 0.5174607589215874, "grad_norm": 1.3268464856440498, "learning_rate": 4.960345037582172e-07, "loss": 0.235, "step": 29769 }, { "epoch": 0.5174781414590902, "grad_norm": 1.6621075888629941, "learning_rate": 4.960063553130837e-07, "loss": 0.2144, "step": 29770 }, { "epoch": 0.517495523996593, "grad_norm": 2.371380389486512, "learning_rate": 4.95978206880608e-07, "loss": 0.2191, "step": 29771 }, { "epoch": 0.5175129065340959, "grad_norm": 1.7909430168897786, "learning_rate": 4.959500584608797e-07, "loss": 0.2591, "step": 29772 }, { "epoch": 0.5175302890715987, "grad_norm": 1.2730970583457655, "learning_rate": 4.959219100539879e-07, "loss": 0.3436, "step": 29773 }, { "epoch": 0.5175476716091015, "grad_norm": 1.5111568843406076, "learning_rate": 4.958937616600216e-07, "loss": 0.1825, "step": 29774 }, { "epoch": 0.5175650541466044, "grad_norm": 1.839246337125239, "learning_rate": 4.958656132790703e-07, "loss": 0.3181, "step": 29775 }, { "epoch": 0.5175824366841072, "grad_norm": 2.2659305183591547, "learning_rate": 4.958374649112232e-07, "loss": 0.2367, "step": 29776 }, { "epoch": 0.51759981922161, "grad_norm": 2.3994872374492098, "learning_rate": 4.958093165565691e-07, "loss": 0.3364, "step": 29777 }, { "epoch": 0.5176172017591127, "grad_norm": 1.9019854227769482, "learning_rate": 4.957811682151978e-07, "loss": 0.2564, "step": 29778 }, { "epoch": 0.5176345842966156, "grad_norm": 1.4151060626440208, "learning_rate": 4.957530198871983e-07, "loss": 0.2772, "step": 29779 }, { "epoch": 0.5176519668341184, "grad_norm": 1.251113864490562, "learning_rate": 4.957248715726596e-07, "loss": 0.2222, "step": 29780 }, { "epoch": 0.5176693493716212, "grad_norm": 1.3195831902625477, "learning_rate": 4.956967232716713e-07, "loss": 0.2488, "step": 29781 }, { "epoch": 0.5176867319091241, "grad_norm": 1.3545719970328345, "learning_rate": 4.956685749843222e-07, "loss": 0.2707, "step": 29782 }, { "epoch": 0.5177041144466269, "grad_norm": 1.8707121901553956, "learning_rate": 4.956404267107018e-07, "loss": 0.2564, "step": 29783 }, { "epoch": 0.5177214969841297, "grad_norm": 1.398961415563982, "learning_rate": 4.956122784508993e-07, "loss": 0.2032, "step": 29784 }, { "epoch": 0.5177388795216326, "grad_norm": 4.434410231735947, "learning_rate": 4.955841302050037e-07, "loss": 0.4635, "step": 29785 }, { "epoch": 0.5177562620591354, "grad_norm": 1.6011558140735378, "learning_rate": 4.955559819731046e-07, "loss": 0.3284, "step": 29786 }, { "epoch": 0.5177736445966382, "grad_norm": 1.4841049249830864, "learning_rate": 4.955278337552909e-07, "loss": 0.2692, "step": 29787 }, { "epoch": 0.517791027134141, "grad_norm": 3.058448179237576, "learning_rate": 4.954996855516519e-07, "loss": 0.1829, "step": 29788 }, { "epoch": 0.5178084096716439, "grad_norm": 2.231742826106714, "learning_rate": 4.954715373622769e-07, "loss": 0.2241, "step": 29789 }, { "epoch": 0.5178257922091467, "grad_norm": 2.573498256796508, "learning_rate": 4.954433891872549e-07, "loss": 0.2464, "step": 29790 }, { "epoch": 0.5178431747466495, "grad_norm": 1.8067091582431742, "learning_rate": 4.954152410266754e-07, "loss": 0.2273, "step": 29791 }, { "epoch": 0.5178605572841524, "grad_norm": 1.6434770828884757, "learning_rate": 4.953870928806275e-07, "loss": 0.1619, "step": 29792 }, { "epoch": 0.5178779398216552, "grad_norm": 1.616291535612361, "learning_rate": 4.953589447492003e-07, "loss": 0.2263, "step": 29793 }, { "epoch": 0.517895322359158, "grad_norm": 2.748735428652398, "learning_rate": 4.953307966324831e-07, "loss": 0.2246, "step": 29794 }, { "epoch": 0.5179127048966609, "grad_norm": 1.4485423809003848, "learning_rate": 4.953026485305652e-07, "loss": 0.2681, "step": 29795 }, { "epoch": 0.5179300874341637, "grad_norm": 1.6560854657168464, "learning_rate": 4.952745004435355e-07, "loss": 0.2104, "step": 29796 }, { "epoch": 0.5179474699716665, "grad_norm": 1.4771270607292801, "learning_rate": 4.952463523714838e-07, "loss": 0.174, "step": 29797 }, { "epoch": 0.5179648525091692, "grad_norm": 1.0260691432415965, "learning_rate": 4.95218204314499e-07, "loss": 0.3031, "step": 29798 }, { "epoch": 0.5179822350466721, "grad_norm": 1.3936537416351134, "learning_rate": 4.9519005627267e-07, "loss": 0.3196, "step": 29799 }, { "epoch": 0.5179996175841749, "grad_norm": 1.5505428121465092, "learning_rate": 4.951619082460865e-07, "loss": 0.2541, "step": 29800 }, { "epoch": 0.5180170001216777, "grad_norm": 2.82907309085585, "learning_rate": 4.951337602348376e-07, "loss": 0.2894, "step": 29801 }, { "epoch": 0.5180343826591806, "grad_norm": 1.2571094443733337, "learning_rate": 4.951056122390122e-07, "loss": 0.2451, "step": 29802 }, { "epoch": 0.5180517651966834, "grad_norm": 2.9769419610843446, "learning_rate": 4.950774642587e-07, "loss": 0.2641, "step": 29803 }, { "epoch": 0.5180691477341862, "grad_norm": 0.9358236733018261, "learning_rate": 4.950493162939898e-07, "loss": 0.2399, "step": 29804 }, { "epoch": 0.518086530271689, "grad_norm": 1.838780483533742, "learning_rate": 4.950211683449712e-07, "loss": 0.2601, "step": 29805 }, { "epoch": 0.5181039128091919, "grad_norm": 2.433585032072876, "learning_rate": 4.94993020411733e-07, "loss": 0.1992, "step": 29806 }, { "epoch": 0.5181212953466947, "grad_norm": 0.8784027665857697, "learning_rate": 4.949648724943646e-07, "loss": 0.1631, "step": 29807 }, { "epoch": 0.5181386778841975, "grad_norm": 1.1201652350847209, "learning_rate": 4.949367245929554e-07, "loss": 0.2264, "step": 29808 }, { "epoch": 0.5181560604217004, "grad_norm": 1.7475298450437533, "learning_rate": 4.949085767075945e-07, "loss": 0.1958, "step": 29809 }, { "epoch": 0.5181734429592032, "grad_norm": 1.7620796018200091, "learning_rate": 4.948804288383708e-07, "loss": 0.2274, "step": 29810 }, { "epoch": 0.518190825496706, "grad_norm": 1.4139376075308834, "learning_rate": 4.948522809853741e-07, "loss": 0.2626, "step": 29811 }, { "epoch": 0.5182082080342089, "grad_norm": 1.2014277595965313, "learning_rate": 4.948241331486932e-07, "loss": 0.1653, "step": 29812 }, { "epoch": 0.5182255905717117, "grad_norm": 2.842824367202928, "learning_rate": 4.947959853284173e-07, "loss": 0.2419, "step": 29813 }, { "epoch": 0.5182429731092145, "grad_norm": 1.5701425248162815, "learning_rate": 4.947678375246358e-07, "loss": 0.2179, "step": 29814 }, { "epoch": 0.5182603556467174, "grad_norm": 1.7539588755732916, "learning_rate": 4.947396897374379e-07, "loss": 0.2354, "step": 29815 }, { "epoch": 0.5182777381842202, "grad_norm": 3.375772580372392, "learning_rate": 4.947115419669127e-07, "loss": 0.263, "step": 29816 }, { "epoch": 0.5182951207217229, "grad_norm": 1.5995753343108274, "learning_rate": 4.946833942131497e-07, "loss": 0.2922, "step": 29817 }, { "epoch": 0.5183125032592257, "grad_norm": 1.2261167943573303, "learning_rate": 4.946552464762378e-07, "loss": 0.1937, "step": 29818 }, { "epoch": 0.5183298857967286, "grad_norm": 3.0815949810977536, "learning_rate": 4.946270987562661e-07, "loss": 0.2865, "step": 29819 }, { "epoch": 0.5183472683342314, "grad_norm": 1.6414477741058173, "learning_rate": 4.945989510533241e-07, "loss": 0.2693, "step": 29820 }, { "epoch": 0.5183646508717342, "grad_norm": 1.8085528705319485, "learning_rate": 4.94570803367501e-07, "loss": 0.3103, "step": 29821 }, { "epoch": 0.518382033409237, "grad_norm": 1.883545667968279, "learning_rate": 4.94542655698886e-07, "loss": 0.3531, "step": 29822 }, { "epoch": 0.5183994159467399, "grad_norm": 1.6526838388735448, "learning_rate": 4.945145080475681e-07, "loss": 0.2173, "step": 29823 }, { "epoch": 0.5184167984842427, "grad_norm": 2.784617355543173, "learning_rate": 4.944863604136372e-07, "loss": 0.4119, "step": 29824 }, { "epoch": 0.5184341810217455, "grad_norm": 1.4158364666741174, "learning_rate": 4.944582127971815e-07, "loss": 0.302, "step": 29825 }, { "epoch": 0.5184515635592484, "grad_norm": 1.4890780472006266, "learning_rate": 4.944300651982908e-07, "loss": 0.263, "step": 29826 }, { "epoch": 0.5184689460967512, "grad_norm": 2.3083900255173955, "learning_rate": 4.944019176170542e-07, "loss": 0.2252, "step": 29827 }, { "epoch": 0.518486328634254, "grad_norm": 1.7010186389103952, "learning_rate": 4.943737700535611e-07, "loss": 0.191, "step": 29828 }, { "epoch": 0.5185037111717569, "grad_norm": 2.0286145444952366, "learning_rate": 4.943456225079004e-07, "loss": 0.3982, "step": 29829 }, { "epoch": 0.5185210937092597, "grad_norm": 2.1736521792715666, "learning_rate": 4.943174749801617e-07, "loss": 0.2836, "step": 29830 }, { "epoch": 0.5185384762467625, "grad_norm": 1.866336686810521, "learning_rate": 4.94289327470434e-07, "loss": 0.1769, "step": 29831 }, { "epoch": 0.5185558587842654, "grad_norm": 1.2823624615060436, "learning_rate": 4.942611799788064e-07, "loss": 0.2741, "step": 29832 }, { "epoch": 0.5185732413217682, "grad_norm": 1.0676777857792015, "learning_rate": 4.942330325053681e-07, "loss": 0.1988, "step": 29833 }, { "epoch": 0.518590623859271, "grad_norm": 1.7031210602561364, "learning_rate": 4.942048850502086e-07, "loss": 0.3169, "step": 29834 }, { "epoch": 0.5186080063967738, "grad_norm": 2.2572740687752977, "learning_rate": 4.941767376134168e-07, "loss": 0.3134, "step": 29835 }, { "epoch": 0.5186253889342767, "grad_norm": 1.691374530821161, "learning_rate": 4.941485901950823e-07, "loss": 0.3157, "step": 29836 }, { "epoch": 0.5186427714717794, "grad_norm": 3.4805194536813446, "learning_rate": 4.941204427952939e-07, "loss": 0.3261, "step": 29837 }, { "epoch": 0.5186601540092822, "grad_norm": 1.3826334401401983, "learning_rate": 4.94092295414141e-07, "loss": 0.3172, "step": 29838 }, { "epoch": 0.5186775365467851, "grad_norm": 2.1755271537958087, "learning_rate": 4.940641480517129e-07, "loss": 0.2746, "step": 29839 }, { "epoch": 0.5186949190842879, "grad_norm": 1.4692441158886624, "learning_rate": 4.940360007080986e-07, "loss": 0.2474, "step": 29840 }, { "epoch": 0.5187123016217907, "grad_norm": 1.477392082075724, "learning_rate": 4.940078533833875e-07, "loss": 0.1842, "step": 29841 }, { "epoch": 0.5187296841592935, "grad_norm": 1.302036525383935, "learning_rate": 4.939797060776688e-07, "loss": 0.2687, "step": 29842 }, { "epoch": 0.5187470666967964, "grad_norm": 1.5899578830379615, "learning_rate": 4.939515587910318e-07, "loss": 0.3434, "step": 29843 }, { "epoch": 0.5187644492342992, "grad_norm": 2.0376640321719233, "learning_rate": 4.939234115235653e-07, "loss": 0.4541, "step": 29844 }, { "epoch": 0.518781831771802, "grad_norm": 1.3699246774820502, "learning_rate": 4.938952642753589e-07, "loss": 0.2481, "step": 29845 }, { "epoch": 0.5187992143093049, "grad_norm": 1.8885066357910347, "learning_rate": 4.938671170465017e-07, "loss": 0.285, "step": 29846 }, { "epoch": 0.5188165968468077, "grad_norm": 1.0692952803098035, "learning_rate": 4.938389698370829e-07, "loss": 0.1876, "step": 29847 }, { "epoch": 0.5188339793843105, "grad_norm": 1.7984589645647469, "learning_rate": 4.938108226471917e-07, "loss": 0.2977, "step": 29848 }, { "epoch": 0.5188513619218134, "grad_norm": 1.1311348342893097, "learning_rate": 4.937826754769178e-07, "loss": 0.1595, "step": 29849 }, { "epoch": 0.5188687444593162, "grad_norm": 1.4162916457828694, "learning_rate": 4.937545283263494e-07, "loss": 0.2119, "step": 29850 }, { "epoch": 0.518886126996819, "grad_norm": 1.5706513468385939, "learning_rate": 4.937263811955765e-07, "loss": 0.2538, "step": 29851 }, { "epoch": 0.5189035095343218, "grad_norm": 1.523729336506184, "learning_rate": 4.936982340846879e-07, "loss": 0.3008, "step": 29852 }, { "epoch": 0.5189208920718247, "grad_norm": 1.301232395991512, "learning_rate": 4.936700869937733e-07, "loss": 0.2298, "step": 29853 }, { "epoch": 0.5189382746093275, "grad_norm": 1.560602501597447, "learning_rate": 4.936419399229214e-07, "loss": 0.1906, "step": 29854 }, { "epoch": 0.5189556571468303, "grad_norm": 2.3580763948317793, "learning_rate": 4.936137928722218e-07, "loss": 0.3326, "step": 29855 }, { "epoch": 0.5189730396843332, "grad_norm": 1.5721424716052617, "learning_rate": 4.935856458417634e-07, "loss": 0.3216, "step": 29856 }, { "epoch": 0.5189904222218359, "grad_norm": 4.258259841011677, "learning_rate": 4.935574988316357e-07, "loss": 0.1954, "step": 29857 }, { "epoch": 0.5190078047593387, "grad_norm": 1.3603579207897083, "learning_rate": 4.935293518419275e-07, "loss": 0.2801, "step": 29858 }, { "epoch": 0.5190251872968416, "grad_norm": 1.0622463967405837, "learning_rate": 4.935012048727285e-07, "loss": 0.2248, "step": 29859 }, { "epoch": 0.5190425698343444, "grad_norm": 1.3221063680666667, "learning_rate": 4.934730579241276e-07, "loss": 0.3541, "step": 29860 }, { "epoch": 0.5190599523718472, "grad_norm": 1.6373619355266036, "learning_rate": 4.934449109962142e-07, "loss": 0.2864, "step": 29861 }, { "epoch": 0.51907733490935, "grad_norm": 1.0902466406922033, "learning_rate": 4.934167640890774e-07, "loss": 0.1862, "step": 29862 }, { "epoch": 0.5190947174468529, "grad_norm": 1.9266351900742573, "learning_rate": 4.933886172028064e-07, "loss": 0.2926, "step": 29863 }, { "epoch": 0.5191120999843557, "grad_norm": 1.6747590584635457, "learning_rate": 4.933604703374905e-07, "loss": 0.1725, "step": 29864 }, { "epoch": 0.5191294825218585, "grad_norm": 2.029239864462784, "learning_rate": 4.933323234932189e-07, "loss": 0.4689, "step": 29865 }, { "epoch": 0.5191468650593614, "grad_norm": 2.0768024727522856, "learning_rate": 4.933041766700805e-07, "loss": 0.3715, "step": 29866 }, { "epoch": 0.5191642475968642, "grad_norm": 1.768807269805599, "learning_rate": 4.93276029868165e-07, "loss": 0.2546, "step": 29867 }, { "epoch": 0.519181630134367, "grad_norm": 1.8500034637388003, "learning_rate": 4.932478830875616e-07, "loss": 0.2614, "step": 29868 }, { "epoch": 0.5191990126718699, "grad_norm": 1.3477213400120291, "learning_rate": 4.932197363283589e-07, "loss": 0.2237, "step": 29869 }, { "epoch": 0.5192163952093727, "grad_norm": 1.9596656697588721, "learning_rate": 4.931915895906467e-07, "loss": 0.1952, "step": 29870 }, { "epoch": 0.5192337777468755, "grad_norm": 1.5990856111103229, "learning_rate": 4.93163442874514e-07, "loss": 0.3198, "step": 29871 }, { "epoch": 0.5192511602843783, "grad_norm": 1.4888305796257097, "learning_rate": 4.931352961800501e-07, "loss": 0.2556, "step": 29872 }, { "epoch": 0.5192685428218812, "grad_norm": 1.4130181874031567, "learning_rate": 4.931071495073441e-07, "loss": 0.2024, "step": 29873 }, { "epoch": 0.519285925359384, "grad_norm": 2.013596732815162, "learning_rate": 4.930790028564855e-07, "loss": 0.2339, "step": 29874 }, { "epoch": 0.5193033078968868, "grad_norm": 1.589513545664862, "learning_rate": 4.930508562275629e-07, "loss": 0.1655, "step": 29875 }, { "epoch": 0.5193206904343897, "grad_norm": 1.2542169468674278, "learning_rate": 4.930227096206661e-07, "loss": 0.2793, "step": 29876 }, { "epoch": 0.5193380729718924, "grad_norm": 1.2747913496273104, "learning_rate": 4.929945630358841e-07, "loss": 0.2836, "step": 29877 }, { "epoch": 0.5193554555093952, "grad_norm": 1.89826967694371, "learning_rate": 4.929664164733062e-07, "loss": 0.2054, "step": 29878 }, { "epoch": 0.519372838046898, "grad_norm": 1.241366742422425, "learning_rate": 4.929382699330213e-07, "loss": 0.3137, "step": 29879 }, { "epoch": 0.5193902205844009, "grad_norm": 1.7434128673836722, "learning_rate": 4.929101234151192e-07, "loss": 0.2177, "step": 29880 }, { "epoch": 0.5194076031219037, "grad_norm": 2.077215336300041, "learning_rate": 4.928819769196884e-07, "loss": 0.3218, "step": 29881 }, { "epoch": 0.5194249856594065, "grad_norm": 2.51155189143951, "learning_rate": 4.928538304468187e-07, "loss": 0.3842, "step": 29882 }, { "epoch": 0.5194423681969094, "grad_norm": 0.9989850250070431, "learning_rate": 4.928256839965988e-07, "loss": 0.2303, "step": 29883 }, { "epoch": 0.5194597507344122, "grad_norm": 2.1054783346372803, "learning_rate": 4.927975375691184e-07, "loss": 0.2667, "step": 29884 }, { "epoch": 0.519477133271915, "grad_norm": 1.4891482292112033, "learning_rate": 4.927693911644664e-07, "loss": 0.2251, "step": 29885 }, { "epoch": 0.5194945158094179, "grad_norm": 1.4796559581195792, "learning_rate": 4.927412447827321e-07, "loss": 0.2228, "step": 29886 }, { "epoch": 0.5195118983469207, "grad_norm": 1.9529049857189922, "learning_rate": 4.927130984240049e-07, "loss": 0.2689, "step": 29887 }, { "epoch": 0.5195292808844235, "grad_norm": 2.1217434582025394, "learning_rate": 4.926849520883736e-07, "loss": 0.2637, "step": 29888 }, { "epoch": 0.5195466634219263, "grad_norm": 1.6492917903634663, "learning_rate": 4.926568057759278e-07, "loss": 0.2597, "step": 29889 }, { "epoch": 0.5195640459594292, "grad_norm": 1.3961571450910222, "learning_rate": 4.926286594867565e-07, "loss": 0.3114, "step": 29890 }, { "epoch": 0.519581428496932, "grad_norm": 1.0952996618382387, "learning_rate": 4.926005132209489e-07, "loss": 0.3481, "step": 29891 }, { "epoch": 0.5195988110344348, "grad_norm": 2.0584829761268066, "learning_rate": 4.925723669785944e-07, "loss": 0.4672, "step": 29892 }, { "epoch": 0.5196161935719377, "grad_norm": 2.4030541885340413, "learning_rate": 4.925442207597821e-07, "loss": 0.3496, "step": 29893 }, { "epoch": 0.5196335761094405, "grad_norm": 3.5833528098989773, "learning_rate": 4.92516074564601e-07, "loss": 0.4206, "step": 29894 }, { "epoch": 0.5196509586469433, "grad_norm": 1.634728858897118, "learning_rate": 4.924879283931407e-07, "loss": 0.1998, "step": 29895 }, { "epoch": 0.5196683411844462, "grad_norm": 1.3712878270136737, "learning_rate": 4.924597822454901e-07, "loss": 0.3536, "step": 29896 }, { "epoch": 0.5196857237219489, "grad_norm": 1.3581774476900454, "learning_rate": 4.924316361217386e-07, "loss": 0.2473, "step": 29897 }, { "epoch": 0.5197031062594517, "grad_norm": 1.1885487955349334, "learning_rate": 4.924034900219753e-07, "loss": 0.1892, "step": 29898 }, { "epoch": 0.5197204887969545, "grad_norm": 1.163120885852752, "learning_rate": 4.923753439462895e-07, "loss": 0.2088, "step": 29899 }, { "epoch": 0.5197378713344574, "grad_norm": 1.7898587458965713, "learning_rate": 4.923471978947703e-07, "loss": 0.3211, "step": 29900 }, { "epoch": 0.5197552538719602, "grad_norm": 1.9249641715007477, "learning_rate": 4.92319051867507e-07, "loss": 0.2442, "step": 29901 }, { "epoch": 0.519772636409463, "grad_norm": 1.2099125995431639, "learning_rate": 4.922909058645886e-07, "loss": 0.1419, "step": 29902 }, { "epoch": 0.5197900189469659, "grad_norm": 2.0211262484345633, "learning_rate": 4.922627598861047e-07, "loss": 0.322, "step": 29903 }, { "epoch": 0.5198074014844687, "grad_norm": 1.7879388299237868, "learning_rate": 4.922346139321442e-07, "loss": 0.3032, "step": 29904 }, { "epoch": 0.5198247840219715, "grad_norm": 1.0094772017753406, "learning_rate": 4.922064680027965e-07, "loss": 0.1517, "step": 29905 }, { "epoch": 0.5198421665594744, "grad_norm": 0.9592044582433034, "learning_rate": 4.921783220981506e-07, "loss": 0.1995, "step": 29906 }, { "epoch": 0.5198595490969772, "grad_norm": 2.588133450812853, "learning_rate": 4.92150176218296e-07, "loss": 0.2732, "step": 29907 }, { "epoch": 0.51987693163448, "grad_norm": 1.3593301203108175, "learning_rate": 4.921220303633215e-07, "loss": 0.2837, "step": 29908 }, { "epoch": 0.5198943141719828, "grad_norm": 2.453609762378592, "learning_rate": 4.920938845333168e-07, "loss": 0.2408, "step": 29909 }, { "epoch": 0.5199116967094857, "grad_norm": 5.3480543598685, "learning_rate": 4.920657387283705e-07, "loss": 0.3104, "step": 29910 }, { "epoch": 0.5199290792469885, "grad_norm": 2.1729410183158597, "learning_rate": 4.920375929485725e-07, "loss": 0.1424, "step": 29911 }, { "epoch": 0.5199464617844913, "grad_norm": 2.079631108522927, "learning_rate": 4.920094471940117e-07, "loss": 0.1946, "step": 29912 }, { "epoch": 0.5199638443219942, "grad_norm": 1.725648462145349, "learning_rate": 4.919813014647771e-07, "loss": 0.2231, "step": 29913 }, { "epoch": 0.519981226859497, "grad_norm": 1.1747707802118552, "learning_rate": 4.919531557609581e-07, "loss": 0.207, "step": 29914 }, { "epoch": 0.5199986093969998, "grad_norm": 2.2929817624233015, "learning_rate": 4.919250100826439e-07, "loss": 0.2972, "step": 29915 }, { "epoch": 0.5200159919345027, "grad_norm": 1.1506809818765487, "learning_rate": 4.918968644299237e-07, "loss": 0.1507, "step": 29916 }, { "epoch": 0.5200333744720054, "grad_norm": 1.3352501728653496, "learning_rate": 4.918687188028868e-07, "loss": 0.2079, "step": 29917 }, { "epoch": 0.5200507570095082, "grad_norm": 1.3304048130402713, "learning_rate": 4.918405732016223e-07, "loss": 0.2065, "step": 29918 }, { "epoch": 0.520068139547011, "grad_norm": 1.2597676769714605, "learning_rate": 4.918124276262194e-07, "loss": 0.2831, "step": 29919 }, { "epoch": 0.5200855220845139, "grad_norm": 2.2250329506014332, "learning_rate": 4.917842820767674e-07, "loss": 0.1799, "step": 29920 }, { "epoch": 0.5201029046220167, "grad_norm": 1.4416416533793004, "learning_rate": 4.917561365533554e-07, "loss": 0.1908, "step": 29921 }, { "epoch": 0.5201202871595195, "grad_norm": 0.95387896578251, "learning_rate": 4.917279910560726e-07, "loss": 0.1893, "step": 29922 }, { "epoch": 0.5201376696970224, "grad_norm": 1.7772074073749307, "learning_rate": 4.916998455850083e-07, "loss": 0.1677, "step": 29923 }, { "epoch": 0.5201550522345252, "grad_norm": 2.2725723248055196, "learning_rate": 4.916717001402516e-07, "loss": 0.3362, "step": 29924 }, { "epoch": 0.520172434772028, "grad_norm": 6.31538826460168, "learning_rate": 4.916435547218921e-07, "loss": 0.4128, "step": 29925 }, { "epoch": 0.5201898173095308, "grad_norm": 1.922903838170712, "learning_rate": 4.916154093300185e-07, "loss": 0.1866, "step": 29926 }, { "epoch": 0.5202071998470337, "grad_norm": 1.6103210693297043, "learning_rate": 4.915872639647202e-07, "loss": 0.2235, "step": 29927 }, { "epoch": 0.5202245823845365, "grad_norm": 1.8489221939814018, "learning_rate": 4.915591186260864e-07, "loss": 0.3123, "step": 29928 }, { "epoch": 0.5202419649220393, "grad_norm": 1.636777820371478, "learning_rate": 4.915309733142063e-07, "loss": 0.2116, "step": 29929 }, { "epoch": 0.5202593474595422, "grad_norm": 1.6852466783773514, "learning_rate": 4.915028280291691e-07, "loss": 0.1951, "step": 29930 }, { "epoch": 0.520276729997045, "grad_norm": 2.6500871222442046, "learning_rate": 4.914746827710642e-07, "loss": 0.2872, "step": 29931 }, { "epoch": 0.5202941125345478, "grad_norm": 2.1851967473815375, "learning_rate": 4.914465375399805e-07, "loss": 0.3883, "step": 29932 }, { "epoch": 0.5203114950720507, "grad_norm": 1.2870132521451587, "learning_rate": 4.914183923360073e-07, "loss": 0.2306, "step": 29933 }, { "epoch": 0.5203288776095535, "grad_norm": 4.079479619530599, "learning_rate": 4.913902471592339e-07, "loss": 0.2067, "step": 29934 }, { "epoch": 0.5203462601470563, "grad_norm": 1.190747957983211, "learning_rate": 4.913621020097494e-07, "loss": 0.2718, "step": 29935 }, { "epoch": 0.5203636426845591, "grad_norm": 2.193586163973363, "learning_rate": 4.913339568876432e-07, "loss": 0.1643, "step": 29936 }, { "epoch": 0.5203810252220619, "grad_norm": 0.9279434540442854, "learning_rate": 4.913058117930045e-07, "loss": 0.2957, "step": 29937 }, { "epoch": 0.5203984077595647, "grad_norm": 1.7792364353214367, "learning_rate": 4.912776667259222e-07, "loss": 0.3388, "step": 29938 }, { "epoch": 0.5204157902970675, "grad_norm": 1.686097395737698, "learning_rate": 4.912495216864856e-07, "loss": 0.3333, "step": 29939 }, { "epoch": 0.5204331728345704, "grad_norm": 1.4186827851112853, "learning_rate": 4.912213766747841e-07, "loss": 0.251, "step": 29940 }, { "epoch": 0.5204505553720732, "grad_norm": 3.4689771601326505, "learning_rate": 4.911932316909067e-07, "loss": 0.4125, "step": 29941 }, { "epoch": 0.520467937909576, "grad_norm": 2.3107111011423407, "learning_rate": 4.911650867349428e-07, "loss": 0.3032, "step": 29942 }, { "epoch": 0.5204853204470788, "grad_norm": 1.8556507162932125, "learning_rate": 4.911369418069814e-07, "loss": 0.2795, "step": 29943 }, { "epoch": 0.5205027029845817, "grad_norm": 1.6471283515437647, "learning_rate": 4.911087969071121e-07, "loss": 0.1614, "step": 29944 }, { "epoch": 0.5205200855220845, "grad_norm": 1.7051937067724157, "learning_rate": 4.910806520354237e-07, "loss": 0.236, "step": 29945 }, { "epoch": 0.5205374680595873, "grad_norm": 2.1696818246283582, "learning_rate": 4.910525071920056e-07, "loss": 0.2723, "step": 29946 }, { "epoch": 0.5205548505970902, "grad_norm": 2.251002555481321, "learning_rate": 4.910243623769468e-07, "loss": 0.2363, "step": 29947 }, { "epoch": 0.520572233134593, "grad_norm": 1.080478311690457, "learning_rate": 4.909962175903366e-07, "loss": 0.2555, "step": 29948 }, { "epoch": 0.5205896156720958, "grad_norm": 1.4870911815192696, "learning_rate": 4.909680728322643e-07, "loss": 0.1322, "step": 29949 }, { "epoch": 0.5206069982095987, "grad_norm": 1.0563339383206456, "learning_rate": 4.909399281028193e-07, "loss": 0.1555, "step": 29950 }, { "epoch": 0.5206243807471015, "grad_norm": 2.174726107426654, "learning_rate": 4.909117834020904e-07, "loss": 0.2644, "step": 29951 }, { "epoch": 0.5206417632846043, "grad_norm": 1.01042922359324, "learning_rate": 4.908836387301668e-07, "loss": 0.1284, "step": 29952 }, { "epoch": 0.5206591458221072, "grad_norm": 1.2916013351934927, "learning_rate": 4.908554940871381e-07, "loss": 0.3157, "step": 29953 }, { "epoch": 0.52067652835961, "grad_norm": 1.771683491635701, "learning_rate": 4.908273494730932e-07, "loss": 0.243, "step": 29954 }, { "epoch": 0.5206939108971128, "grad_norm": 1.7282288631159488, "learning_rate": 4.907992048881213e-07, "loss": 0.4781, "step": 29955 }, { "epoch": 0.5207112934346155, "grad_norm": 1.614987628974801, "learning_rate": 4.90771060332312e-07, "loss": 0.3244, "step": 29956 }, { "epoch": 0.5207286759721184, "grad_norm": 1.6600528883721526, "learning_rate": 4.90742915805754e-07, "loss": 0.238, "step": 29957 }, { "epoch": 0.5207460585096212, "grad_norm": 1.3925672021929638, "learning_rate": 4.907147713085366e-07, "loss": 0.1714, "step": 29958 }, { "epoch": 0.520763441047124, "grad_norm": 1.4488072796658216, "learning_rate": 4.906866268407492e-07, "loss": 0.2929, "step": 29959 }, { "epoch": 0.5207808235846269, "grad_norm": 2.112263441499259, "learning_rate": 4.906584824024809e-07, "loss": 0.3098, "step": 29960 }, { "epoch": 0.5207982061221297, "grad_norm": 1.678581222665334, "learning_rate": 4.90630337993821e-07, "loss": 0.352, "step": 29961 }, { "epoch": 0.5208155886596325, "grad_norm": 3.8944600666774796, "learning_rate": 4.906021936148586e-07, "loss": 0.2789, "step": 29962 }, { "epoch": 0.5208329711971353, "grad_norm": 1.3104400301375596, "learning_rate": 4.90574049265683e-07, "loss": 0.1908, "step": 29963 }, { "epoch": 0.5208503537346382, "grad_norm": 1.123314563165706, "learning_rate": 4.905459049463831e-07, "loss": 0.1777, "step": 29964 }, { "epoch": 0.520867736272141, "grad_norm": 2.2380984730705937, "learning_rate": 4.905177606570485e-07, "loss": 0.3253, "step": 29965 }, { "epoch": 0.5208851188096438, "grad_norm": 1.6367472804829069, "learning_rate": 4.904896163977682e-07, "loss": 0.2057, "step": 29966 }, { "epoch": 0.5209025013471467, "grad_norm": 1.2387077589295916, "learning_rate": 4.904614721686315e-07, "loss": 0.1707, "step": 29967 }, { "epoch": 0.5209198838846495, "grad_norm": 1.4414541413605833, "learning_rate": 4.904333279697273e-07, "loss": 0.2335, "step": 29968 }, { "epoch": 0.5209372664221523, "grad_norm": 1.8658144342530776, "learning_rate": 4.904051838011456e-07, "loss": 0.4231, "step": 29969 }, { "epoch": 0.5209546489596552, "grad_norm": 1.9208361193614345, "learning_rate": 4.903770396629746e-07, "loss": 0.3139, "step": 29970 }, { "epoch": 0.520972031497158, "grad_norm": 3.0558724071067846, "learning_rate": 4.903488955553041e-07, "loss": 0.38, "step": 29971 }, { "epoch": 0.5209894140346608, "grad_norm": 2.0358352726654076, "learning_rate": 4.903207514782231e-07, "loss": 0.2674, "step": 29972 }, { "epoch": 0.5210067965721636, "grad_norm": 1.3371585844381446, "learning_rate": 4.90292607431821e-07, "loss": 0.2332, "step": 29973 }, { "epoch": 0.5210241791096665, "grad_norm": 1.6638538982964757, "learning_rate": 4.902644634161866e-07, "loss": 0.2113, "step": 29974 }, { "epoch": 0.5210415616471693, "grad_norm": 1.973223314653504, "learning_rate": 4.902363194314098e-07, "loss": 0.4137, "step": 29975 }, { "epoch": 0.521058944184672, "grad_norm": 1.7164335124731604, "learning_rate": 4.902081754775792e-07, "loss": 0.1948, "step": 29976 }, { "epoch": 0.5210763267221749, "grad_norm": 1.5260578869839618, "learning_rate": 4.901800315547842e-07, "loss": 0.1962, "step": 29977 }, { "epoch": 0.5210937092596777, "grad_norm": 1.640114102563022, "learning_rate": 4.901518876631139e-07, "loss": 0.2303, "step": 29978 }, { "epoch": 0.5211110917971805, "grad_norm": 1.5883130891454542, "learning_rate": 4.901237438026575e-07, "loss": 0.3703, "step": 29979 }, { "epoch": 0.5211284743346833, "grad_norm": 1.9958199933494463, "learning_rate": 4.900955999735044e-07, "loss": 0.3289, "step": 29980 }, { "epoch": 0.5211458568721862, "grad_norm": 1.7563012977048935, "learning_rate": 4.900674561757437e-07, "loss": 0.201, "step": 29981 }, { "epoch": 0.521163239409689, "grad_norm": 1.7275776337163335, "learning_rate": 4.900393124094648e-07, "loss": 0.3679, "step": 29982 }, { "epoch": 0.5211806219471918, "grad_norm": 1.4706419410963294, "learning_rate": 4.900111686747564e-07, "loss": 0.1424, "step": 29983 }, { "epoch": 0.5211980044846947, "grad_norm": 1.619566508356927, "learning_rate": 4.899830249717083e-07, "loss": 0.2604, "step": 29984 }, { "epoch": 0.5212153870221975, "grad_norm": 2.145392752223792, "learning_rate": 4.899548813004091e-07, "loss": 0.1845, "step": 29985 }, { "epoch": 0.5212327695597003, "grad_norm": 2.3180469603145815, "learning_rate": 4.899267376609485e-07, "loss": 0.3181, "step": 29986 }, { "epoch": 0.5212501520972032, "grad_norm": 3.172622605365396, "learning_rate": 4.898985940534155e-07, "loss": 0.2813, "step": 29987 }, { "epoch": 0.521267534634706, "grad_norm": 3.441327949486402, "learning_rate": 4.898704504778994e-07, "loss": 0.3775, "step": 29988 }, { "epoch": 0.5212849171722088, "grad_norm": 4.0944355237553856, "learning_rate": 4.898423069344891e-07, "loss": 0.4605, "step": 29989 }, { "epoch": 0.5213022997097116, "grad_norm": 3.2797892997362603, "learning_rate": 4.898141634232742e-07, "loss": 0.2029, "step": 29990 }, { "epoch": 0.5213196822472145, "grad_norm": 1.4403775791651, "learning_rate": 4.897860199443435e-07, "loss": 0.3027, "step": 29991 }, { "epoch": 0.5213370647847173, "grad_norm": 1.546930955850731, "learning_rate": 4.897578764977867e-07, "loss": 0.2367, "step": 29992 }, { "epoch": 0.5213544473222201, "grad_norm": 1.9828091067247322, "learning_rate": 4.897297330836925e-07, "loss": 0.1824, "step": 29993 }, { "epoch": 0.521371829859723, "grad_norm": 2.364793865346775, "learning_rate": 4.897015897021506e-07, "loss": 0.226, "step": 29994 }, { "epoch": 0.5213892123972258, "grad_norm": 1.3446065533451084, "learning_rate": 4.896734463532498e-07, "loss": 0.2768, "step": 29995 }, { "epoch": 0.5214065949347285, "grad_norm": 1.980743490310943, "learning_rate": 4.896453030370793e-07, "loss": 0.468, "step": 29996 }, { "epoch": 0.5214239774722313, "grad_norm": 2.628615444232748, "learning_rate": 4.896171597537285e-07, "loss": 0.1695, "step": 29997 }, { "epoch": 0.5214413600097342, "grad_norm": 1.4917444029035463, "learning_rate": 4.895890165032866e-07, "loss": 0.231, "step": 29998 }, { "epoch": 0.521458742547237, "grad_norm": 2.392493886258286, "learning_rate": 4.895608732858426e-07, "loss": 0.2647, "step": 29999 }, { "epoch": 0.5214761250847398, "grad_norm": 1.2585055785712174, "learning_rate": 4.895327301014861e-07, "loss": 0.2552, "step": 30000 }, { "epoch": 0.5214935076222427, "grad_norm": 1.9882579958154252, "learning_rate": 4.895045869503058e-07, "loss": 0.4416, "step": 30001 }, { "epoch": 0.5215108901597455, "grad_norm": 1.4557210637801434, "learning_rate": 4.894764438323913e-07, "loss": 0.274, "step": 30002 }, { "epoch": 0.5215282726972483, "grad_norm": 1.9619688519618181, "learning_rate": 4.894483007478314e-07, "loss": 0.2706, "step": 30003 }, { "epoch": 0.5215456552347512, "grad_norm": 1.651344535000265, "learning_rate": 4.894201576967157e-07, "loss": 0.249, "step": 30004 }, { "epoch": 0.521563037772254, "grad_norm": 1.4786837461860614, "learning_rate": 4.893920146791332e-07, "loss": 0.2485, "step": 30005 }, { "epoch": 0.5215804203097568, "grad_norm": 4.015342185688536, "learning_rate": 4.893638716951732e-07, "loss": 0.2134, "step": 30006 }, { "epoch": 0.5215978028472597, "grad_norm": 1.886628627107412, "learning_rate": 4.89335728744925e-07, "loss": 0.211, "step": 30007 }, { "epoch": 0.5216151853847625, "grad_norm": 1.1560236456516833, "learning_rate": 4.893075858284773e-07, "loss": 0.2475, "step": 30008 }, { "epoch": 0.5216325679222653, "grad_norm": 1.6846965257553967, "learning_rate": 4.892794429459198e-07, "loss": 0.1633, "step": 30009 }, { "epoch": 0.5216499504597681, "grad_norm": 1.4693921806689951, "learning_rate": 4.892513000973415e-07, "loss": 0.2593, "step": 30010 }, { "epoch": 0.521667332997271, "grad_norm": 1.3557683858538607, "learning_rate": 4.892231572828317e-07, "loss": 0.2754, "step": 30011 }, { "epoch": 0.5216847155347738, "grad_norm": 1.6796102368733417, "learning_rate": 4.891950145024796e-07, "loss": 0.3311, "step": 30012 }, { "epoch": 0.5217020980722766, "grad_norm": 3.734501678560659, "learning_rate": 4.891668717563743e-07, "loss": 0.2946, "step": 30013 }, { "epoch": 0.5217194806097795, "grad_norm": 1.9703669286471832, "learning_rate": 4.891387290446049e-07, "loss": 0.3939, "step": 30014 }, { "epoch": 0.5217368631472823, "grad_norm": 1.4637871008403698, "learning_rate": 4.891105863672609e-07, "loss": 0.3592, "step": 30015 }, { "epoch": 0.521754245684785, "grad_norm": 1.345007497704705, "learning_rate": 4.890824437244311e-07, "loss": 0.256, "step": 30016 }, { "epoch": 0.5217716282222878, "grad_norm": 2.8136337811362613, "learning_rate": 4.890543011162052e-07, "loss": 0.2207, "step": 30017 }, { "epoch": 0.5217890107597907, "grad_norm": 1.5362656881969, "learning_rate": 4.89026158542672e-07, "loss": 0.3583, "step": 30018 }, { "epoch": 0.5218063932972935, "grad_norm": 1.2704024257101305, "learning_rate": 4.889980160039211e-07, "loss": 0.2321, "step": 30019 }, { "epoch": 0.5218237758347963, "grad_norm": 0.9252023158003259, "learning_rate": 4.889698735000411e-07, "loss": 0.1686, "step": 30020 }, { "epoch": 0.5218411583722992, "grad_norm": 2.029407095837296, "learning_rate": 4.889417310311216e-07, "loss": 0.3604, "step": 30021 }, { "epoch": 0.521858540909802, "grad_norm": 3.1351983083563235, "learning_rate": 4.889135885972518e-07, "loss": 0.4979, "step": 30022 }, { "epoch": 0.5218759234473048, "grad_norm": 2.5374044977479766, "learning_rate": 4.888854461985208e-07, "loss": 0.3128, "step": 30023 }, { "epoch": 0.5218933059848077, "grad_norm": 1.9384315531937644, "learning_rate": 4.888573038350178e-07, "loss": 0.4306, "step": 30024 }, { "epoch": 0.5219106885223105, "grad_norm": 1.809158896391666, "learning_rate": 4.888291615068321e-07, "loss": 0.3216, "step": 30025 }, { "epoch": 0.5219280710598133, "grad_norm": 2.0733692349308726, "learning_rate": 4.888010192140529e-07, "loss": 0.2608, "step": 30026 }, { "epoch": 0.5219454535973161, "grad_norm": 2.8099109565381846, "learning_rate": 4.887728769567693e-07, "loss": 0.2754, "step": 30027 }, { "epoch": 0.521962836134819, "grad_norm": 0.9831061580301731, "learning_rate": 4.887447347350704e-07, "loss": 0.285, "step": 30028 }, { "epoch": 0.5219802186723218, "grad_norm": 1.3762678751132174, "learning_rate": 4.887165925490456e-07, "loss": 0.2763, "step": 30029 }, { "epoch": 0.5219976012098246, "grad_norm": 1.6173020571160102, "learning_rate": 4.886884503987839e-07, "loss": 0.1648, "step": 30030 }, { "epoch": 0.5220149837473275, "grad_norm": 1.6936298915450552, "learning_rate": 4.886603082843747e-07, "loss": 0.247, "step": 30031 }, { "epoch": 0.5220323662848303, "grad_norm": 3.1076739017760975, "learning_rate": 4.886321662059072e-07, "loss": 0.4092, "step": 30032 }, { "epoch": 0.5220497488223331, "grad_norm": 1.6162481913776783, "learning_rate": 4.886040241634704e-07, "loss": 0.1684, "step": 30033 }, { "epoch": 0.522067131359836, "grad_norm": 1.9916302433620034, "learning_rate": 4.885758821571537e-07, "loss": 0.3389, "step": 30034 }, { "epoch": 0.5220845138973388, "grad_norm": 1.9352771945439127, "learning_rate": 4.885477401870462e-07, "loss": 0.2835, "step": 30035 }, { "epoch": 0.5221018964348415, "grad_norm": 1.8930698839862503, "learning_rate": 4.885195982532369e-07, "loss": 0.4206, "step": 30036 }, { "epoch": 0.5221192789723443, "grad_norm": 1.661956212744097, "learning_rate": 4.884914563558154e-07, "loss": 0.2699, "step": 30037 }, { "epoch": 0.5221366615098472, "grad_norm": 1.614192887628996, "learning_rate": 4.884633144948708e-07, "loss": 0.2889, "step": 30038 }, { "epoch": 0.52215404404735, "grad_norm": 2.43452298555805, "learning_rate": 4.88435172670492e-07, "loss": 0.3793, "step": 30039 }, { "epoch": 0.5221714265848528, "grad_norm": 2.243481554260697, "learning_rate": 4.884070308827684e-07, "loss": 0.3297, "step": 30040 }, { "epoch": 0.5221888091223557, "grad_norm": 1.5831123848251103, "learning_rate": 4.883788891317891e-07, "loss": 0.2111, "step": 30041 }, { "epoch": 0.5222061916598585, "grad_norm": 2.0196866646445906, "learning_rate": 4.883507474176436e-07, "loss": 0.335, "step": 30042 }, { "epoch": 0.5222235741973613, "grad_norm": 4.194055044166824, "learning_rate": 4.883226057404207e-07, "loss": 0.4186, "step": 30043 }, { "epoch": 0.5222409567348641, "grad_norm": 2.0002798770255215, "learning_rate": 4.882944641002098e-07, "loss": 0.2782, "step": 30044 }, { "epoch": 0.522258339272367, "grad_norm": 1.3933043126741294, "learning_rate": 4.882663224971003e-07, "loss": 0.3945, "step": 30045 }, { "epoch": 0.5222757218098698, "grad_norm": 0.873311777106139, "learning_rate": 4.88238180931181e-07, "loss": 0.1245, "step": 30046 }, { "epoch": 0.5222931043473726, "grad_norm": 1.7709982322549778, "learning_rate": 4.882100394025412e-07, "loss": 0.2971, "step": 30047 }, { "epoch": 0.5223104868848755, "grad_norm": 1.648312719335049, "learning_rate": 4.881818979112701e-07, "loss": 0.371, "step": 30048 }, { "epoch": 0.5223278694223783, "grad_norm": 1.577687183966857, "learning_rate": 4.881537564574571e-07, "loss": 0.4237, "step": 30049 }, { "epoch": 0.5223452519598811, "grad_norm": 2.181176007302084, "learning_rate": 4.881256150411912e-07, "loss": 0.2989, "step": 30050 }, { "epoch": 0.522362634497384, "grad_norm": 1.5766096873536293, "learning_rate": 4.880974736625619e-07, "loss": 0.2956, "step": 30051 }, { "epoch": 0.5223800170348868, "grad_norm": 1.935590460898171, "learning_rate": 4.880693323216579e-07, "loss": 0.2593, "step": 30052 }, { "epoch": 0.5223973995723896, "grad_norm": 1.482445692862257, "learning_rate": 4.880411910185686e-07, "loss": 0.2257, "step": 30053 }, { "epoch": 0.5224147821098925, "grad_norm": 0.9368621332743765, "learning_rate": 4.880130497533832e-07, "loss": 0.2208, "step": 30054 }, { "epoch": 0.5224321646473953, "grad_norm": 1.969786376730547, "learning_rate": 4.87984908526191e-07, "loss": 0.4687, "step": 30055 }, { "epoch": 0.522449547184898, "grad_norm": 1.9856539815571197, "learning_rate": 4.879567673370811e-07, "loss": 0.2758, "step": 30056 }, { "epoch": 0.5224669297224008, "grad_norm": 0.9266046895345119, "learning_rate": 4.879286261861429e-07, "loss": 0.1958, "step": 30057 }, { "epoch": 0.5224843122599037, "grad_norm": 1.7874060342532416, "learning_rate": 4.879004850734653e-07, "loss": 0.2639, "step": 30058 }, { "epoch": 0.5225016947974065, "grad_norm": 0.926072518078289, "learning_rate": 4.878723439991374e-07, "loss": 0.1939, "step": 30059 }, { "epoch": 0.5225190773349093, "grad_norm": 1.3911607442222182, "learning_rate": 4.878442029632488e-07, "loss": 0.1776, "step": 30060 }, { "epoch": 0.5225364598724122, "grad_norm": 1.567013631572889, "learning_rate": 4.878160619658883e-07, "loss": 0.2918, "step": 30061 }, { "epoch": 0.522553842409915, "grad_norm": 1.5507320338566213, "learning_rate": 4.877879210071455e-07, "loss": 0.2212, "step": 30062 }, { "epoch": 0.5225712249474178, "grad_norm": 1.3178911546203242, "learning_rate": 4.877597800871092e-07, "loss": 0.1236, "step": 30063 }, { "epoch": 0.5225886074849206, "grad_norm": 1.6968697119873906, "learning_rate": 4.87731639205869e-07, "loss": 0.1757, "step": 30064 }, { "epoch": 0.5226059900224235, "grad_norm": 2.3261493959630437, "learning_rate": 4.877034983635138e-07, "loss": 0.2669, "step": 30065 }, { "epoch": 0.5226233725599263, "grad_norm": 1.3370376583422392, "learning_rate": 4.876753575601328e-07, "loss": 0.2909, "step": 30066 }, { "epoch": 0.5226407550974291, "grad_norm": 1.5589334788531697, "learning_rate": 4.876472167958153e-07, "loss": 0.3395, "step": 30067 }, { "epoch": 0.522658137634932, "grad_norm": 1.5463040179473504, "learning_rate": 4.876190760706504e-07, "loss": 0.2478, "step": 30068 }, { "epoch": 0.5226755201724348, "grad_norm": 1.7650397619958846, "learning_rate": 4.875909353847273e-07, "loss": 0.2501, "step": 30069 }, { "epoch": 0.5226929027099376, "grad_norm": 1.3463278597982167, "learning_rate": 4.875627947381354e-07, "loss": 0.2669, "step": 30070 }, { "epoch": 0.5227102852474405, "grad_norm": 1.6372271955803819, "learning_rate": 4.875346541309636e-07, "loss": 0.197, "step": 30071 }, { "epoch": 0.5227276677849433, "grad_norm": 1.2745894032601013, "learning_rate": 4.875065135633012e-07, "loss": 0.2993, "step": 30072 }, { "epoch": 0.5227450503224461, "grad_norm": 1.8908013366616423, "learning_rate": 4.874783730352375e-07, "loss": 0.2213, "step": 30073 }, { "epoch": 0.522762432859949, "grad_norm": 1.7238672792638348, "learning_rate": 4.874502325468615e-07, "loss": 0.282, "step": 30074 }, { "epoch": 0.5227798153974518, "grad_norm": 0.7974508147978178, "learning_rate": 4.874220920982625e-07, "loss": 0.202, "step": 30075 }, { "epoch": 0.5227971979349545, "grad_norm": 2.0291373978001226, "learning_rate": 4.873939516895299e-07, "loss": 0.223, "step": 30076 }, { "epoch": 0.5228145804724573, "grad_norm": 1.5481839895447949, "learning_rate": 4.873658113207526e-07, "loss": 0.1588, "step": 30077 }, { "epoch": 0.5228319630099602, "grad_norm": 1.9875120184360109, "learning_rate": 4.873376709920196e-07, "loss": 0.3567, "step": 30078 }, { "epoch": 0.522849345547463, "grad_norm": 1.2506277374404249, "learning_rate": 4.873095307034207e-07, "loss": 0.2782, "step": 30079 }, { "epoch": 0.5228667280849658, "grad_norm": 2.2896724104577046, "learning_rate": 4.872813904550445e-07, "loss": 0.3032, "step": 30080 }, { "epoch": 0.5228841106224686, "grad_norm": 1.1155480103483237, "learning_rate": 4.872532502469806e-07, "loss": 0.2651, "step": 30081 }, { "epoch": 0.5229014931599715, "grad_norm": 1.425533634222151, "learning_rate": 4.87225110079318e-07, "loss": 0.1919, "step": 30082 }, { "epoch": 0.5229188756974743, "grad_norm": 2.643758783809352, "learning_rate": 4.871969699521461e-07, "loss": 0.3755, "step": 30083 }, { "epoch": 0.5229362582349771, "grad_norm": 2.0181868961731775, "learning_rate": 4.871688298655537e-07, "loss": 0.3502, "step": 30084 }, { "epoch": 0.52295364077248, "grad_norm": 1.3120233062954807, "learning_rate": 4.871406898196303e-07, "loss": 0.2292, "step": 30085 }, { "epoch": 0.5229710233099828, "grad_norm": 1.0559174523222028, "learning_rate": 4.87112549814465e-07, "loss": 0.2842, "step": 30086 }, { "epoch": 0.5229884058474856, "grad_norm": 1.4274975999915172, "learning_rate": 4.87084409850147e-07, "loss": 0.3045, "step": 30087 }, { "epoch": 0.5230057883849885, "grad_norm": 1.3636481999029397, "learning_rate": 4.870562699267654e-07, "loss": 0.2352, "step": 30088 }, { "epoch": 0.5230231709224913, "grad_norm": 1.623007610789909, "learning_rate": 4.870281300444097e-07, "loss": 0.2015, "step": 30089 }, { "epoch": 0.5230405534599941, "grad_norm": 1.5697880152957664, "learning_rate": 4.869999902031688e-07, "loss": 0.2316, "step": 30090 }, { "epoch": 0.523057935997497, "grad_norm": 1.6786410135288965, "learning_rate": 4.869718504031319e-07, "loss": 0.1589, "step": 30091 }, { "epoch": 0.5230753185349998, "grad_norm": 1.6906177398809061, "learning_rate": 4.869437106443882e-07, "loss": 0.2714, "step": 30092 }, { "epoch": 0.5230927010725026, "grad_norm": 1.7510474459428098, "learning_rate": 4.869155709270269e-07, "loss": 0.2796, "step": 30093 }, { "epoch": 0.5231100836100054, "grad_norm": 2.08485894661668, "learning_rate": 4.868874312511372e-07, "loss": 0.2748, "step": 30094 }, { "epoch": 0.5231274661475082, "grad_norm": 2.6709920143144577, "learning_rate": 4.868592916168086e-07, "loss": 0.2248, "step": 30095 }, { "epoch": 0.523144848685011, "grad_norm": 1.0024788306738013, "learning_rate": 4.868311520241299e-07, "loss": 0.2304, "step": 30096 }, { "epoch": 0.5231622312225138, "grad_norm": 1.5462542041141083, "learning_rate": 4.868030124731903e-07, "loss": 0.2773, "step": 30097 }, { "epoch": 0.5231796137600166, "grad_norm": 1.6330834569698494, "learning_rate": 4.867748729640791e-07, "loss": 0.2727, "step": 30098 }, { "epoch": 0.5231969962975195, "grad_norm": 1.0996365399412587, "learning_rate": 4.867467334968855e-07, "loss": 0.2841, "step": 30099 }, { "epoch": 0.5232143788350223, "grad_norm": 3.0742200107730215, "learning_rate": 4.867185940716986e-07, "loss": 0.2828, "step": 30100 }, { "epoch": 0.5232317613725251, "grad_norm": 1.1947023823360967, "learning_rate": 4.866904546886077e-07, "loss": 0.1892, "step": 30101 }, { "epoch": 0.523249143910028, "grad_norm": 1.6575283124351343, "learning_rate": 4.866623153477022e-07, "loss": 0.27, "step": 30102 }, { "epoch": 0.5232665264475308, "grad_norm": 1.2361921044744146, "learning_rate": 4.866341760490706e-07, "loss": 0.2783, "step": 30103 }, { "epoch": 0.5232839089850336, "grad_norm": 2.0671737198294204, "learning_rate": 4.866060367928028e-07, "loss": 0.4576, "step": 30104 }, { "epoch": 0.5233012915225365, "grad_norm": 1.9597911108752193, "learning_rate": 4.865778975789876e-07, "loss": 0.5113, "step": 30105 }, { "epoch": 0.5233186740600393, "grad_norm": 1.6380253704462244, "learning_rate": 4.865497584077143e-07, "loss": 0.2733, "step": 30106 }, { "epoch": 0.5233360565975421, "grad_norm": 1.753847057275505, "learning_rate": 4.865216192790723e-07, "loss": 0.2589, "step": 30107 }, { "epoch": 0.523353439135045, "grad_norm": 2.502745511097942, "learning_rate": 4.864934801931504e-07, "loss": 0.2976, "step": 30108 }, { "epoch": 0.5233708216725478, "grad_norm": 2.5225493422513274, "learning_rate": 4.864653411500379e-07, "loss": 0.2678, "step": 30109 }, { "epoch": 0.5233882042100506, "grad_norm": 1.4353899110438806, "learning_rate": 4.864372021498241e-07, "loss": 0.3573, "step": 30110 }, { "epoch": 0.5234055867475534, "grad_norm": 1.0087886860490511, "learning_rate": 4.864090631925981e-07, "loss": 0.2195, "step": 30111 }, { "epoch": 0.5234229692850563, "grad_norm": 1.5179148146868318, "learning_rate": 4.863809242784492e-07, "loss": 0.1795, "step": 30112 }, { "epoch": 0.5234403518225591, "grad_norm": 1.7147382545336693, "learning_rate": 4.863527854074664e-07, "loss": 0.2384, "step": 30113 }, { "epoch": 0.5234577343600619, "grad_norm": 1.2144563350048652, "learning_rate": 4.863246465797394e-07, "loss": 0.2049, "step": 30114 }, { "epoch": 0.5234751168975647, "grad_norm": 5.376309840825993, "learning_rate": 4.862965077953566e-07, "loss": 0.4164, "step": 30115 }, { "epoch": 0.5234924994350675, "grad_norm": 2.4908967144526266, "learning_rate": 4.862683690544076e-07, "loss": 0.4622, "step": 30116 }, { "epoch": 0.5235098819725703, "grad_norm": 2.4190250209491007, "learning_rate": 4.862402303569815e-07, "loss": 0.3473, "step": 30117 }, { "epoch": 0.5235272645100731, "grad_norm": 1.3799402446449627, "learning_rate": 4.862120917031677e-07, "loss": 0.3202, "step": 30118 }, { "epoch": 0.523544647047576, "grad_norm": 2.00420795494629, "learning_rate": 4.861839530930552e-07, "loss": 0.4757, "step": 30119 }, { "epoch": 0.5235620295850788, "grad_norm": 1.9635184300501627, "learning_rate": 4.861558145267333e-07, "loss": 0.1954, "step": 30120 }, { "epoch": 0.5235794121225816, "grad_norm": 1.2724140314418708, "learning_rate": 4.861276760042911e-07, "loss": 0.297, "step": 30121 }, { "epoch": 0.5235967946600845, "grad_norm": 3.3379891964635777, "learning_rate": 4.860995375258176e-07, "loss": 0.3876, "step": 30122 }, { "epoch": 0.5236141771975873, "grad_norm": 1.1654563228993902, "learning_rate": 4.860713990914024e-07, "loss": 0.1351, "step": 30123 }, { "epoch": 0.5236315597350901, "grad_norm": 1.6798511008400976, "learning_rate": 4.860432607011344e-07, "loss": 0.2084, "step": 30124 }, { "epoch": 0.523648942272593, "grad_norm": 1.6123845424170147, "learning_rate": 4.860151223551027e-07, "loss": 0.3254, "step": 30125 }, { "epoch": 0.5236663248100958, "grad_norm": 1.916191429152015, "learning_rate": 4.859869840533968e-07, "loss": 0.2704, "step": 30126 }, { "epoch": 0.5236837073475986, "grad_norm": 1.5805410408100227, "learning_rate": 4.859588457961058e-07, "loss": 0.3142, "step": 30127 }, { "epoch": 0.5237010898851014, "grad_norm": 1.482136536294018, "learning_rate": 4.859307075833186e-07, "loss": 0.2586, "step": 30128 }, { "epoch": 0.5237184724226043, "grad_norm": 2.017006675322419, "learning_rate": 4.859025694151248e-07, "loss": 0.2361, "step": 30129 }, { "epoch": 0.5237358549601071, "grad_norm": 1.851602088069971, "learning_rate": 4.858744312916132e-07, "loss": 0.3143, "step": 30130 }, { "epoch": 0.5237532374976099, "grad_norm": 1.3135978208142718, "learning_rate": 4.858462932128732e-07, "loss": 0.1863, "step": 30131 }, { "epoch": 0.5237706200351128, "grad_norm": 1.190187305108385, "learning_rate": 4.85818155178994e-07, "loss": 0.1322, "step": 30132 }, { "epoch": 0.5237880025726156, "grad_norm": 1.1651360742071575, "learning_rate": 4.85790017190065e-07, "loss": 0.216, "step": 30133 }, { "epoch": 0.5238053851101184, "grad_norm": 1.2380426678256766, "learning_rate": 4.857618792461746e-07, "loss": 0.3083, "step": 30134 }, { "epoch": 0.5238227676476211, "grad_norm": 1.7260709871774955, "learning_rate": 4.857337413474127e-07, "loss": 0.2879, "step": 30135 }, { "epoch": 0.523840150185124, "grad_norm": 3.622456309456363, "learning_rate": 4.857056034938683e-07, "loss": 0.2535, "step": 30136 }, { "epoch": 0.5238575327226268, "grad_norm": 1.3931205846263082, "learning_rate": 4.856774656856307e-07, "loss": 0.2398, "step": 30137 }, { "epoch": 0.5238749152601296, "grad_norm": 1.5548979556708584, "learning_rate": 4.856493279227888e-07, "loss": 0.3242, "step": 30138 }, { "epoch": 0.5238922977976325, "grad_norm": 1.6908702319787048, "learning_rate": 4.856211902054322e-07, "loss": 0.3261, "step": 30139 }, { "epoch": 0.5239096803351353, "grad_norm": 1.939553042647101, "learning_rate": 4.855930525336496e-07, "loss": 0.1662, "step": 30140 }, { "epoch": 0.5239270628726381, "grad_norm": 1.233630486483842, "learning_rate": 4.855649149075303e-07, "loss": 0.2997, "step": 30141 }, { "epoch": 0.523944445410141, "grad_norm": 2.2287972223935366, "learning_rate": 4.855367773271637e-07, "loss": 0.2222, "step": 30142 }, { "epoch": 0.5239618279476438, "grad_norm": 1.731162887800844, "learning_rate": 4.855086397926389e-07, "loss": 0.2339, "step": 30143 }, { "epoch": 0.5239792104851466, "grad_norm": 3.3391789406190675, "learning_rate": 4.85480502304045e-07, "loss": 0.2887, "step": 30144 }, { "epoch": 0.5239965930226494, "grad_norm": 1.7306745032668918, "learning_rate": 4.854523648614712e-07, "loss": 0.2834, "step": 30145 }, { "epoch": 0.5240139755601523, "grad_norm": 2.069674614804074, "learning_rate": 4.85424227465007e-07, "loss": 0.1611, "step": 30146 }, { "epoch": 0.5240313580976551, "grad_norm": 1.3100822062273798, "learning_rate": 4.85396090114741e-07, "loss": 0.2726, "step": 30147 }, { "epoch": 0.5240487406351579, "grad_norm": 2.0260902683596473, "learning_rate": 4.853679528107627e-07, "loss": 0.3144, "step": 30148 }, { "epoch": 0.5240661231726608, "grad_norm": 2.0895249314899025, "learning_rate": 4.853398155531615e-07, "loss": 0.1858, "step": 30149 }, { "epoch": 0.5240835057101636, "grad_norm": 2.7933511187722755, "learning_rate": 4.85311678342026e-07, "loss": 0.4638, "step": 30150 }, { "epoch": 0.5241008882476664, "grad_norm": 1.8367454564692605, "learning_rate": 4.85283541177446e-07, "loss": 0.2942, "step": 30151 }, { "epoch": 0.5241182707851693, "grad_norm": 1.7742382598368804, "learning_rate": 4.852554040595105e-07, "loss": 0.186, "step": 30152 }, { "epoch": 0.5241356533226721, "grad_norm": 2.081682099774347, "learning_rate": 4.852272669883083e-07, "loss": 0.3985, "step": 30153 }, { "epoch": 0.5241530358601749, "grad_norm": 1.5881538553335541, "learning_rate": 4.85199129963929e-07, "loss": 0.2903, "step": 30154 }, { "epoch": 0.5241704183976776, "grad_norm": 4.5315231872755195, "learning_rate": 4.851709929864618e-07, "loss": 0.2791, "step": 30155 }, { "epoch": 0.5241878009351805, "grad_norm": 1.5072015948036475, "learning_rate": 4.851428560559955e-07, "loss": 0.3246, "step": 30156 }, { "epoch": 0.5242051834726833, "grad_norm": 1.4013341568646178, "learning_rate": 4.851147191726196e-07, "loss": 0.2683, "step": 30157 }, { "epoch": 0.5242225660101861, "grad_norm": 1.8765600160974576, "learning_rate": 4.850865823364234e-07, "loss": 0.4182, "step": 30158 }, { "epoch": 0.524239948547689, "grad_norm": 2.661495769049981, "learning_rate": 4.850584455474957e-07, "loss": 0.3647, "step": 30159 }, { "epoch": 0.5242573310851918, "grad_norm": 1.617211920220049, "learning_rate": 4.85030308805926e-07, "loss": 0.335, "step": 30160 }, { "epoch": 0.5242747136226946, "grad_norm": 2.4104995811454955, "learning_rate": 4.85002172111803e-07, "loss": 0.2615, "step": 30161 }, { "epoch": 0.5242920961601975, "grad_norm": 1.43467066758773, "learning_rate": 4.849740354652165e-07, "loss": 0.3143, "step": 30162 }, { "epoch": 0.5243094786977003, "grad_norm": 2.0936176570633918, "learning_rate": 4.849458988662555e-07, "loss": 0.2101, "step": 30163 }, { "epoch": 0.5243268612352031, "grad_norm": 2.6587388200312647, "learning_rate": 4.849177623150088e-07, "loss": 0.2651, "step": 30164 }, { "epoch": 0.5243442437727059, "grad_norm": 1.4596611530036163, "learning_rate": 4.848896258115662e-07, "loss": 0.4715, "step": 30165 }, { "epoch": 0.5243616263102088, "grad_norm": 1.13426292544578, "learning_rate": 4.848614893560164e-07, "loss": 0.3486, "step": 30166 }, { "epoch": 0.5243790088477116, "grad_norm": 2.215430143157506, "learning_rate": 4.848333529484486e-07, "loss": 0.2061, "step": 30167 }, { "epoch": 0.5243963913852144, "grad_norm": 1.4166173153592612, "learning_rate": 4.848052165889521e-07, "loss": 0.3825, "step": 30168 }, { "epoch": 0.5244137739227173, "grad_norm": 1.5496479461255437, "learning_rate": 4.847770802776162e-07, "loss": 0.3488, "step": 30169 }, { "epoch": 0.5244311564602201, "grad_norm": 2.542339001478854, "learning_rate": 4.847489440145299e-07, "loss": 0.3181, "step": 30170 }, { "epoch": 0.5244485389977229, "grad_norm": 1.0988075299987787, "learning_rate": 4.847208077997827e-07, "loss": 0.2167, "step": 30171 }, { "epoch": 0.5244659215352258, "grad_norm": 1.5323925192413765, "learning_rate": 4.846926716334632e-07, "loss": 0.2007, "step": 30172 }, { "epoch": 0.5244833040727286, "grad_norm": 1.3874934141088153, "learning_rate": 4.846645355156609e-07, "loss": 0.2366, "step": 30173 }, { "epoch": 0.5245006866102314, "grad_norm": 1.4004357228186362, "learning_rate": 4.846363994464651e-07, "loss": 0.4918, "step": 30174 }, { "epoch": 0.5245180691477341, "grad_norm": 2.6644648495126506, "learning_rate": 4.846082634259648e-07, "loss": 0.2104, "step": 30175 }, { "epoch": 0.524535451685237, "grad_norm": 3.0516576272447598, "learning_rate": 4.845801274542493e-07, "loss": 0.293, "step": 30176 }, { "epoch": 0.5245528342227398, "grad_norm": 1.7253453238969503, "learning_rate": 4.845519915314078e-07, "loss": 0.3013, "step": 30177 }, { "epoch": 0.5245702167602426, "grad_norm": 1.358932790560016, "learning_rate": 4.845238556575291e-07, "loss": 0.1552, "step": 30178 }, { "epoch": 0.5245875992977455, "grad_norm": 1.0576363132638686, "learning_rate": 4.84495719832703e-07, "loss": 0.1716, "step": 30179 }, { "epoch": 0.5246049818352483, "grad_norm": 2.223138738057507, "learning_rate": 4.844675840570181e-07, "loss": 0.3054, "step": 30180 }, { "epoch": 0.5246223643727511, "grad_norm": 1.3807060416823533, "learning_rate": 4.844394483305638e-07, "loss": 0.3817, "step": 30181 }, { "epoch": 0.524639746910254, "grad_norm": 1.6530017691934633, "learning_rate": 4.844113126534295e-07, "loss": 0.2333, "step": 30182 }, { "epoch": 0.5246571294477568, "grad_norm": 1.6652001536515695, "learning_rate": 4.84383177025704e-07, "loss": 0.4427, "step": 30183 }, { "epoch": 0.5246745119852596, "grad_norm": 1.278395717591334, "learning_rate": 4.84355041447477e-07, "loss": 0.2046, "step": 30184 }, { "epoch": 0.5246918945227624, "grad_norm": 1.5720489879221857, "learning_rate": 4.84326905918837e-07, "loss": 0.276, "step": 30185 }, { "epoch": 0.5247092770602653, "grad_norm": 2.0165873218416652, "learning_rate": 4.842987704398735e-07, "loss": 0.4069, "step": 30186 }, { "epoch": 0.5247266595977681, "grad_norm": 1.313701989449811, "learning_rate": 4.842706350106757e-07, "loss": 0.1448, "step": 30187 }, { "epoch": 0.5247440421352709, "grad_norm": 1.252791562525719, "learning_rate": 4.84242499631333e-07, "loss": 0.1914, "step": 30188 }, { "epoch": 0.5247614246727738, "grad_norm": 1.2979397391487775, "learning_rate": 4.84214364301934e-07, "loss": 0.209, "step": 30189 }, { "epoch": 0.5247788072102766, "grad_norm": 2.324599629016267, "learning_rate": 4.841862290225685e-07, "loss": 0.2038, "step": 30190 }, { "epoch": 0.5247961897477794, "grad_norm": 1.1454730011197105, "learning_rate": 4.841580937933253e-07, "loss": 0.334, "step": 30191 }, { "epoch": 0.5248135722852822, "grad_norm": 2.117656565294887, "learning_rate": 4.841299586142936e-07, "loss": 0.2202, "step": 30192 }, { "epoch": 0.5248309548227851, "grad_norm": 1.9224980556284712, "learning_rate": 4.841018234855627e-07, "loss": 0.2611, "step": 30193 }, { "epoch": 0.5248483373602879, "grad_norm": 1.8383253171526288, "learning_rate": 4.840736884072216e-07, "loss": 0.1834, "step": 30194 }, { "epoch": 0.5248657198977906, "grad_norm": 2.764009041770286, "learning_rate": 4.840455533793597e-07, "loss": 0.2705, "step": 30195 }, { "epoch": 0.5248831024352935, "grad_norm": 1.4163681884569548, "learning_rate": 4.840174184020661e-07, "loss": 0.355, "step": 30196 }, { "epoch": 0.5249004849727963, "grad_norm": 1.7644796705738388, "learning_rate": 4.8398928347543e-07, "loss": 0.3901, "step": 30197 }, { "epoch": 0.5249178675102991, "grad_norm": 1.4087160410521613, "learning_rate": 4.839611485995403e-07, "loss": 0.164, "step": 30198 }, { "epoch": 0.524935250047802, "grad_norm": 1.8659660416688875, "learning_rate": 4.839330137744865e-07, "loss": 0.3229, "step": 30199 }, { "epoch": 0.5249526325853048, "grad_norm": 1.5415629480005817, "learning_rate": 4.839048790003576e-07, "loss": 0.206, "step": 30200 }, { "epoch": 0.5249700151228076, "grad_norm": 1.5252936888940707, "learning_rate": 4.838767442772429e-07, "loss": 0.2349, "step": 30201 }, { "epoch": 0.5249873976603104, "grad_norm": 1.5941098888876326, "learning_rate": 4.838486096052315e-07, "loss": 0.204, "step": 30202 }, { "epoch": 0.5250047801978133, "grad_norm": 4.886357780306892, "learning_rate": 4.838204749844129e-07, "loss": 0.3626, "step": 30203 }, { "epoch": 0.5250221627353161, "grad_norm": 2.1596866455855657, "learning_rate": 4.837923404148756e-07, "loss": 0.387, "step": 30204 }, { "epoch": 0.5250395452728189, "grad_norm": 1.6086860524241458, "learning_rate": 4.837642058967092e-07, "loss": 0.2407, "step": 30205 }, { "epoch": 0.5250569278103218, "grad_norm": 2.3285145715806626, "learning_rate": 4.837360714300027e-07, "loss": 0.219, "step": 30206 }, { "epoch": 0.5250743103478246, "grad_norm": 0.6083685054026373, "learning_rate": 4.837079370148455e-07, "loss": 0.2287, "step": 30207 }, { "epoch": 0.5250916928853274, "grad_norm": 2.828913978616489, "learning_rate": 4.836798026513266e-07, "loss": 0.3949, "step": 30208 }, { "epoch": 0.5251090754228303, "grad_norm": 1.2924089419486218, "learning_rate": 4.836516683395354e-07, "loss": 0.2255, "step": 30209 }, { "epoch": 0.5251264579603331, "grad_norm": 1.167946325094128, "learning_rate": 4.836235340795609e-07, "loss": 0.1707, "step": 30210 }, { "epoch": 0.5251438404978359, "grad_norm": 1.756070239754731, "learning_rate": 4.835953998714921e-07, "loss": 0.197, "step": 30211 }, { "epoch": 0.5251612230353387, "grad_norm": 1.1990817581008613, "learning_rate": 4.835672657154184e-07, "loss": 0.1413, "step": 30212 }, { "epoch": 0.5251786055728416, "grad_norm": 1.4371140394192174, "learning_rate": 4.835391316114289e-07, "loss": 0.2427, "step": 30213 }, { "epoch": 0.5251959881103444, "grad_norm": 1.6131281884293933, "learning_rate": 4.835109975596127e-07, "loss": 0.2326, "step": 30214 }, { "epoch": 0.5252133706478471, "grad_norm": 2.370221093444617, "learning_rate": 4.834828635600593e-07, "loss": 0.1426, "step": 30215 }, { "epoch": 0.52523075318535, "grad_norm": 2.0963977186627187, "learning_rate": 4.834547296128576e-07, "loss": 0.3451, "step": 30216 }, { "epoch": 0.5252481357228528, "grad_norm": 1.6477791960377672, "learning_rate": 4.834265957180967e-07, "loss": 0.2968, "step": 30217 }, { "epoch": 0.5252655182603556, "grad_norm": 1.4445207232883146, "learning_rate": 4.833984618758659e-07, "loss": 0.2574, "step": 30218 }, { "epoch": 0.5252829007978584, "grad_norm": 1.6873493515357367, "learning_rate": 4.833703280862543e-07, "loss": 0.272, "step": 30219 }, { "epoch": 0.5253002833353613, "grad_norm": 1.1434591356701271, "learning_rate": 4.833421943493511e-07, "loss": 0.2798, "step": 30220 }, { "epoch": 0.5253176658728641, "grad_norm": 1.1391175391453048, "learning_rate": 4.833140606652456e-07, "loss": 0.2019, "step": 30221 }, { "epoch": 0.5253350484103669, "grad_norm": 1.7294787918943784, "learning_rate": 4.832859270340269e-07, "loss": 0.1875, "step": 30222 }, { "epoch": 0.5253524309478698, "grad_norm": 1.7893019128547023, "learning_rate": 4.832577934557841e-07, "loss": 0.3563, "step": 30223 }, { "epoch": 0.5253698134853726, "grad_norm": 2.989706616272827, "learning_rate": 4.832296599306063e-07, "loss": 0.2321, "step": 30224 }, { "epoch": 0.5253871960228754, "grad_norm": 1.3566814194925503, "learning_rate": 4.832015264585828e-07, "loss": 0.3941, "step": 30225 }, { "epoch": 0.5254045785603783, "grad_norm": 1.0563113762597225, "learning_rate": 4.831733930398027e-07, "loss": 0.3649, "step": 30226 }, { "epoch": 0.5254219610978811, "grad_norm": 3.492666866011352, "learning_rate": 4.831452596743553e-07, "loss": 0.4276, "step": 30227 }, { "epoch": 0.5254393436353839, "grad_norm": 1.990873331658431, "learning_rate": 4.831171263623299e-07, "loss": 0.2135, "step": 30228 }, { "epoch": 0.5254567261728867, "grad_norm": 2.737682435499394, "learning_rate": 4.830889931038152e-07, "loss": 0.2655, "step": 30229 }, { "epoch": 0.5254741087103896, "grad_norm": 1.249681421098086, "learning_rate": 4.830608598989007e-07, "loss": 0.153, "step": 30230 }, { "epoch": 0.5254914912478924, "grad_norm": 1.058931000383933, "learning_rate": 4.830327267476753e-07, "loss": 0.3534, "step": 30231 }, { "epoch": 0.5255088737853952, "grad_norm": 1.3668947625830152, "learning_rate": 4.830045936502286e-07, "loss": 0.1953, "step": 30232 }, { "epoch": 0.5255262563228981, "grad_norm": 1.6503602847911876, "learning_rate": 4.829764606066494e-07, "loss": 0.1578, "step": 30233 }, { "epoch": 0.5255436388604009, "grad_norm": 1.4119779251145217, "learning_rate": 4.829483276170271e-07, "loss": 0.3868, "step": 30234 }, { "epoch": 0.5255610213979036, "grad_norm": 2.4430363249256004, "learning_rate": 4.829201946814508e-07, "loss": 0.22, "step": 30235 }, { "epoch": 0.5255784039354064, "grad_norm": 1.6494172200177482, "learning_rate": 4.828920618000096e-07, "loss": 0.2777, "step": 30236 }, { "epoch": 0.5255957864729093, "grad_norm": 1.4860578046606903, "learning_rate": 4.828639289727924e-07, "loss": 0.2655, "step": 30237 }, { "epoch": 0.5256131690104121, "grad_norm": 1.9800436120300822, "learning_rate": 4.82835796199889e-07, "loss": 0.2879, "step": 30238 }, { "epoch": 0.5256305515479149, "grad_norm": 2.001156211173327, "learning_rate": 4.828076634813881e-07, "loss": 0.2118, "step": 30239 }, { "epoch": 0.5256479340854178, "grad_norm": 2.0004455843812763, "learning_rate": 4.827795308173792e-07, "loss": 0.229, "step": 30240 }, { "epoch": 0.5256653166229206, "grad_norm": 1.6665648059688607, "learning_rate": 4.827513982079512e-07, "loss": 0.2644, "step": 30241 }, { "epoch": 0.5256826991604234, "grad_norm": 1.267150462801764, "learning_rate": 4.827232656531932e-07, "loss": 0.2153, "step": 30242 }, { "epoch": 0.5257000816979263, "grad_norm": 1.2016265861179352, "learning_rate": 4.826951331531947e-07, "loss": 0.2016, "step": 30243 }, { "epoch": 0.5257174642354291, "grad_norm": 3.6828060324168646, "learning_rate": 4.826670007080446e-07, "loss": 0.2162, "step": 30244 }, { "epoch": 0.5257348467729319, "grad_norm": 0.9739012900787304, "learning_rate": 4.82638868317832e-07, "loss": 0.139, "step": 30245 }, { "epoch": 0.5257522293104347, "grad_norm": 1.026230243829452, "learning_rate": 4.826107359826464e-07, "loss": 0.2339, "step": 30246 }, { "epoch": 0.5257696118479376, "grad_norm": 0.803883995015934, "learning_rate": 4.825826037025768e-07, "loss": 0.3326, "step": 30247 }, { "epoch": 0.5257869943854404, "grad_norm": 2.0542851606265162, "learning_rate": 4.825544714777122e-07, "loss": 0.1478, "step": 30248 }, { "epoch": 0.5258043769229432, "grad_norm": 1.12211462118967, "learning_rate": 4.825263393081421e-07, "loss": 0.2622, "step": 30249 }, { "epoch": 0.5258217594604461, "grad_norm": 2.638066230695402, "learning_rate": 4.824982071939552e-07, "loss": 0.2452, "step": 30250 }, { "epoch": 0.5258391419979489, "grad_norm": 1.308057097640988, "learning_rate": 4.824700751352412e-07, "loss": 0.2051, "step": 30251 }, { "epoch": 0.5258565245354517, "grad_norm": 1.932976912765985, "learning_rate": 4.824419431320889e-07, "loss": 0.2383, "step": 30252 }, { "epoch": 0.5258739070729546, "grad_norm": 1.3777576227456876, "learning_rate": 4.824138111845877e-07, "loss": 0.233, "step": 30253 }, { "epoch": 0.5258912896104573, "grad_norm": 1.3587819410309847, "learning_rate": 4.823856792928265e-07, "loss": 0.3135, "step": 30254 }, { "epoch": 0.5259086721479601, "grad_norm": 1.393198929566756, "learning_rate": 4.823575474568946e-07, "loss": 0.2311, "step": 30255 }, { "epoch": 0.5259260546854629, "grad_norm": 1.4027594651765423, "learning_rate": 4.823294156768812e-07, "loss": 0.2659, "step": 30256 }, { "epoch": 0.5259434372229658, "grad_norm": 1.5974611785593673, "learning_rate": 4.823012839528755e-07, "loss": 0.1981, "step": 30257 }, { "epoch": 0.5259608197604686, "grad_norm": 1.4691032299204088, "learning_rate": 4.822731522849664e-07, "loss": 0.1254, "step": 30258 }, { "epoch": 0.5259782022979714, "grad_norm": 1.5334104681502696, "learning_rate": 4.822450206732437e-07, "loss": 0.2081, "step": 30259 }, { "epoch": 0.5259955848354743, "grad_norm": 0.9522456025883053, "learning_rate": 4.822168891177957e-07, "loss": 0.303, "step": 30260 }, { "epoch": 0.5260129673729771, "grad_norm": 1.26645380231957, "learning_rate": 4.821887576187121e-07, "loss": 0.1858, "step": 30261 }, { "epoch": 0.5260303499104799, "grad_norm": 1.9850455420375888, "learning_rate": 4.82160626176082e-07, "loss": 0.3387, "step": 30262 }, { "epoch": 0.5260477324479828, "grad_norm": 1.099460540418857, "learning_rate": 4.821324947899944e-07, "loss": 0.1701, "step": 30263 }, { "epoch": 0.5260651149854856, "grad_norm": 1.331171532777934, "learning_rate": 4.821043634605386e-07, "loss": 0.2422, "step": 30264 }, { "epoch": 0.5260824975229884, "grad_norm": 1.0958349184323066, "learning_rate": 4.820762321878039e-07, "loss": 0.2245, "step": 30265 }, { "epoch": 0.5260998800604912, "grad_norm": 1.4277102216126483, "learning_rate": 4.820481009718795e-07, "loss": 0.2015, "step": 30266 }, { "epoch": 0.5261172625979941, "grad_norm": 1.7163026970022415, "learning_rate": 4.820199698128539e-07, "loss": 0.2711, "step": 30267 }, { "epoch": 0.5261346451354969, "grad_norm": 2.1667304764956445, "learning_rate": 4.81991838710817e-07, "loss": 0.3269, "step": 30268 }, { "epoch": 0.5261520276729997, "grad_norm": 3.717750955742967, "learning_rate": 4.819637076658577e-07, "loss": 0.277, "step": 30269 }, { "epoch": 0.5261694102105026, "grad_norm": 1.9182456536841772, "learning_rate": 4.81935576678065e-07, "loss": 0.2697, "step": 30270 }, { "epoch": 0.5261867927480054, "grad_norm": 1.0358440236504967, "learning_rate": 4.819074457475284e-07, "loss": 0.2717, "step": 30271 }, { "epoch": 0.5262041752855082, "grad_norm": 1.4067294004663087, "learning_rate": 4.81879314874337e-07, "loss": 0.1976, "step": 30272 }, { "epoch": 0.526221557823011, "grad_norm": 1.9256511825917766, "learning_rate": 4.818511840585796e-07, "loss": 0.3233, "step": 30273 }, { "epoch": 0.5262389403605138, "grad_norm": 0.9812958922001355, "learning_rate": 4.818230533003456e-07, "loss": 0.2428, "step": 30274 }, { "epoch": 0.5262563228980166, "grad_norm": 1.6619624512766134, "learning_rate": 4.817949225997242e-07, "loss": 0.3486, "step": 30275 }, { "epoch": 0.5262737054355194, "grad_norm": 1.4695203323940897, "learning_rate": 4.817667919568046e-07, "loss": 0.1691, "step": 30276 }, { "epoch": 0.5262910879730223, "grad_norm": 2.2412940974871125, "learning_rate": 4.817386613716759e-07, "loss": 0.1851, "step": 30277 }, { "epoch": 0.5263084705105251, "grad_norm": 1.975737483732962, "learning_rate": 4.817105308444274e-07, "loss": 0.2281, "step": 30278 }, { "epoch": 0.5263258530480279, "grad_norm": 1.4748816328953873, "learning_rate": 4.816824003751478e-07, "loss": 0.2047, "step": 30279 }, { "epoch": 0.5263432355855308, "grad_norm": 2.0561760509494675, "learning_rate": 4.816542699639268e-07, "loss": 0.259, "step": 30280 }, { "epoch": 0.5263606181230336, "grad_norm": 2.235899486308108, "learning_rate": 4.816261396108532e-07, "loss": 0.4971, "step": 30281 }, { "epoch": 0.5263780006605364, "grad_norm": 1.3171022389040363, "learning_rate": 4.815980093160164e-07, "loss": 0.2193, "step": 30282 }, { "epoch": 0.5263953831980392, "grad_norm": 0.9522426025128897, "learning_rate": 4.815698790795053e-07, "loss": 0.1513, "step": 30283 }, { "epoch": 0.5264127657355421, "grad_norm": 1.4409619505188742, "learning_rate": 4.815417489014094e-07, "loss": 0.3142, "step": 30284 }, { "epoch": 0.5264301482730449, "grad_norm": 2.072590729115459, "learning_rate": 4.815136187818177e-07, "loss": 0.5321, "step": 30285 }, { "epoch": 0.5264475308105477, "grad_norm": 1.5427033276807725, "learning_rate": 4.814854887208193e-07, "loss": 0.3893, "step": 30286 }, { "epoch": 0.5264649133480506, "grad_norm": 1.3405552811488928, "learning_rate": 4.814573587185032e-07, "loss": 0.2571, "step": 30287 }, { "epoch": 0.5264822958855534, "grad_norm": 1.6806399734647484, "learning_rate": 4.814292287749589e-07, "loss": 0.3454, "step": 30288 }, { "epoch": 0.5264996784230562, "grad_norm": 1.3369895804975456, "learning_rate": 4.814010988902754e-07, "loss": 0.1263, "step": 30289 }, { "epoch": 0.5265170609605591, "grad_norm": 1.9081337425438345, "learning_rate": 4.81372969064542e-07, "loss": 0.3249, "step": 30290 }, { "epoch": 0.5265344434980619, "grad_norm": 2.1553571980221733, "learning_rate": 4.813448392978477e-07, "loss": 0.337, "step": 30291 }, { "epoch": 0.5265518260355647, "grad_norm": 1.430234690816086, "learning_rate": 4.813167095902816e-07, "loss": 0.3023, "step": 30292 }, { "epoch": 0.5265692085730675, "grad_norm": 1.6387147314410653, "learning_rate": 4.812885799419328e-07, "loss": 0.2585, "step": 30293 }, { "epoch": 0.5265865911105703, "grad_norm": 1.4639046784192917, "learning_rate": 4.812604503528909e-07, "loss": 0.2016, "step": 30294 }, { "epoch": 0.5266039736480731, "grad_norm": 1.4632074317318793, "learning_rate": 4.812323208232445e-07, "loss": 0.1833, "step": 30295 }, { "epoch": 0.5266213561855759, "grad_norm": 1.4760287079268957, "learning_rate": 4.812041913530831e-07, "loss": 0.2641, "step": 30296 }, { "epoch": 0.5266387387230788, "grad_norm": 1.618313496127854, "learning_rate": 4.81176061942496e-07, "loss": 0.2455, "step": 30297 }, { "epoch": 0.5266561212605816, "grad_norm": 2.6233413019952865, "learning_rate": 4.811479325915719e-07, "loss": 0.2338, "step": 30298 }, { "epoch": 0.5266735037980844, "grad_norm": 1.4651835781288134, "learning_rate": 4.811198033004003e-07, "loss": 0.1887, "step": 30299 }, { "epoch": 0.5266908863355872, "grad_norm": 1.4791588915904677, "learning_rate": 4.810916740690703e-07, "loss": 0.3131, "step": 30300 }, { "epoch": 0.5267082688730901, "grad_norm": 1.8345529996167729, "learning_rate": 4.810635448976707e-07, "loss": 0.2364, "step": 30301 }, { "epoch": 0.5267256514105929, "grad_norm": 1.3118493318589834, "learning_rate": 4.810354157862912e-07, "loss": 0.2601, "step": 30302 }, { "epoch": 0.5267430339480957, "grad_norm": 1.4301880267245435, "learning_rate": 4.810072867350207e-07, "loss": 0.1829, "step": 30303 }, { "epoch": 0.5267604164855986, "grad_norm": 1.7384203551093962, "learning_rate": 4.809791577439485e-07, "loss": 0.4161, "step": 30304 }, { "epoch": 0.5267777990231014, "grad_norm": 1.7602328395524591, "learning_rate": 4.809510288131635e-07, "loss": 0.3853, "step": 30305 }, { "epoch": 0.5267951815606042, "grad_norm": 1.9304361450259846, "learning_rate": 4.80922899942755e-07, "loss": 0.3014, "step": 30306 }, { "epoch": 0.5268125640981071, "grad_norm": 1.7242161039377446, "learning_rate": 4.808947711328121e-07, "loss": 0.1783, "step": 30307 }, { "epoch": 0.5268299466356099, "grad_norm": 2.219606385278555, "learning_rate": 4.808666423834239e-07, "loss": 0.1874, "step": 30308 }, { "epoch": 0.5268473291731127, "grad_norm": 0.8189726499988031, "learning_rate": 4.808385136946798e-07, "loss": 0.1995, "step": 30309 }, { "epoch": 0.5268647117106156, "grad_norm": 1.735051192226978, "learning_rate": 4.80810385066669e-07, "loss": 0.4088, "step": 30310 }, { "epoch": 0.5268820942481184, "grad_norm": 1.4534529817900403, "learning_rate": 4.807822564994803e-07, "loss": 0.2284, "step": 30311 }, { "epoch": 0.5268994767856212, "grad_norm": 2.454754289938918, "learning_rate": 4.807541279932029e-07, "loss": 0.239, "step": 30312 }, { "epoch": 0.526916859323124, "grad_norm": 2.916235996478861, "learning_rate": 4.807259995479262e-07, "loss": 0.4039, "step": 30313 }, { "epoch": 0.5269342418606268, "grad_norm": 3.2151535657306627, "learning_rate": 4.806978711637391e-07, "loss": 0.428, "step": 30314 }, { "epoch": 0.5269516243981296, "grad_norm": 2.8086040449089724, "learning_rate": 4.806697428407311e-07, "loss": 0.3726, "step": 30315 }, { "epoch": 0.5269690069356324, "grad_norm": 1.5715768307886973, "learning_rate": 4.80641614578991e-07, "loss": 0.3155, "step": 30316 }, { "epoch": 0.5269863894731353, "grad_norm": 1.1337686923178505, "learning_rate": 4.806134863786082e-07, "loss": 0.3633, "step": 30317 }, { "epoch": 0.5270037720106381, "grad_norm": 1.0764683408631868, "learning_rate": 4.805853582396715e-07, "loss": 0.222, "step": 30318 }, { "epoch": 0.5270211545481409, "grad_norm": 1.5226107243097602, "learning_rate": 4.805572301622705e-07, "loss": 0.2692, "step": 30319 }, { "epoch": 0.5270385370856437, "grad_norm": 1.7038066270526957, "learning_rate": 4.80529102146494e-07, "loss": 0.3316, "step": 30320 }, { "epoch": 0.5270559196231466, "grad_norm": 1.6012215659663767, "learning_rate": 4.805009741924314e-07, "loss": 0.224, "step": 30321 }, { "epoch": 0.5270733021606494, "grad_norm": 1.6383789427121236, "learning_rate": 4.804728463001716e-07, "loss": 0.15, "step": 30322 }, { "epoch": 0.5270906846981522, "grad_norm": 1.6734356461803084, "learning_rate": 4.804447184698043e-07, "loss": 0.215, "step": 30323 }, { "epoch": 0.5271080672356551, "grad_norm": 2.251545869528535, "learning_rate": 4.80416590701418e-07, "loss": 0.2024, "step": 30324 }, { "epoch": 0.5271254497731579, "grad_norm": 1.4155051314463105, "learning_rate": 4.803884629951021e-07, "loss": 0.3255, "step": 30325 }, { "epoch": 0.5271428323106607, "grad_norm": 2.4103304953233247, "learning_rate": 4.803603353509458e-07, "loss": 0.2687, "step": 30326 }, { "epoch": 0.5271602148481636, "grad_norm": 2.1531053844294243, "learning_rate": 4.803322077690382e-07, "loss": 0.1811, "step": 30327 }, { "epoch": 0.5271775973856664, "grad_norm": 2.1795724707248376, "learning_rate": 4.803040802494683e-07, "loss": 0.3679, "step": 30328 }, { "epoch": 0.5271949799231692, "grad_norm": 2.2371443689503545, "learning_rate": 4.802759527923257e-07, "loss": 0.2569, "step": 30329 }, { "epoch": 0.527212362460672, "grad_norm": 3.6970389635882683, "learning_rate": 4.802478253976993e-07, "loss": 0.2846, "step": 30330 }, { "epoch": 0.5272297449981749, "grad_norm": 1.9374241474329779, "learning_rate": 4.802196980656779e-07, "loss": 0.2088, "step": 30331 }, { "epoch": 0.5272471275356777, "grad_norm": 1.7621660120768978, "learning_rate": 4.801915707963511e-07, "loss": 0.2411, "step": 30332 }, { "epoch": 0.5272645100731805, "grad_norm": 3.4376365838489975, "learning_rate": 4.801634435898081e-07, "loss": 0.3819, "step": 30333 }, { "epoch": 0.5272818926106833, "grad_norm": 1.5732901457819881, "learning_rate": 4.801353164461376e-07, "loss": 0.213, "step": 30334 }, { "epoch": 0.5272992751481861, "grad_norm": 2.4807331798046057, "learning_rate": 4.801071893654293e-07, "loss": 0.2898, "step": 30335 }, { "epoch": 0.5273166576856889, "grad_norm": 1.2809328108779674, "learning_rate": 4.80079062347772e-07, "loss": 0.1218, "step": 30336 }, { "epoch": 0.5273340402231917, "grad_norm": 2.368909608490686, "learning_rate": 4.800509353932547e-07, "loss": 0.2616, "step": 30337 }, { "epoch": 0.5273514227606946, "grad_norm": 2.0071706453460503, "learning_rate": 4.80022808501967e-07, "loss": 0.3156, "step": 30338 }, { "epoch": 0.5273688052981974, "grad_norm": 2.046611782298977, "learning_rate": 4.799946816739976e-07, "loss": 0.3272, "step": 30339 }, { "epoch": 0.5273861878357002, "grad_norm": 2.59500423284826, "learning_rate": 4.799665549094361e-07, "loss": 0.4302, "step": 30340 }, { "epoch": 0.5274035703732031, "grad_norm": 1.0452968923235315, "learning_rate": 4.799384282083713e-07, "loss": 0.1952, "step": 30341 }, { "epoch": 0.5274209529107059, "grad_norm": 1.53034764746657, "learning_rate": 4.799103015708926e-07, "loss": 0.1717, "step": 30342 }, { "epoch": 0.5274383354482087, "grad_norm": 2.2213237985122434, "learning_rate": 4.798821749970888e-07, "loss": 0.4224, "step": 30343 }, { "epoch": 0.5274557179857116, "grad_norm": 2.1605235402778646, "learning_rate": 4.798540484870495e-07, "loss": 0.207, "step": 30344 }, { "epoch": 0.5274731005232144, "grad_norm": 2.306271025297761, "learning_rate": 4.798259220408634e-07, "loss": 0.2429, "step": 30345 }, { "epoch": 0.5274904830607172, "grad_norm": 1.7831397663402533, "learning_rate": 4.7979779565862e-07, "loss": 0.1956, "step": 30346 }, { "epoch": 0.52750786559822, "grad_norm": 1.792692961995279, "learning_rate": 4.797696693404082e-07, "loss": 0.3347, "step": 30347 }, { "epoch": 0.5275252481357229, "grad_norm": 4.005765712945068, "learning_rate": 4.797415430863175e-07, "loss": 0.3795, "step": 30348 }, { "epoch": 0.5275426306732257, "grad_norm": 2.1533937363076374, "learning_rate": 4.797134168964366e-07, "loss": 0.3641, "step": 30349 }, { "epoch": 0.5275600132107285, "grad_norm": 1.808914358602565, "learning_rate": 4.796852907708548e-07, "loss": 0.2287, "step": 30350 }, { "epoch": 0.5275773957482314, "grad_norm": 1.3660025143034273, "learning_rate": 4.796571647096612e-07, "loss": 0.3306, "step": 30351 }, { "epoch": 0.5275947782857342, "grad_norm": 1.7421448081719433, "learning_rate": 4.796290387129453e-07, "loss": 0.2489, "step": 30352 }, { "epoch": 0.527612160823237, "grad_norm": 1.5454365528725125, "learning_rate": 4.796009127807958e-07, "loss": 0.2537, "step": 30353 }, { "epoch": 0.5276295433607398, "grad_norm": 1.0467177267751713, "learning_rate": 4.795727869133023e-07, "loss": 0.3314, "step": 30354 }, { "epoch": 0.5276469258982426, "grad_norm": 2.2494966062352297, "learning_rate": 4.795446611105535e-07, "loss": 0.326, "step": 30355 }, { "epoch": 0.5276643084357454, "grad_norm": 1.5076299494409477, "learning_rate": 4.795165353726387e-07, "loss": 0.201, "step": 30356 }, { "epoch": 0.5276816909732482, "grad_norm": 1.8930034744170379, "learning_rate": 4.794884096996471e-07, "loss": 0.1786, "step": 30357 }, { "epoch": 0.5276990735107511, "grad_norm": 2.0308824027226646, "learning_rate": 4.794602840916679e-07, "loss": 0.1822, "step": 30358 }, { "epoch": 0.5277164560482539, "grad_norm": 1.6061641669554985, "learning_rate": 4.794321585487899e-07, "loss": 0.2474, "step": 30359 }, { "epoch": 0.5277338385857567, "grad_norm": 1.7760410533235926, "learning_rate": 4.794040330711028e-07, "loss": 0.2856, "step": 30360 }, { "epoch": 0.5277512211232596, "grad_norm": 2.1140630480540263, "learning_rate": 4.793759076586955e-07, "loss": 0.2528, "step": 30361 }, { "epoch": 0.5277686036607624, "grad_norm": 1.9050589026280822, "learning_rate": 4.793477823116569e-07, "loss": 0.2912, "step": 30362 }, { "epoch": 0.5277859861982652, "grad_norm": 1.8980459381938068, "learning_rate": 4.793196570300764e-07, "loss": 0.3045, "step": 30363 }, { "epoch": 0.527803368735768, "grad_norm": 0.9211816005008677, "learning_rate": 4.79291531814043e-07, "loss": 0.2762, "step": 30364 }, { "epoch": 0.5278207512732709, "grad_norm": 1.468170685447556, "learning_rate": 4.79263406663646e-07, "loss": 0.2715, "step": 30365 }, { "epoch": 0.5278381338107737, "grad_norm": 1.5748550027022554, "learning_rate": 4.792352815789745e-07, "loss": 0.4056, "step": 30366 }, { "epoch": 0.5278555163482765, "grad_norm": 1.5335344244330251, "learning_rate": 4.792071565601178e-07, "loss": 0.2731, "step": 30367 }, { "epoch": 0.5278728988857794, "grad_norm": 1.6434396511984073, "learning_rate": 4.791790316071645e-07, "loss": 0.2888, "step": 30368 }, { "epoch": 0.5278902814232822, "grad_norm": 1.305773505696797, "learning_rate": 4.791509067202044e-07, "loss": 0.2835, "step": 30369 }, { "epoch": 0.527907663960785, "grad_norm": 1.2168211473260335, "learning_rate": 4.791227818993262e-07, "loss": 0.1551, "step": 30370 }, { "epoch": 0.5279250464982879, "grad_norm": 1.8086401525450768, "learning_rate": 4.790946571446191e-07, "loss": 0.3245, "step": 30371 }, { "epoch": 0.5279424290357907, "grad_norm": 1.2066511134930573, "learning_rate": 4.790665324561725e-07, "loss": 0.3401, "step": 30372 }, { "epoch": 0.5279598115732935, "grad_norm": 2.801487403567418, "learning_rate": 4.790384078340755e-07, "loss": 0.2573, "step": 30373 }, { "epoch": 0.5279771941107962, "grad_norm": 1.9388261317999487, "learning_rate": 4.790102832784169e-07, "loss": 0.2446, "step": 30374 }, { "epoch": 0.5279945766482991, "grad_norm": 1.9240163884190822, "learning_rate": 4.789821587892861e-07, "loss": 0.2046, "step": 30375 }, { "epoch": 0.5280119591858019, "grad_norm": 3.963559362864537, "learning_rate": 4.789540343667722e-07, "loss": 0.3261, "step": 30376 }, { "epoch": 0.5280293417233047, "grad_norm": 1.8504828685346109, "learning_rate": 4.789259100109644e-07, "loss": 0.2477, "step": 30377 }, { "epoch": 0.5280467242608076, "grad_norm": 1.5332026031585864, "learning_rate": 4.788977857219516e-07, "loss": 0.4163, "step": 30378 }, { "epoch": 0.5280641067983104, "grad_norm": 1.9421253867724797, "learning_rate": 4.788696614998234e-07, "loss": 0.2859, "step": 30379 }, { "epoch": 0.5280814893358132, "grad_norm": 1.9597323090393965, "learning_rate": 4.788415373446685e-07, "loss": 0.1965, "step": 30380 }, { "epoch": 0.5280988718733161, "grad_norm": 5.2824520027623745, "learning_rate": 4.788134132565763e-07, "loss": 0.4191, "step": 30381 }, { "epoch": 0.5281162544108189, "grad_norm": 2.4083642466674986, "learning_rate": 4.787852892356357e-07, "loss": 0.2587, "step": 30382 }, { "epoch": 0.5281336369483217, "grad_norm": 2.0894005260905497, "learning_rate": 4.787571652819361e-07, "loss": 0.2774, "step": 30383 }, { "epoch": 0.5281510194858245, "grad_norm": 2.1964449867479177, "learning_rate": 4.787290413955665e-07, "loss": 0.1454, "step": 30384 }, { "epoch": 0.5281684020233274, "grad_norm": 1.4227449693659542, "learning_rate": 4.78700917576616e-07, "loss": 0.3552, "step": 30385 }, { "epoch": 0.5281857845608302, "grad_norm": 0.8666304982949468, "learning_rate": 4.78672793825174e-07, "loss": 0.2243, "step": 30386 }, { "epoch": 0.528203167098333, "grad_norm": 2.2316502310896102, "learning_rate": 4.786446701413293e-07, "loss": 0.2313, "step": 30387 }, { "epoch": 0.5282205496358359, "grad_norm": 1.6797158198286184, "learning_rate": 4.786165465251712e-07, "loss": 0.3336, "step": 30388 }, { "epoch": 0.5282379321733387, "grad_norm": 1.1265929711410105, "learning_rate": 4.785884229767889e-07, "loss": 0.3451, "step": 30389 }, { "epoch": 0.5282553147108415, "grad_norm": 1.7664538422341338, "learning_rate": 4.785602994962712e-07, "loss": 0.2089, "step": 30390 }, { "epoch": 0.5282726972483444, "grad_norm": 1.7562943575753356, "learning_rate": 4.785321760837078e-07, "loss": 0.2564, "step": 30391 }, { "epoch": 0.5282900797858472, "grad_norm": 2.451790642466377, "learning_rate": 4.785040527391876e-07, "loss": 0.2311, "step": 30392 }, { "epoch": 0.5283074623233499, "grad_norm": 1.7693294767577776, "learning_rate": 4.784759294627993e-07, "loss": 0.1733, "step": 30393 }, { "epoch": 0.5283248448608527, "grad_norm": 2.1429200948933946, "learning_rate": 4.784478062546327e-07, "loss": 0.1957, "step": 30394 }, { "epoch": 0.5283422273983556, "grad_norm": 1.5895970935689439, "learning_rate": 4.784196831147765e-07, "loss": 0.2209, "step": 30395 }, { "epoch": 0.5283596099358584, "grad_norm": 1.7423717571558046, "learning_rate": 4.783915600433202e-07, "loss": 0.4465, "step": 30396 }, { "epoch": 0.5283769924733612, "grad_norm": 1.6734304332725995, "learning_rate": 4.783634370403526e-07, "loss": 0.4121, "step": 30397 }, { "epoch": 0.5283943750108641, "grad_norm": 1.9520232326558604, "learning_rate": 4.783353141059631e-07, "loss": 0.1857, "step": 30398 }, { "epoch": 0.5284117575483669, "grad_norm": 1.45354679731566, "learning_rate": 4.783071912402405e-07, "loss": 0.2923, "step": 30399 }, { "epoch": 0.5284291400858697, "grad_norm": 2.7209676887817222, "learning_rate": 4.782790684432742e-07, "loss": 0.2373, "step": 30400 }, { "epoch": 0.5284465226233725, "grad_norm": 1.006714545418066, "learning_rate": 4.782509457151533e-07, "loss": 0.325, "step": 30401 }, { "epoch": 0.5284639051608754, "grad_norm": 0.7275321647653805, "learning_rate": 4.782228230559669e-07, "loss": 0.2483, "step": 30402 }, { "epoch": 0.5284812876983782, "grad_norm": 1.8667307376876523, "learning_rate": 4.78194700465804e-07, "loss": 0.3164, "step": 30403 }, { "epoch": 0.528498670235881, "grad_norm": 2.394711829471793, "learning_rate": 4.781665779447541e-07, "loss": 0.2623, "step": 30404 }, { "epoch": 0.5285160527733839, "grad_norm": 0.9692851846862162, "learning_rate": 4.781384554929061e-07, "loss": 0.2218, "step": 30405 }, { "epoch": 0.5285334353108867, "grad_norm": 2.1298898321613504, "learning_rate": 4.781103331103493e-07, "loss": 0.281, "step": 30406 }, { "epoch": 0.5285508178483895, "grad_norm": 2.5784280790280305, "learning_rate": 4.780822107971722e-07, "loss": 0.2426, "step": 30407 }, { "epoch": 0.5285682003858924, "grad_norm": 1.4859755506511454, "learning_rate": 4.780540885534648e-07, "loss": 0.19, "step": 30408 }, { "epoch": 0.5285855829233952, "grad_norm": 3.262535477835863, "learning_rate": 4.780259663793157e-07, "loss": 0.3772, "step": 30409 }, { "epoch": 0.528602965460898, "grad_norm": 1.1395488845562378, "learning_rate": 4.779978442748144e-07, "loss": 0.289, "step": 30410 }, { "epoch": 0.5286203479984009, "grad_norm": 1.461070869174194, "learning_rate": 4.779697222400498e-07, "loss": 0.2832, "step": 30411 }, { "epoch": 0.5286377305359037, "grad_norm": 1.378556016209478, "learning_rate": 4.779416002751108e-07, "loss": 0.2728, "step": 30412 }, { "epoch": 0.5286551130734064, "grad_norm": 1.6410230113742317, "learning_rate": 4.77913478380087e-07, "loss": 0.3084, "step": 30413 }, { "epoch": 0.5286724956109092, "grad_norm": 1.3024888016699276, "learning_rate": 4.778853565550673e-07, "loss": 0.1589, "step": 30414 }, { "epoch": 0.5286898781484121, "grad_norm": 2.138701635369791, "learning_rate": 4.778572348001408e-07, "loss": 0.346, "step": 30415 }, { "epoch": 0.5287072606859149, "grad_norm": 1.873273063075984, "learning_rate": 4.778291131153967e-07, "loss": 0.2198, "step": 30416 }, { "epoch": 0.5287246432234177, "grad_norm": 1.3838095127971113, "learning_rate": 4.778009915009243e-07, "loss": 0.2147, "step": 30417 }, { "epoch": 0.5287420257609206, "grad_norm": 1.7130884722485016, "learning_rate": 4.777728699568123e-07, "loss": 0.1936, "step": 30418 }, { "epoch": 0.5287594082984234, "grad_norm": 2.814526157865907, "learning_rate": 4.777447484831502e-07, "loss": 0.2344, "step": 30419 }, { "epoch": 0.5287767908359262, "grad_norm": 2.2257551944400933, "learning_rate": 4.77716627080027e-07, "loss": 0.1995, "step": 30420 }, { "epoch": 0.528794173373429, "grad_norm": 1.0656745867416102, "learning_rate": 4.776885057475319e-07, "loss": 0.1407, "step": 30421 }, { "epoch": 0.5288115559109319, "grad_norm": 1.1388403457030731, "learning_rate": 4.77660384485754e-07, "loss": 0.1623, "step": 30422 }, { "epoch": 0.5288289384484347, "grad_norm": 1.3694110891496858, "learning_rate": 4.776322632947823e-07, "loss": 0.3812, "step": 30423 }, { "epoch": 0.5288463209859375, "grad_norm": 1.8731909382116276, "learning_rate": 4.776041421747062e-07, "loss": 0.2506, "step": 30424 }, { "epoch": 0.5288637035234404, "grad_norm": 3.0327883877760353, "learning_rate": 4.775760211256147e-07, "loss": 0.3778, "step": 30425 }, { "epoch": 0.5288810860609432, "grad_norm": 1.2520047142131259, "learning_rate": 4.775479001475969e-07, "loss": 0.1567, "step": 30426 }, { "epoch": 0.528898468598446, "grad_norm": 2.5343121814628606, "learning_rate": 4.775197792407418e-07, "loss": 0.3041, "step": 30427 }, { "epoch": 0.5289158511359489, "grad_norm": 1.7536728743131622, "learning_rate": 4.774916584051387e-07, "loss": 0.2618, "step": 30428 }, { "epoch": 0.5289332336734517, "grad_norm": 1.1931574845619024, "learning_rate": 4.774635376408767e-07, "loss": 0.2462, "step": 30429 }, { "epoch": 0.5289506162109545, "grad_norm": 1.3215863313827347, "learning_rate": 4.774354169480452e-07, "loss": 0.2337, "step": 30430 }, { "epoch": 0.5289679987484573, "grad_norm": 1.5736073922906297, "learning_rate": 4.774072963267329e-07, "loss": 0.1966, "step": 30431 }, { "epoch": 0.5289853812859602, "grad_norm": 1.4515881402528983, "learning_rate": 4.77379175777029e-07, "loss": 0.2123, "step": 30432 }, { "epoch": 0.5290027638234629, "grad_norm": 1.4570658699999057, "learning_rate": 4.773510552990227e-07, "loss": 0.3533, "step": 30433 }, { "epoch": 0.5290201463609657, "grad_norm": 1.7704781474042204, "learning_rate": 4.773229348928032e-07, "loss": 0.4012, "step": 30434 }, { "epoch": 0.5290375288984686, "grad_norm": 3.690387222657415, "learning_rate": 4.772948145584596e-07, "loss": 0.3398, "step": 30435 }, { "epoch": 0.5290549114359714, "grad_norm": 3.105859241047998, "learning_rate": 4.772666942960812e-07, "loss": 0.2053, "step": 30436 }, { "epoch": 0.5290722939734742, "grad_norm": 1.4458038137506093, "learning_rate": 4.772385741057567e-07, "loss": 0.2638, "step": 30437 }, { "epoch": 0.529089676510977, "grad_norm": 1.794887037645712, "learning_rate": 4.772104539875754e-07, "loss": 0.5481, "step": 30438 }, { "epoch": 0.5291070590484799, "grad_norm": 1.6906094577390454, "learning_rate": 4.771823339416267e-07, "loss": 0.1554, "step": 30439 }, { "epoch": 0.5291244415859827, "grad_norm": 1.3960399705246553, "learning_rate": 4.771542139679993e-07, "loss": 0.2706, "step": 30440 }, { "epoch": 0.5291418241234855, "grad_norm": 1.2586605309516163, "learning_rate": 4.771260940667827e-07, "loss": 0.1579, "step": 30441 }, { "epoch": 0.5291592066609884, "grad_norm": 1.5397660980829815, "learning_rate": 4.770979742380657e-07, "loss": 0.3036, "step": 30442 }, { "epoch": 0.5291765891984912, "grad_norm": 2.0834392083998767, "learning_rate": 4.770698544819379e-07, "loss": 0.4078, "step": 30443 }, { "epoch": 0.529193971735994, "grad_norm": 1.8353816826557894, "learning_rate": 4.77041734798488e-07, "loss": 0.5236, "step": 30444 }, { "epoch": 0.5292113542734969, "grad_norm": 1.3877102980766973, "learning_rate": 4.770136151878053e-07, "loss": 0.3278, "step": 30445 }, { "epoch": 0.5292287368109997, "grad_norm": 1.6338441692947656, "learning_rate": 4.769854956499787e-07, "loss": 0.2347, "step": 30446 }, { "epoch": 0.5292461193485025, "grad_norm": 1.5221766470722178, "learning_rate": 4.769573761850977e-07, "loss": 0.2139, "step": 30447 }, { "epoch": 0.5292635018860053, "grad_norm": 2.8972345367231958, "learning_rate": 4.76929256793251e-07, "loss": 0.2717, "step": 30448 }, { "epoch": 0.5292808844235082, "grad_norm": 1.0861713586309534, "learning_rate": 4.769011374745282e-07, "loss": 0.2138, "step": 30449 }, { "epoch": 0.529298266961011, "grad_norm": 1.2463694157984309, "learning_rate": 4.768730182290181e-07, "loss": 0.2816, "step": 30450 }, { "epoch": 0.5293156494985138, "grad_norm": 2.9192803522635997, "learning_rate": 4.768448990568098e-07, "loss": 0.3951, "step": 30451 }, { "epoch": 0.5293330320360167, "grad_norm": 1.9452768134313136, "learning_rate": 4.7681677995799275e-07, "loss": 0.1895, "step": 30452 }, { "epoch": 0.5293504145735194, "grad_norm": 1.5878585659811182, "learning_rate": 4.767886609326556e-07, "loss": 0.1333, "step": 30453 }, { "epoch": 0.5293677971110222, "grad_norm": 0.9480487317061013, "learning_rate": 4.7676054198088797e-07, "loss": 0.3096, "step": 30454 }, { "epoch": 0.529385179648525, "grad_norm": 1.5941320775433652, "learning_rate": 4.767324231027787e-07, "loss": 0.1947, "step": 30455 }, { "epoch": 0.5294025621860279, "grad_norm": 1.211741513671266, "learning_rate": 4.76704304298417e-07, "loss": 0.1847, "step": 30456 }, { "epoch": 0.5294199447235307, "grad_norm": 1.401467898294852, "learning_rate": 4.7667618556789174e-07, "loss": 0.2957, "step": 30457 }, { "epoch": 0.5294373272610335, "grad_norm": 1.2466634310802795, "learning_rate": 4.7664806691129246e-07, "loss": 0.2232, "step": 30458 }, { "epoch": 0.5294547097985364, "grad_norm": 1.7202842416771027, "learning_rate": 4.766199483287079e-07, "loss": 0.365, "step": 30459 }, { "epoch": 0.5294720923360392, "grad_norm": 2.4670209151915015, "learning_rate": 4.7659182982022745e-07, "loss": 0.1905, "step": 30460 }, { "epoch": 0.529489474873542, "grad_norm": 1.4487770973968033, "learning_rate": 4.7656371138594007e-07, "loss": 0.2184, "step": 30461 }, { "epoch": 0.5295068574110449, "grad_norm": 2.037221103451147, "learning_rate": 4.765355930259353e-07, "loss": 0.2046, "step": 30462 }, { "epoch": 0.5295242399485477, "grad_norm": 1.3257310387025676, "learning_rate": 4.765074747403016e-07, "loss": 0.2745, "step": 30463 }, { "epoch": 0.5295416224860505, "grad_norm": 1.2266614733131682, "learning_rate": 4.764793565291285e-07, "loss": 0.2715, "step": 30464 }, { "epoch": 0.5295590050235534, "grad_norm": 1.400284055138636, "learning_rate": 4.7645123839250496e-07, "loss": 0.2461, "step": 30465 }, { "epoch": 0.5295763875610562, "grad_norm": 5.438793150114109, "learning_rate": 4.764231203305202e-07, "loss": 0.6607, "step": 30466 }, { "epoch": 0.529593770098559, "grad_norm": 2.2696400511597425, "learning_rate": 4.763950023432633e-07, "loss": 0.3539, "step": 30467 }, { "epoch": 0.5296111526360618, "grad_norm": 1.1560682495571404, "learning_rate": 4.763668844308236e-07, "loss": 0.173, "step": 30468 }, { "epoch": 0.5296285351735647, "grad_norm": 2.0090990488240217, "learning_rate": 4.763387665932899e-07, "loss": 0.2649, "step": 30469 }, { "epoch": 0.5296459177110675, "grad_norm": 0.809691588946013, "learning_rate": 4.7631064883075147e-07, "loss": 0.1419, "step": 30470 }, { "epoch": 0.5296633002485703, "grad_norm": 1.4739624455440108, "learning_rate": 4.7628253114329727e-07, "loss": 0.1618, "step": 30471 }, { "epoch": 0.5296806827860732, "grad_norm": 2.2928066627364037, "learning_rate": 4.762544135310166e-07, "loss": 0.2302, "step": 30472 }, { "epoch": 0.5296980653235759, "grad_norm": 5.487393716609789, "learning_rate": 4.762262959939985e-07, "loss": 0.4007, "step": 30473 }, { "epoch": 0.5297154478610787, "grad_norm": 1.5951305831575202, "learning_rate": 4.761981785323323e-07, "loss": 0.1436, "step": 30474 }, { "epoch": 0.5297328303985815, "grad_norm": 1.5779164512228803, "learning_rate": 4.7617006114610685e-07, "loss": 0.2966, "step": 30475 }, { "epoch": 0.5297502129360844, "grad_norm": 1.7835034514663037, "learning_rate": 4.7614194383541127e-07, "loss": 0.2081, "step": 30476 }, { "epoch": 0.5297675954735872, "grad_norm": 1.2487030041405434, "learning_rate": 4.761138266003348e-07, "loss": 0.2418, "step": 30477 }, { "epoch": 0.52978497801109, "grad_norm": 1.3479213984098606, "learning_rate": 4.760857094409666e-07, "loss": 0.2305, "step": 30478 }, { "epoch": 0.5298023605485929, "grad_norm": 1.5342863651683785, "learning_rate": 4.7605759235739564e-07, "loss": 0.2778, "step": 30479 }, { "epoch": 0.5298197430860957, "grad_norm": 5.08970012805275, "learning_rate": 4.7602947534971127e-07, "loss": 0.2996, "step": 30480 }, { "epoch": 0.5298371256235985, "grad_norm": 2.229702606441291, "learning_rate": 4.760013584180023e-07, "loss": 0.2254, "step": 30481 }, { "epoch": 0.5298545081611014, "grad_norm": 2.178659122600057, "learning_rate": 4.75973241562358e-07, "loss": 0.2948, "step": 30482 }, { "epoch": 0.5298718906986042, "grad_norm": 1.6331912110637476, "learning_rate": 4.759451247828675e-07, "loss": 0.1718, "step": 30483 }, { "epoch": 0.529889273236107, "grad_norm": 1.253869325323283, "learning_rate": 4.7591700807961987e-07, "loss": 0.1652, "step": 30484 }, { "epoch": 0.5299066557736098, "grad_norm": 0.672938786079007, "learning_rate": 4.7588889145270434e-07, "loss": 0.1521, "step": 30485 }, { "epoch": 0.5299240383111127, "grad_norm": 1.3543088192166541, "learning_rate": 4.758607749022099e-07, "loss": 0.2931, "step": 30486 }, { "epoch": 0.5299414208486155, "grad_norm": 1.841183518997915, "learning_rate": 4.7583265842822587e-07, "loss": 0.2773, "step": 30487 }, { "epoch": 0.5299588033861183, "grad_norm": 1.5071660242203306, "learning_rate": 4.75804542030841e-07, "loss": 0.2684, "step": 30488 }, { "epoch": 0.5299761859236212, "grad_norm": 1.4003709321718614, "learning_rate": 4.7577642571014474e-07, "loss": 0.3131, "step": 30489 }, { "epoch": 0.529993568461124, "grad_norm": 3.1554209846470127, "learning_rate": 4.7574830946622595e-07, "loss": 0.3284, "step": 30490 }, { "epoch": 0.5300109509986268, "grad_norm": 2.3931457625492447, "learning_rate": 4.7572019329917403e-07, "loss": 0.3361, "step": 30491 }, { "epoch": 0.5300283335361297, "grad_norm": 1.5814575303129912, "learning_rate": 4.7569207720907776e-07, "loss": 0.2702, "step": 30492 }, { "epoch": 0.5300457160736324, "grad_norm": 1.7761102236536024, "learning_rate": 4.756639611960268e-07, "loss": 0.2728, "step": 30493 }, { "epoch": 0.5300630986111352, "grad_norm": 1.8864771253685548, "learning_rate": 4.756358452601096e-07, "loss": 0.3334, "step": 30494 }, { "epoch": 0.530080481148638, "grad_norm": 1.2897086704176874, "learning_rate": 4.756077294014156e-07, "loss": 0.1254, "step": 30495 }, { "epoch": 0.5300978636861409, "grad_norm": 0.9977887074987363, "learning_rate": 4.755796136200339e-07, "loss": 0.2598, "step": 30496 }, { "epoch": 0.5301152462236437, "grad_norm": 1.751206252445901, "learning_rate": 4.7555149791605366e-07, "loss": 0.3275, "step": 30497 }, { "epoch": 0.5301326287611465, "grad_norm": 1.3991682411487254, "learning_rate": 4.755233822895638e-07, "loss": 0.2688, "step": 30498 }, { "epoch": 0.5301500112986494, "grad_norm": 5.494470195743047, "learning_rate": 4.754952667406539e-07, "loss": 0.242, "step": 30499 }, { "epoch": 0.5301673938361522, "grad_norm": 1.8283526640756171, "learning_rate": 4.754671512694125e-07, "loss": 0.3357, "step": 30500 }, { "epoch": 0.530184776373655, "grad_norm": 1.4712986100015257, "learning_rate": 4.754390358759289e-07, "loss": 0.2924, "step": 30501 }, { "epoch": 0.5302021589111579, "grad_norm": 1.5229880825853863, "learning_rate": 4.754109205602925e-07, "loss": 0.2296, "step": 30502 }, { "epoch": 0.5302195414486607, "grad_norm": 2.0397785803065998, "learning_rate": 4.7538280532259204e-07, "loss": 0.2694, "step": 30503 }, { "epoch": 0.5302369239861635, "grad_norm": 1.1382201564588275, "learning_rate": 4.7535469016291673e-07, "loss": 0.2557, "step": 30504 }, { "epoch": 0.5302543065236663, "grad_norm": 1.5478919293504403, "learning_rate": 4.753265750813558e-07, "loss": 0.1666, "step": 30505 }, { "epoch": 0.5302716890611692, "grad_norm": 1.3797263832249935, "learning_rate": 4.7529846007799845e-07, "loss": 0.1973, "step": 30506 }, { "epoch": 0.530289071598672, "grad_norm": 1.468332436360738, "learning_rate": 4.752703451529334e-07, "loss": 0.173, "step": 30507 }, { "epoch": 0.5303064541361748, "grad_norm": 1.7816025328851681, "learning_rate": 4.752422303062501e-07, "loss": 0.1791, "step": 30508 }, { "epoch": 0.5303238366736777, "grad_norm": 1.4343075936328906, "learning_rate": 4.752141155380375e-07, "loss": 0.2339, "step": 30509 }, { "epoch": 0.5303412192111805, "grad_norm": 1.3649377547018677, "learning_rate": 4.7518600084838476e-07, "loss": 0.1619, "step": 30510 }, { "epoch": 0.5303586017486833, "grad_norm": 1.57502617852484, "learning_rate": 4.7515788623738107e-07, "loss": 0.463, "step": 30511 }, { "epoch": 0.5303759842861862, "grad_norm": 2.3375555551582323, "learning_rate": 4.751297717051156e-07, "loss": 0.3371, "step": 30512 }, { "epoch": 0.5303933668236889, "grad_norm": 1.6999949212372418, "learning_rate": 4.7510165725167703e-07, "loss": 0.202, "step": 30513 }, { "epoch": 0.5304107493611917, "grad_norm": 2.1664314477540607, "learning_rate": 4.75073542877155e-07, "loss": 0.2067, "step": 30514 }, { "epoch": 0.5304281318986945, "grad_norm": 1.873644776570806, "learning_rate": 4.750454285816382e-07, "loss": 0.275, "step": 30515 }, { "epoch": 0.5304455144361974, "grad_norm": 1.9674288316560706, "learning_rate": 4.7501731436521607e-07, "loss": 0.1803, "step": 30516 }, { "epoch": 0.5304628969737002, "grad_norm": 1.534208962913231, "learning_rate": 4.7498920022797744e-07, "loss": 0.1911, "step": 30517 }, { "epoch": 0.530480279511203, "grad_norm": 1.2055052837762503, "learning_rate": 4.749610861700119e-07, "loss": 0.1382, "step": 30518 }, { "epoch": 0.5304976620487059, "grad_norm": 1.3413125483484973, "learning_rate": 4.7493297219140786e-07, "loss": 0.2137, "step": 30519 }, { "epoch": 0.5305150445862087, "grad_norm": 1.4606432919106267, "learning_rate": 4.749048582922549e-07, "loss": 0.3561, "step": 30520 }, { "epoch": 0.5305324271237115, "grad_norm": 2.0832790219340427, "learning_rate": 4.748767444726419e-07, "loss": 0.3215, "step": 30521 }, { "epoch": 0.5305498096612143, "grad_norm": 2.3656486813984587, "learning_rate": 4.7484863073265825e-07, "loss": 0.2494, "step": 30522 }, { "epoch": 0.5305671921987172, "grad_norm": 3.179656898575169, "learning_rate": 4.748205170723927e-07, "loss": 0.2858, "step": 30523 }, { "epoch": 0.53058457473622, "grad_norm": 2.535924384717854, "learning_rate": 4.747924034919347e-07, "loss": 0.2389, "step": 30524 }, { "epoch": 0.5306019572737228, "grad_norm": 3.9546078931770134, "learning_rate": 4.747642899913733e-07, "loss": 0.2691, "step": 30525 }, { "epoch": 0.5306193398112257, "grad_norm": 2.3130728384302697, "learning_rate": 4.7473617657079737e-07, "loss": 0.2554, "step": 30526 }, { "epoch": 0.5306367223487285, "grad_norm": 2.0307005677794554, "learning_rate": 4.747080632302961e-07, "loss": 0.2316, "step": 30527 }, { "epoch": 0.5306541048862313, "grad_norm": 1.2887038596806675, "learning_rate": 4.7467994996995875e-07, "loss": 0.1632, "step": 30528 }, { "epoch": 0.5306714874237342, "grad_norm": 1.9721225821004185, "learning_rate": 4.746518367898742e-07, "loss": 0.3133, "step": 30529 }, { "epoch": 0.530688869961237, "grad_norm": 2.638205210727296, "learning_rate": 4.7462372369013174e-07, "loss": 0.2777, "step": 30530 }, { "epoch": 0.5307062524987398, "grad_norm": 2.0219663175091136, "learning_rate": 4.745956106708206e-07, "loss": 0.4273, "step": 30531 }, { "epoch": 0.5307236350362425, "grad_norm": 2.2546711211913046, "learning_rate": 4.7456749773202945e-07, "loss": 0.2412, "step": 30532 }, { "epoch": 0.5307410175737454, "grad_norm": 1.613778960122247, "learning_rate": 4.745393848738478e-07, "loss": 0.1185, "step": 30533 }, { "epoch": 0.5307584001112482, "grad_norm": 2.687375961073575, "learning_rate": 4.745112720963646e-07, "loss": 0.3717, "step": 30534 }, { "epoch": 0.530775782648751, "grad_norm": 1.4374954100096093, "learning_rate": 4.744831593996688e-07, "loss": 0.3159, "step": 30535 }, { "epoch": 0.5307931651862539, "grad_norm": 2.0630591932833133, "learning_rate": 4.7445504678384985e-07, "loss": 0.1919, "step": 30536 }, { "epoch": 0.5308105477237567, "grad_norm": 1.9696462366269316, "learning_rate": 4.744269342489967e-07, "loss": 0.3014, "step": 30537 }, { "epoch": 0.5308279302612595, "grad_norm": 2.906065588477071, "learning_rate": 4.7439882179519826e-07, "loss": 0.2879, "step": 30538 }, { "epoch": 0.5308453127987623, "grad_norm": 1.2890234988550011, "learning_rate": 4.743707094225439e-07, "loss": 0.223, "step": 30539 }, { "epoch": 0.5308626953362652, "grad_norm": 1.585233769193647, "learning_rate": 4.7434259713112244e-07, "loss": 0.182, "step": 30540 }, { "epoch": 0.530880077873768, "grad_norm": 1.4688747178108617, "learning_rate": 4.743144849210233e-07, "loss": 0.2615, "step": 30541 }, { "epoch": 0.5308974604112708, "grad_norm": 1.1583992347044039, "learning_rate": 4.7428637279233533e-07, "loss": 0.316, "step": 30542 }, { "epoch": 0.5309148429487737, "grad_norm": 1.691824461615083, "learning_rate": 4.742582607451479e-07, "loss": 0.2988, "step": 30543 }, { "epoch": 0.5309322254862765, "grad_norm": 1.1664545522321854, "learning_rate": 4.7423014877955003e-07, "loss": 0.2321, "step": 30544 }, { "epoch": 0.5309496080237793, "grad_norm": 2.4517736704244317, "learning_rate": 4.7420203689563064e-07, "loss": 0.2316, "step": 30545 }, { "epoch": 0.5309669905612822, "grad_norm": 1.9760656035468376, "learning_rate": 4.741739250934789e-07, "loss": 0.4607, "step": 30546 }, { "epoch": 0.530984373098785, "grad_norm": 1.517607470473209, "learning_rate": 4.74145813373184e-07, "loss": 0.2105, "step": 30547 }, { "epoch": 0.5310017556362878, "grad_norm": 1.2369533717570813, "learning_rate": 4.741177017348349e-07, "loss": 0.1632, "step": 30548 }, { "epoch": 0.5310191381737907, "grad_norm": 1.5769445960060735, "learning_rate": 4.740895901785209e-07, "loss": 0.2524, "step": 30549 }, { "epoch": 0.5310365207112935, "grad_norm": 2.02767906560862, "learning_rate": 4.740614787043311e-07, "loss": 0.234, "step": 30550 }, { "epoch": 0.5310539032487963, "grad_norm": 1.3225049656682624, "learning_rate": 4.7403336731235436e-07, "loss": 0.1962, "step": 30551 }, { "epoch": 0.531071285786299, "grad_norm": 1.5336909085665253, "learning_rate": 4.740052560026799e-07, "loss": 0.291, "step": 30552 }, { "epoch": 0.5310886683238019, "grad_norm": 1.3556158177795714, "learning_rate": 4.7397714477539686e-07, "loss": 0.2644, "step": 30553 }, { "epoch": 0.5311060508613047, "grad_norm": 1.7906527445109715, "learning_rate": 4.7394903363059424e-07, "loss": 0.3169, "step": 30554 }, { "epoch": 0.5311234333988075, "grad_norm": 1.4158070948239094, "learning_rate": 4.739209225683613e-07, "loss": 0.2445, "step": 30555 }, { "epoch": 0.5311408159363104, "grad_norm": 2.5294318261514053, "learning_rate": 4.7389281158878726e-07, "loss": 0.3043, "step": 30556 }, { "epoch": 0.5311581984738132, "grad_norm": 3.134631361137828, "learning_rate": 4.738647006919607e-07, "loss": 0.3116, "step": 30557 }, { "epoch": 0.531175581011316, "grad_norm": 1.4086338568656973, "learning_rate": 4.7383658987797117e-07, "loss": 0.1605, "step": 30558 }, { "epoch": 0.5311929635488188, "grad_norm": 2.443309054126881, "learning_rate": 4.7380847914690756e-07, "loss": 0.438, "step": 30559 }, { "epoch": 0.5312103460863217, "grad_norm": 2.109407782699464, "learning_rate": 4.7378036849885906e-07, "loss": 0.2578, "step": 30560 }, { "epoch": 0.5312277286238245, "grad_norm": 2.3665827073469434, "learning_rate": 4.7375225793391476e-07, "loss": 0.2891, "step": 30561 }, { "epoch": 0.5312451111613273, "grad_norm": 1.3937004116990546, "learning_rate": 4.737241474521637e-07, "loss": 0.2081, "step": 30562 }, { "epoch": 0.5312624936988302, "grad_norm": 1.7457882484396794, "learning_rate": 4.736960370536952e-07, "loss": 0.2517, "step": 30563 }, { "epoch": 0.531279876236333, "grad_norm": 1.5376314906583515, "learning_rate": 4.7366792673859814e-07, "loss": 0.3152, "step": 30564 }, { "epoch": 0.5312972587738358, "grad_norm": 1.8195890164467348, "learning_rate": 4.7363981650696147e-07, "loss": 0.1794, "step": 30565 }, { "epoch": 0.5313146413113387, "grad_norm": 1.848597354840831, "learning_rate": 4.736117063588746e-07, "loss": 0.4846, "step": 30566 }, { "epoch": 0.5313320238488415, "grad_norm": 1.1690422497981698, "learning_rate": 4.7358359629442643e-07, "loss": 0.2882, "step": 30567 }, { "epoch": 0.5313494063863443, "grad_norm": 1.5168695524178204, "learning_rate": 4.735554863137061e-07, "loss": 0.2011, "step": 30568 }, { "epoch": 0.5313667889238471, "grad_norm": 1.7094984295452202, "learning_rate": 4.7352737641680293e-07, "loss": 0.2289, "step": 30569 }, { "epoch": 0.53138417146135, "grad_norm": 1.1467857535831343, "learning_rate": 4.734992666038058e-07, "loss": 0.3109, "step": 30570 }, { "epoch": 0.5314015539988528, "grad_norm": 2.1960026716632677, "learning_rate": 4.734711568748036e-07, "loss": 0.3596, "step": 30571 }, { "epoch": 0.5314189365363555, "grad_norm": 2.2160108122561297, "learning_rate": 4.7344304722988577e-07, "loss": 0.3305, "step": 30572 }, { "epoch": 0.5314363190738584, "grad_norm": 2.104759957742899, "learning_rate": 4.734149376691412e-07, "loss": 0.7159, "step": 30573 }, { "epoch": 0.5314537016113612, "grad_norm": 2.487077502265728, "learning_rate": 4.7338682819265914e-07, "loss": 0.2919, "step": 30574 }, { "epoch": 0.531471084148864, "grad_norm": 2.028885935079592, "learning_rate": 4.7335871880052873e-07, "loss": 0.2626, "step": 30575 }, { "epoch": 0.5314884666863668, "grad_norm": 1.0119800449334422, "learning_rate": 4.7333060949283887e-07, "loss": 0.2112, "step": 30576 }, { "epoch": 0.5315058492238697, "grad_norm": 1.0011497993264469, "learning_rate": 4.7330250026967856e-07, "loss": 0.163, "step": 30577 }, { "epoch": 0.5315232317613725, "grad_norm": 1.5363651345423373, "learning_rate": 4.732743911311372e-07, "loss": 0.2195, "step": 30578 }, { "epoch": 0.5315406142988753, "grad_norm": 1.5164404370193483, "learning_rate": 4.7324628207730366e-07, "loss": 0.4087, "step": 30579 }, { "epoch": 0.5315579968363782, "grad_norm": 1.7367778601126096, "learning_rate": 4.7321817310826716e-07, "loss": 0.2551, "step": 30580 }, { "epoch": 0.531575379373881, "grad_norm": 1.1818040289117655, "learning_rate": 4.7319006422411664e-07, "loss": 0.1591, "step": 30581 }, { "epoch": 0.5315927619113838, "grad_norm": 1.299480248308153, "learning_rate": 4.7316195542494175e-07, "loss": 0.1825, "step": 30582 }, { "epoch": 0.5316101444488867, "grad_norm": 3.52159222142088, "learning_rate": 4.7313384671083066e-07, "loss": 0.3387, "step": 30583 }, { "epoch": 0.5316275269863895, "grad_norm": 2.206754446938862, "learning_rate": 4.731057380818732e-07, "loss": 0.2337, "step": 30584 }, { "epoch": 0.5316449095238923, "grad_norm": 1.845317827003948, "learning_rate": 4.7307762953815796e-07, "loss": 0.2364, "step": 30585 }, { "epoch": 0.5316622920613951, "grad_norm": 1.3151423071507629, "learning_rate": 4.730495210797744e-07, "loss": 0.2642, "step": 30586 }, { "epoch": 0.531679674598898, "grad_norm": 2.0940755166092524, "learning_rate": 4.730214127068113e-07, "loss": 0.2732, "step": 30587 }, { "epoch": 0.5316970571364008, "grad_norm": 1.5686481013147535, "learning_rate": 4.7299330441935825e-07, "loss": 0.2605, "step": 30588 }, { "epoch": 0.5317144396739036, "grad_norm": 1.9493682153915042, "learning_rate": 4.729651962175039e-07, "loss": 0.2779, "step": 30589 }, { "epoch": 0.5317318222114065, "grad_norm": 4.9105373409128745, "learning_rate": 4.7293708810133735e-07, "loss": 0.26, "step": 30590 }, { "epoch": 0.5317492047489093, "grad_norm": 1.423493224754212, "learning_rate": 4.7290898007094787e-07, "loss": 0.3063, "step": 30591 }, { "epoch": 0.531766587286412, "grad_norm": 2.669303088885709, "learning_rate": 4.728808721264245e-07, "loss": 0.3163, "step": 30592 }, { "epoch": 0.5317839698239148, "grad_norm": 1.8728523056470225, "learning_rate": 4.728527642678562e-07, "loss": 0.2689, "step": 30593 }, { "epoch": 0.5318013523614177, "grad_norm": 0.9644217280608116, "learning_rate": 4.7282465649533245e-07, "loss": 0.1346, "step": 30594 }, { "epoch": 0.5318187348989205, "grad_norm": 1.4128714215273392, "learning_rate": 4.7279654880894185e-07, "loss": 0.2275, "step": 30595 }, { "epoch": 0.5318361174364233, "grad_norm": 2.1966987683767765, "learning_rate": 4.7276844120877364e-07, "loss": 0.2589, "step": 30596 }, { "epoch": 0.5318534999739262, "grad_norm": 1.458556135586073, "learning_rate": 4.7274033369491703e-07, "loss": 0.3081, "step": 30597 }, { "epoch": 0.531870882511429, "grad_norm": 1.1479406144327542, "learning_rate": 4.7271222626746095e-07, "loss": 0.2493, "step": 30598 }, { "epoch": 0.5318882650489318, "grad_norm": 1.0868968213172134, "learning_rate": 4.726841189264947e-07, "loss": 0.2119, "step": 30599 }, { "epoch": 0.5319056475864347, "grad_norm": 1.9567494568285153, "learning_rate": 4.7265601167210734e-07, "loss": 0.2315, "step": 30600 }, { "epoch": 0.5319230301239375, "grad_norm": 1.3740692944559827, "learning_rate": 4.726279045043877e-07, "loss": 0.1422, "step": 30601 }, { "epoch": 0.5319404126614403, "grad_norm": 1.087462853253798, "learning_rate": 4.72599797423425e-07, "loss": 0.2335, "step": 30602 }, { "epoch": 0.5319577951989432, "grad_norm": 1.4406116755539617, "learning_rate": 4.725716904293084e-07, "loss": 0.2552, "step": 30603 }, { "epoch": 0.531975177736446, "grad_norm": 1.4765329613541227, "learning_rate": 4.7254358352212693e-07, "loss": 0.1359, "step": 30604 }, { "epoch": 0.5319925602739488, "grad_norm": 2.2804447029213195, "learning_rate": 4.725154767019697e-07, "loss": 0.3034, "step": 30605 }, { "epoch": 0.5320099428114516, "grad_norm": 2.34937791972856, "learning_rate": 4.7248736996892576e-07, "loss": 0.3144, "step": 30606 }, { "epoch": 0.5320273253489545, "grad_norm": 0.9037032922419562, "learning_rate": 4.724592633230844e-07, "loss": 0.1386, "step": 30607 }, { "epoch": 0.5320447078864573, "grad_norm": 1.9204928083279613, "learning_rate": 4.724311567645344e-07, "loss": 0.2804, "step": 30608 }, { "epoch": 0.5320620904239601, "grad_norm": 1.0212310841030607, "learning_rate": 4.724030502933649e-07, "loss": 0.2365, "step": 30609 }, { "epoch": 0.532079472961463, "grad_norm": 1.558846960926025, "learning_rate": 4.72374943909665e-07, "loss": 0.354, "step": 30610 }, { "epoch": 0.5320968554989658, "grad_norm": 1.203491084346633, "learning_rate": 4.72346837613524e-07, "loss": 0.244, "step": 30611 }, { "epoch": 0.5321142380364685, "grad_norm": 1.3628417881413628, "learning_rate": 4.723187314050307e-07, "loss": 0.199, "step": 30612 }, { "epoch": 0.5321316205739713, "grad_norm": 2.714098583556199, "learning_rate": 4.7229062528427447e-07, "loss": 0.2974, "step": 30613 }, { "epoch": 0.5321490031114742, "grad_norm": 2.0191570636823606, "learning_rate": 4.722625192513442e-07, "loss": 0.3415, "step": 30614 }, { "epoch": 0.532166385648977, "grad_norm": 0.9165369956655233, "learning_rate": 4.722344133063289e-07, "loss": 0.1958, "step": 30615 }, { "epoch": 0.5321837681864798, "grad_norm": 3.0846749672487443, "learning_rate": 4.722063074493177e-07, "loss": 0.2595, "step": 30616 }, { "epoch": 0.5322011507239827, "grad_norm": 2.5496470906231616, "learning_rate": 4.7217820168039983e-07, "loss": 0.3571, "step": 30617 }, { "epoch": 0.5322185332614855, "grad_norm": 1.318055933864301, "learning_rate": 4.721500959996642e-07, "loss": 0.2166, "step": 30618 }, { "epoch": 0.5322359157989883, "grad_norm": 1.6325520663416775, "learning_rate": 4.7212199040720017e-07, "loss": 0.2696, "step": 30619 }, { "epoch": 0.5322532983364912, "grad_norm": 1.414917679279714, "learning_rate": 4.720938849030965e-07, "loss": 0.3565, "step": 30620 }, { "epoch": 0.532270680873994, "grad_norm": 0.9228734781477225, "learning_rate": 4.7206577948744237e-07, "loss": 0.2215, "step": 30621 }, { "epoch": 0.5322880634114968, "grad_norm": 0.9354528250979653, "learning_rate": 4.720376741603269e-07, "loss": 0.2423, "step": 30622 }, { "epoch": 0.5323054459489996, "grad_norm": 1.4412317642630201, "learning_rate": 4.720095689218392e-07, "loss": 0.1941, "step": 30623 }, { "epoch": 0.5323228284865025, "grad_norm": 1.515016142102272, "learning_rate": 4.7198146377206824e-07, "loss": 0.1791, "step": 30624 }, { "epoch": 0.5323402110240053, "grad_norm": 1.818323211886094, "learning_rate": 4.719533587111032e-07, "loss": 0.2902, "step": 30625 }, { "epoch": 0.5323575935615081, "grad_norm": 1.8936797138734065, "learning_rate": 4.719252537390333e-07, "loss": 0.2527, "step": 30626 }, { "epoch": 0.532374976099011, "grad_norm": 3.607585184163714, "learning_rate": 4.718971488559472e-07, "loss": 0.2798, "step": 30627 }, { "epoch": 0.5323923586365138, "grad_norm": 1.6703894363466736, "learning_rate": 4.7186904406193434e-07, "loss": 0.1656, "step": 30628 }, { "epoch": 0.5324097411740166, "grad_norm": 2.002681222907477, "learning_rate": 4.7184093935708356e-07, "loss": 0.2917, "step": 30629 }, { "epoch": 0.5324271237115195, "grad_norm": 2.5536970595214465, "learning_rate": 4.718128347414842e-07, "loss": 0.3281, "step": 30630 }, { "epoch": 0.5324445062490223, "grad_norm": 1.1337040529475995, "learning_rate": 4.717847302152252e-07, "loss": 0.17, "step": 30631 }, { "epoch": 0.532461888786525, "grad_norm": 2.942258854842825, "learning_rate": 4.7175662577839577e-07, "loss": 0.2312, "step": 30632 }, { "epoch": 0.5324792713240278, "grad_norm": 0.7839932010105557, "learning_rate": 4.717285214310846e-07, "loss": 0.1312, "step": 30633 }, { "epoch": 0.5324966538615307, "grad_norm": 1.4307522620892645, "learning_rate": 4.717004171733812e-07, "loss": 0.2095, "step": 30634 }, { "epoch": 0.5325140363990335, "grad_norm": 1.1467365296774619, "learning_rate": 4.716723130053743e-07, "loss": 0.177, "step": 30635 }, { "epoch": 0.5325314189365363, "grad_norm": 1.426410507452486, "learning_rate": 4.7164420892715334e-07, "loss": 0.2569, "step": 30636 }, { "epoch": 0.5325488014740392, "grad_norm": 1.4438127635842741, "learning_rate": 4.7161610493880705e-07, "loss": 0.2843, "step": 30637 }, { "epoch": 0.532566184011542, "grad_norm": 1.2967234817626132, "learning_rate": 4.71588001040425e-07, "loss": 0.1516, "step": 30638 }, { "epoch": 0.5325835665490448, "grad_norm": 1.664647952122766, "learning_rate": 4.715598972320956e-07, "loss": 0.2655, "step": 30639 }, { "epoch": 0.5326009490865476, "grad_norm": 1.5720031103046803, "learning_rate": 4.715317935139084e-07, "loss": 0.4077, "step": 30640 }, { "epoch": 0.5326183316240505, "grad_norm": 1.4412659763832993, "learning_rate": 4.7150368988595225e-07, "loss": 0.2247, "step": 30641 }, { "epoch": 0.5326357141615533, "grad_norm": 2.096637413942323, "learning_rate": 4.7147558634831637e-07, "loss": 0.1778, "step": 30642 }, { "epoch": 0.5326530966990561, "grad_norm": 1.9926590349798134, "learning_rate": 4.714474829010897e-07, "loss": 0.1665, "step": 30643 }, { "epoch": 0.532670479236559, "grad_norm": 1.962057190645427, "learning_rate": 4.7141937954436144e-07, "loss": 0.2187, "step": 30644 }, { "epoch": 0.5326878617740618, "grad_norm": 1.4789804556147117, "learning_rate": 4.7139127627822085e-07, "loss": 0.2559, "step": 30645 }, { "epoch": 0.5327052443115646, "grad_norm": 3.5313214048606882, "learning_rate": 4.713631731027564e-07, "loss": 0.2495, "step": 30646 }, { "epoch": 0.5327226268490675, "grad_norm": 1.7983135453013128, "learning_rate": 4.713350700180578e-07, "loss": 0.2431, "step": 30647 }, { "epoch": 0.5327400093865703, "grad_norm": 2.202501105804405, "learning_rate": 4.713069670242137e-07, "loss": 0.2856, "step": 30648 }, { "epoch": 0.5327573919240731, "grad_norm": 2.377574928191603, "learning_rate": 4.7127886412131336e-07, "loss": 0.2887, "step": 30649 }, { "epoch": 0.532774774461576, "grad_norm": 1.4918857188354764, "learning_rate": 4.712507613094459e-07, "loss": 0.3674, "step": 30650 }, { "epoch": 0.5327921569990788, "grad_norm": 1.264414495821465, "learning_rate": 4.712226585887004e-07, "loss": 0.2097, "step": 30651 }, { "epoch": 0.5328095395365815, "grad_norm": 2.2760547057566396, "learning_rate": 4.711945559591657e-07, "loss": 0.325, "step": 30652 }, { "epoch": 0.5328269220740843, "grad_norm": 1.5136295403769688, "learning_rate": 4.7116645342093107e-07, "loss": 0.2169, "step": 30653 }, { "epoch": 0.5328443046115872, "grad_norm": 2.177363814251994, "learning_rate": 4.711383509740854e-07, "loss": 0.2227, "step": 30654 }, { "epoch": 0.53286168714909, "grad_norm": 1.4048178707911803, "learning_rate": 4.7111024861871806e-07, "loss": 0.2306, "step": 30655 }, { "epoch": 0.5328790696865928, "grad_norm": 1.544054084566577, "learning_rate": 4.71082146354918e-07, "loss": 0.198, "step": 30656 }, { "epoch": 0.5328964522240957, "grad_norm": 1.9027137459820351, "learning_rate": 4.710540441827743e-07, "loss": 0.2887, "step": 30657 }, { "epoch": 0.5329138347615985, "grad_norm": 1.5850354362552714, "learning_rate": 4.710259421023758e-07, "loss": 0.3193, "step": 30658 }, { "epoch": 0.5329312172991013, "grad_norm": 3.691192740906106, "learning_rate": 4.709978401138118e-07, "loss": 0.2979, "step": 30659 }, { "epoch": 0.5329485998366041, "grad_norm": 1.7204690540559546, "learning_rate": 4.709697382171713e-07, "loss": 0.2129, "step": 30660 }, { "epoch": 0.532965982374107, "grad_norm": 2.007810399445588, "learning_rate": 4.709416364125435e-07, "loss": 0.2719, "step": 30661 }, { "epoch": 0.5329833649116098, "grad_norm": 3.000922521668365, "learning_rate": 4.709135347000172e-07, "loss": 0.3189, "step": 30662 }, { "epoch": 0.5330007474491126, "grad_norm": 1.1756454490247605, "learning_rate": 4.7088543307968177e-07, "loss": 0.2473, "step": 30663 }, { "epoch": 0.5330181299866155, "grad_norm": 1.4051116705006006, "learning_rate": 4.708573315516262e-07, "loss": 0.2515, "step": 30664 }, { "epoch": 0.5330355125241183, "grad_norm": 1.426777323510279, "learning_rate": 4.7082923011593944e-07, "loss": 0.1923, "step": 30665 }, { "epoch": 0.5330528950616211, "grad_norm": 2.04935176458362, "learning_rate": 4.708011287727105e-07, "loss": 0.3138, "step": 30666 }, { "epoch": 0.533070277599124, "grad_norm": 1.430563034485613, "learning_rate": 4.707730275220287e-07, "loss": 0.3477, "step": 30667 }, { "epoch": 0.5330876601366268, "grad_norm": 1.7287908064354047, "learning_rate": 4.7074492636398284e-07, "loss": 0.2374, "step": 30668 }, { "epoch": 0.5331050426741296, "grad_norm": 0.9571828828416302, "learning_rate": 4.707168252986622e-07, "loss": 0.2367, "step": 30669 }, { "epoch": 0.5331224252116324, "grad_norm": 1.8538080154111598, "learning_rate": 4.70688724326156e-07, "loss": 0.3409, "step": 30670 }, { "epoch": 0.5331398077491352, "grad_norm": 1.628289738460851, "learning_rate": 4.706606234465529e-07, "loss": 0.3113, "step": 30671 }, { "epoch": 0.533157190286638, "grad_norm": 1.282173689532972, "learning_rate": 4.70632522659942e-07, "loss": 0.2555, "step": 30672 }, { "epoch": 0.5331745728241408, "grad_norm": 1.2359488242601877, "learning_rate": 4.706044219664127e-07, "loss": 0.2391, "step": 30673 }, { "epoch": 0.5331919553616437, "grad_norm": 1.5818739523552166, "learning_rate": 4.705763213660537e-07, "loss": 0.3206, "step": 30674 }, { "epoch": 0.5332093378991465, "grad_norm": 2.1387586934540015, "learning_rate": 4.705482208589544e-07, "loss": 0.1992, "step": 30675 }, { "epoch": 0.5332267204366493, "grad_norm": 1.7313474886435585, "learning_rate": 4.7052012044520377e-07, "loss": 0.176, "step": 30676 }, { "epoch": 0.5332441029741521, "grad_norm": 2.670728742331235, "learning_rate": 4.7049202012489066e-07, "loss": 0.4008, "step": 30677 }, { "epoch": 0.533261485511655, "grad_norm": 1.5659112004148519, "learning_rate": 4.7046391989810434e-07, "loss": 0.2657, "step": 30678 }, { "epoch": 0.5332788680491578, "grad_norm": 1.1305344818355685, "learning_rate": 4.704358197649339e-07, "loss": 0.2208, "step": 30679 }, { "epoch": 0.5332962505866606, "grad_norm": 1.672203840048193, "learning_rate": 4.704077197254682e-07, "loss": 0.1802, "step": 30680 }, { "epoch": 0.5333136331241635, "grad_norm": 1.9626293301382698, "learning_rate": 4.7037961977979653e-07, "loss": 0.3878, "step": 30681 }, { "epoch": 0.5333310156616663, "grad_norm": 1.1308496683591025, "learning_rate": 4.703515199280077e-07, "loss": 0.2096, "step": 30682 }, { "epoch": 0.5333483981991691, "grad_norm": 2.8615798072633467, "learning_rate": 4.703234201701912e-07, "loss": 0.2632, "step": 30683 }, { "epoch": 0.533365780736672, "grad_norm": 1.9753449478689011, "learning_rate": 4.7029532050643574e-07, "loss": 0.3259, "step": 30684 }, { "epoch": 0.5333831632741748, "grad_norm": 2.1388452264418643, "learning_rate": 4.702672209368303e-07, "loss": 0.2248, "step": 30685 }, { "epoch": 0.5334005458116776, "grad_norm": 1.0061082146209965, "learning_rate": 4.702391214614643e-07, "loss": 0.187, "step": 30686 }, { "epoch": 0.5334179283491804, "grad_norm": 2.0330779949930053, "learning_rate": 4.702110220804264e-07, "loss": 0.2275, "step": 30687 }, { "epoch": 0.5334353108866833, "grad_norm": 1.450422666873904, "learning_rate": 4.7018292279380604e-07, "loss": 0.246, "step": 30688 }, { "epoch": 0.5334526934241861, "grad_norm": 2.0312948950573855, "learning_rate": 4.701548236016922e-07, "loss": 0.1796, "step": 30689 }, { "epoch": 0.5334700759616889, "grad_norm": 1.1626034132880516, "learning_rate": 4.7012672450417377e-07, "loss": 0.2203, "step": 30690 }, { "epoch": 0.5334874584991917, "grad_norm": 2.1458038720302235, "learning_rate": 4.7009862550133985e-07, "loss": 0.2265, "step": 30691 }, { "epoch": 0.5335048410366945, "grad_norm": 1.747878760366456, "learning_rate": 4.7007052659327964e-07, "loss": 0.2163, "step": 30692 }, { "epoch": 0.5335222235741973, "grad_norm": 2.0968224947560796, "learning_rate": 4.70042427780082e-07, "loss": 0.2535, "step": 30693 }, { "epoch": 0.5335396061117001, "grad_norm": 2.0432936234893977, "learning_rate": 4.7001432906183615e-07, "loss": 0.2788, "step": 30694 }, { "epoch": 0.533556988649203, "grad_norm": 2.117561720444992, "learning_rate": 4.699862304386312e-07, "loss": 0.3043, "step": 30695 }, { "epoch": 0.5335743711867058, "grad_norm": 2.1374940975323153, "learning_rate": 4.699581319105561e-07, "loss": 0.1939, "step": 30696 }, { "epoch": 0.5335917537242086, "grad_norm": 1.5387420106795482, "learning_rate": 4.6993003347769973e-07, "loss": 0.1889, "step": 30697 }, { "epoch": 0.5336091362617115, "grad_norm": 1.5045932440148557, "learning_rate": 4.6990193514015147e-07, "loss": 0.2512, "step": 30698 }, { "epoch": 0.5336265187992143, "grad_norm": 1.2326637154574884, "learning_rate": 4.6987383689800014e-07, "loss": 0.3044, "step": 30699 }, { "epoch": 0.5336439013367171, "grad_norm": 2.156114590090077, "learning_rate": 4.6984573875133505e-07, "loss": 0.1937, "step": 30700 }, { "epoch": 0.53366128387422, "grad_norm": 1.2570353094548785, "learning_rate": 4.6981764070024497e-07, "loss": 0.1328, "step": 30701 }, { "epoch": 0.5336786664117228, "grad_norm": 1.2659030795275448, "learning_rate": 4.6978954274481937e-07, "loss": 0.2887, "step": 30702 }, { "epoch": 0.5336960489492256, "grad_norm": 1.6321882538783634, "learning_rate": 4.6976144488514693e-07, "loss": 0.241, "step": 30703 }, { "epoch": 0.5337134314867285, "grad_norm": 2.2312201184348677, "learning_rate": 4.6973334712131675e-07, "loss": 0.1591, "step": 30704 }, { "epoch": 0.5337308140242313, "grad_norm": 2.268284940168782, "learning_rate": 4.6970524945341786e-07, "loss": 0.1671, "step": 30705 }, { "epoch": 0.5337481965617341, "grad_norm": 1.095442911814166, "learning_rate": 4.6967715188153964e-07, "loss": 0.2133, "step": 30706 }, { "epoch": 0.5337655790992369, "grad_norm": 0.8752130617128155, "learning_rate": 4.696490544057707e-07, "loss": 0.1118, "step": 30707 }, { "epoch": 0.5337829616367398, "grad_norm": 2.0373698568861625, "learning_rate": 4.6962095702620055e-07, "loss": 0.3903, "step": 30708 }, { "epoch": 0.5338003441742426, "grad_norm": 1.8435304087685942, "learning_rate": 4.695928597429179e-07, "loss": 0.1995, "step": 30709 }, { "epoch": 0.5338177267117454, "grad_norm": 2.795781446246841, "learning_rate": 4.695647625560118e-07, "loss": 0.3529, "step": 30710 }, { "epoch": 0.5338351092492482, "grad_norm": 2.006152380434467, "learning_rate": 4.6953666546557153e-07, "loss": 0.2257, "step": 30711 }, { "epoch": 0.533852491786751, "grad_norm": 1.8128764284873724, "learning_rate": 4.695085684716861e-07, "loss": 0.2456, "step": 30712 }, { "epoch": 0.5338698743242538, "grad_norm": 1.6209720073244052, "learning_rate": 4.694804715744443e-07, "loss": 0.2001, "step": 30713 }, { "epoch": 0.5338872568617566, "grad_norm": 1.6143232369027385, "learning_rate": 4.694523747739357e-07, "loss": 0.3173, "step": 30714 }, { "epoch": 0.5339046393992595, "grad_norm": 1.0712249535399494, "learning_rate": 4.694242780702489e-07, "loss": 0.2162, "step": 30715 }, { "epoch": 0.5339220219367623, "grad_norm": 3.346655402465795, "learning_rate": 4.6939618146347295e-07, "loss": 0.3443, "step": 30716 }, { "epoch": 0.5339394044742651, "grad_norm": 1.3007567424346484, "learning_rate": 4.693680849536971e-07, "loss": 0.1782, "step": 30717 }, { "epoch": 0.533956787011768, "grad_norm": 2.907385564532907, "learning_rate": 4.693399885410103e-07, "loss": 0.2228, "step": 30718 }, { "epoch": 0.5339741695492708, "grad_norm": 2.3677071093862727, "learning_rate": 4.693118922255018e-07, "loss": 0.2212, "step": 30719 }, { "epoch": 0.5339915520867736, "grad_norm": 1.5483110280683643, "learning_rate": 4.692837960072606e-07, "loss": 0.1833, "step": 30720 }, { "epoch": 0.5340089346242765, "grad_norm": 1.6210147403335422, "learning_rate": 4.6925569988637553e-07, "loss": 0.2108, "step": 30721 }, { "epoch": 0.5340263171617793, "grad_norm": 1.2768476080417406, "learning_rate": 4.6922760386293563e-07, "loss": 0.2186, "step": 30722 }, { "epoch": 0.5340436996992821, "grad_norm": 2.4286294232643213, "learning_rate": 4.6919950793703026e-07, "loss": 0.2305, "step": 30723 }, { "epoch": 0.534061082236785, "grad_norm": 1.473811948595589, "learning_rate": 4.691714121087482e-07, "loss": 0.2124, "step": 30724 }, { "epoch": 0.5340784647742878, "grad_norm": 1.954048563231673, "learning_rate": 4.691433163781786e-07, "loss": 0.2114, "step": 30725 }, { "epoch": 0.5340958473117906, "grad_norm": 1.245756151193488, "learning_rate": 4.6911522074541047e-07, "loss": 0.1738, "step": 30726 }, { "epoch": 0.5341132298492934, "grad_norm": 1.5127187246411342, "learning_rate": 4.690871252105333e-07, "loss": 0.254, "step": 30727 }, { "epoch": 0.5341306123867963, "grad_norm": 1.6444807077673826, "learning_rate": 4.6905902977363534e-07, "loss": 0.191, "step": 30728 }, { "epoch": 0.5341479949242991, "grad_norm": 1.825366307635847, "learning_rate": 4.6903093443480616e-07, "loss": 0.1977, "step": 30729 }, { "epoch": 0.5341653774618019, "grad_norm": 1.5677711316515868, "learning_rate": 4.6900283919413456e-07, "loss": 0.3272, "step": 30730 }, { "epoch": 0.5341827599993046, "grad_norm": 1.5905945381028745, "learning_rate": 4.689747440517099e-07, "loss": 0.1726, "step": 30731 }, { "epoch": 0.5342001425368075, "grad_norm": 2.7438943335595987, "learning_rate": 4.6894664900762086e-07, "loss": 0.4228, "step": 30732 }, { "epoch": 0.5342175250743103, "grad_norm": 1.6302608702757049, "learning_rate": 4.6891855406195695e-07, "loss": 0.1827, "step": 30733 }, { "epoch": 0.5342349076118131, "grad_norm": 1.2992230869351, "learning_rate": 4.6889045921480685e-07, "loss": 0.1674, "step": 30734 }, { "epoch": 0.534252290149316, "grad_norm": 1.7775057129171896, "learning_rate": 4.6886236446625953e-07, "loss": 0.3134, "step": 30735 }, { "epoch": 0.5342696726868188, "grad_norm": 2.2291475965046703, "learning_rate": 4.688342698164044e-07, "loss": 0.2972, "step": 30736 }, { "epoch": 0.5342870552243216, "grad_norm": 1.6685856422213317, "learning_rate": 4.6880617526533023e-07, "loss": 0.1532, "step": 30737 }, { "epoch": 0.5343044377618245, "grad_norm": 1.73545090399262, "learning_rate": 4.687780808131261e-07, "loss": 0.2113, "step": 30738 }, { "epoch": 0.5343218202993273, "grad_norm": 1.7707802237715933, "learning_rate": 4.6874998645988136e-07, "loss": 0.2516, "step": 30739 }, { "epoch": 0.5343392028368301, "grad_norm": 2.229433281402527, "learning_rate": 4.6872189220568465e-07, "loss": 0.2112, "step": 30740 }, { "epoch": 0.534356585374333, "grad_norm": 1.8998454779458163, "learning_rate": 4.6869379805062505e-07, "loss": 0.3192, "step": 30741 }, { "epoch": 0.5343739679118358, "grad_norm": 2.0009857488705056, "learning_rate": 4.6866570399479187e-07, "loss": 0.2159, "step": 30742 }, { "epoch": 0.5343913504493386, "grad_norm": 1.8706973011525003, "learning_rate": 4.686376100382739e-07, "loss": 0.1892, "step": 30743 }, { "epoch": 0.5344087329868414, "grad_norm": 1.5273367057236893, "learning_rate": 4.686095161811604e-07, "loss": 0.2278, "step": 30744 }, { "epoch": 0.5344261155243443, "grad_norm": 0.9030449287633632, "learning_rate": 4.685814224235403e-07, "loss": 0.1387, "step": 30745 }, { "epoch": 0.5344434980618471, "grad_norm": 2.1836892393551275, "learning_rate": 4.685533287655028e-07, "loss": 0.4785, "step": 30746 }, { "epoch": 0.5344608805993499, "grad_norm": 1.319532340340316, "learning_rate": 4.685252352071366e-07, "loss": 0.2164, "step": 30747 }, { "epoch": 0.5344782631368528, "grad_norm": 1.9507968639513695, "learning_rate": 4.68497141748531e-07, "loss": 0.306, "step": 30748 }, { "epoch": 0.5344956456743556, "grad_norm": 2.356959400510714, "learning_rate": 4.684690483897749e-07, "loss": 0.2977, "step": 30749 }, { "epoch": 0.5345130282118584, "grad_norm": 2.138196684745905, "learning_rate": 4.6844095513095754e-07, "loss": 0.2465, "step": 30750 }, { "epoch": 0.5345304107493611, "grad_norm": 1.3990845151609494, "learning_rate": 4.6841286197216777e-07, "loss": 0.2542, "step": 30751 }, { "epoch": 0.534547793286864, "grad_norm": 2.335202008204754, "learning_rate": 4.68384768913495e-07, "loss": 0.2474, "step": 30752 }, { "epoch": 0.5345651758243668, "grad_norm": 1.35006933896458, "learning_rate": 4.683566759550277e-07, "loss": 0.1979, "step": 30753 }, { "epoch": 0.5345825583618696, "grad_norm": 2.879347568009613, "learning_rate": 4.6832858309685537e-07, "loss": 0.3369, "step": 30754 }, { "epoch": 0.5345999408993725, "grad_norm": 2.8056576484567266, "learning_rate": 4.6830049033906666e-07, "loss": 0.3157, "step": 30755 }, { "epoch": 0.5346173234368753, "grad_norm": 1.3280921458345532, "learning_rate": 4.68272397681751e-07, "loss": 0.1941, "step": 30756 }, { "epoch": 0.5346347059743781, "grad_norm": 2.1279068931386185, "learning_rate": 4.6824430512499714e-07, "loss": 0.2027, "step": 30757 }, { "epoch": 0.534652088511881, "grad_norm": 1.4026928514387755, "learning_rate": 4.6821621266889447e-07, "loss": 0.2069, "step": 30758 }, { "epoch": 0.5346694710493838, "grad_norm": 1.8903358825989396, "learning_rate": 4.681881203135317e-07, "loss": 0.2672, "step": 30759 }, { "epoch": 0.5346868535868866, "grad_norm": 1.3023341114247402, "learning_rate": 4.6816002805899794e-07, "loss": 0.2005, "step": 30760 }, { "epoch": 0.5347042361243894, "grad_norm": 1.83201960247268, "learning_rate": 4.6813193590538227e-07, "loss": 0.2566, "step": 30761 }, { "epoch": 0.5347216186618923, "grad_norm": 1.2918962983223485, "learning_rate": 4.681038438527737e-07, "loss": 0.3298, "step": 30762 }, { "epoch": 0.5347390011993951, "grad_norm": 3.7887622401217245, "learning_rate": 4.6807575190126125e-07, "loss": 0.3, "step": 30763 }, { "epoch": 0.5347563837368979, "grad_norm": 1.4780629417106548, "learning_rate": 4.680476600509341e-07, "loss": 0.3432, "step": 30764 }, { "epoch": 0.5347737662744008, "grad_norm": 1.641486269922541, "learning_rate": 4.6801956830188127e-07, "loss": 0.1833, "step": 30765 }, { "epoch": 0.5347911488119036, "grad_norm": 1.6191594726129885, "learning_rate": 4.6799147665419155e-07, "loss": 0.2045, "step": 30766 }, { "epoch": 0.5348085313494064, "grad_norm": 1.7294488882970913, "learning_rate": 4.679633851079542e-07, "loss": 0.4609, "step": 30767 }, { "epoch": 0.5348259138869093, "grad_norm": 1.637117027102665, "learning_rate": 4.679352936632582e-07, "loss": 0.166, "step": 30768 }, { "epoch": 0.5348432964244121, "grad_norm": 1.470068123592641, "learning_rate": 4.6790720232019256e-07, "loss": 0.1992, "step": 30769 }, { "epoch": 0.5348606789619149, "grad_norm": 2.85870005913096, "learning_rate": 4.6787911107884636e-07, "loss": 0.2767, "step": 30770 }, { "epoch": 0.5348780614994176, "grad_norm": 1.811644639046309, "learning_rate": 4.678510199393088e-07, "loss": 0.2069, "step": 30771 }, { "epoch": 0.5348954440369205, "grad_norm": 1.6212161431072822, "learning_rate": 4.6782292890166853e-07, "loss": 0.3389, "step": 30772 }, { "epoch": 0.5349128265744233, "grad_norm": 1.80983683877408, "learning_rate": 4.677948379660149e-07, "loss": 0.2526, "step": 30773 }, { "epoch": 0.5349302091119261, "grad_norm": 1.5388322874113527, "learning_rate": 4.677667471324367e-07, "loss": 0.3437, "step": 30774 }, { "epoch": 0.534947591649429, "grad_norm": 1.439936439460204, "learning_rate": 4.6773865640102317e-07, "loss": 0.2114, "step": 30775 }, { "epoch": 0.5349649741869318, "grad_norm": 1.8830146127901113, "learning_rate": 4.6771056577186326e-07, "loss": 0.1895, "step": 30776 }, { "epoch": 0.5349823567244346, "grad_norm": 1.2313703217607659, "learning_rate": 4.6768247524504626e-07, "loss": 0.2373, "step": 30777 }, { "epoch": 0.5349997392619374, "grad_norm": 1.4318767199905738, "learning_rate": 4.6765438482066074e-07, "loss": 0.262, "step": 30778 }, { "epoch": 0.5350171217994403, "grad_norm": 1.403928097291165, "learning_rate": 4.6762629449879596e-07, "loss": 0.1787, "step": 30779 }, { "epoch": 0.5350345043369431, "grad_norm": 1.3562854490545186, "learning_rate": 4.6759820427954096e-07, "loss": 0.2408, "step": 30780 }, { "epoch": 0.5350518868744459, "grad_norm": 1.4022586810212156, "learning_rate": 4.675701141629848e-07, "loss": 0.1363, "step": 30781 }, { "epoch": 0.5350692694119488, "grad_norm": 2.2272841480204812, "learning_rate": 4.675420241492164e-07, "loss": 0.229, "step": 30782 }, { "epoch": 0.5350866519494516, "grad_norm": 1.5108291860684329, "learning_rate": 4.6751393423832496e-07, "loss": 0.1485, "step": 30783 }, { "epoch": 0.5351040344869544, "grad_norm": 1.4877574183690052, "learning_rate": 4.674858444303996e-07, "loss": 0.2221, "step": 30784 }, { "epoch": 0.5351214170244573, "grad_norm": 1.787415248864068, "learning_rate": 4.67457754725529e-07, "loss": 0.3662, "step": 30785 }, { "epoch": 0.5351387995619601, "grad_norm": 1.3236175815517246, "learning_rate": 4.674296651238023e-07, "loss": 0.417, "step": 30786 }, { "epoch": 0.5351561820994629, "grad_norm": 1.4710988654996855, "learning_rate": 4.674015756253087e-07, "loss": 0.3971, "step": 30787 }, { "epoch": 0.5351735646369657, "grad_norm": 3.5532026711401894, "learning_rate": 4.6737348623013703e-07, "loss": 0.1867, "step": 30788 }, { "epoch": 0.5351909471744686, "grad_norm": 1.9238454537522043, "learning_rate": 4.673453969383765e-07, "loss": 0.4261, "step": 30789 }, { "epoch": 0.5352083297119714, "grad_norm": 2.0733985621622653, "learning_rate": 4.6731730775011613e-07, "loss": 0.2667, "step": 30790 }, { "epoch": 0.5352257122494741, "grad_norm": 1.5744642524683832, "learning_rate": 4.672892186654447e-07, "loss": 0.1808, "step": 30791 }, { "epoch": 0.535243094786977, "grad_norm": 1.7641215502478336, "learning_rate": 4.6726112968445157e-07, "loss": 0.3936, "step": 30792 }, { "epoch": 0.5352604773244798, "grad_norm": 1.285095081688962, "learning_rate": 4.672330408072256e-07, "loss": 0.1635, "step": 30793 }, { "epoch": 0.5352778598619826, "grad_norm": 1.879375967538343, "learning_rate": 4.672049520338557e-07, "loss": 0.1676, "step": 30794 }, { "epoch": 0.5352952423994854, "grad_norm": 2.0185161834357253, "learning_rate": 4.671768633644312e-07, "loss": 0.3139, "step": 30795 }, { "epoch": 0.5353126249369883, "grad_norm": 0.984537899147947, "learning_rate": 4.6714877479904106e-07, "loss": 0.2636, "step": 30796 }, { "epoch": 0.5353300074744911, "grad_norm": 1.4517064980300696, "learning_rate": 4.6712068633777394e-07, "loss": 0.3054, "step": 30797 }, { "epoch": 0.5353473900119939, "grad_norm": 1.840619979489129, "learning_rate": 4.670925979807193e-07, "loss": 0.3494, "step": 30798 }, { "epoch": 0.5353647725494968, "grad_norm": 3.827351407757228, "learning_rate": 4.670645097279659e-07, "loss": 0.3564, "step": 30799 }, { "epoch": 0.5353821550869996, "grad_norm": 1.4179538441837551, "learning_rate": 4.67036421579603e-07, "loss": 0.1875, "step": 30800 }, { "epoch": 0.5353995376245024, "grad_norm": 1.684406525770967, "learning_rate": 4.670083335357194e-07, "loss": 0.2564, "step": 30801 }, { "epoch": 0.5354169201620053, "grad_norm": 1.2659673731879395, "learning_rate": 4.6698024559640427e-07, "loss": 0.3082, "step": 30802 }, { "epoch": 0.5354343026995081, "grad_norm": 5.0799337143933245, "learning_rate": 4.6695215776174676e-07, "loss": 0.3384, "step": 30803 }, { "epoch": 0.5354516852370109, "grad_norm": 2.677490832284412, "learning_rate": 4.669240700318355e-07, "loss": 0.3918, "step": 30804 }, { "epoch": 0.5354690677745138, "grad_norm": 2.1010644532348217, "learning_rate": 4.668959824067598e-07, "loss": 0.2827, "step": 30805 }, { "epoch": 0.5354864503120166, "grad_norm": 1.462309995310369, "learning_rate": 4.6686789488660864e-07, "loss": 0.3681, "step": 30806 }, { "epoch": 0.5355038328495194, "grad_norm": 1.8059544525899696, "learning_rate": 4.66839807471471e-07, "loss": 0.1661, "step": 30807 }, { "epoch": 0.5355212153870222, "grad_norm": 2.428076782195786, "learning_rate": 4.66811720161436e-07, "loss": 0.166, "step": 30808 }, { "epoch": 0.5355385979245251, "grad_norm": 3.4716673876222712, "learning_rate": 4.667836329565927e-07, "loss": 0.2392, "step": 30809 }, { "epoch": 0.5355559804620278, "grad_norm": 2.505741725832565, "learning_rate": 4.667555458570299e-07, "loss": 0.335, "step": 30810 }, { "epoch": 0.5355733629995306, "grad_norm": 2.9885902040025507, "learning_rate": 4.667274588628367e-07, "loss": 0.2989, "step": 30811 }, { "epoch": 0.5355907455370335, "grad_norm": 1.381774368377889, "learning_rate": 4.6669937197410225e-07, "loss": 0.237, "step": 30812 }, { "epoch": 0.5356081280745363, "grad_norm": 1.631851129316384, "learning_rate": 4.666712851909154e-07, "loss": 0.1728, "step": 30813 }, { "epoch": 0.5356255106120391, "grad_norm": 1.280748358605488, "learning_rate": 4.666431985133653e-07, "loss": 0.2087, "step": 30814 }, { "epoch": 0.5356428931495419, "grad_norm": 2.0923819247583486, "learning_rate": 4.666151119415411e-07, "loss": 0.2247, "step": 30815 }, { "epoch": 0.5356602756870448, "grad_norm": 1.2440927836401572, "learning_rate": 4.665870254755316e-07, "loss": 0.2257, "step": 30816 }, { "epoch": 0.5356776582245476, "grad_norm": 1.26076427112868, "learning_rate": 4.665589391154257e-07, "loss": 0.1921, "step": 30817 }, { "epoch": 0.5356950407620504, "grad_norm": 1.4544249464919796, "learning_rate": 4.665308528613127e-07, "loss": 0.229, "step": 30818 }, { "epoch": 0.5357124232995533, "grad_norm": 1.519812194944558, "learning_rate": 4.665027667132815e-07, "loss": 0.1668, "step": 30819 }, { "epoch": 0.5357298058370561, "grad_norm": 1.421149465419217, "learning_rate": 4.664746806714212e-07, "loss": 0.2627, "step": 30820 }, { "epoch": 0.5357471883745589, "grad_norm": 2.2679217476647877, "learning_rate": 4.664465947358207e-07, "loss": 0.2193, "step": 30821 }, { "epoch": 0.5357645709120618, "grad_norm": 1.3206971871953772, "learning_rate": 4.664185089065692e-07, "loss": 0.3622, "step": 30822 }, { "epoch": 0.5357819534495646, "grad_norm": 1.7397626025339608, "learning_rate": 4.663904231837555e-07, "loss": 0.1892, "step": 30823 }, { "epoch": 0.5357993359870674, "grad_norm": 1.7695382267774793, "learning_rate": 4.663623375674688e-07, "loss": 0.2872, "step": 30824 }, { "epoch": 0.5358167185245702, "grad_norm": 0.9331377978495866, "learning_rate": 4.663342520577979e-07, "loss": 0.2725, "step": 30825 }, { "epoch": 0.5358341010620731, "grad_norm": 1.5754699498683804, "learning_rate": 4.66306166654832e-07, "loss": 0.2408, "step": 30826 }, { "epoch": 0.5358514835995759, "grad_norm": 1.1297846558011857, "learning_rate": 4.662780813586601e-07, "loss": 0.4289, "step": 30827 }, { "epoch": 0.5358688661370787, "grad_norm": 1.0670995264322811, "learning_rate": 4.6624999616937135e-07, "loss": 0.2932, "step": 30828 }, { "epoch": 0.5358862486745816, "grad_norm": 1.2945950007139535, "learning_rate": 4.6622191108705445e-07, "loss": 0.2715, "step": 30829 }, { "epoch": 0.5359036312120843, "grad_norm": 1.10296952570119, "learning_rate": 4.661938261117985e-07, "loss": 0.2004, "step": 30830 }, { "epoch": 0.5359210137495871, "grad_norm": 2.349252967673601, "learning_rate": 4.6616574124369266e-07, "loss": 0.2932, "step": 30831 }, { "epoch": 0.53593839628709, "grad_norm": 2.146429788379095, "learning_rate": 4.661376564828258e-07, "loss": 0.5337, "step": 30832 }, { "epoch": 0.5359557788245928, "grad_norm": 2.258078103677718, "learning_rate": 4.661095718292872e-07, "loss": 0.3905, "step": 30833 }, { "epoch": 0.5359731613620956, "grad_norm": 1.3515787160171921, "learning_rate": 4.660814872831657e-07, "loss": 0.2235, "step": 30834 }, { "epoch": 0.5359905438995984, "grad_norm": 1.0585890794697164, "learning_rate": 4.6605340284455027e-07, "loss": 0.2572, "step": 30835 }, { "epoch": 0.5360079264371013, "grad_norm": 1.207769949582617, "learning_rate": 4.6602531851352977e-07, "loss": 0.2517, "step": 30836 }, { "epoch": 0.5360253089746041, "grad_norm": 0.9523006780309423, "learning_rate": 4.6599723429019353e-07, "loss": 0.4328, "step": 30837 }, { "epoch": 0.5360426915121069, "grad_norm": 2.136580359638394, "learning_rate": 4.659691501746303e-07, "loss": 0.2121, "step": 30838 }, { "epoch": 0.5360600740496098, "grad_norm": 0.9904949224361134, "learning_rate": 4.6594106616692945e-07, "loss": 0.1394, "step": 30839 }, { "epoch": 0.5360774565871126, "grad_norm": 0.6339914025757012, "learning_rate": 4.659129822671798e-07, "loss": 0.1698, "step": 30840 }, { "epoch": 0.5360948391246154, "grad_norm": 1.1778280532289167, "learning_rate": 4.6588489847547026e-07, "loss": 0.1817, "step": 30841 }, { "epoch": 0.5361122216621182, "grad_norm": 9.50633947567031, "learning_rate": 4.6585681479188983e-07, "loss": 0.2627, "step": 30842 }, { "epoch": 0.5361296041996211, "grad_norm": 1.7643976903249725, "learning_rate": 4.6582873121652765e-07, "loss": 0.1983, "step": 30843 }, { "epoch": 0.5361469867371239, "grad_norm": 0.9951048536345772, "learning_rate": 4.6580064774947266e-07, "loss": 0.1919, "step": 30844 }, { "epoch": 0.5361643692746267, "grad_norm": 1.5413646640394367, "learning_rate": 4.6577256439081397e-07, "loss": 0.2985, "step": 30845 }, { "epoch": 0.5361817518121296, "grad_norm": 1.1987015552527183, "learning_rate": 4.6574448114064044e-07, "loss": 0.2723, "step": 30846 }, { "epoch": 0.5361991343496324, "grad_norm": 3.1764018443942925, "learning_rate": 4.657163979990414e-07, "loss": 0.3601, "step": 30847 }, { "epoch": 0.5362165168871352, "grad_norm": 1.300586116932106, "learning_rate": 4.656883149661055e-07, "loss": 0.2729, "step": 30848 }, { "epoch": 0.5362338994246381, "grad_norm": 1.4504116593059855, "learning_rate": 4.6566023204192183e-07, "loss": 0.1866, "step": 30849 }, { "epoch": 0.5362512819621408, "grad_norm": 1.3576058852736526, "learning_rate": 4.6563214922657944e-07, "loss": 0.3672, "step": 30850 }, { "epoch": 0.5362686644996436, "grad_norm": 1.3700828965288854, "learning_rate": 4.6560406652016735e-07, "loss": 0.1861, "step": 30851 }, { "epoch": 0.5362860470371464, "grad_norm": 1.152444680204388, "learning_rate": 4.655759839227746e-07, "loss": 0.1773, "step": 30852 }, { "epoch": 0.5363034295746493, "grad_norm": 1.272900670183918, "learning_rate": 4.655479014344903e-07, "loss": 0.1923, "step": 30853 }, { "epoch": 0.5363208121121521, "grad_norm": 1.3971378565990789, "learning_rate": 4.6551981905540324e-07, "loss": 0.248, "step": 30854 }, { "epoch": 0.5363381946496549, "grad_norm": 1.1958573965587083, "learning_rate": 4.654917367856024e-07, "loss": 0.4927, "step": 30855 }, { "epoch": 0.5363555771871578, "grad_norm": 2.356587961742087, "learning_rate": 4.6546365462517697e-07, "loss": 0.3122, "step": 30856 }, { "epoch": 0.5363729597246606, "grad_norm": 1.1798235833398085, "learning_rate": 4.654355725742159e-07, "loss": 0.2803, "step": 30857 }, { "epoch": 0.5363903422621634, "grad_norm": 1.946967367936269, "learning_rate": 4.654074906328081e-07, "loss": 0.2038, "step": 30858 }, { "epoch": 0.5364077247996663, "grad_norm": 1.4828098642323593, "learning_rate": 4.653794088010429e-07, "loss": 0.2902, "step": 30859 }, { "epoch": 0.5364251073371691, "grad_norm": 1.155933746053136, "learning_rate": 4.653513270790089e-07, "loss": 0.1949, "step": 30860 }, { "epoch": 0.5364424898746719, "grad_norm": 1.8696278065545018, "learning_rate": 4.6532324546679526e-07, "loss": 0.2179, "step": 30861 }, { "epoch": 0.5364598724121747, "grad_norm": 1.2592124345804645, "learning_rate": 4.652951639644911e-07, "loss": 0.2551, "step": 30862 }, { "epoch": 0.5364772549496776, "grad_norm": 1.6805364029798542, "learning_rate": 4.6526708257218514e-07, "loss": 0.2433, "step": 30863 }, { "epoch": 0.5364946374871804, "grad_norm": 1.981234428254257, "learning_rate": 4.652390012899667e-07, "loss": 0.6215, "step": 30864 }, { "epoch": 0.5365120200246832, "grad_norm": 1.0964159142226277, "learning_rate": 4.6521092011792474e-07, "loss": 0.2369, "step": 30865 }, { "epoch": 0.5365294025621861, "grad_norm": 1.4341049319761034, "learning_rate": 4.651828390561482e-07, "loss": 0.1293, "step": 30866 }, { "epoch": 0.5365467850996889, "grad_norm": 2.6360634452732623, "learning_rate": 4.651547581047259e-07, "loss": 0.2912, "step": 30867 }, { "epoch": 0.5365641676371917, "grad_norm": 1.3381350391075626, "learning_rate": 4.651266772637471e-07, "loss": 0.1854, "step": 30868 }, { "epoch": 0.5365815501746946, "grad_norm": 1.1912965847794568, "learning_rate": 4.6509859653330056e-07, "loss": 0.1158, "step": 30869 }, { "epoch": 0.5365989327121973, "grad_norm": 1.8225020129076772, "learning_rate": 4.650705159134756e-07, "loss": 0.2387, "step": 30870 }, { "epoch": 0.5366163152497001, "grad_norm": 1.9801268897898499, "learning_rate": 4.65042435404361e-07, "loss": 0.2276, "step": 30871 }, { "epoch": 0.5366336977872029, "grad_norm": 1.386857834910417, "learning_rate": 4.650143550060461e-07, "loss": 0.1567, "step": 30872 }, { "epoch": 0.5366510803247058, "grad_norm": 1.7656151788887722, "learning_rate": 4.649862747186193e-07, "loss": 0.217, "step": 30873 }, { "epoch": 0.5366684628622086, "grad_norm": 1.0438958218093173, "learning_rate": 4.6495819454217e-07, "loss": 0.2308, "step": 30874 }, { "epoch": 0.5366858453997114, "grad_norm": 1.4791396053520365, "learning_rate": 4.64930114476787e-07, "loss": 0.1819, "step": 30875 }, { "epoch": 0.5367032279372143, "grad_norm": 4.230324613741538, "learning_rate": 4.649020345225596e-07, "loss": 0.2733, "step": 30876 }, { "epoch": 0.5367206104747171, "grad_norm": 1.8186669838864147, "learning_rate": 4.648739546795765e-07, "loss": 0.2349, "step": 30877 }, { "epoch": 0.5367379930122199, "grad_norm": 2.1392273089516336, "learning_rate": 4.6484587494792706e-07, "loss": 0.3127, "step": 30878 }, { "epoch": 0.5367553755497227, "grad_norm": 1.9400165247519858, "learning_rate": 4.648177953276999e-07, "loss": 0.2304, "step": 30879 }, { "epoch": 0.5367727580872256, "grad_norm": 1.5803789303622453, "learning_rate": 4.647897158189841e-07, "loss": 0.1451, "step": 30880 }, { "epoch": 0.5367901406247284, "grad_norm": 0.8627150706321933, "learning_rate": 4.647616364218688e-07, "loss": 0.1715, "step": 30881 }, { "epoch": 0.5368075231622312, "grad_norm": 1.687817784539181, "learning_rate": 4.647335571364429e-07, "loss": 0.2544, "step": 30882 }, { "epoch": 0.5368249056997341, "grad_norm": 1.8906165773560566, "learning_rate": 4.6470547796279534e-07, "loss": 0.1829, "step": 30883 }, { "epoch": 0.5368422882372369, "grad_norm": 2.947845141716297, "learning_rate": 4.6467739890101535e-07, "loss": 0.366, "step": 30884 }, { "epoch": 0.5368596707747397, "grad_norm": 2.058884084210224, "learning_rate": 4.646493199511919e-07, "loss": 0.2289, "step": 30885 }, { "epoch": 0.5368770533122426, "grad_norm": 1.1328076895566834, "learning_rate": 4.646212411134136e-07, "loss": 0.2849, "step": 30886 }, { "epoch": 0.5368944358497454, "grad_norm": 2.370611485738962, "learning_rate": 4.6459316238776987e-07, "loss": 0.2134, "step": 30887 }, { "epoch": 0.5369118183872482, "grad_norm": 0.9477812410899369, "learning_rate": 4.645650837743494e-07, "loss": 0.3288, "step": 30888 }, { "epoch": 0.536929200924751, "grad_norm": 0.9499066304702549, "learning_rate": 4.6453700527324144e-07, "loss": 0.1649, "step": 30889 }, { "epoch": 0.5369465834622538, "grad_norm": 1.8502622504589286, "learning_rate": 4.645089268845349e-07, "loss": 0.2187, "step": 30890 }, { "epoch": 0.5369639659997566, "grad_norm": 1.160845727815117, "learning_rate": 4.6448084860831883e-07, "loss": 0.2372, "step": 30891 }, { "epoch": 0.5369813485372594, "grad_norm": 1.2026356461203265, "learning_rate": 4.64452770444682e-07, "loss": 0.2535, "step": 30892 }, { "epoch": 0.5369987310747623, "grad_norm": 2.3597698493582295, "learning_rate": 4.644246923937136e-07, "loss": 0.2472, "step": 30893 }, { "epoch": 0.5370161136122651, "grad_norm": 1.0741702220958131, "learning_rate": 4.6439661445550256e-07, "loss": 0.183, "step": 30894 }, { "epoch": 0.5370334961497679, "grad_norm": 1.623364702330058, "learning_rate": 4.6436853663013794e-07, "loss": 0.269, "step": 30895 }, { "epoch": 0.5370508786872707, "grad_norm": 2.6788377999761854, "learning_rate": 4.643404589177086e-07, "loss": 0.2226, "step": 30896 }, { "epoch": 0.5370682612247736, "grad_norm": 1.1230077227768105, "learning_rate": 4.64312381318304e-07, "loss": 0.3119, "step": 30897 }, { "epoch": 0.5370856437622764, "grad_norm": 1.857387406576968, "learning_rate": 4.642843038320123e-07, "loss": 0.2637, "step": 30898 }, { "epoch": 0.5371030262997792, "grad_norm": 1.2662280507230466, "learning_rate": 4.6425622645892316e-07, "loss": 0.2012, "step": 30899 }, { "epoch": 0.5371204088372821, "grad_norm": 1.9355088445171442, "learning_rate": 4.6422814919912525e-07, "loss": 0.1459, "step": 30900 }, { "epoch": 0.5371377913747849, "grad_norm": 4.282382548532049, "learning_rate": 4.642000720527078e-07, "loss": 0.3197, "step": 30901 }, { "epoch": 0.5371551739122877, "grad_norm": 1.2733954115230819, "learning_rate": 4.641719950197595e-07, "loss": 0.2268, "step": 30902 }, { "epoch": 0.5371725564497906, "grad_norm": 2.934893446640811, "learning_rate": 4.641439181003697e-07, "loss": 0.177, "step": 30903 }, { "epoch": 0.5371899389872934, "grad_norm": 1.034398337541478, "learning_rate": 4.6411584129462723e-07, "loss": 0.4603, "step": 30904 }, { "epoch": 0.5372073215247962, "grad_norm": 1.2905192055716075, "learning_rate": 4.64087764602621e-07, "loss": 0.2954, "step": 30905 }, { "epoch": 0.537224704062299, "grad_norm": 1.84059796420749, "learning_rate": 4.6405968802443994e-07, "loss": 0.3465, "step": 30906 }, { "epoch": 0.5372420865998019, "grad_norm": 1.537108142440624, "learning_rate": 4.6403161156017336e-07, "loss": 0.3413, "step": 30907 }, { "epoch": 0.5372594691373047, "grad_norm": 1.711106158559381, "learning_rate": 4.6400353520990984e-07, "loss": 0.2098, "step": 30908 }, { "epoch": 0.5372768516748075, "grad_norm": 1.1824423561430795, "learning_rate": 4.639754589737387e-07, "loss": 0.2496, "step": 30909 }, { "epoch": 0.5372942342123103, "grad_norm": 1.8018036096657992, "learning_rate": 4.639473828517489e-07, "loss": 0.2177, "step": 30910 }, { "epoch": 0.5373116167498131, "grad_norm": 1.0083782666688434, "learning_rate": 4.639193068440291e-07, "loss": 0.2278, "step": 30911 }, { "epoch": 0.5373289992873159, "grad_norm": 2.540950744273231, "learning_rate": 4.638912309506687e-07, "loss": 0.2208, "step": 30912 }, { "epoch": 0.5373463818248188, "grad_norm": 1.953024180570234, "learning_rate": 4.638631551717565e-07, "loss": 0.3396, "step": 30913 }, { "epoch": 0.5373637643623216, "grad_norm": 2.0385934066964966, "learning_rate": 4.6383507950738136e-07, "loss": 0.1584, "step": 30914 }, { "epoch": 0.5373811468998244, "grad_norm": 1.0701593493360193, "learning_rate": 4.638070039576325e-07, "loss": 0.2245, "step": 30915 }, { "epoch": 0.5373985294373272, "grad_norm": 2.549856633581074, "learning_rate": 4.6377892852259896e-07, "loss": 0.2118, "step": 30916 }, { "epoch": 0.5374159119748301, "grad_norm": 1.2096280658209193, "learning_rate": 4.637508532023694e-07, "loss": 0.1971, "step": 30917 }, { "epoch": 0.5374332945123329, "grad_norm": 1.8100812047688186, "learning_rate": 4.6372277799703305e-07, "loss": 0.3291, "step": 30918 }, { "epoch": 0.5374506770498357, "grad_norm": 1.02008963081775, "learning_rate": 4.636947029066787e-07, "loss": 0.2413, "step": 30919 }, { "epoch": 0.5374680595873386, "grad_norm": 1.399700041734647, "learning_rate": 4.6366662793139554e-07, "loss": 0.2443, "step": 30920 }, { "epoch": 0.5374854421248414, "grad_norm": 1.0055659925996567, "learning_rate": 4.636385530712724e-07, "loss": 0.3578, "step": 30921 }, { "epoch": 0.5375028246623442, "grad_norm": 1.400249039158934, "learning_rate": 4.636104783263985e-07, "loss": 0.2419, "step": 30922 }, { "epoch": 0.537520207199847, "grad_norm": 1.153181560046945, "learning_rate": 4.6358240369686275e-07, "loss": 0.2768, "step": 30923 }, { "epoch": 0.5375375897373499, "grad_norm": 2.495366310934207, "learning_rate": 4.6355432918275395e-07, "loss": 0.3089, "step": 30924 }, { "epoch": 0.5375549722748527, "grad_norm": 1.3286656962679197, "learning_rate": 4.63526254784161e-07, "loss": 0.2109, "step": 30925 }, { "epoch": 0.5375723548123555, "grad_norm": 1.9193807426523197, "learning_rate": 4.634981805011733e-07, "loss": 0.3687, "step": 30926 }, { "epoch": 0.5375897373498584, "grad_norm": 1.8202599178455043, "learning_rate": 4.634701063338794e-07, "loss": 0.3283, "step": 30927 }, { "epoch": 0.5376071198873612, "grad_norm": 1.1322970461543458, "learning_rate": 4.6344203228236864e-07, "loss": 0.2204, "step": 30928 }, { "epoch": 0.537624502424864, "grad_norm": 1.5925608034653742, "learning_rate": 4.634139583467299e-07, "loss": 0.2744, "step": 30929 }, { "epoch": 0.5376418849623668, "grad_norm": 2.527540286848692, "learning_rate": 4.633858845270521e-07, "loss": 0.3353, "step": 30930 }, { "epoch": 0.5376592674998696, "grad_norm": 1.5187874539050148, "learning_rate": 4.63357810823424e-07, "loss": 0.3277, "step": 30931 }, { "epoch": 0.5376766500373724, "grad_norm": 1.5548746078505347, "learning_rate": 4.6332973723593495e-07, "loss": 0.2099, "step": 30932 }, { "epoch": 0.5376940325748752, "grad_norm": 1.2246846689164455, "learning_rate": 4.6330166376467373e-07, "loss": 0.1228, "step": 30933 }, { "epoch": 0.5377114151123781, "grad_norm": 1.8485584405980122, "learning_rate": 4.632735904097294e-07, "loss": 0.2572, "step": 30934 }, { "epoch": 0.5377287976498809, "grad_norm": 2.459225927321551, "learning_rate": 4.632455171711911e-07, "loss": 0.3245, "step": 30935 }, { "epoch": 0.5377461801873837, "grad_norm": 1.6582085165749347, "learning_rate": 4.632174440491474e-07, "loss": 0.4645, "step": 30936 }, { "epoch": 0.5377635627248866, "grad_norm": 1.6882774294478275, "learning_rate": 4.631893710436876e-07, "loss": 0.2662, "step": 30937 }, { "epoch": 0.5377809452623894, "grad_norm": 1.3407470390944405, "learning_rate": 4.631612981549006e-07, "loss": 0.2756, "step": 30938 }, { "epoch": 0.5377983277998922, "grad_norm": 1.033370199275533, "learning_rate": 4.6313322538287524e-07, "loss": 0.2063, "step": 30939 }, { "epoch": 0.5378157103373951, "grad_norm": 1.4122190635333853, "learning_rate": 4.6310515272770073e-07, "loss": 0.212, "step": 30940 }, { "epoch": 0.5378330928748979, "grad_norm": 1.3540081650831663, "learning_rate": 4.630770801894658e-07, "loss": 0.2548, "step": 30941 }, { "epoch": 0.5378504754124007, "grad_norm": 3.1680526926638293, "learning_rate": 4.6304900776825984e-07, "loss": 0.2669, "step": 30942 }, { "epoch": 0.5378678579499035, "grad_norm": 2.8835373378446874, "learning_rate": 4.6302093546417144e-07, "loss": 0.298, "step": 30943 }, { "epoch": 0.5378852404874064, "grad_norm": 2.1258237213407867, "learning_rate": 4.6299286327728947e-07, "loss": 0.3382, "step": 30944 }, { "epoch": 0.5379026230249092, "grad_norm": 1.352618522506815, "learning_rate": 4.6296479120770336e-07, "loss": 0.2323, "step": 30945 }, { "epoch": 0.537920005562412, "grad_norm": 2.0601494899697337, "learning_rate": 4.629367192555018e-07, "loss": 0.2371, "step": 30946 }, { "epoch": 0.5379373880999149, "grad_norm": 1.5059247295092506, "learning_rate": 4.6290864742077375e-07, "loss": 0.1574, "step": 30947 }, { "epoch": 0.5379547706374177, "grad_norm": 2.548094694587628, "learning_rate": 4.628805757036084e-07, "loss": 0.2611, "step": 30948 }, { "epoch": 0.5379721531749204, "grad_norm": 1.5540156620205061, "learning_rate": 4.628525041040945e-07, "loss": 0.3132, "step": 30949 }, { "epoch": 0.5379895357124233, "grad_norm": 1.879817251337967, "learning_rate": 4.62824432622321e-07, "loss": 0.1801, "step": 30950 }, { "epoch": 0.5380069182499261, "grad_norm": 1.2573924498271325, "learning_rate": 4.62796361258377e-07, "loss": 0.1871, "step": 30951 }, { "epoch": 0.5380243007874289, "grad_norm": 1.513427021283336, "learning_rate": 4.627682900123514e-07, "loss": 0.1471, "step": 30952 }, { "epoch": 0.5380416833249317, "grad_norm": 1.7610162418746744, "learning_rate": 4.6274021888433334e-07, "loss": 0.2195, "step": 30953 }, { "epoch": 0.5380590658624346, "grad_norm": 1.3043177044883318, "learning_rate": 4.6271214787441175e-07, "loss": 0.2186, "step": 30954 }, { "epoch": 0.5380764483999374, "grad_norm": 1.2513476064029274, "learning_rate": 4.626840769826754e-07, "loss": 0.2806, "step": 30955 }, { "epoch": 0.5380938309374402, "grad_norm": 1.9580036316472453, "learning_rate": 4.6265600620921333e-07, "loss": 0.2041, "step": 30956 }, { "epoch": 0.5381112134749431, "grad_norm": 1.5579593654534285, "learning_rate": 4.6262793555411464e-07, "loss": 0.3253, "step": 30957 }, { "epoch": 0.5381285960124459, "grad_norm": 1.1454479019624058, "learning_rate": 4.6259986501746816e-07, "loss": 0.1696, "step": 30958 }, { "epoch": 0.5381459785499487, "grad_norm": 1.9676513813703331, "learning_rate": 4.6257179459936294e-07, "loss": 0.2132, "step": 30959 }, { "epoch": 0.5381633610874516, "grad_norm": 2.4548669813264423, "learning_rate": 4.625437242998881e-07, "loss": 0.189, "step": 30960 }, { "epoch": 0.5381807436249544, "grad_norm": 1.3272237700684473, "learning_rate": 4.625156541191324e-07, "loss": 0.3024, "step": 30961 }, { "epoch": 0.5381981261624572, "grad_norm": 2.0255318592668914, "learning_rate": 4.624875840571847e-07, "loss": 0.3113, "step": 30962 }, { "epoch": 0.53821550869996, "grad_norm": 1.7864210677050334, "learning_rate": 4.6245951411413426e-07, "loss": 0.2905, "step": 30963 }, { "epoch": 0.5382328912374629, "grad_norm": 1.5065617865121006, "learning_rate": 4.6243144429006976e-07, "loss": 0.1869, "step": 30964 }, { "epoch": 0.5382502737749657, "grad_norm": 1.9165214783282838, "learning_rate": 4.6240337458508046e-07, "loss": 0.327, "step": 30965 }, { "epoch": 0.5382676563124685, "grad_norm": 1.3980147522894788, "learning_rate": 4.6237530499925504e-07, "loss": 0.327, "step": 30966 }, { "epoch": 0.5382850388499714, "grad_norm": 1.7724455181900227, "learning_rate": 4.623472355326829e-07, "loss": 0.2619, "step": 30967 }, { "epoch": 0.5383024213874742, "grad_norm": 1.8749483667390465, "learning_rate": 4.623191661854526e-07, "loss": 0.213, "step": 30968 }, { "epoch": 0.5383198039249769, "grad_norm": 1.0108966494648564, "learning_rate": 4.622910969576531e-07, "loss": 0.1464, "step": 30969 }, { "epoch": 0.5383371864624797, "grad_norm": 0.9778399027908042, "learning_rate": 4.622630278493736e-07, "loss": 0.247, "step": 30970 }, { "epoch": 0.5383545689999826, "grad_norm": 1.4482996087934095, "learning_rate": 4.62234958860703e-07, "loss": 0.2464, "step": 30971 }, { "epoch": 0.5383719515374854, "grad_norm": 1.432499655017708, "learning_rate": 4.6220688999173015e-07, "loss": 0.2489, "step": 30972 }, { "epoch": 0.5383893340749882, "grad_norm": 1.758490310978925, "learning_rate": 4.6217882124254427e-07, "loss": 0.2771, "step": 30973 }, { "epoch": 0.5384067166124911, "grad_norm": 1.3264464539150416, "learning_rate": 4.6215075261323403e-07, "loss": 0.3025, "step": 30974 }, { "epoch": 0.5384240991499939, "grad_norm": 1.4451251834529832, "learning_rate": 4.6212268410388844e-07, "loss": 0.2194, "step": 30975 }, { "epoch": 0.5384414816874967, "grad_norm": 1.9769499610546195, "learning_rate": 4.6209461571459664e-07, "loss": 0.2225, "step": 30976 }, { "epoch": 0.5384588642249996, "grad_norm": 2.46595332746619, "learning_rate": 4.6206654744544735e-07, "loss": 0.2467, "step": 30977 }, { "epoch": 0.5384762467625024, "grad_norm": 1.289114350541702, "learning_rate": 4.620384792965299e-07, "loss": 0.324, "step": 30978 }, { "epoch": 0.5384936293000052, "grad_norm": 1.5380083850608874, "learning_rate": 4.6201041126793304e-07, "loss": 0.1685, "step": 30979 }, { "epoch": 0.538511011837508, "grad_norm": 2.7574186135591647, "learning_rate": 4.619823433597457e-07, "loss": 0.3529, "step": 30980 }, { "epoch": 0.5385283943750109, "grad_norm": 1.1968926806258533, "learning_rate": 4.619542755720567e-07, "loss": 0.1286, "step": 30981 }, { "epoch": 0.5385457769125137, "grad_norm": 2.0129151706513992, "learning_rate": 4.6192620790495524e-07, "loss": 0.2184, "step": 30982 }, { "epoch": 0.5385631594500165, "grad_norm": 1.9215856369081266, "learning_rate": 4.6189814035853015e-07, "loss": 0.2683, "step": 30983 }, { "epoch": 0.5385805419875194, "grad_norm": 1.3459079238310727, "learning_rate": 4.6187007293287057e-07, "loss": 0.5378, "step": 30984 }, { "epoch": 0.5385979245250222, "grad_norm": 0.9218293504385556, "learning_rate": 4.618420056280652e-07, "loss": 0.2185, "step": 30985 }, { "epoch": 0.538615307062525, "grad_norm": 2.047093692221677, "learning_rate": 4.6181393844420353e-07, "loss": 0.3169, "step": 30986 }, { "epoch": 0.5386326896000279, "grad_norm": 2.056000260440658, "learning_rate": 4.6178587138137374e-07, "loss": 0.2638, "step": 30987 }, { "epoch": 0.5386500721375307, "grad_norm": 3.254074468485811, "learning_rate": 4.617578044396653e-07, "loss": 0.2814, "step": 30988 }, { "epoch": 0.5386674546750334, "grad_norm": 1.96011399028822, "learning_rate": 4.61729737619167e-07, "loss": 0.3406, "step": 30989 }, { "epoch": 0.5386848372125362, "grad_norm": 1.9575186950061696, "learning_rate": 4.617016709199679e-07, "loss": 0.3651, "step": 30990 }, { "epoch": 0.5387022197500391, "grad_norm": 1.7540381700058767, "learning_rate": 4.6167360434215687e-07, "loss": 0.4764, "step": 30991 }, { "epoch": 0.5387196022875419, "grad_norm": 6.485522753843154, "learning_rate": 4.616455378858231e-07, "loss": 0.2938, "step": 30992 }, { "epoch": 0.5387369848250447, "grad_norm": 1.9382062843361805, "learning_rate": 4.6161747155105524e-07, "loss": 0.3505, "step": 30993 }, { "epoch": 0.5387543673625476, "grad_norm": 1.7107970399846233, "learning_rate": 4.6158940533794233e-07, "loss": 0.3336, "step": 30994 }, { "epoch": 0.5387717499000504, "grad_norm": 1.4542557973436194, "learning_rate": 4.615613392465733e-07, "loss": 0.335, "step": 30995 }, { "epoch": 0.5387891324375532, "grad_norm": 1.4327710736388626, "learning_rate": 4.615332732770372e-07, "loss": 0.267, "step": 30996 }, { "epoch": 0.538806514975056, "grad_norm": 1.6057849982650116, "learning_rate": 4.615052074294229e-07, "loss": 0.1814, "step": 30997 }, { "epoch": 0.5388238975125589, "grad_norm": 1.208686769403466, "learning_rate": 4.6147714170381967e-07, "loss": 0.2475, "step": 30998 }, { "epoch": 0.5388412800500617, "grad_norm": 1.4752463726078042, "learning_rate": 4.614490761003161e-07, "loss": 0.1814, "step": 30999 }, { "epoch": 0.5388586625875645, "grad_norm": 1.4125161738113519, "learning_rate": 4.6142101061900106e-07, "loss": 0.1751, "step": 31000 }, { "epoch": 0.5388760451250674, "grad_norm": 0.8083580238224619, "learning_rate": 4.613929452599638e-07, "loss": 0.1603, "step": 31001 }, { "epoch": 0.5388934276625702, "grad_norm": 1.9111828500881618, "learning_rate": 4.613648800232932e-07, "loss": 0.323, "step": 31002 }, { "epoch": 0.538910810200073, "grad_norm": 2.277962443187513, "learning_rate": 4.613368149090781e-07, "loss": 0.267, "step": 31003 }, { "epoch": 0.5389281927375759, "grad_norm": 1.5894915416844109, "learning_rate": 4.6130874991740763e-07, "loss": 0.2606, "step": 31004 }, { "epoch": 0.5389455752750787, "grad_norm": 1.523016161595886, "learning_rate": 4.612806850483708e-07, "loss": 0.2673, "step": 31005 }, { "epoch": 0.5389629578125815, "grad_norm": 2.1637347780155474, "learning_rate": 4.6125262030205614e-07, "loss": 0.2853, "step": 31006 }, { "epoch": 0.5389803403500844, "grad_norm": 1.63818744978265, "learning_rate": 4.6122455567855296e-07, "loss": 0.2192, "step": 31007 }, { "epoch": 0.5389977228875872, "grad_norm": 1.7028636811527424, "learning_rate": 4.611964911779501e-07, "loss": 0.2837, "step": 31008 }, { "epoch": 0.5390151054250899, "grad_norm": 1.7573817489325494, "learning_rate": 4.6116842680033655e-07, "loss": 0.1442, "step": 31009 }, { "epoch": 0.5390324879625927, "grad_norm": 1.438458049552515, "learning_rate": 4.611403625458012e-07, "loss": 0.214, "step": 31010 }, { "epoch": 0.5390498705000956, "grad_norm": 1.2473687387826409, "learning_rate": 4.611122984144333e-07, "loss": 0.2246, "step": 31011 }, { "epoch": 0.5390672530375984, "grad_norm": 1.6933737241424478, "learning_rate": 4.6108423440632134e-07, "loss": 0.1924, "step": 31012 }, { "epoch": 0.5390846355751012, "grad_norm": 2.03724798737838, "learning_rate": 4.6105617052155456e-07, "loss": 0.1935, "step": 31013 }, { "epoch": 0.539102018112604, "grad_norm": 1.66022241028414, "learning_rate": 4.610281067602217e-07, "loss": 0.207, "step": 31014 }, { "epoch": 0.5391194006501069, "grad_norm": 1.4768695701205088, "learning_rate": 4.6100004312241194e-07, "loss": 0.292, "step": 31015 }, { "epoch": 0.5391367831876097, "grad_norm": 1.0638754523857938, "learning_rate": 4.60971979608214e-07, "loss": 0.1241, "step": 31016 }, { "epoch": 0.5391541657251125, "grad_norm": 1.4700299001936468, "learning_rate": 4.6094391621771723e-07, "loss": 0.2215, "step": 31017 }, { "epoch": 0.5391715482626154, "grad_norm": 2.5643897446444175, "learning_rate": 4.609158529510102e-07, "loss": 0.4473, "step": 31018 }, { "epoch": 0.5391889308001182, "grad_norm": 1.6448712874692082, "learning_rate": 4.608877898081819e-07, "loss": 0.1881, "step": 31019 }, { "epoch": 0.539206313337621, "grad_norm": 1.511248173094535, "learning_rate": 4.6085972678932123e-07, "loss": 0.3185, "step": 31020 }, { "epoch": 0.5392236958751239, "grad_norm": 1.2036197470059193, "learning_rate": 4.608316638945174e-07, "loss": 0.2329, "step": 31021 }, { "epoch": 0.5392410784126267, "grad_norm": 2.331598389341729, "learning_rate": 4.6080360112385907e-07, "loss": 0.2712, "step": 31022 }, { "epoch": 0.5392584609501295, "grad_norm": 0.953775308564847, "learning_rate": 4.6077553847743546e-07, "loss": 0.2467, "step": 31023 }, { "epoch": 0.5392758434876324, "grad_norm": 1.1894341967735966, "learning_rate": 4.607474759553354e-07, "loss": 0.3635, "step": 31024 }, { "epoch": 0.5392932260251352, "grad_norm": 1.386228566188082, "learning_rate": 4.607194135576477e-07, "loss": 0.2712, "step": 31025 }, { "epoch": 0.539310608562638, "grad_norm": 1.2224320264354085, "learning_rate": 4.6069135128446147e-07, "loss": 0.2428, "step": 31026 }, { "epoch": 0.5393279911001408, "grad_norm": 0.9293439924722319, "learning_rate": 4.606632891358656e-07, "loss": 0.2192, "step": 31027 }, { "epoch": 0.5393453736376437, "grad_norm": 2.3050924207300127, "learning_rate": 4.6063522711194893e-07, "loss": 0.3785, "step": 31028 }, { "epoch": 0.5393627561751464, "grad_norm": 2.5166395233473415, "learning_rate": 4.6060716521280065e-07, "loss": 0.3467, "step": 31029 }, { "epoch": 0.5393801387126492, "grad_norm": 2.7271551495200304, "learning_rate": 4.605791034385096e-07, "loss": 0.359, "step": 31030 }, { "epoch": 0.5393975212501521, "grad_norm": 2.215969752415863, "learning_rate": 4.605510417891645e-07, "loss": 0.2237, "step": 31031 }, { "epoch": 0.5394149037876549, "grad_norm": 2.2530989958863983, "learning_rate": 4.605229802648546e-07, "loss": 0.3748, "step": 31032 }, { "epoch": 0.5394322863251577, "grad_norm": 2.2118027783093313, "learning_rate": 4.604949188656686e-07, "loss": 0.1858, "step": 31033 }, { "epoch": 0.5394496688626605, "grad_norm": 1.707808383826615, "learning_rate": 4.604668575916957e-07, "loss": 0.256, "step": 31034 }, { "epoch": 0.5394670514001634, "grad_norm": 1.4041241381469793, "learning_rate": 4.6043879644302463e-07, "loss": 0.1522, "step": 31035 }, { "epoch": 0.5394844339376662, "grad_norm": 1.3243814891203256, "learning_rate": 4.604107354197446e-07, "loss": 0.2854, "step": 31036 }, { "epoch": 0.539501816475169, "grad_norm": 1.3174285627862146, "learning_rate": 4.6038267452194404e-07, "loss": 0.3838, "step": 31037 }, { "epoch": 0.5395191990126719, "grad_norm": 1.8999700340958376, "learning_rate": 4.603546137497124e-07, "loss": 0.1984, "step": 31038 }, { "epoch": 0.5395365815501747, "grad_norm": 1.2557965814620318, "learning_rate": 4.603265531031382e-07, "loss": 0.1793, "step": 31039 }, { "epoch": 0.5395539640876775, "grad_norm": 1.115037019254607, "learning_rate": 4.602984925823108e-07, "loss": 0.256, "step": 31040 }, { "epoch": 0.5395713466251804, "grad_norm": 1.5177974038245066, "learning_rate": 4.602704321873188e-07, "loss": 0.2053, "step": 31041 }, { "epoch": 0.5395887291626832, "grad_norm": 1.6295604885789041, "learning_rate": 4.602423719182514e-07, "loss": 0.2339, "step": 31042 }, { "epoch": 0.539606111700186, "grad_norm": 2.6682049298062522, "learning_rate": 4.6021431177519755e-07, "loss": 0.2341, "step": 31043 }, { "epoch": 0.5396234942376888, "grad_norm": 0.870843919187461, "learning_rate": 4.601862517582459e-07, "loss": 0.2689, "step": 31044 }, { "epoch": 0.5396408767751917, "grad_norm": 1.0159174985661705, "learning_rate": 4.601581918674854e-07, "loss": 0.2167, "step": 31045 }, { "epoch": 0.5396582593126945, "grad_norm": 1.4711700811898119, "learning_rate": 4.601301321030054e-07, "loss": 0.1928, "step": 31046 }, { "epoch": 0.5396756418501973, "grad_norm": 1.56819109583955, "learning_rate": 4.601020724648943e-07, "loss": 0.1501, "step": 31047 }, { "epoch": 0.5396930243877002, "grad_norm": 1.4893283175664107, "learning_rate": 4.600740129532415e-07, "loss": 0.3506, "step": 31048 }, { "epoch": 0.5397104069252029, "grad_norm": 2.1607478825520374, "learning_rate": 4.600459535681358e-07, "loss": 0.234, "step": 31049 }, { "epoch": 0.5397277894627057, "grad_norm": 1.6016811490589946, "learning_rate": 4.60017894309666e-07, "loss": 0.1976, "step": 31050 }, { "epoch": 0.5397451720002086, "grad_norm": 2.6273401392280586, "learning_rate": 4.5998983517792093e-07, "loss": 0.3444, "step": 31051 }, { "epoch": 0.5397625545377114, "grad_norm": 1.4109525070043671, "learning_rate": 4.599617761729899e-07, "loss": 0.2532, "step": 31052 }, { "epoch": 0.5397799370752142, "grad_norm": 1.8644647792297184, "learning_rate": 4.599337172949615e-07, "loss": 0.1519, "step": 31053 }, { "epoch": 0.539797319612717, "grad_norm": 1.1377408304983077, "learning_rate": 4.599056585439249e-07, "loss": 0.3864, "step": 31054 }, { "epoch": 0.5398147021502199, "grad_norm": 1.3228991508772208, "learning_rate": 4.598775999199691e-07, "loss": 0.1706, "step": 31055 }, { "epoch": 0.5398320846877227, "grad_norm": 1.3053577271220929, "learning_rate": 4.598495414231826e-07, "loss": 0.1771, "step": 31056 }, { "epoch": 0.5398494672252255, "grad_norm": 1.8682344435467975, "learning_rate": 4.5982148305365475e-07, "loss": 0.3092, "step": 31057 }, { "epoch": 0.5398668497627284, "grad_norm": 1.9855453415819355, "learning_rate": 4.597934248114744e-07, "loss": 0.2278, "step": 31058 }, { "epoch": 0.5398842323002312, "grad_norm": 1.981029018642834, "learning_rate": 4.597653666967302e-07, "loss": 0.1812, "step": 31059 }, { "epoch": 0.539901614837734, "grad_norm": 0.8311394590413452, "learning_rate": 4.5973730870951147e-07, "loss": 0.1733, "step": 31060 }, { "epoch": 0.5399189973752369, "grad_norm": 1.9689938799964248, "learning_rate": 4.5970925084990694e-07, "loss": 0.2685, "step": 31061 }, { "epoch": 0.5399363799127397, "grad_norm": 1.482029681524849, "learning_rate": 4.596811931180057e-07, "loss": 0.1634, "step": 31062 }, { "epoch": 0.5399537624502425, "grad_norm": 1.5464496004203083, "learning_rate": 4.5965313551389643e-07, "loss": 0.2427, "step": 31063 }, { "epoch": 0.5399711449877453, "grad_norm": 3.002615758885791, "learning_rate": 4.5962507803766817e-07, "loss": 0.327, "step": 31064 }, { "epoch": 0.5399885275252482, "grad_norm": 2.3542483214224053, "learning_rate": 4.595970206894099e-07, "loss": 0.3057, "step": 31065 }, { "epoch": 0.540005910062751, "grad_norm": 2.390306077179725, "learning_rate": 4.5956896346921044e-07, "loss": 0.2273, "step": 31066 }, { "epoch": 0.5400232926002538, "grad_norm": 2.0510801573151998, "learning_rate": 4.595409063771589e-07, "loss": 0.3665, "step": 31067 }, { "epoch": 0.5400406751377567, "grad_norm": 1.3725894991311731, "learning_rate": 4.5951284941334417e-07, "loss": 0.3102, "step": 31068 }, { "epoch": 0.5400580576752594, "grad_norm": 1.5859127849785615, "learning_rate": 4.5948479257785504e-07, "loss": 0.3409, "step": 31069 }, { "epoch": 0.5400754402127622, "grad_norm": 1.669823907832486, "learning_rate": 4.5945673587078044e-07, "loss": 0.3522, "step": 31070 }, { "epoch": 0.540092822750265, "grad_norm": 1.7956478331021453, "learning_rate": 4.5942867929220943e-07, "loss": 0.3196, "step": 31071 }, { "epoch": 0.5401102052877679, "grad_norm": 1.475642961321935, "learning_rate": 4.5940062284223067e-07, "loss": 0.2035, "step": 31072 }, { "epoch": 0.5401275878252707, "grad_norm": 3.718759156142815, "learning_rate": 4.5937256652093353e-07, "loss": 0.2789, "step": 31073 }, { "epoch": 0.5401449703627735, "grad_norm": 1.7836311689363131, "learning_rate": 4.593445103284068e-07, "loss": 0.2249, "step": 31074 }, { "epoch": 0.5401623529002764, "grad_norm": 1.8467561690129797, "learning_rate": 4.593164542647391e-07, "loss": 0.2189, "step": 31075 }, { "epoch": 0.5401797354377792, "grad_norm": 1.7460216607512082, "learning_rate": 4.592883983300195e-07, "loss": 0.2155, "step": 31076 }, { "epoch": 0.540197117975282, "grad_norm": 1.6417673846408931, "learning_rate": 4.59260342524337e-07, "loss": 0.2374, "step": 31077 }, { "epoch": 0.5402145005127849, "grad_norm": 2.4099058017996287, "learning_rate": 4.592322868477805e-07, "loss": 0.3223, "step": 31078 }, { "epoch": 0.5402318830502877, "grad_norm": 1.577171894681766, "learning_rate": 4.5920423130043896e-07, "loss": 0.3262, "step": 31079 }, { "epoch": 0.5402492655877905, "grad_norm": 0.7958320373584068, "learning_rate": 4.5917617588240145e-07, "loss": 0.2541, "step": 31080 }, { "epoch": 0.5402666481252933, "grad_norm": 1.3959124769405735, "learning_rate": 4.591481205937564e-07, "loss": 0.1983, "step": 31081 }, { "epoch": 0.5402840306627962, "grad_norm": 3.224187697538628, "learning_rate": 4.591200654345932e-07, "loss": 0.2175, "step": 31082 }, { "epoch": 0.540301413200299, "grad_norm": 1.1036767933420806, "learning_rate": 4.590920104050006e-07, "loss": 0.2472, "step": 31083 }, { "epoch": 0.5403187957378018, "grad_norm": 2.5298891991095425, "learning_rate": 4.5906395550506747e-07, "loss": 0.4841, "step": 31084 }, { "epoch": 0.5403361782753047, "grad_norm": 1.2132939774007296, "learning_rate": 4.5903590073488284e-07, "loss": 0.2827, "step": 31085 }, { "epoch": 0.5403535608128075, "grad_norm": 2.103870962998687, "learning_rate": 4.5900784609453555e-07, "loss": 0.2482, "step": 31086 }, { "epoch": 0.5403709433503103, "grad_norm": 2.266548834946128, "learning_rate": 4.589797915841147e-07, "loss": 0.205, "step": 31087 }, { "epoch": 0.540388325887813, "grad_norm": 2.336668161037095, "learning_rate": 4.58951737203709e-07, "loss": 0.4469, "step": 31088 }, { "epoch": 0.5404057084253159, "grad_norm": 2.246125338508085, "learning_rate": 4.589236829534073e-07, "loss": 0.3778, "step": 31089 }, { "epoch": 0.5404230909628187, "grad_norm": 1.7062769001886244, "learning_rate": 4.588956288332988e-07, "loss": 0.2258, "step": 31090 }, { "epoch": 0.5404404735003215, "grad_norm": 1.258433724946301, "learning_rate": 4.5886757484347225e-07, "loss": 0.1522, "step": 31091 }, { "epoch": 0.5404578560378244, "grad_norm": 1.1069492704518922, "learning_rate": 4.588395209840165e-07, "loss": 0.287, "step": 31092 }, { "epoch": 0.5404752385753272, "grad_norm": 2.5635983403589724, "learning_rate": 4.588114672550208e-07, "loss": 0.3081, "step": 31093 }, { "epoch": 0.54049262111283, "grad_norm": 1.7692918911377098, "learning_rate": 4.587834136565737e-07, "loss": 0.2281, "step": 31094 }, { "epoch": 0.5405100036503329, "grad_norm": 1.659804042404163, "learning_rate": 4.5875536018876417e-07, "loss": 0.1926, "step": 31095 }, { "epoch": 0.5405273861878357, "grad_norm": 2.5392554971163306, "learning_rate": 4.5872730685168125e-07, "loss": 0.3409, "step": 31096 }, { "epoch": 0.5405447687253385, "grad_norm": 2.262018762977458, "learning_rate": 4.5869925364541374e-07, "loss": 0.2663, "step": 31097 }, { "epoch": 0.5405621512628414, "grad_norm": 1.2925758463623471, "learning_rate": 4.586712005700507e-07, "loss": 0.3221, "step": 31098 }, { "epoch": 0.5405795338003442, "grad_norm": 2.107785919733144, "learning_rate": 4.5864314762568107e-07, "loss": 0.2621, "step": 31099 }, { "epoch": 0.540596916337847, "grad_norm": 1.272305048439802, "learning_rate": 4.586150948123936e-07, "loss": 0.2818, "step": 31100 }, { "epoch": 0.5406142988753498, "grad_norm": 1.2990127370250717, "learning_rate": 4.585870421302771e-07, "loss": 0.3737, "step": 31101 }, { "epoch": 0.5406316814128527, "grad_norm": 1.2804168305701733, "learning_rate": 4.5855898957942086e-07, "loss": 0.3223, "step": 31102 }, { "epoch": 0.5406490639503555, "grad_norm": 3.1123037874835546, "learning_rate": 4.585309371599134e-07, "loss": 0.2699, "step": 31103 }, { "epoch": 0.5406664464878583, "grad_norm": 1.2473873771769173, "learning_rate": 4.5850288487184393e-07, "loss": 0.1688, "step": 31104 }, { "epoch": 0.5406838290253612, "grad_norm": 1.6820212666116539, "learning_rate": 4.5847483271530117e-07, "loss": 0.2482, "step": 31105 }, { "epoch": 0.540701211562864, "grad_norm": 1.8934710215072295, "learning_rate": 4.584467806903744e-07, "loss": 0.2936, "step": 31106 }, { "epoch": 0.5407185941003668, "grad_norm": 1.5341608277730208, "learning_rate": 4.58418728797152e-07, "loss": 0.3769, "step": 31107 }, { "epoch": 0.5407359766378695, "grad_norm": 2.8915745412992755, "learning_rate": 4.583906770357232e-07, "loss": 0.2369, "step": 31108 }, { "epoch": 0.5407533591753724, "grad_norm": 1.941259591649975, "learning_rate": 4.5836262540617673e-07, "loss": 0.3351, "step": 31109 }, { "epoch": 0.5407707417128752, "grad_norm": 2.8507059100411936, "learning_rate": 4.583345739086016e-07, "loss": 0.274, "step": 31110 }, { "epoch": 0.540788124250378, "grad_norm": 1.516697425944086, "learning_rate": 4.583065225430868e-07, "loss": 0.3069, "step": 31111 }, { "epoch": 0.5408055067878809, "grad_norm": 2.412704126498704, "learning_rate": 4.582784713097213e-07, "loss": 0.2921, "step": 31112 }, { "epoch": 0.5408228893253837, "grad_norm": 1.4895929668935315, "learning_rate": 4.5825042020859383e-07, "loss": 0.1501, "step": 31113 }, { "epoch": 0.5408402718628865, "grad_norm": 1.292795338164294, "learning_rate": 4.582223692397932e-07, "loss": 0.1819, "step": 31114 }, { "epoch": 0.5408576544003894, "grad_norm": 2.137874298564544, "learning_rate": 4.5819431840340854e-07, "loss": 0.2569, "step": 31115 }, { "epoch": 0.5408750369378922, "grad_norm": 2.459458038729371, "learning_rate": 4.5816626769952877e-07, "loss": 0.3911, "step": 31116 }, { "epoch": 0.540892419475395, "grad_norm": 3.622687252045878, "learning_rate": 4.581382171282426e-07, "loss": 0.3015, "step": 31117 }, { "epoch": 0.5409098020128978, "grad_norm": 1.6743568479359872, "learning_rate": 4.581101666896392e-07, "loss": 0.3626, "step": 31118 }, { "epoch": 0.5409271845504007, "grad_norm": 1.3519298376564033, "learning_rate": 4.5808211638380723e-07, "loss": 0.1775, "step": 31119 }, { "epoch": 0.5409445670879035, "grad_norm": 0.9127500524560791, "learning_rate": 4.5805406621083564e-07, "loss": 0.1973, "step": 31120 }, { "epoch": 0.5409619496254063, "grad_norm": 1.3692220657369796, "learning_rate": 4.580260161708135e-07, "loss": 0.2539, "step": 31121 }, { "epoch": 0.5409793321629092, "grad_norm": 1.4561957645790387, "learning_rate": 4.579979662638294e-07, "loss": 0.2443, "step": 31122 }, { "epoch": 0.540996714700412, "grad_norm": 2.231213284496794, "learning_rate": 4.579699164899727e-07, "loss": 0.2785, "step": 31123 }, { "epoch": 0.5410140972379148, "grad_norm": 1.2087855896815083, "learning_rate": 4.5794186684933193e-07, "loss": 0.2181, "step": 31124 }, { "epoch": 0.5410314797754177, "grad_norm": 1.6167305605143465, "learning_rate": 4.579138173419963e-07, "loss": 0.3448, "step": 31125 }, { "epoch": 0.5410488623129205, "grad_norm": 2.0309403003329662, "learning_rate": 4.5788576796805434e-07, "loss": 0.322, "step": 31126 }, { "epoch": 0.5410662448504233, "grad_norm": 1.8897905047810586, "learning_rate": 4.578577187275952e-07, "loss": 0.2648, "step": 31127 }, { "epoch": 0.541083627387926, "grad_norm": 3.465814897211376, "learning_rate": 4.578296696207077e-07, "loss": 0.3763, "step": 31128 }, { "epoch": 0.5411010099254289, "grad_norm": 1.2312517316817455, "learning_rate": 4.578016206474808e-07, "loss": 0.2497, "step": 31129 }, { "epoch": 0.5411183924629317, "grad_norm": 2.031611474523385, "learning_rate": 4.577735718080033e-07, "loss": 0.37, "step": 31130 }, { "epoch": 0.5411357750004345, "grad_norm": 1.6504832430320677, "learning_rate": 4.577455231023645e-07, "loss": 0.3113, "step": 31131 }, { "epoch": 0.5411531575379374, "grad_norm": 1.6776930595549093, "learning_rate": 4.577174745306527e-07, "loss": 0.2068, "step": 31132 }, { "epoch": 0.5411705400754402, "grad_norm": 1.8267586175047184, "learning_rate": 4.5768942609295715e-07, "loss": 0.233, "step": 31133 }, { "epoch": 0.541187922612943, "grad_norm": 3.909694192559331, "learning_rate": 4.576613777893666e-07, "loss": 0.2807, "step": 31134 }, { "epoch": 0.5412053051504458, "grad_norm": 1.2758302942229618, "learning_rate": 4.576333296199701e-07, "loss": 0.4196, "step": 31135 }, { "epoch": 0.5412226876879487, "grad_norm": 3.0555348776887965, "learning_rate": 4.576052815848564e-07, "loss": 0.397, "step": 31136 }, { "epoch": 0.5412400702254515, "grad_norm": 1.7017939237389834, "learning_rate": 4.575772336841147e-07, "loss": 0.2989, "step": 31137 }, { "epoch": 0.5412574527629543, "grad_norm": 1.094488214702866, "learning_rate": 4.575491859178336e-07, "loss": 0.3011, "step": 31138 }, { "epoch": 0.5412748353004572, "grad_norm": 1.8924615139304561, "learning_rate": 4.5752113828610204e-07, "loss": 0.3013, "step": 31139 }, { "epoch": 0.54129221783796, "grad_norm": 1.712908149568332, "learning_rate": 4.574930907890089e-07, "loss": 0.1646, "step": 31140 }, { "epoch": 0.5413096003754628, "grad_norm": 1.4061718733824131, "learning_rate": 4.5746504342664313e-07, "loss": 0.1473, "step": 31141 }, { "epoch": 0.5413269829129657, "grad_norm": 1.8421554724724223, "learning_rate": 4.5743699619909367e-07, "loss": 0.3139, "step": 31142 }, { "epoch": 0.5413443654504685, "grad_norm": 1.2462951627214602, "learning_rate": 4.574089491064494e-07, "loss": 0.2407, "step": 31143 }, { "epoch": 0.5413617479879713, "grad_norm": 1.3603237574664944, "learning_rate": 4.573809021487993e-07, "loss": 0.1505, "step": 31144 }, { "epoch": 0.5413791305254742, "grad_norm": 2.144344193094861, "learning_rate": 4.57352855326232e-07, "loss": 0.2728, "step": 31145 }, { "epoch": 0.541396513062977, "grad_norm": 2.4963460953165533, "learning_rate": 4.5732480863883664e-07, "loss": 0.1816, "step": 31146 }, { "epoch": 0.5414138956004798, "grad_norm": 1.5431671111915009, "learning_rate": 4.5729676208670203e-07, "loss": 0.1864, "step": 31147 }, { "epoch": 0.5414312781379825, "grad_norm": 1.1589630767207937, "learning_rate": 4.57268715669917e-07, "loss": 0.1917, "step": 31148 }, { "epoch": 0.5414486606754854, "grad_norm": 1.4284733458741348, "learning_rate": 4.572406693885705e-07, "loss": 0.4142, "step": 31149 }, { "epoch": 0.5414660432129882, "grad_norm": 1.635595913937917, "learning_rate": 4.572126232427517e-07, "loss": 0.222, "step": 31150 }, { "epoch": 0.541483425750491, "grad_norm": 1.7257424959743843, "learning_rate": 4.5718457723254893e-07, "loss": 0.3222, "step": 31151 }, { "epoch": 0.5415008082879939, "grad_norm": 1.7502658171481102, "learning_rate": 4.571565313580515e-07, "loss": 0.2957, "step": 31152 }, { "epoch": 0.5415181908254967, "grad_norm": 2.0966915213984274, "learning_rate": 4.571284856193481e-07, "loss": 0.3067, "step": 31153 }, { "epoch": 0.5415355733629995, "grad_norm": 1.1391092726794052, "learning_rate": 4.571004400165278e-07, "loss": 0.2456, "step": 31154 }, { "epoch": 0.5415529559005023, "grad_norm": 1.8234959091057352, "learning_rate": 4.570723945496793e-07, "loss": 0.2067, "step": 31155 }, { "epoch": 0.5415703384380052, "grad_norm": 1.460186381134995, "learning_rate": 4.5704434921889186e-07, "loss": 0.2614, "step": 31156 }, { "epoch": 0.541587720975508, "grad_norm": 1.5834000030543836, "learning_rate": 4.5701630402425377e-07, "loss": 0.235, "step": 31157 }, { "epoch": 0.5416051035130108, "grad_norm": 2.1817126021409066, "learning_rate": 4.5698825896585446e-07, "loss": 0.35, "step": 31158 }, { "epoch": 0.5416224860505137, "grad_norm": 3.0190920185410826, "learning_rate": 4.569602140437824e-07, "loss": 0.6152, "step": 31159 }, { "epoch": 0.5416398685880165, "grad_norm": 1.4975813085452503, "learning_rate": 4.5693216925812687e-07, "loss": 0.3516, "step": 31160 }, { "epoch": 0.5416572511255193, "grad_norm": 2.2781167447807165, "learning_rate": 4.569041246089764e-07, "loss": 0.3474, "step": 31161 }, { "epoch": 0.5416746336630222, "grad_norm": 3.319448666785608, "learning_rate": 4.5687608009642027e-07, "loss": 0.3605, "step": 31162 }, { "epoch": 0.541692016200525, "grad_norm": 1.57292153431888, "learning_rate": 4.568480357205472e-07, "loss": 0.2111, "step": 31163 }, { "epoch": 0.5417093987380278, "grad_norm": 2.857004884165454, "learning_rate": 4.5681999148144596e-07, "loss": 0.3718, "step": 31164 }, { "epoch": 0.5417267812755306, "grad_norm": 3.699556941378947, "learning_rate": 4.5679194737920535e-07, "loss": 0.2274, "step": 31165 }, { "epoch": 0.5417441638130335, "grad_norm": 4.004457592788701, "learning_rate": 4.5676390341391455e-07, "loss": 0.2487, "step": 31166 }, { "epoch": 0.5417615463505363, "grad_norm": 1.5668635084375861, "learning_rate": 4.567358595856622e-07, "loss": 0.2608, "step": 31167 }, { "epoch": 0.541778928888039, "grad_norm": 2.427107859678638, "learning_rate": 4.5670781589453745e-07, "loss": 0.2304, "step": 31168 }, { "epoch": 0.5417963114255419, "grad_norm": 1.1306948122705403, "learning_rate": 4.566797723406291e-07, "loss": 0.3742, "step": 31169 }, { "epoch": 0.5418136939630447, "grad_norm": 1.351040766671553, "learning_rate": 4.566517289240258e-07, "loss": 0.4311, "step": 31170 }, { "epoch": 0.5418310765005475, "grad_norm": 3.324249573533152, "learning_rate": 4.566236856448167e-07, "loss": 0.4333, "step": 31171 }, { "epoch": 0.5418484590380503, "grad_norm": 1.8421325403337205, "learning_rate": 4.565956425030906e-07, "loss": 0.3641, "step": 31172 }, { "epoch": 0.5418658415755532, "grad_norm": 2.2184928394887917, "learning_rate": 4.5656759949893617e-07, "loss": 0.3526, "step": 31173 }, { "epoch": 0.541883224113056, "grad_norm": 1.2936682899719976, "learning_rate": 4.565395566324427e-07, "loss": 0.1688, "step": 31174 }, { "epoch": 0.5419006066505588, "grad_norm": 2.003844233168313, "learning_rate": 4.5651151390369896e-07, "loss": 0.2995, "step": 31175 }, { "epoch": 0.5419179891880617, "grad_norm": 2.2054873116862983, "learning_rate": 4.564834713127935e-07, "loss": 0.1875, "step": 31176 }, { "epoch": 0.5419353717255645, "grad_norm": 1.5337191420690763, "learning_rate": 4.5645542885981556e-07, "loss": 0.1739, "step": 31177 }, { "epoch": 0.5419527542630673, "grad_norm": 1.249317259275651, "learning_rate": 4.5642738654485385e-07, "loss": 0.1642, "step": 31178 }, { "epoch": 0.5419701368005702, "grad_norm": 1.5195184924391332, "learning_rate": 4.563993443679974e-07, "loss": 0.1642, "step": 31179 }, { "epoch": 0.541987519338073, "grad_norm": 1.3842947108552153, "learning_rate": 4.56371302329335e-07, "loss": 0.3683, "step": 31180 }, { "epoch": 0.5420049018755758, "grad_norm": 1.409730231044575, "learning_rate": 4.563432604289554e-07, "loss": 0.1952, "step": 31181 }, { "epoch": 0.5420222844130786, "grad_norm": 2.105431932876985, "learning_rate": 4.5631521866694783e-07, "loss": 0.3423, "step": 31182 }, { "epoch": 0.5420396669505815, "grad_norm": 1.2173397328910196, "learning_rate": 4.5628717704340085e-07, "loss": 0.227, "step": 31183 }, { "epoch": 0.5420570494880843, "grad_norm": 1.6893886941995477, "learning_rate": 4.562591355584033e-07, "loss": 0.2218, "step": 31184 }, { "epoch": 0.5420744320255871, "grad_norm": 1.4552170326571383, "learning_rate": 4.562310942120444e-07, "loss": 0.2322, "step": 31185 }, { "epoch": 0.54209181456309, "grad_norm": 3.1947807676365083, "learning_rate": 4.5620305300441265e-07, "loss": 0.2456, "step": 31186 }, { "epoch": 0.5421091971005928, "grad_norm": 1.423080133220879, "learning_rate": 4.561750119355972e-07, "loss": 0.2847, "step": 31187 }, { "epoch": 0.5421265796380955, "grad_norm": 1.3593056079541486, "learning_rate": 4.5614697100568697e-07, "loss": 0.3301, "step": 31188 }, { "epoch": 0.5421439621755983, "grad_norm": 1.7667445812662057, "learning_rate": 4.5611893021477057e-07, "loss": 0.2349, "step": 31189 }, { "epoch": 0.5421613447131012, "grad_norm": 2.827117330475545, "learning_rate": 4.560908895629369e-07, "loss": 0.3174, "step": 31190 }, { "epoch": 0.542178727250604, "grad_norm": 1.4520722299834217, "learning_rate": 4.5606284905027504e-07, "loss": 0.1633, "step": 31191 }, { "epoch": 0.5421961097881068, "grad_norm": 1.1046840429448614, "learning_rate": 4.560348086768737e-07, "loss": 0.2637, "step": 31192 }, { "epoch": 0.5422134923256097, "grad_norm": 1.891366954780637, "learning_rate": 4.5600676844282183e-07, "loss": 0.1912, "step": 31193 }, { "epoch": 0.5422308748631125, "grad_norm": 1.6684051065753238, "learning_rate": 4.559787283482085e-07, "loss": 0.2464, "step": 31194 }, { "epoch": 0.5422482574006153, "grad_norm": 3.166657802702315, "learning_rate": 4.5595068839312224e-07, "loss": 0.2346, "step": 31195 }, { "epoch": 0.5422656399381182, "grad_norm": 1.9613122566940608, "learning_rate": 4.5592264857765195e-07, "loss": 0.1953, "step": 31196 }, { "epoch": 0.542283022475621, "grad_norm": 1.9334276730692332, "learning_rate": 4.5589460890188667e-07, "loss": 0.2252, "step": 31197 }, { "epoch": 0.5423004050131238, "grad_norm": 1.172906156555627, "learning_rate": 4.5586656936591523e-07, "loss": 0.2119, "step": 31198 }, { "epoch": 0.5423177875506267, "grad_norm": 1.1715223917530617, "learning_rate": 4.558385299698265e-07, "loss": 0.1579, "step": 31199 }, { "epoch": 0.5423351700881295, "grad_norm": 0.9150011203869814, "learning_rate": 4.558104907137095e-07, "loss": 0.2885, "step": 31200 }, { "epoch": 0.5423525526256323, "grad_norm": 1.2014236916306977, "learning_rate": 4.557824515976527e-07, "loss": 0.2323, "step": 31201 }, { "epoch": 0.5423699351631351, "grad_norm": 2.295805091677521, "learning_rate": 4.557544126217454e-07, "loss": 0.2516, "step": 31202 }, { "epoch": 0.542387317700638, "grad_norm": 1.2049274583611214, "learning_rate": 4.557263737860761e-07, "loss": 0.1417, "step": 31203 }, { "epoch": 0.5424047002381408, "grad_norm": 2.7258975138697696, "learning_rate": 4.55698335090734e-07, "loss": 0.2377, "step": 31204 }, { "epoch": 0.5424220827756436, "grad_norm": 1.1242462177136803, "learning_rate": 4.556702965358078e-07, "loss": 0.2594, "step": 31205 }, { "epoch": 0.5424394653131465, "grad_norm": 1.3751524654232792, "learning_rate": 4.556422581213863e-07, "loss": 0.2508, "step": 31206 }, { "epoch": 0.5424568478506493, "grad_norm": 1.5575292010159092, "learning_rate": 4.5561421984755874e-07, "loss": 0.2448, "step": 31207 }, { "epoch": 0.542474230388152, "grad_norm": 0.8915166595035033, "learning_rate": 4.555861817144135e-07, "loss": 0.2395, "step": 31208 }, { "epoch": 0.5424916129256548, "grad_norm": 2.619021895840916, "learning_rate": 4.555581437220396e-07, "loss": 0.1967, "step": 31209 }, { "epoch": 0.5425089954631577, "grad_norm": 1.3099402978774384, "learning_rate": 4.555301058705261e-07, "loss": 0.1067, "step": 31210 }, { "epoch": 0.5425263780006605, "grad_norm": 1.266851519324709, "learning_rate": 4.555020681599616e-07, "loss": 0.2033, "step": 31211 }, { "epoch": 0.5425437605381633, "grad_norm": 2.2247624963350887, "learning_rate": 4.554740305904352e-07, "loss": 0.2576, "step": 31212 }, { "epoch": 0.5425611430756662, "grad_norm": 3.366533899500689, "learning_rate": 4.554459931620358e-07, "loss": 0.2708, "step": 31213 }, { "epoch": 0.542578525613169, "grad_norm": 2.3388346384577776, "learning_rate": 4.5541795587485203e-07, "loss": 0.2929, "step": 31214 }, { "epoch": 0.5425959081506718, "grad_norm": 1.9060058288125241, "learning_rate": 4.553899187289728e-07, "loss": 0.2659, "step": 31215 }, { "epoch": 0.5426132906881747, "grad_norm": 2.1732018417848367, "learning_rate": 4.553618817244871e-07, "loss": 0.2762, "step": 31216 }, { "epoch": 0.5426306732256775, "grad_norm": 1.3753926036108477, "learning_rate": 4.5533384486148355e-07, "loss": 0.1984, "step": 31217 }, { "epoch": 0.5426480557631803, "grad_norm": 2.274182370751109, "learning_rate": 4.553058081400514e-07, "loss": 0.3477, "step": 31218 }, { "epoch": 0.5426654383006831, "grad_norm": 1.922395300928239, "learning_rate": 4.5527777156027936e-07, "loss": 0.193, "step": 31219 }, { "epoch": 0.542682820838186, "grad_norm": 1.2599146363231355, "learning_rate": 4.552497351222562e-07, "loss": 0.2552, "step": 31220 }, { "epoch": 0.5427002033756888, "grad_norm": 2.5788673082813927, "learning_rate": 4.5522169882607067e-07, "loss": 0.2328, "step": 31221 }, { "epoch": 0.5427175859131916, "grad_norm": 1.8198291225217618, "learning_rate": 4.551936626718119e-07, "loss": 0.2714, "step": 31222 }, { "epoch": 0.5427349684506945, "grad_norm": 1.095466259105836, "learning_rate": 4.551656266595685e-07, "loss": 0.1401, "step": 31223 }, { "epoch": 0.5427523509881973, "grad_norm": 1.3590063119250233, "learning_rate": 4.551375907894296e-07, "loss": 0.2388, "step": 31224 }, { "epoch": 0.5427697335257001, "grad_norm": 1.6686116256012267, "learning_rate": 4.551095550614838e-07, "loss": 0.1476, "step": 31225 }, { "epoch": 0.542787116063203, "grad_norm": 1.7794139720614754, "learning_rate": 4.5508151947582036e-07, "loss": 0.3303, "step": 31226 }, { "epoch": 0.5428044986007057, "grad_norm": 2.1196043598051366, "learning_rate": 4.550534840325277e-07, "loss": 0.3789, "step": 31227 }, { "epoch": 0.5428218811382085, "grad_norm": 1.3868228820466528, "learning_rate": 4.5502544873169493e-07, "loss": 0.2698, "step": 31228 }, { "epoch": 0.5428392636757113, "grad_norm": 1.1342503995144677, "learning_rate": 4.5499741357341057e-07, "loss": 0.1764, "step": 31229 }, { "epoch": 0.5428566462132142, "grad_norm": 1.2796295104779303, "learning_rate": 4.5496937855776397e-07, "loss": 0.2264, "step": 31230 }, { "epoch": 0.542874028750717, "grad_norm": 1.0153355302629996, "learning_rate": 4.549413436848436e-07, "loss": 0.2755, "step": 31231 }, { "epoch": 0.5428914112882198, "grad_norm": 1.4246685480057026, "learning_rate": 4.549133089547387e-07, "loss": 0.1679, "step": 31232 }, { "epoch": 0.5429087938257227, "grad_norm": 2.0451211683892994, "learning_rate": 4.5488527436753784e-07, "loss": 0.2826, "step": 31233 }, { "epoch": 0.5429261763632255, "grad_norm": 1.049028540952109, "learning_rate": 4.548572399233298e-07, "loss": 0.3689, "step": 31234 }, { "epoch": 0.5429435589007283, "grad_norm": 1.12940050332221, "learning_rate": 4.548292056222036e-07, "loss": 0.3159, "step": 31235 }, { "epoch": 0.5429609414382311, "grad_norm": 1.5368934701710724, "learning_rate": 4.5480117146424815e-07, "loss": 0.1955, "step": 31236 }, { "epoch": 0.542978323975734, "grad_norm": 1.5132049351947714, "learning_rate": 4.5477313744955213e-07, "loss": 0.337, "step": 31237 }, { "epoch": 0.5429957065132368, "grad_norm": 5.5644340579633385, "learning_rate": 4.547451035782047e-07, "loss": 0.321, "step": 31238 }, { "epoch": 0.5430130890507396, "grad_norm": 3.0651711185775907, "learning_rate": 4.5471706985029434e-07, "loss": 0.4937, "step": 31239 }, { "epoch": 0.5430304715882425, "grad_norm": 1.0510915225542485, "learning_rate": 4.5468903626591e-07, "loss": 0.1711, "step": 31240 }, { "epoch": 0.5430478541257453, "grad_norm": 1.6963514445275938, "learning_rate": 4.546610028251407e-07, "loss": 0.2819, "step": 31241 }, { "epoch": 0.5430652366632481, "grad_norm": 1.0013058688489989, "learning_rate": 4.546329695280751e-07, "loss": 0.1591, "step": 31242 }, { "epoch": 0.543082619200751, "grad_norm": 2.0545254574648686, "learning_rate": 4.546049363748022e-07, "loss": 0.3858, "step": 31243 }, { "epoch": 0.5431000017382538, "grad_norm": 0.9730294181056794, "learning_rate": 4.5457690336541073e-07, "loss": 0.2744, "step": 31244 }, { "epoch": 0.5431173842757566, "grad_norm": 4.170346491213542, "learning_rate": 4.545488704999899e-07, "loss": 0.5056, "step": 31245 }, { "epoch": 0.5431347668132595, "grad_norm": 3.6428016104737093, "learning_rate": 4.5452083777862804e-07, "loss": 0.2202, "step": 31246 }, { "epoch": 0.5431521493507622, "grad_norm": 1.2885595080041448, "learning_rate": 4.5449280520141427e-07, "loss": 0.2594, "step": 31247 }, { "epoch": 0.543169531888265, "grad_norm": 3.868174326434859, "learning_rate": 4.544647727684373e-07, "loss": 0.164, "step": 31248 }, { "epoch": 0.5431869144257678, "grad_norm": 1.330858676190111, "learning_rate": 4.544367404797862e-07, "loss": 0.2704, "step": 31249 }, { "epoch": 0.5432042969632707, "grad_norm": 1.4676166428099244, "learning_rate": 4.5440870833554954e-07, "loss": 0.1749, "step": 31250 }, { "epoch": 0.5432216795007735, "grad_norm": 1.286377991580993, "learning_rate": 4.543806763358167e-07, "loss": 0.1633, "step": 31251 }, { "epoch": 0.5432390620382763, "grad_norm": 1.6967396408441027, "learning_rate": 4.543526444806759e-07, "loss": 0.2574, "step": 31252 }, { "epoch": 0.5432564445757792, "grad_norm": 1.6356078605387827, "learning_rate": 4.5432461277021633e-07, "loss": 0.2626, "step": 31253 }, { "epoch": 0.543273827113282, "grad_norm": 1.3892813557523356, "learning_rate": 4.5429658120452656e-07, "loss": 0.3009, "step": 31254 }, { "epoch": 0.5432912096507848, "grad_norm": 1.2322647370965256, "learning_rate": 4.5426854978369583e-07, "loss": 0.134, "step": 31255 }, { "epoch": 0.5433085921882876, "grad_norm": 1.3093417611681653, "learning_rate": 4.5424051850781266e-07, "loss": 0.1462, "step": 31256 }, { "epoch": 0.5433259747257905, "grad_norm": 1.3809325637670864, "learning_rate": 4.542124873769662e-07, "loss": 0.4017, "step": 31257 }, { "epoch": 0.5433433572632933, "grad_norm": 1.7715793768470118, "learning_rate": 4.5418445639124507e-07, "loss": 0.1946, "step": 31258 }, { "epoch": 0.5433607398007961, "grad_norm": 1.1023139716541326, "learning_rate": 4.5415642555073806e-07, "loss": 0.2074, "step": 31259 }, { "epoch": 0.543378122338299, "grad_norm": 1.9698734895655903, "learning_rate": 4.5412839485553425e-07, "loss": 0.2156, "step": 31260 }, { "epoch": 0.5433955048758018, "grad_norm": 1.7950454483829066, "learning_rate": 4.5410036430572227e-07, "loss": 0.2763, "step": 31261 }, { "epoch": 0.5434128874133046, "grad_norm": 1.2119161465739992, "learning_rate": 4.54072333901391e-07, "loss": 0.2181, "step": 31262 }, { "epoch": 0.5434302699508075, "grad_norm": 1.233775247117648, "learning_rate": 4.540443036426294e-07, "loss": 0.2224, "step": 31263 }, { "epoch": 0.5434476524883103, "grad_norm": 3.668199967540321, "learning_rate": 4.540162735295264e-07, "loss": 0.45, "step": 31264 }, { "epoch": 0.5434650350258131, "grad_norm": 1.791786814633771, "learning_rate": 4.5398824356217046e-07, "loss": 0.4781, "step": 31265 }, { "epoch": 0.543482417563316, "grad_norm": 1.3995369697091495, "learning_rate": 4.5396021374065075e-07, "loss": 0.1899, "step": 31266 }, { "epoch": 0.5434998001008187, "grad_norm": 1.462372136520127, "learning_rate": 4.539321840650559e-07, "loss": 0.1634, "step": 31267 }, { "epoch": 0.5435171826383215, "grad_norm": 1.72104327952698, "learning_rate": 4.53904154535475e-07, "loss": 0.2046, "step": 31268 }, { "epoch": 0.5435345651758243, "grad_norm": 2.29105244326055, "learning_rate": 4.5387612515199674e-07, "loss": 0.3139, "step": 31269 }, { "epoch": 0.5435519477133272, "grad_norm": 1.3326328164920012, "learning_rate": 4.5384809591471006e-07, "loss": 0.2889, "step": 31270 }, { "epoch": 0.54356933025083, "grad_norm": 0.9507657311257044, "learning_rate": 4.5382006682370354e-07, "loss": 0.1742, "step": 31271 }, { "epoch": 0.5435867127883328, "grad_norm": 1.378584971735069, "learning_rate": 4.5379203787906627e-07, "loss": 0.2926, "step": 31272 }, { "epoch": 0.5436040953258356, "grad_norm": 0.8924326173572407, "learning_rate": 4.53764009080887e-07, "loss": 0.3544, "step": 31273 }, { "epoch": 0.5436214778633385, "grad_norm": 2.5347801535757046, "learning_rate": 4.537359804292546e-07, "loss": 0.3307, "step": 31274 }, { "epoch": 0.5436388604008413, "grad_norm": 1.240539061617261, "learning_rate": 4.5370795192425777e-07, "loss": 0.2048, "step": 31275 }, { "epoch": 0.5436562429383441, "grad_norm": 2.2884531537534034, "learning_rate": 4.5367992356598584e-07, "loss": 0.3078, "step": 31276 }, { "epoch": 0.543673625475847, "grad_norm": 1.0725962127489708, "learning_rate": 4.536518953545269e-07, "loss": 0.2244, "step": 31277 }, { "epoch": 0.5436910080133498, "grad_norm": 1.7405623023656382, "learning_rate": 4.536238672899703e-07, "loss": 0.1923, "step": 31278 }, { "epoch": 0.5437083905508526, "grad_norm": 1.8770474376223907, "learning_rate": 4.5359583937240463e-07, "loss": 0.2609, "step": 31279 }, { "epoch": 0.5437257730883555, "grad_norm": 1.6984870012128965, "learning_rate": 4.5356781160191893e-07, "loss": 0.2647, "step": 31280 }, { "epoch": 0.5437431556258583, "grad_norm": 2.179825351505547, "learning_rate": 4.535397839786018e-07, "loss": 0.2764, "step": 31281 }, { "epoch": 0.5437605381633611, "grad_norm": 1.6928717182457063, "learning_rate": 4.5351175650254243e-07, "loss": 0.2002, "step": 31282 }, { "epoch": 0.543777920700864, "grad_norm": 1.7058640101442384, "learning_rate": 4.534837291738294e-07, "loss": 0.2919, "step": 31283 }, { "epoch": 0.5437953032383668, "grad_norm": 1.4994860309070241, "learning_rate": 4.534557019925516e-07, "loss": 0.2474, "step": 31284 }, { "epoch": 0.5438126857758696, "grad_norm": 1.6346142904805736, "learning_rate": 4.534276749587977e-07, "loss": 0.1912, "step": 31285 }, { "epoch": 0.5438300683133724, "grad_norm": 1.6909520263646332, "learning_rate": 4.5339964807265676e-07, "loss": 0.3347, "step": 31286 }, { "epoch": 0.5438474508508752, "grad_norm": 2.041408407431122, "learning_rate": 4.5337162133421744e-07, "loss": 0.1715, "step": 31287 }, { "epoch": 0.543864833388378, "grad_norm": 1.1503711153093252, "learning_rate": 4.533435947435687e-07, "loss": 0.4002, "step": 31288 }, { "epoch": 0.5438822159258808, "grad_norm": 2.137815238227656, "learning_rate": 4.5331556830079956e-07, "loss": 0.32, "step": 31289 }, { "epoch": 0.5438995984633836, "grad_norm": 1.813153592401149, "learning_rate": 4.532875420059983e-07, "loss": 0.2299, "step": 31290 }, { "epoch": 0.5439169810008865, "grad_norm": 2.9755801236225357, "learning_rate": 4.5325951585925427e-07, "loss": 0.1992, "step": 31291 }, { "epoch": 0.5439343635383893, "grad_norm": 1.3944641907766242, "learning_rate": 4.5323148986065603e-07, "loss": 0.2319, "step": 31292 }, { "epoch": 0.5439517460758921, "grad_norm": 2.2335567724366254, "learning_rate": 4.5320346401029243e-07, "loss": 0.3219, "step": 31293 }, { "epoch": 0.543969128613395, "grad_norm": 2.22723718911758, "learning_rate": 4.531754383082524e-07, "loss": 0.2811, "step": 31294 }, { "epoch": 0.5439865111508978, "grad_norm": 1.7961445464481878, "learning_rate": 4.5314741275462487e-07, "loss": 0.2644, "step": 31295 }, { "epoch": 0.5440038936884006, "grad_norm": 2.3676756946276982, "learning_rate": 4.5311938734949826e-07, "loss": 0.2504, "step": 31296 }, { "epoch": 0.5440212762259035, "grad_norm": 0.7769999305641057, "learning_rate": 4.530913620929618e-07, "loss": 0.2785, "step": 31297 }, { "epoch": 0.5440386587634063, "grad_norm": 1.3593450296369392, "learning_rate": 4.530633369851041e-07, "loss": 0.2817, "step": 31298 }, { "epoch": 0.5440560413009091, "grad_norm": 1.8132568176820207, "learning_rate": 4.5303531202601416e-07, "loss": 0.299, "step": 31299 }, { "epoch": 0.544073423838412, "grad_norm": 1.6696598892470356, "learning_rate": 4.530072872157806e-07, "loss": 0.2255, "step": 31300 }, { "epoch": 0.5440908063759148, "grad_norm": 2.3006005420112787, "learning_rate": 4.5297926255449243e-07, "loss": 0.2994, "step": 31301 }, { "epoch": 0.5441081889134176, "grad_norm": 2.940991473109546, "learning_rate": 4.529512380422385e-07, "loss": 0.239, "step": 31302 }, { "epoch": 0.5441255714509204, "grad_norm": 2.0704590942756655, "learning_rate": 4.529232136791074e-07, "loss": 0.1651, "step": 31303 }, { "epoch": 0.5441429539884233, "grad_norm": 2.6859851534454373, "learning_rate": 4.5289518946518806e-07, "loss": 0.2241, "step": 31304 }, { "epoch": 0.5441603365259261, "grad_norm": 2.2776087807477006, "learning_rate": 4.528671654005694e-07, "loss": 0.2344, "step": 31305 }, { "epoch": 0.5441777190634289, "grad_norm": 2.353007499358342, "learning_rate": 4.5283914148534e-07, "loss": 0.3057, "step": 31306 }, { "epoch": 0.5441951016009317, "grad_norm": 1.6247521279943995, "learning_rate": 4.528111177195891e-07, "loss": 0.2106, "step": 31307 }, { "epoch": 0.5442124841384345, "grad_norm": 3.780690995481836, "learning_rate": 4.527830941034053e-07, "loss": 0.3063, "step": 31308 }, { "epoch": 0.5442298666759373, "grad_norm": 1.2700563393578426, "learning_rate": 4.527550706368773e-07, "loss": 0.2298, "step": 31309 }, { "epoch": 0.5442472492134401, "grad_norm": 2.0836499003586235, "learning_rate": 4.5272704732009395e-07, "loss": 0.2561, "step": 31310 }, { "epoch": 0.544264631750943, "grad_norm": 2.938735509996298, "learning_rate": 4.526990241531442e-07, "loss": 0.2693, "step": 31311 }, { "epoch": 0.5442820142884458, "grad_norm": 1.2907376608419692, "learning_rate": 4.526710011361168e-07, "loss": 0.1956, "step": 31312 }, { "epoch": 0.5442993968259486, "grad_norm": 2.021922246861387, "learning_rate": 4.526429782691006e-07, "loss": 0.2576, "step": 31313 }, { "epoch": 0.5443167793634515, "grad_norm": 3.4056130733851147, "learning_rate": 4.5261495555218454e-07, "loss": 0.2656, "step": 31314 }, { "epoch": 0.5443341619009543, "grad_norm": 2.226003666785354, "learning_rate": 4.525869329854571e-07, "loss": 0.2994, "step": 31315 }, { "epoch": 0.5443515444384571, "grad_norm": 3.199081663581444, "learning_rate": 4.525589105690074e-07, "loss": 0.2317, "step": 31316 }, { "epoch": 0.54436892697596, "grad_norm": 1.6259893643558168, "learning_rate": 4.525308883029241e-07, "loss": 0.3876, "step": 31317 }, { "epoch": 0.5443863095134628, "grad_norm": 1.6866029138130079, "learning_rate": 4.5250286618729606e-07, "loss": 0.23, "step": 31318 }, { "epoch": 0.5444036920509656, "grad_norm": 2.540073936923081, "learning_rate": 4.5247484422221223e-07, "loss": 0.2861, "step": 31319 }, { "epoch": 0.5444210745884684, "grad_norm": 0.9881071751099018, "learning_rate": 4.5244682240776136e-07, "loss": 0.3001, "step": 31320 }, { "epoch": 0.5444384571259713, "grad_norm": 2.170023362857383, "learning_rate": 4.5241880074403206e-07, "loss": 0.3969, "step": 31321 }, { "epoch": 0.5444558396634741, "grad_norm": 3.187018092099584, "learning_rate": 4.5239077923111337e-07, "loss": 0.3099, "step": 31322 }, { "epoch": 0.5444732222009769, "grad_norm": 1.2393794649675074, "learning_rate": 4.523627578690939e-07, "loss": 0.5609, "step": 31323 }, { "epoch": 0.5444906047384798, "grad_norm": 1.3632701988276241, "learning_rate": 4.523347366580628e-07, "loss": 0.1041, "step": 31324 }, { "epoch": 0.5445079872759826, "grad_norm": 1.18114549442313, "learning_rate": 4.523067155981086e-07, "loss": 0.1587, "step": 31325 }, { "epoch": 0.5445253698134854, "grad_norm": 1.8548527435317474, "learning_rate": 4.522786946893202e-07, "loss": 0.2307, "step": 31326 }, { "epoch": 0.5445427523509881, "grad_norm": 1.5687862111768471, "learning_rate": 4.522506739317866e-07, "loss": 0.3257, "step": 31327 }, { "epoch": 0.544560134888491, "grad_norm": 1.676699077101314, "learning_rate": 4.522226533255963e-07, "loss": 0.2537, "step": 31328 }, { "epoch": 0.5445775174259938, "grad_norm": 2.5191528611536325, "learning_rate": 4.5219463287083806e-07, "loss": 0.248, "step": 31329 }, { "epoch": 0.5445948999634966, "grad_norm": 1.7521703924907674, "learning_rate": 4.5216661256760106e-07, "loss": 0.2956, "step": 31330 }, { "epoch": 0.5446122825009995, "grad_norm": 2.702822306965482, "learning_rate": 4.5213859241597386e-07, "loss": 0.275, "step": 31331 }, { "epoch": 0.5446296650385023, "grad_norm": 1.7835950372281397, "learning_rate": 4.521105724160454e-07, "loss": 0.2311, "step": 31332 }, { "epoch": 0.5446470475760051, "grad_norm": 1.5339012489865809, "learning_rate": 4.520825525679045e-07, "loss": 0.3261, "step": 31333 }, { "epoch": 0.544664430113508, "grad_norm": 1.1410313810807118, "learning_rate": 4.520545328716398e-07, "loss": 0.2044, "step": 31334 }, { "epoch": 0.5446818126510108, "grad_norm": 1.3550473807042736, "learning_rate": 4.520265133273402e-07, "loss": 0.2412, "step": 31335 }, { "epoch": 0.5446991951885136, "grad_norm": 2.7693141116478004, "learning_rate": 4.5199849393509447e-07, "loss": 0.2398, "step": 31336 }, { "epoch": 0.5447165777260164, "grad_norm": 1.8324829965470717, "learning_rate": 4.5197047469499145e-07, "loss": 0.3428, "step": 31337 }, { "epoch": 0.5447339602635193, "grad_norm": 1.8982051757164449, "learning_rate": 4.5194245560712004e-07, "loss": 0.2763, "step": 31338 }, { "epoch": 0.5447513428010221, "grad_norm": 1.8158111907119818, "learning_rate": 4.519144366715691e-07, "loss": 0.299, "step": 31339 }, { "epoch": 0.5447687253385249, "grad_norm": 1.2992858702192491, "learning_rate": 4.518864178884271e-07, "loss": 0.1735, "step": 31340 }, { "epoch": 0.5447861078760278, "grad_norm": 2.210834267849585, "learning_rate": 4.518583992577831e-07, "loss": 0.3573, "step": 31341 }, { "epoch": 0.5448034904135306, "grad_norm": 1.1753690763068352, "learning_rate": 4.5183038077972585e-07, "loss": 0.2043, "step": 31342 }, { "epoch": 0.5448208729510334, "grad_norm": 1.8554264890896193, "learning_rate": 4.5180236245434413e-07, "loss": 0.1731, "step": 31343 }, { "epoch": 0.5448382554885363, "grad_norm": 1.7594606072708252, "learning_rate": 4.517743442817268e-07, "loss": 0.2276, "step": 31344 }, { "epoch": 0.5448556380260391, "grad_norm": 1.3339032923276455, "learning_rate": 4.5174632626196264e-07, "loss": 0.3647, "step": 31345 }, { "epoch": 0.5448730205635419, "grad_norm": 1.4160007329367106, "learning_rate": 4.5171830839514063e-07, "loss": 0.1896, "step": 31346 }, { "epoch": 0.5448904031010446, "grad_norm": 1.4145341086165797, "learning_rate": 4.516902906813493e-07, "loss": 0.2234, "step": 31347 }, { "epoch": 0.5449077856385475, "grad_norm": 1.646454456586277, "learning_rate": 4.516622731206775e-07, "loss": 0.153, "step": 31348 }, { "epoch": 0.5449251681760503, "grad_norm": 1.214332327992632, "learning_rate": 4.5163425571321413e-07, "loss": 0.3701, "step": 31349 }, { "epoch": 0.5449425507135531, "grad_norm": 2.5063306219175514, "learning_rate": 4.51606238459048e-07, "loss": 0.3183, "step": 31350 }, { "epoch": 0.544959933251056, "grad_norm": 2.0212369782769963, "learning_rate": 4.5157822135826765e-07, "loss": 0.241, "step": 31351 }, { "epoch": 0.5449773157885588, "grad_norm": 1.5661264486763349, "learning_rate": 4.5155020441096243e-07, "loss": 0.2433, "step": 31352 }, { "epoch": 0.5449946983260616, "grad_norm": 2.4971158791516905, "learning_rate": 4.515221876172207e-07, "loss": 0.2607, "step": 31353 }, { "epoch": 0.5450120808635645, "grad_norm": 0.8979863682963065, "learning_rate": 4.514941709771312e-07, "loss": 0.2638, "step": 31354 }, { "epoch": 0.5450294634010673, "grad_norm": 1.871523657988732, "learning_rate": 4.5146615449078294e-07, "loss": 0.2523, "step": 31355 }, { "epoch": 0.5450468459385701, "grad_norm": 1.2393236806061734, "learning_rate": 4.514381381582647e-07, "loss": 0.2945, "step": 31356 }, { "epoch": 0.5450642284760729, "grad_norm": 1.3833000643638587, "learning_rate": 4.5141012197966534e-07, "loss": 0.2808, "step": 31357 }, { "epoch": 0.5450816110135758, "grad_norm": 0.9028932377092233, "learning_rate": 4.513821059550736e-07, "loss": 0.2218, "step": 31358 }, { "epoch": 0.5450989935510786, "grad_norm": 1.9304954402844998, "learning_rate": 4.513540900845782e-07, "loss": 0.217, "step": 31359 }, { "epoch": 0.5451163760885814, "grad_norm": 1.9534524380903515, "learning_rate": 4.5132607436826785e-07, "loss": 0.2806, "step": 31360 }, { "epoch": 0.5451337586260843, "grad_norm": 2.0052892508326066, "learning_rate": 4.512980588062316e-07, "loss": 0.3509, "step": 31361 }, { "epoch": 0.5451511411635871, "grad_norm": 1.5508668176947344, "learning_rate": 4.5127004339855805e-07, "loss": 0.2184, "step": 31362 }, { "epoch": 0.5451685237010899, "grad_norm": 1.8543601922450634, "learning_rate": 4.512420281453362e-07, "loss": 0.3039, "step": 31363 }, { "epoch": 0.5451859062385928, "grad_norm": 1.2576822339981852, "learning_rate": 4.5121401304665455e-07, "loss": 0.2323, "step": 31364 }, { "epoch": 0.5452032887760956, "grad_norm": 1.2076034988529492, "learning_rate": 4.5118599810260247e-07, "loss": 0.2453, "step": 31365 }, { "epoch": 0.5452206713135983, "grad_norm": 1.5729086093230662, "learning_rate": 4.51157983313268e-07, "loss": 0.2494, "step": 31366 }, { "epoch": 0.5452380538511011, "grad_norm": 1.3055791225781024, "learning_rate": 4.5112996867874035e-07, "loss": 0.2224, "step": 31367 }, { "epoch": 0.545255436388604, "grad_norm": 1.0515687111027552, "learning_rate": 4.5110195419910823e-07, "loss": 0.1215, "step": 31368 }, { "epoch": 0.5452728189261068, "grad_norm": 1.2395806367399191, "learning_rate": 4.510739398744605e-07, "loss": 0.205, "step": 31369 }, { "epoch": 0.5452902014636096, "grad_norm": 1.2665522162014675, "learning_rate": 4.5104592570488583e-07, "loss": 0.3827, "step": 31370 }, { "epoch": 0.5453075840011125, "grad_norm": 1.6868927719162294, "learning_rate": 4.5101791169047327e-07, "loss": 0.168, "step": 31371 }, { "epoch": 0.5453249665386153, "grad_norm": 1.5868671862765813, "learning_rate": 4.509898978313113e-07, "loss": 0.2289, "step": 31372 }, { "epoch": 0.5453423490761181, "grad_norm": 1.5851843871098523, "learning_rate": 4.5096188412748885e-07, "loss": 0.2129, "step": 31373 }, { "epoch": 0.545359731613621, "grad_norm": 1.7872091304017608, "learning_rate": 4.5093387057909465e-07, "loss": 0.3229, "step": 31374 }, { "epoch": 0.5453771141511238, "grad_norm": 1.7804618828787473, "learning_rate": 4.509058571862176e-07, "loss": 0.4543, "step": 31375 }, { "epoch": 0.5453944966886266, "grad_norm": 4.116220694360383, "learning_rate": 4.5087784394894635e-07, "loss": 0.272, "step": 31376 }, { "epoch": 0.5454118792261294, "grad_norm": 1.8403147144465664, "learning_rate": 4.5084983086737e-07, "loss": 0.1293, "step": 31377 }, { "epoch": 0.5454292617636323, "grad_norm": 1.901183395142862, "learning_rate": 4.508218179415769e-07, "loss": 0.2384, "step": 31378 }, { "epoch": 0.5454466443011351, "grad_norm": 1.2019932615569149, "learning_rate": 4.5079380517165606e-07, "loss": 0.2986, "step": 31379 }, { "epoch": 0.5454640268386379, "grad_norm": 3.3994813246583355, "learning_rate": 4.5076579255769633e-07, "loss": 0.4056, "step": 31380 }, { "epoch": 0.5454814093761408, "grad_norm": 2.2026572270493907, "learning_rate": 4.507377800997864e-07, "loss": 0.2228, "step": 31381 }, { "epoch": 0.5454987919136436, "grad_norm": 1.817973856043406, "learning_rate": 4.5070976779801493e-07, "loss": 0.2361, "step": 31382 }, { "epoch": 0.5455161744511464, "grad_norm": 2.53682438746771, "learning_rate": 4.5068175565247104e-07, "loss": 0.2254, "step": 31383 }, { "epoch": 0.5455335569886492, "grad_norm": 2.295574128798325, "learning_rate": 4.5065374366324337e-07, "loss": 0.3057, "step": 31384 }, { "epoch": 0.5455509395261521, "grad_norm": 1.6658043381784342, "learning_rate": 4.5062573183042054e-07, "loss": 0.2826, "step": 31385 }, { "epoch": 0.5455683220636548, "grad_norm": 1.8055991748658333, "learning_rate": 4.5059772015409145e-07, "loss": 0.2551, "step": 31386 }, { "epoch": 0.5455857046011576, "grad_norm": 1.857153533389599, "learning_rate": 4.505697086343449e-07, "loss": 0.2062, "step": 31387 }, { "epoch": 0.5456030871386605, "grad_norm": 1.558135087581038, "learning_rate": 4.5054169727126974e-07, "loss": 0.1671, "step": 31388 }, { "epoch": 0.5456204696761633, "grad_norm": 1.7221313266929816, "learning_rate": 4.5051368606495457e-07, "loss": 0.2914, "step": 31389 }, { "epoch": 0.5456378522136661, "grad_norm": 1.4995281348270841, "learning_rate": 4.5048567501548857e-07, "loss": 0.2219, "step": 31390 }, { "epoch": 0.545655234751169, "grad_norm": 1.0976008209753303, "learning_rate": 4.5045766412295993e-07, "loss": 0.2105, "step": 31391 }, { "epoch": 0.5456726172886718, "grad_norm": 1.4745696702208504, "learning_rate": 4.504296533874578e-07, "loss": 0.1906, "step": 31392 }, { "epoch": 0.5456899998261746, "grad_norm": 2.4625452710304687, "learning_rate": 4.5040164280907087e-07, "loss": 0.2804, "step": 31393 }, { "epoch": 0.5457073823636774, "grad_norm": 1.571780682725871, "learning_rate": 4.50373632387888e-07, "loss": 0.3519, "step": 31394 }, { "epoch": 0.5457247649011803, "grad_norm": 1.9646956105236342, "learning_rate": 4.5034562212399785e-07, "loss": 0.2573, "step": 31395 }, { "epoch": 0.5457421474386831, "grad_norm": 0.9478095236946299, "learning_rate": 4.503176120174895e-07, "loss": 0.3724, "step": 31396 }, { "epoch": 0.5457595299761859, "grad_norm": 2.0711494425597023, "learning_rate": 4.5028960206845136e-07, "loss": 0.2237, "step": 31397 }, { "epoch": 0.5457769125136888, "grad_norm": 0.6535754658157393, "learning_rate": 4.502615922769723e-07, "loss": 0.0996, "step": 31398 }, { "epoch": 0.5457942950511916, "grad_norm": 1.7314310585470167, "learning_rate": 4.502335826431411e-07, "loss": 0.255, "step": 31399 }, { "epoch": 0.5458116775886944, "grad_norm": 1.9306866269304375, "learning_rate": 4.5020557316704664e-07, "loss": 0.4786, "step": 31400 }, { "epoch": 0.5458290601261973, "grad_norm": 1.6660610247336174, "learning_rate": 4.501775638487776e-07, "loss": 0.2479, "step": 31401 }, { "epoch": 0.5458464426637001, "grad_norm": 1.7023770120928057, "learning_rate": 4.5014955468842287e-07, "loss": 0.2487, "step": 31402 }, { "epoch": 0.5458638252012029, "grad_norm": 0.9910808968774177, "learning_rate": 4.501215456860712e-07, "loss": 0.1848, "step": 31403 }, { "epoch": 0.5458812077387057, "grad_norm": 1.6289482538671636, "learning_rate": 4.5009353684181125e-07, "loss": 0.2791, "step": 31404 }, { "epoch": 0.5458985902762086, "grad_norm": 1.0192716614331079, "learning_rate": 4.5006552815573186e-07, "loss": 0.3646, "step": 31405 }, { "epoch": 0.5459159728137113, "grad_norm": 1.7396796824457919, "learning_rate": 4.500375196279218e-07, "loss": 0.2062, "step": 31406 }, { "epoch": 0.5459333553512141, "grad_norm": 1.5206588380882125, "learning_rate": 4.5000951125846974e-07, "loss": 0.2283, "step": 31407 }, { "epoch": 0.545950737888717, "grad_norm": 3.5546353070643666, "learning_rate": 4.499815030474647e-07, "loss": 0.1796, "step": 31408 }, { "epoch": 0.5459681204262198, "grad_norm": 1.7518898086443975, "learning_rate": 4.4995349499499547e-07, "loss": 0.2485, "step": 31409 }, { "epoch": 0.5459855029637226, "grad_norm": 1.819186375877942, "learning_rate": 4.499254871011504e-07, "loss": 0.237, "step": 31410 }, { "epoch": 0.5460028855012254, "grad_norm": 1.1504381356421638, "learning_rate": 4.498974793660187e-07, "loss": 0.2716, "step": 31411 }, { "epoch": 0.5460202680387283, "grad_norm": 0.7313750228437493, "learning_rate": 4.498694717896888e-07, "loss": 0.2512, "step": 31412 }, { "epoch": 0.5460376505762311, "grad_norm": 1.3043163110039315, "learning_rate": 4.498414643722498e-07, "loss": 0.2098, "step": 31413 }, { "epoch": 0.5460550331137339, "grad_norm": 1.4916041217807148, "learning_rate": 4.498134571137903e-07, "loss": 0.3289, "step": 31414 }, { "epoch": 0.5460724156512368, "grad_norm": 1.8641895211871333, "learning_rate": 4.497854500143992e-07, "loss": 0.1546, "step": 31415 }, { "epoch": 0.5460897981887396, "grad_norm": 2.7244087563285917, "learning_rate": 4.497574430741649e-07, "loss": 0.2531, "step": 31416 }, { "epoch": 0.5461071807262424, "grad_norm": 1.252438268826841, "learning_rate": 4.497294362931766e-07, "loss": 0.4199, "step": 31417 }, { "epoch": 0.5461245632637453, "grad_norm": 2.1275572365302087, "learning_rate": 4.4970142967152275e-07, "loss": 0.2979, "step": 31418 }, { "epoch": 0.5461419458012481, "grad_norm": 1.2073868313798575, "learning_rate": 4.4967342320929237e-07, "loss": 0.2556, "step": 31419 }, { "epoch": 0.5461593283387509, "grad_norm": 1.649545935246189, "learning_rate": 4.49645416906574e-07, "loss": 0.1936, "step": 31420 }, { "epoch": 0.5461767108762537, "grad_norm": 2.9544778509751044, "learning_rate": 4.496174107634567e-07, "loss": 0.4127, "step": 31421 }, { "epoch": 0.5461940934137566, "grad_norm": 2.703939102156945, "learning_rate": 4.495894047800291e-07, "loss": 0.2946, "step": 31422 }, { "epoch": 0.5462114759512594, "grad_norm": 1.493101544479207, "learning_rate": 4.4956139895637987e-07, "loss": 0.1738, "step": 31423 }, { "epoch": 0.5462288584887622, "grad_norm": 2.3949856597929493, "learning_rate": 4.4953339329259763e-07, "loss": 0.2456, "step": 31424 }, { "epoch": 0.5462462410262651, "grad_norm": 2.1023234327009184, "learning_rate": 4.495053877887716e-07, "loss": 0.3931, "step": 31425 }, { "epoch": 0.5462636235637678, "grad_norm": 1.8602588307929555, "learning_rate": 4.4947738244499014e-07, "loss": 0.4412, "step": 31426 }, { "epoch": 0.5462810061012706, "grad_norm": 2.4921234641235923, "learning_rate": 4.4944937726134223e-07, "loss": 0.25, "step": 31427 }, { "epoch": 0.5462983886387734, "grad_norm": 1.93362869189835, "learning_rate": 4.494213722379167e-07, "loss": 0.3034, "step": 31428 }, { "epoch": 0.5463157711762763, "grad_norm": 1.9047111746170644, "learning_rate": 4.493933673748021e-07, "loss": 0.144, "step": 31429 }, { "epoch": 0.5463331537137791, "grad_norm": 1.3209426729026872, "learning_rate": 4.4936536267208715e-07, "loss": 0.238, "step": 31430 }, { "epoch": 0.5463505362512819, "grad_norm": 2.036789057363847, "learning_rate": 4.4933735812986087e-07, "loss": 0.3244, "step": 31431 }, { "epoch": 0.5463679187887848, "grad_norm": 2.01599012338277, "learning_rate": 4.4930935374821176e-07, "loss": 0.264, "step": 31432 }, { "epoch": 0.5463853013262876, "grad_norm": 1.651646729901963, "learning_rate": 4.4928134952722886e-07, "loss": 0.3414, "step": 31433 }, { "epoch": 0.5464026838637904, "grad_norm": 1.6296224299357862, "learning_rate": 4.4925334546700086e-07, "loss": 0.2039, "step": 31434 }, { "epoch": 0.5464200664012933, "grad_norm": 1.5885158667210193, "learning_rate": 4.492253415676162e-07, "loss": 0.2057, "step": 31435 }, { "epoch": 0.5464374489387961, "grad_norm": 1.2467415761219802, "learning_rate": 4.4919733782916405e-07, "loss": 0.215, "step": 31436 }, { "epoch": 0.5464548314762989, "grad_norm": 1.636245933827267, "learning_rate": 4.4916933425173286e-07, "loss": 0.3139, "step": 31437 }, { "epoch": 0.5464722140138017, "grad_norm": 1.2679375319141748, "learning_rate": 4.491413308354116e-07, "loss": 0.2754, "step": 31438 }, { "epoch": 0.5464895965513046, "grad_norm": 2.2991196738349817, "learning_rate": 4.49113327580289e-07, "loss": 0.279, "step": 31439 }, { "epoch": 0.5465069790888074, "grad_norm": 1.457253894807741, "learning_rate": 4.490853244864539e-07, "loss": 0.2862, "step": 31440 }, { "epoch": 0.5465243616263102, "grad_norm": 2.023782672910401, "learning_rate": 4.4905732155399475e-07, "loss": 0.2701, "step": 31441 }, { "epoch": 0.5465417441638131, "grad_norm": 1.6978946129676058, "learning_rate": 4.4902931878300054e-07, "loss": 0.2169, "step": 31442 }, { "epoch": 0.5465591267013159, "grad_norm": 2.1883229723099653, "learning_rate": 4.490013161735599e-07, "loss": 0.2618, "step": 31443 }, { "epoch": 0.5465765092388187, "grad_norm": 1.309746914279357, "learning_rate": 4.4897331372576173e-07, "loss": 0.1543, "step": 31444 }, { "epoch": 0.5465938917763216, "grad_norm": 1.2199578795648436, "learning_rate": 4.489453114396946e-07, "loss": 0.2419, "step": 31445 }, { "epoch": 0.5466112743138243, "grad_norm": 1.6393213512499538, "learning_rate": 4.489173093154475e-07, "loss": 0.3372, "step": 31446 }, { "epoch": 0.5466286568513271, "grad_norm": 1.1752422606617456, "learning_rate": 4.4888930735310926e-07, "loss": 0.269, "step": 31447 }, { "epoch": 0.5466460393888299, "grad_norm": 1.9400345982935, "learning_rate": 4.4886130555276823e-07, "loss": 0.1932, "step": 31448 }, { "epoch": 0.5466634219263328, "grad_norm": 1.62176946616516, "learning_rate": 4.488333039145133e-07, "loss": 0.3087, "step": 31449 }, { "epoch": 0.5466808044638356, "grad_norm": 1.7909059960532059, "learning_rate": 4.488053024384334e-07, "loss": 0.1649, "step": 31450 }, { "epoch": 0.5466981870013384, "grad_norm": 0.919006137325866, "learning_rate": 4.4877730112461705e-07, "loss": 0.1723, "step": 31451 }, { "epoch": 0.5467155695388413, "grad_norm": 1.5522308639595217, "learning_rate": 4.4874929997315323e-07, "loss": 0.1807, "step": 31452 }, { "epoch": 0.5467329520763441, "grad_norm": 1.7465252150231803, "learning_rate": 4.487212989841307e-07, "loss": 0.4289, "step": 31453 }, { "epoch": 0.5467503346138469, "grad_norm": 1.5763058667268603, "learning_rate": 4.48693298157638e-07, "loss": 0.2224, "step": 31454 }, { "epoch": 0.5467677171513498, "grad_norm": 1.8594551530126002, "learning_rate": 4.486652974937639e-07, "loss": 0.2848, "step": 31455 }, { "epoch": 0.5467850996888526, "grad_norm": 1.5249237338874664, "learning_rate": 4.486372969925972e-07, "loss": 0.2027, "step": 31456 }, { "epoch": 0.5468024822263554, "grad_norm": 1.8703194187150822, "learning_rate": 4.486092966542267e-07, "loss": 0.2756, "step": 31457 }, { "epoch": 0.5468198647638582, "grad_norm": 1.2765796677371681, "learning_rate": 4.485812964787412e-07, "loss": 0.1095, "step": 31458 }, { "epoch": 0.5468372473013611, "grad_norm": 2.2969139123988525, "learning_rate": 4.4855329646622945e-07, "loss": 0.4048, "step": 31459 }, { "epoch": 0.5468546298388639, "grad_norm": 1.7295396527159395, "learning_rate": 4.4852529661677995e-07, "loss": 0.3073, "step": 31460 }, { "epoch": 0.5468720123763667, "grad_norm": 0.6947040080571307, "learning_rate": 4.4849729693048164e-07, "loss": 0.3743, "step": 31461 }, { "epoch": 0.5468893949138696, "grad_norm": 2.1563439498496675, "learning_rate": 4.4846929740742336e-07, "loss": 0.2708, "step": 31462 }, { "epoch": 0.5469067774513724, "grad_norm": 1.1889358769617795, "learning_rate": 4.484412980476935e-07, "loss": 0.2403, "step": 31463 }, { "epoch": 0.5469241599888752, "grad_norm": 1.3376996488628412, "learning_rate": 4.484132988513812e-07, "loss": 0.2364, "step": 31464 }, { "epoch": 0.546941542526378, "grad_norm": 2.3765702252156284, "learning_rate": 4.4838529981857497e-07, "loss": 0.3437, "step": 31465 }, { "epoch": 0.5469589250638808, "grad_norm": 2.247171338466651, "learning_rate": 4.4835730094936387e-07, "loss": 0.1642, "step": 31466 }, { "epoch": 0.5469763076013836, "grad_norm": 1.30906358618364, "learning_rate": 4.483293022438363e-07, "loss": 0.267, "step": 31467 }, { "epoch": 0.5469936901388864, "grad_norm": 1.4419511384303916, "learning_rate": 4.4830130370208086e-07, "loss": 0.1805, "step": 31468 }, { "epoch": 0.5470110726763893, "grad_norm": 2.050586616531805, "learning_rate": 4.482733053241868e-07, "loss": 0.2154, "step": 31469 }, { "epoch": 0.5470284552138921, "grad_norm": 1.59640976924632, "learning_rate": 4.482453071102425e-07, "loss": 0.2023, "step": 31470 }, { "epoch": 0.5470458377513949, "grad_norm": 1.5114999454281508, "learning_rate": 4.482173090603367e-07, "loss": 0.2598, "step": 31471 }, { "epoch": 0.5470632202888978, "grad_norm": 1.7003818886113253, "learning_rate": 4.481893111745585e-07, "loss": 0.2169, "step": 31472 }, { "epoch": 0.5470806028264006, "grad_norm": 1.3096639263324679, "learning_rate": 4.481613134529963e-07, "loss": 0.4715, "step": 31473 }, { "epoch": 0.5470979853639034, "grad_norm": 2.118061962654944, "learning_rate": 4.4813331589573876e-07, "loss": 0.3389, "step": 31474 }, { "epoch": 0.5471153679014062, "grad_norm": 1.873048364772578, "learning_rate": 4.481053185028749e-07, "loss": 0.2451, "step": 31475 }, { "epoch": 0.5471327504389091, "grad_norm": 1.5401405512263542, "learning_rate": 4.4807732127449325e-07, "loss": 0.1733, "step": 31476 }, { "epoch": 0.5471501329764119, "grad_norm": 1.9397294056729433, "learning_rate": 4.4804932421068274e-07, "loss": 0.2159, "step": 31477 }, { "epoch": 0.5471675155139147, "grad_norm": 1.7138202240141829, "learning_rate": 4.4802132731153216e-07, "loss": 0.2777, "step": 31478 }, { "epoch": 0.5471848980514176, "grad_norm": 2.051579195418573, "learning_rate": 4.4799333057712996e-07, "loss": 0.2906, "step": 31479 }, { "epoch": 0.5472022805889204, "grad_norm": 1.666287771351086, "learning_rate": 4.4796533400756487e-07, "loss": 0.2447, "step": 31480 }, { "epoch": 0.5472196631264232, "grad_norm": 1.2581795524353054, "learning_rate": 4.479373376029259e-07, "loss": 0.2946, "step": 31481 }, { "epoch": 0.5472370456639261, "grad_norm": 1.1667286900069396, "learning_rate": 4.4790934136330157e-07, "loss": 0.2544, "step": 31482 }, { "epoch": 0.5472544282014289, "grad_norm": 2.9281931779088075, "learning_rate": 4.4788134528878076e-07, "loss": 0.2529, "step": 31483 }, { "epoch": 0.5472718107389317, "grad_norm": 1.6416538843220416, "learning_rate": 4.47853349379452e-07, "loss": 0.2023, "step": 31484 }, { "epoch": 0.5472891932764345, "grad_norm": 1.1127342537396103, "learning_rate": 4.4782535363540445e-07, "loss": 0.2112, "step": 31485 }, { "epoch": 0.5473065758139373, "grad_norm": 1.564891659760552, "learning_rate": 4.4779735805672646e-07, "loss": 0.2247, "step": 31486 }, { "epoch": 0.5473239583514401, "grad_norm": 1.8704022112564442, "learning_rate": 4.477693626435068e-07, "loss": 0.3418, "step": 31487 }, { "epoch": 0.5473413408889429, "grad_norm": 2.065752877289323, "learning_rate": 4.4774136739583427e-07, "loss": 0.2752, "step": 31488 }, { "epoch": 0.5473587234264458, "grad_norm": 2.570501018718048, "learning_rate": 4.477133723137977e-07, "loss": 0.2292, "step": 31489 }, { "epoch": 0.5473761059639486, "grad_norm": 2.166098765277537, "learning_rate": 4.4768537739748553e-07, "loss": 0.34, "step": 31490 }, { "epoch": 0.5473934885014514, "grad_norm": 1.2335066436969857, "learning_rate": 4.4765738264698696e-07, "loss": 0.3156, "step": 31491 }, { "epoch": 0.5474108710389542, "grad_norm": 1.6359028616603422, "learning_rate": 4.4762938806239027e-07, "loss": 0.1667, "step": 31492 }, { "epoch": 0.5474282535764571, "grad_norm": 1.5734981751992958, "learning_rate": 4.476013936437843e-07, "loss": 0.3537, "step": 31493 }, { "epoch": 0.5474456361139599, "grad_norm": 1.2742860425268376, "learning_rate": 4.4757339939125794e-07, "loss": 0.207, "step": 31494 }, { "epoch": 0.5474630186514627, "grad_norm": 1.4091441543113743, "learning_rate": 4.475454053048998e-07, "loss": 0.2609, "step": 31495 }, { "epoch": 0.5474804011889656, "grad_norm": 2.683499582883676, "learning_rate": 4.4751741138479855e-07, "loss": 0.2178, "step": 31496 }, { "epoch": 0.5474977837264684, "grad_norm": 0.9822980151889313, "learning_rate": 4.474894176310432e-07, "loss": 0.1431, "step": 31497 }, { "epoch": 0.5475151662639712, "grad_norm": 3.2380503739161814, "learning_rate": 4.474614240437222e-07, "loss": 0.2615, "step": 31498 }, { "epoch": 0.5475325488014741, "grad_norm": 1.4596820020067818, "learning_rate": 4.474334306229242e-07, "loss": 0.1992, "step": 31499 }, { "epoch": 0.5475499313389769, "grad_norm": 1.8136188453320692, "learning_rate": 4.474054373687382e-07, "loss": 0.2041, "step": 31500 }, { "epoch": 0.5475673138764797, "grad_norm": 2.1518106503923673, "learning_rate": 4.473774442812527e-07, "loss": 0.2589, "step": 31501 }, { "epoch": 0.5475846964139826, "grad_norm": 1.7725186581133872, "learning_rate": 4.4734945136055663e-07, "loss": 0.3234, "step": 31502 }, { "epoch": 0.5476020789514854, "grad_norm": 1.2989768643487853, "learning_rate": 4.473214586067386e-07, "loss": 0.2133, "step": 31503 }, { "epoch": 0.5476194614889882, "grad_norm": 1.4007887503680816, "learning_rate": 4.472934660198875e-07, "loss": 0.2258, "step": 31504 }, { "epoch": 0.5476368440264909, "grad_norm": 1.4203422844464055, "learning_rate": 4.4726547360009166e-07, "loss": 0.3009, "step": 31505 }, { "epoch": 0.5476542265639938, "grad_norm": 1.1758289426098927, "learning_rate": 4.472374813474401e-07, "loss": 0.1743, "step": 31506 }, { "epoch": 0.5476716091014966, "grad_norm": 1.546740535928426, "learning_rate": 4.472094892620215e-07, "loss": 0.2674, "step": 31507 }, { "epoch": 0.5476889916389994, "grad_norm": 2.03246272929294, "learning_rate": 4.471814973439246e-07, "loss": 0.2515, "step": 31508 }, { "epoch": 0.5477063741765023, "grad_norm": 1.5392810011537035, "learning_rate": 4.4715350559323793e-07, "loss": 0.1407, "step": 31509 }, { "epoch": 0.5477237567140051, "grad_norm": 1.5487602012209072, "learning_rate": 4.471255140100508e-07, "loss": 0.173, "step": 31510 }, { "epoch": 0.5477411392515079, "grad_norm": 4.058967350554838, "learning_rate": 4.470975225944511e-07, "loss": 0.223, "step": 31511 }, { "epoch": 0.5477585217890107, "grad_norm": 2.0899716591995916, "learning_rate": 4.4706953134652815e-07, "loss": 0.1619, "step": 31512 }, { "epoch": 0.5477759043265136, "grad_norm": 2.6532193002091815, "learning_rate": 4.4704154026637025e-07, "loss": 0.3152, "step": 31513 }, { "epoch": 0.5477932868640164, "grad_norm": 0.8297700955020499, "learning_rate": 4.4701354935406654e-07, "loss": 0.1821, "step": 31514 }, { "epoch": 0.5478106694015192, "grad_norm": 1.8494084736695586, "learning_rate": 4.4698555860970537e-07, "loss": 0.2477, "step": 31515 }, { "epoch": 0.5478280519390221, "grad_norm": 1.6885828540968302, "learning_rate": 4.4695756803337584e-07, "loss": 0.3448, "step": 31516 }, { "epoch": 0.5478454344765249, "grad_norm": 1.9539773133869884, "learning_rate": 4.469295776251664e-07, "loss": 0.2395, "step": 31517 }, { "epoch": 0.5478628170140277, "grad_norm": 1.3636643224561136, "learning_rate": 4.4690158738516584e-07, "loss": 0.2435, "step": 31518 }, { "epoch": 0.5478801995515306, "grad_norm": 1.5679321906299057, "learning_rate": 4.468735973134627e-07, "loss": 0.3758, "step": 31519 }, { "epoch": 0.5478975820890334, "grad_norm": 1.9551024778992203, "learning_rate": 4.468456074101459e-07, "loss": 0.3023, "step": 31520 }, { "epoch": 0.5479149646265362, "grad_norm": 1.4632351397307095, "learning_rate": 4.4681761767530407e-07, "loss": 0.3079, "step": 31521 }, { "epoch": 0.547932347164039, "grad_norm": 1.2563293632581911, "learning_rate": 4.467896281090261e-07, "loss": 0.2792, "step": 31522 }, { "epoch": 0.5479497297015419, "grad_norm": 1.2564796203629833, "learning_rate": 4.467616387114006e-07, "loss": 0.1685, "step": 31523 }, { "epoch": 0.5479671122390447, "grad_norm": 5.861404915129618, "learning_rate": 4.4673364948251606e-07, "loss": 0.262, "step": 31524 }, { "epoch": 0.5479844947765474, "grad_norm": 3.358969902126603, "learning_rate": 4.4670566042246153e-07, "loss": 0.2977, "step": 31525 }, { "epoch": 0.5480018773140503, "grad_norm": 3.090910379588768, "learning_rate": 4.466776715313255e-07, "loss": 0.2376, "step": 31526 }, { "epoch": 0.5480192598515531, "grad_norm": 1.4415835404426334, "learning_rate": 4.4664968280919673e-07, "loss": 0.2616, "step": 31527 }, { "epoch": 0.5480366423890559, "grad_norm": 1.025058276373051, "learning_rate": 4.46621694256164e-07, "loss": 0.2248, "step": 31528 }, { "epoch": 0.5480540249265587, "grad_norm": 1.7070639872667515, "learning_rate": 4.4659370587231616e-07, "loss": 0.2948, "step": 31529 }, { "epoch": 0.5480714074640616, "grad_norm": 1.2653914410425469, "learning_rate": 4.465657176577415e-07, "loss": 0.2316, "step": 31530 }, { "epoch": 0.5480887900015644, "grad_norm": 2.0223514069019237, "learning_rate": 4.4653772961252906e-07, "loss": 0.2648, "step": 31531 }, { "epoch": 0.5481061725390672, "grad_norm": 1.844804570200559, "learning_rate": 4.465097417367673e-07, "loss": 0.2688, "step": 31532 }, { "epoch": 0.5481235550765701, "grad_norm": 1.5872533231722292, "learning_rate": 4.464817540305453e-07, "loss": 0.151, "step": 31533 }, { "epoch": 0.5481409376140729, "grad_norm": 2.3592441901591417, "learning_rate": 4.4645376649395137e-07, "loss": 0.2698, "step": 31534 }, { "epoch": 0.5481583201515757, "grad_norm": 1.303702509083918, "learning_rate": 4.4642577912707474e-07, "loss": 0.1704, "step": 31535 }, { "epoch": 0.5481757026890786, "grad_norm": 1.148011300454628, "learning_rate": 4.463977919300035e-07, "loss": 0.1404, "step": 31536 }, { "epoch": 0.5481930852265814, "grad_norm": 1.4915973598964294, "learning_rate": 4.4636980490282666e-07, "loss": 0.3293, "step": 31537 }, { "epoch": 0.5482104677640842, "grad_norm": 1.6176897674293864, "learning_rate": 4.463418180456329e-07, "loss": 0.3071, "step": 31538 }, { "epoch": 0.548227850301587, "grad_norm": 1.4373619053451803, "learning_rate": 4.46313831358511e-07, "loss": 0.195, "step": 31539 }, { "epoch": 0.5482452328390899, "grad_norm": 1.4487808326253286, "learning_rate": 4.462858448415494e-07, "loss": 0.4105, "step": 31540 }, { "epoch": 0.5482626153765927, "grad_norm": 0.9405929506061415, "learning_rate": 4.4625785849483716e-07, "loss": 0.3227, "step": 31541 }, { "epoch": 0.5482799979140955, "grad_norm": 1.449023506234399, "learning_rate": 4.46229872318463e-07, "loss": 0.208, "step": 31542 }, { "epoch": 0.5482973804515984, "grad_norm": 1.4643830766733543, "learning_rate": 4.4620188631251527e-07, "loss": 0.3098, "step": 31543 }, { "epoch": 0.5483147629891012, "grad_norm": 1.7311480423606644, "learning_rate": 4.461739004770827e-07, "loss": 0.2544, "step": 31544 }, { "epoch": 0.5483321455266039, "grad_norm": 2.6387202079555454, "learning_rate": 4.461459148122543e-07, "loss": 0.2274, "step": 31545 }, { "epoch": 0.5483495280641068, "grad_norm": 1.3162400469434612, "learning_rate": 4.4611792931811855e-07, "loss": 0.2245, "step": 31546 }, { "epoch": 0.5483669106016096, "grad_norm": 2.193417592307958, "learning_rate": 4.460899439947642e-07, "loss": 0.3216, "step": 31547 }, { "epoch": 0.5483842931391124, "grad_norm": 1.8868421899702759, "learning_rate": 4.4606195884228016e-07, "loss": 0.3019, "step": 31548 }, { "epoch": 0.5484016756766152, "grad_norm": 1.8261118937924465, "learning_rate": 4.460339738607547e-07, "loss": 0.2056, "step": 31549 }, { "epoch": 0.5484190582141181, "grad_norm": 1.7002428890270185, "learning_rate": 4.460059890502769e-07, "loss": 0.1296, "step": 31550 }, { "epoch": 0.5484364407516209, "grad_norm": 1.8143224019604367, "learning_rate": 4.4597800441093527e-07, "loss": 0.1801, "step": 31551 }, { "epoch": 0.5484538232891237, "grad_norm": 1.7776514297501922, "learning_rate": 4.459500199428184e-07, "loss": 0.2623, "step": 31552 }, { "epoch": 0.5484712058266266, "grad_norm": 2.1656336123338256, "learning_rate": 4.4592203564601534e-07, "loss": 0.4393, "step": 31553 }, { "epoch": 0.5484885883641294, "grad_norm": 1.9550487927642346, "learning_rate": 4.458940515206146e-07, "loss": 0.2076, "step": 31554 }, { "epoch": 0.5485059709016322, "grad_norm": 1.9353388681744836, "learning_rate": 4.458660675667047e-07, "loss": 0.2375, "step": 31555 }, { "epoch": 0.548523353439135, "grad_norm": 4.901876471550491, "learning_rate": 4.458380837843746e-07, "loss": 0.3886, "step": 31556 }, { "epoch": 0.5485407359766379, "grad_norm": 2.254294886229407, "learning_rate": 4.458101001737128e-07, "loss": 0.331, "step": 31557 }, { "epoch": 0.5485581185141407, "grad_norm": 1.6216460877200058, "learning_rate": 4.4578211673480813e-07, "loss": 0.2368, "step": 31558 }, { "epoch": 0.5485755010516435, "grad_norm": 1.8544842112765552, "learning_rate": 4.457541334677493e-07, "loss": 0.2062, "step": 31559 }, { "epoch": 0.5485928835891464, "grad_norm": 1.7384099700016495, "learning_rate": 4.4572615037262506e-07, "loss": 0.3808, "step": 31560 }, { "epoch": 0.5486102661266492, "grad_norm": 1.8202986500974943, "learning_rate": 4.4569816744952377e-07, "loss": 0.283, "step": 31561 }, { "epoch": 0.548627648664152, "grad_norm": 1.0838186653845525, "learning_rate": 4.4567018469853444e-07, "loss": 0.2765, "step": 31562 }, { "epoch": 0.5486450312016549, "grad_norm": 3.165139761530382, "learning_rate": 4.456422021197456e-07, "loss": 0.3256, "step": 31563 }, { "epoch": 0.5486624137391577, "grad_norm": 0.7915030642158222, "learning_rate": 4.456142197132461e-07, "loss": 0.1574, "step": 31564 }, { "epoch": 0.5486797962766604, "grad_norm": 1.816149919656163, "learning_rate": 4.4558623747912436e-07, "loss": 0.2174, "step": 31565 }, { "epoch": 0.5486971788141632, "grad_norm": 1.3060821827261033, "learning_rate": 4.4555825541746945e-07, "loss": 0.3269, "step": 31566 }, { "epoch": 0.5487145613516661, "grad_norm": 1.479050188781311, "learning_rate": 4.455302735283699e-07, "loss": 0.2168, "step": 31567 }, { "epoch": 0.5487319438891689, "grad_norm": 1.6637801620286525, "learning_rate": 4.4550229181191434e-07, "loss": 0.2999, "step": 31568 }, { "epoch": 0.5487493264266717, "grad_norm": 1.3802349614531773, "learning_rate": 4.454743102681913e-07, "loss": 0.2984, "step": 31569 }, { "epoch": 0.5487667089641746, "grad_norm": 1.3934598197048036, "learning_rate": 4.4544632889728977e-07, "loss": 0.2933, "step": 31570 }, { "epoch": 0.5487840915016774, "grad_norm": 1.6250158314402405, "learning_rate": 4.4541834769929824e-07, "loss": 0.1835, "step": 31571 }, { "epoch": 0.5488014740391802, "grad_norm": 2.2008862574460686, "learning_rate": 4.4539036667430556e-07, "loss": 0.1801, "step": 31572 }, { "epoch": 0.5488188565766831, "grad_norm": 0.7187887437967757, "learning_rate": 4.4536238582240047e-07, "loss": 0.1556, "step": 31573 }, { "epoch": 0.5488362391141859, "grad_norm": 1.5127442584122877, "learning_rate": 4.4533440514367137e-07, "loss": 0.4755, "step": 31574 }, { "epoch": 0.5488536216516887, "grad_norm": 1.152579448830506, "learning_rate": 4.45306424638207e-07, "loss": 0.1933, "step": 31575 }, { "epoch": 0.5488710041891915, "grad_norm": 3.0769995945234974, "learning_rate": 4.452784443060963e-07, "loss": 0.4391, "step": 31576 }, { "epoch": 0.5488883867266944, "grad_norm": 1.2333272616824382, "learning_rate": 4.452504641474276e-07, "loss": 0.141, "step": 31577 }, { "epoch": 0.5489057692641972, "grad_norm": 1.4414029661120655, "learning_rate": 4.452224841622899e-07, "loss": 0.2382, "step": 31578 }, { "epoch": 0.5489231518017, "grad_norm": 1.3278247963404262, "learning_rate": 4.451945043507719e-07, "loss": 0.2862, "step": 31579 }, { "epoch": 0.5489405343392029, "grad_norm": 1.389122804407782, "learning_rate": 4.45166524712962e-07, "loss": 0.2162, "step": 31580 }, { "epoch": 0.5489579168767057, "grad_norm": 1.440936005020259, "learning_rate": 4.45138545248949e-07, "loss": 0.3168, "step": 31581 }, { "epoch": 0.5489752994142085, "grad_norm": 1.0036425201491037, "learning_rate": 4.451105659588216e-07, "loss": 0.339, "step": 31582 }, { "epoch": 0.5489926819517114, "grad_norm": 1.627687815999144, "learning_rate": 4.4508258684266853e-07, "loss": 0.3182, "step": 31583 }, { "epoch": 0.5490100644892142, "grad_norm": 1.5708819595337735, "learning_rate": 4.4505460790057847e-07, "loss": 0.1721, "step": 31584 }, { "epoch": 0.5490274470267169, "grad_norm": 1.8143871854355211, "learning_rate": 4.450266291326399e-07, "loss": 0.1674, "step": 31585 }, { "epoch": 0.5490448295642197, "grad_norm": 1.4656501035821137, "learning_rate": 4.4499865053894197e-07, "loss": 0.3266, "step": 31586 }, { "epoch": 0.5490622121017226, "grad_norm": 1.9108194170786899, "learning_rate": 4.449706721195729e-07, "loss": 0.1712, "step": 31587 }, { "epoch": 0.5490795946392254, "grad_norm": 1.5262564144109765, "learning_rate": 4.4494269387462143e-07, "loss": 0.3231, "step": 31588 }, { "epoch": 0.5490969771767282, "grad_norm": 1.5698217765798308, "learning_rate": 4.449147158041764e-07, "loss": 0.2578, "step": 31589 }, { "epoch": 0.5491143597142311, "grad_norm": 1.6397305035564607, "learning_rate": 4.448867379083263e-07, "loss": 0.2253, "step": 31590 }, { "epoch": 0.5491317422517339, "grad_norm": 2.436209693005168, "learning_rate": 4.4485876018716006e-07, "loss": 0.3015, "step": 31591 }, { "epoch": 0.5491491247892367, "grad_norm": 2.424959322687226, "learning_rate": 4.4483078264076637e-07, "loss": 0.2291, "step": 31592 }, { "epoch": 0.5491665073267396, "grad_norm": 1.9137590903968449, "learning_rate": 4.4480280526923363e-07, "loss": 0.2794, "step": 31593 }, { "epoch": 0.5491838898642424, "grad_norm": 1.6597186775139001, "learning_rate": 4.4477482807265046e-07, "loss": 0.3015, "step": 31594 }, { "epoch": 0.5492012724017452, "grad_norm": 2.7094776014823667, "learning_rate": 4.447468510511059e-07, "loss": 0.2919, "step": 31595 }, { "epoch": 0.549218654939248, "grad_norm": 2.2241081141525245, "learning_rate": 4.4471887420468834e-07, "loss": 0.2229, "step": 31596 }, { "epoch": 0.5492360374767509, "grad_norm": 1.9403321061717715, "learning_rate": 4.446908975334866e-07, "loss": 0.1258, "step": 31597 }, { "epoch": 0.5492534200142537, "grad_norm": 1.2305934461769419, "learning_rate": 4.4466292103758944e-07, "loss": 0.2982, "step": 31598 }, { "epoch": 0.5492708025517565, "grad_norm": 1.9696077889579107, "learning_rate": 4.4463494471708535e-07, "loss": 0.3582, "step": 31599 }, { "epoch": 0.5492881850892594, "grad_norm": 2.346293175385316, "learning_rate": 4.446069685720629e-07, "loss": 0.2229, "step": 31600 }, { "epoch": 0.5493055676267622, "grad_norm": 2.694652223238283, "learning_rate": 4.44578992602611e-07, "loss": 0.1764, "step": 31601 }, { "epoch": 0.549322950164265, "grad_norm": 1.402340367141903, "learning_rate": 4.4455101680881813e-07, "loss": 0.1839, "step": 31602 }, { "epoch": 0.5493403327017679, "grad_norm": 1.153810166539983, "learning_rate": 4.4452304119077324e-07, "loss": 0.2919, "step": 31603 }, { "epoch": 0.5493577152392707, "grad_norm": 1.4888840318340462, "learning_rate": 4.444950657485646e-07, "loss": 0.3495, "step": 31604 }, { "epoch": 0.5493750977767734, "grad_norm": 1.3520781248821439, "learning_rate": 4.4446709048228144e-07, "loss": 0.219, "step": 31605 }, { "epoch": 0.5493924803142762, "grad_norm": 1.172141665353638, "learning_rate": 4.444391153920119e-07, "loss": 0.338, "step": 31606 }, { "epoch": 0.5494098628517791, "grad_norm": 1.923227249950366, "learning_rate": 4.444111404778449e-07, "loss": 0.2607, "step": 31607 }, { "epoch": 0.5494272453892819, "grad_norm": 1.6155065754981854, "learning_rate": 4.443831657398689e-07, "loss": 0.3354, "step": 31608 }, { "epoch": 0.5494446279267847, "grad_norm": 1.6256996902253757, "learning_rate": 4.443551911781728e-07, "loss": 0.2277, "step": 31609 }, { "epoch": 0.5494620104642876, "grad_norm": 1.52493822235565, "learning_rate": 4.4432721679284515e-07, "loss": 0.2747, "step": 31610 }, { "epoch": 0.5494793930017904, "grad_norm": 0.984936074070182, "learning_rate": 4.442992425839748e-07, "loss": 0.2149, "step": 31611 }, { "epoch": 0.5494967755392932, "grad_norm": 1.1478948204527946, "learning_rate": 4.442712685516502e-07, "loss": 0.2074, "step": 31612 }, { "epoch": 0.549514158076796, "grad_norm": 1.8872659507264196, "learning_rate": 4.4424329469596e-07, "loss": 0.3662, "step": 31613 }, { "epoch": 0.5495315406142989, "grad_norm": 2.9609451709556955, "learning_rate": 4.44215321016993e-07, "loss": 0.2427, "step": 31614 }, { "epoch": 0.5495489231518017, "grad_norm": 1.653139842109354, "learning_rate": 4.441873475148378e-07, "loss": 0.4403, "step": 31615 }, { "epoch": 0.5495663056893045, "grad_norm": 1.8784047714186003, "learning_rate": 4.44159374189583e-07, "loss": 0.3311, "step": 31616 }, { "epoch": 0.5495836882268074, "grad_norm": 1.7739223094831833, "learning_rate": 4.441314010413175e-07, "loss": 0.2954, "step": 31617 }, { "epoch": 0.5496010707643102, "grad_norm": 1.183224143227232, "learning_rate": 4.4410342807012973e-07, "loss": 0.2006, "step": 31618 }, { "epoch": 0.549618453301813, "grad_norm": 2.1174637108413945, "learning_rate": 4.440754552761083e-07, "loss": 0.2147, "step": 31619 }, { "epoch": 0.5496358358393159, "grad_norm": 1.8497935564236112, "learning_rate": 4.4404748265934207e-07, "loss": 0.2568, "step": 31620 }, { "epoch": 0.5496532183768187, "grad_norm": 1.912501898348466, "learning_rate": 4.440195102199195e-07, "loss": 0.2302, "step": 31621 }, { "epoch": 0.5496706009143215, "grad_norm": 3.0630946181121614, "learning_rate": 4.4399153795792953e-07, "loss": 0.2568, "step": 31622 }, { "epoch": 0.5496879834518243, "grad_norm": 1.964518262334874, "learning_rate": 4.439635658734605e-07, "loss": 0.3192, "step": 31623 }, { "epoch": 0.5497053659893272, "grad_norm": 1.1779340189484544, "learning_rate": 4.439355939666016e-07, "loss": 0.2535, "step": 31624 }, { "epoch": 0.5497227485268299, "grad_norm": 1.2080671580557056, "learning_rate": 4.439076222374407e-07, "loss": 0.3998, "step": 31625 }, { "epoch": 0.5497401310643327, "grad_norm": 1.1012821782614046, "learning_rate": 4.4387965068606707e-07, "loss": 0.2487, "step": 31626 }, { "epoch": 0.5497575136018356, "grad_norm": 2.1655174021111976, "learning_rate": 4.438516793125691e-07, "loss": 0.2994, "step": 31627 }, { "epoch": 0.5497748961393384, "grad_norm": 2.214134118856944, "learning_rate": 4.438237081170355e-07, "loss": 0.2352, "step": 31628 }, { "epoch": 0.5497922786768412, "grad_norm": 4.572672106384015, "learning_rate": 4.437957370995549e-07, "loss": 0.4197, "step": 31629 }, { "epoch": 0.549809661214344, "grad_norm": 1.379273907581873, "learning_rate": 4.4376776626021615e-07, "loss": 0.401, "step": 31630 }, { "epoch": 0.5498270437518469, "grad_norm": 1.4616695372002033, "learning_rate": 4.437397955991077e-07, "loss": 0.3265, "step": 31631 }, { "epoch": 0.5498444262893497, "grad_norm": 3.227636265793552, "learning_rate": 4.437118251163182e-07, "loss": 0.216, "step": 31632 }, { "epoch": 0.5498618088268525, "grad_norm": 1.328582008008704, "learning_rate": 4.436838548119363e-07, "loss": 0.2907, "step": 31633 }, { "epoch": 0.5498791913643554, "grad_norm": 2.182469007781009, "learning_rate": 4.4365588468605083e-07, "loss": 0.2807, "step": 31634 }, { "epoch": 0.5498965739018582, "grad_norm": 1.481497442276161, "learning_rate": 4.436279147387502e-07, "loss": 0.1581, "step": 31635 }, { "epoch": 0.549913956439361, "grad_norm": 1.6463161097429144, "learning_rate": 4.435999449701234e-07, "loss": 0.2366, "step": 31636 }, { "epoch": 0.5499313389768639, "grad_norm": 1.2612116915708047, "learning_rate": 4.435719753802588e-07, "loss": 0.2678, "step": 31637 }, { "epoch": 0.5499487215143667, "grad_norm": 0.8618879555786778, "learning_rate": 4.435440059692449e-07, "loss": 0.3026, "step": 31638 }, { "epoch": 0.5499661040518695, "grad_norm": 1.4024739304981955, "learning_rate": 4.4351603673717077e-07, "loss": 0.4853, "step": 31639 }, { "epoch": 0.5499834865893724, "grad_norm": 1.3522782289898734, "learning_rate": 4.434880676841248e-07, "loss": 0.319, "step": 31640 }, { "epoch": 0.5500008691268752, "grad_norm": 1.7660771506256567, "learning_rate": 4.434600988101956e-07, "loss": 0.1996, "step": 31641 }, { "epoch": 0.550018251664378, "grad_norm": 1.1748989376606855, "learning_rate": 4.43432130115472e-07, "loss": 0.2066, "step": 31642 }, { "epoch": 0.5500356342018808, "grad_norm": 1.3691494804212576, "learning_rate": 4.434041616000427e-07, "loss": 0.2757, "step": 31643 }, { "epoch": 0.5500530167393836, "grad_norm": 2.1175455268814156, "learning_rate": 4.4337619326399603e-07, "loss": 0.3965, "step": 31644 }, { "epoch": 0.5500703992768864, "grad_norm": 1.47810224861795, "learning_rate": 4.4334822510742085e-07, "loss": 0.1995, "step": 31645 }, { "epoch": 0.5500877818143892, "grad_norm": 1.342633770855027, "learning_rate": 4.4332025713040576e-07, "loss": 0.2384, "step": 31646 }, { "epoch": 0.550105164351892, "grad_norm": 1.4458128049502235, "learning_rate": 4.4329228933303947e-07, "loss": 0.3277, "step": 31647 }, { "epoch": 0.5501225468893949, "grad_norm": 1.0498011937287444, "learning_rate": 4.4326432171541056e-07, "loss": 0.1879, "step": 31648 }, { "epoch": 0.5501399294268977, "grad_norm": 2.33343823011622, "learning_rate": 4.432363542776078e-07, "loss": 0.3321, "step": 31649 }, { "epoch": 0.5501573119644005, "grad_norm": 2.276329536503208, "learning_rate": 4.4320838701971954e-07, "loss": 0.1931, "step": 31650 }, { "epoch": 0.5501746945019034, "grad_norm": 2.70199028692147, "learning_rate": 4.431804199418347e-07, "loss": 0.3017, "step": 31651 }, { "epoch": 0.5501920770394062, "grad_norm": 3.8767442283141325, "learning_rate": 4.431524530440417e-07, "loss": 0.2792, "step": 31652 }, { "epoch": 0.550209459576909, "grad_norm": 1.4678165426007905, "learning_rate": 4.431244863264294e-07, "loss": 0.2182, "step": 31653 }, { "epoch": 0.5502268421144119, "grad_norm": 2.2873628186638366, "learning_rate": 4.430965197890863e-07, "loss": 0.2159, "step": 31654 }, { "epoch": 0.5502442246519147, "grad_norm": 1.7853733196927468, "learning_rate": 4.4306855343210137e-07, "loss": 0.2386, "step": 31655 }, { "epoch": 0.5502616071894175, "grad_norm": 1.9465547854717389, "learning_rate": 4.430405872555627e-07, "loss": 0.2074, "step": 31656 }, { "epoch": 0.5502789897269204, "grad_norm": 1.652644722625808, "learning_rate": 4.430126212595593e-07, "loss": 0.2214, "step": 31657 }, { "epoch": 0.5502963722644232, "grad_norm": 1.9296982274485712, "learning_rate": 4.429846554441796e-07, "loss": 0.3042, "step": 31658 }, { "epoch": 0.550313754801926, "grad_norm": 2.4002451038330395, "learning_rate": 4.429566898095124e-07, "loss": 0.3683, "step": 31659 }, { "epoch": 0.5503311373394288, "grad_norm": 2.3989018385736283, "learning_rate": 4.429287243556462e-07, "loss": 0.3485, "step": 31660 }, { "epoch": 0.5503485198769317, "grad_norm": 1.6861545015671655, "learning_rate": 4.4290075908266986e-07, "loss": 0.385, "step": 31661 }, { "epoch": 0.5503659024144345, "grad_norm": 2.8256539855007436, "learning_rate": 4.42872793990672e-07, "loss": 0.351, "step": 31662 }, { "epoch": 0.5503832849519373, "grad_norm": 1.40829789491681, "learning_rate": 4.4284482907974107e-07, "loss": 0.2556, "step": 31663 }, { "epoch": 0.55040066748944, "grad_norm": 4.896999123698434, "learning_rate": 4.4281686434996564e-07, "loss": 0.4095, "step": 31664 }, { "epoch": 0.5504180500269429, "grad_norm": 1.9686967369101034, "learning_rate": 4.427888998014345e-07, "loss": 0.2724, "step": 31665 }, { "epoch": 0.5504354325644457, "grad_norm": 1.2759063233037724, "learning_rate": 4.427609354342363e-07, "loss": 0.1712, "step": 31666 }, { "epoch": 0.5504528151019485, "grad_norm": 1.5800316871983953, "learning_rate": 4.4273297124845967e-07, "loss": 0.2528, "step": 31667 }, { "epoch": 0.5504701976394514, "grad_norm": 1.0634538703820564, "learning_rate": 4.4270500724419337e-07, "loss": 0.2049, "step": 31668 }, { "epoch": 0.5504875801769542, "grad_norm": 2.3127588464241846, "learning_rate": 4.4267704342152564e-07, "loss": 0.342, "step": 31669 }, { "epoch": 0.550504962714457, "grad_norm": 1.735350804192556, "learning_rate": 4.426490797805454e-07, "loss": 0.3671, "step": 31670 }, { "epoch": 0.5505223452519599, "grad_norm": 1.1903823464489764, "learning_rate": 4.4262111632134117e-07, "loss": 0.2215, "step": 31671 }, { "epoch": 0.5505397277894627, "grad_norm": 1.9733344968610167, "learning_rate": 4.425931530440017e-07, "loss": 0.3212, "step": 31672 }, { "epoch": 0.5505571103269655, "grad_norm": 2.93496926537819, "learning_rate": 4.425651899486157e-07, "loss": 0.2778, "step": 31673 }, { "epoch": 0.5505744928644684, "grad_norm": 1.8626878180965323, "learning_rate": 4.425372270352717e-07, "loss": 0.2994, "step": 31674 }, { "epoch": 0.5505918754019712, "grad_norm": 2.1890514527176905, "learning_rate": 4.4250926430405803e-07, "loss": 0.3342, "step": 31675 }, { "epoch": 0.550609257939474, "grad_norm": 1.771474217118932, "learning_rate": 4.424813017550638e-07, "loss": 0.2802, "step": 31676 }, { "epoch": 0.5506266404769768, "grad_norm": 2.1912900224284195, "learning_rate": 4.424533393883772e-07, "loss": 0.2479, "step": 31677 }, { "epoch": 0.5506440230144797, "grad_norm": 2.214033130143173, "learning_rate": 4.424253772040872e-07, "loss": 0.3331, "step": 31678 }, { "epoch": 0.5506614055519825, "grad_norm": 1.7886439617053191, "learning_rate": 4.4239741520228225e-07, "loss": 0.2286, "step": 31679 }, { "epoch": 0.5506787880894853, "grad_norm": 1.9875466869828133, "learning_rate": 4.423694533830513e-07, "loss": 0.2999, "step": 31680 }, { "epoch": 0.5506961706269882, "grad_norm": 1.4497725214664599, "learning_rate": 4.4234149174648244e-07, "loss": 0.2669, "step": 31681 }, { "epoch": 0.550713553164491, "grad_norm": 3.4423107822991694, "learning_rate": 4.4231353029266464e-07, "loss": 0.1788, "step": 31682 }, { "epoch": 0.5507309357019938, "grad_norm": 1.1360296434432617, "learning_rate": 4.422855690216864e-07, "loss": 0.4495, "step": 31683 }, { "epoch": 0.5507483182394965, "grad_norm": 1.6010878742281414, "learning_rate": 4.4225760793363645e-07, "loss": 0.3215, "step": 31684 }, { "epoch": 0.5507657007769994, "grad_norm": 1.6823159122941085, "learning_rate": 4.4222964702860324e-07, "loss": 0.2308, "step": 31685 }, { "epoch": 0.5507830833145022, "grad_norm": 2.1914848738725983, "learning_rate": 4.4220168630667564e-07, "loss": 0.3191, "step": 31686 }, { "epoch": 0.550800465852005, "grad_norm": 1.5955063914092065, "learning_rate": 4.4217372576794226e-07, "loss": 0.2293, "step": 31687 }, { "epoch": 0.5508178483895079, "grad_norm": 1.3543832722092308, "learning_rate": 4.421457654124915e-07, "loss": 0.2744, "step": 31688 }, { "epoch": 0.5508352309270107, "grad_norm": 1.5744407880956823, "learning_rate": 4.4211780524041195e-07, "loss": 0.2798, "step": 31689 }, { "epoch": 0.5508526134645135, "grad_norm": 1.6108497113583802, "learning_rate": 4.420898452517925e-07, "loss": 0.2117, "step": 31690 }, { "epoch": 0.5508699960020164, "grad_norm": 2.71277982658355, "learning_rate": 4.420618854467215e-07, "loss": 0.2587, "step": 31691 }, { "epoch": 0.5508873785395192, "grad_norm": 1.7080563063264802, "learning_rate": 4.4203392582528786e-07, "loss": 0.1588, "step": 31692 }, { "epoch": 0.550904761077022, "grad_norm": 2.1477259504556967, "learning_rate": 4.4200596638758015e-07, "loss": 0.3469, "step": 31693 }, { "epoch": 0.5509221436145249, "grad_norm": 2.0674582383314686, "learning_rate": 4.4197800713368664e-07, "loss": 0.2274, "step": 31694 }, { "epoch": 0.5509395261520277, "grad_norm": 2.001737031438176, "learning_rate": 4.419500480636963e-07, "loss": 0.2827, "step": 31695 }, { "epoch": 0.5509569086895305, "grad_norm": 1.0179519773798598, "learning_rate": 4.419220891776977e-07, "loss": 0.2062, "step": 31696 }, { "epoch": 0.5509742912270333, "grad_norm": 1.399414850725447, "learning_rate": 4.418941304757793e-07, "loss": 0.2878, "step": 31697 }, { "epoch": 0.5509916737645362, "grad_norm": 1.1412410576666987, "learning_rate": 4.4186617195802984e-07, "loss": 0.2613, "step": 31698 }, { "epoch": 0.551009056302039, "grad_norm": 1.4671506385285116, "learning_rate": 4.4183821362453806e-07, "loss": 0.3209, "step": 31699 }, { "epoch": 0.5510264388395418, "grad_norm": 4.704641188009342, "learning_rate": 4.418102554753922e-07, "loss": 0.3157, "step": 31700 }, { "epoch": 0.5510438213770447, "grad_norm": 3.1173846255587216, "learning_rate": 4.417822975106812e-07, "loss": 0.2366, "step": 31701 }, { "epoch": 0.5510612039145475, "grad_norm": 1.5269375208976483, "learning_rate": 4.4175433973049345e-07, "loss": 0.2432, "step": 31702 }, { "epoch": 0.5510785864520503, "grad_norm": 1.730094226118321, "learning_rate": 4.4172638213491787e-07, "loss": 0.2515, "step": 31703 }, { "epoch": 0.551095968989553, "grad_norm": 1.535532193440131, "learning_rate": 4.4169842472404286e-07, "loss": 0.4608, "step": 31704 }, { "epoch": 0.5511133515270559, "grad_norm": 2.191310507375506, "learning_rate": 4.4167046749795686e-07, "loss": 0.3076, "step": 31705 }, { "epoch": 0.5511307340645587, "grad_norm": 1.9588316550029963, "learning_rate": 4.4164251045674905e-07, "loss": 0.294, "step": 31706 }, { "epoch": 0.5511481166020615, "grad_norm": 2.4463820986341456, "learning_rate": 4.416145536005075e-07, "loss": 0.2737, "step": 31707 }, { "epoch": 0.5511654991395644, "grad_norm": 1.2957086390756574, "learning_rate": 4.415865969293209e-07, "loss": 0.2696, "step": 31708 }, { "epoch": 0.5511828816770672, "grad_norm": 1.3789317490862125, "learning_rate": 4.41558640443278e-07, "loss": 0.4278, "step": 31709 }, { "epoch": 0.55120026421457, "grad_norm": 1.2435408331334292, "learning_rate": 4.415306841424673e-07, "loss": 0.1839, "step": 31710 }, { "epoch": 0.5512176467520729, "grad_norm": 2.2618446311945153, "learning_rate": 4.415027280269776e-07, "loss": 0.2485, "step": 31711 }, { "epoch": 0.5512350292895757, "grad_norm": 2.0673369526346157, "learning_rate": 4.4147477209689744e-07, "loss": 0.2986, "step": 31712 }, { "epoch": 0.5512524118270785, "grad_norm": 0.9016720124401987, "learning_rate": 4.4144681635231527e-07, "loss": 0.1827, "step": 31713 }, { "epoch": 0.5512697943645813, "grad_norm": 1.6554942167425997, "learning_rate": 4.4141886079331977e-07, "loss": 0.1204, "step": 31714 }, { "epoch": 0.5512871769020842, "grad_norm": 1.5711416395593107, "learning_rate": 4.4139090541999955e-07, "loss": 0.1792, "step": 31715 }, { "epoch": 0.551304559439587, "grad_norm": 1.1169971788918718, "learning_rate": 4.413629502324432e-07, "loss": 0.2293, "step": 31716 }, { "epoch": 0.5513219419770898, "grad_norm": 1.877249974113833, "learning_rate": 4.4133499523073946e-07, "loss": 0.1922, "step": 31717 }, { "epoch": 0.5513393245145927, "grad_norm": 1.5767976646523445, "learning_rate": 4.41307040414977e-07, "loss": 0.2215, "step": 31718 }, { "epoch": 0.5513567070520955, "grad_norm": 1.8524818017514038, "learning_rate": 4.4127908578524397e-07, "loss": 0.321, "step": 31719 }, { "epoch": 0.5513740895895983, "grad_norm": 1.0733418837943365, "learning_rate": 4.412511313416294e-07, "loss": 0.1986, "step": 31720 }, { "epoch": 0.5513914721271012, "grad_norm": 2.1624736595094194, "learning_rate": 4.412231770842217e-07, "loss": 0.2733, "step": 31721 }, { "epoch": 0.551408854664604, "grad_norm": 1.7073482644677318, "learning_rate": 4.411952230131095e-07, "loss": 0.2639, "step": 31722 }, { "epoch": 0.5514262372021068, "grad_norm": 1.4538168901466604, "learning_rate": 4.411672691283815e-07, "loss": 0.1666, "step": 31723 }, { "epoch": 0.5514436197396095, "grad_norm": 1.2000170740748919, "learning_rate": 4.411393154301262e-07, "loss": 0.3097, "step": 31724 }, { "epoch": 0.5514610022771124, "grad_norm": 0.9011953373350776, "learning_rate": 4.4111136191843234e-07, "loss": 0.1899, "step": 31725 }, { "epoch": 0.5514783848146152, "grad_norm": 1.6370600392483612, "learning_rate": 4.4108340859338835e-07, "loss": 0.1801, "step": 31726 }, { "epoch": 0.551495767352118, "grad_norm": 2.072402933046296, "learning_rate": 4.410554554550828e-07, "loss": 0.2091, "step": 31727 }, { "epoch": 0.5515131498896209, "grad_norm": 1.574619467013758, "learning_rate": 4.410275025036044e-07, "loss": 0.24, "step": 31728 }, { "epoch": 0.5515305324271237, "grad_norm": 1.9598007032765763, "learning_rate": 4.4099954973904185e-07, "loss": 0.2146, "step": 31729 }, { "epoch": 0.5515479149646265, "grad_norm": 2.245139963839491, "learning_rate": 4.409715971614835e-07, "loss": 0.2782, "step": 31730 }, { "epoch": 0.5515652975021293, "grad_norm": 1.5356328509258712, "learning_rate": 4.409436447710182e-07, "loss": 0.2464, "step": 31731 }, { "epoch": 0.5515826800396322, "grad_norm": 1.8823734689789022, "learning_rate": 4.4091569256773436e-07, "loss": 0.204, "step": 31732 }, { "epoch": 0.551600062577135, "grad_norm": 1.2591858568212522, "learning_rate": 4.408877405517205e-07, "loss": 0.3266, "step": 31733 }, { "epoch": 0.5516174451146378, "grad_norm": 1.525774553505203, "learning_rate": 4.408597887230655e-07, "loss": 0.2488, "step": 31734 }, { "epoch": 0.5516348276521407, "grad_norm": 2.1797744612370615, "learning_rate": 4.408318370818577e-07, "loss": 0.3802, "step": 31735 }, { "epoch": 0.5516522101896435, "grad_norm": 1.3922636229121945, "learning_rate": 4.408038856281858e-07, "loss": 0.1619, "step": 31736 }, { "epoch": 0.5516695927271463, "grad_norm": 1.950621317185032, "learning_rate": 4.407759343621386e-07, "loss": 0.2484, "step": 31737 }, { "epoch": 0.5516869752646492, "grad_norm": 1.9263827770110113, "learning_rate": 4.4074798328380433e-07, "loss": 0.3113, "step": 31738 }, { "epoch": 0.551704357802152, "grad_norm": 2.327070327399521, "learning_rate": 4.4072003239327173e-07, "loss": 0.2196, "step": 31739 }, { "epoch": 0.5517217403396548, "grad_norm": 1.9492091475452733, "learning_rate": 4.4069208169062936e-07, "loss": 0.3066, "step": 31740 }, { "epoch": 0.5517391228771577, "grad_norm": 1.3384627482612828, "learning_rate": 4.406641311759658e-07, "loss": 0.2862, "step": 31741 }, { "epoch": 0.5517565054146605, "grad_norm": 2.559024559801341, "learning_rate": 4.406361808493698e-07, "loss": 0.3156, "step": 31742 }, { "epoch": 0.5517738879521633, "grad_norm": 1.6389778247495015, "learning_rate": 4.4060823071092975e-07, "loss": 0.2918, "step": 31743 }, { "epoch": 0.551791270489666, "grad_norm": 1.9403660836960726, "learning_rate": 4.4058028076073464e-07, "loss": 0.2665, "step": 31744 }, { "epoch": 0.5518086530271689, "grad_norm": 1.3541216592442253, "learning_rate": 4.405523309988724e-07, "loss": 0.2036, "step": 31745 }, { "epoch": 0.5518260355646717, "grad_norm": 1.4153671171082571, "learning_rate": 4.405243814254321e-07, "loss": 0.2716, "step": 31746 }, { "epoch": 0.5518434181021745, "grad_norm": 2.302971752496917, "learning_rate": 4.40496432040502e-07, "loss": 0.1307, "step": 31747 }, { "epoch": 0.5518608006396774, "grad_norm": 1.6356490942874322, "learning_rate": 4.4046848284417106e-07, "loss": 0.2275, "step": 31748 }, { "epoch": 0.5518781831771802, "grad_norm": 1.1544104485324909, "learning_rate": 4.404405338365276e-07, "loss": 0.5397, "step": 31749 }, { "epoch": 0.551895565714683, "grad_norm": 1.7968921156631008, "learning_rate": 4.4041258501766044e-07, "loss": 0.3058, "step": 31750 }, { "epoch": 0.5519129482521858, "grad_norm": 1.8901303172802066, "learning_rate": 4.4038463638765793e-07, "loss": 0.3214, "step": 31751 }, { "epoch": 0.5519303307896887, "grad_norm": 1.2898625178019285, "learning_rate": 4.403566879466087e-07, "loss": 0.3149, "step": 31752 }, { "epoch": 0.5519477133271915, "grad_norm": 1.8280216748785025, "learning_rate": 4.403287396946013e-07, "loss": 0.3823, "step": 31753 }, { "epoch": 0.5519650958646943, "grad_norm": 1.4175478490083917, "learning_rate": 4.4030079163172453e-07, "loss": 0.217, "step": 31754 }, { "epoch": 0.5519824784021972, "grad_norm": 2.363491651237799, "learning_rate": 4.402728437580667e-07, "loss": 0.4148, "step": 31755 }, { "epoch": 0.5519998609397, "grad_norm": 1.8269015714122483, "learning_rate": 4.402448960737167e-07, "loss": 0.3087, "step": 31756 }, { "epoch": 0.5520172434772028, "grad_norm": 4.5048784111109175, "learning_rate": 4.4021694857876284e-07, "loss": 0.2847, "step": 31757 }, { "epoch": 0.5520346260147057, "grad_norm": 1.1597325784393344, "learning_rate": 4.4018900127329365e-07, "loss": 0.2206, "step": 31758 }, { "epoch": 0.5520520085522085, "grad_norm": 2.807735248099825, "learning_rate": 4.40161054157398e-07, "loss": 0.3611, "step": 31759 }, { "epoch": 0.5520693910897113, "grad_norm": 2.071552385159166, "learning_rate": 4.401331072311643e-07, "loss": 0.2698, "step": 31760 }, { "epoch": 0.5520867736272141, "grad_norm": 0.7931858380758775, "learning_rate": 4.4010516049468107e-07, "loss": 0.3219, "step": 31761 }, { "epoch": 0.552104156164717, "grad_norm": 1.3061423319259913, "learning_rate": 4.4007721394803703e-07, "loss": 0.2916, "step": 31762 }, { "epoch": 0.5521215387022198, "grad_norm": 1.3627283724129529, "learning_rate": 4.4004926759132086e-07, "loss": 0.2003, "step": 31763 }, { "epoch": 0.5521389212397225, "grad_norm": 1.9101749701739184, "learning_rate": 4.400213214246207e-07, "loss": 0.352, "step": 31764 }, { "epoch": 0.5521563037772254, "grad_norm": 2.02928814515441, "learning_rate": 4.3999337544802556e-07, "loss": 0.3984, "step": 31765 }, { "epoch": 0.5521736863147282, "grad_norm": 1.669383948802209, "learning_rate": 4.3996542966162377e-07, "loss": 0.355, "step": 31766 }, { "epoch": 0.552191068852231, "grad_norm": 1.820532844184138, "learning_rate": 4.39937484065504e-07, "loss": 0.3631, "step": 31767 }, { "epoch": 0.5522084513897338, "grad_norm": 1.4143059696304188, "learning_rate": 4.399095386597548e-07, "loss": 0.2893, "step": 31768 }, { "epoch": 0.5522258339272367, "grad_norm": 1.46743301863399, "learning_rate": 4.3988159344446503e-07, "loss": 0.2519, "step": 31769 }, { "epoch": 0.5522432164647395, "grad_norm": 1.361097623899961, "learning_rate": 4.398536484197227e-07, "loss": 0.3923, "step": 31770 }, { "epoch": 0.5522605990022423, "grad_norm": 1.2947409519560196, "learning_rate": 4.3982570358561673e-07, "loss": 0.2471, "step": 31771 }, { "epoch": 0.5522779815397452, "grad_norm": 1.7369212103972398, "learning_rate": 4.3979775894223556e-07, "loss": 0.1858, "step": 31772 }, { "epoch": 0.552295364077248, "grad_norm": 2.3264394817803784, "learning_rate": 4.3976981448966793e-07, "loss": 0.2656, "step": 31773 }, { "epoch": 0.5523127466147508, "grad_norm": 1.376440644701101, "learning_rate": 4.3974187022800226e-07, "loss": 0.4582, "step": 31774 }, { "epoch": 0.5523301291522537, "grad_norm": 1.8952363518428952, "learning_rate": 4.397139261573274e-07, "loss": 0.1598, "step": 31775 }, { "epoch": 0.5523475116897565, "grad_norm": 1.5626268514769086, "learning_rate": 4.3968598227773154e-07, "loss": 0.2618, "step": 31776 }, { "epoch": 0.5523648942272593, "grad_norm": 1.9792505578111408, "learning_rate": 4.396580385893034e-07, "loss": 0.2726, "step": 31777 }, { "epoch": 0.5523822767647621, "grad_norm": 2.564140139935529, "learning_rate": 4.396300950921315e-07, "loss": 0.2192, "step": 31778 }, { "epoch": 0.552399659302265, "grad_norm": 1.502449065830608, "learning_rate": 4.3960215178630456e-07, "loss": 0.1793, "step": 31779 }, { "epoch": 0.5524170418397678, "grad_norm": 0.9909461067313635, "learning_rate": 4.39574208671911e-07, "loss": 0.3991, "step": 31780 }, { "epoch": 0.5524344243772706, "grad_norm": 2.112360492125084, "learning_rate": 4.3954626574903943e-07, "loss": 0.331, "step": 31781 }, { "epoch": 0.5524518069147735, "grad_norm": 2.727848097861816, "learning_rate": 4.3951832301777865e-07, "loss": 0.3804, "step": 31782 }, { "epoch": 0.5524691894522762, "grad_norm": 1.8494210890917788, "learning_rate": 4.3949038047821667e-07, "loss": 0.2329, "step": 31783 }, { "epoch": 0.552486571989779, "grad_norm": 1.0525585967982714, "learning_rate": 4.394624381304426e-07, "loss": 0.2679, "step": 31784 }, { "epoch": 0.5525039545272818, "grad_norm": 1.8290298027576635, "learning_rate": 4.394344959745447e-07, "loss": 0.2461, "step": 31785 }, { "epoch": 0.5525213370647847, "grad_norm": 1.6293805505001737, "learning_rate": 4.394065540106116e-07, "loss": 0.27, "step": 31786 }, { "epoch": 0.5525387196022875, "grad_norm": 1.5015238815552887, "learning_rate": 4.39378612238732e-07, "loss": 0.2069, "step": 31787 }, { "epoch": 0.5525561021397903, "grad_norm": 1.31680061104801, "learning_rate": 4.3935067065899446e-07, "loss": 0.2093, "step": 31788 }, { "epoch": 0.5525734846772932, "grad_norm": 1.9486079328387305, "learning_rate": 4.393227292714872e-07, "loss": 0.1654, "step": 31789 }, { "epoch": 0.552590867214796, "grad_norm": 0.8634671759022362, "learning_rate": 4.3929478807629905e-07, "loss": 0.1152, "step": 31790 }, { "epoch": 0.5526082497522988, "grad_norm": 1.5015328323000934, "learning_rate": 4.3926684707351853e-07, "loss": 0.4519, "step": 31791 }, { "epoch": 0.5526256322898017, "grad_norm": 2.23515865129679, "learning_rate": 4.3923890626323424e-07, "loss": 0.3227, "step": 31792 }, { "epoch": 0.5526430148273045, "grad_norm": 1.3938075353623514, "learning_rate": 4.392109656455348e-07, "loss": 0.1807, "step": 31793 }, { "epoch": 0.5526603973648073, "grad_norm": 1.6966882596104664, "learning_rate": 4.3918302522050873e-07, "loss": 0.2965, "step": 31794 }, { "epoch": 0.5526777799023102, "grad_norm": 1.3406896798589596, "learning_rate": 4.391550849882443e-07, "loss": 0.2947, "step": 31795 }, { "epoch": 0.552695162439813, "grad_norm": 2.9752690163808566, "learning_rate": 4.3912714494883043e-07, "loss": 0.3561, "step": 31796 }, { "epoch": 0.5527125449773158, "grad_norm": 1.2974974423162629, "learning_rate": 4.390992051023554e-07, "loss": 0.202, "step": 31797 }, { "epoch": 0.5527299275148186, "grad_norm": 1.2792017419685904, "learning_rate": 4.3907126544890804e-07, "loss": 0.1824, "step": 31798 }, { "epoch": 0.5527473100523215, "grad_norm": 1.6171927112197828, "learning_rate": 4.390433259885767e-07, "loss": 0.2691, "step": 31799 }, { "epoch": 0.5527646925898243, "grad_norm": 1.630084456146015, "learning_rate": 4.390153867214503e-07, "loss": 0.1985, "step": 31800 }, { "epoch": 0.5527820751273271, "grad_norm": 1.5983416144141198, "learning_rate": 4.389874476476168e-07, "loss": 0.3362, "step": 31801 }, { "epoch": 0.55279945766483, "grad_norm": 2.6139181444095168, "learning_rate": 4.389595087671652e-07, "loss": 0.4961, "step": 31802 }, { "epoch": 0.5528168402023327, "grad_norm": 0.8907963522300121, "learning_rate": 4.389315700801838e-07, "loss": 0.1925, "step": 31803 }, { "epoch": 0.5528342227398355, "grad_norm": 1.801180970493315, "learning_rate": 4.3890363158676136e-07, "loss": 0.3809, "step": 31804 }, { "epoch": 0.5528516052773383, "grad_norm": 0.984379875725866, "learning_rate": 4.3887569328698627e-07, "loss": 0.2378, "step": 31805 }, { "epoch": 0.5528689878148412, "grad_norm": 1.8486362449152522, "learning_rate": 4.388477551809472e-07, "loss": 0.342, "step": 31806 }, { "epoch": 0.552886370352344, "grad_norm": 1.147946450229979, "learning_rate": 4.388198172687328e-07, "loss": 0.2576, "step": 31807 }, { "epoch": 0.5529037528898468, "grad_norm": 0.8126138175616249, "learning_rate": 4.3879187955043135e-07, "loss": 0.3401, "step": 31808 }, { "epoch": 0.5529211354273497, "grad_norm": 1.9090664872222411, "learning_rate": 4.3876394202613144e-07, "loss": 0.2309, "step": 31809 }, { "epoch": 0.5529385179648525, "grad_norm": 2.450719821264788, "learning_rate": 4.387360046959218e-07, "loss": 0.3253, "step": 31810 }, { "epoch": 0.5529559005023553, "grad_norm": 1.6489133272140009, "learning_rate": 4.3870806755989075e-07, "loss": 0.1765, "step": 31811 }, { "epoch": 0.5529732830398582, "grad_norm": 1.6346339578355755, "learning_rate": 4.3868013061812717e-07, "loss": 0.2067, "step": 31812 }, { "epoch": 0.552990665577361, "grad_norm": 1.4606483301108506, "learning_rate": 4.3865219387071947e-07, "loss": 0.1971, "step": 31813 }, { "epoch": 0.5530080481148638, "grad_norm": 2.1885449194841753, "learning_rate": 4.3862425731775586e-07, "loss": 0.2335, "step": 31814 }, { "epoch": 0.5530254306523666, "grad_norm": 1.754572243149996, "learning_rate": 4.385963209593253e-07, "loss": 0.421, "step": 31815 }, { "epoch": 0.5530428131898695, "grad_norm": 1.4180738908021615, "learning_rate": 4.385683847955161e-07, "loss": 0.2685, "step": 31816 }, { "epoch": 0.5530601957273723, "grad_norm": 2.3433418209145147, "learning_rate": 4.3854044882641706e-07, "loss": 0.2359, "step": 31817 }, { "epoch": 0.5530775782648751, "grad_norm": 1.2736526397685597, "learning_rate": 4.3851251305211644e-07, "loss": 0.3386, "step": 31818 }, { "epoch": 0.553094960802378, "grad_norm": 2.9970933660297905, "learning_rate": 4.384845774727031e-07, "loss": 0.2564, "step": 31819 }, { "epoch": 0.5531123433398808, "grad_norm": 1.9666621435718101, "learning_rate": 4.384566420882652e-07, "loss": 0.3477, "step": 31820 }, { "epoch": 0.5531297258773836, "grad_norm": 1.2080747224605233, "learning_rate": 4.3842870689889154e-07, "loss": 0.3747, "step": 31821 }, { "epoch": 0.5531471084148865, "grad_norm": 2.4248244152534713, "learning_rate": 4.384007719046705e-07, "loss": 0.3361, "step": 31822 }, { "epoch": 0.5531644909523892, "grad_norm": 3.8608463647384723, "learning_rate": 4.3837283710569086e-07, "loss": 0.2517, "step": 31823 }, { "epoch": 0.553181873489892, "grad_norm": 1.2504773255270083, "learning_rate": 4.383449025020408e-07, "loss": 0.1376, "step": 31824 }, { "epoch": 0.5531992560273948, "grad_norm": 2.029231662607558, "learning_rate": 4.383169680938093e-07, "loss": 0.2898, "step": 31825 }, { "epoch": 0.5532166385648977, "grad_norm": 1.7566307215825852, "learning_rate": 4.3828903388108476e-07, "loss": 0.2491, "step": 31826 }, { "epoch": 0.5532340211024005, "grad_norm": 1.4735730547905834, "learning_rate": 4.3826109986395547e-07, "loss": 0.2777, "step": 31827 }, { "epoch": 0.5532514036399033, "grad_norm": 1.2597557794486312, "learning_rate": 4.3823316604251e-07, "loss": 0.3647, "step": 31828 }, { "epoch": 0.5532687861774062, "grad_norm": 2.4457414819302747, "learning_rate": 4.3820523241683727e-07, "loss": 0.1591, "step": 31829 }, { "epoch": 0.553286168714909, "grad_norm": 1.5933578181682573, "learning_rate": 4.381772989870254e-07, "loss": 0.1366, "step": 31830 }, { "epoch": 0.5533035512524118, "grad_norm": 1.1745655427420199, "learning_rate": 4.3814936575316313e-07, "loss": 0.2668, "step": 31831 }, { "epoch": 0.5533209337899146, "grad_norm": 1.8272201064803075, "learning_rate": 4.3812143271533914e-07, "loss": 0.2195, "step": 31832 }, { "epoch": 0.5533383163274175, "grad_norm": 1.401334843496628, "learning_rate": 4.3809349987364165e-07, "loss": 0.2075, "step": 31833 }, { "epoch": 0.5533556988649203, "grad_norm": 1.2015021485456416, "learning_rate": 4.380655672281592e-07, "loss": 0.2978, "step": 31834 }, { "epoch": 0.5533730814024231, "grad_norm": 1.481240942710954, "learning_rate": 4.380376347789806e-07, "loss": 0.259, "step": 31835 }, { "epoch": 0.553390463939926, "grad_norm": 1.1208950217498623, "learning_rate": 4.380097025261941e-07, "loss": 0.2779, "step": 31836 }, { "epoch": 0.5534078464774288, "grad_norm": 0.9547770372566251, "learning_rate": 4.379817704698885e-07, "loss": 0.3205, "step": 31837 }, { "epoch": 0.5534252290149316, "grad_norm": 1.796034710893249, "learning_rate": 4.379538386101522e-07, "loss": 0.2254, "step": 31838 }, { "epoch": 0.5534426115524345, "grad_norm": 2.034255318172501, "learning_rate": 4.3792590694707366e-07, "loss": 0.3664, "step": 31839 }, { "epoch": 0.5534599940899373, "grad_norm": 2.746855249416815, "learning_rate": 4.3789797548074153e-07, "loss": 0.2354, "step": 31840 }, { "epoch": 0.5534773766274401, "grad_norm": 2.0165732654753983, "learning_rate": 4.378700442112443e-07, "loss": 0.3575, "step": 31841 }, { "epoch": 0.553494759164943, "grad_norm": 1.5829696080362305, "learning_rate": 4.3784211313867035e-07, "loss": 0.2212, "step": 31842 }, { "epoch": 0.5535121417024457, "grad_norm": 1.4372299535254287, "learning_rate": 4.378141822631085e-07, "loss": 0.2019, "step": 31843 }, { "epoch": 0.5535295242399485, "grad_norm": 1.792307215754484, "learning_rate": 4.37786251584647e-07, "loss": 0.2074, "step": 31844 }, { "epoch": 0.5535469067774513, "grad_norm": 2.9630990890358566, "learning_rate": 4.3775832110337473e-07, "loss": 0.1568, "step": 31845 }, { "epoch": 0.5535642893149542, "grad_norm": 2.0300720891030584, "learning_rate": 4.3773039081937984e-07, "loss": 0.3739, "step": 31846 }, { "epoch": 0.553581671852457, "grad_norm": 1.4692448652457464, "learning_rate": 4.37702460732751e-07, "loss": 0.3163, "step": 31847 }, { "epoch": 0.5535990543899598, "grad_norm": 1.564275733952669, "learning_rate": 4.376745308435767e-07, "loss": 0.3068, "step": 31848 }, { "epoch": 0.5536164369274627, "grad_norm": 1.1007010633411254, "learning_rate": 4.3764660115194565e-07, "loss": 0.2501, "step": 31849 }, { "epoch": 0.5536338194649655, "grad_norm": 1.4621829661687802, "learning_rate": 4.3761867165794604e-07, "loss": 0.2283, "step": 31850 }, { "epoch": 0.5536512020024683, "grad_norm": 1.1730336774834154, "learning_rate": 4.375907423616668e-07, "loss": 0.2249, "step": 31851 }, { "epoch": 0.5536685845399711, "grad_norm": 1.658912983505801, "learning_rate": 4.3756281326319616e-07, "loss": 0.3229, "step": 31852 }, { "epoch": 0.553685967077474, "grad_norm": 1.8882793698580809, "learning_rate": 4.375348843626226e-07, "loss": 0.3862, "step": 31853 }, { "epoch": 0.5537033496149768, "grad_norm": 2.1246251533768947, "learning_rate": 4.375069556600349e-07, "loss": 0.3078, "step": 31854 }, { "epoch": 0.5537207321524796, "grad_norm": 1.4835888123545413, "learning_rate": 4.3747902715552127e-07, "loss": 0.2655, "step": 31855 }, { "epoch": 0.5537381146899825, "grad_norm": 1.5075270861811352, "learning_rate": 4.3745109884917054e-07, "loss": 0.2361, "step": 31856 }, { "epoch": 0.5537554972274853, "grad_norm": 2.509719470742214, "learning_rate": 4.3742317074107126e-07, "loss": 0.3638, "step": 31857 }, { "epoch": 0.5537728797649881, "grad_norm": 2.6113937304300525, "learning_rate": 4.373952428313116e-07, "loss": 0.5238, "step": 31858 }, { "epoch": 0.553790262302491, "grad_norm": 2.827167851380504, "learning_rate": 4.373673151199802e-07, "loss": 0.2908, "step": 31859 }, { "epoch": 0.5538076448399938, "grad_norm": 1.8066222180148874, "learning_rate": 4.3733938760716573e-07, "loss": 0.3918, "step": 31860 }, { "epoch": 0.5538250273774966, "grad_norm": 1.5416940960447845, "learning_rate": 4.373114602929565e-07, "loss": 0.3173, "step": 31861 }, { "epoch": 0.5538424099149994, "grad_norm": 2.1336768792912575, "learning_rate": 4.372835331774413e-07, "loss": 0.3115, "step": 31862 }, { "epoch": 0.5538597924525022, "grad_norm": 1.598896193981996, "learning_rate": 4.372556062607083e-07, "loss": 0.2353, "step": 31863 }, { "epoch": 0.553877174990005, "grad_norm": 2.0210182814476485, "learning_rate": 4.372276795428464e-07, "loss": 0.2836, "step": 31864 }, { "epoch": 0.5538945575275078, "grad_norm": 2.5444616284082504, "learning_rate": 4.3719975302394396e-07, "loss": 0.3288, "step": 31865 }, { "epoch": 0.5539119400650107, "grad_norm": 1.8268140895184295, "learning_rate": 4.3717182670408936e-07, "loss": 0.2278, "step": 31866 }, { "epoch": 0.5539293226025135, "grad_norm": 1.8029853381702625, "learning_rate": 4.3714390058337115e-07, "loss": 0.1848, "step": 31867 }, { "epoch": 0.5539467051400163, "grad_norm": 1.3016291878191302, "learning_rate": 4.3711597466187794e-07, "loss": 0.1351, "step": 31868 }, { "epoch": 0.5539640876775191, "grad_norm": 2.858586127433101, "learning_rate": 4.3708804893969816e-07, "loss": 0.4105, "step": 31869 }, { "epoch": 0.553981470215022, "grad_norm": 1.3081763974603562, "learning_rate": 4.3706012341692057e-07, "loss": 0.333, "step": 31870 }, { "epoch": 0.5539988527525248, "grad_norm": 1.6286313345441363, "learning_rate": 4.370321980936333e-07, "loss": 0.3707, "step": 31871 }, { "epoch": 0.5540162352900276, "grad_norm": 2.6404793874537407, "learning_rate": 4.37004272969925e-07, "loss": 0.5458, "step": 31872 }, { "epoch": 0.5540336178275305, "grad_norm": 1.819856469090917, "learning_rate": 4.369763480458843e-07, "loss": 0.2969, "step": 31873 }, { "epoch": 0.5540510003650333, "grad_norm": 1.2725519432483283, "learning_rate": 4.3694842332159967e-07, "loss": 0.4308, "step": 31874 }, { "epoch": 0.5540683829025361, "grad_norm": 1.4896097214702195, "learning_rate": 4.3692049879715936e-07, "loss": 0.2517, "step": 31875 }, { "epoch": 0.554085765440039, "grad_norm": 2.0155586990165446, "learning_rate": 4.368925744726524e-07, "loss": 0.4697, "step": 31876 }, { "epoch": 0.5541031479775418, "grad_norm": 1.3457237636426445, "learning_rate": 4.368646503481669e-07, "loss": 0.2836, "step": 31877 }, { "epoch": 0.5541205305150446, "grad_norm": 1.5553289040595502, "learning_rate": 4.3683672642379125e-07, "loss": 0.2271, "step": 31878 }, { "epoch": 0.5541379130525474, "grad_norm": 4.917736547547143, "learning_rate": 4.3680880269961433e-07, "loss": 0.2705, "step": 31879 }, { "epoch": 0.5541552955900503, "grad_norm": 1.5000940641418206, "learning_rate": 4.367808791757244e-07, "loss": 0.1781, "step": 31880 }, { "epoch": 0.5541726781275531, "grad_norm": 1.1351177375007937, "learning_rate": 4.367529558522101e-07, "loss": 0.3407, "step": 31881 }, { "epoch": 0.5541900606650559, "grad_norm": 2.214252548879069, "learning_rate": 4.367250327291599e-07, "loss": 0.3115, "step": 31882 }, { "epoch": 0.5542074432025587, "grad_norm": 1.4814773740914156, "learning_rate": 4.3669710980666233e-07, "loss": 0.2016, "step": 31883 }, { "epoch": 0.5542248257400615, "grad_norm": 1.4976041795672068, "learning_rate": 4.366691870848057e-07, "loss": 0.4859, "step": 31884 }, { "epoch": 0.5542422082775643, "grad_norm": 2.122733236034227, "learning_rate": 4.366412645636788e-07, "loss": 0.3364, "step": 31885 }, { "epoch": 0.5542595908150671, "grad_norm": 1.2490836156118503, "learning_rate": 4.3661334224336976e-07, "loss": 0.2338, "step": 31886 }, { "epoch": 0.55427697335257, "grad_norm": 1.4633605821819906, "learning_rate": 4.365854201239675e-07, "loss": 0.2663, "step": 31887 }, { "epoch": 0.5542943558900728, "grad_norm": 1.8569314954902487, "learning_rate": 4.365574982055602e-07, "loss": 0.3302, "step": 31888 }, { "epoch": 0.5543117384275756, "grad_norm": 2.188876931816988, "learning_rate": 4.365295764882368e-07, "loss": 0.3795, "step": 31889 }, { "epoch": 0.5543291209650785, "grad_norm": 1.6124331510760312, "learning_rate": 4.365016549720851e-07, "loss": 0.2751, "step": 31890 }, { "epoch": 0.5543465035025813, "grad_norm": 1.5063736649257848, "learning_rate": 4.364737336571942e-07, "loss": 0.312, "step": 31891 }, { "epoch": 0.5543638860400841, "grad_norm": 1.4724201871690918, "learning_rate": 4.3644581254365224e-07, "loss": 0.1901, "step": 31892 }, { "epoch": 0.554381268577587, "grad_norm": 1.3888717647069697, "learning_rate": 4.36417891631548e-07, "loss": 0.3048, "step": 31893 }, { "epoch": 0.5543986511150898, "grad_norm": 1.4350375037556178, "learning_rate": 4.3638997092096966e-07, "loss": 0.2553, "step": 31894 }, { "epoch": 0.5544160336525926, "grad_norm": 1.6488324136584247, "learning_rate": 4.3636205041200614e-07, "loss": 0.2685, "step": 31895 }, { "epoch": 0.5544334161900955, "grad_norm": 1.2550516866845318, "learning_rate": 4.363341301047456e-07, "loss": 0.2177, "step": 31896 }, { "epoch": 0.5544507987275983, "grad_norm": 1.286350573861196, "learning_rate": 4.3630620999927656e-07, "loss": 0.223, "step": 31897 }, { "epoch": 0.5544681812651011, "grad_norm": 2.47443857930371, "learning_rate": 4.3627829009568754e-07, "loss": 0.3734, "step": 31898 }, { "epoch": 0.5544855638026039, "grad_norm": 1.276965471683258, "learning_rate": 4.362503703940671e-07, "loss": 0.3969, "step": 31899 }, { "epoch": 0.5545029463401068, "grad_norm": 1.4628102078678462, "learning_rate": 4.362224508945037e-07, "loss": 0.2222, "step": 31900 }, { "epoch": 0.5545203288776096, "grad_norm": 2.0665993950656003, "learning_rate": 4.361945315970859e-07, "loss": 0.2871, "step": 31901 }, { "epoch": 0.5545377114151124, "grad_norm": 1.2124418006815962, "learning_rate": 4.361666125019022e-07, "loss": 0.1564, "step": 31902 }, { "epoch": 0.5545550939526152, "grad_norm": 1.7041598322429288, "learning_rate": 4.3613869360904084e-07, "loss": 0.3757, "step": 31903 }, { "epoch": 0.554572476490118, "grad_norm": 1.409529557834216, "learning_rate": 4.3611077491859056e-07, "loss": 0.1208, "step": 31904 }, { "epoch": 0.5545898590276208, "grad_norm": 1.596439834207108, "learning_rate": 4.3608285643063966e-07, "loss": 0.314, "step": 31905 }, { "epoch": 0.5546072415651236, "grad_norm": 2.384825543267172, "learning_rate": 4.3605493814527685e-07, "loss": 0.2606, "step": 31906 }, { "epoch": 0.5546246241026265, "grad_norm": 1.853608492209778, "learning_rate": 4.360270200625905e-07, "loss": 0.3423, "step": 31907 }, { "epoch": 0.5546420066401293, "grad_norm": 1.3251367851241926, "learning_rate": 4.359991021826693e-07, "loss": 0.202, "step": 31908 }, { "epoch": 0.5546593891776321, "grad_norm": 1.7510164107526898, "learning_rate": 4.359711845056013e-07, "loss": 0.4246, "step": 31909 }, { "epoch": 0.554676771715135, "grad_norm": 1.9746311804721595, "learning_rate": 4.359432670314753e-07, "loss": 0.2159, "step": 31910 }, { "epoch": 0.5546941542526378, "grad_norm": 3.2754579955249334, "learning_rate": 4.3591534976037964e-07, "loss": 0.3099, "step": 31911 }, { "epoch": 0.5547115367901406, "grad_norm": 1.7950212059206625, "learning_rate": 4.35887432692403e-07, "loss": 0.2005, "step": 31912 }, { "epoch": 0.5547289193276435, "grad_norm": 2.1204233963057217, "learning_rate": 4.358595158276336e-07, "loss": 0.3547, "step": 31913 }, { "epoch": 0.5547463018651463, "grad_norm": 1.9030018832356472, "learning_rate": 4.358315991661604e-07, "loss": 0.2092, "step": 31914 }, { "epoch": 0.5547636844026491, "grad_norm": 1.9371759884694417, "learning_rate": 4.358036827080712e-07, "loss": 0.1428, "step": 31915 }, { "epoch": 0.554781066940152, "grad_norm": 2.363052989388053, "learning_rate": 4.3577576645345506e-07, "loss": 0.4386, "step": 31916 }, { "epoch": 0.5547984494776548, "grad_norm": 1.2546139080249123, "learning_rate": 4.3574785040240005e-07, "loss": 0.4058, "step": 31917 }, { "epoch": 0.5548158320151576, "grad_norm": 1.5260111726613632, "learning_rate": 4.3571993455499495e-07, "loss": 0.3682, "step": 31918 }, { "epoch": 0.5548332145526604, "grad_norm": 2.2819002007369065, "learning_rate": 4.35692018911328e-07, "loss": 0.3315, "step": 31919 }, { "epoch": 0.5548505970901633, "grad_norm": 2.4294270383846595, "learning_rate": 4.3566410347148806e-07, "loss": 0.3717, "step": 31920 }, { "epoch": 0.5548679796276661, "grad_norm": 1.8651759637881418, "learning_rate": 4.3563618823556324e-07, "loss": 0.4359, "step": 31921 }, { "epoch": 0.5548853621651688, "grad_norm": 10.716507195750477, "learning_rate": 4.3560827320364213e-07, "loss": 0.3545, "step": 31922 }, { "epoch": 0.5549027447026716, "grad_norm": 1.516726890032073, "learning_rate": 4.3558035837581313e-07, "loss": 0.246, "step": 31923 }, { "epoch": 0.5549201272401745, "grad_norm": 3.9461050261904176, "learning_rate": 4.355524437521649e-07, "loss": 0.3341, "step": 31924 }, { "epoch": 0.5549375097776773, "grad_norm": 1.5149419339417756, "learning_rate": 4.3552452933278573e-07, "loss": 0.2836, "step": 31925 }, { "epoch": 0.5549548923151801, "grad_norm": 0.8630192079713951, "learning_rate": 4.3549661511776427e-07, "loss": 0.1597, "step": 31926 }, { "epoch": 0.554972274852683, "grad_norm": 1.9651519519932148, "learning_rate": 4.35468701107189e-07, "loss": 0.162, "step": 31927 }, { "epoch": 0.5549896573901858, "grad_norm": 1.8747834755963546, "learning_rate": 4.354407873011481e-07, "loss": 0.2289, "step": 31928 }, { "epoch": 0.5550070399276886, "grad_norm": 1.9475306777397818, "learning_rate": 4.354128736997304e-07, "loss": 0.2696, "step": 31929 }, { "epoch": 0.5550244224651915, "grad_norm": 1.1544398506959062, "learning_rate": 4.353849603030242e-07, "loss": 0.2801, "step": 31930 }, { "epoch": 0.5550418050026943, "grad_norm": 1.5914904443430051, "learning_rate": 4.353570471111178e-07, "loss": 0.2506, "step": 31931 }, { "epoch": 0.5550591875401971, "grad_norm": 1.7253015905825007, "learning_rate": 4.353291341241001e-07, "loss": 0.182, "step": 31932 }, { "epoch": 0.5550765700777, "grad_norm": 2.259618362516, "learning_rate": 4.353012213420594e-07, "loss": 0.3099, "step": 31933 }, { "epoch": 0.5550939526152028, "grad_norm": 1.388488170766248, "learning_rate": 4.352733087650838e-07, "loss": 0.4047, "step": 31934 }, { "epoch": 0.5551113351527056, "grad_norm": 1.9309197524314325, "learning_rate": 4.352453963932623e-07, "loss": 0.1742, "step": 31935 }, { "epoch": 0.5551287176902084, "grad_norm": 1.32081050166311, "learning_rate": 4.3521748422668304e-07, "loss": 0.353, "step": 31936 }, { "epoch": 0.5551461002277113, "grad_norm": 1.611112809717069, "learning_rate": 4.3518957226543467e-07, "loss": 0.2843, "step": 31937 }, { "epoch": 0.5551634827652141, "grad_norm": 1.5463071209582926, "learning_rate": 4.3516166050960554e-07, "loss": 0.3002, "step": 31938 }, { "epoch": 0.5551808653027169, "grad_norm": 1.659750086141152, "learning_rate": 4.3513374895928434e-07, "loss": 0.2174, "step": 31939 }, { "epoch": 0.5551982478402198, "grad_norm": 1.2036078862931254, "learning_rate": 4.351058376145591e-07, "loss": 0.431, "step": 31940 }, { "epoch": 0.5552156303777226, "grad_norm": 1.6952464986857443, "learning_rate": 4.3507792647551873e-07, "loss": 0.2645, "step": 31941 }, { "epoch": 0.5552330129152253, "grad_norm": 0.9326746086001304, "learning_rate": 4.350500155422513e-07, "loss": 0.113, "step": 31942 }, { "epoch": 0.5552503954527281, "grad_norm": 1.3978495044955215, "learning_rate": 4.350221048148457e-07, "loss": 0.2812, "step": 31943 }, { "epoch": 0.555267777990231, "grad_norm": 1.1790870136909999, "learning_rate": 4.3499419429338995e-07, "loss": 0.2232, "step": 31944 }, { "epoch": 0.5552851605277338, "grad_norm": 2.2137831253786002, "learning_rate": 4.349662839779729e-07, "loss": 0.3046, "step": 31945 }, { "epoch": 0.5553025430652366, "grad_norm": 2.2323169854348714, "learning_rate": 4.3493837386868303e-07, "loss": 0.5013, "step": 31946 }, { "epoch": 0.5553199256027395, "grad_norm": 1.9578661699091067, "learning_rate": 4.349104639656084e-07, "loss": 0.2877, "step": 31947 }, { "epoch": 0.5553373081402423, "grad_norm": 1.3532677240547748, "learning_rate": 4.348825542688377e-07, "loss": 0.1906, "step": 31948 }, { "epoch": 0.5553546906777451, "grad_norm": 1.4802591417079254, "learning_rate": 4.3485464477845945e-07, "loss": 0.3375, "step": 31949 }, { "epoch": 0.555372073215248, "grad_norm": 1.3679548165200694, "learning_rate": 4.3482673549456194e-07, "loss": 0.2375, "step": 31950 }, { "epoch": 0.5553894557527508, "grad_norm": 1.4804151279176692, "learning_rate": 4.3479882641723385e-07, "loss": 0.2628, "step": 31951 }, { "epoch": 0.5554068382902536, "grad_norm": 1.4175771923521283, "learning_rate": 4.3477091754656364e-07, "loss": 0.2576, "step": 31952 }, { "epoch": 0.5554242208277564, "grad_norm": 1.7184245673199772, "learning_rate": 4.347430088826395e-07, "loss": 0.2621, "step": 31953 }, { "epoch": 0.5554416033652593, "grad_norm": 1.2701670044155324, "learning_rate": 4.3471510042555e-07, "loss": 0.2335, "step": 31954 }, { "epoch": 0.5554589859027621, "grad_norm": 2.0758013424621704, "learning_rate": 4.346871921753838e-07, "loss": 0.2362, "step": 31955 }, { "epoch": 0.5554763684402649, "grad_norm": 1.011169385890799, "learning_rate": 4.3465928413222904e-07, "loss": 0.165, "step": 31956 }, { "epoch": 0.5554937509777678, "grad_norm": 1.8485941984990117, "learning_rate": 4.346313762961744e-07, "loss": 0.2193, "step": 31957 }, { "epoch": 0.5555111335152706, "grad_norm": 3.2848132183042638, "learning_rate": 4.346034686673084e-07, "loss": 0.2681, "step": 31958 }, { "epoch": 0.5555285160527734, "grad_norm": 5.408623414035165, "learning_rate": 4.3457556124571913e-07, "loss": 0.2944, "step": 31959 }, { "epoch": 0.5555458985902763, "grad_norm": 1.6117989704106614, "learning_rate": 4.3454765403149543e-07, "loss": 0.144, "step": 31960 }, { "epoch": 0.5555632811277791, "grad_norm": 1.145729169008824, "learning_rate": 4.345197470247255e-07, "loss": 0.2107, "step": 31961 }, { "epoch": 0.5555806636652818, "grad_norm": 1.4480256795095785, "learning_rate": 4.34491840225498e-07, "loss": 0.2386, "step": 31962 }, { "epoch": 0.5555980462027846, "grad_norm": 1.096016320864509, "learning_rate": 4.344639336339012e-07, "loss": 0.253, "step": 31963 }, { "epoch": 0.5556154287402875, "grad_norm": 1.2054865431551722, "learning_rate": 4.344360272500236e-07, "loss": 0.3213, "step": 31964 }, { "epoch": 0.5556328112777903, "grad_norm": 1.6749209927761226, "learning_rate": 4.3440812107395386e-07, "loss": 0.4019, "step": 31965 }, { "epoch": 0.5556501938152931, "grad_norm": 3.071057302443521, "learning_rate": 4.3438021510578016e-07, "loss": 0.4782, "step": 31966 }, { "epoch": 0.555667576352796, "grad_norm": 1.8513863342511911, "learning_rate": 4.3435230934559085e-07, "loss": 0.2186, "step": 31967 }, { "epoch": 0.5556849588902988, "grad_norm": 2.6220253329111967, "learning_rate": 4.3432440379347476e-07, "loss": 0.1824, "step": 31968 }, { "epoch": 0.5557023414278016, "grad_norm": 1.611142810432681, "learning_rate": 4.3429649844952e-07, "loss": 0.2222, "step": 31969 }, { "epoch": 0.5557197239653044, "grad_norm": 1.4514476663176534, "learning_rate": 4.342685933138153e-07, "loss": 0.2265, "step": 31970 }, { "epoch": 0.5557371065028073, "grad_norm": 1.7454042810823345, "learning_rate": 4.34240688386449e-07, "loss": 0.2602, "step": 31971 }, { "epoch": 0.5557544890403101, "grad_norm": 1.8680045304147812, "learning_rate": 4.3421278366750945e-07, "loss": 0.3067, "step": 31972 }, { "epoch": 0.5557718715778129, "grad_norm": 1.2434650192429288, "learning_rate": 4.34184879157085e-07, "loss": 0.1793, "step": 31973 }, { "epoch": 0.5557892541153158, "grad_norm": 1.5467049803832278, "learning_rate": 4.341569748552644e-07, "loss": 0.1827, "step": 31974 }, { "epoch": 0.5558066366528186, "grad_norm": 1.6998501544691256, "learning_rate": 4.3412907076213584e-07, "loss": 0.3267, "step": 31975 }, { "epoch": 0.5558240191903214, "grad_norm": 16.358658757796217, "learning_rate": 4.3410116687778797e-07, "loss": 0.2987, "step": 31976 }, { "epoch": 0.5558414017278243, "grad_norm": 1.373326860058201, "learning_rate": 4.3407326320230927e-07, "loss": 0.2782, "step": 31977 }, { "epoch": 0.5558587842653271, "grad_norm": 1.3747143490794562, "learning_rate": 4.340453597357879e-07, "loss": 0.2041, "step": 31978 }, { "epoch": 0.5558761668028299, "grad_norm": 1.08775398499775, "learning_rate": 4.340174564783123e-07, "loss": 0.2599, "step": 31979 }, { "epoch": 0.5558935493403327, "grad_norm": 1.4116779961540395, "learning_rate": 4.339895534299712e-07, "loss": 0.3964, "step": 31980 }, { "epoch": 0.5559109318778356, "grad_norm": 1.5190078575125379, "learning_rate": 4.3396165059085276e-07, "loss": 0.3004, "step": 31981 }, { "epoch": 0.5559283144153383, "grad_norm": 3.1607129190413685, "learning_rate": 4.339337479610457e-07, "loss": 0.319, "step": 31982 }, { "epoch": 0.5559456969528411, "grad_norm": 0.90876194881788, "learning_rate": 4.339058455406381e-07, "loss": 0.1423, "step": 31983 }, { "epoch": 0.555963079490344, "grad_norm": 1.813876649335057, "learning_rate": 4.338779433297189e-07, "loss": 0.1814, "step": 31984 }, { "epoch": 0.5559804620278468, "grad_norm": 1.509091730619552, "learning_rate": 4.338500413283762e-07, "loss": 0.2639, "step": 31985 }, { "epoch": 0.5559978445653496, "grad_norm": 1.404334685883121, "learning_rate": 4.338221395366984e-07, "loss": 0.2215, "step": 31986 }, { "epoch": 0.5560152271028524, "grad_norm": 1.5973599457033814, "learning_rate": 4.33794237954774e-07, "loss": 0.1801, "step": 31987 }, { "epoch": 0.5560326096403553, "grad_norm": 1.273315946022589, "learning_rate": 4.337663365826915e-07, "loss": 0.1664, "step": 31988 }, { "epoch": 0.5560499921778581, "grad_norm": 1.436261554587156, "learning_rate": 4.337384354205391e-07, "loss": 0.2451, "step": 31989 }, { "epoch": 0.5560673747153609, "grad_norm": 1.287417265680311, "learning_rate": 4.337105344684058e-07, "loss": 0.3675, "step": 31990 }, { "epoch": 0.5560847572528638, "grad_norm": 1.6922283628060912, "learning_rate": 4.3368263372637943e-07, "loss": 0.2273, "step": 31991 }, { "epoch": 0.5561021397903666, "grad_norm": 1.7679932144983106, "learning_rate": 4.336547331945486e-07, "loss": 0.2733, "step": 31992 }, { "epoch": 0.5561195223278694, "grad_norm": 1.0915235790312294, "learning_rate": 4.3362683287300186e-07, "loss": 0.3682, "step": 31993 }, { "epoch": 0.5561369048653723, "grad_norm": 1.6124204565253437, "learning_rate": 4.335989327618276e-07, "loss": 0.2291, "step": 31994 }, { "epoch": 0.5561542874028751, "grad_norm": 1.4074152521390781, "learning_rate": 4.33571032861114e-07, "loss": 0.191, "step": 31995 }, { "epoch": 0.5561716699403779, "grad_norm": 1.5204106133764947, "learning_rate": 4.3354313317095003e-07, "loss": 0.2423, "step": 31996 }, { "epoch": 0.5561890524778808, "grad_norm": 1.9969885912443597, "learning_rate": 4.3351523369142373e-07, "loss": 0.3301, "step": 31997 }, { "epoch": 0.5562064350153836, "grad_norm": 2.0470135058348053, "learning_rate": 4.334873344226234e-07, "loss": 0.3411, "step": 31998 }, { "epoch": 0.5562238175528864, "grad_norm": 1.7116146792287277, "learning_rate": 4.334594353646378e-07, "loss": 0.2268, "step": 31999 }, { "epoch": 0.5562412000903892, "grad_norm": 2.4368994580661485, "learning_rate": 4.3343153651755513e-07, "loss": 0.2091, "step": 32000 }, { "epoch": 0.5562585826278921, "grad_norm": 3.1252820884405996, "learning_rate": 4.3340363788146406e-07, "loss": 0.3033, "step": 32001 }, { "epoch": 0.5562759651653948, "grad_norm": 1.1810663720155086, "learning_rate": 4.333757394564527e-07, "loss": 0.2168, "step": 32002 }, { "epoch": 0.5562933477028976, "grad_norm": 1.14964034053932, "learning_rate": 4.3334784124261e-07, "loss": 0.1291, "step": 32003 }, { "epoch": 0.5563107302404005, "grad_norm": 0.746754435513176, "learning_rate": 4.3331994324002364e-07, "loss": 0.2893, "step": 32004 }, { "epoch": 0.5563281127779033, "grad_norm": 1.5991934587901517, "learning_rate": 4.332920454487826e-07, "loss": 0.1681, "step": 32005 }, { "epoch": 0.5563454953154061, "grad_norm": 1.275877225092891, "learning_rate": 4.3326414786897503e-07, "loss": 0.3659, "step": 32006 }, { "epoch": 0.5563628778529089, "grad_norm": 2.0196609820469758, "learning_rate": 4.332362505006895e-07, "loss": 0.2284, "step": 32007 }, { "epoch": 0.5563802603904118, "grad_norm": 1.8428093113642534, "learning_rate": 4.3320835334401434e-07, "loss": 0.3075, "step": 32008 }, { "epoch": 0.5563976429279146, "grad_norm": 1.0976205897962996, "learning_rate": 4.331804563990382e-07, "loss": 0.4015, "step": 32009 }, { "epoch": 0.5564150254654174, "grad_norm": 1.4174049265019366, "learning_rate": 4.331525596658492e-07, "loss": 0.2576, "step": 32010 }, { "epoch": 0.5564324080029203, "grad_norm": 1.2785263211061089, "learning_rate": 4.3312466314453593e-07, "loss": 0.2108, "step": 32011 }, { "epoch": 0.5564497905404231, "grad_norm": 2.1228363850495615, "learning_rate": 4.3309676683518666e-07, "loss": 0.1597, "step": 32012 }, { "epoch": 0.5564671730779259, "grad_norm": 2.5929405489486994, "learning_rate": 4.330688707378899e-07, "loss": 0.2844, "step": 32013 }, { "epoch": 0.5564845556154288, "grad_norm": 1.6840723411172023, "learning_rate": 4.330409748527341e-07, "loss": 0.3831, "step": 32014 }, { "epoch": 0.5565019381529316, "grad_norm": 1.620401800121074, "learning_rate": 4.3301307917980777e-07, "loss": 0.5345, "step": 32015 }, { "epoch": 0.5565193206904344, "grad_norm": 1.5804214976677418, "learning_rate": 4.3298518371919917e-07, "loss": 0.2513, "step": 32016 }, { "epoch": 0.5565367032279372, "grad_norm": 1.7263776289728605, "learning_rate": 4.3295728847099663e-07, "loss": 0.3796, "step": 32017 }, { "epoch": 0.5565540857654401, "grad_norm": 1.7958998286569485, "learning_rate": 4.3292939343528877e-07, "loss": 0.3975, "step": 32018 }, { "epoch": 0.5565714683029429, "grad_norm": 2.075763420142561, "learning_rate": 4.3290149861216395e-07, "loss": 0.2445, "step": 32019 }, { "epoch": 0.5565888508404457, "grad_norm": 1.7072132009658065, "learning_rate": 4.328736040017104e-07, "loss": 0.2004, "step": 32020 }, { "epoch": 0.5566062333779486, "grad_norm": 3.27553073210956, "learning_rate": 4.328457096040168e-07, "loss": 0.2011, "step": 32021 }, { "epoch": 0.5566236159154513, "grad_norm": 1.4496363352149768, "learning_rate": 4.328178154191716e-07, "loss": 0.2808, "step": 32022 }, { "epoch": 0.5566409984529541, "grad_norm": 1.5374665600831126, "learning_rate": 4.327899214472629e-07, "loss": 0.2743, "step": 32023 }, { "epoch": 0.556658380990457, "grad_norm": 2.049582831203413, "learning_rate": 4.3276202768837935e-07, "loss": 0.2666, "step": 32024 }, { "epoch": 0.5566757635279598, "grad_norm": 0.9004240041101307, "learning_rate": 4.3273413414260906e-07, "loss": 0.2807, "step": 32025 }, { "epoch": 0.5566931460654626, "grad_norm": 1.3122472960620126, "learning_rate": 4.3270624081004093e-07, "loss": 0.3584, "step": 32026 }, { "epoch": 0.5567105286029654, "grad_norm": 1.1574350255440535, "learning_rate": 4.32678347690763e-07, "loss": 0.1745, "step": 32027 }, { "epoch": 0.5567279111404683, "grad_norm": 1.2788521612950898, "learning_rate": 4.3265045478486396e-07, "loss": 0.2496, "step": 32028 }, { "epoch": 0.5567452936779711, "grad_norm": 2.1045355473645793, "learning_rate": 4.326225620924318e-07, "loss": 0.4566, "step": 32029 }, { "epoch": 0.5567626762154739, "grad_norm": 0.7219637266628406, "learning_rate": 4.3259466961355523e-07, "loss": 0.1502, "step": 32030 }, { "epoch": 0.5567800587529768, "grad_norm": 1.1750310705651847, "learning_rate": 4.325667773483225e-07, "loss": 0.2611, "step": 32031 }, { "epoch": 0.5567974412904796, "grad_norm": 1.54308717804466, "learning_rate": 4.325388852968223e-07, "loss": 0.2464, "step": 32032 }, { "epoch": 0.5568148238279824, "grad_norm": 1.551388545169021, "learning_rate": 4.325109934591427e-07, "loss": 0.1498, "step": 32033 }, { "epoch": 0.5568322063654852, "grad_norm": 2.335641420549622, "learning_rate": 4.324831018353725e-07, "loss": 0.3039, "step": 32034 }, { "epoch": 0.5568495889029881, "grad_norm": 1.52618477410981, "learning_rate": 4.3245521042559963e-07, "loss": 0.2858, "step": 32035 }, { "epoch": 0.5568669714404909, "grad_norm": 5.637012033109575, "learning_rate": 4.324273192299127e-07, "loss": 0.335, "step": 32036 }, { "epoch": 0.5568843539779937, "grad_norm": 1.0263265816183538, "learning_rate": 4.3239942824840016e-07, "loss": 0.2282, "step": 32037 }, { "epoch": 0.5569017365154966, "grad_norm": 1.5131230000486484, "learning_rate": 4.323715374811504e-07, "loss": 0.2663, "step": 32038 }, { "epoch": 0.5569191190529994, "grad_norm": 1.8470470455232335, "learning_rate": 4.323436469282517e-07, "loss": 0.2509, "step": 32039 }, { "epoch": 0.5569365015905022, "grad_norm": 2.0698462049572766, "learning_rate": 4.323157565897928e-07, "loss": 0.4019, "step": 32040 }, { "epoch": 0.5569538841280051, "grad_norm": 1.305244957865121, "learning_rate": 4.322878664658617e-07, "loss": 0.2197, "step": 32041 }, { "epoch": 0.5569712666655078, "grad_norm": 1.583717305793509, "learning_rate": 4.322599765565471e-07, "loss": 0.2702, "step": 32042 }, { "epoch": 0.5569886492030106, "grad_norm": 1.6275013172657755, "learning_rate": 4.32232086861937e-07, "loss": 0.2126, "step": 32043 }, { "epoch": 0.5570060317405134, "grad_norm": 1.2988187797703776, "learning_rate": 4.322041973821202e-07, "loss": 0.1782, "step": 32044 }, { "epoch": 0.5570234142780163, "grad_norm": 1.5239190139063354, "learning_rate": 4.321763081171849e-07, "loss": 0.2342, "step": 32045 }, { "epoch": 0.5570407968155191, "grad_norm": 0.8056903602442108, "learning_rate": 4.321484190672195e-07, "loss": 0.2483, "step": 32046 }, { "epoch": 0.5570581793530219, "grad_norm": 2.656615857168311, "learning_rate": 4.3212053023231276e-07, "loss": 0.2994, "step": 32047 }, { "epoch": 0.5570755618905248, "grad_norm": 2.5654390815543517, "learning_rate": 4.320926416125524e-07, "loss": 0.3538, "step": 32048 }, { "epoch": 0.5570929444280276, "grad_norm": 1.2287994871885048, "learning_rate": 4.320647532080273e-07, "loss": 0.2134, "step": 32049 }, { "epoch": 0.5571103269655304, "grad_norm": 1.405604727462543, "learning_rate": 4.320368650188257e-07, "loss": 0.2728, "step": 32050 }, { "epoch": 0.5571277095030333, "grad_norm": 5.8002752176184025, "learning_rate": 4.32008977045036e-07, "loss": 0.4083, "step": 32051 }, { "epoch": 0.5571450920405361, "grad_norm": 3.282074094089558, "learning_rate": 4.319810892867467e-07, "loss": 0.3287, "step": 32052 }, { "epoch": 0.5571624745780389, "grad_norm": 1.9975522293718173, "learning_rate": 4.319532017440462e-07, "loss": 0.1685, "step": 32053 }, { "epoch": 0.5571798571155417, "grad_norm": 1.5492364031154569, "learning_rate": 4.3192531441702256e-07, "loss": 0.252, "step": 32054 }, { "epoch": 0.5571972396530446, "grad_norm": 1.3936168928719872, "learning_rate": 4.3189742730576453e-07, "loss": 0.2008, "step": 32055 }, { "epoch": 0.5572146221905474, "grad_norm": 1.6804218999409377, "learning_rate": 4.318695404103603e-07, "loss": 0.2549, "step": 32056 }, { "epoch": 0.5572320047280502, "grad_norm": 1.395619706018672, "learning_rate": 4.318416537308983e-07, "loss": 0.2614, "step": 32057 }, { "epoch": 0.5572493872655531, "grad_norm": 1.3876184411101133, "learning_rate": 4.3181376726746696e-07, "loss": 0.2453, "step": 32058 }, { "epoch": 0.5572667698030559, "grad_norm": 1.0990821149608614, "learning_rate": 4.3178588102015495e-07, "loss": 0.1997, "step": 32059 }, { "epoch": 0.5572841523405587, "grad_norm": 3.003502394534776, "learning_rate": 4.3175799498905e-07, "loss": 0.4959, "step": 32060 }, { "epoch": 0.5573015348780614, "grad_norm": 1.149698242439531, "learning_rate": 4.31730109174241e-07, "loss": 0.2315, "step": 32061 }, { "epoch": 0.5573189174155643, "grad_norm": 1.6208100088549935, "learning_rate": 4.317022235758161e-07, "loss": 0.2014, "step": 32062 }, { "epoch": 0.5573362999530671, "grad_norm": 1.626431034684017, "learning_rate": 4.316743381938639e-07, "loss": 0.4102, "step": 32063 }, { "epoch": 0.5573536824905699, "grad_norm": 0.8666602593633999, "learning_rate": 4.316464530284725e-07, "loss": 0.2803, "step": 32064 }, { "epoch": 0.5573710650280728, "grad_norm": 2.3456440700412244, "learning_rate": 4.3161856807973057e-07, "loss": 0.2188, "step": 32065 }, { "epoch": 0.5573884475655756, "grad_norm": 1.160310616214285, "learning_rate": 4.315906833477265e-07, "loss": 0.1572, "step": 32066 }, { "epoch": 0.5574058301030784, "grad_norm": 1.1543888622607466, "learning_rate": 4.3156279883254844e-07, "loss": 0.2604, "step": 32067 }, { "epoch": 0.5574232126405813, "grad_norm": 1.0286341201746185, "learning_rate": 4.315349145342847e-07, "loss": 0.479, "step": 32068 }, { "epoch": 0.5574405951780841, "grad_norm": 1.4387502928090292, "learning_rate": 4.31507030453024e-07, "loss": 0.115, "step": 32069 }, { "epoch": 0.5574579777155869, "grad_norm": 1.2127526743161867, "learning_rate": 4.314791465888544e-07, "loss": 0.1748, "step": 32070 }, { "epoch": 0.5574753602530897, "grad_norm": 1.236397850688821, "learning_rate": 4.3145126294186457e-07, "loss": 0.1557, "step": 32071 }, { "epoch": 0.5574927427905926, "grad_norm": 1.850974889232219, "learning_rate": 4.314233795121428e-07, "loss": 0.3315, "step": 32072 }, { "epoch": 0.5575101253280954, "grad_norm": 1.8606274312910511, "learning_rate": 4.313954962997773e-07, "loss": 0.2045, "step": 32073 }, { "epoch": 0.5575275078655982, "grad_norm": 1.9987557212103817, "learning_rate": 4.313676133048566e-07, "loss": 0.363, "step": 32074 }, { "epoch": 0.5575448904031011, "grad_norm": 0.9621017114132445, "learning_rate": 4.3133973052746904e-07, "loss": 0.2921, "step": 32075 }, { "epoch": 0.5575622729406039, "grad_norm": 1.9824732665240967, "learning_rate": 4.3131184796770295e-07, "loss": 0.4061, "step": 32076 }, { "epoch": 0.5575796554781067, "grad_norm": 1.0805011177077541, "learning_rate": 4.312839656256467e-07, "loss": 0.2816, "step": 32077 }, { "epoch": 0.5575970380156096, "grad_norm": 2.618269496736317, "learning_rate": 4.3125608350138897e-07, "loss": 0.3139, "step": 32078 }, { "epoch": 0.5576144205531124, "grad_norm": 2.1667610992506434, "learning_rate": 4.312282015950176e-07, "loss": 0.3435, "step": 32079 }, { "epoch": 0.5576318030906152, "grad_norm": 1.157820992118991, "learning_rate": 4.312003199066214e-07, "loss": 0.218, "step": 32080 }, { "epoch": 0.5576491856281179, "grad_norm": 2.6041433818173547, "learning_rate": 4.311724384362884e-07, "loss": 0.2573, "step": 32081 }, { "epoch": 0.5576665681656208, "grad_norm": 1.420260178069614, "learning_rate": 4.311445571841073e-07, "loss": 0.3525, "step": 32082 }, { "epoch": 0.5576839507031236, "grad_norm": 4.206993527628412, "learning_rate": 4.311166761501663e-07, "loss": 0.3335, "step": 32083 }, { "epoch": 0.5577013332406264, "grad_norm": 1.3424899985589624, "learning_rate": 4.310887953345537e-07, "loss": 0.224, "step": 32084 }, { "epoch": 0.5577187157781293, "grad_norm": 3.3223473129253174, "learning_rate": 4.3106091473735825e-07, "loss": 0.3791, "step": 32085 }, { "epoch": 0.5577360983156321, "grad_norm": 2.0755464381525925, "learning_rate": 4.3103303435866785e-07, "loss": 0.3707, "step": 32086 }, { "epoch": 0.5577534808531349, "grad_norm": 1.6286388713912383, "learning_rate": 4.31005154198571e-07, "loss": 0.3167, "step": 32087 }, { "epoch": 0.5577708633906378, "grad_norm": 2.6975804640475625, "learning_rate": 4.3097727425715616e-07, "loss": 0.2403, "step": 32088 }, { "epoch": 0.5577882459281406, "grad_norm": 1.7328301888516802, "learning_rate": 4.309493945345115e-07, "loss": 0.2294, "step": 32089 }, { "epoch": 0.5578056284656434, "grad_norm": 0.8406854579952633, "learning_rate": 4.309215150307258e-07, "loss": 0.4192, "step": 32090 }, { "epoch": 0.5578230110031462, "grad_norm": 1.2911271579802768, "learning_rate": 4.308936357458871e-07, "loss": 0.151, "step": 32091 }, { "epoch": 0.5578403935406491, "grad_norm": 2.074146576687514, "learning_rate": 4.3086575668008387e-07, "loss": 0.3451, "step": 32092 }, { "epoch": 0.5578577760781519, "grad_norm": 1.9540999211671068, "learning_rate": 4.3083787783340423e-07, "loss": 0.2726, "step": 32093 }, { "epoch": 0.5578751586156547, "grad_norm": 2.1627025121626944, "learning_rate": 4.308099992059369e-07, "loss": 0.4071, "step": 32094 }, { "epoch": 0.5578925411531576, "grad_norm": 4.7633618338856225, "learning_rate": 4.3078212079777e-07, "loss": 0.2401, "step": 32095 }, { "epoch": 0.5579099236906604, "grad_norm": 2.16060392134137, "learning_rate": 4.30754242608992e-07, "loss": 0.2166, "step": 32096 }, { "epoch": 0.5579273062281632, "grad_norm": 0.866823305149495, "learning_rate": 4.307263646396915e-07, "loss": 0.2756, "step": 32097 }, { "epoch": 0.557944688765666, "grad_norm": 1.3742801685051085, "learning_rate": 4.3069848688995626e-07, "loss": 0.186, "step": 32098 }, { "epoch": 0.5579620713031689, "grad_norm": 1.8117158046917776, "learning_rate": 4.306706093598751e-07, "loss": 0.4734, "step": 32099 }, { "epoch": 0.5579794538406717, "grad_norm": 1.1112983918481487, "learning_rate": 4.306427320495363e-07, "loss": 0.3061, "step": 32100 }, { "epoch": 0.5579968363781744, "grad_norm": 1.6928697692613257, "learning_rate": 4.3061485495902806e-07, "loss": 0.2903, "step": 32101 }, { "epoch": 0.5580142189156773, "grad_norm": 1.2663385866261054, "learning_rate": 4.3058697808843896e-07, "loss": 0.1915, "step": 32102 }, { "epoch": 0.5580316014531801, "grad_norm": 1.46776797945745, "learning_rate": 4.3055910143785715e-07, "loss": 0.2887, "step": 32103 }, { "epoch": 0.5580489839906829, "grad_norm": 1.149395069390989, "learning_rate": 4.3053122500737133e-07, "loss": 0.2974, "step": 32104 }, { "epoch": 0.5580663665281858, "grad_norm": 1.935203202744514, "learning_rate": 4.3050334879706946e-07, "loss": 0.4294, "step": 32105 }, { "epoch": 0.5580837490656886, "grad_norm": 1.8626701059584723, "learning_rate": 4.3047547280703993e-07, "loss": 0.3361, "step": 32106 }, { "epoch": 0.5581011316031914, "grad_norm": 1.2292374504225638, "learning_rate": 4.3044759703737137e-07, "loss": 0.2133, "step": 32107 }, { "epoch": 0.5581185141406942, "grad_norm": 1.44531989233741, "learning_rate": 4.30419721488152e-07, "loss": 0.1877, "step": 32108 }, { "epoch": 0.5581358966781971, "grad_norm": 1.9077627842510643, "learning_rate": 4.3039184615947e-07, "loss": 0.2926, "step": 32109 }, { "epoch": 0.5581532792156999, "grad_norm": 2.1842397902823043, "learning_rate": 4.303639710514141e-07, "loss": 0.3532, "step": 32110 }, { "epoch": 0.5581706617532027, "grad_norm": 1.1134468109256044, "learning_rate": 4.303360961640723e-07, "loss": 0.1793, "step": 32111 }, { "epoch": 0.5581880442907056, "grad_norm": 0.6676765479251118, "learning_rate": 4.3030822149753296e-07, "loss": 0.1901, "step": 32112 }, { "epoch": 0.5582054268282084, "grad_norm": 2.2893747382498773, "learning_rate": 4.3028034705188463e-07, "loss": 0.2636, "step": 32113 }, { "epoch": 0.5582228093657112, "grad_norm": 1.819277677388522, "learning_rate": 4.3025247282721555e-07, "loss": 0.5151, "step": 32114 }, { "epoch": 0.5582401919032141, "grad_norm": 1.2792328614703057, "learning_rate": 4.302245988236142e-07, "loss": 0.3504, "step": 32115 }, { "epoch": 0.5582575744407169, "grad_norm": 1.9889299717452407, "learning_rate": 4.301967250411688e-07, "loss": 0.482, "step": 32116 }, { "epoch": 0.5582749569782197, "grad_norm": 2.2462197662337204, "learning_rate": 4.3016885147996773e-07, "loss": 0.1952, "step": 32117 }, { "epoch": 0.5582923395157225, "grad_norm": 1.6966486188183048, "learning_rate": 4.301409781400992e-07, "loss": 0.2204, "step": 32118 }, { "epoch": 0.5583097220532254, "grad_norm": 1.7124444880066167, "learning_rate": 4.3011310502165177e-07, "loss": 0.1906, "step": 32119 }, { "epoch": 0.5583271045907282, "grad_norm": 1.193418687799938, "learning_rate": 4.3008523212471356e-07, "loss": 0.2294, "step": 32120 }, { "epoch": 0.5583444871282309, "grad_norm": 1.6539261926224953, "learning_rate": 4.3005735944937314e-07, "loss": 0.194, "step": 32121 }, { "epoch": 0.5583618696657338, "grad_norm": 1.1629948543180069, "learning_rate": 4.300294869957187e-07, "loss": 0.3627, "step": 32122 }, { "epoch": 0.5583792522032366, "grad_norm": 1.419135838148717, "learning_rate": 4.3000161476383895e-07, "loss": 0.2437, "step": 32123 }, { "epoch": 0.5583966347407394, "grad_norm": 1.8473642949180258, "learning_rate": 4.2997374275382163e-07, "loss": 0.2405, "step": 32124 }, { "epoch": 0.5584140172782422, "grad_norm": 1.8636700708507552, "learning_rate": 4.299458709657555e-07, "loss": 0.3003, "step": 32125 }, { "epoch": 0.5584313998157451, "grad_norm": 2.6472651274397916, "learning_rate": 4.2991799939972863e-07, "loss": 0.3208, "step": 32126 }, { "epoch": 0.5584487823532479, "grad_norm": 1.4994555224161106, "learning_rate": 4.2989012805582956e-07, "loss": 0.2313, "step": 32127 }, { "epoch": 0.5584661648907507, "grad_norm": 2.5440583820260225, "learning_rate": 4.2986225693414657e-07, "loss": 0.2935, "step": 32128 }, { "epoch": 0.5584835474282536, "grad_norm": 1.7825539810045061, "learning_rate": 4.2983438603476817e-07, "loss": 0.2974, "step": 32129 }, { "epoch": 0.5585009299657564, "grad_norm": 1.2736685674854658, "learning_rate": 4.2980651535778245e-07, "loss": 0.1716, "step": 32130 }, { "epoch": 0.5585183125032592, "grad_norm": 1.658452358267574, "learning_rate": 4.297786449032778e-07, "loss": 0.2146, "step": 32131 }, { "epoch": 0.5585356950407621, "grad_norm": 1.381547057352183, "learning_rate": 4.2975077467134253e-07, "loss": 0.2351, "step": 32132 }, { "epoch": 0.5585530775782649, "grad_norm": 1.399575418927811, "learning_rate": 4.297229046620651e-07, "loss": 0.266, "step": 32133 }, { "epoch": 0.5585704601157677, "grad_norm": 2.562771885517361, "learning_rate": 4.2969503487553373e-07, "loss": 0.3323, "step": 32134 }, { "epoch": 0.5585878426532705, "grad_norm": 2.7811890566759625, "learning_rate": 4.296671653118369e-07, "loss": 0.4229, "step": 32135 }, { "epoch": 0.5586052251907734, "grad_norm": 1.4106616167050696, "learning_rate": 4.2963929597106283e-07, "loss": 0.252, "step": 32136 }, { "epoch": 0.5586226077282762, "grad_norm": 3.092958794590858, "learning_rate": 4.2961142685329975e-07, "loss": 0.3268, "step": 32137 }, { "epoch": 0.558639990265779, "grad_norm": 1.4985155693999912, "learning_rate": 4.2958355795863626e-07, "loss": 0.1665, "step": 32138 }, { "epoch": 0.5586573728032819, "grad_norm": 1.0641862248613616, "learning_rate": 4.295556892871604e-07, "loss": 0.1721, "step": 32139 }, { "epoch": 0.5586747553407847, "grad_norm": 1.0741736601395715, "learning_rate": 4.2952782083896077e-07, "loss": 0.1573, "step": 32140 }, { "epoch": 0.5586921378782874, "grad_norm": 1.3634889982039993, "learning_rate": 4.2949995261412553e-07, "loss": 0.2071, "step": 32141 }, { "epoch": 0.5587095204157903, "grad_norm": 1.8011158103365974, "learning_rate": 4.294720846127432e-07, "loss": 0.2075, "step": 32142 }, { "epoch": 0.5587269029532931, "grad_norm": 3.197160745951598, "learning_rate": 4.2944421683490175e-07, "loss": 0.3357, "step": 32143 }, { "epoch": 0.5587442854907959, "grad_norm": 1.8800819159203848, "learning_rate": 4.294163492806898e-07, "loss": 0.2143, "step": 32144 }, { "epoch": 0.5587616680282987, "grad_norm": 1.4945294623152097, "learning_rate": 4.293884819501955e-07, "loss": 0.2216, "step": 32145 }, { "epoch": 0.5587790505658016, "grad_norm": 1.081795289175955, "learning_rate": 4.2936061484350736e-07, "loss": 0.2643, "step": 32146 }, { "epoch": 0.5587964331033044, "grad_norm": 1.603301828162939, "learning_rate": 4.293327479607136e-07, "loss": 0.2707, "step": 32147 }, { "epoch": 0.5588138156408072, "grad_norm": 1.4361730772172288, "learning_rate": 4.2930488130190285e-07, "loss": 0.31, "step": 32148 }, { "epoch": 0.5588311981783101, "grad_norm": 1.2017409144501578, "learning_rate": 4.2927701486716285e-07, "loss": 0.2905, "step": 32149 }, { "epoch": 0.5588485807158129, "grad_norm": 2.0306308373075836, "learning_rate": 4.2924914865658224e-07, "loss": 0.3405, "step": 32150 }, { "epoch": 0.5588659632533157, "grad_norm": 2.530781511800057, "learning_rate": 4.2922128267024925e-07, "loss": 0.2583, "step": 32151 }, { "epoch": 0.5588833457908186, "grad_norm": 1.3665821132266076, "learning_rate": 4.2919341690825246e-07, "loss": 0.1933, "step": 32152 }, { "epoch": 0.5589007283283214, "grad_norm": 1.216546174902165, "learning_rate": 4.291655513706799e-07, "loss": 0.2108, "step": 32153 }, { "epoch": 0.5589181108658242, "grad_norm": 1.8725048879292854, "learning_rate": 4.291376860576201e-07, "loss": 0.4489, "step": 32154 }, { "epoch": 0.558935493403327, "grad_norm": 2.815990106643582, "learning_rate": 4.291098209691613e-07, "loss": 0.2041, "step": 32155 }, { "epoch": 0.5589528759408299, "grad_norm": 1.7764931316572696, "learning_rate": 4.290819561053918e-07, "loss": 0.2318, "step": 32156 }, { "epoch": 0.5589702584783327, "grad_norm": 1.476476452711488, "learning_rate": 4.2905409146639975e-07, "loss": 0.281, "step": 32157 }, { "epoch": 0.5589876410158355, "grad_norm": 1.7329355417546977, "learning_rate": 4.2902622705227373e-07, "loss": 0.143, "step": 32158 }, { "epoch": 0.5590050235533384, "grad_norm": 2.073179199728256, "learning_rate": 4.2899836286310193e-07, "loss": 0.28, "step": 32159 }, { "epoch": 0.5590224060908412, "grad_norm": 1.5475975043973422, "learning_rate": 4.289704988989729e-07, "loss": 0.1919, "step": 32160 }, { "epoch": 0.5590397886283439, "grad_norm": 1.2095539376854887, "learning_rate": 4.289426351599746e-07, "loss": 0.2151, "step": 32161 }, { "epoch": 0.5590571711658467, "grad_norm": 1.5892124426901164, "learning_rate": 4.289147716461955e-07, "loss": 0.2252, "step": 32162 }, { "epoch": 0.5590745537033496, "grad_norm": 1.56933635630095, "learning_rate": 4.2888690835772396e-07, "loss": 0.3696, "step": 32163 }, { "epoch": 0.5590919362408524, "grad_norm": 1.4908396002651108, "learning_rate": 4.2885904529464827e-07, "loss": 0.3987, "step": 32164 }, { "epoch": 0.5591093187783552, "grad_norm": 1.0802263472956226, "learning_rate": 4.288311824570566e-07, "loss": 0.1169, "step": 32165 }, { "epoch": 0.5591267013158581, "grad_norm": 1.822140364364203, "learning_rate": 4.2880331984503755e-07, "loss": 0.2771, "step": 32166 }, { "epoch": 0.5591440838533609, "grad_norm": 1.6727536535228913, "learning_rate": 4.2877545745867934e-07, "loss": 0.3247, "step": 32167 }, { "epoch": 0.5591614663908637, "grad_norm": 1.3380927967916845, "learning_rate": 4.2874759529807e-07, "loss": 0.1807, "step": 32168 }, { "epoch": 0.5591788489283666, "grad_norm": 1.6396060576771327, "learning_rate": 4.287197333632981e-07, "loss": 0.2214, "step": 32169 }, { "epoch": 0.5591962314658694, "grad_norm": 3.550109807527199, "learning_rate": 4.286918716544519e-07, "loss": 0.203, "step": 32170 }, { "epoch": 0.5592136140033722, "grad_norm": 1.192429596601985, "learning_rate": 4.2866401017161974e-07, "loss": 0.1572, "step": 32171 }, { "epoch": 0.559230996540875, "grad_norm": 2.432573018999169, "learning_rate": 4.2863614891489e-07, "loss": 0.2621, "step": 32172 }, { "epoch": 0.5592483790783779, "grad_norm": 1.1788347783892297, "learning_rate": 4.286082878843509e-07, "loss": 0.153, "step": 32173 }, { "epoch": 0.5592657616158807, "grad_norm": 1.7758195578524505, "learning_rate": 4.285804270800905e-07, "loss": 0.183, "step": 32174 }, { "epoch": 0.5592831441533835, "grad_norm": 0.8513701751633028, "learning_rate": 4.285525665021975e-07, "loss": 0.1191, "step": 32175 }, { "epoch": 0.5593005266908864, "grad_norm": 3.8681087651043375, "learning_rate": 4.285247061507599e-07, "loss": 0.2136, "step": 32176 }, { "epoch": 0.5593179092283892, "grad_norm": 1.919404811255285, "learning_rate": 4.2849684602586627e-07, "loss": 0.3691, "step": 32177 }, { "epoch": 0.559335291765892, "grad_norm": 2.113661336740331, "learning_rate": 4.284689861276047e-07, "loss": 0.242, "step": 32178 }, { "epoch": 0.5593526743033949, "grad_norm": 1.4396568449268659, "learning_rate": 4.2844112645606387e-07, "loss": 0.2399, "step": 32179 }, { "epoch": 0.5593700568408977, "grad_norm": 1.7478328111075379, "learning_rate": 4.2841326701133156e-07, "loss": 0.2352, "step": 32180 }, { "epoch": 0.5593874393784004, "grad_norm": 1.2644855333811227, "learning_rate": 4.2838540779349634e-07, "loss": 0.1658, "step": 32181 }, { "epoch": 0.5594048219159032, "grad_norm": 2.651435218847825, "learning_rate": 4.283575488026464e-07, "loss": 0.4172, "step": 32182 }, { "epoch": 0.5594222044534061, "grad_norm": 1.6010730920221048, "learning_rate": 4.283296900388702e-07, "loss": 0.3182, "step": 32183 }, { "epoch": 0.5594395869909089, "grad_norm": 1.5261991547484022, "learning_rate": 4.2830183150225593e-07, "loss": 0.358, "step": 32184 }, { "epoch": 0.5594569695284117, "grad_norm": 1.7047479033585378, "learning_rate": 4.282739731928919e-07, "loss": 0.3296, "step": 32185 }, { "epoch": 0.5594743520659146, "grad_norm": 2.0928158024006653, "learning_rate": 4.282461151108667e-07, "loss": 0.353, "step": 32186 }, { "epoch": 0.5594917346034174, "grad_norm": 1.5879751020117232, "learning_rate": 4.2821825725626805e-07, "loss": 0.2682, "step": 32187 }, { "epoch": 0.5595091171409202, "grad_norm": 2.1025713589574413, "learning_rate": 4.281903996291846e-07, "loss": 0.4034, "step": 32188 }, { "epoch": 0.559526499678423, "grad_norm": 1.8770965991297774, "learning_rate": 4.2816254222970464e-07, "loss": 0.2206, "step": 32189 }, { "epoch": 0.5595438822159259, "grad_norm": 1.5303047893005455, "learning_rate": 4.281346850579163e-07, "loss": 0.1839, "step": 32190 }, { "epoch": 0.5595612647534287, "grad_norm": 1.0914188693698492, "learning_rate": 4.281068281139082e-07, "loss": 0.19, "step": 32191 }, { "epoch": 0.5595786472909315, "grad_norm": 1.5806147926663263, "learning_rate": 4.2807897139776845e-07, "loss": 0.1764, "step": 32192 }, { "epoch": 0.5595960298284344, "grad_norm": 1.1079960608015542, "learning_rate": 4.280511149095851e-07, "loss": 0.1846, "step": 32193 }, { "epoch": 0.5596134123659372, "grad_norm": 1.588872441425971, "learning_rate": 4.280232586494468e-07, "loss": 0.1903, "step": 32194 }, { "epoch": 0.55963079490344, "grad_norm": 1.1780569184092309, "learning_rate": 4.279954026174416e-07, "loss": 0.3128, "step": 32195 }, { "epoch": 0.5596481774409429, "grad_norm": 1.2209932356709692, "learning_rate": 4.27967546813658e-07, "loss": 0.1954, "step": 32196 }, { "epoch": 0.5596655599784457, "grad_norm": 2.0998696198954008, "learning_rate": 4.279396912381842e-07, "loss": 0.239, "step": 32197 }, { "epoch": 0.5596829425159485, "grad_norm": 1.7517628304957529, "learning_rate": 4.279118358911086e-07, "loss": 0.2106, "step": 32198 }, { "epoch": 0.5597003250534514, "grad_norm": 1.844626077785527, "learning_rate": 4.2788398077251906e-07, "loss": 0.2414, "step": 32199 }, { "epoch": 0.5597177075909541, "grad_norm": 1.7368069247128672, "learning_rate": 4.2785612588250435e-07, "loss": 0.2328, "step": 32200 }, { "epoch": 0.5597350901284569, "grad_norm": 0.9978182626309887, "learning_rate": 4.278282712211525e-07, "loss": 0.3275, "step": 32201 }, { "epoch": 0.5597524726659597, "grad_norm": 1.7587308136546993, "learning_rate": 4.278004167885519e-07, "loss": 0.2781, "step": 32202 }, { "epoch": 0.5597698552034626, "grad_norm": 3.400953402632027, "learning_rate": 4.2777256258479075e-07, "loss": 0.2236, "step": 32203 }, { "epoch": 0.5597872377409654, "grad_norm": 1.2151296797670554, "learning_rate": 4.2774470860995755e-07, "loss": 0.3064, "step": 32204 }, { "epoch": 0.5598046202784682, "grad_norm": 1.797498474295911, "learning_rate": 4.277168548641405e-07, "loss": 0.3813, "step": 32205 }, { "epoch": 0.559822002815971, "grad_norm": 1.9996790922222814, "learning_rate": 4.276890013474277e-07, "loss": 0.2021, "step": 32206 }, { "epoch": 0.5598393853534739, "grad_norm": 1.2627896815084874, "learning_rate": 4.276611480599075e-07, "loss": 0.1962, "step": 32207 }, { "epoch": 0.5598567678909767, "grad_norm": 1.965705375522369, "learning_rate": 4.2763329500166824e-07, "loss": 0.3065, "step": 32208 }, { "epoch": 0.5598741504284795, "grad_norm": 1.2601595605509288, "learning_rate": 4.2760544217279815e-07, "loss": 0.316, "step": 32209 }, { "epoch": 0.5598915329659824, "grad_norm": 1.7969464187242032, "learning_rate": 4.275775895733856e-07, "loss": 0.2479, "step": 32210 }, { "epoch": 0.5599089155034852, "grad_norm": 1.5241933239442496, "learning_rate": 4.275497372035191e-07, "loss": 0.3469, "step": 32211 }, { "epoch": 0.559926298040988, "grad_norm": 1.1766777956166745, "learning_rate": 4.2752188506328635e-07, "loss": 0.3239, "step": 32212 }, { "epoch": 0.5599436805784909, "grad_norm": 1.7877262754678738, "learning_rate": 4.274940331527759e-07, "loss": 0.1972, "step": 32213 }, { "epoch": 0.5599610631159937, "grad_norm": 1.9365848020337808, "learning_rate": 4.2746618147207615e-07, "loss": 0.2727, "step": 32214 }, { "epoch": 0.5599784456534965, "grad_norm": 2.4007075572302896, "learning_rate": 4.2743833002127514e-07, "loss": 0.2758, "step": 32215 }, { "epoch": 0.5599958281909994, "grad_norm": 1.792114774066531, "learning_rate": 4.274104788004615e-07, "loss": 0.2114, "step": 32216 }, { "epoch": 0.5600132107285022, "grad_norm": 1.600055752589957, "learning_rate": 4.273826278097233e-07, "loss": 0.3416, "step": 32217 }, { "epoch": 0.560030593266005, "grad_norm": 2.0472323306482316, "learning_rate": 4.273547770491487e-07, "loss": 0.158, "step": 32218 }, { "epoch": 0.5600479758035078, "grad_norm": 1.4952480781235624, "learning_rate": 4.273269265188261e-07, "loss": 0.2019, "step": 32219 }, { "epoch": 0.5600653583410106, "grad_norm": 1.401364753948437, "learning_rate": 4.272990762188439e-07, "loss": 0.2582, "step": 32220 }, { "epoch": 0.5600827408785134, "grad_norm": 4.013004214969305, "learning_rate": 4.2727122614928997e-07, "loss": 0.4402, "step": 32221 }, { "epoch": 0.5601001234160162, "grad_norm": 2.317552116017075, "learning_rate": 4.27243376310253e-07, "loss": 0.2669, "step": 32222 }, { "epoch": 0.5601175059535191, "grad_norm": 1.8359681516116322, "learning_rate": 4.27215526701821e-07, "loss": 0.2759, "step": 32223 }, { "epoch": 0.5601348884910219, "grad_norm": 2.6152396821084056, "learning_rate": 4.271876773240826e-07, "loss": 0.5401, "step": 32224 }, { "epoch": 0.5601522710285247, "grad_norm": 1.950406969964767, "learning_rate": 4.2715982817712563e-07, "loss": 0.4963, "step": 32225 }, { "epoch": 0.5601696535660275, "grad_norm": 1.3318334036118262, "learning_rate": 4.271319792610385e-07, "loss": 0.1827, "step": 32226 }, { "epoch": 0.5601870361035304, "grad_norm": 1.0852188779346963, "learning_rate": 4.271041305759096e-07, "loss": 0.2519, "step": 32227 }, { "epoch": 0.5602044186410332, "grad_norm": 0.8527669338101919, "learning_rate": 4.2707628212182716e-07, "loss": 0.1247, "step": 32228 }, { "epoch": 0.560221801178536, "grad_norm": 1.0895892495673158, "learning_rate": 4.2704843389887923e-07, "loss": 0.2481, "step": 32229 }, { "epoch": 0.5602391837160389, "grad_norm": 0.9142073895266137, "learning_rate": 4.270205859071546e-07, "loss": 0.2675, "step": 32230 }, { "epoch": 0.5602565662535417, "grad_norm": 1.3522662172182494, "learning_rate": 4.2699273814674096e-07, "loss": 0.2768, "step": 32231 }, { "epoch": 0.5602739487910445, "grad_norm": 1.9941161972476376, "learning_rate": 4.2696489061772674e-07, "loss": 0.2682, "step": 32232 }, { "epoch": 0.5602913313285474, "grad_norm": 1.3832440084416082, "learning_rate": 4.269370433202003e-07, "loss": 0.2, "step": 32233 }, { "epoch": 0.5603087138660502, "grad_norm": 2.0734174745687874, "learning_rate": 4.2690919625424985e-07, "loss": 0.2401, "step": 32234 }, { "epoch": 0.560326096403553, "grad_norm": 1.6438946253622415, "learning_rate": 4.268813494199638e-07, "loss": 0.2696, "step": 32235 }, { "epoch": 0.5603434789410559, "grad_norm": 1.3327206926702553, "learning_rate": 4.268535028174303e-07, "loss": 0.1819, "step": 32236 }, { "epoch": 0.5603608614785587, "grad_norm": 1.642923940382329, "learning_rate": 4.2682565644673755e-07, "loss": 0.2715, "step": 32237 }, { "epoch": 0.5603782440160615, "grad_norm": 1.753368406626273, "learning_rate": 4.2679781030797375e-07, "loss": 0.2389, "step": 32238 }, { "epoch": 0.5603956265535643, "grad_norm": 1.86632299515724, "learning_rate": 4.2676996440122736e-07, "loss": 0.1766, "step": 32239 }, { "epoch": 0.5604130090910671, "grad_norm": 0.9402982375045634, "learning_rate": 4.2674211872658637e-07, "loss": 0.2521, "step": 32240 }, { "epoch": 0.5604303916285699, "grad_norm": 1.9589695798420748, "learning_rate": 4.2671427328413946e-07, "loss": 0.2345, "step": 32241 }, { "epoch": 0.5604477741660727, "grad_norm": 2.158096612425919, "learning_rate": 4.2668642807397444e-07, "loss": 0.3323, "step": 32242 }, { "epoch": 0.5604651567035756, "grad_norm": 2.0527034808317266, "learning_rate": 4.2665858309618e-07, "loss": 0.3245, "step": 32243 }, { "epoch": 0.5604825392410784, "grad_norm": 1.3840416727813318, "learning_rate": 4.26630738350844e-07, "loss": 0.26, "step": 32244 }, { "epoch": 0.5604999217785812, "grad_norm": 1.590333040707062, "learning_rate": 4.266028938380549e-07, "loss": 0.2022, "step": 32245 }, { "epoch": 0.560517304316084, "grad_norm": 1.7647088926041419, "learning_rate": 4.265750495579008e-07, "loss": 0.2608, "step": 32246 }, { "epoch": 0.5605346868535869, "grad_norm": 1.9753351818928562, "learning_rate": 4.2654720551047017e-07, "loss": 0.2324, "step": 32247 }, { "epoch": 0.5605520693910897, "grad_norm": 1.1081935165855403, "learning_rate": 4.265193616958511e-07, "loss": 0.2062, "step": 32248 }, { "epoch": 0.5605694519285925, "grad_norm": 2.29698861535092, "learning_rate": 4.264915181141321e-07, "loss": 0.3635, "step": 32249 }, { "epoch": 0.5605868344660954, "grad_norm": 1.6963609382209397, "learning_rate": 4.2646367476540106e-07, "loss": 0.2193, "step": 32250 }, { "epoch": 0.5606042170035982, "grad_norm": 2.139623116399254, "learning_rate": 4.264358316497463e-07, "loss": 0.3122, "step": 32251 }, { "epoch": 0.560621599541101, "grad_norm": 1.5768619908385537, "learning_rate": 4.2640798876725623e-07, "loss": 0.2213, "step": 32252 }, { "epoch": 0.5606389820786039, "grad_norm": 2.582522811026553, "learning_rate": 4.2638014611801915e-07, "loss": 0.2917, "step": 32253 }, { "epoch": 0.5606563646161067, "grad_norm": 2.1041294424232055, "learning_rate": 4.26352303702123e-07, "loss": 0.2883, "step": 32254 }, { "epoch": 0.5606737471536095, "grad_norm": 1.4010306108567219, "learning_rate": 4.2632446151965646e-07, "loss": 0.2316, "step": 32255 }, { "epoch": 0.5606911296911123, "grad_norm": 1.369049700898514, "learning_rate": 4.262966195707074e-07, "loss": 0.1912, "step": 32256 }, { "epoch": 0.5607085122286152, "grad_norm": 0.8523674322493353, "learning_rate": 4.262687778553641e-07, "loss": 0.4087, "step": 32257 }, { "epoch": 0.560725894766118, "grad_norm": 2.7608983194360395, "learning_rate": 4.26240936373715e-07, "loss": 0.3116, "step": 32258 }, { "epoch": 0.5607432773036208, "grad_norm": 1.5094218384640479, "learning_rate": 4.2621309512584814e-07, "loss": 0.447, "step": 32259 }, { "epoch": 0.5607606598411236, "grad_norm": 1.1222198957804106, "learning_rate": 4.2618525411185205e-07, "loss": 0.1984, "step": 32260 }, { "epoch": 0.5607780423786264, "grad_norm": 1.5160993520192727, "learning_rate": 4.261574133318147e-07, "loss": 0.2489, "step": 32261 }, { "epoch": 0.5607954249161292, "grad_norm": 1.4490264163285038, "learning_rate": 4.2612957278582455e-07, "loss": 0.1426, "step": 32262 }, { "epoch": 0.560812807453632, "grad_norm": 2.0562693389371396, "learning_rate": 4.2610173247396953e-07, "loss": 0.2136, "step": 32263 }, { "epoch": 0.5608301899911349, "grad_norm": 1.7632960991576228, "learning_rate": 4.260738923963382e-07, "loss": 0.3461, "step": 32264 }, { "epoch": 0.5608475725286377, "grad_norm": 1.8623934661718906, "learning_rate": 4.2604605255301855e-07, "loss": 0.2949, "step": 32265 }, { "epoch": 0.5608649550661405, "grad_norm": 1.4059802535923058, "learning_rate": 4.260182129440991e-07, "loss": 0.2068, "step": 32266 }, { "epoch": 0.5608823376036434, "grad_norm": 2.08202438149778, "learning_rate": 4.259903735696677e-07, "loss": 0.2645, "step": 32267 }, { "epoch": 0.5608997201411462, "grad_norm": 1.5336213254290867, "learning_rate": 4.2596253442981323e-07, "loss": 0.212, "step": 32268 }, { "epoch": 0.560917102678649, "grad_norm": 1.8066652674482757, "learning_rate": 4.259346955246232e-07, "loss": 0.378, "step": 32269 }, { "epoch": 0.5609344852161519, "grad_norm": 1.4208241288377887, "learning_rate": 4.259068568541862e-07, "loss": 0.4632, "step": 32270 }, { "epoch": 0.5609518677536547, "grad_norm": 1.6253232408539977, "learning_rate": 4.258790184185904e-07, "loss": 0.3524, "step": 32271 }, { "epoch": 0.5609692502911575, "grad_norm": 1.9203337791862798, "learning_rate": 4.258511802179241e-07, "loss": 0.2509, "step": 32272 }, { "epoch": 0.5609866328286603, "grad_norm": 1.5055918283857301, "learning_rate": 4.2582334225227546e-07, "loss": 0.2774, "step": 32273 }, { "epoch": 0.5610040153661632, "grad_norm": 1.8229850117382487, "learning_rate": 4.257955045217329e-07, "loss": 0.2581, "step": 32274 }, { "epoch": 0.561021397903666, "grad_norm": 3.0199725968511415, "learning_rate": 4.2576766702638443e-07, "loss": 0.2783, "step": 32275 }, { "epoch": 0.5610387804411688, "grad_norm": 1.7786226533046574, "learning_rate": 4.257398297663184e-07, "loss": 0.2519, "step": 32276 }, { "epoch": 0.5610561629786717, "grad_norm": 2.176210480341132, "learning_rate": 4.2571199274162286e-07, "loss": 0.1903, "step": 32277 }, { "epoch": 0.5610735455161745, "grad_norm": 1.565662030255225, "learning_rate": 4.2568415595238624e-07, "loss": 0.2115, "step": 32278 }, { "epoch": 0.5610909280536773, "grad_norm": 1.729897919063253, "learning_rate": 4.256563193986966e-07, "loss": 0.3843, "step": 32279 }, { "epoch": 0.56110831059118, "grad_norm": 1.210336540736539, "learning_rate": 4.2562848308064255e-07, "loss": 0.1832, "step": 32280 }, { "epoch": 0.5611256931286829, "grad_norm": 1.3660722138870154, "learning_rate": 4.256006469983119e-07, "loss": 0.2225, "step": 32281 }, { "epoch": 0.5611430756661857, "grad_norm": 1.1937500392917753, "learning_rate": 4.255728111517929e-07, "loss": 0.1518, "step": 32282 }, { "epoch": 0.5611604582036885, "grad_norm": 2.36484582392532, "learning_rate": 4.255449755411741e-07, "loss": 0.3094, "step": 32283 }, { "epoch": 0.5611778407411914, "grad_norm": 2.684636980908727, "learning_rate": 4.2551714016654326e-07, "loss": 0.2529, "step": 32284 }, { "epoch": 0.5611952232786942, "grad_norm": 2.7671812524733284, "learning_rate": 4.254893050279891e-07, "loss": 0.357, "step": 32285 }, { "epoch": 0.561212605816197, "grad_norm": 1.2748825921070115, "learning_rate": 4.2546147012559957e-07, "loss": 0.2119, "step": 32286 }, { "epoch": 0.5612299883536999, "grad_norm": 1.5888610148895592, "learning_rate": 4.254336354594631e-07, "loss": 0.2921, "step": 32287 }, { "epoch": 0.5612473708912027, "grad_norm": 2.159823826523932, "learning_rate": 4.254058010296675e-07, "loss": 0.3526, "step": 32288 }, { "epoch": 0.5612647534287055, "grad_norm": 1.3297396902223861, "learning_rate": 4.2537796683630135e-07, "loss": 0.176, "step": 32289 }, { "epoch": 0.5612821359662084, "grad_norm": 1.25825593356506, "learning_rate": 4.2535013287945264e-07, "loss": 0.3036, "step": 32290 }, { "epoch": 0.5612995185037112, "grad_norm": 1.2210938072143784, "learning_rate": 4.253222991592098e-07, "loss": 0.3618, "step": 32291 }, { "epoch": 0.561316901041214, "grad_norm": 1.2485632794063821, "learning_rate": 4.2529446567566094e-07, "loss": 0.2457, "step": 32292 }, { "epoch": 0.5613342835787168, "grad_norm": 1.5840973152556217, "learning_rate": 4.252666324288946e-07, "loss": 0.4269, "step": 32293 }, { "epoch": 0.5613516661162197, "grad_norm": 1.5458299989556692, "learning_rate": 4.252387994189984e-07, "loss": 0.1849, "step": 32294 }, { "epoch": 0.5613690486537225, "grad_norm": 1.3709193247149216, "learning_rate": 4.252109666460609e-07, "loss": 0.2512, "step": 32295 }, { "epoch": 0.5613864311912253, "grad_norm": 1.921883489071422, "learning_rate": 4.251831341101703e-07, "loss": 0.1994, "step": 32296 }, { "epoch": 0.5614038137287282, "grad_norm": 2.0599550445782877, "learning_rate": 4.2515530181141477e-07, "loss": 0.248, "step": 32297 }, { "epoch": 0.561421196266231, "grad_norm": 1.7996500976183714, "learning_rate": 4.2512746974988253e-07, "loss": 0.2873, "step": 32298 }, { "epoch": 0.5614385788037338, "grad_norm": 1.2195570909090865, "learning_rate": 4.250996379256619e-07, "loss": 0.2668, "step": 32299 }, { "epoch": 0.5614559613412365, "grad_norm": 0.8671822261337566, "learning_rate": 4.250718063388411e-07, "loss": 0.2877, "step": 32300 }, { "epoch": 0.5614733438787394, "grad_norm": 1.376290969379136, "learning_rate": 4.250439749895082e-07, "loss": 0.221, "step": 32301 }, { "epoch": 0.5614907264162422, "grad_norm": 1.7956109716646382, "learning_rate": 4.2501614387775125e-07, "loss": 0.2622, "step": 32302 }, { "epoch": 0.561508108953745, "grad_norm": 1.5786782461681486, "learning_rate": 4.249883130036589e-07, "loss": 0.3461, "step": 32303 }, { "epoch": 0.5615254914912479, "grad_norm": 1.6699337686492952, "learning_rate": 4.24960482367319e-07, "loss": 0.1943, "step": 32304 }, { "epoch": 0.5615428740287507, "grad_norm": 1.4101395848025322, "learning_rate": 4.2493265196881994e-07, "loss": 0.2978, "step": 32305 }, { "epoch": 0.5615602565662535, "grad_norm": 3.2296579469960087, "learning_rate": 4.2490482180825e-07, "loss": 0.3204, "step": 32306 }, { "epoch": 0.5615776391037564, "grad_norm": 0.9387579883904811, "learning_rate": 4.248769918856972e-07, "loss": 0.134, "step": 32307 }, { "epoch": 0.5615950216412592, "grad_norm": 1.7369695308690893, "learning_rate": 4.248491622012498e-07, "loss": 0.3198, "step": 32308 }, { "epoch": 0.561612404178762, "grad_norm": 1.9669985352245347, "learning_rate": 4.248213327549961e-07, "loss": 0.2563, "step": 32309 }, { "epoch": 0.5616297867162648, "grad_norm": 1.4422122573570337, "learning_rate": 4.2479350354702413e-07, "loss": 0.1937, "step": 32310 }, { "epoch": 0.5616471692537677, "grad_norm": 1.341140498121381, "learning_rate": 4.247656745774223e-07, "loss": 0.2557, "step": 32311 }, { "epoch": 0.5616645517912705, "grad_norm": 1.343727047042449, "learning_rate": 4.2473784584627887e-07, "loss": 0.1877, "step": 32312 }, { "epoch": 0.5616819343287733, "grad_norm": 1.4638079072505643, "learning_rate": 4.2471001735368163e-07, "loss": 0.2621, "step": 32313 }, { "epoch": 0.5616993168662762, "grad_norm": 1.4505573317396756, "learning_rate": 4.246821890997192e-07, "loss": 0.3324, "step": 32314 }, { "epoch": 0.561716699403779, "grad_norm": 1.573165588982047, "learning_rate": 4.246543610844795e-07, "loss": 0.1843, "step": 32315 }, { "epoch": 0.5617340819412818, "grad_norm": 2.0393526818182584, "learning_rate": 4.24626533308051e-07, "loss": 0.2925, "step": 32316 }, { "epoch": 0.5617514644787847, "grad_norm": 1.1631808056390078, "learning_rate": 4.245987057705217e-07, "loss": 0.2193, "step": 32317 }, { "epoch": 0.5617688470162875, "grad_norm": 2.9535059422930057, "learning_rate": 4.2457087847198005e-07, "loss": 0.3752, "step": 32318 }, { "epoch": 0.5617862295537903, "grad_norm": 1.6978312193454432, "learning_rate": 4.245430514125138e-07, "loss": 0.3221, "step": 32319 }, { "epoch": 0.561803612091293, "grad_norm": 1.595023041655897, "learning_rate": 4.245152245922115e-07, "loss": 0.3434, "step": 32320 }, { "epoch": 0.5618209946287959, "grad_norm": 2.399880939625457, "learning_rate": 4.244873980111612e-07, "loss": 0.3032, "step": 32321 }, { "epoch": 0.5618383771662987, "grad_norm": 1.265034779812927, "learning_rate": 4.2445957166945123e-07, "loss": 0.1825, "step": 32322 }, { "epoch": 0.5618557597038015, "grad_norm": 1.975409718118694, "learning_rate": 4.2443174556716963e-07, "loss": 0.3151, "step": 32323 }, { "epoch": 0.5618731422413044, "grad_norm": 1.9431758001580013, "learning_rate": 4.244039197044048e-07, "loss": 0.296, "step": 32324 }, { "epoch": 0.5618905247788072, "grad_norm": 0.9292861747503742, "learning_rate": 4.24376094081245e-07, "loss": 0.1905, "step": 32325 }, { "epoch": 0.56190790731631, "grad_norm": 1.4955278990126508, "learning_rate": 4.24348268697778e-07, "loss": 0.1611, "step": 32326 }, { "epoch": 0.5619252898538128, "grad_norm": 1.29830115443386, "learning_rate": 4.243204435540922e-07, "loss": 0.2169, "step": 32327 }, { "epoch": 0.5619426723913157, "grad_norm": 1.1237192970368495, "learning_rate": 4.242926186502759e-07, "loss": 0.1795, "step": 32328 }, { "epoch": 0.5619600549288185, "grad_norm": 1.9215437027093707, "learning_rate": 4.2426479398641713e-07, "loss": 0.2445, "step": 32329 }, { "epoch": 0.5619774374663213, "grad_norm": 1.649718618885857, "learning_rate": 4.242369695626043e-07, "loss": 0.32, "step": 32330 }, { "epoch": 0.5619948200038242, "grad_norm": 1.0338760624504597, "learning_rate": 4.242091453789256e-07, "loss": 0.179, "step": 32331 }, { "epoch": 0.562012202541327, "grad_norm": 0.9107761537679894, "learning_rate": 4.241813214354688e-07, "loss": 0.1596, "step": 32332 }, { "epoch": 0.5620295850788298, "grad_norm": 2.013481718520783, "learning_rate": 4.241534977323226e-07, "loss": 0.2471, "step": 32333 }, { "epoch": 0.5620469676163327, "grad_norm": 1.4701864988595088, "learning_rate": 4.241256742695749e-07, "loss": 0.2346, "step": 32334 }, { "epoch": 0.5620643501538355, "grad_norm": 2.5780879647871373, "learning_rate": 4.2409785104731386e-07, "loss": 0.2147, "step": 32335 }, { "epoch": 0.5620817326913383, "grad_norm": 1.536624372647579, "learning_rate": 4.2407002806562786e-07, "loss": 0.2988, "step": 32336 }, { "epoch": 0.5620991152288412, "grad_norm": 2.1962821726261734, "learning_rate": 4.240422053246051e-07, "loss": 0.2599, "step": 32337 }, { "epoch": 0.562116497766344, "grad_norm": 1.7841059699186907, "learning_rate": 4.240143828243335e-07, "loss": 0.2774, "step": 32338 }, { "epoch": 0.5621338803038467, "grad_norm": 2.6154140084550694, "learning_rate": 4.2398656056490146e-07, "loss": 0.313, "step": 32339 }, { "epoch": 0.5621512628413495, "grad_norm": 1.6346752824624595, "learning_rate": 4.2395873854639694e-07, "loss": 0.2592, "step": 32340 }, { "epoch": 0.5621686453788524, "grad_norm": 1.7216268456504389, "learning_rate": 4.239309167689085e-07, "loss": 0.1661, "step": 32341 }, { "epoch": 0.5621860279163552, "grad_norm": 2.629939767925852, "learning_rate": 4.239030952325241e-07, "loss": 0.2866, "step": 32342 }, { "epoch": 0.562203410453858, "grad_norm": 1.8523843892028298, "learning_rate": 4.238752739373318e-07, "loss": 0.216, "step": 32343 }, { "epoch": 0.5622207929913609, "grad_norm": 1.5480474929678494, "learning_rate": 4.238474528834201e-07, "loss": 0.172, "step": 32344 }, { "epoch": 0.5622381755288637, "grad_norm": 1.6358656983413549, "learning_rate": 4.238196320708769e-07, "loss": 0.322, "step": 32345 }, { "epoch": 0.5622555580663665, "grad_norm": 1.3820923104448177, "learning_rate": 4.237918114997904e-07, "loss": 0.2526, "step": 32346 }, { "epoch": 0.5622729406038693, "grad_norm": 1.193948995524854, "learning_rate": 4.237639911702489e-07, "loss": 0.4519, "step": 32347 }, { "epoch": 0.5622903231413722, "grad_norm": 2.4407635027796415, "learning_rate": 4.2373617108234043e-07, "loss": 0.2767, "step": 32348 }, { "epoch": 0.562307705678875, "grad_norm": 2.1298057433354036, "learning_rate": 4.2370835123615345e-07, "loss": 0.3021, "step": 32349 }, { "epoch": 0.5623250882163778, "grad_norm": 2.1313452654846956, "learning_rate": 4.2368053163177595e-07, "loss": 0.4165, "step": 32350 }, { "epoch": 0.5623424707538807, "grad_norm": 1.9642665422483578, "learning_rate": 4.2365271226929613e-07, "loss": 0.3692, "step": 32351 }, { "epoch": 0.5623598532913835, "grad_norm": 2.623126029745354, "learning_rate": 4.236248931488019e-07, "loss": 0.3906, "step": 32352 }, { "epoch": 0.5623772358288863, "grad_norm": 1.2865436236680292, "learning_rate": 4.2359707427038185e-07, "loss": 0.2717, "step": 32353 }, { "epoch": 0.5623946183663892, "grad_norm": 1.7039889854732004, "learning_rate": 4.2356925563412383e-07, "loss": 0.3736, "step": 32354 }, { "epoch": 0.562412000903892, "grad_norm": 1.5480429143122927, "learning_rate": 4.2354143724011634e-07, "loss": 0.1373, "step": 32355 }, { "epoch": 0.5624293834413948, "grad_norm": 2.083509749756301, "learning_rate": 4.235136190884474e-07, "loss": 0.4378, "step": 32356 }, { "epoch": 0.5624467659788976, "grad_norm": 2.5044348598635016, "learning_rate": 4.2348580117920507e-07, "loss": 0.3139, "step": 32357 }, { "epoch": 0.5624641485164005, "grad_norm": 5.6502549964457085, "learning_rate": 4.2345798351247754e-07, "loss": 0.4534, "step": 32358 }, { "epoch": 0.5624815310539032, "grad_norm": 2.998678352752701, "learning_rate": 4.234301660883531e-07, "loss": 0.3322, "step": 32359 }, { "epoch": 0.562498913591406, "grad_norm": 1.66562667123696, "learning_rate": 4.2340234890691975e-07, "loss": 0.1572, "step": 32360 }, { "epoch": 0.5625162961289089, "grad_norm": 1.2486957250566104, "learning_rate": 4.233745319682659e-07, "loss": 0.1795, "step": 32361 }, { "epoch": 0.5625336786664117, "grad_norm": 1.713923781308875, "learning_rate": 4.2334671527247947e-07, "loss": 0.352, "step": 32362 }, { "epoch": 0.5625510612039145, "grad_norm": 3.778613477802236, "learning_rate": 4.2331889881964894e-07, "loss": 0.2411, "step": 32363 }, { "epoch": 0.5625684437414173, "grad_norm": 2.3042066320037096, "learning_rate": 4.2329108260986217e-07, "loss": 0.2713, "step": 32364 }, { "epoch": 0.5625858262789202, "grad_norm": 2.063416146667568, "learning_rate": 4.232632666432074e-07, "loss": 0.2108, "step": 32365 }, { "epoch": 0.562603208816423, "grad_norm": 1.686977959316101, "learning_rate": 4.2323545091977284e-07, "loss": 0.2878, "step": 32366 }, { "epoch": 0.5626205913539258, "grad_norm": 2.242110741157999, "learning_rate": 4.2320763543964667e-07, "loss": 0.2981, "step": 32367 }, { "epoch": 0.5626379738914287, "grad_norm": 1.322486042353342, "learning_rate": 4.2317982020291684e-07, "loss": 0.254, "step": 32368 }, { "epoch": 0.5626553564289315, "grad_norm": 1.3874275163323233, "learning_rate": 4.2315200520967204e-07, "loss": 0.2915, "step": 32369 }, { "epoch": 0.5626727389664343, "grad_norm": 1.7493050891021664, "learning_rate": 4.2312419045999993e-07, "loss": 0.1258, "step": 32370 }, { "epoch": 0.5626901215039372, "grad_norm": 1.6168985417437174, "learning_rate": 4.2309637595398865e-07, "loss": 0.3338, "step": 32371 }, { "epoch": 0.56270750404144, "grad_norm": 1.3802975978531435, "learning_rate": 4.230685616917267e-07, "loss": 0.1891, "step": 32372 }, { "epoch": 0.5627248865789428, "grad_norm": 3.909273513040012, "learning_rate": 4.2304074767330196e-07, "loss": 0.2117, "step": 32373 }, { "epoch": 0.5627422691164456, "grad_norm": 1.6167975694977121, "learning_rate": 4.230129338988028e-07, "loss": 0.2543, "step": 32374 }, { "epoch": 0.5627596516539485, "grad_norm": 1.2311191808973532, "learning_rate": 4.2298512036831733e-07, "loss": 0.2225, "step": 32375 }, { "epoch": 0.5627770341914513, "grad_norm": 2.083212764873585, "learning_rate": 4.229573070819336e-07, "loss": 0.2689, "step": 32376 }, { "epoch": 0.5627944167289541, "grad_norm": 1.2338959480429614, "learning_rate": 4.229294940397397e-07, "loss": 0.2119, "step": 32377 }, { "epoch": 0.562811799266457, "grad_norm": 2.1441120102723215, "learning_rate": 4.22901681241824e-07, "loss": 0.2229, "step": 32378 }, { "epoch": 0.5628291818039597, "grad_norm": 1.0122068808403224, "learning_rate": 4.2287386868827444e-07, "loss": 0.3888, "step": 32379 }, { "epoch": 0.5628465643414625, "grad_norm": 1.5100466830024402, "learning_rate": 4.228460563791795e-07, "loss": 0.5453, "step": 32380 }, { "epoch": 0.5628639468789653, "grad_norm": 2.5120889369190764, "learning_rate": 4.2281824431462685e-07, "loss": 0.3634, "step": 32381 }, { "epoch": 0.5628813294164682, "grad_norm": 2.2729967518941976, "learning_rate": 4.227904324947053e-07, "loss": 0.3843, "step": 32382 }, { "epoch": 0.562898711953971, "grad_norm": 1.0194079878049547, "learning_rate": 4.227626209195023e-07, "loss": 0.2115, "step": 32383 }, { "epoch": 0.5629160944914738, "grad_norm": 2.8230058242044973, "learning_rate": 4.227348095891064e-07, "loss": 0.21, "step": 32384 }, { "epoch": 0.5629334770289767, "grad_norm": 1.0888551736876506, "learning_rate": 4.227069985036056e-07, "loss": 0.1249, "step": 32385 }, { "epoch": 0.5629508595664795, "grad_norm": 1.355027689755035, "learning_rate": 4.226791876630882e-07, "loss": 0.203, "step": 32386 }, { "epoch": 0.5629682421039823, "grad_norm": 1.5104648721067062, "learning_rate": 4.226513770676421e-07, "loss": 0.2724, "step": 32387 }, { "epoch": 0.5629856246414852, "grad_norm": 1.6250716194983474, "learning_rate": 4.2262356671735583e-07, "loss": 0.1802, "step": 32388 }, { "epoch": 0.563003007178988, "grad_norm": 1.7522141810193963, "learning_rate": 4.225957566123172e-07, "loss": 0.1979, "step": 32389 }, { "epoch": 0.5630203897164908, "grad_norm": 1.4373104577839104, "learning_rate": 4.225679467526145e-07, "loss": 0.2758, "step": 32390 }, { "epoch": 0.5630377722539937, "grad_norm": 1.1396518588722324, "learning_rate": 4.2254013713833577e-07, "loss": 0.2636, "step": 32391 }, { "epoch": 0.5630551547914965, "grad_norm": 1.8425932304982924, "learning_rate": 4.2251232776956926e-07, "loss": 0.1752, "step": 32392 }, { "epoch": 0.5630725373289993, "grad_norm": 1.910590768271482, "learning_rate": 4.22484518646403e-07, "loss": 0.2945, "step": 32393 }, { "epoch": 0.5630899198665021, "grad_norm": 3.521357949247253, "learning_rate": 4.224567097689254e-07, "loss": 0.3246, "step": 32394 }, { "epoch": 0.563107302404005, "grad_norm": 1.3132960558156872, "learning_rate": 4.224289011372243e-07, "loss": 0.2355, "step": 32395 }, { "epoch": 0.5631246849415078, "grad_norm": 1.2375451712837942, "learning_rate": 4.2240109275138785e-07, "loss": 0.2137, "step": 32396 }, { "epoch": 0.5631420674790106, "grad_norm": 1.2963604073762425, "learning_rate": 4.223732846115044e-07, "loss": 0.265, "step": 32397 }, { "epoch": 0.5631594500165135, "grad_norm": 1.3717022578808984, "learning_rate": 4.2234547671766197e-07, "loss": 0.226, "step": 32398 }, { "epoch": 0.5631768325540162, "grad_norm": 0.9534904193865342, "learning_rate": 4.2231766906994856e-07, "loss": 0.2804, "step": 32399 }, { "epoch": 0.563194215091519, "grad_norm": 1.8850988367737598, "learning_rate": 4.222898616684527e-07, "loss": 0.2957, "step": 32400 }, { "epoch": 0.5632115976290218, "grad_norm": 3.7632473800754545, "learning_rate": 4.222620545132623e-07, "loss": 0.2704, "step": 32401 }, { "epoch": 0.5632289801665247, "grad_norm": 1.4639754984680455, "learning_rate": 4.2223424760446525e-07, "loss": 0.3273, "step": 32402 }, { "epoch": 0.5632463627040275, "grad_norm": 1.3155344312638957, "learning_rate": 4.2220644094215e-07, "loss": 0.1645, "step": 32403 }, { "epoch": 0.5632637452415303, "grad_norm": 1.794908396711308, "learning_rate": 4.2217863452640457e-07, "loss": 0.735, "step": 32404 }, { "epoch": 0.5632811277790332, "grad_norm": 0.9243392642415149, "learning_rate": 4.221508283573172e-07, "loss": 0.2992, "step": 32405 }, { "epoch": 0.563298510316536, "grad_norm": 1.2389918861693447, "learning_rate": 4.221230224349759e-07, "loss": 0.155, "step": 32406 }, { "epoch": 0.5633158928540388, "grad_norm": 1.255181421419596, "learning_rate": 4.2209521675946897e-07, "loss": 0.1628, "step": 32407 }, { "epoch": 0.5633332753915417, "grad_norm": 1.1869013576975067, "learning_rate": 4.220674113308842e-07, "loss": 0.2319, "step": 32408 }, { "epoch": 0.5633506579290445, "grad_norm": 1.1967869182071418, "learning_rate": 4.2203960614931003e-07, "loss": 0.2474, "step": 32409 }, { "epoch": 0.5633680404665473, "grad_norm": 3.323722008494428, "learning_rate": 4.220118012148344e-07, "loss": 0.4619, "step": 32410 }, { "epoch": 0.5633854230040501, "grad_norm": 2.285075314734705, "learning_rate": 4.219839965275457e-07, "loss": 0.254, "step": 32411 }, { "epoch": 0.563402805541553, "grad_norm": 0.9563572807070411, "learning_rate": 4.2195619208753175e-07, "loss": 0.1674, "step": 32412 }, { "epoch": 0.5634201880790558, "grad_norm": 1.6509509700721927, "learning_rate": 4.219283878948811e-07, "loss": 0.2381, "step": 32413 }, { "epoch": 0.5634375706165586, "grad_norm": 1.2112142151345, "learning_rate": 4.219005839496813e-07, "loss": 0.172, "step": 32414 }, { "epoch": 0.5634549531540615, "grad_norm": 1.1841991232385518, "learning_rate": 4.2187278025202087e-07, "loss": 0.3103, "step": 32415 }, { "epoch": 0.5634723356915643, "grad_norm": 1.205304419363589, "learning_rate": 4.218449768019878e-07, "loss": 0.6651, "step": 32416 }, { "epoch": 0.5634897182290671, "grad_norm": 1.668864171769731, "learning_rate": 4.218171735996703e-07, "loss": 0.3603, "step": 32417 }, { "epoch": 0.56350710076657, "grad_norm": 2.203269007867828, "learning_rate": 4.2178937064515637e-07, "loss": 0.278, "step": 32418 }, { "epoch": 0.5635244833040727, "grad_norm": 1.1634437851644008, "learning_rate": 4.217615679385344e-07, "loss": 0.3103, "step": 32419 }, { "epoch": 0.5635418658415755, "grad_norm": 1.497948225843266, "learning_rate": 4.217337654798922e-07, "loss": 0.1788, "step": 32420 }, { "epoch": 0.5635592483790783, "grad_norm": 1.4331824741929755, "learning_rate": 4.217059632693179e-07, "loss": 0.2442, "step": 32421 }, { "epoch": 0.5635766309165812, "grad_norm": 3.1784172925731267, "learning_rate": 4.216781613068999e-07, "loss": 0.404, "step": 32422 }, { "epoch": 0.563594013454084, "grad_norm": 1.8584876593116282, "learning_rate": 4.2165035959272613e-07, "loss": 0.2035, "step": 32423 }, { "epoch": 0.5636113959915868, "grad_norm": 1.5226776428267925, "learning_rate": 4.2162255812688453e-07, "loss": 0.1991, "step": 32424 }, { "epoch": 0.5636287785290897, "grad_norm": 1.8499209365350926, "learning_rate": 4.215947569094636e-07, "loss": 0.2415, "step": 32425 }, { "epoch": 0.5636461610665925, "grad_norm": 1.4890935940388346, "learning_rate": 4.2156695594055137e-07, "loss": 0.2639, "step": 32426 }, { "epoch": 0.5636635436040953, "grad_norm": 1.8691107548540422, "learning_rate": 4.2153915522023567e-07, "loss": 0.22, "step": 32427 }, { "epoch": 0.5636809261415981, "grad_norm": 1.0517068971768087, "learning_rate": 4.2151135474860487e-07, "loss": 0.1422, "step": 32428 }, { "epoch": 0.563698308679101, "grad_norm": 1.9713778860468334, "learning_rate": 4.214835545257469e-07, "loss": 0.4682, "step": 32429 }, { "epoch": 0.5637156912166038, "grad_norm": 1.3268699055260418, "learning_rate": 4.214557545517502e-07, "loss": 0.2617, "step": 32430 }, { "epoch": 0.5637330737541066, "grad_norm": 1.0754773357620258, "learning_rate": 4.214279548267026e-07, "loss": 0.2456, "step": 32431 }, { "epoch": 0.5637504562916095, "grad_norm": 1.2316920125585387, "learning_rate": 4.214001553506924e-07, "loss": 0.2068, "step": 32432 }, { "epoch": 0.5637678388291123, "grad_norm": 1.8494096448914001, "learning_rate": 4.213723561238074e-07, "loss": 0.3506, "step": 32433 }, { "epoch": 0.5637852213666151, "grad_norm": 1.1397446182290987, "learning_rate": 4.2134455714613605e-07, "loss": 0.1918, "step": 32434 }, { "epoch": 0.563802603904118, "grad_norm": 1.5280028575562266, "learning_rate": 4.2131675841776613e-07, "loss": 0.2745, "step": 32435 }, { "epoch": 0.5638199864416208, "grad_norm": 1.6484487963006051, "learning_rate": 4.212889599387861e-07, "loss": 0.3573, "step": 32436 }, { "epoch": 0.5638373689791236, "grad_norm": 2.1587917459678274, "learning_rate": 4.212611617092838e-07, "loss": 0.4708, "step": 32437 }, { "epoch": 0.5638547515166265, "grad_norm": 3.959896278738984, "learning_rate": 4.212333637293478e-07, "loss": 0.2267, "step": 32438 }, { "epoch": 0.5638721340541292, "grad_norm": 1.1960717952130846, "learning_rate": 4.212055659990655e-07, "loss": 0.2104, "step": 32439 }, { "epoch": 0.563889516591632, "grad_norm": 1.5827485544538575, "learning_rate": 4.211777685185255e-07, "loss": 0.3454, "step": 32440 }, { "epoch": 0.5639068991291348, "grad_norm": 1.8443430636378164, "learning_rate": 4.2114997128781564e-07, "loss": 0.1828, "step": 32441 }, { "epoch": 0.5639242816666377, "grad_norm": 2.186360748278985, "learning_rate": 4.2112217430702424e-07, "loss": 0.2846, "step": 32442 }, { "epoch": 0.5639416642041405, "grad_norm": 2.040521974498588, "learning_rate": 4.2109437757623923e-07, "loss": 0.2661, "step": 32443 }, { "epoch": 0.5639590467416433, "grad_norm": 1.8435295157754046, "learning_rate": 4.210665810955489e-07, "loss": 0.3426, "step": 32444 }, { "epoch": 0.5639764292791462, "grad_norm": 1.1902737702090216, "learning_rate": 4.2103878486504134e-07, "loss": 0.181, "step": 32445 }, { "epoch": 0.563993811816649, "grad_norm": 1.3383963194145816, "learning_rate": 4.2101098888480444e-07, "loss": 0.1884, "step": 32446 }, { "epoch": 0.5640111943541518, "grad_norm": 1.0624138206593992, "learning_rate": 4.2098319315492634e-07, "loss": 0.2304, "step": 32447 }, { "epoch": 0.5640285768916546, "grad_norm": 1.6523708197593852, "learning_rate": 4.209553976754954e-07, "loss": 0.3904, "step": 32448 }, { "epoch": 0.5640459594291575, "grad_norm": 1.8445520938098645, "learning_rate": 4.209276024465993e-07, "loss": 0.2727, "step": 32449 }, { "epoch": 0.5640633419666603, "grad_norm": 2.1439045200709446, "learning_rate": 4.2089980746832653e-07, "loss": 0.2568, "step": 32450 }, { "epoch": 0.5640807245041631, "grad_norm": 0.783360892329482, "learning_rate": 4.208720127407652e-07, "loss": 0.2484, "step": 32451 }, { "epoch": 0.564098107041666, "grad_norm": 1.4910836651773585, "learning_rate": 4.2084421826400297e-07, "loss": 0.2167, "step": 32452 }, { "epoch": 0.5641154895791688, "grad_norm": 0.905888131276354, "learning_rate": 4.2081642403812827e-07, "loss": 0.1678, "step": 32453 }, { "epoch": 0.5641328721166716, "grad_norm": 1.5846534813057995, "learning_rate": 4.2078863006322916e-07, "loss": 0.3056, "step": 32454 }, { "epoch": 0.5641502546541745, "grad_norm": 2.078712452436207, "learning_rate": 4.2076083633939364e-07, "loss": 0.6021, "step": 32455 }, { "epoch": 0.5641676371916773, "grad_norm": 1.3115347940176052, "learning_rate": 4.2073304286670995e-07, "loss": 0.1771, "step": 32456 }, { "epoch": 0.5641850197291801, "grad_norm": 1.9768796471370016, "learning_rate": 4.207052496452663e-07, "loss": 0.2636, "step": 32457 }, { "epoch": 0.564202402266683, "grad_norm": 1.8611239816317366, "learning_rate": 4.2067745667515023e-07, "loss": 0.2225, "step": 32458 }, { "epoch": 0.5642197848041857, "grad_norm": 1.1345470785198484, "learning_rate": 4.2064966395645033e-07, "loss": 0.1556, "step": 32459 }, { "epoch": 0.5642371673416885, "grad_norm": 1.2832161622892204, "learning_rate": 4.2062187148925446e-07, "loss": 0.2091, "step": 32460 }, { "epoch": 0.5642545498791913, "grad_norm": 1.3029963143801995, "learning_rate": 4.2059407927365094e-07, "loss": 0.2611, "step": 32461 }, { "epoch": 0.5642719324166942, "grad_norm": 1.7957884124043235, "learning_rate": 4.2056628730972765e-07, "loss": 0.2557, "step": 32462 }, { "epoch": 0.564289314954197, "grad_norm": 1.2925573193747404, "learning_rate": 4.2053849559757266e-07, "loss": 0.1918, "step": 32463 }, { "epoch": 0.5643066974916998, "grad_norm": 1.3593919883464036, "learning_rate": 4.2051070413727435e-07, "loss": 0.2979, "step": 32464 }, { "epoch": 0.5643240800292026, "grad_norm": 0.7283248040554244, "learning_rate": 4.204829129289205e-07, "loss": 0.2568, "step": 32465 }, { "epoch": 0.5643414625667055, "grad_norm": 1.1732758346881578, "learning_rate": 4.2045512197259915e-07, "loss": 0.2518, "step": 32466 }, { "epoch": 0.5643588451042083, "grad_norm": 2.4415379076549955, "learning_rate": 4.204273312683986e-07, "loss": 0.4031, "step": 32467 }, { "epoch": 0.5643762276417111, "grad_norm": 2.247363313642263, "learning_rate": 4.2039954081640683e-07, "loss": 0.3762, "step": 32468 }, { "epoch": 0.564393610179214, "grad_norm": 1.328156287228116, "learning_rate": 4.2037175061671203e-07, "loss": 0.1912, "step": 32469 }, { "epoch": 0.5644109927167168, "grad_norm": 1.3278486077532672, "learning_rate": 4.203439606694023e-07, "loss": 0.3295, "step": 32470 }, { "epoch": 0.5644283752542196, "grad_norm": 1.2201221142686354, "learning_rate": 4.2031617097456553e-07, "loss": 0.2513, "step": 32471 }, { "epoch": 0.5644457577917225, "grad_norm": 1.9823716061575567, "learning_rate": 4.2028838153228976e-07, "loss": 0.2534, "step": 32472 }, { "epoch": 0.5644631403292253, "grad_norm": 1.4302852285613274, "learning_rate": 4.202605923426633e-07, "loss": 0.3558, "step": 32473 }, { "epoch": 0.5644805228667281, "grad_norm": 1.8411776072940087, "learning_rate": 4.2023280340577405e-07, "loss": 0.2508, "step": 32474 }, { "epoch": 0.564497905404231, "grad_norm": 2.894874732090365, "learning_rate": 4.2020501472171034e-07, "loss": 0.4071, "step": 32475 }, { "epoch": 0.5645152879417338, "grad_norm": 1.80332906976544, "learning_rate": 4.201772262905601e-07, "loss": 0.2996, "step": 32476 }, { "epoch": 0.5645326704792366, "grad_norm": 1.1832844880588307, "learning_rate": 4.201494381124112e-07, "loss": 0.2948, "step": 32477 }, { "epoch": 0.5645500530167393, "grad_norm": 1.1155417523006323, "learning_rate": 4.20121650187352e-07, "loss": 0.2289, "step": 32478 }, { "epoch": 0.5645674355542422, "grad_norm": 1.0276461674346173, "learning_rate": 4.2009386251547046e-07, "loss": 0.1796, "step": 32479 }, { "epoch": 0.564584818091745, "grad_norm": 1.1335388049592354, "learning_rate": 4.200660750968546e-07, "loss": 0.368, "step": 32480 }, { "epoch": 0.5646022006292478, "grad_norm": 2.446535082301897, "learning_rate": 4.200382879315927e-07, "loss": 0.2629, "step": 32481 }, { "epoch": 0.5646195831667506, "grad_norm": 2.049987763456315, "learning_rate": 4.200105010197725e-07, "loss": 0.2143, "step": 32482 }, { "epoch": 0.5646369657042535, "grad_norm": 1.7400629548289401, "learning_rate": 4.199827143614825e-07, "loss": 0.2713, "step": 32483 }, { "epoch": 0.5646543482417563, "grad_norm": 2.2689047356654486, "learning_rate": 4.1995492795681046e-07, "loss": 0.1655, "step": 32484 }, { "epoch": 0.5646717307792591, "grad_norm": 2.0451292207709684, "learning_rate": 4.199271418058444e-07, "loss": 0.3657, "step": 32485 }, { "epoch": 0.564689113316762, "grad_norm": 2.3632428060677877, "learning_rate": 4.1989935590867263e-07, "loss": 0.4918, "step": 32486 }, { "epoch": 0.5647064958542648, "grad_norm": 1.3949884739233103, "learning_rate": 4.1987157026538314e-07, "loss": 0.2398, "step": 32487 }, { "epoch": 0.5647238783917676, "grad_norm": 1.3739180351283993, "learning_rate": 4.198437848760639e-07, "loss": 0.2412, "step": 32488 }, { "epoch": 0.5647412609292705, "grad_norm": 1.6926357083386685, "learning_rate": 4.1981599974080316e-07, "loss": 0.1669, "step": 32489 }, { "epoch": 0.5647586434667733, "grad_norm": 3.4190555604002975, "learning_rate": 4.197882148596888e-07, "loss": 0.4716, "step": 32490 }, { "epoch": 0.5647760260042761, "grad_norm": 2.7814737524555246, "learning_rate": 4.197604302328088e-07, "loss": 0.3087, "step": 32491 }, { "epoch": 0.564793408541779, "grad_norm": 3.0968488627091393, "learning_rate": 4.1973264586025156e-07, "loss": 0.2491, "step": 32492 }, { "epoch": 0.5648107910792818, "grad_norm": 2.565263860470339, "learning_rate": 4.1970486174210477e-07, "loss": 0.2434, "step": 32493 }, { "epoch": 0.5648281736167846, "grad_norm": 2.3434937923581143, "learning_rate": 4.1967707787845686e-07, "loss": 0.2124, "step": 32494 }, { "epoch": 0.5648455561542874, "grad_norm": 1.1678567027699087, "learning_rate": 4.196492942693958e-07, "loss": 0.3169, "step": 32495 }, { "epoch": 0.5648629386917903, "grad_norm": 1.289386732331069, "learning_rate": 4.196215109150095e-07, "loss": 0.2014, "step": 32496 }, { "epoch": 0.5648803212292931, "grad_norm": 1.7415237104903587, "learning_rate": 4.195937278153859e-07, "loss": 0.2205, "step": 32497 }, { "epoch": 0.5648977037667958, "grad_norm": 1.3547936512215788, "learning_rate": 4.1956594497061336e-07, "loss": 0.278, "step": 32498 }, { "epoch": 0.5649150863042987, "grad_norm": 2.2125137980410696, "learning_rate": 4.1953816238077976e-07, "loss": 0.3999, "step": 32499 }, { "epoch": 0.5649324688418015, "grad_norm": 2.3385125260059185, "learning_rate": 4.195103800459734e-07, "loss": 0.3327, "step": 32500 }, { "epoch": 0.5649498513793043, "grad_norm": 1.5164513128308699, "learning_rate": 4.1948259796628194e-07, "loss": 0.1747, "step": 32501 }, { "epoch": 0.5649672339168071, "grad_norm": 1.092236162030386, "learning_rate": 4.19454816141794e-07, "loss": 0.1778, "step": 32502 }, { "epoch": 0.56498461645431, "grad_norm": 1.091954968880768, "learning_rate": 4.19427034572597e-07, "loss": 0.2943, "step": 32503 }, { "epoch": 0.5650019989918128, "grad_norm": 1.617453674707983, "learning_rate": 4.193992532587794e-07, "loss": 0.2702, "step": 32504 }, { "epoch": 0.5650193815293156, "grad_norm": 0.994171780104865, "learning_rate": 4.1937147220042903e-07, "loss": 0.2561, "step": 32505 }, { "epoch": 0.5650367640668185, "grad_norm": 2.0622404399971774, "learning_rate": 4.1934369139763415e-07, "loss": 0.2501, "step": 32506 }, { "epoch": 0.5650541466043213, "grad_norm": 2.769941175087533, "learning_rate": 4.1931591085048255e-07, "loss": 0.3629, "step": 32507 }, { "epoch": 0.5650715291418241, "grad_norm": 2.657790290438627, "learning_rate": 4.1928813055906275e-07, "loss": 0.401, "step": 32508 }, { "epoch": 0.565088911679327, "grad_norm": 1.8459056488469734, "learning_rate": 4.1926035052346235e-07, "loss": 0.3377, "step": 32509 }, { "epoch": 0.5651062942168298, "grad_norm": 2.2864776885786036, "learning_rate": 4.192325707437696e-07, "loss": 0.2038, "step": 32510 }, { "epoch": 0.5651236767543326, "grad_norm": 1.6550055580495935, "learning_rate": 4.1920479122007235e-07, "loss": 0.2371, "step": 32511 }, { "epoch": 0.5651410592918354, "grad_norm": 1.1931491453958836, "learning_rate": 4.1917701195245894e-07, "loss": 0.2229, "step": 32512 }, { "epoch": 0.5651584418293383, "grad_norm": 1.8753724051491214, "learning_rate": 4.1914923294101714e-07, "loss": 0.3377, "step": 32513 }, { "epoch": 0.5651758243668411, "grad_norm": 1.4352661640482414, "learning_rate": 4.191214541858353e-07, "loss": 0.1903, "step": 32514 }, { "epoch": 0.5651932069043439, "grad_norm": 1.2209830140878626, "learning_rate": 4.190936756870013e-07, "loss": 0.2523, "step": 32515 }, { "epoch": 0.5652105894418468, "grad_norm": 1.9781688644111841, "learning_rate": 4.1906589744460304e-07, "loss": 0.3026, "step": 32516 }, { "epoch": 0.5652279719793496, "grad_norm": 1.7703594627560388, "learning_rate": 4.190381194587287e-07, "loss": 0.305, "step": 32517 }, { "epoch": 0.5652453545168523, "grad_norm": 1.5456197889913128, "learning_rate": 4.1901034172946637e-07, "loss": 0.1796, "step": 32518 }, { "epoch": 0.5652627370543551, "grad_norm": 2.57686825619579, "learning_rate": 4.189825642569041e-07, "loss": 0.3547, "step": 32519 }, { "epoch": 0.565280119591858, "grad_norm": 1.1945111117516227, "learning_rate": 4.189547870411299e-07, "loss": 0.2435, "step": 32520 }, { "epoch": 0.5652975021293608, "grad_norm": 1.8269934747661083, "learning_rate": 4.189270100822319e-07, "loss": 0.3079, "step": 32521 }, { "epoch": 0.5653148846668636, "grad_norm": 1.6523126137890751, "learning_rate": 4.1889923338029783e-07, "loss": 0.1987, "step": 32522 }, { "epoch": 0.5653322672043665, "grad_norm": 2.9367723206088363, "learning_rate": 4.18871456935416e-07, "loss": 0.2658, "step": 32523 }, { "epoch": 0.5653496497418693, "grad_norm": 1.5978469685909236, "learning_rate": 4.188436807476743e-07, "loss": 0.2013, "step": 32524 }, { "epoch": 0.5653670322793721, "grad_norm": 2.738997945653409, "learning_rate": 4.1881590481716096e-07, "loss": 0.3585, "step": 32525 }, { "epoch": 0.565384414816875, "grad_norm": 2.384319366374831, "learning_rate": 4.187881291439638e-07, "loss": 0.3002, "step": 32526 }, { "epoch": 0.5654017973543778, "grad_norm": 1.287339623332723, "learning_rate": 4.1876035372817134e-07, "loss": 0.1978, "step": 32527 }, { "epoch": 0.5654191798918806, "grad_norm": 1.5146451118299344, "learning_rate": 4.187325785698708e-07, "loss": 0.4203, "step": 32528 }, { "epoch": 0.5654365624293834, "grad_norm": 3.059722141893621, "learning_rate": 4.1870480366915086e-07, "loss": 0.2591, "step": 32529 }, { "epoch": 0.5654539449668863, "grad_norm": 1.898554855175349, "learning_rate": 4.186770290260992e-07, "loss": 0.2417, "step": 32530 }, { "epoch": 0.5654713275043891, "grad_norm": 1.2535387074149222, "learning_rate": 4.1864925464080414e-07, "loss": 0.2683, "step": 32531 }, { "epoch": 0.5654887100418919, "grad_norm": 2.222833444680752, "learning_rate": 4.186214805133534e-07, "loss": 0.4149, "step": 32532 }, { "epoch": 0.5655060925793948, "grad_norm": 1.6109840752659428, "learning_rate": 4.185937066438355e-07, "loss": 0.2888, "step": 32533 }, { "epoch": 0.5655234751168976, "grad_norm": 1.4041850298102878, "learning_rate": 4.18565933032338e-07, "loss": 0.2879, "step": 32534 }, { "epoch": 0.5655408576544004, "grad_norm": 1.095659626115941, "learning_rate": 4.185381596789491e-07, "loss": 0.1523, "step": 32535 }, { "epoch": 0.5655582401919033, "grad_norm": 1.2606432073352032, "learning_rate": 4.1851038658375666e-07, "loss": 0.4066, "step": 32536 }, { "epoch": 0.5655756227294061, "grad_norm": 1.329287877543337, "learning_rate": 4.1848261374684894e-07, "loss": 0.317, "step": 32537 }, { "epoch": 0.5655930052669088, "grad_norm": 0.9835383028599439, "learning_rate": 4.1845484116831384e-07, "loss": 0.1686, "step": 32538 }, { "epoch": 0.5656103878044116, "grad_norm": 2.490644942877271, "learning_rate": 4.184270688482397e-07, "loss": 0.2703, "step": 32539 }, { "epoch": 0.5656277703419145, "grad_norm": 1.218851003416625, "learning_rate": 4.183992967867141e-07, "loss": 0.2035, "step": 32540 }, { "epoch": 0.5656451528794173, "grad_norm": 1.3669454119134032, "learning_rate": 4.1837152498382514e-07, "loss": 0.3469, "step": 32541 }, { "epoch": 0.5656625354169201, "grad_norm": 3.3328141862183873, "learning_rate": 4.183437534396611e-07, "loss": 0.355, "step": 32542 }, { "epoch": 0.565679917954423, "grad_norm": 1.8740797822885162, "learning_rate": 4.183159821543098e-07, "loss": 0.2489, "step": 32543 }, { "epoch": 0.5656973004919258, "grad_norm": 4.593595810460072, "learning_rate": 4.1828821112785924e-07, "loss": 0.2991, "step": 32544 }, { "epoch": 0.5657146830294286, "grad_norm": 1.645867286619004, "learning_rate": 4.182604403603976e-07, "loss": 0.2676, "step": 32545 }, { "epoch": 0.5657320655669315, "grad_norm": 1.9713672794621109, "learning_rate": 4.182326698520129e-07, "loss": 0.1978, "step": 32546 }, { "epoch": 0.5657494481044343, "grad_norm": 1.5140076009578793, "learning_rate": 4.182048996027929e-07, "loss": 0.2438, "step": 32547 }, { "epoch": 0.5657668306419371, "grad_norm": 1.4989768658193543, "learning_rate": 4.181771296128259e-07, "loss": 0.2596, "step": 32548 }, { "epoch": 0.5657842131794399, "grad_norm": 2.0858292924873427, "learning_rate": 4.181493598821997e-07, "loss": 0.3439, "step": 32549 }, { "epoch": 0.5658015957169428, "grad_norm": 1.245185177573337, "learning_rate": 4.181215904110026e-07, "loss": 0.2443, "step": 32550 }, { "epoch": 0.5658189782544456, "grad_norm": 1.63613556169463, "learning_rate": 4.1809382119932236e-07, "loss": 0.1559, "step": 32551 }, { "epoch": 0.5658363607919484, "grad_norm": 1.2204760334433358, "learning_rate": 4.1806605224724723e-07, "loss": 0.166, "step": 32552 }, { "epoch": 0.5658537433294513, "grad_norm": 1.3629828029609075, "learning_rate": 4.1803828355486484e-07, "loss": 0.2502, "step": 32553 }, { "epoch": 0.5658711258669541, "grad_norm": 1.0589610204640019, "learning_rate": 4.180105151222635e-07, "loss": 0.2562, "step": 32554 }, { "epoch": 0.5658885084044569, "grad_norm": 1.3262983595384201, "learning_rate": 4.179827469495312e-07, "loss": 0.238, "step": 32555 }, { "epoch": 0.5659058909419598, "grad_norm": 1.3695627254240736, "learning_rate": 4.179549790367559e-07, "loss": 0.149, "step": 32556 }, { "epoch": 0.5659232734794626, "grad_norm": 1.5366969542705833, "learning_rate": 4.1792721138402555e-07, "loss": 0.2968, "step": 32557 }, { "epoch": 0.5659406560169653, "grad_norm": 1.048473541081515, "learning_rate": 4.1789944399142855e-07, "loss": 0.2596, "step": 32558 }, { "epoch": 0.5659580385544681, "grad_norm": 1.266923359547117, "learning_rate": 4.1787167685905236e-07, "loss": 0.2906, "step": 32559 }, { "epoch": 0.565975421091971, "grad_norm": 2.2295871191070358, "learning_rate": 4.1784390998698527e-07, "loss": 0.3372, "step": 32560 }, { "epoch": 0.5659928036294738, "grad_norm": 1.3903830068468341, "learning_rate": 4.178161433753151e-07, "loss": 0.274, "step": 32561 }, { "epoch": 0.5660101861669766, "grad_norm": 2.2589345505996943, "learning_rate": 4.1778837702413014e-07, "loss": 0.4003, "step": 32562 }, { "epoch": 0.5660275687044795, "grad_norm": 2.1587732520998113, "learning_rate": 4.177606109335182e-07, "loss": 0.2513, "step": 32563 }, { "epoch": 0.5660449512419823, "grad_norm": 2.1692231576371186, "learning_rate": 4.1773284510356737e-07, "loss": 0.204, "step": 32564 }, { "epoch": 0.5660623337794851, "grad_norm": 1.90666187802321, "learning_rate": 4.177050795343658e-07, "loss": 0.2839, "step": 32565 }, { "epoch": 0.566079716316988, "grad_norm": 1.583607481036854, "learning_rate": 4.176773142260011e-07, "loss": 0.2401, "step": 32566 }, { "epoch": 0.5660970988544908, "grad_norm": 1.6769277138914382, "learning_rate": 4.176495491785616e-07, "loss": 0.2052, "step": 32567 }, { "epoch": 0.5661144813919936, "grad_norm": 3.6642476054505475, "learning_rate": 4.176217843921352e-07, "loss": 0.2611, "step": 32568 }, { "epoch": 0.5661318639294964, "grad_norm": 1.2243726473274796, "learning_rate": 4.1759401986680983e-07, "loss": 0.2272, "step": 32569 }, { "epoch": 0.5661492464669993, "grad_norm": 1.5473364408995187, "learning_rate": 4.175662556026736e-07, "loss": 0.3582, "step": 32570 }, { "epoch": 0.5661666290045021, "grad_norm": 1.2279782688398855, "learning_rate": 4.1753849159981457e-07, "loss": 0.3687, "step": 32571 }, { "epoch": 0.5661840115420049, "grad_norm": 1.5304358506136344, "learning_rate": 4.1751072785832054e-07, "loss": 0.317, "step": 32572 }, { "epoch": 0.5662013940795078, "grad_norm": 1.3835363838057435, "learning_rate": 4.1748296437827963e-07, "loss": 0.1888, "step": 32573 }, { "epoch": 0.5662187766170106, "grad_norm": 2.0358187912663355, "learning_rate": 4.1745520115977973e-07, "loss": 0.2629, "step": 32574 }, { "epoch": 0.5662361591545134, "grad_norm": 2.2083116536420575, "learning_rate": 4.1742743820290904e-07, "loss": 0.2734, "step": 32575 }, { "epoch": 0.5662535416920162, "grad_norm": 1.7706095720220254, "learning_rate": 4.173996755077554e-07, "loss": 0.2023, "step": 32576 }, { "epoch": 0.5662709242295191, "grad_norm": 2.180953587345552, "learning_rate": 4.17371913074407e-07, "loss": 0.3249, "step": 32577 }, { "epoch": 0.5662883067670218, "grad_norm": 1.033413824379207, "learning_rate": 4.1734415090295144e-07, "loss": 0.2059, "step": 32578 }, { "epoch": 0.5663056893045246, "grad_norm": 3.145866159030635, "learning_rate": 4.173163889934771e-07, "loss": 0.4589, "step": 32579 }, { "epoch": 0.5663230718420275, "grad_norm": 1.3726720899214906, "learning_rate": 4.1728862734607166e-07, "loss": 0.2495, "step": 32580 }, { "epoch": 0.5663404543795303, "grad_norm": 1.5203919397554104, "learning_rate": 4.1726086596082344e-07, "loss": 0.278, "step": 32581 }, { "epoch": 0.5663578369170331, "grad_norm": 1.2729586626537193, "learning_rate": 4.172331048378201e-07, "loss": 0.124, "step": 32582 }, { "epoch": 0.566375219454536, "grad_norm": 1.5614981892799327, "learning_rate": 4.172053439771499e-07, "loss": 0.2232, "step": 32583 }, { "epoch": 0.5663926019920388, "grad_norm": 1.9563017926866872, "learning_rate": 4.171775833789009e-07, "loss": 0.4512, "step": 32584 }, { "epoch": 0.5664099845295416, "grad_norm": 1.5650971182777038, "learning_rate": 4.171498230431608e-07, "loss": 0.2267, "step": 32585 }, { "epoch": 0.5664273670670444, "grad_norm": 0.8265506469955276, "learning_rate": 4.1712206297001756e-07, "loss": 0.3136, "step": 32586 }, { "epoch": 0.5664447496045473, "grad_norm": 1.757925144517939, "learning_rate": 4.170943031595594e-07, "loss": 0.3482, "step": 32587 }, { "epoch": 0.5664621321420501, "grad_norm": 3.468087107797571, "learning_rate": 4.170665436118742e-07, "loss": 0.2021, "step": 32588 }, { "epoch": 0.5664795146795529, "grad_norm": 1.4136042138984026, "learning_rate": 4.1703878432704997e-07, "loss": 0.3227, "step": 32589 }, { "epoch": 0.5664968972170558, "grad_norm": 1.3815663240124043, "learning_rate": 4.1701102530517487e-07, "loss": 0.3206, "step": 32590 }, { "epoch": 0.5665142797545586, "grad_norm": 2.32664618399301, "learning_rate": 4.169832665463365e-07, "loss": 0.3274, "step": 32591 }, { "epoch": 0.5665316622920614, "grad_norm": 2.172611804888409, "learning_rate": 4.16955508050623e-07, "loss": 0.3858, "step": 32592 }, { "epoch": 0.5665490448295643, "grad_norm": 1.0172901362273707, "learning_rate": 4.1692774981812244e-07, "loss": 0.2459, "step": 32593 }, { "epoch": 0.5665664273670671, "grad_norm": 1.6458325006112315, "learning_rate": 4.1689999184892273e-07, "loss": 0.299, "step": 32594 }, { "epoch": 0.5665838099045699, "grad_norm": 1.5837292819072273, "learning_rate": 4.168722341431119e-07, "loss": 0.2307, "step": 32595 }, { "epoch": 0.5666011924420727, "grad_norm": 2.029275502607148, "learning_rate": 4.1684447670077805e-07, "loss": 0.2579, "step": 32596 }, { "epoch": 0.5666185749795756, "grad_norm": 1.2527135753994476, "learning_rate": 4.168167195220088e-07, "loss": 0.3521, "step": 32597 }, { "epoch": 0.5666359575170783, "grad_norm": 1.855560221616296, "learning_rate": 4.1678896260689244e-07, "loss": 0.4816, "step": 32598 }, { "epoch": 0.5666533400545811, "grad_norm": 1.5438460160066383, "learning_rate": 4.1676120595551686e-07, "loss": 0.2372, "step": 32599 }, { "epoch": 0.566670722592084, "grad_norm": 2.294781806994332, "learning_rate": 4.1673344956796986e-07, "loss": 0.2005, "step": 32600 }, { "epoch": 0.5666881051295868, "grad_norm": 1.3017812126156103, "learning_rate": 4.167056934443397e-07, "loss": 0.425, "step": 32601 }, { "epoch": 0.5667054876670896, "grad_norm": 2.0754173780216627, "learning_rate": 4.166779375847141e-07, "loss": 0.3568, "step": 32602 }, { "epoch": 0.5667228702045924, "grad_norm": 1.5606718378560818, "learning_rate": 4.1665018198918147e-07, "loss": 0.2357, "step": 32603 }, { "epoch": 0.5667402527420953, "grad_norm": 0.9343698624038176, "learning_rate": 4.1662242665782934e-07, "loss": 0.3539, "step": 32604 }, { "epoch": 0.5667576352795981, "grad_norm": 1.595879923081436, "learning_rate": 4.1659467159074566e-07, "loss": 0.3064, "step": 32605 }, { "epoch": 0.5667750178171009, "grad_norm": 1.1830753964476306, "learning_rate": 4.165669167880187e-07, "loss": 0.2316, "step": 32606 }, { "epoch": 0.5667924003546038, "grad_norm": 1.0187637997511585, "learning_rate": 4.1653916224973615e-07, "loss": 0.168, "step": 32607 }, { "epoch": 0.5668097828921066, "grad_norm": 1.217322146516336, "learning_rate": 4.165114079759863e-07, "loss": 0.2686, "step": 32608 }, { "epoch": 0.5668271654296094, "grad_norm": 2.951778134458903, "learning_rate": 4.164836539668569e-07, "loss": 0.3269, "step": 32609 }, { "epoch": 0.5668445479671123, "grad_norm": 2.2992502671632833, "learning_rate": 4.16455900222436e-07, "loss": 0.2864, "step": 32610 }, { "epoch": 0.5668619305046151, "grad_norm": 1.933942261389956, "learning_rate": 4.1642814674281134e-07, "loss": 0.2464, "step": 32611 }, { "epoch": 0.5668793130421179, "grad_norm": 1.70057489907278, "learning_rate": 4.1640039352807114e-07, "loss": 0.2454, "step": 32612 }, { "epoch": 0.5668966955796207, "grad_norm": 2.0824575726367907, "learning_rate": 4.1637264057830323e-07, "loss": 0.3427, "step": 32613 }, { "epoch": 0.5669140781171236, "grad_norm": 1.9749970359210414, "learning_rate": 4.1634488789359575e-07, "loss": 0.2341, "step": 32614 }, { "epoch": 0.5669314606546264, "grad_norm": 1.0123233209029816, "learning_rate": 4.1631713547403664e-07, "loss": 0.2642, "step": 32615 }, { "epoch": 0.5669488431921292, "grad_norm": 1.597278270783873, "learning_rate": 4.1628938331971366e-07, "loss": 0.3508, "step": 32616 }, { "epoch": 0.566966225729632, "grad_norm": 2.064766981731336, "learning_rate": 4.1626163143071476e-07, "loss": 0.2516, "step": 32617 }, { "epoch": 0.5669836082671348, "grad_norm": 1.6461031914466164, "learning_rate": 4.162338798071281e-07, "loss": 0.2819, "step": 32618 }, { "epoch": 0.5670009908046376, "grad_norm": 1.690564092433657, "learning_rate": 4.162061284490415e-07, "loss": 0.3442, "step": 32619 }, { "epoch": 0.5670183733421404, "grad_norm": 1.5719930237205038, "learning_rate": 4.161783773565431e-07, "loss": 0.2614, "step": 32620 }, { "epoch": 0.5670357558796433, "grad_norm": 0.8862135193572487, "learning_rate": 4.161506265297206e-07, "loss": 0.2064, "step": 32621 }, { "epoch": 0.5670531384171461, "grad_norm": 1.9903465975458388, "learning_rate": 4.161228759686624e-07, "loss": 0.2897, "step": 32622 }, { "epoch": 0.5670705209546489, "grad_norm": 1.6711101782634297, "learning_rate": 4.160951256734559e-07, "loss": 0.4362, "step": 32623 }, { "epoch": 0.5670879034921518, "grad_norm": 2.040218230889497, "learning_rate": 4.1606737564418936e-07, "loss": 0.2809, "step": 32624 }, { "epoch": 0.5671052860296546, "grad_norm": 2.6386068587698466, "learning_rate": 4.1603962588095066e-07, "loss": 0.4179, "step": 32625 }, { "epoch": 0.5671226685671574, "grad_norm": 1.2960380576710384, "learning_rate": 4.160118763838278e-07, "loss": 0.2038, "step": 32626 }, { "epoch": 0.5671400511046603, "grad_norm": 1.776318115952075, "learning_rate": 4.159841271529086e-07, "loss": 0.198, "step": 32627 }, { "epoch": 0.5671574336421631, "grad_norm": 1.8011274512648547, "learning_rate": 4.159563781882814e-07, "loss": 0.3734, "step": 32628 }, { "epoch": 0.5671748161796659, "grad_norm": 1.3978401089672288, "learning_rate": 4.159286294900337e-07, "loss": 0.2333, "step": 32629 }, { "epoch": 0.5671921987171687, "grad_norm": 2.065308504791122, "learning_rate": 4.1590088105825356e-07, "loss": 0.2219, "step": 32630 }, { "epoch": 0.5672095812546716, "grad_norm": 0.919664358944825, "learning_rate": 4.1587313289302907e-07, "loss": 0.2817, "step": 32631 }, { "epoch": 0.5672269637921744, "grad_norm": 2.6229176490556054, "learning_rate": 4.1584538499444817e-07, "loss": 0.2231, "step": 32632 }, { "epoch": 0.5672443463296772, "grad_norm": 1.476322350101828, "learning_rate": 4.158176373625986e-07, "loss": 0.2987, "step": 32633 }, { "epoch": 0.5672617288671801, "grad_norm": 1.933783274572776, "learning_rate": 4.157898899975686e-07, "loss": 0.3687, "step": 32634 }, { "epoch": 0.5672791114046829, "grad_norm": 1.1184256023603918, "learning_rate": 4.1576214289944597e-07, "loss": 0.242, "step": 32635 }, { "epoch": 0.5672964939421857, "grad_norm": 1.449212773673108, "learning_rate": 4.1573439606831844e-07, "loss": 0.3742, "step": 32636 }, { "epoch": 0.5673138764796885, "grad_norm": 1.5093509527118598, "learning_rate": 4.157066495042743e-07, "loss": 0.2046, "step": 32637 }, { "epoch": 0.5673312590171913, "grad_norm": 1.4759779330398222, "learning_rate": 4.156789032074013e-07, "loss": 0.186, "step": 32638 }, { "epoch": 0.5673486415546941, "grad_norm": 2.029400990236752, "learning_rate": 4.156511571777875e-07, "loss": 0.2533, "step": 32639 }, { "epoch": 0.5673660240921969, "grad_norm": 1.0768252258798718, "learning_rate": 4.156234114155208e-07, "loss": 0.4206, "step": 32640 }, { "epoch": 0.5673834066296998, "grad_norm": 1.3490707476674602, "learning_rate": 4.155956659206893e-07, "loss": 0.3373, "step": 32641 }, { "epoch": 0.5674007891672026, "grad_norm": 2.309361620660773, "learning_rate": 4.1556792069338046e-07, "loss": 0.2211, "step": 32642 }, { "epoch": 0.5674181717047054, "grad_norm": 1.8820207540402303, "learning_rate": 4.1554017573368265e-07, "loss": 0.2312, "step": 32643 }, { "epoch": 0.5674355542422083, "grad_norm": 1.630476122853638, "learning_rate": 4.155124310416836e-07, "loss": 0.193, "step": 32644 }, { "epoch": 0.5674529367797111, "grad_norm": 4.05051942247608, "learning_rate": 4.154846866174714e-07, "loss": 0.1287, "step": 32645 }, { "epoch": 0.5674703193172139, "grad_norm": 2.8007500999128356, "learning_rate": 4.154569424611339e-07, "loss": 0.1762, "step": 32646 }, { "epoch": 0.5674877018547168, "grad_norm": 1.2835573326422305, "learning_rate": 4.154291985727593e-07, "loss": 0.1858, "step": 32647 }, { "epoch": 0.5675050843922196, "grad_norm": 1.340279186434209, "learning_rate": 4.15401454952435e-07, "loss": 0.2537, "step": 32648 }, { "epoch": 0.5675224669297224, "grad_norm": 1.4565516243359173, "learning_rate": 4.153737116002494e-07, "loss": 0.2191, "step": 32649 }, { "epoch": 0.5675398494672252, "grad_norm": 1.2154243968066194, "learning_rate": 4.1534596851629004e-07, "loss": 0.1923, "step": 32650 }, { "epoch": 0.5675572320047281, "grad_norm": 1.72212313406095, "learning_rate": 4.1531822570064523e-07, "loss": 0.1873, "step": 32651 }, { "epoch": 0.5675746145422309, "grad_norm": 1.3814481826429659, "learning_rate": 4.1529048315340266e-07, "loss": 0.1831, "step": 32652 }, { "epoch": 0.5675919970797337, "grad_norm": 1.6412115181184024, "learning_rate": 4.152627408746505e-07, "loss": 0.2826, "step": 32653 }, { "epoch": 0.5676093796172366, "grad_norm": 2.547807592713852, "learning_rate": 4.1523499886447644e-07, "loss": 0.1904, "step": 32654 }, { "epoch": 0.5676267621547394, "grad_norm": 1.2418677163862548, "learning_rate": 4.152072571229684e-07, "loss": 0.2669, "step": 32655 }, { "epoch": 0.5676441446922422, "grad_norm": 1.284672443048531, "learning_rate": 4.1517951565021455e-07, "loss": 0.2669, "step": 32656 }, { "epoch": 0.5676615272297449, "grad_norm": 2.479589974271175, "learning_rate": 4.151517744463026e-07, "loss": 0.2474, "step": 32657 }, { "epoch": 0.5676789097672478, "grad_norm": 1.468572250003602, "learning_rate": 4.151240335113205e-07, "loss": 0.3888, "step": 32658 }, { "epoch": 0.5676962923047506, "grad_norm": 1.8723587222301143, "learning_rate": 4.150962928453564e-07, "loss": 0.2881, "step": 32659 }, { "epoch": 0.5677136748422534, "grad_norm": 1.8766471269043483, "learning_rate": 4.150685524484979e-07, "loss": 0.3565, "step": 32660 }, { "epoch": 0.5677310573797563, "grad_norm": 1.3379598859641282, "learning_rate": 4.150408123208331e-07, "loss": 0.2447, "step": 32661 }, { "epoch": 0.5677484399172591, "grad_norm": 2.4553214810296558, "learning_rate": 4.150130724624499e-07, "loss": 0.3607, "step": 32662 }, { "epoch": 0.5677658224547619, "grad_norm": 2.112282835497711, "learning_rate": 4.149853328734361e-07, "loss": 0.2337, "step": 32663 }, { "epoch": 0.5677832049922648, "grad_norm": 2.848994262146618, "learning_rate": 4.1495759355387993e-07, "loss": 0.4293, "step": 32664 }, { "epoch": 0.5678005875297676, "grad_norm": 1.451848972482783, "learning_rate": 4.1492985450386906e-07, "loss": 0.267, "step": 32665 }, { "epoch": 0.5678179700672704, "grad_norm": 1.8295203672431857, "learning_rate": 4.1490211572349164e-07, "loss": 0.35, "step": 32666 }, { "epoch": 0.5678353526047732, "grad_norm": 1.5743171090579926, "learning_rate": 4.1487437721283514e-07, "loss": 0.3573, "step": 32667 }, { "epoch": 0.5678527351422761, "grad_norm": 1.5069174469038016, "learning_rate": 4.148466389719879e-07, "loss": 0.1379, "step": 32668 }, { "epoch": 0.5678701176797789, "grad_norm": 2.0659337600245307, "learning_rate": 4.148189010010376e-07, "loss": 0.2018, "step": 32669 }, { "epoch": 0.5678875002172817, "grad_norm": 1.6114050817264987, "learning_rate": 4.1479116330007233e-07, "loss": 0.2388, "step": 32670 }, { "epoch": 0.5679048827547846, "grad_norm": 1.34709741855664, "learning_rate": 4.147634258691799e-07, "loss": 0.2891, "step": 32671 }, { "epoch": 0.5679222652922874, "grad_norm": 1.3707779458243492, "learning_rate": 4.1473568870844856e-07, "loss": 0.491, "step": 32672 }, { "epoch": 0.5679396478297902, "grad_norm": 2.005023980279943, "learning_rate": 4.1470795181796555e-07, "loss": 0.1323, "step": 32673 }, { "epoch": 0.5679570303672931, "grad_norm": 1.1971161420929188, "learning_rate": 4.1468021519781924e-07, "loss": 0.2113, "step": 32674 }, { "epoch": 0.5679744129047959, "grad_norm": 1.3661525236927707, "learning_rate": 4.146524788480974e-07, "loss": 0.1966, "step": 32675 }, { "epoch": 0.5679917954422987, "grad_norm": 1.4436217547408141, "learning_rate": 4.146247427688881e-07, "loss": 0.1883, "step": 32676 }, { "epoch": 0.5680091779798014, "grad_norm": 1.685075606033166, "learning_rate": 4.14597006960279e-07, "loss": 0.2901, "step": 32677 }, { "epoch": 0.5680265605173043, "grad_norm": 1.619893713300677, "learning_rate": 4.145692714223584e-07, "loss": 0.3018, "step": 32678 }, { "epoch": 0.5680439430548071, "grad_norm": 1.5159926979834184, "learning_rate": 4.145415361552139e-07, "loss": 0.329, "step": 32679 }, { "epoch": 0.5680613255923099, "grad_norm": 1.3089832781484556, "learning_rate": 4.1451380115893334e-07, "loss": 0.168, "step": 32680 }, { "epoch": 0.5680787081298128, "grad_norm": 2.0817580182178963, "learning_rate": 4.1448606643360477e-07, "loss": 0.3751, "step": 32681 }, { "epoch": 0.5680960906673156, "grad_norm": 1.4792251845944095, "learning_rate": 4.144583319793162e-07, "loss": 0.2786, "step": 32682 }, { "epoch": 0.5681134732048184, "grad_norm": 1.413132920209057, "learning_rate": 4.1443059779615526e-07, "loss": 0.3236, "step": 32683 }, { "epoch": 0.5681308557423213, "grad_norm": 1.107305921915226, "learning_rate": 4.1440286388421014e-07, "loss": 0.1573, "step": 32684 }, { "epoch": 0.5681482382798241, "grad_norm": 1.3081926274780122, "learning_rate": 4.143751302435687e-07, "loss": 0.3288, "step": 32685 }, { "epoch": 0.5681656208173269, "grad_norm": 2.0577237680794136, "learning_rate": 4.143473968743186e-07, "loss": 0.2427, "step": 32686 }, { "epoch": 0.5681830033548297, "grad_norm": 1.4456871571433925, "learning_rate": 4.14319663776548e-07, "loss": 0.1427, "step": 32687 }, { "epoch": 0.5682003858923326, "grad_norm": 1.7050554075298892, "learning_rate": 4.1429193095034476e-07, "loss": 0.2199, "step": 32688 }, { "epoch": 0.5682177684298354, "grad_norm": 2.0081383366082943, "learning_rate": 4.142641983957965e-07, "loss": 0.2098, "step": 32689 }, { "epoch": 0.5682351509673382, "grad_norm": 0.9885700232061213, "learning_rate": 4.142364661129916e-07, "loss": 0.276, "step": 32690 }, { "epoch": 0.5682525335048411, "grad_norm": 1.7038974836211707, "learning_rate": 4.142087341020177e-07, "loss": 0.3696, "step": 32691 }, { "epoch": 0.5682699160423439, "grad_norm": 0.9801230797230465, "learning_rate": 4.1418100236296257e-07, "loss": 0.1517, "step": 32692 }, { "epoch": 0.5682872985798467, "grad_norm": 1.131127806912068, "learning_rate": 4.141532708959143e-07, "loss": 0.1222, "step": 32693 }, { "epoch": 0.5683046811173496, "grad_norm": 1.220527852764666, "learning_rate": 4.141255397009606e-07, "loss": 0.1552, "step": 32694 }, { "epoch": 0.5683220636548524, "grad_norm": 3.990388257448379, "learning_rate": 4.1409780877818963e-07, "loss": 0.3741, "step": 32695 }, { "epoch": 0.5683394461923552, "grad_norm": 2.8196114655557287, "learning_rate": 4.140700781276891e-07, "loss": 0.2656, "step": 32696 }, { "epoch": 0.5683568287298579, "grad_norm": 1.7655498393496059, "learning_rate": 4.140423477495471e-07, "loss": 0.4051, "step": 32697 }, { "epoch": 0.5683742112673608, "grad_norm": 0.8775584823569935, "learning_rate": 4.1401461764385114e-07, "loss": 0.2835, "step": 32698 }, { "epoch": 0.5683915938048636, "grad_norm": 2.702188943221885, "learning_rate": 4.1398688781068946e-07, "loss": 0.2903, "step": 32699 }, { "epoch": 0.5684089763423664, "grad_norm": 1.3239386700806088, "learning_rate": 4.1395915825014974e-07, "loss": 0.1743, "step": 32700 }, { "epoch": 0.5684263588798693, "grad_norm": 1.513280011873388, "learning_rate": 4.139314289623201e-07, "loss": 0.144, "step": 32701 }, { "epoch": 0.5684437414173721, "grad_norm": 1.6301295973036807, "learning_rate": 4.1390369994728813e-07, "loss": 0.1577, "step": 32702 }, { "epoch": 0.5684611239548749, "grad_norm": 1.6139861078608229, "learning_rate": 4.13875971205142e-07, "loss": 0.2171, "step": 32703 }, { "epoch": 0.5684785064923777, "grad_norm": 1.6183756884258957, "learning_rate": 4.1384824273596956e-07, "loss": 0.3368, "step": 32704 }, { "epoch": 0.5684958890298806, "grad_norm": 1.054516897216752, "learning_rate": 4.1382051453985856e-07, "loss": 0.3144, "step": 32705 }, { "epoch": 0.5685132715673834, "grad_norm": 1.10070402590333, "learning_rate": 4.137927866168968e-07, "loss": 0.1993, "step": 32706 }, { "epoch": 0.5685306541048862, "grad_norm": 2.7781642706788925, "learning_rate": 4.1376505896717245e-07, "loss": 0.4348, "step": 32707 }, { "epoch": 0.5685480366423891, "grad_norm": 2.3972130875772333, "learning_rate": 4.1373733159077314e-07, "loss": 0.3655, "step": 32708 }, { "epoch": 0.5685654191798919, "grad_norm": 1.3499667605898804, "learning_rate": 4.137096044877869e-07, "loss": 0.299, "step": 32709 }, { "epoch": 0.5685828017173947, "grad_norm": 1.872003356546258, "learning_rate": 4.1368187765830174e-07, "loss": 0.2056, "step": 32710 }, { "epoch": 0.5686001842548976, "grad_norm": 1.8001382100738257, "learning_rate": 4.136541511024052e-07, "loss": 0.1639, "step": 32711 }, { "epoch": 0.5686175667924004, "grad_norm": 3.22350501204189, "learning_rate": 4.1362642482018537e-07, "loss": 0.3813, "step": 32712 }, { "epoch": 0.5686349493299032, "grad_norm": 1.5539100303048459, "learning_rate": 4.1359869881173014e-07, "loss": 0.332, "step": 32713 }, { "epoch": 0.568652331867406, "grad_norm": 0.9207351383650759, "learning_rate": 4.135709730771272e-07, "loss": 0.1734, "step": 32714 }, { "epoch": 0.5686697144049089, "grad_norm": 1.3799913678621507, "learning_rate": 4.1354324761646476e-07, "loss": 0.216, "step": 32715 }, { "epoch": 0.5686870969424117, "grad_norm": 1.6774906833982632, "learning_rate": 4.1351552242983054e-07, "loss": 0.3998, "step": 32716 }, { "epoch": 0.5687044794799144, "grad_norm": 1.491095189631324, "learning_rate": 4.134877975173122e-07, "loss": 0.1799, "step": 32717 }, { "epoch": 0.5687218620174173, "grad_norm": 1.1763312891821411, "learning_rate": 4.1346007287899785e-07, "loss": 0.2598, "step": 32718 }, { "epoch": 0.5687392445549201, "grad_norm": 1.4528092382279856, "learning_rate": 4.1343234851497534e-07, "loss": 0.225, "step": 32719 }, { "epoch": 0.5687566270924229, "grad_norm": 2.9295657310973997, "learning_rate": 4.1340462442533254e-07, "loss": 0.2617, "step": 32720 }, { "epoch": 0.5687740096299257, "grad_norm": 1.799098620998247, "learning_rate": 4.1337690061015726e-07, "loss": 0.2413, "step": 32721 }, { "epoch": 0.5687913921674286, "grad_norm": 2.0891894264565614, "learning_rate": 4.133491770695374e-07, "loss": 0.3731, "step": 32722 }, { "epoch": 0.5688087747049314, "grad_norm": 1.2079046733580652, "learning_rate": 4.1332145380356097e-07, "loss": 0.2002, "step": 32723 }, { "epoch": 0.5688261572424342, "grad_norm": 1.7006622840370416, "learning_rate": 4.1329373081231566e-07, "loss": 0.3022, "step": 32724 }, { "epoch": 0.5688435397799371, "grad_norm": 1.3362336312108607, "learning_rate": 4.132660080958893e-07, "loss": 0.3336, "step": 32725 }, { "epoch": 0.5688609223174399, "grad_norm": 2.2575255206502174, "learning_rate": 4.132382856543699e-07, "loss": 0.3146, "step": 32726 }, { "epoch": 0.5688783048549427, "grad_norm": 1.479663890548103, "learning_rate": 4.132105634878452e-07, "loss": 0.1884, "step": 32727 }, { "epoch": 0.5688956873924456, "grad_norm": 1.772233158464077, "learning_rate": 4.131828415964033e-07, "loss": 0.2358, "step": 32728 }, { "epoch": 0.5689130699299484, "grad_norm": 2.5447647424387494, "learning_rate": 4.1315511998013195e-07, "loss": 0.2486, "step": 32729 }, { "epoch": 0.5689304524674512, "grad_norm": 1.049123328084745, "learning_rate": 4.1312739863911895e-07, "loss": 0.1842, "step": 32730 }, { "epoch": 0.568947835004954, "grad_norm": 1.7000289577289496, "learning_rate": 4.13099677573452e-07, "loss": 0.3049, "step": 32731 }, { "epoch": 0.5689652175424569, "grad_norm": 1.8027252762790542, "learning_rate": 4.130719567832193e-07, "loss": 0.2697, "step": 32732 }, { "epoch": 0.5689826000799597, "grad_norm": 1.3409458662375806, "learning_rate": 4.130442362685085e-07, "loss": 0.2571, "step": 32733 }, { "epoch": 0.5689999826174625, "grad_norm": 1.4438875699829483, "learning_rate": 4.1301651602940754e-07, "loss": 0.284, "step": 32734 }, { "epoch": 0.5690173651549654, "grad_norm": 2.1063969892139336, "learning_rate": 4.1298879606600443e-07, "loss": 0.2027, "step": 32735 }, { "epoch": 0.5690347476924682, "grad_norm": 1.455492308174587, "learning_rate": 4.129610763783867e-07, "loss": 0.3233, "step": 32736 }, { "epoch": 0.5690521302299709, "grad_norm": 1.5602277763413102, "learning_rate": 4.129333569666424e-07, "loss": 0.2012, "step": 32737 }, { "epoch": 0.5690695127674738, "grad_norm": 2.355486238080927, "learning_rate": 4.1290563783085934e-07, "loss": 0.2291, "step": 32738 }, { "epoch": 0.5690868953049766, "grad_norm": 0.7907841885053698, "learning_rate": 4.1287791897112533e-07, "loss": 0.2418, "step": 32739 }, { "epoch": 0.5691042778424794, "grad_norm": 1.6542178714530096, "learning_rate": 4.1285020038752846e-07, "loss": 0.2308, "step": 32740 }, { "epoch": 0.5691216603799822, "grad_norm": 0.9651213509506384, "learning_rate": 4.1282248208015623e-07, "loss": 0.1923, "step": 32741 }, { "epoch": 0.5691390429174851, "grad_norm": 1.2626450836660656, "learning_rate": 4.1279476404909694e-07, "loss": 0.2193, "step": 32742 }, { "epoch": 0.5691564254549879, "grad_norm": 1.2589523138444705, "learning_rate": 4.1276704629443817e-07, "loss": 0.1118, "step": 32743 }, { "epoch": 0.5691738079924907, "grad_norm": 1.6856383069321963, "learning_rate": 4.127393288162677e-07, "loss": 0.2108, "step": 32744 }, { "epoch": 0.5691911905299936, "grad_norm": 1.821181681935426, "learning_rate": 4.1271161161467335e-07, "loss": 0.2854, "step": 32745 }, { "epoch": 0.5692085730674964, "grad_norm": 1.811386273329825, "learning_rate": 4.1268389468974317e-07, "loss": 0.3566, "step": 32746 }, { "epoch": 0.5692259556049992, "grad_norm": 1.138192176918698, "learning_rate": 4.126561780415649e-07, "loss": 0.3176, "step": 32747 }, { "epoch": 0.569243338142502, "grad_norm": 2.535304708010275, "learning_rate": 4.126284616702266e-07, "loss": 0.2458, "step": 32748 }, { "epoch": 0.5692607206800049, "grad_norm": 3.367636645794059, "learning_rate": 4.126007455758159e-07, "loss": 0.3698, "step": 32749 }, { "epoch": 0.5692781032175077, "grad_norm": 0.920299770203902, "learning_rate": 4.125730297584205e-07, "loss": 0.231, "step": 32750 }, { "epoch": 0.5692954857550105, "grad_norm": 1.4999855782080829, "learning_rate": 4.1254531421812853e-07, "loss": 0.2506, "step": 32751 }, { "epoch": 0.5693128682925134, "grad_norm": 1.9097355796423627, "learning_rate": 4.125175989550277e-07, "loss": 0.3991, "step": 32752 }, { "epoch": 0.5693302508300162, "grad_norm": 1.2933584972862422, "learning_rate": 4.12489883969206e-07, "loss": 0.1701, "step": 32753 }, { "epoch": 0.569347633367519, "grad_norm": 1.6943105823414408, "learning_rate": 4.1246216926075127e-07, "loss": 0.6876, "step": 32754 }, { "epoch": 0.5693650159050219, "grad_norm": 1.8887533312250702, "learning_rate": 4.1243445482975115e-07, "loss": 0.3241, "step": 32755 }, { "epoch": 0.5693823984425246, "grad_norm": 1.417246994429722, "learning_rate": 4.1240674067629345e-07, "loss": 0.267, "step": 32756 }, { "epoch": 0.5693997809800274, "grad_norm": 1.477772435266147, "learning_rate": 4.123790268004662e-07, "loss": 0.1402, "step": 32757 }, { "epoch": 0.5694171635175302, "grad_norm": 3.2129326827088454, "learning_rate": 4.123513132023572e-07, "loss": 0.1992, "step": 32758 }, { "epoch": 0.5694345460550331, "grad_norm": 1.01544606862259, "learning_rate": 4.123235998820543e-07, "loss": 0.2149, "step": 32759 }, { "epoch": 0.5694519285925359, "grad_norm": 1.1656202722404225, "learning_rate": 4.1229588683964524e-07, "loss": 0.1626, "step": 32760 }, { "epoch": 0.5694693111300387, "grad_norm": 1.8119245566527982, "learning_rate": 4.122681740752182e-07, "loss": 0.4732, "step": 32761 }, { "epoch": 0.5694866936675416, "grad_norm": 2.172071824853958, "learning_rate": 4.122404615888605e-07, "loss": 0.3744, "step": 32762 }, { "epoch": 0.5695040762050444, "grad_norm": 1.3583222987880046, "learning_rate": 4.122127493806603e-07, "loss": 0.1992, "step": 32763 }, { "epoch": 0.5695214587425472, "grad_norm": 2.0021607337829224, "learning_rate": 4.121850374507052e-07, "loss": 0.2076, "step": 32764 }, { "epoch": 0.5695388412800501, "grad_norm": 0.9313397059751578, "learning_rate": 4.1215732579908336e-07, "loss": 0.393, "step": 32765 }, { "epoch": 0.5695562238175529, "grad_norm": 1.707218429546011, "learning_rate": 4.1212961442588234e-07, "loss": 0.2777, "step": 32766 }, { "epoch": 0.5695736063550557, "grad_norm": 2.033527753374727, "learning_rate": 4.1210190333119026e-07, "loss": 0.1791, "step": 32767 }, { "epoch": 0.5695909888925585, "grad_norm": 1.3605219560611337, "learning_rate": 4.120741925150947e-07, "loss": 0.2441, "step": 32768 }, { "epoch": 0.5696083714300614, "grad_norm": 2.2425133933730486, "learning_rate": 4.120464819776835e-07, "loss": 0.1992, "step": 32769 }, { "epoch": 0.5696257539675642, "grad_norm": 1.64065546140537, "learning_rate": 4.120187717190445e-07, "loss": 0.2433, "step": 32770 }, { "epoch": 0.569643136505067, "grad_norm": 1.5960989925592106, "learning_rate": 4.119910617392657e-07, "loss": 0.2208, "step": 32771 }, { "epoch": 0.5696605190425699, "grad_norm": 1.3847538536504767, "learning_rate": 4.119633520384346e-07, "loss": 0.1482, "step": 32772 }, { "epoch": 0.5696779015800727, "grad_norm": 4.725123579608113, "learning_rate": 4.1193564261663955e-07, "loss": 0.2163, "step": 32773 }, { "epoch": 0.5696952841175755, "grad_norm": 1.46190590441961, "learning_rate": 4.119079334739679e-07, "loss": 0.1219, "step": 32774 }, { "epoch": 0.5697126666550784, "grad_norm": 1.0993022386752695, "learning_rate": 4.1188022461050754e-07, "loss": 0.239, "step": 32775 }, { "epoch": 0.5697300491925811, "grad_norm": 1.076364152656496, "learning_rate": 4.1185251602634653e-07, "loss": 0.3066, "step": 32776 }, { "epoch": 0.5697474317300839, "grad_norm": 1.0289520925972506, "learning_rate": 4.1182480772157245e-07, "loss": 0.1759, "step": 32777 }, { "epoch": 0.5697648142675867, "grad_norm": 1.4821793591398014, "learning_rate": 4.1179709969627324e-07, "loss": 0.2899, "step": 32778 }, { "epoch": 0.5697821968050896, "grad_norm": 1.122786534728332, "learning_rate": 4.1176939195053683e-07, "loss": 0.2484, "step": 32779 }, { "epoch": 0.5697995793425924, "grad_norm": 1.9610703277054933, "learning_rate": 4.117416844844509e-07, "loss": 0.1463, "step": 32780 }, { "epoch": 0.5698169618800952, "grad_norm": 1.2467161956595691, "learning_rate": 4.117139772981031e-07, "loss": 0.1503, "step": 32781 }, { "epoch": 0.5698343444175981, "grad_norm": 1.2064102323511783, "learning_rate": 4.116862703915816e-07, "loss": 0.3074, "step": 32782 }, { "epoch": 0.5698517269551009, "grad_norm": 1.5699064872466602, "learning_rate": 4.1165856376497393e-07, "loss": 0.1877, "step": 32783 }, { "epoch": 0.5698691094926037, "grad_norm": 2.494455919953402, "learning_rate": 4.1163085741836813e-07, "loss": 0.2611, "step": 32784 }, { "epoch": 0.5698864920301066, "grad_norm": 1.0905533813164767, "learning_rate": 4.1160315135185193e-07, "loss": 0.2, "step": 32785 }, { "epoch": 0.5699038745676094, "grad_norm": 1.3575177038379125, "learning_rate": 4.115754455655132e-07, "loss": 0.1459, "step": 32786 }, { "epoch": 0.5699212571051122, "grad_norm": 1.8829665290066577, "learning_rate": 4.115477400594395e-07, "loss": 0.2077, "step": 32787 }, { "epoch": 0.569938639642615, "grad_norm": 2.479710926596314, "learning_rate": 4.1152003483371895e-07, "loss": 0.2473, "step": 32788 }, { "epoch": 0.5699560221801179, "grad_norm": 1.546505432907791, "learning_rate": 4.114923298884391e-07, "loss": 0.2099, "step": 32789 }, { "epoch": 0.5699734047176207, "grad_norm": 4.57024371375952, "learning_rate": 4.1146462522368806e-07, "loss": 0.2182, "step": 32790 }, { "epoch": 0.5699907872551235, "grad_norm": 1.949492831469282, "learning_rate": 4.114369208395534e-07, "loss": 0.2708, "step": 32791 }, { "epoch": 0.5700081697926264, "grad_norm": 1.6102102133998064, "learning_rate": 4.114092167361233e-07, "loss": 0.179, "step": 32792 }, { "epoch": 0.5700255523301292, "grad_norm": 1.792181294152357, "learning_rate": 4.11381512913485e-07, "loss": 0.2705, "step": 32793 }, { "epoch": 0.570042934867632, "grad_norm": 1.5788843982889966, "learning_rate": 4.1135380937172664e-07, "loss": 0.2056, "step": 32794 }, { "epoch": 0.5700603174051349, "grad_norm": 1.816185749966821, "learning_rate": 4.1132610611093593e-07, "loss": 0.4026, "step": 32795 }, { "epoch": 0.5700776999426376, "grad_norm": 2.5741200804228983, "learning_rate": 4.1129840313120086e-07, "loss": 0.2764, "step": 32796 }, { "epoch": 0.5700950824801404, "grad_norm": 0.9837716126178302, "learning_rate": 4.112707004326089e-07, "loss": 0.1717, "step": 32797 }, { "epoch": 0.5701124650176432, "grad_norm": 1.3719593746332752, "learning_rate": 4.112429980152484e-07, "loss": 0.4576, "step": 32798 }, { "epoch": 0.5701298475551461, "grad_norm": 1.1609666499532387, "learning_rate": 4.112152958792066e-07, "loss": 0.3849, "step": 32799 }, { "epoch": 0.5701472300926489, "grad_norm": 1.8353980565576806, "learning_rate": 4.111875940245715e-07, "loss": 0.2344, "step": 32800 }, { "epoch": 0.5701646126301517, "grad_norm": 2.7549231341918303, "learning_rate": 4.11159892451431e-07, "loss": 0.3032, "step": 32801 }, { "epoch": 0.5701819951676546, "grad_norm": 2.7804289906802295, "learning_rate": 4.1113219115987287e-07, "loss": 0.2294, "step": 32802 }, { "epoch": 0.5701993777051574, "grad_norm": 1.8858891327053493, "learning_rate": 4.111044901499847e-07, "loss": 0.2703, "step": 32803 }, { "epoch": 0.5702167602426602, "grad_norm": 1.4708842219011056, "learning_rate": 4.1107678942185465e-07, "loss": 0.2939, "step": 32804 }, { "epoch": 0.570234142780163, "grad_norm": 1.7042179530530617, "learning_rate": 4.1104908897557035e-07, "loss": 0.3289, "step": 32805 }, { "epoch": 0.5702515253176659, "grad_norm": 2.177909137098301, "learning_rate": 4.110213888112194e-07, "loss": 0.3456, "step": 32806 }, { "epoch": 0.5702689078551687, "grad_norm": 1.093578445430318, "learning_rate": 4.1099368892888993e-07, "loss": 0.1273, "step": 32807 }, { "epoch": 0.5702862903926715, "grad_norm": 1.9652725797084507, "learning_rate": 4.1096598932866937e-07, "loss": 0.2794, "step": 32808 }, { "epoch": 0.5703036729301744, "grad_norm": 1.8530032580038527, "learning_rate": 4.109382900106459e-07, "loss": 0.2834, "step": 32809 }, { "epoch": 0.5703210554676772, "grad_norm": 1.1002055140305866, "learning_rate": 4.1091059097490717e-07, "loss": 0.1728, "step": 32810 }, { "epoch": 0.57033843800518, "grad_norm": 1.3303184436752868, "learning_rate": 4.10882892221541e-07, "loss": 0.2287, "step": 32811 }, { "epoch": 0.5703558205426829, "grad_norm": 1.9526618635610498, "learning_rate": 4.1085519375063493e-07, "loss": 0.3438, "step": 32812 }, { "epoch": 0.5703732030801857, "grad_norm": 1.437215979412624, "learning_rate": 4.1082749556227704e-07, "loss": 0.4364, "step": 32813 }, { "epoch": 0.5703905856176885, "grad_norm": 1.1905593542716797, "learning_rate": 4.1079979765655493e-07, "loss": 0.2026, "step": 32814 }, { "epoch": 0.5704079681551913, "grad_norm": 2.1679244426479873, "learning_rate": 4.1077210003355655e-07, "loss": 0.2427, "step": 32815 }, { "epoch": 0.5704253506926941, "grad_norm": 2.521965208951398, "learning_rate": 4.1074440269336954e-07, "loss": 0.2598, "step": 32816 }, { "epoch": 0.5704427332301969, "grad_norm": 2.0842049078138922, "learning_rate": 4.107167056360821e-07, "loss": 0.2163, "step": 32817 }, { "epoch": 0.5704601157676997, "grad_norm": 2.333908498326335, "learning_rate": 4.1068900886178136e-07, "loss": 0.3488, "step": 32818 }, { "epoch": 0.5704774983052026, "grad_norm": 1.489588047951516, "learning_rate": 4.106613123705556e-07, "loss": 0.2732, "step": 32819 }, { "epoch": 0.5704948808427054, "grad_norm": 1.3742957975132466, "learning_rate": 4.1063361616249227e-07, "loss": 0.1618, "step": 32820 }, { "epoch": 0.5705122633802082, "grad_norm": 3.673699022953233, "learning_rate": 4.106059202376795e-07, "loss": 0.2972, "step": 32821 }, { "epoch": 0.570529645917711, "grad_norm": 3.2894399100332596, "learning_rate": 4.105782245962047e-07, "loss": 0.2652, "step": 32822 }, { "epoch": 0.5705470284552139, "grad_norm": 1.1441948105053477, "learning_rate": 4.10550529238156e-07, "loss": 0.2146, "step": 32823 }, { "epoch": 0.5705644109927167, "grad_norm": 1.5019403223521692, "learning_rate": 4.1052283416362115e-07, "loss": 0.2042, "step": 32824 }, { "epoch": 0.5705817935302195, "grad_norm": 1.5305203067809177, "learning_rate": 4.104951393726877e-07, "loss": 0.2142, "step": 32825 }, { "epoch": 0.5705991760677224, "grad_norm": 1.5831193570770472, "learning_rate": 4.1046744486544346e-07, "loss": 0.1931, "step": 32826 }, { "epoch": 0.5706165586052252, "grad_norm": 1.3997454514981835, "learning_rate": 4.1043975064197634e-07, "loss": 0.3261, "step": 32827 }, { "epoch": 0.570633941142728, "grad_norm": 0.9525314009581255, "learning_rate": 4.10412056702374e-07, "loss": 0.1941, "step": 32828 }, { "epoch": 0.5706513236802309, "grad_norm": 1.5454201363697406, "learning_rate": 4.103843630467244e-07, "loss": 0.2276, "step": 32829 }, { "epoch": 0.5706687062177337, "grad_norm": 1.8159671057790305, "learning_rate": 4.103566696751153e-07, "loss": 0.2458, "step": 32830 }, { "epoch": 0.5706860887552365, "grad_norm": 1.3705770452249755, "learning_rate": 4.1032897658763423e-07, "loss": 0.2122, "step": 32831 }, { "epoch": 0.5707034712927394, "grad_norm": 1.6316671370296465, "learning_rate": 4.1030128378436914e-07, "loss": 0.1913, "step": 32832 }, { "epoch": 0.5707208538302422, "grad_norm": 1.8801419990700672, "learning_rate": 4.1027359126540775e-07, "loss": 0.2418, "step": 32833 }, { "epoch": 0.570738236367745, "grad_norm": 1.4924645969522898, "learning_rate": 4.102458990308378e-07, "loss": 0.2061, "step": 32834 }, { "epoch": 0.5707556189052478, "grad_norm": 1.2628374023496651, "learning_rate": 4.102182070807472e-07, "loss": 0.3446, "step": 32835 }, { "epoch": 0.5707730014427506, "grad_norm": 3.011693448510476, "learning_rate": 4.1019051541522377e-07, "loss": 0.2233, "step": 32836 }, { "epoch": 0.5707903839802534, "grad_norm": 2.2506644016677195, "learning_rate": 4.1016282403435496e-07, "loss": 0.3207, "step": 32837 }, { "epoch": 0.5708077665177562, "grad_norm": 1.5081880961498284, "learning_rate": 4.101351329382288e-07, "loss": 0.3168, "step": 32838 }, { "epoch": 0.570825149055259, "grad_norm": 1.6667266449561027, "learning_rate": 4.101074421269329e-07, "loss": 0.4407, "step": 32839 }, { "epoch": 0.5708425315927619, "grad_norm": 2.292821566602544, "learning_rate": 4.100797516005552e-07, "loss": 0.2113, "step": 32840 }, { "epoch": 0.5708599141302647, "grad_norm": 2.2501460680317673, "learning_rate": 4.100520613591833e-07, "loss": 0.1772, "step": 32841 }, { "epoch": 0.5708772966677675, "grad_norm": 1.7948611026159143, "learning_rate": 4.1002437140290513e-07, "loss": 0.2413, "step": 32842 }, { "epoch": 0.5708946792052704, "grad_norm": 4.621991845319526, "learning_rate": 4.0999668173180854e-07, "loss": 0.3342, "step": 32843 }, { "epoch": 0.5709120617427732, "grad_norm": 1.3347964986526082, "learning_rate": 4.0996899234598094e-07, "loss": 0.2503, "step": 32844 }, { "epoch": 0.570929444280276, "grad_norm": 0.891270728546516, "learning_rate": 4.099413032455102e-07, "loss": 0.1281, "step": 32845 }, { "epoch": 0.5709468268177789, "grad_norm": 1.962351507911471, "learning_rate": 4.099136144304843e-07, "loss": 0.1489, "step": 32846 }, { "epoch": 0.5709642093552817, "grad_norm": 2.9177630549067124, "learning_rate": 4.098859259009907e-07, "loss": 0.2907, "step": 32847 }, { "epoch": 0.5709815918927845, "grad_norm": 2.173421568076805, "learning_rate": 4.0985823765711747e-07, "loss": 0.3504, "step": 32848 }, { "epoch": 0.5709989744302874, "grad_norm": 1.4403854136120453, "learning_rate": 4.0983054969895226e-07, "loss": 0.3344, "step": 32849 }, { "epoch": 0.5710163569677902, "grad_norm": 2.380594668812976, "learning_rate": 4.098028620265828e-07, "loss": 0.2817, "step": 32850 }, { "epoch": 0.571033739505293, "grad_norm": 3.5429593360494422, "learning_rate": 4.097751746400966e-07, "loss": 0.2186, "step": 32851 }, { "epoch": 0.5710511220427958, "grad_norm": 1.1680647720427781, "learning_rate": 4.0974748753958176e-07, "loss": 0.1871, "step": 32852 }, { "epoch": 0.5710685045802987, "grad_norm": 2.158851280645533, "learning_rate": 4.097198007251259e-07, "loss": 0.3193, "step": 32853 }, { "epoch": 0.5710858871178015, "grad_norm": 2.4666138442159173, "learning_rate": 4.096921141968169e-07, "loss": 0.2006, "step": 32854 }, { "epoch": 0.5711032696553043, "grad_norm": 1.5607382854807361, "learning_rate": 4.096644279547425e-07, "loss": 0.2362, "step": 32855 }, { "epoch": 0.571120652192807, "grad_norm": 0.9666830404975385, "learning_rate": 4.096367419989901e-07, "loss": 0.1808, "step": 32856 }, { "epoch": 0.5711380347303099, "grad_norm": 1.43258729055172, "learning_rate": 4.0960905632964785e-07, "loss": 0.1537, "step": 32857 }, { "epoch": 0.5711554172678127, "grad_norm": 1.4725378183739553, "learning_rate": 4.0958137094680346e-07, "loss": 0.2623, "step": 32858 }, { "epoch": 0.5711727998053155, "grad_norm": 2.013416428889322, "learning_rate": 4.095536858505444e-07, "loss": 0.2037, "step": 32859 }, { "epoch": 0.5711901823428184, "grad_norm": 1.9385939887394736, "learning_rate": 4.095260010409587e-07, "loss": 0.2553, "step": 32860 }, { "epoch": 0.5712075648803212, "grad_norm": 1.0121562912918418, "learning_rate": 4.0949831651813396e-07, "loss": 0.1789, "step": 32861 }, { "epoch": 0.571224947417824, "grad_norm": 1.5602402385324667, "learning_rate": 4.0947063228215815e-07, "loss": 0.2172, "step": 32862 }, { "epoch": 0.5712423299553269, "grad_norm": 1.892507262605859, "learning_rate": 4.094429483331188e-07, "loss": 0.2101, "step": 32863 }, { "epoch": 0.5712597124928297, "grad_norm": 2.503469831237358, "learning_rate": 4.0941526467110356e-07, "loss": 0.226, "step": 32864 }, { "epoch": 0.5712770950303325, "grad_norm": 1.3589845513848988, "learning_rate": 4.0938758129620044e-07, "loss": 0.2872, "step": 32865 }, { "epoch": 0.5712944775678354, "grad_norm": 2.055992065184642, "learning_rate": 4.09359898208497e-07, "loss": 0.335, "step": 32866 }, { "epoch": 0.5713118601053382, "grad_norm": 2.063842558559378, "learning_rate": 4.0933221540808106e-07, "loss": 0.2421, "step": 32867 }, { "epoch": 0.571329242642841, "grad_norm": 1.2487980722247687, "learning_rate": 4.0930453289504047e-07, "loss": 0.2504, "step": 32868 }, { "epoch": 0.5713466251803438, "grad_norm": 2.4371738462160595, "learning_rate": 4.092768506694628e-07, "loss": 0.2308, "step": 32869 }, { "epoch": 0.5713640077178467, "grad_norm": 1.966633458895624, "learning_rate": 4.0924916873143565e-07, "loss": 0.1826, "step": 32870 }, { "epoch": 0.5713813902553495, "grad_norm": 1.8001988213451205, "learning_rate": 4.0922148708104713e-07, "loss": 0.2395, "step": 32871 }, { "epoch": 0.5713987727928523, "grad_norm": 1.410220669381881, "learning_rate": 4.091938057183847e-07, "loss": 0.437, "step": 32872 }, { "epoch": 0.5714161553303552, "grad_norm": 1.6577917178045185, "learning_rate": 4.091661246435362e-07, "loss": 0.2443, "step": 32873 }, { "epoch": 0.571433537867858, "grad_norm": 1.4249634354153884, "learning_rate": 4.091384438565896e-07, "loss": 0.2906, "step": 32874 }, { "epoch": 0.5714509204053608, "grad_norm": 3.7935807452352006, "learning_rate": 4.091107633576322e-07, "loss": 0.2167, "step": 32875 }, { "epoch": 0.5714683029428635, "grad_norm": 2.4216782688358127, "learning_rate": 4.090830831467518e-07, "loss": 0.2765, "step": 32876 }, { "epoch": 0.5714856854803664, "grad_norm": 2.1217629310635355, "learning_rate": 4.0905540322403644e-07, "loss": 0.3436, "step": 32877 }, { "epoch": 0.5715030680178692, "grad_norm": 1.5290036735281456, "learning_rate": 4.0902772358957353e-07, "loss": 0.2639, "step": 32878 }, { "epoch": 0.571520450555372, "grad_norm": 1.0455954675229353, "learning_rate": 4.0900004424345106e-07, "loss": 0.1921, "step": 32879 }, { "epoch": 0.5715378330928749, "grad_norm": 1.3254143235695701, "learning_rate": 4.0897236518575654e-07, "loss": 0.1408, "step": 32880 }, { "epoch": 0.5715552156303777, "grad_norm": 2.4055777859601575, "learning_rate": 4.089446864165781e-07, "loss": 0.2867, "step": 32881 }, { "epoch": 0.5715725981678805, "grad_norm": 1.4100323703205964, "learning_rate": 4.0891700793600294e-07, "loss": 0.166, "step": 32882 }, { "epoch": 0.5715899807053834, "grad_norm": 2.484242176154739, "learning_rate": 4.0888932974411903e-07, "loss": 0.4048, "step": 32883 }, { "epoch": 0.5716073632428862, "grad_norm": 1.3849389323100538, "learning_rate": 4.0886165184101407e-07, "loss": 0.144, "step": 32884 }, { "epoch": 0.571624745780389, "grad_norm": 1.3465984469256953, "learning_rate": 4.088339742267759e-07, "loss": 0.2042, "step": 32885 }, { "epoch": 0.5716421283178919, "grad_norm": 2.363659693731677, "learning_rate": 4.0880629690149206e-07, "loss": 0.2935, "step": 32886 }, { "epoch": 0.5716595108553947, "grad_norm": 1.479049717873003, "learning_rate": 4.087786198652505e-07, "loss": 0.2851, "step": 32887 }, { "epoch": 0.5716768933928975, "grad_norm": 1.9933525559043759, "learning_rate": 4.0875094311813874e-07, "loss": 0.2998, "step": 32888 }, { "epoch": 0.5716942759304003, "grad_norm": 1.6195842617882779, "learning_rate": 4.0872326666024446e-07, "loss": 0.4008, "step": 32889 }, { "epoch": 0.5717116584679032, "grad_norm": 1.452327461735911, "learning_rate": 4.0869559049165566e-07, "loss": 0.3277, "step": 32890 }, { "epoch": 0.571729041005406, "grad_norm": 1.49656512613209, "learning_rate": 4.0866791461245985e-07, "loss": 0.2074, "step": 32891 }, { "epoch": 0.5717464235429088, "grad_norm": 1.0009729534510872, "learning_rate": 4.086402390227447e-07, "loss": 0.164, "step": 32892 }, { "epoch": 0.5717638060804117, "grad_norm": 1.0869036302408641, "learning_rate": 4.0861256372259823e-07, "loss": 0.399, "step": 32893 }, { "epoch": 0.5717811886179145, "grad_norm": 1.3118921765095415, "learning_rate": 4.085848887121078e-07, "loss": 0.1678, "step": 32894 }, { "epoch": 0.5717985711554172, "grad_norm": 1.340482325864303, "learning_rate": 4.085572139913613e-07, "loss": 0.3025, "step": 32895 }, { "epoch": 0.57181595369292, "grad_norm": 1.475956540904222, "learning_rate": 4.0852953956044643e-07, "loss": 0.2694, "step": 32896 }, { "epoch": 0.5718333362304229, "grad_norm": 1.2754771537497804, "learning_rate": 4.085018654194508e-07, "loss": 0.2165, "step": 32897 }, { "epoch": 0.5718507187679257, "grad_norm": 1.427727354254667, "learning_rate": 4.084741915684624e-07, "loss": 0.4477, "step": 32898 }, { "epoch": 0.5718681013054285, "grad_norm": 1.5082482841747336, "learning_rate": 4.0844651800756877e-07, "loss": 0.4421, "step": 32899 }, { "epoch": 0.5718854838429314, "grad_norm": 1.6397971673545406, "learning_rate": 4.084188447368576e-07, "loss": 0.3212, "step": 32900 }, { "epoch": 0.5719028663804342, "grad_norm": 1.0098841620555024, "learning_rate": 4.083911717564165e-07, "loss": 0.2637, "step": 32901 }, { "epoch": 0.571920248917937, "grad_norm": 1.2753760080318008, "learning_rate": 4.0836349906633345e-07, "loss": 0.264, "step": 32902 }, { "epoch": 0.5719376314554399, "grad_norm": 1.423981774708543, "learning_rate": 4.0833582666669584e-07, "loss": 0.3939, "step": 32903 }, { "epoch": 0.5719550139929427, "grad_norm": 1.0138114817774027, "learning_rate": 4.0830815455759163e-07, "loss": 0.2604, "step": 32904 }, { "epoch": 0.5719723965304455, "grad_norm": 1.6965536879777514, "learning_rate": 4.082804827391084e-07, "loss": 0.2047, "step": 32905 }, { "epoch": 0.5719897790679483, "grad_norm": 1.7435091672348941, "learning_rate": 4.0825281121133424e-07, "loss": 0.1996, "step": 32906 }, { "epoch": 0.5720071616054512, "grad_norm": 2.064030958198699, "learning_rate": 4.082251399743562e-07, "loss": 0.2553, "step": 32907 }, { "epoch": 0.572024544142954, "grad_norm": 1.6944331633382632, "learning_rate": 4.081974690282624e-07, "loss": 0.3846, "step": 32908 }, { "epoch": 0.5720419266804568, "grad_norm": 4.221627096778339, "learning_rate": 4.0816979837314034e-07, "loss": 0.4603, "step": 32909 }, { "epoch": 0.5720593092179597, "grad_norm": 1.3051386942662766, "learning_rate": 4.0814212800907796e-07, "loss": 0.2077, "step": 32910 }, { "epoch": 0.5720766917554625, "grad_norm": 1.2656792809501225, "learning_rate": 4.081144579361627e-07, "loss": 0.184, "step": 32911 }, { "epoch": 0.5720940742929653, "grad_norm": 1.4725077433705034, "learning_rate": 4.080867881544826e-07, "loss": 0.1924, "step": 32912 }, { "epoch": 0.5721114568304682, "grad_norm": 0.7276250105579452, "learning_rate": 4.080591186641251e-07, "loss": 0.2087, "step": 32913 }, { "epoch": 0.572128839367971, "grad_norm": 2.279445097080777, "learning_rate": 4.08031449465178e-07, "loss": 0.4225, "step": 32914 }, { "epoch": 0.5721462219054737, "grad_norm": 3.969138229073173, "learning_rate": 4.080037805577288e-07, "loss": 0.2649, "step": 32915 }, { "epoch": 0.5721636044429765, "grad_norm": 2.5809984642864183, "learning_rate": 4.0797611194186544e-07, "loss": 0.2481, "step": 32916 }, { "epoch": 0.5721809869804794, "grad_norm": 1.7058972413483824, "learning_rate": 4.0794844361767544e-07, "loss": 0.3215, "step": 32917 }, { "epoch": 0.5721983695179822, "grad_norm": 3.179940900461886, "learning_rate": 4.079207755852468e-07, "loss": 0.2921, "step": 32918 }, { "epoch": 0.572215752055485, "grad_norm": 2.778313577935616, "learning_rate": 4.078931078446669e-07, "loss": 0.3389, "step": 32919 }, { "epoch": 0.5722331345929879, "grad_norm": 1.159488175341978, "learning_rate": 4.0786544039602347e-07, "loss": 0.185, "step": 32920 }, { "epoch": 0.5722505171304907, "grad_norm": 2.2039289704349545, "learning_rate": 4.0783777323940433e-07, "loss": 0.2489, "step": 32921 }, { "epoch": 0.5722678996679935, "grad_norm": 1.2666280913621064, "learning_rate": 4.078101063748971e-07, "loss": 0.2247, "step": 32922 }, { "epoch": 0.5722852822054963, "grad_norm": 1.4499953450329717, "learning_rate": 4.077824398025894e-07, "loss": 0.3097, "step": 32923 }, { "epoch": 0.5723026647429992, "grad_norm": 1.3943271169563178, "learning_rate": 4.077547735225691e-07, "loss": 0.1772, "step": 32924 }, { "epoch": 0.572320047280502, "grad_norm": 4.962084023089376, "learning_rate": 4.0772710753492396e-07, "loss": 0.3987, "step": 32925 }, { "epoch": 0.5723374298180048, "grad_norm": 2.016941155449627, "learning_rate": 4.076994418397413e-07, "loss": 0.3901, "step": 32926 }, { "epoch": 0.5723548123555077, "grad_norm": 1.2038518854473228, "learning_rate": 4.07671776437109e-07, "loss": 0.1994, "step": 32927 }, { "epoch": 0.5723721948930105, "grad_norm": 1.2412396856122376, "learning_rate": 4.076441113271147e-07, "loss": 0.1797, "step": 32928 }, { "epoch": 0.5723895774305133, "grad_norm": 1.4550514093276707, "learning_rate": 4.076164465098463e-07, "loss": 0.229, "step": 32929 }, { "epoch": 0.5724069599680162, "grad_norm": 1.7682250014792371, "learning_rate": 4.0758878198539126e-07, "loss": 0.3138, "step": 32930 }, { "epoch": 0.572424342505519, "grad_norm": 2.194832206837462, "learning_rate": 4.0756111775383747e-07, "loss": 0.2568, "step": 32931 }, { "epoch": 0.5724417250430218, "grad_norm": 1.8516921277013587, "learning_rate": 4.075334538152722e-07, "loss": 0.3236, "step": 32932 }, { "epoch": 0.5724591075805247, "grad_norm": 2.5648925428443645, "learning_rate": 4.075057901697836e-07, "loss": 0.1459, "step": 32933 }, { "epoch": 0.5724764901180275, "grad_norm": 2.933896328630954, "learning_rate": 4.0747812681745903e-07, "loss": 0.372, "step": 32934 }, { "epoch": 0.5724938726555302, "grad_norm": 0.9271018693923251, "learning_rate": 4.074504637583864e-07, "loss": 0.1465, "step": 32935 }, { "epoch": 0.572511255193033, "grad_norm": 1.7991115664251627, "learning_rate": 4.0742280099265313e-07, "loss": 0.2477, "step": 32936 }, { "epoch": 0.5725286377305359, "grad_norm": 1.5167941709159625, "learning_rate": 4.073951385203475e-07, "loss": 0.1954, "step": 32937 }, { "epoch": 0.5725460202680387, "grad_norm": 1.4436964732183573, "learning_rate": 4.0736747634155627e-07, "loss": 0.2655, "step": 32938 }, { "epoch": 0.5725634028055415, "grad_norm": 1.1636716823315005, "learning_rate": 4.073398144563678e-07, "loss": 0.2082, "step": 32939 }, { "epoch": 0.5725807853430444, "grad_norm": 2.591268643539981, "learning_rate": 4.073121528648694e-07, "loss": 0.3494, "step": 32940 }, { "epoch": 0.5725981678805472, "grad_norm": 1.4277121684465577, "learning_rate": 4.072844915671489e-07, "loss": 0.3068, "step": 32941 }, { "epoch": 0.57261555041805, "grad_norm": 1.4841419517783883, "learning_rate": 4.0725683056329406e-07, "loss": 0.2343, "step": 32942 }, { "epoch": 0.5726329329555528, "grad_norm": 2.156807754377386, "learning_rate": 4.072291698533925e-07, "loss": 0.2842, "step": 32943 }, { "epoch": 0.5726503154930557, "grad_norm": 1.5934745387786942, "learning_rate": 4.0720150943753197e-07, "loss": 0.1345, "step": 32944 }, { "epoch": 0.5726676980305585, "grad_norm": 2.414842471092437, "learning_rate": 4.071738493157997e-07, "loss": 0.2155, "step": 32945 }, { "epoch": 0.5726850805680613, "grad_norm": 1.7614753025848304, "learning_rate": 4.0714618948828393e-07, "loss": 0.2089, "step": 32946 }, { "epoch": 0.5727024631055642, "grad_norm": 1.3236828616730822, "learning_rate": 4.0711852995507207e-07, "loss": 0.2534, "step": 32947 }, { "epoch": 0.572719845643067, "grad_norm": 1.040436638986697, "learning_rate": 4.0709087071625165e-07, "loss": 0.2003, "step": 32948 }, { "epoch": 0.5727372281805698, "grad_norm": 1.5012815645506787, "learning_rate": 4.070632117719106e-07, "loss": 0.2937, "step": 32949 }, { "epoch": 0.5727546107180727, "grad_norm": 0.920131903024867, "learning_rate": 4.070355531221367e-07, "loss": 0.2268, "step": 32950 }, { "epoch": 0.5727719932555755, "grad_norm": 0.9494130527467941, "learning_rate": 4.07007894767017e-07, "loss": 0.1691, "step": 32951 }, { "epoch": 0.5727893757930783, "grad_norm": 3.0155744454949875, "learning_rate": 4.0698023670663974e-07, "loss": 0.3714, "step": 32952 }, { "epoch": 0.5728067583305811, "grad_norm": 1.2584958291936887, "learning_rate": 4.069525789410923e-07, "loss": 0.1954, "step": 32953 }, { "epoch": 0.572824140868084, "grad_norm": 1.09629510474194, "learning_rate": 4.0692492147046254e-07, "loss": 0.4249, "step": 32954 }, { "epoch": 0.5728415234055867, "grad_norm": 2.6395737419699206, "learning_rate": 4.068972642948381e-07, "loss": 0.3314, "step": 32955 }, { "epoch": 0.5728589059430895, "grad_norm": 2.179762776601617, "learning_rate": 4.068696074143065e-07, "loss": 0.3157, "step": 32956 }, { "epoch": 0.5728762884805924, "grad_norm": 2.4503703144272517, "learning_rate": 4.068419508289554e-07, "loss": 0.252, "step": 32957 }, { "epoch": 0.5728936710180952, "grad_norm": 1.322845568344284, "learning_rate": 4.068142945388726e-07, "loss": 0.2302, "step": 32958 }, { "epoch": 0.572911053555598, "grad_norm": 1.765016002399659, "learning_rate": 4.0678663854414554e-07, "loss": 0.3411, "step": 32959 }, { "epoch": 0.5729284360931008, "grad_norm": 1.164385968761029, "learning_rate": 4.0675898284486214e-07, "loss": 0.1583, "step": 32960 }, { "epoch": 0.5729458186306037, "grad_norm": 2.487823744838758, "learning_rate": 4.067313274411098e-07, "loss": 0.3472, "step": 32961 }, { "epoch": 0.5729632011681065, "grad_norm": 1.4552955861277137, "learning_rate": 4.067036723329765e-07, "loss": 0.2388, "step": 32962 }, { "epoch": 0.5729805837056093, "grad_norm": 1.423685318370531, "learning_rate": 4.066760175205497e-07, "loss": 0.1929, "step": 32963 }, { "epoch": 0.5729979662431122, "grad_norm": 0.7822323238288766, "learning_rate": 4.0664836300391705e-07, "loss": 0.2279, "step": 32964 }, { "epoch": 0.573015348780615, "grad_norm": 1.7534262924157378, "learning_rate": 4.06620708783166e-07, "loss": 0.2856, "step": 32965 }, { "epoch": 0.5730327313181178, "grad_norm": 0.9570980832209484, "learning_rate": 4.0659305485838457e-07, "loss": 0.332, "step": 32966 }, { "epoch": 0.5730501138556207, "grad_norm": 2.5898692942401724, "learning_rate": 4.065654012296601e-07, "loss": 0.2308, "step": 32967 }, { "epoch": 0.5730674963931235, "grad_norm": 1.9863047171545165, "learning_rate": 4.0653774789708054e-07, "loss": 0.1596, "step": 32968 }, { "epoch": 0.5730848789306263, "grad_norm": 1.8335934639744318, "learning_rate": 4.065100948607335e-07, "loss": 0.1981, "step": 32969 }, { "epoch": 0.5731022614681291, "grad_norm": 1.4284606775733346, "learning_rate": 4.0648244212070637e-07, "loss": 0.3664, "step": 32970 }, { "epoch": 0.573119644005632, "grad_norm": 3.368753840586657, "learning_rate": 4.0645478967708685e-07, "loss": 0.4524, "step": 32971 }, { "epoch": 0.5731370265431348, "grad_norm": 2.705689033649241, "learning_rate": 4.0642713752996285e-07, "loss": 0.3037, "step": 32972 }, { "epoch": 0.5731544090806376, "grad_norm": 1.1208010359677738, "learning_rate": 4.063994856794216e-07, "loss": 0.2901, "step": 32973 }, { "epoch": 0.5731717916181405, "grad_norm": 1.1712872566268406, "learning_rate": 4.063718341255512e-07, "loss": 0.2242, "step": 32974 }, { "epoch": 0.5731891741556432, "grad_norm": 1.4841995577100093, "learning_rate": 4.063441828684391e-07, "loss": 0.2281, "step": 32975 }, { "epoch": 0.573206556693146, "grad_norm": 2.4659005580373354, "learning_rate": 4.0631653190817276e-07, "loss": 0.2595, "step": 32976 }, { "epoch": 0.5732239392306488, "grad_norm": 1.399983349017131, "learning_rate": 4.0628888124484005e-07, "loss": 0.2234, "step": 32977 }, { "epoch": 0.5732413217681517, "grad_norm": 1.218102562224978, "learning_rate": 4.062612308785286e-07, "loss": 0.134, "step": 32978 }, { "epoch": 0.5732587043056545, "grad_norm": 1.8042925594872603, "learning_rate": 4.062335808093258e-07, "loss": 0.2522, "step": 32979 }, { "epoch": 0.5732760868431573, "grad_norm": 1.3735740627801012, "learning_rate": 4.0620593103731963e-07, "loss": 0.1959, "step": 32980 }, { "epoch": 0.5732934693806602, "grad_norm": 0.9928905168943404, "learning_rate": 4.061782815625975e-07, "loss": 0.3233, "step": 32981 }, { "epoch": 0.573310851918163, "grad_norm": 2.4524518609446386, "learning_rate": 4.061506323852473e-07, "loss": 0.3217, "step": 32982 }, { "epoch": 0.5733282344556658, "grad_norm": 1.75070695942782, "learning_rate": 4.0612298350535633e-07, "loss": 0.3413, "step": 32983 }, { "epoch": 0.5733456169931687, "grad_norm": 1.2232030686787856, "learning_rate": 4.0609533492301236e-07, "loss": 0.1842, "step": 32984 }, { "epoch": 0.5733629995306715, "grad_norm": 1.7131483097557736, "learning_rate": 4.06067686638303e-07, "loss": 0.2855, "step": 32985 }, { "epoch": 0.5733803820681743, "grad_norm": 2.1898823597949173, "learning_rate": 4.0604003865131596e-07, "loss": 0.2767, "step": 32986 }, { "epoch": 0.5733977646056772, "grad_norm": 1.7417979863666453, "learning_rate": 4.060123909621389e-07, "loss": 0.2268, "step": 32987 }, { "epoch": 0.57341514714318, "grad_norm": 1.5501262460816072, "learning_rate": 4.0598474357085954e-07, "loss": 0.1838, "step": 32988 }, { "epoch": 0.5734325296806828, "grad_norm": 1.908708693312786, "learning_rate": 4.059570964775652e-07, "loss": 0.2715, "step": 32989 }, { "epoch": 0.5734499122181856, "grad_norm": 1.4545123006972656, "learning_rate": 4.0592944968234353e-07, "loss": 0.245, "step": 32990 }, { "epoch": 0.5734672947556885, "grad_norm": 1.5429422738030663, "learning_rate": 4.0590180318528245e-07, "loss": 0.291, "step": 32991 }, { "epoch": 0.5734846772931913, "grad_norm": 1.8883904828298792, "learning_rate": 4.058741569864693e-07, "loss": 0.1938, "step": 32992 }, { "epoch": 0.5735020598306941, "grad_norm": 1.9104806929568117, "learning_rate": 4.0584651108599194e-07, "loss": 0.2156, "step": 32993 }, { "epoch": 0.573519442368197, "grad_norm": 1.1371948255081732, "learning_rate": 4.05818865483938e-07, "loss": 0.1622, "step": 32994 }, { "epoch": 0.5735368249056997, "grad_norm": 1.9479149472765325, "learning_rate": 4.057912201803949e-07, "loss": 0.2751, "step": 32995 }, { "epoch": 0.5735542074432025, "grad_norm": 0.9727531800186141, "learning_rate": 4.057635751754502e-07, "loss": 0.2807, "step": 32996 }, { "epoch": 0.5735715899807053, "grad_norm": 2.1922678140367555, "learning_rate": 4.0573593046919186e-07, "loss": 0.2893, "step": 32997 }, { "epoch": 0.5735889725182082, "grad_norm": 1.8046471182317807, "learning_rate": 4.0570828606170717e-07, "loss": 0.1981, "step": 32998 }, { "epoch": 0.573606355055711, "grad_norm": 22.745421809236586, "learning_rate": 4.0568064195308397e-07, "loss": 0.3023, "step": 32999 }, { "epoch": 0.5736237375932138, "grad_norm": 1.0570464996923978, "learning_rate": 4.056529981434098e-07, "loss": 0.1617, "step": 33000 }, { "epoch": 0.5736411201307167, "grad_norm": 2.5868546163306556, "learning_rate": 4.056253546327724e-07, "loss": 0.1958, "step": 33001 }, { "epoch": 0.5736585026682195, "grad_norm": 1.5089163920333886, "learning_rate": 4.055977114212592e-07, "loss": 0.3356, "step": 33002 }, { "epoch": 0.5736758852057223, "grad_norm": 1.2473316639738967, "learning_rate": 4.0557006850895794e-07, "loss": 0.3138, "step": 33003 }, { "epoch": 0.5736932677432252, "grad_norm": 1.489200728994456, "learning_rate": 4.05542425895956e-07, "loss": 0.3532, "step": 33004 }, { "epoch": 0.573710650280728, "grad_norm": 1.8071765889118654, "learning_rate": 4.0551478358234137e-07, "loss": 0.3012, "step": 33005 }, { "epoch": 0.5737280328182308, "grad_norm": 1.3031731274322407, "learning_rate": 4.054871415682013e-07, "loss": 0.3374, "step": 33006 }, { "epoch": 0.5737454153557336, "grad_norm": 2.660683759506768, "learning_rate": 4.0545949985362383e-07, "loss": 0.2832, "step": 33007 }, { "epoch": 0.5737627978932365, "grad_norm": 4.103685346318319, "learning_rate": 4.0543185843869617e-07, "loss": 0.2905, "step": 33008 }, { "epoch": 0.5737801804307393, "grad_norm": 1.4232449282455717, "learning_rate": 4.0540421732350593e-07, "loss": 0.2053, "step": 33009 }, { "epoch": 0.5737975629682421, "grad_norm": 1.7155515216087398, "learning_rate": 4.0537657650814106e-07, "loss": 0.2114, "step": 33010 }, { "epoch": 0.573814945505745, "grad_norm": 1.4072139214371333, "learning_rate": 4.0534893599268896e-07, "loss": 0.1528, "step": 33011 }, { "epoch": 0.5738323280432478, "grad_norm": 1.7444933013946222, "learning_rate": 4.0532129577723713e-07, "loss": 0.2098, "step": 33012 }, { "epoch": 0.5738497105807506, "grad_norm": 2.2409060945749775, "learning_rate": 4.052936558618735e-07, "loss": 0.2291, "step": 33013 }, { "epoch": 0.5738670931182535, "grad_norm": 1.4422104550011823, "learning_rate": 4.0526601624668546e-07, "loss": 0.1581, "step": 33014 }, { "epoch": 0.5738844756557562, "grad_norm": 1.4130973893067416, "learning_rate": 4.052383769317604e-07, "loss": 0.1698, "step": 33015 }, { "epoch": 0.573901858193259, "grad_norm": 3.037991977097105, "learning_rate": 4.052107379171863e-07, "loss": 0.346, "step": 33016 }, { "epoch": 0.5739192407307618, "grad_norm": 1.2833860429773212, "learning_rate": 4.051830992030505e-07, "loss": 0.411, "step": 33017 }, { "epoch": 0.5739366232682647, "grad_norm": 3.5307095799477546, "learning_rate": 4.051554607894409e-07, "loss": 0.3453, "step": 33018 }, { "epoch": 0.5739540058057675, "grad_norm": 1.5792623478761962, "learning_rate": 4.05127822676445e-07, "loss": 0.1068, "step": 33019 }, { "epoch": 0.5739713883432703, "grad_norm": 1.248862907419277, "learning_rate": 4.051001848641502e-07, "loss": 0.2191, "step": 33020 }, { "epoch": 0.5739887708807732, "grad_norm": 1.1672328318410825, "learning_rate": 4.0507254735264405e-07, "loss": 0.2319, "step": 33021 }, { "epoch": 0.574006153418276, "grad_norm": 2.259313170706591, "learning_rate": 4.0504491014201455e-07, "loss": 0.3256, "step": 33022 }, { "epoch": 0.5740235359557788, "grad_norm": 1.9790924556993963, "learning_rate": 4.0501727323234883e-07, "loss": 0.2717, "step": 33023 }, { "epoch": 0.5740409184932816, "grad_norm": 1.6620210640577038, "learning_rate": 4.049896366237349e-07, "loss": 0.3265, "step": 33024 }, { "epoch": 0.5740583010307845, "grad_norm": 1.231028310851474, "learning_rate": 4.049620003162601e-07, "loss": 0.1704, "step": 33025 }, { "epoch": 0.5740756835682873, "grad_norm": 3.188879106475607, "learning_rate": 4.049343643100124e-07, "loss": 0.332, "step": 33026 }, { "epoch": 0.5740930661057901, "grad_norm": 1.4203141138113289, "learning_rate": 4.0490672860507876e-07, "loss": 0.2161, "step": 33027 }, { "epoch": 0.574110448643293, "grad_norm": 1.8089983274230355, "learning_rate": 4.048790932015472e-07, "loss": 0.265, "step": 33028 }, { "epoch": 0.5741278311807958, "grad_norm": 1.823360480803525, "learning_rate": 4.0485145809950517e-07, "loss": 0.235, "step": 33029 }, { "epoch": 0.5741452137182986, "grad_norm": 2.8415830825947856, "learning_rate": 4.0482382329904037e-07, "loss": 0.23, "step": 33030 }, { "epoch": 0.5741625962558015, "grad_norm": 1.8253674987468889, "learning_rate": 4.047961888002402e-07, "loss": 0.2491, "step": 33031 }, { "epoch": 0.5741799787933043, "grad_norm": 1.870282874015657, "learning_rate": 4.0476855460319264e-07, "loss": 0.1981, "step": 33032 }, { "epoch": 0.5741973613308071, "grad_norm": 2.0965154016424474, "learning_rate": 4.047409207079849e-07, "loss": 0.2705, "step": 33033 }, { "epoch": 0.5742147438683098, "grad_norm": 2.9045905738182567, "learning_rate": 4.0471328711470455e-07, "loss": 0.3447, "step": 33034 }, { "epoch": 0.5742321264058127, "grad_norm": 1.3506342930640203, "learning_rate": 4.0468565382343944e-07, "loss": 0.2302, "step": 33035 }, { "epoch": 0.5742495089433155, "grad_norm": 1.5741129043675657, "learning_rate": 4.0465802083427704e-07, "loss": 0.3047, "step": 33036 }, { "epoch": 0.5742668914808183, "grad_norm": 1.2047469010478504, "learning_rate": 4.0463038814730477e-07, "loss": 0.1955, "step": 33037 }, { "epoch": 0.5742842740183212, "grad_norm": 2.8866149054787975, "learning_rate": 4.046027557626106e-07, "loss": 0.344, "step": 33038 }, { "epoch": 0.574301656555824, "grad_norm": 1.6140211075289332, "learning_rate": 4.0457512368028173e-07, "loss": 0.2153, "step": 33039 }, { "epoch": 0.5743190390933268, "grad_norm": 1.606404947283028, "learning_rate": 4.0454749190040583e-07, "loss": 0.1982, "step": 33040 }, { "epoch": 0.5743364216308297, "grad_norm": 1.375753920546641, "learning_rate": 4.045198604230706e-07, "loss": 0.1191, "step": 33041 }, { "epoch": 0.5743538041683325, "grad_norm": 1.795442733782369, "learning_rate": 4.044922292483635e-07, "loss": 0.2258, "step": 33042 }, { "epoch": 0.5743711867058353, "grad_norm": 0.9111665224213518, "learning_rate": 4.044645983763722e-07, "loss": 0.1474, "step": 33043 }, { "epoch": 0.5743885692433381, "grad_norm": 3.2424041282695946, "learning_rate": 4.044369678071843e-07, "loss": 0.2157, "step": 33044 }, { "epoch": 0.574405951780841, "grad_norm": 1.9993986963792636, "learning_rate": 4.044093375408874e-07, "loss": 0.3435, "step": 33045 }, { "epoch": 0.5744233343183438, "grad_norm": 1.5654572919548013, "learning_rate": 4.043817075775687e-07, "loss": 0.2977, "step": 33046 }, { "epoch": 0.5744407168558466, "grad_norm": 1.1524542161731615, "learning_rate": 4.0435407791731626e-07, "loss": 0.2701, "step": 33047 }, { "epoch": 0.5744580993933495, "grad_norm": 1.6082672287984432, "learning_rate": 4.043264485602173e-07, "loss": 0.2183, "step": 33048 }, { "epoch": 0.5744754819308523, "grad_norm": 10.274003655075438, "learning_rate": 4.042988195063597e-07, "loss": 0.3903, "step": 33049 }, { "epoch": 0.5744928644683551, "grad_norm": 1.20688287183863, "learning_rate": 4.042711907558307e-07, "loss": 0.2536, "step": 33050 }, { "epoch": 0.574510247005858, "grad_norm": 1.6790669870550685, "learning_rate": 4.042435623087185e-07, "loss": 0.3695, "step": 33051 }, { "epoch": 0.5745276295433608, "grad_norm": 1.9829277514308001, "learning_rate": 4.0421593416510977e-07, "loss": 0.2424, "step": 33052 }, { "epoch": 0.5745450120808636, "grad_norm": 2.137096126126269, "learning_rate": 4.041883063250927e-07, "loss": 0.2432, "step": 33053 }, { "epoch": 0.5745623946183663, "grad_norm": 1.1928108968181141, "learning_rate": 4.0416067878875455e-07, "loss": 0.152, "step": 33054 }, { "epoch": 0.5745797771558692, "grad_norm": 2.339450556175829, "learning_rate": 4.041330515561832e-07, "loss": 0.1575, "step": 33055 }, { "epoch": 0.574597159693372, "grad_norm": 1.6810283856776844, "learning_rate": 4.041054246274658e-07, "loss": 0.3046, "step": 33056 }, { "epoch": 0.5746145422308748, "grad_norm": 1.5980939431519585, "learning_rate": 4.040777980026905e-07, "loss": 0.337, "step": 33057 }, { "epoch": 0.5746319247683777, "grad_norm": 2.2763728157997862, "learning_rate": 4.0405017168194433e-07, "loss": 0.377, "step": 33058 }, { "epoch": 0.5746493073058805, "grad_norm": 1.7832365227957008, "learning_rate": 4.0402254566531506e-07, "loss": 0.241, "step": 33059 }, { "epoch": 0.5746666898433833, "grad_norm": 1.7840145934090181, "learning_rate": 4.0399491995289016e-07, "loss": 0.1671, "step": 33060 }, { "epoch": 0.5746840723808861, "grad_norm": 1.4606090172091513, "learning_rate": 4.039672945447573e-07, "loss": 0.1887, "step": 33061 }, { "epoch": 0.574701454918389, "grad_norm": 1.935191364921834, "learning_rate": 4.03939669441004e-07, "loss": 0.1561, "step": 33062 }, { "epoch": 0.5747188374558918, "grad_norm": 1.2968102442653022, "learning_rate": 4.0391204464171785e-07, "loss": 0.3006, "step": 33063 }, { "epoch": 0.5747362199933946, "grad_norm": 1.2217898892152312, "learning_rate": 4.0388442014698656e-07, "loss": 0.1932, "step": 33064 }, { "epoch": 0.5747536025308975, "grad_norm": 1.6674736068644818, "learning_rate": 4.0385679595689713e-07, "loss": 0.2073, "step": 33065 }, { "epoch": 0.5747709850684003, "grad_norm": 1.8524417551543857, "learning_rate": 4.0382917207153775e-07, "loss": 0.2846, "step": 33066 }, { "epoch": 0.5747883676059031, "grad_norm": 1.8433157078714664, "learning_rate": 4.038015484909957e-07, "loss": 0.3281, "step": 33067 }, { "epoch": 0.574805750143406, "grad_norm": 1.603674152352949, "learning_rate": 4.0377392521535836e-07, "loss": 0.3696, "step": 33068 }, { "epoch": 0.5748231326809088, "grad_norm": 1.1893337794705319, "learning_rate": 4.0374630224471367e-07, "loss": 0.2633, "step": 33069 }, { "epoch": 0.5748405152184116, "grad_norm": 3.071619700707916, "learning_rate": 4.037186795791491e-07, "loss": 0.3044, "step": 33070 }, { "epoch": 0.5748578977559144, "grad_norm": 1.077548348652124, "learning_rate": 4.0369105721875177e-07, "loss": 0.1846, "step": 33071 }, { "epoch": 0.5748752802934173, "grad_norm": 3.107542685532529, "learning_rate": 4.0366343516360973e-07, "loss": 0.2656, "step": 33072 }, { "epoch": 0.5748926628309201, "grad_norm": 0.8141969830485538, "learning_rate": 4.0363581341381026e-07, "loss": 0.2148, "step": 33073 }, { "epoch": 0.5749100453684228, "grad_norm": 2.02269970225388, "learning_rate": 4.0360819196944107e-07, "loss": 0.3113, "step": 33074 }, { "epoch": 0.5749274279059257, "grad_norm": 1.922555314052557, "learning_rate": 4.035805708305895e-07, "loss": 0.3112, "step": 33075 }, { "epoch": 0.5749448104434285, "grad_norm": 2.5223420521996776, "learning_rate": 4.035529499973436e-07, "loss": 0.414, "step": 33076 }, { "epoch": 0.5749621929809313, "grad_norm": 2.595452703756885, "learning_rate": 4.035253294697902e-07, "loss": 0.2811, "step": 33077 }, { "epoch": 0.5749795755184341, "grad_norm": 1.5427170586624748, "learning_rate": 4.034977092480173e-07, "loss": 0.2286, "step": 33078 }, { "epoch": 0.574996958055937, "grad_norm": 2.125998740469614, "learning_rate": 4.0347008933211225e-07, "loss": 0.3168, "step": 33079 }, { "epoch": 0.5750143405934398, "grad_norm": 1.423654100931379, "learning_rate": 4.0344246972216274e-07, "loss": 0.3084, "step": 33080 }, { "epoch": 0.5750317231309426, "grad_norm": 2.021423110261689, "learning_rate": 4.0341485041825614e-07, "loss": 0.3266, "step": 33081 }, { "epoch": 0.5750491056684455, "grad_norm": 1.247782576388049, "learning_rate": 4.033872314204802e-07, "loss": 0.28, "step": 33082 }, { "epoch": 0.5750664882059483, "grad_norm": 2.0854702773121536, "learning_rate": 4.033596127289225e-07, "loss": 0.3014, "step": 33083 }, { "epoch": 0.5750838707434511, "grad_norm": 2.126580959669613, "learning_rate": 4.0333199434367026e-07, "loss": 0.1604, "step": 33084 }, { "epoch": 0.575101253280954, "grad_norm": 1.770991979720454, "learning_rate": 4.0330437626481115e-07, "loss": 0.2704, "step": 33085 }, { "epoch": 0.5751186358184568, "grad_norm": 1.2917359954697338, "learning_rate": 4.0327675849243286e-07, "loss": 0.1784, "step": 33086 }, { "epoch": 0.5751360183559596, "grad_norm": 0.9808440583559628, "learning_rate": 4.0324914102662264e-07, "loss": 0.2478, "step": 33087 }, { "epoch": 0.5751534008934625, "grad_norm": 1.9125319056275902, "learning_rate": 4.0322152386746836e-07, "loss": 0.3019, "step": 33088 }, { "epoch": 0.5751707834309653, "grad_norm": 2.1168757669562632, "learning_rate": 4.0319390701505753e-07, "loss": 0.238, "step": 33089 }, { "epoch": 0.5751881659684681, "grad_norm": 2.2215271379698844, "learning_rate": 4.031662904694773e-07, "loss": 0.2091, "step": 33090 }, { "epoch": 0.5752055485059709, "grad_norm": 1.0681193411962586, "learning_rate": 4.0313867423081553e-07, "loss": 0.1946, "step": 33091 }, { "epoch": 0.5752229310434738, "grad_norm": 1.6119725244182266, "learning_rate": 4.031110582991597e-07, "loss": 0.2972, "step": 33092 }, { "epoch": 0.5752403135809766, "grad_norm": 0.9758172077144041, "learning_rate": 4.030834426745972e-07, "loss": 0.1481, "step": 33093 }, { "epoch": 0.5752576961184793, "grad_norm": 1.39926369954532, "learning_rate": 4.030558273572158e-07, "loss": 0.3638, "step": 33094 }, { "epoch": 0.5752750786559822, "grad_norm": 1.6237659944032616, "learning_rate": 4.0302821234710293e-07, "loss": 0.2758, "step": 33095 }, { "epoch": 0.575292461193485, "grad_norm": 1.2366486505340957, "learning_rate": 4.030005976443459e-07, "loss": 0.2839, "step": 33096 }, { "epoch": 0.5753098437309878, "grad_norm": 1.1775983540295387, "learning_rate": 4.029729832490325e-07, "loss": 0.2789, "step": 33097 }, { "epoch": 0.5753272262684906, "grad_norm": 1.7153497938475275, "learning_rate": 4.029453691612501e-07, "loss": 0.2968, "step": 33098 }, { "epoch": 0.5753446088059935, "grad_norm": 1.4912180682255418, "learning_rate": 4.029177553810864e-07, "loss": 0.3373, "step": 33099 }, { "epoch": 0.5753619913434963, "grad_norm": 1.11104041869017, "learning_rate": 4.028901419086288e-07, "loss": 0.1264, "step": 33100 }, { "epoch": 0.5753793738809991, "grad_norm": 1.678643270693185, "learning_rate": 4.0286252874396477e-07, "loss": 0.2623, "step": 33101 }, { "epoch": 0.575396756418502, "grad_norm": 1.5778946520553951, "learning_rate": 4.028349158871821e-07, "loss": 0.3005, "step": 33102 }, { "epoch": 0.5754141389560048, "grad_norm": 1.6068366819105675, "learning_rate": 4.02807303338368e-07, "loss": 0.2401, "step": 33103 }, { "epoch": 0.5754315214935076, "grad_norm": 1.072881816681417, "learning_rate": 4.0277969109761e-07, "loss": 0.2745, "step": 33104 }, { "epoch": 0.5754489040310105, "grad_norm": 2.425043367595656, "learning_rate": 4.027520791649958e-07, "loss": 0.3007, "step": 33105 }, { "epoch": 0.5754662865685133, "grad_norm": 2.234243622648732, "learning_rate": 4.0272446754061274e-07, "loss": 0.2822, "step": 33106 }, { "epoch": 0.5754836691060161, "grad_norm": 0.9205013779306306, "learning_rate": 4.026968562245486e-07, "loss": 0.1633, "step": 33107 }, { "epoch": 0.575501051643519, "grad_norm": 1.0379005019226877, "learning_rate": 4.026692452168908e-07, "loss": 0.1947, "step": 33108 }, { "epoch": 0.5755184341810218, "grad_norm": 1.1771175073322602, "learning_rate": 4.0264163451772666e-07, "loss": 0.1475, "step": 33109 }, { "epoch": 0.5755358167185246, "grad_norm": 1.7082568133518654, "learning_rate": 4.026140241271437e-07, "loss": 0.4059, "step": 33110 }, { "epoch": 0.5755531992560274, "grad_norm": 1.3642646673205443, "learning_rate": 4.025864140452296e-07, "loss": 0.2361, "step": 33111 }, { "epoch": 0.5755705817935303, "grad_norm": 1.4664265262835596, "learning_rate": 4.0255880427207183e-07, "loss": 0.3554, "step": 33112 }, { "epoch": 0.5755879643310331, "grad_norm": 1.4006607967293416, "learning_rate": 4.025311948077579e-07, "loss": 0.1603, "step": 33113 }, { "epoch": 0.5756053468685358, "grad_norm": 2.885177885985173, "learning_rate": 4.025035856523755e-07, "loss": 0.1908, "step": 33114 }, { "epoch": 0.5756227294060386, "grad_norm": 1.993251730954253, "learning_rate": 4.0247597680601183e-07, "loss": 0.3039, "step": 33115 }, { "epoch": 0.5756401119435415, "grad_norm": 1.3327568404522168, "learning_rate": 4.024483682687544e-07, "loss": 0.2222, "step": 33116 }, { "epoch": 0.5756574944810443, "grad_norm": 1.8090526965357279, "learning_rate": 4.0242076004069086e-07, "loss": 0.3546, "step": 33117 }, { "epoch": 0.5756748770185471, "grad_norm": 1.887559635333772, "learning_rate": 4.023931521219087e-07, "loss": 0.1839, "step": 33118 }, { "epoch": 0.57569225955605, "grad_norm": 1.012406347444826, "learning_rate": 4.0236554451249544e-07, "loss": 0.2412, "step": 33119 }, { "epoch": 0.5757096420935528, "grad_norm": 2.809610339076204, "learning_rate": 4.0233793721253843e-07, "loss": 0.3807, "step": 33120 }, { "epoch": 0.5757270246310556, "grad_norm": 1.8044611698471777, "learning_rate": 4.023103302221256e-07, "loss": 0.1929, "step": 33121 }, { "epoch": 0.5757444071685585, "grad_norm": 2.110046912834816, "learning_rate": 4.0228272354134395e-07, "loss": 0.3144, "step": 33122 }, { "epoch": 0.5757617897060613, "grad_norm": 0.9628861115966044, "learning_rate": 4.022551171702811e-07, "loss": 0.2508, "step": 33123 }, { "epoch": 0.5757791722435641, "grad_norm": 1.6912680965607712, "learning_rate": 4.0222751110902473e-07, "loss": 0.2745, "step": 33124 }, { "epoch": 0.575796554781067, "grad_norm": 2.1751689518436823, "learning_rate": 4.021999053576622e-07, "loss": 0.2149, "step": 33125 }, { "epoch": 0.5758139373185698, "grad_norm": 2.6609141816183066, "learning_rate": 4.0217229991628105e-07, "loss": 0.2353, "step": 33126 }, { "epoch": 0.5758313198560726, "grad_norm": 1.1660747298007217, "learning_rate": 4.0214469478496894e-07, "loss": 0.276, "step": 33127 }, { "epoch": 0.5758487023935754, "grad_norm": 2.3701238381062955, "learning_rate": 4.0211708996381304e-07, "loss": 0.4881, "step": 33128 }, { "epoch": 0.5758660849310783, "grad_norm": 1.745410861471692, "learning_rate": 4.0208948545290095e-07, "loss": 0.2204, "step": 33129 }, { "epoch": 0.5758834674685811, "grad_norm": 1.1661078430412384, "learning_rate": 4.0206188125232024e-07, "loss": 0.2491, "step": 33130 }, { "epoch": 0.5759008500060839, "grad_norm": 1.2310037354255983, "learning_rate": 4.020342773621583e-07, "loss": 0.2425, "step": 33131 }, { "epoch": 0.5759182325435868, "grad_norm": 3.364861610634726, "learning_rate": 4.020066737825029e-07, "loss": 0.1669, "step": 33132 }, { "epoch": 0.5759356150810896, "grad_norm": 1.330508398964873, "learning_rate": 4.019790705134414e-07, "loss": 0.2316, "step": 33133 }, { "epoch": 0.5759529976185923, "grad_norm": 1.6619343942452514, "learning_rate": 4.0195146755506106e-07, "loss": 0.2414, "step": 33134 }, { "epoch": 0.5759703801560951, "grad_norm": 1.5795824906147715, "learning_rate": 4.0192386490744945e-07, "loss": 0.1598, "step": 33135 }, { "epoch": 0.575987762693598, "grad_norm": 1.990141318557255, "learning_rate": 4.018962625706942e-07, "loss": 0.3838, "step": 33136 }, { "epoch": 0.5760051452311008, "grad_norm": 1.419420886323986, "learning_rate": 4.018686605448827e-07, "loss": 0.2797, "step": 33137 }, { "epoch": 0.5760225277686036, "grad_norm": 2.985141582912354, "learning_rate": 4.018410588301025e-07, "loss": 0.4746, "step": 33138 }, { "epoch": 0.5760399103061065, "grad_norm": 0.8859093112824478, "learning_rate": 4.0181345742644124e-07, "loss": 0.1732, "step": 33139 }, { "epoch": 0.5760572928436093, "grad_norm": 1.1587384941390562, "learning_rate": 4.0178585633398605e-07, "loss": 0.1987, "step": 33140 }, { "epoch": 0.5760746753811121, "grad_norm": 1.3602452883153795, "learning_rate": 4.017582555528245e-07, "loss": 0.2795, "step": 33141 }, { "epoch": 0.576092057918615, "grad_norm": 1.3612878978539487, "learning_rate": 4.017306550830442e-07, "loss": 0.225, "step": 33142 }, { "epoch": 0.5761094404561178, "grad_norm": 2.4320810892417035, "learning_rate": 4.0170305492473257e-07, "loss": 0.3598, "step": 33143 }, { "epoch": 0.5761268229936206, "grad_norm": 1.2520073183161635, "learning_rate": 4.016754550779771e-07, "loss": 0.2002, "step": 33144 }, { "epoch": 0.5761442055311234, "grad_norm": 1.7462085557379232, "learning_rate": 4.0164785554286527e-07, "loss": 0.2388, "step": 33145 }, { "epoch": 0.5761615880686263, "grad_norm": 1.365749610483061, "learning_rate": 4.0162025631948473e-07, "loss": 0.2464, "step": 33146 }, { "epoch": 0.5761789706061291, "grad_norm": 1.5878424851599262, "learning_rate": 4.015926574079227e-07, "loss": 0.2745, "step": 33147 }, { "epoch": 0.5761963531436319, "grad_norm": 1.6066771825748785, "learning_rate": 4.0156505880826667e-07, "loss": 0.1992, "step": 33148 }, { "epoch": 0.5762137356811348, "grad_norm": 7.709017654435827, "learning_rate": 4.015374605206041e-07, "loss": 0.3418, "step": 33149 }, { "epoch": 0.5762311182186376, "grad_norm": 2.612583541010091, "learning_rate": 4.0150986254502274e-07, "loss": 0.4345, "step": 33150 }, { "epoch": 0.5762485007561404, "grad_norm": 3.0461465629403377, "learning_rate": 4.014822648816097e-07, "loss": 0.2594, "step": 33151 }, { "epoch": 0.5762658832936433, "grad_norm": 1.414681887546316, "learning_rate": 4.014546675304529e-07, "loss": 0.2252, "step": 33152 }, { "epoch": 0.5762832658311461, "grad_norm": 1.4922021250982693, "learning_rate": 4.0142707049163934e-07, "loss": 0.3241, "step": 33153 }, { "epoch": 0.5763006483686488, "grad_norm": 3.192353494975798, "learning_rate": 4.013994737652566e-07, "loss": 0.2278, "step": 33154 }, { "epoch": 0.5763180309061516, "grad_norm": 1.5408836354946898, "learning_rate": 4.0137187735139234e-07, "loss": 0.1705, "step": 33155 }, { "epoch": 0.5763354134436545, "grad_norm": 1.2472552497108327, "learning_rate": 4.01344281250134e-07, "loss": 0.2835, "step": 33156 }, { "epoch": 0.5763527959811573, "grad_norm": 1.1092792580388333, "learning_rate": 4.013166854615688e-07, "loss": 0.1637, "step": 33157 }, { "epoch": 0.5763701785186601, "grad_norm": 2.020743018696234, "learning_rate": 4.0128908998578456e-07, "loss": 0.3447, "step": 33158 }, { "epoch": 0.576387561056163, "grad_norm": 1.930601767874761, "learning_rate": 4.012614948228685e-07, "loss": 0.3167, "step": 33159 }, { "epoch": 0.5764049435936658, "grad_norm": 1.4805041548267346, "learning_rate": 4.0123389997290803e-07, "loss": 0.2325, "step": 33160 }, { "epoch": 0.5764223261311686, "grad_norm": 1.7292492670937876, "learning_rate": 4.012063054359908e-07, "loss": 0.2594, "step": 33161 }, { "epoch": 0.5764397086686714, "grad_norm": 1.4210509141981116, "learning_rate": 4.011787112122041e-07, "loss": 0.2142, "step": 33162 }, { "epoch": 0.5764570912061743, "grad_norm": 1.1805383590722436, "learning_rate": 4.011511173016356e-07, "loss": 0.2205, "step": 33163 }, { "epoch": 0.5764744737436771, "grad_norm": 1.8299697428287358, "learning_rate": 4.0112352370437275e-07, "loss": 0.2372, "step": 33164 }, { "epoch": 0.5764918562811799, "grad_norm": 2.1464262321922947, "learning_rate": 4.0109593042050296e-07, "loss": 0.2208, "step": 33165 }, { "epoch": 0.5765092388186828, "grad_norm": 2.0346431919229144, "learning_rate": 4.010683374501134e-07, "loss": 0.369, "step": 33166 }, { "epoch": 0.5765266213561856, "grad_norm": 2.114171256533476, "learning_rate": 4.010407447932918e-07, "loss": 0.7488, "step": 33167 }, { "epoch": 0.5765440038936884, "grad_norm": 1.840134508631716, "learning_rate": 4.0101315245012555e-07, "loss": 0.2692, "step": 33168 }, { "epoch": 0.5765613864311913, "grad_norm": 1.3865565475441115, "learning_rate": 4.0098556042070224e-07, "loss": 0.2881, "step": 33169 }, { "epoch": 0.5765787689686941, "grad_norm": 2.2122139033522994, "learning_rate": 4.0095796870510914e-07, "loss": 0.1992, "step": 33170 }, { "epoch": 0.5765961515061969, "grad_norm": 1.4484408286012393, "learning_rate": 4.0093037730343406e-07, "loss": 0.2621, "step": 33171 }, { "epoch": 0.5766135340436997, "grad_norm": 1.6340148878298155, "learning_rate": 4.009027862157639e-07, "loss": 0.1804, "step": 33172 }, { "epoch": 0.5766309165812026, "grad_norm": 2.0274544362173774, "learning_rate": 4.0087519544218647e-07, "loss": 0.2524, "step": 33173 }, { "epoch": 0.5766482991187053, "grad_norm": 1.588651859907106, "learning_rate": 4.00847604982789e-07, "loss": 0.3273, "step": 33174 }, { "epoch": 0.5766656816562081, "grad_norm": 1.5789905628556273, "learning_rate": 4.008200148376592e-07, "loss": 0.1504, "step": 33175 }, { "epoch": 0.576683064193711, "grad_norm": 1.3907179589579608, "learning_rate": 4.0079242500688425e-07, "loss": 0.3351, "step": 33176 }, { "epoch": 0.5767004467312138, "grad_norm": 1.9183488579095127, "learning_rate": 4.0076483549055205e-07, "loss": 0.1767, "step": 33177 }, { "epoch": 0.5767178292687166, "grad_norm": 0.8630363383265796, "learning_rate": 4.0073724628874955e-07, "loss": 0.1599, "step": 33178 }, { "epoch": 0.5767352118062194, "grad_norm": 1.0734056187607117, "learning_rate": 4.007096574015643e-07, "loss": 0.2836, "step": 33179 }, { "epoch": 0.5767525943437223, "grad_norm": 0.9807341931322644, "learning_rate": 4.0068206882908394e-07, "loss": 0.2402, "step": 33180 }, { "epoch": 0.5767699768812251, "grad_norm": 1.1924295100709943, "learning_rate": 4.006544805713958e-07, "loss": 0.245, "step": 33181 }, { "epoch": 0.5767873594187279, "grad_norm": 2.828190592017878, "learning_rate": 4.006268926285872e-07, "loss": 0.3074, "step": 33182 }, { "epoch": 0.5768047419562308, "grad_norm": 1.4643191202107322, "learning_rate": 4.005993050007458e-07, "loss": 0.2603, "step": 33183 }, { "epoch": 0.5768221244937336, "grad_norm": 1.2387056979827877, "learning_rate": 4.005717176879591e-07, "loss": 0.2582, "step": 33184 }, { "epoch": 0.5768395070312364, "grad_norm": 1.5966864131572687, "learning_rate": 4.0054413069031413e-07, "loss": 0.1464, "step": 33185 }, { "epoch": 0.5768568895687393, "grad_norm": 1.3845131931022194, "learning_rate": 4.005165440078987e-07, "loss": 0.184, "step": 33186 }, { "epoch": 0.5768742721062421, "grad_norm": 1.1653470398958803, "learning_rate": 4.004889576408e-07, "loss": 0.1967, "step": 33187 }, { "epoch": 0.5768916546437449, "grad_norm": 1.7564766437348904, "learning_rate": 4.004613715891057e-07, "loss": 0.4162, "step": 33188 }, { "epoch": 0.5769090371812478, "grad_norm": 6.06264460537349, "learning_rate": 4.004337858529031e-07, "loss": 0.2388, "step": 33189 }, { "epoch": 0.5769264197187506, "grad_norm": 0.825650306350972, "learning_rate": 4.004062004322798e-07, "loss": 0.1603, "step": 33190 }, { "epoch": 0.5769438022562534, "grad_norm": 1.3813284996560156, "learning_rate": 4.003786153273229e-07, "loss": 0.2195, "step": 33191 }, { "epoch": 0.5769611847937562, "grad_norm": 2.4095515697031016, "learning_rate": 4.003510305381201e-07, "loss": 0.1872, "step": 33192 }, { "epoch": 0.576978567331259, "grad_norm": 1.6716193914313526, "learning_rate": 4.003234460647587e-07, "loss": 0.2123, "step": 33193 }, { "epoch": 0.5769959498687618, "grad_norm": 1.3392721745651621, "learning_rate": 4.0029586190732626e-07, "loss": 0.3207, "step": 33194 }, { "epoch": 0.5770133324062646, "grad_norm": 1.3863207860103592, "learning_rate": 4.0026827806591004e-07, "loss": 0.2167, "step": 33195 }, { "epoch": 0.5770307149437675, "grad_norm": 1.0738445894282684, "learning_rate": 4.002406945405979e-07, "loss": 0.5377, "step": 33196 }, { "epoch": 0.5770480974812703, "grad_norm": 1.5348219535900856, "learning_rate": 4.0021311133147657e-07, "loss": 0.2629, "step": 33197 }, { "epoch": 0.5770654800187731, "grad_norm": 2.185109229474005, "learning_rate": 4.0018552843863396e-07, "loss": 0.4932, "step": 33198 }, { "epoch": 0.5770828625562759, "grad_norm": 3.76739477319159, "learning_rate": 4.0015794586215726e-07, "loss": 0.3419, "step": 33199 }, { "epoch": 0.5771002450937788, "grad_norm": 2.0828288167917286, "learning_rate": 4.0013036360213414e-07, "loss": 0.2557, "step": 33200 }, { "epoch": 0.5771176276312816, "grad_norm": 1.3775230860156606, "learning_rate": 4.001027816586518e-07, "loss": 0.2408, "step": 33201 }, { "epoch": 0.5771350101687844, "grad_norm": 1.5301604546700924, "learning_rate": 4.0007520003179785e-07, "loss": 0.3027, "step": 33202 }, { "epoch": 0.5771523927062873, "grad_norm": 2.8044390402207737, "learning_rate": 4.000476187216597e-07, "loss": 0.2641, "step": 33203 }, { "epoch": 0.5771697752437901, "grad_norm": 0.8239160881835562, "learning_rate": 4.000200377283246e-07, "loss": 0.2472, "step": 33204 }, { "epoch": 0.5771871577812929, "grad_norm": 1.9740278794225623, "learning_rate": 3.999924570518799e-07, "loss": 0.2398, "step": 33205 }, { "epoch": 0.5772045403187958, "grad_norm": 1.7605491315956843, "learning_rate": 3.999648766924133e-07, "loss": 0.2118, "step": 33206 }, { "epoch": 0.5772219228562986, "grad_norm": 1.4056255496300067, "learning_rate": 3.9993729665001203e-07, "loss": 0.2751, "step": 33207 }, { "epoch": 0.5772393053938014, "grad_norm": 1.4777287669823305, "learning_rate": 3.999097169247636e-07, "loss": 0.1451, "step": 33208 }, { "epoch": 0.5772566879313042, "grad_norm": 0.8880902538406572, "learning_rate": 3.9988213751675556e-07, "loss": 0.4761, "step": 33209 }, { "epoch": 0.5772740704688071, "grad_norm": 0.8123388004684373, "learning_rate": 3.998545584260749e-07, "loss": 0.204, "step": 33210 }, { "epoch": 0.5772914530063099, "grad_norm": 2.4383602657044787, "learning_rate": 3.998269796528094e-07, "loss": 0.2958, "step": 33211 }, { "epoch": 0.5773088355438127, "grad_norm": 1.9264228851645002, "learning_rate": 3.9979940119704635e-07, "loss": 0.3304, "step": 33212 }, { "epoch": 0.5773262180813155, "grad_norm": 1.4708512482515714, "learning_rate": 3.9977182305887306e-07, "loss": 0.2968, "step": 33213 }, { "epoch": 0.5773436006188183, "grad_norm": 1.32919984757428, "learning_rate": 3.997442452383772e-07, "loss": 0.2161, "step": 33214 }, { "epoch": 0.5773609831563211, "grad_norm": 1.916448549841761, "learning_rate": 3.997166677356461e-07, "loss": 0.2447, "step": 33215 }, { "epoch": 0.577378365693824, "grad_norm": 2.824000366144394, "learning_rate": 3.996890905507669e-07, "loss": 0.2653, "step": 33216 }, { "epoch": 0.5773957482313268, "grad_norm": 2.4355739086734722, "learning_rate": 3.996615136838273e-07, "loss": 0.368, "step": 33217 }, { "epoch": 0.5774131307688296, "grad_norm": 2.121138698171785, "learning_rate": 3.996339371349145e-07, "loss": 0.3168, "step": 33218 }, { "epoch": 0.5774305133063324, "grad_norm": 1.4730117932823739, "learning_rate": 3.9960636090411613e-07, "loss": 0.3576, "step": 33219 }, { "epoch": 0.5774478958438353, "grad_norm": 2.3464417199180287, "learning_rate": 3.9957878499151937e-07, "loss": 0.2459, "step": 33220 }, { "epoch": 0.5774652783813381, "grad_norm": 1.9421980227367133, "learning_rate": 3.995512093972118e-07, "loss": 0.2323, "step": 33221 }, { "epoch": 0.5774826609188409, "grad_norm": 1.7347339084030888, "learning_rate": 3.9952363412128095e-07, "loss": 0.3434, "step": 33222 }, { "epoch": 0.5775000434563438, "grad_norm": 1.5921745130940723, "learning_rate": 3.9949605916381384e-07, "loss": 0.2343, "step": 33223 }, { "epoch": 0.5775174259938466, "grad_norm": 1.142328655224775, "learning_rate": 3.99468484524898e-07, "loss": 0.1716, "step": 33224 }, { "epoch": 0.5775348085313494, "grad_norm": 1.3752057605367025, "learning_rate": 3.9944091020462087e-07, "loss": 0.2811, "step": 33225 }, { "epoch": 0.5775521910688522, "grad_norm": 1.4616799410303678, "learning_rate": 3.994133362030699e-07, "loss": 0.2607, "step": 33226 }, { "epoch": 0.5775695736063551, "grad_norm": 1.7200299307362716, "learning_rate": 3.993857625203325e-07, "loss": 0.3891, "step": 33227 }, { "epoch": 0.5775869561438579, "grad_norm": 1.3884498347414502, "learning_rate": 3.993581891564961e-07, "loss": 0.366, "step": 33228 }, { "epoch": 0.5776043386813607, "grad_norm": 1.4578498212651874, "learning_rate": 3.9933061611164797e-07, "loss": 0.3011, "step": 33229 }, { "epoch": 0.5776217212188636, "grad_norm": 1.8686521172146238, "learning_rate": 3.993030433858754e-07, "loss": 0.2147, "step": 33230 }, { "epoch": 0.5776391037563664, "grad_norm": 1.4954432810310303, "learning_rate": 3.99275470979266e-07, "loss": 0.3901, "step": 33231 }, { "epoch": 0.5776564862938692, "grad_norm": 1.317240403942436, "learning_rate": 3.9924789889190704e-07, "loss": 0.1558, "step": 33232 }, { "epoch": 0.577673868831372, "grad_norm": 1.2307814055735704, "learning_rate": 3.99220327123886e-07, "loss": 0.2226, "step": 33233 }, { "epoch": 0.5776912513688748, "grad_norm": 2.4556235312158705, "learning_rate": 3.9919275567529035e-07, "loss": 0.1773, "step": 33234 }, { "epoch": 0.5777086339063776, "grad_norm": 1.838046456853532, "learning_rate": 3.991651845462072e-07, "loss": 0.3681, "step": 33235 }, { "epoch": 0.5777260164438804, "grad_norm": 1.694082935846934, "learning_rate": 3.9913761373672415e-07, "loss": 0.3851, "step": 33236 }, { "epoch": 0.5777433989813833, "grad_norm": 2.541377203325994, "learning_rate": 3.9911004324692856e-07, "loss": 0.3686, "step": 33237 }, { "epoch": 0.5777607815188861, "grad_norm": 1.0013853671639013, "learning_rate": 3.990824730769077e-07, "loss": 0.3459, "step": 33238 }, { "epoch": 0.5777781640563889, "grad_norm": 1.2736829060030692, "learning_rate": 3.9905490322674904e-07, "loss": 0.2863, "step": 33239 }, { "epoch": 0.5777955465938918, "grad_norm": 1.2461579658114292, "learning_rate": 3.9902733369654e-07, "loss": 0.1216, "step": 33240 }, { "epoch": 0.5778129291313946, "grad_norm": 1.2757289106301366, "learning_rate": 3.98999764486368e-07, "loss": 0.3117, "step": 33241 }, { "epoch": 0.5778303116688974, "grad_norm": 1.787782476399597, "learning_rate": 3.9897219559632035e-07, "loss": 0.34, "step": 33242 }, { "epoch": 0.5778476942064003, "grad_norm": 2.014477206313904, "learning_rate": 3.989446270264843e-07, "loss": 0.2717, "step": 33243 }, { "epoch": 0.5778650767439031, "grad_norm": 1.4190302017091858, "learning_rate": 3.989170587769475e-07, "loss": 0.2291, "step": 33244 }, { "epoch": 0.5778824592814059, "grad_norm": 1.31520944968125, "learning_rate": 3.988894908477971e-07, "loss": 0.1967, "step": 33245 }, { "epoch": 0.5778998418189087, "grad_norm": 1.7385266198614386, "learning_rate": 3.988619232391205e-07, "loss": 0.4145, "step": 33246 }, { "epoch": 0.5779172243564116, "grad_norm": 1.7746403050229382, "learning_rate": 3.9883435595100545e-07, "loss": 0.2793, "step": 33247 }, { "epoch": 0.5779346068939144, "grad_norm": 1.4089139477181785, "learning_rate": 3.988067889835388e-07, "loss": 0.2298, "step": 33248 }, { "epoch": 0.5779519894314172, "grad_norm": 1.5576854391384285, "learning_rate": 3.9877922233680805e-07, "loss": 0.3623, "step": 33249 }, { "epoch": 0.5779693719689201, "grad_norm": 1.29935762189492, "learning_rate": 3.987516560109008e-07, "loss": 0.2995, "step": 33250 }, { "epoch": 0.5779867545064229, "grad_norm": 2.203662403585329, "learning_rate": 3.987240900059042e-07, "loss": 0.291, "step": 33251 }, { "epoch": 0.5780041370439257, "grad_norm": 1.974684292488483, "learning_rate": 3.986965243219058e-07, "loss": 0.3205, "step": 33252 }, { "epoch": 0.5780215195814284, "grad_norm": 1.314004610946055, "learning_rate": 3.98668958958993e-07, "loss": 0.2499, "step": 33253 }, { "epoch": 0.5780389021189313, "grad_norm": 2.373992554470064, "learning_rate": 3.9864139391725293e-07, "loss": 0.2421, "step": 33254 }, { "epoch": 0.5780562846564341, "grad_norm": 2.5271601142812883, "learning_rate": 3.98613829196773e-07, "loss": 0.2422, "step": 33255 }, { "epoch": 0.5780736671939369, "grad_norm": 1.2545017129008542, "learning_rate": 3.9858626479764073e-07, "loss": 0.1993, "step": 33256 }, { "epoch": 0.5780910497314398, "grad_norm": 1.2789652623727896, "learning_rate": 3.9855870071994337e-07, "loss": 0.1758, "step": 33257 }, { "epoch": 0.5781084322689426, "grad_norm": 1.2955329913387799, "learning_rate": 3.985311369637684e-07, "loss": 0.2588, "step": 33258 }, { "epoch": 0.5781258148064454, "grad_norm": 1.8799813315092482, "learning_rate": 3.985035735292032e-07, "loss": 0.207, "step": 33259 }, { "epoch": 0.5781431973439483, "grad_norm": 1.5673945502568725, "learning_rate": 3.9847601041633494e-07, "loss": 0.2555, "step": 33260 }, { "epoch": 0.5781605798814511, "grad_norm": 1.20906790989869, "learning_rate": 3.98448447625251e-07, "loss": 0.224, "step": 33261 }, { "epoch": 0.5781779624189539, "grad_norm": 1.1669556527973077, "learning_rate": 3.9842088515603895e-07, "loss": 0.3439, "step": 33262 }, { "epoch": 0.5781953449564567, "grad_norm": 1.5101219462503948, "learning_rate": 3.983933230087859e-07, "loss": 0.254, "step": 33263 }, { "epoch": 0.5782127274939596, "grad_norm": 1.4510340302890596, "learning_rate": 3.983657611835795e-07, "loss": 0.2033, "step": 33264 }, { "epoch": 0.5782301100314624, "grad_norm": 0.9777598563446821, "learning_rate": 3.9833819968050673e-07, "loss": 0.206, "step": 33265 }, { "epoch": 0.5782474925689652, "grad_norm": 1.484450975996976, "learning_rate": 3.9831063849965553e-07, "loss": 0.2908, "step": 33266 }, { "epoch": 0.5782648751064681, "grad_norm": 1.2708498281084935, "learning_rate": 3.982830776411127e-07, "loss": 0.2359, "step": 33267 }, { "epoch": 0.5782822576439709, "grad_norm": 1.4380099575327563, "learning_rate": 3.982555171049657e-07, "loss": 0.2698, "step": 33268 }, { "epoch": 0.5782996401814737, "grad_norm": 1.62486233208749, "learning_rate": 3.9822795689130205e-07, "loss": 0.3445, "step": 33269 }, { "epoch": 0.5783170227189766, "grad_norm": 1.1914146308452618, "learning_rate": 3.982003970002091e-07, "loss": 0.2116, "step": 33270 }, { "epoch": 0.5783344052564794, "grad_norm": 1.4850996866016855, "learning_rate": 3.981728374317739e-07, "loss": 0.2015, "step": 33271 }, { "epoch": 0.5783517877939822, "grad_norm": 1.0892690747476532, "learning_rate": 3.981452781860843e-07, "loss": 0.3119, "step": 33272 }, { "epoch": 0.5783691703314849, "grad_norm": 1.8805572748170847, "learning_rate": 3.9811771926322735e-07, "loss": 0.3763, "step": 33273 }, { "epoch": 0.5783865528689878, "grad_norm": 2.049738424631269, "learning_rate": 3.9809016066329026e-07, "loss": 0.429, "step": 33274 }, { "epoch": 0.5784039354064906, "grad_norm": 1.570583465950529, "learning_rate": 3.980626023863607e-07, "loss": 0.2871, "step": 33275 }, { "epoch": 0.5784213179439934, "grad_norm": 2.848909089401678, "learning_rate": 3.980350444325257e-07, "loss": 0.2403, "step": 33276 }, { "epoch": 0.5784387004814963, "grad_norm": 1.085810348365812, "learning_rate": 3.9800748680187284e-07, "loss": 0.3364, "step": 33277 }, { "epoch": 0.5784560830189991, "grad_norm": 2.2775030717918145, "learning_rate": 3.9797992949448955e-07, "loss": 0.3171, "step": 33278 }, { "epoch": 0.5784734655565019, "grad_norm": 1.8510353124068841, "learning_rate": 3.97952372510463e-07, "loss": 0.2038, "step": 33279 }, { "epoch": 0.5784908480940048, "grad_norm": 1.7956466831641473, "learning_rate": 3.979248158498803e-07, "loss": 0.1614, "step": 33280 }, { "epoch": 0.5785082306315076, "grad_norm": 1.8059107732235684, "learning_rate": 3.978972595128292e-07, "loss": 0.4191, "step": 33281 }, { "epoch": 0.5785256131690104, "grad_norm": 3.492702681191294, "learning_rate": 3.978697034993968e-07, "loss": 0.2397, "step": 33282 }, { "epoch": 0.5785429957065132, "grad_norm": 1.1873384154194873, "learning_rate": 3.978421478096706e-07, "loss": 0.2818, "step": 33283 }, { "epoch": 0.5785603782440161, "grad_norm": 1.437162259244814, "learning_rate": 3.978145924437378e-07, "loss": 0.387, "step": 33284 }, { "epoch": 0.5785777607815189, "grad_norm": 0.8887868642360838, "learning_rate": 3.977870374016861e-07, "loss": 0.2602, "step": 33285 }, { "epoch": 0.5785951433190217, "grad_norm": 1.747107014684758, "learning_rate": 3.977594826836023e-07, "loss": 0.2041, "step": 33286 }, { "epoch": 0.5786125258565246, "grad_norm": 0.8466895065737982, "learning_rate": 3.9773192828957394e-07, "loss": 0.2182, "step": 33287 }, { "epoch": 0.5786299083940274, "grad_norm": 0.9870000344991855, "learning_rate": 3.977043742196883e-07, "loss": 0.1468, "step": 33288 }, { "epoch": 0.5786472909315302, "grad_norm": 1.5408297213371018, "learning_rate": 3.97676820474033e-07, "loss": 0.1699, "step": 33289 }, { "epoch": 0.578664673469033, "grad_norm": 1.4825335411293585, "learning_rate": 3.9764926705269504e-07, "loss": 0.2511, "step": 33290 }, { "epoch": 0.5786820560065359, "grad_norm": 1.0269121826661947, "learning_rate": 3.9762171395576207e-07, "loss": 0.1518, "step": 33291 }, { "epoch": 0.5786994385440387, "grad_norm": 2.5624352491886904, "learning_rate": 3.975941611833211e-07, "loss": 0.1995, "step": 33292 }, { "epoch": 0.5787168210815414, "grad_norm": 0.9742225320004381, "learning_rate": 3.9756660873545966e-07, "loss": 0.1885, "step": 33293 }, { "epoch": 0.5787342036190443, "grad_norm": 1.5309944307231333, "learning_rate": 3.9753905661226497e-07, "loss": 0.2889, "step": 33294 }, { "epoch": 0.5787515861565471, "grad_norm": 1.3597427290691984, "learning_rate": 3.975115048138244e-07, "loss": 0.2376, "step": 33295 }, { "epoch": 0.5787689686940499, "grad_norm": 1.1234806677547848, "learning_rate": 3.974839533402252e-07, "loss": 0.156, "step": 33296 }, { "epoch": 0.5787863512315528, "grad_norm": 1.8260075125932393, "learning_rate": 3.9745640219155495e-07, "loss": 0.1726, "step": 33297 }, { "epoch": 0.5788037337690556, "grad_norm": 1.0065608679566944, "learning_rate": 3.9742885136790076e-07, "loss": 0.1522, "step": 33298 }, { "epoch": 0.5788211163065584, "grad_norm": 1.3322174044199093, "learning_rate": 3.974013008693499e-07, "loss": 0.143, "step": 33299 }, { "epoch": 0.5788384988440612, "grad_norm": 1.5774928025870802, "learning_rate": 3.973737506959899e-07, "loss": 0.3573, "step": 33300 }, { "epoch": 0.5788558813815641, "grad_norm": 1.6134709373144105, "learning_rate": 3.9734620084790785e-07, "loss": 0.2137, "step": 33301 }, { "epoch": 0.5788732639190669, "grad_norm": 1.2925256499191657, "learning_rate": 3.9731865132519113e-07, "loss": 0.3339, "step": 33302 }, { "epoch": 0.5788906464565697, "grad_norm": 1.2019597265089565, "learning_rate": 3.9729110212792734e-07, "loss": 0.2097, "step": 33303 }, { "epoch": 0.5789080289940726, "grad_norm": 1.1709102876404538, "learning_rate": 3.972635532562036e-07, "loss": 0.3066, "step": 33304 }, { "epoch": 0.5789254115315754, "grad_norm": 1.4633609710790707, "learning_rate": 3.97236004710107e-07, "loss": 0.2472, "step": 33305 }, { "epoch": 0.5789427940690782, "grad_norm": 1.5930250481711352, "learning_rate": 3.9720845648972513e-07, "loss": 0.1945, "step": 33306 }, { "epoch": 0.5789601766065811, "grad_norm": 1.5621585975375973, "learning_rate": 3.971809085951451e-07, "loss": 0.2955, "step": 33307 }, { "epoch": 0.5789775591440839, "grad_norm": 2.588396900567986, "learning_rate": 3.9715336102645453e-07, "loss": 0.3992, "step": 33308 }, { "epoch": 0.5789949416815867, "grad_norm": 1.704576935530155, "learning_rate": 3.9712581378374044e-07, "loss": 0.4483, "step": 33309 }, { "epoch": 0.5790123242190895, "grad_norm": 0.8463907208047821, "learning_rate": 3.9709826686709057e-07, "loss": 0.2089, "step": 33310 }, { "epoch": 0.5790297067565924, "grad_norm": 2.286121794211814, "learning_rate": 3.9707072027659163e-07, "loss": 0.2518, "step": 33311 }, { "epoch": 0.5790470892940952, "grad_norm": 1.3585500400728716, "learning_rate": 3.9704317401233136e-07, "loss": 0.1899, "step": 33312 }, { "epoch": 0.5790644718315979, "grad_norm": 1.252352040834754, "learning_rate": 3.9701562807439677e-07, "loss": 0.108, "step": 33313 }, { "epoch": 0.5790818543691008, "grad_norm": 2.22884120218635, "learning_rate": 3.969880824628754e-07, "loss": 0.2516, "step": 33314 }, { "epoch": 0.5790992369066036, "grad_norm": 0.9436207733239392, "learning_rate": 3.969605371778545e-07, "loss": 0.2677, "step": 33315 }, { "epoch": 0.5791166194441064, "grad_norm": 1.4935647432434482, "learning_rate": 3.969329922194215e-07, "loss": 0.2856, "step": 33316 }, { "epoch": 0.5791340019816092, "grad_norm": 1.9183299370648614, "learning_rate": 3.969054475876635e-07, "loss": 0.178, "step": 33317 }, { "epoch": 0.5791513845191121, "grad_norm": 0.7837164779211806, "learning_rate": 3.9687790328266777e-07, "loss": 0.1607, "step": 33318 }, { "epoch": 0.5791687670566149, "grad_norm": 1.4264847195543906, "learning_rate": 3.9685035930452173e-07, "loss": 0.2406, "step": 33319 }, { "epoch": 0.5791861495941177, "grad_norm": 1.2851747323784257, "learning_rate": 3.968228156533128e-07, "loss": 0.5147, "step": 33320 }, { "epoch": 0.5792035321316206, "grad_norm": 1.4206126887013124, "learning_rate": 3.967952723291279e-07, "loss": 0.2415, "step": 33321 }, { "epoch": 0.5792209146691234, "grad_norm": 2.921910480694832, "learning_rate": 3.9676772933205475e-07, "loss": 0.2401, "step": 33322 }, { "epoch": 0.5792382972066262, "grad_norm": 1.0401186348880693, "learning_rate": 3.9674018666218064e-07, "loss": 0.1229, "step": 33323 }, { "epoch": 0.5792556797441291, "grad_norm": 1.4248714541919505, "learning_rate": 3.9671264431959243e-07, "loss": 0.2517, "step": 33324 }, { "epoch": 0.5792730622816319, "grad_norm": 1.4178935640673918, "learning_rate": 3.966851023043778e-07, "loss": 0.146, "step": 33325 }, { "epoch": 0.5792904448191347, "grad_norm": 2.1937031695841402, "learning_rate": 3.96657560616624e-07, "loss": 0.1632, "step": 33326 }, { "epoch": 0.5793078273566376, "grad_norm": 1.2582187729426035, "learning_rate": 3.966300192564181e-07, "loss": 0.3669, "step": 33327 }, { "epoch": 0.5793252098941404, "grad_norm": 1.2246834580351407, "learning_rate": 3.966024782238476e-07, "loss": 0.1881, "step": 33328 }, { "epoch": 0.5793425924316432, "grad_norm": 2.0015725008688667, "learning_rate": 3.9657493751899993e-07, "loss": 0.3237, "step": 33329 }, { "epoch": 0.579359974969146, "grad_norm": 1.1528471183338682, "learning_rate": 3.96547397141962e-07, "loss": 0.2235, "step": 33330 }, { "epoch": 0.5793773575066489, "grad_norm": 1.3157495317524803, "learning_rate": 3.965198570928214e-07, "loss": 0.1965, "step": 33331 }, { "epoch": 0.5793947400441516, "grad_norm": 0.9423500915274101, "learning_rate": 3.9649231737166515e-07, "loss": 0.2333, "step": 33332 }, { "epoch": 0.5794121225816544, "grad_norm": 2.0562774025241803, "learning_rate": 3.9646477797858093e-07, "loss": 0.2879, "step": 33333 }, { "epoch": 0.5794295051191573, "grad_norm": 1.4941383484535535, "learning_rate": 3.9643723891365564e-07, "loss": 0.2255, "step": 33334 }, { "epoch": 0.5794468876566601, "grad_norm": 5.137817701528693, "learning_rate": 3.9640970017697696e-07, "loss": 0.4371, "step": 33335 }, { "epoch": 0.5794642701941629, "grad_norm": 1.4752317569520847, "learning_rate": 3.963821617686317e-07, "loss": 0.2488, "step": 33336 }, { "epoch": 0.5794816527316657, "grad_norm": 1.1661351572408825, "learning_rate": 3.963546236887075e-07, "loss": 0.3095, "step": 33337 }, { "epoch": 0.5794990352691686, "grad_norm": 1.594074106863142, "learning_rate": 3.963270859372914e-07, "loss": 0.215, "step": 33338 }, { "epoch": 0.5795164178066714, "grad_norm": 1.5752134415571202, "learning_rate": 3.9629954851447095e-07, "loss": 0.2974, "step": 33339 }, { "epoch": 0.5795338003441742, "grad_norm": 1.0096930764202516, "learning_rate": 3.962720114203332e-07, "loss": 0.2867, "step": 33340 }, { "epoch": 0.5795511828816771, "grad_norm": 1.8451219958765859, "learning_rate": 3.9624447465496556e-07, "loss": 0.3039, "step": 33341 }, { "epoch": 0.5795685654191799, "grad_norm": 1.9770448157031724, "learning_rate": 3.9621693821845546e-07, "loss": 0.2328, "step": 33342 }, { "epoch": 0.5795859479566827, "grad_norm": 1.7623391954664958, "learning_rate": 3.9618940211088985e-07, "loss": 0.1828, "step": 33343 }, { "epoch": 0.5796033304941856, "grad_norm": 2.877247753907494, "learning_rate": 3.9616186633235605e-07, "loss": 0.2344, "step": 33344 }, { "epoch": 0.5796207130316884, "grad_norm": 2.5718765000528716, "learning_rate": 3.9613433088294156e-07, "loss": 0.2969, "step": 33345 }, { "epoch": 0.5796380955691912, "grad_norm": 1.6900239089625455, "learning_rate": 3.961067957627334e-07, "loss": 0.3473, "step": 33346 }, { "epoch": 0.579655478106694, "grad_norm": 2.8061276620743505, "learning_rate": 3.960792609718191e-07, "loss": 0.1875, "step": 33347 }, { "epoch": 0.5796728606441969, "grad_norm": 1.8069066464645507, "learning_rate": 3.9605172651028586e-07, "loss": 0.3432, "step": 33348 }, { "epoch": 0.5796902431816997, "grad_norm": 2.220463227820807, "learning_rate": 3.9602419237822084e-07, "loss": 0.3013, "step": 33349 }, { "epoch": 0.5797076257192025, "grad_norm": 2.2813980950774884, "learning_rate": 3.9599665857571125e-07, "loss": 0.3311, "step": 33350 }, { "epoch": 0.5797250082567054, "grad_norm": 2.2097691066344094, "learning_rate": 3.959691251028446e-07, "loss": 0.2005, "step": 33351 }, { "epoch": 0.5797423907942081, "grad_norm": 1.5032519413944327, "learning_rate": 3.959415919597079e-07, "loss": 0.2059, "step": 33352 }, { "epoch": 0.5797597733317109, "grad_norm": 1.3117702744840543, "learning_rate": 3.9591405914638864e-07, "loss": 0.1671, "step": 33353 }, { "epoch": 0.5797771558692137, "grad_norm": 1.7162784846540236, "learning_rate": 3.958865266629741e-07, "loss": 0.2445, "step": 33354 }, { "epoch": 0.5797945384067166, "grad_norm": 2.0039576013407245, "learning_rate": 3.958589945095513e-07, "loss": 0.197, "step": 33355 }, { "epoch": 0.5798119209442194, "grad_norm": 0.9266716430178231, "learning_rate": 3.9583146268620773e-07, "loss": 0.2055, "step": 33356 }, { "epoch": 0.5798293034817222, "grad_norm": 1.604180385741248, "learning_rate": 3.9580393119303045e-07, "loss": 0.2072, "step": 33357 }, { "epoch": 0.5798466860192251, "grad_norm": 1.7188568989185768, "learning_rate": 3.957764000301069e-07, "loss": 0.2737, "step": 33358 }, { "epoch": 0.5798640685567279, "grad_norm": 1.403553810545666, "learning_rate": 3.957488691975244e-07, "loss": 0.2439, "step": 33359 }, { "epoch": 0.5798814510942307, "grad_norm": 2.327573966788776, "learning_rate": 3.9572133869536993e-07, "loss": 0.2471, "step": 33360 }, { "epoch": 0.5798988336317336, "grad_norm": 1.1455587089482426, "learning_rate": 3.9569380852373113e-07, "loss": 0.2423, "step": 33361 }, { "epoch": 0.5799162161692364, "grad_norm": 1.165426267635155, "learning_rate": 3.9566627868269494e-07, "loss": 0.2888, "step": 33362 }, { "epoch": 0.5799335987067392, "grad_norm": 2.0297386267170574, "learning_rate": 3.956387491723486e-07, "loss": 0.2315, "step": 33363 }, { "epoch": 0.579950981244242, "grad_norm": 1.2756365088417039, "learning_rate": 3.956112199927796e-07, "loss": 0.2385, "step": 33364 }, { "epoch": 0.5799683637817449, "grad_norm": 1.965670228412473, "learning_rate": 3.9558369114407495e-07, "loss": 0.2581, "step": 33365 }, { "epoch": 0.5799857463192477, "grad_norm": 2.400582881626392, "learning_rate": 3.955561626263222e-07, "loss": 0.3821, "step": 33366 }, { "epoch": 0.5800031288567505, "grad_norm": 1.3638048044901032, "learning_rate": 3.955286344396085e-07, "loss": 0.3419, "step": 33367 }, { "epoch": 0.5800205113942534, "grad_norm": 1.8693121022058976, "learning_rate": 3.955011065840209e-07, "loss": 0.438, "step": 33368 }, { "epoch": 0.5800378939317562, "grad_norm": 1.1860301177794532, "learning_rate": 3.9547357905964673e-07, "loss": 0.192, "step": 33369 }, { "epoch": 0.580055276469259, "grad_norm": 1.501897129655977, "learning_rate": 3.9544605186657345e-07, "loss": 0.3632, "step": 33370 }, { "epoch": 0.5800726590067619, "grad_norm": 3.0509107620248446, "learning_rate": 3.95418525004888e-07, "loss": 0.179, "step": 33371 }, { "epoch": 0.5800900415442646, "grad_norm": 1.0083005520821136, "learning_rate": 3.953909984746778e-07, "loss": 0.1242, "step": 33372 }, { "epoch": 0.5801074240817674, "grad_norm": 3.0264346289062174, "learning_rate": 3.9536347227603037e-07, "loss": 0.3574, "step": 33373 }, { "epoch": 0.5801248066192702, "grad_norm": 1.92035919770142, "learning_rate": 3.953359464090324e-07, "loss": 0.3215, "step": 33374 }, { "epoch": 0.5801421891567731, "grad_norm": 2.0862057374634975, "learning_rate": 3.9530842087377134e-07, "loss": 0.2388, "step": 33375 }, { "epoch": 0.5801595716942759, "grad_norm": 1.1380730418710965, "learning_rate": 3.9528089567033467e-07, "loss": 0.2729, "step": 33376 }, { "epoch": 0.5801769542317787, "grad_norm": 1.6454682708674244, "learning_rate": 3.9525337079880934e-07, "loss": 0.2032, "step": 33377 }, { "epoch": 0.5801943367692816, "grad_norm": 1.570357752853835, "learning_rate": 3.9522584625928276e-07, "loss": 0.1949, "step": 33378 }, { "epoch": 0.5802117193067844, "grad_norm": 1.8465291885698527, "learning_rate": 3.951983220518422e-07, "loss": 0.1591, "step": 33379 }, { "epoch": 0.5802291018442872, "grad_norm": 1.4563899491264165, "learning_rate": 3.9517079817657465e-07, "loss": 0.3324, "step": 33380 }, { "epoch": 0.58024648438179, "grad_norm": 1.811651014233105, "learning_rate": 3.9514327463356767e-07, "loss": 0.4457, "step": 33381 }, { "epoch": 0.5802638669192929, "grad_norm": 2.09504923980378, "learning_rate": 3.9511575142290825e-07, "loss": 0.1674, "step": 33382 }, { "epoch": 0.5802812494567957, "grad_norm": 1.0501533891159416, "learning_rate": 3.9508822854468364e-07, "loss": 0.2132, "step": 33383 }, { "epoch": 0.5802986319942985, "grad_norm": 1.5307874797402188, "learning_rate": 3.9506070599898135e-07, "loss": 0.2711, "step": 33384 }, { "epoch": 0.5803160145318014, "grad_norm": 2.8983827074830613, "learning_rate": 3.950331837858882e-07, "loss": 0.3264, "step": 33385 }, { "epoch": 0.5803333970693042, "grad_norm": 1.0942888922573748, "learning_rate": 3.950056619054919e-07, "loss": 0.1514, "step": 33386 }, { "epoch": 0.580350779606807, "grad_norm": 1.1617903267849516, "learning_rate": 3.9497814035787924e-07, "loss": 0.1639, "step": 33387 }, { "epoch": 0.5803681621443099, "grad_norm": 1.0163036134515036, "learning_rate": 3.949506191431376e-07, "loss": 0.1524, "step": 33388 }, { "epoch": 0.5803855446818127, "grad_norm": 1.824381226402634, "learning_rate": 3.9492309826135435e-07, "loss": 0.1945, "step": 33389 }, { "epoch": 0.5804029272193155, "grad_norm": 1.2662327379035008, "learning_rate": 3.948955777126166e-07, "loss": 0.1797, "step": 33390 }, { "epoch": 0.5804203097568184, "grad_norm": 2.0072285629821605, "learning_rate": 3.9486805749701144e-07, "loss": 0.2379, "step": 33391 }, { "epoch": 0.5804376922943211, "grad_norm": 1.455045291531194, "learning_rate": 3.9484053761462653e-07, "loss": 0.2233, "step": 33392 }, { "epoch": 0.5804550748318239, "grad_norm": 1.6954793311293694, "learning_rate": 3.9481301806554867e-07, "loss": 0.2181, "step": 33393 }, { "epoch": 0.5804724573693267, "grad_norm": 2.1490575927763094, "learning_rate": 3.9478549884986514e-07, "loss": 0.1568, "step": 33394 }, { "epoch": 0.5804898399068296, "grad_norm": 2.179113077942394, "learning_rate": 3.9475797996766325e-07, "loss": 0.3031, "step": 33395 }, { "epoch": 0.5805072224443324, "grad_norm": 1.1686331460342139, "learning_rate": 3.9473046141903024e-07, "loss": 0.1751, "step": 33396 }, { "epoch": 0.5805246049818352, "grad_norm": 2.9529575036964375, "learning_rate": 3.9470294320405335e-07, "loss": 0.1729, "step": 33397 }, { "epoch": 0.580541987519338, "grad_norm": 2.222358219087932, "learning_rate": 3.946754253228199e-07, "loss": 0.2703, "step": 33398 }, { "epoch": 0.5805593700568409, "grad_norm": 1.4004733499994977, "learning_rate": 3.9464790777541677e-07, "loss": 0.5161, "step": 33399 }, { "epoch": 0.5805767525943437, "grad_norm": 7.477868424984777, "learning_rate": 3.946203905619314e-07, "loss": 0.3662, "step": 33400 }, { "epoch": 0.5805941351318465, "grad_norm": 1.62917822377767, "learning_rate": 3.9459287368245104e-07, "loss": 0.1734, "step": 33401 }, { "epoch": 0.5806115176693494, "grad_norm": 1.3286353037248781, "learning_rate": 3.9456535713706275e-07, "loss": 0.2829, "step": 33402 }, { "epoch": 0.5806289002068522, "grad_norm": 2.2577403168553594, "learning_rate": 3.9453784092585395e-07, "loss": 0.2814, "step": 33403 }, { "epoch": 0.580646282744355, "grad_norm": 1.3785446402947203, "learning_rate": 3.945103250489116e-07, "loss": 0.1974, "step": 33404 }, { "epoch": 0.5806636652818579, "grad_norm": 1.5166240498683834, "learning_rate": 3.944828095063234e-07, "loss": 0.2113, "step": 33405 }, { "epoch": 0.5806810478193607, "grad_norm": 2.235304367917844, "learning_rate": 3.944552942981759e-07, "loss": 0.3317, "step": 33406 }, { "epoch": 0.5806984303568635, "grad_norm": 1.7706600761431364, "learning_rate": 3.944277794245568e-07, "loss": 0.1972, "step": 33407 }, { "epoch": 0.5807158128943664, "grad_norm": 1.4589430445701748, "learning_rate": 3.94400264885553e-07, "loss": 0.2258, "step": 33408 }, { "epoch": 0.5807331954318692, "grad_norm": 1.2275819971892072, "learning_rate": 3.9437275068125195e-07, "loss": 0.2494, "step": 33409 }, { "epoch": 0.580750577969372, "grad_norm": 1.3033578696677086, "learning_rate": 3.943452368117407e-07, "loss": 0.139, "step": 33410 }, { "epoch": 0.5807679605068748, "grad_norm": 2.0957802014479094, "learning_rate": 3.9431772327710667e-07, "loss": 0.2426, "step": 33411 }, { "epoch": 0.5807853430443776, "grad_norm": 2.272430972647155, "learning_rate": 3.942902100774368e-07, "loss": 0.4405, "step": 33412 }, { "epoch": 0.5808027255818804, "grad_norm": 1.4984857582884812, "learning_rate": 3.942626972128183e-07, "loss": 0.1281, "step": 33413 }, { "epoch": 0.5808201081193832, "grad_norm": 2.6328744464895495, "learning_rate": 3.942351846833386e-07, "loss": 0.2093, "step": 33414 }, { "epoch": 0.5808374906568861, "grad_norm": 2.010391636607155, "learning_rate": 3.942076724890848e-07, "loss": 0.2651, "step": 33415 }, { "epoch": 0.5808548731943889, "grad_norm": 1.9820997034330445, "learning_rate": 3.9418016063014397e-07, "loss": 0.3137, "step": 33416 }, { "epoch": 0.5808722557318917, "grad_norm": 1.6547648632344543, "learning_rate": 3.9415264910660364e-07, "loss": 0.334, "step": 33417 }, { "epoch": 0.5808896382693945, "grad_norm": 1.9324378872645773, "learning_rate": 3.941251379185506e-07, "loss": 0.209, "step": 33418 }, { "epoch": 0.5809070208068974, "grad_norm": 1.4728880371323854, "learning_rate": 3.9409762706607226e-07, "loss": 0.1984, "step": 33419 }, { "epoch": 0.5809244033444002, "grad_norm": 3.2223752038721445, "learning_rate": 3.940701165492558e-07, "loss": 0.3575, "step": 33420 }, { "epoch": 0.580941785881903, "grad_norm": 1.4702601296866338, "learning_rate": 3.9404260636818836e-07, "loss": 0.2067, "step": 33421 }, { "epoch": 0.5809591684194059, "grad_norm": 1.3224337061372216, "learning_rate": 3.9401509652295727e-07, "loss": 0.1714, "step": 33422 }, { "epoch": 0.5809765509569087, "grad_norm": 1.4057342633570629, "learning_rate": 3.9398758701364965e-07, "loss": 0.3045, "step": 33423 }, { "epoch": 0.5809939334944115, "grad_norm": 1.1682481681989025, "learning_rate": 3.939600778403528e-07, "loss": 0.2194, "step": 33424 }, { "epoch": 0.5810113160319144, "grad_norm": 1.0662704783418053, "learning_rate": 3.9393256900315354e-07, "loss": 0.214, "step": 33425 }, { "epoch": 0.5810286985694172, "grad_norm": 1.6142431501296264, "learning_rate": 3.9390506050213943e-07, "loss": 0.2727, "step": 33426 }, { "epoch": 0.58104608110692, "grad_norm": 1.7818688362110973, "learning_rate": 3.9387755233739746e-07, "loss": 0.3859, "step": 33427 }, { "epoch": 0.5810634636444229, "grad_norm": 1.157786163474422, "learning_rate": 3.9385004450901497e-07, "loss": 0.364, "step": 33428 }, { "epoch": 0.5810808461819257, "grad_norm": 1.4926967522485806, "learning_rate": 3.93822537017079e-07, "loss": 0.2173, "step": 33429 }, { "epoch": 0.5810982287194285, "grad_norm": 1.6820930165297414, "learning_rate": 3.937950298616771e-07, "loss": 0.3734, "step": 33430 }, { "epoch": 0.5811156112569313, "grad_norm": 1.2335633710944214, "learning_rate": 3.937675230428959e-07, "loss": 0.2495, "step": 33431 }, { "epoch": 0.5811329937944341, "grad_norm": 1.1362621169675664, "learning_rate": 3.9374001656082293e-07, "loss": 0.4039, "step": 33432 }, { "epoch": 0.5811503763319369, "grad_norm": 1.209372400556821, "learning_rate": 3.937125104155452e-07, "loss": 0.2617, "step": 33433 }, { "epoch": 0.5811677588694397, "grad_norm": 0.9964854382947618, "learning_rate": 3.936850046071501e-07, "loss": 0.1351, "step": 33434 }, { "epoch": 0.5811851414069426, "grad_norm": 1.8292056641929666, "learning_rate": 3.9365749913572456e-07, "loss": 0.1866, "step": 33435 }, { "epoch": 0.5812025239444454, "grad_norm": 2.1507626491189495, "learning_rate": 3.9362999400135614e-07, "loss": 0.155, "step": 33436 }, { "epoch": 0.5812199064819482, "grad_norm": 1.7243623493101858, "learning_rate": 3.936024892041317e-07, "loss": 0.2938, "step": 33437 }, { "epoch": 0.581237289019451, "grad_norm": 0.9260661283954874, "learning_rate": 3.935749847441384e-07, "loss": 0.1455, "step": 33438 }, { "epoch": 0.5812546715569539, "grad_norm": 1.80761421166629, "learning_rate": 3.9354748062146346e-07, "loss": 0.2026, "step": 33439 }, { "epoch": 0.5812720540944567, "grad_norm": 1.8334891252879888, "learning_rate": 3.9351997683619427e-07, "loss": 0.178, "step": 33440 }, { "epoch": 0.5812894366319595, "grad_norm": 2.6413405350652335, "learning_rate": 3.9349247338841763e-07, "loss": 0.2694, "step": 33441 }, { "epoch": 0.5813068191694624, "grad_norm": 2.4660318555998892, "learning_rate": 3.934649702782211e-07, "loss": 0.1742, "step": 33442 }, { "epoch": 0.5813242017069652, "grad_norm": 2.6760155895116413, "learning_rate": 3.934374675056917e-07, "loss": 0.2422, "step": 33443 }, { "epoch": 0.581341584244468, "grad_norm": 4.632479942985817, "learning_rate": 3.934099650709165e-07, "loss": 0.4416, "step": 33444 }, { "epoch": 0.5813589667819709, "grad_norm": 1.1288705588580665, "learning_rate": 3.9338246297398273e-07, "loss": 0.3151, "step": 33445 }, { "epoch": 0.5813763493194737, "grad_norm": 2.247519073760045, "learning_rate": 3.933549612149776e-07, "loss": 0.4803, "step": 33446 }, { "epoch": 0.5813937318569765, "grad_norm": 1.8691457848681412, "learning_rate": 3.933274597939882e-07, "loss": 0.184, "step": 33447 }, { "epoch": 0.5814111143944793, "grad_norm": 3.4162543479187377, "learning_rate": 3.9329995871110186e-07, "loss": 0.3221, "step": 33448 }, { "epoch": 0.5814284969319822, "grad_norm": 1.4262824562120384, "learning_rate": 3.9327245796640566e-07, "loss": 0.2824, "step": 33449 }, { "epoch": 0.581445879469485, "grad_norm": 1.2636105664901816, "learning_rate": 3.932449575599866e-07, "loss": 0.2713, "step": 33450 }, { "epoch": 0.5814632620069878, "grad_norm": 1.316450740451531, "learning_rate": 3.932174574919321e-07, "loss": 0.2103, "step": 33451 }, { "epoch": 0.5814806445444906, "grad_norm": 1.346355826522979, "learning_rate": 3.9318995776232906e-07, "loss": 0.3618, "step": 33452 }, { "epoch": 0.5814980270819934, "grad_norm": 2.2747422955236467, "learning_rate": 3.9316245837126495e-07, "loss": 0.2374, "step": 33453 }, { "epoch": 0.5815154096194962, "grad_norm": 2.080561300555413, "learning_rate": 3.9313495931882655e-07, "loss": 0.2707, "step": 33454 }, { "epoch": 0.581532792156999, "grad_norm": 2.5159399482970652, "learning_rate": 3.931074606051017e-07, "loss": 0.3497, "step": 33455 }, { "epoch": 0.5815501746945019, "grad_norm": 1.3208915293634895, "learning_rate": 3.930799622301767e-07, "loss": 0.2719, "step": 33456 }, { "epoch": 0.5815675572320047, "grad_norm": 2.0106114136789484, "learning_rate": 3.930524641941392e-07, "loss": 0.2679, "step": 33457 }, { "epoch": 0.5815849397695075, "grad_norm": 1.5439677443527005, "learning_rate": 3.9302496649707617e-07, "loss": 0.2543, "step": 33458 }, { "epoch": 0.5816023223070104, "grad_norm": 1.6872866746081694, "learning_rate": 3.9299746913907494e-07, "loss": 0.1558, "step": 33459 }, { "epoch": 0.5816197048445132, "grad_norm": 1.6333769414151258, "learning_rate": 3.929699721202225e-07, "loss": 0.3299, "step": 33460 }, { "epoch": 0.581637087382016, "grad_norm": 2.017817567714965, "learning_rate": 3.929424754406062e-07, "loss": 0.3242, "step": 33461 }, { "epoch": 0.5816544699195189, "grad_norm": 1.2865998979797995, "learning_rate": 3.9291497910031316e-07, "loss": 0.1373, "step": 33462 }, { "epoch": 0.5816718524570217, "grad_norm": 1.3439699120598598, "learning_rate": 3.928874830994304e-07, "loss": 0.1888, "step": 33463 }, { "epoch": 0.5816892349945245, "grad_norm": 1.438186332030364, "learning_rate": 3.9285998743804493e-07, "loss": 0.2953, "step": 33464 }, { "epoch": 0.5817066175320273, "grad_norm": 2.0234921282381397, "learning_rate": 3.928324921162442e-07, "loss": 0.3313, "step": 33465 }, { "epoch": 0.5817240000695302, "grad_norm": 1.0429940201837407, "learning_rate": 3.9280499713411517e-07, "loss": 0.1369, "step": 33466 }, { "epoch": 0.581741382607033, "grad_norm": 1.3679174018940161, "learning_rate": 3.9277750249174513e-07, "loss": 0.2867, "step": 33467 }, { "epoch": 0.5817587651445358, "grad_norm": 1.2374663581703407, "learning_rate": 3.9275000818922133e-07, "loss": 0.2054, "step": 33468 }, { "epoch": 0.5817761476820387, "grad_norm": 0.9618893281838456, "learning_rate": 3.9272251422663044e-07, "loss": 0.1972, "step": 33469 }, { "epoch": 0.5817935302195415, "grad_norm": 1.3312867573030052, "learning_rate": 3.9269502060406e-07, "loss": 0.2538, "step": 33470 }, { "epoch": 0.5818109127570442, "grad_norm": 8.242363114238872, "learning_rate": 3.9266752732159714e-07, "loss": 0.2828, "step": 33471 }, { "epoch": 0.581828295294547, "grad_norm": 2.0899181738533366, "learning_rate": 3.9264003437932874e-07, "loss": 0.3291, "step": 33472 }, { "epoch": 0.5818456778320499, "grad_norm": 2.5027257576210777, "learning_rate": 3.926125417773423e-07, "loss": 0.2166, "step": 33473 }, { "epoch": 0.5818630603695527, "grad_norm": 1.74381909387535, "learning_rate": 3.925850495157248e-07, "loss": 0.4361, "step": 33474 }, { "epoch": 0.5818804429070555, "grad_norm": 1.8052291866511836, "learning_rate": 3.925575575945631e-07, "loss": 0.2673, "step": 33475 }, { "epoch": 0.5818978254445584, "grad_norm": 1.248590998948758, "learning_rate": 3.9253006601394477e-07, "loss": 0.1799, "step": 33476 }, { "epoch": 0.5819152079820612, "grad_norm": 1.2857330423304953, "learning_rate": 3.9250257477395663e-07, "loss": 0.2138, "step": 33477 }, { "epoch": 0.581932590519564, "grad_norm": 1.9488939242941965, "learning_rate": 3.9247508387468607e-07, "loss": 0.1807, "step": 33478 }, { "epoch": 0.5819499730570669, "grad_norm": 2.8923122925565363, "learning_rate": 3.9244759331622006e-07, "loss": 0.2463, "step": 33479 }, { "epoch": 0.5819673555945697, "grad_norm": 1.404575048866553, "learning_rate": 3.924201030986457e-07, "loss": 0.2138, "step": 33480 }, { "epoch": 0.5819847381320725, "grad_norm": 2.4079663950510364, "learning_rate": 3.923926132220504e-07, "loss": 0.2571, "step": 33481 }, { "epoch": 0.5820021206695754, "grad_norm": 2.2091799151959584, "learning_rate": 3.92365123686521e-07, "loss": 0.2311, "step": 33482 }, { "epoch": 0.5820195032070782, "grad_norm": 2.226796371414454, "learning_rate": 3.9233763449214464e-07, "loss": 0.2313, "step": 33483 }, { "epoch": 0.582036885744581, "grad_norm": 0.8994496974438382, "learning_rate": 3.9231014563900853e-07, "loss": 0.1262, "step": 33484 }, { "epoch": 0.5820542682820838, "grad_norm": 2.930335600962686, "learning_rate": 3.9228265712719975e-07, "loss": 0.1899, "step": 33485 }, { "epoch": 0.5820716508195867, "grad_norm": 1.512542099702978, "learning_rate": 3.9225516895680555e-07, "loss": 0.2887, "step": 33486 }, { "epoch": 0.5820890333570895, "grad_norm": 1.3436218953593289, "learning_rate": 3.922276811279131e-07, "loss": 0.1833, "step": 33487 }, { "epoch": 0.5821064158945923, "grad_norm": 1.4730106650394115, "learning_rate": 3.922001936406093e-07, "loss": 0.2148, "step": 33488 }, { "epoch": 0.5821237984320952, "grad_norm": 1.9450688998329178, "learning_rate": 3.9217270649498125e-07, "loss": 0.2259, "step": 33489 }, { "epoch": 0.582141180969598, "grad_norm": 2.035728059638809, "learning_rate": 3.9214521969111625e-07, "loss": 0.3039, "step": 33490 }, { "epoch": 0.5821585635071007, "grad_norm": 1.034918541376265, "learning_rate": 3.9211773322910126e-07, "loss": 0.1848, "step": 33491 }, { "epoch": 0.5821759460446035, "grad_norm": 2.0051955456083537, "learning_rate": 3.9209024710902363e-07, "loss": 0.2755, "step": 33492 }, { "epoch": 0.5821933285821064, "grad_norm": 2.056446858616708, "learning_rate": 3.920627613309705e-07, "loss": 0.3417, "step": 33493 }, { "epoch": 0.5822107111196092, "grad_norm": 2.242236378525314, "learning_rate": 3.9203527589502866e-07, "loss": 0.3385, "step": 33494 }, { "epoch": 0.582228093657112, "grad_norm": 1.3265349247035543, "learning_rate": 3.9200779080128534e-07, "loss": 0.2941, "step": 33495 }, { "epoch": 0.5822454761946149, "grad_norm": 1.6007376629631285, "learning_rate": 3.919803060498277e-07, "loss": 0.1891, "step": 33496 }, { "epoch": 0.5822628587321177, "grad_norm": 2.323195501813775, "learning_rate": 3.919528216407429e-07, "loss": 0.2041, "step": 33497 }, { "epoch": 0.5822802412696205, "grad_norm": 1.3751033917221656, "learning_rate": 3.9192533757411805e-07, "loss": 0.1601, "step": 33498 }, { "epoch": 0.5822976238071234, "grad_norm": 1.4655672973429192, "learning_rate": 3.9189785385004045e-07, "loss": 0.1964, "step": 33499 }, { "epoch": 0.5823150063446262, "grad_norm": 1.8319627010983122, "learning_rate": 3.9187037046859667e-07, "loss": 0.2272, "step": 33500 }, { "epoch": 0.582332388882129, "grad_norm": 2.3179381410962536, "learning_rate": 3.9184288742987434e-07, "loss": 0.3011, "step": 33501 }, { "epoch": 0.5823497714196318, "grad_norm": 2.2088224566182957, "learning_rate": 3.9181540473396016e-07, "loss": 0.2078, "step": 33502 }, { "epoch": 0.5823671539571347, "grad_norm": 1.7424373174096088, "learning_rate": 3.917879223809416e-07, "loss": 0.1516, "step": 33503 }, { "epoch": 0.5823845364946375, "grad_norm": 1.9422978152516233, "learning_rate": 3.917604403709057e-07, "loss": 0.2102, "step": 33504 }, { "epoch": 0.5824019190321403, "grad_norm": 2.9436110591612246, "learning_rate": 3.917329587039393e-07, "loss": 0.3015, "step": 33505 }, { "epoch": 0.5824193015696432, "grad_norm": 2.2580565087671207, "learning_rate": 3.9170547738012987e-07, "loss": 0.4122, "step": 33506 }, { "epoch": 0.582436684107146, "grad_norm": 1.6325845172511695, "learning_rate": 3.916779963995643e-07, "loss": 0.2335, "step": 33507 }, { "epoch": 0.5824540666446488, "grad_norm": 1.2303600674077235, "learning_rate": 3.9165051576232955e-07, "loss": 0.3358, "step": 33508 }, { "epoch": 0.5824714491821517, "grad_norm": 1.4693417076145765, "learning_rate": 3.9162303546851304e-07, "loss": 0.1507, "step": 33509 }, { "epoch": 0.5824888317196545, "grad_norm": 1.9318844131614834, "learning_rate": 3.915955555182016e-07, "loss": 0.2894, "step": 33510 }, { "epoch": 0.5825062142571572, "grad_norm": 1.6938441357935756, "learning_rate": 3.9156807591148253e-07, "loss": 0.2922, "step": 33511 }, { "epoch": 0.58252359679466, "grad_norm": 1.6023660838839795, "learning_rate": 3.91540596648443e-07, "loss": 0.3654, "step": 33512 }, { "epoch": 0.5825409793321629, "grad_norm": 1.4558167514741391, "learning_rate": 3.915131177291698e-07, "loss": 0.3265, "step": 33513 }, { "epoch": 0.5825583618696657, "grad_norm": 4.334203280796892, "learning_rate": 3.9148563915375017e-07, "loss": 0.1993, "step": 33514 }, { "epoch": 0.5825757444071685, "grad_norm": 1.391911714197209, "learning_rate": 3.9145816092227127e-07, "loss": 0.3512, "step": 33515 }, { "epoch": 0.5825931269446714, "grad_norm": 1.0622156449840745, "learning_rate": 3.9143068303482004e-07, "loss": 0.2602, "step": 33516 }, { "epoch": 0.5826105094821742, "grad_norm": 1.7503010450686587, "learning_rate": 3.914032054914838e-07, "loss": 0.2379, "step": 33517 }, { "epoch": 0.582627892019677, "grad_norm": 1.1504007777280771, "learning_rate": 3.9137572829234957e-07, "loss": 0.2085, "step": 33518 }, { "epoch": 0.5826452745571798, "grad_norm": 2.5037717896344605, "learning_rate": 3.913482514375043e-07, "loss": 0.2975, "step": 33519 }, { "epoch": 0.5826626570946827, "grad_norm": 1.4349303548976196, "learning_rate": 3.9132077492703504e-07, "loss": 0.2698, "step": 33520 }, { "epoch": 0.5826800396321855, "grad_norm": 1.6644892134418112, "learning_rate": 3.9129329876102914e-07, "loss": 0.2111, "step": 33521 }, { "epoch": 0.5826974221696883, "grad_norm": 1.6215580337633622, "learning_rate": 3.912658229395734e-07, "loss": 0.166, "step": 33522 }, { "epoch": 0.5827148047071912, "grad_norm": 1.3053215325276801, "learning_rate": 3.912383474627552e-07, "loss": 0.1595, "step": 33523 }, { "epoch": 0.582732187244694, "grad_norm": 3.0998142544136815, "learning_rate": 3.912108723306613e-07, "loss": 0.4223, "step": 33524 }, { "epoch": 0.5827495697821968, "grad_norm": 1.6365072894722004, "learning_rate": 3.9118339754337923e-07, "loss": 0.1616, "step": 33525 }, { "epoch": 0.5827669523196997, "grad_norm": 1.4252588083868019, "learning_rate": 3.911559231009957e-07, "loss": 0.2504, "step": 33526 }, { "epoch": 0.5827843348572025, "grad_norm": 0.9523663160122358, "learning_rate": 3.9112844900359785e-07, "loss": 0.21, "step": 33527 }, { "epoch": 0.5828017173947053, "grad_norm": 0.9653401384588005, "learning_rate": 3.911009752512728e-07, "loss": 0.1896, "step": 33528 }, { "epoch": 0.5828190999322082, "grad_norm": 1.913072392653918, "learning_rate": 3.910735018441077e-07, "loss": 0.1995, "step": 33529 }, { "epoch": 0.582836482469711, "grad_norm": 1.7214993153241436, "learning_rate": 3.910460287821894e-07, "loss": 0.1908, "step": 33530 }, { "epoch": 0.5828538650072137, "grad_norm": 1.9232670882582912, "learning_rate": 3.910185560656054e-07, "loss": 0.2209, "step": 33531 }, { "epoch": 0.5828712475447165, "grad_norm": 2.6955956142191893, "learning_rate": 3.909910836944424e-07, "loss": 0.2989, "step": 33532 }, { "epoch": 0.5828886300822194, "grad_norm": 1.7118900665386398, "learning_rate": 3.9096361166878743e-07, "loss": 0.2452, "step": 33533 }, { "epoch": 0.5829060126197222, "grad_norm": 1.3961029962367772, "learning_rate": 3.9093613998872785e-07, "loss": 0.2338, "step": 33534 }, { "epoch": 0.582923395157225, "grad_norm": 1.6090820174547293, "learning_rate": 3.9090866865435067e-07, "loss": 0.3342, "step": 33535 }, { "epoch": 0.5829407776947279, "grad_norm": 1.5265103359907444, "learning_rate": 3.9088119766574277e-07, "loss": 0.1741, "step": 33536 }, { "epoch": 0.5829581602322307, "grad_norm": 1.5710600991897228, "learning_rate": 3.908537270229916e-07, "loss": 0.1867, "step": 33537 }, { "epoch": 0.5829755427697335, "grad_norm": 1.9804259168028289, "learning_rate": 3.908262567261837e-07, "loss": 0.215, "step": 33538 }, { "epoch": 0.5829929253072363, "grad_norm": 1.903534143242823, "learning_rate": 3.907987867754065e-07, "loss": 0.2598, "step": 33539 }, { "epoch": 0.5830103078447392, "grad_norm": 1.9460853667686824, "learning_rate": 3.9077131717074703e-07, "loss": 0.36, "step": 33540 }, { "epoch": 0.583027690382242, "grad_norm": 1.0815880927011294, "learning_rate": 3.907438479122922e-07, "loss": 0.2265, "step": 33541 }, { "epoch": 0.5830450729197448, "grad_norm": 1.5600264668531458, "learning_rate": 3.907163790001292e-07, "loss": 0.1471, "step": 33542 }, { "epoch": 0.5830624554572477, "grad_norm": 1.474516829111046, "learning_rate": 3.906889104343451e-07, "loss": 0.313, "step": 33543 }, { "epoch": 0.5830798379947505, "grad_norm": 2.249895070316281, "learning_rate": 3.906614422150272e-07, "loss": 0.3676, "step": 33544 }, { "epoch": 0.5830972205322533, "grad_norm": 1.3232327054037758, "learning_rate": 3.90633974342262e-07, "loss": 0.292, "step": 33545 }, { "epoch": 0.5831146030697562, "grad_norm": 1.433810708800205, "learning_rate": 3.9060650681613695e-07, "loss": 0.2936, "step": 33546 }, { "epoch": 0.583131985607259, "grad_norm": 1.3918472396927506, "learning_rate": 3.905790396367389e-07, "loss": 0.166, "step": 33547 }, { "epoch": 0.5831493681447618, "grad_norm": 1.4657883141400003, "learning_rate": 3.905515728041552e-07, "loss": 0.31, "step": 33548 }, { "epoch": 0.5831667506822646, "grad_norm": 1.3675631312889371, "learning_rate": 3.905241063184726e-07, "loss": 0.1488, "step": 33549 }, { "epoch": 0.5831841332197675, "grad_norm": 0.9620302525512178, "learning_rate": 3.904966401797785e-07, "loss": 0.2228, "step": 33550 }, { "epoch": 0.5832015157572702, "grad_norm": 1.738839952163576, "learning_rate": 3.904691743881596e-07, "loss": 0.1741, "step": 33551 }, { "epoch": 0.583218898294773, "grad_norm": 2.0332479730567306, "learning_rate": 3.904417089437032e-07, "loss": 0.239, "step": 33552 }, { "epoch": 0.5832362808322759, "grad_norm": 1.8078336011490803, "learning_rate": 3.9041424384649614e-07, "loss": 0.2146, "step": 33553 }, { "epoch": 0.5832536633697787, "grad_norm": 1.4323425437209942, "learning_rate": 3.9038677909662566e-07, "loss": 0.3825, "step": 33554 }, { "epoch": 0.5832710459072815, "grad_norm": 2.1550150910115735, "learning_rate": 3.903593146941786e-07, "loss": 0.301, "step": 33555 }, { "epoch": 0.5832884284447843, "grad_norm": 1.5169127855312514, "learning_rate": 3.9033185063924233e-07, "loss": 0.2994, "step": 33556 }, { "epoch": 0.5833058109822872, "grad_norm": 1.0641532394282904, "learning_rate": 3.903043869319037e-07, "loss": 0.1514, "step": 33557 }, { "epoch": 0.58332319351979, "grad_norm": 1.480738855736187, "learning_rate": 3.902769235722496e-07, "loss": 0.227, "step": 33558 }, { "epoch": 0.5833405760572928, "grad_norm": 1.888710528909423, "learning_rate": 3.9024946056036745e-07, "loss": 0.2085, "step": 33559 }, { "epoch": 0.5833579585947957, "grad_norm": 2.0716296012754443, "learning_rate": 3.9022199789634403e-07, "loss": 0.2187, "step": 33560 }, { "epoch": 0.5833753411322985, "grad_norm": 2.013234840969914, "learning_rate": 3.9019453558026634e-07, "loss": 0.2446, "step": 33561 }, { "epoch": 0.5833927236698013, "grad_norm": 1.593061624196283, "learning_rate": 3.901670736122216e-07, "loss": 0.1879, "step": 33562 }, { "epoch": 0.5834101062073042, "grad_norm": 0.9715027135058617, "learning_rate": 3.9013961199229697e-07, "loss": 0.2677, "step": 33563 }, { "epoch": 0.583427488744807, "grad_norm": 1.7275977608025348, "learning_rate": 3.9011215072057904e-07, "loss": 0.4354, "step": 33564 }, { "epoch": 0.5834448712823098, "grad_norm": 1.5018972656003409, "learning_rate": 3.900846897971553e-07, "loss": 0.2813, "step": 33565 }, { "epoch": 0.5834622538198126, "grad_norm": 1.7096417498168288, "learning_rate": 3.9005722922211236e-07, "loss": 0.2183, "step": 33566 }, { "epoch": 0.5834796363573155, "grad_norm": 1.4357579925902253, "learning_rate": 3.9002976899553763e-07, "loss": 0.2307, "step": 33567 }, { "epoch": 0.5834970188948183, "grad_norm": 1.4505919817570248, "learning_rate": 3.900023091175181e-07, "loss": 0.3011, "step": 33568 }, { "epoch": 0.5835144014323211, "grad_norm": 1.3358214133596136, "learning_rate": 3.899748495881408e-07, "loss": 0.1677, "step": 33569 }, { "epoch": 0.583531783969824, "grad_norm": 1.6624693372977917, "learning_rate": 3.899473904074925e-07, "loss": 0.1736, "step": 33570 }, { "epoch": 0.5835491665073267, "grad_norm": 1.5668429947685476, "learning_rate": 3.8991993157566043e-07, "loss": 0.1421, "step": 33571 }, { "epoch": 0.5835665490448295, "grad_norm": 2.4912174598983765, "learning_rate": 3.8989247309273156e-07, "loss": 0.2141, "step": 33572 }, { "epoch": 0.5835839315823323, "grad_norm": 1.7796508156704447, "learning_rate": 3.8986501495879307e-07, "loss": 0.341, "step": 33573 }, { "epoch": 0.5836013141198352, "grad_norm": 1.313679239706855, "learning_rate": 3.898375571739318e-07, "loss": 0.2232, "step": 33574 }, { "epoch": 0.583618696657338, "grad_norm": 1.0774914528185073, "learning_rate": 3.898100997382352e-07, "loss": 0.1407, "step": 33575 }, { "epoch": 0.5836360791948408, "grad_norm": 1.529210525908727, "learning_rate": 3.897826426517896e-07, "loss": 0.1978, "step": 33576 }, { "epoch": 0.5836534617323437, "grad_norm": 1.3411358240144662, "learning_rate": 3.897551859146825e-07, "loss": 0.2081, "step": 33577 }, { "epoch": 0.5836708442698465, "grad_norm": 1.0245624188447708, "learning_rate": 3.8972772952700077e-07, "loss": 0.2566, "step": 33578 }, { "epoch": 0.5836882268073493, "grad_norm": 1.3556942460885992, "learning_rate": 3.8970027348883154e-07, "loss": 0.2322, "step": 33579 }, { "epoch": 0.5837056093448522, "grad_norm": 1.4271102874756176, "learning_rate": 3.896728178002617e-07, "loss": 0.2402, "step": 33580 }, { "epoch": 0.583722991882355, "grad_norm": 1.0978922411979732, "learning_rate": 3.896453624613785e-07, "loss": 0.2616, "step": 33581 }, { "epoch": 0.5837403744198578, "grad_norm": 1.2657486560704085, "learning_rate": 3.8961790747226883e-07, "loss": 0.3489, "step": 33582 }, { "epoch": 0.5837577569573607, "grad_norm": 2.3930901135265175, "learning_rate": 3.895904528330196e-07, "loss": 0.2846, "step": 33583 }, { "epoch": 0.5837751394948635, "grad_norm": 1.617754781124995, "learning_rate": 3.8956299854371783e-07, "loss": 0.2187, "step": 33584 }, { "epoch": 0.5837925220323663, "grad_norm": 2.1027001029807537, "learning_rate": 3.895355446044507e-07, "loss": 0.3205, "step": 33585 }, { "epoch": 0.5838099045698691, "grad_norm": 1.6708596502635387, "learning_rate": 3.895080910153051e-07, "loss": 0.1941, "step": 33586 }, { "epoch": 0.583827287107372, "grad_norm": 2.5041431725176944, "learning_rate": 3.8948063777636816e-07, "loss": 0.2304, "step": 33587 }, { "epoch": 0.5838446696448748, "grad_norm": 2.017833004140878, "learning_rate": 3.8945318488772694e-07, "loss": 0.266, "step": 33588 }, { "epoch": 0.5838620521823776, "grad_norm": 2.034273387429454, "learning_rate": 3.8942573234946805e-07, "loss": 0.2225, "step": 33589 }, { "epoch": 0.5838794347198805, "grad_norm": 1.189791834553498, "learning_rate": 3.8939828016167904e-07, "loss": 0.2479, "step": 33590 }, { "epoch": 0.5838968172573832, "grad_norm": 2.1276920539421593, "learning_rate": 3.8937082832444653e-07, "loss": 0.1384, "step": 33591 }, { "epoch": 0.583914199794886, "grad_norm": 2.4382837828112476, "learning_rate": 3.893433768378577e-07, "loss": 0.2225, "step": 33592 }, { "epoch": 0.5839315823323888, "grad_norm": 1.4739592230614962, "learning_rate": 3.893159257019996e-07, "loss": 0.2649, "step": 33593 }, { "epoch": 0.5839489648698917, "grad_norm": 3.3793373362337014, "learning_rate": 3.892884749169593e-07, "loss": 0.2424, "step": 33594 }, { "epoch": 0.5839663474073945, "grad_norm": 1.5764051294791335, "learning_rate": 3.8926102448282343e-07, "loss": 0.1916, "step": 33595 }, { "epoch": 0.5839837299448973, "grad_norm": 1.699183688829658, "learning_rate": 3.892335743996794e-07, "loss": 0.3301, "step": 33596 }, { "epoch": 0.5840011124824002, "grad_norm": 1.8851155708212777, "learning_rate": 3.892061246676139e-07, "loss": 0.1954, "step": 33597 }, { "epoch": 0.584018495019903, "grad_norm": 1.5028447016600954, "learning_rate": 3.891786752867142e-07, "loss": 0.3084, "step": 33598 }, { "epoch": 0.5840358775574058, "grad_norm": 1.2986303292844155, "learning_rate": 3.891512262570671e-07, "loss": 0.1609, "step": 33599 }, { "epoch": 0.5840532600949087, "grad_norm": 1.9637756257193788, "learning_rate": 3.891237775787598e-07, "loss": 0.3751, "step": 33600 }, { "epoch": 0.5840706426324115, "grad_norm": 1.6637077052647526, "learning_rate": 3.890963292518793e-07, "loss": 0.2196, "step": 33601 }, { "epoch": 0.5840880251699143, "grad_norm": 1.907154448177286, "learning_rate": 3.890688812765123e-07, "loss": 0.338, "step": 33602 }, { "epoch": 0.5841054077074171, "grad_norm": 2.9187162072707538, "learning_rate": 3.89041433652746e-07, "loss": 0.2309, "step": 33603 }, { "epoch": 0.58412279024492, "grad_norm": 1.0784447238846948, "learning_rate": 3.890139863806675e-07, "loss": 0.1807, "step": 33604 }, { "epoch": 0.5841401727824228, "grad_norm": 1.770637329795955, "learning_rate": 3.8898653946036345e-07, "loss": 0.2887, "step": 33605 }, { "epoch": 0.5841575553199256, "grad_norm": 1.8402344223978189, "learning_rate": 3.889590928919213e-07, "loss": 0.1633, "step": 33606 }, { "epoch": 0.5841749378574285, "grad_norm": 1.2942271886519106, "learning_rate": 3.889316466754279e-07, "loss": 0.1659, "step": 33607 }, { "epoch": 0.5841923203949313, "grad_norm": 1.3625580881076218, "learning_rate": 3.8890420081097e-07, "loss": 0.2003, "step": 33608 }, { "epoch": 0.5842097029324341, "grad_norm": 1.784322970324977, "learning_rate": 3.8887675529863465e-07, "loss": 0.3259, "step": 33609 }, { "epoch": 0.5842270854699368, "grad_norm": 1.5825286460300656, "learning_rate": 3.888493101385091e-07, "loss": 0.2386, "step": 33610 }, { "epoch": 0.5842444680074397, "grad_norm": 1.3599324017152328, "learning_rate": 3.8882186533068e-07, "loss": 0.1323, "step": 33611 }, { "epoch": 0.5842618505449425, "grad_norm": 1.1710951873258122, "learning_rate": 3.8879442087523467e-07, "loss": 0.1746, "step": 33612 }, { "epoch": 0.5842792330824453, "grad_norm": 1.8303277653488947, "learning_rate": 3.8876697677226005e-07, "loss": 0.2811, "step": 33613 }, { "epoch": 0.5842966156199482, "grad_norm": 1.123544996781806, "learning_rate": 3.8873953302184283e-07, "loss": 0.1564, "step": 33614 }, { "epoch": 0.584313998157451, "grad_norm": 2.2051849392302842, "learning_rate": 3.8871208962407026e-07, "loss": 0.2599, "step": 33615 }, { "epoch": 0.5843313806949538, "grad_norm": 2.1083201111024286, "learning_rate": 3.886846465790292e-07, "loss": 0.2307, "step": 33616 }, { "epoch": 0.5843487632324567, "grad_norm": 2.206650680844557, "learning_rate": 3.886572038868066e-07, "loss": 0.2665, "step": 33617 }, { "epoch": 0.5843661457699595, "grad_norm": 1.2336341687908305, "learning_rate": 3.886297615474897e-07, "loss": 0.1524, "step": 33618 }, { "epoch": 0.5843835283074623, "grad_norm": 1.7251719541437391, "learning_rate": 3.886023195611653e-07, "loss": 0.1957, "step": 33619 }, { "epoch": 0.5844009108449651, "grad_norm": 1.4915743457543897, "learning_rate": 3.885748779279202e-07, "loss": 0.2177, "step": 33620 }, { "epoch": 0.584418293382468, "grad_norm": 3.128554385416707, "learning_rate": 3.885474366478417e-07, "loss": 0.2643, "step": 33621 }, { "epoch": 0.5844356759199708, "grad_norm": 1.370438683364721, "learning_rate": 3.8851999572101644e-07, "loss": 0.1708, "step": 33622 }, { "epoch": 0.5844530584574736, "grad_norm": 0.973423109267555, "learning_rate": 3.8849255514753176e-07, "loss": 0.2051, "step": 33623 }, { "epoch": 0.5844704409949765, "grad_norm": 2.884274816247343, "learning_rate": 3.884651149274744e-07, "loss": 0.4077, "step": 33624 }, { "epoch": 0.5844878235324793, "grad_norm": 1.3201396494408129, "learning_rate": 3.884376750609313e-07, "loss": 0.3589, "step": 33625 }, { "epoch": 0.5845052060699821, "grad_norm": 1.6646606051492174, "learning_rate": 3.884102355479898e-07, "loss": 0.3391, "step": 33626 }, { "epoch": 0.584522588607485, "grad_norm": 1.5600472610063405, "learning_rate": 3.8838279638873637e-07, "loss": 0.1637, "step": 33627 }, { "epoch": 0.5845399711449878, "grad_norm": 1.269917286872349, "learning_rate": 3.883553575832582e-07, "loss": 0.3065, "step": 33628 }, { "epoch": 0.5845573536824906, "grad_norm": 0.9982166435534575, "learning_rate": 3.883279191316423e-07, "loss": 0.2522, "step": 33629 }, { "epoch": 0.5845747362199933, "grad_norm": 1.554810603833031, "learning_rate": 3.8830048103397555e-07, "loss": 0.1201, "step": 33630 }, { "epoch": 0.5845921187574962, "grad_norm": 1.404174221972975, "learning_rate": 3.8827304329034505e-07, "loss": 0.2106, "step": 33631 }, { "epoch": 0.584609501294999, "grad_norm": 1.6387543554220523, "learning_rate": 3.882456059008378e-07, "loss": 0.2009, "step": 33632 }, { "epoch": 0.5846268838325018, "grad_norm": 1.473641076301168, "learning_rate": 3.882181688655405e-07, "loss": 0.2988, "step": 33633 }, { "epoch": 0.5846442663700047, "grad_norm": 1.467783076125289, "learning_rate": 3.8819073218454015e-07, "loss": 0.2757, "step": 33634 }, { "epoch": 0.5846616489075075, "grad_norm": 2.2378136247342675, "learning_rate": 3.88163295857924e-07, "loss": 0.257, "step": 33635 }, { "epoch": 0.5846790314450103, "grad_norm": 1.2057761428522644, "learning_rate": 3.8813585988577856e-07, "loss": 0.1862, "step": 33636 }, { "epoch": 0.5846964139825132, "grad_norm": 1.6211202146625974, "learning_rate": 3.881084242681913e-07, "loss": 0.2769, "step": 33637 }, { "epoch": 0.584713796520016, "grad_norm": 1.0125180889545393, "learning_rate": 3.88080989005249e-07, "loss": 0.1508, "step": 33638 }, { "epoch": 0.5847311790575188, "grad_norm": 1.7638365614706313, "learning_rate": 3.8805355409703844e-07, "loss": 0.2568, "step": 33639 }, { "epoch": 0.5847485615950216, "grad_norm": 1.0156409147253636, "learning_rate": 3.880261195436466e-07, "loss": 0.3259, "step": 33640 }, { "epoch": 0.5847659441325245, "grad_norm": 1.2945848103413962, "learning_rate": 3.8799868534516066e-07, "loss": 0.2798, "step": 33641 }, { "epoch": 0.5847833266700273, "grad_norm": 2.9235210868338743, "learning_rate": 3.879712515016673e-07, "loss": 0.3038, "step": 33642 }, { "epoch": 0.5848007092075301, "grad_norm": 2.9311413028135287, "learning_rate": 3.8794381801325373e-07, "loss": 0.2586, "step": 33643 }, { "epoch": 0.584818091745033, "grad_norm": 1.206189632158292, "learning_rate": 3.879163848800067e-07, "loss": 0.2256, "step": 33644 }, { "epoch": 0.5848354742825358, "grad_norm": 1.7300830303094978, "learning_rate": 3.8788895210201334e-07, "loss": 0.2483, "step": 33645 }, { "epoch": 0.5848528568200386, "grad_norm": 1.5063721603122433, "learning_rate": 3.878615196793605e-07, "loss": 0.2567, "step": 33646 }, { "epoch": 0.5848702393575415, "grad_norm": 1.5665823672900252, "learning_rate": 3.87834087612135e-07, "loss": 0.2039, "step": 33647 }, { "epoch": 0.5848876218950443, "grad_norm": 1.423405186926166, "learning_rate": 3.8780665590042405e-07, "loss": 0.357, "step": 33648 }, { "epoch": 0.5849050044325471, "grad_norm": 1.570040243899063, "learning_rate": 3.877792245443144e-07, "loss": 0.2884, "step": 33649 }, { "epoch": 0.5849223869700498, "grad_norm": 1.6818910906836435, "learning_rate": 3.87751793543893e-07, "loss": 0.2084, "step": 33650 }, { "epoch": 0.5849397695075527, "grad_norm": 1.519122929378939, "learning_rate": 3.8772436289924705e-07, "loss": 0.1652, "step": 33651 }, { "epoch": 0.5849571520450555, "grad_norm": 1.2881030819807715, "learning_rate": 3.876969326104632e-07, "loss": 0.1427, "step": 33652 }, { "epoch": 0.5849745345825583, "grad_norm": 0.7748341495927912, "learning_rate": 3.876695026776284e-07, "loss": 0.0951, "step": 33653 }, { "epoch": 0.5849919171200612, "grad_norm": 2.8820517492450315, "learning_rate": 3.8764207310082974e-07, "loss": 0.4148, "step": 33654 }, { "epoch": 0.585009299657564, "grad_norm": 1.9785888579700466, "learning_rate": 3.8761464388015397e-07, "loss": 0.1799, "step": 33655 }, { "epoch": 0.5850266821950668, "grad_norm": 1.7314232988624405, "learning_rate": 3.875872150156883e-07, "loss": 0.2345, "step": 33656 }, { "epoch": 0.5850440647325696, "grad_norm": 1.5079664139951854, "learning_rate": 3.875597865075196e-07, "loss": 0.1936, "step": 33657 }, { "epoch": 0.5850614472700725, "grad_norm": 2.373851134987041, "learning_rate": 3.875323583557347e-07, "loss": 0.1881, "step": 33658 }, { "epoch": 0.5850788298075753, "grad_norm": 1.4346540318885128, "learning_rate": 3.8750493056042033e-07, "loss": 0.2009, "step": 33659 }, { "epoch": 0.5850962123450781, "grad_norm": 1.4024328228297676, "learning_rate": 3.874775031216638e-07, "loss": 0.3783, "step": 33660 }, { "epoch": 0.585113594882581, "grad_norm": 1.5021268540221393, "learning_rate": 3.874500760395519e-07, "loss": 0.3135, "step": 33661 }, { "epoch": 0.5851309774200838, "grad_norm": 0.8928033707631733, "learning_rate": 3.8742264931417155e-07, "loss": 0.2945, "step": 33662 }, { "epoch": 0.5851483599575866, "grad_norm": 1.4217300756101976, "learning_rate": 3.873952229456096e-07, "loss": 0.1826, "step": 33663 }, { "epoch": 0.5851657424950895, "grad_norm": 1.5685418709370338, "learning_rate": 3.873677969339534e-07, "loss": 0.2036, "step": 33664 }, { "epoch": 0.5851831250325923, "grad_norm": 3.037691786660493, "learning_rate": 3.873403712792892e-07, "loss": 0.4552, "step": 33665 }, { "epoch": 0.5852005075700951, "grad_norm": 1.0857075388803603, "learning_rate": 3.8731294598170444e-07, "loss": 0.1666, "step": 33666 }, { "epoch": 0.585217890107598, "grad_norm": 1.2635547274239654, "learning_rate": 3.872855210412857e-07, "loss": 0.1579, "step": 33667 }, { "epoch": 0.5852352726451008, "grad_norm": 0.8896455772017576, "learning_rate": 3.872580964581202e-07, "loss": 0.2095, "step": 33668 }, { "epoch": 0.5852526551826036, "grad_norm": 2.084015130602804, "learning_rate": 3.8723067223229467e-07, "loss": 0.2303, "step": 33669 }, { "epoch": 0.5852700377201063, "grad_norm": 1.4975224500732145, "learning_rate": 3.8720324836389626e-07, "loss": 0.2929, "step": 33670 }, { "epoch": 0.5852874202576092, "grad_norm": 1.147419812220668, "learning_rate": 3.871758248530118e-07, "loss": 0.2913, "step": 33671 }, { "epoch": 0.585304802795112, "grad_norm": 1.2368187582420143, "learning_rate": 3.87148401699728e-07, "loss": 0.1481, "step": 33672 }, { "epoch": 0.5853221853326148, "grad_norm": 2.0296034005624786, "learning_rate": 3.871209789041318e-07, "loss": 0.319, "step": 33673 }, { "epoch": 0.5853395678701176, "grad_norm": 1.599278915061315, "learning_rate": 3.870935564663104e-07, "loss": 0.2341, "step": 33674 }, { "epoch": 0.5853569504076205, "grad_norm": 1.4756404010425208, "learning_rate": 3.8706613438635046e-07, "loss": 0.2744, "step": 33675 }, { "epoch": 0.5853743329451233, "grad_norm": 1.0995504896936859, "learning_rate": 3.870387126643392e-07, "loss": 0.5013, "step": 33676 }, { "epoch": 0.5853917154826261, "grad_norm": 1.909040774485705, "learning_rate": 3.870112913003633e-07, "loss": 0.4179, "step": 33677 }, { "epoch": 0.585409098020129, "grad_norm": 1.3076670165619593, "learning_rate": 3.869838702945094e-07, "loss": 0.3204, "step": 33678 }, { "epoch": 0.5854264805576318, "grad_norm": 1.4560283573989299, "learning_rate": 3.86956449646865e-07, "loss": 0.1876, "step": 33679 }, { "epoch": 0.5854438630951346, "grad_norm": 1.4240062674664031, "learning_rate": 3.8692902935751663e-07, "loss": 0.2103, "step": 33680 }, { "epoch": 0.5854612456326375, "grad_norm": 0.9223562056455118, "learning_rate": 3.8690160942655125e-07, "loss": 0.116, "step": 33681 }, { "epoch": 0.5854786281701403, "grad_norm": 1.4696084303972388, "learning_rate": 3.868741898540559e-07, "loss": 0.1759, "step": 33682 }, { "epoch": 0.5854960107076431, "grad_norm": 1.2523033135255004, "learning_rate": 3.868467706401175e-07, "loss": 0.1674, "step": 33683 }, { "epoch": 0.585513393245146, "grad_norm": 1.8798440202360291, "learning_rate": 3.8681935178482266e-07, "loss": 0.3585, "step": 33684 }, { "epoch": 0.5855307757826488, "grad_norm": 1.2604245658714694, "learning_rate": 3.867919332882586e-07, "loss": 0.2619, "step": 33685 }, { "epoch": 0.5855481583201516, "grad_norm": 1.9932563380292647, "learning_rate": 3.86764515150512e-07, "loss": 0.2358, "step": 33686 }, { "epoch": 0.5855655408576544, "grad_norm": 2.5784308331729764, "learning_rate": 3.8673709737166994e-07, "loss": 0.3354, "step": 33687 }, { "epoch": 0.5855829233951573, "grad_norm": 2.165973132613366, "learning_rate": 3.867096799518192e-07, "loss": 0.2613, "step": 33688 }, { "epoch": 0.5856003059326601, "grad_norm": 1.5782456622954455, "learning_rate": 3.86682262891047e-07, "loss": 0.1741, "step": 33689 }, { "epoch": 0.5856176884701628, "grad_norm": 1.0589858371014, "learning_rate": 3.8665484618943965e-07, "loss": 0.2257, "step": 33690 }, { "epoch": 0.5856350710076657, "grad_norm": 1.4721153596492538, "learning_rate": 3.866274298470845e-07, "loss": 0.2703, "step": 33691 }, { "epoch": 0.5856524535451685, "grad_norm": 1.6616404240448206, "learning_rate": 3.866000138640683e-07, "loss": 0.2975, "step": 33692 }, { "epoch": 0.5856698360826713, "grad_norm": 1.6104601216951757, "learning_rate": 3.8657259824047795e-07, "loss": 0.2219, "step": 33693 }, { "epoch": 0.5856872186201741, "grad_norm": 3.4842760236764496, "learning_rate": 3.8654518297640026e-07, "loss": 0.2245, "step": 33694 }, { "epoch": 0.585704601157677, "grad_norm": 2.095389376912515, "learning_rate": 3.8651776807192247e-07, "loss": 0.3411, "step": 33695 }, { "epoch": 0.5857219836951798, "grad_norm": 2.8810561811013113, "learning_rate": 3.864903535271312e-07, "loss": 0.3808, "step": 33696 }, { "epoch": 0.5857393662326826, "grad_norm": 2.428631279227502, "learning_rate": 3.864629393421133e-07, "loss": 0.2884, "step": 33697 }, { "epoch": 0.5857567487701855, "grad_norm": 1.8985879107630022, "learning_rate": 3.8643552551695557e-07, "loss": 0.211, "step": 33698 }, { "epoch": 0.5857741313076883, "grad_norm": 0.9387734104515012, "learning_rate": 3.8640811205174515e-07, "loss": 0.2632, "step": 33699 }, { "epoch": 0.5857915138451911, "grad_norm": 1.7361657908234676, "learning_rate": 3.863806989465688e-07, "loss": 0.2281, "step": 33700 }, { "epoch": 0.585808896382694, "grad_norm": 2.079086879386904, "learning_rate": 3.863532862015135e-07, "loss": 0.2618, "step": 33701 }, { "epoch": 0.5858262789201968, "grad_norm": 1.636018984394455, "learning_rate": 3.8632587381666624e-07, "loss": 0.1866, "step": 33702 }, { "epoch": 0.5858436614576996, "grad_norm": 1.0567091922898728, "learning_rate": 3.862984617921135e-07, "loss": 0.2467, "step": 33703 }, { "epoch": 0.5858610439952024, "grad_norm": 2.442038527866981, "learning_rate": 3.862710501279425e-07, "loss": 0.4881, "step": 33704 }, { "epoch": 0.5858784265327053, "grad_norm": 1.649064212917483, "learning_rate": 3.8624363882424004e-07, "loss": 0.1517, "step": 33705 }, { "epoch": 0.5858958090702081, "grad_norm": 1.5569625264207818, "learning_rate": 3.862162278810929e-07, "loss": 0.1937, "step": 33706 }, { "epoch": 0.5859131916077109, "grad_norm": 1.9048477649173585, "learning_rate": 3.861888172985881e-07, "loss": 0.3781, "step": 33707 }, { "epoch": 0.5859305741452138, "grad_norm": 1.9207357706977608, "learning_rate": 3.861614070768127e-07, "loss": 0.1598, "step": 33708 }, { "epoch": 0.5859479566827166, "grad_norm": 1.2455215640348825, "learning_rate": 3.86133997215853e-07, "loss": 0.2988, "step": 33709 }, { "epoch": 0.5859653392202193, "grad_norm": 1.4644498404032817, "learning_rate": 3.861065877157963e-07, "loss": 0.2291, "step": 33710 }, { "epoch": 0.5859827217577221, "grad_norm": 1.6747536564317718, "learning_rate": 3.8607917857672944e-07, "loss": 0.2217, "step": 33711 }, { "epoch": 0.586000104295225, "grad_norm": 2.3157779067768614, "learning_rate": 3.8605176979873926e-07, "loss": 0.363, "step": 33712 }, { "epoch": 0.5860174868327278, "grad_norm": 1.3029513826958485, "learning_rate": 3.860243613819127e-07, "loss": 0.1716, "step": 33713 }, { "epoch": 0.5860348693702306, "grad_norm": 1.7569811651084957, "learning_rate": 3.859969533263366e-07, "loss": 0.252, "step": 33714 }, { "epoch": 0.5860522519077335, "grad_norm": 1.023904743033355, "learning_rate": 3.859695456320976e-07, "loss": 0.27, "step": 33715 }, { "epoch": 0.5860696344452363, "grad_norm": 1.2326795018970755, "learning_rate": 3.8594213829928284e-07, "loss": 0.3086, "step": 33716 }, { "epoch": 0.5860870169827391, "grad_norm": 1.435790009463431, "learning_rate": 3.8591473132797905e-07, "loss": 0.1598, "step": 33717 }, { "epoch": 0.586104399520242, "grad_norm": 2.2168661179730877, "learning_rate": 3.8588732471827325e-07, "loss": 0.2127, "step": 33718 }, { "epoch": 0.5861217820577448, "grad_norm": 1.5556809155468545, "learning_rate": 3.8585991847025206e-07, "loss": 0.206, "step": 33719 }, { "epoch": 0.5861391645952476, "grad_norm": 1.3341925092398232, "learning_rate": 3.858325125840026e-07, "loss": 0.2151, "step": 33720 }, { "epoch": 0.5861565471327504, "grad_norm": 1.3226133391242616, "learning_rate": 3.8580510705961177e-07, "loss": 0.2236, "step": 33721 }, { "epoch": 0.5861739296702533, "grad_norm": 1.3580249453090332, "learning_rate": 3.857777018971662e-07, "loss": 0.2398, "step": 33722 }, { "epoch": 0.5861913122077561, "grad_norm": 1.5289765008902596, "learning_rate": 3.857502970967527e-07, "loss": 0.3628, "step": 33723 }, { "epoch": 0.5862086947452589, "grad_norm": 1.4204374611258692, "learning_rate": 3.857228926584585e-07, "loss": 0.1893, "step": 33724 }, { "epoch": 0.5862260772827618, "grad_norm": 3.0908550482628843, "learning_rate": 3.8569548858237e-07, "loss": 0.6509, "step": 33725 }, { "epoch": 0.5862434598202646, "grad_norm": 1.7653336808612934, "learning_rate": 3.856680848685744e-07, "loss": 0.3078, "step": 33726 }, { "epoch": 0.5862608423577674, "grad_norm": 1.6150345908126307, "learning_rate": 3.856406815171586e-07, "loss": 0.2662, "step": 33727 }, { "epoch": 0.5862782248952703, "grad_norm": 3.638669112423605, "learning_rate": 3.856132785282092e-07, "loss": 0.3586, "step": 33728 }, { "epoch": 0.5862956074327731, "grad_norm": 1.3934068325751225, "learning_rate": 3.855858759018131e-07, "loss": 0.2739, "step": 33729 }, { "epoch": 0.5863129899702758, "grad_norm": 2.110794093032897, "learning_rate": 3.8555847363805725e-07, "loss": 0.1856, "step": 33730 }, { "epoch": 0.5863303725077786, "grad_norm": 1.2580217590949525, "learning_rate": 3.8553107173702836e-07, "loss": 0.3731, "step": 33731 }, { "epoch": 0.5863477550452815, "grad_norm": 1.963227243151213, "learning_rate": 3.8550367019881354e-07, "loss": 0.2815, "step": 33732 }, { "epoch": 0.5863651375827843, "grad_norm": 2.0774523727384024, "learning_rate": 3.854762690234996e-07, "loss": 0.2652, "step": 33733 }, { "epoch": 0.5863825201202871, "grad_norm": 2.137998462206635, "learning_rate": 3.85448868211173e-07, "loss": 0.1284, "step": 33734 }, { "epoch": 0.58639990265779, "grad_norm": 1.383457765825349, "learning_rate": 3.85421467761921e-07, "loss": 0.3092, "step": 33735 }, { "epoch": 0.5864172851952928, "grad_norm": 1.1309223589847197, "learning_rate": 3.8539406767583025e-07, "loss": 0.1514, "step": 33736 }, { "epoch": 0.5864346677327956, "grad_norm": 1.3097687770485673, "learning_rate": 3.8536666795298776e-07, "loss": 0.1938, "step": 33737 }, { "epoch": 0.5864520502702985, "grad_norm": 1.173782925121631, "learning_rate": 3.853392685934803e-07, "loss": 0.1742, "step": 33738 }, { "epoch": 0.5864694328078013, "grad_norm": 2.0023227354532156, "learning_rate": 3.853118695973947e-07, "loss": 0.2465, "step": 33739 }, { "epoch": 0.5864868153453041, "grad_norm": 1.2860358955204918, "learning_rate": 3.852844709648176e-07, "loss": 0.262, "step": 33740 }, { "epoch": 0.5865041978828069, "grad_norm": 1.2591760190145829, "learning_rate": 3.852570726958362e-07, "loss": 0.2281, "step": 33741 }, { "epoch": 0.5865215804203098, "grad_norm": 1.4587108188902314, "learning_rate": 3.85229674790537e-07, "loss": 0.1905, "step": 33742 }, { "epoch": 0.5865389629578126, "grad_norm": 3.269544296712661, "learning_rate": 3.852022772490072e-07, "loss": 0.2169, "step": 33743 }, { "epoch": 0.5865563454953154, "grad_norm": 1.1301386753230729, "learning_rate": 3.8517488007133327e-07, "loss": 0.2389, "step": 33744 }, { "epoch": 0.5865737280328183, "grad_norm": 1.5612330543497357, "learning_rate": 3.8514748325760237e-07, "loss": 0.2164, "step": 33745 }, { "epoch": 0.5865911105703211, "grad_norm": 2.2488699030057235, "learning_rate": 3.8512008680790123e-07, "loss": 0.2311, "step": 33746 }, { "epoch": 0.5866084931078239, "grad_norm": 1.131104865840432, "learning_rate": 3.850926907223166e-07, "loss": 0.265, "step": 33747 }, { "epoch": 0.5866258756453268, "grad_norm": 2.6690356818153056, "learning_rate": 3.850652950009352e-07, "loss": 0.3221, "step": 33748 }, { "epoch": 0.5866432581828295, "grad_norm": 2.549882346135348, "learning_rate": 3.850378996438442e-07, "loss": 0.2409, "step": 33749 }, { "epoch": 0.5866606407203323, "grad_norm": 1.5919134449670582, "learning_rate": 3.850105046511301e-07, "loss": 0.2073, "step": 33750 }, { "epoch": 0.5866780232578351, "grad_norm": 1.459959729101925, "learning_rate": 3.8498311002287995e-07, "loss": 0.1534, "step": 33751 }, { "epoch": 0.586695405795338, "grad_norm": 1.7781456979322687, "learning_rate": 3.849557157591807e-07, "loss": 0.2326, "step": 33752 }, { "epoch": 0.5867127883328408, "grad_norm": 2.0491437663442666, "learning_rate": 3.849283218601188e-07, "loss": 0.2501, "step": 33753 }, { "epoch": 0.5867301708703436, "grad_norm": 1.284544941948143, "learning_rate": 3.8490092832578114e-07, "loss": 0.1702, "step": 33754 }, { "epoch": 0.5867475534078465, "grad_norm": 1.396349776489245, "learning_rate": 3.8487353515625486e-07, "loss": 0.2546, "step": 33755 }, { "epoch": 0.5867649359453493, "grad_norm": 1.6156838255220747, "learning_rate": 3.848461423516264e-07, "loss": 0.2786, "step": 33756 }, { "epoch": 0.5867823184828521, "grad_norm": 2.3917602812984207, "learning_rate": 3.8481874991198295e-07, "loss": 0.2066, "step": 33757 }, { "epoch": 0.586799701020355, "grad_norm": 1.916346650580835, "learning_rate": 3.847913578374112e-07, "loss": 0.2024, "step": 33758 }, { "epoch": 0.5868170835578578, "grad_norm": 2.1720811078896944, "learning_rate": 3.8476396612799774e-07, "loss": 0.2672, "step": 33759 }, { "epoch": 0.5868344660953606, "grad_norm": 1.4579192057192456, "learning_rate": 3.8473657478382963e-07, "loss": 0.262, "step": 33760 }, { "epoch": 0.5868518486328634, "grad_norm": 1.362754693392396, "learning_rate": 3.8470918380499365e-07, "loss": 0.5284, "step": 33761 }, { "epoch": 0.5868692311703663, "grad_norm": 1.7665028663203088, "learning_rate": 3.846817931915766e-07, "loss": 0.2438, "step": 33762 }, { "epoch": 0.5868866137078691, "grad_norm": 2.5579293874618987, "learning_rate": 3.8465440294366525e-07, "loss": 0.217, "step": 33763 }, { "epoch": 0.5869039962453719, "grad_norm": 2.0456550668949354, "learning_rate": 3.8462701306134635e-07, "loss": 0.2698, "step": 33764 }, { "epoch": 0.5869213787828748, "grad_norm": 1.4482426661270251, "learning_rate": 3.8459962354470714e-07, "loss": 0.2145, "step": 33765 }, { "epoch": 0.5869387613203776, "grad_norm": 1.568786612665355, "learning_rate": 3.8457223439383396e-07, "loss": 0.2156, "step": 33766 }, { "epoch": 0.5869561438578804, "grad_norm": 2.3001794971683567, "learning_rate": 3.845448456088137e-07, "loss": 0.2403, "step": 33767 }, { "epoch": 0.5869735263953832, "grad_norm": 1.689127985415042, "learning_rate": 3.8451745718973325e-07, "loss": 0.2836, "step": 33768 }, { "epoch": 0.586990908932886, "grad_norm": 1.7991388728576208, "learning_rate": 3.844900691366795e-07, "loss": 0.2025, "step": 33769 }, { "epoch": 0.5870082914703888, "grad_norm": 2.2141191246428593, "learning_rate": 3.8446268144973905e-07, "loss": 0.3023, "step": 33770 }, { "epoch": 0.5870256740078916, "grad_norm": 1.7865458319053156, "learning_rate": 3.84435294128999e-07, "loss": 0.1859, "step": 33771 }, { "epoch": 0.5870430565453945, "grad_norm": 1.4354856768229958, "learning_rate": 3.84407907174546e-07, "loss": 0.1746, "step": 33772 }, { "epoch": 0.5870604390828973, "grad_norm": 1.8738171149575438, "learning_rate": 3.843805205864666e-07, "loss": 0.2186, "step": 33773 }, { "epoch": 0.5870778216204001, "grad_norm": 1.8256245435241187, "learning_rate": 3.8435313436484796e-07, "loss": 0.1541, "step": 33774 }, { "epoch": 0.587095204157903, "grad_norm": 1.6342838818660588, "learning_rate": 3.8432574850977665e-07, "loss": 0.1891, "step": 33775 }, { "epoch": 0.5871125866954058, "grad_norm": 1.7080317511502898, "learning_rate": 3.8429836302133974e-07, "loss": 0.2346, "step": 33776 }, { "epoch": 0.5871299692329086, "grad_norm": 3.6162319017552136, "learning_rate": 3.8427097789962394e-07, "loss": 0.3224, "step": 33777 }, { "epoch": 0.5871473517704114, "grad_norm": 2.576155052184939, "learning_rate": 3.8424359314471586e-07, "loss": 0.3128, "step": 33778 }, { "epoch": 0.5871647343079143, "grad_norm": 1.1740971419572077, "learning_rate": 3.8421620875670233e-07, "loss": 0.1896, "step": 33779 }, { "epoch": 0.5871821168454171, "grad_norm": 3.04783734379819, "learning_rate": 3.841888247356703e-07, "loss": 0.3665, "step": 33780 }, { "epoch": 0.5871994993829199, "grad_norm": 1.5866034229372574, "learning_rate": 3.8416144108170645e-07, "loss": 0.1656, "step": 33781 }, { "epoch": 0.5872168819204228, "grad_norm": 1.1068160628878252, "learning_rate": 3.8413405779489774e-07, "loss": 0.2644, "step": 33782 }, { "epoch": 0.5872342644579256, "grad_norm": 1.6151146323675838, "learning_rate": 3.8410667487533065e-07, "loss": 0.216, "step": 33783 }, { "epoch": 0.5872516469954284, "grad_norm": 1.4237055942593746, "learning_rate": 3.8407929232309247e-07, "loss": 0.2608, "step": 33784 }, { "epoch": 0.5872690295329313, "grad_norm": 2.610735892524966, "learning_rate": 3.8405191013826955e-07, "loss": 0.2058, "step": 33785 }, { "epoch": 0.5872864120704341, "grad_norm": 1.235373188569449, "learning_rate": 3.8402452832094876e-07, "loss": 0.246, "step": 33786 }, { "epoch": 0.5873037946079369, "grad_norm": 1.2857278582349654, "learning_rate": 3.8399714687121687e-07, "loss": 0.172, "step": 33787 }, { "epoch": 0.5873211771454397, "grad_norm": 0.9628092547626552, "learning_rate": 3.8396976578916086e-07, "loss": 0.2698, "step": 33788 }, { "epoch": 0.5873385596829425, "grad_norm": 0.8353856942383884, "learning_rate": 3.8394238507486724e-07, "loss": 0.1923, "step": 33789 }, { "epoch": 0.5873559422204453, "grad_norm": 1.0035642216500615, "learning_rate": 3.839150047284232e-07, "loss": 0.1084, "step": 33790 }, { "epoch": 0.5873733247579481, "grad_norm": 1.9054260507502834, "learning_rate": 3.8388762474991516e-07, "loss": 0.196, "step": 33791 }, { "epoch": 0.587390707295451, "grad_norm": 1.8589225111952283, "learning_rate": 3.8386024513942987e-07, "loss": 0.2763, "step": 33792 }, { "epoch": 0.5874080898329538, "grad_norm": 1.694613762538506, "learning_rate": 3.8383286589705446e-07, "loss": 0.2462, "step": 33793 }, { "epoch": 0.5874254723704566, "grad_norm": 1.8315515350601608, "learning_rate": 3.838054870228754e-07, "loss": 0.2279, "step": 33794 }, { "epoch": 0.5874428549079594, "grad_norm": 1.434168771186972, "learning_rate": 3.8377810851697944e-07, "loss": 0.2889, "step": 33795 }, { "epoch": 0.5874602374454623, "grad_norm": 2.2209921252950617, "learning_rate": 3.8375073037945375e-07, "loss": 0.3122, "step": 33796 }, { "epoch": 0.5874776199829651, "grad_norm": 4.992861915462976, "learning_rate": 3.837233526103848e-07, "loss": 0.3027, "step": 33797 }, { "epoch": 0.5874950025204679, "grad_norm": 2.362065679494879, "learning_rate": 3.836959752098592e-07, "loss": 0.2563, "step": 33798 }, { "epoch": 0.5875123850579708, "grad_norm": 4.960301407383833, "learning_rate": 3.836685981779641e-07, "loss": 0.3328, "step": 33799 }, { "epoch": 0.5875297675954736, "grad_norm": 1.3527422148869315, "learning_rate": 3.8364122151478596e-07, "loss": 0.2002, "step": 33800 }, { "epoch": 0.5875471501329764, "grad_norm": 1.0422691786451848, "learning_rate": 3.8361384522041185e-07, "loss": 0.2145, "step": 33801 }, { "epoch": 0.5875645326704793, "grad_norm": 0.768706646163653, "learning_rate": 3.835864692949283e-07, "loss": 0.3381, "step": 33802 }, { "epoch": 0.5875819152079821, "grad_norm": 1.7855836853789273, "learning_rate": 3.835590937384224e-07, "loss": 0.3208, "step": 33803 }, { "epoch": 0.5875992977454849, "grad_norm": 2.0516853152989283, "learning_rate": 3.835317185509804e-07, "loss": 0.4639, "step": 33804 }, { "epoch": 0.5876166802829877, "grad_norm": 1.343762516344479, "learning_rate": 3.8350434373268947e-07, "loss": 0.2672, "step": 33805 }, { "epoch": 0.5876340628204906, "grad_norm": 1.2924284020008403, "learning_rate": 3.8347696928363614e-07, "loss": 0.1461, "step": 33806 }, { "epoch": 0.5876514453579934, "grad_norm": 1.259192847693021, "learning_rate": 3.8344959520390743e-07, "loss": 0.2135, "step": 33807 }, { "epoch": 0.5876688278954962, "grad_norm": 1.0996020801267914, "learning_rate": 3.834222214935898e-07, "loss": 0.3532, "step": 33808 }, { "epoch": 0.587686210432999, "grad_norm": 1.2397237706804152, "learning_rate": 3.833948481527705e-07, "loss": 0.2832, "step": 33809 }, { "epoch": 0.5877035929705018, "grad_norm": 1.2899265399737878, "learning_rate": 3.8336747518153565e-07, "loss": 0.2379, "step": 33810 }, { "epoch": 0.5877209755080046, "grad_norm": 1.8781967130436075, "learning_rate": 3.833401025799725e-07, "loss": 0.3194, "step": 33811 }, { "epoch": 0.5877383580455074, "grad_norm": 1.9213662511746241, "learning_rate": 3.8331273034816745e-07, "loss": 0.3394, "step": 33812 }, { "epoch": 0.5877557405830103, "grad_norm": 1.5297216670720186, "learning_rate": 3.8328535848620764e-07, "loss": 0.3419, "step": 33813 }, { "epoch": 0.5877731231205131, "grad_norm": 1.5279945425556332, "learning_rate": 3.8325798699417946e-07, "loss": 0.3594, "step": 33814 }, { "epoch": 0.5877905056580159, "grad_norm": 1.2347472905822827, "learning_rate": 3.8323061587217006e-07, "loss": 0.1561, "step": 33815 }, { "epoch": 0.5878078881955188, "grad_norm": 1.3748080679891805, "learning_rate": 3.832032451202659e-07, "loss": 0.1836, "step": 33816 }, { "epoch": 0.5878252707330216, "grad_norm": 1.699117262295531, "learning_rate": 3.8317587473855374e-07, "loss": 0.315, "step": 33817 }, { "epoch": 0.5878426532705244, "grad_norm": 1.2288073277848268, "learning_rate": 3.831485047271203e-07, "loss": 0.3481, "step": 33818 }, { "epoch": 0.5878600358080273, "grad_norm": 1.314803561747789, "learning_rate": 3.831211350860525e-07, "loss": 0.1552, "step": 33819 }, { "epoch": 0.5878774183455301, "grad_norm": 1.1895208471698777, "learning_rate": 3.8309376581543695e-07, "loss": 0.3388, "step": 33820 }, { "epoch": 0.5878948008830329, "grad_norm": 3.2526940226512693, "learning_rate": 3.8306639691536056e-07, "loss": 0.3023, "step": 33821 }, { "epoch": 0.5879121834205357, "grad_norm": 2.4515061785007064, "learning_rate": 3.8303902838591005e-07, "loss": 0.2681, "step": 33822 }, { "epoch": 0.5879295659580386, "grad_norm": 1.2337895019970109, "learning_rate": 3.830116602271719e-07, "loss": 0.3287, "step": 33823 }, { "epoch": 0.5879469484955414, "grad_norm": 1.6408839677579625, "learning_rate": 3.829842924392332e-07, "loss": 0.1515, "step": 33824 }, { "epoch": 0.5879643310330442, "grad_norm": 2.8224136670776985, "learning_rate": 3.829569250221804e-07, "loss": 0.4891, "step": 33825 }, { "epoch": 0.5879817135705471, "grad_norm": 2.0817951013645035, "learning_rate": 3.829295579761005e-07, "loss": 0.2458, "step": 33826 }, { "epoch": 0.5879990961080499, "grad_norm": 2.342982187041611, "learning_rate": 3.829021913010801e-07, "loss": 0.1461, "step": 33827 }, { "epoch": 0.5880164786455527, "grad_norm": 1.805678396103955, "learning_rate": 3.828748249972061e-07, "loss": 0.1979, "step": 33828 }, { "epoch": 0.5880338611830555, "grad_norm": 1.9592573307703434, "learning_rate": 3.8284745906456483e-07, "loss": 0.2623, "step": 33829 }, { "epoch": 0.5880512437205583, "grad_norm": 1.8371557201943916, "learning_rate": 3.8282009350324345e-07, "loss": 0.3029, "step": 33830 }, { "epoch": 0.5880686262580611, "grad_norm": 1.784645957232138, "learning_rate": 3.8279272831332844e-07, "loss": 0.2272, "step": 33831 }, { "epoch": 0.5880860087955639, "grad_norm": 1.31691903393663, "learning_rate": 3.8276536349490673e-07, "loss": 0.4464, "step": 33832 }, { "epoch": 0.5881033913330668, "grad_norm": 1.2800692218933822, "learning_rate": 3.827379990480649e-07, "loss": 0.15, "step": 33833 }, { "epoch": 0.5881207738705696, "grad_norm": 1.9634681007516246, "learning_rate": 3.8271063497289e-07, "loss": 0.2512, "step": 33834 }, { "epoch": 0.5881381564080724, "grad_norm": 1.2216287652226039, "learning_rate": 3.826832712694682e-07, "loss": 0.2142, "step": 33835 }, { "epoch": 0.5881555389455753, "grad_norm": 1.1430925847609306, "learning_rate": 3.826559079378867e-07, "loss": 0.1396, "step": 33836 }, { "epoch": 0.5881729214830781, "grad_norm": 0.9105382782936253, "learning_rate": 3.8262854497823194e-07, "loss": 0.2449, "step": 33837 }, { "epoch": 0.5881903040205809, "grad_norm": 2.2449359901315082, "learning_rate": 3.8260118239059087e-07, "loss": 0.2244, "step": 33838 }, { "epoch": 0.5882076865580838, "grad_norm": 1.425821002778394, "learning_rate": 3.8257382017505e-07, "loss": 0.2645, "step": 33839 }, { "epoch": 0.5882250690955866, "grad_norm": 1.750158490693816, "learning_rate": 3.825464583316963e-07, "loss": 0.1822, "step": 33840 }, { "epoch": 0.5882424516330894, "grad_norm": 2.2836199385933984, "learning_rate": 3.825190968606165e-07, "loss": 0.23, "step": 33841 }, { "epoch": 0.5882598341705922, "grad_norm": 1.9189185653826133, "learning_rate": 3.8249173576189706e-07, "loss": 0.2911, "step": 33842 }, { "epoch": 0.5882772167080951, "grad_norm": 1.2907927618029507, "learning_rate": 3.8246437503562476e-07, "loss": 0.1811, "step": 33843 }, { "epoch": 0.5882945992455979, "grad_norm": 2.188575568186196, "learning_rate": 3.824370146818865e-07, "loss": 0.2569, "step": 33844 }, { "epoch": 0.5883119817831007, "grad_norm": 1.3280012874703555, "learning_rate": 3.824096547007688e-07, "loss": 0.3023, "step": 33845 }, { "epoch": 0.5883293643206036, "grad_norm": 0.7811354762856456, "learning_rate": 3.8238229509235854e-07, "loss": 0.2274, "step": 33846 }, { "epoch": 0.5883467468581064, "grad_norm": 1.6983190137760176, "learning_rate": 3.823549358567426e-07, "loss": 0.4174, "step": 33847 }, { "epoch": 0.5883641293956092, "grad_norm": 1.3098934436178367, "learning_rate": 3.823275769940071e-07, "loss": 0.1818, "step": 33848 }, { "epoch": 0.588381511933112, "grad_norm": 4.036526975165713, "learning_rate": 3.823002185042393e-07, "loss": 0.2343, "step": 33849 }, { "epoch": 0.5883988944706148, "grad_norm": 1.2064163980846934, "learning_rate": 3.8227286038752577e-07, "loss": 0.1198, "step": 33850 }, { "epoch": 0.5884162770081176, "grad_norm": 1.4396026811338576, "learning_rate": 3.8224550264395305e-07, "loss": 0.3681, "step": 33851 }, { "epoch": 0.5884336595456204, "grad_norm": 1.3578403778435513, "learning_rate": 3.8221814527360814e-07, "loss": 0.4351, "step": 33852 }, { "epoch": 0.5884510420831233, "grad_norm": 1.2901245611013967, "learning_rate": 3.821907882765777e-07, "loss": 0.1885, "step": 33853 }, { "epoch": 0.5884684246206261, "grad_norm": 1.374848885793426, "learning_rate": 3.821634316529481e-07, "loss": 0.2021, "step": 33854 }, { "epoch": 0.5884858071581289, "grad_norm": 1.577293574316693, "learning_rate": 3.821360754028064e-07, "loss": 0.1941, "step": 33855 }, { "epoch": 0.5885031896956318, "grad_norm": 1.569416398351278, "learning_rate": 3.821087195262391e-07, "loss": 0.2022, "step": 33856 }, { "epoch": 0.5885205722331346, "grad_norm": 1.8866135253169356, "learning_rate": 3.820813640233331e-07, "loss": 0.1633, "step": 33857 }, { "epoch": 0.5885379547706374, "grad_norm": 1.0655965367633375, "learning_rate": 3.8205400889417503e-07, "loss": 0.1394, "step": 33858 }, { "epoch": 0.5885553373081402, "grad_norm": 1.4829713285886759, "learning_rate": 3.820266541388517e-07, "loss": 0.3206, "step": 33859 }, { "epoch": 0.5885727198456431, "grad_norm": 1.7104103802606736, "learning_rate": 3.819992997574494e-07, "loss": 0.2408, "step": 33860 }, { "epoch": 0.5885901023831459, "grad_norm": 1.459935416005104, "learning_rate": 3.8197194575005534e-07, "loss": 0.1622, "step": 33861 }, { "epoch": 0.5886074849206487, "grad_norm": 2.3481055437629306, "learning_rate": 3.819445921167558e-07, "loss": 0.1777, "step": 33862 }, { "epoch": 0.5886248674581516, "grad_norm": 1.4208653588776756, "learning_rate": 3.8191723885763775e-07, "loss": 0.1311, "step": 33863 }, { "epoch": 0.5886422499956544, "grad_norm": 2.034182805300979, "learning_rate": 3.8188988597278774e-07, "loss": 0.2883, "step": 33864 }, { "epoch": 0.5886596325331572, "grad_norm": 1.3363664976395908, "learning_rate": 3.8186253346229267e-07, "loss": 0.2858, "step": 33865 }, { "epoch": 0.5886770150706601, "grad_norm": 1.4321060208776797, "learning_rate": 3.818351813262392e-07, "loss": 0.2763, "step": 33866 }, { "epoch": 0.5886943976081629, "grad_norm": 1.4268388299598924, "learning_rate": 3.818078295647138e-07, "loss": 0.2418, "step": 33867 }, { "epoch": 0.5887117801456657, "grad_norm": 1.1021527182352346, "learning_rate": 3.817804781778032e-07, "loss": 0.3886, "step": 33868 }, { "epoch": 0.5887291626831684, "grad_norm": 1.723301901189183, "learning_rate": 3.8175312716559425e-07, "loss": 0.1775, "step": 33869 }, { "epoch": 0.5887465452206713, "grad_norm": 1.077747925037507, "learning_rate": 3.8172577652817353e-07, "loss": 0.2536, "step": 33870 }, { "epoch": 0.5887639277581741, "grad_norm": 2.5067091171911273, "learning_rate": 3.816984262656278e-07, "loss": 0.2753, "step": 33871 }, { "epoch": 0.5887813102956769, "grad_norm": 1.8672953217962776, "learning_rate": 3.8167107637804387e-07, "loss": 0.1913, "step": 33872 }, { "epoch": 0.5887986928331798, "grad_norm": 1.1718069002762264, "learning_rate": 3.816437268655081e-07, "loss": 0.1917, "step": 33873 }, { "epoch": 0.5888160753706826, "grad_norm": 2.2275658322759164, "learning_rate": 3.816163777281073e-07, "loss": 0.3878, "step": 33874 }, { "epoch": 0.5888334579081854, "grad_norm": 1.7293411800539236, "learning_rate": 3.815890289659283e-07, "loss": 0.2632, "step": 33875 }, { "epoch": 0.5888508404456883, "grad_norm": 1.501673584722176, "learning_rate": 3.815616805790576e-07, "loss": 0.2972, "step": 33876 }, { "epoch": 0.5888682229831911, "grad_norm": 1.2938138807766646, "learning_rate": 3.8153433256758205e-07, "loss": 0.2587, "step": 33877 }, { "epoch": 0.5888856055206939, "grad_norm": 1.0858694015356412, "learning_rate": 3.8150698493158833e-07, "loss": 0.3075, "step": 33878 }, { "epoch": 0.5889029880581967, "grad_norm": 1.4588566081701784, "learning_rate": 3.814796376711628e-07, "loss": 0.2005, "step": 33879 }, { "epoch": 0.5889203705956996, "grad_norm": 1.3853787545099725, "learning_rate": 3.8145229078639255e-07, "loss": 0.3206, "step": 33880 }, { "epoch": 0.5889377531332024, "grad_norm": 1.695250189248252, "learning_rate": 3.8142494427736393e-07, "loss": 0.2627, "step": 33881 }, { "epoch": 0.5889551356707052, "grad_norm": 1.0177791465699615, "learning_rate": 3.8139759814416393e-07, "loss": 0.311, "step": 33882 }, { "epoch": 0.5889725182082081, "grad_norm": 4.326100012732076, "learning_rate": 3.81370252386879e-07, "loss": 0.2581, "step": 33883 }, { "epoch": 0.5889899007457109, "grad_norm": 1.2290614338652706, "learning_rate": 3.8134290700559577e-07, "loss": 0.3018, "step": 33884 }, { "epoch": 0.5890072832832137, "grad_norm": 1.8016324868552611, "learning_rate": 3.8131556200040125e-07, "loss": 0.2501, "step": 33885 }, { "epoch": 0.5890246658207166, "grad_norm": 1.2943120781166513, "learning_rate": 3.8128821737138185e-07, "loss": 0.1598, "step": 33886 }, { "epoch": 0.5890420483582194, "grad_norm": 1.2212773731548558, "learning_rate": 3.812608731186241e-07, "loss": 0.4186, "step": 33887 }, { "epoch": 0.5890594308957221, "grad_norm": 1.7117119331731583, "learning_rate": 3.812335292422149e-07, "loss": 0.1741, "step": 33888 }, { "epoch": 0.5890768134332249, "grad_norm": 9.168631894235352, "learning_rate": 3.8120618574224077e-07, "loss": 0.5028, "step": 33889 }, { "epoch": 0.5890941959707278, "grad_norm": 1.2656205319799787, "learning_rate": 3.8117884261878865e-07, "loss": 0.2405, "step": 33890 }, { "epoch": 0.5891115785082306, "grad_norm": 2.093038248117693, "learning_rate": 3.81151499871945e-07, "loss": 0.3391, "step": 33891 }, { "epoch": 0.5891289610457334, "grad_norm": 2.680999134515575, "learning_rate": 3.811241575017965e-07, "loss": 0.238, "step": 33892 }, { "epoch": 0.5891463435832363, "grad_norm": 1.2667894997284634, "learning_rate": 3.8109681550842965e-07, "loss": 0.1467, "step": 33893 }, { "epoch": 0.5891637261207391, "grad_norm": 1.368940893335442, "learning_rate": 3.8106947389193136e-07, "loss": 0.2529, "step": 33894 }, { "epoch": 0.5891811086582419, "grad_norm": 1.3717993825051507, "learning_rate": 3.8104213265238814e-07, "loss": 0.1637, "step": 33895 }, { "epoch": 0.5891984911957447, "grad_norm": 1.1989823494454688, "learning_rate": 3.810147917898868e-07, "loss": 0.1626, "step": 33896 }, { "epoch": 0.5892158737332476, "grad_norm": 1.228876397602992, "learning_rate": 3.8098745130451405e-07, "loss": 0.289, "step": 33897 }, { "epoch": 0.5892332562707504, "grad_norm": 2.9456812608146614, "learning_rate": 3.809601111963563e-07, "loss": 0.2851, "step": 33898 }, { "epoch": 0.5892506388082532, "grad_norm": 1.4915957972961287, "learning_rate": 3.809327714655002e-07, "loss": 0.2351, "step": 33899 }, { "epoch": 0.5892680213457561, "grad_norm": 2.175159096986196, "learning_rate": 3.809054321120326e-07, "loss": 0.3348, "step": 33900 }, { "epoch": 0.5892854038832589, "grad_norm": 1.5850792955994997, "learning_rate": 3.8087809313604e-07, "loss": 0.1322, "step": 33901 }, { "epoch": 0.5893027864207617, "grad_norm": 1.5421287909086423, "learning_rate": 3.8085075453760917e-07, "loss": 0.2237, "step": 33902 }, { "epoch": 0.5893201689582646, "grad_norm": 1.016003518781587, "learning_rate": 3.808234163168267e-07, "loss": 0.1577, "step": 33903 }, { "epoch": 0.5893375514957674, "grad_norm": 1.4397274316613857, "learning_rate": 3.807960784737794e-07, "loss": 0.2946, "step": 33904 }, { "epoch": 0.5893549340332702, "grad_norm": 1.623630675741384, "learning_rate": 3.807687410085537e-07, "loss": 0.2028, "step": 33905 }, { "epoch": 0.589372316570773, "grad_norm": 1.8657421252414579, "learning_rate": 3.807414039212363e-07, "loss": 0.2203, "step": 33906 }, { "epoch": 0.5893896991082759, "grad_norm": 1.374115418532753, "learning_rate": 3.8071406721191373e-07, "loss": 0.1682, "step": 33907 }, { "epoch": 0.5894070816457786, "grad_norm": 1.13589669162619, "learning_rate": 3.806867308806729e-07, "loss": 0.2346, "step": 33908 }, { "epoch": 0.5894244641832814, "grad_norm": 1.504307476651879, "learning_rate": 3.8065939492760023e-07, "loss": 0.2221, "step": 33909 }, { "epoch": 0.5894418467207843, "grad_norm": 1.2322246501339877, "learning_rate": 3.806320593527827e-07, "loss": 0.2123, "step": 33910 }, { "epoch": 0.5894592292582871, "grad_norm": 1.3140894697944336, "learning_rate": 3.8060472415630663e-07, "loss": 0.4528, "step": 33911 }, { "epoch": 0.5894766117957899, "grad_norm": 1.242183234697821, "learning_rate": 3.8057738933825857e-07, "loss": 0.3783, "step": 33912 }, { "epoch": 0.5894939943332927, "grad_norm": 1.901873476109311, "learning_rate": 3.805500548987254e-07, "loss": 0.3212, "step": 33913 }, { "epoch": 0.5895113768707956, "grad_norm": 1.1899938262555232, "learning_rate": 3.8052272083779377e-07, "loss": 0.2345, "step": 33914 }, { "epoch": 0.5895287594082984, "grad_norm": 1.0279589275210643, "learning_rate": 3.804953871555501e-07, "loss": 0.3606, "step": 33915 }, { "epoch": 0.5895461419458012, "grad_norm": 1.8673354947896923, "learning_rate": 3.804680538520813e-07, "loss": 0.5387, "step": 33916 }, { "epoch": 0.5895635244833041, "grad_norm": 1.2897067551558201, "learning_rate": 3.804407209274738e-07, "loss": 0.1999, "step": 33917 }, { "epoch": 0.5895809070208069, "grad_norm": 1.5824408433582886, "learning_rate": 3.8041338838181425e-07, "loss": 0.333, "step": 33918 }, { "epoch": 0.5895982895583097, "grad_norm": 0.9203786547386127, "learning_rate": 3.803860562151894e-07, "loss": 0.2384, "step": 33919 }, { "epoch": 0.5896156720958126, "grad_norm": 1.5796260748222988, "learning_rate": 3.8035872442768565e-07, "loss": 0.4808, "step": 33920 }, { "epoch": 0.5896330546333154, "grad_norm": 3.4496313198870037, "learning_rate": 3.803313930193899e-07, "loss": 0.3226, "step": 33921 }, { "epoch": 0.5896504371708182, "grad_norm": 2.0010100654870286, "learning_rate": 3.8030406199038856e-07, "loss": 0.2538, "step": 33922 }, { "epoch": 0.589667819708321, "grad_norm": 1.8080758360956801, "learning_rate": 3.8027673134076873e-07, "loss": 0.3637, "step": 33923 }, { "epoch": 0.5896852022458239, "grad_norm": 1.8708784898077264, "learning_rate": 3.8024940107061627e-07, "loss": 0.3614, "step": 33924 }, { "epoch": 0.5897025847833267, "grad_norm": 1.910623049523469, "learning_rate": 3.8022207118001837e-07, "loss": 0.3318, "step": 33925 }, { "epoch": 0.5897199673208295, "grad_norm": 1.3975131356251649, "learning_rate": 3.8019474166906133e-07, "loss": 0.1773, "step": 33926 }, { "epoch": 0.5897373498583324, "grad_norm": 1.5196176252497058, "learning_rate": 3.8016741253783207e-07, "loss": 0.2532, "step": 33927 }, { "epoch": 0.5897547323958351, "grad_norm": 1.3583599637315422, "learning_rate": 3.8014008378641694e-07, "loss": 0.2667, "step": 33928 }, { "epoch": 0.5897721149333379, "grad_norm": 1.7332270406721273, "learning_rate": 3.801127554149029e-07, "loss": 0.2357, "step": 33929 }, { "epoch": 0.5897894974708408, "grad_norm": 1.8034590008171523, "learning_rate": 3.8008542742337623e-07, "loss": 0.1487, "step": 33930 }, { "epoch": 0.5898068800083436, "grad_norm": 1.989511049742081, "learning_rate": 3.800580998119237e-07, "loss": 0.3093, "step": 33931 }, { "epoch": 0.5898242625458464, "grad_norm": 2.0662491713169358, "learning_rate": 3.8003077258063176e-07, "loss": 0.2631, "step": 33932 }, { "epoch": 0.5898416450833492, "grad_norm": 1.6855328892067902, "learning_rate": 3.800034457295873e-07, "loss": 0.3228, "step": 33933 }, { "epoch": 0.5898590276208521, "grad_norm": 2.283958689796419, "learning_rate": 3.7997611925887666e-07, "loss": 0.2153, "step": 33934 }, { "epoch": 0.5898764101583549, "grad_norm": 1.2749874770136769, "learning_rate": 3.7994879316858685e-07, "loss": 0.2482, "step": 33935 }, { "epoch": 0.5898937926958577, "grad_norm": 1.8344429953692363, "learning_rate": 3.7992146745880403e-07, "loss": 0.2832, "step": 33936 }, { "epoch": 0.5899111752333606, "grad_norm": 1.0397826752881907, "learning_rate": 3.79894142129615e-07, "loss": 0.3308, "step": 33937 }, { "epoch": 0.5899285577708634, "grad_norm": 1.2797185031242009, "learning_rate": 3.798668171811064e-07, "loss": 0.1624, "step": 33938 }, { "epoch": 0.5899459403083662, "grad_norm": 2.5283181465077926, "learning_rate": 3.798394926133648e-07, "loss": 0.3006, "step": 33939 }, { "epoch": 0.589963322845869, "grad_norm": 0.9054865833130222, "learning_rate": 3.7981216842647674e-07, "loss": 0.1986, "step": 33940 }, { "epoch": 0.5899807053833719, "grad_norm": 1.1836350282664094, "learning_rate": 3.7978484462052895e-07, "loss": 0.2326, "step": 33941 }, { "epoch": 0.5899980879208747, "grad_norm": 1.1288168690019222, "learning_rate": 3.797575211956082e-07, "loss": 0.4803, "step": 33942 }, { "epoch": 0.5900154704583775, "grad_norm": 1.30506826881023, "learning_rate": 3.7973019815180054e-07, "loss": 0.3705, "step": 33943 }, { "epoch": 0.5900328529958804, "grad_norm": 1.56320072054759, "learning_rate": 3.7970287548919307e-07, "loss": 0.2946, "step": 33944 }, { "epoch": 0.5900502355333832, "grad_norm": 1.9326042323871588, "learning_rate": 3.796755532078721e-07, "loss": 0.2556, "step": 33945 }, { "epoch": 0.590067618070886, "grad_norm": 1.8847206134004728, "learning_rate": 3.7964823130792446e-07, "loss": 0.2785, "step": 33946 }, { "epoch": 0.5900850006083889, "grad_norm": 1.581287595846454, "learning_rate": 3.7962090978943663e-07, "loss": 0.1939, "step": 33947 }, { "epoch": 0.5901023831458916, "grad_norm": 1.04341607413846, "learning_rate": 3.7959358865249533e-07, "loss": 0.1994, "step": 33948 }, { "epoch": 0.5901197656833944, "grad_norm": 1.0037201949586567, "learning_rate": 3.795662678971869e-07, "loss": 0.2951, "step": 33949 }, { "epoch": 0.5901371482208972, "grad_norm": 1.117825977202821, "learning_rate": 3.7953894752359804e-07, "loss": 0.2608, "step": 33950 }, { "epoch": 0.5901545307584001, "grad_norm": 2.08403403474061, "learning_rate": 3.795116275318154e-07, "loss": 0.2924, "step": 33951 }, { "epoch": 0.5901719132959029, "grad_norm": 1.8183864260218943, "learning_rate": 3.7948430792192566e-07, "loss": 0.2918, "step": 33952 }, { "epoch": 0.5901892958334057, "grad_norm": 1.6522560163834357, "learning_rate": 3.794569886940151e-07, "loss": 0.2097, "step": 33953 }, { "epoch": 0.5902066783709086, "grad_norm": 2.3909966127254436, "learning_rate": 3.7942966984817096e-07, "loss": 0.236, "step": 33954 }, { "epoch": 0.5902240609084114, "grad_norm": 1.5507894603479926, "learning_rate": 3.79402351384479e-07, "loss": 0.1818, "step": 33955 }, { "epoch": 0.5902414434459142, "grad_norm": 2.48105716125842, "learning_rate": 3.7937503330302623e-07, "loss": 0.3398, "step": 33956 }, { "epoch": 0.5902588259834171, "grad_norm": 1.23121722129781, "learning_rate": 3.793477156038991e-07, "loss": 0.1894, "step": 33957 }, { "epoch": 0.5902762085209199, "grad_norm": 1.9817484504004008, "learning_rate": 3.793203982871845e-07, "loss": 0.3326, "step": 33958 }, { "epoch": 0.5902935910584227, "grad_norm": 2.4236718922315355, "learning_rate": 3.792930813529686e-07, "loss": 0.2397, "step": 33959 }, { "epoch": 0.5903109735959255, "grad_norm": 1.4316085123732116, "learning_rate": 3.792657648013383e-07, "loss": 0.2653, "step": 33960 }, { "epoch": 0.5903283561334284, "grad_norm": 1.4924612735920046, "learning_rate": 3.7923844863238017e-07, "loss": 0.2407, "step": 33961 }, { "epoch": 0.5903457386709312, "grad_norm": 1.8727100188053376, "learning_rate": 3.7921113284618067e-07, "loss": 0.2591, "step": 33962 }, { "epoch": 0.590363121208434, "grad_norm": 3.9519629979923376, "learning_rate": 3.791838174428262e-07, "loss": 0.3098, "step": 33963 }, { "epoch": 0.5903805037459369, "grad_norm": 1.1867520421097217, "learning_rate": 3.7915650242240366e-07, "loss": 0.259, "step": 33964 }, { "epoch": 0.5903978862834397, "grad_norm": 2.554826717292239, "learning_rate": 3.791291877849994e-07, "loss": 0.2394, "step": 33965 }, { "epoch": 0.5904152688209425, "grad_norm": 1.3478783275089496, "learning_rate": 3.7910187353070015e-07, "loss": 0.3519, "step": 33966 }, { "epoch": 0.5904326513584454, "grad_norm": 1.314958942888429, "learning_rate": 3.790745596595926e-07, "loss": 0.315, "step": 33967 }, { "epoch": 0.5904500338959481, "grad_norm": 1.8064966940881169, "learning_rate": 3.790472461717629e-07, "loss": 0.4021, "step": 33968 }, { "epoch": 0.5904674164334509, "grad_norm": 1.248938822730055, "learning_rate": 3.7901993306729797e-07, "loss": 0.2497, "step": 33969 }, { "epoch": 0.5904847989709537, "grad_norm": 1.991782087926158, "learning_rate": 3.789926203462842e-07, "loss": 0.2591, "step": 33970 }, { "epoch": 0.5905021815084566, "grad_norm": 1.0496507600877747, "learning_rate": 3.789653080088083e-07, "loss": 0.2348, "step": 33971 }, { "epoch": 0.5905195640459594, "grad_norm": 3.3256182363656586, "learning_rate": 3.7893799605495673e-07, "loss": 0.2124, "step": 33972 }, { "epoch": 0.5905369465834622, "grad_norm": 1.7698311680250605, "learning_rate": 3.7891068448481634e-07, "loss": 0.3122, "step": 33973 }, { "epoch": 0.5905543291209651, "grad_norm": 1.4154185331179623, "learning_rate": 3.7888337329847315e-07, "loss": 0.1877, "step": 33974 }, { "epoch": 0.5905717116584679, "grad_norm": 1.0769523254326163, "learning_rate": 3.7885606249601413e-07, "loss": 0.2, "step": 33975 }, { "epoch": 0.5905890941959707, "grad_norm": 2.8507262011604118, "learning_rate": 3.788287520775257e-07, "loss": 0.3241, "step": 33976 }, { "epoch": 0.5906064767334736, "grad_norm": 1.0808713203108622, "learning_rate": 3.788014420430945e-07, "loss": 0.2227, "step": 33977 }, { "epoch": 0.5906238592709764, "grad_norm": 1.2030346950851603, "learning_rate": 3.78774132392807e-07, "loss": 0.1849, "step": 33978 }, { "epoch": 0.5906412418084792, "grad_norm": 2.0347706840416033, "learning_rate": 3.787468231267501e-07, "loss": 0.247, "step": 33979 }, { "epoch": 0.590658624345982, "grad_norm": 2.186980668882151, "learning_rate": 3.787195142450098e-07, "loss": 0.4029, "step": 33980 }, { "epoch": 0.5906760068834849, "grad_norm": 1.7818646074317623, "learning_rate": 3.7869220574767303e-07, "loss": 0.2248, "step": 33981 }, { "epoch": 0.5906933894209877, "grad_norm": 0.9243704051230621, "learning_rate": 3.786648976348261e-07, "loss": 0.2468, "step": 33982 }, { "epoch": 0.5907107719584905, "grad_norm": 1.5533912401381886, "learning_rate": 3.786375899065558e-07, "loss": 0.3509, "step": 33983 }, { "epoch": 0.5907281544959934, "grad_norm": 1.1556278484721165, "learning_rate": 3.786102825629485e-07, "loss": 0.159, "step": 33984 }, { "epoch": 0.5907455370334962, "grad_norm": 2.2158841957634583, "learning_rate": 3.78582975604091e-07, "loss": 0.1958, "step": 33985 }, { "epoch": 0.590762919570999, "grad_norm": 1.794099844412428, "learning_rate": 3.7855566903006976e-07, "loss": 0.1195, "step": 33986 }, { "epoch": 0.5907803021085019, "grad_norm": 1.1746507426088166, "learning_rate": 3.7852836284097113e-07, "loss": 0.2644, "step": 33987 }, { "epoch": 0.5907976846460046, "grad_norm": 1.233751273162093, "learning_rate": 3.7850105703688175e-07, "loss": 0.2649, "step": 33988 }, { "epoch": 0.5908150671835074, "grad_norm": 1.0067935374731498, "learning_rate": 3.784737516178882e-07, "loss": 0.1442, "step": 33989 }, { "epoch": 0.5908324497210102, "grad_norm": 2.5188912912038184, "learning_rate": 3.7844644658407705e-07, "loss": 0.2595, "step": 33990 }, { "epoch": 0.5908498322585131, "grad_norm": 1.8348157322060255, "learning_rate": 3.7841914193553484e-07, "loss": 0.2053, "step": 33991 }, { "epoch": 0.5908672147960159, "grad_norm": 2.9917494283817, "learning_rate": 3.7839183767234827e-07, "loss": 0.2122, "step": 33992 }, { "epoch": 0.5908845973335187, "grad_norm": 1.583835880983145, "learning_rate": 3.7836453379460355e-07, "loss": 0.1638, "step": 33993 }, { "epoch": 0.5909019798710216, "grad_norm": 3.2414124719798765, "learning_rate": 3.783372303023874e-07, "loss": 0.2566, "step": 33994 }, { "epoch": 0.5909193624085244, "grad_norm": 2.97326165180096, "learning_rate": 3.783099271957864e-07, "loss": 0.3515, "step": 33995 }, { "epoch": 0.5909367449460272, "grad_norm": 1.3687367947853344, "learning_rate": 3.782826244748868e-07, "loss": 0.3304, "step": 33996 }, { "epoch": 0.59095412748353, "grad_norm": 6.3534258927556895, "learning_rate": 3.7825532213977555e-07, "loss": 0.3893, "step": 33997 }, { "epoch": 0.5909715100210329, "grad_norm": 1.7140525899604686, "learning_rate": 3.782280201905392e-07, "loss": 0.2369, "step": 33998 }, { "epoch": 0.5909888925585357, "grad_norm": 1.265450349062992, "learning_rate": 3.782007186272638e-07, "loss": 0.2473, "step": 33999 }, { "epoch": 0.5910062750960385, "grad_norm": 1.182202733083392, "learning_rate": 3.7817341745003624e-07, "loss": 0.1598, "step": 34000 }, { "epoch": 0.5910236576335414, "grad_norm": 1.6206974959333762, "learning_rate": 3.781461166589429e-07, "loss": 0.2977, "step": 34001 }, { "epoch": 0.5910410401710442, "grad_norm": 1.7254121844481582, "learning_rate": 3.781188162540705e-07, "loss": 0.1795, "step": 34002 }, { "epoch": 0.591058422708547, "grad_norm": 2.3243765706718893, "learning_rate": 3.780915162355055e-07, "loss": 0.3433, "step": 34003 }, { "epoch": 0.5910758052460499, "grad_norm": 1.4836229298174155, "learning_rate": 3.7806421660333424e-07, "loss": 0.2082, "step": 34004 }, { "epoch": 0.5910931877835527, "grad_norm": 1.6246027734070745, "learning_rate": 3.780369173576436e-07, "loss": 0.2837, "step": 34005 }, { "epoch": 0.5911105703210555, "grad_norm": 1.3790464191235836, "learning_rate": 3.7800961849851994e-07, "loss": 0.241, "step": 34006 }, { "epoch": 0.5911279528585583, "grad_norm": 1.2810797822586846, "learning_rate": 3.779823200260495e-07, "loss": 0.1832, "step": 34007 }, { "epoch": 0.5911453353960611, "grad_norm": 1.685784944827277, "learning_rate": 3.779550219403192e-07, "loss": 0.1558, "step": 34008 }, { "epoch": 0.5911627179335639, "grad_norm": 1.109358516023775, "learning_rate": 3.7792772424141535e-07, "loss": 0.1475, "step": 34009 }, { "epoch": 0.5911801004710667, "grad_norm": 1.7981671266202648, "learning_rate": 3.779004269294246e-07, "loss": 0.2275, "step": 34010 }, { "epoch": 0.5911974830085696, "grad_norm": 1.3784726163801029, "learning_rate": 3.7787313000443354e-07, "loss": 0.1505, "step": 34011 }, { "epoch": 0.5912148655460724, "grad_norm": 2.2504780717227137, "learning_rate": 3.778458334665284e-07, "loss": 0.319, "step": 34012 }, { "epoch": 0.5912322480835752, "grad_norm": 1.3255325429450433, "learning_rate": 3.778185373157958e-07, "loss": 0.2336, "step": 34013 }, { "epoch": 0.591249630621078, "grad_norm": 1.0301240658533437, "learning_rate": 3.7779124155232244e-07, "loss": 0.2203, "step": 34014 }, { "epoch": 0.5912670131585809, "grad_norm": 1.0228111789891432, "learning_rate": 3.7776394617619454e-07, "loss": 0.2358, "step": 34015 }, { "epoch": 0.5912843956960837, "grad_norm": 1.2120671812235477, "learning_rate": 3.777366511874989e-07, "loss": 0.1928, "step": 34016 }, { "epoch": 0.5913017782335865, "grad_norm": 2.226153741005661, "learning_rate": 3.7770935658632203e-07, "loss": 0.1846, "step": 34017 }, { "epoch": 0.5913191607710894, "grad_norm": 1.1664472671062849, "learning_rate": 3.7768206237275013e-07, "loss": 0.2958, "step": 34018 }, { "epoch": 0.5913365433085922, "grad_norm": 1.27403336617573, "learning_rate": 3.7765476854687e-07, "loss": 0.2358, "step": 34019 }, { "epoch": 0.591353925846095, "grad_norm": 4.201462058683877, "learning_rate": 3.77627475108768e-07, "loss": 0.1823, "step": 34020 }, { "epoch": 0.5913713083835979, "grad_norm": 3.446014910406485, "learning_rate": 3.776001820585307e-07, "loss": 0.2222, "step": 34021 }, { "epoch": 0.5913886909211007, "grad_norm": 1.696650248624455, "learning_rate": 3.775728893962446e-07, "loss": 0.2986, "step": 34022 }, { "epoch": 0.5914060734586035, "grad_norm": 1.1294113067150984, "learning_rate": 3.7754559712199615e-07, "loss": 0.1897, "step": 34023 }, { "epoch": 0.5914234559961064, "grad_norm": 1.5605794705434994, "learning_rate": 3.775183052358721e-07, "loss": 0.2663, "step": 34024 }, { "epoch": 0.5914408385336092, "grad_norm": 2.2380744247081625, "learning_rate": 3.774910137379586e-07, "loss": 0.3517, "step": 34025 }, { "epoch": 0.591458221071112, "grad_norm": 1.237563429435771, "learning_rate": 3.7746372262834227e-07, "loss": 0.2418, "step": 34026 }, { "epoch": 0.5914756036086147, "grad_norm": 1.183797944631293, "learning_rate": 3.774364319071097e-07, "loss": 0.1489, "step": 34027 }, { "epoch": 0.5914929861461176, "grad_norm": 1.6558029212993166, "learning_rate": 3.7740914157434747e-07, "loss": 0.3481, "step": 34028 }, { "epoch": 0.5915103686836204, "grad_norm": 1.311949217520304, "learning_rate": 3.773818516301417e-07, "loss": 0.4044, "step": 34029 }, { "epoch": 0.5915277512211232, "grad_norm": 1.3027754197195536, "learning_rate": 3.773545620745794e-07, "loss": 0.2876, "step": 34030 }, { "epoch": 0.591545133758626, "grad_norm": 1.1519033039682487, "learning_rate": 3.773272729077467e-07, "loss": 0.2234, "step": 34031 }, { "epoch": 0.5915625162961289, "grad_norm": 1.9487055714712744, "learning_rate": 3.7729998412973005e-07, "loss": 0.2679, "step": 34032 }, { "epoch": 0.5915798988336317, "grad_norm": 1.3401560740245586, "learning_rate": 3.7727269574061625e-07, "loss": 0.1371, "step": 34033 }, { "epoch": 0.5915972813711345, "grad_norm": 1.8609398217153323, "learning_rate": 3.7724540774049154e-07, "loss": 0.2849, "step": 34034 }, { "epoch": 0.5916146639086374, "grad_norm": 1.5537553985987573, "learning_rate": 3.7721812012944256e-07, "loss": 0.2358, "step": 34035 }, { "epoch": 0.5916320464461402, "grad_norm": 1.6149755155771237, "learning_rate": 3.7719083290755584e-07, "loss": 0.1957, "step": 34036 }, { "epoch": 0.591649428983643, "grad_norm": 1.3331137750810838, "learning_rate": 3.7716354607491765e-07, "loss": 0.3398, "step": 34037 }, { "epoch": 0.5916668115211459, "grad_norm": 1.731068998326608, "learning_rate": 3.7713625963161445e-07, "loss": 0.3054, "step": 34038 }, { "epoch": 0.5916841940586487, "grad_norm": 1.2034648142027196, "learning_rate": 3.77108973577733e-07, "loss": 0.3364, "step": 34039 }, { "epoch": 0.5917015765961515, "grad_norm": 1.5964510474846214, "learning_rate": 3.770816879133596e-07, "loss": 0.1489, "step": 34040 }, { "epoch": 0.5917189591336544, "grad_norm": 1.803000569438612, "learning_rate": 3.770544026385808e-07, "loss": 0.3417, "step": 34041 }, { "epoch": 0.5917363416711572, "grad_norm": 1.269632146370595, "learning_rate": 3.7702711775348303e-07, "loss": 0.1996, "step": 34042 }, { "epoch": 0.59175372420866, "grad_norm": 2.262872236372334, "learning_rate": 3.7699983325815317e-07, "loss": 0.3535, "step": 34043 }, { "epoch": 0.5917711067461628, "grad_norm": 1.6126224790193457, "learning_rate": 3.7697254915267695e-07, "loss": 0.2834, "step": 34044 }, { "epoch": 0.5917884892836657, "grad_norm": 1.9531019733331594, "learning_rate": 3.769452654371414e-07, "loss": 0.2724, "step": 34045 }, { "epoch": 0.5918058718211685, "grad_norm": 2.393518501735928, "learning_rate": 3.769179821116327e-07, "loss": 0.2594, "step": 34046 }, { "epoch": 0.5918232543586712, "grad_norm": 1.1893840819368993, "learning_rate": 3.768906991762375e-07, "loss": 0.2105, "step": 34047 }, { "epoch": 0.591840636896174, "grad_norm": 1.9753381653062796, "learning_rate": 3.7686341663104214e-07, "loss": 0.3271, "step": 34048 }, { "epoch": 0.5918580194336769, "grad_norm": 1.8761646896575648, "learning_rate": 3.768361344761335e-07, "loss": 0.3379, "step": 34049 }, { "epoch": 0.5918754019711797, "grad_norm": 2.7777160096183775, "learning_rate": 3.7680885271159753e-07, "loss": 0.2791, "step": 34050 }, { "epoch": 0.5918927845086825, "grad_norm": 3.240180727504633, "learning_rate": 3.767815713375209e-07, "loss": 0.4602, "step": 34051 }, { "epoch": 0.5919101670461854, "grad_norm": 2.0868781384147725, "learning_rate": 3.7675429035399003e-07, "loss": 0.4015, "step": 34052 }, { "epoch": 0.5919275495836882, "grad_norm": 2.1614535181107315, "learning_rate": 3.7672700976109157e-07, "loss": 0.2106, "step": 34053 }, { "epoch": 0.591944932121191, "grad_norm": 1.255095973615299, "learning_rate": 3.7669972955891173e-07, "loss": 0.1619, "step": 34054 }, { "epoch": 0.5919623146586939, "grad_norm": 0.9845847828025847, "learning_rate": 3.766724497475373e-07, "loss": 0.3498, "step": 34055 }, { "epoch": 0.5919796971961967, "grad_norm": 1.123420412193635, "learning_rate": 3.7664517032705443e-07, "loss": 0.2105, "step": 34056 }, { "epoch": 0.5919970797336995, "grad_norm": 1.1614669999743084, "learning_rate": 3.7661789129754967e-07, "loss": 0.1727, "step": 34057 }, { "epoch": 0.5920144622712024, "grad_norm": 2.0194665908443996, "learning_rate": 3.765906126591096e-07, "loss": 0.2969, "step": 34058 }, { "epoch": 0.5920318448087052, "grad_norm": 2.363497174937731, "learning_rate": 3.7656333441182043e-07, "loss": 0.2617, "step": 34059 }, { "epoch": 0.592049227346208, "grad_norm": 1.639414572462206, "learning_rate": 3.7653605655576896e-07, "loss": 0.3726, "step": 34060 }, { "epoch": 0.5920666098837108, "grad_norm": 1.403518319059523, "learning_rate": 3.765087790910415e-07, "loss": 0.2204, "step": 34061 }, { "epoch": 0.5920839924212137, "grad_norm": 2.402057143457382, "learning_rate": 3.7648150201772447e-07, "loss": 0.2861, "step": 34062 }, { "epoch": 0.5921013749587165, "grad_norm": 1.271823807673397, "learning_rate": 3.764542253359042e-07, "loss": 0.1668, "step": 34063 }, { "epoch": 0.5921187574962193, "grad_norm": 1.3321994890064888, "learning_rate": 3.764269490456674e-07, "loss": 0.2939, "step": 34064 }, { "epoch": 0.5921361400337222, "grad_norm": 1.840533078689474, "learning_rate": 3.763996731471003e-07, "loss": 0.2471, "step": 34065 }, { "epoch": 0.592153522571225, "grad_norm": 1.7007249599342469, "learning_rate": 3.763723976402896e-07, "loss": 0.1814, "step": 34066 }, { "epoch": 0.5921709051087277, "grad_norm": 1.138825049324204, "learning_rate": 3.7634512252532146e-07, "loss": 0.2001, "step": 34067 }, { "epoch": 0.5921882876462305, "grad_norm": 3.514425666850524, "learning_rate": 3.7631784780228276e-07, "loss": 0.2922, "step": 34068 }, { "epoch": 0.5922056701837334, "grad_norm": 1.031428061484575, "learning_rate": 3.7629057347125944e-07, "loss": 0.1728, "step": 34069 }, { "epoch": 0.5922230527212362, "grad_norm": 1.1989781747987778, "learning_rate": 3.762632995323382e-07, "loss": 0.227, "step": 34070 }, { "epoch": 0.592240435258739, "grad_norm": 1.231392500309672, "learning_rate": 3.762360259856054e-07, "loss": 0.1497, "step": 34071 }, { "epoch": 0.5922578177962419, "grad_norm": 1.0374796963804938, "learning_rate": 3.762087528311476e-07, "loss": 0.1806, "step": 34072 }, { "epoch": 0.5922752003337447, "grad_norm": 1.3312843312955895, "learning_rate": 3.761814800690511e-07, "loss": 0.1479, "step": 34073 }, { "epoch": 0.5922925828712475, "grad_norm": 1.1868266260277134, "learning_rate": 3.761542076994027e-07, "loss": 0.2553, "step": 34074 }, { "epoch": 0.5923099654087504, "grad_norm": 2.549674447193345, "learning_rate": 3.761269357222884e-07, "loss": 0.1648, "step": 34075 }, { "epoch": 0.5923273479462532, "grad_norm": 2.146795372552466, "learning_rate": 3.7609966413779483e-07, "loss": 0.1753, "step": 34076 }, { "epoch": 0.592344730483756, "grad_norm": 1.7099441534008077, "learning_rate": 3.760723929460083e-07, "loss": 0.1686, "step": 34077 }, { "epoch": 0.5923621130212589, "grad_norm": 1.31987759151746, "learning_rate": 3.760451221470155e-07, "loss": 0.2847, "step": 34078 }, { "epoch": 0.5923794955587617, "grad_norm": 1.5070794210454068, "learning_rate": 3.760178517409026e-07, "loss": 0.1509, "step": 34079 }, { "epoch": 0.5923968780962645, "grad_norm": 2.082035187954081, "learning_rate": 3.759905817277562e-07, "loss": 0.2894, "step": 34080 }, { "epoch": 0.5924142606337673, "grad_norm": 1.4216338237759474, "learning_rate": 3.7596331210766285e-07, "loss": 0.1804, "step": 34081 }, { "epoch": 0.5924316431712702, "grad_norm": 0.6884516782288569, "learning_rate": 3.759360428807086e-07, "loss": 0.1191, "step": 34082 }, { "epoch": 0.592449025708773, "grad_norm": 1.7248120177938098, "learning_rate": 3.759087740469803e-07, "loss": 0.3629, "step": 34083 }, { "epoch": 0.5924664082462758, "grad_norm": 1.2177701962622203, "learning_rate": 3.7588150560656406e-07, "loss": 0.3129, "step": 34084 }, { "epoch": 0.5924837907837787, "grad_norm": 1.3249749550297345, "learning_rate": 3.7585423755954635e-07, "loss": 0.268, "step": 34085 }, { "epoch": 0.5925011733212815, "grad_norm": 2.033187048262692, "learning_rate": 3.758269699060138e-07, "loss": 0.2229, "step": 34086 }, { "epoch": 0.5925185558587842, "grad_norm": 0.8926061754572654, "learning_rate": 3.757997026460529e-07, "loss": 0.2802, "step": 34087 }, { "epoch": 0.592535938396287, "grad_norm": 2.3055619525798794, "learning_rate": 3.7577243577974955e-07, "loss": 0.1804, "step": 34088 }, { "epoch": 0.5925533209337899, "grad_norm": 1.6154454137848309, "learning_rate": 3.757451693071907e-07, "loss": 0.2034, "step": 34089 }, { "epoch": 0.5925707034712927, "grad_norm": 3.3666856662910076, "learning_rate": 3.757179032284624e-07, "loss": 0.3158, "step": 34090 }, { "epoch": 0.5925880860087955, "grad_norm": 0.709302243695467, "learning_rate": 3.7569063754365145e-07, "loss": 0.0911, "step": 34091 }, { "epoch": 0.5926054685462984, "grad_norm": 1.4076100245241632, "learning_rate": 3.7566337225284407e-07, "loss": 0.1556, "step": 34092 }, { "epoch": 0.5926228510838012, "grad_norm": 1.2457903350085888, "learning_rate": 3.756361073561267e-07, "loss": 0.2275, "step": 34093 }, { "epoch": 0.592640233621304, "grad_norm": 1.6440632976172458, "learning_rate": 3.7560884285358564e-07, "loss": 0.1883, "step": 34094 }, { "epoch": 0.5926576161588069, "grad_norm": 1.1941119689862507, "learning_rate": 3.7558157874530745e-07, "loss": 0.238, "step": 34095 }, { "epoch": 0.5926749986963097, "grad_norm": 2.9702295791210926, "learning_rate": 3.7555431503137837e-07, "loss": 0.2189, "step": 34096 }, { "epoch": 0.5926923812338125, "grad_norm": 1.144591059393104, "learning_rate": 3.755270517118851e-07, "loss": 0.1845, "step": 34097 }, { "epoch": 0.5927097637713153, "grad_norm": 2.1254552686347132, "learning_rate": 3.754997887869138e-07, "loss": 0.1711, "step": 34098 }, { "epoch": 0.5927271463088182, "grad_norm": 1.6086590395522347, "learning_rate": 3.7547252625655126e-07, "loss": 0.3047, "step": 34099 }, { "epoch": 0.592744528846321, "grad_norm": 1.4339596467127447, "learning_rate": 3.7544526412088326e-07, "loss": 0.2587, "step": 34100 }, { "epoch": 0.5927619113838238, "grad_norm": 1.3007632739583577, "learning_rate": 3.754180023799968e-07, "loss": 0.1547, "step": 34101 }, { "epoch": 0.5927792939213267, "grad_norm": 1.325137501588051, "learning_rate": 3.7539074103397784e-07, "loss": 0.1676, "step": 34102 }, { "epoch": 0.5927966764588295, "grad_norm": 1.8938922573724941, "learning_rate": 3.7536348008291306e-07, "loss": 0.1782, "step": 34103 }, { "epoch": 0.5928140589963323, "grad_norm": 1.732436495510129, "learning_rate": 3.7533621952688877e-07, "loss": 0.2411, "step": 34104 }, { "epoch": 0.5928314415338352, "grad_norm": 1.6675751248245794, "learning_rate": 3.753089593659914e-07, "loss": 0.2462, "step": 34105 }, { "epoch": 0.592848824071338, "grad_norm": 0.9365322319978633, "learning_rate": 3.752816996003076e-07, "loss": 0.1299, "step": 34106 }, { "epoch": 0.5928662066088407, "grad_norm": 4.7594592339002695, "learning_rate": 3.752544402299233e-07, "loss": 0.279, "step": 34107 }, { "epoch": 0.5928835891463435, "grad_norm": 1.1147153458146892, "learning_rate": 3.752271812549251e-07, "loss": 0.2088, "step": 34108 }, { "epoch": 0.5929009716838464, "grad_norm": 1.4758535206635994, "learning_rate": 3.7519992267539946e-07, "loss": 0.1996, "step": 34109 }, { "epoch": 0.5929183542213492, "grad_norm": 1.5620315370836269, "learning_rate": 3.7517266449143265e-07, "loss": 0.4201, "step": 34110 }, { "epoch": 0.592935736758852, "grad_norm": 1.5561575862109234, "learning_rate": 3.7514540670311123e-07, "loss": 0.2756, "step": 34111 }, { "epoch": 0.5929531192963549, "grad_norm": 1.5378934290853792, "learning_rate": 3.7511814931052164e-07, "loss": 0.2939, "step": 34112 }, { "epoch": 0.5929705018338577, "grad_norm": 1.482021011071081, "learning_rate": 3.7509089231375e-07, "loss": 0.2363, "step": 34113 }, { "epoch": 0.5929878843713605, "grad_norm": 1.7707594312541555, "learning_rate": 3.750636357128829e-07, "loss": 0.1946, "step": 34114 }, { "epoch": 0.5930052669088633, "grad_norm": 1.4934070614613262, "learning_rate": 3.750363795080066e-07, "loss": 0.2294, "step": 34115 }, { "epoch": 0.5930226494463662, "grad_norm": 1.3481996891817287, "learning_rate": 3.7500912369920766e-07, "loss": 0.1687, "step": 34116 }, { "epoch": 0.593040031983869, "grad_norm": 1.0446569575372253, "learning_rate": 3.749818682865724e-07, "loss": 0.1837, "step": 34117 }, { "epoch": 0.5930574145213718, "grad_norm": 0.9188838970340949, "learning_rate": 3.7495461327018727e-07, "loss": 0.1938, "step": 34118 }, { "epoch": 0.5930747970588747, "grad_norm": 1.3713482431454735, "learning_rate": 3.7492735865013837e-07, "loss": 0.2316, "step": 34119 }, { "epoch": 0.5930921795963775, "grad_norm": 2.392984110400271, "learning_rate": 3.7490010442651244e-07, "loss": 0.2517, "step": 34120 }, { "epoch": 0.5931095621338803, "grad_norm": 1.5947947099937358, "learning_rate": 3.7487285059939554e-07, "loss": 0.2355, "step": 34121 }, { "epoch": 0.5931269446713832, "grad_norm": 1.4112515707656603, "learning_rate": 3.7484559716887435e-07, "loss": 0.2159, "step": 34122 }, { "epoch": 0.593144327208886, "grad_norm": 1.6436077881470228, "learning_rate": 3.7481834413503504e-07, "loss": 0.3166, "step": 34123 }, { "epoch": 0.5931617097463888, "grad_norm": 1.6458313941630618, "learning_rate": 3.747910914979642e-07, "loss": 0.2842, "step": 34124 }, { "epoch": 0.5931790922838917, "grad_norm": 2.0036107808564716, "learning_rate": 3.747638392577481e-07, "loss": 0.2112, "step": 34125 }, { "epoch": 0.5931964748213945, "grad_norm": 1.823281378334145, "learning_rate": 3.747365874144731e-07, "loss": 0.3944, "step": 34126 }, { "epoch": 0.5932138573588972, "grad_norm": 1.0589742283032786, "learning_rate": 3.747093359682254e-07, "loss": 0.1645, "step": 34127 }, { "epoch": 0.5932312398964, "grad_norm": 1.5637435420519683, "learning_rate": 3.746820849190917e-07, "loss": 0.2397, "step": 34128 }, { "epoch": 0.5932486224339029, "grad_norm": 1.5421863732813261, "learning_rate": 3.746548342671581e-07, "loss": 0.1802, "step": 34129 }, { "epoch": 0.5932660049714057, "grad_norm": 2.5256588391529533, "learning_rate": 3.746275840125112e-07, "loss": 0.226, "step": 34130 }, { "epoch": 0.5932833875089085, "grad_norm": 1.3134731319805277, "learning_rate": 3.746003341552374e-07, "loss": 0.1484, "step": 34131 }, { "epoch": 0.5933007700464114, "grad_norm": 2.446309230454467, "learning_rate": 3.745730846954228e-07, "loss": 0.3364, "step": 34132 }, { "epoch": 0.5933181525839142, "grad_norm": 1.573939334341159, "learning_rate": 3.745458356331538e-07, "loss": 0.2269, "step": 34133 }, { "epoch": 0.593335535121417, "grad_norm": 1.9077450242769176, "learning_rate": 3.74518586968517e-07, "loss": 0.2404, "step": 34134 }, { "epoch": 0.5933529176589198, "grad_norm": 1.7647184468998578, "learning_rate": 3.7449133870159855e-07, "loss": 0.2596, "step": 34135 }, { "epoch": 0.5933703001964227, "grad_norm": 1.0882080684729432, "learning_rate": 3.74464090832485e-07, "loss": 0.1785, "step": 34136 }, { "epoch": 0.5933876827339255, "grad_norm": 1.1629282775622858, "learning_rate": 3.7443684336126267e-07, "loss": 0.2212, "step": 34137 }, { "epoch": 0.5934050652714283, "grad_norm": 2.101189665996353, "learning_rate": 3.7440959628801766e-07, "loss": 0.3685, "step": 34138 }, { "epoch": 0.5934224478089312, "grad_norm": 2.3231682179667685, "learning_rate": 3.7438234961283664e-07, "loss": 0.2438, "step": 34139 }, { "epoch": 0.593439830346434, "grad_norm": 2.5164226442012927, "learning_rate": 3.743551033358059e-07, "loss": 0.2613, "step": 34140 }, { "epoch": 0.5934572128839368, "grad_norm": 1.8346256909077656, "learning_rate": 3.7432785745701165e-07, "loss": 0.3459, "step": 34141 }, { "epoch": 0.5934745954214397, "grad_norm": 1.1300067099690256, "learning_rate": 3.7430061197654053e-07, "loss": 0.1596, "step": 34142 }, { "epoch": 0.5934919779589425, "grad_norm": 2.7765540039362993, "learning_rate": 3.742733668944785e-07, "loss": 0.2381, "step": 34143 }, { "epoch": 0.5935093604964453, "grad_norm": 1.4215830127568845, "learning_rate": 3.7424612221091244e-07, "loss": 0.1834, "step": 34144 }, { "epoch": 0.5935267430339481, "grad_norm": 1.064672831327825, "learning_rate": 3.742188779259283e-07, "loss": 0.2363, "step": 34145 }, { "epoch": 0.593544125571451, "grad_norm": 1.389175484076341, "learning_rate": 3.741916340396124e-07, "loss": 0.3146, "step": 34146 }, { "epoch": 0.5935615081089537, "grad_norm": 1.0921622175652157, "learning_rate": 3.741643905520514e-07, "loss": 0.3198, "step": 34147 }, { "epoch": 0.5935788906464565, "grad_norm": 1.946719793899918, "learning_rate": 3.741371474633314e-07, "loss": 0.2455, "step": 34148 }, { "epoch": 0.5935962731839594, "grad_norm": 2.7302918885441194, "learning_rate": 3.741099047735388e-07, "loss": 0.2262, "step": 34149 }, { "epoch": 0.5936136557214622, "grad_norm": 1.462507414469141, "learning_rate": 3.740826624827601e-07, "loss": 0.2082, "step": 34150 }, { "epoch": 0.593631038258965, "grad_norm": 2.2372679777792683, "learning_rate": 3.740554205910814e-07, "loss": 0.4353, "step": 34151 }, { "epoch": 0.5936484207964678, "grad_norm": 2.0378806608863624, "learning_rate": 3.7402817909858906e-07, "loss": 0.1907, "step": 34152 }, { "epoch": 0.5936658033339707, "grad_norm": 1.4705641093461463, "learning_rate": 3.740009380053697e-07, "loss": 0.186, "step": 34153 }, { "epoch": 0.5936831858714735, "grad_norm": 0.9231466208131398, "learning_rate": 3.739736973115094e-07, "loss": 0.2152, "step": 34154 }, { "epoch": 0.5937005684089763, "grad_norm": 2.798386739457808, "learning_rate": 3.739464570170946e-07, "loss": 0.2508, "step": 34155 }, { "epoch": 0.5937179509464792, "grad_norm": 1.42013882090775, "learning_rate": 3.739192171222118e-07, "loss": 0.1457, "step": 34156 }, { "epoch": 0.593735333483982, "grad_norm": 1.5035422604965267, "learning_rate": 3.73891977626947e-07, "loss": 0.3128, "step": 34157 }, { "epoch": 0.5937527160214848, "grad_norm": 1.1902521886034667, "learning_rate": 3.7386473853138667e-07, "loss": 0.294, "step": 34158 }, { "epoch": 0.5937700985589877, "grad_norm": 1.8234144894587392, "learning_rate": 3.738374998356172e-07, "loss": 0.1525, "step": 34159 }, { "epoch": 0.5937874810964905, "grad_norm": 1.573192588229704, "learning_rate": 3.7381026153972487e-07, "loss": 0.3726, "step": 34160 }, { "epoch": 0.5938048636339933, "grad_norm": 1.7085537076948498, "learning_rate": 3.7378302364379614e-07, "loss": 0.3041, "step": 34161 }, { "epoch": 0.5938222461714961, "grad_norm": 1.057392427782777, "learning_rate": 3.7375578614791714e-07, "loss": 0.1217, "step": 34162 }, { "epoch": 0.593839628708999, "grad_norm": 1.293939585589414, "learning_rate": 3.737285490521745e-07, "loss": 0.2969, "step": 34163 }, { "epoch": 0.5938570112465018, "grad_norm": 2.3206500399229566, "learning_rate": 3.7370131235665425e-07, "loss": 0.2153, "step": 34164 }, { "epoch": 0.5938743937840046, "grad_norm": 2.1614546683292706, "learning_rate": 3.736740760614428e-07, "loss": 0.1758, "step": 34165 }, { "epoch": 0.5938917763215074, "grad_norm": 1.8283579242076007, "learning_rate": 3.736468401666264e-07, "loss": 0.2278, "step": 34166 }, { "epoch": 0.5939091588590102, "grad_norm": 2.2691924943701838, "learning_rate": 3.736196046722916e-07, "loss": 0.3653, "step": 34167 }, { "epoch": 0.593926541396513, "grad_norm": 2.1162232319158734, "learning_rate": 3.7359236957852447e-07, "loss": 0.7111, "step": 34168 }, { "epoch": 0.5939439239340158, "grad_norm": 1.3226212139150852, "learning_rate": 3.7356513488541175e-07, "loss": 0.2411, "step": 34169 }, { "epoch": 0.5939613064715187, "grad_norm": 2.0143387633786864, "learning_rate": 3.735379005930393e-07, "loss": 0.1211, "step": 34170 }, { "epoch": 0.5939786890090215, "grad_norm": 1.3475087697381412, "learning_rate": 3.735106667014935e-07, "loss": 0.1373, "step": 34171 }, { "epoch": 0.5939960715465243, "grad_norm": 1.547908220886868, "learning_rate": 3.7348343321086096e-07, "loss": 0.1578, "step": 34172 }, { "epoch": 0.5940134540840272, "grad_norm": 1.467397322987207, "learning_rate": 3.734562001212277e-07, "loss": 0.198, "step": 34173 }, { "epoch": 0.59403083662153, "grad_norm": 1.625170896690615, "learning_rate": 3.7342896743268014e-07, "loss": 0.261, "step": 34174 }, { "epoch": 0.5940482191590328, "grad_norm": 1.436656574460945, "learning_rate": 3.7340173514530484e-07, "loss": 0.2536, "step": 34175 }, { "epoch": 0.5940656016965357, "grad_norm": 1.1637583665290203, "learning_rate": 3.733745032591877e-07, "loss": 0.1786, "step": 34176 }, { "epoch": 0.5940829842340385, "grad_norm": 1.2629742241586126, "learning_rate": 3.7334727177441516e-07, "loss": 0.2346, "step": 34177 }, { "epoch": 0.5941003667715413, "grad_norm": 1.26566411797933, "learning_rate": 3.7332004069107364e-07, "loss": 0.2392, "step": 34178 }, { "epoch": 0.5941177493090442, "grad_norm": 1.1950796991293953, "learning_rate": 3.732928100092494e-07, "loss": 0.2211, "step": 34179 }, { "epoch": 0.594135131846547, "grad_norm": 1.7059984054108401, "learning_rate": 3.732655797290287e-07, "loss": 0.2369, "step": 34180 }, { "epoch": 0.5941525143840498, "grad_norm": 1.0482264495333036, "learning_rate": 3.7323834985049807e-07, "loss": 0.17, "step": 34181 }, { "epoch": 0.5941698969215526, "grad_norm": 1.6204243032304675, "learning_rate": 3.7321112037374363e-07, "loss": 0.1794, "step": 34182 }, { "epoch": 0.5941872794590555, "grad_norm": 2.2588835465493133, "learning_rate": 3.7318389129885153e-07, "loss": 0.2023, "step": 34183 }, { "epoch": 0.5942046619965583, "grad_norm": 1.2098387440002767, "learning_rate": 3.731566626259083e-07, "loss": 0.2707, "step": 34184 }, { "epoch": 0.5942220445340611, "grad_norm": 1.7949635429183122, "learning_rate": 3.731294343550001e-07, "loss": 0.2869, "step": 34185 }, { "epoch": 0.5942394270715639, "grad_norm": 3.0819589361705173, "learning_rate": 3.7310220648621346e-07, "loss": 0.2589, "step": 34186 }, { "epoch": 0.5942568096090667, "grad_norm": 2.023233568098786, "learning_rate": 3.730749790196344e-07, "loss": 0.1956, "step": 34187 }, { "epoch": 0.5942741921465695, "grad_norm": 1.343331508777932, "learning_rate": 3.7304775195534963e-07, "loss": 0.1155, "step": 34188 }, { "epoch": 0.5942915746840723, "grad_norm": 1.996447414828698, "learning_rate": 3.730205252934449e-07, "loss": 0.2157, "step": 34189 }, { "epoch": 0.5943089572215752, "grad_norm": 1.9217085552875321, "learning_rate": 3.729932990340069e-07, "loss": 0.1757, "step": 34190 }, { "epoch": 0.594326339759078, "grad_norm": 1.1548275614511565, "learning_rate": 3.7296607317712166e-07, "loss": 0.2115, "step": 34191 }, { "epoch": 0.5943437222965808, "grad_norm": 3.100291099909967, "learning_rate": 3.729388477228757e-07, "loss": 0.7562, "step": 34192 }, { "epoch": 0.5943611048340837, "grad_norm": 0.8504178857356643, "learning_rate": 3.729116226713551e-07, "loss": 0.3207, "step": 34193 }, { "epoch": 0.5943784873715865, "grad_norm": 1.2010777515037363, "learning_rate": 3.728843980226466e-07, "loss": 0.2632, "step": 34194 }, { "epoch": 0.5943958699090893, "grad_norm": 0.9113289645436858, "learning_rate": 3.72857173776836e-07, "loss": 0.1359, "step": 34195 }, { "epoch": 0.5944132524465922, "grad_norm": 1.9459265957651157, "learning_rate": 3.7282994993400974e-07, "loss": 0.2293, "step": 34196 }, { "epoch": 0.594430634984095, "grad_norm": 1.493765075310342, "learning_rate": 3.7280272649425404e-07, "loss": 0.1842, "step": 34197 }, { "epoch": 0.5944480175215978, "grad_norm": 0.8561957832554541, "learning_rate": 3.727755034576553e-07, "loss": 0.1793, "step": 34198 }, { "epoch": 0.5944654000591006, "grad_norm": 1.3717516044925444, "learning_rate": 3.7274828082429975e-07, "loss": 0.2429, "step": 34199 }, { "epoch": 0.5944827825966035, "grad_norm": 1.5636835358060253, "learning_rate": 3.727210585942737e-07, "loss": 0.266, "step": 34200 }, { "epoch": 0.5945001651341063, "grad_norm": 3.833401711098939, "learning_rate": 3.7269383676766366e-07, "loss": 0.2641, "step": 34201 }, { "epoch": 0.5945175476716091, "grad_norm": 1.6661075257764195, "learning_rate": 3.726666153445554e-07, "loss": 0.2908, "step": 34202 }, { "epoch": 0.594534930209112, "grad_norm": 1.9109596168012828, "learning_rate": 3.726393943250356e-07, "loss": 0.2282, "step": 34203 }, { "epoch": 0.5945523127466148, "grad_norm": 1.2405565645500642, "learning_rate": 3.726121737091903e-07, "loss": 0.2293, "step": 34204 }, { "epoch": 0.5945696952841176, "grad_norm": 1.471104215046084, "learning_rate": 3.72584953497106e-07, "loss": 0.2624, "step": 34205 }, { "epoch": 0.5945870778216203, "grad_norm": 2.3160975197857923, "learning_rate": 3.725577336888689e-07, "loss": 0.2271, "step": 34206 }, { "epoch": 0.5946044603591232, "grad_norm": 1.3869255927816178, "learning_rate": 3.725305142845653e-07, "loss": 0.2005, "step": 34207 }, { "epoch": 0.594621842896626, "grad_norm": 2.1002042312323357, "learning_rate": 3.725032952842812e-07, "loss": 0.2057, "step": 34208 }, { "epoch": 0.5946392254341288, "grad_norm": 1.9061057627108364, "learning_rate": 3.724760766881032e-07, "loss": 0.345, "step": 34209 }, { "epoch": 0.5946566079716317, "grad_norm": 1.2456711674990562, "learning_rate": 3.724488584961174e-07, "loss": 0.2067, "step": 34210 }, { "epoch": 0.5946739905091345, "grad_norm": 1.3301839908518518, "learning_rate": 3.724216407084102e-07, "loss": 0.2028, "step": 34211 }, { "epoch": 0.5946913730466373, "grad_norm": 1.9010824847790997, "learning_rate": 3.7239442332506765e-07, "loss": 0.2408, "step": 34212 }, { "epoch": 0.5947087555841402, "grad_norm": 1.3185033208908792, "learning_rate": 3.723672063461765e-07, "loss": 0.2739, "step": 34213 }, { "epoch": 0.594726138121643, "grad_norm": 1.380886487617232, "learning_rate": 3.7233998977182243e-07, "loss": 0.1736, "step": 34214 }, { "epoch": 0.5947435206591458, "grad_norm": 1.236106804897523, "learning_rate": 3.7231277360209196e-07, "loss": 0.2096, "step": 34215 }, { "epoch": 0.5947609031966486, "grad_norm": 1.459491290570216, "learning_rate": 3.722855578370713e-07, "loss": 0.212, "step": 34216 }, { "epoch": 0.5947782857341515, "grad_norm": 1.9922771219194335, "learning_rate": 3.722583424768468e-07, "loss": 0.1931, "step": 34217 }, { "epoch": 0.5947956682716543, "grad_norm": 1.4528340007005054, "learning_rate": 3.7223112752150464e-07, "loss": 0.1727, "step": 34218 }, { "epoch": 0.5948130508091571, "grad_norm": 1.4725039275671172, "learning_rate": 3.722039129711313e-07, "loss": 0.2624, "step": 34219 }, { "epoch": 0.59483043334666, "grad_norm": 2.601751168810973, "learning_rate": 3.721766988258127e-07, "loss": 0.2325, "step": 34220 }, { "epoch": 0.5948478158841628, "grad_norm": 1.4985990479240563, "learning_rate": 3.721494850856353e-07, "loss": 0.6617, "step": 34221 }, { "epoch": 0.5948651984216656, "grad_norm": 0.9272234514847766, "learning_rate": 3.721222717506852e-07, "loss": 0.114, "step": 34222 }, { "epoch": 0.5948825809591685, "grad_norm": 1.7850429208599827, "learning_rate": 3.720950588210488e-07, "loss": 0.1909, "step": 34223 }, { "epoch": 0.5948999634966713, "grad_norm": 1.9474487135968401, "learning_rate": 3.720678462968122e-07, "loss": 0.1755, "step": 34224 }, { "epoch": 0.5949173460341741, "grad_norm": 29.7269318983717, "learning_rate": 3.7204063417806195e-07, "loss": 1.0915, "step": 34225 }, { "epoch": 0.5949347285716768, "grad_norm": 1.2621065650534622, "learning_rate": 3.720134224648842e-07, "loss": 0.1698, "step": 34226 }, { "epoch": 0.5949521111091797, "grad_norm": 0.9886648948801473, "learning_rate": 3.7198621115736486e-07, "loss": 0.1578, "step": 34227 }, { "epoch": 0.5949694936466825, "grad_norm": 1.9506140593658086, "learning_rate": 3.719590002555906e-07, "loss": 0.257, "step": 34228 }, { "epoch": 0.5949868761841853, "grad_norm": 1.929379894527931, "learning_rate": 3.7193178975964736e-07, "loss": 0.3262, "step": 34229 }, { "epoch": 0.5950042587216882, "grad_norm": 1.3357752081609207, "learning_rate": 3.719045796696215e-07, "loss": 0.2242, "step": 34230 }, { "epoch": 0.595021641259191, "grad_norm": 0.964772458317345, "learning_rate": 3.718773699855994e-07, "loss": 0.1999, "step": 34231 }, { "epoch": 0.5950390237966938, "grad_norm": 1.7396794155023887, "learning_rate": 3.718501607076672e-07, "loss": 0.2237, "step": 34232 }, { "epoch": 0.5950564063341967, "grad_norm": 1.9683618762239479, "learning_rate": 3.71822951835911e-07, "loss": 0.228, "step": 34233 }, { "epoch": 0.5950737888716995, "grad_norm": 2.1760785199411448, "learning_rate": 3.7179574337041725e-07, "loss": 0.1796, "step": 34234 }, { "epoch": 0.5950911714092023, "grad_norm": 1.3673829616522932, "learning_rate": 3.7176853531127194e-07, "loss": 0.1479, "step": 34235 }, { "epoch": 0.5951085539467051, "grad_norm": 1.160496872119297, "learning_rate": 3.7174132765856167e-07, "loss": 0.1353, "step": 34236 }, { "epoch": 0.595125936484208, "grad_norm": 1.0500234985723298, "learning_rate": 3.717141204123724e-07, "loss": 0.2506, "step": 34237 }, { "epoch": 0.5951433190217108, "grad_norm": 1.483749908801198, "learning_rate": 3.7168691357279056e-07, "loss": 0.2716, "step": 34238 }, { "epoch": 0.5951607015592136, "grad_norm": 1.386224150841144, "learning_rate": 3.716597071399021e-07, "loss": 0.2455, "step": 34239 }, { "epoch": 0.5951780840967165, "grad_norm": 1.5198577158820146, "learning_rate": 3.7163250111379344e-07, "loss": 0.2645, "step": 34240 }, { "epoch": 0.5951954666342193, "grad_norm": 1.4641198289554833, "learning_rate": 3.716052954945508e-07, "loss": 0.2774, "step": 34241 }, { "epoch": 0.5952128491717221, "grad_norm": 2.0903714313786277, "learning_rate": 3.715780902822604e-07, "loss": 0.2309, "step": 34242 }, { "epoch": 0.595230231709225, "grad_norm": 1.6291993631914332, "learning_rate": 3.715508854770083e-07, "loss": 0.2789, "step": 34243 }, { "epoch": 0.5952476142467278, "grad_norm": 1.2424838667748048, "learning_rate": 3.715236810788811e-07, "loss": 0.1661, "step": 34244 }, { "epoch": 0.5952649967842306, "grad_norm": 3.7662242147424703, "learning_rate": 3.714964770879649e-07, "loss": 0.2901, "step": 34245 }, { "epoch": 0.5952823793217333, "grad_norm": 1.8703267937978572, "learning_rate": 3.714692735043457e-07, "loss": 0.3368, "step": 34246 }, { "epoch": 0.5952997618592362, "grad_norm": 2.1070482920229012, "learning_rate": 3.714420703281098e-07, "loss": 0.3365, "step": 34247 }, { "epoch": 0.595317144396739, "grad_norm": 1.1577845228200696, "learning_rate": 3.7141486755934354e-07, "loss": 0.1055, "step": 34248 }, { "epoch": 0.5953345269342418, "grad_norm": 1.5036829940764156, "learning_rate": 3.71387665198133e-07, "loss": 0.1973, "step": 34249 }, { "epoch": 0.5953519094717447, "grad_norm": 1.4277907709782987, "learning_rate": 3.713604632445646e-07, "loss": 0.1891, "step": 34250 }, { "epoch": 0.5953692920092475, "grad_norm": 2.2311774766066383, "learning_rate": 3.7133326169872447e-07, "loss": 0.2644, "step": 34251 }, { "epoch": 0.5953866745467503, "grad_norm": 2.614822470260366, "learning_rate": 3.713060605606987e-07, "loss": 0.235, "step": 34252 }, { "epoch": 0.5954040570842531, "grad_norm": 1.4758809168002154, "learning_rate": 3.7127885983057363e-07, "loss": 0.1826, "step": 34253 }, { "epoch": 0.595421439621756, "grad_norm": 1.2809046084791282, "learning_rate": 3.712516595084355e-07, "loss": 0.282, "step": 34254 }, { "epoch": 0.5954388221592588, "grad_norm": 1.4721301086252037, "learning_rate": 3.712244595943703e-07, "loss": 0.3472, "step": 34255 }, { "epoch": 0.5954562046967616, "grad_norm": 1.920543415229086, "learning_rate": 3.7119726008846455e-07, "loss": 0.1528, "step": 34256 }, { "epoch": 0.5954735872342645, "grad_norm": 1.636974464888713, "learning_rate": 3.711700609908044e-07, "loss": 0.1426, "step": 34257 }, { "epoch": 0.5954909697717673, "grad_norm": 2.0815663888075875, "learning_rate": 3.711428623014758e-07, "loss": 0.2181, "step": 34258 }, { "epoch": 0.5955083523092701, "grad_norm": 2.311651318427331, "learning_rate": 3.711156640205652e-07, "loss": 0.2503, "step": 34259 }, { "epoch": 0.595525734846773, "grad_norm": 1.2788936782459621, "learning_rate": 3.710884661481587e-07, "loss": 0.31, "step": 34260 }, { "epoch": 0.5955431173842758, "grad_norm": 1.2450082874224357, "learning_rate": 3.7106126868434257e-07, "loss": 0.2081, "step": 34261 }, { "epoch": 0.5955604999217786, "grad_norm": 2.838366076822268, "learning_rate": 3.71034071629203e-07, "loss": 0.2852, "step": 34262 }, { "epoch": 0.5955778824592814, "grad_norm": 1.5975719591871524, "learning_rate": 3.710068749828261e-07, "loss": 0.1803, "step": 34263 }, { "epoch": 0.5955952649967843, "grad_norm": 1.220878010143347, "learning_rate": 3.709796787452984e-07, "loss": 0.2214, "step": 34264 }, { "epoch": 0.5956126475342871, "grad_norm": 3.4945354029084443, "learning_rate": 3.709524829167057e-07, "loss": 0.3841, "step": 34265 }, { "epoch": 0.5956300300717898, "grad_norm": 0.9220586356364869, "learning_rate": 3.7092528749713423e-07, "loss": 0.1649, "step": 34266 }, { "epoch": 0.5956474126092927, "grad_norm": 1.1812290892812003, "learning_rate": 3.708980924866705e-07, "loss": 0.1778, "step": 34267 }, { "epoch": 0.5956647951467955, "grad_norm": 2.2603536117547347, "learning_rate": 3.708708978854003e-07, "loss": 0.2784, "step": 34268 }, { "epoch": 0.5956821776842983, "grad_norm": 2.5844379194796194, "learning_rate": 3.708437036934102e-07, "loss": 0.1947, "step": 34269 }, { "epoch": 0.5956995602218011, "grad_norm": 1.2502755323132024, "learning_rate": 3.7081650991078633e-07, "loss": 0.241, "step": 34270 }, { "epoch": 0.595716942759304, "grad_norm": 1.3152710743507707, "learning_rate": 3.707893165376147e-07, "loss": 0.2322, "step": 34271 }, { "epoch": 0.5957343252968068, "grad_norm": 1.4701808887040955, "learning_rate": 3.707621235739814e-07, "loss": 0.1706, "step": 34272 }, { "epoch": 0.5957517078343096, "grad_norm": 1.1878055868309987, "learning_rate": 3.7073493101997296e-07, "loss": 0.2233, "step": 34273 }, { "epoch": 0.5957690903718125, "grad_norm": 1.6456704837171106, "learning_rate": 3.707077388756753e-07, "loss": 0.1608, "step": 34274 }, { "epoch": 0.5957864729093153, "grad_norm": 2.012900722223788, "learning_rate": 3.7068054714117483e-07, "loss": 0.2141, "step": 34275 }, { "epoch": 0.5958038554468181, "grad_norm": 1.2408253166997876, "learning_rate": 3.7065335581655765e-07, "loss": 0.1399, "step": 34276 }, { "epoch": 0.595821237984321, "grad_norm": 1.726868203873225, "learning_rate": 3.706261649019099e-07, "loss": 0.1753, "step": 34277 }, { "epoch": 0.5958386205218238, "grad_norm": 2.2962780767125373, "learning_rate": 3.705989743973176e-07, "loss": 0.2766, "step": 34278 }, { "epoch": 0.5958560030593266, "grad_norm": 1.0997800969333893, "learning_rate": 3.705717843028673e-07, "loss": 0.1679, "step": 34279 }, { "epoch": 0.5958733855968295, "grad_norm": 1.7020021487877635, "learning_rate": 3.7054459461864483e-07, "loss": 0.1894, "step": 34280 }, { "epoch": 0.5958907681343323, "grad_norm": 1.3363188176306326, "learning_rate": 3.705174053447366e-07, "loss": 0.1798, "step": 34281 }, { "epoch": 0.5959081506718351, "grad_norm": 1.5824700558875926, "learning_rate": 3.704902164812287e-07, "loss": 0.4255, "step": 34282 }, { "epoch": 0.5959255332093379, "grad_norm": 1.431259640901047, "learning_rate": 3.704630280282075e-07, "loss": 0.3662, "step": 34283 }, { "epoch": 0.5959429157468408, "grad_norm": 1.071060483133224, "learning_rate": 3.704358399857589e-07, "loss": 0.2142, "step": 34284 }, { "epoch": 0.5959602982843436, "grad_norm": 2.415303293026944, "learning_rate": 3.704086523539691e-07, "loss": 0.3162, "step": 34285 }, { "epoch": 0.5959776808218463, "grad_norm": 2.645867125796286, "learning_rate": 3.7038146513292433e-07, "loss": 0.2679, "step": 34286 }, { "epoch": 0.5959950633593492, "grad_norm": 1.7187725181779654, "learning_rate": 3.703542783227108e-07, "loss": 0.2166, "step": 34287 }, { "epoch": 0.596012445896852, "grad_norm": 14.361683156898284, "learning_rate": 3.703270919234146e-07, "loss": 0.4713, "step": 34288 }, { "epoch": 0.5960298284343548, "grad_norm": 3.357960302941285, "learning_rate": 3.7029990593512215e-07, "loss": 0.4883, "step": 34289 }, { "epoch": 0.5960472109718576, "grad_norm": 1.6869441135712948, "learning_rate": 3.7027272035791934e-07, "loss": 0.1918, "step": 34290 }, { "epoch": 0.5960645935093605, "grad_norm": 2.772821309743074, "learning_rate": 3.7024553519189227e-07, "loss": 0.2861, "step": 34291 }, { "epoch": 0.5960819760468633, "grad_norm": 1.2256397902467313, "learning_rate": 3.702183504371273e-07, "loss": 0.2684, "step": 34292 }, { "epoch": 0.5960993585843661, "grad_norm": 1.0764331131020408, "learning_rate": 3.7019116609371054e-07, "loss": 0.2272, "step": 34293 }, { "epoch": 0.596116741121869, "grad_norm": 1.5603116152212144, "learning_rate": 3.701639821617282e-07, "loss": 0.1577, "step": 34294 }, { "epoch": 0.5961341236593718, "grad_norm": 1.7583178449361736, "learning_rate": 3.7013679864126655e-07, "loss": 0.1921, "step": 34295 }, { "epoch": 0.5961515061968746, "grad_norm": 1.994481780431792, "learning_rate": 3.701096155324115e-07, "loss": 0.1832, "step": 34296 }, { "epoch": 0.5961688887343775, "grad_norm": 2.0537866524432578, "learning_rate": 3.700824328352491e-07, "loss": 0.2388, "step": 34297 }, { "epoch": 0.5961862712718803, "grad_norm": 1.5625918173951892, "learning_rate": 3.700552505498659e-07, "loss": 0.1911, "step": 34298 }, { "epoch": 0.5962036538093831, "grad_norm": 2.003840258519464, "learning_rate": 3.7002806867634773e-07, "loss": 0.1978, "step": 34299 }, { "epoch": 0.596221036346886, "grad_norm": 1.3571562783593658, "learning_rate": 3.7000088721478097e-07, "loss": 0.1452, "step": 34300 }, { "epoch": 0.5962384188843888, "grad_norm": 1.463577091023636, "learning_rate": 3.699737061652516e-07, "loss": 0.2238, "step": 34301 }, { "epoch": 0.5962558014218916, "grad_norm": 1.5578561892669598, "learning_rate": 3.699465255278461e-07, "loss": 0.1785, "step": 34302 }, { "epoch": 0.5962731839593944, "grad_norm": 1.8546599403044295, "learning_rate": 3.6991934530265014e-07, "loss": 0.2932, "step": 34303 }, { "epoch": 0.5962905664968973, "grad_norm": 2.281077133657588, "learning_rate": 3.698921654897502e-07, "loss": 0.2399, "step": 34304 }, { "epoch": 0.5963079490344, "grad_norm": 1.4718869189666208, "learning_rate": 3.698649860892322e-07, "loss": 0.2725, "step": 34305 }, { "epoch": 0.5963253315719028, "grad_norm": 1.852191010475757, "learning_rate": 3.698378071011825e-07, "loss": 0.335, "step": 34306 }, { "epoch": 0.5963427141094056, "grad_norm": 1.4166666561330143, "learning_rate": 3.698106285256871e-07, "loss": 0.1675, "step": 34307 }, { "epoch": 0.5963600966469085, "grad_norm": 1.8890870462306517, "learning_rate": 3.697834503628324e-07, "loss": 0.2368, "step": 34308 }, { "epoch": 0.5963774791844113, "grad_norm": 1.5361208270792928, "learning_rate": 3.697562726127042e-07, "loss": 0.3049, "step": 34309 }, { "epoch": 0.5963948617219141, "grad_norm": 1.3406177036314166, "learning_rate": 3.697290952753888e-07, "loss": 0.2318, "step": 34310 }, { "epoch": 0.596412244259417, "grad_norm": 1.4800194387641925, "learning_rate": 3.6970191835097224e-07, "loss": 0.1909, "step": 34311 }, { "epoch": 0.5964296267969198, "grad_norm": 0.9903851831235787, "learning_rate": 3.6967474183954083e-07, "loss": 0.2316, "step": 34312 }, { "epoch": 0.5964470093344226, "grad_norm": 5.07605044310889, "learning_rate": 3.696475657411806e-07, "loss": 0.4211, "step": 34313 }, { "epoch": 0.5964643918719255, "grad_norm": 1.0744525281472532, "learning_rate": 3.6962039005597785e-07, "loss": 0.2842, "step": 34314 }, { "epoch": 0.5964817744094283, "grad_norm": 1.895020154325699, "learning_rate": 3.6959321478401844e-07, "loss": 0.1876, "step": 34315 }, { "epoch": 0.5964991569469311, "grad_norm": 1.4266263030208977, "learning_rate": 3.695660399253886e-07, "loss": 0.2453, "step": 34316 }, { "epoch": 0.596516539484434, "grad_norm": 2.288909145695564, "learning_rate": 3.6953886548017453e-07, "loss": 0.3066, "step": 34317 }, { "epoch": 0.5965339220219368, "grad_norm": 3.554939399250716, "learning_rate": 3.695116914484624e-07, "loss": 0.4747, "step": 34318 }, { "epoch": 0.5965513045594396, "grad_norm": 1.7765112441666067, "learning_rate": 3.6948451783033806e-07, "loss": 0.3256, "step": 34319 }, { "epoch": 0.5965686870969424, "grad_norm": 1.3139254871334898, "learning_rate": 3.694573446258881e-07, "loss": 0.246, "step": 34320 }, { "epoch": 0.5965860696344453, "grad_norm": 2.2561793227368487, "learning_rate": 3.694301718351983e-07, "loss": 0.266, "step": 34321 }, { "epoch": 0.5966034521719481, "grad_norm": 1.7650915591142629, "learning_rate": 3.6940299945835487e-07, "loss": 0.2559, "step": 34322 }, { "epoch": 0.5966208347094509, "grad_norm": 1.8251290063686516, "learning_rate": 3.6937582749544396e-07, "loss": 0.2187, "step": 34323 }, { "epoch": 0.5966382172469538, "grad_norm": 1.3442926533056616, "learning_rate": 3.6934865594655154e-07, "loss": 0.2248, "step": 34324 }, { "epoch": 0.5966555997844565, "grad_norm": 1.4751560532016559, "learning_rate": 3.6932148481176394e-07, "loss": 0.2752, "step": 34325 }, { "epoch": 0.5966729823219593, "grad_norm": 1.843553917905159, "learning_rate": 3.692943140911673e-07, "loss": 0.2096, "step": 34326 }, { "epoch": 0.5966903648594621, "grad_norm": 1.554298754931871, "learning_rate": 3.692671437848477e-07, "loss": 0.236, "step": 34327 }, { "epoch": 0.596707747396965, "grad_norm": 0.9592747297598352, "learning_rate": 3.69239973892891e-07, "loss": 0.1676, "step": 34328 }, { "epoch": 0.5967251299344678, "grad_norm": 1.4028610326063766, "learning_rate": 3.6921280441538354e-07, "loss": 0.1549, "step": 34329 }, { "epoch": 0.5967425124719706, "grad_norm": 1.139486026036967, "learning_rate": 3.691856353524114e-07, "loss": 0.1783, "step": 34330 }, { "epoch": 0.5967598950094735, "grad_norm": 2.003143154972403, "learning_rate": 3.691584667040608e-07, "loss": 0.3849, "step": 34331 }, { "epoch": 0.5967772775469763, "grad_norm": 0.981303898356648, "learning_rate": 3.6913129847041756e-07, "loss": 0.094, "step": 34332 }, { "epoch": 0.5967946600844791, "grad_norm": 1.322284643890979, "learning_rate": 3.6910413065156844e-07, "loss": 0.172, "step": 34333 }, { "epoch": 0.596812042621982, "grad_norm": 2.5916546209077223, "learning_rate": 3.6907696324759876e-07, "loss": 0.3993, "step": 34334 }, { "epoch": 0.5968294251594848, "grad_norm": 2.7944291562519012, "learning_rate": 3.69049796258595e-07, "loss": 0.191, "step": 34335 }, { "epoch": 0.5968468076969876, "grad_norm": 1.735084993052564, "learning_rate": 3.6902262968464314e-07, "loss": 0.2762, "step": 34336 }, { "epoch": 0.5968641902344904, "grad_norm": 3.340665502664704, "learning_rate": 3.6899546352582957e-07, "loss": 0.2029, "step": 34337 }, { "epoch": 0.5968815727719933, "grad_norm": 1.3722634002089118, "learning_rate": 3.6896829778223997e-07, "loss": 0.2326, "step": 34338 }, { "epoch": 0.5968989553094961, "grad_norm": 1.3639542350419955, "learning_rate": 3.6894113245396095e-07, "loss": 0.1907, "step": 34339 }, { "epoch": 0.5969163378469989, "grad_norm": 2.3758420722869404, "learning_rate": 3.689139675410782e-07, "loss": 0.2315, "step": 34340 }, { "epoch": 0.5969337203845018, "grad_norm": 1.9528782800435829, "learning_rate": 3.68886803043678e-07, "loss": 0.3031, "step": 34341 }, { "epoch": 0.5969511029220046, "grad_norm": 1.3557552314547765, "learning_rate": 3.688596389618463e-07, "loss": 0.3013, "step": 34342 }, { "epoch": 0.5969684854595074, "grad_norm": 2.5777766939160975, "learning_rate": 3.6883247529566943e-07, "loss": 0.2791, "step": 34343 }, { "epoch": 0.5969858679970103, "grad_norm": 1.4408431307562266, "learning_rate": 3.6880531204523317e-07, "loss": 0.2069, "step": 34344 }, { "epoch": 0.597003250534513, "grad_norm": 1.487188400962444, "learning_rate": 3.68778149210624e-07, "loss": 0.2886, "step": 34345 }, { "epoch": 0.5970206330720158, "grad_norm": 1.4421437308658591, "learning_rate": 3.6875098679192784e-07, "loss": 0.2471, "step": 34346 }, { "epoch": 0.5970380156095186, "grad_norm": 1.263499642640653, "learning_rate": 3.687238247892306e-07, "loss": 0.2058, "step": 34347 }, { "epoch": 0.5970553981470215, "grad_norm": 1.9766808249503758, "learning_rate": 3.686966632026186e-07, "loss": 0.2423, "step": 34348 }, { "epoch": 0.5970727806845243, "grad_norm": 1.326422197033711, "learning_rate": 3.6866950203217785e-07, "loss": 0.1945, "step": 34349 }, { "epoch": 0.5970901632220271, "grad_norm": 1.3382816336473276, "learning_rate": 3.686423412779945e-07, "loss": 0.2709, "step": 34350 }, { "epoch": 0.59710754575953, "grad_norm": 1.3155701020782107, "learning_rate": 3.6861518094015455e-07, "loss": 0.3076, "step": 34351 }, { "epoch": 0.5971249282970328, "grad_norm": 2.632445279817819, "learning_rate": 3.6858802101874427e-07, "loss": 0.314, "step": 34352 }, { "epoch": 0.5971423108345356, "grad_norm": 2.004988696083788, "learning_rate": 3.685608615138494e-07, "loss": 0.3054, "step": 34353 }, { "epoch": 0.5971596933720384, "grad_norm": 1.1098746686216314, "learning_rate": 3.6853370242555625e-07, "loss": 0.2453, "step": 34354 }, { "epoch": 0.5971770759095413, "grad_norm": 1.3564445738540256, "learning_rate": 3.6850654375395087e-07, "loss": 0.1963, "step": 34355 }, { "epoch": 0.5971944584470441, "grad_norm": 1.9166644324349147, "learning_rate": 3.6847938549911937e-07, "loss": 0.2901, "step": 34356 }, { "epoch": 0.5972118409845469, "grad_norm": 1.4983165648396428, "learning_rate": 3.684522276611477e-07, "loss": 0.3149, "step": 34357 }, { "epoch": 0.5972292235220498, "grad_norm": 0.943804349847813, "learning_rate": 3.684250702401224e-07, "loss": 0.1515, "step": 34358 }, { "epoch": 0.5972466060595526, "grad_norm": 1.4560593916500741, "learning_rate": 3.6839791323612885e-07, "loss": 0.2247, "step": 34359 }, { "epoch": 0.5972639885970554, "grad_norm": 1.51774328849802, "learning_rate": 3.6837075664925356e-07, "loss": 0.2177, "step": 34360 }, { "epoch": 0.5972813711345583, "grad_norm": 1.8597386677509444, "learning_rate": 3.683436004795824e-07, "loss": 0.3678, "step": 34361 }, { "epoch": 0.5972987536720611, "grad_norm": 1.6429371808014697, "learning_rate": 3.683164447272017e-07, "loss": 0.203, "step": 34362 }, { "epoch": 0.5973161362095639, "grad_norm": 1.7370884433704108, "learning_rate": 3.682892893921972e-07, "loss": 0.1951, "step": 34363 }, { "epoch": 0.5973335187470667, "grad_norm": 1.1752218940573986, "learning_rate": 3.682621344746553e-07, "loss": 0.2988, "step": 34364 }, { "epoch": 0.5973509012845695, "grad_norm": 1.6465428675268785, "learning_rate": 3.6823497997466204e-07, "loss": 0.249, "step": 34365 }, { "epoch": 0.5973682838220723, "grad_norm": 1.9047294272899031, "learning_rate": 3.6820782589230324e-07, "loss": 0.2925, "step": 34366 }, { "epoch": 0.5973856663595751, "grad_norm": 1.330729915141405, "learning_rate": 3.68180672227665e-07, "loss": 0.1717, "step": 34367 }, { "epoch": 0.597403048897078, "grad_norm": 1.4018952617051403, "learning_rate": 3.6815351898083353e-07, "loss": 0.2187, "step": 34368 }, { "epoch": 0.5974204314345808, "grad_norm": 1.3412649885644337, "learning_rate": 3.6812636615189477e-07, "loss": 0.2182, "step": 34369 }, { "epoch": 0.5974378139720836, "grad_norm": 1.6571503593317691, "learning_rate": 3.6809921374093495e-07, "loss": 0.2756, "step": 34370 }, { "epoch": 0.5974551965095865, "grad_norm": 1.3531803286461714, "learning_rate": 3.6807206174804015e-07, "loss": 0.3256, "step": 34371 }, { "epoch": 0.5974725790470893, "grad_norm": 0.8945865800030984, "learning_rate": 3.680449101732961e-07, "loss": 0.2459, "step": 34372 }, { "epoch": 0.5974899615845921, "grad_norm": 1.4155352797012104, "learning_rate": 3.6801775901678915e-07, "loss": 0.1936, "step": 34373 }, { "epoch": 0.5975073441220949, "grad_norm": 1.2714006790092653, "learning_rate": 3.6799060827860527e-07, "loss": 0.1905, "step": 34374 }, { "epoch": 0.5975247266595978, "grad_norm": 1.5256682395698267, "learning_rate": 3.6796345795883046e-07, "loss": 0.4762, "step": 34375 }, { "epoch": 0.5975421091971006, "grad_norm": 1.868712897528536, "learning_rate": 3.679363080575509e-07, "loss": 0.2586, "step": 34376 }, { "epoch": 0.5975594917346034, "grad_norm": 1.5063624343615252, "learning_rate": 3.679091585748527e-07, "loss": 0.2038, "step": 34377 }, { "epoch": 0.5975768742721063, "grad_norm": 1.2686864447821542, "learning_rate": 3.678820095108216e-07, "loss": 0.131, "step": 34378 }, { "epoch": 0.5975942568096091, "grad_norm": 1.7378557831721166, "learning_rate": 3.6785486086554395e-07, "loss": 0.2625, "step": 34379 }, { "epoch": 0.5976116393471119, "grad_norm": 1.8545934565807776, "learning_rate": 3.6782771263910557e-07, "loss": 0.2532, "step": 34380 }, { "epoch": 0.5976290218846148, "grad_norm": 1.261109888894143, "learning_rate": 3.6780056483159273e-07, "loss": 0.1708, "step": 34381 }, { "epoch": 0.5976464044221176, "grad_norm": 1.177112077035013, "learning_rate": 3.677734174430914e-07, "loss": 0.2756, "step": 34382 }, { "epoch": 0.5976637869596204, "grad_norm": 1.0583812525131335, "learning_rate": 3.6774627047368747e-07, "loss": 0.2417, "step": 34383 }, { "epoch": 0.5976811694971232, "grad_norm": 1.5182466907339465, "learning_rate": 3.6771912392346725e-07, "loss": 0.2808, "step": 34384 }, { "epoch": 0.597698552034626, "grad_norm": 2.770471775887493, "learning_rate": 3.676919777925166e-07, "loss": 0.171, "step": 34385 }, { "epoch": 0.5977159345721288, "grad_norm": 0.6721088239786214, "learning_rate": 3.6766483208092145e-07, "loss": 0.1652, "step": 34386 }, { "epoch": 0.5977333171096316, "grad_norm": 1.4269653570252616, "learning_rate": 3.6763768678876814e-07, "loss": 0.1826, "step": 34387 }, { "epoch": 0.5977506996471345, "grad_norm": 3.0806352387278224, "learning_rate": 3.6761054191614247e-07, "loss": 0.2519, "step": 34388 }, { "epoch": 0.5977680821846373, "grad_norm": 1.448885850131504, "learning_rate": 3.675833974631307e-07, "loss": 0.2417, "step": 34389 }, { "epoch": 0.5977854647221401, "grad_norm": 2.233777468085662, "learning_rate": 3.6755625342981877e-07, "loss": 0.2103, "step": 34390 }, { "epoch": 0.5978028472596429, "grad_norm": 1.719090362374321, "learning_rate": 3.675291098162926e-07, "loss": 0.225, "step": 34391 }, { "epoch": 0.5978202297971458, "grad_norm": 2.1784113961386122, "learning_rate": 3.675019666226382e-07, "loss": 0.2655, "step": 34392 }, { "epoch": 0.5978376123346486, "grad_norm": 1.8549758741936093, "learning_rate": 3.674748238489418e-07, "loss": 0.3091, "step": 34393 }, { "epoch": 0.5978549948721514, "grad_norm": 1.7593683083693237, "learning_rate": 3.6744768149528926e-07, "loss": 0.2303, "step": 34394 }, { "epoch": 0.5978723774096543, "grad_norm": 2.00319682629994, "learning_rate": 3.6742053956176674e-07, "loss": 0.1907, "step": 34395 }, { "epoch": 0.5978897599471571, "grad_norm": 0.9358107365970447, "learning_rate": 3.673933980484603e-07, "loss": 0.2893, "step": 34396 }, { "epoch": 0.5979071424846599, "grad_norm": 1.1333550075097414, "learning_rate": 3.673662569554557e-07, "loss": 0.2256, "step": 34397 }, { "epoch": 0.5979245250221628, "grad_norm": 1.1169078941010802, "learning_rate": 3.673391162828392e-07, "loss": 0.2936, "step": 34398 }, { "epoch": 0.5979419075596656, "grad_norm": 1.2888643413658993, "learning_rate": 3.6731197603069684e-07, "loss": 0.3431, "step": 34399 }, { "epoch": 0.5979592900971684, "grad_norm": 1.5719189477143731, "learning_rate": 3.6728483619911443e-07, "loss": 0.3104, "step": 34400 }, { "epoch": 0.5979766726346712, "grad_norm": 1.3754603102154421, "learning_rate": 3.672576967881782e-07, "loss": 0.2306, "step": 34401 }, { "epoch": 0.5979940551721741, "grad_norm": 1.9876409786875546, "learning_rate": 3.6723055779797396e-07, "loss": 0.3014, "step": 34402 }, { "epoch": 0.5980114377096769, "grad_norm": 1.3306271524905866, "learning_rate": 3.672034192285881e-07, "loss": 0.2672, "step": 34403 }, { "epoch": 0.5980288202471797, "grad_norm": 2.4592093678687887, "learning_rate": 3.6717628108010633e-07, "loss": 0.2144, "step": 34404 }, { "epoch": 0.5980462027846825, "grad_norm": 2.3646006255982077, "learning_rate": 3.671491433526146e-07, "loss": 0.174, "step": 34405 }, { "epoch": 0.5980635853221853, "grad_norm": 1.9134887201662332, "learning_rate": 3.671220060461992e-07, "loss": 0.2113, "step": 34406 }, { "epoch": 0.5980809678596881, "grad_norm": 3.4828345164517294, "learning_rate": 3.67094869160946e-07, "loss": 0.2931, "step": 34407 }, { "epoch": 0.598098350397191, "grad_norm": 2.0295096482594293, "learning_rate": 3.670677326969409e-07, "loss": 0.2139, "step": 34408 }, { "epoch": 0.5981157329346938, "grad_norm": 1.6829453108580699, "learning_rate": 3.670405966542702e-07, "loss": 0.1911, "step": 34409 }, { "epoch": 0.5981331154721966, "grad_norm": 1.1270368123415833, "learning_rate": 3.6701346103301963e-07, "loss": 0.1764, "step": 34410 }, { "epoch": 0.5981504980096994, "grad_norm": 2.079128671417825, "learning_rate": 3.6698632583327516e-07, "loss": 0.2412, "step": 34411 }, { "epoch": 0.5981678805472023, "grad_norm": 1.280703684195614, "learning_rate": 3.669591910551231e-07, "loss": 0.1588, "step": 34412 }, { "epoch": 0.5981852630847051, "grad_norm": 2.371424043530874, "learning_rate": 3.669320566986492e-07, "loss": 0.2615, "step": 34413 }, { "epoch": 0.5982026456222079, "grad_norm": 1.6522998445154584, "learning_rate": 3.669049227639396e-07, "loss": 0.1901, "step": 34414 }, { "epoch": 0.5982200281597108, "grad_norm": 2.636036169154065, "learning_rate": 3.6687778925108035e-07, "loss": 0.2593, "step": 34415 }, { "epoch": 0.5982374106972136, "grad_norm": 1.4169835302769507, "learning_rate": 3.6685065616015734e-07, "loss": 0.2272, "step": 34416 }, { "epoch": 0.5982547932347164, "grad_norm": 2.0164175179167163, "learning_rate": 3.6682352349125647e-07, "loss": 0.1979, "step": 34417 }, { "epoch": 0.5982721757722193, "grad_norm": 3.0999092100429753, "learning_rate": 3.6679639124446394e-07, "loss": 0.3791, "step": 34418 }, { "epoch": 0.5982895583097221, "grad_norm": 1.140363800543789, "learning_rate": 3.6676925941986545e-07, "loss": 0.1805, "step": 34419 }, { "epoch": 0.5983069408472249, "grad_norm": 1.218783874803937, "learning_rate": 3.6674212801754746e-07, "loss": 0.1716, "step": 34420 }, { "epoch": 0.5983243233847277, "grad_norm": 3.1360102468278592, "learning_rate": 3.6671499703759556e-07, "loss": 0.2896, "step": 34421 }, { "epoch": 0.5983417059222306, "grad_norm": 2.31399489943294, "learning_rate": 3.666878664800962e-07, "loss": 0.2286, "step": 34422 }, { "epoch": 0.5983590884597334, "grad_norm": 1.9261523719613762, "learning_rate": 3.6666073634513474e-07, "loss": 0.1888, "step": 34423 }, { "epoch": 0.5983764709972362, "grad_norm": 1.142235094826996, "learning_rate": 3.6663360663279756e-07, "loss": 0.1292, "step": 34424 }, { "epoch": 0.598393853534739, "grad_norm": 2.3052666246839117, "learning_rate": 3.6660647734317053e-07, "loss": 0.2616, "step": 34425 }, { "epoch": 0.5984112360722418, "grad_norm": 2.083989958890466, "learning_rate": 3.665793484763398e-07, "loss": 0.205, "step": 34426 }, { "epoch": 0.5984286186097446, "grad_norm": 2.0673658312676793, "learning_rate": 3.665522200323912e-07, "loss": 0.261, "step": 34427 }, { "epoch": 0.5984460011472474, "grad_norm": 1.2790648859319718, "learning_rate": 3.6652509201141083e-07, "loss": 0.2514, "step": 34428 }, { "epoch": 0.5984633836847503, "grad_norm": 1.169049781203435, "learning_rate": 3.6649796441348457e-07, "loss": 0.2461, "step": 34429 }, { "epoch": 0.5984807662222531, "grad_norm": 2.139679139341191, "learning_rate": 3.6647083723869843e-07, "loss": 0.4348, "step": 34430 }, { "epoch": 0.5984981487597559, "grad_norm": 1.7029303204261965, "learning_rate": 3.664437104871383e-07, "loss": 0.2102, "step": 34431 }, { "epoch": 0.5985155312972588, "grad_norm": 2.627237530827282, "learning_rate": 3.6641658415889043e-07, "loss": 0.2836, "step": 34432 }, { "epoch": 0.5985329138347616, "grad_norm": 1.6278794673837649, "learning_rate": 3.6638945825404046e-07, "loss": 0.2646, "step": 34433 }, { "epoch": 0.5985502963722644, "grad_norm": 1.3700286590895854, "learning_rate": 3.6636233277267467e-07, "loss": 0.168, "step": 34434 }, { "epoch": 0.5985676789097673, "grad_norm": 2.562550110110229, "learning_rate": 3.6633520771487893e-07, "loss": 0.1525, "step": 34435 }, { "epoch": 0.5985850614472701, "grad_norm": 2.0510210541546656, "learning_rate": 3.6630808308073895e-07, "loss": 0.3128, "step": 34436 }, { "epoch": 0.5986024439847729, "grad_norm": 2.3059858752613493, "learning_rate": 3.662809588703411e-07, "loss": 0.2908, "step": 34437 }, { "epoch": 0.5986198265222757, "grad_norm": 1.853562405807734, "learning_rate": 3.6625383508377106e-07, "loss": 0.2073, "step": 34438 }, { "epoch": 0.5986372090597786, "grad_norm": 2.030967026890146, "learning_rate": 3.662267117211151e-07, "loss": 0.2245, "step": 34439 }, { "epoch": 0.5986545915972814, "grad_norm": 1.99371401705944, "learning_rate": 3.6619958878245894e-07, "loss": 0.3582, "step": 34440 }, { "epoch": 0.5986719741347842, "grad_norm": 1.783106186675834, "learning_rate": 3.6617246626788873e-07, "loss": 0.2062, "step": 34441 }, { "epoch": 0.5986893566722871, "grad_norm": 1.4666329679503343, "learning_rate": 3.661453441774901e-07, "loss": 0.2492, "step": 34442 }, { "epoch": 0.5987067392097899, "grad_norm": 1.0876035440605964, "learning_rate": 3.661182225113494e-07, "loss": 0.122, "step": 34443 }, { "epoch": 0.5987241217472926, "grad_norm": 1.9229396392941618, "learning_rate": 3.660911012695523e-07, "loss": 0.2545, "step": 34444 }, { "epoch": 0.5987415042847954, "grad_norm": 2.9537099850416735, "learning_rate": 3.66063980452185e-07, "loss": 0.1709, "step": 34445 }, { "epoch": 0.5987588868222983, "grad_norm": 2.002209825990552, "learning_rate": 3.6603686005933323e-07, "loss": 0.3371, "step": 34446 }, { "epoch": 0.5987762693598011, "grad_norm": 1.7000873491440196, "learning_rate": 3.6600974009108345e-07, "loss": 0.3166, "step": 34447 }, { "epoch": 0.5987936518973039, "grad_norm": 1.260372297899249, "learning_rate": 3.659826205475209e-07, "loss": 0.1899, "step": 34448 }, { "epoch": 0.5988110344348068, "grad_norm": 1.0234754319590247, "learning_rate": 3.65955501428732e-07, "loss": 0.1204, "step": 34449 }, { "epoch": 0.5988284169723096, "grad_norm": 1.876425601390842, "learning_rate": 3.659283827348024e-07, "loss": 0.2455, "step": 34450 }, { "epoch": 0.5988457995098124, "grad_norm": 1.0869530372164609, "learning_rate": 3.659012644658184e-07, "loss": 0.2591, "step": 34451 }, { "epoch": 0.5988631820473153, "grad_norm": 1.201666411434358, "learning_rate": 3.6587414662186574e-07, "loss": 0.2611, "step": 34452 }, { "epoch": 0.5988805645848181, "grad_norm": 3.9259572894437733, "learning_rate": 3.6584702920303055e-07, "loss": 0.2604, "step": 34453 }, { "epoch": 0.5988979471223209, "grad_norm": 0.9288777341972408, "learning_rate": 3.6581991220939864e-07, "loss": 0.1208, "step": 34454 }, { "epoch": 0.5989153296598237, "grad_norm": 1.0478122882801366, "learning_rate": 3.657927956410559e-07, "loss": 0.1442, "step": 34455 }, { "epoch": 0.5989327121973266, "grad_norm": 1.5894956838780845, "learning_rate": 3.657656794980882e-07, "loss": 0.2153, "step": 34456 }, { "epoch": 0.5989500947348294, "grad_norm": 1.5603675213434005, "learning_rate": 3.657385637805818e-07, "loss": 0.2977, "step": 34457 }, { "epoch": 0.5989674772723322, "grad_norm": 2.5434063218577476, "learning_rate": 3.657114484886224e-07, "loss": 0.3519, "step": 34458 }, { "epoch": 0.5989848598098351, "grad_norm": 1.444746588806684, "learning_rate": 3.6568433362229623e-07, "loss": 0.3036, "step": 34459 }, { "epoch": 0.5990022423473379, "grad_norm": 1.4752713935053685, "learning_rate": 3.656572191816889e-07, "loss": 0.2852, "step": 34460 }, { "epoch": 0.5990196248848407, "grad_norm": 3.0855093589877702, "learning_rate": 3.656301051668863e-07, "loss": 0.2147, "step": 34461 }, { "epoch": 0.5990370074223436, "grad_norm": 1.0523878974246283, "learning_rate": 3.656029915779747e-07, "loss": 0.2259, "step": 34462 }, { "epoch": 0.5990543899598464, "grad_norm": 1.430757822298254, "learning_rate": 3.655758784150399e-07, "loss": 0.2929, "step": 34463 }, { "epoch": 0.5990717724973491, "grad_norm": 1.0289390009987964, "learning_rate": 3.655487656781677e-07, "loss": 0.2639, "step": 34464 }, { "epoch": 0.5990891550348519, "grad_norm": 2.3059090164537217, "learning_rate": 3.6552165336744424e-07, "loss": 0.3072, "step": 34465 }, { "epoch": 0.5991065375723548, "grad_norm": 1.1716263806764011, "learning_rate": 3.654945414829555e-07, "loss": 0.1805, "step": 34466 }, { "epoch": 0.5991239201098576, "grad_norm": 1.1790039093327092, "learning_rate": 3.654674300247871e-07, "loss": 0.2166, "step": 34467 }, { "epoch": 0.5991413026473604, "grad_norm": 1.5226526018877347, "learning_rate": 3.654403189930251e-07, "loss": 0.1415, "step": 34468 }, { "epoch": 0.5991586851848633, "grad_norm": 1.6913036563891746, "learning_rate": 3.654132083877555e-07, "loss": 0.2568, "step": 34469 }, { "epoch": 0.5991760677223661, "grad_norm": 1.5590967637208757, "learning_rate": 3.653860982090643e-07, "loss": 0.3065, "step": 34470 }, { "epoch": 0.5991934502598689, "grad_norm": 2.60258820929274, "learning_rate": 3.653589884570373e-07, "loss": 0.2418, "step": 34471 }, { "epoch": 0.5992108327973718, "grad_norm": 1.4803426632943022, "learning_rate": 3.6533187913176057e-07, "loss": 0.1626, "step": 34472 }, { "epoch": 0.5992282153348746, "grad_norm": 0.7382526088392547, "learning_rate": 3.6530477023331974e-07, "loss": 0.143, "step": 34473 }, { "epoch": 0.5992455978723774, "grad_norm": 1.2742775650920892, "learning_rate": 3.65277661761801e-07, "loss": 0.1534, "step": 34474 }, { "epoch": 0.5992629804098802, "grad_norm": 1.0178134583207534, "learning_rate": 3.6525055371729006e-07, "loss": 0.3552, "step": 34475 }, { "epoch": 0.5992803629473831, "grad_norm": 4.54547518611884, "learning_rate": 3.652234460998731e-07, "loss": 0.1804, "step": 34476 }, { "epoch": 0.5992977454848859, "grad_norm": 0.9382701786629633, "learning_rate": 3.6519633890963574e-07, "loss": 0.1942, "step": 34477 }, { "epoch": 0.5993151280223887, "grad_norm": 1.2395317961994172, "learning_rate": 3.651692321466645e-07, "loss": 0.2992, "step": 34478 }, { "epoch": 0.5993325105598916, "grad_norm": 1.3836880863729941, "learning_rate": 3.651421258110444e-07, "loss": 0.2152, "step": 34479 }, { "epoch": 0.5993498930973944, "grad_norm": 1.9360412699148952, "learning_rate": 3.65115019902862e-07, "loss": 0.2748, "step": 34480 }, { "epoch": 0.5993672756348972, "grad_norm": 1.8104988871848988, "learning_rate": 3.6508791442220294e-07, "loss": 0.2258, "step": 34481 }, { "epoch": 0.5993846581724, "grad_norm": 2.5361933636496836, "learning_rate": 3.650608093691533e-07, "loss": 0.3764, "step": 34482 }, { "epoch": 0.5994020407099029, "grad_norm": 1.4576043935094305, "learning_rate": 3.650337047437988e-07, "loss": 0.2752, "step": 34483 }, { "epoch": 0.5994194232474056, "grad_norm": 1.6311450828290293, "learning_rate": 3.6500660054622557e-07, "loss": 0.3102, "step": 34484 }, { "epoch": 0.5994368057849084, "grad_norm": 1.2611550308844075, "learning_rate": 3.6497949677651947e-07, "loss": 0.2211, "step": 34485 }, { "epoch": 0.5994541883224113, "grad_norm": 1.7769930894595112, "learning_rate": 3.649523934347661e-07, "loss": 0.1913, "step": 34486 }, { "epoch": 0.5994715708599141, "grad_norm": 2.817905458502896, "learning_rate": 3.6492529052105183e-07, "loss": 0.3162, "step": 34487 }, { "epoch": 0.5994889533974169, "grad_norm": 2.3687835890006963, "learning_rate": 3.6489818803546224e-07, "loss": 0.2752, "step": 34488 }, { "epoch": 0.5995063359349198, "grad_norm": 1.7898490764969432, "learning_rate": 3.648710859780833e-07, "loss": 0.2714, "step": 34489 }, { "epoch": 0.5995237184724226, "grad_norm": 1.9140695987748935, "learning_rate": 3.64843984349001e-07, "loss": 0.2784, "step": 34490 }, { "epoch": 0.5995411010099254, "grad_norm": 1.205259343866813, "learning_rate": 3.648168831483013e-07, "loss": 0.3796, "step": 34491 }, { "epoch": 0.5995584835474282, "grad_norm": 1.9618451791927696, "learning_rate": 3.6478978237606985e-07, "loss": 0.265, "step": 34492 }, { "epoch": 0.5995758660849311, "grad_norm": 0.8110621311312404, "learning_rate": 3.647626820323927e-07, "loss": 0.2356, "step": 34493 }, { "epoch": 0.5995932486224339, "grad_norm": 3.0936887045138697, "learning_rate": 3.647355821173556e-07, "loss": 0.2481, "step": 34494 }, { "epoch": 0.5996106311599367, "grad_norm": 1.9991027092653053, "learning_rate": 3.647084826310447e-07, "loss": 0.3292, "step": 34495 }, { "epoch": 0.5996280136974396, "grad_norm": 1.5513322591304362, "learning_rate": 3.6468138357354575e-07, "loss": 0.1323, "step": 34496 }, { "epoch": 0.5996453962349424, "grad_norm": 3.60608834099154, "learning_rate": 3.646542849449448e-07, "loss": 0.2019, "step": 34497 }, { "epoch": 0.5996627787724452, "grad_norm": 1.9480186804512083, "learning_rate": 3.646271867453274e-07, "loss": 0.2424, "step": 34498 }, { "epoch": 0.5996801613099481, "grad_norm": 2.0278896960339376, "learning_rate": 3.646000889747797e-07, "loss": 0.3477, "step": 34499 }, { "epoch": 0.5996975438474509, "grad_norm": 1.538282665741688, "learning_rate": 3.645729916333874e-07, "loss": 0.2864, "step": 34500 }, { "epoch": 0.5997149263849537, "grad_norm": 1.0731525048497745, "learning_rate": 3.6454589472123664e-07, "loss": 0.223, "step": 34501 }, { "epoch": 0.5997323089224565, "grad_norm": 2.135829821591427, "learning_rate": 3.64518798238413e-07, "loss": 0.3599, "step": 34502 }, { "epoch": 0.5997496914599594, "grad_norm": 1.8790187818443906, "learning_rate": 3.644917021850027e-07, "loss": 0.1322, "step": 34503 }, { "epoch": 0.5997670739974621, "grad_norm": 1.2002074441974853, "learning_rate": 3.6446460656109157e-07, "loss": 0.2499, "step": 34504 }, { "epoch": 0.5997844565349649, "grad_norm": 1.5823882912043439, "learning_rate": 3.6443751136676524e-07, "loss": 0.2106, "step": 34505 }, { "epoch": 0.5998018390724678, "grad_norm": 1.304255966509004, "learning_rate": 3.644104166021097e-07, "loss": 0.2164, "step": 34506 }, { "epoch": 0.5998192216099706, "grad_norm": 1.5348560842560135, "learning_rate": 3.643833222672109e-07, "loss": 0.1671, "step": 34507 }, { "epoch": 0.5998366041474734, "grad_norm": 1.1088142106775123, "learning_rate": 3.6435622836215454e-07, "loss": 0.1924, "step": 34508 }, { "epoch": 0.5998539866849762, "grad_norm": 2.184089301704454, "learning_rate": 3.643291348870268e-07, "loss": 0.2894, "step": 34509 }, { "epoch": 0.5998713692224791, "grad_norm": 1.559070488116344, "learning_rate": 3.6430204184191337e-07, "loss": 0.2273, "step": 34510 }, { "epoch": 0.5998887517599819, "grad_norm": 1.3667151691242534, "learning_rate": 3.6427494922690014e-07, "loss": 0.1783, "step": 34511 }, { "epoch": 0.5999061342974847, "grad_norm": 1.061069925199445, "learning_rate": 3.642478570420728e-07, "loss": 0.1998, "step": 34512 }, { "epoch": 0.5999235168349876, "grad_norm": 1.8748419487530934, "learning_rate": 3.6422076528751754e-07, "loss": 0.2751, "step": 34513 }, { "epoch": 0.5999408993724904, "grad_norm": 3.713045120765101, "learning_rate": 3.6419367396332e-07, "loss": 0.1576, "step": 34514 }, { "epoch": 0.5999582819099932, "grad_norm": 2.1490405597386806, "learning_rate": 3.6416658306956613e-07, "loss": 0.3463, "step": 34515 }, { "epoch": 0.5999756644474961, "grad_norm": 3.1081544416626934, "learning_rate": 3.64139492606342e-07, "loss": 0.2622, "step": 34516 }, { "epoch": 0.5999930469849989, "grad_norm": 3.5628657515671738, "learning_rate": 3.6411240257373303e-07, "loss": 0.2863, "step": 34517 }, { "epoch": 0.6000104295225017, "grad_norm": 2.280596898890596, "learning_rate": 3.640853129718254e-07, "loss": 0.1793, "step": 34518 }, { "epoch": 0.6000278120600046, "grad_norm": 1.5355013681405543, "learning_rate": 3.6405822380070496e-07, "loss": 0.2504, "step": 34519 }, { "epoch": 0.6000451945975074, "grad_norm": 1.2537224520700319, "learning_rate": 3.6403113506045735e-07, "loss": 0.3187, "step": 34520 }, { "epoch": 0.6000625771350102, "grad_norm": 1.0656895915974858, "learning_rate": 3.640040467511687e-07, "loss": 0.1356, "step": 34521 }, { "epoch": 0.600079959672513, "grad_norm": 1.631966192262331, "learning_rate": 3.6397695887292466e-07, "loss": 0.1718, "step": 34522 }, { "epoch": 0.6000973422100159, "grad_norm": 3.7238618839436524, "learning_rate": 3.6394987142581133e-07, "loss": 0.2966, "step": 34523 }, { "epoch": 0.6001147247475186, "grad_norm": 2.055286045827599, "learning_rate": 3.639227844099144e-07, "loss": 0.2588, "step": 34524 }, { "epoch": 0.6001321072850214, "grad_norm": 1.180550592228188, "learning_rate": 3.6389569782531955e-07, "loss": 0.2112, "step": 34525 }, { "epoch": 0.6001494898225243, "grad_norm": 3.343998832494091, "learning_rate": 3.638686116721129e-07, "loss": 0.2591, "step": 34526 }, { "epoch": 0.6001668723600271, "grad_norm": 1.2774828240914118, "learning_rate": 3.638415259503802e-07, "loss": 0.1766, "step": 34527 }, { "epoch": 0.6001842548975299, "grad_norm": 1.2520450452895442, "learning_rate": 3.638144406602074e-07, "loss": 0.1632, "step": 34528 }, { "epoch": 0.6002016374350327, "grad_norm": 1.5113867004748642, "learning_rate": 3.637873558016803e-07, "loss": 0.1404, "step": 34529 }, { "epoch": 0.6002190199725356, "grad_norm": 1.7786709493706472, "learning_rate": 3.637602713748847e-07, "loss": 0.2944, "step": 34530 }, { "epoch": 0.6002364025100384, "grad_norm": 1.395630549790347, "learning_rate": 3.637331873799063e-07, "loss": 0.1753, "step": 34531 }, { "epoch": 0.6002537850475412, "grad_norm": 2.2365734470500844, "learning_rate": 3.637061038168312e-07, "loss": 0.2347, "step": 34532 }, { "epoch": 0.6002711675850441, "grad_norm": 1.211387395509577, "learning_rate": 3.636790206857451e-07, "loss": 0.2991, "step": 34533 }, { "epoch": 0.6002885501225469, "grad_norm": 1.8706406880340738, "learning_rate": 3.6365193798673387e-07, "loss": 0.1813, "step": 34534 }, { "epoch": 0.6003059326600497, "grad_norm": 1.5544959685015085, "learning_rate": 3.6362485571988357e-07, "loss": 0.3075, "step": 34535 }, { "epoch": 0.6003233151975526, "grad_norm": 1.2403414516913078, "learning_rate": 3.635977738852797e-07, "loss": 0.2622, "step": 34536 }, { "epoch": 0.6003406977350554, "grad_norm": 2.160649000521162, "learning_rate": 3.635706924830081e-07, "loss": 0.2803, "step": 34537 }, { "epoch": 0.6003580802725582, "grad_norm": 1.5017482127199435, "learning_rate": 3.635436115131548e-07, "loss": 0.1496, "step": 34538 }, { "epoch": 0.600375462810061, "grad_norm": 1.6916472632711463, "learning_rate": 3.635165309758056e-07, "loss": 0.313, "step": 34539 }, { "epoch": 0.6003928453475639, "grad_norm": 1.4535593490970404, "learning_rate": 3.6348945087104633e-07, "loss": 0.1739, "step": 34540 }, { "epoch": 0.6004102278850667, "grad_norm": 1.2956124438980896, "learning_rate": 3.634623711989626e-07, "loss": 0.2577, "step": 34541 }, { "epoch": 0.6004276104225695, "grad_norm": 1.5568577376181354, "learning_rate": 3.634352919596408e-07, "loss": 0.2263, "step": 34542 }, { "epoch": 0.6004449929600724, "grad_norm": 1.961066177398081, "learning_rate": 3.634082131531662e-07, "loss": 0.1827, "step": 34543 }, { "epoch": 0.6004623754975751, "grad_norm": 0.9193676643936833, "learning_rate": 3.6338113477962485e-07, "loss": 0.2126, "step": 34544 }, { "epoch": 0.6004797580350779, "grad_norm": 1.409249533715118, "learning_rate": 3.6335405683910237e-07, "loss": 0.2721, "step": 34545 }, { "epoch": 0.6004971405725807, "grad_norm": 1.8870872484802348, "learning_rate": 3.6332697933168493e-07, "loss": 0.2418, "step": 34546 }, { "epoch": 0.6005145231100836, "grad_norm": 3.3061721905017047, "learning_rate": 3.632999022574581e-07, "loss": 0.4943, "step": 34547 }, { "epoch": 0.6005319056475864, "grad_norm": 1.640967467281325, "learning_rate": 3.63272825616508e-07, "loss": 0.2196, "step": 34548 }, { "epoch": 0.6005492881850892, "grad_norm": 1.1878220536581305, "learning_rate": 3.6324574940892004e-07, "loss": 0.2954, "step": 34549 }, { "epoch": 0.6005666707225921, "grad_norm": 2.185439062082266, "learning_rate": 3.632186736347802e-07, "loss": 0.2551, "step": 34550 }, { "epoch": 0.6005840532600949, "grad_norm": 29.749810367619475, "learning_rate": 3.6319159829417437e-07, "loss": 0.4346, "step": 34551 }, { "epoch": 0.6006014357975977, "grad_norm": 1.2469207707709613, "learning_rate": 3.6316452338718837e-07, "loss": 0.2394, "step": 34552 }, { "epoch": 0.6006188183351006, "grad_norm": 2.434854957974404, "learning_rate": 3.631374489139079e-07, "loss": 0.2916, "step": 34553 }, { "epoch": 0.6006362008726034, "grad_norm": 1.1129376359961285, "learning_rate": 3.63110374874419e-07, "loss": 0.2224, "step": 34554 }, { "epoch": 0.6006535834101062, "grad_norm": 0.9971725303394805, "learning_rate": 3.630833012688073e-07, "loss": 0.1909, "step": 34555 }, { "epoch": 0.600670965947609, "grad_norm": 1.7587385241056677, "learning_rate": 3.6305622809715854e-07, "loss": 0.2679, "step": 34556 }, { "epoch": 0.6006883484851119, "grad_norm": 4.2384156142938885, "learning_rate": 3.6302915535955865e-07, "loss": 0.2034, "step": 34557 }, { "epoch": 0.6007057310226147, "grad_norm": 1.4758784969893863, "learning_rate": 3.630020830560934e-07, "loss": 0.2251, "step": 34558 }, { "epoch": 0.6007231135601175, "grad_norm": 1.4916396197691113, "learning_rate": 3.629750111868486e-07, "loss": 0.2592, "step": 34559 }, { "epoch": 0.6007404960976204, "grad_norm": 1.3421228988608949, "learning_rate": 3.629479397519102e-07, "loss": 0.2762, "step": 34560 }, { "epoch": 0.6007578786351232, "grad_norm": 1.9771314143212753, "learning_rate": 3.6292086875136385e-07, "loss": 0.2965, "step": 34561 }, { "epoch": 0.600775261172626, "grad_norm": 2.8104144032162233, "learning_rate": 3.628937981852953e-07, "loss": 0.2311, "step": 34562 }, { "epoch": 0.6007926437101289, "grad_norm": 3.82249988528967, "learning_rate": 3.6286672805379047e-07, "loss": 0.2736, "step": 34563 }, { "epoch": 0.6008100262476316, "grad_norm": 1.4697745112529839, "learning_rate": 3.62839658356935e-07, "loss": 0.325, "step": 34564 }, { "epoch": 0.6008274087851344, "grad_norm": 2.2118531773898797, "learning_rate": 3.6281258909481493e-07, "loss": 0.2394, "step": 34565 }, { "epoch": 0.6008447913226372, "grad_norm": 3.043806403964789, "learning_rate": 3.627855202675158e-07, "loss": 0.2717, "step": 34566 }, { "epoch": 0.6008621738601401, "grad_norm": 2.2114433755705623, "learning_rate": 3.627584518751239e-07, "loss": 0.169, "step": 34567 }, { "epoch": 0.6008795563976429, "grad_norm": 1.5790302561153813, "learning_rate": 3.627313839177243e-07, "loss": 0.202, "step": 34568 }, { "epoch": 0.6008969389351457, "grad_norm": 2.8490538353920907, "learning_rate": 3.627043163954033e-07, "loss": 0.3017, "step": 34569 }, { "epoch": 0.6009143214726486, "grad_norm": 2.167563594912379, "learning_rate": 3.6267724930824645e-07, "loss": 0.2236, "step": 34570 }, { "epoch": 0.6009317040101514, "grad_norm": 2.935578796455526, "learning_rate": 3.6265018265633974e-07, "loss": 0.2386, "step": 34571 }, { "epoch": 0.6009490865476542, "grad_norm": 2.1476682688729856, "learning_rate": 3.6262311643976873e-07, "loss": 0.2594, "step": 34572 }, { "epoch": 0.600966469085157, "grad_norm": 1.7544018434678716, "learning_rate": 3.625960506586195e-07, "loss": 0.1767, "step": 34573 }, { "epoch": 0.6009838516226599, "grad_norm": 2.4176974917513774, "learning_rate": 3.625689853129777e-07, "loss": 0.2173, "step": 34574 }, { "epoch": 0.6010012341601627, "grad_norm": 1.08592443721885, "learning_rate": 3.6254192040292896e-07, "loss": 0.1323, "step": 34575 }, { "epoch": 0.6010186166976655, "grad_norm": 1.4857805537790978, "learning_rate": 3.6251485592855914e-07, "loss": 0.1321, "step": 34576 }, { "epoch": 0.6010359992351684, "grad_norm": 2.6971646164039065, "learning_rate": 3.6248779188995416e-07, "loss": 0.141, "step": 34577 }, { "epoch": 0.6010533817726712, "grad_norm": 1.3246158217362638, "learning_rate": 3.624607282871996e-07, "loss": 0.2575, "step": 34578 }, { "epoch": 0.601070764310174, "grad_norm": 1.2368549387007024, "learning_rate": 3.6243366512038165e-07, "loss": 0.2602, "step": 34579 }, { "epoch": 0.6010881468476769, "grad_norm": 1.3640499091743297, "learning_rate": 3.624066023895856e-07, "loss": 0.2346, "step": 34580 }, { "epoch": 0.6011055293851797, "grad_norm": 2.393399150102247, "learning_rate": 3.623795400948973e-07, "loss": 0.2085, "step": 34581 }, { "epoch": 0.6011229119226825, "grad_norm": 1.4241542763510284, "learning_rate": 3.623524782364028e-07, "loss": 0.1925, "step": 34582 }, { "epoch": 0.6011402944601852, "grad_norm": 1.8000946897356656, "learning_rate": 3.6232541681418753e-07, "loss": 0.1829, "step": 34583 }, { "epoch": 0.6011576769976881, "grad_norm": 1.6648561448068366, "learning_rate": 3.6229835582833763e-07, "loss": 0.1782, "step": 34584 }, { "epoch": 0.6011750595351909, "grad_norm": 1.2516152577732564, "learning_rate": 3.622712952789386e-07, "loss": 0.1508, "step": 34585 }, { "epoch": 0.6011924420726937, "grad_norm": 2.207323149592729, "learning_rate": 3.622442351660765e-07, "loss": 0.2302, "step": 34586 }, { "epoch": 0.6012098246101966, "grad_norm": 1.4259671156704246, "learning_rate": 3.6221717548983666e-07, "loss": 0.1633, "step": 34587 }, { "epoch": 0.6012272071476994, "grad_norm": 3.3932389531147487, "learning_rate": 3.6219011625030516e-07, "loss": 0.2089, "step": 34588 }, { "epoch": 0.6012445896852022, "grad_norm": 1.9665274103009214, "learning_rate": 3.621630574475676e-07, "loss": 0.1443, "step": 34589 }, { "epoch": 0.601261972222705, "grad_norm": 1.3791227957931367, "learning_rate": 3.621359990817099e-07, "loss": 0.154, "step": 34590 }, { "epoch": 0.6012793547602079, "grad_norm": 1.4581653363916034, "learning_rate": 3.6210894115281764e-07, "loss": 0.2508, "step": 34591 }, { "epoch": 0.6012967372977107, "grad_norm": 1.6961690635611624, "learning_rate": 3.6208188366097705e-07, "loss": 0.2063, "step": 34592 }, { "epoch": 0.6013141198352135, "grad_norm": 1.668570159346086, "learning_rate": 3.6205482660627323e-07, "loss": 0.2481, "step": 34593 }, { "epoch": 0.6013315023727164, "grad_norm": 1.6038205739268476, "learning_rate": 3.6202776998879235e-07, "loss": 0.2768, "step": 34594 }, { "epoch": 0.6013488849102192, "grad_norm": 1.918630379054929, "learning_rate": 3.6200071380861993e-07, "loss": 0.1652, "step": 34595 }, { "epoch": 0.601366267447722, "grad_norm": 2.505097245283136, "learning_rate": 3.61973658065842e-07, "loss": 0.2126, "step": 34596 }, { "epoch": 0.6013836499852249, "grad_norm": 1.5655327602327584, "learning_rate": 3.61946602760544e-07, "loss": 0.2566, "step": 34597 }, { "epoch": 0.6014010325227277, "grad_norm": 1.269635473775685, "learning_rate": 3.619195478928121e-07, "loss": 0.2439, "step": 34598 }, { "epoch": 0.6014184150602305, "grad_norm": 2.680248247524248, "learning_rate": 3.618924934627317e-07, "loss": 0.2871, "step": 34599 }, { "epoch": 0.6014357975977334, "grad_norm": 1.8542296411209238, "learning_rate": 3.618654394703886e-07, "loss": 0.2686, "step": 34600 }, { "epoch": 0.6014531801352362, "grad_norm": 1.9050511898439595, "learning_rate": 3.618383859158686e-07, "loss": 0.3842, "step": 34601 }, { "epoch": 0.601470562672739, "grad_norm": 2.538853783425378, "learning_rate": 3.618113327992575e-07, "loss": 0.2981, "step": 34602 }, { "epoch": 0.6014879452102417, "grad_norm": 4.531292485080681, "learning_rate": 3.6178428012064085e-07, "loss": 0.3577, "step": 34603 }, { "epoch": 0.6015053277477446, "grad_norm": 1.7173953508978053, "learning_rate": 3.6175722788010464e-07, "loss": 0.2128, "step": 34604 }, { "epoch": 0.6015227102852474, "grad_norm": 1.079892934961317, "learning_rate": 3.617301760777347e-07, "loss": 0.2102, "step": 34605 }, { "epoch": 0.6015400928227502, "grad_norm": 1.3039119663071352, "learning_rate": 3.617031247136163e-07, "loss": 0.1857, "step": 34606 }, { "epoch": 0.6015574753602531, "grad_norm": 2.5867064069404893, "learning_rate": 3.6167607378783566e-07, "loss": 0.2317, "step": 34607 }, { "epoch": 0.6015748578977559, "grad_norm": 2.0582266675006133, "learning_rate": 3.616490233004783e-07, "loss": 0.2512, "step": 34608 }, { "epoch": 0.6015922404352587, "grad_norm": 2.2724507284287565, "learning_rate": 3.616219732516298e-07, "loss": 0.2903, "step": 34609 }, { "epoch": 0.6016096229727615, "grad_norm": 1.9462920179995122, "learning_rate": 3.615949236413762e-07, "loss": 0.2691, "step": 34610 }, { "epoch": 0.6016270055102644, "grad_norm": 0.9831056251395635, "learning_rate": 3.615678744698033e-07, "loss": 0.282, "step": 34611 }, { "epoch": 0.6016443880477672, "grad_norm": 1.6674610030096764, "learning_rate": 3.615408257369964e-07, "loss": 0.1838, "step": 34612 }, { "epoch": 0.60166177058527, "grad_norm": 1.8688621593553578, "learning_rate": 3.615137774430416e-07, "loss": 0.134, "step": 34613 }, { "epoch": 0.6016791531227729, "grad_norm": 1.2333531112234153, "learning_rate": 3.614867295880244e-07, "loss": 0.2406, "step": 34614 }, { "epoch": 0.6016965356602757, "grad_norm": 1.1213416214395286, "learning_rate": 3.614596821720307e-07, "loss": 0.1473, "step": 34615 }, { "epoch": 0.6017139181977785, "grad_norm": 1.880278330892862, "learning_rate": 3.6143263519514623e-07, "loss": 0.2606, "step": 34616 }, { "epoch": 0.6017313007352814, "grad_norm": 2.123404591427809, "learning_rate": 3.6140558865745674e-07, "loss": 0.2224, "step": 34617 }, { "epoch": 0.6017486832727842, "grad_norm": 1.1546803359845048, "learning_rate": 3.6137854255904765e-07, "loss": 0.1732, "step": 34618 }, { "epoch": 0.601766065810287, "grad_norm": 1.3839607057633982, "learning_rate": 3.61351496900005e-07, "loss": 0.1907, "step": 34619 }, { "epoch": 0.6017834483477899, "grad_norm": 1.9534149198721347, "learning_rate": 3.6132445168041427e-07, "loss": 0.1587, "step": 34620 }, { "epoch": 0.6018008308852927, "grad_norm": 1.5917893514430597, "learning_rate": 3.6129740690036145e-07, "loss": 0.2763, "step": 34621 }, { "epoch": 0.6018182134227955, "grad_norm": 1.5134348601658434, "learning_rate": 3.61270362559932e-07, "loss": 0.1832, "step": 34622 }, { "epoch": 0.6018355959602982, "grad_norm": 0.9914060509271856, "learning_rate": 3.6124331865921196e-07, "loss": 0.1781, "step": 34623 }, { "epoch": 0.6018529784978011, "grad_norm": 1.952868087895922, "learning_rate": 3.61216275198287e-07, "loss": 0.3376, "step": 34624 }, { "epoch": 0.6018703610353039, "grad_norm": 1.8425698186921062, "learning_rate": 3.6118923217724253e-07, "loss": 0.1662, "step": 34625 }, { "epoch": 0.6018877435728067, "grad_norm": 1.7114269267053754, "learning_rate": 3.6116218959616427e-07, "loss": 0.157, "step": 34626 }, { "epoch": 0.6019051261103096, "grad_norm": 1.177693402245305, "learning_rate": 3.6113514745513827e-07, "loss": 0.138, "step": 34627 }, { "epoch": 0.6019225086478124, "grad_norm": 1.0387706190984083, "learning_rate": 3.6110810575425e-07, "loss": 0.1611, "step": 34628 }, { "epoch": 0.6019398911853152, "grad_norm": 1.082074851592787, "learning_rate": 3.610810644935852e-07, "loss": 0.1062, "step": 34629 }, { "epoch": 0.601957273722818, "grad_norm": 1.4637710541089455, "learning_rate": 3.610540236732298e-07, "loss": 0.1574, "step": 34630 }, { "epoch": 0.6019746562603209, "grad_norm": 1.2089194363730218, "learning_rate": 3.6102698329326913e-07, "loss": 0.218, "step": 34631 }, { "epoch": 0.6019920387978237, "grad_norm": 1.729205210251224, "learning_rate": 3.609999433537891e-07, "loss": 0.1565, "step": 34632 }, { "epoch": 0.6020094213353265, "grad_norm": 1.8184623591009121, "learning_rate": 3.6097290385487555e-07, "loss": 0.1684, "step": 34633 }, { "epoch": 0.6020268038728294, "grad_norm": 1.323608549916497, "learning_rate": 3.6094586479661383e-07, "loss": 0.3209, "step": 34634 }, { "epoch": 0.6020441864103322, "grad_norm": 1.7561820717410013, "learning_rate": 3.6091882617908993e-07, "loss": 0.2081, "step": 34635 }, { "epoch": 0.602061568947835, "grad_norm": 1.9920152930007176, "learning_rate": 3.608917880023896e-07, "loss": 0.1984, "step": 34636 }, { "epoch": 0.6020789514853379, "grad_norm": 2.0880133260445737, "learning_rate": 3.608647502665981e-07, "loss": 0.2513, "step": 34637 }, { "epoch": 0.6020963340228407, "grad_norm": 2.035523551764157, "learning_rate": 3.6083771297180165e-07, "loss": 0.1349, "step": 34638 }, { "epoch": 0.6021137165603435, "grad_norm": 4.087890628402462, "learning_rate": 3.608106761180856e-07, "loss": 0.3569, "step": 34639 }, { "epoch": 0.6021310990978463, "grad_norm": 1.8703213138275445, "learning_rate": 3.607836397055358e-07, "loss": 0.2876, "step": 34640 }, { "epoch": 0.6021484816353492, "grad_norm": 1.1760799963282664, "learning_rate": 3.60756603734238e-07, "loss": 0.1866, "step": 34641 }, { "epoch": 0.602165864172852, "grad_norm": 1.4354330130612039, "learning_rate": 3.607295682042777e-07, "loss": 0.2094, "step": 34642 }, { "epoch": 0.6021832467103547, "grad_norm": 1.1482326036003265, "learning_rate": 3.6070253311574083e-07, "loss": 0.2094, "step": 34643 }, { "epoch": 0.6022006292478576, "grad_norm": 1.6015612384965179, "learning_rate": 3.6067549846871293e-07, "loss": 0.145, "step": 34644 }, { "epoch": 0.6022180117853604, "grad_norm": 1.9922556798114064, "learning_rate": 3.606484642632796e-07, "loss": 0.203, "step": 34645 }, { "epoch": 0.6022353943228632, "grad_norm": 1.6315184600682517, "learning_rate": 3.6062143049952657e-07, "loss": 0.2323, "step": 34646 }, { "epoch": 0.602252776860366, "grad_norm": 2.187498116937843, "learning_rate": 3.605943971775396e-07, "loss": 0.2657, "step": 34647 }, { "epoch": 0.6022701593978689, "grad_norm": 2.156878001267095, "learning_rate": 3.605673642974044e-07, "loss": 0.2446, "step": 34648 }, { "epoch": 0.6022875419353717, "grad_norm": 1.9652012707552526, "learning_rate": 3.605403318592067e-07, "loss": 0.2063, "step": 34649 }, { "epoch": 0.6023049244728745, "grad_norm": 1.7508452423909147, "learning_rate": 3.6051329986303207e-07, "loss": 0.3867, "step": 34650 }, { "epoch": 0.6023223070103774, "grad_norm": 1.2097842926993636, "learning_rate": 3.6048626830896605e-07, "loss": 0.185, "step": 34651 }, { "epoch": 0.6023396895478802, "grad_norm": 2.0413629724302065, "learning_rate": 3.604592371970946e-07, "loss": 0.2064, "step": 34652 }, { "epoch": 0.602357072085383, "grad_norm": 1.509129554484683, "learning_rate": 3.604322065275032e-07, "loss": 0.1926, "step": 34653 }, { "epoch": 0.6023744546228859, "grad_norm": 2.3883629555890264, "learning_rate": 3.604051763002776e-07, "loss": 0.4089, "step": 34654 }, { "epoch": 0.6023918371603887, "grad_norm": 1.2745670685613701, "learning_rate": 3.6037814651550356e-07, "loss": 0.1882, "step": 34655 }, { "epoch": 0.6024092196978915, "grad_norm": 2.1847691554950908, "learning_rate": 3.603511171732666e-07, "loss": 0.318, "step": 34656 }, { "epoch": 0.6024266022353943, "grad_norm": 1.4577117434791962, "learning_rate": 3.603240882736524e-07, "loss": 0.1633, "step": 34657 }, { "epoch": 0.6024439847728972, "grad_norm": 1.58356025292562, "learning_rate": 3.6029705981674674e-07, "loss": 0.1784, "step": 34658 }, { "epoch": 0.6024613673104, "grad_norm": 2.0123057734096323, "learning_rate": 3.602700318026351e-07, "loss": 0.2056, "step": 34659 }, { "epoch": 0.6024787498479028, "grad_norm": 2.211678334367528, "learning_rate": 3.602430042314034e-07, "loss": 0.1876, "step": 34660 }, { "epoch": 0.6024961323854057, "grad_norm": 1.8959031507078752, "learning_rate": 3.6021597710313706e-07, "loss": 0.2724, "step": 34661 }, { "epoch": 0.6025135149229085, "grad_norm": 2.1229379139853486, "learning_rate": 3.601889504179221e-07, "loss": 0.2442, "step": 34662 }, { "epoch": 0.6025308974604112, "grad_norm": 2.293059512085194, "learning_rate": 3.601619241758438e-07, "loss": 0.3403, "step": 34663 }, { "epoch": 0.602548279997914, "grad_norm": 1.1246708732157034, "learning_rate": 3.6013489837698796e-07, "loss": 0.1914, "step": 34664 }, { "epoch": 0.6025656625354169, "grad_norm": 1.0450063716885962, "learning_rate": 3.601078730214402e-07, "loss": 0.1987, "step": 34665 }, { "epoch": 0.6025830450729197, "grad_norm": 4.632858395188836, "learning_rate": 3.6008084810928624e-07, "loss": 0.4165, "step": 34666 }, { "epoch": 0.6026004276104225, "grad_norm": 1.4794292009036698, "learning_rate": 3.6005382364061165e-07, "loss": 0.1888, "step": 34667 }, { "epoch": 0.6026178101479254, "grad_norm": 1.4880967962081817, "learning_rate": 3.6002679961550243e-07, "loss": 0.2267, "step": 34668 }, { "epoch": 0.6026351926854282, "grad_norm": 1.2751930483509095, "learning_rate": 3.5999977603404377e-07, "loss": 0.2272, "step": 34669 }, { "epoch": 0.602652575222931, "grad_norm": 1.899748229110033, "learning_rate": 3.599727528963214e-07, "loss": 0.2788, "step": 34670 }, { "epoch": 0.6026699577604339, "grad_norm": 1.6583773731393916, "learning_rate": 3.599457302024212e-07, "loss": 0.1636, "step": 34671 }, { "epoch": 0.6026873402979367, "grad_norm": 1.365258769362241, "learning_rate": 3.599187079524286e-07, "loss": 0.3692, "step": 34672 }, { "epoch": 0.6027047228354395, "grad_norm": 1.3621090362227335, "learning_rate": 3.5989168614642945e-07, "loss": 0.1562, "step": 34673 }, { "epoch": 0.6027221053729424, "grad_norm": 2.1839691993211106, "learning_rate": 3.598646647845094e-07, "loss": 0.2661, "step": 34674 }, { "epoch": 0.6027394879104452, "grad_norm": 1.0308175765047352, "learning_rate": 3.5983764386675394e-07, "loss": 0.2167, "step": 34675 }, { "epoch": 0.602756870447948, "grad_norm": 2.239316188958955, "learning_rate": 3.5981062339324857e-07, "loss": 0.2024, "step": 34676 }, { "epoch": 0.6027742529854508, "grad_norm": 1.377169803218544, "learning_rate": 3.5978360336407933e-07, "loss": 0.1948, "step": 34677 }, { "epoch": 0.6027916355229537, "grad_norm": 1.7277232310879151, "learning_rate": 3.5975658377933144e-07, "loss": 0.2192, "step": 34678 }, { "epoch": 0.6028090180604565, "grad_norm": 1.4801222134272214, "learning_rate": 3.597295646390909e-07, "loss": 0.1346, "step": 34679 }, { "epoch": 0.6028264005979593, "grad_norm": 1.2719051383089353, "learning_rate": 3.5970254594344313e-07, "loss": 0.2489, "step": 34680 }, { "epoch": 0.6028437831354622, "grad_norm": 2.0283682921343775, "learning_rate": 3.5967552769247413e-07, "loss": 0.2144, "step": 34681 }, { "epoch": 0.602861165672965, "grad_norm": 1.834749689165739, "learning_rate": 3.5964850988626895e-07, "loss": 0.2721, "step": 34682 }, { "epoch": 0.6028785482104677, "grad_norm": 1.4012766312965033, "learning_rate": 3.596214925249136e-07, "loss": 0.1671, "step": 34683 }, { "epoch": 0.6028959307479705, "grad_norm": 1.4979653979656526, "learning_rate": 3.5959447560849353e-07, "loss": 0.2951, "step": 34684 }, { "epoch": 0.6029133132854734, "grad_norm": 1.3345080963065532, "learning_rate": 3.5956745913709464e-07, "loss": 0.1628, "step": 34685 }, { "epoch": 0.6029306958229762, "grad_norm": 1.2672914252138405, "learning_rate": 3.5954044311080225e-07, "loss": 0.158, "step": 34686 }, { "epoch": 0.602948078360479, "grad_norm": 1.3156103931482102, "learning_rate": 3.595134275297024e-07, "loss": 0.2408, "step": 34687 }, { "epoch": 0.6029654608979819, "grad_norm": 1.1990257112220057, "learning_rate": 3.594864123938802e-07, "loss": 0.1781, "step": 34688 }, { "epoch": 0.6029828434354847, "grad_norm": 1.7841694204592702, "learning_rate": 3.5945939770342163e-07, "loss": 0.2365, "step": 34689 }, { "epoch": 0.6030002259729875, "grad_norm": 2.477324962152379, "learning_rate": 3.594323834584121e-07, "loss": 0.2515, "step": 34690 }, { "epoch": 0.6030176085104904, "grad_norm": 1.3746889052003743, "learning_rate": 3.594053696589374e-07, "loss": 0.121, "step": 34691 }, { "epoch": 0.6030349910479932, "grad_norm": 1.8529096752181589, "learning_rate": 3.5937835630508305e-07, "loss": 0.2171, "step": 34692 }, { "epoch": 0.603052373585496, "grad_norm": 1.874553435886465, "learning_rate": 3.5935134339693494e-07, "loss": 0.1887, "step": 34693 }, { "epoch": 0.6030697561229988, "grad_norm": 1.645271525825894, "learning_rate": 3.593243309345783e-07, "loss": 0.2518, "step": 34694 }, { "epoch": 0.6030871386605017, "grad_norm": 1.4340803451406654, "learning_rate": 3.5929731891809886e-07, "loss": 0.273, "step": 34695 }, { "epoch": 0.6031045211980045, "grad_norm": 3.825154614436418, "learning_rate": 3.5927030734758237e-07, "loss": 0.2564, "step": 34696 }, { "epoch": 0.6031219037355073, "grad_norm": 1.7903068505666302, "learning_rate": 3.592432962231143e-07, "loss": 0.3107, "step": 34697 }, { "epoch": 0.6031392862730102, "grad_norm": 1.6442052878054971, "learning_rate": 3.592162855447803e-07, "loss": 0.4005, "step": 34698 }, { "epoch": 0.603156668810513, "grad_norm": 1.7953769848512033, "learning_rate": 3.5918927531266616e-07, "loss": 0.2684, "step": 34699 }, { "epoch": 0.6031740513480158, "grad_norm": 1.9670157299410926, "learning_rate": 3.5916226552685725e-07, "loss": 0.3846, "step": 34700 }, { "epoch": 0.6031914338855187, "grad_norm": 1.8092415946767764, "learning_rate": 3.591352561874391e-07, "loss": 0.1731, "step": 34701 }, { "epoch": 0.6032088164230215, "grad_norm": 1.8231610996156167, "learning_rate": 3.5910824729449767e-07, "loss": 0.1595, "step": 34702 }, { "epoch": 0.6032261989605242, "grad_norm": 4.387564691906047, "learning_rate": 3.5908123884811826e-07, "loss": 0.3446, "step": 34703 }, { "epoch": 0.603243581498027, "grad_norm": 1.2424776933706323, "learning_rate": 3.5905423084838663e-07, "loss": 0.2297, "step": 34704 }, { "epoch": 0.6032609640355299, "grad_norm": 1.1701969976606492, "learning_rate": 3.5902722329538844e-07, "loss": 0.1307, "step": 34705 }, { "epoch": 0.6032783465730327, "grad_norm": 7.304659741213805, "learning_rate": 3.5900021618920923e-07, "loss": 0.31, "step": 34706 }, { "epoch": 0.6032957291105355, "grad_norm": 2.477066200603838, "learning_rate": 3.589732095299344e-07, "loss": 0.1818, "step": 34707 }, { "epoch": 0.6033131116480384, "grad_norm": 0.9852424367676164, "learning_rate": 3.589462033176498e-07, "loss": 0.1574, "step": 34708 }, { "epoch": 0.6033304941855412, "grad_norm": 1.7961534135830215, "learning_rate": 3.589191975524408e-07, "loss": 0.2064, "step": 34709 }, { "epoch": 0.603347876723044, "grad_norm": 1.237987799764692, "learning_rate": 3.5889219223439325e-07, "loss": 0.1403, "step": 34710 }, { "epoch": 0.6033652592605468, "grad_norm": 1.5650388354736424, "learning_rate": 3.588651873635925e-07, "loss": 0.2334, "step": 34711 }, { "epoch": 0.6033826417980497, "grad_norm": 2.095021132713455, "learning_rate": 3.588381829401247e-07, "loss": 0.2313, "step": 34712 }, { "epoch": 0.6034000243355525, "grad_norm": 0.9156223917367223, "learning_rate": 3.5881117896407464e-07, "loss": 0.1625, "step": 34713 }, { "epoch": 0.6034174068730553, "grad_norm": 1.403882935085537, "learning_rate": 3.5878417543552835e-07, "loss": 0.2731, "step": 34714 }, { "epoch": 0.6034347894105582, "grad_norm": 3.9074917727405745, "learning_rate": 3.5875717235457125e-07, "loss": 0.2335, "step": 34715 }, { "epoch": 0.603452171948061, "grad_norm": 1.4076701120314001, "learning_rate": 3.587301697212892e-07, "loss": 0.1766, "step": 34716 }, { "epoch": 0.6034695544855638, "grad_norm": 1.5909285792486316, "learning_rate": 3.5870316753576745e-07, "loss": 0.2185, "step": 34717 }, { "epoch": 0.6034869370230667, "grad_norm": 4.30167392833892, "learning_rate": 3.58676165798092e-07, "loss": 0.3968, "step": 34718 }, { "epoch": 0.6035043195605695, "grad_norm": 1.5567124839191697, "learning_rate": 3.5864916450834813e-07, "loss": 0.2797, "step": 34719 }, { "epoch": 0.6035217020980723, "grad_norm": 1.1603215173271466, "learning_rate": 3.586221636666213e-07, "loss": 0.2435, "step": 34720 }, { "epoch": 0.6035390846355752, "grad_norm": 1.7656034618401153, "learning_rate": 3.5859516327299737e-07, "loss": 0.1965, "step": 34721 }, { "epoch": 0.6035564671730779, "grad_norm": 2.467485465880243, "learning_rate": 3.5856816332756185e-07, "loss": 0.3004, "step": 34722 }, { "epoch": 0.6035738497105807, "grad_norm": 1.321000027069801, "learning_rate": 3.5854116383040015e-07, "loss": 0.1638, "step": 34723 }, { "epoch": 0.6035912322480835, "grad_norm": 1.0702410738049093, "learning_rate": 3.5851416478159813e-07, "loss": 0.3284, "step": 34724 }, { "epoch": 0.6036086147855864, "grad_norm": 1.2341676434632194, "learning_rate": 3.584871661812413e-07, "loss": 0.2013, "step": 34725 }, { "epoch": 0.6036259973230892, "grad_norm": 1.6553240361685697, "learning_rate": 3.5846016802941494e-07, "loss": 0.2341, "step": 34726 }, { "epoch": 0.603643379860592, "grad_norm": 2.6923284464219113, "learning_rate": 3.584331703262049e-07, "loss": 0.2052, "step": 34727 }, { "epoch": 0.6036607623980949, "grad_norm": 1.6025575299959567, "learning_rate": 3.584061730716966e-07, "loss": 0.2641, "step": 34728 }, { "epoch": 0.6036781449355977, "grad_norm": 0.9937141670001338, "learning_rate": 3.583791762659758e-07, "loss": 0.1707, "step": 34729 }, { "epoch": 0.6036955274731005, "grad_norm": 1.2800520715394557, "learning_rate": 3.583521799091279e-07, "loss": 0.2062, "step": 34730 }, { "epoch": 0.6037129100106033, "grad_norm": 2.372392050890776, "learning_rate": 3.583251840012387e-07, "loss": 0.2427, "step": 34731 }, { "epoch": 0.6037302925481062, "grad_norm": 1.9384681671411719, "learning_rate": 3.5829818854239333e-07, "loss": 0.2298, "step": 34732 }, { "epoch": 0.603747675085609, "grad_norm": 0.905438390895837, "learning_rate": 3.5827119353267777e-07, "loss": 0.2206, "step": 34733 }, { "epoch": 0.6037650576231118, "grad_norm": 1.5976373992877464, "learning_rate": 3.582441989721773e-07, "loss": 0.1736, "step": 34734 }, { "epoch": 0.6037824401606147, "grad_norm": 1.6792684921307606, "learning_rate": 3.582172048609776e-07, "loss": 0.1917, "step": 34735 }, { "epoch": 0.6037998226981175, "grad_norm": 1.9169665790578452, "learning_rate": 3.5819021119916426e-07, "loss": 0.2274, "step": 34736 }, { "epoch": 0.6038172052356203, "grad_norm": 1.8584393799647039, "learning_rate": 3.581632179868231e-07, "loss": 0.2666, "step": 34737 }, { "epoch": 0.6038345877731232, "grad_norm": 1.952037884849041, "learning_rate": 3.58136225224039e-07, "loss": 0.2814, "step": 34738 }, { "epoch": 0.603851970310626, "grad_norm": 1.3747138150214973, "learning_rate": 3.581092329108981e-07, "loss": 0.3111, "step": 34739 }, { "epoch": 0.6038693528481288, "grad_norm": 1.1889401322163349, "learning_rate": 3.580822410474856e-07, "loss": 0.1758, "step": 34740 }, { "epoch": 0.6038867353856316, "grad_norm": 1.35826251180317, "learning_rate": 3.5805524963388726e-07, "loss": 0.1652, "step": 34741 }, { "epoch": 0.6039041179231344, "grad_norm": 1.3531573356608104, "learning_rate": 3.580282586701885e-07, "loss": 0.1895, "step": 34742 }, { "epoch": 0.6039215004606372, "grad_norm": 1.4110661493264014, "learning_rate": 3.5800126815647515e-07, "loss": 0.155, "step": 34743 }, { "epoch": 0.60393888299814, "grad_norm": 2.339005495752101, "learning_rate": 3.5797427809283256e-07, "loss": 0.2022, "step": 34744 }, { "epoch": 0.6039562655356429, "grad_norm": 1.718074570280527, "learning_rate": 3.579472884793461e-07, "loss": 0.2275, "step": 34745 }, { "epoch": 0.6039736480731457, "grad_norm": 1.344503781892388, "learning_rate": 3.5792029931610146e-07, "loss": 0.1987, "step": 34746 }, { "epoch": 0.6039910306106485, "grad_norm": 1.1035651228733754, "learning_rate": 3.5789331060318436e-07, "loss": 0.1696, "step": 34747 }, { "epoch": 0.6040084131481513, "grad_norm": 1.5827563330958179, "learning_rate": 3.5786632234068e-07, "loss": 0.2227, "step": 34748 }, { "epoch": 0.6040257956856542, "grad_norm": 1.1422517625017998, "learning_rate": 3.578393345286742e-07, "loss": 0.1817, "step": 34749 }, { "epoch": 0.604043178223157, "grad_norm": 1.6557133259322567, "learning_rate": 3.5781234716725263e-07, "loss": 0.2267, "step": 34750 }, { "epoch": 0.6040605607606598, "grad_norm": 1.027521551600797, "learning_rate": 3.5778536025650024e-07, "loss": 0.1737, "step": 34751 }, { "epoch": 0.6040779432981627, "grad_norm": 1.5510631763365463, "learning_rate": 3.577583737965031e-07, "loss": 0.177, "step": 34752 }, { "epoch": 0.6040953258356655, "grad_norm": 1.1395742425452777, "learning_rate": 3.577313877873466e-07, "loss": 0.1121, "step": 34753 }, { "epoch": 0.6041127083731683, "grad_norm": 2.2923254481776634, "learning_rate": 3.577044022291161e-07, "loss": 0.2142, "step": 34754 }, { "epoch": 0.6041300909106712, "grad_norm": 1.326212637140013, "learning_rate": 3.576774171218974e-07, "loss": 0.1618, "step": 34755 }, { "epoch": 0.604147473448174, "grad_norm": 1.2637338739189636, "learning_rate": 3.5765043246577607e-07, "loss": 0.1997, "step": 34756 }, { "epoch": 0.6041648559856768, "grad_norm": 1.1254080955923063, "learning_rate": 3.5762344826083724e-07, "loss": 0.2063, "step": 34757 }, { "epoch": 0.6041822385231796, "grad_norm": 2.309964528221152, "learning_rate": 3.5759646450716676e-07, "loss": 0.2588, "step": 34758 }, { "epoch": 0.6041996210606825, "grad_norm": 1.8751367708385824, "learning_rate": 3.5756948120484996e-07, "loss": 0.2292, "step": 34759 }, { "epoch": 0.6042170035981853, "grad_norm": 0.7919042002156103, "learning_rate": 3.5754249835397255e-07, "loss": 0.1516, "step": 34760 }, { "epoch": 0.6042343861356881, "grad_norm": 1.6409424293733634, "learning_rate": 3.575155159546199e-07, "loss": 0.2021, "step": 34761 }, { "epoch": 0.6042517686731909, "grad_norm": 1.5588024144985677, "learning_rate": 3.574885340068778e-07, "loss": 0.2893, "step": 34762 }, { "epoch": 0.6042691512106937, "grad_norm": 1.4256747453744762, "learning_rate": 3.574615525108316e-07, "loss": 0.1746, "step": 34763 }, { "epoch": 0.6042865337481965, "grad_norm": 0.7803360423549053, "learning_rate": 3.5743457146656677e-07, "loss": 0.2071, "step": 34764 }, { "epoch": 0.6043039162856993, "grad_norm": 1.577497535795371, "learning_rate": 3.5740759087416867e-07, "loss": 0.1734, "step": 34765 }, { "epoch": 0.6043212988232022, "grad_norm": 1.8534106001946642, "learning_rate": 3.5738061073372316e-07, "loss": 0.2183, "step": 34766 }, { "epoch": 0.604338681360705, "grad_norm": 2.881703497746463, "learning_rate": 3.5735363104531547e-07, "loss": 0.2803, "step": 34767 }, { "epoch": 0.6043560638982078, "grad_norm": 2.5194590435722413, "learning_rate": 3.573266518090313e-07, "loss": 0.3345, "step": 34768 }, { "epoch": 0.6043734464357107, "grad_norm": 1.7980128073548087, "learning_rate": 3.572996730249563e-07, "loss": 0.2373, "step": 34769 }, { "epoch": 0.6043908289732135, "grad_norm": 1.392269282023247, "learning_rate": 3.5727269469317566e-07, "loss": 0.2425, "step": 34770 }, { "epoch": 0.6044082115107163, "grad_norm": 1.7211285881547098, "learning_rate": 3.572457168137749e-07, "loss": 0.1223, "step": 34771 }, { "epoch": 0.6044255940482192, "grad_norm": 1.9182167978839955, "learning_rate": 3.572187393868398e-07, "loss": 0.122, "step": 34772 }, { "epoch": 0.604442976585722, "grad_norm": 1.2865304841019316, "learning_rate": 3.571917624124555e-07, "loss": 0.2214, "step": 34773 }, { "epoch": 0.6044603591232248, "grad_norm": 1.712552113471889, "learning_rate": 3.5716478589070787e-07, "loss": 0.1931, "step": 34774 }, { "epoch": 0.6044777416607277, "grad_norm": 2.131428228889273, "learning_rate": 3.5713780982168234e-07, "loss": 0.2177, "step": 34775 }, { "epoch": 0.6044951241982305, "grad_norm": 1.5404780151822306, "learning_rate": 3.571108342054641e-07, "loss": 0.1944, "step": 34776 }, { "epoch": 0.6045125067357333, "grad_norm": 1.4378111318037745, "learning_rate": 3.57083859042139e-07, "loss": 0.2155, "step": 34777 }, { "epoch": 0.6045298892732361, "grad_norm": 2.1619803111674685, "learning_rate": 3.5705688433179246e-07, "loss": 0.1964, "step": 34778 }, { "epoch": 0.604547271810739, "grad_norm": 1.5421209781452305, "learning_rate": 3.570299100745098e-07, "loss": 0.2145, "step": 34779 }, { "epoch": 0.6045646543482418, "grad_norm": 1.6711161562560124, "learning_rate": 3.570029362703768e-07, "loss": 0.2598, "step": 34780 }, { "epoch": 0.6045820368857446, "grad_norm": 0.8980204521798099, "learning_rate": 3.5697596291947864e-07, "loss": 0.1445, "step": 34781 }, { "epoch": 0.6045994194232474, "grad_norm": 1.5767784274832686, "learning_rate": 3.5694899002190117e-07, "loss": 0.1914, "step": 34782 }, { "epoch": 0.6046168019607502, "grad_norm": 1.9764110686358172, "learning_rate": 3.569220175777297e-07, "loss": 0.1925, "step": 34783 }, { "epoch": 0.604634184498253, "grad_norm": 0.8464693630828263, "learning_rate": 3.568950455870494e-07, "loss": 0.1404, "step": 34784 }, { "epoch": 0.6046515670357558, "grad_norm": 5.384815803520335, "learning_rate": 3.5686807404994635e-07, "loss": 0.1941, "step": 34785 }, { "epoch": 0.6046689495732587, "grad_norm": 1.5064808527360125, "learning_rate": 3.568411029665057e-07, "loss": 0.2361, "step": 34786 }, { "epoch": 0.6046863321107615, "grad_norm": 1.8318450228729515, "learning_rate": 3.568141323368129e-07, "loss": 0.1736, "step": 34787 }, { "epoch": 0.6047037146482643, "grad_norm": 1.824525285145228, "learning_rate": 3.567871621609537e-07, "loss": 0.1633, "step": 34788 }, { "epoch": 0.6047210971857672, "grad_norm": 1.6180090520098684, "learning_rate": 3.5676019243901335e-07, "loss": 0.1494, "step": 34789 }, { "epoch": 0.60473847972327, "grad_norm": 1.3472408946370147, "learning_rate": 3.5673322317107724e-07, "loss": 0.1684, "step": 34790 }, { "epoch": 0.6047558622607728, "grad_norm": 1.6473965004841054, "learning_rate": 3.567062543572311e-07, "loss": 0.1963, "step": 34791 }, { "epoch": 0.6047732447982757, "grad_norm": 1.7007342836073538, "learning_rate": 3.5667928599756027e-07, "loss": 0.22, "step": 34792 }, { "epoch": 0.6047906273357785, "grad_norm": 1.6639317622850456, "learning_rate": 3.5665231809215035e-07, "loss": 0.2303, "step": 34793 }, { "epoch": 0.6048080098732813, "grad_norm": 1.1399058765073842, "learning_rate": 3.5662535064108684e-07, "loss": 0.1703, "step": 34794 }, { "epoch": 0.6048253924107841, "grad_norm": 2.3859905508644874, "learning_rate": 3.56598383644455e-07, "loss": 0.2364, "step": 34795 }, { "epoch": 0.604842774948287, "grad_norm": 1.5083617631899773, "learning_rate": 3.565714171023403e-07, "loss": 0.3948, "step": 34796 }, { "epoch": 0.6048601574857898, "grad_norm": 5.568110861250764, "learning_rate": 3.5654445101482836e-07, "loss": 0.2853, "step": 34797 }, { "epoch": 0.6048775400232926, "grad_norm": 1.2153019357853432, "learning_rate": 3.565174853820046e-07, "loss": 0.1726, "step": 34798 }, { "epoch": 0.6048949225607955, "grad_norm": 1.6108892805355002, "learning_rate": 3.5649052020395454e-07, "loss": 0.2369, "step": 34799 }, { "epoch": 0.6049123050982983, "grad_norm": 1.7122877581588136, "learning_rate": 3.5646355548076355e-07, "loss": 0.1981, "step": 34800 }, { "epoch": 0.6049296876358011, "grad_norm": 2.3132716749959705, "learning_rate": 3.5643659121251745e-07, "loss": 0.2558, "step": 34801 }, { "epoch": 0.6049470701733038, "grad_norm": 1.781958712920762, "learning_rate": 3.5640962739930115e-07, "loss": 0.2103, "step": 34802 }, { "epoch": 0.6049644527108067, "grad_norm": 1.4835097839682674, "learning_rate": 3.563826640412004e-07, "loss": 0.2466, "step": 34803 }, { "epoch": 0.6049818352483095, "grad_norm": 2.489943530499081, "learning_rate": 3.5635570113830054e-07, "loss": 0.3407, "step": 34804 }, { "epoch": 0.6049992177858123, "grad_norm": 2.9339898867060072, "learning_rate": 3.563287386906872e-07, "loss": 0.2427, "step": 34805 }, { "epoch": 0.6050166003233152, "grad_norm": 2.0079590409282337, "learning_rate": 3.5630177669844576e-07, "loss": 0.2063, "step": 34806 }, { "epoch": 0.605033982860818, "grad_norm": 1.2238355359194102, "learning_rate": 3.562748151616618e-07, "loss": 0.1959, "step": 34807 }, { "epoch": 0.6050513653983208, "grad_norm": 1.518117332940623, "learning_rate": 3.5624785408042057e-07, "loss": 0.2142, "step": 34808 }, { "epoch": 0.6050687479358237, "grad_norm": 1.2440447203030442, "learning_rate": 3.562208934548077e-07, "loss": 0.1643, "step": 34809 }, { "epoch": 0.6050861304733265, "grad_norm": 1.3537499840952518, "learning_rate": 3.561939332849083e-07, "loss": 0.1434, "step": 34810 }, { "epoch": 0.6051035130108293, "grad_norm": 1.661315448609063, "learning_rate": 3.561669735708083e-07, "loss": 0.1599, "step": 34811 }, { "epoch": 0.6051208955483321, "grad_norm": 1.886182459286108, "learning_rate": 3.5614001431259265e-07, "loss": 0.2465, "step": 34812 }, { "epoch": 0.605138278085835, "grad_norm": 1.4670213051169343, "learning_rate": 3.561130555103474e-07, "loss": 0.1823, "step": 34813 }, { "epoch": 0.6051556606233378, "grad_norm": 1.2300132901238825, "learning_rate": 3.560860971641576e-07, "loss": 0.1555, "step": 34814 }, { "epoch": 0.6051730431608406, "grad_norm": 2.109839181430147, "learning_rate": 3.5605913927410857e-07, "loss": 0.2163, "step": 34815 }, { "epoch": 0.6051904256983435, "grad_norm": 0.6646062492727473, "learning_rate": 3.5603218184028603e-07, "loss": 0.1551, "step": 34816 }, { "epoch": 0.6052078082358463, "grad_norm": 4.266586822954343, "learning_rate": 3.5600522486277533e-07, "loss": 0.3596, "step": 34817 }, { "epoch": 0.6052251907733491, "grad_norm": 3.4016227571388344, "learning_rate": 3.55978268341662e-07, "loss": 0.3155, "step": 34818 }, { "epoch": 0.605242573310852, "grad_norm": 2.4892630164008867, "learning_rate": 3.559513122770315e-07, "loss": 0.2443, "step": 34819 }, { "epoch": 0.6052599558483548, "grad_norm": 1.31124353788975, "learning_rate": 3.5592435666896904e-07, "loss": 0.1701, "step": 34820 }, { "epoch": 0.6052773383858576, "grad_norm": 1.8763827002973816, "learning_rate": 3.558974015175601e-07, "loss": 0.2448, "step": 34821 }, { "epoch": 0.6052947209233603, "grad_norm": 1.0884243679427221, "learning_rate": 3.5587044682289034e-07, "loss": 0.1897, "step": 34822 }, { "epoch": 0.6053121034608632, "grad_norm": 3.3884047995592, "learning_rate": 3.558434925850449e-07, "loss": 0.283, "step": 34823 }, { "epoch": 0.605329485998366, "grad_norm": 0.8440506450163141, "learning_rate": 3.558165388041095e-07, "loss": 0.1226, "step": 34824 }, { "epoch": 0.6053468685358688, "grad_norm": 2.341184590870613, "learning_rate": 3.5578958548016933e-07, "loss": 0.3333, "step": 34825 }, { "epoch": 0.6053642510733717, "grad_norm": 2.871803111252626, "learning_rate": 3.5576263261331027e-07, "loss": 0.2787, "step": 34826 }, { "epoch": 0.6053816336108745, "grad_norm": 1.7015353029233315, "learning_rate": 3.5573568020361707e-07, "loss": 0.1887, "step": 34827 }, { "epoch": 0.6053990161483773, "grad_norm": 1.4587387717174027, "learning_rate": 3.557087282511756e-07, "loss": 0.1548, "step": 34828 }, { "epoch": 0.6054163986858802, "grad_norm": 2.2416532778241756, "learning_rate": 3.5568177675607103e-07, "loss": 0.1936, "step": 34829 }, { "epoch": 0.605433781223383, "grad_norm": 1.633872092371634, "learning_rate": 3.5565482571838904e-07, "loss": 0.1709, "step": 34830 }, { "epoch": 0.6054511637608858, "grad_norm": 5.004782320156204, "learning_rate": 3.556278751382149e-07, "loss": 0.2679, "step": 34831 }, { "epoch": 0.6054685462983886, "grad_norm": 2.5287751572148847, "learning_rate": 3.5560092501563427e-07, "loss": 0.2924, "step": 34832 }, { "epoch": 0.6054859288358915, "grad_norm": 4.098274597821119, "learning_rate": 3.5557397535073233e-07, "loss": 0.3571, "step": 34833 }, { "epoch": 0.6055033113733943, "grad_norm": 1.2444013852906897, "learning_rate": 3.5554702614359447e-07, "loss": 0.2301, "step": 34834 }, { "epoch": 0.6055206939108971, "grad_norm": 1.9519873826372935, "learning_rate": 3.55520077394306e-07, "loss": 0.1962, "step": 34835 }, { "epoch": 0.6055380764484, "grad_norm": 1.9765378465698362, "learning_rate": 3.554931291029527e-07, "loss": 0.2117, "step": 34836 }, { "epoch": 0.6055554589859028, "grad_norm": 1.7051590835011885, "learning_rate": 3.5546618126961976e-07, "loss": 0.2756, "step": 34837 }, { "epoch": 0.6055728415234056, "grad_norm": 1.3588567574487393, "learning_rate": 3.5543923389439277e-07, "loss": 0.144, "step": 34838 }, { "epoch": 0.6055902240609085, "grad_norm": 1.5102414316285198, "learning_rate": 3.554122869773569e-07, "loss": 0.269, "step": 34839 }, { "epoch": 0.6056076065984113, "grad_norm": 1.7257100331246304, "learning_rate": 3.553853405185976e-07, "loss": 0.1127, "step": 34840 }, { "epoch": 0.6056249891359141, "grad_norm": 1.8325667490194122, "learning_rate": 3.553583945182004e-07, "loss": 0.2417, "step": 34841 }, { "epoch": 0.6056423716734168, "grad_norm": 1.6421251725795067, "learning_rate": 3.553314489762507e-07, "loss": 0.1844, "step": 34842 }, { "epoch": 0.6056597542109197, "grad_norm": 1.2848726240612753, "learning_rate": 3.553045038928336e-07, "loss": 0.1407, "step": 34843 }, { "epoch": 0.6056771367484225, "grad_norm": 1.3698273877465503, "learning_rate": 3.55277559268035e-07, "loss": 0.1458, "step": 34844 }, { "epoch": 0.6056945192859253, "grad_norm": 1.8205733374712332, "learning_rate": 3.552506151019401e-07, "loss": 0.1949, "step": 34845 }, { "epoch": 0.6057119018234282, "grad_norm": 3.559211754568686, "learning_rate": 3.552236713946341e-07, "loss": 0.2225, "step": 34846 }, { "epoch": 0.605729284360931, "grad_norm": 1.4327060750013694, "learning_rate": 3.551967281462026e-07, "loss": 0.1491, "step": 34847 }, { "epoch": 0.6057466668984338, "grad_norm": 1.8118108964767419, "learning_rate": 3.5516978535673093e-07, "loss": 0.1606, "step": 34848 }, { "epoch": 0.6057640494359366, "grad_norm": 3.222318113064942, "learning_rate": 3.551428430263046e-07, "loss": 0.2475, "step": 34849 }, { "epoch": 0.6057814319734395, "grad_norm": 1.3475438578730985, "learning_rate": 3.551159011550089e-07, "loss": 0.184, "step": 34850 }, { "epoch": 0.6057988145109423, "grad_norm": 1.4727817846483946, "learning_rate": 3.550889597429293e-07, "loss": 0.1864, "step": 34851 }, { "epoch": 0.6058161970484451, "grad_norm": 1.1153058574945882, "learning_rate": 3.5506201879015095e-07, "loss": 0.2086, "step": 34852 }, { "epoch": 0.605833579585948, "grad_norm": 1.75187716291639, "learning_rate": 3.550350782967595e-07, "loss": 0.1737, "step": 34853 }, { "epoch": 0.6058509621234508, "grad_norm": 1.3149496668506075, "learning_rate": 3.550081382628402e-07, "loss": 0.2155, "step": 34854 }, { "epoch": 0.6058683446609536, "grad_norm": 1.1108408928467421, "learning_rate": 3.5498119868847857e-07, "loss": 0.1383, "step": 34855 }, { "epoch": 0.6058857271984565, "grad_norm": 2.340563478268617, "learning_rate": 3.5495425957375987e-07, "loss": 0.2136, "step": 34856 }, { "epoch": 0.6059031097359593, "grad_norm": 2.232412180435498, "learning_rate": 3.549273209187698e-07, "loss": 0.1736, "step": 34857 }, { "epoch": 0.6059204922734621, "grad_norm": 1.552901894560302, "learning_rate": 3.549003827235932e-07, "loss": 0.1566, "step": 34858 }, { "epoch": 0.605937874810965, "grad_norm": 0.9429749665490487, "learning_rate": 3.548734449883158e-07, "loss": 0.2056, "step": 34859 }, { "epoch": 0.6059552573484678, "grad_norm": 1.4660993448862316, "learning_rate": 3.548465077130228e-07, "loss": 0.1545, "step": 34860 }, { "epoch": 0.6059726398859705, "grad_norm": 1.9691815521173943, "learning_rate": 3.548195708977999e-07, "loss": 0.2124, "step": 34861 }, { "epoch": 0.6059900224234733, "grad_norm": 1.8735469465580354, "learning_rate": 3.5479263454273206e-07, "loss": 0.1569, "step": 34862 }, { "epoch": 0.6060074049609762, "grad_norm": 1.1597777850450008, "learning_rate": 3.5476569864790504e-07, "loss": 0.176, "step": 34863 }, { "epoch": 0.606024787498479, "grad_norm": 1.672513934889207, "learning_rate": 3.547387632134041e-07, "loss": 0.2191, "step": 34864 }, { "epoch": 0.6060421700359818, "grad_norm": 1.176836500600939, "learning_rate": 3.5471182823931433e-07, "loss": 0.0888, "step": 34865 }, { "epoch": 0.6060595525734847, "grad_norm": 1.8769450610091731, "learning_rate": 3.546848937257215e-07, "loss": 0.3145, "step": 34866 }, { "epoch": 0.6060769351109875, "grad_norm": 1.4168054231554967, "learning_rate": 3.5465795967271074e-07, "loss": 0.1607, "step": 34867 }, { "epoch": 0.6060943176484903, "grad_norm": 1.4492279200415992, "learning_rate": 3.546310260803674e-07, "loss": 0.161, "step": 34868 }, { "epoch": 0.6061117001859931, "grad_norm": 1.2313326210840336, "learning_rate": 3.5460409294877704e-07, "loss": 0.213, "step": 34869 }, { "epoch": 0.606129082723496, "grad_norm": 1.0856361372406567, "learning_rate": 3.5457716027802497e-07, "loss": 0.1918, "step": 34870 }, { "epoch": 0.6061464652609988, "grad_norm": 1.8837045159733452, "learning_rate": 3.5455022806819636e-07, "loss": 0.1501, "step": 34871 }, { "epoch": 0.6061638477985016, "grad_norm": 1.753420552653453, "learning_rate": 3.5452329631937676e-07, "loss": 0.1831, "step": 34872 }, { "epoch": 0.6061812303360045, "grad_norm": 1.0000745225349839, "learning_rate": 3.544963650316514e-07, "loss": 0.2361, "step": 34873 }, { "epoch": 0.6061986128735073, "grad_norm": 1.6751184547443905, "learning_rate": 3.544694342051059e-07, "loss": 0.2677, "step": 34874 }, { "epoch": 0.6062159954110101, "grad_norm": 1.140386087070976, "learning_rate": 3.5444250383982535e-07, "loss": 0.1708, "step": 34875 }, { "epoch": 0.606233377948513, "grad_norm": 1.0576786593117222, "learning_rate": 3.544155739358954e-07, "loss": 0.117, "step": 34876 }, { "epoch": 0.6062507604860158, "grad_norm": 1.6623682078642925, "learning_rate": 3.5438864449340086e-07, "loss": 0.2075, "step": 34877 }, { "epoch": 0.6062681430235186, "grad_norm": 1.4539891693680052, "learning_rate": 3.543617155124276e-07, "loss": 0.2691, "step": 34878 }, { "epoch": 0.6062855255610214, "grad_norm": 1.3516177058882226, "learning_rate": 3.5433478699306076e-07, "loss": 0.1637, "step": 34879 }, { "epoch": 0.6063029080985243, "grad_norm": 1.1555771130231105, "learning_rate": 3.5430785893538575e-07, "loss": 0.2135, "step": 34880 }, { "epoch": 0.606320290636027, "grad_norm": 1.23024569659298, "learning_rate": 3.542809313394878e-07, "loss": 0.1831, "step": 34881 }, { "epoch": 0.6063376731735298, "grad_norm": 0.9485712751206813, "learning_rate": 3.542540042054525e-07, "loss": 0.2327, "step": 34882 }, { "epoch": 0.6063550557110327, "grad_norm": 0.9393629381843909, "learning_rate": 3.542270775333651e-07, "loss": 0.1413, "step": 34883 }, { "epoch": 0.6063724382485355, "grad_norm": 2.3500449785589264, "learning_rate": 3.5420015132331094e-07, "loss": 0.1899, "step": 34884 }, { "epoch": 0.6063898207860383, "grad_norm": 2.3316416601815906, "learning_rate": 3.541732255753751e-07, "loss": 0.1853, "step": 34885 }, { "epoch": 0.6064072033235411, "grad_norm": 1.5424544927124577, "learning_rate": 3.5414630028964324e-07, "loss": 0.1793, "step": 34886 }, { "epoch": 0.606424585861044, "grad_norm": 1.7549967285464243, "learning_rate": 3.541193754662006e-07, "loss": 0.2493, "step": 34887 }, { "epoch": 0.6064419683985468, "grad_norm": 2.1465568032984743, "learning_rate": 3.540924511051325e-07, "loss": 0.1451, "step": 34888 }, { "epoch": 0.6064593509360496, "grad_norm": 1.8311923197883688, "learning_rate": 3.540655272065245e-07, "loss": 0.184, "step": 34889 }, { "epoch": 0.6064767334735525, "grad_norm": 1.0373473484672342, "learning_rate": 3.5403860377046167e-07, "loss": 0.1136, "step": 34890 }, { "epoch": 0.6064941160110553, "grad_norm": 0.9785727651524856, "learning_rate": 3.5401168079702924e-07, "loss": 0.1605, "step": 34891 }, { "epoch": 0.6065114985485581, "grad_norm": 1.9600176986414806, "learning_rate": 3.5398475828631286e-07, "loss": 0.2163, "step": 34892 }, { "epoch": 0.606528881086061, "grad_norm": 2.323194143887295, "learning_rate": 3.539578362383976e-07, "loss": 0.2897, "step": 34893 }, { "epoch": 0.6065462636235638, "grad_norm": 1.4224520105836216, "learning_rate": 3.53930914653369e-07, "loss": 0.1735, "step": 34894 }, { "epoch": 0.6065636461610666, "grad_norm": 1.970316840462545, "learning_rate": 3.5390399353131247e-07, "loss": 0.1544, "step": 34895 }, { "epoch": 0.6065810286985694, "grad_norm": 1.8688336913088885, "learning_rate": 3.538770728723129e-07, "loss": 0.1766, "step": 34896 }, { "epoch": 0.6065984112360723, "grad_norm": 2.044433510188138, "learning_rate": 3.53850152676456e-07, "loss": 0.1983, "step": 34897 }, { "epoch": 0.6066157937735751, "grad_norm": 1.7027350871491995, "learning_rate": 3.53823232943827e-07, "loss": 0.1663, "step": 34898 }, { "epoch": 0.6066331763110779, "grad_norm": 2.7261727992556244, "learning_rate": 3.537963136745111e-07, "loss": 0.2164, "step": 34899 }, { "epoch": 0.6066505588485808, "grad_norm": 1.4243588598341284, "learning_rate": 3.537693948685938e-07, "loss": 0.1442, "step": 34900 }, { "epoch": 0.6066679413860835, "grad_norm": 1.7154016389779443, "learning_rate": 3.5374247652616027e-07, "loss": 0.1928, "step": 34901 }, { "epoch": 0.6066853239235863, "grad_norm": 2.082109741334344, "learning_rate": 3.537155586472961e-07, "loss": 0.159, "step": 34902 }, { "epoch": 0.6067027064610891, "grad_norm": 1.98134226444624, "learning_rate": 3.536886412320863e-07, "loss": 0.2173, "step": 34903 }, { "epoch": 0.606720088998592, "grad_norm": 2.0724549051852055, "learning_rate": 3.5366172428061626e-07, "loss": 0.3279, "step": 34904 }, { "epoch": 0.6067374715360948, "grad_norm": 1.1253547423554613, "learning_rate": 3.536348077929714e-07, "loss": 0.117, "step": 34905 }, { "epoch": 0.6067548540735976, "grad_norm": 1.492219338518727, "learning_rate": 3.5360789176923676e-07, "loss": 0.1569, "step": 34906 }, { "epoch": 0.6067722366111005, "grad_norm": 1.6093578374496398, "learning_rate": 3.535809762094981e-07, "loss": 0.1522, "step": 34907 }, { "epoch": 0.6067896191486033, "grad_norm": 2.7959986878819527, "learning_rate": 3.5355406111384054e-07, "loss": 0.1828, "step": 34908 }, { "epoch": 0.6068070016861061, "grad_norm": 1.16634946255772, "learning_rate": 3.535271464823492e-07, "loss": 0.1011, "step": 34909 }, { "epoch": 0.606824384223609, "grad_norm": 2.961816889339084, "learning_rate": 3.5350023231510947e-07, "loss": 0.2444, "step": 34910 }, { "epoch": 0.6068417667611118, "grad_norm": 1.1373225859351632, "learning_rate": 3.5347331861220674e-07, "loss": 0.1979, "step": 34911 }, { "epoch": 0.6068591492986146, "grad_norm": 1.5572180848108192, "learning_rate": 3.534464053737262e-07, "loss": 0.2373, "step": 34912 }, { "epoch": 0.6068765318361174, "grad_norm": 1.6652503469911772, "learning_rate": 3.534194925997534e-07, "loss": 0.2331, "step": 34913 }, { "epoch": 0.6068939143736203, "grad_norm": 1.5702938132713706, "learning_rate": 3.5339258029037354e-07, "loss": 0.1933, "step": 34914 }, { "epoch": 0.6069112969111231, "grad_norm": 1.1684996842080873, "learning_rate": 3.5336566844567173e-07, "loss": 0.121, "step": 34915 }, { "epoch": 0.6069286794486259, "grad_norm": 1.413667333987907, "learning_rate": 3.533387570657333e-07, "loss": 0.2431, "step": 34916 }, { "epoch": 0.6069460619861288, "grad_norm": 2.1238165917260345, "learning_rate": 3.5331184615064376e-07, "loss": 0.2084, "step": 34917 }, { "epoch": 0.6069634445236316, "grad_norm": 1.7566854835584262, "learning_rate": 3.532849357004881e-07, "loss": 0.2135, "step": 34918 }, { "epoch": 0.6069808270611344, "grad_norm": 2.2932403982307354, "learning_rate": 3.53258025715352e-07, "loss": 0.238, "step": 34919 }, { "epoch": 0.6069982095986373, "grad_norm": 2.658240822802502, "learning_rate": 3.532311161953204e-07, "loss": 0.1642, "step": 34920 }, { "epoch": 0.60701559213614, "grad_norm": 2.178351957090843, "learning_rate": 3.5320420714047906e-07, "loss": 0.166, "step": 34921 }, { "epoch": 0.6070329746736428, "grad_norm": 1.1040618462278973, "learning_rate": 3.531772985509127e-07, "loss": 0.1885, "step": 34922 }, { "epoch": 0.6070503572111456, "grad_norm": 1.9195509281882748, "learning_rate": 3.531503904267069e-07, "loss": 0.1754, "step": 34923 }, { "epoch": 0.6070677397486485, "grad_norm": 1.5231492794471497, "learning_rate": 3.5312348276794686e-07, "loss": 0.0975, "step": 34924 }, { "epoch": 0.6070851222861513, "grad_norm": 1.4962450912464145, "learning_rate": 3.5309657557471796e-07, "loss": 0.1943, "step": 34925 }, { "epoch": 0.6071025048236541, "grad_norm": 2.148617235800195, "learning_rate": 3.530696688471053e-07, "loss": 0.1814, "step": 34926 }, { "epoch": 0.607119887361157, "grad_norm": 0.9142188937466929, "learning_rate": 3.530427625851946e-07, "loss": 0.1519, "step": 34927 }, { "epoch": 0.6071372698986598, "grad_norm": 1.5757584093027683, "learning_rate": 3.5301585678907066e-07, "loss": 0.1696, "step": 34928 }, { "epoch": 0.6071546524361626, "grad_norm": 1.624632999148396, "learning_rate": 3.529889514588188e-07, "loss": 0.1222, "step": 34929 }, { "epoch": 0.6071720349736655, "grad_norm": 1.79615074646781, "learning_rate": 3.529620465945246e-07, "loss": 0.1965, "step": 34930 }, { "epoch": 0.6071894175111683, "grad_norm": 1.944832692354182, "learning_rate": 3.529351421962732e-07, "loss": 0.2377, "step": 34931 }, { "epoch": 0.6072068000486711, "grad_norm": 2.9132861036768296, "learning_rate": 3.529082382641496e-07, "loss": 0.2468, "step": 34932 }, { "epoch": 0.6072241825861739, "grad_norm": 1.8354698775437934, "learning_rate": 3.5288133479823966e-07, "loss": 0.1507, "step": 34933 }, { "epoch": 0.6072415651236768, "grad_norm": 1.6569629967934782, "learning_rate": 3.5285443179862816e-07, "loss": 0.1549, "step": 34934 }, { "epoch": 0.6072589476611796, "grad_norm": 2.2810236289523655, "learning_rate": 3.5282752926540036e-07, "loss": 0.3003, "step": 34935 }, { "epoch": 0.6072763301986824, "grad_norm": 2.3303114825576303, "learning_rate": 3.528006271986419e-07, "loss": 0.2209, "step": 34936 }, { "epoch": 0.6072937127361853, "grad_norm": 1.072665448330112, "learning_rate": 3.5277372559843763e-07, "loss": 0.1974, "step": 34937 }, { "epoch": 0.6073110952736881, "grad_norm": 1.5492806703222601, "learning_rate": 3.527468244648732e-07, "loss": 0.1682, "step": 34938 }, { "epoch": 0.6073284778111909, "grad_norm": 1.4603226120223733, "learning_rate": 3.527199237980337e-07, "loss": 0.1472, "step": 34939 }, { "epoch": 0.6073458603486938, "grad_norm": 2.381415927183717, "learning_rate": 3.526930235980044e-07, "loss": 0.2314, "step": 34940 }, { "epoch": 0.6073632428861965, "grad_norm": 2.0911147147558533, "learning_rate": 3.5266612386487036e-07, "loss": 0.2335, "step": 34941 }, { "epoch": 0.6073806254236993, "grad_norm": 1.3228584278032902, "learning_rate": 3.526392245987172e-07, "loss": 0.1232, "step": 34942 }, { "epoch": 0.6073980079612021, "grad_norm": 2.0413744300710905, "learning_rate": 3.5261232579962986e-07, "loss": 0.2245, "step": 34943 }, { "epoch": 0.607415390498705, "grad_norm": 1.2652559993008272, "learning_rate": 3.5258542746769384e-07, "loss": 0.1323, "step": 34944 }, { "epoch": 0.6074327730362078, "grad_norm": 1.4932967907943906, "learning_rate": 3.525585296029942e-07, "loss": 0.1266, "step": 34945 }, { "epoch": 0.6074501555737106, "grad_norm": 2.831887032926214, "learning_rate": 3.525316322056166e-07, "loss": 0.2754, "step": 34946 }, { "epoch": 0.6074675381112135, "grad_norm": 2.1076818720082753, "learning_rate": 3.5250473527564573e-07, "loss": 0.1789, "step": 34947 }, { "epoch": 0.6074849206487163, "grad_norm": 2.9888152030972868, "learning_rate": 3.524778388131671e-07, "loss": 0.2561, "step": 34948 }, { "epoch": 0.6075023031862191, "grad_norm": 1.1689242789665435, "learning_rate": 3.5245094281826595e-07, "loss": 0.1467, "step": 34949 }, { "epoch": 0.607519685723722, "grad_norm": 1.9654328680108415, "learning_rate": 3.5242404729102754e-07, "loss": 0.1915, "step": 34950 }, { "epoch": 0.6075370682612248, "grad_norm": 1.377880701009096, "learning_rate": 3.5239715223153704e-07, "loss": 0.1915, "step": 34951 }, { "epoch": 0.6075544507987276, "grad_norm": 1.8495154120203205, "learning_rate": 3.5237025763988e-07, "loss": 0.1246, "step": 34952 }, { "epoch": 0.6075718333362304, "grad_norm": 2.367881191270058, "learning_rate": 3.523433635161413e-07, "loss": 0.2032, "step": 34953 }, { "epoch": 0.6075892158737333, "grad_norm": 1.5580835885850262, "learning_rate": 3.523164698604062e-07, "loss": 0.2442, "step": 34954 }, { "epoch": 0.6076065984112361, "grad_norm": 1.3184104553433562, "learning_rate": 3.5228957667276014e-07, "loss": 0.1947, "step": 34955 }, { "epoch": 0.6076239809487389, "grad_norm": 1.2145094611238774, "learning_rate": 3.5226268395328836e-07, "loss": 0.127, "step": 34956 }, { "epoch": 0.6076413634862418, "grad_norm": 2.153349675226009, "learning_rate": 3.5223579170207587e-07, "loss": 0.2051, "step": 34957 }, { "epoch": 0.6076587460237446, "grad_norm": 1.665197958170593, "learning_rate": 3.5220889991920823e-07, "loss": 0.1888, "step": 34958 }, { "epoch": 0.6076761285612474, "grad_norm": 1.2665221537325595, "learning_rate": 3.521820086047703e-07, "loss": 0.1445, "step": 34959 }, { "epoch": 0.6076935110987502, "grad_norm": 1.639651485771098, "learning_rate": 3.5215511775884747e-07, "loss": 0.2074, "step": 34960 }, { "epoch": 0.607710893636253, "grad_norm": 2.2309706686400177, "learning_rate": 3.521282273815251e-07, "loss": 0.1848, "step": 34961 }, { "epoch": 0.6077282761737558, "grad_norm": 1.3800862518116026, "learning_rate": 3.521013374728883e-07, "loss": 0.1663, "step": 34962 }, { "epoch": 0.6077456587112586, "grad_norm": 1.3690999014351257, "learning_rate": 3.520744480330223e-07, "loss": 0.1714, "step": 34963 }, { "epoch": 0.6077630412487615, "grad_norm": 0.9918323456543479, "learning_rate": 3.5204755906201247e-07, "loss": 0.1516, "step": 34964 }, { "epoch": 0.6077804237862643, "grad_norm": 1.7929709438768893, "learning_rate": 3.520206705599439e-07, "loss": 0.2273, "step": 34965 }, { "epoch": 0.6077978063237671, "grad_norm": 1.0633144259993594, "learning_rate": 3.5199378252690163e-07, "loss": 0.176, "step": 34966 }, { "epoch": 0.60781518886127, "grad_norm": 1.6042897896803445, "learning_rate": 3.519668949629713e-07, "loss": 0.1618, "step": 34967 }, { "epoch": 0.6078325713987728, "grad_norm": 1.140634837969076, "learning_rate": 3.5194000786823766e-07, "loss": 0.1853, "step": 34968 }, { "epoch": 0.6078499539362756, "grad_norm": 1.0002488125071056, "learning_rate": 3.519131212427864e-07, "loss": 0.2115, "step": 34969 }, { "epoch": 0.6078673364737784, "grad_norm": 2.789866244552484, "learning_rate": 3.5188623508670236e-07, "loss": 0.1319, "step": 34970 }, { "epoch": 0.6078847190112813, "grad_norm": 1.596381735213735, "learning_rate": 3.5185934940007123e-07, "loss": 0.2202, "step": 34971 }, { "epoch": 0.6079021015487841, "grad_norm": 3.161540713678841, "learning_rate": 3.5183246418297763e-07, "loss": 0.2859, "step": 34972 }, { "epoch": 0.6079194840862869, "grad_norm": 1.3484445020211293, "learning_rate": 3.5180557943550715e-07, "loss": 0.1782, "step": 34973 }, { "epoch": 0.6079368666237898, "grad_norm": 1.5903964143034437, "learning_rate": 3.5177869515774477e-07, "loss": 0.1354, "step": 34974 }, { "epoch": 0.6079542491612926, "grad_norm": 1.5419652052601212, "learning_rate": 3.5175181134977597e-07, "loss": 0.1757, "step": 34975 }, { "epoch": 0.6079716316987954, "grad_norm": 1.7023266944593467, "learning_rate": 3.5172492801168573e-07, "loss": 0.145, "step": 34976 }, { "epoch": 0.6079890142362983, "grad_norm": 2.0866424740988814, "learning_rate": 3.5169804514355955e-07, "loss": 0.1437, "step": 34977 }, { "epoch": 0.6080063967738011, "grad_norm": 1.7598078652774578, "learning_rate": 3.516711627454824e-07, "loss": 0.2805, "step": 34978 }, { "epoch": 0.6080237793113039, "grad_norm": 2.154416134888649, "learning_rate": 3.516442808175394e-07, "loss": 0.3311, "step": 34979 }, { "epoch": 0.6080411618488067, "grad_norm": 1.88966008167268, "learning_rate": 3.516173993598159e-07, "loss": 0.1995, "step": 34980 }, { "epoch": 0.6080585443863095, "grad_norm": 1.956835491588591, "learning_rate": 3.515905183723971e-07, "loss": 0.2215, "step": 34981 }, { "epoch": 0.6080759269238123, "grad_norm": 1.515794888418042, "learning_rate": 3.5156363785536814e-07, "loss": 0.2949, "step": 34982 }, { "epoch": 0.6080933094613151, "grad_norm": 1.6920370757079561, "learning_rate": 3.515367578088143e-07, "loss": 0.2644, "step": 34983 }, { "epoch": 0.608110691998818, "grad_norm": 1.9438028762380983, "learning_rate": 3.5150987823282096e-07, "loss": 0.1927, "step": 34984 }, { "epoch": 0.6081280745363208, "grad_norm": 1.3737713612066798, "learning_rate": 3.514829991274728e-07, "loss": 0.1766, "step": 34985 }, { "epoch": 0.6081454570738236, "grad_norm": 3.283773375773202, "learning_rate": 3.5145612049285544e-07, "loss": 0.4179, "step": 34986 }, { "epoch": 0.6081628396113264, "grad_norm": 1.6616367179669076, "learning_rate": 3.514292423290539e-07, "loss": 0.2258, "step": 34987 }, { "epoch": 0.6081802221488293, "grad_norm": 1.5731562901281517, "learning_rate": 3.514023646361534e-07, "loss": 0.1714, "step": 34988 }, { "epoch": 0.6081976046863321, "grad_norm": 3.0733562467334052, "learning_rate": 3.5137548741423913e-07, "loss": 0.2312, "step": 34989 }, { "epoch": 0.6082149872238349, "grad_norm": 1.1692584595252384, "learning_rate": 3.5134861066339644e-07, "loss": 0.1507, "step": 34990 }, { "epoch": 0.6082323697613378, "grad_norm": 0.9661939491423901, "learning_rate": 3.5132173438371015e-07, "loss": 0.1718, "step": 34991 }, { "epoch": 0.6082497522988406, "grad_norm": 0.8857107072100656, "learning_rate": 3.512948585752659e-07, "loss": 0.1955, "step": 34992 }, { "epoch": 0.6082671348363434, "grad_norm": 1.364174115414806, "learning_rate": 3.512679832381483e-07, "loss": 0.1875, "step": 34993 }, { "epoch": 0.6082845173738463, "grad_norm": 1.8257441584609824, "learning_rate": 3.5124110837244315e-07, "loss": 0.2114, "step": 34994 }, { "epoch": 0.6083018999113491, "grad_norm": 1.6298855543569135, "learning_rate": 3.5121423397823526e-07, "loss": 0.2084, "step": 34995 }, { "epoch": 0.6083192824488519, "grad_norm": 1.7901301179500002, "learning_rate": 3.511873600556101e-07, "loss": 0.192, "step": 34996 }, { "epoch": 0.6083366649863547, "grad_norm": 1.5527827333690396, "learning_rate": 3.5116048660465234e-07, "loss": 0.2686, "step": 34997 }, { "epoch": 0.6083540475238576, "grad_norm": 1.031754063101071, "learning_rate": 3.5113361362544767e-07, "loss": 0.2079, "step": 34998 }, { "epoch": 0.6083714300613604, "grad_norm": 1.054740610197348, "learning_rate": 3.511067411180809e-07, "loss": 0.1292, "step": 34999 }, { "epoch": 0.6083888125988631, "grad_norm": 1.1552608145351604, "learning_rate": 3.5107986908263753e-07, "loss": 0.1657, "step": 35000 }, { "epoch": 0.608406195136366, "grad_norm": 1.2745847808695858, "learning_rate": 3.510529975192024e-07, "loss": 0.2156, "step": 35001 }, { "epoch": 0.6084235776738688, "grad_norm": 2.7099791976002807, "learning_rate": 3.510261264278609e-07, "loss": 0.2371, "step": 35002 }, { "epoch": 0.6084409602113716, "grad_norm": 3.878973832235368, "learning_rate": 3.5099925580869836e-07, "loss": 0.2584, "step": 35003 }, { "epoch": 0.6084583427488744, "grad_norm": 2.022761253490245, "learning_rate": 3.509723856617996e-07, "loss": 0.2027, "step": 35004 }, { "epoch": 0.6084757252863773, "grad_norm": 1.8304316048063918, "learning_rate": 3.5094551598724976e-07, "loss": 0.152, "step": 35005 }, { "epoch": 0.6084931078238801, "grad_norm": 1.215909424437847, "learning_rate": 3.5091864678513435e-07, "loss": 0.1374, "step": 35006 }, { "epoch": 0.6085104903613829, "grad_norm": 1.1645465187319628, "learning_rate": 3.508917780555382e-07, "loss": 0.2102, "step": 35007 }, { "epoch": 0.6085278728988858, "grad_norm": 1.3106910818658317, "learning_rate": 3.508649097985467e-07, "loss": 0.1429, "step": 35008 }, { "epoch": 0.6085452554363886, "grad_norm": 1.459765184865781, "learning_rate": 3.5083804201424506e-07, "loss": 0.2028, "step": 35009 }, { "epoch": 0.6085626379738914, "grad_norm": 1.3354396591420596, "learning_rate": 3.508111747027181e-07, "loss": 0.1814, "step": 35010 }, { "epoch": 0.6085800205113943, "grad_norm": 1.3032689566309776, "learning_rate": 3.507843078640513e-07, "loss": 0.1297, "step": 35011 }, { "epoch": 0.6085974030488971, "grad_norm": 1.441028404571546, "learning_rate": 3.5075744149832963e-07, "loss": 0.1835, "step": 35012 }, { "epoch": 0.6086147855863999, "grad_norm": 1.352004575052215, "learning_rate": 3.507305756056383e-07, "loss": 0.1395, "step": 35013 }, { "epoch": 0.6086321681239028, "grad_norm": 1.2216445558965157, "learning_rate": 3.5070371018606253e-07, "loss": 0.1163, "step": 35014 }, { "epoch": 0.6086495506614056, "grad_norm": 1.956396108839872, "learning_rate": 3.5067684523968756e-07, "loss": 0.2125, "step": 35015 }, { "epoch": 0.6086669331989084, "grad_norm": 0.7451141934962074, "learning_rate": 3.5064998076659813e-07, "loss": 0.1012, "step": 35016 }, { "epoch": 0.6086843157364112, "grad_norm": 1.6443525342696022, "learning_rate": 3.5062311676687983e-07, "loss": 0.1702, "step": 35017 }, { "epoch": 0.6087016982739141, "grad_norm": 2.459597868429438, "learning_rate": 3.505962532406175e-07, "loss": 0.2618, "step": 35018 }, { "epoch": 0.6087190808114169, "grad_norm": 1.3500575019409329, "learning_rate": 3.505693901878965e-07, "loss": 0.1658, "step": 35019 }, { "epoch": 0.6087364633489196, "grad_norm": 1.7818587556265573, "learning_rate": 3.505425276088019e-07, "loss": 0.1712, "step": 35020 }, { "epoch": 0.6087538458864225, "grad_norm": 1.6654616894011263, "learning_rate": 3.505156655034187e-07, "loss": 0.177, "step": 35021 }, { "epoch": 0.6087712284239253, "grad_norm": 1.0960198610065688, "learning_rate": 3.5048880387183244e-07, "loss": 0.1272, "step": 35022 }, { "epoch": 0.6087886109614281, "grad_norm": 1.9262218684632781, "learning_rate": 3.504619427141279e-07, "loss": 0.1341, "step": 35023 }, { "epoch": 0.6088059934989309, "grad_norm": 1.436649195821593, "learning_rate": 3.504350820303901e-07, "loss": 0.2259, "step": 35024 }, { "epoch": 0.6088233760364338, "grad_norm": 1.6678626642467298, "learning_rate": 3.504082218207046e-07, "loss": 0.1824, "step": 35025 }, { "epoch": 0.6088407585739366, "grad_norm": 1.6154003338566159, "learning_rate": 3.5038136208515615e-07, "loss": 0.1127, "step": 35026 }, { "epoch": 0.6088581411114394, "grad_norm": 1.990804429365316, "learning_rate": 3.5035450282383016e-07, "loss": 0.2382, "step": 35027 }, { "epoch": 0.6088755236489423, "grad_norm": 1.253534603274823, "learning_rate": 3.503276440368118e-07, "loss": 0.1227, "step": 35028 }, { "epoch": 0.6088929061864451, "grad_norm": 1.7361238039796016, "learning_rate": 3.5030078572418595e-07, "loss": 0.1583, "step": 35029 }, { "epoch": 0.6089102887239479, "grad_norm": 4.0735739546047105, "learning_rate": 3.5027392788603773e-07, "loss": 0.2169, "step": 35030 }, { "epoch": 0.6089276712614508, "grad_norm": 1.4709124838219718, "learning_rate": 3.502470705224524e-07, "loss": 0.1291, "step": 35031 }, { "epoch": 0.6089450537989536, "grad_norm": 1.4464828821337439, "learning_rate": 3.5022021363351505e-07, "loss": 0.2825, "step": 35032 }, { "epoch": 0.6089624363364564, "grad_norm": 1.4132557195740882, "learning_rate": 3.501933572193109e-07, "loss": 0.1812, "step": 35033 }, { "epoch": 0.6089798188739592, "grad_norm": 2.12924354422949, "learning_rate": 3.5016650127992515e-07, "loss": 0.1841, "step": 35034 }, { "epoch": 0.6089972014114621, "grad_norm": 1.5818754032470042, "learning_rate": 3.501396458154426e-07, "loss": 0.2209, "step": 35035 }, { "epoch": 0.6090145839489649, "grad_norm": 1.135089177769741, "learning_rate": 3.5011279082594846e-07, "loss": 0.1578, "step": 35036 }, { "epoch": 0.6090319664864677, "grad_norm": 1.807970218973822, "learning_rate": 3.5008593631152793e-07, "loss": 0.2292, "step": 35037 }, { "epoch": 0.6090493490239706, "grad_norm": 0.9623644353165112, "learning_rate": 3.500590822722661e-07, "loss": 0.174, "step": 35038 }, { "epoch": 0.6090667315614734, "grad_norm": 1.185644377881776, "learning_rate": 3.5003222870824824e-07, "loss": 0.1424, "step": 35039 }, { "epoch": 0.6090841140989761, "grad_norm": 1.9592999083509774, "learning_rate": 3.500053756195592e-07, "loss": 0.2101, "step": 35040 }, { "epoch": 0.609101496636479, "grad_norm": 1.5440921168165815, "learning_rate": 3.499785230062844e-07, "loss": 0.1342, "step": 35041 }, { "epoch": 0.6091188791739818, "grad_norm": 2.118735387795186, "learning_rate": 3.499516708685086e-07, "loss": 0.2082, "step": 35042 }, { "epoch": 0.6091362617114846, "grad_norm": 1.7659131827347125, "learning_rate": 3.4992481920631717e-07, "loss": 0.417, "step": 35043 }, { "epoch": 0.6091536442489874, "grad_norm": 1.6040027738202334, "learning_rate": 3.4989796801979497e-07, "loss": 0.221, "step": 35044 }, { "epoch": 0.6091710267864903, "grad_norm": 1.268461451077414, "learning_rate": 3.498711173090274e-07, "loss": 0.1501, "step": 35045 }, { "epoch": 0.6091884093239931, "grad_norm": 1.4229211241138766, "learning_rate": 3.498442670740993e-07, "loss": 0.1318, "step": 35046 }, { "epoch": 0.6092057918614959, "grad_norm": 0.6846680723520202, "learning_rate": 3.4981741731509614e-07, "loss": 0.0921, "step": 35047 }, { "epoch": 0.6092231743989988, "grad_norm": 1.5677138778673085, "learning_rate": 3.497905680321026e-07, "loss": 0.1829, "step": 35048 }, { "epoch": 0.6092405569365016, "grad_norm": 1.7096958174405714, "learning_rate": 3.497637192252039e-07, "loss": 0.1977, "step": 35049 }, { "epoch": 0.6092579394740044, "grad_norm": 2.5532932224787714, "learning_rate": 3.4973687089448536e-07, "loss": 0.1646, "step": 35050 }, { "epoch": 0.6092753220115072, "grad_norm": 0.9500077965173158, "learning_rate": 3.4971002304003173e-07, "loss": 0.2472, "step": 35051 }, { "epoch": 0.6092927045490101, "grad_norm": 1.2805623837525606, "learning_rate": 3.496831756619284e-07, "loss": 0.143, "step": 35052 }, { "epoch": 0.6093100870865129, "grad_norm": 1.5712177459257097, "learning_rate": 3.4965632876026045e-07, "loss": 0.0987, "step": 35053 }, { "epoch": 0.6093274696240157, "grad_norm": 1.3257846101796424, "learning_rate": 3.496294823351128e-07, "loss": 0.1645, "step": 35054 }, { "epoch": 0.6093448521615186, "grad_norm": 1.225231815398656, "learning_rate": 3.496026363865705e-07, "loss": 0.1405, "step": 35055 }, { "epoch": 0.6093622346990214, "grad_norm": 2.1448986596449453, "learning_rate": 3.495757909147189e-07, "loss": 0.322, "step": 35056 }, { "epoch": 0.6093796172365242, "grad_norm": 1.1390887577805695, "learning_rate": 3.4954894591964277e-07, "loss": 0.1788, "step": 35057 }, { "epoch": 0.6093969997740271, "grad_norm": 1.2236570644378415, "learning_rate": 3.4952210140142757e-07, "loss": 0.1743, "step": 35058 }, { "epoch": 0.6094143823115299, "grad_norm": 1.4703066342008209, "learning_rate": 3.4949525736015814e-07, "loss": 0.2244, "step": 35059 }, { "epoch": 0.6094317648490326, "grad_norm": 1.4818049813706842, "learning_rate": 3.494684137959196e-07, "loss": 0.207, "step": 35060 }, { "epoch": 0.6094491473865354, "grad_norm": 1.6006523908935224, "learning_rate": 3.49441570708797e-07, "loss": 0.1178, "step": 35061 }, { "epoch": 0.6094665299240383, "grad_norm": 1.243162283295597, "learning_rate": 3.494147280988755e-07, "loss": 0.1529, "step": 35062 }, { "epoch": 0.6094839124615411, "grad_norm": 1.5544569714041558, "learning_rate": 3.4938788596624e-07, "loss": 0.297, "step": 35063 }, { "epoch": 0.6095012949990439, "grad_norm": 3.1797748071815892, "learning_rate": 3.4936104431097584e-07, "loss": 0.1921, "step": 35064 }, { "epoch": 0.6095186775365468, "grad_norm": 2.4604768219857696, "learning_rate": 3.4933420313316784e-07, "loss": 0.1619, "step": 35065 }, { "epoch": 0.6095360600740496, "grad_norm": 1.8993921289300293, "learning_rate": 3.493073624329015e-07, "loss": 0.2127, "step": 35066 }, { "epoch": 0.6095534426115524, "grad_norm": 0.9454347767476684, "learning_rate": 3.492805222102614e-07, "loss": 0.1431, "step": 35067 }, { "epoch": 0.6095708251490553, "grad_norm": 1.4086429436422314, "learning_rate": 3.492536824653329e-07, "loss": 0.2045, "step": 35068 }, { "epoch": 0.6095882076865581, "grad_norm": 2.320897215265382, "learning_rate": 3.4922684319820083e-07, "loss": 0.1781, "step": 35069 }, { "epoch": 0.6096055902240609, "grad_norm": 1.8837885155908782, "learning_rate": 3.4920000440895046e-07, "loss": 0.1421, "step": 35070 }, { "epoch": 0.6096229727615637, "grad_norm": 1.4782032957680684, "learning_rate": 3.4917316609766676e-07, "loss": 0.1336, "step": 35071 }, { "epoch": 0.6096403552990666, "grad_norm": 2.2235415704730075, "learning_rate": 3.49146328264435e-07, "loss": 0.1628, "step": 35072 }, { "epoch": 0.6096577378365694, "grad_norm": 1.1774395185237612, "learning_rate": 3.4911949090934005e-07, "loss": 0.1556, "step": 35073 }, { "epoch": 0.6096751203740722, "grad_norm": 1.5932972518105073, "learning_rate": 3.490926540324669e-07, "loss": 0.2313, "step": 35074 }, { "epoch": 0.6096925029115751, "grad_norm": 2.524113325712477, "learning_rate": 3.490658176339007e-07, "loss": 0.1751, "step": 35075 }, { "epoch": 0.6097098854490779, "grad_norm": 1.2173028273513256, "learning_rate": 3.490389817137267e-07, "loss": 0.1547, "step": 35076 }, { "epoch": 0.6097272679865807, "grad_norm": 1.0069039505383284, "learning_rate": 3.490121462720296e-07, "loss": 0.22, "step": 35077 }, { "epoch": 0.6097446505240836, "grad_norm": 1.4766426833606414, "learning_rate": 3.489853113088948e-07, "loss": 0.1944, "step": 35078 }, { "epoch": 0.6097620330615864, "grad_norm": 1.3297054995485234, "learning_rate": 3.4895847682440716e-07, "loss": 0.2358, "step": 35079 }, { "epoch": 0.6097794155990891, "grad_norm": 2.5883041767004764, "learning_rate": 3.4893164281865164e-07, "loss": 0.2006, "step": 35080 }, { "epoch": 0.6097967981365919, "grad_norm": 1.5098961921188128, "learning_rate": 3.4890480929171347e-07, "loss": 0.1555, "step": 35081 }, { "epoch": 0.6098141806740948, "grad_norm": 4.2072843576880965, "learning_rate": 3.4887797624367756e-07, "loss": 0.1404, "step": 35082 }, { "epoch": 0.6098315632115976, "grad_norm": 1.6616565523878577, "learning_rate": 3.488511436746292e-07, "loss": 0.193, "step": 35083 }, { "epoch": 0.6098489457491004, "grad_norm": 1.3462904158361406, "learning_rate": 3.488243115846532e-07, "loss": 0.1996, "step": 35084 }, { "epoch": 0.6098663282866033, "grad_norm": 1.4661616625523184, "learning_rate": 3.487974799738349e-07, "loss": 0.1204, "step": 35085 }, { "epoch": 0.6098837108241061, "grad_norm": 2.9226163546862702, "learning_rate": 3.4877064884225884e-07, "loss": 0.2072, "step": 35086 }, { "epoch": 0.6099010933616089, "grad_norm": 0.8926751992815727, "learning_rate": 3.487438181900105e-07, "loss": 0.1734, "step": 35087 }, { "epoch": 0.6099184758991117, "grad_norm": 2.0241247611801105, "learning_rate": 3.4871698801717465e-07, "loss": 0.3187, "step": 35088 }, { "epoch": 0.6099358584366146, "grad_norm": 1.0445155318594994, "learning_rate": 3.4869015832383655e-07, "loss": 0.1839, "step": 35089 }, { "epoch": 0.6099532409741174, "grad_norm": 2.0850483834629063, "learning_rate": 3.4866332911008104e-07, "loss": 0.2519, "step": 35090 }, { "epoch": 0.6099706235116202, "grad_norm": 1.4179392092059582, "learning_rate": 3.4863650037599357e-07, "loss": 0.1675, "step": 35091 }, { "epoch": 0.6099880060491231, "grad_norm": 1.0441989084437817, "learning_rate": 3.4860967212165857e-07, "loss": 0.1598, "step": 35092 }, { "epoch": 0.6100053885866259, "grad_norm": 1.5029247220216664, "learning_rate": 3.4858284434716144e-07, "loss": 0.1523, "step": 35093 }, { "epoch": 0.6100227711241287, "grad_norm": 1.9315385433070802, "learning_rate": 3.485560170525871e-07, "loss": 0.1198, "step": 35094 }, { "epoch": 0.6100401536616316, "grad_norm": 1.982637424733659, "learning_rate": 3.485291902380207e-07, "loss": 0.2398, "step": 35095 }, { "epoch": 0.6100575361991344, "grad_norm": 2.4903700340968298, "learning_rate": 3.48502363903547e-07, "loss": 0.2639, "step": 35096 }, { "epoch": 0.6100749187366372, "grad_norm": 2.0035361937072875, "learning_rate": 3.4847553804925155e-07, "loss": 0.2111, "step": 35097 }, { "epoch": 0.61009230127414, "grad_norm": 1.349578612421166, "learning_rate": 3.4844871267521884e-07, "loss": 0.1377, "step": 35098 }, { "epoch": 0.6101096838116429, "grad_norm": 1.2179678034588899, "learning_rate": 3.48421887781534e-07, "loss": 0.2247, "step": 35099 }, { "epoch": 0.6101270663491456, "grad_norm": 0.8760496335095042, "learning_rate": 3.4839506336828226e-07, "loss": 0.1433, "step": 35100 }, { "epoch": 0.6101444488866484, "grad_norm": 0.996965970868706, "learning_rate": 3.483682394355486e-07, "loss": 0.1242, "step": 35101 }, { "epoch": 0.6101618314241513, "grad_norm": 1.4150056027168718, "learning_rate": 3.483414159834178e-07, "loss": 0.2433, "step": 35102 }, { "epoch": 0.6101792139616541, "grad_norm": 1.254094151423158, "learning_rate": 3.483145930119752e-07, "loss": 0.124, "step": 35103 }, { "epoch": 0.6101965964991569, "grad_norm": 1.937633933057823, "learning_rate": 3.4828777052130565e-07, "loss": 0.2371, "step": 35104 }, { "epoch": 0.6102139790366597, "grad_norm": 1.903346167950928, "learning_rate": 3.48260948511494e-07, "loss": 0.2428, "step": 35105 }, { "epoch": 0.6102313615741626, "grad_norm": 1.485376346671664, "learning_rate": 3.482341269826257e-07, "loss": 0.1892, "step": 35106 }, { "epoch": 0.6102487441116654, "grad_norm": 1.4289012503010137, "learning_rate": 3.4820730593478523e-07, "loss": 0.1434, "step": 35107 }, { "epoch": 0.6102661266491682, "grad_norm": 1.6620893911721948, "learning_rate": 3.48180485368058e-07, "loss": 0.1311, "step": 35108 }, { "epoch": 0.6102835091866711, "grad_norm": 1.6865042811809663, "learning_rate": 3.481536652825289e-07, "loss": 0.2046, "step": 35109 }, { "epoch": 0.6103008917241739, "grad_norm": 1.3983791023857048, "learning_rate": 3.481268456782831e-07, "loss": 0.1707, "step": 35110 }, { "epoch": 0.6103182742616767, "grad_norm": 2.362768441647932, "learning_rate": 3.4810002655540516e-07, "loss": 0.233, "step": 35111 }, { "epoch": 0.6103356567991796, "grad_norm": 1.3356203563844158, "learning_rate": 3.480732079139804e-07, "loss": 0.1735, "step": 35112 }, { "epoch": 0.6103530393366824, "grad_norm": 1.03969420676504, "learning_rate": 3.480463897540938e-07, "loss": 0.1337, "step": 35113 }, { "epoch": 0.6103704218741852, "grad_norm": 1.6706782866481986, "learning_rate": 3.480195720758304e-07, "loss": 0.1459, "step": 35114 }, { "epoch": 0.610387804411688, "grad_norm": 1.6174042251225542, "learning_rate": 3.4799275487927506e-07, "loss": 0.1796, "step": 35115 }, { "epoch": 0.6104051869491909, "grad_norm": 1.5298341706136753, "learning_rate": 3.479659381645131e-07, "loss": 0.1818, "step": 35116 }, { "epoch": 0.6104225694866937, "grad_norm": 2.5658321146930536, "learning_rate": 3.4793912193162906e-07, "loss": 0.1658, "step": 35117 }, { "epoch": 0.6104399520241965, "grad_norm": 1.7441268871327762, "learning_rate": 3.4791230618070823e-07, "loss": 0.2403, "step": 35118 }, { "epoch": 0.6104573345616994, "grad_norm": 1.1115172511006406, "learning_rate": 3.478854909118354e-07, "loss": 0.1727, "step": 35119 }, { "epoch": 0.6104747170992021, "grad_norm": 2.016289311434272, "learning_rate": 3.4785867612509576e-07, "loss": 0.2278, "step": 35120 }, { "epoch": 0.6104920996367049, "grad_norm": 1.7320731358159518, "learning_rate": 3.478318618205742e-07, "loss": 0.1905, "step": 35121 }, { "epoch": 0.6105094821742078, "grad_norm": 2.6343956418970342, "learning_rate": 3.4780504799835583e-07, "loss": 0.2233, "step": 35122 }, { "epoch": 0.6105268647117106, "grad_norm": 1.450486198866294, "learning_rate": 3.477782346585256e-07, "loss": 0.1227, "step": 35123 }, { "epoch": 0.6105442472492134, "grad_norm": 1.4838444266484905, "learning_rate": 3.4775142180116833e-07, "loss": 0.1744, "step": 35124 }, { "epoch": 0.6105616297867162, "grad_norm": 2.733837194032031, "learning_rate": 3.4772460942636905e-07, "loss": 0.1327, "step": 35125 }, { "epoch": 0.6105790123242191, "grad_norm": 2.694833910308695, "learning_rate": 3.476977975342129e-07, "loss": 0.2449, "step": 35126 }, { "epoch": 0.6105963948617219, "grad_norm": 0.7788348551594632, "learning_rate": 3.476709861247847e-07, "loss": 0.2393, "step": 35127 }, { "epoch": 0.6106137773992247, "grad_norm": 1.6678275066142432, "learning_rate": 3.4764417519816956e-07, "loss": 0.2027, "step": 35128 }, { "epoch": 0.6106311599367276, "grad_norm": 3.3735608687709897, "learning_rate": 3.476173647544525e-07, "loss": 0.2635, "step": 35129 }, { "epoch": 0.6106485424742304, "grad_norm": 1.4836535160145923, "learning_rate": 3.4759055479371826e-07, "loss": 0.1856, "step": 35130 }, { "epoch": 0.6106659250117332, "grad_norm": 1.4269391993735256, "learning_rate": 3.475637453160519e-07, "loss": 0.1708, "step": 35131 }, { "epoch": 0.610683307549236, "grad_norm": 2.0881417412205288, "learning_rate": 3.475369363215386e-07, "loss": 0.2634, "step": 35132 }, { "epoch": 0.6107006900867389, "grad_norm": 1.698318439140498, "learning_rate": 3.47510127810263e-07, "loss": 0.2224, "step": 35133 }, { "epoch": 0.6107180726242417, "grad_norm": 0.804998839042529, "learning_rate": 3.4748331978231037e-07, "loss": 0.2212, "step": 35134 }, { "epoch": 0.6107354551617445, "grad_norm": 1.3841327780458221, "learning_rate": 3.474565122377656e-07, "loss": 0.2362, "step": 35135 }, { "epoch": 0.6107528376992474, "grad_norm": 1.4314448574767102, "learning_rate": 3.4742970517671354e-07, "loss": 0.1739, "step": 35136 }, { "epoch": 0.6107702202367502, "grad_norm": 2.0839627945892003, "learning_rate": 3.474028985992392e-07, "loss": 0.2352, "step": 35137 }, { "epoch": 0.610787602774253, "grad_norm": 1.2938537240781287, "learning_rate": 3.4737609250542757e-07, "loss": 0.2626, "step": 35138 }, { "epoch": 0.6108049853117558, "grad_norm": 1.024300690277013, "learning_rate": 3.473492868953637e-07, "loss": 0.1946, "step": 35139 }, { "epoch": 0.6108223678492586, "grad_norm": 1.2244912540469055, "learning_rate": 3.4732248176913224e-07, "loss": 0.1149, "step": 35140 }, { "epoch": 0.6108397503867614, "grad_norm": 1.3915971816401973, "learning_rate": 3.472956771268186e-07, "loss": 0.1409, "step": 35141 }, { "epoch": 0.6108571329242642, "grad_norm": 2.169676171394498, "learning_rate": 3.472688729685076e-07, "loss": 0.2039, "step": 35142 }, { "epoch": 0.6108745154617671, "grad_norm": 1.2141917504397173, "learning_rate": 3.47242069294284e-07, "loss": 0.1333, "step": 35143 }, { "epoch": 0.6108918979992699, "grad_norm": 1.1774756706414518, "learning_rate": 3.472152661042328e-07, "loss": 0.2215, "step": 35144 }, { "epoch": 0.6109092805367727, "grad_norm": 1.4534958221365641, "learning_rate": 3.471884633984391e-07, "loss": 0.2675, "step": 35145 }, { "epoch": 0.6109266630742756, "grad_norm": 1.4044715750593715, "learning_rate": 3.471616611769877e-07, "loss": 0.1645, "step": 35146 }, { "epoch": 0.6109440456117784, "grad_norm": 2.732422876077128, "learning_rate": 3.471348594399637e-07, "loss": 0.1326, "step": 35147 }, { "epoch": 0.6109614281492812, "grad_norm": 2.457875212022449, "learning_rate": 3.471080581874521e-07, "loss": 0.2552, "step": 35148 }, { "epoch": 0.6109788106867841, "grad_norm": 0.9941820835102915, "learning_rate": 3.470812574195376e-07, "loss": 0.1801, "step": 35149 }, { "epoch": 0.6109961932242869, "grad_norm": 1.1834766182133105, "learning_rate": 3.4705445713630517e-07, "loss": 0.1592, "step": 35150 }, { "epoch": 0.6110135757617897, "grad_norm": 1.83843129618106, "learning_rate": 3.4702765733783997e-07, "loss": 0.1897, "step": 35151 }, { "epoch": 0.6110309582992925, "grad_norm": 1.459775851840423, "learning_rate": 3.470008580242267e-07, "loss": 0.165, "step": 35152 }, { "epoch": 0.6110483408367954, "grad_norm": 1.4191626158474941, "learning_rate": 3.469740591955506e-07, "loss": 0.1604, "step": 35153 }, { "epoch": 0.6110657233742982, "grad_norm": 1.5868151033822697, "learning_rate": 3.4694726085189643e-07, "loss": 0.2089, "step": 35154 }, { "epoch": 0.611083105911801, "grad_norm": 1.214414711260977, "learning_rate": 3.4692046299334896e-07, "loss": 0.2535, "step": 35155 }, { "epoch": 0.6111004884493039, "grad_norm": 1.7128705594192135, "learning_rate": 3.4689366561999345e-07, "loss": 0.183, "step": 35156 }, { "epoch": 0.6111178709868067, "grad_norm": 0.9870677636919202, "learning_rate": 3.468668687319146e-07, "loss": 0.1559, "step": 35157 }, { "epoch": 0.6111352535243095, "grad_norm": 2.5581078501905212, "learning_rate": 3.4684007232919745e-07, "loss": 0.1647, "step": 35158 }, { "epoch": 0.6111526360618122, "grad_norm": 1.1404210333698164, "learning_rate": 3.4681327641192694e-07, "loss": 0.2822, "step": 35159 }, { "epoch": 0.6111700185993151, "grad_norm": 1.405390805681277, "learning_rate": 3.4678648098018794e-07, "loss": 0.1861, "step": 35160 }, { "epoch": 0.6111874011368179, "grad_norm": 0.9205120468594455, "learning_rate": 3.4675968603406554e-07, "loss": 0.1219, "step": 35161 }, { "epoch": 0.6112047836743207, "grad_norm": 2.141602592466557, "learning_rate": 3.467328915736445e-07, "loss": 0.2413, "step": 35162 }, { "epoch": 0.6112221662118236, "grad_norm": 1.9389951493158992, "learning_rate": 3.467060975990096e-07, "loss": 0.1953, "step": 35163 }, { "epoch": 0.6112395487493264, "grad_norm": 1.7875410914889422, "learning_rate": 3.4667930411024615e-07, "loss": 0.1827, "step": 35164 }, { "epoch": 0.6112569312868292, "grad_norm": 1.308218443940197, "learning_rate": 3.466525111074389e-07, "loss": 0.228, "step": 35165 }, { "epoch": 0.6112743138243321, "grad_norm": 1.28145196125715, "learning_rate": 3.466257185906725e-07, "loss": 0.1729, "step": 35166 }, { "epoch": 0.6112916963618349, "grad_norm": 1.8470381830807654, "learning_rate": 3.465989265600324e-07, "loss": 0.2898, "step": 35167 }, { "epoch": 0.6113090788993377, "grad_norm": 1.8181969885338631, "learning_rate": 3.465721350156032e-07, "loss": 0.1939, "step": 35168 }, { "epoch": 0.6113264614368406, "grad_norm": 1.8376384670700094, "learning_rate": 3.465453439574697e-07, "loss": 0.1621, "step": 35169 }, { "epoch": 0.6113438439743434, "grad_norm": 3.9059821063848115, "learning_rate": 3.4651855338571705e-07, "loss": 0.2363, "step": 35170 }, { "epoch": 0.6113612265118462, "grad_norm": 1.609039167594007, "learning_rate": 3.4649176330042995e-07, "loss": 0.201, "step": 35171 }, { "epoch": 0.611378609049349, "grad_norm": 1.6733070262641396, "learning_rate": 3.464649737016936e-07, "loss": 0.1429, "step": 35172 }, { "epoch": 0.6113959915868519, "grad_norm": 1.1560617396870079, "learning_rate": 3.464381845895929e-07, "loss": 0.1846, "step": 35173 }, { "epoch": 0.6114133741243547, "grad_norm": 1.3076557906879638, "learning_rate": 3.464113959642124e-07, "loss": 0.2154, "step": 35174 }, { "epoch": 0.6114307566618575, "grad_norm": 1.3913040980837228, "learning_rate": 3.463846078256372e-07, "loss": 0.2128, "step": 35175 }, { "epoch": 0.6114481391993604, "grad_norm": 2.0826075609712245, "learning_rate": 3.463578201739523e-07, "loss": 0.2226, "step": 35176 }, { "epoch": 0.6114655217368632, "grad_norm": 1.736515309865058, "learning_rate": 3.463310330092425e-07, "loss": 0.2924, "step": 35177 }, { "epoch": 0.611482904274366, "grad_norm": 1.2232555022402618, "learning_rate": 3.4630424633159277e-07, "loss": 0.1216, "step": 35178 }, { "epoch": 0.6115002868118687, "grad_norm": 2.815397600497866, "learning_rate": 3.4627746014108805e-07, "loss": 0.1695, "step": 35179 }, { "epoch": 0.6115176693493716, "grad_norm": 1.0002321032323924, "learning_rate": 3.4625067443781315e-07, "loss": 0.2564, "step": 35180 }, { "epoch": 0.6115350518868744, "grad_norm": 1.4420044352141161, "learning_rate": 3.4622388922185287e-07, "loss": 0.1809, "step": 35181 }, { "epoch": 0.6115524344243772, "grad_norm": 2.983205730753018, "learning_rate": 3.4619710449329224e-07, "loss": 0.1984, "step": 35182 }, { "epoch": 0.6115698169618801, "grad_norm": 1.8395098018055185, "learning_rate": 3.461703202522161e-07, "loss": 0.2263, "step": 35183 }, { "epoch": 0.6115871994993829, "grad_norm": 1.6172388011283356, "learning_rate": 3.4614353649870946e-07, "loss": 0.3251, "step": 35184 }, { "epoch": 0.6116045820368857, "grad_norm": 1.6862633086821799, "learning_rate": 3.4611675323285704e-07, "loss": 0.2167, "step": 35185 }, { "epoch": 0.6116219645743886, "grad_norm": 1.6741336867076686, "learning_rate": 3.4608997045474403e-07, "loss": 0.1682, "step": 35186 }, { "epoch": 0.6116393471118914, "grad_norm": 2.187464345134201, "learning_rate": 3.4606318816445503e-07, "loss": 0.2497, "step": 35187 }, { "epoch": 0.6116567296493942, "grad_norm": 1.309826008297768, "learning_rate": 3.4603640636207485e-07, "loss": 0.212, "step": 35188 }, { "epoch": 0.611674112186897, "grad_norm": 1.4520701460118186, "learning_rate": 3.460096250476887e-07, "loss": 0.1698, "step": 35189 }, { "epoch": 0.6116914947243999, "grad_norm": 1.614782758849281, "learning_rate": 3.459828442213813e-07, "loss": 0.1529, "step": 35190 }, { "epoch": 0.6117088772619027, "grad_norm": 1.2445434862690128, "learning_rate": 3.459560638832374e-07, "loss": 0.1633, "step": 35191 }, { "epoch": 0.6117262597994055, "grad_norm": 1.603158988868151, "learning_rate": 3.459292840333422e-07, "loss": 0.1273, "step": 35192 }, { "epoch": 0.6117436423369084, "grad_norm": 1.7854802793756388, "learning_rate": 3.4590250467178037e-07, "loss": 0.2496, "step": 35193 }, { "epoch": 0.6117610248744112, "grad_norm": 2.3907604041281756, "learning_rate": 3.458757257986367e-07, "loss": 0.2583, "step": 35194 }, { "epoch": 0.611778407411914, "grad_norm": 1.6088590190927032, "learning_rate": 3.458489474139962e-07, "loss": 0.4167, "step": 35195 }, { "epoch": 0.6117957899494169, "grad_norm": 1.508437340220316, "learning_rate": 3.458221695179437e-07, "loss": 0.1816, "step": 35196 }, { "epoch": 0.6118131724869197, "grad_norm": 0.8600107599237734, "learning_rate": 3.4579539211056416e-07, "loss": 0.1024, "step": 35197 }, { "epoch": 0.6118305550244225, "grad_norm": 1.5202839375150348, "learning_rate": 3.457686151919425e-07, "loss": 0.1542, "step": 35198 }, { "epoch": 0.6118479375619252, "grad_norm": 1.2080630478604257, "learning_rate": 3.457418387621634e-07, "loss": 0.2204, "step": 35199 }, { "epoch": 0.6118653200994281, "grad_norm": 2.1544892750531366, "learning_rate": 3.4571506282131166e-07, "loss": 0.2415, "step": 35200 }, { "epoch": 0.6118827026369309, "grad_norm": 1.2799608846380657, "learning_rate": 3.4568828736947244e-07, "loss": 0.1843, "step": 35201 }, { "epoch": 0.6119000851744337, "grad_norm": 0.9351910046142905, "learning_rate": 3.4566151240673036e-07, "loss": 0.1505, "step": 35202 }, { "epoch": 0.6119174677119366, "grad_norm": 2.0262783308374757, "learning_rate": 3.456347379331705e-07, "loss": 0.1916, "step": 35203 }, { "epoch": 0.6119348502494394, "grad_norm": 1.2046671465135315, "learning_rate": 3.456079639488775e-07, "loss": 0.2441, "step": 35204 }, { "epoch": 0.6119522327869422, "grad_norm": 1.1615599422893461, "learning_rate": 3.4558119045393663e-07, "loss": 0.1131, "step": 35205 }, { "epoch": 0.611969615324445, "grad_norm": 2.0384457795868753, "learning_rate": 3.4555441744843216e-07, "loss": 0.1622, "step": 35206 }, { "epoch": 0.6119869978619479, "grad_norm": 1.0103686315861429, "learning_rate": 3.4552764493244937e-07, "loss": 0.1706, "step": 35207 }, { "epoch": 0.6120043803994507, "grad_norm": 1.8708285931632653, "learning_rate": 3.455008729060728e-07, "loss": 0.1743, "step": 35208 }, { "epoch": 0.6120217629369535, "grad_norm": 0.7584646407774419, "learning_rate": 3.4547410136938766e-07, "loss": 0.1408, "step": 35209 }, { "epoch": 0.6120391454744564, "grad_norm": 1.8406153654148345, "learning_rate": 3.4544733032247853e-07, "loss": 0.1989, "step": 35210 }, { "epoch": 0.6120565280119592, "grad_norm": 0.8961536212473293, "learning_rate": 3.454205597654305e-07, "loss": 0.1354, "step": 35211 }, { "epoch": 0.612073910549462, "grad_norm": 1.1639484422341686, "learning_rate": 3.453937896983283e-07, "loss": 0.1956, "step": 35212 }, { "epoch": 0.6120912930869649, "grad_norm": 1.0808961225251281, "learning_rate": 3.453670201212567e-07, "loss": 0.1612, "step": 35213 }, { "epoch": 0.6121086756244677, "grad_norm": 1.3544882323343526, "learning_rate": 3.4534025103430055e-07, "loss": 0.2305, "step": 35214 }, { "epoch": 0.6121260581619705, "grad_norm": 1.6700805217698438, "learning_rate": 3.4531348243754476e-07, "loss": 0.2981, "step": 35215 }, { "epoch": 0.6121434406994734, "grad_norm": 2.2152123437667024, "learning_rate": 3.4528671433107414e-07, "loss": 0.2354, "step": 35216 }, { "epoch": 0.6121608232369762, "grad_norm": 1.9695011875189996, "learning_rate": 3.452599467149738e-07, "loss": 0.2037, "step": 35217 }, { "epoch": 0.612178205774479, "grad_norm": 1.8789629364911757, "learning_rate": 3.4523317958932817e-07, "loss": 0.229, "step": 35218 }, { "epoch": 0.6121955883119817, "grad_norm": 1.4051068638794102, "learning_rate": 3.452064129542223e-07, "loss": 0.1864, "step": 35219 }, { "epoch": 0.6122129708494846, "grad_norm": 1.7195098600447847, "learning_rate": 3.45179646809741e-07, "loss": 0.2714, "step": 35220 }, { "epoch": 0.6122303533869874, "grad_norm": 1.3879174320452818, "learning_rate": 3.4515288115596906e-07, "loss": 0.27, "step": 35221 }, { "epoch": 0.6122477359244902, "grad_norm": 1.5723091432028016, "learning_rate": 3.4512611599299135e-07, "loss": 0.2168, "step": 35222 }, { "epoch": 0.612265118461993, "grad_norm": 1.8910944174002977, "learning_rate": 3.4509935132089273e-07, "loss": 0.2026, "step": 35223 }, { "epoch": 0.6122825009994959, "grad_norm": 2.5119099967775327, "learning_rate": 3.450725871397582e-07, "loss": 0.2526, "step": 35224 }, { "epoch": 0.6122998835369987, "grad_norm": 1.4408115304683664, "learning_rate": 3.450458234496721e-07, "loss": 0.1137, "step": 35225 }, { "epoch": 0.6123172660745015, "grad_norm": 2.333489170460963, "learning_rate": 3.450190602507197e-07, "loss": 0.2856, "step": 35226 }, { "epoch": 0.6123346486120044, "grad_norm": 1.0486823905085512, "learning_rate": 3.449922975429856e-07, "loss": 0.2082, "step": 35227 }, { "epoch": 0.6123520311495072, "grad_norm": 1.5355910984726528, "learning_rate": 3.449655353265548e-07, "loss": 0.1767, "step": 35228 }, { "epoch": 0.61236941368701, "grad_norm": 0.8821839672905223, "learning_rate": 3.4493877360151204e-07, "loss": 0.1606, "step": 35229 }, { "epoch": 0.6123867962245129, "grad_norm": 1.4023694256624137, "learning_rate": 3.4491201236794225e-07, "loss": 0.1474, "step": 35230 }, { "epoch": 0.6124041787620157, "grad_norm": 1.6336264126356885, "learning_rate": 3.4488525162592997e-07, "loss": 0.2041, "step": 35231 }, { "epoch": 0.6124215612995185, "grad_norm": 0.6278163714848688, "learning_rate": 3.4485849137556023e-07, "loss": 0.2048, "step": 35232 }, { "epoch": 0.6124389438370214, "grad_norm": 0.9359744434801174, "learning_rate": 3.448317316169177e-07, "loss": 0.3096, "step": 35233 }, { "epoch": 0.6124563263745242, "grad_norm": 1.7996516685950332, "learning_rate": 3.4480497235008745e-07, "loss": 0.1993, "step": 35234 }, { "epoch": 0.612473708912027, "grad_norm": 1.129906596491291, "learning_rate": 3.4477821357515403e-07, "loss": 0.1241, "step": 35235 }, { "epoch": 0.6124910914495298, "grad_norm": 2.666952486971069, "learning_rate": 3.447514552922025e-07, "loss": 0.1956, "step": 35236 }, { "epoch": 0.6125084739870327, "grad_norm": 1.6494554006378863, "learning_rate": 3.447246975013175e-07, "loss": 0.1593, "step": 35237 }, { "epoch": 0.6125258565245355, "grad_norm": 1.5256270859450085, "learning_rate": 3.446979402025839e-07, "loss": 0.1608, "step": 35238 }, { "epoch": 0.6125432390620382, "grad_norm": 1.376579668646877, "learning_rate": 3.4467118339608635e-07, "loss": 0.2566, "step": 35239 }, { "epoch": 0.612560621599541, "grad_norm": 1.270935721114967, "learning_rate": 3.4464442708191e-07, "loss": 0.1565, "step": 35240 }, { "epoch": 0.6125780041370439, "grad_norm": 0.8612672695614614, "learning_rate": 3.4461767126013916e-07, "loss": 0.117, "step": 35241 }, { "epoch": 0.6125953866745467, "grad_norm": 1.3771089111101573, "learning_rate": 3.445909159308592e-07, "loss": 0.1612, "step": 35242 }, { "epoch": 0.6126127692120495, "grad_norm": 1.1753524316805275, "learning_rate": 3.445641610941547e-07, "loss": 0.131, "step": 35243 }, { "epoch": 0.6126301517495524, "grad_norm": 2.698503083475673, "learning_rate": 3.445374067501102e-07, "loss": 0.2484, "step": 35244 }, { "epoch": 0.6126475342870552, "grad_norm": 2.054574822897903, "learning_rate": 3.4451065289881074e-07, "loss": 0.1438, "step": 35245 }, { "epoch": 0.612664916824558, "grad_norm": 1.16496270444724, "learning_rate": 3.444838995403412e-07, "loss": 0.2054, "step": 35246 }, { "epoch": 0.6126822993620609, "grad_norm": 1.2922395912069822, "learning_rate": 3.4445714667478607e-07, "loss": 0.1856, "step": 35247 }, { "epoch": 0.6126996818995637, "grad_norm": 1.106232477421739, "learning_rate": 3.4443039430223053e-07, "loss": 0.1505, "step": 35248 }, { "epoch": 0.6127170644370665, "grad_norm": 1.1696082871339015, "learning_rate": 3.444036424227592e-07, "loss": 0.1956, "step": 35249 }, { "epoch": 0.6127344469745694, "grad_norm": 1.6590391296616644, "learning_rate": 3.4437689103645663e-07, "loss": 0.2109, "step": 35250 }, { "epoch": 0.6127518295120722, "grad_norm": 1.4653088505106462, "learning_rate": 3.44350140143408e-07, "loss": 0.1572, "step": 35251 }, { "epoch": 0.612769212049575, "grad_norm": 1.3898575070330248, "learning_rate": 3.443233897436978e-07, "loss": 0.168, "step": 35252 }, { "epoch": 0.6127865945870778, "grad_norm": 2.161906939970085, "learning_rate": 3.44296639837411e-07, "loss": 0.2275, "step": 35253 }, { "epoch": 0.6128039771245807, "grad_norm": 1.2248117843055444, "learning_rate": 3.442698904246324e-07, "loss": 0.155, "step": 35254 }, { "epoch": 0.6128213596620835, "grad_norm": 1.3405976956174488, "learning_rate": 3.4424314150544677e-07, "loss": 0.2199, "step": 35255 }, { "epoch": 0.6128387421995863, "grad_norm": 1.670756218177812, "learning_rate": 3.4421639307993856e-07, "loss": 0.212, "step": 35256 }, { "epoch": 0.6128561247370892, "grad_norm": 1.201355634126712, "learning_rate": 3.44189645148193e-07, "loss": 0.1218, "step": 35257 }, { "epoch": 0.612873507274592, "grad_norm": 1.6008317121219662, "learning_rate": 3.441628977102946e-07, "loss": 0.1946, "step": 35258 }, { "epoch": 0.6128908898120947, "grad_norm": 1.9697516260705066, "learning_rate": 3.441361507663283e-07, "loss": 0.1824, "step": 35259 }, { "epoch": 0.6129082723495975, "grad_norm": 1.525737462068318, "learning_rate": 3.4410940431637875e-07, "loss": 0.0829, "step": 35260 }, { "epoch": 0.6129256548871004, "grad_norm": 3.713658308724522, "learning_rate": 3.4408265836053084e-07, "loss": 0.3599, "step": 35261 }, { "epoch": 0.6129430374246032, "grad_norm": 1.2269269053758107, "learning_rate": 3.4405591289886937e-07, "loss": 0.1544, "step": 35262 }, { "epoch": 0.612960419962106, "grad_norm": 4.256924804253247, "learning_rate": 3.4402916793147894e-07, "loss": 0.2378, "step": 35263 }, { "epoch": 0.6129778024996089, "grad_norm": 1.3892354817939014, "learning_rate": 3.440024234584443e-07, "loss": 0.1863, "step": 35264 }, { "epoch": 0.6129951850371117, "grad_norm": 2.010990756809649, "learning_rate": 3.439756794798504e-07, "loss": 0.3342, "step": 35265 }, { "epoch": 0.6130125675746145, "grad_norm": 1.2239857296613306, "learning_rate": 3.439489359957819e-07, "loss": 0.1354, "step": 35266 }, { "epoch": 0.6130299501121174, "grad_norm": 1.5122967200013155, "learning_rate": 3.439221930063236e-07, "loss": 0.2892, "step": 35267 }, { "epoch": 0.6130473326496202, "grad_norm": 1.0588261574785776, "learning_rate": 3.438954505115604e-07, "loss": 0.181, "step": 35268 }, { "epoch": 0.613064715187123, "grad_norm": 1.3780072932409584, "learning_rate": 3.4386870851157676e-07, "loss": 0.1927, "step": 35269 }, { "epoch": 0.6130820977246259, "grad_norm": 2.1835098363810226, "learning_rate": 3.438419670064575e-07, "loss": 0.2319, "step": 35270 }, { "epoch": 0.6130994802621287, "grad_norm": 1.6473352602851796, "learning_rate": 3.438152259962876e-07, "loss": 0.2203, "step": 35271 }, { "epoch": 0.6131168627996315, "grad_norm": 2.005752974051777, "learning_rate": 3.4378848548115166e-07, "loss": 0.3202, "step": 35272 }, { "epoch": 0.6131342453371343, "grad_norm": 1.514916375914303, "learning_rate": 3.4376174546113446e-07, "loss": 0.153, "step": 35273 }, { "epoch": 0.6131516278746372, "grad_norm": 1.1609689649911787, "learning_rate": 3.437350059363209e-07, "loss": 0.1566, "step": 35274 }, { "epoch": 0.61316901041214, "grad_norm": 1.2941427884306032, "learning_rate": 3.4370826690679537e-07, "loss": 0.1765, "step": 35275 }, { "epoch": 0.6131863929496428, "grad_norm": 2.0039982022966907, "learning_rate": 3.43681528372643e-07, "loss": 0.2127, "step": 35276 }, { "epoch": 0.6132037754871457, "grad_norm": 1.604712887630228, "learning_rate": 3.436547903339483e-07, "loss": 0.1995, "step": 35277 }, { "epoch": 0.6132211580246484, "grad_norm": 2.016135854670049, "learning_rate": 3.43628052790796e-07, "loss": 0.3281, "step": 35278 }, { "epoch": 0.6132385405621512, "grad_norm": 3.708159027802823, "learning_rate": 3.436013157432711e-07, "loss": 0.2919, "step": 35279 }, { "epoch": 0.613255923099654, "grad_norm": 1.7450605266428767, "learning_rate": 3.4357457919145807e-07, "loss": 0.2254, "step": 35280 }, { "epoch": 0.6132733056371569, "grad_norm": 1.6633258061702956, "learning_rate": 3.435478431354419e-07, "loss": 0.2132, "step": 35281 }, { "epoch": 0.6132906881746597, "grad_norm": 1.74491057959517, "learning_rate": 3.4352110757530715e-07, "loss": 0.2352, "step": 35282 }, { "epoch": 0.6133080707121625, "grad_norm": 1.1022896444641328, "learning_rate": 3.434943725111384e-07, "loss": 0.2233, "step": 35283 }, { "epoch": 0.6133254532496654, "grad_norm": 1.268999930193389, "learning_rate": 3.4346763794302094e-07, "loss": 0.2257, "step": 35284 }, { "epoch": 0.6133428357871682, "grad_norm": 1.2463912784491353, "learning_rate": 3.4344090387103887e-07, "loss": 0.1599, "step": 35285 }, { "epoch": 0.613360218324671, "grad_norm": 1.4412578827860651, "learning_rate": 3.4341417029527734e-07, "loss": 0.2029, "step": 35286 }, { "epoch": 0.6133776008621739, "grad_norm": 2.823954885387151, "learning_rate": 3.433874372158211e-07, "loss": 0.2156, "step": 35287 }, { "epoch": 0.6133949833996767, "grad_norm": 2.466488496922119, "learning_rate": 3.4336070463275467e-07, "loss": 0.2631, "step": 35288 }, { "epoch": 0.6134123659371795, "grad_norm": 1.6951962372908453, "learning_rate": 3.433339725461627e-07, "loss": 0.1877, "step": 35289 }, { "epoch": 0.6134297484746823, "grad_norm": 1.7886238613638552, "learning_rate": 3.4330724095613017e-07, "loss": 0.2836, "step": 35290 }, { "epoch": 0.6134471310121852, "grad_norm": 1.3721970804572652, "learning_rate": 3.432805098627416e-07, "loss": 0.1928, "step": 35291 }, { "epoch": 0.613464513549688, "grad_norm": 2.068138634098961, "learning_rate": 3.43253779266082e-07, "loss": 0.1933, "step": 35292 }, { "epoch": 0.6134818960871908, "grad_norm": 2.0928227365579426, "learning_rate": 3.432270491662359e-07, "loss": 0.1985, "step": 35293 }, { "epoch": 0.6134992786246937, "grad_norm": 1.660437545133796, "learning_rate": 3.43200319563288e-07, "loss": 0.1233, "step": 35294 }, { "epoch": 0.6135166611621965, "grad_norm": 1.53910788295488, "learning_rate": 3.4317359045732296e-07, "loss": 0.1801, "step": 35295 }, { "epoch": 0.6135340436996993, "grad_norm": 1.4659265014508582, "learning_rate": 3.4314686184842573e-07, "loss": 0.25, "step": 35296 }, { "epoch": 0.6135514262372022, "grad_norm": 1.7309856285774803, "learning_rate": 3.431201337366807e-07, "loss": 0.2039, "step": 35297 }, { "epoch": 0.6135688087747049, "grad_norm": 2.2036274441412638, "learning_rate": 3.4309340612217297e-07, "loss": 0.2486, "step": 35298 }, { "epoch": 0.6135861913122077, "grad_norm": 2.23645091013624, "learning_rate": 3.430666790049872e-07, "loss": 0.2675, "step": 35299 }, { "epoch": 0.6136035738497105, "grad_norm": 1.9047990568090614, "learning_rate": 3.430399523852077e-07, "loss": 0.1922, "step": 35300 }, { "epoch": 0.6136209563872134, "grad_norm": 2.226895772551494, "learning_rate": 3.430132262629195e-07, "loss": 0.2127, "step": 35301 }, { "epoch": 0.6136383389247162, "grad_norm": 0.8163993683391946, "learning_rate": 3.4298650063820734e-07, "loss": 0.1397, "step": 35302 }, { "epoch": 0.613655721462219, "grad_norm": 2.0976909129255747, "learning_rate": 3.429597755111557e-07, "loss": 0.2331, "step": 35303 }, { "epoch": 0.6136731039997219, "grad_norm": 1.4582941801036622, "learning_rate": 3.429330508818496e-07, "loss": 0.155, "step": 35304 }, { "epoch": 0.6136904865372247, "grad_norm": 2.175849607446619, "learning_rate": 3.4290632675037347e-07, "loss": 0.3065, "step": 35305 }, { "epoch": 0.6137078690747275, "grad_norm": 1.9584817964827574, "learning_rate": 3.4287960311681225e-07, "loss": 0.1762, "step": 35306 }, { "epoch": 0.6137252516122303, "grad_norm": 1.0560820271388123, "learning_rate": 3.4285287998125044e-07, "loss": 0.1648, "step": 35307 }, { "epoch": 0.6137426341497332, "grad_norm": 4.7514861756847715, "learning_rate": 3.4282615734377276e-07, "loss": 0.269, "step": 35308 }, { "epoch": 0.613760016687236, "grad_norm": 2.1926977790946287, "learning_rate": 3.4279943520446395e-07, "loss": 0.1824, "step": 35309 }, { "epoch": 0.6137773992247388, "grad_norm": 2.8115407046445546, "learning_rate": 3.427727135634089e-07, "loss": 0.2485, "step": 35310 }, { "epoch": 0.6137947817622417, "grad_norm": 1.8063774000168118, "learning_rate": 3.427459924206918e-07, "loss": 0.2172, "step": 35311 }, { "epoch": 0.6138121642997445, "grad_norm": 1.8136651884956865, "learning_rate": 3.42719271776398e-07, "loss": 0.3718, "step": 35312 }, { "epoch": 0.6138295468372473, "grad_norm": 1.4423015764734368, "learning_rate": 3.426925516306118e-07, "loss": 0.1614, "step": 35313 }, { "epoch": 0.6138469293747502, "grad_norm": 0.9839490242706634, "learning_rate": 3.4266583198341775e-07, "loss": 0.1916, "step": 35314 }, { "epoch": 0.613864311912253, "grad_norm": 1.0354790348741243, "learning_rate": 3.4263911283490085e-07, "loss": 0.155, "step": 35315 }, { "epoch": 0.6138816944497558, "grad_norm": 1.7486634300897244, "learning_rate": 3.4261239418514565e-07, "loss": 0.1926, "step": 35316 }, { "epoch": 0.6138990769872587, "grad_norm": 1.3526333592203066, "learning_rate": 3.4258567603423695e-07, "loss": 0.2259, "step": 35317 }, { "epoch": 0.6139164595247614, "grad_norm": 1.8118867447940517, "learning_rate": 3.425589583822594e-07, "loss": 0.2356, "step": 35318 }, { "epoch": 0.6139338420622642, "grad_norm": 1.790084624934301, "learning_rate": 3.4253224122929755e-07, "loss": 0.1508, "step": 35319 }, { "epoch": 0.613951224599767, "grad_norm": 1.3344718317989877, "learning_rate": 3.4250552457543605e-07, "loss": 0.1739, "step": 35320 }, { "epoch": 0.6139686071372699, "grad_norm": 1.2848504657491135, "learning_rate": 3.424788084207598e-07, "loss": 0.1891, "step": 35321 }, { "epoch": 0.6139859896747727, "grad_norm": 1.6783393490660392, "learning_rate": 3.4245209276535326e-07, "loss": 0.1833, "step": 35322 }, { "epoch": 0.6140033722122755, "grad_norm": 3.102136717953793, "learning_rate": 3.4242537760930133e-07, "loss": 0.2604, "step": 35323 }, { "epoch": 0.6140207547497784, "grad_norm": 1.2769432521694626, "learning_rate": 3.423986629526884e-07, "loss": 0.2027, "step": 35324 }, { "epoch": 0.6140381372872812, "grad_norm": 1.1298547190388293, "learning_rate": 3.423719487955997e-07, "loss": 0.1536, "step": 35325 }, { "epoch": 0.614055519824784, "grad_norm": 1.983650673033445, "learning_rate": 3.4234523513811917e-07, "loss": 0.1609, "step": 35326 }, { "epoch": 0.6140729023622868, "grad_norm": 1.4255660862194888, "learning_rate": 3.4231852198033194e-07, "loss": 0.2156, "step": 35327 }, { "epoch": 0.6140902848997897, "grad_norm": 1.026998064377371, "learning_rate": 3.422918093223225e-07, "loss": 0.1945, "step": 35328 }, { "epoch": 0.6141076674372925, "grad_norm": 1.3580447475746136, "learning_rate": 3.4226509716417564e-07, "loss": 0.1922, "step": 35329 }, { "epoch": 0.6141250499747953, "grad_norm": 2.1868887051145243, "learning_rate": 3.422383855059758e-07, "loss": 0.1777, "step": 35330 }, { "epoch": 0.6141424325122982, "grad_norm": 1.8791729003846294, "learning_rate": 3.422116743478081e-07, "loss": 0.2141, "step": 35331 }, { "epoch": 0.614159815049801, "grad_norm": 1.176033102367317, "learning_rate": 3.421849636897568e-07, "loss": 0.2536, "step": 35332 }, { "epoch": 0.6141771975873038, "grad_norm": 3.019169394697006, "learning_rate": 3.421582535319065e-07, "loss": 0.2179, "step": 35333 }, { "epoch": 0.6141945801248067, "grad_norm": 0.9977357936774723, "learning_rate": 3.4213154387434224e-07, "loss": 0.1552, "step": 35334 }, { "epoch": 0.6142119626623095, "grad_norm": 1.4000103687066294, "learning_rate": 3.421048347171484e-07, "loss": 0.2881, "step": 35335 }, { "epoch": 0.6142293451998123, "grad_norm": 1.7981425559212019, "learning_rate": 3.4207812606040963e-07, "loss": 0.1959, "step": 35336 }, { "epoch": 0.6142467277373151, "grad_norm": 3.1037849992153945, "learning_rate": 3.420514179042109e-07, "loss": 0.3038, "step": 35337 }, { "epoch": 0.6142641102748179, "grad_norm": 1.4602597916981728, "learning_rate": 3.4202471024863644e-07, "loss": 0.2482, "step": 35338 }, { "epoch": 0.6142814928123207, "grad_norm": 1.5292022300711736, "learning_rate": 3.4199800309377097e-07, "loss": 0.1787, "step": 35339 }, { "epoch": 0.6142988753498235, "grad_norm": 1.3688704328980752, "learning_rate": 3.4197129643969933e-07, "loss": 0.1922, "step": 35340 }, { "epoch": 0.6143162578873264, "grad_norm": 0.9602850963488477, "learning_rate": 3.4194459028650607e-07, "loss": 0.1411, "step": 35341 }, { "epoch": 0.6143336404248292, "grad_norm": 1.5695081940106421, "learning_rate": 3.4191788463427593e-07, "loss": 0.1178, "step": 35342 }, { "epoch": 0.614351022962332, "grad_norm": 1.4712792357426685, "learning_rate": 3.418911794830935e-07, "loss": 0.1996, "step": 35343 }, { "epoch": 0.6143684054998348, "grad_norm": 1.6172493549757587, "learning_rate": 3.418644748330435e-07, "loss": 0.2146, "step": 35344 }, { "epoch": 0.6143857880373377, "grad_norm": 2.15540068687313, "learning_rate": 3.4183777068421014e-07, "loss": 0.2548, "step": 35345 }, { "epoch": 0.6144031705748405, "grad_norm": 1.5808196144720486, "learning_rate": 3.4181106703667866e-07, "loss": 0.1546, "step": 35346 }, { "epoch": 0.6144205531123433, "grad_norm": 2.0482894602830117, "learning_rate": 3.417843638905332e-07, "loss": 0.1614, "step": 35347 }, { "epoch": 0.6144379356498462, "grad_norm": 1.5373485846480954, "learning_rate": 3.417576612458589e-07, "loss": 0.239, "step": 35348 }, { "epoch": 0.614455318187349, "grad_norm": 1.8706590575297442, "learning_rate": 3.4173095910273987e-07, "loss": 0.1887, "step": 35349 }, { "epoch": 0.6144727007248518, "grad_norm": 1.1365978340285137, "learning_rate": 3.417042574612614e-07, "loss": 0.1566, "step": 35350 }, { "epoch": 0.6144900832623547, "grad_norm": 1.535696746536877, "learning_rate": 3.416775563215073e-07, "loss": 0.1435, "step": 35351 }, { "epoch": 0.6145074657998575, "grad_norm": 1.0386022113896853, "learning_rate": 3.416508556835628e-07, "loss": 0.0761, "step": 35352 }, { "epoch": 0.6145248483373603, "grad_norm": 0.7243266058272854, "learning_rate": 3.4162415554751225e-07, "loss": 0.1553, "step": 35353 }, { "epoch": 0.6145422308748631, "grad_norm": 1.5586962985681736, "learning_rate": 3.4159745591344046e-07, "loss": 0.2094, "step": 35354 }, { "epoch": 0.614559613412366, "grad_norm": 1.4016736817168158, "learning_rate": 3.415707567814319e-07, "loss": 0.1513, "step": 35355 }, { "epoch": 0.6145769959498688, "grad_norm": 1.7591053644468988, "learning_rate": 3.415440581515714e-07, "loss": 0.1905, "step": 35356 }, { "epoch": 0.6145943784873716, "grad_norm": 1.700782442555137, "learning_rate": 3.4151736002394335e-07, "loss": 0.235, "step": 35357 }, { "epoch": 0.6146117610248744, "grad_norm": 1.0471998542686054, "learning_rate": 3.4149066239863254e-07, "loss": 0.122, "step": 35358 }, { "epoch": 0.6146291435623772, "grad_norm": 1.1373322537038342, "learning_rate": 3.414639652757234e-07, "loss": 0.1191, "step": 35359 }, { "epoch": 0.61464652609988, "grad_norm": 1.631814571046048, "learning_rate": 3.4143726865530075e-07, "loss": 0.2005, "step": 35360 }, { "epoch": 0.6146639086373828, "grad_norm": 1.436790584191282, "learning_rate": 3.4141057253744896e-07, "loss": 0.2497, "step": 35361 }, { "epoch": 0.6146812911748857, "grad_norm": 3.0782655617179633, "learning_rate": 3.41383876922253e-07, "loss": 0.2072, "step": 35362 }, { "epoch": 0.6146986737123885, "grad_norm": 1.4567639318868935, "learning_rate": 3.4135718180979735e-07, "loss": 0.1127, "step": 35363 }, { "epoch": 0.6147160562498913, "grad_norm": 1.5872439391181346, "learning_rate": 3.4133048720016635e-07, "loss": 0.2032, "step": 35364 }, { "epoch": 0.6147334387873942, "grad_norm": 1.557658825804409, "learning_rate": 3.413037930934449e-07, "loss": 0.1788, "step": 35365 }, { "epoch": 0.614750821324897, "grad_norm": 1.4316332609243012, "learning_rate": 3.4127709948971756e-07, "loss": 0.1415, "step": 35366 }, { "epoch": 0.6147682038623998, "grad_norm": 2.5608523376337278, "learning_rate": 3.4125040638906876e-07, "loss": 0.1803, "step": 35367 }, { "epoch": 0.6147855863999027, "grad_norm": 1.6904481395515403, "learning_rate": 3.412237137915834e-07, "loss": 0.263, "step": 35368 }, { "epoch": 0.6148029689374055, "grad_norm": 5.794788682877148, "learning_rate": 3.41197021697346e-07, "loss": 0.3577, "step": 35369 }, { "epoch": 0.6148203514749083, "grad_norm": 1.8673923360447005, "learning_rate": 3.411703301064409e-07, "loss": 0.2843, "step": 35370 }, { "epoch": 0.6148377340124112, "grad_norm": 1.5544391218059963, "learning_rate": 3.4114363901895307e-07, "loss": 0.1934, "step": 35371 }, { "epoch": 0.614855116549914, "grad_norm": 1.6455419483657263, "learning_rate": 3.4111694843496674e-07, "loss": 0.173, "step": 35372 }, { "epoch": 0.6148724990874168, "grad_norm": 1.3777017017202473, "learning_rate": 3.410902583545668e-07, "loss": 0.1551, "step": 35373 }, { "epoch": 0.6148898816249196, "grad_norm": 1.8893806384038991, "learning_rate": 3.4106356877783765e-07, "loss": 0.1969, "step": 35374 }, { "epoch": 0.6149072641624225, "grad_norm": 1.1808891084813677, "learning_rate": 3.410368797048643e-07, "loss": 0.1755, "step": 35375 }, { "epoch": 0.6149246466999253, "grad_norm": 1.7273743906654124, "learning_rate": 3.410101911357307e-07, "loss": 0.1673, "step": 35376 }, { "epoch": 0.6149420292374281, "grad_norm": 1.4792107641101002, "learning_rate": 3.409835030705219e-07, "loss": 0.1293, "step": 35377 }, { "epoch": 0.6149594117749309, "grad_norm": 1.9339300811271498, "learning_rate": 3.409568155093222e-07, "loss": 0.1696, "step": 35378 }, { "epoch": 0.6149767943124337, "grad_norm": 1.6891040271229174, "learning_rate": 3.4093012845221645e-07, "loss": 0.1468, "step": 35379 }, { "epoch": 0.6149941768499365, "grad_norm": 1.74026807974394, "learning_rate": 3.40903441899289e-07, "loss": 0.1791, "step": 35380 }, { "epoch": 0.6150115593874393, "grad_norm": 2.934065264507081, "learning_rate": 3.408767558506247e-07, "loss": 0.2371, "step": 35381 }, { "epoch": 0.6150289419249422, "grad_norm": 1.9416980194594562, "learning_rate": 3.408500703063081e-07, "loss": 0.1657, "step": 35382 }, { "epoch": 0.615046324462445, "grad_norm": 1.9872706142506587, "learning_rate": 3.408233852664235e-07, "loss": 0.1988, "step": 35383 }, { "epoch": 0.6150637069999478, "grad_norm": 1.3800020584246528, "learning_rate": 3.407967007310556e-07, "loss": 0.1934, "step": 35384 }, { "epoch": 0.6150810895374507, "grad_norm": 1.054092191627076, "learning_rate": 3.4077001670028915e-07, "loss": 0.1466, "step": 35385 }, { "epoch": 0.6150984720749535, "grad_norm": 1.396585863005799, "learning_rate": 3.4074333317420846e-07, "loss": 0.1768, "step": 35386 }, { "epoch": 0.6151158546124563, "grad_norm": 1.9910925250004494, "learning_rate": 3.4071665015289836e-07, "loss": 0.1783, "step": 35387 }, { "epoch": 0.6151332371499592, "grad_norm": 1.294027276352854, "learning_rate": 3.406899676364434e-07, "loss": 0.1918, "step": 35388 }, { "epoch": 0.615150619687462, "grad_norm": 1.5709232612193693, "learning_rate": 3.4066328562492787e-07, "loss": 0.2199, "step": 35389 }, { "epoch": 0.6151680022249648, "grad_norm": 3.7888141399973696, "learning_rate": 3.406366041184366e-07, "loss": 0.1873, "step": 35390 }, { "epoch": 0.6151853847624676, "grad_norm": 1.9804306697960379, "learning_rate": 3.4060992311705415e-07, "loss": 0.1639, "step": 35391 }, { "epoch": 0.6152027672999705, "grad_norm": 2.2931318802195917, "learning_rate": 3.405832426208649e-07, "loss": 0.174, "step": 35392 }, { "epoch": 0.6152201498374733, "grad_norm": 1.5347364245884678, "learning_rate": 3.4055656262995365e-07, "loss": 0.1942, "step": 35393 }, { "epoch": 0.6152375323749761, "grad_norm": 1.607261151823801, "learning_rate": 3.40529883144405e-07, "loss": 0.1642, "step": 35394 }, { "epoch": 0.615254914912479, "grad_norm": 1.3581642735790356, "learning_rate": 3.40503204164303e-07, "loss": 0.1974, "step": 35395 }, { "epoch": 0.6152722974499818, "grad_norm": 1.2103769332965575, "learning_rate": 3.404765256897328e-07, "loss": 0.1497, "step": 35396 }, { "epoch": 0.6152896799874846, "grad_norm": 2.153782743190177, "learning_rate": 3.4044984772077865e-07, "loss": 0.193, "step": 35397 }, { "epoch": 0.6153070625249873, "grad_norm": 1.142295314946145, "learning_rate": 3.4042317025752526e-07, "loss": 0.1692, "step": 35398 }, { "epoch": 0.6153244450624902, "grad_norm": 0.908798076075752, "learning_rate": 3.403964933000571e-07, "loss": 0.1414, "step": 35399 }, { "epoch": 0.615341827599993, "grad_norm": 1.4342523976206785, "learning_rate": 3.4036981684845865e-07, "loss": 0.1094, "step": 35400 }, { "epoch": 0.6153592101374958, "grad_norm": 2.3098349181412665, "learning_rate": 3.403431409028148e-07, "loss": 0.2258, "step": 35401 }, { "epoch": 0.6153765926749987, "grad_norm": 2.4481525391165526, "learning_rate": 3.403164654632097e-07, "loss": 0.1905, "step": 35402 }, { "epoch": 0.6153939752125015, "grad_norm": 1.023253471375424, "learning_rate": 3.4028979052972796e-07, "loss": 0.1616, "step": 35403 }, { "epoch": 0.6154113577500043, "grad_norm": 1.3850701912309922, "learning_rate": 3.402631161024543e-07, "loss": 0.21, "step": 35404 }, { "epoch": 0.6154287402875072, "grad_norm": 1.143243600502369, "learning_rate": 3.4023644218147315e-07, "loss": 0.2124, "step": 35405 }, { "epoch": 0.61544612282501, "grad_norm": 0.8970102199045475, "learning_rate": 3.402097687668691e-07, "loss": 0.2248, "step": 35406 }, { "epoch": 0.6154635053625128, "grad_norm": 1.1611133823498827, "learning_rate": 3.4018309585872684e-07, "loss": 0.1107, "step": 35407 }, { "epoch": 0.6154808879000156, "grad_norm": 1.3727622313804615, "learning_rate": 3.4015642345713065e-07, "loss": 0.1894, "step": 35408 }, { "epoch": 0.6154982704375185, "grad_norm": 1.250895612026943, "learning_rate": 3.4012975156216503e-07, "loss": 0.142, "step": 35409 }, { "epoch": 0.6155156529750213, "grad_norm": 1.5606270576734065, "learning_rate": 3.401030801739148e-07, "loss": 0.1556, "step": 35410 }, { "epoch": 0.6155330355125241, "grad_norm": 2.1800777879810247, "learning_rate": 3.400764092924643e-07, "loss": 0.2144, "step": 35411 }, { "epoch": 0.615550418050027, "grad_norm": 3.408232758143, "learning_rate": 3.4004973891789814e-07, "loss": 0.2768, "step": 35412 }, { "epoch": 0.6155678005875298, "grad_norm": 1.1878758934526539, "learning_rate": 3.4002306905030096e-07, "loss": 0.1966, "step": 35413 }, { "epoch": 0.6155851831250326, "grad_norm": 1.9125869825165454, "learning_rate": 3.39996399689757e-07, "loss": 0.2338, "step": 35414 }, { "epoch": 0.6156025656625355, "grad_norm": 1.5550389436567962, "learning_rate": 3.3996973083635093e-07, "loss": 0.2385, "step": 35415 }, { "epoch": 0.6156199482000383, "grad_norm": 1.690117936129203, "learning_rate": 3.3994306249016743e-07, "loss": 0.21, "step": 35416 }, { "epoch": 0.615637330737541, "grad_norm": 1.0856116668766362, "learning_rate": 3.3991639465129076e-07, "loss": 0.1862, "step": 35417 }, { "epoch": 0.6156547132750438, "grad_norm": 3.1835740232545646, "learning_rate": 3.398897273198057e-07, "loss": 0.2198, "step": 35418 }, { "epoch": 0.6156720958125467, "grad_norm": 2.5090606301630975, "learning_rate": 3.398630604957967e-07, "loss": 0.2148, "step": 35419 }, { "epoch": 0.6156894783500495, "grad_norm": 2.9926146638613553, "learning_rate": 3.3983639417934806e-07, "loss": 0.2066, "step": 35420 }, { "epoch": 0.6157068608875523, "grad_norm": 1.60603286645792, "learning_rate": 3.3980972837054456e-07, "loss": 0.126, "step": 35421 }, { "epoch": 0.6157242434250552, "grad_norm": 1.4766922737428163, "learning_rate": 3.3978306306947055e-07, "loss": 0.2025, "step": 35422 }, { "epoch": 0.615741625962558, "grad_norm": 2.052307806095207, "learning_rate": 3.3975639827621075e-07, "loss": 0.1791, "step": 35423 }, { "epoch": 0.6157590085000608, "grad_norm": 1.022987643975639, "learning_rate": 3.397297339908496e-07, "loss": 0.1064, "step": 35424 }, { "epoch": 0.6157763910375637, "grad_norm": 1.19385350211704, "learning_rate": 3.3970307021347144e-07, "loss": 0.2043, "step": 35425 }, { "epoch": 0.6157937735750665, "grad_norm": 1.0073582412401445, "learning_rate": 3.396764069441611e-07, "loss": 0.1717, "step": 35426 }, { "epoch": 0.6158111561125693, "grad_norm": 1.0152247467911242, "learning_rate": 3.3964974418300276e-07, "loss": 0.1417, "step": 35427 }, { "epoch": 0.6158285386500721, "grad_norm": 1.650858088214271, "learning_rate": 3.3962308193008106e-07, "loss": 0.1682, "step": 35428 }, { "epoch": 0.615845921187575, "grad_norm": 2.533696266760097, "learning_rate": 3.395964201854805e-07, "loss": 0.2173, "step": 35429 }, { "epoch": 0.6158633037250778, "grad_norm": 1.900214016115962, "learning_rate": 3.3956975894928564e-07, "loss": 0.2284, "step": 35430 }, { "epoch": 0.6158806862625806, "grad_norm": 0.9329454752401082, "learning_rate": 3.3954309822158096e-07, "loss": 0.2074, "step": 35431 }, { "epoch": 0.6158980688000835, "grad_norm": 1.4606501248153874, "learning_rate": 3.395164380024511e-07, "loss": 0.2669, "step": 35432 }, { "epoch": 0.6159154513375863, "grad_norm": 1.3719122779121002, "learning_rate": 3.394897782919802e-07, "loss": 0.2338, "step": 35433 }, { "epoch": 0.6159328338750891, "grad_norm": 1.962868534682672, "learning_rate": 3.3946311909025304e-07, "loss": 0.1549, "step": 35434 }, { "epoch": 0.615950216412592, "grad_norm": 1.5069527108012635, "learning_rate": 3.3943646039735406e-07, "loss": 0.1913, "step": 35435 }, { "epoch": 0.6159675989500948, "grad_norm": 1.2505011558352381, "learning_rate": 3.394098022133676e-07, "loss": 0.1343, "step": 35436 }, { "epoch": 0.6159849814875975, "grad_norm": 1.3945928392069364, "learning_rate": 3.3938314453837846e-07, "loss": 0.1887, "step": 35437 }, { "epoch": 0.6160023640251003, "grad_norm": 1.9796779359482288, "learning_rate": 3.393564873724711e-07, "loss": 0.1474, "step": 35438 }, { "epoch": 0.6160197465626032, "grad_norm": 1.6013146978905368, "learning_rate": 3.3932983071572974e-07, "loss": 0.213, "step": 35439 }, { "epoch": 0.616037129100106, "grad_norm": 2.1705008933412113, "learning_rate": 3.393031745682389e-07, "loss": 0.1992, "step": 35440 }, { "epoch": 0.6160545116376088, "grad_norm": 1.7367495479722257, "learning_rate": 3.3927651893008323e-07, "loss": 0.1657, "step": 35441 }, { "epoch": 0.6160718941751117, "grad_norm": 1.6193314940363426, "learning_rate": 3.392498638013471e-07, "loss": 0.1642, "step": 35442 }, { "epoch": 0.6160892767126145, "grad_norm": 1.3590550163721618, "learning_rate": 3.3922320918211524e-07, "loss": 0.166, "step": 35443 }, { "epoch": 0.6161066592501173, "grad_norm": 2.4317176900827726, "learning_rate": 3.391965550724718e-07, "loss": 0.1924, "step": 35444 }, { "epoch": 0.6161240417876201, "grad_norm": 0.9600146471283509, "learning_rate": 3.3916990147250155e-07, "loss": 0.1344, "step": 35445 }, { "epoch": 0.616141424325123, "grad_norm": 0.9762580712698122, "learning_rate": 3.391432483822888e-07, "loss": 0.2569, "step": 35446 }, { "epoch": 0.6161588068626258, "grad_norm": 2.983058868308126, "learning_rate": 3.391165958019181e-07, "loss": 0.1952, "step": 35447 }, { "epoch": 0.6161761894001286, "grad_norm": 0.9976002717702416, "learning_rate": 3.3908994373147373e-07, "loss": 0.1944, "step": 35448 }, { "epoch": 0.6161935719376315, "grad_norm": 1.4793426915172565, "learning_rate": 3.3906329217104046e-07, "loss": 0.1977, "step": 35449 }, { "epoch": 0.6162109544751343, "grad_norm": 1.679830017269068, "learning_rate": 3.3903664112070245e-07, "loss": 0.2048, "step": 35450 }, { "epoch": 0.6162283370126371, "grad_norm": 1.7351771752384115, "learning_rate": 3.3900999058054456e-07, "loss": 0.1987, "step": 35451 }, { "epoch": 0.61624571955014, "grad_norm": 5.174594030611498, "learning_rate": 3.38983340550651e-07, "loss": 0.2832, "step": 35452 }, { "epoch": 0.6162631020876428, "grad_norm": 1.4713122649797414, "learning_rate": 3.389566910311062e-07, "loss": 0.1962, "step": 35453 }, { "epoch": 0.6162804846251456, "grad_norm": 2.0752692906219368, "learning_rate": 3.389300420219948e-07, "loss": 0.2155, "step": 35454 }, { "epoch": 0.6162978671626484, "grad_norm": 1.8317620860479318, "learning_rate": 3.3890339352340116e-07, "loss": 0.213, "step": 35455 }, { "epoch": 0.6163152497001513, "grad_norm": 1.2882449683439596, "learning_rate": 3.388767455354096e-07, "loss": 0.2665, "step": 35456 }, { "epoch": 0.616332632237654, "grad_norm": 1.449752258522906, "learning_rate": 3.3885009805810506e-07, "loss": 0.1344, "step": 35457 }, { "epoch": 0.6163500147751568, "grad_norm": 1.5633348463087207, "learning_rate": 3.388234510915715e-07, "loss": 0.1918, "step": 35458 }, { "epoch": 0.6163673973126597, "grad_norm": 0.8536195731834564, "learning_rate": 3.3879680463589346e-07, "loss": 0.2285, "step": 35459 }, { "epoch": 0.6163847798501625, "grad_norm": 2.153841378294207, "learning_rate": 3.3877015869115564e-07, "loss": 0.2535, "step": 35460 }, { "epoch": 0.6164021623876653, "grad_norm": 1.491938997154448, "learning_rate": 3.3874351325744223e-07, "loss": 0.2338, "step": 35461 }, { "epoch": 0.6164195449251682, "grad_norm": 2.8178197291371814, "learning_rate": 3.387168683348379e-07, "loss": 0.3262, "step": 35462 }, { "epoch": 0.616436927462671, "grad_norm": 1.9066712659729494, "learning_rate": 3.3869022392342705e-07, "loss": 0.1796, "step": 35463 }, { "epoch": 0.6164543100001738, "grad_norm": 1.601478888141271, "learning_rate": 3.386635800232942e-07, "loss": 0.2104, "step": 35464 }, { "epoch": 0.6164716925376766, "grad_norm": 1.4233829440584707, "learning_rate": 3.386369366345234e-07, "loss": 0.1976, "step": 35465 }, { "epoch": 0.6164890750751795, "grad_norm": 1.3972294306443442, "learning_rate": 3.386102937571996e-07, "loss": 0.1857, "step": 35466 }, { "epoch": 0.6165064576126823, "grad_norm": 1.2100195604777606, "learning_rate": 3.3858365139140685e-07, "loss": 0.144, "step": 35467 }, { "epoch": 0.6165238401501851, "grad_norm": 1.383417083129069, "learning_rate": 3.385570095372299e-07, "loss": 0.1705, "step": 35468 }, { "epoch": 0.616541222687688, "grad_norm": 1.067503115373811, "learning_rate": 3.3853036819475295e-07, "loss": 0.2507, "step": 35469 }, { "epoch": 0.6165586052251908, "grad_norm": 2.447724485908129, "learning_rate": 3.3850372736406073e-07, "loss": 0.2649, "step": 35470 }, { "epoch": 0.6165759877626936, "grad_norm": 2.687617208605537, "learning_rate": 3.3847708704523745e-07, "loss": 0.2801, "step": 35471 }, { "epoch": 0.6165933703001965, "grad_norm": 1.37121564577379, "learning_rate": 3.3845044723836767e-07, "loss": 0.2567, "step": 35472 }, { "epoch": 0.6166107528376993, "grad_norm": 1.5455654483497177, "learning_rate": 3.3842380794353565e-07, "loss": 0.2454, "step": 35473 }, { "epoch": 0.6166281353752021, "grad_norm": 0.819638371566577, "learning_rate": 3.3839716916082595e-07, "loss": 0.1838, "step": 35474 }, { "epoch": 0.6166455179127049, "grad_norm": 1.6296952344741538, "learning_rate": 3.3837053089032294e-07, "loss": 0.1981, "step": 35475 }, { "epoch": 0.6166629004502078, "grad_norm": 1.0966294947559825, "learning_rate": 3.383438931321113e-07, "loss": 0.1986, "step": 35476 }, { "epoch": 0.6166802829877105, "grad_norm": 1.5644324077848746, "learning_rate": 3.383172558862751e-07, "loss": 0.1623, "step": 35477 }, { "epoch": 0.6166976655252133, "grad_norm": 1.1958556529047688, "learning_rate": 3.3829061915289894e-07, "loss": 0.2298, "step": 35478 }, { "epoch": 0.6167150480627162, "grad_norm": 1.583450675759074, "learning_rate": 3.382639829320673e-07, "loss": 0.2126, "step": 35479 }, { "epoch": 0.616732430600219, "grad_norm": 2.2689849379025855, "learning_rate": 3.3823734722386446e-07, "loss": 0.2632, "step": 35480 }, { "epoch": 0.6167498131377218, "grad_norm": 2.2319392549374837, "learning_rate": 3.382107120283749e-07, "loss": 0.2571, "step": 35481 }, { "epoch": 0.6167671956752246, "grad_norm": 1.642688165921949, "learning_rate": 3.381840773456831e-07, "loss": 0.2001, "step": 35482 }, { "epoch": 0.6167845782127275, "grad_norm": 1.193191007944868, "learning_rate": 3.381574431758736e-07, "loss": 0.1828, "step": 35483 }, { "epoch": 0.6168019607502303, "grad_norm": 0.9202346568823813, "learning_rate": 3.3813080951903044e-07, "loss": 0.2156, "step": 35484 }, { "epoch": 0.6168193432877331, "grad_norm": 1.9369377154769765, "learning_rate": 3.3810417637523835e-07, "loss": 0.2385, "step": 35485 }, { "epoch": 0.616836725825236, "grad_norm": 1.4941788501635593, "learning_rate": 3.3807754374458156e-07, "loss": 0.2641, "step": 35486 }, { "epoch": 0.6168541083627388, "grad_norm": 1.0310148963973134, "learning_rate": 3.3805091162714474e-07, "loss": 0.1858, "step": 35487 }, { "epoch": 0.6168714909002416, "grad_norm": 1.6241290426679085, "learning_rate": 3.3802428002301205e-07, "loss": 0.133, "step": 35488 }, { "epoch": 0.6168888734377445, "grad_norm": 0.7820806724684042, "learning_rate": 3.379976489322681e-07, "loss": 0.2404, "step": 35489 }, { "epoch": 0.6169062559752473, "grad_norm": 2.0143616793120858, "learning_rate": 3.3797101835499695e-07, "loss": 0.1997, "step": 35490 }, { "epoch": 0.6169236385127501, "grad_norm": 3.195134828012464, "learning_rate": 3.379443882912834e-07, "loss": 0.1173, "step": 35491 }, { "epoch": 0.616941021050253, "grad_norm": 1.0828735504396734, "learning_rate": 3.379177587412116e-07, "loss": 0.2318, "step": 35492 }, { "epoch": 0.6169584035877558, "grad_norm": 1.3225633361119526, "learning_rate": 3.378911297048661e-07, "loss": 0.2633, "step": 35493 }, { "epoch": 0.6169757861252586, "grad_norm": 1.7332475323225027, "learning_rate": 3.378645011823312e-07, "loss": 0.173, "step": 35494 }, { "epoch": 0.6169931686627614, "grad_norm": 1.1122934995835985, "learning_rate": 3.3783787317369165e-07, "loss": 0.1422, "step": 35495 }, { "epoch": 0.6170105512002643, "grad_norm": 1.5095071313662487, "learning_rate": 3.3781124567903126e-07, "loss": 0.1788, "step": 35496 }, { "epoch": 0.617027933737767, "grad_norm": 2.0599421896449814, "learning_rate": 3.377846186984348e-07, "loss": 0.1939, "step": 35497 }, { "epoch": 0.6170453162752698, "grad_norm": 1.8065565378495727, "learning_rate": 3.377579922319865e-07, "loss": 0.1828, "step": 35498 }, { "epoch": 0.6170626988127726, "grad_norm": 2.5188284905404315, "learning_rate": 3.377313662797709e-07, "loss": 0.2518, "step": 35499 }, { "epoch": 0.6170800813502755, "grad_norm": 0.9812355420187353, "learning_rate": 3.3770474084187224e-07, "loss": 0.1881, "step": 35500 }, { "epoch": 0.6170974638877783, "grad_norm": 1.0384329208726546, "learning_rate": 3.3767811591837507e-07, "loss": 0.1696, "step": 35501 }, { "epoch": 0.6171148464252811, "grad_norm": 1.1738311637540624, "learning_rate": 3.376514915093639e-07, "loss": 0.1477, "step": 35502 }, { "epoch": 0.617132228962784, "grad_norm": 1.2556813063466505, "learning_rate": 3.376248676149227e-07, "loss": 0.1723, "step": 35503 }, { "epoch": 0.6171496115002868, "grad_norm": 2.2516390921900817, "learning_rate": 3.3759824423513607e-07, "loss": 0.1624, "step": 35504 }, { "epoch": 0.6171669940377896, "grad_norm": 1.3193366361589285, "learning_rate": 3.3757162137008847e-07, "loss": 0.1898, "step": 35505 }, { "epoch": 0.6171843765752925, "grad_norm": 1.5804964486371615, "learning_rate": 3.3754499901986416e-07, "loss": 0.2202, "step": 35506 }, { "epoch": 0.6172017591127953, "grad_norm": 1.1251003135368822, "learning_rate": 3.375183771845477e-07, "loss": 0.1339, "step": 35507 }, { "epoch": 0.6172191416502981, "grad_norm": 2.322425528282013, "learning_rate": 3.374917558642233e-07, "loss": 0.2532, "step": 35508 }, { "epoch": 0.617236524187801, "grad_norm": 1.4423922179456337, "learning_rate": 3.3746513505897536e-07, "loss": 0.1825, "step": 35509 }, { "epoch": 0.6172539067253038, "grad_norm": 1.024257435434861, "learning_rate": 3.374385147688883e-07, "loss": 0.1631, "step": 35510 }, { "epoch": 0.6172712892628066, "grad_norm": 0.948975846080748, "learning_rate": 3.374118949940464e-07, "loss": 0.1453, "step": 35511 }, { "epoch": 0.6172886718003094, "grad_norm": 1.1330016595951464, "learning_rate": 3.373852757345341e-07, "loss": 0.2988, "step": 35512 }, { "epoch": 0.6173060543378123, "grad_norm": 1.8235593303899031, "learning_rate": 3.373586569904359e-07, "loss": 0.1816, "step": 35513 }, { "epoch": 0.6173234368753151, "grad_norm": 1.8090860507658149, "learning_rate": 3.3733203876183604e-07, "loss": 0.2113, "step": 35514 }, { "epoch": 0.6173408194128179, "grad_norm": 2.190850297862134, "learning_rate": 3.3730542104881875e-07, "loss": 0.1666, "step": 35515 }, { "epoch": 0.6173582019503208, "grad_norm": 1.9223371561268443, "learning_rate": 3.3727880385146866e-07, "loss": 0.1697, "step": 35516 }, { "epoch": 0.6173755844878235, "grad_norm": 1.301572995401815, "learning_rate": 3.3725218716986994e-07, "loss": 0.2069, "step": 35517 }, { "epoch": 0.6173929670253263, "grad_norm": 1.1240706561628153, "learning_rate": 3.3722557100410703e-07, "loss": 0.2544, "step": 35518 }, { "epoch": 0.6174103495628291, "grad_norm": 1.328707964957512, "learning_rate": 3.371989553542642e-07, "loss": 0.1397, "step": 35519 }, { "epoch": 0.617427732100332, "grad_norm": 1.4594474383874145, "learning_rate": 3.3717234022042604e-07, "loss": 0.1331, "step": 35520 }, { "epoch": 0.6174451146378348, "grad_norm": 1.2300868990511007, "learning_rate": 3.371457256026769e-07, "loss": 0.2385, "step": 35521 }, { "epoch": 0.6174624971753376, "grad_norm": 0.845791218729159, "learning_rate": 3.371191115011008e-07, "loss": 0.1264, "step": 35522 }, { "epoch": 0.6174798797128405, "grad_norm": 2.342899552464753, "learning_rate": 3.3709249791578234e-07, "loss": 0.1923, "step": 35523 }, { "epoch": 0.6174972622503433, "grad_norm": 0.9362181723127292, "learning_rate": 3.370658848468059e-07, "loss": 0.1698, "step": 35524 }, { "epoch": 0.6175146447878461, "grad_norm": 1.802469190889044, "learning_rate": 3.3703927229425555e-07, "loss": 0.2483, "step": 35525 }, { "epoch": 0.617532027325349, "grad_norm": 0.957909233723014, "learning_rate": 3.3701266025821597e-07, "loss": 0.1207, "step": 35526 }, { "epoch": 0.6175494098628518, "grad_norm": 2.207185870562446, "learning_rate": 3.3698604873877156e-07, "loss": 0.2287, "step": 35527 }, { "epoch": 0.6175667924003546, "grad_norm": 1.8239867377950139, "learning_rate": 3.3695943773600635e-07, "loss": 0.1587, "step": 35528 }, { "epoch": 0.6175841749378574, "grad_norm": 1.510458449214145, "learning_rate": 3.369328272500047e-07, "loss": 0.1811, "step": 35529 }, { "epoch": 0.6176015574753603, "grad_norm": 2.37895130870676, "learning_rate": 3.3690621728085125e-07, "loss": 0.2444, "step": 35530 }, { "epoch": 0.6176189400128631, "grad_norm": 4.93815223442052, "learning_rate": 3.3687960782863003e-07, "loss": 0.2796, "step": 35531 }, { "epoch": 0.6176363225503659, "grad_norm": 1.3371044394769562, "learning_rate": 3.368529988934256e-07, "loss": 0.1952, "step": 35532 }, { "epoch": 0.6176537050878688, "grad_norm": 1.4871395966860315, "learning_rate": 3.3682639047532235e-07, "loss": 0.1673, "step": 35533 }, { "epoch": 0.6176710876253716, "grad_norm": 1.3990137811895391, "learning_rate": 3.367997825744043e-07, "loss": 0.1366, "step": 35534 }, { "epoch": 0.6176884701628744, "grad_norm": 2.028368668468994, "learning_rate": 3.3677317519075605e-07, "loss": 0.3555, "step": 35535 }, { "epoch": 0.6177058527003773, "grad_norm": 2.0526258461843687, "learning_rate": 3.367465683244618e-07, "loss": 0.1406, "step": 35536 }, { "epoch": 0.61772323523788, "grad_norm": 3.241734963559081, "learning_rate": 3.367199619756059e-07, "loss": 0.1701, "step": 35537 }, { "epoch": 0.6177406177753828, "grad_norm": 1.675740330722577, "learning_rate": 3.366933561442729e-07, "loss": 0.2737, "step": 35538 }, { "epoch": 0.6177580003128856, "grad_norm": 1.019304452941283, "learning_rate": 3.366667508305469e-07, "loss": 0.167, "step": 35539 }, { "epoch": 0.6177753828503885, "grad_norm": 1.5365432002393289, "learning_rate": 3.366401460345121e-07, "loss": 0.1834, "step": 35540 }, { "epoch": 0.6177927653878913, "grad_norm": 2.4612424439848906, "learning_rate": 3.366135417562531e-07, "loss": 0.2196, "step": 35541 }, { "epoch": 0.6178101479253941, "grad_norm": 2.2624252427892166, "learning_rate": 3.3658693799585407e-07, "loss": 0.2081, "step": 35542 }, { "epoch": 0.617827530462897, "grad_norm": 1.7187555879481355, "learning_rate": 3.365603347533995e-07, "loss": 0.2723, "step": 35543 }, { "epoch": 0.6178449130003998, "grad_norm": 1.285085814160671, "learning_rate": 3.365337320289735e-07, "loss": 0.2327, "step": 35544 }, { "epoch": 0.6178622955379026, "grad_norm": 2.3426886201050947, "learning_rate": 3.3650712982266037e-07, "loss": 0.1599, "step": 35545 }, { "epoch": 0.6178796780754054, "grad_norm": 1.4194862210928056, "learning_rate": 3.3648052813454485e-07, "loss": 0.2726, "step": 35546 }, { "epoch": 0.6178970606129083, "grad_norm": 1.8873618581409797, "learning_rate": 3.3645392696471073e-07, "loss": 0.329, "step": 35547 }, { "epoch": 0.6179144431504111, "grad_norm": 1.196611838232774, "learning_rate": 3.364273263132425e-07, "loss": 0.1488, "step": 35548 }, { "epoch": 0.6179318256879139, "grad_norm": 1.4755690530180101, "learning_rate": 3.364007261802246e-07, "loss": 0.2171, "step": 35549 }, { "epoch": 0.6179492082254168, "grad_norm": 2.5440091981357162, "learning_rate": 3.3637412656574117e-07, "loss": 0.2853, "step": 35550 }, { "epoch": 0.6179665907629196, "grad_norm": 2.5846045786803717, "learning_rate": 3.363475274698766e-07, "loss": 0.18, "step": 35551 }, { "epoch": 0.6179839733004224, "grad_norm": 1.3826051824485797, "learning_rate": 3.3632092889271535e-07, "loss": 0.2365, "step": 35552 }, { "epoch": 0.6180013558379253, "grad_norm": 1.2253545767957998, "learning_rate": 3.362943308343415e-07, "loss": 0.1186, "step": 35553 }, { "epoch": 0.6180187383754281, "grad_norm": 1.4153356966618222, "learning_rate": 3.3626773329483927e-07, "loss": 0.2086, "step": 35554 }, { "epoch": 0.6180361209129309, "grad_norm": 1.2140635546106608, "learning_rate": 3.3624113627429324e-07, "loss": 0.2502, "step": 35555 }, { "epoch": 0.6180535034504336, "grad_norm": 1.0245549630862345, "learning_rate": 3.3621453977278746e-07, "loss": 0.1442, "step": 35556 }, { "epoch": 0.6180708859879365, "grad_norm": 1.5175503339816694, "learning_rate": 3.361879437904065e-07, "loss": 0.3407, "step": 35557 }, { "epoch": 0.6180882685254393, "grad_norm": 1.1208990644629502, "learning_rate": 3.3616134832723454e-07, "loss": 0.1745, "step": 35558 }, { "epoch": 0.6181056510629421, "grad_norm": 2.7620198610054305, "learning_rate": 3.3613475338335585e-07, "loss": 0.1989, "step": 35559 }, { "epoch": 0.618123033600445, "grad_norm": 1.7131424447204875, "learning_rate": 3.361081589588545e-07, "loss": 0.2113, "step": 35560 }, { "epoch": 0.6181404161379478, "grad_norm": 2.070427919486128, "learning_rate": 3.360815650538151e-07, "loss": 0.2382, "step": 35561 }, { "epoch": 0.6181577986754506, "grad_norm": 1.5720582115064627, "learning_rate": 3.360549716683219e-07, "loss": 0.1644, "step": 35562 }, { "epoch": 0.6181751812129535, "grad_norm": 2.5405473790493778, "learning_rate": 3.36028378802459e-07, "loss": 0.2904, "step": 35563 }, { "epoch": 0.6181925637504563, "grad_norm": 1.2385176117262249, "learning_rate": 3.360017864563109e-07, "loss": 0.1265, "step": 35564 }, { "epoch": 0.6182099462879591, "grad_norm": 1.2233063402204045, "learning_rate": 3.359751946299619e-07, "loss": 0.1156, "step": 35565 }, { "epoch": 0.6182273288254619, "grad_norm": 1.1709237868766895, "learning_rate": 3.359486033234962e-07, "loss": 0.1418, "step": 35566 }, { "epoch": 0.6182447113629648, "grad_norm": 1.9027179884282575, "learning_rate": 3.3592201253699787e-07, "loss": 0.2204, "step": 35567 }, { "epoch": 0.6182620939004676, "grad_norm": 1.206035234886242, "learning_rate": 3.3589542227055153e-07, "loss": 0.2579, "step": 35568 }, { "epoch": 0.6182794764379704, "grad_norm": 1.1562125938252672, "learning_rate": 3.3586883252424124e-07, "loss": 0.1514, "step": 35569 }, { "epoch": 0.6182968589754733, "grad_norm": 1.36290464807016, "learning_rate": 3.3584224329815137e-07, "loss": 0.1407, "step": 35570 }, { "epoch": 0.6183142415129761, "grad_norm": 1.4009362083500403, "learning_rate": 3.3581565459236625e-07, "loss": 0.2593, "step": 35571 }, { "epoch": 0.6183316240504789, "grad_norm": 1.633350690957407, "learning_rate": 3.357890664069701e-07, "loss": 0.2031, "step": 35572 }, { "epoch": 0.6183490065879818, "grad_norm": 1.8553806852998052, "learning_rate": 3.3576247874204705e-07, "loss": 0.2197, "step": 35573 }, { "epoch": 0.6183663891254846, "grad_norm": 1.3060147390709802, "learning_rate": 3.3573589159768157e-07, "loss": 0.2123, "step": 35574 }, { "epoch": 0.6183837716629874, "grad_norm": 2.51523548182619, "learning_rate": 3.3570930497395776e-07, "loss": 0.2055, "step": 35575 }, { "epoch": 0.6184011542004901, "grad_norm": 2.0229815636678894, "learning_rate": 3.356827188709601e-07, "loss": 0.255, "step": 35576 }, { "epoch": 0.618418536737993, "grad_norm": 1.4701368961245609, "learning_rate": 3.3565613328877283e-07, "loss": 0.1902, "step": 35577 }, { "epoch": 0.6184359192754958, "grad_norm": 1.3551748314365464, "learning_rate": 3.3562954822748006e-07, "loss": 0.2087, "step": 35578 }, { "epoch": 0.6184533018129986, "grad_norm": 1.839626802554186, "learning_rate": 3.3560296368716603e-07, "loss": 0.2069, "step": 35579 }, { "epoch": 0.6184706843505015, "grad_norm": 2.224481907228634, "learning_rate": 3.355763796679151e-07, "loss": 0.3189, "step": 35580 }, { "epoch": 0.6184880668880043, "grad_norm": 2.0246110619772093, "learning_rate": 3.355497961698115e-07, "loss": 0.2947, "step": 35581 }, { "epoch": 0.6185054494255071, "grad_norm": 1.847417451839259, "learning_rate": 3.3552321319293953e-07, "loss": 0.1451, "step": 35582 }, { "epoch": 0.61852283196301, "grad_norm": 2.478665386029338, "learning_rate": 3.354966307373834e-07, "loss": 0.224, "step": 35583 }, { "epoch": 0.6185402145005128, "grad_norm": 0.8943933270859147, "learning_rate": 3.354700488032276e-07, "loss": 0.1469, "step": 35584 }, { "epoch": 0.6185575970380156, "grad_norm": 1.3331747628709953, "learning_rate": 3.3544346739055595e-07, "loss": 0.2148, "step": 35585 }, { "epoch": 0.6185749795755184, "grad_norm": 1.872772523316874, "learning_rate": 3.35416886499453e-07, "loss": 0.2298, "step": 35586 }, { "epoch": 0.6185923621130213, "grad_norm": 2.500768547489671, "learning_rate": 3.3539030613000276e-07, "loss": 0.1808, "step": 35587 }, { "epoch": 0.6186097446505241, "grad_norm": 0.9592294318231548, "learning_rate": 3.353637262822897e-07, "loss": 0.1653, "step": 35588 }, { "epoch": 0.6186271271880269, "grad_norm": 2.275424959422257, "learning_rate": 3.3533714695639804e-07, "loss": 0.2426, "step": 35589 }, { "epoch": 0.6186445097255298, "grad_norm": 3.0977011164979666, "learning_rate": 3.353105681524121e-07, "loss": 0.3061, "step": 35590 }, { "epoch": 0.6186618922630326, "grad_norm": 7.225818271458358, "learning_rate": 3.352839898704159e-07, "loss": 0.2469, "step": 35591 }, { "epoch": 0.6186792748005354, "grad_norm": 1.0789143382780593, "learning_rate": 3.352574121104938e-07, "loss": 0.1752, "step": 35592 }, { "epoch": 0.6186966573380382, "grad_norm": 1.460084081257995, "learning_rate": 3.3523083487272994e-07, "loss": 0.1803, "step": 35593 }, { "epoch": 0.6187140398755411, "grad_norm": 1.7243043675704992, "learning_rate": 3.352042581572087e-07, "loss": 0.2121, "step": 35594 }, { "epoch": 0.6187314224130439, "grad_norm": 1.207392058406497, "learning_rate": 3.351776819640142e-07, "loss": 0.1765, "step": 35595 }, { "epoch": 0.6187488049505466, "grad_norm": 2.379929260822014, "learning_rate": 3.3515110629323097e-07, "loss": 0.3985, "step": 35596 }, { "epoch": 0.6187661874880495, "grad_norm": 1.0677350738247007, "learning_rate": 3.3512453114494287e-07, "loss": 0.2687, "step": 35597 }, { "epoch": 0.6187835700255523, "grad_norm": 1.3725965794206079, "learning_rate": 3.3509795651923413e-07, "loss": 0.1908, "step": 35598 }, { "epoch": 0.6188009525630551, "grad_norm": 2.208391985305176, "learning_rate": 3.3507138241618925e-07, "loss": 0.3734, "step": 35599 }, { "epoch": 0.618818335100558, "grad_norm": 1.2860909013116226, "learning_rate": 3.350448088358924e-07, "loss": 0.1424, "step": 35600 }, { "epoch": 0.6188357176380608, "grad_norm": 1.1466692274125037, "learning_rate": 3.3501823577842746e-07, "loss": 0.2486, "step": 35601 }, { "epoch": 0.6188531001755636, "grad_norm": 2.1022416181287964, "learning_rate": 3.3499166324387916e-07, "loss": 0.2148, "step": 35602 }, { "epoch": 0.6188704827130664, "grad_norm": 1.281214291329749, "learning_rate": 3.349650912323316e-07, "loss": 0.1848, "step": 35603 }, { "epoch": 0.6188878652505693, "grad_norm": 1.976571442544024, "learning_rate": 3.3493851974386866e-07, "loss": 0.2037, "step": 35604 }, { "epoch": 0.6189052477880721, "grad_norm": 1.9210544775073566, "learning_rate": 3.349119487785749e-07, "loss": 0.1952, "step": 35605 }, { "epoch": 0.6189226303255749, "grad_norm": 1.5797510469982519, "learning_rate": 3.3488537833653426e-07, "loss": 0.1747, "step": 35606 }, { "epoch": 0.6189400128630778, "grad_norm": 1.7206426066024227, "learning_rate": 3.348588084178313e-07, "loss": 0.2602, "step": 35607 }, { "epoch": 0.6189573954005806, "grad_norm": 1.1422752341413305, "learning_rate": 3.348322390225499e-07, "loss": 0.1791, "step": 35608 }, { "epoch": 0.6189747779380834, "grad_norm": 3.0000135172394216, "learning_rate": 3.3480567015077476e-07, "loss": 0.4145, "step": 35609 }, { "epoch": 0.6189921604755863, "grad_norm": 1.570093942862253, "learning_rate": 3.347791018025894e-07, "loss": 0.1821, "step": 35610 }, { "epoch": 0.6190095430130891, "grad_norm": 1.1535708267738471, "learning_rate": 3.3475253397807856e-07, "loss": 0.1974, "step": 35611 }, { "epoch": 0.6190269255505919, "grad_norm": 1.540669464812053, "learning_rate": 3.347259666773262e-07, "loss": 0.2939, "step": 35612 }, { "epoch": 0.6190443080880947, "grad_norm": 1.3348560093885335, "learning_rate": 3.346993999004166e-07, "loss": 0.2521, "step": 35613 }, { "epoch": 0.6190616906255976, "grad_norm": 1.8099970783300643, "learning_rate": 3.34672833647434e-07, "loss": 0.1974, "step": 35614 }, { "epoch": 0.6190790731631004, "grad_norm": 1.6248361707226768, "learning_rate": 3.346462679184627e-07, "loss": 0.2055, "step": 35615 }, { "epoch": 0.6190964557006031, "grad_norm": 1.3285070484266308, "learning_rate": 3.346197027135867e-07, "loss": 0.2016, "step": 35616 }, { "epoch": 0.619113838238106, "grad_norm": 1.1933063088414688, "learning_rate": 3.345931380328902e-07, "loss": 0.1574, "step": 35617 }, { "epoch": 0.6191312207756088, "grad_norm": 1.5939458167387632, "learning_rate": 3.3456657387645746e-07, "loss": 0.214, "step": 35618 }, { "epoch": 0.6191486033131116, "grad_norm": 2.503835967012671, "learning_rate": 3.345400102443728e-07, "loss": 0.2671, "step": 35619 }, { "epoch": 0.6191659858506144, "grad_norm": 1.2036536426306514, "learning_rate": 3.345134471367201e-07, "loss": 0.2147, "step": 35620 }, { "epoch": 0.6191833683881173, "grad_norm": 1.2849079450596843, "learning_rate": 3.34486884553584e-07, "loss": 0.1472, "step": 35621 }, { "epoch": 0.6192007509256201, "grad_norm": 1.8807385540301271, "learning_rate": 3.344603224950484e-07, "loss": 0.1961, "step": 35622 }, { "epoch": 0.6192181334631229, "grad_norm": 1.1354012723827525, "learning_rate": 3.344337609611974e-07, "loss": 0.2017, "step": 35623 }, { "epoch": 0.6192355160006258, "grad_norm": 1.32169500688319, "learning_rate": 3.344071999521154e-07, "loss": 0.1886, "step": 35624 }, { "epoch": 0.6192528985381286, "grad_norm": 5.558811217420332, "learning_rate": 3.3438063946788654e-07, "loss": 0.2738, "step": 35625 }, { "epoch": 0.6192702810756314, "grad_norm": 1.039385879793249, "learning_rate": 3.3435407950859484e-07, "loss": 0.1226, "step": 35626 }, { "epoch": 0.6192876636131343, "grad_norm": 1.3800021330110643, "learning_rate": 3.3432752007432477e-07, "loss": 0.1511, "step": 35627 }, { "epoch": 0.6193050461506371, "grad_norm": 3.0748567060243808, "learning_rate": 3.343009611651604e-07, "loss": 0.1884, "step": 35628 }, { "epoch": 0.6193224286881399, "grad_norm": 1.2820971728057953, "learning_rate": 3.342744027811857e-07, "loss": 0.177, "step": 35629 }, { "epoch": 0.6193398112256427, "grad_norm": 1.0980846975276664, "learning_rate": 3.3424784492248514e-07, "loss": 0.161, "step": 35630 }, { "epoch": 0.6193571937631456, "grad_norm": 1.7791645008190318, "learning_rate": 3.342212875891426e-07, "loss": 0.2056, "step": 35631 }, { "epoch": 0.6193745763006484, "grad_norm": 1.09328326625812, "learning_rate": 3.341947307812426e-07, "loss": 0.1778, "step": 35632 }, { "epoch": 0.6193919588381512, "grad_norm": 4.670729213775156, "learning_rate": 3.3416817449886903e-07, "loss": 0.4408, "step": 35633 }, { "epoch": 0.6194093413756541, "grad_norm": 1.279672079068327, "learning_rate": 3.341416187421064e-07, "loss": 0.182, "step": 35634 }, { "epoch": 0.6194267239131569, "grad_norm": 2.085951173169428, "learning_rate": 3.3411506351103834e-07, "loss": 0.1784, "step": 35635 }, { "epoch": 0.6194441064506596, "grad_norm": 1.6677265656773492, "learning_rate": 3.340885088057495e-07, "loss": 0.1488, "step": 35636 }, { "epoch": 0.6194614889881624, "grad_norm": 1.2581173232604874, "learning_rate": 3.3406195462632375e-07, "loss": 0.1742, "step": 35637 }, { "epoch": 0.6194788715256653, "grad_norm": 1.123209026054505, "learning_rate": 3.340354009728455e-07, "loss": 0.2117, "step": 35638 }, { "epoch": 0.6194962540631681, "grad_norm": 1.3788887085406685, "learning_rate": 3.340088478453986e-07, "loss": 0.224, "step": 35639 }, { "epoch": 0.6195136366006709, "grad_norm": 2.2527659326962555, "learning_rate": 3.339822952440676e-07, "loss": 0.3127, "step": 35640 }, { "epoch": 0.6195310191381738, "grad_norm": 1.2019293095075285, "learning_rate": 3.339557431689366e-07, "loss": 0.1806, "step": 35641 }, { "epoch": 0.6195484016756766, "grad_norm": 3.1646080563905934, "learning_rate": 3.339291916200894e-07, "loss": 0.2063, "step": 35642 }, { "epoch": 0.6195657842131794, "grad_norm": 1.2139816814303586, "learning_rate": 3.3390264059761037e-07, "loss": 0.1761, "step": 35643 }, { "epoch": 0.6195831667506823, "grad_norm": 1.4602464404746156, "learning_rate": 3.338760901015837e-07, "loss": 0.1811, "step": 35644 }, { "epoch": 0.6196005492881851, "grad_norm": 1.9946832284261276, "learning_rate": 3.338495401320934e-07, "loss": 0.2097, "step": 35645 }, { "epoch": 0.6196179318256879, "grad_norm": 1.4447522309122764, "learning_rate": 3.3382299068922393e-07, "loss": 0.2516, "step": 35646 }, { "epoch": 0.6196353143631907, "grad_norm": 2.8943976423990896, "learning_rate": 3.3379644177305933e-07, "loss": 0.206, "step": 35647 }, { "epoch": 0.6196526969006936, "grad_norm": 1.4643703189966866, "learning_rate": 3.337698933836835e-07, "loss": 0.2609, "step": 35648 }, { "epoch": 0.6196700794381964, "grad_norm": 2.7026684070196434, "learning_rate": 3.337433455211807e-07, "loss": 0.2848, "step": 35649 }, { "epoch": 0.6196874619756992, "grad_norm": 2.3025600875285503, "learning_rate": 3.3371679818563514e-07, "loss": 0.2015, "step": 35650 }, { "epoch": 0.6197048445132021, "grad_norm": 2.2526653943659096, "learning_rate": 3.336902513771309e-07, "loss": 0.1005, "step": 35651 }, { "epoch": 0.6197222270507049, "grad_norm": 1.384967117037017, "learning_rate": 3.3366370509575224e-07, "loss": 0.2628, "step": 35652 }, { "epoch": 0.6197396095882077, "grad_norm": 2.1646886070047824, "learning_rate": 3.3363715934158343e-07, "loss": 0.1768, "step": 35653 }, { "epoch": 0.6197569921257106, "grad_norm": 1.7173630542922815, "learning_rate": 3.336106141147081e-07, "loss": 0.1221, "step": 35654 }, { "epoch": 0.6197743746632134, "grad_norm": 2.078873930866814, "learning_rate": 3.335840694152108e-07, "loss": 0.2232, "step": 35655 }, { "epoch": 0.6197917572007161, "grad_norm": 1.6830047395055185, "learning_rate": 3.335575252431755e-07, "loss": 0.1825, "step": 35656 }, { "epoch": 0.6198091397382189, "grad_norm": 0.8611538539610895, "learning_rate": 3.3353098159868644e-07, "loss": 0.246, "step": 35657 }, { "epoch": 0.6198265222757218, "grad_norm": 1.2967389851858413, "learning_rate": 3.335044384818277e-07, "loss": 0.1957, "step": 35658 }, { "epoch": 0.6198439048132246, "grad_norm": 1.1096001909355204, "learning_rate": 3.3347789589268356e-07, "loss": 0.1507, "step": 35659 }, { "epoch": 0.6198612873507274, "grad_norm": 2.423939073171606, "learning_rate": 3.334513538313378e-07, "loss": 0.3555, "step": 35660 }, { "epoch": 0.6198786698882303, "grad_norm": 1.587762584584708, "learning_rate": 3.3342481229787477e-07, "loss": 0.3422, "step": 35661 }, { "epoch": 0.6198960524257331, "grad_norm": 1.0128694681527488, "learning_rate": 3.333982712923785e-07, "loss": 0.1367, "step": 35662 }, { "epoch": 0.6199134349632359, "grad_norm": 1.059741321138684, "learning_rate": 3.333717308149333e-07, "loss": 0.1977, "step": 35663 }, { "epoch": 0.6199308175007388, "grad_norm": 1.5304941991576981, "learning_rate": 3.33345190865623e-07, "loss": 0.1945, "step": 35664 }, { "epoch": 0.6199482000382416, "grad_norm": 1.0567440756157358, "learning_rate": 3.333186514445321e-07, "loss": 0.203, "step": 35665 }, { "epoch": 0.6199655825757444, "grad_norm": 1.443038946921328, "learning_rate": 3.332921125517445e-07, "loss": 0.1558, "step": 35666 }, { "epoch": 0.6199829651132472, "grad_norm": 1.1423432392252317, "learning_rate": 3.332655741873443e-07, "loss": 0.2579, "step": 35667 }, { "epoch": 0.6200003476507501, "grad_norm": 1.1343300306688875, "learning_rate": 3.332390363514155e-07, "loss": 0.1477, "step": 35668 }, { "epoch": 0.6200177301882529, "grad_norm": 0.9889530609386364, "learning_rate": 3.3321249904404245e-07, "loss": 0.1771, "step": 35669 }, { "epoch": 0.6200351127257557, "grad_norm": 1.6055529543900817, "learning_rate": 3.331859622653091e-07, "loss": 0.2835, "step": 35670 }, { "epoch": 0.6200524952632586, "grad_norm": 1.2383809794669143, "learning_rate": 3.331594260152997e-07, "loss": 0.1703, "step": 35671 }, { "epoch": 0.6200698778007614, "grad_norm": 1.1931143351342322, "learning_rate": 3.3313289029409833e-07, "loss": 0.2565, "step": 35672 }, { "epoch": 0.6200872603382642, "grad_norm": 1.5848989813655348, "learning_rate": 3.33106355101789e-07, "loss": 0.1191, "step": 35673 }, { "epoch": 0.620104642875767, "grad_norm": 1.3140449476394425, "learning_rate": 3.3307982043845575e-07, "loss": 0.1979, "step": 35674 }, { "epoch": 0.6201220254132699, "grad_norm": 0.9001390617520193, "learning_rate": 3.3305328630418297e-07, "loss": 0.1074, "step": 35675 }, { "epoch": 0.6201394079507726, "grad_norm": 1.6584777388389813, "learning_rate": 3.330267526990543e-07, "loss": 0.2318, "step": 35676 }, { "epoch": 0.6201567904882754, "grad_norm": 1.2852969294529637, "learning_rate": 3.330002196231544e-07, "loss": 0.2289, "step": 35677 }, { "epoch": 0.6201741730257783, "grad_norm": 1.1145765249191106, "learning_rate": 3.329736870765671e-07, "loss": 0.1815, "step": 35678 }, { "epoch": 0.6201915555632811, "grad_norm": 1.0955810337359906, "learning_rate": 3.329471550593763e-07, "loss": 0.1697, "step": 35679 }, { "epoch": 0.6202089381007839, "grad_norm": 0.8807885460614333, "learning_rate": 3.329206235716664e-07, "loss": 0.1965, "step": 35680 }, { "epoch": 0.6202263206382868, "grad_norm": 1.584218685676752, "learning_rate": 3.3289409261352135e-07, "loss": 0.1352, "step": 35681 }, { "epoch": 0.6202437031757896, "grad_norm": 1.3612926903265965, "learning_rate": 3.3286756218502523e-07, "loss": 0.2457, "step": 35682 }, { "epoch": 0.6202610857132924, "grad_norm": 1.5379364395291397, "learning_rate": 3.3284103228626214e-07, "loss": 0.2119, "step": 35683 }, { "epoch": 0.6202784682507952, "grad_norm": 1.6102369764292932, "learning_rate": 3.3281450291731614e-07, "loss": 0.2616, "step": 35684 }, { "epoch": 0.6202958507882981, "grad_norm": 2.3115705809707747, "learning_rate": 3.3278797407827163e-07, "loss": 0.1844, "step": 35685 }, { "epoch": 0.6203132333258009, "grad_norm": 3.9321375609372473, "learning_rate": 3.327614457692123e-07, "loss": 0.1997, "step": 35686 }, { "epoch": 0.6203306158633037, "grad_norm": 1.1916222899336775, "learning_rate": 3.3273491799022226e-07, "loss": 0.1935, "step": 35687 }, { "epoch": 0.6203479984008066, "grad_norm": 1.6604673380382688, "learning_rate": 3.3270839074138576e-07, "loss": 0.1797, "step": 35688 }, { "epoch": 0.6203653809383094, "grad_norm": 2.758378785449604, "learning_rate": 3.3268186402278685e-07, "loss": 0.2516, "step": 35689 }, { "epoch": 0.6203827634758122, "grad_norm": 2.8869670300129, "learning_rate": 3.3265533783450945e-07, "loss": 0.1649, "step": 35690 }, { "epoch": 0.6204001460133151, "grad_norm": 1.4080287724821852, "learning_rate": 3.32628812176638e-07, "loss": 0.1967, "step": 35691 }, { "epoch": 0.6204175285508179, "grad_norm": 2.7746833233487878, "learning_rate": 3.3260228704925617e-07, "loss": 0.2036, "step": 35692 }, { "epoch": 0.6204349110883207, "grad_norm": 2.336049065169098, "learning_rate": 3.325757624524481e-07, "loss": 0.2127, "step": 35693 }, { "epoch": 0.6204522936258235, "grad_norm": 2.0587436406345154, "learning_rate": 3.325492383862981e-07, "loss": 0.1582, "step": 35694 }, { "epoch": 0.6204696761633263, "grad_norm": 1.4263276314446973, "learning_rate": 3.325227148508899e-07, "loss": 0.13, "step": 35695 }, { "epoch": 0.6204870587008291, "grad_norm": 1.594768779981981, "learning_rate": 3.3249619184630793e-07, "loss": 0.1927, "step": 35696 }, { "epoch": 0.6205044412383319, "grad_norm": 1.6892630892005955, "learning_rate": 3.324696693726362e-07, "loss": 0.1839, "step": 35697 }, { "epoch": 0.6205218237758348, "grad_norm": 2.132950504219935, "learning_rate": 3.324431474299586e-07, "loss": 0.1868, "step": 35698 }, { "epoch": 0.6205392063133376, "grad_norm": 2.043971173100216, "learning_rate": 3.3241662601835907e-07, "loss": 0.2299, "step": 35699 }, { "epoch": 0.6205565888508404, "grad_norm": 1.4508357187482275, "learning_rate": 3.32390105137922e-07, "loss": 0.1501, "step": 35700 }, { "epoch": 0.6205739713883432, "grad_norm": 1.1769233872171254, "learning_rate": 3.3236358478873116e-07, "loss": 0.1115, "step": 35701 }, { "epoch": 0.6205913539258461, "grad_norm": 1.0787473275570572, "learning_rate": 3.3233706497087087e-07, "loss": 0.205, "step": 35702 }, { "epoch": 0.6206087364633489, "grad_norm": 1.3423517884866507, "learning_rate": 3.32310545684425e-07, "loss": 0.1261, "step": 35703 }, { "epoch": 0.6206261190008517, "grad_norm": 1.3908458892582982, "learning_rate": 3.322840269294779e-07, "loss": 0.2017, "step": 35704 }, { "epoch": 0.6206435015383546, "grad_norm": 1.2684374386106478, "learning_rate": 3.3225750870611324e-07, "loss": 0.1825, "step": 35705 }, { "epoch": 0.6206608840758574, "grad_norm": 1.1711077471891518, "learning_rate": 3.3223099101441523e-07, "loss": 0.1871, "step": 35706 }, { "epoch": 0.6206782666133602, "grad_norm": 1.8798735147114558, "learning_rate": 3.322044738544678e-07, "loss": 0.2938, "step": 35707 }, { "epoch": 0.6206956491508631, "grad_norm": 1.6612546879146626, "learning_rate": 3.321779572263552e-07, "loss": 0.1988, "step": 35708 }, { "epoch": 0.6207130316883659, "grad_norm": 1.1436814925903105, "learning_rate": 3.3215144113016123e-07, "loss": 0.1943, "step": 35709 }, { "epoch": 0.6207304142258687, "grad_norm": 1.193645195956502, "learning_rate": 3.321249255659704e-07, "loss": 0.1993, "step": 35710 }, { "epoch": 0.6207477967633716, "grad_norm": 1.9296579273417773, "learning_rate": 3.3209841053386636e-07, "loss": 0.1804, "step": 35711 }, { "epoch": 0.6207651793008744, "grad_norm": 1.7174104753792243, "learning_rate": 3.32071896033933e-07, "loss": 0.1924, "step": 35712 }, { "epoch": 0.6207825618383772, "grad_norm": 1.5334556649528592, "learning_rate": 3.3204538206625475e-07, "loss": 0.2254, "step": 35713 }, { "epoch": 0.62079994437588, "grad_norm": 1.3786858828605337, "learning_rate": 3.3201886863091556e-07, "loss": 0.2576, "step": 35714 }, { "epoch": 0.6208173269133828, "grad_norm": 2.1370947826808826, "learning_rate": 3.3199235572799916e-07, "loss": 0.2611, "step": 35715 }, { "epoch": 0.6208347094508856, "grad_norm": 1.8522186344690683, "learning_rate": 3.319658433575901e-07, "loss": 0.189, "step": 35716 }, { "epoch": 0.6208520919883884, "grad_norm": 1.140950869041486, "learning_rate": 3.3193933151977203e-07, "loss": 0.1245, "step": 35717 }, { "epoch": 0.6208694745258913, "grad_norm": 3.246335972754322, "learning_rate": 3.3191282021462896e-07, "loss": 0.2837, "step": 35718 }, { "epoch": 0.6208868570633941, "grad_norm": 4.327401674317053, "learning_rate": 3.318863094422451e-07, "loss": 0.3195, "step": 35719 }, { "epoch": 0.6209042396008969, "grad_norm": 2.0245017712515456, "learning_rate": 3.318597992027044e-07, "loss": 0.1591, "step": 35720 }, { "epoch": 0.6209216221383997, "grad_norm": 0.9818422847111916, "learning_rate": 3.318332894960909e-07, "loss": 0.1616, "step": 35721 }, { "epoch": 0.6209390046759026, "grad_norm": 1.5114341808245513, "learning_rate": 3.3180678032248865e-07, "loss": 0.1756, "step": 35722 }, { "epoch": 0.6209563872134054, "grad_norm": 0.9757877557440965, "learning_rate": 3.317802716819818e-07, "loss": 0.1965, "step": 35723 }, { "epoch": 0.6209737697509082, "grad_norm": 1.9110243938911677, "learning_rate": 3.31753763574654e-07, "loss": 0.2181, "step": 35724 }, { "epoch": 0.6209911522884111, "grad_norm": 1.5953399108447783, "learning_rate": 3.3172725600058955e-07, "loss": 0.3106, "step": 35725 }, { "epoch": 0.6210085348259139, "grad_norm": 1.4460374422666669, "learning_rate": 3.3170074895987235e-07, "loss": 0.155, "step": 35726 }, { "epoch": 0.6210259173634167, "grad_norm": 1.3433584435786519, "learning_rate": 3.3167424245258647e-07, "loss": 0.1234, "step": 35727 }, { "epoch": 0.6210432999009196, "grad_norm": 1.6617868714447808, "learning_rate": 3.316477364788159e-07, "loss": 0.2275, "step": 35728 }, { "epoch": 0.6210606824384224, "grad_norm": 1.3641077874514922, "learning_rate": 3.3162123103864494e-07, "loss": 0.1392, "step": 35729 }, { "epoch": 0.6210780649759252, "grad_norm": 1.220017605637024, "learning_rate": 3.31594726132157e-07, "loss": 0.144, "step": 35730 }, { "epoch": 0.621095447513428, "grad_norm": 1.9596590474455469, "learning_rate": 3.315682217594366e-07, "loss": 0.2041, "step": 35731 }, { "epoch": 0.6211128300509309, "grad_norm": 7.553503292606688, "learning_rate": 3.315417179205674e-07, "loss": 0.2882, "step": 35732 }, { "epoch": 0.6211302125884337, "grad_norm": 1.6112237070329007, "learning_rate": 3.315152146156337e-07, "loss": 0.2538, "step": 35733 }, { "epoch": 0.6211475951259365, "grad_norm": 2.49985064887518, "learning_rate": 3.3148871184471927e-07, "loss": 0.2404, "step": 35734 }, { "epoch": 0.6211649776634393, "grad_norm": 1.617827030374575, "learning_rate": 3.3146220960790845e-07, "loss": 0.2104, "step": 35735 }, { "epoch": 0.6211823602009421, "grad_norm": 1.0088641385849784, "learning_rate": 3.3143570790528483e-07, "loss": 0.1553, "step": 35736 }, { "epoch": 0.6211997427384449, "grad_norm": 1.800019327486403, "learning_rate": 3.3140920673693265e-07, "loss": 0.155, "step": 35737 }, { "epoch": 0.6212171252759477, "grad_norm": 1.2204806722898296, "learning_rate": 3.3138270610293575e-07, "loss": 0.1407, "step": 35738 }, { "epoch": 0.6212345078134506, "grad_norm": 2.6491695327620697, "learning_rate": 3.313562060033782e-07, "loss": 0.3579, "step": 35739 }, { "epoch": 0.6212518903509534, "grad_norm": 2.4047322574906804, "learning_rate": 3.3132970643834403e-07, "loss": 0.252, "step": 35740 }, { "epoch": 0.6212692728884562, "grad_norm": 1.6935007462066491, "learning_rate": 3.313032074079173e-07, "loss": 0.1732, "step": 35741 }, { "epoch": 0.6212866554259591, "grad_norm": 1.6101012075271686, "learning_rate": 3.3127670891218197e-07, "loss": 0.1204, "step": 35742 }, { "epoch": 0.6213040379634619, "grad_norm": 1.4243660444041255, "learning_rate": 3.3125021095122173e-07, "loss": 0.1362, "step": 35743 }, { "epoch": 0.6213214205009647, "grad_norm": 1.3854641863271693, "learning_rate": 3.31223713525121e-07, "loss": 0.1262, "step": 35744 }, { "epoch": 0.6213388030384676, "grad_norm": 1.7312688239618985, "learning_rate": 3.311972166339635e-07, "loss": 0.224, "step": 35745 }, { "epoch": 0.6213561855759704, "grad_norm": 2.4571556177637928, "learning_rate": 3.311707202778332e-07, "loss": 0.1189, "step": 35746 }, { "epoch": 0.6213735681134732, "grad_norm": 1.5442117864655696, "learning_rate": 3.3114422445681425e-07, "loss": 0.1602, "step": 35747 }, { "epoch": 0.621390950650976, "grad_norm": 1.2817690855457249, "learning_rate": 3.311177291709907e-07, "loss": 0.1717, "step": 35748 }, { "epoch": 0.6214083331884789, "grad_norm": 2.6689898832582464, "learning_rate": 3.3109123442044604e-07, "loss": 0.1996, "step": 35749 }, { "epoch": 0.6214257157259817, "grad_norm": 1.1371197388355025, "learning_rate": 3.310647402052648e-07, "loss": 0.2334, "step": 35750 }, { "epoch": 0.6214430982634845, "grad_norm": 1.4150773873758669, "learning_rate": 3.310382465255306e-07, "loss": 0.1266, "step": 35751 }, { "epoch": 0.6214604808009874, "grad_norm": 1.778600700631304, "learning_rate": 3.310117533813276e-07, "loss": 0.1642, "step": 35752 }, { "epoch": 0.6214778633384902, "grad_norm": 1.1521506897251226, "learning_rate": 3.3098526077273967e-07, "loss": 0.2082, "step": 35753 }, { "epoch": 0.621495245875993, "grad_norm": 1.248857039340252, "learning_rate": 3.309587686998511e-07, "loss": 0.1805, "step": 35754 }, { "epoch": 0.6215126284134957, "grad_norm": 1.0193044614844846, "learning_rate": 3.309322771627453e-07, "loss": 0.1508, "step": 35755 }, { "epoch": 0.6215300109509986, "grad_norm": 2.8822714944552477, "learning_rate": 3.309057861615066e-07, "loss": 0.1562, "step": 35756 }, { "epoch": 0.6215473934885014, "grad_norm": 1.9254213978412968, "learning_rate": 3.3087929569621883e-07, "loss": 0.1459, "step": 35757 }, { "epoch": 0.6215647760260042, "grad_norm": 1.718852503496921, "learning_rate": 3.308528057669661e-07, "loss": 0.1583, "step": 35758 }, { "epoch": 0.6215821585635071, "grad_norm": 1.1674603633921423, "learning_rate": 3.3082631637383204e-07, "loss": 0.1526, "step": 35759 }, { "epoch": 0.6215995411010099, "grad_norm": 1.1636712036622625, "learning_rate": 3.3079982751690104e-07, "loss": 0.1994, "step": 35760 }, { "epoch": 0.6216169236385127, "grad_norm": 2.341653005725636, "learning_rate": 3.3077333919625704e-07, "loss": 0.2422, "step": 35761 }, { "epoch": 0.6216343061760156, "grad_norm": 1.3285903505394745, "learning_rate": 3.307468514119836e-07, "loss": 0.1659, "step": 35762 }, { "epoch": 0.6216516887135184, "grad_norm": 1.521843208846933, "learning_rate": 3.307203641641648e-07, "loss": 0.1865, "step": 35763 }, { "epoch": 0.6216690712510212, "grad_norm": 1.6015613123453905, "learning_rate": 3.306938774528849e-07, "loss": 0.1591, "step": 35764 }, { "epoch": 0.621686453788524, "grad_norm": 1.2713150662758477, "learning_rate": 3.306673912782274e-07, "loss": 0.17, "step": 35765 }, { "epoch": 0.6217038363260269, "grad_norm": 1.9754675586877763, "learning_rate": 3.306409056402767e-07, "loss": 0.1311, "step": 35766 }, { "epoch": 0.6217212188635297, "grad_norm": 1.7011120941276037, "learning_rate": 3.306144205391166e-07, "loss": 0.2846, "step": 35767 }, { "epoch": 0.6217386014010325, "grad_norm": 1.6214567223645793, "learning_rate": 3.305879359748307e-07, "loss": 0.1906, "step": 35768 }, { "epoch": 0.6217559839385354, "grad_norm": 2.091637738216578, "learning_rate": 3.305614519475034e-07, "loss": 0.2233, "step": 35769 }, { "epoch": 0.6217733664760382, "grad_norm": 1.3493781816839825, "learning_rate": 3.305349684572184e-07, "loss": 0.1813, "step": 35770 }, { "epoch": 0.621790749013541, "grad_norm": 1.5165829604228362, "learning_rate": 3.3050848550405964e-07, "loss": 0.1163, "step": 35771 }, { "epoch": 0.6218081315510439, "grad_norm": 1.9507662143523914, "learning_rate": 3.3048200308811127e-07, "loss": 0.133, "step": 35772 }, { "epoch": 0.6218255140885467, "grad_norm": 1.034192536813533, "learning_rate": 3.304555212094571e-07, "loss": 0.1196, "step": 35773 }, { "epoch": 0.6218428966260495, "grad_norm": 2.8062712726448193, "learning_rate": 3.3042903986818085e-07, "loss": 0.2317, "step": 35774 }, { "epoch": 0.6218602791635522, "grad_norm": 1.91500974288063, "learning_rate": 3.3040255906436684e-07, "loss": 0.1922, "step": 35775 }, { "epoch": 0.6218776617010551, "grad_norm": 3.2602107523901154, "learning_rate": 3.3037607879809857e-07, "loss": 0.2169, "step": 35776 }, { "epoch": 0.6218950442385579, "grad_norm": 3.9661108030201127, "learning_rate": 3.303495990694605e-07, "loss": 0.1464, "step": 35777 }, { "epoch": 0.6219124267760607, "grad_norm": 1.4473513690582804, "learning_rate": 3.303231198785361e-07, "loss": 0.2143, "step": 35778 }, { "epoch": 0.6219298093135636, "grad_norm": 1.656783054430745, "learning_rate": 3.302966412254097e-07, "loss": 0.1929, "step": 35779 }, { "epoch": 0.6219471918510664, "grad_norm": 1.624440264080845, "learning_rate": 3.3027016311016464e-07, "loss": 0.1517, "step": 35780 }, { "epoch": 0.6219645743885692, "grad_norm": 1.4718615455072523, "learning_rate": 3.302436855328854e-07, "loss": 0.1366, "step": 35781 }, { "epoch": 0.621981956926072, "grad_norm": 1.703332495379841, "learning_rate": 3.3021720849365555e-07, "loss": 0.1703, "step": 35782 }, { "epoch": 0.6219993394635749, "grad_norm": 1.335616668006001, "learning_rate": 3.301907319925593e-07, "loss": 0.1615, "step": 35783 }, { "epoch": 0.6220167220010777, "grad_norm": 1.4821024746746043, "learning_rate": 3.3016425602968027e-07, "loss": 0.159, "step": 35784 }, { "epoch": 0.6220341045385805, "grad_norm": 1.932086251267687, "learning_rate": 3.3013778060510267e-07, "loss": 0.1833, "step": 35785 }, { "epoch": 0.6220514870760834, "grad_norm": 1.6704827053385827, "learning_rate": 3.3011130571891043e-07, "loss": 0.2401, "step": 35786 }, { "epoch": 0.6220688696135862, "grad_norm": 1.597604841073688, "learning_rate": 3.3008483137118715e-07, "loss": 0.1985, "step": 35787 }, { "epoch": 0.622086252151089, "grad_norm": 1.872339685776568, "learning_rate": 3.300583575620168e-07, "loss": 0.1696, "step": 35788 }, { "epoch": 0.6221036346885919, "grad_norm": 2.2031971916658994, "learning_rate": 3.3003188429148347e-07, "loss": 0.1756, "step": 35789 }, { "epoch": 0.6221210172260947, "grad_norm": 1.569545493654938, "learning_rate": 3.3000541155967094e-07, "loss": 0.1129, "step": 35790 }, { "epoch": 0.6221383997635975, "grad_norm": 1.8697031490491824, "learning_rate": 3.299789393666632e-07, "loss": 0.1554, "step": 35791 }, { "epoch": 0.6221557823011004, "grad_norm": 1.6563756141471304, "learning_rate": 3.299524677125443e-07, "loss": 0.2661, "step": 35792 }, { "epoch": 0.6221731648386032, "grad_norm": 1.7814437918267627, "learning_rate": 3.2992599659739786e-07, "loss": 0.2203, "step": 35793 }, { "epoch": 0.622190547376106, "grad_norm": 0.9970234735966974, "learning_rate": 3.298995260213078e-07, "loss": 0.1901, "step": 35794 }, { "epoch": 0.6222079299136087, "grad_norm": 1.108589768838349, "learning_rate": 3.298730559843581e-07, "loss": 0.1432, "step": 35795 }, { "epoch": 0.6222253124511116, "grad_norm": 1.767942724047656, "learning_rate": 3.298465864866326e-07, "loss": 0.2011, "step": 35796 }, { "epoch": 0.6222426949886144, "grad_norm": 1.7103758774494875, "learning_rate": 3.298201175282154e-07, "loss": 0.1888, "step": 35797 }, { "epoch": 0.6222600775261172, "grad_norm": 1.9370493406547116, "learning_rate": 3.297936491091903e-07, "loss": 0.2762, "step": 35798 }, { "epoch": 0.6222774600636201, "grad_norm": 1.3513262099634058, "learning_rate": 3.2976718122964097e-07, "loss": 0.1275, "step": 35799 }, { "epoch": 0.6222948426011229, "grad_norm": 1.1906794966649064, "learning_rate": 3.2974071388965165e-07, "loss": 0.2007, "step": 35800 }, { "epoch": 0.6223122251386257, "grad_norm": 1.312097510287849, "learning_rate": 3.2971424708930587e-07, "loss": 0.2266, "step": 35801 }, { "epoch": 0.6223296076761285, "grad_norm": 1.28769191985031, "learning_rate": 3.296877808286879e-07, "loss": 0.1432, "step": 35802 }, { "epoch": 0.6223469902136314, "grad_norm": 2.3004842109166486, "learning_rate": 3.296613151078813e-07, "loss": 0.2149, "step": 35803 }, { "epoch": 0.6223643727511342, "grad_norm": 2.653252141301045, "learning_rate": 3.2963484992696997e-07, "loss": 0.1823, "step": 35804 }, { "epoch": 0.622381755288637, "grad_norm": 1.2940172109962578, "learning_rate": 3.296083852860382e-07, "loss": 0.1644, "step": 35805 }, { "epoch": 0.6223991378261399, "grad_norm": 2.047384553724325, "learning_rate": 3.295819211851695e-07, "loss": 0.2522, "step": 35806 }, { "epoch": 0.6224165203636427, "grad_norm": 1.5375393944123408, "learning_rate": 3.295554576244477e-07, "loss": 0.1882, "step": 35807 }, { "epoch": 0.6224339029011455, "grad_norm": 2.695835141120022, "learning_rate": 3.295289946039569e-07, "loss": 0.1772, "step": 35808 }, { "epoch": 0.6224512854386484, "grad_norm": 1.289008322539984, "learning_rate": 3.295025321237807e-07, "loss": 0.151, "step": 35809 }, { "epoch": 0.6224686679761512, "grad_norm": 1.5189754140023923, "learning_rate": 3.2947607018400334e-07, "loss": 0.1729, "step": 35810 }, { "epoch": 0.622486050513654, "grad_norm": 1.8919585444300655, "learning_rate": 3.2944960878470856e-07, "loss": 0.1803, "step": 35811 }, { "epoch": 0.6225034330511569, "grad_norm": 1.3112708662941934, "learning_rate": 3.2942314792598015e-07, "loss": 0.1864, "step": 35812 }, { "epoch": 0.6225208155886597, "grad_norm": 1.2826699756626598, "learning_rate": 3.2939668760790186e-07, "loss": 0.1353, "step": 35813 }, { "epoch": 0.6225381981261625, "grad_norm": 1.3313907957389737, "learning_rate": 3.2937022783055775e-07, "loss": 0.1704, "step": 35814 }, { "epoch": 0.6225555806636652, "grad_norm": 3.1147208598813263, "learning_rate": 3.293437685940316e-07, "loss": 0.1982, "step": 35815 }, { "epoch": 0.6225729632011681, "grad_norm": 1.250457862305157, "learning_rate": 3.293173098984073e-07, "loss": 0.1268, "step": 35816 }, { "epoch": 0.6225903457386709, "grad_norm": 1.4532679768284724, "learning_rate": 3.2929085174376896e-07, "loss": 0.3085, "step": 35817 }, { "epoch": 0.6226077282761737, "grad_norm": 2.177841418342156, "learning_rate": 3.2926439413020006e-07, "loss": 0.1301, "step": 35818 }, { "epoch": 0.6226251108136766, "grad_norm": 1.5222116740052067, "learning_rate": 3.292379370577845e-07, "loss": 0.1975, "step": 35819 }, { "epoch": 0.6226424933511794, "grad_norm": 1.487086835224584, "learning_rate": 3.2921148052660636e-07, "loss": 0.1352, "step": 35820 }, { "epoch": 0.6226598758886822, "grad_norm": 1.3481379657971462, "learning_rate": 3.291850245367492e-07, "loss": 0.2738, "step": 35821 }, { "epoch": 0.622677258426185, "grad_norm": 1.862749201385247, "learning_rate": 3.291585690882972e-07, "loss": 0.2262, "step": 35822 }, { "epoch": 0.6226946409636879, "grad_norm": 1.2066077911688682, "learning_rate": 3.2913211418133395e-07, "loss": 0.1868, "step": 35823 }, { "epoch": 0.6227120235011907, "grad_norm": 1.295208539795191, "learning_rate": 3.2910565981594365e-07, "loss": 0.2166, "step": 35824 }, { "epoch": 0.6227294060386935, "grad_norm": 2.714530864079461, "learning_rate": 3.2907920599220974e-07, "loss": 0.2736, "step": 35825 }, { "epoch": 0.6227467885761964, "grad_norm": 1.7602801057509099, "learning_rate": 3.2905275271021624e-07, "loss": 0.1341, "step": 35826 }, { "epoch": 0.6227641711136992, "grad_norm": 2.738149491023398, "learning_rate": 3.2902629997004696e-07, "loss": 0.2256, "step": 35827 }, { "epoch": 0.622781553651202, "grad_norm": 1.3517026137698445, "learning_rate": 3.289998477717858e-07, "loss": 0.1745, "step": 35828 }, { "epoch": 0.6227989361887049, "grad_norm": 1.4300671582131286, "learning_rate": 3.2897339611551645e-07, "loss": 0.2042, "step": 35829 }, { "epoch": 0.6228163187262077, "grad_norm": 1.2043655051790705, "learning_rate": 3.2894694500132314e-07, "loss": 0.2061, "step": 35830 }, { "epoch": 0.6228337012637105, "grad_norm": 1.0359948597288922, "learning_rate": 3.289204944292893e-07, "loss": 0.1416, "step": 35831 }, { "epoch": 0.6228510838012133, "grad_norm": 1.5769336112162806, "learning_rate": 3.288940443994988e-07, "loss": 0.2405, "step": 35832 }, { "epoch": 0.6228684663387162, "grad_norm": 2.596500498753861, "learning_rate": 3.288675949120357e-07, "loss": 0.1185, "step": 35833 }, { "epoch": 0.6228858488762189, "grad_norm": 1.4253960605134508, "learning_rate": 3.288411459669837e-07, "loss": 0.1866, "step": 35834 }, { "epoch": 0.6229032314137217, "grad_norm": 2.3639335732535596, "learning_rate": 3.288146975644266e-07, "loss": 0.207, "step": 35835 }, { "epoch": 0.6229206139512246, "grad_norm": 1.5117996957772095, "learning_rate": 3.287882497044484e-07, "loss": 0.1472, "step": 35836 }, { "epoch": 0.6229379964887274, "grad_norm": 1.310958029660036, "learning_rate": 3.287618023871327e-07, "loss": 0.1145, "step": 35837 }, { "epoch": 0.6229553790262302, "grad_norm": 5.789191796617554, "learning_rate": 3.287353556125634e-07, "loss": 0.2402, "step": 35838 }, { "epoch": 0.622972761563733, "grad_norm": 1.5306662907898745, "learning_rate": 3.2870890938082437e-07, "loss": 0.1943, "step": 35839 }, { "epoch": 0.6229901441012359, "grad_norm": 1.3265374876898903, "learning_rate": 3.2868246369199933e-07, "loss": 0.1983, "step": 35840 }, { "epoch": 0.6230075266387387, "grad_norm": 1.1708324807464057, "learning_rate": 3.286560185461723e-07, "loss": 0.2369, "step": 35841 }, { "epoch": 0.6230249091762415, "grad_norm": 3.327280900771887, "learning_rate": 3.286295739434268e-07, "loss": 0.2051, "step": 35842 }, { "epoch": 0.6230422917137444, "grad_norm": 1.446919047319173, "learning_rate": 3.2860312988384726e-07, "loss": 0.1792, "step": 35843 }, { "epoch": 0.6230596742512472, "grad_norm": 1.4014308236049426, "learning_rate": 3.285766863675167e-07, "loss": 0.1649, "step": 35844 }, { "epoch": 0.62307705678875, "grad_norm": 2.336903306960583, "learning_rate": 3.285502433945194e-07, "loss": 0.1811, "step": 35845 }, { "epoch": 0.6230944393262529, "grad_norm": 1.9146578315231346, "learning_rate": 3.28523800964939e-07, "loss": 0.1853, "step": 35846 }, { "epoch": 0.6231118218637557, "grad_norm": 1.2885786609333418, "learning_rate": 3.2849735907885947e-07, "loss": 0.2145, "step": 35847 }, { "epoch": 0.6231292044012585, "grad_norm": 2.246530337190038, "learning_rate": 3.2847091773636435e-07, "loss": 0.2626, "step": 35848 }, { "epoch": 0.6231465869387613, "grad_norm": 1.7616305629672904, "learning_rate": 3.2844447693753797e-07, "loss": 0.2074, "step": 35849 }, { "epoch": 0.6231639694762642, "grad_norm": 2.758588021463662, "learning_rate": 3.284180366824636e-07, "loss": 0.3044, "step": 35850 }, { "epoch": 0.623181352013767, "grad_norm": 1.1611932581172408, "learning_rate": 3.283915969712252e-07, "loss": 0.2766, "step": 35851 }, { "epoch": 0.6231987345512698, "grad_norm": 1.2658943804050125, "learning_rate": 3.283651578039066e-07, "loss": 0.1897, "step": 35852 }, { "epoch": 0.6232161170887727, "grad_norm": 1.4176559065491054, "learning_rate": 3.2833871918059166e-07, "loss": 0.2307, "step": 35853 }, { "epoch": 0.6232334996262754, "grad_norm": 1.7738487522576396, "learning_rate": 3.2831228110136403e-07, "loss": 0.1984, "step": 35854 }, { "epoch": 0.6232508821637782, "grad_norm": 1.4896235552676762, "learning_rate": 3.2828584356630785e-07, "loss": 0.2712, "step": 35855 }, { "epoch": 0.623268264701281, "grad_norm": 2.445296518610747, "learning_rate": 3.282594065755065e-07, "loss": 0.1746, "step": 35856 }, { "epoch": 0.6232856472387839, "grad_norm": 1.3074708700482134, "learning_rate": 3.282329701290438e-07, "loss": 0.2232, "step": 35857 }, { "epoch": 0.6233030297762867, "grad_norm": 3.0448139127746954, "learning_rate": 3.282065342270039e-07, "loss": 0.375, "step": 35858 }, { "epoch": 0.6233204123137895, "grad_norm": 2.5979607875010347, "learning_rate": 3.2818009886947027e-07, "loss": 0.2489, "step": 35859 }, { "epoch": 0.6233377948512924, "grad_norm": 2.1996667856615306, "learning_rate": 3.281536640565268e-07, "loss": 0.2346, "step": 35860 }, { "epoch": 0.6233551773887952, "grad_norm": 1.2540906100082685, "learning_rate": 3.281272297882573e-07, "loss": 0.1745, "step": 35861 }, { "epoch": 0.623372559926298, "grad_norm": 1.5294598007504934, "learning_rate": 3.2810079606474567e-07, "loss": 0.1626, "step": 35862 }, { "epoch": 0.6233899424638009, "grad_norm": 1.8640903303734033, "learning_rate": 3.2807436288607534e-07, "loss": 0.2636, "step": 35863 }, { "epoch": 0.6234073250013037, "grad_norm": 1.3726749580654865, "learning_rate": 3.2804793025233036e-07, "loss": 0.1917, "step": 35864 }, { "epoch": 0.6234247075388065, "grad_norm": 1.408543196644089, "learning_rate": 3.280214981635944e-07, "loss": 0.2114, "step": 35865 }, { "epoch": 0.6234420900763094, "grad_norm": 1.6721835402329417, "learning_rate": 3.279950666199514e-07, "loss": 0.2365, "step": 35866 }, { "epoch": 0.6234594726138122, "grad_norm": 1.5933305402145963, "learning_rate": 3.27968635621485e-07, "loss": 0.2644, "step": 35867 }, { "epoch": 0.623476855151315, "grad_norm": 1.237170291925658, "learning_rate": 3.279422051682791e-07, "loss": 0.1996, "step": 35868 }, { "epoch": 0.6234942376888178, "grad_norm": 2.0669963837725085, "learning_rate": 3.279157752604171e-07, "loss": 0.1929, "step": 35869 }, { "epoch": 0.6235116202263207, "grad_norm": 1.4378628309058732, "learning_rate": 3.2788934589798326e-07, "loss": 0.2475, "step": 35870 }, { "epoch": 0.6235290027638235, "grad_norm": 1.5608943649306897, "learning_rate": 3.27862917081061e-07, "loss": 0.1949, "step": 35871 }, { "epoch": 0.6235463853013263, "grad_norm": 1.758975671710271, "learning_rate": 3.2783648880973435e-07, "loss": 0.157, "step": 35872 }, { "epoch": 0.6235637678388292, "grad_norm": 1.3349237274623977, "learning_rate": 3.2781006108408674e-07, "loss": 0.1261, "step": 35873 }, { "epoch": 0.6235811503763319, "grad_norm": 1.4471963329336928, "learning_rate": 3.2778363390420257e-07, "loss": 0.1099, "step": 35874 }, { "epoch": 0.6235985329138347, "grad_norm": 1.4790917563763022, "learning_rate": 3.2775720727016476e-07, "loss": 0.2202, "step": 35875 }, { "epoch": 0.6236159154513375, "grad_norm": 1.4865383265657521, "learning_rate": 3.2773078118205767e-07, "loss": 0.1963, "step": 35876 }, { "epoch": 0.6236332979888404, "grad_norm": 1.083494485252146, "learning_rate": 3.277043556399647e-07, "loss": 0.1312, "step": 35877 }, { "epoch": 0.6236506805263432, "grad_norm": 1.2500739206266709, "learning_rate": 3.276779306439699e-07, "loss": 0.1521, "step": 35878 }, { "epoch": 0.623668063063846, "grad_norm": 1.5939677734400968, "learning_rate": 3.2765150619415685e-07, "loss": 0.2091, "step": 35879 }, { "epoch": 0.6236854456013489, "grad_norm": 1.503244294174795, "learning_rate": 3.2762508229060936e-07, "loss": 0.1599, "step": 35880 }, { "epoch": 0.6237028281388517, "grad_norm": 1.0771188653248576, "learning_rate": 3.275986589334113e-07, "loss": 0.1218, "step": 35881 }, { "epoch": 0.6237202106763545, "grad_norm": 2.243875449285415, "learning_rate": 3.2757223612264627e-07, "loss": 0.1785, "step": 35882 }, { "epoch": 0.6237375932138574, "grad_norm": 1.7141211096777482, "learning_rate": 3.2754581385839785e-07, "loss": 0.2429, "step": 35883 }, { "epoch": 0.6237549757513602, "grad_norm": 1.2267623966271946, "learning_rate": 3.2751939214075017e-07, "loss": 0.2545, "step": 35884 }, { "epoch": 0.623772358288863, "grad_norm": 1.6298431450276782, "learning_rate": 3.2749297096978666e-07, "loss": 0.1447, "step": 35885 }, { "epoch": 0.6237897408263658, "grad_norm": 2.1621494590596457, "learning_rate": 3.2746655034559123e-07, "loss": 0.1703, "step": 35886 }, { "epoch": 0.6238071233638687, "grad_norm": 1.0673844725294237, "learning_rate": 3.274401302682478e-07, "loss": 0.2189, "step": 35887 }, { "epoch": 0.6238245059013715, "grad_norm": 5.055369843981752, "learning_rate": 3.2741371073783955e-07, "loss": 0.2556, "step": 35888 }, { "epoch": 0.6238418884388743, "grad_norm": 2.5173077569059648, "learning_rate": 3.2738729175445075e-07, "loss": 0.226, "step": 35889 }, { "epoch": 0.6238592709763772, "grad_norm": 1.651121151003559, "learning_rate": 3.273608733181648e-07, "loss": 0.3048, "step": 35890 }, { "epoch": 0.62387665351388, "grad_norm": 1.97579401085476, "learning_rate": 3.2733445542906567e-07, "loss": 0.2015, "step": 35891 }, { "epoch": 0.6238940360513828, "grad_norm": 1.7119571991647804, "learning_rate": 3.2730803808723705e-07, "loss": 0.3253, "step": 35892 }, { "epoch": 0.6239114185888857, "grad_norm": 1.080949673503273, "learning_rate": 3.272816212927627e-07, "loss": 0.1731, "step": 35893 }, { "epoch": 0.6239288011263884, "grad_norm": 1.4813577633261488, "learning_rate": 3.2725520504572607e-07, "loss": 0.1822, "step": 35894 }, { "epoch": 0.6239461836638912, "grad_norm": 1.306294382030428, "learning_rate": 3.2722878934621117e-07, "loss": 0.1766, "step": 35895 }, { "epoch": 0.623963566201394, "grad_norm": 2.6014745441353653, "learning_rate": 3.272023741943015e-07, "loss": 0.2047, "step": 35896 }, { "epoch": 0.6239809487388969, "grad_norm": 1.2163306675014625, "learning_rate": 3.2717595959008116e-07, "loss": 0.1701, "step": 35897 }, { "epoch": 0.6239983312763997, "grad_norm": 1.2575822420294616, "learning_rate": 3.2714954553363346e-07, "loss": 0.1599, "step": 35898 }, { "epoch": 0.6240157138139025, "grad_norm": 1.4971951033134998, "learning_rate": 3.271231320250426e-07, "loss": 0.2775, "step": 35899 }, { "epoch": 0.6240330963514054, "grad_norm": 1.279594026619484, "learning_rate": 3.270967190643917e-07, "loss": 0.1479, "step": 35900 }, { "epoch": 0.6240504788889082, "grad_norm": 2.0598866483048384, "learning_rate": 3.2707030665176483e-07, "loss": 0.2054, "step": 35901 }, { "epoch": 0.624067861426411, "grad_norm": 1.9279311226838618, "learning_rate": 3.270438947872456e-07, "loss": 0.1267, "step": 35902 }, { "epoch": 0.6240852439639138, "grad_norm": 2.1536547016448333, "learning_rate": 3.2701748347091787e-07, "loss": 0.2555, "step": 35903 }, { "epoch": 0.6241026265014167, "grad_norm": 1.8437158314882238, "learning_rate": 3.269910727028652e-07, "loss": 0.1949, "step": 35904 }, { "epoch": 0.6241200090389195, "grad_norm": 1.3422010774405198, "learning_rate": 3.2696466248317134e-07, "loss": 0.2346, "step": 35905 }, { "epoch": 0.6241373915764223, "grad_norm": 1.067487320375451, "learning_rate": 3.269382528119202e-07, "loss": 0.1732, "step": 35906 }, { "epoch": 0.6241547741139252, "grad_norm": 1.2434779746368771, "learning_rate": 3.269118436891952e-07, "loss": 0.2462, "step": 35907 }, { "epoch": 0.624172156651428, "grad_norm": 1.3159384398694034, "learning_rate": 3.2688543511508006e-07, "loss": 0.2286, "step": 35908 }, { "epoch": 0.6241895391889308, "grad_norm": 1.5084059831316081, "learning_rate": 3.268590270896586e-07, "loss": 0.1823, "step": 35909 }, { "epoch": 0.6242069217264337, "grad_norm": 1.6382041733999442, "learning_rate": 3.2683261961301447e-07, "loss": 0.1631, "step": 35910 }, { "epoch": 0.6242243042639365, "grad_norm": 1.2766139631402984, "learning_rate": 3.268062126852314e-07, "loss": 0.1682, "step": 35911 }, { "epoch": 0.6242416868014393, "grad_norm": 1.813317541725406, "learning_rate": 3.2677980630639326e-07, "loss": 0.1741, "step": 35912 }, { "epoch": 0.6242590693389422, "grad_norm": 2.1230573119840854, "learning_rate": 3.267534004765833e-07, "loss": 0.2111, "step": 35913 }, { "epoch": 0.6242764518764449, "grad_norm": 1.4348475797659164, "learning_rate": 3.267269951958856e-07, "loss": 0.2417, "step": 35914 }, { "epoch": 0.6242938344139477, "grad_norm": 2.1571699856596838, "learning_rate": 3.2670059046438375e-07, "loss": 0.2041, "step": 35915 }, { "epoch": 0.6243112169514505, "grad_norm": 1.4129606495116613, "learning_rate": 3.266741862821613e-07, "loss": 0.1488, "step": 35916 }, { "epoch": 0.6243285994889534, "grad_norm": 1.47129674720729, "learning_rate": 3.2664778264930214e-07, "loss": 0.2724, "step": 35917 }, { "epoch": 0.6243459820264562, "grad_norm": 2.1932266838373207, "learning_rate": 3.2662137956589003e-07, "loss": 0.3008, "step": 35918 }, { "epoch": 0.624363364563959, "grad_norm": 1.0536460003777077, "learning_rate": 3.265949770320083e-07, "loss": 0.245, "step": 35919 }, { "epoch": 0.6243807471014619, "grad_norm": 2.3542134149843834, "learning_rate": 3.265685750477409e-07, "loss": 0.2071, "step": 35920 }, { "epoch": 0.6243981296389647, "grad_norm": 2.881705045600018, "learning_rate": 3.265421736131714e-07, "loss": 0.2937, "step": 35921 }, { "epoch": 0.6244155121764675, "grad_norm": 1.9092374025504781, "learning_rate": 3.2651577272838354e-07, "loss": 0.2464, "step": 35922 }, { "epoch": 0.6244328947139703, "grad_norm": 1.8924004399141996, "learning_rate": 3.2648937239346105e-07, "loss": 0.2099, "step": 35923 }, { "epoch": 0.6244502772514732, "grad_norm": 2.5230341999108754, "learning_rate": 3.264629726084874e-07, "loss": 0.2895, "step": 35924 }, { "epoch": 0.624467659788976, "grad_norm": 2.160359153203704, "learning_rate": 3.264365733735467e-07, "loss": 0.2874, "step": 35925 }, { "epoch": 0.6244850423264788, "grad_norm": 1.9833338987055489, "learning_rate": 3.2641017468872214e-07, "loss": 0.2873, "step": 35926 }, { "epoch": 0.6245024248639817, "grad_norm": 1.3404064527129158, "learning_rate": 3.2638377655409754e-07, "loss": 0.2142, "step": 35927 }, { "epoch": 0.6245198074014845, "grad_norm": 2.064590786960194, "learning_rate": 3.263573789697567e-07, "loss": 0.2271, "step": 35928 }, { "epoch": 0.6245371899389873, "grad_norm": 1.367644408185593, "learning_rate": 3.263309819357831e-07, "loss": 0.1775, "step": 35929 }, { "epoch": 0.6245545724764902, "grad_norm": 1.6602939007797082, "learning_rate": 3.263045854522606e-07, "loss": 0.1433, "step": 35930 }, { "epoch": 0.624571955013993, "grad_norm": 2.8343382226681615, "learning_rate": 3.262781895192729e-07, "loss": 0.303, "step": 35931 }, { "epoch": 0.6245893375514958, "grad_norm": 1.491141837875321, "learning_rate": 3.262517941369034e-07, "loss": 0.2093, "step": 35932 }, { "epoch": 0.6246067200889986, "grad_norm": 1.381394959071893, "learning_rate": 3.2622539930523586e-07, "loss": 0.2524, "step": 35933 }, { "epoch": 0.6246241026265014, "grad_norm": 1.3645200772721169, "learning_rate": 3.2619900502435404e-07, "loss": 0.2666, "step": 35934 }, { "epoch": 0.6246414851640042, "grad_norm": 1.1398142790977426, "learning_rate": 3.2617261129434147e-07, "loss": 0.1508, "step": 35935 }, { "epoch": 0.624658867701507, "grad_norm": 1.5412109948837942, "learning_rate": 3.261462181152819e-07, "loss": 0.2352, "step": 35936 }, { "epoch": 0.6246762502390099, "grad_norm": 1.4507385106227597, "learning_rate": 3.2611982548725916e-07, "loss": 0.2082, "step": 35937 }, { "epoch": 0.6246936327765127, "grad_norm": 1.6481264620591332, "learning_rate": 3.2609343341035643e-07, "loss": 0.1603, "step": 35938 }, { "epoch": 0.6247110153140155, "grad_norm": 1.7473167462805248, "learning_rate": 3.2606704188465775e-07, "loss": 0.2334, "step": 35939 }, { "epoch": 0.6247283978515183, "grad_norm": 1.2225404885531976, "learning_rate": 3.260406509102466e-07, "loss": 0.2516, "step": 35940 }, { "epoch": 0.6247457803890212, "grad_norm": 1.0680582078365317, "learning_rate": 3.260142604872066e-07, "loss": 0.3195, "step": 35941 }, { "epoch": 0.624763162926524, "grad_norm": 0.9384420042094292, "learning_rate": 3.259878706156216e-07, "loss": 0.1907, "step": 35942 }, { "epoch": 0.6247805454640268, "grad_norm": 1.1194621150228006, "learning_rate": 3.2596148129557497e-07, "loss": 0.1481, "step": 35943 }, { "epoch": 0.6247979280015297, "grad_norm": 1.3245533944135706, "learning_rate": 3.259350925271508e-07, "loss": 0.2681, "step": 35944 }, { "epoch": 0.6248153105390325, "grad_norm": 0.9044949517391632, "learning_rate": 3.259087043104323e-07, "loss": 0.2969, "step": 35945 }, { "epoch": 0.6248326930765353, "grad_norm": 2.2146493995601237, "learning_rate": 3.2588231664550304e-07, "loss": 0.2055, "step": 35946 }, { "epoch": 0.6248500756140382, "grad_norm": 1.3193325404295895, "learning_rate": 3.25855929532447e-07, "loss": 0.1529, "step": 35947 }, { "epoch": 0.624867458151541, "grad_norm": 1.4391219242817714, "learning_rate": 3.2582954297134766e-07, "loss": 0.1839, "step": 35948 }, { "epoch": 0.6248848406890438, "grad_norm": 0.9431550148266424, "learning_rate": 3.2580315696228854e-07, "loss": 0.1588, "step": 35949 }, { "epoch": 0.6249022232265466, "grad_norm": 1.2184119043552868, "learning_rate": 3.2577677150535366e-07, "loss": 0.1472, "step": 35950 }, { "epoch": 0.6249196057640495, "grad_norm": 1.2233228421316615, "learning_rate": 3.257503866006263e-07, "loss": 0.2892, "step": 35951 }, { "epoch": 0.6249369883015523, "grad_norm": 1.5631984819257252, "learning_rate": 3.2572400224818996e-07, "loss": 0.2052, "step": 35952 }, { "epoch": 0.6249543708390551, "grad_norm": 1.1376948280424546, "learning_rate": 3.256976184481287e-07, "loss": 0.1877, "step": 35953 }, { "epoch": 0.6249717533765579, "grad_norm": 1.856543133016738, "learning_rate": 3.2567123520052574e-07, "loss": 0.2287, "step": 35954 }, { "epoch": 0.6249891359140607, "grad_norm": 1.3914193077362966, "learning_rate": 3.25644852505465e-07, "loss": 0.198, "step": 35955 }, { "epoch": 0.6250065184515635, "grad_norm": 1.838287863386612, "learning_rate": 3.256184703630301e-07, "loss": 0.3356, "step": 35956 }, { "epoch": 0.6250239009890663, "grad_norm": 1.3919988247645125, "learning_rate": 3.2559208877330444e-07, "loss": 0.253, "step": 35957 }, { "epoch": 0.6250412835265692, "grad_norm": 1.3913562244480908, "learning_rate": 3.255657077363716e-07, "loss": 0.2279, "step": 35958 }, { "epoch": 0.625058666064072, "grad_norm": 2.312225347188613, "learning_rate": 3.255393272523155e-07, "loss": 0.2169, "step": 35959 }, { "epoch": 0.6250760486015748, "grad_norm": 2.3859281765762708, "learning_rate": 3.2551294732121946e-07, "loss": 0.2485, "step": 35960 }, { "epoch": 0.6250934311390777, "grad_norm": 1.8707198231467361, "learning_rate": 3.254865679431673e-07, "loss": 0.2739, "step": 35961 }, { "epoch": 0.6251108136765805, "grad_norm": 1.5230097477459388, "learning_rate": 3.254601891182425e-07, "loss": 0.193, "step": 35962 }, { "epoch": 0.6251281962140833, "grad_norm": 1.6912663808455657, "learning_rate": 3.2543381084652904e-07, "loss": 0.1417, "step": 35963 }, { "epoch": 0.6251455787515862, "grad_norm": 1.7814020905557968, "learning_rate": 3.2540743312810993e-07, "loss": 0.2031, "step": 35964 }, { "epoch": 0.625162961289089, "grad_norm": 1.409409422302619, "learning_rate": 3.253810559630691e-07, "loss": 0.1536, "step": 35965 }, { "epoch": 0.6251803438265918, "grad_norm": 1.456943066708881, "learning_rate": 3.2535467935148994e-07, "loss": 0.1889, "step": 35966 }, { "epoch": 0.6251977263640947, "grad_norm": 1.329790885530405, "learning_rate": 3.2532830329345647e-07, "loss": 0.1897, "step": 35967 }, { "epoch": 0.6252151089015975, "grad_norm": 3.0635314923737718, "learning_rate": 3.2530192778905183e-07, "loss": 0.1795, "step": 35968 }, { "epoch": 0.6252324914391003, "grad_norm": 1.926088827892194, "learning_rate": 3.2527555283836004e-07, "loss": 0.1663, "step": 35969 }, { "epoch": 0.6252498739766031, "grad_norm": 2.146421616506972, "learning_rate": 3.252491784414645e-07, "loss": 0.273, "step": 35970 }, { "epoch": 0.625267256514106, "grad_norm": 2.1298587396585176, "learning_rate": 3.2522280459844863e-07, "loss": 0.2398, "step": 35971 }, { "epoch": 0.6252846390516088, "grad_norm": 1.11737579445794, "learning_rate": 3.2519643130939615e-07, "loss": 0.1634, "step": 35972 }, { "epoch": 0.6253020215891116, "grad_norm": 1.4346457853367658, "learning_rate": 3.251700585743908e-07, "loss": 0.183, "step": 35973 }, { "epoch": 0.6253194041266144, "grad_norm": 1.5262033895364093, "learning_rate": 3.25143686393516e-07, "loss": 0.232, "step": 35974 }, { "epoch": 0.6253367866641172, "grad_norm": 2.515105203554334, "learning_rate": 3.251173147668555e-07, "loss": 0.2408, "step": 35975 }, { "epoch": 0.62535416920162, "grad_norm": 2.2455859814759704, "learning_rate": 3.2509094369449273e-07, "loss": 0.2209, "step": 35976 }, { "epoch": 0.6253715517391228, "grad_norm": 1.7862272423693226, "learning_rate": 3.2506457317651123e-07, "loss": 0.4001, "step": 35977 }, { "epoch": 0.6253889342766257, "grad_norm": 1.316204211546918, "learning_rate": 3.250382032129947e-07, "loss": 0.2059, "step": 35978 }, { "epoch": 0.6254063168141285, "grad_norm": 1.0735100384875444, "learning_rate": 3.2501183380402676e-07, "loss": 0.2778, "step": 35979 }, { "epoch": 0.6254236993516313, "grad_norm": 4.302326969038258, "learning_rate": 3.249854649496908e-07, "loss": 0.1585, "step": 35980 }, { "epoch": 0.6254410818891342, "grad_norm": 1.2882226807530375, "learning_rate": 3.2495909665007065e-07, "loss": 0.1188, "step": 35981 }, { "epoch": 0.625458464426637, "grad_norm": 0.9190136952998785, "learning_rate": 3.249327289052498e-07, "loss": 0.133, "step": 35982 }, { "epoch": 0.6254758469641398, "grad_norm": 1.0749449177112158, "learning_rate": 3.249063617153116e-07, "loss": 0.1209, "step": 35983 }, { "epoch": 0.6254932295016427, "grad_norm": 1.6812651300540005, "learning_rate": 3.2487999508034e-07, "loss": 0.2943, "step": 35984 }, { "epoch": 0.6255106120391455, "grad_norm": 1.3477676883481693, "learning_rate": 3.2485362900041815e-07, "loss": 0.1974, "step": 35985 }, { "epoch": 0.6255279945766483, "grad_norm": 2.3444539296718974, "learning_rate": 3.2482726347563e-07, "loss": 0.2274, "step": 35986 }, { "epoch": 0.6255453771141511, "grad_norm": 0.9416157653693819, "learning_rate": 3.2480089850605885e-07, "loss": 0.2472, "step": 35987 }, { "epoch": 0.625562759651654, "grad_norm": 1.9698229493983253, "learning_rate": 3.2477453409178867e-07, "loss": 0.2016, "step": 35988 }, { "epoch": 0.6255801421891568, "grad_norm": 1.3495994654692318, "learning_rate": 3.247481702329024e-07, "loss": 0.1703, "step": 35989 }, { "epoch": 0.6255975247266596, "grad_norm": 2.110972512704889, "learning_rate": 3.2472180692948403e-07, "loss": 0.2098, "step": 35990 }, { "epoch": 0.6256149072641625, "grad_norm": 2.731136907502525, "learning_rate": 3.246954441816169e-07, "loss": 0.3512, "step": 35991 }, { "epoch": 0.6256322898016653, "grad_norm": 1.2267859388297992, "learning_rate": 3.246690819893848e-07, "loss": 0.2731, "step": 35992 }, { "epoch": 0.625649672339168, "grad_norm": 2.352381853439676, "learning_rate": 3.246427203528711e-07, "loss": 0.3742, "step": 35993 }, { "epoch": 0.6256670548766708, "grad_norm": 1.494086357264332, "learning_rate": 3.2461635927215955e-07, "loss": 0.1963, "step": 35994 }, { "epoch": 0.6256844374141737, "grad_norm": 1.9015816580739677, "learning_rate": 3.2458999874733364e-07, "loss": 0.2481, "step": 35995 }, { "epoch": 0.6257018199516765, "grad_norm": 1.182735809065742, "learning_rate": 3.245636387784767e-07, "loss": 0.2336, "step": 35996 }, { "epoch": 0.6257192024891793, "grad_norm": 1.4659196337753055, "learning_rate": 3.245372793656724e-07, "loss": 0.1747, "step": 35997 }, { "epoch": 0.6257365850266822, "grad_norm": 1.556728509733153, "learning_rate": 3.245109205090044e-07, "loss": 0.1924, "step": 35998 }, { "epoch": 0.625753967564185, "grad_norm": 1.3660267131723367, "learning_rate": 3.2448456220855606e-07, "loss": 0.2569, "step": 35999 }, { "epoch": 0.6257713501016878, "grad_norm": 1.13912154072018, "learning_rate": 3.244582044644112e-07, "loss": 0.2252, "step": 36000 }, { "epoch": 0.6257887326391907, "grad_norm": 1.3125157963472034, "learning_rate": 3.2443184727665317e-07, "loss": 0.1507, "step": 36001 }, { "epoch": 0.6258061151766935, "grad_norm": 1.3980984870412532, "learning_rate": 3.244054906453655e-07, "loss": 0.1452, "step": 36002 }, { "epoch": 0.6258234977141963, "grad_norm": 1.314697115422899, "learning_rate": 3.2437913457063173e-07, "loss": 0.2399, "step": 36003 }, { "epoch": 0.6258408802516991, "grad_norm": 1.4043172040209502, "learning_rate": 3.2435277905253546e-07, "loss": 0.1817, "step": 36004 }, { "epoch": 0.625858262789202, "grad_norm": 1.5928031126778046, "learning_rate": 3.2432642409116006e-07, "loss": 0.1494, "step": 36005 }, { "epoch": 0.6258756453267048, "grad_norm": 1.6076258057423538, "learning_rate": 3.243000696865894e-07, "loss": 0.2527, "step": 36006 }, { "epoch": 0.6258930278642076, "grad_norm": 1.7743295651411488, "learning_rate": 3.2427371583890684e-07, "loss": 0.1963, "step": 36007 }, { "epoch": 0.6259104104017105, "grad_norm": 2.280369919189434, "learning_rate": 3.242473625481957e-07, "loss": 0.1731, "step": 36008 }, { "epoch": 0.6259277929392133, "grad_norm": 1.5327369515164853, "learning_rate": 3.242210098145398e-07, "loss": 0.189, "step": 36009 }, { "epoch": 0.6259451754767161, "grad_norm": 2.18543457900725, "learning_rate": 3.241946576380224e-07, "loss": 0.2241, "step": 36010 }, { "epoch": 0.625962558014219, "grad_norm": 0.9999072125082921, "learning_rate": 3.2416830601872734e-07, "loss": 0.1634, "step": 36011 }, { "epoch": 0.6259799405517218, "grad_norm": 5.662590829160239, "learning_rate": 3.241419549567379e-07, "loss": 0.1827, "step": 36012 }, { "epoch": 0.6259973230892245, "grad_norm": 1.1902724781488132, "learning_rate": 3.241156044521379e-07, "loss": 0.1614, "step": 36013 }, { "epoch": 0.6260147056267273, "grad_norm": 1.740883525890269, "learning_rate": 3.2408925450501034e-07, "loss": 0.2308, "step": 36014 }, { "epoch": 0.6260320881642302, "grad_norm": 1.4260784568483635, "learning_rate": 3.2406290511543916e-07, "loss": 0.3938, "step": 36015 }, { "epoch": 0.626049470701733, "grad_norm": 2.16453165862765, "learning_rate": 3.2403655628350763e-07, "loss": 0.2687, "step": 36016 }, { "epoch": 0.6260668532392358, "grad_norm": 1.6614336547397006, "learning_rate": 3.2401020800929954e-07, "loss": 0.2424, "step": 36017 }, { "epoch": 0.6260842357767387, "grad_norm": 1.3445296891677905, "learning_rate": 3.239838602928981e-07, "loss": 0.2418, "step": 36018 }, { "epoch": 0.6261016183142415, "grad_norm": 1.3517672111980465, "learning_rate": 3.239575131343873e-07, "loss": 0.168, "step": 36019 }, { "epoch": 0.6261190008517443, "grad_norm": 1.4581949712888225, "learning_rate": 3.2393116653385e-07, "loss": 0.1478, "step": 36020 }, { "epoch": 0.6261363833892472, "grad_norm": 1.9886036996213998, "learning_rate": 3.239048204913701e-07, "loss": 0.184, "step": 36021 }, { "epoch": 0.62615376592675, "grad_norm": 2.390232977409721, "learning_rate": 3.23878475007031e-07, "loss": 0.1471, "step": 36022 }, { "epoch": 0.6261711484642528, "grad_norm": 2.3931503040645894, "learning_rate": 3.238521300809163e-07, "loss": 0.1581, "step": 36023 }, { "epoch": 0.6261885310017556, "grad_norm": 1.1644500288813997, "learning_rate": 3.238257857131093e-07, "loss": 0.2045, "step": 36024 }, { "epoch": 0.6262059135392585, "grad_norm": 1.7639888916890802, "learning_rate": 3.237994419036937e-07, "loss": 0.3269, "step": 36025 }, { "epoch": 0.6262232960767613, "grad_norm": 1.3101821871744852, "learning_rate": 3.237730986527531e-07, "loss": 0.3237, "step": 36026 }, { "epoch": 0.6262406786142641, "grad_norm": 1.3254756126904474, "learning_rate": 3.237467559603707e-07, "loss": 0.1344, "step": 36027 }, { "epoch": 0.626258061151767, "grad_norm": 1.2973460151481861, "learning_rate": 3.2372041382663e-07, "loss": 0.1578, "step": 36028 }, { "epoch": 0.6262754436892698, "grad_norm": 1.289651865610483, "learning_rate": 3.236940722516147e-07, "loss": 0.1342, "step": 36029 }, { "epoch": 0.6262928262267726, "grad_norm": 0.7895332730418015, "learning_rate": 3.2366773123540815e-07, "loss": 0.1518, "step": 36030 }, { "epoch": 0.6263102087642755, "grad_norm": 1.4082148960061391, "learning_rate": 3.23641390778094e-07, "loss": 0.2327, "step": 36031 }, { "epoch": 0.6263275913017783, "grad_norm": 1.743074465458165, "learning_rate": 3.236150508797557e-07, "loss": 0.1871, "step": 36032 }, { "epoch": 0.626344973839281, "grad_norm": 3.705661219594465, "learning_rate": 3.235887115404765e-07, "loss": 0.2684, "step": 36033 }, { "epoch": 0.6263623563767838, "grad_norm": 2.0979068269733245, "learning_rate": 3.2356237276034007e-07, "loss": 0.2203, "step": 36034 }, { "epoch": 0.6263797389142867, "grad_norm": 1.253122862789044, "learning_rate": 3.2353603453942977e-07, "loss": 0.1664, "step": 36035 }, { "epoch": 0.6263971214517895, "grad_norm": 1.0403472692564808, "learning_rate": 3.2350969687782934e-07, "loss": 0.3621, "step": 36036 }, { "epoch": 0.6264145039892923, "grad_norm": 1.7034306258971534, "learning_rate": 3.2348335977562213e-07, "loss": 0.2057, "step": 36037 }, { "epoch": 0.6264318865267952, "grad_norm": 1.0123300716493033, "learning_rate": 3.2345702323289167e-07, "loss": 0.1712, "step": 36038 }, { "epoch": 0.626449269064298, "grad_norm": 1.836794854795322, "learning_rate": 3.234306872497211e-07, "loss": 0.2841, "step": 36039 }, { "epoch": 0.6264666516018008, "grad_norm": 1.5689721920719641, "learning_rate": 3.2340435182619425e-07, "loss": 0.1711, "step": 36040 }, { "epoch": 0.6264840341393036, "grad_norm": 1.0117060330822254, "learning_rate": 3.233780169623944e-07, "loss": 0.288, "step": 36041 }, { "epoch": 0.6265014166768065, "grad_norm": 1.4948282123071108, "learning_rate": 3.233516826584052e-07, "loss": 0.2022, "step": 36042 }, { "epoch": 0.6265187992143093, "grad_norm": 1.3032952814164647, "learning_rate": 3.233253489143099e-07, "loss": 0.2097, "step": 36043 }, { "epoch": 0.6265361817518121, "grad_norm": 1.1462641116352905, "learning_rate": 3.232990157301922e-07, "loss": 0.1859, "step": 36044 }, { "epoch": 0.626553564289315, "grad_norm": 0.6989199291796174, "learning_rate": 3.2327268310613556e-07, "loss": 0.1297, "step": 36045 }, { "epoch": 0.6265709468268178, "grad_norm": 0.9446348892102582, "learning_rate": 3.232463510422232e-07, "loss": 0.1962, "step": 36046 }, { "epoch": 0.6265883293643206, "grad_norm": 1.3758314624568693, "learning_rate": 3.2322001953853865e-07, "loss": 0.2086, "step": 36047 }, { "epoch": 0.6266057119018235, "grad_norm": 1.8618535065063118, "learning_rate": 3.2319368859516554e-07, "loss": 0.2096, "step": 36048 }, { "epoch": 0.6266230944393263, "grad_norm": 1.5699003883313525, "learning_rate": 3.231673582121871e-07, "loss": 0.1538, "step": 36049 }, { "epoch": 0.6266404769768291, "grad_norm": 2.18989755212912, "learning_rate": 3.23141028389687e-07, "loss": 0.2581, "step": 36050 }, { "epoch": 0.626657859514332, "grad_norm": 1.1797723818773835, "learning_rate": 3.231146991277487e-07, "loss": 0.2196, "step": 36051 }, { "epoch": 0.6266752420518348, "grad_norm": 1.0728033556726895, "learning_rate": 3.2308837042645545e-07, "loss": 0.268, "step": 36052 }, { "epoch": 0.6266926245893375, "grad_norm": 1.1609683803499355, "learning_rate": 3.2306204228589075e-07, "loss": 0.17, "step": 36053 }, { "epoch": 0.6267100071268403, "grad_norm": 1.6759517143887541, "learning_rate": 3.230357147061381e-07, "loss": 0.2022, "step": 36054 }, { "epoch": 0.6267273896643432, "grad_norm": 2.0793757459675932, "learning_rate": 3.230093876872809e-07, "loss": 0.2104, "step": 36055 }, { "epoch": 0.626744772201846, "grad_norm": 0.8590832637491953, "learning_rate": 3.229830612294027e-07, "loss": 0.1941, "step": 36056 }, { "epoch": 0.6267621547393488, "grad_norm": 1.6886041865176304, "learning_rate": 3.2295673533258705e-07, "loss": 0.173, "step": 36057 }, { "epoch": 0.6267795372768517, "grad_norm": 1.6793643220686736, "learning_rate": 3.2293040999691696e-07, "loss": 0.257, "step": 36058 }, { "epoch": 0.6267969198143545, "grad_norm": 1.4377387643810795, "learning_rate": 3.2290408522247623e-07, "loss": 0.2013, "step": 36059 }, { "epoch": 0.6268143023518573, "grad_norm": 1.8690540308942292, "learning_rate": 3.2287776100934825e-07, "loss": 0.2274, "step": 36060 }, { "epoch": 0.6268316848893601, "grad_norm": 2.024725034907196, "learning_rate": 3.228514373576163e-07, "loss": 0.1916, "step": 36061 }, { "epoch": 0.626849067426863, "grad_norm": 1.1141906095175775, "learning_rate": 3.22825114267364e-07, "loss": 0.1515, "step": 36062 }, { "epoch": 0.6268664499643658, "grad_norm": 0.8489736526304735, "learning_rate": 3.227987917386745e-07, "loss": 0.1951, "step": 36063 }, { "epoch": 0.6268838325018686, "grad_norm": 1.5275574581821656, "learning_rate": 3.2277246977163175e-07, "loss": 0.3187, "step": 36064 }, { "epoch": 0.6269012150393715, "grad_norm": 1.5333241616486786, "learning_rate": 3.227461483663187e-07, "loss": 0.1942, "step": 36065 }, { "epoch": 0.6269185975768743, "grad_norm": 2.4659783049086275, "learning_rate": 3.227198275228189e-07, "loss": 0.1999, "step": 36066 }, { "epoch": 0.6269359801143771, "grad_norm": 1.6761096725224511, "learning_rate": 3.2269350724121593e-07, "loss": 0.0984, "step": 36067 }, { "epoch": 0.62695336265188, "grad_norm": 1.7315954557471667, "learning_rate": 3.2266718752159307e-07, "loss": 0.2266, "step": 36068 }, { "epoch": 0.6269707451893828, "grad_norm": 1.3089855334729785, "learning_rate": 3.226408683640336e-07, "loss": 0.2497, "step": 36069 }, { "epoch": 0.6269881277268856, "grad_norm": 1.393275390392943, "learning_rate": 3.2261454976862135e-07, "loss": 0.1301, "step": 36070 }, { "epoch": 0.6270055102643884, "grad_norm": 1.1065332153264544, "learning_rate": 3.2258823173543934e-07, "loss": 0.14, "step": 36071 }, { "epoch": 0.6270228928018913, "grad_norm": 1.7312520235338953, "learning_rate": 3.225619142645711e-07, "loss": 0.2454, "step": 36072 }, { "epoch": 0.627040275339394, "grad_norm": 1.535695616246353, "learning_rate": 3.225355973561002e-07, "loss": 0.2916, "step": 36073 }, { "epoch": 0.6270576578768968, "grad_norm": 0.8649827677821013, "learning_rate": 3.2250928101010985e-07, "loss": 0.2203, "step": 36074 }, { "epoch": 0.6270750404143997, "grad_norm": 0.9208260962973854, "learning_rate": 3.2248296522668356e-07, "loss": 0.1927, "step": 36075 }, { "epoch": 0.6270924229519025, "grad_norm": 1.58751161855144, "learning_rate": 3.224566500059049e-07, "loss": 0.1572, "step": 36076 }, { "epoch": 0.6271098054894053, "grad_norm": 0.8164150010309125, "learning_rate": 3.2243033534785696e-07, "loss": 0.2068, "step": 36077 }, { "epoch": 0.6271271880269081, "grad_norm": 0.935692908212377, "learning_rate": 3.2240402125262324e-07, "loss": 0.1381, "step": 36078 }, { "epoch": 0.627144570564411, "grad_norm": 2.4626942685544586, "learning_rate": 3.223777077202872e-07, "loss": 0.3067, "step": 36079 }, { "epoch": 0.6271619531019138, "grad_norm": 1.756894590503658, "learning_rate": 3.223513947509322e-07, "loss": 0.2266, "step": 36080 }, { "epoch": 0.6271793356394166, "grad_norm": 1.550279980769629, "learning_rate": 3.223250823446417e-07, "loss": 0.234, "step": 36081 }, { "epoch": 0.6271967181769195, "grad_norm": 1.320360610407018, "learning_rate": 3.222987705014991e-07, "loss": 0.2212, "step": 36082 }, { "epoch": 0.6272141007144223, "grad_norm": 1.2389256313904418, "learning_rate": 3.2227245922158786e-07, "loss": 0.198, "step": 36083 }, { "epoch": 0.6272314832519251, "grad_norm": 1.3489511658871751, "learning_rate": 3.222461485049912e-07, "loss": 0.1476, "step": 36084 }, { "epoch": 0.627248865789428, "grad_norm": 2.91904721075353, "learning_rate": 3.2221983835179267e-07, "loss": 0.2077, "step": 36085 }, { "epoch": 0.6272662483269308, "grad_norm": 1.7925449613135167, "learning_rate": 3.2219352876207537e-07, "loss": 0.2376, "step": 36086 }, { "epoch": 0.6272836308644336, "grad_norm": 1.447950093951585, "learning_rate": 3.221672197359231e-07, "loss": 0.2862, "step": 36087 }, { "epoch": 0.6273010134019364, "grad_norm": 1.7698549656667857, "learning_rate": 3.221409112734189e-07, "loss": 0.2073, "step": 36088 }, { "epoch": 0.6273183959394393, "grad_norm": 1.4252696838267358, "learning_rate": 3.221146033746466e-07, "loss": 0.2408, "step": 36089 }, { "epoch": 0.6273357784769421, "grad_norm": 1.7117286750176055, "learning_rate": 3.220882960396891e-07, "loss": 0.2154, "step": 36090 }, { "epoch": 0.6273531610144449, "grad_norm": 1.7177233576970312, "learning_rate": 3.220619892686299e-07, "loss": 0.3194, "step": 36091 }, { "epoch": 0.6273705435519478, "grad_norm": 3.8608781141597914, "learning_rate": 3.220356830615526e-07, "loss": 0.344, "step": 36092 }, { "epoch": 0.6273879260894505, "grad_norm": 2.426952723521073, "learning_rate": 3.220093774185404e-07, "loss": 0.2393, "step": 36093 }, { "epoch": 0.6274053086269533, "grad_norm": 1.2775159856318155, "learning_rate": 3.2198307233967657e-07, "loss": 0.2449, "step": 36094 }, { "epoch": 0.6274226911644561, "grad_norm": 1.7728991365414102, "learning_rate": 3.219567678250449e-07, "loss": 0.283, "step": 36095 }, { "epoch": 0.627440073701959, "grad_norm": 1.4990810681658693, "learning_rate": 3.2193046387472834e-07, "loss": 0.2106, "step": 36096 }, { "epoch": 0.6274574562394618, "grad_norm": 1.5190464388013623, "learning_rate": 3.219041604888103e-07, "loss": 0.2024, "step": 36097 }, { "epoch": 0.6274748387769646, "grad_norm": 1.4790563879245806, "learning_rate": 3.2187785766737443e-07, "loss": 0.1951, "step": 36098 }, { "epoch": 0.6274922213144675, "grad_norm": 2.4592669931527578, "learning_rate": 3.2185155541050373e-07, "loss": 0.2037, "step": 36099 }, { "epoch": 0.6275096038519703, "grad_norm": 3.022872447283575, "learning_rate": 3.2182525371828193e-07, "loss": 0.2651, "step": 36100 }, { "epoch": 0.6275269863894731, "grad_norm": 1.4613263701630974, "learning_rate": 3.217989525907922e-07, "loss": 0.1891, "step": 36101 }, { "epoch": 0.627544368926976, "grad_norm": 1.6876680198732363, "learning_rate": 3.217726520281181e-07, "loss": 0.2172, "step": 36102 }, { "epoch": 0.6275617514644788, "grad_norm": 1.3524791859522671, "learning_rate": 3.217463520303425e-07, "loss": 0.1638, "step": 36103 }, { "epoch": 0.6275791340019816, "grad_norm": 2.8593363054600966, "learning_rate": 3.217200525975493e-07, "loss": 0.67, "step": 36104 }, { "epoch": 0.6275965165394845, "grad_norm": 1.6638121180075012, "learning_rate": 3.2169375372982145e-07, "loss": 0.2779, "step": 36105 }, { "epoch": 0.6276138990769873, "grad_norm": 1.0071329954075503, "learning_rate": 3.216674554272426e-07, "loss": 0.265, "step": 36106 }, { "epoch": 0.6276312816144901, "grad_norm": 2.3771430055605607, "learning_rate": 3.21641157689896e-07, "loss": 0.3588, "step": 36107 }, { "epoch": 0.6276486641519929, "grad_norm": 0.8625650284702838, "learning_rate": 3.2161486051786523e-07, "loss": 0.213, "step": 36108 }, { "epoch": 0.6276660466894958, "grad_norm": 2.0137086767998187, "learning_rate": 3.2158856391123305e-07, "loss": 0.3333, "step": 36109 }, { "epoch": 0.6276834292269986, "grad_norm": 1.3196112269251188, "learning_rate": 3.2156226787008335e-07, "loss": 0.147, "step": 36110 }, { "epoch": 0.6277008117645014, "grad_norm": 1.1629210349276329, "learning_rate": 3.2153597239449914e-07, "loss": 0.2066, "step": 36111 }, { "epoch": 0.6277181943020043, "grad_norm": 1.4523838577028254, "learning_rate": 3.21509677484564e-07, "loss": 0.2088, "step": 36112 }, { "epoch": 0.627735576839507, "grad_norm": 1.1693268904739775, "learning_rate": 3.214833831403612e-07, "loss": 0.2166, "step": 36113 }, { "epoch": 0.6277529593770098, "grad_norm": 2.033714308803785, "learning_rate": 3.2145708936197423e-07, "loss": 0.4047, "step": 36114 }, { "epoch": 0.6277703419145126, "grad_norm": 0.9529953912204749, "learning_rate": 3.214307961494861e-07, "loss": 0.3589, "step": 36115 }, { "epoch": 0.6277877244520155, "grad_norm": 2.587477992534778, "learning_rate": 3.2140450350298043e-07, "loss": 0.2857, "step": 36116 }, { "epoch": 0.6278051069895183, "grad_norm": 0.7117882542018537, "learning_rate": 3.213782114225402e-07, "loss": 0.1556, "step": 36117 }, { "epoch": 0.6278224895270211, "grad_norm": 1.4120044959394096, "learning_rate": 3.2135191990824916e-07, "loss": 0.3142, "step": 36118 }, { "epoch": 0.627839872064524, "grad_norm": 1.389597200195667, "learning_rate": 3.2132562896019046e-07, "loss": 0.3127, "step": 36119 }, { "epoch": 0.6278572546020268, "grad_norm": 2.0601956435088664, "learning_rate": 3.2129933857844736e-07, "loss": 0.3087, "step": 36120 }, { "epoch": 0.6278746371395296, "grad_norm": 1.3654308823333612, "learning_rate": 3.2127304876310353e-07, "loss": 0.2731, "step": 36121 }, { "epoch": 0.6278920196770325, "grad_norm": 1.5147728659551871, "learning_rate": 3.2124675951424174e-07, "loss": 0.2283, "step": 36122 }, { "epoch": 0.6279094022145353, "grad_norm": 1.8906944495379827, "learning_rate": 3.212204708319458e-07, "loss": 0.2529, "step": 36123 }, { "epoch": 0.6279267847520381, "grad_norm": 1.986268112794924, "learning_rate": 3.211941827162986e-07, "loss": 0.1582, "step": 36124 }, { "epoch": 0.6279441672895409, "grad_norm": 2.1359091759785898, "learning_rate": 3.21167895167384e-07, "loss": 0.2628, "step": 36125 }, { "epoch": 0.6279615498270438, "grad_norm": 0.9196798118798368, "learning_rate": 3.2114160818528484e-07, "loss": 0.3176, "step": 36126 }, { "epoch": 0.6279789323645466, "grad_norm": 1.8363074543682656, "learning_rate": 3.2111532177008485e-07, "loss": 0.1647, "step": 36127 }, { "epoch": 0.6279963149020494, "grad_norm": 2.71352401500125, "learning_rate": 3.210890359218668e-07, "loss": 0.224, "step": 36128 }, { "epoch": 0.6280136974395523, "grad_norm": 2.0239662502879385, "learning_rate": 3.210627506407145e-07, "loss": 0.215, "step": 36129 }, { "epoch": 0.6280310799770551, "grad_norm": 1.4902986232762598, "learning_rate": 3.2103646592671096e-07, "loss": 0.2526, "step": 36130 }, { "epoch": 0.6280484625145579, "grad_norm": 1.4806686123486466, "learning_rate": 3.2101018177993963e-07, "loss": 0.207, "step": 36131 }, { "epoch": 0.6280658450520606, "grad_norm": 1.4896083882428859, "learning_rate": 3.2098389820048387e-07, "loss": 0.2662, "step": 36132 }, { "epoch": 0.6280832275895635, "grad_norm": 2.0492801089126775, "learning_rate": 3.209576151884271e-07, "loss": 0.2404, "step": 36133 }, { "epoch": 0.6281006101270663, "grad_norm": 1.6038802416095805, "learning_rate": 3.209313327438522e-07, "loss": 0.1706, "step": 36134 }, { "epoch": 0.6281179926645691, "grad_norm": 1.5707786816992164, "learning_rate": 3.2090505086684285e-07, "loss": 0.2345, "step": 36135 }, { "epoch": 0.628135375202072, "grad_norm": 1.5079261415424867, "learning_rate": 3.2087876955748207e-07, "loss": 0.2267, "step": 36136 }, { "epoch": 0.6281527577395748, "grad_norm": 1.3204098500092747, "learning_rate": 3.2085248881585334e-07, "loss": 0.354, "step": 36137 }, { "epoch": 0.6281701402770776, "grad_norm": 1.4567305866385283, "learning_rate": 3.208262086420399e-07, "loss": 0.2448, "step": 36138 }, { "epoch": 0.6281875228145805, "grad_norm": 2.08528483946566, "learning_rate": 3.207999290361253e-07, "loss": 0.2957, "step": 36139 }, { "epoch": 0.6282049053520833, "grad_norm": 2.2165515459689216, "learning_rate": 3.2077364999819247e-07, "loss": 0.2081, "step": 36140 }, { "epoch": 0.6282222878895861, "grad_norm": 1.2014285658852477, "learning_rate": 3.207473715283249e-07, "loss": 0.2165, "step": 36141 }, { "epoch": 0.628239670427089, "grad_norm": 1.6042220711719144, "learning_rate": 3.207210936266056e-07, "loss": 0.1733, "step": 36142 }, { "epoch": 0.6282570529645918, "grad_norm": 3.916655698594197, "learning_rate": 3.2069481629311823e-07, "loss": 0.2016, "step": 36143 }, { "epoch": 0.6282744355020946, "grad_norm": 0.9086361976136663, "learning_rate": 3.2066853952794586e-07, "loss": 0.2647, "step": 36144 }, { "epoch": 0.6282918180395974, "grad_norm": 1.187030500187057, "learning_rate": 3.2064226333117193e-07, "loss": 0.1694, "step": 36145 }, { "epoch": 0.6283092005771003, "grad_norm": 1.3575684732227107, "learning_rate": 3.206159877028797e-07, "loss": 0.2072, "step": 36146 }, { "epoch": 0.6283265831146031, "grad_norm": 1.6083050734352782, "learning_rate": 3.205897126431523e-07, "loss": 0.3468, "step": 36147 }, { "epoch": 0.6283439656521059, "grad_norm": 1.2674408929696064, "learning_rate": 3.205634381520731e-07, "loss": 0.1861, "step": 36148 }, { "epoch": 0.6283613481896088, "grad_norm": 1.5658129858052536, "learning_rate": 3.205371642297253e-07, "loss": 0.1745, "step": 36149 }, { "epoch": 0.6283787307271116, "grad_norm": 1.4483263910480493, "learning_rate": 3.205108908761923e-07, "loss": 0.242, "step": 36150 }, { "epoch": 0.6283961132646144, "grad_norm": 2.273778237143844, "learning_rate": 3.2048461809155737e-07, "loss": 0.3622, "step": 36151 }, { "epoch": 0.6284134958021171, "grad_norm": 1.8211122664285748, "learning_rate": 3.2045834587590383e-07, "loss": 0.2623, "step": 36152 }, { "epoch": 0.62843087833962, "grad_norm": 3.1922670128443014, "learning_rate": 3.204320742293146e-07, "loss": 0.5452, "step": 36153 }, { "epoch": 0.6284482608771228, "grad_norm": 3.403971566554139, "learning_rate": 3.204058031518734e-07, "loss": 0.3121, "step": 36154 }, { "epoch": 0.6284656434146256, "grad_norm": 1.3530235943107014, "learning_rate": 3.2037953264366315e-07, "loss": 0.2708, "step": 36155 }, { "epoch": 0.6284830259521285, "grad_norm": 2.7646344870222967, "learning_rate": 3.203532627047674e-07, "loss": 0.1465, "step": 36156 }, { "epoch": 0.6285004084896313, "grad_norm": 2.300783439367393, "learning_rate": 3.203269933352693e-07, "loss": 0.2275, "step": 36157 }, { "epoch": 0.6285177910271341, "grad_norm": 1.5391542874127984, "learning_rate": 3.2030072453525215e-07, "loss": 0.2096, "step": 36158 }, { "epoch": 0.628535173564637, "grad_norm": 4.947110329009967, "learning_rate": 3.20274456304799e-07, "loss": 0.3319, "step": 36159 }, { "epoch": 0.6285525561021398, "grad_norm": 1.303519125146565, "learning_rate": 3.2024818864399334e-07, "loss": 0.1581, "step": 36160 }, { "epoch": 0.6285699386396426, "grad_norm": 1.265234261093517, "learning_rate": 3.2022192155291825e-07, "loss": 0.1742, "step": 36161 }, { "epoch": 0.6285873211771454, "grad_norm": 1.922443996368188, "learning_rate": 3.201956550316572e-07, "loss": 0.1992, "step": 36162 }, { "epoch": 0.6286047037146483, "grad_norm": 0.960612235520842, "learning_rate": 3.201693890802932e-07, "loss": 0.1872, "step": 36163 }, { "epoch": 0.6286220862521511, "grad_norm": 1.4767149223605904, "learning_rate": 3.201431236989097e-07, "loss": 0.2541, "step": 36164 }, { "epoch": 0.6286394687896539, "grad_norm": 1.775495008237075, "learning_rate": 3.201168588875901e-07, "loss": 0.2428, "step": 36165 }, { "epoch": 0.6286568513271568, "grad_norm": 1.5598148455890366, "learning_rate": 3.2009059464641726e-07, "loss": 0.4037, "step": 36166 }, { "epoch": 0.6286742338646596, "grad_norm": 2.177880179590653, "learning_rate": 3.200643309754745e-07, "loss": 0.2845, "step": 36167 }, { "epoch": 0.6286916164021624, "grad_norm": 0.7176786949448446, "learning_rate": 3.200380678748452e-07, "loss": 0.1978, "step": 36168 }, { "epoch": 0.6287089989396653, "grad_norm": 2.150812129126497, "learning_rate": 3.200118053446125e-07, "loss": 0.2057, "step": 36169 }, { "epoch": 0.6287263814771681, "grad_norm": 1.3140017687133998, "learning_rate": 3.1998554338485977e-07, "loss": 0.0919, "step": 36170 }, { "epoch": 0.6287437640146709, "grad_norm": 1.3930102620653444, "learning_rate": 3.1995928199567034e-07, "loss": 0.1378, "step": 36171 }, { "epoch": 0.6287611465521736, "grad_norm": 1.906299860682289, "learning_rate": 3.19933021177127e-07, "loss": 0.224, "step": 36172 }, { "epoch": 0.6287785290896765, "grad_norm": 1.5416609978576064, "learning_rate": 3.1990676092931347e-07, "loss": 0.2245, "step": 36173 }, { "epoch": 0.6287959116271793, "grad_norm": 1.3375463601057784, "learning_rate": 3.198805012523127e-07, "loss": 0.2077, "step": 36174 }, { "epoch": 0.6288132941646821, "grad_norm": 1.0740253801113966, "learning_rate": 3.1985424214620787e-07, "loss": 0.2221, "step": 36175 }, { "epoch": 0.628830676702185, "grad_norm": 1.4669656289085644, "learning_rate": 3.1982798361108256e-07, "loss": 0.3098, "step": 36176 }, { "epoch": 0.6288480592396878, "grad_norm": 2.0389489783955295, "learning_rate": 3.198017256470198e-07, "loss": 0.2782, "step": 36177 }, { "epoch": 0.6288654417771906, "grad_norm": 2.062001986009265, "learning_rate": 3.197754682541025e-07, "loss": 0.19, "step": 36178 }, { "epoch": 0.6288828243146934, "grad_norm": 1.0693377284371537, "learning_rate": 3.197492114324144e-07, "loss": 0.1303, "step": 36179 }, { "epoch": 0.6289002068521963, "grad_norm": 1.636692049669116, "learning_rate": 3.197229551820384e-07, "loss": 0.2525, "step": 36180 }, { "epoch": 0.6289175893896991, "grad_norm": 1.190036254010416, "learning_rate": 3.1969669950305787e-07, "loss": 0.2228, "step": 36181 }, { "epoch": 0.6289349719272019, "grad_norm": 1.2961828744452997, "learning_rate": 3.19670444395556e-07, "loss": 0.18, "step": 36182 }, { "epoch": 0.6289523544647048, "grad_norm": 2.0092038627327846, "learning_rate": 3.1964418985961596e-07, "loss": 0.2137, "step": 36183 }, { "epoch": 0.6289697370022076, "grad_norm": 2.0752893692952163, "learning_rate": 3.1961793589532105e-07, "loss": 0.2004, "step": 36184 }, { "epoch": 0.6289871195397104, "grad_norm": 1.464766906520681, "learning_rate": 3.195916825027544e-07, "loss": 0.1166, "step": 36185 }, { "epoch": 0.6290045020772133, "grad_norm": 1.5309399436449351, "learning_rate": 3.195654296819992e-07, "loss": 0.3175, "step": 36186 }, { "epoch": 0.6290218846147161, "grad_norm": 0.9038202400671467, "learning_rate": 3.195391774331387e-07, "loss": 0.196, "step": 36187 }, { "epoch": 0.6290392671522189, "grad_norm": 1.6376439030409273, "learning_rate": 3.19512925756256e-07, "loss": 0.1589, "step": 36188 }, { "epoch": 0.6290566496897217, "grad_norm": 1.3150654133693074, "learning_rate": 3.194866746514346e-07, "loss": 0.1864, "step": 36189 }, { "epoch": 0.6290740322272246, "grad_norm": 1.4337496379135592, "learning_rate": 3.194604241187576e-07, "loss": 0.2329, "step": 36190 }, { "epoch": 0.6290914147647274, "grad_norm": 1.6023622909552164, "learning_rate": 3.1943417415830797e-07, "loss": 0.2581, "step": 36191 }, { "epoch": 0.6291087973022301, "grad_norm": 1.4401327228219414, "learning_rate": 3.1940792477016896e-07, "loss": 0.3237, "step": 36192 }, { "epoch": 0.629126179839733, "grad_norm": 1.3225514643357732, "learning_rate": 3.1938167595442405e-07, "loss": 0.1664, "step": 36193 }, { "epoch": 0.6291435623772358, "grad_norm": 1.5297341446413448, "learning_rate": 3.193554277111561e-07, "loss": 0.1462, "step": 36194 }, { "epoch": 0.6291609449147386, "grad_norm": 1.4842903110377261, "learning_rate": 3.193291800404486e-07, "loss": 0.2489, "step": 36195 }, { "epoch": 0.6291783274522414, "grad_norm": 1.331091101882176, "learning_rate": 3.193029329423847e-07, "loss": 0.161, "step": 36196 }, { "epoch": 0.6291957099897443, "grad_norm": 3.0021128421401, "learning_rate": 3.192766864170474e-07, "loss": 0.352, "step": 36197 }, { "epoch": 0.6292130925272471, "grad_norm": 3.05462560569622, "learning_rate": 3.1925044046451987e-07, "loss": 0.326, "step": 36198 }, { "epoch": 0.6292304750647499, "grad_norm": 1.2955661885375311, "learning_rate": 3.192241950848855e-07, "loss": 0.2848, "step": 36199 }, { "epoch": 0.6292478576022528, "grad_norm": 2.470111204932763, "learning_rate": 3.1919795027822727e-07, "loss": 0.2014, "step": 36200 }, { "epoch": 0.6292652401397556, "grad_norm": 2.2932591500375263, "learning_rate": 3.1917170604462864e-07, "loss": 0.2552, "step": 36201 }, { "epoch": 0.6292826226772584, "grad_norm": 1.0888551687218855, "learning_rate": 3.191454623841725e-07, "loss": 0.1828, "step": 36202 }, { "epoch": 0.6293000052147613, "grad_norm": 2.815767150758694, "learning_rate": 3.191192192969424e-07, "loss": 0.363, "step": 36203 }, { "epoch": 0.6293173877522641, "grad_norm": 1.7020385423221214, "learning_rate": 3.1909297678302116e-07, "loss": 0.3169, "step": 36204 }, { "epoch": 0.6293347702897669, "grad_norm": 1.6500763236687055, "learning_rate": 3.1906673484249213e-07, "loss": 0.2748, "step": 36205 }, { "epoch": 0.6293521528272698, "grad_norm": 1.6100573331801746, "learning_rate": 3.190404934754383e-07, "loss": 0.3234, "step": 36206 }, { "epoch": 0.6293695353647726, "grad_norm": 0.9073419458855995, "learning_rate": 3.1901425268194306e-07, "loss": 0.2872, "step": 36207 }, { "epoch": 0.6293869179022754, "grad_norm": 2.238611439814041, "learning_rate": 3.189880124620894e-07, "loss": 0.3492, "step": 36208 }, { "epoch": 0.6294043004397782, "grad_norm": 1.3454502823941934, "learning_rate": 3.1896177281596083e-07, "loss": 0.1988, "step": 36209 }, { "epoch": 0.6294216829772811, "grad_norm": 2.2603704422316278, "learning_rate": 3.1893553374364015e-07, "loss": 0.2058, "step": 36210 }, { "epoch": 0.6294390655147839, "grad_norm": 1.5302900059683406, "learning_rate": 3.1890929524521056e-07, "loss": 0.1351, "step": 36211 }, { "epoch": 0.6294564480522866, "grad_norm": 1.6203162752363136, "learning_rate": 3.1888305732075537e-07, "loss": 0.2943, "step": 36212 }, { "epoch": 0.6294738305897895, "grad_norm": 2.5601153303012505, "learning_rate": 3.1885681997035775e-07, "loss": 0.3172, "step": 36213 }, { "epoch": 0.6294912131272923, "grad_norm": 1.5506635339517894, "learning_rate": 3.188305831941007e-07, "loss": 0.4296, "step": 36214 }, { "epoch": 0.6295085956647951, "grad_norm": 1.6719310409470143, "learning_rate": 3.1880434699206766e-07, "loss": 0.1184, "step": 36215 }, { "epoch": 0.6295259782022979, "grad_norm": 4.515055507675853, "learning_rate": 3.187781113643415e-07, "loss": 0.291, "step": 36216 }, { "epoch": 0.6295433607398008, "grad_norm": 2.183547668991112, "learning_rate": 3.187518763110054e-07, "loss": 0.2422, "step": 36217 }, { "epoch": 0.6295607432773036, "grad_norm": 2.449423156099029, "learning_rate": 3.187256418321427e-07, "loss": 0.3576, "step": 36218 }, { "epoch": 0.6295781258148064, "grad_norm": 2.7913294932162436, "learning_rate": 3.186994079278363e-07, "loss": 0.3108, "step": 36219 }, { "epoch": 0.6295955083523093, "grad_norm": 1.4994721482537245, "learning_rate": 3.1867317459816964e-07, "loss": 0.2006, "step": 36220 }, { "epoch": 0.6296128908898121, "grad_norm": 1.9664449172573388, "learning_rate": 3.186469418432256e-07, "loss": 0.2789, "step": 36221 }, { "epoch": 0.6296302734273149, "grad_norm": 1.3340205634911935, "learning_rate": 3.1862070966308773e-07, "loss": 0.1698, "step": 36222 }, { "epoch": 0.6296476559648178, "grad_norm": 2.5475030926936335, "learning_rate": 3.1859447805783863e-07, "loss": 0.3321, "step": 36223 }, { "epoch": 0.6296650385023206, "grad_norm": 1.4775240815671902, "learning_rate": 3.185682470275618e-07, "loss": 0.2193, "step": 36224 }, { "epoch": 0.6296824210398234, "grad_norm": 1.206631363511841, "learning_rate": 3.1854201657234016e-07, "loss": 0.1672, "step": 36225 }, { "epoch": 0.6296998035773262, "grad_norm": 1.2930023808037145, "learning_rate": 3.185157866922571e-07, "loss": 0.1517, "step": 36226 }, { "epoch": 0.6297171861148291, "grad_norm": 2.085217862031401, "learning_rate": 3.1848955738739546e-07, "loss": 0.1651, "step": 36227 }, { "epoch": 0.6297345686523319, "grad_norm": 1.8460472429972994, "learning_rate": 3.184633286578389e-07, "loss": 0.2712, "step": 36228 }, { "epoch": 0.6297519511898347, "grad_norm": 2.0136631101506106, "learning_rate": 3.184371005036699e-07, "loss": 0.2739, "step": 36229 }, { "epoch": 0.6297693337273376, "grad_norm": 1.384677485432797, "learning_rate": 3.1841087292497204e-07, "loss": 0.1655, "step": 36230 }, { "epoch": 0.6297867162648404, "grad_norm": 2.338146570181712, "learning_rate": 3.183846459218281e-07, "loss": 0.2206, "step": 36231 }, { "epoch": 0.6298040988023431, "grad_norm": 2.0313544772862926, "learning_rate": 3.1835841949432153e-07, "loss": 0.1665, "step": 36232 }, { "epoch": 0.629821481339846, "grad_norm": 1.3569270229400534, "learning_rate": 3.183321936425353e-07, "loss": 0.259, "step": 36233 }, { "epoch": 0.6298388638773488, "grad_norm": 2.7220795903902015, "learning_rate": 3.1830596836655264e-07, "loss": 0.3038, "step": 36234 }, { "epoch": 0.6298562464148516, "grad_norm": 1.3633877078599799, "learning_rate": 3.182797436664566e-07, "loss": 0.2067, "step": 36235 }, { "epoch": 0.6298736289523544, "grad_norm": 2.1207184133611894, "learning_rate": 3.1825351954233014e-07, "loss": 0.3476, "step": 36236 }, { "epoch": 0.6298910114898573, "grad_norm": 1.3419692843269488, "learning_rate": 3.182272959942567e-07, "loss": 0.2518, "step": 36237 }, { "epoch": 0.6299083940273601, "grad_norm": 1.5433387701695112, "learning_rate": 3.1820107302231916e-07, "loss": 0.2661, "step": 36238 }, { "epoch": 0.6299257765648629, "grad_norm": 1.5086528130176964, "learning_rate": 3.1817485062660063e-07, "loss": 0.2507, "step": 36239 }, { "epoch": 0.6299431591023658, "grad_norm": 2.8851690877676854, "learning_rate": 3.181486288071844e-07, "loss": 0.2842, "step": 36240 }, { "epoch": 0.6299605416398686, "grad_norm": 1.7945324838907846, "learning_rate": 3.1812240756415363e-07, "loss": 0.2365, "step": 36241 }, { "epoch": 0.6299779241773714, "grad_norm": 0.9938158908475684, "learning_rate": 3.18096186897591e-07, "loss": 0.1943, "step": 36242 }, { "epoch": 0.6299953067148742, "grad_norm": 1.7666856856210795, "learning_rate": 3.180699668075801e-07, "loss": 0.2613, "step": 36243 }, { "epoch": 0.6300126892523771, "grad_norm": 2.7154967872616793, "learning_rate": 3.180437472942037e-07, "loss": 0.2238, "step": 36244 }, { "epoch": 0.6300300717898799, "grad_norm": 1.7707856952986551, "learning_rate": 3.18017528357545e-07, "loss": 0.313, "step": 36245 }, { "epoch": 0.6300474543273827, "grad_norm": 2.564751300362807, "learning_rate": 3.1799130999768733e-07, "loss": 0.2376, "step": 36246 }, { "epoch": 0.6300648368648856, "grad_norm": 1.813525891893534, "learning_rate": 3.1796509221471355e-07, "loss": 0.2334, "step": 36247 }, { "epoch": 0.6300822194023884, "grad_norm": 2.4658183231289543, "learning_rate": 3.1793887500870675e-07, "loss": 0.1936, "step": 36248 }, { "epoch": 0.6300996019398912, "grad_norm": 1.658413814866796, "learning_rate": 3.179126583797501e-07, "loss": 0.2092, "step": 36249 }, { "epoch": 0.6301169844773941, "grad_norm": 1.409201728568473, "learning_rate": 3.178864423279266e-07, "loss": 0.1931, "step": 36250 }, { "epoch": 0.6301343670148969, "grad_norm": 1.0507441219412434, "learning_rate": 3.1786022685331946e-07, "loss": 0.1407, "step": 36251 }, { "epoch": 0.6301517495523996, "grad_norm": 2.3727496233586685, "learning_rate": 3.1783401195601165e-07, "loss": 0.2239, "step": 36252 }, { "epoch": 0.6301691320899024, "grad_norm": 1.950158960772814, "learning_rate": 3.178077976360867e-07, "loss": 0.2467, "step": 36253 }, { "epoch": 0.6301865146274053, "grad_norm": 1.735763686684592, "learning_rate": 3.1778158389362707e-07, "loss": 0.2423, "step": 36254 }, { "epoch": 0.6302038971649081, "grad_norm": 0.9978648244089202, "learning_rate": 3.1775537072871615e-07, "loss": 0.1531, "step": 36255 }, { "epoch": 0.6302212797024109, "grad_norm": 1.738818972705266, "learning_rate": 3.1772915814143686e-07, "loss": 0.233, "step": 36256 }, { "epoch": 0.6302386622399138, "grad_norm": 1.4149934391667187, "learning_rate": 3.1770294613187253e-07, "loss": 0.1752, "step": 36257 }, { "epoch": 0.6302560447774166, "grad_norm": 3.451457306021953, "learning_rate": 3.17676734700106e-07, "loss": 0.2222, "step": 36258 }, { "epoch": 0.6302734273149194, "grad_norm": 2.4798358882855718, "learning_rate": 3.1765052384622083e-07, "loss": 0.4459, "step": 36259 }, { "epoch": 0.6302908098524223, "grad_norm": 1.7902171819626802, "learning_rate": 3.1762431357029943e-07, "loss": 0.2172, "step": 36260 }, { "epoch": 0.6303081923899251, "grad_norm": 1.240175842584078, "learning_rate": 3.175981038724253e-07, "loss": 0.1638, "step": 36261 }, { "epoch": 0.6303255749274279, "grad_norm": 2.236354448974298, "learning_rate": 3.1757189475268125e-07, "loss": 0.2806, "step": 36262 }, { "epoch": 0.6303429574649307, "grad_norm": 2.287591828678801, "learning_rate": 3.1754568621115065e-07, "loss": 0.2281, "step": 36263 }, { "epoch": 0.6303603400024336, "grad_norm": 1.1083274830183572, "learning_rate": 3.175194782479162e-07, "loss": 0.1895, "step": 36264 }, { "epoch": 0.6303777225399364, "grad_norm": 2.6751287691488375, "learning_rate": 3.174932708630613e-07, "loss": 0.2294, "step": 36265 }, { "epoch": 0.6303951050774392, "grad_norm": 2.5195109662768806, "learning_rate": 3.174670640566691e-07, "loss": 0.2371, "step": 36266 }, { "epoch": 0.6304124876149421, "grad_norm": 1.2557426996977856, "learning_rate": 3.174408578288221e-07, "loss": 0.1984, "step": 36267 }, { "epoch": 0.6304298701524449, "grad_norm": 1.6234841320578668, "learning_rate": 3.1741465217960394e-07, "loss": 0.1378, "step": 36268 }, { "epoch": 0.6304472526899477, "grad_norm": 1.7693484177627932, "learning_rate": 3.1738844710909736e-07, "loss": 0.2335, "step": 36269 }, { "epoch": 0.6304646352274506, "grad_norm": 1.34361466592864, "learning_rate": 3.173622426173856e-07, "loss": 0.1994, "step": 36270 }, { "epoch": 0.6304820177649533, "grad_norm": 1.2673524379092496, "learning_rate": 3.173360387045516e-07, "loss": 0.25, "step": 36271 }, { "epoch": 0.6304994003024561, "grad_norm": 2.2614208398740194, "learning_rate": 3.1730983537067857e-07, "loss": 0.2649, "step": 36272 }, { "epoch": 0.6305167828399589, "grad_norm": 1.9486071036120394, "learning_rate": 3.172836326158492e-07, "loss": 0.2772, "step": 36273 }, { "epoch": 0.6305341653774618, "grad_norm": 1.473454098922534, "learning_rate": 3.17257430440147e-07, "loss": 0.2254, "step": 36274 }, { "epoch": 0.6305515479149646, "grad_norm": 1.9291482813527132, "learning_rate": 3.172312288436546e-07, "loss": 0.2011, "step": 36275 }, { "epoch": 0.6305689304524674, "grad_norm": 1.8807683898545067, "learning_rate": 3.1720502782645537e-07, "loss": 0.189, "step": 36276 }, { "epoch": 0.6305863129899703, "grad_norm": 2.8913545302981283, "learning_rate": 3.1717882738863213e-07, "loss": 0.1855, "step": 36277 }, { "epoch": 0.6306036955274731, "grad_norm": 1.4496785665968883, "learning_rate": 3.1715262753026836e-07, "loss": 0.1963, "step": 36278 }, { "epoch": 0.6306210780649759, "grad_norm": 1.2263920184291146, "learning_rate": 3.1712642825144644e-07, "loss": 0.2673, "step": 36279 }, { "epoch": 0.6306384606024787, "grad_norm": 1.5946063303175855, "learning_rate": 3.171002295522498e-07, "loss": 0.2243, "step": 36280 }, { "epoch": 0.6306558431399816, "grad_norm": 1.876643364854693, "learning_rate": 3.1707403143276134e-07, "loss": 0.1792, "step": 36281 }, { "epoch": 0.6306732256774844, "grad_norm": 3.220970344058258, "learning_rate": 3.170478338930643e-07, "loss": 0.2833, "step": 36282 }, { "epoch": 0.6306906082149872, "grad_norm": 1.2096342672399418, "learning_rate": 3.1702163693324144e-07, "loss": 0.2544, "step": 36283 }, { "epoch": 0.6307079907524901, "grad_norm": 1.395335055584435, "learning_rate": 3.1699544055337607e-07, "loss": 0.2056, "step": 36284 }, { "epoch": 0.6307253732899929, "grad_norm": 1.4912334775085194, "learning_rate": 3.1696924475355113e-07, "loss": 0.2426, "step": 36285 }, { "epoch": 0.6307427558274957, "grad_norm": 1.7242625253984745, "learning_rate": 3.169430495338496e-07, "loss": 0.2504, "step": 36286 }, { "epoch": 0.6307601383649986, "grad_norm": 2.9928486681291866, "learning_rate": 3.169168548943544e-07, "loss": 0.2036, "step": 36287 }, { "epoch": 0.6307775209025014, "grad_norm": 1.5626601132169387, "learning_rate": 3.168906608351487e-07, "loss": 0.2141, "step": 36288 }, { "epoch": 0.6307949034400042, "grad_norm": 1.1273462581415334, "learning_rate": 3.1686446735631547e-07, "loss": 0.184, "step": 36289 }, { "epoch": 0.630812285977507, "grad_norm": 1.332390180781167, "learning_rate": 3.168382744579378e-07, "loss": 0.2214, "step": 36290 }, { "epoch": 0.6308296685150098, "grad_norm": 1.5910975093005637, "learning_rate": 3.1681208214009874e-07, "loss": 0.2325, "step": 36291 }, { "epoch": 0.6308470510525126, "grad_norm": 1.4431901859344618, "learning_rate": 3.1678589040288104e-07, "loss": 0.299, "step": 36292 }, { "epoch": 0.6308644335900154, "grad_norm": 1.9213426362767816, "learning_rate": 3.1675969924636806e-07, "loss": 0.1816, "step": 36293 }, { "epoch": 0.6308818161275183, "grad_norm": 2.0775326756413324, "learning_rate": 3.1673350867064265e-07, "loss": 0.1987, "step": 36294 }, { "epoch": 0.6308991986650211, "grad_norm": 1.0477956572628055, "learning_rate": 3.167073186757877e-07, "loss": 0.1497, "step": 36295 }, { "epoch": 0.6309165812025239, "grad_norm": 1.9315007640594357, "learning_rate": 3.166811292618865e-07, "loss": 0.2241, "step": 36296 }, { "epoch": 0.6309339637400267, "grad_norm": 1.6439474379206929, "learning_rate": 3.1665494042902197e-07, "loss": 0.1972, "step": 36297 }, { "epoch": 0.6309513462775296, "grad_norm": 1.444684688704549, "learning_rate": 3.166287521772768e-07, "loss": 0.2053, "step": 36298 }, { "epoch": 0.6309687288150324, "grad_norm": 1.9305011536408376, "learning_rate": 3.1660256450673444e-07, "loss": 0.3513, "step": 36299 }, { "epoch": 0.6309861113525352, "grad_norm": 1.194232224008916, "learning_rate": 3.1657637741747753e-07, "loss": 0.1422, "step": 36300 }, { "epoch": 0.6310034938900381, "grad_norm": 1.8426301238933682, "learning_rate": 3.1655019090958947e-07, "loss": 0.1717, "step": 36301 }, { "epoch": 0.6310208764275409, "grad_norm": 1.2275318891447253, "learning_rate": 3.165240049831529e-07, "loss": 0.1181, "step": 36302 }, { "epoch": 0.6310382589650437, "grad_norm": 1.2162716204246644, "learning_rate": 3.1649781963825096e-07, "loss": 0.1599, "step": 36303 }, { "epoch": 0.6310556415025466, "grad_norm": 1.6978092496182833, "learning_rate": 3.1647163487496674e-07, "loss": 0.1935, "step": 36304 }, { "epoch": 0.6310730240400494, "grad_norm": 1.1043858897563725, "learning_rate": 3.1644545069338315e-07, "loss": 0.1291, "step": 36305 }, { "epoch": 0.6310904065775522, "grad_norm": 2.4749206019628107, "learning_rate": 3.1641926709358296e-07, "loss": 0.1795, "step": 36306 }, { "epoch": 0.631107789115055, "grad_norm": 1.2225355321817553, "learning_rate": 3.163930840756495e-07, "loss": 0.1381, "step": 36307 }, { "epoch": 0.6311251716525579, "grad_norm": 1.5597013609172368, "learning_rate": 3.163669016396655e-07, "loss": 0.1769, "step": 36308 }, { "epoch": 0.6311425541900607, "grad_norm": 1.7538775003088884, "learning_rate": 3.163407197857142e-07, "loss": 0.1624, "step": 36309 }, { "epoch": 0.6311599367275635, "grad_norm": 2.565009102780094, "learning_rate": 3.163145385138786e-07, "loss": 0.2694, "step": 36310 }, { "epoch": 0.6311773192650663, "grad_norm": 2.6332695632959497, "learning_rate": 3.1628835782424136e-07, "loss": 0.2326, "step": 36311 }, { "epoch": 0.6311947018025691, "grad_norm": 1.6643103303048834, "learning_rate": 3.162621777168855e-07, "loss": 0.4254, "step": 36312 }, { "epoch": 0.6312120843400719, "grad_norm": 1.5105963190836202, "learning_rate": 3.162359981918943e-07, "loss": 0.1835, "step": 36313 }, { "epoch": 0.6312294668775748, "grad_norm": 7.718081075729058, "learning_rate": 3.1620981924935043e-07, "loss": 0.323, "step": 36314 }, { "epoch": 0.6312468494150776, "grad_norm": 1.477350514536559, "learning_rate": 3.1618364088933715e-07, "loss": 0.1894, "step": 36315 }, { "epoch": 0.6312642319525804, "grad_norm": 1.5328224266716228, "learning_rate": 3.1615746311193736e-07, "loss": 0.2076, "step": 36316 }, { "epoch": 0.6312816144900832, "grad_norm": 1.318720531822294, "learning_rate": 3.1613128591723384e-07, "loss": 0.3041, "step": 36317 }, { "epoch": 0.6312989970275861, "grad_norm": 2.680343407338551, "learning_rate": 3.1610510930530967e-07, "loss": 0.2053, "step": 36318 }, { "epoch": 0.6313163795650889, "grad_norm": 2.7092965287201327, "learning_rate": 3.1607893327624783e-07, "loss": 0.2533, "step": 36319 }, { "epoch": 0.6313337621025917, "grad_norm": 2.3307520803146584, "learning_rate": 3.160527578301312e-07, "loss": 0.2294, "step": 36320 }, { "epoch": 0.6313511446400946, "grad_norm": 1.8202486313609256, "learning_rate": 3.16026582967043e-07, "loss": 0.1736, "step": 36321 }, { "epoch": 0.6313685271775974, "grad_norm": 1.5515820684273653, "learning_rate": 3.160004086870659e-07, "loss": 0.1475, "step": 36322 }, { "epoch": 0.6313859097151002, "grad_norm": 1.3265715226482337, "learning_rate": 3.1597423499028316e-07, "loss": 0.2114, "step": 36323 }, { "epoch": 0.631403292252603, "grad_norm": 2.5117395841173598, "learning_rate": 3.159480618767775e-07, "loss": 0.2177, "step": 36324 }, { "epoch": 0.6314206747901059, "grad_norm": 1.3262013081852781, "learning_rate": 3.159218893466318e-07, "loss": 0.2162, "step": 36325 }, { "epoch": 0.6314380573276087, "grad_norm": 1.7438919403978714, "learning_rate": 3.158957173999292e-07, "loss": 0.1596, "step": 36326 }, { "epoch": 0.6314554398651115, "grad_norm": 1.118247848831998, "learning_rate": 3.1586954603675263e-07, "loss": 0.1627, "step": 36327 }, { "epoch": 0.6314728224026144, "grad_norm": 1.2367177400486666, "learning_rate": 3.1584337525718495e-07, "loss": 0.1655, "step": 36328 }, { "epoch": 0.6314902049401172, "grad_norm": 1.7633238588446405, "learning_rate": 3.1581720506130936e-07, "loss": 0.1606, "step": 36329 }, { "epoch": 0.63150758747762, "grad_norm": 1.057468902950126, "learning_rate": 3.157910354492086e-07, "loss": 0.1406, "step": 36330 }, { "epoch": 0.6315249700151228, "grad_norm": 2.4799151616455615, "learning_rate": 3.1576486642096546e-07, "loss": 0.4198, "step": 36331 }, { "epoch": 0.6315423525526256, "grad_norm": 1.4737411216910201, "learning_rate": 3.157386979766631e-07, "loss": 0.263, "step": 36332 }, { "epoch": 0.6315597350901284, "grad_norm": 0.8938245388940727, "learning_rate": 3.157125301163844e-07, "loss": 0.1658, "step": 36333 }, { "epoch": 0.6315771176276312, "grad_norm": 1.9905875969884421, "learning_rate": 3.1568636284021245e-07, "loss": 0.261, "step": 36334 }, { "epoch": 0.6315945001651341, "grad_norm": 1.879630279225721, "learning_rate": 3.1566019614823016e-07, "loss": 0.1816, "step": 36335 }, { "epoch": 0.6316118827026369, "grad_norm": 1.3221256612534584, "learning_rate": 3.1563403004052024e-07, "loss": 0.1971, "step": 36336 }, { "epoch": 0.6316292652401397, "grad_norm": 1.3503002877458097, "learning_rate": 3.1560786451716564e-07, "loss": 0.1119, "step": 36337 }, { "epoch": 0.6316466477776426, "grad_norm": 3.001136928765333, "learning_rate": 3.1558169957824953e-07, "loss": 0.285, "step": 36338 }, { "epoch": 0.6316640303151454, "grad_norm": 1.2590165337115604, "learning_rate": 3.1555553522385456e-07, "loss": 0.2911, "step": 36339 }, { "epoch": 0.6316814128526482, "grad_norm": 1.211685801453038, "learning_rate": 3.15529371454064e-07, "loss": 0.1176, "step": 36340 }, { "epoch": 0.6316987953901511, "grad_norm": 0.7850051666470351, "learning_rate": 3.1550320826896047e-07, "loss": 0.2873, "step": 36341 }, { "epoch": 0.6317161779276539, "grad_norm": 2.199677829659248, "learning_rate": 3.154770456686273e-07, "loss": 0.221, "step": 36342 }, { "epoch": 0.6317335604651567, "grad_norm": 1.2574926805046753, "learning_rate": 3.1545088365314685e-07, "loss": 0.3224, "step": 36343 }, { "epoch": 0.6317509430026595, "grad_norm": 1.2618868521140632, "learning_rate": 3.154247222226024e-07, "loss": 0.1559, "step": 36344 }, { "epoch": 0.6317683255401624, "grad_norm": 1.1645959761503417, "learning_rate": 3.153985613770768e-07, "loss": 0.1424, "step": 36345 }, { "epoch": 0.6317857080776652, "grad_norm": 1.940831457018908, "learning_rate": 3.15372401116653e-07, "loss": 0.1806, "step": 36346 }, { "epoch": 0.631803090615168, "grad_norm": 1.1372219413002302, "learning_rate": 3.153462414414137e-07, "loss": 0.222, "step": 36347 }, { "epoch": 0.6318204731526709, "grad_norm": 2.0995606936584585, "learning_rate": 3.1532008235144226e-07, "loss": 0.2262, "step": 36348 }, { "epoch": 0.6318378556901737, "grad_norm": 2.0855857199832317, "learning_rate": 3.152939238468212e-07, "loss": 0.3161, "step": 36349 }, { "epoch": 0.6318552382276765, "grad_norm": 1.5681007573878398, "learning_rate": 3.1526776592763357e-07, "loss": 0.2006, "step": 36350 }, { "epoch": 0.6318726207651792, "grad_norm": 1.0445587457709238, "learning_rate": 3.152416085939621e-07, "loss": 0.1259, "step": 36351 }, { "epoch": 0.6318900033026821, "grad_norm": 0.9870883874249163, "learning_rate": 3.1521545184589004e-07, "loss": 0.2182, "step": 36352 }, { "epoch": 0.6319073858401849, "grad_norm": 1.0353876471555086, "learning_rate": 3.1518929568349994e-07, "loss": 0.2884, "step": 36353 }, { "epoch": 0.6319247683776877, "grad_norm": 1.3554398753754437, "learning_rate": 3.1516314010687516e-07, "loss": 0.2318, "step": 36354 }, { "epoch": 0.6319421509151906, "grad_norm": 1.1701550378130716, "learning_rate": 3.151369851160982e-07, "loss": 0.251, "step": 36355 }, { "epoch": 0.6319595334526934, "grad_norm": 0.8426295295662204, "learning_rate": 3.1511083071125194e-07, "loss": 0.2036, "step": 36356 }, { "epoch": 0.6319769159901962, "grad_norm": 1.2274073300577797, "learning_rate": 3.150846768924196e-07, "loss": 0.2952, "step": 36357 }, { "epoch": 0.6319942985276991, "grad_norm": 0.908887949448711, "learning_rate": 3.1505852365968366e-07, "loss": 0.1605, "step": 36358 }, { "epoch": 0.6320116810652019, "grad_norm": 1.0906311061119123, "learning_rate": 3.1503237101312743e-07, "loss": 0.2786, "step": 36359 }, { "epoch": 0.6320290636027047, "grad_norm": 1.022289878075421, "learning_rate": 3.150062189528336e-07, "loss": 0.1137, "step": 36360 }, { "epoch": 0.6320464461402076, "grad_norm": 1.425462759106768, "learning_rate": 3.149800674788852e-07, "loss": 0.2682, "step": 36361 }, { "epoch": 0.6320638286777104, "grad_norm": 2.1038294604432313, "learning_rate": 3.149539165913647e-07, "loss": 0.2075, "step": 36362 }, { "epoch": 0.6320812112152132, "grad_norm": 2.2981537926391646, "learning_rate": 3.149277662903555e-07, "loss": 0.2775, "step": 36363 }, { "epoch": 0.632098593752716, "grad_norm": 2.1848815272361626, "learning_rate": 3.149016165759402e-07, "loss": 0.1584, "step": 36364 }, { "epoch": 0.6321159762902189, "grad_norm": 1.6654074814638113, "learning_rate": 3.148754674482017e-07, "loss": 0.175, "step": 36365 }, { "epoch": 0.6321333588277217, "grad_norm": 1.3783589307031887, "learning_rate": 3.148493189072229e-07, "loss": 0.1772, "step": 36366 }, { "epoch": 0.6321507413652245, "grad_norm": 1.5304481180801985, "learning_rate": 3.1482317095308695e-07, "loss": 0.1988, "step": 36367 }, { "epoch": 0.6321681239027274, "grad_norm": 1.4261262213830141, "learning_rate": 3.1479702358587627e-07, "loss": 0.1912, "step": 36368 }, { "epoch": 0.6321855064402302, "grad_norm": 1.6115524405568453, "learning_rate": 3.1477087680567403e-07, "loss": 0.1756, "step": 36369 }, { "epoch": 0.632202888977733, "grad_norm": 1.3635231893442628, "learning_rate": 3.1474473061256293e-07, "loss": 0.2109, "step": 36370 }, { "epoch": 0.6322202715152357, "grad_norm": 1.8435418082959147, "learning_rate": 3.14718585006626e-07, "loss": 0.1687, "step": 36371 }, { "epoch": 0.6322376540527386, "grad_norm": 1.392444152143826, "learning_rate": 3.146924399879459e-07, "loss": 0.19, "step": 36372 }, { "epoch": 0.6322550365902414, "grad_norm": 1.3115251615971792, "learning_rate": 3.1466629555660595e-07, "loss": 0.1486, "step": 36373 }, { "epoch": 0.6322724191277442, "grad_norm": 1.799528573834588, "learning_rate": 3.1464015171268853e-07, "loss": 0.3188, "step": 36374 }, { "epoch": 0.6322898016652471, "grad_norm": 1.4198639963150528, "learning_rate": 3.146140084562767e-07, "loss": 0.1723, "step": 36375 }, { "epoch": 0.6323071842027499, "grad_norm": 1.3002228839364798, "learning_rate": 3.145878657874533e-07, "loss": 0.1728, "step": 36376 }, { "epoch": 0.6323245667402527, "grad_norm": 1.2805474506781331, "learning_rate": 3.145617237063012e-07, "loss": 0.1609, "step": 36377 }, { "epoch": 0.6323419492777556, "grad_norm": 0.9859170346771615, "learning_rate": 3.1453558221290313e-07, "loss": 0.1078, "step": 36378 }, { "epoch": 0.6323593318152584, "grad_norm": 1.259406799552066, "learning_rate": 3.1450944130734237e-07, "loss": 0.191, "step": 36379 }, { "epoch": 0.6323767143527612, "grad_norm": 1.4621369462604157, "learning_rate": 3.144833009897012e-07, "loss": 0.1739, "step": 36380 }, { "epoch": 0.632394096890264, "grad_norm": 1.7077840106917794, "learning_rate": 3.1445716126006283e-07, "loss": 0.206, "step": 36381 }, { "epoch": 0.6324114794277669, "grad_norm": 1.0612984829046095, "learning_rate": 3.1443102211851e-07, "loss": 0.1696, "step": 36382 }, { "epoch": 0.6324288619652697, "grad_norm": 1.063195304968702, "learning_rate": 3.144048835651256e-07, "loss": 0.2571, "step": 36383 }, { "epoch": 0.6324462445027725, "grad_norm": 1.2862401159099859, "learning_rate": 3.1437874559999244e-07, "loss": 0.1308, "step": 36384 }, { "epoch": 0.6324636270402754, "grad_norm": 1.1449215656877383, "learning_rate": 3.1435260822319344e-07, "loss": 0.1613, "step": 36385 }, { "epoch": 0.6324810095777782, "grad_norm": 1.8648253387239866, "learning_rate": 3.143264714348115e-07, "loss": 0.2023, "step": 36386 }, { "epoch": 0.632498392115281, "grad_norm": 2.5166884251866, "learning_rate": 3.1430033523492914e-07, "loss": 0.2508, "step": 36387 }, { "epoch": 0.6325157746527839, "grad_norm": 1.1833633865069095, "learning_rate": 3.1427419962362955e-07, "loss": 0.3053, "step": 36388 }, { "epoch": 0.6325331571902867, "grad_norm": 1.296542614859211, "learning_rate": 3.1424806460099536e-07, "loss": 0.2113, "step": 36389 }, { "epoch": 0.6325505397277895, "grad_norm": 2.2358588555272174, "learning_rate": 3.142219301671095e-07, "loss": 0.2243, "step": 36390 }, { "epoch": 0.6325679222652922, "grad_norm": 1.1558414618889594, "learning_rate": 3.1419579632205483e-07, "loss": 0.1075, "step": 36391 }, { "epoch": 0.6325853048027951, "grad_norm": 1.5009287352844485, "learning_rate": 3.141696630659142e-07, "loss": 0.1968, "step": 36392 }, { "epoch": 0.6326026873402979, "grad_norm": 1.57637727386212, "learning_rate": 3.1414353039877017e-07, "loss": 0.2677, "step": 36393 }, { "epoch": 0.6326200698778007, "grad_norm": 1.6150354162554756, "learning_rate": 3.1411739832070594e-07, "loss": 0.2738, "step": 36394 }, { "epoch": 0.6326374524153036, "grad_norm": 2.1136811104338946, "learning_rate": 3.14091266831804e-07, "loss": 0.2131, "step": 36395 }, { "epoch": 0.6326548349528064, "grad_norm": 2.3102096404811494, "learning_rate": 3.1406513593214746e-07, "loss": 0.3325, "step": 36396 }, { "epoch": 0.6326722174903092, "grad_norm": 1.8653632042089883, "learning_rate": 3.1403900562181897e-07, "loss": 0.2193, "step": 36397 }, { "epoch": 0.632689600027812, "grad_norm": 1.7097524618577793, "learning_rate": 3.1401287590090164e-07, "loss": 0.2556, "step": 36398 }, { "epoch": 0.6327069825653149, "grad_norm": 1.1716177901750136, "learning_rate": 3.139867467694778e-07, "loss": 0.1381, "step": 36399 }, { "epoch": 0.6327243651028177, "grad_norm": 2.804167633110748, "learning_rate": 3.139606182276306e-07, "loss": 0.2833, "step": 36400 }, { "epoch": 0.6327417476403205, "grad_norm": 2.330633589603963, "learning_rate": 3.139344902754427e-07, "loss": 0.2115, "step": 36401 }, { "epoch": 0.6327591301778234, "grad_norm": 1.9731210757421405, "learning_rate": 3.1390836291299704e-07, "loss": 0.1992, "step": 36402 }, { "epoch": 0.6327765127153262, "grad_norm": 1.3554444405181403, "learning_rate": 3.1388223614037635e-07, "loss": 0.1897, "step": 36403 }, { "epoch": 0.632793895252829, "grad_norm": 1.2466776637127222, "learning_rate": 3.138561099576636e-07, "loss": 0.2347, "step": 36404 }, { "epoch": 0.6328112777903319, "grad_norm": 1.5541162955507446, "learning_rate": 3.1382998436494154e-07, "loss": 0.1457, "step": 36405 }, { "epoch": 0.6328286603278347, "grad_norm": 1.3768209719481777, "learning_rate": 3.138038593622927e-07, "loss": 0.2982, "step": 36406 }, { "epoch": 0.6328460428653375, "grad_norm": 1.068638578235295, "learning_rate": 3.137777349498002e-07, "loss": 0.1782, "step": 36407 }, { "epoch": 0.6328634254028404, "grad_norm": 1.9215476373802076, "learning_rate": 3.137516111275468e-07, "loss": 0.2431, "step": 36408 }, { "epoch": 0.6328808079403432, "grad_norm": 1.4952152485719292, "learning_rate": 3.1372548789561503e-07, "loss": 0.2307, "step": 36409 }, { "epoch": 0.6328981904778459, "grad_norm": 1.362204454248635, "learning_rate": 3.13699365254088e-07, "loss": 0.1785, "step": 36410 }, { "epoch": 0.6329155730153487, "grad_norm": 1.2606073872059786, "learning_rate": 3.1367324320304855e-07, "loss": 0.17, "step": 36411 }, { "epoch": 0.6329329555528516, "grad_norm": 0.7177472729034207, "learning_rate": 3.136471217425791e-07, "loss": 0.1557, "step": 36412 }, { "epoch": 0.6329503380903544, "grad_norm": 1.5515669498169866, "learning_rate": 3.1362100087276284e-07, "loss": 0.1694, "step": 36413 }, { "epoch": 0.6329677206278572, "grad_norm": 1.7754515417604775, "learning_rate": 3.1359488059368223e-07, "loss": 0.2208, "step": 36414 }, { "epoch": 0.63298510316536, "grad_norm": 1.572847883584038, "learning_rate": 3.1356876090542033e-07, "loss": 0.2244, "step": 36415 }, { "epoch": 0.6330024857028629, "grad_norm": 1.297287453393055, "learning_rate": 3.1354264180805984e-07, "loss": 0.1326, "step": 36416 }, { "epoch": 0.6330198682403657, "grad_norm": 1.23901033667319, "learning_rate": 3.135165233016836e-07, "loss": 0.1908, "step": 36417 }, { "epoch": 0.6330372507778685, "grad_norm": 1.8082348880536059, "learning_rate": 3.1349040538637416e-07, "loss": 0.28, "step": 36418 }, { "epoch": 0.6330546333153714, "grad_norm": 1.80754028463577, "learning_rate": 3.1346428806221446e-07, "loss": 0.2512, "step": 36419 }, { "epoch": 0.6330720158528742, "grad_norm": 1.1955327822722999, "learning_rate": 3.134381713292873e-07, "loss": 0.2254, "step": 36420 }, { "epoch": 0.633089398390377, "grad_norm": 1.5005655284971198, "learning_rate": 3.1341205518767544e-07, "loss": 0.1436, "step": 36421 }, { "epoch": 0.6331067809278799, "grad_norm": 1.2498111704184283, "learning_rate": 3.133859396374616e-07, "loss": 0.2703, "step": 36422 }, { "epoch": 0.6331241634653827, "grad_norm": 1.5519511269571549, "learning_rate": 3.1335982467872866e-07, "loss": 0.1605, "step": 36423 }, { "epoch": 0.6331415460028855, "grad_norm": 1.4747069261800725, "learning_rate": 3.1333371031155955e-07, "loss": 0.1997, "step": 36424 }, { "epoch": 0.6331589285403884, "grad_norm": 0.9328500180440472, "learning_rate": 3.1330759653603665e-07, "loss": 0.1387, "step": 36425 }, { "epoch": 0.6331763110778912, "grad_norm": 2.0063568194107937, "learning_rate": 3.132814833522428e-07, "loss": 0.2354, "step": 36426 }, { "epoch": 0.633193693615394, "grad_norm": 1.8186361565490732, "learning_rate": 3.1325537076026096e-07, "loss": 0.222, "step": 36427 }, { "epoch": 0.6332110761528968, "grad_norm": 2.3458196809946394, "learning_rate": 3.132292587601738e-07, "loss": 0.2447, "step": 36428 }, { "epoch": 0.6332284586903997, "grad_norm": 2.620895846455565, "learning_rate": 3.1320314735206417e-07, "loss": 0.2286, "step": 36429 }, { "epoch": 0.6332458412279024, "grad_norm": 1.313688799423602, "learning_rate": 3.131770365360148e-07, "loss": 0.2317, "step": 36430 }, { "epoch": 0.6332632237654052, "grad_norm": 1.5922348324011615, "learning_rate": 3.131509263121084e-07, "loss": 0.3046, "step": 36431 }, { "epoch": 0.6332806063029081, "grad_norm": 1.6124760491707877, "learning_rate": 3.1312481668042754e-07, "loss": 0.1454, "step": 36432 }, { "epoch": 0.6332979888404109, "grad_norm": 1.6566425511470066, "learning_rate": 3.1309870764105527e-07, "loss": 0.1918, "step": 36433 }, { "epoch": 0.6333153713779137, "grad_norm": 1.241111162101684, "learning_rate": 3.1307259919407414e-07, "loss": 0.2354, "step": 36434 }, { "epoch": 0.6333327539154165, "grad_norm": 1.7431884163360003, "learning_rate": 3.130464913395672e-07, "loss": 0.2011, "step": 36435 }, { "epoch": 0.6333501364529194, "grad_norm": 1.0694769137894953, "learning_rate": 3.13020384077617e-07, "loss": 0.2083, "step": 36436 }, { "epoch": 0.6333675189904222, "grad_norm": 2.539248437142231, "learning_rate": 3.129942774083061e-07, "loss": 0.2848, "step": 36437 }, { "epoch": 0.633384901527925, "grad_norm": 1.739281659885565, "learning_rate": 3.129681713317176e-07, "loss": 0.2879, "step": 36438 }, { "epoch": 0.6334022840654279, "grad_norm": 1.3393686591683078, "learning_rate": 3.1294206584793405e-07, "loss": 0.1783, "step": 36439 }, { "epoch": 0.6334196666029307, "grad_norm": 2.6683316686946617, "learning_rate": 3.129159609570381e-07, "loss": 0.1729, "step": 36440 }, { "epoch": 0.6334370491404335, "grad_norm": 1.4906064583172036, "learning_rate": 3.128898566591127e-07, "loss": 0.2107, "step": 36441 }, { "epoch": 0.6334544316779364, "grad_norm": 1.759756007664225, "learning_rate": 3.1286375295424044e-07, "loss": 0.1921, "step": 36442 }, { "epoch": 0.6334718142154392, "grad_norm": 1.244077509089936, "learning_rate": 3.128376498425043e-07, "loss": 0.1828, "step": 36443 }, { "epoch": 0.633489196752942, "grad_norm": 2.0505679559548646, "learning_rate": 3.128115473239867e-07, "loss": 0.291, "step": 36444 }, { "epoch": 0.6335065792904448, "grad_norm": 1.718506196352772, "learning_rate": 3.127854453987705e-07, "loss": 0.3037, "step": 36445 }, { "epoch": 0.6335239618279477, "grad_norm": 1.5047661489857462, "learning_rate": 3.127593440669385e-07, "loss": 0.1232, "step": 36446 }, { "epoch": 0.6335413443654505, "grad_norm": 2.978025533106857, "learning_rate": 3.127332433285734e-07, "loss": 0.2934, "step": 36447 }, { "epoch": 0.6335587269029533, "grad_norm": 2.1685252266486725, "learning_rate": 3.127071431837577e-07, "loss": 0.255, "step": 36448 }, { "epoch": 0.6335761094404562, "grad_norm": 1.6822040269694236, "learning_rate": 3.126810436325746e-07, "loss": 0.1941, "step": 36449 }, { "epoch": 0.6335934919779589, "grad_norm": 3.2619530052122885, "learning_rate": 3.1265494467510646e-07, "loss": 0.1469, "step": 36450 }, { "epoch": 0.6336108745154617, "grad_norm": 1.531684909710856, "learning_rate": 3.126288463114359e-07, "loss": 0.1646, "step": 36451 }, { "epoch": 0.6336282570529645, "grad_norm": 1.052266723865216, "learning_rate": 3.12602748541646e-07, "loss": 0.1516, "step": 36452 }, { "epoch": 0.6336456395904674, "grad_norm": 2.0882854772182706, "learning_rate": 3.1257665136581917e-07, "loss": 0.2103, "step": 36453 }, { "epoch": 0.6336630221279702, "grad_norm": 1.5985610839986308, "learning_rate": 3.1255055478403834e-07, "loss": 0.2506, "step": 36454 }, { "epoch": 0.633680404665473, "grad_norm": 1.32907904942357, "learning_rate": 3.125244587963863e-07, "loss": 0.1759, "step": 36455 }, { "epoch": 0.6336977872029759, "grad_norm": 1.385380006558385, "learning_rate": 3.124983634029455e-07, "loss": 0.1866, "step": 36456 }, { "epoch": 0.6337151697404787, "grad_norm": 1.2740270788489663, "learning_rate": 3.124722686037986e-07, "loss": 0.1498, "step": 36457 }, { "epoch": 0.6337325522779815, "grad_norm": 1.8599306532372772, "learning_rate": 3.1244617439902866e-07, "loss": 0.214, "step": 36458 }, { "epoch": 0.6337499348154844, "grad_norm": 2.3443111419270237, "learning_rate": 3.12420080788718e-07, "loss": 0.1819, "step": 36459 }, { "epoch": 0.6337673173529872, "grad_norm": 1.7337400444205406, "learning_rate": 3.1239398777294966e-07, "loss": 0.1621, "step": 36460 }, { "epoch": 0.63378469989049, "grad_norm": 1.0293130827826737, "learning_rate": 3.1236789535180605e-07, "loss": 0.2996, "step": 36461 }, { "epoch": 0.6338020824279929, "grad_norm": 3.0989466984978735, "learning_rate": 3.123418035253703e-07, "loss": 0.1975, "step": 36462 }, { "epoch": 0.6338194649654957, "grad_norm": 1.2753970661462635, "learning_rate": 3.1231571229372465e-07, "loss": 0.2234, "step": 36463 }, { "epoch": 0.6338368475029985, "grad_norm": 1.3281888279828558, "learning_rate": 3.1228962165695203e-07, "loss": 0.1873, "step": 36464 }, { "epoch": 0.6338542300405013, "grad_norm": 1.3885511397107462, "learning_rate": 3.12263531615135e-07, "loss": 0.2495, "step": 36465 }, { "epoch": 0.6338716125780042, "grad_norm": 2.253651352989055, "learning_rate": 3.122374421683564e-07, "loss": 0.2869, "step": 36466 }, { "epoch": 0.633888995115507, "grad_norm": 1.4541287582743434, "learning_rate": 3.122113533166988e-07, "loss": 0.1294, "step": 36467 }, { "epoch": 0.6339063776530098, "grad_norm": 2.220737308637668, "learning_rate": 3.121852650602451e-07, "loss": 0.25, "step": 36468 }, { "epoch": 0.6339237601905127, "grad_norm": 1.7989369781415516, "learning_rate": 3.121591773990777e-07, "loss": 0.3083, "step": 36469 }, { "epoch": 0.6339411427280154, "grad_norm": 1.4379004249448766, "learning_rate": 3.1213309033327937e-07, "loss": 0.1529, "step": 36470 }, { "epoch": 0.6339585252655182, "grad_norm": 2.054105362212297, "learning_rate": 3.121070038629329e-07, "loss": 0.2448, "step": 36471 }, { "epoch": 0.633975907803021, "grad_norm": 2.0334060403846137, "learning_rate": 3.1208091798812097e-07, "loss": 0.1952, "step": 36472 }, { "epoch": 0.6339932903405239, "grad_norm": 1.8141312341996023, "learning_rate": 3.120548327089261e-07, "loss": 0.1834, "step": 36473 }, { "epoch": 0.6340106728780267, "grad_norm": 1.4911020615754806, "learning_rate": 3.120287480254313e-07, "loss": 0.1816, "step": 36474 }, { "epoch": 0.6340280554155295, "grad_norm": 1.94310429025822, "learning_rate": 3.120026639377189e-07, "loss": 0.2157, "step": 36475 }, { "epoch": 0.6340454379530324, "grad_norm": 1.6471669224966279, "learning_rate": 3.119765804458715e-07, "loss": 0.2969, "step": 36476 }, { "epoch": 0.6340628204905352, "grad_norm": 1.9617165882672174, "learning_rate": 3.1195049754997223e-07, "loss": 0.1758, "step": 36477 }, { "epoch": 0.634080203028038, "grad_norm": 1.4123310083419383, "learning_rate": 3.119244152501033e-07, "loss": 0.1819, "step": 36478 }, { "epoch": 0.6340975855655409, "grad_norm": 2.0475828396858082, "learning_rate": 3.118983335463477e-07, "loss": 0.2081, "step": 36479 }, { "epoch": 0.6341149681030437, "grad_norm": 1.3051444277040452, "learning_rate": 3.11872252438788e-07, "loss": 0.1597, "step": 36480 }, { "epoch": 0.6341323506405465, "grad_norm": 1.7222696609863837, "learning_rate": 3.1184617192750696e-07, "loss": 0.2268, "step": 36481 }, { "epoch": 0.6341497331780493, "grad_norm": 1.017952346655201, "learning_rate": 3.1182009201258687e-07, "loss": 0.1413, "step": 36482 }, { "epoch": 0.6341671157155522, "grad_norm": 1.3803715955613678, "learning_rate": 3.1179401269411074e-07, "loss": 0.1611, "step": 36483 }, { "epoch": 0.634184498253055, "grad_norm": 1.6125257216588211, "learning_rate": 3.1176793397216106e-07, "loss": 0.2094, "step": 36484 }, { "epoch": 0.6342018807905578, "grad_norm": 1.0957972757805925, "learning_rate": 3.117418558468207e-07, "loss": 0.2475, "step": 36485 }, { "epoch": 0.6342192633280607, "grad_norm": 1.425723278436096, "learning_rate": 3.1171577831817196e-07, "loss": 0.2069, "step": 36486 }, { "epoch": 0.6342366458655635, "grad_norm": 1.6556889922305038, "learning_rate": 3.1168970138629814e-07, "loss": 0.2851, "step": 36487 }, { "epoch": 0.6342540284030663, "grad_norm": 1.1097473649275975, "learning_rate": 3.1166362505128106e-07, "loss": 0.1656, "step": 36488 }, { "epoch": 0.6342714109405692, "grad_norm": 2.382262144725482, "learning_rate": 3.1163754931320394e-07, "loss": 0.2171, "step": 36489 }, { "epoch": 0.6342887934780719, "grad_norm": 1.5063559861007862, "learning_rate": 3.1161147417214915e-07, "loss": 0.1687, "step": 36490 }, { "epoch": 0.6343061760155747, "grad_norm": 1.4309844300186922, "learning_rate": 3.1158539962819954e-07, "loss": 0.1775, "step": 36491 }, { "epoch": 0.6343235585530775, "grad_norm": 1.5112074474406203, "learning_rate": 3.115593256814376e-07, "loss": 0.1525, "step": 36492 }, { "epoch": 0.6343409410905804, "grad_norm": 1.104855473144029, "learning_rate": 3.1153325233194615e-07, "loss": 0.1745, "step": 36493 }, { "epoch": 0.6343583236280832, "grad_norm": 1.8281201071571642, "learning_rate": 3.115071795798077e-07, "loss": 0.1918, "step": 36494 }, { "epoch": 0.634375706165586, "grad_norm": 1.571369528396991, "learning_rate": 3.114811074251049e-07, "loss": 0.2043, "step": 36495 }, { "epoch": 0.6343930887030889, "grad_norm": 1.8158035190901949, "learning_rate": 3.114550358679202e-07, "loss": 0.159, "step": 36496 }, { "epoch": 0.6344104712405917, "grad_norm": 2.1416477702854846, "learning_rate": 3.1142896490833667e-07, "loss": 0.3322, "step": 36497 }, { "epoch": 0.6344278537780945, "grad_norm": 2.1469729131872572, "learning_rate": 3.114028945464365e-07, "loss": 0.1691, "step": 36498 }, { "epoch": 0.6344452363155973, "grad_norm": 2.0208120253213098, "learning_rate": 3.1137682478230274e-07, "loss": 0.4218, "step": 36499 }, { "epoch": 0.6344626188531002, "grad_norm": 1.7128927923886499, "learning_rate": 3.1135075561601766e-07, "loss": 0.2122, "step": 36500 }, { "epoch": 0.634480001390603, "grad_norm": 1.5828073256735615, "learning_rate": 3.11324687047664e-07, "loss": 0.1593, "step": 36501 }, { "epoch": 0.6344973839281058, "grad_norm": 1.7554648659995242, "learning_rate": 3.112986190773245e-07, "loss": 0.2017, "step": 36502 }, { "epoch": 0.6345147664656087, "grad_norm": 1.1728526326508664, "learning_rate": 3.1127255170508155e-07, "loss": 0.1567, "step": 36503 }, { "epoch": 0.6345321490031115, "grad_norm": 1.001474301003879, "learning_rate": 3.11246484931018e-07, "loss": 0.1119, "step": 36504 }, { "epoch": 0.6345495315406143, "grad_norm": 1.5803698243714441, "learning_rate": 3.112204187552164e-07, "loss": 0.2354, "step": 36505 }, { "epoch": 0.6345669140781172, "grad_norm": 1.399599342396686, "learning_rate": 3.1119435317775954e-07, "loss": 0.2963, "step": 36506 }, { "epoch": 0.63458429661562, "grad_norm": 2.535834135085975, "learning_rate": 3.1116828819872953e-07, "loss": 0.1806, "step": 36507 }, { "epoch": 0.6346016791531228, "grad_norm": 1.145281095771372, "learning_rate": 3.1114222381820956e-07, "loss": 0.1794, "step": 36508 }, { "epoch": 0.6346190616906257, "grad_norm": 1.7411183110930417, "learning_rate": 3.111161600362817e-07, "loss": 0.1322, "step": 36509 }, { "epoch": 0.6346364442281284, "grad_norm": 1.7767270694226034, "learning_rate": 3.110900968530291e-07, "loss": 0.1676, "step": 36510 }, { "epoch": 0.6346538267656312, "grad_norm": 1.3658265016640454, "learning_rate": 3.110640342685339e-07, "loss": 0.1758, "step": 36511 }, { "epoch": 0.634671209303134, "grad_norm": 1.468224470993867, "learning_rate": 3.110379722828793e-07, "loss": 0.1282, "step": 36512 }, { "epoch": 0.6346885918406369, "grad_norm": 3.0698922302567437, "learning_rate": 3.110119108961472e-07, "loss": 0.2743, "step": 36513 }, { "epoch": 0.6347059743781397, "grad_norm": 1.71378924887673, "learning_rate": 3.109858501084206e-07, "loss": 0.279, "step": 36514 }, { "epoch": 0.6347233569156425, "grad_norm": 1.3446891555084095, "learning_rate": 3.10959789919782e-07, "loss": 0.1924, "step": 36515 }, { "epoch": 0.6347407394531454, "grad_norm": 1.232171830229934, "learning_rate": 3.109337303303141e-07, "loss": 0.1598, "step": 36516 }, { "epoch": 0.6347581219906482, "grad_norm": 1.6004073730182986, "learning_rate": 3.109076713400993e-07, "loss": 0.2216, "step": 36517 }, { "epoch": 0.634775504528151, "grad_norm": 1.0099477830791526, "learning_rate": 3.108816129492205e-07, "loss": 0.1792, "step": 36518 }, { "epoch": 0.6347928870656538, "grad_norm": 0.8996049303989543, "learning_rate": 3.108555551577601e-07, "loss": 0.1476, "step": 36519 }, { "epoch": 0.6348102696031567, "grad_norm": 1.0550544566143787, "learning_rate": 3.108294979658006e-07, "loss": 0.2968, "step": 36520 }, { "epoch": 0.6348276521406595, "grad_norm": 1.144750387722576, "learning_rate": 3.1080344137342463e-07, "loss": 0.1804, "step": 36521 }, { "epoch": 0.6348450346781623, "grad_norm": 1.8659592451040836, "learning_rate": 3.1077738538071497e-07, "loss": 0.1445, "step": 36522 }, { "epoch": 0.6348624172156652, "grad_norm": 1.7180032151056546, "learning_rate": 3.107513299877539e-07, "loss": 0.1489, "step": 36523 }, { "epoch": 0.634879799753168, "grad_norm": 2.7845625140942785, "learning_rate": 3.107252751946244e-07, "loss": 0.2488, "step": 36524 }, { "epoch": 0.6348971822906708, "grad_norm": 1.4765337938514855, "learning_rate": 3.106992210014089e-07, "loss": 0.2361, "step": 36525 }, { "epoch": 0.6349145648281737, "grad_norm": 1.3222467999944976, "learning_rate": 3.106731674081896e-07, "loss": 0.1594, "step": 36526 }, { "epoch": 0.6349319473656765, "grad_norm": 2.040557504336252, "learning_rate": 3.106471144150496e-07, "loss": 0.2101, "step": 36527 }, { "epoch": 0.6349493299031793, "grad_norm": 1.8468473903660798, "learning_rate": 3.1062106202207124e-07, "loss": 0.3324, "step": 36528 }, { "epoch": 0.6349667124406821, "grad_norm": 2.011097700434172, "learning_rate": 3.1059501022933697e-07, "loss": 0.2216, "step": 36529 }, { "epoch": 0.6349840949781849, "grad_norm": 2.268418671870215, "learning_rate": 3.1056895903692973e-07, "loss": 0.2527, "step": 36530 }, { "epoch": 0.6350014775156877, "grad_norm": 2.1520617142263263, "learning_rate": 3.1054290844493184e-07, "loss": 0.1535, "step": 36531 }, { "epoch": 0.6350188600531905, "grad_norm": 1.6471119331908222, "learning_rate": 3.105168584534258e-07, "loss": 0.1753, "step": 36532 }, { "epoch": 0.6350362425906934, "grad_norm": 1.2463602064679216, "learning_rate": 3.1049080906249435e-07, "loss": 0.3319, "step": 36533 }, { "epoch": 0.6350536251281962, "grad_norm": 2.4320570183353563, "learning_rate": 3.1046476027221987e-07, "loss": 0.2762, "step": 36534 }, { "epoch": 0.635071007665699, "grad_norm": 1.72893055378666, "learning_rate": 3.1043871208268514e-07, "loss": 0.1853, "step": 36535 }, { "epoch": 0.6350883902032018, "grad_norm": 1.3361533418063856, "learning_rate": 3.104126644939726e-07, "loss": 0.1682, "step": 36536 }, { "epoch": 0.6351057727407047, "grad_norm": 1.8490401962626637, "learning_rate": 3.103866175061649e-07, "loss": 0.3179, "step": 36537 }, { "epoch": 0.6351231552782075, "grad_norm": 1.382819613371348, "learning_rate": 3.103605711193444e-07, "loss": 0.218, "step": 36538 }, { "epoch": 0.6351405378157103, "grad_norm": 1.4456818809200767, "learning_rate": 3.103345253335938e-07, "loss": 0.2618, "step": 36539 }, { "epoch": 0.6351579203532132, "grad_norm": 0.9056000063466332, "learning_rate": 3.1030848014899555e-07, "loss": 0.1382, "step": 36540 }, { "epoch": 0.635175302890716, "grad_norm": 1.307432297326183, "learning_rate": 3.102824355656324e-07, "loss": 0.1279, "step": 36541 }, { "epoch": 0.6351926854282188, "grad_norm": 1.627770319550074, "learning_rate": 3.102563915835867e-07, "loss": 0.1374, "step": 36542 }, { "epoch": 0.6352100679657217, "grad_norm": 1.837987963651882, "learning_rate": 3.1023034820294114e-07, "loss": 0.2686, "step": 36543 }, { "epoch": 0.6352274505032245, "grad_norm": 1.7804150229743572, "learning_rate": 3.1020430542377827e-07, "loss": 0.2029, "step": 36544 }, { "epoch": 0.6352448330407273, "grad_norm": 1.181860926199325, "learning_rate": 3.101782632461806e-07, "loss": 0.1145, "step": 36545 }, { "epoch": 0.6352622155782301, "grad_norm": 1.3262341812059601, "learning_rate": 3.101522216702304e-07, "loss": 0.1805, "step": 36546 }, { "epoch": 0.635279598115733, "grad_norm": 1.4847490201582105, "learning_rate": 3.101261806960106e-07, "loss": 0.1788, "step": 36547 }, { "epoch": 0.6352969806532358, "grad_norm": 2.404304546882798, "learning_rate": 3.1010014032360347e-07, "loss": 0.1869, "step": 36548 }, { "epoch": 0.6353143631907385, "grad_norm": 1.4557070342895697, "learning_rate": 3.100741005530918e-07, "loss": 0.1978, "step": 36549 }, { "epoch": 0.6353317457282414, "grad_norm": 0.9487088712824452, "learning_rate": 3.100480613845581e-07, "loss": 0.1327, "step": 36550 }, { "epoch": 0.6353491282657442, "grad_norm": 1.7220219701879904, "learning_rate": 3.1002202281808453e-07, "loss": 0.1615, "step": 36551 }, { "epoch": 0.635366510803247, "grad_norm": 3.2106261895070065, "learning_rate": 3.09995984853754e-07, "loss": 0.2499, "step": 36552 }, { "epoch": 0.6353838933407499, "grad_norm": 1.4585855575407167, "learning_rate": 3.099699474916489e-07, "loss": 0.1796, "step": 36553 }, { "epoch": 0.6354012758782527, "grad_norm": 2.1795388466828216, "learning_rate": 3.099439107318517e-07, "loss": 0.3023, "step": 36554 }, { "epoch": 0.6354186584157555, "grad_norm": 2.6821194776057253, "learning_rate": 3.0991787457444507e-07, "loss": 0.2085, "step": 36555 }, { "epoch": 0.6354360409532583, "grad_norm": 1.5692899521735775, "learning_rate": 3.0989183901951154e-07, "loss": 0.1498, "step": 36556 }, { "epoch": 0.6354534234907612, "grad_norm": 1.4623565061178139, "learning_rate": 3.098658040671335e-07, "loss": 0.1564, "step": 36557 }, { "epoch": 0.635470806028264, "grad_norm": 2.93445191581535, "learning_rate": 3.098397697173934e-07, "loss": 0.2583, "step": 36558 }, { "epoch": 0.6354881885657668, "grad_norm": 1.0714126170329323, "learning_rate": 3.0981373597037396e-07, "loss": 0.1886, "step": 36559 }, { "epoch": 0.6355055711032697, "grad_norm": 2.251285609159622, "learning_rate": 3.097877028261576e-07, "loss": 0.2082, "step": 36560 }, { "epoch": 0.6355229536407725, "grad_norm": 1.8635885768601506, "learning_rate": 3.097616702848269e-07, "loss": 0.1428, "step": 36561 }, { "epoch": 0.6355403361782753, "grad_norm": 3.3759549808323475, "learning_rate": 3.097356383464642e-07, "loss": 0.1808, "step": 36562 }, { "epoch": 0.6355577187157782, "grad_norm": 2.289862719489886, "learning_rate": 3.0970960701115234e-07, "loss": 0.1911, "step": 36563 }, { "epoch": 0.635575101253281, "grad_norm": 1.4806435544310317, "learning_rate": 3.0968357627897354e-07, "loss": 0.2303, "step": 36564 }, { "epoch": 0.6355924837907838, "grad_norm": 1.539737679882798, "learning_rate": 3.096575461500102e-07, "loss": 0.153, "step": 36565 }, { "epoch": 0.6356098663282866, "grad_norm": 1.265243811133697, "learning_rate": 3.096315166243452e-07, "loss": 0.2093, "step": 36566 }, { "epoch": 0.6356272488657895, "grad_norm": 2.0964607636853336, "learning_rate": 3.0960548770206063e-07, "loss": 0.2104, "step": 36567 }, { "epoch": 0.6356446314032923, "grad_norm": 2.0213397941639646, "learning_rate": 3.0957945938323936e-07, "loss": 0.3097, "step": 36568 }, { "epoch": 0.635662013940795, "grad_norm": 1.5322054536077443, "learning_rate": 3.095534316679639e-07, "loss": 0.2564, "step": 36569 }, { "epoch": 0.6356793964782979, "grad_norm": 1.76270795832716, "learning_rate": 3.0952740455631636e-07, "loss": 0.2198, "step": 36570 }, { "epoch": 0.6356967790158007, "grad_norm": 0.9646310612276542, "learning_rate": 3.095013780483794e-07, "loss": 0.1246, "step": 36571 }, { "epoch": 0.6357141615533035, "grad_norm": 1.385983423451174, "learning_rate": 3.094753521442357e-07, "loss": 0.2313, "step": 36572 }, { "epoch": 0.6357315440908063, "grad_norm": 1.5854634663397322, "learning_rate": 3.0944932684396753e-07, "loss": 0.1548, "step": 36573 }, { "epoch": 0.6357489266283092, "grad_norm": 2.155415310088136, "learning_rate": 3.094233021476574e-07, "loss": 0.1723, "step": 36574 }, { "epoch": 0.635766309165812, "grad_norm": 2.576803555321481, "learning_rate": 3.093972780553882e-07, "loss": 0.2853, "step": 36575 }, { "epoch": 0.6357836917033148, "grad_norm": 2.2921743674728132, "learning_rate": 3.093712545672418e-07, "loss": 0.2796, "step": 36576 }, { "epoch": 0.6358010742408177, "grad_norm": 1.214268860806744, "learning_rate": 3.093452316833009e-07, "loss": 0.2029, "step": 36577 }, { "epoch": 0.6358184567783205, "grad_norm": 1.7001770311211606, "learning_rate": 3.0931920940364816e-07, "loss": 0.1721, "step": 36578 }, { "epoch": 0.6358358393158233, "grad_norm": 2.0074460637564755, "learning_rate": 3.0929318772836575e-07, "loss": 0.1971, "step": 36579 }, { "epoch": 0.6358532218533262, "grad_norm": 1.5628399887311575, "learning_rate": 3.092671666575365e-07, "loss": 0.2534, "step": 36580 }, { "epoch": 0.635870604390829, "grad_norm": 1.555144036660351, "learning_rate": 3.0924114619124263e-07, "loss": 0.206, "step": 36581 }, { "epoch": 0.6358879869283318, "grad_norm": 2.086625243976715, "learning_rate": 3.0921512632956693e-07, "loss": 0.1928, "step": 36582 }, { "epoch": 0.6359053694658346, "grad_norm": 1.8454447926551247, "learning_rate": 3.0918910707259143e-07, "loss": 0.2503, "step": 36583 }, { "epoch": 0.6359227520033375, "grad_norm": 1.3020156137667127, "learning_rate": 3.0916308842039884e-07, "loss": 0.2507, "step": 36584 }, { "epoch": 0.6359401345408403, "grad_norm": 1.2552316095378644, "learning_rate": 3.0913707037307157e-07, "loss": 0.1218, "step": 36585 }, { "epoch": 0.6359575170783431, "grad_norm": 2.395118923476792, "learning_rate": 3.0911105293069217e-07, "loss": 0.2218, "step": 36586 }, { "epoch": 0.635974899615846, "grad_norm": 1.174865919406688, "learning_rate": 3.090850360933429e-07, "loss": 0.1156, "step": 36587 }, { "epoch": 0.6359922821533488, "grad_norm": 1.16953242531835, "learning_rate": 3.090590198611066e-07, "loss": 0.1398, "step": 36588 }, { "epoch": 0.6360096646908515, "grad_norm": 1.9848805345307923, "learning_rate": 3.0903300423406543e-07, "loss": 0.1733, "step": 36589 }, { "epoch": 0.6360270472283543, "grad_norm": 2.0561591648117687, "learning_rate": 3.0900698921230184e-07, "loss": 0.2139, "step": 36590 }, { "epoch": 0.6360444297658572, "grad_norm": 3.575118482097141, "learning_rate": 3.0898097479589835e-07, "loss": 0.2392, "step": 36591 }, { "epoch": 0.63606181230336, "grad_norm": 1.630772315253995, "learning_rate": 3.089549609849374e-07, "loss": 0.3379, "step": 36592 }, { "epoch": 0.6360791948408628, "grad_norm": 1.3718441772536871, "learning_rate": 3.089289477795015e-07, "loss": 0.2199, "step": 36593 }, { "epoch": 0.6360965773783657, "grad_norm": 1.5488324173865728, "learning_rate": 3.0890293517967324e-07, "loss": 0.2377, "step": 36594 }, { "epoch": 0.6361139599158685, "grad_norm": 1.646299949304427, "learning_rate": 3.088769231855348e-07, "loss": 0.2047, "step": 36595 }, { "epoch": 0.6361313424533713, "grad_norm": 2.0949632722971883, "learning_rate": 3.0885091179716853e-07, "loss": 0.4961, "step": 36596 }, { "epoch": 0.6361487249908742, "grad_norm": 1.4185661370024758, "learning_rate": 3.0882490101465724e-07, "loss": 0.1914, "step": 36597 }, { "epoch": 0.636166107528377, "grad_norm": 2.038762795694401, "learning_rate": 3.087988908380831e-07, "loss": 0.1533, "step": 36598 }, { "epoch": 0.6361834900658798, "grad_norm": 1.1814837187335097, "learning_rate": 3.0877288126752863e-07, "loss": 0.1915, "step": 36599 }, { "epoch": 0.6362008726033827, "grad_norm": 1.3195663101785657, "learning_rate": 3.0874687230307627e-07, "loss": 0.162, "step": 36600 }, { "epoch": 0.6362182551408855, "grad_norm": 1.5349481965744576, "learning_rate": 3.087208639448088e-07, "loss": 0.1322, "step": 36601 }, { "epoch": 0.6362356376783883, "grad_norm": 1.1355677584009625, "learning_rate": 3.0869485619280797e-07, "loss": 0.1976, "step": 36602 }, { "epoch": 0.6362530202158911, "grad_norm": 2.752140973182041, "learning_rate": 3.0866884904715674e-07, "loss": 0.2278, "step": 36603 }, { "epoch": 0.636270402753394, "grad_norm": 0.9077723203980039, "learning_rate": 3.086428425079372e-07, "loss": 0.146, "step": 36604 }, { "epoch": 0.6362877852908968, "grad_norm": 1.8992138638504623, "learning_rate": 3.0861683657523207e-07, "loss": 0.126, "step": 36605 }, { "epoch": 0.6363051678283996, "grad_norm": 1.87008592312593, "learning_rate": 3.0859083124912354e-07, "loss": 0.1435, "step": 36606 }, { "epoch": 0.6363225503659025, "grad_norm": 1.7806788385049894, "learning_rate": 3.085648265296944e-07, "loss": 0.2767, "step": 36607 }, { "epoch": 0.6363399329034053, "grad_norm": 1.9509099851915466, "learning_rate": 3.0853882241702666e-07, "loss": 0.2174, "step": 36608 }, { "epoch": 0.636357315440908, "grad_norm": 1.5972568766270228, "learning_rate": 3.0851281891120295e-07, "loss": 0.1853, "step": 36609 }, { "epoch": 0.6363746979784108, "grad_norm": 1.2061164738775505, "learning_rate": 3.0848681601230545e-07, "loss": 0.1968, "step": 36610 }, { "epoch": 0.6363920805159137, "grad_norm": 2.981935759560094, "learning_rate": 3.08460813720417e-07, "loss": 0.2004, "step": 36611 }, { "epoch": 0.6364094630534165, "grad_norm": 1.4659620106555544, "learning_rate": 3.084348120356196e-07, "loss": 0.3206, "step": 36612 }, { "epoch": 0.6364268455909193, "grad_norm": 0.967139998436625, "learning_rate": 3.0840881095799603e-07, "loss": 0.2543, "step": 36613 }, { "epoch": 0.6364442281284222, "grad_norm": 1.0506196022546377, "learning_rate": 3.083828104876285e-07, "loss": 0.18, "step": 36614 }, { "epoch": 0.636461610665925, "grad_norm": 1.1560981099711138, "learning_rate": 3.083568106245993e-07, "loss": 0.1859, "step": 36615 }, { "epoch": 0.6364789932034278, "grad_norm": 4.887068232873529, "learning_rate": 3.0833081136899104e-07, "loss": 0.2606, "step": 36616 }, { "epoch": 0.6364963757409307, "grad_norm": 1.8674763187885928, "learning_rate": 3.083048127208861e-07, "loss": 0.2165, "step": 36617 }, { "epoch": 0.6365137582784335, "grad_norm": 0.8996644695274308, "learning_rate": 3.0827881468036676e-07, "loss": 0.293, "step": 36618 }, { "epoch": 0.6365311408159363, "grad_norm": 1.3702522626397826, "learning_rate": 3.0825281724751565e-07, "loss": 0.1672, "step": 36619 }, { "epoch": 0.6365485233534391, "grad_norm": 2.033473850302611, "learning_rate": 3.0822682042241497e-07, "loss": 0.2563, "step": 36620 }, { "epoch": 0.636565905890942, "grad_norm": 1.8458669829248064, "learning_rate": 3.082008242051471e-07, "loss": 0.2439, "step": 36621 }, { "epoch": 0.6365832884284448, "grad_norm": 0.9464646733305334, "learning_rate": 3.081748285957946e-07, "loss": 0.155, "step": 36622 }, { "epoch": 0.6366006709659476, "grad_norm": 1.0108099022803823, "learning_rate": 3.0814883359443964e-07, "loss": 0.1306, "step": 36623 }, { "epoch": 0.6366180535034505, "grad_norm": 1.425277872138052, "learning_rate": 3.081228392011649e-07, "loss": 0.2369, "step": 36624 }, { "epoch": 0.6366354360409533, "grad_norm": 1.6740434699803484, "learning_rate": 3.0809684541605253e-07, "loss": 0.2304, "step": 36625 }, { "epoch": 0.6366528185784561, "grad_norm": 1.3937123604919905, "learning_rate": 3.0807085223918516e-07, "loss": 0.2865, "step": 36626 }, { "epoch": 0.636670201115959, "grad_norm": 2.0437996341198774, "learning_rate": 3.0804485967064484e-07, "loss": 0.2179, "step": 36627 }, { "epoch": 0.6366875836534618, "grad_norm": 1.683007472272149, "learning_rate": 3.080188677105142e-07, "loss": 0.1282, "step": 36628 }, { "epoch": 0.6367049661909645, "grad_norm": 1.8299158012328216, "learning_rate": 3.079928763588755e-07, "loss": 0.2462, "step": 36629 }, { "epoch": 0.6367223487284673, "grad_norm": 1.514345872805723, "learning_rate": 3.079668856158113e-07, "loss": 0.1406, "step": 36630 }, { "epoch": 0.6367397312659702, "grad_norm": 1.4523522171273582, "learning_rate": 3.0794089548140367e-07, "loss": 0.1555, "step": 36631 }, { "epoch": 0.636757113803473, "grad_norm": 0.7765835695514001, "learning_rate": 3.0791490595573556e-07, "loss": 0.2378, "step": 36632 }, { "epoch": 0.6367744963409758, "grad_norm": 1.591318554066989, "learning_rate": 3.0788891703888863e-07, "loss": 0.2814, "step": 36633 }, { "epoch": 0.6367918788784787, "grad_norm": 1.9333110845229524, "learning_rate": 3.0786292873094564e-07, "loss": 0.2356, "step": 36634 }, { "epoch": 0.6368092614159815, "grad_norm": 2.0552403738447995, "learning_rate": 3.078369410319889e-07, "loss": 0.3241, "step": 36635 }, { "epoch": 0.6368266439534843, "grad_norm": 1.6406926053032043, "learning_rate": 3.0781095394210087e-07, "loss": 0.212, "step": 36636 }, { "epoch": 0.6368440264909871, "grad_norm": 1.5833407632284937, "learning_rate": 3.077849674613637e-07, "loss": 0.2314, "step": 36637 }, { "epoch": 0.63686140902849, "grad_norm": 1.6288798510251856, "learning_rate": 3.077589815898601e-07, "loss": 0.1872, "step": 36638 }, { "epoch": 0.6368787915659928, "grad_norm": 1.6482713002543357, "learning_rate": 3.077329963276721e-07, "loss": 0.1573, "step": 36639 }, { "epoch": 0.6368961741034956, "grad_norm": 0.9965539130684717, "learning_rate": 3.077070116748821e-07, "loss": 0.2735, "step": 36640 }, { "epoch": 0.6369135566409985, "grad_norm": 1.3535557981144986, "learning_rate": 3.076810276315726e-07, "loss": 0.2156, "step": 36641 }, { "epoch": 0.6369309391785013, "grad_norm": 2.2653926474117103, "learning_rate": 3.0765504419782593e-07, "loss": 0.2934, "step": 36642 }, { "epoch": 0.6369483217160041, "grad_norm": 1.1186383730175218, "learning_rate": 3.076290613737243e-07, "loss": 0.1861, "step": 36643 }, { "epoch": 0.636965704253507, "grad_norm": 2.8107347328398924, "learning_rate": 3.076030791593503e-07, "loss": 0.1564, "step": 36644 }, { "epoch": 0.6369830867910098, "grad_norm": 0.9880954878477293, "learning_rate": 3.075770975547862e-07, "loss": 0.1335, "step": 36645 }, { "epoch": 0.6370004693285126, "grad_norm": 1.5872039169327268, "learning_rate": 3.0755111656011424e-07, "loss": 0.2368, "step": 36646 }, { "epoch": 0.6370178518660154, "grad_norm": 1.143247982650883, "learning_rate": 3.075251361754168e-07, "loss": 0.1633, "step": 36647 }, { "epoch": 0.6370352344035183, "grad_norm": 1.3290959927046342, "learning_rate": 3.074991564007762e-07, "loss": 0.2718, "step": 36648 }, { "epoch": 0.637052616941021, "grad_norm": 2.4430092384807502, "learning_rate": 3.07473177236275e-07, "loss": 0.1956, "step": 36649 }, { "epoch": 0.6370699994785238, "grad_norm": 1.7608541496080403, "learning_rate": 3.074471986819953e-07, "loss": 0.2024, "step": 36650 }, { "epoch": 0.6370873820160267, "grad_norm": 2.261774810587334, "learning_rate": 3.0742122073801966e-07, "loss": 0.2874, "step": 36651 }, { "epoch": 0.6371047645535295, "grad_norm": 1.5807185532267336, "learning_rate": 3.0739524340443014e-07, "loss": 0.1839, "step": 36652 }, { "epoch": 0.6371221470910323, "grad_norm": 2.373222325444834, "learning_rate": 3.073692666813093e-07, "loss": 0.2263, "step": 36653 }, { "epoch": 0.6371395296285352, "grad_norm": 1.2610790920216592, "learning_rate": 3.073432905687393e-07, "loss": 0.2587, "step": 36654 }, { "epoch": 0.637156912166038, "grad_norm": 2.6437809734744606, "learning_rate": 3.0731731506680266e-07, "loss": 0.1999, "step": 36655 }, { "epoch": 0.6371742947035408, "grad_norm": 1.2706877634622589, "learning_rate": 3.072913401755815e-07, "loss": 0.1915, "step": 36656 }, { "epoch": 0.6371916772410436, "grad_norm": 1.843992110144627, "learning_rate": 3.072653658951586e-07, "loss": 0.2273, "step": 36657 }, { "epoch": 0.6372090597785465, "grad_norm": 2.800030031375725, "learning_rate": 3.072393922256157e-07, "loss": 0.3493, "step": 36658 }, { "epoch": 0.6372264423160493, "grad_norm": 2.3339627132369465, "learning_rate": 3.0721341916703534e-07, "loss": 0.1744, "step": 36659 }, { "epoch": 0.6372438248535521, "grad_norm": 1.3230259561419773, "learning_rate": 3.0718744671949984e-07, "loss": 0.2764, "step": 36660 }, { "epoch": 0.637261207391055, "grad_norm": 1.4987742806843805, "learning_rate": 3.0716147488309173e-07, "loss": 0.2086, "step": 36661 }, { "epoch": 0.6372785899285578, "grad_norm": 6.581958244972781, "learning_rate": 3.07135503657893e-07, "loss": 0.28, "step": 36662 }, { "epoch": 0.6372959724660606, "grad_norm": 2.906570696161078, "learning_rate": 3.071095330439862e-07, "loss": 0.2791, "step": 36663 }, { "epoch": 0.6373133550035635, "grad_norm": 1.5088777544864258, "learning_rate": 3.0708356304145377e-07, "loss": 0.336, "step": 36664 }, { "epoch": 0.6373307375410663, "grad_norm": 1.4714972376431665, "learning_rate": 3.0705759365037763e-07, "loss": 0.2033, "step": 36665 }, { "epoch": 0.6373481200785691, "grad_norm": 0.9033734366859449, "learning_rate": 3.0703162487084027e-07, "loss": 0.1792, "step": 36666 }, { "epoch": 0.6373655026160719, "grad_norm": 1.6676659107302982, "learning_rate": 3.070056567029241e-07, "loss": 0.2048, "step": 36667 }, { "epoch": 0.6373828851535748, "grad_norm": 2.122202866976606, "learning_rate": 3.0697968914671124e-07, "loss": 0.2173, "step": 36668 }, { "epoch": 0.6374002676910775, "grad_norm": 1.2082464435046378, "learning_rate": 3.0695372220228415e-07, "loss": 0.1739, "step": 36669 }, { "epoch": 0.6374176502285803, "grad_norm": 1.6217945059159085, "learning_rate": 3.069277558697252e-07, "loss": 0.3315, "step": 36670 }, { "epoch": 0.6374350327660832, "grad_norm": 2.007796528848477, "learning_rate": 3.0690179014911645e-07, "loss": 0.2001, "step": 36671 }, { "epoch": 0.637452415303586, "grad_norm": 1.5283862862128257, "learning_rate": 3.068758250405403e-07, "loss": 0.2654, "step": 36672 }, { "epoch": 0.6374697978410888, "grad_norm": 2.0938963428233333, "learning_rate": 3.068498605440791e-07, "loss": 0.2058, "step": 36673 }, { "epoch": 0.6374871803785916, "grad_norm": 1.3954637787769713, "learning_rate": 3.068238966598151e-07, "loss": 0.1861, "step": 36674 }, { "epoch": 0.6375045629160945, "grad_norm": 1.8433130570087222, "learning_rate": 3.0679793338783065e-07, "loss": 0.1844, "step": 36675 }, { "epoch": 0.6375219454535973, "grad_norm": 1.4863105534177372, "learning_rate": 3.0677197072820813e-07, "loss": 0.2097, "step": 36676 }, { "epoch": 0.6375393279911001, "grad_norm": 1.270820845718353, "learning_rate": 3.067460086810295e-07, "loss": 0.1255, "step": 36677 }, { "epoch": 0.637556710528603, "grad_norm": 1.466214303593893, "learning_rate": 3.0672004724637737e-07, "loss": 0.2556, "step": 36678 }, { "epoch": 0.6375740930661058, "grad_norm": 1.1634235819609053, "learning_rate": 3.066940864243337e-07, "loss": 0.1878, "step": 36679 }, { "epoch": 0.6375914756036086, "grad_norm": 1.7170717187752456, "learning_rate": 3.0666812621498115e-07, "loss": 0.2825, "step": 36680 }, { "epoch": 0.6376088581411115, "grad_norm": 2.165361084808303, "learning_rate": 3.0664216661840183e-07, "loss": 0.2124, "step": 36681 }, { "epoch": 0.6376262406786143, "grad_norm": 1.5267557682879014, "learning_rate": 3.066162076346779e-07, "loss": 0.1593, "step": 36682 }, { "epoch": 0.6376436232161171, "grad_norm": 1.8970556593732142, "learning_rate": 3.06590249263892e-07, "loss": 0.1616, "step": 36683 }, { "epoch": 0.63766100575362, "grad_norm": 1.4740384663055044, "learning_rate": 3.065642915061261e-07, "loss": 0.3985, "step": 36684 }, { "epoch": 0.6376783882911228, "grad_norm": 1.3032736491673351, "learning_rate": 3.065383343614623e-07, "loss": 0.2379, "step": 36685 }, { "epoch": 0.6376957708286256, "grad_norm": 1.6055699948874746, "learning_rate": 3.065123778299833e-07, "loss": 0.2446, "step": 36686 }, { "epoch": 0.6377131533661284, "grad_norm": 3.0967419815091235, "learning_rate": 3.064864219117711e-07, "loss": 0.4268, "step": 36687 }, { "epoch": 0.6377305359036312, "grad_norm": 1.3612946844373595, "learning_rate": 3.0646046660690806e-07, "loss": 0.2362, "step": 36688 }, { "epoch": 0.637747918441134, "grad_norm": 1.6665332315319175, "learning_rate": 3.0643451191547663e-07, "loss": 0.236, "step": 36689 }, { "epoch": 0.6377653009786368, "grad_norm": 1.752201466955381, "learning_rate": 3.064085578375587e-07, "loss": 0.1979, "step": 36690 }, { "epoch": 0.6377826835161396, "grad_norm": 0.6757481059386526, "learning_rate": 3.063826043732367e-07, "loss": 0.0831, "step": 36691 }, { "epoch": 0.6378000660536425, "grad_norm": 1.525977321883858, "learning_rate": 3.0635665152259294e-07, "loss": 0.2153, "step": 36692 }, { "epoch": 0.6378174485911453, "grad_norm": 2.016634498198044, "learning_rate": 3.063306992857095e-07, "loss": 0.2113, "step": 36693 }, { "epoch": 0.6378348311286481, "grad_norm": 2.130000400112868, "learning_rate": 3.06304747662669e-07, "loss": 0.1604, "step": 36694 }, { "epoch": 0.637852213666151, "grad_norm": 1.471509292392154, "learning_rate": 3.0627879665355356e-07, "loss": 0.2205, "step": 36695 }, { "epoch": 0.6378695962036538, "grad_norm": 1.844801431088925, "learning_rate": 3.0625284625844503e-07, "loss": 0.2533, "step": 36696 }, { "epoch": 0.6378869787411566, "grad_norm": 0.9721785308073331, "learning_rate": 3.062268964774262e-07, "loss": 0.1272, "step": 36697 }, { "epoch": 0.6379043612786595, "grad_norm": 2.093601023208684, "learning_rate": 3.0620094731057905e-07, "loss": 0.2238, "step": 36698 }, { "epoch": 0.6379217438161623, "grad_norm": 1.3138363069765222, "learning_rate": 3.061749987579858e-07, "loss": 0.2302, "step": 36699 }, { "epoch": 0.6379391263536651, "grad_norm": 2.9716836664182926, "learning_rate": 3.061490508197288e-07, "loss": 0.2432, "step": 36700 }, { "epoch": 0.637956508891168, "grad_norm": 1.0680171358752886, "learning_rate": 3.061231034958902e-07, "loss": 0.2244, "step": 36701 }, { "epoch": 0.6379738914286708, "grad_norm": 1.2648836639242325, "learning_rate": 3.060971567865525e-07, "loss": 0.2589, "step": 36702 }, { "epoch": 0.6379912739661736, "grad_norm": 1.906041399657915, "learning_rate": 3.060712106917976e-07, "loss": 0.2237, "step": 36703 }, { "epoch": 0.6380086565036764, "grad_norm": 2.1939506543391887, "learning_rate": 3.060452652117079e-07, "loss": 0.1507, "step": 36704 }, { "epoch": 0.6380260390411793, "grad_norm": 2.022698651852388, "learning_rate": 3.060193203463656e-07, "loss": 0.2635, "step": 36705 }, { "epoch": 0.6380434215786821, "grad_norm": 1.5233950306986017, "learning_rate": 3.0599337609585297e-07, "loss": 0.2474, "step": 36706 }, { "epoch": 0.6380608041161849, "grad_norm": 1.3838259931105137, "learning_rate": 3.059674324602521e-07, "loss": 0.2388, "step": 36707 }, { "epoch": 0.6380781866536877, "grad_norm": 1.2442146391763589, "learning_rate": 3.0594148943964557e-07, "loss": 0.1297, "step": 36708 }, { "epoch": 0.6380955691911905, "grad_norm": 0.9960803115340571, "learning_rate": 3.0591554703411533e-07, "loss": 0.2475, "step": 36709 }, { "epoch": 0.6381129517286933, "grad_norm": 1.4603135535115543, "learning_rate": 3.0588960524374346e-07, "loss": 0.1667, "step": 36710 }, { "epoch": 0.6381303342661961, "grad_norm": 1.5627815492289625, "learning_rate": 3.0586366406861254e-07, "loss": 0.1645, "step": 36711 }, { "epoch": 0.638147716803699, "grad_norm": 1.4157272556266278, "learning_rate": 3.058377235088045e-07, "loss": 0.2024, "step": 36712 }, { "epoch": 0.6381650993412018, "grad_norm": 1.9800777787154875, "learning_rate": 3.0581178356440174e-07, "loss": 0.1914, "step": 36713 }, { "epoch": 0.6381824818787046, "grad_norm": 1.9952309318671526, "learning_rate": 3.057858442354866e-07, "loss": 0.1911, "step": 36714 }, { "epoch": 0.6381998644162075, "grad_norm": 1.4329133612068232, "learning_rate": 3.0575990552214095e-07, "loss": 0.3178, "step": 36715 }, { "epoch": 0.6382172469537103, "grad_norm": 2.4514529503583633, "learning_rate": 3.057339674244471e-07, "loss": 0.2016, "step": 36716 }, { "epoch": 0.6382346294912131, "grad_norm": 1.8040311100950637, "learning_rate": 3.0570802994248747e-07, "loss": 0.1808, "step": 36717 }, { "epoch": 0.638252012028716, "grad_norm": 1.140750581411971, "learning_rate": 3.0568209307634397e-07, "loss": 0.2217, "step": 36718 }, { "epoch": 0.6382693945662188, "grad_norm": 1.6445211912206297, "learning_rate": 3.0565615682609914e-07, "loss": 0.1755, "step": 36719 }, { "epoch": 0.6382867771037216, "grad_norm": 0.9284005066312032, "learning_rate": 3.056302211918349e-07, "loss": 0.1782, "step": 36720 }, { "epoch": 0.6383041596412244, "grad_norm": 1.6383589659559938, "learning_rate": 3.056042861736339e-07, "loss": 0.2763, "step": 36721 }, { "epoch": 0.6383215421787273, "grad_norm": 0.8903030862716402, "learning_rate": 3.055783517715776e-07, "loss": 0.1492, "step": 36722 }, { "epoch": 0.6383389247162301, "grad_norm": 1.6839937834426242, "learning_rate": 3.0555241798574885e-07, "loss": 0.1948, "step": 36723 }, { "epoch": 0.6383563072537329, "grad_norm": 0.9711054058194998, "learning_rate": 3.055264848162294e-07, "loss": 0.1355, "step": 36724 }, { "epoch": 0.6383736897912358, "grad_norm": 0.9471904771284839, "learning_rate": 3.055005522631018e-07, "loss": 0.206, "step": 36725 }, { "epoch": 0.6383910723287386, "grad_norm": 2.0414631945597757, "learning_rate": 3.05474620326448e-07, "loss": 0.1731, "step": 36726 }, { "epoch": 0.6384084548662414, "grad_norm": 0.8126054776845738, "learning_rate": 3.0544868900635056e-07, "loss": 0.2037, "step": 36727 }, { "epoch": 0.6384258374037441, "grad_norm": 1.3935951863592733, "learning_rate": 3.054227583028912e-07, "loss": 0.1153, "step": 36728 }, { "epoch": 0.638443219941247, "grad_norm": 1.0753625385726597, "learning_rate": 3.0539682821615244e-07, "loss": 0.1471, "step": 36729 }, { "epoch": 0.6384606024787498, "grad_norm": 1.2026846621497334, "learning_rate": 3.0537089874621614e-07, "loss": 0.2108, "step": 36730 }, { "epoch": 0.6384779850162526, "grad_norm": 1.4629153790710174, "learning_rate": 3.0534496989316483e-07, "loss": 0.2288, "step": 36731 }, { "epoch": 0.6384953675537555, "grad_norm": 1.321469237793672, "learning_rate": 3.0531904165708046e-07, "loss": 0.1913, "step": 36732 }, { "epoch": 0.6385127500912583, "grad_norm": 1.6793149786922525, "learning_rate": 3.052931140380454e-07, "loss": 0.2415, "step": 36733 }, { "epoch": 0.6385301326287611, "grad_norm": 2.3005678894105497, "learning_rate": 3.052671870361417e-07, "loss": 0.2395, "step": 36734 }, { "epoch": 0.638547515166264, "grad_norm": 1.4244078680043826, "learning_rate": 3.052412606514515e-07, "loss": 0.215, "step": 36735 }, { "epoch": 0.6385648977037668, "grad_norm": 1.5673201399273955, "learning_rate": 3.052153348840571e-07, "loss": 0.1703, "step": 36736 }, { "epoch": 0.6385822802412696, "grad_norm": 2.0483189280447722, "learning_rate": 3.051894097340405e-07, "loss": 0.1925, "step": 36737 }, { "epoch": 0.6385996627787724, "grad_norm": 1.2103375154825076, "learning_rate": 3.051634852014841e-07, "loss": 0.1673, "step": 36738 }, { "epoch": 0.6386170453162753, "grad_norm": 2.3694076824978545, "learning_rate": 3.051375612864701e-07, "loss": 0.231, "step": 36739 }, { "epoch": 0.6386344278537781, "grad_norm": 2.460116030739185, "learning_rate": 3.0511163798908034e-07, "loss": 0.238, "step": 36740 }, { "epoch": 0.6386518103912809, "grad_norm": 2.018752321822065, "learning_rate": 3.050857153093971e-07, "loss": 0.1787, "step": 36741 }, { "epoch": 0.6386691929287838, "grad_norm": 1.6450541880396476, "learning_rate": 3.050597932475027e-07, "loss": 0.2454, "step": 36742 }, { "epoch": 0.6386865754662866, "grad_norm": 1.943938012080873, "learning_rate": 3.050338718034791e-07, "loss": 0.1539, "step": 36743 }, { "epoch": 0.6387039580037894, "grad_norm": 1.6025932930331694, "learning_rate": 3.0500795097740866e-07, "loss": 0.1582, "step": 36744 }, { "epoch": 0.6387213405412923, "grad_norm": 1.4601467193543278, "learning_rate": 3.0498203076937334e-07, "loss": 0.1983, "step": 36745 }, { "epoch": 0.6387387230787951, "grad_norm": 1.1734594947155472, "learning_rate": 3.0495611117945576e-07, "loss": 0.1693, "step": 36746 }, { "epoch": 0.6387561056162979, "grad_norm": 0.8237416894308548, "learning_rate": 3.049301922077373e-07, "loss": 0.2209, "step": 36747 }, { "epoch": 0.6387734881538006, "grad_norm": 1.496681555152851, "learning_rate": 3.0490427385430073e-07, "loss": 0.28, "step": 36748 }, { "epoch": 0.6387908706913035, "grad_norm": 1.4501368654827806, "learning_rate": 3.0487835611922786e-07, "loss": 0.1752, "step": 36749 }, { "epoch": 0.6388082532288063, "grad_norm": 1.7609693721334378, "learning_rate": 3.0485243900260107e-07, "loss": 0.1378, "step": 36750 }, { "epoch": 0.6388256357663091, "grad_norm": 1.0619080473820743, "learning_rate": 3.0482652250450224e-07, "loss": 0.1729, "step": 36751 }, { "epoch": 0.638843018303812, "grad_norm": 2.0599982244282544, "learning_rate": 3.0480060662501397e-07, "loss": 0.2949, "step": 36752 }, { "epoch": 0.6388604008413148, "grad_norm": 1.582235576262721, "learning_rate": 3.0477469136421794e-07, "loss": 0.2222, "step": 36753 }, { "epoch": 0.6388777833788176, "grad_norm": 2.315809444845865, "learning_rate": 3.047487767221966e-07, "loss": 0.2877, "step": 36754 }, { "epoch": 0.6388951659163205, "grad_norm": 1.6130083422729509, "learning_rate": 3.0472286269903167e-07, "loss": 0.1967, "step": 36755 }, { "epoch": 0.6389125484538233, "grad_norm": 1.6066724208063679, "learning_rate": 3.046969492948057e-07, "loss": 0.1744, "step": 36756 }, { "epoch": 0.6389299309913261, "grad_norm": 1.0760853778014483, "learning_rate": 3.0467103650960063e-07, "loss": 0.1724, "step": 36757 }, { "epoch": 0.6389473135288289, "grad_norm": 1.4995657791623214, "learning_rate": 3.0464512434349887e-07, "loss": 0.177, "step": 36758 }, { "epoch": 0.6389646960663318, "grad_norm": 2.278812024636912, "learning_rate": 3.046192127965822e-07, "loss": 0.2502, "step": 36759 }, { "epoch": 0.6389820786038346, "grad_norm": 1.5391946584646452, "learning_rate": 3.0459330186893273e-07, "loss": 0.1747, "step": 36760 }, { "epoch": 0.6389994611413374, "grad_norm": 0.9821171864444536, "learning_rate": 3.045673915606329e-07, "loss": 0.2267, "step": 36761 }, { "epoch": 0.6390168436788403, "grad_norm": 1.2256770179591345, "learning_rate": 3.045414818717646e-07, "loss": 0.2904, "step": 36762 }, { "epoch": 0.6390342262163431, "grad_norm": 1.2553942974958716, "learning_rate": 3.0451557280240994e-07, "loss": 0.2118, "step": 36763 }, { "epoch": 0.6390516087538459, "grad_norm": 1.322934005553902, "learning_rate": 3.044896643526512e-07, "loss": 0.1987, "step": 36764 }, { "epoch": 0.6390689912913488, "grad_norm": 1.2956764598776465, "learning_rate": 3.044637565225706e-07, "loss": 0.1435, "step": 36765 }, { "epoch": 0.6390863738288516, "grad_norm": 1.386436025732022, "learning_rate": 3.044378493122498e-07, "loss": 0.2316, "step": 36766 }, { "epoch": 0.6391037563663544, "grad_norm": 0.9109025634151706, "learning_rate": 3.044119427217713e-07, "loss": 0.1288, "step": 36767 }, { "epoch": 0.6391211389038571, "grad_norm": 2.2752576684349335, "learning_rate": 3.04386036751217e-07, "loss": 0.2684, "step": 36768 }, { "epoch": 0.63913852144136, "grad_norm": 1.516701267061763, "learning_rate": 3.043601314006692e-07, "loss": 0.1648, "step": 36769 }, { "epoch": 0.6391559039788628, "grad_norm": 1.7155973731986343, "learning_rate": 3.0433422667020985e-07, "loss": 0.2087, "step": 36770 }, { "epoch": 0.6391732865163656, "grad_norm": 1.1697884228608013, "learning_rate": 3.043083225599213e-07, "loss": 0.2071, "step": 36771 }, { "epoch": 0.6391906690538685, "grad_norm": 1.2562125736060779, "learning_rate": 3.042824190698852e-07, "loss": 0.1785, "step": 36772 }, { "epoch": 0.6392080515913713, "grad_norm": 1.5945099829849971, "learning_rate": 3.0425651620018413e-07, "loss": 0.1676, "step": 36773 }, { "epoch": 0.6392254341288741, "grad_norm": 1.2775336681492944, "learning_rate": 3.042306139508998e-07, "loss": 0.1188, "step": 36774 }, { "epoch": 0.639242816666377, "grad_norm": 1.580181820468304, "learning_rate": 3.0420471232211464e-07, "loss": 0.5968, "step": 36775 }, { "epoch": 0.6392601992038798, "grad_norm": 1.4182805837655952, "learning_rate": 3.041788113139105e-07, "loss": 0.141, "step": 36776 }, { "epoch": 0.6392775817413826, "grad_norm": 2.374188039380355, "learning_rate": 3.0415291092636987e-07, "loss": 0.2752, "step": 36777 }, { "epoch": 0.6392949642788854, "grad_norm": 1.5569151651606996, "learning_rate": 3.0412701115957425e-07, "loss": 0.2747, "step": 36778 }, { "epoch": 0.6393123468163883, "grad_norm": 2.3568148263164903, "learning_rate": 3.041011120136061e-07, "loss": 0.3594, "step": 36779 }, { "epoch": 0.6393297293538911, "grad_norm": 1.2071120348761755, "learning_rate": 3.040752134885474e-07, "loss": 0.2019, "step": 36780 }, { "epoch": 0.6393471118913939, "grad_norm": 1.8424983322628923, "learning_rate": 3.040493155844803e-07, "loss": 0.1811, "step": 36781 }, { "epoch": 0.6393644944288968, "grad_norm": 1.5645382702167971, "learning_rate": 3.0402341830148677e-07, "loss": 0.1447, "step": 36782 }, { "epoch": 0.6393818769663996, "grad_norm": 1.2481044702785236, "learning_rate": 3.039975216396491e-07, "loss": 0.1887, "step": 36783 }, { "epoch": 0.6393992595039024, "grad_norm": 1.164130619768041, "learning_rate": 3.0397162559904934e-07, "loss": 0.2196, "step": 36784 }, { "epoch": 0.6394166420414052, "grad_norm": 2.3767633820022636, "learning_rate": 3.0394573017976934e-07, "loss": 0.2468, "step": 36785 }, { "epoch": 0.6394340245789081, "grad_norm": 1.3045758648570422, "learning_rate": 3.039198353818913e-07, "loss": 0.223, "step": 36786 }, { "epoch": 0.6394514071164109, "grad_norm": 1.4118312880813635, "learning_rate": 3.038939412054974e-07, "loss": 0.1977, "step": 36787 }, { "epoch": 0.6394687896539136, "grad_norm": 1.7236125606128008, "learning_rate": 3.0386804765066947e-07, "loss": 0.2433, "step": 36788 }, { "epoch": 0.6394861721914165, "grad_norm": 1.0838403098081244, "learning_rate": 3.038421547174899e-07, "loss": 0.2093, "step": 36789 }, { "epoch": 0.6395035547289193, "grad_norm": 1.198622524798405, "learning_rate": 3.038162624060406e-07, "loss": 0.0974, "step": 36790 }, { "epoch": 0.6395209372664221, "grad_norm": 1.7607196040872575, "learning_rate": 3.0379037071640343e-07, "loss": 0.1788, "step": 36791 }, { "epoch": 0.639538319803925, "grad_norm": 2.6874874082431273, "learning_rate": 3.037644796486608e-07, "loss": 0.2984, "step": 36792 }, { "epoch": 0.6395557023414278, "grad_norm": 1.439878758587597, "learning_rate": 3.037385892028946e-07, "loss": 0.1954, "step": 36793 }, { "epoch": 0.6395730848789306, "grad_norm": 3.4409845477548706, "learning_rate": 3.037126993791869e-07, "loss": 0.2572, "step": 36794 }, { "epoch": 0.6395904674164334, "grad_norm": 3.9626706674286534, "learning_rate": 3.036868101776198e-07, "loss": 0.1533, "step": 36795 }, { "epoch": 0.6396078499539363, "grad_norm": 4.577552136189616, "learning_rate": 3.036609215982755e-07, "loss": 0.3123, "step": 36796 }, { "epoch": 0.6396252324914391, "grad_norm": 2.8424251597118335, "learning_rate": 3.0363503364123567e-07, "loss": 0.1816, "step": 36797 }, { "epoch": 0.6396426150289419, "grad_norm": 3.364805493196947, "learning_rate": 3.0360914630658265e-07, "loss": 0.3152, "step": 36798 }, { "epoch": 0.6396599975664448, "grad_norm": 1.8606551129205742, "learning_rate": 3.0358325959439826e-07, "loss": 0.2363, "step": 36799 }, { "epoch": 0.6396773801039476, "grad_norm": 1.5221848547477037, "learning_rate": 3.035573735047649e-07, "loss": 0.1865, "step": 36800 }, { "epoch": 0.6396947626414504, "grad_norm": 1.4434607558063872, "learning_rate": 3.035314880377643e-07, "loss": 0.2369, "step": 36801 }, { "epoch": 0.6397121451789533, "grad_norm": 3.1140577740165436, "learning_rate": 3.035056031934787e-07, "loss": 0.2563, "step": 36802 }, { "epoch": 0.6397295277164561, "grad_norm": 1.2086284797899367, "learning_rate": 3.034797189719902e-07, "loss": 0.138, "step": 36803 }, { "epoch": 0.6397469102539589, "grad_norm": 1.3779432509087906, "learning_rate": 3.0345383537338067e-07, "loss": 0.2147, "step": 36804 }, { "epoch": 0.6397642927914617, "grad_norm": 1.8219030767020383, "learning_rate": 3.0342795239773204e-07, "loss": 0.2607, "step": 36805 }, { "epoch": 0.6397816753289646, "grad_norm": 2.509524484963601, "learning_rate": 3.0340207004512665e-07, "loss": 0.2753, "step": 36806 }, { "epoch": 0.6397990578664674, "grad_norm": 1.5848479356536087, "learning_rate": 3.0337618831564624e-07, "loss": 0.2174, "step": 36807 }, { "epoch": 0.6398164404039701, "grad_norm": 1.2095353570855951, "learning_rate": 3.033503072093731e-07, "loss": 0.2052, "step": 36808 }, { "epoch": 0.639833822941473, "grad_norm": 1.404075838348524, "learning_rate": 3.0332442672638926e-07, "loss": 0.1639, "step": 36809 }, { "epoch": 0.6398512054789758, "grad_norm": 3.0355696873387323, "learning_rate": 3.0329854686677653e-07, "loss": 0.2574, "step": 36810 }, { "epoch": 0.6398685880164786, "grad_norm": 1.127998017822512, "learning_rate": 3.03272667630617e-07, "loss": 0.1199, "step": 36811 }, { "epoch": 0.6398859705539814, "grad_norm": 1.473143965567682, "learning_rate": 3.0324678901799274e-07, "loss": 0.174, "step": 36812 }, { "epoch": 0.6399033530914843, "grad_norm": 1.1574005176064748, "learning_rate": 3.0322091102898574e-07, "loss": 0.2063, "step": 36813 }, { "epoch": 0.6399207356289871, "grad_norm": 1.6359511854487963, "learning_rate": 3.0319503366367816e-07, "loss": 0.2366, "step": 36814 }, { "epoch": 0.6399381181664899, "grad_norm": 1.3066589754721338, "learning_rate": 3.0316915692215205e-07, "loss": 0.2244, "step": 36815 }, { "epoch": 0.6399555007039928, "grad_norm": 1.5458904909853128, "learning_rate": 3.03143280804489e-07, "loss": 0.1606, "step": 36816 }, { "epoch": 0.6399728832414956, "grad_norm": 2.27149852508733, "learning_rate": 3.0311740531077156e-07, "loss": 0.1269, "step": 36817 }, { "epoch": 0.6399902657789984, "grad_norm": 2.5392176261873436, "learning_rate": 3.030915304410814e-07, "loss": 0.2844, "step": 36818 }, { "epoch": 0.6400076483165013, "grad_norm": 0.967731537444071, "learning_rate": 3.0306565619550054e-07, "loss": 0.1751, "step": 36819 }, { "epoch": 0.6400250308540041, "grad_norm": 1.4223866138329302, "learning_rate": 3.030397825741112e-07, "loss": 0.2131, "step": 36820 }, { "epoch": 0.6400424133915069, "grad_norm": 1.2175549386378317, "learning_rate": 3.0301390957699525e-07, "loss": 0.1769, "step": 36821 }, { "epoch": 0.6400597959290097, "grad_norm": 2.635855079882648, "learning_rate": 3.029880372042348e-07, "loss": 0.1991, "step": 36822 }, { "epoch": 0.6400771784665126, "grad_norm": 4.256238941475032, "learning_rate": 3.029621654559118e-07, "loss": 0.2236, "step": 36823 }, { "epoch": 0.6400945610040154, "grad_norm": 1.616568494726625, "learning_rate": 3.02936294332108e-07, "loss": 0.1872, "step": 36824 }, { "epoch": 0.6401119435415182, "grad_norm": 2.372284997094736, "learning_rate": 3.0291042383290576e-07, "loss": 0.2114, "step": 36825 }, { "epoch": 0.6401293260790211, "grad_norm": 1.919620570361126, "learning_rate": 3.0288455395838683e-07, "loss": 0.1501, "step": 36826 }, { "epoch": 0.6401467086165238, "grad_norm": 1.2764444796363537, "learning_rate": 3.028586847086335e-07, "loss": 0.2209, "step": 36827 }, { "epoch": 0.6401640911540266, "grad_norm": 1.930464639746559, "learning_rate": 3.028328160837276e-07, "loss": 0.2424, "step": 36828 }, { "epoch": 0.6401814736915294, "grad_norm": 3.2987784337481734, "learning_rate": 3.0280694808375106e-07, "loss": 0.1819, "step": 36829 }, { "epoch": 0.6401988562290323, "grad_norm": 2.2904447546133277, "learning_rate": 3.027810807087857e-07, "loss": 0.3048, "step": 36830 }, { "epoch": 0.6402162387665351, "grad_norm": 1.2288926223639012, "learning_rate": 3.027552139589139e-07, "loss": 0.1774, "step": 36831 }, { "epoch": 0.6402336213040379, "grad_norm": 1.4453453930055449, "learning_rate": 3.0272934783421736e-07, "loss": 0.21, "step": 36832 }, { "epoch": 0.6402510038415408, "grad_norm": 1.8824120250989897, "learning_rate": 3.027034823347783e-07, "loss": 0.1732, "step": 36833 }, { "epoch": 0.6402683863790436, "grad_norm": 1.0628372083324669, "learning_rate": 3.0267761746067866e-07, "loss": 0.2506, "step": 36834 }, { "epoch": 0.6402857689165464, "grad_norm": 1.9322210828105304, "learning_rate": 3.026517532120002e-07, "loss": 0.1698, "step": 36835 }, { "epoch": 0.6403031514540493, "grad_norm": 3.2563405388213473, "learning_rate": 3.02625889588825e-07, "loss": 0.2073, "step": 36836 }, { "epoch": 0.6403205339915521, "grad_norm": 2.603581601714771, "learning_rate": 3.02600026591235e-07, "loss": 0.2138, "step": 36837 }, { "epoch": 0.6403379165290549, "grad_norm": 1.2228586041343728, "learning_rate": 3.0257416421931224e-07, "loss": 0.124, "step": 36838 }, { "epoch": 0.6403552990665577, "grad_norm": 1.6151223282649176, "learning_rate": 3.025483024731388e-07, "loss": 0.149, "step": 36839 }, { "epoch": 0.6403726816040606, "grad_norm": 1.3449995653801796, "learning_rate": 3.025224413527964e-07, "loss": 0.175, "step": 36840 }, { "epoch": 0.6403900641415634, "grad_norm": 1.6392658572991978, "learning_rate": 3.0249658085836736e-07, "loss": 0.1983, "step": 36841 }, { "epoch": 0.6404074466790662, "grad_norm": 1.4338039215358118, "learning_rate": 3.024707209899334e-07, "loss": 0.1678, "step": 36842 }, { "epoch": 0.6404248292165691, "grad_norm": 1.1000391974248056, "learning_rate": 3.0244486174757646e-07, "loss": 0.1786, "step": 36843 }, { "epoch": 0.6404422117540719, "grad_norm": 1.8017327119381734, "learning_rate": 3.024190031313785e-07, "loss": 0.1653, "step": 36844 }, { "epoch": 0.6404595942915747, "grad_norm": 1.9001725479546212, "learning_rate": 3.0239314514142154e-07, "loss": 0.2046, "step": 36845 }, { "epoch": 0.6404769768290776, "grad_norm": 1.5955326415870852, "learning_rate": 3.023672877777875e-07, "loss": 0.1509, "step": 36846 }, { "epoch": 0.6404943593665803, "grad_norm": 1.5867159128417385, "learning_rate": 3.0234143104055853e-07, "loss": 0.2503, "step": 36847 }, { "epoch": 0.6405117419040831, "grad_norm": 1.4673028619484982, "learning_rate": 3.023155749298164e-07, "loss": 0.2232, "step": 36848 }, { "epoch": 0.6405291244415859, "grad_norm": 1.211276365807451, "learning_rate": 3.0228971944564296e-07, "loss": 0.1814, "step": 36849 }, { "epoch": 0.6405465069790888, "grad_norm": 2.165109345443128, "learning_rate": 3.022638645881204e-07, "loss": 0.191, "step": 36850 }, { "epoch": 0.6405638895165916, "grad_norm": 1.156403573573936, "learning_rate": 3.0223801035733056e-07, "loss": 0.1613, "step": 36851 }, { "epoch": 0.6405812720540944, "grad_norm": 1.6351985526130446, "learning_rate": 3.022121567533552e-07, "loss": 0.1766, "step": 36852 }, { "epoch": 0.6405986545915973, "grad_norm": 2.516037048527239, "learning_rate": 3.0218630377627676e-07, "loss": 0.1839, "step": 36853 }, { "epoch": 0.6406160371291001, "grad_norm": 1.9069343622709019, "learning_rate": 3.021604514261767e-07, "loss": 0.1856, "step": 36854 }, { "epoch": 0.6406334196666029, "grad_norm": 1.4543474492278579, "learning_rate": 3.0213459970313706e-07, "loss": 0.1872, "step": 36855 }, { "epoch": 0.6406508022041058, "grad_norm": 1.6367757788032538, "learning_rate": 3.021087486072399e-07, "loss": 0.1338, "step": 36856 }, { "epoch": 0.6406681847416086, "grad_norm": 1.5799721258158284, "learning_rate": 3.02082898138567e-07, "loss": 0.1636, "step": 36857 }, { "epoch": 0.6406855672791114, "grad_norm": 1.8451090525428886, "learning_rate": 3.020570482972006e-07, "loss": 0.222, "step": 36858 }, { "epoch": 0.6407029498166142, "grad_norm": 1.2863428114623545, "learning_rate": 3.020311990832224e-07, "loss": 0.2221, "step": 36859 }, { "epoch": 0.6407203323541171, "grad_norm": 1.859341705191715, "learning_rate": 3.0200535049671437e-07, "loss": 0.2451, "step": 36860 }, { "epoch": 0.6407377148916199, "grad_norm": 1.126257234097671, "learning_rate": 3.019795025377583e-07, "loss": 0.2718, "step": 36861 }, { "epoch": 0.6407550974291227, "grad_norm": 1.4115288090785671, "learning_rate": 3.0195365520643633e-07, "loss": 0.115, "step": 36862 }, { "epoch": 0.6407724799666256, "grad_norm": 1.9280947762143923, "learning_rate": 3.019278085028302e-07, "loss": 0.2153, "step": 36863 }, { "epoch": 0.6407898625041284, "grad_norm": 1.7278381757901569, "learning_rate": 3.0190196242702197e-07, "loss": 0.2406, "step": 36864 }, { "epoch": 0.6408072450416312, "grad_norm": 1.1115161930499964, "learning_rate": 3.018761169790935e-07, "loss": 0.1391, "step": 36865 }, { "epoch": 0.640824627579134, "grad_norm": 1.6781271385344438, "learning_rate": 3.01850272159127e-07, "loss": 0.1807, "step": 36866 }, { "epoch": 0.6408420101166368, "grad_norm": 1.7183815144327086, "learning_rate": 3.018244279672038e-07, "loss": 0.2537, "step": 36867 }, { "epoch": 0.6408593926541396, "grad_norm": 1.416283307989105, "learning_rate": 3.017985844034063e-07, "loss": 0.2183, "step": 36868 }, { "epoch": 0.6408767751916424, "grad_norm": 1.1092425378656807, "learning_rate": 3.0177274146781606e-07, "loss": 0.1835, "step": 36869 }, { "epoch": 0.6408941577291453, "grad_norm": 1.516449260059247, "learning_rate": 3.017468991605153e-07, "loss": 0.2225, "step": 36870 }, { "epoch": 0.6409115402666481, "grad_norm": 1.721787226495126, "learning_rate": 3.017210574815857e-07, "loss": 0.2166, "step": 36871 }, { "epoch": 0.6409289228041509, "grad_norm": 1.4942986582993911, "learning_rate": 3.0169521643110945e-07, "loss": 0.3226, "step": 36872 }, { "epoch": 0.6409463053416538, "grad_norm": 1.5423823850287197, "learning_rate": 3.016693760091682e-07, "loss": 0.2192, "step": 36873 }, { "epoch": 0.6409636878791566, "grad_norm": 1.6309300493143857, "learning_rate": 3.016435362158438e-07, "loss": 0.1796, "step": 36874 }, { "epoch": 0.6409810704166594, "grad_norm": 10.050733349858517, "learning_rate": 3.0161769705121837e-07, "loss": 0.2988, "step": 36875 }, { "epoch": 0.6409984529541622, "grad_norm": 1.9311165430671924, "learning_rate": 3.015918585153737e-07, "loss": 0.1483, "step": 36876 }, { "epoch": 0.6410158354916651, "grad_norm": 1.4827426206839738, "learning_rate": 3.0156602060839167e-07, "loss": 0.2044, "step": 36877 }, { "epoch": 0.6410332180291679, "grad_norm": 2.0617656917009164, "learning_rate": 3.015401833303543e-07, "loss": 0.2632, "step": 36878 }, { "epoch": 0.6410506005666707, "grad_norm": 1.4817795535824303, "learning_rate": 3.0151434668134334e-07, "loss": 0.2143, "step": 36879 }, { "epoch": 0.6410679831041736, "grad_norm": 1.7805163298619298, "learning_rate": 3.0148851066144065e-07, "loss": 0.3095, "step": 36880 }, { "epoch": 0.6410853656416764, "grad_norm": 2.650592845668472, "learning_rate": 3.014626752707282e-07, "loss": 0.2116, "step": 36881 }, { "epoch": 0.6411027481791792, "grad_norm": 1.51883091482611, "learning_rate": 3.014368405092879e-07, "loss": 0.2667, "step": 36882 }, { "epoch": 0.6411201307166821, "grad_norm": 1.1799265980121802, "learning_rate": 3.014110063772016e-07, "loss": 0.2317, "step": 36883 }, { "epoch": 0.6411375132541849, "grad_norm": 1.489172175979352, "learning_rate": 3.0138517287455123e-07, "loss": 0.2082, "step": 36884 }, { "epoch": 0.6411548957916877, "grad_norm": 1.436710261306647, "learning_rate": 3.013593400014187e-07, "loss": 0.1878, "step": 36885 }, { "epoch": 0.6411722783291905, "grad_norm": 1.75339769876274, "learning_rate": 3.0133350775788556e-07, "loss": 0.2889, "step": 36886 }, { "epoch": 0.6411896608666933, "grad_norm": 1.7478714129961725, "learning_rate": 3.0130767614403417e-07, "loss": 0.1581, "step": 36887 }, { "epoch": 0.6412070434041961, "grad_norm": 1.9277651513608904, "learning_rate": 3.0128184515994606e-07, "loss": 0.2844, "step": 36888 }, { "epoch": 0.6412244259416989, "grad_norm": 1.4033368295614803, "learning_rate": 3.0125601480570317e-07, "loss": 0.4003, "step": 36889 }, { "epoch": 0.6412418084792018, "grad_norm": 2.2847891833691434, "learning_rate": 3.012301850813874e-07, "loss": 0.1817, "step": 36890 }, { "epoch": 0.6412591910167046, "grad_norm": 2.6427503671744628, "learning_rate": 3.0120435598708105e-07, "loss": 0.1535, "step": 36891 }, { "epoch": 0.6412765735542074, "grad_norm": 2.0060609281456236, "learning_rate": 3.011785275228651e-07, "loss": 0.106, "step": 36892 }, { "epoch": 0.6412939560917102, "grad_norm": 1.4798491998359409, "learning_rate": 3.011526996888221e-07, "loss": 0.1948, "step": 36893 }, { "epoch": 0.6413113386292131, "grad_norm": 1.3083150131952037, "learning_rate": 3.0112687248503354e-07, "loss": 0.1527, "step": 36894 }, { "epoch": 0.6413287211667159, "grad_norm": 3.7558996354343375, "learning_rate": 3.0110104591158156e-07, "loss": 0.2451, "step": 36895 }, { "epoch": 0.6413461037042187, "grad_norm": 1.3103137140505297, "learning_rate": 3.010752199685478e-07, "loss": 0.3887, "step": 36896 }, { "epoch": 0.6413634862417216, "grad_norm": 1.3639039954074803, "learning_rate": 3.010493946560144e-07, "loss": 0.2023, "step": 36897 }, { "epoch": 0.6413808687792244, "grad_norm": 1.6403943758543231, "learning_rate": 3.0102356997406296e-07, "loss": 0.1222, "step": 36898 }, { "epoch": 0.6413982513167272, "grad_norm": 2.1975667168078195, "learning_rate": 3.0099774592277536e-07, "loss": 0.1905, "step": 36899 }, { "epoch": 0.6414156338542301, "grad_norm": 1.7146624709689187, "learning_rate": 3.009719225022335e-07, "loss": 0.2057, "step": 36900 }, { "epoch": 0.6414330163917329, "grad_norm": 2.016818326101635, "learning_rate": 3.009460997125192e-07, "loss": 0.1899, "step": 36901 }, { "epoch": 0.6414503989292357, "grad_norm": 2.4385749045939984, "learning_rate": 3.009202775537142e-07, "loss": 0.1749, "step": 36902 }, { "epoch": 0.6414677814667386, "grad_norm": 1.9006129240673708, "learning_rate": 3.008944560259007e-07, "loss": 0.1778, "step": 36903 }, { "epoch": 0.6414851640042414, "grad_norm": 1.5048563930381385, "learning_rate": 3.008686351291603e-07, "loss": 0.1832, "step": 36904 }, { "epoch": 0.6415025465417442, "grad_norm": 1.529442645258968, "learning_rate": 3.0084281486357463e-07, "loss": 0.2487, "step": 36905 }, { "epoch": 0.641519929079247, "grad_norm": 1.4758083278396565, "learning_rate": 3.00816995229226e-07, "loss": 0.2048, "step": 36906 }, { "epoch": 0.6415373116167498, "grad_norm": 1.4784685899159586, "learning_rate": 3.0079117622619587e-07, "loss": 0.1603, "step": 36907 }, { "epoch": 0.6415546941542526, "grad_norm": 1.656655075633762, "learning_rate": 3.0076535785456615e-07, "loss": 0.1832, "step": 36908 }, { "epoch": 0.6415720766917554, "grad_norm": 1.8491488512544032, "learning_rate": 3.007395401144188e-07, "loss": 0.171, "step": 36909 }, { "epoch": 0.6415894592292583, "grad_norm": 2.579080172964807, "learning_rate": 3.0071372300583574e-07, "loss": 0.1974, "step": 36910 }, { "epoch": 0.6416068417667611, "grad_norm": 2.0488014011227462, "learning_rate": 3.0068790652889844e-07, "loss": 0.2623, "step": 36911 }, { "epoch": 0.6416242243042639, "grad_norm": 1.5367202302133267, "learning_rate": 3.006620906836889e-07, "loss": 0.2411, "step": 36912 }, { "epoch": 0.6416416068417667, "grad_norm": 1.4364455538739367, "learning_rate": 3.006362754702889e-07, "loss": 0.2018, "step": 36913 }, { "epoch": 0.6416589893792696, "grad_norm": 2.156183093740016, "learning_rate": 3.006104608887805e-07, "loss": 0.1957, "step": 36914 }, { "epoch": 0.6416763719167724, "grad_norm": 3.315203491673535, "learning_rate": 3.005846469392453e-07, "loss": 0.1949, "step": 36915 }, { "epoch": 0.6416937544542752, "grad_norm": 1.814972717305568, "learning_rate": 3.0055883362176526e-07, "loss": 0.2006, "step": 36916 }, { "epoch": 0.6417111369917781, "grad_norm": 1.2715746447322516, "learning_rate": 3.00533020936422e-07, "loss": 0.3519, "step": 36917 }, { "epoch": 0.6417285195292809, "grad_norm": 1.7766237817252293, "learning_rate": 3.005072088832974e-07, "loss": 0.3247, "step": 36918 }, { "epoch": 0.6417459020667837, "grad_norm": 1.36898355437238, "learning_rate": 3.0048139746247325e-07, "loss": 0.3602, "step": 36919 }, { "epoch": 0.6417632846042866, "grad_norm": 1.536598133679268, "learning_rate": 3.004555866740315e-07, "loss": 0.1553, "step": 36920 }, { "epoch": 0.6417806671417894, "grad_norm": 2.4109354091638457, "learning_rate": 3.004297765180538e-07, "loss": 0.2449, "step": 36921 }, { "epoch": 0.6417980496792922, "grad_norm": 1.2217215381258943, "learning_rate": 3.004039669946221e-07, "loss": 0.1476, "step": 36922 }, { "epoch": 0.641815432216795, "grad_norm": 1.2173440013109342, "learning_rate": 3.003781581038183e-07, "loss": 0.1871, "step": 36923 }, { "epoch": 0.6418328147542979, "grad_norm": 0.9560777962871874, "learning_rate": 3.003523498457239e-07, "loss": 0.1839, "step": 36924 }, { "epoch": 0.6418501972918007, "grad_norm": 1.0169412422499593, "learning_rate": 3.003265422204208e-07, "loss": 0.1936, "step": 36925 }, { "epoch": 0.6418675798293035, "grad_norm": 1.7018125680342755, "learning_rate": 3.003007352279909e-07, "loss": 0.2923, "step": 36926 }, { "epoch": 0.6418849623668063, "grad_norm": 2.2573336856000186, "learning_rate": 3.002749288685158e-07, "loss": 0.1606, "step": 36927 }, { "epoch": 0.6419023449043091, "grad_norm": 0.8902955195502095, "learning_rate": 3.002491231420776e-07, "loss": 0.1896, "step": 36928 }, { "epoch": 0.6419197274418119, "grad_norm": 1.2733402192937973, "learning_rate": 3.00223318048758e-07, "loss": 0.2083, "step": 36929 }, { "epoch": 0.6419371099793147, "grad_norm": 3.6514296754553817, "learning_rate": 3.0019751358863843e-07, "loss": 0.4114, "step": 36930 }, { "epoch": 0.6419544925168176, "grad_norm": 1.18733916365647, "learning_rate": 3.001717097618012e-07, "loss": 0.1501, "step": 36931 }, { "epoch": 0.6419718750543204, "grad_norm": 1.601392029121213, "learning_rate": 3.0014590656832774e-07, "loss": 0.3187, "step": 36932 }, { "epoch": 0.6419892575918232, "grad_norm": 1.7223870890467783, "learning_rate": 3.001201040082999e-07, "loss": 0.248, "step": 36933 }, { "epoch": 0.6420066401293261, "grad_norm": 1.928729133495128, "learning_rate": 3.0009430208179953e-07, "loss": 0.1527, "step": 36934 }, { "epoch": 0.6420240226668289, "grad_norm": 2.6244131906599275, "learning_rate": 3.000685007889086e-07, "loss": 0.3688, "step": 36935 }, { "epoch": 0.6420414052043317, "grad_norm": 1.4669155834665581, "learning_rate": 3.0004270012970846e-07, "loss": 0.1747, "step": 36936 }, { "epoch": 0.6420587877418346, "grad_norm": 1.7040085801253577, "learning_rate": 3.000169001042812e-07, "loss": 0.1904, "step": 36937 }, { "epoch": 0.6420761702793374, "grad_norm": 1.7157803277707373, "learning_rate": 2.999911007127084e-07, "loss": 0.1972, "step": 36938 }, { "epoch": 0.6420935528168402, "grad_norm": 1.4852096427512886, "learning_rate": 2.99965301955072e-07, "loss": 0.2329, "step": 36939 }, { "epoch": 0.642110935354343, "grad_norm": 1.6346206936549015, "learning_rate": 2.999395038314537e-07, "loss": 0.2191, "step": 36940 }, { "epoch": 0.6421283178918459, "grad_norm": 2.1619320177312815, "learning_rate": 2.999137063419352e-07, "loss": 0.2235, "step": 36941 }, { "epoch": 0.6421457004293487, "grad_norm": 1.484995050426077, "learning_rate": 2.9988790948659857e-07, "loss": 0.2155, "step": 36942 }, { "epoch": 0.6421630829668515, "grad_norm": 1.1665303184147622, "learning_rate": 2.998621132655252e-07, "loss": 0.1752, "step": 36943 }, { "epoch": 0.6421804655043544, "grad_norm": 1.2470784168553521, "learning_rate": 2.9983631767879695e-07, "loss": 0.1226, "step": 36944 }, { "epoch": 0.6421978480418572, "grad_norm": 1.6282539813454868, "learning_rate": 2.998105227264957e-07, "loss": 0.1863, "step": 36945 }, { "epoch": 0.64221523057936, "grad_norm": 1.9432861873522436, "learning_rate": 2.9978472840870303e-07, "loss": 0.2741, "step": 36946 }, { "epoch": 0.6422326131168627, "grad_norm": 1.1784554035584311, "learning_rate": 2.997589347255009e-07, "loss": 0.2194, "step": 36947 }, { "epoch": 0.6422499956543656, "grad_norm": 1.3666618366208179, "learning_rate": 2.997331416769711e-07, "loss": 0.2761, "step": 36948 }, { "epoch": 0.6422673781918684, "grad_norm": 1.5003553318483873, "learning_rate": 2.997073492631951e-07, "loss": 0.2103, "step": 36949 }, { "epoch": 0.6422847607293712, "grad_norm": 1.2072301471712126, "learning_rate": 2.996815574842546e-07, "loss": 0.1395, "step": 36950 }, { "epoch": 0.6423021432668741, "grad_norm": 1.151618495468778, "learning_rate": 2.996557663402318e-07, "loss": 0.3368, "step": 36951 }, { "epoch": 0.6423195258043769, "grad_norm": 1.482331477901311, "learning_rate": 2.9962997583120805e-07, "loss": 0.228, "step": 36952 }, { "epoch": 0.6423369083418797, "grad_norm": 1.1223817933601201, "learning_rate": 2.996041859572653e-07, "loss": 0.2065, "step": 36953 }, { "epoch": 0.6423542908793826, "grad_norm": 1.7796018131483509, "learning_rate": 2.995783967184853e-07, "loss": 0.2147, "step": 36954 }, { "epoch": 0.6423716734168854, "grad_norm": 1.2959189984056465, "learning_rate": 2.995526081149497e-07, "loss": 0.3108, "step": 36955 }, { "epoch": 0.6423890559543882, "grad_norm": 2.6416209563670887, "learning_rate": 2.9952682014674013e-07, "loss": 0.3106, "step": 36956 }, { "epoch": 0.642406438491891, "grad_norm": 1.603507952333033, "learning_rate": 2.995010328139385e-07, "loss": 0.252, "step": 36957 }, { "epoch": 0.6424238210293939, "grad_norm": 1.1916275512389365, "learning_rate": 2.994752461166264e-07, "loss": 0.2289, "step": 36958 }, { "epoch": 0.6424412035668967, "grad_norm": 1.539210624458327, "learning_rate": 2.9944946005488583e-07, "loss": 0.1858, "step": 36959 }, { "epoch": 0.6424585861043995, "grad_norm": 1.307134895039857, "learning_rate": 2.994236746287982e-07, "loss": 0.3129, "step": 36960 }, { "epoch": 0.6424759686419024, "grad_norm": 1.478952828125514, "learning_rate": 2.993978898384456e-07, "loss": 0.1738, "step": 36961 }, { "epoch": 0.6424933511794052, "grad_norm": 0.9219840166504122, "learning_rate": 2.9937210568390936e-07, "loss": 0.1682, "step": 36962 }, { "epoch": 0.642510733716908, "grad_norm": 1.788423877053325, "learning_rate": 2.9934632216527147e-07, "loss": 0.2047, "step": 36963 }, { "epoch": 0.6425281162544109, "grad_norm": 1.5556422040100877, "learning_rate": 2.9932053928261345e-07, "loss": 0.2007, "step": 36964 }, { "epoch": 0.6425454987919137, "grad_norm": 1.931779752247172, "learning_rate": 2.9929475703601724e-07, "loss": 0.261, "step": 36965 }, { "epoch": 0.6425628813294164, "grad_norm": 2.188031584633625, "learning_rate": 2.9926897542556433e-07, "loss": 0.1537, "step": 36966 }, { "epoch": 0.6425802638669192, "grad_norm": 1.483615326323003, "learning_rate": 2.9924319445133676e-07, "loss": 0.162, "step": 36967 }, { "epoch": 0.6425976464044221, "grad_norm": 0.8911537937440132, "learning_rate": 2.99217414113416e-07, "loss": 0.2406, "step": 36968 }, { "epoch": 0.6426150289419249, "grad_norm": 1.2069380223376198, "learning_rate": 2.9919163441188366e-07, "loss": 0.1703, "step": 36969 }, { "epoch": 0.6426324114794277, "grad_norm": 1.2585632235805189, "learning_rate": 2.991658553468217e-07, "loss": 0.1422, "step": 36970 }, { "epoch": 0.6426497940169306, "grad_norm": 2.0318414395024895, "learning_rate": 2.991400769183116e-07, "loss": 0.1925, "step": 36971 }, { "epoch": 0.6426671765544334, "grad_norm": 1.3030748325629107, "learning_rate": 2.991142991264353e-07, "loss": 0.1916, "step": 36972 }, { "epoch": 0.6426845590919362, "grad_norm": 1.6689106198083135, "learning_rate": 2.990885219712745e-07, "loss": 0.2337, "step": 36973 }, { "epoch": 0.642701941629439, "grad_norm": 1.1376128961426974, "learning_rate": 2.990627454529107e-07, "loss": 0.1493, "step": 36974 }, { "epoch": 0.6427193241669419, "grad_norm": 1.4034301030336451, "learning_rate": 2.990369695714256e-07, "loss": 0.2354, "step": 36975 }, { "epoch": 0.6427367067044447, "grad_norm": 1.7634688323594205, "learning_rate": 2.990111943269011e-07, "loss": 0.1706, "step": 36976 }, { "epoch": 0.6427540892419475, "grad_norm": 1.972171968733966, "learning_rate": 2.9898541971941866e-07, "loss": 0.1723, "step": 36977 }, { "epoch": 0.6427714717794504, "grad_norm": 1.314262337108082, "learning_rate": 2.989596457490602e-07, "loss": 0.1702, "step": 36978 }, { "epoch": 0.6427888543169532, "grad_norm": 1.3956293647202396, "learning_rate": 2.989338724159072e-07, "loss": 0.1415, "step": 36979 }, { "epoch": 0.642806236854456, "grad_norm": 1.5034008560852463, "learning_rate": 2.989080997200418e-07, "loss": 0.1408, "step": 36980 }, { "epoch": 0.6428236193919589, "grad_norm": 1.0794260924609842, "learning_rate": 2.98882327661545e-07, "loss": 0.1434, "step": 36981 }, { "epoch": 0.6428410019294617, "grad_norm": 1.4580718852098502, "learning_rate": 2.98856556240499e-07, "loss": 0.2626, "step": 36982 }, { "epoch": 0.6428583844669645, "grad_norm": 1.8029935694677235, "learning_rate": 2.988307854569852e-07, "loss": 0.2068, "step": 36983 }, { "epoch": 0.6428757670044674, "grad_norm": 2.3227080691008566, "learning_rate": 2.988050153110855e-07, "loss": 0.2288, "step": 36984 }, { "epoch": 0.6428931495419702, "grad_norm": 2.336657615138193, "learning_rate": 2.987792458028814e-07, "loss": 0.1688, "step": 36985 }, { "epoch": 0.6429105320794729, "grad_norm": 1.749254778472412, "learning_rate": 2.987534769324548e-07, "loss": 0.106, "step": 36986 }, { "epoch": 0.6429279146169757, "grad_norm": 1.3023440823741745, "learning_rate": 2.9872770869988715e-07, "loss": 0.1117, "step": 36987 }, { "epoch": 0.6429452971544786, "grad_norm": 1.252384873441621, "learning_rate": 2.987019411052602e-07, "loss": 0.0977, "step": 36988 }, { "epoch": 0.6429626796919814, "grad_norm": 1.4844164633662973, "learning_rate": 2.986761741486555e-07, "loss": 0.1537, "step": 36989 }, { "epoch": 0.6429800622294842, "grad_norm": 1.6920190466846412, "learning_rate": 2.9865040783015506e-07, "loss": 0.2218, "step": 36990 }, { "epoch": 0.6429974447669871, "grad_norm": 2.2731132373009904, "learning_rate": 2.986246421498402e-07, "loss": 0.2131, "step": 36991 }, { "epoch": 0.6430148273044899, "grad_norm": 1.852765548208312, "learning_rate": 2.9859887710779287e-07, "loss": 0.2309, "step": 36992 }, { "epoch": 0.6430322098419927, "grad_norm": 2.002423499700628, "learning_rate": 2.9857311270409446e-07, "loss": 0.3099, "step": 36993 }, { "epoch": 0.6430495923794955, "grad_norm": 1.8471741823926415, "learning_rate": 2.985473489388267e-07, "loss": 0.2772, "step": 36994 }, { "epoch": 0.6430669749169984, "grad_norm": 1.847020336940226, "learning_rate": 2.9852158581207143e-07, "loss": 0.2403, "step": 36995 }, { "epoch": 0.6430843574545012, "grad_norm": 1.2798381538558976, "learning_rate": 2.984958233239101e-07, "loss": 0.161, "step": 36996 }, { "epoch": 0.643101739992004, "grad_norm": 1.953988897676794, "learning_rate": 2.9847006147442443e-07, "loss": 0.4053, "step": 36997 }, { "epoch": 0.6431191225295069, "grad_norm": 1.6894120027316615, "learning_rate": 2.984443002636963e-07, "loss": 0.2149, "step": 36998 }, { "epoch": 0.6431365050670097, "grad_norm": 1.5433864611740942, "learning_rate": 2.9841853969180695e-07, "loss": 0.342, "step": 36999 }, { "epoch": 0.6431538876045125, "grad_norm": 1.0724457535379197, "learning_rate": 2.983927797588382e-07, "loss": 0.2158, "step": 37000 }, { "epoch": 0.6431712701420154, "grad_norm": 1.37562515603173, "learning_rate": 2.9836702046487183e-07, "loss": 0.2319, "step": 37001 }, { "epoch": 0.6431886526795182, "grad_norm": 1.724673072100821, "learning_rate": 2.983412618099892e-07, "loss": 0.2053, "step": 37002 }, { "epoch": 0.643206035217021, "grad_norm": 1.044900208514763, "learning_rate": 2.9831550379427236e-07, "loss": 0.1942, "step": 37003 }, { "epoch": 0.6432234177545239, "grad_norm": 1.082000133074289, "learning_rate": 2.982897464178027e-07, "loss": 0.2453, "step": 37004 }, { "epoch": 0.6432408002920267, "grad_norm": 1.49715215108166, "learning_rate": 2.982639896806619e-07, "loss": 0.2443, "step": 37005 }, { "epoch": 0.6432581828295294, "grad_norm": 1.8470302319550764, "learning_rate": 2.982382335829314e-07, "loss": 0.2744, "step": 37006 }, { "epoch": 0.6432755653670322, "grad_norm": 1.3589380530635353, "learning_rate": 2.982124781246932e-07, "loss": 0.1743, "step": 37007 }, { "epoch": 0.6432929479045351, "grad_norm": 1.1583960095265737, "learning_rate": 2.9818672330602863e-07, "loss": 0.1457, "step": 37008 }, { "epoch": 0.6433103304420379, "grad_norm": 3.1758771645517876, "learning_rate": 2.981609691270195e-07, "loss": 0.2926, "step": 37009 }, { "epoch": 0.6433277129795407, "grad_norm": 1.4574402081501916, "learning_rate": 2.9813521558774727e-07, "loss": 0.2194, "step": 37010 }, { "epoch": 0.6433450955170436, "grad_norm": 1.1734557628485558, "learning_rate": 2.9810946268829407e-07, "loss": 0.2351, "step": 37011 }, { "epoch": 0.6433624780545464, "grad_norm": 1.4294521879707804, "learning_rate": 2.980837104287407e-07, "loss": 0.1741, "step": 37012 }, { "epoch": 0.6433798605920492, "grad_norm": 1.2781820901968406, "learning_rate": 2.9805795880916937e-07, "loss": 0.2676, "step": 37013 }, { "epoch": 0.643397243129552, "grad_norm": 1.480431549521997, "learning_rate": 2.9803220782966156e-07, "loss": 0.1707, "step": 37014 }, { "epoch": 0.6434146256670549, "grad_norm": 1.8256362568844215, "learning_rate": 2.9800645749029885e-07, "loss": 0.2124, "step": 37015 }, { "epoch": 0.6434320082045577, "grad_norm": 1.3086124327250765, "learning_rate": 2.979807077911628e-07, "loss": 0.1642, "step": 37016 }, { "epoch": 0.6434493907420605, "grad_norm": 3.298573516392241, "learning_rate": 2.979549587323353e-07, "loss": 0.3175, "step": 37017 }, { "epoch": 0.6434667732795634, "grad_norm": 0.7407933762182662, "learning_rate": 2.9792921031389767e-07, "loss": 0.2134, "step": 37018 }, { "epoch": 0.6434841558170662, "grad_norm": 1.0058473195755524, "learning_rate": 2.9790346253593156e-07, "loss": 0.2159, "step": 37019 }, { "epoch": 0.643501538354569, "grad_norm": 1.483420449792316, "learning_rate": 2.9787771539851867e-07, "loss": 0.2028, "step": 37020 }, { "epoch": 0.6435189208920719, "grad_norm": 1.5234055319322533, "learning_rate": 2.978519689017406e-07, "loss": 0.2614, "step": 37021 }, { "epoch": 0.6435363034295747, "grad_norm": 0.9784151657345108, "learning_rate": 2.978262230456788e-07, "loss": 0.1882, "step": 37022 }, { "epoch": 0.6435536859670775, "grad_norm": 1.4964966825390336, "learning_rate": 2.978004778304152e-07, "loss": 0.1431, "step": 37023 }, { "epoch": 0.6435710685045803, "grad_norm": 1.4991263879850487, "learning_rate": 2.9777473325603123e-07, "loss": 0.1985, "step": 37024 }, { "epoch": 0.6435884510420832, "grad_norm": 2.743215953077033, "learning_rate": 2.977489893226083e-07, "loss": 0.228, "step": 37025 }, { "epoch": 0.6436058335795859, "grad_norm": 1.3253488909908513, "learning_rate": 2.9772324603022824e-07, "loss": 0.204, "step": 37026 }, { "epoch": 0.6436232161170887, "grad_norm": 0.8007531542355815, "learning_rate": 2.9769750337897246e-07, "loss": 0.1933, "step": 37027 }, { "epoch": 0.6436405986545916, "grad_norm": 3.175851522068585, "learning_rate": 2.976717613689228e-07, "loss": 0.3213, "step": 37028 }, { "epoch": 0.6436579811920944, "grad_norm": 2.7577621123395684, "learning_rate": 2.976460200001607e-07, "loss": 0.1432, "step": 37029 }, { "epoch": 0.6436753637295972, "grad_norm": 1.1208276569307032, "learning_rate": 2.976202792727679e-07, "loss": 0.2049, "step": 37030 }, { "epoch": 0.6436927462671, "grad_norm": 1.5474193242941088, "learning_rate": 2.9759453918682556e-07, "loss": 0.22, "step": 37031 }, { "epoch": 0.6437101288046029, "grad_norm": 3.1589019652034542, "learning_rate": 2.975687997424157e-07, "loss": 0.2047, "step": 37032 }, { "epoch": 0.6437275113421057, "grad_norm": 1.0104433441107925, "learning_rate": 2.9754306093961967e-07, "loss": 0.1681, "step": 37033 }, { "epoch": 0.6437448938796085, "grad_norm": 5.729568899380231, "learning_rate": 2.9751732277851926e-07, "loss": 0.3051, "step": 37034 }, { "epoch": 0.6437622764171114, "grad_norm": 1.5158453028531766, "learning_rate": 2.9749158525919577e-07, "loss": 0.1716, "step": 37035 }, { "epoch": 0.6437796589546142, "grad_norm": 0.7264822590094158, "learning_rate": 2.974658483817313e-07, "loss": 0.1758, "step": 37036 }, { "epoch": 0.643797041492117, "grad_norm": 1.160327581517309, "learning_rate": 2.9744011214620665e-07, "loss": 0.1883, "step": 37037 }, { "epoch": 0.6438144240296199, "grad_norm": 1.6346974833149186, "learning_rate": 2.97414376552704e-07, "loss": 0.2128, "step": 37038 }, { "epoch": 0.6438318065671227, "grad_norm": 1.903083510351219, "learning_rate": 2.973886416013045e-07, "loss": 0.2786, "step": 37039 }, { "epoch": 0.6438491891046255, "grad_norm": 2.1290136602210303, "learning_rate": 2.973629072920901e-07, "loss": 0.2421, "step": 37040 }, { "epoch": 0.6438665716421283, "grad_norm": 1.5504392897989454, "learning_rate": 2.973371736251421e-07, "loss": 0.1937, "step": 37041 }, { "epoch": 0.6438839541796312, "grad_norm": 4.715243607859628, "learning_rate": 2.9731144060054227e-07, "loss": 0.2596, "step": 37042 }, { "epoch": 0.643901336717134, "grad_norm": 1.6539087304156839, "learning_rate": 2.9728570821837217e-07, "loss": 0.2547, "step": 37043 }, { "epoch": 0.6439187192546368, "grad_norm": 2.713383109223433, "learning_rate": 2.9725997647871313e-07, "loss": 0.233, "step": 37044 }, { "epoch": 0.6439361017921397, "grad_norm": 2.183756558205708, "learning_rate": 2.9723424538164675e-07, "loss": 0.2191, "step": 37045 }, { "epoch": 0.6439534843296424, "grad_norm": 1.1740189064493396, "learning_rate": 2.9720851492725476e-07, "loss": 0.1678, "step": 37046 }, { "epoch": 0.6439708668671452, "grad_norm": 1.0100855187261912, "learning_rate": 2.971827851156185e-07, "loss": 0.2146, "step": 37047 }, { "epoch": 0.643988249404648, "grad_norm": 2.671320151110192, "learning_rate": 2.971570559468198e-07, "loss": 0.2521, "step": 37048 }, { "epoch": 0.6440056319421509, "grad_norm": 1.5711408238793878, "learning_rate": 2.9713132742094013e-07, "loss": 0.2587, "step": 37049 }, { "epoch": 0.6440230144796537, "grad_norm": 3.637504983629728, "learning_rate": 2.9710559953806075e-07, "loss": 0.2589, "step": 37050 }, { "epoch": 0.6440403970171565, "grad_norm": 1.5104089952404711, "learning_rate": 2.9707987229826346e-07, "loss": 0.2915, "step": 37051 }, { "epoch": 0.6440577795546594, "grad_norm": 0.9824004175602555, "learning_rate": 2.9705414570162987e-07, "loss": 0.1185, "step": 37052 }, { "epoch": 0.6440751620921622, "grad_norm": 1.2851569732993853, "learning_rate": 2.9702841974824126e-07, "loss": 0.1655, "step": 37053 }, { "epoch": 0.644092544629665, "grad_norm": 1.5806476718779925, "learning_rate": 2.9700269443817947e-07, "loss": 0.1831, "step": 37054 }, { "epoch": 0.6441099271671679, "grad_norm": 1.798576632981753, "learning_rate": 2.969769697715259e-07, "loss": 0.243, "step": 37055 }, { "epoch": 0.6441273097046707, "grad_norm": 1.3599446031269073, "learning_rate": 2.9695124574836196e-07, "loss": 0.1884, "step": 37056 }, { "epoch": 0.6441446922421735, "grad_norm": 3.0501412598352267, "learning_rate": 2.9692552236876935e-07, "loss": 0.2908, "step": 37057 }, { "epoch": 0.6441620747796764, "grad_norm": 1.2068826218273474, "learning_rate": 2.9689979963282946e-07, "loss": 0.3312, "step": 37058 }, { "epoch": 0.6441794573171792, "grad_norm": 1.3967034008391928, "learning_rate": 2.9687407754062397e-07, "loss": 0.2448, "step": 37059 }, { "epoch": 0.644196839854682, "grad_norm": 1.9494492670767667, "learning_rate": 2.9684835609223435e-07, "loss": 0.3125, "step": 37060 }, { "epoch": 0.6442142223921848, "grad_norm": 1.0351269153954357, "learning_rate": 2.9682263528774213e-07, "loss": 0.1665, "step": 37061 }, { "epoch": 0.6442316049296877, "grad_norm": 1.7639106629078851, "learning_rate": 2.9679691512722894e-07, "loss": 0.1856, "step": 37062 }, { "epoch": 0.6442489874671905, "grad_norm": 1.9767467182360803, "learning_rate": 2.967711956107761e-07, "loss": 0.2168, "step": 37063 }, { "epoch": 0.6442663700046933, "grad_norm": 3.420967339395428, "learning_rate": 2.967454767384652e-07, "loss": 0.3408, "step": 37064 }, { "epoch": 0.6442837525421962, "grad_norm": 1.46345125010117, "learning_rate": 2.9671975851037774e-07, "loss": 0.1542, "step": 37065 }, { "epoch": 0.6443011350796989, "grad_norm": 3.1912139810051223, "learning_rate": 2.966940409265952e-07, "loss": 0.1937, "step": 37066 }, { "epoch": 0.6443185176172017, "grad_norm": 1.6338276494008723, "learning_rate": 2.9666832398719925e-07, "loss": 0.2447, "step": 37067 }, { "epoch": 0.6443359001547045, "grad_norm": 0.6293608299846867, "learning_rate": 2.9664260769227147e-07, "loss": 0.1571, "step": 37068 }, { "epoch": 0.6443532826922074, "grad_norm": 1.129133081993094, "learning_rate": 2.96616892041893e-07, "loss": 0.2786, "step": 37069 }, { "epoch": 0.6443706652297102, "grad_norm": 1.2874668896878565, "learning_rate": 2.9659117703614556e-07, "loss": 0.2008, "step": 37070 }, { "epoch": 0.644388047767213, "grad_norm": 1.3627525963519778, "learning_rate": 2.9656546267511074e-07, "loss": 0.2111, "step": 37071 }, { "epoch": 0.6444054303047159, "grad_norm": 1.442746460401717, "learning_rate": 2.965397489588698e-07, "loss": 0.1642, "step": 37072 }, { "epoch": 0.6444228128422187, "grad_norm": 1.3952134840509691, "learning_rate": 2.965140358875045e-07, "loss": 0.1821, "step": 37073 }, { "epoch": 0.6444401953797215, "grad_norm": 1.6012930222901987, "learning_rate": 2.9648832346109634e-07, "loss": 0.2379, "step": 37074 }, { "epoch": 0.6444575779172244, "grad_norm": 1.2339701845248514, "learning_rate": 2.964626116797265e-07, "loss": 0.1836, "step": 37075 }, { "epoch": 0.6444749604547272, "grad_norm": 1.25508283379356, "learning_rate": 2.9643690054347683e-07, "loss": 0.1368, "step": 37076 }, { "epoch": 0.64449234299223, "grad_norm": 5.3085896237096035, "learning_rate": 2.9641119005242866e-07, "loss": 0.2387, "step": 37077 }, { "epoch": 0.6445097255297328, "grad_norm": 1.1112331837574014, "learning_rate": 2.963854802066634e-07, "loss": 0.1569, "step": 37078 }, { "epoch": 0.6445271080672357, "grad_norm": 2.0419170974014738, "learning_rate": 2.9635977100626275e-07, "loss": 0.1716, "step": 37079 }, { "epoch": 0.6445444906047385, "grad_norm": 1.213532773526778, "learning_rate": 2.963340624513079e-07, "loss": 0.1668, "step": 37080 }, { "epoch": 0.6445618731422413, "grad_norm": 1.259459843536081, "learning_rate": 2.963083545418808e-07, "loss": 0.2061, "step": 37081 }, { "epoch": 0.6445792556797442, "grad_norm": 0.9693419290617177, "learning_rate": 2.962826472780625e-07, "loss": 0.238, "step": 37082 }, { "epoch": 0.644596638217247, "grad_norm": 1.5291687773964833, "learning_rate": 2.9625694065993454e-07, "loss": 0.1688, "step": 37083 }, { "epoch": 0.6446140207547498, "grad_norm": 1.9602028214565161, "learning_rate": 2.962312346875786e-07, "loss": 0.3493, "step": 37084 }, { "epoch": 0.6446314032922527, "grad_norm": 2.1402276693701157, "learning_rate": 2.96205529361076e-07, "loss": 0.2454, "step": 37085 }, { "epoch": 0.6446487858297554, "grad_norm": 0.9532813543284431, "learning_rate": 2.961798246805082e-07, "loss": 0.2012, "step": 37086 }, { "epoch": 0.6446661683672582, "grad_norm": 1.4477016572968868, "learning_rate": 2.961541206459569e-07, "loss": 0.2731, "step": 37087 }, { "epoch": 0.644683550904761, "grad_norm": 0.9909331898526024, "learning_rate": 2.961284172575034e-07, "loss": 0.2322, "step": 37088 }, { "epoch": 0.6447009334422639, "grad_norm": 1.192182585142328, "learning_rate": 2.961027145152289e-07, "loss": 0.3106, "step": 37089 }, { "epoch": 0.6447183159797667, "grad_norm": 1.4423316494675669, "learning_rate": 2.9607701241921535e-07, "loss": 0.1942, "step": 37090 }, { "epoch": 0.6447356985172695, "grad_norm": 2.692270175535978, "learning_rate": 2.960513109695438e-07, "loss": 0.2336, "step": 37091 }, { "epoch": 0.6447530810547724, "grad_norm": 1.395373968889613, "learning_rate": 2.960256101662961e-07, "loss": 0.2075, "step": 37092 }, { "epoch": 0.6447704635922752, "grad_norm": 1.0113931525747184, "learning_rate": 2.9599991000955357e-07, "loss": 0.2056, "step": 37093 }, { "epoch": 0.644787846129778, "grad_norm": 1.2063970117479645, "learning_rate": 2.959742104993976e-07, "loss": 0.1621, "step": 37094 }, { "epoch": 0.6448052286672808, "grad_norm": 1.170002957451051, "learning_rate": 2.959485116359095e-07, "loss": 0.1569, "step": 37095 }, { "epoch": 0.6448226112047837, "grad_norm": 1.138205615053766, "learning_rate": 2.95922813419171e-07, "loss": 0.1472, "step": 37096 }, { "epoch": 0.6448399937422865, "grad_norm": 1.546913165942543, "learning_rate": 2.9589711584926325e-07, "loss": 0.2273, "step": 37097 }, { "epoch": 0.6448573762797893, "grad_norm": 1.1629641277263667, "learning_rate": 2.958714189262681e-07, "loss": 0.1994, "step": 37098 }, { "epoch": 0.6448747588172922, "grad_norm": 1.3131589401681998, "learning_rate": 2.9584572265026665e-07, "loss": 0.2669, "step": 37099 }, { "epoch": 0.644892141354795, "grad_norm": 1.6558829850802324, "learning_rate": 2.958200270213407e-07, "loss": 0.2269, "step": 37100 }, { "epoch": 0.6449095238922978, "grad_norm": 0.919753180399308, "learning_rate": 2.957943320395713e-07, "loss": 0.3288, "step": 37101 }, { "epoch": 0.6449269064298007, "grad_norm": 2.531262180280316, "learning_rate": 2.9576863770504004e-07, "loss": 0.2221, "step": 37102 }, { "epoch": 0.6449442889673035, "grad_norm": 1.7274941642955106, "learning_rate": 2.9574294401782836e-07, "loss": 0.2353, "step": 37103 }, { "epoch": 0.6449616715048063, "grad_norm": 1.8144197596360883, "learning_rate": 2.957172509780178e-07, "loss": 0.3739, "step": 37104 }, { "epoch": 0.644979054042309, "grad_norm": 1.1267092531826026, "learning_rate": 2.956915585856896e-07, "loss": 0.2191, "step": 37105 }, { "epoch": 0.6449964365798119, "grad_norm": 1.4928860463025293, "learning_rate": 2.9566586684092556e-07, "loss": 0.2702, "step": 37106 }, { "epoch": 0.6450138191173147, "grad_norm": 1.443548196684623, "learning_rate": 2.9564017574380665e-07, "loss": 0.1918, "step": 37107 }, { "epoch": 0.6450312016548175, "grad_norm": 1.7386005284210326, "learning_rate": 2.9561448529441447e-07, "loss": 0.2953, "step": 37108 }, { "epoch": 0.6450485841923204, "grad_norm": 1.3082703927089985, "learning_rate": 2.955887954928306e-07, "loss": 0.2383, "step": 37109 }, { "epoch": 0.6450659667298232, "grad_norm": 0.9190577841302815, "learning_rate": 2.9556310633913635e-07, "loss": 0.4348, "step": 37110 }, { "epoch": 0.645083349267326, "grad_norm": 2.662166972149267, "learning_rate": 2.9553741783341304e-07, "loss": 0.2699, "step": 37111 }, { "epoch": 0.6451007318048289, "grad_norm": 2.534296442403659, "learning_rate": 2.9551172997574236e-07, "loss": 0.3632, "step": 37112 }, { "epoch": 0.6451181143423317, "grad_norm": 3.596732905131479, "learning_rate": 2.9548604276620546e-07, "loss": 0.398, "step": 37113 }, { "epoch": 0.6451354968798345, "grad_norm": 0.7531165226887695, "learning_rate": 2.9546035620488387e-07, "loss": 0.1286, "step": 37114 }, { "epoch": 0.6451528794173373, "grad_norm": 1.8938860077702762, "learning_rate": 2.9543467029185896e-07, "loss": 0.21, "step": 37115 }, { "epoch": 0.6451702619548402, "grad_norm": 1.2278483054437521, "learning_rate": 2.9540898502721214e-07, "loss": 0.2471, "step": 37116 }, { "epoch": 0.645187644492343, "grad_norm": 3.382576688821649, "learning_rate": 2.9538330041102496e-07, "loss": 0.2956, "step": 37117 }, { "epoch": 0.6452050270298458, "grad_norm": 1.5669165900272177, "learning_rate": 2.9535761644337876e-07, "loss": 0.219, "step": 37118 }, { "epoch": 0.6452224095673487, "grad_norm": 1.253518732720833, "learning_rate": 2.953319331243549e-07, "loss": 0.2229, "step": 37119 }, { "epoch": 0.6452397921048515, "grad_norm": 1.2335883528084413, "learning_rate": 2.953062504540346e-07, "loss": 0.1836, "step": 37120 }, { "epoch": 0.6452571746423543, "grad_norm": 1.8261318158486912, "learning_rate": 2.952805684324996e-07, "loss": 0.3075, "step": 37121 }, { "epoch": 0.6452745571798572, "grad_norm": 1.4493689607032654, "learning_rate": 2.952548870598311e-07, "loss": 0.2504, "step": 37122 }, { "epoch": 0.64529193971736, "grad_norm": 1.5206186744880632, "learning_rate": 2.9522920633611057e-07, "loss": 0.1421, "step": 37123 }, { "epoch": 0.6453093222548628, "grad_norm": 2.5180334913669964, "learning_rate": 2.9520352626141944e-07, "loss": 0.4978, "step": 37124 }, { "epoch": 0.6453267047923655, "grad_norm": 2.308428750287621, "learning_rate": 2.9517784683583924e-07, "loss": 0.241, "step": 37125 }, { "epoch": 0.6453440873298684, "grad_norm": 1.1695252171955186, "learning_rate": 2.951521680594509e-07, "loss": 0.2366, "step": 37126 }, { "epoch": 0.6453614698673712, "grad_norm": 1.937077187040992, "learning_rate": 2.9512648993233615e-07, "loss": 0.2191, "step": 37127 }, { "epoch": 0.645378852404874, "grad_norm": 1.5532563504053758, "learning_rate": 2.9510081245457624e-07, "loss": 0.1691, "step": 37128 }, { "epoch": 0.6453962349423769, "grad_norm": 1.6485025541916651, "learning_rate": 2.9507513562625275e-07, "loss": 0.2891, "step": 37129 }, { "epoch": 0.6454136174798797, "grad_norm": 1.7688272276079273, "learning_rate": 2.950494594474469e-07, "loss": 0.2233, "step": 37130 }, { "epoch": 0.6454310000173825, "grad_norm": 2.2471946973053694, "learning_rate": 2.9502378391824016e-07, "loss": 0.1917, "step": 37131 }, { "epoch": 0.6454483825548853, "grad_norm": 1.5533754614078346, "learning_rate": 2.949981090387139e-07, "loss": 0.3126, "step": 37132 }, { "epoch": 0.6454657650923882, "grad_norm": 2.128490621472719, "learning_rate": 2.949724348089494e-07, "loss": 0.2794, "step": 37133 }, { "epoch": 0.645483147629891, "grad_norm": 1.3938551493241307, "learning_rate": 2.9494676122902805e-07, "loss": 0.1439, "step": 37134 }, { "epoch": 0.6455005301673938, "grad_norm": 1.6198774323868512, "learning_rate": 2.9492108829903135e-07, "loss": 0.1703, "step": 37135 }, { "epoch": 0.6455179127048967, "grad_norm": 0.9446736534843324, "learning_rate": 2.948954160190404e-07, "loss": 0.1676, "step": 37136 }, { "epoch": 0.6455352952423995, "grad_norm": 2.0026028698052407, "learning_rate": 2.948697443891371e-07, "loss": 0.2359, "step": 37137 }, { "epoch": 0.6455526777799023, "grad_norm": 1.6686707091154518, "learning_rate": 2.948440734094024e-07, "loss": 0.3017, "step": 37138 }, { "epoch": 0.6455700603174052, "grad_norm": 1.2045268234392643, "learning_rate": 2.948184030799176e-07, "loss": 0.1866, "step": 37139 }, { "epoch": 0.645587442854908, "grad_norm": 1.2320516862290478, "learning_rate": 2.947927334007643e-07, "loss": 0.3091, "step": 37140 }, { "epoch": 0.6456048253924108, "grad_norm": 0.9697106137259973, "learning_rate": 2.947670643720237e-07, "loss": 0.2764, "step": 37141 }, { "epoch": 0.6456222079299136, "grad_norm": 2.504758283712152, "learning_rate": 2.947413959937772e-07, "loss": 0.2913, "step": 37142 }, { "epoch": 0.6456395904674165, "grad_norm": 1.0951637587147538, "learning_rate": 2.947157282661063e-07, "loss": 0.1462, "step": 37143 }, { "epoch": 0.6456569730049193, "grad_norm": 1.2374044727657756, "learning_rate": 2.9469006118909233e-07, "loss": 0.2618, "step": 37144 }, { "epoch": 0.645674355542422, "grad_norm": 2.4184773688194996, "learning_rate": 2.9466439476281635e-07, "loss": 0.2324, "step": 37145 }, { "epoch": 0.6456917380799249, "grad_norm": 1.9659756098164107, "learning_rate": 2.9463872898736e-07, "loss": 0.2726, "step": 37146 }, { "epoch": 0.6457091206174277, "grad_norm": 1.3810987345600774, "learning_rate": 2.9461306386280445e-07, "loss": 0.2326, "step": 37147 }, { "epoch": 0.6457265031549305, "grad_norm": 2.0577370292871087, "learning_rate": 2.9458739938923126e-07, "loss": 0.1926, "step": 37148 }, { "epoch": 0.6457438856924334, "grad_norm": 1.6509901342338755, "learning_rate": 2.945617355667216e-07, "loss": 0.172, "step": 37149 }, { "epoch": 0.6457612682299362, "grad_norm": 3.9615630490063563, "learning_rate": 2.9453607239535706e-07, "loss": 0.3158, "step": 37150 }, { "epoch": 0.645778650767439, "grad_norm": 1.53134227636264, "learning_rate": 2.9451040987521845e-07, "loss": 0.1967, "step": 37151 }, { "epoch": 0.6457960333049418, "grad_norm": 1.0780034144605048, "learning_rate": 2.944847480063877e-07, "loss": 0.2866, "step": 37152 }, { "epoch": 0.6458134158424447, "grad_norm": 2.544742022875603, "learning_rate": 2.9445908678894573e-07, "loss": 0.2147, "step": 37153 }, { "epoch": 0.6458307983799475, "grad_norm": 2.5933223021100678, "learning_rate": 2.9443342622297413e-07, "loss": 0.3191, "step": 37154 }, { "epoch": 0.6458481809174503, "grad_norm": 1.9523521656716925, "learning_rate": 2.9440776630855404e-07, "loss": 0.1677, "step": 37155 }, { "epoch": 0.6458655634549532, "grad_norm": 1.195567104072527, "learning_rate": 2.9438210704576714e-07, "loss": 0.3396, "step": 37156 }, { "epoch": 0.645882945992456, "grad_norm": 2.2369325646485003, "learning_rate": 2.943564484346943e-07, "loss": 0.1832, "step": 37157 }, { "epoch": 0.6459003285299588, "grad_norm": 1.2900840972241747, "learning_rate": 2.9433079047541706e-07, "loss": 0.1816, "step": 37158 }, { "epoch": 0.6459177110674617, "grad_norm": 0.9940986944854826, "learning_rate": 2.9430513316801673e-07, "loss": 0.2053, "step": 37159 }, { "epoch": 0.6459350936049645, "grad_norm": 1.2539892643165669, "learning_rate": 2.942794765125747e-07, "loss": 0.257, "step": 37160 }, { "epoch": 0.6459524761424673, "grad_norm": 0.898335329852667, "learning_rate": 2.942538205091721e-07, "loss": 0.1996, "step": 37161 }, { "epoch": 0.6459698586799701, "grad_norm": 1.6510868506323626, "learning_rate": 2.942281651578905e-07, "loss": 0.2757, "step": 37162 }, { "epoch": 0.645987241217473, "grad_norm": 1.8639732225571517, "learning_rate": 2.942025104588113e-07, "loss": 0.2132, "step": 37163 }, { "epoch": 0.6460046237549758, "grad_norm": 4.3314095825241345, "learning_rate": 2.941768564120153e-07, "loss": 0.3109, "step": 37164 }, { "epoch": 0.6460220062924785, "grad_norm": 1.6044593555777975, "learning_rate": 2.941512030175842e-07, "loss": 0.1688, "step": 37165 }, { "epoch": 0.6460393888299814, "grad_norm": 2.4882296044741943, "learning_rate": 2.941255502755992e-07, "loss": 0.1999, "step": 37166 }, { "epoch": 0.6460567713674842, "grad_norm": 1.5327343567907854, "learning_rate": 2.940998981861416e-07, "loss": 0.1956, "step": 37167 }, { "epoch": 0.646074153904987, "grad_norm": 1.2270185278172647, "learning_rate": 2.9407424674929285e-07, "loss": 0.189, "step": 37168 }, { "epoch": 0.6460915364424898, "grad_norm": 4.1174552242393325, "learning_rate": 2.940485959651342e-07, "loss": 0.3872, "step": 37169 }, { "epoch": 0.6461089189799927, "grad_norm": 1.6765539812842243, "learning_rate": 2.9402294583374673e-07, "loss": 0.3433, "step": 37170 }, { "epoch": 0.6461263015174955, "grad_norm": 1.5372047742813841, "learning_rate": 2.93997296355212e-07, "loss": 0.1115, "step": 37171 }, { "epoch": 0.6461436840549983, "grad_norm": 1.424174587247265, "learning_rate": 2.939716475296111e-07, "loss": 0.325, "step": 37172 }, { "epoch": 0.6461610665925012, "grad_norm": 1.373631460145615, "learning_rate": 2.9394599935702545e-07, "loss": 0.2628, "step": 37173 }, { "epoch": 0.646178449130004, "grad_norm": 1.6643139004760057, "learning_rate": 2.939203518375364e-07, "loss": 0.359, "step": 37174 }, { "epoch": 0.6461958316675068, "grad_norm": 2.3117216676238828, "learning_rate": 2.938947049712253e-07, "loss": 0.2998, "step": 37175 }, { "epoch": 0.6462132142050097, "grad_norm": 1.1603267581868386, "learning_rate": 2.9386905875817315e-07, "loss": 0.3884, "step": 37176 }, { "epoch": 0.6462305967425125, "grad_norm": 1.2177266130716933, "learning_rate": 2.938434131984614e-07, "loss": 0.1809, "step": 37177 }, { "epoch": 0.6462479792800153, "grad_norm": 2.7880445363981354, "learning_rate": 2.9381776829217123e-07, "loss": 0.307, "step": 37178 }, { "epoch": 0.6462653618175181, "grad_norm": 1.2982042734720245, "learning_rate": 2.937921240393842e-07, "loss": 0.2957, "step": 37179 }, { "epoch": 0.646282744355021, "grad_norm": 1.608512128887912, "learning_rate": 2.9376648044018127e-07, "loss": 0.2761, "step": 37180 }, { "epoch": 0.6463001268925238, "grad_norm": 1.4747080515250535, "learning_rate": 2.9374083749464395e-07, "loss": 0.3003, "step": 37181 }, { "epoch": 0.6463175094300266, "grad_norm": 1.437624590867989, "learning_rate": 2.9371519520285354e-07, "loss": 0.2751, "step": 37182 }, { "epoch": 0.6463348919675295, "grad_norm": 1.347662727423698, "learning_rate": 2.936895535648911e-07, "loss": 0.2378, "step": 37183 }, { "epoch": 0.6463522745050323, "grad_norm": 1.6418653354710315, "learning_rate": 2.936639125808379e-07, "loss": 0.2942, "step": 37184 }, { "epoch": 0.646369657042535, "grad_norm": 1.9556108264830334, "learning_rate": 2.9363827225077544e-07, "loss": 0.2734, "step": 37185 }, { "epoch": 0.6463870395800378, "grad_norm": 1.6571448903500983, "learning_rate": 2.936126325747847e-07, "loss": 0.191, "step": 37186 }, { "epoch": 0.6464044221175407, "grad_norm": 1.4625439560121516, "learning_rate": 2.9358699355294726e-07, "loss": 0.1902, "step": 37187 }, { "epoch": 0.6464218046550435, "grad_norm": 1.3448585519288596, "learning_rate": 2.935613551853443e-07, "loss": 0.1202, "step": 37188 }, { "epoch": 0.6464391871925463, "grad_norm": 2.249377563527221, "learning_rate": 2.9353571747205696e-07, "loss": 0.1788, "step": 37189 }, { "epoch": 0.6464565697300492, "grad_norm": 1.0186478682516134, "learning_rate": 2.935100804131664e-07, "loss": 0.178, "step": 37190 }, { "epoch": 0.646473952267552, "grad_norm": 1.473558327861957, "learning_rate": 2.9348444400875417e-07, "loss": 0.2604, "step": 37191 }, { "epoch": 0.6464913348050548, "grad_norm": 1.673216933314178, "learning_rate": 2.9345880825890123e-07, "loss": 0.2121, "step": 37192 }, { "epoch": 0.6465087173425577, "grad_norm": 1.6927544283279141, "learning_rate": 2.9343317316368914e-07, "loss": 0.2469, "step": 37193 }, { "epoch": 0.6465260998800605, "grad_norm": 1.1425457931148602, "learning_rate": 2.934075387231991e-07, "loss": 0.298, "step": 37194 }, { "epoch": 0.6465434824175633, "grad_norm": 1.5680140159223765, "learning_rate": 2.933819049375121e-07, "loss": 0.1688, "step": 37195 }, { "epoch": 0.6465608649550662, "grad_norm": 2.068156621874362, "learning_rate": 2.9335627180670957e-07, "loss": 0.2084, "step": 37196 }, { "epoch": 0.646578247492569, "grad_norm": 1.3569212281710754, "learning_rate": 2.9333063933087276e-07, "loss": 0.2022, "step": 37197 }, { "epoch": 0.6465956300300718, "grad_norm": 1.4438360702114907, "learning_rate": 2.933050075100828e-07, "loss": 0.205, "step": 37198 }, { "epoch": 0.6466130125675746, "grad_norm": 1.0476647865199578, "learning_rate": 2.932793763444211e-07, "loss": 0.2079, "step": 37199 }, { "epoch": 0.6466303951050775, "grad_norm": 1.3639474833288667, "learning_rate": 2.932537458339688e-07, "loss": 0.2102, "step": 37200 }, { "epoch": 0.6466477776425803, "grad_norm": 1.658040938677043, "learning_rate": 2.9322811597880727e-07, "loss": 0.1892, "step": 37201 }, { "epoch": 0.6466651601800831, "grad_norm": 1.4505378815668801, "learning_rate": 2.9320248677901755e-07, "loss": 0.2104, "step": 37202 }, { "epoch": 0.646682542717586, "grad_norm": 1.2643251180914767, "learning_rate": 2.931768582346809e-07, "loss": 0.2837, "step": 37203 }, { "epoch": 0.6466999252550888, "grad_norm": 1.697509766747505, "learning_rate": 2.9315123034587856e-07, "loss": 0.1172, "step": 37204 }, { "epoch": 0.6467173077925915, "grad_norm": 2.2470912529273845, "learning_rate": 2.9312560311269187e-07, "loss": 0.2697, "step": 37205 }, { "epoch": 0.6467346903300943, "grad_norm": 1.528152011265334, "learning_rate": 2.9309997653520197e-07, "loss": 0.1863, "step": 37206 }, { "epoch": 0.6467520728675972, "grad_norm": 1.8129635083855262, "learning_rate": 2.930743506134902e-07, "loss": 0.2318, "step": 37207 }, { "epoch": 0.6467694554051, "grad_norm": 1.8072696476457193, "learning_rate": 2.930487253476377e-07, "loss": 0.3557, "step": 37208 }, { "epoch": 0.6467868379426028, "grad_norm": 1.2669013438044376, "learning_rate": 2.9302310073772544e-07, "loss": 0.1689, "step": 37209 }, { "epoch": 0.6468042204801057, "grad_norm": 1.8121299410793719, "learning_rate": 2.92997476783835e-07, "loss": 0.2495, "step": 37210 }, { "epoch": 0.6468216030176085, "grad_norm": 2.2489058037501586, "learning_rate": 2.9297185348604747e-07, "loss": 0.1959, "step": 37211 }, { "epoch": 0.6468389855551113, "grad_norm": 2.012075083369187, "learning_rate": 2.9294623084444407e-07, "loss": 0.3161, "step": 37212 }, { "epoch": 0.6468563680926142, "grad_norm": 1.0895556310115364, "learning_rate": 2.9292060885910617e-07, "loss": 0.3048, "step": 37213 }, { "epoch": 0.646873750630117, "grad_norm": 2.5686319552908956, "learning_rate": 2.928949875301147e-07, "loss": 0.1747, "step": 37214 }, { "epoch": 0.6468911331676198, "grad_norm": 1.6924485802002618, "learning_rate": 2.928693668575508e-07, "loss": 0.1829, "step": 37215 }, { "epoch": 0.6469085157051226, "grad_norm": 1.7866148241016326, "learning_rate": 2.9284374684149606e-07, "loss": 0.2534, "step": 37216 }, { "epoch": 0.6469258982426255, "grad_norm": 0.8196867476783605, "learning_rate": 2.9281812748203134e-07, "loss": 0.1957, "step": 37217 }, { "epoch": 0.6469432807801283, "grad_norm": 1.7812537774594284, "learning_rate": 2.9279250877923813e-07, "loss": 0.1684, "step": 37218 }, { "epoch": 0.6469606633176311, "grad_norm": 1.456981266055786, "learning_rate": 2.9276689073319727e-07, "loss": 0.1318, "step": 37219 }, { "epoch": 0.646978045855134, "grad_norm": 2.353150330526127, "learning_rate": 2.927412733439905e-07, "loss": 0.2417, "step": 37220 }, { "epoch": 0.6469954283926368, "grad_norm": 1.1562069992315371, "learning_rate": 2.927156566116985e-07, "loss": 0.3833, "step": 37221 }, { "epoch": 0.6470128109301396, "grad_norm": 1.2862256908790797, "learning_rate": 2.9269004053640267e-07, "loss": 0.286, "step": 37222 }, { "epoch": 0.6470301934676425, "grad_norm": 1.3480564278259213, "learning_rate": 2.926644251181841e-07, "loss": 0.1958, "step": 37223 }, { "epoch": 0.6470475760051453, "grad_norm": 1.2881104676523691, "learning_rate": 2.926388103571241e-07, "loss": 0.2328, "step": 37224 }, { "epoch": 0.647064958542648, "grad_norm": 1.228394467452757, "learning_rate": 2.9261319625330385e-07, "loss": 0.2458, "step": 37225 }, { "epoch": 0.6470823410801508, "grad_norm": 1.276944932699409, "learning_rate": 2.925875828068046e-07, "loss": 0.1704, "step": 37226 }, { "epoch": 0.6470997236176537, "grad_norm": 0.9980340150826144, "learning_rate": 2.9256197001770735e-07, "loss": 0.4097, "step": 37227 }, { "epoch": 0.6471171061551565, "grad_norm": 1.84008691733521, "learning_rate": 2.9253635788609325e-07, "loss": 0.199, "step": 37228 }, { "epoch": 0.6471344886926593, "grad_norm": 1.3253792065336922, "learning_rate": 2.9251074641204375e-07, "loss": 0.169, "step": 37229 }, { "epoch": 0.6471518712301622, "grad_norm": 2.508123264649962, "learning_rate": 2.924851355956398e-07, "loss": 0.2636, "step": 37230 }, { "epoch": 0.647169253767665, "grad_norm": 1.2565458565424263, "learning_rate": 2.9245952543696263e-07, "loss": 0.1501, "step": 37231 }, { "epoch": 0.6471866363051678, "grad_norm": 1.8673600952585643, "learning_rate": 2.924339159360936e-07, "loss": 0.2099, "step": 37232 }, { "epoch": 0.6472040188426706, "grad_norm": 1.8934426699801146, "learning_rate": 2.9240830709311355e-07, "loss": 0.3046, "step": 37233 }, { "epoch": 0.6472214013801735, "grad_norm": 1.113041694589755, "learning_rate": 2.923826989081037e-07, "loss": 0.1885, "step": 37234 }, { "epoch": 0.6472387839176763, "grad_norm": 1.9343108557399489, "learning_rate": 2.923570913811455e-07, "loss": 0.2056, "step": 37235 }, { "epoch": 0.6472561664551791, "grad_norm": 1.9055937305233923, "learning_rate": 2.9233148451231984e-07, "loss": 0.16, "step": 37236 }, { "epoch": 0.647273548992682, "grad_norm": 1.193125476566436, "learning_rate": 2.9230587830170803e-07, "loss": 0.208, "step": 37237 }, { "epoch": 0.6472909315301848, "grad_norm": 1.6796307360079343, "learning_rate": 2.922802727493913e-07, "loss": 0.1413, "step": 37238 }, { "epoch": 0.6473083140676876, "grad_norm": 1.6823854707911563, "learning_rate": 2.922546678554506e-07, "loss": 0.2638, "step": 37239 }, { "epoch": 0.6473256966051905, "grad_norm": 1.2158859819171037, "learning_rate": 2.9222906361996703e-07, "loss": 0.1653, "step": 37240 }, { "epoch": 0.6473430791426933, "grad_norm": 1.1556139475950715, "learning_rate": 2.92203460043022e-07, "loss": 0.339, "step": 37241 }, { "epoch": 0.6473604616801961, "grad_norm": 1.5384987196448738, "learning_rate": 2.9217785712469645e-07, "loss": 0.3116, "step": 37242 }, { "epoch": 0.647377844217699, "grad_norm": 1.450967199489303, "learning_rate": 2.9215225486507173e-07, "loss": 0.1654, "step": 37243 }, { "epoch": 0.6473952267552017, "grad_norm": 1.1633160777375566, "learning_rate": 2.921266532642288e-07, "loss": 0.1885, "step": 37244 }, { "epoch": 0.6474126092927045, "grad_norm": 1.1689341176526664, "learning_rate": 2.9210105232224916e-07, "loss": 0.1981, "step": 37245 }, { "epoch": 0.6474299918302073, "grad_norm": 1.224973133728726, "learning_rate": 2.920754520392134e-07, "loss": 0.4257, "step": 37246 }, { "epoch": 0.6474473743677102, "grad_norm": 2.206972584840883, "learning_rate": 2.920498524152031e-07, "loss": 0.181, "step": 37247 }, { "epoch": 0.647464756905213, "grad_norm": 2.2878023870547706, "learning_rate": 2.9202425345029903e-07, "loss": 0.267, "step": 37248 }, { "epoch": 0.6474821394427158, "grad_norm": 1.7254079961889983, "learning_rate": 2.919986551445827e-07, "loss": 0.2476, "step": 37249 }, { "epoch": 0.6474995219802187, "grad_norm": 2.0261062949296305, "learning_rate": 2.91973057498135e-07, "loss": 0.3511, "step": 37250 }, { "epoch": 0.6475169045177215, "grad_norm": 1.5378013357752556, "learning_rate": 2.9194746051103733e-07, "loss": 0.2343, "step": 37251 }, { "epoch": 0.6475342870552243, "grad_norm": 3.2397511023103522, "learning_rate": 2.919218641833704e-07, "loss": 0.3049, "step": 37252 }, { "epoch": 0.6475516695927271, "grad_norm": 1.4296069002579088, "learning_rate": 2.9189626851521576e-07, "loss": 0.1985, "step": 37253 }, { "epoch": 0.64756905213023, "grad_norm": 1.3119913675745614, "learning_rate": 2.918706735066543e-07, "loss": 0.3633, "step": 37254 }, { "epoch": 0.6475864346677328, "grad_norm": 1.5921710245495815, "learning_rate": 2.9184507915776724e-07, "loss": 0.2831, "step": 37255 }, { "epoch": 0.6476038172052356, "grad_norm": 1.703807093732504, "learning_rate": 2.9181948546863566e-07, "loss": 0.3227, "step": 37256 }, { "epoch": 0.6476211997427385, "grad_norm": 1.3013210456566473, "learning_rate": 2.917938924393406e-07, "loss": 0.2343, "step": 37257 }, { "epoch": 0.6476385822802413, "grad_norm": 2.371427928550494, "learning_rate": 2.9176830006996333e-07, "loss": 0.262, "step": 37258 }, { "epoch": 0.6476559648177441, "grad_norm": 1.3047835797431646, "learning_rate": 2.917427083605849e-07, "loss": 0.3109, "step": 37259 }, { "epoch": 0.647673347355247, "grad_norm": 2.0890605978959362, "learning_rate": 2.917171173112862e-07, "loss": 0.2668, "step": 37260 }, { "epoch": 0.6476907298927498, "grad_norm": 1.3818956218058127, "learning_rate": 2.916915269221487e-07, "loss": 0.2425, "step": 37261 }, { "epoch": 0.6477081124302526, "grad_norm": 2.082113705508544, "learning_rate": 2.916659371932534e-07, "loss": 0.2051, "step": 37262 }, { "epoch": 0.6477254949677554, "grad_norm": 1.2770231502702056, "learning_rate": 2.916403481246815e-07, "loss": 0.1963, "step": 37263 }, { "epoch": 0.6477428775052582, "grad_norm": 1.9183992888472665, "learning_rate": 2.9161475971651386e-07, "loss": 0.384, "step": 37264 }, { "epoch": 0.647760260042761, "grad_norm": 1.510800914415921, "learning_rate": 2.915891719688317e-07, "loss": 0.1761, "step": 37265 }, { "epoch": 0.6477776425802638, "grad_norm": 0.8182713872677897, "learning_rate": 2.9156358488171617e-07, "loss": 0.1995, "step": 37266 }, { "epoch": 0.6477950251177667, "grad_norm": 0.826853357569096, "learning_rate": 2.9153799845524836e-07, "loss": 0.1724, "step": 37267 }, { "epoch": 0.6478124076552695, "grad_norm": 1.423057770794773, "learning_rate": 2.9151241268950913e-07, "loss": 0.2658, "step": 37268 }, { "epoch": 0.6478297901927723, "grad_norm": 2.102852657131594, "learning_rate": 2.914868275845799e-07, "loss": 0.2791, "step": 37269 }, { "epoch": 0.6478471727302751, "grad_norm": 1.353657369632038, "learning_rate": 2.914612431405419e-07, "loss": 0.2086, "step": 37270 }, { "epoch": 0.647864555267778, "grad_norm": 0.9652231310436048, "learning_rate": 2.914356593574757e-07, "loss": 0.1178, "step": 37271 }, { "epoch": 0.6478819378052808, "grad_norm": 2.0025748514288213, "learning_rate": 2.9141007623546256e-07, "loss": 0.1856, "step": 37272 }, { "epoch": 0.6478993203427836, "grad_norm": 1.9702827018280815, "learning_rate": 2.913844937745838e-07, "loss": 0.2617, "step": 37273 }, { "epoch": 0.6479167028802865, "grad_norm": 1.7775558796531812, "learning_rate": 2.9135891197492037e-07, "loss": 0.2547, "step": 37274 }, { "epoch": 0.6479340854177893, "grad_norm": 1.268300574296989, "learning_rate": 2.9133333083655336e-07, "loss": 0.1635, "step": 37275 }, { "epoch": 0.6479514679552921, "grad_norm": 2.053522575717349, "learning_rate": 2.9130775035956387e-07, "loss": 0.2612, "step": 37276 }, { "epoch": 0.647968850492795, "grad_norm": 1.5364739017923161, "learning_rate": 2.912821705440329e-07, "loss": 0.1908, "step": 37277 }, { "epoch": 0.6479862330302978, "grad_norm": 1.0531901105338142, "learning_rate": 2.9125659139004157e-07, "loss": 0.1978, "step": 37278 }, { "epoch": 0.6480036155678006, "grad_norm": 1.4187720665808583, "learning_rate": 2.912310128976709e-07, "loss": 0.192, "step": 37279 }, { "epoch": 0.6480209981053034, "grad_norm": 1.9641139704536934, "learning_rate": 2.9120543506700193e-07, "loss": 0.2507, "step": 37280 }, { "epoch": 0.6480383806428063, "grad_norm": 0.8672028247819515, "learning_rate": 2.911798578981159e-07, "loss": 0.1921, "step": 37281 }, { "epoch": 0.6480557631803091, "grad_norm": 1.076397673171307, "learning_rate": 2.9115428139109393e-07, "loss": 0.2332, "step": 37282 }, { "epoch": 0.6480731457178119, "grad_norm": 1.0510143839519193, "learning_rate": 2.9112870554601687e-07, "loss": 0.2472, "step": 37283 }, { "epoch": 0.6480905282553147, "grad_norm": 1.1523701881191837, "learning_rate": 2.9110313036296593e-07, "loss": 0.2549, "step": 37284 }, { "epoch": 0.6481079107928175, "grad_norm": 2.100035300936159, "learning_rate": 2.91077555842022e-07, "loss": 0.1761, "step": 37285 }, { "epoch": 0.6481252933303203, "grad_norm": 2.2920448164677523, "learning_rate": 2.9105198198326633e-07, "loss": 0.2522, "step": 37286 }, { "epoch": 0.6481426758678231, "grad_norm": 1.0519885852346795, "learning_rate": 2.9102640878677987e-07, "loss": 0.272, "step": 37287 }, { "epoch": 0.648160058405326, "grad_norm": 1.0241617128175096, "learning_rate": 2.9100083625264347e-07, "loss": 0.2089, "step": 37288 }, { "epoch": 0.6481774409428288, "grad_norm": 1.3706854511205862, "learning_rate": 2.909752643809389e-07, "loss": 0.3322, "step": 37289 }, { "epoch": 0.6481948234803316, "grad_norm": 2.1667041646059357, "learning_rate": 2.9094969317174646e-07, "loss": 0.147, "step": 37290 }, { "epoch": 0.6482122060178345, "grad_norm": 0.7696524585527253, "learning_rate": 2.9092412262514725e-07, "loss": 0.2755, "step": 37291 }, { "epoch": 0.6482295885553373, "grad_norm": 1.9683042826100026, "learning_rate": 2.908985527412228e-07, "loss": 0.2166, "step": 37292 }, { "epoch": 0.6482469710928401, "grad_norm": 1.5378882614460236, "learning_rate": 2.9087298352005386e-07, "loss": 0.2677, "step": 37293 }, { "epoch": 0.648264353630343, "grad_norm": 1.4727509469068818, "learning_rate": 2.9084741496172145e-07, "loss": 0.2782, "step": 37294 }, { "epoch": 0.6482817361678458, "grad_norm": 1.367451052619783, "learning_rate": 2.908218470663066e-07, "loss": 0.2078, "step": 37295 }, { "epoch": 0.6482991187053486, "grad_norm": 1.622257449989857, "learning_rate": 2.9079627983389053e-07, "loss": 0.2894, "step": 37296 }, { "epoch": 0.6483165012428515, "grad_norm": 2.7512977121271813, "learning_rate": 2.9077071326455417e-07, "loss": 0.3507, "step": 37297 }, { "epoch": 0.6483338837803543, "grad_norm": 0.9979481481009644, "learning_rate": 2.9074514735837843e-07, "loss": 0.213, "step": 37298 }, { "epoch": 0.6483512663178571, "grad_norm": 1.9304219735320345, "learning_rate": 2.907195821154443e-07, "loss": 0.2127, "step": 37299 }, { "epoch": 0.6483686488553599, "grad_norm": 1.634426257511869, "learning_rate": 2.906940175358331e-07, "loss": 0.2126, "step": 37300 }, { "epoch": 0.6483860313928628, "grad_norm": 2.0750069868550365, "learning_rate": 2.906684536196258e-07, "loss": 0.2861, "step": 37301 }, { "epoch": 0.6484034139303656, "grad_norm": 1.149255425560726, "learning_rate": 2.9064289036690356e-07, "loss": 0.3709, "step": 37302 }, { "epoch": 0.6484207964678684, "grad_norm": 1.4545882657707154, "learning_rate": 2.9061732777774675e-07, "loss": 0.2047, "step": 37303 }, { "epoch": 0.6484381790053712, "grad_norm": 1.6272958517773997, "learning_rate": 2.9059176585223697e-07, "loss": 0.2782, "step": 37304 }, { "epoch": 0.648455561542874, "grad_norm": 1.6919374706921961, "learning_rate": 2.9056620459045516e-07, "loss": 0.3577, "step": 37305 }, { "epoch": 0.6484729440803768, "grad_norm": 1.6415924332088982, "learning_rate": 2.905406439924822e-07, "loss": 0.3134, "step": 37306 }, { "epoch": 0.6484903266178796, "grad_norm": 1.8220271373393808, "learning_rate": 2.905150840583991e-07, "loss": 0.2437, "step": 37307 }, { "epoch": 0.6485077091553825, "grad_norm": 1.1553123616564998, "learning_rate": 2.904895247882873e-07, "loss": 0.1677, "step": 37308 }, { "epoch": 0.6485250916928853, "grad_norm": 0.9502988396398987, "learning_rate": 2.9046396618222724e-07, "loss": 0.3232, "step": 37309 }, { "epoch": 0.6485424742303881, "grad_norm": 0.9713302451745399, "learning_rate": 2.9043840824030017e-07, "loss": 0.1981, "step": 37310 }, { "epoch": 0.648559856767891, "grad_norm": 1.4212635642688363, "learning_rate": 2.9041285096258696e-07, "loss": 0.2287, "step": 37311 }, { "epoch": 0.6485772393053938, "grad_norm": 3.3519333460687863, "learning_rate": 2.9038729434916887e-07, "loss": 0.1704, "step": 37312 }, { "epoch": 0.6485946218428966, "grad_norm": 1.5020666188991234, "learning_rate": 2.9036173840012674e-07, "loss": 0.2145, "step": 37313 }, { "epoch": 0.6486120043803995, "grad_norm": 2.9091832844051737, "learning_rate": 2.9033618311554166e-07, "loss": 0.234, "step": 37314 }, { "epoch": 0.6486293869179023, "grad_norm": 1.8645272005765157, "learning_rate": 2.903106284954945e-07, "loss": 0.3719, "step": 37315 }, { "epoch": 0.6486467694554051, "grad_norm": 3.16870082888775, "learning_rate": 2.9028507454006637e-07, "loss": 0.2339, "step": 37316 }, { "epoch": 0.648664151992908, "grad_norm": 1.6292403697617683, "learning_rate": 2.902595212493382e-07, "loss": 0.2662, "step": 37317 }, { "epoch": 0.6486815345304108, "grad_norm": 2.704628761726455, "learning_rate": 2.902339686233908e-07, "loss": 0.3526, "step": 37318 }, { "epoch": 0.6486989170679136, "grad_norm": 1.470586058255978, "learning_rate": 2.9020841666230557e-07, "loss": 0.1728, "step": 37319 }, { "epoch": 0.6487162996054164, "grad_norm": 0.6905872221405903, "learning_rate": 2.901828653661633e-07, "loss": 0.163, "step": 37320 }, { "epoch": 0.6487336821429193, "grad_norm": 1.1290967076875373, "learning_rate": 2.9015731473504513e-07, "loss": 0.2051, "step": 37321 }, { "epoch": 0.6487510646804221, "grad_norm": 1.464350427111792, "learning_rate": 2.9013176476903144e-07, "loss": 0.2108, "step": 37322 }, { "epoch": 0.6487684472179249, "grad_norm": 2.0269947916595137, "learning_rate": 2.901062154682039e-07, "loss": 0.2094, "step": 37323 }, { "epoch": 0.6487858297554276, "grad_norm": 1.2522057766876047, "learning_rate": 2.9008066683264333e-07, "loss": 0.2169, "step": 37324 }, { "epoch": 0.6488032122929305, "grad_norm": 1.156495998206526, "learning_rate": 2.900551188624305e-07, "loss": 0.1115, "step": 37325 }, { "epoch": 0.6488205948304333, "grad_norm": 1.5476926508883484, "learning_rate": 2.900295715576463e-07, "loss": 0.3013, "step": 37326 }, { "epoch": 0.6488379773679361, "grad_norm": 1.3775591456310803, "learning_rate": 2.900040249183724e-07, "loss": 0.348, "step": 37327 }, { "epoch": 0.648855359905439, "grad_norm": 1.5202218693026168, "learning_rate": 2.8997847894468896e-07, "loss": 0.1754, "step": 37328 }, { "epoch": 0.6488727424429418, "grad_norm": 1.6891856370431468, "learning_rate": 2.8995293363667737e-07, "loss": 0.2979, "step": 37329 }, { "epoch": 0.6488901249804446, "grad_norm": 1.0574705985187876, "learning_rate": 2.8992738899441827e-07, "loss": 0.1582, "step": 37330 }, { "epoch": 0.6489075075179475, "grad_norm": 1.3442618328280935, "learning_rate": 2.89901845017993e-07, "loss": 0.214, "step": 37331 }, { "epoch": 0.6489248900554503, "grad_norm": 1.5892995677919382, "learning_rate": 2.898763017074824e-07, "loss": 0.296, "step": 37332 }, { "epoch": 0.6489422725929531, "grad_norm": 1.4909750309165641, "learning_rate": 2.898507590629674e-07, "loss": 0.3357, "step": 37333 }, { "epoch": 0.648959655130456, "grad_norm": 1.2440177340945409, "learning_rate": 2.898252170845289e-07, "loss": 0.2818, "step": 37334 }, { "epoch": 0.6489770376679588, "grad_norm": 1.6281880384253553, "learning_rate": 2.89799675772248e-07, "loss": 0.2262, "step": 37335 }, { "epoch": 0.6489944202054616, "grad_norm": 2.5144242777894807, "learning_rate": 2.897741351262055e-07, "loss": 0.2353, "step": 37336 }, { "epoch": 0.6490118027429644, "grad_norm": 1.1664069805282251, "learning_rate": 2.8974859514648254e-07, "loss": 0.2545, "step": 37337 }, { "epoch": 0.6490291852804673, "grad_norm": 0.7213689305122853, "learning_rate": 2.8972305583315965e-07, "loss": 0.2664, "step": 37338 }, { "epoch": 0.6490465678179701, "grad_norm": 2.3681208883841904, "learning_rate": 2.896975171863183e-07, "loss": 0.2394, "step": 37339 }, { "epoch": 0.6490639503554729, "grad_norm": 1.4775555778776235, "learning_rate": 2.896719792060394e-07, "loss": 0.2192, "step": 37340 }, { "epoch": 0.6490813328929758, "grad_norm": 1.8819716528038093, "learning_rate": 2.896464418924033e-07, "loss": 0.2068, "step": 37341 }, { "epoch": 0.6490987154304786, "grad_norm": 5.34492118229634, "learning_rate": 2.8962090524549166e-07, "loss": 0.3134, "step": 37342 }, { "epoch": 0.6491160979679814, "grad_norm": 1.5849880382873491, "learning_rate": 2.89595369265385e-07, "loss": 0.2341, "step": 37343 }, { "epoch": 0.6491334805054841, "grad_norm": 1.1815673647013982, "learning_rate": 2.895698339521644e-07, "loss": 0.2156, "step": 37344 }, { "epoch": 0.649150863042987, "grad_norm": 1.3749706519340985, "learning_rate": 2.8954429930591073e-07, "loss": 0.3037, "step": 37345 }, { "epoch": 0.6491682455804898, "grad_norm": 1.948456412028769, "learning_rate": 2.89518765326705e-07, "loss": 0.1966, "step": 37346 }, { "epoch": 0.6491856281179926, "grad_norm": 1.792663779624805, "learning_rate": 2.8949323201462805e-07, "loss": 0.2372, "step": 37347 }, { "epoch": 0.6492030106554955, "grad_norm": 1.6065377060768606, "learning_rate": 2.894676993697609e-07, "loss": 0.2866, "step": 37348 }, { "epoch": 0.6492203931929983, "grad_norm": 1.9219752547024247, "learning_rate": 2.894421673921842e-07, "loss": 0.2068, "step": 37349 }, { "epoch": 0.6492377757305011, "grad_norm": 1.0281228354368745, "learning_rate": 2.894166360819794e-07, "loss": 0.2649, "step": 37350 }, { "epoch": 0.649255158268004, "grad_norm": 2.5509976783250674, "learning_rate": 2.89391105439227e-07, "loss": 0.3258, "step": 37351 }, { "epoch": 0.6492725408055068, "grad_norm": 2.0519375316247306, "learning_rate": 2.893655754640083e-07, "loss": 0.3193, "step": 37352 }, { "epoch": 0.6492899233430096, "grad_norm": 1.2703421543407218, "learning_rate": 2.893400461564036e-07, "loss": 0.2662, "step": 37353 }, { "epoch": 0.6493073058805124, "grad_norm": 1.1269633282684932, "learning_rate": 2.893145175164944e-07, "loss": 0.1747, "step": 37354 }, { "epoch": 0.6493246884180153, "grad_norm": 1.4176836017823862, "learning_rate": 2.892889895443613e-07, "loss": 0.268, "step": 37355 }, { "epoch": 0.6493420709555181, "grad_norm": 1.248334473544256, "learning_rate": 2.8926346224008534e-07, "loss": 0.1747, "step": 37356 }, { "epoch": 0.6493594534930209, "grad_norm": 1.1547857123377896, "learning_rate": 2.892379356037473e-07, "loss": 0.2644, "step": 37357 }, { "epoch": 0.6493768360305238, "grad_norm": 1.6938049007274842, "learning_rate": 2.892124096354285e-07, "loss": 0.1952, "step": 37358 }, { "epoch": 0.6493942185680266, "grad_norm": 1.1736038878430997, "learning_rate": 2.891868843352093e-07, "loss": 0.1424, "step": 37359 }, { "epoch": 0.6494116011055294, "grad_norm": 1.2127687351188703, "learning_rate": 2.891613597031709e-07, "loss": 0.2357, "step": 37360 }, { "epoch": 0.6494289836430323, "grad_norm": 2.0861358256554206, "learning_rate": 2.8913583573939395e-07, "loss": 0.2465, "step": 37361 }, { "epoch": 0.6494463661805351, "grad_norm": 1.9285607515567376, "learning_rate": 2.8911031244395966e-07, "loss": 0.2541, "step": 37362 }, { "epoch": 0.6494637487180379, "grad_norm": 1.1976054211035216, "learning_rate": 2.890847898169489e-07, "loss": 0.2662, "step": 37363 }, { "epoch": 0.6494811312555406, "grad_norm": 1.348363337373881, "learning_rate": 2.8905926785844244e-07, "loss": 0.2654, "step": 37364 }, { "epoch": 0.6494985137930435, "grad_norm": 1.5340706390752503, "learning_rate": 2.890337465685212e-07, "loss": 0.1796, "step": 37365 }, { "epoch": 0.6495158963305463, "grad_norm": 1.0972143654572448, "learning_rate": 2.8900822594726605e-07, "loss": 0.2089, "step": 37366 }, { "epoch": 0.6495332788680491, "grad_norm": 1.5461318642522002, "learning_rate": 2.889827059947579e-07, "loss": 0.2311, "step": 37367 }, { "epoch": 0.649550661405552, "grad_norm": 2.7018329139953576, "learning_rate": 2.889571867110776e-07, "loss": 0.3328, "step": 37368 }, { "epoch": 0.6495680439430548, "grad_norm": 1.8803180950844003, "learning_rate": 2.88931668096306e-07, "loss": 0.2278, "step": 37369 }, { "epoch": 0.6495854264805576, "grad_norm": 1.1378719205824381, "learning_rate": 2.8890615015052413e-07, "loss": 0.1413, "step": 37370 }, { "epoch": 0.6496028090180604, "grad_norm": 1.4830685267883938, "learning_rate": 2.8888063287381305e-07, "loss": 0.2436, "step": 37371 }, { "epoch": 0.6496201915555633, "grad_norm": 2.4163919856443634, "learning_rate": 2.8885511626625303e-07, "loss": 0.2146, "step": 37372 }, { "epoch": 0.6496375740930661, "grad_norm": 1.1050729775074377, "learning_rate": 2.888296003279254e-07, "loss": 0.2091, "step": 37373 }, { "epoch": 0.6496549566305689, "grad_norm": 1.4834093693716979, "learning_rate": 2.8880408505891097e-07, "loss": 0.2181, "step": 37374 }, { "epoch": 0.6496723391680718, "grad_norm": 1.8082562116691407, "learning_rate": 2.887785704592906e-07, "loss": 0.4007, "step": 37375 }, { "epoch": 0.6496897217055746, "grad_norm": 0.931437116701902, "learning_rate": 2.8875305652914514e-07, "loss": 0.3566, "step": 37376 }, { "epoch": 0.6497071042430774, "grad_norm": 1.7832175827771635, "learning_rate": 2.887275432685554e-07, "loss": 0.3015, "step": 37377 }, { "epoch": 0.6497244867805803, "grad_norm": 1.6359245854369013, "learning_rate": 2.8870203067760237e-07, "loss": 0.2144, "step": 37378 }, { "epoch": 0.6497418693180831, "grad_norm": 4.721401647062051, "learning_rate": 2.886765187563668e-07, "loss": 0.238, "step": 37379 }, { "epoch": 0.6497592518555859, "grad_norm": 1.7084081837016374, "learning_rate": 2.886510075049294e-07, "loss": 0.2474, "step": 37380 }, { "epoch": 0.6497766343930887, "grad_norm": 1.3454203567649043, "learning_rate": 2.886254969233714e-07, "loss": 0.2825, "step": 37381 }, { "epoch": 0.6497940169305916, "grad_norm": 2.7759214701815544, "learning_rate": 2.885999870117735e-07, "loss": 0.2579, "step": 37382 }, { "epoch": 0.6498113994680943, "grad_norm": 2.098331373084741, "learning_rate": 2.8857447777021647e-07, "loss": 0.2085, "step": 37383 }, { "epoch": 0.6498287820055971, "grad_norm": 1.886749212502342, "learning_rate": 2.8854896919878137e-07, "loss": 0.2941, "step": 37384 }, { "epoch": 0.6498461645431, "grad_norm": 1.5715603746451283, "learning_rate": 2.885234612975488e-07, "loss": 0.2142, "step": 37385 }, { "epoch": 0.6498635470806028, "grad_norm": 1.9335434234843936, "learning_rate": 2.884979540665997e-07, "loss": 0.1817, "step": 37386 }, { "epoch": 0.6498809296181056, "grad_norm": 1.2768014048342853, "learning_rate": 2.8847244750601497e-07, "loss": 0.1879, "step": 37387 }, { "epoch": 0.6498983121556084, "grad_norm": 1.2991365586832004, "learning_rate": 2.884469416158752e-07, "loss": 0.1964, "step": 37388 }, { "epoch": 0.6499156946931113, "grad_norm": 1.176571065451128, "learning_rate": 2.884214363962616e-07, "loss": 0.0969, "step": 37389 }, { "epoch": 0.6499330772306141, "grad_norm": 1.5780565439268555, "learning_rate": 2.883959318472551e-07, "loss": 0.1966, "step": 37390 }, { "epoch": 0.6499504597681169, "grad_norm": 1.7386011202112317, "learning_rate": 2.8837042796893607e-07, "loss": 0.2845, "step": 37391 }, { "epoch": 0.6499678423056198, "grad_norm": 2.1124977789358304, "learning_rate": 2.8834492476138544e-07, "loss": 0.2903, "step": 37392 }, { "epoch": 0.6499852248431226, "grad_norm": 3.0560301729353774, "learning_rate": 2.8831942222468433e-07, "loss": 0.2321, "step": 37393 }, { "epoch": 0.6500026073806254, "grad_norm": 3.1908845177410563, "learning_rate": 2.882939203589134e-07, "loss": 0.317, "step": 37394 }, { "epoch": 0.6500199899181283, "grad_norm": 1.614155164152184, "learning_rate": 2.8826841916415346e-07, "loss": 0.2536, "step": 37395 }, { "epoch": 0.6500373724556311, "grad_norm": 2.2249152073276073, "learning_rate": 2.8824291864048543e-07, "loss": 0.2646, "step": 37396 }, { "epoch": 0.6500547549931339, "grad_norm": 1.3164238858100712, "learning_rate": 2.8821741878799007e-07, "loss": 0.1445, "step": 37397 }, { "epoch": 0.6500721375306368, "grad_norm": 1.3168827457170573, "learning_rate": 2.8819191960674814e-07, "loss": 0.1563, "step": 37398 }, { "epoch": 0.6500895200681396, "grad_norm": 1.3981531332970274, "learning_rate": 2.8816642109684065e-07, "loss": 0.2429, "step": 37399 }, { "epoch": 0.6501069026056424, "grad_norm": 1.620608913194029, "learning_rate": 2.88140923258348e-07, "loss": 0.2719, "step": 37400 }, { "epoch": 0.6501242851431452, "grad_norm": 1.6677017259930993, "learning_rate": 2.8811542609135154e-07, "loss": 0.2177, "step": 37401 }, { "epoch": 0.6501416676806481, "grad_norm": 1.5291664477433977, "learning_rate": 2.880899295959318e-07, "loss": 0.3412, "step": 37402 }, { "epoch": 0.6501590502181508, "grad_norm": 1.6740279671451996, "learning_rate": 2.8806443377216973e-07, "loss": 0.2504, "step": 37403 }, { "epoch": 0.6501764327556536, "grad_norm": 1.3306698151368244, "learning_rate": 2.8803893862014595e-07, "loss": 0.1901, "step": 37404 }, { "epoch": 0.6501938152931565, "grad_norm": 1.2452922659635401, "learning_rate": 2.8801344413994143e-07, "loss": 0.3075, "step": 37405 }, { "epoch": 0.6502111978306593, "grad_norm": 1.4958074226147375, "learning_rate": 2.879879503316369e-07, "loss": 0.3492, "step": 37406 }, { "epoch": 0.6502285803681621, "grad_norm": 1.782547097473012, "learning_rate": 2.879624571953132e-07, "loss": 0.2791, "step": 37407 }, { "epoch": 0.6502459629056649, "grad_norm": 1.4449731294476504, "learning_rate": 2.879369647310509e-07, "loss": 0.3656, "step": 37408 }, { "epoch": 0.6502633454431678, "grad_norm": 1.460513643980242, "learning_rate": 2.8791147293893134e-07, "loss": 0.3057, "step": 37409 }, { "epoch": 0.6502807279806706, "grad_norm": 1.293635072920542, "learning_rate": 2.8788598181903485e-07, "loss": 0.5246, "step": 37410 }, { "epoch": 0.6502981105181734, "grad_norm": 1.0939146028842563, "learning_rate": 2.878604913714421e-07, "loss": 0.2287, "step": 37411 }, { "epoch": 0.6503154930556763, "grad_norm": 1.576470771604713, "learning_rate": 2.878350015962344e-07, "loss": 0.2566, "step": 37412 }, { "epoch": 0.6503328755931791, "grad_norm": 0.9865815650275683, "learning_rate": 2.8780951249349226e-07, "loss": 0.2067, "step": 37413 }, { "epoch": 0.6503502581306819, "grad_norm": 2.1506798354683156, "learning_rate": 2.8778402406329653e-07, "loss": 0.3896, "step": 37414 }, { "epoch": 0.6503676406681848, "grad_norm": 0.9172771878904836, "learning_rate": 2.87758536305728e-07, "loss": 0.1229, "step": 37415 }, { "epoch": 0.6503850232056876, "grad_norm": 1.3839723915910407, "learning_rate": 2.8773304922086733e-07, "loss": 0.314, "step": 37416 }, { "epoch": 0.6504024057431904, "grad_norm": 2.5762232668806737, "learning_rate": 2.8770756280879536e-07, "loss": 0.3227, "step": 37417 }, { "epoch": 0.6504197882806932, "grad_norm": 1.0233460479909458, "learning_rate": 2.87682077069593e-07, "loss": 0.266, "step": 37418 }, { "epoch": 0.6504371708181961, "grad_norm": 1.1275555670965225, "learning_rate": 2.8765659200334064e-07, "loss": 0.1177, "step": 37419 }, { "epoch": 0.6504545533556989, "grad_norm": 3.7033019976100245, "learning_rate": 2.8763110761011956e-07, "loss": 0.2323, "step": 37420 }, { "epoch": 0.6504719358932017, "grad_norm": 1.9177339160855842, "learning_rate": 2.876056238900103e-07, "loss": 0.1893, "step": 37421 }, { "epoch": 0.6504893184307046, "grad_norm": 1.6835955520881376, "learning_rate": 2.875801408430937e-07, "loss": 0.1681, "step": 37422 }, { "epoch": 0.6505067009682073, "grad_norm": 6.99924999663903, "learning_rate": 2.875546584694505e-07, "loss": 0.2715, "step": 37423 }, { "epoch": 0.6505240835057101, "grad_norm": 2.7958876479470613, "learning_rate": 2.8752917676916135e-07, "loss": 0.2641, "step": 37424 }, { "epoch": 0.650541466043213, "grad_norm": 2.0138078629791405, "learning_rate": 2.8750369574230713e-07, "loss": 0.1559, "step": 37425 }, { "epoch": 0.6505588485807158, "grad_norm": 5.1383121186056115, "learning_rate": 2.8747821538896866e-07, "loss": 0.3251, "step": 37426 }, { "epoch": 0.6505762311182186, "grad_norm": 1.4260513934985288, "learning_rate": 2.8745273570922637e-07, "loss": 0.164, "step": 37427 }, { "epoch": 0.6505936136557214, "grad_norm": 2.3440430698287362, "learning_rate": 2.8742725670316157e-07, "loss": 0.388, "step": 37428 }, { "epoch": 0.6506109961932243, "grad_norm": 2.39326295475624, "learning_rate": 2.8740177837085467e-07, "loss": 0.3385, "step": 37429 }, { "epoch": 0.6506283787307271, "grad_norm": 1.302072740980079, "learning_rate": 2.8737630071238615e-07, "loss": 0.2512, "step": 37430 }, { "epoch": 0.6506457612682299, "grad_norm": 1.1716782092547948, "learning_rate": 2.873508237278374e-07, "loss": 0.1328, "step": 37431 }, { "epoch": 0.6506631438057328, "grad_norm": 1.2827505653891536, "learning_rate": 2.873253474172888e-07, "loss": 0.2769, "step": 37432 }, { "epoch": 0.6506805263432356, "grad_norm": 1.0688403343227397, "learning_rate": 2.8729987178082117e-07, "loss": 0.1206, "step": 37433 }, { "epoch": 0.6506979088807384, "grad_norm": 1.738049876514048, "learning_rate": 2.872743968185152e-07, "loss": 0.3096, "step": 37434 }, { "epoch": 0.6507152914182412, "grad_norm": 1.247558348492565, "learning_rate": 2.872489225304517e-07, "loss": 0.2223, "step": 37435 }, { "epoch": 0.6507326739557441, "grad_norm": 1.0788434902712019, "learning_rate": 2.872234489167115e-07, "loss": 0.2436, "step": 37436 }, { "epoch": 0.6507500564932469, "grad_norm": 0.8225956408964825, "learning_rate": 2.8719797597737506e-07, "loss": 0.216, "step": 37437 }, { "epoch": 0.6507674390307497, "grad_norm": 2.0144189919255413, "learning_rate": 2.8717250371252313e-07, "loss": 0.2623, "step": 37438 }, { "epoch": 0.6507848215682526, "grad_norm": 1.1280089062407344, "learning_rate": 2.8714703212223676e-07, "loss": 0.2714, "step": 37439 }, { "epoch": 0.6508022041057554, "grad_norm": 0.9558704177866838, "learning_rate": 2.8712156120659657e-07, "loss": 0.2097, "step": 37440 }, { "epoch": 0.6508195866432582, "grad_norm": 1.3702107771744068, "learning_rate": 2.870960909656835e-07, "loss": 0.2919, "step": 37441 }, { "epoch": 0.6508369691807611, "grad_norm": 5.32816462875331, "learning_rate": 2.8707062139957754e-07, "loss": 0.3974, "step": 37442 }, { "epoch": 0.6508543517182638, "grad_norm": 2.030245907842586, "learning_rate": 2.870451525083601e-07, "loss": 0.2286, "step": 37443 }, { "epoch": 0.6508717342557666, "grad_norm": 1.408508145647544, "learning_rate": 2.8701968429211176e-07, "loss": 0.2462, "step": 37444 }, { "epoch": 0.6508891167932694, "grad_norm": 1.5020449756903786, "learning_rate": 2.869942167509132e-07, "loss": 0.1979, "step": 37445 }, { "epoch": 0.6509064993307723, "grad_norm": 0.935611700398054, "learning_rate": 2.869687498848448e-07, "loss": 0.2501, "step": 37446 }, { "epoch": 0.6509238818682751, "grad_norm": 1.5886878417554948, "learning_rate": 2.869432836939881e-07, "loss": 0.1506, "step": 37447 }, { "epoch": 0.6509412644057779, "grad_norm": 1.4473892302033764, "learning_rate": 2.869178181784231e-07, "loss": 0.2382, "step": 37448 }, { "epoch": 0.6509586469432808, "grad_norm": 3.0555816952830446, "learning_rate": 2.868923533382307e-07, "loss": 0.4973, "step": 37449 }, { "epoch": 0.6509760294807836, "grad_norm": 2.063099206758236, "learning_rate": 2.8686688917349153e-07, "loss": 0.3381, "step": 37450 }, { "epoch": 0.6509934120182864, "grad_norm": 1.8887345916570304, "learning_rate": 2.868414256842866e-07, "loss": 0.2611, "step": 37451 }, { "epoch": 0.6510107945557893, "grad_norm": 1.9515135557171384, "learning_rate": 2.868159628706964e-07, "loss": 0.4491, "step": 37452 }, { "epoch": 0.6510281770932921, "grad_norm": 1.1024389668716688, "learning_rate": 2.8679050073280166e-07, "loss": 0.37, "step": 37453 }, { "epoch": 0.6510455596307949, "grad_norm": 1.0875981198305915, "learning_rate": 2.8676503927068306e-07, "loss": 0.172, "step": 37454 }, { "epoch": 0.6510629421682977, "grad_norm": 6.31570735803548, "learning_rate": 2.8673957848442136e-07, "loss": 0.2802, "step": 37455 }, { "epoch": 0.6510803247058006, "grad_norm": 2.1862859912782766, "learning_rate": 2.867141183740973e-07, "loss": 0.2131, "step": 37456 }, { "epoch": 0.6510977072433034, "grad_norm": 1.6324818015042826, "learning_rate": 2.8668865893979146e-07, "loss": 0.2428, "step": 37457 }, { "epoch": 0.6511150897808062, "grad_norm": 1.2951119530259725, "learning_rate": 2.8666320018158434e-07, "loss": 0.2195, "step": 37458 }, { "epoch": 0.6511324723183091, "grad_norm": 1.2256643539828953, "learning_rate": 2.866377420995572e-07, "loss": 0.1536, "step": 37459 }, { "epoch": 0.6511498548558119, "grad_norm": 1.8629158366703766, "learning_rate": 2.8661228469379047e-07, "loss": 0.2378, "step": 37460 }, { "epoch": 0.6511672373933147, "grad_norm": 1.0549145733259377, "learning_rate": 2.865868279643644e-07, "loss": 0.208, "step": 37461 }, { "epoch": 0.6511846199308176, "grad_norm": 3.781116929692042, "learning_rate": 2.865613719113603e-07, "loss": 0.3057, "step": 37462 }, { "epoch": 0.6512020024683203, "grad_norm": 1.4250784012273592, "learning_rate": 2.865359165348586e-07, "loss": 0.2749, "step": 37463 }, { "epoch": 0.6512193850058231, "grad_norm": 1.146306602540898, "learning_rate": 2.8651046183494e-07, "loss": 0.329, "step": 37464 }, { "epoch": 0.6512367675433259, "grad_norm": 1.3349100365706341, "learning_rate": 2.864850078116851e-07, "loss": 0.281, "step": 37465 }, { "epoch": 0.6512541500808288, "grad_norm": 1.4131347260106217, "learning_rate": 2.864595544651747e-07, "loss": 0.2292, "step": 37466 }, { "epoch": 0.6512715326183316, "grad_norm": 1.493017884640783, "learning_rate": 2.8643410179548936e-07, "loss": 0.1816, "step": 37467 }, { "epoch": 0.6512889151558344, "grad_norm": 1.0070609721293944, "learning_rate": 2.8640864980270984e-07, "loss": 0.2096, "step": 37468 }, { "epoch": 0.6513062976933373, "grad_norm": 0.8591849621311658, "learning_rate": 2.8638319848691663e-07, "loss": 0.1445, "step": 37469 }, { "epoch": 0.6513236802308401, "grad_norm": 4.007285677911619, "learning_rate": 2.863577478481908e-07, "loss": 0.2586, "step": 37470 }, { "epoch": 0.6513410627683429, "grad_norm": 1.5744695192349925, "learning_rate": 2.863322978866126e-07, "loss": 0.2852, "step": 37471 }, { "epoch": 0.6513584453058457, "grad_norm": 1.5457961449280149, "learning_rate": 2.8630684860226316e-07, "loss": 0.2311, "step": 37472 }, { "epoch": 0.6513758278433486, "grad_norm": 2.1868899318312285, "learning_rate": 2.862813999952225e-07, "loss": 0.2798, "step": 37473 }, { "epoch": 0.6513932103808514, "grad_norm": 1.8359651445547698, "learning_rate": 2.8625595206557174e-07, "loss": 0.2656, "step": 37474 }, { "epoch": 0.6514105929183542, "grad_norm": 2.935034435936573, "learning_rate": 2.862305048133915e-07, "loss": 0.2713, "step": 37475 }, { "epoch": 0.6514279754558571, "grad_norm": 1.3768188872016032, "learning_rate": 2.8620505823876227e-07, "loss": 0.1764, "step": 37476 }, { "epoch": 0.6514453579933599, "grad_norm": 0.9814826412465587, "learning_rate": 2.8617961234176467e-07, "loss": 0.171, "step": 37477 }, { "epoch": 0.6514627405308627, "grad_norm": 0.9664457113779512, "learning_rate": 2.861541671224799e-07, "loss": 0.2936, "step": 37478 }, { "epoch": 0.6514801230683656, "grad_norm": 1.6739305617856592, "learning_rate": 2.861287225809879e-07, "loss": 0.259, "step": 37479 }, { "epoch": 0.6514975056058684, "grad_norm": 1.1664008918989748, "learning_rate": 2.8610327871736965e-07, "loss": 0.1918, "step": 37480 }, { "epoch": 0.6515148881433712, "grad_norm": 1.1671160151909024, "learning_rate": 2.860778355317056e-07, "loss": 0.2007, "step": 37481 }, { "epoch": 0.651532270680874, "grad_norm": 1.5207895929082518, "learning_rate": 2.860523930240767e-07, "loss": 0.2834, "step": 37482 }, { "epoch": 0.6515496532183768, "grad_norm": 1.4427488074329946, "learning_rate": 2.8602695119456345e-07, "loss": 0.2154, "step": 37483 }, { "epoch": 0.6515670357558796, "grad_norm": 1.2167786189091565, "learning_rate": 2.860015100432464e-07, "loss": 0.1998, "step": 37484 }, { "epoch": 0.6515844182933824, "grad_norm": 1.4235544400080473, "learning_rate": 2.8597606957020633e-07, "loss": 0.2439, "step": 37485 }, { "epoch": 0.6516018008308853, "grad_norm": 2.25797691843307, "learning_rate": 2.859506297755238e-07, "loss": 0.3551, "step": 37486 }, { "epoch": 0.6516191833683881, "grad_norm": 2.1562267220380735, "learning_rate": 2.859251906592794e-07, "loss": 0.2975, "step": 37487 }, { "epoch": 0.6516365659058909, "grad_norm": 1.206493173568921, "learning_rate": 2.8589975222155384e-07, "loss": 0.1498, "step": 37488 }, { "epoch": 0.6516539484433937, "grad_norm": 2.0459410924677757, "learning_rate": 2.858743144624275e-07, "loss": 0.2282, "step": 37489 }, { "epoch": 0.6516713309808966, "grad_norm": 1.422596732787372, "learning_rate": 2.858488773819815e-07, "loss": 0.2991, "step": 37490 }, { "epoch": 0.6516887135183994, "grad_norm": 1.0879912745885147, "learning_rate": 2.858234409802963e-07, "loss": 0.1716, "step": 37491 }, { "epoch": 0.6517060960559022, "grad_norm": 1.2523177540753223, "learning_rate": 2.85798005257452e-07, "loss": 0.2011, "step": 37492 }, { "epoch": 0.6517234785934051, "grad_norm": 1.5498549090530689, "learning_rate": 2.8577257021352986e-07, "loss": 0.1582, "step": 37493 }, { "epoch": 0.6517408611309079, "grad_norm": 1.586962336863975, "learning_rate": 2.8574713584861035e-07, "loss": 0.3693, "step": 37494 }, { "epoch": 0.6517582436684107, "grad_norm": 1.4686608855193686, "learning_rate": 2.8572170216277387e-07, "loss": 0.119, "step": 37495 }, { "epoch": 0.6517756262059136, "grad_norm": 1.1570949815280196, "learning_rate": 2.8569626915610117e-07, "loss": 0.2488, "step": 37496 }, { "epoch": 0.6517930087434164, "grad_norm": 1.670038578935893, "learning_rate": 2.856708368286729e-07, "loss": 0.3261, "step": 37497 }, { "epoch": 0.6518103912809192, "grad_norm": 1.4406148175090145, "learning_rate": 2.856454051805696e-07, "loss": 0.1957, "step": 37498 }, { "epoch": 0.651827773818422, "grad_norm": 1.2124356798523768, "learning_rate": 2.8561997421187195e-07, "loss": 0.1996, "step": 37499 }, { "epoch": 0.6518451563559249, "grad_norm": 3.1371711880480566, "learning_rate": 2.8559454392266025e-07, "loss": 0.1931, "step": 37500 }, { "epoch": 0.6518625388934277, "grad_norm": 1.5999521367335132, "learning_rate": 2.8556911431301557e-07, "loss": 0.1942, "step": 37501 }, { "epoch": 0.6518799214309305, "grad_norm": 2.001854909290848, "learning_rate": 2.855436853830183e-07, "loss": 0.2581, "step": 37502 }, { "epoch": 0.6518973039684333, "grad_norm": 2.106448289421526, "learning_rate": 2.85518257132749e-07, "loss": 0.2306, "step": 37503 }, { "epoch": 0.6519146865059361, "grad_norm": 3.375936882120661, "learning_rate": 2.8549282956228836e-07, "loss": 0.474, "step": 37504 }, { "epoch": 0.6519320690434389, "grad_norm": 2.8269370447137203, "learning_rate": 2.8546740267171685e-07, "loss": 0.2449, "step": 37505 }, { "epoch": 0.6519494515809418, "grad_norm": 2.9331792583433005, "learning_rate": 2.854419764611152e-07, "loss": 0.2523, "step": 37506 }, { "epoch": 0.6519668341184446, "grad_norm": 2.618326888077184, "learning_rate": 2.854165509305639e-07, "loss": 0.2984, "step": 37507 }, { "epoch": 0.6519842166559474, "grad_norm": 1.4258413679306097, "learning_rate": 2.853911260801434e-07, "loss": 0.1891, "step": 37508 }, { "epoch": 0.6520015991934502, "grad_norm": 1.4790416515995601, "learning_rate": 2.8536570190993456e-07, "loss": 0.2144, "step": 37509 }, { "epoch": 0.6520189817309531, "grad_norm": 0.6610894194271534, "learning_rate": 2.8534027842001806e-07, "loss": 0.1502, "step": 37510 }, { "epoch": 0.6520363642684559, "grad_norm": 1.6716768528558335, "learning_rate": 2.8531485561047384e-07, "loss": 0.2545, "step": 37511 }, { "epoch": 0.6520537468059587, "grad_norm": 1.993746383570367, "learning_rate": 2.852894334813832e-07, "loss": 0.305, "step": 37512 }, { "epoch": 0.6520711293434616, "grad_norm": 1.2732617005381972, "learning_rate": 2.852640120328264e-07, "loss": 0.2511, "step": 37513 }, { "epoch": 0.6520885118809644, "grad_norm": 1.5773491644314623, "learning_rate": 2.8523859126488404e-07, "loss": 0.3247, "step": 37514 }, { "epoch": 0.6521058944184672, "grad_norm": 1.5943748649572163, "learning_rate": 2.8521317117763664e-07, "loss": 0.2304, "step": 37515 }, { "epoch": 0.65212327695597, "grad_norm": 1.2218720329634642, "learning_rate": 2.851877517711649e-07, "loss": 0.2923, "step": 37516 }, { "epoch": 0.6521406594934729, "grad_norm": 1.4813737538508973, "learning_rate": 2.851623330455493e-07, "loss": 0.2281, "step": 37517 }, { "epoch": 0.6521580420309757, "grad_norm": 1.6626237311049925, "learning_rate": 2.851369150008704e-07, "loss": 0.3487, "step": 37518 }, { "epoch": 0.6521754245684785, "grad_norm": 2.1855742347093203, "learning_rate": 2.8511149763720856e-07, "loss": 0.3486, "step": 37519 }, { "epoch": 0.6521928071059814, "grad_norm": 3.7177572227515667, "learning_rate": 2.8508608095464477e-07, "loss": 0.3891, "step": 37520 }, { "epoch": 0.6522101896434842, "grad_norm": 1.372506793921206, "learning_rate": 2.8506066495325936e-07, "loss": 0.2295, "step": 37521 }, { "epoch": 0.6522275721809869, "grad_norm": 1.2083778240287977, "learning_rate": 2.85035249633133e-07, "loss": 0.2616, "step": 37522 }, { "epoch": 0.6522449547184898, "grad_norm": 3.0597394011712775, "learning_rate": 2.850098349943461e-07, "loss": 0.2546, "step": 37523 }, { "epoch": 0.6522623372559926, "grad_norm": 1.3029730818748044, "learning_rate": 2.849844210369792e-07, "loss": 0.2323, "step": 37524 }, { "epoch": 0.6522797197934954, "grad_norm": 1.0073956143570748, "learning_rate": 2.849590077611129e-07, "loss": 0.1967, "step": 37525 }, { "epoch": 0.6522971023309982, "grad_norm": 0.9203055334359149, "learning_rate": 2.8493359516682787e-07, "loss": 0.1918, "step": 37526 }, { "epoch": 0.6523144848685011, "grad_norm": 2.058935145582457, "learning_rate": 2.849081832542043e-07, "loss": 0.3509, "step": 37527 }, { "epoch": 0.6523318674060039, "grad_norm": 1.2459572891854538, "learning_rate": 2.848827720233232e-07, "loss": 0.1497, "step": 37528 }, { "epoch": 0.6523492499435067, "grad_norm": 0.8982475024740441, "learning_rate": 2.848573614742651e-07, "loss": 0.1787, "step": 37529 }, { "epoch": 0.6523666324810096, "grad_norm": 1.3187005802330978, "learning_rate": 2.848319516071101e-07, "loss": 0.4424, "step": 37530 }, { "epoch": 0.6523840150185124, "grad_norm": 2.016230888097566, "learning_rate": 2.848065424219388e-07, "loss": 0.2914, "step": 37531 }, { "epoch": 0.6524013975560152, "grad_norm": 2.4806635185274195, "learning_rate": 2.847811339188321e-07, "loss": 0.4809, "step": 37532 }, { "epoch": 0.6524187800935181, "grad_norm": 2.3963432824142092, "learning_rate": 2.8475572609787034e-07, "loss": 0.4171, "step": 37533 }, { "epoch": 0.6524361626310209, "grad_norm": 1.4833512348447477, "learning_rate": 2.84730318959134e-07, "loss": 0.2474, "step": 37534 }, { "epoch": 0.6524535451685237, "grad_norm": 3.037148836969833, "learning_rate": 2.8470491250270376e-07, "loss": 0.3331, "step": 37535 }, { "epoch": 0.6524709277060265, "grad_norm": 0.8155143186242801, "learning_rate": 2.8467950672866003e-07, "loss": 0.1138, "step": 37536 }, { "epoch": 0.6524883102435294, "grad_norm": 1.9760599359139641, "learning_rate": 2.8465410163708334e-07, "loss": 0.4422, "step": 37537 }, { "epoch": 0.6525056927810322, "grad_norm": 1.6179647499679932, "learning_rate": 2.8462869722805416e-07, "loss": 0.2013, "step": 37538 }, { "epoch": 0.652523075318535, "grad_norm": 1.486818912005945, "learning_rate": 2.8460329350165296e-07, "loss": 0.211, "step": 37539 }, { "epoch": 0.6525404578560379, "grad_norm": 2.3755814818129357, "learning_rate": 2.8457789045796053e-07, "loss": 0.4203, "step": 37540 }, { "epoch": 0.6525578403935407, "grad_norm": 2.440065300091729, "learning_rate": 2.845524880970573e-07, "loss": 0.2204, "step": 37541 }, { "epoch": 0.6525752229310434, "grad_norm": 2.0268599918393013, "learning_rate": 2.845270864190237e-07, "loss": 0.2218, "step": 37542 }, { "epoch": 0.6525926054685462, "grad_norm": 1.8699005508699318, "learning_rate": 2.8450168542394016e-07, "loss": 0.2987, "step": 37543 }, { "epoch": 0.6526099880060491, "grad_norm": 0.9371759786128969, "learning_rate": 2.844762851118874e-07, "loss": 0.2129, "step": 37544 }, { "epoch": 0.6526273705435519, "grad_norm": 1.4349650610541598, "learning_rate": 2.844508854829457e-07, "loss": 0.3278, "step": 37545 }, { "epoch": 0.6526447530810547, "grad_norm": 1.0493916205721494, "learning_rate": 2.8442548653719576e-07, "loss": 0.1346, "step": 37546 }, { "epoch": 0.6526621356185576, "grad_norm": 2.4679898164088354, "learning_rate": 2.8440008827471775e-07, "loss": 0.4451, "step": 37547 }, { "epoch": 0.6526795181560604, "grad_norm": 1.5643515762807645, "learning_rate": 2.8437469069559284e-07, "loss": 0.1617, "step": 37548 }, { "epoch": 0.6526969006935632, "grad_norm": 1.011218194006585, "learning_rate": 2.843492937999009e-07, "loss": 0.2118, "step": 37549 }, { "epoch": 0.6527142832310661, "grad_norm": 1.2350291998657172, "learning_rate": 2.8432389758772244e-07, "loss": 0.2709, "step": 37550 }, { "epoch": 0.6527316657685689, "grad_norm": 1.5142183534106168, "learning_rate": 2.842985020591384e-07, "loss": 0.3845, "step": 37551 }, { "epoch": 0.6527490483060717, "grad_norm": 2.1166523144823453, "learning_rate": 2.84273107214229e-07, "loss": 0.2575, "step": 37552 }, { "epoch": 0.6527664308435746, "grad_norm": 1.7934654670196761, "learning_rate": 2.842477130530747e-07, "loss": 0.2445, "step": 37553 }, { "epoch": 0.6527838133810774, "grad_norm": 1.2586637104622693, "learning_rate": 2.842223195757562e-07, "loss": 0.182, "step": 37554 }, { "epoch": 0.6528011959185802, "grad_norm": 1.2670059175712012, "learning_rate": 2.8419692678235365e-07, "loss": 0.2112, "step": 37555 }, { "epoch": 0.652818578456083, "grad_norm": 1.3400926668091602, "learning_rate": 2.8417153467294776e-07, "loss": 0.2381, "step": 37556 }, { "epoch": 0.6528359609935859, "grad_norm": 1.1918102139448123, "learning_rate": 2.8414614324761896e-07, "loss": 0.2599, "step": 37557 }, { "epoch": 0.6528533435310887, "grad_norm": 2.054597397754686, "learning_rate": 2.8412075250644757e-07, "loss": 0.2893, "step": 37558 }, { "epoch": 0.6528707260685915, "grad_norm": 2.5144288939877053, "learning_rate": 2.8409536244951437e-07, "loss": 0.2679, "step": 37559 }, { "epoch": 0.6528881086060944, "grad_norm": 0.9632811842268324, "learning_rate": 2.840699730768997e-07, "loss": 0.1808, "step": 37560 }, { "epoch": 0.6529054911435972, "grad_norm": 0.9739542839810353, "learning_rate": 2.840445843886843e-07, "loss": 0.2645, "step": 37561 }, { "epoch": 0.6529228736810999, "grad_norm": 1.2662302809777874, "learning_rate": 2.840191963849479e-07, "loss": 0.2368, "step": 37562 }, { "epoch": 0.6529402562186027, "grad_norm": 2.2882273291667086, "learning_rate": 2.839938090657716e-07, "loss": 0.3229, "step": 37563 }, { "epoch": 0.6529576387561056, "grad_norm": 1.6489196245538478, "learning_rate": 2.839684224312358e-07, "loss": 0.2968, "step": 37564 }, { "epoch": 0.6529750212936084, "grad_norm": 0.9068806004645998, "learning_rate": 2.839430364814208e-07, "loss": 0.1727, "step": 37565 }, { "epoch": 0.6529924038311112, "grad_norm": 1.3612387132458554, "learning_rate": 2.83917651216407e-07, "loss": 0.3221, "step": 37566 }, { "epoch": 0.6530097863686141, "grad_norm": 1.5875569576893627, "learning_rate": 2.838922666362753e-07, "loss": 0.206, "step": 37567 }, { "epoch": 0.6530271689061169, "grad_norm": 1.054277749042926, "learning_rate": 2.8386688274110563e-07, "loss": 0.2083, "step": 37568 }, { "epoch": 0.6530445514436197, "grad_norm": 2.956549133219568, "learning_rate": 2.838414995309787e-07, "loss": 0.2017, "step": 37569 }, { "epoch": 0.6530619339811226, "grad_norm": 1.911045447691324, "learning_rate": 2.8381611700597476e-07, "loss": 0.1778, "step": 37570 }, { "epoch": 0.6530793165186254, "grad_norm": 2.0066988362660596, "learning_rate": 2.8379073516617457e-07, "loss": 0.2662, "step": 37571 }, { "epoch": 0.6530966990561282, "grad_norm": 0.8095046660334003, "learning_rate": 2.8376535401165844e-07, "loss": 0.1719, "step": 37572 }, { "epoch": 0.653114081593631, "grad_norm": 1.359638525350642, "learning_rate": 2.837399735425068e-07, "loss": 0.1093, "step": 37573 }, { "epoch": 0.6531314641311339, "grad_norm": 2.2582131121113864, "learning_rate": 2.8371459375880015e-07, "loss": 0.3212, "step": 37574 }, { "epoch": 0.6531488466686367, "grad_norm": 2.7404446973937717, "learning_rate": 2.836892146606188e-07, "loss": 0.3456, "step": 37575 }, { "epoch": 0.6531662292061395, "grad_norm": 2.9611209665966967, "learning_rate": 2.8366383624804336e-07, "loss": 0.2358, "step": 37576 }, { "epoch": 0.6531836117436424, "grad_norm": 1.8573337596744035, "learning_rate": 2.836384585211541e-07, "loss": 0.1962, "step": 37577 }, { "epoch": 0.6532009942811452, "grad_norm": 1.646865580834985, "learning_rate": 2.836130814800314e-07, "loss": 0.1634, "step": 37578 }, { "epoch": 0.653218376818648, "grad_norm": 1.646256504338649, "learning_rate": 2.83587705124756e-07, "loss": 0.2737, "step": 37579 }, { "epoch": 0.6532357593561509, "grad_norm": 1.4317124248813355, "learning_rate": 2.835623294554084e-07, "loss": 0.1243, "step": 37580 }, { "epoch": 0.6532531418936537, "grad_norm": 1.2496739813518078, "learning_rate": 2.835369544720683e-07, "loss": 0.2804, "step": 37581 }, { "epoch": 0.6532705244311564, "grad_norm": 1.3500653969348333, "learning_rate": 2.835115801748169e-07, "loss": 0.277, "step": 37582 }, { "epoch": 0.6532879069686592, "grad_norm": 0.9976691758924968, "learning_rate": 2.8348620656373437e-07, "loss": 0.3757, "step": 37583 }, { "epoch": 0.6533052895061621, "grad_norm": 1.698046819333838, "learning_rate": 2.8346083363890105e-07, "loss": 0.5465, "step": 37584 }, { "epoch": 0.6533226720436649, "grad_norm": 3.0536492461430926, "learning_rate": 2.834354614003974e-07, "loss": 0.3471, "step": 37585 }, { "epoch": 0.6533400545811677, "grad_norm": 1.0682356218912152, "learning_rate": 2.8341008984830395e-07, "loss": 0.2906, "step": 37586 }, { "epoch": 0.6533574371186706, "grad_norm": 1.4027833935479148, "learning_rate": 2.8338471898270095e-07, "loss": 0.2745, "step": 37587 }, { "epoch": 0.6533748196561734, "grad_norm": 1.2031013273602809, "learning_rate": 2.8335934880366887e-07, "loss": 0.244, "step": 37588 }, { "epoch": 0.6533922021936762, "grad_norm": 1.931953175259733, "learning_rate": 2.83333979311288e-07, "loss": 0.2764, "step": 37589 }, { "epoch": 0.653409584731179, "grad_norm": 1.9649500910202464, "learning_rate": 2.83308610505639e-07, "loss": 0.2206, "step": 37590 }, { "epoch": 0.6534269672686819, "grad_norm": 1.5198501001761593, "learning_rate": 2.8328324238680224e-07, "loss": 0.3662, "step": 37591 }, { "epoch": 0.6534443498061847, "grad_norm": 2.636180864729167, "learning_rate": 2.832578749548582e-07, "loss": 0.2635, "step": 37592 }, { "epoch": 0.6534617323436875, "grad_norm": 1.0454357311109135, "learning_rate": 2.832325082098868e-07, "loss": 0.2393, "step": 37593 }, { "epoch": 0.6534791148811904, "grad_norm": 1.7805443094463715, "learning_rate": 2.8320714215196895e-07, "loss": 0.3509, "step": 37594 }, { "epoch": 0.6534964974186932, "grad_norm": 1.430374747444487, "learning_rate": 2.8318177678118483e-07, "loss": 0.2043, "step": 37595 }, { "epoch": 0.653513879956196, "grad_norm": 1.4843059615988008, "learning_rate": 2.8315641209761495e-07, "loss": 0.4133, "step": 37596 }, { "epoch": 0.6535312624936989, "grad_norm": 1.193766035606526, "learning_rate": 2.8313104810133935e-07, "loss": 0.1899, "step": 37597 }, { "epoch": 0.6535486450312017, "grad_norm": 3.3970230522501406, "learning_rate": 2.8310568479243915e-07, "loss": 0.2521, "step": 37598 }, { "epoch": 0.6535660275687045, "grad_norm": 0.8649785136444645, "learning_rate": 2.830803221709941e-07, "loss": 0.1915, "step": 37599 }, { "epoch": 0.6535834101062074, "grad_norm": 1.3385319826565307, "learning_rate": 2.8305496023708476e-07, "loss": 0.2118, "step": 37600 }, { "epoch": 0.6536007926437102, "grad_norm": 1.1096453139108757, "learning_rate": 2.8302959899079136e-07, "loss": 0.3121, "step": 37601 }, { "epoch": 0.6536181751812129, "grad_norm": 1.2800274376893483, "learning_rate": 2.830042384321947e-07, "loss": 0.1274, "step": 37602 }, { "epoch": 0.6536355577187157, "grad_norm": 2.5077886902032605, "learning_rate": 2.8297887856137486e-07, "loss": 0.1671, "step": 37603 }, { "epoch": 0.6536529402562186, "grad_norm": 1.015868676048259, "learning_rate": 2.8295351937841227e-07, "loss": 0.2237, "step": 37604 }, { "epoch": 0.6536703227937214, "grad_norm": 1.2489870505029854, "learning_rate": 2.829281608833874e-07, "loss": 0.2583, "step": 37605 }, { "epoch": 0.6536877053312242, "grad_norm": 1.548603576320904, "learning_rate": 2.829028030763805e-07, "loss": 0.3444, "step": 37606 }, { "epoch": 0.653705087868727, "grad_norm": 1.2828675833410237, "learning_rate": 2.8287744595747196e-07, "loss": 0.244, "step": 37607 }, { "epoch": 0.6537224704062299, "grad_norm": 2.1789577167581466, "learning_rate": 2.828520895267421e-07, "loss": 0.1775, "step": 37608 }, { "epoch": 0.6537398529437327, "grad_norm": 1.307301241997059, "learning_rate": 2.8282673378427146e-07, "loss": 0.1405, "step": 37609 }, { "epoch": 0.6537572354812355, "grad_norm": 1.964943836202177, "learning_rate": 2.828013787301403e-07, "loss": 0.2793, "step": 37610 }, { "epoch": 0.6537746180187384, "grad_norm": 3.3590435973284, "learning_rate": 2.8277602436442923e-07, "loss": 0.3205, "step": 37611 }, { "epoch": 0.6537920005562412, "grad_norm": 1.5337591477276604, "learning_rate": 2.8275067068721796e-07, "loss": 0.2783, "step": 37612 }, { "epoch": 0.653809383093744, "grad_norm": 2.051231018431233, "learning_rate": 2.827253176985875e-07, "loss": 0.2381, "step": 37613 }, { "epoch": 0.6538267656312469, "grad_norm": 2.2188218918238913, "learning_rate": 2.82699965398618e-07, "loss": 0.3068, "step": 37614 }, { "epoch": 0.6538441481687497, "grad_norm": 2.0094371267876228, "learning_rate": 2.826746137873897e-07, "loss": 0.27, "step": 37615 }, { "epoch": 0.6538615307062525, "grad_norm": 3.4571924487998653, "learning_rate": 2.8264926286498293e-07, "loss": 0.3256, "step": 37616 }, { "epoch": 0.6538789132437554, "grad_norm": 1.414289313096532, "learning_rate": 2.8262391263147844e-07, "loss": 0.1433, "step": 37617 }, { "epoch": 0.6538962957812582, "grad_norm": 1.3887814656454482, "learning_rate": 2.8259856308695617e-07, "loss": 0.3085, "step": 37618 }, { "epoch": 0.653913678318761, "grad_norm": 1.897078859330695, "learning_rate": 2.825732142314966e-07, "loss": 0.4002, "step": 37619 }, { "epoch": 0.6539310608562638, "grad_norm": 1.422801132377812, "learning_rate": 2.8254786606517986e-07, "loss": 0.2635, "step": 37620 }, { "epoch": 0.6539484433937667, "grad_norm": 1.4483824302010988, "learning_rate": 2.825225185880867e-07, "loss": 0.3011, "step": 37621 }, { "epoch": 0.6539658259312694, "grad_norm": 1.8770325046221745, "learning_rate": 2.824971718002972e-07, "loss": 0.2327, "step": 37622 }, { "epoch": 0.6539832084687722, "grad_norm": 5.104637886041238, "learning_rate": 2.824718257018918e-07, "loss": 0.2185, "step": 37623 }, { "epoch": 0.6540005910062751, "grad_norm": 1.0602616197402865, "learning_rate": 2.824464802929508e-07, "loss": 0.2075, "step": 37624 }, { "epoch": 0.6540179735437779, "grad_norm": 0.9925708511929381, "learning_rate": 2.824211355735545e-07, "loss": 0.224, "step": 37625 }, { "epoch": 0.6540353560812807, "grad_norm": 2.714727406771236, "learning_rate": 2.8239579154378326e-07, "loss": 0.2243, "step": 37626 }, { "epoch": 0.6540527386187835, "grad_norm": 2.890713904142858, "learning_rate": 2.823704482037174e-07, "loss": 0.1738, "step": 37627 }, { "epoch": 0.6540701211562864, "grad_norm": 2.2238486152615717, "learning_rate": 2.8234510555343705e-07, "loss": 0.2161, "step": 37628 }, { "epoch": 0.6540875036937892, "grad_norm": 1.97821360728987, "learning_rate": 2.823197635930229e-07, "loss": 0.2856, "step": 37629 }, { "epoch": 0.654104886231292, "grad_norm": 1.4703371412865975, "learning_rate": 2.8229442232255536e-07, "loss": 0.2178, "step": 37630 }, { "epoch": 0.6541222687687949, "grad_norm": 1.8064904885037292, "learning_rate": 2.8226908174211405e-07, "loss": 0.2165, "step": 37631 }, { "epoch": 0.6541396513062977, "grad_norm": 2.02910757327607, "learning_rate": 2.8224374185177997e-07, "loss": 0.3652, "step": 37632 }, { "epoch": 0.6541570338438005, "grad_norm": 1.262418016018812, "learning_rate": 2.822184026516332e-07, "loss": 0.3632, "step": 37633 }, { "epoch": 0.6541744163813034, "grad_norm": 2.1499363993400835, "learning_rate": 2.8219306414175405e-07, "loss": 0.1554, "step": 37634 }, { "epoch": 0.6541917989188062, "grad_norm": 1.5200098186105429, "learning_rate": 2.821677263222228e-07, "loss": 0.2887, "step": 37635 }, { "epoch": 0.654209181456309, "grad_norm": 1.9906606506621318, "learning_rate": 2.8214238919311985e-07, "loss": 0.2306, "step": 37636 }, { "epoch": 0.6542265639938118, "grad_norm": 1.455560866930576, "learning_rate": 2.8211705275452534e-07, "loss": 0.3743, "step": 37637 }, { "epoch": 0.6542439465313147, "grad_norm": 1.3848373661264872, "learning_rate": 2.820917170065198e-07, "loss": 0.2045, "step": 37638 }, { "epoch": 0.6542613290688175, "grad_norm": 2.3930996639892927, "learning_rate": 2.820663819491832e-07, "loss": 0.2794, "step": 37639 }, { "epoch": 0.6542787116063203, "grad_norm": 1.1923536007607847, "learning_rate": 2.820410475825962e-07, "loss": 0.3221, "step": 37640 }, { "epoch": 0.6542960941438232, "grad_norm": 1.85317192454003, "learning_rate": 2.8201571390683897e-07, "loss": 0.3528, "step": 37641 }, { "epoch": 0.6543134766813259, "grad_norm": 1.37313060474158, "learning_rate": 2.8199038092199187e-07, "loss": 0.2481, "step": 37642 }, { "epoch": 0.6543308592188287, "grad_norm": 2.1836082397303462, "learning_rate": 2.8196504862813503e-07, "loss": 0.3476, "step": 37643 }, { "epoch": 0.6543482417563316, "grad_norm": 1.4000822781657163, "learning_rate": 2.8193971702534887e-07, "loss": 0.248, "step": 37644 }, { "epoch": 0.6543656242938344, "grad_norm": 1.5927217935504623, "learning_rate": 2.8191438611371363e-07, "loss": 0.296, "step": 37645 }, { "epoch": 0.6543830068313372, "grad_norm": 1.6542268575696475, "learning_rate": 2.818890558933096e-07, "loss": 0.2406, "step": 37646 }, { "epoch": 0.65440038936884, "grad_norm": 1.1196172924382695, "learning_rate": 2.8186372636421684e-07, "loss": 0.3403, "step": 37647 }, { "epoch": 0.6544177719063429, "grad_norm": 1.3024427743283067, "learning_rate": 2.8183839752651615e-07, "loss": 0.2591, "step": 37648 }, { "epoch": 0.6544351544438457, "grad_norm": 1.4878166332192162, "learning_rate": 2.8181306938028764e-07, "loss": 0.2522, "step": 37649 }, { "epoch": 0.6544525369813485, "grad_norm": 2.761707395724869, "learning_rate": 2.8178774192561134e-07, "loss": 0.3366, "step": 37650 }, { "epoch": 0.6544699195188514, "grad_norm": 1.3062548920889858, "learning_rate": 2.8176241516256736e-07, "loss": 0.1186, "step": 37651 }, { "epoch": 0.6544873020563542, "grad_norm": 0.9053387032822328, "learning_rate": 2.8173708909123663e-07, "loss": 0.2657, "step": 37652 }, { "epoch": 0.654504684593857, "grad_norm": 1.7462379945279392, "learning_rate": 2.8171176371169894e-07, "loss": 0.1178, "step": 37653 }, { "epoch": 0.6545220671313599, "grad_norm": 1.1108886320362399, "learning_rate": 2.816864390240347e-07, "loss": 0.3181, "step": 37654 }, { "epoch": 0.6545394496688627, "grad_norm": 1.8060460025878888, "learning_rate": 2.816611150283242e-07, "loss": 0.2012, "step": 37655 }, { "epoch": 0.6545568322063655, "grad_norm": 2.2270229717356265, "learning_rate": 2.816357917246477e-07, "loss": 0.144, "step": 37656 }, { "epoch": 0.6545742147438683, "grad_norm": 1.3046373551866888, "learning_rate": 2.816104691130854e-07, "loss": 0.2594, "step": 37657 }, { "epoch": 0.6545915972813712, "grad_norm": 4.05300071366114, "learning_rate": 2.815851471937175e-07, "loss": 0.3243, "step": 37658 }, { "epoch": 0.654608979818874, "grad_norm": 1.3150356641949779, "learning_rate": 2.8155982596662425e-07, "loss": 0.3284, "step": 37659 }, { "epoch": 0.6546263623563768, "grad_norm": 1.2257983378139863, "learning_rate": 2.8153450543188616e-07, "loss": 0.2945, "step": 37660 }, { "epoch": 0.6546437448938796, "grad_norm": 1.1446493494154777, "learning_rate": 2.815091855895834e-07, "loss": 0.2048, "step": 37661 }, { "epoch": 0.6546611274313824, "grad_norm": 1.6772511422972576, "learning_rate": 2.814838664397962e-07, "loss": 0.3327, "step": 37662 }, { "epoch": 0.6546785099688852, "grad_norm": 1.3784479223014101, "learning_rate": 2.814585479826046e-07, "loss": 0.1629, "step": 37663 }, { "epoch": 0.654695892506388, "grad_norm": 1.1446396780809693, "learning_rate": 2.814332302180891e-07, "loss": 0.1643, "step": 37664 }, { "epoch": 0.6547132750438909, "grad_norm": 2.180103422307079, "learning_rate": 2.8140791314632984e-07, "loss": 0.3082, "step": 37665 }, { "epoch": 0.6547306575813937, "grad_norm": 2.0662675016159024, "learning_rate": 2.8138259676740713e-07, "loss": 0.2623, "step": 37666 }, { "epoch": 0.6547480401188965, "grad_norm": 1.2724699649075646, "learning_rate": 2.813572810814009e-07, "loss": 0.2805, "step": 37667 }, { "epoch": 0.6547654226563994, "grad_norm": 1.0077482333410912, "learning_rate": 2.813319660883921e-07, "loss": 0.2142, "step": 37668 }, { "epoch": 0.6547828051939022, "grad_norm": 1.3397094900365991, "learning_rate": 2.813066517884603e-07, "loss": 0.1975, "step": 37669 }, { "epoch": 0.654800187731405, "grad_norm": 2.0095089980808836, "learning_rate": 2.8128133818168573e-07, "loss": 0.2668, "step": 37670 }, { "epoch": 0.6548175702689079, "grad_norm": 1.0309397789898034, "learning_rate": 2.8125602526814914e-07, "loss": 0.223, "step": 37671 }, { "epoch": 0.6548349528064107, "grad_norm": 1.4956564446296572, "learning_rate": 2.812307130479304e-07, "loss": 0.2457, "step": 37672 }, { "epoch": 0.6548523353439135, "grad_norm": 1.1837396788933234, "learning_rate": 2.8120540152110976e-07, "loss": 0.3196, "step": 37673 }, { "epoch": 0.6548697178814163, "grad_norm": 0.9407169358657933, "learning_rate": 2.811800906877676e-07, "loss": 0.293, "step": 37674 }, { "epoch": 0.6548871004189192, "grad_norm": 1.414658478250673, "learning_rate": 2.811547805479839e-07, "loss": 0.1662, "step": 37675 }, { "epoch": 0.654904482956422, "grad_norm": 2.0277815885582817, "learning_rate": 2.8112947110183916e-07, "loss": 0.2995, "step": 37676 }, { "epoch": 0.6549218654939248, "grad_norm": 1.58481941554714, "learning_rate": 2.811041623494134e-07, "loss": 0.201, "step": 37677 }, { "epoch": 0.6549392480314277, "grad_norm": 1.1416550564572177, "learning_rate": 2.8107885429078657e-07, "loss": 0.1827, "step": 37678 }, { "epoch": 0.6549566305689305, "grad_norm": 1.41177370734766, "learning_rate": 2.8105354692603957e-07, "loss": 0.2016, "step": 37679 }, { "epoch": 0.6549740131064333, "grad_norm": 1.1474488811002042, "learning_rate": 2.810282402552522e-07, "loss": 0.2538, "step": 37680 }, { "epoch": 0.654991395643936, "grad_norm": 1.3486763030788194, "learning_rate": 2.810029342785049e-07, "loss": 0.2198, "step": 37681 }, { "epoch": 0.6550087781814389, "grad_norm": 1.756692018579018, "learning_rate": 2.809776289958773e-07, "loss": 0.358, "step": 37682 }, { "epoch": 0.6550261607189417, "grad_norm": 1.3879440191210355, "learning_rate": 2.8095232440745027e-07, "loss": 0.2024, "step": 37683 }, { "epoch": 0.6550435432564445, "grad_norm": 1.5705518726633036, "learning_rate": 2.809270205133037e-07, "loss": 0.1803, "step": 37684 }, { "epoch": 0.6550609257939474, "grad_norm": 3.487177035633156, "learning_rate": 2.809017173135179e-07, "loss": 0.2558, "step": 37685 }, { "epoch": 0.6550783083314502, "grad_norm": 1.3890520749606483, "learning_rate": 2.808764148081727e-07, "loss": 0.2757, "step": 37686 }, { "epoch": 0.655095690868953, "grad_norm": 1.6773757745805529, "learning_rate": 2.8085111299734904e-07, "loss": 0.2001, "step": 37687 }, { "epoch": 0.6551130734064559, "grad_norm": 1.297395305287439, "learning_rate": 2.808258118811265e-07, "loss": 0.2725, "step": 37688 }, { "epoch": 0.6551304559439587, "grad_norm": 1.5455150349366729, "learning_rate": 2.808005114595855e-07, "loss": 0.3215, "step": 37689 }, { "epoch": 0.6551478384814615, "grad_norm": 1.126552942960295, "learning_rate": 2.8077521173280594e-07, "loss": 0.1511, "step": 37690 }, { "epoch": 0.6551652210189643, "grad_norm": 0.9792801407488342, "learning_rate": 2.8074991270086843e-07, "loss": 0.2118, "step": 37691 }, { "epoch": 0.6551826035564672, "grad_norm": 3.1660155330315582, "learning_rate": 2.80724614363853e-07, "loss": 0.2173, "step": 37692 }, { "epoch": 0.65519998609397, "grad_norm": 1.8455924451758827, "learning_rate": 2.8069931672183986e-07, "loss": 0.2009, "step": 37693 }, { "epoch": 0.6552173686314728, "grad_norm": 2.206448742813163, "learning_rate": 2.806740197749091e-07, "loss": 0.184, "step": 37694 }, { "epoch": 0.6552347511689757, "grad_norm": 2.635692590290418, "learning_rate": 2.8064872352314093e-07, "loss": 0.2641, "step": 37695 }, { "epoch": 0.6552521337064785, "grad_norm": 1.7181194871354706, "learning_rate": 2.8062342796661565e-07, "loss": 0.2774, "step": 37696 }, { "epoch": 0.6552695162439813, "grad_norm": 1.5492787823256196, "learning_rate": 2.80598133105413e-07, "loss": 0.1562, "step": 37697 }, { "epoch": 0.6552868987814842, "grad_norm": 1.2034488014111773, "learning_rate": 2.805728389396138e-07, "loss": 0.2139, "step": 37698 }, { "epoch": 0.655304281318987, "grad_norm": 1.3110521063016554, "learning_rate": 2.805475454692978e-07, "loss": 0.224, "step": 37699 }, { "epoch": 0.6553216638564898, "grad_norm": 0.9281490278728789, "learning_rate": 2.8052225269454557e-07, "loss": 0.1897, "step": 37700 }, { "epoch": 0.6553390463939925, "grad_norm": 1.7122558894699502, "learning_rate": 2.804969606154365e-07, "loss": 0.2459, "step": 37701 }, { "epoch": 0.6553564289314954, "grad_norm": 1.575972884316088, "learning_rate": 2.8047166923205144e-07, "loss": 0.2302, "step": 37702 }, { "epoch": 0.6553738114689982, "grad_norm": 1.3791859118745922, "learning_rate": 2.804463785444704e-07, "loss": 0.33, "step": 37703 }, { "epoch": 0.655391194006501, "grad_norm": 2.01334714076331, "learning_rate": 2.804210885527735e-07, "loss": 0.2665, "step": 37704 }, { "epoch": 0.6554085765440039, "grad_norm": 2.2110753707298407, "learning_rate": 2.803957992570406e-07, "loss": 0.1688, "step": 37705 }, { "epoch": 0.6554259590815067, "grad_norm": 1.4116122555305441, "learning_rate": 2.8037051065735253e-07, "loss": 0.1952, "step": 37706 }, { "epoch": 0.6554433416190095, "grad_norm": 1.9095643186977123, "learning_rate": 2.8034522275378894e-07, "loss": 0.2098, "step": 37707 }, { "epoch": 0.6554607241565124, "grad_norm": 2.5809851465040543, "learning_rate": 2.8031993554643e-07, "loss": 0.3224, "step": 37708 }, { "epoch": 0.6554781066940152, "grad_norm": 1.333956395445127, "learning_rate": 2.802946490353558e-07, "loss": 0.3639, "step": 37709 }, { "epoch": 0.655495489231518, "grad_norm": 2.6960181419198936, "learning_rate": 2.8026936322064687e-07, "loss": 0.3125, "step": 37710 }, { "epoch": 0.6555128717690208, "grad_norm": 1.4000738578658107, "learning_rate": 2.8024407810238304e-07, "loss": 0.2448, "step": 37711 }, { "epoch": 0.6555302543065237, "grad_norm": 1.8309684368387578, "learning_rate": 2.802187936806446e-07, "loss": 0.1775, "step": 37712 }, { "epoch": 0.6555476368440265, "grad_norm": 2.2391117310341935, "learning_rate": 2.801935099555116e-07, "loss": 0.1741, "step": 37713 }, { "epoch": 0.6555650193815293, "grad_norm": 1.0005047265038962, "learning_rate": 2.801682269270642e-07, "loss": 0.3204, "step": 37714 }, { "epoch": 0.6555824019190322, "grad_norm": 1.1461174054419485, "learning_rate": 2.801429445953826e-07, "loss": 0.3186, "step": 37715 }, { "epoch": 0.655599784456535, "grad_norm": 1.3370835513622337, "learning_rate": 2.8011766296054684e-07, "loss": 0.2379, "step": 37716 }, { "epoch": 0.6556171669940378, "grad_norm": 1.5441592030580902, "learning_rate": 2.800923820226369e-07, "loss": 0.2586, "step": 37717 }, { "epoch": 0.6556345495315407, "grad_norm": 2.0673676512057413, "learning_rate": 2.8006710178173344e-07, "loss": 0.2254, "step": 37718 }, { "epoch": 0.6556519320690435, "grad_norm": 1.4959313081277144, "learning_rate": 2.80041822237916e-07, "loss": 0.1999, "step": 37719 }, { "epoch": 0.6556693146065463, "grad_norm": 1.4373872421270508, "learning_rate": 2.800165433912648e-07, "loss": 0.2633, "step": 37720 }, { "epoch": 0.655686697144049, "grad_norm": 1.5808032325006383, "learning_rate": 2.7999126524186035e-07, "loss": 0.2419, "step": 37721 }, { "epoch": 0.6557040796815519, "grad_norm": 1.220262994319604, "learning_rate": 2.799659877897824e-07, "loss": 0.1594, "step": 37722 }, { "epoch": 0.6557214622190547, "grad_norm": 1.7084054614400022, "learning_rate": 2.799407110351113e-07, "loss": 0.2451, "step": 37723 }, { "epoch": 0.6557388447565575, "grad_norm": 3.4177552489436573, "learning_rate": 2.79915434977927e-07, "loss": 0.3879, "step": 37724 }, { "epoch": 0.6557562272940604, "grad_norm": 1.9044455817802481, "learning_rate": 2.7989015961830975e-07, "loss": 0.1775, "step": 37725 }, { "epoch": 0.6557736098315632, "grad_norm": 2.74329121515383, "learning_rate": 2.7986488495633953e-07, "loss": 0.3292, "step": 37726 }, { "epoch": 0.655790992369066, "grad_norm": 3.413827138220309, "learning_rate": 2.798396109920965e-07, "loss": 0.2685, "step": 37727 }, { "epoch": 0.6558083749065688, "grad_norm": 1.77980236456572, "learning_rate": 2.798143377256606e-07, "loss": 0.2013, "step": 37728 }, { "epoch": 0.6558257574440717, "grad_norm": 1.3597060131040215, "learning_rate": 2.797890651571123e-07, "loss": 0.3645, "step": 37729 }, { "epoch": 0.6558431399815745, "grad_norm": 1.7194748534811064, "learning_rate": 2.7976379328653145e-07, "loss": 0.2023, "step": 37730 }, { "epoch": 0.6558605225190773, "grad_norm": 1.3124485576802418, "learning_rate": 2.7973852211399843e-07, "loss": 0.3461, "step": 37731 }, { "epoch": 0.6558779050565802, "grad_norm": 2.2119073412930828, "learning_rate": 2.7971325163959283e-07, "loss": 0.2137, "step": 37732 }, { "epoch": 0.655895287594083, "grad_norm": 0.7431145585424993, "learning_rate": 2.7968798186339505e-07, "loss": 0.3095, "step": 37733 }, { "epoch": 0.6559126701315858, "grad_norm": 1.8876974068068793, "learning_rate": 2.7966271278548525e-07, "loss": 0.2104, "step": 37734 }, { "epoch": 0.6559300526690887, "grad_norm": 2.3895697424381828, "learning_rate": 2.796374444059434e-07, "loss": 0.3017, "step": 37735 }, { "epoch": 0.6559474352065915, "grad_norm": 0.9749723312318727, "learning_rate": 2.796121767248495e-07, "loss": 0.3185, "step": 37736 }, { "epoch": 0.6559648177440943, "grad_norm": 1.4038278147445125, "learning_rate": 2.795869097422842e-07, "loss": 0.1771, "step": 37737 }, { "epoch": 0.6559822002815971, "grad_norm": 2.4888689050311923, "learning_rate": 2.795616434583268e-07, "loss": 0.2537, "step": 37738 }, { "epoch": 0.6559995828191, "grad_norm": 1.1949594157606425, "learning_rate": 2.795363778730578e-07, "loss": 0.2777, "step": 37739 }, { "epoch": 0.6560169653566028, "grad_norm": 2.169742090108953, "learning_rate": 2.79511112986557e-07, "loss": 0.3373, "step": 37740 }, { "epoch": 0.6560343478941055, "grad_norm": 1.5430089778200342, "learning_rate": 2.794858487989049e-07, "loss": 0.246, "step": 37741 }, { "epoch": 0.6560517304316084, "grad_norm": 0.9701018409239907, "learning_rate": 2.7946058531018134e-07, "loss": 0.2834, "step": 37742 }, { "epoch": 0.6560691129691112, "grad_norm": 1.9128913823023697, "learning_rate": 2.794353225204664e-07, "loss": 0.309, "step": 37743 }, { "epoch": 0.656086495506614, "grad_norm": 2.5105572766344366, "learning_rate": 2.7941006042984025e-07, "loss": 0.1795, "step": 37744 }, { "epoch": 0.6561038780441169, "grad_norm": 1.7399600663682386, "learning_rate": 2.7938479903838287e-07, "loss": 0.202, "step": 37745 }, { "epoch": 0.6561212605816197, "grad_norm": 0.9410329341661462, "learning_rate": 2.7935953834617427e-07, "loss": 0.2183, "step": 37746 }, { "epoch": 0.6561386431191225, "grad_norm": 2.416692343060271, "learning_rate": 2.793342783532946e-07, "loss": 0.1977, "step": 37747 }, { "epoch": 0.6561560256566253, "grad_norm": 2.95340021593357, "learning_rate": 2.7930901905982373e-07, "loss": 0.2511, "step": 37748 }, { "epoch": 0.6561734081941282, "grad_norm": 1.5061639711409465, "learning_rate": 2.79283760465842e-07, "loss": 0.2098, "step": 37749 }, { "epoch": 0.656190790731631, "grad_norm": 1.343719836964684, "learning_rate": 2.792585025714297e-07, "loss": 0.1823, "step": 37750 }, { "epoch": 0.6562081732691338, "grad_norm": 0.9853459176799304, "learning_rate": 2.7923324537666603e-07, "loss": 0.265, "step": 37751 }, { "epoch": 0.6562255558066367, "grad_norm": 1.5767454715377365, "learning_rate": 2.7920798888163184e-07, "loss": 0.1471, "step": 37752 }, { "epoch": 0.6562429383441395, "grad_norm": 1.7757182637062983, "learning_rate": 2.7918273308640684e-07, "loss": 0.3046, "step": 37753 }, { "epoch": 0.6562603208816423, "grad_norm": 1.8477482736717905, "learning_rate": 2.7915747799107114e-07, "loss": 0.2133, "step": 37754 }, { "epoch": 0.6562777034191452, "grad_norm": 1.8850710269970932, "learning_rate": 2.791322235957049e-07, "loss": 0.4408, "step": 37755 }, { "epoch": 0.656295085956648, "grad_norm": 0.929012161392482, "learning_rate": 2.7910696990038796e-07, "loss": 0.2298, "step": 37756 }, { "epoch": 0.6563124684941508, "grad_norm": 1.1624458504969175, "learning_rate": 2.7908171690520046e-07, "loss": 0.3053, "step": 37757 }, { "epoch": 0.6563298510316536, "grad_norm": 1.5636467497956767, "learning_rate": 2.7905646461022244e-07, "loss": 0.2651, "step": 37758 }, { "epoch": 0.6563472335691565, "grad_norm": 1.1561935240087124, "learning_rate": 2.7903121301553367e-07, "loss": 0.1857, "step": 37759 }, { "epoch": 0.6563646161066593, "grad_norm": 1.603188009896716, "learning_rate": 2.7900596212121473e-07, "loss": 0.2282, "step": 37760 }, { "epoch": 0.656381998644162, "grad_norm": 1.146033277075745, "learning_rate": 2.789807119273454e-07, "loss": 0.2318, "step": 37761 }, { "epoch": 0.6563993811816649, "grad_norm": 1.8598225392341858, "learning_rate": 2.789554624340056e-07, "loss": 0.179, "step": 37762 }, { "epoch": 0.6564167637191677, "grad_norm": 1.6048649611729273, "learning_rate": 2.789302136412755e-07, "loss": 0.2384, "step": 37763 }, { "epoch": 0.6564341462566705, "grad_norm": 4.22199806346244, "learning_rate": 2.7890496554923506e-07, "loss": 0.2387, "step": 37764 }, { "epoch": 0.6564515287941733, "grad_norm": 1.6646745916572276, "learning_rate": 2.788797181579643e-07, "loss": 0.1585, "step": 37765 }, { "epoch": 0.6564689113316762, "grad_norm": 0.9293244983643231, "learning_rate": 2.788544714675433e-07, "loss": 0.3452, "step": 37766 }, { "epoch": 0.656486293869179, "grad_norm": 1.6305211542518476, "learning_rate": 2.788292254780518e-07, "loss": 0.1656, "step": 37767 }, { "epoch": 0.6565036764066818, "grad_norm": 1.8835244162246627, "learning_rate": 2.788039801895703e-07, "loss": 0.2299, "step": 37768 }, { "epoch": 0.6565210589441847, "grad_norm": 1.732811185755913, "learning_rate": 2.7877873560217873e-07, "loss": 0.158, "step": 37769 }, { "epoch": 0.6565384414816875, "grad_norm": 1.4412673382745613, "learning_rate": 2.7875349171595676e-07, "loss": 0.2472, "step": 37770 }, { "epoch": 0.6565558240191903, "grad_norm": 1.0471781351978777, "learning_rate": 2.787282485309844e-07, "loss": 0.4761, "step": 37771 }, { "epoch": 0.6565732065566932, "grad_norm": 1.1053800368099236, "learning_rate": 2.78703006047342e-07, "loss": 0.1265, "step": 37772 }, { "epoch": 0.656590589094196, "grad_norm": 1.634963535761391, "learning_rate": 2.7867776426510945e-07, "loss": 0.1961, "step": 37773 }, { "epoch": 0.6566079716316988, "grad_norm": 2.710092315456569, "learning_rate": 2.786525231843668e-07, "loss": 0.3651, "step": 37774 }, { "epoch": 0.6566253541692016, "grad_norm": 1.1428026884988611, "learning_rate": 2.7862728280519385e-07, "loss": 0.1336, "step": 37775 }, { "epoch": 0.6566427367067045, "grad_norm": 1.430811321398486, "learning_rate": 2.7860204312767075e-07, "loss": 0.2122, "step": 37776 }, { "epoch": 0.6566601192442073, "grad_norm": 1.0801652930124437, "learning_rate": 2.7857680415187756e-07, "loss": 0.1629, "step": 37777 }, { "epoch": 0.6566775017817101, "grad_norm": 2.154477171507197, "learning_rate": 2.785515658778941e-07, "loss": 0.3858, "step": 37778 }, { "epoch": 0.656694884319213, "grad_norm": 1.6750131727235653, "learning_rate": 2.7852632830580024e-07, "loss": 0.1778, "step": 37779 }, { "epoch": 0.6567122668567158, "grad_norm": 1.5437642553998538, "learning_rate": 2.785010914356765e-07, "loss": 0.2311, "step": 37780 }, { "epoch": 0.6567296493942185, "grad_norm": 1.5353326952120936, "learning_rate": 2.784758552676024e-07, "loss": 0.2874, "step": 37781 }, { "epoch": 0.6567470319317213, "grad_norm": 1.5593724762075087, "learning_rate": 2.7845061980165816e-07, "loss": 0.1506, "step": 37782 }, { "epoch": 0.6567644144692242, "grad_norm": 1.6209073420109357, "learning_rate": 2.7842538503792365e-07, "loss": 0.1426, "step": 37783 }, { "epoch": 0.656781797006727, "grad_norm": 0.9594378453183461, "learning_rate": 2.78400150976479e-07, "loss": 0.1687, "step": 37784 }, { "epoch": 0.6567991795442298, "grad_norm": 1.315591068676906, "learning_rate": 2.783749176174039e-07, "loss": 0.1879, "step": 37785 }, { "epoch": 0.6568165620817327, "grad_norm": 2.5498210619836663, "learning_rate": 2.783496849607784e-07, "loss": 0.3288, "step": 37786 }, { "epoch": 0.6568339446192355, "grad_norm": 1.2968976023910794, "learning_rate": 2.783244530066828e-07, "loss": 0.2844, "step": 37787 }, { "epoch": 0.6568513271567383, "grad_norm": 1.513447143256847, "learning_rate": 2.78299221755197e-07, "loss": 0.2339, "step": 37788 }, { "epoch": 0.6568687096942412, "grad_norm": 1.6582819389145877, "learning_rate": 2.7827399120640064e-07, "loss": 0.1942, "step": 37789 }, { "epoch": 0.656886092231744, "grad_norm": 1.5726163138616427, "learning_rate": 2.7824876136037366e-07, "loss": 0.2145, "step": 37790 }, { "epoch": 0.6569034747692468, "grad_norm": 1.558833572470516, "learning_rate": 2.782235322171965e-07, "loss": 0.1074, "step": 37791 }, { "epoch": 0.6569208573067497, "grad_norm": 1.6528713295060697, "learning_rate": 2.781983037769489e-07, "loss": 0.3223, "step": 37792 }, { "epoch": 0.6569382398442525, "grad_norm": 1.425098593939609, "learning_rate": 2.7817307603971065e-07, "loss": 0.1785, "step": 37793 }, { "epoch": 0.6569556223817553, "grad_norm": 1.7725136144175453, "learning_rate": 2.7814784900556195e-07, "loss": 0.2978, "step": 37794 }, { "epoch": 0.6569730049192581, "grad_norm": 1.2901162504252996, "learning_rate": 2.7812262267458264e-07, "loss": 0.2832, "step": 37795 }, { "epoch": 0.656990387456761, "grad_norm": 2.5252956410963874, "learning_rate": 2.7809739704685264e-07, "loss": 0.337, "step": 37796 }, { "epoch": 0.6570077699942638, "grad_norm": 1.1283135014022043, "learning_rate": 2.7807217212245205e-07, "loss": 0.2109, "step": 37797 }, { "epoch": 0.6570251525317666, "grad_norm": 1.467101603093917, "learning_rate": 2.7804694790146045e-07, "loss": 0.3249, "step": 37798 }, { "epoch": 0.6570425350692695, "grad_norm": 0.916050021236823, "learning_rate": 2.7802172438395835e-07, "loss": 0.1546, "step": 37799 }, { "epoch": 0.6570599176067722, "grad_norm": 1.9785366794652848, "learning_rate": 2.779965015700253e-07, "loss": 0.2781, "step": 37800 }, { "epoch": 0.657077300144275, "grad_norm": 1.8195426903143674, "learning_rate": 2.779712794597414e-07, "loss": 0.2085, "step": 37801 }, { "epoch": 0.6570946826817778, "grad_norm": 1.88855663711628, "learning_rate": 2.779460580531866e-07, "loss": 0.1782, "step": 37802 }, { "epoch": 0.6571120652192807, "grad_norm": 1.568263936591391, "learning_rate": 2.7792083735044083e-07, "loss": 0.2261, "step": 37803 }, { "epoch": 0.6571294477567835, "grad_norm": 1.302711739205875, "learning_rate": 2.778956173515839e-07, "loss": 0.145, "step": 37804 }, { "epoch": 0.6571468302942863, "grad_norm": 1.451603524253581, "learning_rate": 2.778703980566959e-07, "loss": 0.1991, "step": 37805 }, { "epoch": 0.6571642128317892, "grad_norm": 1.743189606504579, "learning_rate": 2.778451794658564e-07, "loss": 0.1871, "step": 37806 }, { "epoch": 0.657181595369292, "grad_norm": 1.3683163124820419, "learning_rate": 2.778199615791461e-07, "loss": 0.1856, "step": 37807 }, { "epoch": 0.6571989779067948, "grad_norm": 1.9359154499336149, "learning_rate": 2.777947443966443e-07, "loss": 0.2447, "step": 37808 }, { "epoch": 0.6572163604442977, "grad_norm": 1.3781726847666553, "learning_rate": 2.7776952791843085e-07, "loss": 0.237, "step": 37809 }, { "epoch": 0.6572337429818005, "grad_norm": 1.0582558202359278, "learning_rate": 2.777443121445861e-07, "loss": 0.176, "step": 37810 }, { "epoch": 0.6572511255193033, "grad_norm": 2.219910216373888, "learning_rate": 2.777190970751898e-07, "loss": 0.2521, "step": 37811 }, { "epoch": 0.6572685080568061, "grad_norm": 1.032049234579504, "learning_rate": 2.776938827103218e-07, "loss": 0.1817, "step": 37812 }, { "epoch": 0.657285890594309, "grad_norm": 2.5645599355050424, "learning_rate": 2.7766866905006215e-07, "loss": 0.2418, "step": 37813 }, { "epoch": 0.6573032731318118, "grad_norm": 0.9206365404250574, "learning_rate": 2.776434560944907e-07, "loss": 0.2363, "step": 37814 }, { "epoch": 0.6573206556693146, "grad_norm": 3.3374557457236897, "learning_rate": 2.7761824384368727e-07, "loss": 0.3396, "step": 37815 }, { "epoch": 0.6573380382068175, "grad_norm": 1.0720687645203077, "learning_rate": 2.775930322977319e-07, "loss": 0.1364, "step": 37816 }, { "epoch": 0.6573554207443203, "grad_norm": 1.6575744300740083, "learning_rate": 2.775678214567043e-07, "loss": 0.1902, "step": 37817 }, { "epoch": 0.6573728032818231, "grad_norm": 0.7687422616900862, "learning_rate": 2.775426113206847e-07, "loss": 0.3576, "step": 37818 }, { "epoch": 0.657390185819326, "grad_norm": 1.4012723740884865, "learning_rate": 2.7751740188975274e-07, "loss": 0.2396, "step": 37819 }, { "epoch": 0.6574075683568287, "grad_norm": 1.420207956614298, "learning_rate": 2.774921931639887e-07, "loss": 0.2313, "step": 37820 }, { "epoch": 0.6574249508943315, "grad_norm": 0.9882728720053029, "learning_rate": 2.774669851434718e-07, "loss": 0.3044, "step": 37821 }, { "epoch": 0.6574423334318343, "grad_norm": 1.7390169773696251, "learning_rate": 2.774417778282825e-07, "loss": 0.2446, "step": 37822 }, { "epoch": 0.6574597159693372, "grad_norm": 2.0800131041401317, "learning_rate": 2.774165712185005e-07, "loss": 0.2579, "step": 37823 }, { "epoch": 0.65747709850684, "grad_norm": 1.909064940899974, "learning_rate": 2.773913653142058e-07, "loss": 0.3232, "step": 37824 }, { "epoch": 0.6574944810443428, "grad_norm": 1.432072586308917, "learning_rate": 2.7736616011547796e-07, "loss": 0.1701, "step": 37825 }, { "epoch": 0.6575118635818457, "grad_norm": 1.3649869695355956, "learning_rate": 2.773409556223976e-07, "loss": 0.2114, "step": 37826 }, { "epoch": 0.6575292461193485, "grad_norm": 1.8530609301314618, "learning_rate": 2.773157518350439e-07, "loss": 0.2705, "step": 37827 }, { "epoch": 0.6575466286568513, "grad_norm": 1.117346602437743, "learning_rate": 2.772905487534969e-07, "loss": 0.2505, "step": 37828 }, { "epoch": 0.6575640111943541, "grad_norm": 0.9822281973634737, "learning_rate": 2.772653463778364e-07, "loss": 0.1469, "step": 37829 }, { "epoch": 0.657581393731857, "grad_norm": 1.269699507430432, "learning_rate": 2.772401447081427e-07, "loss": 0.1961, "step": 37830 }, { "epoch": 0.6575987762693598, "grad_norm": 2.2657409925337513, "learning_rate": 2.772149437444953e-07, "loss": 0.4581, "step": 37831 }, { "epoch": 0.6576161588068626, "grad_norm": 1.8772222034059631, "learning_rate": 2.771897434869742e-07, "loss": 0.2131, "step": 37832 }, { "epoch": 0.6576335413443655, "grad_norm": 1.9075301711807027, "learning_rate": 2.7716454393565937e-07, "loss": 0.2231, "step": 37833 }, { "epoch": 0.6576509238818683, "grad_norm": 1.4316784207869213, "learning_rate": 2.771393450906305e-07, "loss": 0.2897, "step": 37834 }, { "epoch": 0.6576683064193711, "grad_norm": 2.558778839764511, "learning_rate": 2.7711414695196755e-07, "loss": 0.3428, "step": 37835 }, { "epoch": 0.657685688956874, "grad_norm": 1.84258855569827, "learning_rate": 2.770889495197504e-07, "loss": 0.218, "step": 37836 }, { "epoch": 0.6577030714943768, "grad_norm": 1.4659015487203644, "learning_rate": 2.770637527940586e-07, "loss": 0.2224, "step": 37837 }, { "epoch": 0.6577204540318796, "grad_norm": 1.0910513011879923, "learning_rate": 2.7703855677497273e-07, "loss": 0.1826, "step": 37838 }, { "epoch": 0.6577378365693825, "grad_norm": 1.5460166065611547, "learning_rate": 2.7701336146257203e-07, "loss": 0.1599, "step": 37839 }, { "epoch": 0.6577552191068852, "grad_norm": 1.3192521458797268, "learning_rate": 2.769881668569363e-07, "loss": 0.1284, "step": 37840 }, { "epoch": 0.657772601644388, "grad_norm": 2.1354666060266725, "learning_rate": 2.7696297295814593e-07, "loss": 0.3939, "step": 37841 }, { "epoch": 0.6577899841818908, "grad_norm": 1.2404832440594478, "learning_rate": 2.769377797662804e-07, "loss": 0.142, "step": 37842 }, { "epoch": 0.6578073667193937, "grad_norm": 1.605004858369387, "learning_rate": 2.769125872814196e-07, "loss": 0.2935, "step": 37843 }, { "epoch": 0.6578247492568965, "grad_norm": 1.3228389530509048, "learning_rate": 2.768873955036436e-07, "loss": 0.2678, "step": 37844 }, { "epoch": 0.6578421317943993, "grad_norm": 1.923820482715956, "learning_rate": 2.768622044330319e-07, "loss": 0.294, "step": 37845 }, { "epoch": 0.6578595143319022, "grad_norm": 1.1647340966326487, "learning_rate": 2.768370140696646e-07, "loss": 0.2356, "step": 37846 }, { "epoch": 0.657876896869405, "grad_norm": 1.1630016446695404, "learning_rate": 2.768118244136214e-07, "loss": 0.3555, "step": 37847 }, { "epoch": 0.6578942794069078, "grad_norm": 2.449532965132564, "learning_rate": 2.7678663546498197e-07, "loss": 0.2455, "step": 37848 }, { "epoch": 0.6579116619444106, "grad_norm": 1.3249840932363852, "learning_rate": 2.7676144722382666e-07, "loss": 0.2079, "step": 37849 }, { "epoch": 0.6579290444819135, "grad_norm": 2.050150643452155, "learning_rate": 2.7673625969023493e-07, "loss": 0.2186, "step": 37850 }, { "epoch": 0.6579464270194163, "grad_norm": 2.0901143983173256, "learning_rate": 2.76711072864287e-07, "loss": 0.2098, "step": 37851 }, { "epoch": 0.6579638095569191, "grad_norm": 2.5616328794872607, "learning_rate": 2.7668588674606183e-07, "loss": 0.2269, "step": 37852 }, { "epoch": 0.657981192094422, "grad_norm": 1.4154634114106706, "learning_rate": 2.766607013356402e-07, "loss": 0.1303, "step": 37853 }, { "epoch": 0.6579985746319248, "grad_norm": 0.7919160871323007, "learning_rate": 2.766355166331015e-07, "loss": 0.2062, "step": 37854 }, { "epoch": 0.6580159571694276, "grad_norm": 3.181323323862932, "learning_rate": 2.766103326385255e-07, "loss": 0.2863, "step": 37855 }, { "epoch": 0.6580333397069305, "grad_norm": 1.1399070536499587, "learning_rate": 2.76585149351992e-07, "loss": 0.2007, "step": 37856 }, { "epoch": 0.6580507222444333, "grad_norm": 1.8601521917065036, "learning_rate": 2.765599667735814e-07, "loss": 0.3033, "step": 37857 }, { "epoch": 0.6580681047819361, "grad_norm": 1.101766656926644, "learning_rate": 2.765347849033728e-07, "loss": 0.2151, "step": 37858 }, { "epoch": 0.6580854873194389, "grad_norm": 2.0308540845871508, "learning_rate": 2.765096037414463e-07, "loss": 0.3886, "step": 37859 }, { "epoch": 0.6581028698569417, "grad_norm": 1.6524248588790698, "learning_rate": 2.7648442328788146e-07, "loss": 0.1677, "step": 37860 }, { "epoch": 0.6581202523944445, "grad_norm": 0.9595394862745686, "learning_rate": 2.7645924354275863e-07, "loss": 0.1797, "step": 37861 }, { "epoch": 0.6581376349319473, "grad_norm": 2.970926654417933, "learning_rate": 2.764340645061572e-07, "loss": 0.2452, "step": 37862 }, { "epoch": 0.6581550174694502, "grad_norm": 1.8793021922285436, "learning_rate": 2.764088861781571e-07, "loss": 0.1999, "step": 37863 }, { "epoch": 0.658172400006953, "grad_norm": 1.7154656078818233, "learning_rate": 2.763837085588382e-07, "loss": 0.2125, "step": 37864 }, { "epoch": 0.6581897825444558, "grad_norm": 4.254469335662304, "learning_rate": 2.7635853164828015e-07, "loss": 0.2371, "step": 37865 }, { "epoch": 0.6582071650819586, "grad_norm": 1.3288421599067755, "learning_rate": 2.7633335544656284e-07, "loss": 0.1771, "step": 37866 }, { "epoch": 0.6582245476194615, "grad_norm": 1.7653216079466538, "learning_rate": 2.7630817995376606e-07, "loss": 0.2351, "step": 37867 }, { "epoch": 0.6582419301569643, "grad_norm": 1.5735400970036428, "learning_rate": 2.7628300516996937e-07, "loss": 0.2245, "step": 37868 }, { "epoch": 0.6582593126944671, "grad_norm": 4.908187554850299, "learning_rate": 2.76257831095253e-07, "loss": 0.36, "step": 37869 }, { "epoch": 0.65827669523197, "grad_norm": 2.570897344140006, "learning_rate": 2.7623265772969674e-07, "loss": 0.2821, "step": 37870 }, { "epoch": 0.6582940777694728, "grad_norm": 2.2264954048787002, "learning_rate": 2.762074850733798e-07, "loss": 0.1442, "step": 37871 }, { "epoch": 0.6583114603069756, "grad_norm": 2.2748093988430584, "learning_rate": 2.761823131263824e-07, "loss": 0.2048, "step": 37872 }, { "epoch": 0.6583288428444785, "grad_norm": 2.2140970368535653, "learning_rate": 2.761571418887844e-07, "loss": 0.2897, "step": 37873 }, { "epoch": 0.6583462253819813, "grad_norm": 1.1621926728895104, "learning_rate": 2.7613197136066533e-07, "loss": 0.181, "step": 37874 }, { "epoch": 0.6583636079194841, "grad_norm": 1.4641323041118492, "learning_rate": 2.7610680154210515e-07, "loss": 0.1532, "step": 37875 }, { "epoch": 0.658380990456987, "grad_norm": 1.163354403183332, "learning_rate": 2.760816324331835e-07, "loss": 0.1798, "step": 37876 }, { "epoch": 0.6583983729944898, "grad_norm": 1.2507217974876912, "learning_rate": 2.7605646403398024e-07, "loss": 0.119, "step": 37877 }, { "epoch": 0.6584157555319926, "grad_norm": 2.1785793689203956, "learning_rate": 2.7603129634457513e-07, "loss": 0.1358, "step": 37878 }, { "epoch": 0.6584331380694954, "grad_norm": 2.227459743127374, "learning_rate": 2.7600612936504773e-07, "loss": 0.2512, "step": 37879 }, { "epoch": 0.6584505206069982, "grad_norm": 2.0053135791383316, "learning_rate": 2.759809630954782e-07, "loss": 0.1535, "step": 37880 }, { "epoch": 0.658467903144501, "grad_norm": 1.6620066848537032, "learning_rate": 2.759557975359461e-07, "loss": 0.2391, "step": 37881 }, { "epoch": 0.6584852856820038, "grad_norm": 1.966382143365359, "learning_rate": 2.7593063268653124e-07, "loss": 0.2243, "step": 37882 }, { "epoch": 0.6585026682195066, "grad_norm": 1.8227432973283983, "learning_rate": 2.7590546854731326e-07, "loss": 0.1806, "step": 37883 }, { "epoch": 0.6585200507570095, "grad_norm": 1.98582524309684, "learning_rate": 2.7588030511837206e-07, "loss": 0.2016, "step": 37884 }, { "epoch": 0.6585374332945123, "grad_norm": 1.1598809015841385, "learning_rate": 2.7585514239978736e-07, "loss": 0.2808, "step": 37885 }, { "epoch": 0.6585548158320151, "grad_norm": 2.8745831382253777, "learning_rate": 2.7582998039163885e-07, "loss": 0.2766, "step": 37886 }, { "epoch": 0.658572198369518, "grad_norm": 1.1161979962551283, "learning_rate": 2.7580481909400624e-07, "loss": 0.2397, "step": 37887 }, { "epoch": 0.6585895809070208, "grad_norm": 2.0533374309118506, "learning_rate": 2.7577965850696936e-07, "loss": 0.266, "step": 37888 }, { "epoch": 0.6586069634445236, "grad_norm": 1.3765088019868954, "learning_rate": 2.7575449863060833e-07, "loss": 0.2494, "step": 37889 }, { "epoch": 0.6586243459820265, "grad_norm": 1.8961234643742333, "learning_rate": 2.757293394650021e-07, "loss": 0.3245, "step": 37890 }, { "epoch": 0.6586417285195293, "grad_norm": 1.6768768956811972, "learning_rate": 2.7570418101023103e-07, "loss": 0.2026, "step": 37891 }, { "epoch": 0.6586591110570321, "grad_norm": 3.1699804802445217, "learning_rate": 2.7567902326637465e-07, "loss": 0.1685, "step": 37892 }, { "epoch": 0.658676493594535, "grad_norm": 1.6230391502754675, "learning_rate": 2.7565386623351275e-07, "loss": 0.1887, "step": 37893 }, { "epoch": 0.6586938761320378, "grad_norm": 2.085827977866298, "learning_rate": 2.7562870991172505e-07, "loss": 0.2249, "step": 37894 }, { "epoch": 0.6587112586695406, "grad_norm": 2.8980568834550997, "learning_rate": 2.7560355430109124e-07, "loss": 0.3283, "step": 37895 }, { "epoch": 0.6587286412070434, "grad_norm": 1.8165260376073455, "learning_rate": 2.755783994016911e-07, "loss": 0.2673, "step": 37896 }, { "epoch": 0.6587460237445463, "grad_norm": 0.9395192269791255, "learning_rate": 2.7555324521360437e-07, "loss": 0.1181, "step": 37897 }, { "epoch": 0.6587634062820491, "grad_norm": 1.1138227572299093, "learning_rate": 2.755280917369105e-07, "loss": 0.2076, "step": 37898 }, { "epoch": 0.6587807888195519, "grad_norm": 1.2841660641757309, "learning_rate": 2.7550293897168964e-07, "loss": 0.2343, "step": 37899 }, { "epoch": 0.6587981713570547, "grad_norm": 2.1701649084726173, "learning_rate": 2.754777869180214e-07, "loss": 0.2306, "step": 37900 }, { "epoch": 0.6588155538945575, "grad_norm": 1.6491077429681593, "learning_rate": 2.754526355759854e-07, "loss": 0.5872, "step": 37901 }, { "epoch": 0.6588329364320603, "grad_norm": 1.5018910779620793, "learning_rate": 2.754274849456614e-07, "loss": 0.2188, "step": 37902 }, { "epoch": 0.6588503189695631, "grad_norm": 1.3924894749676853, "learning_rate": 2.754023350271292e-07, "loss": 0.2896, "step": 37903 }, { "epoch": 0.658867701507066, "grad_norm": 1.9882050824748114, "learning_rate": 2.7537718582046833e-07, "loss": 0.2462, "step": 37904 }, { "epoch": 0.6588850840445688, "grad_norm": 1.1032868222606331, "learning_rate": 2.753520373257586e-07, "loss": 0.1649, "step": 37905 }, { "epoch": 0.6589024665820716, "grad_norm": 1.2624669285240187, "learning_rate": 2.753268895430795e-07, "loss": 0.2912, "step": 37906 }, { "epoch": 0.6589198491195745, "grad_norm": 0.753203197020351, "learning_rate": 2.7530174247251124e-07, "loss": 0.2217, "step": 37907 }, { "epoch": 0.6589372316570773, "grad_norm": 1.0393993856784538, "learning_rate": 2.7527659611413335e-07, "loss": 0.2986, "step": 37908 }, { "epoch": 0.6589546141945801, "grad_norm": 1.84214495785795, "learning_rate": 2.752514504680253e-07, "loss": 0.2698, "step": 37909 }, { "epoch": 0.658971996732083, "grad_norm": 1.579846921012005, "learning_rate": 2.752263055342666e-07, "loss": 0.2012, "step": 37910 }, { "epoch": 0.6589893792695858, "grad_norm": 1.9459968930294393, "learning_rate": 2.752011613129376e-07, "loss": 0.2576, "step": 37911 }, { "epoch": 0.6590067618070886, "grad_norm": 1.063118241026993, "learning_rate": 2.751760178041176e-07, "loss": 0.1753, "step": 37912 }, { "epoch": 0.6590241443445914, "grad_norm": 1.1891735314666843, "learning_rate": 2.751508750078863e-07, "loss": 0.1843, "step": 37913 }, { "epoch": 0.6590415268820943, "grad_norm": 1.199219931544023, "learning_rate": 2.751257329243235e-07, "loss": 0.1644, "step": 37914 }, { "epoch": 0.6590589094195971, "grad_norm": 1.2135189071018904, "learning_rate": 2.751005915535087e-07, "loss": 0.1396, "step": 37915 }, { "epoch": 0.6590762919570999, "grad_norm": 2.310468962839678, "learning_rate": 2.750754508955219e-07, "loss": 0.2664, "step": 37916 }, { "epoch": 0.6590936744946028, "grad_norm": 1.6091434429587523, "learning_rate": 2.750503109504425e-07, "loss": 0.3048, "step": 37917 }, { "epoch": 0.6591110570321056, "grad_norm": 3.2902188376869184, "learning_rate": 2.750251717183501e-07, "loss": 0.289, "step": 37918 }, { "epoch": 0.6591284395696084, "grad_norm": 1.0849504238848486, "learning_rate": 2.750000331993247e-07, "loss": 0.1869, "step": 37919 }, { "epoch": 0.6591458221071111, "grad_norm": 1.9779920062468428, "learning_rate": 2.749748953934459e-07, "loss": 0.295, "step": 37920 }, { "epoch": 0.659163204644614, "grad_norm": 2.136827217218331, "learning_rate": 2.7494975830079334e-07, "loss": 0.2336, "step": 37921 }, { "epoch": 0.6591805871821168, "grad_norm": 1.5466972100715863, "learning_rate": 2.749246219214466e-07, "loss": 0.2023, "step": 37922 }, { "epoch": 0.6591979697196196, "grad_norm": 1.265087704878736, "learning_rate": 2.7489948625548546e-07, "loss": 0.2068, "step": 37923 }, { "epoch": 0.6592153522571225, "grad_norm": 1.7996642477220275, "learning_rate": 2.7487435130298954e-07, "loss": 0.2352, "step": 37924 }, { "epoch": 0.6592327347946253, "grad_norm": 1.7045832385897373, "learning_rate": 2.748492170640385e-07, "loss": 0.3357, "step": 37925 }, { "epoch": 0.6592501173321281, "grad_norm": 1.7278992686209014, "learning_rate": 2.748240835387119e-07, "loss": 0.2476, "step": 37926 }, { "epoch": 0.659267499869631, "grad_norm": 2.068635987011559, "learning_rate": 2.747989507270898e-07, "loss": 0.2205, "step": 37927 }, { "epoch": 0.6592848824071338, "grad_norm": 0.9764813385694696, "learning_rate": 2.747738186292514e-07, "loss": 0.2462, "step": 37928 }, { "epoch": 0.6593022649446366, "grad_norm": 1.3504934209084056, "learning_rate": 2.7474868724527633e-07, "loss": 0.3165, "step": 37929 }, { "epoch": 0.6593196474821394, "grad_norm": 1.252887800454227, "learning_rate": 2.747235565752447e-07, "loss": 0.5361, "step": 37930 }, { "epoch": 0.6593370300196423, "grad_norm": 1.03883379667917, "learning_rate": 2.7469842661923593e-07, "loss": 0.2815, "step": 37931 }, { "epoch": 0.6593544125571451, "grad_norm": 1.3546734645570915, "learning_rate": 2.7467329737732957e-07, "loss": 0.2188, "step": 37932 }, { "epoch": 0.6593717950946479, "grad_norm": 1.6916378835359214, "learning_rate": 2.746481688496054e-07, "loss": 0.2624, "step": 37933 }, { "epoch": 0.6593891776321508, "grad_norm": 1.986518106856792, "learning_rate": 2.74623041036143e-07, "loss": 0.3474, "step": 37934 }, { "epoch": 0.6594065601696536, "grad_norm": 1.280687220244519, "learning_rate": 2.7459791393702203e-07, "loss": 0.2524, "step": 37935 }, { "epoch": 0.6594239427071564, "grad_norm": 1.7958398833229285, "learning_rate": 2.745727875523222e-07, "loss": 0.226, "step": 37936 }, { "epoch": 0.6594413252446593, "grad_norm": 1.904474666709528, "learning_rate": 2.7454766188212274e-07, "loss": 0.2917, "step": 37937 }, { "epoch": 0.6594587077821621, "grad_norm": 1.4652974287129532, "learning_rate": 2.7452253692650395e-07, "loss": 0.1065, "step": 37938 }, { "epoch": 0.6594760903196648, "grad_norm": 1.471695759125851, "learning_rate": 2.7449741268554507e-07, "loss": 0.1855, "step": 37939 }, { "epoch": 0.6594934728571676, "grad_norm": 1.0787926040221778, "learning_rate": 2.744722891593261e-07, "loss": 0.2103, "step": 37940 }, { "epoch": 0.6595108553946705, "grad_norm": 3.2528530974877556, "learning_rate": 2.7444716634792584e-07, "loss": 0.3654, "step": 37941 }, { "epoch": 0.6595282379321733, "grad_norm": 4.550167897316659, "learning_rate": 2.7442204425142473e-07, "loss": 0.2472, "step": 37942 }, { "epoch": 0.6595456204696761, "grad_norm": 1.6113996988427561, "learning_rate": 2.743969228699021e-07, "loss": 0.472, "step": 37943 }, { "epoch": 0.659563003007179, "grad_norm": 2.3448148593500133, "learning_rate": 2.743718022034376e-07, "loss": 0.2572, "step": 37944 }, { "epoch": 0.6595803855446818, "grad_norm": 1.6711119070145495, "learning_rate": 2.743466822521107e-07, "loss": 0.2353, "step": 37945 }, { "epoch": 0.6595977680821846, "grad_norm": 1.9716389342955003, "learning_rate": 2.743215630160014e-07, "loss": 0.2411, "step": 37946 }, { "epoch": 0.6596151506196875, "grad_norm": 1.3748455949233676, "learning_rate": 2.74296444495189e-07, "loss": 0.2802, "step": 37947 }, { "epoch": 0.6596325331571903, "grad_norm": 1.8800404248037672, "learning_rate": 2.7427132668975314e-07, "loss": 0.2352, "step": 37948 }, { "epoch": 0.6596499156946931, "grad_norm": 1.7956810980177533, "learning_rate": 2.7424620959977336e-07, "loss": 0.1378, "step": 37949 }, { "epoch": 0.6596672982321959, "grad_norm": 1.1775242289570496, "learning_rate": 2.742210932253295e-07, "loss": 0.1729, "step": 37950 }, { "epoch": 0.6596846807696988, "grad_norm": 3.1678221899477834, "learning_rate": 2.7419597756650117e-07, "loss": 0.3058, "step": 37951 }, { "epoch": 0.6597020633072016, "grad_norm": 0.9609384168927101, "learning_rate": 2.7417086262336773e-07, "loss": 0.2172, "step": 37952 }, { "epoch": 0.6597194458447044, "grad_norm": 2.469894743764091, "learning_rate": 2.74145748396009e-07, "loss": 0.3322, "step": 37953 }, { "epoch": 0.6597368283822073, "grad_norm": 1.2478066653386375, "learning_rate": 2.7412063488450457e-07, "loss": 0.2702, "step": 37954 }, { "epoch": 0.6597542109197101, "grad_norm": 1.6726853841842313, "learning_rate": 2.740955220889338e-07, "loss": 0.2688, "step": 37955 }, { "epoch": 0.6597715934572129, "grad_norm": 1.1500480817096965, "learning_rate": 2.7407041000937657e-07, "loss": 0.1297, "step": 37956 }, { "epoch": 0.6597889759947158, "grad_norm": 1.4160305768815273, "learning_rate": 2.740452986459121e-07, "loss": 0.2397, "step": 37957 }, { "epoch": 0.6598063585322186, "grad_norm": 2.300823790000739, "learning_rate": 2.740201879986207e-07, "loss": 0.1887, "step": 37958 }, { "epoch": 0.6598237410697213, "grad_norm": 1.8981642817844229, "learning_rate": 2.739950780675812e-07, "loss": 0.2762, "step": 37959 }, { "epoch": 0.6598411236072241, "grad_norm": 1.8437255161290473, "learning_rate": 2.739699688528734e-07, "loss": 0.2362, "step": 37960 }, { "epoch": 0.659858506144727, "grad_norm": 1.1964513143676994, "learning_rate": 2.739448603545771e-07, "loss": 0.2753, "step": 37961 }, { "epoch": 0.6598758886822298, "grad_norm": 2.0634028082066114, "learning_rate": 2.739197525727718e-07, "loss": 0.1772, "step": 37962 }, { "epoch": 0.6598932712197326, "grad_norm": 1.845876567195928, "learning_rate": 2.7389464550753694e-07, "loss": 0.1998, "step": 37963 }, { "epoch": 0.6599106537572355, "grad_norm": 1.7753517952485427, "learning_rate": 2.7386953915895227e-07, "loss": 0.2274, "step": 37964 }, { "epoch": 0.6599280362947383, "grad_norm": 2.5716159994587513, "learning_rate": 2.738444335270972e-07, "loss": 0.2044, "step": 37965 }, { "epoch": 0.6599454188322411, "grad_norm": 0.9343470971697566, "learning_rate": 2.738193286120515e-07, "loss": 0.3059, "step": 37966 }, { "epoch": 0.659962801369744, "grad_norm": 1.6138255817890237, "learning_rate": 2.7379422441389454e-07, "loss": 0.2131, "step": 37967 }, { "epoch": 0.6599801839072468, "grad_norm": 1.0810418365207584, "learning_rate": 2.737691209327058e-07, "loss": 0.1822, "step": 37968 }, { "epoch": 0.6599975664447496, "grad_norm": 5.618468096994287, "learning_rate": 2.737440181685653e-07, "loss": 0.2691, "step": 37969 }, { "epoch": 0.6600149489822524, "grad_norm": 1.2525497555959049, "learning_rate": 2.737189161215522e-07, "loss": 0.133, "step": 37970 }, { "epoch": 0.6600323315197553, "grad_norm": 1.5842099555037383, "learning_rate": 2.7369381479174645e-07, "loss": 0.2094, "step": 37971 }, { "epoch": 0.6600497140572581, "grad_norm": 1.6078272748172429, "learning_rate": 2.7366871417922697e-07, "loss": 0.2098, "step": 37972 }, { "epoch": 0.6600670965947609, "grad_norm": 1.3366009725198198, "learning_rate": 2.736436142840739e-07, "loss": 0.1519, "step": 37973 }, { "epoch": 0.6600844791322638, "grad_norm": 2.1529143447230066, "learning_rate": 2.736185151063666e-07, "loss": 0.1769, "step": 37974 }, { "epoch": 0.6601018616697666, "grad_norm": 1.6561632807144213, "learning_rate": 2.735934166461846e-07, "loss": 0.3219, "step": 37975 }, { "epoch": 0.6601192442072694, "grad_norm": 0.761676384135608, "learning_rate": 2.7356831890360725e-07, "loss": 0.1858, "step": 37976 }, { "epoch": 0.6601366267447722, "grad_norm": 1.4029560889382904, "learning_rate": 2.735432218787147e-07, "loss": 0.1866, "step": 37977 }, { "epoch": 0.6601540092822751, "grad_norm": 1.6628501799754083, "learning_rate": 2.7351812557158594e-07, "loss": 0.1952, "step": 37978 }, { "epoch": 0.6601713918197778, "grad_norm": 1.70707404921836, "learning_rate": 2.734930299823005e-07, "loss": 0.2863, "step": 37979 }, { "epoch": 0.6601887743572806, "grad_norm": 1.3739399728613724, "learning_rate": 2.734679351109383e-07, "loss": 0.3377, "step": 37980 }, { "epoch": 0.6602061568947835, "grad_norm": 1.4589525315214589, "learning_rate": 2.734428409575787e-07, "loss": 0.1374, "step": 37981 }, { "epoch": 0.6602235394322863, "grad_norm": 1.6370763107143471, "learning_rate": 2.7341774752230116e-07, "loss": 0.3588, "step": 37982 }, { "epoch": 0.6602409219697891, "grad_norm": 1.714624151894401, "learning_rate": 2.7339265480518537e-07, "loss": 0.1269, "step": 37983 }, { "epoch": 0.660258304507292, "grad_norm": 1.950890470236779, "learning_rate": 2.733675628063107e-07, "loss": 0.2202, "step": 37984 }, { "epoch": 0.6602756870447948, "grad_norm": 1.2123084521829293, "learning_rate": 2.7334247152575674e-07, "loss": 0.3884, "step": 37985 }, { "epoch": 0.6602930695822976, "grad_norm": 1.7874091459626469, "learning_rate": 2.7331738096360304e-07, "loss": 0.429, "step": 37986 }, { "epoch": 0.6603104521198004, "grad_norm": 1.0374957129200326, "learning_rate": 2.732922911199289e-07, "loss": 0.146, "step": 37987 }, { "epoch": 0.6603278346573033, "grad_norm": 1.2960297364514313, "learning_rate": 2.7326720199481433e-07, "loss": 0.1989, "step": 37988 }, { "epoch": 0.6603452171948061, "grad_norm": 2.279475159827882, "learning_rate": 2.7324211358833846e-07, "loss": 0.2897, "step": 37989 }, { "epoch": 0.6603625997323089, "grad_norm": 2.132467818789471, "learning_rate": 2.732170259005813e-07, "loss": 0.2407, "step": 37990 }, { "epoch": 0.6603799822698118, "grad_norm": 1.4713550433168072, "learning_rate": 2.731919389316214e-07, "loss": 0.2089, "step": 37991 }, { "epoch": 0.6603973648073146, "grad_norm": 1.3135327459886468, "learning_rate": 2.7316685268153914e-07, "loss": 0.1548, "step": 37992 }, { "epoch": 0.6604147473448174, "grad_norm": 1.8320121113718264, "learning_rate": 2.731417671504138e-07, "loss": 0.2992, "step": 37993 }, { "epoch": 0.6604321298823203, "grad_norm": 1.993974596202605, "learning_rate": 2.7311668233832485e-07, "loss": 0.1827, "step": 37994 }, { "epoch": 0.6604495124198231, "grad_norm": 1.8242728942382758, "learning_rate": 2.7309159824535156e-07, "loss": 0.2336, "step": 37995 }, { "epoch": 0.6604668949573259, "grad_norm": 1.6419704193940645, "learning_rate": 2.7306651487157404e-07, "loss": 0.1989, "step": 37996 }, { "epoch": 0.6604842774948287, "grad_norm": 1.301751347790649, "learning_rate": 2.7304143221707127e-07, "loss": 0.2067, "step": 37997 }, { "epoch": 0.6605016600323316, "grad_norm": 1.4793900471959183, "learning_rate": 2.730163502819229e-07, "loss": 0.2038, "step": 37998 }, { "epoch": 0.6605190425698343, "grad_norm": 1.1458471603001006, "learning_rate": 2.7299126906620823e-07, "loss": 0.1919, "step": 37999 }, { "epoch": 0.6605364251073371, "grad_norm": 2.777703908483829, "learning_rate": 2.7296618857000715e-07, "loss": 0.2334, "step": 38000 }, { "epoch": 0.66055380764484, "grad_norm": 3.2866275044320843, "learning_rate": 2.7294110879339893e-07, "loss": 0.1678, "step": 38001 }, { "epoch": 0.6605711901823428, "grad_norm": 1.5366273007859859, "learning_rate": 2.729160297364631e-07, "loss": 0.2503, "step": 38002 }, { "epoch": 0.6605885727198456, "grad_norm": 1.4385137382292457, "learning_rate": 2.728909513992792e-07, "loss": 0.1662, "step": 38003 }, { "epoch": 0.6606059552573484, "grad_norm": 4.599350616088425, "learning_rate": 2.728658737819266e-07, "loss": 0.1106, "step": 38004 }, { "epoch": 0.6606233377948513, "grad_norm": 1.3235287912926232, "learning_rate": 2.728407968844849e-07, "loss": 0.2474, "step": 38005 }, { "epoch": 0.6606407203323541, "grad_norm": 1.5385756582384575, "learning_rate": 2.728157207070335e-07, "loss": 0.2373, "step": 38006 }, { "epoch": 0.6606581028698569, "grad_norm": 1.3373745594658888, "learning_rate": 2.727906452496517e-07, "loss": 0.2744, "step": 38007 }, { "epoch": 0.6606754854073598, "grad_norm": 1.1108787989300826, "learning_rate": 2.727655705124194e-07, "loss": 0.2428, "step": 38008 }, { "epoch": 0.6606928679448626, "grad_norm": 1.4977371556088694, "learning_rate": 2.7274049649541596e-07, "loss": 0.3161, "step": 38009 }, { "epoch": 0.6607102504823654, "grad_norm": 1.7535609080829684, "learning_rate": 2.727154231987205e-07, "loss": 0.3, "step": 38010 }, { "epoch": 0.6607276330198683, "grad_norm": 0.9504894816430106, "learning_rate": 2.7269035062241287e-07, "loss": 0.3582, "step": 38011 }, { "epoch": 0.6607450155573711, "grad_norm": 1.511493497600585, "learning_rate": 2.7266527876657243e-07, "loss": 0.2758, "step": 38012 }, { "epoch": 0.6607623980948739, "grad_norm": 1.7522225360830685, "learning_rate": 2.726402076312786e-07, "loss": 0.2482, "step": 38013 }, { "epoch": 0.6607797806323767, "grad_norm": 3.5838634789905615, "learning_rate": 2.726151372166109e-07, "loss": 0.1591, "step": 38014 }, { "epoch": 0.6607971631698796, "grad_norm": 0.8588555541644087, "learning_rate": 2.7259006752264867e-07, "loss": 0.2685, "step": 38015 }, { "epoch": 0.6608145457073824, "grad_norm": 0.8186402306175632, "learning_rate": 2.7256499854947157e-07, "loss": 0.1627, "step": 38016 }, { "epoch": 0.6608319282448852, "grad_norm": 1.4933748568555472, "learning_rate": 2.7253993029715895e-07, "loss": 0.2089, "step": 38017 }, { "epoch": 0.6608493107823881, "grad_norm": 0.9701034234294642, "learning_rate": 2.7251486276579005e-07, "loss": 0.227, "step": 38018 }, { "epoch": 0.6608666933198908, "grad_norm": 2.441356235446829, "learning_rate": 2.7248979595544467e-07, "loss": 0.1894, "step": 38019 }, { "epoch": 0.6608840758573936, "grad_norm": 2.632495345381379, "learning_rate": 2.7246472986620214e-07, "loss": 0.1999, "step": 38020 }, { "epoch": 0.6609014583948964, "grad_norm": 2.9583602502887594, "learning_rate": 2.72439664498142e-07, "loss": 0.3338, "step": 38021 }, { "epoch": 0.6609188409323993, "grad_norm": 1.5974729149801183, "learning_rate": 2.724145998513435e-07, "loss": 0.2572, "step": 38022 }, { "epoch": 0.6609362234699021, "grad_norm": 1.5158232911887353, "learning_rate": 2.723895359258862e-07, "loss": 0.2184, "step": 38023 }, { "epoch": 0.6609536060074049, "grad_norm": 1.5389219840349153, "learning_rate": 2.7236447272184953e-07, "loss": 0.3195, "step": 38024 }, { "epoch": 0.6609709885449078, "grad_norm": 1.6646933613686716, "learning_rate": 2.723394102393129e-07, "loss": 0.2647, "step": 38025 }, { "epoch": 0.6609883710824106, "grad_norm": 1.632650756640078, "learning_rate": 2.7231434847835553e-07, "loss": 0.1688, "step": 38026 }, { "epoch": 0.6610057536199134, "grad_norm": 1.6946799864656072, "learning_rate": 2.722892874390573e-07, "loss": 0.1998, "step": 38027 }, { "epoch": 0.6610231361574163, "grad_norm": 1.539242426088547, "learning_rate": 2.722642271214977e-07, "loss": 0.2636, "step": 38028 }, { "epoch": 0.6610405186949191, "grad_norm": 1.9947027871942289, "learning_rate": 2.722391675257556e-07, "loss": 0.3331, "step": 38029 }, { "epoch": 0.6610579012324219, "grad_norm": 1.2301262828989, "learning_rate": 2.7221410865191054e-07, "loss": 0.2364, "step": 38030 }, { "epoch": 0.6610752837699247, "grad_norm": 1.4850489344953774, "learning_rate": 2.721890505000423e-07, "loss": 0.2286, "step": 38031 }, { "epoch": 0.6610926663074276, "grad_norm": 2.217366225518865, "learning_rate": 2.721639930702302e-07, "loss": 0.2888, "step": 38032 }, { "epoch": 0.6611100488449304, "grad_norm": 1.5198615563302917, "learning_rate": 2.7213893636255345e-07, "loss": 0.1304, "step": 38033 }, { "epoch": 0.6611274313824332, "grad_norm": 1.3585292390036903, "learning_rate": 2.721138803770917e-07, "loss": 0.2119, "step": 38034 }, { "epoch": 0.6611448139199361, "grad_norm": 2.544881665597031, "learning_rate": 2.7208882511392426e-07, "loss": 0.2574, "step": 38035 }, { "epoch": 0.6611621964574389, "grad_norm": 1.1053936595127904, "learning_rate": 2.720637705731306e-07, "loss": 0.1944, "step": 38036 }, { "epoch": 0.6611795789949417, "grad_norm": 2.3157211786065073, "learning_rate": 2.7203871675478994e-07, "loss": 0.3722, "step": 38037 }, { "epoch": 0.6611969615324446, "grad_norm": 1.538069674070688, "learning_rate": 2.720136636589817e-07, "loss": 0.3683, "step": 38038 }, { "epoch": 0.6612143440699473, "grad_norm": 1.3330182724482673, "learning_rate": 2.7198861128578567e-07, "loss": 0.2323, "step": 38039 }, { "epoch": 0.6612317266074501, "grad_norm": 1.5382292105910897, "learning_rate": 2.7196355963528096e-07, "loss": 0.1374, "step": 38040 }, { "epoch": 0.6612491091449529, "grad_norm": 1.2316736096641792, "learning_rate": 2.71938508707547e-07, "loss": 0.1561, "step": 38041 }, { "epoch": 0.6612664916824558, "grad_norm": 1.0848560614476608, "learning_rate": 2.7191345850266323e-07, "loss": 0.2034, "step": 38042 }, { "epoch": 0.6612838742199586, "grad_norm": 1.5589629136080165, "learning_rate": 2.71888409020709e-07, "loss": 0.3459, "step": 38043 }, { "epoch": 0.6613012567574614, "grad_norm": 2.0528465479321256, "learning_rate": 2.718633602617637e-07, "loss": 0.3163, "step": 38044 }, { "epoch": 0.6613186392949643, "grad_norm": 1.4482331390740828, "learning_rate": 2.718383122259068e-07, "loss": 0.1769, "step": 38045 }, { "epoch": 0.6613360218324671, "grad_norm": 1.4051802331418912, "learning_rate": 2.7181326491321743e-07, "loss": 0.2246, "step": 38046 }, { "epoch": 0.6613534043699699, "grad_norm": 1.8115584314695534, "learning_rate": 2.7178821832377556e-07, "loss": 0.3962, "step": 38047 }, { "epoch": 0.6613707869074728, "grad_norm": 1.5420699626750516, "learning_rate": 2.7176317245766007e-07, "loss": 0.2672, "step": 38048 }, { "epoch": 0.6613881694449756, "grad_norm": 1.2262335048143513, "learning_rate": 2.7173812731495025e-07, "loss": 0.3411, "step": 38049 }, { "epoch": 0.6614055519824784, "grad_norm": 2.4886671422218645, "learning_rate": 2.717130828957259e-07, "loss": 0.291, "step": 38050 }, { "epoch": 0.6614229345199812, "grad_norm": 1.4409688208616849, "learning_rate": 2.716880392000662e-07, "loss": 0.2132, "step": 38051 }, { "epoch": 0.6614403170574841, "grad_norm": 1.6032510722662972, "learning_rate": 2.7166299622805055e-07, "loss": 0.3661, "step": 38052 }, { "epoch": 0.6614576995949869, "grad_norm": 0.8213572046396062, "learning_rate": 2.716379539797583e-07, "loss": 0.2057, "step": 38053 }, { "epoch": 0.6614750821324897, "grad_norm": 1.907234026250652, "learning_rate": 2.7161291245526884e-07, "loss": 0.3892, "step": 38054 }, { "epoch": 0.6614924646699926, "grad_norm": 1.8509570617331268, "learning_rate": 2.7158787165466157e-07, "loss": 0.2792, "step": 38055 }, { "epoch": 0.6615098472074954, "grad_norm": 1.1590945030075235, "learning_rate": 2.7156283157801584e-07, "loss": 0.2112, "step": 38056 }, { "epoch": 0.6615272297449982, "grad_norm": 2.091855970392849, "learning_rate": 2.7153779222541076e-07, "loss": 0.1727, "step": 38057 }, { "epoch": 0.661544612282501, "grad_norm": 1.0366159048074182, "learning_rate": 2.715127535969261e-07, "loss": 0.2467, "step": 38058 }, { "epoch": 0.6615619948200038, "grad_norm": 1.2656072162331433, "learning_rate": 2.7148771569264105e-07, "loss": 0.1979, "step": 38059 }, { "epoch": 0.6615793773575066, "grad_norm": 2.0399238389181984, "learning_rate": 2.7146267851263524e-07, "loss": 0.3242, "step": 38060 }, { "epoch": 0.6615967598950094, "grad_norm": 1.5984627549493255, "learning_rate": 2.7143764205698734e-07, "loss": 0.2129, "step": 38061 }, { "epoch": 0.6616141424325123, "grad_norm": 1.7754341705389376, "learning_rate": 2.714126063257773e-07, "loss": 0.3134, "step": 38062 }, { "epoch": 0.6616315249700151, "grad_norm": 1.4815312392713578, "learning_rate": 2.7138757131908427e-07, "loss": 0.2616, "step": 38063 }, { "epoch": 0.6616489075075179, "grad_norm": 1.7008913787809277, "learning_rate": 2.7136253703698766e-07, "loss": 0.1815, "step": 38064 }, { "epoch": 0.6616662900450208, "grad_norm": 1.5859480060070035, "learning_rate": 2.7133750347956654e-07, "loss": 0.2171, "step": 38065 }, { "epoch": 0.6616836725825236, "grad_norm": 1.9584732671416105, "learning_rate": 2.7131247064690095e-07, "loss": 0.2195, "step": 38066 }, { "epoch": 0.6617010551200264, "grad_norm": 1.5320237261705272, "learning_rate": 2.7128743853906957e-07, "loss": 0.2517, "step": 38067 }, { "epoch": 0.6617184376575292, "grad_norm": 1.2897614625501805, "learning_rate": 2.712624071561519e-07, "loss": 0.1922, "step": 38068 }, { "epoch": 0.6617358201950321, "grad_norm": 1.5341065118741748, "learning_rate": 2.712373764982272e-07, "loss": 0.1964, "step": 38069 }, { "epoch": 0.6617532027325349, "grad_norm": 1.3070914049039946, "learning_rate": 2.7121234656537505e-07, "loss": 0.2233, "step": 38070 }, { "epoch": 0.6617705852700377, "grad_norm": 1.3094854736122392, "learning_rate": 2.711873173576747e-07, "loss": 0.1322, "step": 38071 }, { "epoch": 0.6617879678075406, "grad_norm": 1.4084938686292658, "learning_rate": 2.711622888752054e-07, "loss": 0.1966, "step": 38072 }, { "epoch": 0.6618053503450434, "grad_norm": 2.192350101238725, "learning_rate": 2.711372611180466e-07, "loss": 0.2497, "step": 38073 }, { "epoch": 0.6618227328825462, "grad_norm": 1.4168777556571042, "learning_rate": 2.7111223408627756e-07, "loss": 0.2756, "step": 38074 }, { "epoch": 0.6618401154200491, "grad_norm": 1.1632544687279487, "learning_rate": 2.710872077799775e-07, "loss": 0.1558, "step": 38075 }, { "epoch": 0.6618574979575519, "grad_norm": 1.9652468011253474, "learning_rate": 2.710621821992257e-07, "loss": 0.297, "step": 38076 }, { "epoch": 0.6618748804950547, "grad_norm": 1.1673592826192412, "learning_rate": 2.710371573441018e-07, "loss": 0.1613, "step": 38077 }, { "epoch": 0.6618922630325574, "grad_norm": 2.0710070590364036, "learning_rate": 2.710121332146852e-07, "loss": 0.6474, "step": 38078 }, { "epoch": 0.6619096455700603, "grad_norm": 1.0785282174298247, "learning_rate": 2.7098710981105466e-07, "loss": 0.2108, "step": 38079 }, { "epoch": 0.6619270281075631, "grad_norm": 2.701646442693639, "learning_rate": 2.7096208713328967e-07, "loss": 0.3604, "step": 38080 }, { "epoch": 0.6619444106450659, "grad_norm": 1.5192130508397648, "learning_rate": 2.709370651814697e-07, "loss": 0.1855, "step": 38081 }, { "epoch": 0.6619617931825688, "grad_norm": 1.110899000228275, "learning_rate": 2.709120439556742e-07, "loss": 0.3748, "step": 38082 }, { "epoch": 0.6619791757200716, "grad_norm": 1.8297706080372336, "learning_rate": 2.708870234559822e-07, "loss": 0.296, "step": 38083 }, { "epoch": 0.6619965582575744, "grad_norm": 4.202111679663463, "learning_rate": 2.708620036824729e-07, "loss": 0.4603, "step": 38084 }, { "epoch": 0.6620139407950772, "grad_norm": 1.2679539708031404, "learning_rate": 2.708369846352262e-07, "loss": 0.2415, "step": 38085 }, { "epoch": 0.6620313233325801, "grad_norm": 1.3599903514573548, "learning_rate": 2.7081196631432076e-07, "loss": 0.2594, "step": 38086 }, { "epoch": 0.6620487058700829, "grad_norm": 1.5676826419455037, "learning_rate": 2.7078694871983607e-07, "loss": 0.24, "step": 38087 }, { "epoch": 0.6620660884075857, "grad_norm": 2.004822750309924, "learning_rate": 2.707619318518514e-07, "loss": 0.2493, "step": 38088 }, { "epoch": 0.6620834709450886, "grad_norm": 1.9768380335875888, "learning_rate": 2.707369157104461e-07, "loss": 0.2889, "step": 38089 }, { "epoch": 0.6621008534825914, "grad_norm": 1.7068508327087348, "learning_rate": 2.7071190029569955e-07, "loss": 0.1885, "step": 38090 }, { "epoch": 0.6621182360200942, "grad_norm": 1.2828853519312962, "learning_rate": 2.7068688560769106e-07, "loss": 0.2736, "step": 38091 }, { "epoch": 0.6621356185575971, "grad_norm": 1.0835307392497358, "learning_rate": 2.7066187164649967e-07, "loss": 0.3729, "step": 38092 }, { "epoch": 0.6621530010950999, "grad_norm": 1.4107661230017965, "learning_rate": 2.7063685841220486e-07, "loss": 0.1877, "step": 38093 }, { "epoch": 0.6621703836326027, "grad_norm": 1.956270319848224, "learning_rate": 2.7061184590488586e-07, "loss": 0.1693, "step": 38094 }, { "epoch": 0.6621877661701056, "grad_norm": 1.2295675152008778, "learning_rate": 2.705868341246219e-07, "loss": 0.2759, "step": 38095 }, { "epoch": 0.6622051487076084, "grad_norm": 7.287629194737258, "learning_rate": 2.705618230714921e-07, "loss": 0.4134, "step": 38096 }, { "epoch": 0.6622225312451112, "grad_norm": 2.3557133842841878, "learning_rate": 2.7053681274557626e-07, "loss": 0.2614, "step": 38097 }, { "epoch": 0.6622399137826139, "grad_norm": 1.3849718958624115, "learning_rate": 2.705118031469532e-07, "loss": 0.2418, "step": 38098 }, { "epoch": 0.6622572963201168, "grad_norm": 2.114027834853093, "learning_rate": 2.704867942757021e-07, "loss": 0.2094, "step": 38099 }, { "epoch": 0.6622746788576196, "grad_norm": 1.340252998470233, "learning_rate": 2.7046178613190264e-07, "loss": 0.1712, "step": 38100 }, { "epoch": 0.6622920613951224, "grad_norm": 1.046007078784999, "learning_rate": 2.704367787156339e-07, "loss": 0.205, "step": 38101 }, { "epoch": 0.6623094439326253, "grad_norm": 2.797670896425204, "learning_rate": 2.70411772026975e-07, "loss": 0.2113, "step": 38102 }, { "epoch": 0.6623268264701281, "grad_norm": 2.2230681578400513, "learning_rate": 2.703867660660054e-07, "loss": 0.2204, "step": 38103 }, { "epoch": 0.6623442090076309, "grad_norm": 1.174881552713172, "learning_rate": 2.7036176083280425e-07, "loss": 0.3038, "step": 38104 }, { "epoch": 0.6623615915451337, "grad_norm": 2.36426620060833, "learning_rate": 2.703367563274509e-07, "loss": 0.2805, "step": 38105 }, { "epoch": 0.6623789740826366, "grad_norm": 1.1377872642584772, "learning_rate": 2.703117525500245e-07, "loss": 0.1917, "step": 38106 }, { "epoch": 0.6623963566201394, "grad_norm": 0.8793899577080102, "learning_rate": 2.702867495006041e-07, "loss": 0.3165, "step": 38107 }, { "epoch": 0.6624137391576422, "grad_norm": 1.101021343150506, "learning_rate": 2.702617471792694e-07, "loss": 0.2011, "step": 38108 }, { "epoch": 0.6624311216951451, "grad_norm": 1.4327268519849952, "learning_rate": 2.7023674558609943e-07, "loss": 0.1869, "step": 38109 }, { "epoch": 0.6624485042326479, "grad_norm": 1.5863018345495175, "learning_rate": 2.702117447211736e-07, "loss": 0.2112, "step": 38110 }, { "epoch": 0.6624658867701507, "grad_norm": 2.594326634763885, "learning_rate": 2.7018674458457064e-07, "loss": 0.3245, "step": 38111 }, { "epoch": 0.6624832693076536, "grad_norm": 1.6657049648448843, "learning_rate": 2.701617451763703e-07, "loss": 0.1566, "step": 38112 }, { "epoch": 0.6625006518451564, "grad_norm": 1.5148050368889356, "learning_rate": 2.7013674649665164e-07, "loss": 0.2846, "step": 38113 }, { "epoch": 0.6625180343826592, "grad_norm": 2.4825623309634053, "learning_rate": 2.701117485454939e-07, "loss": 0.2756, "step": 38114 }, { "epoch": 0.662535416920162, "grad_norm": 0.8989941217763304, "learning_rate": 2.7008675132297623e-07, "loss": 0.1708, "step": 38115 }, { "epoch": 0.6625527994576649, "grad_norm": 5.132726889287225, "learning_rate": 2.700617548291782e-07, "loss": 0.536, "step": 38116 }, { "epoch": 0.6625701819951677, "grad_norm": 0.754589535993932, "learning_rate": 2.700367590641786e-07, "loss": 0.1743, "step": 38117 }, { "epoch": 0.6625875645326704, "grad_norm": 1.1629403422644349, "learning_rate": 2.700117640280569e-07, "loss": 0.3242, "step": 38118 }, { "epoch": 0.6626049470701733, "grad_norm": 0.938290622327344, "learning_rate": 2.6998676972089204e-07, "loss": 0.2579, "step": 38119 }, { "epoch": 0.6626223296076761, "grad_norm": 1.2697298456070671, "learning_rate": 2.699617761427636e-07, "loss": 0.2213, "step": 38120 }, { "epoch": 0.6626397121451789, "grad_norm": 1.2647943119186493, "learning_rate": 2.6993678329375073e-07, "loss": 0.2061, "step": 38121 }, { "epoch": 0.6626570946826817, "grad_norm": 1.9567756766563467, "learning_rate": 2.699117911739326e-07, "loss": 0.2501, "step": 38122 }, { "epoch": 0.6626744772201846, "grad_norm": 1.59374259733454, "learning_rate": 2.698867997833883e-07, "loss": 0.1917, "step": 38123 }, { "epoch": 0.6626918597576874, "grad_norm": 1.460397015712486, "learning_rate": 2.698618091221972e-07, "loss": 0.2946, "step": 38124 }, { "epoch": 0.6627092422951902, "grad_norm": 1.6821212473584228, "learning_rate": 2.6983681919043846e-07, "loss": 0.2285, "step": 38125 }, { "epoch": 0.6627266248326931, "grad_norm": 2.556318122336675, "learning_rate": 2.6981182998819117e-07, "loss": 0.2356, "step": 38126 }, { "epoch": 0.6627440073701959, "grad_norm": 1.5943965421874355, "learning_rate": 2.6978684151553454e-07, "loss": 0.2861, "step": 38127 }, { "epoch": 0.6627613899076987, "grad_norm": 3.849968975742448, "learning_rate": 2.697618537725481e-07, "loss": 0.2609, "step": 38128 }, { "epoch": 0.6627787724452016, "grad_norm": 1.9660175197799061, "learning_rate": 2.69736866759311e-07, "loss": 0.322, "step": 38129 }, { "epoch": 0.6627961549827044, "grad_norm": 2.171857425322119, "learning_rate": 2.6971188047590174e-07, "loss": 0.2098, "step": 38130 }, { "epoch": 0.6628135375202072, "grad_norm": 1.3388050885533818, "learning_rate": 2.696868949224004e-07, "loss": 0.3178, "step": 38131 }, { "epoch": 0.66283092005771, "grad_norm": 2.341474364181053, "learning_rate": 2.6966191009888574e-07, "loss": 0.1703, "step": 38132 }, { "epoch": 0.6628483025952129, "grad_norm": 1.0370775452919758, "learning_rate": 2.69636926005437e-07, "loss": 0.213, "step": 38133 }, { "epoch": 0.6628656851327157, "grad_norm": 1.1251488643165062, "learning_rate": 2.696119426421334e-07, "loss": 0.1968, "step": 38134 }, { "epoch": 0.6628830676702185, "grad_norm": 1.96222312816969, "learning_rate": 2.6958696000905405e-07, "loss": 0.2139, "step": 38135 }, { "epoch": 0.6629004502077214, "grad_norm": 1.2456345675484892, "learning_rate": 2.6956197810627824e-07, "loss": 0.2502, "step": 38136 }, { "epoch": 0.6629178327452242, "grad_norm": 1.7028033583975548, "learning_rate": 2.695369969338851e-07, "loss": 0.5053, "step": 38137 }, { "epoch": 0.6629352152827269, "grad_norm": 0.8097062685462708, "learning_rate": 2.6951201649195366e-07, "loss": 0.1976, "step": 38138 }, { "epoch": 0.6629525978202297, "grad_norm": 1.4154819614206597, "learning_rate": 2.6948703678056337e-07, "loss": 0.3235, "step": 38139 }, { "epoch": 0.6629699803577326, "grad_norm": 1.3515115967327986, "learning_rate": 2.6946205779979337e-07, "loss": 0.2114, "step": 38140 }, { "epoch": 0.6629873628952354, "grad_norm": 1.3145940059695727, "learning_rate": 2.6943707954972274e-07, "loss": 0.2203, "step": 38141 }, { "epoch": 0.6630047454327382, "grad_norm": 2.5133273122089426, "learning_rate": 2.694121020304306e-07, "loss": 0.3549, "step": 38142 }, { "epoch": 0.6630221279702411, "grad_norm": 2.3097236046030956, "learning_rate": 2.693871252419962e-07, "loss": 0.2052, "step": 38143 }, { "epoch": 0.6630395105077439, "grad_norm": 1.5837492042019297, "learning_rate": 2.6936214918449864e-07, "loss": 0.2414, "step": 38144 }, { "epoch": 0.6630568930452467, "grad_norm": 2.9926713491683365, "learning_rate": 2.6933717385801713e-07, "loss": 0.4184, "step": 38145 }, { "epoch": 0.6630742755827496, "grad_norm": 1.37868143103297, "learning_rate": 2.693121992626307e-07, "loss": 0.231, "step": 38146 }, { "epoch": 0.6630916581202524, "grad_norm": 1.1485421855823605, "learning_rate": 2.6928722539841877e-07, "loss": 0.2746, "step": 38147 }, { "epoch": 0.6631090406577552, "grad_norm": 1.3039202912601013, "learning_rate": 2.6926225226546053e-07, "loss": 0.2159, "step": 38148 }, { "epoch": 0.663126423195258, "grad_norm": 1.2276408649113308, "learning_rate": 2.692372798638348e-07, "loss": 0.2595, "step": 38149 }, { "epoch": 0.6631438057327609, "grad_norm": 3.1219556565974966, "learning_rate": 2.692123081936207e-07, "loss": 0.323, "step": 38150 }, { "epoch": 0.6631611882702637, "grad_norm": 0.7897119939732294, "learning_rate": 2.6918733725489775e-07, "loss": 0.1962, "step": 38151 }, { "epoch": 0.6631785708077665, "grad_norm": 1.7338091403892497, "learning_rate": 2.691623670477449e-07, "loss": 0.2305, "step": 38152 }, { "epoch": 0.6631959533452694, "grad_norm": 1.4142171365576501, "learning_rate": 2.691373975722413e-07, "loss": 0.2052, "step": 38153 }, { "epoch": 0.6632133358827722, "grad_norm": 1.455578769607969, "learning_rate": 2.6911242882846606e-07, "loss": 0.1682, "step": 38154 }, { "epoch": 0.663230718420275, "grad_norm": 3.0795457015467163, "learning_rate": 2.6908746081649845e-07, "loss": 0.2757, "step": 38155 }, { "epoch": 0.6632481009577779, "grad_norm": 1.50915283691316, "learning_rate": 2.6906249353641754e-07, "loss": 0.2042, "step": 38156 }, { "epoch": 0.6632654834952807, "grad_norm": 1.00465949565228, "learning_rate": 2.6903752698830227e-07, "loss": 0.2742, "step": 38157 }, { "epoch": 0.6632828660327834, "grad_norm": 1.4300520026932986, "learning_rate": 2.6901256117223194e-07, "loss": 0.1906, "step": 38158 }, { "epoch": 0.6633002485702862, "grad_norm": 2.1908962480339795, "learning_rate": 2.689875960882858e-07, "loss": 0.3801, "step": 38159 }, { "epoch": 0.6633176311077891, "grad_norm": 1.8372544127850954, "learning_rate": 2.6896263173654286e-07, "loss": 0.1843, "step": 38160 }, { "epoch": 0.6633350136452919, "grad_norm": 1.230135177110638, "learning_rate": 2.689376681170822e-07, "loss": 0.2597, "step": 38161 }, { "epoch": 0.6633523961827947, "grad_norm": 2.249408032033105, "learning_rate": 2.689127052299831e-07, "loss": 0.315, "step": 38162 }, { "epoch": 0.6633697787202976, "grad_norm": 1.7491420855645532, "learning_rate": 2.6888774307532446e-07, "loss": 0.1889, "step": 38163 }, { "epoch": 0.6633871612578004, "grad_norm": 1.5971972796158567, "learning_rate": 2.688627816531856e-07, "loss": 0.3137, "step": 38164 }, { "epoch": 0.6634045437953032, "grad_norm": 1.5968201377164908, "learning_rate": 2.688378209636453e-07, "loss": 0.2398, "step": 38165 }, { "epoch": 0.6634219263328061, "grad_norm": 1.8365114863220566, "learning_rate": 2.6881286100678313e-07, "loss": 0.2552, "step": 38166 }, { "epoch": 0.6634393088703089, "grad_norm": 1.762869359324665, "learning_rate": 2.6878790178267816e-07, "loss": 0.3086, "step": 38167 }, { "epoch": 0.6634566914078117, "grad_norm": 1.8388160333636778, "learning_rate": 2.687629432914092e-07, "loss": 0.195, "step": 38168 }, { "epoch": 0.6634740739453145, "grad_norm": 1.4864601256360985, "learning_rate": 2.6873798553305526e-07, "loss": 0.1825, "step": 38169 }, { "epoch": 0.6634914564828174, "grad_norm": 1.5573053821238765, "learning_rate": 2.6871302850769583e-07, "loss": 0.2039, "step": 38170 }, { "epoch": 0.6635088390203202, "grad_norm": 1.4877776340779827, "learning_rate": 2.686880722154099e-07, "loss": 0.2438, "step": 38171 }, { "epoch": 0.663526221557823, "grad_norm": 2.6148183785721764, "learning_rate": 2.686631166562765e-07, "loss": 0.3685, "step": 38172 }, { "epoch": 0.6635436040953259, "grad_norm": 0.9412855207978217, "learning_rate": 2.686381618303748e-07, "loss": 0.1695, "step": 38173 }, { "epoch": 0.6635609866328287, "grad_norm": 1.1460372114475446, "learning_rate": 2.686132077377838e-07, "loss": 0.1801, "step": 38174 }, { "epoch": 0.6635783691703315, "grad_norm": 1.2876731991181676, "learning_rate": 2.6858825437858266e-07, "loss": 0.2635, "step": 38175 }, { "epoch": 0.6635957517078344, "grad_norm": 2.209288130722616, "learning_rate": 2.685633017528505e-07, "loss": 0.1926, "step": 38176 }, { "epoch": 0.6636131342453372, "grad_norm": 1.1506365213468366, "learning_rate": 2.6853834986066604e-07, "loss": 0.1641, "step": 38177 }, { "epoch": 0.6636305167828399, "grad_norm": 2.294465598575372, "learning_rate": 2.68513398702109e-07, "loss": 0.3092, "step": 38178 }, { "epoch": 0.6636478993203427, "grad_norm": 1.050087805839871, "learning_rate": 2.6848844827725805e-07, "loss": 0.2146, "step": 38179 }, { "epoch": 0.6636652818578456, "grad_norm": 1.7717157845189604, "learning_rate": 2.6846349858619243e-07, "loss": 0.2928, "step": 38180 }, { "epoch": 0.6636826643953484, "grad_norm": 1.0506645709296314, "learning_rate": 2.6843854962899117e-07, "loss": 0.168, "step": 38181 }, { "epoch": 0.6637000469328512, "grad_norm": 1.6243229556304233, "learning_rate": 2.6841360140573333e-07, "loss": 0.2707, "step": 38182 }, { "epoch": 0.6637174294703541, "grad_norm": 1.4659048343443188, "learning_rate": 2.6838865391649803e-07, "loss": 0.2227, "step": 38183 }, { "epoch": 0.6637348120078569, "grad_norm": 1.245793151375432, "learning_rate": 2.6836370716136417e-07, "loss": 0.1704, "step": 38184 }, { "epoch": 0.6637521945453597, "grad_norm": 1.3148370813847496, "learning_rate": 2.683387611404109e-07, "loss": 0.2326, "step": 38185 }, { "epoch": 0.6637695770828625, "grad_norm": 2.2180007300441384, "learning_rate": 2.6831381585371765e-07, "loss": 0.2276, "step": 38186 }, { "epoch": 0.6637869596203654, "grad_norm": 1.3838277689000638, "learning_rate": 2.68288871301363e-07, "loss": 0.227, "step": 38187 }, { "epoch": 0.6638043421578682, "grad_norm": 1.9065887835769184, "learning_rate": 2.6826392748342595e-07, "loss": 0.2216, "step": 38188 }, { "epoch": 0.663821724695371, "grad_norm": 2.2676778569463356, "learning_rate": 2.682389843999861e-07, "loss": 0.3162, "step": 38189 }, { "epoch": 0.6638391072328739, "grad_norm": 0.9616877296851091, "learning_rate": 2.682140420511222e-07, "loss": 0.1624, "step": 38190 }, { "epoch": 0.6638564897703767, "grad_norm": 1.8856886816703782, "learning_rate": 2.6818910043691324e-07, "loss": 0.2132, "step": 38191 }, { "epoch": 0.6638738723078795, "grad_norm": 1.323892249151091, "learning_rate": 2.681641595574383e-07, "loss": 0.2928, "step": 38192 }, { "epoch": 0.6638912548453824, "grad_norm": 1.365949476363499, "learning_rate": 2.681392194127766e-07, "loss": 0.2214, "step": 38193 }, { "epoch": 0.6639086373828852, "grad_norm": 1.3852360302805236, "learning_rate": 2.68114280003007e-07, "loss": 0.1512, "step": 38194 }, { "epoch": 0.663926019920388, "grad_norm": 2.2773640176062333, "learning_rate": 2.680893413282086e-07, "loss": 0.1687, "step": 38195 }, { "epoch": 0.6639434024578909, "grad_norm": 1.3536996697096748, "learning_rate": 2.6806440338846034e-07, "loss": 0.2127, "step": 38196 }, { "epoch": 0.6639607849953937, "grad_norm": 1.6512129101394308, "learning_rate": 2.6803946618384154e-07, "loss": 0.2679, "step": 38197 }, { "epoch": 0.6639781675328964, "grad_norm": 1.4530224736355297, "learning_rate": 2.680145297144312e-07, "loss": 0.2065, "step": 38198 }, { "epoch": 0.6639955500703992, "grad_norm": 1.197291469986936, "learning_rate": 2.6798959398030814e-07, "loss": 0.2095, "step": 38199 }, { "epoch": 0.6640129326079021, "grad_norm": 2.101492660947926, "learning_rate": 2.6796465898155124e-07, "loss": 0.1351, "step": 38200 }, { "epoch": 0.6640303151454049, "grad_norm": 2.5267183159069684, "learning_rate": 2.6793972471824e-07, "loss": 0.2983, "step": 38201 }, { "epoch": 0.6640476976829077, "grad_norm": 1.331669779599076, "learning_rate": 2.679147911904532e-07, "loss": 0.2477, "step": 38202 }, { "epoch": 0.6640650802204106, "grad_norm": 1.1549088033400055, "learning_rate": 2.6788985839826993e-07, "loss": 0.3331, "step": 38203 }, { "epoch": 0.6640824627579134, "grad_norm": 1.3181402239977245, "learning_rate": 2.67864926341769e-07, "loss": 0.2092, "step": 38204 }, { "epoch": 0.6640998452954162, "grad_norm": 3.3623334400862013, "learning_rate": 2.6783999502102995e-07, "loss": 0.2735, "step": 38205 }, { "epoch": 0.664117227832919, "grad_norm": 1.0220886576232278, "learning_rate": 2.6781506443613135e-07, "loss": 0.2211, "step": 38206 }, { "epoch": 0.6641346103704219, "grad_norm": 1.194853289747074, "learning_rate": 2.677901345871523e-07, "loss": 0.2136, "step": 38207 }, { "epoch": 0.6641519929079247, "grad_norm": 0.9448454042403098, "learning_rate": 2.677652054741717e-07, "loss": 0.2224, "step": 38208 }, { "epoch": 0.6641693754454275, "grad_norm": 1.6122087497372373, "learning_rate": 2.6774027709726886e-07, "loss": 0.252, "step": 38209 }, { "epoch": 0.6641867579829304, "grad_norm": 2.0345547145576712, "learning_rate": 2.6771534945652263e-07, "loss": 0.3299, "step": 38210 }, { "epoch": 0.6642041405204332, "grad_norm": 1.185746695511534, "learning_rate": 2.6769042255201215e-07, "loss": 0.4561, "step": 38211 }, { "epoch": 0.664221523057936, "grad_norm": 1.5381284845035683, "learning_rate": 2.676654963838162e-07, "loss": 0.3084, "step": 38212 }, { "epoch": 0.6642389055954389, "grad_norm": 1.1928816140789618, "learning_rate": 2.67640570952014e-07, "loss": 0.4002, "step": 38213 }, { "epoch": 0.6642562881329417, "grad_norm": 1.1323585164385928, "learning_rate": 2.676156462566844e-07, "loss": 0.2523, "step": 38214 }, { "epoch": 0.6642736706704445, "grad_norm": 1.7026343200143677, "learning_rate": 2.675907222979064e-07, "loss": 0.2466, "step": 38215 }, { "epoch": 0.6642910532079473, "grad_norm": 1.5457421095744877, "learning_rate": 2.6756579907575896e-07, "loss": 0.3539, "step": 38216 }, { "epoch": 0.6643084357454501, "grad_norm": 2.2098296050607917, "learning_rate": 2.6754087659032143e-07, "loss": 0.3105, "step": 38217 }, { "epoch": 0.6643258182829529, "grad_norm": 1.468982503866127, "learning_rate": 2.6751595484167244e-07, "loss": 0.2261, "step": 38218 }, { "epoch": 0.6643432008204557, "grad_norm": 1.3660495797164378, "learning_rate": 2.674910338298909e-07, "loss": 0.1369, "step": 38219 }, { "epoch": 0.6643605833579586, "grad_norm": 0.8392600128420349, "learning_rate": 2.674661135550561e-07, "loss": 0.2195, "step": 38220 }, { "epoch": 0.6643779658954614, "grad_norm": 1.6802138005839773, "learning_rate": 2.67441194017247e-07, "loss": 0.186, "step": 38221 }, { "epoch": 0.6643953484329642, "grad_norm": 1.3496888560345626, "learning_rate": 2.6741627521654237e-07, "loss": 0.1576, "step": 38222 }, { "epoch": 0.664412730970467, "grad_norm": 1.533708880541542, "learning_rate": 2.6739135715302133e-07, "loss": 0.1423, "step": 38223 }, { "epoch": 0.6644301135079699, "grad_norm": 1.5772138651926382, "learning_rate": 2.6736643982676286e-07, "loss": 0.2254, "step": 38224 }, { "epoch": 0.6644474960454727, "grad_norm": 2.0419235240137144, "learning_rate": 2.67341523237846e-07, "loss": 0.1692, "step": 38225 }, { "epoch": 0.6644648785829755, "grad_norm": 1.8131923065315216, "learning_rate": 2.6731660738634953e-07, "loss": 0.3169, "step": 38226 }, { "epoch": 0.6644822611204784, "grad_norm": 1.3383012496959734, "learning_rate": 2.6729169227235233e-07, "loss": 0.1906, "step": 38227 }, { "epoch": 0.6644996436579812, "grad_norm": 3.4693713279834193, "learning_rate": 2.672667778959338e-07, "loss": 0.3099, "step": 38228 }, { "epoch": 0.664517026195484, "grad_norm": 1.5937804423578148, "learning_rate": 2.6724186425717264e-07, "loss": 0.2402, "step": 38229 }, { "epoch": 0.6645344087329869, "grad_norm": 1.441897639503653, "learning_rate": 2.6721695135614806e-07, "loss": 0.2553, "step": 38230 }, { "epoch": 0.6645517912704897, "grad_norm": 1.0610129846571008, "learning_rate": 2.671920391929384e-07, "loss": 0.1724, "step": 38231 }, { "epoch": 0.6645691738079925, "grad_norm": 3.2434789951922838, "learning_rate": 2.671671277676233e-07, "loss": 0.2473, "step": 38232 }, { "epoch": 0.6645865563454953, "grad_norm": 1.459024592329615, "learning_rate": 2.671422170802814e-07, "loss": 0.2204, "step": 38233 }, { "epoch": 0.6646039388829982, "grad_norm": 1.617707476250191, "learning_rate": 2.671173071309918e-07, "loss": 0.1996, "step": 38234 }, { "epoch": 0.664621321420501, "grad_norm": 1.2678259693252996, "learning_rate": 2.6709239791983305e-07, "loss": 0.206, "step": 38235 }, { "epoch": 0.6646387039580038, "grad_norm": 1.0072860776858286, "learning_rate": 2.6706748944688486e-07, "loss": 0.1647, "step": 38236 }, { "epoch": 0.6646560864955066, "grad_norm": 1.052875214919506, "learning_rate": 2.670425817122255e-07, "loss": 0.2641, "step": 38237 }, { "epoch": 0.6646734690330094, "grad_norm": 2.684181456582413, "learning_rate": 2.670176747159341e-07, "loss": 0.1479, "step": 38238 }, { "epoch": 0.6646908515705122, "grad_norm": 1.8099145726215167, "learning_rate": 2.6699276845808947e-07, "loss": 0.3708, "step": 38239 }, { "epoch": 0.664708234108015, "grad_norm": 1.4386812566249256, "learning_rate": 2.66967862938771e-07, "loss": 0.4185, "step": 38240 }, { "epoch": 0.6647256166455179, "grad_norm": 1.4699008215076, "learning_rate": 2.669429581580573e-07, "loss": 0.1958, "step": 38241 }, { "epoch": 0.6647429991830207, "grad_norm": 1.1903282982410655, "learning_rate": 2.669180541160274e-07, "loss": 0.1431, "step": 38242 }, { "epoch": 0.6647603817205235, "grad_norm": 1.223636680742787, "learning_rate": 2.6689315081276016e-07, "loss": 0.159, "step": 38243 }, { "epoch": 0.6647777642580264, "grad_norm": 1.4355156631854777, "learning_rate": 2.6686824824833455e-07, "loss": 0.3104, "step": 38244 }, { "epoch": 0.6647951467955292, "grad_norm": 1.5406984496546507, "learning_rate": 2.6684334642282957e-07, "loss": 0.2933, "step": 38245 }, { "epoch": 0.664812529333032, "grad_norm": 2.2099056744668637, "learning_rate": 2.6681844533632404e-07, "loss": 0.2678, "step": 38246 }, { "epoch": 0.6648299118705349, "grad_norm": 1.9013051424097895, "learning_rate": 2.667935449888967e-07, "loss": 0.3346, "step": 38247 }, { "epoch": 0.6648472944080377, "grad_norm": 1.961977040904052, "learning_rate": 2.6676864538062686e-07, "loss": 0.3411, "step": 38248 }, { "epoch": 0.6648646769455405, "grad_norm": 1.8706749911294913, "learning_rate": 2.6674374651159355e-07, "loss": 0.4519, "step": 38249 }, { "epoch": 0.6648820594830434, "grad_norm": 1.356624473039934, "learning_rate": 2.6671884838187496e-07, "loss": 0.3608, "step": 38250 }, { "epoch": 0.6648994420205462, "grad_norm": 1.5236255408301673, "learning_rate": 2.666939509915507e-07, "loss": 0.201, "step": 38251 }, { "epoch": 0.664916824558049, "grad_norm": 1.3013949344550153, "learning_rate": 2.666690543406994e-07, "loss": 0.2702, "step": 38252 }, { "epoch": 0.6649342070955518, "grad_norm": 1.4604109648666774, "learning_rate": 2.666441584294001e-07, "loss": 0.2188, "step": 38253 }, { "epoch": 0.6649515896330547, "grad_norm": 1.5180163812681986, "learning_rate": 2.666192632577314e-07, "loss": 0.434, "step": 38254 }, { "epoch": 0.6649689721705575, "grad_norm": 1.1830577692186746, "learning_rate": 2.665943688257728e-07, "loss": 0.234, "step": 38255 }, { "epoch": 0.6649863547080603, "grad_norm": 1.9831832038092627, "learning_rate": 2.665694751336026e-07, "loss": 0.3002, "step": 38256 }, { "epoch": 0.665003737245563, "grad_norm": 0.8277436272271675, "learning_rate": 2.6654458218129994e-07, "loss": 0.1739, "step": 38257 }, { "epoch": 0.6650211197830659, "grad_norm": 1.1719758610630842, "learning_rate": 2.6651968996894356e-07, "loss": 0.1327, "step": 38258 }, { "epoch": 0.6650385023205687, "grad_norm": 2.8405890972685244, "learning_rate": 2.664947984966127e-07, "loss": 0.3444, "step": 38259 }, { "epoch": 0.6650558848580715, "grad_norm": 2.335167569553511, "learning_rate": 2.6646990776438605e-07, "loss": 0.3434, "step": 38260 }, { "epoch": 0.6650732673955744, "grad_norm": 0.4588936014091627, "learning_rate": 2.664450177723425e-07, "loss": 0.2228, "step": 38261 }, { "epoch": 0.6650906499330772, "grad_norm": 1.1507310366256802, "learning_rate": 2.66420128520561e-07, "loss": 0.1793, "step": 38262 }, { "epoch": 0.66510803247058, "grad_norm": 1.1231757044556294, "learning_rate": 2.6639524000912037e-07, "loss": 0.1659, "step": 38263 }, { "epoch": 0.6651254150080829, "grad_norm": 1.1834358512824705, "learning_rate": 2.6637035223809956e-07, "loss": 0.2398, "step": 38264 }, { "epoch": 0.6651427975455857, "grad_norm": 5.066608548304586, "learning_rate": 2.663454652075774e-07, "loss": 0.2345, "step": 38265 }, { "epoch": 0.6651601800830885, "grad_norm": 1.990325168989839, "learning_rate": 2.663205789176325e-07, "loss": 0.2906, "step": 38266 }, { "epoch": 0.6651775626205914, "grad_norm": 2.199031138337075, "learning_rate": 2.662956933683443e-07, "loss": 0.2963, "step": 38267 }, { "epoch": 0.6651949451580942, "grad_norm": 3.0516839532875335, "learning_rate": 2.6627080855979156e-07, "loss": 0.229, "step": 38268 }, { "epoch": 0.665212327695597, "grad_norm": 1.5526076614740378, "learning_rate": 2.6624592449205264e-07, "loss": 0.2734, "step": 38269 }, { "epoch": 0.6652297102330998, "grad_norm": 2.0107504641156435, "learning_rate": 2.662210411652069e-07, "loss": 0.2612, "step": 38270 }, { "epoch": 0.6652470927706027, "grad_norm": 1.5357407924613313, "learning_rate": 2.661961585793331e-07, "loss": 0.238, "step": 38271 }, { "epoch": 0.6652644753081055, "grad_norm": 2.5092261274195633, "learning_rate": 2.661712767345101e-07, "loss": 0.1454, "step": 38272 }, { "epoch": 0.6652818578456083, "grad_norm": 1.1661713426877383, "learning_rate": 2.6614639563081676e-07, "loss": 0.2603, "step": 38273 }, { "epoch": 0.6652992403831112, "grad_norm": 1.3353001133819185, "learning_rate": 2.661215152683318e-07, "loss": 0.2576, "step": 38274 }, { "epoch": 0.665316622920614, "grad_norm": 3.657189454644966, "learning_rate": 2.6609663564713434e-07, "loss": 0.205, "step": 38275 }, { "epoch": 0.6653340054581168, "grad_norm": 1.301190365624241, "learning_rate": 2.66071756767303e-07, "loss": 0.2196, "step": 38276 }, { "epoch": 0.6653513879956195, "grad_norm": 2.0097241759906956, "learning_rate": 2.660468786289166e-07, "loss": 0.233, "step": 38277 }, { "epoch": 0.6653687705331224, "grad_norm": 1.1804974430747717, "learning_rate": 2.660220012320543e-07, "loss": 0.2554, "step": 38278 }, { "epoch": 0.6653861530706252, "grad_norm": 2.0876667275570604, "learning_rate": 2.659971245767948e-07, "loss": 0.2501, "step": 38279 }, { "epoch": 0.665403535608128, "grad_norm": 1.5233891763817011, "learning_rate": 2.659722486632169e-07, "loss": 0.1847, "step": 38280 }, { "epoch": 0.6654209181456309, "grad_norm": 1.3436824187953342, "learning_rate": 2.6594737349139946e-07, "loss": 0.1612, "step": 38281 }, { "epoch": 0.6654383006831337, "grad_norm": 3.319856624686132, "learning_rate": 2.659224990614213e-07, "loss": 0.3753, "step": 38282 }, { "epoch": 0.6654556832206365, "grad_norm": 1.6596523087216477, "learning_rate": 2.6589762537336136e-07, "loss": 0.3105, "step": 38283 }, { "epoch": 0.6654730657581394, "grad_norm": 2.1198478869953568, "learning_rate": 2.6587275242729833e-07, "loss": 0.1614, "step": 38284 }, { "epoch": 0.6654904482956422, "grad_norm": 1.2089800984360082, "learning_rate": 2.658478802233111e-07, "loss": 0.1159, "step": 38285 }, { "epoch": 0.665507830833145, "grad_norm": 2.683650176393283, "learning_rate": 2.658230087614786e-07, "loss": 0.1741, "step": 38286 }, { "epoch": 0.6655252133706479, "grad_norm": 1.1781943567390019, "learning_rate": 2.657981380418797e-07, "loss": 0.5256, "step": 38287 }, { "epoch": 0.6655425959081507, "grad_norm": 1.39884103617768, "learning_rate": 2.65773268064593e-07, "loss": 0.178, "step": 38288 }, { "epoch": 0.6655599784456535, "grad_norm": 1.2825528591278748, "learning_rate": 2.6574839882969727e-07, "loss": 0.216, "step": 38289 }, { "epoch": 0.6655773609831563, "grad_norm": 0.8594373183921017, "learning_rate": 2.657235303372717e-07, "loss": 0.263, "step": 38290 }, { "epoch": 0.6655947435206592, "grad_norm": 1.8118973880778289, "learning_rate": 2.656986625873949e-07, "loss": 0.3069, "step": 38291 }, { "epoch": 0.665612126058162, "grad_norm": 3.0644351342324185, "learning_rate": 2.656737955801458e-07, "loss": 0.2996, "step": 38292 }, { "epoch": 0.6656295085956648, "grad_norm": 1.304593762133437, "learning_rate": 2.65648929315603e-07, "loss": 0.2436, "step": 38293 }, { "epoch": 0.6656468911331677, "grad_norm": 1.1883147722983909, "learning_rate": 2.656240637938455e-07, "loss": 0.2265, "step": 38294 }, { "epoch": 0.6656642736706705, "grad_norm": 1.0329828729571653, "learning_rate": 2.655991990149521e-07, "loss": 0.2254, "step": 38295 }, { "epoch": 0.6656816562081733, "grad_norm": 1.981238389524406, "learning_rate": 2.655743349790015e-07, "loss": 0.27, "step": 38296 }, { "epoch": 0.665699038745676, "grad_norm": 2.663997470641948, "learning_rate": 2.6554947168607237e-07, "loss": 0.3754, "step": 38297 }, { "epoch": 0.6657164212831789, "grad_norm": 2.7326423644101077, "learning_rate": 2.65524609136244e-07, "loss": 0.2395, "step": 38298 }, { "epoch": 0.6657338038206817, "grad_norm": 1.790487999749062, "learning_rate": 2.654997473295948e-07, "loss": 0.2983, "step": 38299 }, { "epoch": 0.6657511863581845, "grad_norm": 0.7620258914629896, "learning_rate": 2.654748862662037e-07, "loss": 0.2049, "step": 38300 }, { "epoch": 0.6657685688956874, "grad_norm": 1.060933983166831, "learning_rate": 2.6545002594614954e-07, "loss": 0.2048, "step": 38301 }, { "epoch": 0.6657859514331902, "grad_norm": 1.2025191148076817, "learning_rate": 2.65425166369511e-07, "loss": 0.1269, "step": 38302 }, { "epoch": 0.665803333970693, "grad_norm": 1.4998929435918407, "learning_rate": 2.654003075363669e-07, "loss": 0.1926, "step": 38303 }, { "epoch": 0.6658207165081959, "grad_norm": 1.6600516930970521, "learning_rate": 2.6537544944679614e-07, "loss": 0.1843, "step": 38304 }, { "epoch": 0.6658380990456987, "grad_norm": 1.5282162215608528, "learning_rate": 2.653505921008772e-07, "loss": 0.2667, "step": 38305 }, { "epoch": 0.6658554815832015, "grad_norm": 1.019950382612669, "learning_rate": 2.653257354986895e-07, "loss": 0.2187, "step": 38306 }, { "epoch": 0.6658728641207043, "grad_norm": 1.0543224775395823, "learning_rate": 2.653008796403112e-07, "loss": 0.2493, "step": 38307 }, { "epoch": 0.6658902466582072, "grad_norm": 2.968961220184772, "learning_rate": 2.652760245258211e-07, "loss": 0.2523, "step": 38308 }, { "epoch": 0.66590762919571, "grad_norm": 2.364367782877411, "learning_rate": 2.652511701552984e-07, "loss": 0.1766, "step": 38309 }, { "epoch": 0.6659250117332128, "grad_norm": 1.3301872257924836, "learning_rate": 2.6522631652882165e-07, "loss": 0.1959, "step": 38310 }, { "epoch": 0.6659423942707157, "grad_norm": 1.1806895455670334, "learning_rate": 2.6520146364646955e-07, "loss": 0.207, "step": 38311 }, { "epoch": 0.6659597768082185, "grad_norm": 3.9219194882731414, "learning_rate": 2.65176611508321e-07, "loss": 0.3167, "step": 38312 }, { "epoch": 0.6659771593457213, "grad_norm": 1.8800847159163638, "learning_rate": 2.6515176011445484e-07, "loss": 0.2277, "step": 38313 }, { "epoch": 0.6659945418832242, "grad_norm": 1.9167414214665763, "learning_rate": 2.6512690946494957e-07, "loss": 0.3067, "step": 38314 }, { "epoch": 0.666011924420727, "grad_norm": 0.9625599483893101, "learning_rate": 2.651020595598842e-07, "loss": 0.2, "step": 38315 }, { "epoch": 0.6660293069582298, "grad_norm": 1.383937705266735, "learning_rate": 2.6507721039933716e-07, "loss": 0.2125, "step": 38316 }, { "epoch": 0.6660466894957325, "grad_norm": 2.842133896776948, "learning_rate": 2.6505236198338767e-07, "loss": 0.2662, "step": 38317 }, { "epoch": 0.6660640720332354, "grad_norm": 1.6149196529858558, "learning_rate": 2.650275143121144e-07, "loss": 0.3233, "step": 38318 }, { "epoch": 0.6660814545707382, "grad_norm": 1.9837262872379893, "learning_rate": 2.6500266738559586e-07, "loss": 0.2914, "step": 38319 }, { "epoch": 0.666098837108241, "grad_norm": 1.7198487944585916, "learning_rate": 2.6497782120391065e-07, "loss": 0.1761, "step": 38320 }, { "epoch": 0.6661162196457439, "grad_norm": 1.4540691781159738, "learning_rate": 2.64952975767138e-07, "loss": 0.2287, "step": 38321 }, { "epoch": 0.6661336021832467, "grad_norm": 1.0893247224737779, "learning_rate": 2.6492813107535637e-07, "loss": 0.244, "step": 38322 }, { "epoch": 0.6661509847207495, "grad_norm": 1.8787054407973083, "learning_rate": 2.649032871286447e-07, "loss": 0.2062, "step": 38323 }, { "epoch": 0.6661683672582523, "grad_norm": 1.2732540947139546, "learning_rate": 2.6487844392708136e-07, "loss": 0.2125, "step": 38324 }, { "epoch": 0.6661857497957552, "grad_norm": 1.2713825595006651, "learning_rate": 2.6485360147074574e-07, "loss": 0.1863, "step": 38325 }, { "epoch": 0.666203132333258, "grad_norm": 2.3970571601615673, "learning_rate": 2.648287597597161e-07, "loss": 0.2279, "step": 38326 }, { "epoch": 0.6662205148707608, "grad_norm": 1.344300400104731, "learning_rate": 2.6480391879407117e-07, "loss": 0.2017, "step": 38327 }, { "epoch": 0.6662378974082637, "grad_norm": 2.3193671434056307, "learning_rate": 2.647790785738896e-07, "loss": 0.1355, "step": 38328 }, { "epoch": 0.6662552799457665, "grad_norm": 1.5951611859357033, "learning_rate": 2.647542390992504e-07, "loss": 0.1946, "step": 38329 }, { "epoch": 0.6662726624832693, "grad_norm": 1.4178862506395922, "learning_rate": 2.647294003702323e-07, "loss": 0.1714, "step": 38330 }, { "epoch": 0.6662900450207722, "grad_norm": 1.2546196275839931, "learning_rate": 2.647045623869139e-07, "loss": 0.2, "step": 38331 }, { "epoch": 0.666307427558275, "grad_norm": 1.2199729671305048, "learning_rate": 2.6467972514937396e-07, "loss": 0.2038, "step": 38332 }, { "epoch": 0.6663248100957778, "grad_norm": 0.9820882588415883, "learning_rate": 2.646548886576912e-07, "loss": 0.1221, "step": 38333 }, { "epoch": 0.6663421926332806, "grad_norm": 1.246799269586403, "learning_rate": 2.646300529119443e-07, "loss": 0.2603, "step": 38334 }, { "epoch": 0.6663595751707835, "grad_norm": 1.078928335799614, "learning_rate": 2.646052179122121e-07, "loss": 0.1195, "step": 38335 }, { "epoch": 0.6663769577082863, "grad_norm": 1.6276053095711307, "learning_rate": 2.6458038365857295e-07, "loss": 0.1677, "step": 38336 }, { "epoch": 0.666394340245789, "grad_norm": 1.7222448616146704, "learning_rate": 2.645555501511062e-07, "loss": 0.2215, "step": 38337 }, { "epoch": 0.6664117227832919, "grad_norm": 3.0315992816846347, "learning_rate": 2.6453071738989007e-07, "loss": 0.3029, "step": 38338 }, { "epoch": 0.6664291053207947, "grad_norm": 2.3314900791975774, "learning_rate": 2.6450588537500317e-07, "loss": 0.4719, "step": 38339 }, { "epoch": 0.6664464878582975, "grad_norm": 0.8233019530076029, "learning_rate": 2.6448105410652467e-07, "loss": 0.1581, "step": 38340 }, { "epoch": 0.6664638703958004, "grad_norm": 1.8624917580442943, "learning_rate": 2.6445622358453303e-07, "loss": 0.1792, "step": 38341 }, { "epoch": 0.6664812529333032, "grad_norm": 1.4939134001866303, "learning_rate": 2.6443139380910696e-07, "loss": 0.1646, "step": 38342 }, { "epoch": 0.666498635470806, "grad_norm": 1.5583436743853962, "learning_rate": 2.644065647803252e-07, "loss": 0.17, "step": 38343 }, { "epoch": 0.6665160180083088, "grad_norm": 1.3894038413155443, "learning_rate": 2.643817364982663e-07, "loss": 0.253, "step": 38344 }, { "epoch": 0.6665334005458117, "grad_norm": 0.9529041690425565, "learning_rate": 2.643569089630091e-07, "loss": 0.114, "step": 38345 }, { "epoch": 0.6665507830833145, "grad_norm": 1.4034630805120374, "learning_rate": 2.6433208217463233e-07, "loss": 0.1791, "step": 38346 }, { "epoch": 0.6665681656208173, "grad_norm": 1.3219162878583774, "learning_rate": 2.6430725613321437e-07, "loss": 0.2948, "step": 38347 }, { "epoch": 0.6665855481583202, "grad_norm": 1.6043118997231742, "learning_rate": 2.6428243083883426e-07, "loss": 0.1849, "step": 38348 }, { "epoch": 0.666602930695823, "grad_norm": 2.3313852088030558, "learning_rate": 2.6425760629157067e-07, "loss": 0.1527, "step": 38349 }, { "epoch": 0.6666203132333258, "grad_norm": 1.0526463808557252, "learning_rate": 2.6423278249150226e-07, "loss": 0.2213, "step": 38350 }, { "epoch": 0.6666376957708287, "grad_norm": 0.9602644717313022, "learning_rate": 2.6420795943870734e-07, "loss": 0.2396, "step": 38351 }, { "epoch": 0.6666550783083315, "grad_norm": 0.714175441846138, "learning_rate": 2.64183137133265e-07, "loss": 0.1567, "step": 38352 }, { "epoch": 0.6666724608458343, "grad_norm": 0.7797111797114734, "learning_rate": 2.641583155752539e-07, "loss": 0.2648, "step": 38353 }, { "epoch": 0.6666898433833371, "grad_norm": 3.12780166768954, "learning_rate": 2.641334947647524e-07, "loss": 0.3207, "step": 38354 }, { "epoch": 0.66670722592084, "grad_norm": 1.5210028251941599, "learning_rate": 2.641086747018394e-07, "loss": 0.1552, "step": 38355 }, { "epoch": 0.6667246084583427, "grad_norm": 1.4707332534161177, "learning_rate": 2.6408385538659386e-07, "loss": 0.1618, "step": 38356 }, { "epoch": 0.6667419909958455, "grad_norm": 1.3893388930243127, "learning_rate": 2.640590368190939e-07, "loss": 0.2212, "step": 38357 }, { "epoch": 0.6667593735333484, "grad_norm": 1.449089067297611, "learning_rate": 2.640342189994182e-07, "loss": 0.1516, "step": 38358 }, { "epoch": 0.6667767560708512, "grad_norm": 1.3922409857584823, "learning_rate": 2.6400940192764586e-07, "loss": 0.2253, "step": 38359 }, { "epoch": 0.666794138608354, "grad_norm": 1.5333575586385249, "learning_rate": 2.6398458560385526e-07, "loss": 0.1461, "step": 38360 }, { "epoch": 0.6668115211458568, "grad_norm": 1.5636739746324657, "learning_rate": 2.639597700281252e-07, "loss": 0.2266, "step": 38361 }, { "epoch": 0.6668289036833597, "grad_norm": 2.3378293018128904, "learning_rate": 2.639349552005341e-07, "loss": 0.2813, "step": 38362 }, { "epoch": 0.6668462862208625, "grad_norm": 2.0384045526587706, "learning_rate": 2.639101411211609e-07, "loss": 0.2372, "step": 38363 }, { "epoch": 0.6668636687583653, "grad_norm": 1.4095885992561283, "learning_rate": 2.6388532779008396e-07, "loss": 0.1238, "step": 38364 }, { "epoch": 0.6668810512958682, "grad_norm": 1.328118270460897, "learning_rate": 2.638605152073822e-07, "loss": 0.2065, "step": 38365 }, { "epoch": 0.666898433833371, "grad_norm": 2.175030001178651, "learning_rate": 2.638357033731339e-07, "loss": 0.2532, "step": 38366 }, { "epoch": 0.6669158163708738, "grad_norm": 1.4075528214873394, "learning_rate": 2.6381089228741817e-07, "loss": 0.2031, "step": 38367 }, { "epoch": 0.6669331989083767, "grad_norm": 2.518795464523139, "learning_rate": 2.637860819503133e-07, "loss": 0.3295, "step": 38368 }, { "epoch": 0.6669505814458795, "grad_norm": 1.2628304412332991, "learning_rate": 2.637612723618983e-07, "loss": 0.2449, "step": 38369 }, { "epoch": 0.6669679639833823, "grad_norm": 1.123382812591383, "learning_rate": 2.637364635222512e-07, "loss": 0.2505, "step": 38370 }, { "epoch": 0.6669853465208851, "grad_norm": 1.7172426281621838, "learning_rate": 2.6371165543145115e-07, "loss": 0.2428, "step": 38371 }, { "epoch": 0.667002729058388, "grad_norm": 1.2822504715372733, "learning_rate": 2.6368684808957654e-07, "loss": 0.2165, "step": 38372 }, { "epoch": 0.6670201115958908, "grad_norm": 1.2213500621662117, "learning_rate": 2.636620414967061e-07, "loss": 0.2299, "step": 38373 }, { "epoch": 0.6670374941333936, "grad_norm": 1.4335554938340642, "learning_rate": 2.6363723565291827e-07, "loss": 0.1401, "step": 38374 }, { "epoch": 0.6670548766708965, "grad_norm": 1.3841484194470035, "learning_rate": 2.6361243055829216e-07, "loss": 0.1984, "step": 38375 }, { "epoch": 0.6670722592083992, "grad_norm": 1.2254334454213365, "learning_rate": 2.63587626212906e-07, "loss": 0.2187, "step": 38376 }, { "epoch": 0.667089641745902, "grad_norm": 1.3460106830641536, "learning_rate": 2.6356282261683833e-07, "loss": 0.2798, "step": 38377 }, { "epoch": 0.6671070242834048, "grad_norm": 2.0773982877496477, "learning_rate": 2.635380197701678e-07, "loss": 0.2018, "step": 38378 }, { "epoch": 0.6671244068209077, "grad_norm": 1.2647824843150104, "learning_rate": 2.6351321767297316e-07, "loss": 0.2715, "step": 38379 }, { "epoch": 0.6671417893584105, "grad_norm": 1.0456820231541597, "learning_rate": 2.6348841632533314e-07, "loss": 0.1818, "step": 38380 }, { "epoch": 0.6671591718959133, "grad_norm": 2.7224291201540978, "learning_rate": 2.634636157273261e-07, "loss": 0.3235, "step": 38381 }, { "epoch": 0.6671765544334162, "grad_norm": 2.0112559670169494, "learning_rate": 2.634388158790308e-07, "loss": 0.4756, "step": 38382 }, { "epoch": 0.667193936970919, "grad_norm": 2.400026732712686, "learning_rate": 2.634140167805258e-07, "loss": 0.1955, "step": 38383 }, { "epoch": 0.6672113195084218, "grad_norm": 1.6689941084079944, "learning_rate": 2.6338921843188966e-07, "loss": 0.1868, "step": 38384 }, { "epoch": 0.6672287020459247, "grad_norm": 2.7440183857084346, "learning_rate": 2.63364420833201e-07, "loss": 0.244, "step": 38385 }, { "epoch": 0.6672460845834275, "grad_norm": 1.0784861878150964, "learning_rate": 2.6333962398453825e-07, "loss": 0.264, "step": 38386 }, { "epoch": 0.6672634671209303, "grad_norm": 1.2192841825202891, "learning_rate": 2.6331482788598035e-07, "loss": 0.1218, "step": 38387 }, { "epoch": 0.6672808496584332, "grad_norm": 1.979942662588598, "learning_rate": 2.6329003253760597e-07, "loss": 0.2555, "step": 38388 }, { "epoch": 0.667298232195936, "grad_norm": 1.8475548561044015, "learning_rate": 2.63265237939493e-07, "loss": 0.2912, "step": 38389 }, { "epoch": 0.6673156147334388, "grad_norm": 1.5893152523037681, "learning_rate": 2.632404440917207e-07, "loss": 0.2665, "step": 38390 }, { "epoch": 0.6673329972709416, "grad_norm": 1.569010259096622, "learning_rate": 2.632156509943674e-07, "loss": 0.2044, "step": 38391 }, { "epoch": 0.6673503798084445, "grad_norm": 1.3575279968249907, "learning_rate": 2.631908586475118e-07, "loss": 0.2811, "step": 38392 }, { "epoch": 0.6673677623459473, "grad_norm": 2.819807085161405, "learning_rate": 2.631660670512323e-07, "loss": 0.3031, "step": 38393 }, { "epoch": 0.6673851448834501, "grad_norm": 1.2830123834721527, "learning_rate": 2.631412762056076e-07, "loss": 0.4627, "step": 38394 }, { "epoch": 0.667402527420953, "grad_norm": 1.2157147902727978, "learning_rate": 2.6311648611071626e-07, "loss": 0.2761, "step": 38395 }, { "epoch": 0.6674199099584557, "grad_norm": 1.7717256120925067, "learning_rate": 2.630916967666369e-07, "loss": 0.179, "step": 38396 }, { "epoch": 0.6674372924959585, "grad_norm": 1.265132228948847, "learning_rate": 2.630669081734478e-07, "loss": 0.2263, "step": 38397 }, { "epoch": 0.6674546750334613, "grad_norm": 1.5760855488806094, "learning_rate": 2.630421203312279e-07, "loss": 0.1862, "step": 38398 }, { "epoch": 0.6674720575709642, "grad_norm": 1.7239925431836198, "learning_rate": 2.630173332400557e-07, "loss": 0.2439, "step": 38399 }, { "epoch": 0.667489440108467, "grad_norm": 1.5274423360078064, "learning_rate": 2.6299254690000964e-07, "loss": 0.3329, "step": 38400 }, { "epoch": 0.6675068226459698, "grad_norm": 1.7739426935429572, "learning_rate": 2.6296776131116836e-07, "loss": 0.2869, "step": 38401 }, { "epoch": 0.6675242051834727, "grad_norm": 1.4343853983286583, "learning_rate": 2.629429764736104e-07, "loss": 0.2951, "step": 38402 }, { "epoch": 0.6675415877209755, "grad_norm": 1.5749022968974251, "learning_rate": 2.629181923874143e-07, "loss": 0.2366, "step": 38403 }, { "epoch": 0.6675589702584783, "grad_norm": 1.6136630583017086, "learning_rate": 2.628934090526587e-07, "loss": 0.1751, "step": 38404 }, { "epoch": 0.6675763527959812, "grad_norm": 1.0177362280369, "learning_rate": 2.6286862646942176e-07, "loss": 0.353, "step": 38405 }, { "epoch": 0.667593735333484, "grad_norm": 1.8252505179857055, "learning_rate": 2.628438446377826e-07, "loss": 0.2148, "step": 38406 }, { "epoch": 0.6676111178709868, "grad_norm": 1.6409623784959237, "learning_rate": 2.628190635578197e-07, "loss": 0.2025, "step": 38407 }, { "epoch": 0.6676285004084896, "grad_norm": 1.029184364190231, "learning_rate": 2.627942832296112e-07, "loss": 0.1508, "step": 38408 }, { "epoch": 0.6676458829459925, "grad_norm": 1.1772481106280075, "learning_rate": 2.627695036532357e-07, "loss": 0.1901, "step": 38409 }, { "epoch": 0.6676632654834953, "grad_norm": 2.054869653320042, "learning_rate": 2.627447248287721e-07, "loss": 0.2991, "step": 38410 }, { "epoch": 0.6676806480209981, "grad_norm": 1.3192394324600591, "learning_rate": 2.627199467562987e-07, "loss": 0.2418, "step": 38411 }, { "epoch": 0.667698030558501, "grad_norm": 1.3072275901693775, "learning_rate": 2.626951694358941e-07, "loss": 0.2, "step": 38412 }, { "epoch": 0.6677154130960038, "grad_norm": 1.534263185294365, "learning_rate": 2.6267039286763676e-07, "loss": 0.2112, "step": 38413 }, { "epoch": 0.6677327956335066, "grad_norm": 1.565031929960135, "learning_rate": 2.6264561705160524e-07, "loss": 0.236, "step": 38414 }, { "epoch": 0.6677501781710095, "grad_norm": 1.7036979924240498, "learning_rate": 2.6262084198787815e-07, "loss": 0.1986, "step": 38415 }, { "epoch": 0.6677675607085122, "grad_norm": 1.0424848083979443, "learning_rate": 2.6259606767653396e-07, "loss": 0.2626, "step": 38416 }, { "epoch": 0.667784943246015, "grad_norm": 1.194938038651484, "learning_rate": 2.625712941176509e-07, "loss": 0.1848, "step": 38417 }, { "epoch": 0.6678023257835178, "grad_norm": 3.2523995834430695, "learning_rate": 2.6254652131130807e-07, "loss": 0.2476, "step": 38418 }, { "epoch": 0.6678197083210207, "grad_norm": 1.2486720733003942, "learning_rate": 2.6252174925758366e-07, "loss": 0.3639, "step": 38419 }, { "epoch": 0.6678370908585235, "grad_norm": 1.370667702742229, "learning_rate": 2.6249697795655613e-07, "loss": 0.2088, "step": 38420 }, { "epoch": 0.6678544733960263, "grad_norm": 2.237652970964385, "learning_rate": 2.6247220740830414e-07, "loss": 0.2943, "step": 38421 }, { "epoch": 0.6678718559335292, "grad_norm": 0.9006228852808464, "learning_rate": 2.624474376129061e-07, "loss": 0.1163, "step": 38422 }, { "epoch": 0.667889238471032, "grad_norm": 1.4802530263169111, "learning_rate": 2.6242266857044067e-07, "loss": 0.1983, "step": 38423 }, { "epoch": 0.6679066210085348, "grad_norm": 0.9716772722190171, "learning_rate": 2.6239790028098616e-07, "loss": 0.2937, "step": 38424 }, { "epoch": 0.6679240035460376, "grad_norm": 1.7331359070696388, "learning_rate": 2.6237313274462096e-07, "loss": 0.295, "step": 38425 }, { "epoch": 0.6679413860835405, "grad_norm": 1.2847208854296512, "learning_rate": 2.623483659614242e-07, "loss": 0.3163, "step": 38426 }, { "epoch": 0.6679587686210433, "grad_norm": 1.1051899845763389, "learning_rate": 2.6232359993147373e-07, "loss": 0.1963, "step": 38427 }, { "epoch": 0.6679761511585461, "grad_norm": 1.1534697513095877, "learning_rate": 2.622988346548481e-07, "loss": 0.1795, "step": 38428 }, { "epoch": 0.667993533696049, "grad_norm": 1.8953785499913216, "learning_rate": 2.622740701316262e-07, "loss": 0.2331, "step": 38429 }, { "epoch": 0.6680109162335518, "grad_norm": 1.5554590726295745, "learning_rate": 2.6224930636188623e-07, "loss": 0.2862, "step": 38430 }, { "epoch": 0.6680282987710546, "grad_norm": 1.8058676514391259, "learning_rate": 2.6222454334570675e-07, "loss": 0.2584, "step": 38431 }, { "epoch": 0.6680456813085575, "grad_norm": 1.4839918854415006, "learning_rate": 2.6219978108316625e-07, "loss": 0.1821, "step": 38432 }, { "epoch": 0.6680630638460603, "grad_norm": 2.355606101658794, "learning_rate": 2.621750195743432e-07, "loss": 0.2249, "step": 38433 }, { "epoch": 0.6680804463835631, "grad_norm": 1.9679021828099819, "learning_rate": 2.621502588193161e-07, "loss": 0.2381, "step": 38434 }, { "epoch": 0.668097828921066, "grad_norm": 1.0042451820785836, "learning_rate": 2.621254988181635e-07, "loss": 0.3031, "step": 38435 }, { "epoch": 0.6681152114585687, "grad_norm": 1.526790893826447, "learning_rate": 2.6210073957096345e-07, "loss": 0.2845, "step": 38436 }, { "epoch": 0.6681325939960715, "grad_norm": 2.8025576578099276, "learning_rate": 2.6207598107779506e-07, "loss": 0.2279, "step": 38437 }, { "epoch": 0.6681499765335743, "grad_norm": 0.9214531275878977, "learning_rate": 2.6205122333873677e-07, "loss": 0.2144, "step": 38438 }, { "epoch": 0.6681673590710772, "grad_norm": 1.3514573577063973, "learning_rate": 2.620264663538665e-07, "loss": 0.1122, "step": 38439 }, { "epoch": 0.66818474160858, "grad_norm": 0.9278527754068059, "learning_rate": 2.6200171012326283e-07, "loss": 0.1875, "step": 38440 }, { "epoch": 0.6682021241460828, "grad_norm": 1.0804993751125629, "learning_rate": 2.6197695464700467e-07, "loss": 0.2209, "step": 38441 }, { "epoch": 0.6682195066835857, "grad_norm": 1.6488401292147998, "learning_rate": 2.6195219992517017e-07, "loss": 0.2036, "step": 38442 }, { "epoch": 0.6682368892210885, "grad_norm": 2.361628054024012, "learning_rate": 2.6192744595783784e-07, "loss": 0.3054, "step": 38443 }, { "epoch": 0.6682542717585913, "grad_norm": 1.1745297117083013, "learning_rate": 2.61902692745086e-07, "loss": 0.1819, "step": 38444 }, { "epoch": 0.6682716542960941, "grad_norm": 1.3385427375254664, "learning_rate": 2.618779402869936e-07, "loss": 0.2897, "step": 38445 }, { "epoch": 0.668289036833597, "grad_norm": 1.222585926307638, "learning_rate": 2.618531885836385e-07, "loss": 0.2555, "step": 38446 }, { "epoch": 0.6683064193710998, "grad_norm": 0.5859227864509567, "learning_rate": 2.618284376350993e-07, "loss": 0.2547, "step": 38447 }, { "epoch": 0.6683238019086026, "grad_norm": 1.2317034774409097, "learning_rate": 2.618036874414547e-07, "loss": 0.1902, "step": 38448 }, { "epoch": 0.6683411844461055, "grad_norm": 0.8931953003614526, "learning_rate": 2.61778938002783e-07, "loss": 0.2316, "step": 38449 }, { "epoch": 0.6683585669836083, "grad_norm": 1.3379329357458156, "learning_rate": 2.617541893191626e-07, "loss": 0.1686, "step": 38450 }, { "epoch": 0.6683759495211111, "grad_norm": 1.5489485597711572, "learning_rate": 2.6172944139067207e-07, "loss": 0.1615, "step": 38451 }, { "epoch": 0.668393332058614, "grad_norm": 2.000448199422866, "learning_rate": 2.617046942173897e-07, "loss": 0.1509, "step": 38452 }, { "epoch": 0.6684107145961168, "grad_norm": 1.085617500843539, "learning_rate": 2.616799477993939e-07, "loss": 0.2682, "step": 38453 }, { "epoch": 0.6684280971336196, "grad_norm": 0.6302070766384987, "learning_rate": 2.616552021367633e-07, "loss": 0.1387, "step": 38454 }, { "epoch": 0.6684454796711224, "grad_norm": 1.0195421422380535, "learning_rate": 2.61630457229576e-07, "loss": 0.2909, "step": 38455 }, { "epoch": 0.6684628622086252, "grad_norm": 3.11443676492645, "learning_rate": 2.616057130779109e-07, "loss": 0.269, "step": 38456 }, { "epoch": 0.668480244746128, "grad_norm": 2.1807658690416676, "learning_rate": 2.615809696818463e-07, "loss": 0.26, "step": 38457 }, { "epoch": 0.6684976272836308, "grad_norm": 1.8577741072688538, "learning_rate": 2.615562270414603e-07, "loss": 0.1995, "step": 38458 }, { "epoch": 0.6685150098211337, "grad_norm": 1.9145140077368665, "learning_rate": 2.6153148515683146e-07, "loss": 0.2385, "step": 38459 }, { "epoch": 0.6685323923586365, "grad_norm": 12.754227261770453, "learning_rate": 2.615067440280383e-07, "loss": 0.3358, "step": 38460 }, { "epoch": 0.6685497748961393, "grad_norm": 1.0682804424268288, "learning_rate": 2.614820036551593e-07, "loss": 0.1874, "step": 38461 }, { "epoch": 0.6685671574336421, "grad_norm": 1.163316728071775, "learning_rate": 2.6145726403827275e-07, "loss": 0.1575, "step": 38462 }, { "epoch": 0.668584539971145, "grad_norm": 2.3568285609652384, "learning_rate": 2.6143252517745693e-07, "loss": 0.2069, "step": 38463 }, { "epoch": 0.6686019225086478, "grad_norm": 2.7332142634053613, "learning_rate": 2.6140778707279076e-07, "loss": 0.2203, "step": 38464 }, { "epoch": 0.6686193050461506, "grad_norm": 2.4257031753268268, "learning_rate": 2.6138304972435216e-07, "loss": 0.2008, "step": 38465 }, { "epoch": 0.6686366875836535, "grad_norm": 1.728790496249973, "learning_rate": 2.613583131322197e-07, "loss": 0.2567, "step": 38466 }, { "epoch": 0.6686540701211563, "grad_norm": 1.4394574589672091, "learning_rate": 2.6133357729647146e-07, "loss": 0.2591, "step": 38467 }, { "epoch": 0.6686714526586591, "grad_norm": 1.5009472176448753, "learning_rate": 2.6130884221718643e-07, "loss": 0.2578, "step": 38468 }, { "epoch": 0.668688835196162, "grad_norm": 2.2254326627535095, "learning_rate": 2.6128410789444264e-07, "loss": 0.238, "step": 38469 }, { "epoch": 0.6687062177336648, "grad_norm": 1.7691980323476804, "learning_rate": 2.6125937432831866e-07, "loss": 0.1886, "step": 38470 }, { "epoch": 0.6687236002711676, "grad_norm": 1.6573460267191722, "learning_rate": 2.6123464151889277e-07, "loss": 0.2682, "step": 38471 }, { "epoch": 0.6687409828086704, "grad_norm": 1.228278023625531, "learning_rate": 2.6120990946624344e-07, "loss": 0.1636, "step": 38472 }, { "epoch": 0.6687583653461733, "grad_norm": 1.8467403234318767, "learning_rate": 2.611851781704489e-07, "loss": 0.3459, "step": 38473 }, { "epoch": 0.6687757478836761, "grad_norm": 1.932166549467903, "learning_rate": 2.6116044763158776e-07, "loss": 0.3428, "step": 38474 }, { "epoch": 0.6687931304211789, "grad_norm": 3.418767261369006, "learning_rate": 2.61135717849738e-07, "loss": 0.2886, "step": 38475 }, { "epoch": 0.6688105129586817, "grad_norm": 2.8471932604256676, "learning_rate": 2.6111098882497867e-07, "loss": 0.1813, "step": 38476 }, { "epoch": 0.6688278954961845, "grad_norm": 1.1094686545881003, "learning_rate": 2.610862605573876e-07, "loss": 0.2514, "step": 38477 }, { "epoch": 0.6688452780336873, "grad_norm": 1.1598923922033073, "learning_rate": 2.6106153304704314e-07, "loss": 0.1365, "step": 38478 }, { "epoch": 0.6688626605711901, "grad_norm": 2.032380370408074, "learning_rate": 2.6103680629402406e-07, "loss": 0.3614, "step": 38479 }, { "epoch": 0.668880043108693, "grad_norm": 1.166544433794361, "learning_rate": 2.610120802984086e-07, "loss": 0.3765, "step": 38480 }, { "epoch": 0.6688974256461958, "grad_norm": 3.1010012916423717, "learning_rate": 2.6098735506027497e-07, "loss": 0.2912, "step": 38481 }, { "epoch": 0.6689148081836986, "grad_norm": 1.5575176625395086, "learning_rate": 2.609626305797017e-07, "loss": 0.1926, "step": 38482 }, { "epoch": 0.6689321907212015, "grad_norm": 1.5371738155233134, "learning_rate": 2.60937906856767e-07, "loss": 0.237, "step": 38483 }, { "epoch": 0.6689495732587043, "grad_norm": 1.2928343517330159, "learning_rate": 2.609131838915494e-07, "loss": 0.2118, "step": 38484 }, { "epoch": 0.6689669557962071, "grad_norm": 1.317935936664631, "learning_rate": 2.6088846168412715e-07, "loss": 0.259, "step": 38485 }, { "epoch": 0.66898433833371, "grad_norm": 2.145993589155825, "learning_rate": 2.608637402345784e-07, "loss": 0.2525, "step": 38486 }, { "epoch": 0.6690017208712128, "grad_norm": 1.161578322856744, "learning_rate": 2.60839019542982e-07, "loss": 0.3791, "step": 38487 }, { "epoch": 0.6690191034087156, "grad_norm": 1.945106650928259, "learning_rate": 2.60814299609416e-07, "loss": 0.2819, "step": 38488 }, { "epoch": 0.6690364859462185, "grad_norm": 2.008842478392689, "learning_rate": 2.60789580433959e-07, "loss": 0.21, "step": 38489 }, { "epoch": 0.6690538684837213, "grad_norm": 1.6252523402236536, "learning_rate": 2.607648620166888e-07, "loss": 0.2879, "step": 38490 }, { "epoch": 0.6690712510212241, "grad_norm": 2.9727998049573907, "learning_rate": 2.6074014435768426e-07, "loss": 0.2544, "step": 38491 }, { "epoch": 0.6690886335587269, "grad_norm": 1.9766231495006998, "learning_rate": 2.6071542745702356e-07, "loss": 0.2197, "step": 38492 }, { "epoch": 0.6691060160962298, "grad_norm": 1.8742441407940775, "learning_rate": 2.6069071131478505e-07, "loss": 0.1546, "step": 38493 }, { "epoch": 0.6691233986337326, "grad_norm": 2.2762787822135655, "learning_rate": 2.606659959310468e-07, "loss": 0.2473, "step": 38494 }, { "epoch": 0.6691407811712353, "grad_norm": 2.757456159509315, "learning_rate": 2.6064128130588787e-07, "loss": 0.3154, "step": 38495 }, { "epoch": 0.6691581637087382, "grad_norm": 1.1045342042736477, "learning_rate": 2.6061656743938585e-07, "loss": 0.1621, "step": 38496 }, { "epoch": 0.669175546246241, "grad_norm": 1.0826256001823775, "learning_rate": 2.6059185433161947e-07, "loss": 0.1631, "step": 38497 }, { "epoch": 0.6691929287837438, "grad_norm": 3.27399339913691, "learning_rate": 2.605671419826666e-07, "loss": 0.2417, "step": 38498 }, { "epoch": 0.6692103113212466, "grad_norm": 1.33031420323665, "learning_rate": 2.6054243039260613e-07, "loss": 0.2041, "step": 38499 }, { "epoch": 0.6692276938587495, "grad_norm": 1.053066438212029, "learning_rate": 2.6051771956151624e-07, "loss": 0.1328, "step": 38500 }, { "epoch": 0.6692450763962523, "grad_norm": 1.136561520824705, "learning_rate": 2.604930094894751e-07, "loss": 0.2175, "step": 38501 }, { "epoch": 0.6692624589337551, "grad_norm": 1.282394787910415, "learning_rate": 2.6046830017656114e-07, "loss": 0.1439, "step": 38502 }, { "epoch": 0.669279841471258, "grad_norm": 1.2312892476761905, "learning_rate": 2.604435916228526e-07, "loss": 0.2405, "step": 38503 }, { "epoch": 0.6692972240087608, "grad_norm": 1.2363535000047843, "learning_rate": 2.6041888382842784e-07, "loss": 0.3227, "step": 38504 }, { "epoch": 0.6693146065462636, "grad_norm": 1.6394589276325404, "learning_rate": 2.603941767933652e-07, "loss": 0.1745, "step": 38505 }, { "epoch": 0.6693319890837665, "grad_norm": 2.0668589101581216, "learning_rate": 2.603694705177427e-07, "loss": 0.2502, "step": 38506 }, { "epoch": 0.6693493716212693, "grad_norm": 1.5449585644813468, "learning_rate": 2.6034476500163916e-07, "loss": 0.255, "step": 38507 }, { "epoch": 0.6693667541587721, "grad_norm": 2.8724888943977462, "learning_rate": 2.6032006024513276e-07, "loss": 0.2101, "step": 38508 }, { "epoch": 0.669384136696275, "grad_norm": 1.3866586471199507, "learning_rate": 2.6029535624830137e-07, "loss": 0.3248, "step": 38509 }, { "epoch": 0.6694015192337778, "grad_norm": 1.270495128210295, "learning_rate": 2.6027065301122364e-07, "loss": 0.2374, "step": 38510 }, { "epoch": 0.6694189017712806, "grad_norm": 2.423740450057218, "learning_rate": 2.602459505339779e-07, "loss": 0.2552, "step": 38511 }, { "epoch": 0.6694362843087834, "grad_norm": 1.3980378211465447, "learning_rate": 2.602212488166424e-07, "loss": 0.1708, "step": 38512 }, { "epoch": 0.6694536668462863, "grad_norm": 1.502690130945451, "learning_rate": 2.601965478592953e-07, "loss": 0.2242, "step": 38513 }, { "epoch": 0.6694710493837891, "grad_norm": 1.6997850962896601, "learning_rate": 2.601718476620151e-07, "loss": 0.1679, "step": 38514 }, { "epoch": 0.6694884319212918, "grad_norm": 1.7449812356099186, "learning_rate": 2.6014714822487993e-07, "loss": 0.2807, "step": 38515 }, { "epoch": 0.6695058144587946, "grad_norm": 1.7862052958403873, "learning_rate": 2.601224495479681e-07, "loss": 0.1857, "step": 38516 }, { "epoch": 0.6695231969962975, "grad_norm": 1.8441501191514493, "learning_rate": 2.600977516313577e-07, "loss": 0.2039, "step": 38517 }, { "epoch": 0.6695405795338003, "grad_norm": 1.3378509515261974, "learning_rate": 2.600730544751275e-07, "loss": 0.1512, "step": 38518 }, { "epoch": 0.6695579620713031, "grad_norm": 1.913244735637809, "learning_rate": 2.600483580793554e-07, "loss": 0.3001, "step": 38519 }, { "epoch": 0.669575344608806, "grad_norm": 1.500954514853185, "learning_rate": 2.600236624441199e-07, "loss": 0.2044, "step": 38520 }, { "epoch": 0.6695927271463088, "grad_norm": 2.256414507094746, "learning_rate": 2.59998967569499e-07, "loss": 0.1653, "step": 38521 }, { "epoch": 0.6696101096838116, "grad_norm": 2.159686998198131, "learning_rate": 2.599742734555712e-07, "loss": 0.1761, "step": 38522 }, { "epoch": 0.6696274922213145, "grad_norm": 1.2574147562583138, "learning_rate": 2.5994958010241476e-07, "loss": 0.2269, "step": 38523 }, { "epoch": 0.6696448747588173, "grad_norm": 1.5083141552237365, "learning_rate": 2.599248875101078e-07, "loss": 0.1857, "step": 38524 }, { "epoch": 0.6696622572963201, "grad_norm": 0.9305695119833566, "learning_rate": 2.599001956787285e-07, "loss": 0.2209, "step": 38525 }, { "epoch": 0.669679639833823, "grad_norm": 0.973328010675678, "learning_rate": 2.598755046083555e-07, "loss": 0.1836, "step": 38526 }, { "epoch": 0.6696970223713258, "grad_norm": 1.0312957890106456, "learning_rate": 2.5985081429906703e-07, "loss": 0.1298, "step": 38527 }, { "epoch": 0.6697144049088286, "grad_norm": 1.7017399643109261, "learning_rate": 2.5982612475094096e-07, "loss": 0.2158, "step": 38528 }, { "epoch": 0.6697317874463314, "grad_norm": 1.7746382522959017, "learning_rate": 2.598014359640556e-07, "loss": 0.1993, "step": 38529 }, { "epoch": 0.6697491699838343, "grad_norm": 1.2620873735418148, "learning_rate": 2.597767479384895e-07, "loss": 0.131, "step": 38530 }, { "epoch": 0.6697665525213371, "grad_norm": 1.3092133308273972, "learning_rate": 2.5975206067432064e-07, "loss": 0.1567, "step": 38531 }, { "epoch": 0.6697839350588399, "grad_norm": 2.559962367558649, "learning_rate": 2.597273741716275e-07, "loss": 0.1765, "step": 38532 }, { "epoch": 0.6698013175963428, "grad_norm": 1.3227142558880365, "learning_rate": 2.597026884304883e-07, "loss": 0.307, "step": 38533 }, { "epoch": 0.6698187001338456, "grad_norm": 1.0780910432176327, "learning_rate": 2.5967800345098106e-07, "loss": 0.1215, "step": 38534 }, { "epoch": 0.6698360826713483, "grad_norm": 1.6663189509852725, "learning_rate": 2.5965331923318425e-07, "loss": 0.3147, "step": 38535 }, { "epoch": 0.6698534652088511, "grad_norm": 1.8338770394472879, "learning_rate": 2.5962863577717594e-07, "loss": 0.1475, "step": 38536 }, { "epoch": 0.669870847746354, "grad_norm": 1.515289325738794, "learning_rate": 2.596039530830343e-07, "loss": 0.2373, "step": 38537 }, { "epoch": 0.6698882302838568, "grad_norm": 0.7546926840679199, "learning_rate": 2.595792711508378e-07, "loss": 0.133, "step": 38538 }, { "epoch": 0.6699056128213596, "grad_norm": 2.461222396464939, "learning_rate": 2.5955458998066467e-07, "loss": 0.1401, "step": 38539 }, { "epoch": 0.6699229953588625, "grad_norm": 2.488679354164092, "learning_rate": 2.59529909572593e-07, "loss": 0.2767, "step": 38540 }, { "epoch": 0.6699403778963653, "grad_norm": 2.4318689777412374, "learning_rate": 2.59505229926701e-07, "loss": 0.2351, "step": 38541 }, { "epoch": 0.6699577604338681, "grad_norm": 1.4327236554725444, "learning_rate": 2.59480551043067e-07, "loss": 0.2211, "step": 38542 }, { "epoch": 0.669975142971371, "grad_norm": 1.3230785356050192, "learning_rate": 2.594558729217692e-07, "loss": 0.1862, "step": 38543 }, { "epoch": 0.6699925255088738, "grad_norm": 2.6732356607160614, "learning_rate": 2.5943119556288556e-07, "loss": 0.2353, "step": 38544 }, { "epoch": 0.6700099080463766, "grad_norm": 1.1553747514780677, "learning_rate": 2.594065189664947e-07, "loss": 0.1945, "step": 38545 }, { "epoch": 0.6700272905838794, "grad_norm": 2.29382177177124, "learning_rate": 2.5938184313267485e-07, "loss": 0.2349, "step": 38546 }, { "epoch": 0.6700446731213823, "grad_norm": 1.2384425827249592, "learning_rate": 2.5935716806150387e-07, "loss": 0.2516, "step": 38547 }, { "epoch": 0.6700620556588851, "grad_norm": 1.4971933695315909, "learning_rate": 2.5933249375305985e-07, "loss": 0.3995, "step": 38548 }, { "epoch": 0.6700794381963879, "grad_norm": 1.1786307652662358, "learning_rate": 2.593078202074216e-07, "loss": 0.2734, "step": 38549 }, { "epoch": 0.6700968207338908, "grad_norm": 2.3320871838675954, "learning_rate": 2.592831474246669e-07, "loss": 0.236, "step": 38550 }, { "epoch": 0.6701142032713936, "grad_norm": 1.3299396176791636, "learning_rate": 2.5925847540487405e-07, "loss": 0.2542, "step": 38551 }, { "epoch": 0.6701315858088964, "grad_norm": 1.4087550893768253, "learning_rate": 2.5923380414812124e-07, "loss": 0.1447, "step": 38552 }, { "epoch": 0.6701489683463993, "grad_norm": 1.5751830704882646, "learning_rate": 2.592091336544867e-07, "loss": 0.2368, "step": 38553 }, { "epoch": 0.6701663508839021, "grad_norm": 0.955383234283113, "learning_rate": 2.591844639240486e-07, "loss": 0.1786, "step": 38554 }, { "epoch": 0.6701837334214048, "grad_norm": 1.4195834081421954, "learning_rate": 2.591597949568851e-07, "loss": 0.1643, "step": 38555 }, { "epoch": 0.6702011159589076, "grad_norm": 1.1419811421004913, "learning_rate": 2.5913512675307426e-07, "loss": 0.2294, "step": 38556 }, { "epoch": 0.6702184984964105, "grad_norm": 0.7891357758232259, "learning_rate": 2.591104593126946e-07, "loss": 0.1688, "step": 38557 }, { "epoch": 0.6702358810339133, "grad_norm": 3.0390157697981928, "learning_rate": 2.590857926358243e-07, "loss": 0.3023, "step": 38558 }, { "epoch": 0.6702532635714161, "grad_norm": 1.1523531598316141, "learning_rate": 2.5906112672254097e-07, "loss": 0.1464, "step": 38559 }, { "epoch": 0.670270646108919, "grad_norm": 0.7981569097192299, "learning_rate": 2.5903646157292345e-07, "loss": 0.2184, "step": 38560 }, { "epoch": 0.6702880286464218, "grad_norm": 1.3201775424707614, "learning_rate": 2.5901179718704965e-07, "loss": 0.1614, "step": 38561 }, { "epoch": 0.6703054111839246, "grad_norm": 2.7646970028368676, "learning_rate": 2.589871335649977e-07, "loss": 0.3418, "step": 38562 }, { "epoch": 0.6703227937214274, "grad_norm": 1.341002770607142, "learning_rate": 2.5896247070684587e-07, "loss": 0.1936, "step": 38563 }, { "epoch": 0.6703401762589303, "grad_norm": 1.439182883636492, "learning_rate": 2.5893780861267206e-07, "loss": 0.2516, "step": 38564 }, { "epoch": 0.6703575587964331, "grad_norm": 1.3465491668087637, "learning_rate": 2.58913147282555e-07, "loss": 0.1765, "step": 38565 }, { "epoch": 0.6703749413339359, "grad_norm": 1.5061269976039608, "learning_rate": 2.5888848671657243e-07, "loss": 0.2621, "step": 38566 }, { "epoch": 0.6703923238714388, "grad_norm": 2.359290498595708, "learning_rate": 2.5886382691480234e-07, "loss": 0.3147, "step": 38567 }, { "epoch": 0.6704097064089416, "grad_norm": 1.8777481912513514, "learning_rate": 2.5883916787732343e-07, "loss": 0.3823, "step": 38568 }, { "epoch": 0.6704270889464444, "grad_norm": 7.145029880857204, "learning_rate": 2.5881450960421345e-07, "loss": 0.2551, "step": 38569 }, { "epoch": 0.6704444714839473, "grad_norm": 2.1032590468692707, "learning_rate": 2.587898520955507e-07, "loss": 0.2026, "step": 38570 }, { "epoch": 0.6704618540214501, "grad_norm": 1.817229204065979, "learning_rate": 2.587651953514134e-07, "loss": 0.2595, "step": 38571 }, { "epoch": 0.6704792365589529, "grad_norm": 1.0838894503675964, "learning_rate": 2.5874053937187954e-07, "loss": 0.2522, "step": 38572 }, { "epoch": 0.6704966190964557, "grad_norm": 0.7148404809897366, "learning_rate": 2.587158841570274e-07, "loss": 0.1464, "step": 38573 }, { "epoch": 0.6705140016339586, "grad_norm": 1.3317474077376819, "learning_rate": 2.5869122970693503e-07, "loss": 0.1604, "step": 38574 }, { "epoch": 0.6705313841714613, "grad_norm": 1.1586769535928951, "learning_rate": 2.5866657602168044e-07, "loss": 0.3734, "step": 38575 }, { "epoch": 0.6705487667089641, "grad_norm": 1.3595836037176068, "learning_rate": 2.586419231013421e-07, "loss": 0.2439, "step": 38576 }, { "epoch": 0.670566149246467, "grad_norm": 1.5562742104928462, "learning_rate": 2.586172709459983e-07, "loss": 0.2118, "step": 38577 }, { "epoch": 0.6705835317839698, "grad_norm": 1.445634953839059, "learning_rate": 2.585926195557266e-07, "loss": 0.1646, "step": 38578 }, { "epoch": 0.6706009143214726, "grad_norm": 1.577040661628572, "learning_rate": 2.585679689306052e-07, "loss": 0.2979, "step": 38579 }, { "epoch": 0.6706182968589754, "grad_norm": 1.5469201880138472, "learning_rate": 2.5854331907071265e-07, "loss": 0.1471, "step": 38580 }, { "epoch": 0.6706356793964783, "grad_norm": 2.234852955160682, "learning_rate": 2.585186699761268e-07, "loss": 0.194, "step": 38581 }, { "epoch": 0.6706530619339811, "grad_norm": 1.924556877640317, "learning_rate": 2.584940216469259e-07, "loss": 0.2011, "step": 38582 }, { "epoch": 0.6706704444714839, "grad_norm": 1.664675424028577, "learning_rate": 2.584693740831878e-07, "loss": 0.1619, "step": 38583 }, { "epoch": 0.6706878270089868, "grad_norm": 1.0901520553378896, "learning_rate": 2.5844472728499125e-07, "loss": 0.1816, "step": 38584 }, { "epoch": 0.6707052095464896, "grad_norm": 1.530542359861192, "learning_rate": 2.584200812524138e-07, "loss": 0.1621, "step": 38585 }, { "epoch": 0.6707225920839924, "grad_norm": 1.597247997523729, "learning_rate": 2.583954359855336e-07, "loss": 0.2207, "step": 38586 }, { "epoch": 0.6707399746214953, "grad_norm": 1.2787413832003105, "learning_rate": 2.5837079148442874e-07, "loss": 0.2071, "step": 38587 }, { "epoch": 0.6707573571589981, "grad_norm": 1.4634369788220298, "learning_rate": 2.583461477491777e-07, "loss": 0.1877, "step": 38588 }, { "epoch": 0.6707747396965009, "grad_norm": 1.9083698647195109, "learning_rate": 2.583215047798583e-07, "loss": 0.3566, "step": 38589 }, { "epoch": 0.6707921222340038, "grad_norm": 1.881440272392068, "learning_rate": 2.582968625765488e-07, "loss": 0.2219, "step": 38590 }, { "epoch": 0.6708095047715066, "grad_norm": 1.4425910038174918, "learning_rate": 2.5827222113932713e-07, "loss": 0.1663, "step": 38591 }, { "epoch": 0.6708268873090094, "grad_norm": 3.6188028972080475, "learning_rate": 2.5824758046827156e-07, "loss": 0.312, "step": 38592 }, { "epoch": 0.6708442698465122, "grad_norm": 1.6177767439953759, "learning_rate": 2.5822294056346004e-07, "loss": 0.3481, "step": 38593 }, { "epoch": 0.6708616523840151, "grad_norm": 1.2372357222453132, "learning_rate": 2.581983014249707e-07, "loss": 0.2219, "step": 38594 }, { "epoch": 0.6708790349215178, "grad_norm": 1.415570221939605, "learning_rate": 2.581736630528816e-07, "loss": 0.1597, "step": 38595 }, { "epoch": 0.6708964174590206, "grad_norm": 1.1161439997133233, "learning_rate": 2.5814902544727117e-07, "loss": 0.2915, "step": 38596 }, { "epoch": 0.6709137999965235, "grad_norm": 1.8356749330222202, "learning_rate": 2.581243886082171e-07, "loss": 0.2888, "step": 38597 }, { "epoch": 0.6709311825340263, "grad_norm": 1.4307266922064674, "learning_rate": 2.5809975253579743e-07, "loss": 0.2609, "step": 38598 }, { "epoch": 0.6709485650715291, "grad_norm": 1.4400870658860172, "learning_rate": 2.580751172300906e-07, "loss": 0.2562, "step": 38599 }, { "epoch": 0.6709659476090319, "grad_norm": 1.2066945083258438, "learning_rate": 2.580504826911745e-07, "loss": 0.3638, "step": 38600 }, { "epoch": 0.6709833301465348, "grad_norm": 1.8031414142896085, "learning_rate": 2.5802584891912727e-07, "loss": 0.2375, "step": 38601 }, { "epoch": 0.6710007126840376, "grad_norm": 1.502583076051652, "learning_rate": 2.580012159140269e-07, "loss": 0.247, "step": 38602 }, { "epoch": 0.6710180952215404, "grad_norm": 1.0116520655292836, "learning_rate": 2.5797658367595154e-07, "loss": 0.2787, "step": 38603 }, { "epoch": 0.6710354777590433, "grad_norm": 1.2398101953551124, "learning_rate": 2.579519522049792e-07, "loss": 0.3329, "step": 38604 }, { "epoch": 0.6710528602965461, "grad_norm": 1.852099850469443, "learning_rate": 2.57927321501188e-07, "loss": 0.223, "step": 38605 }, { "epoch": 0.6710702428340489, "grad_norm": 3.649641307298433, "learning_rate": 2.5790269156465585e-07, "loss": 0.2257, "step": 38606 }, { "epoch": 0.6710876253715518, "grad_norm": 1.1629303707287175, "learning_rate": 2.5787806239546104e-07, "loss": 0.223, "step": 38607 }, { "epoch": 0.6711050079090546, "grad_norm": 1.0697842708007739, "learning_rate": 2.578534339936816e-07, "loss": 0.1603, "step": 38608 }, { "epoch": 0.6711223904465574, "grad_norm": 1.4159469138592038, "learning_rate": 2.578288063593958e-07, "loss": 0.2503, "step": 38609 }, { "epoch": 0.6711397729840602, "grad_norm": 1.8892570088298093, "learning_rate": 2.5780417949268096e-07, "loss": 0.2704, "step": 38610 }, { "epoch": 0.6711571555215631, "grad_norm": 2.2819267266905263, "learning_rate": 2.577795533936159e-07, "loss": 0.2714, "step": 38611 }, { "epoch": 0.6711745380590659, "grad_norm": 1.5870951477798663, "learning_rate": 2.5775492806227833e-07, "loss": 0.2259, "step": 38612 }, { "epoch": 0.6711919205965687, "grad_norm": 1.0914947828341226, "learning_rate": 2.5773030349874635e-07, "loss": 0.2133, "step": 38613 }, { "epoch": 0.6712093031340716, "grad_norm": 3.1645683530742836, "learning_rate": 2.5770567970309785e-07, "loss": 0.4098, "step": 38614 }, { "epoch": 0.6712266856715743, "grad_norm": 2.145104141150203, "learning_rate": 2.576810566754114e-07, "loss": 0.2589, "step": 38615 }, { "epoch": 0.6712440682090771, "grad_norm": 0.9325705678112516, "learning_rate": 2.5765643441576456e-07, "loss": 0.1889, "step": 38616 }, { "epoch": 0.67126145074658, "grad_norm": 1.4184071985371538, "learning_rate": 2.576318129242354e-07, "loss": 0.152, "step": 38617 }, { "epoch": 0.6712788332840828, "grad_norm": 1.7816084268720267, "learning_rate": 2.57607192200902e-07, "loss": 0.1813, "step": 38618 }, { "epoch": 0.6712962158215856, "grad_norm": 1.207118435048043, "learning_rate": 2.5758257224584255e-07, "loss": 0.2617, "step": 38619 }, { "epoch": 0.6713135983590884, "grad_norm": 1.5051867576588596, "learning_rate": 2.57557953059135e-07, "loss": 0.2558, "step": 38620 }, { "epoch": 0.6713309808965913, "grad_norm": 1.0201524102955326, "learning_rate": 2.5753333464085737e-07, "loss": 0.1227, "step": 38621 }, { "epoch": 0.6713483634340941, "grad_norm": 1.6225247453804315, "learning_rate": 2.575087169910877e-07, "loss": 0.1131, "step": 38622 }, { "epoch": 0.6713657459715969, "grad_norm": 1.312901328597654, "learning_rate": 2.57484100109904e-07, "loss": 0.2171, "step": 38623 }, { "epoch": 0.6713831285090998, "grad_norm": 1.4275442602073742, "learning_rate": 2.574594839973843e-07, "loss": 0.1511, "step": 38624 }, { "epoch": 0.6714005110466026, "grad_norm": 1.4420007259656087, "learning_rate": 2.5743486865360665e-07, "loss": 0.1813, "step": 38625 }, { "epoch": 0.6714178935841054, "grad_norm": 0.9665453016089878, "learning_rate": 2.574102540786488e-07, "loss": 0.1743, "step": 38626 }, { "epoch": 0.6714352761216082, "grad_norm": 1.0638191013349128, "learning_rate": 2.5738564027258925e-07, "loss": 0.3353, "step": 38627 }, { "epoch": 0.6714526586591111, "grad_norm": 1.6486576926174412, "learning_rate": 2.5736102723550596e-07, "loss": 0.1461, "step": 38628 }, { "epoch": 0.6714700411966139, "grad_norm": 1.2510805518567794, "learning_rate": 2.573364149674764e-07, "loss": 0.2074, "step": 38629 }, { "epoch": 0.6714874237341167, "grad_norm": 1.440800931359309, "learning_rate": 2.57311803468579e-07, "loss": 0.1451, "step": 38630 }, { "epoch": 0.6715048062716196, "grad_norm": 1.778005262428736, "learning_rate": 2.5728719273889175e-07, "loss": 0.1515, "step": 38631 }, { "epoch": 0.6715221888091224, "grad_norm": 1.3516373503536085, "learning_rate": 2.5726258277849264e-07, "loss": 0.201, "step": 38632 }, { "epoch": 0.6715395713466252, "grad_norm": 2.6507043741686256, "learning_rate": 2.572379735874595e-07, "loss": 0.3279, "step": 38633 }, { "epoch": 0.671556953884128, "grad_norm": 2.359054609429853, "learning_rate": 2.572133651658708e-07, "loss": 0.319, "step": 38634 }, { "epoch": 0.6715743364216308, "grad_norm": 1.6433939044100319, "learning_rate": 2.5718875751380397e-07, "loss": 0.4281, "step": 38635 }, { "epoch": 0.6715917189591336, "grad_norm": 2.031417913350614, "learning_rate": 2.571641506313373e-07, "loss": 0.3118, "step": 38636 }, { "epoch": 0.6716091014966364, "grad_norm": 0.9715893845041451, "learning_rate": 2.5713954451854846e-07, "loss": 0.1217, "step": 38637 }, { "epoch": 0.6716264840341393, "grad_norm": 2.3632990394802404, "learning_rate": 2.5711493917551595e-07, "loss": 0.245, "step": 38638 }, { "epoch": 0.6716438665716421, "grad_norm": 1.260502879764935, "learning_rate": 2.5709033460231753e-07, "loss": 0.2321, "step": 38639 }, { "epoch": 0.6716612491091449, "grad_norm": 1.3799182956445313, "learning_rate": 2.5706573079903115e-07, "loss": 0.2761, "step": 38640 }, { "epoch": 0.6716786316466478, "grad_norm": 2.0327727275793372, "learning_rate": 2.5704112776573477e-07, "loss": 0.3171, "step": 38641 }, { "epoch": 0.6716960141841506, "grad_norm": 1.4267150337814536, "learning_rate": 2.5701652550250643e-07, "loss": 0.2052, "step": 38642 }, { "epoch": 0.6717133967216534, "grad_norm": 0.9011206994942449, "learning_rate": 2.569919240094241e-07, "loss": 0.1861, "step": 38643 }, { "epoch": 0.6717307792591563, "grad_norm": 1.594258119344956, "learning_rate": 2.5696732328656573e-07, "loss": 0.363, "step": 38644 }, { "epoch": 0.6717481617966591, "grad_norm": 1.4826755966171712, "learning_rate": 2.5694272333400913e-07, "loss": 0.2193, "step": 38645 }, { "epoch": 0.6717655443341619, "grad_norm": 1.153418161268103, "learning_rate": 2.5691812415183264e-07, "loss": 0.2194, "step": 38646 }, { "epoch": 0.6717829268716647, "grad_norm": 1.3954676007736777, "learning_rate": 2.568935257401142e-07, "loss": 0.3453, "step": 38647 }, { "epoch": 0.6718003094091676, "grad_norm": 1.1856909001812999, "learning_rate": 2.5686892809893125e-07, "loss": 0.1357, "step": 38648 }, { "epoch": 0.6718176919466704, "grad_norm": 2.312599788107007, "learning_rate": 2.5684433122836226e-07, "loss": 0.2001, "step": 38649 }, { "epoch": 0.6718350744841732, "grad_norm": 1.6204111004956272, "learning_rate": 2.568197351284851e-07, "loss": 0.1969, "step": 38650 }, { "epoch": 0.6718524570216761, "grad_norm": 1.5068631950842764, "learning_rate": 2.567951397993776e-07, "loss": 0.1939, "step": 38651 }, { "epoch": 0.6718698395591789, "grad_norm": 1.5843868629956397, "learning_rate": 2.5677054524111784e-07, "loss": 0.2025, "step": 38652 }, { "epoch": 0.6718872220966817, "grad_norm": 1.1693711503529378, "learning_rate": 2.5674595145378376e-07, "loss": 0.2921, "step": 38653 }, { "epoch": 0.6719046046341844, "grad_norm": 1.319009789237788, "learning_rate": 2.5672135843745317e-07, "loss": 0.2975, "step": 38654 }, { "epoch": 0.6719219871716873, "grad_norm": 1.0956335313669787, "learning_rate": 2.566967661922042e-07, "loss": 0.1882, "step": 38655 }, { "epoch": 0.6719393697091901, "grad_norm": 2.498697515294252, "learning_rate": 2.566721747181145e-07, "loss": 0.2231, "step": 38656 }, { "epoch": 0.6719567522466929, "grad_norm": 3.816938321104513, "learning_rate": 2.566475840152624e-07, "loss": 0.4675, "step": 38657 }, { "epoch": 0.6719741347841958, "grad_norm": 1.1476764983531427, "learning_rate": 2.5662299408372575e-07, "loss": 0.3082, "step": 38658 }, { "epoch": 0.6719915173216986, "grad_norm": 2.9864359333400134, "learning_rate": 2.565984049235823e-07, "loss": 0.3181, "step": 38659 }, { "epoch": 0.6720088998592014, "grad_norm": 1.5511512219055819, "learning_rate": 2.5657381653491015e-07, "loss": 0.2161, "step": 38660 }, { "epoch": 0.6720262823967043, "grad_norm": 1.3262391624613783, "learning_rate": 2.565492289177872e-07, "loss": 0.1345, "step": 38661 }, { "epoch": 0.6720436649342071, "grad_norm": 1.2434536773662475, "learning_rate": 2.565246420722914e-07, "loss": 0.199, "step": 38662 }, { "epoch": 0.6720610474717099, "grad_norm": 1.2239778144037525, "learning_rate": 2.5650005599850053e-07, "loss": 0.1129, "step": 38663 }, { "epoch": 0.6720784300092127, "grad_norm": 2.0251115014010397, "learning_rate": 2.564754706964925e-07, "loss": 0.1957, "step": 38664 }, { "epoch": 0.6720958125467156, "grad_norm": 1.0024907345092289, "learning_rate": 2.564508861663456e-07, "loss": 0.197, "step": 38665 }, { "epoch": 0.6721131950842184, "grad_norm": 0.9997239450201131, "learning_rate": 2.5642630240813764e-07, "loss": 0.1927, "step": 38666 }, { "epoch": 0.6721305776217212, "grad_norm": 2.1279548104823713, "learning_rate": 2.564017194219462e-07, "loss": 0.2545, "step": 38667 }, { "epoch": 0.6721479601592241, "grad_norm": 1.127513118165133, "learning_rate": 2.5637713720784925e-07, "loss": 0.2801, "step": 38668 }, { "epoch": 0.6721653426967269, "grad_norm": 2.4729991244122314, "learning_rate": 2.563525557659251e-07, "loss": 0.5865, "step": 38669 }, { "epoch": 0.6721827252342297, "grad_norm": 1.754414107343179, "learning_rate": 2.5632797509625135e-07, "loss": 0.2377, "step": 38670 }, { "epoch": 0.6722001077717326, "grad_norm": 3.0924137908375307, "learning_rate": 2.5630339519890596e-07, "loss": 0.2163, "step": 38671 }, { "epoch": 0.6722174903092354, "grad_norm": 2.0014646917229286, "learning_rate": 2.5627881607396695e-07, "loss": 0.2126, "step": 38672 }, { "epoch": 0.6722348728467382, "grad_norm": 1.810895601479885, "learning_rate": 2.562542377215121e-07, "loss": 0.1649, "step": 38673 }, { "epoch": 0.6722522553842409, "grad_norm": 1.3334797902520712, "learning_rate": 2.562296601416194e-07, "loss": 0.228, "step": 38674 }, { "epoch": 0.6722696379217438, "grad_norm": 2.433949286691027, "learning_rate": 2.562050833343666e-07, "loss": 0.2809, "step": 38675 }, { "epoch": 0.6722870204592466, "grad_norm": 1.8496234845149884, "learning_rate": 2.561805072998316e-07, "loss": 0.2075, "step": 38676 }, { "epoch": 0.6723044029967494, "grad_norm": 1.7498493010421492, "learning_rate": 2.5615593203809255e-07, "loss": 0.1852, "step": 38677 }, { "epoch": 0.6723217855342523, "grad_norm": 2.2876789313888923, "learning_rate": 2.5613135754922734e-07, "loss": 0.2433, "step": 38678 }, { "epoch": 0.6723391680717551, "grad_norm": 2.191177521037103, "learning_rate": 2.5610678383331335e-07, "loss": 0.2935, "step": 38679 }, { "epoch": 0.6723565506092579, "grad_norm": 2.242438460784634, "learning_rate": 2.56082210890429e-07, "loss": 0.2111, "step": 38680 }, { "epoch": 0.6723739331467607, "grad_norm": 1.855264046891719, "learning_rate": 2.56057638720652e-07, "loss": 0.2503, "step": 38681 }, { "epoch": 0.6723913156842636, "grad_norm": 3.2245706026586185, "learning_rate": 2.5603306732406025e-07, "loss": 0.2272, "step": 38682 }, { "epoch": 0.6724086982217664, "grad_norm": 1.8361818970698074, "learning_rate": 2.5600849670073154e-07, "loss": 0.1443, "step": 38683 }, { "epoch": 0.6724260807592692, "grad_norm": 1.0273262235629286, "learning_rate": 2.5598392685074363e-07, "loss": 0.2902, "step": 38684 }, { "epoch": 0.6724434632967721, "grad_norm": 2.5614835537894014, "learning_rate": 2.559593577741751e-07, "loss": 0.1798, "step": 38685 }, { "epoch": 0.6724608458342749, "grad_norm": 1.7382723864124057, "learning_rate": 2.55934789471103e-07, "loss": 0.2705, "step": 38686 }, { "epoch": 0.6724782283717777, "grad_norm": 1.4187160572592723, "learning_rate": 2.5591022194160526e-07, "loss": 0.2193, "step": 38687 }, { "epoch": 0.6724956109092806, "grad_norm": 1.487387969270986, "learning_rate": 2.5588565518576027e-07, "loss": 0.2434, "step": 38688 }, { "epoch": 0.6725129934467834, "grad_norm": 2.343217218729821, "learning_rate": 2.5586108920364556e-07, "loss": 0.2407, "step": 38689 }, { "epoch": 0.6725303759842862, "grad_norm": 1.1975140247529235, "learning_rate": 2.558365239953392e-07, "loss": 0.1876, "step": 38690 }, { "epoch": 0.672547758521789, "grad_norm": 1.6659031477208777, "learning_rate": 2.5581195956091874e-07, "loss": 0.2998, "step": 38691 }, { "epoch": 0.6725651410592919, "grad_norm": 1.5693948004476896, "learning_rate": 2.557873959004622e-07, "loss": 0.1466, "step": 38692 }, { "epoch": 0.6725825235967947, "grad_norm": 2.115273799957376, "learning_rate": 2.557628330140474e-07, "loss": 0.2306, "step": 38693 }, { "epoch": 0.6725999061342974, "grad_norm": 2.585184476060816, "learning_rate": 2.557382709017524e-07, "loss": 0.1781, "step": 38694 }, { "epoch": 0.6726172886718003, "grad_norm": 1.3684066887553983, "learning_rate": 2.557137095636546e-07, "loss": 0.2427, "step": 38695 }, { "epoch": 0.6726346712093031, "grad_norm": 1.827356268527117, "learning_rate": 2.556891489998323e-07, "loss": 0.1894, "step": 38696 }, { "epoch": 0.6726520537468059, "grad_norm": 1.762354369356716, "learning_rate": 2.556645892103633e-07, "loss": 0.2534, "step": 38697 }, { "epoch": 0.6726694362843088, "grad_norm": 1.547090316689131, "learning_rate": 2.556400301953252e-07, "loss": 0.2517, "step": 38698 }, { "epoch": 0.6726868188218116, "grad_norm": 1.4407291742022883, "learning_rate": 2.556154719547957e-07, "loss": 0.1382, "step": 38699 }, { "epoch": 0.6727042013593144, "grad_norm": 1.5745114913327707, "learning_rate": 2.5559091448885316e-07, "loss": 0.4857, "step": 38700 }, { "epoch": 0.6727215838968172, "grad_norm": 2.519744900631826, "learning_rate": 2.5556635779757514e-07, "loss": 0.2269, "step": 38701 }, { "epoch": 0.6727389664343201, "grad_norm": 1.6116868952955012, "learning_rate": 2.5554180188103946e-07, "loss": 0.1667, "step": 38702 }, { "epoch": 0.6727563489718229, "grad_norm": 1.9827686588072775, "learning_rate": 2.555172467393237e-07, "loss": 0.2212, "step": 38703 }, { "epoch": 0.6727737315093257, "grad_norm": 8.382653258582547, "learning_rate": 2.554926923725065e-07, "loss": 0.2812, "step": 38704 }, { "epoch": 0.6727911140468286, "grad_norm": 1.287919315553776, "learning_rate": 2.5546813878066486e-07, "loss": 0.2166, "step": 38705 }, { "epoch": 0.6728084965843314, "grad_norm": 2.0918340762856666, "learning_rate": 2.5544358596387696e-07, "loss": 0.2624, "step": 38706 }, { "epoch": 0.6728258791218342, "grad_norm": 1.1586533828725791, "learning_rate": 2.554190339222203e-07, "loss": 0.1526, "step": 38707 }, { "epoch": 0.672843261659337, "grad_norm": 1.5003740967126307, "learning_rate": 2.5539448265577314e-07, "loss": 0.2397, "step": 38708 }, { "epoch": 0.6728606441968399, "grad_norm": 1.877007159673281, "learning_rate": 2.5536993216461313e-07, "loss": 0.205, "step": 38709 }, { "epoch": 0.6728780267343427, "grad_norm": 1.5489353110445139, "learning_rate": 2.553453824488181e-07, "loss": 0.1837, "step": 38710 }, { "epoch": 0.6728954092718455, "grad_norm": 1.181278287360762, "learning_rate": 2.553208335084657e-07, "loss": 0.1682, "step": 38711 }, { "epoch": 0.6729127918093484, "grad_norm": 1.5748275947816344, "learning_rate": 2.55296285343634e-07, "loss": 0.226, "step": 38712 }, { "epoch": 0.6729301743468512, "grad_norm": 1.3623970228596682, "learning_rate": 2.552717379544006e-07, "loss": 0.1847, "step": 38713 }, { "epoch": 0.6729475568843539, "grad_norm": 1.8873554719805243, "learning_rate": 2.552471913408434e-07, "loss": 0.2422, "step": 38714 }, { "epoch": 0.6729649394218568, "grad_norm": 0.7685509507149149, "learning_rate": 2.5522264550303994e-07, "loss": 0.1684, "step": 38715 }, { "epoch": 0.6729823219593596, "grad_norm": 3.04195351072319, "learning_rate": 2.5519810044106864e-07, "loss": 0.2187, "step": 38716 }, { "epoch": 0.6729997044968624, "grad_norm": 1.5180032035577615, "learning_rate": 2.551735561550067e-07, "loss": 0.1987, "step": 38717 }, { "epoch": 0.6730170870343652, "grad_norm": 1.6354949368601743, "learning_rate": 2.55149012644932e-07, "loss": 0.1351, "step": 38718 }, { "epoch": 0.6730344695718681, "grad_norm": 1.8219561972348377, "learning_rate": 2.551244699109226e-07, "loss": 0.2955, "step": 38719 }, { "epoch": 0.6730518521093709, "grad_norm": 1.809594456777276, "learning_rate": 2.550999279530561e-07, "loss": 0.2417, "step": 38720 }, { "epoch": 0.6730692346468737, "grad_norm": 0.7230998837404543, "learning_rate": 2.5507538677141045e-07, "loss": 0.1564, "step": 38721 }, { "epoch": 0.6730866171843766, "grad_norm": 1.5340983222984157, "learning_rate": 2.5505084636606323e-07, "loss": 0.234, "step": 38722 }, { "epoch": 0.6731039997218794, "grad_norm": 1.1080765227549652, "learning_rate": 2.550263067370924e-07, "loss": 0.1607, "step": 38723 }, { "epoch": 0.6731213822593822, "grad_norm": 1.0805612507380555, "learning_rate": 2.550017678845755e-07, "loss": 0.1957, "step": 38724 }, { "epoch": 0.6731387647968851, "grad_norm": 1.135228598214887, "learning_rate": 2.5497722980859055e-07, "loss": 0.2243, "step": 38725 }, { "epoch": 0.6731561473343879, "grad_norm": 1.023559862624601, "learning_rate": 2.5495269250921505e-07, "loss": 0.1797, "step": 38726 }, { "epoch": 0.6731735298718907, "grad_norm": 1.7803198563618228, "learning_rate": 2.5492815598652704e-07, "loss": 0.2425, "step": 38727 }, { "epoch": 0.6731909124093935, "grad_norm": 1.7676004161025856, "learning_rate": 2.549036202406043e-07, "loss": 0.2629, "step": 38728 }, { "epoch": 0.6732082949468964, "grad_norm": 1.7699357526692094, "learning_rate": 2.5487908527152444e-07, "loss": 0.3044, "step": 38729 }, { "epoch": 0.6732256774843992, "grad_norm": 1.6670423067622817, "learning_rate": 2.5485455107936527e-07, "loss": 0.3427, "step": 38730 }, { "epoch": 0.673243060021902, "grad_norm": 1.3427075523471863, "learning_rate": 2.5483001766420467e-07, "loss": 0.1569, "step": 38731 }, { "epoch": 0.6732604425594049, "grad_norm": 1.9258946875274447, "learning_rate": 2.5480548502612014e-07, "loss": 0.3168, "step": 38732 }, { "epoch": 0.6732778250969077, "grad_norm": 0.8142316824966451, "learning_rate": 2.5478095316518966e-07, "loss": 0.227, "step": 38733 }, { "epoch": 0.6732952076344104, "grad_norm": 1.2874954405518055, "learning_rate": 2.5475642208149074e-07, "loss": 0.1808, "step": 38734 }, { "epoch": 0.6733125901719133, "grad_norm": 1.3804845326121702, "learning_rate": 2.547318917751017e-07, "loss": 0.2245, "step": 38735 }, { "epoch": 0.6733299727094161, "grad_norm": 2.7669143528369924, "learning_rate": 2.547073622460997e-07, "loss": 0.4354, "step": 38736 }, { "epoch": 0.6733473552469189, "grad_norm": 4.834395516450093, "learning_rate": 2.5468283349456243e-07, "loss": 0.1929, "step": 38737 }, { "epoch": 0.6733647377844217, "grad_norm": 3.4319753424619743, "learning_rate": 2.546583055205681e-07, "loss": 0.2737, "step": 38738 }, { "epoch": 0.6733821203219246, "grad_norm": 1.2925370323380372, "learning_rate": 2.546337783241943e-07, "loss": 0.1361, "step": 38739 }, { "epoch": 0.6733995028594274, "grad_norm": 1.6279778511883436, "learning_rate": 2.546092519055186e-07, "loss": 0.2843, "step": 38740 }, { "epoch": 0.6734168853969302, "grad_norm": 2.3338718857933927, "learning_rate": 2.5458472626461895e-07, "loss": 0.1735, "step": 38741 }, { "epoch": 0.6734342679344331, "grad_norm": 1.6841935556296992, "learning_rate": 2.54560201401573e-07, "loss": 0.1609, "step": 38742 }, { "epoch": 0.6734516504719359, "grad_norm": 1.3935081348661875, "learning_rate": 2.545356773164584e-07, "loss": 0.1394, "step": 38743 }, { "epoch": 0.6734690330094387, "grad_norm": 1.6971866329097225, "learning_rate": 2.5451115400935296e-07, "loss": 0.2962, "step": 38744 }, { "epoch": 0.6734864155469416, "grad_norm": 1.3397347081183428, "learning_rate": 2.544866314803342e-07, "loss": 0.2358, "step": 38745 }, { "epoch": 0.6735037980844444, "grad_norm": 1.8825038486218946, "learning_rate": 2.544621097294802e-07, "loss": 0.2296, "step": 38746 }, { "epoch": 0.6735211806219472, "grad_norm": 2.1171725777106927, "learning_rate": 2.544375887568686e-07, "loss": 0.182, "step": 38747 }, { "epoch": 0.67353856315945, "grad_norm": 0.7737676890747454, "learning_rate": 2.544130685625772e-07, "loss": 0.1291, "step": 38748 }, { "epoch": 0.6735559456969529, "grad_norm": 2.2383272889084327, "learning_rate": 2.543885491466831e-07, "loss": 0.2331, "step": 38749 }, { "epoch": 0.6735733282344557, "grad_norm": 1.6006948410947532, "learning_rate": 2.5436403050926476e-07, "loss": 0.1835, "step": 38750 }, { "epoch": 0.6735907107719585, "grad_norm": 1.7375302042448675, "learning_rate": 2.5433951265039956e-07, "loss": 0.1598, "step": 38751 }, { "epoch": 0.6736080933094614, "grad_norm": 2.018829618113503, "learning_rate": 2.543149955701653e-07, "loss": 0.2013, "step": 38752 }, { "epoch": 0.6736254758469642, "grad_norm": 2.513611735810757, "learning_rate": 2.5429047926863944e-07, "loss": 0.2754, "step": 38753 }, { "epoch": 0.6736428583844669, "grad_norm": 1.5818867607529452, "learning_rate": 2.5426596374590026e-07, "loss": 0.216, "step": 38754 }, { "epoch": 0.6736602409219697, "grad_norm": 1.3886267728343293, "learning_rate": 2.5424144900202493e-07, "loss": 0.1809, "step": 38755 }, { "epoch": 0.6736776234594726, "grad_norm": 2.8590467459273445, "learning_rate": 2.542169350370913e-07, "loss": 0.3133, "step": 38756 }, { "epoch": 0.6736950059969754, "grad_norm": 1.690680262633081, "learning_rate": 2.5419242185117687e-07, "loss": 0.2737, "step": 38757 }, { "epoch": 0.6737123885344782, "grad_norm": 2.944036613089541, "learning_rate": 2.541679094443597e-07, "loss": 0.2924, "step": 38758 }, { "epoch": 0.6737297710719811, "grad_norm": 2.0287408723610905, "learning_rate": 2.5414339781671744e-07, "loss": 0.2859, "step": 38759 }, { "epoch": 0.6737471536094839, "grad_norm": 2.2793923160853087, "learning_rate": 2.541188869683276e-07, "loss": 0.2123, "step": 38760 }, { "epoch": 0.6737645361469867, "grad_norm": 1.1913662313258853, "learning_rate": 2.5409437689926795e-07, "loss": 0.2855, "step": 38761 }, { "epoch": 0.6737819186844896, "grad_norm": 1.191757780267391, "learning_rate": 2.540698676096161e-07, "loss": 0.1793, "step": 38762 }, { "epoch": 0.6737993012219924, "grad_norm": 1.676466508505818, "learning_rate": 2.540453590994499e-07, "loss": 0.2451, "step": 38763 }, { "epoch": 0.6738166837594952, "grad_norm": 3.303894319383774, "learning_rate": 2.540208513688469e-07, "loss": 0.2391, "step": 38764 }, { "epoch": 0.673834066296998, "grad_norm": 1.9846479697598325, "learning_rate": 2.5399634441788465e-07, "loss": 0.185, "step": 38765 }, { "epoch": 0.6738514488345009, "grad_norm": 1.959451555746831, "learning_rate": 2.5397183824664116e-07, "loss": 0.2212, "step": 38766 }, { "epoch": 0.6738688313720037, "grad_norm": 1.3671194774123407, "learning_rate": 2.539473328551941e-07, "loss": 0.1847, "step": 38767 }, { "epoch": 0.6738862139095065, "grad_norm": 1.6343882527972806, "learning_rate": 2.539228282436205e-07, "loss": 0.2523, "step": 38768 }, { "epoch": 0.6739035964470094, "grad_norm": 2.0283967725674614, "learning_rate": 2.538983244119988e-07, "loss": 0.2366, "step": 38769 }, { "epoch": 0.6739209789845122, "grad_norm": 1.0526742901423034, "learning_rate": 2.5387382136040635e-07, "loss": 0.2458, "step": 38770 }, { "epoch": 0.673938361522015, "grad_norm": 14.686575626123696, "learning_rate": 2.538493190889208e-07, "loss": 0.2432, "step": 38771 }, { "epoch": 0.6739557440595179, "grad_norm": 1.9074760093043366, "learning_rate": 2.5382481759761984e-07, "loss": 0.2694, "step": 38772 }, { "epoch": 0.6739731265970206, "grad_norm": 1.9466159750985614, "learning_rate": 2.538003168865811e-07, "loss": 0.2314, "step": 38773 }, { "epoch": 0.6739905091345234, "grad_norm": 1.244001329801044, "learning_rate": 2.5377581695588235e-07, "loss": 0.2753, "step": 38774 }, { "epoch": 0.6740078916720262, "grad_norm": 1.6712905113215148, "learning_rate": 2.537513178056011e-07, "loss": 0.1527, "step": 38775 }, { "epoch": 0.6740252742095291, "grad_norm": 2.079301398897199, "learning_rate": 2.537268194358148e-07, "loss": 0.2434, "step": 38776 }, { "epoch": 0.6740426567470319, "grad_norm": 1.841002365013848, "learning_rate": 2.537023218466016e-07, "loss": 0.246, "step": 38777 }, { "epoch": 0.6740600392845347, "grad_norm": 1.3397556398834713, "learning_rate": 2.536778250380389e-07, "loss": 0.1506, "step": 38778 }, { "epoch": 0.6740774218220376, "grad_norm": 1.5283581826704231, "learning_rate": 2.5365332901020437e-07, "loss": 0.2079, "step": 38779 }, { "epoch": 0.6740948043595404, "grad_norm": 1.4255645960977807, "learning_rate": 2.5362883376317555e-07, "loss": 0.1763, "step": 38780 }, { "epoch": 0.6741121868970432, "grad_norm": 1.9394468459699012, "learning_rate": 2.536043392970302e-07, "loss": 0.3644, "step": 38781 }, { "epoch": 0.674129569434546, "grad_norm": 1.4630996567454655, "learning_rate": 2.53579845611846e-07, "loss": 0.2097, "step": 38782 }, { "epoch": 0.6741469519720489, "grad_norm": 2.024493319924869, "learning_rate": 2.5355535270770034e-07, "loss": 0.2275, "step": 38783 }, { "epoch": 0.6741643345095517, "grad_norm": 1.2930841104204038, "learning_rate": 2.5353086058467086e-07, "loss": 0.1947, "step": 38784 }, { "epoch": 0.6741817170470545, "grad_norm": 1.4491376253819592, "learning_rate": 2.5350636924283557e-07, "loss": 0.1766, "step": 38785 }, { "epoch": 0.6741990995845574, "grad_norm": 1.2712458001072586, "learning_rate": 2.53481878682272e-07, "loss": 0.2348, "step": 38786 }, { "epoch": 0.6742164821220602, "grad_norm": 0.8670759020890039, "learning_rate": 2.534573889030575e-07, "loss": 0.1753, "step": 38787 }, { "epoch": 0.674233864659563, "grad_norm": 2.138136699104101, "learning_rate": 2.5343289990526963e-07, "loss": 0.3625, "step": 38788 }, { "epoch": 0.6742512471970659, "grad_norm": 2.0002852258094688, "learning_rate": 2.534084116889863e-07, "loss": 0.2196, "step": 38789 }, { "epoch": 0.6742686297345687, "grad_norm": 2.6718185093283755, "learning_rate": 2.533839242542851e-07, "loss": 0.2296, "step": 38790 }, { "epoch": 0.6742860122720715, "grad_norm": 1.2172725835433509, "learning_rate": 2.533594376012436e-07, "loss": 0.2326, "step": 38791 }, { "epoch": 0.6743033948095744, "grad_norm": 1.1533281824973027, "learning_rate": 2.5333495172993934e-07, "loss": 0.2924, "step": 38792 }, { "epoch": 0.6743207773470771, "grad_norm": 0.6610900080847246, "learning_rate": 2.5331046664044996e-07, "loss": 0.1707, "step": 38793 }, { "epoch": 0.6743381598845799, "grad_norm": 2.3092918350953195, "learning_rate": 2.5328598233285313e-07, "loss": 0.1472, "step": 38794 }, { "epoch": 0.6743555424220827, "grad_norm": 0.8030578696375384, "learning_rate": 2.532614988072264e-07, "loss": 0.1642, "step": 38795 }, { "epoch": 0.6743729249595856, "grad_norm": 1.1917896468947644, "learning_rate": 2.5323701606364715e-07, "loss": 0.1595, "step": 38796 }, { "epoch": 0.6743903074970884, "grad_norm": 1.2248126482967574, "learning_rate": 2.532125341021935e-07, "loss": 0.165, "step": 38797 }, { "epoch": 0.6744076900345912, "grad_norm": 0.6607658482634859, "learning_rate": 2.5318805292294275e-07, "loss": 0.1746, "step": 38798 }, { "epoch": 0.674425072572094, "grad_norm": 1.4549924342292115, "learning_rate": 2.5316357252597227e-07, "loss": 0.1048, "step": 38799 }, { "epoch": 0.6744424551095969, "grad_norm": 1.1125139774528363, "learning_rate": 2.5313909291136006e-07, "loss": 0.1474, "step": 38800 }, { "epoch": 0.6744598376470997, "grad_norm": 1.3651571535730531, "learning_rate": 2.531146140791834e-07, "loss": 0.2572, "step": 38801 }, { "epoch": 0.6744772201846025, "grad_norm": 1.5275503121249498, "learning_rate": 2.530901360295201e-07, "loss": 0.2634, "step": 38802 }, { "epoch": 0.6744946027221054, "grad_norm": 1.395080402969226, "learning_rate": 2.5306565876244766e-07, "loss": 0.3034, "step": 38803 }, { "epoch": 0.6745119852596082, "grad_norm": 1.2351021506653876, "learning_rate": 2.5304118227804343e-07, "loss": 0.1223, "step": 38804 }, { "epoch": 0.674529367797111, "grad_norm": 1.3715091744510222, "learning_rate": 2.5301670657638563e-07, "loss": 0.1906, "step": 38805 }, { "epoch": 0.6745467503346139, "grad_norm": 0.8224613613374933, "learning_rate": 2.529922316575512e-07, "loss": 0.236, "step": 38806 }, { "epoch": 0.6745641328721167, "grad_norm": 3.4205751115076524, "learning_rate": 2.529677575216177e-07, "loss": 0.2176, "step": 38807 }, { "epoch": 0.6745815154096195, "grad_norm": 0.9026056453260545, "learning_rate": 2.5294328416866315e-07, "loss": 0.1622, "step": 38808 }, { "epoch": 0.6745988979471224, "grad_norm": 1.1939566268227018, "learning_rate": 2.5291881159876494e-07, "loss": 0.1749, "step": 38809 }, { "epoch": 0.6746162804846252, "grad_norm": 1.224753147137571, "learning_rate": 2.528943398120005e-07, "loss": 0.1323, "step": 38810 }, { "epoch": 0.674633663022128, "grad_norm": 1.369814742476974, "learning_rate": 2.528698688084476e-07, "loss": 0.2323, "step": 38811 }, { "epoch": 0.6746510455596308, "grad_norm": 2.4013528126030113, "learning_rate": 2.528453985881837e-07, "loss": 0.2363, "step": 38812 }, { "epoch": 0.6746684280971336, "grad_norm": 1.4057609660844486, "learning_rate": 2.528209291512862e-07, "loss": 0.1942, "step": 38813 }, { "epoch": 0.6746858106346364, "grad_norm": 1.4463253880459255, "learning_rate": 2.5279646049783293e-07, "loss": 0.3601, "step": 38814 }, { "epoch": 0.6747031931721392, "grad_norm": 2.0937413154101083, "learning_rate": 2.527719926279011e-07, "loss": 0.2038, "step": 38815 }, { "epoch": 0.6747205757096421, "grad_norm": 1.3519520093756823, "learning_rate": 2.527475255415687e-07, "loss": 0.1413, "step": 38816 }, { "epoch": 0.6747379582471449, "grad_norm": 1.2401453085365575, "learning_rate": 2.5272305923891325e-07, "loss": 0.1678, "step": 38817 }, { "epoch": 0.6747553407846477, "grad_norm": 2.036393278223331, "learning_rate": 2.526985937200119e-07, "loss": 0.2292, "step": 38818 }, { "epoch": 0.6747727233221505, "grad_norm": 0.9538354356968912, "learning_rate": 2.5267412898494224e-07, "loss": 0.2426, "step": 38819 }, { "epoch": 0.6747901058596534, "grad_norm": 1.5047656272820358, "learning_rate": 2.5264966503378214e-07, "loss": 0.1935, "step": 38820 }, { "epoch": 0.6748074883971562, "grad_norm": 1.9145293981320153, "learning_rate": 2.5262520186660906e-07, "loss": 0.2373, "step": 38821 }, { "epoch": 0.674824870934659, "grad_norm": 4.424646555714531, "learning_rate": 2.5260073948350034e-07, "loss": 0.3537, "step": 38822 }, { "epoch": 0.6748422534721619, "grad_norm": 1.205103477120118, "learning_rate": 2.5257627788453353e-07, "loss": 0.1829, "step": 38823 }, { "epoch": 0.6748596360096647, "grad_norm": 1.1076946659206388, "learning_rate": 2.525518170697866e-07, "loss": 0.2546, "step": 38824 }, { "epoch": 0.6748770185471675, "grad_norm": 1.4828947230531542, "learning_rate": 2.525273570393366e-07, "loss": 0.2531, "step": 38825 }, { "epoch": 0.6748944010846704, "grad_norm": 2.039272705060456, "learning_rate": 2.5250289779326096e-07, "loss": 0.3171, "step": 38826 }, { "epoch": 0.6749117836221732, "grad_norm": 1.573945294455955, "learning_rate": 2.524784393316376e-07, "loss": 0.1706, "step": 38827 }, { "epoch": 0.674929166159676, "grad_norm": 1.0098360348263051, "learning_rate": 2.5245398165454397e-07, "loss": 0.0919, "step": 38828 }, { "epoch": 0.6749465486971788, "grad_norm": 1.1724536530725433, "learning_rate": 2.524295247620575e-07, "loss": 0.1892, "step": 38829 }, { "epoch": 0.6749639312346817, "grad_norm": 1.7709374775623445, "learning_rate": 2.524050686542556e-07, "loss": 0.2074, "step": 38830 }, { "epoch": 0.6749813137721845, "grad_norm": 0.992194272558505, "learning_rate": 2.5238061333121597e-07, "loss": 0.1811, "step": 38831 }, { "epoch": 0.6749986963096873, "grad_norm": 1.7179078424818746, "learning_rate": 2.5235615879301607e-07, "loss": 0.1578, "step": 38832 }, { "epoch": 0.6750160788471901, "grad_norm": 3.2714228198895046, "learning_rate": 2.5233170503973334e-07, "loss": 0.3278, "step": 38833 }, { "epoch": 0.6750334613846929, "grad_norm": 1.3430590374354208, "learning_rate": 2.523072520714452e-07, "loss": 0.1561, "step": 38834 }, { "epoch": 0.6750508439221957, "grad_norm": 1.8295523022620783, "learning_rate": 2.522827998882294e-07, "loss": 0.2352, "step": 38835 }, { "epoch": 0.6750682264596986, "grad_norm": 1.0415248209188235, "learning_rate": 2.5225834849016357e-07, "loss": 0.2547, "step": 38836 }, { "epoch": 0.6750856089972014, "grad_norm": 1.600976501221397, "learning_rate": 2.5223389787732483e-07, "loss": 0.1732, "step": 38837 }, { "epoch": 0.6751029915347042, "grad_norm": 1.5567148628192502, "learning_rate": 2.522094480497906e-07, "loss": 0.1705, "step": 38838 }, { "epoch": 0.675120374072207, "grad_norm": 2.5861239706263213, "learning_rate": 2.521849990076388e-07, "loss": 0.1882, "step": 38839 }, { "epoch": 0.6751377566097099, "grad_norm": 0.9421318813335942, "learning_rate": 2.521605507509467e-07, "loss": 0.2547, "step": 38840 }, { "epoch": 0.6751551391472127, "grad_norm": 1.6609237799839225, "learning_rate": 2.5213610327979174e-07, "loss": 0.2028, "step": 38841 }, { "epoch": 0.6751725216847155, "grad_norm": 2.8980991530656284, "learning_rate": 2.521116565942514e-07, "loss": 0.2252, "step": 38842 }, { "epoch": 0.6751899042222184, "grad_norm": 1.7246281905709533, "learning_rate": 2.520872106944035e-07, "loss": 0.2622, "step": 38843 }, { "epoch": 0.6752072867597212, "grad_norm": 1.0685239441664245, "learning_rate": 2.520627655803251e-07, "loss": 0.1642, "step": 38844 }, { "epoch": 0.675224669297224, "grad_norm": 2.1161308577701026, "learning_rate": 2.520383212520939e-07, "loss": 0.2291, "step": 38845 }, { "epoch": 0.6752420518347269, "grad_norm": 1.3362679213573516, "learning_rate": 2.520138777097871e-07, "loss": 0.3485, "step": 38846 }, { "epoch": 0.6752594343722297, "grad_norm": 1.5462871497597923, "learning_rate": 2.519894349534826e-07, "loss": 0.1652, "step": 38847 }, { "epoch": 0.6752768169097325, "grad_norm": 3.556079356795907, "learning_rate": 2.519649929832576e-07, "loss": 0.2082, "step": 38848 }, { "epoch": 0.6752941994472353, "grad_norm": 2.091268649114747, "learning_rate": 2.5194055179918967e-07, "loss": 0.2254, "step": 38849 }, { "epoch": 0.6753115819847382, "grad_norm": 1.0020370580537585, "learning_rate": 2.5191611140135626e-07, "loss": 0.1201, "step": 38850 }, { "epoch": 0.675328964522241, "grad_norm": 1.686386303420359, "learning_rate": 2.5189167178983476e-07, "loss": 0.2326, "step": 38851 }, { "epoch": 0.6753463470597438, "grad_norm": 2.0422874420654344, "learning_rate": 2.5186723296470277e-07, "loss": 0.1978, "step": 38852 }, { "epoch": 0.6753637295972466, "grad_norm": 1.3837410187720005, "learning_rate": 2.518427949260376e-07, "loss": 0.198, "step": 38853 }, { "epoch": 0.6753811121347494, "grad_norm": 2.4216947159241906, "learning_rate": 2.5181835767391663e-07, "loss": 0.2183, "step": 38854 }, { "epoch": 0.6753984946722522, "grad_norm": 3.303665491021903, "learning_rate": 2.517939212084178e-07, "loss": 0.4328, "step": 38855 }, { "epoch": 0.675415877209755, "grad_norm": 1.4643298736457335, "learning_rate": 2.5176948552961807e-07, "loss": 0.2567, "step": 38856 }, { "epoch": 0.6754332597472579, "grad_norm": 1.2203023797024208, "learning_rate": 2.517450506375949e-07, "loss": 0.28, "step": 38857 }, { "epoch": 0.6754506422847607, "grad_norm": 1.4367914887759812, "learning_rate": 2.5172061653242595e-07, "loss": 0.294, "step": 38858 }, { "epoch": 0.6754680248222635, "grad_norm": 1.7461708663452922, "learning_rate": 2.516961832141887e-07, "loss": 0.263, "step": 38859 }, { "epoch": 0.6754854073597664, "grad_norm": 1.5679146824101275, "learning_rate": 2.516717506829604e-07, "loss": 0.1645, "step": 38860 }, { "epoch": 0.6755027898972692, "grad_norm": 1.2593013230709342, "learning_rate": 2.516473189388186e-07, "loss": 0.3559, "step": 38861 }, { "epoch": 0.675520172434772, "grad_norm": 1.0765002037355262, "learning_rate": 2.516228879818408e-07, "loss": 0.2427, "step": 38862 }, { "epoch": 0.6755375549722749, "grad_norm": 1.656028757666369, "learning_rate": 2.5159845781210424e-07, "loss": 0.189, "step": 38863 }, { "epoch": 0.6755549375097777, "grad_norm": 2.830680758190811, "learning_rate": 2.5157402842968655e-07, "loss": 0.3166, "step": 38864 }, { "epoch": 0.6755723200472805, "grad_norm": 2.132860662293542, "learning_rate": 2.5154959983466486e-07, "loss": 0.244, "step": 38865 }, { "epoch": 0.6755897025847833, "grad_norm": 1.2820559868598644, "learning_rate": 2.51525172027117e-07, "loss": 0.2336, "step": 38866 }, { "epoch": 0.6756070851222862, "grad_norm": 1.37180932514221, "learning_rate": 2.5150074500712016e-07, "loss": 0.2107, "step": 38867 }, { "epoch": 0.675624467659789, "grad_norm": 3.0517747163176105, "learning_rate": 2.51476318774752e-07, "loss": 0.2658, "step": 38868 }, { "epoch": 0.6756418501972918, "grad_norm": 1.4953113961902074, "learning_rate": 2.514518933300894e-07, "loss": 0.1386, "step": 38869 }, { "epoch": 0.6756592327347947, "grad_norm": 1.1391274736607377, "learning_rate": 2.514274686732103e-07, "loss": 0.1765, "step": 38870 }, { "epoch": 0.6756766152722975, "grad_norm": 1.1575312325315188, "learning_rate": 2.5140304480419195e-07, "loss": 0.165, "step": 38871 }, { "epoch": 0.6756939978098003, "grad_norm": 1.7733551355491672, "learning_rate": 2.5137862172311175e-07, "loss": 0.2017, "step": 38872 }, { "epoch": 0.675711380347303, "grad_norm": 1.8262305522247542, "learning_rate": 2.5135419943004686e-07, "loss": 0.1885, "step": 38873 }, { "epoch": 0.6757287628848059, "grad_norm": 1.202909006908761, "learning_rate": 2.5132977792507544e-07, "loss": 0.1658, "step": 38874 }, { "epoch": 0.6757461454223087, "grad_norm": 1.6277499161771822, "learning_rate": 2.5130535720827416e-07, "loss": 0.2946, "step": 38875 }, { "epoch": 0.6757635279598115, "grad_norm": 1.3062747541074153, "learning_rate": 2.512809372797206e-07, "loss": 0.1427, "step": 38876 }, { "epoch": 0.6757809104973144, "grad_norm": 1.9237342928741727, "learning_rate": 2.512565181394921e-07, "loss": 0.27, "step": 38877 }, { "epoch": 0.6757982930348172, "grad_norm": 1.4954264500807513, "learning_rate": 2.512320997876663e-07, "loss": 0.1602, "step": 38878 }, { "epoch": 0.67581567557232, "grad_norm": 1.661605220861888, "learning_rate": 2.512076822243205e-07, "loss": 0.1516, "step": 38879 }, { "epoch": 0.6758330581098229, "grad_norm": 1.1900118483450994, "learning_rate": 2.5118326544953207e-07, "loss": 0.2255, "step": 38880 }, { "epoch": 0.6758504406473257, "grad_norm": 0.9995803910897905, "learning_rate": 2.511588494633784e-07, "loss": 0.1485, "step": 38881 }, { "epoch": 0.6758678231848285, "grad_norm": 1.0424345486772997, "learning_rate": 2.5113443426593684e-07, "loss": 0.1702, "step": 38882 }, { "epoch": 0.6758852057223314, "grad_norm": 2.2807569805990076, "learning_rate": 2.511100198572848e-07, "loss": 0.2075, "step": 38883 }, { "epoch": 0.6759025882598342, "grad_norm": 1.405912971862559, "learning_rate": 2.510856062374997e-07, "loss": 0.1685, "step": 38884 }, { "epoch": 0.675919970797337, "grad_norm": 1.0085552439898071, "learning_rate": 2.510611934066587e-07, "loss": 0.1281, "step": 38885 }, { "epoch": 0.6759373533348398, "grad_norm": 2.259400881570239, "learning_rate": 2.510367813648395e-07, "loss": 0.2587, "step": 38886 }, { "epoch": 0.6759547358723427, "grad_norm": 1.2717825660370181, "learning_rate": 2.5101237011211957e-07, "loss": 0.2032, "step": 38887 }, { "epoch": 0.6759721184098455, "grad_norm": 2.4021400265522703, "learning_rate": 2.509879596485757e-07, "loss": 0.1708, "step": 38888 }, { "epoch": 0.6759895009473483, "grad_norm": 1.450237703732152, "learning_rate": 2.509635499742857e-07, "loss": 0.206, "step": 38889 }, { "epoch": 0.6760068834848512, "grad_norm": 0.8268998811913275, "learning_rate": 2.509391410893269e-07, "loss": 0.2657, "step": 38890 }, { "epoch": 0.676024266022354, "grad_norm": 1.3759430376056727, "learning_rate": 2.509147329937766e-07, "loss": 0.2017, "step": 38891 }, { "epoch": 0.6760416485598568, "grad_norm": 1.7736979735835254, "learning_rate": 2.5089032568771225e-07, "loss": 0.1777, "step": 38892 }, { "epoch": 0.6760590310973595, "grad_norm": 2.1113414126426866, "learning_rate": 2.508659191712111e-07, "loss": 0.1888, "step": 38893 }, { "epoch": 0.6760764136348624, "grad_norm": 1.6617980597306075, "learning_rate": 2.508415134443505e-07, "loss": 0.182, "step": 38894 }, { "epoch": 0.6760937961723652, "grad_norm": 2.437927015057869, "learning_rate": 2.508171085072079e-07, "loss": 0.2627, "step": 38895 }, { "epoch": 0.676111178709868, "grad_norm": 1.584692339872973, "learning_rate": 2.507927043598603e-07, "loss": 0.2679, "step": 38896 }, { "epoch": 0.6761285612473709, "grad_norm": 1.172744913423812, "learning_rate": 2.5076830100238566e-07, "loss": 0.2022, "step": 38897 }, { "epoch": 0.6761459437848737, "grad_norm": 1.0942701255101699, "learning_rate": 2.50743898434861e-07, "loss": 0.1771, "step": 38898 }, { "epoch": 0.6761633263223765, "grad_norm": 1.3288384874011039, "learning_rate": 2.5071949665736367e-07, "loss": 0.1749, "step": 38899 }, { "epoch": 0.6761807088598794, "grad_norm": 1.4361099154001362, "learning_rate": 2.5069509566997107e-07, "loss": 0.2247, "step": 38900 }, { "epoch": 0.6761980913973822, "grad_norm": 1.7651641389148065, "learning_rate": 2.506706954727604e-07, "loss": 0.2247, "step": 38901 }, { "epoch": 0.676215473934885, "grad_norm": 1.5091803814445923, "learning_rate": 2.5064629606580916e-07, "loss": 0.1499, "step": 38902 }, { "epoch": 0.6762328564723878, "grad_norm": 1.784999516203712, "learning_rate": 2.506218974491946e-07, "loss": 0.2227, "step": 38903 }, { "epoch": 0.6762502390098907, "grad_norm": 1.7780394292846304, "learning_rate": 2.505974996229939e-07, "loss": 0.2201, "step": 38904 }, { "epoch": 0.6762676215473935, "grad_norm": 1.626516424667172, "learning_rate": 2.505731025872847e-07, "loss": 0.2685, "step": 38905 }, { "epoch": 0.6762850040848963, "grad_norm": 1.7870295243377265, "learning_rate": 2.5054870634214445e-07, "loss": 0.2025, "step": 38906 }, { "epoch": 0.6763023866223992, "grad_norm": 1.3894742705505396, "learning_rate": 2.5052431088765e-07, "loss": 0.1818, "step": 38907 }, { "epoch": 0.676319769159902, "grad_norm": 1.1000026024494256, "learning_rate": 2.504999162238787e-07, "loss": 0.2191, "step": 38908 }, { "epoch": 0.6763371516974048, "grad_norm": 2.8186211168566886, "learning_rate": 2.504755223509082e-07, "loss": 0.2296, "step": 38909 }, { "epoch": 0.6763545342349077, "grad_norm": 1.8774929544055046, "learning_rate": 2.504511292688157e-07, "loss": 0.2489, "step": 38910 }, { "epoch": 0.6763719167724105, "grad_norm": 1.6668262916778938, "learning_rate": 2.504267369776785e-07, "loss": 0.2147, "step": 38911 }, { "epoch": 0.6763892993099133, "grad_norm": 1.2297195512946677, "learning_rate": 2.5040234547757385e-07, "loss": 0.1578, "step": 38912 }, { "epoch": 0.676406681847416, "grad_norm": 3.252308557957343, "learning_rate": 2.5037795476857915e-07, "loss": 0.1945, "step": 38913 }, { "epoch": 0.6764240643849189, "grad_norm": 1.5642202613158487, "learning_rate": 2.503535648507716e-07, "loss": 0.2865, "step": 38914 }, { "epoch": 0.6764414469224217, "grad_norm": 1.198881603619906, "learning_rate": 2.5032917572422843e-07, "loss": 0.161, "step": 38915 }, { "epoch": 0.6764588294599245, "grad_norm": 1.0921986067648346, "learning_rate": 2.5030478738902726e-07, "loss": 0.1212, "step": 38916 }, { "epoch": 0.6764762119974274, "grad_norm": 1.4270413813961278, "learning_rate": 2.502803998452453e-07, "loss": 0.2567, "step": 38917 }, { "epoch": 0.6764935945349302, "grad_norm": 1.6173089256005633, "learning_rate": 2.5025601309295986e-07, "loss": 0.4353, "step": 38918 }, { "epoch": 0.676510977072433, "grad_norm": 1.7133401954741962, "learning_rate": 2.502316271322478e-07, "loss": 0.4053, "step": 38919 }, { "epoch": 0.6765283596099358, "grad_norm": 1.3832757115811742, "learning_rate": 2.5020724196318685e-07, "loss": 0.125, "step": 38920 }, { "epoch": 0.6765457421474387, "grad_norm": 1.3216763779494305, "learning_rate": 2.5018285758585433e-07, "loss": 0.2257, "step": 38921 }, { "epoch": 0.6765631246849415, "grad_norm": 1.564915967318277, "learning_rate": 2.501584740003273e-07, "loss": 0.1567, "step": 38922 }, { "epoch": 0.6765805072224443, "grad_norm": 2.25341709081782, "learning_rate": 2.50134091206683e-07, "loss": 0.1605, "step": 38923 }, { "epoch": 0.6765978897599472, "grad_norm": 0.8342726373915328, "learning_rate": 2.5010970920499903e-07, "loss": 0.1809, "step": 38924 }, { "epoch": 0.67661527229745, "grad_norm": 1.4009317292091543, "learning_rate": 2.500853279953526e-07, "loss": 0.2073, "step": 38925 }, { "epoch": 0.6766326548349528, "grad_norm": 1.8346758380580748, "learning_rate": 2.5006094757782083e-07, "loss": 0.2683, "step": 38926 }, { "epoch": 0.6766500373724557, "grad_norm": 2.3468523025714045, "learning_rate": 2.5003656795248074e-07, "loss": 0.1924, "step": 38927 }, { "epoch": 0.6766674199099585, "grad_norm": 1.8815804548428305, "learning_rate": 2.5001218911941015e-07, "loss": 0.235, "step": 38928 }, { "epoch": 0.6766848024474613, "grad_norm": 0.8503106399374027, "learning_rate": 2.499878110786861e-07, "loss": 0.1752, "step": 38929 }, { "epoch": 0.6767021849849642, "grad_norm": 1.7565057175426948, "learning_rate": 2.499634338303858e-07, "loss": 0.2082, "step": 38930 }, { "epoch": 0.676719567522467, "grad_norm": 1.263478997425766, "learning_rate": 2.499390573745866e-07, "loss": 0.182, "step": 38931 }, { "epoch": 0.6767369500599697, "grad_norm": 1.4611476830614665, "learning_rate": 2.499146817113657e-07, "loss": 0.1149, "step": 38932 }, { "epoch": 0.6767543325974725, "grad_norm": 1.8536048787168018, "learning_rate": 2.498903068408004e-07, "loss": 0.2325, "step": 38933 }, { "epoch": 0.6767717151349754, "grad_norm": 1.7303366544518004, "learning_rate": 2.4986593276296785e-07, "loss": 0.2039, "step": 38934 }, { "epoch": 0.6767890976724782, "grad_norm": 1.6353081288792082, "learning_rate": 2.498415594779453e-07, "loss": 0.2284, "step": 38935 }, { "epoch": 0.676806480209981, "grad_norm": 2.8605975543797504, "learning_rate": 2.4981718698581025e-07, "loss": 0.1734, "step": 38936 }, { "epoch": 0.6768238627474839, "grad_norm": 1.4094974220218615, "learning_rate": 2.4979281528664e-07, "loss": 0.2003, "step": 38937 }, { "epoch": 0.6768412452849867, "grad_norm": 2.455508041757751, "learning_rate": 2.4976844438051114e-07, "loss": 0.2106, "step": 38938 }, { "epoch": 0.6768586278224895, "grad_norm": 1.72828264520602, "learning_rate": 2.497440742675016e-07, "loss": 0.2573, "step": 38939 }, { "epoch": 0.6768760103599923, "grad_norm": 1.8660468930468832, "learning_rate": 2.497197049476884e-07, "loss": 0.1751, "step": 38940 }, { "epoch": 0.6768933928974952, "grad_norm": 2.139684282912818, "learning_rate": 2.4969533642114874e-07, "loss": 0.0992, "step": 38941 }, { "epoch": 0.676910775434998, "grad_norm": 2.0746422148205297, "learning_rate": 2.496709686879599e-07, "loss": 0.2489, "step": 38942 }, { "epoch": 0.6769281579725008, "grad_norm": 1.6414538662633222, "learning_rate": 2.496466017481989e-07, "loss": 0.2376, "step": 38943 }, { "epoch": 0.6769455405100037, "grad_norm": 4.176781647822771, "learning_rate": 2.496222356019436e-07, "loss": 0.346, "step": 38944 }, { "epoch": 0.6769629230475065, "grad_norm": 1.5934121062878943, "learning_rate": 2.4959787024927053e-07, "loss": 0.1776, "step": 38945 }, { "epoch": 0.6769803055850093, "grad_norm": 1.3799850461355152, "learning_rate": 2.4957350569025703e-07, "loss": 0.1708, "step": 38946 }, { "epoch": 0.6769976881225122, "grad_norm": 0.9108376505984893, "learning_rate": 2.4954914192498064e-07, "loss": 0.1366, "step": 38947 }, { "epoch": 0.677015070660015, "grad_norm": 2.1689132300205167, "learning_rate": 2.495247789535185e-07, "loss": 0.2577, "step": 38948 }, { "epoch": 0.6770324531975178, "grad_norm": 4.0725514667165195, "learning_rate": 2.495004167759477e-07, "loss": 0.4234, "step": 38949 }, { "epoch": 0.6770498357350206, "grad_norm": 2.997362932205048, "learning_rate": 2.494760553923455e-07, "loss": 0.2528, "step": 38950 }, { "epoch": 0.6770672182725235, "grad_norm": 0.9360537210084817, "learning_rate": 2.4945169480278914e-07, "loss": 0.1423, "step": 38951 }, { "epoch": 0.6770846008100262, "grad_norm": 1.0655012795061256, "learning_rate": 2.494273350073558e-07, "loss": 0.2698, "step": 38952 }, { "epoch": 0.677101983347529, "grad_norm": 0.9080152039875965, "learning_rate": 2.494029760061227e-07, "loss": 0.1884, "step": 38953 }, { "epoch": 0.6771193658850319, "grad_norm": 1.1627185433635965, "learning_rate": 2.493786177991669e-07, "loss": 0.1208, "step": 38954 }, { "epoch": 0.6771367484225347, "grad_norm": 2.2474344081964923, "learning_rate": 2.493542603865659e-07, "loss": 0.2085, "step": 38955 }, { "epoch": 0.6771541309600375, "grad_norm": 1.3610509798005832, "learning_rate": 2.4932990376839704e-07, "loss": 0.1617, "step": 38956 }, { "epoch": 0.6771715134975403, "grad_norm": 1.3789081919897332, "learning_rate": 2.493055479447369e-07, "loss": 0.2537, "step": 38957 }, { "epoch": 0.6771888960350432, "grad_norm": 1.5312205252059001, "learning_rate": 2.492811929156629e-07, "loss": 0.1884, "step": 38958 }, { "epoch": 0.677206278572546, "grad_norm": 1.5883787265925167, "learning_rate": 2.4925683868125257e-07, "loss": 0.218, "step": 38959 }, { "epoch": 0.6772236611100488, "grad_norm": 1.572594122309495, "learning_rate": 2.492324852415829e-07, "loss": 0.2851, "step": 38960 }, { "epoch": 0.6772410436475517, "grad_norm": 1.1108862170582232, "learning_rate": 2.4920813259673095e-07, "loss": 0.1614, "step": 38961 }, { "epoch": 0.6772584261850545, "grad_norm": 0.9550549849376386, "learning_rate": 2.4918378074677385e-07, "loss": 0.1681, "step": 38962 }, { "epoch": 0.6772758087225573, "grad_norm": 1.3508011370612878, "learning_rate": 2.4915942969178937e-07, "loss": 0.205, "step": 38963 }, { "epoch": 0.6772931912600602, "grad_norm": 1.5013973646458076, "learning_rate": 2.491350794318541e-07, "loss": 0.288, "step": 38964 }, { "epoch": 0.677310573797563, "grad_norm": 1.7222444963648107, "learning_rate": 2.491107299670454e-07, "loss": 0.2721, "step": 38965 }, { "epoch": 0.6773279563350658, "grad_norm": 1.7628029027181276, "learning_rate": 2.4908638129744014e-07, "loss": 0.2633, "step": 38966 }, { "epoch": 0.6773453388725686, "grad_norm": 1.2788354573632925, "learning_rate": 2.4906203342311604e-07, "loss": 0.2268, "step": 38967 }, { "epoch": 0.6773627214100715, "grad_norm": 1.2803820942054496, "learning_rate": 2.4903768634415005e-07, "loss": 0.1336, "step": 38968 }, { "epoch": 0.6773801039475743, "grad_norm": 1.717384703018389, "learning_rate": 2.490133400606193e-07, "loss": 0.241, "step": 38969 }, { "epoch": 0.6773974864850771, "grad_norm": 8.114660401417147, "learning_rate": 2.4898899457260095e-07, "loss": 0.1145, "step": 38970 }, { "epoch": 0.67741486902258, "grad_norm": 3.009379546600871, "learning_rate": 2.4896464988017216e-07, "loss": 0.2091, "step": 38971 }, { "epoch": 0.6774322515600827, "grad_norm": 1.2402668048387697, "learning_rate": 2.4894030598341023e-07, "loss": 0.3695, "step": 38972 }, { "epoch": 0.6774496340975855, "grad_norm": 0.7415076403078138, "learning_rate": 2.4891596288239207e-07, "loss": 0.1025, "step": 38973 }, { "epoch": 0.6774670166350883, "grad_norm": 2.0602312714903515, "learning_rate": 2.4889162057719483e-07, "loss": 0.3053, "step": 38974 }, { "epoch": 0.6774843991725912, "grad_norm": 1.1389246736108156, "learning_rate": 2.4886727906789616e-07, "loss": 0.1163, "step": 38975 }, { "epoch": 0.677501781710094, "grad_norm": 1.6363696118902837, "learning_rate": 2.488429383545727e-07, "loss": 0.1686, "step": 38976 }, { "epoch": 0.6775191642475968, "grad_norm": 1.0755855221899095, "learning_rate": 2.488185984373016e-07, "loss": 0.2064, "step": 38977 }, { "epoch": 0.6775365467850997, "grad_norm": 1.4934711298104053, "learning_rate": 2.4879425931616027e-07, "loss": 0.206, "step": 38978 }, { "epoch": 0.6775539293226025, "grad_norm": 1.990966245879748, "learning_rate": 2.487699209912258e-07, "loss": 0.2111, "step": 38979 }, { "epoch": 0.6775713118601053, "grad_norm": 1.9898560874274593, "learning_rate": 2.4874558346257525e-07, "loss": 0.2405, "step": 38980 }, { "epoch": 0.6775886943976082, "grad_norm": 1.5403218875473161, "learning_rate": 2.4872124673028575e-07, "loss": 0.2129, "step": 38981 }, { "epoch": 0.677606076935111, "grad_norm": 1.7576063631062186, "learning_rate": 2.486969107944345e-07, "loss": 0.205, "step": 38982 }, { "epoch": 0.6776234594726138, "grad_norm": 1.5236600831818898, "learning_rate": 2.4867257565509866e-07, "loss": 0.15, "step": 38983 }, { "epoch": 0.6776408420101167, "grad_norm": 3.4239103096498456, "learning_rate": 2.4864824131235517e-07, "loss": 0.4663, "step": 38984 }, { "epoch": 0.6776582245476195, "grad_norm": 1.3058401943920899, "learning_rate": 2.486239077662812e-07, "loss": 0.1985, "step": 38985 }, { "epoch": 0.6776756070851223, "grad_norm": 2.78726785162806, "learning_rate": 2.4859957501695414e-07, "loss": 0.1841, "step": 38986 }, { "epoch": 0.6776929896226251, "grad_norm": 1.1229621643650416, "learning_rate": 2.485752430644509e-07, "loss": 0.2946, "step": 38987 }, { "epoch": 0.677710372160128, "grad_norm": 2.0203688840305323, "learning_rate": 2.48550911908849e-07, "loss": 0.1824, "step": 38988 }, { "epoch": 0.6777277546976308, "grad_norm": 1.6902799587987651, "learning_rate": 2.4852658155022466e-07, "loss": 0.2442, "step": 38989 }, { "epoch": 0.6777451372351336, "grad_norm": 1.3995328772771392, "learning_rate": 2.4850225198865574e-07, "loss": 0.2677, "step": 38990 }, { "epoch": 0.6777625197726365, "grad_norm": 1.5423841001952856, "learning_rate": 2.484779232242192e-07, "loss": 0.1722, "step": 38991 }, { "epoch": 0.6777799023101392, "grad_norm": 1.6276160755676081, "learning_rate": 2.484535952569921e-07, "loss": 0.1576, "step": 38992 }, { "epoch": 0.677797284847642, "grad_norm": 1.6163818735617834, "learning_rate": 2.484292680870513e-07, "loss": 0.1752, "step": 38993 }, { "epoch": 0.6778146673851448, "grad_norm": 1.612458298183501, "learning_rate": 2.484049417144746e-07, "loss": 0.3376, "step": 38994 }, { "epoch": 0.6778320499226477, "grad_norm": 1.9600432214635348, "learning_rate": 2.4838061613933847e-07, "loss": 0.2114, "step": 38995 }, { "epoch": 0.6778494324601505, "grad_norm": 1.6896204593214412, "learning_rate": 2.483562913617202e-07, "loss": 0.1958, "step": 38996 }, { "epoch": 0.6778668149976533, "grad_norm": 1.4307830346541603, "learning_rate": 2.483319673816967e-07, "loss": 0.2421, "step": 38997 }, { "epoch": 0.6778841975351562, "grad_norm": 0.842746766618619, "learning_rate": 2.4830764419934545e-07, "loss": 0.1972, "step": 38998 }, { "epoch": 0.677901580072659, "grad_norm": 2.4067967608455847, "learning_rate": 2.4828332181474334e-07, "loss": 0.1912, "step": 38999 }, { "epoch": 0.6779189626101618, "grad_norm": 1.3079581149145736, "learning_rate": 2.482590002279675e-07, "loss": 0.2413, "step": 39000 }, { "epoch": 0.6779363451476647, "grad_norm": 1.0866660265870303, "learning_rate": 2.482346794390949e-07, "loss": 0.2042, "step": 39001 }, { "epoch": 0.6779537276851675, "grad_norm": 1.657656621594144, "learning_rate": 2.482103594482028e-07, "loss": 0.1506, "step": 39002 }, { "epoch": 0.6779711102226703, "grad_norm": 1.5279243767388913, "learning_rate": 2.4818604025536813e-07, "loss": 0.1725, "step": 39003 }, { "epoch": 0.6779884927601731, "grad_norm": 1.5044235888007835, "learning_rate": 2.4816172186066813e-07, "loss": 0.1873, "step": 39004 }, { "epoch": 0.678005875297676, "grad_norm": 1.7842698307893456, "learning_rate": 2.481374042641796e-07, "loss": 0.2437, "step": 39005 }, { "epoch": 0.6780232578351788, "grad_norm": 1.7388089490866427, "learning_rate": 2.4811308746597987e-07, "loss": 0.1617, "step": 39006 }, { "epoch": 0.6780406403726816, "grad_norm": 2.0668195644615657, "learning_rate": 2.480887714661462e-07, "loss": 0.288, "step": 39007 }, { "epoch": 0.6780580229101845, "grad_norm": 1.1943641094199207, "learning_rate": 2.4806445626475505e-07, "loss": 0.2132, "step": 39008 }, { "epoch": 0.6780754054476873, "grad_norm": 2.130872114575712, "learning_rate": 2.48040141861884e-07, "loss": 0.3371, "step": 39009 }, { "epoch": 0.6780927879851901, "grad_norm": 3.9833644734400404, "learning_rate": 2.4801582825760996e-07, "loss": 0.21, "step": 39010 }, { "epoch": 0.678110170522693, "grad_norm": 1.5249305355798757, "learning_rate": 2.4799151545201e-07, "loss": 0.1997, "step": 39011 }, { "epoch": 0.6781275530601957, "grad_norm": 1.6928037238844265, "learning_rate": 2.4796720344516106e-07, "loss": 0.3371, "step": 39012 }, { "epoch": 0.6781449355976985, "grad_norm": 2.7174860331964736, "learning_rate": 2.479428922371406e-07, "loss": 0.2941, "step": 39013 }, { "epoch": 0.6781623181352013, "grad_norm": 1.8058896891247582, "learning_rate": 2.479185818280252e-07, "loss": 0.1386, "step": 39014 }, { "epoch": 0.6781797006727042, "grad_norm": 1.0988131181078997, "learning_rate": 2.478942722178921e-07, "loss": 0.2526, "step": 39015 }, { "epoch": 0.678197083210207, "grad_norm": 2.2644874278049354, "learning_rate": 2.478699634068182e-07, "loss": 0.3292, "step": 39016 }, { "epoch": 0.6782144657477098, "grad_norm": 1.091626720221659, "learning_rate": 2.4784565539488083e-07, "loss": 0.3245, "step": 39017 }, { "epoch": 0.6782318482852127, "grad_norm": 3.1820357207341163, "learning_rate": 2.4782134818215695e-07, "loss": 0.2403, "step": 39018 }, { "epoch": 0.6782492308227155, "grad_norm": 1.798859945431623, "learning_rate": 2.4779704176872353e-07, "loss": 0.1496, "step": 39019 }, { "epoch": 0.6782666133602183, "grad_norm": 1.2234136816777692, "learning_rate": 2.4777273615465765e-07, "loss": 0.179, "step": 39020 }, { "epoch": 0.6782839958977211, "grad_norm": 1.6047565265163208, "learning_rate": 2.477484313400363e-07, "loss": 0.2459, "step": 39021 }, { "epoch": 0.678301378435224, "grad_norm": 1.7669339219375448, "learning_rate": 2.477241273249366e-07, "loss": 0.1205, "step": 39022 }, { "epoch": 0.6783187609727268, "grad_norm": 2.770923472939875, "learning_rate": 2.476998241094355e-07, "loss": 0.1825, "step": 39023 }, { "epoch": 0.6783361435102296, "grad_norm": 0.9441662064456765, "learning_rate": 2.476755216936099e-07, "loss": 0.2771, "step": 39024 }, { "epoch": 0.6783535260477325, "grad_norm": 0.9747376430195467, "learning_rate": 2.4765122007753713e-07, "loss": 0.1531, "step": 39025 }, { "epoch": 0.6783709085852353, "grad_norm": 1.185340587109015, "learning_rate": 2.4762691926129425e-07, "loss": 0.1669, "step": 39026 }, { "epoch": 0.6783882911227381, "grad_norm": 1.6699559866068154, "learning_rate": 2.476026192449578e-07, "loss": 0.2857, "step": 39027 }, { "epoch": 0.678405673660241, "grad_norm": 1.9322204791625697, "learning_rate": 2.4757832002860525e-07, "loss": 0.1686, "step": 39028 }, { "epoch": 0.6784230561977438, "grad_norm": 1.6171831809640755, "learning_rate": 2.475540216123135e-07, "loss": 0.2568, "step": 39029 }, { "epoch": 0.6784404387352466, "grad_norm": 1.158216494347861, "learning_rate": 2.4752972399615945e-07, "loss": 0.1745, "step": 39030 }, { "epoch": 0.6784578212727495, "grad_norm": 1.673717632457699, "learning_rate": 2.475054271802203e-07, "loss": 0.194, "step": 39031 }, { "epoch": 0.6784752038102522, "grad_norm": 4.742171129801979, "learning_rate": 2.474811311645729e-07, "loss": 0.2375, "step": 39032 }, { "epoch": 0.678492586347755, "grad_norm": 1.2642233617716716, "learning_rate": 2.4745683594929436e-07, "loss": 0.2322, "step": 39033 }, { "epoch": 0.6785099688852578, "grad_norm": 1.2388603384352763, "learning_rate": 2.4743254153446164e-07, "loss": 0.2057, "step": 39034 }, { "epoch": 0.6785273514227607, "grad_norm": 2.8627427477186957, "learning_rate": 2.4740824792015154e-07, "loss": 0.4007, "step": 39035 }, { "epoch": 0.6785447339602635, "grad_norm": 2.086117108187443, "learning_rate": 2.473839551064414e-07, "loss": 0.1759, "step": 39036 }, { "epoch": 0.6785621164977663, "grad_norm": 1.2777237191096427, "learning_rate": 2.4735966309340815e-07, "loss": 0.1697, "step": 39037 }, { "epoch": 0.6785794990352692, "grad_norm": 1.8932189419909482, "learning_rate": 2.473353718811289e-07, "loss": 0.2193, "step": 39038 }, { "epoch": 0.678596881572772, "grad_norm": 1.6775695971812017, "learning_rate": 2.4731108146968005e-07, "loss": 0.2875, "step": 39039 }, { "epoch": 0.6786142641102748, "grad_norm": 2.1709923990128646, "learning_rate": 2.472867918591392e-07, "loss": 0.245, "step": 39040 }, { "epoch": 0.6786316466477776, "grad_norm": 1.735341831738508, "learning_rate": 2.472625030495831e-07, "loss": 0.1878, "step": 39041 }, { "epoch": 0.6786490291852805, "grad_norm": 2.2322793548177238, "learning_rate": 2.4723821504108885e-07, "loss": 0.2899, "step": 39042 }, { "epoch": 0.6786664117227833, "grad_norm": 3.473738786141159, "learning_rate": 2.4721392783373305e-07, "loss": 0.223, "step": 39043 }, { "epoch": 0.6786837942602861, "grad_norm": 1.359940158873658, "learning_rate": 2.4718964142759324e-07, "loss": 0.2372, "step": 39044 }, { "epoch": 0.678701176797789, "grad_norm": 2.485158810981095, "learning_rate": 2.471653558227463e-07, "loss": 0.165, "step": 39045 }, { "epoch": 0.6787185593352918, "grad_norm": 1.5549151018975647, "learning_rate": 2.4714107101926883e-07, "loss": 0.2405, "step": 39046 }, { "epoch": 0.6787359418727946, "grad_norm": 0.8322504405858456, "learning_rate": 2.4711678701723786e-07, "loss": 0.16, "step": 39047 }, { "epoch": 0.6787533244102975, "grad_norm": 1.6529493024754363, "learning_rate": 2.470925038167307e-07, "loss": 0.1666, "step": 39048 }, { "epoch": 0.6787707069478003, "grad_norm": 1.0611438189855298, "learning_rate": 2.4706822141782415e-07, "loss": 0.1634, "step": 39049 }, { "epoch": 0.6787880894853031, "grad_norm": 2.669649907506912, "learning_rate": 2.4704393982059505e-07, "loss": 0.2299, "step": 39050 }, { "epoch": 0.678805472022806, "grad_norm": 1.3070556985675141, "learning_rate": 2.4701965902512054e-07, "loss": 0.1946, "step": 39051 }, { "epoch": 0.6788228545603087, "grad_norm": 1.3660026640952923, "learning_rate": 2.469953790314775e-07, "loss": 0.2278, "step": 39052 }, { "epoch": 0.6788402370978115, "grad_norm": 1.3014646094432631, "learning_rate": 2.469710998397429e-07, "loss": 0.1447, "step": 39053 }, { "epoch": 0.6788576196353143, "grad_norm": 1.3266400359196266, "learning_rate": 2.4694682144999365e-07, "loss": 0.1395, "step": 39054 }, { "epoch": 0.6788750021728172, "grad_norm": 1.1514742442616388, "learning_rate": 2.4692254386230654e-07, "loss": 0.1533, "step": 39055 }, { "epoch": 0.67889238471032, "grad_norm": 2.0669251152268693, "learning_rate": 2.468982670767589e-07, "loss": 0.1662, "step": 39056 }, { "epoch": 0.6789097672478228, "grad_norm": 1.3424466847878354, "learning_rate": 2.468739910934277e-07, "loss": 0.1554, "step": 39057 }, { "epoch": 0.6789271497853256, "grad_norm": 1.716989236272763, "learning_rate": 2.468497159123893e-07, "loss": 0.2074, "step": 39058 }, { "epoch": 0.6789445323228285, "grad_norm": 1.3182723307729554, "learning_rate": 2.4682544153372115e-07, "loss": 0.2027, "step": 39059 }, { "epoch": 0.6789619148603313, "grad_norm": 2.3086417331486304, "learning_rate": 2.4680116795750014e-07, "loss": 0.203, "step": 39060 }, { "epoch": 0.6789792973978341, "grad_norm": 1.620712930360081, "learning_rate": 2.4677689518380303e-07, "loss": 0.1459, "step": 39061 }, { "epoch": 0.678996679935337, "grad_norm": 2.083911359777076, "learning_rate": 2.4675262321270686e-07, "loss": 0.1808, "step": 39062 }, { "epoch": 0.6790140624728398, "grad_norm": 1.4884891490582561, "learning_rate": 2.467283520442884e-07, "loss": 0.1673, "step": 39063 }, { "epoch": 0.6790314450103426, "grad_norm": 2.2441860137521714, "learning_rate": 2.4670408167862505e-07, "loss": 0.6645, "step": 39064 }, { "epoch": 0.6790488275478455, "grad_norm": 1.212463112871132, "learning_rate": 2.4667981211579326e-07, "loss": 0.2867, "step": 39065 }, { "epoch": 0.6790662100853483, "grad_norm": 1.8635261323663588, "learning_rate": 2.4665554335586987e-07, "loss": 0.243, "step": 39066 }, { "epoch": 0.6790835926228511, "grad_norm": 2.4556415497183397, "learning_rate": 2.466312753989322e-07, "loss": 0.2153, "step": 39067 }, { "epoch": 0.679100975160354, "grad_norm": 1.2041509817221032, "learning_rate": 2.46607008245057e-07, "loss": 0.2359, "step": 39068 }, { "epoch": 0.6791183576978568, "grad_norm": 2.1919857082096845, "learning_rate": 2.465827418943213e-07, "loss": 0.2152, "step": 39069 }, { "epoch": 0.6791357402353596, "grad_norm": 1.0366969257426562, "learning_rate": 2.465584763468018e-07, "loss": 0.2553, "step": 39070 }, { "epoch": 0.6791531227728623, "grad_norm": 1.3462836477882898, "learning_rate": 2.465342116025755e-07, "loss": 0.1905, "step": 39071 }, { "epoch": 0.6791705053103652, "grad_norm": 1.832148497991681, "learning_rate": 2.4650994766171937e-07, "loss": 0.229, "step": 39072 }, { "epoch": 0.679187887847868, "grad_norm": 1.7180099542294738, "learning_rate": 2.464856845243102e-07, "loss": 0.2448, "step": 39073 }, { "epoch": 0.6792052703853708, "grad_norm": 3.054566068111411, "learning_rate": 2.4646142219042483e-07, "loss": 0.1856, "step": 39074 }, { "epoch": 0.6792226529228736, "grad_norm": 1.8334956607002149, "learning_rate": 2.464371606601404e-07, "loss": 0.2864, "step": 39075 }, { "epoch": 0.6792400354603765, "grad_norm": 1.6176123223822894, "learning_rate": 2.4641289993353394e-07, "loss": 0.1873, "step": 39076 }, { "epoch": 0.6792574179978793, "grad_norm": 1.4587642005692498, "learning_rate": 2.463886400106819e-07, "loss": 0.299, "step": 39077 }, { "epoch": 0.6792748005353821, "grad_norm": 2.29166835125051, "learning_rate": 2.4636438089166115e-07, "loss": 0.2086, "step": 39078 }, { "epoch": 0.679292183072885, "grad_norm": 1.05761582596612, "learning_rate": 2.4634012257654906e-07, "loss": 0.2628, "step": 39079 }, { "epoch": 0.6793095656103878, "grad_norm": 1.4976876000538668, "learning_rate": 2.463158650654222e-07, "loss": 0.2102, "step": 39080 }, { "epoch": 0.6793269481478906, "grad_norm": 2.169771273677459, "learning_rate": 2.4629160835835754e-07, "loss": 0.2099, "step": 39081 }, { "epoch": 0.6793443306853935, "grad_norm": 1.4170235968621943, "learning_rate": 2.4626735245543175e-07, "loss": 0.1971, "step": 39082 }, { "epoch": 0.6793617132228963, "grad_norm": 1.6742994213532751, "learning_rate": 2.462430973567223e-07, "loss": 0.1736, "step": 39083 }, { "epoch": 0.6793790957603991, "grad_norm": 0.9580047759148879, "learning_rate": 2.462188430623054e-07, "loss": 0.2298, "step": 39084 }, { "epoch": 0.679396478297902, "grad_norm": 1.0668817685193204, "learning_rate": 2.4619458957225825e-07, "loss": 0.1965, "step": 39085 }, { "epoch": 0.6794138608354048, "grad_norm": 1.6378321971062233, "learning_rate": 2.4617033688665745e-07, "loss": 0.3062, "step": 39086 }, { "epoch": 0.6794312433729076, "grad_norm": 1.031276378693522, "learning_rate": 2.4614608500558026e-07, "loss": 0.1456, "step": 39087 }, { "epoch": 0.6794486259104104, "grad_norm": 2.5237827579833745, "learning_rate": 2.461218339291034e-07, "loss": 0.2878, "step": 39088 }, { "epoch": 0.6794660084479133, "grad_norm": 1.9929635985528475, "learning_rate": 2.460975836573037e-07, "loss": 0.2923, "step": 39089 }, { "epoch": 0.6794833909854161, "grad_norm": 1.4025342871557451, "learning_rate": 2.46073334190258e-07, "loss": 0.144, "step": 39090 }, { "epoch": 0.6795007735229188, "grad_norm": 1.238531645089414, "learning_rate": 2.460490855280432e-07, "loss": 0.1284, "step": 39091 }, { "epoch": 0.6795181560604217, "grad_norm": 1.332473958417661, "learning_rate": 2.4602483767073617e-07, "loss": 0.1645, "step": 39092 }, { "epoch": 0.6795355385979245, "grad_norm": 2.0495465793645566, "learning_rate": 2.460005906184137e-07, "loss": 0.1604, "step": 39093 }, { "epoch": 0.6795529211354273, "grad_norm": 1.87567219873079, "learning_rate": 2.459763443711525e-07, "loss": 0.2404, "step": 39094 }, { "epoch": 0.6795703036729301, "grad_norm": 2.186072172597022, "learning_rate": 2.4595209892903e-07, "loss": 0.1301, "step": 39095 }, { "epoch": 0.679587686210433, "grad_norm": 0.8850702675850508, "learning_rate": 2.459278542921224e-07, "loss": 0.1986, "step": 39096 }, { "epoch": 0.6796050687479358, "grad_norm": 1.6382308256005458, "learning_rate": 2.459036104605067e-07, "loss": 0.2227, "step": 39097 }, { "epoch": 0.6796224512854386, "grad_norm": 1.8646456958114255, "learning_rate": 2.4587936743426e-07, "loss": 0.1778, "step": 39098 }, { "epoch": 0.6796398338229415, "grad_norm": 1.2792405001153007, "learning_rate": 2.458551252134589e-07, "loss": 0.2203, "step": 39099 }, { "epoch": 0.6796572163604443, "grad_norm": 1.808958671321775, "learning_rate": 2.458308837981803e-07, "loss": 0.1419, "step": 39100 }, { "epoch": 0.6796745988979471, "grad_norm": 2.2311561796579835, "learning_rate": 2.4580664318850094e-07, "loss": 0.2283, "step": 39101 }, { "epoch": 0.67969198143545, "grad_norm": 1.664025111527476, "learning_rate": 2.4578240338449806e-07, "loss": 0.1757, "step": 39102 }, { "epoch": 0.6797093639729528, "grad_norm": 2.558734388903459, "learning_rate": 2.4575816438624804e-07, "loss": 0.2306, "step": 39103 }, { "epoch": 0.6797267465104556, "grad_norm": 4.925150573058474, "learning_rate": 2.4573392619382784e-07, "loss": 0.3099, "step": 39104 }, { "epoch": 0.6797441290479584, "grad_norm": 1.3449266867529006, "learning_rate": 2.4570968880731406e-07, "loss": 0.1947, "step": 39105 }, { "epoch": 0.6797615115854613, "grad_norm": 1.9116913316888733, "learning_rate": 2.4568545222678406e-07, "loss": 0.1298, "step": 39106 }, { "epoch": 0.6797788941229641, "grad_norm": 2.5414510336472493, "learning_rate": 2.456612164523142e-07, "loss": 0.3176, "step": 39107 }, { "epoch": 0.6797962766604669, "grad_norm": 1.279488886587397, "learning_rate": 2.456369814839816e-07, "loss": 0.3113, "step": 39108 }, { "epoch": 0.6798136591979698, "grad_norm": 1.6182589582838243, "learning_rate": 2.4561274732186283e-07, "loss": 0.1786, "step": 39109 }, { "epoch": 0.6798310417354726, "grad_norm": 1.295984569738073, "learning_rate": 2.455885139660348e-07, "loss": 0.1126, "step": 39110 }, { "epoch": 0.6798484242729753, "grad_norm": 2.641103162093522, "learning_rate": 2.455642814165743e-07, "loss": 0.1746, "step": 39111 }, { "epoch": 0.6798658068104781, "grad_norm": 1.6564193519946928, "learning_rate": 2.4554004967355825e-07, "loss": 0.2287, "step": 39112 }, { "epoch": 0.679883189347981, "grad_norm": 1.3622672666108309, "learning_rate": 2.4551581873706304e-07, "loss": 0.1791, "step": 39113 }, { "epoch": 0.6799005718854838, "grad_norm": 1.207665990453975, "learning_rate": 2.4549158860716624e-07, "loss": 0.1857, "step": 39114 }, { "epoch": 0.6799179544229866, "grad_norm": 3.2412392550841114, "learning_rate": 2.4546735928394396e-07, "loss": 0.2188, "step": 39115 }, { "epoch": 0.6799353369604895, "grad_norm": 1.229940703773797, "learning_rate": 2.454431307674731e-07, "loss": 0.1162, "step": 39116 }, { "epoch": 0.6799527194979923, "grad_norm": 1.8775514740589625, "learning_rate": 2.4541890305783067e-07, "loss": 0.1525, "step": 39117 }, { "epoch": 0.6799701020354951, "grad_norm": 1.4135906672086216, "learning_rate": 2.4539467615509345e-07, "loss": 0.2533, "step": 39118 }, { "epoch": 0.679987484572998, "grad_norm": 1.4230391983983957, "learning_rate": 2.453704500593382e-07, "loss": 0.2564, "step": 39119 }, { "epoch": 0.6800048671105008, "grad_norm": 1.9746776532640191, "learning_rate": 2.4534622477064156e-07, "loss": 0.2649, "step": 39120 }, { "epoch": 0.6800222496480036, "grad_norm": 1.1409261425732453, "learning_rate": 2.453220002890804e-07, "loss": 0.2176, "step": 39121 }, { "epoch": 0.6800396321855064, "grad_norm": 2.308135063365253, "learning_rate": 2.4529777661473155e-07, "loss": 0.1765, "step": 39122 }, { "epoch": 0.6800570147230093, "grad_norm": 1.9011374696754464, "learning_rate": 2.452735537476718e-07, "loss": 0.1784, "step": 39123 }, { "epoch": 0.6800743972605121, "grad_norm": 1.4754744889547673, "learning_rate": 2.452493316879776e-07, "loss": 0.2171, "step": 39124 }, { "epoch": 0.6800917797980149, "grad_norm": 2.133289468740882, "learning_rate": 2.4522511043572616e-07, "loss": 0.1775, "step": 39125 }, { "epoch": 0.6801091623355178, "grad_norm": 3.97544981407307, "learning_rate": 2.452008899909942e-07, "loss": 0.1725, "step": 39126 }, { "epoch": 0.6801265448730206, "grad_norm": 1.154121194927562, "learning_rate": 2.4517667035385846e-07, "loss": 0.1876, "step": 39127 }, { "epoch": 0.6801439274105234, "grad_norm": 1.1989728114980711, "learning_rate": 2.451524515243953e-07, "loss": 0.1134, "step": 39128 }, { "epoch": 0.6801613099480263, "grad_norm": 1.012857205919087, "learning_rate": 2.45128233502682e-07, "loss": 0.1608, "step": 39129 }, { "epoch": 0.6801786924855291, "grad_norm": 1.2489160482886372, "learning_rate": 2.451040162887951e-07, "loss": 0.2059, "step": 39130 }, { "epoch": 0.6801960750230318, "grad_norm": 1.7565017088668515, "learning_rate": 2.4507979988281133e-07, "loss": 0.1171, "step": 39131 }, { "epoch": 0.6802134575605346, "grad_norm": 1.7966894952120962, "learning_rate": 2.450555842848074e-07, "loss": 0.2674, "step": 39132 }, { "epoch": 0.6802308400980375, "grad_norm": 1.3197090391329276, "learning_rate": 2.450313694948605e-07, "loss": 0.2419, "step": 39133 }, { "epoch": 0.6802482226355403, "grad_norm": 1.07750139462912, "learning_rate": 2.450071555130468e-07, "loss": 0.1194, "step": 39134 }, { "epoch": 0.6802656051730431, "grad_norm": 1.0547837259195092, "learning_rate": 2.4498294233944326e-07, "loss": 0.2459, "step": 39135 }, { "epoch": 0.680282987710546, "grad_norm": 1.4607787224481505, "learning_rate": 2.449587299741265e-07, "loss": 0.2377, "step": 39136 }, { "epoch": 0.6803003702480488, "grad_norm": 2.3655995076183083, "learning_rate": 2.4493451841717367e-07, "loss": 0.2059, "step": 39137 }, { "epoch": 0.6803177527855516, "grad_norm": 1.6909823671073134, "learning_rate": 2.4491030766866114e-07, "loss": 0.1871, "step": 39138 }, { "epoch": 0.6803351353230545, "grad_norm": 1.3074669223110833, "learning_rate": 2.448860977286658e-07, "loss": 0.1644, "step": 39139 }, { "epoch": 0.6803525178605573, "grad_norm": 1.21030881823304, "learning_rate": 2.448618885972644e-07, "loss": 0.2906, "step": 39140 }, { "epoch": 0.6803699003980601, "grad_norm": 2.9727404942009565, "learning_rate": 2.4483768027453356e-07, "loss": 0.2655, "step": 39141 }, { "epoch": 0.6803872829355629, "grad_norm": 1.7888541966622082, "learning_rate": 2.4481347276055e-07, "loss": 0.203, "step": 39142 }, { "epoch": 0.6804046654730658, "grad_norm": 1.0906230410973228, "learning_rate": 2.4478926605539055e-07, "loss": 0.1785, "step": 39143 }, { "epoch": 0.6804220480105686, "grad_norm": 1.9089436840106577, "learning_rate": 2.447650601591318e-07, "loss": 0.1728, "step": 39144 }, { "epoch": 0.6804394305480714, "grad_norm": 1.2609634015370645, "learning_rate": 2.447408550718506e-07, "loss": 0.1915, "step": 39145 }, { "epoch": 0.6804568130855743, "grad_norm": 7.135552004500576, "learning_rate": 2.4471665079362393e-07, "loss": 0.4292, "step": 39146 }, { "epoch": 0.6804741956230771, "grad_norm": 1.6237984037770397, "learning_rate": 2.446924473245278e-07, "loss": 0.1922, "step": 39147 }, { "epoch": 0.6804915781605799, "grad_norm": 2.6240337847362323, "learning_rate": 2.4466824466463956e-07, "loss": 0.2219, "step": 39148 }, { "epoch": 0.6805089606980828, "grad_norm": 2.4785668916583603, "learning_rate": 2.446440428140356e-07, "loss": 0.1443, "step": 39149 }, { "epoch": 0.6805263432355856, "grad_norm": 1.0102200192844153, "learning_rate": 2.4461984177279277e-07, "loss": 0.2532, "step": 39150 }, { "epoch": 0.6805437257730883, "grad_norm": 1.533429245896545, "learning_rate": 2.4459564154098774e-07, "loss": 0.1958, "step": 39151 }, { "epoch": 0.6805611083105911, "grad_norm": 4.406539960058653, "learning_rate": 2.445714421186972e-07, "loss": 0.2114, "step": 39152 }, { "epoch": 0.680578490848094, "grad_norm": 1.3988524598545191, "learning_rate": 2.4454724350599785e-07, "loss": 0.2816, "step": 39153 }, { "epoch": 0.6805958733855968, "grad_norm": 2.4956608909890003, "learning_rate": 2.445230457029664e-07, "loss": 0.3391, "step": 39154 }, { "epoch": 0.6806132559230996, "grad_norm": 1.0391880327681395, "learning_rate": 2.4449884870967936e-07, "loss": 0.1652, "step": 39155 }, { "epoch": 0.6806306384606025, "grad_norm": 3.1030992528242893, "learning_rate": 2.444746525262137e-07, "loss": 0.2005, "step": 39156 }, { "epoch": 0.6806480209981053, "grad_norm": 1.8637969063684172, "learning_rate": 2.4445045715264615e-07, "loss": 0.2213, "step": 39157 }, { "epoch": 0.6806654035356081, "grad_norm": 1.2602223526458347, "learning_rate": 2.444262625890534e-07, "loss": 0.1875, "step": 39158 }, { "epoch": 0.680682786073111, "grad_norm": 0.9904375689725565, "learning_rate": 2.4440206883551163e-07, "loss": 0.169, "step": 39159 }, { "epoch": 0.6807001686106138, "grad_norm": 1.1523424728827543, "learning_rate": 2.44377875892098e-07, "loss": 0.1696, "step": 39160 }, { "epoch": 0.6807175511481166, "grad_norm": 1.5039712577500401, "learning_rate": 2.4435368375888917e-07, "loss": 0.3684, "step": 39161 }, { "epoch": 0.6807349336856194, "grad_norm": 0.867875511519509, "learning_rate": 2.443294924359617e-07, "loss": 0.1886, "step": 39162 }, { "epoch": 0.6807523162231223, "grad_norm": 1.522818555563808, "learning_rate": 2.443053019233921e-07, "loss": 0.3332, "step": 39163 }, { "epoch": 0.6807696987606251, "grad_norm": 1.7803989099783644, "learning_rate": 2.442811122212574e-07, "loss": 0.1574, "step": 39164 }, { "epoch": 0.6807870812981279, "grad_norm": 2.9175959237797375, "learning_rate": 2.442569233296343e-07, "loss": 0.3347, "step": 39165 }, { "epoch": 0.6808044638356308, "grad_norm": 1.5360419609355518, "learning_rate": 2.442327352485991e-07, "loss": 0.2662, "step": 39166 }, { "epoch": 0.6808218463731336, "grad_norm": 1.2262381229014816, "learning_rate": 2.4420854797822846e-07, "loss": 0.167, "step": 39167 }, { "epoch": 0.6808392289106364, "grad_norm": 1.0940132381656202, "learning_rate": 2.4418436151859947e-07, "loss": 0.2235, "step": 39168 }, { "epoch": 0.6808566114481392, "grad_norm": 2.7603582695443882, "learning_rate": 2.441601758697884e-07, "loss": 0.3001, "step": 39169 }, { "epoch": 0.6808739939856421, "grad_norm": 2.0458544914738304, "learning_rate": 2.4413599103187213e-07, "loss": 0.2348, "step": 39170 }, { "epoch": 0.6808913765231448, "grad_norm": 1.8615242616459808, "learning_rate": 2.4411180700492727e-07, "loss": 0.1963, "step": 39171 }, { "epoch": 0.6809087590606476, "grad_norm": 1.3591398060917705, "learning_rate": 2.440876237890304e-07, "loss": 0.2259, "step": 39172 }, { "epoch": 0.6809261415981505, "grad_norm": 3.786774570293223, "learning_rate": 2.440634413842582e-07, "loss": 0.2716, "step": 39173 }, { "epoch": 0.6809435241356533, "grad_norm": 0.7827085063270145, "learning_rate": 2.4403925979068733e-07, "loss": 0.1211, "step": 39174 }, { "epoch": 0.6809609066731561, "grad_norm": 1.7197140568803386, "learning_rate": 2.4401507900839423e-07, "loss": 0.182, "step": 39175 }, { "epoch": 0.680978289210659, "grad_norm": 1.8083490067779615, "learning_rate": 2.4399089903745594e-07, "loss": 0.2181, "step": 39176 }, { "epoch": 0.6809956717481618, "grad_norm": 3.0434305917671542, "learning_rate": 2.439667198779491e-07, "loss": 0.4424, "step": 39177 }, { "epoch": 0.6810130542856646, "grad_norm": 2.0971207537173404, "learning_rate": 2.4394254152994977e-07, "loss": 0.2575, "step": 39178 }, { "epoch": 0.6810304368231674, "grad_norm": 1.0582789175578045, "learning_rate": 2.439183639935351e-07, "loss": 0.1552, "step": 39179 }, { "epoch": 0.6810478193606703, "grad_norm": 1.0047894424928105, "learning_rate": 2.4389418726878165e-07, "loss": 0.3305, "step": 39180 }, { "epoch": 0.6810652018981731, "grad_norm": 2.77058759755006, "learning_rate": 2.438700113557659e-07, "loss": 0.3604, "step": 39181 }, { "epoch": 0.6810825844356759, "grad_norm": 3.6750338304941508, "learning_rate": 2.438458362545646e-07, "loss": 0.3081, "step": 39182 }, { "epoch": 0.6810999669731788, "grad_norm": 1.2490788617203554, "learning_rate": 2.4382166196525414e-07, "loss": 0.1708, "step": 39183 }, { "epoch": 0.6811173495106816, "grad_norm": 1.1540008660753105, "learning_rate": 2.437974884879117e-07, "loss": 0.1923, "step": 39184 }, { "epoch": 0.6811347320481844, "grad_norm": 1.605840283826158, "learning_rate": 2.437733158226134e-07, "loss": 0.2611, "step": 39185 }, { "epoch": 0.6811521145856873, "grad_norm": 1.2151865844125163, "learning_rate": 2.4374914396943573e-07, "loss": 0.2794, "step": 39186 }, { "epoch": 0.6811694971231901, "grad_norm": 0.9133941620914241, "learning_rate": 2.437249729284558e-07, "loss": 0.1779, "step": 39187 }, { "epoch": 0.6811868796606929, "grad_norm": 1.485945781384393, "learning_rate": 2.4370080269974995e-07, "loss": 0.1735, "step": 39188 }, { "epoch": 0.6812042621981957, "grad_norm": 4.081499597425115, "learning_rate": 2.436766332833948e-07, "loss": 0.2985, "step": 39189 }, { "epoch": 0.6812216447356986, "grad_norm": 1.6292380057098366, "learning_rate": 2.4365246467946705e-07, "loss": 0.2381, "step": 39190 }, { "epoch": 0.6812390272732013, "grad_norm": 1.6551191846346924, "learning_rate": 2.4362829688804314e-07, "loss": 0.1751, "step": 39191 }, { "epoch": 0.6812564098107041, "grad_norm": 1.7980579375698402, "learning_rate": 2.436041299091998e-07, "loss": 0.278, "step": 39192 }, { "epoch": 0.681273792348207, "grad_norm": 2.975921245187797, "learning_rate": 2.4357996374301363e-07, "loss": 0.3896, "step": 39193 }, { "epoch": 0.6812911748857098, "grad_norm": 1.8245085359935809, "learning_rate": 2.43555798389561e-07, "loss": 0.2338, "step": 39194 }, { "epoch": 0.6813085574232126, "grad_norm": 1.5878628470188825, "learning_rate": 2.4353163384891877e-07, "loss": 0.2433, "step": 39195 }, { "epoch": 0.6813259399607154, "grad_norm": 1.3101283298007618, "learning_rate": 2.4350747012116374e-07, "loss": 0.1978, "step": 39196 }, { "epoch": 0.6813433224982183, "grad_norm": 1.7901944863676824, "learning_rate": 2.434833072063718e-07, "loss": 0.3209, "step": 39197 }, { "epoch": 0.6813607050357211, "grad_norm": 1.4550872733385054, "learning_rate": 2.434591451046201e-07, "loss": 0.2251, "step": 39198 }, { "epoch": 0.6813780875732239, "grad_norm": 1.4105797826464364, "learning_rate": 2.4343498381598514e-07, "loss": 0.2527, "step": 39199 }, { "epoch": 0.6813954701107268, "grad_norm": 0.9245401196191775, "learning_rate": 2.4341082334054335e-07, "loss": 0.1128, "step": 39200 }, { "epoch": 0.6814128526482296, "grad_norm": 0.8401939931861412, "learning_rate": 2.433866636783714e-07, "loss": 0.1498, "step": 39201 }, { "epoch": 0.6814302351857324, "grad_norm": 2.3736621435958236, "learning_rate": 2.433625048295457e-07, "loss": 0.2161, "step": 39202 }, { "epoch": 0.6814476177232353, "grad_norm": 2.548020365498066, "learning_rate": 2.4333834679414335e-07, "loss": 0.4074, "step": 39203 }, { "epoch": 0.6814650002607381, "grad_norm": 1.3887027488288304, "learning_rate": 2.433141895722403e-07, "loss": 0.2025, "step": 39204 }, { "epoch": 0.6814823827982409, "grad_norm": 0.9552337446759833, "learning_rate": 2.432900331639132e-07, "loss": 0.1823, "step": 39205 }, { "epoch": 0.6814997653357437, "grad_norm": 2.962149642411189, "learning_rate": 2.432658775692389e-07, "loss": 0.3179, "step": 39206 }, { "epoch": 0.6815171478732466, "grad_norm": 1.1338408067781327, "learning_rate": 2.432417227882939e-07, "loss": 0.2058, "step": 39207 }, { "epoch": 0.6815345304107494, "grad_norm": 1.044992242613089, "learning_rate": 2.432175688211547e-07, "loss": 0.1655, "step": 39208 }, { "epoch": 0.6815519129482522, "grad_norm": 1.5303771385939902, "learning_rate": 2.431934156678978e-07, "loss": 0.1022, "step": 39209 }, { "epoch": 0.681569295485755, "grad_norm": 2.022461682041358, "learning_rate": 2.431692633285998e-07, "loss": 0.265, "step": 39210 }, { "epoch": 0.6815866780232578, "grad_norm": 1.3427164956271742, "learning_rate": 2.431451118033374e-07, "loss": 0.2714, "step": 39211 }, { "epoch": 0.6816040605607606, "grad_norm": 1.4348942651537646, "learning_rate": 2.4312096109218685e-07, "loss": 0.2504, "step": 39212 }, { "epoch": 0.6816214430982634, "grad_norm": 1.7176485232071448, "learning_rate": 2.430968111952247e-07, "loss": 0.275, "step": 39213 }, { "epoch": 0.6816388256357663, "grad_norm": 3.1443987881498865, "learning_rate": 2.430726621125278e-07, "loss": 0.1837, "step": 39214 }, { "epoch": 0.6816562081732691, "grad_norm": 1.5087397038944947, "learning_rate": 2.430485138441728e-07, "loss": 0.2297, "step": 39215 }, { "epoch": 0.6816735907107719, "grad_norm": 2.0558028088984783, "learning_rate": 2.430243663902358e-07, "loss": 0.264, "step": 39216 }, { "epoch": 0.6816909732482748, "grad_norm": 3.10443980035813, "learning_rate": 2.4300021975079327e-07, "loss": 0.28, "step": 39217 }, { "epoch": 0.6817083557857776, "grad_norm": 1.7940009769786072, "learning_rate": 2.4297607392592217e-07, "loss": 0.1812, "step": 39218 }, { "epoch": 0.6817257383232804, "grad_norm": 1.1736721227671045, "learning_rate": 2.429519289156989e-07, "loss": 0.2514, "step": 39219 }, { "epoch": 0.6817431208607833, "grad_norm": 1.4869847205282938, "learning_rate": 2.4292778472019985e-07, "loss": 0.2718, "step": 39220 }, { "epoch": 0.6817605033982861, "grad_norm": 1.1444555662203832, "learning_rate": 2.429036413395015e-07, "loss": 0.2267, "step": 39221 }, { "epoch": 0.6817778859357889, "grad_norm": 2.585409673225014, "learning_rate": 2.428794987736809e-07, "loss": 0.1599, "step": 39222 }, { "epoch": 0.6817952684732917, "grad_norm": 2.7135253991669828, "learning_rate": 2.42855357022814e-07, "loss": 0.3303, "step": 39223 }, { "epoch": 0.6818126510107946, "grad_norm": 1.8859161943363143, "learning_rate": 2.4283121608697745e-07, "loss": 0.2277, "step": 39224 }, { "epoch": 0.6818300335482974, "grad_norm": 1.644286947103006, "learning_rate": 2.4280707596624767e-07, "loss": 0.142, "step": 39225 }, { "epoch": 0.6818474160858002, "grad_norm": 1.301309862581104, "learning_rate": 2.4278293666070144e-07, "loss": 0.1923, "step": 39226 }, { "epoch": 0.6818647986233031, "grad_norm": 1.7246648491449093, "learning_rate": 2.4275879817041516e-07, "loss": 0.232, "step": 39227 }, { "epoch": 0.6818821811608059, "grad_norm": 1.7529500213202898, "learning_rate": 2.427346604954654e-07, "loss": 0.2379, "step": 39228 }, { "epoch": 0.6818995636983087, "grad_norm": 2.5868298692406815, "learning_rate": 2.4271052363592854e-07, "loss": 0.1783, "step": 39229 }, { "epoch": 0.6819169462358114, "grad_norm": 1.6316175842113059, "learning_rate": 2.426863875918811e-07, "loss": 0.2819, "step": 39230 }, { "epoch": 0.6819343287733143, "grad_norm": 1.4043717978698749, "learning_rate": 2.426622523633996e-07, "loss": 0.1802, "step": 39231 }, { "epoch": 0.6819517113108171, "grad_norm": 1.5663021892422024, "learning_rate": 2.426381179505606e-07, "loss": 0.1468, "step": 39232 }, { "epoch": 0.6819690938483199, "grad_norm": 1.401072700968752, "learning_rate": 2.4261398435344035e-07, "loss": 0.1573, "step": 39233 }, { "epoch": 0.6819864763858228, "grad_norm": 1.4549349191230019, "learning_rate": 2.4258985157211585e-07, "loss": 0.1889, "step": 39234 }, { "epoch": 0.6820038589233256, "grad_norm": 1.3730884211302932, "learning_rate": 2.4256571960666317e-07, "loss": 0.1364, "step": 39235 }, { "epoch": 0.6820212414608284, "grad_norm": 1.5642591428835655, "learning_rate": 2.425415884571587e-07, "loss": 0.2272, "step": 39236 }, { "epoch": 0.6820386239983313, "grad_norm": 2.1077781677152223, "learning_rate": 2.4251745812367927e-07, "loss": 0.2185, "step": 39237 }, { "epoch": 0.6820560065358341, "grad_norm": 2.0419393163148616, "learning_rate": 2.4249332860630126e-07, "loss": 0.1683, "step": 39238 }, { "epoch": 0.6820733890733369, "grad_norm": 2.060366171967633, "learning_rate": 2.424691999051011e-07, "loss": 0.2587, "step": 39239 }, { "epoch": 0.6820907716108398, "grad_norm": 1.6833776882705238, "learning_rate": 2.424450720201553e-07, "loss": 0.228, "step": 39240 }, { "epoch": 0.6821081541483426, "grad_norm": 1.5267256060280074, "learning_rate": 2.4242094495154025e-07, "loss": 0.1658, "step": 39241 }, { "epoch": 0.6821255366858454, "grad_norm": 2.2727445632481293, "learning_rate": 2.4239681869933253e-07, "loss": 0.2567, "step": 39242 }, { "epoch": 0.6821429192233482, "grad_norm": 1.1009681572952414, "learning_rate": 2.4237269326360856e-07, "loss": 0.118, "step": 39243 }, { "epoch": 0.6821603017608511, "grad_norm": 2.2044974245099436, "learning_rate": 2.4234856864444457e-07, "loss": 0.2586, "step": 39244 }, { "epoch": 0.6821776842983539, "grad_norm": 1.339197949455727, "learning_rate": 2.423244448419175e-07, "loss": 0.2228, "step": 39245 }, { "epoch": 0.6821950668358567, "grad_norm": 2.4414348808214985, "learning_rate": 2.423003218561036e-07, "loss": 0.2514, "step": 39246 }, { "epoch": 0.6822124493733596, "grad_norm": 2.0274161821822902, "learning_rate": 2.4227619968707946e-07, "loss": 0.2079, "step": 39247 }, { "epoch": 0.6822298319108624, "grad_norm": 1.491131014302156, "learning_rate": 2.4225207833492097e-07, "loss": 0.2961, "step": 39248 }, { "epoch": 0.6822472144483652, "grad_norm": 1.484405134939065, "learning_rate": 2.422279577997052e-07, "loss": 0.3563, "step": 39249 }, { "epoch": 0.6822645969858679, "grad_norm": 1.3142593343792508, "learning_rate": 2.4220383808150843e-07, "loss": 0.1091, "step": 39250 }, { "epoch": 0.6822819795233708, "grad_norm": 1.5176465779089217, "learning_rate": 2.42179719180407e-07, "loss": 0.1148, "step": 39251 }, { "epoch": 0.6822993620608736, "grad_norm": 2.156798258815662, "learning_rate": 2.421556010964773e-07, "loss": 0.2177, "step": 39252 }, { "epoch": 0.6823167445983764, "grad_norm": 1.7890793249533277, "learning_rate": 2.4213148382979624e-07, "loss": 0.1942, "step": 39253 }, { "epoch": 0.6823341271358793, "grad_norm": 1.9042369016971374, "learning_rate": 2.4210736738043973e-07, "loss": 0.2932, "step": 39254 }, { "epoch": 0.6823515096733821, "grad_norm": 1.0889927658259797, "learning_rate": 2.420832517484844e-07, "loss": 0.2312, "step": 39255 }, { "epoch": 0.6823688922108849, "grad_norm": 1.019744043639425, "learning_rate": 2.4205913693400653e-07, "loss": 0.2268, "step": 39256 }, { "epoch": 0.6823862747483878, "grad_norm": 1.417694331551031, "learning_rate": 2.420350229370829e-07, "loss": 0.303, "step": 39257 }, { "epoch": 0.6824036572858906, "grad_norm": 5.537052150205562, "learning_rate": 2.4201090975778976e-07, "loss": 0.2648, "step": 39258 }, { "epoch": 0.6824210398233934, "grad_norm": 1.4329811558254064, "learning_rate": 2.4198679739620346e-07, "loss": 0.2724, "step": 39259 }, { "epoch": 0.6824384223608962, "grad_norm": 2.815154398445328, "learning_rate": 2.419626858524005e-07, "loss": 0.3792, "step": 39260 }, { "epoch": 0.6824558048983991, "grad_norm": 3.2845295878965035, "learning_rate": 2.419385751264574e-07, "loss": 0.2723, "step": 39261 }, { "epoch": 0.6824731874359019, "grad_norm": 1.0674439793219201, "learning_rate": 2.4191446521845035e-07, "loss": 0.2163, "step": 39262 }, { "epoch": 0.6824905699734047, "grad_norm": 2.3935766588349816, "learning_rate": 2.418903561284559e-07, "loss": 0.2771, "step": 39263 }, { "epoch": 0.6825079525109076, "grad_norm": 1.6240384802379608, "learning_rate": 2.418662478565503e-07, "loss": 0.1048, "step": 39264 }, { "epoch": 0.6825253350484104, "grad_norm": 1.0309595214253433, "learning_rate": 2.418421404028103e-07, "loss": 0.2128, "step": 39265 }, { "epoch": 0.6825427175859132, "grad_norm": 0.8638420506515663, "learning_rate": 2.4181803376731237e-07, "loss": 0.2703, "step": 39266 }, { "epoch": 0.6825601001234161, "grad_norm": 1.752211655929189, "learning_rate": 2.417939279501322e-07, "loss": 0.1201, "step": 39267 }, { "epoch": 0.6825774826609189, "grad_norm": 1.7397012820509155, "learning_rate": 2.417698229513469e-07, "loss": 0.2616, "step": 39268 }, { "epoch": 0.6825948651984217, "grad_norm": 1.1725304598625024, "learning_rate": 2.417457187710327e-07, "loss": 0.2181, "step": 39269 }, { "epoch": 0.6826122477359244, "grad_norm": 2.153788700166646, "learning_rate": 2.4172161540926584e-07, "loss": 0.133, "step": 39270 }, { "epoch": 0.6826296302734273, "grad_norm": 1.5180488606879647, "learning_rate": 2.416975128661229e-07, "loss": 0.225, "step": 39271 }, { "epoch": 0.6826470128109301, "grad_norm": 2.362521414586363, "learning_rate": 2.416734111416801e-07, "loss": 0.2724, "step": 39272 }, { "epoch": 0.6826643953484329, "grad_norm": 1.743631393615628, "learning_rate": 2.4164931023601396e-07, "loss": 0.2245, "step": 39273 }, { "epoch": 0.6826817778859358, "grad_norm": 2.7628228757932694, "learning_rate": 2.4162521014920085e-07, "loss": 0.1726, "step": 39274 }, { "epoch": 0.6826991604234386, "grad_norm": 1.2623421985631076, "learning_rate": 2.4160111088131697e-07, "loss": 0.1119, "step": 39275 }, { "epoch": 0.6827165429609414, "grad_norm": 1.2172279615431534, "learning_rate": 2.41577012432439e-07, "loss": 0.1557, "step": 39276 }, { "epoch": 0.6827339254984442, "grad_norm": 2.984829917643719, "learning_rate": 2.415529148026432e-07, "loss": 0.2362, "step": 39277 }, { "epoch": 0.6827513080359471, "grad_norm": 1.353128495175354, "learning_rate": 2.4152881799200615e-07, "loss": 0.2101, "step": 39278 }, { "epoch": 0.6827686905734499, "grad_norm": 1.1863533490282319, "learning_rate": 2.4150472200060363e-07, "loss": 0.236, "step": 39279 }, { "epoch": 0.6827860731109527, "grad_norm": 3.4919219523065843, "learning_rate": 2.4148062682851263e-07, "loss": 0.2339, "step": 39280 }, { "epoch": 0.6828034556484556, "grad_norm": 2.7030393974152847, "learning_rate": 2.4145653247580923e-07, "loss": 0.3126, "step": 39281 }, { "epoch": 0.6828208381859584, "grad_norm": 1.3175998617069145, "learning_rate": 2.4143243894256986e-07, "loss": 0.2013, "step": 39282 }, { "epoch": 0.6828382207234612, "grad_norm": 1.694908359951365, "learning_rate": 2.414083462288707e-07, "loss": 0.2162, "step": 39283 }, { "epoch": 0.6828556032609641, "grad_norm": 1.5044764146570893, "learning_rate": 2.413842543347885e-07, "loss": 0.1916, "step": 39284 }, { "epoch": 0.6828729857984669, "grad_norm": 1.6345203333977938, "learning_rate": 2.413601632603996e-07, "loss": 0.2399, "step": 39285 }, { "epoch": 0.6828903683359697, "grad_norm": 0.9586235857833768, "learning_rate": 2.4133607300577995e-07, "loss": 0.1417, "step": 39286 }, { "epoch": 0.6829077508734726, "grad_norm": 1.3565909609411146, "learning_rate": 2.4131198357100595e-07, "loss": 0.1672, "step": 39287 }, { "epoch": 0.6829251334109754, "grad_norm": 1.238764557347072, "learning_rate": 2.4128789495615434e-07, "loss": 0.1175, "step": 39288 }, { "epoch": 0.6829425159484782, "grad_norm": 4.1294321387222945, "learning_rate": 2.412638071613013e-07, "loss": 0.3202, "step": 39289 }, { "epoch": 0.6829598984859809, "grad_norm": 2.2245748468562065, "learning_rate": 2.4123972018652306e-07, "loss": 0.2887, "step": 39290 }, { "epoch": 0.6829772810234838, "grad_norm": 1.6832668312156422, "learning_rate": 2.412156340318961e-07, "loss": 0.2268, "step": 39291 }, { "epoch": 0.6829946635609866, "grad_norm": 2.2212478073411592, "learning_rate": 2.411915486974967e-07, "loss": 0.2276, "step": 39292 }, { "epoch": 0.6830120460984894, "grad_norm": 0.7941546487527458, "learning_rate": 2.4116746418340116e-07, "loss": 0.1902, "step": 39293 }, { "epoch": 0.6830294286359923, "grad_norm": 1.3126420301183992, "learning_rate": 2.411433804896857e-07, "loss": 0.2923, "step": 39294 }, { "epoch": 0.6830468111734951, "grad_norm": 2.4716529924655743, "learning_rate": 2.4111929761642703e-07, "loss": 0.2115, "step": 39295 }, { "epoch": 0.6830641937109979, "grad_norm": 1.869697793613772, "learning_rate": 2.4109521556370124e-07, "loss": 0.2853, "step": 39296 }, { "epoch": 0.6830815762485007, "grad_norm": 3.0736138523153436, "learning_rate": 2.410711343315848e-07, "loss": 0.2256, "step": 39297 }, { "epoch": 0.6830989587860036, "grad_norm": 1.20926866393485, "learning_rate": 2.4104705392015366e-07, "loss": 0.1257, "step": 39298 }, { "epoch": 0.6831163413235064, "grad_norm": 2.8901919093775086, "learning_rate": 2.4102297432948455e-07, "loss": 0.3828, "step": 39299 }, { "epoch": 0.6831337238610092, "grad_norm": 1.787057242543931, "learning_rate": 2.4099889555965364e-07, "loss": 0.1973, "step": 39300 }, { "epoch": 0.6831511063985121, "grad_norm": 1.368097795787611, "learning_rate": 2.409748176107373e-07, "loss": 0.181, "step": 39301 }, { "epoch": 0.6831684889360149, "grad_norm": 1.46636345144525, "learning_rate": 2.4095074048281157e-07, "loss": 0.2267, "step": 39302 }, { "epoch": 0.6831858714735177, "grad_norm": 1.7672250693690965, "learning_rate": 2.409266641759532e-07, "loss": 0.2748, "step": 39303 }, { "epoch": 0.6832032540110206, "grad_norm": 2.2196860215543577, "learning_rate": 2.409025886902384e-07, "loss": 0.2335, "step": 39304 }, { "epoch": 0.6832206365485234, "grad_norm": 1.3642731179196224, "learning_rate": 2.408785140257433e-07, "loss": 0.2288, "step": 39305 }, { "epoch": 0.6832380190860262, "grad_norm": 0.7879581047564749, "learning_rate": 2.4085444018254407e-07, "loss": 0.1583, "step": 39306 }, { "epoch": 0.683255401623529, "grad_norm": 1.2699589149892383, "learning_rate": 2.408303671607174e-07, "loss": 0.3071, "step": 39307 }, { "epoch": 0.6832727841610319, "grad_norm": 1.7223494702187172, "learning_rate": 2.408062949603394e-07, "loss": 0.2058, "step": 39308 }, { "epoch": 0.6832901666985347, "grad_norm": 1.4289623171860946, "learning_rate": 2.4078222358148635e-07, "loss": 0.2096, "step": 39309 }, { "epoch": 0.6833075492360374, "grad_norm": 2.8463151532247704, "learning_rate": 2.4075815302423464e-07, "loss": 0.2878, "step": 39310 }, { "epoch": 0.6833249317735403, "grad_norm": 1.216869405763845, "learning_rate": 2.4073408328866046e-07, "loss": 0.2165, "step": 39311 }, { "epoch": 0.6833423143110431, "grad_norm": 1.4498547193093758, "learning_rate": 2.407100143748402e-07, "loss": 0.1418, "step": 39312 }, { "epoch": 0.6833596968485459, "grad_norm": 1.38181181959044, "learning_rate": 2.4068594628285e-07, "loss": 0.2662, "step": 39313 }, { "epoch": 0.6833770793860487, "grad_norm": 1.9371078651953535, "learning_rate": 2.406618790127661e-07, "loss": 0.2486, "step": 39314 }, { "epoch": 0.6833944619235516, "grad_norm": 1.9257315465094438, "learning_rate": 2.406378125646651e-07, "loss": 0.2216, "step": 39315 }, { "epoch": 0.6834118444610544, "grad_norm": 1.6688137451204195, "learning_rate": 2.406137469386232e-07, "loss": 0.1719, "step": 39316 }, { "epoch": 0.6834292269985572, "grad_norm": 1.5556050562940615, "learning_rate": 2.405896821347163e-07, "loss": 0.1925, "step": 39317 }, { "epoch": 0.6834466095360601, "grad_norm": 1.4822801289248704, "learning_rate": 2.405656181530211e-07, "loss": 0.275, "step": 39318 }, { "epoch": 0.6834639920735629, "grad_norm": 1.6966336258756223, "learning_rate": 2.405415549936137e-07, "loss": 0.1675, "step": 39319 }, { "epoch": 0.6834813746110657, "grad_norm": 1.1096990611611857, "learning_rate": 2.405174926565704e-07, "loss": 0.185, "step": 39320 }, { "epoch": 0.6834987571485686, "grad_norm": 1.5788766991530363, "learning_rate": 2.404934311419674e-07, "loss": 0.245, "step": 39321 }, { "epoch": 0.6835161396860714, "grad_norm": 1.4422706840891586, "learning_rate": 2.4046937044988093e-07, "loss": 0.2233, "step": 39322 }, { "epoch": 0.6835335222235742, "grad_norm": 1.7856169205497494, "learning_rate": 2.4044531058038766e-07, "loss": 0.274, "step": 39323 }, { "epoch": 0.683550904761077, "grad_norm": 2.41190134181779, "learning_rate": 2.404212515335633e-07, "loss": 0.1742, "step": 39324 }, { "epoch": 0.6835682872985799, "grad_norm": 2.2088447630139654, "learning_rate": 2.403971933094842e-07, "loss": 0.2518, "step": 39325 }, { "epoch": 0.6835856698360827, "grad_norm": 1.7114159261732511, "learning_rate": 2.403731359082269e-07, "loss": 0.1837, "step": 39326 }, { "epoch": 0.6836030523735855, "grad_norm": 0.7767689876812235, "learning_rate": 2.4034907932986745e-07, "loss": 0.217, "step": 39327 }, { "epoch": 0.6836204349110884, "grad_norm": 2.532966242094249, "learning_rate": 2.4032502357448224e-07, "loss": 0.2341, "step": 39328 }, { "epoch": 0.6836378174485912, "grad_norm": 1.444338614129219, "learning_rate": 2.403009686421474e-07, "loss": 0.3272, "step": 39329 }, { "epoch": 0.6836551999860939, "grad_norm": 1.6022940999293636, "learning_rate": 2.402769145329391e-07, "loss": 0.1707, "step": 39330 }, { "epoch": 0.6836725825235968, "grad_norm": 1.3912608137321973, "learning_rate": 2.4025286124693377e-07, "loss": 0.2228, "step": 39331 }, { "epoch": 0.6836899650610996, "grad_norm": 1.109512163093506, "learning_rate": 2.402288087842076e-07, "loss": 0.1179, "step": 39332 }, { "epoch": 0.6837073475986024, "grad_norm": 5.108862237668695, "learning_rate": 2.4020475714483643e-07, "loss": 0.2899, "step": 39333 }, { "epoch": 0.6837247301361052, "grad_norm": 1.4656869704869422, "learning_rate": 2.401807063288972e-07, "loss": 0.0981, "step": 39334 }, { "epoch": 0.6837421126736081, "grad_norm": 1.7924042682688281, "learning_rate": 2.401566563364659e-07, "loss": 0.1658, "step": 39335 }, { "epoch": 0.6837594952111109, "grad_norm": 1.4337499483232419, "learning_rate": 2.401326071676184e-07, "loss": 0.1154, "step": 39336 }, { "epoch": 0.6837768777486137, "grad_norm": 2.965144069478945, "learning_rate": 2.40108558822431e-07, "loss": 0.2883, "step": 39337 }, { "epoch": 0.6837942602861166, "grad_norm": 1.5718045856724832, "learning_rate": 2.4008451130098026e-07, "loss": 0.2207, "step": 39338 }, { "epoch": 0.6838116428236194, "grad_norm": 1.5596317424645063, "learning_rate": 2.400604646033423e-07, "loss": 0.2053, "step": 39339 }, { "epoch": 0.6838290253611222, "grad_norm": 1.9554037371810098, "learning_rate": 2.400364187295932e-07, "loss": 0.2105, "step": 39340 }, { "epoch": 0.683846407898625, "grad_norm": 2.4795428179567702, "learning_rate": 2.40012373679809e-07, "loss": 0.1935, "step": 39341 }, { "epoch": 0.6838637904361279, "grad_norm": 1.4797774713897094, "learning_rate": 2.399883294540666e-07, "loss": 0.2406, "step": 39342 }, { "epoch": 0.6838811729736307, "grad_norm": 2.4032080682855295, "learning_rate": 2.399642860524415e-07, "loss": 0.2019, "step": 39343 }, { "epoch": 0.6838985555111335, "grad_norm": 1.0507391748322534, "learning_rate": 2.3994024347501025e-07, "loss": 0.2144, "step": 39344 }, { "epoch": 0.6839159380486364, "grad_norm": 1.2975120354577347, "learning_rate": 2.399162017218487e-07, "loss": 0.1743, "step": 39345 }, { "epoch": 0.6839333205861392, "grad_norm": 2.2927951687729773, "learning_rate": 2.3989216079303355e-07, "loss": 0.199, "step": 39346 }, { "epoch": 0.683950703123642, "grad_norm": 1.0817727810985842, "learning_rate": 2.3986812068864075e-07, "loss": 0.1175, "step": 39347 }, { "epoch": 0.6839680856611449, "grad_norm": 1.053532943766277, "learning_rate": 2.3984408140874654e-07, "loss": 0.1669, "step": 39348 }, { "epoch": 0.6839854681986476, "grad_norm": 2.239602858523802, "learning_rate": 2.3982004295342704e-07, "loss": 0.2164, "step": 39349 }, { "epoch": 0.6840028507361504, "grad_norm": 1.1046797188295365, "learning_rate": 2.397960053227585e-07, "loss": 0.1969, "step": 39350 }, { "epoch": 0.6840202332736532, "grad_norm": 1.208478474739836, "learning_rate": 2.3977196851681713e-07, "loss": 0.2124, "step": 39351 }, { "epoch": 0.6840376158111561, "grad_norm": 3.0134826418365677, "learning_rate": 2.39747932535679e-07, "loss": 0.2497, "step": 39352 }, { "epoch": 0.6840549983486589, "grad_norm": 2.5056789096181857, "learning_rate": 2.397238973794203e-07, "loss": 0.3852, "step": 39353 }, { "epoch": 0.6840723808861617, "grad_norm": 0.7148651196611274, "learning_rate": 2.396998630481176e-07, "loss": 0.1197, "step": 39354 }, { "epoch": 0.6840897634236646, "grad_norm": 2.086708053537429, "learning_rate": 2.3967582954184655e-07, "loss": 0.2906, "step": 39355 }, { "epoch": 0.6841071459611674, "grad_norm": 1.5967677086185723, "learning_rate": 2.396517968606834e-07, "loss": 0.2173, "step": 39356 }, { "epoch": 0.6841245284986702, "grad_norm": 1.7974338035127682, "learning_rate": 2.396277650047046e-07, "loss": 0.1776, "step": 39357 }, { "epoch": 0.6841419110361731, "grad_norm": 1.857300657322673, "learning_rate": 2.3960373397398624e-07, "loss": 0.2719, "step": 39358 }, { "epoch": 0.6841592935736759, "grad_norm": 1.9634628525954394, "learning_rate": 2.395797037686044e-07, "loss": 0.1445, "step": 39359 }, { "epoch": 0.6841766761111787, "grad_norm": 2.5041152387403383, "learning_rate": 2.3955567438863527e-07, "loss": 0.1893, "step": 39360 }, { "epoch": 0.6841940586486815, "grad_norm": 1.416871315433655, "learning_rate": 2.39531645834155e-07, "loss": 0.2733, "step": 39361 }, { "epoch": 0.6842114411861844, "grad_norm": 1.7879731481311303, "learning_rate": 2.3950761810523976e-07, "loss": 0.3123, "step": 39362 }, { "epoch": 0.6842288237236872, "grad_norm": 0.968212917127902, "learning_rate": 2.394835912019657e-07, "loss": 0.2996, "step": 39363 }, { "epoch": 0.68424620626119, "grad_norm": 1.417833564797632, "learning_rate": 2.394595651244088e-07, "loss": 0.1968, "step": 39364 }, { "epoch": 0.6842635887986929, "grad_norm": 1.8466638097978005, "learning_rate": 2.3943553987264563e-07, "loss": 0.1627, "step": 39365 }, { "epoch": 0.6842809713361957, "grad_norm": 1.3829187710769053, "learning_rate": 2.394115154467521e-07, "loss": 0.2303, "step": 39366 }, { "epoch": 0.6842983538736985, "grad_norm": 1.705945243018162, "learning_rate": 2.393874918468045e-07, "loss": 0.2358, "step": 39367 }, { "epoch": 0.6843157364112014, "grad_norm": 1.2940438195648656, "learning_rate": 2.393634690728785e-07, "loss": 0.1899, "step": 39368 }, { "epoch": 0.6843331189487041, "grad_norm": 1.8909735403946082, "learning_rate": 2.3933944712505073e-07, "loss": 0.1615, "step": 39369 }, { "epoch": 0.6843505014862069, "grad_norm": 1.5437649863467975, "learning_rate": 2.393154260033972e-07, "loss": 0.2, "step": 39370 }, { "epoch": 0.6843678840237097, "grad_norm": 1.9227578307733983, "learning_rate": 2.39291405707994e-07, "loss": 0.1758, "step": 39371 }, { "epoch": 0.6843852665612126, "grad_norm": 1.8123792929258289, "learning_rate": 2.392673862389171e-07, "loss": 0.1304, "step": 39372 }, { "epoch": 0.6844026490987154, "grad_norm": 1.2680450602720794, "learning_rate": 2.392433675962433e-07, "loss": 0.2044, "step": 39373 }, { "epoch": 0.6844200316362182, "grad_norm": 3.117863415095539, "learning_rate": 2.3921934978004785e-07, "loss": 0.2286, "step": 39374 }, { "epoch": 0.6844374141737211, "grad_norm": 1.25827097356454, "learning_rate": 2.3919533279040745e-07, "loss": 0.1701, "step": 39375 }, { "epoch": 0.6844547967112239, "grad_norm": 1.6535469191816268, "learning_rate": 2.391713166273978e-07, "loss": 0.1439, "step": 39376 }, { "epoch": 0.6844721792487267, "grad_norm": 1.2636590331797009, "learning_rate": 2.391473012910954e-07, "loss": 0.1626, "step": 39377 }, { "epoch": 0.6844895617862296, "grad_norm": 1.2343628315415547, "learning_rate": 2.3912328678157625e-07, "loss": 0.2203, "step": 39378 }, { "epoch": 0.6845069443237324, "grad_norm": 1.0816602021421227, "learning_rate": 2.3909927309891643e-07, "loss": 0.1449, "step": 39379 }, { "epoch": 0.6845243268612352, "grad_norm": 2.072970788582876, "learning_rate": 2.3907526024319206e-07, "loss": 0.2703, "step": 39380 }, { "epoch": 0.684541709398738, "grad_norm": 1.5178627980514143, "learning_rate": 2.390512482144793e-07, "loss": 0.214, "step": 39381 }, { "epoch": 0.6845590919362409, "grad_norm": 1.118013859543209, "learning_rate": 2.3902723701285423e-07, "loss": 0.3365, "step": 39382 }, { "epoch": 0.6845764744737437, "grad_norm": 2.0424079721083466, "learning_rate": 2.3900322663839265e-07, "loss": 0.3467, "step": 39383 }, { "epoch": 0.6845938570112465, "grad_norm": 1.1789770610972572, "learning_rate": 2.3897921709117124e-07, "loss": 0.1683, "step": 39384 }, { "epoch": 0.6846112395487494, "grad_norm": 3.8696809748604344, "learning_rate": 2.3895520837126576e-07, "loss": 0.2872, "step": 39385 }, { "epoch": 0.6846286220862522, "grad_norm": 2.1214697686641433, "learning_rate": 2.389312004787526e-07, "loss": 0.1603, "step": 39386 }, { "epoch": 0.684646004623755, "grad_norm": 1.4597957724521708, "learning_rate": 2.3890719341370714e-07, "loss": 0.1377, "step": 39387 }, { "epoch": 0.6846633871612579, "grad_norm": 1.1634815360369783, "learning_rate": 2.388831871762062e-07, "loss": 0.1646, "step": 39388 }, { "epoch": 0.6846807696987606, "grad_norm": 3.9213528471207044, "learning_rate": 2.388591817663256e-07, "loss": 0.2982, "step": 39389 }, { "epoch": 0.6846981522362634, "grad_norm": 2.534237695115458, "learning_rate": 2.3883517718414143e-07, "loss": 0.7311, "step": 39390 }, { "epoch": 0.6847155347737662, "grad_norm": 1.5310199691225213, "learning_rate": 2.3881117342972954e-07, "loss": 0.1822, "step": 39391 }, { "epoch": 0.6847329173112691, "grad_norm": 1.7999419350859849, "learning_rate": 2.387871705031667e-07, "loss": 0.3343, "step": 39392 }, { "epoch": 0.6847502998487719, "grad_norm": 2.052923560715884, "learning_rate": 2.3876316840452833e-07, "loss": 0.1956, "step": 39393 }, { "epoch": 0.6847676823862747, "grad_norm": 1.4562822809712666, "learning_rate": 2.387391671338907e-07, "loss": 0.1311, "step": 39394 }, { "epoch": 0.6847850649237776, "grad_norm": 1.3579505902936297, "learning_rate": 2.387151666913297e-07, "loss": 0.1997, "step": 39395 }, { "epoch": 0.6848024474612804, "grad_norm": 2.1324947338859, "learning_rate": 2.386911670769218e-07, "loss": 0.2285, "step": 39396 }, { "epoch": 0.6848198299987832, "grad_norm": 1.046681677612237, "learning_rate": 2.3866716829074284e-07, "loss": 0.2671, "step": 39397 }, { "epoch": 0.684837212536286, "grad_norm": 1.7037318887949722, "learning_rate": 2.3864317033286895e-07, "loss": 0.2738, "step": 39398 }, { "epoch": 0.6848545950737889, "grad_norm": 1.202816237302791, "learning_rate": 2.386191732033761e-07, "loss": 0.2155, "step": 39399 }, { "epoch": 0.6848719776112917, "grad_norm": 0.896606240166287, "learning_rate": 2.3859517690234044e-07, "loss": 0.185, "step": 39400 }, { "epoch": 0.6848893601487945, "grad_norm": 1.8501527072084891, "learning_rate": 2.3857118142983803e-07, "loss": 0.1977, "step": 39401 }, { "epoch": 0.6849067426862974, "grad_norm": 1.7832302529202169, "learning_rate": 2.385471867859448e-07, "loss": 0.2766, "step": 39402 }, { "epoch": 0.6849241252238002, "grad_norm": 0.8487868632659975, "learning_rate": 2.3852319297073674e-07, "loss": 0.155, "step": 39403 }, { "epoch": 0.684941507761303, "grad_norm": 1.1300205505398293, "learning_rate": 2.384991999842902e-07, "loss": 0.2564, "step": 39404 }, { "epoch": 0.6849588902988059, "grad_norm": 1.4412892008661096, "learning_rate": 2.3847520782668137e-07, "loss": 0.2107, "step": 39405 }, { "epoch": 0.6849762728363087, "grad_norm": 1.3346648039286728, "learning_rate": 2.384512164979855e-07, "loss": 0.1619, "step": 39406 }, { "epoch": 0.6849936553738115, "grad_norm": 1.4419526409768422, "learning_rate": 2.3842722599827935e-07, "loss": 0.1936, "step": 39407 }, { "epoch": 0.6850110379113143, "grad_norm": 5.471347379382697, "learning_rate": 2.3840323632763875e-07, "loss": 0.486, "step": 39408 }, { "epoch": 0.6850284204488171, "grad_norm": 1.9770061628223226, "learning_rate": 2.3837924748613975e-07, "loss": 0.1528, "step": 39409 }, { "epoch": 0.6850458029863199, "grad_norm": 1.1916663098502123, "learning_rate": 2.3835525947385832e-07, "loss": 0.1943, "step": 39410 }, { "epoch": 0.6850631855238227, "grad_norm": 1.1334990172337378, "learning_rate": 2.3833127229087053e-07, "loss": 0.2138, "step": 39411 }, { "epoch": 0.6850805680613256, "grad_norm": 1.2378256353654218, "learning_rate": 2.383072859372524e-07, "loss": 0.3367, "step": 39412 }, { "epoch": 0.6850979505988284, "grad_norm": 1.790379482965028, "learning_rate": 2.3828330041308e-07, "loss": 0.2418, "step": 39413 }, { "epoch": 0.6851153331363312, "grad_norm": 1.4343187564418558, "learning_rate": 2.3825931571842907e-07, "loss": 0.1535, "step": 39414 }, { "epoch": 0.685132715673834, "grad_norm": 2.9710767341149373, "learning_rate": 2.3823533185337608e-07, "loss": 0.1892, "step": 39415 }, { "epoch": 0.6851500982113369, "grad_norm": 1.311882075365056, "learning_rate": 2.3821134881799688e-07, "loss": 0.2044, "step": 39416 }, { "epoch": 0.6851674807488397, "grad_norm": 1.3345895608343525, "learning_rate": 2.381873666123676e-07, "loss": 0.1421, "step": 39417 }, { "epoch": 0.6851848632863425, "grad_norm": 1.8971854112601823, "learning_rate": 2.3816338523656376e-07, "loss": 0.2075, "step": 39418 }, { "epoch": 0.6852022458238454, "grad_norm": 1.2632494108789525, "learning_rate": 2.3813940469066185e-07, "loss": 0.2131, "step": 39419 }, { "epoch": 0.6852196283613482, "grad_norm": 1.6776130413220667, "learning_rate": 2.3811542497473774e-07, "loss": 0.1898, "step": 39420 }, { "epoch": 0.685237010898851, "grad_norm": 1.1954519772817997, "learning_rate": 2.380914460888675e-07, "loss": 0.1617, "step": 39421 }, { "epoch": 0.6852543934363539, "grad_norm": 2.2600679037321783, "learning_rate": 2.3806746803312684e-07, "loss": 0.1652, "step": 39422 }, { "epoch": 0.6852717759738567, "grad_norm": 3.335722846475871, "learning_rate": 2.3804349080759212e-07, "loss": 0.2653, "step": 39423 }, { "epoch": 0.6852891585113595, "grad_norm": 2.6392120337683362, "learning_rate": 2.3801951441233947e-07, "loss": 0.4523, "step": 39424 }, { "epoch": 0.6853065410488623, "grad_norm": 1.7809253456301126, "learning_rate": 2.379955388474444e-07, "loss": 0.2133, "step": 39425 }, { "epoch": 0.6853239235863652, "grad_norm": 1.809547622208067, "learning_rate": 2.379715641129829e-07, "loss": 0.195, "step": 39426 }, { "epoch": 0.685341306123868, "grad_norm": 1.3275861852723514, "learning_rate": 2.3794759020903138e-07, "loss": 0.1655, "step": 39427 }, { "epoch": 0.6853586886613708, "grad_norm": 1.2622746801540081, "learning_rate": 2.379236171356656e-07, "loss": 0.1407, "step": 39428 }, { "epoch": 0.6853760711988736, "grad_norm": 1.4228118293580312, "learning_rate": 2.378996448929616e-07, "loss": 0.1713, "step": 39429 }, { "epoch": 0.6853934537363764, "grad_norm": 1.1271435625070045, "learning_rate": 2.3787567348099535e-07, "loss": 0.2354, "step": 39430 }, { "epoch": 0.6854108362738792, "grad_norm": 1.2822128299096751, "learning_rate": 2.3785170289984273e-07, "loss": 0.1916, "step": 39431 }, { "epoch": 0.685428218811382, "grad_norm": 2.5366496255207447, "learning_rate": 2.3782773314957987e-07, "loss": 0.2984, "step": 39432 }, { "epoch": 0.6854456013488849, "grad_norm": 1.650343876051402, "learning_rate": 2.3780376423028266e-07, "loss": 0.1942, "step": 39433 }, { "epoch": 0.6854629838863877, "grad_norm": 1.4205349196573753, "learning_rate": 2.377797961420268e-07, "loss": 0.1622, "step": 39434 }, { "epoch": 0.6854803664238905, "grad_norm": 1.688307460939499, "learning_rate": 2.3775582888488882e-07, "loss": 0.2599, "step": 39435 }, { "epoch": 0.6854977489613934, "grad_norm": 1.8046564143620316, "learning_rate": 2.377318624589445e-07, "loss": 0.1075, "step": 39436 }, { "epoch": 0.6855151314988962, "grad_norm": 0.8817207394933629, "learning_rate": 2.377078968642694e-07, "loss": 0.1512, "step": 39437 }, { "epoch": 0.685532514036399, "grad_norm": 3.7064673849972514, "learning_rate": 2.376839321009399e-07, "loss": 0.2744, "step": 39438 }, { "epoch": 0.6855498965739019, "grad_norm": 3.564463692275026, "learning_rate": 2.376599681690319e-07, "loss": 0.2965, "step": 39439 }, { "epoch": 0.6855672791114047, "grad_norm": 1.1997934824248875, "learning_rate": 2.3763600506862125e-07, "loss": 0.2131, "step": 39440 }, { "epoch": 0.6855846616489075, "grad_norm": 1.8743767863710592, "learning_rate": 2.3761204279978398e-07, "loss": 0.2429, "step": 39441 }, { "epoch": 0.6856020441864104, "grad_norm": 1.1466530550331246, "learning_rate": 2.3758808136259577e-07, "loss": 0.186, "step": 39442 }, { "epoch": 0.6856194267239132, "grad_norm": 1.2467606839916117, "learning_rate": 2.3756412075713317e-07, "loss": 0.1316, "step": 39443 }, { "epoch": 0.685636809261416, "grad_norm": 1.2744383165608546, "learning_rate": 2.3754016098347163e-07, "loss": 0.1787, "step": 39444 }, { "epoch": 0.6856541917989188, "grad_norm": 1.1619808100053908, "learning_rate": 2.3751620204168698e-07, "loss": 0.1812, "step": 39445 }, { "epoch": 0.6856715743364217, "grad_norm": 1.2460640690975795, "learning_rate": 2.3749224393185557e-07, "loss": 0.2503, "step": 39446 }, { "epoch": 0.6856889568739245, "grad_norm": 2.278685966593728, "learning_rate": 2.3746828665405322e-07, "loss": 0.3591, "step": 39447 }, { "epoch": 0.6857063394114273, "grad_norm": 1.5914983062521633, "learning_rate": 2.3744433020835576e-07, "loss": 0.2629, "step": 39448 }, { "epoch": 0.68572372194893, "grad_norm": 1.2747214469206927, "learning_rate": 2.3742037459483916e-07, "loss": 0.1564, "step": 39449 }, { "epoch": 0.6857411044864329, "grad_norm": 1.706426724098527, "learning_rate": 2.3739641981357933e-07, "loss": 0.2159, "step": 39450 }, { "epoch": 0.6857584870239357, "grad_norm": 1.3258076266679004, "learning_rate": 2.3737246586465225e-07, "loss": 0.2068, "step": 39451 }, { "epoch": 0.6857758695614385, "grad_norm": 3.018688255025886, "learning_rate": 2.3734851274813382e-07, "loss": 0.2522, "step": 39452 }, { "epoch": 0.6857932520989414, "grad_norm": 1.4317007478186343, "learning_rate": 2.3732456046409976e-07, "loss": 0.1816, "step": 39453 }, { "epoch": 0.6858106346364442, "grad_norm": 1.3653772655683674, "learning_rate": 2.3730060901262638e-07, "loss": 0.1718, "step": 39454 }, { "epoch": 0.685828017173947, "grad_norm": 2.7757699342789994, "learning_rate": 2.3727665839378958e-07, "loss": 0.2884, "step": 39455 }, { "epoch": 0.6858453997114499, "grad_norm": 1.5795906721388462, "learning_rate": 2.372527086076649e-07, "loss": 0.1834, "step": 39456 }, { "epoch": 0.6858627822489527, "grad_norm": 1.2338024528757094, "learning_rate": 2.3722875965432825e-07, "loss": 0.231, "step": 39457 }, { "epoch": 0.6858801647864555, "grad_norm": 1.3103675334318852, "learning_rate": 2.372048115338559e-07, "loss": 0.1572, "step": 39458 }, { "epoch": 0.6858975473239584, "grad_norm": 0.9983142791792337, "learning_rate": 2.3718086424632355e-07, "loss": 0.2668, "step": 39459 }, { "epoch": 0.6859149298614612, "grad_norm": 1.2018752211375336, "learning_rate": 2.371569177918072e-07, "loss": 0.1766, "step": 39460 }, { "epoch": 0.685932312398964, "grad_norm": 1.3061887773857248, "learning_rate": 2.371329721703824e-07, "loss": 0.257, "step": 39461 }, { "epoch": 0.6859496949364668, "grad_norm": 1.558704908203015, "learning_rate": 2.3710902738212579e-07, "loss": 0.2004, "step": 39462 }, { "epoch": 0.6859670774739697, "grad_norm": 0.9378143825915243, "learning_rate": 2.370850834271126e-07, "loss": 0.2549, "step": 39463 }, { "epoch": 0.6859844600114725, "grad_norm": 0.9982437951036536, "learning_rate": 2.3706114030541892e-07, "loss": 0.236, "step": 39464 }, { "epoch": 0.6860018425489753, "grad_norm": 1.884381707481798, "learning_rate": 2.370371980171204e-07, "loss": 0.2136, "step": 39465 }, { "epoch": 0.6860192250864782, "grad_norm": 1.4882682132156204, "learning_rate": 2.3701325656229338e-07, "loss": 0.1976, "step": 39466 }, { "epoch": 0.686036607623981, "grad_norm": 2.2004785505170354, "learning_rate": 2.3698931594101357e-07, "loss": 0.3198, "step": 39467 }, { "epoch": 0.6860539901614838, "grad_norm": 0.839607933353396, "learning_rate": 2.3696537615335676e-07, "loss": 0.189, "step": 39468 }, { "epoch": 0.6860713726989865, "grad_norm": 1.360249562415126, "learning_rate": 2.3694143719939892e-07, "loss": 0.1864, "step": 39469 }, { "epoch": 0.6860887552364894, "grad_norm": 1.6386452935831646, "learning_rate": 2.3691749907921588e-07, "loss": 0.1983, "step": 39470 }, { "epoch": 0.6861061377739922, "grad_norm": 2.0480368806083047, "learning_rate": 2.368935617928835e-07, "loss": 0.1889, "step": 39471 }, { "epoch": 0.686123520311495, "grad_norm": 1.6205435427060615, "learning_rate": 2.3686962534047767e-07, "loss": 0.1417, "step": 39472 }, { "epoch": 0.6861409028489979, "grad_norm": 1.3623940502319873, "learning_rate": 2.3684568972207408e-07, "loss": 0.1634, "step": 39473 }, { "epoch": 0.6861582853865007, "grad_norm": 2.6304210267754073, "learning_rate": 2.3682175493774915e-07, "loss": 0.3146, "step": 39474 }, { "epoch": 0.6861756679240035, "grad_norm": 1.7752062070970849, "learning_rate": 2.3679782098757812e-07, "loss": 0.2267, "step": 39475 }, { "epoch": 0.6861930504615064, "grad_norm": 1.745444495836656, "learning_rate": 2.3677388787163694e-07, "loss": 0.2096, "step": 39476 }, { "epoch": 0.6862104329990092, "grad_norm": 1.413266592468245, "learning_rate": 2.3674995559000183e-07, "loss": 0.1612, "step": 39477 }, { "epoch": 0.686227815536512, "grad_norm": 1.2220923976952949, "learning_rate": 2.3672602414274846e-07, "loss": 0.1817, "step": 39478 }, { "epoch": 0.6862451980740149, "grad_norm": 1.179388237725826, "learning_rate": 2.3670209352995259e-07, "loss": 0.2871, "step": 39479 }, { "epoch": 0.6862625806115177, "grad_norm": 1.3583862321883666, "learning_rate": 2.3667816375168997e-07, "loss": 0.1698, "step": 39480 }, { "epoch": 0.6862799631490205, "grad_norm": 1.0197322004343694, "learning_rate": 2.3665423480803698e-07, "loss": 0.1712, "step": 39481 }, { "epoch": 0.6862973456865233, "grad_norm": 1.2953344236561686, "learning_rate": 2.366303066990689e-07, "loss": 0.1696, "step": 39482 }, { "epoch": 0.6863147282240262, "grad_norm": 1.4459096512949943, "learning_rate": 2.3660637942486177e-07, "loss": 0.1493, "step": 39483 }, { "epoch": 0.686332110761529, "grad_norm": 1.3526044175236178, "learning_rate": 2.3658245298549122e-07, "loss": 0.1876, "step": 39484 }, { "epoch": 0.6863494932990318, "grad_norm": 0.9685152094863352, "learning_rate": 2.3655852738103354e-07, "loss": 0.1808, "step": 39485 }, { "epoch": 0.6863668758365347, "grad_norm": 1.6307749102365368, "learning_rate": 2.3653460261156428e-07, "loss": 0.177, "step": 39486 }, { "epoch": 0.6863842583740375, "grad_norm": 2.109153684865011, "learning_rate": 2.365106786771593e-07, "loss": 0.2523, "step": 39487 }, { "epoch": 0.6864016409115402, "grad_norm": 1.5107864954460661, "learning_rate": 2.3648675557789443e-07, "loss": 0.1614, "step": 39488 }, { "epoch": 0.686419023449043, "grad_norm": 1.5059736520846811, "learning_rate": 2.3646283331384547e-07, "loss": 0.2055, "step": 39489 }, { "epoch": 0.6864364059865459, "grad_norm": 1.4886449934092523, "learning_rate": 2.364389118850883e-07, "loss": 0.1278, "step": 39490 }, { "epoch": 0.6864537885240487, "grad_norm": 1.1721775996635635, "learning_rate": 2.3641499129169873e-07, "loss": 0.2005, "step": 39491 }, { "epoch": 0.6864711710615515, "grad_norm": 1.3512358187926878, "learning_rate": 2.3639107153375231e-07, "loss": 0.1914, "step": 39492 }, { "epoch": 0.6864885535990544, "grad_norm": 1.4097998696812182, "learning_rate": 2.363671526113255e-07, "loss": 0.1623, "step": 39493 }, { "epoch": 0.6865059361365572, "grad_norm": 1.4700517791044154, "learning_rate": 2.3634323452449356e-07, "loss": 0.1655, "step": 39494 }, { "epoch": 0.68652331867406, "grad_norm": 1.9831195494774712, "learning_rate": 2.3631931727333226e-07, "loss": 0.1624, "step": 39495 }, { "epoch": 0.6865407012115629, "grad_norm": 1.0896669982410634, "learning_rate": 2.3629540085791778e-07, "loss": 0.1594, "step": 39496 }, { "epoch": 0.6865580837490657, "grad_norm": 0.715373026262517, "learning_rate": 2.3627148527832568e-07, "loss": 0.1586, "step": 39497 }, { "epoch": 0.6865754662865685, "grad_norm": 1.5351563975333224, "learning_rate": 2.362475705346319e-07, "loss": 0.1815, "step": 39498 }, { "epoch": 0.6865928488240713, "grad_norm": 1.2527520226135735, "learning_rate": 2.3622365662691212e-07, "loss": 0.1506, "step": 39499 }, { "epoch": 0.6866102313615742, "grad_norm": 1.576928319045516, "learning_rate": 2.361997435552422e-07, "loss": 0.1825, "step": 39500 }, { "epoch": 0.686627613899077, "grad_norm": 0.9910813628333164, "learning_rate": 2.3617583131969793e-07, "loss": 0.196, "step": 39501 }, { "epoch": 0.6866449964365798, "grad_norm": 1.595365850712975, "learning_rate": 2.3615191992035504e-07, "loss": 0.2783, "step": 39502 }, { "epoch": 0.6866623789740827, "grad_norm": 1.7019527512151715, "learning_rate": 2.361280093572892e-07, "loss": 0.2325, "step": 39503 }, { "epoch": 0.6866797615115855, "grad_norm": 1.7278986306385853, "learning_rate": 2.3610409963057659e-07, "loss": 0.1866, "step": 39504 }, { "epoch": 0.6866971440490883, "grad_norm": 1.7775972732677476, "learning_rate": 2.3608019074029267e-07, "loss": 0.1666, "step": 39505 }, { "epoch": 0.6867145265865912, "grad_norm": 1.5407083797730186, "learning_rate": 2.3605628268651356e-07, "loss": 0.1535, "step": 39506 }, { "epoch": 0.686731909124094, "grad_norm": 1.7903950183711788, "learning_rate": 2.3603237546931442e-07, "loss": 0.2429, "step": 39507 }, { "epoch": 0.6867492916615967, "grad_norm": 1.2316896429601882, "learning_rate": 2.3600846908877153e-07, "loss": 0.1648, "step": 39508 }, { "epoch": 0.6867666741990995, "grad_norm": 2.5821679000552584, "learning_rate": 2.3598456354496055e-07, "loss": 0.3583, "step": 39509 }, { "epoch": 0.6867840567366024, "grad_norm": 0.9840383884744517, "learning_rate": 2.3596065883795724e-07, "loss": 0.3003, "step": 39510 }, { "epoch": 0.6868014392741052, "grad_norm": 1.1343258277333659, "learning_rate": 2.3593675496783717e-07, "loss": 0.158, "step": 39511 }, { "epoch": 0.686818821811608, "grad_norm": 2.1658375877616733, "learning_rate": 2.3591285193467663e-07, "loss": 0.348, "step": 39512 }, { "epoch": 0.6868362043491109, "grad_norm": 1.7730526793939758, "learning_rate": 2.3588894973855085e-07, "loss": 0.2313, "step": 39513 }, { "epoch": 0.6868535868866137, "grad_norm": 1.4087066927419722, "learning_rate": 2.3586504837953581e-07, "loss": 0.1652, "step": 39514 }, { "epoch": 0.6868709694241165, "grad_norm": 0.9257063900300331, "learning_rate": 2.3584114785770702e-07, "loss": 0.2576, "step": 39515 }, { "epoch": 0.6868883519616193, "grad_norm": 1.071096736706233, "learning_rate": 2.3581724817314063e-07, "loss": 0.242, "step": 39516 }, { "epoch": 0.6869057344991222, "grad_norm": 1.50221007491995, "learning_rate": 2.3579334932591223e-07, "loss": 0.1644, "step": 39517 }, { "epoch": 0.686923117036625, "grad_norm": 1.5200917843652808, "learning_rate": 2.3576945131609755e-07, "loss": 0.3807, "step": 39518 }, { "epoch": 0.6869404995741278, "grad_norm": 1.8412779633125018, "learning_rate": 2.357455541437723e-07, "loss": 0.2264, "step": 39519 }, { "epoch": 0.6869578821116307, "grad_norm": 1.2900946220025395, "learning_rate": 2.3572165780901226e-07, "loss": 0.2013, "step": 39520 }, { "epoch": 0.6869752646491335, "grad_norm": 1.4483230026233183, "learning_rate": 2.3569776231189316e-07, "loss": 0.2317, "step": 39521 }, { "epoch": 0.6869926471866363, "grad_norm": 2.3101221455647933, "learning_rate": 2.3567386765249075e-07, "loss": 0.2801, "step": 39522 }, { "epoch": 0.6870100297241392, "grad_norm": 1.2861377965680776, "learning_rate": 2.356499738308806e-07, "loss": 0.1705, "step": 39523 }, { "epoch": 0.687027412261642, "grad_norm": 1.7166998659214103, "learning_rate": 2.3562608084713874e-07, "loss": 0.1176, "step": 39524 }, { "epoch": 0.6870447947991448, "grad_norm": 1.5558674119899785, "learning_rate": 2.3560218870134097e-07, "loss": 0.183, "step": 39525 }, { "epoch": 0.6870621773366477, "grad_norm": 1.6260598586018427, "learning_rate": 2.3557829739356244e-07, "loss": 0.2401, "step": 39526 }, { "epoch": 0.6870795598741505, "grad_norm": 1.7172736766861683, "learning_rate": 2.3555440692387947e-07, "loss": 0.2531, "step": 39527 }, { "epoch": 0.6870969424116532, "grad_norm": 2.1487582422587264, "learning_rate": 2.355305172923675e-07, "loss": 0.2359, "step": 39528 }, { "epoch": 0.687114324949156, "grad_norm": 1.3876510122802055, "learning_rate": 2.355066284991023e-07, "loss": 0.1699, "step": 39529 }, { "epoch": 0.6871317074866589, "grad_norm": 1.4505879944943036, "learning_rate": 2.3548274054415956e-07, "loss": 0.2506, "step": 39530 }, { "epoch": 0.6871490900241617, "grad_norm": 1.6639173216275172, "learning_rate": 2.3545885342761512e-07, "loss": 0.2317, "step": 39531 }, { "epoch": 0.6871664725616645, "grad_norm": 3.1020576900641093, "learning_rate": 2.3543496714954452e-07, "loss": 0.3211, "step": 39532 }, { "epoch": 0.6871838550991674, "grad_norm": 1.3325066847212916, "learning_rate": 2.3541108171002356e-07, "loss": 0.2285, "step": 39533 }, { "epoch": 0.6872012376366702, "grad_norm": 1.0385250319298698, "learning_rate": 2.3538719710912774e-07, "loss": 0.1419, "step": 39534 }, { "epoch": 0.687218620174173, "grad_norm": 1.418385877163983, "learning_rate": 2.3536331334693317e-07, "loss": 0.2254, "step": 39535 }, { "epoch": 0.6872360027116758, "grad_norm": 2.191857636738004, "learning_rate": 2.353394304235153e-07, "loss": 0.2317, "step": 39536 }, { "epoch": 0.6872533852491787, "grad_norm": 1.3548963891000323, "learning_rate": 2.3531554833895002e-07, "loss": 0.252, "step": 39537 }, { "epoch": 0.6872707677866815, "grad_norm": 0.8490239772757059, "learning_rate": 2.3529166709331256e-07, "loss": 0.1017, "step": 39538 }, { "epoch": 0.6872881503241843, "grad_norm": 2.004005635285975, "learning_rate": 2.3526778668667903e-07, "loss": 0.1967, "step": 39539 }, { "epoch": 0.6873055328616872, "grad_norm": 1.1198976147286244, "learning_rate": 2.3524390711912506e-07, "loss": 0.2339, "step": 39540 }, { "epoch": 0.68732291539919, "grad_norm": 1.3589813115049154, "learning_rate": 2.3522002839072625e-07, "loss": 0.2473, "step": 39541 }, { "epoch": 0.6873402979366928, "grad_norm": 1.1077560280116543, "learning_rate": 2.3519615050155817e-07, "loss": 0.1597, "step": 39542 }, { "epoch": 0.6873576804741957, "grad_norm": 1.4600997861747074, "learning_rate": 2.3517227345169683e-07, "loss": 0.2011, "step": 39543 }, { "epoch": 0.6873750630116985, "grad_norm": 2.688593775259789, "learning_rate": 2.3514839724121793e-07, "loss": 0.2279, "step": 39544 }, { "epoch": 0.6873924455492013, "grad_norm": 2.5271311482704504, "learning_rate": 2.3512452187019672e-07, "loss": 0.3038, "step": 39545 }, { "epoch": 0.6874098280867041, "grad_norm": 1.4964645046541885, "learning_rate": 2.3510064733870894e-07, "loss": 0.1319, "step": 39546 }, { "epoch": 0.687427210624207, "grad_norm": 1.857326334470754, "learning_rate": 2.350767736468306e-07, "loss": 0.2692, "step": 39547 }, { "epoch": 0.6874445931617097, "grad_norm": 2.0951046385743415, "learning_rate": 2.350529007946372e-07, "loss": 0.2385, "step": 39548 }, { "epoch": 0.6874619756992125, "grad_norm": 3.087751777722157, "learning_rate": 2.3502902878220444e-07, "loss": 0.301, "step": 39549 }, { "epoch": 0.6874793582367154, "grad_norm": 1.0583408967876338, "learning_rate": 2.3500515760960788e-07, "loss": 0.144, "step": 39550 }, { "epoch": 0.6874967407742182, "grad_norm": 1.3509017317051941, "learning_rate": 2.3498128727692322e-07, "loss": 0.2092, "step": 39551 }, { "epoch": 0.687514123311721, "grad_norm": 2.3619999959753417, "learning_rate": 2.3495741778422623e-07, "loss": 0.2431, "step": 39552 }, { "epoch": 0.6875315058492238, "grad_norm": 1.7930129069128762, "learning_rate": 2.349335491315924e-07, "loss": 0.2099, "step": 39553 }, { "epoch": 0.6875488883867267, "grad_norm": 2.29256862559706, "learning_rate": 2.3490968131909733e-07, "loss": 0.2734, "step": 39554 }, { "epoch": 0.6875662709242295, "grad_norm": 1.3308999122091068, "learning_rate": 2.3488581434681692e-07, "loss": 0.2648, "step": 39555 }, { "epoch": 0.6875836534617323, "grad_norm": 1.8068237057026082, "learning_rate": 2.348619482148269e-07, "loss": 0.1862, "step": 39556 }, { "epoch": 0.6876010359992352, "grad_norm": 1.2840283292993944, "learning_rate": 2.3483808292320234e-07, "loss": 0.2652, "step": 39557 }, { "epoch": 0.687618418536738, "grad_norm": 1.311604304834167, "learning_rate": 2.3481421847201948e-07, "loss": 0.2002, "step": 39558 }, { "epoch": 0.6876358010742408, "grad_norm": 2.1261515928549337, "learning_rate": 2.3479035486135367e-07, "loss": 0.1817, "step": 39559 }, { "epoch": 0.6876531836117437, "grad_norm": 1.0760763270030902, "learning_rate": 2.3476649209128062e-07, "loss": 0.1331, "step": 39560 }, { "epoch": 0.6876705661492465, "grad_norm": 1.2636286528049383, "learning_rate": 2.34742630161876e-07, "loss": 0.1408, "step": 39561 }, { "epoch": 0.6876879486867493, "grad_norm": 1.1629550673869573, "learning_rate": 2.347187690732152e-07, "loss": 0.1249, "step": 39562 }, { "epoch": 0.6877053312242521, "grad_norm": 1.3527592790505623, "learning_rate": 2.3469490882537435e-07, "loss": 0.2015, "step": 39563 }, { "epoch": 0.687722713761755, "grad_norm": 1.6689391546155905, "learning_rate": 2.3467104941842863e-07, "loss": 0.1809, "step": 39564 }, { "epoch": 0.6877400962992578, "grad_norm": 1.887767446318492, "learning_rate": 2.346471908524536e-07, "loss": 0.2515, "step": 39565 }, { "epoch": 0.6877574788367606, "grad_norm": 1.300219429015358, "learning_rate": 2.346233331275253e-07, "loss": 0.259, "step": 39566 }, { "epoch": 0.6877748613742635, "grad_norm": 1.1806051685983412, "learning_rate": 2.3459947624371912e-07, "loss": 0.1061, "step": 39567 }, { "epoch": 0.6877922439117662, "grad_norm": 1.0129256225697172, "learning_rate": 2.3457562020111066e-07, "loss": 0.1493, "step": 39568 }, { "epoch": 0.687809626449269, "grad_norm": 2.255622090024869, "learning_rate": 2.3455176499977553e-07, "loss": 0.1989, "step": 39569 }, { "epoch": 0.6878270089867718, "grad_norm": 1.7154999642956648, "learning_rate": 2.3452791063978943e-07, "loss": 0.2131, "step": 39570 }, { "epoch": 0.6878443915242747, "grad_norm": 1.8564536795201714, "learning_rate": 2.345040571212279e-07, "loss": 0.221, "step": 39571 }, { "epoch": 0.6878617740617775, "grad_norm": 1.1532403563882292, "learning_rate": 2.344802044441665e-07, "loss": 0.1314, "step": 39572 }, { "epoch": 0.6878791565992803, "grad_norm": 2.124091534209758, "learning_rate": 2.3445635260868073e-07, "loss": 0.3146, "step": 39573 }, { "epoch": 0.6878965391367832, "grad_norm": 1.3286448073317207, "learning_rate": 2.344325016148465e-07, "loss": 0.1838, "step": 39574 }, { "epoch": 0.687913921674286, "grad_norm": 1.719367933674189, "learning_rate": 2.3440865146273947e-07, "loss": 0.2267, "step": 39575 }, { "epoch": 0.6879313042117888, "grad_norm": 1.2272919654526064, "learning_rate": 2.3438480215243466e-07, "loss": 0.1661, "step": 39576 }, { "epoch": 0.6879486867492917, "grad_norm": 1.2037277583693662, "learning_rate": 2.3436095368400815e-07, "loss": 0.3191, "step": 39577 }, { "epoch": 0.6879660692867945, "grad_norm": 1.7749126188574595, "learning_rate": 2.3433710605753543e-07, "loss": 0.1471, "step": 39578 }, { "epoch": 0.6879834518242973, "grad_norm": 1.269857706576766, "learning_rate": 2.3431325927309203e-07, "loss": 0.2026, "step": 39579 }, { "epoch": 0.6880008343618002, "grad_norm": 0.9245994258914244, "learning_rate": 2.3428941333075353e-07, "loss": 0.112, "step": 39580 }, { "epoch": 0.688018216899303, "grad_norm": 1.3148601045400383, "learning_rate": 2.3426556823059533e-07, "loss": 0.2049, "step": 39581 }, { "epoch": 0.6880355994368058, "grad_norm": 1.001777870784224, "learning_rate": 2.3424172397269366e-07, "loss": 0.1306, "step": 39582 }, { "epoch": 0.6880529819743086, "grad_norm": 1.2292252399564103, "learning_rate": 2.3421788055712343e-07, "loss": 0.1767, "step": 39583 }, { "epoch": 0.6880703645118115, "grad_norm": 1.5290408309637618, "learning_rate": 2.341940379839602e-07, "loss": 0.2674, "step": 39584 }, { "epoch": 0.6880877470493143, "grad_norm": 1.7730324932093733, "learning_rate": 2.3417019625327999e-07, "loss": 0.1509, "step": 39585 }, { "epoch": 0.6881051295868171, "grad_norm": 1.3107214877311, "learning_rate": 2.3414635536515816e-07, "loss": 0.1739, "step": 39586 }, { "epoch": 0.68812251212432, "grad_norm": 1.206330814045505, "learning_rate": 2.341225153196702e-07, "loss": 0.1475, "step": 39587 }, { "epoch": 0.6881398946618227, "grad_norm": 1.1390176415370408, "learning_rate": 2.3409867611689182e-07, "loss": 0.1968, "step": 39588 }, { "epoch": 0.6881572771993255, "grad_norm": 1.2802671062316284, "learning_rate": 2.340748377568984e-07, "loss": 0.1652, "step": 39589 }, { "epoch": 0.6881746597368283, "grad_norm": 1.0057372762421888, "learning_rate": 2.3405100023976566e-07, "loss": 0.2545, "step": 39590 }, { "epoch": 0.6881920422743312, "grad_norm": 1.5246947511461737, "learning_rate": 2.3402716356556905e-07, "loss": 0.2729, "step": 39591 }, { "epoch": 0.688209424811834, "grad_norm": 1.447216641858292, "learning_rate": 2.3400332773438397e-07, "loss": 0.2968, "step": 39592 }, { "epoch": 0.6882268073493368, "grad_norm": 1.620138834372611, "learning_rate": 2.3397949274628636e-07, "loss": 0.1394, "step": 39593 }, { "epoch": 0.6882441898868397, "grad_norm": 1.315399216327286, "learning_rate": 2.339556586013517e-07, "loss": 0.1899, "step": 39594 }, { "epoch": 0.6882615724243425, "grad_norm": 1.5813700247539562, "learning_rate": 2.3393182529965522e-07, "loss": 0.2377, "step": 39595 }, { "epoch": 0.6882789549618453, "grad_norm": 1.8991375012986293, "learning_rate": 2.339079928412725e-07, "loss": 0.2082, "step": 39596 }, { "epoch": 0.6882963374993482, "grad_norm": 1.844902118459117, "learning_rate": 2.3388416122627935e-07, "loss": 0.1703, "step": 39597 }, { "epoch": 0.688313720036851, "grad_norm": 0.8592158443988895, "learning_rate": 2.3386033045475117e-07, "loss": 0.1219, "step": 39598 }, { "epoch": 0.6883311025743538, "grad_norm": 1.4457543031791507, "learning_rate": 2.3383650052676345e-07, "loss": 0.1692, "step": 39599 }, { "epoch": 0.6883484851118566, "grad_norm": 1.3683652229377496, "learning_rate": 2.3381267144239165e-07, "loss": 0.2308, "step": 39600 }, { "epoch": 0.6883658676493595, "grad_norm": 1.1858249635242, "learning_rate": 2.3378884320171173e-07, "loss": 0.1601, "step": 39601 }, { "epoch": 0.6883832501868623, "grad_norm": 2.0751847569825603, "learning_rate": 2.3376501580479867e-07, "loss": 0.197, "step": 39602 }, { "epoch": 0.6884006327243651, "grad_norm": 1.9238434914690723, "learning_rate": 2.3374118925172826e-07, "loss": 0.3081, "step": 39603 }, { "epoch": 0.688418015261868, "grad_norm": 1.02041077775539, "learning_rate": 2.3371736354257576e-07, "loss": 0.2965, "step": 39604 }, { "epoch": 0.6884353977993708, "grad_norm": 1.7982794669655595, "learning_rate": 2.3369353867741713e-07, "loss": 0.2647, "step": 39605 }, { "epoch": 0.6884527803368736, "grad_norm": 2.4839639823445334, "learning_rate": 2.3366971465632763e-07, "loss": 0.1596, "step": 39606 }, { "epoch": 0.6884701628743765, "grad_norm": 1.4910466384533436, "learning_rate": 2.3364589147938274e-07, "loss": 0.1737, "step": 39607 }, { "epoch": 0.6884875454118792, "grad_norm": 1.82410628488164, "learning_rate": 2.336220691466581e-07, "loss": 0.13, "step": 39608 }, { "epoch": 0.688504927949382, "grad_norm": 1.183641221667962, "learning_rate": 2.335982476582291e-07, "loss": 0.181, "step": 39609 }, { "epoch": 0.6885223104868848, "grad_norm": 1.2622632913990126, "learning_rate": 2.3357442701417124e-07, "loss": 0.1706, "step": 39610 }, { "epoch": 0.6885396930243877, "grad_norm": 1.4249572172689988, "learning_rate": 2.335506072145601e-07, "loss": 0.1881, "step": 39611 }, { "epoch": 0.6885570755618905, "grad_norm": 1.860068091030338, "learning_rate": 2.3352678825947096e-07, "loss": 0.1877, "step": 39612 }, { "epoch": 0.6885744580993933, "grad_norm": 1.128773537345058, "learning_rate": 2.3350297014897985e-07, "loss": 0.2012, "step": 39613 }, { "epoch": 0.6885918406368962, "grad_norm": 1.637880606778415, "learning_rate": 2.3347915288316173e-07, "loss": 0.2581, "step": 39614 }, { "epoch": 0.688609223174399, "grad_norm": 0.9843362496753825, "learning_rate": 2.334553364620921e-07, "loss": 0.1926, "step": 39615 }, { "epoch": 0.6886266057119018, "grad_norm": 0.9751680496660088, "learning_rate": 2.3343152088584678e-07, "loss": 0.235, "step": 39616 }, { "epoch": 0.6886439882494046, "grad_norm": 1.0934783912429218, "learning_rate": 2.3340770615450107e-07, "loss": 0.3058, "step": 39617 }, { "epoch": 0.6886613707869075, "grad_norm": 1.2508576292452995, "learning_rate": 2.333838922681305e-07, "loss": 0.1179, "step": 39618 }, { "epoch": 0.6886787533244103, "grad_norm": 1.1403820472366115, "learning_rate": 2.333600792268105e-07, "loss": 0.1662, "step": 39619 }, { "epoch": 0.6886961358619131, "grad_norm": 1.589011904213258, "learning_rate": 2.3333626703061654e-07, "loss": 0.2449, "step": 39620 }, { "epoch": 0.688713518399416, "grad_norm": 1.6328524785313545, "learning_rate": 2.3331245567962417e-07, "loss": 0.2174, "step": 39621 }, { "epoch": 0.6887309009369188, "grad_norm": 2.2099082260204304, "learning_rate": 2.3328864517390878e-07, "loss": 0.2426, "step": 39622 }, { "epoch": 0.6887482834744216, "grad_norm": 1.2588331367583563, "learning_rate": 2.332648355135457e-07, "loss": 0.2988, "step": 39623 }, { "epoch": 0.6887656660119245, "grad_norm": 2.169419821393078, "learning_rate": 2.3324102669861073e-07, "loss": 0.2468, "step": 39624 }, { "epoch": 0.6887830485494273, "grad_norm": 1.6885228872896056, "learning_rate": 2.3321721872917915e-07, "loss": 0.2313, "step": 39625 }, { "epoch": 0.6888004310869301, "grad_norm": 2.0454607672956158, "learning_rate": 2.3319341160532664e-07, "loss": 0.184, "step": 39626 }, { "epoch": 0.6888178136244328, "grad_norm": 1.2001544834466276, "learning_rate": 2.3316960532712808e-07, "loss": 0.1817, "step": 39627 }, { "epoch": 0.6888351961619357, "grad_norm": 1.214025088898974, "learning_rate": 2.331457998946595e-07, "loss": 0.2222, "step": 39628 }, { "epoch": 0.6888525786994385, "grad_norm": 2.0353339064401514, "learning_rate": 2.3312199530799613e-07, "loss": 0.2742, "step": 39629 }, { "epoch": 0.6888699612369413, "grad_norm": 1.8733037778746318, "learning_rate": 2.3309819156721343e-07, "loss": 0.2503, "step": 39630 }, { "epoch": 0.6888873437744442, "grad_norm": 1.0740725604183783, "learning_rate": 2.330743886723867e-07, "loss": 0.2348, "step": 39631 }, { "epoch": 0.688904726311947, "grad_norm": 1.8541565820225185, "learning_rate": 2.3305058662359195e-07, "loss": 0.2683, "step": 39632 }, { "epoch": 0.6889221088494498, "grad_norm": 1.4584369537329964, "learning_rate": 2.3302678542090399e-07, "loss": 0.1589, "step": 39633 }, { "epoch": 0.6889394913869527, "grad_norm": 1.0880482904038153, "learning_rate": 2.3300298506439847e-07, "loss": 0.2063, "step": 39634 }, { "epoch": 0.6889568739244555, "grad_norm": 2.116245953876195, "learning_rate": 2.3297918555415063e-07, "loss": 0.1975, "step": 39635 }, { "epoch": 0.6889742564619583, "grad_norm": 1.5409271504508335, "learning_rate": 2.3295538689023637e-07, "loss": 0.2602, "step": 39636 }, { "epoch": 0.6889916389994611, "grad_norm": 1.2013028473967804, "learning_rate": 2.329315890727308e-07, "loss": 0.208, "step": 39637 }, { "epoch": 0.689009021536964, "grad_norm": 1.5782682334212637, "learning_rate": 2.329077921017094e-07, "loss": 0.1401, "step": 39638 }, { "epoch": 0.6890264040744668, "grad_norm": 1.0388601579359693, "learning_rate": 2.3288399597724768e-07, "loss": 0.1818, "step": 39639 }, { "epoch": 0.6890437866119696, "grad_norm": 2.922047052837077, "learning_rate": 2.3286020069942098e-07, "loss": 0.1824, "step": 39640 }, { "epoch": 0.6890611691494725, "grad_norm": 1.1626204520016818, "learning_rate": 2.328364062683047e-07, "loss": 0.1677, "step": 39641 }, { "epoch": 0.6890785516869753, "grad_norm": 1.675838257965653, "learning_rate": 2.328126126839743e-07, "loss": 0.2581, "step": 39642 }, { "epoch": 0.6890959342244781, "grad_norm": 1.900707785812858, "learning_rate": 2.3278881994650496e-07, "loss": 0.2613, "step": 39643 }, { "epoch": 0.689113316761981, "grad_norm": 1.8459358967684032, "learning_rate": 2.3276502805597252e-07, "loss": 0.2633, "step": 39644 }, { "epoch": 0.6891306992994838, "grad_norm": 1.3065349310335705, "learning_rate": 2.3274123701245236e-07, "loss": 0.1565, "step": 39645 }, { "epoch": 0.6891480818369866, "grad_norm": 1.131362796403437, "learning_rate": 2.3271744681601935e-07, "loss": 0.2299, "step": 39646 }, { "epoch": 0.6891654643744893, "grad_norm": 1.2314836471236488, "learning_rate": 2.3269365746674946e-07, "loss": 0.1335, "step": 39647 }, { "epoch": 0.6891828469119922, "grad_norm": 1.9980699665154102, "learning_rate": 2.3266986896471785e-07, "loss": 0.2255, "step": 39648 }, { "epoch": 0.689200229449495, "grad_norm": 1.1648590792535338, "learning_rate": 2.3264608130999996e-07, "loss": 0.2534, "step": 39649 }, { "epoch": 0.6892176119869978, "grad_norm": 2.0116616708863573, "learning_rate": 2.3262229450267124e-07, "loss": 0.2233, "step": 39650 }, { "epoch": 0.6892349945245007, "grad_norm": 2.4798301818314648, "learning_rate": 2.3259850854280695e-07, "loss": 0.236, "step": 39651 }, { "epoch": 0.6892523770620035, "grad_norm": 0.9315803316919954, "learning_rate": 2.3257472343048256e-07, "loss": 0.1706, "step": 39652 }, { "epoch": 0.6892697595995063, "grad_norm": 1.5926962657048709, "learning_rate": 2.325509391657735e-07, "loss": 0.1765, "step": 39653 }, { "epoch": 0.6892871421370091, "grad_norm": 2.3808032866576756, "learning_rate": 2.3252715574875487e-07, "loss": 0.1853, "step": 39654 }, { "epoch": 0.689304524674512, "grad_norm": 1.4160931782172825, "learning_rate": 2.3250337317950254e-07, "loss": 0.2336, "step": 39655 }, { "epoch": 0.6893219072120148, "grad_norm": 2.028165462687807, "learning_rate": 2.3247959145809154e-07, "loss": 0.2406, "step": 39656 }, { "epoch": 0.6893392897495176, "grad_norm": 1.019515371014404, "learning_rate": 2.3245581058459763e-07, "loss": 0.222, "step": 39657 }, { "epoch": 0.6893566722870205, "grad_norm": 1.6308419819552384, "learning_rate": 2.3243203055909548e-07, "loss": 0.1149, "step": 39658 }, { "epoch": 0.6893740548245233, "grad_norm": 1.4959845236475142, "learning_rate": 2.3240825138166104e-07, "loss": 0.4109, "step": 39659 }, { "epoch": 0.6893914373620261, "grad_norm": 0.8694675571566398, "learning_rate": 2.3238447305236958e-07, "loss": 0.2091, "step": 39660 }, { "epoch": 0.689408819899529, "grad_norm": 1.1412276959720038, "learning_rate": 2.3236069557129638e-07, "loss": 0.1956, "step": 39661 }, { "epoch": 0.6894262024370318, "grad_norm": 0.9721954450033572, "learning_rate": 2.3233691893851665e-07, "loss": 0.1432, "step": 39662 }, { "epoch": 0.6894435849745346, "grad_norm": 1.3665789458780906, "learning_rate": 2.3231314315410612e-07, "loss": 0.1662, "step": 39663 }, { "epoch": 0.6894609675120374, "grad_norm": 2.055963971044825, "learning_rate": 2.3228936821814016e-07, "loss": 0.2767, "step": 39664 }, { "epoch": 0.6894783500495403, "grad_norm": 1.6462038506721208, "learning_rate": 2.322655941306935e-07, "loss": 0.2063, "step": 39665 }, { "epoch": 0.6894957325870431, "grad_norm": 2.068834763816987, "learning_rate": 2.3224182089184219e-07, "loss": 0.2784, "step": 39666 }, { "epoch": 0.6895131151245458, "grad_norm": 1.1472156574832322, "learning_rate": 2.3221804850166126e-07, "loss": 0.1835, "step": 39667 }, { "epoch": 0.6895304976620487, "grad_norm": 1.749961673076176, "learning_rate": 2.3219427696022608e-07, "loss": 0.1768, "step": 39668 }, { "epoch": 0.6895478801995515, "grad_norm": 2.854849352188392, "learning_rate": 2.321705062676121e-07, "loss": 0.2596, "step": 39669 }, { "epoch": 0.6895652627370543, "grad_norm": 1.0389381299636302, "learning_rate": 2.3214673642389453e-07, "loss": 0.1578, "step": 39670 }, { "epoch": 0.6895826452745571, "grad_norm": 5.727127949250312, "learning_rate": 2.3212296742914877e-07, "loss": 0.2898, "step": 39671 }, { "epoch": 0.68960002781206, "grad_norm": 1.1372416330948512, "learning_rate": 2.3209919928345013e-07, "loss": 0.1871, "step": 39672 }, { "epoch": 0.6896174103495628, "grad_norm": 0.6043259814874617, "learning_rate": 2.320754319868738e-07, "loss": 0.2359, "step": 39673 }, { "epoch": 0.6896347928870656, "grad_norm": 0.785609696251237, "learning_rate": 2.3205166553949545e-07, "loss": 0.2742, "step": 39674 }, { "epoch": 0.6896521754245685, "grad_norm": 1.1504670038800562, "learning_rate": 2.3202789994139027e-07, "loss": 0.1864, "step": 39675 }, { "epoch": 0.6896695579620713, "grad_norm": 1.9482119896275898, "learning_rate": 2.3200413519263372e-07, "loss": 0.3697, "step": 39676 }, { "epoch": 0.6896869404995741, "grad_norm": 1.3704971181844605, "learning_rate": 2.3198037129330055e-07, "loss": 0.2515, "step": 39677 }, { "epoch": 0.689704323037077, "grad_norm": 2.5322565863412385, "learning_rate": 2.3195660824346675e-07, "loss": 0.6098, "step": 39678 }, { "epoch": 0.6897217055745798, "grad_norm": 1.7323461005197167, "learning_rate": 2.319328460432073e-07, "loss": 0.2174, "step": 39679 }, { "epoch": 0.6897390881120826, "grad_norm": 1.5097721319388506, "learning_rate": 2.3190908469259761e-07, "loss": 0.233, "step": 39680 }, { "epoch": 0.6897564706495855, "grad_norm": 3.6030388417421797, "learning_rate": 2.318853241917128e-07, "loss": 0.2738, "step": 39681 }, { "epoch": 0.6897738531870883, "grad_norm": 1.3885179731957584, "learning_rate": 2.318615645406285e-07, "loss": 0.3681, "step": 39682 }, { "epoch": 0.6897912357245911, "grad_norm": 1.1676827590881347, "learning_rate": 2.3183780573942008e-07, "loss": 0.1871, "step": 39683 }, { "epoch": 0.6898086182620939, "grad_norm": 1.918371735658325, "learning_rate": 2.3181404778816244e-07, "loss": 0.2401, "step": 39684 }, { "epoch": 0.6898260007995968, "grad_norm": 1.083994149597855, "learning_rate": 2.317902906869309e-07, "loss": 0.237, "step": 39685 }, { "epoch": 0.6898433833370996, "grad_norm": 1.4579705667169762, "learning_rate": 2.3176653443580106e-07, "loss": 0.2433, "step": 39686 }, { "epoch": 0.6898607658746023, "grad_norm": 1.7796501365076096, "learning_rate": 2.3174277903484818e-07, "loss": 0.2134, "step": 39687 }, { "epoch": 0.6898781484121052, "grad_norm": 2.162838081091039, "learning_rate": 2.3171902448414743e-07, "loss": 0.2296, "step": 39688 }, { "epoch": 0.689895530949608, "grad_norm": 1.0664491555032147, "learning_rate": 2.316952707837741e-07, "loss": 0.2771, "step": 39689 }, { "epoch": 0.6899129134871108, "grad_norm": 1.9226007625482229, "learning_rate": 2.3167151793380356e-07, "loss": 0.2137, "step": 39690 }, { "epoch": 0.6899302960246136, "grad_norm": 1.3796198545089038, "learning_rate": 2.3164776593431102e-07, "loss": 0.21, "step": 39691 }, { "epoch": 0.6899476785621165, "grad_norm": 1.2509332052491622, "learning_rate": 2.316240147853718e-07, "loss": 0.2115, "step": 39692 }, { "epoch": 0.6899650610996193, "grad_norm": 1.7795974113041895, "learning_rate": 2.31600264487061e-07, "loss": 0.3526, "step": 39693 }, { "epoch": 0.6899824436371221, "grad_norm": 1.877278642292072, "learning_rate": 2.3157651503945418e-07, "loss": 0.3236, "step": 39694 }, { "epoch": 0.689999826174625, "grad_norm": 1.2944862495781033, "learning_rate": 2.315527664426268e-07, "loss": 0.2188, "step": 39695 }, { "epoch": 0.6900172087121278, "grad_norm": 1.9943438265672202, "learning_rate": 2.3152901869665336e-07, "loss": 0.1649, "step": 39696 }, { "epoch": 0.6900345912496306, "grad_norm": 1.2898083571879735, "learning_rate": 2.315052718016099e-07, "loss": 0.1622, "step": 39697 }, { "epoch": 0.6900519737871335, "grad_norm": 1.893197835782323, "learning_rate": 2.314815257575713e-07, "loss": 0.2821, "step": 39698 }, { "epoch": 0.6900693563246363, "grad_norm": 2.0404697077282052, "learning_rate": 2.3145778056461295e-07, "loss": 0.2296, "step": 39699 }, { "epoch": 0.6900867388621391, "grad_norm": 1.9949444023031766, "learning_rate": 2.3143403622281012e-07, "loss": 0.1644, "step": 39700 }, { "epoch": 0.690104121399642, "grad_norm": 1.54926150181354, "learning_rate": 2.3141029273223778e-07, "loss": 0.25, "step": 39701 }, { "epoch": 0.6901215039371448, "grad_norm": 2.42698756312173, "learning_rate": 2.313865500929718e-07, "loss": 0.1876, "step": 39702 }, { "epoch": 0.6901388864746476, "grad_norm": 1.9566714062218469, "learning_rate": 2.3136280830508692e-07, "loss": 0.3405, "step": 39703 }, { "epoch": 0.6901562690121504, "grad_norm": 2.070046303776899, "learning_rate": 2.3133906736865831e-07, "loss": 0.2073, "step": 39704 }, { "epoch": 0.6901736515496533, "grad_norm": 2.449373266819611, "learning_rate": 2.3131532728376162e-07, "loss": 0.2178, "step": 39705 }, { "epoch": 0.6901910340871561, "grad_norm": 2.809520801543159, "learning_rate": 2.3129158805047199e-07, "loss": 0.2129, "step": 39706 }, { "epoch": 0.6902084166246588, "grad_norm": 1.872316948959356, "learning_rate": 2.3126784966886453e-07, "loss": 0.1956, "step": 39707 }, { "epoch": 0.6902257991621616, "grad_norm": 1.2894442443499474, "learning_rate": 2.3124411213901456e-07, "loss": 0.2006, "step": 39708 }, { "epoch": 0.6902431816996645, "grad_norm": 1.87930556535759, "learning_rate": 2.3122037546099732e-07, "loss": 0.1852, "step": 39709 }, { "epoch": 0.6902605642371673, "grad_norm": 2.098205079511022, "learning_rate": 2.31196639634888e-07, "loss": 0.2946, "step": 39710 }, { "epoch": 0.6902779467746701, "grad_norm": 1.903608398351067, "learning_rate": 2.3117290466076182e-07, "loss": 0.2253, "step": 39711 }, { "epoch": 0.690295329312173, "grad_norm": 1.7331004319100562, "learning_rate": 2.3114917053869393e-07, "loss": 0.192, "step": 39712 }, { "epoch": 0.6903127118496758, "grad_norm": 1.654345408432055, "learning_rate": 2.3112543726875977e-07, "loss": 0.4814, "step": 39713 }, { "epoch": 0.6903300943871786, "grad_norm": 1.3670576444919655, "learning_rate": 2.3110170485103474e-07, "loss": 0.2532, "step": 39714 }, { "epoch": 0.6903474769246815, "grad_norm": 1.283036786564358, "learning_rate": 2.3107797328559352e-07, "loss": 0.314, "step": 39715 }, { "epoch": 0.6903648594621843, "grad_norm": 1.3693260905111764, "learning_rate": 2.3105424257251143e-07, "loss": 0.2026, "step": 39716 }, { "epoch": 0.6903822419996871, "grad_norm": 1.313869329084385, "learning_rate": 2.3103051271186407e-07, "loss": 0.2846, "step": 39717 }, { "epoch": 0.69039962453719, "grad_norm": 2.4067334201991204, "learning_rate": 2.3100678370372639e-07, "loss": 0.3247, "step": 39718 }, { "epoch": 0.6904170070746928, "grad_norm": 1.2376047693707781, "learning_rate": 2.3098305554817366e-07, "loss": 0.148, "step": 39719 }, { "epoch": 0.6904343896121956, "grad_norm": 1.3752262164360756, "learning_rate": 2.309593282452809e-07, "loss": 0.1874, "step": 39720 }, { "epoch": 0.6904517721496984, "grad_norm": 1.3074705138707212, "learning_rate": 2.3093560179512384e-07, "loss": 0.2386, "step": 39721 }, { "epoch": 0.6904691546872013, "grad_norm": 2.3763355303242486, "learning_rate": 2.3091187619777713e-07, "loss": 0.1879, "step": 39722 }, { "epoch": 0.6904865372247041, "grad_norm": 1.3370001154254132, "learning_rate": 2.3088815145331614e-07, "loss": 0.1866, "step": 39723 }, { "epoch": 0.6905039197622069, "grad_norm": 1.5730968008624304, "learning_rate": 2.3086442756181595e-07, "loss": 0.2044, "step": 39724 }, { "epoch": 0.6905213022997098, "grad_norm": 1.6336853581017445, "learning_rate": 2.3084070452335202e-07, "loss": 0.1214, "step": 39725 }, { "epoch": 0.6905386848372126, "grad_norm": 1.516613870622777, "learning_rate": 2.3081698233799945e-07, "loss": 0.2596, "step": 39726 }, { "epoch": 0.6905560673747153, "grad_norm": 2.751066855360824, "learning_rate": 2.307932610058334e-07, "loss": 0.2592, "step": 39727 }, { "epoch": 0.6905734499122181, "grad_norm": 1.6773288213749642, "learning_rate": 2.3076954052692905e-07, "loss": 0.1685, "step": 39728 }, { "epoch": 0.690590832449721, "grad_norm": 1.1884299232694064, "learning_rate": 2.3074582090136156e-07, "loss": 0.1537, "step": 39729 }, { "epoch": 0.6906082149872238, "grad_norm": 1.2318784347176504, "learning_rate": 2.3072210212920619e-07, "loss": 0.2459, "step": 39730 }, { "epoch": 0.6906255975247266, "grad_norm": 2.873819284124125, "learning_rate": 2.30698384210538e-07, "loss": 0.2255, "step": 39731 }, { "epoch": 0.6906429800622295, "grad_norm": 1.2170401866868619, "learning_rate": 2.3067466714543204e-07, "loss": 0.1094, "step": 39732 }, { "epoch": 0.6906603625997323, "grad_norm": 1.35521590768898, "learning_rate": 2.306509509339641e-07, "loss": 0.211, "step": 39733 }, { "epoch": 0.6906777451372351, "grad_norm": 3.974977166629089, "learning_rate": 2.3062723557620866e-07, "loss": 0.2845, "step": 39734 }, { "epoch": 0.690695127674738, "grad_norm": 1.744705935023247, "learning_rate": 2.30603521072241e-07, "loss": 0.1647, "step": 39735 }, { "epoch": 0.6907125102122408, "grad_norm": 1.2953682089877678, "learning_rate": 2.305798074221366e-07, "loss": 0.1651, "step": 39736 }, { "epoch": 0.6907298927497436, "grad_norm": 1.1097789896274988, "learning_rate": 2.305560946259704e-07, "loss": 0.2218, "step": 39737 }, { "epoch": 0.6907472752872464, "grad_norm": 0.8651184158718526, "learning_rate": 2.305323826838177e-07, "loss": 0.1531, "step": 39738 }, { "epoch": 0.6907646578247493, "grad_norm": 2.3998794878550997, "learning_rate": 2.3050867159575354e-07, "loss": 0.3335, "step": 39739 }, { "epoch": 0.6907820403622521, "grad_norm": 2.54022351744263, "learning_rate": 2.304849613618531e-07, "loss": 0.255, "step": 39740 }, { "epoch": 0.6907994228997549, "grad_norm": 1.8648732848303313, "learning_rate": 2.3046125198219145e-07, "loss": 0.2307, "step": 39741 }, { "epoch": 0.6908168054372578, "grad_norm": 1.3452919207269034, "learning_rate": 2.3043754345684386e-07, "loss": 0.199, "step": 39742 }, { "epoch": 0.6908341879747606, "grad_norm": 1.7869575844291288, "learning_rate": 2.3041383578588524e-07, "loss": 0.2515, "step": 39743 }, { "epoch": 0.6908515705122634, "grad_norm": 1.6108840203934482, "learning_rate": 2.3039012896939115e-07, "loss": 0.125, "step": 39744 }, { "epoch": 0.6908689530497663, "grad_norm": 1.0439299446886996, "learning_rate": 2.3036642300743648e-07, "loss": 0.1195, "step": 39745 }, { "epoch": 0.6908863355872691, "grad_norm": 1.0586006164240673, "learning_rate": 2.3034271790009656e-07, "loss": 0.3171, "step": 39746 }, { "epoch": 0.6909037181247718, "grad_norm": 2.001929954899691, "learning_rate": 2.30319013647446e-07, "loss": 0.176, "step": 39747 }, { "epoch": 0.6909211006622746, "grad_norm": 1.765871759812991, "learning_rate": 2.3029531024956044e-07, "loss": 0.151, "step": 39748 }, { "epoch": 0.6909384831997775, "grad_norm": 1.5561519917935518, "learning_rate": 2.3027160770651488e-07, "loss": 0.3721, "step": 39749 }, { "epoch": 0.6909558657372803, "grad_norm": 1.5214738649089399, "learning_rate": 2.3024790601838444e-07, "loss": 0.1829, "step": 39750 }, { "epoch": 0.6909732482747831, "grad_norm": 1.694076350672434, "learning_rate": 2.30224205185244e-07, "loss": 0.3154, "step": 39751 }, { "epoch": 0.690990630812286, "grad_norm": 1.184417075248707, "learning_rate": 2.3020050520716932e-07, "loss": 0.2328, "step": 39752 }, { "epoch": 0.6910080133497888, "grad_norm": 1.5100695876367762, "learning_rate": 2.301768060842349e-07, "loss": 0.2891, "step": 39753 }, { "epoch": 0.6910253958872916, "grad_norm": 1.567752049240672, "learning_rate": 2.3015310781651604e-07, "loss": 0.1783, "step": 39754 }, { "epoch": 0.6910427784247944, "grad_norm": 1.3063040460944464, "learning_rate": 2.3012941040408767e-07, "loss": 0.2585, "step": 39755 }, { "epoch": 0.6910601609622973, "grad_norm": 3.563083123506115, "learning_rate": 2.3010571384702532e-07, "loss": 0.2477, "step": 39756 }, { "epoch": 0.6910775434998001, "grad_norm": 11.571154627602365, "learning_rate": 2.3008201814540384e-07, "loss": 0.2568, "step": 39757 }, { "epoch": 0.6910949260373029, "grad_norm": 1.8155251846657272, "learning_rate": 2.3005832329929842e-07, "loss": 0.1181, "step": 39758 }, { "epoch": 0.6911123085748058, "grad_norm": 2.6509228735029633, "learning_rate": 2.300346293087841e-07, "loss": 0.1706, "step": 39759 }, { "epoch": 0.6911296911123086, "grad_norm": 1.3108122842940682, "learning_rate": 2.3001093617393595e-07, "loss": 0.2617, "step": 39760 }, { "epoch": 0.6911470736498114, "grad_norm": 1.420460665823228, "learning_rate": 2.2998724389482915e-07, "loss": 0.215, "step": 39761 }, { "epoch": 0.6911644561873143, "grad_norm": 1.1125516068209873, "learning_rate": 2.2996355247153858e-07, "loss": 0.2266, "step": 39762 }, { "epoch": 0.6911818387248171, "grad_norm": 1.486802540993872, "learning_rate": 2.2993986190413968e-07, "loss": 0.1118, "step": 39763 }, { "epoch": 0.6911992212623199, "grad_norm": 1.381669157112095, "learning_rate": 2.2991617219270737e-07, "loss": 0.2553, "step": 39764 }, { "epoch": 0.6912166037998227, "grad_norm": 2.1413322174743405, "learning_rate": 2.2989248333731688e-07, "loss": 0.2176, "step": 39765 }, { "epoch": 0.6912339863373255, "grad_norm": 1.7573020011100207, "learning_rate": 2.2986879533804275e-07, "loss": 0.1659, "step": 39766 }, { "epoch": 0.6912513688748283, "grad_norm": 1.0064511977897435, "learning_rate": 2.2984510819496073e-07, "loss": 0.2223, "step": 39767 }, { "epoch": 0.6912687514123311, "grad_norm": 2.4482406144204534, "learning_rate": 2.2982142190814558e-07, "loss": 0.3278, "step": 39768 }, { "epoch": 0.691286133949834, "grad_norm": 1.318951614331218, "learning_rate": 2.2979773647767237e-07, "loss": 0.1441, "step": 39769 }, { "epoch": 0.6913035164873368, "grad_norm": 1.0275335078621362, "learning_rate": 2.2977405190361605e-07, "loss": 0.1501, "step": 39770 }, { "epoch": 0.6913208990248396, "grad_norm": 1.168843408216328, "learning_rate": 2.297503681860522e-07, "loss": 0.3225, "step": 39771 }, { "epoch": 0.6913382815623424, "grad_norm": 1.7114092317347436, "learning_rate": 2.297266853250554e-07, "loss": 0.1186, "step": 39772 }, { "epoch": 0.6913556640998453, "grad_norm": 1.2575039471058784, "learning_rate": 2.2970300332070085e-07, "loss": 0.2288, "step": 39773 }, { "epoch": 0.6913730466373481, "grad_norm": 1.461756891781065, "learning_rate": 2.296793221730634e-07, "loss": 0.2537, "step": 39774 }, { "epoch": 0.6913904291748509, "grad_norm": 1.710037433858837, "learning_rate": 2.2965564188221853e-07, "loss": 0.2311, "step": 39775 }, { "epoch": 0.6914078117123538, "grad_norm": 1.5392563306507976, "learning_rate": 2.2963196244824107e-07, "loss": 0.2334, "step": 39776 }, { "epoch": 0.6914251942498566, "grad_norm": 3.0833854163996706, "learning_rate": 2.2960828387120612e-07, "loss": 0.3235, "step": 39777 }, { "epoch": 0.6914425767873594, "grad_norm": 1.8947025604798764, "learning_rate": 2.2958460615118869e-07, "loss": 0.3385, "step": 39778 }, { "epoch": 0.6914599593248623, "grad_norm": 2.358694333435633, "learning_rate": 2.2956092928826387e-07, "loss": 0.2917, "step": 39779 }, { "epoch": 0.6914773418623651, "grad_norm": 1.1512614963623289, "learning_rate": 2.2953725328250663e-07, "loss": 0.2245, "step": 39780 }, { "epoch": 0.6914947243998679, "grad_norm": 1.1838121427978525, "learning_rate": 2.295135781339921e-07, "loss": 0.1814, "step": 39781 }, { "epoch": 0.6915121069373708, "grad_norm": 1.279768390175996, "learning_rate": 2.2948990384279505e-07, "loss": 0.2628, "step": 39782 }, { "epoch": 0.6915294894748736, "grad_norm": 3.311083296148671, "learning_rate": 2.2946623040899093e-07, "loss": 0.2865, "step": 39783 }, { "epoch": 0.6915468720123764, "grad_norm": 2.054929167663485, "learning_rate": 2.2944255783265475e-07, "loss": 0.2524, "step": 39784 }, { "epoch": 0.6915642545498792, "grad_norm": 1.342564877121749, "learning_rate": 2.2941888611386106e-07, "loss": 0.225, "step": 39785 }, { "epoch": 0.691581637087382, "grad_norm": 1.0638128810948146, "learning_rate": 2.2939521525268534e-07, "loss": 0.2409, "step": 39786 }, { "epoch": 0.6915990196248848, "grad_norm": 1.7861520578131156, "learning_rate": 2.2937154524920253e-07, "loss": 0.1809, "step": 39787 }, { "epoch": 0.6916164021623876, "grad_norm": 1.3981314162625247, "learning_rate": 2.293478761034876e-07, "loss": 0.2805, "step": 39788 }, { "epoch": 0.6916337846998905, "grad_norm": 2.0863281920165657, "learning_rate": 2.2932420781561557e-07, "loss": 0.1505, "step": 39789 }, { "epoch": 0.6916511672373933, "grad_norm": 1.6211576612007133, "learning_rate": 2.2930054038566143e-07, "loss": 0.2475, "step": 39790 }, { "epoch": 0.6916685497748961, "grad_norm": 3.5258185077385744, "learning_rate": 2.2927687381370025e-07, "loss": 0.3576, "step": 39791 }, { "epoch": 0.6916859323123989, "grad_norm": 1.840766176407185, "learning_rate": 2.2925320809980704e-07, "loss": 0.22, "step": 39792 }, { "epoch": 0.6917033148499018, "grad_norm": 1.1641824600362738, "learning_rate": 2.292295432440566e-07, "loss": 0.1497, "step": 39793 }, { "epoch": 0.6917206973874046, "grad_norm": 1.3071839901221116, "learning_rate": 2.2920587924652429e-07, "loss": 0.1419, "step": 39794 }, { "epoch": 0.6917380799249074, "grad_norm": 1.7762335406117513, "learning_rate": 2.2918221610728494e-07, "loss": 0.297, "step": 39795 }, { "epoch": 0.6917554624624103, "grad_norm": 1.3462194001115706, "learning_rate": 2.291585538264138e-07, "loss": 0.3228, "step": 39796 }, { "epoch": 0.6917728449999131, "grad_norm": 1.4141825854608019, "learning_rate": 2.291348924039852e-07, "loss": 0.2171, "step": 39797 }, { "epoch": 0.6917902275374159, "grad_norm": 1.2629276526743212, "learning_rate": 2.291112318400748e-07, "loss": 0.347, "step": 39798 }, { "epoch": 0.6918076100749188, "grad_norm": 1.6235827675423642, "learning_rate": 2.2908757213475733e-07, "loss": 0.3177, "step": 39799 }, { "epoch": 0.6918249926124216, "grad_norm": 2.208316544909819, "learning_rate": 2.2906391328810782e-07, "loss": 0.3082, "step": 39800 }, { "epoch": 0.6918423751499244, "grad_norm": 1.356206051981381, "learning_rate": 2.2904025530020104e-07, "loss": 0.2056, "step": 39801 }, { "epoch": 0.6918597576874272, "grad_norm": 3.1839266897077945, "learning_rate": 2.290165981711124e-07, "loss": 0.2763, "step": 39802 }, { "epoch": 0.6918771402249301, "grad_norm": 1.27285493368011, "learning_rate": 2.2899294190091683e-07, "loss": 0.2402, "step": 39803 }, { "epoch": 0.6918945227624329, "grad_norm": 1.6998735048413691, "learning_rate": 2.2896928648968893e-07, "loss": 0.1719, "step": 39804 }, { "epoch": 0.6919119052999357, "grad_norm": 1.6353835768904963, "learning_rate": 2.2894563193750377e-07, "loss": 0.1551, "step": 39805 }, { "epoch": 0.6919292878374385, "grad_norm": 1.5641362210313912, "learning_rate": 2.289219782444366e-07, "loss": 0.2358, "step": 39806 }, { "epoch": 0.6919466703749413, "grad_norm": 1.6829833770127252, "learning_rate": 2.288983254105622e-07, "loss": 0.2656, "step": 39807 }, { "epoch": 0.6919640529124441, "grad_norm": 1.3073923679310469, "learning_rate": 2.2887467343595562e-07, "loss": 0.32, "step": 39808 }, { "epoch": 0.691981435449947, "grad_norm": 1.4690521570240949, "learning_rate": 2.2885102232069175e-07, "loss": 0.2341, "step": 39809 }, { "epoch": 0.6919988179874498, "grad_norm": 4.114362952014991, "learning_rate": 2.2882737206484565e-07, "loss": 0.24, "step": 39810 }, { "epoch": 0.6920162005249526, "grad_norm": 1.1551148380245813, "learning_rate": 2.2880372266849213e-07, "loss": 0.1867, "step": 39811 }, { "epoch": 0.6920335830624554, "grad_norm": 0.9524037603702149, "learning_rate": 2.287800741317063e-07, "loss": 0.2676, "step": 39812 }, { "epoch": 0.6920509655999583, "grad_norm": 1.8985906309189136, "learning_rate": 2.2875642645456277e-07, "loss": 0.2441, "step": 39813 }, { "epoch": 0.6920683481374611, "grad_norm": 1.134762539034774, "learning_rate": 2.2873277963713705e-07, "loss": 0.1561, "step": 39814 }, { "epoch": 0.6920857306749639, "grad_norm": 1.8628254519001046, "learning_rate": 2.2870913367950396e-07, "loss": 0.2029, "step": 39815 }, { "epoch": 0.6921031132124668, "grad_norm": 1.1745717606775044, "learning_rate": 2.2868548858173792e-07, "loss": 0.194, "step": 39816 }, { "epoch": 0.6921204957499696, "grad_norm": 1.1628962848109776, "learning_rate": 2.2866184434391444e-07, "loss": 0.238, "step": 39817 }, { "epoch": 0.6921378782874724, "grad_norm": 1.277881000897305, "learning_rate": 2.2863820096610825e-07, "loss": 0.2122, "step": 39818 }, { "epoch": 0.6921552608249752, "grad_norm": 0.7714431175434024, "learning_rate": 2.2861455844839433e-07, "loss": 0.2119, "step": 39819 }, { "epoch": 0.6921726433624781, "grad_norm": 1.3597740631070792, "learning_rate": 2.2859091679084758e-07, "loss": 0.132, "step": 39820 }, { "epoch": 0.6921900258999809, "grad_norm": 1.1659344433114907, "learning_rate": 2.285672759935428e-07, "loss": 0.1943, "step": 39821 }, { "epoch": 0.6922074084374837, "grad_norm": 1.5777546160959324, "learning_rate": 2.285436360565554e-07, "loss": 0.1429, "step": 39822 }, { "epoch": 0.6922247909749866, "grad_norm": 0.9848202293534344, "learning_rate": 2.2851999697995984e-07, "loss": 0.1919, "step": 39823 }, { "epoch": 0.6922421735124894, "grad_norm": 1.5051145024952581, "learning_rate": 2.2849635876383095e-07, "loss": 0.1978, "step": 39824 }, { "epoch": 0.6922595560499922, "grad_norm": 1.8280129198547164, "learning_rate": 2.2847272140824408e-07, "loss": 0.1909, "step": 39825 }, { "epoch": 0.692276938587495, "grad_norm": 2.8650102102051034, "learning_rate": 2.2844908491327396e-07, "loss": 0.3014, "step": 39826 }, { "epoch": 0.6922943211249978, "grad_norm": 1.9854442480725167, "learning_rate": 2.284254492789955e-07, "loss": 0.3227, "step": 39827 }, { "epoch": 0.6923117036625006, "grad_norm": 1.457071646051665, "learning_rate": 2.2840181450548356e-07, "loss": 0.2646, "step": 39828 }, { "epoch": 0.6923290862000034, "grad_norm": 1.5451034140998863, "learning_rate": 2.2837818059281321e-07, "loss": 0.251, "step": 39829 }, { "epoch": 0.6923464687375063, "grad_norm": 1.0061984993708628, "learning_rate": 2.2835454754105922e-07, "loss": 0.1657, "step": 39830 }, { "epoch": 0.6923638512750091, "grad_norm": 1.5305701855009313, "learning_rate": 2.2833091535029648e-07, "loss": 0.2193, "step": 39831 }, { "epoch": 0.6923812338125119, "grad_norm": 1.359233667441569, "learning_rate": 2.283072840205998e-07, "loss": 0.3997, "step": 39832 }, { "epoch": 0.6923986163500148, "grad_norm": 5.535060827393738, "learning_rate": 2.2828365355204437e-07, "loss": 0.5752, "step": 39833 }, { "epoch": 0.6924159988875176, "grad_norm": 1.7870899863936396, "learning_rate": 2.2826002394470517e-07, "loss": 0.147, "step": 39834 }, { "epoch": 0.6924333814250204, "grad_norm": 0.7688912263712404, "learning_rate": 2.2823639519865669e-07, "loss": 0.2701, "step": 39835 }, { "epoch": 0.6924507639625233, "grad_norm": 1.7779590578525255, "learning_rate": 2.2821276731397376e-07, "loss": 0.3814, "step": 39836 }, { "epoch": 0.6924681465000261, "grad_norm": 1.7251031539960104, "learning_rate": 2.2818914029073176e-07, "loss": 0.2893, "step": 39837 }, { "epoch": 0.6924855290375289, "grad_norm": 1.0851426236399586, "learning_rate": 2.2816551412900525e-07, "loss": 0.2341, "step": 39838 }, { "epoch": 0.6925029115750317, "grad_norm": 1.370992032279021, "learning_rate": 2.2814188882886925e-07, "loss": 0.2496, "step": 39839 }, { "epoch": 0.6925202941125346, "grad_norm": 1.673553520588685, "learning_rate": 2.281182643903984e-07, "loss": 0.2849, "step": 39840 }, { "epoch": 0.6925376766500374, "grad_norm": 0.9821553202353958, "learning_rate": 2.2809464081366808e-07, "loss": 0.2359, "step": 39841 }, { "epoch": 0.6925550591875402, "grad_norm": 1.1314829661281447, "learning_rate": 2.2807101809875273e-07, "loss": 0.4365, "step": 39842 }, { "epoch": 0.6925724417250431, "grad_norm": 1.207332496663082, "learning_rate": 2.2804739624572729e-07, "loss": 0.3103, "step": 39843 }, { "epoch": 0.6925898242625459, "grad_norm": 1.024239270851795, "learning_rate": 2.2802377525466647e-07, "loss": 0.253, "step": 39844 }, { "epoch": 0.6926072068000487, "grad_norm": 1.732849554588893, "learning_rate": 2.2800015512564557e-07, "loss": 0.1637, "step": 39845 }, { "epoch": 0.6926245893375514, "grad_norm": 2.2627355118429153, "learning_rate": 2.2797653585873922e-07, "loss": 0.2521, "step": 39846 }, { "epoch": 0.6926419718750543, "grad_norm": 1.2766742561399773, "learning_rate": 2.2795291745402233e-07, "loss": 0.1866, "step": 39847 }, { "epoch": 0.6926593544125571, "grad_norm": 1.2002863696782566, "learning_rate": 2.2792929991156966e-07, "loss": 0.3375, "step": 39848 }, { "epoch": 0.6926767369500599, "grad_norm": 1.9744774401313963, "learning_rate": 2.279056832314562e-07, "loss": 0.2137, "step": 39849 }, { "epoch": 0.6926941194875628, "grad_norm": 1.349854327862945, "learning_rate": 2.278820674137567e-07, "loss": 0.1827, "step": 39850 }, { "epoch": 0.6927115020250656, "grad_norm": 2.2756489072798103, "learning_rate": 2.2785845245854584e-07, "loss": 0.2041, "step": 39851 }, { "epoch": 0.6927288845625684, "grad_norm": 1.6459998414382078, "learning_rate": 2.278348383658989e-07, "loss": 0.1477, "step": 39852 }, { "epoch": 0.6927462671000713, "grad_norm": 1.9275332933310747, "learning_rate": 2.2781122513589063e-07, "loss": 0.2023, "step": 39853 }, { "epoch": 0.6927636496375741, "grad_norm": 1.2575390569254978, "learning_rate": 2.277876127685956e-07, "loss": 0.2738, "step": 39854 }, { "epoch": 0.6927810321750769, "grad_norm": 2.5969650831148154, "learning_rate": 2.2776400126408857e-07, "loss": 0.321, "step": 39855 }, { "epoch": 0.6927984147125797, "grad_norm": 0.981184982547871, "learning_rate": 2.2774039062244482e-07, "loss": 0.2019, "step": 39856 }, { "epoch": 0.6928157972500826, "grad_norm": 1.6772418236654536, "learning_rate": 2.2771678084373896e-07, "loss": 0.2613, "step": 39857 }, { "epoch": 0.6928331797875854, "grad_norm": 1.021723604845836, "learning_rate": 2.2769317192804584e-07, "loss": 0.1885, "step": 39858 }, { "epoch": 0.6928505623250882, "grad_norm": 1.7934825346460177, "learning_rate": 2.2766956387544005e-07, "loss": 0.1802, "step": 39859 }, { "epoch": 0.6928679448625911, "grad_norm": 1.8226365758354774, "learning_rate": 2.2764595668599705e-07, "loss": 0.3074, "step": 39860 }, { "epoch": 0.6928853274000939, "grad_norm": 1.3820213327542727, "learning_rate": 2.276223503597911e-07, "loss": 0.1716, "step": 39861 }, { "epoch": 0.6929027099375967, "grad_norm": 1.2081586201549221, "learning_rate": 2.275987448968971e-07, "loss": 0.1514, "step": 39862 }, { "epoch": 0.6929200924750996, "grad_norm": 1.8764834312234098, "learning_rate": 2.2757514029738983e-07, "loss": 0.2019, "step": 39863 }, { "epoch": 0.6929374750126024, "grad_norm": 1.622550883482257, "learning_rate": 2.2755153656134441e-07, "loss": 0.2455, "step": 39864 }, { "epoch": 0.6929548575501052, "grad_norm": 2.349633450224334, "learning_rate": 2.2752793368883543e-07, "loss": 0.3003, "step": 39865 }, { "epoch": 0.6929722400876079, "grad_norm": 2.6442855787272777, "learning_rate": 2.2750433167993776e-07, "loss": 0.3565, "step": 39866 }, { "epoch": 0.6929896226251108, "grad_norm": 2.6580694261208992, "learning_rate": 2.2748073053472615e-07, "loss": 0.1983, "step": 39867 }, { "epoch": 0.6930070051626136, "grad_norm": 1.2414340336955312, "learning_rate": 2.2745713025327552e-07, "loss": 0.2072, "step": 39868 }, { "epoch": 0.6930243877001164, "grad_norm": 1.2714337942803278, "learning_rate": 2.2743353083566054e-07, "loss": 0.2414, "step": 39869 }, { "epoch": 0.6930417702376193, "grad_norm": 1.6974476268735121, "learning_rate": 2.2740993228195605e-07, "loss": 0.185, "step": 39870 }, { "epoch": 0.6930591527751221, "grad_norm": 1.6157059152067264, "learning_rate": 2.2738633459223666e-07, "loss": 0.2405, "step": 39871 }, { "epoch": 0.6930765353126249, "grad_norm": 2.7386065828718875, "learning_rate": 2.2736273776657778e-07, "loss": 0.2894, "step": 39872 }, { "epoch": 0.6930939178501277, "grad_norm": 1.8114419580933194, "learning_rate": 2.2733914180505355e-07, "loss": 0.1224, "step": 39873 }, { "epoch": 0.6931113003876306, "grad_norm": 1.9334094510510809, "learning_rate": 2.2731554670773885e-07, "loss": 0.1533, "step": 39874 }, { "epoch": 0.6931286829251334, "grad_norm": 1.9184887395611856, "learning_rate": 2.2729195247470872e-07, "loss": 0.239, "step": 39875 }, { "epoch": 0.6931460654626362, "grad_norm": 1.5433835492934342, "learning_rate": 2.2726835910603786e-07, "loss": 0.1118, "step": 39876 }, { "epoch": 0.6931634480001391, "grad_norm": 2.0274986579949217, "learning_rate": 2.2724476660180102e-07, "loss": 0.1647, "step": 39877 }, { "epoch": 0.6931808305376419, "grad_norm": 1.6156961644361998, "learning_rate": 2.2722117496207298e-07, "loss": 0.3571, "step": 39878 }, { "epoch": 0.6931982130751447, "grad_norm": 1.1816680564390436, "learning_rate": 2.2719758418692857e-07, "loss": 0.2139, "step": 39879 }, { "epoch": 0.6932155956126476, "grad_norm": 1.8723019927978832, "learning_rate": 2.2717399427644246e-07, "loss": 0.2932, "step": 39880 }, { "epoch": 0.6932329781501504, "grad_norm": 1.304482833511394, "learning_rate": 2.271504052306894e-07, "loss": 0.1878, "step": 39881 }, { "epoch": 0.6932503606876532, "grad_norm": 1.8995931471478336, "learning_rate": 2.2712681704974412e-07, "loss": 0.2241, "step": 39882 }, { "epoch": 0.693267743225156, "grad_norm": 1.0968952092244102, "learning_rate": 2.2710322973368162e-07, "loss": 0.2309, "step": 39883 }, { "epoch": 0.6932851257626589, "grad_norm": 1.1487584521118177, "learning_rate": 2.2707964328257656e-07, "loss": 0.3113, "step": 39884 }, { "epoch": 0.6933025083001617, "grad_norm": 1.3497531090329966, "learning_rate": 2.2705605769650382e-07, "loss": 0.2439, "step": 39885 }, { "epoch": 0.6933198908376644, "grad_norm": 3.6536787592477213, "learning_rate": 2.2703247297553762e-07, "loss": 0.2967, "step": 39886 }, { "epoch": 0.6933372733751673, "grad_norm": 1.2767065959258992, "learning_rate": 2.270088891197533e-07, "loss": 0.2182, "step": 39887 }, { "epoch": 0.6933546559126701, "grad_norm": 2.3496296504095833, "learning_rate": 2.269853061292254e-07, "loss": 0.209, "step": 39888 }, { "epoch": 0.6933720384501729, "grad_norm": 0.8254924307940256, "learning_rate": 2.2696172400402864e-07, "loss": 0.214, "step": 39889 }, { "epoch": 0.6933894209876758, "grad_norm": 1.497851980703774, "learning_rate": 2.2693814274423761e-07, "loss": 0.1083, "step": 39890 }, { "epoch": 0.6934068035251786, "grad_norm": 1.784493590088757, "learning_rate": 2.2691456234992762e-07, "loss": 0.295, "step": 39891 }, { "epoch": 0.6934241860626814, "grad_norm": 1.2545046864857967, "learning_rate": 2.268909828211728e-07, "loss": 0.2353, "step": 39892 }, { "epoch": 0.6934415686001842, "grad_norm": 1.3493133669968533, "learning_rate": 2.2686740415804817e-07, "loss": 0.2288, "step": 39893 }, { "epoch": 0.6934589511376871, "grad_norm": 1.2779679577130465, "learning_rate": 2.2684382636062816e-07, "loss": 0.2129, "step": 39894 }, { "epoch": 0.6934763336751899, "grad_norm": 1.8092148610919012, "learning_rate": 2.2682024942898798e-07, "loss": 0.2447, "step": 39895 }, { "epoch": 0.6934937162126927, "grad_norm": 2.273901144243573, "learning_rate": 2.2679667336320208e-07, "loss": 0.2124, "step": 39896 }, { "epoch": 0.6935110987501956, "grad_norm": 1.7733253910875584, "learning_rate": 2.2677309816334522e-07, "loss": 0.2355, "step": 39897 }, { "epoch": 0.6935284812876984, "grad_norm": 2.076657105717574, "learning_rate": 2.2674952382949214e-07, "loss": 0.2428, "step": 39898 }, { "epoch": 0.6935458638252012, "grad_norm": 2.2869209859086954, "learning_rate": 2.2672595036171755e-07, "loss": 0.2644, "step": 39899 }, { "epoch": 0.6935632463627041, "grad_norm": 2.215263249472321, "learning_rate": 2.2670237776009622e-07, "loss": 0.1852, "step": 39900 }, { "epoch": 0.6935806289002069, "grad_norm": 1.316478372654147, "learning_rate": 2.2667880602470274e-07, "loss": 0.1941, "step": 39901 }, { "epoch": 0.6935980114377097, "grad_norm": 1.590318502808876, "learning_rate": 2.266552351556117e-07, "loss": 0.1338, "step": 39902 }, { "epoch": 0.6936153939752125, "grad_norm": 1.1290811918832844, "learning_rate": 2.2663166515289822e-07, "loss": 0.1327, "step": 39903 }, { "epoch": 0.6936327765127154, "grad_norm": 1.739142784121903, "learning_rate": 2.26608096016637e-07, "loss": 0.1614, "step": 39904 }, { "epoch": 0.6936501590502181, "grad_norm": 1.4057570972845084, "learning_rate": 2.2658452774690213e-07, "loss": 0.1599, "step": 39905 }, { "epoch": 0.6936675415877209, "grad_norm": 1.012736037141982, "learning_rate": 2.2656096034376888e-07, "loss": 0.2646, "step": 39906 }, { "epoch": 0.6936849241252238, "grad_norm": 1.81881205951327, "learning_rate": 2.2653739380731174e-07, "loss": 0.2334, "step": 39907 }, { "epoch": 0.6937023066627266, "grad_norm": 1.0708078706180206, "learning_rate": 2.265138281376055e-07, "loss": 0.193, "step": 39908 }, { "epoch": 0.6937196892002294, "grad_norm": 1.370914188850227, "learning_rate": 2.2649026333472483e-07, "loss": 0.2626, "step": 39909 }, { "epoch": 0.6937370717377322, "grad_norm": 1.1181129081424281, "learning_rate": 2.2646669939874429e-07, "loss": 0.153, "step": 39910 }, { "epoch": 0.6937544542752351, "grad_norm": 1.6592367302344022, "learning_rate": 2.264431363297387e-07, "loss": 0.1825, "step": 39911 }, { "epoch": 0.6937718368127379, "grad_norm": 2.246646563863569, "learning_rate": 2.2641957412778273e-07, "loss": 0.2342, "step": 39912 }, { "epoch": 0.6937892193502407, "grad_norm": 1.7243095678535532, "learning_rate": 2.2639601279295085e-07, "loss": 0.273, "step": 39913 }, { "epoch": 0.6938066018877436, "grad_norm": 1.5755775655799935, "learning_rate": 2.263724523253181e-07, "loss": 0.1177, "step": 39914 }, { "epoch": 0.6938239844252464, "grad_norm": 1.036798242994658, "learning_rate": 2.26348892724959e-07, "loss": 0.2043, "step": 39915 }, { "epoch": 0.6938413669627492, "grad_norm": 1.1667806775080012, "learning_rate": 2.2632533399194837e-07, "loss": 0.1653, "step": 39916 }, { "epoch": 0.6938587495002521, "grad_norm": 1.2682425922887965, "learning_rate": 2.2630177612636037e-07, "loss": 0.1977, "step": 39917 }, { "epoch": 0.6938761320377549, "grad_norm": 1.5117403120930253, "learning_rate": 2.2627821912827015e-07, "loss": 0.2939, "step": 39918 }, { "epoch": 0.6938935145752577, "grad_norm": 1.5335751919353282, "learning_rate": 2.2625466299775224e-07, "loss": 0.224, "step": 39919 }, { "epoch": 0.6939108971127605, "grad_norm": 2.179443391101677, "learning_rate": 2.2623110773488136e-07, "loss": 0.143, "step": 39920 }, { "epoch": 0.6939282796502634, "grad_norm": 1.711196077872615, "learning_rate": 2.262075533397319e-07, "loss": 0.2618, "step": 39921 }, { "epoch": 0.6939456621877662, "grad_norm": 1.884783711807755, "learning_rate": 2.261839998123789e-07, "loss": 0.1809, "step": 39922 }, { "epoch": 0.693963044725269, "grad_norm": 0.5648137585460502, "learning_rate": 2.26160447152897e-07, "loss": 0.1654, "step": 39923 }, { "epoch": 0.6939804272627719, "grad_norm": 2.3309759220822746, "learning_rate": 2.2613689536136056e-07, "loss": 0.2232, "step": 39924 }, { "epoch": 0.6939978098002746, "grad_norm": 0.9099900743362659, "learning_rate": 2.2611334443784413e-07, "loss": 0.1327, "step": 39925 }, { "epoch": 0.6940151923377774, "grad_norm": 3.1970287510626747, "learning_rate": 2.260897943824228e-07, "loss": 0.3729, "step": 39926 }, { "epoch": 0.6940325748752803, "grad_norm": 1.0402636088462531, "learning_rate": 2.2606624519517097e-07, "loss": 0.108, "step": 39927 }, { "epoch": 0.6940499574127831, "grad_norm": 1.7416159932454962, "learning_rate": 2.2604269687616335e-07, "loss": 0.2279, "step": 39928 }, { "epoch": 0.6940673399502859, "grad_norm": 2.1593017902011944, "learning_rate": 2.2601914942547446e-07, "loss": 0.3223, "step": 39929 }, { "epoch": 0.6940847224877887, "grad_norm": 1.4570016326572484, "learning_rate": 2.259956028431791e-07, "loss": 0.2644, "step": 39930 }, { "epoch": 0.6941021050252916, "grad_norm": 5.51161635635718, "learning_rate": 2.2597205712935175e-07, "loss": 0.2236, "step": 39931 }, { "epoch": 0.6941194875627944, "grad_norm": 1.2431187439284577, "learning_rate": 2.2594851228406715e-07, "loss": 0.1732, "step": 39932 }, { "epoch": 0.6941368701002972, "grad_norm": 0.6595637628575808, "learning_rate": 2.259249683073996e-07, "loss": 0.202, "step": 39933 }, { "epoch": 0.6941542526378001, "grad_norm": 1.6085627601840295, "learning_rate": 2.2590142519942428e-07, "loss": 0.3334, "step": 39934 }, { "epoch": 0.6941716351753029, "grad_norm": 1.214175510326707, "learning_rate": 2.2587788296021566e-07, "loss": 0.2219, "step": 39935 }, { "epoch": 0.6941890177128057, "grad_norm": 1.4265033960335405, "learning_rate": 2.258543415898479e-07, "loss": 0.2115, "step": 39936 }, { "epoch": 0.6942064002503086, "grad_norm": 1.1123631762186421, "learning_rate": 2.258308010883961e-07, "loss": 0.2088, "step": 39937 }, { "epoch": 0.6942237827878114, "grad_norm": 1.593953129327767, "learning_rate": 2.2580726145593475e-07, "loss": 0.3034, "step": 39938 }, { "epoch": 0.6942411653253142, "grad_norm": 1.2847469069739264, "learning_rate": 2.2578372269253838e-07, "loss": 0.1527, "step": 39939 }, { "epoch": 0.694258547862817, "grad_norm": 0.8423245753851757, "learning_rate": 2.2576018479828169e-07, "loss": 0.1128, "step": 39940 }, { "epoch": 0.6942759304003199, "grad_norm": 1.3804118802616776, "learning_rate": 2.2573664777323903e-07, "loss": 0.1774, "step": 39941 }, { "epoch": 0.6942933129378227, "grad_norm": 1.9070875835520549, "learning_rate": 2.2571311161748557e-07, "loss": 0.2069, "step": 39942 }, { "epoch": 0.6943106954753255, "grad_norm": 1.7640816608091598, "learning_rate": 2.2568957633109541e-07, "loss": 0.1966, "step": 39943 }, { "epoch": 0.6943280780128284, "grad_norm": 1.2679978318378327, "learning_rate": 2.2566604191414307e-07, "loss": 0.212, "step": 39944 }, { "epoch": 0.6943454605503311, "grad_norm": 1.7373588255372876, "learning_rate": 2.256425083667035e-07, "loss": 0.2715, "step": 39945 }, { "epoch": 0.6943628430878339, "grad_norm": 1.139224871916214, "learning_rate": 2.2561897568885123e-07, "loss": 0.2314, "step": 39946 }, { "epoch": 0.6943802256253367, "grad_norm": 1.2331786946564804, "learning_rate": 2.255954438806607e-07, "loss": 0.1503, "step": 39947 }, { "epoch": 0.6943976081628396, "grad_norm": 1.5210151583355778, "learning_rate": 2.2557191294220663e-07, "loss": 0.1652, "step": 39948 }, { "epoch": 0.6944149907003424, "grad_norm": 2.1860585637432792, "learning_rate": 2.2554838287356349e-07, "loss": 0.2693, "step": 39949 }, { "epoch": 0.6944323732378452, "grad_norm": 1.4481625917037724, "learning_rate": 2.255248536748059e-07, "loss": 0.2197, "step": 39950 }, { "epoch": 0.6944497557753481, "grad_norm": 1.1940358291292668, "learning_rate": 2.2550132534600848e-07, "loss": 0.185, "step": 39951 }, { "epoch": 0.6944671383128509, "grad_norm": 3.081306301309311, "learning_rate": 2.2547779788724553e-07, "loss": 0.2857, "step": 39952 }, { "epoch": 0.6944845208503537, "grad_norm": 1.2998269351479108, "learning_rate": 2.2545427129859206e-07, "loss": 0.1479, "step": 39953 }, { "epoch": 0.6945019033878566, "grad_norm": 2.1979466028184014, "learning_rate": 2.254307455801226e-07, "loss": 0.1988, "step": 39954 }, { "epoch": 0.6945192859253594, "grad_norm": 0.8246213303476273, "learning_rate": 2.254072207319112e-07, "loss": 0.1362, "step": 39955 }, { "epoch": 0.6945366684628622, "grad_norm": 1.7592367496079673, "learning_rate": 2.2538369675403295e-07, "loss": 0.2031, "step": 39956 }, { "epoch": 0.694554051000365, "grad_norm": 1.5147461203467634, "learning_rate": 2.2536017364656224e-07, "loss": 0.2115, "step": 39957 }, { "epoch": 0.6945714335378679, "grad_norm": 1.6459583550417576, "learning_rate": 2.2533665140957363e-07, "loss": 0.1329, "step": 39958 }, { "epoch": 0.6945888160753707, "grad_norm": 1.9280408600294818, "learning_rate": 2.253131300431416e-07, "loss": 0.2862, "step": 39959 }, { "epoch": 0.6946061986128735, "grad_norm": 1.0006539832707944, "learning_rate": 2.2528960954734066e-07, "loss": 0.2753, "step": 39960 }, { "epoch": 0.6946235811503764, "grad_norm": 1.0942392372489351, "learning_rate": 2.2526608992224583e-07, "loss": 0.2318, "step": 39961 }, { "epoch": 0.6946409636878792, "grad_norm": 2.5563720054293526, "learning_rate": 2.2524257116793112e-07, "loss": 0.2009, "step": 39962 }, { "epoch": 0.694658346225382, "grad_norm": 2.3905749982778524, "learning_rate": 2.25219053284471e-07, "loss": 0.2239, "step": 39963 }, { "epoch": 0.6946757287628849, "grad_norm": 1.7299267178256341, "learning_rate": 2.251955362719405e-07, "loss": 0.1632, "step": 39964 }, { "epoch": 0.6946931113003876, "grad_norm": 2.354427107064103, "learning_rate": 2.2517202013041387e-07, "loss": 0.2199, "step": 39965 }, { "epoch": 0.6947104938378904, "grad_norm": 1.4071379271592903, "learning_rate": 2.2514850485996566e-07, "loss": 0.1785, "step": 39966 }, { "epoch": 0.6947278763753932, "grad_norm": 1.5493839767114537, "learning_rate": 2.251249904606705e-07, "loss": 0.3675, "step": 39967 }, { "epoch": 0.6947452589128961, "grad_norm": 1.0495081819128187, "learning_rate": 2.2510147693260284e-07, "loss": 0.1537, "step": 39968 }, { "epoch": 0.6947626414503989, "grad_norm": 2.3228076709930052, "learning_rate": 2.2507796427583726e-07, "loss": 0.2598, "step": 39969 }, { "epoch": 0.6947800239879017, "grad_norm": 1.4153055939666392, "learning_rate": 2.250544524904482e-07, "loss": 0.2787, "step": 39970 }, { "epoch": 0.6947974065254046, "grad_norm": 1.856977873181176, "learning_rate": 2.2503094157650997e-07, "loss": 0.2376, "step": 39971 }, { "epoch": 0.6948147890629074, "grad_norm": 2.942587466944353, "learning_rate": 2.2500743153409763e-07, "loss": 0.2392, "step": 39972 }, { "epoch": 0.6948321716004102, "grad_norm": 1.7116143974787348, "learning_rate": 2.2498392236328556e-07, "loss": 0.1936, "step": 39973 }, { "epoch": 0.694849554137913, "grad_norm": 1.9932439205138213, "learning_rate": 2.2496041406414794e-07, "loss": 0.1382, "step": 39974 }, { "epoch": 0.6948669366754159, "grad_norm": 1.2502624403000886, "learning_rate": 2.249369066367593e-07, "loss": 0.2305, "step": 39975 }, { "epoch": 0.6948843192129187, "grad_norm": 1.1277008231948946, "learning_rate": 2.2491340008119448e-07, "loss": 0.1429, "step": 39976 }, { "epoch": 0.6949017017504215, "grad_norm": 0.8351868056744964, "learning_rate": 2.2488989439752782e-07, "loss": 0.1761, "step": 39977 }, { "epoch": 0.6949190842879244, "grad_norm": 1.8069748881703827, "learning_rate": 2.2486638958583383e-07, "loss": 0.2224, "step": 39978 }, { "epoch": 0.6949364668254272, "grad_norm": 1.5563970685149735, "learning_rate": 2.248428856461868e-07, "loss": 0.236, "step": 39979 }, { "epoch": 0.69495384936293, "grad_norm": 1.2219040477425716, "learning_rate": 2.2481938257866179e-07, "loss": 0.207, "step": 39980 }, { "epoch": 0.6949712319004329, "grad_norm": 1.432281800368918, "learning_rate": 2.2479588038333274e-07, "loss": 0.2218, "step": 39981 }, { "epoch": 0.6949886144379357, "grad_norm": 1.2508917766840946, "learning_rate": 2.2477237906027436e-07, "loss": 0.183, "step": 39982 }, { "epoch": 0.6950059969754385, "grad_norm": 1.7356592978700902, "learning_rate": 2.2474887860956089e-07, "loss": 0.2058, "step": 39983 }, { "epoch": 0.6950233795129414, "grad_norm": 3.0423547995362945, "learning_rate": 2.2472537903126726e-07, "loss": 0.2287, "step": 39984 }, { "epoch": 0.6950407620504441, "grad_norm": 1.474622843583022, "learning_rate": 2.2470188032546766e-07, "loss": 0.165, "step": 39985 }, { "epoch": 0.6950581445879469, "grad_norm": 1.713312581808986, "learning_rate": 2.2467838249223668e-07, "loss": 0.2046, "step": 39986 }, { "epoch": 0.6950755271254497, "grad_norm": 1.5190425950371307, "learning_rate": 2.246548855316488e-07, "loss": 0.2219, "step": 39987 }, { "epoch": 0.6950929096629526, "grad_norm": 3.8489866554875043, "learning_rate": 2.246313894437784e-07, "loss": 0.2028, "step": 39988 }, { "epoch": 0.6951102922004554, "grad_norm": 2.1750506805675034, "learning_rate": 2.2460789422869996e-07, "loss": 0.2244, "step": 39989 }, { "epoch": 0.6951276747379582, "grad_norm": 1.4718205032716365, "learning_rate": 2.2458439988648807e-07, "loss": 0.2492, "step": 39990 }, { "epoch": 0.695145057275461, "grad_norm": 1.2195904547259984, "learning_rate": 2.2456090641721693e-07, "loss": 0.2456, "step": 39991 }, { "epoch": 0.6951624398129639, "grad_norm": 1.3480254371231226, "learning_rate": 2.2453741382096152e-07, "loss": 0.2448, "step": 39992 }, { "epoch": 0.6951798223504667, "grad_norm": 1.3125165578341624, "learning_rate": 2.245139220977958e-07, "loss": 0.2138, "step": 39993 }, { "epoch": 0.6951972048879695, "grad_norm": 3.0542369650066923, "learning_rate": 2.2449043124779422e-07, "loss": 0.1969, "step": 39994 }, { "epoch": 0.6952145874254724, "grad_norm": 1.2195773508685868, "learning_rate": 2.2446694127103156e-07, "loss": 0.1578, "step": 39995 }, { "epoch": 0.6952319699629752, "grad_norm": 1.834491148426612, "learning_rate": 2.2444345216758215e-07, "loss": 0.1552, "step": 39996 }, { "epoch": 0.695249352500478, "grad_norm": 1.8233546304489214, "learning_rate": 2.2441996393752044e-07, "loss": 0.2203, "step": 39997 }, { "epoch": 0.6952667350379809, "grad_norm": 1.1336450464428025, "learning_rate": 2.2439647658092082e-07, "loss": 0.1929, "step": 39998 }, { "epoch": 0.6952841175754837, "grad_norm": 1.2486845215882856, "learning_rate": 2.243729900978578e-07, "loss": 0.2562, "step": 39999 }, { "epoch": 0.6953015001129865, "grad_norm": 1.23120563807656, "learning_rate": 2.2434950448840578e-07, "loss": 0.2709, "step": 40000 }, { "epoch": 0.6953188826504894, "grad_norm": 0.9437948043355195, "learning_rate": 2.243260197526392e-07, "loss": 0.1476, "step": 40001 }, { "epoch": 0.6953362651879922, "grad_norm": 1.8805541001409232, "learning_rate": 2.243025358906323e-07, "loss": 0.2042, "step": 40002 }, { "epoch": 0.695353647725495, "grad_norm": 1.3315926628724488, "learning_rate": 2.2427905290245992e-07, "loss": 0.1879, "step": 40003 }, { "epoch": 0.6953710302629978, "grad_norm": 1.5352262925525149, "learning_rate": 2.242555707881963e-07, "loss": 0.1854, "step": 40004 }, { "epoch": 0.6953884128005006, "grad_norm": 2.1221822679803215, "learning_rate": 2.24232089547916e-07, "loss": 0.3055, "step": 40005 }, { "epoch": 0.6954057953380034, "grad_norm": 1.0954428015844735, "learning_rate": 2.24208609181693e-07, "loss": 0.2038, "step": 40006 }, { "epoch": 0.6954231778755062, "grad_norm": 2.061144414489681, "learning_rate": 2.2418512968960212e-07, "loss": 0.1726, "step": 40007 }, { "epoch": 0.6954405604130091, "grad_norm": 1.7771780438463498, "learning_rate": 2.241616510717177e-07, "loss": 0.185, "step": 40008 }, { "epoch": 0.6954579429505119, "grad_norm": 2.4391891141039785, "learning_rate": 2.2413817332811418e-07, "loss": 0.1692, "step": 40009 }, { "epoch": 0.6954753254880147, "grad_norm": 1.2676531790342653, "learning_rate": 2.2411469645886573e-07, "loss": 0.1585, "step": 40010 }, { "epoch": 0.6954927080255175, "grad_norm": 1.6649306355462736, "learning_rate": 2.240912204640473e-07, "loss": 0.1452, "step": 40011 }, { "epoch": 0.6955100905630204, "grad_norm": 1.3798115978362544, "learning_rate": 2.240677453437328e-07, "loss": 0.1661, "step": 40012 }, { "epoch": 0.6955274731005232, "grad_norm": 0.5596510830242823, "learning_rate": 2.2404427109799678e-07, "loss": 0.1395, "step": 40013 }, { "epoch": 0.695544855638026, "grad_norm": 1.862686178502811, "learning_rate": 2.240207977269135e-07, "loss": 0.2143, "step": 40014 }, { "epoch": 0.6955622381755289, "grad_norm": 1.613051924590555, "learning_rate": 2.2399732523055764e-07, "loss": 0.1531, "step": 40015 }, { "epoch": 0.6955796207130317, "grad_norm": 1.2431218668478987, "learning_rate": 2.2397385360900355e-07, "loss": 0.2055, "step": 40016 }, { "epoch": 0.6955970032505345, "grad_norm": 1.0488872657950707, "learning_rate": 2.2395038286232554e-07, "loss": 0.1691, "step": 40017 }, { "epoch": 0.6956143857880374, "grad_norm": 2.366226080384574, "learning_rate": 2.2392691299059795e-07, "loss": 0.2085, "step": 40018 }, { "epoch": 0.6956317683255402, "grad_norm": 1.694330277879699, "learning_rate": 2.2390344399389526e-07, "loss": 0.2832, "step": 40019 }, { "epoch": 0.695649150863043, "grad_norm": 1.273367695203205, "learning_rate": 2.2387997587229185e-07, "loss": 0.1565, "step": 40020 }, { "epoch": 0.6956665334005459, "grad_norm": 1.599723532121964, "learning_rate": 2.2385650862586204e-07, "loss": 0.1712, "step": 40021 }, { "epoch": 0.6956839159380487, "grad_norm": 1.607600933004304, "learning_rate": 2.2383304225468008e-07, "loss": 0.1686, "step": 40022 }, { "epoch": 0.6957012984755515, "grad_norm": 2.099400918369922, "learning_rate": 2.2380957675882067e-07, "loss": 0.1613, "step": 40023 }, { "epoch": 0.6957186810130543, "grad_norm": 1.2856798753478553, "learning_rate": 2.2378611213835825e-07, "loss": 0.1922, "step": 40024 }, { "epoch": 0.6957360635505571, "grad_norm": 1.0775402632335667, "learning_rate": 2.237626483933666e-07, "loss": 0.101, "step": 40025 }, { "epoch": 0.6957534460880599, "grad_norm": 1.4298336332224886, "learning_rate": 2.2373918552392063e-07, "loss": 0.1576, "step": 40026 }, { "epoch": 0.6957708286255627, "grad_norm": 1.5998883387104963, "learning_rate": 2.2371572353009454e-07, "loss": 0.1726, "step": 40027 }, { "epoch": 0.6957882111630656, "grad_norm": 1.4168335799063339, "learning_rate": 2.2369226241196272e-07, "loss": 0.1662, "step": 40028 }, { "epoch": 0.6958055937005684, "grad_norm": 1.6054654213413693, "learning_rate": 2.236688021695995e-07, "loss": 0.2604, "step": 40029 }, { "epoch": 0.6958229762380712, "grad_norm": 0.6775094742422678, "learning_rate": 2.2364534280307923e-07, "loss": 0.1357, "step": 40030 }, { "epoch": 0.695840358775574, "grad_norm": 1.693890187009647, "learning_rate": 2.2362188431247624e-07, "loss": 0.1664, "step": 40031 }, { "epoch": 0.6958577413130769, "grad_norm": 5.220577056349455, "learning_rate": 2.2359842669786494e-07, "loss": 0.2339, "step": 40032 }, { "epoch": 0.6958751238505797, "grad_norm": 1.2785851188271944, "learning_rate": 2.235749699593195e-07, "loss": 0.1864, "step": 40033 }, { "epoch": 0.6958925063880825, "grad_norm": 2.0401544504172753, "learning_rate": 2.2355151409691457e-07, "loss": 0.3107, "step": 40034 }, { "epoch": 0.6959098889255854, "grad_norm": 1.1491655478832066, "learning_rate": 2.2352805911072443e-07, "loss": 0.1848, "step": 40035 }, { "epoch": 0.6959272714630882, "grad_norm": 1.208036116643088, "learning_rate": 2.2350460500082346e-07, "loss": 0.1383, "step": 40036 }, { "epoch": 0.695944654000591, "grad_norm": 1.4715348595115025, "learning_rate": 2.234811517672855e-07, "loss": 0.1251, "step": 40037 }, { "epoch": 0.6959620365380939, "grad_norm": 1.9095373397642454, "learning_rate": 2.234576994101855e-07, "loss": 0.1933, "step": 40038 }, { "epoch": 0.6959794190755967, "grad_norm": 1.439799753076373, "learning_rate": 2.2343424792959752e-07, "loss": 0.1566, "step": 40039 }, { "epoch": 0.6959968016130995, "grad_norm": 1.4596067760662828, "learning_rate": 2.234107973255959e-07, "loss": 0.171, "step": 40040 }, { "epoch": 0.6960141841506023, "grad_norm": 1.236831379266386, "learning_rate": 2.233873475982549e-07, "loss": 0.1568, "step": 40041 }, { "epoch": 0.6960315666881052, "grad_norm": 2.34908889221155, "learning_rate": 2.233638987476491e-07, "loss": 0.198, "step": 40042 }, { "epoch": 0.696048949225608, "grad_norm": 1.567931171153006, "learning_rate": 2.2334045077385283e-07, "loss": 0.1262, "step": 40043 }, { "epoch": 0.6960663317631107, "grad_norm": 1.4222986016772303, "learning_rate": 2.2331700367693984e-07, "loss": 0.1701, "step": 40044 }, { "epoch": 0.6960837143006136, "grad_norm": 0.8363127564103866, "learning_rate": 2.2329355745698507e-07, "loss": 0.1169, "step": 40045 }, { "epoch": 0.6961010968381164, "grad_norm": 0.8137366926120896, "learning_rate": 2.2327011211406267e-07, "loss": 0.1112, "step": 40046 }, { "epoch": 0.6961184793756192, "grad_norm": 3.0822527259851595, "learning_rate": 2.2324666764824678e-07, "loss": 0.2665, "step": 40047 }, { "epoch": 0.696135861913122, "grad_norm": 1.7707278763911465, "learning_rate": 2.232232240596119e-07, "loss": 0.1522, "step": 40048 }, { "epoch": 0.6961532444506249, "grad_norm": 1.1002921063230942, "learning_rate": 2.2319978134823225e-07, "loss": 0.1381, "step": 40049 }, { "epoch": 0.6961706269881277, "grad_norm": 1.2375533224661037, "learning_rate": 2.2317633951418214e-07, "loss": 0.1769, "step": 40050 }, { "epoch": 0.6961880095256305, "grad_norm": 2.3646944757571386, "learning_rate": 2.231528985575359e-07, "loss": 0.28, "step": 40051 }, { "epoch": 0.6962053920631334, "grad_norm": 1.3478032127895392, "learning_rate": 2.2312945847836755e-07, "loss": 0.1629, "step": 40052 }, { "epoch": 0.6962227746006362, "grad_norm": 1.1692663537642627, "learning_rate": 2.2310601927675187e-07, "loss": 0.1469, "step": 40053 }, { "epoch": 0.696240157138139, "grad_norm": 1.3067835066832736, "learning_rate": 2.2308258095276288e-07, "loss": 0.1633, "step": 40054 }, { "epoch": 0.6962575396756419, "grad_norm": 1.3668398288342496, "learning_rate": 2.230591435064751e-07, "loss": 0.1456, "step": 40055 }, { "epoch": 0.6962749222131447, "grad_norm": 2.6976754928711686, "learning_rate": 2.2303570693796226e-07, "loss": 0.1486, "step": 40056 }, { "epoch": 0.6962923047506475, "grad_norm": 1.4478229715853244, "learning_rate": 2.2301227124729915e-07, "loss": 0.1854, "step": 40057 }, { "epoch": 0.6963096872881503, "grad_norm": 1.9276563948518186, "learning_rate": 2.2298883643455996e-07, "loss": 0.2093, "step": 40058 }, { "epoch": 0.6963270698256532, "grad_norm": 0.9794328293025554, "learning_rate": 2.2296540249981888e-07, "loss": 0.1337, "step": 40059 }, { "epoch": 0.696344452363156, "grad_norm": 1.5926651028878278, "learning_rate": 2.2294196944315001e-07, "loss": 0.1756, "step": 40060 }, { "epoch": 0.6963618349006588, "grad_norm": 1.7500965846244028, "learning_rate": 2.2291853726462805e-07, "loss": 0.1521, "step": 40061 }, { "epoch": 0.6963792174381617, "grad_norm": 1.1494715753100004, "learning_rate": 2.228951059643272e-07, "loss": 0.248, "step": 40062 }, { "epoch": 0.6963965999756645, "grad_norm": 1.4312144220259253, "learning_rate": 2.228716755423214e-07, "loss": 0.1447, "step": 40063 }, { "epoch": 0.6964139825131672, "grad_norm": 1.7673995937454243, "learning_rate": 2.228482459986849e-07, "loss": 0.1671, "step": 40064 }, { "epoch": 0.69643136505067, "grad_norm": 1.3689697706371327, "learning_rate": 2.2282481733349234e-07, "loss": 0.1582, "step": 40065 }, { "epoch": 0.6964487475881729, "grad_norm": 1.5688138860909, "learning_rate": 2.2280138954681776e-07, "loss": 0.208, "step": 40066 }, { "epoch": 0.6964661301256757, "grad_norm": 1.1302675226264127, "learning_rate": 2.227779626387355e-07, "loss": 0.1654, "step": 40067 }, { "epoch": 0.6964835126631785, "grad_norm": 1.7027609753506, "learning_rate": 2.227545366093197e-07, "loss": 0.2239, "step": 40068 }, { "epoch": 0.6965008952006814, "grad_norm": 1.4661340537237113, "learning_rate": 2.2273111145864464e-07, "loss": 0.2258, "step": 40069 }, { "epoch": 0.6965182777381842, "grad_norm": 1.1432528148653682, "learning_rate": 2.2270768718678462e-07, "loss": 0.1182, "step": 40070 }, { "epoch": 0.696535660275687, "grad_norm": 1.688885836986974, "learning_rate": 2.2268426379381388e-07, "loss": 0.1877, "step": 40071 }, { "epoch": 0.6965530428131899, "grad_norm": 1.6096848936351407, "learning_rate": 2.2266084127980645e-07, "loss": 0.1488, "step": 40072 }, { "epoch": 0.6965704253506927, "grad_norm": 2.8605809489091576, "learning_rate": 2.226374196448369e-07, "loss": 0.3719, "step": 40073 }, { "epoch": 0.6965878078881955, "grad_norm": 1.4904453700855111, "learning_rate": 2.2261399888897953e-07, "loss": 0.2052, "step": 40074 }, { "epoch": 0.6966051904256984, "grad_norm": 1.7499244338636528, "learning_rate": 2.22590579012308e-07, "loss": 0.1632, "step": 40075 }, { "epoch": 0.6966225729632012, "grad_norm": 1.2017297381064638, "learning_rate": 2.2256716001489707e-07, "loss": 0.0991, "step": 40076 }, { "epoch": 0.696639955500704, "grad_norm": 1.403611728999143, "learning_rate": 2.225437418968208e-07, "loss": 0.2222, "step": 40077 }, { "epoch": 0.6966573380382068, "grad_norm": 1.2265402493343895, "learning_rate": 2.225203246581534e-07, "loss": 0.1248, "step": 40078 }, { "epoch": 0.6966747205757097, "grad_norm": 1.1220742717846313, "learning_rate": 2.2249690829896911e-07, "loss": 0.1355, "step": 40079 }, { "epoch": 0.6966921031132125, "grad_norm": 1.5493958380333865, "learning_rate": 2.2247349281934198e-07, "loss": 0.132, "step": 40080 }, { "epoch": 0.6967094856507153, "grad_norm": 2.316499091278683, "learning_rate": 2.2245007821934675e-07, "loss": 0.1903, "step": 40081 }, { "epoch": 0.6967268681882182, "grad_norm": 1.6554829957172628, "learning_rate": 2.224266644990571e-07, "loss": 0.2014, "step": 40082 }, { "epoch": 0.696744250725721, "grad_norm": 2.271294722483309, "learning_rate": 2.2240325165854723e-07, "loss": 0.3273, "step": 40083 }, { "epoch": 0.6967616332632237, "grad_norm": 1.4450622919227825, "learning_rate": 2.2237983969789176e-07, "loss": 0.1939, "step": 40084 }, { "epoch": 0.6967790158007265, "grad_norm": 2.7051461115225797, "learning_rate": 2.2235642861716464e-07, "loss": 0.2038, "step": 40085 }, { "epoch": 0.6967963983382294, "grad_norm": 2.2855048612108777, "learning_rate": 2.223330184164401e-07, "loss": 0.2456, "step": 40086 }, { "epoch": 0.6968137808757322, "grad_norm": 2.3561838632289613, "learning_rate": 2.2230960909579233e-07, "loss": 0.2045, "step": 40087 }, { "epoch": 0.696831163413235, "grad_norm": 1.8546458828849757, "learning_rate": 2.2228620065529552e-07, "loss": 0.1774, "step": 40088 }, { "epoch": 0.6968485459507379, "grad_norm": 1.0546267314226414, "learning_rate": 2.2226279309502394e-07, "loss": 0.2181, "step": 40089 }, { "epoch": 0.6968659284882407, "grad_norm": 1.3434264752917766, "learning_rate": 2.2223938641505168e-07, "loss": 0.1891, "step": 40090 }, { "epoch": 0.6968833110257435, "grad_norm": 0.6912922025423495, "learning_rate": 2.2221598061545278e-07, "loss": 0.1524, "step": 40091 }, { "epoch": 0.6969006935632464, "grad_norm": 1.9189696856164806, "learning_rate": 2.2219257569630185e-07, "loss": 0.2414, "step": 40092 }, { "epoch": 0.6969180761007492, "grad_norm": 1.6989825170978026, "learning_rate": 2.2216917165767296e-07, "loss": 0.1366, "step": 40093 }, { "epoch": 0.696935458638252, "grad_norm": 1.0793311764984603, "learning_rate": 2.2214576849964005e-07, "loss": 0.1521, "step": 40094 }, { "epoch": 0.6969528411757548, "grad_norm": 1.3382052584434843, "learning_rate": 2.221223662222772e-07, "loss": 0.1182, "step": 40095 }, { "epoch": 0.6969702237132577, "grad_norm": 1.7550171696892118, "learning_rate": 2.2209896482565898e-07, "loss": 0.2106, "step": 40096 }, { "epoch": 0.6969876062507605, "grad_norm": 1.7648956539890417, "learning_rate": 2.220755643098594e-07, "loss": 0.1795, "step": 40097 }, { "epoch": 0.6970049887882633, "grad_norm": 1.3903413163494451, "learning_rate": 2.2205216467495264e-07, "loss": 0.191, "step": 40098 }, { "epoch": 0.6970223713257662, "grad_norm": 1.2565555402817736, "learning_rate": 2.220287659210126e-07, "loss": 0.1415, "step": 40099 }, { "epoch": 0.697039753863269, "grad_norm": 3.9074016398926052, "learning_rate": 2.2200536804811409e-07, "loss": 0.2534, "step": 40100 }, { "epoch": 0.6970571364007718, "grad_norm": 1.4339182840127649, "learning_rate": 2.2198197105633067e-07, "loss": 0.195, "step": 40101 }, { "epoch": 0.6970745189382747, "grad_norm": 2.2276937625699684, "learning_rate": 2.2195857494573666e-07, "loss": 0.2223, "step": 40102 }, { "epoch": 0.6970919014757775, "grad_norm": 2.144128701019841, "learning_rate": 2.2193517971640606e-07, "loss": 0.1607, "step": 40103 }, { "epoch": 0.6971092840132802, "grad_norm": 1.4046938264617683, "learning_rate": 2.2191178536841348e-07, "loss": 0.1556, "step": 40104 }, { "epoch": 0.697126666550783, "grad_norm": 2.41349334161854, "learning_rate": 2.2188839190183272e-07, "loss": 0.2039, "step": 40105 }, { "epoch": 0.6971440490882859, "grad_norm": 1.2187178782420116, "learning_rate": 2.2186499931673802e-07, "loss": 0.1621, "step": 40106 }, { "epoch": 0.6971614316257887, "grad_norm": 2.310620286053999, "learning_rate": 2.2184160761320353e-07, "loss": 0.1849, "step": 40107 }, { "epoch": 0.6971788141632915, "grad_norm": 1.875633985880268, "learning_rate": 2.218182167913033e-07, "loss": 0.2125, "step": 40108 }, { "epoch": 0.6971961967007944, "grad_norm": 1.0919885011853154, "learning_rate": 2.217948268511116e-07, "loss": 0.162, "step": 40109 }, { "epoch": 0.6972135792382972, "grad_norm": 1.2626855642587589, "learning_rate": 2.2177143779270246e-07, "loss": 0.2259, "step": 40110 }, { "epoch": 0.6972309617758, "grad_norm": 0.8702970268235415, "learning_rate": 2.217480496161499e-07, "loss": 0.1361, "step": 40111 }, { "epoch": 0.6972483443133028, "grad_norm": 0.854058025284314, "learning_rate": 2.217246623215286e-07, "loss": 0.1352, "step": 40112 }, { "epoch": 0.6972657268508057, "grad_norm": 1.0770621018925959, "learning_rate": 2.2170127590891207e-07, "loss": 0.1522, "step": 40113 }, { "epoch": 0.6972831093883085, "grad_norm": 1.2091017448094368, "learning_rate": 2.2167789037837447e-07, "loss": 0.2955, "step": 40114 }, { "epoch": 0.6973004919258113, "grad_norm": 1.346956082133164, "learning_rate": 2.216545057299903e-07, "loss": 0.2026, "step": 40115 }, { "epoch": 0.6973178744633142, "grad_norm": 1.3917790104853063, "learning_rate": 2.216311219638335e-07, "loss": 0.2774, "step": 40116 }, { "epoch": 0.697335257000817, "grad_norm": 1.452600244444609, "learning_rate": 2.2160773907997816e-07, "loss": 0.2287, "step": 40117 }, { "epoch": 0.6973526395383198, "grad_norm": 7.427139217019003, "learning_rate": 2.2158435707849839e-07, "loss": 0.1705, "step": 40118 }, { "epoch": 0.6973700220758227, "grad_norm": 1.6215664928797653, "learning_rate": 2.215609759594683e-07, "loss": 0.4278, "step": 40119 }, { "epoch": 0.6973874046133255, "grad_norm": 2.385059278886932, "learning_rate": 2.2153759572296204e-07, "loss": 0.229, "step": 40120 }, { "epoch": 0.6974047871508283, "grad_norm": 1.2565093785976578, "learning_rate": 2.2151421636905365e-07, "loss": 0.2046, "step": 40121 }, { "epoch": 0.6974221696883312, "grad_norm": 2.407244050761586, "learning_rate": 2.2149083789781715e-07, "loss": 0.3605, "step": 40122 }, { "epoch": 0.697439552225834, "grad_norm": 1.5310396144743599, "learning_rate": 2.2146746030932688e-07, "loss": 0.2144, "step": 40123 }, { "epoch": 0.6974569347633367, "grad_norm": 1.704143785256463, "learning_rate": 2.2144408360365684e-07, "loss": 0.1574, "step": 40124 }, { "epoch": 0.6974743173008395, "grad_norm": 3.4416649893224607, "learning_rate": 2.214207077808813e-07, "loss": 0.3034, "step": 40125 }, { "epoch": 0.6974916998383424, "grad_norm": 1.2372370679263094, "learning_rate": 2.2139733284107375e-07, "loss": 0.126, "step": 40126 }, { "epoch": 0.6975090823758452, "grad_norm": 1.5703358645619214, "learning_rate": 2.2137395878430887e-07, "loss": 0.2454, "step": 40127 }, { "epoch": 0.697526464913348, "grad_norm": 1.6248298274960062, "learning_rate": 2.2135058561066055e-07, "loss": 0.1803, "step": 40128 }, { "epoch": 0.6975438474508509, "grad_norm": 2.898234726226368, "learning_rate": 2.2132721332020287e-07, "loss": 0.2387, "step": 40129 }, { "epoch": 0.6975612299883537, "grad_norm": 1.0463176421849565, "learning_rate": 2.213038419130097e-07, "loss": 0.182, "step": 40130 }, { "epoch": 0.6975786125258565, "grad_norm": 1.7066026154211846, "learning_rate": 2.2128047138915573e-07, "loss": 0.2188, "step": 40131 }, { "epoch": 0.6975959950633593, "grad_norm": 2.13694909955772, "learning_rate": 2.2125710174871447e-07, "loss": 0.1779, "step": 40132 }, { "epoch": 0.6976133776008622, "grad_norm": 2.0440753100702076, "learning_rate": 2.2123373299175995e-07, "loss": 0.2029, "step": 40133 }, { "epoch": 0.697630760138365, "grad_norm": 0.8441635827183137, "learning_rate": 2.2121036511836661e-07, "loss": 0.1494, "step": 40134 }, { "epoch": 0.6976481426758678, "grad_norm": 1.6897306434243164, "learning_rate": 2.2118699812860835e-07, "loss": 0.2081, "step": 40135 }, { "epoch": 0.6976655252133707, "grad_norm": 1.762082640506587, "learning_rate": 2.211636320225592e-07, "loss": 0.2409, "step": 40136 }, { "epoch": 0.6976829077508735, "grad_norm": 2.2456963443377784, "learning_rate": 2.2114026680029334e-07, "loss": 0.2993, "step": 40137 }, { "epoch": 0.6977002902883763, "grad_norm": 2.1712363755720108, "learning_rate": 2.2111690246188468e-07, "loss": 0.1923, "step": 40138 }, { "epoch": 0.6977176728258792, "grad_norm": 1.2817714488905707, "learning_rate": 2.2109353900740736e-07, "loss": 0.1274, "step": 40139 }, { "epoch": 0.697735055363382, "grad_norm": 2.9483495985878205, "learning_rate": 2.210701764369353e-07, "loss": 0.2203, "step": 40140 }, { "epoch": 0.6977524379008848, "grad_norm": 1.2281207832680043, "learning_rate": 2.210468147505426e-07, "loss": 0.2076, "step": 40141 }, { "epoch": 0.6977698204383876, "grad_norm": 1.2847653998134891, "learning_rate": 2.2102345394830347e-07, "loss": 0.2589, "step": 40142 }, { "epoch": 0.6977872029758905, "grad_norm": 2.7225647806281783, "learning_rate": 2.2100009403029186e-07, "loss": 0.1936, "step": 40143 }, { "epoch": 0.6978045855133932, "grad_norm": 1.5227523249268735, "learning_rate": 2.2097673499658198e-07, "loss": 0.2062, "step": 40144 }, { "epoch": 0.697821968050896, "grad_norm": 2.636635662220633, "learning_rate": 2.209533768472473e-07, "loss": 0.3027, "step": 40145 }, { "epoch": 0.6978393505883989, "grad_norm": 1.712781864435191, "learning_rate": 2.209300195823624e-07, "loss": 0.297, "step": 40146 }, { "epoch": 0.6978567331259017, "grad_norm": 1.5706964973693698, "learning_rate": 2.2090666320200113e-07, "loss": 0.2731, "step": 40147 }, { "epoch": 0.6978741156634045, "grad_norm": 1.4897411813932688, "learning_rate": 2.2088330770623758e-07, "loss": 0.1404, "step": 40148 }, { "epoch": 0.6978914982009073, "grad_norm": 0.8987065546617635, "learning_rate": 2.2085995309514549e-07, "loss": 0.2531, "step": 40149 }, { "epoch": 0.6979088807384102, "grad_norm": 1.3065589434934457, "learning_rate": 2.2083659936879956e-07, "loss": 0.2246, "step": 40150 }, { "epoch": 0.697926263275913, "grad_norm": 1.3089781897162336, "learning_rate": 2.208132465272731e-07, "loss": 0.1972, "step": 40151 }, { "epoch": 0.6979436458134158, "grad_norm": 3.230333114571227, "learning_rate": 2.2078989457064045e-07, "loss": 0.2717, "step": 40152 }, { "epoch": 0.6979610283509187, "grad_norm": 1.2383286552595512, "learning_rate": 2.2076654349897545e-07, "loss": 0.1597, "step": 40153 }, { "epoch": 0.6979784108884215, "grad_norm": 2.0618993151151837, "learning_rate": 2.2074319331235236e-07, "loss": 0.1853, "step": 40154 }, { "epoch": 0.6979957934259243, "grad_norm": 2.750576660605815, "learning_rate": 2.2071984401084504e-07, "loss": 0.2881, "step": 40155 }, { "epoch": 0.6980131759634272, "grad_norm": 1.6704888376674696, "learning_rate": 2.2069649559452758e-07, "loss": 0.1803, "step": 40156 }, { "epoch": 0.69803055850093, "grad_norm": 1.6512464765478763, "learning_rate": 2.2067314806347386e-07, "loss": 0.1664, "step": 40157 }, { "epoch": 0.6980479410384328, "grad_norm": 2.056846391432136, "learning_rate": 2.2064980141775803e-07, "loss": 0.2201, "step": 40158 }, { "epoch": 0.6980653235759356, "grad_norm": 2.546658071460873, "learning_rate": 2.2062645565745397e-07, "loss": 0.1968, "step": 40159 }, { "epoch": 0.6980827061134385, "grad_norm": 1.192780202547484, "learning_rate": 2.2060311078263571e-07, "loss": 0.3115, "step": 40160 }, { "epoch": 0.6981000886509413, "grad_norm": 1.5684693768404105, "learning_rate": 2.2057976679337704e-07, "loss": 0.2491, "step": 40161 }, { "epoch": 0.6981174711884441, "grad_norm": 1.5589889852065721, "learning_rate": 2.2055642368975235e-07, "loss": 0.1613, "step": 40162 }, { "epoch": 0.698134853725947, "grad_norm": 2.251347566994918, "learning_rate": 2.2053308147183564e-07, "loss": 0.1763, "step": 40163 }, { "epoch": 0.6981522362634497, "grad_norm": 1.7063183334399752, "learning_rate": 2.2050974013970031e-07, "loss": 0.197, "step": 40164 }, { "epoch": 0.6981696188009525, "grad_norm": 1.5459639470417725, "learning_rate": 2.2048639969342087e-07, "loss": 0.2296, "step": 40165 }, { "epoch": 0.6981870013384553, "grad_norm": 0.762966795571506, "learning_rate": 2.204630601330712e-07, "loss": 0.2412, "step": 40166 }, { "epoch": 0.6982043838759582, "grad_norm": 2.2764372870220466, "learning_rate": 2.204397214587252e-07, "loss": 0.2448, "step": 40167 }, { "epoch": 0.698221766413461, "grad_norm": 1.4271208987964403, "learning_rate": 2.2041638367045685e-07, "loss": 0.224, "step": 40168 }, { "epoch": 0.6982391489509638, "grad_norm": 1.332220893014413, "learning_rate": 2.2039304676834008e-07, "loss": 0.1962, "step": 40169 }, { "epoch": 0.6982565314884667, "grad_norm": 1.8692865934593663, "learning_rate": 2.2036971075244897e-07, "loss": 0.3169, "step": 40170 }, { "epoch": 0.6982739140259695, "grad_norm": 1.2614438830427088, "learning_rate": 2.2034637562285746e-07, "loss": 0.2254, "step": 40171 }, { "epoch": 0.6982912965634723, "grad_norm": 1.6351671226977944, "learning_rate": 2.2032304137963926e-07, "loss": 0.1902, "step": 40172 }, { "epoch": 0.6983086791009752, "grad_norm": 1.5731587027048108, "learning_rate": 2.2029970802286868e-07, "loss": 0.2007, "step": 40173 }, { "epoch": 0.698326061638478, "grad_norm": 1.6614513102983173, "learning_rate": 2.2027637555261964e-07, "loss": 0.2235, "step": 40174 }, { "epoch": 0.6983434441759808, "grad_norm": 1.1750866946282594, "learning_rate": 2.202530439689661e-07, "loss": 0.182, "step": 40175 }, { "epoch": 0.6983608267134837, "grad_norm": 1.838842966910894, "learning_rate": 2.2022971327198154e-07, "loss": 0.3896, "step": 40176 }, { "epoch": 0.6983782092509865, "grad_norm": 2.2329929287482932, "learning_rate": 2.2020638346174047e-07, "loss": 0.2208, "step": 40177 }, { "epoch": 0.6983955917884893, "grad_norm": 1.4062794520744926, "learning_rate": 2.2018305453831664e-07, "loss": 0.2305, "step": 40178 }, { "epoch": 0.6984129743259921, "grad_norm": 1.1011597858008124, "learning_rate": 2.2015972650178406e-07, "loss": 0.1795, "step": 40179 }, { "epoch": 0.698430356863495, "grad_norm": 2.4826689206582095, "learning_rate": 2.2013639935221628e-07, "loss": 0.1337, "step": 40180 }, { "epoch": 0.6984477394009978, "grad_norm": 2.1039213865938775, "learning_rate": 2.2011307308968785e-07, "loss": 0.2443, "step": 40181 }, { "epoch": 0.6984651219385006, "grad_norm": 1.170302634759315, "learning_rate": 2.200897477142725e-07, "loss": 0.1914, "step": 40182 }, { "epoch": 0.6984825044760034, "grad_norm": 1.6232298726737209, "learning_rate": 2.2006642322604396e-07, "loss": 0.2298, "step": 40183 }, { "epoch": 0.6984998870135062, "grad_norm": 1.627498758417504, "learning_rate": 2.2004309962507606e-07, "loss": 0.2293, "step": 40184 }, { "epoch": 0.698517269551009, "grad_norm": 1.2804421284762415, "learning_rate": 2.2001977691144312e-07, "loss": 0.1838, "step": 40185 }, { "epoch": 0.6985346520885118, "grad_norm": 1.2703714879996848, "learning_rate": 2.1999645508521892e-07, "loss": 0.2107, "step": 40186 }, { "epoch": 0.6985520346260147, "grad_norm": 1.527039871373025, "learning_rate": 2.199731341464773e-07, "loss": 0.1185, "step": 40187 }, { "epoch": 0.6985694171635175, "grad_norm": 1.8921059349911147, "learning_rate": 2.1994981409529218e-07, "loss": 0.2748, "step": 40188 }, { "epoch": 0.6985867997010203, "grad_norm": 2.658797732771098, "learning_rate": 2.1992649493173748e-07, "loss": 0.2535, "step": 40189 }, { "epoch": 0.6986041822385232, "grad_norm": 1.4297167208189898, "learning_rate": 2.1990317665588721e-07, "loss": 0.1984, "step": 40190 }, { "epoch": 0.698621564776026, "grad_norm": 1.3986611164082803, "learning_rate": 2.1987985926781517e-07, "loss": 0.1768, "step": 40191 }, { "epoch": 0.6986389473135288, "grad_norm": 1.932744339246421, "learning_rate": 2.1985654276759514e-07, "loss": 0.2948, "step": 40192 }, { "epoch": 0.6986563298510317, "grad_norm": 0.9629611544831639, "learning_rate": 2.1983322715530133e-07, "loss": 0.1234, "step": 40193 }, { "epoch": 0.6986737123885345, "grad_norm": 1.531311612163576, "learning_rate": 2.1980991243100767e-07, "loss": 0.2372, "step": 40194 }, { "epoch": 0.6986910949260373, "grad_norm": 1.578459892358723, "learning_rate": 2.1978659859478755e-07, "loss": 0.1895, "step": 40195 }, { "epoch": 0.6987084774635401, "grad_norm": 2.0130883360385368, "learning_rate": 2.197632856467153e-07, "loss": 0.2325, "step": 40196 }, { "epoch": 0.698725860001043, "grad_norm": 2.285497042516418, "learning_rate": 2.197399735868648e-07, "loss": 0.2188, "step": 40197 }, { "epoch": 0.6987432425385458, "grad_norm": 1.4761842481073146, "learning_rate": 2.1971666241530978e-07, "loss": 0.234, "step": 40198 }, { "epoch": 0.6987606250760486, "grad_norm": 1.486635824311914, "learning_rate": 2.1969335213212426e-07, "loss": 0.1538, "step": 40199 }, { "epoch": 0.6987780076135515, "grad_norm": 2.2641625129276584, "learning_rate": 2.1967004273738183e-07, "loss": 0.2437, "step": 40200 }, { "epoch": 0.6987953901510543, "grad_norm": 0.9715502489557933, "learning_rate": 2.1964673423115693e-07, "loss": 0.2176, "step": 40201 }, { "epoch": 0.6988127726885571, "grad_norm": 1.2784856575168517, "learning_rate": 2.196234266135229e-07, "loss": 0.2093, "step": 40202 }, { "epoch": 0.6988301552260598, "grad_norm": 1.2084840176912799, "learning_rate": 2.196001198845537e-07, "loss": 0.1832, "step": 40203 }, { "epoch": 0.6988475377635627, "grad_norm": 2.1958915841202953, "learning_rate": 2.1957681404432343e-07, "loss": 0.3748, "step": 40204 }, { "epoch": 0.6988649203010655, "grad_norm": 3.3540078501822554, "learning_rate": 2.1955350909290588e-07, "loss": 0.2404, "step": 40205 }, { "epoch": 0.6988823028385683, "grad_norm": 1.3707637801059527, "learning_rate": 2.195302050303749e-07, "loss": 0.1488, "step": 40206 }, { "epoch": 0.6988996853760712, "grad_norm": 2.0094403418055666, "learning_rate": 2.195069018568043e-07, "loss": 0.204, "step": 40207 }, { "epoch": 0.698917067913574, "grad_norm": 1.3883465334092313, "learning_rate": 2.1948359957226796e-07, "loss": 0.2602, "step": 40208 }, { "epoch": 0.6989344504510768, "grad_norm": 2.3215482143370814, "learning_rate": 2.194602981768398e-07, "loss": 0.2499, "step": 40209 }, { "epoch": 0.6989518329885797, "grad_norm": 3.2109980051080145, "learning_rate": 2.194369976705936e-07, "loss": 0.2766, "step": 40210 }, { "epoch": 0.6989692155260825, "grad_norm": 2.416623421400106, "learning_rate": 2.1941369805360305e-07, "loss": 0.2942, "step": 40211 }, { "epoch": 0.6989865980635853, "grad_norm": 1.746931053935166, "learning_rate": 2.1939039932594238e-07, "loss": 0.211, "step": 40212 }, { "epoch": 0.6990039806010881, "grad_norm": 1.2316250978742895, "learning_rate": 2.193671014876854e-07, "loss": 0.18, "step": 40213 }, { "epoch": 0.699021363138591, "grad_norm": 1.1630565508501614, "learning_rate": 2.1934380453890561e-07, "loss": 0.1853, "step": 40214 }, { "epoch": 0.6990387456760938, "grad_norm": 1.1145622675695623, "learning_rate": 2.1932050847967693e-07, "loss": 0.3179, "step": 40215 }, { "epoch": 0.6990561282135966, "grad_norm": 1.0804090763767844, "learning_rate": 2.192972133100734e-07, "loss": 0.2206, "step": 40216 }, { "epoch": 0.6990735107510995, "grad_norm": 2.734342557028676, "learning_rate": 2.1927391903016878e-07, "loss": 0.236, "step": 40217 }, { "epoch": 0.6990908932886023, "grad_norm": 1.2669864287270989, "learning_rate": 2.192506256400369e-07, "loss": 0.1195, "step": 40218 }, { "epoch": 0.6991082758261051, "grad_norm": 1.1694489774714063, "learning_rate": 2.1922733313975138e-07, "loss": 0.1213, "step": 40219 }, { "epoch": 0.699125658363608, "grad_norm": 1.5866094844987522, "learning_rate": 2.192040415293866e-07, "loss": 0.2255, "step": 40220 }, { "epoch": 0.6991430409011108, "grad_norm": 1.5943982993703152, "learning_rate": 2.1918075080901583e-07, "loss": 0.1805, "step": 40221 }, { "epoch": 0.6991604234386136, "grad_norm": 1.262018363733659, "learning_rate": 2.1915746097871312e-07, "loss": 0.1835, "step": 40222 }, { "epoch": 0.6991778059761163, "grad_norm": 1.0545039771119045, "learning_rate": 2.1913417203855207e-07, "loss": 0.1467, "step": 40223 }, { "epoch": 0.6991951885136192, "grad_norm": 1.1565268744067951, "learning_rate": 2.1911088398860678e-07, "loss": 0.3106, "step": 40224 }, { "epoch": 0.699212571051122, "grad_norm": 2.1706838284942616, "learning_rate": 2.1908759682895102e-07, "loss": 0.2086, "step": 40225 }, { "epoch": 0.6992299535886248, "grad_norm": 1.693499283635434, "learning_rate": 2.190643105596585e-07, "loss": 0.2052, "step": 40226 }, { "epoch": 0.6992473361261277, "grad_norm": 3.3966233547711653, "learning_rate": 2.1904102518080315e-07, "loss": 0.198, "step": 40227 }, { "epoch": 0.6992647186636305, "grad_norm": 0.7884415796076194, "learning_rate": 2.1901774069245859e-07, "loss": 0.2012, "step": 40228 }, { "epoch": 0.6992821012011333, "grad_norm": 1.695277479992042, "learning_rate": 2.1899445709469876e-07, "loss": 0.3483, "step": 40229 }, { "epoch": 0.6992994837386362, "grad_norm": 0.9695538212349626, "learning_rate": 2.1897117438759723e-07, "loss": 0.1546, "step": 40230 }, { "epoch": 0.699316866276139, "grad_norm": 1.3399445082503896, "learning_rate": 2.189478925712282e-07, "loss": 0.2709, "step": 40231 }, { "epoch": 0.6993342488136418, "grad_norm": 1.0113563127733183, "learning_rate": 2.189246116456654e-07, "loss": 0.1376, "step": 40232 }, { "epoch": 0.6993516313511446, "grad_norm": 1.9745168967578914, "learning_rate": 2.1890133161098222e-07, "loss": 0.2234, "step": 40233 }, { "epoch": 0.6993690138886475, "grad_norm": 1.4270012605392035, "learning_rate": 2.188780524672526e-07, "loss": 0.1611, "step": 40234 }, { "epoch": 0.6993863964261503, "grad_norm": 1.1388044526066254, "learning_rate": 2.1885477421455057e-07, "loss": 0.3125, "step": 40235 }, { "epoch": 0.6994037789636531, "grad_norm": 3.042734680384767, "learning_rate": 2.1883149685294977e-07, "loss": 0.2392, "step": 40236 }, { "epoch": 0.699421161501156, "grad_norm": 1.1482462230174988, "learning_rate": 2.1880822038252396e-07, "loss": 0.2626, "step": 40237 }, { "epoch": 0.6994385440386588, "grad_norm": 1.70746108650695, "learning_rate": 2.1878494480334692e-07, "loss": 0.1864, "step": 40238 }, { "epoch": 0.6994559265761616, "grad_norm": 1.74040920068491, "learning_rate": 2.187616701154924e-07, "loss": 0.2277, "step": 40239 }, { "epoch": 0.6994733091136645, "grad_norm": 1.5697594153336845, "learning_rate": 2.1873839631903424e-07, "loss": 0.3646, "step": 40240 }, { "epoch": 0.6994906916511673, "grad_norm": 1.0354164182167593, "learning_rate": 2.1871512341404612e-07, "loss": 0.1878, "step": 40241 }, { "epoch": 0.6995080741886701, "grad_norm": 1.5560113153002035, "learning_rate": 2.1869185140060164e-07, "loss": 0.141, "step": 40242 }, { "epoch": 0.6995254567261728, "grad_norm": 1.3400041677432217, "learning_rate": 2.1866858027877506e-07, "loss": 0.1504, "step": 40243 }, { "epoch": 0.6995428392636757, "grad_norm": 1.322317107997305, "learning_rate": 2.1864531004863973e-07, "loss": 0.2552, "step": 40244 }, { "epoch": 0.6995602218011785, "grad_norm": 1.9807351275458012, "learning_rate": 2.1862204071026957e-07, "loss": 0.2516, "step": 40245 }, { "epoch": 0.6995776043386813, "grad_norm": 1.460397391162123, "learning_rate": 2.185987722637383e-07, "loss": 0.1602, "step": 40246 }, { "epoch": 0.6995949868761842, "grad_norm": 1.380047817100265, "learning_rate": 2.1857550470911966e-07, "loss": 0.3605, "step": 40247 }, { "epoch": 0.699612369413687, "grad_norm": 1.7127540259719722, "learning_rate": 2.1855223804648737e-07, "loss": 0.1979, "step": 40248 }, { "epoch": 0.6996297519511898, "grad_norm": 1.0394278998123396, "learning_rate": 2.185289722759152e-07, "loss": 0.3349, "step": 40249 }, { "epoch": 0.6996471344886926, "grad_norm": 2.7522898352184537, "learning_rate": 2.1850570739747676e-07, "loss": 0.1844, "step": 40250 }, { "epoch": 0.6996645170261955, "grad_norm": 2.6696764586759008, "learning_rate": 2.1848244341124622e-07, "loss": 0.1541, "step": 40251 }, { "epoch": 0.6996818995636983, "grad_norm": 10.905623017632164, "learning_rate": 2.1845918031729688e-07, "loss": 0.2848, "step": 40252 }, { "epoch": 0.6996992821012011, "grad_norm": 1.542816987937826, "learning_rate": 2.1843591811570238e-07, "loss": 0.2603, "step": 40253 }, { "epoch": 0.699716664638704, "grad_norm": 0.7801558141742139, "learning_rate": 2.1841265680653692e-07, "loss": 0.1165, "step": 40254 }, { "epoch": 0.6997340471762068, "grad_norm": 1.1526919065498127, "learning_rate": 2.183893963898739e-07, "loss": 0.2951, "step": 40255 }, { "epoch": 0.6997514297137096, "grad_norm": 1.2833420777093154, "learning_rate": 2.1836613686578726e-07, "loss": 0.1352, "step": 40256 }, { "epoch": 0.6997688122512125, "grad_norm": 1.3458497056973555, "learning_rate": 2.183428782343505e-07, "loss": 0.2364, "step": 40257 }, { "epoch": 0.6997861947887153, "grad_norm": 0.9101950230057599, "learning_rate": 2.1831962049563745e-07, "loss": 0.1343, "step": 40258 }, { "epoch": 0.6998035773262181, "grad_norm": 3.431230952522763, "learning_rate": 2.1829636364972181e-07, "loss": 0.2788, "step": 40259 }, { "epoch": 0.699820959863721, "grad_norm": 1.8745331811409331, "learning_rate": 2.182731076966773e-07, "loss": 0.2251, "step": 40260 }, { "epoch": 0.6998383424012238, "grad_norm": 1.37617103417115, "learning_rate": 2.1824985263657743e-07, "loss": 0.1491, "step": 40261 }, { "epoch": 0.6998557249387266, "grad_norm": 1.7479034220384024, "learning_rate": 2.1822659846949632e-07, "loss": 0.1913, "step": 40262 }, { "epoch": 0.6998731074762293, "grad_norm": 2.2103347436690473, "learning_rate": 2.1820334519550743e-07, "loss": 0.1871, "step": 40263 }, { "epoch": 0.6998904900137322, "grad_norm": 2.077272641108971, "learning_rate": 2.1818009281468463e-07, "loss": 0.3975, "step": 40264 }, { "epoch": 0.699907872551235, "grad_norm": 1.4990353710530366, "learning_rate": 2.1815684132710117e-07, "loss": 0.2045, "step": 40265 }, { "epoch": 0.6999252550887378, "grad_norm": 1.293794662142524, "learning_rate": 2.181335907328312e-07, "loss": 0.1423, "step": 40266 }, { "epoch": 0.6999426376262406, "grad_norm": 2.1474700438071714, "learning_rate": 2.1811034103194831e-07, "loss": 0.2835, "step": 40267 }, { "epoch": 0.6999600201637435, "grad_norm": 1.1438289019665102, "learning_rate": 2.1808709222452615e-07, "loss": 0.2611, "step": 40268 }, { "epoch": 0.6999774027012463, "grad_norm": 1.1721545102116684, "learning_rate": 2.1806384431063817e-07, "loss": 0.2124, "step": 40269 }, { "epoch": 0.6999947852387491, "grad_norm": 1.454331516824465, "learning_rate": 2.1804059729035867e-07, "loss": 0.1695, "step": 40270 }, { "epoch": 0.700012167776252, "grad_norm": 2.252267415139809, "learning_rate": 2.1801735116376079e-07, "loss": 0.1715, "step": 40271 }, { "epoch": 0.7000295503137548, "grad_norm": 1.6635569099023548, "learning_rate": 2.1799410593091843e-07, "loss": 0.199, "step": 40272 }, { "epoch": 0.7000469328512576, "grad_norm": 3.082667166793202, "learning_rate": 2.1797086159190497e-07, "loss": 0.2528, "step": 40273 }, { "epoch": 0.7000643153887605, "grad_norm": 1.5467943267114133, "learning_rate": 2.1794761814679452e-07, "loss": 0.2333, "step": 40274 }, { "epoch": 0.7000816979262633, "grad_norm": 1.2394020903133434, "learning_rate": 2.179243755956605e-07, "loss": 0.2494, "step": 40275 }, { "epoch": 0.7000990804637661, "grad_norm": 1.5035370868321394, "learning_rate": 2.1790113393857674e-07, "loss": 0.1775, "step": 40276 }, { "epoch": 0.700116463001269, "grad_norm": 1.0298544494540485, "learning_rate": 2.1787789317561671e-07, "loss": 0.1933, "step": 40277 }, { "epoch": 0.7001338455387718, "grad_norm": 1.4104070948261547, "learning_rate": 2.178546533068542e-07, "loss": 0.2303, "step": 40278 }, { "epoch": 0.7001512280762746, "grad_norm": 1.2782714511890343, "learning_rate": 2.1783141433236284e-07, "loss": 0.3281, "step": 40279 }, { "epoch": 0.7001686106137774, "grad_norm": 1.0616685386560731, "learning_rate": 2.1780817625221625e-07, "loss": 0.1666, "step": 40280 }, { "epoch": 0.7001859931512803, "grad_norm": 1.4861580182776657, "learning_rate": 2.17784939066488e-07, "loss": 0.2283, "step": 40281 }, { "epoch": 0.7002033756887831, "grad_norm": 1.2517114325836658, "learning_rate": 2.1776170277525195e-07, "loss": 0.2013, "step": 40282 }, { "epoch": 0.7002207582262858, "grad_norm": 2.2572430717145866, "learning_rate": 2.1773846737858187e-07, "loss": 0.2956, "step": 40283 }, { "epoch": 0.7002381407637887, "grad_norm": 1.7557483980244646, "learning_rate": 2.1771523287655086e-07, "loss": 0.2209, "step": 40284 }, { "epoch": 0.7002555233012915, "grad_norm": 1.3929505057878746, "learning_rate": 2.1769199926923303e-07, "loss": 0.175, "step": 40285 }, { "epoch": 0.7002729058387943, "grad_norm": 1.9699286939984564, "learning_rate": 2.1766876655670186e-07, "loss": 0.2336, "step": 40286 }, { "epoch": 0.7002902883762971, "grad_norm": 1.2016437004821903, "learning_rate": 2.1764553473903102e-07, "loss": 0.1876, "step": 40287 }, { "epoch": 0.7003076709138, "grad_norm": 3.340946807937179, "learning_rate": 2.1762230381629415e-07, "loss": 0.1355, "step": 40288 }, { "epoch": 0.7003250534513028, "grad_norm": 0.8153016587608455, "learning_rate": 2.175990737885649e-07, "loss": 0.2339, "step": 40289 }, { "epoch": 0.7003424359888056, "grad_norm": 1.7788039237112852, "learning_rate": 2.1757584465591683e-07, "loss": 0.1701, "step": 40290 }, { "epoch": 0.7003598185263085, "grad_norm": 0.9726811925833865, "learning_rate": 2.175526164184236e-07, "loss": 0.2396, "step": 40291 }, { "epoch": 0.7003772010638113, "grad_norm": 1.3143339489324597, "learning_rate": 2.1752938907615858e-07, "loss": 0.1369, "step": 40292 }, { "epoch": 0.7003945836013141, "grad_norm": 1.2054497984900956, "learning_rate": 2.1750616262919592e-07, "loss": 0.1224, "step": 40293 }, { "epoch": 0.700411966138817, "grad_norm": 0.9196687147078775, "learning_rate": 2.174829370776089e-07, "loss": 0.1139, "step": 40294 }, { "epoch": 0.7004293486763198, "grad_norm": 1.4168986285812273, "learning_rate": 2.1745971242147137e-07, "loss": 0.3382, "step": 40295 }, { "epoch": 0.7004467312138226, "grad_norm": 4.682726459569948, "learning_rate": 2.1743648866085644e-07, "loss": 0.2843, "step": 40296 }, { "epoch": 0.7004641137513254, "grad_norm": 0.8234068225583778, "learning_rate": 2.174132657958382e-07, "loss": 0.1682, "step": 40297 }, { "epoch": 0.7004814962888283, "grad_norm": 1.2633391903424125, "learning_rate": 2.173900438264901e-07, "loss": 0.2398, "step": 40298 }, { "epoch": 0.7004988788263311, "grad_norm": 2.8296860395875996, "learning_rate": 2.1736682275288577e-07, "loss": 0.2726, "step": 40299 }, { "epoch": 0.7005162613638339, "grad_norm": 1.8655653915469212, "learning_rate": 2.1734360257509853e-07, "loss": 0.195, "step": 40300 }, { "epoch": 0.7005336439013368, "grad_norm": 1.004844369221592, "learning_rate": 2.1732038329320247e-07, "loss": 0.2742, "step": 40301 }, { "epoch": 0.7005510264388396, "grad_norm": 1.1407149023592238, "learning_rate": 2.1729716490727113e-07, "loss": 0.2196, "step": 40302 }, { "epoch": 0.7005684089763423, "grad_norm": 1.1778692547640919, "learning_rate": 2.172739474173777e-07, "loss": 0.1905, "step": 40303 }, { "epoch": 0.7005857915138451, "grad_norm": 1.6710387683309036, "learning_rate": 2.172507308235958e-07, "loss": 0.3635, "step": 40304 }, { "epoch": 0.700603174051348, "grad_norm": 2.1138333796668283, "learning_rate": 2.172275151259994e-07, "loss": 0.2706, "step": 40305 }, { "epoch": 0.7006205565888508, "grad_norm": 1.4508486276696944, "learning_rate": 2.1720430032466187e-07, "loss": 0.2464, "step": 40306 }, { "epoch": 0.7006379391263536, "grad_norm": 2.206229022075672, "learning_rate": 2.1718108641965682e-07, "loss": 0.2885, "step": 40307 }, { "epoch": 0.7006553216638565, "grad_norm": 1.5448207576251038, "learning_rate": 2.171578734110578e-07, "loss": 0.1613, "step": 40308 }, { "epoch": 0.7006727042013593, "grad_norm": 1.7500401570456854, "learning_rate": 2.1713466129893837e-07, "loss": 0.1891, "step": 40309 }, { "epoch": 0.7006900867388621, "grad_norm": 1.4326145572119182, "learning_rate": 2.171114500833721e-07, "loss": 0.1291, "step": 40310 }, { "epoch": 0.700707469276365, "grad_norm": 1.1565443903351078, "learning_rate": 2.170882397644327e-07, "loss": 0.0995, "step": 40311 }, { "epoch": 0.7007248518138678, "grad_norm": 1.7477704677847408, "learning_rate": 2.170650303421933e-07, "loss": 0.2342, "step": 40312 }, { "epoch": 0.7007422343513706, "grad_norm": 1.5408522291592122, "learning_rate": 2.170418218167281e-07, "loss": 0.145, "step": 40313 }, { "epoch": 0.7007596168888734, "grad_norm": 2.344001512372365, "learning_rate": 2.1701861418811045e-07, "loss": 0.2396, "step": 40314 }, { "epoch": 0.7007769994263763, "grad_norm": 1.0299576153095855, "learning_rate": 2.1699540745641342e-07, "loss": 0.1898, "step": 40315 }, { "epoch": 0.7007943819638791, "grad_norm": 0.9030798705968892, "learning_rate": 2.1697220162171115e-07, "loss": 0.1335, "step": 40316 }, { "epoch": 0.7008117645013819, "grad_norm": 1.2351056681281203, "learning_rate": 2.1694899668407702e-07, "loss": 0.1693, "step": 40317 }, { "epoch": 0.7008291470388848, "grad_norm": 1.9058321621690213, "learning_rate": 2.169257926435845e-07, "loss": 0.1736, "step": 40318 }, { "epoch": 0.7008465295763876, "grad_norm": 2.8509685316319056, "learning_rate": 2.1690258950030704e-07, "loss": 0.3213, "step": 40319 }, { "epoch": 0.7008639121138904, "grad_norm": 1.5385271687727087, "learning_rate": 2.1687938725431849e-07, "loss": 0.2488, "step": 40320 }, { "epoch": 0.7008812946513933, "grad_norm": 1.1312273238596797, "learning_rate": 2.1685618590569244e-07, "loss": 0.2073, "step": 40321 }, { "epoch": 0.700898677188896, "grad_norm": 2.165917655444457, "learning_rate": 2.1683298545450202e-07, "loss": 0.2496, "step": 40322 }, { "epoch": 0.7009160597263988, "grad_norm": 1.3457536719534389, "learning_rate": 2.168097859008207e-07, "loss": 0.1358, "step": 40323 }, { "epoch": 0.7009334422639016, "grad_norm": 1.8194991690909288, "learning_rate": 2.1678658724472254e-07, "loss": 0.1187, "step": 40324 }, { "epoch": 0.7009508248014045, "grad_norm": 1.5730575638058826, "learning_rate": 2.1676338948628082e-07, "loss": 0.22, "step": 40325 }, { "epoch": 0.7009682073389073, "grad_norm": 1.4895335020918294, "learning_rate": 2.1674019262556899e-07, "loss": 0.2005, "step": 40326 }, { "epoch": 0.7009855898764101, "grad_norm": 0.8419945693517192, "learning_rate": 2.1671699666266064e-07, "loss": 0.1654, "step": 40327 }, { "epoch": 0.701002972413913, "grad_norm": 2.8335500776547806, "learning_rate": 2.166938015976293e-07, "loss": 0.2447, "step": 40328 }, { "epoch": 0.7010203549514158, "grad_norm": 1.5458842171904223, "learning_rate": 2.1667060743054843e-07, "loss": 0.2019, "step": 40329 }, { "epoch": 0.7010377374889186, "grad_norm": 1.8541906953392013, "learning_rate": 2.1664741416149164e-07, "loss": 0.228, "step": 40330 }, { "epoch": 0.7010551200264215, "grad_norm": 1.2279870660876182, "learning_rate": 2.1662422179053215e-07, "loss": 0.2018, "step": 40331 }, { "epoch": 0.7010725025639243, "grad_norm": 1.6766062410996558, "learning_rate": 2.166010303177439e-07, "loss": 0.1853, "step": 40332 }, { "epoch": 0.7010898851014271, "grad_norm": 2.0216020538775936, "learning_rate": 2.1657783974320044e-07, "loss": 0.1964, "step": 40333 }, { "epoch": 0.7011072676389299, "grad_norm": 1.2976332900220522, "learning_rate": 2.165546500669746e-07, "loss": 0.236, "step": 40334 }, { "epoch": 0.7011246501764328, "grad_norm": 1.8444515763689466, "learning_rate": 2.1653146128914052e-07, "loss": 0.2245, "step": 40335 }, { "epoch": 0.7011420327139356, "grad_norm": 1.2907227956032217, "learning_rate": 2.1650827340977146e-07, "loss": 0.1914, "step": 40336 }, { "epoch": 0.7011594152514384, "grad_norm": 1.3824145802814236, "learning_rate": 2.1648508642894102e-07, "loss": 0.1097, "step": 40337 }, { "epoch": 0.7011767977889413, "grad_norm": 1.18689521210713, "learning_rate": 2.1646190034672256e-07, "loss": 0.1562, "step": 40338 }, { "epoch": 0.7011941803264441, "grad_norm": 1.351711329088628, "learning_rate": 2.1643871516318947e-07, "loss": 0.2251, "step": 40339 }, { "epoch": 0.7012115628639469, "grad_norm": 0.9406606929198655, "learning_rate": 2.1641553087841574e-07, "loss": 0.1985, "step": 40340 }, { "epoch": 0.7012289454014498, "grad_norm": 1.2546307062746764, "learning_rate": 2.1639234749247436e-07, "loss": 0.1672, "step": 40341 }, { "epoch": 0.7012463279389525, "grad_norm": 1.703359149045489, "learning_rate": 2.1636916500543879e-07, "loss": 0.139, "step": 40342 }, { "epoch": 0.7012637104764553, "grad_norm": 1.2268429455185583, "learning_rate": 2.163459834173828e-07, "loss": 0.1754, "step": 40343 }, { "epoch": 0.7012810930139581, "grad_norm": 0.7546554637718806, "learning_rate": 2.1632280272837977e-07, "loss": 0.0935, "step": 40344 }, { "epoch": 0.701298475551461, "grad_norm": 2.025076976465099, "learning_rate": 2.162996229385032e-07, "loss": 0.2429, "step": 40345 }, { "epoch": 0.7013158580889638, "grad_norm": 1.4260747694084166, "learning_rate": 2.162764440478264e-07, "loss": 0.2443, "step": 40346 }, { "epoch": 0.7013332406264666, "grad_norm": 1.465334605264594, "learning_rate": 2.1625326605642302e-07, "loss": 0.1665, "step": 40347 }, { "epoch": 0.7013506231639695, "grad_norm": 1.6561582580036514, "learning_rate": 2.1623008896436639e-07, "loss": 0.1556, "step": 40348 }, { "epoch": 0.7013680057014723, "grad_norm": 1.3233373487156417, "learning_rate": 2.162069127717301e-07, "loss": 0.2324, "step": 40349 }, { "epoch": 0.7013853882389751, "grad_norm": 1.2665257754378678, "learning_rate": 2.1618373747858732e-07, "loss": 0.1674, "step": 40350 }, { "epoch": 0.701402770776478, "grad_norm": 1.3486342865793532, "learning_rate": 2.1616056308501186e-07, "loss": 0.1191, "step": 40351 }, { "epoch": 0.7014201533139808, "grad_norm": 1.1932373560933967, "learning_rate": 2.1613738959107725e-07, "loss": 0.2515, "step": 40352 }, { "epoch": 0.7014375358514836, "grad_norm": 2.166439760785682, "learning_rate": 2.161142169968565e-07, "loss": 0.2505, "step": 40353 }, { "epoch": 0.7014549183889864, "grad_norm": 1.742860576317012, "learning_rate": 2.160910453024231e-07, "loss": 0.154, "step": 40354 }, { "epoch": 0.7014723009264893, "grad_norm": 1.8321263251666482, "learning_rate": 2.160678745078509e-07, "loss": 0.239, "step": 40355 }, { "epoch": 0.7014896834639921, "grad_norm": 1.1236830403479197, "learning_rate": 2.1604470461321306e-07, "loss": 0.1668, "step": 40356 }, { "epoch": 0.7015070660014949, "grad_norm": 1.2963594126100302, "learning_rate": 2.1602153561858311e-07, "loss": 0.2016, "step": 40357 }, { "epoch": 0.7015244485389978, "grad_norm": 1.2282149508412248, "learning_rate": 2.1599836752403438e-07, "loss": 0.2219, "step": 40358 }, { "epoch": 0.7015418310765006, "grad_norm": 1.4031034288682194, "learning_rate": 2.159752003296404e-07, "loss": 0.1771, "step": 40359 }, { "epoch": 0.7015592136140034, "grad_norm": 1.4180617917030467, "learning_rate": 2.1595203403547458e-07, "loss": 0.1308, "step": 40360 }, { "epoch": 0.7015765961515062, "grad_norm": 1.4756166145467875, "learning_rate": 2.159288686416103e-07, "loss": 0.2327, "step": 40361 }, { "epoch": 0.701593978689009, "grad_norm": 1.7588693464258438, "learning_rate": 2.1590570414812088e-07, "loss": 0.2323, "step": 40362 }, { "epoch": 0.7016113612265118, "grad_norm": 2.462764944325282, "learning_rate": 2.1588254055508004e-07, "loss": 0.2334, "step": 40363 }, { "epoch": 0.7016287437640146, "grad_norm": 1.946234544832832, "learning_rate": 2.1585937786256104e-07, "loss": 0.1944, "step": 40364 }, { "epoch": 0.7016461263015175, "grad_norm": 0.9872930565331057, "learning_rate": 2.1583621607063724e-07, "loss": 0.1136, "step": 40365 }, { "epoch": 0.7016635088390203, "grad_norm": 1.385121777427695, "learning_rate": 2.1581305517938214e-07, "loss": 0.2611, "step": 40366 }, { "epoch": 0.7016808913765231, "grad_norm": 2.0698910505176187, "learning_rate": 2.157898951888691e-07, "loss": 0.1898, "step": 40367 }, { "epoch": 0.701698273914026, "grad_norm": 1.5062120755782573, "learning_rate": 2.1576673609917157e-07, "loss": 0.1944, "step": 40368 }, { "epoch": 0.7017156564515288, "grad_norm": 1.5131845419552234, "learning_rate": 2.1574357791036285e-07, "loss": 0.1913, "step": 40369 }, { "epoch": 0.7017330389890316, "grad_norm": 1.9950226434023317, "learning_rate": 2.1572042062251627e-07, "loss": 0.2572, "step": 40370 }, { "epoch": 0.7017504215265344, "grad_norm": 2.2597668180489077, "learning_rate": 2.1569726423570572e-07, "loss": 0.1689, "step": 40371 }, { "epoch": 0.7017678040640373, "grad_norm": 1.231583141911658, "learning_rate": 2.1567410875000408e-07, "loss": 0.1555, "step": 40372 }, { "epoch": 0.7017851866015401, "grad_norm": 1.371910777416112, "learning_rate": 2.1565095416548474e-07, "loss": 0.2174, "step": 40373 }, { "epoch": 0.7018025691390429, "grad_norm": 1.3252780814677132, "learning_rate": 2.156278004822214e-07, "loss": 0.1408, "step": 40374 }, { "epoch": 0.7018199516765458, "grad_norm": 2.1808540482402927, "learning_rate": 2.1560464770028737e-07, "loss": 0.1703, "step": 40375 }, { "epoch": 0.7018373342140486, "grad_norm": 1.650202351762409, "learning_rate": 2.1558149581975586e-07, "loss": 0.194, "step": 40376 }, { "epoch": 0.7018547167515514, "grad_norm": 1.4137649168481454, "learning_rate": 2.1555834484070046e-07, "loss": 0.3046, "step": 40377 }, { "epoch": 0.7018720992890543, "grad_norm": 1.185564703444546, "learning_rate": 2.1553519476319436e-07, "loss": 0.1468, "step": 40378 }, { "epoch": 0.7018894818265571, "grad_norm": 1.909942978777734, "learning_rate": 2.1551204558731106e-07, "loss": 0.1172, "step": 40379 }, { "epoch": 0.7019068643640599, "grad_norm": 1.9935154960883978, "learning_rate": 2.1548889731312392e-07, "loss": 0.1461, "step": 40380 }, { "epoch": 0.7019242469015627, "grad_norm": 1.0615912867777881, "learning_rate": 2.1546574994070605e-07, "loss": 0.1091, "step": 40381 }, { "epoch": 0.7019416294390655, "grad_norm": 1.0824742211949714, "learning_rate": 2.1544260347013126e-07, "loss": 0.2967, "step": 40382 }, { "epoch": 0.7019590119765683, "grad_norm": 2.364348619374943, "learning_rate": 2.154194579014727e-07, "loss": 0.1187, "step": 40383 }, { "epoch": 0.7019763945140711, "grad_norm": 1.8733747784506776, "learning_rate": 2.153963132348039e-07, "loss": 0.192, "step": 40384 }, { "epoch": 0.701993777051574, "grad_norm": 1.5485541455320515, "learning_rate": 2.1537316947019774e-07, "loss": 0.1552, "step": 40385 }, { "epoch": 0.7020111595890768, "grad_norm": 1.5116717796248065, "learning_rate": 2.15350026607728e-07, "loss": 0.2031, "step": 40386 }, { "epoch": 0.7020285421265796, "grad_norm": 4.479484193640767, "learning_rate": 2.1532688464746794e-07, "loss": 0.25, "step": 40387 }, { "epoch": 0.7020459246640824, "grad_norm": 1.622741033299578, "learning_rate": 2.1530374358949089e-07, "loss": 0.1521, "step": 40388 }, { "epoch": 0.7020633072015853, "grad_norm": 1.812508586019212, "learning_rate": 2.1528060343387e-07, "loss": 0.156, "step": 40389 }, { "epoch": 0.7020806897390881, "grad_norm": 1.4538591211973344, "learning_rate": 2.152574641806792e-07, "loss": 0.1638, "step": 40390 }, { "epoch": 0.7020980722765909, "grad_norm": 1.4863760150603003, "learning_rate": 2.1523432582999125e-07, "loss": 0.1471, "step": 40391 }, { "epoch": 0.7021154548140938, "grad_norm": 1.4114896147387066, "learning_rate": 2.1521118838187962e-07, "loss": 0.4211, "step": 40392 }, { "epoch": 0.7021328373515966, "grad_norm": 1.366704146353619, "learning_rate": 2.151880518364176e-07, "loss": 0.2521, "step": 40393 }, { "epoch": 0.7021502198890994, "grad_norm": 2.573072540222013, "learning_rate": 2.1516491619367876e-07, "loss": 0.2531, "step": 40394 }, { "epoch": 0.7021676024266023, "grad_norm": 1.544360840323227, "learning_rate": 2.151417814537363e-07, "loss": 0.1705, "step": 40395 }, { "epoch": 0.7021849849641051, "grad_norm": 1.6217856425793886, "learning_rate": 2.1511864761666354e-07, "loss": 0.344, "step": 40396 }, { "epoch": 0.7022023675016079, "grad_norm": 1.6377611565944814, "learning_rate": 2.1509551468253385e-07, "loss": 0.1903, "step": 40397 }, { "epoch": 0.7022197500391107, "grad_norm": 1.338432356016811, "learning_rate": 2.1507238265142042e-07, "loss": 0.1354, "step": 40398 }, { "epoch": 0.7022371325766136, "grad_norm": 1.2675543351348835, "learning_rate": 2.150492515233967e-07, "loss": 0.2181, "step": 40399 }, { "epoch": 0.7022545151141164, "grad_norm": 1.1550562840992826, "learning_rate": 2.1502612129853597e-07, "loss": 0.1669, "step": 40400 }, { "epoch": 0.7022718976516192, "grad_norm": 1.4432579840666595, "learning_rate": 2.150029919769113e-07, "loss": 0.1553, "step": 40401 }, { "epoch": 0.702289280189122, "grad_norm": 1.9755827568836586, "learning_rate": 2.1497986355859643e-07, "loss": 0.2265, "step": 40402 }, { "epoch": 0.7023066627266248, "grad_norm": 1.768054498380242, "learning_rate": 2.1495673604366466e-07, "loss": 0.1944, "step": 40403 }, { "epoch": 0.7023240452641276, "grad_norm": 1.0453360109629102, "learning_rate": 2.1493360943218874e-07, "loss": 0.2148, "step": 40404 }, { "epoch": 0.7023414278016304, "grad_norm": 2.1549839600818887, "learning_rate": 2.1491048372424254e-07, "loss": 0.3678, "step": 40405 }, { "epoch": 0.7023588103391333, "grad_norm": 1.7020128488263955, "learning_rate": 2.1488735891989907e-07, "loss": 0.13, "step": 40406 }, { "epoch": 0.7023761928766361, "grad_norm": 1.236515398633397, "learning_rate": 2.1486423501923174e-07, "loss": 0.1643, "step": 40407 }, { "epoch": 0.7023935754141389, "grad_norm": 1.798045727604759, "learning_rate": 2.148411120223138e-07, "loss": 0.3382, "step": 40408 }, { "epoch": 0.7024109579516418, "grad_norm": 1.4120586303347145, "learning_rate": 2.1481798992921857e-07, "loss": 0.1464, "step": 40409 }, { "epoch": 0.7024283404891446, "grad_norm": 1.0365710355542601, "learning_rate": 2.147948687400193e-07, "loss": 0.304, "step": 40410 }, { "epoch": 0.7024457230266474, "grad_norm": 7.535499731837951, "learning_rate": 2.1477174845478925e-07, "loss": 0.2455, "step": 40411 }, { "epoch": 0.7024631055641503, "grad_norm": 1.320769712030479, "learning_rate": 2.147486290736016e-07, "loss": 0.2367, "step": 40412 }, { "epoch": 0.7024804881016531, "grad_norm": 3.384554630231667, "learning_rate": 2.1472551059652987e-07, "loss": 0.428, "step": 40413 }, { "epoch": 0.7024978706391559, "grad_norm": 1.4429916131115772, "learning_rate": 2.147023930236473e-07, "loss": 0.1307, "step": 40414 }, { "epoch": 0.7025152531766587, "grad_norm": 0.7960898620702699, "learning_rate": 2.1467927635502725e-07, "loss": 0.1837, "step": 40415 }, { "epoch": 0.7025326357141616, "grad_norm": 2.982326567464446, "learning_rate": 2.1465616059074244e-07, "loss": 0.3112, "step": 40416 }, { "epoch": 0.7025500182516644, "grad_norm": 1.802303268569407, "learning_rate": 2.1463304573086676e-07, "loss": 0.1715, "step": 40417 }, { "epoch": 0.7025674007891672, "grad_norm": 1.5004578488297349, "learning_rate": 2.146099317754732e-07, "loss": 0.2677, "step": 40418 }, { "epoch": 0.7025847833266701, "grad_norm": 1.1058986191714242, "learning_rate": 2.145868187246351e-07, "loss": 0.1732, "step": 40419 }, { "epoch": 0.7026021658641729, "grad_norm": 1.8785255118257647, "learning_rate": 2.1456370657842544e-07, "loss": 0.2495, "step": 40420 }, { "epoch": 0.7026195484016757, "grad_norm": 1.0297066708298264, "learning_rate": 2.1454059533691787e-07, "loss": 0.1343, "step": 40421 }, { "epoch": 0.7026369309391785, "grad_norm": 1.227039309455763, "learning_rate": 2.1451748500018574e-07, "loss": 0.1716, "step": 40422 }, { "epoch": 0.7026543134766813, "grad_norm": 1.285568548626431, "learning_rate": 2.1449437556830164e-07, "loss": 0.1767, "step": 40423 }, { "epoch": 0.7026716960141841, "grad_norm": 1.5079137882499385, "learning_rate": 2.144712670413394e-07, "loss": 0.265, "step": 40424 }, { "epoch": 0.7026890785516869, "grad_norm": 1.3745042229641606, "learning_rate": 2.144481594193721e-07, "loss": 0.1921, "step": 40425 }, { "epoch": 0.7027064610891898, "grad_norm": 1.3071292424397298, "learning_rate": 2.1442505270247298e-07, "loss": 0.3065, "step": 40426 }, { "epoch": 0.7027238436266926, "grad_norm": 1.5272978536036794, "learning_rate": 2.1440194689071516e-07, "loss": 0.2495, "step": 40427 }, { "epoch": 0.7027412261641954, "grad_norm": 1.4514396425959324, "learning_rate": 2.1437884198417205e-07, "loss": 0.2109, "step": 40428 }, { "epoch": 0.7027586087016983, "grad_norm": 1.1649702648848097, "learning_rate": 2.1435573798291685e-07, "loss": 0.2293, "step": 40429 }, { "epoch": 0.7027759912392011, "grad_norm": 2.1831585557201794, "learning_rate": 2.1433263488702268e-07, "loss": 0.2501, "step": 40430 }, { "epoch": 0.7027933737767039, "grad_norm": 0.912756769781308, "learning_rate": 2.1430953269656267e-07, "loss": 0.12, "step": 40431 }, { "epoch": 0.7028107563142068, "grad_norm": 0.9520518290499846, "learning_rate": 2.142864314116104e-07, "loss": 0.1718, "step": 40432 }, { "epoch": 0.7028281388517096, "grad_norm": 1.2418043646129708, "learning_rate": 2.1426333103223887e-07, "loss": 0.2426, "step": 40433 }, { "epoch": 0.7028455213892124, "grad_norm": 1.743986548882055, "learning_rate": 2.1424023155852154e-07, "loss": 0.2273, "step": 40434 }, { "epoch": 0.7028629039267152, "grad_norm": 1.5753676594672619, "learning_rate": 2.1421713299053102e-07, "loss": 0.2506, "step": 40435 }, { "epoch": 0.7028802864642181, "grad_norm": 1.5637249250177239, "learning_rate": 2.1419403532834114e-07, "loss": 0.1851, "step": 40436 }, { "epoch": 0.7028976690017209, "grad_norm": 1.0599493557059645, "learning_rate": 2.1417093857202478e-07, "loss": 0.1269, "step": 40437 }, { "epoch": 0.7029150515392237, "grad_norm": 1.1699344916017218, "learning_rate": 2.1414784272165532e-07, "loss": 0.1103, "step": 40438 }, { "epoch": 0.7029324340767266, "grad_norm": 1.7333996172403219, "learning_rate": 2.1412474777730573e-07, "loss": 0.1324, "step": 40439 }, { "epoch": 0.7029498166142294, "grad_norm": 2.433916962252112, "learning_rate": 2.141016537390495e-07, "loss": 0.303, "step": 40440 }, { "epoch": 0.7029671991517322, "grad_norm": 1.551746154861802, "learning_rate": 2.140785606069599e-07, "loss": 0.2353, "step": 40441 }, { "epoch": 0.7029845816892349, "grad_norm": 1.6164411108538037, "learning_rate": 2.140554683811097e-07, "loss": 0.2033, "step": 40442 }, { "epoch": 0.7030019642267378, "grad_norm": 1.6543376766388405, "learning_rate": 2.1403237706157219e-07, "loss": 0.1387, "step": 40443 }, { "epoch": 0.7030193467642406, "grad_norm": 1.547926331825903, "learning_rate": 2.1400928664842082e-07, "loss": 0.1816, "step": 40444 }, { "epoch": 0.7030367293017434, "grad_norm": 2.06908595480495, "learning_rate": 2.1398619714172867e-07, "loss": 0.2111, "step": 40445 }, { "epoch": 0.7030541118392463, "grad_norm": 1.1118927402771268, "learning_rate": 2.1396310854156885e-07, "loss": 0.2045, "step": 40446 }, { "epoch": 0.7030714943767491, "grad_norm": 1.165608990349819, "learning_rate": 2.139400208480146e-07, "loss": 0.2059, "step": 40447 }, { "epoch": 0.7030888769142519, "grad_norm": 3.323811045867725, "learning_rate": 2.139169340611391e-07, "loss": 0.3066, "step": 40448 }, { "epoch": 0.7031062594517548, "grad_norm": 1.5312189233185571, "learning_rate": 2.1389384818101547e-07, "loss": 0.2771, "step": 40449 }, { "epoch": 0.7031236419892576, "grad_norm": 1.2163633264305238, "learning_rate": 2.1387076320771697e-07, "loss": 0.1642, "step": 40450 }, { "epoch": 0.7031410245267604, "grad_norm": 1.93564614828837, "learning_rate": 2.1384767914131646e-07, "loss": 0.2644, "step": 40451 }, { "epoch": 0.7031584070642632, "grad_norm": 3.0989419028551377, "learning_rate": 2.1382459598188756e-07, "loss": 0.2049, "step": 40452 }, { "epoch": 0.7031757896017661, "grad_norm": 1.210923465479435, "learning_rate": 2.1380151372950345e-07, "loss": 0.2322, "step": 40453 }, { "epoch": 0.7031931721392689, "grad_norm": 1.6114335154506048, "learning_rate": 2.137784323842367e-07, "loss": 0.282, "step": 40454 }, { "epoch": 0.7032105546767717, "grad_norm": 1.598165256883346, "learning_rate": 2.13755351946161e-07, "loss": 0.2131, "step": 40455 }, { "epoch": 0.7032279372142746, "grad_norm": 1.7108657835804209, "learning_rate": 2.1373227241534936e-07, "loss": 0.1815, "step": 40456 }, { "epoch": 0.7032453197517774, "grad_norm": 1.7362495545110388, "learning_rate": 2.137091937918749e-07, "loss": 0.2041, "step": 40457 }, { "epoch": 0.7032627022892802, "grad_norm": 1.3104221517506487, "learning_rate": 2.1368611607581082e-07, "loss": 0.2047, "step": 40458 }, { "epoch": 0.7032800848267831, "grad_norm": 1.0028225017306722, "learning_rate": 2.1366303926723e-07, "loss": 0.3036, "step": 40459 }, { "epoch": 0.7032974673642859, "grad_norm": 1.7235001348385142, "learning_rate": 2.1363996336620622e-07, "loss": 0.1753, "step": 40460 }, { "epoch": 0.7033148499017886, "grad_norm": 0.854845802371991, "learning_rate": 2.1361688837281199e-07, "loss": 0.1857, "step": 40461 }, { "epoch": 0.7033322324392914, "grad_norm": 1.1402001159623143, "learning_rate": 2.1359381428712055e-07, "loss": 0.1893, "step": 40462 }, { "epoch": 0.7033496149767943, "grad_norm": 1.1691513563157312, "learning_rate": 2.1357074110920525e-07, "loss": 0.1856, "step": 40463 }, { "epoch": 0.7033669975142971, "grad_norm": 2.813488997313936, "learning_rate": 2.1354766883913922e-07, "loss": 0.291, "step": 40464 }, { "epoch": 0.7033843800517999, "grad_norm": 1.233750921630061, "learning_rate": 2.1352459747699543e-07, "loss": 0.198, "step": 40465 }, { "epoch": 0.7034017625893028, "grad_norm": 1.362112274240944, "learning_rate": 2.1350152702284708e-07, "loss": 0.2738, "step": 40466 }, { "epoch": 0.7034191451268056, "grad_norm": 1.2312428474075545, "learning_rate": 2.1347845747676728e-07, "loss": 0.204, "step": 40467 }, { "epoch": 0.7034365276643084, "grad_norm": 1.9756091199812955, "learning_rate": 2.1345538883882917e-07, "loss": 0.2487, "step": 40468 }, { "epoch": 0.7034539102018112, "grad_norm": 4.152186750679746, "learning_rate": 2.1343232110910587e-07, "loss": 0.3124, "step": 40469 }, { "epoch": 0.7034712927393141, "grad_norm": 1.4387364428056315, "learning_rate": 2.1340925428767025e-07, "loss": 0.3381, "step": 40470 }, { "epoch": 0.7034886752768169, "grad_norm": 2.487740436205526, "learning_rate": 2.1338618837459582e-07, "loss": 0.1162, "step": 40471 }, { "epoch": 0.7035060578143197, "grad_norm": 1.3706836470550312, "learning_rate": 2.1336312336995572e-07, "loss": 0.2277, "step": 40472 }, { "epoch": 0.7035234403518226, "grad_norm": 2.1868532744809, "learning_rate": 2.1334005927382265e-07, "loss": 0.2557, "step": 40473 }, { "epoch": 0.7035408228893254, "grad_norm": 1.3826632267622467, "learning_rate": 2.1331699608626974e-07, "loss": 0.2231, "step": 40474 }, { "epoch": 0.7035582054268282, "grad_norm": 1.3960098159586471, "learning_rate": 2.132939338073705e-07, "loss": 0.1515, "step": 40475 }, { "epoch": 0.7035755879643311, "grad_norm": 1.3936006909443366, "learning_rate": 2.1327087243719771e-07, "loss": 0.2609, "step": 40476 }, { "epoch": 0.7035929705018339, "grad_norm": 1.7079153411339365, "learning_rate": 2.1324781197582453e-07, "loss": 0.2786, "step": 40477 }, { "epoch": 0.7036103530393367, "grad_norm": 2.9003483465744555, "learning_rate": 2.132247524233241e-07, "loss": 0.2591, "step": 40478 }, { "epoch": 0.7036277355768396, "grad_norm": 1.9211485746826515, "learning_rate": 2.1320169377976948e-07, "loss": 0.1695, "step": 40479 }, { "epoch": 0.7036451181143424, "grad_norm": 1.458373254286594, "learning_rate": 2.1317863604523374e-07, "loss": 0.2352, "step": 40480 }, { "epoch": 0.7036625006518451, "grad_norm": 2.040323359229314, "learning_rate": 2.1315557921979e-07, "loss": 0.2946, "step": 40481 }, { "epoch": 0.7036798831893479, "grad_norm": 2.081571876003236, "learning_rate": 2.1313252330351107e-07, "loss": 0.3947, "step": 40482 }, { "epoch": 0.7036972657268508, "grad_norm": 0.9960683366449982, "learning_rate": 2.1310946829647046e-07, "loss": 0.1826, "step": 40483 }, { "epoch": 0.7037146482643536, "grad_norm": 1.6309766341555278, "learning_rate": 2.1308641419874106e-07, "loss": 0.2175, "step": 40484 }, { "epoch": 0.7037320308018564, "grad_norm": 1.2050240876012537, "learning_rate": 2.130633610103959e-07, "loss": 0.2409, "step": 40485 }, { "epoch": 0.7037494133393593, "grad_norm": 1.4876141404162595, "learning_rate": 2.1304030873150812e-07, "loss": 0.2453, "step": 40486 }, { "epoch": 0.7037667958768621, "grad_norm": 1.5397327672372636, "learning_rate": 2.130172573621507e-07, "loss": 0.2682, "step": 40487 }, { "epoch": 0.7037841784143649, "grad_norm": 1.2856481709701608, "learning_rate": 2.1299420690239678e-07, "loss": 0.1453, "step": 40488 }, { "epoch": 0.7038015609518677, "grad_norm": 2.016616767229265, "learning_rate": 2.129711573523194e-07, "loss": 0.1281, "step": 40489 }, { "epoch": 0.7038189434893706, "grad_norm": 2.1252914749142495, "learning_rate": 2.1294810871199136e-07, "loss": 0.3584, "step": 40490 }, { "epoch": 0.7038363260268734, "grad_norm": 1.1675285744293178, "learning_rate": 2.129250609814864e-07, "loss": 0.2556, "step": 40491 }, { "epoch": 0.7038537085643762, "grad_norm": 1.5970471005304856, "learning_rate": 2.129020141608769e-07, "loss": 0.2685, "step": 40492 }, { "epoch": 0.7038710911018791, "grad_norm": 1.6691819070878402, "learning_rate": 2.1287896825023593e-07, "loss": 0.1431, "step": 40493 }, { "epoch": 0.7038884736393819, "grad_norm": 1.131098276022296, "learning_rate": 2.128559232496369e-07, "loss": 0.1821, "step": 40494 }, { "epoch": 0.7039058561768847, "grad_norm": 1.1135258670322514, "learning_rate": 2.1283287915915277e-07, "loss": 0.2914, "step": 40495 }, { "epoch": 0.7039232387143876, "grad_norm": 1.2637006602249632, "learning_rate": 2.1280983597885644e-07, "loss": 0.2676, "step": 40496 }, { "epoch": 0.7039406212518904, "grad_norm": 1.4140044113450922, "learning_rate": 2.1278679370882102e-07, "loss": 0.1084, "step": 40497 }, { "epoch": 0.7039580037893932, "grad_norm": 1.4290761742145328, "learning_rate": 2.1276375234911952e-07, "loss": 0.2508, "step": 40498 }, { "epoch": 0.703975386326896, "grad_norm": 1.3206959211703742, "learning_rate": 2.12740711899825e-07, "loss": 0.3155, "step": 40499 }, { "epoch": 0.7039927688643989, "grad_norm": 1.6863256433464489, "learning_rate": 2.1271767236101046e-07, "loss": 0.2049, "step": 40500 }, { "epoch": 0.7040101514019016, "grad_norm": 1.1932307105723763, "learning_rate": 2.1269463373274875e-07, "loss": 0.2309, "step": 40501 }, { "epoch": 0.7040275339394044, "grad_norm": 2.0805250902833827, "learning_rate": 2.1267159601511324e-07, "loss": 0.2241, "step": 40502 }, { "epoch": 0.7040449164769073, "grad_norm": 1.6017860842373366, "learning_rate": 2.1264855920817682e-07, "loss": 0.1621, "step": 40503 }, { "epoch": 0.7040622990144101, "grad_norm": 1.4141214594758351, "learning_rate": 2.126255233120126e-07, "loss": 0.1776, "step": 40504 }, { "epoch": 0.7040796815519129, "grad_norm": 0.9600674860113665, "learning_rate": 2.1260248832669315e-07, "loss": 0.3276, "step": 40505 }, { "epoch": 0.7040970640894157, "grad_norm": 1.7037206904274564, "learning_rate": 2.1257945425229196e-07, "loss": 0.2831, "step": 40506 }, { "epoch": 0.7041144466269186, "grad_norm": 1.2748281398893353, "learning_rate": 2.1255642108888182e-07, "loss": 0.3696, "step": 40507 }, { "epoch": 0.7041318291644214, "grad_norm": 1.8028086593981765, "learning_rate": 2.1253338883653582e-07, "loss": 0.2193, "step": 40508 }, { "epoch": 0.7041492117019242, "grad_norm": 1.8671355725293997, "learning_rate": 2.1251035749532675e-07, "loss": 0.1591, "step": 40509 }, { "epoch": 0.7041665942394271, "grad_norm": 1.1814901478718451, "learning_rate": 2.124873270653282e-07, "loss": 0.2752, "step": 40510 }, { "epoch": 0.7041839767769299, "grad_norm": 1.4914834117253106, "learning_rate": 2.124642975466125e-07, "loss": 0.3102, "step": 40511 }, { "epoch": 0.7042013593144327, "grad_norm": 1.860083314172656, "learning_rate": 2.1244126893925274e-07, "loss": 0.2191, "step": 40512 }, { "epoch": 0.7042187418519356, "grad_norm": 2.775243398224167, "learning_rate": 2.124182412433222e-07, "loss": 0.2216, "step": 40513 }, { "epoch": 0.7042361243894384, "grad_norm": 1.6836397695603016, "learning_rate": 2.1239521445889376e-07, "loss": 0.1492, "step": 40514 }, { "epoch": 0.7042535069269412, "grad_norm": 1.8205393411993802, "learning_rate": 2.123721885860404e-07, "loss": 0.2523, "step": 40515 }, { "epoch": 0.704270889464444, "grad_norm": 1.4381480464644316, "learning_rate": 2.1234916362483508e-07, "loss": 0.22, "step": 40516 }, { "epoch": 0.7042882720019469, "grad_norm": 1.2779907397359573, "learning_rate": 2.1232613957535072e-07, "loss": 0.2676, "step": 40517 }, { "epoch": 0.7043056545394497, "grad_norm": 1.2892972616469214, "learning_rate": 2.1230311643766035e-07, "loss": 0.2374, "step": 40518 }, { "epoch": 0.7043230370769525, "grad_norm": 2.116416064781384, "learning_rate": 2.12280094211837e-07, "loss": 0.2427, "step": 40519 }, { "epoch": 0.7043404196144554, "grad_norm": 1.409099278610107, "learning_rate": 2.1225707289795335e-07, "loss": 0.1875, "step": 40520 }, { "epoch": 0.7043578021519581, "grad_norm": 3.540370050650247, "learning_rate": 2.1223405249608284e-07, "loss": 0.2878, "step": 40521 }, { "epoch": 0.7043751846894609, "grad_norm": 2.294975815497956, "learning_rate": 2.1221103300629817e-07, "loss": 0.2914, "step": 40522 }, { "epoch": 0.7043925672269638, "grad_norm": 1.1556786837732, "learning_rate": 2.1218801442867251e-07, "loss": 0.2496, "step": 40523 }, { "epoch": 0.7044099497644666, "grad_norm": 2.5685769884624126, "learning_rate": 2.1216499676327826e-07, "loss": 0.405, "step": 40524 }, { "epoch": 0.7044273323019694, "grad_norm": 1.96743289990443, "learning_rate": 2.121419800101889e-07, "loss": 0.1899, "step": 40525 }, { "epoch": 0.7044447148394722, "grad_norm": 1.6131876280037665, "learning_rate": 2.121189641694773e-07, "loss": 0.2765, "step": 40526 }, { "epoch": 0.7044620973769751, "grad_norm": 1.1634606113367474, "learning_rate": 2.1209594924121632e-07, "loss": 0.1973, "step": 40527 }, { "epoch": 0.7044794799144779, "grad_norm": 1.5128921562052842, "learning_rate": 2.1207293522547869e-07, "loss": 0.3499, "step": 40528 }, { "epoch": 0.7044968624519807, "grad_norm": 1.7797854164442852, "learning_rate": 2.1204992212233803e-07, "loss": 0.2169, "step": 40529 }, { "epoch": 0.7045142449894836, "grad_norm": 1.4299505146680367, "learning_rate": 2.1202690993186662e-07, "loss": 0.2705, "step": 40530 }, { "epoch": 0.7045316275269864, "grad_norm": 1.4623929135229128, "learning_rate": 2.1200389865413766e-07, "loss": 0.2139, "step": 40531 }, { "epoch": 0.7045490100644892, "grad_norm": 2.0491020633119637, "learning_rate": 2.1198088828922383e-07, "loss": 0.3278, "step": 40532 }, { "epoch": 0.704566392601992, "grad_norm": 1.5273973894454793, "learning_rate": 2.1195787883719845e-07, "loss": 0.3088, "step": 40533 }, { "epoch": 0.7045837751394949, "grad_norm": 2.0083905335474097, "learning_rate": 2.1193487029813435e-07, "loss": 0.3428, "step": 40534 }, { "epoch": 0.7046011576769977, "grad_norm": 0.977584088122273, "learning_rate": 2.119118626721043e-07, "loss": 0.2874, "step": 40535 }, { "epoch": 0.7046185402145005, "grad_norm": 1.7125280665388216, "learning_rate": 2.1188885595918133e-07, "loss": 0.1624, "step": 40536 }, { "epoch": 0.7046359227520034, "grad_norm": 1.2091456537238674, "learning_rate": 2.1186585015943837e-07, "loss": 0.2883, "step": 40537 }, { "epoch": 0.7046533052895062, "grad_norm": 0.9256819241188942, "learning_rate": 2.1184284527294826e-07, "loss": 0.1451, "step": 40538 }, { "epoch": 0.704670687827009, "grad_norm": 1.624496571727051, "learning_rate": 2.1181984129978402e-07, "loss": 0.1652, "step": 40539 }, { "epoch": 0.7046880703645119, "grad_norm": 1.0641551104514422, "learning_rate": 2.1179683824001827e-07, "loss": 0.1412, "step": 40540 }, { "epoch": 0.7047054529020146, "grad_norm": 1.097732345349669, "learning_rate": 2.1177383609372434e-07, "loss": 0.1949, "step": 40541 }, { "epoch": 0.7047228354395174, "grad_norm": 1.0063806023150148, "learning_rate": 2.1175083486097517e-07, "loss": 0.1764, "step": 40542 }, { "epoch": 0.7047402179770202, "grad_norm": 2.9256858492088296, "learning_rate": 2.1172783454184307e-07, "loss": 0.2477, "step": 40543 }, { "epoch": 0.7047576005145231, "grad_norm": 1.5278361145404884, "learning_rate": 2.1170483513640146e-07, "loss": 0.1582, "step": 40544 }, { "epoch": 0.7047749830520259, "grad_norm": 1.37632644195288, "learning_rate": 2.1168183664472311e-07, "loss": 0.1804, "step": 40545 }, { "epoch": 0.7047923655895287, "grad_norm": 2.0100796118477433, "learning_rate": 2.1165883906688093e-07, "loss": 0.2859, "step": 40546 }, { "epoch": 0.7048097481270316, "grad_norm": 1.4016096223847156, "learning_rate": 2.1163584240294775e-07, "loss": 0.2182, "step": 40547 }, { "epoch": 0.7048271306645344, "grad_norm": 0.9920161776078654, "learning_rate": 2.1161284665299644e-07, "loss": 0.2413, "step": 40548 }, { "epoch": 0.7048445132020372, "grad_norm": 0.9043293268520576, "learning_rate": 2.115898518171e-07, "loss": 0.102, "step": 40549 }, { "epoch": 0.7048618957395401, "grad_norm": 1.1288622949831821, "learning_rate": 2.1156685789533118e-07, "loss": 0.2625, "step": 40550 }, { "epoch": 0.7048792782770429, "grad_norm": 2.2423977748880395, "learning_rate": 2.1154386488776283e-07, "loss": 0.3641, "step": 40551 }, { "epoch": 0.7048966608145457, "grad_norm": 2.143099256746274, "learning_rate": 2.1152087279446806e-07, "loss": 0.2779, "step": 40552 }, { "epoch": 0.7049140433520485, "grad_norm": 2.827804165843385, "learning_rate": 2.1149788161551957e-07, "loss": 0.2414, "step": 40553 }, { "epoch": 0.7049314258895514, "grad_norm": 1.721856188406279, "learning_rate": 2.114748913509905e-07, "loss": 0.2005, "step": 40554 }, { "epoch": 0.7049488084270542, "grad_norm": 2.5167981651472275, "learning_rate": 2.114519020009531e-07, "loss": 0.3321, "step": 40555 }, { "epoch": 0.704966190964557, "grad_norm": 0.9268812230634104, "learning_rate": 2.1142891356548082e-07, "loss": 0.1724, "step": 40556 }, { "epoch": 0.7049835735020599, "grad_norm": 2.468776539045878, "learning_rate": 2.114059260446463e-07, "loss": 0.153, "step": 40557 }, { "epoch": 0.7050009560395627, "grad_norm": 2.1273459066602225, "learning_rate": 2.1138293943852243e-07, "loss": 0.3548, "step": 40558 }, { "epoch": 0.7050183385770655, "grad_norm": 2.5832519938275444, "learning_rate": 2.1135995374718192e-07, "loss": 0.1686, "step": 40559 }, { "epoch": 0.7050357211145684, "grad_norm": 1.090639975474763, "learning_rate": 2.113369689706979e-07, "loss": 0.1665, "step": 40560 }, { "epoch": 0.7050531036520711, "grad_norm": 2.140204764359604, "learning_rate": 2.1131398510914327e-07, "loss": 0.2703, "step": 40561 }, { "epoch": 0.7050704861895739, "grad_norm": 2.7168529916935027, "learning_rate": 2.1129100216259056e-07, "loss": 0.2092, "step": 40562 }, { "epoch": 0.7050878687270767, "grad_norm": 1.7556644703345397, "learning_rate": 2.1126802013111255e-07, "loss": 0.1217, "step": 40563 }, { "epoch": 0.7051052512645796, "grad_norm": 2.192939841452958, "learning_rate": 2.1124503901478246e-07, "loss": 0.3337, "step": 40564 }, { "epoch": 0.7051226338020824, "grad_norm": 1.2098316284844168, "learning_rate": 2.1122205881367293e-07, "loss": 0.3739, "step": 40565 }, { "epoch": 0.7051400163395852, "grad_norm": 3.0607026326814584, "learning_rate": 2.111990795278568e-07, "loss": 0.2381, "step": 40566 }, { "epoch": 0.7051573988770881, "grad_norm": 1.2540045394294492, "learning_rate": 2.1117610115740702e-07, "loss": 0.1748, "step": 40567 }, { "epoch": 0.7051747814145909, "grad_norm": 1.9571168211329641, "learning_rate": 2.1115312370239623e-07, "loss": 0.2162, "step": 40568 }, { "epoch": 0.7051921639520937, "grad_norm": 1.7145308360854623, "learning_rate": 2.111301471628974e-07, "loss": 0.2187, "step": 40569 }, { "epoch": 0.7052095464895966, "grad_norm": 1.995494237180844, "learning_rate": 2.1110717153898334e-07, "loss": 0.2308, "step": 40570 }, { "epoch": 0.7052269290270994, "grad_norm": 1.534699389242244, "learning_rate": 2.110841968307266e-07, "loss": 0.1916, "step": 40571 }, { "epoch": 0.7052443115646022, "grad_norm": 1.0737217866397517, "learning_rate": 2.110612230382004e-07, "loss": 0.2069, "step": 40572 }, { "epoch": 0.705261694102105, "grad_norm": 1.2026535477603137, "learning_rate": 2.1103825016147764e-07, "loss": 0.1928, "step": 40573 }, { "epoch": 0.7052790766396079, "grad_norm": 1.463682647055461, "learning_rate": 2.1101527820063046e-07, "loss": 0.2222, "step": 40574 }, { "epoch": 0.7052964591771107, "grad_norm": 1.7878280170255534, "learning_rate": 2.1099230715573234e-07, "loss": 0.1683, "step": 40575 }, { "epoch": 0.7053138417146135, "grad_norm": 0.8811886666887327, "learning_rate": 2.109693370268558e-07, "loss": 0.2043, "step": 40576 }, { "epoch": 0.7053312242521164, "grad_norm": 1.0514082537172345, "learning_rate": 2.1094636781407378e-07, "loss": 0.2401, "step": 40577 }, { "epoch": 0.7053486067896192, "grad_norm": 2.3094085512057143, "learning_rate": 2.1092339951745892e-07, "loss": 0.1685, "step": 40578 }, { "epoch": 0.705365989327122, "grad_norm": 2.087713479647319, "learning_rate": 2.109004321370839e-07, "loss": 0.1046, "step": 40579 }, { "epoch": 0.7053833718646249, "grad_norm": 1.50797402465654, "learning_rate": 2.108774656730221e-07, "loss": 0.1734, "step": 40580 }, { "epoch": 0.7054007544021276, "grad_norm": 1.5604307363120875, "learning_rate": 2.108545001253457e-07, "loss": 0.2476, "step": 40581 }, { "epoch": 0.7054181369396304, "grad_norm": 2.613104443724747, "learning_rate": 2.1083153549412763e-07, "loss": 0.1921, "step": 40582 }, { "epoch": 0.7054355194771332, "grad_norm": 3.104114922706612, "learning_rate": 2.1080857177944083e-07, "loss": 0.3483, "step": 40583 }, { "epoch": 0.7054529020146361, "grad_norm": 1.237146172146935, "learning_rate": 2.107856089813581e-07, "loss": 0.1924, "step": 40584 }, { "epoch": 0.7054702845521389, "grad_norm": 1.7255950050903976, "learning_rate": 2.1076264709995207e-07, "loss": 0.1968, "step": 40585 }, { "epoch": 0.7054876670896417, "grad_norm": 1.4616131677027586, "learning_rate": 2.1073968613529564e-07, "loss": 0.1256, "step": 40586 }, { "epoch": 0.7055050496271446, "grad_norm": 1.0549543300582438, "learning_rate": 2.107167260874615e-07, "loss": 0.126, "step": 40587 }, { "epoch": 0.7055224321646474, "grad_norm": 2.4540226948303547, "learning_rate": 2.106937669565224e-07, "loss": 0.3185, "step": 40588 }, { "epoch": 0.7055398147021502, "grad_norm": 1.390650119451682, "learning_rate": 2.106708087425512e-07, "loss": 0.1461, "step": 40589 }, { "epoch": 0.705557197239653, "grad_norm": 1.659015683768516, "learning_rate": 2.1064785144562052e-07, "loss": 0.2388, "step": 40590 }, { "epoch": 0.7055745797771559, "grad_norm": 1.026499549993219, "learning_rate": 2.1062489506580333e-07, "loss": 0.1844, "step": 40591 }, { "epoch": 0.7055919623146587, "grad_norm": 1.0381832082414995, "learning_rate": 2.1060193960317252e-07, "loss": 0.2332, "step": 40592 }, { "epoch": 0.7056093448521615, "grad_norm": 1.3644574626505206, "learning_rate": 2.105789850578004e-07, "loss": 0.1865, "step": 40593 }, { "epoch": 0.7056267273896644, "grad_norm": 1.8684758683615186, "learning_rate": 2.105560314297598e-07, "loss": 0.212, "step": 40594 }, { "epoch": 0.7056441099271672, "grad_norm": 1.2963654059102003, "learning_rate": 2.105330787191238e-07, "loss": 0.2168, "step": 40595 }, { "epoch": 0.70566149246467, "grad_norm": 1.0496899147089545, "learning_rate": 2.10510126925965e-07, "loss": 0.2465, "step": 40596 }, { "epoch": 0.7056788750021729, "grad_norm": 0.9555839403082638, "learning_rate": 2.1048717605035603e-07, "loss": 0.2741, "step": 40597 }, { "epoch": 0.7056962575396757, "grad_norm": 1.1764213693653058, "learning_rate": 2.1046422609236981e-07, "loss": 0.2223, "step": 40598 }, { "epoch": 0.7057136400771785, "grad_norm": 1.615399385496396, "learning_rate": 2.1044127705207892e-07, "loss": 0.1985, "step": 40599 }, { "epoch": 0.7057310226146812, "grad_norm": 1.8406434705148913, "learning_rate": 2.104183289295562e-07, "loss": 0.1891, "step": 40600 }, { "epoch": 0.7057484051521841, "grad_norm": 1.1319963578575918, "learning_rate": 2.1039538172487436e-07, "loss": 0.3237, "step": 40601 }, { "epoch": 0.7057657876896869, "grad_norm": 1.4364633232130875, "learning_rate": 2.1037243543810594e-07, "loss": 0.1718, "step": 40602 }, { "epoch": 0.7057831702271897, "grad_norm": 0.953607744155128, "learning_rate": 2.1034949006932407e-07, "loss": 0.163, "step": 40603 }, { "epoch": 0.7058005527646926, "grad_norm": 1.5036486740401216, "learning_rate": 2.103265456186012e-07, "loss": 0.2265, "step": 40604 }, { "epoch": 0.7058179353021954, "grad_norm": 0.8386173314278784, "learning_rate": 2.1030360208601017e-07, "loss": 0.222, "step": 40605 }, { "epoch": 0.7058353178396982, "grad_norm": 1.9673792281089282, "learning_rate": 2.102806594716236e-07, "loss": 0.3353, "step": 40606 }, { "epoch": 0.705852700377201, "grad_norm": 3.7981987568089512, "learning_rate": 2.1025771777551426e-07, "loss": 0.2312, "step": 40607 }, { "epoch": 0.7058700829147039, "grad_norm": 1.9496029383901927, "learning_rate": 2.1023477699775487e-07, "loss": 0.2625, "step": 40608 }, { "epoch": 0.7058874654522067, "grad_norm": 2.0613372918430266, "learning_rate": 2.1021183713841794e-07, "loss": 0.2403, "step": 40609 }, { "epoch": 0.7059048479897095, "grad_norm": 1.6236049353976918, "learning_rate": 2.1018889819757658e-07, "loss": 0.2387, "step": 40610 }, { "epoch": 0.7059222305272124, "grad_norm": 1.951168688022737, "learning_rate": 2.1016596017530347e-07, "loss": 0.1782, "step": 40611 }, { "epoch": 0.7059396130647152, "grad_norm": 1.4697416680978401, "learning_rate": 2.1014302307167093e-07, "loss": 0.2203, "step": 40612 }, { "epoch": 0.705956995602218, "grad_norm": 1.5534625071381363, "learning_rate": 2.1012008688675164e-07, "loss": 0.1865, "step": 40613 }, { "epoch": 0.7059743781397209, "grad_norm": 1.3665253886121491, "learning_rate": 2.1009715162061876e-07, "loss": 0.1275, "step": 40614 }, { "epoch": 0.7059917606772237, "grad_norm": 4.352664219309952, "learning_rate": 2.1007421727334468e-07, "loss": 0.2489, "step": 40615 }, { "epoch": 0.7060091432147265, "grad_norm": 1.7973400009270744, "learning_rate": 2.1005128384500216e-07, "loss": 0.293, "step": 40616 }, { "epoch": 0.7060265257522294, "grad_norm": 1.445385054556629, "learning_rate": 2.1002835133566387e-07, "loss": 0.1241, "step": 40617 }, { "epoch": 0.7060439082897322, "grad_norm": 1.4540120568623287, "learning_rate": 2.1000541974540254e-07, "loss": 0.1776, "step": 40618 }, { "epoch": 0.706061290827235, "grad_norm": 1.0548480713520159, "learning_rate": 2.0998248907429083e-07, "loss": 0.1728, "step": 40619 }, { "epoch": 0.7060786733647377, "grad_norm": 3.684793405133197, "learning_rate": 2.0995955932240138e-07, "loss": 0.2046, "step": 40620 }, { "epoch": 0.7060960559022406, "grad_norm": 0.9361230605517917, "learning_rate": 2.099366304898067e-07, "loss": 0.1066, "step": 40621 }, { "epoch": 0.7061134384397434, "grad_norm": 1.5248409108328962, "learning_rate": 2.0991370257657987e-07, "loss": 0.1833, "step": 40622 }, { "epoch": 0.7061308209772462, "grad_norm": 1.8381829447523526, "learning_rate": 2.0989077558279328e-07, "loss": 0.2021, "step": 40623 }, { "epoch": 0.706148203514749, "grad_norm": 1.7508275087102845, "learning_rate": 2.0986784950851972e-07, "loss": 0.157, "step": 40624 }, { "epoch": 0.7061655860522519, "grad_norm": 1.47146252985327, "learning_rate": 2.0984492435383184e-07, "loss": 0.2209, "step": 40625 }, { "epoch": 0.7061829685897547, "grad_norm": 2.6403827894380463, "learning_rate": 2.0982200011880218e-07, "loss": 0.2551, "step": 40626 }, { "epoch": 0.7062003511272575, "grad_norm": 1.1584191964288821, "learning_rate": 2.0979907680350355e-07, "loss": 0.1481, "step": 40627 }, { "epoch": 0.7062177336647604, "grad_norm": 1.1397450724408713, "learning_rate": 2.097761544080085e-07, "loss": 0.1298, "step": 40628 }, { "epoch": 0.7062351162022632, "grad_norm": 6.501815787110336, "learning_rate": 2.0975323293238955e-07, "loss": 0.2247, "step": 40629 }, { "epoch": 0.706252498739766, "grad_norm": 1.54231712443854, "learning_rate": 2.097303123767199e-07, "loss": 0.1348, "step": 40630 }, { "epoch": 0.7062698812772689, "grad_norm": 3.229655086059008, "learning_rate": 2.0970739274107162e-07, "loss": 0.2311, "step": 40631 }, { "epoch": 0.7062872638147717, "grad_norm": 1.244622363462642, "learning_rate": 2.0968447402551736e-07, "loss": 0.1853, "step": 40632 }, { "epoch": 0.7063046463522745, "grad_norm": 1.7169771537303584, "learning_rate": 2.0966155623013016e-07, "loss": 0.2056, "step": 40633 }, { "epoch": 0.7063220288897774, "grad_norm": 2.1294821561686494, "learning_rate": 2.0963863935498248e-07, "loss": 0.2796, "step": 40634 }, { "epoch": 0.7063394114272802, "grad_norm": 3.3821479022058356, "learning_rate": 2.0961572340014688e-07, "loss": 0.2574, "step": 40635 }, { "epoch": 0.706356793964783, "grad_norm": 1.5078223041843908, "learning_rate": 2.095928083656961e-07, "loss": 0.1392, "step": 40636 }, { "epoch": 0.7063741765022858, "grad_norm": 1.4715165103802585, "learning_rate": 2.0956989425170267e-07, "loss": 0.1598, "step": 40637 }, { "epoch": 0.7063915590397887, "grad_norm": 1.4227408547368692, "learning_rate": 2.095469810582393e-07, "loss": 0.1664, "step": 40638 }, { "epoch": 0.7064089415772915, "grad_norm": 1.5865111269985814, "learning_rate": 2.0952406878537853e-07, "loss": 0.1605, "step": 40639 }, { "epoch": 0.7064263241147942, "grad_norm": 1.582593107191405, "learning_rate": 2.095011574331929e-07, "loss": 0.1766, "step": 40640 }, { "epoch": 0.706443706652297, "grad_norm": 3.23727520200436, "learning_rate": 2.094782470017553e-07, "loss": 0.3119, "step": 40641 }, { "epoch": 0.7064610891897999, "grad_norm": 1.6336126159859687, "learning_rate": 2.094553374911382e-07, "loss": 0.1267, "step": 40642 }, { "epoch": 0.7064784717273027, "grad_norm": 0.9794511698475862, "learning_rate": 2.0943242890141443e-07, "loss": 0.1007, "step": 40643 }, { "epoch": 0.7064958542648055, "grad_norm": 1.2625160727973208, "learning_rate": 2.0940952123265597e-07, "loss": 0.2437, "step": 40644 }, { "epoch": 0.7065132368023084, "grad_norm": 3.7270859124315323, "learning_rate": 2.093866144849361e-07, "loss": 0.1431, "step": 40645 }, { "epoch": 0.7065306193398112, "grad_norm": 1.3052846237529039, "learning_rate": 2.0936370865832708e-07, "loss": 0.1387, "step": 40646 }, { "epoch": 0.706548001877314, "grad_norm": 1.5822537010275888, "learning_rate": 2.093408037529017e-07, "loss": 0.2734, "step": 40647 }, { "epoch": 0.7065653844148169, "grad_norm": 1.4506892286809947, "learning_rate": 2.0931789976873222e-07, "loss": 0.17, "step": 40648 }, { "epoch": 0.7065827669523197, "grad_norm": 4.681161252679449, "learning_rate": 2.092949967058918e-07, "loss": 0.2899, "step": 40649 }, { "epoch": 0.7066001494898225, "grad_norm": 1.7834144044296518, "learning_rate": 2.0927209456445256e-07, "loss": 0.1707, "step": 40650 }, { "epoch": 0.7066175320273254, "grad_norm": 2.24366849372164, "learning_rate": 2.092491933444872e-07, "loss": 0.2694, "step": 40651 }, { "epoch": 0.7066349145648282, "grad_norm": 1.6994020635075344, "learning_rate": 2.0922629304606814e-07, "loss": 0.161, "step": 40652 }, { "epoch": 0.706652297102331, "grad_norm": 1.9246782078095475, "learning_rate": 2.0920339366926836e-07, "loss": 0.1797, "step": 40653 }, { "epoch": 0.7066696796398338, "grad_norm": 3.4961618707590465, "learning_rate": 2.0918049521416026e-07, "loss": 0.1989, "step": 40654 }, { "epoch": 0.7066870621773367, "grad_norm": 0.7325800135610095, "learning_rate": 2.091575976808164e-07, "loss": 0.1422, "step": 40655 }, { "epoch": 0.7067044447148395, "grad_norm": 3.824683424548151, "learning_rate": 2.0913470106930937e-07, "loss": 0.1866, "step": 40656 }, { "epoch": 0.7067218272523423, "grad_norm": 1.7063940166350593, "learning_rate": 2.0911180537971167e-07, "loss": 0.1267, "step": 40657 }, { "epoch": 0.7067392097898452, "grad_norm": 1.687753287289861, "learning_rate": 2.0908891061209594e-07, "loss": 0.1768, "step": 40658 }, { "epoch": 0.706756592327348, "grad_norm": 2.152837271879697, "learning_rate": 2.0906601676653473e-07, "loss": 0.2158, "step": 40659 }, { "epoch": 0.7067739748648507, "grad_norm": 4.106668800022656, "learning_rate": 2.0904312384310046e-07, "loss": 0.1781, "step": 40660 }, { "epoch": 0.7067913574023535, "grad_norm": 0.8282779539786115, "learning_rate": 2.09020231841866e-07, "loss": 0.1296, "step": 40661 }, { "epoch": 0.7068087399398564, "grad_norm": 2.0102065535649034, "learning_rate": 2.0899734076290388e-07, "loss": 0.1745, "step": 40662 }, { "epoch": 0.7068261224773592, "grad_norm": 1.5696074036739434, "learning_rate": 2.0897445060628622e-07, "loss": 0.1269, "step": 40663 }, { "epoch": 0.706843505014862, "grad_norm": 1.15191913210044, "learning_rate": 2.0895156137208598e-07, "loss": 0.1266, "step": 40664 }, { "epoch": 0.7068608875523649, "grad_norm": 1.5562135024559494, "learning_rate": 2.0892867306037564e-07, "loss": 0.1301, "step": 40665 }, { "epoch": 0.7068782700898677, "grad_norm": 1.8873154129597218, "learning_rate": 2.0890578567122768e-07, "loss": 0.1881, "step": 40666 }, { "epoch": 0.7068956526273705, "grad_norm": 1.4494803402225158, "learning_rate": 2.0888289920471464e-07, "loss": 0.1794, "step": 40667 }, { "epoch": 0.7069130351648734, "grad_norm": 2.4568388074995795, "learning_rate": 2.088600136609091e-07, "loss": 0.1777, "step": 40668 }, { "epoch": 0.7069304177023762, "grad_norm": 1.090116964192912, "learning_rate": 2.088371290398836e-07, "loss": 0.1528, "step": 40669 }, { "epoch": 0.706947800239879, "grad_norm": 1.370257785008208, "learning_rate": 2.088142453417106e-07, "loss": 0.2029, "step": 40670 }, { "epoch": 0.7069651827773819, "grad_norm": 1.4340372450768506, "learning_rate": 2.0879136256646257e-07, "loss": 0.2352, "step": 40671 }, { "epoch": 0.7069825653148847, "grad_norm": 1.657850198950614, "learning_rate": 2.0876848071421228e-07, "loss": 0.1762, "step": 40672 }, { "epoch": 0.7069999478523875, "grad_norm": 1.6395201563711668, "learning_rate": 2.0874559978503216e-07, "loss": 0.1957, "step": 40673 }, { "epoch": 0.7070173303898903, "grad_norm": 0.9064391008741375, "learning_rate": 2.0872271977899486e-07, "loss": 0.1153, "step": 40674 }, { "epoch": 0.7070347129273932, "grad_norm": 1.2491542437753855, "learning_rate": 2.0869984069617237e-07, "loss": 0.1295, "step": 40675 }, { "epoch": 0.707052095464896, "grad_norm": 1.9982007187364745, "learning_rate": 2.0867696253663775e-07, "loss": 0.25, "step": 40676 }, { "epoch": 0.7070694780023988, "grad_norm": 2.001754759412306, "learning_rate": 2.0865408530046337e-07, "loss": 0.1515, "step": 40677 }, { "epoch": 0.7070868605399017, "grad_norm": 2.2400228532288247, "learning_rate": 2.0863120898772162e-07, "loss": 0.3298, "step": 40678 }, { "epoch": 0.7071042430774045, "grad_norm": 1.5291684724250665, "learning_rate": 2.08608333598485e-07, "loss": 0.2321, "step": 40679 }, { "epoch": 0.7071216256149072, "grad_norm": 2.506406649738037, "learning_rate": 2.0858545913282626e-07, "loss": 0.2924, "step": 40680 }, { "epoch": 0.70713900815241, "grad_norm": 1.835705977449204, "learning_rate": 2.085625855908179e-07, "loss": 0.1744, "step": 40681 }, { "epoch": 0.7071563906899129, "grad_norm": 1.1669795601864486, "learning_rate": 2.0853971297253216e-07, "loss": 0.2112, "step": 40682 }, { "epoch": 0.7071737732274157, "grad_norm": 1.1327934462215514, "learning_rate": 2.0851684127804142e-07, "loss": 0.098, "step": 40683 }, { "epoch": 0.7071911557649185, "grad_norm": 1.5132050432772632, "learning_rate": 2.0849397050741857e-07, "loss": 0.1318, "step": 40684 }, { "epoch": 0.7072085383024214, "grad_norm": 5.075188483042055, "learning_rate": 2.0847110066073592e-07, "loss": 0.2908, "step": 40685 }, { "epoch": 0.7072259208399242, "grad_norm": 1.179908764080461, "learning_rate": 2.0844823173806603e-07, "loss": 0.2123, "step": 40686 }, { "epoch": 0.707243303377427, "grad_norm": 3.3616777811757164, "learning_rate": 2.084253637394812e-07, "loss": 0.3073, "step": 40687 }, { "epoch": 0.7072606859149299, "grad_norm": 2.2941973893700913, "learning_rate": 2.0840249666505415e-07, "loss": 0.1834, "step": 40688 }, { "epoch": 0.7072780684524327, "grad_norm": 1.2907110188866464, "learning_rate": 2.083796305148572e-07, "loss": 0.11, "step": 40689 }, { "epoch": 0.7072954509899355, "grad_norm": 1.6155710468522573, "learning_rate": 2.083567652889628e-07, "loss": 0.167, "step": 40690 }, { "epoch": 0.7073128335274383, "grad_norm": 1.74308837474506, "learning_rate": 2.0833390098744335e-07, "loss": 0.1777, "step": 40691 }, { "epoch": 0.7073302160649412, "grad_norm": 2.1316943856059973, "learning_rate": 2.0831103761037167e-07, "loss": 0.2258, "step": 40692 }, { "epoch": 0.707347598602444, "grad_norm": 1.2609405351230711, "learning_rate": 2.082881751578201e-07, "loss": 0.2204, "step": 40693 }, { "epoch": 0.7073649811399468, "grad_norm": 1.8233834802180187, "learning_rate": 2.0826531362986073e-07, "loss": 0.2554, "step": 40694 }, { "epoch": 0.7073823636774497, "grad_norm": 1.3488075632945413, "learning_rate": 2.0824245302656647e-07, "loss": 0.1418, "step": 40695 }, { "epoch": 0.7073997462149525, "grad_norm": 1.5496492125320231, "learning_rate": 2.082195933480096e-07, "loss": 0.2664, "step": 40696 }, { "epoch": 0.7074171287524553, "grad_norm": 1.6460702478682867, "learning_rate": 2.0819673459426258e-07, "loss": 0.1806, "step": 40697 }, { "epoch": 0.7074345112899582, "grad_norm": 1.0020124681390612, "learning_rate": 2.0817387676539765e-07, "loss": 0.1957, "step": 40698 }, { "epoch": 0.707451893827461, "grad_norm": 1.5001723198257857, "learning_rate": 2.0815101986148776e-07, "loss": 0.1834, "step": 40699 }, { "epoch": 0.7074692763649637, "grad_norm": 1.0441506404741394, "learning_rate": 2.0812816388260519e-07, "loss": 0.1497, "step": 40700 }, { "epoch": 0.7074866589024665, "grad_norm": 1.219181376563355, "learning_rate": 2.0810530882882206e-07, "loss": 0.2313, "step": 40701 }, { "epoch": 0.7075040414399694, "grad_norm": 1.4634002540919255, "learning_rate": 2.0808245470021085e-07, "loss": 0.3706, "step": 40702 }, { "epoch": 0.7075214239774722, "grad_norm": 1.0847023959156625, "learning_rate": 2.0805960149684436e-07, "loss": 0.2058, "step": 40703 }, { "epoch": 0.707538806514975, "grad_norm": 2.004827935734022, "learning_rate": 2.0803674921879482e-07, "loss": 0.2635, "step": 40704 }, { "epoch": 0.7075561890524779, "grad_norm": 1.6955466667509094, "learning_rate": 2.080138978661347e-07, "loss": 0.2472, "step": 40705 }, { "epoch": 0.7075735715899807, "grad_norm": 2.26499215801003, "learning_rate": 2.0799104743893638e-07, "loss": 0.237, "step": 40706 }, { "epoch": 0.7075909541274835, "grad_norm": 2.6117752379374215, "learning_rate": 2.0796819793727226e-07, "loss": 0.2075, "step": 40707 }, { "epoch": 0.7076083366649863, "grad_norm": 1.7521172927739808, "learning_rate": 2.0794534936121484e-07, "loss": 0.2688, "step": 40708 }, { "epoch": 0.7076257192024892, "grad_norm": 1.1783612521700944, "learning_rate": 2.0792250171083653e-07, "loss": 0.1553, "step": 40709 }, { "epoch": 0.707643101739992, "grad_norm": 2.00956226330984, "learning_rate": 2.0789965498620948e-07, "loss": 0.2163, "step": 40710 }, { "epoch": 0.7076604842774948, "grad_norm": 1.443352822548445, "learning_rate": 2.0787680918740657e-07, "loss": 0.2396, "step": 40711 }, { "epoch": 0.7076778668149977, "grad_norm": 1.397033022791174, "learning_rate": 2.0785396431450014e-07, "loss": 0.2119, "step": 40712 }, { "epoch": 0.7076952493525005, "grad_norm": 1.496369576289404, "learning_rate": 2.0783112036756207e-07, "loss": 0.2194, "step": 40713 }, { "epoch": 0.7077126318900033, "grad_norm": 2.2470734503304506, "learning_rate": 2.0780827734666534e-07, "loss": 0.2628, "step": 40714 }, { "epoch": 0.7077300144275062, "grad_norm": 1.319207033045248, "learning_rate": 2.0778543525188213e-07, "loss": 0.2148, "step": 40715 }, { "epoch": 0.707747396965009, "grad_norm": 1.039471584507508, "learning_rate": 2.077625940832849e-07, "loss": 0.1912, "step": 40716 }, { "epoch": 0.7077647795025118, "grad_norm": 1.3069205001431383, "learning_rate": 2.0773975384094594e-07, "loss": 0.1967, "step": 40717 }, { "epoch": 0.7077821620400147, "grad_norm": 2.3322071346826463, "learning_rate": 2.0771691452493778e-07, "loss": 0.1462, "step": 40718 }, { "epoch": 0.7077995445775175, "grad_norm": 1.339745525381237, "learning_rate": 2.076940761353327e-07, "loss": 0.161, "step": 40719 }, { "epoch": 0.7078169271150202, "grad_norm": 1.9045463777388014, "learning_rate": 2.076712386722031e-07, "loss": 0.3126, "step": 40720 }, { "epoch": 0.707834309652523, "grad_norm": 2.019450883529216, "learning_rate": 2.0764840213562123e-07, "loss": 0.1471, "step": 40721 }, { "epoch": 0.7078516921900259, "grad_norm": 1.3512035313950508, "learning_rate": 2.0762556652565983e-07, "loss": 0.1683, "step": 40722 }, { "epoch": 0.7078690747275287, "grad_norm": 1.805597272373207, "learning_rate": 2.076027318423911e-07, "loss": 0.3539, "step": 40723 }, { "epoch": 0.7078864572650315, "grad_norm": 1.9349866145559071, "learning_rate": 2.0757989808588732e-07, "loss": 0.2651, "step": 40724 }, { "epoch": 0.7079038398025344, "grad_norm": 1.5695393078250492, "learning_rate": 2.0755706525622096e-07, "loss": 0.2023, "step": 40725 }, { "epoch": 0.7079212223400372, "grad_norm": 1.5236056293239097, "learning_rate": 2.0753423335346437e-07, "loss": 0.2018, "step": 40726 }, { "epoch": 0.70793860487754, "grad_norm": 1.5595105317474711, "learning_rate": 2.0751140237768994e-07, "loss": 0.2477, "step": 40727 }, { "epoch": 0.7079559874150428, "grad_norm": 1.710961141918845, "learning_rate": 2.0748857232897e-07, "loss": 0.1978, "step": 40728 }, { "epoch": 0.7079733699525457, "grad_norm": 1.6316126042491172, "learning_rate": 2.074657432073767e-07, "loss": 0.1571, "step": 40729 }, { "epoch": 0.7079907524900485, "grad_norm": 1.4130830387724902, "learning_rate": 2.0744291501298284e-07, "loss": 0.2525, "step": 40730 }, { "epoch": 0.7080081350275513, "grad_norm": 1.2383644683020798, "learning_rate": 2.0742008774586067e-07, "loss": 0.1655, "step": 40731 }, { "epoch": 0.7080255175650542, "grad_norm": 1.7819432913286821, "learning_rate": 2.0739726140608227e-07, "loss": 0.2073, "step": 40732 }, { "epoch": 0.708042900102557, "grad_norm": 1.4288747326156208, "learning_rate": 2.0737443599372e-07, "loss": 0.3105, "step": 40733 }, { "epoch": 0.7080602826400598, "grad_norm": 3.669187725032161, "learning_rate": 2.073516115088465e-07, "loss": 0.167, "step": 40734 }, { "epoch": 0.7080776651775627, "grad_norm": 1.7310822992275474, "learning_rate": 2.0732878795153398e-07, "loss": 0.2197, "step": 40735 }, { "epoch": 0.7080950477150655, "grad_norm": 2.006563909168443, "learning_rate": 2.0730596532185468e-07, "loss": 0.2514, "step": 40736 }, { "epoch": 0.7081124302525683, "grad_norm": 1.3740914916701257, "learning_rate": 2.0728314361988108e-07, "loss": 0.1724, "step": 40737 }, { "epoch": 0.7081298127900711, "grad_norm": 1.2030641505692168, "learning_rate": 2.0726032284568544e-07, "loss": 0.1232, "step": 40738 }, { "epoch": 0.7081471953275739, "grad_norm": 1.5659524624271584, "learning_rate": 2.0723750299934012e-07, "loss": 0.2581, "step": 40739 }, { "epoch": 0.7081645778650767, "grad_norm": 1.1725586619202635, "learning_rate": 2.072146840809174e-07, "loss": 0.2937, "step": 40740 }, { "epoch": 0.7081819604025795, "grad_norm": 0.907250487190879, "learning_rate": 2.071918660904895e-07, "loss": 0.1636, "step": 40741 }, { "epoch": 0.7081993429400824, "grad_norm": 1.3230790115914053, "learning_rate": 2.0716904902812905e-07, "loss": 0.1468, "step": 40742 }, { "epoch": 0.7082167254775852, "grad_norm": 1.5155033753654372, "learning_rate": 2.0714623289390814e-07, "loss": 0.1059, "step": 40743 }, { "epoch": 0.708234108015088, "grad_norm": 1.6201579154902874, "learning_rate": 2.071234176878992e-07, "loss": 0.2091, "step": 40744 }, { "epoch": 0.7082514905525908, "grad_norm": 1.680461554372028, "learning_rate": 2.071006034101745e-07, "loss": 0.2083, "step": 40745 }, { "epoch": 0.7082688730900937, "grad_norm": 1.1087574325543206, "learning_rate": 2.0707779006080633e-07, "loss": 0.2038, "step": 40746 }, { "epoch": 0.7082862556275965, "grad_norm": 1.5868742721409457, "learning_rate": 2.0705497763986702e-07, "loss": 0.1721, "step": 40747 }, { "epoch": 0.7083036381650993, "grad_norm": 1.8992917216858363, "learning_rate": 2.070321661474288e-07, "loss": 0.1468, "step": 40748 }, { "epoch": 0.7083210207026022, "grad_norm": 1.1164389504937415, "learning_rate": 2.0700935558356392e-07, "loss": 0.1738, "step": 40749 }, { "epoch": 0.708338403240105, "grad_norm": 1.5139307757209652, "learning_rate": 2.0698654594834514e-07, "loss": 0.1614, "step": 40750 }, { "epoch": 0.7083557857776078, "grad_norm": 1.762135035537751, "learning_rate": 2.0696373724184418e-07, "loss": 0.2321, "step": 40751 }, { "epoch": 0.7083731683151107, "grad_norm": 2.109809551637838, "learning_rate": 2.0694092946413343e-07, "loss": 0.1707, "step": 40752 }, { "epoch": 0.7083905508526135, "grad_norm": 1.3253693268270217, "learning_rate": 2.0691812261528552e-07, "loss": 0.2865, "step": 40753 }, { "epoch": 0.7084079333901163, "grad_norm": 1.4089594087398305, "learning_rate": 2.0689531669537247e-07, "loss": 0.1834, "step": 40754 }, { "epoch": 0.7084253159276191, "grad_norm": 1.3389958069298404, "learning_rate": 2.068725117044666e-07, "loss": 0.3472, "step": 40755 }, { "epoch": 0.708442698465122, "grad_norm": 1.846889110919136, "learning_rate": 2.0684970764264032e-07, "loss": 0.2514, "step": 40756 }, { "epoch": 0.7084600810026248, "grad_norm": 1.300927459001129, "learning_rate": 2.0682690450996577e-07, "loss": 0.3007, "step": 40757 }, { "epoch": 0.7084774635401276, "grad_norm": 2.0838334517567976, "learning_rate": 2.0680410230651523e-07, "loss": 0.1991, "step": 40758 }, { "epoch": 0.7084948460776304, "grad_norm": 1.9104281380411297, "learning_rate": 2.06781301032361e-07, "loss": 0.2806, "step": 40759 }, { "epoch": 0.7085122286151332, "grad_norm": 1.136321086816767, "learning_rate": 2.0675850068757517e-07, "loss": 0.1847, "step": 40760 }, { "epoch": 0.708529611152636, "grad_norm": 1.843967723499435, "learning_rate": 2.067357012722304e-07, "loss": 0.2691, "step": 40761 }, { "epoch": 0.7085469936901388, "grad_norm": 1.400739672969548, "learning_rate": 2.0671290278639875e-07, "loss": 0.2361, "step": 40762 }, { "epoch": 0.7085643762276417, "grad_norm": 1.9209932507031786, "learning_rate": 2.0669010523015263e-07, "loss": 0.2376, "step": 40763 }, { "epoch": 0.7085817587651445, "grad_norm": 1.107804370059958, "learning_rate": 2.0666730860356374e-07, "loss": 0.2056, "step": 40764 }, { "epoch": 0.7085991413026473, "grad_norm": 1.1587458046425896, "learning_rate": 2.0664451290670497e-07, "loss": 0.2679, "step": 40765 }, { "epoch": 0.7086165238401502, "grad_norm": 2.191230772166496, "learning_rate": 2.0662171813964835e-07, "loss": 0.273, "step": 40766 }, { "epoch": 0.708633906377653, "grad_norm": 1.2023228083372823, "learning_rate": 2.0659892430246607e-07, "loss": 0.2745, "step": 40767 }, { "epoch": 0.7086512889151558, "grad_norm": 1.6244398674822742, "learning_rate": 2.0657613139523027e-07, "loss": 0.2109, "step": 40768 }, { "epoch": 0.7086686714526587, "grad_norm": 2.125767332957206, "learning_rate": 2.0655333941801372e-07, "loss": 0.2577, "step": 40769 }, { "epoch": 0.7086860539901615, "grad_norm": 1.9571414164849, "learning_rate": 2.0653054837088808e-07, "loss": 0.3226, "step": 40770 }, { "epoch": 0.7087034365276643, "grad_norm": 1.5053611616303249, "learning_rate": 2.065077582539258e-07, "loss": 0.0961, "step": 40771 }, { "epoch": 0.7087208190651672, "grad_norm": 1.4574875936087273, "learning_rate": 2.0648496906719891e-07, "loss": 0.2225, "step": 40772 }, { "epoch": 0.70873820160267, "grad_norm": 1.1444261825234796, "learning_rate": 2.0646218081078005e-07, "loss": 0.2607, "step": 40773 }, { "epoch": 0.7087555841401728, "grad_norm": 1.575608514443196, "learning_rate": 2.0643939348474122e-07, "loss": 0.221, "step": 40774 }, { "epoch": 0.7087729666776756, "grad_norm": 3.541660480459226, "learning_rate": 2.0641660708915466e-07, "loss": 0.2495, "step": 40775 }, { "epoch": 0.7087903492151785, "grad_norm": 1.5694108067254353, "learning_rate": 2.0639382162409258e-07, "loss": 0.3089, "step": 40776 }, { "epoch": 0.7088077317526813, "grad_norm": 1.9077212550104758, "learning_rate": 2.0637103708962722e-07, "loss": 0.1575, "step": 40777 }, { "epoch": 0.7088251142901841, "grad_norm": 2.4004798247826766, "learning_rate": 2.063482534858308e-07, "loss": 0.2221, "step": 40778 }, { "epoch": 0.7088424968276869, "grad_norm": 1.2866462975011652, "learning_rate": 2.0632547081277552e-07, "loss": 0.2366, "step": 40779 }, { "epoch": 0.7088598793651897, "grad_norm": 1.0688361545501048, "learning_rate": 2.0630268907053344e-07, "loss": 0.1383, "step": 40780 }, { "epoch": 0.7088772619026925, "grad_norm": 2.5702560157915255, "learning_rate": 2.0627990825917708e-07, "loss": 0.2872, "step": 40781 }, { "epoch": 0.7088946444401953, "grad_norm": 1.4424784102639285, "learning_rate": 2.062571283787787e-07, "loss": 0.1788, "step": 40782 }, { "epoch": 0.7089120269776982, "grad_norm": 1.3187987257836036, "learning_rate": 2.0623434942940988e-07, "loss": 0.3102, "step": 40783 }, { "epoch": 0.708929409515201, "grad_norm": 0.8994326326932299, "learning_rate": 2.062115714111434e-07, "loss": 0.1528, "step": 40784 }, { "epoch": 0.7089467920527038, "grad_norm": 2.3943404504563364, "learning_rate": 2.061887943240513e-07, "loss": 0.2106, "step": 40785 }, { "epoch": 0.7089641745902067, "grad_norm": 1.1156333335609556, "learning_rate": 2.0616601816820577e-07, "loss": 0.2289, "step": 40786 }, { "epoch": 0.7089815571277095, "grad_norm": 2.187090707100362, "learning_rate": 2.061432429436788e-07, "loss": 0.3532, "step": 40787 }, { "epoch": 0.7089989396652123, "grad_norm": 1.6301058965844892, "learning_rate": 2.0612046865054311e-07, "loss": 0.2136, "step": 40788 }, { "epoch": 0.7090163222027152, "grad_norm": 1.9620607788949587, "learning_rate": 2.060976952888704e-07, "loss": 0.3004, "step": 40789 }, { "epoch": 0.709033704740218, "grad_norm": 1.5562208116211227, "learning_rate": 2.0607492285873297e-07, "loss": 0.1633, "step": 40790 }, { "epoch": 0.7090510872777208, "grad_norm": 1.2505495516370768, "learning_rate": 2.0605215136020282e-07, "loss": 0.2047, "step": 40791 }, { "epoch": 0.7090684698152236, "grad_norm": 1.3555327848875394, "learning_rate": 2.0602938079335247e-07, "loss": 0.2976, "step": 40792 }, { "epoch": 0.7090858523527265, "grad_norm": 1.2484509251316, "learning_rate": 2.0600661115825402e-07, "loss": 0.1621, "step": 40793 }, { "epoch": 0.7091032348902293, "grad_norm": 0.8599539003596249, "learning_rate": 2.0598384245497952e-07, "loss": 0.1351, "step": 40794 }, { "epoch": 0.7091206174277321, "grad_norm": 1.3956238467981341, "learning_rate": 2.0596107468360112e-07, "loss": 0.2284, "step": 40795 }, { "epoch": 0.709137999965235, "grad_norm": 0.8032991173683711, "learning_rate": 2.0593830784419113e-07, "loss": 0.2001, "step": 40796 }, { "epoch": 0.7091553825027378, "grad_norm": 1.636353768531306, "learning_rate": 2.059155419368216e-07, "loss": 0.1786, "step": 40797 }, { "epoch": 0.7091727650402406, "grad_norm": 1.773880254813415, "learning_rate": 2.058927769615647e-07, "loss": 0.1469, "step": 40798 }, { "epoch": 0.7091901475777433, "grad_norm": 1.2581436702334037, "learning_rate": 2.0587001291849243e-07, "loss": 0.2303, "step": 40799 }, { "epoch": 0.7092075301152462, "grad_norm": 1.4288885677969203, "learning_rate": 2.0584724980767732e-07, "loss": 0.2467, "step": 40800 }, { "epoch": 0.709224912652749, "grad_norm": 3.66862038649854, "learning_rate": 2.0582448762919142e-07, "loss": 0.2138, "step": 40801 }, { "epoch": 0.7092422951902518, "grad_norm": 1.7280903351460304, "learning_rate": 2.0580172638310645e-07, "loss": 0.1717, "step": 40802 }, { "epoch": 0.7092596777277547, "grad_norm": 0.9923000294721164, "learning_rate": 2.0577896606949502e-07, "loss": 0.2358, "step": 40803 }, { "epoch": 0.7092770602652575, "grad_norm": 1.524625123090521, "learning_rate": 2.0575620668842908e-07, "loss": 0.2828, "step": 40804 }, { "epoch": 0.7092944428027603, "grad_norm": 2.246671253947626, "learning_rate": 2.0573344823998084e-07, "loss": 0.2805, "step": 40805 }, { "epoch": 0.7093118253402632, "grad_norm": 1.2695149982053295, "learning_rate": 2.0571069072422238e-07, "loss": 0.1548, "step": 40806 }, { "epoch": 0.709329207877766, "grad_norm": 2.0742295927150014, "learning_rate": 2.0568793414122586e-07, "loss": 0.2, "step": 40807 }, { "epoch": 0.7093465904152688, "grad_norm": 1.4403319441518463, "learning_rate": 2.056651784910634e-07, "loss": 0.2275, "step": 40808 }, { "epoch": 0.7093639729527716, "grad_norm": 1.46476741627856, "learning_rate": 2.056424237738071e-07, "loss": 0.2218, "step": 40809 }, { "epoch": 0.7093813554902745, "grad_norm": 1.3336332538335331, "learning_rate": 2.056196699895289e-07, "loss": 0.2148, "step": 40810 }, { "epoch": 0.7093987380277773, "grad_norm": 1.376049775942997, "learning_rate": 2.0559691713830134e-07, "loss": 0.2015, "step": 40811 }, { "epoch": 0.7094161205652801, "grad_norm": 2.0306486021353805, "learning_rate": 2.0557416522019633e-07, "loss": 0.343, "step": 40812 }, { "epoch": 0.709433503102783, "grad_norm": 1.9853081580261676, "learning_rate": 2.055514142352861e-07, "loss": 0.2695, "step": 40813 }, { "epoch": 0.7094508856402858, "grad_norm": 1.3502079294219858, "learning_rate": 2.055286641836423e-07, "loss": 0.1943, "step": 40814 }, { "epoch": 0.7094682681777886, "grad_norm": 1.1489708001201748, "learning_rate": 2.055059150653375e-07, "loss": 0.2717, "step": 40815 }, { "epoch": 0.7094856507152915, "grad_norm": 1.4592389167357838, "learning_rate": 2.054831668804437e-07, "loss": 0.1483, "step": 40816 }, { "epoch": 0.7095030332527943, "grad_norm": 1.3546383249922964, "learning_rate": 2.0546041962903298e-07, "loss": 0.1718, "step": 40817 }, { "epoch": 0.7095204157902971, "grad_norm": 1.6097506270200514, "learning_rate": 2.0543767331117724e-07, "loss": 0.1233, "step": 40818 }, { "epoch": 0.7095377983277998, "grad_norm": 1.5794902630254533, "learning_rate": 2.0541492792694893e-07, "loss": 0.1976, "step": 40819 }, { "epoch": 0.7095551808653027, "grad_norm": 0.9742802233576315, "learning_rate": 2.0539218347642019e-07, "loss": 0.2083, "step": 40820 }, { "epoch": 0.7095725634028055, "grad_norm": 1.958424825695879, "learning_rate": 2.0536943995966273e-07, "loss": 0.2678, "step": 40821 }, { "epoch": 0.7095899459403083, "grad_norm": 1.425951144902323, "learning_rate": 2.0534669737674853e-07, "loss": 0.2631, "step": 40822 }, { "epoch": 0.7096073284778112, "grad_norm": 0.9419915728064008, "learning_rate": 2.0532395572775018e-07, "loss": 0.1824, "step": 40823 }, { "epoch": 0.709624711015314, "grad_norm": 0.9229436195459279, "learning_rate": 2.0530121501273957e-07, "loss": 0.2016, "step": 40824 }, { "epoch": 0.7096420935528168, "grad_norm": 1.9008106178471782, "learning_rate": 2.052784752317887e-07, "loss": 0.2443, "step": 40825 }, { "epoch": 0.7096594760903197, "grad_norm": 0.980764626638253, "learning_rate": 2.052557363849697e-07, "loss": 0.2388, "step": 40826 }, { "epoch": 0.7096768586278225, "grad_norm": 1.2432853366789165, "learning_rate": 2.0523299847235458e-07, "loss": 0.1909, "step": 40827 }, { "epoch": 0.7096942411653253, "grad_norm": 1.1486803978341993, "learning_rate": 2.052102614940155e-07, "loss": 0.2791, "step": 40828 }, { "epoch": 0.7097116237028281, "grad_norm": 1.044698063440649, "learning_rate": 2.0518752545002444e-07, "loss": 0.185, "step": 40829 }, { "epoch": 0.709729006240331, "grad_norm": 0.9826151278389695, "learning_rate": 2.051647903404533e-07, "loss": 0.1593, "step": 40830 }, { "epoch": 0.7097463887778338, "grad_norm": 3.3868402338587904, "learning_rate": 2.0514205616537454e-07, "loss": 0.2497, "step": 40831 }, { "epoch": 0.7097637713153366, "grad_norm": 2.6280554265830207, "learning_rate": 2.0511932292486022e-07, "loss": 0.19, "step": 40832 }, { "epoch": 0.7097811538528395, "grad_norm": 1.5422906707502608, "learning_rate": 2.0509659061898176e-07, "loss": 0.2405, "step": 40833 }, { "epoch": 0.7097985363903423, "grad_norm": 1.107908779489481, "learning_rate": 2.050738592478119e-07, "loss": 0.1901, "step": 40834 }, { "epoch": 0.7098159189278451, "grad_norm": 2.436770123350755, "learning_rate": 2.050511288114224e-07, "loss": 0.1475, "step": 40835 }, { "epoch": 0.709833301465348, "grad_norm": 3.140274418101341, "learning_rate": 2.0502839930988536e-07, "loss": 0.2921, "step": 40836 }, { "epoch": 0.7098506840028508, "grad_norm": 1.9860633631346316, "learning_rate": 2.0500567074327273e-07, "loss": 0.2338, "step": 40837 }, { "epoch": 0.7098680665403536, "grad_norm": 3.126834651835923, "learning_rate": 2.0498294311165666e-07, "loss": 0.2049, "step": 40838 }, { "epoch": 0.7098854490778563, "grad_norm": 1.0324340206713447, "learning_rate": 2.0496021641510914e-07, "loss": 0.152, "step": 40839 }, { "epoch": 0.7099028316153592, "grad_norm": 4.718041930082972, "learning_rate": 2.0493749065370218e-07, "loss": 0.4162, "step": 40840 }, { "epoch": 0.709920214152862, "grad_norm": 1.896220828063024, "learning_rate": 2.0491476582750767e-07, "loss": 0.1985, "step": 40841 }, { "epoch": 0.7099375966903648, "grad_norm": 1.227922527799788, "learning_rate": 2.0489204193659797e-07, "loss": 0.1908, "step": 40842 }, { "epoch": 0.7099549792278677, "grad_norm": 0.9066724587940033, "learning_rate": 2.04869318981045e-07, "loss": 0.2226, "step": 40843 }, { "epoch": 0.7099723617653705, "grad_norm": 1.0599595038666012, "learning_rate": 2.048465969609206e-07, "loss": 0.2579, "step": 40844 }, { "epoch": 0.7099897443028733, "grad_norm": 1.7260335887518006, "learning_rate": 2.0482387587629696e-07, "loss": 0.2743, "step": 40845 }, { "epoch": 0.7100071268403761, "grad_norm": 0.7949717125899345, "learning_rate": 2.0480115572724604e-07, "loss": 0.2268, "step": 40846 }, { "epoch": 0.710024509377879, "grad_norm": 0.8700312990968289, "learning_rate": 2.0477843651383991e-07, "loss": 0.1101, "step": 40847 }, { "epoch": 0.7100418919153818, "grad_norm": 0.8789632937393023, "learning_rate": 2.0475571823615045e-07, "loss": 0.2015, "step": 40848 }, { "epoch": 0.7100592744528846, "grad_norm": 1.6612323866152476, "learning_rate": 2.0473300089424962e-07, "loss": 0.2059, "step": 40849 }, { "epoch": 0.7100766569903875, "grad_norm": 1.4826127262532025, "learning_rate": 2.0471028448820966e-07, "loss": 0.1986, "step": 40850 }, { "epoch": 0.7100940395278903, "grad_norm": 1.6545824390034949, "learning_rate": 2.0468756901810264e-07, "loss": 0.2643, "step": 40851 }, { "epoch": 0.7101114220653931, "grad_norm": 1.1927561388876535, "learning_rate": 2.0466485448400022e-07, "loss": 0.2833, "step": 40852 }, { "epoch": 0.710128804602896, "grad_norm": 2.4961997260816067, "learning_rate": 2.0464214088597438e-07, "loss": 0.2887, "step": 40853 }, { "epoch": 0.7101461871403988, "grad_norm": 2.3818206584963826, "learning_rate": 2.046194282240974e-07, "loss": 0.2897, "step": 40854 }, { "epoch": 0.7101635696779016, "grad_norm": 0.9475912054987256, "learning_rate": 2.045967164984412e-07, "loss": 0.1999, "step": 40855 }, { "epoch": 0.7101809522154044, "grad_norm": 0.9416262497569154, "learning_rate": 2.0457400570907768e-07, "loss": 0.2083, "step": 40856 }, { "epoch": 0.7101983347529073, "grad_norm": 2.1393352103748335, "learning_rate": 2.0455129585607884e-07, "loss": 0.1509, "step": 40857 }, { "epoch": 0.7102157172904101, "grad_norm": 5.881020336593634, "learning_rate": 2.045285869395167e-07, "loss": 0.4114, "step": 40858 }, { "epoch": 0.7102330998279128, "grad_norm": 2.6478844709252467, "learning_rate": 2.045058789594632e-07, "loss": 0.1978, "step": 40859 }, { "epoch": 0.7102504823654157, "grad_norm": 1.4402713646312828, "learning_rate": 2.0448317191599035e-07, "loss": 0.1874, "step": 40860 }, { "epoch": 0.7102678649029185, "grad_norm": 1.7938089268278068, "learning_rate": 2.0446046580916992e-07, "loss": 0.2112, "step": 40861 }, { "epoch": 0.7102852474404213, "grad_norm": 1.482602760775016, "learning_rate": 2.044377606390742e-07, "loss": 0.2356, "step": 40862 }, { "epoch": 0.7103026299779241, "grad_norm": 1.3020888176369176, "learning_rate": 2.04415056405775e-07, "loss": 0.1639, "step": 40863 }, { "epoch": 0.710320012515427, "grad_norm": 1.8400450981794807, "learning_rate": 2.0439235310934433e-07, "loss": 0.2276, "step": 40864 }, { "epoch": 0.7103373950529298, "grad_norm": 1.3507629016203957, "learning_rate": 2.0436965074985407e-07, "loss": 0.2723, "step": 40865 }, { "epoch": 0.7103547775904326, "grad_norm": 1.6016651862948985, "learning_rate": 2.0434694932737622e-07, "loss": 0.2092, "step": 40866 }, { "epoch": 0.7103721601279355, "grad_norm": 2.869068236052978, "learning_rate": 2.043242488419828e-07, "loss": 0.2753, "step": 40867 }, { "epoch": 0.7103895426654383, "grad_norm": 2.3065775803971365, "learning_rate": 2.043015492937456e-07, "loss": 0.261, "step": 40868 }, { "epoch": 0.7104069252029411, "grad_norm": 1.3347377859897114, "learning_rate": 2.0427885068273654e-07, "loss": 0.2711, "step": 40869 }, { "epoch": 0.710424307740444, "grad_norm": 1.5385436613854275, "learning_rate": 2.04256153009028e-07, "loss": 0.2156, "step": 40870 }, { "epoch": 0.7104416902779468, "grad_norm": 1.2819202013539497, "learning_rate": 2.0423345627269144e-07, "loss": 0.1836, "step": 40871 }, { "epoch": 0.7104590728154496, "grad_norm": 2.2037996501820327, "learning_rate": 2.0421076047379876e-07, "loss": 0.2043, "step": 40872 }, { "epoch": 0.7104764553529525, "grad_norm": 2.3461189796108344, "learning_rate": 2.041880656124223e-07, "loss": 0.2362, "step": 40873 }, { "epoch": 0.7104938378904553, "grad_norm": 1.115780773588239, "learning_rate": 2.0416537168863373e-07, "loss": 0.188, "step": 40874 }, { "epoch": 0.7105112204279581, "grad_norm": 0.9550771470807234, "learning_rate": 2.0414267870250512e-07, "loss": 0.0939, "step": 40875 }, { "epoch": 0.7105286029654609, "grad_norm": 1.3942628372917971, "learning_rate": 2.0411998665410823e-07, "loss": 0.2008, "step": 40876 }, { "epoch": 0.7105459855029638, "grad_norm": 1.3028468791999754, "learning_rate": 2.0409729554351512e-07, "loss": 0.2653, "step": 40877 }, { "epoch": 0.7105633680404665, "grad_norm": 2.0039643696715173, "learning_rate": 2.0407460537079767e-07, "loss": 0.19, "step": 40878 }, { "epoch": 0.7105807505779693, "grad_norm": 1.163318444991402, "learning_rate": 2.0405191613602774e-07, "loss": 0.1851, "step": 40879 }, { "epoch": 0.7105981331154722, "grad_norm": 1.413976653028828, "learning_rate": 2.0402922783927713e-07, "loss": 0.2189, "step": 40880 }, { "epoch": 0.710615515652975, "grad_norm": 1.5396920200277089, "learning_rate": 2.040065404806181e-07, "loss": 0.1489, "step": 40881 }, { "epoch": 0.7106328981904778, "grad_norm": 1.5222246019827055, "learning_rate": 2.0398385406012235e-07, "loss": 0.2535, "step": 40882 }, { "epoch": 0.7106502807279806, "grad_norm": 1.9042241320656097, "learning_rate": 2.03961168577862e-07, "loss": 0.1692, "step": 40883 }, { "epoch": 0.7106676632654835, "grad_norm": 1.9799037344001418, "learning_rate": 2.039384840339084e-07, "loss": 0.2245, "step": 40884 }, { "epoch": 0.7106850458029863, "grad_norm": 1.8199699756899714, "learning_rate": 2.0391580042833395e-07, "loss": 0.1759, "step": 40885 }, { "epoch": 0.7107024283404891, "grad_norm": 2.927746088607834, "learning_rate": 2.0389311776121044e-07, "loss": 0.2238, "step": 40886 }, { "epoch": 0.710719810877992, "grad_norm": 1.2591020433359068, "learning_rate": 2.0387043603260966e-07, "loss": 0.2055, "step": 40887 }, { "epoch": 0.7107371934154948, "grad_norm": 1.3342919995788625, "learning_rate": 2.038477552426035e-07, "loss": 0.1389, "step": 40888 }, { "epoch": 0.7107545759529976, "grad_norm": 1.5874308187762238, "learning_rate": 2.038250753912642e-07, "loss": 0.2009, "step": 40889 }, { "epoch": 0.7107719584905005, "grad_norm": 1.0465820900296647, "learning_rate": 2.038023964786632e-07, "loss": 0.1651, "step": 40890 }, { "epoch": 0.7107893410280033, "grad_norm": 1.406435961028282, "learning_rate": 2.0377971850487235e-07, "loss": 0.2344, "step": 40891 }, { "epoch": 0.7108067235655061, "grad_norm": 2.26188800854369, "learning_rate": 2.0375704146996388e-07, "loss": 0.1248, "step": 40892 }, { "epoch": 0.710824106103009, "grad_norm": 1.3338043659594823, "learning_rate": 2.0373436537400956e-07, "loss": 0.1756, "step": 40893 }, { "epoch": 0.7108414886405118, "grad_norm": 1.944662154574383, "learning_rate": 2.0371169021708118e-07, "loss": 0.2209, "step": 40894 }, { "epoch": 0.7108588711780146, "grad_norm": 1.3959420596559267, "learning_rate": 2.0368901599925058e-07, "loss": 0.1627, "step": 40895 }, { "epoch": 0.7108762537155174, "grad_norm": 1.0182546921355853, "learning_rate": 2.0366634272058974e-07, "loss": 0.1468, "step": 40896 }, { "epoch": 0.7108936362530203, "grad_norm": 1.2363510694912878, "learning_rate": 2.0364367038117053e-07, "loss": 0.1643, "step": 40897 }, { "epoch": 0.710911018790523, "grad_norm": 1.3417455016417568, "learning_rate": 2.0362099898106467e-07, "loss": 0.1282, "step": 40898 }, { "epoch": 0.7109284013280258, "grad_norm": 1.2440214230395217, "learning_rate": 2.0359832852034393e-07, "loss": 0.1441, "step": 40899 }, { "epoch": 0.7109457838655286, "grad_norm": 0.9240947298890481, "learning_rate": 2.0357565899908052e-07, "loss": 0.1977, "step": 40900 }, { "epoch": 0.7109631664030315, "grad_norm": 1.8433698442683601, "learning_rate": 2.035529904173461e-07, "loss": 0.2151, "step": 40901 }, { "epoch": 0.7109805489405343, "grad_norm": 2.1623296886174583, "learning_rate": 2.035303227752127e-07, "loss": 0.2595, "step": 40902 }, { "epoch": 0.7109979314780371, "grad_norm": 2.3717900582737124, "learning_rate": 2.0350765607275165e-07, "loss": 0.1605, "step": 40903 }, { "epoch": 0.71101531401554, "grad_norm": 1.7281043074861273, "learning_rate": 2.034849903100353e-07, "loss": 0.2047, "step": 40904 }, { "epoch": 0.7110326965530428, "grad_norm": 3.7030319762363693, "learning_rate": 2.0346232548713533e-07, "loss": 0.2811, "step": 40905 }, { "epoch": 0.7110500790905456, "grad_norm": 2.272137062478584, "learning_rate": 2.0343966160412358e-07, "loss": 0.1776, "step": 40906 }, { "epoch": 0.7110674616280485, "grad_norm": 2.1373483979559955, "learning_rate": 2.0341699866107165e-07, "loss": 0.2309, "step": 40907 }, { "epoch": 0.7110848441655513, "grad_norm": 1.364968256939707, "learning_rate": 2.03394336658052e-07, "loss": 0.1412, "step": 40908 }, { "epoch": 0.7111022267030541, "grad_norm": 1.9447026777199474, "learning_rate": 2.0337167559513585e-07, "loss": 0.1556, "step": 40909 }, { "epoch": 0.711119609240557, "grad_norm": 14.5815371595399, "learning_rate": 2.0334901547239526e-07, "loss": 0.2188, "step": 40910 }, { "epoch": 0.7111369917780598, "grad_norm": 1.1995519060730764, "learning_rate": 2.033263562899018e-07, "loss": 0.1616, "step": 40911 }, { "epoch": 0.7111543743155626, "grad_norm": 1.343999580990175, "learning_rate": 2.0330369804772768e-07, "loss": 0.139, "step": 40912 }, { "epoch": 0.7111717568530654, "grad_norm": 1.0923338930430602, "learning_rate": 2.0328104074594459e-07, "loss": 0.1976, "step": 40913 }, { "epoch": 0.7111891393905683, "grad_norm": 1.179215018545783, "learning_rate": 2.032583843846243e-07, "loss": 0.2271, "step": 40914 }, { "epoch": 0.7112065219280711, "grad_norm": 1.664021711481304, "learning_rate": 2.0323572896383855e-07, "loss": 0.181, "step": 40915 }, { "epoch": 0.7112239044655739, "grad_norm": 1.8878043599143672, "learning_rate": 2.0321307448365932e-07, "loss": 0.1723, "step": 40916 }, { "epoch": 0.7112412870030768, "grad_norm": 1.2214905425067828, "learning_rate": 2.0319042094415821e-07, "loss": 0.211, "step": 40917 }, { "epoch": 0.7112586695405795, "grad_norm": 1.783884484329062, "learning_rate": 2.0316776834540721e-07, "loss": 0.1534, "step": 40918 }, { "epoch": 0.7112760520780823, "grad_norm": 1.4902928708227228, "learning_rate": 2.031451166874778e-07, "loss": 0.2633, "step": 40919 }, { "epoch": 0.7112934346155851, "grad_norm": 5.138003679865312, "learning_rate": 2.0312246597044218e-07, "loss": 0.3269, "step": 40920 }, { "epoch": 0.711310817153088, "grad_norm": 1.968077117935567, "learning_rate": 2.0309981619437218e-07, "loss": 0.2284, "step": 40921 }, { "epoch": 0.7113281996905908, "grad_norm": 0.9954588237268303, "learning_rate": 2.03077167359339e-07, "loss": 0.1693, "step": 40922 }, { "epoch": 0.7113455822280936, "grad_norm": 1.57373812651923, "learning_rate": 2.0305451946541503e-07, "loss": 0.1746, "step": 40923 }, { "epoch": 0.7113629647655965, "grad_norm": 1.4572868973813708, "learning_rate": 2.030318725126718e-07, "loss": 0.2284, "step": 40924 }, { "epoch": 0.7113803473030993, "grad_norm": 0.7592655969880163, "learning_rate": 2.0300922650118107e-07, "loss": 0.2062, "step": 40925 }, { "epoch": 0.7113977298406021, "grad_norm": 1.7792427637114225, "learning_rate": 2.029865814310147e-07, "loss": 0.1666, "step": 40926 }, { "epoch": 0.711415112378105, "grad_norm": 2.72903379919361, "learning_rate": 2.0296393730224444e-07, "loss": 0.1923, "step": 40927 }, { "epoch": 0.7114324949156078, "grad_norm": 1.3680514717163943, "learning_rate": 2.0294129411494205e-07, "loss": 0.1817, "step": 40928 }, { "epoch": 0.7114498774531106, "grad_norm": 1.4599670604327168, "learning_rate": 2.0291865186917928e-07, "loss": 0.2501, "step": 40929 }, { "epoch": 0.7114672599906134, "grad_norm": 2.1489164150520335, "learning_rate": 2.0289601056502776e-07, "loss": 0.1558, "step": 40930 }, { "epoch": 0.7114846425281163, "grad_norm": 3.715726343140808, "learning_rate": 2.0287337020255958e-07, "loss": 0.1474, "step": 40931 }, { "epoch": 0.7115020250656191, "grad_norm": 1.82859036152302, "learning_rate": 2.028507307818463e-07, "loss": 0.1444, "step": 40932 }, { "epoch": 0.7115194076031219, "grad_norm": 1.4085086159598346, "learning_rate": 2.0282809230295987e-07, "loss": 0.2483, "step": 40933 }, { "epoch": 0.7115367901406248, "grad_norm": 1.7014362136814243, "learning_rate": 2.028054547659716e-07, "loss": 0.18, "step": 40934 }, { "epoch": 0.7115541726781276, "grad_norm": 1.5742728453776247, "learning_rate": 2.0278281817095367e-07, "loss": 0.2543, "step": 40935 }, { "epoch": 0.7115715552156304, "grad_norm": 1.3210542578390254, "learning_rate": 2.027601825179776e-07, "loss": 0.1694, "step": 40936 }, { "epoch": 0.7115889377531333, "grad_norm": 1.0979021624749496, "learning_rate": 2.0273754780711533e-07, "loss": 0.1192, "step": 40937 }, { "epoch": 0.711606320290636, "grad_norm": 1.321452646972423, "learning_rate": 2.027149140384382e-07, "loss": 0.204, "step": 40938 }, { "epoch": 0.7116237028281388, "grad_norm": 1.5138101293838142, "learning_rate": 2.0269228121201847e-07, "loss": 0.2559, "step": 40939 }, { "epoch": 0.7116410853656416, "grad_norm": 1.347271277373707, "learning_rate": 2.026696493279278e-07, "loss": 0.1055, "step": 40940 }, { "epoch": 0.7116584679031445, "grad_norm": 2.122754143647348, "learning_rate": 2.026470183862376e-07, "loss": 0.2223, "step": 40941 }, { "epoch": 0.7116758504406473, "grad_norm": 1.7736966561729968, "learning_rate": 2.026243883870195e-07, "loss": 0.2506, "step": 40942 }, { "epoch": 0.7116932329781501, "grad_norm": 1.1937491104289064, "learning_rate": 2.0260175933034574e-07, "loss": 0.187, "step": 40943 }, { "epoch": 0.711710615515653, "grad_norm": 2.165940166346358, "learning_rate": 2.0257913121628777e-07, "loss": 0.2558, "step": 40944 }, { "epoch": 0.7117279980531558, "grad_norm": 2.1270206680472974, "learning_rate": 2.0255650404491737e-07, "loss": 0.247, "step": 40945 }, { "epoch": 0.7117453805906586, "grad_norm": 1.4212068943272318, "learning_rate": 2.0253387781630615e-07, "loss": 0.4011, "step": 40946 }, { "epoch": 0.7117627631281614, "grad_norm": 2.0132900390911423, "learning_rate": 2.0251125253052592e-07, "loss": 0.2588, "step": 40947 }, { "epoch": 0.7117801456656643, "grad_norm": 1.3605386506802233, "learning_rate": 2.0248862818764838e-07, "loss": 0.2629, "step": 40948 }, { "epoch": 0.7117975282031671, "grad_norm": 1.1026062790216458, "learning_rate": 2.0246600478774522e-07, "loss": 0.1377, "step": 40949 }, { "epoch": 0.7118149107406699, "grad_norm": 2.1586559998483184, "learning_rate": 2.0244338233088799e-07, "loss": 0.2478, "step": 40950 }, { "epoch": 0.7118322932781728, "grad_norm": 1.3707467990393518, "learning_rate": 2.0242076081714866e-07, "loss": 0.1483, "step": 40951 }, { "epoch": 0.7118496758156756, "grad_norm": 1.600928704851339, "learning_rate": 2.0239814024659907e-07, "loss": 0.2071, "step": 40952 }, { "epoch": 0.7118670583531784, "grad_norm": 1.135523060430165, "learning_rate": 2.0237552061931025e-07, "loss": 0.239, "step": 40953 }, { "epoch": 0.7118844408906813, "grad_norm": 1.8420598731790574, "learning_rate": 2.0235290193535453e-07, "loss": 0.1835, "step": 40954 }, { "epoch": 0.7119018234281841, "grad_norm": 1.519428825835773, "learning_rate": 2.0233028419480335e-07, "loss": 0.1908, "step": 40955 }, { "epoch": 0.7119192059656869, "grad_norm": 0.8667025766587806, "learning_rate": 2.0230766739772847e-07, "loss": 0.2144, "step": 40956 }, { "epoch": 0.7119365885031897, "grad_norm": 1.6286604575824681, "learning_rate": 2.0228505154420156e-07, "loss": 0.2451, "step": 40957 }, { "epoch": 0.7119539710406925, "grad_norm": 1.0269532265575632, "learning_rate": 2.022624366342942e-07, "loss": 0.2383, "step": 40958 }, { "epoch": 0.7119713535781953, "grad_norm": 1.379003899261416, "learning_rate": 2.022398226680782e-07, "loss": 0.1321, "step": 40959 }, { "epoch": 0.7119887361156981, "grad_norm": 2.2563632063459576, "learning_rate": 2.0221720964562517e-07, "loss": 0.2356, "step": 40960 }, { "epoch": 0.712006118653201, "grad_norm": 1.264163972795613, "learning_rate": 2.0219459756700662e-07, "loss": 0.2128, "step": 40961 }, { "epoch": 0.7120235011907038, "grad_norm": 1.409046327465402, "learning_rate": 2.0217198643229456e-07, "loss": 0.1726, "step": 40962 }, { "epoch": 0.7120408837282066, "grad_norm": 1.3748126008455903, "learning_rate": 2.0214937624156054e-07, "loss": 0.3127, "step": 40963 }, { "epoch": 0.7120582662657094, "grad_norm": 1.8844842267362374, "learning_rate": 2.0212676699487613e-07, "loss": 0.1551, "step": 40964 }, { "epoch": 0.7120756488032123, "grad_norm": 1.1164901638333833, "learning_rate": 2.0210415869231302e-07, "loss": 0.1595, "step": 40965 }, { "epoch": 0.7120930313407151, "grad_norm": 1.2744677065696228, "learning_rate": 2.0208155133394288e-07, "loss": 0.1163, "step": 40966 }, { "epoch": 0.7121104138782179, "grad_norm": 3.76412791109466, "learning_rate": 2.0205894491983745e-07, "loss": 0.2335, "step": 40967 }, { "epoch": 0.7121277964157208, "grad_norm": 0.9773213690770786, "learning_rate": 2.0203633945006827e-07, "loss": 0.2321, "step": 40968 }, { "epoch": 0.7121451789532236, "grad_norm": 0.9392595197069163, "learning_rate": 2.020137349247068e-07, "loss": 0.1802, "step": 40969 }, { "epoch": 0.7121625614907264, "grad_norm": 1.9415320162075738, "learning_rate": 2.0199113134382512e-07, "loss": 0.1924, "step": 40970 }, { "epoch": 0.7121799440282293, "grad_norm": 2.492376472184361, "learning_rate": 2.019685287074948e-07, "loss": 0.2241, "step": 40971 }, { "epoch": 0.7121973265657321, "grad_norm": 5.109976317944125, "learning_rate": 2.0194592701578716e-07, "loss": 0.2048, "step": 40972 }, { "epoch": 0.7122147091032349, "grad_norm": 1.3219073216655124, "learning_rate": 2.019233262687739e-07, "loss": 0.2047, "step": 40973 }, { "epoch": 0.7122320916407378, "grad_norm": 1.3889707375768383, "learning_rate": 2.019007264665269e-07, "loss": 0.237, "step": 40974 }, { "epoch": 0.7122494741782406, "grad_norm": 1.6699413668078624, "learning_rate": 2.0187812760911766e-07, "loss": 0.2379, "step": 40975 }, { "epoch": 0.7122668567157434, "grad_norm": 2.3105164493514447, "learning_rate": 2.0185552969661783e-07, "loss": 0.2398, "step": 40976 }, { "epoch": 0.7122842392532462, "grad_norm": 1.591994816620234, "learning_rate": 2.01832932729099e-07, "loss": 0.2602, "step": 40977 }, { "epoch": 0.712301621790749, "grad_norm": 1.9461490599439837, "learning_rate": 2.0181033670663282e-07, "loss": 0.2173, "step": 40978 }, { "epoch": 0.7123190043282518, "grad_norm": 1.2676087171859975, "learning_rate": 2.0178774162929092e-07, "loss": 0.3036, "step": 40979 }, { "epoch": 0.7123363868657546, "grad_norm": 1.669308875729099, "learning_rate": 2.017651474971447e-07, "loss": 0.2294, "step": 40980 }, { "epoch": 0.7123537694032575, "grad_norm": 1.391609904952903, "learning_rate": 2.017425543102661e-07, "loss": 0.3055, "step": 40981 }, { "epoch": 0.7123711519407603, "grad_norm": 1.09153928334238, "learning_rate": 2.017199620687266e-07, "loss": 0.236, "step": 40982 }, { "epoch": 0.7123885344782631, "grad_norm": 0.9754938625624603, "learning_rate": 2.0169737077259786e-07, "loss": 0.2019, "step": 40983 }, { "epoch": 0.7124059170157659, "grad_norm": 1.4981317969093415, "learning_rate": 2.0167478042195135e-07, "loss": 0.2216, "step": 40984 }, { "epoch": 0.7124232995532688, "grad_norm": 1.5351117181259075, "learning_rate": 2.0165219101685883e-07, "loss": 0.2016, "step": 40985 }, { "epoch": 0.7124406820907716, "grad_norm": 1.2912936243427913, "learning_rate": 2.0162960255739175e-07, "loss": 0.1681, "step": 40986 }, { "epoch": 0.7124580646282744, "grad_norm": 3.872927653198364, "learning_rate": 2.0160701504362176e-07, "loss": 0.27, "step": 40987 }, { "epoch": 0.7124754471657773, "grad_norm": 1.238433139658278, "learning_rate": 2.0158442847562036e-07, "loss": 0.1629, "step": 40988 }, { "epoch": 0.7124928297032801, "grad_norm": 1.7760653401636637, "learning_rate": 2.0156184285345933e-07, "loss": 0.2915, "step": 40989 }, { "epoch": 0.7125102122407829, "grad_norm": 1.021412478565024, "learning_rate": 2.015392581772104e-07, "loss": 0.2957, "step": 40990 }, { "epoch": 0.7125275947782858, "grad_norm": 1.90337475095131, "learning_rate": 2.0151667444694477e-07, "loss": 0.2036, "step": 40991 }, { "epoch": 0.7125449773157886, "grad_norm": 1.70416272988326, "learning_rate": 2.014940916627339e-07, "loss": 0.2336, "step": 40992 }, { "epoch": 0.7125623598532914, "grad_norm": 2.852565633056276, "learning_rate": 2.0147150982464987e-07, "loss": 0.2129, "step": 40993 }, { "epoch": 0.7125797423907942, "grad_norm": 1.7055012796619546, "learning_rate": 2.0144892893276406e-07, "loss": 0.3065, "step": 40994 }, { "epoch": 0.7125971249282971, "grad_norm": 1.555946653280562, "learning_rate": 2.0142634898714793e-07, "loss": 0.2678, "step": 40995 }, { "epoch": 0.7126145074657999, "grad_norm": 1.0262132529871395, "learning_rate": 2.0140376998787317e-07, "loss": 0.2266, "step": 40996 }, { "epoch": 0.7126318900033027, "grad_norm": 1.5561173271136421, "learning_rate": 2.0138119193501124e-07, "loss": 0.2731, "step": 40997 }, { "epoch": 0.7126492725408055, "grad_norm": 2.0350903010462766, "learning_rate": 2.013586148286338e-07, "loss": 0.2469, "step": 40998 }, { "epoch": 0.7126666550783083, "grad_norm": 1.1811682028724004, "learning_rate": 2.0133603866881237e-07, "loss": 0.2883, "step": 40999 }, { "epoch": 0.7126840376158111, "grad_norm": 0.7684585191634834, "learning_rate": 2.0131346345561834e-07, "loss": 0.1047, "step": 41000 }, { "epoch": 0.712701420153314, "grad_norm": 1.2861481160113344, "learning_rate": 2.0129088918912358e-07, "loss": 0.1956, "step": 41001 }, { "epoch": 0.7127188026908168, "grad_norm": 1.0743352111980011, "learning_rate": 2.0126831586939946e-07, "loss": 0.2516, "step": 41002 }, { "epoch": 0.7127361852283196, "grad_norm": 1.7748423823551305, "learning_rate": 2.0124574349651758e-07, "loss": 0.2161, "step": 41003 }, { "epoch": 0.7127535677658224, "grad_norm": 1.491286212722409, "learning_rate": 2.0122317207054944e-07, "loss": 0.1572, "step": 41004 }, { "epoch": 0.7127709503033253, "grad_norm": 2.596983837414817, "learning_rate": 2.012006015915666e-07, "loss": 0.2713, "step": 41005 }, { "epoch": 0.7127883328408281, "grad_norm": 0.8168509728253681, "learning_rate": 2.0117803205964064e-07, "loss": 0.1804, "step": 41006 }, { "epoch": 0.7128057153783309, "grad_norm": 2.034753598945972, "learning_rate": 2.0115546347484303e-07, "loss": 0.3434, "step": 41007 }, { "epoch": 0.7128230979158338, "grad_norm": 1.7358471118862961, "learning_rate": 2.0113289583724508e-07, "loss": 0.1407, "step": 41008 }, { "epoch": 0.7128404804533366, "grad_norm": 1.2189128781377874, "learning_rate": 2.01110329146919e-07, "loss": 0.1636, "step": 41009 }, { "epoch": 0.7128578629908394, "grad_norm": 1.2447992436345838, "learning_rate": 2.0108776340393563e-07, "loss": 0.1627, "step": 41010 }, { "epoch": 0.7128752455283422, "grad_norm": 1.5821894623468618, "learning_rate": 2.0106519860836663e-07, "loss": 0.2153, "step": 41011 }, { "epoch": 0.7128926280658451, "grad_norm": 1.7699059877900707, "learning_rate": 2.0104263476028378e-07, "loss": 0.2354, "step": 41012 }, { "epoch": 0.7129100106033479, "grad_norm": 2.6212476184662905, "learning_rate": 2.0102007185975845e-07, "loss": 0.2355, "step": 41013 }, { "epoch": 0.7129273931408507, "grad_norm": 1.3600101176243955, "learning_rate": 2.0099750990686214e-07, "loss": 0.2094, "step": 41014 }, { "epoch": 0.7129447756783536, "grad_norm": 1.1686407279358222, "learning_rate": 2.009749489016664e-07, "loss": 0.2014, "step": 41015 }, { "epoch": 0.7129621582158564, "grad_norm": 0.855364534650271, "learning_rate": 2.009523888442427e-07, "loss": 0.2396, "step": 41016 }, { "epoch": 0.7129795407533591, "grad_norm": 1.9646293271626025, "learning_rate": 2.0092982973466254e-07, "loss": 0.2854, "step": 41017 }, { "epoch": 0.712996923290862, "grad_norm": 1.8037180540553521, "learning_rate": 2.0090727157299742e-07, "loss": 0.12, "step": 41018 }, { "epoch": 0.7130143058283648, "grad_norm": 1.1866915605587953, "learning_rate": 2.0088471435931874e-07, "loss": 0.1758, "step": 41019 }, { "epoch": 0.7130316883658676, "grad_norm": 2.876895932712462, "learning_rate": 2.008621580936983e-07, "loss": 0.2034, "step": 41020 }, { "epoch": 0.7130490709033704, "grad_norm": 1.4513199837536268, "learning_rate": 2.008396027762073e-07, "loss": 0.2891, "step": 41021 }, { "epoch": 0.7130664534408733, "grad_norm": 1.0472000207185261, "learning_rate": 2.008170484069176e-07, "loss": 0.1634, "step": 41022 }, { "epoch": 0.7130838359783761, "grad_norm": 1.1913469620661703, "learning_rate": 2.0079449498590006e-07, "loss": 0.2568, "step": 41023 }, { "epoch": 0.7131012185158789, "grad_norm": 2.1945003882243985, "learning_rate": 2.0077194251322665e-07, "loss": 0.2297, "step": 41024 }, { "epoch": 0.7131186010533818, "grad_norm": 2.663796467333142, "learning_rate": 2.007493909889688e-07, "loss": 0.2568, "step": 41025 }, { "epoch": 0.7131359835908846, "grad_norm": 1.9666092230100427, "learning_rate": 2.0072684041319786e-07, "loss": 0.2985, "step": 41026 }, { "epoch": 0.7131533661283874, "grad_norm": 2.1116358509033444, "learning_rate": 2.0070429078598523e-07, "loss": 0.1997, "step": 41027 }, { "epoch": 0.7131707486658903, "grad_norm": 1.2991162565209196, "learning_rate": 2.0068174210740284e-07, "loss": 0.2641, "step": 41028 }, { "epoch": 0.7131881312033931, "grad_norm": 1.2183132522360964, "learning_rate": 2.0065919437752164e-07, "loss": 0.1817, "step": 41029 }, { "epoch": 0.7132055137408959, "grad_norm": 1.0956356209256095, "learning_rate": 2.0063664759641336e-07, "loss": 0.1041, "step": 41030 }, { "epoch": 0.7132228962783987, "grad_norm": 1.491585188822614, "learning_rate": 2.006141017641491e-07, "loss": 0.2708, "step": 41031 }, { "epoch": 0.7132402788159016, "grad_norm": 1.0720966529516371, "learning_rate": 2.0059155688080087e-07, "loss": 0.1463, "step": 41032 }, { "epoch": 0.7132576613534044, "grad_norm": 2.4104898567153317, "learning_rate": 2.0056901294643985e-07, "loss": 0.2353, "step": 41033 }, { "epoch": 0.7132750438909072, "grad_norm": 1.957270711703636, "learning_rate": 2.0054646996113756e-07, "loss": 0.2039, "step": 41034 }, { "epoch": 0.7132924264284101, "grad_norm": 1.2005693980959293, "learning_rate": 2.005239279249653e-07, "loss": 0.1704, "step": 41035 }, { "epoch": 0.7133098089659129, "grad_norm": 1.3271254712114793, "learning_rate": 2.0050138683799474e-07, "loss": 0.2153, "step": 41036 }, { "epoch": 0.7133271915034156, "grad_norm": 1.1059672263887594, "learning_rate": 2.0047884670029706e-07, "loss": 0.1579, "step": 41037 }, { "epoch": 0.7133445740409184, "grad_norm": 1.3564999054078895, "learning_rate": 2.0045630751194397e-07, "loss": 0.2098, "step": 41038 }, { "epoch": 0.7133619565784213, "grad_norm": 1.214938771225855, "learning_rate": 2.0043376927300654e-07, "loss": 0.1828, "step": 41039 }, { "epoch": 0.7133793391159241, "grad_norm": 1.3141576850391856, "learning_rate": 2.0041123198355668e-07, "loss": 0.1889, "step": 41040 }, { "epoch": 0.7133967216534269, "grad_norm": 1.3400610613403512, "learning_rate": 2.0038869564366572e-07, "loss": 0.1606, "step": 41041 }, { "epoch": 0.7134141041909298, "grad_norm": 1.353428840577933, "learning_rate": 2.0036616025340463e-07, "loss": 0.252, "step": 41042 }, { "epoch": 0.7134314867284326, "grad_norm": 1.592011910124406, "learning_rate": 2.0034362581284537e-07, "loss": 0.151, "step": 41043 }, { "epoch": 0.7134488692659354, "grad_norm": 1.3194640683497743, "learning_rate": 2.0032109232205919e-07, "loss": 0.1877, "step": 41044 }, { "epoch": 0.7134662518034383, "grad_norm": 1.886734044486819, "learning_rate": 2.0029855978111743e-07, "loss": 0.216, "step": 41045 }, { "epoch": 0.7134836343409411, "grad_norm": 3.8076442956738297, "learning_rate": 2.002760281900916e-07, "loss": 0.3715, "step": 41046 }, { "epoch": 0.7135010168784439, "grad_norm": 2.0975613011167717, "learning_rate": 2.0025349754905302e-07, "loss": 0.4085, "step": 41047 }, { "epoch": 0.7135183994159467, "grad_norm": 5.369822182197648, "learning_rate": 2.002309678580732e-07, "loss": 0.1465, "step": 41048 }, { "epoch": 0.7135357819534496, "grad_norm": 2.02764042276924, "learning_rate": 2.002084391172235e-07, "loss": 0.2638, "step": 41049 }, { "epoch": 0.7135531644909524, "grad_norm": 1.4684044823344111, "learning_rate": 2.001859113265752e-07, "loss": 0.3568, "step": 41050 }, { "epoch": 0.7135705470284552, "grad_norm": 2.299144341674853, "learning_rate": 2.0016338448619996e-07, "loss": 0.298, "step": 41051 }, { "epoch": 0.7135879295659581, "grad_norm": 1.3014846590381683, "learning_rate": 2.0014085859616908e-07, "loss": 0.2017, "step": 41052 }, { "epoch": 0.7136053121034609, "grad_norm": 1.6728587244951671, "learning_rate": 2.0011833365655412e-07, "loss": 0.2559, "step": 41053 }, { "epoch": 0.7136226946409637, "grad_norm": 1.4861020327771552, "learning_rate": 2.000958096674259e-07, "loss": 0.2425, "step": 41054 }, { "epoch": 0.7136400771784666, "grad_norm": 1.5850324797997875, "learning_rate": 2.0007328662885642e-07, "loss": 0.1192, "step": 41055 }, { "epoch": 0.7136574597159694, "grad_norm": 1.5784188700858675, "learning_rate": 2.0005076454091684e-07, "loss": 0.1886, "step": 41056 }, { "epoch": 0.7136748422534721, "grad_norm": 1.4896653046216604, "learning_rate": 2.0002824340367852e-07, "loss": 0.3589, "step": 41057 }, { "epoch": 0.7136922247909749, "grad_norm": 4.335447181780734, "learning_rate": 2.000057232172127e-07, "loss": 0.1781, "step": 41058 }, { "epoch": 0.7137096073284778, "grad_norm": 1.4592453654239215, "learning_rate": 1.9998320398159114e-07, "loss": 0.1984, "step": 41059 }, { "epoch": 0.7137269898659806, "grad_norm": 2.3124803015998374, "learning_rate": 1.9996068569688518e-07, "loss": 0.2348, "step": 41060 }, { "epoch": 0.7137443724034834, "grad_norm": 2.426081584228897, "learning_rate": 1.999381683631658e-07, "loss": 0.1946, "step": 41061 }, { "epoch": 0.7137617549409863, "grad_norm": 1.5478031274966375, "learning_rate": 1.9991565198050446e-07, "loss": 0.2151, "step": 41062 }, { "epoch": 0.7137791374784891, "grad_norm": 1.3314951480895423, "learning_rate": 1.9989313654897277e-07, "loss": 0.1559, "step": 41063 }, { "epoch": 0.7137965200159919, "grad_norm": 1.728069874669644, "learning_rate": 1.9987062206864202e-07, "loss": 0.246, "step": 41064 }, { "epoch": 0.7138139025534948, "grad_norm": 1.2493848296971481, "learning_rate": 1.9984810853958357e-07, "loss": 0.2233, "step": 41065 }, { "epoch": 0.7138312850909976, "grad_norm": 1.6608156286585183, "learning_rate": 1.9982559596186872e-07, "loss": 0.1666, "step": 41066 }, { "epoch": 0.7138486676285004, "grad_norm": 1.7399055726416097, "learning_rate": 1.998030843355688e-07, "loss": 0.2379, "step": 41067 }, { "epoch": 0.7138660501660032, "grad_norm": 1.2791339883249022, "learning_rate": 1.9978057366075523e-07, "loss": 0.3212, "step": 41068 }, { "epoch": 0.7138834327035061, "grad_norm": 2.3826027926583557, "learning_rate": 1.9975806393749932e-07, "loss": 0.2436, "step": 41069 }, { "epoch": 0.7139008152410089, "grad_norm": 1.484915131005547, "learning_rate": 1.9973555516587225e-07, "loss": 0.2483, "step": 41070 }, { "epoch": 0.7139181977785117, "grad_norm": 0.9341707034854236, "learning_rate": 1.9971304734594575e-07, "loss": 0.1923, "step": 41071 }, { "epoch": 0.7139355803160146, "grad_norm": 1.0276813168758583, "learning_rate": 1.996905404777911e-07, "loss": 0.2217, "step": 41072 }, { "epoch": 0.7139529628535174, "grad_norm": 1.2853533235957844, "learning_rate": 1.9966803456147914e-07, "loss": 0.3467, "step": 41073 }, { "epoch": 0.7139703453910202, "grad_norm": 1.3009386074176585, "learning_rate": 1.996455295970817e-07, "loss": 0.1166, "step": 41074 }, { "epoch": 0.713987727928523, "grad_norm": 2.339566134877705, "learning_rate": 1.9962302558466997e-07, "loss": 0.268, "step": 41075 }, { "epoch": 0.7140051104660259, "grad_norm": 2.113414785137907, "learning_rate": 1.9960052252431525e-07, "loss": 0.2461, "step": 41076 }, { "epoch": 0.7140224930035286, "grad_norm": 1.3421906708181808, "learning_rate": 1.9957802041608874e-07, "loss": 0.1817, "step": 41077 }, { "epoch": 0.7140398755410314, "grad_norm": 1.4119423819963473, "learning_rate": 1.9955551926006224e-07, "loss": 0.2034, "step": 41078 }, { "epoch": 0.7140572580785343, "grad_norm": 1.0836342856452845, "learning_rate": 1.995330190563066e-07, "loss": 0.2333, "step": 41079 }, { "epoch": 0.7140746406160371, "grad_norm": 1.316542093573408, "learning_rate": 1.9951051980489318e-07, "loss": 0.2398, "step": 41080 }, { "epoch": 0.7140920231535399, "grad_norm": 2.093603355737921, "learning_rate": 1.9948802150589328e-07, "loss": 0.2279, "step": 41081 }, { "epoch": 0.7141094056910428, "grad_norm": 2.486205777774459, "learning_rate": 1.9946552415937845e-07, "loss": 0.2663, "step": 41082 }, { "epoch": 0.7141267882285456, "grad_norm": 1.1810641460491555, "learning_rate": 1.9944302776541988e-07, "loss": 0.1492, "step": 41083 }, { "epoch": 0.7141441707660484, "grad_norm": 2.354954940587992, "learning_rate": 1.9942053232408877e-07, "loss": 0.2823, "step": 41084 }, { "epoch": 0.7141615533035512, "grad_norm": 2.466592451128575, "learning_rate": 1.9939803783545655e-07, "loss": 0.1872, "step": 41085 }, { "epoch": 0.7141789358410541, "grad_norm": 2.901515859891211, "learning_rate": 1.9937554429959453e-07, "loss": 0.2537, "step": 41086 }, { "epoch": 0.7141963183785569, "grad_norm": 1.226478636295423, "learning_rate": 1.9935305171657386e-07, "loss": 0.1585, "step": 41087 }, { "epoch": 0.7142137009160597, "grad_norm": 1.9205341610346047, "learning_rate": 1.9933056008646594e-07, "loss": 0.2174, "step": 41088 }, { "epoch": 0.7142310834535626, "grad_norm": 2.0146726413765355, "learning_rate": 1.993080694093418e-07, "loss": 0.3838, "step": 41089 }, { "epoch": 0.7142484659910654, "grad_norm": 1.531552853825056, "learning_rate": 1.9928557968527314e-07, "loss": 0.2873, "step": 41090 }, { "epoch": 0.7142658485285682, "grad_norm": 2.1100236814258646, "learning_rate": 1.992630909143313e-07, "loss": 0.2576, "step": 41091 }, { "epoch": 0.7142832310660711, "grad_norm": 1.3957576927150999, "learning_rate": 1.9924060309658692e-07, "loss": 0.3007, "step": 41092 }, { "epoch": 0.7143006136035739, "grad_norm": 1.6391304396528985, "learning_rate": 1.9921811623211183e-07, "loss": 0.2487, "step": 41093 }, { "epoch": 0.7143179961410767, "grad_norm": 1.296014352221358, "learning_rate": 1.991956303209772e-07, "loss": 0.2522, "step": 41094 }, { "epoch": 0.7143353786785795, "grad_norm": 5.12954247742696, "learning_rate": 1.9917314536325418e-07, "loss": 0.2307, "step": 41095 }, { "epoch": 0.7143527612160824, "grad_norm": 1.7134442278880722, "learning_rate": 1.9915066135901414e-07, "loss": 0.2551, "step": 41096 }, { "epoch": 0.7143701437535851, "grad_norm": 1.803445451586398, "learning_rate": 1.991281783083283e-07, "loss": 0.1895, "step": 41097 }, { "epoch": 0.7143875262910879, "grad_norm": 1.1492656780834145, "learning_rate": 1.9910569621126788e-07, "loss": 0.2096, "step": 41098 }, { "epoch": 0.7144049088285908, "grad_norm": 0.8874015095727864, "learning_rate": 1.9908321506790427e-07, "loss": 0.1099, "step": 41099 }, { "epoch": 0.7144222913660936, "grad_norm": 1.1395013623278092, "learning_rate": 1.9906073487830837e-07, "loss": 0.2771, "step": 41100 }, { "epoch": 0.7144396739035964, "grad_norm": 1.4928865130947695, "learning_rate": 1.9903825564255195e-07, "loss": 0.3559, "step": 41101 }, { "epoch": 0.7144570564410992, "grad_norm": 1.3842746135845403, "learning_rate": 1.9901577736070596e-07, "loss": 0.1463, "step": 41102 }, { "epoch": 0.7144744389786021, "grad_norm": 1.4893934345553885, "learning_rate": 1.989933000328417e-07, "loss": 0.2075, "step": 41103 }, { "epoch": 0.7144918215161049, "grad_norm": 1.0846783864637226, "learning_rate": 1.9897082365903045e-07, "loss": 0.1911, "step": 41104 }, { "epoch": 0.7145092040536077, "grad_norm": 1.0744594020360727, "learning_rate": 1.9894834823934335e-07, "loss": 0.2395, "step": 41105 }, { "epoch": 0.7145265865911106, "grad_norm": 2.2316259351451, "learning_rate": 1.9892587377385172e-07, "loss": 0.2347, "step": 41106 }, { "epoch": 0.7145439691286134, "grad_norm": 0.954944479470211, "learning_rate": 1.9890340026262676e-07, "loss": 0.3334, "step": 41107 }, { "epoch": 0.7145613516661162, "grad_norm": 1.892070335231433, "learning_rate": 1.9888092770573956e-07, "loss": 0.2222, "step": 41108 }, { "epoch": 0.7145787342036191, "grad_norm": 1.6394030694511619, "learning_rate": 1.9885845610326163e-07, "loss": 0.2495, "step": 41109 }, { "epoch": 0.7145961167411219, "grad_norm": 1.5883087847360038, "learning_rate": 1.9883598545526432e-07, "loss": 0.1932, "step": 41110 }, { "epoch": 0.7146134992786247, "grad_norm": 1.067419665008982, "learning_rate": 1.9881351576181832e-07, "loss": 0.2043, "step": 41111 }, { "epoch": 0.7146308818161276, "grad_norm": 0.9406882514961443, "learning_rate": 1.98791047022995e-07, "loss": 0.3389, "step": 41112 }, { "epoch": 0.7146482643536304, "grad_norm": 1.1635621141109003, "learning_rate": 1.9876857923886586e-07, "loss": 0.2534, "step": 41113 }, { "epoch": 0.7146656468911332, "grad_norm": 2.352144116773023, "learning_rate": 1.9874611240950195e-07, "loss": 0.3536, "step": 41114 }, { "epoch": 0.714683029428636, "grad_norm": 1.0052892800732625, "learning_rate": 1.9872364653497448e-07, "loss": 0.2506, "step": 41115 }, { "epoch": 0.7147004119661389, "grad_norm": 1.5072501252808055, "learning_rate": 1.9870118161535466e-07, "loss": 0.1713, "step": 41116 }, { "epoch": 0.7147177945036416, "grad_norm": 1.0858605582222056, "learning_rate": 1.9867871765071364e-07, "loss": 0.1082, "step": 41117 }, { "epoch": 0.7147351770411444, "grad_norm": 1.1766365228384243, "learning_rate": 1.9865625464112273e-07, "loss": 0.3247, "step": 41118 }, { "epoch": 0.7147525595786473, "grad_norm": 1.1521888389248405, "learning_rate": 1.9863379258665302e-07, "loss": 0.3001, "step": 41119 }, { "epoch": 0.7147699421161501, "grad_norm": 1.3644452591789877, "learning_rate": 1.9861133148737562e-07, "loss": 0.1508, "step": 41120 }, { "epoch": 0.7147873246536529, "grad_norm": 1.2096708488944512, "learning_rate": 1.98588871343362e-07, "loss": 0.1312, "step": 41121 }, { "epoch": 0.7148047071911557, "grad_norm": 1.6177590133159547, "learning_rate": 1.9856641215468322e-07, "loss": 0.2443, "step": 41122 }, { "epoch": 0.7148220897286586, "grad_norm": 1.696248229866795, "learning_rate": 1.9854395392141043e-07, "loss": 0.3771, "step": 41123 }, { "epoch": 0.7148394722661614, "grad_norm": 1.0509163695776975, "learning_rate": 1.9852149664361484e-07, "loss": 0.2551, "step": 41124 }, { "epoch": 0.7148568548036642, "grad_norm": 1.2784013551101454, "learning_rate": 1.984990403213676e-07, "loss": 0.1683, "step": 41125 }, { "epoch": 0.7148742373411671, "grad_norm": 3.1516021850190947, "learning_rate": 1.9847658495473995e-07, "loss": 0.3787, "step": 41126 }, { "epoch": 0.7148916198786699, "grad_norm": 1.379876806563912, "learning_rate": 1.9845413054380294e-07, "loss": 0.2334, "step": 41127 }, { "epoch": 0.7149090024161727, "grad_norm": 1.7813473112802496, "learning_rate": 1.9843167708862774e-07, "loss": 0.4032, "step": 41128 }, { "epoch": 0.7149263849536756, "grad_norm": 2.8241847854238116, "learning_rate": 1.9840922458928588e-07, "loss": 0.3373, "step": 41129 }, { "epoch": 0.7149437674911784, "grad_norm": 0.9739510773448699, "learning_rate": 1.9838677304584805e-07, "loss": 0.219, "step": 41130 }, { "epoch": 0.7149611500286812, "grad_norm": 1.6636295874738862, "learning_rate": 1.9836432245838548e-07, "loss": 0.3008, "step": 41131 }, { "epoch": 0.714978532566184, "grad_norm": 1.8587199492515574, "learning_rate": 1.9834187282696957e-07, "loss": 0.4125, "step": 41132 }, { "epoch": 0.7149959151036869, "grad_norm": 1.5725858376622275, "learning_rate": 1.9831942415167135e-07, "loss": 0.2704, "step": 41133 }, { "epoch": 0.7150132976411897, "grad_norm": 1.2487156442061287, "learning_rate": 1.9829697643256206e-07, "loss": 0.1944, "step": 41134 }, { "epoch": 0.7150306801786925, "grad_norm": 1.3520711600322826, "learning_rate": 1.9827452966971264e-07, "loss": 0.2795, "step": 41135 }, { "epoch": 0.7150480627161954, "grad_norm": 1.9818196944306017, "learning_rate": 1.9825208386319447e-07, "loss": 0.1653, "step": 41136 }, { "epoch": 0.7150654452536981, "grad_norm": 0.9181763414690195, "learning_rate": 1.9822963901307854e-07, "loss": 0.2817, "step": 41137 }, { "epoch": 0.7150828277912009, "grad_norm": 1.3152522203150747, "learning_rate": 1.98207195119436e-07, "loss": 0.195, "step": 41138 }, { "epoch": 0.7151002103287037, "grad_norm": 1.0050203122720902, "learning_rate": 1.9818475218233787e-07, "loss": 0.1466, "step": 41139 }, { "epoch": 0.7151175928662066, "grad_norm": 1.1723568823585409, "learning_rate": 1.9816231020185559e-07, "loss": 0.2225, "step": 41140 }, { "epoch": 0.7151349754037094, "grad_norm": 1.1859771491260582, "learning_rate": 1.981398691780602e-07, "loss": 0.202, "step": 41141 }, { "epoch": 0.7151523579412122, "grad_norm": 1.7643836467228386, "learning_rate": 1.9811742911102285e-07, "loss": 0.3669, "step": 41142 }, { "epoch": 0.7151697404787151, "grad_norm": 1.1140573545442536, "learning_rate": 1.980949900008143e-07, "loss": 0.1572, "step": 41143 }, { "epoch": 0.7151871230162179, "grad_norm": 1.3121383016888668, "learning_rate": 1.980725518475061e-07, "loss": 0.1736, "step": 41144 }, { "epoch": 0.7152045055537207, "grad_norm": 0.8770507679908748, "learning_rate": 1.9805011465116917e-07, "loss": 0.1801, "step": 41145 }, { "epoch": 0.7152218880912236, "grad_norm": 1.0876118958924432, "learning_rate": 1.9802767841187472e-07, "loss": 0.1923, "step": 41146 }, { "epoch": 0.7152392706287264, "grad_norm": 1.311459667882149, "learning_rate": 1.9800524312969362e-07, "loss": 0.1757, "step": 41147 }, { "epoch": 0.7152566531662292, "grad_norm": 1.3300197130744749, "learning_rate": 1.9798280880469752e-07, "loss": 0.3025, "step": 41148 }, { "epoch": 0.715274035703732, "grad_norm": 1.3450423223271117, "learning_rate": 1.9796037543695705e-07, "loss": 0.1822, "step": 41149 }, { "epoch": 0.7152914182412349, "grad_norm": 2.2913982963057973, "learning_rate": 1.9793794302654338e-07, "loss": 0.2022, "step": 41150 }, { "epoch": 0.7153088007787377, "grad_norm": 2.548713646895222, "learning_rate": 1.9791551157352753e-07, "loss": 0.2089, "step": 41151 }, { "epoch": 0.7153261833162405, "grad_norm": 2.0490929872850576, "learning_rate": 1.978930810779809e-07, "loss": 0.2397, "step": 41152 }, { "epoch": 0.7153435658537434, "grad_norm": 1.8987752765953834, "learning_rate": 1.9787065153997444e-07, "loss": 0.1723, "step": 41153 }, { "epoch": 0.7153609483912462, "grad_norm": 1.3399574212036656, "learning_rate": 1.9784822295957927e-07, "loss": 0.1483, "step": 41154 }, { "epoch": 0.715378330928749, "grad_norm": 1.610298192875117, "learning_rate": 1.978257953368664e-07, "loss": 0.1784, "step": 41155 }, { "epoch": 0.7153957134662517, "grad_norm": 1.5069266949709335, "learning_rate": 1.9780336867190695e-07, "loss": 0.3164, "step": 41156 }, { "epoch": 0.7154130960037546, "grad_norm": 1.2171196343689719, "learning_rate": 1.97780942964772e-07, "loss": 0.1784, "step": 41157 }, { "epoch": 0.7154304785412574, "grad_norm": 1.0493450395782344, "learning_rate": 1.9775851821553263e-07, "loss": 0.4055, "step": 41158 }, { "epoch": 0.7154478610787602, "grad_norm": 1.585531013138721, "learning_rate": 1.9773609442425976e-07, "loss": 0.1811, "step": 41159 }, { "epoch": 0.7154652436162631, "grad_norm": 1.2743773222280554, "learning_rate": 1.977136715910248e-07, "loss": 0.166, "step": 41160 }, { "epoch": 0.7154826261537659, "grad_norm": 1.2742476417015547, "learning_rate": 1.976912497158988e-07, "loss": 0.2994, "step": 41161 }, { "epoch": 0.7155000086912687, "grad_norm": 2.110478620711225, "learning_rate": 1.976688287989523e-07, "loss": 0.2191, "step": 41162 }, { "epoch": 0.7155173912287716, "grad_norm": 2.1713784167383716, "learning_rate": 1.9764640884025697e-07, "loss": 0.1821, "step": 41163 }, { "epoch": 0.7155347737662744, "grad_norm": 2.166682420885461, "learning_rate": 1.976239898398836e-07, "loss": 0.2643, "step": 41164 }, { "epoch": 0.7155521563037772, "grad_norm": 1.0571651588674635, "learning_rate": 1.9760157179790327e-07, "loss": 0.193, "step": 41165 }, { "epoch": 0.71556953884128, "grad_norm": 1.6330001817935336, "learning_rate": 1.9757915471438685e-07, "loss": 0.143, "step": 41166 }, { "epoch": 0.7155869213787829, "grad_norm": 1.084626381267688, "learning_rate": 1.9755673858940597e-07, "loss": 0.3113, "step": 41167 }, { "epoch": 0.7156043039162857, "grad_norm": 1.7050822124287581, "learning_rate": 1.975343234230311e-07, "loss": 0.2973, "step": 41168 }, { "epoch": 0.7156216864537885, "grad_norm": 1.6907673937957344, "learning_rate": 1.975119092153335e-07, "loss": 0.2745, "step": 41169 }, { "epoch": 0.7156390689912914, "grad_norm": 2.048616702460686, "learning_rate": 1.9748949596638404e-07, "loss": 0.2109, "step": 41170 }, { "epoch": 0.7156564515287942, "grad_norm": 1.2345450120678474, "learning_rate": 1.974670836762541e-07, "loss": 0.2009, "step": 41171 }, { "epoch": 0.715673834066297, "grad_norm": 1.7146000943880697, "learning_rate": 1.974446723450145e-07, "loss": 0.3723, "step": 41172 }, { "epoch": 0.7156912166037999, "grad_norm": 1.3233771073771037, "learning_rate": 1.9742226197273632e-07, "loss": 0.1952, "step": 41173 }, { "epoch": 0.7157085991413027, "grad_norm": 1.3414780901300056, "learning_rate": 1.9739985255949054e-07, "loss": 0.2409, "step": 41174 }, { "epoch": 0.7157259816788055, "grad_norm": 2.157169923395789, "learning_rate": 1.973774441053483e-07, "loss": 0.2491, "step": 41175 }, { "epoch": 0.7157433642163082, "grad_norm": 1.4658244006594452, "learning_rate": 1.973550366103805e-07, "loss": 0.2023, "step": 41176 }, { "epoch": 0.7157607467538111, "grad_norm": 3.4619011220170455, "learning_rate": 1.9733263007465822e-07, "loss": 0.2937, "step": 41177 }, { "epoch": 0.7157781292913139, "grad_norm": 1.1376239093690113, "learning_rate": 1.9731022449825223e-07, "loss": 0.22, "step": 41178 }, { "epoch": 0.7157955118288167, "grad_norm": 1.7702329279374494, "learning_rate": 1.9728781988123406e-07, "loss": 0.2331, "step": 41179 }, { "epoch": 0.7158128943663196, "grad_norm": 0.9985138059390021, "learning_rate": 1.972654162236746e-07, "loss": 0.3, "step": 41180 }, { "epoch": 0.7158302769038224, "grad_norm": 0.9715473991607657, "learning_rate": 1.9724301352564433e-07, "loss": 0.1737, "step": 41181 }, { "epoch": 0.7158476594413252, "grad_norm": 1.1173296566428201, "learning_rate": 1.9722061178721477e-07, "loss": 0.1294, "step": 41182 }, { "epoch": 0.715865041978828, "grad_norm": 1.9041509380033466, "learning_rate": 1.9719821100845684e-07, "loss": 0.2009, "step": 41183 }, { "epoch": 0.7158824245163309, "grad_norm": 1.122553199922448, "learning_rate": 1.9717581118944148e-07, "loss": 0.1876, "step": 41184 }, { "epoch": 0.7158998070538337, "grad_norm": 1.5989150568797954, "learning_rate": 1.971534123302397e-07, "loss": 0.2087, "step": 41185 }, { "epoch": 0.7159171895913365, "grad_norm": 1.116245880803447, "learning_rate": 1.9713101443092244e-07, "loss": 0.1571, "step": 41186 }, { "epoch": 0.7159345721288394, "grad_norm": 1.4095080390997152, "learning_rate": 1.9710861749156076e-07, "loss": 0.2655, "step": 41187 }, { "epoch": 0.7159519546663422, "grad_norm": 1.4835814022458211, "learning_rate": 1.9708622151222565e-07, "loss": 0.2227, "step": 41188 }, { "epoch": 0.715969337203845, "grad_norm": 1.0585432566625372, "learning_rate": 1.9706382649298787e-07, "loss": 0.2483, "step": 41189 }, { "epoch": 0.7159867197413479, "grad_norm": 1.1065896935449215, "learning_rate": 1.970414324339188e-07, "loss": 0.198, "step": 41190 }, { "epoch": 0.7160041022788507, "grad_norm": 1.5266523380836277, "learning_rate": 1.9701903933508923e-07, "loss": 0.2696, "step": 41191 }, { "epoch": 0.7160214848163535, "grad_norm": 2.0176247459027548, "learning_rate": 1.9699664719657026e-07, "loss": 0.2906, "step": 41192 }, { "epoch": 0.7160388673538564, "grad_norm": 1.1551562943213736, "learning_rate": 1.9697425601843242e-07, "loss": 0.1295, "step": 41193 }, { "epoch": 0.7160562498913592, "grad_norm": 2.40636024131, "learning_rate": 1.9695186580074714e-07, "loss": 0.2279, "step": 41194 }, { "epoch": 0.716073632428862, "grad_norm": 1.6307181823025318, "learning_rate": 1.969294765435852e-07, "loss": 0.3522, "step": 41195 }, { "epoch": 0.7160910149663647, "grad_norm": 1.2457722038127332, "learning_rate": 1.9690708824701768e-07, "loss": 0.1033, "step": 41196 }, { "epoch": 0.7161083975038676, "grad_norm": 1.7420375302953925, "learning_rate": 1.9688470091111525e-07, "loss": 0.2298, "step": 41197 }, { "epoch": 0.7161257800413704, "grad_norm": 1.16844482743551, "learning_rate": 1.9686231453594925e-07, "loss": 0.4612, "step": 41198 }, { "epoch": 0.7161431625788732, "grad_norm": 1.0170285408966164, "learning_rate": 1.9683992912159064e-07, "loss": 0.243, "step": 41199 }, { "epoch": 0.7161605451163761, "grad_norm": 1.3507982890458272, "learning_rate": 1.9681754466811002e-07, "loss": 0.1419, "step": 41200 }, { "epoch": 0.7161779276538789, "grad_norm": 1.3559849253933687, "learning_rate": 1.9679516117557832e-07, "loss": 0.2363, "step": 41201 }, { "epoch": 0.7161953101913817, "grad_norm": 1.613205840548303, "learning_rate": 1.9677277864406689e-07, "loss": 0.2357, "step": 41202 }, { "epoch": 0.7162126927288845, "grad_norm": 2.019992300205419, "learning_rate": 1.9675039707364644e-07, "loss": 0.2988, "step": 41203 }, { "epoch": 0.7162300752663874, "grad_norm": 1.0882662825906535, "learning_rate": 1.9672801646438792e-07, "loss": 0.1628, "step": 41204 }, { "epoch": 0.7162474578038902, "grad_norm": 1.5148156960986459, "learning_rate": 1.9670563681636227e-07, "loss": 0.2171, "step": 41205 }, { "epoch": 0.716264840341393, "grad_norm": 0.8961228851691855, "learning_rate": 1.9668325812964048e-07, "loss": 0.2429, "step": 41206 }, { "epoch": 0.7162822228788959, "grad_norm": 1.7517260906560501, "learning_rate": 1.9666088040429336e-07, "loss": 0.141, "step": 41207 }, { "epoch": 0.7162996054163987, "grad_norm": 1.863988068710389, "learning_rate": 1.9663850364039197e-07, "loss": 0.1424, "step": 41208 }, { "epoch": 0.7163169879539015, "grad_norm": 1.6788561356200757, "learning_rate": 1.9661612783800697e-07, "loss": 0.2365, "step": 41209 }, { "epoch": 0.7163343704914044, "grad_norm": 0.9726783663132014, "learning_rate": 1.9659375299720966e-07, "loss": 0.1757, "step": 41210 }, { "epoch": 0.7163517530289072, "grad_norm": 1.9036786323300035, "learning_rate": 1.9657137911807093e-07, "loss": 0.2262, "step": 41211 }, { "epoch": 0.71636913556641, "grad_norm": 1.1506270856224923, "learning_rate": 1.9654900620066122e-07, "loss": 0.2418, "step": 41212 }, { "epoch": 0.7163865181039129, "grad_norm": 1.0673926136952276, "learning_rate": 1.965266342450519e-07, "loss": 0.2428, "step": 41213 }, { "epoch": 0.7164039006414157, "grad_norm": 2.138070054397331, "learning_rate": 1.965042632513138e-07, "loss": 0.227, "step": 41214 }, { "epoch": 0.7164212831789185, "grad_norm": 1.0201027520358523, "learning_rate": 1.9648189321951775e-07, "loss": 0.2485, "step": 41215 }, { "epoch": 0.7164386657164212, "grad_norm": 1.897699557590845, "learning_rate": 1.9645952414973467e-07, "loss": 0.1482, "step": 41216 }, { "epoch": 0.7164560482539241, "grad_norm": 1.4043752496906972, "learning_rate": 1.9643715604203548e-07, "loss": 0.1894, "step": 41217 }, { "epoch": 0.7164734307914269, "grad_norm": 1.5627994989713336, "learning_rate": 1.9641478889649104e-07, "loss": 0.1565, "step": 41218 }, { "epoch": 0.7164908133289297, "grad_norm": 1.7373890842971056, "learning_rate": 1.9639242271317225e-07, "loss": 0.2479, "step": 41219 }, { "epoch": 0.7165081958664326, "grad_norm": 4.434539824557519, "learning_rate": 1.9637005749214986e-07, "loss": 0.3021, "step": 41220 }, { "epoch": 0.7165255784039354, "grad_norm": 1.4487152133489247, "learning_rate": 1.963476932334951e-07, "loss": 0.2684, "step": 41221 }, { "epoch": 0.7165429609414382, "grad_norm": 1.663170576398088, "learning_rate": 1.9632532993727863e-07, "loss": 0.3345, "step": 41222 }, { "epoch": 0.716560343478941, "grad_norm": 1.2341083881207495, "learning_rate": 1.963029676035714e-07, "loss": 0.1938, "step": 41223 }, { "epoch": 0.7165777260164439, "grad_norm": 2.071071834711228, "learning_rate": 1.9628060623244425e-07, "loss": 0.186, "step": 41224 }, { "epoch": 0.7165951085539467, "grad_norm": 2.0960454970648352, "learning_rate": 1.9625824582396805e-07, "loss": 0.2052, "step": 41225 }, { "epoch": 0.7166124910914495, "grad_norm": 1.6473504429228467, "learning_rate": 1.962358863782137e-07, "loss": 0.2807, "step": 41226 }, { "epoch": 0.7166298736289524, "grad_norm": 1.4049600331592802, "learning_rate": 1.9621352789525204e-07, "loss": 0.2181, "step": 41227 }, { "epoch": 0.7166472561664552, "grad_norm": 1.6432064466224874, "learning_rate": 1.9619117037515375e-07, "loss": 0.1878, "step": 41228 }, { "epoch": 0.716664638703958, "grad_norm": 1.0415914551373537, "learning_rate": 1.9616881381799012e-07, "loss": 0.1729, "step": 41229 }, { "epoch": 0.7166820212414609, "grad_norm": 2.2567850294762186, "learning_rate": 1.961464582238319e-07, "loss": 0.2568, "step": 41230 }, { "epoch": 0.7166994037789637, "grad_norm": 2.003340533244079, "learning_rate": 1.9612410359274973e-07, "loss": 0.3386, "step": 41231 }, { "epoch": 0.7167167863164665, "grad_norm": 0.9808219784369853, "learning_rate": 1.9610174992481432e-07, "loss": 0.3599, "step": 41232 }, { "epoch": 0.7167341688539693, "grad_norm": 1.0728410066611214, "learning_rate": 1.96079397220097e-07, "loss": 0.1561, "step": 41233 }, { "epoch": 0.7167515513914722, "grad_norm": 1.080570493104749, "learning_rate": 1.960570454786683e-07, "loss": 0.2322, "step": 41234 }, { "epoch": 0.716768933928975, "grad_norm": 1.0719036449073378, "learning_rate": 1.9603469470059918e-07, "loss": 0.2095, "step": 41235 }, { "epoch": 0.7167863164664777, "grad_norm": 1.4725196869649617, "learning_rate": 1.9601234488596046e-07, "loss": 0.1891, "step": 41236 }, { "epoch": 0.7168036990039806, "grad_norm": 1.6931984098320993, "learning_rate": 1.9598999603482296e-07, "loss": 0.3415, "step": 41237 }, { "epoch": 0.7168210815414834, "grad_norm": 11.697019807440412, "learning_rate": 1.9596764814725752e-07, "loss": 0.2024, "step": 41238 }, { "epoch": 0.7168384640789862, "grad_norm": 1.1706866693628155, "learning_rate": 1.9594530122333496e-07, "loss": 0.2022, "step": 41239 }, { "epoch": 0.716855846616489, "grad_norm": 0.8847143390743255, "learning_rate": 1.9592295526312596e-07, "loss": 0.1671, "step": 41240 }, { "epoch": 0.7168732291539919, "grad_norm": 1.158417541189008, "learning_rate": 1.9590061026670168e-07, "loss": 0.3311, "step": 41241 }, { "epoch": 0.7168906116914947, "grad_norm": 1.9610993238283716, "learning_rate": 1.9587826623413279e-07, "loss": 0.2345, "step": 41242 }, { "epoch": 0.7169079942289975, "grad_norm": 1.9216353084468685, "learning_rate": 1.958559231654901e-07, "loss": 0.2077, "step": 41243 }, { "epoch": 0.7169253767665004, "grad_norm": 0.9832292491400004, "learning_rate": 1.958335810608444e-07, "loss": 0.1876, "step": 41244 }, { "epoch": 0.7169427593040032, "grad_norm": 1.313905603541624, "learning_rate": 1.9581123992026654e-07, "loss": 0.2107, "step": 41245 }, { "epoch": 0.716960141841506, "grad_norm": 2.828166908314076, "learning_rate": 1.957888997438273e-07, "loss": 0.2559, "step": 41246 }, { "epoch": 0.7169775243790089, "grad_norm": 2.222848640289435, "learning_rate": 1.957665605315975e-07, "loss": 0.2136, "step": 41247 }, { "epoch": 0.7169949069165117, "grad_norm": 1.347087049856501, "learning_rate": 1.957442222836478e-07, "loss": 0.1673, "step": 41248 }, { "epoch": 0.7170122894540145, "grad_norm": 1.0184334300145423, "learning_rate": 1.957218850000495e-07, "loss": 0.272, "step": 41249 }, { "epoch": 0.7170296719915173, "grad_norm": 1.2718464859045744, "learning_rate": 1.9569954868087286e-07, "loss": 0.1681, "step": 41250 }, { "epoch": 0.7170470545290202, "grad_norm": 1.9166327769557447, "learning_rate": 1.956772133261887e-07, "loss": 0.2024, "step": 41251 }, { "epoch": 0.717064437066523, "grad_norm": 1.6750281003242722, "learning_rate": 1.9565487893606812e-07, "loss": 0.2773, "step": 41252 }, { "epoch": 0.7170818196040258, "grad_norm": 1.2689985846341645, "learning_rate": 1.9563254551058183e-07, "loss": 0.192, "step": 41253 }, { "epoch": 0.7170992021415287, "grad_norm": 1.3608846615177934, "learning_rate": 1.9561021304980053e-07, "loss": 0.2681, "step": 41254 }, { "epoch": 0.7171165846790315, "grad_norm": 1.3719283741002364, "learning_rate": 1.9558788155379502e-07, "loss": 0.1965, "step": 41255 }, { "epoch": 0.7171339672165342, "grad_norm": 1.418628785827507, "learning_rate": 1.9556555102263611e-07, "loss": 0.1342, "step": 41256 }, { "epoch": 0.717151349754037, "grad_norm": 1.17154547106365, "learning_rate": 1.955432214563945e-07, "loss": 0.1155, "step": 41257 }, { "epoch": 0.7171687322915399, "grad_norm": 1.6735158873849754, "learning_rate": 1.9552089285514112e-07, "loss": 0.211, "step": 41258 }, { "epoch": 0.7171861148290427, "grad_norm": 2.0322105969935786, "learning_rate": 1.9549856521894638e-07, "loss": 0.2308, "step": 41259 }, { "epoch": 0.7172034973665455, "grad_norm": 1.7085815693049156, "learning_rate": 1.9547623854788154e-07, "loss": 0.2214, "step": 41260 }, { "epoch": 0.7172208799040484, "grad_norm": 1.470115678441438, "learning_rate": 1.9545391284201713e-07, "loss": 0.139, "step": 41261 }, { "epoch": 0.7172382624415512, "grad_norm": 2.182999282568251, "learning_rate": 1.954315881014239e-07, "loss": 0.3589, "step": 41262 }, { "epoch": 0.717255644979054, "grad_norm": 2.1609103513999592, "learning_rate": 1.9540926432617262e-07, "loss": 0.202, "step": 41263 }, { "epoch": 0.7172730275165569, "grad_norm": 1.0863902239400158, "learning_rate": 1.9538694151633405e-07, "loss": 0.1729, "step": 41264 }, { "epoch": 0.7172904100540597, "grad_norm": 1.5712216078609151, "learning_rate": 1.9536461967197897e-07, "loss": 0.283, "step": 41265 }, { "epoch": 0.7173077925915625, "grad_norm": 2.186726072893963, "learning_rate": 1.9534229879317814e-07, "loss": 0.3948, "step": 41266 }, { "epoch": 0.7173251751290654, "grad_norm": 1.089381150963227, "learning_rate": 1.9531997888000207e-07, "loss": 0.2564, "step": 41267 }, { "epoch": 0.7173425576665682, "grad_norm": 0.9466896219151167, "learning_rate": 1.95297659932522e-07, "loss": 0.1541, "step": 41268 }, { "epoch": 0.717359940204071, "grad_norm": 1.346217030801474, "learning_rate": 1.952753419508083e-07, "loss": 0.1946, "step": 41269 }, { "epoch": 0.7173773227415738, "grad_norm": 1.1209696137949148, "learning_rate": 1.9525302493493156e-07, "loss": 0.1234, "step": 41270 }, { "epoch": 0.7173947052790767, "grad_norm": 1.4975480762703202, "learning_rate": 1.952307088849629e-07, "loss": 0.2424, "step": 41271 }, { "epoch": 0.7174120878165795, "grad_norm": 1.1075122109053919, "learning_rate": 1.9520839380097292e-07, "loss": 0.2296, "step": 41272 }, { "epoch": 0.7174294703540823, "grad_norm": 1.1939923813925528, "learning_rate": 1.9518607968303237e-07, "loss": 0.1545, "step": 41273 }, { "epoch": 0.7174468528915852, "grad_norm": 1.6422681856606087, "learning_rate": 1.9516376653121185e-07, "loss": 0.2446, "step": 41274 }, { "epoch": 0.717464235429088, "grad_norm": 3.650277475621919, "learning_rate": 1.951414543455822e-07, "loss": 0.2449, "step": 41275 }, { "epoch": 0.7174816179665907, "grad_norm": 1.021814446232394, "learning_rate": 1.9511914312621408e-07, "loss": 0.1462, "step": 41276 }, { "epoch": 0.7174990005040935, "grad_norm": 1.1793669958196213, "learning_rate": 1.9509683287317825e-07, "loss": 0.2168, "step": 41277 }, { "epoch": 0.7175163830415964, "grad_norm": 1.2841555690378295, "learning_rate": 1.950745235865452e-07, "loss": 0.1898, "step": 41278 }, { "epoch": 0.7175337655790992, "grad_norm": 2.933405416295299, "learning_rate": 1.95052215266386e-07, "loss": 0.3759, "step": 41279 }, { "epoch": 0.717551148116602, "grad_norm": 2.578828265685034, "learning_rate": 1.9502990791277125e-07, "loss": 0.2349, "step": 41280 }, { "epoch": 0.7175685306541049, "grad_norm": 2.5312782865112613, "learning_rate": 1.9500760152577172e-07, "loss": 0.2817, "step": 41281 }, { "epoch": 0.7175859131916077, "grad_norm": 1.8306354511358076, "learning_rate": 1.9498529610545768e-07, "loss": 0.1599, "step": 41282 }, { "epoch": 0.7176032957291105, "grad_norm": 1.501932373929691, "learning_rate": 1.949629916519003e-07, "loss": 0.2627, "step": 41283 }, { "epoch": 0.7176206782666134, "grad_norm": 1.7419735034768966, "learning_rate": 1.9494068816517005e-07, "loss": 0.2197, "step": 41284 }, { "epoch": 0.7176380608041162, "grad_norm": 1.3890657590157784, "learning_rate": 1.949183856453378e-07, "loss": 0.2314, "step": 41285 }, { "epoch": 0.717655443341619, "grad_norm": 1.513762124667306, "learning_rate": 1.9489608409247388e-07, "loss": 0.2024, "step": 41286 }, { "epoch": 0.7176728258791218, "grad_norm": 1.3500073423079952, "learning_rate": 1.9487378350664956e-07, "loss": 0.3389, "step": 41287 }, { "epoch": 0.7176902084166247, "grad_norm": 2.8424131788818374, "learning_rate": 1.9485148388793504e-07, "loss": 0.3202, "step": 41288 }, { "epoch": 0.7177075909541275, "grad_norm": 2.8402677357952184, "learning_rate": 1.9482918523640114e-07, "loss": 0.2024, "step": 41289 }, { "epoch": 0.7177249734916303, "grad_norm": 1.8530318873091243, "learning_rate": 1.9480688755211838e-07, "loss": 0.2307, "step": 41290 }, { "epoch": 0.7177423560291332, "grad_norm": 0.9650256062683408, "learning_rate": 1.9478459083515776e-07, "loss": 0.1988, "step": 41291 }, { "epoch": 0.717759738566636, "grad_norm": 0.9437512664865177, "learning_rate": 1.9476229508558973e-07, "loss": 0.2897, "step": 41292 }, { "epoch": 0.7177771211041388, "grad_norm": 1.519773785091679, "learning_rate": 1.9474000030348508e-07, "loss": 0.3123, "step": 41293 }, { "epoch": 0.7177945036416417, "grad_norm": 1.4532564228299234, "learning_rate": 1.9471770648891433e-07, "loss": 0.2292, "step": 41294 }, { "epoch": 0.7178118861791444, "grad_norm": 1.3437956933904462, "learning_rate": 1.946954136419483e-07, "loss": 0.1943, "step": 41295 }, { "epoch": 0.7178292687166472, "grad_norm": 0.8456474829402357, "learning_rate": 1.9467312176265748e-07, "loss": 0.1533, "step": 41296 }, { "epoch": 0.71784665125415, "grad_norm": 1.563390122348713, "learning_rate": 1.9465083085111266e-07, "loss": 0.1705, "step": 41297 }, { "epoch": 0.7178640337916529, "grad_norm": 1.9323725993978185, "learning_rate": 1.946285409073843e-07, "loss": 0.3338, "step": 41298 }, { "epoch": 0.7178814163291557, "grad_norm": 1.552101074920563, "learning_rate": 1.9460625193154328e-07, "loss": 0.2328, "step": 41299 }, { "epoch": 0.7178987988666585, "grad_norm": 1.2808179485153464, "learning_rate": 1.9458396392366038e-07, "loss": 0.213, "step": 41300 }, { "epoch": 0.7179161814041614, "grad_norm": 1.5127910322930835, "learning_rate": 1.9456167688380565e-07, "loss": 0.1947, "step": 41301 }, { "epoch": 0.7179335639416642, "grad_norm": 2.2819516298953295, "learning_rate": 1.9453939081205028e-07, "loss": 0.2725, "step": 41302 }, { "epoch": 0.717950946479167, "grad_norm": 1.3086424084188026, "learning_rate": 1.9451710570846474e-07, "loss": 0.195, "step": 41303 }, { "epoch": 0.7179683290166698, "grad_norm": 1.5693448071702163, "learning_rate": 1.9449482157311963e-07, "loss": 0.1635, "step": 41304 }, { "epoch": 0.7179857115541727, "grad_norm": 1.7578887878501448, "learning_rate": 1.9447253840608558e-07, "loss": 0.217, "step": 41305 }, { "epoch": 0.7180030940916755, "grad_norm": 3.091740551384588, "learning_rate": 1.9445025620743326e-07, "loss": 0.2593, "step": 41306 }, { "epoch": 0.7180204766291783, "grad_norm": 1.609129763837369, "learning_rate": 1.9442797497723328e-07, "loss": 0.1901, "step": 41307 }, { "epoch": 0.7180378591666812, "grad_norm": 1.546407098953122, "learning_rate": 1.9440569471555623e-07, "loss": 0.3561, "step": 41308 }, { "epoch": 0.718055241704184, "grad_norm": 1.587714625712694, "learning_rate": 1.943834154224726e-07, "loss": 0.1927, "step": 41309 }, { "epoch": 0.7180726242416868, "grad_norm": 1.188505194479452, "learning_rate": 1.943611370980533e-07, "loss": 0.2048, "step": 41310 }, { "epoch": 0.7180900067791897, "grad_norm": 2.832435028993836, "learning_rate": 1.943388597423688e-07, "loss": 0.1606, "step": 41311 }, { "epoch": 0.7181073893166925, "grad_norm": 0.8217710814646955, "learning_rate": 1.9431658335548988e-07, "loss": 0.228, "step": 41312 }, { "epoch": 0.7181247718541953, "grad_norm": 1.0636746404799489, "learning_rate": 1.9429430793748662e-07, "loss": 0.1667, "step": 41313 }, { "epoch": 0.7181421543916982, "grad_norm": 1.6204908216791583, "learning_rate": 1.942720334884302e-07, "loss": 0.2118, "step": 41314 }, { "epoch": 0.7181595369292009, "grad_norm": 1.1472896037369318, "learning_rate": 1.9424976000839093e-07, "loss": 0.2493, "step": 41315 }, { "epoch": 0.7181769194667037, "grad_norm": 1.6833192280711946, "learning_rate": 1.942274874974395e-07, "loss": 0.2314, "step": 41316 }, { "epoch": 0.7181943020042065, "grad_norm": 1.1968192149964192, "learning_rate": 1.9420521595564626e-07, "loss": 0.2319, "step": 41317 }, { "epoch": 0.7182116845417094, "grad_norm": 1.8086282978593906, "learning_rate": 1.9418294538308222e-07, "loss": 0.16, "step": 41318 }, { "epoch": 0.7182290670792122, "grad_norm": 1.8545874476191666, "learning_rate": 1.9416067577981792e-07, "loss": 0.3165, "step": 41319 }, { "epoch": 0.718246449616715, "grad_norm": 1.1398925799077744, "learning_rate": 1.9413840714592367e-07, "loss": 0.332, "step": 41320 }, { "epoch": 0.7182638321542179, "grad_norm": 1.5271130502482935, "learning_rate": 1.9411613948146993e-07, "loss": 0.1935, "step": 41321 }, { "epoch": 0.7182812146917207, "grad_norm": 1.5424020868562522, "learning_rate": 1.9409387278652772e-07, "loss": 0.1868, "step": 41322 }, { "epoch": 0.7182985972292235, "grad_norm": 1.4011896956280985, "learning_rate": 1.9407160706116737e-07, "loss": 0.2202, "step": 41323 }, { "epoch": 0.7183159797667263, "grad_norm": 1.3806812556950807, "learning_rate": 1.9404934230545955e-07, "loss": 0.2411, "step": 41324 }, { "epoch": 0.7183333623042292, "grad_norm": 1.074921494528382, "learning_rate": 1.9402707851947476e-07, "loss": 0.2322, "step": 41325 }, { "epoch": 0.718350744841732, "grad_norm": 1.0992772211935655, "learning_rate": 1.9400481570328358e-07, "loss": 0.1577, "step": 41326 }, { "epoch": 0.7183681273792348, "grad_norm": 0.8127078818913872, "learning_rate": 1.9398255385695656e-07, "loss": 0.151, "step": 41327 }, { "epoch": 0.7183855099167377, "grad_norm": 1.2676294617709536, "learning_rate": 1.9396029298056427e-07, "loss": 0.2132, "step": 41328 }, { "epoch": 0.7184028924542405, "grad_norm": 1.4538291908143688, "learning_rate": 1.939380330741771e-07, "loss": 0.1832, "step": 41329 }, { "epoch": 0.7184202749917433, "grad_norm": 1.2934472259174428, "learning_rate": 1.9391577413786592e-07, "loss": 0.1909, "step": 41330 }, { "epoch": 0.7184376575292462, "grad_norm": 0.9962200639708273, "learning_rate": 1.9389351617170135e-07, "loss": 0.185, "step": 41331 }, { "epoch": 0.718455040066749, "grad_norm": 1.1308590765285655, "learning_rate": 1.9387125917575332e-07, "loss": 0.2696, "step": 41332 }, { "epoch": 0.7184724226042518, "grad_norm": 1.6150855044503232, "learning_rate": 1.93849003150093e-07, "loss": 0.2263, "step": 41333 }, { "epoch": 0.7184898051417546, "grad_norm": 1.6139268944462415, "learning_rate": 1.9382674809479065e-07, "loss": 0.2061, "step": 41334 }, { "epoch": 0.7185071876792574, "grad_norm": 1.3420462031497478, "learning_rate": 1.9380449400991695e-07, "loss": 0.2107, "step": 41335 }, { "epoch": 0.7185245702167602, "grad_norm": 0.963546603142441, "learning_rate": 1.937822408955423e-07, "loss": 0.1822, "step": 41336 }, { "epoch": 0.718541952754263, "grad_norm": 1.2594173339684274, "learning_rate": 1.9375998875173726e-07, "loss": 0.1796, "step": 41337 }, { "epoch": 0.7185593352917659, "grad_norm": 1.145584863488799, "learning_rate": 1.9373773757857238e-07, "loss": 0.1413, "step": 41338 }, { "epoch": 0.7185767178292687, "grad_norm": 1.2717994420959546, "learning_rate": 1.937154873761182e-07, "loss": 0.2154, "step": 41339 }, { "epoch": 0.7185941003667715, "grad_norm": 2.142332441625527, "learning_rate": 1.9369323814444511e-07, "loss": 0.2927, "step": 41340 }, { "epoch": 0.7186114829042743, "grad_norm": 2.0674280415965005, "learning_rate": 1.9367098988362384e-07, "loss": 0.2424, "step": 41341 }, { "epoch": 0.7186288654417772, "grad_norm": 2.1841070017997914, "learning_rate": 1.936487425937249e-07, "loss": 0.1995, "step": 41342 }, { "epoch": 0.71864624797928, "grad_norm": 1.9306642524480495, "learning_rate": 1.9362649627481864e-07, "loss": 0.2259, "step": 41343 }, { "epoch": 0.7186636305167828, "grad_norm": 1.2471884893730647, "learning_rate": 1.9360425092697568e-07, "loss": 0.1885, "step": 41344 }, { "epoch": 0.7186810130542857, "grad_norm": 1.2548687355295731, "learning_rate": 1.9358200655026647e-07, "loss": 0.2346, "step": 41345 }, { "epoch": 0.7186983955917885, "grad_norm": 1.8337266475351115, "learning_rate": 1.935597631447616e-07, "loss": 0.1511, "step": 41346 }, { "epoch": 0.7187157781292913, "grad_norm": 0.8805734009418956, "learning_rate": 1.9353752071053147e-07, "loss": 0.2235, "step": 41347 }, { "epoch": 0.7187331606667942, "grad_norm": 2.277403167958241, "learning_rate": 1.935152792476465e-07, "loss": 0.3264, "step": 41348 }, { "epoch": 0.718750543204297, "grad_norm": 1.6559327485014221, "learning_rate": 1.934930387561774e-07, "loss": 0.3562, "step": 41349 }, { "epoch": 0.7187679257417998, "grad_norm": 1.0695371889795724, "learning_rate": 1.934707992361948e-07, "loss": 0.343, "step": 41350 }, { "epoch": 0.7187853082793026, "grad_norm": 1.0682982436099449, "learning_rate": 1.9344856068776878e-07, "loss": 0.186, "step": 41351 }, { "epoch": 0.7188026908168055, "grad_norm": 1.6544758094424108, "learning_rate": 1.934263231109698e-07, "loss": 0.1577, "step": 41352 }, { "epoch": 0.7188200733543083, "grad_norm": 1.073182125171002, "learning_rate": 1.9340408650586875e-07, "loss": 0.1172, "step": 41353 }, { "epoch": 0.7188374558918111, "grad_norm": 1.0617622808085119, "learning_rate": 1.9338185087253588e-07, "loss": 0.1235, "step": 41354 }, { "epoch": 0.7188548384293139, "grad_norm": 0.9951464757296112, "learning_rate": 1.9335961621104174e-07, "loss": 0.1848, "step": 41355 }, { "epoch": 0.7188722209668167, "grad_norm": 1.6774268251283413, "learning_rate": 1.9333738252145676e-07, "loss": 0.2147, "step": 41356 }, { "epoch": 0.7188896035043195, "grad_norm": 1.8335949419953148, "learning_rate": 1.9331514980385139e-07, "loss": 0.3285, "step": 41357 }, { "epoch": 0.7189069860418223, "grad_norm": 1.2847649743744132, "learning_rate": 1.9329291805829607e-07, "loss": 0.2101, "step": 41358 }, { "epoch": 0.7189243685793252, "grad_norm": 1.281715253352104, "learning_rate": 1.932706872848612e-07, "loss": 0.1735, "step": 41359 }, { "epoch": 0.718941751116828, "grad_norm": 1.4323131195637502, "learning_rate": 1.9324845748361746e-07, "loss": 0.1927, "step": 41360 }, { "epoch": 0.7189591336543308, "grad_norm": 2.0735157752324245, "learning_rate": 1.9322622865463523e-07, "loss": 0.1431, "step": 41361 }, { "epoch": 0.7189765161918337, "grad_norm": 2.046405265998523, "learning_rate": 1.9320400079798494e-07, "loss": 0.1583, "step": 41362 }, { "epoch": 0.7189938987293365, "grad_norm": 1.0885510091292505, "learning_rate": 1.9318177391373707e-07, "loss": 0.4481, "step": 41363 }, { "epoch": 0.7190112812668393, "grad_norm": 1.902718360815099, "learning_rate": 1.9315954800196199e-07, "loss": 0.161, "step": 41364 }, { "epoch": 0.7190286638043422, "grad_norm": 1.3009772574541687, "learning_rate": 1.9313732306273023e-07, "loss": 0.1869, "step": 41365 }, { "epoch": 0.719046046341845, "grad_norm": 1.7517399529711386, "learning_rate": 1.9311509909611217e-07, "loss": 0.1913, "step": 41366 }, { "epoch": 0.7190634288793478, "grad_norm": 1.434933247649376, "learning_rate": 1.9309287610217806e-07, "loss": 0.1572, "step": 41367 }, { "epoch": 0.7190808114168507, "grad_norm": 1.0930932563259796, "learning_rate": 1.930706540809988e-07, "loss": 0.1597, "step": 41368 }, { "epoch": 0.7190981939543535, "grad_norm": 1.427749093326213, "learning_rate": 1.9304843303264473e-07, "loss": 0.1133, "step": 41369 }, { "epoch": 0.7191155764918563, "grad_norm": 1.9601137662555599, "learning_rate": 1.930262129571859e-07, "loss": 0.3322, "step": 41370 }, { "epoch": 0.7191329590293591, "grad_norm": 1.4369272183781912, "learning_rate": 1.9300399385469285e-07, "loss": 0.1856, "step": 41371 }, { "epoch": 0.719150341566862, "grad_norm": 2.5570119918999694, "learning_rate": 1.9298177572523628e-07, "loss": 0.2782, "step": 41372 }, { "epoch": 0.7191677241043648, "grad_norm": 1.9914043818048293, "learning_rate": 1.929595585688864e-07, "loss": 0.2082, "step": 41373 }, { "epoch": 0.7191851066418676, "grad_norm": 1.5719608090817718, "learning_rate": 1.9293734238571374e-07, "loss": 0.2251, "step": 41374 }, { "epoch": 0.7192024891793704, "grad_norm": 2.0330600666383725, "learning_rate": 1.9291512717578863e-07, "loss": 0.2224, "step": 41375 }, { "epoch": 0.7192198717168732, "grad_norm": 1.8189598799685698, "learning_rate": 1.9289291293918148e-07, "loss": 0.1917, "step": 41376 }, { "epoch": 0.719237254254376, "grad_norm": 1.4322932783012763, "learning_rate": 1.9287069967596276e-07, "loss": 0.2283, "step": 41377 }, { "epoch": 0.7192546367918788, "grad_norm": 2.198135019883274, "learning_rate": 1.928484873862028e-07, "loss": 0.2352, "step": 41378 }, { "epoch": 0.7192720193293817, "grad_norm": 1.5886213008264345, "learning_rate": 1.9282627606997186e-07, "loss": 0.2664, "step": 41379 }, { "epoch": 0.7192894018668845, "grad_norm": 1.6457094566592145, "learning_rate": 1.9280406572734077e-07, "loss": 0.2649, "step": 41380 }, { "epoch": 0.7193067844043873, "grad_norm": 2.6353488338243363, "learning_rate": 1.9278185635837958e-07, "loss": 0.1879, "step": 41381 }, { "epoch": 0.7193241669418902, "grad_norm": 1.9077768566158282, "learning_rate": 1.9275964796315886e-07, "loss": 0.2298, "step": 41382 }, { "epoch": 0.719341549479393, "grad_norm": 1.1119600371880327, "learning_rate": 1.9273744054174885e-07, "loss": 0.1208, "step": 41383 }, { "epoch": 0.7193589320168958, "grad_norm": 1.5333441333676578, "learning_rate": 1.9271523409422007e-07, "loss": 0.2726, "step": 41384 }, { "epoch": 0.7193763145543987, "grad_norm": 1.678631602650439, "learning_rate": 1.9269302862064278e-07, "loss": 0.1992, "step": 41385 }, { "epoch": 0.7193936970919015, "grad_norm": 0.9006101225296989, "learning_rate": 1.9267082412108748e-07, "loss": 0.1968, "step": 41386 }, { "epoch": 0.7194110796294043, "grad_norm": 1.7217086519936577, "learning_rate": 1.9264862059562432e-07, "loss": 0.4863, "step": 41387 }, { "epoch": 0.7194284621669071, "grad_norm": 1.9124314703704823, "learning_rate": 1.9262641804432418e-07, "loss": 0.2281, "step": 41388 }, { "epoch": 0.71944584470441, "grad_norm": 1.300218022769986, "learning_rate": 1.9260421646725695e-07, "loss": 0.1871, "step": 41389 }, { "epoch": 0.7194632272419128, "grad_norm": 1.0307810186439057, "learning_rate": 1.9258201586449292e-07, "loss": 0.1092, "step": 41390 }, { "epoch": 0.7194806097794156, "grad_norm": 0.9223024352509333, "learning_rate": 1.925598162361029e-07, "loss": 0.2146, "step": 41391 }, { "epoch": 0.7194979923169185, "grad_norm": 1.450296491424992, "learning_rate": 1.9253761758215697e-07, "loss": 0.2568, "step": 41392 }, { "epoch": 0.7195153748544213, "grad_norm": 1.5133369142862685, "learning_rate": 1.9251541990272564e-07, "loss": 0.226, "step": 41393 }, { "epoch": 0.7195327573919241, "grad_norm": 1.09341775888489, "learning_rate": 1.9249322319787914e-07, "loss": 0.1743, "step": 41394 }, { "epoch": 0.7195501399294268, "grad_norm": 1.4481877637659175, "learning_rate": 1.924710274676879e-07, "loss": 0.1822, "step": 41395 }, { "epoch": 0.7195675224669297, "grad_norm": 1.5389556552099104, "learning_rate": 1.9244883271222218e-07, "loss": 0.4377, "step": 41396 }, { "epoch": 0.7195849050044325, "grad_norm": 1.1269176281256434, "learning_rate": 1.9242663893155243e-07, "loss": 0.2298, "step": 41397 }, { "epoch": 0.7196022875419353, "grad_norm": 1.8118036154489745, "learning_rate": 1.9240444612574875e-07, "loss": 0.2954, "step": 41398 }, { "epoch": 0.7196196700794382, "grad_norm": 1.1411537450374531, "learning_rate": 1.9238225429488187e-07, "loss": 0.226, "step": 41399 }, { "epoch": 0.719637052616941, "grad_norm": 2.158048911273351, "learning_rate": 1.923600634390219e-07, "loss": 0.1891, "step": 41400 }, { "epoch": 0.7196544351544438, "grad_norm": 2.281960159911207, "learning_rate": 1.923378735582394e-07, "loss": 0.1971, "step": 41401 }, { "epoch": 0.7196718176919467, "grad_norm": 2.1180747440722554, "learning_rate": 1.9231568465260416e-07, "loss": 0.2551, "step": 41402 }, { "epoch": 0.7196892002294495, "grad_norm": 1.143619883810631, "learning_rate": 1.9229349672218708e-07, "loss": 0.2452, "step": 41403 }, { "epoch": 0.7197065827669523, "grad_norm": 1.120023687461107, "learning_rate": 1.9227130976705824e-07, "loss": 0.1982, "step": 41404 }, { "epoch": 0.7197239653044551, "grad_norm": 1.3580859684742097, "learning_rate": 1.9224912378728797e-07, "loss": 0.1558, "step": 41405 }, { "epoch": 0.719741347841958, "grad_norm": 1.0166357148948482, "learning_rate": 1.9222693878294644e-07, "loss": 0.1946, "step": 41406 }, { "epoch": 0.7197587303794608, "grad_norm": 3.006946253688959, "learning_rate": 1.9220475475410446e-07, "loss": 0.2634, "step": 41407 }, { "epoch": 0.7197761129169636, "grad_norm": 1.4392251792176425, "learning_rate": 1.921825717008319e-07, "loss": 0.1537, "step": 41408 }, { "epoch": 0.7197934954544665, "grad_norm": 1.729687434852051, "learning_rate": 1.9216038962319915e-07, "loss": 0.158, "step": 41409 }, { "epoch": 0.7198108779919693, "grad_norm": 3.190158156892415, "learning_rate": 1.9213820852127637e-07, "loss": 0.3035, "step": 41410 }, { "epoch": 0.7198282605294721, "grad_norm": 1.7956033856176268, "learning_rate": 1.921160283951343e-07, "loss": 0.1516, "step": 41411 }, { "epoch": 0.719845643066975, "grad_norm": 1.3323044333696341, "learning_rate": 1.920938492448429e-07, "loss": 0.2133, "step": 41412 }, { "epoch": 0.7198630256044778, "grad_norm": 1.4098337446189217, "learning_rate": 1.920716710704726e-07, "loss": 0.1942, "step": 41413 }, { "epoch": 0.7198804081419806, "grad_norm": 1.0105375420006208, "learning_rate": 1.9204949387209362e-07, "loss": 0.1985, "step": 41414 }, { "epoch": 0.7198977906794833, "grad_norm": 2.583391773686923, "learning_rate": 1.920273176497763e-07, "loss": 0.2038, "step": 41415 }, { "epoch": 0.7199151732169862, "grad_norm": 1.1736517371992399, "learning_rate": 1.9200514240359094e-07, "loss": 0.1438, "step": 41416 }, { "epoch": 0.719932555754489, "grad_norm": 1.568141282381441, "learning_rate": 1.9198296813360777e-07, "loss": 0.2566, "step": 41417 }, { "epoch": 0.7199499382919918, "grad_norm": 2.0136978078579206, "learning_rate": 1.9196079483989696e-07, "loss": 0.21, "step": 41418 }, { "epoch": 0.7199673208294947, "grad_norm": 1.8607271825249445, "learning_rate": 1.9193862252252908e-07, "loss": 0.2676, "step": 41419 }, { "epoch": 0.7199847033669975, "grad_norm": 1.245246522797017, "learning_rate": 1.919164511815744e-07, "loss": 0.1712, "step": 41420 }, { "epoch": 0.7200020859045003, "grad_norm": 1.3057441340243787, "learning_rate": 1.9189428081710274e-07, "loss": 0.1323, "step": 41421 }, { "epoch": 0.7200194684420032, "grad_norm": 1.5249876893657477, "learning_rate": 1.918721114291848e-07, "loss": 0.1962, "step": 41422 }, { "epoch": 0.720036850979506, "grad_norm": 1.5880614543077818, "learning_rate": 1.9184994301789076e-07, "loss": 0.2607, "step": 41423 }, { "epoch": 0.7200542335170088, "grad_norm": 2.7752763310322006, "learning_rate": 1.918277755832909e-07, "loss": 0.2412, "step": 41424 }, { "epoch": 0.7200716160545116, "grad_norm": 1.4814417704393412, "learning_rate": 1.9180560912545535e-07, "loss": 0.2719, "step": 41425 }, { "epoch": 0.7200889985920145, "grad_norm": 1.417136283424486, "learning_rate": 1.9178344364445447e-07, "loss": 0.2355, "step": 41426 }, { "epoch": 0.7201063811295173, "grad_norm": 2.115360007534651, "learning_rate": 1.917612791403585e-07, "loss": 0.2029, "step": 41427 }, { "epoch": 0.7201237636670201, "grad_norm": 1.6031486444047074, "learning_rate": 1.9173911561323768e-07, "loss": 0.2122, "step": 41428 }, { "epoch": 0.720141146204523, "grad_norm": 0.9192034538400131, "learning_rate": 1.9171695306316204e-07, "loss": 0.2557, "step": 41429 }, { "epoch": 0.7201585287420258, "grad_norm": 1.2186404430366853, "learning_rate": 1.916947914902023e-07, "loss": 0.2428, "step": 41430 }, { "epoch": 0.7201759112795286, "grad_norm": 1.5649351737771557, "learning_rate": 1.9167263089442836e-07, "loss": 0.2234, "step": 41431 }, { "epoch": 0.7201932938170315, "grad_norm": 4.2949987668227605, "learning_rate": 1.9165047127591078e-07, "loss": 0.2867, "step": 41432 }, { "epoch": 0.7202106763545343, "grad_norm": 2.181685225229733, "learning_rate": 1.9162831263471924e-07, "loss": 0.283, "step": 41433 }, { "epoch": 0.720228058892037, "grad_norm": 1.8272248679557295, "learning_rate": 1.916061549709244e-07, "loss": 0.126, "step": 41434 }, { "epoch": 0.7202454414295398, "grad_norm": 1.7690697600088454, "learning_rate": 1.9158399828459642e-07, "loss": 0.1699, "step": 41435 }, { "epoch": 0.7202628239670427, "grad_norm": 1.7650089071040027, "learning_rate": 1.915618425758055e-07, "loss": 0.2348, "step": 41436 }, { "epoch": 0.7202802065045455, "grad_norm": 2.045602886044347, "learning_rate": 1.9153968784462167e-07, "loss": 0.3246, "step": 41437 }, { "epoch": 0.7202975890420483, "grad_norm": 3.79575405281429, "learning_rate": 1.9151753409111548e-07, "loss": 0.3404, "step": 41438 }, { "epoch": 0.7203149715795512, "grad_norm": 1.0285681514965088, "learning_rate": 1.9149538131535718e-07, "loss": 0.3719, "step": 41439 }, { "epoch": 0.720332354117054, "grad_norm": 1.9898093885279942, "learning_rate": 1.914732295174167e-07, "loss": 0.2706, "step": 41440 }, { "epoch": 0.7203497366545568, "grad_norm": 2.2187390619148877, "learning_rate": 1.914510786973641e-07, "loss": 0.3023, "step": 41441 }, { "epoch": 0.7203671191920596, "grad_norm": 0.847717090212596, "learning_rate": 1.9142892885527007e-07, "loss": 0.2154, "step": 41442 }, { "epoch": 0.7203845017295625, "grad_norm": 1.353130615853344, "learning_rate": 1.9140677999120458e-07, "loss": 0.2565, "step": 41443 }, { "epoch": 0.7204018842670653, "grad_norm": 1.4372455283613965, "learning_rate": 1.913846321052378e-07, "loss": 0.1455, "step": 41444 }, { "epoch": 0.7204192668045681, "grad_norm": 2.0150510052271344, "learning_rate": 1.9136248519744002e-07, "loss": 0.234, "step": 41445 }, { "epoch": 0.720436649342071, "grad_norm": 1.604726546679367, "learning_rate": 1.9134033926788135e-07, "loss": 0.3417, "step": 41446 }, { "epoch": 0.7204540318795738, "grad_norm": 1.0329210428708906, "learning_rate": 1.9131819431663204e-07, "loss": 0.2012, "step": 41447 }, { "epoch": 0.7204714144170766, "grad_norm": 1.339268562045693, "learning_rate": 1.9129605034376207e-07, "loss": 0.2048, "step": 41448 }, { "epoch": 0.7204887969545795, "grad_norm": 0.9169183985887781, "learning_rate": 1.91273907349342e-07, "loss": 0.2598, "step": 41449 }, { "epoch": 0.7205061794920823, "grad_norm": 1.23253389223677, "learning_rate": 1.9125176533344184e-07, "loss": 0.1517, "step": 41450 }, { "epoch": 0.7205235620295851, "grad_norm": 1.499653693156276, "learning_rate": 1.9122962429613194e-07, "loss": 0.2076, "step": 41451 }, { "epoch": 0.720540944567088, "grad_norm": 2.0223379358851736, "learning_rate": 1.912074842374819e-07, "loss": 0.1794, "step": 41452 }, { "epoch": 0.7205583271045908, "grad_norm": 1.6215327956578711, "learning_rate": 1.9118534515756246e-07, "loss": 0.283, "step": 41453 }, { "epoch": 0.7205757096420935, "grad_norm": 1.1631846589772896, "learning_rate": 1.9116320705644361e-07, "loss": 0.1708, "step": 41454 }, { "epoch": 0.7205930921795963, "grad_norm": 1.520316825422185, "learning_rate": 1.911410699341956e-07, "loss": 0.3325, "step": 41455 }, { "epoch": 0.7206104747170992, "grad_norm": 2.699300286535901, "learning_rate": 1.9111893379088827e-07, "loss": 0.2424, "step": 41456 }, { "epoch": 0.720627857254602, "grad_norm": 1.8862725702331964, "learning_rate": 1.9109679862659245e-07, "loss": 0.2676, "step": 41457 }, { "epoch": 0.7206452397921048, "grad_norm": 1.0142479423682427, "learning_rate": 1.9107466444137765e-07, "loss": 0.1549, "step": 41458 }, { "epoch": 0.7206626223296076, "grad_norm": 1.4480480480087756, "learning_rate": 1.9105253123531428e-07, "loss": 0.2131, "step": 41459 }, { "epoch": 0.7206800048671105, "grad_norm": 1.7045681787834035, "learning_rate": 1.910303990084723e-07, "loss": 0.2032, "step": 41460 }, { "epoch": 0.7206973874046133, "grad_norm": 1.6189790345479045, "learning_rate": 1.9100826776092215e-07, "loss": 0.2372, "step": 41461 }, { "epoch": 0.7207147699421161, "grad_norm": 1.9577283330938582, "learning_rate": 1.909861374927339e-07, "loss": 0.1963, "step": 41462 }, { "epoch": 0.720732152479619, "grad_norm": 2.744277853562071, "learning_rate": 1.9096400820397762e-07, "loss": 0.3514, "step": 41463 }, { "epoch": 0.7207495350171218, "grad_norm": 0.9775413536788035, "learning_rate": 1.909418798947235e-07, "loss": 0.2103, "step": 41464 }, { "epoch": 0.7207669175546246, "grad_norm": 1.1891800306661962, "learning_rate": 1.9091975256504162e-07, "loss": 0.2356, "step": 41465 }, { "epoch": 0.7207843000921275, "grad_norm": 1.2701027983680058, "learning_rate": 1.9089762621500217e-07, "loss": 0.1621, "step": 41466 }, { "epoch": 0.7208016826296303, "grad_norm": 2.1815100838750277, "learning_rate": 1.9087550084467524e-07, "loss": 0.3103, "step": 41467 }, { "epoch": 0.7208190651671331, "grad_norm": 1.9226718828284748, "learning_rate": 1.908533764541308e-07, "loss": 0.2756, "step": 41468 }, { "epoch": 0.720836447704636, "grad_norm": 2.052934340254295, "learning_rate": 1.9083125304343933e-07, "loss": 0.1861, "step": 41469 }, { "epoch": 0.7208538302421388, "grad_norm": 1.4519045490483753, "learning_rate": 1.9080913061267094e-07, "loss": 0.1894, "step": 41470 }, { "epoch": 0.7208712127796416, "grad_norm": 1.4724695390766673, "learning_rate": 1.907870091618952e-07, "loss": 0.1869, "step": 41471 }, { "epoch": 0.7208885953171444, "grad_norm": 1.582085470665155, "learning_rate": 1.907648886911828e-07, "loss": 0.1436, "step": 41472 }, { "epoch": 0.7209059778546473, "grad_norm": 1.5276045078076397, "learning_rate": 1.9074276920060367e-07, "loss": 0.193, "step": 41473 }, { "epoch": 0.72092336039215, "grad_norm": 1.8134516452805767, "learning_rate": 1.9072065069022786e-07, "loss": 0.2518, "step": 41474 }, { "epoch": 0.7209407429296528, "grad_norm": 3.053967830167123, "learning_rate": 1.9069853316012552e-07, "loss": 0.2262, "step": 41475 }, { "epoch": 0.7209581254671557, "grad_norm": 1.4761103369942288, "learning_rate": 1.9067641661036676e-07, "loss": 0.1787, "step": 41476 }, { "epoch": 0.7209755080046585, "grad_norm": 1.3290516339014926, "learning_rate": 1.9065430104102171e-07, "loss": 0.1678, "step": 41477 }, { "epoch": 0.7209928905421613, "grad_norm": 1.0952218826276356, "learning_rate": 1.906321864521604e-07, "loss": 0.1622, "step": 41478 }, { "epoch": 0.7210102730796641, "grad_norm": 1.8745842445464767, "learning_rate": 1.9061007284385272e-07, "loss": 0.1873, "step": 41479 }, { "epoch": 0.721027655617167, "grad_norm": 2.055182333422619, "learning_rate": 1.905879602161692e-07, "loss": 0.2755, "step": 41480 }, { "epoch": 0.7210450381546698, "grad_norm": 2.041084037573926, "learning_rate": 1.9056584856917974e-07, "loss": 0.225, "step": 41481 }, { "epoch": 0.7210624206921726, "grad_norm": 1.6427014450579664, "learning_rate": 1.9054373790295437e-07, "loss": 0.1771, "step": 41482 }, { "epoch": 0.7210798032296755, "grad_norm": 2.4776215324922473, "learning_rate": 1.9052162821756318e-07, "loss": 0.2741, "step": 41483 }, { "epoch": 0.7210971857671783, "grad_norm": 1.858005521229147, "learning_rate": 1.904995195130763e-07, "loss": 0.3205, "step": 41484 }, { "epoch": 0.7211145683046811, "grad_norm": 2.6377561443754276, "learning_rate": 1.9047741178956372e-07, "loss": 0.3282, "step": 41485 }, { "epoch": 0.721131950842184, "grad_norm": 1.313239149665432, "learning_rate": 1.904553050470956e-07, "loss": 0.1715, "step": 41486 }, { "epoch": 0.7211493333796868, "grad_norm": 1.1801223251022983, "learning_rate": 1.904331992857418e-07, "loss": 0.1233, "step": 41487 }, { "epoch": 0.7211667159171896, "grad_norm": 1.4098035557393107, "learning_rate": 1.9041109450557276e-07, "loss": 0.2425, "step": 41488 }, { "epoch": 0.7211840984546924, "grad_norm": 1.7583931720403017, "learning_rate": 1.9038899070665843e-07, "loss": 0.1842, "step": 41489 }, { "epoch": 0.7212014809921953, "grad_norm": 1.5524473475443923, "learning_rate": 1.903668878890687e-07, "loss": 0.1826, "step": 41490 }, { "epoch": 0.7212188635296981, "grad_norm": 1.8622383270112055, "learning_rate": 1.903447860528734e-07, "loss": 0.2661, "step": 41491 }, { "epoch": 0.7212362460672009, "grad_norm": 1.5434574073120844, "learning_rate": 1.9032268519814316e-07, "loss": 0.3092, "step": 41492 }, { "epoch": 0.7212536286047038, "grad_norm": 2.450566981903052, "learning_rate": 1.9030058532494768e-07, "loss": 0.3417, "step": 41493 }, { "epoch": 0.7212710111422065, "grad_norm": 1.7373568037197704, "learning_rate": 1.9027848643335714e-07, "loss": 0.1568, "step": 41494 }, { "epoch": 0.7212883936797093, "grad_norm": 1.846461892508123, "learning_rate": 1.9025638852344157e-07, "loss": 0.3031, "step": 41495 }, { "epoch": 0.7213057762172121, "grad_norm": 1.7129030468960844, "learning_rate": 1.9023429159527088e-07, "loss": 0.1703, "step": 41496 }, { "epoch": 0.721323158754715, "grad_norm": 1.4273469166966928, "learning_rate": 1.9021219564891523e-07, "loss": 0.1765, "step": 41497 }, { "epoch": 0.7213405412922178, "grad_norm": 0.9809403696174228, "learning_rate": 1.9019010068444458e-07, "loss": 0.2642, "step": 41498 }, { "epoch": 0.7213579238297206, "grad_norm": 3.398256392280789, "learning_rate": 1.9016800670192885e-07, "loss": 0.3372, "step": 41499 }, { "epoch": 0.7213753063672235, "grad_norm": 1.2047854418674164, "learning_rate": 1.9014591370143835e-07, "loss": 0.2101, "step": 41500 }, { "epoch": 0.7213926889047263, "grad_norm": 2.8430889508032036, "learning_rate": 1.9012382168304298e-07, "loss": 0.2354, "step": 41501 }, { "epoch": 0.7214100714422291, "grad_norm": 2.033840405944727, "learning_rate": 1.901017306468128e-07, "loss": 0.1738, "step": 41502 }, { "epoch": 0.721427453979732, "grad_norm": 1.1606229685998644, "learning_rate": 1.9007964059281772e-07, "loss": 0.15, "step": 41503 }, { "epoch": 0.7214448365172348, "grad_norm": 2.1524053057763006, "learning_rate": 1.9005755152112784e-07, "loss": 0.3626, "step": 41504 }, { "epoch": 0.7214622190547376, "grad_norm": 0.8460040771367221, "learning_rate": 1.9003546343181314e-07, "loss": 0.2261, "step": 41505 }, { "epoch": 0.7214796015922404, "grad_norm": 1.1703496262839694, "learning_rate": 1.9001337632494363e-07, "loss": 0.2234, "step": 41506 }, { "epoch": 0.7214969841297433, "grad_norm": 4.058259874627373, "learning_rate": 1.8999129020058917e-07, "loss": 0.2206, "step": 41507 }, { "epoch": 0.7215143666672461, "grad_norm": 1.927808802966033, "learning_rate": 1.8996920505882025e-07, "loss": 0.1554, "step": 41508 }, { "epoch": 0.7215317492047489, "grad_norm": 1.1941632049672055, "learning_rate": 1.8994712089970634e-07, "loss": 0.1232, "step": 41509 }, { "epoch": 0.7215491317422518, "grad_norm": 1.4042436312745754, "learning_rate": 1.8992503772331746e-07, "loss": 0.2559, "step": 41510 }, { "epoch": 0.7215665142797546, "grad_norm": 1.4243833958473815, "learning_rate": 1.8990295552972397e-07, "loss": 0.1767, "step": 41511 }, { "epoch": 0.7215838968172574, "grad_norm": 1.5443810018569424, "learning_rate": 1.8988087431899562e-07, "loss": 0.2442, "step": 41512 }, { "epoch": 0.7216012793547603, "grad_norm": 1.623747588370277, "learning_rate": 1.8985879409120247e-07, "loss": 0.2255, "step": 41513 }, { "epoch": 0.721618661892263, "grad_norm": 1.42909841608406, "learning_rate": 1.898367148464145e-07, "loss": 0.2263, "step": 41514 }, { "epoch": 0.7216360444297658, "grad_norm": 1.3568282952973412, "learning_rate": 1.8981463658470164e-07, "loss": 0.176, "step": 41515 }, { "epoch": 0.7216534269672686, "grad_norm": 2.2946889280598017, "learning_rate": 1.897925593061339e-07, "loss": 0.2011, "step": 41516 }, { "epoch": 0.7216708095047715, "grad_norm": 1.8934636558653832, "learning_rate": 1.8977048301078124e-07, "loss": 0.2228, "step": 41517 }, { "epoch": 0.7216881920422743, "grad_norm": 2.0263043802313248, "learning_rate": 1.8974840769871354e-07, "loss": 0.2353, "step": 41518 }, { "epoch": 0.7217055745797771, "grad_norm": 2.4119120508672376, "learning_rate": 1.8972633337000094e-07, "loss": 0.2236, "step": 41519 }, { "epoch": 0.72172295711728, "grad_norm": 2.4100817554695113, "learning_rate": 1.897042600247134e-07, "loss": 0.3422, "step": 41520 }, { "epoch": 0.7217403396547828, "grad_norm": 2.0401950710698475, "learning_rate": 1.8968218766292095e-07, "loss": 0.3025, "step": 41521 }, { "epoch": 0.7217577221922856, "grad_norm": 1.1949351834641346, "learning_rate": 1.8966011628469308e-07, "loss": 0.2163, "step": 41522 }, { "epoch": 0.7217751047297885, "grad_norm": 1.6329136499781591, "learning_rate": 1.8963804589010023e-07, "loss": 0.1897, "step": 41523 }, { "epoch": 0.7217924872672913, "grad_norm": 1.3797418484074038, "learning_rate": 1.896159764792123e-07, "loss": 0.1479, "step": 41524 }, { "epoch": 0.7218098698047941, "grad_norm": 1.2476395565769571, "learning_rate": 1.8959390805209907e-07, "loss": 0.1676, "step": 41525 }, { "epoch": 0.7218272523422969, "grad_norm": 2.255766166420237, "learning_rate": 1.8957184060883042e-07, "loss": 0.3819, "step": 41526 }, { "epoch": 0.7218446348797998, "grad_norm": 1.2963624179858475, "learning_rate": 1.8954977414947675e-07, "loss": 0.2323, "step": 41527 }, { "epoch": 0.7218620174173026, "grad_norm": 2.035923359169507, "learning_rate": 1.895277086741075e-07, "loss": 0.3065, "step": 41528 }, { "epoch": 0.7218793999548054, "grad_norm": 1.3281371069762555, "learning_rate": 1.8950564418279287e-07, "loss": 0.2401, "step": 41529 }, { "epoch": 0.7218967824923083, "grad_norm": 8.60543437880794, "learning_rate": 1.894835806756025e-07, "loss": 0.4215, "step": 41530 }, { "epoch": 0.7219141650298111, "grad_norm": 2.1414584962138865, "learning_rate": 1.894615181526067e-07, "loss": 0.1483, "step": 41531 }, { "epoch": 0.7219315475673139, "grad_norm": 1.4808756049033576, "learning_rate": 1.8943945661387524e-07, "loss": 0.2406, "step": 41532 }, { "epoch": 0.7219489301048168, "grad_norm": 1.3169117872075902, "learning_rate": 1.8941739605947803e-07, "loss": 0.1818, "step": 41533 }, { "epoch": 0.7219663126423195, "grad_norm": 1.240936256202372, "learning_rate": 1.8939533648948502e-07, "loss": 0.1774, "step": 41534 }, { "epoch": 0.7219836951798223, "grad_norm": 0.9086733860007508, "learning_rate": 1.8937327790396617e-07, "loss": 0.1727, "step": 41535 }, { "epoch": 0.7220010777173251, "grad_norm": 3.276523892770273, "learning_rate": 1.8935122030299127e-07, "loss": 0.251, "step": 41536 }, { "epoch": 0.722018460254828, "grad_norm": 0.9370011423637121, "learning_rate": 1.8932916368663031e-07, "loss": 0.1998, "step": 41537 }, { "epoch": 0.7220358427923308, "grad_norm": 1.4882553565946026, "learning_rate": 1.8930710805495305e-07, "loss": 0.1574, "step": 41538 }, { "epoch": 0.7220532253298336, "grad_norm": 1.2031207397421009, "learning_rate": 1.892850534080297e-07, "loss": 0.2794, "step": 41539 }, { "epoch": 0.7220706078673365, "grad_norm": 0.9018201959366989, "learning_rate": 1.8926299974593013e-07, "loss": 0.2194, "step": 41540 }, { "epoch": 0.7220879904048393, "grad_norm": 1.20882410088364, "learning_rate": 1.892409470687238e-07, "loss": 0.1809, "step": 41541 }, { "epoch": 0.7221053729423421, "grad_norm": 1.9903211186005758, "learning_rate": 1.8921889537648112e-07, "loss": 0.1589, "step": 41542 }, { "epoch": 0.722122755479845, "grad_norm": 2.4457216624322595, "learning_rate": 1.8919684466927171e-07, "loss": 0.3093, "step": 41543 }, { "epoch": 0.7221401380173478, "grad_norm": 2.0044756269069626, "learning_rate": 1.891747949471656e-07, "loss": 0.2454, "step": 41544 }, { "epoch": 0.7221575205548506, "grad_norm": 1.6789043750227968, "learning_rate": 1.8915274621023236e-07, "loss": 0.3008, "step": 41545 }, { "epoch": 0.7221749030923534, "grad_norm": 1.079257236002431, "learning_rate": 1.8913069845854252e-07, "loss": 0.1136, "step": 41546 }, { "epoch": 0.7221922856298563, "grad_norm": 2.021714893276946, "learning_rate": 1.891086516921654e-07, "loss": 0.3271, "step": 41547 }, { "epoch": 0.7222096681673591, "grad_norm": 1.234663882614679, "learning_rate": 1.8908660591117097e-07, "loss": 0.2483, "step": 41548 }, { "epoch": 0.7222270507048619, "grad_norm": 1.0726621847368774, "learning_rate": 1.8906456111562908e-07, "loss": 0.1367, "step": 41549 }, { "epoch": 0.7222444332423648, "grad_norm": 3.912625931729695, "learning_rate": 1.8904251730560989e-07, "loss": 0.222, "step": 41550 }, { "epoch": 0.7222618157798676, "grad_norm": 1.6385323372890983, "learning_rate": 1.89020474481183e-07, "loss": 0.1994, "step": 41551 }, { "epoch": 0.7222791983173704, "grad_norm": 1.8006499344535285, "learning_rate": 1.8899843264241844e-07, "loss": 0.2249, "step": 41552 }, { "epoch": 0.7222965808548732, "grad_norm": 1.1609760989795692, "learning_rate": 1.889763917893859e-07, "loss": 0.3078, "step": 41553 }, { "epoch": 0.722313963392376, "grad_norm": 1.4092445072690214, "learning_rate": 1.8895435192215536e-07, "loss": 0.2598, "step": 41554 }, { "epoch": 0.7223313459298788, "grad_norm": 1.2209223220193637, "learning_rate": 1.8893231304079666e-07, "loss": 0.1594, "step": 41555 }, { "epoch": 0.7223487284673816, "grad_norm": 3.4038247529131147, "learning_rate": 1.889102751453796e-07, "loss": 0.3135, "step": 41556 }, { "epoch": 0.7223661110048845, "grad_norm": 1.8923340427819395, "learning_rate": 1.8888823823597394e-07, "loss": 0.156, "step": 41557 }, { "epoch": 0.7223834935423873, "grad_norm": 1.16802424202655, "learning_rate": 1.8886620231264977e-07, "loss": 0.165, "step": 41558 }, { "epoch": 0.7224008760798901, "grad_norm": 0.9817169098665682, "learning_rate": 1.8884416737547704e-07, "loss": 0.1062, "step": 41559 }, { "epoch": 0.722418258617393, "grad_norm": 1.598058649010671, "learning_rate": 1.8882213342452497e-07, "loss": 0.4335, "step": 41560 }, { "epoch": 0.7224356411548958, "grad_norm": 1.1008486036684764, "learning_rate": 1.88800100459864e-07, "loss": 0.213, "step": 41561 }, { "epoch": 0.7224530236923986, "grad_norm": 0.9795282414783524, "learning_rate": 1.8877806848156375e-07, "loss": 0.2168, "step": 41562 }, { "epoch": 0.7224704062299014, "grad_norm": 1.5955190050402595, "learning_rate": 1.887560374896941e-07, "loss": 0.2422, "step": 41563 }, { "epoch": 0.7224877887674043, "grad_norm": 1.4571619961051325, "learning_rate": 1.8873400748432478e-07, "loss": 0.1695, "step": 41564 }, { "epoch": 0.7225051713049071, "grad_norm": 1.607671752099832, "learning_rate": 1.8871197846552566e-07, "loss": 0.1211, "step": 41565 }, { "epoch": 0.7225225538424099, "grad_norm": 1.3936552299642406, "learning_rate": 1.886899504333666e-07, "loss": 0.1415, "step": 41566 }, { "epoch": 0.7225399363799128, "grad_norm": 1.2596630295485436, "learning_rate": 1.8866792338791742e-07, "loss": 0.1059, "step": 41567 }, { "epoch": 0.7225573189174156, "grad_norm": 1.3583761862785233, "learning_rate": 1.8864589732924775e-07, "loss": 0.0872, "step": 41568 }, { "epoch": 0.7225747014549184, "grad_norm": 0.9622533103491486, "learning_rate": 1.8862387225742765e-07, "loss": 0.1675, "step": 41569 }, { "epoch": 0.7225920839924213, "grad_norm": 1.8326214968965775, "learning_rate": 1.886018481725269e-07, "loss": 0.2394, "step": 41570 }, { "epoch": 0.7226094665299241, "grad_norm": 1.4474016248929706, "learning_rate": 1.8857982507461538e-07, "loss": 0.2571, "step": 41571 }, { "epoch": 0.7226268490674269, "grad_norm": 1.1899040811849608, "learning_rate": 1.8855780296376244e-07, "loss": 0.1364, "step": 41572 }, { "epoch": 0.7226442316049296, "grad_norm": 1.299858220761238, "learning_rate": 1.8853578184003838e-07, "loss": 0.2072, "step": 41573 }, { "epoch": 0.7226616141424325, "grad_norm": 9.65408860287189, "learning_rate": 1.8851376170351285e-07, "loss": 0.3276, "step": 41574 }, { "epoch": 0.7226789966799353, "grad_norm": 1.8522151230070734, "learning_rate": 1.8849174255425554e-07, "loss": 0.1832, "step": 41575 }, { "epoch": 0.7226963792174381, "grad_norm": 1.557353566348947, "learning_rate": 1.8846972439233616e-07, "loss": 0.3235, "step": 41576 }, { "epoch": 0.722713761754941, "grad_norm": 1.610988063523909, "learning_rate": 1.88447707217825e-07, "loss": 0.2406, "step": 41577 }, { "epoch": 0.7227311442924438, "grad_norm": 2.6427804582800145, "learning_rate": 1.8842569103079135e-07, "loss": 0.1685, "step": 41578 }, { "epoch": 0.7227485268299466, "grad_norm": 1.6444435609028523, "learning_rate": 1.8840367583130512e-07, "loss": 0.335, "step": 41579 }, { "epoch": 0.7227659093674494, "grad_norm": 1.3917794558440517, "learning_rate": 1.8838166161943592e-07, "loss": 0.169, "step": 41580 }, { "epoch": 0.7227832919049523, "grad_norm": 1.6053504558092706, "learning_rate": 1.8835964839525386e-07, "loss": 0.201, "step": 41581 }, { "epoch": 0.7228006744424551, "grad_norm": 3.4728587354608376, "learning_rate": 1.8833763615882858e-07, "loss": 0.2686, "step": 41582 }, { "epoch": 0.7228180569799579, "grad_norm": 0.9504696559552188, "learning_rate": 1.8831562491022977e-07, "loss": 0.1158, "step": 41583 }, { "epoch": 0.7228354395174608, "grad_norm": 1.1967654286149763, "learning_rate": 1.8829361464952732e-07, "loss": 0.1667, "step": 41584 }, { "epoch": 0.7228528220549636, "grad_norm": 1.9213738070446893, "learning_rate": 1.8827160537679094e-07, "loss": 0.1423, "step": 41585 }, { "epoch": 0.7228702045924664, "grad_norm": 1.5482170166437916, "learning_rate": 1.8824959709209032e-07, "loss": 0.216, "step": 41586 }, { "epoch": 0.7228875871299693, "grad_norm": 5.3765895239421315, "learning_rate": 1.882275897954953e-07, "loss": 0.2135, "step": 41587 }, { "epoch": 0.7229049696674721, "grad_norm": 3.7400266005939016, "learning_rate": 1.8820558348707542e-07, "loss": 0.2743, "step": 41588 }, { "epoch": 0.7229223522049749, "grad_norm": 1.5312524477294112, "learning_rate": 1.881835781669008e-07, "loss": 0.179, "step": 41589 }, { "epoch": 0.7229397347424777, "grad_norm": 1.3465308728159637, "learning_rate": 1.8816157383504116e-07, "loss": 0.2214, "step": 41590 }, { "epoch": 0.7229571172799806, "grad_norm": 1.3582621573864204, "learning_rate": 1.8813957049156577e-07, "loss": 0.1993, "step": 41591 }, { "epoch": 0.7229744998174834, "grad_norm": 1.5519530297881428, "learning_rate": 1.8811756813654485e-07, "loss": 0.2528, "step": 41592 }, { "epoch": 0.7229918823549861, "grad_norm": 1.6868499619337078, "learning_rate": 1.880955667700479e-07, "loss": 0.1553, "step": 41593 }, { "epoch": 0.723009264892489, "grad_norm": 1.264077441837427, "learning_rate": 1.880735663921448e-07, "loss": 0.2133, "step": 41594 }, { "epoch": 0.7230266474299918, "grad_norm": 3.3712609091669288, "learning_rate": 1.880515670029052e-07, "loss": 0.2115, "step": 41595 }, { "epoch": 0.7230440299674946, "grad_norm": 1.4802933583219948, "learning_rate": 1.880295686023988e-07, "loss": 0.1814, "step": 41596 }, { "epoch": 0.7230614125049974, "grad_norm": 1.4328932985825902, "learning_rate": 1.880075711906954e-07, "loss": 0.1756, "step": 41597 }, { "epoch": 0.7230787950425003, "grad_norm": 1.569025426061681, "learning_rate": 1.8798557476786463e-07, "loss": 0.1683, "step": 41598 }, { "epoch": 0.7230961775800031, "grad_norm": 1.6911112313246102, "learning_rate": 1.8796357933397615e-07, "loss": 0.3059, "step": 41599 }, { "epoch": 0.7231135601175059, "grad_norm": 1.9714122016250488, "learning_rate": 1.879415848890999e-07, "loss": 0.2283, "step": 41600 }, { "epoch": 0.7231309426550088, "grad_norm": 1.0533419742231696, "learning_rate": 1.8791959143330556e-07, "loss": 0.2301, "step": 41601 }, { "epoch": 0.7231483251925116, "grad_norm": 1.0608031159601106, "learning_rate": 1.878975989666627e-07, "loss": 0.2645, "step": 41602 }, { "epoch": 0.7231657077300144, "grad_norm": 0.8388098208314473, "learning_rate": 1.8787560748924108e-07, "loss": 0.1808, "step": 41603 }, { "epoch": 0.7231830902675173, "grad_norm": 0.8521977946593073, "learning_rate": 1.878536170011104e-07, "loss": 0.1596, "step": 41604 }, { "epoch": 0.7232004728050201, "grad_norm": 1.6414188632560185, "learning_rate": 1.878316275023404e-07, "loss": 0.1673, "step": 41605 }, { "epoch": 0.7232178553425229, "grad_norm": 1.1100934119916817, "learning_rate": 1.8780963899300068e-07, "loss": 0.2245, "step": 41606 }, { "epoch": 0.7232352378800257, "grad_norm": 6.176698434620749, "learning_rate": 1.8778765147316088e-07, "loss": 0.3688, "step": 41607 }, { "epoch": 0.7232526204175286, "grad_norm": 1.381928841281477, "learning_rate": 1.8776566494289097e-07, "loss": 0.2352, "step": 41608 }, { "epoch": 0.7232700029550314, "grad_norm": 1.2279915503881216, "learning_rate": 1.8774367940226065e-07, "loss": 0.1802, "step": 41609 }, { "epoch": 0.7232873854925342, "grad_norm": 1.6449656951588179, "learning_rate": 1.877216948513392e-07, "loss": 0.2234, "step": 41610 }, { "epoch": 0.7233047680300371, "grad_norm": 2.432927045415702, "learning_rate": 1.876997112901964e-07, "loss": 0.2209, "step": 41611 }, { "epoch": 0.7233221505675399, "grad_norm": 2.0558195398177195, "learning_rate": 1.8767772871890226e-07, "loss": 0.5843, "step": 41612 }, { "epoch": 0.7233395331050426, "grad_norm": 1.5081973297982323, "learning_rate": 1.8765574713752623e-07, "loss": 0.2523, "step": 41613 }, { "epoch": 0.7233569156425455, "grad_norm": 1.6071623795337198, "learning_rate": 1.87633766546138e-07, "loss": 0.2818, "step": 41614 }, { "epoch": 0.7233742981800483, "grad_norm": 1.3366102170611978, "learning_rate": 1.8761178694480722e-07, "loss": 0.2964, "step": 41615 }, { "epoch": 0.7233916807175511, "grad_norm": 0.8949911343839866, "learning_rate": 1.875898083336036e-07, "loss": 0.1604, "step": 41616 }, { "epoch": 0.7234090632550539, "grad_norm": 1.077598632306943, "learning_rate": 1.8756783071259674e-07, "loss": 0.2969, "step": 41617 }, { "epoch": 0.7234264457925568, "grad_norm": 1.6398724469001156, "learning_rate": 1.8754585408185637e-07, "loss": 0.2224, "step": 41618 }, { "epoch": 0.7234438283300596, "grad_norm": 1.2252952311066239, "learning_rate": 1.8752387844145196e-07, "loss": 0.2141, "step": 41619 }, { "epoch": 0.7234612108675624, "grad_norm": 2.255891303245231, "learning_rate": 1.8750190379145348e-07, "loss": 0.2894, "step": 41620 }, { "epoch": 0.7234785934050653, "grad_norm": 2.348390357132215, "learning_rate": 1.8747993013193035e-07, "loss": 0.4088, "step": 41621 }, { "epoch": 0.7234959759425681, "grad_norm": 2.2965325889269526, "learning_rate": 1.8745795746295236e-07, "loss": 0.2419, "step": 41622 }, { "epoch": 0.7235133584800709, "grad_norm": 1.1511794690282149, "learning_rate": 1.8743598578458903e-07, "loss": 0.1624, "step": 41623 }, { "epoch": 0.7235307410175738, "grad_norm": 1.800697275290163, "learning_rate": 1.8741401509691006e-07, "loss": 0.2885, "step": 41624 }, { "epoch": 0.7235481235550766, "grad_norm": 1.59241736039035, "learning_rate": 1.873920453999851e-07, "loss": 0.3885, "step": 41625 }, { "epoch": 0.7235655060925794, "grad_norm": 1.7211031829606875, "learning_rate": 1.8737007669388373e-07, "loss": 0.2702, "step": 41626 }, { "epoch": 0.7235828886300822, "grad_norm": 1.4523869633837954, "learning_rate": 1.8734810897867543e-07, "loss": 0.2078, "step": 41627 }, { "epoch": 0.7236002711675851, "grad_norm": 1.18464874836207, "learning_rate": 1.8732614225443033e-07, "loss": 0.1568, "step": 41628 }, { "epoch": 0.7236176537050879, "grad_norm": 1.0928385318784777, "learning_rate": 1.8730417652121754e-07, "loss": 0.2052, "step": 41629 }, { "epoch": 0.7236350362425907, "grad_norm": 1.9277222061719872, "learning_rate": 1.8728221177910675e-07, "loss": 0.2351, "step": 41630 }, { "epoch": 0.7236524187800936, "grad_norm": 1.49468790305718, "learning_rate": 1.8726024802816787e-07, "loss": 0.2384, "step": 41631 }, { "epoch": 0.7236698013175964, "grad_norm": 1.151273796174094, "learning_rate": 1.872382852684703e-07, "loss": 0.3405, "step": 41632 }, { "epoch": 0.7236871838550991, "grad_norm": 1.2572972662665016, "learning_rate": 1.8721632350008376e-07, "loss": 0.1897, "step": 41633 }, { "epoch": 0.723704566392602, "grad_norm": 1.9118749473338066, "learning_rate": 1.8719436272307777e-07, "loss": 0.3225, "step": 41634 }, { "epoch": 0.7237219489301048, "grad_norm": 1.340359153514241, "learning_rate": 1.871724029375219e-07, "loss": 0.3048, "step": 41635 }, { "epoch": 0.7237393314676076, "grad_norm": 1.6178892622364285, "learning_rate": 1.8715044414348585e-07, "loss": 0.2172, "step": 41636 }, { "epoch": 0.7237567140051104, "grad_norm": 4.1977643189566916, "learning_rate": 1.8712848634103923e-07, "loss": 0.3503, "step": 41637 }, { "epoch": 0.7237740965426133, "grad_norm": 1.5276652298211049, "learning_rate": 1.8710652953025135e-07, "loss": 0.2485, "step": 41638 }, { "epoch": 0.7237914790801161, "grad_norm": 1.552215275027959, "learning_rate": 1.8708457371119223e-07, "loss": 0.2185, "step": 41639 }, { "epoch": 0.7238088616176189, "grad_norm": 1.1245453015510163, "learning_rate": 1.8706261888393127e-07, "loss": 0.2073, "step": 41640 }, { "epoch": 0.7238262441551218, "grad_norm": 1.3482045334101311, "learning_rate": 1.8704066504853805e-07, "loss": 0.1729, "step": 41641 }, { "epoch": 0.7238436266926246, "grad_norm": 0.9897562330825527, "learning_rate": 1.870187122050822e-07, "loss": 0.1708, "step": 41642 }, { "epoch": 0.7238610092301274, "grad_norm": 1.3634375013021032, "learning_rate": 1.869967603536332e-07, "loss": 0.2798, "step": 41643 }, { "epoch": 0.7238783917676302, "grad_norm": 2.861681222819656, "learning_rate": 1.869748094942607e-07, "loss": 0.1889, "step": 41644 }, { "epoch": 0.7238957743051331, "grad_norm": 1.4081085641250102, "learning_rate": 1.8695285962703428e-07, "loss": 0.1544, "step": 41645 }, { "epoch": 0.7239131568426359, "grad_norm": 1.186024119627215, "learning_rate": 1.8693091075202334e-07, "loss": 0.1778, "step": 41646 }, { "epoch": 0.7239305393801387, "grad_norm": 3.5882774335672525, "learning_rate": 1.8690896286929791e-07, "loss": 0.197, "step": 41647 }, { "epoch": 0.7239479219176416, "grad_norm": 1.4829700121675173, "learning_rate": 1.868870159789271e-07, "loss": 0.1786, "step": 41648 }, { "epoch": 0.7239653044551444, "grad_norm": 3.73847204100538, "learning_rate": 1.8686507008098047e-07, "loss": 0.2365, "step": 41649 }, { "epoch": 0.7239826869926472, "grad_norm": 4.808293815302548, "learning_rate": 1.868431251755278e-07, "loss": 0.2358, "step": 41650 }, { "epoch": 0.7240000695301501, "grad_norm": 1.141623962943679, "learning_rate": 1.8682118126263863e-07, "loss": 0.2866, "step": 41651 }, { "epoch": 0.7240174520676529, "grad_norm": 1.1219467467650983, "learning_rate": 1.867992383423825e-07, "loss": 0.2948, "step": 41652 }, { "epoch": 0.7240348346051556, "grad_norm": 1.3613124660321259, "learning_rate": 1.8677729641482887e-07, "loss": 0.1707, "step": 41653 }, { "epoch": 0.7240522171426584, "grad_norm": 1.1383597974910482, "learning_rate": 1.867553554800473e-07, "loss": 0.1642, "step": 41654 }, { "epoch": 0.7240695996801613, "grad_norm": 2.0254439697169975, "learning_rate": 1.8673341553810738e-07, "loss": 0.1721, "step": 41655 }, { "epoch": 0.7240869822176641, "grad_norm": 1.9578988281769394, "learning_rate": 1.8671147658907866e-07, "loss": 0.2437, "step": 41656 }, { "epoch": 0.7241043647551669, "grad_norm": 2.9586765152123524, "learning_rate": 1.8668953863303045e-07, "loss": 0.2203, "step": 41657 }, { "epoch": 0.7241217472926698, "grad_norm": 1.8056706699135414, "learning_rate": 1.866676016700326e-07, "loss": 0.2567, "step": 41658 }, { "epoch": 0.7241391298301726, "grad_norm": 1.7190784572798337, "learning_rate": 1.8664566570015462e-07, "loss": 0.2308, "step": 41659 }, { "epoch": 0.7241565123676754, "grad_norm": 1.7200658501584336, "learning_rate": 1.8662373072346605e-07, "loss": 0.1783, "step": 41660 }, { "epoch": 0.7241738949051783, "grad_norm": 6.371760474184091, "learning_rate": 1.8660179674003595e-07, "loss": 0.3933, "step": 41661 }, { "epoch": 0.7241912774426811, "grad_norm": 1.1288597091554344, "learning_rate": 1.8657986374993433e-07, "loss": 0.2755, "step": 41662 }, { "epoch": 0.7242086599801839, "grad_norm": 1.5310772366785688, "learning_rate": 1.8655793175323065e-07, "loss": 0.2739, "step": 41663 }, { "epoch": 0.7242260425176867, "grad_norm": 1.4409788382831594, "learning_rate": 1.865360007499943e-07, "loss": 0.1721, "step": 41664 }, { "epoch": 0.7242434250551896, "grad_norm": 1.6097159803715935, "learning_rate": 1.865140707402947e-07, "loss": 0.1983, "step": 41665 }, { "epoch": 0.7242608075926924, "grad_norm": 1.17372036575144, "learning_rate": 1.864921417242018e-07, "loss": 0.2059, "step": 41666 }, { "epoch": 0.7242781901301952, "grad_norm": 2.394989248737375, "learning_rate": 1.8647021370178462e-07, "loss": 0.2401, "step": 41667 }, { "epoch": 0.7242955726676981, "grad_norm": 1.5595837192393074, "learning_rate": 1.8644828667311285e-07, "loss": 0.2216, "step": 41668 }, { "epoch": 0.7243129552052009, "grad_norm": 1.0977992839708601, "learning_rate": 1.8642636063825578e-07, "loss": 0.1631, "step": 41669 }, { "epoch": 0.7243303377427037, "grad_norm": 1.2738238134718964, "learning_rate": 1.864044355972833e-07, "loss": 0.2076, "step": 41670 }, { "epoch": 0.7243477202802066, "grad_norm": 3.8501503920209403, "learning_rate": 1.8638251155026475e-07, "loss": 0.2051, "step": 41671 }, { "epoch": 0.7243651028177094, "grad_norm": 1.0298735754257422, "learning_rate": 1.8636058849726948e-07, "loss": 0.1805, "step": 41672 }, { "epoch": 0.7243824853552121, "grad_norm": 1.522903354029371, "learning_rate": 1.8633866643836715e-07, "loss": 0.1824, "step": 41673 }, { "epoch": 0.7243998678927149, "grad_norm": 1.7021156186980708, "learning_rate": 1.8631674537362712e-07, "loss": 0.2174, "step": 41674 }, { "epoch": 0.7244172504302178, "grad_norm": 1.8318238088151695, "learning_rate": 1.862948253031189e-07, "loss": 0.3877, "step": 41675 }, { "epoch": 0.7244346329677206, "grad_norm": 1.226526257128267, "learning_rate": 1.8627290622691205e-07, "loss": 0.2478, "step": 41676 }, { "epoch": 0.7244520155052234, "grad_norm": 1.6283385270324193, "learning_rate": 1.8625098814507573e-07, "loss": 0.168, "step": 41677 }, { "epoch": 0.7244693980427263, "grad_norm": 1.0329516515730095, "learning_rate": 1.8622907105767989e-07, "loss": 0.2229, "step": 41678 }, { "epoch": 0.7244867805802291, "grad_norm": 1.478428939558862, "learning_rate": 1.862071549647939e-07, "loss": 0.1668, "step": 41679 }, { "epoch": 0.7245041631177319, "grad_norm": 1.686690087194724, "learning_rate": 1.8618523986648672e-07, "loss": 0.1973, "step": 41680 }, { "epoch": 0.7245215456552347, "grad_norm": 1.1630246293114885, "learning_rate": 1.8616332576282833e-07, "loss": 0.1386, "step": 41681 }, { "epoch": 0.7245389281927376, "grad_norm": 0.991130310951484, "learning_rate": 1.8614141265388806e-07, "loss": 0.1432, "step": 41682 }, { "epoch": 0.7245563107302404, "grad_norm": 8.267321408623172, "learning_rate": 1.8611950053973536e-07, "loss": 0.4045, "step": 41683 }, { "epoch": 0.7245736932677432, "grad_norm": 2.2291013898542626, "learning_rate": 1.8609758942043962e-07, "loss": 0.278, "step": 41684 }, { "epoch": 0.7245910758052461, "grad_norm": 1.654758403691761, "learning_rate": 1.8607567929607037e-07, "loss": 0.1589, "step": 41685 }, { "epoch": 0.7246084583427489, "grad_norm": 1.8774234958184781, "learning_rate": 1.8605377016669698e-07, "loss": 0.2501, "step": 41686 }, { "epoch": 0.7246258408802517, "grad_norm": 0.8592207580806851, "learning_rate": 1.860318620323889e-07, "loss": 0.2764, "step": 41687 }, { "epoch": 0.7246432234177546, "grad_norm": 2.0099775441139487, "learning_rate": 1.8600995489321546e-07, "loss": 0.1998, "step": 41688 }, { "epoch": 0.7246606059552574, "grad_norm": 1.0877871636378662, "learning_rate": 1.859880487492464e-07, "loss": 0.2704, "step": 41689 }, { "epoch": 0.7246779884927602, "grad_norm": 2.0611084957772303, "learning_rate": 1.8596614360055097e-07, "loss": 0.1877, "step": 41690 }, { "epoch": 0.724695371030263, "grad_norm": 1.1137728288638147, "learning_rate": 1.8594423944719878e-07, "loss": 0.1864, "step": 41691 }, { "epoch": 0.7247127535677659, "grad_norm": 1.7021727445122916, "learning_rate": 1.8592233628925875e-07, "loss": 0.1955, "step": 41692 }, { "epoch": 0.7247301361052686, "grad_norm": 2.344976652627126, "learning_rate": 1.8590043412680078e-07, "loss": 0.1716, "step": 41693 }, { "epoch": 0.7247475186427714, "grad_norm": 1.2377723957811617, "learning_rate": 1.858785329598942e-07, "loss": 0.2847, "step": 41694 }, { "epoch": 0.7247649011802743, "grad_norm": 1.1891014035193035, "learning_rate": 1.8585663278860836e-07, "loss": 0.2057, "step": 41695 }, { "epoch": 0.7247822837177771, "grad_norm": 0.9033902079469943, "learning_rate": 1.858347336130125e-07, "loss": 0.2507, "step": 41696 }, { "epoch": 0.7247996662552799, "grad_norm": 1.5565259646794625, "learning_rate": 1.8581283543317655e-07, "loss": 0.2993, "step": 41697 }, { "epoch": 0.7248170487927827, "grad_norm": 1.1315370085255108, "learning_rate": 1.8579093824916942e-07, "loss": 0.2004, "step": 41698 }, { "epoch": 0.7248344313302856, "grad_norm": 1.1420367257506834, "learning_rate": 1.857690420610607e-07, "loss": 0.1685, "step": 41699 }, { "epoch": 0.7248518138677884, "grad_norm": 1.2290977815492332, "learning_rate": 1.8574714686891957e-07, "loss": 0.1632, "step": 41700 }, { "epoch": 0.7248691964052912, "grad_norm": 1.334639882290989, "learning_rate": 1.8572525267281586e-07, "loss": 0.215, "step": 41701 }, { "epoch": 0.7248865789427941, "grad_norm": 2.312012056680486, "learning_rate": 1.8570335947281867e-07, "loss": 0.5563, "step": 41702 }, { "epoch": 0.7249039614802969, "grad_norm": 2.4291013633883747, "learning_rate": 1.8568146726899747e-07, "loss": 0.2736, "step": 41703 }, { "epoch": 0.7249213440177997, "grad_norm": 1.4832321972047016, "learning_rate": 1.8565957606142162e-07, "loss": 0.2998, "step": 41704 }, { "epoch": 0.7249387265553026, "grad_norm": 1.659450098668055, "learning_rate": 1.8563768585016054e-07, "loss": 0.2644, "step": 41705 }, { "epoch": 0.7249561090928054, "grad_norm": 1.4885977091936717, "learning_rate": 1.856157966352836e-07, "loss": 0.1937, "step": 41706 }, { "epoch": 0.7249734916303082, "grad_norm": 5.45672124317442, "learning_rate": 1.8559390841686017e-07, "loss": 0.1878, "step": 41707 }, { "epoch": 0.724990874167811, "grad_norm": 1.7167009417436665, "learning_rate": 1.8557202119495942e-07, "loss": 0.3359, "step": 41708 }, { "epoch": 0.7250082567053139, "grad_norm": 1.1491468184251707, "learning_rate": 1.855501349696511e-07, "loss": 0.1834, "step": 41709 }, { "epoch": 0.7250256392428167, "grad_norm": 1.1308798190204854, "learning_rate": 1.8552824974100461e-07, "loss": 0.2105, "step": 41710 }, { "epoch": 0.7250430217803195, "grad_norm": 2.2617293010091903, "learning_rate": 1.8550636550908877e-07, "loss": 0.1612, "step": 41711 }, { "epoch": 0.7250604043178224, "grad_norm": 2.1304802937360883, "learning_rate": 1.8548448227397339e-07, "loss": 0.2127, "step": 41712 }, { "epoch": 0.7250777868553251, "grad_norm": 1.276825648716866, "learning_rate": 1.8546260003572777e-07, "loss": 0.3015, "step": 41713 }, { "epoch": 0.7250951693928279, "grad_norm": 1.6490096244299652, "learning_rate": 1.8544071879442125e-07, "loss": 0.1453, "step": 41714 }, { "epoch": 0.7251125519303308, "grad_norm": 1.6709259379284083, "learning_rate": 1.8541883855012315e-07, "loss": 0.2214, "step": 41715 }, { "epoch": 0.7251299344678336, "grad_norm": 0.9869008769025474, "learning_rate": 1.8539695930290278e-07, "loss": 0.1101, "step": 41716 }, { "epoch": 0.7251473170053364, "grad_norm": 1.4803515697559557, "learning_rate": 1.8537508105282956e-07, "loss": 0.233, "step": 41717 }, { "epoch": 0.7251646995428392, "grad_norm": 1.3542841694628875, "learning_rate": 1.8535320379997283e-07, "loss": 0.227, "step": 41718 }, { "epoch": 0.7251820820803421, "grad_norm": 1.1735517203937944, "learning_rate": 1.8533132754440168e-07, "loss": 0.3229, "step": 41719 }, { "epoch": 0.7251994646178449, "grad_norm": 1.117915146567213, "learning_rate": 1.853094522861859e-07, "loss": 0.2512, "step": 41720 }, { "epoch": 0.7252168471553477, "grad_norm": 1.4925381655796286, "learning_rate": 1.8528757802539451e-07, "loss": 0.239, "step": 41721 }, { "epoch": 0.7252342296928506, "grad_norm": 0.9890092233747254, "learning_rate": 1.85265704762097e-07, "loss": 0.295, "step": 41722 }, { "epoch": 0.7252516122303534, "grad_norm": 1.3128532259117773, "learning_rate": 1.8524383249636267e-07, "loss": 0.1896, "step": 41723 }, { "epoch": 0.7252689947678562, "grad_norm": 1.5631108928828295, "learning_rate": 1.8522196122826072e-07, "loss": 0.2555, "step": 41724 }, { "epoch": 0.725286377305359, "grad_norm": 2.3272565720633964, "learning_rate": 1.852000909578606e-07, "loss": 0.1814, "step": 41725 }, { "epoch": 0.7253037598428619, "grad_norm": 1.606723479904052, "learning_rate": 1.8517822168523155e-07, "loss": 0.2528, "step": 41726 }, { "epoch": 0.7253211423803647, "grad_norm": 3.1293515470016153, "learning_rate": 1.8515635341044284e-07, "loss": 0.2804, "step": 41727 }, { "epoch": 0.7253385249178675, "grad_norm": 1.055306904637726, "learning_rate": 1.8513448613356393e-07, "loss": 0.3724, "step": 41728 }, { "epoch": 0.7253559074553704, "grad_norm": 2.2776879324742056, "learning_rate": 1.8511261985466426e-07, "loss": 0.1431, "step": 41729 }, { "epoch": 0.7253732899928732, "grad_norm": 1.7595633925858836, "learning_rate": 1.8509075457381263e-07, "loss": 0.2373, "step": 41730 }, { "epoch": 0.725390672530376, "grad_norm": 3.1929124169989587, "learning_rate": 1.850688902910788e-07, "loss": 0.3141, "step": 41731 }, { "epoch": 0.7254080550678788, "grad_norm": 1.299287772953366, "learning_rate": 1.8504702700653192e-07, "loss": 0.3276, "step": 41732 }, { "epoch": 0.7254254376053816, "grad_norm": 0.9011238501972695, "learning_rate": 1.8502516472024126e-07, "loss": 0.3899, "step": 41733 }, { "epoch": 0.7254428201428844, "grad_norm": 1.222368837250096, "learning_rate": 1.8500330343227616e-07, "loss": 0.173, "step": 41734 }, { "epoch": 0.7254602026803872, "grad_norm": 1.1463193808024954, "learning_rate": 1.8498144314270591e-07, "loss": 0.1568, "step": 41735 }, { "epoch": 0.7254775852178901, "grad_norm": 2.460115758112294, "learning_rate": 1.8495958385159976e-07, "loss": 0.238, "step": 41736 }, { "epoch": 0.7254949677553929, "grad_norm": 2.807106147676368, "learning_rate": 1.8493772555902705e-07, "loss": 0.3206, "step": 41737 }, { "epoch": 0.7255123502928957, "grad_norm": 2.4704369857056165, "learning_rate": 1.849158682650568e-07, "loss": 0.2166, "step": 41738 }, { "epoch": 0.7255297328303986, "grad_norm": 2.8722011841206054, "learning_rate": 1.8489401196975868e-07, "loss": 0.2327, "step": 41739 }, { "epoch": 0.7255471153679014, "grad_norm": 1.7591188107056996, "learning_rate": 1.8487215667320183e-07, "loss": 0.2893, "step": 41740 }, { "epoch": 0.7255644979054042, "grad_norm": 1.5149205536133272, "learning_rate": 1.8485030237545545e-07, "loss": 0.1534, "step": 41741 }, { "epoch": 0.7255818804429071, "grad_norm": 1.0797333765641113, "learning_rate": 1.8482844907658883e-07, "loss": 0.2506, "step": 41742 }, { "epoch": 0.7255992629804099, "grad_norm": 1.8929437829940106, "learning_rate": 1.848065967766713e-07, "loss": 0.338, "step": 41743 }, { "epoch": 0.7256166455179127, "grad_norm": 1.34628326305734, "learning_rate": 1.8478474547577205e-07, "loss": 0.2685, "step": 41744 }, { "epoch": 0.7256340280554155, "grad_norm": 1.8928830142899185, "learning_rate": 1.8476289517396039e-07, "loss": 0.2114, "step": 41745 }, { "epoch": 0.7256514105929184, "grad_norm": 1.7475225285240186, "learning_rate": 1.847410458713053e-07, "loss": 0.2683, "step": 41746 }, { "epoch": 0.7256687931304212, "grad_norm": 1.7983405770776426, "learning_rate": 1.847191975678765e-07, "loss": 0.2051, "step": 41747 }, { "epoch": 0.725686175667924, "grad_norm": 4.795801773762706, "learning_rate": 1.8469735026374317e-07, "loss": 0.3205, "step": 41748 }, { "epoch": 0.7257035582054269, "grad_norm": 1.8256070187131384, "learning_rate": 1.8467550395897426e-07, "loss": 0.1928, "step": 41749 }, { "epoch": 0.7257209407429297, "grad_norm": 1.9453944526456632, "learning_rate": 1.846536586536389e-07, "loss": 0.2163, "step": 41750 }, { "epoch": 0.7257383232804325, "grad_norm": 1.9848763262562998, "learning_rate": 1.8463181434780684e-07, "loss": 0.2921, "step": 41751 }, { "epoch": 0.7257557058179352, "grad_norm": 1.0322058137367283, "learning_rate": 1.8460997104154703e-07, "loss": 0.2557, "step": 41752 }, { "epoch": 0.7257730883554381, "grad_norm": 1.43952780039998, "learning_rate": 1.8458812873492874e-07, "loss": 0.1803, "step": 41753 }, { "epoch": 0.7257904708929409, "grad_norm": 1.3593366355912921, "learning_rate": 1.8456628742802123e-07, "loss": 0.1325, "step": 41754 }, { "epoch": 0.7258078534304437, "grad_norm": 0.9216946235951345, "learning_rate": 1.8454444712089362e-07, "loss": 0.1674, "step": 41755 }, { "epoch": 0.7258252359679466, "grad_norm": 1.0897566003318502, "learning_rate": 1.8452260781361522e-07, "loss": 0.119, "step": 41756 }, { "epoch": 0.7258426185054494, "grad_norm": 2.0035739165810513, "learning_rate": 1.8450076950625526e-07, "loss": 0.1652, "step": 41757 }, { "epoch": 0.7258600010429522, "grad_norm": 1.331365744978685, "learning_rate": 1.8447893219888277e-07, "loss": 0.3095, "step": 41758 }, { "epoch": 0.7258773835804551, "grad_norm": 1.01058171325185, "learning_rate": 1.8445709589156726e-07, "loss": 0.2453, "step": 41759 }, { "epoch": 0.7258947661179579, "grad_norm": 1.3203395964994467, "learning_rate": 1.844352605843778e-07, "loss": 0.1783, "step": 41760 }, { "epoch": 0.7259121486554607, "grad_norm": 1.1772966052467884, "learning_rate": 1.8441342627738366e-07, "loss": 0.1059, "step": 41761 }, { "epoch": 0.7259295311929636, "grad_norm": 0.9586867663483903, "learning_rate": 1.8439159297065394e-07, "loss": 0.184, "step": 41762 }, { "epoch": 0.7259469137304664, "grad_norm": 1.2514669736898207, "learning_rate": 1.843697606642579e-07, "loss": 0.2574, "step": 41763 }, { "epoch": 0.7259642962679692, "grad_norm": 1.7312956291785835, "learning_rate": 1.8434792935826477e-07, "loss": 0.2559, "step": 41764 }, { "epoch": 0.725981678805472, "grad_norm": 1.1411830670016643, "learning_rate": 1.8432609905274365e-07, "loss": 0.2167, "step": 41765 }, { "epoch": 0.7259990613429749, "grad_norm": 1.2120121873728844, "learning_rate": 1.843042697477637e-07, "loss": 0.2902, "step": 41766 }, { "epoch": 0.7260164438804777, "grad_norm": 1.3339272561675188, "learning_rate": 1.8428244144339445e-07, "loss": 0.228, "step": 41767 }, { "epoch": 0.7260338264179805, "grad_norm": 0.9369550622563081, "learning_rate": 1.8426061413970468e-07, "loss": 0.3908, "step": 41768 }, { "epoch": 0.7260512089554834, "grad_norm": 2.8450234173598927, "learning_rate": 1.842387878367636e-07, "loss": 0.2447, "step": 41769 }, { "epoch": 0.7260685914929862, "grad_norm": 1.7536047620221182, "learning_rate": 1.8421696253464064e-07, "loss": 0.3653, "step": 41770 }, { "epoch": 0.726085974030489, "grad_norm": 0.6250908409090014, "learning_rate": 1.8419513823340487e-07, "loss": 0.2045, "step": 41771 }, { "epoch": 0.7261033565679917, "grad_norm": 1.3642892717214412, "learning_rate": 1.8417331493312545e-07, "loss": 0.2977, "step": 41772 }, { "epoch": 0.7261207391054946, "grad_norm": 2.6240022002905343, "learning_rate": 1.8415149263387147e-07, "loss": 0.1867, "step": 41773 }, { "epoch": 0.7261381216429974, "grad_norm": 1.1432226645897747, "learning_rate": 1.841296713357123e-07, "loss": 0.2856, "step": 41774 }, { "epoch": 0.7261555041805002, "grad_norm": 2.2169987029593394, "learning_rate": 1.8410785103871686e-07, "loss": 0.276, "step": 41775 }, { "epoch": 0.7261728867180031, "grad_norm": 1.2274258480313072, "learning_rate": 1.8408603174295444e-07, "loss": 0.1879, "step": 41776 }, { "epoch": 0.7261902692555059, "grad_norm": 0.9516602172031016, "learning_rate": 1.8406421344849405e-07, "loss": 0.2709, "step": 41777 }, { "epoch": 0.7262076517930087, "grad_norm": 1.5841416202191765, "learning_rate": 1.8404239615540512e-07, "loss": 0.2808, "step": 41778 }, { "epoch": 0.7262250343305116, "grad_norm": 1.4969199021751471, "learning_rate": 1.840205798637567e-07, "loss": 0.2451, "step": 41779 }, { "epoch": 0.7262424168680144, "grad_norm": 1.7482320199443966, "learning_rate": 1.8399876457361801e-07, "loss": 0.1317, "step": 41780 }, { "epoch": 0.7262597994055172, "grad_norm": 1.8966633031057425, "learning_rate": 1.8397695028505773e-07, "loss": 0.4012, "step": 41781 }, { "epoch": 0.72627718194302, "grad_norm": 0.999138994430616, "learning_rate": 1.8395513699814553e-07, "loss": 0.1243, "step": 41782 }, { "epoch": 0.7262945644805229, "grad_norm": 1.327149373022105, "learning_rate": 1.8393332471295038e-07, "loss": 0.2428, "step": 41783 }, { "epoch": 0.7263119470180257, "grad_norm": 1.0216660332704144, "learning_rate": 1.839115134295414e-07, "loss": 0.2015, "step": 41784 }, { "epoch": 0.7263293295555285, "grad_norm": 1.3130959332023766, "learning_rate": 1.8388970314798758e-07, "loss": 0.1954, "step": 41785 }, { "epoch": 0.7263467120930314, "grad_norm": 2.2212222973210247, "learning_rate": 1.8386789386835848e-07, "loss": 0.2859, "step": 41786 }, { "epoch": 0.7263640946305342, "grad_norm": 1.1379273934280387, "learning_rate": 1.838460855907228e-07, "loss": 0.2288, "step": 41787 }, { "epoch": 0.726381477168037, "grad_norm": 1.440828954330725, "learning_rate": 1.838242783151498e-07, "loss": 0.1054, "step": 41788 }, { "epoch": 0.7263988597055399, "grad_norm": 1.5878118823271672, "learning_rate": 1.838024720417084e-07, "loss": 0.3469, "step": 41789 }, { "epoch": 0.7264162422430427, "grad_norm": 2.1509600178134454, "learning_rate": 1.837806667704681e-07, "loss": 0.2174, "step": 41790 }, { "epoch": 0.7264336247805455, "grad_norm": 1.6689968032091127, "learning_rate": 1.8375886250149785e-07, "loss": 0.1757, "step": 41791 }, { "epoch": 0.7264510073180482, "grad_norm": 1.68844339569597, "learning_rate": 1.837370592348667e-07, "loss": 0.2435, "step": 41792 }, { "epoch": 0.7264683898555511, "grad_norm": 1.432461540004432, "learning_rate": 1.8371525697064383e-07, "loss": 0.1928, "step": 41793 }, { "epoch": 0.7264857723930539, "grad_norm": 1.3888013002183914, "learning_rate": 1.8369345570889827e-07, "loss": 0.158, "step": 41794 }, { "epoch": 0.7265031549305567, "grad_norm": 1.55136318180395, "learning_rate": 1.8367165544969916e-07, "loss": 0.2215, "step": 41795 }, { "epoch": 0.7265205374680596, "grad_norm": 2.558357639652035, "learning_rate": 1.8364985619311564e-07, "loss": 0.2443, "step": 41796 }, { "epoch": 0.7265379200055624, "grad_norm": 1.5276910210489156, "learning_rate": 1.8362805793921655e-07, "loss": 0.154, "step": 41797 }, { "epoch": 0.7265553025430652, "grad_norm": 1.9320675736014863, "learning_rate": 1.8360626068807133e-07, "loss": 0.1396, "step": 41798 }, { "epoch": 0.726572685080568, "grad_norm": 1.6699777262376512, "learning_rate": 1.8358446443974912e-07, "loss": 0.1703, "step": 41799 }, { "epoch": 0.7265900676180709, "grad_norm": 1.8802349228349222, "learning_rate": 1.8356266919431844e-07, "loss": 0.2445, "step": 41800 }, { "epoch": 0.7266074501555737, "grad_norm": 2.0707474270033686, "learning_rate": 1.83540874951849e-07, "loss": 0.3061, "step": 41801 }, { "epoch": 0.7266248326930765, "grad_norm": 1.9921630071156013, "learning_rate": 1.8351908171240955e-07, "loss": 0.3212, "step": 41802 }, { "epoch": 0.7266422152305794, "grad_norm": 2.2196743709344338, "learning_rate": 1.8349728947606924e-07, "loss": 0.6641, "step": 41803 }, { "epoch": 0.7266595977680822, "grad_norm": 1.572016844736347, "learning_rate": 1.834754982428971e-07, "loss": 0.2164, "step": 41804 }, { "epoch": 0.726676980305585, "grad_norm": 3.333011819286715, "learning_rate": 1.8345370801296227e-07, "loss": 0.2783, "step": 41805 }, { "epoch": 0.7266943628430879, "grad_norm": 1.7901774574645932, "learning_rate": 1.834319187863338e-07, "loss": 0.2368, "step": 41806 }, { "epoch": 0.7267117453805907, "grad_norm": 1.1721754570585667, "learning_rate": 1.8341013056308068e-07, "loss": 0.258, "step": 41807 }, { "epoch": 0.7267291279180935, "grad_norm": 1.1082964311640413, "learning_rate": 1.8338834334327186e-07, "loss": 0.2974, "step": 41808 }, { "epoch": 0.7267465104555964, "grad_norm": 2.2281656544637523, "learning_rate": 1.8336655712697673e-07, "loss": 0.3711, "step": 41809 }, { "epoch": 0.7267638929930992, "grad_norm": 1.4870812400432236, "learning_rate": 1.8334477191426417e-07, "loss": 0.1853, "step": 41810 }, { "epoch": 0.726781275530602, "grad_norm": 1.3819460905189112, "learning_rate": 1.8332298770520338e-07, "loss": 0.1483, "step": 41811 }, { "epoch": 0.7267986580681047, "grad_norm": 1.4491607260337196, "learning_rate": 1.8330120449986292e-07, "loss": 0.3942, "step": 41812 }, { "epoch": 0.7268160406056076, "grad_norm": 1.7817500524284016, "learning_rate": 1.8327942229831228e-07, "loss": 0.1367, "step": 41813 }, { "epoch": 0.7268334231431104, "grad_norm": 1.6823746082337017, "learning_rate": 1.8325764110062048e-07, "loss": 0.1931, "step": 41814 }, { "epoch": 0.7268508056806132, "grad_norm": 1.3187714628685019, "learning_rate": 1.8323586090685638e-07, "loss": 0.2914, "step": 41815 }, { "epoch": 0.726868188218116, "grad_norm": 1.118351699269404, "learning_rate": 1.8321408171708895e-07, "loss": 0.2387, "step": 41816 }, { "epoch": 0.7268855707556189, "grad_norm": 2.0540475640308027, "learning_rate": 1.831923035313877e-07, "loss": 0.242, "step": 41817 }, { "epoch": 0.7269029532931217, "grad_norm": 1.5951563029464235, "learning_rate": 1.831705263498211e-07, "loss": 0.2629, "step": 41818 }, { "epoch": 0.7269203358306245, "grad_norm": 3.269953137653736, "learning_rate": 1.8314875017245846e-07, "loss": 0.3109, "step": 41819 }, { "epoch": 0.7269377183681274, "grad_norm": 0.9540288169373005, "learning_rate": 1.8312697499936857e-07, "loss": 0.189, "step": 41820 }, { "epoch": 0.7269551009056302, "grad_norm": 1.6299871669042458, "learning_rate": 1.8310520083062077e-07, "loss": 0.2223, "step": 41821 }, { "epoch": 0.726972483443133, "grad_norm": 1.4756702401394317, "learning_rate": 1.8308342766628388e-07, "loss": 0.2572, "step": 41822 }, { "epoch": 0.7269898659806359, "grad_norm": 1.6213448577141485, "learning_rate": 1.8306165550642694e-07, "loss": 0.2022, "step": 41823 }, { "epoch": 0.7270072485181387, "grad_norm": 2.1845854410357393, "learning_rate": 1.83039884351119e-07, "loss": 0.2026, "step": 41824 }, { "epoch": 0.7270246310556415, "grad_norm": 1.9458318611435053, "learning_rate": 1.83018114200429e-07, "loss": 0.2414, "step": 41825 }, { "epoch": 0.7270420135931444, "grad_norm": 4.338786636851773, "learning_rate": 1.8299634505442596e-07, "loss": 0.3728, "step": 41826 }, { "epoch": 0.7270593961306472, "grad_norm": 1.3082673865858585, "learning_rate": 1.8297457691317874e-07, "loss": 0.1647, "step": 41827 }, { "epoch": 0.72707677866815, "grad_norm": 1.9668923729794743, "learning_rate": 1.8295280977675664e-07, "loss": 0.2119, "step": 41828 }, { "epoch": 0.7270941612056528, "grad_norm": 1.6529580983208065, "learning_rate": 1.829310436452285e-07, "loss": 0.188, "step": 41829 }, { "epoch": 0.7271115437431557, "grad_norm": 1.2740595169653528, "learning_rate": 1.8290927851866344e-07, "loss": 0.2555, "step": 41830 }, { "epoch": 0.7271289262806585, "grad_norm": 1.1905434572066436, "learning_rate": 1.8288751439713002e-07, "loss": 0.1948, "step": 41831 }, { "epoch": 0.7271463088181612, "grad_norm": 1.711924390813192, "learning_rate": 1.8286575128069765e-07, "loss": 0.4049, "step": 41832 }, { "epoch": 0.727163691355664, "grad_norm": 1.4740387303126008, "learning_rate": 1.8284398916943516e-07, "loss": 0.2136, "step": 41833 }, { "epoch": 0.7271810738931669, "grad_norm": 2.0617300472271434, "learning_rate": 1.828222280634116e-07, "loss": 0.2361, "step": 41834 }, { "epoch": 0.7271984564306697, "grad_norm": 1.8143129083329201, "learning_rate": 1.8280046796269566e-07, "loss": 0.2692, "step": 41835 }, { "epoch": 0.7272158389681725, "grad_norm": 1.4015808463585582, "learning_rate": 1.8277870886735687e-07, "loss": 0.2205, "step": 41836 }, { "epoch": 0.7272332215056754, "grad_norm": 2.227313140751681, "learning_rate": 1.8275695077746367e-07, "loss": 0.2784, "step": 41837 }, { "epoch": 0.7272506040431782, "grad_norm": 0.7905789917485442, "learning_rate": 1.827351936930852e-07, "loss": 0.3389, "step": 41838 }, { "epoch": 0.727267986580681, "grad_norm": 1.0009766623809018, "learning_rate": 1.8271343761429026e-07, "loss": 0.239, "step": 41839 }, { "epoch": 0.7272853691181839, "grad_norm": 1.4447360647791672, "learning_rate": 1.8269168254114814e-07, "loss": 0.2236, "step": 41840 }, { "epoch": 0.7273027516556867, "grad_norm": 2.0919192172741785, "learning_rate": 1.8266992847372764e-07, "loss": 0.2459, "step": 41841 }, { "epoch": 0.7273201341931895, "grad_norm": 1.4782310395209968, "learning_rate": 1.826481754120977e-07, "loss": 0.2394, "step": 41842 }, { "epoch": 0.7273375167306924, "grad_norm": 1.2929964623781725, "learning_rate": 1.8262642335632726e-07, "loss": 0.1441, "step": 41843 }, { "epoch": 0.7273548992681952, "grad_norm": 1.4085062083029163, "learning_rate": 1.826046723064853e-07, "loss": 0.3089, "step": 41844 }, { "epoch": 0.727372281805698, "grad_norm": 1.6934975557727536, "learning_rate": 1.8258292226264072e-07, "loss": 0.2209, "step": 41845 }, { "epoch": 0.7273896643432008, "grad_norm": 0.6885928696173221, "learning_rate": 1.8256117322486243e-07, "loss": 0.1422, "step": 41846 }, { "epoch": 0.7274070468807037, "grad_norm": 1.2788419126299218, "learning_rate": 1.8253942519321925e-07, "loss": 0.2498, "step": 41847 }, { "epoch": 0.7274244294182065, "grad_norm": 1.815285479725115, "learning_rate": 1.825176781677804e-07, "loss": 0.2496, "step": 41848 }, { "epoch": 0.7274418119557093, "grad_norm": 1.3863760957721685, "learning_rate": 1.824959321486149e-07, "loss": 0.1724, "step": 41849 }, { "epoch": 0.7274591944932122, "grad_norm": 1.7698451544450575, "learning_rate": 1.824741871357911e-07, "loss": 0.2983, "step": 41850 }, { "epoch": 0.727476577030715, "grad_norm": 1.3291483265276751, "learning_rate": 1.824524431293784e-07, "loss": 0.1423, "step": 41851 }, { "epoch": 0.7274939595682177, "grad_norm": 0.9837186284697653, "learning_rate": 1.824307001294456e-07, "loss": 0.1813, "step": 41852 }, { "epoch": 0.7275113421057205, "grad_norm": 1.309478745660052, "learning_rate": 1.824089581360616e-07, "loss": 0.2064, "step": 41853 }, { "epoch": 0.7275287246432234, "grad_norm": 2.108071083050874, "learning_rate": 1.8238721714929532e-07, "loss": 0.3289, "step": 41854 }, { "epoch": 0.7275461071807262, "grad_norm": 1.4655346532508962, "learning_rate": 1.8236547716921568e-07, "loss": 0.1447, "step": 41855 }, { "epoch": 0.727563489718229, "grad_norm": 0.8519957793828964, "learning_rate": 1.8234373819589155e-07, "loss": 0.1961, "step": 41856 }, { "epoch": 0.7275808722557319, "grad_norm": 1.6114703972710918, "learning_rate": 1.823220002293918e-07, "loss": 0.14, "step": 41857 }, { "epoch": 0.7275982547932347, "grad_norm": 0.9817744635291376, "learning_rate": 1.8230026326978525e-07, "loss": 0.114, "step": 41858 }, { "epoch": 0.7276156373307375, "grad_norm": 1.7500145169894377, "learning_rate": 1.8227852731714112e-07, "loss": 0.1871, "step": 41859 }, { "epoch": 0.7276330198682404, "grad_norm": 1.4653705137273325, "learning_rate": 1.8225679237152813e-07, "loss": 0.2299, "step": 41860 }, { "epoch": 0.7276504024057432, "grad_norm": 1.3043797055266637, "learning_rate": 1.8223505843301506e-07, "loss": 0.214, "step": 41861 }, { "epoch": 0.727667784943246, "grad_norm": 3.5278334075311686, "learning_rate": 1.8221332550167097e-07, "loss": 0.2416, "step": 41862 }, { "epoch": 0.7276851674807489, "grad_norm": 0.982107264110723, "learning_rate": 1.8219159357756464e-07, "loss": 0.1278, "step": 41863 }, { "epoch": 0.7277025500182517, "grad_norm": 1.0313041357251853, "learning_rate": 1.8216986266076495e-07, "loss": 0.2882, "step": 41864 }, { "epoch": 0.7277199325557545, "grad_norm": 1.4294463694118376, "learning_rate": 1.8214813275134078e-07, "loss": 0.2306, "step": 41865 }, { "epoch": 0.7277373150932573, "grad_norm": 1.7777649022276263, "learning_rate": 1.821264038493609e-07, "loss": 0.3478, "step": 41866 }, { "epoch": 0.7277546976307602, "grad_norm": 1.3177678455397737, "learning_rate": 1.8210467595489443e-07, "loss": 0.1363, "step": 41867 }, { "epoch": 0.727772080168263, "grad_norm": 2.044849101997643, "learning_rate": 1.8208294906801032e-07, "loss": 0.2706, "step": 41868 }, { "epoch": 0.7277894627057658, "grad_norm": 1.3607125593517992, "learning_rate": 1.8206122318877698e-07, "loss": 0.3344, "step": 41869 }, { "epoch": 0.7278068452432687, "grad_norm": 1.3523920541709098, "learning_rate": 1.820394983172634e-07, "loss": 0.2189, "step": 41870 }, { "epoch": 0.7278242277807714, "grad_norm": 2.1420412134132714, "learning_rate": 1.8201777445353872e-07, "loss": 0.2765, "step": 41871 }, { "epoch": 0.7278416103182742, "grad_norm": 1.249878154729419, "learning_rate": 1.8199605159767157e-07, "loss": 0.2476, "step": 41872 }, { "epoch": 0.727858992855777, "grad_norm": 2.4271804689847936, "learning_rate": 1.8197432974973093e-07, "loss": 0.2184, "step": 41873 }, { "epoch": 0.7278763753932799, "grad_norm": 1.8177630070048179, "learning_rate": 1.8195260890978554e-07, "loss": 0.3195, "step": 41874 }, { "epoch": 0.7278937579307827, "grad_norm": 1.0144870188511175, "learning_rate": 1.8193088907790428e-07, "loss": 0.4451, "step": 41875 }, { "epoch": 0.7279111404682855, "grad_norm": 1.9094171240152598, "learning_rate": 1.8190917025415597e-07, "loss": 0.1503, "step": 41876 }, { "epoch": 0.7279285230057884, "grad_norm": 1.50677146142176, "learning_rate": 1.8188745243860953e-07, "loss": 0.4052, "step": 41877 }, { "epoch": 0.7279459055432912, "grad_norm": 2.332888625945587, "learning_rate": 1.8186573563133356e-07, "loss": 0.2856, "step": 41878 }, { "epoch": 0.727963288080794, "grad_norm": 1.1742435672248797, "learning_rate": 1.8184401983239716e-07, "loss": 0.2533, "step": 41879 }, { "epoch": 0.7279806706182969, "grad_norm": 1.8975729236223833, "learning_rate": 1.8182230504186912e-07, "loss": 0.2637, "step": 41880 }, { "epoch": 0.7279980531557997, "grad_norm": 2.6085586464676465, "learning_rate": 1.8180059125981822e-07, "loss": 0.4069, "step": 41881 }, { "epoch": 0.7280154356933025, "grad_norm": 1.3495981055663928, "learning_rate": 1.8177887848631328e-07, "loss": 0.1203, "step": 41882 }, { "epoch": 0.7280328182308053, "grad_norm": 1.3773153523408341, "learning_rate": 1.8175716672142317e-07, "loss": 0.2645, "step": 41883 }, { "epoch": 0.7280502007683082, "grad_norm": 1.7266321481667093, "learning_rate": 1.8173545596521656e-07, "loss": 0.1923, "step": 41884 }, { "epoch": 0.728067583305811, "grad_norm": 1.5483058720435108, "learning_rate": 1.8171374621776242e-07, "loss": 0.4273, "step": 41885 }, { "epoch": 0.7280849658433138, "grad_norm": 3.260092311122479, "learning_rate": 1.8169203747912925e-07, "loss": 0.1999, "step": 41886 }, { "epoch": 0.7281023483808167, "grad_norm": 1.7703963004921952, "learning_rate": 1.816703297493865e-07, "loss": 0.2117, "step": 41887 }, { "epoch": 0.7281197309183195, "grad_norm": 1.0109914786716263, "learning_rate": 1.816486230286024e-07, "loss": 0.1678, "step": 41888 }, { "epoch": 0.7281371134558223, "grad_norm": 0.854896414845625, "learning_rate": 1.8162691731684576e-07, "loss": 0.1098, "step": 41889 }, { "epoch": 0.7281544959933252, "grad_norm": 2.6362522826017316, "learning_rate": 1.816052126141857e-07, "loss": 0.287, "step": 41890 }, { "epoch": 0.7281718785308279, "grad_norm": 0.8502365773113181, "learning_rate": 1.8158350892069086e-07, "loss": 0.1773, "step": 41891 }, { "epoch": 0.7281892610683307, "grad_norm": 1.7646504073244507, "learning_rate": 1.8156180623643004e-07, "loss": 0.1916, "step": 41892 }, { "epoch": 0.7282066436058335, "grad_norm": 1.9532412841746027, "learning_rate": 1.8154010456147202e-07, "loss": 0.3071, "step": 41893 }, { "epoch": 0.7282240261433364, "grad_norm": 3.1097911153091005, "learning_rate": 1.8151840389588552e-07, "loss": 0.4173, "step": 41894 }, { "epoch": 0.7282414086808392, "grad_norm": 1.8045792099170743, "learning_rate": 1.8149670423973945e-07, "loss": 0.1836, "step": 41895 }, { "epoch": 0.728258791218342, "grad_norm": 1.9558889149682774, "learning_rate": 1.8147500559310247e-07, "loss": 0.2271, "step": 41896 }, { "epoch": 0.7282761737558449, "grad_norm": 1.5813163745418803, "learning_rate": 1.814533079560433e-07, "loss": 0.2659, "step": 41897 }, { "epoch": 0.7282935562933477, "grad_norm": 1.1022761968527572, "learning_rate": 1.8143161132863094e-07, "loss": 0.1548, "step": 41898 }, { "epoch": 0.7283109388308505, "grad_norm": 1.4427454087229807, "learning_rate": 1.8140991571093401e-07, "loss": 0.1831, "step": 41899 }, { "epoch": 0.7283283213683533, "grad_norm": 2.585383971614803, "learning_rate": 1.8138822110302154e-07, "loss": 0.2064, "step": 41900 }, { "epoch": 0.7283457039058562, "grad_norm": 4.456914950394664, "learning_rate": 1.8136652750496162e-07, "loss": 0.2961, "step": 41901 }, { "epoch": 0.728363086443359, "grad_norm": 1.2307048410449346, "learning_rate": 1.8134483491682366e-07, "loss": 0.2736, "step": 41902 }, { "epoch": 0.7283804689808618, "grad_norm": 5.161538963537623, "learning_rate": 1.813231433386762e-07, "loss": 0.436, "step": 41903 }, { "epoch": 0.7283978515183647, "grad_norm": 1.1841757876510262, "learning_rate": 1.81301452770588e-07, "loss": 0.1496, "step": 41904 }, { "epoch": 0.7284152340558675, "grad_norm": 1.251441587447481, "learning_rate": 1.8127976321262762e-07, "loss": 0.2754, "step": 41905 }, { "epoch": 0.7284326165933703, "grad_norm": 0.6001914334513695, "learning_rate": 1.8125807466486432e-07, "loss": 0.1844, "step": 41906 }, { "epoch": 0.7284499991308732, "grad_norm": 1.4235406302286997, "learning_rate": 1.8123638712736628e-07, "loss": 0.186, "step": 41907 }, { "epoch": 0.728467381668376, "grad_norm": 2.2574729141692558, "learning_rate": 1.8121470060020257e-07, "loss": 0.2265, "step": 41908 }, { "epoch": 0.7284847642058788, "grad_norm": 1.9383618783283012, "learning_rate": 1.8119301508344153e-07, "loss": 0.1992, "step": 41909 }, { "epoch": 0.7285021467433817, "grad_norm": 1.631241883355752, "learning_rate": 1.8117133057715246e-07, "loss": 0.2192, "step": 41910 }, { "epoch": 0.7285195292808844, "grad_norm": 2.125837709822187, "learning_rate": 1.8114964708140373e-07, "loss": 0.2225, "step": 41911 }, { "epoch": 0.7285369118183872, "grad_norm": 1.3554259690753376, "learning_rate": 1.8112796459626422e-07, "loss": 0.3187, "step": 41912 }, { "epoch": 0.72855429435589, "grad_norm": 1.1614469480323093, "learning_rate": 1.8110628312180255e-07, "loss": 0.1184, "step": 41913 }, { "epoch": 0.7285716768933929, "grad_norm": 1.489125460802831, "learning_rate": 1.8108460265808745e-07, "loss": 0.1441, "step": 41914 }, { "epoch": 0.7285890594308957, "grad_norm": 1.100939065545573, "learning_rate": 1.8106292320518775e-07, "loss": 0.225, "step": 41915 }, { "epoch": 0.7286064419683985, "grad_norm": 1.88443553129094, "learning_rate": 1.8104124476317179e-07, "loss": 0.3106, "step": 41916 }, { "epoch": 0.7286238245059014, "grad_norm": 1.1798219219222652, "learning_rate": 1.8101956733210881e-07, "loss": 0.2317, "step": 41917 }, { "epoch": 0.7286412070434042, "grad_norm": 2.026577196560053, "learning_rate": 1.8099789091206725e-07, "loss": 0.2458, "step": 41918 }, { "epoch": 0.728658589580907, "grad_norm": 1.4819848912591396, "learning_rate": 1.8097621550311603e-07, "loss": 0.1328, "step": 41919 }, { "epoch": 0.7286759721184098, "grad_norm": 1.2198675489416557, "learning_rate": 1.8095454110532333e-07, "loss": 0.1986, "step": 41920 }, { "epoch": 0.7286933546559127, "grad_norm": 1.24273790012163, "learning_rate": 1.8093286771875827e-07, "loss": 0.2669, "step": 41921 }, { "epoch": 0.7287107371934155, "grad_norm": 1.1143307282081194, "learning_rate": 1.8091119534348953e-07, "loss": 0.1507, "step": 41922 }, { "epoch": 0.7287281197309183, "grad_norm": 2.778330484259021, "learning_rate": 1.8088952397958568e-07, "loss": 0.4387, "step": 41923 }, { "epoch": 0.7287455022684212, "grad_norm": 1.6910196624178537, "learning_rate": 1.8086785362711532e-07, "loss": 0.277, "step": 41924 }, { "epoch": 0.728762884805924, "grad_norm": 1.1682942371464415, "learning_rate": 1.8084618428614756e-07, "loss": 0.2234, "step": 41925 }, { "epoch": 0.7287802673434268, "grad_norm": 1.6247617839741202, "learning_rate": 1.8082451595675063e-07, "loss": 0.2382, "step": 41926 }, { "epoch": 0.7287976498809297, "grad_norm": 1.9232520514295837, "learning_rate": 1.808028486389934e-07, "loss": 0.248, "step": 41927 }, { "epoch": 0.7288150324184325, "grad_norm": 1.8931446030247379, "learning_rate": 1.807811823329443e-07, "loss": 0.2713, "step": 41928 }, { "epoch": 0.7288324149559353, "grad_norm": 0.7767173010075719, "learning_rate": 1.8075951703867243e-07, "loss": 0.2388, "step": 41929 }, { "epoch": 0.7288497974934381, "grad_norm": 1.0939437974717534, "learning_rate": 1.8073785275624627e-07, "loss": 0.3827, "step": 41930 }, { "epoch": 0.7288671800309409, "grad_norm": 1.7810395520042561, "learning_rate": 1.8071618948573436e-07, "loss": 0.2001, "step": 41931 }, { "epoch": 0.7288845625684437, "grad_norm": 1.0737469370128678, "learning_rate": 1.8069452722720552e-07, "loss": 0.1645, "step": 41932 }, { "epoch": 0.7289019451059465, "grad_norm": 1.464095668039999, "learning_rate": 1.8067286598072834e-07, "loss": 0.2502, "step": 41933 }, { "epoch": 0.7289193276434494, "grad_norm": 1.2285220246911512, "learning_rate": 1.806512057463715e-07, "loss": 0.2397, "step": 41934 }, { "epoch": 0.7289367101809522, "grad_norm": 1.073499262393589, "learning_rate": 1.8062954652420364e-07, "loss": 0.2939, "step": 41935 }, { "epoch": 0.728954092718455, "grad_norm": 2.1859946588323473, "learning_rate": 1.8060788831429323e-07, "loss": 0.1793, "step": 41936 }, { "epoch": 0.7289714752559578, "grad_norm": 2.2536370949426203, "learning_rate": 1.8058623111670946e-07, "loss": 0.3137, "step": 41937 }, { "epoch": 0.7289888577934607, "grad_norm": 1.3265817096687966, "learning_rate": 1.8056457493152045e-07, "loss": 0.1515, "step": 41938 }, { "epoch": 0.7290062403309635, "grad_norm": 0.9499822982902119, "learning_rate": 1.8054291975879477e-07, "loss": 0.2227, "step": 41939 }, { "epoch": 0.7290236228684663, "grad_norm": 2.1702528607070284, "learning_rate": 1.8052126559860148e-07, "loss": 0.4858, "step": 41940 }, { "epoch": 0.7290410054059692, "grad_norm": 2.5883949619198248, "learning_rate": 1.8049961245100902e-07, "loss": 0.2714, "step": 41941 }, { "epoch": 0.729058387943472, "grad_norm": 2.367072422773876, "learning_rate": 1.804779603160861e-07, "loss": 0.2874, "step": 41942 }, { "epoch": 0.7290757704809748, "grad_norm": 1.6040943239113694, "learning_rate": 1.8045630919390121e-07, "loss": 0.1743, "step": 41943 }, { "epoch": 0.7290931530184777, "grad_norm": 1.1204928958037814, "learning_rate": 1.8043465908452298e-07, "loss": 0.1568, "step": 41944 }, { "epoch": 0.7291105355559805, "grad_norm": 1.4438595694371887, "learning_rate": 1.8041300998802017e-07, "loss": 0.1932, "step": 41945 }, { "epoch": 0.7291279180934833, "grad_norm": 1.272776238364885, "learning_rate": 1.8039136190446125e-07, "loss": 0.1219, "step": 41946 }, { "epoch": 0.7291453006309861, "grad_norm": 2.1448573787669685, "learning_rate": 1.8036971483391478e-07, "loss": 0.1939, "step": 41947 }, { "epoch": 0.729162683168489, "grad_norm": 1.0127142276717769, "learning_rate": 1.803480687764496e-07, "loss": 0.2643, "step": 41948 }, { "epoch": 0.7291800657059918, "grad_norm": 1.1372625323408416, "learning_rate": 1.8032642373213425e-07, "loss": 0.1578, "step": 41949 }, { "epoch": 0.7291974482434946, "grad_norm": 1.1748680411957975, "learning_rate": 1.8030477970103747e-07, "loss": 0.2267, "step": 41950 }, { "epoch": 0.7292148307809974, "grad_norm": 1.5933054682805234, "learning_rate": 1.802831366832273e-07, "loss": 0.201, "step": 41951 }, { "epoch": 0.7292322133185002, "grad_norm": 1.251710818002531, "learning_rate": 1.802614946787729e-07, "loss": 0.1888, "step": 41952 }, { "epoch": 0.729249595856003, "grad_norm": 1.4915764607454212, "learning_rate": 1.8023985368774268e-07, "loss": 0.2558, "step": 41953 }, { "epoch": 0.7292669783935058, "grad_norm": 1.2075753676809022, "learning_rate": 1.802182137102053e-07, "loss": 0.2, "step": 41954 }, { "epoch": 0.7292843609310087, "grad_norm": 2.1491187815553396, "learning_rate": 1.80196574746229e-07, "loss": 0.178, "step": 41955 }, { "epoch": 0.7293017434685115, "grad_norm": 8.300615199862298, "learning_rate": 1.8017493679588308e-07, "loss": 0.432, "step": 41956 }, { "epoch": 0.7293191260060143, "grad_norm": 1.4579010231269276, "learning_rate": 1.8015329985923555e-07, "loss": 0.1533, "step": 41957 }, { "epoch": 0.7293365085435172, "grad_norm": 1.6015285341334877, "learning_rate": 1.801316639363551e-07, "loss": 0.3292, "step": 41958 }, { "epoch": 0.72935389108102, "grad_norm": 1.703711653786935, "learning_rate": 1.8011002902731016e-07, "loss": 0.2305, "step": 41959 }, { "epoch": 0.7293712736185228, "grad_norm": 2.421977187200214, "learning_rate": 1.800883951321696e-07, "loss": 0.4227, "step": 41960 }, { "epoch": 0.7293886561560257, "grad_norm": 2.3465111233744462, "learning_rate": 1.80066762251002e-07, "loss": 0.2459, "step": 41961 }, { "epoch": 0.7294060386935285, "grad_norm": 2.222278532911452, "learning_rate": 1.800451303838757e-07, "loss": 0.2621, "step": 41962 }, { "epoch": 0.7294234212310313, "grad_norm": 2.6161371102590887, "learning_rate": 1.8002349953085938e-07, "loss": 0.1793, "step": 41963 }, { "epoch": 0.7294408037685342, "grad_norm": 2.062715540827162, "learning_rate": 1.8000186969202158e-07, "loss": 0.1187, "step": 41964 }, { "epoch": 0.729458186306037, "grad_norm": 1.221948639530986, "learning_rate": 1.7998024086743085e-07, "loss": 0.1988, "step": 41965 }, { "epoch": 0.7294755688435398, "grad_norm": 2.7105820285623765, "learning_rate": 1.7995861305715577e-07, "loss": 0.175, "step": 41966 }, { "epoch": 0.7294929513810426, "grad_norm": 1.2769628898675964, "learning_rate": 1.799369862612647e-07, "loss": 0.2625, "step": 41967 }, { "epoch": 0.7295103339185455, "grad_norm": 1.4523474762119721, "learning_rate": 1.7991536047982652e-07, "loss": 0.2112, "step": 41968 }, { "epoch": 0.7295277164560483, "grad_norm": 2.0970354199701537, "learning_rate": 1.7989373571290978e-07, "loss": 0.2786, "step": 41969 }, { "epoch": 0.7295450989935511, "grad_norm": 1.3544068507694298, "learning_rate": 1.7987211196058255e-07, "loss": 0.2926, "step": 41970 }, { "epoch": 0.7295624815310539, "grad_norm": 1.0906757927672577, "learning_rate": 1.798504892229138e-07, "loss": 0.1906, "step": 41971 }, { "epoch": 0.7295798640685567, "grad_norm": 1.991986128211146, "learning_rate": 1.7982886749997195e-07, "loss": 0.2079, "step": 41972 }, { "epoch": 0.7295972466060595, "grad_norm": 1.7640924754818814, "learning_rate": 1.7980724679182552e-07, "loss": 0.144, "step": 41973 }, { "epoch": 0.7296146291435623, "grad_norm": 1.3660971625992344, "learning_rate": 1.79785627098543e-07, "loss": 0.3729, "step": 41974 }, { "epoch": 0.7296320116810652, "grad_norm": 1.888401579142016, "learning_rate": 1.79764008420193e-07, "loss": 0.26, "step": 41975 }, { "epoch": 0.729649394218568, "grad_norm": 1.0300996523176245, "learning_rate": 1.7974239075684393e-07, "loss": 0.1682, "step": 41976 }, { "epoch": 0.7296667767560708, "grad_norm": 1.399713741746226, "learning_rate": 1.797207741085644e-07, "loss": 0.2356, "step": 41977 }, { "epoch": 0.7296841592935737, "grad_norm": 2.1132164692413156, "learning_rate": 1.7969915847542278e-07, "loss": 0.2681, "step": 41978 }, { "epoch": 0.7297015418310765, "grad_norm": 1.3358692156054488, "learning_rate": 1.7967754385748778e-07, "loss": 0.1758, "step": 41979 }, { "epoch": 0.7297189243685793, "grad_norm": 1.7588740280706712, "learning_rate": 1.7965593025482784e-07, "loss": 0.2714, "step": 41980 }, { "epoch": 0.7297363069060822, "grad_norm": 1.9977127500398024, "learning_rate": 1.7963431766751148e-07, "loss": 0.3237, "step": 41981 }, { "epoch": 0.729753689443585, "grad_norm": 1.4583277508497867, "learning_rate": 1.7961270609560712e-07, "loss": 0.2134, "step": 41982 }, { "epoch": 0.7297710719810878, "grad_norm": 1.5344519284170675, "learning_rate": 1.7959109553918333e-07, "loss": 0.2181, "step": 41983 }, { "epoch": 0.7297884545185906, "grad_norm": 2.6376349834663517, "learning_rate": 1.7956948599830856e-07, "loss": 0.1814, "step": 41984 }, { "epoch": 0.7298058370560935, "grad_norm": 2.810990101419976, "learning_rate": 1.795478774730514e-07, "loss": 0.2507, "step": 41985 }, { "epoch": 0.7298232195935963, "grad_norm": 1.4934104739223493, "learning_rate": 1.7952626996348003e-07, "loss": 0.167, "step": 41986 }, { "epoch": 0.7298406021310991, "grad_norm": 1.2215889823078394, "learning_rate": 1.7950466346966332e-07, "loss": 0.2734, "step": 41987 }, { "epoch": 0.729857984668602, "grad_norm": 1.7702163537158955, "learning_rate": 1.7948305799166985e-07, "loss": 0.1971, "step": 41988 }, { "epoch": 0.7298753672061048, "grad_norm": 1.7048406384361225, "learning_rate": 1.794614535295676e-07, "loss": 0.4481, "step": 41989 }, { "epoch": 0.7298927497436076, "grad_norm": 1.6540149956979975, "learning_rate": 1.7943985008342516e-07, "loss": 0.227, "step": 41990 }, { "epoch": 0.7299101322811103, "grad_norm": 1.953297756070878, "learning_rate": 1.7941824765331137e-07, "loss": 0.1955, "step": 41991 }, { "epoch": 0.7299275148186132, "grad_norm": 1.4258888086717647, "learning_rate": 1.793966462392944e-07, "loss": 0.2802, "step": 41992 }, { "epoch": 0.729944897356116, "grad_norm": 1.605992851687088, "learning_rate": 1.793750458414428e-07, "loss": 0.1935, "step": 41993 }, { "epoch": 0.7299622798936188, "grad_norm": 1.9147635104041074, "learning_rate": 1.7935344645982503e-07, "loss": 0.1542, "step": 41994 }, { "epoch": 0.7299796624311217, "grad_norm": 1.3434991699926515, "learning_rate": 1.7933184809450957e-07, "loss": 0.1932, "step": 41995 }, { "epoch": 0.7299970449686245, "grad_norm": 1.4367809989480136, "learning_rate": 1.7931025074556482e-07, "loss": 0.1425, "step": 41996 }, { "epoch": 0.7300144275061273, "grad_norm": 0.9469484438781675, "learning_rate": 1.7928865441305924e-07, "loss": 0.1685, "step": 41997 }, { "epoch": 0.7300318100436302, "grad_norm": 1.0475551790855013, "learning_rate": 1.792670590970612e-07, "loss": 0.1724, "step": 41998 }, { "epoch": 0.730049192581133, "grad_norm": 1.1420981723933525, "learning_rate": 1.7924546479763936e-07, "loss": 0.2317, "step": 41999 }, { "epoch": 0.7300665751186358, "grad_norm": 1.0709545464432084, "learning_rate": 1.7922387151486207e-07, "loss": 0.2981, "step": 42000 }, { "epoch": 0.7300839576561386, "grad_norm": 1.775050991925348, "learning_rate": 1.7920227924879777e-07, "loss": 0.2292, "step": 42001 }, { "epoch": 0.7301013401936415, "grad_norm": 0.9840471928089831, "learning_rate": 1.7918068799951486e-07, "loss": 0.294, "step": 42002 }, { "epoch": 0.7301187227311443, "grad_norm": 2.1650944752317938, "learning_rate": 1.7915909776708183e-07, "loss": 0.3055, "step": 42003 }, { "epoch": 0.7301361052686471, "grad_norm": 1.4892124763785228, "learning_rate": 1.79137508551567e-07, "loss": 0.1686, "step": 42004 }, { "epoch": 0.73015348780615, "grad_norm": 1.6769249658699183, "learning_rate": 1.7911592035303897e-07, "loss": 0.2144, "step": 42005 }, { "epoch": 0.7301708703436528, "grad_norm": 1.9066816902014554, "learning_rate": 1.7909433317156585e-07, "loss": 0.3213, "step": 42006 }, { "epoch": 0.7301882528811556, "grad_norm": 1.516308556422897, "learning_rate": 1.7907274700721663e-07, "loss": 0.2658, "step": 42007 }, { "epoch": 0.7302056354186585, "grad_norm": 1.1253093381918529, "learning_rate": 1.7905116186005918e-07, "loss": 0.1718, "step": 42008 }, { "epoch": 0.7302230179561613, "grad_norm": 1.1374705344117986, "learning_rate": 1.7902957773016202e-07, "loss": 0.1328, "step": 42009 }, { "epoch": 0.730240400493664, "grad_norm": 1.8523224792569537, "learning_rate": 1.7900799461759376e-07, "loss": 0.1821, "step": 42010 }, { "epoch": 0.7302577830311668, "grad_norm": 2.28926893815535, "learning_rate": 1.7898641252242274e-07, "loss": 0.1587, "step": 42011 }, { "epoch": 0.7302751655686697, "grad_norm": 1.7489775450148237, "learning_rate": 1.789648314447173e-07, "loss": 0.2567, "step": 42012 }, { "epoch": 0.7302925481061725, "grad_norm": 1.2524771838706055, "learning_rate": 1.7894325138454592e-07, "loss": 0.2595, "step": 42013 }, { "epoch": 0.7303099306436753, "grad_norm": 1.208323407491987, "learning_rate": 1.7892167234197692e-07, "loss": 0.2031, "step": 42014 }, { "epoch": 0.7303273131811782, "grad_norm": 1.2269317617117967, "learning_rate": 1.7890009431707875e-07, "loss": 0.205, "step": 42015 }, { "epoch": 0.730344695718681, "grad_norm": 3.0893966506301567, "learning_rate": 1.7887851730991976e-07, "loss": 0.2268, "step": 42016 }, { "epoch": 0.7303620782561838, "grad_norm": 2.2887522172063766, "learning_rate": 1.7885694132056816e-07, "loss": 0.1288, "step": 42017 }, { "epoch": 0.7303794607936867, "grad_norm": 1.0463968017893268, "learning_rate": 1.7883536634909275e-07, "loss": 0.3143, "step": 42018 }, { "epoch": 0.7303968433311895, "grad_norm": 0.8180341916618301, "learning_rate": 1.788137923955617e-07, "loss": 0.1993, "step": 42019 }, { "epoch": 0.7304142258686923, "grad_norm": 2.856246909345793, "learning_rate": 1.7879221946004337e-07, "loss": 0.2286, "step": 42020 }, { "epoch": 0.7304316084061951, "grad_norm": 1.7078770813498974, "learning_rate": 1.7877064754260613e-07, "loss": 0.359, "step": 42021 }, { "epoch": 0.730448990943698, "grad_norm": 1.8268341828418548, "learning_rate": 1.787490766433184e-07, "loss": 0.1852, "step": 42022 }, { "epoch": 0.7304663734812008, "grad_norm": 1.5379056350950813, "learning_rate": 1.7872750676224857e-07, "loss": 0.1305, "step": 42023 }, { "epoch": 0.7304837560187036, "grad_norm": 1.7799648802730423, "learning_rate": 1.7870593789946492e-07, "loss": 0.178, "step": 42024 }, { "epoch": 0.7305011385562065, "grad_norm": 1.6364295272296383, "learning_rate": 1.7868437005503568e-07, "loss": 0.1777, "step": 42025 }, { "epoch": 0.7305185210937093, "grad_norm": 1.2150327298531072, "learning_rate": 1.786628032290297e-07, "loss": 0.3307, "step": 42026 }, { "epoch": 0.7305359036312121, "grad_norm": 1.1983678487136158, "learning_rate": 1.7864123742151489e-07, "loss": 0.1427, "step": 42027 }, { "epoch": 0.730553286168715, "grad_norm": 1.3154092087146105, "learning_rate": 1.7861967263255957e-07, "loss": 0.1922, "step": 42028 }, { "epoch": 0.7305706687062178, "grad_norm": 3.219356687306752, "learning_rate": 1.7859810886223241e-07, "loss": 0.2722, "step": 42029 }, { "epoch": 0.7305880512437205, "grad_norm": 1.4351890645133676, "learning_rate": 1.7857654611060164e-07, "loss": 0.1919, "step": 42030 }, { "epoch": 0.7306054337812233, "grad_norm": 1.4863229137452163, "learning_rate": 1.7855498437773553e-07, "loss": 0.2434, "step": 42031 }, { "epoch": 0.7306228163187262, "grad_norm": 0.7839734876973472, "learning_rate": 1.7853342366370249e-07, "loss": 0.1836, "step": 42032 }, { "epoch": 0.730640198856229, "grad_norm": 2.2979411111174954, "learning_rate": 1.7851186396857083e-07, "loss": 0.2356, "step": 42033 }, { "epoch": 0.7306575813937318, "grad_norm": 1.250400513592461, "learning_rate": 1.7849030529240882e-07, "loss": 0.2364, "step": 42034 }, { "epoch": 0.7306749639312347, "grad_norm": 1.3755541925876467, "learning_rate": 1.7846874763528492e-07, "loss": 0.1851, "step": 42035 }, { "epoch": 0.7306923464687375, "grad_norm": 0.8148062621056815, "learning_rate": 1.784471909972672e-07, "loss": 0.2591, "step": 42036 }, { "epoch": 0.7307097290062403, "grad_norm": 1.734011135134638, "learning_rate": 1.7842563537842437e-07, "loss": 0.2637, "step": 42037 }, { "epoch": 0.7307271115437431, "grad_norm": 1.3521840528678781, "learning_rate": 1.7840408077882452e-07, "loss": 0.1852, "step": 42038 }, { "epoch": 0.730744494081246, "grad_norm": 2.08849114457159, "learning_rate": 1.7838252719853618e-07, "loss": 0.4634, "step": 42039 }, { "epoch": 0.7307618766187488, "grad_norm": 1.9246582158793881, "learning_rate": 1.7836097463762716e-07, "loss": 0.2414, "step": 42040 }, { "epoch": 0.7307792591562516, "grad_norm": 2.042069500311846, "learning_rate": 1.7833942309616624e-07, "loss": 0.1887, "step": 42041 }, { "epoch": 0.7307966416937545, "grad_norm": 3.3114158256072503, "learning_rate": 1.7831787257422164e-07, "loss": 0.2414, "step": 42042 }, { "epoch": 0.7308140242312573, "grad_norm": 1.5298267160368173, "learning_rate": 1.782963230718616e-07, "loss": 0.2251, "step": 42043 }, { "epoch": 0.7308314067687601, "grad_norm": 3.312742589249865, "learning_rate": 1.7827477458915424e-07, "loss": 0.3139, "step": 42044 }, { "epoch": 0.730848789306263, "grad_norm": 1.8455396545827878, "learning_rate": 1.7825322712616842e-07, "loss": 0.241, "step": 42045 }, { "epoch": 0.7308661718437658, "grad_norm": 2.167229982739433, "learning_rate": 1.7823168068297188e-07, "loss": 0.3368, "step": 42046 }, { "epoch": 0.7308835543812686, "grad_norm": 1.6904295746513478, "learning_rate": 1.7821013525963314e-07, "loss": 0.297, "step": 42047 }, { "epoch": 0.7309009369187714, "grad_norm": 3.3621765515503164, "learning_rate": 1.7818859085622024e-07, "loss": 0.3873, "step": 42048 }, { "epoch": 0.7309183194562743, "grad_norm": 1.5348159913778836, "learning_rate": 1.7816704747280187e-07, "loss": 0.2302, "step": 42049 }, { "epoch": 0.730935701993777, "grad_norm": 0.9439939428743088, "learning_rate": 1.7814550510944615e-07, "loss": 0.2353, "step": 42050 }, { "epoch": 0.7309530845312798, "grad_norm": 1.2707882501666674, "learning_rate": 1.781239637662213e-07, "loss": 0.228, "step": 42051 }, { "epoch": 0.7309704670687827, "grad_norm": 1.7915785733129352, "learning_rate": 1.7810242344319566e-07, "loss": 0.4221, "step": 42052 }, { "epoch": 0.7309878496062855, "grad_norm": 1.3151146936322036, "learning_rate": 1.7808088414043742e-07, "loss": 0.1964, "step": 42053 }, { "epoch": 0.7310052321437883, "grad_norm": 1.995845503479082, "learning_rate": 1.7805934585801492e-07, "loss": 0.2691, "step": 42054 }, { "epoch": 0.7310226146812911, "grad_norm": 0.7227119045242152, "learning_rate": 1.7803780859599643e-07, "loss": 0.2222, "step": 42055 }, { "epoch": 0.731039997218794, "grad_norm": 1.6925695090492474, "learning_rate": 1.7801627235445e-07, "loss": 0.1379, "step": 42056 }, { "epoch": 0.7310573797562968, "grad_norm": 1.8590035919654624, "learning_rate": 1.7799473713344438e-07, "loss": 0.2231, "step": 42057 }, { "epoch": 0.7310747622937996, "grad_norm": 1.2607071936665055, "learning_rate": 1.7797320293304742e-07, "loss": 0.2568, "step": 42058 }, { "epoch": 0.7310921448313025, "grad_norm": 1.1620767797940776, "learning_rate": 1.7795166975332725e-07, "loss": 0.1455, "step": 42059 }, { "epoch": 0.7311095273688053, "grad_norm": 3.1962131111691936, "learning_rate": 1.7793013759435254e-07, "loss": 0.2522, "step": 42060 }, { "epoch": 0.7311269099063081, "grad_norm": 1.0628025866895334, "learning_rate": 1.779086064561914e-07, "loss": 0.3313, "step": 42061 }, { "epoch": 0.731144292443811, "grad_norm": 0.9901782122690226, "learning_rate": 1.7788707633891199e-07, "loss": 0.3364, "step": 42062 }, { "epoch": 0.7311616749813138, "grad_norm": 1.2985209734507508, "learning_rate": 1.7786554724258257e-07, "loss": 0.2357, "step": 42063 }, { "epoch": 0.7311790575188166, "grad_norm": 1.5467189184916368, "learning_rate": 1.7784401916727138e-07, "loss": 0.2157, "step": 42064 }, { "epoch": 0.7311964400563195, "grad_norm": 1.458304382597066, "learning_rate": 1.7782249211304669e-07, "loss": 0.3041, "step": 42065 }, { "epoch": 0.7312138225938223, "grad_norm": 1.0821812884305957, "learning_rate": 1.7780096607997664e-07, "loss": 0.141, "step": 42066 }, { "epoch": 0.7312312051313251, "grad_norm": 1.342591025876585, "learning_rate": 1.7777944106812943e-07, "loss": 0.1516, "step": 42067 }, { "epoch": 0.7312485876688279, "grad_norm": 1.0534335461811766, "learning_rate": 1.777579170775735e-07, "loss": 0.2562, "step": 42068 }, { "epoch": 0.7312659702063308, "grad_norm": 1.7192675900857415, "learning_rate": 1.77736394108377e-07, "loss": 0.2282, "step": 42069 }, { "epoch": 0.7312833527438335, "grad_norm": 1.3252885448763199, "learning_rate": 1.777148721606082e-07, "loss": 0.2588, "step": 42070 }, { "epoch": 0.7313007352813363, "grad_norm": 1.7925414159317863, "learning_rate": 1.7769335123433487e-07, "loss": 0.1233, "step": 42071 }, { "epoch": 0.7313181178188392, "grad_norm": 1.6226430685026252, "learning_rate": 1.7767183132962576e-07, "loss": 0.2548, "step": 42072 }, { "epoch": 0.731335500356342, "grad_norm": 1.0932052956494671, "learning_rate": 1.776503124465489e-07, "loss": 0.2337, "step": 42073 }, { "epoch": 0.7313528828938448, "grad_norm": 1.5829985826177424, "learning_rate": 1.7762879458517245e-07, "loss": 0.1965, "step": 42074 }, { "epoch": 0.7313702654313476, "grad_norm": 1.4628566345804723, "learning_rate": 1.7760727774556444e-07, "loss": 0.2271, "step": 42075 }, { "epoch": 0.7313876479688505, "grad_norm": 0.9595991180443866, "learning_rate": 1.7758576192779367e-07, "loss": 0.2925, "step": 42076 }, { "epoch": 0.7314050305063533, "grad_norm": 1.930639584784156, "learning_rate": 1.775642471319277e-07, "loss": 0.3376, "step": 42077 }, { "epoch": 0.7314224130438561, "grad_norm": 1.2660869827503485, "learning_rate": 1.7754273335803498e-07, "loss": 0.3193, "step": 42078 }, { "epoch": 0.731439795581359, "grad_norm": 2.027680500680993, "learning_rate": 1.7752122060618347e-07, "loss": 0.1857, "step": 42079 }, { "epoch": 0.7314571781188618, "grad_norm": 1.682346590506591, "learning_rate": 1.7749970887644177e-07, "loss": 0.1907, "step": 42080 }, { "epoch": 0.7314745606563646, "grad_norm": 0.9984690750350141, "learning_rate": 1.7747819816887783e-07, "loss": 0.1283, "step": 42081 }, { "epoch": 0.7314919431938675, "grad_norm": 1.5301913000351863, "learning_rate": 1.774566884835598e-07, "loss": 0.1778, "step": 42082 }, { "epoch": 0.7315093257313703, "grad_norm": 1.2082075513732033, "learning_rate": 1.77435179820556e-07, "loss": 0.3249, "step": 42083 }, { "epoch": 0.7315267082688731, "grad_norm": 2.984780001532726, "learning_rate": 1.7741367217993442e-07, "loss": 0.3126, "step": 42084 }, { "epoch": 0.731544090806376, "grad_norm": 1.358165849918586, "learning_rate": 1.7739216556176334e-07, "loss": 0.3548, "step": 42085 }, { "epoch": 0.7315614733438788, "grad_norm": 1.3672043414308963, "learning_rate": 1.7737065996611094e-07, "loss": 0.2101, "step": 42086 }, { "epoch": 0.7315788558813816, "grad_norm": 1.5317109343741258, "learning_rate": 1.7734915539304511e-07, "loss": 0.2883, "step": 42087 }, { "epoch": 0.7315962384188844, "grad_norm": 1.2615106728639716, "learning_rate": 1.7732765184263448e-07, "loss": 0.2473, "step": 42088 }, { "epoch": 0.7316136209563873, "grad_norm": 1.7568809541240777, "learning_rate": 1.773061493149471e-07, "loss": 0.2243, "step": 42089 }, { "epoch": 0.73163100349389, "grad_norm": 1.7927165735336228, "learning_rate": 1.7728464781005065e-07, "loss": 0.1726, "step": 42090 }, { "epoch": 0.7316483860313928, "grad_norm": 1.7360922884813494, "learning_rate": 1.7726314732801385e-07, "loss": 0.1599, "step": 42091 }, { "epoch": 0.7316657685688956, "grad_norm": 10.737774597301074, "learning_rate": 1.7724164786890457e-07, "loss": 0.2417, "step": 42092 }, { "epoch": 0.7316831511063985, "grad_norm": 1.4709239156700264, "learning_rate": 1.7722014943279101e-07, "loss": 0.1267, "step": 42093 }, { "epoch": 0.7317005336439013, "grad_norm": 1.2516641737085645, "learning_rate": 1.7719865201974127e-07, "loss": 0.2675, "step": 42094 }, { "epoch": 0.7317179161814041, "grad_norm": 1.9432105580494945, "learning_rate": 1.7717715562982356e-07, "loss": 0.1499, "step": 42095 }, { "epoch": 0.731735298718907, "grad_norm": 1.5036220956481057, "learning_rate": 1.7715566026310597e-07, "loss": 0.3011, "step": 42096 }, { "epoch": 0.7317526812564098, "grad_norm": 2.1203717259213817, "learning_rate": 1.7713416591965662e-07, "loss": 0.138, "step": 42097 }, { "epoch": 0.7317700637939126, "grad_norm": 1.8413959486446578, "learning_rate": 1.7711267259954348e-07, "loss": 0.3004, "step": 42098 }, { "epoch": 0.7317874463314155, "grad_norm": 1.1766613254644456, "learning_rate": 1.7709118030283503e-07, "loss": 0.27, "step": 42099 }, { "epoch": 0.7318048288689183, "grad_norm": 1.0288280637282374, "learning_rate": 1.7706968902959924e-07, "loss": 0.2211, "step": 42100 }, { "epoch": 0.7318222114064211, "grad_norm": 1.2986002321181829, "learning_rate": 1.7704819877990412e-07, "loss": 0.1706, "step": 42101 }, { "epoch": 0.731839593943924, "grad_norm": 1.211121378925538, "learning_rate": 1.7702670955381787e-07, "loss": 0.1431, "step": 42102 }, { "epoch": 0.7318569764814268, "grad_norm": 1.6714346422932684, "learning_rate": 1.7700522135140866e-07, "loss": 0.1486, "step": 42103 }, { "epoch": 0.7318743590189296, "grad_norm": 0.7614485509074087, "learning_rate": 1.769837341727445e-07, "loss": 0.1842, "step": 42104 }, { "epoch": 0.7318917415564324, "grad_norm": 1.5424430838184415, "learning_rate": 1.7696224801789346e-07, "loss": 0.3674, "step": 42105 }, { "epoch": 0.7319091240939353, "grad_norm": 1.7984652401116916, "learning_rate": 1.7694076288692355e-07, "loss": 0.2381, "step": 42106 }, { "epoch": 0.7319265066314381, "grad_norm": 1.4557546655490885, "learning_rate": 1.7691927877990326e-07, "loss": 0.2705, "step": 42107 }, { "epoch": 0.7319438891689409, "grad_norm": 1.2083750210561979, "learning_rate": 1.7689779569690056e-07, "loss": 0.3404, "step": 42108 }, { "epoch": 0.7319612717064438, "grad_norm": 1.3012860404062732, "learning_rate": 1.768763136379831e-07, "loss": 0.2826, "step": 42109 }, { "epoch": 0.7319786542439465, "grad_norm": 1.2845521480260151, "learning_rate": 1.7685483260321944e-07, "loss": 0.169, "step": 42110 }, { "epoch": 0.7319960367814493, "grad_norm": 1.3411807546020014, "learning_rate": 1.7683335259267757e-07, "loss": 0.3564, "step": 42111 }, { "epoch": 0.7320134193189521, "grad_norm": 1.1691254323675582, "learning_rate": 1.7681187360642545e-07, "loss": 0.3727, "step": 42112 }, { "epoch": 0.732030801856455, "grad_norm": 1.2404869827829605, "learning_rate": 1.7679039564453129e-07, "loss": 0.2006, "step": 42113 }, { "epoch": 0.7320481843939578, "grad_norm": 1.7185663374538411, "learning_rate": 1.7676891870706311e-07, "loss": 0.3202, "step": 42114 }, { "epoch": 0.7320655669314606, "grad_norm": 1.3987946913853344, "learning_rate": 1.7674744279408892e-07, "loss": 0.1926, "step": 42115 }, { "epoch": 0.7320829494689635, "grad_norm": 1.6371362574342958, "learning_rate": 1.767259679056769e-07, "loss": 0.3354, "step": 42116 }, { "epoch": 0.7321003320064663, "grad_norm": 1.3760115202397794, "learning_rate": 1.7670449404189487e-07, "loss": 0.1257, "step": 42117 }, { "epoch": 0.7321177145439691, "grad_norm": 1.6928687827992737, "learning_rate": 1.7668302120281126e-07, "loss": 0.1252, "step": 42118 }, { "epoch": 0.732135097081472, "grad_norm": 0.8220751129906904, "learning_rate": 1.76661549388494e-07, "loss": 0.264, "step": 42119 }, { "epoch": 0.7321524796189748, "grad_norm": 1.7080199727991554, "learning_rate": 1.7664007859901097e-07, "loss": 0.2272, "step": 42120 }, { "epoch": 0.7321698621564776, "grad_norm": 1.4687478380250085, "learning_rate": 1.7661860883443048e-07, "loss": 0.1921, "step": 42121 }, { "epoch": 0.7321872446939804, "grad_norm": 1.3086726645055973, "learning_rate": 1.765971400948204e-07, "loss": 0.2422, "step": 42122 }, { "epoch": 0.7322046272314833, "grad_norm": 1.5069561388589965, "learning_rate": 1.765756723802488e-07, "loss": 0.3025, "step": 42123 }, { "epoch": 0.7322220097689861, "grad_norm": 2.2356325366164085, "learning_rate": 1.7655420569078378e-07, "loss": 0.3628, "step": 42124 }, { "epoch": 0.7322393923064889, "grad_norm": 1.295887189251282, "learning_rate": 1.765327400264932e-07, "loss": 0.2734, "step": 42125 }, { "epoch": 0.7322567748439918, "grad_norm": 1.6534426594189982, "learning_rate": 1.7651127538744537e-07, "loss": 0.2792, "step": 42126 }, { "epoch": 0.7322741573814946, "grad_norm": 1.6148902122446316, "learning_rate": 1.7648981177370842e-07, "loss": 0.1517, "step": 42127 }, { "epoch": 0.7322915399189974, "grad_norm": 1.368614451423374, "learning_rate": 1.7646834918534993e-07, "loss": 0.2047, "step": 42128 }, { "epoch": 0.7323089224565003, "grad_norm": 0.8112171765168549, "learning_rate": 1.7644688762243804e-07, "loss": 0.1653, "step": 42129 }, { "epoch": 0.732326304994003, "grad_norm": 1.3995802278727931, "learning_rate": 1.7642542708504106e-07, "loss": 0.3133, "step": 42130 }, { "epoch": 0.7323436875315058, "grad_norm": 1.2918011304566686, "learning_rate": 1.7640396757322678e-07, "loss": 0.1752, "step": 42131 }, { "epoch": 0.7323610700690086, "grad_norm": 1.9938649089859013, "learning_rate": 1.763825090870633e-07, "loss": 0.2049, "step": 42132 }, { "epoch": 0.7323784526065115, "grad_norm": 1.048190344906101, "learning_rate": 1.7636105162661862e-07, "loss": 0.147, "step": 42133 }, { "epoch": 0.7323958351440143, "grad_norm": 1.8980122755609, "learning_rate": 1.7633959519196074e-07, "loss": 0.5914, "step": 42134 }, { "epoch": 0.7324132176815171, "grad_norm": 1.6647681403897145, "learning_rate": 1.7631813978315762e-07, "loss": 0.1612, "step": 42135 }, { "epoch": 0.73243060021902, "grad_norm": 1.4441594219296465, "learning_rate": 1.7629668540027736e-07, "loss": 0.1299, "step": 42136 }, { "epoch": 0.7324479827565228, "grad_norm": 1.432203630047812, "learning_rate": 1.7627523204338773e-07, "loss": 0.1724, "step": 42137 }, { "epoch": 0.7324653652940256, "grad_norm": 1.4004677198682074, "learning_rate": 1.7625377971255705e-07, "loss": 0.2637, "step": 42138 }, { "epoch": 0.7324827478315284, "grad_norm": 1.0791140830353638, "learning_rate": 1.7623232840785323e-07, "loss": 0.1749, "step": 42139 }, { "epoch": 0.7325001303690313, "grad_norm": 1.600692691490822, "learning_rate": 1.7621087812934415e-07, "loss": 0.1618, "step": 42140 }, { "epoch": 0.7325175129065341, "grad_norm": 1.525084704458066, "learning_rate": 1.7618942887709786e-07, "loss": 0.3512, "step": 42141 }, { "epoch": 0.7325348954440369, "grad_norm": 1.0962248494387365, "learning_rate": 1.7616798065118239e-07, "loss": 0.2097, "step": 42142 }, { "epoch": 0.7325522779815398, "grad_norm": 1.4652930947302283, "learning_rate": 1.7614653345166557e-07, "loss": 0.2573, "step": 42143 }, { "epoch": 0.7325696605190426, "grad_norm": 2.848176581188442, "learning_rate": 1.7612508727861552e-07, "loss": 0.253, "step": 42144 }, { "epoch": 0.7325870430565454, "grad_norm": 1.305132961052749, "learning_rate": 1.7610364213210005e-07, "loss": 0.2241, "step": 42145 }, { "epoch": 0.7326044255940483, "grad_norm": 0.9790231824746854, "learning_rate": 1.7608219801218749e-07, "loss": 0.1859, "step": 42146 }, { "epoch": 0.7326218081315511, "grad_norm": 1.322514938885706, "learning_rate": 1.7606075491894545e-07, "loss": 0.2474, "step": 42147 }, { "epoch": 0.7326391906690539, "grad_norm": 1.0045611139883945, "learning_rate": 1.7603931285244183e-07, "loss": 0.2009, "step": 42148 }, { "epoch": 0.7326565732065566, "grad_norm": 1.4406680976276476, "learning_rate": 1.7601787181274496e-07, "loss": 0.2651, "step": 42149 }, { "epoch": 0.7326739557440595, "grad_norm": 0.9399606362730484, "learning_rate": 1.7599643179992263e-07, "loss": 0.2065, "step": 42150 }, { "epoch": 0.7326913382815623, "grad_norm": 0.8109941171646201, "learning_rate": 1.7597499281404272e-07, "loss": 0.2758, "step": 42151 }, { "epoch": 0.7327087208190651, "grad_norm": 1.3015835787391399, "learning_rate": 1.7595355485517326e-07, "loss": 0.151, "step": 42152 }, { "epoch": 0.732726103356568, "grad_norm": 0.8159993901797047, "learning_rate": 1.7593211792338219e-07, "loss": 0.1832, "step": 42153 }, { "epoch": 0.7327434858940708, "grad_norm": 1.372425243099834, "learning_rate": 1.7591068201873743e-07, "loss": 0.1956, "step": 42154 }, { "epoch": 0.7327608684315736, "grad_norm": 1.5801117176994297, "learning_rate": 1.7588924714130692e-07, "loss": 0.1624, "step": 42155 }, { "epoch": 0.7327782509690765, "grad_norm": 1.6099982070566852, "learning_rate": 1.7586781329115845e-07, "loss": 0.2143, "step": 42156 }, { "epoch": 0.7327956335065793, "grad_norm": 1.1278819169731402, "learning_rate": 1.758463804683603e-07, "loss": 0.1324, "step": 42157 }, { "epoch": 0.7328130160440821, "grad_norm": 1.1883823852080935, "learning_rate": 1.758249486729802e-07, "loss": 0.2165, "step": 42158 }, { "epoch": 0.7328303985815849, "grad_norm": 2.1635759519540074, "learning_rate": 1.758035179050863e-07, "loss": 0.2685, "step": 42159 }, { "epoch": 0.7328477811190878, "grad_norm": 1.472731710627315, "learning_rate": 1.7578208816474598e-07, "loss": 0.1821, "step": 42160 }, { "epoch": 0.7328651636565906, "grad_norm": 1.0312083084888128, "learning_rate": 1.7576065945202767e-07, "loss": 0.1392, "step": 42161 }, { "epoch": 0.7328825461940934, "grad_norm": 0.8444852839740636, "learning_rate": 1.757392317669991e-07, "loss": 0.2211, "step": 42162 }, { "epoch": 0.7328999287315963, "grad_norm": 1.4262985388263505, "learning_rate": 1.7571780510972821e-07, "loss": 0.1917, "step": 42163 }, { "epoch": 0.7329173112690991, "grad_norm": 1.4604827851965434, "learning_rate": 1.756963794802828e-07, "loss": 0.2556, "step": 42164 }, { "epoch": 0.7329346938066019, "grad_norm": 1.5244139592522241, "learning_rate": 1.7567495487873118e-07, "loss": 0.1988, "step": 42165 }, { "epoch": 0.7329520763441048, "grad_norm": 1.2760857675004802, "learning_rate": 1.7565353130514083e-07, "loss": 0.126, "step": 42166 }, { "epoch": 0.7329694588816076, "grad_norm": 1.6624649645645022, "learning_rate": 1.756321087595798e-07, "loss": 0.2163, "step": 42167 }, { "epoch": 0.7329868414191104, "grad_norm": 2.2138318683302107, "learning_rate": 1.7561068724211582e-07, "loss": 0.2302, "step": 42168 }, { "epoch": 0.7330042239566131, "grad_norm": 2.0640897531875804, "learning_rate": 1.755892667528171e-07, "loss": 0.2365, "step": 42169 }, { "epoch": 0.733021606494116, "grad_norm": 1.6328102053003888, "learning_rate": 1.7556784729175134e-07, "loss": 0.1986, "step": 42170 }, { "epoch": 0.7330389890316188, "grad_norm": 1.0381003690183, "learning_rate": 1.755464288589865e-07, "loss": 0.1148, "step": 42171 }, { "epoch": 0.7330563715691216, "grad_norm": 1.1746178490208299, "learning_rate": 1.7552501145459048e-07, "loss": 0.214, "step": 42172 }, { "epoch": 0.7330737541066245, "grad_norm": 1.7561824210131476, "learning_rate": 1.755035950786311e-07, "loss": 0.204, "step": 42173 }, { "epoch": 0.7330911366441273, "grad_norm": 2.075701766326556, "learning_rate": 1.7548217973117623e-07, "loss": 0.2719, "step": 42174 }, { "epoch": 0.7331085191816301, "grad_norm": 2.103145535321609, "learning_rate": 1.7546076541229382e-07, "loss": 0.2154, "step": 42175 }, { "epoch": 0.7331259017191329, "grad_norm": 1.8646758849332874, "learning_rate": 1.754393521220515e-07, "loss": 0.1834, "step": 42176 }, { "epoch": 0.7331432842566358, "grad_norm": 1.3165540262626245, "learning_rate": 1.7541793986051772e-07, "loss": 0.2046, "step": 42177 }, { "epoch": 0.7331606667941386, "grad_norm": 1.5456046082296158, "learning_rate": 1.7539652862775978e-07, "loss": 0.2199, "step": 42178 }, { "epoch": 0.7331780493316414, "grad_norm": 3.1498338014257077, "learning_rate": 1.7537511842384557e-07, "loss": 0.1954, "step": 42179 }, { "epoch": 0.7331954318691443, "grad_norm": 1.2692866977488089, "learning_rate": 1.7535370924884324e-07, "loss": 0.1021, "step": 42180 }, { "epoch": 0.7332128144066471, "grad_norm": 1.0165127455205671, "learning_rate": 1.753323011028206e-07, "loss": 0.3886, "step": 42181 }, { "epoch": 0.7332301969441499, "grad_norm": 1.483071714850869, "learning_rate": 1.7531089398584538e-07, "loss": 0.1423, "step": 42182 }, { "epoch": 0.7332475794816528, "grad_norm": 1.4025375981052024, "learning_rate": 1.7528948789798547e-07, "loss": 0.1561, "step": 42183 }, { "epoch": 0.7332649620191556, "grad_norm": 1.1295244479474704, "learning_rate": 1.752680828393087e-07, "loss": 0.2757, "step": 42184 }, { "epoch": 0.7332823445566584, "grad_norm": 2.15629289920808, "learning_rate": 1.7524667880988299e-07, "loss": 0.3509, "step": 42185 }, { "epoch": 0.7332997270941612, "grad_norm": 1.1294391600750588, "learning_rate": 1.7522527580977615e-07, "loss": 0.1855, "step": 42186 }, { "epoch": 0.7333171096316641, "grad_norm": 2.6294158514736696, "learning_rate": 1.7520387383905578e-07, "loss": 0.2678, "step": 42187 }, { "epoch": 0.7333344921691669, "grad_norm": 0.8225773222570458, "learning_rate": 1.7518247289779008e-07, "loss": 0.2455, "step": 42188 }, { "epoch": 0.7333518747066696, "grad_norm": 1.2711123968270148, "learning_rate": 1.7516107298604676e-07, "loss": 0.1632, "step": 42189 }, { "epoch": 0.7333692572441725, "grad_norm": 1.1688577319974895, "learning_rate": 1.751396741038938e-07, "loss": 0.2722, "step": 42190 }, { "epoch": 0.7333866397816753, "grad_norm": 0.7892884573184396, "learning_rate": 1.7511827625139847e-07, "loss": 0.2139, "step": 42191 }, { "epoch": 0.7334040223191781, "grad_norm": 1.053523149243195, "learning_rate": 1.7509687942862917e-07, "loss": 0.2941, "step": 42192 }, { "epoch": 0.733421404856681, "grad_norm": 1.7908682843137171, "learning_rate": 1.750754836356535e-07, "loss": 0.2919, "step": 42193 }, { "epoch": 0.7334387873941838, "grad_norm": 1.2493053346362477, "learning_rate": 1.750540888725393e-07, "loss": 0.257, "step": 42194 }, { "epoch": 0.7334561699316866, "grad_norm": 1.1738901804740232, "learning_rate": 1.750326951393542e-07, "loss": 0.1591, "step": 42195 }, { "epoch": 0.7334735524691894, "grad_norm": 1.2778733584230655, "learning_rate": 1.7501130243616653e-07, "loss": 0.1852, "step": 42196 }, { "epoch": 0.7334909350066923, "grad_norm": 0.9841525336003885, "learning_rate": 1.7498991076304354e-07, "loss": 0.1362, "step": 42197 }, { "epoch": 0.7335083175441951, "grad_norm": 1.0058282486611168, "learning_rate": 1.749685201200531e-07, "loss": 0.2158, "step": 42198 }, { "epoch": 0.7335257000816979, "grad_norm": 1.8050000501922816, "learning_rate": 1.7494713050726324e-07, "loss": 0.2183, "step": 42199 }, { "epoch": 0.7335430826192008, "grad_norm": 1.097126828705087, "learning_rate": 1.7492574192474174e-07, "loss": 0.2048, "step": 42200 }, { "epoch": 0.7335604651567036, "grad_norm": 1.6646426051523922, "learning_rate": 1.749043543725563e-07, "loss": 0.31, "step": 42201 }, { "epoch": 0.7335778476942064, "grad_norm": 1.512128523850735, "learning_rate": 1.7488296785077467e-07, "loss": 0.1166, "step": 42202 }, { "epoch": 0.7335952302317092, "grad_norm": 1.8077295621577152, "learning_rate": 1.7486158235946476e-07, "loss": 0.1776, "step": 42203 }, { "epoch": 0.7336126127692121, "grad_norm": 1.2369301468329996, "learning_rate": 1.748401978986942e-07, "loss": 0.1627, "step": 42204 }, { "epoch": 0.7336299953067149, "grad_norm": 1.2958289825372613, "learning_rate": 1.7481881446853086e-07, "loss": 0.2962, "step": 42205 }, { "epoch": 0.7336473778442177, "grad_norm": 0.9687508471596221, "learning_rate": 1.747974320690424e-07, "loss": 0.1456, "step": 42206 }, { "epoch": 0.7336647603817206, "grad_norm": 1.2288204368965308, "learning_rate": 1.7477605070029677e-07, "loss": 0.1869, "step": 42207 }, { "epoch": 0.7336821429192234, "grad_norm": 1.2496877790917122, "learning_rate": 1.7475467036236175e-07, "loss": 0.2689, "step": 42208 }, { "epoch": 0.7336995254567261, "grad_norm": 2.2457655815598154, "learning_rate": 1.7473329105530517e-07, "loss": 0.186, "step": 42209 }, { "epoch": 0.733716907994229, "grad_norm": 1.6705443433902392, "learning_rate": 1.7471191277919427e-07, "loss": 0.3082, "step": 42210 }, { "epoch": 0.7337342905317318, "grad_norm": 1.8653073290813396, "learning_rate": 1.7469053553409736e-07, "loss": 0.217, "step": 42211 }, { "epoch": 0.7337516730692346, "grad_norm": 1.881891020886945, "learning_rate": 1.7466915932008203e-07, "loss": 0.1958, "step": 42212 }, { "epoch": 0.7337690556067374, "grad_norm": 2.0096198477829796, "learning_rate": 1.7464778413721598e-07, "loss": 0.2203, "step": 42213 }, { "epoch": 0.7337864381442403, "grad_norm": 1.8504767701447393, "learning_rate": 1.7462640998556687e-07, "loss": 0.3018, "step": 42214 }, { "epoch": 0.7338038206817431, "grad_norm": 0.9076087083169128, "learning_rate": 1.7460503686520296e-07, "loss": 0.1589, "step": 42215 }, { "epoch": 0.7338212032192459, "grad_norm": 1.171282975906212, "learning_rate": 1.7458366477619134e-07, "loss": 0.3468, "step": 42216 }, { "epoch": 0.7338385857567488, "grad_norm": 1.3620269571267365, "learning_rate": 1.745622937186001e-07, "loss": 0.2906, "step": 42217 }, { "epoch": 0.7338559682942516, "grad_norm": 2.0246602765157182, "learning_rate": 1.7454092369249667e-07, "loss": 0.2118, "step": 42218 }, { "epoch": 0.7338733508317544, "grad_norm": 1.459553733310785, "learning_rate": 1.7451955469794922e-07, "loss": 0.2168, "step": 42219 }, { "epoch": 0.7338907333692573, "grad_norm": 1.0372410240776258, "learning_rate": 1.7449818673502525e-07, "loss": 0.1993, "step": 42220 }, { "epoch": 0.7339081159067601, "grad_norm": 1.6365380302025745, "learning_rate": 1.7447681980379248e-07, "loss": 0.4008, "step": 42221 }, { "epoch": 0.7339254984442629, "grad_norm": 2.1091709775141463, "learning_rate": 1.744554539043187e-07, "loss": 0.2722, "step": 42222 }, { "epoch": 0.7339428809817657, "grad_norm": 1.302430600000966, "learning_rate": 1.7443408903667157e-07, "loss": 0.232, "step": 42223 }, { "epoch": 0.7339602635192686, "grad_norm": 1.3813909877941881, "learning_rate": 1.7441272520091883e-07, "loss": 0.1786, "step": 42224 }, { "epoch": 0.7339776460567714, "grad_norm": 1.0718287343371635, "learning_rate": 1.7439136239712815e-07, "loss": 0.1697, "step": 42225 }, { "epoch": 0.7339950285942742, "grad_norm": 1.2856385241230075, "learning_rate": 1.7437000062536717e-07, "loss": 0.2354, "step": 42226 }, { "epoch": 0.7340124111317771, "grad_norm": 1.3965375045346982, "learning_rate": 1.7434863988570387e-07, "loss": 0.2488, "step": 42227 }, { "epoch": 0.7340297936692799, "grad_norm": 1.323692481548315, "learning_rate": 1.7432728017820592e-07, "loss": 0.1186, "step": 42228 }, { "epoch": 0.7340471762067826, "grad_norm": 1.1191889027593502, "learning_rate": 1.7430592150294053e-07, "loss": 0.1077, "step": 42229 }, { "epoch": 0.7340645587442854, "grad_norm": 1.4993697582282846, "learning_rate": 1.7428456385997599e-07, "loss": 0.1601, "step": 42230 }, { "epoch": 0.7340819412817883, "grad_norm": 1.867599082972298, "learning_rate": 1.742632072493797e-07, "loss": 0.1444, "step": 42231 }, { "epoch": 0.7340993238192911, "grad_norm": 3.8335547364178297, "learning_rate": 1.7424185167121947e-07, "loss": 0.2757, "step": 42232 }, { "epoch": 0.7341167063567939, "grad_norm": 3.0561792203153426, "learning_rate": 1.742204971255629e-07, "loss": 0.2805, "step": 42233 }, { "epoch": 0.7341340888942968, "grad_norm": 1.1723916865373765, "learning_rate": 1.7419914361247772e-07, "loss": 0.2426, "step": 42234 }, { "epoch": 0.7341514714317996, "grad_norm": 2.209822212130801, "learning_rate": 1.7417779113203157e-07, "loss": 0.2151, "step": 42235 }, { "epoch": 0.7341688539693024, "grad_norm": 0.8778320286492033, "learning_rate": 1.7415643968429216e-07, "loss": 0.2223, "step": 42236 }, { "epoch": 0.7341862365068053, "grad_norm": 1.5062541046707425, "learning_rate": 1.74135089269327e-07, "loss": 0.1605, "step": 42237 }, { "epoch": 0.7342036190443081, "grad_norm": 1.2148628349546584, "learning_rate": 1.7411373988720407e-07, "loss": 0.2297, "step": 42238 }, { "epoch": 0.7342210015818109, "grad_norm": 1.1838985136247497, "learning_rate": 1.7409239153799092e-07, "loss": 0.2909, "step": 42239 }, { "epoch": 0.7342383841193137, "grad_norm": 1.8238923090161312, "learning_rate": 1.7407104422175518e-07, "loss": 0.3273, "step": 42240 }, { "epoch": 0.7342557666568166, "grad_norm": 1.3672355505046134, "learning_rate": 1.7404969793856445e-07, "loss": 0.3236, "step": 42241 }, { "epoch": 0.7342731491943194, "grad_norm": 2.039833175441299, "learning_rate": 1.7402835268848647e-07, "loss": 0.2271, "step": 42242 }, { "epoch": 0.7342905317318222, "grad_norm": 1.3356810902967802, "learning_rate": 1.7400700847158888e-07, "loss": 0.229, "step": 42243 }, { "epoch": 0.7343079142693251, "grad_norm": 2.0031814967650967, "learning_rate": 1.7398566528793933e-07, "loss": 0.3028, "step": 42244 }, { "epoch": 0.7343252968068279, "grad_norm": 1.9564420672583465, "learning_rate": 1.7396432313760528e-07, "loss": 0.1645, "step": 42245 }, { "epoch": 0.7343426793443307, "grad_norm": 2.4425173749437237, "learning_rate": 1.7394298202065476e-07, "loss": 0.2524, "step": 42246 }, { "epoch": 0.7343600618818336, "grad_norm": 1.4405161249259153, "learning_rate": 1.7392164193715536e-07, "loss": 0.2244, "step": 42247 }, { "epoch": 0.7343774444193364, "grad_norm": 2.4970207354852803, "learning_rate": 1.739003028871744e-07, "loss": 0.3059, "step": 42248 }, { "epoch": 0.7343948269568391, "grad_norm": 1.983006106786512, "learning_rate": 1.738789648707795e-07, "loss": 0.3029, "step": 42249 }, { "epoch": 0.7344122094943419, "grad_norm": 1.9683590714972188, "learning_rate": 1.7385762788803865e-07, "loss": 0.1387, "step": 42250 }, { "epoch": 0.7344295920318448, "grad_norm": 1.2711852073119434, "learning_rate": 1.7383629193901926e-07, "loss": 0.2101, "step": 42251 }, { "epoch": 0.7344469745693476, "grad_norm": 0.7760524089064387, "learning_rate": 1.7381495702378905e-07, "loss": 0.1181, "step": 42252 }, { "epoch": 0.7344643571068504, "grad_norm": 1.7916207330570837, "learning_rate": 1.7379362314241558e-07, "loss": 0.2184, "step": 42253 }, { "epoch": 0.7344817396443533, "grad_norm": 3.226332571138097, "learning_rate": 1.737722902949665e-07, "loss": 0.4691, "step": 42254 }, { "epoch": 0.7344991221818561, "grad_norm": 1.984158526658138, "learning_rate": 1.737509584815094e-07, "loss": 0.1542, "step": 42255 }, { "epoch": 0.7345165047193589, "grad_norm": 1.5494308937726675, "learning_rate": 1.7372962770211186e-07, "loss": 0.2204, "step": 42256 }, { "epoch": 0.7345338872568618, "grad_norm": 1.8283860668647935, "learning_rate": 1.7370829795684132e-07, "loss": 0.2084, "step": 42257 }, { "epoch": 0.7345512697943646, "grad_norm": 1.2151988378106011, "learning_rate": 1.7368696924576582e-07, "loss": 0.1723, "step": 42258 }, { "epoch": 0.7345686523318674, "grad_norm": 2.405986072518622, "learning_rate": 1.7366564156895275e-07, "loss": 0.232, "step": 42259 }, { "epoch": 0.7345860348693702, "grad_norm": 2.534621029442598, "learning_rate": 1.7364431492646965e-07, "loss": 0.1893, "step": 42260 }, { "epoch": 0.7346034174068731, "grad_norm": 2.0093812633822004, "learning_rate": 1.7362298931838415e-07, "loss": 0.2772, "step": 42261 }, { "epoch": 0.7346207999443759, "grad_norm": 2.39852109526287, "learning_rate": 1.7360166474476385e-07, "loss": 0.2126, "step": 42262 }, { "epoch": 0.7346381824818787, "grad_norm": 0.983069434372195, "learning_rate": 1.7358034120567637e-07, "loss": 0.1956, "step": 42263 }, { "epoch": 0.7346555650193816, "grad_norm": 1.809855705941621, "learning_rate": 1.735590187011892e-07, "loss": 0.3101, "step": 42264 }, { "epoch": 0.7346729475568844, "grad_norm": 0.9345251063603613, "learning_rate": 1.735376972313699e-07, "loss": 0.1899, "step": 42265 }, { "epoch": 0.7346903300943872, "grad_norm": 1.8814867761626952, "learning_rate": 1.7351637679628645e-07, "loss": 0.2239, "step": 42266 }, { "epoch": 0.73470771263189, "grad_norm": 1.2063422743930596, "learning_rate": 1.734950573960059e-07, "loss": 0.2335, "step": 42267 }, { "epoch": 0.7347250951693929, "grad_norm": 1.4877506425496554, "learning_rate": 1.7347373903059593e-07, "loss": 0.3403, "step": 42268 }, { "epoch": 0.7347424777068956, "grad_norm": 1.0641659740125837, "learning_rate": 1.734524217001243e-07, "loss": 0.2937, "step": 42269 }, { "epoch": 0.7347598602443984, "grad_norm": 2.93513344010426, "learning_rate": 1.7343110540465855e-07, "loss": 0.1689, "step": 42270 }, { "epoch": 0.7347772427819013, "grad_norm": 1.4712242395459971, "learning_rate": 1.7340979014426614e-07, "loss": 0.198, "step": 42271 }, { "epoch": 0.7347946253194041, "grad_norm": 1.1429296722148137, "learning_rate": 1.7338847591901473e-07, "loss": 0.317, "step": 42272 }, { "epoch": 0.7348120078569069, "grad_norm": 3.772246491408994, "learning_rate": 1.7336716272897177e-07, "loss": 0.3113, "step": 42273 }, { "epoch": 0.7348293903944098, "grad_norm": 1.6628352668848467, "learning_rate": 1.7334585057420486e-07, "loss": 0.2912, "step": 42274 }, { "epoch": 0.7348467729319126, "grad_norm": 2.0553435440179504, "learning_rate": 1.733245394547816e-07, "loss": 0.187, "step": 42275 }, { "epoch": 0.7348641554694154, "grad_norm": 1.2841637743958356, "learning_rate": 1.733032293707693e-07, "loss": 0.1873, "step": 42276 }, { "epoch": 0.7348815380069182, "grad_norm": 1.551058253639392, "learning_rate": 1.7328192032223582e-07, "loss": 0.2844, "step": 42277 }, { "epoch": 0.7348989205444211, "grad_norm": 1.323813459278198, "learning_rate": 1.732606123092486e-07, "loss": 0.1684, "step": 42278 }, { "epoch": 0.7349163030819239, "grad_norm": 1.6518325995788268, "learning_rate": 1.7323930533187532e-07, "loss": 0.1752, "step": 42279 }, { "epoch": 0.7349336856194267, "grad_norm": 1.3127045199074958, "learning_rate": 1.73217999390183e-07, "loss": 0.254, "step": 42280 }, { "epoch": 0.7349510681569296, "grad_norm": 1.220644575779934, "learning_rate": 1.7319669448423967e-07, "loss": 0.1956, "step": 42281 }, { "epoch": 0.7349684506944324, "grad_norm": 1.5188469471200274, "learning_rate": 1.7317539061411268e-07, "loss": 0.1355, "step": 42282 }, { "epoch": 0.7349858332319352, "grad_norm": 1.3485995420524286, "learning_rate": 1.7315408777986963e-07, "loss": 0.3154, "step": 42283 }, { "epoch": 0.7350032157694381, "grad_norm": 1.800944026379611, "learning_rate": 1.7313278598157776e-07, "loss": 0.24, "step": 42284 }, { "epoch": 0.7350205983069409, "grad_norm": 1.6762465475015231, "learning_rate": 1.7311148521930512e-07, "loss": 0.2365, "step": 42285 }, { "epoch": 0.7350379808444437, "grad_norm": 2.712443637041338, "learning_rate": 1.7309018549311882e-07, "loss": 0.2608, "step": 42286 }, { "epoch": 0.7350553633819465, "grad_norm": 2.1609996806174885, "learning_rate": 1.730688868030864e-07, "loss": 0.154, "step": 42287 }, { "epoch": 0.7350727459194493, "grad_norm": 2.0319404750913774, "learning_rate": 1.730475891492753e-07, "loss": 0.2721, "step": 42288 }, { "epoch": 0.7350901284569521, "grad_norm": 1.4441333282977717, "learning_rate": 1.7302629253175334e-07, "loss": 0.1844, "step": 42289 }, { "epoch": 0.7351075109944549, "grad_norm": 2.0204814932968156, "learning_rate": 1.730049969505878e-07, "loss": 0.2145, "step": 42290 }, { "epoch": 0.7351248935319578, "grad_norm": 2.118061525383213, "learning_rate": 1.729837024058462e-07, "loss": 0.3017, "step": 42291 }, { "epoch": 0.7351422760694606, "grad_norm": 1.5318741184892375, "learning_rate": 1.7296240889759606e-07, "loss": 0.2592, "step": 42292 }, { "epoch": 0.7351596586069634, "grad_norm": 4.371053040384547, "learning_rate": 1.729411164259048e-07, "loss": 0.2356, "step": 42293 }, { "epoch": 0.7351770411444662, "grad_norm": 0.8922032733135838, "learning_rate": 1.7291982499084002e-07, "loss": 0.1445, "step": 42294 }, { "epoch": 0.7351944236819691, "grad_norm": 0.7881053336391516, "learning_rate": 1.72898534592469e-07, "loss": 0.2382, "step": 42295 }, { "epoch": 0.7352118062194719, "grad_norm": 1.146289363021359, "learning_rate": 1.728772452308595e-07, "loss": 0.1823, "step": 42296 }, { "epoch": 0.7352291887569747, "grad_norm": 0.972907430589501, "learning_rate": 1.7285595690607903e-07, "loss": 0.1643, "step": 42297 }, { "epoch": 0.7352465712944776, "grad_norm": 1.476466941036998, "learning_rate": 1.7283466961819476e-07, "loss": 0.2272, "step": 42298 }, { "epoch": 0.7352639538319804, "grad_norm": 1.1708311898436747, "learning_rate": 1.7281338336727408e-07, "loss": 0.6328, "step": 42299 }, { "epoch": 0.7352813363694832, "grad_norm": 2.1402306620031104, "learning_rate": 1.7279209815338492e-07, "loss": 0.305, "step": 42300 }, { "epoch": 0.7352987189069861, "grad_norm": 1.485524303257792, "learning_rate": 1.7277081397659443e-07, "loss": 0.165, "step": 42301 }, { "epoch": 0.7353161014444889, "grad_norm": 0.8396472343135644, "learning_rate": 1.727495308369702e-07, "loss": 0.2841, "step": 42302 }, { "epoch": 0.7353334839819917, "grad_norm": 1.6291265204298644, "learning_rate": 1.727282487345794e-07, "loss": 0.2758, "step": 42303 }, { "epoch": 0.7353508665194946, "grad_norm": 5.101331389246153, "learning_rate": 1.727069676694901e-07, "loss": 0.2842, "step": 42304 }, { "epoch": 0.7353682490569974, "grad_norm": 1.2730930716698161, "learning_rate": 1.7268568764176917e-07, "loss": 0.2057, "step": 42305 }, { "epoch": 0.7353856315945002, "grad_norm": 1.1395137966181152, "learning_rate": 1.726644086514843e-07, "loss": 0.2551, "step": 42306 }, { "epoch": 0.735403014132003, "grad_norm": 1.4664085086389074, "learning_rate": 1.7264313069870267e-07, "loss": 0.1601, "step": 42307 }, { "epoch": 0.7354203966695058, "grad_norm": 1.8127609127450175, "learning_rate": 1.726218537834921e-07, "loss": 0.2203, "step": 42308 }, { "epoch": 0.7354377792070086, "grad_norm": 1.1664991806187557, "learning_rate": 1.7260057790591992e-07, "loss": 0.243, "step": 42309 }, { "epoch": 0.7354551617445114, "grad_norm": 1.4585561690462598, "learning_rate": 1.725793030660535e-07, "loss": 0.1641, "step": 42310 }, { "epoch": 0.7354725442820143, "grad_norm": 2.4160594075889206, "learning_rate": 1.7255802926396024e-07, "loss": 0.1971, "step": 42311 }, { "epoch": 0.7354899268195171, "grad_norm": 1.0106675831831615, "learning_rate": 1.7253675649970766e-07, "loss": 0.1326, "step": 42312 }, { "epoch": 0.7355073093570199, "grad_norm": 1.2214616032981112, "learning_rate": 1.7251548477336315e-07, "loss": 0.2081, "step": 42313 }, { "epoch": 0.7355246918945227, "grad_norm": 1.822923081207049, "learning_rate": 1.7249421408499403e-07, "loss": 0.2128, "step": 42314 }, { "epoch": 0.7355420744320256, "grad_norm": 1.059430160387654, "learning_rate": 1.7247294443466776e-07, "loss": 0.3487, "step": 42315 }, { "epoch": 0.7355594569695284, "grad_norm": 1.133803995494807, "learning_rate": 1.7245167582245208e-07, "loss": 0.3506, "step": 42316 }, { "epoch": 0.7355768395070312, "grad_norm": 1.7986804944923727, "learning_rate": 1.7243040824841392e-07, "loss": 0.3087, "step": 42317 }, { "epoch": 0.7355942220445341, "grad_norm": 1.2264691600523, "learning_rate": 1.7240914171262078e-07, "loss": 0.2531, "step": 42318 }, { "epoch": 0.7356116045820369, "grad_norm": 0.7418946079018282, "learning_rate": 1.7238787621514033e-07, "loss": 0.3412, "step": 42319 }, { "epoch": 0.7356289871195397, "grad_norm": 1.6960965764695661, "learning_rate": 1.723666117560398e-07, "loss": 0.24, "step": 42320 }, { "epoch": 0.7356463696570426, "grad_norm": 1.6716101773538716, "learning_rate": 1.723453483353866e-07, "loss": 0.2832, "step": 42321 }, { "epoch": 0.7356637521945454, "grad_norm": 1.201931083441892, "learning_rate": 1.723240859532482e-07, "loss": 0.2174, "step": 42322 }, { "epoch": 0.7356811347320482, "grad_norm": 1.3138205196796287, "learning_rate": 1.7230282460969185e-07, "loss": 0.1549, "step": 42323 }, { "epoch": 0.735698517269551, "grad_norm": 1.8913318217050703, "learning_rate": 1.7228156430478507e-07, "loss": 0.2167, "step": 42324 }, { "epoch": 0.7357158998070539, "grad_norm": 1.1139450747945718, "learning_rate": 1.7226030503859517e-07, "loss": 0.2538, "step": 42325 }, { "epoch": 0.7357332823445567, "grad_norm": 1.5653149774134747, "learning_rate": 1.722390468111894e-07, "loss": 0.1764, "step": 42326 }, { "epoch": 0.7357506648820595, "grad_norm": 1.2467069772556676, "learning_rate": 1.7221778962263543e-07, "loss": 0.1311, "step": 42327 }, { "epoch": 0.7357680474195623, "grad_norm": 2.1286800440393074, "learning_rate": 1.721965334730005e-07, "loss": 0.2879, "step": 42328 }, { "epoch": 0.7357854299570651, "grad_norm": 1.7806303670028838, "learning_rate": 1.7217527836235214e-07, "loss": 0.2926, "step": 42329 }, { "epoch": 0.7358028124945679, "grad_norm": 1.7855363520359122, "learning_rate": 1.7215402429075727e-07, "loss": 0.1785, "step": 42330 }, { "epoch": 0.7358201950320707, "grad_norm": 2.2625635383686773, "learning_rate": 1.7213277125828363e-07, "loss": 0.2058, "step": 42331 }, { "epoch": 0.7358375775695736, "grad_norm": 2.030122378124606, "learning_rate": 1.7211151926499857e-07, "loss": 0.2448, "step": 42332 }, { "epoch": 0.7358549601070764, "grad_norm": 1.4686286857344557, "learning_rate": 1.7209026831096935e-07, "loss": 0.2152, "step": 42333 }, { "epoch": 0.7358723426445792, "grad_norm": 3.7231839792881583, "learning_rate": 1.7206901839626315e-07, "loss": 0.2556, "step": 42334 }, { "epoch": 0.7358897251820821, "grad_norm": 1.3411068209521224, "learning_rate": 1.720477695209479e-07, "loss": 0.2425, "step": 42335 }, { "epoch": 0.7359071077195849, "grad_norm": 1.9745295060983719, "learning_rate": 1.720265216850904e-07, "loss": 0.3923, "step": 42336 }, { "epoch": 0.7359244902570877, "grad_norm": 1.8419753739577747, "learning_rate": 1.7200527488875816e-07, "loss": 0.2066, "step": 42337 }, { "epoch": 0.7359418727945906, "grad_norm": 1.605107248382437, "learning_rate": 1.719840291320183e-07, "loss": 0.254, "step": 42338 }, { "epoch": 0.7359592553320934, "grad_norm": 2.2779482395240107, "learning_rate": 1.7196278441493868e-07, "loss": 0.2454, "step": 42339 }, { "epoch": 0.7359766378695962, "grad_norm": 1.6437237381699226, "learning_rate": 1.7194154073758628e-07, "loss": 0.2346, "step": 42340 }, { "epoch": 0.735994020407099, "grad_norm": 1.2822528527526367, "learning_rate": 1.719202981000285e-07, "loss": 0.3787, "step": 42341 }, { "epoch": 0.7360114029446019, "grad_norm": 1.125308113767777, "learning_rate": 1.7189905650233267e-07, "loss": 0.247, "step": 42342 }, { "epoch": 0.7360287854821047, "grad_norm": 1.9292502124439186, "learning_rate": 1.7187781594456608e-07, "loss": 0.2838, "step": 42343 }, { "epoch": 0.7360461680196075, "grad_norm": 1.547455110462669, "learning_rate": 1.7185657642679613e-07, "loss": 0.1379, "step": 42344 }, { "epoch": 0.7360635505571104, "grad_norm": 1.4964178838166515, "learning_rate": 1.7183533794909005e-07, "loss": 0.3103, "step": 42345 }, { "epoch": 0.7360809330946132, "grad_norm": 1.5829531300721666, "learning_rate": 1.7181410051151507e-07, "loss": 0.2353, "step": 42346 }, { "epoch": 0.736098315632116, "grad_norm": 0.9469433088656636, "learning_rate": 1.7179286411413885e-07, "loss": 0.2323, "step": 42347 }, { "epoch": 0.7361156981696187, "grad_norm": 1.8555429624624362, "learning_rate": 1.7177162875702856e-07, "loss": 0.3663, "step": 42348 }, { "epoch": 0.7361330807071216, "grad_norm": 1.3538404496943341, "learning_rate": 1.7175039444025113e-07, "loss": 0.1881, "step": 42349 }, { "epoch": 0.7361504632446244, "grad_norm": 1.8988650413320658, "learning_rate": 1.7172916116387437e-07, "loss": 0.2364, "step": 42350 }, { "epoch": 0.7361678457821272, "grad_norm": 1.2084451058454837, "learning_rate": 1.7170792892796533e-07, "loss": 0.2014, "step": 42351 }, { "epoch": 0.7361852283196301, "grad_norm": 1.285312454510728, "learning_rate": 1.7168669773259136e-07, "loss": 0.3375, "step": 42352 }, { "epoch": 0.7362026108571329, "grad_norm": 1.7866631990212862, "learning_rate": 1.7166546757781975e-07, "loss": 0.2493, "step": 42353 }, { "epoch": 0.7362199933946357, "grad_norm": 1.8032852345870238, "learning_rate": 1.7164423846371778e-07, "loss": 0.2593, "step": 42354 }, { "epoch": 0.7362373759321386, "grad_norm": 0.9543970810955003, "learning_rate": 1.7162301039035276e-07, "loss": 0.2328, "step": 42355 }, { "epoch": 0.7362547584696414, "grad_norm": 0.7857327810901632, "learning_rate": 1.7160178335779192e-07, "loss": 0.1588, "step": 42356 }, { "epoch": 0.7362721410071442, "grad_norm": 1.516018220245211, "learning_rate": 1.7158055736610245e-07, "loss": 0.3332, "step": 42357 }, { "epoch": 0.736289523544647, "grad_norm": 1.8524559618836056, "learning_rate": 1.7155933241535186e-07, "loss": 0.2155, "step": 42358 }, { "epoch": 0.7363069060821499, "grad_norm": 1.3821351507167046, "learning_rate": 1.7153810850560734e-07, "loss": 0.186, "step": 42359 }, { "epoch": 0.7363242886196527, "grad_norm": 2.637881099100168, "learning_rate": 1.715168856369361e-07, "loss": 0.2805, "step": 42360 }, { "epoch": 0.7363416711571555, "grad_norm": 1.0802893013403343, "learning_rate": 1.714956638094055e-07, "loss": 0.1923, "step": 42361 }, { "epoch": 0.7363590536946584, "grad_norm": 1.8842480844460472, "learning_rate": 1.7147444302308272e-07, "loss": 0.2189, "step": 42362 }, { "epoch": 0.7363764362321612, "grad_norm": 2.155286643001652, "learning_rate": 1.7145322327803503e-07, "loss": 0.1953, "step": 42363 }, { "epoch": 0.736393818769664, "grad_norm": 2.715991850024617, "learning_rate": 1.7143200457432972e-07, "loss": 0.4156, "step": 42364 }, { "epoch": 0.7364112013071669, "grad_norm": 1.7415430660801068, "learning_rate": 1.7141078691203388e-07, "loss": 0.2412, "step": 42365 }, { "epoch": 0.7364285838446697, "grad_norm": 0.9258725235886356, "learning_rate": 1.7138957029121499e-07, "loss": 0.2328, "step": 42366 }, { "epoch": 0.7364459663821725, "grad_norm": 2.102965320550115, "learning_rate": 1.7136835471194045e-07, "loss": 0.2511, "step": 42367 }, { "epoch": 0.7364633489196752, "grad_norm": 0.75591763342377, "learning_rate": 1.7134714017427704e-07, "loss": 0.2313, "step": 42368 }, { "epoch": 0.7364807314571781, "grad_norm": 4.504495845125259, "learning_rate": 1.713259266782921e-07, "loss": 0.3363, "step": 42369 }, { "epoch": 0.7364981139946809, "grad_norm": 1.9768882068512073, "learning_rate": 1.7130471422405314e-07, "loss": 0.1933, "step": 42370 }, { "epoch": 0.7365154965321837, "grad_norm": 2.079844322523578, "learning_rate": 1.712835028116273e-07, "loss": 0.3768, "step": 42371 }, { "epoch": 0.7365328790696866, "grad_norm": 1.9390835472357624, "learning_rate": 1.712622924410817e-07, "loss": 0.25, "step": 42372 }, { "epoch": 0.7365502616071894, "grad_norm": 1.3671371933781806, "learning_rate": 1.712410831124836e-07, "loss": 0.2095, "step": 42373 }, { "epoch": 0.7365676441446922, "grad_norm": 1.6433714157575239, "learning_rate": 1.7121987482590028e-07, "loss": 0.2124, "step": 42374 }, { "epoch": 0.736585026682195, "grad_norm": 6.867693276157617, "learning_rate": 1.7119866758139895e-07, "loss": 0.1997, "step": 42375 }, { "epoch": 0.7366024092196979, "grad_norm": 1.5703587458253965, "learning_rate": 1.7117746137904677e-07, "loss": 0.1814, "step": 42376 }, { "epoch": 0.7366197917572007, "grad_norm": 1.080036343914024, "learning_rate": 1.7115625621891082e-07, "loss": 0.1594, "step": 42377 }, { "epoch": 0.7366371742947035, "grad_norm": 1.4033136814876361, "learning_rate": 1.7113505210105866e-07, "loss": 0.3017, "step": 42378 }, { "epoch": 0.7366545568322064, "grad_norm": 1.0025624790408034, "learning_rate": 1.711138490255573e-07, "loss": 0.1349, "step": 42379 }, { "epoch": 0.7366719393697092, "grad_norm": 1.999156957721954, "learning_rate": 1.71092646992474e-07, "loss": 0.2744, "step": 42380 }, { "epoch": 0.736689321907212, "grad_norm": 2.0825410190374227, "learning_rate": 1.710714460018759e-07, "loss": 0.17, "step": 42381 }, { "epoch": 0.7367067044447149, "grad_norm": 4.058966804140462, "learning_rate": 1.7105024605383022e-07, "loss": 0.4225, "step": 42382 }, { "epoch": 0.7367240869822177, "grad_norm": 4.184447349594355, "learning_rate": 1.7102904714840417e-07, "loss": 0.2648, "step": 42383 }, { "epoch": 0.7367414695197205, "grad_norm": 1.148319007542267, "learning_rate": 1.7100784928566475e-07, "loss": 0.2475, "step": 42384 }, { "epoch": 0.7367588520572234, "grad_norm": 1.366960849849281, "learning_rate": 1.7098665246567945e-07, "loss": 0.2264, "step": 42385 }, { "epoch": 0.7367762345947262, "grad_norm": 2.791215516219719, "learning_rate": 1.7096545668851553e-07, "loss": 0.222, "step": 42386 }, { "epoch": 0.736793617132229, "grad_norm": 1.470096073606094, "learning_rate": 1.7094426195423978e-07, "loss": 0.3207, "step": 42387 }, { "epoch": 0.7368109996697317, "grad_norm": 1.4841009961256226, "learning_rate": 1.709230682629194e-07, "loss": 0.215, "step": 42388 }, { "epoch": 0.7368283822072346, "grad_norm": 1.7569895783645888, "learning_rate": 1.7090187561462188e-07, "loss": 0.2963, "step": 42389 }, { "epoch": 0.7368457647447374, "grad_norm": 1.561641205659289, "learning_rate": 1.708806840094143e-07, "loss": 0.2223, "step": 42390 }, { "epoch": 0.7368631472822402, "grad_norm": 1.2705205987295936, "learning_rate": 1.7085949344736373e-07, "loss": 0.2293, "step": 42391 }, { "epoch": 0.7368805298197431, "grad_norm": 1.814075099514596, "learning_rate": 1.7083830392853731e-07, "loss": 0.2572, "step": 42392 }, { "epoch": 0.7368979123572459, "grad_norm": 2.039621761786274, "learning_rate": 1.7081711545300236e-07, "loss": 0.2796, "step": 42393 }, { "epoch": 0.7369152948947487, "grad_norm": 2.5176628494493167, "learning_rate": 1.7079592802082589e-07, "loss": 0.2822, "step": 42394 }, { "epoch": 0.7369326774322515, "grad_norm": 2.0941236148939586, "learning_rate": 1.707747416320751e-07, "loss": 0.1956, "step": 42395 }, { "epoch": 0.7369500599697544, "grad_norm": 1.620857642245182, "learning_rate": 1.7075355628681698e-07, "loss": 0.2656, "step": 42396 }, { "epoch": 0.7369674425072572, "grad_norm": 1.4989134826125299, "learning_rate": 1.7073237198511903e-07, "loss": 0.1913, "step": 42397 }, { "epoch": 0.73698482504476, "grad_norm": 0.7098620935529789, "learning_rate": 1.707111887270482e-07, "loss": 0.378, "step": 42398 }, { "epoch": 0.7370022075822629, "grad_norm": 2.1152911613087815, "learning_rate": 1.706900065126716e-07, "loss": 0.2195, "step": 42399 }, { "epoch": 0.7370195901197657, "grad_norm": 1.8572825733665734, "learning_rate": 1.7066882534205645e-07, "loss": 0.2487, "step": 42400 }, { "epoch": 0.7370369726572685, "grad_norm": 2.2338451771537797, "learning_rate": 1.7064764521526987e-07, "loss": 0.2316, "step": 42401 }, { "epoch": 0.7370543551947714, "grad_norm": 1.3648403650191847, "learning_rate": 1.7062646613237892e-07, "loss": 0.2366, "step": 42402 }, { "epoch": 0.7370717377322742, "grad_norm": 1.043545429224779, "learning_rate": 1.7060528809345076e-07, "loss": 0.2574, "step": 42403 }, { "epoch": 0.737089120269777, "grad_norm": 2.705860399506026, "learning_rate": 1.705841110985524e-07, "loss": 0.2572, "step": 42404 }, { "epoch": 0.7371065028072799, "grad_norm": 1.5116181556659913, "learning_rate": 1.705629351477514e-07, "loss": 0.1861, "step": 42405 }, { "epoch": 0.7371238853447827, "grad_norm": 1.906187497650181, "learning_rate": 1.7054176024111438e-07, "loss": 0.269, "step": 42406 }, { "epoch": 0.7371412678822855, "grad_norm": 0.748977369049215, "learning_rate": 1.7052058637870849e-07, "loss": 0.1984, "step": 42407 }, { "epoch": 0.7371586504197882, "grad_norm": 1.4551534939006334, "learning_rate": 1.7049941356060115e-07, "loss": 0.3919, "step": 42408 }, { "epoch": 0.7371760329572911, "grad_norm": 1.5000473414417788, "learning_rate": 1.7047824178685933e-07, "loss": 0.2706, "step": 42409 }, { "epoch": 0.7371934154947939, "grad_norm": 2.8108275840213874, "learning_rate": 1.704570710575501e-07, "loss": 0.178, "step": 42410 }, { "epoch": 0.7372107980322967, "grad_norm": 1.3136433658090656, "learning_rate": 1.7043590137274054e-07, "loss": 0.1916, "step": 42411 }, { "epoch": 0.7372281805697996, "grad_norm": 1.0499081480724521, "learning_rate": 1.7041473273249786e-07, "loss": 0.1954, "step": 42412 }, { "epoch": 0.7372455631073024, "grad_norm": 6.4440033685110105, "learning_rate": 1.7039356513688902e-07, "loss": 0.3329, "step": 42413 }, { "epoch": 0.7372629456448052, "grad_norm": 2.4682749075409336, "learning_rate": 1.7037239858598118e-07, "loss": 0.3196, "step": 42414 }, { "epoch": 0.737280328182308, "grad_norm": 0.9582883109668066, "learning_rate": 1.7035123307984123e-07, "loss": 0.1266, "step": 42415 }, { "epoch": 0.7372977107198109, "grad_norm": 1.0451301071598287, "learning_rate": 1.7033006861853665e-07, "loss": 0.2022, "step": 42416 }, { "epoch": 0.7373150932573137, "grad_norm": 1.1289525737328616, "learning_rate": 1.7030890520213448e-07, "loss": 0.1329, "step": 42417 }, { "epoch": 0.7373324757948165, "grad_norm": 0.954283526874735, "learning_rate": 1.702877428307014e-07, "loss": 0.2323, "step": 42418 }, { "epoch": 0.7373498583323194, "grad_norm": 1.696194133680117, "learning_rate": 1.702665815043046e-07, "loss": 0.1902, "step": 42419 }, { "epoch": 0.7373672408698222, "grad_norm": 1.4936100842253752, "learning_rate": 1.7024542122301143e-07, "loss": 0.2689, "step": 42420 }, { "epoch": 0.737384623407325, "grad_norm": 2.0388197484690327, "learning_rate": 1.7022426198688883e-07, "loss": 0.2386, "step": 42421 }, { "epoch": 0.7374020059448279, "grad_norm": 1.6871135713200303, "learning_rate": 1.7020310379600383e-07, "loss": 0.2679, "step": 42422 }, { "epoch": 0.7374193884823307, "grad_norm": 1.2136263015598174, "learning_rate": 1.7018194665042328e-07, "loss": 0.1095, "step": 42423 }, { "epoch": 0.7374367710198335, "grad_norm": 1.1243081952341818, "learning_rate": 1.701607905502148e-07, "loss": 0.2082, "step": 42424 }, { "epoch": 0.7374541535573363, "grad_norm": 0.9602096866009954, "learning_rate": 1.7013963549544496e-07, "loss": 0.2076, "step": 42425 }, { "epoch": 0.7374715360948392, "grad_norm": 1.6346401882007047, "learning_rate": 1.7011848148618096e-07, "loss": 0.2598, "step": 42426 }, { "epoch": 0.7374889186323419, "grad_norm": 1.3049748534272132, "learning_rate": 1.7009732852248964e-07, "loss": 0.187, "step": 42427 }, { "epoch": 0.7375063011698447, "grad_norm": 2.0682432637518975, "learning_rate": 1.700761766044384e-07, "loss": 0.2447, "step": 42428 }, { "epoch": 0.7375236837073476, "grad_norm": 1.272834800546062, "learning_rate": 1.7005502573209418e-07, "loss": 0.1414, "step": 42429 }, { "epoch": 0.7375410662448504, "grad_norm": 1.8060482774283013, "learning_rate": 1.7003387590552393e-07, "loss": 0.2885, "step": 42430 }, { "epoch": 0.7375584487823532, "grad_norm": 1.2327138092181231, "learning_rate": 1.700127271247947e-07, "loss": 0.1963, "step": 42431 }, { "epoch": 0.737575831319856, "grad_norm": 0.8877408202171497, "learning_rate": 1.6999157938997356e-07, "loss": 0.1909, "step": 42432 }, { "epoch": 0.7375932138573589, "grad_norm": 1.232971852429735, "learning_rate": 1.6997043270112748e-07, "loss": 0.377, "step": 42433 }, { "epoch": 0.7376105963948617, "grad_norm": 1.6451460653893806, "learning_rate": 1.6994928705832356e-07, "loss": 0.19, "step": 42434 }, { "epoch": 0.7376279789323645, "grad_norm": 1.4865063923648925, "learning_rate": 1.6992814246162867e-07, "loss": 0.2132, "step": 42435 }, { "epoch": 0.7376453614698674, "grad_norm": 1.7630485983675508, "learning_rate": 1.699069989111102e-07, "loss": 0.2442, "step": 42436 }, { "epoch": 0.7376627440073702, "grad_norm": 1.4809963702239195, "learning_rate": 1.698858564068348e-07, "loss": 0.1961, "step": 42437 }, { "epoch": 0.737680126544873, "grad_norm": 1.1115588560255556, "learning_rate": 1.698647149488694e-07, "loss": 0.1573, "step": 42438 }, { "epoch": 0.7376975090823759, "grad_norm": 1.3698779734385957, "learning_rate": 1.6984357453728137e-07, "loss": 0.2947, "step": 42439 }, { "epoch": 0.7377148916198787, "grad_norm": 0.9793379468443003, "learning_rate": 1.6982243517213757e-07, "loss": 0.2276, "step": 42440 }, { "epoch": 0.7377322741573815, "grad_norm": 3.2731798212147027, "learning_rate": 1.6980129685350493e-07, "loss": 0.1962, "step": 42441 }, { "epoch": 0.7377496566948843, "grad_norm": 1.0947743290901983, "learning_rate": 1.6978015958145053e-07, "loss": 0.2359, "step": 42442 }, { "epoch": 0.7377670392323872, "grad_norm": 1.7349407185556847, "learning_rate": 1.697590233560413e-07, "loss": 0.2789, "step": 42443 }, { "epoch": 0.73778442176989, "grad_norm": 1.519408572169109, "learning_rate": 1.697378881773443e-07, "loss": 0.1593, "step": 42444 }, { "epoch": 0.7378018043073928, "grad_norm": 1.639941068562077, "learning_rate": 1.697167540454265e-07, "loss": 0.2555, "step": 42445 }, { "epoch": 0.7378191868448957, "grad_norm": 1.2917981151342592, "learning_rate": 1.6969562096035466e-07, "loss": 0.2239, "step": 42446 }, { "epoch": 0.7378365693823984, "grad_norm": 1.9675222427300316, "learning_rate": 1.696744889221962e-07, "loss": 0.2946, "step": 42447 }, { "epoch": 0.7378539519199012, "grad_norm": 1.3128479900223577, "learning_rate": 1.6965335793101781e-07, "loss": 0.238, "step": 42448 }, { "epoch": 0.737871334457404, "grad_norm": 1.0956045461390778, "learning_rate": 1.696322279868867e-07, "loss": 0.2386, "step": 42449 }, { "epoch": 0.7378887169949069, "grad_norm": 1.4357593441638328, "learning_rate": 1.6961109908986936e-07, "loss": 0.2704, "step": 42450 }, { "epoch": 0.7379060995324097, "grad_norm": 2.3046958306409735, "learning_rate": 1.6958997124003328e-07, "loss": 0.1848, "step": 42451 }, { "epoch": 0.7379234820699125, "grad_norm": 1.8626357738219053, "learning_rate": 1.6956884443744513e-07, "loss": 0.2072, "step": 42452 }, { "epoch": 0.7379408646074154, "grad_norm": 1.3372883928368111, "learning_rate": 1.69547718682172e-07, "loss": 0.2449, "step": 42453 }, { "epoch": 0.7379582471449182, "grad_norm": 1.0352567321534187, "learning_rate": 1.695265939742806e-07, "loss": 0.1707, "step": 42454 }, { "epoch": 0.737975629682421, "grad_norm": 1.1037112108688711, "learning_rate": 1.6950547031383843e-07, "loss": 0.1515, "step": 42455 }, { "epoch": 0.7379930122199239, "grad_norm": 1.659143782284936, "learning_rate": 1.6948434770091195e-07, "loss": 0.1285, "step": 42456 }, { "epoch": 0.7380103947574267, "grad_norm": 3.334282775863569, "learning_rate": 1.694632261355683e-07, "loss": 0.1834, "step": 42457 }, { "epoch": 0.7380277772949295, "grad_norm": 2.322427235985803, "learning_rate": 1.6944210561787414e-07, "loss": 0.3121, "step": 42458 }, { "epoch": 0.7380451598324324, "grad_norm": 0.9910413407373301, "learning_rate": 1.694209861478969e-07, "loss": 0.1868, "step": 42459 }, { "epoch": 0.7380625423699352, "grad_norm": 1.2762108999033035, "learning_rate": 1.6939986772570325e-07, "loss": 0.217, "step": 42460 }, { "epoch": 0.738079924907438, "grad_norm": 1.3540462356722722, "learning_rate": 1.693787503513601e-07, "loss": 0.3654, "step": 42461 }, { "epoch": 0.7380973074449408, "grad_norm": 2.0899350420351004, "learning_rate": 1.6935763402493453e-07, "loss": 0.2496, "step": 42462 }, { "epoch": 0.7381146899824437, "grad_norm": 1.079123064274925, "learning_rate": 1.6933651874649334e-07, "loss": 0.2142, "step": 42463 }, { "epoch": 0.7381320725199465, "grad_norm": 4.993093708254791, "learning_rate": 1.693154045161035e-07, "loss": 0.2146, "step": 42464 }, { "epoch": 0.7381494550574493, "grad_norm": 1.7037444210656798, "learning_rate": 1.692942913338319e-07, "loss": 0.1893, "step": 42465 }, { "epoch": 0.7381668375949522, "grad_norm": 1.4178296591156838, "learning_rate": 1.6927317919974532e-07, "loss": 0.2093, "step": 42466 }, { "epoch": 0.7381842201324549, "grad_norm": 1.4700284162296695, "learning_rate": 1.6925206811391102e-07, "loss": 0.4096, "step": 42467 }, { "epoch": 0.7382016026699577, "grad_norm": 1.234957156139359, "learning_rate": 1.6923095807639588e-07, "loss": 0.205, "step": 42468 }, { "epoch": 0.7382189852074605, "grad_norm": 1.1876665646384381, "learning_rate": 1.692098490872663e-07, "loss": 0.2886, "step": 42469 }, { "epoch": 0.7382363677449634, "grad_norm": 1.542975008939426, "learning_rate": 1.6918874114658975e-07, "loss": 0.2123, "step": 42470 }, { "epoch": 0.7382537502824662, "grad_norm": 1.6257718379561812, "learning_rate": 1.6916763425443291e-07, "loss": 0.1725, "step": 42471 }, { "epoch": 0.738271132819969, "grad_norm": 0.825450270342629, "learning_rate": 1.6914652841086274e-07, "loss": 0.1626, "step": 42472 }, { "epoch": 0.7382885153574719, "grad_norm": 2.0064634746717225, "learning_rate": 1.6912542361594605e-07, "loss": 0.2922, "step": 42473 }, { "epoch": 0.7383058978949747, "grad_norm": 1.3232009203791728, "learning_rate": 1.691043198697498e-07, "loss": 0.1724, "step": 42474 }, { "epoch": 0.7383232804324775, "grad_norm": 1.5539055210616173, "learning_rate": 1.6908321717234087e-07, "loss": 0.2467, "step": 42475 }, { "epoch": 0.7383406629699804, "grad_norm": 0.9424718203252325, "learning_rate": 1.6906211552378607e-07, "loss": 0.1856, "step": 42476 }, { "epoch": 0.7383580455074832, "grad_norm": 1.8720147892428365, "learning_rate": 1.6904101492415224e-07, "loss": 0.1861, "step": 42477 }, { "epoch": 0.738375428044986, "grad_norm": 1.6720764775122938, "learning_rate": 1.690199153735065e-07, "loss": 0.142, "step": 42478 }, { "epoch": 0.7383928105824888, "grad_norm": 1.6299806929767817, "learning_rate": 1.6899881687191558e-07, "loss": 0.206, "step": 42479 }, { "epoch": 0.7384101931199917, "grad_norm": 1.7551699663970217, "learning_rate": 1.6897771941944638e-07, "loss": 0.2303, "step": 42480 }, { "epoch": 0.7384275756574945, "grad_norm": 1.0195198073336322, "learning_rate": 1.6895662301616576e-07, "loss": 0.242, "step": 42481 }, { "epoch": 0.7384449581949973, "grad_norm": 1.2899067845865566, "learning_rate": 1.6893552766214053e-07, "loss": 0.1961, "step": 42482 }, { "epoch": 0.7384623407325002, "grad_norm": 1.1781707154063696, "learning_rate": 1.6891443335743767e-07, "loss": 0.2305, "step": 42483 }, { "epoch": 0.738479723270003, "grad_norm": 1.3045314770131977, "learning_rate": 1.6889334010212387e-07, "loss": 0.2116, "step": 42484 }, { "epoch": 0.7384971058075058, "grad_norm": 1.7819654412876191, "learning_rate": 1.6887224789626597e-07, "loss": 0.2476, "step": 42485 }, { "epoch": 0.7385144883450087, "grad_norm": 1.3633951325298908, "learning_rate": 1.688511567399311e-07, "loss": 0.1675, "step": 42486 }, { "epoch": 0.7385318708825114, "grad_norm": 1.1497309082892022, "learning_rate": 1.6883006663318615e-07, "loss": 0.2251, "step": 42487 }, { "epoch": 0.7385492534200142, "grad_norm": 2.12758773644033, "learning_rate": 1.6880897757609742e-07, "loss": 0.2104, "step": 42488 }, { "epoch": 0.738566635957517, "grad_norm": 1.9208234797929082, "learning_rate": 1.6878788956873218e-07, "loss": 0.2068, "step": 42489 }, { "epoch": 0.7385840184950199, "grad_norm": 4.542117202214676, "learning_rate": 1.687668026111573e-07, "loss": 0.2379, "step": 42490 }, { "epoch": 0.7386014010325227, "grad_norm": 1.1009676780292743, "learning_rate": 1.687457167034394e-07, "loss": 0.2331, "step": 42491 }, { "epoch": 0.7386187835700255, "grad_norm": 1.8659527605254753, "learning_rate": 1.6872463184564544e-07, "loss": 0.1816, "step": 42492 }, { "epoch": 0.7386361661075284, "grad_norm": 4.625679173725922, "learning_rate": 1.6870354803784226e-07, "loss": 0.3396, "step": 42493 }, { "epoch": 0.7386535486450312, "grad_norm": 1.2928479399089967, "learning_rate": 1.686824652800966e-07, "loss": 0.1445, "step": 42494 }, { "epoch": 0.738670931182534, "grad_norm": 4.626820596207282, "learning_rate": 1.686613835724754e-07, "loss": 0.2256, "step": 42495 }, { "epoch": 0.7386883137200368, "grad_norm": 1.7441604874578138, "learning_rate": 1.6864030291504516e-07, "loss": 0.2183, "step": 42496 }, { "epoch": 0.7387056962575397, "grad_norm": 1.5955567039275105, "learning_rate": 1.6861922330787315e-07, "loss": 0.2352, "step": 42497 }, { "epoch": 0.7387230787950425, "grad_norm": 1.3572897056193378, "learning_rate": 1.6859814475102595e-07, "loss": 0.2584, "step": 42498 }, { "epoch": 0.7387404613325453, "grad_norm": 1.690430846774349, "learning_rate": 1.6857706724457043e-07, "loss": 0.201, "step": 42499 }, { "epoch": 0.7387578438700482, "grad_norm": 1.1788768723379366, "learning_rate": 1.6855599078857336e-07, "loss": 0.1969, "step": 42500 }, { "epoch": 0.738775226407551, "grad_norm": 1.4389441970853865, "learning_rate": 1.6853491538310154e-07, "loss": 0.2318, "step": 42501 }, { "epoch": 0.7387926089450538, "grad_norm": 6.467976272187766, "learning_rate": 1.6851384102822182e-07, "loss": 0.1932, "step": 42502 }, { "epoch": 0.7388099914825567, "grad_norm": 1.2087238209423377, "learning_rate": 1.6849276772400088e-07, "loss": 0.2112, "step": 42503 }, { "epoch": 0.7388273740200595, "grad_norm": 1.6187888408119777, "learning_rate": 1.6847169547050545e-07, "loss": 0.1959, "step": 42504 }, { "epoch": 0.7388447565575623, "grad_norm": 1.5748495690551916, "learning_rate": 1.684506242678026e-07, "loss": 0.152, "step": 42505 }, { "epoch": 0.7388621390950652, "grad_norm": 1.1965032963146744, "learning_rate": 1.6842955411595916e-07, "loss": 0.21, "step": 42506 }, { "epoch": 0.7388795216325679, "grad_norm": 1.1452062161632433, "learning_rate": 1.6840848501504158e-07, "loss": 0.2123, "step": 42507 }, { "epoch": 0.7388969041700707, "grad_norm": 1.500912984642418, "learning_rate": 1.683874169651166e-07, "loss": 0.306, "step": 42508 }, { "epoch": 0.7389142867075735, "grad_norm": 8.239217062608716, "learning_rate": 1.683663499662513e-07, "loss": 0.2338, "step": 42509 }, { "epoch": 0.7389316692450764, "grad_norm": 2.496143035694013, "learning_rate": 1.683452840185124e-07, "loss": 0.1743, "step": 42510 }, { "epoch": 0.7389490517825792, "grad_norm": 1.1241275405644118, "learning_rate": 1.6832421912196654e-07, "loss": 0.1606, "step": 42511 }, { "epoch": 0.738966434320082, "grad_norm": 2.129539459680101, "learning_rate": 1.6830315527668053e-07, "loss": 0.1813, "step": 42512 }, { "epoch": 0.7389838168575849, "grad_norm": 1.5066827176288717, "learning_rate": 1.6828209248272123e-07, "loss": 0.2258, "step": 42513 }, { "epoch": 0.7390011993950877, "grad_norm": 2.095007642248251, "learning_rate": 1.6826103074015523e-07, "loss": 0.261, "step": 42514 }, { "epoch": 0.7390185819325905, "grad_norm": 1.7126448512971564, "learning_rate": 1.6823997004904943e-07, "loss": 0.2391, "step": 42515 }, { "epoch": 0.7390359644700933, "grad_norm": 2.6073620209853776, "learning_rate": 1.6821891040947033e-07, "loss": 0.3051, "step": 42516 }, { "epoch": 0.7390533470075962, "grad_norm": 1.6035051788395709, "learning_rate": 1.681978518214851e-07, "loss": 0.2852, "step": 42517 }, { "epoch": 0.739070729545099, "grad_norm": 0.9638291396167094, "learning_rate": 1.6817679428516024e-07, "loss": 0.3015, "step": 42518 }, { "epoch": 0.7390881120826018, "grad_norm": 1.380463478589833, "learning_rate": 1.6815573780056248e-07, "loss": 0.1478, "step": 42519 }, { "epoch": 0.7391054946201047, "grad_norm": 1.4443569596088996, "learning_rate": 1.6813468236775863e-07, "loss": 0.2035, "step": 42520 }, { "epoch": 0.7391228771576075, "grad_norm": 1.6815721092665097, "learning_rate": 1.6811362798681534e-07, "loss": 0.213, "step": 42521 }, { "epoch": 0.7391402596951103, "grad_norm": 1.2356365881948026, "learning_rate": 1.6809257465779942e-07, "loss": 0.168, "step": 42522 }, { "epoch": 0.7391576422326132, "grad_norm": 1.1068279469113087, "learning_rate": 1.6807152238077758e-07, "loss": 0.1982, "step": 42523 }, { "epoch": 0.739175024770116, "grad_norm": 1.1339658807926438, "learning_rate": 1.6805047115581638e-07, "loss": 0.1859, "step": 42524 }, { "epoch": 0.7391924073076188, "grad_norm": 1.5700326285005881, "learning_rate": 1.68029420982983e-07, "loss": 0.2585, "step": 42525 }, { "epoch": 0.7392097898451216, "grad_norm": 8.01982811645191, "learning_rate": 1.6800837186234373e-07, "loss": 0.2272, "step": 42526 }, { "epoch": 0.7392271723826244, "grad_norm": 2.1132672974819315, "learning_rate": 1.6798732379396524e-07, "loss": 0.3872, "step": 42527 }, { "epoch": 0.7392445549201272, "grad_norm": 0.9837953647646545, "learning_rate": 1.679662767779146e-07, "loss": 0.2177, "step": 42528 }, { "epoch": 0.73926193745763, "grad_norm": 1.9469824501753814, "learning_rate": 1.6794523081425833e-07, "loss": 0.1888, "step": 42529 }, { "epoch": 0.7392793199951329, "grad_norm": 1.9504170352574604, "learning_rate": 1.6792418590306312e-07, "loss": 0.2102, "step": 42530 }, { "epoch": 0.7392967025326357, "grad_norm": 1.1689744368615973, "learning_rate": 1.679031420443957e-07, "loss": 0.2448, "step": 42531 }, { "epoch": 0.7393140850701385, "grad_norm": 1.9319713333956088, "learning_rate": 1.6788209923832281e-07, "loss": 0.1929, "step": 42532 }, { "epoch": 0.7393314676076413, "grad_norm": 2.1159763923113437, "learning_rate": 1.6786105748491108e-07, "loss": 0.2863, "step": 42533 }, { "epoch": 0.7393488501451442, "grad_norm": 1.2171631508135548, "learning_rate": 1.678400167842272e-07, "loss": 0.137, "step": 42534 }, { "epoch": 0.739366232682647, "grad_norm": 1.7173328900989633, "learning_rate": 1.6781897713633776e-07, "loss": 0.2553, "step": 42535 }, { "epoch": 0.7393836152201498, "grad_norm": 7.801074808257151, "learning_rate": 1.6779793854130974e-07, "loss": 0.3, "step": 42536 }, { "epoch": 0.7394009977576527, "grad_norm": 2.02714597696473, "learning_rate": 1.6777690099920976e-07, "loss": 0.2171, "step": 42537 }, { "epoch": 0.7394183802951555, "grad_norm": 1.3027117611478018, "learning_rate": 1.6775586451010425e-07, "loss": 0.2521, "step": 42538 }, { "epoch": 0.7394357628326583, "grad_norm": 1.4856645905552888, "learning_rate": 1.677348290740599e-07, "loss": 0.1854, "step": 42539 }, { "epoch": 0.7394531453701612, "grad_norm": 1.7734602262199624, "learning_rate": 1.6771379469114366e-07, "loss": 0.2216, "step": 42540 }, { "epoch": 0.739470527907664, "grad_norm": 1.456969129172716, "learning_rate": 1.6769276136142207e-07, "loss": 0.2183, "step": 42541 }, { "epoch": 0.7394879104451668, "grad_norm": 1.354353227240015, "learning_rate": 1.6767172908496174e-07, "loss": 0.3392, "step": 42542 }, { "epoch": 0.7395052929826696, "grad_norm": 1.6768684755556977, "learning_rate": 1.6765069786182922e-07, "loss": 0.2074, "step": 42543 }, { "epoch": 0.7395226755201725, "grad_norm": 2.1862088831812416, "learning_rate": 1.6762966769209163e-07, "loss": 0.3605, "step": 42544 }, { "epoch": 0.7395400580576753, "grad_norm": 0.8748513513249275, "learning_rate": 1.676086385758152e-07, "loss": 0.1774, "step": 42545 }, { "epoch": 0.7395574405951781, "grad_norm": 0.8442368435739067, "learning_rate": 1.6758761051306664e-07, "loss": 0.2733, "step": 42546 }, { "epoch": 0.7395748231326809, "grad_norm": 5.73514127533844, "learning_rate": 1.675665835039125e-07, "loss": 0.1163, "step": 42547 }, { "epoch": 0.7395922056701837, "grad_norm": 0.6173976763922691, "learning_rate": 1.675455575484198e-07, "loss": 0.1733, "step": 42548 }, { "epoch": 0.7396095882076865, "grad_norm": 1.5880433097380737, "learning_rate": 1.675245326466549e-07, "loss": 0.2326, "step": 42549 }, { "epoch": 0.7396269707451893, "grad_norm": 0.9446018219180958, "learning_rate": 1.6750350879868458e-07, "loss": 0.1259, "step": 42550 }, { "epoch": 0.7396443532826922, "grad_norm": 1.1232116256672513, "learning_rate": 1.674824860045753e-07, "loss": 0.1864, "step": 42551 }, { "epoch": 0.739661735820195, "grad_norm": 1.011152728978429, "learning_rate": 1.674614642643939e-07, "loss": 0.216, "step": 42552 }, { "epoch": 0.7396791183576978, "grad_norm": 1.3647065308821724, "learning_rate": 1.674404435782068e-07, "loss": 0.2398, "step": 42553 }, { "epoch": 0.7396965008952007, "grad_norm": 3.1892624228717965, "learning_rate": 1.6741942394608082e-07, "loss": 0.3279, "step": 42554 }, { "epoch": 0.7397138834327035, "grad_norm": 1.2798446833021806, "learning_rate": 1.6739840536808226e-07, "loss": 0.1554, "step": 42555 }, { "epoch": 0.7397312659702063, "grad_norm": 4.850015866100987, "learning_rate": 1.6737738784427835e-07, "loss": 0.2733, "step": 42556 }, { "epoch": 0.7397486485077092, "grad_norm": 2.3429948001269767, "learning_rate": 1.6735637137473513e-07, "loss": 0.2566, "step": 42557 }, { "epoch": 0.739766031045212, "grad_norm": 1.8936130280971732, "learning_rate": 1.6733535595951924e-07, "loss": 0.1483, "step": 42558 }, { "epoch": 0.7397834135827148, "grad_norm": 1.8065219117549791, "learning_rate": 1.673143415986976e-07, "loss": 0.2156, "step": 42559 }, { "epoch": 0.7398007961202177, "grad_norm": 1.6365337011901462, "learning_rate": 1.6729332829233673e-07, "loss": 0.2271, "step": 42560 }, { "epoch": 0.7398181786577205, "grad_norm": 1.683169793610555, "learning_rate": 1.6727231604050318e-07, "loss": 0.3065, "step": 42561 }, { "epoch": 0.7398355611952233, "grad_norm": 1.1258446067883972, "learning_rate": 1.672513048432635e-07, "loss": 0.1396, "step": 42562 }, { "epoch": 0.7398529437327261, "grad_norm": 2.7272729609304807, "learning_rate": 1.6723029470068435e-07, "loss": 0.2885, "step": 42563 }, { "epoch": 0.739870326270229, "grad_norm": 2.3688004576474504, "learning_rate": 1.672092856128323e-07, "loss": 0.166, "step": 42564 }, { "epoch": 0.7398877088077318, "grad_norm": 1.335721204447539, "learning_rate": 1.67188277579774e-07, "loss": 0.1077, "step": 42565 }, { "epoch": 0.7399050913452345, "grad_norm": 1.1016395112351203, "learning_rate": 1.671672706015757e-07, "loss": 0.1252, "step": 42566 }, { "epoch": 0.7399224738827374, "grad_norm": 1.278398864355294, "learning_rate": 1.671462646783045e-07, "loss": 0.1783, "step": 42567 }, { "epoch": 0.7399398564202402, "grad_norm": 3.0643226133889425, "learning_rate": 1.6712525981002674e-07, "loss": 0.3095, "step": 42568 }, { "epoch": 0.739957238957743, "grad_norm": 1.2641555799320439, "learning_rate": 1.6710425599680916e-07, "loss": 0.1821, "step": 42569 }, { "epoch": 0.7399746214952458, "grad_norm": 1.3838900050779601, "learning_rate": 1.670832532387178e-07, "loss": 0.2413, "step": 42570 }, { "epoch": 0.7399920040327487, "grad_norm": 1.273852396392861, "learning_rate": 1.670622515358198e-07, "loss": 0.2865, "step": 42571 }, { "epoch": 0.7400093865702515, "grad_norm": 1.4813191214475954, "learning_rate": 1.6704125088818154e-07, "loss": 0.2167, "step": 42572 }, { "epoch": 0.7400267691077543, "grad_norm": 1.5164866865142983, "learning_rate": 1.6702025129586954e-07, "loss": 0.3029, "step": 42573 }, { "epoch": 0.7400441516452572, "grad_norm": 0.955575751751977, "learning_rate": 1.6699925275895022e-07, "loss": 0.1367, "step": 42574 }, { "epoch": 0.74006153418276, "grad_norm": 1.2267027774270503, "learning_rate": 1.6697825527749067e-07, "loss": 0.3392, "step": 42575 }, { "epoch": 0.7400789167202628, "grad_norm": 1.2744029289515382, "learning_rate": 1.669572588515568e-07, "loss": 0.3129, "step": 42576 }, { "epoch": 0.7400962992577657, "grad_norm": 1.2153215024425135, "learning_rate": 1.6693626348121536e-07, "loss": 0.2342, "step": 42577 }, { "epoch": 0.7401136817952685, "grad_norm": 1.4836499606604443, "learning_rate": 1.669152691665331e-07, "loss": 0.22, "step": 42578 }, { "epoch": 0.7401310643327713, "grad_norm": 2.7079465482370897, "learning_rate": 1.6689427590757643e-07, "loss": 0.2795, "step": 42579 }, { "epoch": 0.7401484468702741, "grad_norm": 1.1833423708738346, "learning_rate": 1.6687328370441196e-07, "loss": 0.181, "step": 42580 }, { "epoch": 0.740165829407777, "grad_norm": 0.8305332688429904, "learning_rate": 1.6685229255710603e-07, "loss": 0.128, "step": 42581 }, { "epoch": 0.7401832119452798, "grad_norm": 1.9579521777593085, "learning_rate": 1.668313024657254e-07, "loss": 0.1697, "step": 42582 }, { "epoch": 0.7402005944827826, "grad_norm": 19.78059912412242, "learning_rate": 1.6681031343033646e-07, "loss": 0.4279, "step": 42583 }, { "epoch": 0.7402179770202855, "grad_norm": 2.1150142846920774, "learning_rate": 1.6678932545100578e-07, "loss": 0.1703, "step": 42584 }, { "epoch": 0.7402353595577883, "grad_norm": 1.5517111855987247, "learning_rate": 1.6676833852779963e-07, "loss": 0.2479, "step": 42585 }, { "epoch": 0.740252742095291, "grad_norm": 1.0434562926325974, "learning_rate": 1.6674735266078505e-07, "loss": 0.2136, "step": 42586 }, { "epoch": 0.7402701246327938, "grad_norm": 1.2598598570249329, "learning_rate": 1.6672636785002826e-07, "loss": 0.2794, "step": 42587 }, { "epoch": 0.7402875071702967, "grad_norm": 4.510313112432473, "learning_rate": 1.6670538409559593e-07, "loss": 0.3508, "step": 42588 }, { "epoch": 0.7403048897077995, "grad_norm": 5.574292480827891, "learning_rate": 1.6668440139755408e-07, "loss": 0.2004, "step": 42589 }, { "epoch": 0.7403222722453023, "grad_norm": 1.7522874552977463, "learning_rate": 1.6666341975596976e-07, "loss": 0.2477, "step": 42590 }, { "epoch": 0.7403396547828052, "grad_norm": 5.8491327685513195, "learning_rate": 1.6664243917090925e-07, "loss": 0.3148, "step": 42591 }, { "epoch": 0.740357037320308, "grad_norm": 1.6404596888278422, "learning_rate": 1.6662145964243906e-07, "loss": 0.2245, "step": 42592 }, { "epoch": 0.7403744198578108, "grad_norm": 1.364720344699467, "learning_rate": 1.666004811706256e-07, "loss": 0.2325, "step": 42593 }, { "epoch": 0.7403918023953137, "grad_norm": 2.495674695162491, "learning_rate": 1.6657950375553576e-07, "loss": 0.2449, "step": 42594 }, { "epoch": 0.7404091849328165, "grad_norm": 3.3600135603766303, "learning_rate": 1.665585273972356e-07, "loss": 0.2727, "step": 42595 }, { "epoch": 0.7404265674703193, "grad_norm": 1.044640774406756, "learning_rate": 1.6653755209579167e-07, "loss": 0.1977, "step": 42596 }, { "epoch": 0.7404439500078221, "grad_norm": 1.6810840365182256, "learning_rate": 1.6651657785127037e-07, "loss": 0.2931, "step": 42597 }, { "epoch": 0.740461332545325, "grad_norm": 1.3175697343877386, "learning_rate": 1.6649560466373852e-07, "loss": 0.2451, "step": 42598 }, { "epoch": 0.7404787150828278, "grad_norm": 1.0625073852614377, "learning_rate": 1.6647463253326238e-07, "loss": 0.1856, "step": 42599 }, { "epoch": 0.7404960976203306, "grad_norm": 1.020894799024381, "learning_rate": 1.6645366145990848e-07, "loss": 0.2499, "step": 42600 }, { "epoch": 0.7405134801578335, "grad_norm": 1.2974256494342933, "learning_rate": 1.6643269144374323e-07, "loss": 0.2173, "step": 42601 }, { "epoch": 0.7405308626953363, "grad_norm": 2.0626173771709126, "learning_rate": 1.6641172248483315e-07, "loss": 0.1685, "step": 42602 }, { "epoch": 0.7405482452328391, "grad_norm": 0.9018978032691484, "learning_rate": 1.6639075458324463e-07, "loss": 0.117, "step": 42603 }, { "epoch": 0.740565627770342, "grad_norm": 1.3121483498406417, "learning_rate": 1.663697877390442e-07, "loss": 0.1327, "step": 42604 }, { "epoch": 0.7405830103078448, "grad_norm": 1.0200604679692966, "learning_rate": 1.6634882195229811e-07, "loss": 0.2199, "step": 42605 }, { "epoch": 0.7406003928453475, "grad_norm": 1.5124784490986705, "learning_rate": 1.6632785722307318e-07, "loss": 0.2272, "step": 42606 }, { "epoch": 0.7406177753828503, "grad_norm": 1.276150391422774, "learning_rate": 1.6630689355143578e-07, "loss": 0.2587, "step": 42607 }, { "epoch": 0.7406351579203532, "grad_norm": 1.6957206698604737, "learning_rate": 1.6628593093745196e-07, "loss": 0.2166, "step": 42608 }, { "epoch": 0.740652540457856, "grad_norm": 1.5897903754190315, "learning_rate": 1.6626496938118855e-07, "loss": 0.2251, "step": 42609 }, { "epoch": 0.7406699229953588, "grad_norm": 2.1887513712927325, "learning_rate": 1.6624400888271195e-07, "loss": 0.3748, "step": 42610 }, { "epoch": 0.7406873055328617, "grad_norm": 1.9641577513605546, "learning_rate": 1.6622304944208848e-07, "loss": 0.3985, "step": 42611 }, { "epoch": 0.7407046880703645, "grad_norm": 1.520127576331935, "learning_rate": 1.6620209105938464e-07, "loss": 0.1321, "step": 42612 }, { "epoch": 0.7407220706078673, "grad_norm": 2.343033666964305, "learning_rate": 1.661811337346668e-07, "loss": 0.2163, "step": 42613 }, { "epoch": 0.7407394531453702, "grad_norm": 1.148668689056819, "learning_rate": 1.6616017746800149e-07, "loss": 0.1507, "step": 42614 }, { "epoch": 0.740756835682873, "grad_norm": 1.461246200685627, "learning_rate": 1.66139222259455e-07, "loss": 0.208, "step": 42615 }, { "epoch": 0.7407742182203758, "grad_norm": 2.3051862929754696, "learning_rate": 1.6611826810909369e-07, "loss": 0.2618, "step": 42616 }, { "epoch": 0.7407916007578786, "grad_norm": 0.9063036806306307, "learning_rate": 1.6609731501698421e-07, "loss": 0.2438, "step": 42617 }, { "epoch": 0.7408089832953815, "grad_norm": 1.4514655568123873, "learning_rate": 1.6607636298319294e-07, "loss": 0.1301, "step": 42618 }, { "epoch": 0.7408263658328843, "grad_norm": 1.3969097640740886, "learning_rate": 1.660554120077861e-07, "loss": 0.1346, "step": 42619 }, { "epoch": 0.7408437483703871, "grad_norm": 1.5596474032151366, "learning_rate": 1.6603446209083028e-07, "loss": 0.2078, "step": 42620 }, { "epoch": 0.74086113090789, "grad_norm": 0.9468232935891701, "learning_rate": 1.6601351323239183e-07, "loss": 0.2352, "step": 42621 }, { "epoch": 0.7408785134453928, "grad_norm": 1.1615466223535096, "learning_rate": 1.6599256543253704e-07, "loss": 0.2193, "step": 42622 }, { "epoch": 0.7408958959828956, "grad_norm": 1.1794459645875712, "learning_rate": 1.6597161869133248e-07, "loss": 0.1596, "step": 42623 }, { "epoch": 0.7409132785203985, "grad_norm": 1.4285390693753202, "learning_rate": 1.659506730088442e-07, "loss": 0.2199, "step": 42624 }, { "epoch": 0.7409306610579013, "grad_norm": 1.349370841442003, "learning_rate": 1.6592972838513902e-07, "loss": 0.1594, "step": 42625 }, { "epoch": 0.740948043595404, "grad_norm": 0.6596442022180199, "learning_rate": 1.6590878482028332e-07, "loss": 0.23, "step": 42626 }, { "epoch": 0.7409654261329068, "grad_norm": 1.0085456232090226, "learning_rate": 1.6588784231434316e-07, "loss": 0.2348, "step": 42627 }, { "epoch": 0.7409828086704097, "grad_norm": 1.140633455559924, "learning_rate": 1.6586690086738487e-07, "loss": 0.1476, "step": 42628 }, { "epoch": 0.7410001912079125, "grad_norm": 1.4568915250552221, "learning_rate": 1.6584596047947518e-07, "loss": 0.2585, "step": 42629 }, { "epoch": 0.7410175737454153, "grad_norm": 1.5190258805499888, "learning_rate": 1.6582502115068032e-07, "loss": 0.3186, "step": 42630 }, { "epoch": 0.7410349562829182, "grad_norm": 1.749096664742642, "learning_rate": 1.6580408288106657e-07, "loss": 0.2242, "step": 42631 }, { "epoch": 0.741052338820421, "grad_norm": 1.5609029812966535, "learning_rate": 1.6578314567070045e-07, "loss": 0.1605, "step": 42632 }, { "epoch": 0.7410697213579238, "grad_norm": 0.8674686279047719, "learning_rate": 1.6576220951964814e-07, "loss": 0.2083, "step": 42633 }, { "epoch": 0.7410871038954266, "grad_norm": 1.703812141691665, "learning_rate": 1.6574127442797614e-07, "loss": 0.2838, "step": 42634 }, { "epoch": 0.7411044864329295, "grad_norm": 1.6269384658318027, "learning_rate": 1.6572034039575078e-07, "loss": 0.2507, "step": 42635 }, { "epoch": 0.7411218689704323, "grad_norm": 1.8114116120882293, "learning_rate": 1.6569940742303817e-07, "loss": 0.2146, "step": 42636 }, { "epoch": 0.7411392515079351, "grad_norm": 2.407455054379822, "learning_rate": 1.656784755099051e-07, "loss": 0.3643, "step": 42637 }, { "epoch": 0.741156634045438, "grad_norm": 1.5581694934549808, "learning_rate": 1.6565754465641763e-07, "loss": 0.2806, "step": 42638 }, { "epoch": 0.7411740165829408, "grad_norm": 3.3851400516067933, "learning_rate": 1.656366148626422e-07, "loss": 0.311, "step": 42639 }, { "epoch": 0.7411913991204436, "grad_norm": 0.8375880892490117, "learning_rate": 1.6561568612864508e-07, "loss": 0.1107, "step": 42640 }, { "epoch": 0.7412087816579465, "grad_norm": 2.7157701309636284, "learning_rate": 1.6559475845449262e-07, "loss": 0.3807, "step": 42641 }, { "epoch": 0.7412261641954493, "grad_norm": 1.7031689315957848, "learning_rate": 1.655738318402512e-07, "loss": 0.2178, "step": 42642 }, { "epoch": 0.7412435467329521, "grad_norm": 1.4750366333608138, "learning_rate": 1.6555290628598707e-07, "loss": 0.1564, "step": 42643 }, { "epoch": 0.741260929270455, "grad_norm": 1.681146828226312, "learning_rate": 1.6553198179176647e-07, "loss": 0.2271, "step": 42644 }, { "epoch": 0.7412783118079578, "grad_norm": 1.6030700921707135, "learning_rate": 1.6551105835765617e-07, "loss": 0.2073, "step": 42645 }, { "epoch": 0.7412956943454605, "grad_norm": 2.1179202288497683, "learning_rate": 1.6549013598372196e-07, "loss": 0.2138, "step": 42646 }, { "epoch": 0.7413130768829633, "grad_norm": 2.1470775480200324, "learning_rate": 1.654692146700302e-07, "loss": 0.2914, "step": 42647 }, { "epoch": 0.7413304594204662, "grad_norm": 1.4181199214155478, "learning_rate": 1.6544829441664753e-07, "loss": 0.2891, "step": 42648 }, { "epoch": 0.741347841957969, "grad_norm": 1.6802709085957852, "learning_rate": 1.6542737522364015e-07, "loss": 0.3189, "step": 42649 }, { "epoch": 0.7413652244954718, "grad_norm": 1.6184376682640396, "learning_rate": 1.654064570910742e-07, "loss": 0.1954, "step": 42650 }, { "epoch": 0.7413826070329746, "grad_norm": 4.106373149614183, "learning_rate": 1.6538554001901618e-07, "loss": 0.2336, "step": 42651 }, { "epoch": 0.7413999895704775, "grad_norm": 1.5916948962877382, "learning_rate": 1.653646240075322e-07, "loss": 0.3371, "step": 42652 }, { "epoch": 0.7414173721079803, "grad_norm": 2.122067592674441, "learning_rate": 1.6534370905668866e-07, "loss": 0.1811, "step": 42653 }, { "epoch": 0.7414347546454831, "grad_norm": 1.6092342262919208, "learning_rate": 1.653227951665519e-07, "loss": 0.3297, "step": 42654 }, { "epoch": 0.741452137182986, "grad_norm": 1.297171532628153, "learning_rate": 1.6530188233718788e-07, "loss": 0.2261, "step": 42655 }, { "epoch": 0.7414695197204888, "grad_norm": 1.7700570955807136, "learning_rate": 1.6528097056866336e-07, "loss": 0.3065, "step": 42656 }, { "epoch": 0.7414869022579916, "grad_norm": 1.9254245107848844, "learning_rate": 1.6526005986104458e-07, "loss": 0.2651, "step": 42657 }, { "epoch": 0.7415042847954945, "grad_norm": 1.5564774705891853, "learning_rate": 1.652391502143975e-07, "loss": 0.175, "step": 42658 }, { "epoch": 0.7415216673329973, "grad_norm": 1.7911270486295026, "learning_rate": 1.6521824162878834e-07, "loss": 0.1878, "step": 42659 }, { "epoch": 0.7415390498705001, "grad_norm": 1.1939323952133714, "learning_rate": 1.6519733410428372e-07, "loss": 0.2007, "step": 42660 }, { "epoch": 0.741556432408003, "grad_norm": 1.883993694214895, "learning_rate": 1.6517642764094974e-07, "loss": 0.2756, "step": 42661 }, { "epoch": 0.7415738149455058, "grad_norm": 0.9363923790337927, "learning_rate": 1.6515552223885272e-07, "loss": 0.1264, "step": 42662 }, { "epoch": 0.7415911974830086, "grad_norm": 1.4641565132651304, "learning_rate": 1.651346178980587e-07, "loss": 0.2581, "step": 42663 }, { "epoch": 0.7416085800205114, "grad_norm": 1.8792551424209958, "learning_rate": 1.6511371461863448e-07, "loss": 0.2979, "step": 42664 }, { "epoch": 0.7416259625580143, "grad_norm": 0.826161695480436, "learning_rate": 1.6509281240064578e-07, "loss": 0.2317, "step": 42665 }, { "epoch": 0.741643345095517, "grad_norm": 2.5229503708628602, "learning_rate": 1.6507191124415881e-07, "loss": 0.1216, "step": 42666 }, { "epoch": 0.7416607276330198, "grad_norm": 1.4313014151273165, "learning_rate": 1.650510111492402e-07, "loss": 0.2417, "step": 42667 }, { "epoch": 0.7416781101705227, "grad_norm": 2.0525470303142113, "learning_rate": 1.6503011211595603e-07, "loss": 0.3053, "step": 42668 }, { "epoch": 0.7416954927080255, "grad_norm": 1.6397749418635803, "learning_rate": 1.6500921414437253e-07, "loss": 0.1823, "step": 42669 }, { "epoch": 0.7417128752455283, "grad_norm": 1.5049779710845705, "learning_rate": 1.6498831723455598e-07, "loss": 0.3235, "step": 42670 }, { "epoch": 0.7417302577830311, "grad_norm": 1.5727315406355857, "learning_rate": 1.6496742138657249e-07, "loss": 0.3265, "step": 42671 }, { "epoch": 0.741747640320534, "grad_norm": 2.039962077154364, "learning_rate": 1.6494652660048847e-07, "loss": 0.2014, "step": 42672 }, { "epoch": 0.7417650228580368, "grad_norm": 1.6102203229545655, "learning_rate": 1.6492563287636995e-07, "loss": 0.3128, "step": 42673 }, { "epoch": 0.7417824053955396, "grad_norm": 1.3845106297769967, "learning_rate": 1.6490474021428318e-07, "loss": 0.2914, "step": 42674 }, { "epoch": 0.7417997879330425, "grad_norm": 1.2244448373541805, "learning_rate": 1.6488384861429454e-07, "loss": 0.1091, "step": 42675 }, { "epoch": 0.7418171704705453, "grad_norm": 1.8905252544352082, "learning_rate": 1.648629580764704e-07, "loss": 0.377, "step": 42676 }, { "epoch": 0.7418345530080481, "grad_norm": 2.5049762535960536, "learning_rate": 1.6484206860087651e-07, "loss": 0.2626, "step": 42677 }, { "epoch": 0.741851935545551, "grad_norm": 1.404841410889948, "learning_rate": 1.648211801875792e-07, "loss": 0.2586, "step": 42678 }, { "epoch": 0.7418693180830538, "grad_norm": 1.5381958528345354, "learning_rate": 1.6480029283664488e-07, "loss": 0.1484, "step": 42679 }, { "epoch": 0.7418867006205566, "grad_norm": 1.5870392169114693, "learning_rate": 1.6477940654813965e-07, "loss": 0.2996, "step": 42680 }, { "epoch": 0.7419040831580594, "grad_norm": 2.571429039086338, "learning_rate": 1.6475852132212976e-07, "loss": 0.3498, "step": 42681 }, { "epoch": 0.7419214656955623, "grad_norm": 1.5528523308282782, "learning_rate": 1.6473763715868118e-07, "loss": 0.1491, "step": 42682 }, { "epoch": 0.7419388482330651, "grad_norm": 1.3289036824966503, "learning_rate": 1.6471675405786062e-07, "loss": 0.2243, "step": 42683 }, { "epoch": 0.7419562307705679, "grad_norm": 1.2126789801779971, "learning_rate": 1.646958720197338e-07, "loss": 0.2267, "step": 42684 }, { "epoch": 0.7419736133080708, "grad_norm": 1.6681384766201972, "learning_rate": 1.6467499104436693e-07, "loss": 0.1424, "step": 42685 }, { "epoch": 0.7419909958455735, "grad_norm": 1.2291699801250768, "learning_rate": 1.6465411113182624e-07, "loss": 0.2401, "step": 42686 }, { "epoch": 0.7420083783830763, "grad_norm": 1.0688376348235114, "learning_rate": 1.6463323228217813e-07, "loss": 0.2946, "step": 42687 }, { "epoch": 0.7420257609205791, "grad_norm": 1.6552595701142272, "learning_rate": 1.6461235449548855e-07, "loss": 0.2098, "step": 42688 }, { "epoch": 0.742043143458082, "grad_norm": 1.3256372451272256, "learning_rate": 1.645914777718238e-07, "loss": 0.1649, "step": 42689 }, { "epoch": 0.7420605259955848, "grad_norm": 1.8658828019578713, "learning_rate": 1.6457060211124995e-07, "loss": 0.5053, "step": 42690 }, { "epoch": 0.7420779085330876, "grad_norm": 0.9871182772241286, "learning_rate": 1.6454972751383328e-07, "loss": 0.2393, "step": 42691 }, { "epoch": 0.7420952910705905, "grad_norm": 1.8857378808922363, "learning_rate": 1.645288539796398e-07, "loss": 0.2188, "step": 42692 }, { "epoch": 0.7421126736080933, "grad_norm": 1.6250844387828025, "learning_rate": 1.645079815087358e-07, "loss": 0.2392, "step": 42693 }, { "epoch": 0.7421300561455961, "grad_norm": 2.1574216601352014, "learning_rate": 1.644871101011872e-07, "loss": 0.2955, "step": 42694 }, { "epoch": 0.742147438683099, "grad_norm": 1.1200719296058113, "learning_rate": 1.644662397570607e-07, "loss": 0.2081, "step": 42695 }, { "epoch": 0.7421648212206018, "grad_norm": 1.0825165031832022, "learning_rate": 1.6444537047642187e-07, "loss": 0.1347, "step": 42696 }, { "epoch": 0.7421822037581046, "grad_norm": 1.3108082428764174, "learning_rate": 1.6442450225933696e-07, "loss": 0.3179, "step": 42697 }, { "epoch": 0.7421995862956074, "grad_norm": 1.5995902606266257, "learning_rate": 1.6440363510587236e-07, "loss": 0.2459, "step": 42698 }, { "epoch": 0.7422169688331103, "grad_norm": 1.7217124272712931, "learning_rate": 1.6438276901609406e-07, "loss": 0.224, "step": 42699 }, { "epoch": 0.7422343513706131, "grad_norm": 1.0558468277215598, "learning_rate": 1.6436190399006827e-07, "loss": 0.2878, "step": 42700 }, { "epoch": 0.7422517339081159, "grad_norm": 1.416022872916777, "learning_rate": 1.6434104002786105e-07, "loss": 0.2474, "step": 42701 }, { "epoch": 0.7422691164456188, "grad_norm": 1.8217105787400112, "learning_rate": 1.643201771295385e-07, "loss": 0.2979, "step": 42702 }, { "epoch": 0.7422864989831216, "grad_norm": 2.4411361419369517, "learning_rate": 1.6429931529516683e-07, "loss": 0.347, "step": 42703 }, { "epoch": 0.7423038815206244, "grad_norm": 1.3950857076674663, "learning_rate": 1.642784545248121e-07, "loss": 0.2507, "step": 42704 }, { "epoch": 0.7423212640581272, "grad_norm": 0.821481732189736, "learning_rate": 1.642575948185403e-07, "loss": 0.0867, "step": 42705 }, { "epoch": 0.74233864659563, "grad_norm": 1.5285463109614297, "learning_rate": 1.642367361764178e-07, "loss": 0.1794, "step": 42706 }, { "epoch": 0.7423560291331328, "grad_norm": 1.3416158608249202, "learning_rate": 1.6421587859851073e-07, "loss": 0.2331, "step": 42707 }, { "epoch": 0.7423734116706356, "grad_norm": 5.276263219333697, "learning_rate": 1.6419502208488516e-07, "loss": 0.3943, "step": 42708 }, { "epoch": 0.7423907942081385, "grad_norm": 1.0987260975330013, "learning_rate": 1.641741666356068e-07, "loss": 0.1282, "step": 42709 }, { "epoch": 0.7424081767456413, "grad_norm": 1.2768100627273078, "learning_rate": 1.6415331225074226e-07, "loss": 0.2443, "step": 42710 }, { "epoch": 0.7424255592831441, "grad_norm": 1.5415993356670787, "learning_rate": 1.6413245893035738e-07, "loss": 0.1766, "step": 42711 }, { "epoch": 0.742442941820647, "grad_norm": 1.3359765826408991, "learning_rate": 1.6411160667451834e-07, "loss": 0.2946, "step": 42712 }, { "epoch": 0.7424603243581498, "grad_norm": 1.7907510064330656, "learning_rate": 1.640907554832911e-07, "loss": 0.3126, "step": 42713 }, { "epoch": 0.7424777068956526, "grad_norm": 0.6759535106809252, "learning_rate": 1.6406990535674215e-07, "loss": 0.1142, "step": 42714 }, { "epoch": 0.7424950894331555, "grad_norm": 2.4967323056263675, "learning_rate": 1.6404905629493714e-07, "loss": 0.2318, "step": 42715 }, { "epoch": 0.7425124719706583, "grad_norm": 2.084418694197931, "learning_rate": 1.6402820829794228e-07, "loss": 0.3024, "step": 42716 }, { "epoch": 0.7425298545081611, "grad_norm": 0.9235853460736766, "learning_rate": 1.640073613658235e-07, "loss": 0.2685, "step": 42717 }, { "epoch": 0.7425472370456639, "grad_norm": 1.0407478491278908, "learning_rate": 1.6398651549864722e-07, "loss": 0.2383, "step": 42718 }, { "epoch": 0.7425646195831668, "grad_norm": 1.1392772053415006, "learning_rate": 1.6396567069647932e-07, "loss": 0.2684, "step": 42719 }, { "epoch": 0.7425820021206696, "grad_norm": 1.2532966635770912, "learning_rate": 1.639448269593859e-07, "loss": 0.162, "step": 42720 }, { "epoch": 0.7425993846581724, "grad_norm": 1.7331080446319405, "learning_rate": 1.6392398428743298e-07, "loss": 0.3365, "step": 42721 }, { "epoch": 0.7426167671956753, "grad_norm": 0.8147522927229256, "learning_rate": 1.6390314268068666e-07, "loss": 0.1575, "step": 42722 }, { "epoch": 0.7426341497331781, "grad_norm": 1.723936904616765, "learning_rate": 1.63882302139213e-07, "loss": 0.2337, "step": 42723 }, { "epoch": 0.7426515322706809, "grad_norm": 1.4670104058130558, "learning_rate": 1.6386146266307803e-07, "loss": 0.1815, "step": 42724 }, { "epoch": 0.7426689148081836, "grad_norm": 1.859221073783062, "learning_rate": 1.638406242523477e-07, "loss": 0.3175, "step": 42725 }, { "epoch": 0.7426862973456865, "grad_norm": 4.095126026366166, "learning_rate": 1.638197869070883e-07, "loss": 0.4625, "step": 42726 }, { "epoch": 0.7427036798831893, "grad_norm": 1.5568658736363437, "learning_rate": 1.6379895062736588e-07, "loss": 0.1776, "step": 42727 }, { "epoch": 0.7427210624206921, "grad_norm": 1.6379783880407934, "learning_rate": 1.6377811541324603e-07, "loss": 0.1789, "step": 42728 }, { "epoch": 0.742738444958195, "grad_norm": 1.2369577226355748, "learning_rate": 1.6375728126479533e-07, "loss": 0.2188, "step": 42729 }, { "epoch": 0.7427558274956978, "grad_norm": 1.004313642201491, "learning_rate": 1.6373644818207954e-07, "loss": 0.1567, "step": 42730 }, { "epoch": 0.7427732100332006, "grad_norm": 2.8530933557551243, "learning_rate": 1.6371561616516477e-07, "loss": 0.3363, "step": 42731 }, { "epoch": 0.7427905925707035, "grad_norm": 2.017367388250061, "learning_rate": 1.63694785214117e-07, "loss": 0.1812, "step": 42732 }, { "epoch": 0.7428079751082063, "grad_norm": 1.1995957455709536, "learning_rate": 1.6367395532900224e-07, "loss": 0.168, "step": 42733 }, { "epoch": 0.7428253576457091, "grad_norm": 2.408182872214685, "learning_rate": 1.6365312650988662e-07, "loss": 0.1944, "step": 42734 }, { "epoch": 0.742842740183212, "grad_norm": 2.7599783841415357, "learning_rate": 1.6363229875683605e-07, "loss": 0.2683, "step": 42735 }, { "epoch": 0.7428601227207148, "grad_norm": 1.155455124038307, "learning_rate": 1.636114720699164e-07, "loss": 0.208, "step": 42736 }, { "epoch": 0.7428775052582176, "grad_norm": 1.056767225247864, "learning_rate": 1.6359064644919402e-07, "loss": 0.3176, "step": 42737 }, { "epoch": 0.7428948877957204, "grad_norm": 1.2910790527036196, "learning_rate": 1.6356982189473479e-07, "loss": 0.3742, "step": 42738 }, { "epoch": 0.7429122703332233, "grad_norm": 1.573293818180534, "learning_rate": 1.6354899840660464e-07, "loss": 0.2455, "step": 42739 }, { "epoch": 0.7429296528707261, "grad_norm": 1.3812016478071147, "learning_rate": 1.6352817598486962e-07, "loss": 0.1441, "step": 42740 }, { "epoch": 0.7429470354082289, "grad_norm": 1.0985499217536614, "learning_rate": 1.6350735462959575e-07, "loss": 0.1487, "step": 42741 }, { "epoch": 0.7429644179457318, "grad_norm": 1.9586111563785937, "learning_rate": 1.6348653434084896e-07, "loss": 0.1829, "step": 42742 }, { "epoch": 0.7429818004832346, "grad_norm": 1.6115550062218633, "learning_rate": 1.6346571511869527e-07, "loss": 0.2082, "step": 42743 }, { "epoch": 0.7429991830207374, "grad_norm": 2.0980724577773233, "learning_rate": 1.6344489696320056e-07, "loss": 0.1926, "step": 42744 }, { "epoch": 0.7430165655582401, "grad_norm": 1.7881710126263073, "learning_rate": 1.6342407987443107e-07, "loss": 0.193, "step": 42745 }, { "epoch": 0.743033948095743, "grad_norm": 1.205614244690012, "learning_rate": 1.634032638524528e-07, "loss": 0.201, "step": 42746 }, { "epoch": 0.7430513306332458, "grad_norm": 1.2924000633777881, "learning_rate": 1.633824488973314e-07, "loss": 0.1592, "step": 42747 }, { "epoch": 0.7430687131707486, "grad_norm": 4.78551178684932, "learning_rate": 1.633616350091328e-07, "loss": 0.243, "step": 42748 }, { "epoch": 0.7430860957082515, "grad_norm": 1.4467458587059483, "learning_rate": 1.633408221879234e-07, "loss": 0.289, "step": 42749 }, { "epoch": 0.7431034782457543, "grad_norm": 1.7392055443937784, "learning_rate": 1.6332001043376887e-07, "loss": 0.2319, "step": 42750 }, { "epoch": 0.7431208607832571, "grad_norm": 1.8244216653690641, "learning_rate": 1.6329919974673535e-07, "loss": 0.2154, "step": 42751 }, { "epoch": 0.74313824332076, "grad_norm": 1.997883624341866, "learning_rate": 1.632783901268886e-07, "loss": 0.2078, "step": 42752 }, { "epoch": 0.7431556258582628, "grad_norm": 1.2034328203014684, "learning_rate": 1.6325758157429475e-07, "loss": 0.2218, "step": 42753 }, { "epoch": 0.7431730083957656, "grad_norm": 1.2874528438973483, "learning_rate": 1.6323677408901964e-07, "loss": 0.1764, "step": 42754 }, { "epoch": 0.7431903909332684, "grad_norm": 1.1228539826559334, "learning_rate": 1.6321596767112928e-07, "loss": 0.1862, "step": 42755 }, { "epoch": 0.7432077734707713, "grad_norm": 1.3183827059273536, "learning_rate": 1.631951623206894e-07, "loss": 0.246, "step": 42756 }, { "epoch": 0.7432251560082741, "grad_norm": 1.4328472313874792, "learning_rate": 1.6317435803776634e-07, "loss": 0.2968, "step": 42757 }, { "epoch": 0.7432425385457769, "grad_norm": 1.525211204417314, "learning_rate": 1.6315355482242582e-07, "loss": 0.2725, "step": 42758 }, { "epoch": 0.7432599210832798, "grad_norm": 1.0548443437967074, "learning_rate": 1.631327526747338e-07, "loss": 0.2671, "step": 42759 }, { "epoch": 0.7432773036207826, "grad_norm": 2.137106222027608, "learning_rate": 1.6311195159475622e-07, "loss": 0.2285, "step": 42760 }, { "epoch": 0.7432946861582854, "grad_norm": 1.1541097708981163, "learning_rate": 1.6309115158255904e-07, "loss": 0.2066, "step": 42761 }, { "epoch": 0.7433120686957883, "grad_norm": 1.3542669259086817, "learning_rate": 1.6307035263820806e-07, "loss": 0.146, "step": 42762 }, { "epoch": 0.7433294512332911, "grad_norm": 1.4770830891688513, "learning_rate": 1.6304955476176917e-07, "loss": 0.2256, "step": 42763 }, { "epoch": 0.7433468337707939, "grad_norm": 1.3104719746344666, "learning_rate": 1.6302875795330857e-07, "loss": 0.1277, "step": 42764 }, { "epoch": 0.7433642163082966, "grad_norm": 1.13342866954191, "learning_rate": 1.630079622128922e-07, "loss": 0.1283, "step": 42765 }, { "epoch": 0.7433815988457995, "grad_norm": 1.5149519534974278, "learning_rate": 1.629871675405856e-07, "loss": 0.2157, "step": 42766 }, { "epoch": 0.7433989813833023, "grad_norm": 1.1543850979068329, "learning_rate": 1.6296637393645467e-07, "loss": 0.3168, "step": 42767 }, { "epoch": 0.7434163639208051, "grad_norm": 1.233532769163783, "learning_rate": 1.6294558140056574e-07, "loss": 0.2432, "step": 42768 }, { "epoch": 0.743433746458308, "grad_norm": 1.3330853700806222, "learning_rate": 1.6292478993298446e-07, "loss": 0.3133, "step": 42769 }, { "epoch": 0.7434511289958108, "grad_norm": 2.314476807618804, "learning_rate": 1.6290399953377675e-07, "loss": 0.2886, "step": 42770 }, { "epoch": 0.7434685115333136, "grad_norm": 1.3592511642571796, "learning_rate": 1.6288321020300856e-07, "loss": 0.2277, "step": 42771 }, { "epoch": 0.7434858940708164, "grad_norm": 1.5079359305752777, "learning_rate": 1.6286242194074567e-07, "loss": 0.2354, "step": 42772 }, { "epoch": 0.7435032766083193, "grad_norm": 1.9402404376655575, "learning_rate": 1.628416347470541e-07, "loss": 0.2438, "step": 42773 }, { "epoch": 0.7435206591458221, "grad_norm": 1.0633932733868443, "learning_rate": 1.6282084862199956e-07, "loss": 0.2881, "step": 42774 }, { "epoch": 0.7435380416833249, "grad_norm": 1.4851656327328628, "learning_rate": 1.6280006356564796e-07, "loss": 0.1351, "step": 42775 }, { "epoch": 0.7435554242208278, "grad_norm": 1.207885031297636, "learning_rate": 1.6277927957806543e-07, "loss": 0.3217, "step": 42776 }, { "epoch": 0.7435728067583306, "grad_norm": 1.018276341091602, "learning_rate": 1.6275849665931785e-07, "loss": 0.1605, "step": 42777 }, { "epoch": 0.7435901892958334, "grad_norm": 2.3426581069256676, "learning_rate": 1.6273771480947056e-07, "loss": 0.2986, "step": 42778 }, { "epoch": 0.7436075718333363, "grad_norm": 1.8607933467332083, "learning_rate": 1.6271693402858994e-07, "loss": 0.1945, "step": 42779 }, { "epoch": 0.7436249543708391, "grad_norm": 1.153777680184547, "learning_rate": 1.6269615431674172e-07, "loss": 0.2566, "step": 42780 }, { "epoch": 0.7436423369083419, "grad_norm": 2.078420962144731, "learning_rate": 1.6267537567399174e-07, "loss": 0.1625, "step": 42781 }, { "epoch": 0.7436597194458447, "grad_norm": 1.6898752173489124, "learning_rate": 1.6265459810040588e-07, "loss": 0.2279, "step": 42782 }, { "epoch": 0.7436771019833476, "grad_norm": 2.258900833819567, "learning_rate": 1.6263382159604982e-07, "loss": 0.1841, "step": 42783 }, { "epoch": 0.7436944845208504, "grad_norm": 2.8807029820071857, "learning_rate": 1.6261304616098992e-07, "loss": 0.1323, "step": 42784 }, { "epoch": 0.7437118670583531, "grad_norm": 1.5649669655829286, "learning_rate": 1.6259227179529144e-07, "loss": 0.2891, "step": 42785 }, { "epoch": 0.743729249595856, "grad_norm": 1.9667417892492094, "learning_rate": 1.6257149849902034e-07, "loss": 0.2125, "step": 42786 }, { "epoch": 0.7437466321333588, "grad_norm": 0.9866678021327661, "learning_rate": 1.6255072627224282e-07, "loss": 0.4823, "step": 42787 }, { "epoch": 0.7437640146708616, "grad_norm": 0.8252182711935276, "learning_rate": 1.6252995511502438e-07, "loss": 0.385, "step": 42788 }, { "epoch": 0.7437813972083644, "grad_norm": 1.256273628715565, "learning_rate": 1.6250918502743106e-07, "loss": 0.2519, "step": 42789 }, { "epoch": 0.7437987797458673, "grad_norm": 1.7777916847080586, "learning_rate": 1.624884160095285e-07, "loss": 0.2101, "step": 42790 }, { "epoch": 0.7438161622833701, "grad_norm": 1.307177828814433, "learning_rate": 1.624676480613827e-07, "loss": 0.1458, "step": 42791 }, { "epoch": 0.7438335448208729, "grad_norm": 2.2122556144068204, "learning_rate": 1.6244688118305932e-07, "loss": 0.2388, "step": 42792 }, { "epoch": 0.7438509273583758, "grad_norm": 1.2482830761719503, "learning_rate": 1.6242611537462435e-07, "loss": 0.189, "step": 42793 }, { "epoch": 0.7438683098958786, "grad_norm": 0.9845285422973042, "learning_rate": 1.624053506361433e-07, "loss": 0.2618, "step": 42794 }, { "epoch": 0.7438856924333814, "grad_norm": 1.2521531964399517, "learning_rate": 1.6238458696768243e-07, "loss": 0.2837, "step": 42795 }, { "epoch": 0.7439030749708843, "grad_norm": 1.1710079325069442, "learning_rate": 1.6236382436930746e-07, "loss": 0.2222, "step": 42796 }, { "epoch": 0.7439204575083871, "grad_norm": 2.4216946465688984, "learning_rate": 1.623430628410839e-07, "loss": 0.2419, "step": 42797 }, { "epoch": 0.7439378400458899, "grad_norm": 2.0362247372195195, "learning_rate": 1.6232230238307759e-07, "loss": 0.2004, "step": 42798 }, { "epoch": 0.7439552225833928, "grad_norm": 1.1075767799201006, "learning_rate": 1.6230154299535458e-07, "loss": 0.1799, "step": 42799 }, { "epoch": 0.7439726051208956, "grad_norm": 1.6248324959497302, "learning_rate": 1.622807846779806e-07, "loss": 0.3201, "step": 42800 }, { "epoch": 0.7439899876583984, "grad_norm": 2.850442048231282, "learning_rate": 1.6226002743102141e-07, "loss": 0.2349, "step": 42801 }, { "epoch": 0.7440073701959012, "grad_norm": 1.9535985193721035, "learning_rate": 1.6223927125454256e-07, "loss": 0.281, "step": 42802 }, { "epoch": 0.7440247527334041, "grad_norm": 1.6856758920466384, "learning_rate": 1.6221851614861042e-07, "loss": 0.2822, "step": 42803 }, { "epoch": 0.7440421352709069, "grad_norm": 1.5422582738487165, "learning_rate": 1.6219776211329028e-07, "loss": 0.2722, "step": 42804 }, { "epoch": 0.7440595178084096, "grad_norm": 0.8194555167748507, "learning_rate": 1.6217700914864803e-07, "loss": 0.238, "step": 42805 }, { "epoch": 0.7440769003459125, "grad_norm": 1.5056575361354063, "learning_rate": 1.6215625725474936e-07, "loss": 0.2909, "step": 42806 }, { "epoch": 0.7440942828834153, "grad_norm": 2.4133710407010525, "learning_rate": 1.621355064316603e-07, "loss": 0.2226, "step": 42807 }, { "epoch": 0.7441116654209181, "grad_norm": 2.8968254899043586, "learning_rate": 1.6211475667944645e-07, "loss": 0.1957, "step": 42808 }, { "epoch": 0.7441290479584209, "grad_norm": 1.4171694830651405, "learning_rate": 1.6209400799817364e-07, "loss": 0.1903, "step": 42809 }, { "epoch": 0.7441464304959238, "grad_norm": 1.6735901840389977, "learning_rate": 1.620732603879076e-07, "loss": 0.2345, "step": 42810 }, { "epoch": 0.7441638130334266, "grad_norm": 2.766731843749077, "learning_rate": 1.620525138487141e-07, "loss": 0.2561, "step": 42811 }, { "epoch": 0.7441811955709294, "grad_norm": 0.9755687803393573, "learning_rate": 1.6203176838065884e-07, "loss": 0.1254, "step": 42812 }, { "epoch": 0.7441985781084323, "grad_norm": 2.145509744457194, "learning_rate": 1.6201102398380774e-07, "loss": 0.1935, "step": 42813 }, { "epoch": 0.7442159606459351, "grad_norm": 1.5359930946467815, "learning_rate": 1.6199028065822617e-07, "loss": 0.1958, "step": 42814 }, { "epoch": 0.7442333431834379, "grad_norm": 1.1347357555418829, "learning_rate": 1.6196953840398048e-07, "loss": 0.3937, "step": 42815 }, { "epoch": 0.7442507257209408, "grad_norm": 1.6490689478739444, "learning_rate": 1.6194879722113592e-07, "loss": 0.241, "step": 42816 }, { "epoch": 0.7442681082584436, "grad_norm": 1.9078188407141305, "learning_rate": 1.6192805710975825e-07, "loss": 0.2188, "step": 42817 }, { "epoch": 0.7442854907959464, "grad_norm": 2.322818678815459, "learning_rate": 1.6190731806991347e-07, "loss": 0.1932, "step": 42818 }, { "epoch": 0.7443028733334492, "grad_norm": 1.7701926777640633, "learning_rate": 1.6188658010166723e-07, "loss": 0.3458, "step": 42819 }, { "epoch": 0.7443202558709521, "grad_norm": 1.2308223097783653, "learning_rate": 1.6186584320508517e-07, "loss": 0.2639, "step": 42820 }, { "epoch": 0.7443376384084549, "grad_norm": 2.721380395932927, "learning_rate": 1.6184510738023306e-07, "loss": 0.271, "step": 42821 }, { "epoch": 0.7443550209459577, "grad_norm": 3.714716947248585, "learning_rate": 1.6182437262717663e-07, "loss": 0.2974, "step": 42822 }, { "epoch": 0.7443724034834606, "grad_norm": 3.2788793458729564, "learning_rate": 1.6180363894598164e-07, "loss": 0.2703, "step": 42823 }, { "epoch": 0.7443897860209634, "grad_norm": 1.7399220472760106, "learning_rate": 1.6178290633671372e-07, "loss": 0.1589, "step": 42824 }, { "epoch": 0.7444071685584661, "grad_norm": 2.572587819456729, "learning_rate": 1.6176217479943844e-07, "loss": 0.2023, "step": 42825 }, { "epoch": 0.744424551095969, "grad_norm": 1.27251308554951, "learning_rate": 1.6174144433422188e-07, "loss": 0.2287, "step": 42826 }, { "epoch": 0.7444419336334718, "grad_norm": 1.4462399591433897, "learning_rate": 1.6172071494112954e-07, "loss": 0.1967, "step": 42827 }, { "epoch": 0.7444593161709746, "grad_norm": 1.023926372777666, "learning_rate": 1.6169998662022733e-07, "loss": 0.2099, "step": 42828 }, { "epoch": 0.7444766987084774, "grad_norm": 0.9537429071505293, "learning_rate": 1.6167925937158045e-07, "loss": 0.1829, "step": 42829 }, { "epoch": 0.7444940812459803, "grad_norm": 2.9128441179108164, "learning_rate": 1.6165853319525502e-07, "loss": 0.2642, "step": 42830 }, { "epoch": 0.7445114637834831, "grad_norm": 1.5609733120283324, "learning_rate": 1.6163780809131666e-07, "loss": 0.2568, "step": 42831 }, { "epoch": 0.7445288463209859, "grad_norm": 1.2822729727895406, "learning_rate": 1.61617084059831e-07, "loss": 0.1812, "step": 42832 }, { "epoch": 0.7445462288584888, "grad_norm": 2.204667578995966, "learning_rate": 1.6159636110086354e-07, "loss": 0.1592, "step": 42833 }, { "epoch": 0.7445636113959916, "grad_norm": 3.415351484333755, "learning_rate": 1.6157563921448053e-07, "loss": 0.2401, "step": 42834 }, { "epoch": 0.7445809939334944, "grad_norm": 5.860826410358067, "learning_rate": 1.6155491840074713e-07, "loss": 0.2581, "step": 42835 }, { "epoch": 0.7445983764709972, "grad_norm": 1.2732339075756902, "learning_rate": 1.6153419865972916e-07, "loss": 0.2119, "step": 42836 }, { "epoch": 0.7446157590085001, "grad_norm": 1.9343524653982573, "learning_rate": 1.6151347999149218e-07, "loss": 0.2299, "step": 42837 }, { "epoch": 0.7446331415460029, "grad_norm": 1.218141415045815, "learning_rate": 1.6149276239610205e-07, "loss": 0.095, "step": 42838 }, { "epoch": 0.7446505240835057, "grad_norm": 0.7479869943981934, "learning_rate": 1.6147204587362444e-07, "loss": 0.2618, "step": 42839 }, { "epoch": 0.7446679066210086, "grad_norm": 1.8014552539848623, "learning_rate": 1.614513304241249e-07, "loss": 0.2267, "step": 42840 }, { "epoch": 0.7446852891585114, "grad_norm": 1.5733774925385986, "learning_rate": 1.6143061604766916e-07, "loss": 0.2432, "step": 42841 }, { "epoch": 0.7447026716960142, "grad_norm": 1.5424961259959094, "learning_rate": 1.6140990274432282e-07, "loss": 0.3024, "step": 42842 }, { "epoch": 0.7447200542335171, "grad_norm": 1.3632253732568222, "learning_rate": 1.6138919051415156e-07, "loss": 0.2138, "step": 42843 }, { "epoch": 0.7447374367710198, "grad_norm": 1.9086372211398004, "learning_rate": 1.6136847935722104e-07, "loss": 0.2638, "step": 42844 }, { "epoch": 0.7447548193085226, "grad_norm": 1.747345109419819, "learning_rate": 1.6134776927359668e-07, "loss": 0.1264, "step": 42845 }, { "epoch": 0.7447722018460254, "grad_norm": 1.1842764283820213, "learning_rate": 1.6132706026334453e-07, "loss": 0.1975, "step": 42846 }, { "epoch": 0.7447895843835283, "grad_norm": 2.0660462594156703, "learning_rate": 1.6130635232653012e-07, "loss": 0.4026, "step": 42847 }, { "epoch": 0.7448069669210311, "grad_norm": 1.745637508250397, "learning_rate": 1.6128564546321872e-07, "loss": 0.3196, "step": 42848 }, { "epoch": 0.7448243494585339, "grad_norm": 0.9315155389627912, "learning_rate": 1.6126493967347637e-07, "loss": 0.2032, "step": 42849 }, { "epoch": 0.7448417319960368, "grad_norm": 1.0358532573846932, "learning_rate": 1.612442349573686e-07, "loss": 0.1863, "step": 42850 }, { "epoch": 0.7448591145335396, "grad_norm": 3.324542692174449, "learning_rate": 1.6122353131496098e-07, "loss": 0.2816, "step": 42851 }, { "epoch": 0.7448764970710424, "grad_norm": 2.0388745207441237, "learning_rate": 1.6120282874631896e-07, "loss": 0.2854, "step": 42852 }, { "epoch": 0.7448938796085453, "grad_norm": 1.8501823112491063, "learning_rate": 1.611821272515087e-07, "loss": 0.3573, "step": 42853 }, { "epoch": 0.7449112621460481, "grad_norm": 1.5214858010910601, "learning_rate": 1.6116142683059525e-07, "loss": 0.189, "step": 42854 }, { "epoch": 0.7449286446835509, "grad_norm": 1.2382083067444507, "learning_rate": 1.6114072748364445e-07, "loss": 0.2909, "step": 42855 }, { "epoch": 0.7449460272210537, "grad_norm": 1.5236531057474092, "learning_rate": 1.611200292107217e-07, "loss": 0.3033, "step": 42856 }, { "epoch": 0.7449634097585566, "grad_norm": 1.097937332672344, "learning_rate": 1.6109933201189302e-07, "loss": 0.1604, "step": 42857 }, { "epoch": 0.7449807922960594, "grad_norm": 0.7778590883446357, "learning_rate": 1.610786358872237e-07, "loss": 0.1626, "step": 42858 }, { "epoch": 0.7449981748335622, "grad_norm": 1.3755727695388997, "learning_rate": 1.610579408367795e-07, "loss": 0.3127, "step": 42859 }, { "epoch": 0.7450155573710651, "grad_norm": 1.1874458979638167, "learning_rate": 1.6103724686062587e-07, "loss": 0.1727, "step": 42860 }, { "epoch": 0.7450329399085679, "grad_norm": 1.376637860225219, "learning_rate": 1.6101655395882847e-07, "loss": 0.3348, "step": 42861 }, { "epoch": 0.7450503224460707, "grad_norm": 1.8482906885039398, "learning_rate": 1.6099586213145284e-07, "loss": 0.2823, "step": 42862 }, { "epoch": 0.7450677049835736, "grad_norm": 1.6717452173176999, "learning_rate": 1.6097517137856466e-07, "loss": 0.2583, "step": 42863 }, { "epoch": 0.7450850875210763, "grad_norm": 1.2371149973045035, "learning_rate": 1.6095448170022924e-07, "loss": 0.2179, "step": 42864 }, { "epoch": 0.7451024700585791, "grad_norm": 2.514097368566274, "learning_rate": 1.6093379309651257e-07, "loss": 0.2227, "step": 42865 }, { "epoch": 0.7451198525960819, "grad_norm": 1.6874484686648765, "learning_rate": 1.6091310556748017e-07, "loss": 0.4273, "step": 42866 }, { "epoch": 0.7451372351335848, "grad_norm": 1.4603256753179357, "learning_rate": 1.6089241911319713e-07, "loss": 0.2564, "step": 42867 }, { "epoch": 0.7451546176710876, "grad_norm": 1.1929691090068035, "learning_rate": 1.6087173373372953e-07, "loss": 0.28, "step": 42868 }, { "epoch": 0.7451720002085904, "grad_norm": 1.1045266602771182, "learning_rate": 1.6085104942914274e-07, "loss": 0.2121, "step": 42869 }, { "epoch": 0.7451893827460933, "grad_norm": 1.0859752650422183, "learning_rate": 1.6083036619950235e-07, "loss": 0.2103, "step": 42870 }, { "epoch": 0.7452067652835961, "grad_norm": 1.6052452588074513, "learning_rate": 1.608096840448739e-07, "loss": 0.1777, "step": 42871 }, { "epoch": 0.7452241478210989, "grad_norm": 1.4112520117575205, "learning_rate": 1.6078900296532288e-07, "loss": 0.5461, "step": 42872 }, { "epoch": 0.7452415303586017, "grad_norm": 1.5723001045880944, "learning_rate": 1.6076832296091497e-07, "loss": 0.2715, "step": 42873 }, { "epoch": 0.7452589128961046, "grad_norm": 1.1821399457475108, "learning_rate": 1.607476440317156e-07, "loss": 0.2082, "step": 42874 }, { "epoch": 0.7452762954336074, "grad_norm": 1.64767317056359, "learning_rate": 1.607269661777902e-07, "loss": 0.1814, "step": 42875 }, { "epoch": 0.7452936779711102, "grad_norm": 2.564869306661554, "learning_rate": 1.6070628939920467e-07, "loss": 0.1583, "step": 42876 }, { "epoch": 0.7453110605086131, "grad_norm": 2.0882961375889924, "learning_rate": 1.606856136960243e-07, "loss": 0.2081, "step": 42877 }, { "epoch": 0.7453284430461159, "grad_norm": 0.7243946703282317, "learning_rate": 1.606649390683147e-07, "loss": 0.2764, "step": 42878 }, { "epoch": 0.7453458255836187, "grad_norm": 1.0587498968420768, "learning_rate": 1.606442655161413e-07, "loss": 0.1973, "step": 42879 }, { "epoch": 0.7453632081211216, "grad_norm": 1.4651969001804475, "learning_rate": 1.6062359303956973e-07, "loss": 0.1672, "step": 42880 }, { "epoch": 0.7453805906586244, "grad_norm": 1.7837501842614205, "learning_rate": 1.6060292163866552e-07, "loss": 0.2806, "step": 42881 }, { "epoch": 0.7453979731961272, "grad_norm": 1.6411211174179168, "learning_rate": 1.6058225131349408e-07, "loss": 0.3808, "step": 42882 }, { "epoch": 0.74541535573363, "grad_norm": 2.005228736334495, "learning_rate": 1.6056158206412084e-07, "loss": 0.2572, "step": 42883 }, { "epoch": 0.7454327382711328, "grad_norm": 2.078110027641409, "learning_rate": 1.6054091389061164e-07, "loss": 0.287, "step": 42884 }, { "epoch": 0.7454501208086356, "grad_norm": 1.2557235460750844, "learning_rate": 1.60520246793032e-07, "loss": 0.4613, "step": 42885 }, { "epoch": 0.7454675033461384, "grad_norm": 4.535786671494732, "learning_rate": 1.6049958077144698e-07, "loss": 0.2016, "step": 42886 }, { "epoch": 0.7454848858836413, "grad_norm": 0.8076864063292986, "learning_rate": 1.6047891582592227e-07, "loss": 0.143, "step": 42887 }, { "epoch": 0.7455022684211441, "grad_norm": 3.077745500484852, "learning_rate": 1.6045825195652353e-07, "loss": 0.2171, "step": 42888 }, { "epoch": 0.7455196509586469, "grad_norm": 2.0668815439358434, "learning_rate": 1.6043758916331617e-07, "loss": 0.1691, "step": 42889 }, { "epoch": 0.7455370334961497, "grad_norm": 1.922126526363832, "learning_rate": 1.604169274463657e-07, "loss": 0.3326, "step": 42890 }, { "epoch": 0.7455544160336526, "grad_norm": 2.4327361903932947, "learning_rate": 1.6039626680573754e-07, "loss": 0.3354, "step": 42891 }, { "epoch": 0.7455717985711554, "grad_norm": 1.2091972352420137, "learning_rate": 1.6037560724149723e-07, "loss": 0.1583, "step": 42892 }, { "epoch": 0.7455891811086582, "grad_norm": 1.5325085504914675, "learning_rate": 1.6035494875371025e-07, "loss": 0.1894, "step": 42893 }, { "epoch": 0.7456065636461611, "grad_norm": 1.4527030172523792, "learning_rate": 1.6033429134244204e-07, "loss": 0.3286, "step": 42894 }, { "epoch": 0.7456239461836639, "grad_norm": 1.05636018973477, "learning_rate": 1.6031363500775796e-07, "loss": 0.152, "step": 42895 }, { "epoch": 0.7456413287211667, "grad_norm": 1.1362131047135224, "learning_rate": 1.6029297974972379e-07, "loss": 0.3168, "step": 42896 }, { "epoch": 0.7456587112586696, "grad_norm": 2.7645991825916663, "learning_rate": 1.602723255684049e-07, "loss": 0.2793, "step": 42897 }, { "epoch": 0.7456760937961724, "grad_norm": 1.3864318116723253, "learning_rate": 1.6025167246386645e-07, "loss": 0.1659, "step": 42898 }, { "epoch": 0.7456934763336752, "grad_norm": 1.3601810200806925, "learning_rate": 1.6023102043617426e-07, "loss": 0.1823, "step": 42899 }, { "epoch": 0.745710858871178, "grad_norm": 1.621094642914244, "learning_rate": 1.6021036948539363e-07, "loss": 0.2321, "step": 42900 }, { "epoch": 0.7457282414086809, "grad_norm": 1.5774696050899655, "learning_rate": 1.601897196115901e-07, "loss": 0.1503, "step": 42901 }, { "epoch": 0.7457456239461837, "grad_norm": 1.6206160403325351, "learning_rate": 1.60169070814829e-07, "loss": 0.2827, "step": 42902 }, { "epoch": 0.7457630064836865, "grad_norm": 1.3623136820605963, "learning_rate": 1.6014842309517573e-07, "loss": 0.1838, "step": 42903 }, { "epoch": 0.7457803890211893, "grad_norm": 1.874557537946312, "learning_rate": 1.6012777645269615e-07, "loss": 0.2168, "step": 42904 }, { "epoch": 0.7457977715586921, "grad_norm": 1.5130825765784177, "learning_rate": 1.601071308874552e-07, "loss": 0.294, "step": 42905 }, { "epoch": 0.7458151540961949, "grad_norm": 1.329429586141968, "learning_rate": 1.6008648639951837e-07, "loss": 0.2554, "step": 42906 }, { "epoch": 0.7458325366336978, "grad_norm": 1.901309300013824, "learning_rate": 1.6006584298895142e-07, "loss": 0.3051, "step": 42907 }, { "epoch": 0.7458499191712006, "grad_norm": 0.9563903396122966, "learning_rate": 1.600452006558196e-07, "loss": 0.1977, "step": 42908 }, { "epoch": 0.7458673017087034, "grad_norm": 0.9827254444452329, "learning_rate": 1.6002455940018833e-07, "loss": 0.1417, "step": 42909 }, { "epoch": 0.7458846842462062, "grad_norm": 0.9219080323454405, "learning_rate": 1.6000391922212296e-07, "loss": 0.1843, "step": 42910 }, { "epoch": 0.7459020667837091, "grad_norm": 1.245619696587737, "learning_rate": 1.5998328012168911e-07, "loss": 0.1757, "step": 42911 }, { "epoch": 0.7459194493212119, "grad_norm": 1.3629709335837514, "learning_rate": 1.59962642098952e-07, "loss": 0.2582, "step": 42912 }, { "epoch": 0.7459368318587147, "grad_norm": 1.1093972450883411, "learning_rate": 1.599420051539771e-07, "loss": 0.1067, "step": 42913 }, { "epoch": 0.7459542143962176, "grad_norm": 1.891470399987106, "learning_rate": 1.599213692868297e-07, "loss": 0.1827, "step": 42914 }, { "epoch": 0.7459715969337204, "grad_norm": 1.6298524425301024, "learning_rate": 1.5990073449757546e-07, "loss": 0.2727, "step": 42915 }, { "epoch": 0.7459889794712232, "grad_norm": 1.3155361326439379, "learning_rate": 1.5988010078627985e-07, "loss": 0.2275, "step": 42916 }, { "epoch": 0.746006362008726, "grad_norm": 1.6048205256205306, "learning_rate": 1.5985946815300793e-07, "loss": 0.1802, "step": 42917 }, { "epoch": 0.7460237445462289, "grad_norm": 2.687511958848064, "learning_rate": 1.598388365978251e-07, "loss": 0.2186, "step": 42918 }, { "epoch": 0.7460411270837317, "grad_norm": 2.147302555964707, "learning_rate": 1.5981820612079705e-07, "loss": 0.2103, "step": 42919 }, { "epoch": 0.7460585096212345, "grad_norm": 1.064502308897075, "learning_rate": 1.59797576721989e-07, "loss": 0.1639, "step": 42920 }, { "epoch": 0.7460758921587374, "grad_norm": 1.3834341749162153, "learning_rate": 1.597769484014664e-07, "loss": 0.2358, "step": 42921 }, { "epoch": 0.7460932746962402, "grad_norm": 1.7635008046599656, "learning_rate": 1.5975632115929438e-07, "loss": 0.178, "step": 42922 }, { "epoch": 0.746110657233743, "grad_norm": 1.481459319682891, "learning_rate": 1.5973569499553884e-07, "loss": 0.1327, "step": 42923 }, { "epoch": 0.7461280397712458, "grad_norm": 1.149032723568243, "learning_rate": 1.5971506991026473e-07, "loss": 0.2641, "step": 42924 }, { "epoch": 0.7461454223087486, "grad_norm": 1.4931221947370918, "learning_rate": 1.5969444590353748e-07, "loss": 0.2802, "step": 42925 }, { "epoch": 0.7461628048462514, "grad_norm": 1.6789977119719623, "learning_rate": 1.5967382297542236e-07, "loss": 0.185, "step": 42926 }, { "epoch": 0.7461801873837542, "grad_norm": 1.5092648112425158, "learning_rate": 1.5965320112598513e-07, "loss": 0.1847, "step": 42927 }, { "epoch": 0.7461975699212571, "grad_norm": 1.147947116733891, "learning_rate": 1.5963258035529082e-07, "loss": 0.424, "step": 42928 }, { "epoch": 0.7462149524587599, "grad_norm": 1.9079654470404441, "learning_rate": 1.5961196066340494e-07, "loss": 0.3233, "step": 42929 }, { "epoch": 0.7462323349962627, "grad_norm": 1.10001530864499, "learning_rate": 1.595913420503927e-07, "loss": 0.1436, "step": 42930 }, { "epoch": 0.7462497175337656, "grad_norm": 0.78670524265637, "learning_rate": 1.5957072451631965e-07, "loss": 0.2438, "step": 42931 }, { "epoch": 0.7462671000712684, "grad_norm": 1.409296429650065, "learning_rate": 1.595501080612509e-07, "loss": 0.1518, "step": 42932 }, { "epoch": 0.7462844826087712, "grad_norm": 1.6900157928769217, "learning_rate": 1.5952949268525202e-07, "loss": 0.2014, "step": 42933 }, { "epoch": 0.7463018651462741, "grad_norm": 0.8078283573621503, "learning_rate": 1.5950887838838807e-07, "loss": 0.2824, "step": 42934 }, { "epoch": 0.7463192476837769, "grad_norm": 4.530024994254607, "learning_rate": 1.5948826517072482e-07, "loss": 0.2682, "step": 42935 }, { "epoch": 0.7463366302212797, "grad_norm": 1.1072675912481493, "learning_rate": 1.5946765303232724e-07, "loss": 0.316, "step": 42936 }, { "epoch": 0.7463540127587825, "grad_norm": 1.7305689189107432, "learning_rate": 1.5944704197326058e-07, "loss": 0.2679, "step": 42937 }, { "epoch": 0.7463713952962854, "grad_norm": 1.3620675820906565, "learning_rate": 1.5942643199359057e-07, "loss": 0.1876, "step": 42938 }, { "epoch": 0.7463887778337882, "grad_norm": 1.831645711190335, "learning_rate": 1.594058230933823e-07, "loss": 0.2255, "step": 42939 }, { "epoch": 0.746406160371291, "grad_norm": 1.6748195666817414, "learning_rate": 1.593852152727011e-07, "loss": 0.1966, "step": 42940 }, { "epoch": 0.7464235429087939, "grad_norm": 1.7819853088029207, "learning_rate": 1.593646085316123e-07, "loss": 0.2615, "step": 42941 }, { "epoch": 0.7464409254462967, "grad_norm": 1.5025772047071466, "learning_rate": 1.5934400287018128e-07, "loss": 0.2841, "step": 42942 }, { "epoch": 0.7464583079837995, "grad_norm": 0.9239353375175257, "learning_rate": 1.5932339828847323e-07, "loss": 0.1731, "step": 42943 }, { "epoch": 0.7464756905213022, "grad_norm": 0.9373216803599578, "learning_rate": 1.593027947865535e-07, "loss": 0.2765, "step": 42944 }, { "epoch": 0.7464930730588051, "grad_norm": 1.116419115894022, "learning_rate": 1.5928219236448732e-07, "loss": 0.2629, "step": 42945 }, { "epoch": 0.7465104555963079, "grad_norm": 1.4953433700411625, "learning_rate": 1.5926159102234016e-07, "loss": 0.2247, "step": 42946 }, { "epoch": 0.7465278381338107, "grad_norm": 1.9313965849096038, "learning_rate": 1.5924099076017727e-07, "loss": 0.3835, "step": 42947 }, { "epoch": 0.7465452206713136, "grad_norm": 1.350141832830175, "learning_rate": 1.5922039157806405e-07, "loss": 0.1381, "step": 42948 }, { "epoch": 0.7465626032088164, "grad_norm": 1.2415008053974048, "learning_rate": 1.5919979347606533e-07, "loss": 0.1584, "step": 42949 }, { "epoch": 0.7465799857463192, "grad_norm": 2.366579820516033, "learning_rate": 1.5917919645424688e-07, "loss": 0.2556, "step": 42950 }, { "epoch": 0.7465973682838221, "grad_norm": 1.0519824446146908, "learning_rate": 1.5915860051267382e-07, "loss": 0.2748, "step": 42951 }, { "epoch": 0.7466147508213249, "grad_norm": 1.6832981107533795, "learning_rate": 1.5913800565141145e-07, "loss": 0.24, "step": 42952 }, { "epoch": 0.7466321333588277, "grad_norm": 1.1601073168930607, "learning_rate": 1.591174118705248e-07, "loss": 0.3054, "step": 42953 }, { "epoch": 0.7466495158963306, "grad_norm": 0.9883505018612735, "learning_rate": 1.5909681917007973e-07, "loss": 0.2958, "step": 42954 }, { "epoch": 0.7466668984338334, "grad_norm": 2.1335220359662066, "learning_rate": 1.59076227550141e-07, "loss": 0.2523, "step": 42955 }, { "epoch": 0.7466842809713362, "grad_norm": 1.7622242901510365, "learning_rate": 1.5905563701077378e-07, "loss": 0.2083, "step": 42956 }, { "epoch": 0.746701663508839, "grad_norm": 1.5693233048105895, "learning_rate": 1.5903504755204383e-07, "loss": 0.3126, "step": 42957 }, { "epoch": 0.7467190460463419, "grad_norm": 1.2876472206832261, "learning_rate": 1.5901445917401614e-07, "loss": 0.1566, "step": 42958 }, { "epoch": 0.7467364285838447, "grad_norm": 1.6023452207162348, "learning_rate": 1.5899387187675595e-07, "loss": 0.3804, "step": 42959 }, { "epoch": 0.7467538111213475, "grad_norm": 1.919229825755799, "learning_rate": 1.5897328566032852e-07, "loss": 0.2638, "step": 42960 }, { "epoch": 0.7467711936588504, "grad_norm": 1.427674234808894, "learning_rate": 1.5895270052479915e-07, "loss": 0.2566, "step": 42961 }, { "epoch": 0.7467885761963532, "grad_norm": 1.391086674578164, "learning_rate": 1.5893211647023302e-07, "loss": 0.197, "step": 42962 }, { "epoch": 0.746805958733856, "grad_norm": 1.6223871555444647, "learning_rate": 1.5891153349669545e-07, "loss": 0.2882, "step": 42963 }, { "epoch": 0.7468233412713587, "grad_norm": 1.6837888750711796, "learning_rate": 1.5889095160425146e-07, "loss": 0.3832, "step": 42964 }, { "epoch": 0.7468407238088616, "grad_norm": 9.648591645100812, "learning_rate": 1.588703707929666e-07, "loss": 0.3271, "step": 42965 }, { "epoch": 0.7468581063463644, "grad_norm": 1.0898829818640285, "learning_rate": 1.5884979106290602e-07, "loss": 0.1767, "step": 42966 }, { "epoch": 0.7468754888838672, "grad_norm": 1.8061469778152963, "learning_rate": 1.58829212414135e-07, "loss": 0.1503, "step": 42967 }, { "epoch": 0.7468928714213701, "grad_norm": 2.4283221345181163, "learning_rate": 1.5880863484671835e-07, "loss": 0.3978, "step": 42968 }, { "epoch": 0.7469102539588729, "grad_norm": 0.9165521716348357, "learning_rate": 1.5878805836072174e-07, "loss": 0.1864, "step": 42969 }, { "epoch": 0.7469276364963757, "grad_norm": 2.1262337689024435, "learning_rate": 1.587674829562103e-07, "loss": 0.2261, "step": 42970 }, { "epoch": 0.7469450190338786, "grad_norm": 0.9381262762340897, "learning_rate": 1.5874690863324913e-07, "loss": 0.2692, "step": 42971 }, { "epoch": 0.7469624015713814, "grad_norm": 1.5571144111743767, "learning_rate": 1.587263353919034e-07, "loss": 0.1575, "step": 42972 }, { "epoch": 0.7469797841088842, "grad_norm": 1.6839676373628252, "learning_rate": 1.5870576323223865e-07, "loss": 0.1809, "step": 42973 }, { "epoch": 0.746997166646387, "grad_norm": 1.975879345145269, "learning_rate": 1.5868519215431974e-07, "loss": 0.4447, "step": 42974 }, { "epoch": 0.7470145491838899, "grad_norm": 0.9155500986773936, "learning_rate": 1.5866462215821202e-07, "loss": 0.1684, "step": 42975 }, { "epoch": 0.7470319317213927, "grad_norm": 1.0091442716750019, "learning_rate": 1.5864405324398044e-07, "loss": 0.1419, "step": 42976 }, { "epoch": 0.7470493142588955, "grad_norm": 1.461995521167248, "learning_rate": 1.5862348541169056e-07, "loss": 0.1538, "step": 42977 }, { "epoch": 0.7470666967963984, "grad_norm": 2.902438744531139, "learning_rate": 1.5860291866140745e-07, "loss": 0.4058, "step": 42978 }, { "epoch": 0.7470840793339012, "grad_norm": 1.7012733067204646, "learning_rate": 1.5858235299319628e-07, "loss": 0.254, "step": 42979 }, { "epoch": 0.747101461871404, "grad_norm": 2.6050573091685174, "learning_rate": 1.585617884071222e-07, "loss": 0.2074, "step": 42980 }, { "epoch": 0.7471188444089069, "grad_norm": 1.7315456837013636, "learning_rate": 1.585412249032504e-07, "loss": 0.1857, "step": 42981 }, { "epoch": 0.7471362269464097, "grad_norm": 1.1168230592613821, "learning_rate": 1.5852066248164603e-07, "loss": 0.352, "step": 42982 }, { "epoch": 0.7471536094839124, "grad_norm": 1.4963845056695233, "learning_rate": 1.5850010114237432e-07, "loss": 0.1688, "step": 42983 }, { "epoch": 0.7471709920214152, "grad_norm": 1.0342907928727507, "learning_rate": 1.584795408855003e-07, "loss": 0.2067, "step": 42984 }, { "epoch": 0.7471883745589181, "grad_norm": 0.88768396928597, "learning_rate": 1.5845898171108933e-07, "loss": 0.1572, "step": 42985 }, { "epoch": 0.7472057570964209, "grad_norm": 1.183706376420739, "learning_rate": 1.5843842361920674e-07, "loss": 0.2764, "step": 42986 }, { "epoch": 0.7472231396339237, "grad_norm": 1.4656705546948867, "learning_rate": 1.5841786660991708e-07, "loss": 0.2439, "step": 42987 }, { "epoch": 0.7472405221714266, "grad_norm": 1.5417487619486212, "learning_rate": 1.5839731068328605e-07, "loss": 0.2126, "step": 42988 }, { "epoch": 0.7472579047089294, "grad_norm": 1.2575390504574187, "learning_rate": 1.583767558393786e-07, "loss": 0.1065, "step": 42989 }, { "epoch": 0.7472752872464322, "grad_norm": 1.6014827576735844, "learning_rate": 1.5835620207825995e-07, "loss": 0.2148, "step": 42990 }, { "epoch": 0.747292669783935, "grad_norm": 1.886943718668404, "learning_rate": 1.5833564939999516e-07, "loss": 0.1546, "step": 42991 }, { "epoch": 0.7473100523214379, "grad_norm": 1.6488355229987055, "learning_rate": 1.5831509780464946e-07, "loss": 0.1589, "step": 42992 }, { "epoch": 0.7473274348589407, "grad_norm": 1.4160362782289397, "learning_rate": 1.582945472922879e-07, "loss": 0.2341, "step": 42993 }, { "epoch": 0.7473448173964435, "grad_norm": 1.74255393229542, "learning_rate": 1.582739978629757e-07, "loss": 0.4278, "step": 42994 }, { "epoch": 0.7473621999339464, "grad_norm": 1.5234270012152638, "learning_rate": 1.582534495167777e-07, "loss": 0.2235, "step": 42995 }, { "epoch": 0.7473795824714492, "grad_norm": 1.343906959619842, "learning_rate": 1.582329022537595e-07, "loss": 0.1593, "step": 42996 }, { "epoch": 0.747396965008952, "grad_norm": 1.1599869441668698, "learning_rate": 1.5821235607398603e-07, "loss": 0.3916, "step": 42997 }, { "epoch": 0.7474143475464549, "grad_norm": 1.109479955750472, "learning_rate": 1.5819181097752239e-07, "loss": 0.1831, "step": 42998 }, { "epoch": 0.7474317300839577, "grad_norm": 2.4210777623500643, "learning_rate": 1.5817126696443363e-07, "loss": 0.2616, "step": 42999 }, { "epoch": 0.7474491126214605, "grad_norm": 2.8200284067991186, "learning_rate": 1.5815072403478497e-07, "loss": 0.2351, "step": 43000 }, { "epoch": 0.7474664951589634, "grad_norm": 1.5831792145034866, "learning_rate": 1.581301821886415e-07, "loss": 0.2599, "step": 43001 }, { "epoch": 0.7474838776964662, "grad_norm": 1.8311591785140409, "learning_rate": 1.5810964142606832e-07, "loss": 0.2367, "step": 43002 }, { "epoch": 0.7475012602339689, "grad_norm": 0.8167896933624774, "learning_rate": 1.580891017471303e-07, "loss": 0.1903, "step": 43003 }, { "epoch": 0.7475186427714717, "grad_norm": 1.4353333219202336, "learning_rate": 1.5806856315189298e-07, "loss": 0.2058, "step": 43004 }, { "epoch": 0.7475360253089746, "grad_norm": 1.4601662903615136, "learning_rate": 1.5804802564042136e-07, "loss": 0.2281, "step": 43005 }, { "epoch": 0.7475534078464774, "grad_norm": 1.57248924221715, "learning_rate": 1.580274892127803e-07, "loss": 0.3656, "step": 43006 }, { "epoch": 0.7475707903839802, "grad_norm": 2.598540354840629, "learning_rate": 1.5800695386903485e-07, "loss": 0.2391, "step": 43007 }, { "epoch": 0.747588172921483, "grad_norm": 1.4731415906053782, "learning_rate": 1.5798641960925034e-07, "loss": 0.1999, "step": 43008 }, { "epoch": 0.7476055554589859, "grad_norm": 1.2653214461720426, "learning_rate": 1.579658864334919e-07, "loss": 0.1847, "step": 43009 }, { "epoch": 0.7476229379964887, "grad_norm": 1.6006226775673442, "learning_rate": 1.5794535434182437e-07, "loss": 0.2242, "step": 43010 }, { "epoch": 0.7476403205339915, "grad_norm": 1.6872114657912338, "learning_rate": 1.57924823334313e-07, "loss": 0.2231, "step": 43011 }, { "epoch": 0.7476577030714944, "grad_norm": 5.163068813133375, "learning_rate": 1.579042934110228e-07, "loss": 0.1232, "step": 43012 }, { "epoch": 0.7476750856089972, "grad_norm": 1.4554983600883111, "learning_rate": 1.5788376457201885e-07, "loss": 0.1914, "step": 43013 }, { "epoch": 0.7476924681465, "grad_norm": 1.8191057650753055, "learning_rate": 1.578632368173662e-07, "loss": 0.137, "step": 43014 }, { "epoch": 0.7477098506840029, "grad_norm": 1.9793044373444064, "learning_rate": 1.5784271014712975e-07, "loss": 0.249, "step": 43015 }, { "epoch": 0.7477272332215057, "grad_norm": 1.4822352361974815, "learning_rate": 1.5782218456137492e-07, "loss": 0.1911, "step": 43016 }, { "epoch": 0.7477446157590085, "grad_norm": 1.3894350122114754, "learning_rate": 1.5780166006016672e-07, "loss": 0.191, "step": 43017 }, { "epoch": 0.7477619982965114, "grad_norm": 1.1740507440089298, "learning_rate": 1.5778113664356972e-07, "loss": 0.2075, "step": 43018 }, { "epoch": 0.7477793808340142, "grad_norm": 1.5657189692428644, "learning_rate": 1.577606143116495e-07, "loss": 0.2391, "step": 43019 }, { "epoch": 0.747796763371517, "grad_norm": 2.197059022767012, "learning_rate": 1.5774009306447093e-07, "loss": 0.205, "step": 43020 }, { "epoch": 0.7478141459090198, "grad_norm": 2.2473416005899436, "learning_rate": 1.5771957290209903e-07, "loss": 0.23, "step": 43021 }, { "epoch": 0.7478315284465227, "grad_norm": 1.0956221213028823, "learning_rate": 1.5769905382459885e-07, "loss": 0.2662, "step": 43022 }, { "epoch": 0.7478489109840254, "grad_norm": 1.5791273730114552, "learning_rate": 1.5767853583203533e-07, "loss": 0.2878, "step": 43023 }, { "epoch": 0.7478662935215282, "grad_norm": 1.3293948993007132, "learning_rate": 1.5765801892447384e-07, "loss": 0.3355, "step": 43024 }, { "epoch": 0.747883676059031, "grad_norm": 0.9153217347254825, "learning_rate": 1.5763750310197903e-07, "loss": 0.1901, "step": 43025 }, { "epoch": 0.7479010585965339, "grad_norm": 2.019626403902373, "learning_rate": 1.5761698836461596e-07, "loss": 0.2374, "step": 43026 }, { "epoch": 0.7479184411340367, "grad_norm": 6.70875401537105, "learning_rate": 1.5759647471244984e-07, "loss": 0.4926, "step": 43027 }, { "epoch": 0.7479358236715395, "grad_norm": 1.6510490239916271, "learning_rate": 1.575759621455457e-07, "loss": 0.3328, "step": 43028 }, { "epoch": 0.7479532062090424, "grad_norm": 1.2749738890701139, "learning_rate": 1.575554506639684e-07, "loss": 0.1377, "step": 43029 }, { "epoch": 0.7479705887465452, "grad_norm": 1.7792547314760356, "learning_rate": 1.5753494026778302e-07, "loss": 0.1395, "step": 43030 }, { "epoch": 0.747987971284048, "grad_norm": 0.758564868439719, "learning_rate": 1.5751443095705457e-07, "loss": 0.193, "step": 43031 }, { "epoch": 0.7480053538215509, "grad_norm": 1.0890555259787857, "learning_rate": 1.5749392273184804e-07, "loss": 0.1373, "step": 43032 }, { "epoch": 0.7480227363590537, "grad_norm": 1.2486731568313842, "learning_rate": 1.5747341559222844e-07, "loss": 0.1776, "step": 43033 }, { "epoch": 0.7480401188965565, "grad_norm": 1.854886694532607, "learning_rate": 1.5745290953826063e-07, "loss": 0.4185, "step": 43034 }, { "epoch": 0.7480575014340594, "grad_norm": 1.2270095901204157, "learning_rate": 1.5743240457000988e-07, "loss": 0.1987, "step": 43035 }, { "epoch": 0.7480748839715622, "grad_norm": 0.9481860710047126, "learning_rate": 1.5741190068754124e-07, "loss": 0.1746, "step": 43036 }, { "epoch": 0.748092266509065, "grad_norm": 5.131624051285431, "learning_rate": 1.5739139789091933e-07, "loss": 0.2056, "step": 43037 }, { "epoch": 0.7481096490465678, "grad_norm": 1.8982764711123978, "learning_rate": 1.573708961802091e-07, "loss": 0.1325, "step": 43038 }, { "epoch": 0.7481270315840707, "grad_norm": 1.54959973015281, "learning_rate": 1.5735039555547597e-07, "loss": 0.2556, "step": 43039 }, { "epoch": 0.7481444141215735, "grad_norm": 1.752053643151056, "learning_rate": 1.5732989601678466e-07, "loss": 0.2609, "step": 43040 }, { "epoch": 0.7481617966590763, "grad_norm": 1.3198253588916393, "learning_rate": 1.5730939756420013e-07, "loss": 0.3302, "step": 43041 }, { "epoch": 0.7481791791965792, "grad_norm": 1.0261287494559965, "learning_rate": 1.5728890019778723e-07, "loss": 0.2606, "step": 43042 }, { "epoch": 0.7481965617340819, "grad_norm": 1.432294601530482, "learning_rate": 1.5726840391761147e-07, "loss": 0.2157, "step": 43043 }, { "epoch": 0.7482139442715847, "grad_norm": 1.7808778654375574, "learning_rate": 1.5724790872373723e-07, "loss": 0.2336, "step": 43044 }, { "epoch": 0.7482313268090875, "grad_norm": 2.4156973745228107, "learning_rate": 1.5722741461622952e-07, "loss": 0.3755, "step": 43045 }, { "epoch": 0.7482487093465904, "grad_norm": 1.6791238156045833, "learning_rate": 1.5720692159515364e-07, "loss": 0.2245, "step": 43046 }, { "epoch": 0.7482660918840932, "grad_norm": 1.3935047835696435, "learning_rate": 1.5718642966057432e-07, "loss": 0.3209, "step": 43047 }, { "epoch": 0.748283474421596, "grad_norm": 2.1766399186435295, "learning_rate": 1.5716593881255652e-07, "loss": 0.1651, "step": 43048 }, { "epoch": 0.7483008569590989, "grad_norm": 2.4373450460285917, "learning_rate": 1.5714544905116522e-07, "loss": 0.2081, "step": 43049 }, { "epoch": 0.7483182394966017, "grad_norm": 3.4854378686750165, "learning_rate": 1.571249603764654e-07, "loss": 0.2637, "step": 43050 }, { "epoch": 0.7483356220341045, "grad_norm": 1.2109984373993263, "learning_rate": 1.571044727885219e-07, "loss": 0.2496, "step": 43051 }, { "epoch": 0.7483530045716074, "grad_norm": 1.4672725945106206, "learning_rate": 1.5708398628739972e-07, "loss": 0.1822, "step": 43052 }, { "epoch": 0.7483703871091102, "grad_norm": 2.672625231395157, "learning_rate": 1.5706350087316362e-07, "loss": 0.2921, "step": 43053 }, { "epoch": 0.748387769646613, "grad_norm": 1.193337151930557, "learning_rate": 1.5704301654587887e-07, "loss": 0.1851, "step": 43054 }, { "epoch": 0.7484051521841159, "grad_norm": 1.256309465984634, "learning_rate": 1.5702253330561038e-07, "loss": 0.1942, "step": 43055 }, { "epoch": 0.7484225347216187, "grad_norm": 1.2284854147325563, "learning_rate": 1.570020511524227e-07, "loss": 0.1933, "step": 43056 }, { "epoch": 0.7484399172591215, "grad_norm": 2.4185999566994036, "learning_rate": 1.5698157008638087e-07, "loss": 0.2987, "step": 43057 }, { "epoch": 0.7484572997966243, "grad_norm": 1.456725645833277, "learning_rate": 1.5696109010754998e-07, "loss": 0.1829, "step": 43058 }, { "epoch": 0.7484746823341272, "grad_norm": 1.1082167841399688, "learning_rate": 1.5694061121599488e-07, "loss": 0.334, "step": 43059 }, { "epoch": 0.74849206487163, "grad_norm": 1.3719224877321026, "learning_rate": 1.5692013341178045e-07, "loss": 0.458, "step": 43060 }, { "epoch": 0.7485094474091328, "grad_norm": 1.4759389665058131, "learning_rate": 1.5689965669497145e-07, "loss": 0.2514, "step": 43061 }, { "epoch": 0.7485268299466357, "grad_norm": 0.6534634712517965, "learning_rate": 1.5687918106563325e-07, "loss": 0.2153, "step": 43062 }, { "epoch": 0.7485442124841384, "grad_norm": 1.8314810330634104, "learning_rate": 1.5685870652383033e-07, "loss": 0.3191, "step": 43063 }, { "epoch": 0.7485615950216412, "grad_norm": 2.426241476125032, "learning_rate": 1.5683823306962758e-07, "loss": 0.2518, "step": 43064 }, { "epoch": 0.748578977559144, "grad_norm": 1.682837097901593, "learning_rate": 1.5681776070308989e-07, "loss": 0.4562, "step": 43065 }, { "epoch": 0.7485963600966469, "grad_norm": 2.033065806000661, "learning_rate": 1.567972894242824e-07, "loss": 0.2304, "step": 43066 }, { "epoch": 0.7486137426341497, "grad_norm": 0.9908407982499479, "learning_rate": 1.5677681923326986e-07, "loss": 0.203, "step": 43067 }, { "epoch": 0.7486311251716525, "grad_norm": 1.6944433378756358, "learning_rate": 1.5675635013011713e-07, "loss": 0.2099, "step": 43068 }, { "epoch": 0.7486485077091554, "grad_norm": 2.2055869745580696, "learning_rate": 1.567358821148891e-07, "loss": 0.2068, "step": 43069 }, { "epoch": 0.7486658902466582, "grad_norm": 1.3698256072817883, "learning_rate": 1.5671541518765063e-07, "loss": 0.2366, "step": 43070 }, { "epoch": 0.748683272784161, "grad_norm": 2.088007063370618, "learning_rate": 1.5669494934846662e-07, "loss": 0.2333, "step": 43071 }, { "epoch": 0.7487006553216639, "grad_norm": 2.1112523570121566, "learning_rate": 1.566744845974019e-07, "loss": 0.2613, "step": 43072 }, { "epoch": 0.7487180378591667, "grad_norm": 1.863445036160846, "learning_rate": 1.5665402093452117e-07, "loss": 0.1961, "step": 43073 }, { "epoch": 0.7487354203966695, "grad_norm": 0.7090018652319321, "learning_rate": 1.5663355835988983e-07, "loss": 0.2022, "step": 43074 }, { "epoch": 0.7487528029341723, "grad_norm": 1.8435472531925794, "learning_rate": 1.5661309687357222e-07, "loss": 0.2238, "step": 43075 }, { "epoch": 0.7487701854716752, "grad_norm": 1.9534261278593148, "learning_rate": 1.565926364756332e-07, "loss": 0.2547, "step": 43076 }, { "epoch": 0.748787568009178, "grad_norm": 1.4416296308072452, "learning_rate": 1.565721771661379e-07, "loss": 0.2267, "step": 43077 }, { "epoch": 0.7488049505466808, "grad_norm": 1.9980564482993768, "learning_rate": 1.5655171894515112e-07, "loss": 0.2036, "step": 43078 }, { "epoch": 0.7488223330841837, "grad_norm": 1.6609541572931872, "learning_rate": 1.5653126181273757e-07, "loss": 0.1212, "step": 43079 }, { "epoch": 0.7488397156216865, "grad_norm": 1.1040260384669367, "learning_rate": 1.5651080576896213e-07, "loss": 0.1145, "step": 43080 }, { "epoch": 0.7488570981591893, "grad_norm": 1.0994741643686856, "learning_rate": 1.5649035081388968e-07, "loss": 0.2266, "step": 43081 }, { "epoch": 0.7488744806966922, "grad_norm": 1.6947212580653968, "learning_rate": 1.5646989694758493e-07, "loss": 0.2036, "step": 43082 }, { "epoch": 0.7488918632341949, "grad_norm": 1.486682655600095, "learning_rate": 1.564494441701129e-07, "loss": 0.2464, "step": 43083 }, { "epoch": 0.7489092457716977, "grad_norm": 0.8778807331387676, "learning_rate": 1.5642899248153813e-07, "loss": 0.2329, "step": 43084 }, { "epoch": 0.7489266283092005, "grad_norm": 1.03321981022635, "learning_rate": 1.5640854188192582e-07, "loss": 0.1771, "step": 43085 }, { "epoch": 0.7489440108467034, "grad_norm": 1.725556335933013, "learning_rate": 1.5638809237134054e-07, "loss": 0.3857, "step": 43086 }, { "epoch": 0.7489613933842062, "grad_norm": 1.6836097216080397, "learning_rate": 1.5636764394984736e-07, "loss": 0.2777, "step": 43087 }, { "epoch": 0.748978775921709, "grad_norm": 6.305162436012864, "learning_rate": 1.5634719661751062e-07, "loss": 0.1938, "step": 43088 }, { "epoch": 0.7489961584592119, "grad_norm": 1.3888649009222767, "learning_rate": 1.563267503743955e-07, "loss": 0.183, "step": 43089 }, { "epoch": 0.7490135409967147, "grad_norm": 1.375253794014842, "learning_rate": 1.563063052205667e-07, "loss": 0.1974, "step": 43090 }, { "epoch": 0.7490309235342175, "grad_norm": 1.6125326925041843, "learning_rate": 1.5628586115608911e-07, "loss": 0.2461, "step": 43091 }, { "epoch": 0.7490483060717203, "grad_norm": 1.3219227690683446, "learning_rate": 1.5626541818102723e-07, "loss": 0.1763, "step": 43092 }, { "epoch": 0.7490656886092232, "grad_norm": 1.5771419707232484, "learning_rate": 1.5624497629544641e-07, "loss": 0.1457, "step": 43093 }, { "epoch": 0.749083071146726, "grad_norm": 0.7909910903902585, "learning_rate": 1.5622453549941094e-07, "loss": 0.2598, "step": 43094 }, { "epoch": 0.7491004536842288, "grad_norm": 3.6618650574711906, "learning_rate": 1.5620409579298577e-07, "loss": 0.3376, "step": 43095 }, { "epoch": 0.7491178362217317, "grad_norm": 1.9049090729443041, "learning_rate": 1.5618365717623555e-07, "loss": 0.1674, "step": 43096 }, { "epoch": 0.7491352187592345, "grad_norm": 1.641003016529219, "learning_rate": 1.5616321964922536e-07, "loss": 0.1701, "step": 43097 }, { "epoch": 0.7491526012967373, "grad_norm": 1.291010679609397, "learning_rate": 1.561427832120198e-07, "loss": 0.1955, "step": 43098 }, { "epoch": 0.7491699838342402, "grad_norm": 0.9686781642080199, "learning_rate": 1.5612234786468365e-07, "loss": 0.2032, "step": 43099 }, { "epoch": 0.749187366371743, "grad_norm": 1.4163088259917398, "learning_rate": 1.5610191360728176e-07, "loss": 0.3276, "step": 43100 }, { "epoch": 0.7492047489092458, "grad_norm": 1.9866155999904558, "learning_rate": 1.5608148043987874e-07, "loss": 0.2228, "step": 43101 }, { "epoch": 0.7492221314467487, "grad_norm": 1.6824738331785827, "learning_rate": 1.5606104836253952e-07, "loss": 0.1764, "step": 43102 }, { "epoch": 0.7492395139842514, "grad_norm": 0.7467545040431283, "learning_rate": 1.5604061737532874e-07, "loss": 0.2079, "step": 43103 }, { "epoch": 0.7492568965217542, "grad_norm": 2.5513243699172947, "learning_rate": 1.5602018747831102e-07, "loss": 0.3346, "step": 43104 }, { "epoch": 0.749274279059257, "grad_norm": 1.2152696770262648, "learning_rate": 1.559997586715515e-07, "loss": 0.3074, "step": 43105 }, { "epoch": 0.7492916615967599, "grad_norm": 2.17661972717091, "learning_rate": 1.5597933095511488e-07, "loss": 0.1578, "step": 43106 }, { "epoch": 0.7493090441342627, "grad_norm": 1.160973460432679, "learning_rate": 1.5595890432906544e-07, "loss": 0.1373, "step": 43107 }, { "epoch": 0.7493264266717655, "grad_norm": 1.6520070172274504, "learning_rate": 1.5593847879346837e-07, "loss": 0.2975, "step": 43108 }, { "epoch": 0.7493438092092684, "grad_norm": 1.0263063622173925, "learning_rate": 1.559180543483883e-07, "loss": 0.2185, "step": 43109 }, { "epoch": 0.7493611917467712, "grad_norm": 1.4344950543191561, "learning_rate": 1.5589763099388992e-07, "loss": 0.2631, "step": 43110 }, { "epoch": 0.749378574284274, "grad_norm": 2.179818998953792, "learning_rate": 1.5587720873003795e-07, "loss": 0.2241, "step": 43111 }, { "epoch": 0.7493959568217768, "grad_norm": 1.2162699311626946, "learning_rate": 1.5585678755689718e-07, "loss": 0.1532, "step": 43112 }, { "epoch": 0.7494133393592797, "grad_norm": 1.2939409959940347, "learning_rate": 1.5583636747453232e-07, "loss": 0.3325, "step": 43113 }, { "epoch": 0.7494307218967825, "grad_norm": 1.215703164474506, "learning_rate": 1.5581594848300806e-07, "loss": 0.2839, "step": 43114 }, { "epoch": 0.7494481044342853, "grad_norm": 1.7939956225339333, "learning_rate": 1.5579553058238898e-07, "loss": 0.2506, "step": 43115 }, { "epoch": 0.7494654869717882, "grad_norm": 1.9385615487514811, "learning_rate": 1.557751137727401e-07, "loss": 0.3549, "step": 43116 }, { "epoch": 0.749482869509291, "grad_norm": 4.190471638989998, "learning_rate": 1.5575469805412606e-07, "loss": 0.2685, "step": 43117 }, { "epoch": 0.7495002520467938, "grad_norm": 1.2404365588485238, "learning_rate": 1.5573428342661143e-07, "loss": 0.3708, "step": 43118 }, { "epoch": 0.7495176345842967, "grad_norm": 1.6081110323179224, "learning_rate": 1.5571386989026097e-07, "loss": 0.302, "step": 43119 }, { "epoch": 0.7495350171217995, "grad_norm": 1.4754304832640512, "learning_rate": 1.556934574451394e-07, "loss": 0.2481, "step": 43120 }, { "epoch": 0.7495523996593023, "grad_norm": 1.218208381539907, "learning_rate": 1.5567304609131142e-07, "loss": 0.2367, "step": 43121 }, { "epoch": 0.749569782196805, "grad_norm": 1.6842668552506999, "learning_rate": 1.556526358288417e-07, "loss": 0.2902, "step": 43122 }, { "epoch": 0.7495871647343079, "grad_norm": 2.411865611419294, "learning_rate": 1.556322266577948e-07, "loss": 0.3931, "step": 43123 }, { "epoch": 0.7496045472718107, "grad_norm": 1.1168263215546417, "learning_rate": 1.5561181857823568e-07, "loss": 0.1886, "step": 43124 }, { "epoch": 0.7496219298093135, "grad_norm": 1.2433929434429667, "learning_rate": 1.555914115902291e-07, "loss": 0.1979, "step": 43125 }, { "epoch": 0.7496393123468164, "grad_norm": 1.2718490469765478, "learning_rate": 1.5557100569383936e-07, "loss": 0.1803, "step": 43126 }, { "epoch": 0.7496566948843192, "grad_norm": 0.9246184887078274, "learning_rate": 1.555506008891312e-07, "loss": 0.248, "step": 43127 }, { "epoch": 0.749674077421822, "grad_norm": 1.750974965955514, "learning_rate": 1.555301971761695e-07, "loss": 0.2512, "step": 43128 }, { "epoch": 0.7496914599593248, "grad_norm": 1.371962432189188, "learning_rate": 1.5550979455501883e-07, "loss": 0.2668, "step": 43129 }, { "epoch": 0.7497088424968277, "grad_norm": 2.245625617220232, "learning_rate": 1.5548939302574392e-07, "loss": 0.2267, "step": 43130 }, { "epoch": 0.7497262250343305, "grad_norm": 2.0419897250478676, "learning_rate": 1.5546899258840935e-07, "loss": 0.1894, "step": 43131 }, { "epoch": 0.7497436075718333, "grad_norm": 1.4939712935152314, "learning_rate": 1.554485932430798e-07, "loss": 0.1955, "step": 43132 }, { "epoch": 0.7497609901093362, "grad_norm": 1.5235184834428832, "learning_rate": 1.5542819498981996e-07, "loss": 0.1493, "step": 43133 }, { "epoch": 0.749778372646839, "grad_norm": 0.7867714031708023, "learning_rate": 1.5540779782869428e-07, "loss": 0.1588, "step": 43134 }, { "epoch": 0.7497957551843418, "grad_norm": 1.0977781090034335, "learning_rate": 1.553874017597678e-07, "loss": 0.2325, "step": 43135 }, { "epoch": 0.7498131377218447, "grad_norm": 1.3272123400020437, "learning_rate": 1.5536700678310487e-07, "loss": 0.2025, "step": 43136 }, { "epoch": 0.7498305202593475, "grad_norm": 1.5103644953058408, "learning_rate": 1.5534661289877042e-07, "loss": 0.2407, "step": 43137 }, { "epoch": 0.7498479027968503, "grad_norm": 1.3119862065057901, "learning_rate": 1.5532622010682856e-07, "loss": 0.2357, "step": 43138 }, { "epoch": 0.7498652853343531, "grad_norm": 1.3510360679856126, "learning_rate": 1.553058284073444e-07, "loss": 0.2094, "step": 43139 }, { "epoch": 0.749882667871856, "grad_norm": 1.7360403301595968, "learning_rate": 1.5528543780038245e-07, "loss": 0.2286, "step": 43140 }, { "epoch": 0.7499000504093588, "grad_norm": 1.7651989046696426, "learning_rate": 1.552650482860073e-07, "loss": 0.1969, "step": 43141 }, { "epoch": 0.7499174329468615, "grad_norm": 1.0737194822782636, "learning_rate": 1.5524465986428343e-07, "loss": 0.2699, "step": 43142 }, { "epoch": 0.7499348154843644, "grad_norm": 1.3064515136893924, "learning_rate": 1.5522427253527577e-07, "loss": 0.2593, "step": 43143 }, { "epoch": 0.7499521980218672, "grad_norm": 1.5983655539171746, "learning_rate": 1.5520388629904895e-07, "loss": 0.2723, "step": 43144 }, { "epoch": 0.74996958055937, "grad_norm": 1.7846768532987762, "learning_rate": 1.551835011556673e-07, "loss": 0.2622, "step": 43145 }, { "epoch": 0.7499869630968728, "grad_norm": 2.272339310714583, "learning_rate": 1.5516311710519535e-07, "loss": 0.337, "step": 43146 }, { "epoch": 0.7500043456343757, "grad_norm": 2.069686956296988, "learning_rate": 1.5514273414769813e-07, "loss": 0.1975, "step": 43147 }, { "epoch": 0.7500217281718785, "grad_norm": 1.6920651047983266, "learning_rate": 1.5512235228323994e-07, "loss": 0.2385, "step": 43148 }, { "epoch": 0.7500391107093813, "grad_norm": 2.0117135572012765, "learning_rate": 1.5510197151188552e-07, "loss": 0.2935, "step": 43149 }, { "epoch": 0.7500564932468842, "grad_norm": 1.1176351853782425, "learning_rate": 1.5508159183369944e-07, "loss": 0.2459, "step": 43150 }, { "epoch": 0.750073875784387, "grad_norm": 1.0950682197531076, "learning_rate": 1.5506121324874628e-07, "loss": 0.2387, "step": 43151 }, { "epoch": 0.7500912583218898, "grad_norm": 1.2920389442527924, "learning_rate": 1.550408357570906e-07, "loss": 0.2362, "step": 43152 }, { "epoch": 0.7501086408593927, "grad_norm": 2.198893912625416, "learning_rate": 1.5502045935879698e-07, "loss": 0.176, "step": 43153 }, { "epoch": 0.7501260233968955, "grad_norm": 1.217778873470964, "learning_rate": 1.5500008405392988e-07, "loss": 0.1914, "step": 43154 }, { "epoch": 0.7501434059343983, "grad_norm": 7.017323335117627, "learning_rate": 1.549797098425542e-07, "loss": 0.2019, "step": 43155 }, { "epoch": 0.7501607884719012, "grad_norm": 1.1816810466958787, "learning_rate": 1.5495933672473455e-07, "loss": 0.2178, "step": 43156 }, { "epoch": 0.750178171009404, "grad_norm": 1.4923996689649215, "learning_rate": 1.549389647005349e-07, "loss": 0.283, "step": 43157 }, { "epoch": 0.7501955535469068, "grad_norm": 2.8692668567884914, "learning_rate": 1.5491859377002047e-07, "loss": 0.3254, "step": 43158 }, { "epoch": 0.7502129360844096, "grad_norm": 1.0622994413835243, "learning_rate": 1.5489822393325553e-07, "loss": 0.2317, "step": 43159 }, { "epoch": 0.7502303186219125, "grad_norm": 1.5018492132904677, "learning_rate": 1.5487785519030471e-07, "loss": 0.2059, "step": 43160 }, { "epoch": 0.7502477011594153, "grad_norm": 1.295995455288964, "learning_rate": 1.5485748754123257e-07, "loss": 0.169, "step": 43161 }, { "epoch": 0.750265083696918, "grad_norm": 1.2739619897415564, "learning_rate": 1.5483712098610346e-07, "loss": 0.2119, "step": 43162 }, { "epoch": 0.7502824662344209, "grad_norm": 1.4742897495686118, "learning_rate": 1.5481675552498246e-07, "loss": 0.2044, "step": 43163 }, { "epoch": 0.7502998487719237, "grad_norm": 1.5802545160443042, "learning_rate": 1.5479639115793363e-07, "loss": 0.2999, "step": 43164 }, { "epoch": 0.7503172313094265, "grad_norm": 1.4874025938572062, "learning_rate": 1.5477602788502148e-07, "loss": 0.1836, "step": 43165 }, { "epoch": 0.7503346138469293, "grad_norm": 1.5994864472953874, "learning_rate": 1.5475566570631088e-07, "loss": 0.1404, "step": 43166 }, { "epoch": 0.7503519963844322, "grad_norm": 1.1427929075078296, "learning_rate": 1.547353046218663e-07, "loss": 0.2632, "step": 43167 }, { "epoch": 0.750369378921935, "grad_norm": 1.7393642083217626, "learning_rate": 1.5471494463175217e-07, "loss": 0.197, "step": 43168 }, { "epoch": 0.7503867614594378, "grad_norm": 1.0724818220728782, "learning_rate": 1.54694585736033e-07, "loss": 0.1984, "step": 43169 }, { "epoch": 0.7504041439969407, "grad_norm": 1.5626434721141393, "learning_rate": 1.546742279347734e-07, "loss": 0.1335, "step": 43170 }, { "epoch": 0.7504215265344435, "grad_norm": 2.220082135708255, "learning_rate": 1.546538712280379e-07, "loss": 0.2514, "step": 43171 }, { "epoch": 0.7504389090719463, "grad_norm": 1.4464026444510047, "learning_rate": 1.54633515615891e-07, "loss": 0.2335, "step": 43172 }, { "epoch": 0.7504562916094492, "grad_norm": 1.6445464773482281, "learning_rate": 1.54613161098397e-07, "loss": 0.2412, "step": 43173 }, { "epoch": 0.750473674146952, "grad_norm": 2.320462673211591, "learning_rate": 1.5459280767562082e-07, "loss": 0.1808, "step": 43174 }, { "epoch": 0.7504910566844548, "grad_norm": 0.9993046280714202, "learning_rate": 1.5457245534762693e-07, "loss": 0.2908, "step": 43175 }, { "epoch": 0.7505084392219576, "grad_norm": 1.171935673011166, "learning_rate": 1.545521041144795e-07, "loss": 0.1832, "step": 43176 }, { "epoch": 0.7505258217594605, "grad_norm": 1.127843837053862, "learning_rate": 1.54531753976243e-07, "loss": 0.216, "step": 43177 }, { "epoch": 0.7505432042969633, "grad_norm": 0.7106318575416889, "learning_rate": 1.5451140493298242e-07, "loss": 0.2014, "step": 43178 }, { "epoch": 0.7505605868344661, "grad_norm": 1.1495989014695955, "learning_rate": 1.544910569847619e-07, "loss": 0.3234, "step": 43179 }, { "epoch": 0.750577969371969, "grad_norm": 1.5239150500296879, "learning_rate": 1.5447071013164603e-07, "loss": 0.2307, "step": 43180 }, { "epoch": 0.7505953519094718, "grad_norm": 1.840466551943081, "learning_rate": 1.544503643736992e-07, "loss": 0.3235, "step": 43181 }, { "epoch": 0.7506127344469745, "grad_norm": 1.703321516990249, "learning_rate": 1.5443001971098617e-07, "loss": 0.1548, "step": 43182 }, { "epoch": 0.7506301169844773, "grad_norm": 1.2757014888192477, "learning_rate": 1.544096761435712e-07, "loss": 0.2722, "step": 43183 }, { "epoch": 0.7506474995219802, "grad_norm": 1.2167751909323292, "learning_rate": 1.5438933367151873e-07, "loss": 0.3932, "step": 43184 }, { "epoch": 0.750664882059483, "grad_norm": 1.8286156836132068, "learning_rate": 1.5436899229489314e-07, "loss": 0.2686, "step": 43185 }, { "epoch": 0.7506822645969858, "grad_norm": 1.1395326220167257, "learning_rate": 1.543486520137593e-07, "loss": 0.2511, "step": 43186 }, { "epoch": 0.7506996471344887, "grad_norm": 1.4449568023831227, "learning_rate": 1.5432831282818143e-07, "loss": 0.2075, "step": 43187 }, { "epoch": 0.7507170296719915, "grad_norm": 1.1264573529671824, "learning_rate": 1.54307974738224e-07, "loss": 0.3303, "step": 43188 }, { "epoch": 0.7507344122094943, "grad_norm": 1.0004702851710632, "learning_rate": 1.542876377439515e-07, "loss": 0.1479, "step": 43189 }, { "epoch": 0.7507517947469972, "grad_norm": 5.691984564871884, "learning_rate": 1.5426730184542834e-07, "loss": 0.278, "step": 43190 }, { "epoch": 0.7507691772845, "grad_norm": 1.282240335135701, "learning_rate": 1.542469670427191e-07, "loss": 0.2908, "step": 43191 }, { "epoch": 0.7507865598220028, "grad_norm": 0.9650796374971202, "learning_rate": 1.542266333358881e-07, "loss": 0.1709, "step": 43192 }, { "epoch": 0.7508039423595056, "grad_norm": 0.9878719810442321, "learning_rate": 1.542063007249997e-07, "loss": 0.1559, "step": 43193 }, { "epoch": 0.7508213248970085, "grad_norm": 2.1614392501345314, "learning_rate": 1.5418596921011877e-07, "loss": 0.3312, "step": 43194 }, { "epoch": 0.7508387074345113, "grad_norm": 1.4948327820237703, "learning_rate": 1.5416563879130934e-07, "loss": 0.2561, "step": 43195 }, { "epoch": 0.7508560899720141, "grad_norm": 1.1265324303523119, "learning_rate": 1.541453094686358e-07, "loss": 0.2561, "step": 43196 }, { "epoch": 0.750873472509517, "grad_norm": 1.3012653010935473, "learning_rate": 1.5412498124216288e-07, "loss": 0.2827, "step": 43197 }, { "epoch": 0.7508908550470198, "grad_norm": 2.9013753690512103, "learning_rate": 1.5410465411195494e-07, "loss": 0.1245, "step": 43198 }, { "epoch": 0.7509082375845226, "grad_norm": 2.1369243418130863, "learning_rate": 1.540843280780763e-07, "loss": 0.1915, "step": 43199 }, { "epoch": 0.7509256201220255, "grad_norm": 1.1375264853606053, "learning_rate": 1.540640031405915e-07, "loss": 0.2045, "step": 43200 }, { "epoch": 0.7509430026595283, "grad_norm": 1.0519630331775731, "learning_rate": 1.5404367929956485e-07, "loss": 0.2564, "step": 43201 }, { "epoch": 0.750960385197031, "grad_norm": 1.6679635709695826, "learning_rate": 1.5402335655506087e-07, "loss": 0.3281, "step": 43202 }, { "epoch": 0.7509777677345338, "grad_norm": 1.5940372956497066, "learning_rate": 1.5400303490714388e-07, "loss": 0.2223, "step": 43203 }, { "epoch": 0.7509951502720367, "grad_norm": 1.003102582183639, "learning_rate": 1.5398271435587817e-07, "loss": 0.214, "step": 43204 }, { "epoch": 0.7510125328095395, "grad_norm": 0.7140999506728588, "learning_rate": 1.539623949013285e-07, "loss": 0.141, "step": 43205 }, { "epoch": 0.7510299153470423, "grad_norm": 1.2260433422121773, "learning_rate": 1.5394207654355907e-07, "loss": 0.2303, "step": 43206 }, { "epoch": 0.7510472978845452, "grad_norm": 1.7762735829931124, "learning_rate": 1.539217592826344e-07, "loss": 0.3128, "step": 43207 }, { "epoch": 0.751064680422048, "grad_norm": 1.7033115254988886, "learning_rate": 1.5390144311861854e-07, "loss": 0.2886, "step": 43208 }, { "epoch": 0.7510820629595508, "grad_norm": 1.6617650808755473, "learning_rate": 1.5388112805157622e-07, "loss": 0.174, "step": 43209 }, { "epoch": 0.7510994454970537, "grad_norm": 2.3349394553524707, "learning_rate": 1.5386081408157175e-07, "loss": 0.2366, "step": 43210 }, { "epoch": 0.7511168280345565, "grad_norm": 1.141817097486119, "learning_rate": 1.5384050120866948e-07, "loss": 0.1491, "step": 43211 }, { "epoch": 0.7511342105720593, "grad_norm": 2.1242519411268033, "learning_rate": 1.538201894329337e-07, "loss": 0.1902, "step": 43212 }, { "epoch": 0.7511515931095621, "grad_norm": 1.934086336285371, "learning_rate": 1.5379987875442919e-07, "loss": 0.1728, "step": 43213 }, { "epoch": 0.751168975647065, "grad_norm": 1.3062502774574225, "learning_rate": 1.5377956917321982e-07, "loss": 0.282, "step": 43214 }, { "epoch": 0.7511863581845678, "grad_norm": 2.404883361295206, "learning_rate": 1.5375926068937024e-07, "loss": 0.2203, "step": 43215 }, { "epoch": 0.7512037407220706, "grad_norm": 1.0410780126601151, "learning_rate": 1.5373895330294458e-07, "loss": 0.161, "step": 43216 }, { "epoch": 0.7512211232595735, "grad_norm": 2.46270445996337, "learning_rate": 1.5371864701400756e-07, "loss": 0.1316, "step": 43217 }, { "epoch": 0.7512385057970763, "grad_norm": 1.501459989748458, "learning_rate": 1.536983418226233e-07, "loss": 0.2761, "step": 43218 }, { "epoch": 0.7512558883345791, "grad_norm": 2.2393596065603116, "learning_rate": 1.5367803772885624e-07, "loss": 0.2, "step": 43219 }, { "epoch": 0.751273270872082, "grad_norm": 1.4505555183546754, "learning_rate": 1.5365773473277072e-07, "loss": 0.407, "step": 43220 }, { "epoch": 0.7512906534095848, "grad_norm": 1.2388868661264785, "learning_rate": 1.536374328344311e-07, "loss": 0.1181, "step": 43221 }, { "epoch": 0.7513080359470875, "grad_norm": 1.0686984323058524, "learning_rate": 1.536171320339017e-07, "loss": 0.2443, "step": 43222 }, { "epoch": 0.7513254184845903, "grad_norm": 1.1787088214677093, "learning_rate": 1.5359683233124687e-07, "loss": 0.1904, "step": 43223 }, { "epoch": 0.7513428010220932, "grad_norm": 1.3619263032839615, "learning_rate": 1.5357653372653084e-07, "loss": 0.2434, "step": 43224 }, { "epoch": 0.751360183559596, "grad_norm": 1.4340697820808392, "learning_rate": 1.5355623621981816e-07, "loss": 0.1322, "step": 43225 }, { "epoch": 0.7513775660970988, "grad_norm": 1.2177748157382218, "learning_rate": 1.5353593981117325e-07, "loss": 0.1889, "step": 43226 }, { "epoch": 0.7513949486346017, "grad_norm": 1.5850914615694387, "learning_rate": 1.5351564450065996e-07, "loss": 0.1494, "step": 43227 }, { "epoch": 0.7514123311721045, "grad_norm": 2.101398323263011, "learning_rate": 1.5349535028834305e-07, "loss": 0.2759, "step": 43228 }, { "epoch": 0.7514297137096073, "grad_norm": 0.969443403392373, "learning_rate": 1.5347505717428673e-07, "loss": 0.2508, "step": 43229 }, { "epoch": 0.7514470962471101, "grad_norm": 1.1605300848248166, "learning_rate": 1.5345476515855526e-07, "loss": 0.3507, "step": 43230 }, { "epoch": 0.751464478784613, "grad_norm": 1.6968136585167657, "learning_rate": 1.5343447424121288e-07, "loss": 0.3474, "step": 43231 }, { "epoch": 0.7514818613221158, "grad_norm": 1.4377041602978047, "learning_rate": 1.5341418442232433e-07, "loss": 0.1359, "step": 43232 }, { "epoch": 0.7514992438596186, "grad_norm": 1.2774382939585083, "learning_rate": 1.5339389570195343e-07, "loss": 0.1951, "step": 43233 }, { "epoch": 0.7515166263971215, "grad_norm": 1.2835054132016925, "learning_rate": 1.5337360808016464e-07, "loss": 0.264, "step": 43234 }, { "epoch": 0.7515340089346243, "grad_norm": 0.9488727790842374, "learning_rate": 1.533533215570222e-07, "loss": 0.2908, "step": 43235 }, { "epoch": 0.7515513914721271, "grad_norm": 0.9740457384990132, "learning_rate": 1.5333303613259058e-07, "loss": 0.2168, "step": 43236 }, { "epoch": 0.75156877400963, "grad_norm": 1.8868153993561187, "learning_rate": 1.5331275180693402e-07, "loss": 0.183, "step": 43237 }, { "epoch": 0.7515861565471328, "grad_norm": 1.1365494206716493, "learning_rate": 1.532924685801168e-07, "loss": 0.2513, "step": 43238 }, { "epoch": 0.7516035390846356, "grad_norm": 1.0165824087258715, "learning_rate": 1.5327218645220313e-07, "loss": 0.1019, "step": 43239 }, { "epoch": 0.7516209216221384, "grad_norm": 1.7649919541885752, "learning_rate": 1.5325190542325744e-07, "loss": 0.2059, "step": 43240 }, { "epoch": 0.7516383041596413, "grad_norm": 1.227131099283587, "learning_rate": 1.5323162549334385e-07, "loss": 0.3323, "step": 43241 }, { "epoch": 0.751655686697144, "grad_norm": 1.7340487548550443, "learning_rate": 1.5321134666252677e-07, "loss": 0.1499, "step": 43242 }, { "epoch": 0.7516730692346468, "grad_norm": 1.808524535496844, "learning_rate": 1.531910689308702e-07, "loss": 0.1976, "step": 43243 }, { "epoch": 0.7516904517721497, "grad_norm": 1.110703375842245, "learning_rate": 1.5317079229843883e-07, "loss": 0.1408, "step": 43244 }, { "epoch": 0.7517078343096525, "grad_norm": 1.8351081001810732, "learning_rate": 1.531505167652969e-07, "loss": 0.2541, "step": 43245 }, { "epoch": 0.7517252168471553, "grad_norm": 1.4994915591975575, "learning_rate": 1.5313024233150818e-07, "loss": 0.1415, "step": 43246 }, { "epoch": 0.7517425993846582, "grad_norm": 1.5614318991558334, "learning_rate": 1.5310996899713742e-07, "loss": 0.3185, "step": 43247 }, { "epoch": 0.751759981922161, "grad_norm": 1.1750998797627457, "learning_rate": 1.5308969676224864e-07, "loss": 0.2274, "step": 43248 }, { "epoch": 0.7517773644596638, "grad_norm": 1.837077690324342, "learning_rate": 1.5306942562690622e-07, "loss": 0.1723, "step": 43249 }, { "epoch": 0.7517947469971666, "grad_norm": 1.4135855203209442, "learning_rate": 1.5304915559117438e-07, "loss": 0.2317, "step": 43250 }, { "epoch": 0.7518121295346695, "grad_norm": 1.6666638069571806, "learning_rate": 1.5302888665511727e-07, "loss": 0.2253, "step": 43251 }, { "epoch": 0.7518295120721723, "grad_norm": 1.425110283523838, "learning_rate": 1.5300861881879923e-07, "loss": 0.2362, "step": 43252 }, { "epoch": 0.7518468946096751, "grad_norm": 1.3935156861018445, "learning_rate": 1.529883520822845e-07, "loss": 0.2403, "step": 43253 }, { "epoch": 0.751864277147178, "grad_norm": 2.0257563206510962, "learning_rate": 1.5296808644563713e-07, "loss": 0.2448, "step": 43254 }, { "epoch": 0.7518816596846808, "grad_norm": 1.6583694800557416, "learning_rate": 1.5294782190892163e-07, "loss": 0.1871, "step": 43255 }, { "epoch": 0.7518990422221836, "grad_norm": 2.591113585070246, "learning_rate": 1.5292755847220217e-07, "loss": 0.4406, "step": 43256 }, { "epoch": 0.7519164247596865, "grad_norm": 1.0590472643625717, "learning_rate": 1.5290729613554303e-07, "loss": 0.1503, "step": 43257 }, { "epoch": 0.7519338072971893, "grad_norm": 1.954831432110451, "learning_rate": 1.52887034899008e-07, "loss": 0.2018, "step": 43258 }, { "epoch": 0.7519511898346921, "grad_norm": 1.3599637951801604, "learning_rate": 1.528667747626618e-07, "loss": 0.3928, "step": 43259 }, { "epoch": 0.7519685723721949, "grad_norm": 1.4079985525013103, "learning_rate": 1.5284651572656848e-07, "loss": 0.2137, "step": 43260 }, { "epoch": 0.7519859549096977, "grad_norm": 1.5327090574360631, "learning_rate": 1.5282625779079224e-07, "loss": 0.2062, "step": 43261 }, { "epoch": 0.7520033374472005, "grad_norm": 2.593917500772024, "learning_rate": 1.5280600095539707e-07, "loss": 0.401, "step": 43262 }, { "epoch": 0.7520207199847033, "grad_norm": 1.2279020671128213, "learning_rate": 1.5278574522044758e-07, "loss": 0.239, "step": 43263 }, { "epoch": 0.7520381025222062, "grad_norm": 1.6748315409640568, "learning_rate": 1.527654905860079e-07, "loss": 0.1743, "step": 43264 }, { "epoch": 0.752055485059709, "grad_norm": 1.0984180051998118, "learning_rate": 1.5274523705214205e-07, "loss": 0.1711, "step": 43265 }, { "epoch": 0.7520728675972118, "grad_norm": 3.5110461582117787, "learning_rate": 1.5272498461891404e-07, "loss": 0.15, "step": 43266 }, { "epoch": 0.7520902501347146, "grad_norm": 1.9322915929883089, "learning_rate": 1.527047332863885e-07, "loss": 0.1846, "step": 43267 }, { "epoch": 0.7521076326722175, "grad_norm": 1.5309753992219437, "learning_rate": 1.526844830546294e-07, "loss": 0.2871, "step": 43268 }, { "epoch": 0.7521250152097203, "grad_norm": 2.065876276994558, "learning_rate": 1.526642339237009e-07, "loss": 0.2725, "step": 43269 }, { "epoch": 0.7521423977472231, "grad_norm": 2.566924783416583, "learning_rate": 1.5264398589366727e-07, "loss": 0.2498, "step": 43270 }, { "epoch": 0.752159780284726, "grad_norm": 2.09529948927874, "learning_rate": 1.5262373896459268e-07, "loss": 0.1949, "step": 43271 }, { "epoch": 0.7521771628222288, "grad_norm": 1.464487988473024, "learning_rate": 1.5260349313654126e-07, "loss": 0.1663, "step": 43272 }, { "epoch": 0.7521945453597316, "grad_norm": 1.5216265171624361, "learning_rate": 1.5258324840957715e-07, "loss": 0.2065, "step": 43273 }, { "epoch": 0.7522119278972345, "grad_norm": 1.535878645241385, "learning_rate": 1.5256300478376438e-07, "loss": 0.2009, "step": 43274 }, { "epoch": 0.7522293104347373, "grad_norm": 0.8053548191743898, "learning_rate": 1.5254276225916747e-07, "loss": 0.2954, "step": 43275 }, { "epoch": 0.7522466929722401, "grad_norm": 2.058087417587243, "learning_rate": 1.5252252083585055e-07, "loss": 0.2241, "step": 43276 }, { "epoch": 0.752264075509743, "grad_norm": 1.7021103537598703, "learning_rate": 1.5250228051387732e-07, "loss": 0.2412, "step": 43277 }, { "epoch": 0.7522814580472458, "grad_norm": 1.1551019085680685, "learning_rate": 1.524820412933124e-07, "loss": 0.1585, "step": 43278 }, { "epoch": 0.7522988405847486, "grad_norm": 1.0097260236363346, "learning_rate": 1.524618031742197e-07, "loss": 0.1486, "step": 43279 }, { "epoch": 0.7523162231222514, "grad_norm": 1.570814709742064, "learning_rate": 1.5244156615666348e-07, "loss": 0.1527, "step": 43280 }, { "epoch": 0.7523336056597542, "grad_norm": 1.756800485635737, "learning_rate": 1.5242133024070786e-07, "loss": 0.3864, "step": 43281 }, { "epoch": 0.752350988197257, "grad_norm": 1.774739152368003, "learning_rate": 1.5240109542641676e-07, "loss": 0.2337, "step": 43282 }, { "epoch": 0.7523683707347598, "grad_norm": 1.7082116738100794, "learning_rate": 1.523808617138549e-07, "loss": 0.1936, "step": 43283 }, { "epoch": 0.7523857532722626, "grad_norm": 1.2450968595322949, "learning_rate": 1.523606291030858e-07, "loss": 0.3089, "step": 43284 }, { "epoch": 0.7524031358097655, "grad_norm": 1.5620759032096565, "learning_rate": 1.5234039759417366e-07, "loss": 0.1573, "step": 43285 }, { "epoch": 0.7524205183472683, "grad_norm": 1.131246932516181, "learning_rate": 1.5232016718718293e-07, "loss": 0.3638, "step": 43286 }, { "epoch": 0.7524379008847711, "grad_norm": 1.2463233954093202, "learning_rate": 1.522999378821776e-07, "loss": 0.1725, "step": 43287 }, { "epoch": 0.752455283422274, "grad_norm": 1.2627894621765547, "learning_rate": 1.522797096792217e-07, "loss": 0.2037, "step": 43288 }, { "epoch": 0.7524726659597768, "grad_norm": 1.452916315244273, "learning_rate": 1.522594825783794e-07, "loss": 0.1566, "step": 43289 }, { "epoch": 0.7524900484972796, "grad_norm": 1.2084950590954706, "learning_rate": 1.5223925657971482e-07, "loss": 0.2054, "step": 43290 }, { "epoch": 0.7525074310347825, "grad_norm": 1.7559999105525002, "learning_rate": 1.5221903168329203e-07, "loss": 0.2848, "step": 43291 }, { "epoch": 0.7525248135722853, "grad_norm": 1.5273886420674911, "learning_rate": 1.5219880788917515e-07, "loss": 0.1985, "step": 43292 }, { "epoch": 0.7525421961097881, "grad_norm": 2.525677892814988, "learning_rate": 1.521785851974281e-07, "loss": 0.2467, "step": 43293 }, { "epoch": 0.752559578647291, "grad_norm": 1.5691996469377956, "learning_rate": 1.5215836360811534e-07, "loss": 0.2375, "step": 43294 }, { "epoch": 0.7525769611847938, "grad_norm": 1.0991613908672424, "learning_rate": 1.5213814312130097e-07, "loss": 0.2189, "step": 43295 }, { "epoch": 0.7525943437222966, "grad_norm": 2.575208527965957, "learning_rate": 1.5211792373704868e-07, "loss": 0.2316, "step": 43296 }, { "epoch": 0.7526117262597994, "grad_norm": 1.5720288440412875, "learning_rate": 1.5209770545542267e-07, "loss": 0.2239, "step": 43297 }, { "epoch": 0.7526291087973023, "grad_norm": 1.7954269528961688, "learning_rate": 1.5207748827648727e-07, "loss": 0.1877, "step": 43298 }, { "epoch": 0.7526464913348051, "grad_norm": 2.1917417210927232, "learning_rate": 1.5205727220030633e-07, "loss": 0.1631, "step": 43299 }, { "epoch": 0.7526638738723079, "grad_norm": 1.899515676612297, "learning_rate": 1.520370572269441e-07, "loss": 0.3061, "step": 43300 }, { "epoch": 0.7526812564098107, "grad_norm": 0.6848602657895564, "learning_rate": 1.520168433564643e-07, "loss": 0.256, "step": 43301 }, { "epoch": 0.7526986389473135, "grad_norm": 1.233363590911605, "learning_rate": 1.5199663058893165e-07, "loss": 0.2439, "step": 43302 }, { "epoch": 0.7527160214848163, "grad_norm": 1.2922039170965833, "learning_rate": 1.5197641892440966e-07, "loss": 0.2722, "step": 43303 }, { "epoch": 0.7527334040223191, "grad_norm": 1.3979483460339044, "learning_rate": 1.519562083629625e-07, "loss": 0.3545, "step": 43304 }, { "epoch": 0.752750786559822, "grad_norm": 1.4582571224109282, "learning_rate": 1.519359989046542e-07, "loss": 0.1818, "step": 43305 }, { "epoch": 0.7527681690973248, "grad_norm": 1.6536760259164998, "learning_rate": 1.51915790549549e-07, "loss": 0.2472, "step": 43306 }, { "epoch": 0.7527855516348276, "grad_norm": 1.5116306392299006, "learning_rate": 1.518955832977109e-07, "loss": 0.2077, "step": 43307 }, { "epoch": 0.7528029341723305, "grad_norm": 2.2223586081267475, "learning_rate": 1.5187537714920384e-07, "loss": 0.3374, "step": 43308 }, { "epoch": 0.7528203167098333, "grad_norm": 2.895781162446039, "learning_rate": 1.5185517210409193e-07, "loss": 0.221, "step": 43309 }, { "epoch": 0.7528376992473361, "grad_norm": 2.2185929193365768, "learning_rate": 1.5183496816243924e-07, "loss": 0.1989, "step": 43310 }, { "epoch": 0.752855081784839, "grad_norm": 2.1029817883136426, "learning_rate": 1.518147653243097e-07, "loss": 0.1496, "step": 43311 }, { "epoch": 0.7528724643223418, "grad_norm": 1.693932642846885, "learning_rate": 1.5179456358976744e-07, "loss": 0.1776, "step": 43312 }, { "epoch": 0.7528898468598446, "grad_norm": 1.5401055847759793, "learning_rate": 1.517743629588763e-07, "loss": 0.2715, "step": 43313 }, { "epoch": 0.7529072293973474, "grad_norm": 2.193474132345772, "learning_rate": 1.5175416343170083e-07, "loss": 0.208, "step": 43314 }, { "epoch": 0.7529246119348503, "grad_norm": 1.1474028705583574, "learning_rate": 1.5173396500830448e-07, "loss": 0.1928, "step": 43315 }, { "epoch": 0.7529419944723531, "grad_norm": 1.563017040133493, "learning_rate": 1.5171376768875132e-07, "loss": 0.2383, "step": 43316 }, { "epoch": 0.7529593770098559, "grad_norm": 1.7351577835779497, "learning_rate": 1.516935714731057e-07, "loss": 0.1729, "step": 43317 }, { "epoch": 0.7529767595473588, "grad_norm": 4.123364719214853, "learning_rate": 1.5167337636143147e-07, "loss": 0.25, "step": 43318 }, { "epoch": 0.7529941420848616, "grad_norm": 1.2487565858977527, "learning_rate": 1.516531823537926e-07, "loss": 0.1558, "step": 43319 }, { "epoch": 0.7530115246223644, "grad_norm": 0.8283600357127198, "learning_rate": 1.51632989450253e-07, "loss": 0.1784, "step": 43320 }, { "epoch": 0.7530289071598671, "grad_norm": 1.300580270333284, "learning_rate": 1.5161279765087708e-07, "loss": 0.2551, "step": 43321 }, { "epoch": 0.75304628969737, "grad_norm": 0.9731865847133446, "learning_rate": 1.5159260695572845e-07, "loss": 0.1113, "step": 43322 }, { "epoch": 0.7530636722348728, "grad_norm": 1.9488282166055737, "learning_rate": 1.5157241736487115e-07, "loss": 0.3178, "step": 43323 }, { "epoch": 0.7530810547723756, "grad_norm": 1.4669376720454101, "learning_rate": 1.515522288783691e-07, "loss": 0.3361, "step": 43324 }, { "epoch": 0.7530984373098785, "grad_norm": 1.3038391287881037, "learning_rate": 1.515320414962866e-07, "loss": 0.2225, "step": 43325 }, { "epoch": 0.7531158198473813, "grad_norm": 0.8874065497675048, "learning_rate": 1.5151185521868743e-07, "loss": 0.1512, "step": 43326 }, { "epoch": 0.7531332023848841, "grad_norm": 1.6492336599355366, "learning_rate": 1.5149167004563563e-07, "loss": 0.0915, "step": 43327 }, { "epoch": 0.753150584922387, "grad_norm": 4.295985180362484, "learning_rate": 1.514714859771951e-07, "loss": 0.3225, "step": 43328 }, { "epoch": 0.7531679674598898, "grad_norm": 1.400119706993011, "learning_rate": 1.5145130301342985e-07, "loss": 0.2273, "step": 43329 }, { "epoch": 0.7531853499973926, "grad_norm": 1.6905961424240101, "learning_rate": 1.5143112115440388e-07, "loss": 0.2339, "step": 43330 }, { "epoch": 0.7532027325348954, "grad_norm": 2.152357587096467, "learning_rate": 1.5141094040018117e-07, "loss": 0.2038, "step": 43331 }, { "epoch": 0.7532201150723983, "grad_norm": 1.3675059477578078, "learning_rate": 1.5139076075082547e-07, "loss": 0.1603, "step": 43332 }, { "epoch": 0.7532374976099011, "grad_norm": 1.7123311004354487, "learning_rate": 1.513705822064012e-07, "loss": 0.1847, "step": 43333 }, { "epoch": 0.7532548801474039, "grad_norm": 0.9424928301771645, "learning_rate": 1.5135040476697192e-07, "loss": 0.2897, "step": 43334 }, { "epoch": 0.7532722626849068, "grad_norm": 1.9021034099992185, "learning_rate": 1.513302284326015e-07, "loss": 0.2761, "step": 43335 }, { "epoch": 0.7532896452224096, "grad_norm": 2.13200242815053, "learning_rate": 1.5131005320335432e-07, "loss": 0.1766, "step": 43336 }, { "epoch": 0.7533070277599124, "grad_norm": 1.2839891986322416, "learning_rate": 1.5128987907929402e-07, "loss": 0.1878, "step": 43337 }, { "epoch": 0.7533244102974153, "grad_norm": 1.2832085206363844, "learning_rate": 1.5126970606048466e-07, "loss": 0.1306, "step": 43338 }, { "epoch": 0.7533417928349181, "grad_norm": 1.1164747567418745, "learning_rate": 1.5124953414699017e-07, "loss": 0.1713, "step": 43339 }, { "epoch": 0.7533591753724209, "grad_norm": 1.5434569309460988, "learning_rate": 1.5122936333887438e-07, "loss": 0.1426, "step": 43340 }, { "epoch": 0.7533765579099236, "grad_norm": 0.8974489369177594, "learning_rate": 1.5120919363620134e-07, "loss": 0.1863, "step": 43341 }, { "epoch": 0.7533939404474265, "grad_norm": 3.540178275857075, "learning_rate": 1.511890250390349e-07, "loss": 0.3136, "step": 43342 }, { "epoch": 0.7534113229849293, "grad_norm": 1.347135006497693, "learning_rate": 1.511688575474389e-07, "loss": 0.1948, "step": 43343 }, { "epoch": 0.7534287055224321, "grad_norm": 1.3739519957174633, "learning_rate": 1.5114869116147756e-07, "loss": 0.2671, "step": 43344 }, { "epoch": 0.753446088059935, "grad_norm": 2.150065765355251, "learning_rate": 1.5112852588121457e-07, "loss": 0.3868, "step": 43345 }, { "epoch": 0.7534634705974378, "grad_norm": 1.094595736210387, "learning_rate": 1.511083617067141e-07, "loss": 0.1985, "step": 43346 }, { "epoch": 0.7534808531349406, "grad_norm": 1.8086795001360343, "learning_rate": 1.5108819863803952e-07, "loss": 0.229, "step": 43347 }, { "epoch": 0.7534982356724435, "grad_norm": 2.7451787755087946, "learning_rate": 1.510680366752552e-07, "loss": 0.2732, "step": 43348 }, { "epoch": 0.7535156182099463, "grad_norm": 1.8419814694836696, "learning_rate": 1.5104787581842493e-07, "loss": 0.1912, "step": 43349 }, { "epoch": 0.7535330007474491, "grad_norm": 1.2630324819151983, "learning_rate": 1.510277160676126e-07, "loss": 0.1667, "step": 43350 }, { "epoch": 0.7535503832849519, "grad_norm": 1.8107414502383428, "learning_rate": 1.5100755742288197e-07, "loss": 0.7055, "step": 43351 }, { "epoch": 0.7535677658224548, "grad_norm": 0.96835253222646, "learning_rate": 1.5098739988429736e-07, "loss": 0.1381, "step": 43352 }, { "epoch": 0.7535851483599576, "grad_norm": 1.667277170125239, "learning_rate": 1.5096724345192224e-07, "loss": 0.1691, "step": 43353 }, { "epoch": 0.7536025308974604, "grad_norm": 1.254384912210219, "learning_rate": 1.5094708812582059e-07, "loss": 0.2836, "step": 43354 }, { "epoch": 0.7536199134349633, "grad_norm": 1.4740525651793697, "learning_rate": 1.509269339060562e-07, "loss": 0.2042, "step": 43355 }, { "epoch": 0.7536372959724661, "grad_norm": 1.8586532578650454, "learning_rate": 1.509067807926932e-07, "loss": 0.3138, "step": 43356 }, { "epoch": 0.7536546785099689, "grad_norm": 1.5880887085395528, "learning_rate": 1.508866287857954e-07, "loss": 0.2154, "step": 43357 }, { "epoch": 0.7536720610474718, "grad_norm": 1.392391228235196, "learning_rate": 1.5086647788542655e-07, "loss": 0.2474, "step": 43358 }, { "epoch": 0.7536894435849746, "grad_norm": 1.0554406141695072, "learning_rate": 1.5084632809165066e-07, "loss": 0.1512, "step": 43359 }, { "epoch": 0.7537068261224774, "grad_norm": 1.5603037624841494, "learning_rate": 1.5082617940453147e-07, "loss": 0.1991, "step": 43360 }, { "epoch": 0.7537242086599801, "grad_norm": 1.0603005881276193, "learning_rate": 1.5080603182413292e-07, "loss": 0.1661, "step": 43361 }, { "epoch": 0.753741591197483, "grad_norm": 1.5310774440704662, "learning_rate": 1.5078588535051885e-07, "loss": 0.227, "step": 43362 }, { "epoch": 0.7537589737349858, "grad_norm": 1.2495994819811849, "learning_rate": 1.5076573998375293e-07, "loss": 0.3357, "step": 43363 }, { "epoch": 0.7537763562724886, "grad_norm": 1.4970823266520357, "learning_rate": 1.5074559572389932e-07, "loss": 0.1777, "step": 43364 }, { "epoch": 0.7537937388099915, "grad_norm": 1.5928291374501908, "learning_rate": 1.5072545257102193e-07, "loss": 0.1873, "step": 43365 }, { "epoch": 0.7538111213474943, "grad_norm": 1.2048166810141967, "learning_rate": 1.507053105251841e-07, "loss": 0.3651, "step": 43366 }, { "epoch": 0.7538285038849971, "grad_norm": 1.5813943108638302, "learning_rate": 1.5068516958645012e-07, "loss": 0.3018, "step": 43367 }, { "epoch": 0.7538458864225, "grad_norm": 2.8840080018320746, "learning_rate": 1.5066502975488366e-07, "loss": 0.2933, "step": 43368 }, { "epoch": 0.7538632689600028, "grad_norm": 1.2169978215828445, "learning_rate": 1.5064489103054863e-07, "loss": 0.1547, "step": 43369 }, { "epoch": 0.7538806514975056, "grad_norm": 2.1940919573617093, "learning_rate": 1.5062475341350882e-07, "loss": 0.2698, "step": 43370 }, { "epoch": 0.7538980340350084, "grad_norm": 1.6401680421397078, "learning_rate": 1.5060461690382804e-07, "loss": 0.3028, "step": 43371 }, { "epoch": 0.7539154165725113, "grad_norm": 1.2053875035998927, "learning_rate": 1.505844815015701e-07, "loss": 0.2294, "step": 43372 }, { "epoch": 0.7539327991100141, "grad_norm": 2.131035237300726, "learning_rate": 1.505643472067989e-07, "loss": 0.2497, "step": 43373 }, { "epoch": 0.7539501816475169, "grad_norm": 1.4672825656137043, "learning_rate": 1.50544214019578e-07, "loss": 0.1455, "step": 43374 }, { "epoch": 0.7539675641850198, "grad_norm": 3.755690712560781, "learning_rate": 1.5052408193997158e-07, "loss": 0.2235, "step": 43375 }, { "epoch": 0.7539849467225226, "grad_norm": 0.957097881332369, "learning_rate": 1.505039509680433e-07, "loss": 0.151, "step": 43376 }, { "epoch": 0.7540023292600254, "grad_norm": 1.2541943623599705, "learning_rate": 1.504838211038571e-07, "loss": 0.272, "step": 43377 }, { "epoch": 0.7540197117975282, "grad_norm": 1.4287463990378515, "learning_rate": 1.504636923474763e-07, "loss": 0.313, "step": 43378 }, { "epoch": 0.7540370943350311, "grad_norm": 2.148799721777568, "learning_rate": 1.5044356469896524e-07, "loss": 0.2719, "step": 43379 }, { "epoch": 0.7540544768725339, "grad_norm": 1.2009388135449814, "learning_rate": 1.5042343815838748e-07, "loss": 0.2295, "step": 43380 }, { "epoch": 0.7540718594100366, "grad_norm": 1.3047582256476224, "learning_rate": 1.504033127258068e-07, "loss": 0.2623, "step": 43381 }, { "epoch": 0.7540892419475395, "grad_norm": 1.3746381749141372, "learning_rate": 1.5038318840128689e-07, "loss": 0.2853, "step": 43382 }, { "epoch": 0.7541066244850423, "grad_norm": 1.6060657017713993, "learning_rate": 1.5036306518489184e-07, "loss": 0.1439, "step": 43383 }, { "epoch": 0.7541240070225451, "grad_norm": 1.529454358378725, "learning_rate": 1.5034294307668543e-07, "loss": 0.2885, "step": 43384 }, { "epoch": 0.754141389560048, "grad_norm": 8.195747958056545, "learning_rate": 1.5032282207673113e-07, "loss": 0.3796, "step": 43385 }, { "epoch": 0.7541587720975508, "grad_norm": 9.485843762967141, "learning_rate": 1.5030270218509266e-07, "loss": 0.2207, "step": 43386 }, { "epoch": 0.7541761546350536, "grad_norm": 1.2860047603533238, "learning_rate": 1.5028258340183414e-07, "loss": 0.123, "step": 43387 }, { "epoch": 0.7541935371725564, "grad_norm": 1.2352819240239812, "learning_rate": 1.5026246572701922e-07, "loss": 0.148, "step": 43388 }, { "epoch": 0.7542109197100593, "grad_norm": 1.8415671677450371, "learning_rate": 1.502423491607116e-07, "loss": 0.1856, "step": 43389 }, { "epoch": 0.7542283022475621, "grad_norm": 1.0105621312665445, "learning_rate": 1.502222337029751e-07, "loss": 0.22, "step": 43390 }, { "epoch": 0.7542456847850649, "grad_norm": 1.7072176395929979, "learning_rate": 1.502021193538734e-07, "loss": 0.1474, "step": 43391 }, { "epoch": 0.7542630673225678, "grad_norm": 1.5293315508309955, "learning_rate": 1.5018200611347032e-07, "loss": 0.1815, "step": 43392 }, { "epoch": 0.7542804498600706, "grad_norm": 0.9539925651460778, "learning_rate": 1.501618939818296e-07, "loss": 0.254, "step": 43393 }, { "epoch": 0.7542978323975734, "grad_norm": 1.2094998576575409, "learning_rate": 1.5014178295901475e-07, "loss": 0.1848, "step": 43394 }, { "epoch": 0.7543152149350763, "grad_norm": 1.473545071921137, "learning_rate": 1.5012167304509e-07, "loss": 0.1711, "step": 43395 }, { "epoch": 0.7543325974725791, "grad_norm": 1.9032750505752278, "learning_rate": 1.501015642401189e-07, "loss": 0.2779, "step": 43396 }, { "epoch": 0.7543499800100819, "grad_norm": 1.5214924937430423, "learning_rate": 1.5008145654416482e-07, "loss": 0.1696, "step": 43397 }, { "epoch": 0.7543673625475847, "grad_norm": 2.025748662939828, "learning_rate": 1.5006134995729192e-07, "loss": 0.2344, "step": 43398 }, { "epoch": 0.7543847450850876, "grad_norm": 0.9268328056501127, "learning_rate": 1.5004124447956385e-07, "loss": 0.2154, "step": 43399 }, { "epoch": 0.7544021276225903, "grad_norm": 1.024499848338874, "learning_rate": 1.5002114011104421e-07, "loss": 0.2336, "step": 43400 }, { "epoch": 0.7544195101600931, "grad_norm": 1.6969329548157734, "learning_rate": 1.500010368517968e-07, "loss": 0.306, "step": 43401 }, { "epoch": 0.754436892697596, "grad_norm": 0.5142004303203875, "learning_rate": 1.4998093470188522e-07, "loss": 0.1595, "step": 43402 }, { "epoch": 0.7544542752350988, "grad_norm": 1.2898330470463584, "learning_rate": 1.4996083366137357e-07, "loss": 0.199, "step": 43403 }, { "epoch": 0.7544716577726016, "grad_norm": 1.354751609277882, "learning_rate": 1.4994073373032516e-07, "loss": 0.224, "step": 43404 }, { "epoch": 0.7544890403101044, "grad_norm": 1.385658443550244, "learning_rate": 1.4992063490880362e-07, "loss": 0.1986, "step": 43405 }, { "epoch": 0.7545064228476073, "grad_norm": 1.838429343815551, "learning_rate": 1.49900537196873e-07, "loss": 0.2743, "step": 43406 }, { "epoch": 0.7545238053851101, "grad_norm": 0.818790438750725, "learning_rate": 1.4988044059459693e-07, "loss": 0.2471, "step": 43407 }, { "epoch": 0.7545411879226129, "grad_norm": 1.7061326708801527, "learning_rate": 1.49860345102039e-07, "loss": 0.3084, "step": 43408 }, { "epoch": 0.7545585704601158, "grad_norm": 1.0588600715384109, "learning_rate": 1.4984025071926288e-07, "loss": 0.1172, "step": 43409 }, { "epoch": 0.7545759529976186, "grad_norm": 1.0714285633988663, "learning_rate": 1.4982015744633235e-07, "loss": 0.2834, "step": 43410 }, { "epoch": 0.7545933355351214, "grad_norm": 1.2996320646455648, "learning_rate": 1.4980006528331108e-07, "loss": 0.3725, "step": 43411 }, { "epoch": 0.7546107180726243, "grad_norm": 1.882071970840322, "learning_rate": 1.4977997423026272e-07, "loss": 0.2706, "step": 43412 }, { "epoch": 0.7546281006101271, "grad_norm": 1.6209089597591413, "learning_rate": 1.4975988428725078e-07, "loss": 0.2664, "step": 43413 }, { "epoch": 0.7546454831476299, "grad_norm": 1.834307415063895, "learning_rate": 1.497397954543393e-07, "loss": 0.1938, "step": 43414 }, { "epoch": 0.7546628656851327, "grad_norm": 2.2267538341043736, "learning_rate": 1.4971970773159193e-07, "loss": 0.2679, "step": 43415 }, { "epoch": 0.7546802482226356, "grad_norm": 1.211722331534201, "learning_rate": 1.4969962111907203e-07, "loss": 0.1361, "step": 43416 }, { "epoch": 0.7546976307601384, "grad_norm": 1.260752636850141, "learning_rate": 1.4967953561684326e-07, "loss": 0.2212, "step": 43417 }, { "epoch": 0.7547150132976412, "grad_norm": 3.266068898016593, "learning_rate": 1.496594512249696e-07, "loss": 0.2333, "step": 43418 }, { "epoch": 0.7547323958351441, "grad_norm": 1.512677037409589, "learning_rate": 1.4963936794351451e-07, "loss": 0.3166, "step": 43419 }, { "epoch": 0.7547497783726468, "grad_norm": 1.7100641883024827, "learning_rate": 1.4961928577254173e-07, "loss": 0.2158, "step": 43420 }, { "epoch": 0.7547671609101496, "grad_norm": 1.9249323765888873, "learning_rate": 1.4959920471211467e-07, "loss": 0.2421, "step": 43421 }, { "epoch": 0.7547845434476524, "grad_norm": 1.8145582598670067, "learning_rate": 1.495791247622975e-07, "loss": 0.2064, "step": 43422 }, { "epoch": 0.7548019259851553, "grad_norm": 1.1383772717316452, "learning_rate": 1.495590459231534e-07, "loss": 0.2301, "step": 43423 }, { "epoch": 0.7548193085226581, "grad_norm": 3.3551468575291135, "learning_rate": 1.4953896819474594e-07, "loss": 0.3718, "step": 43424 }, { "epoch": 0.7548366910601609, "grad_norm": 2.1887863892099357, "learning_rate": 1.4951889157713914e-07, "loss": 0.1307, "step": 43425 }, { "epoch": 0.7548540735976638, "grad_norm": 2.1713286722937326, "learning_rate": 1.4949881607039643e-07, "loss": 0.2212, "step": 43426 }, { "epoch": 0.7548714561351666, "grad_norm": 1.360461868708528, "learning_rate": 1.494787416745815e-07, "loss": 0.4071, "step": 43427 }, { "epoch": 0.7548888386726694, "grad_norm": 1.4947895139529033, "learning_rate": 1.4945866838975795e-07, "loss": 0.2638, "step": 43428 }, { "epoch": 0.7549062212101723, "grad_norm": 0.9348778545170109, "learning_rate": 1.4943859621598936e-07, "loss": 0.1764, "step": 43429 }, { "epoch": 0.7549236037476751, "grad_norm": 1.5824015374929457, "learning_rate": 1.4941852515333943e-07, "loss": 0.2215, "step": 43430 }, { "epoch": 0.7549409862851779, "grad_norm": 1.004528068769089, "learning_rate": 1.4939845520187173e-07, "loss": 0.1195, "step": 43431 }, { "epoch": 0.7549583688226807, "grad_norm": 0.887112130413299, "learning_rate": 1.493783863616497e-07, "loss": 0.278, "step": 43432 }, { "epoch": 0.7549757513601836, "grad_norm": 1.173415790471589, "learning_rate": 1.4935831863273723e-07, "loss": 0.1379, "step": 43433 }, { "epoch": 0.7549931338976864, "grad_norm": 1.7415449706336203, "learning_rate": 1.4933825201519802e-07, "loss": 0.2609, "step": 43434 }, { "epoch": 0.7550105164351892, "grad_norm": 1.4517916725258961, "learning_rate": 1.4931818650909534e-07, "loss": 0.1452, "step": 43435 }, { "epoch": 0.7550278989726921, "grad_norm": 1.415224490507358, "learning_rate": 1.492981221144927e-07, "loss": 0.2293, "step": 43436 }, { "epoch": 0.7550452815101949, "grad_norm": 3.416682321221382, "learning_rate": 1.492780588314541e-07, "loss": 0.2324, "step": 43437 }, { "epoch": 0.7550626640476977, "grad_norm": 1.4379639040291332, "learning_rate": 1.4925799666004297e-07, "loss": 0.2073, "step": 43438 }, { "epoch": 0.7550800465852006, "grad_norm": 3.3163644199972024, "learning_rate": 1.4923793560032283e-07, "loss": 0.2139, "step": 43439 }, { "epoch": 0.7550974291227033, "grad_norm": 2.0937554286400064, "learning_rate": 1.4921787565235722e-07, "loss": 0.2286, "step": 43440 }, { "epoch": 0.7551148116602061, "grad_norm": 2.6067035792008215, "learning_rate": 1.4919781681621008e-07, "loss": 0.2827, "step": 43441 }, { "epoch": 0.7551321941977089, "grad_norm": 1.2660990091857611, "learning_rate": 1.4917775909194451e-07, "loss": 0.3097, "step": 43442 }, { "epoch": 0.7551495767352118, "grad_norm": 1.778504239401476, "learning_rate": 1.4915770247962433e-07, "loss": 0.2237, "step": 43443 }, { "epoch": 0.7551669592727146, "grad_norm": 1.5124360047865208, "learning_rate": 1.4913764697931293e-07, "loss": 0.2323, "step": 43444 }, { "epoch": 0.7551843418102174, "grad_norm": 1.4242092511195212, "learning_rate": 1.4911759259107415e-07, "loss": 0.1689, "step": 43445 }, { "epoch": 0.7552017243477203, "grad_norm": 1.8147292273638842, "learning_rate": 1.4909753931497142e-07, "loss": 0.191, "step": 43446 }, { "epoch": 0.7552191068852231, "grad_norm": 1.732762525486086, "learning_rate": 1.4907748715106832e-07, "loss": 0.2385, "step": 43447 }, { "epoch": 0.7552364894227259, "grad_norm": 1.7912536536788481, "learning_rate": 1.4905743609942838e-07, "loss": 0.2066, "step": 43448 }, { "epoch": 0.7552538719602288, "grad_norm": 1.3824905563014023, "learning_rate": 1.4903738616011512e-07, "loss": 0.2793, "step": 43449 }, { "epoch": 0.7552712544977316, "grad_norm": 1.3318659907055144, "learning_rate": 1.4901733733319216e-07, "loss": 0.2189, "step": 43450 }, { "epoch": 0.7552886370352344, "grad_norm": 1.4176488009565278, "learning_rate": 1.4899728961872304e-07, "loss": 0.324, "step": 43451 }, { "epoch": 0.7553060195727372, "grad_norm": 0.8847159077558118, "learning_rate": 1.4897724301677105e-07, "loss": 0.1799, "step": 43452 }, { "epoch": 0.7553234021102401, "grad_norm": 1.4587752169062917, "learning_rate": 1.4895719752740033e-07, "loss": 0.3175, "step": 43453 }, { "epoch": 0.7553407846477429, "grad_norm": 1.809123280024605, "learning_rate": 1.4893715315067385e-07, "loss": 0.1275, "step": 43454 }, { "epoch": 0.7553581671852457, "grad_norm": 2.833540390791417, "learning_rate": 1.4891710988665517e-07, "loss": 0.4096, "step": 43455 }, { "epoch": 0.7553755497227486, "grad_norm": 1.1991602248050037, "learning_rate": 1.4889706773540816e-07, "loss": 0.2388, "step": 43456 }, { "epoch": 0.7553929322602514, "grad_norm": 1.415673995200351, "learning_rate": 1.4887702669699615e-07, "loss": 0.2237, "step": 43457 }, { "epoch": 0.7554103147977542, "grad_norm": 0.954862379915836, "learning_rate": 1.488569867714826e-07, "loss": 0.2187, "step": 43458 }, { "epoch": 0.755427697335257, "grad_norm": 1.3579629476352888, "learning_rate": 1.488369479589312e-07, "loss": 0.4179, "step": 43459 }, { "epoch": 0.7554450798727598, "grad_norm": 1.2540915863201374, "learning_rate": 1.4881691025940534e-07, "loss": 0.191, "step": 43460 }, { "epoch": 0.7554624624102626, "grad_norm": 1.574383414933348, "learning_rate": 1.4879687367296857e-07, "loss": 0.2808, "step": 43461 }, { "epoch": 0.7554798449477654, "grad_norm": 0.7290580449033038, "learning_rate": 1.4877683819968434e-07, "loss": 0.3164, "step": 43462 }, { "epoch": 0.7554972274852683, "grad_norm": 1.234992671657463, "learning_rate": 1.4875680383961602e-07, "loss": 0.1599, "step": 43463 }, { "epoch": 0.7555146100227711, "grad_norm": 2.659631222885072, "learning_rate": 1.4873677059282746e-07, "loss": 0.2709, "step": 43464 }, { "epoch": 0.7555319925602739, "grad_norm": 2.83689577451876, "learning_rate": 1.48716738459382e-07, "loss": 0.166, "step": 43465 }, { "epoch": 0.7555493750977768, "grad_norm": 2.0042883499740856, "learning_rate": 1.4869670743934325e-07, "loss": 0.2095, "step": 43466 }, { "epoch": 0.7555667576352796, "grad_norm": 1.8453381588051878, "learning_rate": 1.486766775327743e-07, "loss": 0.2382, "step": 43467 }, { "epoch": 0.7555841401727824, "grad_norm": 0.9812094430975998, "learning_rate": 1.48656648739739e-07, "loss": 0.2013, "step": 43468 }, { "epoch": 0.7556015227102852, "grad_norm": 1.7280779445427146, "learning_rate": 1.4863662106030074e-07, "loss": 0.1816, "step": 43469 }, { "epoch": 0.7556189052477881, "grad_norm": 1.8333918080048408, "learning_rate": 1.4861659449452303e-07, "loss": 0.2652, "step": 43470 }, { "epoch": 0.7556362877852909, "grad_norm": 1.153125377229422, "learning_rate": 1.4859656904246908e-07, "loss": 0.2997, "step": 43471 }, { "epoch": 0.7556536703227937, "grad_norm": 1.2691435349992293, "learning_rate": 1.485765447042029e-07, "loss": 0.2525, "step": 43472 }, { "epoch": 0.7556710528602966, "grad_norm": 1.536075701496825, "learning_rate": 1.4855652147978753e-07, "loss": 0.261, "step": 43473 }, { "epoch": 0.7556884353977994, "grad_norm": 1.0456271850781074, "learning_rate": 1.4853649936928652e-07, "loss": 0.2137, "step": 43474 }, { "epoch": 0.7557058179353022, "grad_norm": 1.1510249828445343, "learning_rate": 1.485164783727632e-07, "loss": 0.3352, "step": 43475 }, { "epoch": 0.7557232004728051, "grad_norm": 1.619423819123241, "learning_rate": 1.4849645849028136e-07, "loss": 0.2276, "step": 43476 }, { "epoch": 0.7557405830103079, "grad_norm": 1.3874932482096123, "learning_rate": 1.4847643972190428e-07, "loss": 0.1245, "step": 43477 }, { "epoch": 0.7557579655478107, "grad_norm": 0.9336619390726443, "learning_rate": 1.484564220676954e-07, "loss": 0.1988, "step": 43478 }, { "epoch": 0.7557753480853135, "grad_norm": 0.9784983325467659, "learning_rate": 1.4843640552771818e-07, "loss": 0.1752, "step": 43479 }, { "epoch": 0.7557927306228163, "grad_norm": 2.380291664074255, "learning_rate": 1.4841639010203606e-07, "loss": 0.2296, "step": 43480 }, { "epoch": 0.7558101131603191, "grad_norm": 1.4270977890297027, "learning_rate": 1.4839637579071247e-07, "loss": 0.2743, "step": 43481 }, { "epoch": 0.7558274956978219, "grad_norm": 1.7270810396804568, "learning_rate": 1.4837636259381085e-07, "loss": 0.212, "step": 43482 }, { "epoch": 0.7558448782353248, "grad_norm": 1.063098413307679, "learning_rate": 1.483563505113945e-07, "loss": 0.2919, "step": 43483 }, { "epoch": 0.7558622607728276, "grad_norm": 1.5833596452175627, "learning_rate": 1.4833633954352714e-07, "loss": 0.2467, "step": 43484 }, { "epoch": 0.7558796433103304, "grad_norm": 1.251207703612363, "learning_rate": 1.4831632969027218e-07, "loss": 0.2019, "step": 43485 }, { "epoch": 0.7558970258478332, "grad_norm": 1.482022994492115, "learning_rate": 1.482963209516926e-07, "loss": 0.2069, "step": 43486 }, { "epoch": 0.7559144083853361, "grad_norm": 1.5686894867299397, "learning_rate": 1.4827631332785228e-07, "loss": 0.3017, "step": 43487 }, { "epoch": 0.7559317909228389, "grad_norm": 2.0510406421656566, "learning_rate": 1.4825630681881452e-07, "loss": 0.2222, "step": 43488 }, { "epoch": 0.7559491734603417, "grad_norm": 1.5014268002716225, "learning_rate": 1.4823630142464267e-07, "loss": 0.2385, "step": 43489 }, { "epoch": 0.7559665559978446, "grad_norm": 1.5984264372762962, "learning_rate": 1.4821629714540011e-07, "loss": 0.1998, "step": 43490 }, { "epoch": 0.7559839385353474, "grad_norm": 1.7497824661972683, "learning_rate": 1.481962939811503e-07, "loss": 0.2741, "step": 43491 }, { "epoch": 0.7560013210728502, "grad_norm": 1.0228650028426165, "learning_rate": 1.481762919319567e-07, "loss": 0.0791, "step": 43492 }, { "epoch": 0.7560187036103531, "grad_norm": 1.881080648776629, "learning_rate": 1.4815629099788257e-07, "loss": 0.1859, "step": 43493 }, { "epoch": 0.7560360861478559, "grad_norm": 1.836845335080449, "learning_rate": 1.4813629117899124e-07, "loss": 0.1606, "step": 43494 }, { "epoch": 0.7560534686853587, "grad_norm": 1.1885742242231854, "learning_rate": 1.481162924753464e-07, "loss": 0.369, "step": 43495 }, { "epoch": 0.7560708512228616, "grad_norm": 2.151762343643122, "learning_rate": 1.4809629488701125e-07, "loss": 0.2606, "step": 43496 }, { "epoch": 0.7560882337603644, "grad_norm": 1.3260380815778463, "learning_rate": 1.480762984140494e-07, "loss": 0.169, "step": 43497 }, { "epoch": 0.7561056162978672, "grad_norm": 1.3610705521200104, "learning_rate": 1.4805630305652367e-07, "loss": 0.1645, "step": 43498 }, { "epoch": 0.75612299883537, "grad_norm": 1.8840261551614146, "learning_rate": 1.480363088144979e-07, "loss": 0.1894, "step": 43499 }, { "epoch": 0.7561403813728728, "grad_norm": 0.9323398618190504, "learning_rate": 1.480163156880354e-07, "loss": 0.2333, "step": 43500 }, { "epoch": 0.7561577639103756, "grad_norm": 1.4764577767861458, "learning_rate": 1.479963236771995e-07, "loss": 0.13, "step": 43501 }, { "epoch": 0.7561751464478784, "grad_norm": 1.274405203879053, "learning_rate": 1.479763327820534e-07, "loss": 0.2512, "step": 43502 }, { "epoch": 0.7561925289853813, "grad_norm": 1.358957079079448, "learning_rate": 1.479563430026608e-07, "loss": 0.2109, "step": 43503 }, { "epoch": 0.7562099115228841, "grad_norm": 2.3841236050738006, "learning_rate": 1.4793635433908497e-07, "loss": 0.26, "step": 43504 }, { "epoch": 0.7562272940603869, "grad_norm": 1.4240351581028483, "learning_rate": 1.479163667913891e-07, "loss": 0.194, "step": 43505 }, { "epoch": 0.7562446765978897, "grad_norm": 1.2273595390377041, "learning_rate": 1.4789638035963636e-07, "loss": 0.2468, "step": 43506 }, { "epoch": 0.7562620591353926, "grad_norm": 1.9589610641248607, "learning_rate": 1.4787639504389054e-07, "loss": 0.213, "step": 43507 }, { "epoch": 0.7562794416728954, "grad_norm": 2.2668590614083897, "learning_rate": 1.4785641084421484e-07, "loss": 0.1805, "step": 43508 }, { "epoch": 0.7562968242103982, "grad_norm": 1.2510741544810975, "learning_rate": 1.478364277606725e-07, "loss": 0.1149, "step": 43509 }, { "epoch": 0.7563142067479011, "grad_norm": 1.4511098349333675, "learning_rate": 1.4781644579332696e-07, "loss": 0.275, "step": 43510 }, { "epoch": 0.7563315892854039, "grad_norm": 1.8583659794082519, "learning_rate": 1.4779646494224146e-07, "loss": 0.2505, "step": 43511 }, { "epoch": 0.7563489718229067, "grad_norm": 1.2558322310006251, "learning_rate": 1.4777648520747942e-07, "loss": 0.3257, "step": 43512 }, { "epoch": 0.7563663543604096, "grad_norm": 2.036869176018513, "learning_rate": 1.4775650658910392e-07, "loss": 0.2038, "step": 43513 }, { "epoch": 0.7563837368979124, "grad_norm": 1.624141067440473, "learning_rate": 1.477365290871787e-07, "loss": 0.1914, "step": 43514 }, { "epoch": 0.7564011194354152, "grad_norm": 1.6835764266894973, "learning_rate": 1.4771655270176686e-07, "loss": 0.2762, "step": 43515 }, { "epoch": 0.756418501972918, "grad_norm": 1.7403984053302781, "learning_rate": 1.4769657743293186e-07, "loss": 0.3716, "step": 43516 }, { "epoch": 0.7564358845104209, "grad_norm": 1.3779111908352204, "learning_rate": 1.4767660328073655e-07, "loss": 0.3482, "step": 43517 }, { "epoch": 0.7564532670479237, "grad_norm": 1.4628410143272035, "learning_rate": 1.4765663024524472e-07, "loss": 0.1849, "step": 43518 }, { "epoch": 0.7564706495854265, "grad_norm": 1.0033965552567987, "learning_rate": 1.476366583265195e-07, "loss": 0.1858, "step": 43519 }, { "epoch": 0.7564880321229293, "grad_norm": 1.742977738755232, "learning_rate": 1.4761668752462425e-07, "loss": 0.2604, "step": 43520 }, { "epoch": 0.7565054146604321, "grad_norm": 1.3192432050899938, "learning_rate": 1.4759671783962207e-07, "loss": 0.2516, "step": 43521 }, { "epoch": 0.7565227971979349, "grad_norm": 2.518396605799159, "learning_rate": 1.4757674927157654e-07, "loss": 0.252, "step": 43522 }, { "epoch": 0.7565401797354377, "grad_norm": 1.0316485527114756, "learning_rate": 1.4755678182055105e-07, "loss": 0.3016, "step": 43523 }, { "epoch": 0.7565575622729406, "grad_norm": 1.902659933865451, "learning_rate": 1.475368154866084e-07, "loss": 0.3288, "step": 43524 }, { "epoch": 0.7565749448104434, "grad_norm": 1.1687283503440045, "learning_rate": 1.4751685026981204e-07, "loss": 0.1454, "step": 43525 }, { "epoch": 0.7565923273479462, "grad_norm": 1.7505582886817888, "learning_rate": 1.4749688617022554e-07, "loss": 0.1573, "step": 43526 }, { "epoch": 0.7566097098854491, "grad_norm": 6.456896562854001, "learning_rate": 1.4747692318791193e-07, "loss": 0.3151, "step": 43527 }, { "epoch": 0.7566270924229519, "grad_norm": 1.9651508858774056, "learning_rate": 1.4745696132293455e-07, "loss": 0.314, "step": 43528 }, { "epoch": 0.7566444749604547, "grad_norm": 1.0912081364737338, "learning_rate": 1.4743700057535663e-07, "loss": 0.2228, "step": 43529 }, { "epoch": 0.7566618574979576, "grad_norm": 1.3056219843123666, "learning_rate": 1.4741704094524154e-07, "loss": 0.2527, "step": 43530 }, { "epoch": 0.7566792400354604, "grad_norm": 1.8381536136971734, "learning_rate": 1.4739708243265242e-07, "loss": 0.1357, "step": 43531 }, { "epoch": 0.7566966225729632, "grad_norm": 1.2653284782805532, "learning_rate": 1.4737712503765253e-07, "loss": 0.2371, "step": 43532 }, { "epoch": 0.756714005110466, "grad_norm": 1.8163766387524924, "learning_rate": 1.4735716876030507e-07, "loss": 0.2593, "step": 43533 }, { "epoch": 0.7567313876479689, "grad_norm": 1.0934656658127335, "learning_rate": 1.4733721360067357e-07, "loss": 0.1914, "step": 43534 }, { "epoch": 0.7567487701854717, "grad_norm": 1.4264315588295362, "learning_rate": 1.4731725955882123e-07, "loss": 0.2498, "step": 43535 }, { "epoch": 0.7567661527229745, "grad_norm": 1.5838699556154499, "learning_rate": 1.4729730663481088e-07, "loss": 0.1765, "step": 43536 }, { "epoch": 0.7567835352604774, "grad_norm": 2.528375532413099, "learning_rate": 1.472773548287062e-07, "loss": 0.1371, "step": 43537 }, { "epoch": 0.7568009177979802, "grad_norm": 1.982855077575965, "learning_rate": 1.472574041405702e-07, "loss": 0.2378, "step": 43538 }, { "epoch": 0.7568183003354829, "grad_norm": 1.571752708347985, "learning_rate": 1.4723745457046628e-07, "loss": 0.2627, "step": 43539 }, { "epoch": 0.7568356828729857, "grad_norm": 1.7217942775471022, "learning_rate": 1.4721750611845758e-07, "loss": 0.199, "step": 43540 }, { "epoch": 0.7568530654104886, "grad_norm": 1.5800381704504824, "learning_rate": 1.471975587846071e-07, "loss": 0.2935, "step": 43541 }, { "epoch": 0.7568704479479914, "grad_norm": 1.8005303892181155, "learning_rate": 1.471776125689787e-07, "loss": 0.3199, "step": 43542 }, { "epoch": 0.7568878304854942, "grad_norm": 1.1152031981042525, "learning_rate": 1.47157667471635e-07, "loss": 0.2395, "step": 43543 }, { "epoch": 0.7569052130229971, "grad_norm": 1.5125650817583098, "learning_rate": 1.4713772349263926e-07, "loss": 0.1998, "step": 43544 }, { "epoch": 0.7569225955604999, "grad_norm": 3.2547723936180994, "learning_rate": 1.4711778063205498e-07, "loss": 0.2313, "step": 43545 }, { "epoch": 0.7569399780980027, "grad_norm": 1.4399956255512347, "learning_rate": 1.470978388899452e-07, "loss": 0.1605, "step": 43546 }, { "epoch": 0.7569573606355056, "grad_norm": 2.3037390624316822, "learning_rate": 1.4707789826637324e-07, "loss": 0.2564, "step": 43547 }, { "epoch": 0.7569747431730084, "grad_norm": 1.080122783266286, "learning_rate": 1.470579587614022e-07, "loss": 0.2486, "step": 43548 }, { "epoch": 0.7569921257105112, "grad_norm": 1.5195625041676197, "learning_rate": 1.4703802037509527e-07, "loss": 0.2475, "step": 43549 }, { "epoch": 0.757009508248014, "grad_norm": 2.077835198733092, "learning_rate": 1.4701808310751573e-07, "loss": 0.1612, "step": 43550 }, { "epoch": 0.7570268907855169, "grad_norm": 1.390735685163747, "learning_rate": 1.4699814695872669e-07, "loss": 0.1461, "step": 43551 }, { "epoch": 0.7570442733230197, "grad_norm": 1.4359175554762444, "learning_rate": 1.4697821192879122e-07, "loss": 0.31, "step": 43552 }, { "epoch": 0.7570616558605225, "grad_norm": 0.6889365079999561, "learning_rate": 1.4695827801777278e-07, "loss": 0.1225, "step": 43553 }, { "epoch": 0.7570790383980254, "grad_norm": 1.2692106744964669, "learning_rate": 1.4693834522573462e-07, "loss": 0.1491, "step": 43554 }, { "epoch": 0.7570964209355282, "grad_norm": 1.5857629109487859, "learning_rate": 1.4691841355273954e-07, "loss": 0.2229, "step": 43555 }, { "epoch": 0.757113803473031, "grad_norm": 2.182570886206405, "learning_rate": 1.4689848299885076e-07, "loss": 0.2786, "step": 43556 }, { "epoch": 0.7571311860105339, "grad_norm": 1.23930202571631, "learning_rate": 1.468785535641317e-07, "loss": 0.1641, "step": 43557 }, { "epoch": 0.7571485685480367, "grad_norm": 1.9586671652790513, "learning_rate": 1.4685862524864546e-07, "loss": 0.3918, "step": 43558 }, { "epoch": 0.7571659510855394, "grad_norm": 1.8423056949777297, "learning_rate": 1.468386980524552e-07, "loss": 0.1456, "step": 43559 }, { "epoch": 0.7571833336230422, "grad_norm": 1.3623418317273104, "learning_rate": 1.468187719756238e-07, "loss": 0.2443, "step": 43560 }, { "epoch": 0.7572007161605451, "grad_norm": 1.1549102301778218, "learning_rate": 1.4679884701821498e-07, "loss": 0.1503, "step": 43561 }, { "epoch": 0.7572180986980479, "grad_norm": 2.2205327287032985, "learning_rate": 1.4677892318029144e-07, "loss": 0.1462, "step": 43562 }, { "epoch": 0.7572354812355507, "grad_norm": 1.1043982690604175, "learning_rate": 1.4675900046191642e-07, "loss": 0.237, "step": 43563 }, { "epoch": 0.7572528637730536, "grad_norm": 1.4599628553385726, "learning_rate": 1.46739078863153e-07, "loss": 0.2584, "step": 43564 }, { "epoch": 0.7572702463105564, "grad_norm": 1.1933594181863838, "learning_rate": 1.4671915838406456e-07, "loss": 0.2268, "step": 43565 }, { "epoch": 0.7572876288480592, "grad_norm": 2.086503999590277, "learning_rate": 1.466992390247141e-07, "loss": 0.3113, "step": 43566 }, { "epoch": 0.757305011385562, "grad_norm": 1.8103690011313567, "learning_rate": 1.4667932078516477e-07, "loss": 0.1985, "step": 43567 }, { "epoch": 0.7573223939230649, "grad_norm": 1.1268771643229745, "learning_rate": 1.4665940366547964e-07, "loss": 0.1465, "step": 43568 }, { "epoch": 0.7573397764605677, "grad_norm": 1.5348993081531235, "learning_rate": 1.4663948766572193e-07, "loss": 0.2621, "step": 43569 }, { "epoch": 0.7573571589980705, "grad_norm": 1.5880219873992034, "learning_rate": 1.466195727859547e-07, "loss": 0.3, "step": 43570 }, { "epoch": 0.7573745415355734, "grad_norm": 1.7457018071446013, "learning_rate": 1.4659965902624112e-07, "loss": 0.2307, "step": 43571 }, { "epoch": 0.7573919240730762, "grad_norm": 1.7511417067600028, "learning_rate": 1.465797463866441e-07, "loss": 0.2736, "step": 43572 }, { "epoch": 0.757409306610579, "grad_norm": 1.4970860651425126, "learning_rate": 1.465598348672273e-07, "loss": 0.1317, "step": 43573 }, { "epoch": 0.7574266891480819, "grad_norm": 1.3560846574776864, "learning_rate": 1.465399244680533e-07, "loss": 0.2003, "step": 43574 }, { "epoch": 0.7574440716855847, "grad_norm": 1.2912313371362873, "learning_rate": 1.4652001518918517e-07, "loss": 0.1756, "step": 43575 }, { "epoch": 0.7574614542230875, "grad_norm": 1.5933423043893098, "learning_rate": 1.4650010703068638e-07, "loss": 0.2354, "step": 43576 }, { "epoch": 0.7574788367605904, "grad_norm": 2.2120812208233196, "learning_rate": 1.464801999926199e-07, "loss": 0.1565, "step": 43577 }, { "epoch": 0.7574962192980932, "grad_norm": 1.437297552136027, "learning_rate": 1.464602940750488e-07, "loss": 0.2512, "step": 43578 }, { "epoch": 0.7575136018355959, "grad_norm": 2.5862275512427915, "learning_rate": 1.4644038927803614e-07, "loss": 0.1592, "step": 43579 }, { "epoch": 0.7575309843730987, "grad_norm": 1.4123459003335705, "learning_rate": 1.4642048560164498e-07, "loss": 0.1321, "step": 43580 }, { "epoch": 0.7575483669106016, "grad_norm": 1.7111112195992773, "learning_rate": 1.4640058304593855e-07, "loss": 0.2358, "step": 43581 }, { "epoch": 0.7575657494481044, "grad_norm": 1.169168896947842, "learning_rate": 1.4638068161097982e-07, "loss": 0.256, "step": 43582 }, { "epoch": 0.7575831319856072, "grad_norm": 1.2116742013451485, "learning_rate": 1.463607812968317e-07, "loss": 0.2073, "step": 43583 }, { "epoch": 0.7576005145231101, "grad_norm": 1.108936713017468, "learning_rate": 1.4634088210355766e-07, "loss": 0.3449, "step": 43584 }, { "epoch": 0.7576178970606129, "grad_norm": 1.7149896385195964, "learning_rate": 1.463209840312205e-07, "loss": 0.2589, "step": 43585 }, { "epoch": 0.7576352795981157, "grad_norm": 1.459915375562763, "learning_rate": 1.4630108707988352e-07, "loss": 0.1907, "step": 43586 }, { "epoch": 0.7576526621356185, "grad_norm": 1.157065511537917, "learning_rate": 1.4628119124960935e-07, "loss": 0.3474, "step": 43587 }, { "epoch": 0.7576700446731214, "grad_norm": 1.2647699839435194, "learning_rate": 1.462612965404615e-07, "loss": 0.2398, "step": 43588 }, { "epoch": 0.7576874272106242, "grad_norm": 1.200455259320592, "learning_rate": 1.4624140295250276e-07, "loss": 0.3143, "step": 43589 }, { "epoch": 0.757704809748127, "grad_norm": 2.376214658821604, "learning_rate": 1.4622151048579634e-07, "loss": 0.2547, "step": 43590 }, { "epoch": 0.7577221922856299, "grad_norm": 1.6270923344011705, "learning_rate": 1.4620161914040502e-07, "loss": 0.1616, "step": 43591 }, { "epoch": 0.7577395748231327, "grad_norm": 1.256595528387313, "learning_rate": 1.4618172891639234e-07, "loss": 0.2002, "step": 43592 }, { "epoch": 0.7577569573606355, "grad_norm": 2.407997126761305, "learning_rate": 1.4616183981382095e-07, "loss": 0.3023, "step": 43593 }, { "epoch": 0.7577743398981384, "grad_norm": 1.2103430995290487, "learning_rate": 1.4614195183275395e-07, "loss": 0.2224, "step": 43594 }, { "epoch": 0.7577917224356412, "grad_norm": 1.7807878002399404, "learning_rate": 1.461220649732543e-07, "loss": 0.1784, "step": 43595 }, { "epoch": 0.757809104973144, "grad_norm": 1.7022963131308517, "learning_rate": 1.4610217923538525e-07, "loss": 0.2113, "step": 43596 }, { "epoch": 0.7578264875106469, "grad_norm": 1.6180794343338496, "learning_rate": 1.4608229461920978e-07, "loss": 0.2365, "step": 43597 }, { "epoch": 0.7578438700481497, "grad_norm": 1.7633126633908305, "learning_rate": 1.4606241112479083e-07, "loss": 0.1967, "step": 43598 }, { "epoch": 0.7578612525856524, "grad_norm": 1.1180714554462912, "learning_rate": 1.4604252875219142e-07, "loss": 0.2032, "step": 43599 }, { "epoch": 0.7578786351231552, "grad_norm": 3.277462470333295, "learning_rate": 1.4602264750147463e-07, "loss": 0.2606, "step": 43600 }, { "epoch": 0.7578960176606581, "grad_norm": 1.6571290431945798, "learning_rate": 1.4600276737270346e-07, "loss": 0.2663, "step": 43601 }, { "epoch": 0.7579134001981609, "grad_norm": 2.1516607081574186, "learning_rate": 1.4598288836594074e-07, "loss": 0.1899, "step": 43602 }, { "epoch": 0.7579307827356637, "grad_norm": 1.2055089305559366, "learning_rate": 1.4596301048124977e-07, "loss": 0.1381, "step": 43603 }, { "epoch": 0.7579481652731666, "grad_norm": 1.2398514896829784, "learning_rate": 1.4594313371869343e-07, "loss": 0.2565, "step": 43604 }, { "epoch": 0.7579655478106694, "grad_norm": 2.3928076715446864, "learning_rate": 1.459232580783349e-07, "loss": 0.2455, "step": 43605 }, { "epoch": 0.7579829303481722, "grad_norm": 2.741187565679617, "learning_rate": 1.459033835602367e-07, "loss": 0.2326, "step": 43606 }, { "epoch": 0.758000312885675, "grad_norm": 1.4427393761551544, "learning_rate": 1.458835101644622e-07, "loss": 0.1733, "step": 43607 }, { "epoch": 0.7580176954231779, "grad_norm": 1.3843949584107131, "learning_rate": 1.4586363789107431e-07, "loss": 0.1937, "step": 43608 }, { "epoch": 0.7580350779606807, "grad_norm": 0.9784234242458911, "learning_rate": 1.4584376674013603e-07, "loss": 0.1874, "step": 43609 }, { "epoch": 0.7580524604981835, "grad_norm": 2.203110783215386, "learning_rate": 1.4582389671171019e-07, "loss": 0.1823, "step": 43610 }, { "epoch": 0.7580698430356864, "grad_norm": 1.524203107347142, "learning_rate": 1.458040278058602e-07, "loss": 0.2166, "step": 43611 }, { "epoch": 0.7580872255731892, "grad_norm": 1.5222259162237446, "learning_rate": 1.457841600226486e-07, "loss": 0.2855, "step": 43612 }, { "epoch": 0.758104608110692, "grad_norm": 1.3099566588805274, "learning_rate": 1.4576429336213847e-07, "loss": 0.2326, "step": 43613 }, { "epoch": 0.7581219906481949, "grad_norm": 1.5888751357620587, "learning_rate": 1.4574442782439268e-07, "loss": 0.2423, "step": 43614 }, { "epoch": 0.7581393731856977, "grad_norm": 2.188429911489, "learning_rate": 1.4572456340947449e-07, "loss": 0.2773, "step": 43615 }, { "epoch": 0.7581567557232005, "grad_norm": 3.100016509230537, "learning_rate": 1.4570470011744661e-07, "loss": 0.1806, "step": 43616 }, { "epoch": 0.7581741382607033, "grad_norm": 1.4532255557209077, "learning_rate": 1.4568483794837217e-07, "loss": 0.1387, "step": 43617 }, { "epoch": 0.7581915207982062, "grad_norm": 1.5639422165460697, "learning_rate": 1.45664976902314e-07, "loss": 0.1926, "step": 43618 }, { "epoch": 0.7582089033357089, "grad_norm": 3.0361028248879327, "learning_rate": 1.4564511697933508e-07, "loss": 0.1403, "step": 43619 }, { "epoch": 0.7582262858732117, "grad_norm": 1.6232221224260566, "learning_rate": 1.456252581794984e-07, "loss": 0.2097, "step": 43620 }, { "epoch": 0.7582436684107146, "grad_norm": 1.00603477051373, "learning_rate": 1.456054005028668e-07, "loss": 0.0921, "step": 43621 }, { "epoch": 0.7582610509482174, "grad_norm": 1.0748500906951588, "learning_rate": 1.4558554394950318e-07, "loss": 0.1966, "step": 43622 }, { "epoch": 0.7582784334857202, "grad_norm": 1.693080286995576, "learning_rate": 1.455656885194707e-07, "loss": 0.2472, "step": 43623 }, { "epoch": 0.758295816023223, "grad_norm": 1.6472221117212462, "learning_rate": 1.4554583421283233e-07, "loss": 0.325, "step": 43624 }, { "epoch": 0.7583131985607259, "grad_norm": 1.3516433558499354, "learning_rate": 1.4552598102965059e-07, "loss": 0.2006, "step": 43625 }, { "epoch": 0.7583305810982287, "grad_norm": 1.2552699430430136, "learning_rate": 1.4550612896998877e-07, "loss": 0.0805, "step": 43626 }, { "epoch": 0.7583479636357315, "grad_norm": 1.3226640622557726, "learning_rate": 1.4548627803390968e-07, "loss": 0.1719, "step": 43627 }, { "epoch": 0.7583653461732344, "grad_norm": 1.7570675601686025, "learning_rate": 1.4546642822147625e-07, "loss": 0.3367, "step": 43628 }, { "epoch": 0.7583827287107372, "grad_norm": 1.4316036517094854, "learning_rate": 1.4544657953275135e-07, "loss": 0.129, "step": 43629 }, { "epoch": 0.75840011124824, "grad_norm": 1.8206509288259767, "learning_rate": 1.4542673196779798e-07, "loss": 0.2784, "step": 43630 }, { "epoch": 0.7584174937857429, "grad_norm": 1.450387546994254, "learning_rate": 1.4540688552667895e-07, "loss": 0.182, "step": 43631 }, { "epoch": 0.7584348763232457, "grad_norm": 1.1808730857653296, "learning_rate": 1.4538704020945724e-07, "loss": 0.3047, "step": 43632 }, { "epoch": 0.7584522588607485, "grad_norm": 1.1081468919641912, "learning_rate": 1.453671960161955e-07, "loss": 0.2761, "step": 43633 }, { "epoch": 0.7584696413982513, "grad_norm": 1.1011325804246928, "learning_rate": 1.4534735294695704e-07, "loss": 0.2764, "step": 43634 }, { "epoch": 0.7584870239357542, "grad_norm": 1.1135640130459816, "learning_rate": 1.4532751100180457e-07, "loss": 0.1231, "step": 43635 }, { "epoch": 0.758504406473257, "grad_norm": 1.3748895499295863, "learning_rate": 1.453076701808011e-07, "loss": 0.2455, "step": 43636 }, { "epoch": 0.7585217890107598, "grad_norm": 1.3930842686330986, "learning_rate": 1.4528783048400912e-07, "loss": 0.1902, "step": 43637 }, { "epoch": 0.7585391715482627, "grad_norm": 1.3365249630655798, "learning_rate": 1.4526799191149186e-07, "loss": 0.1472, "step": 43638 }, { "epoch": 0.7585565540857654, "grad_norm": 1.595490531624551, "learning_rate": 1.4524815446331218e-07, "loss": 0.2366, "step": 43639 }, { "epoch": 0.7585739366232682, "grad_norm": 1.0796421009084363, "learning_rate": 1.4522831813953285e-07, "loss": 0.1982, "step": 43640 }, { "epoch": 0.758591319160771, "grad_norm": 1.501129233969267, "learning_rate": 1.452084829402166e-07, "loss": 0.1562, "step": 43641 }, { "epoch": 0.7586087016982739, "grad_norm": 1.3028719755532554, "learning_rate": 1.451886488654267e-07, "loss": 0.1457, "step": 43642 }, { "epoch": 0.7586260842357767, "grad_norm": 1.408289635813573, "learning_rate": 1.451688159152259e-07, "loss": 0.3647, "step": 43643 }, { "epoch": 0.7586434667732795, "grad_norm": 1.663782937985679, "learning_rate": 1.4514898408967685e-07, "loss": 0.2099, "step": 43644 }, { "epoch": 0.7586608493107824, "grad_norm": 2.1224893805302556, "learning_rate": 1.451291533888423e-07, "loss": 0.2155, "step": 43645 }, { "epoch": 0.7586782318482852, "grad_norm": 1.4585455095324984, "learning_rate": 1.451093238127855e-07, "loss": 0.2084, "step": 43646 }, { "epoch": 0.758695614385788, "grad_norm": 2.2992594955751042, "learning_rate": 1.4508949536156917e-07, "loss": 0.221, "step": 43647 }, { "epoch": 0.7587129969232909, "grad_norm": 2.142575312993243, "learning_rate": 1.4506966803525605e-07, "loss": 0.3178, "step": 43648 }, { "epoch": 0.7587303794607937, "grad_norm": 1.1539730946900628, "learning_rate": 1.4504984183390907e-07, "loss": 0.1716, "step": 43649 }, { "epoch": 0.7587477619982965, "grad_norm": 1.0704222010678222, "learning_rate": 1.4503001675759103e-07, "loss": 0.1631, "step": 43650 }, { "epoch": 0.7587651445357994, "grad_norm": 0.8739439950511164, "learning_rate": 1.4501019280636479e-07, "loss": 0.2031, "step": 43651 }, { "epoch": 0.7587825270733022, "grad_norm": 3.3899024100028377, "learning_rate": 1.4499036998029314e-07, "loss": 0.3463, "step": 43652 }, { "epoch": 0.758799909610805, "grad_norm": 1.2832622137971603, "learning_rate": 1.4497054827943877e-07, "loss": 0.2084, "step": 43653 }, { "epoch": 0.7588172921483078, "grad_norm": 2.1522423030075766, "learning_rate": 1.4495072770386485e-07, "loss": 0.2193, "step": 43654 }, { "epoch": 0.7588346746858107, "grad_norm": 1.037189093616737, "learning_rate": 1.449309082536342e-07, "loss": 0.1618, "step": 43655 }, { "epoch": 0.7588520572233135, "grad_norm": 1.356419854506879, "learning_rate": 1.4491108992880913e-07, "loss": 0.2452, "step": 43656 }, { "epoch": 0.7588694397608163, "grad_norm": 1.3963563172950488, "learning_rate": 1.44891272729453e-07, "loss": 0.3207, "step": 43657 }, { "epoch": 0.7588868222983192, "grad_norm": 1.5888774693458163, "learning_rate": 1.4487145665562834e-07, "loss": 0.1921, "step": 43658 }, { "epoch": 0.7589042048358219, "grad_norm": 1.6200624309894696, "learning_rate": 1.4485164170739806e-07, "loss": 0.2255, "step": 43659 }, { "epoch": 0.7589215873733247, "grad_norm": 1.4377255019512578, "learning_rate": 1.4483182788482495e-07, "loss": 0.3095, "step": 43660 }, { "epoch": 0.7589389699108275, "grad_norm": 3.2433124029312093, "learning_rate": 1.4481201518797166e-07, "loss": 0.2608, "step": 43661 }, { "epoch": 0.7589563524483304, "grad_norm": 0.9719984997343976, "learning_rate": 1.4479220361690136e-07, "loss": 0.2057, "step": 43662 }, { "epoch": 0.7589737349858332, "grad_norm": 2.249044995859516, "learning_rate": 1.447723931716765e-07, "loss": 0.2078, "step": 43663 }, { "epoch": 0.758991117523336, "grad_norm": 0.9012051116539345, "learning_rate": 1.4475258385235983e-07, "loss": 0.1448, "step": 43664 }, { "epoch": 0.7590085000608389, "grad_norm": 1.3703690901393002, "learning_rate": 1.4473277565901438e-07, "loss": 0.201, "step": 43665 }, { "epoch": 0.7590258825983417, "grad_norm": 1.809144574718524, "learning_rate": 1.447129685917029e-07, "loss": 0.1486, "step": 43666 }, { "epoch": 0.7590432651358445, "grad_norm": 1.6299419051205215, "learning_rate": 1.4469316265048802e-07, "loss": 0.2753, "step": 43667 }, { "epoch": 0.7590606476733474, "grad_norm": 1.075254048202326, "learning_rate": 1.446733578354327e-07, "loss": 0.3178, "step": 43668 }, { "epoch": 0.7590780302108502, "grad_norm": 2.0788231310082033, "learning_rate": 1.4465355414659958e-07, "loss": 0.2057, "step": 43669 }, { "epoch": 0.759095412748353, "grad_norm": 1.3748251801777405, "learning_rate": 1.4463375158405145e-07, "loss": 0.2977, "step": 43670 }, { "epoch": 0.7591127952858558, "grad_norm": 0.9983974045484634, "learning_rate": 1.4461395014785106e-07, "loss": 0.0741, "step": 43671 }, { "epoch": 0.7591301778233587, "grad_norm": 1.7102481157740443, "learning_rate": 1.4459414983806106e-07, "loss": 0.2352, "step": 43672 }, { "epoch": 0.7591475603608615, "grad_norm": 1.9025563429663253, "learning_rate": 1.4457435065474448e-07, "loss": 0.209, "step": 43673 }, { "epoch": 0.7591649428983643, "grad_norm": 1.382190836937401, "learning_rate": 1.4455455259796418e-07, "loss": 0.1977, "step": 43674 }, { "epoch": 0.7591823254358672, "grad_norm": 0.8539283630986899, "learning_rate": 1.4453475566778246e-07, "loss": 0.2543, "step": 43675 }, { "epoch": 0.75919970797337, "grad_norm": 1.2877982159000498, "learning_rate": 1.445149598642621e-07, "loss": 0.2027, "step": 43676 }, { "epoch": 0.7592170905108728, "grad_norm": 0.959374514751165, "learning_rate": 1.444951651874662e-07, "loss": 0.2575, "step": 43677 }, { "epoch": 0.7592344730483755, "grad_norm": 1.01925542453131, "learning_rate": 1.4447537163745727e-07, "loss": 0.1041, "step": 43678 }, { "epoch": 0.7592518555858784, "grad_norm": 2.832506809374067, "learning_rate": 1.4445557921429813e-07, "loss": 0.309, "step": 43679 }, { "epoch": 0.7592692381233812, "grad_norm": 2.1360943429832844, "learning_rate": 1.4443578791805134e-07, "loss": 0.1877, "step": 43680 }, { "epoch": 0.759286620660884, "grad_norm": 1.6729775911404303, "learning_rate": 1.444159977487801e-07, "loss": 0.1525, "step": 43681 }, { "epoch": 0.7593040031983869, "grad_norm": 1.2762409049090304, "learning_rate": 1.443962087065466e-07, "loss": 0.1454, "step": 43682 }, { "epoch": 0.7593213857358897, "grad_norm": 1.6237367136012355, "learning_rate": 1.443764207914138e-07, "loss": 0.2075, "step": 43683 }, { "epoch": 0.7593387682733925, "grad_norm": 1.8059275500999123, "learning_rate": 1.4435663400344422e-07, "loss": 0.1695, "step": 43684 }, { "epoch": 0.7593561508108954, "grad_norm": 1.1933478532927466, "learning_rate": 1.4433684834270088e-07, "loss": 0.1513, "step": 43685 }, { "epoch": 0.7593735333483982, "grad_norm": 1.3876826834984015, "learning_rate": 1.443170638092464e-07, "loss": 0.1284, "step": 43686 }, { "epoch": 0.759390915885901, "grad_norm": 2.0613817545791613, "learning_rate": 1.4429728040314342e-07, "loss": 0.1983, "step": 43687 }, { "epoch": 0.7594082984234038, "grad_norm": 1.3397240038806195, "learning_rate": 1.4427749812445468e-07, "loss": 0.2083, "step": 43688 }, { "epoch": 0.7594256809609067, "grad_norm": 1.8232487653461873, "learning_rate": 1.4425771697324278e-07, "loss": 0.1725, "step": 43689 }, { "epoch": 0.7594430634984095, "grad_norm": 1.3936790162621338, "learning_rate": 1.442379369495706e-07, "loss": 0.1978, "step": 43690 }, { "epoch": 0.7594604460359123, "grad_norm": 1.2037620528296404, "learning_rate": 1.4421815805350068e-07, "loss": 0.1317, "step": 43691 }, { "epoch": 0.7594778285734152, "grad_norm": 1.653971592638862, "learning_rate": 1.4419838028509563e-07, "loss": 0.1417, "step": 43692 }, { "epoch": 0.759495211110918, "grad_norm": 1.3452334128926549, "learning_rate": 1.441786036444186e-07, "loss": 0.3489, "step": 43693 }, { "epoch": 0.7595125936484208, "grad_norm": 1.1463765489315123, "learning_rate": 1.441588281315318e-07, "loss": 0.1384, "step": 43694 }, { "epoch": 0.7595299761859237, "grad_norm": 1.9143398276436432, "learning_rate": 1.441390537464978e-07, "loss": 0.2306, "step": 43695 }, { "epoch": 0.7595473587234265, "grad_norm": 3.3774078238961103, "learning_rate": 1.4411928048937983e-07, "loss": 0.2857, "step": 43696 }, { "epoch": 0.7595647412609293, "grad_norm": 1.6204390593868168, "learning_rate": 1.4409950836024016e-07, "loss": 0.1442, "step": 43697 }, { "epoch": 0.759582123798432, "grad_norm": 1.8286058659626598, "learning_rate": 1.4407973735914158e-07, "loss": 0.2419, "step": 43698 }, { "epoch": 0.7595995063359349, "grad_norm": 2.0502573034378124, "learning_rate": 1.4405996748614664e-07, "loss": 0.2334, "step": 43699 }, { "epoch": 0.7596168888734377, "grad_norm": 1.5054430277891522, "learning_rate": 1.4404019874131835e-07, "loss": 0.4334, "step": 43700 }, { "epoch": 0.7596342714109405, "grad_norm": 1.79843219815095, "learning_rate": 1.4402043112471895e-07, "loss": 0.2427, "step": 43701 }, { "epoch": 0.7596516539484434, "grad_norm": 2.1114433581756815, "learning_rate": 1.4400066463641132e-07, "loss": 0.2874, "step": 43702 }, { "epoch": 0.7596690364859462, "grad_norm": 1.2957907429022117, "learning_rate": 1.4398089927645784e-07, "loss": 0.2471, "step": 43703 }, { "epoch": 0.759686419023449, "grad_norm": 2.727222603845305, "learning_rate": 1.4396113504492153e-07, "loss": 0.3254, "step": 43704 }, { "epoch": 0.7597038015609519, "grad_norm": 1.3195398518370696, "learning_rate": 1.439413719418649e-07, "loss": 0.3087, "step": 43705 }, { "epoch": 0.7597211840984547, "grad_norm": 1.1975432185086798, "learning_rate": 1.4392160996735053e-07, "loss": 0.3428, "step": 43706 }, { "epoch": 0.7597385666359575, "grad_norm": 0.7437587029168976, "learning_rate": 1.439018491214411e-07, "loss": 0.1548, "step": 43707 }, { "epoch": 0.7597559491734603, "grad_norm": 1.2241645802948646, "learning_rate": 1.4388208940419922e-07, "loss": 0.2362, "step": 43708 }, { "epoch": 0.7597733317109632, "grad_norm": 1.5426226799500362, "learning_rate": 1.4386233081568754e-07, "loss": 0.2575, "step": 43709 }, { "epoch": 0.759790714248466, "grad_norm": 1.0548753097132542, "learning_rate": 1.4384257335596868e-07, "loss": 0.2022, "step": 43710 }, { "epoch": 0.7598080967859688, "grad_norm": 1.5433333231502322, "learning_rate": 1.4382281702510508e-07, "loss": 0.3302, "step": 43711 }, { "epoch": 0.7598254793234717, "grad_norm": 1.1594315537377298, "learning_rate": 1.4380306182315987e-07, "loss": 0.2254, "step": 43712 }, { "epoch": 0.7598428618609745, "grad_norm": 1.296345988135996, "learning_rate": 1.437833077501952e-07, "loss": 0.1102, "step": 43713 }, { "epoch": 0.7598602443984773, "grad_norm": 2.0664296672727396, "learning_rate": 1.437635548062736e-07, "loss": 0.255, "step": 43714 }, { "epoch": 0.7598776269359802, "grad_norm": 1.2708795245254674, "learning_rate": 1.4374380299145806e-07, "loss": 0.1846, "step": 43715 }, { "epoch": 0.759895009473483, "grad_norm": 0.9244023572964324, "learning_rate": 1.4372405230581103e-07, "loss": 0.158, "step": 43716 }, { "epoch": 0.7599123920109858, "grad_norm": 1.7700022776727333, "learning_rate": 1.437043027493951e-07, "loss": 0.2156, "step": 43717 }, { "epoch": 0.7599297745484885, "grad_norm": 0.9720400110442469, "learning_rate": 1.4368455432227283e-07, "loss": 0.2107, "step": 43718 }, { "epoch": 0.7599471570859914, "grad_norm": 4.562483698762673, "learning_rate": 1.4366480702450688e-07, "loss": 0.2692, "step": 43719 }, { "epoch": 0.7599645396234942, "grad_norm": 0.7689460408018515, "learning_rate": 1.436450608561598e-07, "loss": 0.1268, "step": 43720 }, { "epoch": 0.759981922160997, "grad_norm": 1.3923385198458371, "learning_rate": 1.4362531581729415e-07, "loss": 0.2717, "step": 43721 }, { "epoch": 0.7599993046984999, "grad_norm": 1.917847092935067, "learning_rate": 1.436055719079724e-07, "loss": 0.2907, "step": 43722 }, { "epoch": 0.7600166872360027, "grad_norm": 1.5604815243347085, "learning_rate": 1.4358582912825745e-07, "loss": 0.1568, "step": 43723 }, { "epoch": 0.7600340697735055, "grad_norm": 1.871174949965343, "learning_rate": 1.435660874782117e-07, "loss": 0.1861, "step": 43724 }, { "epoch": 0.7600514523110083, "grad_norm": 1.5296431464343314, "learning_rate": 1.4354634695789785e-07, "loss": 0.2008, "step": 43725 }, { "epoch": 0.7600688348485112, "grad_norm": 1.8833508020950191, "learning_rate": 1.4352660756737805e-07, "loss": 0.2048, "step": 43726 }, { "epoch": 0.760086217386014, "grad_norm": 1.1027233485960521, "learning_rate": 1.435068693067153e-07, "loss": 0.2315, "step": 43727 }, { "epoch": 0.7601035999235168, "grad_norm": 1.4110840488880048, "learning_rate": 1.4348713217597196e-07, "loss": 0.1669, "step": 43728 }, { "epoch": 0.7601209824610197, "grad_norm": 6.496731532257684, "learning_rate": 1.434673961752107e-07, "loss": 0.1657, "step": 43729 }, { "epoch": 0.7601383649985225, "grad_norm": 1.754410844829392, "learning_rate": 1.4344766130449388e-07, "loss": 0.1329, "step": 43730 }, { "epoch": 0.7601557475360253, "grad_norm": 1.155800426608304, "learning_rate": 1.4342792756388445e-07, "loss": 0.212, "step": 43731 }, { "epoch": 0.7601731300735282, "grad_norm": 0.9141272464834721, "learning_rate": 1.4340819495344458e-07, "loss": 0.2099, "step": 43732 }, { "epoch": 0.760190512611031, "grad_norm": 1.785145890992337, "learning_rate": 1.433884634732369e-07, "loss": 0.1973, "step": 43733 }, { "epoch": 0.7602078951485338, "grad_norm": 1.229659809871577, "learning_rate": 1.4336873312332382e-07, "loss": 0.2266, "step": 43734 }, { "epoch": 0.7602252776860366, "grad_norm": 1.235459186014268, "learning_rate": 1.433490039037682e-07, "loss": 0.1706, "step": 43735 }, { "epoch": 0.7602426602235395, "grad_norm": 1.407379173200098, "learning_rate": 1.4332927581463245e-07, "loss": 0.2187, "step": 43736 }, { "epoch": 0.7602600427610423, "grad_norm": 1.4274809645675162, "learning_rate": 1.4330954885597894e-07, "loss": 0.1732, "step": 43737 }, { "epoch": 0.760277425298545, "grad_norm": 2.9416665784785363, "learning_rate": 1.432898230278704e-07, "loss": 0.1868, "step": 43738 }, { "epoch": 0.7602948078360479, "grad_norm": 1.6374338080078454, "learning_rate": 1.4327009833036924e-07, "loss": 0.2385, "step": 43739 }, { "epoch": 0.7603121903735507, "grad_norm": 1.690663436130126, "learning_rate": 1.4325037476353802e-07, "loss": 0.1731, "step": 43740 }, { "epoch": 0.7603295729110535, "grad_norm": 1.1311818615410014, "learning_rate": 1.432306523274392e-07, "loss": 0.2354, "step": 43741 }, { "epoch": 0.7603469554485563, "grad_norm": 1.7878808323967064, "learning_rate": 1.432109310221352e-07, "loss": 0.3912, "step": 43742 }, { "epoch": 0.7603643379860592, "grad_norm": 1.8206729118854614, "learning_rate": 1.431912108476888e-07, "loss": 0.2543, "step": 43743 }, { "epoch": 0.760381720523562, "grad_norm": 1.4672495221310076, "learning_rate": 1.431714918041625e-07, "loss": 0.1578, "step": 43744 }, { "epoch": 0.7603991030610648, "grad_norm": 1.7615792851377896, "learning_rate": 1.4315177389161836e-07, "loss": 0.2106, "step": 43745 }, { "epoch": 0.7604164855985677, "grad_norm": 1.6650133167518868, "learning_rate": 1.4313205711011932e-07, "loss": 0.3185, "step": 43746 }, { "epoch": 0.7604338681360705, "grad_norm": 1.4098309913821383, "learning_rate": 1.4311234145972773e-07, "loss": 0.2219, "step": 43747 }, { "epoch": 0.7604512506735733, "grad_norm": 1.3889954753784526, "learning_rate": 1.4309262694050607e-07, "loss": 0.2187, "step": 43748 }, { "epoch": 0.7604686332110762, "grad_norm": 1.5193634988227882, "learning_rate": 1.4307291355251681e-07, "loss": 0.2744, "step": 43749 }, { "epoch": 0.760486015748579, "grad_norm": 2.3678920981956773, "learning_rate": 1.430532012958225e-07, "loss": 0.3101, "step": 43750 }, { "epoch": 0.7605033982860818, "grad_norm": 2.445589347011509, "learning_rate": 1.4303349017048554e-07, "loss": 0.2162, "step": 43751 }, { "epoch": 0.7605207808235847, "grad_norm": 2.106860456709944, "learning_rate": 1.430137801765685e-07, "loss": 0.2498, "step": 43752 }, { "epoch": 0.7605381633610875, "grad_norm": 1.3222295148650267, "learning_rate": 1.429940713141336e-07, "loss": 0.1536, "step": 43753 }, { "epoch": 0.7605555458985903, "grad_norm": 1.424397499949447, "learning_rate": 1.429743635832436e-07, "loss": 0.1217, "step": 43754 }, { "epoch": 0.7605729284360931, "grad_norm": 2.24486379967379, "learning_rate": 1.4295465698396093e-07, "loss": 0.1295, "step": 43755 }, { "epoch": 0.760590310973596, "grad_norm": 1.4822935739976266, "learning_rate": 1.429349515163481e-07, "loss": 0.3179, "step": 43756 }, { "epoch": 0.7606076935110988, "grad_norm": 1.3560428277597503, "learning_rate": 1.4291524718046715e-07, "loss": 0.2209, "step": 43757 }, { "epoch": 0.7606250760486015, "grad_norm": 1.0355107552942477, "learning_rate": 1.4289554397638104e-07, "loss": 0.1717, "step": 43758 }, { "epoch": 0.7606424585861044, "grad_norm": 1.8509520347632578, "learning_rate": 1.4287584190415197e-07, "loss": 0.161, "step": 43759 }, { "epoch": 0.7606598411236072, "grad_norm": 1.4306256547642078, "learning_rate": 1.4285614096384247e-07, "loss": 0.2327, "step": 43760 }, { "epoch": 0.76067722366111, "grad_norm": 1.007160626258113, "learning_rate": 1.4283644115551475e-07, "loss": 0.2936, "step": 43761 }, { "epoch": 0.7606946061986128, "grad_norm": 1.2655328818166813, "learning_rate": 1.428167424792316e-07, "loss": 0.1971, "step": 43762 }, { "epoch": 0.7607119887361157, "grad_norm": 1.257302817294317, "learning_rate": 1.4279704493505545e-07, "loss": 0.2722, "step": 43763 }, { "epoch": 0.7607293712736185, "grad_norm": 1.2712976825334432, "learning_rate": 1.4277734852304846e-07, "loss": 0.2328, "step": 43764 }, { "epoch": 0.7607467538111213, "grad_norm": 2.767936480546684, "learning_rate": 1.42757653243273e-07, "loss": 0.1688, "step": 43765 }, { "epoch": 0.7607641363486242, "grad_norm": 1.4670604107093153, "learning_rate": 1.4273795909579178e-07, "loss": 0.1784, "step": 43766 }, { "epoch": 0.760781518886127, "grad_norm": 0.9424957377964605, "learning_rate": 1.4271826608066722e-07, "loss": 0.1853, "step": 43767 }, { "epoch": 0.7607989014236298, "grad_norm": 1.1499415468811216, "learning_rate": 1.4269857419796154e-07, "loss": 0.2505, "step": 43768 }, { "epoch": 0.7608162839611327, "grad_norm": 1.210559711374364, "learning_rate": 1.4267888344773733e-07, "loss": 0.1944, "step": 43769 }, { "epoch": 0.7608336664986355, "grad_norm": 1.8989594198713207, "learning_rate": 1.4265919383005686e-07, "loss": 0.2857, "step": 43770 }, { "epoch": 0.7608510490361383, "grad_norm": 3.851880255273831, "learning_rate": 1.4263950534498264e-07, "loss": 0.2608, "step": 43771 }, { "epoch": 0.7608684315736411, "grad_norm": 1.3932157885202912, "learning_rate": 1.42619817992577e-07, "loss": 0.1894, "step": 43772 }, { "epoch": 0.760885814111144, "grad_norm": 1.8147277485339632, "learning_rate": 1.4260013177290226e-07, "loss": 0.2233, "step": 43773 }, { "epoch": 0.7609031966486468, "grad_norm": 2.1060418239964935, "learning_rate": 1.4258044668602104e-07, "loss": 0.2795, "step": 43774 }, { "epoch": 0.7609205791861496, "grad_norm": 2.4066800829623687, "learning_rate": 1.4256076273199575e-07, "loss": 0.2041, "step": 43775 }, { "epoch": 0.7609379617236525, "grad_norm": 1.4211823531656231, "learning_rate": 1.4254107991088838e-07, "loss": 0.2095, "step": 43776 }, { "epoch": 0.7609553442611553, "grad_norm": 1.7608826884422368, "learning_rate": 1.4252139822276172e-07, "loss": 0.1886, "step": 43777 }, { "epoch": 0.760972726798658, "grad_norm": 1.3573460793866154, "learning_rate": 1.4250171766767806e-07, "loss": 0.1935, "step": 43778 }, { "epoch": 0.7609901093361608, "grad_norm": 1.3084175347178593, "learning_rate": 1.4248203824569966e-07, "loss": 0.2929, "step": 43779 }, { "epoch": 0.7610074918736637, "grad_norm": 1.5578555130158256, "learning_rate": 1.42462359956889e-07, "loss": 0.2649, "step": 43780 }, { "epoch": 0.7610248744111665, "grad_norm": 2.4853872122058056, "learning_rate": 1.4244268280130827e-07, "loss": 0.2139, "step": 43781 }, { "epoch": 0.7610422569486693, "grad_norm": 1.8239621475281265, "learning_rate": 1.424230067790203e-07, "loss": 0.1776, "step": 43782 }, { "epoch": 0.7610596394861722, "grad_norm": 1.2130457078441257, "learning_rate": 1.4240333189008696e-07, "loss": 0.1037, "step": 43783 }, { "epoch": 0.761077022023675, "grad_norm": 2.0976807116266905, "learning_rate": 1.423836581345707e-07, "loss": 0.277, "step": 43784 }, { "epoch": 0.7610944045611778, "grad_norm": 2.218553566774048, "learning_rate": 1.423639855125341e-07, "loss": 0.2654, "step": 43785 }, { "epoch": 0.7611117870986807, "grad_norm": 2.0849127115007473, "learning_rate": 1.4234431402403935e-07, "loss": 0.1571, "step": 43786 }, { "epoch": 0.7611291696361835, "grad_norm": 1.7499257292138954, "learning_rate": 1.423246436691488e-07, "loss": 0.1798, "step": 43787 }, { "epoch": 0.7611465521736863, "grad_norm": 1.4813266010456418, "learning_rate": 1.4230497444792488e-07, "loss": 0.1764, "step": 43788 }, { "epoch": 0.7611639347111891, "grad_norm": 1.8825541879072616, "learning_rate": 1.4228530636042991e-07, "loss": 0.1783, "step": 43789 }, { "epoch": 0.761181317248692, "grad_norm": 1.2009172455840411, "learning_rate": 1.4226563940672614e-07, "loss": 0.2113, "step": 43790 }, { "epoch": 0.7611986997861948, "grad_norm": 1.6037414867800375, "learning_rate": 1.42245973586876e-07, "loss": 0.1151, "step": 43791 }, { "epoch": 0.7612160823236976, "grad_norm": 3.4088833785362525, "learning_rate": 1.4222630890094162e-07, "loss": 0.2291, "step": 43792 }, { "epoch": 0.7612334648612005, "grad_norm": 1.2488944562834319, "learning_rate": 1.4220664534898563e-07, "loss": 0.123, "step": 43793 }, { "epoch": 0.7612508473987033, "grad_norm": 1.735105326203379, "learning_rate": 1.421869829310704e-07, "loss": 0.1922, "step": 43794 }, { "epoch": 0.7612682299362061, "grad_norm": 1.4030312258868418, "learning_rate": 1.421673216472577e-07, "loss": 0.2776, "step": 43795 }, { "epoch": 0.761285612473709, "grad_norm": 1.1847933356855824, "learning_rate": 1.4214766149761044e-07, "loss": 0.2136, "step": 43796 }, { "epoch": 0.7613029950112118, "grad_norm": 1.2247798184406258, "learning_rate": 1.4212800248219065e-07, "loss": 0.189, "step": 43797 }, { "epoch": 0.7613203775487145, "grad_norm": 1.1733409372293548, "learning_rate": 1.4210834460106074e-07, "loss": 0.2212, "step": 43798 }, { "epoch": 0.7613377600862173, "grad_norm": 1.2758070711883158, "learning_rate": 1.4208868785428297e-07, "loss": 0.2427, "step": 43799 }, { "epoch": 0.7613551426237202, "grad_norm": 1.4145767817564454, "learning_rate": 1.4206903224191953e-07, "loss": 0.1962, "step": 43800 }, { "epoch": 0.761372525161223, "grad_norm": 1.5901226160930253, "learning_rate": 1.4204937776403308e-07, "loss": 0.2043, "step": 43801 }, { "epoch": 0.7613899076987258, "grad_norm": 1.0944697918195685, "learning_rate": 1.4202972442068557e-07, "loss": 0.1216, "step": 43802 }, { "epoch": 0.7614072902362287, "grad_norm": 1.1682049169369497, "learning_rate": 1.4201007221193917e-07, "loss": 0.1468, "step": 43803 }, { "epoch": 0.7614246727737315, "grad_norm": 1.8055933731713054, "learning_rate": 1.4199042113785658e-07, "loss": 0.1834, "step": 43804 }, { "epoch": 0.7614420553112343, "grad_norm": 1.6535876995879881, "learning_rate": 1.4197077119849993e-07, "loss": 0.2828, "step": 43805 }, { "epoch": 0.7614594378487372, "grad_norm": 2.453756075121536, "learning_rate": 1.4195112239393148e-07, "loss": 0.1711, "step": 43806 }, { "epoch": 0.76147682038624, "grad_norm": 1.1885299604669115, "learning_rate": 1.4193147472421346e-07, "loss": 0.1682, "step": 43807 }, { "epoch": 0.7614942029237428, "grad_norm": 1.3084859391843084, "learning_rate": 1.4191182818940818e-07, "loss": 0.1725, "step": 43808 }, { "epoch": 0.7615115854612456, "grad_norm": 1.4872749262886962, "learning_rate": 1.4189218278957792e-07, "loss": 0.3019, "step": 43809 }, { "epoch": 0.7615289679987485, "grad_norm": 0.8250790936576543, "learning_rate": 1.41872538524785e-07, "loss": 0.1944, "step": 43810 }, { "epoch": 0.7615463505362513, "grad_norm": 1.2349663280841077, "learning_rate": 1.4185289539509138e-07, "loss": 0.1594, "step": 43811 }, { "epoch": 0.7615637330737541, "grad_norm": 1.1509845607269966, "learning_rate": 1.4183325340055972e-07, "loss": 0.1691, "step": 43812 }, { "epoch": 0.761581115611257, "grad_norm": 1.6634847725696662, "learning_rate": 1.4181361254125236e-07, "loss": 0.196, "step": 43813 }, { "epoch": 0.7615984981487598, "grad_norm": 1.3158737075510996, "learning_rate": 1.4179397281723106e-07, "loss": 0.1936, "step": 43814 }, { "epoch": 0.7616158806862626, "grad_norm": 0.8690851571276927, "learning_rate": 1.417743342285582e-07, "loss": 0.1826, "step": 43815 }, { "epoch": 0.7616332632237655, "grad_norm": 2.7104638712121933, "learning_rate": 1.4175469677529632e-07, "loss": 0.1965, "step": 43816 }, { "epoch": 0.7616506457612682, "grad_norm": 1.0815601114714564, "learning_rate": 1.4173506045750743e-07, "loss": 0.1631, "step": 43817 }, { "epoch": 0.761668028298771, "grad_norm": 1.3078625155060348, "learning_rate": 1.4171542527525387e-07, "loss": 0.1882, "step": 43818 }, { "epoch": 0.7616854108362738, "grad_norm": 3.5102252621941785, "learning_rate": 1.416957912285977e-07, "loss": 0.2583, "step": 43819 }, { "epoch": 0.7617027933737767, "grad_norm": 1.3589959143305972, "learning_rate": 1.4167615831760154e-07, "loss": 0.199, "step": 43820 }, { "epoch": 0.7617201759112795, "grad_norm": 1.252297143028206, "learning_rate": 1.4165652654232723e-07, "loss": 0.1693, "step": 43821 }, { "epoch": 0.7617375584487823, "grad_norm": 2.102010965767782, "learning_rate": 1.4163689590283712e-07, "loss": 0.2433, "step": 43822 }, { "epoch": 0.7617549409862852, "grad_norm": 1.799006605175782, "learning_rate": 1.416172663991933e-07, "loss": 0.2891, "step": 43823 }, { "epoch": 0.761772323523788, "grad_norm": 1.1738825809580418, "learning_rate": 1.4159763803145825e-07, "loss": 0.193, "step": 43824 }, { "epoch": 0.7617897060612908, "grad_norm": 3.8513268780086465, "learning_rate": 1.41578010799694e-07, "loss": 0.2573, "step": 43825 }, { "epoch": 0.7618070885987936, "grad_norm": 1.3613973074672399, "learning_rate": 1.4155838470396285e-07, "loss": 0.2085, "step": 43826 }, { "epoch": 0.7618244711362965, "grad_norm": 1.2821667370981304, "learning_rate": 1.41538759744327e-07, "loss": 0.2175, "step": 43827 }, { "epoch": 0.7618418536737993, "grad_norm": 1.4272230188471013, "learning_rate": 1.4151913592084858e-07, "loss": 0.1657, "step": 43828 }, { "epoch": 0.7618592362113021, "grad_norm": 1.2435463033643641, "learning_rate": 1.4149951323358982e-07, "loss": 0.247, "step": 43829 }, { "epoch": 0.761876618748805, "grad_norm": 1.6613593792307921, "learning_rate": 1.4147989168261292e-07, "loss": 0.2266, "step": 43830 }, { "epoch": 0.7618940012863078, "grad_norm": 1.3030795593103164, "learning_rate": 1.4146027126798e-07, "loss": 0.3118, "step": 43831 }, { "epoch": 0.7619113838238106, "grad_norm": 2.052057989778013, "learning_rate": 1.4144065198975353e-07, "loss": 0.2651, "step": 43832 }, { "epoch": 0.7619287663613135, "grad_norm": 1.8377660250652001, "learning_rate": 1.4142103384799536e-07, "loss": 0.2513, "step": 43833 }, { "epoch": 0.7619461488988163, "grad_norm": 1.4338410967683783, "learning_rate": 1.4140141684276762e-07, "loss": 0.258, "step": 43834 }, { "epoch": 0.7619635314363191, "grad_norm": 8.070368341517998, "learning_rate": 1.413818009741329e-07, "loss": 0.1994, "step": 43835 }, { "epoch": 0.761980913973822, "grad_norm": 1.0407838180638114, "learning_rate": 1.4136218624215307e-07, "loss": 0.2924, "step": 43836 }, { "epoch": 0.7619982965113247, "grad_norm": 1.2374507725475135, "learning_rate": 1.4134257264689037e-07, "loss": 0.2621, "step": 43837 }, { "epoch": 0.7620156790488275, "grad_norm": 1.2122294907510962, "learning_rate": 1.4132296018840696e-07, "loss": 0.2993, "step": 43838 }, { "epoch": 0.7620330615863303, "grad_norm": 1.1094951383754654, "learning_rate": 1.4130334886676497e-07, "loss": 0.1547, "step": 43839 }, { "epoch": 0.7620504441238332, "grad_norm": 1.706457830717108, "learning_rate": 1.4128373868202665e-07, "loss": 0.1769, "step": 43840 }, { "epoch": 0.762067826661336, "grad_norm": 1.5890721888904076, "learning_rate": 1.4126412963425404e-07, "loss": 0.3208, "step": 43841 }, { "epoch": 0.7620852091988388, "grad_norm": 1.7887194868156941, "learning_rate": 1.4124452172350926e-07, "loss": 0.2773, "step": 43842 }, { "epoch": 0.7621025917363417, "grad_norm": 0.950566153723687, "learning_rate": 1.4122491494985468e-07, "loss": 0.2141, "step": 43843 }, { "epoch": 0.7621199742738445, "grad_norm": 1.161966167156185, "learning_rate": 1.4120530931335228e-07, "loss": 0.3573, "step": 43844 }, { "epoch": 0.7621373568113473, "grad_norm": 1.6268012497498658, "learning_rate": 1.4118570481406446e-07, "loss": 0.2876, "step": 43845 }, { "epoch": 0.7621547393488501, "grad_norm": 1.4759270877190644, "learning_rate": 1.4116610145205275e-07, "loss": 0.2898, "step": 43846 }, { "epoch": 0.762172121886353, "grad_norm": 0.9980123605257647, "learning_rate": 1.4114649922737982e-07, "loss": 0.2942, "step": 43847 }, { "epoch": 0.7621895044238558, "grad_norm": 1.78486434737766, "learning_rate": 1.4112689814010768e-07, "loss": 0.2276, "step": 43848 }, { "epoch": 0.7622068869613586, "grad_norm": 1.264513429400815, "learning_rate": 1.4110729819029838e-07, "loss": 0.1778, "step": 43849 }, { "epoch": 0.7622242694988615, "grad_norm": 2.0747999512887167, "learning_rate": 1.4108769937801396e-07, "loss": 0.2875, "step": 43850 }, { "epoch": 0.7622416520363643, "grad_norm": 4.256367585314516, "learning_rate": 1.4106810170331695e-07, "loss": 0.4845, "step": 43851 }, { "epoch": 0.7622590345738671, "grad_norm": 1.5377604432250662, "learning_rate": 1.4104850516626903e-07, "loss": 0.3613, "step": 43852 }, { "epoch": 0.76227641711137, "grad_norm": 1.543429770539526, "learning_rate": 1.4102890976693248e-07, "loss": 0.2378, "step": 43853 }, { "epoch": 0.7622937996488728, "grad_norm": 0.9051404944576001, "learning_rate": 1.4100931550536926e-07, "loss": 0.1193, "step": 43854 }, { "epoch": 0.7623111821863756, "grad_norm": 1.1928310439757581, "learning_rate": 1.409897223816417e-07, "loss": 0.2058, "step": 43855 }, { "epoch": 0.7623285647238784, "grad_norm": 0.8490990259824635, "learning_rate": 1.409701303958118e-07, "loss": 0.1686, "step": 43856 }, { "epoch": 0.7623459472613812, "grad_norm": 1.4490364200165353, "learning_rate": 1.4095053954794172e-07, "loss": 0.2154, "step": 43857 }, { "epoch": 0.762363329798884, "grad_norm": 1.4976395174885597, "learning_rate": 1.4093094983809345e-07, "loss": 0.189, "step": 43858 }, { "epoch": 0.7623807123363868, "grad_norm": 1.3479422718517367, "learning_rate": 1.4091136126632912e-07, "loss": 0.2243, "step": 43859 }, { "epoch": 0.7623980948738897, "grad_norm": 1.8116056329480055, "learning_rate": 1.4089177383271082e-07, "loss": 0.2498, "step": 43860 }, { "epoch": 0.7624154774113925, "grad_norm": 2.5472614061311463, "learning_rate": 1.4087218753730067e-07, "loss": 0.2658, "step": 43861 }, { "epoch": 0.7624328599488953, "grad_norm": 1.1756312437521819, "learning_rate": 1.4085260238016055e-07, "loss": 0.1931, "step": 43862 }, { "epoch": 0.7624502424863981, "grad_norm": 0.8099963171149677, "learning_rate": 1.4083301836135287e-07, "loss": 0.1568, "step": 43863 }, { "epoch": 0.762467625023901, "grad_norm": 0.8539790004046787, "learning_rate": 1.4081343548093966e-07, "loss": 0.1059, "step": 43864 }, { "epoch": 0.7624850075614038, "grad_norm": 1.5472700046201358, "learning_rate": 1.4079385373898257e-07, "loss": 0.1912, "step": 43865 }, { "epoch": 0.7625023900989066, "grad_norm": 1.9887729384191866, "learning_rate": 1.4077427313554406e-07, "loss": 0.2339, "step": 43866 }, { "epoch": 0.7625197726364095, "grad_norm": 1.6917113724551256, "learning_rate": 1.4075469367068616e-07, "loss": 0.2194, "step": 43867 }, { "epoch": 0.7625371551739123, "grad_norm": 1.0738837229229317, "learning_rate": 1.4073511534447085e-07, "loss": 0.2012, "step": 43868 }, { "epoch": 0.7625545377114151, "grad_norm": 2.0902393993310295, "learning_rate": 1.407155381569602e-07, "loss": 0.2328, "step": 43869 }, { "epoch": 0.762571920248918, "grad_norm": 0.76342460033248, "learning_rate": 1.4069596210821621e-07, "loss": 0.1428, "step": 43870 }, { "epoch": 0.7625893027864208, "grad_norm": 2.1204978258341556, "learning_rate": 1.4067638719830098e-07, "loss": 0.2899, "step": 43871 }, { "epoch": 0.7626066853239236, "grad_norm": 1.2172615585256883, "learning_rate": 1.4065681342727653e-07, "loss": 0.235, "step": 43872 }, { "epoch": 0.7626240678614264, "grad_norm": 1.2777996643671796, "learning_rate": 1.406372407952048e-07, "loss": 0.3338, "step": 43873 }, { "epoch": 0.7626414503989293, "grad_norm": 0.9327594486327807, "learning_rate": 1.4061766930214807e-07, "loss": 0.261, "step": 43874 }, { "epoch": 0.7626588329364321, "grad_norm": 1.5871887426180558, "learning_rate": 1.4059809894816827e-07, "loss": 0.1724, "step": 43875 }, { "epoch": 0.7626762154739349, "grad_norm": 2.440295719361918, "learning_rate": 1.405785297333275e-07, "loss": 0.2154, "step": 43876 }, { "epoch": 0.7626935980114377, "grad_norm": 1.015436513803808, "learning_rate": 1.4055896165768739e-07, "loss": 0.2089, "step": 43877 }, { "epoch": 0.7627109805489405, "grad_norm": 3.903324784233259, "learning_rate": 1.4053939472131043e-07, "loss": 0.3702, "step": 43878 }, { "epoch": 0.7627283630864433, "grad_norm": 2.2569463820863276, "learning_rate": 1.4051982892425845e-07, "loss": 0.2665, "step": 43879 }, { "epoch": 0.7627457456239461, "grad_norm": 1.193899043740309, "learning_rate": 1.405002642665935e-07, "loss": 0.29, "step": 43880 }, { "epoch": 0.762763128161449, "grad_norm": 2.336661631440808, "learning_rate": 1.4048070074837743e-07, "loss": 0.2038, "step": 43881 }, { "epoch": 0.7627805106989518, "grad_norm": 1.1679393811791434, "learning_rate": 1.404611383696725e-07, "loss": 0.2824, "step": 43882 }, { "epoch": 0.7627978932364546, "grad_norm": 0.9923398114205401, "learning_rate": 1.4044157713054078e-07, "loss": 0.1754, "step": 43883 }, { "epoch": 0.7628152757739575, "grad_norm": 0.9555026331698651, "learning_rate": 1.4042201703104394e-07, "loss": 0.237, "step": 43884 }, { "epoch": 0.7628326583114603, "grad_norm": 3.081432736445256, "learning_rate": 1.40402458071244e-07, "loss": 0.2416, "step": 43885 }, { "epoch": 0.7628500408489631, "grad_norm": 1.4393390892914046, "learning_rate": 1.4038290025120319e-07, "loss": 0.2829, "step": 43886 }, { "epoch": 0.762867423386466, "grad_norm": 1.1943678234617456, "learning_rate": 1.4036334357098345e-07, "loss": 0.3347, "step": 43887 }, { "epoch": 0.7628848059239688, "grad_norm": 1.4729443974485639, "learning_rate": 1.4034378803064666e-07, "loss": 0.171, "step": 43888 }, { "epoch": 0.7629021884614716, "grad_norm": 1.6697538052327061, "learning_rate": 1.4032423363025485e-07, "loss": 0.1949, "step": 43889 }, { "epoch": 0.7629195709989745, "grad_norm": 1.0866764535013596, "learning_rate": 1.4030468036987003e-07, "loss": 0.1823, "step": 43890 }, { "epoch": 0.7629369535364773, "grad_norm": 0.9879692841918291, "learning_rate": 1.4028512824955407e-07, "loss": 0.3105, "step": 43891 }, { "epoch": 0.7629543360739801, "grad_norm": 1.5983067780939448, "learning_rate": 1.4026557726936894e-07, "loss": 0.3056, "step": 43892 }, { "epoch": 0.7629717186114829, "grad_norm": 1.5116107941757784, "learning_rate": 1.402460274293768e-07, "loss": 0.1713, "step": 43893 }, { "epoch": 0.7629891011489858, "grad_norm": 0.8678842256966645, "learning_rate": 1.402264787296395e-07, "loss": 0.1585, "step": 43894 }, { "epoch": 0.7630064836864886, "grad_norm": 1.5104854042145628, "learning_rate": 1.4020693117021915e-07, "loss": 0.3475, "step": 43895 }, { "epoch": 0.7630238662239914, "grad_norm": 1.2744266912732742, "learning_rate": 1.4018738475117725e-07, "loss": 0.1809, "step": 43896 }, { "epoch": 0.7630412487614942, "grad_norm": 1.3777542476266187, "learning_rate": 1.401678394725762e-07, "loss": 0.223, "step": 43897 }, { "epoch": 0.763058631298997, "grad_norm": 3.1558804284730244, "learning_rate": 1.401482953344778e-07, "loss": 0.3837, "step": 43898 }, { "epoch": 0.7630760138364998, "grad_norm": 0.9135068253495365, "learning_rate": 1.4012875233694404e-07, "loss": 0.2254, "step": 43899 }, { "epoch": 0.7630933963740026, "grad_norm": 1.5703694393276457, "learning_rate": 1.4010921048003665e-07, "loss": 0.195, "step": 43900 }, { "epoch": 0.7631107789115055, "grad_norm": 2.798354386674938, "learning_rate": 1.4008966976381786e-07, "loss": 0.1887, "step": 43901 }, { "epoch": 0.7631281614490083, "grad_norm": 1.7196092376048187, "learning_rate": 1.4007013018834967e-07, "loss": 0.2129, "step": 43902 }, { "epoch": 0.7631455439865111, "grad_norm": 1.6595096951275154, "learning_rate": 1.4005059175369371e-07, "loss": 0.1835, "step": 43903 }, { "epoch": 0.763162926524014, "grad_norm": 0.8966417415360731, "learning_rate": 1.4003105445991181e-07, "loss": 0.26, "step": 43904 }, { "epoch": 0.7631803090615168, "grad_norm": 1.5440444745899407, "learning_rate": 1.400115183070663e-07, "loss": 0.1734, "step": 43905 }, { "epoch": 0.7631976915990196, "grad_norm": 1.157681989298987, "learning_rate": 1.3999198329521887e-07, "loss": 0.1533, "step": 43906 }, { "epoch": 0.7632150741365225, "grad_norm": 1.1349364500304666, "learning_rate": 1.3997244942443153e-07, "loss": 0.2165, "step": 43907 }, { "epoch": 0.7632324566740253, "grad_norm": 1.4385827297984175, "learning_rate": 1.3995291669476617e-07, "loss": 0.216, "step": 43908 }, { "epoch": 0.7632498392115281, "grad_norm": 5.610419121377587, "learning_rate": 1.3993338510628462e-07, "loss": 0.445, "step": 43909 }, { "epoch": 0.7632672217490309, "grad_norm": 2.4441576649849264, "learning_rate": 1.3991385465904883e-07, "loss": 0.2026, "step": 43910 }, { "epoch": 0.7632846042865338, "grad_norm": 0.6838514539990329, "learning_rate": 1.3989432535312074e-07, "loss": 0.097, "step": 43911 }, { "epoch": 0.7633019868240366, "grad_norm": 2.154293029025394, "learning_rate": 1.3987479718856203e-07, "loss": 0.1767, "step": 43912 }, { "epoch": 0.7633193693615394, "grad_norm": 1.2040344471833693, "learning_rate": 1.3985527016543496e-07, "loss": 0.2151, "step": 43913 }, { "epoch": 0.7633367518990423, "grad_norm": 1.5705107776418883, "learning_rate": 1.398357442838014e-07, "loss": 0.2221, "step": 43914 }, { "epoch": 0.7633541344365451, "grad_norm": 1.9448817216583518, "learning_rate": 1.398162195437228e-07, "loss": 0.2344, "step": 43915 }, { "epoch": 0.7633715169740479, "grad_norm": 2.0661117155956363, "learning_rate": 1.3979669594526145e-07, "loss": 0.2292, "step": 43916 }, { "epoch": 0.7633888995115506, "grad_norm": 1.0952184857687746, "learning_rate": 1.3977717348847912e-07, "loss": 0.2521, "step": 43917 }, { "epoch": 0.7634062820490535, "grad_norm": 1.234137282701224, "learning_rate": 1.397576521734377e-07, "loss": 0.2356, "step": 43918 }, { "epoch": 0.7634236645865563, "grad_norm": 1.3669692195668346, "learning_rate": 1.39738132000199e-07, "loss": 0.2076, "step": 43919 }, { "epoch": 0.7634410471240591, "grad_norm": 1.0446475689799133, "learning_rate": 1.397186129688248e-07, "loss": 0.1977, "step": 43920 }, { "epoch": 0.763458429661562, "grad_norm": 0.8552774954826744, "learning_rate": 1.3969909507937743e-07, "loss": 0.2084, "step": 43921 }, { "epoch": 0.7634758121990648, "grad_norm": 1.4256904173634266, "learning_rate": 1.3967957833191825e-07, "loss": 0.1625, "step": 43922 }, { "epoch": 0.7634931947365676, "grad_norm": 1.1332176852439364, "learning_rate": 1.396600627265091e-07, "loss": 0.1691, "step": 43923 }, { "epoch": 0.7635105772740705, "grad_norm": 1.5139998762124875, "learning_rate": 1.396405482632122e-07, "loss": 0.2466, "step": 43924 }, { "epoch": 0.7635279598115733, "grad_norm": 1.9972738224221354, "learning_rate": 1.396210349420892e-07, "loss": 0.2145, "step": 43925 }, { "epoch": 0.7635453423490761, "grad_norm": 3.997209829861498, "learning_rate": 1.3960152276320204e-07, "loss": 0.2307, "step": 43926 }, { "epoch": 0.763562724886579, "grad_norm": 2.134137512982574, "learning_rate": 1.3958201172661244e-07, "loss": 0.2251, "step": 43927 }, { "epoch": 0.7635801074240818, "grad_norm": 0.940789177085427, "learning_rate": 1.3956250183238233e-07, "loss": 0.2598, "step": 43928 }, { "epoch": 0.7635974899615846, "grad_norm": 1.633082232980753, "learning_rate": 1.3954299308057355e-07, "loss": 0.1837, "step": 43929 }, { "epoch": 0.7636148724990874, "grad_norm": 0.8744670139968996, "learning_rate": 1.3952348547124782e-07, "loss": 0.2426, "step": 43930 }, { "epoch": 0.7636322550365903, "grad_norm": 1.8821408397939683, "learning_rate": 1.3950397900446697e-07, "loss": 0.1631, "step": 43931 }, { "epoch": 0.7636496375740931, "grad_norm": 2.6178361113963198, "learning_rate": 1.3948447368029303e-07, "loss": 0.1753, "step": 43932 }, { "epoch": 0.7636670201115959, "grad_norm": 7.148136169701244, "learning_rate": 1.3946496949878782e-07, "loss": 0.231, "step": 43933 }, { "epoch": 0.7636844026490988, "grad_norm": 2.000252650325733, "learning_rate": 1.3944546646001294e-07, "loss": 0.2923, "step": 43934 }, { "epoch": 0.7637017851866016, "grad_norm": 1.3974015340120896, "learning_rate": 1.3942596456403016e-07, "loss": 0.1006, "step": 43935 }, { "epoch": 0.7637191677241044, "grad_norm": 1.2020420120075492, "learning_rate": 1.3940646381090155e-07, "loss": 0.5144, "step": 43936 }, { "epoch": 0.7637365502616071, "grad_norm": 1.249883016729708, "learning_rate": 1.3938696420068884e-07, "loss": 0.2155, "step": 43937 }, { "epoch": 0.76375393279911, "grad_norm": 1.8081523812040614, "learning_rate": 1.3936746573345377e-07, "loss": 0.2251, "step": 43938 }, { "epoch": 0.7637713153366128, "grad_norm": 1.5199496108747597, "learning_rate": 1.3934796840925805e-07, "loss": 0.2938, "step": 43939 }, { "epoch": 0.7637886978741156, "grad_norm": 1.8630213774973698, "learning_rate": 1.3932847222816384e-07, "loss": 0.2134, "step": 43940 }, { "epoch": 0.7638060804116185, "grad_norm": 1.5568803750766467, "learning_rate": 1.3930897719023266e-07, "loss": 0.2368, "step": 43941 }, { "epoch": 0.7638234629491213, "grad_norm": 1.6788866050055375, "learning_rate": 1.3928948329552625e-07, "loss": 0.1782, "step": 43942 }, { "epoch": 0.7638408454866241, "grad_norm": 1.3758562045049532, "learning_rate": 1.3926999054410632e-07, "loss": 0.2401, "step": 43943 }, { "epoch": 0.763858228024127, "grad_norm": 1.366143843471917, "learning_rate": 1.3925049893603503e-07, "loss": 0.1638, "step": 43944 }, { "epoch": 0.7638756105616298, "grad_norm": 1.159750672202246, "learning_rate": 1.3923100847137388e-07, "loss": 0.1994, "step": 43945 }, { "epoch": 0.7638929930991326, "grad_norm": 1.908533241444148, "learning_rate": 1.3921151915018476e-07, "loss": 0.2827, "step": 43946 }, { "epoch": 0.7639103756366354, "grad_norm": 1.3070737841989377, "learning_rate": 1.3919203097252934e-07, "loss": 0.1338, "step": 43947 }, { "epoch": 0.7639277581741383, "grad_norm": 3.6921151327495516, "learning_rate": 1.3917254393846945e-07, "loss": 0.2466, "step": 43948 }, { "epoch": 0.7639451407116411, "grad_norm": 2.3038983973593843, "learning_rate": 1.3915305804806687e-07, "loss": 0.1749, "step": 43949 }, { "epoch": 0.7639625232491439, "grad_norm": 2.0296948513810027, "learning_rate": 1.3913357330138337e-07, "loss": 0.2774, "step": 43950 }, { "epoch": 0.7639799057866468, "grad_norm": 1.5165500876906013, "learning_rate": 1.3911408969848043e-07, "loss": 0.3143, "step": 43951 }, { "epoch": 0.7639972883241496, "grad_norm": 1.5646808549239917, "learning_rate": 1.390946072394204e-07, "loss": 0.3201, "step": 43952 }, { "epoch": 0.7640146708616524, "grad_norm": 1.9446142359727825, "learning_rate": 1.3907512592426456e-07, "loss": 0.1995, "step": 43953 }, { "epoch": 0.7640320533991553, "grad_norm": 2.500747617986549, "learning_rate": 1.390556457530746e-07, "loss": 0.355, "step": 43954 }, { "epoch": 0.7640494359366581, "grad_norm": 1.4993114434575856, "learning_rate": 1.3903616672591255e-07, "loss": 0.2162, "step": 43955 }, { "epoch": 0.7640668184741608, "grad_norm": 1.7351662113981705, "learning_rate": 1.3901668884284007e-07, "loss": 0.1447, "step": 43956 }, { "epoch": 0.7640842010116636, "grad_norm": 0.9951262465075066, "learning_rate": 1.389972121039189e-07, "loss": 0.1158, "step": 43957 }, { "epoch": 0.7641015835491665, "grad_norm": 1.121778045065446, "learning_rate": 1.3897773650921068e-07, "loss": 0.2024, "step": 43958 }, { "epoch": 0.7641189660866693, "grad_norm": 0.6403047042487767, "learning_rate": 1.3895826205877725e-07, "loss": 0.1724, "step": 43959 }, { "epoch": 0.7641363486241721, "grad_norm": 3.2204606663030826, "learning_rate": 1.3893878875268022e-07, "loss": 0.2529, "step": 43960 }, { "epoch": 0.764153731161675, "grad_norm": 1.8855899989966545, "learning_rate": 1.3891931659098138e-07, "loss": 0.1845, "step": 43961 }, { "epoch": 0.7641711136991778, "grad_norm": 2.7822290000648295, "learning_rate": 1.3889984557374235e-07, "loss": 0.2046, "step": 43962 }, { "epoch": 0.7641884962366806, "grad_norm": 1.0263599911037404, "learning_rate": 1.38880375701025e-07, "loss": 0.2337, "step": 43963 }, { "epoch": 0.7642058787741834, "grad_norm": 1.0970026518194516, "learning_rate": 1.3886090697289105e-07, "loss": 0.1344, "step": 43964 }, { "epoch": 0.7642232613116863, "grad_norm": 1.0150095876500036, "learning_rate": 1.3884143938940224e-07, "loss": 0.2418, "step": 43965 }, { "epoch": 0.7642406438491891, "grad_norm": 1.2271104152956103, "learning_rate": 1.3882197295061986e-07, "loss": 0.233, "step": 43966 }, { "epoch": 0.7642580263866919, "grad_norm": 1.2275616149128004, "learning_rate": 1.388025076566061e-07, "loss": 0.1981, "step": 43967 }, { "epoch": 0.7642754089241948, "grad_norm": 1.6959307852519185, "learning_rate": 1.3878304350742242e-07, "loss": 0.1922, "step": 43968 }, { "epoch": 0.7642927914616976, "grad_norm": 1.2535070229236482, "learning_rate": 1.3876358050313063e-07, "loss": 0.2704, "step": 43969 }, { "epoch": 0.7643101739992004, "grad_norm": 1.3042244982081934, "learning_rate": 1.3874411864379215e-07, "loss": 0.3, "step": 43970 }, { "epoch": 0.7643275565367033, "grad_norm": 2.1236973147484637, "learning_rate": 1.3872465792946918e-07, "loss": 0.179, "step": 43971 }, { "epoch": 0.7643449390742061, "grad_norm": 1.9631207035592602, "learning_rate": 1.3870519836022294e-07, "loss": 0.1792, "step": 43972 }, { "epoch": 0.7643623216117089, "grad_norm": 1.0475567609015988, "learning_rate": 1.3868573993611525e-07, "loss": 0.1383, "step": 43973 }, { "epoch": 0.7643797041492117, "grad_norm": 2.2861670861301646, "learning_rate": 1.3866628265720765e-07, "loss": 0.2278, "step": 43974 }, { "epoch": 0.7643970866867146, "grad_norm": 1.1592426413416983, "learning_rate": 1.3864682652356206e-07, "loss": 0.2274, "step": 43975 }, { "epoch": 0.7644144692242173, "grad_norm": 1.6504365574797726, "learning_rate": 1.3862737153524008e-07, "loss": 0.3954, "step": 43976 }, { "epoch": 0.7644318517617201, "grad_norm": 1.0897593340008966, "learning_rate": 1.3860791769230336e-07, "loss": 0.2401, "step": 43977 }, { "epoch": 0.764449234299223, "grad_norm": 1.6212222650865677, "learning_rate": 1.3858846499481347e-07, "loss": 0.2514, "step": 43978 }, { "epoch": 0.7644666168367258, "grad_norm": 1.336398227230342, "learning_rate": 1.3856901344283213e-07, "loss": 0.1626, "step": 43979 }, { "epoch": 0.7644839993742286, "grad_norm": 2.4787049509157555, "learning_rate": 1.3854956303642102e-07, "loss": 0.1714, "step": 43980 }, { "epoch": 0.7645013819117314, "grad_norm": 1.3221722203782518, "learning_rate": 1.3853011377564162e-07, "loss": 0.2145, "step": 43981 }, { "epoch": 0.7645187644492343, "grad_norm": 1.2859868919287474, "learning_rate": 1.385106656605558e-07, "loss": 0.2492, "step": 43982 }, { "epoch": 0.7645361469867371, "grad_norm": 1.3661693456798678, "learning_rate": 1.3849121869122514e-07, "loss": 0.1478, "step": 43983 }, { "epoch": 0.7645535295242399, "grad_norm": 1.0835476790415002, "learning_rate": 1.3847177286771145e-07, "loss": 0.2204, "step": 43984 }, { "epoch": 0.7645709120617428, "grad_norm": 1.5823636077850762, "learning_rate": 1.3845232819007586e-07, "loss": 0.3099, "step": 43985 }, { "epoch": 0.7645882945992456, "grad_norm": 1.3441157109044415, "learning_rate": 1.3843288465838043e-07, "loss": 0.2265, "step": 43986 }, { "epoch": 0.7646056771367484, "grad_norm": 0.7439547027497526, "learning_rate": 1.3841344227268664e-07, "loss": 0.2118, "step": 43987 }, { "epoch": 0.7646230596742513, "grad_norm": 1.4081608748163608, "learning_rate": 1.383940010330562e-07, "loss": 0.2603, "step": 43988 }, { "epoch": 0.7646404422117541, "grad_norm": 3.8980757535298687, "learning_rate": 1.3837456093955048e-07, "loss": 0.231, "step": 43989 }, { "epoch": 0.7646578247492569, "grad_norm": 2.259256430296919, "learning_rate": 1.3835512199223159e-07, "loss": 0.3242, "step": 43990 }, { "epoch": 0.7646752072867598, "grad_norm": 1.3254464414506362, "learning_rate": 1.3833568419116072e-07, "loss": 0.2134, "step": 43991 }, { "epoch": 0.7646925898242626, "grad_norm": 1.9678469778665917, "learning_rate": 1.3831624753639952e-07, "loss": 0.2766, "step": 43992 }, { "epoch": 0.7647099723617654, "grad_norm": 1.5720959742552185, "learning_rate": 1.3829681202800957e-07, "loss": 0.1293, "step": 43993 }, { "epoch": 0.7647273548992682, "grad_norm": 2.4047392871355258, "learning_rate": 1.3827737766605274e-07, "loss": 0.3128, "step": 43994 }, { "epoch": 0.7647447374367711, "grad_norm": 1.6297097955829998, "learning_rate": 1.382579444505904e-07, "loss": 0.17, "step": 43995 }, { "epoch": 0.7647621199742738, "grad_norm": 1.1270168095661226, "learning_rate": 1.3823851238168427e-07, "loss": 0.2001, "step": 43996 }, { "epoch": 0.7647795025117766, "grad_norm": 2.08110764729943, "learning_rate": 1.3821908145939587e-07, "loss": 0.2927, "step": 43997 }, { "epoch": 0.7647968850492795, "grad_norm": 2.055885332633348, "learning_rate": 1.3819965168378679e-07, "loss": 0.2174, "step": 43998 }, { "epoch": 0.7648142675867823, "grad_norm": 1.8543358952818607, "learning_rate": 1.381802230549186e-07, "loss": 0.1989, "step": 43999 }, { "epoch": 0.7648316501242851, "grad_norm": 0.7558094764401769, "learning_rate": 1.3816079557285287e-07, "loss": 0.2447, "step": 44000 }, { "epoch": 0.7648490326617879, "grad_norm": 2.6421412611136863, "learning_rate": 1.3814136923765106e-07, "loss": 0.2179, "step": 44001 }, { "epoch": 0.7648664151992908, "grad_norm": 2.5853572083388072, "learning_rate": 1.3812194404937512e-07, "loss": 0.3621, "step": 44002 }, { "epoch": 0.7648837977367936, "grad_norm": 1.238917700488134, "learning_rate": 1.3810252000808647e-07, "loss": 0.2114, "step": 44003 }, { "epoch": 0.7649011802742964, "grad_norm": 0.9630644796926557, "learning_rate": 1.3808309711384625e-07, "loss": 0.2625, "step": 44004 }, { "epoch": 0.7649185628117993, "grad_norm": 1.728663622701258, "learning_rate": 1.3806367536671659e-07, "loss": 0.2886, "step": 44005 }, { "epoch": 0.7649359453493021, "grad_norm": 1.6517291638011733, "learning_rate": 1.3804425476675874e-07, "loss": 0.333, "step": 44006 }, { "epoch": 0.7649533278868049, "grad_norm": 3.2194307574005427, "learning_rate": 1.3802483531403442e-07, "loss": 0.1851, "step": 44007 }, { "epoch": 0.7649707104243078, "grad_norm": 2.6832456171487746, "learning_rate": 1.38005417008605e-07, "loss": 0.3635, "step": 44008 }, { "epoch": 0.7649880929618106, "grad_norm": 1.667862964870493, "learning_rate": 1.3798599985053222e-07, "loss": 0.3023, "step": 44009 }, { "epoch": 0.7650054754993134, "grad_norm": 1.6849261959504305, "learning_rate": 1.3796658383987748e-07, "loss": 0.2834, "step": 44010 }, { "epoch": 0.7650228580368162, "grad_norm": 1.4824626455132073, "learning_rate": 1.3794716897670232e-07, "loss": 0.2046, "step": 44011 }, { "epoch": 0.7650402405743191, "grad_norm": 1.8349534741907316, "learning_rate": 1.379277552610682e-07, "loss": 0.1932, "step": 44012 }, { "epoch": 0.7650576231118219, "grad_norm": 1.3878951423130923, "learning_rate": 1.3790834269303697e-07, "loss": 0.2662, "step": 44013 }, { "epoch": 0.7650750056493247, "grad_norm": 1.4872646617860559, "learning_rate": 1.3788893127266992e-07, "loss": 0.2401, "step": 44014 }, { "epoch": 0.7650923881868276, "grad_norm": 1.5326113808562956, "learning_rate": 1.378695210000288e-07, "loss": 0.2538, "step": 44015 }, { "epoch": 0.7651097707243303, "grad_norm": 1.2404941354912404, "learning_rate": 1.378501118751746e-07, "loss": 0.3044, "step": 44016 }, { "epoch": 0.7651271532618331, "grad_norm": 1.161594414718397, "learning_rate": 1.3783070389816935e-07, "loss": 0.1894, "step": 44017 }, { "epoch": 0.765144535799336, "grad_norm": 1.3620133699948085, "learning_rate": 1.378112970690744e-07, "loss": 0.1583, "step": 44018 }, { "epoch": 0.7651619183368388, "grad_norm": 1.77373708306946, "learning_rate": 1.3779189138795128e-07, "loss": 0.2109, "step": 44019 }, { "epoch": 0.7651793008743416, "grad_norm": 1.882784247724389, "learning_rate": 1.3777248685486132e-07, "loss": 0.1672, "step": 44020 }, { "epoch": 0.7651966834118444, "grad_norm": 1.7213890283827884, "learning_rate": 1.3775308346986637e-07, "loss": 0.2019, "step": 44021 }, { "epoch": 0.7652140659493473, "grad_norm": 1.2459296125972112, "learning_rate": 1.3773368123302786e-07, "loss": 0.1766, "step": 44022 }, { "epoch": 0.7652314484868501, "grad_norm": 0.9109713128971378, "learning_rate": 1.37714280144407e-07, "loss": 0.2582, "step": 44023 }, { "epoch": 0.7652488310243529, "grad_norm": 1.1663056900030673, "learning_rate": 1.3769488020406533e-07, "loss": 0.1784, "step": 44024 }, { "epoch": 0.7652662135618558, "grad_norm": 1.3715974941446902, "learning_rate": 1.3767548141206458e-07, "loss": 0.126, "step": 44025 }, { "epoch": 0.7652835960993586, "grad_norm": 1.492415863111355, "learning_rate": 1.3765608376846615e-07, "loss": 0.2767, "step": 44026 }, { "epoch": 0.7653009786368614, "grad_norm": 1.3592828394669723, "learning_rate": 1.376366872733315e-07, "loss": 0.1962, "step": 44027 }, { "epoch": 0.7653183611743642, "grad_norm": 1.2542158099185348, "learning_rate": 1.3761729192672205e-07, "loss": 0.1834, "step": 44028 }, { "epoch": 0.7653357437118671, "grad_norm": 2.6270318595227273, "learning_rate": 1.3759789772869933e-07, "loss": 0.2633, "step": 44029 }, { "epoch": 0.7653531262493699, "grad_norm": 0.9492175655441994, "learning_rate": 1.375785046793248e-07, "loss": 0.2173, "step": 44030 }, { "epoch": 0.7653705087868727, "grad_norm": 1.3010759049490959, "learning_rate": 1.3755911277865995e-07, "loss": 0.3081, "step": 44031 }, { "epoch": 0.7653878913243756, "grad_norm": 1.6351175704711658, "learning_rate": 1.3753972202676605e-07, "loss": 0.1955, "step": 44032 }, { "epoch": 0.7654052738618784, "grad_norm": 0.960907398722121, "learning_rate": 1.3752033242370493e-07, "loss": 0.1518, "step": 44033 }, { "epoch": 0.7654226563993812, "grad_norm": 1.6276646034494746, "learning_rate": 1.3750094396953793e-07, "loss": 0.2198, "step": 44034 }, { "epoch": 0.7654400389368841, "grad_norm": 1.5253784014939742, "learning_rate": 1.3748155666432616e-07, "loss": 0.2637, "step": 44035 }, { "epoch": 0.7654574214743868, "grad_norm": 1.586463219289468, "learning_rate": 1.374621705081314e-07, "loss": 0.2161, "step": 44036 }, { "epoch": 0.7654748040118896, "grad_norm": 1.6467092773188632, "learning_rate": 1.3744278550101513e-07, "loss": 0.2588, "step": 44037 }, { "epoch": 0.7654921865493924, "grad_norm": 1.6930174602604238, "learning_rate": 1.374234016430386e-07, "loss": 0.1713, "step": 44038 }, { "epoch": 0.7655095690868953, "grad_norm": 1.6812837278907493, "learning_rate": 1.374040189342634e-07, "loss": 0.2415, "step": 44039 }, { "epoch": 0.7655269516243981, "grad_norm": 1.540477601550334, "learning_rate": 1.373846373747507e-07, "loss": 0.2765, "step": 44040 }, { "epoch": 0.7655443341619009, "grad_norm": 1.786312422690581, "learning_rate": 1.3736525696456246e-07, "loss": 0.2833, "step": 44041 }, { "epoch": 0.7655617166994038, "grad_norm": 1.9122098830541352, "learning_rate": 1.373458777037596e-07, "loss": 0.2178, "step": 44042 }, { "epoch": 0.7655790992369066, "grad_norm": 2.9028642125421653, "learning_rate": 1.373264995924036e-07, "loss": 0.1478, "step": 44043 }, { "epoch": 0.7655964817744094, "grad_norm": 1.4221878295032735, "learning_rate": 1.373071226305561e-07, "loss": 0.1827, "step": 44044 }, { "epoch": 0.7656138643119123, "grad_norm": 2.3632177780917663, "learning_rate": 1.372877468182785e-07, "loss": 0.1966, "step": 44045 }, { "epoch": 0.7656312468494151, "grad_norm": 1.7081783563682493, "learning_rate": 1.3726837215563202e-07, "loss": 0.317, "step": 44046 }, { "epoch": 0.7656486293869179, "grad_norm": 1.9679251506702928, "learning_rate": 1.3724899864267826e-07, "loss": 0.2284, "step": 44047 }, { "epoch": 0.7656660119244207, "grad_norm": 1.5257822049223881, "learning_rate": 1.3722962627947848e-07, "loss": 0.2573, "step": 44048 }, { "epoch": 0.7656833944619236, "grad_norm": 2.2222728864244305, "learning_rate": 1.372102550660942e-07, "loss": 0.3624, "step": 44049 }, { "epoch": 0.7657007769994264, "grad_norm": 1.0993151166331039, "learning_rate": 1.371908850025867e-07, "loss": 0.212, "step": 44050 }, { "epoch": 0.7657181595369292, "grad_norm": 1.3457110292512993, "learning_rate": 1.3717151608901728e-07, "loss": 0.1931, "step": 44051 }, { "epoch": 0.7657355420744321, "grad_norm": 1.4058536219540825, "learning_rate": 1.371521483254477e-07, "loss": 0.1952, "step": 44052 }, { "epoch": 0.7657529246119349, "grad_norm": 1.4796622860927509, "learning_rate": 1.371327817119392e-07, "loss": 0.3075, "step": 44053 }, { "epoch": 0.7657703071494377, "grad_norm": 1.4504922962475468, "learning_rate": 1.37113416248553e-07, "loss": 0.17, "step": 44054 }, { "epoch": 0.7657876896869406, "grad_norm": 2.470906294155716, "learning_rate": 1.3709405193535045e-07, "loss": 0.2372, "step": 44055 }, { "epoch": 0.7658050722244433, "grad_norm": 1.4309630217759113, "learning_rate": 1.3707468877239315e-07, "loss": 0.1914, "step": 44056 }, { "epoch": 0.7658224547619461, "grad_norm": 1.8388978372235816, "learning_rate": 1.3705532675974236e-07, "loss": 0.3222, "step": 44057 }, { "epoch": 0.7658398372994489, "grad_norm": 0.7057063165521646, "learning_rate": 1.3703596589745948e-07, "loss": 0.13, "step": 44058 }, { "epoch": 0.7658572198369518, "grad_norm": 1.4369160328238493, "learning_rate": 1.3701660618560574e-07, "loss": 0.1559, "step": 44059 }, { "epoch": 0.7658746023744546, "grad_norm": 1.3172798656952927, "learning_rate": 1.3699724762424286e-07, "loss": 0.3093, "step": 44060 }, { "epoch": 0.7658919849119574, "grad_norm": 1.0834886475477024, "learning_rate": 1.3697789021343186e-07, "loss": 0.228, "step": 44061 }, { "epoch": 0.7659093674494603, "grad_norm": 1.5291480934688872, "learning_rate": 1.3695853395323415e-07, "loss": 0.3061, "step": 44062 }, { "epoch": 0.7659267499869631, "grad_norm": 1.1385096531360883, "learning_rate": 1.3693917884371098e-07, "loss": 0.189, "step": 44063 }, { "epoch": 0.7659441325244659, "grad_norm": 1.1534152366028219, "learning_rate": 1.36919824884924e-07, "loss": 0.2984, "step": 44064 }, { "epoch": 0.7659615150619687, "grad_norm": 1.7833023704408006, "learning_rate": 1.369004720769344e-07, "loss": 0.2392, "step": 44065 }, { "epoch": 0.7659788975994716, "grad_norm": 2.4212413632490546, "learning_rate": 1.3688112041980348e-07, "loss": 0.2078, "step": 44066 }, { "epoch": 0.7659962801369744, "grad_norm": 2.4024522602779323, "learning_rate": 1.3686176991359262e-07, "loss": 0.267, "step": 44067 }, { "epoch": 0.7660136626744772, "grad_norm": 1.5034051475671526, "learning_rate": 1.368424205583631e-07, "loss": 0.3627, "step": 44068 }, { "epoch": 0.7660310452119801, "grad_norm": 1.73941602244975, "learning_rate": 1.3682307235417635e-07, "loss": 0.1838, "step": 44069 }, { "epoch": 0.7660484277494829, "grad_norm": 1.2929231431855597, "learning_rate": 1.3680372530109342e-07, "loss": 0.2061, "step": 44070 }, { "epoch": 0.7660658102869857, "grad_norm": 2.045516976957719, "learning_rate": 1.3678437939917604e-07, "loss": 0.2126, "step": 44071 }, { "epoch": 0.7660831928244886, "grad_norm": 2.1565994463412554, "learning_rate": 1.3676503464848544e-07, "loss": 0.4905, "step": 44072 }, { "epoch": 0.7661005753619914, "grad_norm": 2.1223773921594677, "learning_rate": 1.367456910490827e-07, "loss": 0.2078, "step": 44073 }, { "epoch": 0.7661179578994942, "grad_norm": 2.5514391798750595, "learning_rate": 1.367263486010291e-07, "loss": 0.2067, "step": 44074 }, { "epoch": 0.766135340436997, "grad_norm": 1.7278254598784863, "learning_rate": 1.3670700730438622e-07, "loss": 0.2141, "step": 44075 }, { "epoch": 0.7661527229744998, "grad_norm": 1.192153839531601, "learning_rate": 1.3668766715921527e-07, "loss": 0.1878, "step": 44076 }, { "epoch": 0.7661701055120026, "grad_norm": 1.9479945434374883, "learning_rate": 1.3666832816557754e-07, "loss": 0.1207, "step": 44077 }, { "epoch": 0.7661874880495054, "grad_norm": 1.1407546715927637, "learning_rate": 1.3664899032353412e-07, "loss": 0.1652, "step": 44078 }, { "epoch": 0.7662048705870083, "grad_norm": 1.3111527273798589, "learning_rate": 1.3662965363314682e-07, "loss": 0.2853, "step": 44079 }, { "epoch": 0.7662222531245111, "grad_norm": 1.6767425916282204, "learning_rate": 1.3661031809447643e-07, "loss": 0.2525, "step": 44080 }, { "epoch": 0.7662396356620139, "grad_norm": 1.7149716051864423, "learning_rate": 1.3659098370758443e-07, "loss": 0.2131, "step": 44081 }, { "epoch": 0.7662570181995167, "grad_norm": 2.1370257103681847, "learning_rate": 1.3657165047253193e-07, "loss": 0.2269, "step": 44082 }, { "epoch": 0.7662744007370196, "grad_norm": 1.4477081607820295, "learning_rate": 1.3655231838938047e-07, "loss": 0.1622, "step": 44083 }, { "epoch": 0.7662917832745224, "grad_norm": 1.8954838126390008, "learning_rate": 1.3653298745819125e-07, "loss": 0.3335, "step": 44084 }, { "epoch": 0.7663091658120252, "grad_norm": 1.6079373785534856, "learning_rate": 1.3651365767902545e-07, "loss": 0.1991, "step": 44085 }, { "epoch": 0.7663265483495281, "grad_norm": 1.8983644857532502, "learning_rate": 1.364943290519444e-07, "loss": 0.2353, "step": 44086 }, { "epoch": 0.7663439308870309, "grad_norm": 1.571463330350901, "learning_rate": 1.3647500157700937e-07, "loss": 0.2393, "step": 44087 }, { "epoch": 0.7663613134245337, "grad_norm": 1.4159173121501754, "learning_rate": 1.3645567525428154e-07, "loss": 0.283, "step": 44088 }, { "epoch": 0.7663786959620366, "grad_norm": 2.337511911301844, "learning_rate": 1.3643635008382227e-07, "loss": 0.1415, "step": 44089 }, { "epoch": 0.7663960784995394, "grad_norm": 1.0938912057462433, "learning_rate": 1.364170260656926e-07, "loss": 0.2397, "step": 44090 }, { "epoch": 0.7664134610370422, "grad_norm": 1.2393558021624556, "learning_rate": 1.3639770319995426e-07, "loss": 0.1603, "step": 44091 }, { "epoch": 0.766430843574545, "grad_norm": 2.967474270785797, "learning_rate": 1.3637838148666798e-07, "loss": 0.2927, "step": 44092 }, { "epoch": 0.7664482261120479, "grad_norm": 1.863710402466233, "learning_rate": 1.3635906092589507e-07, "loss": 0.3277, "step": 44093 }, { "epoch": 0.7664656086495507, "grad_norm": 1.9608619218684507, "learning_rate": 1.36339741517697e-07, "loss": 0.1881, "step": 44094 }, { "epoch": 0.7664829911870534, "grad_norm": 1.2070972852005017, "learning_rate": 1.36320423262135e-07, "loss": 0.2951, "step": 44095 }, { "epoch": 0.7665003737245563, "grad_norm": 1.2815836578420874, "learning_rate": 1.363011061592701e-07, "loss": 0.2438, "step": 44096 }, { "epoch": 0.7665177562620591, "grad_norm": 0.8563386876431323, "learning_rate": 1.362817902091637e-07, "loss": 0.1368, "step": 44097 }, { "epoch": 0.7665351387995619, "grad_norm": 1.8996702441242923, "learning_rate": 1.362624754118769e-07, "loss": 0.3623, "step": 44098 }, { "epoch": 0.7665525213370648, "grad_norm": 1.0733856261854104, "learning_rate": 1.3624316176747097e-07, "loss": 0.1741, "step": 44099 }, { "epoch": 0.7665699038745676, "grad_norm": 1.2855764245593204, "learning_rate": 1.362238492760071e-07, "loss": 0.283, "step": 44100 }, { "epoch": 0.7665872864120704, "grad_norm": 1.00381609532173, "learning_rate": 1.362045379375464e-07, "loss": 0.2989, "step": 44101 }, { "epoch": 0.7666046689495732, "grad_norm": 1.4280013145516606, "learning_rate": 1.3618522775215036e-07, "loss": 0.1581, "step": 44102 }, { "epoch": 0.7666220514870761, "grad_norm": 1.3087406502878554, "learning_rate": 1.3616591871988004e-07, "loss": 0.1943, "step": 44103 }, { "epoch": 0.7666394340245789, "grad_norm": 1.586405371234551, "learning_rate": 1.3614661084079672e-07, "loss": 0.2327, "step": 44104 }, { "epoch": 0.7666568165620817, "grad_norm": 1.6831121294266684, "learning_rate": 1.3612730411496127e-07, "loss": 0.3279, "step": 44105 }, { "epoch": 0.7666741990995846, "grad_norm": 1.9863629619141925, "learning_rate": 1.361079985424352e-07, "loss": 0.1913, "step": 44106 }, { "epoch": 0.7666915816370874, "grad_norm": 1.065675951295626, "learning_rate": 1.360886941232796e-07, "loss": 0.2638, "step": 44107 }, { "epoch": 0.7667089641745902, "grad_norm": 1.0590911062736004, "learning_rate": 1.3606939085755575e-07, "loss": 0.2223, "step": 44108 }, { "epoch": 0.766726346712093, "grad_norm": 1.535469397829819, "learning_rate": 1.360500887453246e-07, "loss": 0.2155, "step": 44109 }, { "epoch": 0.7667437292495959, "grad_norm": 1.5823509090796406, "learning_rate": 1.3603078778664773e-07, "loss": 0.1891, "step": 44110 }, { "epoch": 0.7667611117870987, "grad_norm": 2.2725238147546225, "learning_rate": 1.3601148798158596e-07, "loss": 0.3012, "step": 44111 }, { "epoch": 0.7667784943246015, "grad_norm": 1.408602582504922, "learning_rate": 1.3599218933020056e-07, "loss": 0.2272, "step": 44112 }, { "epoch": 0.7667958768621044, "grad_norm": 1.2949098009335553, "learning_rate": 1.3597289183255257e-07, "loss": 0.1752, "step": 44113 }, { "epoch": 0.7668132593996072, "grad_norm": 1.7122362933098074, "learning_rate": 1.3595359548870338e-07, "loss": 0.2463, "step": 44114 }, { "epoch": 0.7668306419371099, "grad_norm": 1.233926840707795, "learning_rate": 1.3593430029871412e-07, "loss": 0.1642, "step": 44115 }, { "epoch": 0.7668480244746128, "grad_norm": 1.0139524093124774, "learning_rate": 1.3591500626264592e-07, "loss": 0.1641, "step": 44116 }, { "epoch": 0.7668654070121156, "grad_norm": 1.9598785119897972, "learning_rate": 1.3589571338055984e-07, "loss": 0.2323, "step": 44117 }, { "epoch": 0.7668827895496184, "grad_norm": 1.1973525285603244, "learning_rate": 1.358764216525171e-07, "loss": 0.1335, "step": 44118 }, { "epoch": 0.7669001720871212, "grad_norm": 1.4961929537568388, "learning_rate": 1.3585713107857887e-07, "loss": 0.2062, "step": 44119 }, { "epoch": 0.7669175546246241, "grad_norm": 1.992524200805277, "learning_rate": 1.3583784165880625e-07, "loss": 0.2799, "step": 44120 }, { "epoch": 0.7669349371621269, "grad_norm": 1.4244143808737562, "learning_rate": 1.358185533932602e-07, "loss": 0.1769, "step": 44121 }, { "epoch": 0.7669523196996297, "grad_norm": 0.9487627160123477, "learning_rate": 1.3579926628200228e-07, "loss": 0.2883, "step": 44122 }, { "epoch": 0.7669697022371326, "grad_norm": 3.437137549313105, "learning_rate": 1.3577998032509346e-07, "loss": 0.1746, "step": 44123 }, { "epoch": 0.7669870847746354, "grad_norm": 1.1217894254832026, "learning_rate": 1.357606955225945e-07, "loss": 0.1799, "step": 44124 }, { "epoch": 0.7670044673121382, "grad_norm": 1.5313898300718987, "learning_rate": 1.35741411874567e-07, "loss": 0.2464, "step": 44125 }, { "epoch": 0.7670218498496411, "grad_norm": 1.5458351901950118, "learning_rate": 1.357221293810719e-07, "loss": 0.2651, "step": 44126 }, { "epoch": 0.7670392323871439, "grad_norm": 1.8639349271918066, "learning_rate": 1.3570284804217031e-07, "loss": 0.3848, "step": 44127 }, { "epoch": 0.7670566149246467, "grad_norm": 2.6071093559189356, "learning_rate": 1.3568356785792333e-07, "loss": 0.2473, "step": 44128 }, { "epoch": 0.7670739974621495, "grad_norm": 1.2267285567246378, "learning_rate": 1.3566428882839208e-07, "loss": 0.1393, "step": 44129 }, { "epoch": 0.7670913799996524, "grad_norm": 2.0564643267295986, "learning_rate": 1.3564501095363766e-07, "loss": 0.1531, "step": 44130 }, { "epoch": 0.7671087625371552, "grad_norm": 1.330163330213115, "learning_rate": 1.3562573423372126e-07, "loss": 0.1574, "step": 44131 }, { "epoch": 0.767126145074658, "grad_norm": 1.870099842117936, "learning_rate": 1.3560645866870368e-07, "loss": 0.2191, "step": 44132 }, { "epoch": 0.7671435276121609, "grad_norm": 2.0569879424944633, "learning_rate": 1.355871842586464e-07, "loss": 0.2536, "step": 44133 }, { "epoch": 0.7671609101496637, "grad_norm": 3.797324398436772, "learning_rate": 1.3556791100361038e-07, "loss": 0.3147, "step": 44134 }, { "epoch": 0.7671782926871664, "grad_norm": 1.5910306086407877, "learning_rate": 1.355486389036568e-07, "loss": 0.2142, "step": 44135 }, { "epoch": 0.7671956752246692, "grad_norm": 1.1933858764688074, "learning_rate": 1.3552936795884628e-07, "loss": 0.1886, "step": 44136 }, { "epoch": 0.7672130577621721, "grad_norm": 1.433572560430375, "learning_rate": 1.3551009816924048e-07, "loss": 0.2266, "step": 44137 }, { "epoch": 0.7672304402996749, "grad_norm": 1.4239119820668402, "learning_rate": 1.3549082953490016e-07, "loss": 0.3201, "step": 44138 }, { "epoch": 0.7672478228371777, "grad_norm": 1.4884002866071453, "learning_rate": 1.354715620558865e-07, "loss": 0.1728, "step": 44139 }, { "epoch": 0.7672652053746806, "grad_norm": 3.2348849069676513, "learning_rate": 1.3545229573226035e-07, "loss": 0.2186, "step": 44140 }, { "epoch": 0.7672825879121834, "grad_norm": 2.6677752152374437, "learning_rate": 1.3543303056408317e-07, "loss": 0.3322, "step": 44141 }, { "epoch": 0.7672999704496862, "grad_norm": 1.0166317629326567, "learning_rate": 1.3541376655141596e-07, "loss": 0.1595, "step": 44142 }, { "epoch": 0.7673173529871891, "grad_norm": 1.2946240228399417, "learning_rate": 1.3539450369431942e-07, "loss": 0.232, "step": 44143 }, { "epoch": 0.7673347355246919, "grad_norm": 1.3182596605490218, "learning_rate": 1.3537524199285478e-07, "loss": 0.2139, "step": 44144 }, { "epoch": 0.7673521180621947, "grad_norm": 1.3228947738802228, "learning_rate": 1.3535598144708315e-07, "loss": 0.2197, "step": 44145 }, { "epoch": 0.7673695005996976, "grad_norm": 1.1684677273474176, "learning_rate": 1.3533672205706566e-07, "loss": 0.1301, "step": 44146 }, { "epoch": 0.7673868831372004, "grad_norm": 2.5433951314996803, "learning_rate": 1.3531746382286319e-07, "loss": 0.3472, "step": 44147 }, { "epoch": 0.7674042656747032, "grad_norm": 1.3524490800554578, "learning_rate": 1.3529820674453686e-07, "loss": 0.257, "step": 44148 }, { "epoch": 0.767421648212206, "grad_norm": 1.3474387353957875, "learning_rate": 1.3527895082214768e-07, "loss": 0.1408, "step": 44149 }, { "epoch": 0.7674390307497089, "grad_norm": 1.491286532417836, "learning_rate": 1.3525969605575665e-07, "loss": 0.1277, "step": 44150 }, { "epoch": 0.7674564132872117, "grad_norm": 0.7876553607156732, "learning_rate": 1.3524044244542488e-07, "loss": 0.3123, "step": 44151 }, { "epoch": 0.7674737958247145, "grad_norm": 0.894732972797584, "learning_rate": 1.352211899912132e-07, "loss": 0.1413, "step": 44152 }, { "epoch": 0.7674911783622174, "grad_norm": 1.2768967430038711, "learning_rate": 1.3520193869318292e-07, "loss": 0.2752, "step": 44153 }, { "epoch": 0.7675085608997202, "grad_norm": 2.65469361930855, "learning_rate": 1.3518268855139503e-07, "loss": 0.2147, "step": 44154 }, { "epoch": 0.7675259434372229, "grad_norm": 1.3468431109568628, "learning_rate": 1.3516343956591018e-07, "loss": 0.2104, "step": 44155 }, { "epoch": 0.7675433259747257, "grad_norm": 1.8206437356026535, "learning_rate": 1.3514419173678982e-07, "loss": 0.225, "step": 44156 }, { "epoch": 0.7675607085122286, "grad_norm": 1.6308555131978086, "learning_rate": 1.3512494506409472e-07, "loss": 0.217, "step": 44157 }, { "epoch": 0.7675780910497314, "grad_norm": 1.0263757601686927, "learning_rate": 1.351056995478859e-07, "loss": 0.3564, "step": 44158 }, { "epoch": 0.7675954735872342, "grad_norm": 1.7012643766120268, "learning_rate": 1.350864551882245e-07, "loss": 0.1572, "step": 44159 }, { "epoch": 0.7676128561247371, "grad_norm": 1.3577587100789237, "learning_rate": 1.3506721198517118e-07, "loss": 0.1654, "step": 44160 }, { "epoch": 0.7676302386622399, "grad_norm": 3.7650758871392105, "learning_rate": 1.3504796993878747e-07, "loss": 0.2029, "step": 44161 }, { "epoch": 0.7676476211997427, "grad_norm": 1.1838750258553126, "learning_rate": 1.3502872904913393e-07, "loss": 0.1588, "step": 44162 }, { "epoch": 0.7676650037372456, "grad_norm": 1.525611370591988, "learning_rate": 1.3500948931627148e-07, "loss": 0.257, "step": 44163 }, { "epoch": 0.7676823862747484, "grad_norm": 2.7194367718916523, "learning_rate": 1.3499025074026143e-07, "loss": 0.2262, "step": 44164 }, { "epoch": 0.7676997688122512, "grad_norm": 1.3901640067946375, "learning_rate": 1.349710133211646e-07, "loss": 0.3255, "step": 44165 }, { "epoch": 0.767717151349754, "grad_norm": 1.6843780929603487, "learning_rate": 1.3495177705904205e-07, "loss": 0.2105, "step": 44166 }, { "epoch": 0.7677345338872569, "grad_norm": 0.8982552939477267, "learning_rate": 1.3493254195395464e-07, "loss": 0.2323, "step": 44167 }, { "epoch": 0.7677519164247597, "grad_norm": 1.149503790889552, "learning_rate": 1.3491330800596334e-07, "loss": 0.2008, "step": 44168 }, { "epoch": 0.7677692989622625, "grad_norm": 1.9587672365817834, "learning_rate": 1.348940752151292e-07, "loss": 0.3186, "step": 44169 }, { "epoch": 0.7677866814997654, "grad_norm": 1.666612014039725, "learning_rate": 1.3487484358151314e-07, "loss": 0.1699, "step": 44170 }, { "epoch": 0.7678040640372682, "grad_norm": 1.7994772452045609, "learning_rate": 1.348556131051759e-07, "loss": 0.2114, "step": 44171 }, { "epoch": 0.767821446574771, "grad_norm": 1.2424788330454564, "learning_rate": 1.3483638378617878e-07, "loss": 0.1511, "step": 44172 }, { "epoch": 0.7678388291122739, "grad_norm": 1.1963429413837128, "learning_rate": 1.3481715562458273e-07, "loss": 0.1526, "step": 44173 }, { "epoch": 0.7678562116497767, "grad_norm": 1.2641659413317021, "learning_rate": 1.3479792862044826e-07, "loss": 0.1865, "step": 44174 }, { "epoch": 0.7678735941872794, "grad_norm": 1.729314488252878, "learning_rate": 1.347787027738368e-07, "loss": 0.3726, "step": 44175 }, { "epoch": 0.7678909767247822, "grad_norm": 1.7678942875240349, "learning_rate": 1.3475947808480902e-07, "loss": 0.1908, "step": 44176 }, { "epoch": 0.7679083592622851, "grad_norm": 1.0126526642440896, "learning_rate": 1.347402545534259e-07, "loss": 0.2669, "step": 44177 }, { "epoch": 0.7679257417997879, "grad_norm": 2.21031254399875, "learning_rate": 1.3472103217974845e-07, "loss": 0.2565, "step": 44178 }, { "epoch": 0.7679431243372907, "grad_norm": 1.866907441245473, "learning_rate": 1.3470181096383732e-07, "loss": 0.4816, "step": 44179 }, { "epoch": 0.7679605068747936, "grad_norm": 1.1107247706269892, "learning_rate": 1.3468259090575397e-07, "loss": 0.113, "step": 44180 }, { "epoch": 0.7679778894122964, "grad_norm": 1.097114356946521, "learning_rate": 1.3466337200555888e-07, "loss": 0.1947, "step": 44181 }, { "epoch": 0.7679952719497992, "grad_norm": 2.141366295268684, "learning_rate": 1.3464415426331287e-07, "loss": 0.192, "step": 44182 }, { "epoch": 0.768012654487302, "grad_norm": 1.0412394046967932, "learning_rate": 1.3462493767907723e-07, "loss": 0.21, "step": 44183 }, { "epoch": 0.7680300370248049, "grad_norm": 2.1823658642296837, "learning_rate": 1.3460572225291271e-07, "loss": 0.255, "step": 44184 }, { "epoch": 0.7680474195623077, "grad_norm": 2.1766241122483487, "learning_rate": 1.345865079848802e-07, "loss": 0.3358, "step": 44185 }, { "epoch": 0.7680648020998105, "grad_norm": 1.4994385272856552, "learning_rate": 1.345672948750406e-07, "loss": 0.4008, "step": 44186 }, { "epoch": 0.7680821846373134, "grad_norm": 2.5051349276621218, "learning_rate": 1.3454808292345483e-07, "loss": 0.2365, "step": 44187 }, { "epoch": 0.7680995671748162, "grad_norm": 1.8055281949200197, "learning_rate": 1.345288721301837e-07, "loss": 0.2493, "step": 44188 }, { "epoch": 0.768116949712319, "grad_norm": 1.1487526871910148, "learning_rate": 1.3450966249528822e-07, "loss": 0.2304, "step": 44189 }, { "epoch": 0.7681343322498219, "grad_norm": 1.7367253856318996, "learning_rate": 1.3449045401882903e-07, "loss": 0.2021, "step": 44190 }, { "epoch": 0.7681517147873247, "grad_norm": 1.2704121320069461, "learning_rate": 1.344712467008674e-07, "loss": 0.2128, "step": 44191 }, { "epoch": 0.7681690973248275, "grad_norm": 1.502071148641992, "learning_rate": 1.3445204054146415e-07, "loss": 0.2317, "step": 44192 }, { "epoch": 0.7681864798623304, "grad_norm": 2.0463642660959933, "learning_rate": 1.344328355406798e-07, "loss": 0.1575, "step": 44193 }, { "epoch": 0.7682038623998332, "grad_norm": 1.854160969525442, "learning_rate": 1.3441363169857535e-07, "loss": 0.2102, "step": 44194 }, { "epoch": 0.7682212449373359, "grad_norm": 1.6061463689459472, "learning_rate": 1.3439442901521186e-07, "loss": 0.2521, "step": 44195 }, { "epoch": 0.7682386274748387, "grad_norm": 1.5061089980532947, "learning_rate": 1.3437522749065012e-07, "loss": 0.1998, "step": 44196 }, { "epoch": 0.7682560100123416, "grad_norm": 1.3042572705895172, "learning_rate": 1.3435602712495092e-07, "loss": 0.2511, "step": 44197 }, { "epoch": 0.7682733925498444, "grad_norm": 1.388209904419101, "learning_rate": 1.3433682791817495e-07, "loss": 0.1803, "step": 44198 }, { "epoch": 0.7682907750873472, "grad_norm": 1.3373166383333994, "learning_rate": 1.343176298703837e-07, "loss": 0.2546, "step": 44199 }, { "epoch": 0.76830815762485, "grad_norm": 1.7688829074032473, "learning_rate": 1.3429843298163734e-07, "loss": 0.2143, "step": 44200 }, { "epoch": 0.7683255401623529, "grad_norm": 2.6803738820550493, "learning_rate": 1.3427923725199698e-07, "loss": 0.2158, "step": 44201 }, { "epoch": 0.7683429226998557, "grad_norm": 0.99046012608589, "learning_rate": 1.342600426815233e-07, "loss": 0.2392, "step": 44202 }, { "epoch": 0.7683603052373585, "grad_norm": 1.9748778262017193, "learning_rate": 1.342408492702774e-07, "loss": 0.2619, "step": 44203 }, { "epoch": 0.7683776877748614, "grad_norm": 2.18194174885275, "learning_rate": 1.3422165701832e-07, "loss": 0.3, "step": 44204 }, { "epoch": 0.7683950703123642, "grad_norm": 3.1457696354065563, "learning_rate": 1.3420246592571195e-07, "loss": 0.1876, "step": 44205 }, { "epoch": 0.768412452849867, "grad_norm": 3.839554831230247, "learning_rate": 1.34183275992514e-07, "loss": 0.2892, "step": 44206 }, { "epoch": 0.7684298353873699, "grad_norm": 1.1849614181716597, "learning_rate": 1.3416408721878703e-07, "loss": 0.2492, "step": 44207 }, { "epoch": 0.7684472179248727, "grad_norm": 1.178326013671456, "learning_rate": 1.3414489960459191e-07, "loss": 0.2566, "step": 44208 }, { "epoch": 0.7684646004623755, "grad_norm": 1.6611395288054784, "learning_rate": 1.3412571314998932e-07, "loss": 0.199, "step": 44209 }, { "epoch": 0.7684819829998784, "grad_norm": 1.1226517932361686, "learning_rate": 1.3410652785504006e-07, "loss": 0.2037, "step": 44210 }, { "epoch": 0.7684993655373812, "grad_norm": 0.7978410190757128, "learning_rate": 1.3408734371980534e-07, "loss": 0.2281, "step": 44211 }, { "epoch": 0.768516748074884, "grad_norm": 1.3699052489563084, "learning_rate": 1.3406816074434545e-07, "loss": 0.2535, "step": 44212 }, { "epoch": 0.7685341306123868, "grad_norm": 1.3602506466836592, "learning_rate": 1.3404897892872124e-07, "loss": 0.3204, "step": 44213 }, { "epoch": 0.7685515131498897, "grad_norm": 0.9284058047953704, "learning_rate": 1.3402979827299387e-07, "loss": 0.2071, "step": 44214 }, { "epoch": 0.7685688956873924, "grad_norm": 1.2791923615720693, "learning_rate": 1.3401061877722392e-07, "loss": 0.2532, "step": 44215 }, { "epoch": 0.7685862782248952, "grad_norm": 1.5439879728281303, "learning_rate": 1.3399144044147215e-07, "loss": 0.2246, "step": 44216 }, { "epoch": 0.768603660762398, "grad_norm": 2.698443394576258, "learning_rate": 1.3397226326579942e-07, "loss": 0.1637, "step": 44217 }, { "epoch": 0.7686210432999009, "grad_norm": 1.3902099363988751, "learning_rate": 1.3395308725026645e-07, "loss": 0.1997, "step": 44218 }, { "epoch": 0.7686384258374037, "grad_norm": 1.9039748536093914, "learning_rate": 1.3393391239493402e-07, "loss": 0.2483, "step": 44219 }, { "epoch": 0.7686558083749065, "grad_norm": 1.4778480205036808, "learning_rate": 1.3391473869986298e-07, "loss": 0.2266, "step": 44220 }, { "epoch": 0.7686731909124094, "grad_norm": 1.6871041954021704, "learning_rate": 1.338955661651139e-07, "loss": 0.2822, "step": 44221 }, { "epoch": 0.7686905734499122, "grad_norm": 1.1547108198303797, "learning_rate": 1.338763947907478e-07, "loss": 0.1499, "step": 44222 }, { "epoch": 0.768707955987415, "grad_norm": 2.4079361414831206, "learning_rate": 1.338572245768254e-07, "loss": 0.271, "step": 44223 }, { "epoch": 0.7687253385249179, "grad_norm": 1.418704868869588, "learning_rate": 1.3383805552340754e-07, "loss": 0.2503, "step": 44224 }, { "epoch": 0.7687427210624207, "grad_norm": 1.8615658830516806, "learning_rate": 1.3381888763055454e-07, "loss": 0.3322, "step": 44225 }, { "epoch": 0.7687601035999235, "grad_norm": 3.0265991936018013, "learning_rate": 1.3379972089832758e-07, "loss": 0.179, "step": 44226 }, { "epoch": 0.7687774861374264, "grad_norm": 1.2800664872149603, "learning_rate": 1.3378055532678735e-07, "loss": 0.1631, "step": 44227 }, { "epoch": 0.7687948686749292, "grad_norm": 1.4900792169069559, "learning_rate": 1.3376139091599447e-07, "loss": 0.1579, "step": 44228 }, { "epoch": 0.768812251212432, "grad_norm": 1.504515815303541, "learning_rate": 1.3374222766600963e-07, "loss": 0.1738, "step": 44229 }, { "epoch": 0.7688296337499348, "grad_norm": 1.5781644720601782, "learning_rate": 1.3372306557689395e-07, "loss": 0.306, "step": 44230 }, { "epoch": 0.7688470162874377, "grad_norm": 2.579953356116052, "learning_rate": 1.337039046487078e-07, "loss": 0.3123, "step": 44231 }, { "epoch": 0.7688643988249405, "grad_norm": 1.1639522299915765, "learning_rate": 1.3368474488151195e-07, "loss": 0.2321, "step": 44232 }, { "epoch": 0.7688817813624433, "grad_norm": 1.1412121376680082, "learning_rate": 1.3366558627536706e-07, "loss": 0.2051, "step": 44233 }, { "epoch": 0.7688991638999461, "grad_norm": 1.0797385495068772, "learning_rate": 1.336464288303341e-07, "loss": 0.1731, "step": 44234 }, { "epoch": 0.7689165464374489, "grad_norm": 2.201572405307443, "learning_rate": 1.3362727254647378e-07, "loss": 0.1494, "step": 44235 }, { "epoch": 0.7689339289749517, "grad_norm": 0.871973258936733, "learning_rate": 1.336081174238466e-07, "loss": 0.1878, "step": 44236 }, { "epoch": 0.7689513115124545, "grad_norm": 2.561035935641358, "learning_rate": 1.335889634625134e-07, "loss": 0.2574, "step": 44237 }, { "epoch": 0.7689686940499574, "grad_norm": 3.6841561009906036, "learning_rate": 1.3356981066253493e-07, "loss": 0.2685, "step": 44238 }, { "epoch": 0.7689860765874602, "grad_norm": 1.3980547841379154, "learning_rate": 1.3355065902397178e-07, "loss": 0.124, "step": 44239 }, { "epoch": 0.769003459124963, "grad_norm": 1.1153055873669384, "learning_rate": 1.3353150854688473e-07, "loss": 0.1159, "step": 44240 }, { "epoch": 0.7690208416624659, "grad_norm": 1.7114225275824515, "learning_rate": 1.3351235923133424e-07, "loss": 0.2372, "step": 44241 }, { "epoch": 0.7690382241999687, "grad_norm": 1.8254434183162682, "learning_rate": 1.3349321107738148e-07, "loss": 0.2324, "step": 44242 }, { "epoch": 0.7690556067374715, "grad_norm": 2.1524117757466503, "learning_rate": 1.3347406408508694e-07, "loss": 0.2907, "step": 44243 }, { "epoch": 0.7690729892749744, "grad_norm": 1.3154291966971876, "learning_rate": 1.33454918254511e-07, "loss": 0.2051, "step": 44244 }, { "epoch": 0.7690903718124772, "grad_norm": 1.2166876337279502, "learning_rate": 1.3343577358571473e-07, "loss": 0.3158, "step": 44245 }, { "epoch": 0.76910775434998, "grad_norm": 0.9511654150714833, "learning_rate": 1.334166300787587e-07, "loss": 0.2076, "step": 44246 }, { "epoch": 0.7691251368874829, "grad_norm": 1.3197397617805198, "learning_rate": 1.3339748773370352e-07, "loss": 0.3285, "step": 44247 }, { "epoch": 0.7691425194249857, "grad_norm": 1.1945784132950437, "learning_rate": 1.3337834655060993e-07, "loss": 0.1268, "step": 44248 }, { "epoch": 0.7691599019624885, "grad_norm": 1.9325914311120695, "learning_rate": 1.3335920652953853e-07, "loss": 0.191, "step": 44249 }, { "epoch": 0.7691772844999913, "grad_norm": 1.0363917140824825, "learning_rate": 1.3334006767055007e-07, "loss": 0.2257, "step": 44250 }, { "epoch": 0.7691946670374942, "grad_norm": 2.584750135519202, "learning_rate": 1.333209299737051e-07, "loss": 0.3694, "step": 44251 }, { "epoch": 0.769212049574997, "grad_norm": 1.9567353633850488, "learning_rate": 1.3330179343906421e-07, "loss": 0.2419, "step": 44252 }, { "epoch": 0.7692294321124998, "grad_norm": 1.3775360864124742, "learning_rate": 1.3328265806668838e-07, "loss": 0.1885, "step": 44253 }, { "epoch": 0.7692468146500026, "grad_norm": 1.036000782235377, "learning_rate": 1.3326352385663807e-07, "loss": 0.1997, "step": 44254 }, { "epoch": 0.7692641971875054, "grad_norm": 1.491025349691903, "learning_rate": 1.3324439080897405e-07, "loss": 0.225, "step": 44255 }, { "epoch": 0.7692815797250082, "grad_norm": 1.5389730854762884, "learning_rate": 1.3322525892375652e-07, "loss": 0.2713, "step": 44256 }, { "epoch": 0.769298962262511, "grad_norm": 1.6218765521645944, "learning_rate": 1.332061282010466e-07, "loss": 0.1979, "step": 44257 }, { "epoch": 0.7693163448000139, "grad_norm": 1.669005175540916, "learning_rate": 1.3318699864090476e-07, "loss": 0.1712, "step": 44258 }, { "epoch": 0.7693337273375167, "grad_norm": 2.3022726677376886, "learning_rate": 1.3316787024339166e-07, "loss": 0.2429, "step": 44259 }, { "epoch": 0.7693511098750195, "grad_norm": 1.6550960387426628, "learning_rate": 1.3314874300856776e-07, "loss": 0.1837, "step": 44260 }, { "epoch": 0.7693684924125224, "grad_norm": 3.5322896387382756, "learning_rate": 1.3312961693649393e-07, "loss": 0.231, "step": 44261 }, { "epoch": 0.7693858749500252, "grad_norm": 1.647469611499798, "learning_rate": 1.3311049202723086e-07, "loss": 0.1809, "step": 44262 }, { "epoch": 0.769403257487528, "grad_norm": 1.2143494862830795, "learning_rate": 1.3309136828083872e-07, "loss": 0.2836, "step": 44263 }, { "epoch": 0.7694206400250309, "grad_norm": 1.5555015957073777, "learning_rate": 1.330722456973785e-07, "loss": 0.1567, "step": 44264 }, { "epoch": 0.7694380225625337, "grad_norm": 1.2629881662737124, "learning_rate": 1.3305312427691073e-07, "loss": 0.1649, "step": 44265 }, { "epoch": 0.7694554051000365, "grad_norm": 1.3403483423305902, "learning_rate": 1.3303400401949599e-07, "loss": 0.1914, "step": 44266 }, { "epoch": 0.7694727876375393, "grad_norm": 3.026158351279888, "learning_rate": 1.330148849251949e-07, "loss": 0.2865, "step": 44267 }, { "epoch": 0.7694901701750422, "grad_norm": 1.7498021724107684, "learning_rate": 1.3299576699406805e-07, "loss": 0.1626, "step": 44268 }, { "epoch": 0.769507552712545, "grad_norm": 1.6020748615849345, "learning_rate": 1.3297665022617605e-07, "loss": 0.284, "step": 44269 }, { "epoch": 0.7695249352500478, "grad_norm": 1.2867235243732973, "learning_rate": 1.3295753462157943e-07, "loss": 0.2326, "step": 44270 }, { "epoch": 0.7695423177875507, "grad_norm": 1.907866308780207, "learning_rate": 1.3293842018033867e-07, "loss": 0.151, "step": 44271 }, { "epoch": 0.7695597003250535, "grad_norm": 1.4092713923757016, "learning_rate": 1.329193069025147e-07, "loss": 0.2439, "step": 44272 }, { "epoch": 0.7695770828625563, "grad_norm": 1.3827172844351394, "learning_rate": 1.3290019478816783e-07, "loss": 0.2701, "step": 44273 }, { "epoch": 0.769594465400059, "grad_norm": 1.2852073425948403, "learning_rate": 1.328810838373589e-07, "loss": 0.2507, "step": 44274 }, { "epoch": 0.7696118479375619, "grad_norm": 0.899426357715685, "learning_rate": 1.3286197405014798e-07, "loss": 0.1532, "step": 44275 }, { "epoch": 0.7696292304750647, "grad_norm": 1.8909728035276692, "learning_rate": 1.328428654265961e-07, "loss": 0.2705, "step": 44276 }, { "epoch": 0.7696466130125675, "grad_norm": 2.5313512363970263, "learning_rate": 1.3282375796676372e-07, "loss": 0.2086, "step": 44277 }, { "epoch": 0.7696639955500704, "grad_norm": 2.6878782509373456, "learning_rate": 1.3280465167071137e-07, "loss": 0.4126, "step": 44278 }, { "epoch": 0.7696813780875732, "grad_norm": 2.809901431535756, "learning_rate": 1.3278554653849937e-07, "loss": 0.271, "step": 44279 }, { "epoch": 0.769698760625076, "grad_norm": 1.0075020939856054, "learning_rate": 1.327664425701887e-07, "loss": 0.2577, "step": 44280 }, { "epoch": 0.7697161431625789, "grad_norm": 1.3670661299976274, "learning_rate": 1.3274733976583984e-07, "loss": 0.3259, "step": 44281 }, { "epoch": 0.7697335257000817, "grad_norm": 8.056931774015663, "learning_rate": 1.327282381255131e-07, "loss": 0.3242, "step": 44282 }, { "epoch": 0.7697509082375845, "grad_norm": 0.8770478396184223, "learning_rate": 1.3270913764926899e-07, "loss": 0.1705, "step": 44283 }, { "epoch": 0.7697682907750873, "grad_norm": 1.6219407422522911, "learning_rate": 1.326900383371683e-07, "loss": 0.2023, "step": 44284 }, { "epoch": 0.7697856733125902, "grad_norm": 1.0251477722175761, "learning_rate": 1.326709401892715e-07, "loss": 0.1841, "step": 44285 }, { "epoch": 0.769803055850093, "grad_norm": 1.2304248215060447, "learning_rate": 1.3265184320563903e-07, "loss": 0.1642, "step": 44286 }, { "epoch": 0.7698204383875958, "grad_norm": 1.8222173923004008, "learning_rate": 1.3263274738633152e-07, "loss": 0.2998, "step": 44287 }, { "epoch": 0.7698378209250987, "grad_norm": 1.6766117504613238, "learning_rate": 1.3261365273140946e-07, "loss": 0.2487, "step": 44288 }, { "epoch": 0.7698552034626015, "grad_norm": 1.1648050148105777, "learning_rate": 1.3259455924093333e-07, "loss": 0.5072, "step": 44289 }, { "epoch": 0.7698725860001043, "grad_norm": 1.4757311565874247, "learning_rate": 1.3257546691496362e-07, "loss": 0.2216, "step": 44290 }, { "epoch": 0.7698899685376072, "grad_norm": 1.792511043814777, "learning_rate": 1.3255637575356082e-07, "loss": 0.2434, "step": 44291 }, { "epoch": 0.76990735107511, "grad_norm": 1.0029606892022844, "learning_rate": 1.3253728575678563e-07, "loss": 0.1632, "step": 44292 }, { "epoch": 0.7699247336126128, "grad_norm": 1.5299370882179304, "learning_rate": 1.3251819692469863e-07, "loss": 0.2898, "step": 44293 }, { "epoch": 0.7699421161501155, "grad_norm": 1.2010123406399418, "learning_rate": 1.3249910925735984e-07, "loss": 0.1368, "step": 44294 }, { "epoch": 0.7699594986876184, "grad_norm": 1.1573943455217415, "learning_rate": 1.3248002275483016e-07, "loss": 0.2025, "step": 44295 }, { "epoch": 0.7699768812251212, "grad_norm": 1.6339931180580378, "learning_rate": 1.3246093741717002e-07, "loss": 0.2808, "step": 44296 }, { "epoch": 0.769994263762624, "grad_norm": 0.8773946769413468, "learning_rate": 1.324418532444399e-07, "loss": 0.2413, "step": 44297 }, { "epoch": 0.7700116463001269, "grad_norm": 1.7092646894387147, "learning_rate": 1.3242277023670024e-07, "loss": 0.2631, "step": 44298 }, { "epoch": 0.7700290288376297, "grad_norm": 1.2632963459901712, "learning_rate": 1.3240368839401138e-07, "loss": 0.2557, "step": 44299 }, { "epoch": 0.7700464113751325, "grad_norm": 1.315420107842243, "learning_rate": 1.323846077164343e-07, "loss": 0.1976, "step": 44300 }, { "epoch": 0.7700637939126354, "grad_norm": 1.9226471983723314, "learning_rate": 1.3236552820402892e-07, "loss": 0.2835, "step": 44301 }, { "epoch": 0.7700811764501382, "grad_norm": 1.5188897940465305, "learning_rate": 1.3234644985685583e-07, "loss": 0.156, "step": 44302 }, { "epoch": 0.770098558987641, "grad_norm": 1.4792418826889533, "learning_rate": 1.323273726749758e-07, "loss": 0.1653, "step": 44303 }, { "epoch": 0.7701159415251438, "grad_norm": 2.4472177723169946, "learning_rate": 1.3230829665844907e-07, "loss": 0.3185, "step": 44304 }, { "epoch": 0.7701333240626467, "grad_norm": 0.7343280780886074, "learning_rate": 1.3228922180733615e-07, "loss": 0.174, "step": 44305 }, { "epoch": 0.7701507066001495, "grad_norm": 0.8810073340648013, "learning_rate": 1.3227014812169745e-07, "loss": 0.1987, "step": 44306 }, { "epoch": 0.7701680891376523, "grad_norm": 1.2550787929373044, "learning_rate": 1.322510756015935e-07, "loss": 0.2129, "step": 44307 }, { "epoch": 0.7701854716751552, "grad_norm": 1.2540491016890098, "learning_rate": 1.322320042470847e-07, "loss": 0.2544, "step": 44308 }, { "epoch": 0.770202854212658, "grad_norm": 2.168779988857718, "learning_rate": 1.3221293405823147e-07, "loss": 0.1323, "step": 44309 }, { "epoch": 0.7702202367501608, "grad_norm": 1.9149097808678985, "learning_rate": 1.3219386503509418e-07, "loss": 0.1377, "step": 44310 }, { "epoch": 0.7702376192876637, "grad_norm": 1.2293819390104486, "learning_rate": 1.321747971777335e-07, "loss": 0.1178, "step": 44311 }, { "epoch": 0.7702550018251665, "grad_norm": 1.8902257600060677, "learning_rate": 1.3215573048620992e-07, "loss": 0.2653, "step": 44312 }, { "epoch": 0.7702723843626693, "grad_norm": 0.970962989646761, "learning_rate": 1.3213666496058358e-07, "loss": 0.2185, "step": 44313 }, { "epoch": 0.770289766900172, "grad_norm": 1.5549050206462058, "learning_rate": 1.3211760060091482e-07, "loss": 0.2731, "step": 44314 }, { "epoch": 0.7703071494376749, "grad_norm": 2.553342381820959, "learning_rate": 1.3209853740726446e-07, "loss": 0.2483, "step": 44315 }, { "epoch": 0.7703245319751777, "grad_norm": 1.339644177990451, "learning_rate": 1.320794753796927e-07, "loss": 0.2062, "step": 44316 }, { "epoch": 0.7703419145126805, "grad_norm": 0.8796891350255861, "learning_rate": 1.3206041451826005e-07, "loss": 0.1454, "step": 44317 }, { "epoch": 0.7703592970501834, "grad_norm": 1.7898618398786743, "learning_rate": 1.320413548230267e-07, "loss": 0.2559, "step": 44318 }, { "epoch": 0.7703766795876862, "grad_norm": 1.631713896348685, "learning_rate": 1.3202229629405353e-07, "loss": 0.2249, "step": 44319 }, { "epoch": 0.770394062125189, "grad_norm": 1.1090978814296146, "learning_rate": 1.3200323893140048e-07, "loss": 0.1827, "step": 44320 }, { "epoch": 0.7704114446626918, "grad_norm": 3.8348293656168484, "learning_rate": 1.3198418273512811e-07, "loss": 0.3674, "step": 44321 }, { "epoch": 0.7704288272001947, "grad_norm": 1.2813330792008355, "learning_rate": 1.3196512770529672e-07, "loss": 0.1929, "step": 44322 }, { "epoch": 0.7704462097376975, "grad_norm": 1.2329384948450943, "learning_rate": 1.3194607384196693e-07, "loss": 0.174, "step": 44323 }, { "epoch": 0.7704635922752003, "grad_norm": 2.2105864814951897, "learning_rate": 1.3192702114519903e-07, "loss": 0.181, "step": 44324 }, { "epoch": 0.7704809748127032, "grad_norm": 1.5444221206526698, "learning_rate": 1.319079696150534e-07, "loss": 0.1839, "step": 44325 }, { "epoch": 0.770498357350206, "grad_norm": 1.38093052336776, "learning_rate": 1.318889192515904e-07, "loss": 0.1714, "step": 44326 }, { "epoch": 0.7705157398877088, "grad_norm": 1.8566135091812832, "learning_rate": 1.3186987005487043e-07, "loss": 0.2799, "step": 44327 }, { "epoch": 0.7705331224252117, "grad_norm": 2.027628663458661, "learning_rate": 1.3185082202495385e-07, "loss": 0.275, "step": 44328 }, { "epoch": 0.7705505049627145, "grad_norm": 1.1408557808566009, "learning_rate": 1.3183177516190107e-07, "loss": 0.2417, "step": 44329 }, { "epoch": 0.7705678875002173, "grad_norm": 1.001184393738248, "learning_rate": 1.3181272946577231e-07, "loss": 0.1143, "step": 44330 }, { "epoch": 0.7705852700377201, "grad_norm": 1.633609975588952, "learning_rate": 1.3179368493662835e-07, "loss": 0.2346, "step": 44331 }, { "epoch": 0.770602652575223, "grad_norm": 1.3668279434493829, "learning_rate": 1.317746415745291e-07, "loss": 0.2358, "step": 44332 }, { "epoch": 0.7706200351127258, "grad_norm": 1.5197144123944637, "learning_rate": 1.3175559937953496e-07, "loss": 0.1765, "step": 44333 }, { "epoch": 0.7706374176502285, "grad_norm": 1.7711053880487073, "learning_rate": 1.3173655835170655e-07, "loss": 0.2222, "step": 44334 }, { "epoch": 0.7706548001877314, "grad_norm": 1.2353455985282997, "learning_rate": 1.3171751849110413e-07, "loss": 0.175, "step": 44335 }, { "epoch": 0.7706721827252342, "grad_norm": 1.5205403456305606, "learning_rate": 1.3169847979778797e-07, "loss": 0.2492, "step": 44336 }, { "epoch": 0.770689565262737, "grad_norm": 1.974612125926256, "learning_rate": 1.3167944227181842e-07, "loss": 0.2537, "step": 44337 }, { "epoch": 0.7707069478002398, "grad_norm": 2.035998491467731, "learning_rate": 1.3166040591325588e-07, "loss": 0.3041, "step": 44338 }, { "epoch": 0.7707243303377427, "grad_norm": 2.2260728560027943, "learning_rate": 1.3164137072216065e-07, "loss": 0.0951, "step": 44339 }, { "epoch": 0.7707417128752455, "grad_norm": 1.1894857439453428, "learning_rate": 1.31622336698593e-07, "loss": 0.1972, "step": 44340 }, { "epoch": 0.7707590954127483, "grad_norm": 1.834218854181568, "learning_rate": 1.3160330384261327e-07, "loss": 0.2789, "step": 44341 }, { "epoch": 0.7707764779502512, "grad_norm": 1.2607111024955464, "learning_rate": 1.3158427215428192e-07, "loss": 0.253, "step": 44342 }, { "epoch": 0.770793860487754, "grad_norm": 1.5712244014957926, "learning_rate": 1.3156524163365923e-07, "loss": 0.1993, "step": 44343 }, { "epoch": 0.7708112430252568, "grad_norm": 0.827368856249739, "learning_rate": 1.315462122808056e-07, "loss": 0.1749, "step": 44344 }, { "epoch": 0.7708286255627597, "grad_norm": 1.0454655234674366, "learning_rate": 1.3152718409578094e-07, "loss": 0.1603, "step": 44345 }, { "epoch": 0.7708460081002625, "grad_norm": 1.628530039703456, "learning_rate": 1.3150815707864598e-07, "loss": 0.2756, "step": 44346 }, { "epoch": 0.7708633906377653, "grad_norm": 1.4721740274942092, "learning_rate": 1.3148913122946087e-07, "loss": 0.2284, "step": 44347 }, { "epoch": 0.7708807731752682, "grad_norm": 1.0960872383593594, "learning_rate": 1.31470106548286e-07, "loss": 0.3003, "step": 44348 }, { "epoch": 0.770898155712771, "grad_norm": 1.4390972267007025, "learning_rate": 1.3145108303518142e-07, "loss": 0.3133, "step": 44349 }, { "epoch": 0.7709155382502738, "grad_norm": 1.646968744902443, "learning_rate": 1.314320606902079e-07, "loss": 0.2643, "step": 44350 }, { "epoch": 0.7709329207877766, "grad_norm": 1.4997339330860224, "learning_rate": 1.314130395134253e-07, "loss": 0.1668, "step": 44351 }, { "epoch": 0.7709503033252795, "grad_norm": 1.3466705704515811, "learning_rate": 1.3139401950489403e-07, "loss": 0.2136, "step": 44352 }, { "epoch": 0.7709676858627823, "grad_norm": 1.806965261166142, "learning_rate": 1.3137500066467427e-07, "loss": 0.1768, "step": 44353 }, { "epoch": 0.770985068400285, "grad_norm": 1.2818003146648387, "learning_rate": 1.3135598299282658e-07, "loss": 0.221, "step": 44354 }, { "epoch": 0.7710024509377879, "grad_norm": 1.8223858990319828, "learning_rate": 1.3133696648941107e-07, "loss": 0.2306, "step": 44355 }, { "epoch": 0.7710198334752907, "grad_norm": 2.534377130223765, "learning_rate": 1.3131795115448803e-07, "loss": 0.276, "step": 44356 }, { "epoch": 0.7710372160127935, "grad_norm": 0.9863738868124495, "learning_rate": 1.3129893698811772e-07, "loss": 0.1648, "step": 44357 }, { "epoch": 0.7710545985502963, "grad_norm": 1.1131343712314796, "learning_rate": 1.3127992399036043e-07, "loss": 0.1642, "step": 44358 }, { "epoch": 0.7710719810877992, "grad_norm": 1.5407649348519286, "learning_rate": 1.312609121612764e-07, "loss": 0.2941, "step": 44359 }, { "epoch": 0.771089363625302, "grad_norm": 1.4236681687246162, "learning_rate": 1.3124190150092567e-07, "loss": 0.2613, "step": 44360 }, { "epoch": 0.7711067461628048, "grad_norm": 1.5481607444600562, "learning_rate": 1.3122289200936897e-07, "loss": 0.2783, "step": 44361 }, { "epoch": 0.7711241287003077, "grad_norm": 1.7669171249234057, "learning_rate": 1.3120388368666625e-07, "loss": 0.3246, "step": 44362 }, { "epoch": 0.7711415112378105, "grad_norm": 0.953932174069641, "learning_rate": 1.3118487653287798e-07, "loss": 0.1278, "step": 44363 }, { "epoch": 0.7711588937753133, "grad_norm": 1.9021698106171625, "learning_rate": 1.3116587054806394e-07, "loss": 0.1312, "step": 44364 }, { "epoch": 0.7711762763128162, "grad_norm": 1.6506007899070256, "learning_rate": 1.3114686573228477e-07, "loss": 0.2802, "step": 44365 }, { "epoch": 0.771193658850319, "grad_norm": 1.4099600587737877, "learning_rate": 1.311278620856006e-07, "loss": 0.2815, "step": 44366 }, { "epoch": 0.7712110413878218, "grad_norm": 3.917448382385968, "learning_rate": 1.3110885960807172e-07, "loss": 0.4842, "step": 44367 }, { "epoch": 0.7712284239253246, "grad_norm": 2.0148436718891043, "learning_rate": 1.310898582997581e-07, "loss": 0.1633, "step": 44368 }, { "epoch": 0.7712458064628275, "grad_norm": 0.7554267633094461, "learning_rate": 1.310708581607205e-07, "loss": 0.1941, "step": 44369 }, { "epoch": 0.7712631890003303, "grad_norm": 1.8112923473554925, "learning_rate": 1.3105185919101863e-07, "loss": 0.2433, "step": 44370 }, { "epoch": 0.7712805715378331, "grad_norm": 2.5782372679093486, "learning_rate": 1.310328613907129e-07, "loss": 0.1909, "step": 44371 }, { "epoch": 0.771297954075336, "grad_norm": 1.443200248998594, "learning_rate": 1.3101386475986332e-07, "loss": 0.2274, "step": 44372 }, { "epoch": 0.7713153366128387, "grad_norm": 1.0094747426311128, "learning_rate": 1.3099486929853048e-07, "loss": 0.2567, "step": 44373 }, { "epoch": 0.7713327191503415, "grad_norm": 1.017096582552032, "learning_rate": 1.3097587500677438e-07, "loss": 0.2029, "step": 44374 }, { "epoch": 0.7713501016878443, "grad_norm": 2.1553220182679307, "learning_rate": 1.3095688188465526e-07, "loss": 0.2546, "step": 44375 }, { "epoch": 0.7713674842253472, "grad_norm": 1.415845711726835, "learning_rate": 1.3093788993223332e-07, "loss": 0.2135, "step": 44376 }, { "epoch": 0.77138486676285, "grad_norm": 1.0167024690571573, "learning_rate": 1.3091889914956867e-07, "loss": 0.1468, "step": 44377 }, { "epoch": 0.7714022493003528, "grad_norm": 2.623461332801174, "learning_rate": 1.3089990953672158e-07, "loss": 0.2869, "step": 44378 }, { "epoch": 0.7714196318378557, "grad_norm": 2.185858584352102, "learning_rate": 1.3088092109375225e-07, "loss": 0.234, "step": 44379 }, { "epoch": 0.7714370143753585, "grad_norm": 1.8819050334451761, "learning_rate": 1.3086193382072076e-07, "loss": 0.2303, "step": 44380 }, { "epoch": 0.7714543969128613, "grad_norm": 3.3099123308192397, "learning_rate": 1.3084294771768739e-07, "loss": 0.3048, "step": 44381 }, { "epoch": 0.7714717794503642, "grad_norm": 1.6039979570244025, "learning_rate": 1.3082396278471253e-07, "loss": 0.2032, "step": 44382 }, { "epoch": 0.771489161987867, "grad_norm": 0.9159053458578814, "learning_rate": 1.3080497902185582e-07, "loss": 0.1441, "step": 44383 }, { "epoch": 0.7715065445253698, "grad_norm": 1.927334084913154, "learning_rate": 1.3078599642917786e-07, "loss": 0.1816, "step": 44384 }, { "epoch": 0.7715239270628726, "grad_norm": 1.2974309043060772, "learning_rate": 1.307670150067387e-07, "loss": 0.1921, "step": 44385 }, { "epoch": 0.7715413096003755, "grad_norm": 1.6354695471671, "learning_rate": 1.3074803475459846e-07, "loss": 0.3316, "step": 44386 }, { "epoch": 0.7715586921378783, "grad_norm": 1.154823925360229, "learning_rate": 1.3072905567281732e-07, "loss": 0.2766, "step": 44387 }, { "epoch": 0.7715760746753811, "grad_norm": 2.762292665007807, "learning_rate": 1.3071007776145554e-07, "loss": 0.2646, "step": 44388 }, { "epoch": 0.771593457212884, "grad_norm": 3.327746038543306, "learning_rate": 1.3069110102057306e-07, "loss": 0.3616, "step": 44389 }, { "epoch": 0.7716108397503868, "grad_norm": 0.9376994741662589, "learning_rate": 1.306721254502302e-07, "loss": 0.2746, "step": 44390 }, { "epoch": 0.7716282222878896, "grad_norm": 2.172224022108501, "learning_rate": 1.306531510504869e-07, "loss": 0.1724, "step": 44391 }, { "epoch": 0.7716456048253925, "grad_norm": 1.218191611494402, "learning_rate": 1.3063417782140362e-07, "loss": 0.2186, "step": 44392 }, { "epoch": 0.7716629873628952, "grad_norm": 1.6221225937386234, "learning_rate": 1.306152057630403e-07, "loss": 0.1829, "step": 44393 }, { "epoch": 0.771680369900398, "grad_norm": 1.589118859572989, "learning_rate": 1.3059623487545724e-07, "loss": 0.2121, "step": 44394 }, { "epoch": 0.7716977524379008, "grad_norm": 1.1714244885223486, "learning_rate": 1.305772651587141e-07, "loss": 0.178, "step": 44395 }, { "epoch": 0.7717151349754037, "grad_norm": 0.839065789684171, "learning_rate": 1.3055829661287155e-07, "loss": 0.1057, "step": 44396 }, { "epoch": 0.7717325175129065, "grad_norm": 1.3858399325999915, "learning_rate": 1.3053932923798945e-07, "loss": 0.1713, "step": 44397 }, { "epoch": 0.7717499000504093, "grad_norm": 0.8680523517152429, "learning_rate": 1.3052036303412794e-07, "loss": 0.1655, "step": 44398 }, { "epoch": 0.7717672825879122, "grad_norm": 2.1844422338472027, "learning_rate": 1.3050139800134702e-07, "loss": 0.2287, "step": 44399 }, { "epoch": 0.771784665125415, "grad_norm": 9.741156979048577, "learning_rate": 1.3048243413970705e-07, "loss": 0.2643, "step": 44400 }, { "epoch": 0.7718020476629178, "grad_norm": 2.3774925876076276, "learning_rate": 1.3046347144926822e-07, "loss": 0.175, "step": 44401 }, { "epoch": 0.7718194302004207, "grad_norm": 1.5865313884637626, "learning_rate": 1.3044450993009027e-07, "loss": 0.2169, "step": 44402 }, { "epoch": 0.7718368127379235, "grad_norm": 1.2660108696579302, "learning_rate": 1.3042554958223333e-07, "loss": 0.184, "step": 44403 }, { "epoch": 0.7718541952754263, "grad_norm": 1.352019827429087, "learning_rate": 1.3040659040575775e-07, "loss": 0.2868, "step": 44404 }, { "epoch": 0.7718715778129291, "grad_norm": 1.52654107166914, "learning_rate": 1.3038763240072354e-07, "loss": 0.2226, "step": 44405 }, { "epoch": 0.771888960350432, "grad_norm": 1.5014526218291855, "learning_rate": 1.303686755671907e-07, "loss": 0.2319, "step": 44406 }, { "epoch": 0.7719063428879348, "grad_norm": 2.307406010041941, "learning_rate": 1.303497199052194e-07, "loss": 0.2995, "step": 44407 }, { "epoch": 0.7719237254254376, "grad_norm": 1.0601073628084114, "learning_rate": 1.303307654148697e-07, "loss": 0.1767, "step": 44408 }, { "epoch": 0.7719411079629405, "grad_norm": 1.2743899461862596, "learning_rate": 1.3031181209620162e-07, "loss": 0.1764, "step": 44409 }, { "epoch": 0.7719584905004433, "grad_norm": 2.1635260277818795, "learning_rate": 1.302928599492753e-07, "loss": 0.2035, "step": 44410 }, { "epoch": 0.7719758730379461, "grad_norm": 2.0775804371661697, "learning_rate": 1.3027390897415057e-07, "loss": 0.3095, "step": 44411 }, { "epoch": 0.771993255575449, "grad_norm": 1.1179067402315586, "learning_rate": 1.3025495917088798e-07, "loss": 0.1918, "step": 44412 }, { "epoch": 0.7720106381129517, "grad_norm": 1.262869596581017, "learning_rate": 1.3023601053954735e-07, "loss": 0.1782, "step": 44413 }, { "epoch": 0.7720280206504545, "grad_norm": 1.7093579322073975, "learning_rate": 1.3021706308018847e-07, "loss": 0.3012, "step": 44414 }, { "epoch": 0.7720454031879573, "grad_norm": 3.5557830830442305, "learning_rate": 1.301981167928718e-07, "loss": 0.1466, "step": 44415 }, { "epoch": 0.7720627857254602, "grad_norm": 1.5378803678770787, "learning_rate": 1.3017917167765725e-07, "loss": 0.1715, "step": 44416 }, { "epoch": 0.772080168262963, "grad_norm": 1.8601756777023606, "learning_rate": 1.3016022773460478e-07, "loss": 0.2016, "step": 44417 }, { "epoch": 0.7720975508004658, "grad_norm": 1.7286957235911906, "learning_rate": 1.3014128496377452e-07, "loss": 0.2016, "step": 44418 }, { "epoch": 0.7721149333379687, "grad_norm": 1.7880637343718724, "learning_rate": 1.3012234336522632e-07, "loss": 0.2089, "step": 44419 }, { "epoch": 0.7721323158754715, "grad_norm": 1.499576602706082, "learning_rate": 1.301034029390207e-07, "loss": 0.1763, "step": 44420 }, { "epoch": 0.7721496984129743, "grad_norm": 1.2009778795352224, "learning_rate": 1.3008446368521726e-07, "loss": 0.2047, "step": 44421 }, { "epoch": 0.7721670809504771, "grad_norm": 1.652668629802801, "learning_rate": 1.3006552560387596e-07, "loss": 0.2078, "step": 44422 }, { "epoch": 0.77218446348798, "grad_norm": 1.5273245964549131, "learning_rate": 1.3004658869505715e-07, "loss": 0.3145, "step": 44423 }, { "epoch": 0.7722018460254828, "grad_norm": 1.6136338751942119, "learning_rate": 1.3002765295882074e-07, "loss": 0.1307, "step": 44424 }, { "epoch": 0.7722192285629856, "grad_norm": 1.2778331221759953, "learning_rate": 1.300087183952267e-07, "loss": 0.2781, "step": 44425 }, { "epoch": 0.7722366111004885, "grad_norm": 1.8149579042504345, "learning_rate": 1.2998978500433506e-07, "loss": 0.2884, "step": 44426 }, { "epoch": 0.7722539936379913, "grad_norm": 1.5284789036907789, "learning_rate": 1.2997085278620583e-07, "loss": 0.2118, "step": 44427 }, { "epoch": 0.7722713761754941, "grad_norm": 1.3096198628653992, "learning_rate": 1.2995192174089903e-07, "loss": 0.1779, "step": 44428 }, { "epoch": 0.772288758712997, "grad_norm": 1.8677244296314817, "learning_rate": 1.2993299186847468e-07, "loss": 0.1474, "step": 44429 }, { "epoch": 0.7723061412504998, "grad_norm": 2.0525050951476906, "learning_rate": 1.2991406316899255e-07, "loss": 0.2386, "step": 44430 }, { "epoch": 0.7723235237880026, "grad_norm": 2.301242704295529, "learning_rate": 1.2989513564251298e-07, "loss": 0.1895, "step": 44431 }, { "epoch": 0.7723409063255054, "grad_norm": 1.9080405277429653, "learning_rate": 1.2987620928909603e-07, "loss": 0.235, "step": 44432 }, { "epoch": 0.7723582888630082, "grad_norm": 1.1463777018010848, "learning_rate": 1.2985728410880127e-07, "loss": 0.2477, "step": 44433 }, { "epoch": 0.772375671400511, "grad_norm": 2.340111908023354, "learning_rate": 1.2983836010168875e-07, "loss": 0.1798, "step": 44434 }, { "epoch": 0.7723930539380138, "grad_norm": 2.990756625183885, "learning_rate": 1.2981943726781874e-07, "loss": 0.4425, "step": 44435 }, { "epoch": 0.7724104364755167, "grad_norm": 1.5214912454642489, "learning_rate": 1.2980051560725108e-07, "loss": 0.2649, "step": 44436 }, { "epoch": 0.7724278190130195, "grad_norm": 2.4963033389887506, "learning_rate": 1.2978159512004577e-07, "loss": 0.2395, "step": 44437 }, { "epoch": 0.7724452015505223, "grad_norm": 1.4442897333118583, "learning_rate": 1.2976267580626253e-07, "loss": 0.278, "step": 44438 }, { "epoch": 0.7724625840880252, "grad_norm": 1.3994766891950345, "learning_rate": 1.2974375766596186e-07, "loss": 0.2028, "step": 44439 }, { "epoch": 0.772479966625528, "grad_norm": 1.4392552223058952, "learning_rate": 1.297248406992032e-07, "loss": 0.1856, "step": 44440 }, { "epoch": 0.7724973491630308, "grad_norm": 2.6616716169950596, "learning_rate": 1.2970592490604676e-07, "loss": 0.2234, "step": 44441 }, { "epoch": 0.7725147317005336, "grad_norm": 1.341671606858173, "learning_rate": 1.2968701028655228e-07, "loss": 0.1787, "step": 44442 }, { "epoch": 0.7725321142380365, "grad_norm": 1.5476802593529886, "learning_rate": 1.2966809684078002e-07, "loss": 0.2207, "step": 44443 }, { "epoch": 0.7725494967755393, "grad_norm": 1.8715114821335899, "learning_rate": 1.2964918456878975e-07, "loss": 0.1769, "step": 44444 }, { "epoch": 0.7725668793130421, "grad_norm": 0.5997392544111075, "learning_rate": 1.2963027347064148e-07, "loss": 0.145, "step": 44445 }, { "epoch": 0.772584261850545, "grad_norm": 1.2807426654457112, "learning_rate": 1.2961136354639507e-07, "loss": 0.2047, "step": 44446 }, { "epoch": 0.7726016443880478, "grad_norm": 1.2350992636101117, "learning_rate": 1.2959245479611048e-07, "loss": 0.2021, "step": 44447 }, { "epoch": 0.7726190269255506, "grad_norm": 1.5791742722178013, "learning_rate": 1.295735472198477e-07, "loss": 0.218, "step": 44448 }, { "epoch": 0.7726364094630535, "grad_norm": 1.5945357009744534, "learning_rate": 1.295546408176665e-07, "loss": 0.186, "step": 44449 }, { "epoch": 0.7726537920005563, "grad_norm": 0.8455919966656439, "learning_rate": 1.29535735589627e-07, "loss": 0.2481, "step": 44450 }, { "epoch": 0.7726711745380591, "grad_norm": 0.907292606524955, "learning_rate": 1.2951683153578919e-07, "loss": 0.1475, "step": 44451 }, { "epoch": 0.7726885570755619, "grad_norm": 0.7565269869458184, "learning_rate": 1.2949792865621272e-07, "loss": 0.1744, "step": 44452 }, { "epoch": 0.7727059396130647, "grad_norm": 3.2572505681347317, "learning_rate": 1.2947902695095746e-07, "loss": 0.2863, "step": 44453 }, { "epoch": 0.7727233221505675, "grad_norm": 3.6730717984204246, "learning_rate": 1.2946012642008364e-07, "loss": 0.2503, "step": 44454 }, { "epoch": 0.7727407046880703, "grad_norm": 1.653025274601943, "learning_rate": 1.29441227063651e-07, "loss": 0.1768, "step": 44455 }, { "epoch": 0.7727580872255732, "grad_norm": 0.8589258609569934, "learning_rate": 1.2942232888171945e-07, "loss": 0.2935, "step": 44456 }, { "epoch": 0.772775469763076, "grad_norm": 1.7237062862225387, "learning_rate": 1.2940343187434887e-07, "loss": 0.4535, "step": 44457 }, { "epoch": 0.7727928523005788, "grad_norm": 1.5007531859622218, "learning_rate": 1.2938453604159922e-07, "loss": 0.2659, "step": 44458 }, { "epoch": 0.7728102348380816, "grad_norm": 1.5614394200880337, "learning_rate": 1.293656413835303e-07, "loss": 0.2005, "step": 44459 }, { "epoch": 0.7728276173755845, "grad_norm": 1.4583259417025414, "learning_rate": 1.2934674790020206e-07, "loss": 0.1525, "step": 44460 }, { "epoch": 0.7728449999130873, "grad_norm": 1.1906875969209882, "learning_rate": 1.293278555916742e-07, "loss": 0.144, "step": 44461 }, { "epoch": 0.7728623824505901, "grad_norm": 1.3735309573548262, "learning_rate": 1.2930896445800693e-07, "loss": 0.1494, "step": 44462 }, { "epoch": 0.772879764988093, "grad_norm": 1.316855742206516, "learning_rate": 1.2929007449925993e-07, "loss": 0.155, "step": 44463 }, { "epoch": 0.7728971475255958, "grad_norm": 1.4270184919255888, "learning_rate": 1.2927118571549318e-07, "loss": 0.1758, "step": 44464 }, { "epoch": 0.7729145300630986, "grad_norm": 1.2400265282458915, "learning_rate": 1.292522981067664e-07, "loss": 0.1943, "step": 44465 }, { "epoch": 0.7729319126006015, "grad_norm": 0.9844958065604141, "learning_rate": 1.2923341167313956e-07, "loss": 0.2234, "step": 44466 }, { "epoch": 0.7729492951381043, "grad_norm": 1.1259887655943186, "learning_rate": 1.2921452641467245e-07, "loss": 0.4124, "step": 44467 }, { "epoch": 0.7729666776756071, "grad_norm": 1.4904447657878839, "learning_rate": 1.29195642331425e-07, "loss": 0.1931, "step": 44468 }, { "epoch": 0.77298406021311, "grad_norm": 1.2625208917840953, "learning_rate": 1.291767594234569e-07, "loss": 0.2402, "step": 44469 }, { "epoch": 0.7730014427506128, "grad_norm": 2.386533762106266, "learning_rate": 1.2915787769082843e-07, "loss": 0.178, "step": 44470 }, { "epoch": 0.7730188252881156, "grad_norm": 1.0007895266753934, "learning_rate": 1.2913899713359892e-07, "loss": 0.2679, "step": 44471 }, { "epoch": 0.7730362078256184, "grad_norm": 1.0929403156235682, "learning_rate": 1.2912011775182836e-07, "loss": 0.2073, "step": 44472 }, { "epoch": 0.7730535903631212, "grad_norm": 0.9695494571258964, "learning_rate": 1.2910123954557673e-07, "loss": 0.1741, "step": 44473 }, { "epoch": 0.773070972900624, "grad_norm": 1.3780854011507893, "learning_rate": 1.2908236251490384e-07, "loss": 0.211, "step": 44474 }, { "epoch": 0.7730883554381268, "grad_norm": 2.7308452771877776, "learning_rate": 1.290634866598695e-07, "loss": 0.2661, "step": 44475 }, { "epoch": 0.7731057379756296, "grad_norm": 1.0028880060083039, "learning_rate": 1.2904461198053346e-07, "loss": 0.2322, "step": 44476 }, { "epoch": 0.7731231205131325, "grad_norm": 1.1106194727692333, "learning_rate": 1.2902573847695563e-07, "loss": 0.3221, "step": 44477 }, { "epoch": 0.7731405030506353, "grad_norm": 0.9045644474552705, "learning_rate": 1.2900686614919575e-07, "loss": 0.2495, "step": 44478 }, { "epoch": 0.7731578855881381, "grad_norm": 1.5155401206516266, "learning_rate": 1.2898799499731377e-07, "loss": 0.365, "step": 44479 }, { "epoch": 0.773175268125641, "grad_norm": 1.3309406719899581, "learning_rate": 1.2896912502136918e-07, "loss": 0.1944, "step": 44480 }, { "epoch": 0.7731926506631438, "grad_norm": 1.1389837149219972, "learning_rate": 1.2895025622142225e-07, "loss": 0.2009, "step": 44481 }, { "epoch": 0.7732100332006466, "grad_norm": 1.2821305389587174, "learning_rate": 1.289313885975325e-07, "loss": 0.3319, "step": 44482 }, { "epoch": 0.7732274157381495, "grad_norm": 1.381590168369001, "learning_rate": 1.2891252214975995e-07, "loss": 0.1881, "step": 44483 }, { "epoch": 0.7732447982756523, "grad_norm": 2.18092621232066, "learning_rate": 1.2889365687816401e-07, "loss": 0.1597, "step": 44484 }, { "epoch": 0.7732621808131551, "grad_norm": 1.0596557715595998, "learning_rate": 1.2887479278280478e-07, "loss": 0.2611, "step": 44485 }, { "epoch": 0.773279563350658, "grad_norm": 1.4911765066108413, "learning_rate": 1.28855929863742e-07, "loss": 0.3525, "step": 44486 }, { "epoch": 0.7732969458881608, "grad_norm": 1.3673976322953898, "learning_rate": 1.2883706812103546e-07, "loss": 0.1591, "step": 44487 }, { "epoch": 0.7733143284256636, "grad_norm": 2.083763322163941, "learning_rate": 1.288182075547447e-07, "loss": 0.4204, "step": 44488 }, { "epoch": 0.7733317109631664, "grad_norm": 1.6050588722362493, "learning_rate": 1.287993481649301e-07, "loss": 0.1699, "step": 44489 }, { "epoch": 0.7733490935006693, "grad_norm": 1.2694586424693395, "learning_rate": 1.287804899516508e-07, "loss": 0.2754, "step": 44490 }, { "epoch": 0.7733664760381721, "grad_norm": 1.7813749284298492, "learning_rate": 1.2876163291496688e-07, "loss": 0.2423, "step": 44491 }, { "epoch": 0.7733838585756749, "grad_norm": 2.016024194051815, "learning_rate": 1.287427770549379e-07, "loss": 0.2651, "step": 44492 }, { "epoch": 0.7734012411131777, "grad_norm": 2.059552256012096, "learning_rate": 1.2872392237162394e-07, "loss": 0.3621, "step": 44493 }, { "epoch": 0.7734186236506805, "grad_norm": 1.5505812101083545, "learning_rate": 1.287050688650846e-07, "loss": 0.1797, "step": 44494 }, { "epoch": 0.7734360061881833, "grad_norm": 1.1881326231207971, "learning_rate": 1.286862165353796e-07, "loss": 0.1646, "step": 44495 }, { "epoch": 0.7734533887256861, "grad_norm": 1.4296360860424184, "learning_rate": 1.2866736538256873e-07, "loss": 0.2053, "step": 44496 }, { "epoch": 0.773470771263189, "grad_norm": 2.7103484172105143, "learning_rate": 1.2864851540671173e-07, "loss": 0.3902, "step": 44497 }, { "epoch": 0.7734881538006918, "grad_norm": 1.2679949793251406, "learning_rate": 1.286296666078684e-07, "loss": 0.1494, "step": 44498 }, { "epoch": 0.7735055363381946, "grad_norm": 3.903620185576056, "learning_rate": 1.2861081898609838e-07, "loss": 0.3806, "step": 44499 }, { "epoch": 0.7735229188756975, "grad_norm": 1.0890698925082951, "learning_rate": 1.285919725414613e-07, "loss": 0.2194, "step": 44500 }, { "epoch": 0.7735403014132003, "grad_norm": 1.413214495493993, "learning_rate": 1.2857312727401726e-07, "loss": 0.2696, "step": 44501 }, { "epoch": 0.7735576839507031, "grad_norm": 1.503501764151983, "learning_rate": 1.2855428318382594e-07, "loss": 0.1715, "step": 44502 }, { "epoch": 0.773575066488206, "grad_norm": 2.2818083882568616, "learning_rate": 1.2853544027094658e-07, "loss": 0.1716, "step": 44503 }, { "epoch": 0.7735924490257088, "grad_norm": 1.3467765599847994, "learning_rate": 1.285165985354394e-07, "loss": 0.3256, "step": 44504 }, { "epoch": 0.7736098315632116, "grad_norm": 0.9833671467638211, "learning_rate": 1.2849775797736396e-07, "loss": 0.2046, "step": 44505 }, { "epoch": 0.7736272141007144, "grad_norm": 1.9123405086610052, "learning_rate": 1.2847891859677996e-07, "loss": 0.1138, "step": 44506 }, { "epoch": 0.7736445966382173, "grad_norm": 1.5325254115712421, "learning_rate": 1.2846008039374713e-07, "loss": 0.2097, "step": 44507 }, { "epoch": 0.7736619791757201, "grad_norm": 2.659597042524206, "learning_rate": 1.2844124336832517e-07, "loss": 0.2812, "step": 44508 }, { "epoch": 0.7736793617132229, "grad_norm": 1.8345562346564201, "learning_rate": 1.2842240752057375e-07, "loss": 0.2914, "step": 44509 }, { "epoch": 0.7736967442507258, "grad_norm": 1.53313134096141, "learning_rate": 1.284035728505527e-07, "loss": 0.2789, "step": 44510 }, { "epoch": 0.7737141267882286, "grad_norm": 1.002032974314413, "learning_rate": 1.2838473935832138e-07, "loss": 0.224, "step": 44511 }, { "epoch": 0.7737315093257313, "grad_norm": 2.0989525797451436, "learning_rate": 1.2836590704393995e-07, "loss": 0.2044, "step": 44512 }, { "epoch": 0.7737488918632341, "grad_norm": 1.2883832379694056, "learning_rate": 1.2834707590746786e-07, "loss": 0.2684, "step": 44513 }, { "epoch": 0.773766274400737, "grad_norm": 6.841250107831623, "learning_rate": 1.2832824594896497e-07, "loss": 0.2022, "step": 44514 }, { "epoch": 0.7737836569382398, "grad_norm": 2.186475587747603, "learning_rate": 1.2830941716849054e-07, "loss": 0.2555, "step": 44515 }, { "epoch": 0.7738010394757426, "grad_norm": 2.1374635097084065, "learning_rate": 1.282905895661046e-07, "loss": 0.2309, "step": 44516 }, { "epoch": 0.7738184220132455, "grad_norm": 1.6007156601983188, "learning_rate": 1.2827176314186678e-07, "loss": 0.1198, "step": 44517 }, { "epoch": 0.7738358045507483, "grad_norm": 1.0905269262171413, "learning_rate": 1.282529378958367e-07, "loss": 0.1996, "step": 44518 }, { "epoch": 0.7738531870882511, "grad_norm": 2.0661645939423186, "learning_rate": 1.2823411382807386e-07, "loss": 0.2773, "step": 44519 }, { "epoch": 0.773870569625754, "grad_norm": 1.5366050918967789, "learning_rate": 1.282152909386383e-07, "loss": 0.1507, "step": 44520 }, { "epoch": 0.7738879521632568, "grad_norm": 1.679668274581394, "learning_rate": 1.2819646922758965e-07, "loss": 0.1548, "step": 44521 }, { "epoch": 0.7739053347007596, "grad_norm": 2.6750973137654492, "learning_rate": 1.2817764869498722e-07, "loss": 0.3166, "step": 44522 }, { "epoch": 0.7739227172382624, "grad_norm": 1.196029532361931, "learning_rate": 1.2815882934089068e-07, "loss": 0.1904, "step": 44523 }, { "epoch": 0.7739400997757653, "grad_norm": 1.5178656483618196, "learning_rate": 1.2814001116536e-07, "loss": 0.2816, "step": 44524 }, { "epoch": 0.7739574823132681, "grad_norm": 1.5299233072558087, "learning_rate": 1.2812119416845469e-07, "loss": 0.1754, "step": 44525 }, { "epoch": 0.7739748648507709, "grad_norm": 1.2636413008697613, "learning_rate": 1.2810237835023435e-07, "loss": 0.1644, "step": 44526 }, { "epoch": 0.7739922473882738, "grad_norm": 1.5846998471865545, "learning_rate": 1.2808356371075863e-07, "loss": 0.19, "step": 44527 }, { "epoch": 0.7740096299257766, "grad_norm": 1.0712867907455697, "learning_rate": 1.2806475025008716e-07, "loss": 0.161, "step": 44528 }, { "epoch": 0.7740270124632794, "grad_norm": 0.9979955779357941, "learning_rate": 1.2804593796827962e-07, "loss": 0.1881, "step": 44529 }, { "epoch": 0.7740443950007823, "grad_norm": 2.705771879649363, "learning_rate": 1.2802712686539553e-07, "loss": 0.3097, "step": 44530 }, { "epoch": 0.7740617775382851, "grad_norm": 1.332491744055106, "learning_rate": 1.280083169414945e-07, "loss": 0.2681, "step": 44531 }, { "epoch": 0.7740791600757878, "grad_norm": 1.5391233991375795, "learning_rate": 1.2798950819663635e-07, "loss": 0.2357, "step": 44532 }, { "epoch": 0.7740965426132906, "grad_norm": 5.628379265365603, "learning_rate": 1.2797070063088067e-07, "loss": 0.2823, "step": 44533 }, { "epoch": 0.7741139251507935, "grad_norm": 1.1099414175436852, "learning_rate": 1.279518942442867e-07, "loss": 0.268, "step": 44534 }, { "epoch": 0.7741313076882963, "grad_norm": 0.7554035521845246, "learning_rate": 1.2793308903691453e-07, "loss": 0.1308, "step": 44535 }, { "epoch": 0.7741486902257991, "grad_norm": 1.033042332068119, "learning_rate": 1.279142850088235e-07, "loss": 0.1805, "step": 44536 }, { "epoch": 0.774166072763302, "grad_norm": 1.4191338959911974, "learning_rate": 1.2789548216007324e-07, "loss": 0.2095, "step": 44537 }, { "epoch": 0.7741834553008048, "grad_norm": 1.1157140759002602, "learning_rate": 1.2787668049072325e-07, "loss": 0.3507, "step": 44538 }, { "epoch": 0.7742008378383076, "grad_norm": 1.2932568129014805, "learning_rate": 1.2785788000083342e-07, "loss": 0.18, "step": 44539 }, { "epoch": 0.7742182203758105, "grad_norm": 2.579256986408531, "learning_rate": 1.2783908069046328e-07, "loss": 0.1325, "step": 44540 }, { "epoch": 0.7742356029133133, "grad_norm": 1.3302408425492274, "learning_rate": 1.2782028255967216e-07, "loss": 0.263, "step": 44541 }, { "epoch": 0.7742529854508161, "grad_norm": 1.6567924935671468, "learning_rate": 1.2780148560851962e-07, "loss": 0.218, "step": 44542 }, { "epoch": 0.7742703679883189, "grad_norm": 1.2512928702847304, "learning_rate": 1.2778268983706558e-07, "loss": 0.1506, "step": 44543 }, { "epoch": 0.7742877505258218, "grad_norm": 2.0561451108104847, "learning_rate": 1.277638952453694e-07, "loss": 0.1992, "step": 44544 }, { "epoch": 0.7743051330633246, "grad_norm": 1.4453871805830332, "learning_rate": 1.277451018334907e-07, "loss": 0.1655, "step": 44545 }, { "epoch": 0.7743225156008274, "grad_norm": 3.7741419644114433, "learning_rate": 1.27726309601489e-07, "loss": 0.2026, "step": 44546 }, { "epoch": 0.7743398981383303, "grad_norm": 2.056219613860295, "learning_rate": 1.2770751854942386e-07, "loss": 0.2669, "step": 44547 }, { "epoch": 0.7743572806758331, "grad_norm": 1.7028859092360396, "learning_rate": 1.2768872867735493e-07, "loss": 0.175, "step": 44548 }, { "epoch": 0.7743746632133359, "grad_norm": 2.2537576219863915, "learning_rate": 1.276699399853417e-07, "loss": 0.1711, "step": 44549 }, { "epoch": 0.7743920457508388, "grad_norm": 1.327376978013826, "learning_rate": 1.2765115247344354e-07, "loss": 0.1652, "step": 44550 }, { "epoch": 0.7744094282883416, "grad_norm": 1.423581851368226, "learning_rate": 1.2763236614172036e-07, "loss": 0.1705, "step": 44551 }, { "epoch": 0.7744268108258443, "grad_norm": 1.6788404918959718, "learning_rate": 1.2761358099023168e-07, "loss": 0.3625, "step": 44552 }, { "epoch": 0.7744441933633471, "grad_norm": 2.157649987254359, "learning_rate": 1.275947970190366e-07, "loss": 0.2633, "step": 44553 }, { "epoch": 0.77446157590085, "grad_norm": 1.685308555048806, "learning_rate": 1.2757601422819502e-07, "loss": 0.1831, "step": 44554 }, { "epoch": 0.7744789584383528, "grad_norm": 1.7294680747977162, "learning_rate": 1.2755723261776646e-07, "loss": 0.2301, "step": 44555 }, { "epoch": 0.7744963409758556, "grad_norm": 2.3850531510565824, "learning_rate": 1.275384521878104e-07, "loss": 0.2351, "step": 44556 }, { "epoch": 0.7745137235133585, "grad_norm": 1.3238768298794445, "learning_rate": 1.2751967293838634e-07, "loss": 0.1966, "step": 44557 }, { "epoch": 0.7745311060508613, "grad_norm": 1.2957411835935317, "learning_rate": 1.2750089486955363e-07, "loss": 0.1509, "step": 44558 }, { "epoch": 0.7745484885883641, "grad_norm": 1.3529116980284925, "learning_rate": 1.2748211798137232e-07, "loss": 0.1964, "step": 44559 }, { "epoch": 0.774565871125867, "grad_norm": 1.0225671431051095, "learning_rate": 1.2746334227390137e-07, "loss": 0.1783, "step": 44560 }, { "epoch": 0.7745832536633698, "grad_norm": 3.357166009971855, "learning_rate": 1.2744456774720035e-07, "loss": 0.2328, "step": 44561 }, { "epoch": 0.7746006362008726, "grad_norm": 1.2545236089340241, "learning_rate": 1.2742579440132912e-07, "loss": 0.1888, "step": 44562 }, { "epoch": 0.7746180187383754, "grad_norm": 0.9869750528496118, "learning_rate": 1.274070222363469e-07, "loss": 0.2221, "step": 44563 }, { "epoch": 0.7746354012758783, "grad_norm": 2.024921323286816, "learning_rate": 1.2738825125231334e-07, "loss": 0.1992, "step": 44564 }, { "epoch": 0.7746527838133811, "grad_norm": 1.522144923386041, "learning_rate": 1.2736948144928783e-07, "loss": 0.202, "step": 44565 }, { "epoch": 0.7746701663508839, "grad_norm": 0.8114141588664622, "learning_rate": 1.273507128273299e-07, "loss": 0.3135, "step": 44566 }, { "epoch": 0.7746875488883868, "grad_norm": 1.0791407589722513, "learning_rate": 1.27331945386499e-07, "loss": 0.274, "step": 44567 }, { "epoch": 0.7747049314258896, "grad_norm": 1.3692572606229458, "learning_rate": 1.2731317912685463e-07, "loss": 0.1786, "step": 44568 }, { "epoch": 0.7747223139633924, "grad_norm": 1.1088555499536952, "learning_rate": 1.272944140484562e-07, "loss": 0.1542, "step": 44569 }, { "epoch": 0.7747396965008952, "grad_norm": 1.2333155986868631, "learning_rate": 1.2727565015136337e-07, "loss": 0.3387, "step": 44570 }, { "epoch": 0.7747570790383981, "grad_norm": 1.0740445205060187, "learning_rate": 1.2725688743563566e-07, "loss": 0.2206, "step": 44571 }, { "epoch": 0.7747744615759008, "grad_norm": 1.6912540553440834, "learning_rate": 1.2723812590133225e-07, "loss": 0.221, "step": 44572 }, { "epoch": 0.7747918441134036, "grad_norm": 2.101733118292077, "learning_rate": 1.2721936554851265e-07, "loss": 0.2991, "step": 44573 }, { "epoch": 0.7748092266509065, "grad_norm": 1.5295980575934027, "learning_rate": 1.272006063772365e-07, "loss": 0.1411, "step": 44574 }, { "epoch": 0.7748266091884093, "grad_norm": 1.6688632017162397, "learning_rate": 1.2718184838756325e-07, "loss": 0.3019, "step": 44575 }, { "epoch": 0.7748439917259121, "grad_norm": 1.7273850824582644, "learning_rate": 1.2716309157955225e-07, "loss": 0.1823, "step": 44576 }, { "epoch": 0.774861374263415, "grad_norm": 0.9563179679193844, "learning_rate": 1.2714433595326296e-07, "loss": 0.2794, "step": 44577 }, { "epoch": 0.7748787568009178, "grad_norm": 1.5912965876916672, "learning_rate": 1.2712558150875484e-07, "loss": 0.2503, "step": 44578 }, { "epoch": 0.7748961393384206, "grad_norm": 1.7837428657041141, "learning_rate": 1.2710682824608738e-07, "loss": 0.2488, "step": 44579 }, { "epoch": 0.7749135218759234, "grad_norm": 0.9860717844692977, "learning_rate": 1.270880761653199e-07, "loss": 0.3389, "step": 44580 }, { "epoch": 0.7749309044134263, "grad_norm": 0.9139231881377822, "learning_rate": 1.2706932526651187e-07, "loss": 0.1821, "step": 44581 }, { "epoch": 0.7749482869509291, "grad_norm": 2.018197275534185, "learning_rate": 1.2705057554972288e-07, "loss": 0.2549, "step": 44582 }, { "epoch": 0.7749656694884319, "grad_norm": 1.4916905160954628, "learning_rate": 1.2703182701501226e-07, "loss": 0.1589, "step": 44583 }, { "epoch": 0.7749830520259348, "grad_norm": 1.4028377107938257, "learning_rate": 1.270130796624394e-07, "loss": 0.2591, "step": 44584 }, { "epoch": 0.7750004345634376, "grad_norm": 1.2569595360864132, "learning_rate": 1.2699433349206372e-07, "loss": 0.1624, "step": 44585 }, { "epoch": 0.7750178171009404, "grad_norm": 1.2101708544041325, "learning_rate": 1.2697558850394469e-07, "loss": 0.237, "step": 44586 }, { "epoch": 0.7750351996384433, "grad_norm": 1.0507297597736118, "learning_rate": 1.2695684469814171e-07, "loss": 0.157, "step": 44587 }, { "epoch": 0.7750525821759461, "grad_norm": 1.487467238187779, "learning_rate": 1.2693810207471412e-07, "loss": 0.2786, "step": 44588 }, { "epoch": 0.7750699647134489, "grad_norm": 1.3585268021987127, "learning_rate": 1.2691936063372126e-07, "loss": 0.2336, "step": 44589 }, { "epoch": 0.7750873472509517, "grad_norm": 1.5302676554497925, "learning_rate": 1.2690062037522292e-07, "loss": 0.1985, "step": 44590 }, { "epoch": 0.7751047297884546, "grad_norm": 1.9013581407042315, "learning_rate": 1.2688188129927813e-07, "loss": 0.1477, "step": 44591 }, { "epoch": 0.7751221123259573, "grad_norm": 0.9415683351027441, "learning_rate": 1.2686314340594618e-07, "loss": 0.1838, "step": 44592 }, { "epoch": 0.7751394948634601, "grad_norm": 1.715988316959345, "learning_rate": 1.2684440669528685e-07, "loss": 0.2151, "step": 44593 }, { "epoch": 0.775156877400963, "grad_norm": 1.0442421872897905, "learning_rate": 1.268256711673593e-07, "loss": 0.1717, "step": 44594 }, { "epoch": 0.7751742599384658, "grad_norm": 1.4229676476594022, "learning_rate": 1.2680693682222294e-07, "loss": 0.1176, "step": 44595 }, { "epoch": 0.7751916424759686, "grad_norm": 1.096761837054684, "learning_rate": 1.267882036599372e-07, "loss": 0.3186, "step": 44596 }, { "epoch": 0.7752090250134714, "grad_norm": 3.4667265750445733, "learning_rate": 1.267694716805614e-07, "loss": 0.2347, "step": 44597 }, { "epoch": 0.7752264075509743, "grad_norm": 1.7861720706339923, "learning_rate": 1.2675074088415494e-07, "loss": 0.1615, "step": 44598 }, { "epoch": 0.7752437900884771, "grad_norm": 2.3674743861200622, "learning_rate": 1.2673201127077715e-07, "loss": 0.1697, "step": 44599 }, { "epoch": 0.7752611726259799, "grad_norm": 1.8368696454474611, "learning_rate": 1.2671328284048726e-07, "loss": 0.2188, "step": 44600 }, { "epoch": 0.7752785551634828, "grad_norm": 1.681417461809364, "learning_rate": 1.2669455559334498e-07, "loss": 0.2656, "step": 44601 }, { "epoch": 0.7752959377009856, "grad_norm": 1.1567180150015592, "learning_rate": 1.266758295294094e-07, "loss": 0.1673, "step": 44602 }, { "epoch": 0.7753133202384884, "grad_norm": 1.7826923070284464, "learning_rate": 1.2665710464874018e-07, "loss": 0.2588, "step": 44603 }, { "epoch": 0.7753307027759913, "grad_norm": 2.502387105957352, "learning_rate": 1.2663838095139612e-07, "loss": 0.2591, "step": 44604 }, { "epoch": 0.7753480853134941, "grad_norm": 1.7107489024934293, "learning_rate": 1.26619658437437e-07, "loss": 0.3253, "step": 44605 }, { "epoch": 0.7753654678509969, "grad_norm": 1.4735721308169318, "learning_rate": 1.266009371069221e-07, "loss": 0.1655, "step": 44606 }, { "epoch": 0.7753828503884997, "grad_norm": 2.8433233967141165, "learning_rate": 1.2658221695991068e-07, "loss": 0.3686, "step": 44607 }, { "epoch": 0.7754002329260026, "grad_norm": 1.0444275270748549, "learning_rate": 1.2656349799646194e-07, "loss": 0.2397, "step": 44608 }, { "epoch": 0.7754176154635054, "grad_norm": 2.114195133216987, "learning_rate": 1.2654478021663566e-07, "loss": 0.2259, "step": 44609 }, { "epoch": 0.7754349980010082, "grad_norm": 1.565069552815218, "learning_rate": 1.2652606362049072e-07, "loss": 0.1634, "step": 44610 }, { "epoch": 0.7754523805385111, "grad_norm": 1.924814081701826, "learning_rate": 1.2650734820808661e-07, "loss": 0.2675, "step": 44611 }, { "epoch": 0.7754697630760138, "grad_norm": 1.8715684216550696, "learning_rate": 1.2648863397948252e-07, "loss": 0.212, "step": 44612 }, { "epoch": 0.7754871456135166, "grad_norm": 0.9169250427753121, "learning_rate": 1.2646992093473802e-07, "loss": 0.2773, "step": 44613 }, { "epoch": 0.7755045281510194, "grad_norm": 0.9386786393278849, "learning_rate": 1.2645120907391227e-07, "loss": 0.1815, "step": 44614 }, { "epoch": 0.7755219106885223, "grad_norm": 1.4276632677696386, "learning_rate": 1.264324983970646e-07, "loss": 0.2428, "step": 44615 }, { "epoch": 0.7755392932260251, "grad_norm": 2.002007587033643, "learning_rate": 1.264137889042543e-07, "loss": 0.267, "step": 44616 }, { "epoch": 0.7755566757635279, "grad_norm": 2.4411766596856452, "learning_rate": 1.2639508059554065e-07, "loss": 0.2127, "step": 44617 }, { "epoch": 0.7755740583010308, "grad_norm": 1.3050624834227493, "learning_rate": 1.26376373470983e-07, "loss": 0.1435, "step": 44618 }, { "epoch": 0.7755914408385336, "grad_norm": 0.858461405013448, "learning_rate": 1.2635766753064063e-07, "loss": 0.1935, "step": 44619 }, { "epoch": 0.7756088233760364, "grad_norm": 1.1733927075797417, "learning_rate": 1.2633896277457267e-07, "loss": 0.2361, "step": 44620 }, { "epoch": 0.7756262059135393, "grad_norm": 1.2844932136548264, "learning_rate": 1.2632025920283868e-07, "loss": 0.1531, "step": 44621 }, { "epoch": 0.7756435884510421, "grad_norm": 1.13665033478628, "learning_rate": 1.2630155681549798e-07, "loss": 0.205, "step": 44622 }, { "epoch": 0.7756609709885449, "grad_norm": 1.8382272462931106, "learning_rate": 1.2628285561260938e-07, "loss": 0.2108, "step": 44623 }, { "epoch": 0.7756783535260477, "grad_norm": 8.611147378861027, "learning_rate": 1.2626415559423258e-07, "loss": 0.3454, "step": 44624 }, { "epoch": 0.7756957360635506, "grad_norm": 1.0643217300768053, "learning_rate": 1.2624545676042676e-07, "loss": 0.133, "step": 44625 }, { "epoch": 0.7757131186010534, "grad_norm": 0.9321072526454781, "learning_rate": 1.2622675911125114e-07, "loss": 0.1301, "step": 44626 }, { "epoch": 0.7757305011385562, "grad_norm": 1.7861869025008779, "learning_rate": 1.2620806264676498e-07, "loss": 0.2069, "step": 44627 }, { "epoch": 0.7757478836760591, "grad_norm": 1.0924216758472831, "learning_rate": 1.261893673670276e-07, "loss": 0.1821, "step": 44628 }, { "epoch": 0.7757652662135619, "grad_norm": 0.8852229750547557, "learning_rate": 1.261706732720982e-07, "loss": 0.1829, "step": 44629 }, { "epoch": 0.7757826487510647, "grad_norm": 1.466103075743716, "learning_rate": 1.26151980362036e-07, "loss": 0.2378, "step": 44630 }, { "epoch": 0.7758000312885676, "grad_norm": 1.5945172090442346, "learning_rate": 1.2613328863690016e-07, "loss": 0.1666, "step": 44631 }, { "epoch": 0.7758174138260703, "grad_norm": 1.7590749464035467, "learning_rate": 1.2611459809675023e-07, "loss": 0.3188, "step": 44632 }, { "epoch": 0.7758347963635731, "grad_norm": 1.4366531061920231, "learning_rate": 1.2609590874164527e-07, "loss": 0.3578, "step": 44633 }, { "epoch": 0.7758521789010759, "grad_norm": 2.6990254286707054, "learning_rate": 1.2607722057164467e-07, "loss": 0.1155, "step": 44634 }, { "epoch": 0.7758695614385788, "grad_norm": 1.0225373517899192, "learning_rate": 1.2605853358680717e-07, "loss": 0.1716, "step": 44635 }, { "epoch": 0.7758869439760816, "grad_norm": 1.306030631966005, "learning_rate": 1.2603984778719251e-07, "loss": 0.1888, "step": 44636 }, { "epoch": 0.7759043265135844, "grad_norm": 1.0831688699740216, "learning_rate": 1.2602116317285978e-07, "loss": 0.1535, "step": 44637 }, { "epoch": 0.7759217090510873, "grad_norm": 1.2869947112190712, "learning_rate": 1.2600247974386818e-07, "loss": 0.1783, "step": 44638 }, { "epoch": 0.7759390915885901, "grad_norm": 1.324476583328465, "learning_rate": 1.2598379750027672e-07, "loss": 0.283, "step": 44639 }, { "epoch": 0.7759564741260929, "grad_norm": 1.6708460622588648, "learning_rate": 1.2596511644214503e-07, "loss": 0.2166, "step": 44640 }, { "epoch": 0.7759738566635958, "grad_norm": 3.1429437195433123, "learning_rate": 1.259464365695322e-07, "loss": 0.2074, "step": 44641 }, { "epoch": 0.7759912392010986, "grad_norm": 2.979389942176584, "learning_rate": 1.2592775788249703e-07, "loss": 0.1574, "step": 44642 }, { "epoch": 0.7760086217386014, "grad_norm": 0.7337451217960593, "learning_rate": 1.259090803810992e-07, "loss": 0.1667, "step": 44643 }, { "epoch": 0.7760260042761042, "grad_norm": 1.3052661900829792, "learning_rate": 1.2589040406539775e-07, "loss": 0.3138, "step": 44644 }, { "epoch": 0.7760433868136071, "grad_norm": 1.521620397108576, "learning_rate": 1.2587172893545188e-07, "loss": 0.1375, "step": 44645 }, { "epoch": 0.7760607693511099, "grad_norm": 1.4824932013213168, "learning_rate": 1.2585305499132077e-07, "loss": 0.2344, "step": 44646 }, { "epoch": 0.7760781518886127, "grad_norm": 1.3472353962897055, "learning_rate": 1.2583438223306357e-07, "loss": 0.2073, "step": 44647 }, { "epoch": 0.7760955344261156, "grad_norm": 1.801819311959989, "learning_rate": 1.2581571066073954e-07, "loss": 0.2398, "step": 44648 }, { "epoch": 0.7761129169636184, "grad_norm": 1.30406800038448, "learning_rate": 1.257970402744078e-07, "loss": 0.1588, "step": 44649 }, { "epoch": 0.7761302995011212, "grad_norm": 1.5444467471623533, "learning_rate": 1.2577837107412737e-07, "loss": 0.2461, "step": 44650 }, { "epoch": 0.776147682038624, "grad_norm": 1.0091930878238695, "learning_rate": 1.257597030599578e-07, "loss": 0.1164, "step": 44651 }, { "epoch": 0.7761650645761268, "grad_norm": 1.4179713191642078, "learning_rate": 1.2574103623195803e-07, "loss": 0.3226, "step": 44652 }, { "epoch": 0.7761824471136296, "grad_norm": 1.8442668594235965, "learning_rate": 1.2572237059018742e-07, "loss": 0.2471, "step": 44653 }, { "epoch": 0.7761998296511324, "grad_norm": 2.7184286352002984, "learning_rate": 1.2570370613470465e-07, "loss": 0.122, "step": 44654 }, { "epoch": 0.7762172121886353, "grad_norm": 1.2029645110586293, "learning_rate": 1.2568504286556936e-07, "loss": 0.1838, "step": 44655 }, { "epoch": 0.7762345947261381, "grad_norm": 2.8807308999477814, "learning_rate": 1.2566638078284054e-07, "loss": 0.2752, "step": 44656 }, { "epoch": 0.7762519772636409, "grad_norm": 2.6934966144390153, "learning_rate": 1.256477198865773e-07, "loss": 0.3234, "step": 44657 }, { "epoch": 0.7762693598011438, "grad_norm": 2.396929259714506, "learning_rate": 1.2562906017683872e-07, "loss": 0.212, "step": 44658 }, { "epoch": 0.7762867423386466, "grad_norm": 1.309077344257465, "learning_rate": 1.256104016536842e-07, "loss": 0.1623, "step": 44659 }, { "epoch": 0.7763041248761494, "grad_norm": 1.313158227463218, "learning_rate": 1.2559174431717284e-07, "loss": 0.2338, "step": 44660 }, { "epoch": 0.7763215074136522, "grad_norm": 1.3293097815060362, "learning_rate": 1.255730881673635e-07, "loss": 0.2351, "step": 44661 }, { "epoch": 0.7763388899511551, "grad_norm": 1.8285263185348337, "learning_rate": 1.255544332043154e-07, "loss": 0.2302, "step": 44662 }, { "epoch": 0.7763562724886579, "grad_norm": 1.4907194952962295, "learning_rate": 1.2553577942808784e-07, "loss": 0.2974, "step": 44663 }, { "epoch": 0.7763736550261607, "grad_norm": 1.3804427662783465, "learning_rate": 1.2551712683873984e-07, "loss": 0.1309, "step": 44664 }, { "epoch": 0.7763910375636636, "grad_norm": 0.8888661328525368, "learning_rate": 1.2549847543633053e-07, "loss": 0.1689, "step": 44665 }, { "epoch": 0.7764084201011664, "grad_norm": 1.4290028821797716, "learning_rate": 1.2547982522091904e-07, "loss": 0.31, "step": 44666 }, { "epoch": 0.7764258026386692, "grad_norm": 1.2095207501781604, "learning_rate": 1.254611761925644e-07, "loss": 0.1821, "step": 44667 }, { "epoch": 0.7764431851761721, "grad_norm": 1.0753682321899452, "learning_rate": 1.2544252835132584e-07, "loss": 0.2046, "step": 44668 }, { "epoch": 0.7764605677136749, "grad_norm": 1.6081595572469196, "learning_rate": 1.2542388169726242e-07, "loss": 0.18, "step": 44669 }, { "epoch": 0.7764779502511777, "grad_norm": 1.6302205163699732, "learning_rate": 1.2540523623043302e-07, "loss": 0.1669, "step": 44670 }, { "epoch": 0.7764953327886804, "grad_norm": 1.2215770004079405, "learning_rate": 1.2538659195089717e-07, "loss": 0.2029, "step": 44671 }, { "epoch": 0.7765127153261833, "grad_norm": 1.7673445474070126, "learning_rate": 1.2536794885871383e-07, "loss": 0.2084, "step": 44672 }, { "epoch": 0.7765300978636861, "grad_norm": 1.8590920686007286, "learning_rate": 1.2534930695394176e-07, "loss": 0.1901, "step": 44673 }, { "epoch": 0.7765474804011889, "grad_norm": 1.4660138668071947, "learning_rate": 1.2533066623664034e-07, "loss": 0.2861, "step": 44674 }, { "epoch": 0.7765648629386918, "grad_norm": 1.2145055909034823, "learning_rate": 1.2531202670686868e-07, "loss": 0.2226, "step": 44675 }, { "epoch": 0.7765822454761946, "grad_norm": 1.2920666528093707, "learning_rate": 1.252933883646858e-07, "loss": 0.2282, "step": 44676 }, { "epoch": 0.7765996280136974, "grad_norm": 1.0383786877625558, "learning_rate": 1.2527475121015069e-07, "loss": 0.1611, "step": 44677 }, { "epoch": 0.7766170105512002, "grad_norm": 2.4546244065346885, "learning_rate": 1.2525611524332235e-07, "loss": 0.2056, "step": 44678 }, { "epoch": 0.7766343930887031, "grad_norm": 2.536202379269726, "learning_rate": 1.2523748046426026e-07, "loss": 0.3623, "step": 44679 }, { "epoch": 0.7766517756262059, "grad_norm": 1.0638708905820917, "learning_rate": 1.252188468730231e-07, "loss": 0.205, "step": 44680 }, { "epoch": 0.7766691581637087, "grad_norm": 1.4225622516840952, "learning_rate": 1.2520021446966983e-07, "loss": 0.2026, "step": 44681 }, { "epoch": 0.7766865407012116, "grad_norm": 1.581499560413922, "learning_rate": 1.2518158325425993e-07, "loss": 0.1551, "step": 44682 }, { "epoch": 0.7767039232387144, "grad_norm": 2.3257538486496734, "learning_rate": 1.2516295322685216e-07, "loss": 0.2324, "step": 44683 }, { "epoch": 0.7767213057762172, "grad_norm": 2.197602183612356, "learning_rate": 1.2514432438750572e-07, "loss": 0.3072, "step": 44684 }, { "epoch": 0.7767386883137201, "grad_norm": 1.461088788601471, "learning_rate": 1.2512569673627954e-07, "loss": 0.2598, "step": 44685 }, { "epoch": 0.7767560708512229, "grad_norm": 1.9122232205320266, "learning_rate": 1.2510707027323268e-07, "loss": 0.2041, "step": 44686 }, { "epoch": 0.7767734533887257, "grad_norm": 0.8560888711245479, "learning_rate": 1.250884449984242e-07, "loss": 0.108, "step": 44687 }, { "epoch": 0.7767908359262286, "grad_norm": 1.3645479215402623, "learning_rate": 1.2506982091191316e-07, "loss": 0.2462, "step": 44688 }, { "epoch": 0.7768082184637314, "grad_norm": 1.2781470395955048, "learning_rate": 1.2505119801375836e-07, "loss": 0.1451, "step": 44689 }, { "epoch": 0.7768256010012342, "grad_norm": 1.1191895748095484, "learning_rate": 1.250325763040192e-07, "loss": 0.2136, "step": 44690 }, { "epoch": 0.7768429835387369, "grad_norm": 1.2309262012976305, "learning_rate": 1.250139557827547e-07, "loss": 0.2361, "step": 44691 }, { "epoch": 0.7768603660762398, "grad_norm": 1.868297411516735, "learning_rate": 1.249953364500235e-07, "loss": 0.2839, "step": 44692 }, { "epoch": 0.7768777486137426, "grad_norm": 2.0202453063677317, "learning_rate": 1.2497671830588475e-07, "loss": 0.1777, "step": 44693 }, { "epoch": 0.7768951311512454, "grad_norm": 2.488697478899602, "learning_rate": 1.2495810135039765e-07, "loss": 0.3806, "step": 44694 }, { "epoch": 0.7769125136887483, "grad_norm": 4.6834890894554455, "learning_rate": 1.2493948558362104e-07, "loss": 0.1807, "step": 44695 }, { "epoch": 0.7769298962262511, "grad_norm": 2.823515268111362, "learning_rate": 1.2492087100561405e-07, "loss": 0.4953, "step": 44696 }, { "epoch": 0.7769472787637539, "grad_norm": 1.1329310103303805, "learning_rate": 1.2490225761643553e-07, "loss": 0.1714, "step": 44697 }, { "epoch": 0.7769646613012567, "grad_norm": 2.017838484879496, "learning_rate": 1.248836454161446e-07, "loss": 0.2434, "step": 44698 }, { "epoch": 0.7769820438387596, "grad_norm": 1.6161078968066043, "learning_rate": 1.2486503440480019e-07, "loss": 0.2259, "step": 44699 }, { "epoch": 0.7769994263762624, "grad_norm": 1.5445647975541494, "learning_rate": 1.2484642458246126e-07, "loss": 0.1529, "step": 44700 }, { "epoch": 0.7770168089137652, "grad_norm": 1.7441379010836258, "learning_rate": 1.2482781594918674e-07, "loss": 0.1435, "step": 44701 }, { "epoch": 0.7770341914512681, "grad_norm": 1.6101900204173605, "learning_rate": 1.248092085050358e-07, "loss": 0.1711, "step": 44702 }, { "epoch": 0.7770515739887709, "grad_norm": 1.44967776023942, "learning_rate": 1.2479060225006732e-07, "loss": 0.2667, "step": 44703 }, { "epoch": 0.7770689565262737, "grad_norm": 2.0794808042669812, "learning_rate": 1.2477199718434027e-07, "loss": 0.2216, "step": 44704 }, { "epoch": 0.7770863390637766, "grad_norm": 1.8009014746217065, "learning_rate": 1.2475339330791368e-07, "loss": 0.2261, "step": 44705 }, { "epoch": 0.7771037216012794, "grad_norm": 5.434696331256435, "learning_rate": 1.2473479062084642e-07, "loss": 0.189, "step": 44706 }, { "epoch": 0.7771211041387822, "grad_norm": 1.3730127477548435, "learning_rate": 1.2471618912319754e-07, "loss": 0.1347, "step": 44707 }, { "epoch": 0.777138486676285, "grad_norm": 1.4192907500158825, "learning_rate": 1.246975888150259e-07, "loss": 0.1484, "step": 44708 }, { "epoch": 0.7771558692137879, "grad_norm": 2.008702162168529, "learning_rate": 1.2467898969639035e-07, "loss": 0.1776, "step": 44709 }, { "epoch": 0.7771732517512907, "grad_norm": 3.529510973994006, "learning_rate": 1.2466039176735033e-07, "loss": 0.2626, "step": 44710 }, { "epoch": 0.7771906342887934, "grad_norm": 1.035717954258375, "learning_rate": 1.2464179502796425e-07, "loss": 0.2533, "step": 44711 }, { "epoch": 0.7772080168262963, "grad_norm": 2.283543791856001, "learning_rate": 1.2462319947829115e-07, "loss": 0.2177, "step": 44712 }, { "epoch": 0.7772253993637991, "grad_norm": 1.496923570205446, "learning_rate": 1.2460460511839016e-07, "loss": 0.1701, "step": 44713 }, { "epoch": 0.7772427819013019, "grad_norm": 1.8739805468244133, "learning_rate": 1.2458601194832018e-07, "loss": 0.2923, "step": 44714 }, { "epoch": 0.7772601644388047, "grad_norm": 1.6238666897000278, "learning_rate": 1.2456741996814012e-07, "loss": 0.394, "step": 44715 }, { "epoch": 0.7772775469763076, "grad_norm": 1.9098305425250464, "learning_rate": 1.2454882917790882e-07, "loss": 0.2242, "step": 44716 }, { "epoch": 0.7772949295138104, "grad_norm": 1.8694082401003487, "learning_rate": 1.2453023957768532e-07, "loss": 0.1934, "step": 44717 }, { "epoch": 0.7773123120513132, "grad_norm": 1.0974219847451985, "learning_rate": 1.2451165116752843e-07, "loss": 0.1544, "step": 44718 }, { "epoch": 0.7773296945888161, "grad_norm": 1.517927676209993, "learning_rate": 1.2449306394749713e-07, "loss": 0.165, "step": 44719 }, { "epoch": 0.7773470771263189, "grad_norm": 1.4590460990751384, "learning_rate": 1.244744779176502e-07, "loss": 0.1958, "step": 44720 }, { "epoch": 0.7773644596638217, "grad_norm": 3.5860344760068092, "learning_rate": 1.2445589307804683e-07, "loss": 0.2747, "step": 44721 }, { "epoch": 0.7773818422013246, "grad_norm": 1.5723605970024026, "learning_rate": 1.2443730942874576e-07, "loss": 0.1457, "step": 44722 }, { "epoch": 0.7773992247388274, "grad_norm": 1.0824547378437055, "learning_rate": 1.24418726969806e-07, "loss": 0.2294, "step": 44723 }, { "epoch": 0.7774166072763302, "grad_norm": 1.9715574185142928, "learning_rate": 1.244001457012861e-07, "loss": 0.4334, "step": 44724 }, { "epoch": 0.777433989813833, "grad_norm": 1.117615423559741, "learning_rate": 1.243815656232453e-07, "loss": 0.148, "step": 44725 }, { "epoch": 0.7774513723513359, "grad_norm": 1.5706409065622287, "learning_rate": 1.2436298673574247e-07, "loss": 0.1475, "step": 44726 }, { "epoch": 0.7774687548888387, "grad_norm": 2.1757253617245746, "learning_rate": 1.2434440903883637e-07, "loss": 0.3317, "step": 44727 }, { "epoch": 0.7774861374263415, "grad_norm": 1.6846232656266804, "learning_rate": 1.2432583253258578e-07, "loss": 0.3724, "step": 44728 }, { "epoch": 0.7775035199638444, "grad_norm": 1.4203333997831127, "learning_rate": 1.2430725721705003e-07, "loss": 0.2398, "step": 44729 }, { "epoch": 0.7775209025013472, "grad_norm": 1.1023852138683539, "learning_rate": 1.2428868309228757e-07, "loss": 0.1577, "step": 44730 }, { "epoch": 0.7775382850388499, "grad_norm": 2.1171171151497044, "learning_rate": 1.242701101583572e-07, "loss": 0.2479, "step": 44731 }, { "epoch": 0.7775556675763527, "grad_norm": 1.2582884253154465, "learning_rate": 1.242515384153182e-07, "loss": 0.1589, "step": 44732 }, { "epoch": 0.7775730501138556, "grad_norm": 1.8120617339084995, "learning_rate": 1.2423296786322918e-07, "loss": 0.1927, "step": 44733 }, { "epoch": 0.7775904326513584, "grad_norm": 1.1348811462560924, "learning_rate": 1.2421439850214905e-07, "loss": 0.2252, "step": 44734 }, { "epoch": 0.7776078151888612, "grad_norm": 2.3213666374719293, "learning_rate": 1.2419583033213665e-07, "loss": 0.2688, "step": 44735 }, { "epoch": 0.7776251977263641, "grad_norm": 0.9792950009021947, "learning_rate": 1.241772633532508e-07, "loss": 0.2679, "step": 44736 }, { "epoch": 0.7776425802638669, "grad_norm": 1.091812847259013, "learning_rate": 1.241586975655504e-07, "loss": 0.1566, "step": 44737 }, { "epoch": 0.7776599628013697, "grad_norm": 1.2784504383707005, "learning_rate": 1.2414013296909431e-07, "loss": 0.2729, "step": 44738 }, { "epoch": 0.7776773453388726, "grad_norm": 1.8311443540525687, "learning_rate": 1.241215695639412e-07, "loss": 0.2321, "step": 44739 }, { "epoch": 0.7776947278763754, "grad_norm": 1.4421056508477534, "learning_rate": 1.2410300735015022e-07, "loss": 0.1904, "step": 44740 }, { "epoch": 0.7777121104138782, "grad_norm": 1.0613843205856026, "learning_rate": 1.2408444632777998e-07, "loss": 0.197, "step": 44741 }, { "epoch": 0.777729492951381, "grad_norm": 1.3265438083861518, "learning_rate": 1.2406588649688954e-07, "loss": 0.1872, "step": 44742 }, { "epoch": 0.7777468754888839, "grad_norm": 1.7643976527748029, "learning_rate": 1.2404732785753724e-07, "loss": 0.2855, "step": 44743 }, { "epoch": 0.7777642580263867, "grad_norm": 1.386288748417122, "learning_rate": 1.2402877040978238e-07, "loss": 0.4249, "step": 44744 }, { "epoch": 0.7777816405638895, "grad_norm": 1.7387300947886881, "learning_rate": 1.2401021415368356e-07, "loss": 0.2245, "step": 44745 }, { "epoch": 0.7777990231013924, "grad_norm": 2.4877724562103856, "learning_rate": 1.2399165908929972e-07, "loss": 0.1439, "step": 44746 }, { "epoch": 0.7778164056388952, "grad_norm": 1.9417973140665126, "learning_rate": 1.239731052166894e-07, "loss": 0.2716, "step": 44747 }, { "epoch": 0.777833788176398, "grad_norm": 1.2765079332703253, "learning_rate": 1.239545525359119e-07, "loss": 0.2167, "step": 44748 }, { "epoch": 0.7778511707139009, "grad_norm": 2.032295139298291, "learning_rate": 1.2393600104702556e-07, "loss": 0.2221, "step": 44749 }, { "epoch": 0.7778685532514037, "grad_norm": 1.5468473043283921, "learning_rate": 1.2391745075008942e-07, "loss": 0.1846, "step": 44750 }, { "epoch": 0.7778859357889064, "grad_norm": 8.62910368985318, "learning_rate": 1.2389890164516197e-07, "loss": 0.3116, "step": 44751 }, { "epoch": 0.7779033183264092, "grad_norm": 1.8691957876140752, "learning_rate": 1.2388035373230245e-07, "loss": 0.2419, "step": 44752 }, { "epoch": 0.7779207008639121, "grad_norm": 1.4768071020664328, "learning_rate": 1.238618070115694e-07, "loss": 0.1777, "step": 44753 }, { "epoch": 0.7779380834014149, "grad_norm": 1.4027356409824412, "learning_rate": 1.2384326148302165e-07, "loss": 0.2033, "step": 44754 }, { "epoch": 0.7779554659389177, "grad_norm": 1.2524392578978651, "learning_rate": 1.2382471714671793e-07, "loss": 0.1883, "step": 44755 }, { "epoch": 0.7779728484764206, "grad_norm": 1.1613396850742566, "learning_rate": 1.2380617400271708e-07, "loss": 0.3586, "step": 44756 }, { "epoch": 0.7779902310139234, "grad_norm": 2.535257740016714, "learning_rate": 1.237876320510779e-07, "loss": 0.3099, "step": 44757 }, { "epoch": 0.7780076135514262, "grad_norm": 1.005559780448456, "learning_rate": 1.2376909129185909e-07, "loss": 0.1538, "step": 44758 }, { "epoch": 0.778024996088929, "grad_norm": 4.076119566822779, "learning_rate": 1.2375055172511922e-07, "loss": 0.2952, "step": 44759 }, { "epoch": 0.7780423786264319, "grad_norm": 1.165271755670926, "learning_rate": 1.237320133509175e-07, "loss": 0.2699, "step": 44760 }, { "epoch": 0.7780597611639347, "grad_norm": 1.0014316405625117, "learning_rate": 1.2371347616931254e-07, "loss": 0.205, "step": 44761 }, { "epoch": 0.7780771437014375, "grad_norm": 2.497512336429492, "learning_rate": 1.2369494018036274e-07, "loss": 0.2152, "step": 44762 }, { "epoch": 0.7780945262389404, "grad_norm": 2.0619677776432748, "learning_rate": 1.2367640538412726e-07, "loss": 0.3069, "step": 44763 }, { "epoch": 0.7781119087764432, "grad_norm": 1.3050620661090175, "learning_rate": 1.2365787178066468e-07, "loss": 0.2186, "step": 44764 }, { "epoch": 0.778129291313946, "grad_norm": 1.195135609748442, "learning_rate": 1.2363933937003378e-07, "loss": 0.1821, "step": 44765 }, { "epoch": 0.7781466738514489, "grad_norm": 1.1507958768069155, "learning_rate": 1.236208081522933e-07, "loss": 0.1958, "step": 44766 }, { "epoch": 0.7781640563889517, "grad_norm": 1.3345172264784841, "learning_rate": 1.2360227812750202e-07, "loss": 0.2711, "step": 44767 }, { "epoch": 0.7781814389264545, "grad_norm": 1.4762796891606185, "learning_rate": 1.2358374929571852e-07, "loss": 0.1619, "step": 44768 }, { "epoch": 0.7781988214639574, "grad_norm": 2.0236263489143833, "learning_rate": 1.2356522165700168e-07, "loss": 0.2937, "step": 44769 }, { "epoch": 0.7782162040014602, "grad_norm": 1.0464715587575413, "learning_rate": 1.2354669521141005e-07, "loss": 0.1711, "step": 44770 }, { "epoch": 0.7782335865389629, "grad_norm": 1.9496775403221913, "learning_rate": 1.235281699590025e-07, "loss": 0.2106, "step": 44771 }, { "epoch": 0.7782509690764657, "grad_norm": 2.1187283928105693, "learning_rate": 1.235096458998378e-07, "loss": 0.1133, "step": 44772 }, { "epoch": 0.7782683516139686, "grad_norm": 1.2903219297051332, "learning_rate": 1.2349112303397473e-07, "loss": 0.1865, "step": 44773 }, { "epoch": 0.7782857341514714, "grad_norm": 1.2469432205459514, "learning_rate": 1.2347260136147154e-07, "loss": 0.2371, "step": 44774 }, { "epoch": 0.7783031166889742, "grad_norm": 1.5227168059087066, "learning_rate": 1.2345408088238735e-07, "loss": 0.2384, "step": 44775 }, { "epoch": 0.7783204992264771, "grad_norm": 1.5733775841733058, "learning_rate": 1.234355615967808e-07, "loss": 0.1899, "step": 44776 }, { "epoch": 0.7783378817639799, "grad_norm": 1.0363709943821746, "learning_rate": 1.234170435047105e-07, "loss": 0.206, "step": 44777 }, { "epoch": 0.7783552643014827, "grad_norm": 0.8500774318466265, "learning_rate": 1.2339852660623507e-07, "loss": 0.1231, "step": 44778 }, { "epoch": 0.7783726468389855, "grad_norm": 1.6898035836374403, "learning_rate": 1.233800109014135e-07, "loss": 0.1978, "step": 44779 }, { "epoch": 0.7783900293764884, "grad_norm": 1.6142551464687795, "learning_rate": 1.2336149639030435e-07, "loss": 0.2756, "step": 44780 }, { "epoch": 0.7784074119139912, "grad_norm": 1.534942465258167, "learning_rate": 1.2334298307296616e-07, "loss": 0.2905, "step": 44781 }, { "epoch": 0.778424794451494, "grad_norm": 4.330939820230698, "learning_rate": 1.2332447094945748e-07, "loss": 0.2861, "step": 44782 }, { "epoch": 0.7784421769889969, "grad_norm": 2.110707338978431, "learning_rate": 1.233059600198374e-07, "loss": 0.2485, "step": 44783 }, { "epoch": 0.7784595595264997, "grad_norm": 0.8911765177021777, "learning_rate": 1.2328745028416436e-07, "loss": 0.1685, "step": 44784 }, { "epoch": 0.7784769420640025, "grad_norm": 2.2110098865598653, "learning_rate": 1.2326894174249708e-07, "loss": 0.2136, "step": 44785 }, { "epoch": 0.7784943246015054, "grad_norm": 1.6697532089991973, "learning_rate": 1.2325043439489413e-07, "loss": 0.3616, "step": 44786 }, { "epoch": 0.7785117071390082, "grad_norm": 1.7703496791919442, "learning_rate": 1.2323192824141432e-07, "loss": 0.2545, "step": 44787 }, { "epoch": 0.778529089676511, "grad_norm": 1.110415655148735, "learning_rate": 1.2321342328211621e-07, "loss": 0.2835, "step": 44788 }, { "epoch": 0.7785464722140139, "grad_norm": 1.5483808548823965, "learning_rate": 1.231949195170584e-07, "loss": 0.1738, "step": 44789 }, { "epoch": 0.7785638547515167, "grad_norm": 0.8221107103515883, "learning_rate": 1.2317641694629954e-07, "loss": 0.2016, "step": 44790 }, { "epoch": 0.7785812372890194, "grad_norm": 1.0784917592627328, "learning_rate": 1.2315791556989846e-07, "loss": 0.2886, "step": 44791 }, { "epoch": 0.7785986198265222, "grad_norm": 1.7447030599292124, "learning_rate": 1.2313941538791382e-07, "loss": 0.1381, "step": 44792 }, { "epoch": 0.7786160023640251, "grad_norm": 0.9562589813646827, "learning_rate": 1.2312091640040385e-07, "loss": 0.1529, "step": 44793 }, { "epoch": 0.7786333849015279, "grad_norm": 1.455894660560334, "learning_rate": 1.2310241860742754e-07, "loss": 0.1827, "step": 44794 }, { "epoch": 0.7786507674390307, "grad_norm": 1.1974791231639057, "learning_rate": 1.230839220090435e-07, "loss": 0.238, "step": 44795 }, { "epoch": 0.7786681499765336, "grad_norm": 1.7200250185469395, "learning_rate": 1.230654266053102e-07, "loss": 0.156, "step": 44796 }, { "epoch": 0.7786855325140364, "grad_norm": 1.4143293408325994, "learning_rate": 1.2304693239628645e-07, "loss": 0.1767, "step": 44797 }, { "epoch": 0.7787029150515392, "grad_norm": 2.0194131476386517, "learning_rate": 1.2302843938203056e-07, "loss": 0.229, "step": 44798 }, { "epoch": 0.778720297589042, "grad_norm": 1.1158355289616133, "learning_rate": 1.2300994756260165e-07, "loss": 0.3168, "step": 44799 }, { "epoch": 0.7787376801265449, "grad_norm": 1.021315275089364, "learning_rate": 1.229914569380579e-07, "loss": 0.2075, "step": 44800 }, { "epoch": 0.7787550626640477, "grad_norm": 1.3739106900800089, "learning_rate": 1.2297296750845788e-07, "loss": 0.1575, "step": 44801 }, { "epoch": 0.7787724452015505, "grad_norm": 2.580472584337686, "learning_rate": 1.229544792738605e-07, "loss": 0.2005, "step": 44802 }, { "epoch": 0.7787898277390534, "grad_norm": 1.4844228213046895, "learning_rate": 1.2293599223432426e-07, "loss": 0.3441, "step": 44803 }, { "epoch": 0.7788072102765562, "grad_norm": 0.9268997657572886, "learning_rate": 1.2291750638990766e-07, "loss": 0.1766, "step": 44804 }, { "epoch": 0.778824592814059, "grad_norm": 1.905243932323236, "learning_rate": 1.228990217406694e-07, "loss": 0.244, "step": 44805 }, { "epoch": 0.7788419753515619, "grad_norm": 1.2277918654481133, "learning_rate": 1.22880538286668e-07, "loss": 0.1567, "step": 44806 }, { "epoch": 0.7788593578890647, "grad_norm": 1.6169957662319279, "learning_rate": 1.2286205602796206e-07, "loss": 0.1596, "step": 44807 }, { "epoch": 0.7788767404265675, "grad_norm": 2.079514084436564, "learning_rate": 1.2284357496461012e-07, "loss": 0.2871, "step": 44808 }, { "epoch": 0.7788941229640703, "grad_norm": 2.967561088592323, "learning_rate": 1.2282509509667072e-07, "loss": 0.2747, "step": 44809 }, { "epoch": 0.7789115055015731, "grad_norm": 1.1882716661668082, "learning_rate": 1.228066164242026e-07, "loss": 0.1564, "step": 44810 }, { "epoch": 0.7789288880390759, "grad_norm": 1.5781660288118016, "learning_rate": 1.2278813894726442e-07, "loss": 0.202, "step": 44811 }, { "epoch": 0.7789462705765787, "grad_norm": 1.1122042579235958, "learning_rate": 1.2276966266591438e-07, "loss": 0.3791, "step": 44812 }, { "epoch": 0.7789636531140816, "grad_norm": 1.308968545673101, "learning_rate": 1.2275118758021109e-07, "loss": 0.2776, "step": 44813 }, { "epoch": 0.7789810356515844, "grad_norm": 1.1245365983056013, "learning_rate": 1.2273271369021338e-07, "loss": 0.2079, "step": 44814 }, { "epoch": 0.7789984181890872, "grad_norm": 1.1845024298802178, "learning_rate": 1.2271424099597971e-07, "loss": 0.1105, "step": 44815 }, { "epoch": 0.77901580072659, "grad_norm": 0.9163204259869404, "learning_rate": 1.2269576949756855e-07, "loss": 0.1456, "step": 44816 }, { "epoch": 0.7790331832640929, "grad_norm": 1.390434346511625, "learning_rate": 1.226772991950385e-07, "loss": 0.2445, "step": 44817 }, { "epoch": 0.7790505658015957, "grad_norm": 1.520811411251519, "learning_rate": 1.22658830088448e-07, "loss": 0.289, "step": 44818 }, { "epoch": 0.7790679483390985, "grad_norm": 0.8844117212593998, "learning_rate": 1.2264036217785578e-07, "loss": 0.2386, "step": 44819 }, { "epoch": 0.7790853308766014, "grad_norm": 1.5527821365534646, "learning_rate": 1.2262189546332023e-07, "loss": 0.256, "step": 44820 }, { "epoch": 0.7791027134141042, "grad_norm": 1.0905225730766208, "learning_rate": 1.2260342994489974e-07, "loss": 0.4659, "step": 44821 }, { "epoch": 0.779120095951607, "grad_norm": 1.259619620577831, "learning_rate": 1.2258496562265325e-07, "loss": 0.2029, "step": 44822 }, { "epoch": 0.7791374784891099, "grad_norm": 0.9252339146238868, "learning_rate": 1.2256650249663896e-07, "loss": 0.2075, "step": 44823 }, { "epoch": 0.7791548610266127, "grad_norm": 1.5364546484570998, "learning_rate": 1.2254804056691553e-07, "loss": 0.237, "step": 44824 }, { "epoch": 0.7791722435641155, "grad_norm": 1.3773925758927814, "learning_rate": 1.225295798335414e-07, "loss": 0.3047, "step": 44825 }, { "epoch": 0.7791896261016183, "grad_norm": 1.9393187148663151, "learning_rate": 1.2251112029657508e-07, "loss": 0.19, "step": 44826 }, { "epoch": 0.7792070086391212, "grad_norm": 1.1542938607211133, "learning_rate": 1.2249266195607515e-07, "loss": 0.1665, "step": 44827 }, { "epoch": 0.779224391176624, "grad_norm": 1.0880533627865736, "learning_rate": 1.2247420481209992e-07, "loss": 0.3324, "step": 44828 }, { "epoch": 0.7792417737141268, "grad_norm": 1.773348380544553, "learning_rate": 1.2245574886470818e-07, "loss": 0.2176, "step": 44829 }, { "epoch": 0.7792591562516296, "grad_norm": 1.3786290930075848, "learning_rate": 1.2243729411395836e-07, "loss": 0.1855, "step": 44830 }, { "epoch": 0.7792765387891324, "grad_norm": 2.4561634547820805, "learning_rate": 1.224188405599088e-07, "loss": 0.4455, "step": 44831 }, { "epoch": 0.7792939213266352, "grad_norm": 1.4459670979925991, "learning_rate": 1.2240038820261795e-07, "loss": 0.1682, "step": 44832 }, { "epoch": 0.779311303864138, "grad_norm": 1.5670642158772108, "learning_rate": 1.2238193704214455e-07, "loss": 0.2861, "step": 44833 }, { "epoch": 0.7793286864016409, "grad_norm": 1.4622295970190973, "learning_rate": 1.2236348707854694e-07, "loss": 0.2102, "step": 44834 }, { "epoch": 0.7793460689391437, "grad_norm": 2.6674376034537453, "learning_rate": 1.2234503831188357e-07, "loss": 0.3273, "step": 44835 }, { "epoch": 0.7793634514766465, "grad_norm": 1.7975996047108982, "learning_rate": 1.2232659074221297e-07, "loss": 0.1597, "step": 44836 }, { "epoch": 0.7793808340141494, "grad_norm": 2.90912721876733, "learning_rate": 1.2230814436959357e-07, "loss": 0.1657, "step": 44837 }, { "epoch": 0.7793982165516522, "grad_norm": 1.6808519036340037, "learning_rate": 1.2228969919408387e-07, "loss": 0.3191, "step": 44838 }, { "epoch": 0.779415599089155, "grad_norm": 1.0497169285218024, "learning_rate": 1.2227125521574233e-07, "loss": 0.1692, "step": 44839 }, { "epoch": 0.7794329816266579, "grad_norm": 0.7461999426032293, "learning_rate": 1.2225281243462725e-07, "loss": 0.2259, "step": 44840 }, { "epoch": 0.7794503641641607, "grad_norm": 1.8128734345010145, "learning_rate": 1.2223437085079735e-07, "loss": 0.2211, "step": 44841 }, { "epoch": 0.7794677467016635, "grad_norm": 1.3119331243042514, "learning_rate": 1.22215930464311e-07, "loss": 0.1773, "step": 44842 }, { "epoch": 0.7794851292391664, "grad_norm": 1.3728406387380125, "learning_rate": 1.2219749127522655e-07, "loss": 0.2553, "step": 44843 }, { "epoch": 0.7795025117766692, "grad_norm": 1.4276005041002382, "learning_rate": 1.2217905328360257e-07, "loss": 0.1514, "step": 44844 }, { "epoch": 0.779519894314172, "grad_norm": 1.167377339200078, "learning_rate": 1.2216061648949744e-07, "loss": 0.1645, "step": 44845 }, { "epoch": 0.7795372768516748, "grad_norm": 1.8911545035546933, "learning_rate": 1.221421808929695e-07, "loss": 0.2327, "step": 44846 }, { "epoch": 0.7795546593891777, "grad_norm": 0.9381122767261487, "learning_rate": 1.2212374649407736e-07, "loss": 0.1704, "step": 44847 }, { "epoch": 0.7795720419266805, "grad_norm": 5.432894215437483, "learning_rate": 1.221053132928792e-07, "loss": 0.3898, "step": 44848 }, { "epoch": 0.7795894244641833, "grad_norm": 3.5751129717041863, "learning_rate": 1.220868812894339e-07, "loss": 0.2861, "step": 44849 }, { "epoch": 0.779606807001686, "grad_norm": 1.3701937760526697, "learning_rate": 1.2206845048379943e-07, "loss": 0.2715, "step": 44850 }, { "epoch": 0.7796241895391889, "grad_norm": 1.7881337394174461, "learning_rate": 1.2205002087603422e-07, "loss": 0.2331, "step": 44851 }, { "epoch": 0.7796415720766917, "grad_norm": 2.0113364775815787, "learning_rate": 1.2203159246619692e-07, "loss": 0.2318, "step": 44852 }, { "epoch": 0.7796589546141945, "grad_norm": 1.185352810546878, "learning_rate": 1.220131652543459e-07, "loss": 0.0988, "step": 44853 }, { "epoch": 0.7796763371516974, "grad_norm": 1.668742085820832, "learning_rate": 1.2199473924053955e-07, "loss": 0.2085, "step": 44854 }, { "epoch": 0.7796937196892002, "grad_norm": 1.421902319719417, "learning_rate": 1.2197631442483618e-07, "loss": 0.1794, "step": 44855 }, { "epoch": 0.779711102226703, "grad_norm": 1.9508737167102845, "learning_rate": 1.2195789080729425e-07, "loss": 0.2791, "step": 44856 }, { "epoch": 0.7797284847642059, "grad_norm": 1.3024307683161964, "learning_rate": 1.2193946838797213e-07, "loss": 0.2447, "step": 44857 }, { "epoch": 0.7797458673017087, "grad_norm": 1.2795973509686094, "learning_rate": 1.219210471669283e-07, "loss": 0.2062, "step": 44858 }, { "epoch": 0.7797632498392115, "grad_norm": 1.397004535633173, "learning_rate": 1.2190262714422089e-07, "loss": 0.2466, "step": 44859 }, { "epoch": 0.7797806323767144, "grad_norm": 1.16094182210702, "learning_rate": 1.218842083199086e-07, "loss": 0.1553, "step": 44860 }, { "epoch": 0.7797980149142172, "grad_norm": 1.1071505835578954, "learning_rate": 1.2186579069404968e-07, "loss": 0.1618, "step": 44861 }, { "epoch": 0.77981539745172, "grad_norm": 2.466558971304723, "learning_rate": 1.2184737426670267e-07, "loss": 0.2086, "step": 44862 }, { "epoch": 0.7798327799892228, "grad_norm": 1.8353989766887195, "learning_rate": 1.2182895903792546e-07, "loss": 0.2058, "step": 44863 }, { "epoch": 0.7798501625267257, "grad_norm": 1.5619599922127458, "learning_rate": 1.2181054500777687e-07, "loss": 0.176, "step": 44864 }, { "epoch": 0.7798675450642285, "grad_norm": 1.1104902289137877, "learning_rate": 1.217921321763152e-07, "loss": 0.2323, "step": 44865 }, { "epoch": 0.7798849276017313, "grad_norm": 1.022448662275008, "learning_rate": 1.2177372054359865e-07, "loss": 0.3278, "step": 44866 }, { "epoch": 0.7799023101392342, "grad_norm": 4.172449022491792, "learning_rate": 1.2175531010968553e-07, "loss": 0.3342, "step": 44867 }, { "epoch": 0.779919692676737, "grad_norm": 1.2606991135400656, "learning_rate": 1.217369008746346e-07, "loss": 0.268, "step": 44868 }, { "epoch": 0.7799370752142398, "grad_norm": 1.794258823202157, "learning_rate": 1.2171849283850387e-07, "loss": 0.282, "step": 44869 }, { "epoch": 0.7799544577517425, "grad_norm": 2.1254565549362887, "learning_rate": 1.2170008600135168e-07, "loss": 0.2448, "step": 44870 }, { "epoch": 0.7799718402892454, "grad_norm": 2.3946202238702634, "learning_rate": 1.2168168036323634e-07, "loss": 0.2182, "step": 44871 }, { "epoch": 0.7799892228267482, "grad_norm": 1.2170063800856321, "learning_rate": 1.2166327592421638e-07, "loss": 0.2892, "step": 44872 }, { "epoch": 0.780006605364251, "grad_norm": 1.0543340468743454, "learning_rate": 1.216448726843501e-07, "loss": 0.1279, "step": 44873 }, { "epoch": 0.7800239879017539, "grad_norm": 1.619350389732966, "learning_rate": 1.2162647064369574e-07, "loss": 0.2155, "step": 44874 }, { "epoch": 0.7800413704392567, "grad_norm": 1.408967141526341, "learning_rate": 1.2160806980231164e-07, "loss": 0.1645, "step": 44875 }, { "epoch": 0.7800587529767595, "grad_norm": 2.0043275789405777, "learning_rate": 1.2158967016025618e-07, "loss": 0.242, "step": 44876 }, { "epoch": 0.7800761355142624, "grad_norm": 6.218056152663832, "learning_rate": 1.2157127171758763e-07, "loss": 0.233, "step": 44877 }, { "epoch": 0.7800935180517652, "grad_norm": 1.4690457740322351, "learning_rate": 1.215528744743643e-07, "loss": 0.186, "step": 44878 }, { "epoch": 0.780110900589268, "grad_norm": 1.1702345700438008, "learning_rate": 1.2153447843064436e-07, "loss": 0.1697, "step": 44879 }, { "epoch": 0.7801282831267708, "grad_norm": 0.7812231792137996, "learning_rate": 1.215160835864864e-07, "loss": 0.2854, "step": 44880 }, { "epoch": 0.7801456656642737, "grad_norm": 1.3107631357630647, "learning_rate": 1.2149768994194882e-07, "loss": 0.1176, "step": 44881 }, { "epoch": 0.7801630482017765, "grad_norm": 1.751144841559047, "learning_rate": 1.214792974970893e-07, "loss": 0.1883, "step": 44882 }, { "epoch": 0.7801804307392793, "grad_norm": 1.206412168217529, "learning_rate": 1.2146090625196677e-07, "loss": 0.1779, "step": 44883 }, { "epoch": 0.7801978132767822, "grad_norm": 1.7886059279989552, "learning_rate": 1.214425162066392e-07, "loss": 0.2178, "step": 44884 }, { "epoch": 0.780215195814285, "grad_norm": 1.1634123762483743, "learning_rate": 1.2142412736116498e-07, "loss": 0.2477, "step": 44885 }, { "epoch": 0.7802325783517878, "grad_norm": 2.650382092703318, "learning_rate": 1.2140573971560242e-07, "loss": 0.2445, "step": 44886 }, { "epoch": 0.7802499608892907, "grad_norm": 1.4290384234391942, "learning_rate": 1.213873532700097e-07, "loss": 0.2063, "step": 44887 }, { "epoch": 0.7802673434267935, "grad_norm": 1.3513644806442402, "learning_rate": 1.2136896802444513e-07, "loss": 0.2827, "step": 44888 }, { "epoch": 0.7802847259642963, "grad_norm": 1.5553390257121846, "learning_rate": 1.2135058397896707e-07, "loss": 0.1689, "step": 44889 }, { "epoch": 0.780302108501799, "grad_norm": 1.1677704897242256, "learning_rate": 1.213322011336335e-07, "loss": 0.1865, "step": 44890 }, { "epoch": 0.7803194910393019, "grad_norm": 1.872982413986383, "learning_rate": 1.2131381948850312e-07, "loss": 0.157, "step": 44891 }, { "epoch": 0.7803368735768047, "grad_norm": 2.616617915125496, "learning_rate": 1.2129543904363398e-07, "loss": 0.2436, "step": 44892 }, { "epoch": 0.7803542561143075, "grad_norm": 1.0975693931834918, "learning_rate": 1.2127705979908443e-07, "loss": 0.2134, "step": 44893 }, { "epoch": 0.7803716386518104, "grad_norm": 3.1370645499095153, "learning_rate": 1.212586817549124e-07, "loss": 0.2894, "step": 44894 }, { "epoch": 0.7803890211893132, "grad_norm": 1.470089329766445, "learning_rate": 1.2124030491117643e-07, "loss": 0.1851, "step": 44895 }, { "epoch": 0.780406403726816, "grad_norm": 1.4218597596645801, "learning_rate": 1.212219292679348e-07, "loss": 0.1836, "step": 44896 }, { "epoch": 0.7804237862643189, "grad_norm": 2.1793215100005687, "learning_rate": 1.2120355482524557e-07, "loss": 0.575, "step": 44897 }, { "epoch": 0.7804411688018217, "grad_norm": 1.347736213616286, "learning_rate": 1.21185181583167e-07, "loss": 0.3019, "step": 44898 }, { "epoch": 0.7804585513393245, "grad_norm": 1.016221350966916, "learning_rate": 1.2116680954175752e-07, "loss": 0.1936, "step": 44899 }, { "epoch": 0.7804759338768273, "grad_norm": 1.6802838480162874, "learning_rate": 1.2114843870107532e-07, "loss": 0.2453, "step": 44900 }, { "epoch": 0.7804933164143302, "grad_norm": 0.9710377025276864, "learning_rate": 1.2113006906117845e-07, "loss": 0.203, "step": 44901 }, { "epoch": 0.780510698951833, "grad_norm": 1.803207161743391, "learning_rate": 1.2111170062212506e-07, "loss": 0.3576, "step": 44902 }, { "epoch": 0.7805280814893358, "grad_norm": 1.5037718693455049, "learning_rate": 1.2109333338397364e-07, "loss": 0.1932, "step": 44903 }, { "epoch": 0.7805454640268387, "grad_norm": 1.5596338855415213, "learning_rate": 1.2107496734678236e-07, "loss": 0.2898, "step": 44904 }, { "epoch": 0.7805628465643415, "grad_norm": 2.2601998642102137, "learning_rate": 1.2105660251060934e-07, "loss": 0.1974, "step": 44905 }, { "epoch": 0.7805802291018443, "grad_norm": 0.852797713838109, "learning_rate": 1.2103823887551278e-07, "loss": 0.145, "step": 44906 }, { "epoch": 0.7805976116393472, "grad_norm": 1.5202970282924066, "learning_rate": 1.2101987644155098e-07, "loss": 0.2391, "step": 44907 }, { "epoch": 0.78061499417685, "grad_norm": 2.83583558341114, "learning_rate": 1.2100151520878206e-07, "loss": 0.258, "step": 44908 }, { "epoch": 0.7806323767143528, "grad_norm": 1.4595258020814341, "learning_rate": 1.2098315517726427e-07, "loss": 0.2069, "step": 44909 }, { "epoch": 0.7806497592518555, "grad_norm": 1.214351569477427, "learning_rate": 1.2096479634705558e-07, "loss": 0.1856, "step": 44910 }, { "epoch": 0.7806671417893584, "grad_norm": 1.1909036278194178, "learning_rate": 1.2094643871821454e-07, "loss": 0.235, "step": 44911 }, { "epoch": 0.7806845243268612, "grad_norm": 1.288558295712202, "learning_rate": 1.2092808229079932e-07, "loss": 0.3995, "step": 44912 }, { "epoch": 0.780701906864364, "grad_norm": 1.1331641300509918, "learning_rate": 1.2090972706486764e-07, "loss": 0.1777, "step": 44913 }, { "epoch": 0.7807192894018669, "grad_norm": 1.9484275617791977, "learning_rate": 1.2089137304047814e-07, "loss": 0.1879, "step": 44914 }, { "epoch": 0.7807366719393697, "grad_norm": 1.3244997247925183, "learning_rate": 1.208730202176888e-07, "loss": 0.2166, "step": 44915 }, { "epoch": 0.7807540544768725, "grad_norm": 1.3310303691265657, "learning_rate": 1.208546685965579e-07, "loss": 0.3051, "step": 44916 }, { "epoch": 0.7807714370143753, "grad_norm": 3.166470890056198, "learning_rate": 1.2083631817714334e-07, "loss": 0.3634, "step": 44917 }, { "epoch": 0.7807888195518782, "grad_norm": 0.9733742943728334, "learning_rate": 1.2081796895950363e-07, "loss": 0.2066, "step": 44918 }, { "epoch": 0.780806202089381, "grad_norm": 3.983213627036952, "learning_rate": 1.2079962094369695e-07, "loss": 0.3279, "step": 44919 }, { "epoch": 0.7808235846268838, "grad_norm": 2.354563845510921, "learning_rate": 1.207812741297811e-07, "loss": 0.2332, "step": 44920 }, { "epoch": 0.7808409671643867, "grad_norm": 1.0109230545497516, "learning_rate": 1.2076292851781428e-07, "loss": 0.2174, "step": 44921 }, { "epoch": 0.7808583497018895, "grad_norm": 1.8386296613554869, "learning_rate": 1.2074458410785494e-07, "loss": 0.1535, "step": 44922 }, { "epoch": 0.7808757322393923, "grad_norm": 1.517583677954702, "learning_rate": 1.2072624089996103e-07, "loss": 0.1861, "step": 44923 }, { "epoch": 0.7808931147768952, "grad_norm": 1.3205004723694473, "learning_rate": 1.2070789889419068e-07, "loss": 0.2422, "step": 44924 }, { "epoch": 0.780910497314398, "grad_norm": 1.1147634988302704, "learning_rate": 1.206895580906021e-07, "loss": 0.2701, "step": 44925 }, { "epoch": 0.7809278798519008, "grad_norm": 1.4025192054820088, "learning_rate": 1.2067121848925338e-07, "loss": 0.1389, "step": 44926 }, { "epoch": 0.7809452623894036, "grad_norm": 3.4690097877748003, "learning_rate": 1.2065288009020263e-07, "loss": 0.2537, "step": 44927 }, { "epoch": 0.7809626449269065, "grad_norm": 1.2495555725392753, "learning_rate": 1.20634542893508e-07, "loss": 0.1739, "step": 44928 }, { "epoch": 0.7809800274644093, "grad_norm": 1.480249438049186, "learning_rate": 1.2061620689922747e-07, "loss": 0.2266, "step": 44929 }, { "epoch": 0.780997410001912, "grad_norm": 1.413977711658654, "learning_rate": 1.2059787210741939e-07, "loss": 0.1535, "step": 44930 }, { "epoch": 0.7810147925394149, "grad_norm": 2.7715879902709464, "learning_rate": 1.2057953851814194e-07, "loss": 0.3091, "step": 44931 }, { "epoch": 0.7810321750769177, "grad_norm": 1.2647383229686138, "learning_rate": 1.205612061314528e-07, "loss": 0.246, "step": 44932 }, { "epoch": 0.7810495576144205, "grad_norm": 1.5204708179940574, "learning_rate": 1.205428749474105e-07, "loss": 0.197, "step": 44933 }, { "epoch": 0.7810669401519234, "grad_norm": 1.2936672099892859, "learning_rate": 1.2052454496607288e-07, "loss": 0.2662, "step": 44934 }, { "epoch": 0.7810843226894262, "grad_norm": 0.9763989021531351, "learning_rate": 1.2050621618749823e-07, "loss": 0.1798, "step": 44935 }, { "epoch": 0.781101705226929, "grad_norm": 1.4580010942766357, "learning_rate": 1.2048788861174454e-07, "loss": 0.2998, "step": 44936 }, { "epoch": 0.7811190877644318, "grad_norm": 2.8557828682541215, "learning_rate": 1.2046956223886985e-07, "loss": 0.1876, "step": 44937 }, { "epoch": 0.7811364703019347, "grad_norm": 2.1509664755128908, "learning_rate": 1.2045123706893235e-07, "loss": 0.203, "step": 44938 }, { "epoch": 0.7811538528394375, "grad_norm": 1.5839129720279603, "learning_rate": 1.2043291310199005e-07, "loss": 0.2061, "step": 44939 }, { "epoch": 0.7811712353769403, "grad_norm": 1.7610438894909408, "learning_rate": 1.2041459033810096e-07, "loss": 0.1983, "step": 44940 }, { "epoch": 0.7811886179144432, "grad_norm": 1.2609277105942673, "learning_rate": 1.2039626877732333e-07, "loss": 0.1649, "step": 44941 }, { "epoch": 0.781206000451946, "grad_norm": 0.7321414434103812, "learning_rate": 1.2037794841971522e-07, "loss": 0.1857, "step": 44942 }, { "epoch": 0.7812233829894488, "grad_norm": 1.100124818632797, "learning_rate": 1.203596292653346e-07, "loss": 0.1288, "step": 44943 }, { "epoch": 0.7812407655269517, "grad_norm": 0.9173493034474522, "learning_rate": 1.2034131131423958e-07, "loss": 0.3237, "step": 44944 }, { "epoch": 0.7812581480644545, "grad_norm": 1.311366641265288, "learning_rate": 1.2032299456648813e-07, "loss": 0.1659, "step": 44945 }, { "epoch": 0.7812755306019573, "grad_norm": 1.5793762787612518, "learning_rate": 1.2030467902213847e-07, "loss": 0.1766, "step": 44946 }, { "epoch": 0.7812929131394601, "grad_norm": 1.2218955815156713, "learning_rate": 1.2028636468124853e-07, "loss": 0.2345, "step": 44947 }, { "epoch": 0.781310295676963, "grad_norm": 3.1230577635412073, "learning_rate": 1.202680515438762e-07, "loss": 0.273, "step": 44948 }, { "epoch": 0.7813276782144657, "grad_norm": 0.951176846813534, "learning_rate": 1.202497396100799e-07, "loss": 0.2143, "step": 44949 }, { "epoch": 0.7813450607519685, "grad_norm": 3.0012332040132033, "learning_rate": 1.202314288799176e-07, "loss": 0.2207, "step": 44950 }, { "epoch": 0.7813624432894714, "grad_norm": 1.280540844451231, "learning_rate": 1.2021311935344713e-07, "loss": 0.195, "step": 44951 }, { "epoch": 0.7813798258269742, "grad_norm": 1.720408342237192, "learning_rate": 1.2019481103072644e-07, "loss": 0.3882, "step": 44952 }, { "epoch": 0.781397208364477, "grad_norm": 0.844334209901324, "learning_rate": 1.2017650391181384e-07, "loss": 0.2916, "step": 44953 }, { "epoch": 0.7814145909019798, "grad_norm": 1.3607845904079094, "learning_rate": 1.2015819799676729e-07, "loss": 0.1996, "step": 44954 }, { "epoch": 0.7814319734394827, "grad_norm": 1.2822359766338491, "learning_rate": 1.2013989328564473e-07, "loss": 0.3309, "step": 44955 }, { "epoch": 0.7814493559769855, "grad_norm": 1.3160180222815654, "learning_rate": 1.2012158977850424e-07, "loss": 0.1596, "step": 44956 }, { "epoch": 0.7814667385144883, "grad_norm": 1.5968712407959675, "learning_rate": 1.2010328747540382e-07, "loss": 0.3321, "step": 44957 }, { "epoch": 0.7814841210519912, "grad_norm": 2.0713523951813113, "learning_rate": 1.200849863764014e-07, "loss": 0.2759, "step": 44958 }, { "epoch": 0.781501503589494, "grad_norm": 1.6208432151241785, "learning_rate": 1.2006668648155516e-07, "loss": 0.1699, "step": 44959 }, { "epoch": 0.7815188861269968, "grad_norm": 4.810817554602925, "learning_rate": 1.2004838779092275e-07, "loss": 0.3481, "step": 44960 }, { "epoch": 0.7815362686644997, "grad_norm": 1.6446489468754604, "learning_rate": 1.2003009030456261e-07, "loss": 0.2937, "step": 44961 }, { "epoch": 0.7815536512020025, "grad_norm": 1.879854610400551, "learning_rate": 1.2001179402253253e-07, "loss": 0.4268, "step": 44962 }, { "epoch": 0.7815710337395053, "grad_norm": 1.692888494294607, "learning_rate": 1.1999349894489052e-07, "loss": 0.1879, "step": 44963 }, { "epoch": 0.7815884162770081, "grad_norm": 2.422933117934974, "learning_rate": 1.1997520507169455e-07, "loss": 0.2886, "step": 44964 }, { "epoch": 0.781605798814511, "grad_norm": 1.71138040872651, "learning_rate": 1.1995691240300265e-07, "loss": 0.2677, "step": 44965 }, { "epoch": 0.7816231813520138, "grad_norm": 1.583000640788635, "learning_rate": 1.199386209388727e-07, "loss": 0.1217, "step": 44966 }, { "epoch": 0.7816405638895166, "grad_norm": 1.090028414658706, "learning_rate": 1.199203306793628e-07, "loss": 0.1761, "step": 44967 }, { "epoch": 0.7816579464270195, "grad_norm": 1.526188894202935, "learning_rate": 1.1990204162453065e-07, "loss": 0.3148, "step": 44968 }, { "epoch": 0.7816753289645222, "grad_norm": 1.2905971335980433, "learning_rate": 1.1988375377443473e-07, "loss": 0.144, "step": 44969 }, { "epoch": 0.781692711502025, "grad_norm": 1.937248899962628, "learning_rate": 1.198654671291326e-07, "loss": 0.2403, "step": 44970 }, { "epoch": 0.7817100940395278, "grad_norm": 3.009147019307334, "learning_rate": 1.1984718168868212e-07, "loss": 0.32, "step": 44971 }, { "epoch": 0.7817274765770307, "grad_norm": 1.1593646332156786, "learning_rate": 1.1982889745314161e-07, "loss": 0.161, "step": 44972 }, { "epoch": 0.7817448591145335, "grad_norm": 0.7313953568062672, "learning_rate": 1.1981061442256884e-07, "loss": 0.2098, "step": 44973 }, { "epoch": 0.7817622416520363, "grad_norm": 1.0562805358733731, "learning_rate": 1.197923325970218e-07, "loss": 0.1217, "step": 44974 }, { "epoch": 0.7817796241895392, "grad_norm": 1.0969929775908336, "learning_rate": 1.1977405197655844e-07, "loss": 0.1077, "step": 44975 }, { "epoch": 0.781797006727042, "grad_norm": 0.7219839447780947, "learning_rate": 1.197557725612366e-07, "loss": 0.2485, "step": 44976 }, { "epoch": 0.7818143892645448, "grad_norm": 1.4785831933628149, "learning_rate": 1.1973749435111435e-07, "loss": 0.155, "step": 44977 }, { "epoch": 0.7818317718020477, "grad_norm": 2.8093010677979695, "learning_rate": 1.1971921734624958e-07, "loss": 0.3255, "step": 44978 }, { "epoch": 0.7818491543395505, "grad_norm": 1.484649928413759, "learning_rate": 1.1970094154669996e-07, "loss": 0.1836, "step": 44979 }, { "epoch": 0.7818665368770533, "grad_norm": 1.1935570920137892, "learning_rate": 1.196826669525239e-07, "loss": 0.2455, "step": 44980 }, { "epoch": 0.7818839194145561, "grad_norm": 0.9943903517325648, "learning_rate": 1.1966439356377904e-07, "loss": 0.1929, "step": 44981 }, { "epoch": 0.781901301952059, "grad_norm": 1.0495908466100685, "learning_rate": 1.196461213805235e-07, "loss": 0.219, "step": 44982 }, { "epoch": 0.7819186844895618, "grad_norm": 1.231627241766309, "learning_rate": 1.1962785040281475e-07, "loss": 0.1193, "step": 44983 }, { "epoch": 0.7819360670270646, "grad_norm": 1.3171637910661385, "learning_rate": 1.1960958063071114e-07, "loss": 0.195, "step": 44984 }, { "epoch": 0.7819534495645675, "grad_norm": 2.8043717164381463, "learning_rate": 1.1959131206427042e-07, "loss": 0.278, "step": 44985 }, { "epoch": 0.7819708321020703, "grad_norm": 1.0561195672674648, "learning_rate": 1.1957304470355045e-07, "loss": 0.2323, "step": 44986 }, { "epoch": 0.7819882146395731, "grad_norm": 2.007999442964637, "learning_rate": 1.1955477854860908e-07, "loss": 0.2132, "step": 44987 }, { "epoch": 0.782005597177076, "grad_norm": 2.0829799949464545, "learning_rate": 1.1953651359950456e-07, "loss": 0.2938, "step": 44988 }, { "epoch": 0.7820229797145787, "grad_norm": 1.1893167832784903, "learning_rate": 1.1951824985629438e-07, "loss": 0.2586, "step": 44989 }, { "epoch": 0.7820403622520815, "grad_norm": 1.523225939238792, "learning_rate": 1.194999873190366e-07, "loss": 0.171, "step": 44990 }, { "epoch": 0.7820577447895843, "grad_norm": 3.2881496521484, "learning_rate": 1.1948172598778888e-07, "loss": 0.3598, "step": 44991 }, { "epoch": 0.7820751273270872, "grad_norm": 2.5580064638734266, "learning_rate": 1.1946346586260947e-07, "loss": 0.2373, "step": 44992 }, { "epoch": 0.78209250986459, "grad_norm": 1.9139840487619277, "learning_rate": 1.1944520694355609e-07, "loss": 0.2222, "step": 44993 }, { "epoch": 0.7821098924020928, "grad_norm": 1.4638054082946932, "learning_rate": 1.194269492306865e-07, "loss": 0.4398, "step": 44994 }, { "epoch": 0.7821272749395957, "grad_norm": 1.2148492019694306, "learning_rate": 1.194086927240588e-07, "loss": 0.2211, "step": 44995 }, { "epoch": 0.7821446574770985, "grad_norm": 1.2520469192077304, "learning_rate": 1.1939043742373062e-07, "loss": 0.2076, "step": 44996 }, { "epoch": 0.7821620400146013, "grad_norm": 1.2653328271921696, "learning_rate": 1.193721833297599e-07, "loss": 0.3051, "step": 44997 }, { "epoch": 0.7821794225521042, "grad_norm": 1.6052241713257647, "learning_rate": 1.193539304422046e-07, "loss": 0.2008, "step": 44998 }, { "epoch": 0.782196805089607, "grad_norm": 1.7779042297954706, "learning_rate": 1.1933567876112226e-07, "loss": 0.1258, "step": 44999 }, { "epoch": 0.7822141876271098, "grad_norm": 1.0227674021312794, "learning_rate": 1.1931742828657115e-07, "loss": 0.2003, "step": 45000 }, { "epoch": 0.7822315701646126, "grad_norm": 1.4443120410329087, "learning_rate": 1.1929917901860903e-07, "loss": 0.2249, "step": 45001 }, { "epoch": 0.7822489527021155, "grad_norm": 0.9416576365299293, "learning_rate": 1.1928093095729335e-07, "loss": 0.1493, "step": 45002 }, { "epoch": 0.7822663352396183, "grad_norm": 0.7986903199862986, "learning_rate": 1.1926268410268236e-07, "loss": 0.0788, "step": 45003 }, { "epoch": 0.7822837177771211, "grad_norm": 1.4356457590621934, "learning_rate": 1.1924443845483378e-07, "loss": 0.1578, "step": 45004 }, { "epoch": 0.782301100314624, "grad_norm": 1.6793724214519443, "learning_rate": 1.1922619401380545e-07, "loss": 0.2443, "step": 45005 }, { "epoch": 0.7823184828521268, "grad_norm": 1.333933483516271, "learning_rate": 1.1920795077965518e-07, "loss": 0.2365, "step": 45006 }, { "epoch": 0.7823358653896296, "grad_norm": 1.2908399707644373, "learning_rate": 1.1918970875244072e-07, "loss": 0.1122, "step": 45007 }, { "epoch": 0.7823532479271325, "grad_norm": 1.2086275499556667, "learning_rate": 1.1917146793221999e-07, "loss": 0.268, "step": 45008 }, { "epoch": 0.7823706304646352, "grad_norm": 3.728170158780389, "learning_rate": 1.1915322831905078e-07, "loss": 0.1666, "step": 45009 }, { "epoch": 0.782388013002138, "grad_norm": 1.8972248286258857, "learning_rate": 1.1913498991299076e-07, "loss": 0.225, "step": 45010 }, { "epoch": 0.7824053955396408, "grad_norm": 3.1520897534221843, "learning_rate": 1.1911675271409799e-07, "loss": 0.1532, "step": 45011 }, { "epoch": 0.7824227780771437, "grad_norm": 1.2736178423447813, "learning_rate": 1.1909851672243016e-07, "loss": 0.1801, "step": 45012 }, { "epoch": 0.7824401606146465, "grad_norm": 1.6165749483671397, "learning_rate": 1.1908028193804504e-07, "loss": 0.3172, "step": 45013 }, { "epoch": 0.7824575431521493, "grad_norm": 1.3737680438613333, "learning_rate": 1.1906204836100043e-07, "loss": 0.1245, "step": 45014 }, { "epoch": 0.7824749256896522, "grad_norm": 1.7729435541402052, "learning_rate": 1.1904381599135416e-07, "loss": 0.2245, "step": 45015 }, { "epoch": 0.782492308227155, "grad_norm": 1.1536163221839877, "learning_rate": 1.1902558482916403e-07, "loss": 0.2587, "step": 45016 }, { "epoch": 0.7825096907646578, "grad_norm": 2.290722181727724, "learning_rate": 1.1900735487448776e-07, "loss": 0.1965, "step": 45017 }, { "epoch": 0.7825270733021606, "grad_norm": 0.8238184139995038, "learning_rate": 1.1898912612738298e-07, "loss": 0.1428, "step": 45018 }, { "epoch": 0.7825444558396635, "grad_norm": 1.0100937359865545, "learning_rate": 1.1897089858790782e-07, "loss": 0.25, "step": 45019 }, { "epoch": 0.7825618383771663, "grad_norm": 1.9431007538004548, "learning_rate": 1.1895267225612005e-07, "loss": 0.2639, "step": 45020 }, { "epoch": 0.7825792209146691, "grad_norm": 1.8901493682462207, "learning_rate": 1.1893444713207695e-07, "loss": 0.2353, "step": 45021 }, { "epoch": 0.782596603452172, "grad_norm": 1.4860197081699857, "learning_rate": 1.1891622321583677e-07, "loss": 0.1896, "step": 45022 }, { "epoch": 0.7826139859896748, "grad_norm": 0.9622910359675838, "learning_rate": 1.1889800050745713e-07, "loss": 0.2501, "step": 45023 }, { "epoch": 0.7826313685271776, "grad_norm": 2.6085281915981584, "learning_rate": 1.1887977900699575e-07, "loss": 0.3883, "step": 45024 }, { "epoch": 0.7826487510646805, "grad_norm": 0.7707881687798394, "learning_rate": 1.1886155871451036e-07, "loss": 0.1539, "step": 45025 }, { "epoch": 0.7826661336021833, "grad_norm": 1.1644458086618985, "learning_rate": 1.1884333963005877e-07, "loss": 0.206, "step": 45026 }, { "epoch": 0.7826835161396861, "grad_norm": 1.3844216953571031, "learning_rate": 1.1882512175369869e-07, "loss": 0.1532, "step": 45027 }, { "epoch": 0.782700898677189, "grad_norm": 1.8410962578242391, "learning_rate": 1.1880690508548791e-07, "loss": 0.2177, "step": 45028 }, { "epoch": 0.7827182812146917, "grad_norm": 1.6390795460559908, "learning_rate": 1.1878868962548399e-07, "loss": 0.2813, "step": 45029 }, { "epoch": 0.7827356637521945, "grad_norm": 1.3314766391114148, "learning_rate": 1.1877047537374491e-07, "loss": 0.241, "step": 45030 }, { "epoch": 0.7827530462896973, "grad_norm": 2.106394171655196, "learning_rate": 1.1875226233032837e-07, "loss": 0.2666, "step": 45031 }, { "epoch": 0.7827704288272002, "grad_norm": 1.0012597781034844, "learning_rate": 1.1873405049529212e-07, "loss": 0.2299, "step": 45032 }, { "epoch": 0.782787811364703, "grad_norm": 1.1053708998172744, "learning_rate": 1.1871583986869349e-07, "loss": 0.1435, "step": 45033 }, { "epoch": 0.7828051939022058, "grad_norm": 1.1462144083122423, "learning_rate": 1.1869763045059067e-07, "loss": 0.2306, "step": 45034 }, { "epoch": 0.7828225764397087, "grad_norm": 0.9901706987183896, "learning_rate": 1.1867942224104122e-07, "loss": 0.1381, "step": 45035 }, { "epoch": 0.7828399589772115, "grad_norm": 1.7619639562153926, "learning_rate": 1.1866121524010281e-07, "loss": 0.245, "step": 45036 }, { "epoch": 0.7828573415147143, "grad_norm": 1.658550497645612, "learning_rate": 1.1864300944783306e-07, "loss": 0.2216, "step": 45037 }, { "epoch": 0.7828747240522171, "grad_norm": 1.6850627683494315, "learning_rate": 1.1862480486428989e-07, "loss": 0.3528, "step": 45038 }, { "epoch": 0.78289210658972, "grad_norm": 1.2826185552287468, "learning_rate": 1.1860660148953105e-07, "loss": 0.2166, "step": 45039 }, { "epoch": 0.7829094891272228, "grad_norm": 1.996940859988996, "learning_rate": 1.1858839932361397e-07, "loss": 0.2083, "step": 45040 }, { "epoch": 0.7829268716647256, "grad_norm": 1.183509916067681, "learning_rate": 1.1857019836659632e-07, "loss": 0.243, "step": 45041 }, { "epoch": 0.7829442542022285, "grad_norm": 1.8328028248060657, "learning_rate": 1.1855199861853604e-07, "loss": 0.2961, "step": 45042 }, { "epoch": 0.7829616367397313, "grad_norm": 2.313672758150325, "learning_rate": 1.185338000794907e-07, "loss": 0.2461, "step": 45043 }, { "epoch": 0.7829790192772341, "grad_norm": 0.9872764139536733, "learning_rate": 1.18515602749518e-07, "loss": 0.2192, "step": 45044 }, { "epoch": 0.782996401814737, "grad_norm": 2.4679959604640436, "learning_rate": 1.1849740662867563e-07, "loss": 0.2436, "step": 45045 }, { "epoch": 0.7830137843522398, "grad_norm": 1.6134846770734763, "learning_rate": 1.184792117170212e-07, "loss": 0.2255, "step": 45046 }, { "epoch": 0.7830311668897426, "grad_norm": 1.3485002927501404, "learning_rate": 1.184610180146124e-07, "loss": 0.1486, "step": 45047 }, { "epoch": 0.7830485494272454, "grad_norm": 1.9325651070876386, "learning_rate": 1.1844282552150692e-07, "loss": 0.1647, "step": 45048 }, { "epoch": 0.7830659319647482, "grad_norm": 1.2868590950434675, "learning_rate": 1.1842463423776228e-07, "loss": 0.1181, "step": 45049 }, { "epoch": 0.783083314502251, "grad_norm": 1.6499890112335234, "learning_rate": 1.1840644416343637e-07, "loss": 0.1935, "step": 45050 }, { "epoch": 0.7831006970397538, "grad_norm": 3.1641514884116124, "learning_rate": 1.1838825529858692e-07, "loss": 0.2686, "step": 45051 }, { "epoch": 0.7831180795772567, "grad_norm": 1.0847872652587982, "learning_rate": 1.1837006764327112e-07, "loss": 0.2212, "step": 45052 }, { "epoch": 0.7831354621147595, "grad_norm": 1.7380095193156668, "learning_rate": 1.1835188119754703e-07, "loss": 0.1698, "step": 45053 }, { "epoch": 0.7831528446522623, "grad_norm": 1.8339755810499687, "learning_rate": 1.1833369596147219e-07, "loss": 0.1758, "step": 45054 }, { "epoch": 0.7831702271897651, "grad_norm": 1.788363904798066, "learning_rate": 1.1831551193510414e-07, "loss": 0.1462, "step": 45055 }, { "epoch": 0.783187609727268, "grad_norm": 1.2999015118262098, "learning_rate": 1.1829732911850066e-07, "loss": 0.1353, "step": 45056 }, { "epoch": 0.7832049922647708, "grad_norm": 1.300566750018219, "learning_rate": 1.1827914751171925e-07, "loss": 0.1948, "step": 45057 }, { "epoch": 0.7832223748022736, "grad_norm": 1.3492988408618998, "learning_rate": 1.1826096711481765e-07, "loss": 0.3201, "step": 45058 }, { "epoch": 0.7832397573397765, "grad_norm": 1.6597019244356863, "learning_rate": 1.1824278792785342e-07, "loss": 0.1615, "step": 45059 }, { "epoch": 0.7832571398772793, "grad_norm": 1.5113857995969076, "learning_rate": 1.1822460995088402e-07, "loss": 0.1988, "step": 45060 }, { "epoch": 0.7832745224147821, "grad_norm": 1.4714209152534083, "learning_rate": 1.1820643318396734e-07, "loss": 0.2644, "step": 45061 }, { "epoch": 0.783291904952285, "grad_norm": 1.535286463336646, "learning_rate": 1.1818825762716095e-07, "loss": 0.1723, "step": 45062 }, { "epoch": 0.7833092874897878, "grad_norm": 0.9356380962846916, "learning_rate": 1.1817008328052236e-07, "loss": 0.2928, "step": 45063 }, { "epoch": 0.7833266700272906, "grad_norm": 1.0428699637113243, "learning_rate": 1.1815191014410925e-07, "loss": 0.3035, "step": 45064 }, { "epoch": 0.7833440525647934, "grad_norm": 1.1880585443837126, "learning_rate": 1.1813373821797912e-07, "loss": 0.2465, "step": 45065 }, { "epoch": 0.7833614351022963, "grad_norm": 1.6421471360652726, "learning_rate": 1.1811556750218965e-07, "loss": 0.1786, "step": 45066 }, { "epoch": 0.7833788176397991, "grad_norm": 1.2886065030891942, "learning_rate": 1.180973979967984e-07, "loss": 0.2593, "step": 45067 }, { "epoch": 0.7833962001773019, "grad_norm": 1.0836252875591954, "learning_rate": 1.1807922970186285e-07, "loss": 0.3283, "step": 45068 }, { "epoch": 0.7834135827148047, "grad_norm": 6.644619045862214, "learning_rate": 1.1806106261744082e-07, "loss": 0.3834, "step": 45069 }, { "epoch": 0.7834309652523075, "grad_norm": 2.149688648585122, "learning_rate": 1.1804289674358991e-07, "loss": 0.2237, "step": 45070 }, { "epoch": 0.7834483477898103, "grad_norm": 1.4062220730012318, "learning_rate": 1.1802473208036745e-07, "loss": 0.2763, "step": 45071 }, { "epoch": 0.7834657303273131, "grad_norm": 1.0872659977152994, "learning_rate": 1.1800656862783093e-07, "loss": 0.2234, "step": 45072 }, { "epoch": 0.783483112864816, "grad_norm": 1.1903393884920874, "learning_rate": 1.179884063860383e-07, "loss": 0.2545, "step": 45073 }, { "epoch": 0.7835004954023188, "grad_norm": 1.5966122649820789, "learning_rate": 1.179702453550469e-07, "loss": 0.1924, "step": 45074 }, { "epoch": 0.7835178779398216, "grad_norm": 1.0731119256219364, "learning_rate": 1.1795208553491437e-07, "loss": 0.1419, "step": 45075 }, { "epoch": 0.7835352604773245, "grad_norm": 1.2514174086093484, "learning_rate": 1.1793392692569826e-07, "loss": 0.2417, "step": 45076 }, { "epoch": 0.7835526430148273, "grad_norm": 1.3805851539053802, "learning_rate": 1.1791576952745602e-07, "loss": 0.3197, "step": 45077 }, { "epoch": 0.7835700255523301, "grad_norm": 1.9815516300438825, "learning_rate": 1.1789761334024534e-07, "loss": 0.2281, "step": 45078 }, { "epoch": 0.783587408089833, "grad_norm": 1.3522133496096063, "learning_rate": 1.1787945836412366e-07, "loss": 0.2174, "step": 45079 }, { "epoch": 0.7836047906273358, "grad_norm": 1.3134391158655265, "learning_rate": 1.1786130459914845e-07, "loss": 0.1416, "step": 45080 }, { "epoch": 0.7836221731648386, "grad_norm": 1.109148053668964, "learning_rate": 1.1784315204537747e-07, "loss": 0.1872, "step": 45081 }, { "epoch": 0.7836395557023415, "grad_norm": 1.391050215430437, "learning_rate": 1.1782500070286816e-07, "loss": 0.1686, "step": 45082 }, { "epoch": 0.7836569382398443, "grad_norm": 1.3511772290776927, "learning_rate": 1.1780685057167805e-07, "loss": 0.2151, "step": 45083 }, { "epoch": 0.7836743207773471, "grad_norm": 3.1557575931158, "learning_rate": 1.1778870165186467e-07, "loss": 0.2307, "step": 45084 }, { "epoch": 0.7836917033148499, "grad_norm": 1.2951251437745512, "learning_rate": 1.1777055394348551e-07, "loss": 0.1585, "step": 45085 }, { "epoch": 0.7837090858523528, "grad_norm": 1.285705125042693, "learning_rate": 1.1775240744659815e-07, "loss": 0.2738, "step": 45086 }, { "epoch": 0.7837264683898556, "grad_norm": 1.1025362272660297, "learning_rate": 1.1773426216126004e-07, "loss": 0.3628, "step": 45087 }, { "epoch": 0.7837438509273583, "grad_norm": 0.9939290435024523, "learning_rate": 1.1771611808752857e-07, "loss": 0.1842, "step": 45088 }, { "epoch": 0.7837612334648612, "grad_norm": 1.6746805140330587, "learning_rate": 1.1769797522546171e-07, "loss": 0.4069, "step": 45089 }, { "epoch": 0.783778616002364, "grad_norm": 1.4126859288807834, "learning_rate": 1.1767983357511647e-07, "loss": 0.2414, "step": 45090 }, { "epoch": 0.7837959985398668, "grad_norm": 1.934943074508481, "learning_rate": 1.1766169313655044e-07, "loss": 0.3133, "step": 45091 }, { "epoch": 0.7838133810773696, "grad_norm": 0.9841333393840673, "learning_rate": 1.1764355390982134e-07, "loss": 0.1375, "step": 45092 }, { "epoch": 0.7838307636148725, "grad_norm": 1.4806458141213334, "learning_rate": 1.1762541589498648e-07, "loss": 0.227, "step": 45093 }, { "epoch": 0.7838481461523753, "grad_norm": 1.6000355145763334, "learning_rate": 1.176072790921035e-07, "loss": 0.2078, "step": 45094 }, { "epoch": 0.7838655286898781, "grad_norm": 2.17992498272561, "learning_rate": 1.1758914350122973e-07, "loss": 0.1908, "step": 45095 }, { "epoch": 0.783882911227381, "grad_norm": 1.300223624507524, "learning_rate": 1.1757100912242268e-07, "loss": 0.1659, "step": 45096 }, { "epoch": 0.7839002937648838, "grad_norm": 1.377029447806116, "learning_rate": 1.1755287595573993e-07, "loss": 0.2164, "step": 45097 }, { "epoch": 0.7839176763023866, "grad_norm": 1.5482117946510427, "learning_rate": 1.1753474400123881e-07, "loss": 0.164, "step": 45098 }, { "epoch": 0.7839350588398895, "grad_norm": 1.0178314427371784, "learning_rate": 1.1751661325897677e-07, "loss": 0.1064, "step": 45099 }, { "epoch": 0.7839524413773923, "grad_norm": 1.3580505859729766, "learning_rate": 1.174984837290115e-07, "loss": 0.2875, "step": 45100 }, { "epoch": 0.7839698239148951, "grad_norm": 2.013868200867094, "learning_rate": 1.1748035541140034e-07, "loss": 0.2651, "step": 45101 }, { "epoch": 0.7839872064523979, "grad_norm": 1.0743874082576839, "learning_rate": 1.1746222830620084e-07, "loss": 0.1536, "step": 45102 }, { "epoch": 0.7840045889899008, "grad_norm": 1.8790760233944614, "learning_rate": 1.1744410241347008e-07, "loss": 0.2771, "step": 45103 }, { "epoch": 0.7840219715274036, "grad_norm": 1.4497072955916974, "learning_rate": 1.1742597773326596e-07, "loss": 0.2305, "step": 45104 }, { "epoch": 0.7840393540649064, "grad_norm": 2.0510050878836474, "learning_rate": 1.174078542656457e-07, "loss": 0.1584, "step": 45105 }, { "epoch": 0.7840567366024093, "grad_norm": 0.795528910543245, "learning_rate": 1.1738973201066682e-07, "loss": 0.269, "step": 45106 }, { "epoch": 0.7840741191399121, "grad_norm": 1.2757418191800634, "learning_rate": 1.173716109683866e-07, "loss": 0.2561, "step": 45107 }, { "epoch": 0.7840915016774148, "grad_norm": 1.6135633045558493, "learning_rate": 1.173534911388629e-07, "loss": 0.2503, "step": 45108 }, { "epoch": 0.7841088842149176, "grad_norm": 0.8524217750894991, "learning_rate": 1.173353725221527e-07, "loss": 0.1329, "step": 45109 }, { "epoch": 0.7841262667524205, "grad_norm": 2.054663294679304, "learning_rate": 1.1731725511831347e-07, "loss": 0.2383, "step": 45110 }, { "epoch": 0.7841436492899233, "grad_norm": 1.410811396771186, "learning_rate": 1.1729913892740284e-07, "loss": 0.1519, "step": 45111 }, { "epoch": 0.7841610318274261, "grad_norm": 1.6865003591484304, "learning_rate": 1.172810239494782e-07, "loss": 0.2455, "step": 45112 }, { "epoch": 0.784178414364929, "grad_norm": 1.2205779072601133, "learning_rate": 1.1726291018459688e-07, "loss": 0.152, "step": 45113 }, { "epoch": 0.7841957969024318, "grad_norm": 1.2672221022393, "learning_rate": 1.1724479763281631e-07, "loss": 0.204, "step": 45114 }, { "epoch": 0.7842131794399346, "grad_norm": 2.573578376081064, "learning_rate": 1.1722668629419391e-07, "loss": 0.2309, "step": 45115 }, { "epoch": 0.7842305619774375, "grad_norm": 1.1262029355100205, "learning_rate": 1.172085761687871e-07, "loss": 0.1713, "step": 45116 }, { "epoch": 0.7842479445149403, "grad_norm": 1.3076589890893133, "learning_rate": 1.1719046725665327e-07, "loss": 0.4002, "step": 45117 }, { "epoch": 0.7842653270524431, "grad_norm": 1.231846677873445, "learning_rate": 1.1717235955784966e-07, "loss": 0.2642, "step": 45118 }, { "epoch": 0.784282709589946, "grad_norm": 1.326810192848411, "learning_rate": 1.1715425307243393e-07, "loss": 0.1603, "step": 45119 }, { "epoch": 0.7843000921274488, "grad_norm": 2.148648927002642, "learning_rate": 1.1713614780046338e-07, "loss": 0.3194, "step": 45120 }, { "epoch": 0.7843174746649516, "grad_norm": 1.2823062860636396, "learning_rate": 1.1711804374199546e-07, "loss": 0.2139, "step": 45121 }, { "epoch": 0.7843348572024544, "grad_norm": 2.2796021106858086, "learning_rate": 1.1709994089708725e-07, "loss": 0.4021, "step": 45122 }, { "epoch": 0.7843522397399573, "grad_norm": 1.0162464410095102, "learning_rate": 1.1708183926579645e-07, "loss": 0.1096, "step": 45123 }, { "epoch": 0.7843696222774601, "grad_norm": 1.3137549711008214, "learning_rate": 1.1706373884818033e-07, "loss": 0.3068, "step": 45124 }, { "epoch": 0.7843870048149629, "grad_norm": 2.4671781837610647, "learning_rate": 1.170456396442962e-07, "loss": 0.2705, "step": 45125 }, { "epoch": 0.7844043873524658, "grad_norm": 1.6167623790405234, "learning_rate": 1.1702754165420137e-07, "loss": 0.2036, "step": 45126 }, { "epoch": 0.7844217698899686, "grad_norm": 2.2948454304079333, "learning_rate": 1.1700944487795362e-07, "loss": 0.2187, "step": 45127 }, { "epoch": 0.7844391524274713, "grad_norm": 1.3655934338012279, "learning_rate": 1.1699134931560984e-07, "loss": 0.1837, "step": 45128 }, { "epoch": 0.7844565349649741, "grad_norm": 1.9191382951878309, "learning_rate": 1.1697325496722754e-07, "loss": 0.2685, "step": 45129 }, { "epoch": 0.784473917502477, "grad_norm": 7.569122387951552, "learning_rate": 1.1695516183286392e-07, "loss": 0.3556, "step": 45130 }, { "epoch": 0.7844913000399798, "grad_norm": 1.7040143442138675, "learning_rate": 1.1693706991257663e-07, "loss": 0.2969, "step": 45131 }, { "epoch": 0.7845086825774826, "grad_norm": 1.1669396690057643, "learning_rate": 1.1691897920642285e-07, "loss": 0.2348, "step": 45132 }, { "epoch": 0.7845260651149855, "grad_norm": 0.6893224382017958, "learning_rate": 1.1690088971445993e-07, "loss": 0.2115, "step": 45133 }, { "epoch": 0.7845434476524883, "grad_norm": 1.487772755946558, "learning_rate": 1.1688280143674528e-07, "loss": 0.2721, "step": 45134 }, { "epoch": 0.7845608301899911, "grad_norm": 1.1138371087660985, "learning_rate": 1.1686471437333606e-07, "loss": 0.2586, "step": 45135 }, { "epoch": 0.784578212727494, "grad_norm": 1.012063797315836, "learning_rate": 1.1684662852428978e-07, "loss": 0.2896, "step": 45136 }, { "epoch": 0.7845955952649968, "grad_norm": 1.5459850360040948, "learning_rate": 1.1682854388966362e-07, "loss": 0.1807, "step": 45137 }, { "epoch": 0.7846129778024996, "grad_norm": 1.1130696333755405, "learning_rate": 1.1681046046951488e-07, "loss": 0.2307, "step": 45138 }, { "epoch": 0.7846303603400024, "grad_norm": 1.4435141099618745, "learning_rate": 1.1679237826390104e-07, "loss": 0.2419, "step": 45139 }, { "epoch": 0.7846477428775053, "grad_norm": 1.6259380430350998, "learning_rate": 1.1677429727287951e-07, "loss": 0.1875, "step": 45140 }, { "epoch": 0.7846651254150081, "grad_norm": 0.7418844804630041, "learning_rate": 1.167562174965071e-07, "loss": 0.1211, "step": 45141 }, { "epoch": 0.7846825079525109, "grad_norm": 2.1669566216425307, "learning_rate": 1.1673813893484158e-07, "loss": 0.2228, "step": 45142 }, { "epoch": 0.7846998904900138, "grad_norm": 1.3254708236745911, "learning_rate": 1.1672006158794013e-07, "loss": 0.2965, "step": 45143 }, { "epoch": 0.7847172730275166, "grad_norm": 1.4866356898561364, "learning_rate": 1.1670198545586002e-07, "loss": 0.1577, "step": 45144 }, { "epoch": 0.7847346555650194, "grad_norm": 1.5222053908739457, "learning_rate": 1.166839105386585e-07, "loss": 0.1632, "step": 45145 }, { "epoch": 0.7847520381025223, "grad_norm": 1.2724484476579414, "learning_rate": 1.1666583683639298e-07, "loss": 0.2017, "step": 45146 }, { "epoch": 0.7847694206400251, "grad_norm": 0.8934371143020916, "learning_rate": 1.1664776434912061e-07, "loss": 0.1402, "step": 45147 }, { "epoch": 0.7847868031775278, "grad_norm": 1.4437348390694167, "learning_rate": 1.1662969307689874e-07, "loss": 0.1778, "step": 45148 }, { "epoch": 0.7848041857150306, "grad_norm": 0.9182182620803429, "learning_rate": 1.166116230197845e-07, "loss": 0.2437, "step": 45149 }, { "epoch": 0.7848215682525335, "grad_norm": 1.4157122443313839, "learning_rate": 1.1659355417783546e-07, "loss": 0.2406, "step": 45150 }, { "epoch": 0.7848389507900363, "grad_norm": 1.8530340809515482, "learning_rate": 1.1657548655110866e-07, "loss": 0.1812, "step": 45151 }, { "epoch": 0.7848563333275391, "grad_norm": 1.0446833858913926, "learning_rate": 1.1655742013966163e-07, "loss": 0.2942, "step": 45152 }, { "epoch": 0.784873715865042, "grad_norm": 1.3545983339989276, "learning_rate": 1.1653935494355116e-07, "loss": 0.146, "step": 45153 }, { "epoch": 0.7848910984025448, "grad_norm": 3.3150990559027926, "learning_rate": 1.1652129096283492e-07, "loss": 0.1537, "step": 45154 }, { "epoch": 0.7849084809400476, "grad_norm": 1.6646768897008752, "learning_rate": 1.1650322819757003e-07, "loss": 0.3131, "step": 45155 }, { "epoch": 0.7849258634775504, "grad_norm": 1.6298286884163498, "learning_rate": 1.1648516664781377e-07, "loss": 0.2863, "step": 45156 }, { "epoch": 0.7849432460150533, "grad_norm": 1.8300488032094844, "learning_rate": 1.1646710631362322e-07, "loss": 0.1808, "step": 45157 }, { "epoch": 0.7849606285525561, "grad_norm": 1.5303591676013784, "learning_rate": 1.1644904719505588e-07, "loss": 0.3062, "step": 45158 }, { "epoch": 0.7849780110900589, "grad_norm": 1.1620450974554881, "learning_rate": 1.1643098929216899e-07, "loss": 0.1657, "step": 45159 }, { "epoch": 0.7849953936275618, "grad_norm": 1.9161986292935793, "learning_rate": 1.1641293260501956e-07, "loss": 0.2596, "step": 45160 }, { "epoch": 0.7850127761650646, "grad_norm": 2.2315478536059734, "learning_rate": 1.1639487713366475e-07, "loss": 0.1779, "step": 45161 }, { "epoch": 0.7850301587025674, "grad_norm": 1.2447968510290153, "learning_rate": 1.1637682287816214e-07, "loss": 0.1632, "step": 45162 }, { "epoch": 0.7850475412400703, "grad_norm": 2.3401530008943787, "learning_rate": 1.163587698385688e-07, "loss": 0.1689, "step": 45163 }, { "epoch": 0.7850649237775731, "grad_norm": 1.7920385561253596, "learning_rate": 1.1634071801494189e-07, "loss": 0.1856, "step": 45164 }, { "epoch": 0.7850823063150759, "grad_norm": 1.0892267819740837, "learning_rate": 1.1632266740733865e-07, "loss": 0.2099, "step": 45165 }, { "epoch": 0.7850996888525787, "grad_norm": 1.7519964995523667, "learning_rate": 1.1630461801581637e-07, "loss": 0.2198, "step": 45166 }, { "epoch": 0.7851170713900816, "grad_norm": 1.185609668884748, "learning_rate": 1.1628656984043211e-07, "loss": 0.1807, "step": 45167 }, { "epoch": 0.7851344539275843, "grad_norm": 2.4507475757274304, "learning_rate": 1.1626852288124323e-07, "loss": 0.2448, "step": 45168 }, { "epoch": 0.7851518364650871, "grad_norm": 0.8321434432928609, "learning_rate": 1.1625047713830671e-07, "loss": 0.1303, "step": 45169 }, { "epoch": 0.78516921900259, "grad_norm": 1.452142912014614, "learning_rate": 1.1623243261168004e-07, "loss": 0.2502, "step": 45170 }, { "epoch": 0.7851866015400928, "grad_norm": 1.3212415343707244, "learning_rate": 1.1621438930142036e-07, "loss": 0.2052, "step": 45171 }, { "epoch": 0.7852039840775956, "grad_norm": 1.408864986367406, "learning_rate": 1.1619634720758453e-07, "loss": 0.1758, "step": 45172 }, { "epoch": 0.7852213666150984, "grad_norm": 3.1411222895332016, "learning_rate": 1.1617830633023012e-07, "loss": 0.6029, "step": 45173 }, { "epoch": 0.7852387491526013, "grad_norm": 2.2158433723738216, "learning_rate": 1.1616026666941414e-07, "loss": 0.3179, "step": 45174 }, { "epoch": 0.7852561316901041, "grad_norm": 1.7950653611316822, "learning_rate": 1.1614222822519382e-07, "loss": 0.1858, "step": 45175 }, { "epoch": 0.7852735142276069, "grad_norm": 0.9626426590168208, "learning_rate": 1.1612419099762633e-07, "loss": 0.1741, "step": 45176 }, { "epoch": 0.7852908967651098, "grad_norm": 1.2644061397500315, "learning_rate": 1.1610615498676874e-07, "loss": 0.1724, "step": 45177 }, { "epoch": 0.7853082793026126, "grad_norm": 0.9036630862395716, "learning_rate": 1.1608812019267833e-07, "loss": 0.2252, "step": 45178 }, { "epoch": 0.7853256618401154, "grad_norm": 2.6234022758625946, "learning_rate": 1.1607008661541224e-07, "loss": 0.1782, "step": 45179 }, { "epoch": 0.7853430443776183, "grad_norm": 1.0903563852470353, "learning_rate": 1.1605205425502746e-07, "loss": 0.2097, "step": 45180 }, { "epoch": 0.7853604269151211, "grad_norm": 1.7953502632300586, "learning_rate": 1.1603402311158145e-07, "loss": 0.4984, "step": 45181 }, { "epoch": 0.7853778094526239, "grad_norm": 1.1327235472096318, "learning_rate": 1.1601599318513117e-07, "loss": 0.2455, "step": 45182 }, { "epoch": 0.7853951919901268, "grad_norm": 1.6849202976976152, "learning_rate": 1.1599796447573385e-07, "loss": 0.3802, "step": 45183 }, { "epoch": 0.7854125745276296, "grad_norm": 1.3124055694703076, "learning_rate": 1.1597993698344655e-07, "loss": 0.2579, "step": 45184 }, { "epoch": 0.7854299570651324, "grad_norm": 1.4157985546139356, "learning_rate": 1.159619107083265e-07, "loss": 0.1169, "step": 45185 }, { "epoch": 0.7854473396026352, "grad_norm": 1.7647119909464704, "learning_rate": 1.1594388565043073e-07, "loss": 0.1884, "step": 45186 }, { "epoch": 0.7854647221401381, "grad_norm": 1.11600630191762, "learning_rate": 1.1592586180981645e-07, "loss": 0.2184, "step": 45187 }, { "epoch": 0.7854821046776408, "grad_norm": 1.486799118739623, "learning_rate": 1.1590783918654056e-07, "loss": 0.1787, "step": 45188 }, { "epoch": 0.7854994872151436, "grad_norm": 2.839883859228933, "learning_rate": 1.158898177806606e-07, "loss": 0.2838, "step": 45189 }, { "epoch": 0.7855168697526465, "grad_norm": 1.4064694610981243, "learning_rate": 1.1587179759223354e-07, "loss": 0.1876, "step": 45190 }, { "epoch": 0.7855342522901493, "grad_norm": 1.6048918382001305, "learning_rate": 1.1585377862131635e-07, "loss": 0.1955, "step": 45191 }, { "epoch": 0.7855516348276521, "grad_norm": 1.0090279400145252, "learning_rate": 1.1583576086796604e-07, "loss": 0.2006, "step": 45192 }, { "epoch": 0.7855690173651549, "grad_norm": 1.9793105383614433, "learning_rate": 1.1581774433224007e-07, "loss": 0.2699, "step": 45193 }, { "epoch": 0.7855863999026578, "grad_norm": 1.8449248313540514, "learning_rate": 1.1579972901419532e-07, "loss": 0.1737, "step": 45194 }, { "epoch": 0.7856037824401606, "grad_norm": 1.9651914517484956, "learning_rate": 1.15781714913889e-07, "loss": 0.2574, "step": 45195 }, { "epoch": 0.7856211649776634, "grad_norm": 2.4151393836259536, "learning_rate": 1.1576370203137809e-07, "loss": 0.247, "step": 45196 }, { "epoch": 0.7856385475151663, "grad_norm": 1.3993960982175806, "learning_rate": 1.1574569036671977e-07, "loss": 0.1418, "step": 45197 }, { "epoch": 0.7856559300526691, "grad_norm": 0.9968811107379413, "learning_rate": 1.1572767991997112e-07, "loss": 0.247, "step": 45198 }, { "epoch": 0.7856733125901719, "grad_norm": 1.3202730762900798, "learning_rate": 1.1570967069118903e-07, "loss": 0.2644, "step": 45199 }, { "epoch": 0.7856906951276748, "grad_norm": 1.9613504875744714, "learning_rate": 1.1569166268043096e-07, "loss": 0.2162, "step": 45200 }, { "epoch": 0.7857080776651776, "grad_norm": 1.3242736900799674, "learning_rate": 1.1567365588775369e-07, "loss": 0.2223, "step": 45201 }, { "epoch": 0.7857254602026804, "grad_norm": 1.3538827742763244, "learning_rate": 1.1565565031321444e-07, "loss": 0.2351, "step": 45202 }, { "epoch": 0.7857428427401832, "grad_norm": 1.210009936546054, "learning_rate": 1.156376459568702e-07, "loss": 0.1925, "step": 45203 }, { "epoch": 0.7857602252776861, "grad_norm": 1.077685801352255, "learning_rate": 1.1561964281877812e-07, "loss": 0.1539, "step": 45204 }, { "epoch": 0.7857776078151889, "grad_norm": 2.375426576174044, "learning_rate": 1.1560164089899516e-07, "loss": 0.2612, "step": 45205 }, { "epoch": 0.7857949903526917, "grad_norm": 1.1778502627881269, "learning_rate": 1.1558364019757838e-07, "loss": 0.2288, "step": 45206 }, { "epoch": 0.7858123728901946, "grad_norm": 1.458860115822222, "learning_rate": 1.155656407145848e-07, "loss": 0.1655, "step": 45207 }, { "epoch": 0.7858297554276973, "grad_norm": 1.5134098347630702, "learning_rate": 1.1554764245007165e-07, "loss": 0.1647, "step": 45208 }, { "epoch": 0.7858471379652001, "grad_norm": 1.171095083156891, "learning_rate": 1.1552964540409605e-07, "loss": 0.1362, "step": 45209 }, { "epoch": 0.785864520502703, "grad_norm": 1.4107015282611952, "learning_rate": 1.1551164957671467e-07, "loss": 0.3312, "step": 45210 }, { "epoch": 0.7858819030402058, "grad_norm": 1.372691423222253, "learning_rate": 1.1549365496798464e-07, "loss": 0.2224, "step": 45211 }, { "epoch": 0.7858992855777086, "grad_norm": 1.29643246048078, "learning_rate": 1.1547566157796317e-07, "loss": 0.1555, "step": 45212 }, { "epoch": 0.7859166681152114, "grad_norm": 2.249112296731947, "learning_rate": 1.1545766940670725e-07, "loss": 0.2081, "step": 45213 }, { "epoch": 0.7859340506527143, "grad_norm": 1.4378395110914648, "learning_rate": 1.1543967845427394e-07, "loss": 0.2666, "step": 45214 }, { "epoch": 0.7859514331902171, "grad_norm": 5.9381072997876965, "learning_rate": 1.1542168872072012e-07, "loss": 0.216, "step": 45215 }, { "epoch": 0.7859688157277199, "grad_norm": 1.5891404237359505, "learning_rate": 1.154037002061029e-07, "loss": 0.1943, "step": 45216 }, { "epoch": 0.7859861982652228, "grad_norm": 1.4432195143346667, "learning_rate": 1.1538571291047933e-07, "loss": 0.255, "step": 45217 }, { "epoch": 0.7860035808027256, "grad_norm": 1.2281957870902678, "learning_rate": 1.1536772683390634e-07, "loss": 0.177, "step": 45218 }, { "epoch": 0.7860209633402284, "grad_norm": 2.0419909346863063, "learning_rate": 1.1534974197644081e-07, "loss": 0.2785, "step": 45219 }, { "epoch": 0.7860383458777312, "grad_norm": 1.2117538948880873, "learning_rate": 1.1533175833814008e-07, "loss": 0.2096, "step": 45220 }, { "epoch": 0.7860557284152341, "grad_norm": 2.732448371121973, "learning_rate": 1.1531377591906094e-07, "loss": 0.257, "step": 45221 }, { "epoch": 0.7860731109527369, "grad_norm": 1.065858125847797, "learning_rate": 1.1529579471926043e-07, "loss": 0.1395, "step": 45222 }, { "epoch": 0.7860904934902397, "grad_norm": 1.2590410425452212, "learning_rate": 1.152778147387955e-07, "loss": 0.301, "step": 45223 }, { "epoch": 0.7861078760277426, "grad_norm": 1.1307723314054356, "learning_rate": 1.1525983597772321e-07, "loss": 0.1906, "step": 45224 }, { "epoch": 0.7861252585652454, "grad_norm": 2.945663460820138, "learning_rate": 1.1524185843610052e-07, "loss": 0.1597, "step": 45225 }, { "epoch": 0.7861426411027482, "grad_norm": 1.3690865308469469, "learning_rate": 1.1522388211398437e-07, "loss": 0.1977, "step": 45226 }, { "epoch": 0.786160023640251, "grad_norm": 1.8227783943690365, "learning_rate": 1.1520590701143162e-07, "loss": 0.2695, "step": 45227 }, { "epoch": 0.7861774061777538, "grad_norm": 1.639580354704659, "learning_rate": 1.1518793312849967e-07, "loss": 0.2412, "step": 45228 }, { "epoch": 0.7861947887152566, "grad_norm": 1.3434130894020382, "learning_rate": 1.1516996046524508e-07, "loss": 0.2428, "step": 45229 }, { "epoch": 0.7862121712527594, "grad_norm": 1.4193688551307557, "learning_rate": 1.1515198902172479e-07, "loss": 0.2885, "step": 45230 }, { "epoch": 0.7862295537902623, "grad_norm": 1.384766453082142, "learning_rate": 1.1513401879799606e-07, "loss": 0.1636, "step": 45231 }, { "epoch": 0.7862469363277651, "grad_norm": 1.7872866481213958, "learning_rate": 1.1511604979411565e-07, "loss": 0.4655, "step": 45232 }, { "epoch": 0.7862643188652679, "grad_norm": 1.4094286197136452, "learning_rate": 1.150980820101406e-07, "loss": 0.2343, "step": 45233 }, { "epoch": 0.7862817014027708, "grad_norm": 0.8826064771055316, "learning_rate": 1.1508011544612784e-07, "loss": 0.1116, "step": 45234 }, { "epoch": 0.7862990839402736, "grad_norm": 1.3287413674336104, "learning_rate": 1.1506215010213427e-07, "loss": 0.2688, "step": 45235 }, { "epoch": 0.7863164664777764, "grad_norm": 1.8452705498395963, "learning_rate": 1.1504418597821686e-07, "loss": 0.142, "step": 45236 }, { "epoch": 0.7863338490152793, "grad_norm": 0.8304725994202431, "learning_rate": 1.1502622307443255e-07, "loss": 0.3971, "step": 45237 }, { "epoch": 0.7863512315527821, "grad_norm": 1.6625481714872434, "learning_rate": 1.1500826139083808e-07, "loss": 0.1829, "step": 45238 }, { "epoch": 0.7863686140902849, "grad_norm": 1.6597282557030293, "learning_rate": 1.149903009274908e-07, "loss": 0.2146, "step": 45239 }, { "epoch": 0.7863859966277877, "grad_norm": 3.9374858656304954, "learning_rate": 1.1497234168444737e-07, "loss": 0.1972, "step": 45240 }, { "epoch": 0.7864033791652906, "grad_norm": 1.2834193033818606, "learning_rate": 1.1495438366176486e-07, "loss": 0.2981, "step": 45241 }, { "epoch": 0.7864207617027934, "grad_norm": 1.06266973107632, "learning_rate": 1.1493642685949983e-07, "loss": 0.1869, "step": 45242 }, { "epoch": 0.7864381442402962, "grad_norm": 1.2056902168527421, "learning_rate": 1.1491847127770954e-07, "loss": 0.2176, "step": 45243 }, { "epoch": 0.7864555267777991, "grad_norm": 1.5516477095280687, "learning_rate": 1.1490051691645087e-07, "loss": 0.1342, "step": 45244 }, { "epoch": 0.7864729093153019, "grad_norm": 1.0154314414273449, "learning_rate": 1.1488256377578059e-07, "loss": 0.2067, "step": 45245 }, { "epoch": 0.7864902918528047, "grad_norm": 1.147508897587365, "learning_rate": 1.1486461185575558e-07, "loss": 0.1826, "step": 45246 }, { "epoch": 0.7865076743903074, "grad_norm": 0.9128422661434428, "learning_rate": 1.148466611564331e-07, "loss": 0.2401, "step": 45247 }, { "epoch": 0.7865250569278103, "grad_norm": 1.893107654572888, "learning_rate": 1.1482871167786967e-07, "loss": 0.1871, "step": 45248 }, { "epoch": 0.7865424394653131, "grad_norm": 1.0785053608770527, "learning_rate": 1.1481076342012225e-07, "loss": 0.2789, "step": 45249 }, { "epoch": 0.7865598220028159, "grad_norm": 1.9518904137701765, "learning_rate": 1.1479281638324768e-07, "loss": 0.1627, "step": 45250 }, { "epoch": 0.7865772045403188, "grad_norm": 1.03855365022525, "learning_rate": 1.14774870567303e-07, "loss": 0.2093, "step": 45251 }, { "epoch": 0.7865945870778216, "grad_norm": 2.398265258614914, "learning_rate": 1.147569259723451e-07, "loss": 0.1954, "step": 45252 }, { "epoch": 0.7866119696153244, "grad_norm": 1.409298222438722, "learning_rate": 1.1473898259843073e-07, "loss": 0.2635, "step": 45253 }, { "epoch": 0.7866293521528273, "grad_norm": 1.9733690531881223, "learning_rate": 1.1472104044561682e-07, "loss": 0.3242, "step": 45254 }, { "epoch": 0.7866467346903301, "grad_norm": 1.3714067186094236, "learning_rate": 1.1470309951396024e-07, "loss": 0.1554, "step": 45255 }, { "epoch": 0.7866641172278329, "grad_norm": 2.6249869333949727, "learning_rate": 1.1468515980351784e-07, "loss": 0.3201, "step": 45256 }, { "epoch": 0.7866814997653357, "grad_norm": 1.9277692685993477, "learning_rate": 1.1466722131434647e-07, "loss": 0.2614, "step": 45257 }, { "epoch": 0.7866988823028386, "grad_norm": 1.5691026461704956, "learning_rate": 1.1464928404650287e-07, "loss": 0.2226, "step": 45258 }, { "epoch": 0.7867162648403414, "grad_norm": 4.330676835341532, "learning_rate": 1.1463134800004415e-07, "loss": 0.3205, "step": 45259 }, { "epoch": 0.7867336473778442, "grad_norm": 1.2861243109659175, "learning_rate": 1.1461341317502715e-07, "loss": 0.4764, "step": 45260 }, { "epoch": 0.7867510299153471, "grad_norm": 2.004952136947325, "learning_rate": 1.145954795715084e-07, "loss": 0.3318, "step": 45261 }, { "epoch": 0.7867684124528499, "grad_norm": 1.3324093362094176, "learning_rate": 1.1457754718954499e-07, "loss": 0.2242, "step": 45262 }, { "epoch": 0.7867857949903527, "grad_norm": 2.633351052974845, "learning_rate": 1.1455961602919378e-07, "loss": 0.2483, "step": 45263 }, { "epoch": 0.7868031775278556, "grad_norm": 1.9925195275430938, "learning_rate": 1.1454168609051151e-07, "loss": 0.4099, "step": 45264 }, { "epoch": 0.7868205600653584, "grad_norm": 2.11800267347601, "learning_rate": 1.1452375737355502e-07, "loss": 0.2777, "step": 45265 }, { "epoch": 0.7868379426028612, "grad_norm": 1.8739616467610811, "learning_rate": 1.1450582987838114e-07, "loss": 0.2091, "step": 45266 }, { "epoch": 0.7868553251403639, "grad_norm": 1.2639367416803973, "learning_rate": 1.1448790360504668e-07, "loss": 0.1317, "step": 45267 }, { "epoch": 0.7868727076778668, "grad_norm": 1.3553306436647292, "learning_rate": 1.1446997855360851e-07, "loss": 0.2123, "step": 45268 }, { "epoch": 0.7868900902153696, "grad_norm": 1.4123479965203636, "learning_rate": 1.1445205472412322e-07, "loss": 0.1728, "step": 45269 }, { "epoch": 0.7869074727528724, "grad_norm": 0.6462291896039489, "learning_rate": 1.1443413211664804e-07, "loss": 0.2443, "step": 45270 }, { "epoch": 0.7869248552903753, "grad_norm": 1.2596348935391022, "learning_rate": 1.1441621073123947e-07, "loss": 0.5003, "step": 45271 }, { "epoch": 0.7869422378278781, "grad_norm": 1.3628992007437721, "learning_rate": 1.1439829056795458e-07, "loss": 0.1958, "step": 45272 }, { "epoch": 0.7869596203653809, "grad_norm": 1.4322886815743932, "learning_rate": 1.1438037162684966e-07, "loss": 0.3437, "step": 45273 }, { "epoch": 0.7869770029028837, "grad_norm": 1.5890464816470167, "learning_rate": 1.1436245390798199e-07, "loss": 0.2278, "step": 45274 }, { "epoch": 0.7869943854403866, "grad_norm": 1.8624038110528316, "learning_rate": 1.1434453741140815e-07, "loss": 0.2378, "step": 45275 }, { "epoch": 0.7870117679778894, "grad_norm": 1.4681943411051446, "learning_rate": 1.1432662213718503e-07, "loss": 0.1995, "step": 45276 }, { "epoch": 0.7870291505153922, "grad_norm": 1.4900821803683082, "learning_rate": 1.1430870808536919e-07, "loss": 0.1515, "step": 45277 }, { "epoch": 0.7870465330528951, "grad_norm": 1.3171840889961086, "learning_rate": 1.142907952560177e-07, "loss": 0.2813, "step": 45278 }, { "epoch": 0.7870639155903979, "grad_norm": 0.9718518513596387, "learning_rate": 1.1427288364918735e-07, "loss": 0.1717, "step": 45279 }, { "epoch": 0.7870812981279007, "grad_norm": 1.0123293398416955, "learning_rate": 1.142549732649346e-07, "loss": 0.1342, "step": 45280 }, { "epoch": 0.7870986806654036, "grad_norm": 2.0500070544106763, "learning_rate": 1.1423706410331629e-07, "loss": 0.1254, "step": 45281 }, { "epoch": 0.7871160632029064, "grad_norm": 1.312972744032468, "learning_rate": 1.1421915616438942e-07, "loss": 0.1657, "step": 45282 }, { "epoch": 0.7871334457404092, "grad_norm": 1.4371437609486766, "learning_rate": 1.1420124944821063e-07, "loss": 0.2359, "step": 45283 }, { "epoch": 0.787150828277912, "grad_norm": 2.4181485189691747, "learning_rate": 1.1418334395483664e-07, "loss": 0.2361, "step": 45284 }, { "epoch": 0.7871682108154149, "grad_norm": 0.9044932412854501, "learning_rate": 1.1416543968432419e-07, "loss": 0.1394, "step": 45285 }, { "epoch": 0.7871855933529177, "grad_norm": 0.8647046345012936, "learning_rate": 1.1414753663673011e-07, "loss": 0.2094, "step": 45286 }, { "epoch": 0.7872029758904204, "grad_norm": 2.3456282370234276, "learning_rate": 1.1412963481211108e-07, "loss": 0.2478, "step": 45287 }, { "epoch": 0.7872203584279233, "grad_norm": 1.2362978778510774, "learning_rate": 1.1411173421052383e-07, "loss": 0.2543, "step": 45288 }, { "epoch": 0.7872377409654261, "grad_norm": 1.5585688695508095, "learning_rate": 1.14093834832025e-07, "loss": 0.124, "step": 45289 }, { "epoch": 0.7872551235029289, "grad_norm": 1.4254319422802502, "learning_rate": 1.1407593667667154e-07, "loss": 0.1664, "step": 45290 }, { "epoch": 0.7872725060404318, "grad_norm": 1.9453531890186353, "learning_rate": 1.1405803974452028e-07, "loss": 0.2401, "step": 45291 }, { "epoch": 0.7872898885779346, "grad_norm": 1.1776811266911564, "learning_rate": 1.1404014403562745e-07, "loss": 0.2492, "step": 45292 }, { "epoch": 0.7873072711154374, "grad_norm": 1.1147104203520029, "learning_rate": 1.1402224955005019e-07, "loss": 0.1156, "step": 45293 }, { "epoch": 0.7873246536529402, "grad_norm": 1.4611143681703824, "learning_rate": 1.140043562878451e-07, "loss": 0.1286, "step": 45294 }, { "epoch": 0.7873420361904431, "grad_norm": 1.5194798031138483, "learning_rate": 1.1398646424906887e-07, "loss": 0.1127, "step": 45295 }, { "epoch": 0.7873594187279459, "grad_norm": 2.19829567957443, "learning_rate": 1.1396857343377814e-07, "loss": 0.1782, "step": 45296 }, { "epoch": 0.7873768012654487, "grad_norm": 2.823201549720876, "learning_rate": 1.1395068384202993e-07, "loss": 0.1517, "step": 45297 }, { "epoch": 0.7873941838029516, "grad_norm": 1.943607525273451, "learning_rate": 1.1393279547388063e-07, "loss": 0.1976, "step": 45298 }, { "epoch": 0.7874115663404544, "grad_norm": 2.783620804640528, "learning_rate": 1.13914908329387e-07, "loss": 0.3289, "step": 45299 }, { "epoch": 0.7874289488779572, "grad_norm": 0.7887066386116482, "learning_rate": 1.138970224086056e-07, "loss": 0.2321, "step": 45300 }, { "epoch": 0.78744633141546, "grad_norm": 1.2480847634771166, "learning_rate": 1.1387913771159342e-07, "loss": 0.208, "step": 45301 }, { "epoch": 0.7874637139529629, "grad_norm": 1.2664850941988592, "learning_rate": 1.1386125423840703e-07, "loss": 0.1902, "step": 45302 }, { "epoch": 0.7874810964904657, "grad_norm": 2.0902590163784174, "learning_rate": 1.1384337198910305e-07, "loss": 0.1819, "step": 45303 }, { "epoch": 0.7874984790279685, "grad_norm": 1.3306254683938312, "learning_rate": 1.1382549096373822e-07, "loss": 0.182, "step": 45304 }, { "epoch": 0.7875158615654714, "grad_norm": 1.9883245225452302, "learning_rate": 1.138076111623692e-07, "loss": 0.2259, "step": 45305 }, { "epoch": 0.7875332441029742, "grad_norm": 1.2812594582456749, "learning_rate": 1.1378973258505264e-07, "loss": 0.1758, "step": 45306 }, { "epoch": 0.7875506266404769, "grad_norm": 1.515809230250281, "learning_rate": 1.1377185523184518e-07, "loss": 0.1295, "step": 45307 }, { "epoch": 0.7875680091779798, "grad_norm": 3.0061194780732188, "learning_rate": 1.1375397910280343e-07, "loss": 0.2234, "step": 45308 }, { "epoch": 0.7875853917154826, "grad_norm": 1.4804822682103385, "learning_rate": 1.1373610419798424e-07, "loss": 0.2051, "step": 45309 }, { "epoch": 0.7876027742529854, "grad_norm": 1.3067482758541356, "learning_rate": 1.1371823051744434e-07, "loss": 0.1897, "step": 45310 }, { "epoch": 0.7876201567904882, "grad_norm": 1.772149418922651, "learning_rate": 1.137003580612399e-07, "loss": 0.2189, "step": 45311 }, { "epoch": 0.7876375393279911, "grad_norm": 1.320213075523303, "learning_rate": 1.1368248682942804e-07, "loss": 0.205, "step": 45312 }, { "epoch": 0.7876549218654939, "grad_norm": 1.3908974285901206, "learning_rate": 1.1366461682206524e-07, "loss": 0.2808, "step": 45313 }, { "epoch": 0.7876723044029967, "grad_norm": 0.9732366505214535, "learning_rate": 1.136467480392081e-07, "loss": 0.2393, "step": 45314 }, { "epoch": 0.7876896869404996, "grad_norm": 0.891415943610393, "learning_rate": 1.136288804809133e-07, "loss": 0.1724, "step": 45315 }, { "epoch": 0.7877070694780024, "grad_norm": 1.8191313873855572, "learning_rate": 1.1361101414723751e-07, "loss": 0.2066, "step": 45316 }, { "epoch": 0.7877244520155052, "grad_norm": 1.8915621142069352, "learning_rate": 1.1359314903823725e-07, "loss": 0.2089, "step": 45317 }, { "epoch": 0.7877418345530081, "grad_norm": 1.4441410488321025, "learning_rate": 1.1357528515396925e-07, "loss": 0.2788, "step": 45318 }, { "epoch": 0.7877592170905109, "grad_norm": 2.155427892582057, "learning_rate": 1.135574224944899e-07, "loss": 0.2559, "step": 45319 }, { "epoch": 0.7877765996280137, "grad_norm": 1.1456984968264383, "learning_rate": 1.1353956105985624e-07, "loss": 0.1695, "step": 45320 }, { "epoch": 0.7877939821655165, "grad_norm": 1.4930841029084652, "learning_rate": 1.1352170085012458e-07, "loss": 0.1974, "step": 45321 }, { "epoch": 0.7878113647030194, "grad_norm": 1.0475734586727403, "learning_rate": 1.1350384186535156e-07, "loss": 0.2274, "step": 45322 }, { "epoch": 0.7878287472405222, "grad_norm": 1.4880681207102955, "learning_rate": 1.1348598410559385e-07, "loss": 0.2085, "step": 45323 }, { "epoch": 0.787846129778025, "grad_norm": 1.5751359744870916, "learning_rate": 1.1346812757090807e-07, "loss": 0.2318, "step": 45324 }, { "epoch": 0.7878635123155279, "grad_norm": 1.3731204082892434, "learning_rate": 1.1345027226135067e-07, "loss": 0.2045, "step": 45325 }, { "epoch": 0.7878808948530307, "grad_norm": 1.4884882424010475, "learning_rate": 1.134324181769784e-07, "loss": 0.2844, "step": 45326 }, { "epoch": 0.7878982773905334, "grad_norm": 2.537880088034613, "learning_rate": 1.1341456531784765e-07, "loss": 0.2517, "step": 45327 }, { "epoch": 0.7879156599280362, "grad_norm": 1.4652333220985128, "learning_rate": 1.1339671368401533e-07, "loss": 0.1569, "step": 45328 }, { "epoch": 0.7879330424655391, "grad_norm": 7.7636027758895265, "learning_rate": 1.1337886327553792e-07, "loss": 0.4242, "step": 45329 }, { "epoch": 0.7879504250030419, "grad_norm": 1.354544288071769, "learning_rate": 1.1336101409247173e-07, "loss": 0.1922, "step": 45330 }, { "epoch": 0.7879678075405447, "grad_norm": 0.888321813645869, "learning_rate": 1.1334316613487343e-07, "loss": 0.2035, "step": 45331 }, { "epoch": 0.7879851900780476, "grad_norm": 1.447258499754905, "learning_rate": 1.1332531940279982e-07, "loss": 0.1488, "step": 45332 }, { "epoch": 0.7880025726155504, "grad_norm": 1.152373735321956, "learning_rate": 1.1330747389630735e-07, "loss": 0.1483, "step": 45333 }, { "epoch": 0.7880199551530532, "grad_norm": 1.251533348137893, "learning_rate": 1.1328962961545252e-07, "loss": 0.2711, "step": 45334 }, { "epoch": 0.7880373376905561, "grad_norm": 3.5631222575457597, "learning_rate": 1.132717865602919e-07, "loss": 0.2886, "step": 45335 }, { "epoch": 0.7880547202280589, "grad_norm": 1.4688768890966128, "learning_rate": 1.1325394473088212e-07, "loss": 0.2573, "step": 45336 }, { "epoch": 0.7880721027655617, "grad_norm": 1.17063962693244, "learning_rate": 1.1323610412727969e-07, "loss": 0.1538, "step": 45337 }, { "epoch": 0.7880894853030646, "grad_norm": 1.8298774534337627, "learning_rate": 1.132182647495411e-07, "loss": 0.2048, "step": 45338 }, { "epoch": 0.7881068678405674, "grad_norm": 1.6882430378016424, "learning_rate": 1.1320042659772278e-07, "loss": 0.3031, "step": 45339 }, { "epoch": 0.7881242503780702, "grad_norm": 1.5188107866148686, "learning_rate": 1.131825896718816e-07, "loss": 0.2279, "step": 45340 }, { "epoch": 0.788141632915573, "grad_norm": 1.7597512571705654, "learning_rate": 1.1316475397207392e-07, "loss": 0.1678, "step": 45341 }, { "epoch": 0.7881590154530759, "grad_norm": 1.043348542888039, "learning_rate": 1.1314691949835625e-07, "loss": 0.152, "step": 45342 }, { "epoch": 0.7881763979905787, "grad_norm": 0.7952923158554874, "learning_rate": 1.1312908625078521e-07, "loss": 0.2093, "step": 45343 }, { "epoch": 0.7881937805280815, "grad_norm": 1.525351115836522, "learning_rate": 1.131112542294172e-07, "loss": 0.1549, "step": 45344 }, { "epoch": 0.7882111630655844, "grad_norm": 1.1534676550902159, "learning_rate": 1.1309342343430878e-07, "loss": 0.2078, "step": 45345 }, { "epoch": 0.7882285456030872, "grad_norm": 1.95827307462861, "learning_rate": 1.1307559386551646e-07, "loss": 0.2371, "step": 45346 }, { "epoch": 0.7882459281405899, "grad_norm": 3.568706145142946, "learning_rate": 1.130577655230967e-07, "loss": 0.3283, "step": 45347 }, { "epoch": 0.7882633106780927, "grad_norm": 4.621787892646912, "learning_rate": 1.130399384071063e-07, "loss": 0.2844, "step": 45348 }, { "epoch": 0.7882806932155956, "grad_norm": 1.4315061827866336, "learning_rate": 1.1302211251760141e-07, "loss": 0.3178, "step": 45349 }, { "epoch": 0.7882980757530984, "grad_norm": 1.513382497970985, "learning_rate": 1.1300428785463855e-07, "loss": 0.1901, "step": 45350 }, { "epoch": 0.7883154582906012, "grad_norm": 1.4853676556254924, "learning_rate": 1.1298646441827442e-07, "loss": 0.3002, "step": 45351 }, { "epoch": 0.7883328408281041, "grad_norm": 1.5953935161853905, "learning_rate": 1.1296864220856545e-07, "loss": 0.1956, "step": 45352 }, { "epoch": 0.7883502233656069, "grad_norm": 1.4223553829848992, "learning_rate": 1.1295082122556809e-07, "loss": 0.2015, "step": 45353 }, { "epoch": 0.7883676059031097, "grad_norm": 4.0179912564372, "learning_rate": 1.1293300146933881e-07, "loss": 0.2652, "step": 45354 }, { "epoch": 0.7883849884406126, "grad_norm": 1.0001074848611708, "learning_rate": 1.1291518293993413e-07, "loss": 0.1307, "step": 45355 }, { "epoch": 0.7884023709781154, "grad_norm": 1.0006545811889782, "learning_rate": 1.1289736563741054e-07, "loss": 0.1442, "step": 45356 }, { "epoch": 0.7884197535156182, "grad_norm": 1.665900566698287, "learning_rate": 1.1287954956182444e-07, "loss": 0.268, "step": 45357 }, { "epoch": 0.788437136053121, "grad_norm": 1.066792398137421, "learning_rate": 1.1286173471323218e-07, "loss": 0.2941, "step": 45358 }, { "epoch": 0.7884545185906239, "grad_norm": 9.250548118733349, "learning_rate": 1.1284392109169055e-07, "loss": 0.4541, "step": 45359 }, { "epoch": 0.7884719011281267, "grad_norm": 1.5624181175444538, "learning_rate": 1.128261086972559e-07, "loss": 0.3224, "step": 45360 }, { "epoch": 0.7884892836656295, "grad_norm": 0.7061117849032392, "learning_rate": 1.1280829752998467e-07, "loss": 0.2177, "step": 45361 }, { "epoch": 0.7885066662031324, "grad_norm": 1.3791588463302549, "learning_rate": 1.1279048758993304e-07, "loss": 0.4209, "step": 45362 }, { "epoch": 0.7885240487406352, "grad_norm": 2.823685940499433, "learning_rate": 1.1277267887715786e-07, "loss": 0.2239, "step": 45363 }, { "epoch": 0.788541431278138, "grad_norm": 1.0866882386494938, "learning_rate": 1.1275487139171542e-07, "loss": 0.3611, "step": 45364 }, { "epoch": 0.7885588138156409, "grad_norm": 2.199976490142988, "learning_rate": 1.127370651336621e-07, "loss": 0.3273, "step": 45365 }, { "epoch": 0.7885761963531436, "grad_norm": 1.3272144653575717, "learning_rate": 1.1271926010305427e-07, "loss": 0.2051, "step": 45366 }, { "epoch": 0.7885935788906464, "grad_norm": 1.551631287924047, "learning_rate": 1.1270145629994876e-07, "loss": 0.1608, "step": 45367 }, { "epoch": 0.7886109614281492, "grad_norm": 1.578057472367604, "learning_rate": 1.1268365372440159e-07, "loss": 0.3083, "step": 45368 }, { "epoch": 0.7886283439656521, "grad_norm": 1.757083350425066, "learning_rate": 1.126658523764693e-07, "loss": 0.3238, "step": 45369 }, { "epoch": 0.7886457265031549, "grad_norm": 1.6867553532035535, "learning_rate": 1.1264805225620822e-07, "loss": 0.2032, "step": 45370 }, { "epoch": 0.7886631090406577, "grad_norm": 1.817151921339015, "learning_rate": 1.1263025336367499e-07, "loss": 0.2366, "step": 45371 }, { "epoch": 0.7886804915781606, "grad_norm": 1.514484438669186, "learning_rate": 1.1261245569892597e-07, "loss": 0.2481, "step": 45372 }, { "epoch": 0.7886978741156634, "grad_norm": 1.2325261817123612, "learning_rate": 1.1259465926201744e-07, "loss": 0.479, "step": 45373 }, { "epoch": 0.7887152566531662, "grad_norm": 1.4032603894671196, "learning_rate": 1.1257686405300593e-07, "loss": 0.2478, "step": 45374 }, { "epoch": 0.788732639190669, "grad_norm": 2.859114306996757, "learning_rate": 1.1255907007194776e-07, "loss": 0.2402, "step": 45375 }, { "epoch": 0.7887500217281719, "grad_norm": 1.8706796595435444, "learning_rate": 1.125412773188994e-07, "loss": 0.1589, "step": 45376 }, { "epoch": 0.7887674042656747, "grad_norm": 0.7486146786939235, "learning_rate": 1.1252348579391719e-07, "loss": 0.1447, "step": 45377 }, { "epoch": 0.7887847868031775, "grad_norm": 2.4478661022357833, "learning_rate": 1.125056954970574e-07, "loss": 0.2692, "step": 45378 }, { "epoch": 0.7888021693406804, "grad_norm": 1.25400880201464, "learning_rate": 1.124879064283767e-07, "loss": 0.2273, "step": 45379 }, { "epoch": 0.7888195518781832, "grad_norm": 1.7649777589662061, "learning_rate": 1.1247011858793142e-07, "loss": 0.3507, "step": 45380 }, { "epoch": 0.788836934415686, "grad_norm": 0.9547483100082239, "learning_rate": 1.1245233197577763e-07, "loss": 0.2304, "step": 45381 }, { "epoch": 0.7888543169531889, "grad_norm": 1.6086723317375364, "learning_rate": 1.1243454659197205e-07, "loss": 0.1047, "step": 45382 }, { "epoch": 0.7888716994906917, "grad_norm": 2.2466641016652686, "learning_rate": 1.1241676243657088e-07, "loss": 0.311, "step": 45383 }, { "epoch": 0.7888890820281945, "grad_norm": 1.6510207544249418, "learning_rate": 1.1239897950963062e-07, "loss": 0.1842, "step": 45384 }, { "epoch": 0.7889064645656974, "grad_norm": 2.041886893812867, "learning_rate": 1.1238119781120731e-07, "loss": 0.1475, "step": 45385 }, { "epoch": 0.7889238471032001, "grad_norm": 1.5583646316785933, "learning_rate": 1.123634173413579e-07, "loss": 0.1398, "step": 45386 }, { "epoch": 0.7889412296407029, "grad_norm": 1.2585182301548887, "learning_rate": 1.123456381001382e-07, "loss": 0.1567, "step": 45387 }, { "epoch": 0.7889586121782057, "grad_norm": 1.0437048725574312, "learning_rate": 1.1232786008760476e-07, "loss": 0.2142, "step": 45388 }, { "epoch": 0.7889759947157086, "grad_norm": 1.887303290162537, "learning_rate": 1.123100833038138e-07, "loss": 0.2544, "step": 45389 }, { "epoch": 0.7889933772532114, "grad_norm": 1.5055215380653129, "learning_rate": 1.1229230774882192e-07, "loss": 0.1373, "step": 45390 }, { "epoch": 0.7890107597907142, "grad_norm": 1.5640915070492385, "learning_rate": 1.1227453342268534e-07, "loss": 0.2126, "step": 45391 }, { "epoch": 0.789028142328217, "grad_norm": 2.0864459880673194, "learning_rate": 1.1225676032546039e-07, "loss": 0.1987, "step": 45392 }, { "epoch": 0.7890455248657199, "grad_norm": 1.0318916612641393, "learning_rate": 1.1223898845720337e-07, "loss": 0.1619, "step": 45393 }, { "epoch": 0.7890629074032227, "grad_norm": 2.969009561455029, "learning_rate": 1.1222121781797061e-07, "loss": 0.2739, "step": 45394 }, { "epoch": 0.7890802899407255, "grad_norm": 1.8152248159425934, "learning_rate": 1.1220344840781848e-07, "loss": 0.1705, "step": 45395 }, { "epoch": 0.7890976724782284, "grad_norm": 1.4244754212721555, "learning_rate": 1.121856802268033e-07, "loss": 0.2073, "step": 45396 }, { "epoch": 0.7891150550157312, "grad_norm": 0.9176779076977609, "learning_rate": 1.121679132749812e-07, "loss": 0.162, "step": 45397 }, { "epoch": 0.789132437553234, "grad_norm": 9.861302939757413, "learning_rate": 1.1215014755240881e-07, "loss": 0.6257, "step": 45398 }, { "epoch": 0.7891498200907369, "grad_norm": 1.2170266926187214, "learning_rate": 1.1213238305914241e-07, "loss": 0.1795, "step": 45399 }, { "epoch": 0.7891672026282397, "grad_norm": 1.050711026192802, "learning_rate": 1.1211461979523796e-07, "loss": 0.1768, "step": 45400 }, { "epoch": 0.7891845851657425, "grad_norm": 2.15061679415215, "learning_rate": 1.1209685776075207e-07, "loss": 0.3254, "step": 45401 }, { "epoch": 0.7892019677032454, "grad_norm": 0.9800250660080274, "learning_rate": 1.1207909695574097e-07, "loss": 0.1733, "step": 45402 }, { "epoch": 0.7892193502407482, "grad_norm": 1.33345254234155, "learning_rate": 1.1206133738026097e-07, "loss": 0.2032, "step": 45403 }, { "epoch": 0.789236732778251, "grad_norm": 1.4546230781221563, "learning_rate": 1.1204357903436828e-07, "loss": 0.2792, "step": 45404 }, { "epoch": 0.7892541153157538, "grad_norm": 1.6169414694838165, "learning_rate": 1.1202582191811921e-07, "loss": 0.1761, "step": 45405 }, { "epoch": 0.7892714978532566, "grad_norm": 1.2950389559330229, "learning_rate": 1.1200806603157009e-07, "loss": 0.2483, "step": 45406 }, { "epoch": 0.7892888803907594, "grad_norm": 1.6800549540741867, "learning_rate": 1.1199031137477721e-07, "loss": 0.1679, "step": 45407 }, { "epoch": 0.7893062629282622, "grad_norm": 1.7844163765927763, "learning_rate": 1.1197255794779664e-07, "loss": 0.1521, "step": 45408 }, { "epoch": 0.7893236454657651, "grad_norm": 1.7583338414754275, "learning_rate": 1.1195480575068494e-07, "loss": 0.198, "step": 45409 }, { "epoch": 0.7893410280032679, "grad_norm": 1.8414421229273648, "learning_rate": 1.1193705478349829e-07, "loss": 0.2777, "step": 45410 }, { "epoch": 0.7893584105407707, "grad_norm": 2.0523434953182726, "learning_rate": 1.1191930504629299e-07, "loss": 0.2576, "step": 45411 }, { "epoch": 0.7893757930782735, "grad_norm": 1.1753029170289682, "learning_rate": 1.1190155653912503e-07, "loss": 0.1487, "step": 45412 }, { "epoch": 0.7893931756157764, "grad_norm": 0.9937208893445362, "learning_rate": 1.1188380926205093e-07, "loss": 0.2349, "step": 45413 }, { "epoch": 0.7894105581532792, "grad_norm": 2.7961303804268565, "learning_rate": 1.1186606321512687e-07, "loss": 0.1944, "step": 45414 }, { "epoch": 0.789427940690782, "grad_norm": 2.640378167276446, "learning_rate": 1.1184831839840913e-07, "loss": 0.2147, "step": 45415 }, { "epoch": 0.7894453232282849, "grad_norm": 1.3628515143123434, "learning_rate": 1.1183057481195373e-07, "loss": 0.1851, "step": 45416 }, { "epoch": 0.7894627057657877, "grad_norm": 0.9501249643288003, "learning_rate": 1.1181283245581746e-07, "loss": 0.1725, "step": 45417 }, { "epoch": 0.7894800883032905, "grad_norm": 1.243378875164458, "learning_rate": 1.1179509133005599e-07, "loss": 0.2171, "step": 45418 }, { "epoch": 0.7894974708407934, "grad_norm": 1.0590412264903009, "learning_rate": 1.1177735143472578e-07, "loss": 0.2811, "step": 45419 }, { "epoch": 0.7895148533782962, "grad_norm": 1.2036957260355035, "learning_rate": 1.1175961276988288e-07, "loss": 0.1514, "step": 45420 }, { "epoch": 0.789532235915799, "grad_norm": 1.6910831347311686, "learning_rate": 1.1174187533558377e-07, "loss": 0.4361, "step": 45421 }, { "epoch": 0.7895496184533018, "grad_norm": 3.7514017524432464, "learning_rate": 1.1172413913188461e-07, "loss": 0.3291, "step": 45422 }, { "epoch": 0.7895670009908047, "grad_norm": 1.5675559066905798, "learning_rate": 1.1170640415884158e-07, "loss": 0.2584, "step": 45423 }, { "epoch": 0.7895843835283075, "grad_norm": 1.8122040891632736, "learning_rate": 1.116886704165108e-07, "loss": 0.36, "step": 45424 }, { "epoch": 0.7896017660658103, "grad_norm": 1.670592696020133, "learning_rate": 1.1167093790494863e-07, "loss": 0.3455, "step": 45425 }, { "epoch": 0.7896191486033131, "grad_norm": 2.3589317885083054, "learning_rate": 1.1165320662421124e-07, "loss": 0.2066, "step": 45426 }, { "epoch": 0.7896365311408159, "grad_norm": 1.555157191530332, "learning_rate": 1.1163547657435474e-07, "loss": 0.1884, "step": 45427 }, { "epoch": 0.7896539136783187, "grad_norm": 1.5118442935242942, "learning_rate": 1.116177477554352e-07, "loss": 0.2285, "step": 45428 }, { "epoch": 0.7896712962158215, "grad_norm": 1.9293323416588397, "learning_rate": 1.116000201675092e-07, "loss": 0.241, "step": 45429 }, { "epoch": 0.7896886787533244, "grad_norm": 1.1101903036956737, "learning_rate": 1.1158229381063283e-07, "loss": 0.3416, "step": 45430 }, { "epoch": 0.7897060612908272, "grad_norm": 1.3666406266975257, "learning_rate": 1.1156456868486186e-07, "loss": 0.191, "step": 45431 }, { "epoch": 0.78972344382833, "grad_norm": 1.7855474490480956, "learning_rate": 1.1154684479025294e-07, "loss": 0.2992, "step": 45432 }, { "epoch": 0.7897408263658329, "grad_norm": 1.3292695291062422, "learning_rate": 1.1152912212686211e-07, "loss": 0.1137, "step": 45433 }, { "epoch": 0.7897582089033357, "grad_norm": 1.2161275683246355, "learning_rate": 1.1151140069474546e-07, "loss": 0.1795, "step": 45434 }, { "epoch": 0.7897755914408385, "grad_norm": 1.210543154640288, "learning_rate": 1.1149368049395924e-07, "loss": 0.1464, "step": 45435 }, { "epoch": 0.7897929739783414, "grad_norm": 1.1220120038667265, "learning_rate": 1.1147596152455962e-07, "loss": 0.2818, "step": 45436 }, { "epoch": 0.7898103565158442, "grad_norm": 1.2954596038820194, "learning_rate": 1.1145824378660268e-07, "loss": 0.2042, "step": 45437 }, { "epoch": 0.789827739053347, "grad_norm": 2.2848805739460922, "learning_rate": 1.1144052728014464e-07, "loss": 0.1468, "step": 45438 }, { "epoch": 0.7898451215908499, "grad_norm": 1.607811753486178, "learning_rate": 1.114228120052415e-07, "loss": 0.2039, "step": 45439 }, { "epoch": 0.7898625041283527, "grad_norm": 1.4811245660356875, "learning_rate": 1.1140509796194969e-07, "loss": 0.2819, "step": 45440 }, { "epoch": 0.7898798866658555, "grad_norm": 1.5241478208286285, "learning_rate": 1.113873851503252e-07, "loss": 0.2907, "step": 45441 }, { "epoch": 0.7898972692033583, "grad_norm": 2.077694606765246, "learning_rate": 1.113696735704242e-07, "loss": 0.3377, "step": 45442 }, { "epoch": 0.7899146517408612, "grad_norm": 1.327865187268009, "learning_rate": 1.1135196322230283e-07, "loss": 0.2198, "step": 45443 }, { "epoch": 0.789932034278364, "grad_norm": 2.0051468872123, "learning_rate": 1.1133425410601715e-07, "loss": 0.2448, "step": 45444 }, { "epoch": 0.7899494168158668, "grad_norm": 1.6399021368477116, "learning_rate": 1.113165462216234e-07, "loss": 0.1205, "step": 45445 }, { "epoch": 0.7899667993533696, "grad_norm": 1.7243269543889104, "learning_rate": 1.1129883956917763e-07, "loss": 0.2526, "step": 45446 }, { "epoch": 0.7899841818908724, "grad_norm": 0.9443518939355279, "learning_rate": 1.1128113414873586e-07, "loss": 0.1743, "step": 45447 }, { "epoch": 0.7900015644283752, "grad_norm": 1.253863870992088, "learning_rate": 1.1126342996035448e-07, "loss": 0.1786, "step": 45448 }, { "epoch": 0.790018946965878, "grad_norm": 1.2685732667368599, "learning_rate": 1.1124572700408957e-07, "loss": 0.1623, "step": 45449 }, { "epoch": 0.7900363295033809, "grad_norm": 1.5080342179514903, "learning_rate": 1.1122802527999698e-07, "loss": 0.2267, "step": 45450 }, { "epoch": 0.7900537120408837, "grad_norm": 1.2103292791158753, "learning_rate": 1.1121032478813286e-07, "loss": 0.1996, "step": 45451 }, { "epoch": 0.7900710945783865, "grad_norm": 1.8623045717262057, "learning_rate": 1.1119262552855352e-07, "loss": 0.2529, "step": 45452 }, { "epoch": 0.7900884771158894, "grad_norm": 5.189690727608832, "learning_rate": 1.1117492750131497e-07, "loss": 0.4029, "step": 45453 }, { "epoch": 0.7901058596533922, "grad_norm": 1.8632472284194892, "learning_rate": 1.1115723070647326e-07, "loss": 0.4277, "step": 45454 }, { "epoch": 0.790123242190895, "grad_norm": 1.9880631998134883, "learning_rate": 1.1113953514408453e-07, "loss": 0.2019, "step": 45455 }, { "epoch": 0.7901406247283979, "grad_norm": 3.808297158917963, "learning_rate": 1.1112184081420483e-07, "loss": 0.2953, "step": 45456 }, { "epoch": 0.7901580072659007, "grad_norm": 1.2834942473446869, "learning_rate": 1.1110414771689031e-07, "loss": 0.2097, "step": 45457 }, { "epoch": 0.7901753898034035, "grad_norm": 1.5025958311684982, "learning_rate": 1.1108645585219695e-07, "loss": 0.2225, "step": 45458 }, { "epoch": 0.7901927723409063, "grad_norm": 2.0622652959269674, "learning_rate": 1.1106876522018077e-07, "loss": 0.2626, "step": 45459 }, { "epoch": 0.7902101548784092, "grad_norm": 3.2764827118588253, "learning_rate": 1.1105107582089801e-07, "loss": 0.286, "step": 45460 }, { "epoch": 0.790227537415912, "grad_norm": 2.3129165673968877, "learning_rate": 1.1103338765440472e-07, "loss": 0.1909, "step": 45461 }, { "epoch": 0.7902449199534148, "grad_norm": 1.0328013082013598, "learning_rate": 1.1101570072075695e-07, "loss": 0.1617, "step": 45462 }, { "epoch": 0.7902623024909177, "grad_norm": 1.7888066592525174, "learning_rate": 1.1099801502001066e-07, "loss": 0.2524, "step": 45463 }, { "epoch": 0.7902796850284205, "grad_norm": 1.6386344058045474, "learning_rate": 1.10980330552222e-07, "loss": 0.1686, "step": 45464 }, { "epoch": 0.7902970675659233, "grad_norm": 1.2554980009022374, "learning_rate": 1.1096264731744704e-07, "loss": 0.2505, "step": 45465 }, { "epoch": 0.790314450103426, "grad_norm": 2.0907227621005253, "learning_rate": 1.109449653157417e-07, "loss": 0.2026, "step": 45466 }, { "epoch": 0.7903318326409289, "grad_norm": 1.3556647837688818, "learning_rate": 1.10927284547162e-07, "loss": 0.2526, "step": 45467 }, { "epoch": 0.7903492151784317, "grad_norm": 1.4347553981863244, "learning_rate": 1.1090960501176432e-07, "loss": 0.1514, "step": 45468 }, { "epoch": 0.7903665977159345, "grad_norm": 2.538253328678951, "learning_rate": 1.1089192670960435e-07, "loss": 0.2302, "step": 45469 }, { "epoch": 0.7903839802534374, "grad_norm": 2.052749580693796, "learning_rate": 1.1087424964073811e-07, "loss": 0.2484, "step": 45470 }, { "epoch": 0.7904013627909402, "grad_norm": 0.8203478810060421, "learning_rate": 1.108565738052219e-07, "loss": 0.1742, "step": 45471 }, { "epoch": 0.790418745328443, "grad_norm": 1.0965910297306065, "learning_rate": 1.1083889920311151e-07, "loss": 0.2751, "step": 45472 }, { "epoch": 0.7904361278659459, "grad_norm": 1.4501019875226155, "learning_rate": 1.1082122583446313e-07, "loss": 0.1848, "step": 45473 }, { "epoch": 0.7904535104034487, "grad_norm": 1.1505082609624095, "learning_rate": 1.1080355369933265e-07, "loss": 0.2505, "step": 45474 }, { "epoch": 0.7904708929409515, "grad_norm": 1.322976267813356, "learning_rate": 1.1078588279777612e-07, "loss": 0.2682, "step": 45475 }, { "epoch": 0.7904882754784543, "grad_norm": 1.8490482201861105, "learning_rate": 1.1076821312984958e-07, "loss": 0.207, "step": 45476 }, { "epoch": 0.7905056580159572, "grad_norm": 1.376220038957974, "learning_rate": 1.1075054469560896e-07, "loss": 0.2071, "step": 45477 }, { "epoch": 0.79052304055346, "grad_norm": 1.6804025821430162, "learning_rate": 1.1073287749511023e-07, "loss": 0.2165, "step": 45478 }, { "epoch": 0.7905404230909628, "grad_norm": 2.5205339258794117, "learning_rate": 1.1071521152840957e-07, "loss": 0.2152, "step": 45479 }, { "epoch": 0.7905578056284657, "grad_norm": 2.0890476237333186, "learning_rate": 1.1069754679556287e-07, "loss": 0.197, "step": 45480 }, { "epoch": 0.7905751881659685, "grad_norm": 1.476814249566384, "learning_rate": 1.1067988329662609e-07, "loss": 0.3442, "step": 45481 }, { "epoch": 0.7905925707034713, "grad_norm": 1.3195211016555206, "learning_rate": 1.1066222103165529e-07, "loss": 0.2819, "step": 45482 }, { "epoch": 0.7906099532409742, "grad_norm": 0.8430576537670611, "learning_rate": 1.1064456000070637e-07, "loss": 0.171, "step": 45483 }, { "epoch": 0.790627335778477, "grad_norm": 2.414716633142023, "learning_rate": 1.106269002038353e-07, "loss": 0.2727, "step": 45484 }, { "epoch": 0.7906447183159798, "grad_norm": 1.2592844669509295, "learning_rate": 1.1060924164109814e-07, "loss": 0.2076, "step": 45485 }, { "epoch": 0.7906621008534825, "grad_norm": 1.358417541224791, "learning_rate": 1.1059158431255067e-07, "loss": 0.1112, "step": 45486 }, { "epoch": 0.7906794833909854, "grad_norm": 1.1002297382080737, "learning_rate": 1.1057392821824929e-07, "loss": 0.0955, "step": 45487 }, { "epoch": 0.7906968659284882, "grad_norm": 1.2604196204452132, "learning_rate": 1.1055627335824951e-07, "loss": 0.1337, "step": 45488 }, { "epoch": 0.790714248465991, "grad_norm": 1.009395278138627, "learning_rate": 1.1053861973260731e-07, "loss": 0.193, "step": 45489 }, { "epoch": 0.7907316310034939, "grad_norm": 2.704148316838141, "learning_rate": 1.1052096734137895e-07, "loss": 0.3043, "step": 45490 }, { "epoch": 0.7907490135409967, "grad_norm": 2.8498859682343274, "learning_rate": 1.1050331618462016e-07, "loss": 0.1914, "step": 45491 }, { "epoch": 0.7907663960784995, "grad_norm": 1.8843137896642257, "learning_rate": 1.1048566626238698e-07, "loss": 0.3688, "step": 45492 }, { "epoch": 0.7907837786160024, "grad_norm": 2.3705417445709074, "learning_rate": 1.1046801757473528e-07, "loss": 0.2271, "step": 45493 }, { "epoch": 0.7908011611535052, "grad_norm": 1.723359643124518, "learning_rate": 1.1045037012172109e-07, "loss": 0.2447, "step": 45494 }, { "epoch": 0.790818543691008, "grad_norm": 1.5652339549097312, "learning_rate": 1.1043272390340019e-07, "loss": 0.2266, "step": 45495 }, { "epoch": 0.7908359262285108, "grad_norm": 1.5340193781692353, "learning_rate": 1.1041507891982866e-07, "loss": 0.197, "step": 45496 }, { "epoch": 0.7908533087660137, "grad_norm": 1.6432089764754134, "learning_rate": 1.103974351710622e-07, "loss": 0.2368, "step": 45497 }, { "epoch": 0.7908706913035165, "grad_norm": 1.3158477309459298, "learning_rate": 1.1037979265715708e-07, "loss": 0.209, "step": 45498 }, { "epoch": 0.7908880738410193, "grad_norm": 0.8176904314268475, "learning_rate": 1.1036215137816901e-07, "loss": 0.1656, "step": 45499 }, { "epoch": 0.7909054563785222, "grad_norm": 1.2397229570914725, "learning_rate": 1.1034451133415413e-07, "loss": 0.158, "step": 45500 }, { "epoch": 0.790922838916025, "grad_norm": 1.3280547100925468, "learning_rate": 1.1032687252516782e-07, "loss": 0.1931, "step": 45501 }, { "epoch": 0.7909402214535278, "grad_norm": 1.3609179908333298, "learning_rate": 1.1030923495126643e-07, "loss": 0.2463, "step": 45502 }, { "epoch": 0.7909576039910307, "grad_norm": 1.7681275963286847, "learning_rate": 1.102915986125058e-07, "loss": 0.1541, "step": 45503 }, { "epoch": 0.7909749865285335, "grad_norm": 2.2490747934711424, "learning_rate": 1.1027396350894181e-07, "loss": 0.3135, "step": 45504 }, { "epoch": 0.7909923690660362, "grad_norm": 0.8171447938660495, "learning_rate": 1.1025632964063014e-07, "loss": 0.2328, "step": 45505 }, { "epoch": 0.791009751603539, "grad_norm": 1.038468505825615, "learning_rate": 1.1023869700762717e-07, "loss": 0.1843, "step": 45506 }, { "epoch": 0.7910271341410419, "grad_norm": 0.9020622703941829, "learning_rate": 1.102210656099884e-07, "loss": 0.1654, "step": 45507 }, { "epoch": 0.7910445166785447, "grad_norm": 0.8051577778690346, "learning_rate": 1.1020343544776973e-07, "loss": 0.282, "step": 45508 }, { "epoch": 0.7910618992160475, "grad_norm": 1.8466977494394659, "learning_rate": 1.1018580652102699e-07, "loss": 0.1512, "step": 45509 }, { "epoch": 0.7910792817535504, "grad_norm": 1.7687932445893855, "learning_rate": 1.1016817882981632e-07, "loss": 0.1725, "step": 45510 }, { "epoch": 0.7910966642910532, "grad_norm": 1.2610575151083558, "learning_rate": 1.1015055237419346e-07, "loss": 0.2896, "step": 45511 }, { "epoch": 0.791114046828556, "grad_norm": 1.4252600088688916, "learning_rate": 1.101329271542143e-07, "loss": 0.2123, "step": 45512 }, { "epoch": 0.7911314293660588, "grad_norm": 1.398620363433311, "learning_rate": 1.1011530316993461e-07, "loss": 0.2121, "step": 45513 }, { "epoch": 0.7911488119035617, "grad_norm": 1.9460193749251447, "learning_rate": 1.1009768042141033e-07, "loss": 0.2279, "step": 45514 }, { "epoch": 0.7911661944410645, "grad_norm": 1.0400564437266566, "learning_rate": 1.1008005890869732e-07, "loss": 0.2108, "step": 45515 }, { "epoch": 0.7911835769785673, "grad_norm": 1.5165996689592964, "learning_rate": 1.1006243863185138e-07, "loss": 0.2903, "step": 45516 }, { "epoch": 0.7912009595160702, "grad_norm": 1.4850449434819437, "learning_rate": 1.1004481959092826e-07, "loss": 0.3079, "step": 45517 }, { "epoch": 0.791218342053573, "grad_norm": 1.318664584809593, "learning_rate": 1.1002720178598406e-07, "loss": 0.156, "step": 45518 }, { "epoch": 0.7912357245910758, "grad_norm": 1.5668579731880172, "learning_rate": 1.1000958521707459e-07, "loss": 0.3548, "step": 45519 }, { "epoch": 0.7912531071285787, "grad_norm": 1.0103651962598994, "learning_rate": 1.099919698842554e-07, "loss": 0.2621, "step": 45520 }, { "epoch": 0.7912704896660815, "grad_norm": 1.1513551113229257, "learning_rate": 1.0997435578758257e-07, "loss": 0.1673, "step": 45521 }, { "epoch": 0.7912878722035843, "grad_norm": 1.7974424919235081, "learning_rate": 1.0995674292711193e-07, "loss": 0.2869, "step": 45522 }, { "epoch": 0.7913052547410871, "grad_norm": 2.4341345726822676, "learning_rate": 1.0993913130289922e-07, "loss": 0.2214, "step": 45523 }, { "epoch": 0.79132263727859, "grad_norm": 1.1984295959574978, "learning_rate": 1.0992152091500029e-07, "loss": 0.2767, "step": 45524 }, { "epoch": 0.7913400198160927, "grad_norm": 2.0481836275836645, "learning_rate": 1.0990391176347091e-07, "loss": 0.2512, "step": 45525 }, { "epoch": 0.7913574023535955, "grad_norm": 1.1322705319353588, "learning_rate": 1.0988630384836701e-07, "loss": 0.1756, "step": 45526 }, { "epoch": 0.7913747848910984, "grad_norm": 1.7672763513658085, "learning_rate": 1.0986869716974428e-07, "loss": 0.1735, "step": 45527 }, { "epoch": 0.7913921674286012, "grad_norm": 1.1592597972700207, "learning_rate": 1.0985109172765844e-07, "loss": 0.1603, "step": 45528 }, { "epoch": 0.791409549966104, "grad_norm": 1.4467306457744353, "learning_rate": 1.0983348752216554e-07, "loss": 0.2104, "step": 45529 }, { "epoch": 0.7914269325036069, "grad_norm": 1.3435888914146772, "learning_rate": 1.0981588455332125e-07, "loss": 0.1912, "step": 45530 }, { "epoch": 0.7914443150411097, "grad_norm": 3.172918883091158, "learning_rate": 1.0979828282118153e-07, "loss": 0.2427, "step": 45531 }, { "epoch": 0.7914616975786125, "grad_norm": 2.2959756336198085, "learning_rate": 1.0978068232580173e-07, "loss": 0.1832, "step": 45532 }, { "epoch": 0.7914790801161153, "grad_norm": 1.9288985182485263, "learning_rate": 1.0976308306723803e-07, "loss": 0.227, "step": 45533 }, { "epoch": 0.7914964626536182, "grad_norm": 2.2431596204027873, "learning_rate": 1.0974548504554615e-07, "loss": 0.2692, "step": 45534 }, { "epoch": 0.791513845191121, "grad_norm": 1.2189698769581487, "learning_rate": 1.0972788826078177e-07, "loss": 0.1181, "step": 45535 }, { "epoch": 0.7915312277286238, "grad_norm": 1.084052327932493, "learning_rate": 1.0971029271300058e-07, "loss": 0.1586, "step": 45536 }, { "epoch": 0.7915486102661267, "grad_norm": 1.3141720183158454, "learning_rate": 1.0969269840225858e-07, "loss": 0.2048, "step": 45537 }, { "epoch": 0.7915659928036295, "grad_norm": 0.9156915011328886, "learning_rate": 1.0967510532861162e-07, "loss": 0.1694, "step": 45538 }, { "epoch": 0.7915833753411323, "grad_norm": 1.166192947927917, "learning_rate": 1.0965751349211517e-07, "loss": 0.1536, "step": 45539 }, { "epoch": 0.7916007578786352, "grad_norm": 1.4110195974960489, "learning_rate": 1.0963992289282486e-07, "loss": 0.1769, "step": 45540 }, { "epoch": 0.791618140416138, "grad_norm": 2.166582038335584, "learning_rate": 1.0962233353079692e-07, "loss": 0.2589, "step": 45541 }, { "epoch": 0.7916355229536408, "grad_norm": 1.126773696321869, "learning_rate": 1.0960474540608677e-07, "loss": 0.2219, "step": 45542 }, { "epoch": 0.7916529054911436, "grad_norm": 2.2276346685213455, "learning_rate": 1.0958715851875027e-07, "loss": 0.2635, "step": 45543 }, { "epoch": 0.7916702880286465, "grad_norm": 3.28720345735097, "learning_rate": 1.0956957286884316e-07, "loss": 0.6625, "step": 45544 }, { "epoch": 0.7916876705661492, "grad_norm": 1.5907435829372931, "learning_rate": 1.0955198845642116e-07, "loss": 0.1509, "step": 45545 }, { "epoch": 0.791705053103652, "grad_norm": 1.4160564328445742, "learning_rate": 1.0953440528153995e-07, "loss": 0.2158, "step": 45546 }, { "epoch": 0.7917224356411549, "grad_norm": 2.212437750514869, "learning_rate": 1.0951682334425533e-07, "loss": 0.3547, "step": 45547 }, { "epoch": 0.7917398181786577, "grad_norm": 1.7688984203140838, "learning_rate": 1.0949924264462291e-07, "loss": 0.2199, "step": 45548 }, { "epoch": 0.7917572007161605, "grad_norm": 2.6019296693237353, "learning_rate": 1.0948166318269858e-07, "loss": 0.2593, "step": 45549 }, { "epoch": 0.7917745832536633, "grad_norm": 1.2876866489815957, "learning_rate": 1.0946408495853821e-07, "loss": 0.2547, "step": 45550 }, { "epoch": 0.7917919657911662, "grad_norm": 1.2374667194534714, "learning_rate": 1.0944650797219695e-07, "loss": 0.2194, "step": 45551 }, { "epoch": 0.791809348328669, "grad_norm": 1.3125919933309065, "learning_rate": 1.09428932223731e-07, "loss": 0.2467, "step": 45552 }, { "epoch": 0.7918267308661718, "grad_norm": 2.362535060535905, "learning_rate": 1.0941135771319593e-07, "loss": 0.6215, "step": 45553 }, { "epoch": 0.7918441134036747, "grad_norm": 1.3918031414808723, "learning_rate": 1.0939378444064745e-07, "loss": 0.1706, "step": 45554 }, { "epoch": 0.7918614959411775, "grad_norm": 2.6326373100652023, "learning_rate": 1.0937621240614126e-07, "loss": 0.2953, "step": 45555 }, { "epoch": 0.7918788784786803, "grad_norm": 1.2337963531980922, "learning_rate": 1.0935864160973296e-07, "loss": 0.2573, "step": 45556 }, { "epoch": 0.7918962610161832, "grad_norm": 1.4742158282050248, "learning_rate": 1.0934107205147841e-07, "loss": 0.1575, "step": 45557 }, { "epoch": 0.791913643553686, "grad_norm": 1.1528486103039013, "learning_rate": 1.0932350373143312e-07, "loss": 0.3187, "step": 45558 }, { "epoch": 0.7919310260911888, "grad_norm": 1.1859020379940541, "learning_rate": 1.0930593664965282e-07, "loss": 0.1711, "step": 45559 }, { "epoch": 0.7919484086286916, "grad_norm": 1.4033829825528321, "learning_rate": 1.0928837080619329e-07, "loss": 0.1815, "step": 45560 }, { "epoch": 0.7919657911661945, "grad_norm": 1.6911780739701245, "learning_rate": 1.0927080620111018e-07, "loss": 0.1989, "step": 45561 }, { "epoch": 0.7919831737036973, "grad_norm": 0.8311219483220293, "learning_rate": 1.0925324283445914e-07, "loss": 0.2406, "step": 45562 }, { "epoch": 0.7920005562412001, "grad_norm": 1.0120640987569343, "learning_rate": 1.0923568070629579e-07, "loss": 0.2306, "step": 45563 }, { "epoch": 0.792017938778703, "grad_norm": 1.7232103209110672, "learning_rate": 1.0921811981667583e-07, "loss": 0.2382, "step": 45564 }, { "epoch": 0.7920353213162057, "grad_norm": 1.3880645494844306, "learning_rate": 1.0920056016565499e-07, "loss": 0.2408, "step": 45565 }, { "epoch": 0.7920527038537085, "grad_norm": 1.5067343063591316, "learning_rate": 1.0918300175328876e-07, "loss": 0.1996, "step": 45566 }, { "epoch": 0.7920700863912113, "grad_norm": 1.161806149548903, "learning_rate": 1.0916544457963278e-07, "loss": 0.2831, "step": 45567 }, { "epoch": 0.7920874689287142, "grad_norm": 1.1310934126332466, "learning_rate": 1.09147888644743e-07, "loss": 0.1822, "step": 45568 }, { "epoch": 0.792104851466217, "grad_norm": 1.5085837274509628, "learning_rate": 1.0913033394867493e-07, "loss": 0.1409, "step": 45569 }, { "epoch": 0.7921222340037198, "grad_norm": 1.0044162554099725, "learning_rate": 1.0911278049148409e-07, "loss": 0.2776, "step": 45570 }, { "epoch": 0.7921396165412227, "grad_norm": 2.503877902951593, "learning_rate": 1.0909522827322598e-07, "loss": 0.3539, "step": 45571 }, { "epoch": 0.7921569990787255, "grad_norm": 1.391922585133283, "learning_rate": 1.0907767729395656e-07, "loss": 0.172, "step": 45572 }, { "epoch": 0.7921743816162283, "grad_norm": 1.6027130686874256, "learning_rate": 1.090601275537314e-07, "loss": 0.1616, "step": 45573 }, { "epoch": 0.7921917641537312, "grad_norm": 1.490041553455735, "learning_rate": 1.09042579052606e-07, "loss": 0.2229, "step": 45574 }, { "epoch": 0.792209146691234, "grad_norm": 2.148956080371741, "learning_rate": 1.0902503179063604e-07, "loss": 0.2652, "step": 45575 }, { "epoch": 0.7922265292287368, "grad_norm": 1.5976777972400376, "learning_rate": 1.090074857678771e-07, "loss": 0.2675, "step": 45576 }, { "epoch": 0.7922439117662397, "grad_norm": 0.9131286598022927, "learning_rate": 1.0898994098438485e-07, "loss": 0.1678, "step": 45577 }, { "epoch": 0.7922612943037425, "grad_norm": 1.256530380722484, "learning_rate": 1.0897239744021475e-07, "loss": 0.1975, "step": 45578 }, { "epoch": 0.7922786768412453, "grad_norm": 1.382720710759041, "learning_rate": 1.0895485513542262e-07, "loss": 0.1784, "step": 45579 }, { "epoch": 0.7922960593787481, "grad_norm": 1.4390970003473673, "learning_rate": 1.0893731407006402e-07, "loss": 0.161, "step": 45580 }, { "epoch": 0.792313441916251, "grad_norm": 1.5848548934546556, "learning_rate": 1.0891977424419446e-07, "loss": 0.1546, "step": 45581 }, { "epoch": 0.7923308244537538, "grad_norm": 1.360931790412645, "learning_rate": 1.089022356578696e-07, "loss": 0.1961, "step": 45582 }, { "epoch": 0.7923482069912566, "grad_norm": 1.2253342325118022, "learning_rate": 1.08884698311145e-07, "loss": 0.2181, "step": 45583 }, { "epoch": 0.7923655895287595, "grad_norm": 1.9751093872712508, "learning_rate": 1.0886716220407621e-07, "loss": 0.2558, "step": 45584 }, { "epoch": 0.7923829720662622, "grad_norm": 1.285901780534053, "learning_rate": 1.0884962733671882e-07, "loss": 0.1389, "step": 45585 }, { "epoch": 0.792400354603765, "grad_norm": 1.4108735302210555, "learning_rate": 1.0883209370912833e-07, "loss": 0.2312, "step": 45586 }, { "epoch": 0.7924177371412678, "grad_norm": 2.2504290380820993, "learning_rate": 1.0881456132136058e-07, "loss": 0.1436, "step": 45587 }, { "epoch": 0.7924351196787707, "grad_norm": 0.7264977676685046, "learning_rate": 1.0879703017347108e-07, "loss": 0.2085, "step": 45588 }, { "epoch": 0.7924525022162735, "grad_norm": 0.7406713821559313, "learning_rate": 1.0877950026551513e-07, "loss": 0.1942, "step": 45589 }, { "epoch": 0.7924698847537763, "grad_norm": 1.3726716469666953, "learning_rate": 1.0876197159754836e-07, "loss": 0.2255, "step": 45590 }, { "epoch": 0.7924872672912792, "grad_norm": 1.317518673852017, "learning_rate": 1.0874444416962652e-07, "loss": 0.1931, "step": 45591 }, { "epoch": 0.792504649828782, "grad_norm": 2.9997840332773325, "learning_rate": 1.0872691798180511e-07, "loss": 0.2143, "step": 45592 }, { "epoch": 0.7925220323662848, "grad_norm": 1.5291681447090955, "learning_rate": 1.0870939303413962e-07, "loss": 0.179, "step": 45593 }, { "epoch": 0.7925394149037877, "grad_norm": 1.1794565345903194, "learning_rate": 1.0869186932668557e-07, "loss": 0.2038, "step": 45594 }, { "epoch": 0.7925567974412905, "grad_norm": 2.127425168962923, "learning_rate": 1.0867434685949855e-07, "loss": 0.3207, "step": 45595 }, { "epoch": 0.7925741799787933, "grad_norm": 1.6566707778331273, "learning_rate": 1.0865682563263412e-07, "loss": 0.1767, "step": 45596 }, { "epoch": 0.7925915625162961, "grad_norm": 2.089893376830394, "learning_rate": 1.0863930564614776e-07, "loss": 0.148, "step": 45597 }, { "epoch": 0.792608945053799, "grad_norm": 1.2195819650464415, "learning_rate": 1.0862178690009494e-07, "loss": 0.294, "step": 45598 }, { "epoch": 0.7926263275913018, "grad_norm": 1.642978884750951, "learning_rate": 1.0860426939453138e-07, "loss": 0.1763, "step": 45599 }, { "epoch": 0.7926437101288046, "grad_norm": 6.8943366704049645, "learning_rate": 1.0858675312951243e-07, "loss": 0.2061, "step": 45600 }, { "epoch": 0.7926610926663075, "grad_norm": 1.439334801257288, "learning_rate": 1.0856923810509377e-07, "loss": 0.1497, "step": 45601 }, { "epoch": 0.7926784752038103, "grad_norm": 8.794486163983695, "learning_rate": 1.0855172432133075e-07, "loss": 0.3829, "step": 45602 }, { "epoch": 0.7926958577413131, "grad_norm": 1.4482470070944558, "learning_rate": 1.0853421177827899e-07, "loss": 0.2196, "step": 45603 }, { "epoch": 0.792713240278816, "grad_norm": 1.08465818799763, "learning_rate": 1.0851670047599393e-07, "loss": 0.136, "step": 45604 }, { "epoch": 0.7927306228163187, "grad_norm": 1.7204126767763743, "learning_rate": 1.084991904145311e-07, "loss": 0.1941, "step": 45605 }, { "epoch": 0.7927480053538215, "grad_norm": 1.7663587332000172, "learning_rate": 1.0848168159394583e-07, "loss": 0.321, "step": 45606 }, { "epoch": 0.7927653878913243, "grad_norm": 2.4039465206847193, "learning_rate": 1.0846417401429408e-07, "loss": 0.3303, "step": 45607 }, { "epoch": 0.7927827704288272, "grad_norm": 0.8111253028344177, "learning_rate": 1.0844666767563093e-07, "loss": 0.2108, "step": 45608 }, { "epoch": 0.79280015296633, "grad_norm": 1.3307664118927374, "learning_rate": 1.0842916257801182e-07, "loss": 0.2345, "step": 45609 }, { "epoch": 0.7928175355038328, "grad_norm": 1.3278292278617967, "learning_rate": 1.084116587214925e-07, "loss": 0.2959, "step": 45610 }, { "epoch": 0.7928349180413357, "grad_norm": 0.6566662393491373, "learning_rate": 1.0839415610612839e-07, "loss": 0.2365, "step": 45611 }, { "epoch": 0.7928523005788385, "grad_norm": 1.1709496428919337, "learning_rate": 1.0837665473197488e-07, "loss": 0.2306, "step": 45612 }, { "epoch": 0.7928696831163413, "grad_norm": 1.1306134963720478, "learning_rate": 1.083591545990875e-07, "loss": 0.1884, "step": 45613 }, { "epoch": 0.7928870656538441, "grad_norm": 0.9140391436545441, "learning_rate": 1.0834165570752168e-07, "loss": 0.2148, "step": 45614 }, { "epoch": 0.792904448191347, "grad_norm": 1.1525852957243599, "learning_rate": 1.0832415805733291e-07, "loss": 0.1905, "step": 45615 }, { "epoch": 0.7929218307288498, "grad_norm": 3.5388020455430604, "learning_rate": 1.0830666164857665e-07, "loss": 0.2083, "step": 45616 }, { "epoch": 0.7929392132663526, "grad_norm": 1.7462989106224922, "learning_rate": 1.0828916648130815e-07, "loss": 0.1992, "step": 45617 }, { "epoch": 0.7929565958038555, "grad_norm": 2.3102482376183544, "learning_rate": 1.0827167255558322e-07, "loss": 0.1511, "step": 45618 }, { "epoch": 0.7929739783413583, "grad_norm": 1.4115992987389137, "learning_rate": 1.0825417987145718e-07, "loss": 0.2608, "step": 45619 }, { "epoch": 0.7929913608788611, "grad_norm": 1.41610777316235, "learning_rate": 1.0823668842898554e-07, "loss": 0.1613, "step": 45620 }, { "epoch": 0.793008743416364, "grad_norm": 1.3437603078816331, "learning_rate": 1.082191982282234e-07, "loss": 0.209, "step": 45621 }, { "epoch": 0.7930261259538668, "grad_norm": 0.74380109989092, "learning_rate": 1.082017092692265e-07, "loss": 0.1801, "step": 45622 }, { "epoch": 0.7930435084913696, "grad_norm": 1.6644178325677008, "learning_rate": 1.0818422155205026e-07, "loss": 0.2104, "step": 45623 }, { "epoch": 0.7930608910288725, "grad_norm": 2.384831773553913, "learning_rate": 1.0816673507675006e-07, "loss": 0.2316, "step": 45624 }, { "epoch": 0.7930782735663752, "grad_norm": 2.332011371217506, "learning_rate": 1.0814924984338114e-07, "loss": 0.2426, "step": 45625 }, { "epoch": 0.793095656103878, "grad_norm": 1.2334838095599152, "learning_rate": 1.081317658519994e-07, "loss": 0.2008, "step": 45626 }, { "epoch": 0.7931130386413808, "grad_norm": 1.867922324855821, "learning_rate": 1.081142831026598e-07, "loss": 0.2454, "step": 45627 }, { "epoch": 0.7931304211788837, "grad_norm": 1.801190722081555, "learning_rate": 1.0809680159541789e-07, "loss": 0.3707, "step": 45628 }, { "epoch": 0.7931478037163865, "grad_norm": 1.5856243864703583, "learning_rate": 1.0807932133032899e-07, "loss": 0.2922, "step": 45629 }, { "epoch": 0.7931651862538893, "grad_norm": 1.5271275953098866, "learning_rate": 1.0806184230744869e-07, "loss": 0.2122, "step": 45630 }, { "epoch": 0.7931825687913922, "grad_norm": 1.7484686204339783, "learning_rate": 1.0804436452683236e-07, "loss": 0.1527, "step": 45631 }, { "epoch": 0.793199951328895, "grad_norm": 1.1051119731796648, "learning_rate": 1.0802688798853533e-07, "loss": 0.2816, "step": 45632 }, { "epoch": 0.7932173338663978, "grad_norm": 1.5799096441763896, "learning_rate": 1.0800941269261299e-07, "loss": 0.164, "step": 45633 }, { "epoch": 0.7932347164039006, "grad_norm": 2.1228748395710197, "learning_rate": 1.0799193863912076e-07, "loss": 0.2608, "step": 45634 }, { "epoch": 0.7932520989414035, "grad_norm": 1.0456198464230426, "learning_rate": 1.0797446582811403e-07, "loss": 0.173, "step": 45635 }, { "epoch": 0.7932694814789063, "grad_norm": 1.5282242588175654, "learning_rate": 1.0795699425964817e-07, "loss": 0.1743, "step": 45636 }, { "epoch": 0.7932868640164091, "grad_norm": 0.9214284144849637, "learning_rate": 1.0793952393377836e-07, "loss": 0.2627, "step": 45637 }, { "epoch": 0.793304246553912, "grad_norm": 1.2188609716728416, "learning_rate": 1.0792205485056033e-07, "loss": 0.1593, "step": 45638 }, { "epoch": 0.7933216290914148, "grad_norm": 1.5655895865308347, "learning_rate": 1.079045870100494e-07, "loss": 0.2087, "step": 45639 }, { "epoch": 0.7933390116289176, "grad_norm": 1.6252597948158503, "learning_rate": 1.0788712041230058e-07, "loss": 0.3019, "step": 45640 }, { "epoch": 0.7933563941664205, "grad_norm": 1.6971531089505612, "learning_rate": 1.0786965505736956e-07, "loss": 0.1984, "step": 45641 }, { "epoch": 0.7933737767039233, "grad_norm": 1.549641257640225, "learning_rate": 1.0785219094531162e-07, "loss": 0.2593, "step": 45642 }, { "epoch": 0.7933911592414261, "grad_norm": 1.2096896999856894, "learning_rate": 1.0783472807618211e-07, "loss": 0.1287, "step": 45643 }, { "epoch": 0.7934085417789288, "grad_norm": 1.0299453798104787, "learning_rate": 1.0781726645003636e-07, "loss": 0.2036, "step": 45644 }, { "epoch": 0.7934259243164317, "grad_norm": 1.8633181754599466, "learning_rate": 1.0779980606692968e-07, "loss": 0.1619, "step": 45645 }, { "epoch": 0.7934433068539345, "grad_norm": 0.9286157459332726, "learning_rate": 1.0778234692691751e-07, "loss": 0.1981, "step": 45646 }, { "epoch": 0.7934606893914373, "grad_norm": 0.7023004209896642, "learning_rate": 1.0776488903005509e-07, "loss": 0.1626, "step": 45647 }, { "epoch": 0.7934780719289402, "grad_norm": 1.3489520793906256, "learning_rate": 1.0774743237639766e-07, "loss": 0.2302, "step": 45648 }, { "epoch": 0.793495454466443, "grad_norm": 0.8487831895375442, "learning_rate": 1.0772997696600083e-07, "loss": 0.1439, "step": 45649 }, { "epoch": 0.7935128370039458, "grad_norm": 2.1107482712479095, "learning_rate": 1.0771252279891979e-07, "loss": 0.2982, "step": 45650 }, { "epoch": 0.7935302195414486, "grad_norm": 2.808849559916068, "learning_rate": 1.0769506987520999e-07, "loss": 0.2504, "step": 45651 }, { "epoch": 0.7935476020789515, "grad_norm": 1.2269793418722492, "learning_rate": 1.0767761819492633e-07, "loss": 0.185, "step": 45652 }, { "epoch": 0.7935649846164543, "grad_norm": 1.561016769946934, "learning_rate": 1.0766016775812453e-07, "loss": 0.1513, "step": 45653 }, { "epoch": 0.7935823671539571, "grad_norm": 0.6426610227060503, "learning_rate": 1.0764271856485979e-07, "loss": 0.2375, "step": 45654 }, { "epoch": 0.79359974969146, "grad_norm": 2.4707305237621027, "learning_rate": 1.0762527061518739e-07, "loss": 0.1988, "step": 45655 }, { "epoch": 0.7936171322289628, "grad_norm": 1.130577494446086, "learning_rate": 1.0760782390916251e-07, "loss": 0.1417, "step": 45656 }, { "epoch": 0.7936345147664656, "grad_norm": 1.5302720133691448, "learning_rate": 1.0759037844684066e-07, "loss": 0.2555, "step": 45657 }, { "epoch": 0.7936518973039685, "grad_norm": 2.1615720689835123, "learning_rate": 1.0757293422827724e-07, "loss": 0.5491, "step": 45658 }, { "epoch": 0.7936692798414713, "grad_norm": 0.833662457483588, "learning_rate": 1.075554912535272e-07, "loss": 0.2226, "step": 45659 }, { "epoch": 0.7936866623789741, "grad_norm": 1.9055400005591312, "learning_rate": 1.0753804952264584e-07, "loss": 0.2277, "step": 45660 }, { "epoch": 0.793704044916477, "grad_norm": 1.5971266660550907, "learning_rate": 1.0752060903568877e-07, "loss": 0.1891, "step": 45661 }, { "epoch": 0.7937214274539798, "grad_norm": 1.3155785214111604, "learning_rate": 1.0750316979271101e-07, "loss": 0.2018, "step": 45662 }, { "epoch": 0.7937388099914826, "grad_norm": 1.1134787705353637, "learning_rate": 1.074857317937679e-07, "loss": 0.3512, "step": 45663 }, { "epoch": 0.7937561925289853, "grad_norm": 2.0403557910555596, "learning_rate": 1.0746829503891475e-07, "loss": 0.3806, "step": 45664 }, { "epoch": 0.7937735750664882, "grad_norm": 1.243481705993502, "learning_rate": 1.0745085952820681e-07, "loss": 0.1625, "step": 45665 }, { "epoch": 0.793790957603991, "grad_norm": 1.0580229016221736, "learning_rate": 1.074334252616993e-07, "loss": 0.2229, "step": 45666 }, { "epoch": 0.7938083401414938, "grad_norm": 0.8934601292145408, "learning_rate": 1.0741599223944736e-07, "loss": 0.1253, "step": 45667 }, { "epoch": 0.7938257226789966, "grad_norm": 2.326939605069165, "learning_rate": 1.0739856046150652e-07, "loss": 0.2455, "step": 45668 }, { "epoch": 0.7938431052164995, "grad_norm": 1.455329102019087, "learning_rate": 1.0738112992793192e-07, "loss": 0.1237, "step": 45669 }, { "epoch": 0.7938604877540023, "grad_norm": 1.4304533753314614, "learning_rate": 1.0736370063877886e-07, "loss": 0.2206, "step": 45670 }, { "epoch": 0.7938778702915051, "grad_norm": 1.8909488974569009, "learning_rate": 1.0734627259410229e-07, "loss": 0.3313, "step": 45671 }, { "epoch": 0.793895252829008, "grad_norm": 1.3232419026839448, "learning_rate": 1.0732884579395778e-07, "loss": 0.2672, "step": 45672 }, { "epoch": 0.7939126353665108, "grad_norm": 1.8607515648579576, "learning_rate": 1.0731142023840046e-07, "loss": 0.1978, "step": 45673 }, { "epoch": 0.7939300179040136, "grad_norm": 0.8279767513730614, "learning_rate": 1.0729399592748556e-07, "loss": 0.2259, "step": 45674 }, { "epoch": 0.7939474004415165, "grad_norm": 2.9034477286145237, "learning_rate": 1.0727657286126813e-07, "loss": 0.2689, "step": 45675 }, { "epoch": 0.7939647829790193, "grad_norm": 1.614831441520732, "learning_rate": 1.0725915103980388e-07, "loss": 0.2789, "step": 45676 }, { "epoch": 0.7939821655165221, "grad_norm": 1.1514727280451655, "learning_rate": 1.0724173046314755e-07, "loss": 0.1651, "step": 45677 }, { "epoch": 0.793999548054025, "grad_norm": 3.39748363335073, "learning_rate": 1.0722431113135449e-07, "loss": 0.2244, "step": 45678 }, { "epoch": 0.7940169305915278, "grad_norm": 0.9972993433478244, "learning_rate": 1.0720689304447983e-07, "loss": 0.1927, "step": 45679 }, { "epoch": 0.7940343131290306, "grad_norm": 1.5955549471061479, "learning_rate": 1.0718947620257901e-07, "loss": 0.2562, "step": 45680 }, { "epoch": 0.7940516956665334, "grad_norm": 1.3841302071172545, "learning_rate": 1.071720606057071e-07, "loss": 0.1724, "step": 45681 }, { "epoch": 0.7940690782040363, "grad_norm": 1.3510555882301554, "learning_rate": 1.0715464625391935e-07, "loss": 0.2373, "step": 45682 }, { "epoch": 0.7940864607415391, "grad_norm": 0.7004967243639314, "learning_rate": 1.071372331472709e-07, "loss": 0.1244, "step": 45683 }, { "epoch": 0.7941038432790418, "grad_norm": 1.7430085605668602, "learning_rate": 1.0711982128581693e-07, "loss": 0.2214, "step": 45684 }, { "epoch": 0.7941212258165447, "grad_norm": 0.9705927969997317, "learning_rate": 1.0710241066961268e-07, "loss": 0.2905, "step": 45685 }, { "epoch": 0.7941386083540475, "grad_norm": 3.960471401737594, "learning_rate": 1.0708500129871329e-07, "loss": 0.3584, "step": 45686 }, { "epoch": 0.7941559908915503, "grad_norm": 1.5339193085244485, "learning_rate": 1.0706759317317376e-07, "loss": 0.145, "step": 45687 }, { "epoch": 0.7941733734290531, "grad_norm": 1.5697180577301426, "learning_rate": 1.0705018629304968e-07, "loss": 0.1948, "step": 45688 }, { "epoch": 0.794190755966556, "grad_norm": 1.4689945441546235, "learning_rate": 1.070327806583961e-07, "loss": 0.2687, "step": 45689 }, { "epoch": 0.7942081385040588, "grad_norm": 0.7828668001064268, "learning_rate": 1.0701537626926788e-07, "loss": 0.1589, "step": 45690 }, { "epoch": 0.7942255210415616, "grad_norm": 1.0952688041186724, "learning_rate": 1.0699797312572045e-07, "loss": 0.3112, "step": 45691 }, { "epoch": 0.7942429035790645, "grad_norm": 1.4051923813081217, "learning_rate": 1.0698057122780896e-07, "loss": 0.2228, "step": 45692 }, { "epoch": 0.7942602861165673, "grad_norm": 1.507232767456659, "learning_rate": 1.0696317057558852e-07, "loss": 0.1768, "step": 45693 }, { "epoch": 0.7942776686540701, "grad_norm": 1.7723103744352393, "learning_rate": 1.0694577116911424e-07, "loss": 0.2059, "step": 45694 }, { "epoch": 0.794295051191573, "grad_norm": 1.31821931512328, "learning_rate": 1.0692837300844138e-07, "loss": 0.1471, "step": 45695 }, { "epoch": 0.7943124337290758, "grad_norm": 1.288745613677171, "learning_rate": 1.0691097609362498e-07, "loss": 0.1467, "step": 45696 }, { "epoch": 0.7943298162665786, "grad_norm": 1.64711402071298, "learning_rate": 1.0689358042472019e-07, "loss": 0.1793, "step": 45697 }, { "epoch": 0.7943471988040814, "grad_norm": 2.2817182771970557, "learning_rate": 1.0687618600178205e-07, "loss": 0.3106, "step": 45698 }, { "epoch": 0.7943645813415843, "grad_norm": 1.3142357730090293, "learning_rate": 1.0685879282486598e-07, "loss": 0.2615, "step": 45699 }, { "epoch": 0.7943819638790871, "grad_norm": 1.7037715617283615, "learning_rate": 1.0684140089402694e-07, "loss": 0.1631, "step": 45700 }, { "epoch": 0.7943993464165899, "grad_norm": 0.8737219493695632, "learning_rate": 1.0682401020932003e-07, "loss": 0.3048, "step": 45701 }, { "epoch": 0.7944167289540928, "grad_norm": 1.5278498449814086, "learning_rate": 1.0680662077080039e-07, "loss": 0.256, "step": 45702 }, { "epoch": 0.7944341114915956, "grad_norm": 1.166742041275871, "learning_rate": 1.0678923257852318e-07, "loss": 0.158, "step": 45703 }, { "epoch": 0.7944514940290983, "grad_norm": 1.980172575626852, "learning_rate": 1.0677184563254343e-07, "loss": 0.1965, "step": 45704 }, { "epoch": 0.7944688765666011, "grad_norm": 1.264253978850725, "learning_rate": 1.0675445993291632e-07, "loss": 0.2106, "step": 45705 }, { "epoch": 0.794486259104104, "grad_norm": 1.7920484084460375, "learning_rate": 1.0673707547969679e-07, "loss": 0.2009, "step": 45706 }, { "epoch": 0.7945036416416068, "grad_norm": 1.2494649682162073, "learning_rate": 1.0671969227294026e-07, "loss": 0.2368, "step": 45707 }, { "epoch": 0.7945210241791096, "grad_norm": 1.477014848583923, "learning_rate": 1.0670231031270172e-07, "loss": 0.1837, "step": 45708 }, { "epoch": 0.7945384067166125, "grad_norm": 1.0391354917932993, "learning_rate": 1.0668492959903607e-07, "loss": 0.1597, "step": 45709 }, { "epoch": 0.7945557892541153, "grad_norm": 1.3050551059375646, "learning_rate": 1.066675501319984e-07, "loss": 0.3872, "step": 45710 }, { "epoch": 0.7945731717916181, "grad_norm": 3.0012913237160923, "learning_rate": 1.0665017191164406e-07, "loss": 0.1938, "step": 45711 }, { "epoch": 0.794590554329121, "grad_norm": 1.1193561689671254, "learning_rate": 1.0663279493802796e-07, "loss": 0.1923, "step": 45712 }, { "epoch": 0.7946079368666238, "grad_norm": 1.2190726080208278, "learning_rate": 1.0661541921120521e-07, "loss": 0.2129, "step": 45713 }, { "epoch": 0.7946253194041266, "grad_norm": 1.2043643662854269, "learning_rate": 1.0659804473123085e-07, "loss": 0.2882, "step": 45714 }, { "epoch": 0.7946427019416294, "grad_norm": 1.2169157925912242, "learning_rate": 1.0658067149816003e-07, "loss": 0.224, "step": 45715 }, { "epoch": 0.7946600844791323, "grad_norm": 1.3987245178489667, "learning_rate": 1.0656329951204773e-07, "loss": 0.2626, "step": 45716 }, { "epoch": 0.7946774670166351, "grad_norm": 1.2495274878186196, "learning_rate": 1.0654592877294905e-07, "loss": 0.1669, "step": 45717 }, { "epoch": 0.7946948495541379, "grad_norm": 1.257503214918974, "learning_rate": 1.0652855928091886e-07, "loss": 0.221, "step": 45718 }, { "epoch": 0.7947122320916408, "grad_norm": 1.2681799833001488, "learning_rate": 1.0651119103601258e-07, "loss": 0.1103, "step": 45719 }, { "epoch": 0.7947296146291436, "grad_norm": 1.1520351121803083, "learning_rate": 1.0649382403828505e-07, "loss": 0.1807, "step": 45720 }, { "epoch": 0.7947469971666464, "grad_norm": 1.1580172469136245, "learning_rate": 1.0647645828779134e-07, "loss": 0.3675, "step": 45721 }, { "epoch": 0.7947643797041493, "grad_norm": 1.0298782804844673, "learning_rate": 1.0645909378458646e-07, "loss": 0.192, "step": 45722 }, { "epoch": 0.7947817622416521, "grad_norm": 1.4879200814831004, "learning_rate": 1.0644173052872552e-07, "loss": 0.154, "step": 45723 }, { "epoch": 0.7947991447791548, "grad_norm": 1.5537925332391431, "learning_rate": 1.0642436852026342e-07, "loss": 0.1898, "step": 45724 }, { "epoch": 0.7948165273166576, "grad_norm": 2.6514973363320116, "learning_rate": 1.0640700775925538e-07, "loss": 0.1887, "step": 45725 }, { "epoch": 0.7948339098541605, "grad_norm": 2.450011441256615, "learning_rate": 1.0638964824575614e-07, "loss": 0.2525, "step": 45726 }, { "epoch": 0.7948512923916633, "grad_norm": 2.3100786241460387, "learning_rate": 1.0637228997982117e-07, "loss": 0.1525, "step": 45727 }, { "epoch": 0.7948686749291661, "grad_norm": 1.1196591383204528, "learning_rate": 1.063549329615051e-07, "loss": 0.2475, "step": 45728 }, { "epoch": 0.794886057466669, "grad_norm": 2.0621339720302565, "learning_rate": 1.0633757719086295e-07, "loss": 0.2483, "step": 45729 }, { "epoch": 0.7949034400041718, "grad_norm": 1.2949783601672549, "learning_rate": 1.0632022266794999e-07, "loss": 0.2253, "step": 45730 }, { "epoch": 0.7949208225416746, "grad_norm": 1.7966952841247923, "learning_rate": 1.0630286939282107e-07, "loss": 0.2036, "step": 45731 }, { "epoch": 0.7949382050791775, "grad_norm": 1.481992574292921, "learning_rate": 1.0628551736553115e-07, "loss": 0.2218, "step": 45732 }, { "epoch": 0.7949555876166803, "grad_norm": 1.7198553025427052, "learning_rate": 1.0626816658613535e-07, "loss": 0.4566, "step": 45733 }, { "epoch": 0.7949729701541831, "grad_norm": 1.45133004273164, "learning_rate": 1.0625081705468858e-07, "loss": 0.1546, "step": 45734 }, { "epoch": 0.7949903526916859, "grad_norm": 1.5776878617833536, "learning_rate": 1.0623346877124584e-07, "loss": 0.2168, "step": 45735 }, { "epoch": 0.7950077352291888, "grad_norm": 0.9735990966179625, "learning_rate": 1.0621612173586209e-07, "loss": 0.2773, "step": 45736 }, { "epoch": 0.7950251177666916, "grad_norm": 1.0047963903417703, "learning_rate": 1.0619877594859228e-07, "loss": 0.1782, "step": 45737 }, { "epoch": 0.7950425003041944, "grad_norm": 0.9451560633450279, "learning_rate": 1.0618143140949155e-07, "loss": 0.1883, "step": 45738 }, { "epoch": 0.7950598828416973, "grad_norm": 2.1460861502130237, "learning_rate": 1.0616408811861477e-07, "loss": 0.2077, "step": 45739 }, { "epoch": 0.7950772653792001, "grad_norm": 0.9565486842477914, "learning_rate": 1.0614674607601703e-07, "loss": 0.1035, "step": 45740 }, { "epoch": 0.7950946479167029, "grad_norm": 0.8536997266352209, "learning_rate": 1.0612940528175296e-07, "loss": 0.1474, "step": 45741 }, { "epoch": 0.7951120304542058, "grad_norm": 1.3918625583439288, "learning_rate": 1.0611206573587783e-07, "loss": 0.2981, "step": 45742 }, { "epoch": 0.7951294129917086, "grad_norm": 1.7233962811199826, "learning_rate": 1.0609472743844655e-07, "loss": 0.1215, "step": 45743 }, { "epoch": 0.7951467955292113, "grad_norm": 1.4122415420173406, "learning_rate": 1.0607739038951407e-07, "loss": 0.1794, "step": 45744 }, { "epoch": 0.7951641780667141, "grad_norm": 1.5724228375511322, "learning_rate": 1.0606005458913514e-07, "loss": 0.2385, "step": 45745 }, { "epoch": 0.795181560604217, "grad_norm": 1.364932546476719, "learning_rate": 1.0604272003736515e-07, "loss": 0.2371, "step": 45746 }, { "epoch": 0.7951989431417198, "grad_norm": 1.5690120241503749, "learning_rate": 1.0602538673425859e-07, "loss": 0.2203, "step": 45747 }, { "epoch": 0.7952163256792226, "grad_norm": 1.1809705170199105, "learning_rate": 1.0600805467987061e-07, "loss": 0.2154, "step": 45748 }, { "epoch": 0.7952337082167255, "grad_norm": 1.1456152883633421, "learning_rate": 1.05990723874256e-07, "loss": 0.264, "step": 45749 }, { "epoch": 0.7952510907542283, "grad_norm": 1.3316443479440845, "learning_rate": 1.0597339431746994e-07, "loss": 0.3067, "step": 45750 }, { "epoch": 0.7952684732917311, "grad_norm": 5.60267445562776, "learning_rate": 1.0595606600956718e-07, "loss": 0.2788, "step": 45751 }, { "epoch": 0.795285855829234, "grad_norm": 2.2549512570264327, "learning_rate": 1.0593873895060268e-07, "loss": 0.2171, "step": 45752 }, { "epoch": 0.7953032383667368, "grad_norm": 0.8499069351225699, "learning_rate": 1.0592141314063141e-07, "loss": 0.1309, "step": 45753 }, { "epoch": 0.7953206209042396, "grad_norm": 1.5400681276928312, "learning_rate": 1.0590408857970823e-07, "loss": 0.1579, "step": 45754 }, { "epoch": 0.7953380034417424, "grad_norm": 0.9118561759141264, "learning_rate": 1.0588676526788798e-07, "loss": 0.1774, "step": 45755 }, { "epoch": 0.7953553859792453, "grad_norm": 1.1064743803518895, "learning_rate": 1.0586944320522573e-07, "loss": 0.1335, "step": 45756 }, { "epoch": 0.7953727685167481, "grad_norm": 1.8470809425275592, "learning_rate": 1.058521223917761e-07, "loss": 0.1814, "step": 45757 }, { "epoch": 0.7953901510542509, "grad_norm": 1.0975747524914639, "learning_rate": 1.0583480282759438e-07, "loss": 0.266, "step": 45758 }, { "epoch": 0.7954075335917538, "grad_norm": 1.0641548859812344, "learning_rate": 1.0581748451273536e-07, "loss": 0.2181, "step": 45759 }, { "epoch": 0.7954249161292566, "grad_norm": 1.4463218414900085, "learning_rate": 1.0580016744725356e-07, "loss": 0.1593, "step": 45760 }, { "epoch": 0.7954422986667594, "grad_norm": 1.2115782551131835, "learning_rate": 1.0578285163120432e-07, "loss": 0.3054, "step": 45761 }, { "epoch": 0.7954596812042622, "grad_norm": 1.3864150238691182, "learning_rate": 1.0576553706464236e-07, "loss": 0.1839, "step": 45762 }, { "epoch": 0.7954770637417651, "grad_norm": 0.788021565688101, "learning_rate": 1.0574822374762254e-07, "loss": 0.129, "step": 45763 }, { "epoch": 0.7954944462792678, "grad_norm": 1.3089185861539288, "learning_rate": 1.057309116801996e-07, "loss": 0.322, "step": 45764 }, { "epoch": 0.7955118288167706, "grad_norm": 1.2822638022249346, "learning_rate": 1.0571360086242881e-07, "loss": 0.2132, "step": 45765 }, { "epoch": 0.7955292113542735, "grad_norm": 2.6976349610706722, "learning_rate": 1.056962912943647e-07, "loss": 0.3167, "step": 45766 }, { "epoch": 0.7955465938917763, "grad_norm": 1.566120547577727, "learning_rate": 1.0567898297606214e-07, "loss": 0.1575, "step": 45767 }, { "epoch": 0.7955639764292791, "grad_norm": 1.7853527567052254, "learning_rate": 1.0566167590757603e-07, "loss": 0.1988, "step": 45768 }, { "epoch": 0.795581358966782, "grad_norm": 1.0920248347996133, "learning_rate": 1.0564437008896132e-07, "loss": 0.2338, "step": 45769 }, { "epoch": 0.7955987415042848, "grad_norm": 2.75221355643935, "learning_rate": 1.0562706552027284e-07, "loss": 0.1516, "step": 45770 }, { "epoch": 0.7956161240417876, "grad_norm": 3.237156756547989, "learning_rate": 1.0560976220156536e-07, "loss": 0.2146, "step": 45771 }, { "epoch": 0.7956335065792904, "grad_norm": 1.2158474025500952, "learning_rate": 1.055924601328938e-07, "loss": 0.2303, "step": 45772 }, { "epoch": 0.7956508891167933, "grad_norm": 3.172419310126676, "learning_rate": 1.0557515931431299e-07, "loss": 0.1647, "step": 45773 }, { "epoch": 0.7956682716542961, "grad_norm": 0.9207492407651596, "learning_rate": 1.0555785974587772e-07, "loss": 0.1433, "step": 45774 }, { "epoch": 0.7956856541917989, "grad_norm": 3.24118324835441, "learning_rate": 1.0554056142764284e-07, "loss": 0.247, "step": 45775 }, { "epoch": 0.7957030367293018, "grad_norm": 1.7695287443691168, "learning_rate": 1.0552326435966302e-07, "loss": 0.1656, "step": 45776 }, { "epoch": 0.7957204192668046, "grad_norm": 2.078298132099459, "learning_rate": 1.0550596854199346e-07, "loss": 0.2014, "step": 45777 }, { "epoch": 0.7957378018043074, "grad_norm": 1.4934798839766066, "learning_rate": 1.054886739746888e-07, "loss": 0.1632, "step": 45778 }, { "epoch": 0.7957551843418103, "grad_norm": 1.3510351125139994, "learning_rate": 1.0547138065780358e-07, "loss": 0.2213, "step": 45779 }, { "epoch": 0.7957725668793131, "grad_norm": 1.3267644944838057, "learning_rate": 1.0545408859139304e-07, "loss": 0.3931, "step": 45780 }, { "epoch": 0.7957899494168159, "grad_norm": 1.7609560296317575, "learning_rate": 1.0543679777551173e-07, "loss": 0.1828, "step": 45781 }, { "epoch": 0.7958073319543187, "grad_norm": 1.6526206855896548, "learning_rate": 1.0541950821021456e-07, "loss": 0.2488, "step": 45782 }, { "epoch": 0.7958247144918215, "grad_norm": 1.2230958644848626, "learning_rate": 1.0540221989555631e-07, "loss": 0.3251, "step": 45783 }, { "epoch": 0.7958420970293243, "grad_norm": 1.8092354673998776, "learning_rate": 1.0538493283159173e-07, "loss": 0.3041, "step": 45784 }, { "epoch": 0.7958594795668271, "grad_norm": 0.9029702698155527, "learning_rate": 1.0536764701837564e-07, "loss": 0.3494, "step": 45785 }, { "epoch": 0.79587686210433, "grad_norm": 0.587898914388804, "learning_rate": 1.0535036245596285e-07, "loss": 0.1118, "step": 45786 }, { "epoch": 0.7958942446418328, "grad_norm": 2.1762011085942716, "learning_rate": 1.0533307914440798e-07, "loss": 0.2656, "step": 45787 }, { "epoch": 0.7959116271793356, "grad_norm": 2.7303175596752802, "learning_rate": 1.0531579708376609e-07, "loss": 0.2362, "step": 45788 }, { "epoch": 0.7959290097168384, "grad_norm": 1.7761471927902164, "learning_rate": 1.0529851627409181e-07, "loss": 0.1926, "step": 45789 }, { "epoch": 0.7959463922543413, "grad_norm": 0.8843525790534165, "learning_rate": 1.0528123671544004e-07, "loss": 0.1908, "step": 45790 }, { "epoch": 0.7959637747918441, "grad_norm": 1.7586033745795608, "learning_rate": 1.0526395840786517e-07, "loss": 0.1795, "step": 45791 }, { "epoch": 0.7959811573293469, "grad_norm": 1.8714323798416748, "learning_rate": 1.052466813514224e-07, "loss": 0.1097, "step": 45792 }, { "epoch": 0.7959985398668498, "grad_norm": 1.3103269648979232, "learning_rate": 1.0522940554616627e-07, "loss": 0.2288, "step": 45793 }, { "epoch": 0.7960159224043526, "grad_norm": 1.5134720668135975, "learning_rate": 1.0521213099215159e-07, "loss": 0.1551, "step": 45794 }, { "epoch": 0.7960333049418554, "grad_norm": 1.2567627337504657, "learning_rate": 1.0519485768943299e-07, "loss": 0.245, "step": 45795 }, { "epoch": 0.7960506874793583, "grad_norm": 1.1201276550658492, "learning_rate": 1.0517758563806561e-07, "loss": 0.1294, "step": 45796 }, { "epoch": 0.7960680700168611, "grad_norm": 1.9721148666969088, "learning_rate": 1.0516031483810377e-07, "loss": 0.3387, "step": 45797 }, { "epoch": 0.7960854525543639, "grad_norm": 0.9557550116923582, "learning_rate": 1.0514304528960233e-07, "loss": 0.1749, "step": 45798 }, { "epoch": 0.7961028350918667, "grad_norm": 1.0919652257996824, "learning_rate": 1.0512577699261593e-07, "loss": 0.1557, "step": 45799 }, { "epoch": 0.7961202176293696, "grad_norm": 2.2722670165076972, "learning_rate": 1.0510850994719961e-07, "loss": 0.2698, "step": 45800 }, { "epoch": 0.7961376001668724, "grad_norm": 1.2700156617528688, "learning_rate": 1.0509124415340787e-07, "loss": 0.292, "step": 45801 }, { "epoch": 0.7961549827043752, "grad_norm": 2.0229414183791543, "learning_rate": 1.050739796112955e-07, "loss": 0.2598, "step": 45802 }, { "epoch": 0.796172365241878, "grad_norm": 1.3755459272074293, "learning_rate": 1.0505671632091717e-07, "loss": 0.258, "step": 45803 }, { "epoch": 0.7961897477793808, "grad_norm": 1.4378499917641792, "learning_rate": 1.0503945428232763e-07, "loss": 0.182, "step": 45804 }, { "epoch": 0.7962071303168836, "grad_norm": 1.4796470590581519, "learning_rate": 1.0502219349558161e-07, "loss": 0.2695, "step": 45805 }, { "epoch": 0.7962245128543864, "grad_norm": 0.8749008211574328, "learning_rate": 1.050049339607338e-07, "loss": 0.1729, "step": 45806 }, { "epoch": 0.7962418953918893, "grad_norm": 2.0673813951766333, "learning_rate": 1.0498767567783878e-07, "loss": 0.2857, "step": 45807 }, { "epoch": 0.7962592779293921, "grad_norm": 1.5470320288329058, "learning_rate": 1.049704186469515e-07, "loss": 0.2325, "step": 45808 }, { "epoch": 0.7962766604668949, "grad_norm": 1.358966689498647, "learning_rate": 1.0495316286812661e-07, "loss": 0.2085, "step": 45809 }, { "epoch": 0.7962940430043978, "grad_norm": 2.2486543438952116, "learning_rate": 1.0493590834141852e-07, "loss": 0.4486, "step": 45810 }, { "epoch": 0.7963114255419006, "grad_norm": 0.920981213709975, "learning_rate": 1.0491865506688224e-07, "loss": 0.1859, "step": 45811 }, { "epoch": 0.7963288080794034, "grad_norm": 1.5073486903707272, "learning_rate": 1.0490140304457234e-07, "loss": 0.1625, "step": 45812 }, { "epoch": 0.7963461906169063, "grad_norm": 1.1467027515267814, "learning_rate": 1.048841522745435e-07, "loss": 0.2135, "step": 45813 }, { "epoch": 0.7963635731544091, "grad_norm": 4.011145674439456, "learning_rate": 1.0486690275685039e-07, "loss": 0.2369, "step": 45814 }, { "epoch": 0.7963809556919119, "grad_norm": 1.376322216100689, "learning_rate": 1.048496544915477e-07, "loss": 0.2268, "step": 45815 }, { "epoch": 0.7963983382294147, "grad_norm": 0.7869069704480732, "learning_rate": 1.0483240747869005e-07, "loss": 0.1943, "step": 45816 }, { "epoch": 0.7964157207669176, "grad_norm": 1.345012546997883, "learning_rate": 1.0481516171833221e-07, "loss": 0.2043, "step": 45817 }, { "epoch": 0.7964331033044204, "grad_norm": 1.174554876147363, "learning_rate": 1.0479791721052856e-07, "loss": 0.2965, "step": 45818 }, { "epoch": 0.7964504858419232, "grad_norm": 1.7128899656583458, "learning_rate": 1.0478067395533419e-07, "loss": 0.2391, "step": 45819 }, { "epoch": 0.7964678683794261, "grad_norm": 1.140508818962914, "learning_rate": 1.0476343195280346e-07, "loss": 0.3171, "step": 45820 }, { "epoch": 0.7964852509169289, "grad_norm": 1.4482315335828546, "learning_rate": 1.0474619120299111e-07, "loss": 0.1853, "step": 45821 }, { "epoch": 0.7965026334544317, "grad_norm": 1.124216212906737, "learning_rate": 1.047289517059518e-07, "loss": 0.1358, "step": 45822 }, { "epoch": 0.7965200159919344, "grad_norm": 2.2161195968320953, "learning_rate": 1.0471171346174013e-07, "loss": 0.2333, "step": 45823 }, { "epoch": 0.7965373985294373, "grad_norm": 2.437356935564819, "learning_rate": 1.0469447647041074e-07, "loss": 0.253, "step": 45824 }, { "epoch": 0.7965547810669401, "grad_norm": 1.243187005701563, "learning_rate": 1.0467724073201828e-07, "loss": 0.3428, "step": 45825 }, { "epoch": 0.7965721636044429, "grad_norm": 0.8959243749127319, "learning_rate": 1.0466000624661725e-07, "loss": 0.1287, "step": 45826 }, { "epoch": 0.7965895461419458, "grad_norm": 1.888150360928276, "learning_rate": 1.046427730142625e-07, "loss": 0.2747, "step": 45827 }, { "epoch": 0.7966069286794486, "grad_norm": 1.1435297983724506, "learning_rate": 1.0462554103500871e-07, "loss": 0.2909, "step": 45828 }, { "epoch": 0.7966243112169514, "grad_norm": 1.4788410517544037, "learning_rate": 1.0460831030891016e-07, "loss": 0.1849, "step": 45829 }, { "epoch": 0.7966416937544543, "grad_norm": 1.8171637435763992, "learning_rate": 1.0459108083602153e-07, "loss": 0.1957, "step": 45830 }, { "epoch": 0.7966590762919571, "grad_norm": 1.5189266879748027, "learning_rate": 1.0457385261639767e-07, "loss": 0.1933, "step": 45831 }, { "epoch": 0.7966764588294599, "grad_norm": 2.1434820782614388, "learning_rate": 1.0455662565009304e-07, "loss": 0.1605, "step": 45832 }, { "epoch": 0.7966938413669628, "grad_norm": 6.201985003998171, "learning_rate": 1.0453939993716227e-07, "loss": 0.2419, "step": 45833 }, { "epoch": 0.7967112239044656, "grad_norm": 0.9113161732793934, "learning_rate": 1.0452217547765996e-07, "loss": 0.1446, "step": 45834 }, { "epoch": 0.7967286064419684, "grad_norm": 1.587322615281891, "learning_rate": 1.0450495227164064e-07, "loss": 0.3253, "step": 45835 }, { "epoch": 0.7967459889794712, "grad_norm": 1.0275580700878642, "learning_rate": 1.0448773031915897e-07, "loss": 0.1172, "step": 45836 }, { "epoch": 0.7967633715169741, "grad_norm": 2.201327927186213, "learning_rate": 1.0447050962026954e-07, "loss": 0.2823, "step": 45837 }, { "epoch": 0.7967807540544769, "grad_norm": 1.5844140030739342, "learning_rate": 1.0445329017502674e-07, "loss": 0.2193, "step": 45838 }, { "epoch": 0.7967981365919797, "grad_norm": 1.5046313661216384, "learning_rate": 1.0443607198348542e-07, "loss": 0.2631, "step": 45839 }, { "epoch": 0.7968155191294826, "grad_norm": 1.0825440291002266, "learning_rate": 1.0441885504570008e-07, "loss": 0.1618, "step": 45840 }, { "epoch": 0.7968329016669854, "grad_norm": 1.4578157586889309, "learning_rate": 1.0440163936172525e-07, "loss": 0.3043, "step": 45841 }, { "epoch": 0.7968502842044882, "grad_norm": 2.8878896153581524, "learning_rate": 1.0438442493161548e-07, "loss": 0.1887, "step": 45842 }, { "epoch": 0.7968676667419909, "grad_norm": 2.709227119847782, "learning_rate": 1.043672117554254e-07, "loss": 0.1656, "step": 45843 }, { "epoch": 0.7968850492794938, "grad_norm": 2.145292004467188, "learning_rate": 1.0434999983320946e-07, "loss": 0.3015, "step": 45844 }, { "epoch": 0.7969024318169966, "grad_norm": 1.7118476660233675, "learning_rate": 1.0433278916502231e-07, "loss": 0.338, "step": 45845 }, { "epoch": 0.7969198143544994, "grad_norm": 1.4857970561475262, "learning_rate": 1.0431557975091832e-07, "loss": 0.2065, "step": 45846 }, { "epoch": 0.7969371968920023, "grad_norm": 1.5133311896585635, "learning_rate": 1.0429837159095245e-07, "loss": 0.1595, "step": 45847 }, { "epoch": 0.7969545794295051, "grad_norm": 1.3514283885802918, "learning_rate": 1.0428116468517884e-07, "loss": 0.1954, "step": 45848 }, { "epoch": 0.7969719619670079, "grad_norm": 1.5277863337448785, "learning_rate": 1.0426395903365203e-07, "loss": 0.2567, "step": 45849 }, { "epoch": 0.7969893445045108, "grad_norm": 0.9397637499860638, "learning_rate": 1.042467546364268e-07, "loss": 0.2735, "step": 45850 }, { "epoch": 0.7970067270420136, "grad_norm": 4.833692153598085, "learning_rate": 1.0422955149355755e-07, "loss": 0.4257, "step": 45851 }, { "epoch": 0.7970241095795164, "grad_norm": 2.3143915251157026, "learning_rate": 1.0421234960509889e-07, "loss": 0.154, "step": 45852 }, { "epoch": 0.7970414921170192, "grad_norm": 2.038723051603554, "learning_rate": 1.041951489711052e-07, "loss": 0.2465, "step": 45853 }, { "epoch": 0.7970588746545221, "grad_norm": 0.714914652442269, "learning_rate": 1.0417794959163112e-07, "loss": 0.1417, "step": 45854 }, { "epoch": 0.7970762571920249, "grad_norm": 0.7886029888451972, "learning_rate": 1.0416075146673109e-07, "loss": 0.2814, "step": 45855 }, { "epoch": 0.7970936397295277, "grad_norm": 1.1145781702999988, "learning_rate": 1.0414355459645963e-07, "loss": 0.2844, "step": 45856 }, { "epoch": 0.7971110222670306, "grad_norm": 1.4939106402227293, "learning_rate": 1.0412635898087113e-07, "loss": 0.1936, "step": 45857 }, { "epoch": 0.7971284048045334, "grad_norm": 0.6768470133412282, "learning_rate": 1.0410916462002034e-07, "loss": 0.1217, "step": 45858 }, { "epoch": 0.7971457873420362, "grad_norm": 1.7137415495360124, "learning_rate": 1.040919715139617e-07, "loss": 0.173, "step": 45859 }, { "epoch": 0.7971631698795391, "grad_norm": 1.3958050429700133, "learning_rate": 1.0407477966274958e-07, "loss": 0.2198, "step": 45860 }, { "epoch": 0.7971805524170419, "grad_norm": 1.3459047454189517, "learning_rate": 1.0405758906643857e-07, "loss": 0.1776, "step": 45861 }, { "epoch": 0.7971979349545447, "grad_norm": 1.3750589772920268, "learning_rate": 1.0404039972508317e-07, "loss": 0.3415, "step": 45862 }, { "epoch": 0.7972153174920474, "grad_norm": 1.206702575029965, "learning_rate": 1.0402321163873773e-07, "loss": 0.4351, "step": 45863 }, { "epoch": 0.7972327000295503, "grad_norm": 1.205139279185541, "learning_rate": 1.0400602480745685e-07, "loss": 0.1729, "step": 45864 }, { "epoch": 0.7972500825670531, "grad_norm": 1.384475050128957, "learning_rate": 1.0398883923129482e-07, "loss": 0.2006, "step": 45865 }, { "epoch": 0.7972674651045559, "grad_norm": 0.940591129209909, "learning_rate": 1.0397165491030652e-07, "loss": 0.1582, "step": 45866 }, { "epoch": 0.7972848476420588, "grad_norm": 1.574391378801912, "learning_rate": 1.0395447184454603e-07, "loss": 0.2977, "step": 45867 }, { "epoch": 0.7973022301795616, "grad_norm": 1.4867279247287533, "learning_rate": 1.0393729003406782e-07, "loss": 0.2656, "step": 45868 }, { "epoch": 0.7973196127170644, "grad_norm": 2.0681942827927835, "learning_rate": 1.039201094789266e-07, "loss": 0.2633, "step": 45869 }, { "epoch": 0.7973369952545672, "grad_norm": 1.3017233985386165, "learning_rate": 1.0390293017917668e-07, "loss": 0.1856, "step": 45870 }, { "epoch": 0.7973543777920701, "grad_norm": 1.0897792124086394, "learning_rate": 1.038857521348725e-07, "loss": 0.1771, "step": 45871 }, { "epoch": 0.7973717603295729, "grad_norm": 1.9629291273318386, "learning_rate": 1.0386857534606851e-07, "loss": 0.3868, "step": 45872 }, { "epoch": 0.7973891428670757, "grad_norm": 1.4612455275215956, "learning_rate": 1.0385139981281921e-07, "loss": 0.2485, "step": 45873 }, { "epoch": 0.7974065254045786, "grad_norm": 2.243298293645263, "learning_rate": 1.0383422553517896e-07, "loss": 0.1858, "step": 45874 }, { "epoch": 0.7974239079420814, "grad_norm": 1.396028904559211, "learning_rate": 1.0381705251320228e-07, "loss": 0.2132, "step": 45875 }, { "epoch": 0.7974412904795842, "grad_norm": 2.820088200360949, "learning_rate": 1.0379988074694334e-07, "loss": 0.3296, "step": 45876 }, { "epoch": 0.7974586730170871, "grad_norm": 1.3824921112302888, "learning_rate": 1.0378271023645702e-07, "loss": 0.2513, "step": 45877 }, { "epoch": 0.7974760555545899, "grad_norm": 1.4309071385957686, "learning_rate": 1.037655409817974e-07, "loss": 0.2073, "step": 45878 }, { "epoch": 0.7974934380920927, "grad_norm": 1.1429218472971494, "learning_rate": 1.037483729830192e-07, "loss": 0.229, "step": 45879 }, { "epoch": 0.7975108206295956, "grad_norm": 1.725852357447761, "learning_rate": 1.0373120624017633e-07, "loss": 0.2479, "step": 45880 }, { "epoch": 0.7975282031670984, "grad_norm": 1.9225886015962712, "learning_rate": 1.0371404075332368e-07, "loss": 0.1779, "step": 45881 }, { "epoch": 0.7975455857046012, "grad_norm": 1.6734903736025342, "learning_rate": 1.0369687652251541e-07, "loss": 0.2278, "step": 45882 }, { "epoch": 0.7975629682421039, "grad_norm": 1.6053934823233327, "learning_rate": 1.0367971354780608e-07, "loss": 0.2079, "step": 45883 }, { "epoch": 0.7975803507796068, "grad_norm": 2.179101509511406, "learning_rate": 1.0366255182924983e-07, "loss": 0.1883, "step": 45884 }, { "epoch": 0.7975977333171096, "grad_norm": 2.3638248865001996, "learning_rate": 1.0364539136690148e-07, "loss": 0.3512, "step": 45885 }, { "epoch": 0.7976151158546124, "grad_norm": 1.212342981626278, "learning_rate": 1.0362823216081507e-07, "loss": 0.2437, "step": 45886 }, { "epoch": 0.7976324983921153, "grad_norm": 1.1052124038665492, "learning_rate": 1.0361107421104504e-07, "loss": 0.1658, "step": 45887 }, { "epoch": 0.7976498809296181, "grad_norm": 3.0313563526713403, "learning_rate": 1.0359391751764573e-07, "loss": 0.2343, "step": 45888 }, { "epoch": 0.7976672634671209, "grad_norm": 1.4210371686877252, "learning_rate": 1.0357676208067173e-07, "loss": 0.1978, "step": 45889 }, { "epoch": 0.7976846460046237, "grad_norm": 1.8363365231000524, "learning_rate": 1.0355960790017726e-07, "loss": 0.2402, "step": 45890 }, { "epoch": 0.7977020285421266, "grad_norm": 1.306672231000045, "learning_rate": 1.0354245497621678e-07, "loss": 0.1866, "step": 45891 }, { "epoch": 0.7977194110796294, "grad_norm": 3.9832921266717674, "learning_rate": 1.0352530330884451e-07, "loss": 0.2893, "step": 45892 }, { "epoch": 0.7977367936171322, "grad_norm": 1.4149494108948106, "learning_rate": 1.0350815289811499e-07, "loss": 0.1866, "step": 45893 }, { "epoch": 0.7977541761546351, "grad_norm": 0.9592096939194699, "learning_rate": 1.0349100374408248e-07, "loss": 0.1987, "step": 45894 }, { "epoch": 0.7977715586921379, "grad_norm": 1.7811851671348147, "learning_rate": 1.0347385584680129e-07, "loss": 0.2285, "step": 45895 }, { "epoch": 0.7977889412296407, "grad_norm": 1.2094906797150227, "learning_rate": 1.0345670920632577e-07, "loss": 0.2823, "step": 45896 }, { "epoch": 0.7978063237671436, "grad_norm": 1.7468418005600839, "learning_rate": 1.034395638227104e-07, "loss": 0.2051, "step": 45897 }, { "epoch": 0.7978237063046464, "grad_norm": 2.4440065598174616, "learning_rate": 1.0342241969600962e-07, "loss": 0.2495, "step": 45898 }, { "epoch": 0.7978410888421492, "grad_norm": 0.882677157956987, "learning_rate": 1.034052768262773e-07, "loss": 0.2554, "step": 45899 }, { "epoch": 0.797858471379652, "grad_norm": 1.6752752323623457, "learning_rate": 1.033881352135682e-07, "loss": 0.248, "step": 45900 }, { "epoch": 0.7978758539171549, "grad_norm": 1.0130369223387727, "learning_rate": 1.0337099485793655e-07, "loss": 0.2508, "step": 45901 }, { "epoch": 0.7978932364546577, "grad_norm": 1.642435333993978, "learning_rate": 1.0335385575943667e-07, "loss": 0.2028, "step": 45902 }, { "epoch": 0.7979106189921604, "grad_norm": 0.9345346494240794, "learning_rate": 1.0333671791812282e-07, "loss": 0.237, "step": 45903 }, { "epoch": 0.7979280015296633, "grad_norm": 1.6493154375295018, "learning_rate": 1.0331958133404939e-07, "loss": 0.1616, "step": 45904 }, { "epoch": 0.7979453840671661, "grad_norm": 0.9230446921967143, "learning_rate": 1.0330244600727061e-07, "loss": 0.2696, "step": 45905 }, { "epoch": 0.7979627666046689, "grad_norm": 2.456347297068275, "learning_rate": 1.032853119378409e-07, "loss": 0.2304, "step": 45906 }, { "epoch": 0.7979801491421717, "grad_norm": 1.0649004338762103, "learning_rate": 1.0326817912581437e-07, "loss": 0.3287, "step": 45907 }, { "epoch": 0.7979975316796746, "grad_norm": 1.0591478844368647, "learning_rate": 1.0325104757124559e-07, "loss": 0.338, "step": 45908 }, { "epoch": 0.7980149142171774, "grad_norm": 1.3947669206139628, "learning_rate": 1.0323391727418879e-07, "loss": 0.242, "step": 45909 }, { "epoch": 0.7980322967546802, "grad_norm": 1.2339426871366044, "learning_rate": 1.0321678823469831e-07, "loss": 0.2848, "step": 45910 }, { "epoch": 0.7980496792921831, "grad_norm": 1.3099987622574714, "learning_rate": 1.0319966045282807e-07, "loss": 0.1869, "step": 45911 }, { "epoch": 0.7980670618296859, "grad_norm": 0.949873415567708, "learning_rate": 1.0318253392863274e-07, "loss": 0.2519, "step": 45912 }, { "epoch": 0.7980844443671887, "grad_norm": 0.9198977391597161, "learning_rate": 1.0316540866216656e-07, "loss": 0.236, "step": 45913 }, { "epoch": 0.7981018269046916, "grad_norm": 1.0213778004105625, "learning_rate": 1.0314828465348368e-07, "loss": 0.2925, "step": 45914 }, { "epoch": 0.7981192094421944, "grad_norm": 1.071635269145951, "learning_rate": 1.0313116190263832e-07, "loss": 0.2072, "step": 45915 }, { "epoch": 0.7981365919796972, "grad_norm": 1.0923701574124502, "learning_rate": 1.0311404040968519e-07, "loss": 0.1849, "step": 45916 }, { "epoch": 0.7981539745172, "grad_norm": 1.2349614008740684, "learning_rate": 1.0309692017467802e-07, "loss": 0.3065, "step": 45917 }, { "epoch": 0.7981713570547029, "grad_norm": 1.2533595154762178, "learning_rate": 1.0307980119767135e-07, "loss": 0.1826, "step": 45918 }, { "epoch": 0.7981887395922057, "grad_norm": 1.8952161123857403, "learning_rate": 1.0306268347871927e-07, "loss": 0.3585, "step": 45919 }, { "epoch": 0.7982061221297085, "grad_norm": 1.3915284473262706, "learning_rate": 1.030455670178762e-07, "loss": 0.32, "step": 45920 }, { "epoch": 0.7982235046672114, "grad_norm": 2.0394146937368873, "learning_rate": 1.0302845181519638e-07, "loss": 0.219, "step": 45921 }, { "epoch": 0.7982408872047141, "grad_norm": 1.8179469449927355, "learning_rate": 1.0301133787073401e-07, "loss": 0.3512, "step": 45922 }, { "epoch": 0.7982582697422169, "grad_norm": 1.394522707172477, "learning_rate": 1.0299422518454337e-07, "loss": 0.2268, "step": 45923 }, { "epoch": 0.7982756522797197, "grad_norm": 1.9445550801534266, "learning_rate": 1.0297711375667862e-07, "loss": 0.2614, "step": 45924 }, { "epoch": 0.7982930348172226, "grad_norm": 1.4358350220600726, "learning_rate": 1.0296000358719404e-07, "loss": 0.3608, "step": 45925 }, { "epoch": 0.7983104173547254, "grad_norm": 1.6914715591640186, "learning_rate": 1.0294289467614391e-07, "loss": 0.2855, "step": 45926 }, { "epoch": 0.7983277998922282, "grad_norm": 1.5747872028939345, "learning_rate": 1.0292578702358223e-07, "loss": 0.1769, "step": 45927 }, { "epoch": 0.7983451824297311, "grad_norm": 1.5448659826182516, "learning_rate": 1.0290868062956354e-07, "loss": 0.2788, "step": 45928 }, { "epoch": 0.7983625649672339, "grad_norm": 1.6869277938884353, "learning_rate": 1.0289157549414207e-07, "loss": 0.1751, "step": 45929 }, { "epoch": 0.7983799475047367, "grad_norm": 1.4733120018473926, "learning_rate": 1.0287447161737168e-07, "loss": 0.1656, "step": 45930 }, { "epoch": 0.7983973300422396, "grad_norm": 2.8894357211689736, "learning_rate": 1.0285736899930686e-07, "loss": 0.2262, "step": 45931 }, { "epoch": 0.7984147125797424, "grad_norm": 6.009626525557517, "learning_rate": 1.0284026764000175e-07, "loss": 0.1995, "step": 45932 }, { "epoch": 0.7984320951172452, "grad_norm": 1.9398732767699491, "learning_rate": 1.0282316753951053e-07, "loss": 0.3491, "step": 45933 }, { "epoch": 0.798449477654748, "grad_norm": 1.2949221118533767, "learning_rate": 1.0280606869788743e-07, "loss": 0.1968, "step": 45934 }, { "epoch": 0.7984668601922509, "grad_norm": 2.2177452941922535, "learning_rate": 1.027889711151867e-07, "loss": 0.1765, "step": 45935 }, { "epoch": 0.7984842427297537, "grad_norm": 1.4219648125450748, "learning_rate": 1.0277187479146238e-07, "loss": 0.147, "step": 45936 }, { "epoch": 0.7985016252672565, "grad_norm": 0.7338779322154998, "learning_rate": 1.0275477972676883e-07, "loss": 0.3587, "step": 45937 }, { "epoch": 0.7985190078047594, "grad_norm": 1.7122652809760455, "learning_rate": 1.0273768592115995e-07, "loss": 0.2287, "step": 45938 }, { "epoch": 0.7985363903422622, "grad_norm": 1.6793101194000837, "learning_rate": 1.027205933746903e-07, "loss": 0.206, "step": 45939 }, { "epoch": 0.798553772879765, "grad_norm": 1.1504178337716424, "learning_rate": 1.0270350208741385e-07, "loss": 0.1611, "step": 45940 }, { "epoch": 0.7985711554172679, "grad_norm": 1.1342969455270242, "learning_rate": 1.0268641205938477e-07, "loss": 0.1973, "step": 45941 }, { "epoch": 0.7985885379547706, "grad_norm": 1.20557556811577, "learning_rate": 1.0266932329065725e-07, "loss": 0.1579, "step": 45942 }, { "epoch": 0.7986059204922734, "grad_norm": 1.8569205185344415, "learning_rate": 1.0265223578128551e-07, "loss": 0.2289, "step": 45943 }, { "epoch": 0.7986233030297762, "grad_norm": 2.0606012726486824, "learning_rate": 1.0263514953132363e-07, "loss": 0.2137, "step": 45944 }, { "epoch": 0.7986406855672791, "grad_norm": 1.5653723969636373, "learning_rate": 1.0261806454082578e-07, "loss": 0.1452, "step": 45945 }, { "epoch": 0.7986580681047819, "grad_norm": 1.2674782882362579, "learning_rate": 1.0260098080984603e-07, "loss": 0.1995, "step": 45946 }, { "epoch": 0.7986754506422847, "grad_norm": 1.4807291376646836, "learning_rate": 1.0258389833843873e-07, "loss": 0.2354, "step": 45947 }, { "epoch": 0.7986928331797876, "grad_norm": 1.4676778615534936, "learning_rate": 1.02566817126658e-07, "loss": 0.1306, "step": 45948 }, { "epoch": 0.7987102157172904, "grad_norm": 1.6039606283799661, "learning_rate": 1.0254973717455768e-07, "loss": 0.1592, "step": 45949 }, { "epoch": 0.7987275982547932, "grad_norm": 1.3726156911255702, "learning_rate": 1.0253265848219222e-07, "loss": 0.2058, "step": 45950 }, { "epoch": 0.798744980792296, "grad_norm": 1.9140418784894646, "learning_rate": 1.0251558104961571e-07, "loss": 0.1453, "step": 45951 }, { "epoch": 0.7987623633297989, "grad_norm": 1.2435799101981153, "learning_rate": 1.0249850487688216e-07, "loss": 0.1381, "step": 45952 }, { "epoch": 0.7987797458673017, "grad_norm": 1.9265106519132107, "learning_rate": 1.024814299640458e-07, "loss": 0.2084, "step": 45953 }, { "epoch": 0.7987971284048045, "grad_norm": 1.212255090919148, "learning_rate": 1.0246435631116063e-07, "loss": 0.2321, "step": 45954 }, { "epoch": 0.7988145109423074, "grad_norm": 1.300891041364936, "learning_rate": 1.024472839182809e-07, "loss": 0.1416, "step": 45955 }, { "epoch": 0.7988318934798102, "grad_norm": 1.3241454223916653, "learning_rate": 1.0243021278546065e-07, "loss": 0.1544, "step": 45956 }, { "epoch": 0.798849276017313, "grad_norm": 1.4696276530683683, "learning_rate": 1.0241314291275383e-07, "loss": 0.1389, "step": 45957 }, { "epoch": 0.7988666585548159, "grad_norm": 1.852152643637852, "learning_rate": 1.0239607430021491e-07, "loss": 0.167, "step": 45958 }, { "epoch": 0.7988840410923187, "grad_norm": 1.5344840293659874, "learning_rate": 1.0237900694789775e-07, "loss": 0.1825, "step": 45959 }, { "epoch": 0.7989014236298215, "grad_norm": 0.9636883062220144, "learning_rate": 1.0236194085585647e-07, "loss": 0.1009, "step": 45960 }, { "epoch": 0.7989188061673244, "grad_norm": 1.5178260210963186, "learning_rate": 1.0234487602414521e-07, "loss": 0.188, "step": 45961 }, { "epoch": 0.7989361887048271, "grad_norm": 3.8270263366704684, "learning_rate": 1.0232781245281796e-07, "loss": 0.1656, "step": 45962 }, { "epoch": 0.7989535712423299, "grad_norm": 0.8444523436118202, "learning_rate": 1.0231075014192897e-07, "loss": 0.1265, "step": 45963 }, { "epoch": 0.7989709537798327, "grad_norm": 2.2282380527964505, "learning_rate": 1.0229368909153213e-07, "loss": 0.1337, "step": 45964 }, { "epoch": 0.7989883363173356, "grad_norm": 1.0061271836741899, "learning_rate": 1.0227662930168152e-07, "loss": 0.1162, "step": 45965 }, { "epoch": 0.7990057188548384, "grad_norm": 1.5423234501664485, "learning_rate": 1.0225957077243141e-07, "loss": 0.1742, "step": 45966 }, { "epoch": 0.7990231013923412, "grad_norm": 1.4659710138997686, "learning_rate": 1.0224251350383589e-07, "loss": 0.1303, "step": 45967 }, { "epoch": 0.7990404839298441, "grad_norm": 2.2732955019519787, "learning_rate": 1.0222545749594874e-07, "loss": 0.1999, "step": 45968 }, { "epoch": 0.7990578664673469, "grad_norm": 1.060023608768742, "learning_rate": 1.0220840274882408e-07, "loss": 0.1258, "step": 45969 }, { "epoch": 0.7990752490048497, "grad_norm": 1.4718925266873022, "learning_rate": 1.0219134926251616e-07, "loss": 0.1247, "step": 45970 }, { "epoch": 0.7990926315423525, "grad_norm": 1.0493239811409907, "learning_rate": 1.0217429703707891e-07, "loss": 0.115, "step": 45971 }, { "epoch": 0.7991100140798554, "grad_norm": 0.960668423389998, "learning_rate": 1.0215724607256642e-07, "loss": 0.1225, "step": 45972 }, { "epoch": 0.7991273966173582, "grad_norm": 0.9649201344623147, "learning_rate": 1.0214019636903265e-07, "loss": 0.1305, "step": 45973 }, { "epoch": 0.799144779154861, "grad_norm": 1.0918981776532501, "learning_rate": 1.0212314792653177e-07, "loss": 0.1071, "step": 45974 }, { "epoch": 0.7991621616923639, "grad_norm": 1.1115078691214142, "learning_rate": 1.0210610074511772e-07, "loss": 0.232, "step": 45975 }, { "epoch": 0.7991795442298667, "grad_norm": 1.4755389190868393, "learning_rate": 1.0208905482484453e-07, "loss": 0.1249, "step": 45976 }, { "epoch": 0.7991969267673695, "grad_norm": 6.064498190504647, "learning_rate": 1.0207201016576611e-07, "loss": 0.1373, "step": 45977 }, { "epoch": 0.7992143093048724, "grad_norm": 1.3222483326497136, "learning_rate": 1.0205496676793679e-07, "loss": 0.0901, "step": 45978 }, { "epoch": 0.7992316918423752, "grad_norm": 2.694306902071439, "learning_rate": 1.0203792463141037e-07, "loss": 0.1421, "step": 45979 }, { "epoch": 0.799249074379878, "grad_norm": 1.2093485690452024, "learning_rate": 1.0202088375624096e-07, "loss": 0.122, "step": 45980 }, { "epoch": 0.7992664569173809, "grad_norm": 1.585077318598726, "learning_rate": 1.0200384414248253e-07, "loss": 0.1746, "step": 45981 }, { "epoch": 0.7992838394548836, "grad_norm": 1.8841912446860418, "learning_rate": 1.0198680579018909e-07, "loss": 0.1629, "step": 45982 }, { "epoch": 0.7993012219923864, "grad_norm": 0.9547229002751574, "learning_rate": 1.0196976869941465e-07, "loss": 0.1386, "step": 45983 }, { "epoch": 0.7993186045298892, "grad_norm": 1.0235501479202453, "learning_rate": 1.0195273287021322e-07, "loss": 0.1171, "step": 45984 }, { "epoch": 0.7993359870673921, "grad_norm": 0.9120291699696668, "learning_rate": 1.019356983026386e-07, "loss": 0.1308, "step": 45985 }, { "epoch": 0.7993533696048949, "grad_norm": 2.677520351048679, "learning_rate": 1.0191866499674528e-07, "loss": 0.1418, "step": 45986 }, { "epoch": 0.7993707521423977, "grad_norm": 1.8218241024192547, "learning_rate": 1.0190163295258675e-07, "loss": 0.1394, "step": 45987 }, { "epoch": 0.7993881346799006, "grad_norm": 1.7012890185150993, "learning_rate": 1.0188460217021706e-07, "loss": 0.1718, "step": 45988 }, { "epoch": 0.7994055172174034, "grad_norm": 1.735731857901386, "learning_rate": 1.0186757264969042e-07, "loss": 0.1776, "step": 45989 }, { "epoch": 0.7994228997549062, "grad_norm": 0.8257701869845352, "learning_rate": 1.0185054439106072e-07, "loss": 0.0855, "step": 45990 }, { "epoch": 0.799440282292409, "grad_norm": 1.349014022044478, "learning_rate": 1.0183351739438184e-07, "loss": 0.1526, "step": 45991 }, { "epoch": 0.7994576648299119, "grad_norm": 1.511427407819454, "learning_rate": 1.0181649165970785e-07, "loss": 0.1484, "step": 45992 }, { "epoch": 0.7994750473674147, "grad_norm": 2.239675119278396, "learning_rate": 1.017994671870927e-07, "loss": 0.1261, "step": 45993 }, { "epoch": 0.7994924299049175, "grad_norm": 0.8817584781634863, "learning_rate": 1.0178244397659025e-07, "loss": 0.1724, "step": 45994 }, { "epoch": 0.7995098124424204, "grad_norm": 2.6978111986300406, "learning_rate": 1.0176542202825455e-07, "loss": 0.1833, "step": 45995 }, { "epoch": 0.7995271949799232, "grad_norm": 1.741236732308877, "learning_rate": 1.0174840134213942e-07, "loss": 0.1195, "step": 45996 }, { "epoch": 0.799544577517426, "grad_norm": 2.027665393221779, "learning_rate": 1.0173138191829905e-07, "loss": 0.1792, "step": 45997 }, { "epoch": 0.7995619600549289, "grad_norm": 0.9462485285573926, "learning_rate": 1.0171436375678721e-07, "loss": 0.1071, "step": 45998 }, { "epoch": 0.7995793425924317, "grad_norm": 1.8117357353270098, "learning_rate": 1.0169734685765807e-07, "loss": 0.1358, "step": 45999 }, { "epoch": 0.7995967251299345, "grad_norm": 1.2879761724700833, "learning_rate": 1.0168033122096509e-07, "loss": 0.1247, "step": 46000 }, { "epoch": 0.7996141076674373, "grad_norm": 1.2519643172353345, "learning_rate": 1.0166331684676261e-07, "loss": 0.1314, "step": 46001 }, { "epoch": 0.7996314902049401, "grad_norm": 3.4122618008805556, "learning_rate": 1.0164630373510441e-07, "loss": 0.1628, "step": 46002 }, { "epoch": 0.7996488727424429, "grad_norm": 1.1869361733879438, "learning_rate": 1.0162929188604447e-07, "loss": 0.1082, "step": 46003 }, { "epoch": 0.7996662552799457, "grad_norm": 2.826987237089119, "learning_rate": 1.016122812996365e-07, "loss": 0.196, "step": 46004 }, { "epoch": 0.7996836378174486, "grad_norm": 3.7048903227878447, "learning_rate": 1.0159527197593493e-07, "loss": 0.226, "step": 46005 }, { "epoch": 0.7997010203549514, "grad_norm": 2.001020264167494, "learning_rate": 1.0157826391499314e-07, "loss": 0.1697, "step": 46006 }, { "epoch": 0.7997184028924542, "grad_norm": 1.4229857244788766, "learning_rate": 1.0156125711686525e-07, "loss": 0.0906, "step": 46007 }, { "epoch": 0.799735785429957, "grad_norm": 1.154585219065948, "learning_rate": 1.01544251581605e-07, "loss": 0.1296, "step": 46008 }, { "epoch": 0.7997531679674599, "grad_norm": 1.8000524851526605, "learning_rate": 1.0152724730926665e-07, "loss": 0.1209, "step": 46009 }, { "epoch": 0.7997705505049627, "grad_norm": 1.6660034980750118, "learning_rate": 1.0151024429990379e-07, "loss": 0.1458, "step": 46010 }, { "epoch": 0.7997879330424655, "grad_norm": 1.152201360822137, "learning_rate": 1.0149324255357044e-07, "loss": 0.1065, "step": 46011 }, { "epoch": 0.7998053155799684, "grad_norm": 0.933114110540367, "learning_rate": 1.0147624207032046e-07, "loss": 0.1754, "step": 46012 }, { "epoch": 0.7998226981174712, "grad_norm": 0.9107297719458686, "learning_rate": 1.0145924285020773e-07, "loss": 0.1493, "step": 46013 }, { "epoch": 0.799840080654974, "grad_norm": 1.3858777668902045, "learning_rate": 1.0144224489328612e-07, "loss": 0.1478, "step": 46014 }, { "epoch": 0.7998574631924769, "grad_norm": 1.072262673845236, "learning_rate": 1.0142524819960952e-07, "loss": 0.1549, "step": 46015 }, { "epoch": 0.7998748457299797, "grad_norm": 1.411036985684255, "learning_rate": 1.0140825276923165e-07, "loss": 0.133, "step": 46016 }, { "epoch": 0.7998922282674825, "grad_norm": 0.9789954349283835, "learning_rate": 1.0139125860220666e-07, "loss": 0.1246, "step": 46017 }, { "epoch": 0.7999096108049853, "grad_norm": 0.9537292215469426, "learning_rate": 1.0137426569858837e-07, "loss": 0.0937, "step": 46018 }, { "epoch": 0.7999269933424882, "grad_norm": 1.8456769546946028, "learning_rate": 1.013572740584303e-07, "loss": 0.1814, "step": 46019 }, { "epoch": 0.799944375879991, "grad_norm": 1.2497149900616658, "learning_rate": 1.013402836817867e-07, "loss": 0.1602, "step": 46020 }, { "epoch": 0.7999617584174938, "grad_norm": 1.827355131589904, "learning_rate": 1.0132329456871124e-07, "loss": 0.2033, "step": 46021 }, { "epoch": 0.7999791409549966, "grad_norm": 1.4859682280927222, "learning_rate": 1.0130630671925783e-07, "loss": 0.1288, "step": 46022 }, { "epoch": 0.7999965234924994, "grad_norm": 1.7530473423248947, "learning_rate": 1.0128932013348029e-07, "loss": 0.168, "step": 46023 }, { "epoch": 0.8000139060300022, "grad_norm": 1.334342637765125, "learning_rate": 1.0127233481143243e-07, "loss": 0.1506, "step": 46024 }, { "epoch": 0.800031288567505, "grad_norm": 1.8564040501260557, "learning_rate": 1.012553507531681e-07, "loss": 0.1821, "step": 46025 }, { "epoch": 0.8000486711050079, "grad_norm": 2.0702822183409237, "learning_rate": 1.0123836795874114e-07, "loss": 0.1768, "step": 46026 }, { "epoch": 0.8000660536425107, "grad_norm": 1.442457531175269, "learning_rate": 1.0122138642820527e-07, "loss": 0.2188, "step": 46027 }, { "epoch": 0.8000834361800135, "grad_norm": 1.1160714091318666, "learning_rate": 1.0120440616161452e-07, "loss": 0.1601, "step": 46028 }, { "epoch": 0.8001008187175164, "grad_norm": 1.8314591627980883, "learning_rate": 1.0118742715902267e-07, "loss": 0.2255, "step": 46029 }, { "epoch": 0.8001182012550192, "grad_norm": 5.882207687386413, "learning_rate": 1.0117044942048353e-07, "loss": 0.1974, "step": 46030 }, { "epoch": 0.800135583792522, "grad_norm": 2.5717701927851473, "learning_rate": 1.0115347294605065e-07, "loss": 0.2451, "step": 46031 }, { "epoch": 0.8001529663300249, "grad_norm": 1.4047296710350645, "learning_rate": 1.0113649773577815e-07, "loss": 0.1175, "step": 46032 }, { "epoch": 0.8001703488675277, "grad_norm": 1.1321933677332086, "learning_rate": 1.0111952378971977e-07, "loss": 0.212, "step": 46033 }, { "epoch": 0.8001877314050305, "grad_norm": 1.407565551227737, "learning_rate": 1.011025511079292e-07, "loss": 0.1576, "step": 46034 }, { "epoch": 0.8002051139425334, "grad_norm": 1.8408989357116488, "learning_rate": 1.0108557969046022e-07, "loss": 0.2327, "step": 46035 }, { "epoch": 0.8002224964800362, "grad_norm": 0.7986123425760925, "learning_rate": 1.0106860953736695e-07, "loss": 0.142, "step": 46036 }, { "epoch": 0.800239879017539, "grad_norm": 1.1343314467002916, "learning_rate": 1.0105164064870286e-07, "loss": 0.2453, "step": 46037 }, { "epoch": 0.8002572615550418, "grad_norm": 1.1276028298739735, "learning_rate": 1.0103467302452173e-07, "loss": 0.2693, "step": 46038 }, { "epoch": 0.8002746440925447, "grad_norm": 2.50840804308958, "learning_rate": 1.010177066648773e-07, "loss": 0.2175, "step": 46039 }, { "epoch": 0.8002920266300475, "grad_norm": 1.9457123388002282, "learning_rate": 1.0100074156982363e-07, "loss": 0.201, "step": 46040 }, { "epoch": 0.8003094091675503, "grad_norm": 1.1423315360049007, "learning_rate": 1.009837777394143e-07, "loss": 0.1102, "step": 46041 }, { "epoch": 0.800326791705053, "grad_norm": 0.8545033772527092, "learning_rate": 1.009668151737031e-07, "loss": 0.1986, "step": 46042 }, { "epoch": 0.8003441742425559, "grad_norm": 0.8954767205272487, "learning_rate": 1.009498538727438e-07, "loss": 0.2043, "step": 46043 }, { "epoch": 0.8003615567800587, "grad_norm": 9.006416942558284, "learning_rate": 1.0093289383659015e-07, "loss": 0.3287, "step": 46044 }, { "epoch": 0.8003789393175615, "grad_norm": 1.0345018618461737, "learning_rate": 1.0091593506529595e-07, "loss": 0.1754, "step": 46045 }, { "epoch": 0.8003963218550644, "grad_norm": 1.7453414660751447, "learning_rate": 1.0089897755891475e-07, "loss": 0.1966, "step": 46046 }, { "epoch": 0.8004137043925672, "grad_norm": 1.2526551816690517, "learning_rate": 1.0088202131750056e-07, "loss": 0.3102, "step": 46047 }, { "epoch": 0.80043108693007, "grad_norm": 1.443237072569707, "learning_rate": 1.0086506634110703e-07, "loss": 0.2503, "step": 46048 }, { "epoch": 0.8004484694675729, "grad_norm": 1.3947972428017568, "learning_rate": 1.008481126297881e-07, "loss": 0.2009, "step": 46049 }, { "epoch": 0.8004658520050757, "grad_norm": 2.069591265198497, "learning_rate": 1.0083116018359694e-07, "loss": 0.2309, "step": 46050 }, { "epoch": 0.8004832345425785, "grad_norm": 1.15509960148904, "learning_rate": 1.008142090025878e-07, "loss": 0.2023, "step": 46051 }, { "epoch": 0.8005006170800814, "grad_norm": 2.2224733461985613, "learning_rate": 1.0079725908681425e-07, "loss": 0.2121, "step": 46052 }, { "epoch": 0.8005179996175842, "grad_norm": 2.117051048580951, "learning_rate": 1.0078031043633001e-07, "loss": 0.1483, "step": 46053 }, { "epoch": 0.800535382155087, "grad_norm": 1.3812042235257957, "learning_rate": 1.0076336305118871e-07, "loss": 0.1721, "step": 46054 }, { "epoch": 0.8005527646925898, "grad_norm": 2.1962655324095177, "learning_rate": 1.0074641693144437e-07, "loss": 0.2004, "step": 46055 }, { "epoch": 0.8005701472300927, "grad_norm": 1.2696740946127443, "learning_rate": 1.0072947207715037e-07, "loss": 0.1483, "step": 46056 }, { "epoch": 0.8005875297675955, "grad_norm": 1.8250809827232959, "learning_rate": 1.0071252848836054e-07, "loss": 0.1781, "step": 46057 }, { "epoch": 0.8006049123050983, "grad_norm": 1.8525884186357973, "learning_rate": 1.0069558616512841e-07, "loss": 0.3766, "step": 46058 }, { "epoch": 0.8006222948426012, "grad_norm": 1.484641578507826, "learning_rate": 1.0067864510750801e-07, "loss": 0.2426, "step": 46059 }, { "epoch": 0.800639677380104, "grad_norm": 1.546875660928651, "learning_rate": 1.0066170531555285e-07, "loss": 0.2748, "step": 46060 }, { "epoch": 0.8006570599176067, "grad_norm": 1.6436864210187325, "learning_rate": 1.0064476678931666e-07, "loss": 0.1929, "step": 46061 }, { "epoch": 0.8006744424551095, "grad_norm": 0.9467602585941022, "learning_rate": 1.0062782952885313e-07, "loss": 0.2033, "step": 46062 }, { "epoch": 0.8006918249926124, "grad_norm": 1.2752234435380465, "learning_rate": 1.0061089353421587e-07, "loss": 0.2313, "step": 46063 }, { "epoch": 0.8007092075301152, "grad_norm": 3.383286792238772, "learning_rate": 1.0059395880545863e-07, "loss": 0.1769, "step": 46064 }, { "epoch": 0.800726590067618, "grad_norm": 1.2893498486710462, "learning_rate": 1.0057702534263512e-07, "loss": 0.2728, "step": 46065 }, { "epoch": 0.8007439726051209, "grad_norm": 2.0197249846634664, "learning_rate": 1.0056009314579878e-07, "loss": 0.2388, "step": 46066 }, { "epoch": 0.8007613551426237, "grad_norm": 1.1693495719072848, "learning_rate": 1.0054316221500359e-07, "loss": 0.2059, "step": 46067 }, { "epoch": 0.8007787376801265, "grad_norm": 2.062481148183042, "learning_rate": 1.0052623255030324e-07, "loss": 0.2445, "step": 46068 }, { "epoch": 0.8007961202176294, "grad_norm": 1.3261930740615053, "learning_rate": 1.0050930415175091e-07, "loss": 0.2427, "step": 46069 }, { "epoch": 0.8008135027551322, "grad_norm": 2.3730112777031285, "learning_rate": 1.0049237701940078e-07, "loss": 0.1977, "step": 46070 }, { "epoch": 0.800830885292635, "grad_norm": 1.4116057153201436, "learning_rate": 1.0047545115330624e-07, "loss": 0.247, "step": 46071 }, { "epoch": 0.8008482678301378, "grad_norm": 1.2312331686438862, "learning_rate": 1.00458526553521e-07, "loss": 0.4427, "step": 46072 }, { "epoch": 0.8008656503676407, "grad_norm": 2.521857218123303, "learning_rate": 1.004416032200987e-07, "loss": 0.3315, "step": 46073 }, { "epoch": 0.8008830329051435, "grad_norm": 1.3900855996851806, "learning_rate": 1.00424681153093e-07, "loss": 0.2156, "step": 46074 }, { "epoch": 0.8009004154426463, "grad_norm": 5.981984434407479, "learning_rate": 1.0040776035255749e-07, "loss": 0.2668, "step": 46075 }, { "epoch": 0.8009177979801492, "grad_norm": 1.0375272581525596, "learning_rate": 1.0039084081854582e-07, "loss": 0.1654, "step": 46076 }, { "epoch": 0.800935180517652, "grad_norm": 1.148684504543588, "learning_rate": 1.003739225511115e-07, "loss": 0.2646, "step": 46077 }, { "epoch": 0.8009525630551548, "grad_norm": 1.4648693649324485, "learning_rate": 1.0035700555030841e-07, "loss": 0.1647, "step": 46078 }, { "epoch": 0.8009699455926577, "grad_norm": 3.0379548571760018, "learning_rate": 1.0034008981619002e-07, "loss": 0.3838, "step": 46079 }, { "epoch": 0.8009873281301605, "grad_norm": 1.07334299490962, "learning_rate": 1.0032317534880996e-07, "loss": 0.2071, "step": 46080 }, { "epoch": 0.8010047106676632, "grad_norm": 1.4648489842127879, "learning_rate": 1.0030626214822185e-07, "loss": 0.1825, "step": 46081 }, { "epoch": 0.801022093205166, "grad_norm": 1.1108005141523585, "learning_rate": 1.0028935021447927e-07, "loss": 0.2068, "step": 46082 }, { "epoch": 0.8010394757426689, "grad_norm": 2.5732765341804105, "learning_rate": 1.0027243954763582e-07, "loss": 0.1716, "step": 46083 }, { "epoch": 0.8010568582801717, "grad_norm": 1.8088767039088078, "learning_rate": 1.0025553014774512e-07, "loss": 0.3037, "step": 46084 }, { "epoch": 0.8010742408176745, "grad_norm": 1.0868284111142954, "learning_rate": 1.0023862201486066e-07, "loss": 0.1712, "step": 46085 }, { "epoch": 0.8010916233551774, "grad_norm": 1.3346933544874868, "learning_rate": 1.0022171514903626e-07, "loss": 0.1311, "step": 46086 }, { "epoch": 0.8011090058926802, "grad_norm": 1.249172457826523, "learning_rate": 1.0020480955032551e-07, "loss": 0.1624, "step": 46087 }, { "epoch": 0.801126388430183, "grad_norm": 1.3725530623817963, "learning_rate": 1.0018790521878174e-07, "loss": 0.1535, "step": 46088 }, { "epoch": 0.8011437709676859, "grad_norm": 1.5726694358805564, "learning_rate": 1.0017100215445845e-07, "loss": 0.236, "step": 46089 }, { "epoch": 0.8011611535051887, "grad_norm": 1.575511614572299, "learning_rate": 1.0015410035740962e-07, "loss": 0.2158, "step": 46090 }, { "epoch": 0.8011785360426915, "grad_norm": 1.6929710835755816, "learning_rate": 1.0013719982768859e-07, "loss": 0.1876, "step": 46091 }, { "epoch": 0.8011959185801943, "grad_norm": 1.2863432490367241, "learning_rate": 1.0012030056534898e-07, "loss": 0.1704, "step": 46092 }, { "epoch": 0.8012133011176972, "grad_norm": 1.0640417712414456, "learning_rate": 1.001034025704443e-07, "loss": 0.1853, "step": 46093 }, { "epoch": 0.8012306836552, "grad_norm": 1.1297419550861338, "learning_rate": 1.0008650584302814e-07, "loss": 0.3371, "step": 46094 }, { "epoch": 0.8012480661927028, "grad_norm": 3.9261507474954254, "learning_rate": 1.0006961038315409e-07, "loss": 0.324, "step": 46095 }, { "epoch": 0.8012654487302057, "grad_norm": 1.075626317336763, "learning_rate": 1.0005271619087562e-07, "loss": 0.1785, "step": 46096 }, { "epoch": 0.8012828312677085, "grad_norm": 2.610396516689352, "learning_rate": 1.0003582326624615e-07, "loss": 0.1894, "step": 46097 }, { "epoch": 0.8013002138052113, "grad_norm": 1.0202555276118959, "learning_rate": 1.0001893160931963e-07, "loss": 0.1951, "step": 46098 }, { "epoch": 0.8013175963427142, "grad_norm": 1.386395195120798, "learning_rate": 1.0000204122014926e-07, "loss": 0.3654, "step": 46099 }, { "epoch": 0.801334978880217, "grad_norm": 0.6972995366754892, "learning_rate": 9.998515209878871e-08, "loss": 0.1938, "step": 46100 }, { "epoch": 0.8013523614177197, "grad_norm": 1.7970706900092253, "learning_rate": 9.996826424529148e-08, "loss": 0.2437, "step": 46101 }, { "epoch": 0.8013697439552225, "grad_norm": 1.5987792702421109, "learning_rate": 9.995137765971112e-08, "loss": 0.2197, "step": 46102 }, { "epoch": 0.8013871264927254, "grad_norm": 3.2672175288167518, "learning_rate": 9.993449234210111e-08, "loss": 0.1881, "step": 46103 }, { "epoch": 0.8014045090302282, "grad_norm": 0.9616469697710981, "learning_rate": 9.991760829251494e-08, "loss": 0.1968, "step": 46104 }, { "epoch": 0.801421891567731, "grad_norm": 1.311776513642374, "learning_rate": 9.99007255110061e-08, "loss": 0.4199, "step": 46105 }, { "epoch": 0.8014392741052339, "grad_norm": 1.886505585998841, "learning_rate": 9.988384399762839e-08, "loss": 0.3673, "step": 46106 }, { "epoch": 0.8014566566427367, "grad_norm": 2.0009887757746996, "learning_rate": 9.986696375243497e-08, "loss": 0.2115, "step": 46107 }, { "epoch": 0.8014740391802395, "grad_norm": 1.3920584980393564, "learning_rate": 9.985008477547935e-08, "loss": 0.3298, "step": 46108 }, { "epoch": 0.8014914217177423, "grad_norm": 1.4823694564073415, "learning_rate": 9.983320706681531e-08, "loss": 0.1514, "step": 46109 }, { "epoch": 0.8015088042552452, "grad_norm": 1.3649812562178796, "learning_rate": 9.981633062649613e-08, "loss": 0.1889, "step": 46110 }, { "epoch": 0.801526186792748, "grad_norm": 2.024566757320553, "learning_rate": 9.979945545457536e-08, "loss": 0.138, "step": 46111 }, { "epoch": 0.8015435693302508, "grad_norm": 1.5641999259537949, "learning_rate": 9.978258155110653e-08, "loss": 0.2453, "step": 46112 }, { "epoch": 0.8015609518677537, "grad_norm": 1.4913368537016565, "learning_rate": 9.9765708916143e-08, "loss": 0.1457, "step": 46113 }, { "epoch": 0.8015783344052565, "grad_norm": 1.7895795035574853, "learning_rate": 9.974883754973834e-08, "loss": 0.1611, "step": 46114 }, { "epoch": 0.8015957169427593, "grad_norm": 3.2104110826939456, "learning_rate": 9.9731967451946e-08, "loss": 0.2347, "step": 46115 }, { "epoch": 0.8016130994802622, "grad_norm": 1.7666792586169322, "learning_rate": 9.971509862281934e-08, "loss": 0.1565, "step": 46116 }, { "epoch": 0.801630482017765, "grad_norm": 1.3989707816460515, "learning_rate": 9.96982310624121e-08, "loss": 0.2121, "step": 46117 }, { "epoch": 0.8016478645552678, "grad_norm": 2.133489029296897, "learning_rate": 9.968136477077754e-08, "loss": 0.2375, "step": 46118 }, { "epoch": 0.8016652470927706, "grad_norm": 1.3333334272325579, "learning_rate": 9.966449974796932e-08, "loss": 0.2219, "step": 46119 }, { "epoch": 0.8016826296302735, "grad_norm": 2.0879221264912213, "learning_rate": 9.964763599404047e-08, "loss": 0.276, "step": 46120 }, { "epoch": 0.8017000121677762, "grad_norm": 1.4922494368772077, "learning_rate": 9.96307735090448e-08, "loss": 0.1752, "step": 46121 }, { "epoch": 0.801717394705279, "grad_norm": 2.0779137353318116, "learning_rate": 9.961391229303573e-08, "loss": 0.1641, "step": 46122 }, { "epoch": 0.8017347772427819, "grad_norm": 2.081689878392958, "learning_rate": 9.959705234606658e-08, "loss": 0.2102, "step": 46123 }, { "epoch": 0.8017521597802847, "grad_norm": 1.44703259917517, "learning_rate": 9.958019366819076e-08, "loss": 0.192, "step": 46124 }, { "epoch": 0.8017695423177875, "grad_norm": 1.8160871266288452, "learning_rate": 9.956333625946201e-08, "loss": 0.2283, "step": 46125 }, { "epoch": 0.8017869248552904, "grad_norm": 1.338410693550644, "learning_rate": 9.954648011993344e-08, "loss": 0.2004, "step": 46126 }, { "epoch": 0.8018043073927932, "grad_norm": 1.1259467871840125, "learning_rate": 9.952962524965858e-08, "loss": 0.3132, "step": 46127 }, { "epoch": 0.801821689930296, "grad_norm": 2.5419033196412135, "learning_rate": 9.951277164869066e-08, "loss": 0.2816, "step": 46128 }, { "epoch": 0.8018390724677988, "grad_norm": 1.27006360482625, "learning_rate": 9.949591931708345e-08, "loss": 0.1762, "step": 46129 }, { "epoch": 0.8018564550053017, "grad_norm": 1.1588862559766724, "learning_rate": 9.947906825489022e-08, "loss": 0.1952, "step": 46130 }, { "epoch": 0.8018738375428045, "grad_norm": 2.920532143677208, "learning_rate": 9.946221846216429e-08, "loss": 0.2156, "step": 46131 }, { "epoch": 0.8018912200803073, "grad_norm": 1.1545764748015521, "learning_rate": 9.944536993895919e-08, "loss": 0.1342, "step": 46132 }, { "epoch": 0.8019086026178102, "grad_norm": 1.2010206951255697, "learning_rate": 9.94285226853283e-08, "loss": 0.1508, "step": 46133 }, { "epoch": 0.801925985155313, "grad_norm": 1.0286484693157043, "learning_rate": 9.941167670132494e-08, "loss": 0.1311, "step": 46134 }, { "epoch": 0.8019433676928158, "grad_norm": 1.945049563097978, "learning_rate": 9.939483198700244e-08, "loss": 0.2988, "step": 46135 }, { "epoch": 0.8019607502303187, "grad_norm": 1.156563393066036, "learning_rate": 9.937798854241441e-08, "loss": 0.1427, "step": 46136 }, { "epoch": 0.8019781327678215, "grad_norm": 1.2933880004376819, "learning_rate": 9.936114636761411e-08, "loss": 0.1638, "step": 46137 }, { "epoch": 0.8019955153053243, "grad_norm": 1.4290828464813745, "learning_rate": 9.93443054626551e-08, "loss": 0.1992, "step": 46138 }, { "epoch": 0.8020128978428271, "grad_norm": 1.7898347902438771, "learning_rate": 9.932746582759033e-08, "loss": 0.2316, "step": 46139 }, { "epoch": 0.80203028038033, "grad_norm": 1.8824770530985975, "learning_rate": 9.931062746247354e-08, "loss": 0.2842, "step": 46140 }, { "epoch": 0.8020476629178327, "grad_norm": 1.733200919452957, "learning_rate": 9.929379036735803e-08, "loss": 0.2331, "step": 46141 }, { "epoch": 0.8020650454553355, "grad_norm": 1.653356593009204, "learning_rate": 9.927695454229712e-08, "loss": 0.238, "step": 46142 }, { "epoch": 0.8020824279928384, "grad_norm": 1.3299190463523185, "learning_rate": 9.926011998734407e-08, "loss": 0.1539, "step": 46143 }, { "epoch": 0.8020998105303412, "grad_norm": 1.256877367387248, "learning_rate": 9.924328670255261e-08, "loss": 0.2479, "step": 46144 }, { "epoch": 0.802117193067844, "grad_norm": 0.9824398750872477, "learning_rate": 9.92264546879757e-08, "loss": 0.1229, "step": 46145 }, { "epoch": 0.8021345756053468, "grad_norm": 1.5989668194417983, "learning_rate": 9.920962394366683e-08, "loss": 0.1704, "step": 46146 }, { "epoch": 0.8021519581428497, "grad_norm": 1.6520766996542944, "learning_rate": 9.919279446967927e-08, "loss": 0.3722, "step": 46147 }, { "epoch": 0.8021693406803525, "grad_norm": 1.8240109685344514, "learning_rate": 9.91759662660665e-08, "loss": 0.237, "step": 46148 }, { "epoch": 0.8021867232178553, "grad_norm": 1.3056079803713256, "learning_rate": 9.915913933288179e-08, "loss": 0.3512, "step": 46149 }, { "epoch": 0.8022041057553582, "grad_norm": 1.2180371317195404, "learning_rate": 9.91423136701785e-08, "loss": 0.2425, "step": 46150 }, { "epoch": 0.802221488292861, "grad_norm": 2.970590087318413, "learning_rate": 9.912548927800996e-08, "loss": 0.2295, "step": 46151 }, { "epoch": 0.8022388708303638, "grad_norm": 1.344395525115194, "learning_rate": 9.910866615642943e-08, "loss": 0.2002, "step": 46152 }, { "epoch": 0.8022562533678667, "grad_norm": 3.101435678031766, "learning_rate": 9.909184430549028e-08, "loss": 0.3155, "step": 46153 }, { "epoch": 0.8022736359053695, "grad_norm": 1.5666796397667577, "learning_rate": 9.907502372524584e-08, "loss": 0.1449, "step": 46154 }, { "epoch": 0.8022910184428723, "grad_norm": 1.7119126364899717, "learning_rate": 9.905820441574925e-08, "loss": 0.1994, "step": 46155 }, { "epoch": 0.8023084009803751, "grad_norm": 1.8664327631812108, "learning_rate": 9.904138637705429e-08, "loss": 0.2158, "step": 46156 }, { "epoch": 0.802325783517878, "grad_norm": 1.2722217952696304, "learning_rate": 9.902456960921374e-08, "loss": 0.1715, "step": 46157 }, { "epoch": 0.8023431660553808, "grad_norm": 1.469655149403561, "learning_rate": 9.900775411228102e-08, "loss": 0.2744, "step": 46158 }, { "epoch": 0.8023605485928836, "grad_norm": 1.8617465228461283, "learning_rate": 9.899093988630969e-08, "loss": 0.189, "step": 46159 }, { "epoch": 0.8023779311303865, "grad_norm": 1.658771545681689, "learning_rate": 9.897412693135282e-08, "loss": 0.2156, "step": 46160 }, { "epoch": 0.8023953136678892, "grad_norm": 1.0957146242710627, "learning_rate": 9.895731524746375e-08, "loss": 0.1676, "step": 46161 }, { "epoch": 0.802412696205392, "grad_norm": 2.0598030521934723, "learning_rate": 9.894050483469579e-08, "loss": 0.3366, "step": 46162 }, { "epoch": 0.8024300787428948, "grad_norm": 1.9798605856630171, "learning_rate": 9.892369569310216e-08, "loss": 0.2662, "step": 46163 }, { "epoch": 0.8024474612803977, "grad_norm": 1.2822055105913024, "learning_rate": 9.89068878227362e-08, "loss": 0.1995, "step": 46164 }, { "epoch": 0.8024648438179005, "grad_norm": 1.3253385018290031, "learning_rate": 9.889008122365117e-08, "loss": 0.3305, "step": 46165 }, { "epoch": 0.8024822263554033, "grad_norm": 0.7579427264616668, "learning_rate": 9.887327589590022e-08, "loss": 0.1777, "step": 46166 }, { "epoch": 0.8024996088929062, "grad_norm": 0.9261758249365024, "learning_rate": 9.885647183953682e-08, "loss": 0.1561, "step": 46167 }, { "epoch": 0.802516991430409, "grad_norm": 1.4089982669183703, "learning_rate": 9.883966905461411e-08, "loss": 0.2383, "step": 46168 }, { "epoch": 0.8025343739679118, "grad_norm": 1.1340150673557277, "learning_rate": 9.882286754118557e-08, "loss": 0.2123, "step": 46169 }, { "epoch": 0.8025517565054147, "grad_norm": 1.381853150391652, "learning_rate": 9.88060672993039e-08, "loss": 0.1785, "step": 46170 }, { "epoch": 0.8025691390429175, "grad_norm": 3.2602400250911825, "learning_rate": 9.87892683290229e-08, "loss": 0.1667, "step": 46171 }, { "epoch": 0.8025865215804203, "grad_norm": 3.2257089933631558, "learning_rate": 9.877247063039562e-08, "loss": 0.2597, "step": 46172 }, { "epoch": 0.8026039041179232, "grad_norm": 1.8671753644173223, "learning_rate": 9.875567420347531e-08, "loss": 0.1882, "step": 46173 }, { "epoch": 0.802621286655426, "grad_norm": 1.0151682957706891, "learning_rate": 9.873887904831502e-08, "loss": 0.2399, "step": 46174 }, { "epoch": 0.8026386691929288, "grad_norm": 1.7169078267139797, "learning_rate": 9.872208516496843e-08, "loss": 0.296, "step": 46175 }, { "epoch": 0.8026560517304316, "grad_norm": 1.9357100855136649, "learning_rate": 9.870529255348836e-08, "loss": 0.1362, "step": 46176 }, { "epoch": 0.8026734342679345, "grad_norm": 1.3743854439676433, "learning_rate": 9.868850121392818e-08, "loss": 0.3236, "step": 46177 }, { "epoch": 0.8026908168054373, "grad_norm": 0.9059204868481439, "learning_rate": 9.867171114634099e-08, "loss": 0.3751, "step": 46178 }, { "epoch": 0.8027081993429401, "grad_norm": 2.6338122725891684, "learning_rate": 9.865492235078027e-08, "loss": 0.2222, "step": 46179 }, { "epoch": 0.802725581880443, "grad_norm": 1.410014705226369, "learning_rate": 9.863813482729905e-08, "loss": 0.2656, "step": 46180 }, { "epoch": 0.8027429644179457, "grad_norm": 1.4983787013824053, "learning_rate": 9.862134857595056e-08, "loss": 0.203, "step": 46181 }, { "epoch": 0.8027603469554485, "grad_norm": 1.4491289709518143, "learning_rate": 9.860456359678804e-08, "loss": 0.2129, "step": 46182 }, { "epoch": 0.8027777294929513, "grad_norm": 1.5248845024805666, "learning_rate": 9.858777988986467e-08, "loss": 0.1537, "step": 46183 }, { "epoch": 0.8027951120304542, "grad_norm": 1.8208837673983553, "learning_rate": 9.857099745523367e-08, "loss": 0.2229, "step": 46184 }, { "epoch": 0.802812494567957, "grad_norm": 1.3002365047916649, "learning_rate": 9.855421629294813e-08, "loss": 0.1408, "step": 46185 }, { "epoch": 0.8028298771054598, "grad_norm": 1.3415126299106253, "learning_rate": 9.853743640306122e-08, "loss": 0.2007, "step": 46186 }, { "epoch": 0.8028472596429627, "grad_norm": 1.3081682603708646, "learning_rate": 9.85206577856264e-08, "loss": 0.2309, "step": 46187 }, { "epoch": 0.8028646421804655, "grad_norm": 1.5038022338209394, "learning_rate": 9.850388044069674e-08, "loss": 0.1448, "step": 46188 }, { "epoch": 0.8028820247179683, "grad_norm": 1.694198739681242, "learning_rate": 9.848710436832514e-08, "loss": 0.2153, "step": 46189 }, { "epoch": 0.8028994072554712, "grad_norm": 0.9997857691495349, "learning_rate": 9.847032956856505e-08, "loss": 0.2388, "step": 46190 }, { "epoch": 0.802916789792974, "grad_norm": 1.7738424167429563, "learning_rate": 9.84535560414696e-08, "loss": 0.2775, "step": 46191 }, { "epoch": 0.8029341723304768, "grad_norm": 1.8133916003472428, "learning_rate": 9.843678378709191e-08, "loss": 0.238, "step": 46192 }, { "epoch": 0.8029515548679796, "grad_norm": 1.5631102416825986, "learning_rate": 9.842001280548518e-08, "loss": 0.2339, "step": 46193 }, { "epoch": 0.8029689374054825, "grad_norm": 1.9159445706535934, "learning_rate": 9.84032430967025e-08, "loss": 0.2043, "step": 46194 }, { "epoch": 0.8029863199429853, "grad_norm": 1.560002262092054, "learning_rate": 9.838647466079707e-08, "loss": 0.2195, "step": 46195 }, { "epoch": 0.8030037024804881, "grad_norm": 1.7992427227873293, "learning_rate": 9.836970749782202e-08, "loss": 0.2422, "step": 46196 }, { "epoch": 0.803021085017991, "grad_norm": 1.0411714273553527, "learning_rate": 9.835294160783042e-08, "loss": 0.5052, "step": 46197 }, { "epoch": 0.8030384675554938, "grad_norm": 1.430935055042027, "learning_rate": 9.83361769908756e-08, "loss": 0.1975, "step": 46198 }, { "epoch": 0.8030558500929966, "grad_norm": 0.7521644018549078, "learning_rate": 9.831941364701057e-08, "loss": 0.152, "step": 46199 }, { "epoch": 0.8030732326304993, "grad_norm": 1.1193524012549758, "learning_rate": 9.830265157628847e-08, "loss": 0.1873, "step": 46200 }, { "epoch": 0.8030906151680022, "grad_norm": 1.5416654256167, "learning_rate": 9.828589077876248e-08, "loss": 0.2315, "step": 46201 }, { "epoch": 0.803107997705505, "grad_norm": 1.9272964671293804, "learning_rate": 9.826913125448567e-08, "loss": 0.238, "step": 46202 }, { "epoch": 0.8031253802430078, "grad_norm": 5.7821290058167865, "learning_rate": 9.825237300351119e-08, "loss": 0.3242, "step": 46203 }, { "epoch": 0.8031427627805107, "grad_norm": 1.8316149882042139, "learning_rate": 9.82356160258921e-08, "loss": 0.2708, "step": 46204 }, { "epoch": 0.8031601453180135, "grad_norm": 0.9942498338342537, "learning_rate": 9.821886032168142e-08, "loss": 0.1744, "step": 46205 }, { "epoch": 0.8031775278555163, "grad_norm": 2.0709309674520537, "learning_rate": 9.820210589093258e-08, "loss": 0.3112, "step": 46206 }, { "epoch": 0.8031949103930192, "grad_norm": 1.1198381047708343, "learning_rate": 9.818535273369854e-08, "loss": 0.1446, "step": 46207 }, { "epoch": 0.803212292930522, "grad_norm": 1.6819304242838915, "learning_rate": 9.81686008500323e-08, "loss": 0.1376, "step": 46208 }, { "epoch": 0.8032296754680248, "grad_norm": 1.3939950890173196, "learning_rate": 9.815185023998684e-08, "loss": 0.3553, "step": 46209 }, { "epoch": 0.8032470580055276, "grad_norm": 1.1568125772690068, "learning_rate": 9.81351009036156e-08, "loss": 0.1463, "step": 46210 }, { "epoch": 0.8032644405430305, "grad_norm": 1.1437914562013831, "learning_rate": 9.811835284097142e-08, "loss": 0.2333, "step": 46211 }, { "epoch": 0.8032818230805333, "grad_norm": 1.3244462057195938, "learning_rate": 9.810160605210748e-08, "loss": 0.2937, "step": 46212 }, { "epoch": 0.8032992056180361, "grad_norm": 1.0910961611157792, "learning_rate": 9.808486053707682e-08, "loss": 0.1653, "step": 46213 }, { "epoch": 0.803316588155539, "grad_norm": 2.145445552416922, "learning_rate": 9.806811629593253e-08, "loss": 0.2255, "step": 46214 }, { "epoch": 0.8033339706930418, "grad_norm": 2.224512853996877, "learning_rate": 9.80513733287277e-08, "loss": 0.2284, "step": 46215 }, { "epoch": 0.8033513532305446, "grad_norm": 1.8655502761856029, "learning_rate": 9.80346316355154e-08, "loss": 0.2126, "step": 46216 }, { "epoch": 0.8033687357680475, "grad_norm": 1.2728674793864299, "learning_rate": 9.801789121634845e-08, "loss": 0.1396, "step": 46217 }, { "epoch": 0.8033861183055503, "grad_norm": 1.1786322530665347, "learning_rate": 9.800115207128035e-08, "loss": 0.1703, "step": 46218 }, { "epoch": 0.8034035008430531, "grad_norm": 1.4035387149327685, "learning_rate": 9.79844142003639e-08, "loss": 0.1189, "step": 46219 }, { "epoch": 0.8034208833805558, "grad_norm": 1.137035002588535, "learning_rate": 9.796767760365215e-08, "loss": 0.2399, "step": 46220 }, { "epoch": 0.8034382659180587, "grad_norm": 1.2136248327283048, "learning_rate": 9.795094228119827e-08, "loss": 0.2385, "step": 46221 }, { "epoch": 0.8034556484555615, "grad_norm": 1.675902344251228, "learning_rate": 9.793420823305516e-08, "loss": 0.1601, "step": 46222 }, { "epoch": 0.8034730309930643, "grad_norm": 1.4151340376405663, "learning_rate": 9.791747545927592e-08, "loss": 0.1617, "step": 46223 }, { "epoch": 0.8034904135305672, "grad_norm": 1.1270490912374254, "learning_rate": 9.790074395991354e-08, "loss": 0.1696, "step": 46224 }, { "epoch": 0.80350779606807, "grad_norm": 1.0140930672479023, "learning_rate": 9.788401373502103e-08, "loss": 0.2324, "step": 46225 }, { "epoch": 0.8035251786055728, "grad_norm": 0.7933684235688814, "learning_rate": 9.786728478465173e-08, "loss": 0.1067, "step": 46226 }, { "epoch": 0.8035425611430757, "grad_norm": 1.5157257898936833, "learning_rate": 9.785055710885825e-08, "loss": 0.1771, "step": 46227 }, { "epoch": 0.8035599436805785, "grad_norm": 3.048323517119159, "learning_rate": 9.783383070769364e-08, "loss": 0.3637, "step": 46228 }, { "epoch": 0.8035773262180813, "grad_norm": 1.4637193878873245, "learning_rate": 9.78171055812112e-08, "loss": 0.22, "step": 46229 }, { "epoch": 0.8035947087555841, "grad_norm": 1.5936954498821043, "learning_rate": 9.780038172946382e-08, "loss": 0.3247, "step": 46230 }, { "epoch": 0.803612091293087, "grad_norm": 0.8477066230590606, "learning_rate": 9.778365915250442e-08, "loss": 0.2661, "step": 46231 }, { "epoch": 0.8036294738305898, "grad_norm": 1.6652443617990667, "learning_rate": 9.776693785038609e-08, "loss": 0.1587, "step": 46232 }, { "epoch": 0.8036468563680926, "grad_norm": 1.722909776679626, "learning_rate": 9.775021782316179e-08, "loss": 0.2969, "step": 46233 }, { "epoch": 0.8036642389055955, "grad_norm": 1.3113726487855428, "learning_rate": 9.773349907088446e-08, "loss": 0.1669, "step": 46234 }, { "epoch": 0.8036816214430983, "grad_norm": 2.0637701471409073, "learning_rate": 9.771678159360724e-08, "loss": 0.3022, "step": 46235 }, { "epoch": 0.8036990039806011, "grad_norm": 1.5474853527264225, "learning_rate": 9.770006539138287e-08, "loss": 0.2574, "step": 46236 }, { "epoch": 0.803716386518104, "grad_norm": 2.0796492595669185, "learning_rate": 9.768335046426457e-08, "loss": 0.2505, "step": 46237 }, { "epoch": 0.8037337690556068, "grad_norm": 0.9363803002214612, "learning_rate": 9.76666368123053e-08, "loss": 0.1986, "step": 46238 }, { "epoch": 0.8037511515931096, "grad_norm": 4.133593669231036, "learning_rate": 9.764992443555797e-08, "loss": 0.2613, "step": 46239 }, { "epoch": 0.8037685341306123, "grad_norm": 1.8658696169393567, "learning_rate": 9.763321333407554e-08, "loss": 0.246, "step": 46240 }, { "epoch": 0.8037859166681152, "grad_norm": 2.8478077728584323, "learning_rate": 9.7616503507911e-08, "loss": 0.4342, "step": 46241 }, { "epoch": 0.803803299205618, "grad_norm": 1.544817267362033, "learning_rate": 9.759979495711729e-08, "loss": 0.1643, "step": 46242 }, { "epoch": 0.8038206817431208, "grad_norm": 1.8196789946908936, "learning_rate": 9.758308768174739e-08, "loss": 0.2116, "step": 46243 }, { "epoch": 0.8038380642806237, "grad_norm": 1.9949219347478353, "learning_rate": 9.75663816818541e-08, "loss": 0.1592, "step": 46244 }, { "epoch": 0.8038554468181265, "grad_norm": 1.2845557303370494, "learning_rate": 9.754967695749083e-08, "loss": 0.2256, "step": 46245 }, { "epoch": 0.8038728293556293, "grad_norm": 0.7675466287215543, "learning_rate": 9.753297350871004e-08, "loss": 0.214, "step": 46246 }, { "epoch": 0.8038902118931321, "grad_norm": 3.176324399549978, "learning_rate": 9.751627133556473e-08, "loss": 0.2393, "step": 46247 }, { "epoch": 0.803907594430635, "grad_norm": 1.2598392933596352, "learning_rate": 9.749957043810808e-08, "loss": 0.3245, "step": 46248 }, { "epoch": 0.8039249769681378, "grad_norm": 1.4505535552971707, "learning_rate": 9.74828708163929e-08, "loss": 0.247, "step": 46249 }, { "epoch": 0.8039423595056406, "grad_norm": 0.8722350425130669, "learning_rate": 9.746617247047212e-08, "loss": 0.2675, "step": 46250 }, { "epoch": 0.8039597420431435, "grad_norm": 2.065954116300493, "learning_rate": 9.744947540039866e-08, "loss": 0.2255, "step": 46251 }, { "epoch": 0.8039771245806463, "grad_norm": 1.2808019540750037, "learning_rate": 9.743277960622543e-08, "loss": 0.2079, "step": 46252 }, { "epoch": 0.8039945071181491, "grad_norm": 1.1818265981562952, "learning_rate": 9.741608508800537e-08, "loss": 0.2767, "step": 46253 }, { "epoch": 0.804011889655652, "grad_norm": 2.466318714639734, "learning_rate": 9.739939184579138e-08, "loss": 0.3626, "step": 46254 }, { "epoch": 0.8040292721931548, "grad_norm": 0.9922040855225641, "learning_rate": 9.738269987963626e-08, "loss": 0.2063, "step": 46255 }, { "epoch": 0.8040466547306576, "grad_norm": 1.4904176712260078, "learning_rate": 9.73660091895932e-08, "loss": 0.1736, "step": 46256 }, { "epoch": 0.8040640372681604, "grad_norm": 1.682366300064708, "learning_rate": 9.734931977571492e-08, "loss": 0.3243, "step": 46257 }, { "epoch": 0.8040814198056633, "grad_norm": 1.1264408712285405, "learning_rate": 9.733263163805443e-08, "loss": 0.1857, "step": 46258 }, { "epoch": 0.8040988023431661, "grad_norm": 2.9887832838514288, "learning_rate": 9.731594477666427e-08, "loss": 0.3609, "step": 46259 }, { "epoch": 0.8041161848806688, "grad_norm": 1.2561637935071992, "learning_rate": 9.729925919159776e-08, "loss": 0.1651, "step": 46260 }, { "epoch": 0.8041335674181717, "grad_norm": 1.464302521215318, "learning_rate": 9.728257488290759e-08, "loss": 0.2587, "step": 46261 }, { "epoch": 0.8041509499556745, "grad_norm": 1.0710362085076472, "learning_rate": 9.726589185064665e-08, "loss": 0.2049, "step": 46262 }, { "epoch": 0.8041683324931773, "grad_norm": 1.7346324986396529, "learning_rate": 9.72492100948677e-08, "loss": 0.145, "step": 46263 }, { "epoch": 0.8041857150306801, "grad_norm": 1.7441294102640865, "learning_rate": 9.723252961562406e-08, "loss": 0.2486, "step": 46264 }, { "epoch": 0.804203097568183, "grad_norm": 2.021077724807998, "learning_rate": 9.72158504129681e-08, "loss": 0.1526, "step": 46265 }, { "epoch": 0.8042204801056858, "grad_norm": 1.3218269689460092, "learning_rate": 9.719917248695286e-08, "loss": 0.2751, "step": 46266 }, { "epoch": 0.8042378626431886, "grad_norm": 1.3832079611053139, "learning_rate": 9.71824958376311e-08, "loss": 0.4472, "step": 46267 }, { "epoch": 0.8042552451806915, "grad_norm": 1.721262905236664, "learning_rate": 9.716582046505594e-08, "loss": 0.1142, "step": 46268 }, { "epoch": 0.8042726277181943, "grad_norm": 1.2969677419772976, "learning_rate": 9.714914636928007e-08, "loss": 0.2197, "step": 46269 }, { "epoch": 0.8042900102556971, "grad_norm": 2.581507400934799, "learning_rate": 9.713247355035637e-08, "loss": 0.2352, "step": 46270 }, { "epoch": 0.8043073927932, "grad_norm": 1.0108274094230814, "learning_rate": 9.711580200833763e-08, "loss": 0.1679, "step": 46271 }, { "epoch": 0.8043247753307028, "grad_norm": 1.4957541575763391, "learning_rate": 9.709913174327672e-08, "loss": 0.1976, "step": 46272 }, { "epoch": 0.8043421578682056, "grad_norm": 1.3316099863775779, "learning_rate": 9.708246275522653e-08, "loss": 0.1982, "step": 46273 }, { "epoch": 0.8043595404057085, "grad_norm": 2.3606427097681615, "learning_rate": 9.706579504423984e-08, "loss": 0.2125, "step": 46274 }, { "epoch": 0.8043769229432113, "grad_norm": 2.3400988929351345, "learning_rate": 9.704912861036935e-08, "loss": 0.2106, "step": 46275 }, { "epoch": 0.8043943054807141, "grad_norm": 1.2107734846684561, "learning_rate": 9.703246345366828e-08, "loss": 0.1881, "step": 46276 }, { "epoch": 0.8044116880182169, "grad_norm": 1.2549854577173225, "learning_rate": 9.701579957418904e-08, "loss": 0.2611, "step": 46277 }, { "epoch": 0.8044290705557198, "grad_norm": 1.6708299621269835, "learning_rate": 9.699913697198448e-08, "loss": 0.1688, "step": 46278 }, { "epoch": 0.8044464530932226, "grad_norm": 0.9963991060212466, "learning_rate": 9.698247564710766e-08, "loss": 0.1324, "step": 46279 }, { "epoch": 0.8044638356307253, "grad_norm": 1.690218771495049, "learning_rate": 9.696581559961126e-08, "loss": 0.2513, "step": 46280 }, { "epoch": 0.8044812181682282, "grad_norm": 1.8507565892109965, "learning_rate": 9.694915682954812e-08, "loss": 0.1943, "step": 46281 }, { "epoch": 0.804498600705731, "grad_norm": 1.5912790232739993, "learning_rate": 9.693249933697096e-08, "loss": 0.2584, "step": 46282 }, { "epoch": 0.8045159832432338, "grad_norm": 1.9831094225146602, "learning_rate": 9.691584312193269e-08, "loss": 0.2031, "step": 46283 }, { "epoch": 0.8045333657807366, "grad_norm": 2.0016657680265575, "learning_rate": 9.689918818448595e-08, "loss": 0.1562, "step": 46284 }, { "epoch": 0.8045507483182395, "grad_norm": 1.742232793990529, "learning_rate": 9.688253452468365e-08, "loss": 0.1899, "step": 46285 }, { "epoch": 0.8045681308557423, "grad_norm": 2.323990865457122, "learning_rate": 9.686588214257846e-08, "loss": 0.191, "step": 46286 }, { "epoch": 0.8045855133932451, "grad_norm": 1.164343045331485, "learning_rate": 9.684923103822329e-08, "loss": 0.189, "step": 46287 }, { "epoch": 0.804602895930748, "grad_norm": 1.6918325830631196, "learning_rate": 9.683258121167093e-08, "loss": 0.2351, "step": 46288 }, { "epoch": 0.8046202784682508, "grad_norm": 1.1598090178907918, "learning_rate": 9.681593266297416e-08, "loss": 0.1947, "step": 46289 }, { "epoch": 0.8046376610057536, "grad_norm": 2.4824795641878303, "learning_rate": 9.679928539218546e-08, "loss": 0.2929, "step": 46290 }, { "epoch": 0.8046550435432565, "grad_norm": 3.6504411448366922, "learning_rate": 9.678263939935788e-08, "loss": 0.341, "step": 46291 }, { "epoch": 0.8046724260807593, "grad_norm": 1.2420108629458708, "learning_rate": 9.676599468454415e-08, "loss": 0.2735, "step": 46292 }, { "epoch": 0.8046898086182621, "grad_norm": 1.275164504764598, "learning_rate": 9.674935124779704e-08, "loss": 0.2584, "step": 46293 }, { "epoch": 0.804707191155765, "grad_norm": 1.7156786186461042, "learning_rate": 9.673270908916902e-08, "loss": 0.1921, "step": 46294 }, { "epoch": 0.8047245736932678, "grad_norm": 0.9890665099606962, "learning_rate": 9.671606820871336e-08, "loss": 0.1307, "step": 46295 }, { "epoch": 0.8047419562307706, "grad_norm": 1.9363627847989782, "learning_rate": 9.669942860648245e-08, "loss": 0.1373, "step": 46296 }, { "epoch": 0.8047593387682734, "grad_norm": 5.195907928876299, "learning_rate": 9.668279028252902e-08, "loss": 0.2164, "step": 46297 }, { "epoch": 0.8047767213057763, "grad_norm": 1.230966485655616, "learning_rate": 9.666615323690574e-08, "loss": 0.3424, "step": 46298 }, { "epoch": 0.8047941038432791, "grad_norm": 1.2907751050283665, "learning_rate": 9.664951746966565e-08, "loss": 0.2539, "step": 46299 }, { "epoch": 0.8048114863807818, "grad_norm": 0.9982520389266333, "learning_rate": 9.663288298086125e-08, "loss": 0.2219, "step": 46300 }, { "epoch": 0.8048288689182846, "grad_norm": 0.9448198300172301, "learning_rate": 9.661624977054538e-08, "loss": 0.1588, "step": 46301 }, { "epoch": 0.8048462514557875, "grad_norm": 2.191643776544065, "learning_rate": 9.659961783877069e-08, "loss": 0.2156, "step": 46302 }, { "epoch": 0.8048636339932903, "grad_norm": 1.1784417593404066, "learning_rate": 9.658298718558989e-08, "loss": 0.1255, "step": 46303 }, { "epoch": 0.8048810165307931, "grad_norm": 1.6939561466604158, "learning_rate": 9.656635781105565e-08, "loss": 0.2651, "step": 46304 }, { "epoch": 0.804898399068296, "grad_norm": 1.4437362928531967, "learning_rate": 9.65497297152208e-08, "loss": 0.1953, "step": 46305 }, { "epoch": 0.8049157816057988, "grad_norm": 0.942110693322377, "learning_rate": 9.653310289813781e-08, "loss": 0.2817, "step": 46306 }, { "epoch": 0.8049331641433016, "grad_norm": 1.993198435797544, "learning_rate": 9.651647735985974e-08, "loss": 0.133, "step": 46307 }, { "epoch": 0.8049505466808045, "grad_norm": 1.9250696307814346, "learning_rate": 9.649985310043918e-08, "loss": 0.2287, "step": 46308 }, { "epoch": 0.8049679292183073, "grad_norm": 2.4536717791616036, "learning_rate": 9.648323011992842e-08, "loss": 0.2373, "step": 46309 }, { "epoch": 0.8049853117558101, "grad_norm": 1.6223295196156575, "learning_rate": 9.646660841838067e-08, "loss": 0.2275, "step": 46310 }, { "epoch": 0.805002694293313, "grad_norm": 3.1868223433236564, "learning_rate": 9.644998799584836e-08, "loss": 0.3344, "step": 46311 }, { "epoch": 0.8050200768308158, "grad_norm": 0.992290019844651, "learning_rate": 9.643336885238424e-08, "loss": 0.3, "step": 46312 }, { "epoch": 0.8050374593683186, "grad_norm": 2.261948945241878, "learning_rate": 9.641675098804097e-08, "loss": 0.4284, "step": 46313 }, { "epoch": 0.8050548419058214, "grad_norm": 1.4926309114109477, "learning_rate": 9.640013440287114e-08, "loss": 0.1752, "step": 46314 }, { "epoch": 0.8050722244433243, "grad_norm": 1.2566914183274862, "learning_rate": 9.638351909692754e-08, "loss": 0.1938, "step": 46315 }, { "epoch": 0.8050896069808271, "grad_norm": 1.4835303888715692, "learning_rate": 9.636690507026279e-08, "loss": 0.3195, "step": 46316 }, { "epoch": 0.8051069895183299, "grad_norm": 1.5353599070579311, "learning_rate": 9.63502923229294e-08, "loss": 0.2699, "step": 46317 }, { "epoch": 0.8051243720558328, "grad_norm": 1.2613258088788195, "learning_rate": 9.633368085498028e-08, "loss": 0.2809, "step": 46318 }, { "epoch": 0.8051417545933356, "grad_norm": 1.8924906529012855, "learning_rate": 9.631707066646793e-08, "loss": 0.2397, "step": 46319 }, { "epoch": 0.8051591371308383, "grad_norm": 1.2665387813490157, "learning_rate": 9.630046175744505e-08, "loss": 0.1287, "step": 46320 }, { "epoch": 0.8051765196683411, "grad_norm": 1.023074117221928, "learning_rate": 9.62838541279643e-08, "loss": 0.3927, "step": 46321 }, { "epoch": 0.805193902205844, "grad_norm": 1.344589081695326, "learning_rate": 9.626724777807821e-08, "loss": 0.2588, "step": 46322 }, { "epoch": 0.8052112847433468, "grad_norm": 1.54511686994278, "learning_rate": 9.625064270783956e-08, "loss": 0.253, "step": 46323 }, { "epoch": 0.8052286672808496, "grad_norm": 1.9175665752510027, "learning_rate": 9.623403891730086e-08, "loss": 0.2611, "step": 46324 }, { "epoch": 0.8052460498183525, "grad_norm": 1.0584577421427102, "learning_rate": 9.621743640651464e-08, "loss": 0.2313, "step": 46325 }, { "epoch": 0.8052634323558553, "grad_norm": 1.8215225925805154, "learning_rate": 9.620083517553384e-08, "loss": 0.2869, "step": 46326 }, { "epoch": 0.8052808148933581, "grad_norm": 2.9448308781415515, "learning_rate": 9.618423522441099e-08, "loss": 0.2808, "step": 46327 }, { "epoch": 0.805298197430861, "grad_norm": 1.1123741944034737, "learning_rate": 9.616763655319838e-08, "loss": 0.1528, "step": 46328 }, { "epoch": 0.8053155799683638, "grad_norm": 2.182275855088613, "learning_rate": 9.6151039161949e-08, "loss": 0.2444, "step": 46329 }, { "epoch": 0.8053329625058666, "grad_norm": 1.161375831965165, "learning_rate": 9.613444305071528e-08, "loss": 0.1735, "step": 46330 }, { "epoch": 0.8053503450433694, "grad_norm": 1.361818841896027, "learning_rate": 9.611784821954987e-08, "loss": 0.1356, "step": 46331 }, { "epoch": 0.8053677275808723, "grad_norm": 1.6860104927469042, "learning_rate": 9.610125466850538e-08, "loss": 0.3908, "step": 46332 }, { "epoch": 0.8053851101183751, "grad_norm": 0.8526120159168711, "learning_rate": 9.608466239763436e-08, "loss": 0.3078, "step": 46333 }, { "epoch": 0.8054024926558779, "grad_norm": 1.6846770378509994, "learning_rate": 9.606807140698936e-08, "loss": 0.1781, "step": 46334 }, { "epoch": 0.8054198751933808, "grad_norm": 2.6990410833534573, "learning_rate": 9.605148169662309e-08, "loss": 0.2723, "step": 46335 }, { "epoch": 0.8054372577308836, "grad_norm": 1.183746229548349, "learning_rate": 9.603489326658792e-08, "loss": 0.217, "step": 46336 }, { "epoch": 0.8054546402683864, "grad_norm": 1.2493296143119135, "learning_rate": 9.601830611693668e-08, "loss": 0.1583, "step": 46337 }, { "epoch": 0.8054720228058893, "grad_norm": 1.1476797029269563, "learning_rate": 9.600172024772186e-08, "loss": 0.1912, "step": 46338 }, { "epoch": 0.805489405343392, "grad_norm": 1.8995717214881602, "learning_rate": 9.598513565899597e-08, "loss": 0.3214, "step": 46339 }, { "epoch": 0.8055067878808948, "grad_norm": 2.4697997995824426, "learning_rate": 9.596855235081163e-08, "loss": 0.2202, "step": 46340 }, { "epoch": 0.8055241704183976, "grad_norm": 2.208769137769186, "learning_rate": 9.595197032322139e-08, "loss": 0.1344, "step": 46341 }, { "epoch": 0.8055415529559005, "grad_norm": 1.555196958134478, "learning_rate": 9.593538957627783e-08, "loss": 0.1871, "step": 46342 }, { "epoch": 0.8055589354934033, "grad_norm": 0.8325071551754473, "learning_rate": 9.591881011003344e-08, "loss": 0.262, "step": 46343 }, { "epoch": 0.8055763180309061, "grad_norm": 1.835781983468927, "learning_rate": 9.590223192454067e-08, "loss": 0.1511, "step": 46344 }, { "epoch": 0.805593700568409, "grad_norm": 2.2888869134457948, "learning_rate": 9.588565501985229e-08, "loss": 0.1755, "step": 46345 }, { "epoch": 0.8056110831059118, "grad_norm": 3.979521246590554, "learning_rate": 9.586907939602096e-08, "loss": 0.2944, "step": 46346 }, { "epoch": 0.8056284656434146, "grad_norm": 3.61202262621311, "learning_rate": 9.585250505309884e-08, "loss": 0.2042, "step": 46347 }, { "epoch": 0.8056458481809174, "grad_norm": 2.074008091643268, "learning_rate": 9.583593199113849e-08, "loss": 0.2259, "step": 46348 }, { "epoch": 0.8056632307184203, "grad_norm": 3.4337530220371226, "learning_rate": 9.581936021019271e-08, "loss": 0.2438, "step": 46349 }, { "epoch": 0.8056806132559231, "grad_norm": 1.2006573005794998, "learning_rate": 9.580278971031385e-08, "loss": 0.2366, "step": 46350 }, { "epoch": 0.8056979957934259, "grad_norm": 1.8431866359372149, "learning_rate": 9.578622049155455e-08, "loss": 0.208, "step": 46351 }, { "epoch": 0.8057153783309288, "grad_norm": 1.5710996764279055, "learning_rate": 9.576965255396724e-08, "loss": 0.5803, "step": 46352 }, { "epoch": 0.8057327608684316, "grad_norm": 1.3036634768588238, "learning_rate": 9.575308589760439e-08, "loss": 0.2346, "step": 46353 }, { "epoch": 0.8057501434059344, "grad_norm": 1.5809776228510541, "learning_rate": 9.573652052251862e-08, "loss": 0.1475, "step": 46354 }, { "epoch": 0.8057675259434373, "grad_norm": 2.304074143708696, "learning_rate": 9.571995642876235e-08, "loss": 0.269, "step": 46355 }, { "epoch": 0.8057849084809401, "grad_norm": 1.1473758295856324, "learning_rate": 9.570339361638796e-08, "loss": 0.2132, "step": 46356 }, { "epoch": 0.8058022910184429, "grad_norm": 2.7518434813591823, "learning_rate": 9.568683208544825e-08, "loss": 0.2165, "step": 46357 }, { "epoch": 0.8058196735559457, "grad_norm": 2.3395163498742297, "learning_rate": 9.567027183599553e-08, "loss": 0.267, "step": 46358 }, { "epoch": 0.8058370560934485, "grad_norm": 3.2646179944942793, "learning_rate": 9.565371286808233e-08, "loss": 0.2944, "step": 46359 }, { "epoch": 0.8058544386309513, "grad_norm": 2.2596446641367063, "learning_rate": 9.563715518176113e-08, "loss": 0.3459, "step": 46360 }, { "epoch": 0.8058718211684541, "grad_norm": 2.5444543319507815, "learning_rate": 9.562059877708439e-08, "loss": 0.2691, "step": 46361 }, { "epoch": 0.805889203705957, "grad_norm": 1.7608359965488638, "learning_rate": 9.560404365410457e-08, "loss": 0.3157, "step": 46362 }, { "epoch": 0.8059065862434598, "grad_norm": 3.628596793416325, "learning_rate": 9.558748981287423e-08, "loss": 0.3122, "step": 46363 }, { "epoch": 0.8059239687809626, "grad_norm": 0.7887829309005329, "learning_rate": 9.557093725344556e-08, "loss": 0.1044, "step": 46364 }, { "epoch": 0.8059413513184654, "grad_norm": 1.450758215781574, "learning_rate": 9.555438597587156e-08, "loss": 0.3969, "step": 46365 }, { "epoch": 0.8059587338559683, "grad_norm": 1.3040562333308394, "learning_rate": 9.553783598020426e-08, "loss": 0.1489, "step": 46366 }, { "epoch": 0.8059761163934711, "grad_norm": 1.2537404001170567, "learning_rate": 9.552128726649605e-08, "loss": 0.1928, "step": 46367 }, { "epoch": 0.8059934989309739, "grad_norm": 2.745837525842559, "learning_rate": 9.550473983479967e-08, "loss": 0.2264, "step": 46368 }, { "epoch": 0.8060108814684768, "grad_norm": 1.1433441308243852, "learning_rate": 9.548819368516748e-08, "loss": 0.2138, "step": 46369 }, { "epoch": 0.8060282640059796, "grad_norm": 0.9958918642912917, "learning_rate": 9.54716488176519e-08, "loss": 0.3377, "step": 46370 }, { "epoch": 0.8060456465434824, "grad_norm": 2.4307626789895247, "learning_rate": 9.545510523230538e-08, "loss": 0.2751, "step": 46371 }, { "epoch": 0.8060630290809853, "grad_norm": 1.8385533658843132, "learning_rate": 9.543856292918034e-08, "loss": 0.1808, "step": 46372 }, { "epoch": 0.8060804116184881, "grad_norm": 1.5441458537300021, "learning_rate": 9.54220219083292e-08, "loss": 0.2568, "step": 46373 }, { "epoch": 0.8060977941559909, "grad_norm": 1.0275493509275457, "learning_rate": 9.540548216980443e-08, "loss": 0.1691, "step": 46374 }, { "epoch": 0.8061151766934938, "grad_norm": 2.146980652729273, "learning_rate": 9.538894371365824e-08, "loss": 0.182, "step": 46375 }, { "epoch": 0.8061325592309966, "grad_norm": 2.009537023587889, "learning_rate": 9.537240653994343e-08, "loss": 0.2589, "step": 46376 }, { "epoch": 0.8061499417684994, "grad_norm": 2.0678821189195244, "learning_rate": 9.53558706487122e-08, "loss": 0.0997, "step": 46377 }, { "epoch": 0.8061673243060022, "grad_norm": 1.6831417894354617, "learning_rate": 9.53393360400171e-08, "loss": 0.1622, "step": 46378 }, { "epoch": 0.806184706843505, "grad_norm": 1.0199038841662573, "learning_rate": 9.532280271391013e-08, "loss": 0.1053, "step": 46379 }, { "epoch": 0.8062020893810078, "grad_norm": 1.6952112451397492, "learning_rate": 9.530627067044416e-08, "loss": 0.1883, "step": 46380 }, { "epoch": 0.8062194719185106, "grad_norm": 2.001267531640612, "learning_rate": 9.528973990967143e-08, "loss": 0.3364, "step": 46381 }, { "epoch": 0.8062368544560135, "grad_norm": 3.9714635841384474, "learning_rate": 9.527321043164427e-08, "loss": 0.3584, "step": 46382 }, { "epoch": 0.8062542369935163, "grad_norm": 1.6456310379370644, "learning_rate": 9.525668223641497e-08, "loss": 0.3148, "step": 46383 }, { "epoch": 0.8062716195310191, "grad_norm": 2.3723456750080296, "learning_rate": 9.524015532403634e-08, "loss": 0.2564, "step": 46384 }, { "epoch": 0.8062890020685219, "grad_norm": 1.3657028639884674, "learning_rate": 9.522362969456038e-08, "loss": 0.2905, "step": 46385 }, { "epoch": 0.8063063846060248, "grad_norm": 1.702934521324455, "learning_rate": 9.520710534803955e-08, "loss": 0.2098, "step": 46386 }, { "epoch": 0.8063237671435276, "grad_norm": 2.206401575495625, "learning_rate": 9.519058228452615e-08, "loss": 0.2015, "step": 46387 }, { "epoch": 0.8063411496810304, "grad_norm": 2.0161086522299834, "learning_rate": 9.517406050407278e-08, "loss": 0.3398, "step": 46388 }, { "epoch": 0.8063585322185333, "grad_norm": 1.1865089778657765, "learning_rate": 9.515754000673165e-08, "loss": 0.1966, "step": 46389 }, { "epoch": 0.8063759147560361, "grad_norm": 1.683882740984249, "learning_rate": 9.514102079255515e-08, "loss": 0.2182, "step": 46390 }, { "epoch": 0.8063932972935389, "grad_norm": 1.9673182808221832, "learning_rate": 9.512450286159562e-08, "loss": 0.2471, "step": 46391 }, { "epoch": 0.8064106798310418, "grad_norm": 1.6305174739713386, "learning_rate": 9.510798621390547e-08, "loss": 0.2826, "step": 46392 }, { "epoch": 0.8064280623685446, "grad_norm": 1.1252136821336942, "learning_rate": 9.5091470849537e-08, "loss": 0.3723, "step": 46393 }, { "epoch": 0.8064454449060474, "grad_norm": 1.5370468767095193, "learning_rate": 9.507495676854261e-08, "loss": 0.2906, "step": 46394 }, { "epoch": 0.8064628274435502, "grad_norm": 1.1423536042893152, "learning_rate": 9.50584439709744e-08, "loss": 0.2178, "step": 46395 }, { "epoch": 0.8064802099810531, "grad_norm": 2.0543451149592173, "learning_rate": 9.50419324568852e-08, "loss": 0.1923, "step": 46396 }, { "epoch": 0.8064975925185559, "grad_norm": 2.079578859367711, "learning_rate": 9.502542222632693e-08, "loss": 0.2457, "step": 46397 }, { "epoch": 0.8065149750560587, "grad_norm": 1.4054233359794994, "learning_rate": 9.500891327935184e-08, "loss": 0.2689, "step": 46398 }, { "epoch": 0.8065323575935615, "grad_norm": 1.357250834241738, "learning_rate": 9.499240561601269e-08, "loss": 0.2814, "step": 46399 }, { "epoch": 0.8065497401310643, "grad_norm": 0.8919434849577565, "learning_rate": 9.497589923636151e-08, "loss": 0.2625, "step": 46400 }, { "epoch": 0.8065671226685671, "grad_norm": 1.5154447870094405, "learning_rate": 9.495939414045067e-08, "loss": 0.1831, "step": 46401 }, { "epoch": 0.80658450520607, "grad_norm": 1.1663304270926826, "learning_rate": 9.494289032833253e-08, "loss": 0.3962, "step": 46402 }, { "epoch": 0.8066018877435728, "grad_norm": 1.9410849397670538, "learning_rate": 9.492638780005929e-08, "loss": 0.2088, "step": 46403 }, { "epoch": 0.8066192702810756, "grad_norm": 1.2596620629400426, "learning_rate": 9.490988655568338e-08, "loss": 0.4692, "step": 46404 }, { "epoch": 0.8066366528185784, "grad_norm": 1.4801450959112006, "learning_rate": 9.489338659525709e-08, "loss": 0.2058, "step": 46405 }, { "epoch": 0.8066540353560813, "grad_norm": 1.3118884773368844, "learning_rate": 9.487688791883247e-08, "loss": 0.1583, "step": 46406 }, { "epoch": 0.8066714178935841, "grad_norm": 1.3771743596612747, "learning_rate": 9.486039052646216e-08, "loss": 0.1873, "step": 46407 }, { "epoch": 0.8066888004310869, "grad_norm": 1.9368760676576973, "learning_rate": 9.484389441819828e-08, "loss": 0.2029, "step": 46408 }, { "epoch": 0.8067061829685898, "grad_norm": 0.7983117789156633, "learning_rate": 9.482739959409336e-08, "loss": 0.1588, "step": 46409 }, { "epoch": 0.8067235655060926, "grad_norm": 1.9080541274032414, "learning_rate": 9.481090605419911e-08, "loss": 0.2055, "step": 46410 }, { "epoch": 0.8067409480435954, "grad_norm": 1.5847014301897226, "learning_rate": 9.479441379856834e-08, "loss": 0.2242, "step": 46411 }, { "epoch": 0.8067583305810982, "grad_norm": 1.9781146657325925, "learning_rate": 9.477792282725316e-08, "loss": 0.2715, "step": 46412 }, { "epoch": 0.8067757131186011, "grad_norm": 1.6663071151427937, "learning_rate": 9.476143314030577e-08, "loss": 0.3249, "step": 46413 }, { "epoch": 0.8067930956561039, "grad_norm": 1.3848961159279707, "learning_rate": 9.474494473777839e-08, "loss": 0.1322, "step": 46414 }, { "epoch": 0.8068104781936067, "grad_norm": 1.2424854661349785, "learning_rate": 9.472845761972359e-08, "loss": 0.1924, "step": 46415 }, { "epoch": 0.8068278607311096, "grad_norm": 1.2354186145973627, "learning_rate": 9.471197178619333e-08, "loss": 0.216, "step": 46416 }, { "epoch": 0.8068452432686124, "grad_norm": 1.6999923797562306, "learning_rate": 9.46954872372398e-08, "loss": 0.286, "step": 46417 }, { "epoch": 0.8068626258061152, "grad_norm": 1.4194161321414869, "learning_rate": 9.467900397291556e-08, "loss": 0.2516, "step": 46418 }, { "epoch": 0.806880008343618, "grad_norm": 1.5228794653115836, "learning_rate": 9.466252199327263e-08, "loss": 0.2422, "step": 46419 }, { "epoch": 0.8068973908811208, "grad_norm": 1.4386431802536097, "learning_rate": 9.464604129836335e-08, "loss": 0.1532, "step": 46420 }, { "epoch": 0.8069147734186236, "grad_norm": 0.9156095941132257, "learning_rate": 9.46295618882399e-08, "loss": 0.2022, "step": 46421 }, { "epoch": 0.8069321559561264, "grad_norm": 1.0527557990281298, "learning_rate": 9.461308376295451e-08, "loss": 0.2511, "step": 46422 }, { "epoch": 0.8069495384936293, "grad_norm": 1.0808660178989768, "learning_rate": 9.459660692255944e-08, "loss": 0.1547, "step": 46423 }, { "epoch": 0.8069669210311321, "grad_norm": 1.1754397389965408, "learning_rate": 9.458013136710691e-08, "loss": 0.3248, "step": 46424 }, { "epoch": 0.8069843035686349, "grad_norm": 1.8199162329306693, "learning_rate": 9.456365709664899e-08, "loss": 0.1952, "step": 46425 }, { "epoch": 0.8070016861061378, "grad_norm": 1.2076764301258507, "learning_rate": 9.454718411123814e-08, "loss": 0.1431, "step": 46426 }, { "epoch": 0.8070190686436406, "grad_norm": 1.03515547861582, "learning_rate": 9.453071241092647e-08, "loss": 0.2281, "step": 46427 }, { "epoch": 0.8070364511811434, "grad_norm": 1.0881024209630987, "learning_rate": 9.451424199576636e-08, "loss": 0.3024, "step": 46428 }, { "epoch": 0.8070538337186463, "grad_norm": 3.0502586163496646, "learning_rate": 9.449777286580951e-08, "loss": 0.452, "step": 46429 }, { "epoch": 0.8070712162561491, "grad_norm": 1.1974682992861898, "learning_rate": 9.448130502110862e-08, "loss": 0.3149, "step": 46430 }, { "epoch": 0.8070885987936519, "grad_norm": 1.9392227522517356, "learning_rate": 9.446483846171571e-08, "loss": 0.2721, "step": 46431 }, { "epoch": 0.8071059813311547, "grad_norm": 1.1682669313723886, "learning_rate": 9.444837318768301e-08, "loss": 0.1971, "step": 46432 }, { "epoch": 0.8071233638686576, "grad_norm": 2.1719693427511615, "learning_rate": 9.443190919906247e-08, "loss": 0.2642, "step": 46433 }, { "epoch": 0.8071407464061604, "grad_norm": 1.4125136179399664, "learning_rate": 9.441544649590677e-08, "loss": 0.1982, "step": 46434 }, { "epoch": 0.8071581289436632, "grad_norm": 0.8091011689333868, "learning_rate": 9.439898507826765e-08, "loss": 0.3365, "step": 46435 }, { "epoch": 0.8071755114811661, "grad_norm": 1.2478795220144343, "learning_rate": 9.438252494619748e-08, "loss": 0.2409, "step": 46436 }, { "epoch": 0.8071928940186689, "grad_norm": 1.215412200272534, "learning_rate": 9.436606609974823e-08, "loss": 0.2035, "step": 46437 }, { "epoch": 0.8072102765561717, "grad_norm": 1.589870503079166, "learning_rate": 9.434960853897233e-08, "loss": 0.2346, "step": 46438 }, { "epoch": 0.8072276590936744, "grad_norm": 1.1081093975199452, "learning_rate": 9.43331522639218e-08, "loss": 0.1642, "step": 46439 }, { "epoch": 0.8072450416311773, "grad_norm": 1.4984335095600896, "learning_rate": 9.431669727464885e-08, "loss": 0.1613, "step": 46440 }, { "epoch": 0.8072624241686801, "grad_norm": 1.569991368743707, "learning_rate": 9.430024357120564e-08, "loss": 0.1928, "step": 46441 }, { "epoch": 0.8072798067061829, "grad_norm": 2.1078339617792783, "learning_rate": 9.428379115364432e-08, "loss": 0.1838, "step": 46442 }, { "epoch": 0.8072971892436858, "grad_norm": 1.4436028793337092, "learning_rate": 9.426734002201698e-08, "loss": 0.2276, "step": 46443 }, { "epoch": 0.8073145717811886, "grad_norm": 1.4863464388055465, "learning_rate": 9.425089017637578e-08, "loss": 0.4205, "step": 46444 }, { "epoch": 0.8073319543186914, "grad_norm": 1.2408382970302527, "learning_rate": 9.423444161677274e-08, "loss": 0.1525, "step": 46445 }, { "epoch": 0.8073493368561943, "grad_norm": 2.381200530270984, "learning_rate": 9.421799434326028e-08, "loss": 0.251, "step": 46446 }, { "epoch": 0.8073667193936971, "grad_norm": 1.042572419561695, "learning_rate": 9.420154835589045e-08, "loss": 0.1901, "step": 46447 }, { "epoch": 0.8073841019311999, "grad_norm": 2.4359382414140205, "learning_rate": 9.41851036547151e-08, "loss": 0.2827, "step": 46448 }, { "epoch": 0.8074014844687027, "grad_norm": 1.6565795146493258, "learning_rate": 9.41686602397867e-08, "loss": 0.1735, "step": 46449 }, { "epoch": 0.8074188670062056, "grad_norm": 0.9914642398296861, "learning_rate": 9.415221811115714e-08, "loss": 0.2015, "step": 46450 }, { "epoch": 0.8074362495437084, "grad_norm": 0.941110241548424, "learning_rate": 9.413577726887873e-08, "loss": 0.2588, "step": 46451 }, { "epoch": 0.8074536320812112, "grad_norm": 1.7087830016158088, "learning_rate": 9.41193377130034e-08, "loss": 0.2312, "step": 46452 }, { "epoch": 0.8074710146187141, "grad_norm": 1.3021517009775434, "learning_rate": 9.410289944358329e-08, "loss": 0.3196, "step": 46453 }, { "epoch": 0.8074883971562169, "grad_norm": 1.6977571444616322, "learning_rate": 9.408646246067059e-08, "loss": 0.1708, "step": 46454 }, { "epoch": 0.8075057796937197, "grad_norm": 0.9829605477343033, "learning_rate": 9.407002676431736e-08, "loss": 0.2116, "step": 46455 }, { "epoch": 0.8075231622312226, "grad_norm": 1.9288232635570726, "learning_rate": 9.40535923545755e-08, "loss": 0.3206, "step": 46456 }, { "epoch": 0.8075405447687254, "grad_norm": 1.1900081771370812, "learning_rate": 9.40371592314974e-08, "loss": 0.186, "step": 46457 }, { "epoch": 0.8075579273062282, "grad_norm": 1.8769253726863142, "learning_rate": 9.402072739513505e-08, "loss": 0.2641, "step": 46458 }, { "epoch": 0.8075753098437309, "grad_norm": 1.0187046992881814, "learning_rate": 9.400429684554052e-08, "loss": 0.2005, "step": 46459 }, { "epoch": 0.8075926923812338, "grad_norm": 1.9932913106666208, "learning_rate": 9.398786758276583e-08, "loss": 0.1776, "step": 46460 }, { "epoch": 0.8076100749187366, "grad_norm": 2.116071924311508, "learning_rate": 9.39714396068631e-08, "loss": 0.2796, "step": 46461 }, { "epoch": 0.8076274574562394, "grad_norm": 1.9344696683237237, "learning_rate": 9.395501291788438e-08, "loss": 0.1353, "step": 46462 }, { "epoch": 0.8076448399937423, "grad_norm": 1.753137662105335, "learning_rate": 9.393858751588179e-08, "loss": 0.2375, "step": 46463 }, { "epoch": 0.8076622225312451, "grad_norm": 1.189013497635482, "learning_rate": 9.39221634009072e-08, "loss": 0.1426, "step": 46464 }, { "epoch": 0.8076796050687479, "grad_norm": 2.1006243859511744, "learning_rate": 9.390574057301292e-08, "loss": 0.2999, "step": 46465 }, { "epoch": 0.8076969876062507, "grad_norm": 1.2821902361334845, "learning_rate": 9.388931903225105e-08, "loss": 0.1835, "step": 46466 }, { "epoch": 0.8077143701437536, "grad_norm": 1.5499998861543987, "learning_rate": 9.387289877867339e-08, "loss": 0.2811, "step": 46467 }, { "epoch": 0.8077317526812564, "grad_norm": 1.5466406161987645, "learning_rate": 9.385647981233191e-08, "loss": 0.3116, "step": 46468 }, { "epoch": 0.8077491352187592, "grad_norm": 1.4781785737936848, "learning_rate": 9.384006213327894e-08, "loss": 0.2721, "step": 46469 }, { "epoch": 0.8077665177562621, "grad_norm": 2.0174696942117087, "learning_rate": 9.382364574156643e-08, "loss": 0.2358, "step": 46470 }, { "epoch": 0.8077839002937649, "grad_norm": 3.1339853005776486, "learning_rate": 9.380723063724638e-08, "loss": 0.224, "step": 46471 }, { "epoch": 0.8078012828312677, "grad_norm": 2.265375466067768, "learning_rate": 9.379081682037076e-08, "loss": 0.3964, "step": 46472 }, { "epoch": 0.8078186653687706, "grad_norm": 1.7548925316513075, "learning_rate": 9.377440429099171e-08, "loss": 0.3185, "step": 46473 }, { "epoch": 0.8078360479062734, "grad_norm": 1.2137353764918966, "learning_rate": 9.375799304916121e-08, "loss": 0.305, "step": 46474 }, { "epoch": 0.8078534304437762, "grad_norm": 1.4546015035554054, "learning_rate": 9.374158309493125e-08, "loss": 0.2109, "step": 46475 }, { "epoch": 0.807870812981279, "grad_norm": 0.9847391033559529, "learning_rate": 9.37251744283537e-08, "loss": 0.1933, "step": 46476 }, { "epoch": 0.8078881955187819, "grad_norm": 1.4081918835051626, "learning_rate": 9.370876704948083e-08, "loss": 0.299, "step": 46477 }, { "epoch": 0.8079055780562846, "grad_norm": 1.500051887770723, "learning_rate": 9.369236095836457e-08, "loss": 0.3065, "step": 46478 }, { "epoch": 0.8079229605937874, "grad_norm": 1.4356995235322552, "learning_rate": 9.367595615505686e-08, "loss": 0.2023, "step": 46479 }, { "epoch": 0.8079403431312903, "grad_norm": 1.4433624656540487, "learning_rate": 9.365955263960973e-08, "loss": 0.1702, "step": 46480 }, { "epoch": 0.8079577256687931, "grad_norm": 0.8063424855633838, "learning_rate": 9.364315041207515e-08, "loss": 0.3535, "step": 46481 }, { "epoch": 0.8079751082062959, "grad_norm": 2.7407439475611493, "learning_rate": 9.362674947250515e-08, "loss": 0.2107, "step": 46482 }, { "epoch": 0.8079924907437988, "grad_norm": 1.1959006874152045, "learning_rate": 9.361034982095168e-08, "loss": 0.2347, "step": 46483 }, { "epoch": 0.8080098732813016, "grad_norm": 4.347719588629617, "learning_rate": 9.359395145746657e-08, "loss": 0.3091, "step": 46484 }, { "epoch": 0.8080272558188044, "grad_norm": 1.1590285667339941, "learning_rate": 9.357755438210219e-08, "loss": 0.2001, "step": 46485 }, { "epoch": 0.8080446383563072, "grad_norm": 1.4120107125132728, "learning_rate": 9.356115859491015e-08, "loss": 0.2547, "step": 46486 }, { "epoch": 0.8080620208938101, "grad_norm": 1.171317702317038, "learning_rate": 9.354476409594242e-08, "loss": 0.282, "step": 46487 }, { "epoch": 0.8080794034313129, "grad_norm": 1.2321751998835482, "learning_rate": 9.352837088525117e-08, "loss": 0.2025, "step": 46488 }, { "epoch": 0.8080967859688157, "grad_norm": 1.1009444300233695, "learning_rate": 9.351197896288831e-08, "loss": 0.2949, "step": 46489 }, { "epoch": 0.8081141685063186, "grad_norm": 1.7070711909732184, "learning_rate": 9.349558832890575e-08, "loss": 0.2302, "step": 46490 }, { "epoch": 0.8081315510438214, "grad_norm": 1.5909740456577874, "learning_rate": 9.347919898335544e-08, "loss": 0.3713, "step": 46491 }, { "epoch": 0.8081489335813242, "grad_norm": 1.3254814197780267, "learning_rate": 9.346281092628932e-08, "loss": 0.2789, "step": 46492 }, { "epoch": 0.808166316118827, "grad_norm": 0.8786864589943533, "learning_rate": 9.344642415775934e-08, "loss": 0.2268, "step": 46493 }, { "epoch": 0.8081836986563299, "grad_norm": 1.3073898684332639, "learning_rate": 9.34300386778174e-08, "loss": 0.2721, "step": 46494 }, { "epoch": 0.8082010811938327, "grad_norm": 1.2782948209341507, "learning_rate": 9.341365448651539e-08, "loss": 0.1289, "step": 46495 }, { "epoch": 0.8082184637313355, "grad_norm": 1.438548382150589, "learning_rate": 9.339727158390547e-08, "loss": 0.3145, "step": 46496 }, { "epoch": 0.8082358462688384, "grad_norm": 2.6809829783510657, "learning_rate": 9.33808899700394e-08, "loss": 0.2431, "step": 46497 }, { "epoch": 0.8082532288063411, "grad_norm": 1.0458003483461555, "learning_rate": 9.336450964496922e-08, "loss": 0.1907, "step": 46498 }, { "epoch": 0.8082706113438439, "grad_norm": 2.087546976858505, "learning_rate": 9.334813060874652e-08, "loss": 0.2093, "step": 46499 }, { "epoch": 0.8082879938813468, "grad_norm": 1.42506124747677, "learning_rate": 9.333175286142358e-08, "loss": 0.3373, "step": 46500 }, { "epoch": 0.8083053764188496, "grad_norm": 2.237726614328289, "learning_rate": 9.331537640305221e-08, "loss": 0.251, "step": 46501 }, { "epoch": 0.8083227589563524, "grad_norm": 1.4295659401468397, "learning_rate": 9.329900123368422e-08, "loss": 0.2554, "step": 46502 }, { "epoch": 0.8083401414938552, "grad_norm": 2.231918401404629, "learning_rate": 9.32826273533715e-08, "loss": 0.2793, "step": 46503 }, { "epoch": 0.8083575240313581, "grad_norm": 1.0561931633324741, "learning_rate": 9.32662547621662e-08, "loss": 0.3025, "step": 46504 }, { "epoch": 0.8083749065688609, "grad_norm": 1.7395999741163604, "learning_rate": 9.324988346011997e-08, "loss": 0.2408, "step": 46505 }, { "epoch": 0.8083922891063637, "grad_norm": 1.020781104437388, "learning_rate": 9.323351344728475e-08, "loss": 0.2187, "step": 46506 }, { "epoch": 0.8084096716438666, "grad_norm": 1.0505815623731087, "learning_rate": 9.321714472371233e-08, "loss": 0.3105, "step": 46507 }, { "epoch": 0.8084270541813694, "grad_norm": 1.6560412792662376, "learning_rate": 9.32007772894548e-08, "loss": 0.2075, "step": 46508 }, { "epoch": 0.8084444367188722, "grad_norm": 1.6271100831088845, "learning_rate": 9.318441114456394e-08, "loss": 0.252, "step": 46509 }, { "epoch": 0.8084618192563751, "grad_norm": 1.4348658788917232, "learning_rate": 9.316804628909164e-08, "loss": 0.316, "step": 46510 }, { "epoch": 0.8084792017938779, "grad_norm": 1.0516312629662032, "learning_rate": 9.315168272308971e-08, "loss": 0.2659, "step": 46511 }, { "epoch": 0.8084965843313807, "grad_norm": 1.490353796476616, "learning_rate": 9.31353204466101e-08, "loss": 0.2473, "step": 46512 }, { "epoch": 0.8085139668688835, "grad_norm": 1.6867377045439644, "learning_rate": 9.31189594597046e-08, "loss": 0.2317, "step": 46513 }, { "epoch": 0.8085313494063864, "grad_norm": 0.9962385857121906, "learning_rate": 9.310259976242495e-08, "loss": 0.2489, "step": 46514 }, { "epoch": 0.8085487319438892, "grad_norm": 1.8370691106508152, "learning_rate": 9.30862413548233e-08, "loss": 0.2488, "step": 46515 }, { "epoch": 0.808566114481392, "grad_norm": 1.2635924748227956, "learning_rate": 9.306988423695145e-08, "loss": 0.3069, "step": 46516 }, { "epoch": 0.8085834970188949, "grad_norm": 1.6702585332385618, "learning_rate": 9.305352840886105e-08, "loss": 0.2923, "step": 46517 }, { "epoch": 0.8086008795563976, "grad_norm": 1.5453969806829022, "learning_rate": 9.30371738706039e-08, "loss": 0.2488, "step": 46518 }, { "epoch": 0.8086182620939004, "grad_norm": 1.3502333877843375, "learning_rate": 9.302082062223205e-08, "loss": 0.1244, "step": 46519 }, { "epoch": 0.8086356446314032, "grad_norm": 1.6105055277910518, "learning_rate": 9.300446866379724e-08, "loss": 0.1939, "step": 46520 }, { "epoch": 0.8086530271689061, "grad_norm": 2.3026902881922613, "learning_rate": 9.298811799535133e-08, "loss": 0.2725, "step": 46521 }, { "epoch": 0.8086704097064089, "grad_norm": 1.6611502794228623, "learning_rate": 9.2971768616946e-08, "loss": 0.1793, "step": 46522 }, { "epoch": 0.8086877922439117, "grad_norm": 2.262089676480039, "learning_rate": 9.295542052863348e-08, "loss": 0.2475, "step": 46523 }, { "epoch": 0.8087051747814146, "grad_norm": 1.3089565934715097, "learning_rate": 9.293907373046512e-08, "loss": 0.2111, "step": 46524 }, { "epoch": 0.8087225573189174, "grad_norm": 2.0664014409610907, "learning_rate": 9.292272822249292e-08, "loss": 0.2067, "step": 46525 }, { "epoch": 0.8087399398564202, "grad_norm": 1.1182410528783904, "learning_rate": 9.290638400476852e-08, "loss": 0.1585, "step": 46526 }, { "epoch": 0.8087573223939231, "grad_norm": 1.7042347250878835, "learning_rate": 9.289004107734404e-08, "loss": 0.1764, "step": 46527 }, { "epoch": 0.8087747049314259, "grad_norm": 2.068556553945571, "learning_rate": 9.287369944027112e-08, "loss": 0.453, "step": 46528 }, { "epoch": 0.8087920874689287, "grad_norm": 1.6604355902009251, "learning_rate": 9.285735909360154e-08, "loss": 0.2495, "step": 46529 }, { "epoch": 0.8088094700064316, "grad_norm": 1.3551624182753628, "learning_rate": 9.284102003738709e-08, "loss": 0.2316, "step": 46530 }, { "epoch": 0.8088268525439344, "grad_norm": 1.6041210977072666, "learning_rate": 9.282468227167961e-08, "loss": 0.2594, "step": 46531 }, { "epoch": 0.8088442350814372, "grad_norm": 1.316108297620251, "learning_rate": 9.280834579653084e-08, "loss": 0.1894, "step": 46532 }, { "epoch": 0.80886161761894, "grad_norm": 1.12677441680687, "learning_rate": 9.279201061199254e-08, "loss": 0.1738, "step": 46533 }, { "epoch": 0.8088790001564429, "grad_norm": 1.3554802235352295, "learning_rate": 9.277567671811642e-08, "loss": 0.2873, "step": 46534 }, { "epoch": 0.8088963826939457, "grad_norm": 3.5115627600203556, "learning_rate": 9.275934411495457e-08, "loss": 0.2977, "step": 46535 }, { "epoch": 0.8089137652314485, "grad_norm": 1.861849057451871, "learning_rate": 9.274301280255841e-08, "loss": 0.1664, "step": 46536 }, { "epoch": 0.8089311477689514, "grad_norm": 1.0577737817085584, "learning_rate": 9.272668278097967e-08, "loss": 0.276, "step": 46537 }, { "epoch": 0.8089485303064541, "grad_norm": 1.2016148873459387, "learning_rate": 9.271035405027039e-08, "loss": 0.1803, "step": 46538 }, { "epoch": 0.8089659128439569, "grad_norm": 1.3830546391724472, "learning_rate": 9.26940266104822e-08, "loss": 0.1701, "step": 46539 }, { "epoch": 0.8089832953814597, "grad_norm": 1.7385020120633807, "learning_rate": 9.267770046166689e-08, "loss": 0.2357, "step": 46540 }, { "epoch": 0.8090006779189626, "grad_norm": 1.5474054154639552, "learning_rate": 9.266137560387611e-08, "loss": 0.1482, "step": 46541 }, { "epoch": 0.8090180604564654, "grad_norm": 1.3769703885062854, "learning_rate": 9.264505203716166e-08, "loss": 0.2468, "step": 46542 }, { "epoch": 0.8090354429939682, "grad_norm": 2.1434241661457296, "learning_rate": 9.262872976157531e-08, "loss": 0.2216, "step": 46543 }, { "epoch": 0.8090528255314711, "grad_norm": 1.4317755843665794, "learning_rate": 9.26124087771687e-08, "loss": 0.1911, "step": 46544 }, { "epoch": 0.8090702080689739, "grad_norm": 1.6985324757037712, "learning_rate": 9.259608908399352e-08, "loss": 0.2396, "step": 46545 }, { "epoch": 0.8090875906064767, "grad_norm": 1.2486319349623318, "learning_rate": 9.257977068210171e-08, "loss": 0.1399, "step": 46546 }, { "epoch": 0.8091049731439796, "grad_norm": 1.1810948229497233, "learning_rate": 9.256345357154489e-08, "loss": 0.2139, "step": 46547 }, { "epoch": 0.8091223556814824, "grad_norm": 0.853840819414322, "learning_rate": 9.254713775237488e-08, "loss": 0.2872, "step": 46548 }, { "epoch": 0.8091397382189852, "grad_norm": 0.8578266861916318, "learning_rate": 9.2530823224643e-08, "loss": 0.3047, "step": 46549 }, { "epoch": 0.809157120756488, "grad_norm": 1.5071472362133327, "learning_rate": 9.251450998840138e-08, "loss": 0.1416, "step": 46550 }, { "epoch": 0.8091745032939909, "grad_norm": 1.4939020232700837, "learning_rate": 9.249819804370157e-08, "loss": 0.173, "step": 46551 }, { "epoch": 0.8091918858314937, "grad_norm": 2.1845764583301133, "learning_rate": 9.248188739059531e-08, "loss": 0.3608, "step": 46552 }, { "epoch": 0.8092092683689965, "grad_norm": 4.356766713660382, "learning_rate": 9.246557802913408e-08, "loss": 0.269, "step": 46553 }, { "epoch": 0.8092266509064994, "grad_norm": 1.2904382722350733, "learning_rate": 9.244926995937008e-08, "loss": 0.1629, "step": 46554 }, { "epoch": 0.8092440334440022, "grad_norm": 1.493251707175573, "learning_rate": 9.243296318135452e-08, "loss": 0.1527, "step": 46555 }, { "epoch": 0.809261415981505, "grad_norm": 1.4182204563589982, "learning_rate": 9.241665769513923e-08, "loss": 0.1419, "step": 46556 }, { "epoch": 0.8092787985190079, "grad_norm": 1.5549315165076343, "learning_rate": 9.240035350077585e-08, "loss": 0.238, "step": 46557 }, { "epoch": 0.8092961810565106, "grad_norm": 1.8698030532973382, "learning_rate": 9.238405059831616e-08, "loss": 0.2334, "step": 46558 }, { "epoch": 0.8093135635940134, "grad_norm": 1.5231375182993339, "learning_rate": 9.236774898781186e-08, "loss": 0.3031, "step": 46559 }, { "epoch": 0.8093309461315162, "grad_norm": 1.51045766996709, "learning_rate": 9.235144866931449e-08, "loss": 0.1394, "step": 46560 }, { "epoch": 0.8093483286690191, "grad_norm": 1.873846039918826, "learning_rate": 9.233514964287576e-08, "loss": 0.3134, "step": 46561 }, { "epoch": 0.8093657112065219, "grad_norm": 1.5195652451972275, "learning_rate": 9.231885190854737e-08, "loss": 0.2339, "step": 46562 }, { "epoch": 0.8093830937440247, "grad_norm": 1.2199259510791691, "learning_rate": 9.230255546638094e-08, "loss": 0.1545, "step": 46563 }, { "epoch": 0.8094004762815276, "grad_norm": 2.5074568477128762, "learning_rate": 9.228626031642811e-08, "loss": 0.4363, "step": 46564 }, { "epoch": 0.8094178588190304, "grad_norm": 1.6187234156090529, "learning_rate": 9.226996645874047e-08, "loss": 0.2479, "step": 46565 }, { "epoch": 0.8094352413565332, "grad_norm": 0.9868233230290888, "learning_rate": 9.22536738933698e-08, "loss": 0.2175, "step": 46566 }, { "epoch": 0.809452623894036, "grad_norm": 1.0216109505331108, "learning_rate": 9.223738262036785e-08, "loss": 0.2864, "step": 46567 }, { "epoch": 0.8094700064315389, "grad_norm": 1.619881027826241, "learning_rate": 9.222109263978584e-08, "loss": 0.1392, "step": 46568 }, { "epoch": 0.8094873889690417, "grad_norm": 1.502899953406789, "learning_rate": 9.220480395167574e-08, "loss": 0.2346, "step": 46569 }, { "epoch": 0.8095047715065445, "grad_norm": 1.8033290132166393, "learning_rate": 9.218851655608912e-08, "loss": 0.3337, "step": 46570 }, { "epoch": 0.8095221540440474, "grad_norm": 0.9776997027467365, "learning_rate": 9.217223045307754e-08, "loss": 0.1411, "step": 46571 }, { "epoch": 0.8095395365815502, "grad_norm": 0.8524163848311693, "learning_rate": 9.215594564269269e-08, "loss": 0.1647, "step": 46572 }, { "epoch": 0.809556919119053, "grad_norm": 1.701096653846041, "learning_rate": 9.213966212498609e-08, "loss": 0.2686, "step": 46573 }, { "epoch": 0.8095743016565559, "grad_norm": 0.9429879338837789, "learning_rate": 9.212337990000945e-08, "loss": 0.1672, "step": 46574 }, { "epoch": 0.8095916841940587, "grad_norm": 1.4929482898303255, "learning_rate": 9.210709896781432e-08, "loss": 0.2372, "step": 46575 }, { "epoch": 0.8096090667315615, "grad_norm": 2.6154216175924727, "learning_rate": 9.20908193284522e-08, "loss": 0.2609, "step": 46576 }, { "epoch": 0.8096264492690644, "grad_norm": 1.9860634055545374, "learning_rate": 9.207454098197487e-08, "loss": 0.2481, "step": 46577 }, { "epoch": 0.8096438318065671, "grad_norm": 1.726030389973147, "learning_rate": 9.205826392843392e-08, "loss": 0.2082, "step": 46578 }, { "epoch": 0.8096612143440699, "grad_norm": 1.1895997543793548, "learning_rate": 9.204198816788089e-08, "loss": 0.281, "step": 46579 }, { "epoch": 0.8096785968815727, "grad_norm": 1.2265556624426524, "learning_rate": 9.202571370036732e-08, "loss": 0.199, "step": 46580 }, { "epoch": 0.8096959794190756, "grad_norm": 2.531034827849607, "learning_rate": 9.200944052594484e-08, "loss": 0.2487, "step": 46581 }, { "epoch": 0.8097133619565784, "grad_norm": 1.3233413919865389, "learning_rate": 9.199316864466499e-08, "loss": 0.2091, "step": 46582 }, { "epoch": 0.8097307444940812, "grad_norm": 1.5729335375890967, "learning_rate": 9.197689805657944e-08, "loss": 0.3407, "step": 46583 }, { "epoch": 0.809748127031584, "grad_norm": 1.1687413672468296, "learning_rate": 9.196062876173955e-08, "loss": 0.2183, "step": 46584 }, { "epoch": 0.8097655095690869, "grad_norm": 1.8093830693670627, "learning_rate": 9.194436076019713e-08, "loss": 0.1872, "step": 46585 }, { "epoch": 0.8097828921065897, "grad_norm": 1.5020175480050335, "learning_rate": 9.192809405200374e-08, "loss": 0.1362, "step": 46586 }, { "epoch": 0.8098002746440925, "grad_norm": 1.703051335124407, "learning_rate": 9.191182863721076e-08, "loss": 0.2938, "step": 46587 }, { "epoch": 0.8098176571815954, "grad_norm": 1.6056341271919583, "learning_rate": 9.189556451586966e-08, "loss": 0.2193, "step": 46588 }, { "epoch": 0.8098350397190982, "grad_norm": 2.1900655667468403, "learning_rate": 9.187930168803232e-08, "loss": 0.2949, "step": 46589 }, { "epoch": 0.809852422256601, "grad_norm": 1.603199723953957, "learning_rate": 9.186304015375007e-08, "loss": 0.3548, "step": 46590 }, { "epoch": 0.8098698047941039, "grad_norm": 0.7453899097433321, "learning_rate": 9.184677991307455e-08, "loss": 0.1022, "step": 46591 }, { "epoch": 0.8098871873316067, "grad_norm": 0.9426071845205142, "learning_rate": 9.18305209660572e-08, "loss": 0.2944, "step": 46592 }, { "epoch": 0.8099045698691095, "grad_norm": 1.7356393529899938, "learning_rate": 9.181426331274966e-08, "loss": 0.2868, "step": 46593 }, { "epoch": 0.8099219524066124, "grad_norm": 1.4906384336192158, "learning_rate": 9.179800695320333e-08, "loss": 0.2507, "step": 46594 }, { "epoch": 0.8099393349441152, "grad_norm": 1.3679789664144202, "learning_rate": 9.178175188746984e-08, "loss": 0.2563, "step": 46595 }, { "epoch": 0.809956717481618, "grad_norm": 1.9617806385048284, "learning_rate": 9.176549811560058e-08, "loss": 0.2595, "step": 46596 }, { "epoch": 0.8099741000191208, "grad_norm": 1.0760826826405219, "learning_rate": 9.174924563764725e-08, "loss": 0.1576, "step": 46597 }, { "epoch": 0.8099914825566236, "grad_norm": 1.226911829744228, "learning_rate": 9.173299445366129e-08, "loss": 0.1908, "step": 46598 }, { "epoch": 0.8100088650941264, "grad_norm": 0.7251822379107273, "learning_rate": 9.171674456369416e-08, "loss": 0.2783, "step": 46599 }, { "epoch": 0.8100262476316292, "grad_norm": 1.8717191993815208, "learning_rate": 9.170049596779744e-08, "loss": 0.2476, "step": 46600 }, { "epoch": 0.8100436301691321, "grad_norm": 1.7485574569060867, "learning_rate": 9.168424866602259e-08, "loss": 0.2497, "step": 46601 }, { "epoch": 0.8100610127066349, "grad_norm": 1.3783364551042228, "learning_rate": 9.166800265842112e-08, "loss": 0.1756, "step": 46602 }, { "epoch": 0.8100783952441377, "grad_norm": 1.2938878742790154, "learning_rate": 9.165175794504437e-08, "loss": 0.22, "step": 46603 }, { "epoch": 0.8100957777816405, "grad_norm": 1.939692585932063, "learning_rate": 9.163551452594409e-08, "loss": 0.175, "step": 46604 }, { "epoch": 0.8101131603191434, "grad_norm": 1.3097910485252613, "learning_rate": 9.161927240117173e-08, "loss": 0.2354, "step": 46605 }, { "epoch": 0.8101305428566462, "grad_norm": 1.6525245256469523, "learning_rate": 9.160303157077859e-08, "loss": 0.1855, "step": 46606 }, { "epoch": 0.810147925394149, "grad_norm": 1.4027402371934239, "learning_rate": 9.158679203481612e-08, "loss": 0.1646, "step": 46607 }, { "epoch": 0.8101653079316519, "grad_norm": 1.453963383816455, "learning_rate": 9.157055379333601e-08, "loss": 0.1458, "step": 46608 }, { "epoch": 0.8101826904691547, "grad_norm": 1.8660657451467515, "learning_rate": 9.155431684638965e-08, "loss": 0.1947, "step": 46609 }, { "epoch": 0.8102000730066575, "grad_norm": 1.7849488950801973, "learning_rate": 9.15380811940285e-08, "loss": 0.2066, "step": 46610 }, { "epoch": 0.8102174555441604, "grad_norm": 1.7278078401090067, "learning_rate": 9.152184683630398e-08, "loss": 0.2667, "step": 46611 }, { "epoch": 0.8102348380816632, "grad_norm": 2.1486430915727297, "learning_rate": 9.150561377326755e-08, "loss": 0.2535, "step": 46612 }, { "epoch": 0.810252220619166, "grad_norm": 0.6518918210757, "learning_rate": 9.14893820049707e-08, "loss": 0.2022, "step": 46613 }, { "epoch": 0.8102696031566688, "grad_norm": 1.3363703319181175, "learning_rate": 9.147315153146485e-08, "loss": 0.174, "step": 46614 }, { "epoch": 0.8102869856941717, "grad_norm": 1.429833415463651, "learning_rate": 9.145692235280128e-08, "loss": 0.1692, "step": 46615 }, { "epoch": 0.8103043682316745, "grad_norm": 1.1953120094011824, "learning_rate": 9.144069446903174e-08, "loss": 0.2223, "step": 46616 }, { "epoch": 0.8103217507691772, "grad_norm": 1.1741493357961084, "learning_rate": 9.142446788020757e-08, "loss": 0.1967, "step": 46617 }, { "epoch": 0.8103391333066801, "grad_norm": 1.508025361192844, "learning_rate": 9.140824258638014e-08, "loss": 0.1906, "step": 46618 }, { "epoch": 0.8103565158441829, "grad_norm": 1.8368189184074621, "learning_rate": 9.139201858760081e-08, "loss": 0.2307, "step": 46619 }, { "epoch": 0.8103738983816857, "grad_norm": 1.1964227982340667, "learning_rate": 9.137579588392119e-08, "loss": 0.2498, "step": 46620 }, { "epoch": 0.8103912809191886, "grad_norm": 0.839691252190267, "learning_rate": 9.135957447539255e-08, "loss": 0.2175, "step": 46621 }, { "epoch": 0.8104086634566914, "grad_norm": 2.283687004880747, "learning_rate": 9.134335436206631e-08, "loss": 0.2079, "step": 46622 }, { "epoch": 0.8104260459941942, "grad_norm": 2.0085065074220996, "learning_rate": 9.132713554399386e-08, "loss": 0.1656, "step": 46623 }, { "epoch": 0.810443428531697, "grad_norm": 1.7196260054116914, "learning_rate": 9.131091802122681e-08, "loss": 0.1459, "step": 46624 }, { "epoch": 0.8104608110691999, "grad_norm": 1.6652223470425551, "learning_rate": 9.129470179381638e-08, "loss": 0.2238, "step": 46625 }, { "epoch": 0.8104781936067027, "grad_norm": 1.271865372959276, "learning_rate": 9.127848686181383e-08, "loss": 0.2064, "step": 46626 }, { "epoch": 0.8104955761442055, "grad_norm": 0.8674444383921467, "learning_rate": 9.126227322527086e-08, "loss": 0.1684, "step": 46627 }, { "epoch": 0.8105129586817084, "grad_norm": 2.2443741282050524, "learning_rate": 9.124606088423875e-08, "loss": 0.2005, "step": 46628 }, { "epoch": 0.8105303412192112, "grad_norm": 1.2939151918572003, "learning_rate": 9.122984983876886e-08, "loss": 0.2204, "step": 46629 }, { "epoch": 0.810547723756714, "grad_norm": 0.9757906500727775, "learning_rate": 9.121364008891252e-08, "loss": 0.2018, "step": 46630 }, { "epoch": 0.8105651062942169, "grad_norm": 1.8907979606727843, "learning_rate": 9.119743163472121e-08, "loss": 0.1441, "step": 46631 }, { "epoch": 0.8105824888317197, "grad_norm": 0.7782158839583837, "learning_rate": 9.118122447624621e-08, "loss": 0.3512, "step": 46632 }, { "epoch": 0.8105998713692225, "grad_norm": 1.1555860210925806, "learning_rate": 9.116501861353898e-08, "loss": 0.2905, "step": 46633 }, { "epoch": 0.8106172539067253, "grad_norm": 1.5195378048766863, "learning_rate": 9.11488140466507e-08, "loss": 0.2122, "step": 46634 }, { "epoch": 0.8106346364442282, "grad_norm": 1.4379023815353484, "learning_rate": 9.113261077563294e-08, "loss": 0.1973, "step": 46635 }, { "epoch": 0.810652018981731, "grad_norm": 2.178002661118418, "learning_rate": 9.111640880053717e-08, "loss": 0.3539, "step": 46636 }, { "epoch": 0.8106694015192337, "grad_norm": 1.6440550605902198, "learning_rate": 9.110020812141445e-08, "loss": 0.34, "step": 46637 }, { "epoch": 0.8106867840567366, "grad_norm": 0.8964517405253437, "learning_rate": 9.108400873831607e-08, "loss": 0.1489, "step": 46638 }, { "epoch": 0.8107041665942394, "grad_norm": 1.5797093035998184, "learning_rate": 9.106781065129366e-08, "loss": 0.2878, "step": 46639 }, { "epoch": 0.8107215491317422, "grad_norm": 2.170452335396817, "learning_rate": 9.10516138603985e-08, "loss": 0.2024, "step": 46640 }, { "epoch": 0.810738931669245, "grad_norm": 0.9470380650946332, "learning_rate": 9.10354183656818e-08, "loss": 0.101, "step": 46641 }, { "epoch": 0.8107563142067479, "grad_norm": 4.232924854471311, "learning_rate": 9.101922416719488e-08, "loss": 0.2518, "step": 46642 }, { "epoch": 0.8107736967442507, "grad_norm": 1.3220663808858069, "learning_rate": 9.100303126498938e-08, "loss": 0.1615, "step": 46643 }, { "epoch": 0.8107910792817535, "grad_norm": 1.6439717384669885, "learning_rate": 9.098683965911624e-08, "loss": 0.2364, "step": 46644 }, { "epoch": 0.8108084618192564, "grad_norm": 1.199799797715068, "learning_rate": 9.097064934962701e-08, "loss": 0.3324, "step": 46645 }, { "epoch": 0.8108258443567592, "grad_norm": 1.9896506932901659, "learning_rate": 9.095446033657272e-08, "loss": 0.15, "step": 46646 }, { "epoch": 0.810843226894262, "grad_norm": 0.9618481078680986, "learning_rate": 9.093827262000509e-08, "loss": 0.1426, "step": 46647 }, { "epoch": 0.8108606094317649, "grad_norm": 0.9315652935886866, "learning_rate": 9.092208619997515e-08, "loss": 0.0777, "step": 46648 }, { "epoch": 0.8108779919692677, "grad_norm": 1.5426723681932621, "learning_rate": 9.090590107653434e-08, "loss": 0.2308, "step": 46649 }, { "epoch": 0.8108953745067705, "grad_norm": 1.4528490886303786, "learning_rate": 9.088971724973388e-08, "loss": 0.2262, "step": 46650 }, { "epoch": 0.8109127570442733, "grad_norm": 1.6183035994726356, "learning_rate": 9.087353471962511e-08, "loss": 0.1576, "step": 46651 }, { "epoch": 0.8109301395817762, "grad_norm": 1.8351116147521551, "learning_rate": 9.085735348625928e-08, "loss": 0.4775, "step": 46652 }, { "epoch": 0.810947522119279, "grad_norm": 1.179632503751096, "learning_rate": 9.084117354968767e-08, "loss": 0.1785, "step": 46653 }, { "epoch": 0.8109649046567818, "grad_norm": 0.9827229428910599, "learning_rate": 9.082499490996149e-08, "loss": 0.2607, "step": 46654 }, { "epoch": 0.8109822871942847, "grad_norm": 1.2470796971319484, "learning_rate": 9.080881756713238e-08, "loss": 0.3565, "step": 46655 }, { "epoch": 0.8109996697317875, "grad_norm": 0.9061993695175755, "learning_rate": 9.079264152125122e-08, "loss": 0.1965, "step": 46656 }, { "epoch": 0.8110170522692902, "grad_norm": 0.7665923307083495, "learning_rate": 9.077646677236927e-08, "loss": 0.213, "step": 46657 }, { "epoch": 0.811034434806793, "grad_norm": 1.2627444261664493, "learning_rate": 9.076029332053802e-08, "loss": 0.1782, "step": 46658 }, { "epoch": 0.8110518173442959, "grad_norm": 1.3791960202650586, "learning_rate": 9.074412116580871e-08, "loss": 0.2016, "step": 46659 }, { "epoch": 0.8110691998817987, "grad_norm": 1.7122404308905166, "learning_rate": 9.072795030823254e-08, "loss": 0.1671, "step": 46660 }, { "epoch": 0.8110865824193015, "grad_norm": 4.485268994893123, "learning_rate": 9.071178074786079e-08, "loss": 0.2174, "step": 46661 }, { "epoch": 0.8111039649568044, "grad_norm": 1.3534185235830438, "learning_rate": 9.069561248474461e-08, "loss": 0.1847, "step": 46662 }, { "epoch": 0.8111213474943072, "grad_norm": 1.4079357443499834, "learning_rate": 9.067944551893536e-08, "loss": 0.3848, "step": 46663 }, { "epoch": 0.81113873003181, "grad_norm": 1.330074204056233, "learning_rate": 9.066327985048422e-08, "loss": 0.2178, "step": 46664 }, { "epoch": 0.8111561125693129, "grad_norm": 2.055603273384764, "learning_rate": 9.064711547944236e-08, "loss": 0.2974, "step": 46665 }, { "epoch": 0.8111734951068157, "grad_norm": 1.6632989605488417, "learning_rate": 9.063095240586122e-08, "loss": 0.199, "step": 46666 }, { "epoch": 0.8111908776443185, "grad_norm": 1.2160200484126253, "learning_rate": 9.061479062979188e-08, "loss": 0.2173, "step": 46667 }, { "epoch": 0.8112082601818214, "grad_norm": 1.2751401109032487, "learning_rate": 9.059863015128577e-08, "loss": 0.304, "step": 46668 }, { "epoch": 0.8112256427193242, "grad_norm": 1.281750508539747, "learning_rate": 9.058247097039362e-08, "loss": 0.2736, "step": 46669 }, { "epoch": 0.811243025256827, "grad_norm": 1.087146227764883, "learning_rate": 9.056631308716717e-08, "loss": 0.2103, "step": 46670 }, { "epoch": 0.8112604077943298, "grad_norm": 0.951086664622265, "learning_rate": 9.055015650165738e-08, "loss": 0.2143, "step": 46671 }, { "epoch": 0.8112777903318327, "grad_norm": 1.7266905360723237, "learning_rate": 9.053400121391552e-08, "loss": 0.1497, "step": 46672 }, { "epoch": 0.8112951728693355, "grad_norm": 1.789687625909814, "learning_rate": 9.051784722399264e-08, "loss": 0.1395, "step": 46673 }, { "epoch": 0.8113125554068383, "grad_norm": 0.8287172660983453, "learning_rate": 9.050169453194034e-08, "loss": 0.2295, "step": 46674 }, { "epoch": 0.8113299379443412, "grad_norm": 1.4288773417816882, "learning_rate": 9.048554313780948e-08, "loss": 0.3291, "step": 46675 }, { "epoch": 0.811347320481844, "grad_norm": 2.4977888696763153, "learning_rate": 9.04693930416513e-08, "loss": 0.2348, "step": 46676 }, { "epoch": 0.8113647030193467, "grad_norm": 1.6629391307217645, "learning_rate": 9.045324424351686e-08, "loss": 0.2257, "step": 46677 }, { "epoch": 0.8113820855568495, "grad_norm": 1.5131652183917144, "learning_rate": 9.043709674345767e-08, "loss": 0.2347, "step": 46678 }, { "epoch": 0.8113994680943524, "grad_norm": 1.1706944125851746, "learning_rate": 9.042095054152476e-08, "loss": 0.3567, "step": 46679 }, { "epoch": 0.8114168506318552, "grad_norm": 1.274090037455984, "learning_rate": 9.040480563776925e-08, "loss": 0.194, "step": 46680 }, { "epoch": 0.811434233169358, "grad_norm": 1.1858375671883068, "learning_rate": 9.038866203224238e-08, "loss": 0.2716, "step": 46681 }, { "epoch": 0.8114516157068609, "grad_norm": 2.0876408455768987, "learning_rate": 9.037251972499533e-08, "loss": 0.2282, "step": 46682 }, { "epoch": 0.8114689982443637, "grad_norm": 2.3731333159875305, "learning_rate": 9.035637871607915e-08, "loss": 0.266, "step": 46683 }, { "epoch": 0.8114863807818665, "grad_norm": 1.0089446892636835, "learning_rate": 9.034023900554511e-08, "loss": 0.2046, "step": 46684 }, { "epoch": 0.8115037633193694, "grad_norm": 2.844663030470842, "learning_rate": 9.03241005934442e-08, "loss": 0.2499, "step": 46685 }, { "epoch": 0.8115211458568722, "grad_norm": 2.0795640165504476, "learning_rate": 9.030796347982783e-08, "loss": 0.1682, "step": 46686 }, { "epoch": 0.811538528394375, "grad_norm": 1.2092553472716003, "learning_rate": 9.029182766474714e-08, "loss": 0.1849, "step": 46687 }, { "epoch": 0.8115559109318778, "grad_norm": 1.1782532887902715, "learning_rate": 9.02756931482529e-08, "loss": 0.1501, "step": 46688 }, { "epoch": 0.8115732934693807, "grad_norm": 1.5270933890138223, "learning_rate": 9.025955993039664e-08, "loss": 0.2309, "step": 46689 }, { "epoch": 0.8115906760068835, "grad_norm": 1.490877011389188, "learning_rate": 9.024342801122937e-08, "loss": 0.1514, "step": 46690 }, { "epoch": 0.8116080585443863, "grad_norm": 1.1467929564746406, "learning_rate": 9.022729739080215e-08, "loss": 0.1876, "step": 46691 }, { "epoch": 0.8116254410818892, "grad_norm": 2.0046885644792978, "learning_rate": 9.021116806916618e-08, "loss": 0.2348, "step": 46692 }, { "epoch": 0.811642823619392, "grad_norm": 1.486016325626884, "learning_rate": 9.019504004637257e-08, "loss": 0.2345, "step": 46693 }, { "epoch": 0.8116602061568948, "grad_norm": 1.456773371035422, "learning_rate": 9.017891332247246e-08, "loss": 0.1945, "step": 46694 }, { "epoch": 0.8116775886943977, "grad_norm": 2.376295151466605, "learning_rate": 9.016278789751691e-08, "loss": 0.2429, "step": 46695 }, { "epoch": 0.8116949712319005, "grad_norm": 1.3883324571930422, "learning_rate": 9.014666377155694e-08, "loss": 0.2608, "step": 46696 }, { "epoch": 0.8117123537694032, "grad_norm": 1.0865426480326619, "learning_rate": 9.013054094464395e-08, "loss": 0.1887, "step": 46697 }, { "epoch": 0.811729736306906, "grad_norm": 1.9228172029174335, "learning_rate": 9.011441941682878e-08, "loss": 0.2717, "step": 46698 }, { "epoch": 0.8117471188444089, "grad_norm": 1.0186874435999962, "learning_rate": 9.00982991881627e-08, "loss": 0.2885, "step": 46699 }, { "epoch": 0.8117645013819117, "grad_norm": 0.9017445017424568, "learning_rate": 9.008218025869668e-08, "loss": 0.1643, "step": 46700 }, { "epoch": 0.8117818839194145, "grad_norm": 1.1677726254068759, "learning_rate": 9.006606262848182e-08, "loss": 0.1634, "step": 46701 }, { "epoch": 0.8117992664569174, "grad_norm": 1.7588758629191994, "learning_rate": 9.004994629756923e-08, "loss": 0.2129, "step": 46702 }, { "epoch": 0.8118166489944202, "grad_norm": 1.8510679772070622, "learning_rate": 9.003383126601005e-08, "loss": 0.2984, "step": 46703 }, { "epoch": 0.811834031531923, "grad_norm": 1.6425727466059425, "learning_rate": 9.001771753385513e-08, "loss": 0.2434, "step": 46704 }, { "epoch": 0.8118514140694258, "grad_norm": 1.2270598873517289, "learning_rate": 9.000160510115585e-08, "loss": 0.2047, "step": 46705 }, { "epoch": 0.8118687966069287, "grad_norm": 2.01314319036956, "learning_rate": 8.99854939679633e-08, "loss": 0.2399, "step": 46706 }, { "epoch": 0.8118861791444315, "grad_norm": 0.9556945919725615, "learning_rate": 8.996938413432809e-08, "loss": 0.2376, "step": 46707 }, { "epoch": 0.8119035616819343, "grad_norm": 1.3156528351779195, "learning_rate": 8.995327560030174e-08, "loss": 0.2065, "step": 46708 }, { "epoch": 0.8119209442194372, "grad_norm": 3.2972993152282957, "learning_rate": 8.993716836593513e-08, "loss": 0.2401, "step": 46709 }, { "epoch": 0.81193832675694, "grad_norm": 1.675096303639732, "learning_rate": 8.992106243127933e-08, "loss": 0.2528, "step": 46710 }, { "epoch": 0.8119557092944428, "grad_norm": 1.1746074592903906, "learning_rate": 8.99049577963854e-08, "loss": 0.1289, "step": 46711 }, { "epoch": 0.8119730918319457, "grad_norm": 1.1934397432478154, "learning_rate": 8.988885446130439e-08, "loss": 0.266, "step": 46712 }, { "epoch": 0.8119904743694485, "grad_norm": 0.9413060233240803, "learning_rate": 8.987275242608727e-08, "loss": 0.2489, "step": 46713 }, { "epoch": 0.8120078569069513, "grad_norm": 0.7739466973333231, "learning_rate": 8.985665169078516e-08, "loss": 0.1826, "step": 46714 }, { "epoch": 0.8120252394444541, "grad_norm": 1.5992884554970352, "learning_rate": 8.984055225544896e-08, "loss": 0.2935, "step": 46715 }, { "epoch": 0.812042621981957, "grad_norm": 1.347858147861629, "learning_rate": 8.982445412012984e-08, "loss": 0.3059, "step": 46716 }, { "epoch": 0.8120600045194597, "grad_norm": 1.2382227149338598, "learning_rate": 8.980835728487885e-08, "loss": 0.2171, "step": 46717 }, { "epoch": 0.8120773870569625, "grad_norm": 1.162658461086589, "learning_rate": 8.979226174974691e-08, "loss": 0.2588, "step": 46718 }, { "epoch": 0.8120947695944654, "grad_norm": 2.226541773966677, "learning_rate": 8.977616751478511e-08, "loss": 0.1785, "step": 46719 }, { "epoch": 0.8121121521319682, "grad_norm": 1.3083908900003907, "learning_rate": 8.976007458004436e-08, "loss": 0.1493, "step": 46720 }, { "epoch": 0.812129534669471, "grad_norm": 0.7689779861485294, "learning_rate": 8.974398294557578e-08, "loss": 0.1313, "step": 46721 }, { "epoch": 0.8121469172069739, "grad_norm": 1.1353993917276555, "learning_rate": 8.972789261143027e-08, "loss": 0.2274, "step": 46722 }, { "epoch": 0.8121642997444767, "grad_norm": 1.071647267835198, "learning_rate": 8.971180357765878e-08, "loss": 0.2426, "step": 46723 }, { "epoch": 0.8121816822819795, "grad_norm": 1.8849918860777306, "learning_rate": 8.969571584431251e-08, "loss": 0.277, "step": 46724 }, { "epoch": 0.8121990648194823, "grad_norm": 1.7093056888444047, "learning_rate": 8.967962941144247e-08, "loss": 0.1833, "step": 46725 }, { "epoch": 0.8122164473569852, "grad_norm": 1.0955968267874137, "learning_rate": 8.96635442790994e-08, "loss": 0.1382, "step": 46726 }, { "epoch": 0.812233829894488, "grad_norm": 1.4258488287766253, "learning_rate": 8.96474604473343e-08, "loss": 0.2239, "step": 46727 }, { "epoch": 0.8122512124319908, "grad_norm": 1.6860412031654324, "learning_rate": 8.963137791619834e-08, "loss": 0.3011, "step": 46728 }, { "epoch": 0.8122685949694937, "grad_norm": 2.095704732344182, "learning_rate": 8.961529668574242e-08, "loss": 0.3495, "step": 46729 }, { "epoch": 0.8122859775069965, "grad_norm": 1.3077266194610175, "learning_rate": 8.959921675601745e-08, "loss": 0.2103, "step": 46730 }, { "epoch": 0.8123033600444993, "grad_norm": 1.7550479346043093, "learning_rate": 8.958313812707452e-08, "loss": 0.1974, "step": 46731 }, { "epoch": 0.8123207425820022, "grad_norm": 1.246347983474165, "learning_rate": 8.956706079896443e-08, "loss": 0.2422, "step": 46732 }, { "epoch": 0.812338125119505, "grad_norm": 1.6409377815416228, "learning_rate": 8.955098477173828e-08, "loss": 0.2795, "step": 46733 }, { "epoch": 0.8123555076570078, "grad_norm": 1.8700139315573734, "learning_rate": 8.953491004544695e-08, "loss": 0.1913, "step": 46734 }, { "epoch": 0.8123728901945106, "grad_norm": 1.6011416255928925, "learning_rate": 8.951883662014126e-08, "loss": 0.2239, "step": 46735 }, { "epoch": 0.8123902727320135, "grad_norm": 1.3504217100636222, "learning_rate": 8.950276449587246e-08, "loss": 0.1733, "step": 46736 }, { "epoch": 0.8124076552695162, "grad_norm": 1.0142706955655367, "learning_rate": 8.948669367269129e-08, "loss": 0.1071, "step": 46737 }, { "epoch": 0.812425037807019, "grad_norm": 0.8256365094913407, "learning_rate": 8.947062415064871e-08, "loss": 0.1782, "step": 46738 }, { "epoch": 0.8124424203445219, "grad_norm": 2.4043363653472047, "learning_rate": 8.945455592979573e-08, "loss": 0.2577, "step": 46739 }, { "epoch": 0.8124598028820247, "grad_norm": 1.611957244550589, "learning_rate": 8.94384890101832e-08, "loss": 0.1532, "step": 46740 }, { "epoch": 0.8124771854195275, "grad_norm": 0.8343557938996452, "learning_rate": 8.942242339186201e-08, "loss": 0.2076, "step": 46741 }, { "epoch": 0.8124945679570303, "grad_norm": 1.2735103813276014, "learning_rate": 8.94063590748832e-08, "loss": 0.2765, "step": 46742 }, { "epoch": 0.8125119504945332, "grad_norm": 1.5239794717127304, "learning_rate": 8.939029605929749e-08, "loss": 0.2596, "step": 46743 }, { "epoch": 0.812529333032036, "grad_norm": 1.1530669970636032, "learning_rate": 8.937423434515612e-08, "loss": 0.2193, "step": 46744 }, { "epoch": 0.8125467155695388, "grad_norm": 2.142234804933904, "learning_rate": 8.935817393250972e-08, "loss": 0.1818, "step": 46745 }, { "epoch": 0.8125640981070417, "grad_norm": 2.072943928884425, "learning_rate": 8.934211482140913e-08, "loss": 0.2146, "step": 46746 }, { "epoch": 0.8125814806445445, "grad_norm": 1.2122697180427955, "learning_rate": 8.932605701190555e-08, "loss": 0.1279, "step": 46747 }, { "epoch": 0.8125988631820473, "grad_norm": 0.9081923442413994, "learning_rate": 8.931000050404969e-08, "loss": 0.1331, "step": 46748 }, { "epoch": 0.8126162457195502, "grad_norm": 1.2922900841522988, "learning_rate": 8.929394529789247e-08, "loss": 0.2146, "step": 46749 }, { "epoch": 0.812633628257053, "grad_norm": 1.924726808017471, "learning_rate": 8.927789139348486e-08, "loss": 0.3174, "step": 46750 }, { "epoch": 0.8126510107945558, "grad_norm": 2.6286246605509658, "learning_rate": 8.926183879087756e-08, "loss": 0.4113, "step": 46751 }, { "epoch": 0.8126683933320586, "grad_norm": 2.416663312298021, "learning_rate": 8.924578749012157e-08, "loss": 0.2311, "step": 46752 }, { "epoch": 0.8126857758695615, "grad_norm": 1.308628298078731, "learning_rate": 8.922973749126778e-08, "loss": 0.1884, "step": 46753 }, { "epoch": 0.8127031584070643, "grad_norm": 1.877090201132674, "learning_rate": 8.921368879436691e-08, "loss": 0.2808, "step": 46754 }, { "epoch": 0.8127205409445671, "grad_norm": 2.268666604161552, "learning_rate": 8.919764139947005e-08, "loss": 0.2231, "step": 46755 }, { "epoch": 0.8127379234820699, "grad_norm": 1.0619829580022386, "learning_rate": 8.918159530662811e-08, "loss": 0.3053, "step": 46756 }, { "epoch": 0.8127553060195727, "grad_norm": 1.0059268525339107, "learning_rate": 8.916555051589164e-08, "loss": 0.1931, "step": 46757 }, { "epoch": 0.8127726885570755, "grad_norm": 14.985511835940475, "learning_rate": 8.914950702731156e-08, "loss": 0.3612, "step": 46758 }, { "epoch": 0.8127900710945783, "grad_norm": 1.929847270348802, "learning_rate": 8.913346484093893e-08, "loss": 0.4144, "step": 46759 }, { "epoch": 0.8128074536320812, "grad_norm": 1.352087927493913, "learning_rate": 8.911742395682447e-08, "loss": 0.3454, "step": 46760 }, { "epoch": 0.812824836169584, "grad_norm": 1.0156355068098846, "learning_rate": 8.910138437501902e-08, "loss": 0.2108, "step": 46761 }, { "epoch": 0.8128422187070868, "grad_norm": 1.158626474477989, "learning_rate": 8.908534609557334e-08, "loss": 0.1769, "step": 46762 }, { "epoch": 0.8128596012445897, "grad_norm": 1.4422086980537616, "learning_rate": 8.906930911853855e-08, "loss": 0.2673, "step": 46763 }, { "epoch": 0.8128769837820925, "grad_norm": 1.4755857332367044, "learning_rate": 8.905327344396518e-08, "loss": 0.2675, "step": 46764 }, { "epoch": 0.8128943663195953, "grad_norm": 2.0347903041388764, "learning_rate": 8.903723907190419e-08, "loss": 0.2255, "step": 46765 }, { "epoch": 0.8129117488570982, "grad_norm": 1.696960666313344, "learning_rate": 8.902120600240625e-08, "loss": 0.2304, "step": 46766 }, { "epoch": 0.812929131394601, "grad_norm": 2.3192380549749374, "learning_rate": 8.90051742355224e-08, "loss": 0.2253, "step": 46767 }, { "epoch": 0.8129465139321038, "grad_norm": 1.3079598006245317, "learning_rate": 8.898914377130334e-08, "loss": 0.1008, "step": 46768 }, { "epoch": 0.8129638964696067, "grad_norm": 0.8048664177584688, "learning_rate": 8.897311460979989e-08, "loss": 0.2067, "step": 46769 }, { "epoch": 0.8129812790071095, "grad_norm": 2.000923523389345, "learning_rate": 8.89570867510629e-08, "loss": 0.1474, "step": 46770 }, { "epoch": 0.8129986615446123, "grad_norm": 1.4917713970668278, "learning_rate": 8.894106019514308e-08, "loss": 0.1694, "step": 46771 }, { "epoch": 0.8130160440821151, "grad_norm": 1.9481684991636037, "learning_rate": 8.89250349420913e-08, "loss": 0.3852, "step": 46772 }, { "epoch": 0.813033426619618, "grad_norm": 1.4279590949347893, "learning_rate": 8.89090109919583e-08, "loss": 0.2363, "step": 46773 }, { "epoch": 0.8130508091571208, "grad_norm": 1.345131188947798, "learning_rate": 8.88929883447948e-08, "loss": 0.2258, "step": 46774 }, { "epoch": 0.8130681916946236, "grad_norm": 1.4039403607666234, "learning_rate": 8.887696700065195e-08, "loss": 0.1272, "step": 46775 }, { "epoch": 0.8130855742321264, "grad_norm": 2.335925476688128, "learning_rate": 8.886094695958007e-08, "loss": 0.3278, "step": 46776 }, { "epoch": 0.8131029567696292, "grad_norm": 2.210857681697854, "learning_rate": 8.884492822163004e-08, "loss": 0.1438, "step": 46777 }, { "epoch": 0.813120339307132, "grad_norm": 1.825632949462465, "learning_rate": 8.882891078685284e-08, "loss": 0.2499, "step": 46778 }, { "epoch": 0.8131377218446348, "grad_norm": 2.113433388582814, "learning_rate": 8.881289465529911e-08, "loss": 0.2696, "step": 46779 }, { "epoch": 0.8131551043821377, "grad_norm": 1.268301827934045, "learning_rate": 8.879687982701967e-08, "loss": 0.1915, "step": 46780 }, { "epoch": 0.8131724869196405, "grad_norm": 1.1062960205293897, "learning_rate": 8.878086630206517e-08, "loss": 0.2001, "step": 46781 }, { "epoch": 0.8131898694571433, "grad_norm": 1.0660670948635358, "learning_rate": 8.876485408048645e-08, "loss": 0.1772, "step": 46782 }, { "epoch": 0.8132072519946462, "grad_norm": 2.4860231140453712, "learning_rate": 8.874884316233422e-08, "loss": 0.2072, "step": 46783 }, { "epoch": 0.813224634532149, "grad_norm": 1.2657936075498266, "learning_rate": 8.873283354765926e-08, "loss": 0.225, "step": 46784 }, { "epoch": 0.8132420170696518, "grad_norm": 1.1823272539193388, "learning_rate": 8.871682523651214e-08, "loss": 0.283, "step": 46785 }, { "epoch": 0.8132593996071547, "grad_norm": 1.7002950461188036, "learning_rate": 8.870081822894394e-08, "loss": 0.2309, "step": 46786 }, { "epoch": 0.8132767821446575, "grad_norm": 1.4586578392088647, "learning_rate": 8.868481252500515e-08, "loss": 0.205, "step": 46787 }, { "epoch": 0.8132941646821603, "grad_norm": 2.2789098554715115, "learning_rate": 8.86688081247467e-08, "loss": 0.2513, "step": 46788 }, { "epoch": 0.8133115472196631, "grad_norm": 2.062604457488888, "learning_rate": 8.865280502821898e-08, "loss": 0.1918, "step": 46789 }, { "epoch": 0.813328929757166, "grad_norm": 2.377400780869491, "learning_rate": 8.863680323547295e-08, "loss": 0.1704, "step": 46790 }, { "epoch": 0.8133463122946688, "grad_norm": 1.2490224039437658, "learning_rate": 8.862080274655936e-08, "loss": 0.1357, "step": 46791 }, { "epoch": 0.8133636948321716, "grad_norm": 1.7158634848192278, "learning_rate": 8.860480356152882e-08, "loss": 0.2377, "step": 46792 }, { "epoch": 0.8133810773696745, "grad_norm": 1.9139327426396802, "learning_rate": 8.858880568043198e-08, "loss": 0.1797, "step": 46793 }, { "epoch": 0.8133984599071773, "grad_norm": 1.7638374660320937, "learning_rate": 8.857280910331983e-08, "loss": 0.3127, "step": 46794 }, { "epoch": 0.8134158424446801, "grad_norm": 1.4808504496505226, "learning_rate": 8.855681383024282e-08, "loss": 0.2903, "step": 46795 }, { "epoch": 0.8134332249821828, "grad_norm": 0.9439184126105842, "learning_rate": 8.854081986125156e-08, "loss": 0.3442, "step": 46796 }, { "epoch": 0.8134506075196857, "grad_norm": 1.3936672767104876, "learning_rate": 8.852482719639698e-08, "loss": 0.1922, "step": 46797 }, { "epoch": 0.8134679900571885, "grad_norm": 1.2683203682401376, "learning_rate": 8.850883583572972e-08, "loss": 0.159, "step": 46798 }, { "epoch": 0.8134853725946913, "grad_norm": 2.0033576602593803, "learning_rate": 8.849284577930044e-08, "loss": 0.2512, "step": 46799 }, { "epoch": 0.8135027551321942, "grad_norm": 1.1192011031553744, "learning_rate": 8.84768570271598e-08, "loss": 0.1798, "step": 46800 }, { "epoch": 0.813520137669697, "grad_norm": 1.264120396745505, "learning_rate": 8.846086957935844e-08, "loss": 0.2331, "step": 46801 }, { "epoch": 0.8135375202071998, "grad_norm": 1.3626956370503505, "learning_rate": 8.844488343594714e-08, "loss": 0.1471, "step": 46802 }, { "epoch": 0.8135549027447027, "grad_norm": 2.344143891333346, "learning_rate": 8.842889859697644e-08, "loss": 0.3646, "step": 46803 }, { "epoch": 0.8135722852822055, "grad_norm": 1.7262308407493063, "learning_rate": 8.841291506249698e-08, "loss": 0.2482, "step": 46804 }, { "epoch": 0.8135896678197083, "grad_norm": 1.2100592565440615, "learning_rate": 8.839693283255967e-08, "loss": 0.206, "step": 46805 }, { "epoch": 0.8136070503572111, "grad_norm": 0.9276451684915583, "learning_rate": 8.838095190721495e-08, "loss": 0.2658, "step": 46806 }, { "epoch": 0.813624432894714, "grad_norm": 1.500476606720919, "learning_rate": 8.836497228651369e-08, "loss": 0.2891, "step": 46807 }, { "epoch": 0.8136418154322168, "grad_norm": 1.2877826859893606, "learning_rate": 8.834899397050616e-08, "loss": 0.1817, "step": 46808 }, { "epoch": 0.8136591979697196, "grad_norm": 3.379138476086639, "learning_rate": 8.833301695924327e-08, "loss": 0.2317, "step": 46809 }, { "epoch": 0.8136765805072225, "grad_norm": 2.7270306079158635, "learning_rate": 8.831704125277567e-08, "loss": 0.185, "step": 46810 }, { "epoch": 0.8136939630447253, "grad_norm": 1.3115182712206002, "learning_rate": 8.83010668511539e-08, "loss": 0.3049, "step": 46811 }, { "epoch": 0.8137113455822281, "grad_norm": 1.6454371261752676, "learning_rate": 8.828509375442855e-08, "loss": 0.2113, "step": 46812 }, { "epoch": 0.813728728119731, "grad_norm": 1.5791792184338713, "learning_rate": 8.826912196265052e-08, "loss": 0.2316, "step": 46813 }, { "epoch": 0.8137461106572338, "grad_norm": 1.3982738951606686, "learning_rate": 8.825315147587015e-08, "loss": 0.4543, "step": 46814 }, { "epoch": 0.8137634931947366, "grad_norm": 1.3314604165581394, "learning_rate": 8.823718229413818e-08, "loss": 0.1676, "step": 46815 }, { "epoch": 0.8137808757322393, "grad_norm": 2.2080354255626085, "learning_rate": 8.822121441750502e-08, "loss": 0.3411, "step": 46816 }, { "epoch": 0.8137982582697422, "grad_norm": 1.0565486708009793, "learning_rate": 8.820524784602157e-08, "loss": 0.1508, "step": 46817 }, { "epoch": 0.813815640807245, "grad_norm": 1.0825590399293283, "learning_rate": 8.818928257973835e-08, "loss": 0.2102, "step": 46818 }, { "epoch": 0.8138330233447478, "grad_norm": 2.28176300741775, "learning_rate": 8.817331861870592e-08, "loss": 0.1693, "step": 46819 }, { "epoch": 0.8138504058822507, "grad_norm": 1.2431570485899506, "learning_rate": 8.815735596297491e-08, "loss": 0.201, "step": 46820 }, { "epoch": 0.8138677884197535, "grad_norm": 1.2236522621756294, "learning_rate": 8.81413946125959e-08, "loss": 0.3539, "step": 46821 }, { "epoch": 0.8138851709572563, "grad_norm": 1.7783450722869423, "learning_rate": 8.812543456761945e-08, "loss": 0.2292, "step": 46822 }, { "epoch": 0.8139025534947592, "grad_norm": 1.8428102509555597, "learning_rate": 8.810947582809619e-08, "loss": 0.2277, "step": 46823 }, { "epoch": 0.813919936032262, "grad_norm": 1.5051478629039459, "learning_rate": 8.809351839407653e-08, "loss": 0.2873, "step": 46824 }, { "epoch": 0.8139373185697648, "grad_norm": 0.9588497020499888, "learning_rate": 8.807756226561136e-08, "loss": 0.1644, "step": 46825 }, { "epoch": 0.8139547011072676, "grad_norm": 1.6345465262572447, "learning_rate": 8.80616074427512e-08, "loss": 0.2893, "step": 46826 }, { "epoch": 0.8139720836447705, "grad_norm": 1.1325423557111376, "learning_rate": 8.80456539255463e-08, "loss": 0.3863, "step": 46827 }, { "epoch": 0.8139894661822733, "grad_norm": 1.3692800224688573, "learning_rate": 8.802970171404756e-08, "loss": 0.1371, "step": 46828 }, { "epoch": 0.8140068487197761, "grad_norm": 0.9014967898783075, "learning_rate": 8.801375080830536e-08, "loss": 0.2871, "step": 46829 }, { "epoch": 0.814024231257279, "grad_norm": 1.62063809559841, "learning_rate": 8.799780120837035e-08, "loss": 0.3126, "step": 46830 }, { "epoch": 0.8140416137947818, "grad_norm": 1.137402410413497, "learning_rate": 8.798185291429311e-08, "loss": 0.2038, "step": 46831 }, { "epoch": 0.8140589963322846, "grad_norm": 1.1820140892156648, "learning_rate": 8.796590592612407e-08, "loss": 0.171, "step": 46832 }, { "epoch": 0.8140763788697875, "grad_norm": 1.4728359675737317, "learning_rate": 8.794996024391387e-08, "loss": 0.1087, "step": 46833 }, { "epoch": 0.8140937614072903, "grad_norm": 1.0490204635284082, "learning_rate": 8.793401586771298e-08, "loss": 0.2583, "step": 46834 }, { "epoch": 0.8141111439447931, "grad_norm": 1.757340933450513, "learning_rate": 8.791807279757185e-08, "loss": 0.2372, "step": 46835 }, { "epoch": 0.8141285264822958, "grad_norm": 0.9485402248287015, "learning_rate": 8.790213103354127e-08, "loss": 0.1404, "step": 46836 }, { "epoch": 0.8141459090197987, "grad_norm": 1.8390135797062457, "learning_rate": 8.788619057567165e-08, "loss": 0.2734, "step": 46837 }, { "epoch": 0.8141632915573015, "grad_norm": 1.209633083527886, "learning_rate": 8.78702514240135e-08, "loss": 0.1939, "step": 46838 }, { "epoch": 0.8141806740948043, "grad_norm": 1.4737492603504336, "learning_rate": 8.785431357861734e-08, "loss": 0.2617, "step": 46839 }, { "epoch": 0.8141980566323072, "grad_norm": 1.1767780900565339, "learning_rate": 8.783837703953362e-08, "loss": 0.1209, "step": 46840 }, { "epoch": 0.81421543916981, "grad_norm": 1.5485811879807927, "learning_rate": 8.782244180681298e-08, "loss": 0.2533, "step": 46841 }, { "epoch": 0.8142328217073128, "grad_norm": 2.1805500710489523, "learning_rate": 8.780650788050581e-08, "loss": 0.3937, "step": 46842 }, { "epoch": 0.8142502042448156, "grad_norm": 1.2816935000644396, "learning_rate": 8.779057526066259e-08, "loss": 0.0948, "step": 46843 }, { "epoch": 0.8142675867823185, "grad_norm": 2.186993936575619, "learning_rate": 8.777464394733398e-08, "loss": 0.143, "step": 46844 }, { "epoch": 0.8142849693198213, "grad_norm": 0.6655141975486575, "learning_rate": 8.77587139405705e-08, "loss": 0.2568, "step": 46845 }, { "epoch": 0.8143023518573241, "grad_norm": 1.2838771940732427, "learning_rate": 8.774278524042245e-08, "loss": 0.2259, "step": 46846 }, { "epoch": 0.814319734394827, "grad_norm": 1.048781196258683, "learning_rate": 8.772685784694022e-08, "loss": 0.2019, "step": 46847 }, { "epoch": 0.8143371169323298, "grad_norm": 1.7744326576178944, "learning_rate": 8.771093176017463e-08, "loss": 0.1858, "step": 46848 }, { "epoch": 0.8143544994698326, "grad_norm": 1.4575091086611907, "learning_rate": 8.7695006980176e-08, "loss": 0.2309, "step": 46849 }, { "epoch": 0.8143718820073355, "grad_norm": 1.4991340929342636, "learning_rate": 8.767908350699477e-08, "loss": 0.1643, "step": 46850 }, { "epoch": 0.8143892645448383, "grad_norm": 1.6139414236054104, "learning_rate": 8.766316134068147e-08, "loss": 0.0944, "step": 46851 }, { "epoch": 0.8144066470823411, "grad_norm": 1.003395424358599, "learning_rate": 8.764724048128652e-08, "loss": 0.141, "step": 46852 }, { "epoch": 0.814424029619844, "grad_norm": 1.7992200787391448, "learning_rate": 8.763132092886039e-08, "loss": 0.1848, "step": 46853 }, { "epoch": 0.8144414121573468, "grad_norm": 1.960394821116666, "learning_rate": 8.761540268345357e-08, "loss": 0.2053, "step": 46854 }, { "epoch": 0.8144587946948496, "grad_norm": 1.1149549232510358, "learning_rate": 8.759948574511633e-08, "loss": 0.2704, "step": 46855 }, { "epoch": 0.8144761772323523, "grad_norm": 1.5632092093051693, "learning_rate": 8.758357011389944e-08, "loss": 0.2683, "step": 46856 }, { "epoch": 0.8144935597698552, "grad_norm": 1.9931320124349747, "learning_rate": 8.756765578985321e-08, "loss": 0.2094, "step": 46857 }, { "epoch": 0.814510942307358, "grad_norm": 1.968230810496354, "learning_rate": 8.755174277302802e-08, "loss": 0.1731, "step": 46858 }, { "epoch": 0.8145283248448608, "grad_norm": 1.4363203572329337, "learning_rate": 8.753583106347429e-08, "loss": 0.2411, "step": 46859 }, { "epoch": 0.8145457073823636, "grad_norm": 0.7644882684349591, "learning_rate": 8.751992066124259e-08, "loss": 0.1647, "step": 46860 }, { "epoch": 0.8145630899198665, "grad_norm": 1.1116785938255105, "learning_rate": 8.75040115663832e-08, "loss": 0.1566, "step": 46861 }, { "epoch": 0.8145804724573693, "grad_norm": 1.8145225219229073, "learning_rate": 8.748810377894666e-08, "loss": 0.2422, "step": 46862 }, { "epoch": 0.8145978549948721, "grad_norm": 1.3132565471858029, "learning_rate": 8.747219729898319e-08, "loss": 0.1709, "step": 46863 }, { "epoch": 0.814615237532375, "grad_norm": 1.5181870341243064, "learning_rate": 8.74562921265436e-08, "loss": 0.1995, "step": 46864 }, { "epoch": 0.8146326200698778, "grad_norm": 1.6343760773933411, "learning_rate": 8.744038826167794e-08, "loss": 0.2241, "step": 46865 }, { "epoch": 0.8146500026073806, "grad_norm": 1.3248540974244933, "learning_rate": 8.74244857044366e-08, "loss": 0.1969, "step": 46866 }, { "epoch": 0.8146673851448835, "grad_norm": 0.9849761688226284, "learning_rate": 8.740858445487026e-08, "loss": 0.1775, "step": 46867 }, { "epoch": 0.8146847676823863, "grad_norm": 1.5339170869632082, "learning_rate": 8.739268451302917e-08, "loss": 0.3113, "step": 46868 }, { "epoch": 0.8147021502198891, "grad_norm": 1.0550777725432636, "learning_rate": 8.737678587896368e-08, "loss": 0.1515, "step": 46869 }, { "epoch": 0.814719532757392, "grad_norm": 1.325358869558313, "learning_rate": 8.736088855272434e-08, "loss": 0.5456, "step": 46870 }, { "epoch": 0.8147369152948948, "grad_norm": 0.7932244783027895, "learning_rate": 8.734499253436134e-08, "loss": 0.2753, "step": 46871 }, { "epoch": 0.8147542978323976, "grad_norm": 2.1771665802602564, "learning_rate": 8.732909782392522e-08, "loss": 0.4065, "step": 46872 }, { "epoch": 0.8147716803699004, "grad_norm": 1.7641279697610859, "learning_rate": 8.731320442146622e-08, "loss": 0.1737, "step": 46873 }, { "epoch": 0.8147890629074033, "grad_norm": 1.0451194325059785, "learning_rate": 8.729731232703475e-08, "loss": 0.2424, "step": 46874 }, { "epoch": 0.8148064454449061, "grad_norm": 1.6326419133130607, "learning_rate": 8.72814215406813e-08, "loss": 0.2969, "step": 46875 }, { "epoch": 0.8148238279824088, "grad_norm": 0.917192766810129, "learning_rate": 8.72655320624563e-08, "loss": 0.1842, "step": 46876 }, { "epoch": 0.8148412105199117, "grad_norm": 1.2710691002709698, "learning_rate": 8.724964389240979e-08, "loss": 0.2043, "step": 46877 }, { "epoch": 0.8148585930574145, "grad_norm": 1.0595668936728364, "learning_rate": 8.723375703059227e-08, "loss": 0.2914, "step": 46878 }, { "epoch": 0.8148759755949173, "grad_norm": 1.8118147269595428, "learning_rate": 8.721787147705417e-08, "loss": 0.2833, "step": 46879 }, { "epoch": 0.8148933581324201, "grad_norm": 1.317988462179085, "learning_rate": 8.720198723184586e-08, "loss": 0.0967, "step": 46880 }, { "epoch": 0.814910740669923, "grad_norm": 1.8861693463170166, "learning_rate": 8.718610429501761e-08, "loss": 0.2203, "step": 46881 }, { "epoch": 0.8149281232074258, "grad_norm": 1.0466739951526598, "learning_rate": 8.717022266661966e-08, "loss": 0.2182, "step": 46882 }, { "epoch": 0.8149455057449286, "grad_norm": 9.592113352486047, "learning_rate": 8.715434234670272e-08, "loss": 0.3049, "step": 46883 }, { "epoch": 0.8149628882824315, "grad_norm": 1.7073780959157334, "learning_rate": 8.713846333531671e-08, "loss": 0.3987, "step": 46884 }, { "epoch": 0.8149802708199343, "grad_norm": 1.2498051317317793, "learning_rate": 8.712258563251212e-08, "loss": 0.2127, "step": 46885 }, { "epoch": 0.8149976533574371, "grad_norm": 1.5548819055141905, "learning_rate": 8.710670923833918e-08, "loss": 0.3023, "step": 46886 }, { "epoch": 0.81501503589494, "grad_norm": 1.2274097041346506, "learning_rate": 8.709083415284841e-08, "loss": 0.2465, "step": 46887 }, { "epoch": 0.8150324184324428, "grad_norm": 1.106021440063322, "learning_rate": 8.707496037609002e-08, "loss": 0.2217, "step": 46888 }, { "epoch": 0.8150498009699456, "grad_norm": 1.198137705672504, "learning_rate": 8.705908790811434e-08, "loss": 0.1452, "step": 46889 }, { "epoch": 0.8150671835074484, "grad_norm": 1.40629789739468, "learning_rate": 8.704321674897164e-08, "loss": 0.1922, "step": 46890 }, { "epoch": 0.8150845660449513, "grad_norm": 1.710203487668422, "learning_rate": 8.702734689871227e-08, "loss": 0.2735, "step": 46891 }, { "epoch": 0.8151019485824541, "grad_norm": 1.445077028726129, "learning_rate": 8.701147835738648e-08, "loss": 0.2384, "step": 46892 }, { "epoch": 0.8151193311199569, "grad_norm": 1.3214545154103157, "learning_rate": 8.69956111250445e-08, "loss": 0.2003, "step": 46893 }, { "epoch": 0.8151367136574598, "grad_norm": 1.228032253809492, "learning_rate": 8.697974520173685e-08, "loss": 0.2483, "step": 46894 }, { "epoch": 0.8151540961949625, "grad_norm": 2.072061648124078, "learning_rate": 8.696388058751375e-08, "loss": 0.2321, "step": 46895 }, { "epoch": 0.8151714787324653, "grad_norm": 1.8075455591001428, "learning_rate": 8.69480172824253e-08, "loss": 0.4597, "step": 46896 }, { "epoch": 0.8151888612699681, "grad_norm": 1.4186744197238117, "learning_rate": 8.693215528652182e-08, "loss": 0.3159, "step": 46897 }, { "epoch": 0.815206243807471, "grad_norm": 1.1543200172494965, "learning_rate": 8.691629459985373e-08, "loss": 0.2641, "step": 46898 }, { "epoch": 0.8152236263449738, "grad_norm": 1.6547677416972, "learning_rate": 8.690043522247125e-08, "loss": 0.1788, "step": 46899 }, { "epoch": 0.8152410088824766, "grad_norm": 0.9556951152841826, "learning_rate": 8.688457715442466e-08, "loss": 0.3149, "step": 46900 }, { "epoch": 0.8152583914199795, "grad_norm": 2.124118103751065, "learning_rate": 8.686872039576403e-08, "loss": 0.1818, "step": 46901 }, { "epoch": 0.8152757739574823, "grad_norm": 1.092175016422236, "learning_rate": 8.685286494654004e-08, "loss": 0.1962, "step": 46902 }, { "epoch": 0.8152931564949851, "grad_norm": 0.9914221427799347, "learning_rate": 8.683701080680256e-08, "loss": 0.1804, "step": 46903 }, { "epoch": 0.815310539032488, "grad_norm": 1.5147130118456753, "learning_rate": 8.682115797660194e-08, "loss": 0.1378, "step": 46904 }, { "epoch": 0.8153279215699908, "grad_norm": 1.0971796665719837, "learning_rate": 8.680530645598833e-08, "loss": 0.3198, "step": 46905 }, { "epoch": 0.8153453041074936, "grad_norm": 2.3463844999328347, "learning_rate": 8.678945624501222e-08, "loss": 0.4131, "step": 46906 }, { "epoch": 0.8153626866449964, "grad_norm": 1.878196469202059, "learning_rate": 8.677360734372374e-08, "loss": 0.3241, "step": 46907 }, { "epoch": 0.8153800691824993, "grad_norm": 2.292544147876034, "learning_rate": 8.675775975217309e-08, "loss": 0.2514, "step": 46908 }, { "epoch": 0.8153974517200021, "grad_norm": 2.021928525082562, "learning_rate": 8.674191347041055e-08, "loss": 0.2517, "step": 46909 }, { "epoch": 0.8154148342575049, "grad_norm": 1.4907045806572783, "learning_rate": 8.672606849848624e-08, "loss": 0.2284, "step": 46910 }, { "epoch": 0.8154322167950078, "grad_norm": 2.0843092905401495, "learning_rate": 8.671022483645052e-08, "loss": 0.1838, "step": 46911 }, { "epoch": 0.8154495993325106, "grad_norm": 1.5590785854220117, "learning_rate": 8.669438248435351e-08, "loss": 0.1773, "step": 46912 }, { "epoch": 0.8154669818700134, "grad_norm": 1.2726143018964757, "learning_rate": 8.667854144224529e-08, "loss": 0.2036, "step": 46913 }, { "epoch": 0.8154843644075163, "grad_norm": 1.0527314625073472, "learning_rate": 8.666270171017654e-08, "loss": 0.164, "step": 46914 }, { "epoch": 0.815501746945019, "grad_norm": 1.7749479301223001, "learning_rate": 8.664686328819698e-08, "loss": 0.2675, "step": 46915 }, { "epoch": 0.8155191294825218, "grad_norm": 1.6922456182449872, "learning_rate": 8.663102617635687e-08, "loss": 0.2149, "step": 46916 }, { "epoch": 0.8155365120200246, "grad_norm": 1.4042927478870189, "learning_rate": 8.661519037470666e-08, "loss": 0.1774, "step": 46917 }, { "epoch": 0.8155538945575275, "grad_norm": 1.3343763535556517, "learning_rate": 8.659935588329642e-08, "loss": 0.1717, "step": 46918 }, { "epoch": 0.8155712770950303, "grad_norm": 1.6333218592231111, "learning_rate": 8.658352270217629e-08, "loss": 0.3084, "step": 46919 }, { "epoch": 0.8155886596325331, "grad_norm": 1.0564055623294384, "learning_rate": 8.656769083139648e-08, "loss": 0.2566, "step": 46920 }, { "epoch": 0.815606042170036, "grad_norm": 1.8661339842340225, "learning_rate": 8.655186027100719e-08, "loss": 0.2048, "step": 46921 }, { "epoch": 0.8156234247075388, "grad_norm": 2.374918954546979, "learning_rate": 8.653603102105861e-08, "loss": 0.3193, "step": 46922 }, { "epoch": 0.8156408072450416, "grad_norm": 1.4570561918783764, "learning_rate": 8.652020308160086e-08, "loss": 0.3201, "step": 46923 }, { "epoch": 0.8156581897825445, "grad_norm": 1.5461396335773248, "learning_rate": 8.650437645268399e-08, "loss": 0.185, "step": 46924 }, { "epoch": 0.8156755723200473, "grad_norm": 1.5561212014781167, "learning_rate": 8.648855113435843e-08, "loss": 0.1882, "step": 46925 }, { "epoch": 0.8156929548575501, "grad_norm": 1.1739878452684105, "learning_rate": 8.64727271266742e-08, "loss": 0.2525, "step": 46926 }, { "epoch": 0.8157103373950529, "grad_norm": 1.367794111135859, "learning_rate": 8.645690442968162e-08, "loss": 0.239, "step": 46927 }, { "epoch": 0.8157277199325558, "grad_norm": 1.120322922444643, "learning_rate": 8.644108304343039e-08, "loss": 0.1898, "step": 46928 }, { "epoch": 0.8157451024700586, "grad_norm": 2.0221440852618, "learning_rate": 8.642526296797115e-08, "loss": 0.1771, "step": 46929 }, { "epoch": 0.8157624850075614, "grad_norm": 1.502545284638133, "learning_rate": 8.640944420335378e-08, "loss": 0.1916, "step": 46930 }, { "epoch": 0.8157798675450643, "grad_norm": 1.0228010978394266, "learning_rate": 8.639362674962852e-08, "loss": 0.1945, "step": 46931 }, { "epoch": 0.8157972500825671, "grad_norm": 1.189711267219381, "learning_rate": 8.637781060684535e-08, "loss": 0.2379, "step": 46932 }, { "epoch": 0.8158146326200699, "grad_norm": 1.625763181755152, "learning_rate": 8.63619957750547e-08, "loss": 0.2456, "step": 46933 }, { "epoch": 0.8158320151575728, "grad_norm": 1.3157813300710508, "learning_rate": 8.634618225430646e-08, "loss": 0.149, "step": 46934 }, { "epoch": 0.8158493976950755, "grad_norm": 1.645409758524832, "learning_rate": 8.633037004465077e-08, "loss": 0.2937, "step": 46935 }, { "epoch": 0.8158667802325783, "grad_norm": 1.5074121427016545, "learning_rate": 8.631455914613772e-08, "loss": 0.1533, "step": 46936 }, { "epoch": 0.8158841627700811, "grad_norm": 2.1805137856857106, "learning_rate": 8.629874955881755e-08, "loss": 0.2437, "step": 46937 }, { "epoch": 0.815901545307584, "grad_norm": 3.3142261421170756, "learning_rate": 8.628294128274033e-08, "loss": 0.32, "step": 46938 }, { "epoch": 0.8159189278450868, "grad_norm": 2.3563049983361086, "learning_rate": 8.626713431795613e-08, "loss": 0.1857, "step": 46939 }, { "epoch": 0.8159363103825896, "grad_norm": 1.1306671591618183, "learning_rate": 8.625132866451501e-08, "loss": 0.2487, "step": 46940 }, { "epoch": 0.8159536929200925, "grad_norm": 0.990262181841152, "learning_rate": 8.623552432246716e-08, "loss": 0.206, "step": 46941 }, { "epoch": 0.8159710754575953, "grad_norm": 1.5146971942820109, "learning_rate": 8.621972129186267e-08, "loss": 0.168, "step": 46942 }, { "epoch": 0.8159884579950981, "grad_norm": 0.8323511805780549, "learning_rate": 8.620391957275159e-08, "loss": 0.1373, "step": 46943 }, { "epoch": 0.816005840532601, "grad_norm": 1.398481898417962, "learning_rate": 8.618811916518381e-08, "loss": 0.2981, "step": 46944 }, { "epoch": 0.8160232230701038, "grad_norm": 1.3716511924872974, "learning_rate": 8.617232006920977e-08, "loss": 0.2002, "step": 46945 }, { "epoch": 0.8160406056076066, "grad_norm": 1.8074992504568879, "learning_rate": 8.615652228487946e-08, "loss": 0.3178, "step": 46946 }, { "epoch": 0.8160579881451094, "grad_norm": 0.9586182357082742, "learning_rate": 8.614072581224268e-08, "loss": 0.2678, "step": 46947 }, { "epoch": 0.8160753706826123, "grad_norm": 1.5585652087037627, "learning_rate": 8.612493065134979e-08, "loss": 0.2623, "step": 46948 }, { "epoch": 0.8160927532201151, "grad_norm": 1.5442632364621707, "learning_rate": 8.610913680225074e-08, "loss": 0.2796, "step": 46949 }, { "epoch": 0.8161101357576179, "grad_norm": 1.188693389920435, "learning_rate": 8.609334426499559e-08, "loss": 0.1286, "step": 46950 }, { "epoch": 0.8161275182951208, "grad_norm": 1.7822029553417889, "learning_rate": 8.607755303963444e-08, "loss": 0.1601, "step": 46951 }, { "epoch": 0.8161449008326236, "grad_norm": 1.512620453116267, "learning_rate": 8.606176312621733e-08, "loss": 0.1982, "step": 46952 }, { "epoch": 0.8161622833701264, "grad_norm": 2.098667786084655, "learning_rate": 8.604597452479423e-08, "loss": 0.2469, "step": 46953 }, { "epoch": 0.8161796659076292, "grad_norm": 1.2435990962771106, "learning_rate": 8.603018723541522e-08, "loss": 0.1593, "step": 46954 }, { "epoch": 0.816197048445132, "grad_norm": 1.6959928057621305, "learning_rate": 8.601440125813025e-08, "loss": 0.3236, "step": 46955 }, { "epoch": 0.8162144309826348, "grad_norm": 2.42864005195038, "learning_rate": 8.599861659298962e-08, "loss": 0.2461, "step": 46956 }, { "epoch": 0.8162318135201376, "grad_norm": 1.1138695035057888, "learning_rate": 8.598283324004318e-08, "loss": 0.1319, "step": 46957 }, { "epoch": 0.8162491960576405, "grad_norm": 1.081113489399705, "learning_rate": 8.596705119934095e-08, "loss": 0.1331, "step": 46958 }, { "epoch": 0.8162665785951433, "grad_norm": 1.2562255520608503, "learning_rate": 8.595127047093298e-08, "loss": 0.2023, "step": 46959 }, { "epoch": 0.8162839611326461, "grad_norm": 1.2222408892124397, "learning_rate": 8.59354910548693e-08, "loss": 0.2716, "step": 46960 }, { "epoch": 0.816301343670149, "grad_norm": 1.5306310155186875, "learning_rate": 8.591971295119993e-08, "loss": 0.2933, "step": 46961 }, { "epoch": 0.8163187262076518, "grad_norm": 1.5301689778106125, "learning_rate": 8.590393615997482e-08, "loss": 0.1383, "step": 46962 }, { "epoch": 0.8163361087451546, "grad_norm": 1.0424453806850211, "learning_rate": 8.588816068124388e-08, "loss": 0.2403, "step": 46963 }, { "epoch": 0.8163534912826574, "grad_norm": 0.7358861466157912, "learning_rate": 8.58723865150574e-08, "loss": 0.0788, "step": 46964 }, { "epoch": 0.8163708738201603, "grad_norm": 1.3881954563617407, "learning_rate": 8.585661366146535e-08, "loss": 0.3505, "step": 46965 }, { "epoch": 0.8163882563576631, "grad_norm": 1.49564699005779, "learning_rate": 8.584084212051745e-08, "loss": 0.1573, "step": 46966 }, { "epoch": 0.8164056388951659, "grad_norm": 1.1415486780515507, "learning_rate": 8.582507189226368e-08, "loss": 0.186, "step": 46967 }, { "epoch": 0.8164230214326688, "grad_norm": 0.8957914202441566, "learning_rate": 8.580930297675432e-08, "loss": 0.2581, "step": 46968 }, { "epoch": 0.8164404039701716, "grad_norm": 1.8711109371239563, "learning_rate": 8.579353537403922e-08, "loss": 0.1877, "step": 46969 }, { "epoch": 0.8164577865076744, "grad_norm": 1.6478638545783506, "learning_rate": 8.577776908416834e-08, "loss": 0.1591, "step": 46970 }, { "epoch": 0.8164751690451773, "grad_norm": 0.8494132147044596, "learning_rate": 8.576200410719165e-08, "loss": 0.166, "step": 46971 }, { "epoch": 0.8164925515826801, "grad_norm": 2.2130052473833794, "learning_rate": 8.574624044315914e-08, "loss": 0.1393, "step": 46972 }, { "epoch": 0.8165099341201829, "grad_norm": 1.5477501727705583, "learning_rate": 8.57304780921207e-08, "loss": 0.2661, "step": 46973 }, { "epoch": 0.8165273166576857, "grad_norm": 2.2709480027225086, "learning_rate": 8.571471705412641e-08, "loss": 0.1723, "step": 46974 }, { "epoch": 0.8165446991951885, "grad_norm": 1.883995728595746, "learning_rate": 8.569895732922599e-08, "loss": 0.2311, "step": 46975 }, { "epoch": 0.8165620817326913, "grad_norm": 1.9163875715574952, "learning_rate": 8.568319891746972e-08, "loss": 0.1885, "step": 46976 }, { "epoch": 0.8165794642701941, "grad_norm": 1.0648728586146454, "learning_rate": 8.566744181890734e-08, "loss": 0.2252, "step": 46977 }, { "epoch": 0.816596846807697, "grad_norm": 1.7915348206628456, "learning_rate": 8.565168603358886e-08, "loss": 0.1889, "step": 46978 }, { "epoch": 0.8166142293451998, "grad_norm": 1.049399067693932, "learning_rate": 8.563593156156424e-08, "loss": 0.3552, "step": 46979 }, { "epoch": 0.8166316118827026, "grad_norm": 1.1479417579600848, "learning_rate": 8.562017840288333e-08, "loss": 0.194, "step": 46980 }, { "epoch": 0.8166489944202054, "grad_norm": 2.0153399539621395, "learning_rate": 8.560442655759614e-08, "loss": 0.2478, "step": 46981 }, { "epoch": 0.8166663769577083, "grad_norm": 1.1511662352275946, "learning_rate": 8.558867602575237e-08, "loss": 0.3052, "step": 46982 }, { "epoch": 0.8166837594952111, "grad_norm": 1.5250456408508217, "learning_rate": 8.55729268074023e-08, "loss": 0.1763, "step": 46983 }, { "epoch": 0.8167011420327139, "grad_norm": 1.675125121903797, "learning_rate": 8.555717890259583e-08, "loss": 0.2122, "step": 46984 }, { "epoch": 0.8167185245702168, "grad_norm": 1.1128487096652777, "learning_rate": 8.554143231138255e-08, "loss": 0.1909, "step": 46985 }, { "epoch": 0.8167359071077196, "grad_norm": 1.3336523621551866, "learning_rate": 8.552568703381247e-08, "loss": 0.167, "step": 46986 }, { "epoch": 0.8167532896452224, "grad_norm": 1.508824719229685, "learning_rate": 8.550994306993564e-08, "loss": 0.2863, "step": 46987 }, { "epoch": 0.8167706721827253, "grad_norm": 1.1637341937909096, "learning_rate": 8.549420041980194e-08, "loss": 0.2222, "step": 46988 }, { "epoch": 0.8167880547202281, "grad_norm": 2.733488786834604, "learning_rate": 8.547845908346119e-08, "loss": 0.2573, "step": 46989 }, { "epoch": 0.8168054372577309, "grad_norm": 1.5853443755446122, "learning_rate": 8.54627190609633e-08, "loss": 0.1852, "step": 46990 }, { "epoch": 0.8168228197952337, "grad_norm": 2.2569722676925803, "learning_rate": 8.544698035235814e-08, "loss": 0.2488, "step": 46991 }, { "epoch": 0.8168402023327366, "grad_norm": 1.36898481353195, "learning_rate": 8.543124295769565e-08, "loss": 0.2928, "step": 46992 }, { "epoch": 0.8168575848702394, "grad_norm": 2.929593912294651, "learning_rate": 8.54155068770257e-08, "loss": 0.1859, "step": 46993 }, { "epoch": 0.8168749674077422, "grad_norm": 1.3021212622929763, "learning_rate": 8.5399772110398e-08, "loss": 0.1605, "step": 46994 }, { "epoch": 0.816892349945245, "grad_norm": 1.0435309680571623, "learning_rate": 8.538403865786276e-08, "loss": 0.3344, "step": 46995 }, { "epoch": 0.8169097324827478, "grad_norm": 1.5208154470156625, "learning_rate": 8.536830651946969e-08, "loss": 0.2884, "step": 46996 }, { "epoch": 0.8169271150202506, "grad_norm": 1.4593506785645158, "learning_rate": 8.535257569526843e-08, "loss": 0.2746, "step": 46997 }, { "epoch": 0.8169444975577534, "grad_norm": 1.9699879417860024, "learning_rate": 8.533684618530912e-08, "loss": 0.2491, "step": 46998 }, { "epoch": 0.8169618800952563, "grad_norm": 1.8115537290695822, "learning_rate": 8.532111798964159e-08, "loss": 0.2078, "step": 46999 }, { "epoch": 0.8169792626327591, "grad_norm": 1.6233875221190328, "learning_rate": 8.530539110831558e-08, "loss": 0.2319, "step": 47000 }, { "epoch": 0.8169966451702619, "grad_norm": 1.3816949441692512, "learning_rate": 8.528966554138096e-08, "loss": 0.2479, "step": 47001 }, { "epoch": 0.8170140277077648, "grad_norm": 1.0982439370979495, "learning_rate": 8.527394128888755e-08, "loss": 0.143, "step": 47002 }, { "epoch": 0.8170314102452676, "grad_norm": 1.335173201087395, "learning_rate": 8.525821835088543e-08, "loss": 0.165, "step": 47003 }, { "epoch": 0.8170487927827704, "grad_norm": 1.5238148399278373, "learning_rate": 8.524249672742412e-08, "loss": 0.2757, "step": 47004 }, { "epoch": 0.8170661753202733, "grad_norm": 1.1396639475325248, "learning_rate": 8.522677641855341e-08, "loss": 0.2454, "step": 47005 }, { "epoch": 0.8170835578577761, "grad_norm": 1.5841237081634572, "learning_rate": 8.52110574243235e-08, "loss": 0.2113, "step": 47006 }, { "epoch": 0.8171009403952789, "grad_norm": 1.462092940450351, "learning_rate": 8.519533974478394e-08, "loss": 0.244, "step": 47007 }, { "epoch": 0.8171183229327817, "grad_norm": 1.5873261479888146, "learning_rate": 8.517962337998464e-08, "loss": 0.2225, "step": 47008 }, { "epoch": 0.8171357054702846, "grad_norm": 1.5916885694193437, "learning_rate": 8.516390832997533e-08, "loss": 0.1865, "step": 47009 }, { "epoch": 0.8171530880077874, "grad_norm": 1.5103406966871302, "learning_rate": 8.514819459480593e-08, "loss": 0.1955, "step": 47010 }, { "epoch": 0.8171704705452902, "grad_norm": 1.6126459817574506, "learning_rate": 8.513248217452617e-08, "loss": 0.3672, "step": 47011 }, { "epoch": 0.8171878530827931, "grad_norm": 1.6907801159098512, "learning_rate": 8.511677106918586e-08, "loss": 0.255, "step": 47012 }, { "epoch": 0.8172052356202959, "grad_norm": 1.408470570703166, "learning_rate": 8.510106127883466e-08, "loss": 0.2787, "step": 47013 }, { "epoch": 0.8172226181577987, "grad_norm": 1.0503693028822756, "learning_rate": 8.508535280352264e-08, "loss": 0.1445, "step": 47014 }, { "epoch": 0.8172400006953014, "grad_norm": 0.9816890897864596, "learning_rate": 8.506964564329961e-08, "loss": 0.2459, "step": 47015 }, { "epoch": 0.8172573832328043, "grad_norm": 1.579540346413989, "learning_rate": 8.505393979821502e-08, "loss": 0.1615, "step": 47016 }, { "epoch": 0.8172747657703071, "grad_norm": 3.1804894143737394, "learning_rate": 8.503823526831877e-08, "loss": 0.2793, "step": 47017 }, { "epoch": 0.8172921483078099, "grad_norm": 1.616109902664114, "learning_rate": 8.502253205366078e-08, "loss": 0.1924, "step": 47018 }, { "epoch": 0.8173095308453128, "grad_norm": 2.440082239119646, "learning_rate": 8.500683015429072e-08, "loss": 0.1295, "step": 47019 }, { "epoch": 0.8173269133828156, "grad_norm": 0.6701519719954232, "learning_rate": 8.49911295702584e-08, "loss": 0.1679, "step": 47020 }, { "epoch": 0.8173442959203184, "grad_norm": 1.8486355977205267, "learning_rate": 8.497543030161347e-08, "loss": 0.1945, "step": 47021 }, { "epoch": 0.8173616784578213, "grad_norm": 2.0604592564128312, "learning_rate": 8.495973234840598e-08, "loss": 0.3504, "step": 47022 }, { "epoch": 0.8173790609953241, "grad_norm": 1.6426772005466248, "learning_rate": 8.494403571068537e-08, "loss": 0.1723, "step": 47023 }, { "epoch": 0.8173964435328269, "grad_norm": 1.9852910978126157, "learning_rate": 8.49283403885015e-08, "loss": 0.1783, "step": 47024 }, { "epoch": 0.8174138260703298, "grad_norm": 1.0569308135702133, "learning_rate": 8.491264638190403e-08, "loss": 0.2175, "step": 47025 }, { "epoch": 0.8174312086078326, "grad_norm": 1.0936961436017807, "learning_rate": 8.48969536909429e-08, "loss": 0.2473, "step": 47026 }, { "epoch": 0.8174485911453354, "grad_norm": 1.2372110940888401, "learning_rate": 8.488126231566773e-08, "loss": 0.138, "step": 47027 }, { "epoch": 0.8174659736828382, "grad_norm": 1.5274639357493955, "learning_rate": 8.486557225612828e-08, "loss": 0.3106, "step": 47028 }, { "epoch": 0.8174833562203411, "grad_norm": 3.005930970667969, "learning_rate": 8.484988351237433e-08, "loss": 0.2781, "step": 47029 }, { "epoch": 0.8175007387578439, "grad_norm": 1.6941675657333064, "learning_rate": 8.483419608445547e-08, "loss": 0.2057, "step": 47030 }, { "epoch": 0.8175181212953467, "grad_norm": 1.5709907983751692, "learning_rate": 8.481850997242152e-08, "loss": 0.3624, "step": 47031 }, { "epoch": 0.8175355038328496, "grad_norm": 1.6195195374456866, "learning_rate": 8.480282517632215e-08, "loss": 0.2837, "step": 47032 }, { "epoch": 0.8175528863703524, "grad_norm": 1.261125230305065, "learning_rate": 8.478714169620699e-08, "loss": 0.2482, "step": 47033 }, { "epoch": 0.8175702689078551, "grad_norm": 1.0472752216323196, "learning_rate": 8.47714595321261e-08, "loss": 0.1579, "step": 47034 }, { "epoch": 0.8175876514453579, "grad_norm": 1.1533736849276661, "learning_rate": 8.475577868412881e-08, "loss": 0.141, "step": 47035 }, { "epoch": 0.8176050339828608, "grad_norm": 2.0460175097813535, "learning_rate": 8.474009915226487e-08, "loss": 0.3001, "step": 47036 }, { "epoch": 0.8176224165203636, "grad_norm": 1.9359020925619534, "learning_rate": 8.472442093658417e-08, "loss": 0.1689, "step": 47037 }, { "epoch": 0.8176397990578664, "grad_norm": 1.3182809581851587, "learning_rate": 8.47087440371363e-08, "loss": 0.2512, "step": 47038 }, { "epoch": 0.8176571815953693, "grad_norm": 3.2941523949839024, "learning_rate": 8.469306845397089e-08, "loss": 0.2636, "step": 47039 }, { "epoch": 0.8176745641328721, "grad_norm": 1.5198343998212374, "learning_rate": 8.467739418713771e-08, "loss": 0.2135, "step": 47040 }, { "epoch": 0.8176919466703749, "grad_norm": 1.508149501467945, "learning_rate": 8.466172123668642e-08, "loss": 0.2688, "step": 47041 }, { "epoch": 0.8177093292078778, "grad_norm": 1.9899711150188124, "learning_rate": 8.464604960266669e-08, "loss": 0.3607, "step": 47042 }, { "epoch": 0.8177267117453806, "grad_norm": 1.901532890781904, "learning_rate": 8.463037928512817e-08, "loss": 0.2952, "step": 47043 }, { "epoch": 0.8177440942828834, "grad_norm": 1.8124087252605725, "learning_rate": 8.461471028412042e-08, "loss": 0.1837, "step": 47044 }, { "epoch": 0.8177614768203862, "grad_norm": 1.6980572787758883, "learning_rate": 8.459904259969335e-08, "loss": 0.2779, "step": 47045 }, { "epoch": 0.8177788593578891, "grad_norm": 1.5004451097930038, "learning_rate": 8.458337623189648e-08, "loss": 0.1716, "step": 47046 }, { "epoch": 0.8177962418953919, "grad_norm": 1.4543105991467462, "learning_rate": 8.45677111807796e-08, "loss": 0.3845, "step": 47047 }, { "epoch": 0.8178136244328947, "grad_norm": 1.34762715810934, "learning_rate": 8.455204744639205e-08, "loss": 0.2176, "step": 47048 }, { "epoch": 0.8178310069703976, "grad_norm": 2.1813962931494055, "learning_rate": 8.453638502878374e-08, "loss": 0.3502, "step": 47049 }, { "epoch": 0.8178483895079004, "grad_norm": 1.1543979169604766, "learning_rate": 8.452072392800425e-08, "loss": 0.1828, "step": 47050 }, { "epoch": 0.8178657720454032, "grad_norm": 0.9180377698722416, "learning_rate": 8.450506414410325e-08, "loss": 0.231, "step": 47051 }, { "epoch": 0.8178831545829061, "grad_norm": 1.0186397573994426, "learning_rate": 8.448940567713014e-08, "loss": 0.1701, "step": 47052 }, { "epoch": 0.8179005371204089, "grad_norm": 1.246588851595469, "learning_rate": 8.447374852713501e-08, "loss": 0.2551, "step": 47053 }, { "epoch": 0.8179179196579116, "grad_norm": 1.1073182329737357, "learning_rate": 8.445809269416715e-08, "loss": 0.3252, "step": 47054 }, { "epoch": 0.8179353021954144, "grad_norm": 1.4784566964990706, "learning_rate": 8.444243817827617e-08, "loss": 0.252, "step": 47055 }, { "epoch": 0.8179526847329173, "grad_norm": 1.778037434667565, "learning_rate": 8.442678497951172e-08, "loss": 0.2076, "step": 47056 }, { "epoch": 0.8179700672704201, "grad_norm": 0.997215636742968, "learning_rate": 8.441113309792353e-08, "loss": 0.3838, "step": 47057 }, { "epoch": 0.8179874498079229, "grad_norm": 4.545947447501956, "learning_rate": 8.439548253356122e-08, "loss": 0.2417, "step": 47058 }, { "epoch": 0.8180048323454258, "grad_norm": 1.3582931726941534, "learning_rate": 8.437983328647424e-08, "loss": 0.3794, "step": 47059 }, { "epoch": 0.8180222148829286, "grad_norm": 1.7149146283005086, "learning_rate": 8.43641853567123e-08, "loss": 0.1556, "step": 47060 }, { "epoch": 0.8180395974204314, "grad_norm": 1.3752218210807432, "learning_rate": 8.434853874432496e-08, "loss": 0.2131, "step": 47061 }, { "epoch": 0.8180569799579342, "grad_norm": 1.4661608432997522, "learning_rate": 8.433289344936179e-08, "loss": 0.1946, "step": 47062 }, { "epoch": 0.8180743624954371, "grad_norm": 2.252522642283683, "learning_rate": 8.431724947187246e-08, "loss": 0.329, "step": 47063 }, { "epoch": 0.8180917450329399, "grad_norm": 1.5644067371248256, "learning_rate": 8.430160681190634e-08, "loss": 0.1391, "step": 47064 }, { "epoch": 0.8181091275704427, "grad_norm": 1.8513012772864816, "learning_rate": 8.428596546951327e-08, "loss": 0.2736, "step": 47065 }, { "epoch": 0.8181265101079456, "grad_norm": 0.9329591451356581, "learning_rate": 8.427032544474288e-08, "loss": 0.1241, "step": 47066 }, { "epoch": 0.8181438926454484, "grad_norm": 1.45661638282244, "learning_rate": 8.425468673764435e-08, "loss": 0.1289, "step": 47067 }, { "epoch": 0.8181612751829512, "grad_norm": 1.4840399700467917, "learning_rate": 8.423904934826764e-08, "loss": 0.2, "step": 47068 }, { "epoch": 0.8181786577204541, "grad_norm": 1.4346673153850973, "learning_rate": 8.42234132766621e-08, "loss": 0.1267, "step": 47069 }, { "epoch": 0.8181960402579569, "grad_norm": 0.9413403595299754, "learning_rate": 8.420777852287735e-08, "loss": 0.2096, "step": 47070 }, { "epoch": 0.8182134227954597, "grad_norm": 3.054048667906192, "learning_rate": 8.419214508696287e-08, "loss": 0.249, "step": 47071 }, { "epoch": 0.8182308053329626, "grad_norm": 1.3351429722186285, "learning_rate": 8.417651296896849e-08, "loss": 0.1776, "step": 47072 }, { "epoch": 0.8182481878704654, "grad_norm": 0.9981245107781864, "learning_rate": 8.41608821689434e-08, "loss": 0.3698, "step": 47073 }, { "epoch": 0.8182655704079681, "grad_norm": 1.7519166484460802, "learning_rate": 8.414525268693735e-08, "loss": 0.1805, "step": 47074 }, { "epoch": 0.8182829529454709, "grad_norm": 1.2519043392221028, "learning_rate": 8.412962452299971e-08, "loss": 0.2635, "step": 47075 }, { "epoch": 0.8183003354829738, "grad_norm": 1.9244167729265684, "learning_rate": 8.41139976771802e-08, "loss": 0.3575, "step": 47076 }, { "epoch": 0.8183177180204766, "grad_norm": 2.3523731831173675, "learning_rate": 8.409837214952825e-08, "loss": 0.1763, "step": 47077 }, { "epoch": 0.8183351005579794, "grad_norm": 1.4287197933162987, "learning_rate": 8.408274794009346e-08, "loss": 0.1837, "step": 47078 }, { "epoch": 0.8183524830954823, "grad_norm": 1.1153567229489607, "learning_rate": 8.406712504892532e-08, "loss": 0.1856, "step": 47079 }, { "epoch": 0.8183698656329851, "grad_norm": 1.915764868396898, "learning_rate": 8.405150347607325e-08, "loss": 0.1981, "step": 47080 }, { "epoch": 0.8183872481704879, "grad_norm": 1.4120975196145358, "learning_rate": 8.403588322158694e-08, "loss": 0.2179, "step": 47081 }, { "epoch": 0.8184046307079907, "grad_norm": 0.9299701399139809, "learning_rate": 8.402026428551573e-08, "loss": 0.3407, "step": 47082 }, { "epoch": 0.8184220132454936, "grad_norm": 1.710048294271771, "learning_rate": 8.400464666790913e-08, "loss": 0.2067, "step": 47083 }, { "epoch": 0.8184393957829964, "grad_norm": 0.9012616040320683, "learning_rate": 8.398903036881677e-08, "loss": 0.3338, "step": 47084 }, { "epoch": 0.8184567783204992, "grad_norm": 1.670182899008194, "learning_rate": 8.397341538828823e-08, "loss": 0.2432, "step": 47085 }, { "epoch": 0.8184741608580021, "grad_norm": 2.1631705021764556, "learning_rate": 8.395780172637262e-08, "loss": 0.338, "step": 47086 }, { "epoch": 0.8184915433955049, "grad_norm": 1.1408620982083615, "learning_rate": 8.394218938311981e-08, "loss": 0.1906, "step": 47087 }, { "epoch": 0.8185089259330077, "grad_norm": 1.789828674608344, "learning_rate": 8.392657835857908e-08, "loss": 0.3503, "step": 47088 }, { "epoch": 0.8185263084705106, "grad_norm": 1.3657069868334113, "learning_rate": 8.391096865280001e-08, "loss": 0.1678, "step": 47089 }, { "epoch": 0.8185436910080134, "grad_norm": 1.1561664546474768, "learning_rate": 8.389536026583205e-08, "loss": 0.2532, "step": 47090 }, { "epoch": 0.8185610735455162, "grad_norm": 1.610565401289662, "learning_rate": 8.38797531977246e-08, "loss": 0.2305, "step": 47091 }, { "epoch": 0.818578456083019, "grad_norm": 1.1499895422533284, "learning_rate": 8.38641474485272e-08, "loss": 0.2144, "step": 47092 }, { "epoch": 0.8185958386205219, "grad_norm": 1.601319490068422, "learning_rate": 8.384854301828937e-08, "loss": 0.193, "step": 47093 }, { "epoch": 0.8186132211580246, "grad_norm": 0.9199463679534846, "learning_rate": 8.383293990706025e-08, "loss": 0.1976, "step": 47094 }, { "epoch": 0.8186306036955274, "grad_norm": 1.4385387173075923, "learning_rate": 8.381733811488972e-08, "loss": 0.1836, "step": 47095 }, { "epoch": 0.8186479862330303, "grad_norm": 1.0272756349758994, "learning_rate": 8.380173764182708e-08, "loss": 0.1337, "step": 47096 }, { "epoch": 0.8186653687705331, "grad_norm": 1.8635239920388136, "learning_rate": 8.378613848792171e-08, "loss": 0.2409, "step": 47097 }, { "epoch": 0.8186827513080359, "grad_norm": 0.9558489923279141, "learning_rate": 8.377054065322309e-08, "loss": 0.2164, "step": 47098 }, { "epoch": 0.8187001338455387, "grad_norm": 1.1890561533420416, "learning_rate": 8.375494413778068e-08, "loss": 0.2493, "step": 47099 }, { "epoch": 0.8187175163830416, "grad_norm": 1.4174505162825886, "learning_rate": 8.373934894164387e-08, "loss": 0.213, "step": 47100 }, { "epoch": 0.8187348989205444, "grad_norm": 1.4850394171109385, "learning_rate": 8.372375506486206e-08, "loss": 0.2224, "step": 47101 }, { "epoch": 0.8187522814580472, "grad_norm": 0.9972917535422027, "learning_rate": 8.370816250748469e-08, "loss": 0.2149, "step": 47102 }, { "epoch": 0.8187696639955501, "grad_norm": 2.7642166290759995, "learning_rate": 8.369257126956131e-08, "loss": 0.1835, "step": 47103 }, { "epoch": 0.8187870465330529, "grad_norm": 2.2019422477205106, "learning_rate": 8.367698135114132e-08, "loss": 0.2755, "step": 47104 }, { "epoch": 0.8188044290705557, "grad_norm": 1.0514319493295097, "learning_rate": 8.366139275227396e-08, "loss": 0.2245, "step": 47105 }, { "epoch": 0.8188218116080586, "grad_norm": 1.4556629149939213, "learning_rate": 8.364580547300865e-08, "loss": 0.229, "step": 47106 }, { "epoch": 0.8188391941455614, "grad_norm": 2.336768805990215, "learning_rate": 8.3630219513395e-08, "loss": 0.1975, "step": 47107 }, { "epoch": 0.8188565766830642, "grad_norm": 0.8085032054999963, "learning_rate": 8.361463487348225e-08, "loss": 0.291, "step": 47108 }, { "epoch": 0.818873959220567, "grad_norm": 0.8296654640463605, "learning_rate": 8.35990515533198e-08, "loss": 0.1531, "step": 47109 }, { "epoch": 0.8188913417580699, "grad_norm": 1.2161960362763875, "learning_rate": 8.358346955295715e-08, "loss": 0.2211, "step": 47110 }, { "epoch": 0.8189087242955727, "grad_norm": 1.5150689782408175, "learning_rate": 8.356788887244359e-08, "loss": 0.2091, "step": 47111 }, { "epoch": 0.8189261068330755, "grad_norm": 5.798124713369094, "learning_rate": 8.355230951182851e-08, "loss": 0.2778, "step": 47112 }, { "epoch": 0.8189434893705784, "grad_norm": 1.0769866899010418, "learning_rate": 8.353673147116135e-08, "loss": 0.1838, "step": 47113 }, { "epoch": 0.8189608719080811, "grad_norm": 0.9544891717916719, "learning_rate": 8.352115475049132e-08, "loss": 0.1385, "step": 47114 }, { "epoch": 0.8189782544455839, "grad_norm": 1.0486985793985963, "learning_rate": 8.350557934986802e-08, "loss": 0.2507, "step": 47115 }, { "epoch": 0.8189956369830867, "grad_norm": 1.7937388360230173, "learning_rate": 8.34900052693408e-08, "loss": 0.1825, "step": 47116 }, { "epoch": 0.8190130195205896, "grad_norm": 1.8623835965270437, "learning_rate": 8.347443250895875e-08, "loss": 0.208, "step": 47117 }, { "epoch": 0.8190304020580924, "grad_norm": 2.1966654126252068, "learning_rate": 8.345886106877148e-08, "loss": 0.3009, "step": 47118 }, { "epoch": 0.8190477845955952, "grad_norm": 1.5201113982750039, "learning_rate": 8.344329094882829e-08, "loss": 0.3772, "step": 47119 }, { "epoch": 0.8190651671330981, "grad_norm": 1.9422920478645775, "learning_rate": 8.342772214917854e-08, "loss": 0.2767, "step": 47120 }, { "epoch": 0.8190825496706009, "grad_norm": 3.065185800554924, "learning_rate": 8.341215466987155e-08, "loss": 0.2121, "step": 47121 }, { "epoch": 0.8190999322081037, "grad_norm": 1.6130773122004818, "learning_rate": 8.339658851095655e-08, "loss": 0.1005, "step": 47122 }, { "epoch": 0.8191173147456066, "grad_norm": 1.0033420059546554, "learning_rate": 8.338102367248323e-08, "loss": 0.2475, "step": 47123 }, { "epoch": 0.8191346972831094, "grad_norm": 2.3496462311564645, "learning_rate": 8.336546015450052e-08, "loss": 0.2665, "step": 47124 }, { "epoch": 0.8191520798206122, "grad_norm": 0.9510393379861495, "learning_rate": 8.334989795705783e-08, "loss": 0.2374, "step": 47125 }, { "epoch": 0.819169462358115, "grad_norm": 1.0086604504844408, "learning_rate": 8.33343370802047e-08, "loss": 0.3375, "step": 47126 }, { "epoch": 0.8191868448956179, "grad_norm": 2.1180588744998667, "learning_rate": 8.33187775239903e-08, "loss": 0.1864, "step": 47127 }, { "epoch": 0.8192042274331207, "grad_norm": 0.9745223627943291, "learning_rate": 8.330321928846401e-08, "loss": 0.1609, "step": 47128 }, { "epoch": 0.8192216099706235, "grad_norm": 1.3673906392926845, "learning_rate": 8.328766237367507e-08, "loss": 0.2729, "step": 47129 }, { "epoch": 0.8192389925081264, "grad_norm": 1.3891789248608164, "learning_rate": 8.327210677967278e-08, "loss": 0.1431, "step": 47130 }, { "epoch": 0.8192563750456292, "grad_norm": 1.3931990050516807, "learning_rate": 8.325655250650654e-08, "loss": 0.254, "step": 47131 }, { "epoch": 0.819273757583132, "grad_norm": 1.0484506590890748, "learning_rate": 8.324099955422559e-08, "loss": 0.1672, "step": 47132 }, { "epoch": 0.8192911401206349, "grad_norm": 1.7229676886653196, "learning_rate": 8.322544792287906e-08, "loss": 0.1804, "step": 47133 }, { "epoch": 0.8193085226581376, "grad_norm": 1.5868937491772104, "learning_rate": 8.320989761251656e-08, "loss": 0.2702, "step": 47134 }, { "epoch": 0.8193259051956404, "grad_norm": 2.749143243157015, "learning_rate": 8.319434862318735e-08, "loss": 0.4094, "step": 47135 }, { "epoch": 0.8193432877331432, "grad_norm": 5.338945173118826, "learning_rate": 8.317880095494045e-08, "loss": 0.3267, "step": 47136 }, { "epoch": 0.8193606702706461, "grad_norm": 5.243451893436105, "learning_rate": 8.31632546078252e-08, "loss": 0.2223, "step": 47137 }, { "epoch": 0.8193780528081489, "grad_norm": 4.436664409180105, "learning_rate": 8.314770958189104e-08, "loss": 0.248, "step": 47138 }, { "epoch": 0.8193954353456517, "grad_norm": 1.4855686337889098, "learning_rate": 8.313216587718718e-08, "loss": 0.1657, "step": 47139 }, { "epoch": 0.8194128178831546, "grad_norm": 1.0632115311906545, "learning_rate": 8.311662349376286e-08, "loss": 0.1278, "step": 47140 }, { "epoch": 0.8194302004206574, "grad_norm": 1.038186839797345, "learning_rate": 8.31010824316672e-08, "loss": 0.3343, "step": 47141 }, { "epoch": 0.8194475829581602, "grad_norm": 1.0711361467399276, "learning_rate": 8.308554269094986e-08, "loss": 0.209, "step": 47142 }, { "epoch": 0.8194649654956631, "grad_norm": 2.3151222168194994, "learning_rate": 8.307000427165967e-08, "loss": 0.2633, "step": 47143 }, { "epoch": 0.8194823480331659, "grad_norm": 1.3274341359310378, "learning_rate": 8.305446717384607e-08, "loss": 0.2218, "step": 47144 }, { "epoch": 0.8194997305706687, "grad_norm": 1.8651902756767462, "learning_rate": 8.303893139755819e-08, "loss": 0.4054, "step": 47145 }, { "epoch": 0.8195171131081715, "grad_norm": 0.9987833532246021, "learning_rate": 8.302339694284543e-08, "loss": 0.1386, "step": 47146 }, { "epoch": 0.8195344956456744, "grad_norm": 1.3187406127208203, "learning_rate": 8.300786380975699e-08, "loss": 0.3042, "step": 47147 }, { "epoch": 0.8195518781831772, "grad_norm": 1.5787115786151273, "learning_rate": 8.29923319983421e-08, "loss": 0.1911, "step": 47148 }, { "epoch": 0.81956926072068, "grad_norm": 1.486375646026608, "learning_rate": 8.297680150864994e-08, "loss": 0.179, "step": 47149 }, { "epoch": 0.8195866432581829, "grad_norm": 1.5057952968817194, "learning_rate": 8.296127234072969e-08, "loss": 0.1905, "step": 47150 }, { "epoch": 0.8196040257956857, "grad_norm": 1.4695858785146323, "learning_rate": 8.29457444946307e-08, "loss": 0.2602, "step": 47151 }, { "epoch": 0.8196214083331885, "grad_norm": 1.523162130853142, "learning_rate": 8.293021797040211e-08, "loss": 0.1338, "step": 47152 }, { "epoch": 0.8196387908706914, "grad_norm": 1.5229951455076425, "learning_rate": 8.291469276809304e-08, "loss": 0.2882, "step": 47153 }, { "epoch": 0.8196561734081941, "grad_norm": 1.3678215870085804, "learning_rate": 8.2899168887753e-08, "loss": 0.2159, "step": 47154 }, { "epoch": 0.8196735559456969, "grad_norm": 1.9320629995040965, "learning_rate": 8.28836463294309e-08, "loss": 0.1871, "step": 47155 }, { "epoch": 0.8196909384831997, "grad_norm": 1.5196306287247092, "learning_rate": 8.286812509317593e-08, "loss": 0.2558, "step": 47156 }, { "epoch": 0.8197083210207026, "grad_norm": 1.2639353169947276, "learning_rate": 8.285260517903748e-08, "loss": 0.19, "step": 47157 }, { "epoch": 0.8197257035582054, "grad_norm": 1.6864269635827758, "learning_rate": 8.283708658706468e-08, "loss": 0.2064, "step": 47158 }, { "epoch": 0.8197430860957082, "grad_norm": 1.3479415785690856, "learning_rate": 8.282156931730667e-08, "loss": 0.3141, "step": 47159 }, { "epoch": 0.8197604686332111, "grad_norm": 1.4958954069153023, "learning_rate": 8.280605336981261e-08, "loss": 0.1204, "step": 47160 }, { "epoch": 0.8197778511707139, "grad_norm": 1.042980388616535, "learning_rate": 8.279053874463177e-08, "loss": 0.262, "step": 47161 }, { "epoch": 0.8197952337082167, "grad_norm": 4.455619596953732, "learning_rate": 8.277502544181326e-08, "loss": 0.3023, "step": 47162 }, { "epoch": 0.8198126162457195, "grad_norm": 1.589723984737023, "learning_rate": 8.275951346140624e-08, "loss": 0.214, "step": 47163 }, { "epoch": 0.8198299987832224, "grad_norm": 1.2858245821283891, "learning_rate": 8.274400280345978e-08, "loss": 0.2614, "step": 47164 }, { "epoch": 0.8198473813207252, "grad_norm": 1.3031503152822232, "learning_rate": 8.27284934680233e-08, "loss": 0.2362, "step": 47165 }, { "epoch": 0.819864763858228, "grad_norm": 0.9977306167181389, "learning_rate": 8.271298545514583e-08, "loss": 0.1794, "step": 47166 }, { "epoch": 0.8198821463957309, "grad_norm": 1.1879582605731835, "learning_rate": 8.269747876487659e-08, "loss": 0.196, "step": 47167 }, { "epoch": 0.8198995289332337, "grad_norm": 1.4404167762228404, "learning_rate": 8.26819733972644e-08, "loss": 0.178, "step": 47168 }, { "epoch": 0.8199169114707365, "grad_norm": 2.1440256628064254, "learning_rate": 8.266646935235883e-08, "loss": 0.2704, "step": 47169 }, { "epoch": 0.8199342940082394, "grad_norm": 1.583337989248455, "learning_rate": 8.265096663020882e-08, "loss": 0.2098, "step": 47170 }, { "epoch": 0.8199516765457422, "grad_norm": 1.3595450408395322, "learning_rate": 8.26354652308635e-08, "loss": 0.262, "step": 47171 }, { "epoch": 0.819969059083245, "grad_norm": 1.4611957155252568, "learning_rate": 8.261996515437198e-08, "loss": 0.2914, "step": 47172 }, { "epoch": 0.8199864416207477, "grad_norm": 1.6008003877944146, "learning_rate": 8.260446640078362e-08, "loss": 0.2299, "step": 47173 }, { "epoch": 0.8200038241582506, "grad_norm": 2.6553101587065577, "learning_rate": 8.258896897014728e-08, "loss": 0.2207, "step": 47174 }, { "epoch": 0.8200212066957534, "grad_norm": 1.2921329299781068, "learning_rate": 8.257347286251204e-08, "loss": 0.3727, "step": 47175 }, { "epoch": 0.8200385892332562, "grad_norm": 1.160311370290771, "learning_rate": 8.255797807792725e-08, "loss": 0.1065, "step": 47176 }, { "epoch": 0.8200559717707591, "grad_norm": 0.9802158356784839, "learning_rate": 8.254248461644192e-08, "loss": 0.2586, "step": 47177 }, { "epoch": 0.8200733543082619, "grad_norm": 1.3514193077384333, "learning_rate": 8.252699247810513e-08, "loss": 0.1748, "step": 47178 }, { "epoch": 0.8200907368457647, "grad_norm": 0.9185566673177475, "learning_rate": 8.251150166296605e-08, "loss": 0.1164, "step": 47179 }, { "epoch": 0.8201081193832676, "grad_norm": 1.5807559671476268, "learning_rate": 8.249601217107371e-08, "loss": 0.2766, "step": 47180 }, { "epoch": 0.8201255019207704, "grad_norm": 1.6511485269632504, "learning_rate": 8.248052400247718e-08, "loss": 0.1641, "step": 47181 }, { "epoch": 0.8201428844582732, "grad_norm": 2.280043983255633, "learning_rate": 8.246503715722564e-08, "loss": 0.2797, "step": 47182 }, { "epoch": 0.820160266995776, "grad_norm": 1.9577123363262088, "learning_rate": 8.244955163536804e-08, "loss": 0.3613, "step": 47183 }, { "epoch": 0.8201776495332789, "grad_norm": 0.9067862072005458, "learning_rate": 8.243406743695363e-08, "loss": 0.2361, "step": 47184 }, { "epoch": 0.8201950320707817, "grad_norm": 1.816956354206387, "learning_rate": 8.241858456203143e-08, "loss": 0.1827, "step": 47185 }, { "epoch": 0.8202124146082845, "grad_norm": 1.2869738951956091, "learning_rate": 8.240310301065057e-08, "loss": 0.3733, "step": 47186 }, { "epoch": 0.8202297971457874, "grad_norm": 1.1390020469128912, "learning_rate": 8.238762278285988e-08, "loss": 0.2058, "step": 47187 }, { "epoch": 0.8202471796832902, "grad_norm": 1.3903779954425164, "learning_rate": 8.237214387870866e-08, "loss": 0.1722, "step": 47188 }, { "epoch": 0.820264562220793, "grad_norm": 0.9966293313962259, "learning_rate": 8.235666629824589e-08, "loss": 0.2377, "step": 47189 }, { "epoch": 0.8202819447582959, "grad_norm": 1.6811192879466341, "learning_rate": 8.234119004152068e-08, "loss": 0.1334, "step": 47190 }, { "epoch": 0.8202993272957987, "grad_norm": 1.6121391671244782, "learning_rate": 8.232571510858189e-08, "loss": 0.2397, "step": 47191 }, { "epoch": 0.8203167098333015, "grad_norm": 2.501216200124611, "learning_rate": 8.231024149947896e-08, "loss": 0.2609, "step": 47192 }, { "epoch": 0.8203340923708042, "grad_norm": 0.7378277778444612, "learning_rate": 8.229476921426059e-08, "loss": 0.1529, "step": 47193 }, { "epoch": 0.8203514749083071, "grad_norm": 1.1913395919627339, "learning_rate": 8.227929825297597e-08, "loss": 0.1226, "step": 47194 }, { "epoch": 0.8203688574458099, "grad_norm": 1.3416716533533373, "learning_rate": 8.226382861567388e-08, "loss": 0.2993, "step": 47195 }, { "epoch": 0.8203862399833127, "grad_norm": 1.2285437745998171, "learning_rate": 8.224836030240372e-08, "loss": 0.2967, "step": 47196 }, { "epoch": 0.8204036225208156, "grad_norm": 2.35418482807036, "learning_rate": 8.223289331321436e-08, "loss": 0.3045, "step": 47197 }, { "epoch": 0.8204210050583184, "grad_norm": 2.8382851339890696, "learning_rate": 8.221742764815481e-08, "loss": 0.2026, "step": 47198 }, { "epoch": 0.8204383875958212, "grad_norm": 1.8036266701474393, "learning_rate": 8.220196330727414e-08, "loss": 0.2986, "step": 47199 }, { "epoch": 0.820455770133324, "grad_norm": 1.3585362821110556, "learning_rate": 8.21865002906213e-08, "loss": 0.1396, "step": 47200 }, { "epoch": 0.8204731526708269, "grad_norm": 1.457097792348697, "learning_rate": 8.217103859824531e-08, "loss": 0.2405, "step": 47201 }, { "epoch": 0.8204905352083297, "grad_norm": 0.9958347369053062, "learning_rate": 8.21555782301952e-08, "loss": 0.1202, "step": 47202 }, { "epoch": 0.8205079177458325, "grad_norm": 0.9699766079005261, "learning_rate": 8.214011918651986e-08, "loss": 0.1927, "step": 47203 }, { "epoch": 0.8205253002833354, "grad_norm": 1.5459113788710006, "learning_rate": 8.212466146726848e-08, "loss": 0.2426, "step": 47204 }, { "epoch": 0.8205426828208382, "grad_norm": 1.951851578063498, "learning_rate": 8.210920507249014e-08, "loss": 0.3802, "step": 47205 }, { "epoch": 0.820560065358341, "grad_norm": 0.9505747489407679, "learning_rate": 8.20937500022334e-08, "loss": 0.2386, "step": 47206 }, { "epoch": 0.8205774478958439, "grad_norm": 1.7014792742305314, "learning_rate": 8.207829625654761e-08, "loss": 0.2738, "step": 47207 }, { "epoch": 0.8205948304333467, "grad_norm": 1.6371484743707678, "learning_rate": 8.206284383548163e-08, "loss": 0.2441, "step": 47208 }, { "epoch": 0.8206122129708495, "grad_norm": 1.5118768604680604, "learning_rate": 8.204739273908445e-08, "loss": 0.2076, "step": 47209 }, { "epoch": 0.8206295955083523, "grad_norm": 1.7576538432892206, "learning_rate": 8.203194296740507e-08, "loss": 0.197, "step": 47210 }, { "epoch": 0.8206469780458552, "grad_norm": 1.5174907921092013, "learning_rate": 8.201649452049242e-08, "loss": 0.2436, "step": 47211 }, { "epoch": 0.820664360583358, "grad_norm": 1.3340517363527182, "learning_rate": 8.200104739839547e-08, "loss": 0.1196, "step": 47212 }, { "epoch": 0.8206817431208607, "grad_norm": 1.4281660113678152, "learning_rate": 8.198560160116318e-08, "loss": 0.2749, "step": 47213 }, { "epoch": 0.8206991256583636, "grad_norm": 1.1929026783563608, "learning_rate": 8.197015712884442e-08, "loss": 0.1896, "step": 47214 }, { "epoch": 0.8207165081958664, "grad_norm": 1.1342769062282825, "learning_rate": 8.195471398148835e-08, "loss": 0.2496, "step": 47215 }, { "epoch": 0.8207338907333692, "grad_norm": 1.0805001902893399, "learning_rate": 8.193927215914376e-08, "loss": 0.108, "step": 47216 }, { "epoch": 0.820751273270872, "grad_norm": 1.5254578478757692, "learning_rate": 8.192383166185968e-08, "loss": 0.2944, "step": 47217 }, { "epoch": 0.8207686558083749, "grad_norm": 1.8352165181996272, "learning_rate": 8.190839248968501e-08, "loss": 0.5986, "step": 47218 }, { "epoch": 0.8207860383458777, "grad_norm": 2.1885385682324157, "learning_rate": 8.189295464266866e-08, "loss": 0.3382, "step": 47219 }, { "epoch": 0.8208034208833805, "grad_norm": 0.9824272616122568, "learning_rate": 8.187751812085958e-08, "loss": 0.3578, "step": 47220 }, { "epoch": 0.8208208034208834, "grad_norm": 1.5853441614372785, "learning_rate": 8.186208292430668e-08, "loss": 0.1783, "step": 47221 }, { "epoch": 0.8208381859583862, "grad_norm": 1.2408334098084468, "learning_rate": 8.184664905305882e-08, "loss": 0.1236, "step": 47222 }, { "epoch": 0.820855568495889, "grad_norm": 0.907618394827952, "learning_rate": 8.183121650716507e-08, "loss": 0.1831, "step": 47223 }, { "epoch": 0.8208729510333919, "grad_norm": 1.0398381836638164, "learning_rate": 8.181578528667443e-08, "loss": 0.2216, "step": 47224 }, { "epoch": 0.8208903335708947, "grad_norm": 1.7937934235118898, "learning_rate": 8.180035539163554e-08, "loss": 0.2737, "step": 47225 }, { "epoch": 0.8209077161083975, "grad_norm": 1.7036280216149668, "learning_rate": 8.178492682209726e-08, "loss": 0.2843, "step": 47226 }, { "epoch": 0.8209250986459004, "grad_norm": 0.8328969559764945, "learning_rate": 8.176949957810881e-08, "loss": 0.1288, "step": 47227 }, { "epoch": 0.8209424811834032, "grad_norm": 1.3632017405252128, "learning_rate": 8.175407365971892e-08, "loss": 0.3509, "step": 47228 }, { "epoch": 0.820959863720906, "grad_norm": 1.2440534215913672, "learning_rate": 8.173864906697647e-08, "loss": 0.2105, "step": 47229 }, { "epoch": 0.8209772462584088, "grad_norm": 2.757718082503879, "learning_rate": 8.17232257999304e-08, "loss": 0.2639, "step": 47230 }, { "epoch": 0.8209946287959117, "grad_norm": 1.427821492225062, "learning_rate": 8.170780385862952e-08, "loss": 0.191, "step": 47231 }, { "epoch": 0.8210120113334145, "grad_norm": 1.4294419470582023, "learning_rate": 8.169238324312277e-08, "loss": 0.2026, "step": 47232 }, { "epoch": 0.8210293938709172, "grad_norm": 1.3671548107769813, "learning_rate": 8.167696395345902e-08, "loss": 0.167, "step": 47233 }, { "epoch": 0.82104677640842, "grad_norm": 1.5244360387372646, "learning_rate": 8.166154598968699e-08, "loss": 0.2646, "step": 47234 }, { "epoch": 0.8210641589459229, "grad_norm": 1.9065916946106911, "learning_rate": 8.164612935185589e-08, "loss": 0.2132, "step": 47235 }, { "epoch": 0.8210815414834257, "grad_norm": 2.2143430606749632, "learning_rate": 8.163071404001443e-08, "loss": 0.3129, "step": 47236 }, { "epoch": 0.8210989240209285, "grad_norm": 1.093057853542209, "learning_rate": 8.161530005421119e-08, "loss": 0.1584, "step": 47237 }, { "epoch": 0.8211163065584314, "grad_norm": 3.0789567890586005, "learning_rate": 8.159988739449535e-08, "loss": 0.4831, "step": 47238 }, { "epoch": 0.8211336890959342, "grad_norm": 0.996293375889677, "learning_rate": 8.158447606091573e-08, "loss": 0.1914, "step": 47239 }, { "epoch": 0.821151071633437, "grad_norm": 1.2788769652404721, "learning_rate": 8.15690660535211e-08, "loss": 0.2117, "step": 47240 }, { "epoch": 0.8211684541709399, "grad_norm": 2.6305114749521508, "learning_rate": 8.15536573723603e-08, "loss": 0.3209, "step": 47241 }, { "epoch": 0.8211858367084427, "grad_norm": 0.9887123368393251, "learning_rate": 8.15382500174821e-08, "loss": 0.2018, "step": 47242 }, { "epoch": 0.8212032192459455, "grad_norm": 1.8937884201882227, "learning_rate": 8.15228439889356e-08, "loss": 0.1835, "step": 47243 }, { "epoch": 0.8212206017834484, "grad_norm": 1.5341151494156413, "learning_rate": 8.150743928676939e-08, "loss": 0.1609, "step": 47244 }, { "epoch": 0.8212379843209512, "grad_norm": 1.350172972066915, "learning_rate": 8.149203591103221e-08, "loss": 0.2023, "step": 47245 }, { "epoch": 0.821255366858454, "grad_norm": 1.9001535682398378, "learning_rate": 8.147663386177317e-08, "loss": 0.2639, "step": 47246 }, { "epoch": 0.8212727493959568, "grad_norm": 1.3235505277877955, "learning_rate": 8.146123313904096e-08, "loss": 0.1572, "step": 47247 }, { "epoch": 0.8212901319334597, "grad_norm": 3.637197704340641, "learning_rate": 8.144583374288433e-08, "loss": 0.3466, "step": 47248 }, { "epoch": 0.8213075144709625, "grad_norm": 3.1944081595470872, "learning_rate": 8.143043567335217e-08, "loss": 0.1702, "step": 47249 }, { "epoch": 0.8213248970084653, "grad_norm": 3.5455347196377085, "learning_rate": 8.141503893049323e-08, "loss": 0.3101, "step": 47250 }, { "epoch": 0.8213422795459682, "grad_norm": 1.2672623787452104, "learning_rate": 8.139964351435635e-08, "loss": 0.2375, "step": 47251 }, { "epoch": 0.821359662083471, "grad_norm": 2.006115538646878, "learning_rate": 8.138424942499034e-08, "loss": 0.2246, "step": 47252 }, { "epoch": 0.8213770446209737, "grad_norm": 1.9256475522140142, "learning_rate": 8.136885666244381e-08, "loss": 0.1997, "step": 47253 }, { "epoch": 0.8213944271584765, "grad_norm": 2.349223502356153, "learning_rate": 8.135346522676584e-08, "loss": 0.2301, "step": 47254 }, { "epoch": 0.8214118096959794, "grad_norm": 1.8214619035698816, "learning_rate": 8.133807511800517e-08, "loss": 0.2116, "step": 47255 }, { "epoch": 0.8214291922334822, "grad_norm": 1.4553662421389566, "learning_rate": 8.132268633621037e-08, "loss": 0.1454, "step": 47256 }, { "epoch": 0.821446574770985, "grad_norm": 2.155694547989378, "learning_rate": 8.130729888143022e-08, "loss": 0.3067, "step": 47257 }, { "epoch": 0.8214639573084879, "grad_norm": 1.8085121591745574, "learning_rate": 8.129191275371372e-08, "loss": 0.218, "step": 47258 }, { "epoch": 0.8214813398459907, "grad_norm": 1.8789843504863837, "learning_rate": 8.127652795310952e-08, "loss": 0.3132, "step": 47259 }, { "epoch": 0.8214987223834935, "grad_norm": 1.1820584980955997, "learning_rate": 8.126114447966636e-08, "loss": 0.1379, "step": 47260 }, { "epoch": 0.8215161049209964, "grad_norm": 1.081734512522426, "learning_rate": 8.124576233343294e-08, "loss": 0.1936, "step": 47261 }, { "epoch": 0.8215334874584992, "grad_norm": 0.8384448164683335, "learning_rate": 8.123038151445832e-08, "loss": 0.2087, "step": 47262 }, { "epoch": 0.821550869996002, "grad_norm": 1.7218228031993192, "learning_rate": 8.121500202279086e-08, "loss": 0.2458, "step": 47263 }, { "epoch": 0.8215682525335049, "grad_norm": 1.432388667738714, "learning_rate": 8.119962385847939e-08, "loss": 0.2284, "step": 47264 }, { "epoch": 0.8215856350710077, "grad_norm": 1.3953873302795268, "learning_rate": 8.118424702157283e-08, "loss": 0.1812, "step": 47265 }, { "epoch": 0.8216030176085105, "grad_norm": 1.175925938321583, "learning_rate": 8.116887151211988e-08, "loss": 0.1262, "step": 47266 }, { "epoch": 0.8216204001460133, "grad_norm": 0.8724627572840911, "learning_rate": 8.115349733016913e-08, "loss": 0.1353, "step": 47267 }, { "epoch": 0.8216377826835162, "grad_norm": 1.7151926705368157, "learning_rate": 8.113812447576945e-08, "loss": 0.162, "step": 47268 }, { "epoch": 0.821655165221019, "grad_norm": 1.8509878897943266, "learning_rate": 8.112275294896948e-08, "loss": 0.3031, "step": 47269 }, { "epoch": 0.8216725477585218, "grad_norm": 2.050933612107637, "learning_rate": 8.110738274981793e-08, "loss": 0.2513, "step": 47270 }, { "epoch": 0.8216899302960247, "grad_norm": 2.155006224725209, "learning_rate": 8.109201387836361e-08, "loss": 0.1842, "step": 47271 }, { "epoch": 0.8217073128335275, "grad_norm": 1.1316672580838751, "learning_rate": 8.107664633465505e-08, "loss": 0.1298, "step": 47272 }, { "epoch": 0.8217246953710302, "grad_norm": 1.5038770853462882, "learning_rate": 8.106128011874119e-08, "loss": 0.2693, "step": 47273 }, { "epoch": 0.821742077908533, "grad_norm": 1.5833731313040176, "learning_rate": 8.104591523067073e-08, "loss": 0.2161, "step": 47274 }, { "epoch": 0.8217594604460359, "grad_norm": 6.600049595117362, "learning_rate": 8.103055167049216e-08, "loss": 0.3854, "step": 47275 }, { "epoch": 0.8217768429835387, "grad_norm": 1.2879309844378355, "learning_rate": 8.10151894382542e-08, "loss": 0.4256, "step": 47276 }, { "epoch": 0.8217942255210415, "grad_norm": 1.3253138414600265, "learning_rate": 8.099982853400567e-08, "loss": 0.2721, "step": 47277 }, { "epoch": 0.8218116080585444, "grad_norm": 1.7423156280636056, "learning_rate": 8.098446895779526e-08, "loss": 0.2307, "step": 47278 }, { "epoch": 0.8218289905960472, "grad_norm": 2.6165469235887646, "learning_rate": 8.09691107096716e-08, "loss": 0.2681, "step": 47279 }, { "epoch": 0.82184637313355, "grad_norm": 1.6588363458113955, "learning_rate": 8.095375378968322e-08, "loss": 0.1968, "step": 47280 }, { "epoch": 0.8218637556710529, "grad_norm": 0.9495809031675414, "learning_rate": 8.093839819787923e-08, "loss": 0.1441, "step": 47281 }, { "epoch": 0.8218811382085557, "grad_norm": 1.476428086824061, "learning_rate": 8.092304393430787e-08, "loss": 0.3393, "step": 47282 }, { "epoch": 0.8218985207460585, "grad_norm": 3.0239582580253273, "learning_rate": 8.090769099901795e-08, "loss": 0.3619, "step": 47283 }, { "epoch": 0.8219159032835613, "grad_norm": 1.0608890916921339, "learning_rate": 8.089233939205803e-08, "loss": 0.2042, "step": 47284 }, { "epoch": 0.8219332858210642, "grad_norm": 1.036563131141334, "learning_rate": 8.087698911347701e-08, "loss": 0.2585, "step": 47285 }, { "epoch": 0.821950668358567, "grad_norm": 1.2364633365899291, "learning_rate": 8.086164016332336e-08, "loss": 0.1345, "step": 47286 }, { "epoch": 0.8219680508960698, "grad_norm": 1.729056694398267, "learning_rate": 8.084629254164583e-08, "loss": 0.3377, "step": 47287 }, { "epoch": 0.8219854334335727, "grad_norm": 0.8021175608901312, "learning_rate": 8.083094624849296e-08, "loss": 0.2986, "step": 47288 }, { "epoch": 0.8220028159710755, "grad_norm": 0.9523829678918433, "learning_rate": 8.081560128391346e-08, "loss": 0.3139, "step": 47289 }, { "epoch": 0.8220201985085783, "grad_norm": 1.175530053735847, "learning_rate": 8.080025764795595e-08, "loss": 0.139, "step": 47290 }, { "epoch": 0.8220375810460812, "grad_norm": 1.6081421573762722, "learning_rate": 8.078491534066906e-08, "loss": 0.1539, "step": 47291 }, { "epoch": 0.822054963583584, "grad_norm": 1.0116261621564284, "learning_rate": 8.076957436210124e-08, "loss": 0.2473, "step": 47292 }, { "epoch": 0.8220723461210867, "grad_norm": 1.1482405851630875, "learning_rate": 8.075423471230163e-08, "loss": 0.1563, "step": 47293 }, { "epoch": 0.8220897286585895, "grad_norm": 1.045863816967203, "learning_rate": 8.073889639131832e-08, "loss": 0.2049, "step": 47294 }, { "epoch": 0.8221071111960924, "grad_norm": 1.4337370571442518, "learning_rate": 8.07235593992e-08, "loss": 0.3224, "step": 47295 }, { "epoch": 0.8221244937335952, "grad_norm": 1.4631439324556488, "learning_rate": 8.070822373599556e-08, "loss": 0.2663, "step": 47296 }, { "epoch": 0.822141876271098, "grad_norm": 1.1134396204041106, "learning_rate": 8.069288940175339e-08, "loss": 0.2184, "step": 47297 }, { "epoch": 0.8221592588086009, "grad_norm": 1.7809627309503293, "learning_rate": 8.067755639652219e-08, "loss": 0.2443, "step": 47298 }, { "epoch": 0.8221766413461037, "grad_norm": 2.2554833920191135, "learning_rate": 8.06622247203505e-08, "loss": 0.2645, "step": 47299 }, { "epoch": 0.8221940238836065, "grad_norm": 1.2853157774801895, "learning_rate": 8.064689437328692e-08, "loss": 0.1627, "step": 47300 }, { "epoch": 0.8222114064211093, "grad_norm": 1.5922372226184103, "learning_rate": 8.063156535538007e-08, "loss": 0.2477, "step": 47301 }, { "epoch": 0.8222287889586122, "grad_norm": 1.3020457644478767, "learning_rate": 8.061623766667847e-08, "loss": 0.1511, "step": 47302 }, { "epoch": 0.822246171496115, "grad_norm": 2.0888163617600735, "learning_rate": 8.060091130723068e-08, "loss": 0.196, "step": 47303 }, { "epoch": 0.8222635540336178, "grad_norm": 1.2478142180431955, "learning_rate": 8.058558627708545e-08, "loss": 0.2289, "step": 47304 }, { "epoch": 0.8222809365711207, "grad_norm": 1.632353351232988, "learning_rate": 8.057026257629124e-08, "loss": 0.3904, "step": 47305 }, { "epoch": 0.8222983191086235, "grad_norm": 1.5153119242959117, "learning_rate": 8.055494020489673e-08, "loss": 0.1828, "step": 47306 }, { "epoch": 0.8223157016461263, "grad_norm": 1.747018498391157, "learning_rate": 8.053961916295016e-08, "loss": 0.2333, "step": 47307 }, { "epoch": 0.8223330841836292, "grad_norm": 2.1367225260314164, "learning_rate": 8.052429945050043e-08, "loss": 0.2035, "step": 47308 }, { "epoch": 0.822350466721132, "grad_norm": 1.0041630975241274, "learning_rate": 8.050898106759602e-08, "loss": 0.1792, "step": 47309 }, { "epoch": 0.8223678492586348, "grad_norm": 1.2976711144332578, "learning_rate": 8.049366401428536e-08, "loss": 0.2337, "step": 47310 }, { "epoch": 0.8223852317961377, "grad_norm": 1.0677199275641178, "learning_rate": 8.047834829061701e-08, "loss": 0.2113, "step": 47311 }, { "epoch": 0.8224026143336404, "grad_norm": 1.3230847271612358, "learning_rate": 8.046303389663983e-08, "loss": 0.1854, "step": 47312 }, { "epoch": 0.8224199968711432, "grad_norm": 3.1431649957821155, "learning_rate": 8.044772083240197e-08, "loss": 0.3804, "step": 47313 }, { "epoch": 0.822437379408646, "grad_norm": 2.073216000772689, "learning_rate": 8.043240909795212e-08, "loss": 0.1831, "step": 47314 }, { "epoch": 0.8224547619461489, "grad_norm": 1.2139980616750687, "learning_rate": 8.041709869333868e-08, "loss": 0.1935, "step": 47315 }, { "epoch": 0.8224721444836517, "grad_norm": 1.3401471063650876, "learning_rate": 8.040178961861043e-08, "loss": 0.1472, "step": 47316 }, { "epoch": 0.8224895270211545, "grad_norm": 1.2429848491670026, "learning_rate": 8.038648187381575e-08, "loss": 0.224, "step": 47317 }, { "epoch": 0.8225069095586574, "grad_norm": 1.4063744108286564, "learning_rate": 8.037117545900318e-08, "loss": 0.2139, "step": 47318 }, { "epoch": 0.8225242920961602, "grad_norm": 1.9461011351667084, "learning_rate": 8.035587037422121e-08, "loss": 0.3137, "step": 47319 }, { "epoch": 0.822541674633663, "grad_norm": 1.4483880209193263, "learning_rate": 8.034056661951833e-08, "loss": 0.1992, "step": 47320 }, { "epoch": 0.8225590571711658, "grad_norm": 1.1593141592938714, "learning_rate": 8.032526419494312e-08, "loss": 0.2567, "step": 47321 }, { "epoch": 0.8225764397086687, "grad_norm": 1.1990023117830402, "learning_rate": 8.0309963100544e-08, "loss": 0.3178, "step": 47322 }, { "epoch": 0.8225938222461715, "grad_norm": 1.3514701012576655, "learning_rate": 8.029466333636942e-08, "loss": 0.2306, "step": 47323 }, { "epoch": 0.8226112047836743, "grad_norm": 1.1264168731283832, "learning_rate": 8.027936490246806e-08, "loss": 0.1925, "step": 47324 }, { "epoch": 0.8226285873211772, "grad_norm": 1.9431628224613746, "learning_rate": 8.026406779888844e-08, "loss": 0.3287, "step": 47325 }, { "epoch": 0.82264596985868, "grad_norm": 2.0693258381161193, "learning_rate": 8.02487720256787e-08, "loss": 0.208, "step": 47326 }, { "epoch": 0.8226633523961828, "grad_norm": 2.3306965416108674, "learning_rate": 8.023347758288763e-08, "loss": 0.2098, "step": 47327 }, { "epoch": 0.8226807349336857, "grad_norm": 1.6086494328022876, "learning_rate": 8.021818447056361e-08, "loss": 0.29, "step": 47328 }, { "epoch": 0.8226981174711885, "grad_norm": 1.7410633039304566, "learning_rate": 8.020289268875513e-08, "loss": 0.2578, "step": 47329 }, { "epoch": 0.8227155000086913, "grad_norm": 0.9654574795180477, "learning_rate": 8.018760223751065e-08, "loss": 0.141, "step": 47330 }, { "epoch": 0.8227328825461941, "grad_norm": 2.20537833581156, "learning_rate": 8.017231311687855e-08, "loss": 0.2115, "step": 47331 }, { "epoch": 0.8227502650836969, "grad_norm": 1.0087149282861527, "learning_rate": 8.015702532690744e-08, "loss": 0.1451, "step": 47332 }, { "epoch": 0.8227676476211997, "grad_norm": 1.4810392433296058, "learning_rate": 8.014173886764569e-08, "loss": 0.146, "step": 47333 }, { "epoch": 0.8227850301587025, "grad_norm": 1.7090386659219023, "learning_rate": 8.012645373914162e-08, "loss": 0.1871, "step": 47334 }, { "epoch": 0.8228024126962054, "grad_norm": 1.6728663522815017, "learning_rate": 8.011116994144395e-08, "loss": 0.3891, "step": 47335 }, { "epoch": 0.8228197952337082, "grad_norm": 1.864284147039049, "learning_rate": 8.0095887474601e-08, "loss": 0.2521, "step": 47336 }, { "epoch": 0.822837177771211, "grad_norm": 1.1586343814102118, "learning_rate": 8.008060633866115e-08, "loss": 0.2219, "step": 47337 }, { "epoch": 0.8228545603087138, "grad_norm": 1.1233770582227152, "learning_rate": 8.006532653367292e-08, "loss": 0.1599, "step": 47338 }, { "epoch": 0.8228719428462167, "grad_norm": 1.9958349830200526, "learning_rate": 8.005004805968469e-08, "loss": 0.329, "step": 47339 }, { "epoch": 0.8228893253837195, "grad_norm": 1.3352091042497853, "learning_rate": 8.003477091674494e-08, "loss": 0.2084, "step": 47340 }, { "epoch": 0.8229067079212223, "grad_norm": 1.8594764035675346, "learning_rate": 8.001949510490197e-08, "loss": 0.1866, "step": 47341 }, { "epoch": 0.8229240904587252, "grad_norm": 1.228753427037967, "learning_rate": 8.00042206242042e-08, "loss": 0.197, "step": 47342 }, { "epoch": 0.822941472996228, "grad_norm": 1.429517266274715, "learning_rate": 7.998894747470025e-08, "loss": 0.1574, "step": 47343 }, { "epoch": 0.8229588555337308, "grad_norm": 1.3624141120722018, "learning_rate": 7.997367565643853e-08, "loss": 0.1924, "step": 47344 }, { "epoch": 0.8229762380712337, "grad_norm": 0.764199499865669, "learning_rate": 7.995840516946717e-08, "loss": 0.1204, "step": 47345 }, { "epoch": 0.8229936206087365, "grad_norm": 1.149974422499575, "learning_rate": 7.994313601383462e-08, "loss": 0.2109, "step": 47346 }, { "epoch": 0.8230110031462393, "grad_norm": 1.4825051117180474, "learning_rate": 7.992786818958946e-08, "loss": 0.2169, "step": 47347 }, { "epoch": 0.8230283856837421, "grad_norm": 1.334167459198354, "learning_rate": 7.991260169677999e-08, "loss": 0.1807, "step": 47348 }, { "epoch": 0.823045768221245, "grad_norm": 1.158985076882789, "learning_rate": 7.989733653545461e-08, "loss": 0.2667, "step": 47349 }, { "epoch": 0.8230631507587478, "grad_norm": 2.49555601193477, "learning_rate": 7.988207270566172e-08, "loss": 0.4311, "step": 47350 }, { "epoch": 0.8230805332962506, "grad_norm": 4.718928860994252, "learning_rate": 7.986681020744968e-08, "loss": 0.2908, "step": 47351 }, { "epoch": 0.8230979158337534, "grad_norm": 1.1293861181051652, "learning_rate": 7.985154904086677e-08, "loss": 0.2647, "step": 47352 }, { "epoch": 0.8231152983712562, "grad_norm": 0.8464388109320148, "learning_rate": 7.983628920596153e-08, "loss": 0.1541, "step": 47353 }, { "epoch": 0.823132680908759, "grad_norm": 1.8793924940808657, "learning_rate": 7.982103070278212e-08, "loss": 0.2516, "step": 47354 }, { "epoch": 0.8231500634462618, "grad_norm": 1.6077764536821848, "learning_rate": 7.980577353137712e-08, "loss": 0.1628, "step": 47355 }, { "epoch": 0.8231674459837647, "grad_norm": 1.0446083359859677, "learning_rate": 7.979051769179495e-08, "loss": 0.2665, "step": 47356 }, { "epoch": 0.8231848285212675, "grad_norm": 1.6348361723196467, "learning_rate": 7.977526318408351e-08, "loss": 0.2908, "step": 47357 }, { "epoch": 0.8232022110587703, "grad_norm": 1.229518899952053, "learning_rate": 7.976001000829164e-08, "loss": 0.3377, "step": 47358 }, { "epoch": 0.8232195935962732, "grad_norm": 1.5620586860854238, "learning_rate": 7.974475816446746e-08, "loss": 0.1899, "step": 47359 }, { "epoch": 0.823236976133776, "grad_norm": 1.4869785877565032, "learning_rate": 7.972950765265935e-08, "loss": 0.1878, "step": 47360 }, { "epoch": 0.8232543586712788, "grad_norm": 2.0014263192860784, "learning_rate": 7.971425847291552e-08, "loss": 0.3317, "step": 47361 }, { "epoch": 0.8232717412087817, "grad_norm": 1.8049791201971706, "learning_rate": 7.969901062528455e-08, "loss": 0.3118, "step": 47362 }, { "epoch": 0.8232891237462845, "grad_norm": 1.3314983986021547, "learning_rate": 7.968376410981481e-08, "loss": 0.2947, "step": 47363 }, { "epoch": 0.8233065062837873, "grad_norm": 2.019244288125756, "learning_rate": 7.96685189265543e-08, "loss": 0.2501, "step": 47364 }, { "epoch": 0.8233238888212902, "grad_norm": 2.165310959986917, "learning_rate": 7.965327507555137e-08, "loss": 0.3355, "step": 47365 }, { "epoch": 0.823341271358793, "grad_norm": 1.2820434598406223, "learning_rate": 7.963803255685459e-08, "loss": 0.3805, "step": 47366 }, { "epoch": 0.8233586538962958, "grad_norm": 1.6027275522689473, "learning_rate": 7.962279137051215e-08, "loss": 0.378, "step": 47367 }, { "epoch": 0.8233760364337986, "grad_norm": 1.0730701551766397, "learning_rate": 7.960755151657234e-08, "loss": 0.2838, "step": 47368 }, { "epoch": 0.8233934189713015, "grad_norm": 1.0605065977017587, "learning_rate": 7.959231299508352e-08, "loss": 0.179, "step": 47369 }, { "epoch": 0.8234108015088043, "grad_norm": 1.480071453563394, "learning_rate": 7.95770758060939e-08, "loss": 0.2958, "step": 47370 }, { "epoch": 0.8234281840463071, "grad_norm": 1.751986290006854, "learning_rate": 7.956183994965188e-08, "loss": 0.2285, "step": 47371 }, { "epoch": 0.8234455665838099, "grad_norm": 1.0051572722232476, "learning_rate": 7.954660542580566e-08, "loss": 0.2226, "step": 47372 }, { "epoch": 0.8234629491213127, "grad_norm": 2.130316430918903, "learning_rate": 7.953137223460343e-08, "loss": 0.1513, "step": 47373 }, { "epoch": 0.8234803316588155, "grad_norm": 1.9922460157582196, "learning_rate": 7.951614037609377e-08, "loss": 0.3288, "step": 47374 }, { "epoch": 0.8234977141963183, "grad_norm": 1.1801977310185925, "learning_rate": 7.950090985032487e-08, "loss": 0.167, "step": 47375 }, { "epoch": 0.8235150967338212, "grad_norm": 0.8488899718641301, "learning_rate": 7.948568065734468e-08, "loss": 0.2966, "step": 47376 }, { "epoch": 0.823532479271324, "grad_norm": 0.8570250728799024, "learning_rate": 7.947045279720187e-08, "loss": 0.1102, "step": 47377 }, { "epoch": 0.8235498618088268, "grad_norm": 1.0519856064591377, "learning_rate": 7.945522626994455e-08, "loss": 0.3233, "step": 47378 }, { "epoch": 0.8235672443463297, "grad_norm": 1.283432834763018, "learning_rate": 7.944000107562094e-08, "loss": 0.1949, "step": 47379 }, { "epoch": 0.8235846268838325, "grad_norm": 0.9381529875852674, "learning_rate": 7.942477721427937e-08, "loss": 0.2541, "step": 47380 }, { "epoch": 0.8236020094213353, "grad_norm": 1.2809080127898853, "learning_rate": 7.940955468596789e-08, "loss": 0.351, "step": 47381 }, { "epoch": 0.8236193919588382, "grad_norm": 1.8027568023612597, "learning_rate": 7.939433349073521e-08, "loss": 0.1895, "step": 47382 }, { "epoch": 0.823636774496341, "grad_norm": 1.7892073074296084, "learning_rate": 7.937911362862914e-08, "loss": 0.2491, "step": 47383 }, { "epoch": 0.8236541570338438, "grad_norm": 1.5214866859039662, "learning_rate": 7.936389509969793e-08, "loss": 0.1727, "step": 47384 }, { "epoch": 0.8236715395713466, "grad_norm": 3.911310876760216, "learning_rate": 7.934867790399007e-08, "loss": 0.2531, "step": 47385 }, { "epoch": 0.8236889221088495, "grad_norm": 0.9872238720851446, "learning_rate": 7.933346204155372e-08, "loss": 0.2294, "step": 47386 }, { "epoch": 0.8237063046463523, "grad_norm": 1.9218761537707816, "learning_rate": 7.931824751243704e-08, "loss": 0.2389, "step": 47387 }, { "epoch": 0.8237236871838551, "grad_norm": 1.752796335751735, "learning_rate": 7.930303431668822e-08, "loss": 0.2246, "step": 47388 }, { "epoch": 0.823741069721358, "grad_norm": 1.7355356895934246, "learning_rate": 7.928782245435556e-08, "loss": 0.1544, "step": 47389 }, { "epoch": 0.8237584522588608, "grad_norm": 1.8419967849299397, "learning_rate": 7.927261192548723e-08, "loss": 0.2187, "step": 47390 }, { "epoch": 0.8237758347963636, "grad_norm": 1.427139041732445, "learning_rate": 7.925740273013149e-08, "loss": 0.23, "step": 47391 }, { "epoch": 0.8237932173338663, "grad_norm": 1.0809950145556568, "learning_rate": 7.924219486833639e-08, "loss": 0.2235, "step": 47392 }, { "epoch": 0.8238105998713692, "grad_norm": 1.8174176952407537, "learning_rate": 7.922698834015035e-08, "loss": 0.1909, "step": 47393 }, { "epoch": 0.823827982408872, "grad_norm": 1.4288961744379893, "learning_rate": 7.921178314562155e-08, "loss": 0.2125, "step": 47394 }, { "epoch": 0.8238453649463748, "grad_norm": 1.1479530942726335, "learning_rate": 7.919657928479806e-08, "loss": 0.2414, "step": 47395 }, { "epoch": 0.8238627474838777, "grad_norm": 1.4866504385738475, "learning_rate": 7.918137675772796e-08, "loss": 0.1522, "step": 47396 }, { "epoch": 0.8238801300213805, "grad_norm": 2.536200215529614, "learning_rate": 7.916617556445971e-08, "loss": 0.2542, "step": 47397 }, { "epoch": 0.8238975125588833, "grad_norm": 1.3542690702734046, "learning_rate": 7.915097570504131e-08, "loss": 0.1638, "step": 47398 }, { "epoch": 0.8239148950963862, "grad_norm": 2.0879631820946005, "learning_rate": 7.913577717952108e-08, "loss": 0.1576, "step": 47399 }, { "epoch": 0.823932277633889, "grad_norm": 1.537361678124506, "learning_rate": 7.912057998794697e-08, "loss": 0.2372, "step": 47400 }, { "epoch": 0.8239496601713918, "grad_norm": 0.947444058823916, "learning_rate": 7.910538413036749e-08, "loss": 0.2448, "step": 47401 }, { "epoch": 0.8239670427088946, "grad_norm": 1.0919315990358467, "learning_rate": 7.909018960683051e-08, "loss": 0.2126, "step": 47402 }, { "epoch": 0.8239844252463975, "grad_norm": 1.3963335898308884, "learning_rate": 7.907499641738424e-08, "loss": 0.1983, "step": 47403 }, { "epoch": 0.8240018077839003, "grad_norm": 0.9855000289275111, "learning_rate": 7.905980456207678e-08, "loss": 0.2572, "step": 47404 }, { "epoch": 0.8240191903214031, "grad_norm": 0.9452948751244485, "learning_rate": 7.904461404095647e-08, "loss": 0.1521, "step": 47405 }, { "epoch": 0.824036572858906, "grad_norm": 1.9781311127490036, "learning_rate": 7.902942485407144e-08, "loss": 0.2326, "step": 47406 }, { "epoch": 0.8240539553964088, "grad_norm": 1.2606422053424162, "learning_rate": 7.90142370014697e-08, "loss": 0.175, "step": 47407 }, { "epoch": 0.8240713379339116, "grad_norm": 1.8908521477051004, "learning_rate": 7.899905048319944e-08, "loss": 0.3555, "step": 47408 }, { "epoch": 0.8240887204714145, "grad_norm": 2.1923074173672967, "learning_rate": 7.898386529930878e-08, "loss": 0.2494, "step": 47409 }, { "epoch": 0.8241061030089173, "grad_norm": 2.2580584297657262, "learning_rate": 7.896868144984592e-08, "loss": 0.2521, "step": 47410 }, { "epoch": 0.8241234855464201, "grad_norm": 1.2248762540345692, "learning_rate": 7.895349893485887e-08, "loss": 0.1967, "step": 47411 }, { "epoch": 0.8241408680839228, "grad_norm": 1.4880805703033653, "learning_rate": 7.893831775439575e-08, "loss": 0.1657, "step": 47412 }, { "epoch": 0.8241582506214257, "grad_norm": 1.1354374466253307, "learning_rate": 7.892313790850497e-08, "loss": 0.1431, "step": 47413 }, { "epoch": 0.8241756331589285, "grad_norm": 1.2680609514639647, "learning_rate": 7.890795939723426e-08, "loss": 0.2253, "step": 47414 }, { "epoch": 0.8241930156964313, "grad_norm": 2.208616720527818, "learning_rate": 7.889278222063184e-08, "loss": 0.2495, "step": 47415 }, { "epoch": 0.8242103982339342, "grad_norm": 1.4490008309055733, "learning_rate": 7.887760637874591e-08, "loss": 0.2067, "step": 47416 }, { "epoch": 0.824227780771437, "grad_norm": 2.064416655056767, "learning_rate": 7.886243187162456e-08, "loss": 0.2031, "step": 47417 }, { "epoch": 0.8242451633089398, "grad_norm": 1.3560357835443246, "learning_rate": 7.88472586993158e-08, "loss": 0.1993, "step": 47418 }, { "epoch": 0.8242625458464427, "grad_norm": 1.691762822647737, "learning_rate": 7.883208686186782e-08, "loss": 0.2201, "step": 47419 }, { "epoch": 0.8242799283839455, "grad_norm": 1.7913381438156193, "learning_rate": 7.881691635932863e-08, "loss": 0.2852, "step": 47420 }, { "epoch": 0.8242973109214483, "grad_norm": 0.8601254169734257, "learning_rate": 7.880174719174637e-08, "loss": 0.1462, "step": 47421 }, { "epoch": 0.8243146934589511, "grad_norm": 0.9676247201616551, "learning_rate": 7.878657935916905e-08, "loss": 0.2681, "step": 47422 }, { "epoch": 0.824332075996454, "grad_norm": 1.4412247320797915, "learning_rate": 7.877141286164469e-08, "loss": 0.2149, "step": 47423 }, { "epoch": 0.8243494585339568, "grad_norm": 1.3480412167556015, "learning_rate": 7.875624769922157e-08, "loss": 0.155, "step": 47424 }, { "epoch": 0.8243668410714596, "grad_norm": 1.003111674391143, "learning_rate": 7.874108387194761e-08, "loss": 0.1609, "step": 47425 }, { "epoch": 0.8243842236089625, "grad_norm": 1.7871138132668147, "learning_rate": 7.872592137987105e-08, "loss": 0.2227, "step": 47426 }, { "epoch": 0.8244016061464653, "grad_norm": 1.6859851283863028, "learning_rate": 7.871076022303958e-08, "loss": 0.2679, "step": 47427 }, { "epoch": 0.8244189886839681, "grad_norm": 4.960227612744596, "learning_rate": 7.869560040150153e-08, "loss": 0.2417, "step": 47428 }, { "epoch": 0.824436371221471, "grad_norm": 1.7445715796257422, "learning_rate": 7.868044191530493e-08, "loss": 0.2017, "step": 47429 }, { "epoch": 0.8244537537589738, "grad_norm": 1.5660994844001463, "learning_rate": 7.866528476449785e-08, "loss": 0.1846, "step": 47430 }, { "epoch": 0.8244711362964766, "grad_norm": 1.686070705479029, "learning_rate": 7.865012894912809e-08, "loss": 0.1351, "step": 47431 }, { "epoch": 0.8244885188339793, "grad_norm": 1.17170241940682, "learning_rate": 7.86349744692441e-08, "loss": 0.1376, "step": 47432 }, { "epoch": 0.8245059013714822, "grad_norm": 0.985947730291343, "learning_rate": 7.861982132489359e-08, "loss": 0.3573, "step": 47433 }, { "epoch": 0.824523283908985, "grad_norm": 1.678350931426125, "learning_rate": 7.860466951612465e-08, "loss": 0.1457, "step": 47434 }, { "epoch": 0.8245406664464878, "grad_norm": 0.8137740087565941, "learning_rate": 7.858951904298527e-08, "loss": 0.1866, "step": 47435 }, { "epoch": 0.8245580489839907, "grad_norm": 1.1369041753883862, "learning_rate": 7.857436990552363e-08, "loss": 0.1778, "step": 47436 }, { "epoch": 0.8245754315214935, "grad_norm": 1.5357472877999294, "learning_rate": 7.855922210378763e-08, "loss": 0.1582, "step": 47437 }, { "epoch": 0.8245928140589963, "grad_norm": 3.6785986165278772, "learning_rate": 7.854407563782534e-08, "loss": 0.211, "step": 47438 }, { "epoch": 0.8246101965964991, "grad_norm": 1.8240570801613325, "learning_rate": 7.852893050768467e-08, "loss": 0.2032, "step": 47439 }, { "epoch": 0.824627579134002, "grad_norm": 2.2794873264189257, "learning_rate": 7.851378671341374e-08, "loss": 0.2386, "step": 47440 }, { "epoch": 0.8246449616715048, "grad_norm": 1.3733623571649798, "learning_rate": 7.849864425506048e-08, "loss": 0.1702, "step": 47441 }, { "epoch": 0.8246623442090076, "grad_norm": 1.2263821610785406, "learning_rate": 7.848350313267282e-08, "loss": 0.1737, "step": 47442 }, { "epoch": 0.8246797267465105, "grad_norm": 1.607761572282288, "learning_rate": 7.846836334629881e-08, "loss": 0.2314, "step": 47443 }, { "epoch": 0.8246971092840133, "grad_norm": 1.270542626472085, "learning_rate": 7.845322489598654e-08, "loss": 0.2205, "step": 47444 }, { "epoch": 0.8247144918215161, "grad_norm": 1.427040568626213, "learning_rate": 7.843808778178402e-08, "loss": 0.2327, "step": 47445 }, { "epoch": 0.824731874359019, "grad_norm": 1.3203016009766961, "learning_rate": 7.842295200373883e-08, "loss": 0.1918, "step": 47446 }, { "epoch": 0.8247492568965218, "grad_norm": 1.7926283338696747, "learning_rate": 7.84078175618994e-08, "loss": 0.4137, "step": 47447 }, { "epoch": 0.8247666394340246, "grad_norm": 1.312941448398297, "learning_rate": 7.839268445631353e-08, "loss": 0.2635, "step": 47448 }, { "epoch": 0.8247840219715274, "grad_norm": 0.8805108430076567, "learning_rate": 7.837755268702912e-08, "loss": 0.0988, "step": 47449 }, { "epoch": 0.8248014045090303, "grad_norm": 1.242015317318852, "learning_rate": 7.836242225409411e-08, "loss": 0.2886, "step": 47450 }, { "epoch": 0.824818787046533, "grad_norm": 1.6234491212332947, "learning_rate": 7.834729315755678e-08, "loss": 0.1485, "step": 47451 }, { "epoch": 0.8248361695840358, "grad_norm": 1.7211947524094107, "learning_rate": 7.833216539746474e-08, "loss": 0.2763, "step": 47452 }, { "epoch": 0.8248535521215387, "grad_norm": 2.081332915701607, "learning_rate": 7.831703897386598e-08, "loss": 0.2237, "step": 47453 }, { "epoch": 0.8248709346590415, "grad_norm": 1.2547714779863466, "learning_rate": 7.830191388680841e-08, "loss": 0.2144, "step": 47454 }, { "epoch": 0.8248883171965443, "grad_norm": 1.0427475157276807, "learning_rate": 7.828679013634021e-08, "loss": 0.1316, "step": 47455 }, { "epoch": 0.8249056997340471, "grad_norm": 0.7393169941837302, "learning_rate": 7.827166772250915e-08, "loss": 0.2668, "step": 47456 }, { "epoch": 0.82492308227155, "grad_norm": 1.1592385959481932, "learning_rate": 7.825654664536313e-08, "loss": 0.1927, "step": 47457 }, { "epoch": 0.8249404648090528, "grad_norm": 2.1105869095054413, "learning_rate": 7.824142690495016e-08, "loss": 0.2404, "step": 47458 }, { "epoch": 0.8249578473465556, "grad_norm": 0.9975288721465367, "learning_rate": 7.822630850131811e-08, "loss": 0.2834, "step": 47459 }, { "epoch": 0.8249752298840585, "grad_norm": 1.2544908044735017, "learning_rate": 7.821119143451499e-08, "loss": 0.2092, "step": 47460 }, { "epoch": 0.8249926124215613, "grad_norm": 9.4771432772039, "learning_rate": 7.819607570458858e-08, "loss": 0.3861, "step": 47461 }, { "epoch": 0.8250099949590641, "grad_norm": 1.518407785851913, "learning_rate": 7.818096131158669e-08, "loss": 0.1325, "step": 47462 }, { "epoch": 0.825027377496567, "grad_norm": 2.109132558971726, "learning_rate": 7.816584825555755e-08, "loss": 0.2421, "step": 47463 }, { "epoch": 0.8250447600340698, "grad_norm": 1.1197828738543112, "learning_rate": 7.815073653654903e-08, "loss": 0.2124, "step": 47464 }, { "epoch": 0.8250621425715726, "grad_norm": 0.9119536020771782, "learning_rate": 7.813562615460862e-08, "loss": 0.2145, "step": 47465 }, { "epoch": 0.8250795251090755, "grad_norm": 2.542657699451872, "learning_rate": 7.812051710978463e-08, "loss": 0.219, "step": 47466 }, { "epoch": 0.8250969076465783, "grad_norm": 1.461911733709455, "learning_rate": 7.810540940212479e-08, "loss": 0.2391, "step": 47467 }, { "epoch": 0.8251142901840811, "grad_norm": 1.4150386296335613, "learning_rate": 7.809030303167701e-08, "loss": 0.1969, "step": 47468 }, { "epoch": 0.8251316727215839, "grad_norm": 2.0782721051157624, "learning_rate": 7.807519799848916e-08, "loss": 0.23, "step": 47469 }, { "epoch": 0.8251490552590868, "grad_norm": 1.241742643539246, "learning_rate": 7.806009430260912e-08, "loss": 0.2465, "step": 47470 }, { "epoch": 0.8251664377965895, "grad_norm": 0.8584623647775022, "learning_rate": 7.804499194408475e-08, "loss": 0.1794, "step": 47471 }, { "epoch": 0.8251838203340923, "grad_norm": 1.058096006672184, "learning_rate": 7.802989092296391e-08, "loss": 0.1556, "step": 47472 }, { "epoch": 0.8252012028715952, "grad_norm": 1.688636517900526, "learning_rate": 7.801479123929439e-08, "loss": 0.2746, "step": 47473 }, { "epoch": 0.825218585409098, "grad_norm": 1.515986323867003, "learning_rate": 7.799969289312425e-08, "loss": 0.2128, "step": 47474 }, { "epoch": 0.8252359679466008, "grad_norm": 3.9698755764671887, "learning_rate": 7.798459588450123e-08, "loss": 0.4181, "step": 47475 }, { "epoch": 0.8252533504841036, "grad_norm": 2.3216836089081037, "learning_rate": 7.796950021347326e-08, "loss": 0.1594, "step": 47476 }, { "epoch": 0.8252707330216065, "grad_norm": 0.7957094389237132, "learning_rate": 7.795440588008794e-08, "loss": 0.236, "step": 47477 }, { "epoch": 0.8252881155591093, "grad_norm": 2.2372591172203413, "learning_rate": 7.793931288439337e-08, "loss": 0.1648, "step": 47478 }, { "epoch": 0.8253054980966121, "grad_norm": 1.815243977875023, "learning_rate": 7.792422122643728e-08, "loss": 0.2657, "step": 47479 }, { "epoch": 0.825322880634115, "grad_norm": 1.2659473261818974, "learning_rate": 7.790913090626754e-08, "loss": 0.1553, "step": 47480 }, { "epoch": 0.8253402631716178, "grad_norm": 1.1390011990123778, "learning_rate": 7.789404192393184e-08, "loss": 0.3295, "step": 47481 }, { "epoch": 0.8253576457091206, "grad_norm": 2.0490675424545177, "learning_rate": 7.787895427947827e-08, "loss": 0.1367, "step": 47482 }, { "epoch": 0.8253750282466235, "grad_norm": 1.8771103162043714, "learning_rate": 7.786386797295458e-08, "loss": 0.2119, "step": 47483 }, { "epoch": 0.8253924107841263, "grad_norm": 1.1085428016872783, "learning_rate": 7.784878300440844e-08, "loss": 0.132, "step": 47484 }, { "epoch": 0.8254097933216291, "grad_norm": 1.1497421973438902, "learning_rate": 7.783369937388762e-08, "loss": 0.1576, "step": 47485 }, { "epoch": 0.825427175859132, "grad_norm": 1.0599304492672954, "learning_rate": 7.78186170814401e-08, "loss": 0.2376, "step": 47486 }, { "epoch": 0.8254445583966348, "grad_norm": 1.6048872814939494, "learning_rate": 7.780353612711372e-08, "loss": 0.2822, "step": 47487 }, { "epoch": 0.8254619409341376, "grad_norm": 1.7237145663549855, "learning_rate": 7.778845651095612e-08, "loss": 0.2189, "step": 47488 }, { "epoch": 0.8254793234716404, "grad_norm": 1.0232894408040973, "learning_rate": 7.777337823301522e-08, "loss": 0.1593, "step": 47489 }, { "epoch": 0.8254967060091433, "grad_norm": 1.2697044888059619, "learning_rate": 7.775830129333871e-08, "loss": 0.3927, "step": 47490 }, { "epoch": 0.825514088546646, "grad_norm": 2.1486603482519677, "learning_rate": 7.774322569197444e-08, "loss": 0.1993, "step": 47491 }, { "epoch": 0.8255314710841488, "grad_norm": 1.6637126775699898, "learning_rate": 7.772815142897016e-08, "loss": 0.2612, "step": 47492 }, { "epoch": 0.8255488536216516, "grad_norm": 1.449656453870287, "learning_rate": 7.771307850437358e-08, "loss": 0.1062, "step": 47493 }, { "epoch": 0.8255662361591545, "grad_norm": 1.5567229282491815, "learning_rate": 7.769800691823264e-08, "loss": 0.2674, "step": 47494 }, { "epoch": 0.8255836186966573, "grad_norm": 1.1913942028034883, "learning_rate": 7.768293667059516e-08, "loss": 0.2239, "step": 47495 }, { "epoch": 0.8256010012341601, "grad_norm": 1.8321610573520595, "learning_rate": 7.766786776150857e-08, "loss": 0.2448, "step": 47496 }, { "epoch": 0.825618383771663, "grad_norm": 1.6287444657835777, "learning_rate": 7.765280019102094e-08, "loss": 0.1761, "step": 47497 }, { "epoch": 0.8256357663091658, "grad_norm": 1.097992413983518, "learning_rate": 7.763773395917988e-08, "loss": 0.1485, "step": 47498 }, { "epoch": 0.8256531488466686, "grad_norm": 1.9165910153132908, "learning_rate": 7.76226690660332e-08, "loss": 0.3605, "step": 47499 }, { "epoch": 0.8256705313841715, "grad_norm": 1.573764333497509, "learning_rate": 7.760760551162865e-08, "loss": 0.4995, "step": 47500 }, { "epoch": 0.8256879139216743, "grad_norm": 1.282843854052658, "learning_rate": 7.759254329601383e-08, "loss": 0.1598, "step": 47501 }, { "epoch": 0.8257052964591771, "grad_norm": 1.3331340185897824, "learning_rate": 7.75774824192369e-08, "loss": 0.1927, "step": 47502 }, { "epoch": 0.82572267899668, "grad_norm": 1.5796272108181537, "learning_rate": 7.756242288134513e-08, "loss": 0.175, "step": 47503 }, { "epoch": 0.8257400615341828, "grad_norm": 1.458933215840652, "learning_rate": 7.754736468238632e-08, "loss": 0.1623, "step": 47504 }, { "epoch": 0.8257574440716856, "grad_norm": 1.4361284895764115, "learning_rate": 7.753230782240844e-08, "loss": 0.3229, "step": 47505 }, { "epoch": 0.8257748266091884, "grad_norm": 1.4071869211394326, "learning_rate": 7.751725230145906e-08, "loss": 0.3253, "step": 47506 }, { "epoch": 0.8257922091466913, "grad_norm": 1.2895403319613743, "learning_rate": 7.750219811958592e-08, "loss": 0.2526, "step": 47507 }, { "epoch": 0.8258095916841941, "grad_norm": 1.4053156036684096, "learning_rate": 7.748714527683675e-08, "loss": 0.1285, "step": 47508 }, { "epoch": 0.8258269742216969, "grad_norm": 2.0259962023456954, "learning_rate": 7.74720937732592e-08, "loss": 0.1382, "step": 47509 }, { "epoch": 0.8258443567591998, "grad_norm": 2.496962084972085, "learning_rate": 7.745704360890104e-08, "loss": 0.2271, "step": 47510 }, { "epoch": 0.8258617392967025, "grad_norm": 1.8922989975059623, "learning_rate": 7.744199478380998e-08, "loss": 0.2013, "step": 47511 }, { "epoch": 0.8258791218342053, "grad_norm": 1.5323320540036736, "learning_rate": 7.742694729803356e-08, "loss": 0.1291, "step": 47512 }, { "epoch": 0.8258965043717081, "grad_norm": 1.5312662711634293, "learning_rate": 7.741190115161972e-08, "loss": 0.2458, "step": 47513 }, { "epoch": 0.825913886909211, "grad_norm": 1.3839939812730255, "learning_rate": 7.739685634461612e-08, "loss": 0.253, "step": 47514 }, { "epoch": 0.8259312694467138, "grad_norm": 2.0754307431216956, "learning_rate": 7.738181287707024e-08, "loss": 0.2057, "step": 47515 }, { "epoch": 0.8259486519842166, "grad_norm": 1.2331510588535557, "learning_rate": 7.736677074902981e-08, "loss": 0.2054, "step": 47516 }, { "epoch": 0.8259660345217195, "grad_norm": 1.3154076612355152, "learning_rate": 7.735172996054263e-08, "loss": 0.1819, "step": 47517 }, { "epoch": 0.8259834170592223, "grad_norm": 1.3426279247252635, "learning_rate": 7.733669051165637e-08, "loss": 0.1825, "step": 47518 }, { "epoch": 0.8260007995967251, "grad_norm": 1.2854673331235318, "learning_rate": 7.732165240241861e-08, "loss": 0.1829, "step": 47519 }, { "epoch": 0.826018182134228, "grad_norm": 1.7474061558888667, "learning_rate": 7.730661563287693e-08, "loss": 0.2407, "step": 47520 }, { "epoch": 0.8260355646717308, "grad_norm": 1.7050657914604703, "learning_rate": 7.72915802030794e-08, "loss": 0.3284, "step": 47521 }, { "epoch": 0.8260529472092336, "grad_norm": 1.5536495064373277, "learning_rate": 7.727654611307321e-08, "loss": 0.1218, "step": 47522 }, { "epoch": 0.8260703297467364, "grad_norm": 1.3721674530847676, "learning_rate": 7.726151336290621e-08, "loss": 0.176, "step": 47523 }, { "epoch": 0.8260877122842393, "grad_norm": 1.4314719159155231, "learning_rate": 7.72464819526259e-08, "loss": 0.112, "step": 47524 }, { "epoch": 0.8261050948217421, "grad_norm": 1.6483632548001237, "learning_rate": 7.723145188228019e-08, "loss": 0.2284, "step": 47525 }, { "epoch": 0.8261224773592449, "grad_norm": 1.3945739355939, "learning_rate": 7.72164231519165e-08, "loss": 0.1821, "step": 47526 }, { "epoch": 0.8261398598967478, "grad_norm": 1.3704508746835016, "learning_rate": 7.720139576158263e-08, "loss": 0.1347, "step": 47527 }, { "epoch": 0.8261572424342506, "grad_norm": 4.105646017157311, "learning_rate": 7.718636971132603e-08, "loss": 0.3497, "step": 47528 }, { "epoch": 0.8261746249717534, "grad_norm": 0.8257362808908213, "learning_rate": 7.717134500119449e-08, "loss": 0.2145, "step": 47529 }, { "epoch": 0.8261920075092563, "grad_norm": 3.903339357290112, "learning_rate": 7.715632163123548e-08, "loss": 0.257, "step": 47530 }, { "epoch": 0.826209390046759, "grad_norm": 1.6475851495649945, "learning_rate": 7.714129960149673e-08, "loss": 0.145, "step": 47531 }, { "epoch": 0.8262267725842618, "grad_norm": 3.064340196398611, "learning_rate": 7.712627891202572e-08, "loss": 0.2971, "step": 47532 }, { "epoch": 0.8262441551217646, "grad_norm": 1.5910022423735437, "learning_rate": 7.711125956287034e-08, "loss": 0.241, "step": 47533 }, { "epoch": 0.8262615376592675, "grad_norm": 1.8828968257078131, "learning_rate": 7.709624155407795e-08, "loss": 0.2743, "step": 47534 }, { "epoch": 0.8262789201967703, "grad_norm": 1.588907586990981, "learning_rate": 7.708122488569608e-08, "loss": 0.2191, "step": 47535 }, { "epoch": 0.8262963027342731, "grad_norm": 2.9269713476589057, "learning_rate": 7.706620955777255e-08, "loss": 0.206, "step": 47536 }, { "epoch": 0.826313685271776, "grad_norm": 1.385018516295436, "learning_rate": 7.705119557035484e-08, "loss": 0.3115, "step": 47537 }, { "epoch": 0.8263310678092788, "grad_norm": 1.499693289746252, "learning_rate": 7.703618292349056e-08, "loss": 0.1563, "step": 47538 }, { "epoch": 0.8263484503467816, "grad_norm": 3.77459250767366, "learning_rate": 7.702117161722715e-08, "loss": 0.2922, "step": 47539 }, { "epoch": 0.8263658328842844, "grad_norm": 3.4281860462313176, "learning_rate": 7.700616165161261e-08, "loss": 0.203, "step": 47540 }, { "epoch": 0.8263832154217873, "grad_norm": 2.2045945051969422, "learning_rate": 7.699115302669413e-08, "loss": 0.1667, "step": 47541 }, { "epoch": 0.8264005979592901, "grad_norm": 1.6235904281516182, "learning_rate": 7.697614574251932e-08, "loss": 0.1834, "step": 47542 }, { "epoch": 0.8264179804967929, "grad_norm": 1.5149446958947193, "learning_rate": 7.696113979913571e-08, "loss": 0.1461, "step": 47543 }, { "epoch": 0.8264353630342958, "grad_norm": 1.601743262085201, "learning_rate": 7.694613519659105e-08, "loss": 0.2573, "step": 47544 }, { "epoch": 0.8264527455717986, "grad_norm": 0.8333575347421678, "learning_rate": 7.693113193493284e-08, "loss": 0.1226, "step": 47545 }, { "epoch": 0.8264701281093014, "grad_norm": 1.2539377726727687, "learning_rate": 7.69161300142086e-08, "loss": 0.2002, "step": 47546 }, { "epoch": 0.8264875106468043, "grad_norm": 0.9115346626534144, "learning_rate": 7.690112943446586e-08, "loss": 0.1599, "step": 47547 }, { "epoch": 0.8265048931843071, "grad_norm": 1.5760721116720082, "learning_rate": 7.688613019575219e-08, "loss": 0.2062, "step": 47548 }, { "epoch": 0.8265222757218099, "grad_norm": 2.0831113929563383, "learning_rate": 7.687113229811515e-08, "loss": 0.2753, "step": 47549 }, { "epoch": 0.8265396582593127, "grad_norm": 1.3208405007152926, "learning_rate": 7.685613574160221e-08, "loss": 0.311, "step": 47550 }, { "epoch": 0.8265570407968155, "grad_norm": 1.0398404078066308, "learning_rate": 7.684114052626084e-08, "loss": 0.1256, "step": 47551 }, { "epoch": 0.8265744233343183, "grad_norm": 1.2608405192154142, "learning_rate": 7.682614665213894e-08, "loss": 0.607, "step": 47552 }, { "epoch": 0.8265918058718211, "grad_norm": 4.0146329814890205, "learning_rate": 7.681115411928363e-08, "loss": 0.2535, "step": 47553 }, { "epoch": 0.826609188409324, "grad_norm": 1.619606015986718, "learning_rate": 7.679616292774244e-08, "loss": 0.2886, "step": 47554 }, { "epoch": 0.8266265709468268, "grad_norm": 1.642943478153507, "learning_rate": 7.67811730775631e-08, "loss": 0.1168, "step": 47555 }, { "epoch": 0.8266439534843296, "grad_norm": 2.2062651961597104, "learning_rate": 7.676618456879309e-08, "loss": 0.2325, "step": 47556 }, { "epoch": 0.8266613360218324, "grad_norm": 0.9562157984660734, "learning_rate": 7.675119740147978e-08, "loss": 0.2038, "step": 47557 }, { "epoch": 0.8266787185593353, "grad_norm": 0.7797464497741344, "learning_rate": 7.673621157567085e-08, "loss": 0.2179, "step": 47558 }, { "epoch": 0.8266961010968381, "grad_norm": 1.259353055968631, "learning_rate": 7.67212270914136e-08, "loss": 0.1692, "step": 47559 }, { "epoch": 0.8267134836343409, "grad_norm": 2.2814099063703694, "learning_rate": 7.67062439487557e-08, "loss": 0.2516, "step": 47560 }, { "epoch": 0.8267308661718438, "grad_norm": 1.2961480668244736, "learning_rate": 7.669126214774452e-08, "loss": 0.1732, "step": 47561 }, { "epoch": 0.8267482487093466, "grad_norm": 1.1537662153097499, "learning_rate": 7.667628168842744e-08, "loss": 0.228, "step": 47562 }, { "epoch": 0.8267656312468494, "grad_norm": 1.8946460686269746, "learning_rate": 7.666130257085224e-08, "loss": 0.3266, "step": 47563 }, { "epoch": 0.8267830137843523, "grad_norm": 1.2989758172360117, "learning_rate": 7.664632479506621e-08, "loss": 0.1506, "step": 47564 }, { "epoch": 0.8268003963218551, "grad_norm": 1.2786532723449027, "learning_rate": 7.663134836111706e-08, "loss": 0.1956, "step": 47565 }, { "epoch": 0.8268177788593579, "grad_norm": 1.8047568477000757, "learning_rate": 7.661637326905174e-08, "loss": 0.1921, "step": 47566 }, { "epoch": 0.8268351613968608, "grad_norm": 1.338050601247016, "learning_rate": 7.66013995189182e-08, "loss": 0.3734, "step": 47567 }, { "epoch": 0.8268525439343636, "grad_norm": 1.1844517756537247, "learning_rate": 7.658642711076369e-08, "loss": 0.1941, "step": 47568 }, { "epoch": 0.8268699264718664, "grad_norm": 1.6601390985266682, "learning_rate": 7.657145604463572e-08, "loss": 0.1995, "step": 47569 }, { "epoch": 0.8268873090093692, "grad_norm": 1.1323954378332586, "learning_rate": 7.655648632058165e-08, "loss": 0.161, "step": 47570 }, { "epoch": 0.826904691546872, "grad_norm": 1.1160199573827532, "learning_rate": 7.654151793864916e-08, "loss": 0.2331, "step": 47571 }, { "epoch": 0.8269220740843748, "grad_norm": 3.9834323137729397, "learning_rate": 7.652655089888549e-08, "loss": 0.2248, "step": 47572 }, { "epoch": 0.8269394566218776, "grad_norm": 1.6284153488149533, "learning_rate": 7.65115852013381e-08, "loss": 0.2382, "step": 47573 }, { "epoch": 0.8269568391593805, "grad_norm": 2.4738804824872735, "learning_rate": 7.649662084605429e-08, "loss": 0.5388, "step": 47574 }, { "epoch": 0.8269742216968833, "grad_norm": 0.9655735523097295, "learning_rate": 7.648165783308186e-08, "loss": 0.1559, "step": 47575 }, { "epoch": 0.8269916042343861, "grad_norm": 1.5705429691003485, "learning_rate": 7.646669616246792e-08, "loss": 0.2304, "step": 47576 }, { "epoch": 0.8270089867718889, "grad_norm": 1.2416133024979275, "learning_rate": 7.645173583426007e-08, "loss": 0.1706, "step": 47577 }, { "epoch": 0.8270263693093918, "grad_norm": 1.0007715353054432, "learning_rate": 7.643677684850564e-08, "loss": 0.1431, "step": 47578 }, { "epoch": 0.8270437518468946, "grad_norm": 1.1924344224600072, "learning_rate": 7.64218192052521e-08, "loss": 0.2716, "step": 47579 }, { "epoch": 0.8270611343843974, "grad_norm": 3.5668890870222327, "learning_rate": 7.640686290454673e-08, "loss": 0.2401, "step": 47580 }, { "epoch": 0.8270785169219003, "grad_norm": 1.8918259643957553, "learning_rate": 7.639190794643707e-08, "loss": 0.1906, "step": 47581 }, { "epoch": 0.8270958994594031, "grad_norm": 4.078264450346812, "learning_rate": 7.637695433097036e-08, "loss": 0.1729, "step": 47582 }, { "epoch": 0.8271132819969059, "grad_norm": 1.4130967462012183, "learning_rate": 7.636200205819421e-08, "loss": 0.2151, "step": 47583 }, { "epoch": 0.8271306645344088, "grad_norm": 1.295816977880814, "learning_rate": 7.634705112815603e-08, "loss": 0.1461, "step": 47584 }, { "epoch": 0.8271480470719116, "grad_norm": 1.2901665050699782, "learning_rate": 7.633210154090286e-08, "loss": 0.2742, "step": 47585 }, { "epoch": 0.8271654296094144, "grad_norm": 0.9399212779979896, "learning_rate": 7.631715329648242e-08, "loss": 0.2598, "step": 47586 }, { "epoch": 0.8271828121469172, "grad_norm": 1.028787311021349, "learning_rate": 7.6302206394942e-08, "loss": 0.1426, "step": 47587 }, { "epoch": 0.8272001946844201, "grad_norm": 0.7740602177253276, "learning_rate": 7.628726083632892e-08, "loss": 0.146, "step": 47588 }, { "epoch": 0.8272175772219229, "grad_norm": 1.3563618291495338, "learning_rate": 7.627231662069061e-08, "loss": 0.163, "step": 47589 }, { "epoch": 0.8272349597594257, "grad_norm": 1.6923034280357139, "learning_rate": 7.625737374807439e-08, "loss": 0.2987, "step": 47590 }, { "epoch": 0.8272523422969285, "grad_norm": 2.758592860973432, "learning_rate": 7.624243221852767e-08, "loss": 0.1441, "step": 47591 }, { "epoch": 0.8272697248344313, "grad_norm": 1.8761549051857398, "learning_rate": 7.622749203209777e-08, "loss": 0.2284, "step": 47592 }, { "epoch": 0.8272871073719341, "grad_norm": 1.2033812767656344, "learning_rate": 7.621255318883197e-08, "loss": 0.1681, "step": 47593 }, { "epoch": 0.827304489909437, "grad_norm": 0.9912837551710865, "learning_rate": 7.619761568877776e-08, "loss": 0.1765, "step": 47594 }, { "epoch": 0.8273218724469398, "grad_norm": 2.2041994712202246, "learning_rate": 7.618267953198249e-08, "loss": 0.3409, "step": 47595 }, { "epoch": 0.8273392549844426, "grad_norm": 2.063281794841114, "learning_rate": 7.616774471849352e-08, "loss": 0.2462, "step": 47596 }, { "epoch": 0.8273566375219454, "grad_norm": 1.067289023658521, "learning_rate": 7.615281124835788e-08, "loss": 0.1484, "step": 47597 }, { "epoch": 0.8273740200594483, "grad_norm": 1.280647042675127, "learning_rate": 7.613787912162323e-08, "loss": 0.2085, "step": 47598 }, { "epoch": 0.8273914025969511, "grad_norm": 0.8450089242051863, "learning_rate": 7.612294833833682e-08, "loss": 0.1127, "step": 47599 }, { "epoch": 0.8274087851344539, "grad_norm": 1.6615543451178423, "learning_rate": 7.610801889854595e-08, "loss": 0.3204, "step": 47600 }, { "epoch": 0.8274261676719568, "grad_norm": 1.8893568626239943, "learning_rate": 7.609309080229782e-08, "loss": 0.1747, "step": 47601 }, { "epoch": 0.8274435502094596, "grad_norm": 1.6120486928664155, "learning_rate": 7.607816404963996e-08, "loss": 0.2189, "step": 47602 }, { "epoch": 0.8274609327469624, "grad_norm": 1.4424319212325662, "learning_rate": 7.606323864061975e-08, "loss": 0.2094, "step": 47603 }, { "epoch": 0.8274783152844652, "grad_norm": 1.4841457562538405, "learning_rate": 7.604831457528415e-08, "loss": 0.1806, "step": 47604 }, { "epoch": 0.8274956978219681, "grad_norm": 1.4293411652975916, "learning_rate": 7.603339185368057e-08, "loss": 0.2249, "step": 47605 }, { "epoch": 0.8275130803594709, "grad_norm": 1.324640562132827, "learning_rate": 7.601847047585652e-08, "loss": 0.1283, "step": 47606 }, { "epoch": 0.8275304628969737, "grad_norm": 1.458543150325937, "learning_rate": 7.600355044185914e-08, "loss": 0.2661, "step": 47607 }, { "epoch": 0.8275478454344766, "grad_norm": 1.6141673660433127, "learning_rate": 7.59886317517357e-08, "loss": 0.2336, "step": 47608 }, { "epoch": 0.8275652279719794, "grad_norm": 1.9218259969473037, "learning_rate": 7.597371440553358e-08, "loss": 0.325, "step": 47609 }, { "epoch": 0.8275826105094821, "grad_norm": 1.986632062009221, "learning_rate": 7.59587984033e-08, "loss": 0.199, "step": 47610 }, { "epoch": 0.827599993046985, "grad_norm": 1.4961515283365896, "learning_rate": 7.59438837450822e-08, "loss": 0.3209, "step": 47611 }, { "epoch": 0.8276173755844878, "grad_norm": 2.560453220385189, "learning_rate": 7.59289704309275e-08, "loss": 0.2089, "step": 47612 }, { "epoch": 0.8276347581219906, "grad_norm": 0.6287956804390565, "learning_rate": 7.591405846088305e-08, "loss": 0.1529, "step": 47613 }, { "epoch": 0.8276521406594934, "grad_norm": 1.6587866151605843, "learning_rate": 7.589914783499635e-08, "loss": 0.2254, "step": 47614 }, { "epoch": 0.8276695231969963, "grad_norm": 1.395402208868462, "learning_rate": 7.588423855331466e-08, "loss": 0.1603, "step": 47615 }, { "epoch": 0.8276869057344991, "grad_norm": 1.292576225302386, "learning_rate": 7.586933061588485e-08, "loss": 0.1914, "step": 47616 }, { "epoch": 0.8277042882720019, "grad_norm": 1.205759064131912, "learning_rate": 7.585442402275455e-08, "loss": 0.2341, "step": 47617 }, { "epoch": 0.8277216708095048, "grad_norm": 1.9475304082479739, "learning_rate": 7.583951877397094e-08, "loss": 0.2277, "step": 47618 }, { "epoch": 0.8277390533470076, "grad_norm": 2.2931143552347635, "learning_rate": 7.58246148695812e-08, "loss": 0.304, "step": 47619 }, { "epoch": 0.8277564358845104, "grad_norm": 1.2100728035679322, "learning_rate": 7.580971230963251e-08, "loss": 0.2001, "step": 47620 }, { "epoch": 0.8277738184220133, "grad_norm": 1.2506177178190832, "learning_rate": 7.579481109417213e-08, "loss": 0.1685, "step": 47621 }, { "epoch": 0.8277912009595161, "grad_norm": 1.1706677293642551, "learning_rate": 7.577991122324757e-08, "loss": 0.1908, "step": 47622 }, { "epoch": 0.8278085834970189, "grad_norm": 1.3616299583310518, "learning_rate": 7.576501269690566e-08, "loss": 0.3071, "step": 47623 }, { "epoch": 0.8278259660345217, "grad_norm": 1.1499967317851416, "learning_rate": 7.575011551519368e-08, "loss": 0.2144, "step": 47624 }, { "epoch": 0.8278433485720246, "grad_norm": 2.250418116472189, "learning_rate": 7.573521967815905e-08, "loss": 0.2261, "step": 47625 }, { "epoch": 0.8278607311095274, "grad_norm": 1.3466686592483703, "learning_rate": 7.572032518584892e-08, "loss": 0.1807, "step": 47626 }, { "epoch": 0.8278781136470302, "grad_norm": 1.283483005247315, "learning_rate": 7.570543203831042e-08, "loss": 0.1699, "step": 47627 }, { "epoch": 0.8278954961845331, "grad_norm": 1.710827871671127, "learning_rate": 7.569054023559084e-08, "loss": 0.1408, "step": 47628 }, { "epoch": 0.8279128787220359, "grad_norm": 1.8055610891420237, "learning_rate": 7.567564977773733e-08, "loss": 0.2421, "step": 47629 }, { "epoch": 0.8279302612595386, "grad_norm": 9.794397596188245, "learning_rate": 7.566076066479705e-08, "loss": 0.3675, "step": 47630 }, { "epoch": 0.8279476437970414, "grad_norm": 1.6098438280734995, "learning_rate": 7.564587289681728e-08, "loss": 0.2229, "step": 47631 }, { "epoch": 0.8279650263345443, "grad_norm": 1.4255064745097448, "learning_rate": 7.563098647384503e-08, "loss": 0.1835, "step": 47632 }, { "epoch": 0.8279824088720471, "grad_norm": 1.6572450653445956, "learning_rate": 7.561610139592773e-08, "loss": 0.1555, "step": 47633 }, { "epoch": 0.8279997914095499, "grad_norm": 1.5455695299497518, "learning_rate": 7.560121766311251e-08, "loss": 0.3349, "step": 47634 }, { "epoch": 0.8280171739470528, "grad_norm": 1.843221635574737, "learning_rate": 7.558633527544644e-08, "loss": 0.3019, "step": 47635 }, { "epoch": 0.8280345564845556, "grad_norm": 0.7862169586415471, "learning_rate": 7.557145423297656e-08, "loss": 0.2003, "step": 47636 }, { "epoch": 0.8280519390220584, "grad_norm": 1.2900315430522713, "learning_rate": 7.555657453575032e-08, "loss": 0.2256, "step": 47637 }, { "epoch": 0.8280693215595613, "grad_norm": 1.2060934080086176, "learning_rate": 7.554169618381479e-08, "loss": 0.2509, "step": 47638 }, { "epoch": 0.8280867040970641, "grad_norm": 1.6885579719727575, "learning_rate": 7.55268191772171e-08, "loss": 0.182, "step": 47639 }, { "epoch": 0.8281040866345669, "grad_norm": 1.5992095705883678, "learning_rate": 7.551194351600427e-08, "loss": 0.2138, "step": 47640 }, { "epoch": 0.8281214691720697, "grad_norm": 1.616410490634324, "learning_rate": 7.549706920022386e-08, "loss": 0.2832, "step": 47641 }, { "epoch": 0.8281388517095726, "grad_norm": 1.3799902337876464, "learning_rate": 7.548219622992258e-08, "loss": 0.1191, "step": 47642 }, { "epoch": 0.8281562342470754, "grad_norm": 2.0420002069076735, "learning_rate": 7.54673246051476e-08, "loss": 0.3225, "step": 47643 }, { "epoch": 0.8281736167845782, "grad_norm": 1.4799639397416098, "learning_rate": 7.545245432594632e-08, "loss": 0.2215, "step": 47644 }, { "epoch": 0.8281909993220811, "grad_norm": 1.5698436229957107, "learning_rate": 7.543758539236577e-08, "loss": 0.2629, "step": 47645 }, { "epoch": 0.8282083818595839, "grad_norm": 1.5002969754036375, "learning_rate": 7.542271780445303e-08, "loss": 0.5328, "step": 47646 }, { "epoch": 0.8282257643970867, "grad_norm": 0.9121906498878283, "learning_rate": 7.540785156225527e-08, "loss": 0.3077, "step": 47647 }, { "epoch": 0.8282431469345896, "grad_norm": 1.8373711672437076, "learning_rate": 7.539298666581951e-08, "loss": 0.2894, "step": 47648 }, { "epoch": 0.8282605294720924, "grad_norm": 2.2339573142421463, "learning_rate": 7.537812311519298e-08, "loss": 0.1874, "step": 47649 }, { "epoch": 0.8282779120095951, "grad_norm": 1.1118969704709445, "learning_rate": 7.536326091042272e-08, "loss": 0.0906, "step": 47650 }, { "epoch": 0.8282952945470979, "grad_norm": 1.884753091213042, "learning_rate": 7.534840005155573e-08, "loss": 0.2399, "step": 47651 }, { "epoch": 0.8283126770846008, "grad_norm": 0.9548905186238216, "learning_rate": 7.533354053863944e-08, "loss": 0.1981, "step": 47652 }, { "epoch": 0.8283300596221036, "grad_norm": 1.9825257220475347, "learning_rate": 7.531868237172079e-08, "loss": 0.219, "step": 47653 }, { "epoch": 0.8283474421596064, "grad_norm": 3.728280586008841, "learning_rate": 7.530382555084674e-08, "loss": 0.2302, "step": 47654 }, { "epoch": 0.8283648246971093, "grad_norm": 0.9167139532919845, "learning_rate": 7.528897007606438e-08, "loss": 0.201, "step": 47655 }, { "epoch": 0.8283822072346121, "grad_norm": 1.7036805499036052, "learning_rate": 7.527411594742095e-08, "loss": 0.3138, "step": 47656 }, { "epoch": 0.8283995897721149, "grad_norm": 1.4081015731079698, "learning_rate": 7.52592631649635e-08, "loss": 0.1375, "step": 47657 }, { "epoch": 0.8284169723096177, "grad_norm": 1.9697155074618626, "learning_rate": 7.524441172873908e-08, "loss": 0.2477, "step": 47658 }, { "epoch": 0.8284343548471206, "grad_norm": 1.3517268164345293, "learning_rate": 7.522956163879468e-08, "loss": 0.1737, "step": 47659 }, { "epoch": 0.8284517373846234, "grad_norm": 1.4636765707200077, "learning_rate": 7.521471289517761e-08, "loss": 0.1939, "step": 47660 }, { "epoch": 0.8284691199221262, "grad_norm": 1.470162751736489, "learning_rate": 7.519986549793467e-08, "loss": 0.2408, "step": 47661 }, { "epoch": 0.8284865024596291, "grad_norm": 1.6446532146329347, "learning_rate": 7.518501944711304e-08, "loss": 0.194, "step": 47662 }, { "epoch": 0.8285038849971319, "grad_norm": 1.0880054954460903, "learning_rate": 7.517017474275966e-08, "loss": 0.1768, "step": 47663 }, { "epoch": 0.8285212675346347, "grad_norm": 1.5071269987268314, "learning_rate": 7.515533138492174e-08, "loss": 0.1548, "step": 47664 }, { "epoch": 0.8285386500721376, "grad_norm": 1.0785515776161465, "learning_rate": 7.514048937364631e-08, "loss": 0.1452, "step": 47665 }, { "epoch": 0.8285560326096404, "grad_norm": 1.192447655701552, "learning_rate": 7.51256487089803e-08, "loss": 0.1373, "step": 47666 }, { "epoch": 0.8285734151471432, "grad_norm": 1.3979357263680423, "learning_rate": 7.511080939097086e-08, "loss": 0.19, "step": 47667 }, { "epoch": 0.828590797684646, "grad_norm": 1.5074449502618859, "learning_rate": 7.509597141966495e-08, "loss": 0.2223, "step": 47668 }, { "epoch": 0.8286081802221489, "grad_norm": 0.9905164398727866, "learning_rate": 7.508113479510963e-08, "loss": 0.1127, "step": 47669 }, { "epoch": 0.8286255627596516, "grad_norm": 1.2924412233768148, "learning_rate": 7.506629951735194e-08, "loss": 0.1936, "step": 47670 }, { "epoch": 0.8286429452971544, "grad_norm": 0.7837130870006964, "learning_rate": 7.505146558643877e-08, "loss": 0.2532, "step": 47671 }, { "epoch": 0.8286603278346573, "grad_norm": 2.2322371795242297, "learning_rate": 7.503663300241748e-08, "loss": 0.2214, "step": 47672 }, { "epoch": 0.8286777103721601, "grad_norm": 1.3208961346662549, "learning_rate": 7.502180176533474e-08, "loss": 0.1657, "step": 47673 }, { "epoch": 0.8286950929096629, "grad_norm": 2.231214189245741, "learning_rate": 7.500697187523752e-08, "loss": 0.1607, "step": 47674 }, { "epoch": 0.8287124754471658, "grad_norm": 1.2221123976398651, "learning_rate": 7.499214333217313e-08, "loss": 0.1659, "step": 47675 }, { "epoch": 0.8287298579846686, "grad_norm": 1.3120682032933682, "learning_rate": 7.497731613618834e-08, "loss": 0.2109, "step": 47676 }, { "epoch": 0.8287472405221714, "grad_norm": 1.2248649896774135, "learning_rate": 7.496249028733032e-08, "loss": 0.2346, "step": 47677 }, { "epoch": 0.8287646230596742, "grad_norm": 0.8862906205479155, "learning_rate": 7.494766578564587e-08, "loss": 0.1751, "step": 47678 }, { "epoch": 0.8287820055971771, "grad_norm": 1.0846017251150195, "learning_rate": 7.493284263118211e-08, "loss": 0.173, "step": 47679 }, { "epoch": 0.8287993881346799, "grad_norm": 1.0760399448247333, "learning_rate": 7.491802082398596e-08, "loss": 0.2131, "step": 47680 }, { "epoch": 0.8288167706721827, "grad_norm": 1.0643867629618136, "learning_rate": 7.490320036410447e-08, "loss": 0.2383, "step": 47681 }, { "epoch": 0.8288341532096856, "grad_norm": 2.331984631216619, "learning_rate": 7.488838125158437e-08, "loss": 0.2264, "step": 47682 }, { "epoch": 0.8288515357471884, "grad_norm": 2.0019373712214876, "learning_rate": 7.487356348647301e-08, "loss": 0.2511, "step": 47683 }, { "epoch": 0.8288689182846912, "grad_norm": 1.0218416377481367, "learning_rate": 7.485874706881712e-08, "loss": 0.1623, "step": 47684 }, { "epoch": 0.828886300822194, "grad_norm": 1.290048378762999, "learning_rate": 7.484393199866384e-08, "loss": 0.2597, "step": 47685 }, { "epoch": 0.8289036833596969, "grad_norm": 1.3625089007826003, "learning_rate": 7.482911827605976e-08, "loss": 0.2421, "step": 47686 }, { "epoch": 0.8289210658971997, "grad_norm": 0.8105278119316514, "learning_rate": 7.481430590105215e-08, "loss": 0.174, "step": 47687 }, { "epoch": 0.8289384484347025, "grad_norm": 1.548879763684274, "learning_rate": 7.479949487368792e-08, "loss": 0.2158, "step": 47688 }, { "epoch": 0.8289558309722054, "grad_norm": 0.8857958077170988, "learning_rate": 7.478468519401393e-08, "loss": 0.2001, "step": 47689 }, { "epoch": 0.8289732135097081, "grad_norm": 1.7606609600340992, "learning_rate": 7.476987686207709e-08, "loss": 0.2532, "step": 47690 }, { "epoch": 0.8289905960472109, "grad_norm": 1.0892086768916724, "learning_rate": 7.475506987792463e-08, "loss": 0.1456, "step": 47691 }, { "epoch": 0.8290079785847138, "grad_norm": 2.298804101797484, "learning_rate": 7.474026424160312e-08, "loss": 0.2547, "step": 47692 }, { "epoch": 0.8290253611222166, "grad_norm": 1.5160717100648895, "learning_rate": 7.47254599531596e-08, "loss": 0.2738, "step": 47693 }, { "epoch": 0.8290427436597194, "grad_norm": 1.8235845888716156, "learning_rate": 7.471065701264096e-08, "loss": 0.3441, "step": 47694 }, { "epoch": 0.8290601261972222, "grad_norm": 1.7350151799233087, "learning_rate": 7.469585542009426e-08, "loss": 0.1638, "step": 47695 }, { "epoch": 0.8290775087347251, "grad_norm": 1.1286007594356622, "learning_rate": 7.468105517556634e-08, "loss": 0.2551, "step": 47696 }, { "epoch": 0.8290948912722279, "grad_norm": 1.5766372876710304, "learning_rate": 7.466625627910411e-08, "loss": 0.1626, "step": 47697 }, { "epoch": 0.8291122738097307, "grad_norm": 1.582531006944917, "learning_rate": 7.46514587307544e-08, "loss": 0.2033, "step": 47698 }, { "epoch": 0.8291296563472336, "grad_norm": 2.4815274585061378, "learning_rate": 7.463666253056427e-08, "loss": 0.3109, "step": 47699 }, { "epoch": 0.8291470388847364, "grad_norm": 1.815699348684496, "learning_rate": 7.462186767858047e-08, "loss": 0.1885, "step": 47700 }, { "epoch": 0.8291644214222392, "grad_norm": 1.5752763721242116, "learning_rate": 7.460707417484996e-08, "loss": 0.1587, "step": 47701 }, { "epoch": 0.8291818039597421, "grad_norm": 1.2160935980920464, "learning_rate": 7.459228201941948e-08, "loss": 0.2057, "step": 47702 }, { "epoch": 0.8291991864972449, "grad_norm": 1.3125539905201038, "learning_rate": 7.457749121233619e-08, "loss": 0.2163, "step": 47703 }, { "epoch": 0.8292165690347477, "grad_norm": 1.083852587383675, "learning_rate": 7.456270175364693e-08, "loss": 0.1775, "step": 47704 }, { "epoch": 0.8292339515722505, "grad_norm": 1.6382695491480657, "learning_rate": 7.454791364339824e-08, "loss": 0.2103, "step": 47705 }, { "epoch": 0.8292513341097534, "grad_norm": 1.551993342270448, "learning_rate": 7.453312688163737e-08, "loss": 0.3179, "step": 47706 }, { "epoch": 0.8292687166472562, "grad_norm": 1.6165050895749595, "learning_rate": 7.451834146841102e-08, "loss": 0.2141, "step": 47707 }, { "epoch": 0.829286099184759, "grad_norm": 1.1006029143742044, "learning_rate": 7.450355740376607e-08, "loss": 0.2749, "step": 47708 }, { "epoch": 0.8293034817222619, "grad_norm": 1.3386161360247364, "learning_rate": 7.448877468774945e-08, "loss": 0.2386, "step": 47709 }, { "epoch": 0.8293208642597646, "grad_norm": 1.145601033115371, "learning_rate": 7.447399332040788e-08, "loss": 0.2859, "step": 47710 }, { "epoch": 0.8293382467972674, "grad_norm": 1.1560431411618959, "learning_rate": 7.445921330178828e-08, "loss": 0.1821, "step": 47711 }, { "epoch": 0.8293556293347703, "grad_norm": 1.3047635788491794, "learning_rate": 7.444443463193751e-08, "loss": 0.1311, "step": 47712 }, { "epoch": 0.8293730118722731, "grad_norm": 1.2702239865357154, "learning_rate": 7.442965731090228e-08, "loss": 0.1973, "step": 47713 }, { "epoch": 0.8293903944097759, "grad_norm": 1.7332504533580149, "learning_rate": 7.441488133872969e-08, "loss": 0.1887, "step": 47714 }, { "epoch": 0.8294077769472787, "grad_norm": 1.9384817649289636, "learning_rate": 7.440010671546642e-08, "loss": 0.1724, "step": 47715 }, { "epoch": 0.8294251594847816, "grad_norm": 1.0013386301805367, "learning_rate": 7.438533344115943e-08, "loss": 0.2014, "step": 47716 }, { "epoch": 0.8294425420222844, "grad_norm": 2.0358604423776563, "learning_rate": 7.437056151585513e-08, "loss": 0.1666, "step": 47717 }, { "epoch": 0.8294599245597872, "grad_norm": 1.4827119579396724, "learning_rate": 7.435579093960081e-08, "loss": 0.2457, "step": 47718 }, { "epoch": 0.8294773070972901, "grad_norm": 2.009187607115769, "learning_rate": 7.43410217124431e-08, "loss": 0.2436, "step": 47719 }, { "epoch": 0.8294946896347929, "grad_norm": 1.3723687346447582, "learning_rate": 7.432625383442881e-08, "loss": 0.2899, "step": 47720 }, { "epoch": 0.8295120721722957, "grad_norm": 1.328784726575516, "learning_rate": 7.43114873056046e-08, "loss": 0.1983, "step": 47721 }, { "epoch": 0.8295294547097986, "grad_norm": 2.3327884063801316, "learning_rate": 7.429672212601762e-08, "loss": 0.256, "step": 47722 }, { "epoch": 0.8295468372473014, "grad_norm": 1.5217798863516283, "learning_rate": 7.428195829571454e-08, "loss": 0.1722, "step": 47723 }, { "epoch": 0.8295642197848042, "grad_norm": 1.45945947166957, "learning_rate": 7.4267195814742e-08, "loss": 0.2037, "step": 47724 }, { "epoch": 0.829581602322307, "grad_norm": 1.5093888530667308, "learning_rate": 7.42524346831468e-08, "loss": 0.1897, "step": 47725 }, { "epoch": 0.8295989848598099, "grad_norm": 1.2422769628131367, "learning_rate": 7.423767490097587e-08, "loss": 0.1734, "step": 47726 }, { "epoch": 0.8296163673973127, "grad_norm": 1.2418570405849876, "learning_rate": 7.422291646827595e-08, "loss": 0.1869, "step": 47727 }, { "epoch": 0.8296337499348155, "grad_norm": 1.1276525315064292, "learning_rate": 7.420815938509378e-08, "loss": 0.1907, "step": 47728 }, { "epoch": 0.8296511324723184, "grad_norm": 1.5960561721727022, "learning_rate": 7.419340365147619e-08, "loss": 0.2663, "step": 47729 }, { "epoch": 0.8296685150098211, "grad_norm": 1.172362367215587, "learning_rate": 7.417864926746986e-08, "loss": 0.215, "step": 47730 }, { "epoch": 0.8296858975473239, "grad_norm": 1.7934773062770069, "learning_rate": 7.416389623312169e-08, "loss": 0.2692, "step": 47731 }, { "epoch": 0.8297032800848267, "grad_norm": 1.1027646409827165, "learning_rate": 7.414914454847815e-08, "loss": 0.3042, "step": 47732 }, { "epoch": 0.8297206626223296, "grad_norm": 1.271536594227809, "learning_rate": 7.413439421358637e-08, "loss": 0.2574, "step": 47733 }, { "epoch": 0.8297380451598324, "grad_norm": 1.3525201912840668, "learning_rate": 7.41196452284929e-08, "loss": 0.1266, "step": 47734 }, { "epoch": 0.8297554276973352, "grad_norm": 1.702462387772068, "learning_rate": 7.410489759324467e-08, "loss": 0.198, "step": 47735 }, { "epoch": 0.8297728102348381, "grad_norm": 1.184895562829971, "learning_rate": 7.409015130788803e-08, "loss": 0.1834, "step": 47736 }, { "epoch": 0.8297901927723409, "grad_norm": 1.7998158060794331, "learning_rate": 7.407540637247006e-08, "loss": 0.1686, "step": 47737 }, { "epoch": 0.8298075753098437, "grad_norm": 1.4616914233483893, "learning_rate": 7.406066278703743e-08, "loss": 0.2331, "step": 47738 }, { "epoch": 0.8298249578473466, "grad_norm": 1.133183375427891, "learning_rate": 7.404592055163683e-08, "loss": 0.093, "step": 47739 }, { "epoch": 0.8298423403848494, "grad_norm": 1.8301203641331418, "learning_rate": 7.403117966631489e-08, "loss": 0.242, "step": 47740 }, { "epoch": 0.8298597229223522, "grad_norm": 1.4780606828933205, "learning_rate": 7.401644013111852e-08, "loss": 0.2194, "step": 47741 }, { "epoch": 0.829877105459855, "grad_norm": 1.1667534218274718, "learning_rate": 7.400170194609451e-08, "loss": 0.1644, "step": 47742 }, { "epoch": 0.8298944879973579, "grad_norm": 1.9523055986622289, "learning_rate": 7.398696511128927e-08, "loss": 0.1762, "step": 47743 }, { "epoch": 0.8299118705348607, "grad_norm": 1.1044958333445354, "learning_rate": 7.397222962674954e-08, "loss": 0.2294, "step": 47744 }, { "epoch": 0.8299292530723635, "grad_norm": 1.8791352501960479, "learning_rate": 7.395749549252228e-08, "loss": 0.2121, "step": 47745 }, { "epoch": 0.8299466356098664, "grad_norm": 0.8692322966789625, "learning_rate": 7.394276270865402e-08, "loss": 0.1856, "step": 47746 }, { "epoch": 0.8299640181473692, "grad_norm": 1.4126065500627851, "learning_rate": 7.392803127519148e-08, "loss": 0.2166, "step": 47747 }, { "epoch": 0.829981400684872, "grad_norm": 2.1340697248973153, "learning_rate": 7.391330119218141e-08, "loss": 0.2826, "step": 47748 }, { "epoch": 0.8299987832223747, "grad_norm": 1.3206154350379014, "learning_rate": 7.389857245967041e-08, "loss": 0.1779, "step": 47749 }, { "epoch": 0.8300161657598776, "grad_norm": 2.269559049147065, "learning_rate": 7.388384507770524e-08, "loss": 0.2996, "step": 47750 }, { "epoch": 0.8300335482973804, "grad_norm": 1.8527976522898366, "learning_rate": 7.386911904633247e-08, "loss": 0.3025, "step": 47751 }, { "epoch": 0.8300509308348832, "grad_norm": 1.163445385558449, "learning_rate": 7.385439436559876e-08, "loss": 0.2315, "step": 47752 }, { "epoch": 0.8300683133723861, "grad_norm": 1.445740232202907, "learning_rate": 7.3839671035551e-08, "loss": 0.1665, "step": 47753 }, { "epoch": 0.8300856959098889, "grad_norm": 1.0801060887050746, "learning_rate": 7.382494905623582e-08, "loss": 0.2152, "step": 47754 }, { "epoch": 0.8301030784473917, "grad_norm": 1.7571677168052067, "learning_rate": 7.381022842769958e-08, "loss": 0.1458, "step": 47755 }, { "epoch": 0.8301204609848946, "grad_norm": 1.0774837183180606, "learning_rate": 7.379550914998922e-08, "loss": 0.141, "step": 47756 }, { "epoch": 0.8301378435223974, "grad_norm": 1.192519054552085, "learning_rate": 7.378079122315133e-08, "loss": 0.2272, "step": 47757 }, { "epoch": 0.8301552260599002, "grad_norm": 1.6932036893040532, "learning_rate": 7.376607464723256e-08, "loss": 0.1519, "step": 47758 }, { "epoch": 0.830172608597403, "grad_norm": 1.4511015426483111, "learning_rate": 7.37513594222795e-08, "loss": 0.1956, "step": 47759 }, { "epoch": 0.8301899911349059, "grad_norm": 1.22412061719023, "learning_rate": 7.373664554833869e-08, "loss": 0.1612, "step": 47760 }, { "epoch": 0.8302073736724087, "grad_norm": 0.7647637925209358, "learning_rate": 7.37219330254572e-08, "loss": 0.2379, "step": 47761 }, { "epoch": 0.8302247562099115, "grad_norm": 1.2391282539992214, "learning_rate": 7.370722185368122e-08, "loss": 0.1862, "step": 47762 }, { "epoch": 0.8302421387474144, "grad_norm": 0.9273632357569759, "learning_rate": 7.369251203305743e-08, "loss": 0.1374, "step": 47763 }, { "epoch": 0.8302595212849172, "grad_norm": 0.9389222379888841, "learning_rate": 7.367780356363262e-08, "loss": 0.2584, "step": 47764 }, { "epoch": 0.83027690382242, "grad_norm": 0.8863646817243445, "learning_rate": 7.366309644545338e-08, "loss": 0.2993, "step": 47765 }, { "epoch": 0.8302942863599229, "grad_norm": 1.1005701121473905, "learning_rate": 7.364839067856626e-08, "loss": 0.2723, "step": 47766 }, { "epoch": 0.8303116688974257, "grad_norm": 1.5503280446378307, "learning_rate": 7.363368626301791e-08, "loss": 0.222, "step": 47767 }, { "epoch": 0.8303290514349285, "grad_norm": 1.126762928729913, "learning_rate": 7.36189831988549e-08, "loss": 0.2787, "step": 47768 }, { "epoch": 0.8303464339724312, "grad_norm": 1.124938384806276, "learning_rate": 7.360428148612386e-08, "loss": 0.271, "step": 47769 }, { "epoch": 0.8303638165099341, "grad_norm": 1.398119730188291, "learning_rate": 7.358958112487135e-08, "loss": 0.3496, "step": 47770 }, { "epoch": 0.8303811990474369, "grad_norm": 1.7408254587484375, "learning_rate": 7.35748821151439e-08, "loss": 0.211, "step": 47771 }, { "epoch": 0.8303985815849397, "grad_norm": 1.10852505485454, "learning_rate": 7.35601844569883e-08, "loss": 0.2897, "step": 47772 }, { "epoch": 0.8304159641224426, "grad_norm": 1.5206715660956704, "learning_rate": 7.354548815045114e-08, "loss": 0.2304, "step": 47773 }, { "epoch": 0.8304333466599454, "grad_norm": 1.7139506060067398, "learning_rate": 7.353079319557876e-08, "loss": 0.2714, "step": 47774 }, { "epoch": 0.8304507291974482, "grad_norm": 1.1440495020006143, "learning_rate": 7.351609959241778e-08, "loss": 0.2627, "step": 47775 }, { "epoch": 0.830468111734951, "grad_norm": 4.194869809873722, "learning_rate": 7.35014073410149e-08, "loss": 0.2678, "step": 47776 }, { "epoch": 0.8304854942724539, "grad_norm": 1.1730074959060934, "learning_rate": 7.348671644141668e-08, "loss": 0.1715, "step": 47777 }, { "epoch": 0.8305028768099567, "grad_norm": 1.5514060972830543, "learning_rate": 7.347202689366965e-08, "loss": 0.2149, "step": 47778 }, { "epoch": 0.8305202593474595, "grad_norm": 1.1814023160479277, "learning_rate": 7.345733869782029e-08, "loss": 0.1881, "step": 47779 }, { "epoch": 0.8305376418849624, "grad_norm": 2.6237214090731804, "learning_rate": 7.34426518539154e-08, "loss": 0.2347, "step": 47780 }, { "epoch": 0.8305550244224652, "grad_norm": 1.7702041570938143, "learning_rate": 7.342796636200127e-08, "loss": 0.2844, "step": 47781 }, { "epoch": 0.830572406959968, "grad_norm": 1.6909220024201486, "learning_rate": 7.34132822221245e-08, "loss": 0.4322, "step": 47782 }, { "epoch": 0.8305897894974709, "grad_norm": 2.3875519635955476, "learning_rate": 7.339859943433164e-08, "loss": 0.3197, "step": 47783 }, { "epoch": 0.8306071720349737, "grad_norm": 1.6549468081221015, "learning_rate": 7.338391799866928e-08, "loss": 0.2043, "step": 47784 }, { "epoch": 0.8306245545724765, "grad_norm": 1.2821140226837053, "learning_rate": 7.336923791518396e-08, "loss": 0.1623, "step": 47785 }, { "epoch": 0.8306419371099794, "grad_norm": 1.835100453199642, "learning_rate": 7.335455918392219e-08, "loss": 0.1867, "step": 47786 }, { "epoch": 0.8306593196474822, "grad_norm": 1.817758828328073, "learning_rate": 7.333988180493045e-08, "loss": 0.2092, "step": 47787 }, { "epoch": 0.830676702184985, "grad_norm": 1.521298535134032, "learning_rate": 7.332520577825535e-08, "loss": 0.3762, "step": 47788 }, { "epoch": 0.8306940847224877, "grad_norm": 1.0889679308970308, "learning_rate": 7.331053110394331e-08, "loss": 0.1828, "step": 47789 }, { "epoch": 0.8307114672599906, "grad_norm": 1.240540538255654, "learning_rate": 7.329585778204083e-08, "loss": 0.5015, "step": 47790 }, { "epoch": 0.8307288497974934, "grad_norm": 1.5418682177156684, "learning_rate": 7.328118581259446e-08, "loss": 0.2911, "step": 47791 }, { "epoch": 0.8307462323349962, "grad_norm": 1.1733509804569928, "learning_rate": 7.326651519565086e-08, "loss": 0.1422, "step": 47792 }, { "epoch": 0.8307636148724991, "grad_norm": 1.2115664011384992, "learning_rate": 7.325184593125632e-08, "loss": 0.1694, "step": 47793 }, { "epoch": 0.8307809974100019, "grad_norm": 1.285791305073973, "learning_rate": 7.323717801945723e-08, "loss": 0.2036, "step": 47794 }, { "epoch": 0.8307983799475047, "grad_norm": 0.9207476733558889, "learning_rate": 7.32225114603004e-08, "loss": 0.1482, "step": 47795 }, { "epoch": 0.8308157624850075, "grad_norm": 1.1544252707583116, "learning_rate": 7.320784625383214e-08, "loss": 0.239, "step": 47796 }, { "epoch": 0.8308331450225104, "grad_norm": 1.6681024568005474, "learning_rate": 7.319318240009897e-08, "loss": 0.1751, "step": 47797 }, { "epoch": 0.8308505275600132, "grad_norm": 1.6434336689421256, "learning_rate": 7.317851989914736e-08, "loss": 0.2588, "step": 47798 }, { "epoch": 0.830867910097516, "grad_norm": 1.3500927822315063, "learning_rate": 7.316385875102377e-08, "loss": 0.2038, "step": 47799 }, { "epoch": 0.8308852926350189, "grad_norm": 1.8011890986452248, "learning_rate": 7.314919895577465e-08, "loss": 0.1939, "step": 47800 }, { "epoch": 0.8309026751725217, "grad_norm": 1.658187360537434, "learning_rate": 7.313454051344653e-08, "loss": 0.171, "step": 47801 }, { "epoch": 0.8309200577100245, "grad_norm": 1.9476232690844129, "learning_rate": 7.31198834240857e-08, "loss": 0.1644, "step": 47802 }, { "epoch": 0.8309374402475274, "grad_norm": 1.514213323783082, "learning_rate": 7.310522768773885e-08, "loss": 0.2065, "step": 47803 }, { "epoch": 0.8309548227850302, "grad_norm": 1.6950836355263719, "learning_rate": 7.309057330445234e-08, "loss": 0.3089, "step": 47804 }, { "epoch": 0.830972205322533, "grad_norm": 1.7100094691528147, "learning_rate": 7.307592027427272e-08, "loss": 0.2049, "step": 47805 }, { "epoch": 0.8309895878600358, "grad_norm": 1.598233758386071, "learning_rate": 7.30612685972461e-08, "loss": 0.1361, "step": 47806 }, { "epoch": 0.8310069703975387, "grad_norm": 1.8996794547845441, "learning_rate": 7.304661827341924e-08, "loss": 0.1938, "step": 47807 }, { "epoch": 0.8310243529350415, "grad_norm": 0.8952569026046732, "learning_rate": 7.303196930283851e-08, "loss": 0.143, "step": 47808 }, { "epoch": 0.8310417354725442, "grad_norm": 1.6460479305881135, "learning_rate": 7.301732168555024e-08, "loss": 0.206, "step": 47809 }, { "epoch": 0.8310591180100471, "grad_norm": 1.492068071143203, "learning_rate": 7.300267542160082e-08, "loss": 0.1592, "step": 47810 }, { "epoch": 0.8310765005475499, "grad_norm": 1.2765543920551266, "learning_rate": 7.298803051103702e-08, "loss": 0.2617, "step": 47811 }, { "epoch": 0.8310938830850527, "grad_norm": 2.0660772480782956, "learning_rate": 7.297338695390492e-08, "loss": 0.1496, "step": 47812 }, { "epoch": 0.8311112656225556, "grad_norm": 1.5679967499231762, "learning_rate": 7.295874475025099e-08, "loss": 0.1356, "step": 47813 }, { "epoch": 0.8311286481600584, "grad_norm": 1.1038939928147686, "learning_rate": 7.294410390012157e-08, "loss": 0.1671, "step": 47814 }, { "epoch": 0.8311460306975612, "grad_norm": 1.222192172524884, "learning_rate": 7.29294644035633e-08, "loss": 0.2034, "step": 47815 }, { "epoch": 0.831163413235064, "grad_norm": 2.7492200983783275, "learning_rate": 7.29148262606224e-08, "loss": 0.1712, "step": 47816 }, { "epoch": 0.8311807957725669, "grad_norm": 2.5780065895479827, "learning_rate": 7.290018947134535e-08, "loss": 0.4511, "step": 47817 }, { "epoch": 0.8311981783100697, "grad_norm": 1.9274913567969272, "learning_rate": 7.28855540357785e-08, "loss": 0.3115, "step": 47818 }, { "epoch": 0.8312155608475725, "grad_norm": 1.0165008456269768, "learning_rate": 7.287091995396821e-08, "loss": 0.2777, "step": 47819 }, { "epoch": 0.8312329433850754, "grad_norm": 1.6064242411772407, "learning_rate": 7.285628722596094e-08, "loss": 0.1453, "step": 47820 }, { "epoch": 0.8312503259225782, "grad_norm": 1.3730851023690227, "learning_rate": 7.284165585180302e-08, "loss": 0.2798, "step": 47821 }, { "epoch": 0.831267708460081, "grad_norm": 0.546417822076201, "learning_rate": 7.282702583154071e-08, "loss": 0.177, "step": 47822 }, { "epoch": 0.8312850909975839, "grad_norm": 0.8991789044583014, "learning_rate": 7.281239716522058e-08, "loss": 0.2888, "step": 47823 }, { "epoch": 0.8313024735350867, "grad_norm": 1.5685957905422758, "learning_rate": 7.27977698528891e-08, "loss": 0.2157, "step": 47824 }, { "epoch": 0.8313198560725895, "grad_norm": 0.9411798004340193, "learning_rate": 7.278314389459217e-08, "loss": 0.2871, "step": 47825 }, { "epoch": 0.8313372386100923, "grad_norm": 0.6654826885625442, "learning_rate": 7.276851929037659e-08, "loss": 0.1792, "step": 47826 }, { "epoch": 0.8313546211475952, "grad_norm": 0.8724152039110467, "learning_rate": 7.275389604028847e-08, "loss": 0.0967, "step": 47827 }, { "epoch": 0.831372003685098, "grad_norm": 0.6621249395824353, "learning_rate": 7.27392741443743e-08, "loss": 0.1734, "step": 47828 }, { "epoch": 0.8313893862226007, "grad_norm": 1.6975533706789043, "learning_rate": 7.27246536026802e-08, "loss": 0.191, "step": 47829 }, { "epoch": 0.8314067687601036, "grad_norm": 1.0763250458125817, "learning_rate": 7.271003441525298e-08, "loss": 0.2023, "step": 47830 }, { "epoch": 0.8314241512976064, "grad_norm": 1.878453129354071, "learning_rate": 7.269541658213851e-08, "loss": 0.1735, "step": 47831 }, { "epoch": 0.8314415338351092, "grad_norm": 2.4708423560838932, "learning_rate": 7.268080010338329e-08, "loss": 0.1283, "step": 47832 }, { "epoch": 0.831458916372612, "grad_norm": 1.5096020102847214, "learning_rate": 7.266618497903348e-08, "loss": 0.2375, "step": 47833 }, { "epoch": 0.8314762989101149, "grad_norm": 1.482260003248337, "learning_rate": 7.265157120913573e-08, "loss": 0.208, "step": 47834 }, { "epoch": 0.8314936814476177, "grad_norm": 1.1108575290953837, "learning_rate": 7.263695879373616e-08, "loss": 0.1819, "step": 47835 }, { "epoch": 0.8315110639851205, "grad_norm": 1.5493537526893812, "learning_rate": 7.262234773288111e-08, "loss": 0.3717, "step": 47836 }, { "epoch": 0.8315284465226234, "grad_norm": 2.1805695937455014, "learning_rate": 7.260773802661696e-08, "loss": 0.2437, "step": 47837 }, { "epoch": 0.8315458290601262, "grad_norm": 1.813945320386849, "learning_rate": 7.259312967498993e-08, "loss": 0.1808, "step": 47838 }, { "epoch": 0.831563211597629, "grad_norm": 1.0344529421314352, "learning_rate": 7.257852267804632e-08, "loss": 0.2653, "step": 47839 }, { "epoch": 0.8315805941351319, "grad_norm": 1.2254026764354358, "learning_rate": 7.256391703583242e-08, "loss": 0.2202, "step": 47840 }, { "epoch": 0.8315979766726347, "grad_norm": 1.7876067389011903, "learning_rate": 7.254931274839448e-08, "loss": 0.2497, "step": 47841 }, { "epoch": 0.8316153592101375, "grad_norm": 1.3643189467508594, "learning_rate": 7.253470981577898e-08, "loss": 0.1463, "step": 47842 }, { "epoch": 0.8316327417476403, "grad_norm": 3.0252191230144923, "learning_rate": 7.252010823803223e-08, "loss": 0.2071, "step": 47843 }, { "epoch": 0.8316501242851432, "grad_norm": 1.6612104138392987, "learning_rate": 7.250550801520006e-08, "loss": 0.1664, "step": 47844 }, { "epoch": 0.831667506822646, "grad_norm": 3.1158406736600406, "learning_rate": 7.249090914732924e-08, "loss": 0.3191, "step": 47845 }, { "epoch": 0.8316848893601488, "grad_norm": 3.1053677333385, "learning_rate": 7.247631163446583e-08, "loss": 0.2646, "step": 47846 }, { "epoch": 0.8317022718976517, "grad_norm": 0.9722885528561913, "learning_rate": 7.246171547665613e-08, "loss": 0.2697, "step": 47847 }, { "epoch": 0.8317196544351545, "grad_norm": 2.18598786665429, "learning_rate": 7.244712067394637e-08, "loss": 0.1575, "step": 47848 }, { "epoch": 0.8317370369726572, "grad_norm": 0.7222901825743089, "learning_rate": 7.243252722638287e-08, "loss": 0.2929, "step": 47849 }, { "epoch": 0.83175441951016, "grad_norm": 2.864694071908549, "learning_rate": 7.241793513401184e-08, "loss": 0.2124, "step": 47850 }, { "epoch": 0.8317718020476629, "grad_norm": 1.6513761727448102, "learning_rate": 7.240334439687956e-08, "loss": 0.3286, "step": 47851 }, { "epoch": 0.8317891845851657, "grad_norm": 1.0383473700816883, "learning_rate": 7.238875501503211e-08, "loss": 0.1372, "step": 47852 }, { "epoch": 0.8318065671226685, "grad_norm": 1.2839855699481986, "learning_rate": 7.2374166988516e-08, "loss": 0.2069, "step": 47853 }, { "epoch": 0.8318239496601714, "grad_norm": 3.719237228678123, "learning_rate": 7.235958031737737e-08, "loss": 0.3069, "step": 47854 }, { "epoch": 0.8318413321976742, "grad_norm": 2.044506685423947, "learning_rate": 7.234499500166253e-08, "loss": 0.2296, "step": 47855 }, { "epoch": 0.831858714735177, "grad_norm": 1.3357415300301951, "learning_rate": 7.233041104141735e-08, "loss": 0.2736, "step": 47856 }, { "epoch": 0.8318760972726799, "grad_norm": 1.94931986106537, "learning_rate": 7.231582843668848e-08, "loss": 0.2199, "step": 47857 }, { "epoch": 0.8318934798101827, "grad_norm": 1.6906960389367187, "learning_rate": 7.23012471875219e-08, "loss": 0.1963, "step": 47858 }, { "epoch": 0.8319108623476855, "grad_norm": 0.9069819129353399, "learning_rate": 7.228666729396394e-08, "loss": 0.1662, "step": 47859 }, { "epoch": 0.8319282448851884, "grad_norm": 0.9470146024669398, "learning_rate": 7.227208875606067e-08, "loss": 0.2283, "step": 47860 }, { "epoch": 0.8319456274226912, "grad_norm": 1.4531820997403808, "learning_rate": 7.22575115738585e-08, "loss": 0.1849, "step": 47861 }, { "epoch": 0.831963009960194, "grad_norm": 0.7211915524063292, "learning_rate": 7.224293574740364e-08, "loss": 0.1141, "step": 47862 }, { "epoch": 0.8319803924976968, "grad_norm": 1.1994520503963726, "learning_rate": 7.222836127674204e-08, "loss": 0.2045, "step": 47863 }, { "epoch": 0.8319977750351997, "grad_norm": 1.4077327173275112, "learning_rate": 7.221378816191997e-08, "loss": 0.2046, "step": 47864 }, { "epoch": 0.8320151575727025, "grad_norm": 1.5362729886833906, "learning_rate": 7.219921640298376e-08, "loss": 0.6269, "step": 47865 }, { "epoch": 0.8320325401102053, "grad_norm": 1.2194656205645558, "learning_rate": 7.218464599997953e-08, "loss": 0.3393, "step": 47866 }, { "epoch": 0.8320499226477082, "grad_norm": 1.2503594374066627, "learning_rate": 7.217007695295341e-08, "loss": 0.1829, "step": 47867 }, { "epoch": 0.832067305185211, "grad_norm": 2.6253692195264318, "learning_rate": 7.215550926195168e-08, "loss": 0.1461, "step": 47868 }, { "epoch": 0.8320846877227137, "grad_norm": 1.2214417045062993, "learning_rate": 7.214094292702038e-08, "loss": 0.3652, "step": 47869 }, { "epoch": 0.8321020702602165, "grad_norm": 1.4084822460179252, "learning_rate": 7.21263779482058e-08, "loss": 0.1535, "step": 47870 }, { "epoch": 0.8321194527977194, "grad_norm": 1.2084355641123348, "learning_rate": 7.211181432555407e-08, "loss": 0.2666, "step": 47871 }, { "epoch": 0.8321368353352222, "grad_norm": 1.3854374439598787, "learning_rate": 7.209725205911116e-08, "loss": 0.1632, "step": 47872 }, { "epoch": 0.832154217872725, "grad_norm": 1.8348569023948467, "learning_rate": 7.208269114892357e-08, "loss": 0.1973, "step": 47873 }, { "epoch": 0.8321716004102279, "grad_norm": 1.2660350326780838, "learning_rate": 7.206813159503738e-08, "loss": 0.1354, "step": 47874 }, { "epoch": 0.8321889829477307, "grad_norm": 1.6640488967489209, "learning_rate": 7.20535733974984e-08, "loss": 0.4794, "step": 47875 }, { "epoch": 0.8322063654852335, "grad_norm": 1.1126518492107522, "learning_rate": 7.203901655635308e-08, "loss": 0.1796, "step": 47876 }, { "epoch": 0.8322237480227364, "grad_norm": 3.2972121890898776, "learning_rate": 7.202446107164755e-08, "loss": 0.2822, "step": 47877 }, { "epoch": 0.8322411305602392, "grad_norm": 2.707952748156602, "learning_rate": 7.200990694342785e-08, "loss": 0.1447, "step": 47878 }, { "epoch": 0.832258513097742, "grad_norm": 1.0738042553200082, "learning_rate": 7.199535417174018e-08, "loss": 0.1427, "step": 47879 }, { "epoch": 0.8322758956352448, "grad_norm": 1.8411137801737691, "learning_rate": 7.198080275663048e-08, "loss": 0.2185, "step": 47880 }, { "epoch": 0.8322932781727477, "grad_norm": 1.174593922720944, "learning_rate": 7.196625269814527e-08, "loss": 0.2897, "step": 47881 }, { "epoch": 0.8323106607102505, "grad_norm": 1.4709056101290563, "learning_rate": 7.195170399633028e-08, "loss": 0.2071, "step": 47882 }, { "epoch": 0.8323280432477533, "grad_norm": 1.6914931402979616, "learning_rate": 7.193715665123162e-08, "loss": 0.1683, "step": 47883 }, { "epoch": 0.8323454257852562, "grad_norm": 1.279275718573868, "learning_rate": 7.192261066289568e-08, "loss": 0.1671, "step": 47884 }, { "epoch": 0.832362808322759, "grad_norm": 1.4781457474421014, "learning_rate": 7.190806603136845e-08, "loss": 0.176, "step": 47885 }, { "epoch": 0.8323801908602618, "grad_norm": 1.2824063007929212, "learning_rate": 7.189352275669597e-08, "loss": 0.1376, "step": 47886 }, { "epoch": 0.8323975733977647, "grad_norm": 1.498022692710575, "learning_rate": 7.187898083892435e-08, "loss": 0.2047, "step": 47887 }, { "epoch": 0.8324149559352674, "grad_norm": 1.3423704116702035, "learning_rate": 7.186444027809973e-08, "loss": 0.2074, "step": 47888 }, { "epoch": 0.8324323384727702, "grad_norm": 1.2131092585561152, "learning_rate": 7.184990107426819e-08, "loss": 0.2834, "step": 47889 }, { "epoch": 0.832449721010273, "grad_norm": 1.0350037751991132, "learning_rate": 7.183536322747574e-08, "loss": 0.1434, "step": 47890 }, { "epoch": 0.8324671035477759, "grad_norm": 0.9241039420066124, "learning_rate": 7.18208267377684e-08, "loss": 0.3171, "step": 47891 }, { "epoch": 0.8324844860852787, "grad_norm": 1.3318955026599086, "learning_rate": 7.180629160519247e-08, "loss": 0.275, "step": 47892 }, { "epoch": 0.8325018686227815, "grad_norm": 2.8726166909122464, "learning_rate": 7.179175782979402e-08, "loss": 0.2007, "step": 47893 }, { "epoch": 0.8325192511602844, "grad_norm": 0.8354968386435127, "learning_rate": 7.177722541161891e-08, "loss": 0.2678, "step": 47894 }, { "epoch": 0.8325366336977872, "grad_norm": 1.4602451161660193, "learning_rate": 7.176269435071309e-08, "loss": 0.235, "step": 47895 }, { "epoch": 0.83255401623529, "grad_norm": 1.2197268982067244, "learning_rate": 7.174816464712297e-08, "loss": 0.2164, "step": 47896 }, { "epoch": 0.8325713987727928, "grad_norm": 1.6546027607872453, "learning_rate": 7.173363630089447e-08, "loss": 0.2984, "step": 47897 }, { "epoch": 0.8325887813102957, "grad_norm": 1.2057754621746628, "learning_rate": 7.171910931207359e-08, "loss": 0.1875, "step": 47898 }, { "epoch": 0.8326061638477985, "grad_norm": 0.7476218207625264, "learning_rate": 7.170458368070625e-08, "loss": 0.2239, "step": 47899 }, { "epoch": 0.8326235463853013, "grad_norm": 1.8479305117483538, "learning_rate": 7.169005940683892e-08, "loss": 0.238, "step": 47900 }, { "epoch": 0.8326409289228042, "grad_norm": 1.2579991174965777, "learning_rate": 7.167553649051721e-08, "loss": 0.1775, "step": 47901 }, { "epoch": 0.832658311460307, "grad_norm": 1.8114152313484957, "learning_rate": 7.166101493178734e-08, "loss": 0.1869, "step": 47902 }, { "epoch": 0.8326756939978098, "grad_norm": 1.3445455892814855, "learning_rate": 7.164649473069511e-08, "loss": 0.3016, "step": 47903 }, { "epoch": 0.8326930765353127, "grad_norm": 1.0805677278253234, "learning_rate": 7.163197588728687e-08, "loss": 0.2122, "step": 47904 }, { "epoch": 0.8327104590728155, "grad_norm": 1.6543680172525561, "learning_rate": 7.161745840160849e-08, "loss": 0.1157, "step": 47905 }, { "epoch": 0.8327278416103183, "grad_norm": 0.8633225812186504, "learning_rate": 7.160294227370595e-08, "loss": 0.2582, "step": 47906 }, { "epoch": 0.8327452241478212, "grad_norm": 1.420739010120588, "learning_rate": 7.158842750362537e-08, "loss": 0.3282, "step": 47907 }, { "epoch": 0.8327626066853239, "grad_norm": 1.319660415147923, "learning_rate": 7.157391409141261e-08, "loss": 0.2346, "step": 47908 }, { "epoch": 0.8327799892228267, "grad_norm": 2.164231506634303, "learning_rate": 7.155940203711374e-08, "loss": 0.2626, "step": 47909 }, { "epoch": 0.8327973717603295, "grad_norm": 1.0412447115016747, "learning_rate": 7.15448913407748e-08, "loss": 0.1922, "step": 47910 }, { "epoch": 0.8328147542978324, "grad_norm": 1.6740508868401103, "learning_rate": 7.15303820024416e-08, "loss": 0.2026, "step": 47911 }, { "epoch": 0.8328321368353352, "grad_norm": 1.8517800733715961, "learning_rate": 7.151587402216047e-08, "loss": 0.342, "step": 47912 }, { "epoch": 0.832849519372838, "grad_norm": 1.4631201013883333, "learning_rate": 7.150136739997709e-08, "loss": 0.1505, "step": 47913 }, { "epoch": 0.8328669019103409, "grad_norm": 1.907160581955104, "learning_rate": 7.148686213593746e-08, "loss": 0.2244, "step": 47914 }, { "epoch": 0.8328842844478437, "grad_norm": 1.174343214864599, "learning_rate": 7.147235823008773e-08, "loss": 0.2347, "step": 47915 }, { "epoch": 0.8329016669853465, "grad_norm": 2.462250993432743, "learning_rate": 7.145785568247375e-08, "loss": 0.2027, "step": 47916 }, { "epoch": 0.8329190495228493, "grad_norm": 2.2855070434739613, "learning_rate": 7.144335449314154e-08, "loss": 0.1782, "step": 47917 }, { "epoch": 0.8329364320603522, "grad_norm": 0.961340493399157, "learning_rate": 7.142885466213689e-08, "loss": 0.5336, "step": 47918 }, { "epoch": 0.832953814597855, "grad_norm": 1.4728689772174162, "learning_rate": 7.141435618950614e-08, "loss": 0.3179, "step": 47919 }, { "epoch": 0.8329711971353578, "grad_norm": 1.2301974178314405, "learning_rate": 7.139985907529494e-08, "loss": 0.1705, "step": 47920 }, { "epoch": 0.8329885796728607, "grad_norm": 1.27649897513226, "learning_rate": 7.138536331954925e-08, "loss": 0.2066, "step": 47921 }, { "epoch": 0.8330059622103635, "grad_norm": 2.267755843630692, "learning_rate": 7.137086892231498e-08, "loss": 0.2112, "step": 47922 }, { "epoch": 0.8330233447478663, "grad_norm": 1.034839165278939, "learning_rate": 7.135637588363825e-08, "loss": 0.113, "step": 47923 }, { "epoch": 0.8330407272853692, "grad_norm": 0.8007124510924841, "learning_rate": 7.134188420356491e-08, "loss": 0.177, "step": 47924 }, { "epoch": 0.833058109822872, "grad_norm": 0.8964184197497805, "learning_rate": 7.132739388214087e-08, "loss": 0.1194, "step": 47925 }, { "epoch": 0.8330754923603748, "grad_norm": 2.8871129872328685, "learning_rate": 7.13129049194121e-08, "loss": 0.3563, "step": 47926 }, { "epoch": 0.8330928748978776, "grad_norm": 0.9135165466098811, "learning_rate": 7.129841731542452e-08, "loss": 0.1123, "step": 47927 }, { "epoch": 0.8331102574353804, "grad_norm": 4.322405921528963, "learning_rate": 7.128393107022401e-08, "loss": 0.2951, "step": 47928 }, { "epoch": 0.8331276399728832, "grad_norm": 2.0352935190527877, "learning_rate": 7.12694461838565e-08, "loss": 0.2322, "step": 47929 }, { "epoch": 0.833145022510386, "grad_norm": 2.0925203893865008, "learning_rate": 7.125496265636777e-08, "loss": 0.2244, "step": 47930 }, { "epoch": 0.8331624050478889, "grad_norm": 1.0301198827691884, "learning_rate": 7.124048048780412e-08, "loss": 0.1529, "step": 47931 }, { "epoch": 0.8331797875853917, "grad_norm": 1.7875889391900557, "learning_rate": 7.122599967821108e-08, "loss": 0.166, "step": 47932 }, { "epoch": 0.8331971701228945, "grad_norm": 1.0819331082734334, "learning_rate": 7.121152022763449e-08, "loss": 0.1654, "step": 47933 }, { "epoch": 0.8332145526603973, "grad_norm": 1.4223897712967042, "learning_rate": 7.11970421361206e-08, "loss": 0.1635, "step": 47934 }, { "epoch": 0.8332319351979002, "grad_norm": 0.8164674716273906, "learning_rate": 7.118256540371503e-08, "loss": 0.2289, "step": 47935 }, { "epoch": 0.833249317735403, "grad_norm": 1.3448331055558322, "learning_rate": 7.11680900304638e-08, "loss": 0.141, "step": 47936 }, { "epoch": 0.8332667002729058, "grad_norm": 1.292255687176482, "learning_rate": 7.115361601641273e-08, "loss": 0.169, "step": 47937 }, { "epoch": 0.8332840828104087, "grad_norm": 1.6656951277759415, "learning_rate": 7.11391433616077e-08, "loss": 0.2108, "step": 47938 }, { "epoch": 0.8333014653479115, "grad_norm": 1.4840338878808068, "learning_rate": 7.112467206609457e-08, "loss": 0.2806, "step": 47939 }, { "epoch": 0.8333188478854143, "grad_norm": 2.291151873946628, "learning_rate": 7.111020212991925e-08, "loss": 0.1217, "step": 47940 }, { "epoch": 0.8333362304229172, "grad_norm": 1.072307269518536, "learning_rate": 7.109573355312742e-08, "loss": 0.2849, "step": 47941 }, { "epoch": 0.83335361296042, "grad_norm": 1.1177999749820815, "learning_rate": 7.108126633576522e-08, "loss": 0.1493, "step": 47942 }, { "epoch": 0.8333709954979228, "grad_norm": 1.3378313958538455, "learning_rate": 7.106680047787839e-08, "loss": 0.2591, "step": 47943 }, { "epoch": 0.8333883780354256, "grad_norm": 1.3366821031162353, "learning_rate": 7.105233597951282e-08, "loss": 0.1477, "step": 47944 }, { "epoch": 0.8334057605729285, "grad_norm": 1.9783920984774, "learning_rate": 7.103787284071416e-08, "loss": 0.1826, "step": 47945 }, { "epoch": 0.8334231431104313, "grad_norm": 2.1204329074988357, "learning_rate": 7.102341106152848e-08, "loss": 0.1859, "step": 47946 }, { "epoch": 0.8334405256479341, "grad_norm": 2.1957117809161946, "learning_rate": 7.100895064200152e-08, "loss": 0.3791, "step": 47947 }, { "epoch": 0.8334579081854369, "grad_norm": 1.1032183609788235, "learning_rate": 7.099449158217913e-08, "loss": 0.2409, "step": 47948 }, { "epoch": 0.8334752907229397, "grad_norm": 1.7128691758382864, "learning_rate": 7.0980033882107e-08, "loss": 0.1742, "step": 47949 }, { "epoch": 0.8334926732604425, "grad_norm": 1.5822772167845691, "learning_rate": 7.096557754183136e-08, "loss": 0.198, "step": 47950 }, { "epoch": 0.8335100557979453, "grad_norm": 1.6126732143538602, "learning_rate": 7.095112256139757e-08, "loss": 0.1769, "step": 47951 }, { "epoch": 0.8335274383354482, "grad_norm": 1.801569997665558, "learning_rate": 7.09366689408517e-08, "loss": 0.2227, "step": 47952 }, { "epoch": 0.833544820872951, "grad_norm": 1.1613113469014293, "learning_rate": 7.092221668023934e-08, "loss": 0.2297, "step": 47953 }, { "epoch": 0.8335622034104538, "grad_norm": 1.2268310605698731, "learning_rate": 7.090776577960661e-08, "loss": 0.2416, "step": 47954 }, { "epoch": 0.8335795859479567, "grad_norm": 2.232528986377708, "learning_rate": 7.089331623899907e-08, "loss": 0.1755, "step": 47955 }, { "epoch": 0.8335969684854595, "grad_norm": 0.6439799653991883, "learning_rate": 7.087886805846271e-08, "loss": 0.1904, "step": 47956 }, { "epoch": 0.8336143510229623, "grad_norm": 2.3028704225654173, "learning_rate": 7.086442123804315e-08, "loss": 0.2768, "step": 47957 }, { "epoch": 0.8336317335604652, "grad_norm": 1.6776431246728682, "learning_rate": 7.084997577778623e-08, "loss": 0.2024, "step": 47958 }, { "epoch": 0.833649116097968, "grad_norm": 1.1025685977788962, "learning_rate": 7.083553167773781e-08, "loss": 0.1763, "step": 47959 }, { "epoch": 0.8336664986354708, "grad_norm": 1.5922418611710158, "learning_rate": 7.082108893794359e-08, "loss": 0.1965, "step": 47960 }, { "epoch": 0.8336838811729737, "grad_norm": 1.4849653333987394, "learning_rate": 7.080664755844923e-08, "loss": 0.1934, "step": 47961 }, { "epoch": 0.8337012637104765, "grad_norm": 1.5054861114604818, "learning_rate": 7.079220753930077e-08, "loss": 0.2408, "step": 47962 }, { "epoch": 0.8337186462479793, "grad_norm": 1.843796028763744, "learning_rate": 7.077776888054399e-08, "loss": 0.2075, "step": 47963 }, { "epoch": 0.8337360287854821, "grad_norm": 1.360950668546191, "learning_rate": 7.076333158222425e-08, "loss": 0.3915, "step": 47964 }, { "epoch": 0.833753411322985, "grad_norm": 0.8024644479444908, "learning_rate": 7.074889564438768e-08, "loss": 0.2172, "step": 47965 }, { "epoch": 0.8337707938604878, "grad_norm": 1.9551266982618447, "learning_rate": 7.073446106707997e-08, "loss": 0.3096, "step": 47966 }, { "epoch": 0.8337881763979906, "grad_norm": 1.0651145345033997, "learning_rate": 7.072002785034681e-08, "loss": 0.2691, "step": 47967 }, { "epoch": 0.8338055589354934, "grad_norm": 1.8310638263891814, "learning_rate": 7.070559599423398e-08, "loss": 0.137, "step": 47968 }, { "epoch": 0.8338229414729962, "grad_norm": 1.7288860193464413, "learning_rate": 7.069116549878718e-08, "loss": 0.1495, "step": 47969 }, { "epoch": 0.833840324010499, "grad_norm": 1.653121766186275, "learning_rate": 7.067673636405219e-08, "loss": 0.224, "step": 47970 }, { "epoch": 0.8338577065480018, "grad_norm": 1.2367720262997637, "learning_rate": 7.066230859007478e-08, "loss": 0.3299, "step": 47971 }, { "epoch": 0.8338750890855047, "grad_norm": 3.118269404743599, "learning_rate": 7.064788217690043e-08, "loss": 0.2914, "step": 47972 }, { "epoch": 0.8338924716230075, "grad_norm": 1.1368750779311267, "learning_rate": 7.063345712457525e-08, "loss": 0.2504, "step": 47973 }, { "epoch": 0.8339098541605103, "grad_norm": 2.187109263219079, "learning_rate": 7.061903343314474e-08, "loss": 0.1967, "step": 47974 }, { "epoch": 0.8339272366980132, "grad_norm": 1.9200565115799573, "learning_rate": 7.060461110265475e-08, "loss": 0.1776, "step": 47975 }, { "epoch": 0.833944619235516, "grad_norm": 2.157288686084149, "learning_rate": 7.059019013315071e-08, "loss": 0.1661, "step": 47976 }, { "epoch": 0.8339620017730188, "grad_norm": 1.3443174751642404, "learning_rate": 7.057577052467861e-08, "loss": 0.2556, "step": 47977 }, { "epoch": 0.8339793843105217, "grad_norm": 2.8688921293832994, "learning_rate": 7.056135227728404e-08, "loss": 0.2026, "step": 47978 }, { "epoch": 0.8339967668480245, "grad_norm": 1.2012021468275107, "learning_rate": 7.05469353910127e-08, "loss": 0.2322, "step": 47979 }, { "epoch": 0.8340141493855273, "grad_norm": 0.9380086238099993, "learning_rate": 7.053251986591024e-08, "loss": 0.1484, "step": 47980 }, { "epoch": 0.8340315319230301, "grad_norm": 1.1324152411500605, "learning_rate": 7.051810570202249e-08, "loss": 0.2212, "step": 47981 }, { "epoch": 0.834048914460533, "grad_norm": 2.038791963917993, "learning_rate": 7.050369289939518e-08, "loss": 0.2311, "step": 47982 }, { "epoch": 0.8340662969980358, "grad_norm": 1.3387930511681578, "learning_rate": 7.048928145807375e-08, "loss": 0.3812, "step": 47983 }, { "epoch": 0.8340836795355386, "grad_norm": 1.4337274602721295, "learning_rate": 7.047487137810387e-08, "loss": 0.1326, "step": 47984 }, { "epoch": 0.8341010620730415, "grad_norm": 2.374714240032376, "learning_rate": 7.046046265953149e-08, "loss": 0.2579, "step": 47985 }, { "epoch": 0.8341184446105443, "grad_norm": 1.3867366042494709, "learning_rate": 7.044605530240211e-08, "loss": 0.1413, "step": 47986 }, { "epoch": 0.8341358271480471, "grad_norm": 2.1302782337934336, "learning_rate": 7.043164930676143e-08, "loss": 0.1369, "step": 47987 }, { "epoch": 0.8341532096855498, "grad_norm": 0.8190258637976209, "learning_rate": 7.041724467265508e-08, "loss": 0.1939, "step": 47988 }, { "epoch": 0.8341705922230527, "grad_norm": 4.704193425503409, "learning_rate": 7.04028414001287e-08, "loss": 0.3519, "step": 47989 }, { "epoch": 0.8341879747605555, "grad_norm": 1.8804329379098883, "learning_rate": 7.038843948922802e-08, "loss": 0.1821, "step": 47990 }, { "epoch": 0.8342053572980583, "grad_norm": 1.4937930420751901, "learning_rate": 7.037403893999861e-08, "loss": 0.1865, "step": 47991 }, { "epoch": 0.8342227398355612, "grad_norm": 1.0279376538450564, "learning_rate": 7.035963975248605e-08, "loss": 0.3392, "step": 47992 }, { "epoch": 0.834240122373064, "grad_norm": 0.988862619668658, "learning_rate": 7.034524192673613e-08, "loss": 0.1949, "step": 47993 }, { "epoch": 0.8342575049105668, "grad_norm": 2.394212137049064, "learning_rate": 7.033084546279461e-08, "loss": 0.3135, "step": 47994 }, { "epoch": 0.8342748874480697, "grad_norm": 1.4433469245975306, "learning_rate": 7.031645036070666e-08, "loss": 0.2733, "step": 47995 }, { "epoch": 0.8342922699855725, "grad_norm": 1.5027712541822045, "learning_rate": 7.030205662051836e-08, "loss": 0.1765, "step": 47996 }, { "epoch": 0.8343096525230753, "grad_norm": 1.8089648791175752, "learning_rate": 7.028766424227511e-08, "loss": 0.217, "step": 47997 }, { "epoch": 0.8343270350605781, "grad_norm": 1.5583309804491012, "learning_rate": 7.027327322602256e-08, "loss": 0.1247, "step": 47998 }, { "epoch": 0.834344417598081, "grad_norm": 1.6457640788401657, "learning_rate": 7.025888357180631e-08, "loss": 0.3145, "step": 47999 }, { "epoch": 0.8343618001355838, "grad_norm": 1.8248431548464372, "learning_rate": 7.024449527967191e-08, "loss": 0.2432, "step": 48000 }, { "epoch": 0.8343791826730866, "grad_norm": 1.0896464455172772, "learning_rate": 7.023010834966531e-08, "loss": 0.2035, "step": 48001 }, { "epoch": 0.8343965652105895, "grad_norm": 1.4503055029849792, "learning_rate": 7.021572278183168e-08, "loss": 0.1998, "step": 48002 }, { "epoch": 0.8344139477480923, "grad_norm": 1.2552293391841898, "learning_rate": 7.020133857621668e-08, "loss": 0.1819, "step": 48003 }, { "epoch": 0.8344313302855951, "grad_norm": 1.6528016525882656, "learning_rate": 7.018695573286615e-08, "loss": 0.2695, "step": 48004 }, { "epoch": 0.834448712823098, "grad_norm": 1.3419823946826979, "learning_rate": 7.01725742518255e-08, "loss": 0.1743, "step": 48005 }, { "epoch": 0.8344660953606008, "grad_norm": 1.3343955212901102, "learning_rate": 7.015819413314033e-08, "loss": 0.1803, "step": 48006 }, { "epoch": 0.8344834778981036, "grad_norm": 1.2525921733257948, "learning_rate": 7.014381537685626e-08, "loss": 0.1185, "step": 48007 }, { "epoch": 0.8345008604356063, "grad_norm": 3.133035016555236, "learning_rate": 7.012943798301885e-08, "loss": 0.3078, "step": 48008 }, { "epoch": 0.8345182429731092, "grad_norm": 1.6769486521506152, "learning_rate": 7.011506195167361e-08, "loss": 0.1821, "step": 48009 }, { "epoch": 0.834535625510612, "grad_norm": 1.2322506721971, "learning_rate": 7.010068728286617e-08, "loss": 0.2113, "step": 48010 }, { "epoch": 0.8345530080481148, "grad_norm": 1.0211750608184076, "learning_rate": 7.008631397664194e-08, "loss": 0.1924, "step": 48011 }, { "epoch": 0.8345703905856177, "grad_norm": 2.1402610332708925, "learning_rate": 7.007194203304679e-08, "loss": 0.282, "step": 48012 }, { "epoch": 0.8345877731231205, "grad_norm": 1.2899156719117089, "learning_rate": 7.005757145212615e-08, "loss": 0.3439, "step": 48013 }, { "epoch": 0.8346051556606233, "grad_norm": 1.8238570046168003, "learning_rate": 7.004320223392529e-08, "loss": 0.1486, "step": 48014 }, { "epoch": 0.8346225381981262, "grad_norm": 2.1750081417186755, "learning_rate": 7.002883437849006e-08, "loss": 0.1923, "step": 48015 }, { "epoch": 0.834639920735629, "grad_norm": 1.8477273420281821, "learning_rate": 7.001446788586596e-08, "loss": 0.3448, "step": 48016 }, { "epoch": 0.8346573032731318, "grad_norm": 2.001851445791709, "learning_rate": 7.00001027560984e-08, "loss": 0.1831, "step": 48017 }, { "epoch": 0.8346746858106346, "grad_norm": 1.4537849604747926, "learning_rate": 6.99857389892331e-08, "loss": 0.2939, "step": 48018 }, { "epoch": 0.8346920683481375, "grad_norm": 0.8571473123140749, "learning_rate": 6.997137658531526e-08, "loss": 0.2257, "step": 48019 }, { "epoch": 0.8347094508856403, "grad_norm": 1.3931411233553523, "learning_rate": 6.995701554439093e-08, "loss": 0.1631, "step": 48020 }, { "epoch": 0.8347268334231431, "grad_norm": 2.0034634454222067, "learning_rate": 6.994265586650516e-08, "loss": 0.2063, "step": 48021 }, { "epoch": 0.834744215960646, "grad_norm": 1.9521745221548645, "learning_rate": 6.992829755170348e-08, "loss": 0.1812, "step": 48022 }, { "epoch": 0.8347615984981488, "grad_norm": 1.3664031233446268, "learning_rate": 6.99139406000317e-08, "loss": 0.1802, "step": 48023 }, { "epoch": 0.8347789810356516, "grad_norm": 2.839170454825999, "learning_rate": 6.989958501153515e-08, "loss": 0.1851, "step": 48024 }, { "epoch": 0.8347963635731545, "grad_norm": 1.324405262524116, "learning_rate": 6.98852307862593e-08, "loss": 0.1399, "step": 48025 }, { "epoch": 0.8348137461106573, "grad_norm": 0.8525985504764053, "learning_rate": 6.987087792424972e-08, "loss": 0.2425, "step": 48026 }, { "epoch": 0.83483112864816, "grad_norm": 1.5869339840195866, "learning_rate": 6.98565264255519e-08, "loss": 0.1716, "step": 48027 }, { "epoch": 0.8348485111856628, "grad_norm": 1.2476743241057975, "learning_rate": 6.984217629021128e-08, "loss": 0.1516, "step": 48028 }, { "epoch": 0.8348658937231657, "grad_norm": 1.451742101628286, "learning_rate": 6.982782751827332e-08, "loss": 0.1232, "step": 48029 }, { "epoch": 0.8348832762606685, "grad_norm": 1.589279883900346, "learning_rate": 6.98134801097835e-08, "loss": 0.1739, "step": 48030 }, { "epoch": 0.8349006587981713, "grad_norm": 3.607112128692599, "learning_rate": 6.979913406478738e-08, "loss": 0.2772, "step": 48031 }, { "epoch": 0.8349180413356742, "grad_norm": 1.0007043972791596, "learning_rate": 6.978478938333055e-08, "loss": 0.1157, "step": 48032 }, { "epoch": 0.834935423873177, "grad_norm": 1.5738599263078579, "learning_rate": 6.977044606545818e-08, "loss": 0.262, "step": 48033 }, { "epoch": 0.8349528064106798, "grad_norm": 1.4354735149399438, "learning_rate": 6.975610411121574e-08, "loss": 0.1791, "step": 48034 }, { "epoch": 0.8349701889481826, "grad_norm": 0.6507464098129179, "learning_rate": 6.974176352064897e-08, "loss": 0.193, "step": 48035 }, { "epoch": 0.8349875714856855, "grad_norm": 1.6065597690037507, "learning_rate": 6.972742429380312e-08, "loss": 0.2034, "step": 48036 }, { "epoch": 0.8350049540231883, "grad_norm": 1.4185321820287748, "learning_rate": 6.971308643072371e-08, "loss": 0.1518, "step": 48037 }, { "epoch": 0.8350223365606911, "grad_norm": 1.107669702905407, "learning_rate": 6.969874993145603e-08, "loss": 0.144, "step": 48038 }, { "epoch": 0.835039719098194, "grad_norm": 1.433786510998228, "learning_rate": 6.968441479604591e-08, "loss": 0.1677, "step": 48039 }, { "epoch": 0.8350571016356968, "grad_norm": 4.103909333923844, "learning_rate": 6.967008102453836e-08, "loss": 0.1519, "step": 48040 }, { "epoch": 0.8350744841731996, "grad_norm": 0.7392613398081096, "learning_rate": 6.965574861697898e-08, "loss": 0.1791, "step": 48041 }, { "epoch": 0.8350918667107025, "grad_norm": 1.167918847261958, "learning_rate": 6.964141757341313e-08, "loss": 0.1755, "step": 48042 }, { "epoch": 0.8351092492482053, "grad_norm": 2.5758379887530993, "learning_rate": 6.962708789388638e-08, "loss": 0.1526, "step": 48043 }, { "epoch": 0.8351266317857081, "grad_norm": 1.3474781694911915, "learning_rate": 6.961275957844403e-08, "loss": 0.166, "step": 48044 }, { "epoch": 0.835144014323211, "grad_norm": 1.4065425533441493, "learning_rate": 6.95984326271316e-08, "loss": 0.2584, "step": 48045 }, { "epoch": 0.8351613968607138, "grad_norm": 2.276159334623168, "learning_rate": 6.95841070399944e-08, "loss": 0.266, "step": 48046 }, { "epoch": 0.8351787793982165, "grad_norm": 2.3259966357518786, "learning_rate": 6.956978281707781e-08, "loss": 0.1963, "step": 48047 }, { "epoch": 0.8351961619357193, "grad_norm": 1.3512344665695641, "learning_rate": 6.955545995842732e-08, "loss": 0.2038, "step": 48048 }, { "epoch": 0.8352135444732222, "grad_norm": 1.3621907423683974, "learning_rate": 6.954113846408832e-08, "loss": 0.1738, "step": 48049 }, { "epoch": 0.835230927010725, "grad_norm": 0.945162403906664, "learning_rate": 6.952681833410606e-08, "loss": 0.2085, "step": 48050 }, { "epoch": 0.8352483095482278, "grad_norm": 1.2670441668524504, "learning_rate": 6.951249956852623e-08, "loss": 0.2572, "step": 48051 }, { "epoch": 0.8352656920857306, "grad_norm": 1.5324968687297622, "learning_rate": 6.949818216739395e-08, "loss": 0.305, "step": 48052 }, { "epoch": 0.8352830746232335, "grad_norm": 1.357012019780423, "learning_rate": 6.948386613075452e-08, "loss": 0.2883, "step": 48053 }, { "epoch": 0.8353004571607363, "grad_norm": 1.624325895387053, "learning_rate": 6.946955145865364e-08, "loss": 0.1935, "step": 48054 }, { "epoch": 0.8353178396982391, "grad_norm": 1.085871512952584, "learning_rate": 6.945523815113646e-08, "loss": 0.2219, "step": 48055 }, { "epoch": 0.835335222235742, "grad_norm": 1.1670872304806617, "learning_rate": 6.944092620824843e-08, "loss": 0.1981, "step": 48056 }, { "epoch": 0.8353526047732448, "grad_norm": 1.3275120187186626, "learning_rate": 6.942661563003488e-08, "loss": 0.1257, "step": 48057 }, { "epoch": 0.8353699873107476, "grad_norm": 1.3904108873819043, "learning_rate": 6.941230641654117e-08, "loss": 0.1788, "step": 48058 }, { "epoch": 0.8353873698482505, "grad_norm": 1.511179092277634, "learning_rate": 6.939799856781264e-08, "loss": 0.1848, "step": 48059 }, { "epoch": 0.8354047523857533, "grad_norm": 1.8293879395111128, "learning_rate": 6.93836920838946e-08, "loss": 0.1762, "step": 48060 }, { "epoch": 0.8354221349232561, "grad_norm": 1.4298139885582772, "learning_rate": 6.936938696483241e-08, "loss": 0.2566, "step": 48061 }, { "epoch": 0.835439517460759, "grad_norm": 1.487718340652544, "learning_rate": 6.935508321067157e-08, "loss": 0.1435, "step": 48062 }, { "epoch": 0.8354568999982618, "grad_norm": 1.6479898432211246, "learning_rate": 6.934078082145729e-08, "loss": 0.1897, "step": 48063 }, { "epoch": 0.8354742825357646, "grad_norm": 1.1836302038578355, "learning_rate": 6.932647979723494e-08, "loss": 0.208, "step": 48064 }, { "epoch": 0.8354916650732674, "grad_norm": 1.7696875649500035, "learning_rate": 6.931218013804968e-08, "loss": 0.1805, "step": 48065 }, { "epoch": 0.8355090476107703, "grad_norm": 0.8045108651239139, "learning_rate": 6.929788184394703e-08, "loss": 0.1691, "step": 48066 }, { "epoch": 0.835526430148273, "grad_norm": 0.8971009696555526, "learning_rate": 6.928358491497227e-08, "loss": 0.1815, "step": 48067 }, { "epoch": 0.8355438126857758, "grad_norm": 3.802075335284201, "learning_rate": 6.926928935117066e-08, "loss": 0.2869, "step": 48068 }, { "epoch": 0.8355611952232787, "grad_norm": 1.7274990733949214, "learning_rate": 6.925499515258743e-08, "loss": 0.1899, "step": 48069 }, { "epoch": 0.8355785777607815, "grad_norm": 1.3430838221683474, "learning_rate": 6.924070231926826e-08, "loss": 0.3113, "step": 48070 }, { "epoch": 0.8355959602982843, "grad_norm": 2.9919826619775596, "learning_rate": 6.922641085125807e-08, "loss": 0.1475, "step": 48071 }, { "epoch": 0.8356133428357871, "grad_norm": 1.0467638524892966, "learning_rate": 6.921212074860228e-08, "loss": 0.2916, "step": 48072 }, { "epoch": 0.83563072537329, "grad_norm": 2.6701878381766457, "learning_rate": 6.919783201134604e-08, "loss": 0.2283, "step": 48073 }, { "epoch": 0.8356481079107928, "grad_norm": 0.8501287846153764, "learning_rate": 6.918354463953491e-08, "loss": 0.2807, "step": 48074 }, { "epoch": 0.8356654904482956, "grad_norm": 1.7698155640187658, "learning_rate": 6.916925863321398e-08, "loss": 0.1807, "step": 48075 }, { "epoch": 0.8356828729857985, "grad_norm": 1.4512867760584978, "learning_rate": 6.91549739924287e-08, "loss": 0.2344, "step": 48076 }, { "epoch": 0.8357002555233013, "grad_norm": 2.91596626449438, "learning_rate": 6.914069071722417e-08, "loss": 0.1535, "step": 48077 }, { "epoch": 0.8357176380608041, "grad_norm": 1.490098433670156, "learning_rate": 6.912640880764575e-08, "loss": 0.2044, "step": 48078 }, { "epoch": 0.835735020598307, "grad_norm": 4.283268240639308, "learning_rate": 6.91121282637387e-08, "loss": 0.2168, "step": 48079 }, { "epoch": 0.8357524031358098, "grad_norm": 0.9147739972975071, "learning_rate": 6.909784908554822e-08, "loss": 0.1291, "step": 48080 }, { "epoch": 0.8357697856733126, "grad_norm": 2.5029283728996647, "learning_rate": 6.908357127311953e-08, "loss": 0.1963, "step": 48081 }, { "epoch": 0.8357871682108154, "grad_norm": 0.8462599441119968, "learning_rate": 6.906929482649809e-08, "loss": 0.1778, "step": 48082 }, { "epoch": 0.8358045507483183, "grad_norm": 1.1307920932705693, "learning_rate": 6.905501974572914e-08, "loss": 0.2155, "step": 48083 }, { "epoch": 0.8358219332858211, "grad_norm": 1.3140907244236237, "learning_rate": 6.904074603085763e-08, "loss": 0.184, "step": 48084 }, { "epoch": 0.8358393158233239, "grad_norm": 1.1742502912542538, "learning_rate": 6.902647368192904e-08, "loss": 0.2779, "step": 48085 }, { "epoch": 0.8358566983608268, "grad_norm": 1.6819265634368263, "learning_rate": 6.901220269898861e-08, "loss": 0.2466, "step": 48086 }, { "epoch": 0.8358740808983295, "grad_norm": 1.1249828388915195, "learning_rate": 6.899793308208147e-08, "loss": 0.2081, "step": 48087 }, { "epoch": 0.8358914634358323, "grad_norm": 1.6013252053557427, "learning_rate": 6.898366483125295e-08, "loss": 0.1371, "step": 48088 }, { "epoch": 0.8359088459733351, "grad_norm": 1.1113090835276427, "learning_rate": 6.896939794654816e-08, "loss": 0.1478, "step": 48089 }, { "epoch": 0.835926228510838, "grad_norm": 1.0844153474577358, "learning_rate": 6.895513242801243e-08, "loss": 0.1192, "step": 48090 }, { "epoch": 0.8359436110483408, "grad_norm": 1.4633835093210228, "learning_rate": 6.89408682756909e-08, "loss": 0.2078, "step": 48091 }, { "epoch": 0.8359609935858436, "grad_norm": 1.2724586786069512, "learning_rate": 6.89266054896287e-08, "loss": 0.1886, "step": 48092 }, { "epoch": 0.8359783761233465, "grad_norm": 1.0414893651180552, "learning_rate": 6.891234406987124e-08, "loss": 0.2471, "step": 48093 }, { "epoch": 0.8359957586608493, "grad_norm": 1.266231084220493, "learning_rate": 6.889808401646363e-08, "loss": 0.2255, "step": 48094 }, { "epoch": 0.8360131411983521, "grad_norm": 0.9095011535266201, "learning_rate": 6.88838253294512e-08, "loss": 0.2339, "step": 48095 }, { "epoch": 0.836030523735855, "grad_norm": 1.9728528017784592, "learning_rate": 6.886956800887872e-08, "loss": 0.2485, "step": 48096 }, { "epoch": 0.8360479062733578, "grad_norm": 1.9519040140833077, "learning_rate": 6.885531205479183e-08, "loss": 0.38, "step": 48097 }, { "epoch": 0.8360652888108606, "grad_norm": 1.0335335224859354, "learning_rate": 6.88410574672355e-08, "loss": 0.1444, "step": 48098 }, { "epoch": 0.8360826713483634, "grad_norm": 1.5459948831627865, "learning_rate": 6.882680424625503e-08, "loss": 0.2322, "step": 48099 }, { "epoch": 0.8361000538858663, "grad_norm": 1.5213066918086642, "learning_rate": 6.881255239189537e-08, "loss": 0.2687, "step": 48100 }, { "epoch": 0.8361174364233691, "grad_norm": 1.434145849129468, "learning_rate": 6.879830190420199e-08, "loss": 0.139, "step": 48101 }, { "epoch": 0.8361348189608719, "grad_norm": 2.122408233366055, "learning_rate": 6.878405278321997e-08, "loss": 0.2711, "step": 48102 }, { "epoch": 0.8361522014983748, "grad_norm": 1.5106618545087136, "learning_rate": 6.876980502899438e-08, "loss": 0.2051, "step": 48103 }, { "epoch": 0.8361695840358776, "grad_norm": 1.311528307041989, "learning_rate": 6.875555864157024e-08, "loss": 0.14, "step": 48104 }, { "epoch": 0.8361869665733804, "grad_norm": 1.2049640889574367, "learning_rate": 6.8741313620993e-08, "loss": 0.175, "step": 48105 }, { "epoch": 0.8362043491108833, "grad_norm": 0.9076425837450378, "learning_rate": 6.872706996730771e-08, "loss": 0.1287, "step": 48106 }, { "epoch": 0.836221731648386, "grad_norm": 1.3369503284762292, "learning_rate": 6.871282768055952e-08, "loss": 0.2901, "step": 48107 }, { "epoch": 0.8362391141858888, "grad_norm": 1.0102204741542702, "learning_rate": 6.869858676079354e-08, "loss": 0.1671, "step": 48108 }, { "epoch": 0.8362564967233916, "grad_norm": 0.886227893285241, "learning_rate": 6.868434720805489e-08, "loss": 0.1477, "step": 48109 }, { "epoch": 0.8362738792608945, "grad_norm": 1.2260847711001994, "learning_rate": 6.867010902238874e-08, "loss": 0.1857, "step": 48110 }, { "epoch": 0.8362912617983973, "grad_norm": 1.3210554701405044, "learning_rate": 6.865587220384012e-08, "loss": 0.1831, "step": 48111 }, { "epoch": 0.8363086443359001, "grad_norm": 1.4031835962554633, "learning_rate": 6.864163675245432e-08, "loss": 0.1445, "step": 48112 }, { "epoch": 0.836326026873403, "grad_norm": 0.9645422363142675, "learning_rate": 6.862740266827638e-08, "loss": 0.2286, "step": 48113 }, { "epoch": 0.8363434094109058, "grad_norm": 2.5285348950056914, "learning_rate": 6.861316995135152e-08, "loss": 0.1925, "step": 48114 }, { "epoch": 0.8363607919484086, "grad_norm": 1.106116260104663, "learning_rate": 6.859893860172456e-08, "loss": 0.176, "step": 48115 }, { "epoch": 0.8363781744859115, "grad_norm": 1.27930776139204, "learning_rate": 6.858470861944094e-08, "loss": 0.2334, "step": 48116 }, { "epoch": 0.8363955570234143, "grad_norm": 1.812654201043465, "learning_rate": 6.857048000454552e-08, "loss": 0.278, "step": 48117 }, { "epoch": 0.8364129395609171, "grad_norm": 1.3213996983425367, "learning_rate": 6.855625275708355e-08, "loss": 0.2672, "step": 48118 }, { "epoch": 0.8364303220984199, "grad_norm": 1.7578269696928648, "learning_rate": 6.854202687709998e-08, "loss": 0.17, "step": 48119 }, { "epoch": 0.8364477046359228, "grad_norm": 1.327966544089984, "learning_rate": 6.852780236464006e-08, "loss": 0.2745, "step": 48120 }, { "epoch": 0.8364650871734256, "grad_norm": 2.0403903350840156, "learning_rate": 6.851357921974893e-08, "loss": 0.1873, "step": 48121 }, { "epoch": 0.8364824697109284, "grad_norm": 0.9632542017689928, "learning_rate": 6.849935744247142e-08, "loss": 0.1691, "step": 48122 }, { "epoch": 0.8364998522484313, "grad_norm": 0.9948982858814918, "learning_rate": 6.848513703285269e-08, "loss": 0.2766, "step": 48123 }, { "epoch": 0.8365172347859341, "grad_norm": 1.1310920625669578, "learning_rate": 6.847091799093791e-08, "loss": 0.1857, "step": 48124 }, { "epoch": 0.8365346173234369, "grad_norm": 1.393014404502584, "learning_rate": 6.845670031677208e-08, "loss": 0.2135, "step": 48125 }, { "epoch": 0.8365519998609398, "grad_norm": 1.2172099524768556, "learning_rate": 6.844248401040026e-08, "loss": 0.2152, "step": 48126 }, { "epoch": 0.8365693823984425, "grad_norm": 1.186844998828714, "learning_rate": 6.84282690718676e-08, "loss": 0.2772, "step": 48127 }, { "epoch": 0.8365867649359453, "grad_norm": 1.3965832324277072, "learning_rate": 6.841405550121904e-08, "loss": 0.247, "step": 48128 }, { "epoch": 0.8366041474734481, "grad_norm": 1.618086792954694, "learning_rate": 6.839984329849963e-08, "loss": 0.231, "step": 48129 }, { "epoch": 0.836621530010951, "grad_norm": 1.5650524755521933, "learning_rate": 6.838563246375451e-08, "loss": 0.1424, "step": 48130 }, { "epoch": 0.8366389125484538, "grad_norm": 0.8813063927786831, "learning_rate": 6.837142299702853e-08, "loss": 0.1733, "step": 48131 }, { "epoch": 0.8366562950859566, "grad_norm": 1.0175927254511714, "learning_rate": 6.835721489836699e-08, "loss": 0.2259, "step": 48132 }, { "epoch": 0.8366736776234595, "grad_norm": 1.2538691932759294, "learning_rate": 6.834300816781485e-08, "loss": 0.1666, "step": 48133 }, { "epoch": 0.8366910601609623, "grad_norm": 1.4054364612147436, "learning_rate": 6.832880280541692e-08, "loss": 0.1685, "step": 48134 }, { "epoch": 0.8367084426984651, "grad_norm": 1.3045778196948699, "learning_rate": 6.831459881121854e-08, "loss": 0.1968, "step": 48135 }, { "epoch": 0.836725825235968, "grad_norm": 1.7171019365715245, "learning_rate": 6.83003961852645e-08, "loss": 0.2568, "step": 48136 }, { "epoch": 0.8367432077734708, "grad_norm": 0.9114781932283477, "learning_rate": 6.828619492759996e-08, "loss": 0.1863, "step": 48137 }, { "epoch": 0.8367605903109736, "grad_norm": 0.9957424758971472, "learning_rate": 6.827199503826986e-08, "loss": 0.11, "step": 48138 }, { "epoch": 0.8367779728484764, "grad_norm": 1.2924065611288036, "learning_rate": 6.825779651731905e-08, "loss": 0.1983, "step": 48139 }, { "epoch": 0.8367953553859793, "grad_norm": 1.0167528563930475, "learning_rate": 6.824359936479302e-08, "loss": 0.1643, "step": 48140 }, { "epoch": 0.8368127379234821, "grad_norm": 2.8476311873396365, "learning_rate": 6.822940358073626e-08, "loss": 0.207, "step": 48141 }, { "epoch": 0.8368301204609849, "grad_norm": 1.7887470619771886, "learning_rate": 6.821520916519385e-08, "loss": 0.2481, "step": 48142 }, { "epoch": 0.8368475029984878, "grad_norm": 2.268136026876758, "learning_rate": 6.8201016118211e-08, "loss": 0.1893, "step": 48143 }, { "epoch": 0.8368648855359906, "grad_norm": 1.8033421008125616, "learning_rate": 6.818682443983259e-08, "loss": 0.1732, "step": 48144 }, { "epoch": 0.8368822680734934, "grad_norm": 1.150322961202354, "learning_rate": 6.817263413010354e-08, "loss": 0.1362, "step": 48145 }, { "epoch": 0.8368996506109962, "grad_norm": 1.3432695725318669, "learning_rate": 6.815844518906893e-08, "loss": 0.1446, "step": 48146 }, { "epoch": 0.836917033148499, "grad_norm": 1.5359188846792662, "learning_rate": 6.814425761677367e-08, "loss": 0.1484, "step": 48147 }, { "epoch": 0.8369344156860018, "grad_norm": 1.6738698106119403, "learning_rate": 6.813007141326271e-08, "loss": 0.254, "step": 48148 }, { "epoch": 0.8369517982235046, "grad_norm": 1.368834073477031, "learning_rate": 6.811588657858103e-08, "loss": 0.1331, "step": 48149 }, { "epoch": 0.8369691807610075, "grad_norm": 1.0686299049437158, "learning_rate": 6.810170311277352e-08, "loss": 0.1629, "step": 48150 }, { "epoch": 0.8369865632985103, "grad_norm": 1.342712817989802, "learning_rate": 6.808752101588533e-08, "loss": 0.1399, "step": 48151 }, { "epoch": 0.8370039458360131, "grad_norm": 1.1967107609670125, "learning_rate": 6.807334028796135e-08, "loss": 0.1057, "step": 48152 }, { "epoch": 0.837021328373516, "grad_norm": 1.6405763905053767, "learning_rate": 6.805916092904646e-08, "loss": 0.3919, "step": 48153 }, { "epoch": 0.8370387109110188, "grad_norm": 1.1426475173308759, "learning_rate": 6.804498293918542e-08, "loss": 0.1682, "step": 48154 }, { "epoch": 0.8370560934485216, "grad_norm": 1.497877783722356, "learning_rate": 6.80308063184235e-08, "loss": 0.1406, "step": 48155 }, { "epoch": 0.8370734759860244, "grad_norm": 1.7052702809168998, "learning_rate": 6.801663106680544e-08, "loss": 0.1921, "step": 48156 }, { "epoch": 0.8370908585235273, "grad_norm": 2.3663238981421064, "learning_rate": 6.80024571843763e-08, "loss": 0.2601, "step": 48157 }, { "epoch": 0.8371082410610301, "grad_norm": 1.9091963742284315, "learning_rate": 6.798828467118078e-08, "loss": 0.1849, "step": 48158 }, { "epoch": 0.8371256235985329, "grad_norm": 1.0757859782851427, "learning_rate": 6.797411352726418e-08, "loss": 0.2025, "step": 48159 }, { "epoch": 0.8371430061360358, "grad_norm": 0.8535669133952449, "learning_rate": 6.795994375267105e-08, "loss": 0.3372, "step": 48160 }, { "epoch": 0.8371603886735386, "grad_norm": 2.136411097508967, "learning_rate": 6.794577534744644e-08, "loss": 0.2777, "step": 48161 }, { "epoch": 0.8371777712110414, "grad_norm": 1.1417247855020667, "learning_rate": 6.793160831163513e-08, "loss": 0.2208, "step": 48162 }, { "epoch": 0.8371951537485443, "grad_norm": 1.0002751249803827, "learning_rate": 6.791744264528231e-08, "loss": 0.2532, "step": 48163 }, { "epoch": 0.8372125362860471, "grad_norm": 1.910770340798625, "learning_rate": 6.790327834843268e-08, "loss": 0.1481, "step": 48164 }, { "epoch": 0.8372299188235499, "grad_norm": 1.6185113957271837, "learning_rate": 6.788911542113118e-08, "loss": 0.205, "step": 48165 }, { "epoch": 0.8372473013610526, "grad_norm": 1.8557086657244684, "learning_rate": 6.787495386342268e-08, "loss": 0.2365, "step": 48166 }, { "epoch": 0.8372646838985555, "grad_norm": 1.5875475052643038, "learning_rate": 6.786079367535208e-08, "loss": 0.1922, "step": 48167 }, { "epoch": 0.8372820664360583, "grad_norm": 1.5678177453612925, "learning_rate": 6.784663485696423e-08, "loss": 0.193, "step": 48168 }, { "epoch": 0.8372994489735611, "grad_norm": 1.251029295483668, "learning_rate": 6.783247740830406e-08, "loss": 0.3634, "step": 48169 }, { "epoch": 0.837316831511064, "grad_norm": 1.188719819419347, "learning_rate": 6.781832132941628e-08, "loss": 0.3125, "step": 48170 }, { "epoch": 0.8373342140485668, "grad_norm": 1.2473682544724798, "learning_rate": 6.780416662034616e-08, "loss": 0.1698, "step": 48171 }, { "epoch": 0.8373515965860696, "grad_norm": 1.6832422990249898, "learning_rate": 6.779001328113814e-08, "loss": 0.2846, "step": 48172 }, { "epoch": 0.8373689791235724, "grad_norm": 1.129204569060071, "learning_rate": 6.777586131183716e-08, "loss": 0.1789, "step": 48173 }, { "epoch": 0.8373863616610753, "grad_norm": 1.6920331345610344, "learning_rate": 6.776171071248821e-08, "loss": 0.2808, "step": 48174 }, { "epoch": 0.8374037441985781, "grad_norm": 4.116042905420506, "learning_rate": 6.774756148313615e-08, "loss": 0.3295, "step": 48175 }, { "epoch": 0.8374211267360809, "grad_norm": 2.4667692041070457, "learning_rate": 6.773341362382573e-08, "loss": 0.2564, "step": 48176 }, { "epoch": 0.8374385092735838, "grad_norm": 1.0034355964143435, "learning_rate": 6.77192671346018e-08, "loss": 0.1216, "step": 48177 }, { "epoch": 0.8374558918110866, "grad_norm": 1.4426796643256072, "learning_rate": 6.770512201550927e-08, "loss": 0.2812, "step": 48178 }, { "epoch": 0.8374732743485894, "grad_norm": 1.9498795743774286, "learning_rate": 6.769097826659287e-08, "loss": 0.2555, "step": 48179 }, { "epoch": 0.8374906568860923, "grad_norm": 0.8466299992258011, "learning_rate": 6.767683588789752e-08, "loss": 0.1488, "step": 48180 }, { "epoch": 0.8375080394235951, "grad_norm": 1.2009138830936805, "learning_rate": 6.766269487946785e-08, "loss": 0.3887, "step": 48181 }, { "epoch": 0.8375254219610979, "grad_norm": 1.2239176757013956, "learning_rate": 6.764855524134899e-08, "loss": 0.2293, "step": 48182 }, { "epoch": 0.8375428044986007, "grad_norm": 0.9741316553959583, "learning_rate": 6.763441697358563e-08, "loss": 0.1472, "step": 48183 }, { "epoch": 0.8375601870361036, "grad_norm": 1.577451411832572, "learning_rate": 6.762028007622262e-08, "loss": 0.1904, "step": 48184 }, { "epoch": 0.8375775695736064, "grad_norm": 1.938417156534759, "learning_rate": 6.760614454930446e-08, "loss": 0.2411, "step": 48185 }, { "epoch": 0.8375949521111091, "grad_norm": 1.770380341686451, "learning_rate": 6.759201039287638e-08, "loss": 0.1197, "step": 48186 }, { "epoch": 0.837612334648612, "grad_norm": 4.372268601206549, "learning_rate": 6.757787760698291e-08, "loss": 0.2118, "step": 48187 }, { "epoch": 0.8376297171861148, "grad_norm": 1.5120743889030297, "learning_rate": 6.756374619166898e-08, "loss": 0.1855, "step": 48188 }, { "epoch": 0.8376470997236176, "grad_norm": 1.3439933903623986, "learning_rate": 6.754961614697924e-08, "loss": 0.2195, "step": 48189 }, { "epoch": 0.8376644822611204, "grad_norm": 1.5767925997840757, "learning_rate": 6.753548747295879e-08, "loss": 0.2182, "step": 48190 }, { "epoch": 0.8376818647986233, "grad_norm": 1.413011415833252, "learning_rate": 6.752136016965204e-08, "loss": 0.1751, "step": 48191 }, { "epoch": 0.8376992473361261, "grad_norm": 1.4761975997137244, "learning_rate": 6.750723423710391e-08, "loss": 0.2358, "step": 48192 }, { "epoch": 0.8377166298736289, "grad_norm": 1.589000085860755, "learning_rate": 6.749310967535904e-08, "loss": 0.178, "step": 48193 }, { "epoch": 0.8377340124111318, "grad_norm": 1.5130560763503933, "learning_rate": 6.747898648446249e-08, "loss": 0.2507, "step": 48194 }, { "epoch": 0.8377513949486346, "grad_norm": 1.3743626290267745, "learning_rate": 6.746486466445889e-08, "loss": 0.2025, "step": 48195 }, { "epoch": 0.8377687774861374, "grad_norm": 1.1295067730826616, "learning_rate": 6.745074421539293e-08, "loss": 0.2146, "step": 48196 }, { "epoch": 0.8377861600236403, "grad_norm": 1.4450930521111809, "learning_rate": 6.743662513730947e-08, "loss": 0.258, "step": 48197 }, { "epoch": 0.8378035425611431, "grad_norm": 1.550511718724687, "learning_rate": 6.742250743025313e-08, "loss": 0.1953, "step": 48198 }, { "epoch": 0.8378209250986459, "grad_norm": 1.200249434306313, "learning_rate": 6.740839109426876e-08, "loss": 0.1889, "step": 48199 }, { "epoch": 0.8378383076361487, "grad_norm": 1.9824984641979064, "learning_rate": 6.739427612940102e-08, "loss": 0.4315, "step": 48200 }, { "epoch": 0.8378556901736516, "grad_norm": 1.2006893140029722, "learning_rate": 6.738016253569473e-08, "loss": 0.1922, "step": 48201 }, { "epoch": 0.8378730727111544, "grad_norm": 1.3708265357299816, "learning_rate": 6.736605031319465e-08, "loss": 0.2769, "step": 48202 }, { "epoch": 0.8378904552486572, "grad_norm": 1.235104462223904, "learning_rate": 6.735193946194556e-08, "loss": 0.1995, "step": 48203 }, { "epoch": 0.8379078377861601, "grad_norm": 1.0965664618429376, "learning_rate": 6.733782998199184e-08, "loss": 0.2243, "step": 48204 }, { "epoch": 0.8379252203236629, "grad_norm": 0.896513160887515, "learning_rate": 6.732372187337854e-08, "loss": 0.2796, "step": 48205 }, { "epoch": 0.8379426028611656, "grad_norm": 1.370632120506136, "learning_rate": 6.73096151361503e-08, "loss": 0.2096, "step": 48206 }, { "epoch": 0.8379599853986684, "grad_norm": 0.7549065485747634, "learning_rate": 6.729550977035186e-08, "loss": 0.2137, "step": 48207 }, { "epoch": 0.8379773679361713, "grad_norm": 0.9502160551505585, "learning_rate": 6.728140577602776e-08, "loss": 0.233, "step": 48208 }, { "epoch": 0.8379947504736741, "grad_norm": 1.5041190346533888, "learning_rate": 6.726730315322298e-08, "loss": 0.1707, "step": 48209 }, { "epoch": 0.8380121330111769, "grad_norm": 1.8206183509642342, "learning_rate": 6.725320190198203e-08, "loss": 0.2282, "step": 48210 }, { "epoch": 0.8380295155486798, "grad_norm": 1.9198253429943735, "learning_rate": 6.723910202234961e-08, "loss": 0.2335, "step": 48211 }, { "epoch": 0.8380468980861826, "grad_norm": 1.2168666233527063, "learning_rate": 6.722500351437038e-08, "loss": 0.1942, "step": 48212 }, { "epoch": 0.8380642806236854, "grad_norm": 1.3665800540117388, "learning_rate": 6.721090637808918e-08, "loss": 0.1446, "step": 48213 }, { "epoch": 0.8380816631611883, "grad_norm": 2.031391293809047, "learning_rate": 6.71968106135506e-08, "loss": 0.2134, "step": 48214 }, { "epoch": 0.8380990456986911, "grad_norm": 1.4847407150411422, "learning_rate": 6.71827162207993e-08, "loss": 0.2574, "step": 48215 }, { "epoch": 0.8381164282361939, "grad_norm": 1.1495532091639618, "learning_rate": 6.716862319987993e-08, "loss": 0.1457, "step": 48216 }, { "epoch": 0.8381338107736968, "grad_norm": 1.0107089789200503, "learning_rate": 6.715453155083728e-08, "loss": 0.1661, "step": 48217 }, { "epoch": 0.8381511933111996, "grad_norm": 1.8444025555622845, "learning_rate": 6.714044127371588e-08, "loss": 0.1874, "step": 48218 }, { "epoch": 0.8381685758487024, "grad_norm": 1.632697877361535, "learning_rate": 6.712635236856045e-08, "loss": 0.1786, "step": 48219 }, { "epoch": 0.8381859583862052, "grad_norm": 0.7874535239736141, "learning_rate": 6.711226483541555e-08, "loss": 0.2442, "step": 48220 }, { "epoch": 0.8382033409237081, "grad_norm": 1.1735112731842618, "learning_rate": 6.709817867432599e-08, "loss": 0.2907, "step": 48221 }, { "epoch": 0.8382207234612109, "grad_norm": 1.0797955934629648, "learning_rate": 6.708409388533648e-08, "loss": 0.0981, "step": 48222 }, { "epoch": 0.8382381059987137, "grad_norm": 1.220554428303033, "learning_rate": 6.707001046849132e-08, "loss": 0.2331, "step": 48223 }, { "epoch": 0.8382554885362166, "grad_norm": 1.2502289553609987, "learning_rate": 6.70559284238354e-08, "loss": 0.1174, "step": 48224 }, { "epoch": 0.8382728710737194, "grad_norm": 1.2932455139143657, "learning_rate": 6.704184775141336e-08, "loss": 0.1454, "step": 48225 }, { "epoch": 0.8382902536112221, "grad_norm": 2.508809204583358, "learning_rate": 6.702776845126979e-08, "loss": 0.2748, "step": 48226 }, { "epoch": 0.8383076361487249, "grad_norm": 2.187231460325588, "learning_rate": 6.70136905234493e-08, "loss": 0.1912, "step": 48227 }, { "epoch": 0.8383250186862278, "grad_norm": 2.3142464199621418, "learning_rate": 6.699961396799647e-08, "loss": 0.1942, "step": 48228 }, { "epoch": 0.8383424012237306, "grad_norm": 2.8742204529491504, "learning_rate": 6.6985538784956e-08, "loss": 0.2416, "step": 48229 }, { "epoch": 0.8383597837612334, "grad_norm": 0.9739456552070774, "learning_rate": 6.697146497437245e-08, "loss": 0.1562, "step": 48230 }, { "epoch": 0.8383771662987363, "grad_norm": 1.1972548210320342, "learning_rate": 6.695739253629035e-08, "loss": 0.1975, "step": 48231 }, { "epoch": 0.8383945488362391, "grad_norm": 1.5390704570108327, "learning_rate": 6.694332147075443e-08, "loss": 0.2283, "step": 48232 }, { "epoch": 0.8384119313737419, "grad_norm": 1.4306642849784146, "learning_rate": 6.692925177780934e-08, "loss": 0.2984, "step": 48233 }, { "epoch": 0.8384293139112448, "grad_norm": 1.1483071061136367, "learning_rate": 6.691518345749964e-08, "loss": 0.2104, "step": 48234 }, { "epoch": 0.8384466964487476, "grad_norm": 1.7561060510985753, "learning_rate": 6.690111650986968e-08, "loss": 0.2927, "step": 48235 }, { "epoch": 0.8384640789862504, "grad_norm": 1.4227874345779767, "learning_rate": 6.688705093496433e-08, "loss": 0.1443, "step": 48236 }, { "epoch": 0.8384814615237532, "grad_norm": 0.9331474349901726, "learning_rate": 6.687298673282805e-08, "loss": 0.2383, "step": 48237 }, { "epoch": 0.8384988440612561, "grad_norm": 2.01310756704948, "learning_rate": 6.685892390350544e-08, "loss": 0.2443, "step": 48238 }, { "epoch": 0.8385162265987589, "grad_norm": 2.715789231063646, "learning_rate": 6.684486244704102e-08, "loss": 0.1723, "step": 48239 }, { "epoch": 0.8385336091362617, "grad_norm": 1.203064526392933, "learning_rate": 6.683080236347943e-08, "loss": 0.1974, "step": 48240 }, { "epoch": 0.8385509916737646, "grad_norm": 0.9520408477321488, "learning_rate": 6.681674365286538e-08, "loss": 0.2521, "step": 48241 }, { "epoch": 0.8385683742112674, "grad_norm": 1.4063199609559005, "learning_rate": 6.680268631524316e-08, "loss": 0.17, "step": 48242 }, { "epoch": 0.8385857567487702, "grad_norm": 1.1339533664130594, "learning_rate": 6.678863035065734e-08, "loss": 0.1295, "step": 48243 }, { "epoch": 0.8386031392862731, "grad_norm": 2.723112471316029, "learning_rate": 6.677457575915262e-08, "loss": 0.299, "step": 48244 }, { "epoch": 0.8386205218237759, "grad_norm": 1.7452883674111621, "learning_rate": 6.676052254077352e-08, "loss": 0.3709, "step": 48245 }, { "epoch": 0.8386379043612786, "grad_norm": 2.026770016113858, "learning_rate": 6.674647069556455e-08, "loss": 0.3109, "step": 48246 }, { "epoch": 0.8386552868987814, "grad_norm": 1.2946532580655767, "learning_rate": 6.673242022357022e-08, "loss": 0.1988, "step": 48247 }, { "epoch": 0.8386726694362843, "grad_norm": 1.732453646079874, "learning_rate": 6.671837112483508e-08, "loss": 0.2451, "step": 48248 }, { "epoch": 0.8386900519737871, "grad_norm": 1.5091194231544118, "learning_rate": 6.670432339940374e-08, "loss": 0.2188, "step": 48249 }, { "epoch": 0.8387074345112899, "grad_norm": 2.2632718950704587, "learning_rate": 6.66902770473206e-08, "loss": 0.196, "step": 48250 }, { "epoch": 0.8387248170487928, "grad_norm": 1.775439557277872, "learning_rate": 6.66762320686301e-08, "loss": 0.2438, "step": 48251 }, { "epoch": 0.8387421995862956, "grad_norm": 1.12312335221834, "learning_rate": 6.666218846337706e-08, "loss": 0.0796, "step": 48252 }, { "epoch": 0.8387595821237984, "grad_norm": 1.4972121124914524, "learning_rate": 6.664814623160592e-08, "loss": 0.2684, "step": 48253 }, { "epoch": 0.8387769646613012, "grad_norm": 1.1724943012953026, "learning_rate": 6.663410537336084e-08, "loss": 0.1467, "step": 48254 }, { "epoch": 0.8387943471988041, "grad_norm": 1.6428207992069157, "learning_rate": 6.662006588868674e-08, "loss": 0.1695, "step": 48255 }, { "epoch": 0.8388117297363069, "grad_norm": 1.5415719678874726, "learning_rate": 6.660602777762791e-08, "loss": 0.139, "step": 48256 }, { "epoch": 0.8388291122738097, "grad_norm": 0.9455542614747631, "learning_rate": 6.659199104022889e-08, "loss": 0.1716, "step": 48257 }, { "epoch": 0.8388464948113126, "grad_norm": 1.440598497848401, "learning_rate": 6.657795567653419e-08, "loss": 0.2587, "step": 48258 }, { "epoch": 0.8388638773488154, "grad_norm": 1.137999664460818, "learning_rate": 6.656392168658814e-08, "loss": 0.2273, "step": 48259 }, { "epoch": 0.8388812598863182, "grad_norm": 1.2861132513879496, "learning_rate": 6.654988907043557e-08, "loss": 0.2056, "step": 48260 }, { "epoch": 0.8388986424238211, "grad_norm": 0.753264338290245, "learning_rate": 6.653585782812065e-08, "loss": 0.2649, "step": 48261 }, { "epoch": 0.8389160249613239, "grad_norm": 0.748450652522337, "learning_rate": 6.65218279596878e-08, "loss": 0.2642, "step": 48262 }, { "epoch": 0.8389334074988267, "grad_norm": 2.313797501019839, "learning_rate": 6.650779946518176e-08, "loss": 0.1913, "step": 48263 }, { "epoch": 0.8389507900363296, "grad_norm": 1.35920721387927, "learning_rate": 6.649377234464687e-08, "loss": 0.1791, "step": 48264 }, { "epoch": 0.8389681725738324, "grad_norm": 1.616819478399196, "learning_rate": 6.64797465981276e-08, "loss": 0.2019, "step": 48265 }, { "epoch": 0.8389855551113351, "grad_norm": 1.3531553066827713, "learning_rate": 6.646572222566838e-08, "loss": 0.1616, "step": 48266 }, { "epoch": 0.8390029376488379, "grad_norm": 1.4494889750799314, "learning_rate": 6.645169922731369e-08, "loss": 0.1976, "step": 48267 }, { "epoch": 0.8390203201863408, "grad_norm": 1.706164513929486, "learning_rate": 6.643767760310793e-08, "loss": 0.1634, "step": 48268 }, { "epoch": 0.8390377027238436, "grad_norm": 1.9143811981569456, "learning_rate": 6.642365735309557e-08, "loss": 0.3908, "step": 48269 }, { "epoch": 0.8390550852613464, "grad_norm": 3.0857130197866693, "learning_rate": 6.640963847732095e-08, "loss": 0.2944, "step": 48270 }, { "epoch": 0.8390724677988493, "grad_norm": 0.9786635175783212, "learning_rate": 6.639562097582868e-08, "loss": 0.2808, "step": 48271 }, { "epoch": 0.8390898503363521, "grad_norm": 4.3974154848678895, "learning_rate": 6.638160484866329e-08, "loss": 0.3446, "step": 48272 }, { "epoch": 0.8391072328738549, "grad_norm": 2.8138375585646647, "learning_rate": 6.636759009586884e-08, "loss": 0.3307, "step": 48273 }, { "epoch": 0.8391246154113577, "grad_norm": 0.9879053754814728, "learning_rate": 6.635357671748987e-08, "loss": 0.2893, "step": 48274 }, { "epoch": 0.8391419979488606, "grad_norm": 1.5756729773175964, "learning_rate": 6.633956471357093e-08, "loss": 0.2343, "step": 48275 }, { "epoch": 0.8391593804863634, "grad_norm": 1.0207851536762034, "learning_rate": 6.632555408415641e-08, "loss": 0.257, "step": 48276 }, { "epoch": 0.8391767630238662, "grad_norm": 0.9489093654289777, "learning_rate": 6.631154482929063e-08, "loss": 0.1372, "step": 48277 }, { "epoch": 0.8391941455613691, "grad_norm": 1.8888218679031887, "learning_rate": 6.629753694901796e-08, "loss": 0.2075, "step": 48278 }, { "epoch": 0.8392115280988719, "grad_norm": 1.376727748537895, "learning_rate": 6.628353044338309e-08, "loss": 0.2403, "step": 48279 }, { "epoch": 0.8392289106363747, "grad_norm": 2.0590720714562023, "learning_rate": 6.626952531243002e-08, "loss": 0.2608, "step": 48280 }, { "epoch": 0.8392462931738776, "grad_norm": 1.734211015514376, "learning_rate": 6.62555215562034e-08, "loss": 0.1568, "step": 48281 }, { "epoch": 0.8392636757113804, "grad_norm": 3.9672710875963997, "learning_rate": 6.624151917474735e-08, "loss": 0.3275, "step": 48282 }, { "epoch": 0.8392810582488832, "grad_norm": 1.092316081462326, "learning_rate": 6.622751816810657e-08, "loss": 0.1545, "step": 48283 }, { "epoch": 0.839298440786386, "grad_norm": 1.1988523305526702, "learning_rate": 6.621351853632534e-08, "loss": 0.2108, "step": 48284 }, { "epoch": 0.8393158233238889, "grad_norm": 0.9666198369904851, "learning_rate": 6.619952027944791e-08, "loss": 0.2145, "step": 48285 }, { "epoch": 0.8393332058613916, "grad_norm": 1.3947331234170974, "learning_rate": 6.618552339751882e-08, "loss": 0.2467, "step": 48286 }, { "epoch": 0.8393505883988944, "grad_norm": 1.4987059884151208, "learning_rate": 6.617152789058228e-08, "loss": 0.1853, "step": 48287 }, { "epoch": 0.8393679709363973, "grad_norm": 0.9869813348756605, "learning_rate": 6.615753375868272e-08, "loss": 0.1277, "step": 48288 }, { "epoch": 0.8393853534739001, "grad_norm": 1.059304419474994, "learning_rate": 6.614354100186448e-08, "loss": 0.1416, "step": 48289 }, { "epoch": 0.8394027360114029, "grad_norm": 1.1665148630115587, "learning_rate": 6.612954962017186e-08, "loss": 0.1748, "step": 48290 }, { "epoch": 0.8394201185489057, "grad_norm": 11.597581921906682, "learning_rate": 6.611555961364945e-08, "loss": 0.1516, "step": 48291 }, { "epoch": 0.8394375010864086, "grad_norm": 1.780909848597575, "learning_rate": 6.610157098234125e-08, "loss": 0.2863, "step": 48292 }, { "epoch": 0.8394548836239114, "grad_norm": 2.7722861284521767, "learning_rate": 6.60875837262917e-08, "loss": 0.2816, "step": 48293 }, { "epoch": 0.8394722661614142, "grad_norm": 0.6854368926332681, "learning_rate": 6.607359784554529e-08, "loss": 0.1401, "step": 48294 }, { "epoch": 0.8394896486989171, "grad_norm": 1.2023465143525076, "learning_rate": 6.605961334014626e-08, "loss": 0.1723, "step": 48295 }, { "epoch": 0.8395070312364199, "grad_norm": 0.9051788857222289, "learning_rate": 6.604563021013887e-08, "loss": 0.1709, "step": 48296 }, { "epoch": 0.8395244137739227, "grad_norm": 1.4330615318749196, "learning_rate": 6.603164845556742e-08, "loss": 0.1988, "step": 48297 }, { "epoch": 0.8395417963114256, "grad_norm": 1.6962876505685764, "learning_rate": 6.601766807647652e-08, "loss": 0.1955, "step": 48298 }, { "epoch": 0.8395591788489284, "grad_norm": 2.969215530665399, "learning_rate": 6.600368907291009e-08, "loss": 0.2425, "step": 48299 }, { "epoch": 0.8395765613864312, "grad_norm": 1.190826028386037, "learning_rate": 6.598971144491267e-08, "loss": 0.3035, "step": 48300 }, { "epoch": 0.839593943923934, "grad_norm": 1.4020065045012673, "learning_rate": 6.597573519252836e-08, "loss": 0.1866, "step": 48301 }, { "epoch": 0.8396113264614369, "grad_norm": 0.7577199524951648, "learning_rate": 6.59617603158017e-08, "loss": 0.1971, "step": 48302 }, { "epoch": 0.8396287089989397, "grad_norm": 1.9041718917503898, "learning_rate": 6.594778681477692e-08, "loss": 0.2225, "step": 48303 }, { "epoch": 0.8396460915364425, "grad_norm": 1.5531282349561315, "learning_rate": 6.593381468949821e-08, "loss": 0.1742, "step": 48304 }, { "epoch": 0.8396634740739453, "grad_norm": 0.9408447903851587, "learning_rate": 6.591984394000994e-08, "loss": 0.1381, "step": 48305 }, { "epoch": 0.8396808566114481, "grad_norm": 2.3936267786773726, "learning_rate": 6.590587456635638e-08, "loss": 0.3252, "step": 48306 }, { "epoch": 0.8396982391489509, "grad_norm": 1.1901356099501705, "learning_rate": 6.589190656858174e-08, "loss": 0.2224, "step": 48307 }, { "epoch": 0.8397156216864538, "grad_norm": 2.3954146396953417, "learning_rate": 6.587793994673041e-08, "loss": 0.2431, "step": 48308 }, { "epoch": 0.8397330042239566, "grad_norm": 1.2294185944466591, "learning_rate": 6.586397470084643e-08, "loss": 0.1344, "step": 48309 }, { "epoch": 0.8397503867614594, "grad_norm": 1.186025545475202, "learning_rate": 6.585001083097447e-08, "loss": 0.198, "step": 48310 }, { "epoch": 0.8397677692989622, "grad_norm": 1.3381497709587826, "learning_rate": 6.583604833715839e-08, "loss": 0.2641, "step": 48311 }, { "epoch": 0.8397851518364651, "grad_norm": 1.2073244023268137, "learning_rate": 6.582208721944255e-08, "loss": 0.1806, "step": 48312 }, { "epoch": 0.8398025343739679, "grad_norm": 2.8095830788129676, "learning_rate": 6.580812747787135e-08, "loss": 0.1739, "step": 48313 }, { "epoch": 0.8398199169114707, "grad_norm": 1.2879334870911459, "learning_rate": 6.579416911248892e-08, "loss": 0.1826, "step": 48314 }, { "epoch": 0.8398372994489736, "grad_norm": 0.9301004327397866, "learning_rate": 6.578021212333951e-08, "loss": 0.1527, "step": 48315 }, { "epoch": 0.8398546819864764, "grad_norm": 1.3867324863303183, "learning_rate": 6.576625651046735e-08, "loss": 0.1404, "step": 48316 }, { "epoch": 0.8398720645239792, "grad_norm": 1.7047875183006174, "learning_rate": 6.57523022739167e-08, "loss": 0.1306, "step": 48317 }, { "epoch": 0.839889447061482, "grad_norm": 1.8136933863848634, "learning_rate": 6.57383494137318e-08, "loss": 0.2223, "step": 48318 }, { "epoch": 0.8399068295989849, "grad_norm": 1.51737157929344, "learning_rate": 6.572439792995682e-08, "loss": 0.2176, "step": 48319 }, { "epoch": 0.8399242121364877, "grad_norm": 0.8516619502143015, "learning_rate": 6.57104478226359e-08, "loss": 0.1735, "step": 48320 }, { "epoch": 0.8399415946739905, "grad_norm": 1.2426720844846046, "learning_rate": 6.569649909181347e-08, "loss": 0.2521, "step": 48321 }, { "epoch": 0.8399589772114934, "grad_norm": 1.6898546417706803, "learning_rate": 6.568255173753362e-08, "loss": 0.2285, "step": 48322 }, { "epoch": 0.8399763597489962, "grad_norm": 3.0382005641115812, "learning_rate": 6.566860575984074e-08, "loss": 0.1646, "step": 48323 }, { "epoch": 0.839993742286499, "grad_norm": 1.6445315028333063, "learning_rate": 6.56546611587786e-08, "loss": 0.1441, "step": 48324 }, { "epoch": 0.8400111248240018, "grad_norm": 1.5103976914846948, "learning_rate": 6.56407179343918e-08, "loss": 0.2272, "step": 48325 }, { "epoch": 0.8400285073615046, "grad_norm": 1.3590428943791273, "learning_rate": 6.562677608672435e-08, "loss": 0.2204, "step": 48326 }, { "epoch": 0.8400458898990074, "grad_norm": 1.2090627474059965, "learning_rate": 6.561283561582054e-08, "loss": 0.1852, "step": 48327 }, { "epoch": 0.8400632724365102, "grad_norm": 2.2473728070633934, "learning_rate": 6.559889652172434e-08, "loss": 0.21, "step": 48328 }, { "epoch": 0.8400806549740131, "grad_norm": 1.2573712098274614, "learning_rate": 6.558495880448034e-08, "loss": 0.1329, "step": 48329 }, { "epoch": 0.8400980375115159, "grad_norm": 1.3547441110095135, "learning_rate": 6.557102246413232e-08, "loss": 0.1904, "step": 48330 }, { "epoch": 0.8401154200490187, "grad_norm": 2.1328835224631346, "learning_rate": 6.555708750072465e-08, "loss": 0.2568, "step": 48331 }, { "epoch": 0.8401328025865216, "grad_norm": 1.200430935875711, "learning_rate": 6.554315391430132e-08, "loss": 0.2142, "step": 48332 }, { "epoch": 0.8401501851240244, "grad_norm": 2.827512623764973, "learning_rate": 6.55292217049067e-08, "loss": 0.3105, "step": 48333 }, { "epoch": 0.8401675676615272, "grad_norm": 1.742265897307511, "learning_rate": 6.55152908725849e-08, "loss": 0.1621, "step": 48334 }, { "epoch": 0.8401849501990301, "grad_norm": 4.04288470635751, "learning_rate": 6.550136141738005e-08, "loss": 0.4808, "step": 48335 }, { "epoch": 0.8402023327365329, "grad_norm": 1.2129935242255618, "learning_rate": 6.548743333933627e-08, "loss": 0.2309, "step": 48336 }, { "epoch": 0.8402197152740357, "grad_norm": 1.702579883731668, "learning_rate": 6.547350663849771e-08, "loss": 0.1951, "step": 48337 }, { "epoch": 0.8402370978115385, "grad_norm": 2.441879983512689, "learning_rate": 6.545958131490858e-08, "loss": 0.1658, "step": 48338 }, { "epoch": 0.8402544803490414, "grad_norm": 2.183972767652985, "learning_rate": 6.54456573686129e-08, "loss": 0.2126, "step": 48339 }, { "epoch": 0.8402718628865442, "grad_norm": 1.3125888936865289, "learning_rate": 6.543173479965481e-08, "loss": 0.1488, "step": 48340 }, { "epoch": 0.840289245424047, "grad_norm": 2.097276534382364, "learning_rate": 6.541781360807858e-08, "loss": 0.2247, "step": 48341 }, { "epoch": 0.8403066279615499, "grad_norm": 2.2744341252143756, "learning_rate": 6.54038937939284e-08, "loss": 0.2389, "step": 48342 }, { "epoch": 0.8403240104990527, "grad_norm": 1.5007155199006925, "learning_rate": 6.5389975357248e-08, "loss": 0.1727, "step": 48343 }, { "epoch": 0.8403413930365555, "grad_norm": 1.171705091051804, "learning_rate": 6.537605829808179e-08, "loss": 0.1194, "step": 48344 }, { "epoch": 0.8403587755740582, "grad_norm": 2.2974444820168465, "learning_rate": 6.536214261647389e-08, "loss": 0.2972, "step": 48345 }, { "epoch": 0.8403761581115611, "grad_norm": 0.9708963911750406, "learning_rate": 6.534822831246834e-08, "loss": 0.1704, "step": 48346 }, { "epoch": 0.8403935406490639, "grad_norm": 1.7311328842997709, "learning_rate": 6.533431538610923e-08, "loss": 0.1872, "step": 48347 }, { "epoch": 0.8404109231865667, "grad_norm": 0.9705520190588492, "learning_rate": 6.532040383744064e-08, "loss": 0.17, "step": 48348 }, { "epoch": 0.8404283057240696, "grad_norm": 1.3080074902339045, "learning_rate": 6.530649366650675e-08, "loss": 0.1878, "step": 48349 }, { "epoch": 0.8404456882615724, "grad_norm": 1.9653415513292776, "learning_rate": 6.529258487335155e-08, "loss": 0.1875, "step": 48350 }, { "epoch": 0.8404630707990752, "grad_norm": 1.946034618276067, "learning_rate": 6.527867745801907e-08, "loss": 0.1788, "step": 48351 }, { "epoch": 0.8404804533365781, "grad_norm": 0.9548853059453016, "learning_rate": 6.526477142055359e-08, "loss": 0.1661, "step": 48352 }, { "epoch": 0.8404978358740809, "grad_norm": 1.2921040804498098, "learning_rate": 6.52508667609991e-08, "loss": 0.2489, "step": 48353 }, { "epoch": 0.8405152184115837, "grad_norm": 0.8556690581767649, "learning_rate": 6.523696347939978e-08, "loss": 0.1591, "step": 48354 }, { "epoch": 0.8405326009490866, "grad_norm": 0.9534402228271208, "learning_rate": 6.522306157579932e-08, "loss": 0.1255, "step": 48355 }, { "epoch": 0.8405499834865894, "grad_norm": 1.0765334991985995, "learning_rate": 6.520916105024216e-08, "loss": 0.1356, "step": 48356 }, { "epoch": 0.8405673660240922, "grad_norm": 1.1691212648769702, "learning_rate": 6.519526190277224e-08, "loss": 0.2129, "step": 48357 }, { "epoch": 0.840584748561595, "grad_norm": 1.255845980184336, "learning_rate": 6.518136413343355e-08, "loss": 0.1653, "step": 48358 }, { "epoch": 0.8406021310990979, "grad_norm": 1.4731459447660737, "learning_rate": 6.516746774227017e-08, "loss": 0.1893, "step": 48359 }, { "epoch": 0.8406195136366007, "grad_norm": 1.6220687378465926, "learning_rate": 6.515357272932626e-08, "loss": 0.1453, "step": 48360 }, { "epoch": 0.8406368961741035, "grad_norm": 1.7014333239969055, "learning_rate": 6.513967909464585e-08, "loss": 0.2009, "step": 48361 }, { "epoch": 0.8406542787116064, "grad_norm": 1.2544706826059098, "learning_rate": 6.51257868382728e-08, "loss": 0.2495, "step": 48362 }, { "epoch": 0.8406716612491092, "grad_norm": 0.9482430257686765, "learning_rate": 6.511189596025113e-08, "loss": 0.1322, "step": 48363 }, { "epoch": 0.840689043786612, "grad_norm": 1.474071410079222, "learning_rate": 6.509800646062508e-08, "loss": 0.1526, "step": 48364 }, { "epoch": 0.8407064263241147, "grad_norm": 1.737241864966463, "learning_rate": 6.508411833943861e-08, "loss": 0.1984, "step": 48365 }, { "epoch": 0.8407238088616176, "grad_norm": 2.202906182710123, "learning_rate": 6.507023159673564e-08, "loss": 0.2567, "step": 48366 }, { "epoch": 0.8407411913991204, "grad_norm": 1.609265962688202, "learning_rate": 6.505634623256029e-08, "loss": 0.2276, "step": 48367 }, { "epoch": 0.8407585739366232, "grad_norm": 3.2417495013422832, "learning_rate": 6.504246224695653e-08, "loss": 0.1908, "step": 48368 }, { "epoch": 0.8407759564741261, "grad_norm": 1.2662202461305296, "learning_rate": 6.502857963996833e-08, "loss": 0.1673, "step": 48369 }, { "epoch": 0.8407933390116289, "grad_norm": 1.4312181188228068, "learning_rate": 6.501469841163975e-08, "loss": 0.1404, "step": 48370 }, { "epoch": 0.8408107215491317, "grad_norm": 1.8476847485333256, "learning_rate": 6.500081856201467e-08, "loss": 0.1766, "step": 48371 }, { "epoch": 0.8408281040866346, "grad_norm": 3.453059561729076, "learning_rate": 6.498694009113726e-08, "loss": 0.2346, "step": 48372 }, { "epoch": 0.8408454866241374, "grad_norm": 1.4140563951878309, "learning_rate": 6.497306299905148e-08, "loss": 0.2114, "step": 48373 }, { "epoch": 0.8408628691616402, "grad_norm": 0.9735146870511568, "learning_rate": 6.495918728580113e-08, "loss": 0.2023, "step": 48374 }, { "epoch": 0.840880251699143, "grad_norm": 1.1246227036421392, "learning_rate": 6.494531295143035e-08, "loss": 0.1394, "step": 48375 }, { "epoch": 0.8408976342366459, "grad_norm": 4.84221533539606, "learning_rate": 6.49314399959831e-08, "loss": 0.2318, "step": 48376 }, { "epoch": 0.8409150167741487, "grad_norm": 1.8937107300619422, "learning_rate": 6.491756841950336e-08, "loss": 0.1868, "step": 48377 }, { "epoch": 0.8409323993116515, "grad_norm": 2.131830202899245, "learning_rate": 6.490369822203501e-08, "loss": 0.2084, "step": 48378 }, { "epoch": 0.8409497818491544, "grad_norm": 1.0556193580285473, "learning_rate": 6.488982940362203e-08, "loss": 0.1346, "step": 48379 }, { "epoch": 0.8409671643866572, "grad_norm": 1.4707885965327305, "learning_rate": 6.487596196430861e-08, "loss": 0.1238, "step": 48380 }, { "epoch": 0.84098454692416, "grad_norm": 0.9595547890949236, "learning_rate": 6.486209590413838e-08, "loss": 0.2001, "step": 48381 }, { "epoch": 0.8410019294616629, "grad_norm": 1.5541178008240777, "learning_rate": 6.484823122315531e-08, "loss": 0.2465, "step": 48382 }, { "epoch": 0.8410193119991657, "grad_norm": 1.9287494915284602, "learning_rate": 6.483436792140357e-08, "loss": 0.2303, "step": 48383 }, { "epoch": 0.8410366945366685, "grad_norm": 2.8114916039941136, "learning_rate": 6.482050599892702e-08, "loss": 0.2369, "step": 48384 }, { "epoch": 0.8410540770741712, "grad_norm": 2.4584262746492924, "learning_rate": 6.480664545576953e-08, "loss": 0.195, "step": 48385 }, { "epoch": 0.8410714596116741, "grad_norm": 1.1168916472962949, "learning_rate": 6.479278629197504e-08, "loss": 0.1727, "step": 48386 }, { "epoch": 0.8410888421491769, "grad_norm": 1.5220964596191, "learning_rate": 6.477892850758754e-08, "loss": 0.1282, "step": 48387 }, { "epoch": 0.8411062246866797, "grad_norm": 0.8765240759232859, "learning_rate": 6.476507210265092e-08, "loss": 0.1548, "step": 48388 }, { "epoch": 0.8411236072241826, "grad_norm": 1.296635545349213, "learning_rate": 6.475121707720904e-08, "loss": 0.1489, "step": 48389 }, { "epoch": 0.8411409897616854, "grad_norm": 0.9473479643870694, "learning_rate": 6.473736343130581e-08, "loss": 0.1101, "step": 48390 }, { "epoch": 0.8411583722991882, "grad_norm": 1.401248423600964, "learning_rate": 6.472351116498525e-08, "loss": 0.136, "step": 48391 }, { "epoch": 0.841175754836691, "grad_norm": 1.114530177054882, "learning_rate": 6.470966027829139e-08, "loss": 0.1505, "step": 48392 }, { "epoch": 0.8411931373741939, "grad_norm": 1.9281006007643444, "learning_rate": 6.469581077126767e-08, "loss": 0.2933, "step": 48393 }, { "epoch": 0.8412105199116967, "grad_norm": 1.1039867942087909, "learning_rate": 6.468196264395836e-08, "loss": 0.1951, "step": 48394 }, { "epoch": 0.8412279024491995, "grad_norm": 2.946391100065629, "learning_rate": 6.46681158964073e-08, "loss": 0.2732, "step": 48395 }, { "epoch": 0.8412452849867024, "grad_norm": 3.0098133028007537, "learning_rate": 6.465427052865835e-08, "loss": 0.2336, "step": 48396 }, { "epoch": 0.8412626675242052, "grad_norm": 2.285096260138393, "learning_rate": 6.464042654075536e-08, "loss": 0.391, "step": 48397 }, { "epoch": 0.841280050061708, "grad_norm": 2.0628440529885186, "learning_rate": 6.462658393274212e-08, "loss": 0.2754, "step": 48398 }, { "epoch": 0.8412974325992109, "grad_norm": 1.2867135771913663, "learning_rate": 6.461274270466288e-08, "loss": 0.1608, "step": 48399 }, { "epoch": 0.8413148151367137, "grad_norm": 4.486391141334042, "learning_rate": 6.459890285656106e-08, "loss": 0.232, "step": 48400 }, { "epoch": 0.8413321976742165, "grad_norm": 1.557886618024139, "learning_rate": 6.458506438848066e-08, "loss": 0.1621, "step": 48401 }, { "epoch": 0.8413495802117194, "grad_norm": 1.568030624771222, "learning_rate": 6.457122730046566e-08, "loss": 0.1498, "step": 48402 }, { "epoch": 0.8413669627492222, "grad_norm": 1.5047105621972638, "learning_rate": 6.45573915925599e-08, "loss": 0.1074, "step": 48403 }, { "epoch": 0.841384345286725, "grad_norm": 1.3304444198401393, "learning_rate": 6.454355726480715e-08, "loss": 0.2198, "step": 48404 }, { "epoch": 0.8414017278242277, "grad_norm": 1.1827183418026899, "learning_rate": 6.45297243172513e-08, "loss": 0.1685, "step": 48405 }, { "epoch": 0.8414191103617306, "grad_norm": 2.071135452959063, "learning_rate": 6.451589274993613e-08, "loss": 0.2189, "step": 48406 }, { "epoch": 0.8414364928992334, "grad_norm": 0.8002380923018599, "learning_rate": 6.450206256290558e-08, "loss": 0.1374, "step": 48407 }, { "epoch": 0.8414538754367362, "grad_norm": 0.8940621358393038, "learning_rate": 6.448823375620349e-08, "loss": 0.189, "step": 48408 }, { "epoch": 0.841471257974239, "grad_norm": 1.0800296917341068, "learning_rate": 6.447440632987344e-08, "loss": 0.2198, "step": 48409 }, { "epoch": 0.8414886405117419, "grad_norm": 1.4428830913287054, "learning_rate": 6.446058028395963e-08, "loss": 0.1636, "step": 48410 }, { "epoch": 0.8415060230492447, "grad_norm": 2.0205701056956284, "learning_rate": 6.444675561850582e-08, "loss": 0.1986, "step": 48411 }, { "epoch": 0.8415234055867475, "grad_norm": 1.3439570607841964, "learning_rate": 6.443293233355556e-08, "loss": 0.1252, "step": 48412 }, { "epoch": 0.8415407881242504, "grad_norm": 2.125861869219669, "learning_rate": 6.441911042915277e-08, "loss": 0.2497, "step": 48413 }, { "epoch": 0.8415581706617532, "grad_norm": 1.8738243859638648, "learning_rate": 6.440528990534139e-08, "loss": 0.2264, "step": 48414 }, { "epoch": 0.841575553199256, "grad_norm": 1.7025934784065229, "learning_rate": 6.439147076216511e-08, "loss": 0.1837, "step": 48415 }, { "epoch": 0.8415929357367589, "grad_norm": 1.100264847949471, "learning_rate": 6.437765299966785e-08, "loss": 0.1773, "step": 48416 }, { "epoch": 0.8416103182742617, "grad_norm": 2.8313095803405677, "learning_rate": 6.436383661789313e-08, "loss": 0.3568, "step": 48417 }, { "epoch": 0.8416277008117645, "grad_norm": 1.3939626967139098, "learning_rate": 6.435002161688519e-08, "loss": 0.1873, "step": 48418 }, { "epoch": 0.8416450833492674, "grad_norm": 1.1207463980608137, "learning_rate": 6.433620799668743e-08, "loss": 0.1936, "step": 48419 }, { "epoch": 0.8416624658867702, "grad_norm": 1.4454201125132857, "learning_rate": 6.432239575734383e-08, "loss": 0.1824, "step": 48420 }, { "epoch": 0.841679848424273, "grad_norm": 1.291884249246171, "learning_rate": 6.43085848988979e-08, "loss": 0.2784, "step": 48421 }, { "epoch": 0.8416972309617758, "grad_norm": 1.5909022755890703, "learning_rate": 6.429477542139378e-08, "loss": 0.1497, "step": 48422 }, { "epoch": 0.8417146134992787, "grad_norm": 1.2183483098959969, "learning_rate": 6.428096732487508e-08, "loss": 0.2501, "step": 48423 }, { "epoch": 0.8417319960367815, "grad_norm": 1.226293493190274, "learning_rate": 6.426716060938553e-08, "loss": 0.1571, "step": 48424 }, { "epoch": 0.8417493785742842, "grad_norm": 1.0873771690162062, "learning_rate": 6.425335527496895e-08, "loss": 0.1694, "step": 48425 }, { "epoch": 0.841766761111787, "grad_norm": 1.655202842045397, "learning_rate": 6.423955132166903e-08, "loss": 0.194, "step": 48426 }, { "epoch": 0.8417841436492899, "grad_norm": 0.9737654481954945, "learning_rate": 6.422574874952958e-08, "loss": 0.2826, "step": 48427 }, { "epoch": 0.8418015261867927, "grad_norm": 1.9461806176549603, "learning_rate": 6.421194755859433e-08, "loss": 0.3107, "step": 48428 }, { "epoch": 0.8418189087242955, "grad_norm": 1.2304630333109565, "learning_rate": 6.419814774890691e-08, "loss": 0.1379, "step": 48429 }, { "epoch": 0.8418362912617984, "grad_norm": 1.6083459542103637, "learning_rate": 6.418434932051143e-08, "loss": 0.1379, "step": 48430 }, { "epoch": 0.8418536737993012, "grad_norm": 1.0678397217232907, "learning_rate": 6.41705522734512e-08, "loss": 0.2805, "step": 48431 }, { "epoch": 0.841871056336804, "grad_norm": 1.5494610009757208, "learning_rate": 6.415675660777004e-08, "loss": 0.1835, "step": 48432 }, { "epoch": 0.8418884388743069, "grad_norm": 2.181674897058136, "learning_rate": 6.414296232351185e-08, "loss": 0.2029, "step": 48433 }, { "epoch": 0.8419058214118097, "grad_norm": 1.5782917181015235, "learning_rate": 6.412916942072022e-08, "loss": 0.2671, "step": 48434 }, { "epoch": 0.8419232039493125, "grad_norm": 1.212438578683037, "learning_rate": 6.411537789943894e-08, "loss": 0.1213, "step": 48435 }, { "epoch": 0.8419405864868154, "grad_norm": 2.4116817447292704, "learning_rate": 6.41015877597117e-08, "loss": 0.2925, "step": 48436 }, { "epoch": 0.8419579690243182, "grad_norm": 1.2486244105607232, "learning_rate": 6.408779900158212e-08, "loss": 0.1605, "step": 48437 }, { "epoch": 0.841975351561821, "grad_norm": 1.4826317478433357, "learning_rate": 6.407401162509402e-08, "loss": 0.2079, "step": 48438 }, { "epoch": 0.8419927340993238, "grad_norm": 1.4272507693834127, "learning_rate": 6.406022563029106e-08, "loss": 0.2203, "step": 48439 }, { "epoch": 0.8420101166368267, "grad_norm": 1.1041407085153152, "learning_rate": 6.404644101721685e-08, "loss": 0.0984, "step": 48440 }, { "epoch": 0.8420274991743295, "grad_norm": 1.7626336298686356, "learning_rate": 6.403265778591521e-08, "loss": 0.236, "step": 48441 }, { "epoch": 0.8420448817118323, "grad_norm": 1.6178930383104866, "learning_rate": 6.401887593642979e-08, "loss": 0.1914, "step": 48442 }, { "epoch": 0.8420622642493352, "grad_norm": 1.2443567109860882, "learning_rate": 6.40050954688044e-08, "loss": 0.1494, "step": 48443 }, { "epoch": 0.8420796467868379, "grad_norm": 3.1274175398777726, "learning_rate": 6.399131638308237e-08, "loss": 0.2553, "step": 48444 }, { "epoch": 0.8420970293243407, "grad_norm": 1.5408605148606236, "learning_rate": 6.397753867930761e-08, "loss": 0.2212, "step": 48445 }, { "epoch": 0.8421144118618435, "grad_norm": 1.6779224891333124, "learning_rate": 6.396376235752382e-08, "loss": 0.1936, "step": 48446 }, { "epoch": 0.8421317943993464, "grad_norm": 0.9779175745307515, "learning_rate": 6.394998741777463e-08, "loss": 0.0966, "step": 48447 }, { "epoch": 0.8421491769368492, "grad_norm": 1.1381254138796577, "learning_rate": 6.39362138601035e-08, "loss": 0.2062, "step": 48448 }, { "epoch": 0.842166559474352, "grad_norm": 1.25489959537923, "learning_rate": 6.392244168455451e-08, "loss": 0.1587, "step": 48449 }, { "epoch": 0.8421839420118549, "grad_norm": 1.710655517829211, "learning_rate": 6.390867089117097e-08, "loss": 0.2055, "step": 48450 }, { "epoch": 0.8422013245493577, "grad_norm": 1.1966165276768816, "learning_rate": 6.389490147999659e-08, "loss": 0.1687, "step": 48451 }, { "epoch": 0.8422187070868605, "grad_norm": 2.1138463906430394, "learning_rate": 6.388113345107499e-08, "loss": 0.1909, "step": 48452 }, { "epoch": 0.8422360896243634, "grad_norm": 1.7100900594911506, "learning_rate": 6.386736680444993e-08, "loss": 0.1606, "step": 48453 }, { "epoch": 0.8422534721618662, "grad_norm": 1.7453269683417638, "learning_rate": 6.385360154016495e-08, "loss": 0.1848, "step": 48454 }, { "epoch": 0.842270854699369, "grad_norm": 1.8269653902957588, "learning_rate": 6.383983765826373e-08, "loss": 0.2126, "step": 48455 }, { "epoch": 0.8422882372368719, "grad_norm": 1.7893494925889033, "learning_rate": 6.382607515878985e-08, "loss": 0.1356, "step": 48456 }, { "epoch": 0.8423056197743747, "grad_norm": 1.724172083426907, "learning_rate": 6.381231404178694e-08, "loss": 0.19, "step": 48457 }, { "epoch": 0.8423230023118775, "grad_norm": 4.047555138576135, "learning_rate": 6.37985543072987e-08, "loss": 0.3209, "step": 48458 }, { "epoch": 0.8423403848493803, "grad_norm": 0.8950697390726153, "learning_rate": 6.378479595536862e-08, "loss": 0.1606, "step": 48459 }, { "epoch": 0.8423577673868832, "grad_norm": 0.9627445815112439, "learning_rate": 6.377103898604025e-08, "loss": 0.1323, "step": 48460 }, { "epoch": 0.842375149924386, "grad_norm": 1.2580306630032154, "learning_rate": 6.375728339935737e-08, "loss": 0.1573, "step": 48461 }, { "epoch": 0.8423925324618888, "grad_norm": 1.7805041561171744, "learning_rate": 6.374352919536363e-08, "loss": 0.1493, "step": 48462 }, { "epoch": 0.8424099149993917, "grad_norm": 2.9211323856216778, "learning_rate": 6.372977637410232e-08, "loss": 0.1927, "step": 48463 }, { "epoch": 0.8424272975368944, "grad_norm": 0.77979367972322, "learning_rate": 6.371602493561734e-08, "loss": 0.1361, "step": 48464 }, { "epoch": 0.8424446800743972, "grad_norm": 1.3173614563018945, "learning_rate": 6.370227487995211e-08, "loss": 0.2071, "step": 48465 }, { "epoch": 0.8424620626119, "grad_norm": 1.898683570308063, "learning_rate": 6.368852620715026e-08, "loss": 0.2181, "step": 48466 }, { "epoch": 0.8424794451494029, "grad_norm": 1.1788430578020659, "learning_rate": 6.367477891725531e-08, "loss": 0.1548, "step": 48467 }, { "epoch": 0.8424968276869057, "grad_norm": 1.0555228362950182, "learning_rate": 6.366103301031095e-08, "loss": 0.2034, "step": 48468 }, { "epoch": 0.8425142102244085, "grad_norm": 0.9642466956092564, "learning_rate": 6.364728848636064e-08, "loss": 0.1575, "step": 48469 }, { "epoch": 0.8425315927619114, "grad_norm": 1.5303495449541065, "learning_rate": 6.363354534544802e-08, "loss": 0.1156, "step": 48470 }, { "epoch": 0.8425489752994142, "grad_norm": 1.2377314298973214, "learning_rate": 6.361980358761642e-08, "loss": 0.1863, "step": 48471 }, { "epoch": 0.842566357836917, "grad_norm": 1.285422266783284, "learning_rate": 6.360606321290979e-08, "loss": 0.1662, "step": 48472 }, { "epoch": 0.8425837403744199, "grad_norm": 2.2490228617011008, "learning_rate": 6.35923242213714e-08, "loss": 0.1963, "step": 48473 }, { "epoch": 0.8426011229119227, "grad_norm": 2.268850810242591, "learning_rate": 6.357858661304504e-08, "loss": 0.1769, "step": 48474 }, { "epoch": 0.8426185054494255, "grad_norm": 1.4292263390115414, "learning_rate": 6.356485038797382e-08, "loss": 0.2355, "step": 48475 }, { "epoch": 0.8426358879869283, "grad_norm": 1.5159217599133448, "learning_rate": 6.355111554620168e-08, "loss": 0.1605, "step": 48476 }, { "epoch": 0.8426532705244312, "grad_norm": 1.4394558164794224, "learning_rate": 6.353738208777205e-08, "loss": 0.1521, "step": 48477 }, { "epoch": 0.842670653061934, "grad_norm": 1.2076502690234914, "learning_rate": 6.352365001272836e-08, "loss": 0.1661, "step": 48478 }, { "epoch": 0.8426880355994368, "grad_norm": 1.1818926144157562, "learning_rate": 6.35099193211141e-08, "loss": 0.1585, "step": 48479 }, { "epoch": 0.8427054181369397, "grad_norm": 1.4746799339333452, "learning_rate": 6.349619001297301e-08, "loss": 0.1458, "step": 48480 }, { "epoch": 0.8427228006744425, "grad_norm": 1.568837254056081, "learning_rate": 6.348246208834861e-08, "loss": 0.1123, "step": 48481 }, { "epoch": 0.8427401832119453, "grad_norm": 1.0853921943321494, "learning_rate": 6.346873554728404e-08, "loss": 0.1421, "step": 48482 }, { "epoch": 0.8427575657494482, "grad_norm": 2.1433796436906825, "learning_rate": 6.34550103898232e-08, "loss": 0.1708, "step": 48483 }, { "epoch": 0.8427749482869509, "grad_norm": 2.3785830646821555, "learning_rate": 6.344128661600939e-08, "loss": 0.2481, "step": 48484 }, { "epoch": 0.8427923308244537, "grad_norm": 1.898905874742327, "learning_rate": 6.342756422588619e-08, "loss": 0.1369, "step": 48485 }, { "epoch": 0.8428097133619565, "grad_norm": 0.9502696577525048, "learning_rate": 6.341384321949705e-08, "loss": 0.2641, "step": 48486 }, { "epoch": 0.8428270958994594, "grad_norm": 1.4467711230408282, "learning_rate": 6.340012359688552e-08, "loss": 0.1779, "step": 48487 }, { "epoch": 0.8428444784369622, "grad_norm": 1.3590852557064321, "learning_rate": 6.338640535809503e-08, "loss": 0.2747, "step": 48488 }, { "epoch": 0.842861860974465, "grad_norm": 1.4507347874542098, "learning_rate": 6.337268850316902e-08, "loss": 0.1833, "step": 48489 }, { "epoch": 0.8428792435119679, "grad_norm": 1.2333401022123034, "learning_rate": 6.335897303215099e-08, "loss": 0.172, "step": 48490 }, { "epoch": 0.8428966260494707, "grad_norm": 1.1824487408465099, "learning_rate": 6.334525894508447e-08, "loss": 0.1161, "step": 48491 }, { "epoch": 0.8429140085869735, "grad_norm": 0.8718933103680448, "learning_rate": 6.333154624201293e-08, "loss": 0.1131, "step": 48492 }, { "epoch": 0.8429313911244763, "grad_norm": 1.15492075484761, "learning_rate": 6.331783492297992e-08, "loss": 0.371, "step": 48493 }, { "epoch": 0.8429487736619792, "grad_norm": 2.125270529988637, "learning_rate": 6.330412498802851e-08, "loss": 0.2753, "step": 48494 }, { "epoch": 0.842966156199482, "grad_norm": 1.7118444741693122, "learning_rate": 6.329041643720256e-08, "loss": 0.2344, "step": 48495 }, { "epoch": 0.8429835387369848, "grad_norm": 0.9077802381027213, "learning_rate": 6.32767092705454e-08, "loss": 0.1626, "step": 48496 }, { "epoch": 0.8430009212744877, "grad_norm": 1.5511039668946593, "learning_rate": 6.326300348810049e-08, "loss": 0.177, "step": 48497 }, { "epoch": 0.8430183038119905, "grad_norm": 1.474173226886971, "learning_rate": 6.324929908991105e-08, "loss": 0.1408, "step": 48498 }, { "epoch": 0.8430356863494933, "grad_norm": 1.8842458135214173, "learning_rate": 6.323559607602086e-08, "loss": 0.1776, "step": 48499 }, { "epoch": 0.8430530688869962, "grad_norm": 0.966987802008956, "learning_rate": 6.32218944464733e-08, "loss": 0.2541, "step": 48500 }, { "epoch": 0.843070451424499, "grad_norm": 2.5864234250906533, "learning_rate": 6.320819420131158e-08, "loss": 0.2281, "step": 48501 }, { "epoch": 0.8430878339620018, "grad_norm": 3.9650403238331027, "learning_rate": 6.319449534057914e-08, "loss": 0.4605, "step": 48502 }, { "epoch": 0.8431052164995047, "grad_norm": 1.6457419507609883, "learning_rate": 6.318079786431957e-08, "loss": 0.1864, "step": 48503 }, { "epoch": 0.8431225990370074, "grad_norm": 1.6950443671377837, "learning_rate": 6.316710177257628e-08, "loss": 0.2889, "step": 48504 }, { "epoch": 0.8431399815745102, "grad_norm": 3.4508165311023165, "learning_rate": 6.315340706539258e-08, "loss": 0.2536, "step": 48505 }, { "epoch": 0.843157364112013, "grad_norm": 2.5657063183350193, "learning_rate": 6.313971374281186e-08, "loss": 0.1697, "step": 48506 }, { "epoch": 0.8431747466495159, "grad_norm": 0.5923816969311041, "learning_rate": 6.312602180487764e-08, "loss": 0.1571, "step": 48507 }, { "epoch": 0.8431921291870187, "grad_norm": 1.6133472372200675, "learning_rate": 6.311233125163317e-08, "loss": 0.174, "step": 48508 }, { "epoch": 0.8432095117245215, "grad_norm": 1.1286561979374765, "learning_rate": 6.309864208312199e-08, "loss": 0.2027, "step": 48509 }, { "epoch": 0.8432268942620244, "grad_norm": 2.245932594973843, "learning_rate": 6.308495429938732e-08, "loss": 0.2073, "step": 48510 }, { "epoch": 0.8432442767995272, "grad_norm": 1.3395983915935874, "learning_rate": 6.307126790047268e-08, "loss": 0.1518, "step": 48511 }, { "epoch": 0.84326165933703, "grad_norm": 2.2947883823695334, "learning_rate": 6.30575828864216e-08, "loss": 0.2427, "step": 48512 }, { "epoch": 0.8432790418745328, "grad_norm": 1.4701283631990192, "learning_rate": 6.304389925727699e-08, "loss": 0.1941, "step": 48513 }, { "epoch": 0.8432964244120357, "grad_norm": 1.5100780248533094, "learning_rate": 6.303021701308264e-08, "loss": 0.2164, "step": 48514 }, { "epoch": 0.8433138069495385, "grad_norm": 1.2102648743762037, "learning_rate": 6.301653615388174e-08, "loss": 0.1418, "step": 48515 }, { "epoch": 0.8433311894870413, "grad_norm": 0.9443848848185792, "learning_rate": 6.300285667971771e-08, "loss": 0.1629, "step": 48516 }, { "epoch": 0.8433485720245442, "grad_norm": 2.9315788509472727, "learning_rate": 6.298917859063391e-08, "loss": 0.2328, "step": 48517 }, { "epoch": 0.843365954562047, "grad_norm": 1.1477313846697486, "learning_rate": 6.29755018866735e-08, "loss": 0.1496, "step": 48518 }, { "epoch": 0.8433833370995498, "grad_norm": 1.5172339175387068, "learning_rate": 6.296182656788029e-08, "loss": 0.2449, "step": 48519 }, { "epoch": 0.8434007196370527, "grad_norm": 1.0014690199852854, "learning_rate": 6.294815263429715e-08, "loss": 0.2695, "step": 48520 }, { "epoch": 0.8434181021745555, "grad_norm": 5.64641217122436, "learning_rate": 6.293448008596752e-08, "loss": 0.3421, "step": 48521 }, { "epoch": 0.8434354847120583, "grad_norm": 1.0996920901777203, "learning_rate": 6.292080892293494e-08, "loss": 0.2177, "step": 48522 }, { "epoch": 0.8434528672495611, "grad_norm": 1.2366535590800587, "learning_rate": 6.290713914524259e-08, "loss": 0.1852, "step": 48523 }, { "epoch": 0.8434702497870639, "grad_norm": 0.9792694919036345, "learning_rate": 6.289347075293382e-08, "loss": 0.2383, "step": 48524 }, { "epoch": 0.8434876323245667, "grad_norm": 1.1910031050321006, "learning_rate": 6.287980374605201e-08, "loss": 0.1812, "step": 48525 }, { "epoch": 0.8435050148620695, "grad_norm": 1.3712321868496102, "learning_rate": 6.286613812464037e-08, "loss": 0.2983, "step": 48526 }, { "epoch": 0.8435223973995724, "grad_norm": 1.7368890618263602, "learning_rate": 6.285247388874231e-08, "loss": 0.1591, "step": 48527 }, { "epoch": 0.8435397799370752, "grad_norm": 1.026058478392969, "learning_rate": 6.283881103840111e-08, "loss": 0.1447, "step": 48528 }, { "epoch": 0.843557162474578, "grad_norm": 1.843742735620341, "learning_rate": 6.282514957365993e-08, "loss": 0.1552, "step": 48529 }, { "epoch": 0.8435745450120808, "grad_norm": 0.9866026554866156, "learning_rate": 6.281148949456228e-08, "loss": 0.1612, "step": 48530 }, { "epoch": 0.8435919275495837, "grad_norm": 1.5622661876858979, "learning_rate": 6.279783080115152e-08, "loss": 0.1822, "step": 48531 }, { "epoch": 0.8436093100870865, "grad_norm": 1.7077543863470592, "learning_rate": 6.278417349347071e-08, "loss": 0.2189, "step": 48532 }, { "epoch": 0.8436266926245893, "grad_norm": 2.3177231970265892, "learning_rate": 6.27705175715631e-08, "loss": 0.245, "step": 48533 }, { "epoch": 0.8436440751620922, "grad_norm": 0.6378608943436365, "learning_rate": 6.275686303547217e-08, "loss": 0.2072, "step": 48534 }, { "epoch": 0.843661457699595, "grad_norm": 2.4284652439358383, "learning_rate": 6.274320988524117e-08, "loss": 0.2558, "step": 48535 }, { "epoch": 0.8436788402370978, "grad_norm": 2.8347243271725264, "learning_rate": 6.272955812091335e-08, "loss": 0.2251, "step": 48536 }, { "epoch": 0.8436962227746007, "grad_norm": 2.1833170856418866, "learning_rate": 6.271590774253183e-08, "loss": 0.3274, "step": 48537 }, { "epoch": 0.8437136053121035, "grad_norm": 1.1224350820852191, "learning_rate": 6.270225875014024e-08, "loss": 0.1972, "step": 48538 }, { "epoch": 0.8437309878496063, "grad_norm": 2.986562295600446, "learning_rate": 6.268861114378154e-08, "loss": 0.135, "step": 48539 }, { "epoch": 0.8437483703871091, "grad_norm": 2.0530331440160663, "learning_rate": 6.267496492349899e-08, "loss": 0.2122, "step": 48540 }, { "epoch": 0.843765752924612, "grad_norm": 1.2563653085238222, "learning_rate": 6.266132008933583e-08, "loss": 0.1261, "step": 48541 }, { "epoch": 0.8437831354621148, "grad_norm": 1.8701205502160065, "learning_rate": 6.264767664133552e-08, "loss": 0.2379, "step": 48542 }, { "epoch": 0.8438005179996176, "grad_norm": 1.1021138194746718, "learning_rate": 6.263403457954114e-08, "loss": 0.255, "step": 48543 }, { "epoch": 0.8438179005371204, "grad_norm": 1.2646653871673799, "learning_rate": 6.262039390399598e-08, "loss": 0.1463, "step": 48544 }, { "epoch": 0.8438352830746232, "grad_norm": 1.0337638361679966, "learning_rate": 6.26067546147433e-08, "loss": 0.2139, "step": 48545 }, { "epoch": 0.843852665612126, "grad_norm": 1.0472807048172548, "learning_rate": 6.259311671182621e-08, "loss": 0.1303, "step": 48546 }, { "epoch": 0.8438700481496288, "grad_norm": 1.112574838404821, "learning_rate": 6.257948019528803e-08, "loss": 0.1369, "step": 48547 }, { "epoch": 0.8438874306871317, "grad_norm": 1.108852613603369, "learning_rate": 6.256584506517198e-08, "loss": 0.2007, "step": 48548 }, { "epoch": 0.8439048132246345, "grad_norm": 1.270391308366485, "learning_rate": 6.255221132152116e-08, "loss": 0.2278, "step": 48549 }, { "epoch": 0.8439221957621373, "grad_norm": 2.2204901032978155, "learning_rate": 6.253857896437908e-08, "loss": 0.2807, "step": 48550 }, { "epoch": 0.8439395782996402, "grad_norm": 1.1202418859945107, "learning_rate": 6.252494799378866e-08, "loss": 0.1846, "step": 48551 }, { "epoch": 0.843956960837143, "grad_norm": 1.154230705897889, "learning_rate": 6.251131840979307e-08, "loss": 0.2315, "step": 48552 }, { "epoch": 0.8439743433746458, "grad_norm": 2.5357948304725086, "learning_rate": 6.249769021243573e-08, "loss": 0.1976, "step": 48553 }, { "epoch": 0.8439917259121487, "grad_norm": 2.452085278223627, "learning_rate": 6.248406340175977e-08, "loss": 0.1986, "step": 48554 }, { "epoch": 0.8440091084496515, "grad_norm": 1.1033887990729443, "learning_rate": 6.247043797780832e-08, "loss": 0.136, "step": 48555 }, { "epoch": 0.8440264909871543, "grad_norm": 2.0671130688917447, "learning_rate": 6.245681394062463e-08, "loss": 0.1967, "step": 48556 }, { "epoch": 0.8440438735246572, "grad_norm": 1.6333382091621194, "learning_rate": 6.244319129025183e-08, "loss": 0.1488, "step": 48557 }, { "epoch": 0.84406125606216, "grad_norm": 2.677754848185436, "learning_rate": 6.24295700267331e-08, "loss": 0.2252, "step": 48558 }, { "epoch": 0.8440786385996628, "grad_norm": 1.0936107917456908, "learning_rate": 6.241595015011164e-08, "loss": 0.2209, "step": 48559 }, { "epoch": 0.8440960211371656, "grad_norm": 1.460391534562361, "learning_rate": 6.240233166043052e-08, "loss": 0.1679, "step": 48560 }, { "epoch": 0.8441134036746685, "grad_norm": 3.137172509271992, "learning_rate": 6.238871455773309e-08, "loss": 0.2968, "step": 48561 }, { "epoch": 0.8441307862121713, "grad_norm": 1.5006805199548727, "learning_rate": 6.237509884206244e-08, "loss": 0.256, "step": 48562 }, { "epoch": 0.8441481687496741, "grad_norm": 1.2148533135994186, "learning_rate": 6.236148451346173e-08, "loss": 0.1441, "step": 48563 }, { "epoch": 0.8441655512871769, "grad_norm": 1.785516252233542, "learning_rate": 6.234787157197396e-08, "loss": 0.4198, "step": 48564 }, { "epoch": 0.8441829338246797, "grad_norm": 1.6419852221300193, "learning_rate": 6.233426001764241e-08, "loss": 0.1576, "step": 48565 }, { "epoch": 0.8442003163621825, "grad_norm": 1.0425172353476755, "learning_rate": 6.232064985051027e-08, "loss": 0.2317, "step": 48566 }, { "epoch": 0.8442176988996853, "grad_norm": 1.249805863949001, "learning_rate": 6.230704107062062e-08, "loss": 0.1762, "step": 48567 }, { "epoch": 0.8442350814371882, "grad_norm": 3.3236320682622127, "learning_rate": 6.229343367801648e-08, "loss": 0.2226, "step": 48568 }, { "epoch": 0.844252463974691, "grad_norm": 1.3901380363106177, "learning_rate": 6.227982767274126e-08, "loss": 0.1697, "step": 48569 }, { "epoch": 0.8442698465121938, "grad_norm": 1.8735504347435195, "learning_rate": 6.226622305483786e-08, "loss": 0.2158, "step": 48570 }, { "epoch": 0.8442872290496967, "grad_norm": 2.6217154190539826, "learning_rate": 6.225261982434943e-08, "loss": 0.2477, "step": 48571 }, { "epoch": 0.8443046115871995, "grad_norm": 1.5707082689877345, "learning_rate": 6.223901798131897e-08, "loss": 0.2518, "step": 48572 }, { "epoch": 0.8443219941247023, "grad_norm": 1.5069387612837748, "learning_rate": 6.222541752578991e-08, "loss": 0.1699, "step": 48573 }, { "epoch": 0.8443393766622052, "grad_norm": 1.7787137121651961, "learning_rate": 6.22118184578051e-08, "loss": 0.1594, "step": 48574 }, { "epoch": 0.844356759199708, "grad_norm": 1.8081852896588233, "learning_rate": 6.219822077740777e-08, "loss": 0.1833, "step": 48575 }, { "epoch": 0.8443741417372108, "grad_norm": 2.384078808096105, "learning_rate": 6.218462448464096e-08, "loss": 0.2072, "step": 48576 }, { "epoch": 0.8443915242747136, "grad_norm": 1.5039785422604086, "learning_rate": 6.217102957954778e-08, "loss": 0.2015, "step": 48577 }, { "epoch": 0.8444089068122165, "grad_norm": 0.8837535772245207, "learning_rate": 6.215743606217133e-08, "loss": 0.1463, "step": 48578 }, { "epoch": 0.8444262893497193, "grad_norm": 2.0731227682073787, "learning_rate": 6.214384393255456e-08, "loss": 0.1932, "step": 48579 }, { "epoch": 0.8444436718872221, "grad_norm": 1.6731323502039042, "learning_rate": 6.213025319074077e-08, "loss": 0.213, "step": 48580 }, { "epoch": 0.844461054424725, "grad_norm": 1.1123786681969947, "learning_rate": 6.211666383677294e-08, "loss": 0.2669, "step": 48581 }, { "epoch": 0.8444784369622278, "grad_norm": 2.0889823041540776, "learning_rate": 6.210307587069425e-08, "loss": 0.2214, "step": 48582 }, { "epoch": 0.8444958194997305, "grad_norm": 1.7685867101534933, "learning_rate": 6.208948929254743e-08, "loss": 0.2018, "step": 48583 }, { "epoch": 0.8445132020372333, "grad_norm": 1.5672931716867915, "learning_rate": 6.20759041023759e-08, "loss": 0.2377, "step": 48584 }, { "epoch": 0.8445305845747362, "grad_norm": 0.941115167748488, "learning_rate": 6.206232030022257e-08, "loss": 0.2657, "step": 48585 }, { "epoch": 0.844547967112239, "grad_norm": 1.798630482818509, "learning_rate": 6.204873788613047e-08, "loss": 0.2374, "step": 48586 }, { "epoch": 0.8445653496497418, "grad_norm": 1.3110770169841672, "learning_rate": 6.203515686014266e-08, "loss": 0.1466, "step": 48587 }, { "epoch": 0.8445827321872447, "grad_norm": 1.59496555078414, "learning_rate": 6.202157722230239e-08, "loss": 0.2028, "step": 48588 }, { "epoch": 0.8446001147247475, "grad_norm": 1.5396858142824328, "learning_rate": 6.200799897265241e-08, "loss": 0.1251, "step": 48589 }, { "epoch": 0.8446174972622503, "grad_norm": 1.5271673953410647, "learning_rate": 6.19944221112359e-08, "loss": 0.1355, "step": 48590 }, { "epoch": 0.8446348797997532, "grad_norm": 1.0190471979675257, "learning_rate": 6.198084663809577e-08, "loss": 0.1403, "step": 48591 }, { "epoch": 0.844652262337256, "grad_norm": 1.3473520442376694, "learning_rate": 6.196727255327527e-08, "loss": 0.125, "step": 48592 }, { "epoch": 0.8446696448747588, "grad_norm": 1.0442895572818978, "learning_rate": 6.195369985681726e-08, "loss": 0.1851, "step": 48593 }, { "epoch": 0.8446870274122616, "grad_norm": 1.0999053667075305, "learning_rate": 6.194012854876479e-08, "loss": 0.1151, "step": 48594 }, { "epoch": 0.8447044099497645, "grad_norm": 2.2805730649909353, "learning_rate": 6.192655862916091e-08, "loss": 0.2705, "step": 48595 }, { "epoch": 0.8447217924872673, "grad_norm": 1.5223651791276118, "learning_rate": 6.191299009804863e-08, "loss": 0.2107, "step": 48596 }, { "epoch": 0.8447391750247701, "grad_norm": 0.8949107248234642, "learning_rate": 6.18994229554709e-08, "loss": 0.1552, "step": 48597 }, { "epoch": 0.844756557562273, "grad_norm": 1.8519143382290941, "learning_rate": 6.188585720147077e-08, "loss": 0.1512, "step": 48598 }, { "epoch": 0.8447739400997758, "grad_norm": 1.6782361759719475, "learning_rate": 6.187229283609108e-08, "loss": 0.164, "step": 48599 }, { "epoch": 0.8447913226372786, "grad_norm": 1.409711046993669, "learning_rate": 6.185872985937507e-08, "loss": 0.1637, "step": 48600 }, { "epoch": 0.8448087051747815, "grad_norm": 1.5017506249283552, "learning_rate": 6.184516827136577e-08, "loss": 0.1768, "step": 48601 }, { "epoch": 0.8448260877122843, "grad_norm": 2.273661400698869, "learning_rate": 6.183160807210575e-08, "loss": 0.1788, "step": 48602 }, { "epoch": 0.844843470249787, "grad_norm": 1.6059610619475932, "learning_rate": 6.181804926163842e-08, "loss": 0.1827, "step": 48603 }, { "epoch": 0.8448608527872898, "grad_norm": 1.7596271710411484, "learning_rate": 6.180449184000653e-08, "loss": 0.22, "step": 48604 }, { "epoch": 0.8448782353247927, "grad_norm": 1.3307194767359145, "learning_rate": 6.179093580725315e-08, "loss": 0.1551, "step": 48605 }, { "epoch": 0.8448956178622955, "grad_norm": 0.9245524783745822, "learning_rate": 6.177738116342118e-08, "loss": 0.1077, "step": 48606 }, { "epoch": 0.8449130003997983, "grad_norm": 1.3806300292738365, "learning_rate": 6.176382790855367e-08, "loss": 0.1994, "step": 48607 }, { "epoch": 0.8449303829373012, "grad_norm": 1.6234945329565582, "learning_rate": 6.175027604269345e-08, "loss": 0.2884, "step": 48608 }, { "epoch": 0.844947765474804, "grad_norm": 1.1579539720130025, "learning_rate": 6.173672556588355e-08, "loss": 0.2318, "step": 48609 }, { "epoch": 0.8449651480123068, "grad_norm": 1.1028817563124627, "learning_rate": 6.172317647816683e-08, "loss": 0.1564, "step": 48610 }, { "epoch": 0.8449825305498097, "grad_norm": 2.2368911710397303, "learning_rate": 6.170962877958635e-08, "loss": 0.2024, "step": 48611 }, { "epoch": 0.8449999130873125, "grad_norm": 2.248554537688092, "learning_rate": 6.16960824701851e-08, "loss": 0.2333, "step": 48612 }, { "epoch": 0.8450172956248153, "grad_norm": 1.6694870995334574, "learning_rate": 6.168253755000597e-08, "loss": 0.107, "step": 48613 }, { "epoch": 0.8450346781623181, "grad_norm": 1.2892370334789718, "learning_rate": 6.16689940190917e-08, "loss": 0.1502, "step": 48614 }, { "epoch": 0.845052060699821, "grad_norm": 1.9040197631645668, "learning_rate": 6.165545187748544e-08, "loss": 0.1681, "step": 48615 }, { "epoch": 0.8450694432373238, "grad_norm": 1.3685115726205084, "learning_rate": 6.164191112522998e-08, "loss": 0.1679, "step": 48616 }, { "epoch": 0.8450868257748266, "grad_norm": 1.455606856304484, "learning_rate": 6.16283717623684e-08, "loss": 0.1283, "step": 48617 }, { "epoch": 0.8451042083123295, "grad_norm": 2.3731893382395604, "learning_rate": 6.161483378894334e-08, "loss": 0.4076, "step": 48618 }, { "epoch": 0.8451215908498323, "grad_norm": 2.147217334472756, "learning_rate": 6.160129720499802e-08, "loss": 0.3244, "step": 48619 }, { "epoch": 0.8451389733873351, "grad_norm": 2.400427260785678, "learning_rate": 6.158776201057525e-08, "loss": 0.2782, "step": 48620 }, { "epoch": 0.845156355924838, "grad_norm": 1.3618458727070448, "learning_rate": 6.157422820571783e-08, "loss": 0.2019, "step": 48621 }, { "epoch": 0.8451737384623408, "grad_norm": 1.4671679394544122, "learning_rate": 6.156069579046857e-08, "loss": 0.2145, "step": 48622 }, { "epoch": 0.8451911209998435, "grad_norm": 1.2252233340102334, "learning_rate": 6.154716476487065e-08, "loss": 0.177, "step": 48623 }, { "epoch": 0.8452085035373463, "grad_norm": 1.4009494280046384, "learning_rate": 6.153363512896681e-08, "loss": 0.0906, "step": 48624 }, { "epoch": 0.8452258860748492, "grad_norm": 1.0875631357273206, "learning_rate": 6.15201068827999e-08, "loss": 0.2576, "step": 48625 }, { "epoch": 0.845243268612352, "grad_norm": 1.0754914192455167, "learning_rate": 6.150658002641285e-08, "loss": 0.154, "step": 48626 }, { "epoch": 0.8452606511498548, "grad_norm": 1.8161042024930683, "learning_rate": 6.14930545598485e-08, "loss": 0.2362, "step": 48627 }, { "epoch": 0.8452780336873577, "grad_norm": 1.1736656471090523, "learning_rate": 6.147953048314974e-08, "loss": 0.1734, "step": 48628 }, { "epoch": 0.8452954162248605, "grad_norm": 3.271041084191084, "learning_rate": 6.146600779635941e-08, "loss": 0.2733, "step": 48629 }, { "epoch": 0.8453127987623633, "grad_norm": 1.2092204990477253, "learning_rate": 6.145248649952028e-08, "loss": 0.1458, "step": 48630 }, { "epoch": 0.8453301812998661, "grad_norm": 2.5277993408827215, "learning_rate": 6.143896659267545e-08, "loss": 0.2276, "step": 48631 }, { "epoch": 0.845347563837369, "grad_norm": 1.0400297816555137, "learning_rate": 6.142544807586774e-08, "loss": 0.2293, "step": 48632 }, { "epoch": 0.8453649463748718, "grad_norm": 2.5733580689944104, "learning_rate": 6.141193094913972e-08, "loss": 0.2717, "step": 48633 }, { "epoch": 0.8453823289123746, "grad_norm": 1.0986077275287771, "learning_rate": 6.139841521253446e-08, "loss": 0.1654, "step": 48634 }, { "epoch": 0.8453997114498775, "grad_norm": 1.7124362889058398, "learning_rate": 6.138490086609477e-08, "loss": 0.2805, "step": 48635 }, { "epoch": 0.8454170939873803, "grad_norm": 1.3787338835146303, "learning_rate": 6.13713879098634e-08, "loss": 0.1957, "step": 48636 }, { "epoch": 0.8454344765248831, "grad_norm": 1.8225458415719484, "learning_rate": 6.135787634388335e-08, "loss": 0.1963, "step": 48637 }, { "epoch": 0.845451859062386, "grad_norm": 1.6541843717522868, "learning_rate": 6.134436616819716e-08, "loss": 0.2463, "step": 48638 }, { "epoch": 0.8454692415998888, "grad_norm": 1.8119886470363258, "learning_rate": 6.133085738284805e-08, "loss": 0.2796, "step": 48639 }, { "epoch": 0.8454866241373916, "grad_norm": 1.2358232395999653, "learning_rate": 6.131734998787846e-08, "loss": 0.2617, "step": 48640 }, { "epoch": 0.8455040066748944, "grad_norm": 1.6407958048098354, "learning_rate": 6.130384398333133e-08, "loss": 0.1751, "step": 48641 }, { "epoch": 0.8455213892123973, "grad_norm": 0.9493357920064818, "learning_rate": 6.129033936924955e-08, "loss": 0.2076, "step": 48642 }, { "epoch": 0.8455387717499, "grad_norm": 1.8802680422340203, "learning_rate": 6.127683614567592e-08, "loss": 0.2109, "step": 48643 }, { "epoch": 0.8455561542874028, "grad_norm": 1.5966703697550042, "learning_rate": 6.126333431265313e-08, "loss": 0.3312, "step": 48644 }, { "epoch": 0.8455735368249057, "grad_norm": 1.2952890372215766, "learning_rate": 6.124983387022403e-08, "loss": 0.1533, "step": 48645 }, { "epoch": 0.8455909193624085, "grad_norm": 1.0076789630560594, "learning_rate": 6.123633481843144e-08, "loss": 0.18, "step": 48646 }, { "epoch": 0.8456083018999113, "grad_norm": 1.015122621645632, "learning_rate": 6.122283715731813e-08, "loss": 0.1572, "step": 48647 }, { "epoch": 0.8456256844374141, "grad_norm": 1.6241640116259946, "learning_rate": 6.120934088692687e-08, "loss": 0.2694, "step": 48648 }, { "epoch": 0.845643066974917, "grad_norm": 0.9645200986811896, "learning_rate": 6.119584600730027e-08, "loss": 0.1396, "step": 48649 }, { "epoch": 0.8456604495124198, "grad_norm": 1.0105412901286424, "learning_rate": 6.118235251848142e-08, "loss": 0.1462, "step": 48650 }, { "epoch": 0.8456778320499226, "grad_norm": 1.3932795947607213, "learning_rate": 6.116886042051306e-08, "loss": 0.2645, "step": 48651 }, { "epoch": 0.8456952145874255, "grad_norm": 3.4677138072361213, "learning_rate": 6.115536971343772e-08, "loss": 0.2304, "step": 48652 }, { "epoch": 0.8457125971249283, "grad_norm": 1.3732061489464804, "learning_rate": 6.114188039729812e-08, "loss": 0.1805, "step": 48653 }, { "epoch": 0.8457299796624311, "grad_norm": 2.9345561056625, "learning_rate": 6.11283924721373e-08, "loss": 0.3928, "step": 48654 }, { "epoch": 0.845747362199934, "grad_norm": 1.3013981677522388, "learning_rate": 6.111490593799784e-08, "loss": 0.1516, "step": 48655 }, { "epoch": 0.8457647447374368, "grad_norm": 1.2555937057766604, "learning_rate": 6.110142079492258e-08, "loss": 0.2705, "step": 48656 }, { "epoch": 0.8457821272749396, "grad_norm": 2.3061920603519526, "learning_rate": 6.108793704295407e-08, "loss": 0.1458, "step": 48657 }, { "epoch": 0.8457995098124425, "grad_norm": 1.0139878420355763, "learning_rate": 6.107445468213534e-08, "loss": 0.1425, "step": 48658 }, { "epoch": 0.8458168923499453, "grad_norm": 1.7936983595270894, "learning_rate": 6.106097371250896e-08, "loss": 0.1078, "step": 48659 }, { "epoch": 0.8458342748874481, "grad_norm": 0.6662838523794165, "learning_rate": 6.104749413411758e-08, "loss": 0.3026, "step": 48660 }, { "epoch": 0.8458516574249509, "grad_norm": 1.298844408189827, "learning_rate": 6.103401594700391e-08, "loss": 0.1185, "step": 48661 }, { "epoch": 0.8458690399624538, "grad_norm": 1.4039304220526279, "learning_rate": 6.102053915121092e-08, "loss": 0.1427, "step": 48662 }, { "epoch": 0.8458864224999565, "grad_norm": 1.9982472413339418, "learning_rate": 6.100706374678111e-08, "loss": 0.1566, "step": 48663 }, { "epoch": 0.8459038050374593, "grad_norm": 0.9384948580117023, "learning_rate": 6.099358973375729e-08, "loss": 0.2114, "step": 48664 }, { "epoch": 0.8459211875749622, "grad_norm": 1.223630335096651, "learning_rate": 6.09801171121821e-08, "loss": 0.1371, "step": 48665 }, { "epoch": 0.845938570112465, "grad_norm": 1.3013086609418658, "learning_rate": 6.096664588209826e-08, "loss": 0.189, "step": 48666 }, { "epoch": 0.8459559526499678, "grad_norm": 1.131075896856582, "learning_rate": 6.095317604354855e-08, "loss": 0.1558, "step": 48667 }, { "epoch": 0.8459733351874706, "grad_norm": 1.0142200908856753, "learning_rate": 6.09397075965754e-08, "loss": 0.2494, "step": 48668 }, { "epoch": 0.8459907177249735, "grad_norm": 1.2097097388642848, "learning_rate": 6.092624054122186e-08, "loss": 0.1463, "step": 48669 }, { "epoch": 0.8460081002624763, "grad_norm": 1.1539403218835917, "learning_rate": 6.09127748775305e-08, "loss": 0.1656, "step": 48670 }, { "epoch": 0.8460254827999791, "grad_norm": 1.3428409027360384, "learning_rate": 6.089931060554387e-08, "loss": 0.1911, "step": 48671 }, { "epoch": 0.846042865337482, "grad_norm": 1.2975344760679521, "learning_rate": 6.088584772530458e-08, "loss": 0.2255, "step": 48672 }, { "epoch": 0.8460602478749848, "grad_norm": 0.9993232719648384, "learning_rate": 6.087238623685559e-08, "loss": 0.2468, "step": 48673 }, { "epoch": 0.8460776304124876, "grad_norm": 1.0826188466642286, "learning_rate": 6.08589261402394e-08, "loss": 0.1454, "step": 48674 }, { "epoch": 0.8460950129499905, "grad_norm": 1.5036704519565398, "learning_rate": 6.084546743549868e-08, "loss": 0.1541, "step": 48675 }, { "epoch": 0.8461123954874933, "grad_norm": 1.2928952861931422, "learning_rate": 6.083201012267608e-08, "loss": 0.2303, "step": 48676 }, { "epoch": 0.8461297780249961, "grad_norm": 1.552951279195679, "learning_rate": 6.081855420181432e-08, "loss": 0.235, "step": 48677 }, { "epoch": 0.846147160562499, "grad_norm": 0.8819080257822197, "learning_rate": 6.080509967295593e-08, "loss": 0.1735, "step": 48678 }, { "epoch": 0.8461645431000018, "grad_norm": 0.9870620375636273, "learning_rate": 6.079164653614366e-08, "loss": 0.1642, "step": 48679 }, { "epoch": 0.8461819256375046, "grad_norm": 1.0248681181276715, "learning_rate": 6.077819479142005e-08, "loss": 0.1722, "step": 48680 }, { "epoch": 0.8461993081750074, "grad_norm": 1.712569053448815, "learning_rate": 6.076474443882784e-08, "loss": 0.1775, "step": 48681 }, { "epoch": 0.8462166907125103, "grad_norm": 1.0352603985539517, "learning_rate": 6.075129547840969e-08, "loss": 0.1691, "step": 48682 }, { "epoch": 0.846234073250013, "grad_norm": 1.0662950043471406, "learning_rate": 6.073784791020809e-08, "loss": 0.1773, "step": 48683 }, { "epoch": 0.8462514557875158, "grad_norm": 1.513048397047465, "learning_rate": 6.072440173426574e-08, "loss": 0.2189, "step": 48684 }, { "epoch": 0.8462688383250186, "grad_norm": 1.673224946503901, "learning_rate": 6.071095695062528e-08, "loss": 0.1748, "step": 48685 }, { "epoch": 0.8462862208625215, "grad_norm": 1.324552919989136, "learning_rate": 6.069751355932929e-08, "loss": 0.2005, "step": 48686 }, { "epoch": 0.8463036034000243, "grad_norm": 3.422037009288772, "learning_rate": 6.068407156042038e-08, "loss": 0.24, "step": 48687 }, { "epoch": 0.8463209859375271, "grad_norm": 0.6805898532124727, "learning_rate": 6.067063095394103e-08, "loss": 0.1429, "step": 48688 }, { "epoch": 0.84633836847503, "grad_norm": 1.6375235861659223, "learning_rate": 6.065719173993422e-08, "loss": 0.2279, "step": 48689 }, { "epoch": 0.8463557510125328, "grad_norm": 1.1773463208109538, "learning_rate": 6.064375391844218e-08, "loss": 0.1782, "step": 48690 }, { "epoch": 0.8463731335500356, "grad_norm": 1.0592775845482643, "learning_rate": 6.063031748950753e-08, "loss": 0.1266, "step": 48691 }, { "epoch": 0.8463905160875385, "grad_norm": 1.4312723500077251, "learning_rate": 6.061688245317298e-08, "loss": 0.2391, "step": 48692 }, { "epoch": 0.8464078986250413, "grad_norm": 1.2272673978165554, "learning_rate": 6.060344880948115e-08, "loss": 0.2765, "step": 48693 }, { "epoch": 0.8464252811625441, "grad_norm": 1.6810184153415009, "learning_rate": 6.05900165584745e-08, "loss": 0.2802, "step": 48694 }, { "epoch": 0.846442663700047, "grad_norm": 3.2821619771450545, "learning_rate": 6.057658570019563e-08, "loss": 0.2437, "step": 48695 }, { "epoch": 0.8464600462375498, "grad_norm": 2.207461625920049, "learning_rate": 6.056315623468716e-08, "loss": 0.1992, "step": 48696 }, { "epoch": 0.8464774287750526, "grad_norm": 1.2715239575000177, "learning_rate": 6.054972816199167e-08, "loss": 0.1406, "step": 48697 }, { "epoch": 0.8464948113125554, "grad_norm": 1.4462966578347498, "learning_rate": 6.05363014821516e-08, "loss": 0.1843, "step": 48698 }, { "epoch": 0.8465121938500583, "grad_norm": 5.5732097219831145, "learning_rate": 6.052287619520952e-08, "loss": 0.2688, "step": 48699 }, { "epoch": 0.8465295763875611, "grad_norm": 1.3916924885240827, "learning_rate": 6.050945230120813e-08, "loss": 0.2799, "step": 48700 }, { "epoch": 0.8465469589250639, "grad_norm": 0.9129498561910577, "learning_rate": 6.049602980018987e-08, "loss": 0.236, "step": 48701 }, { "epoch": 0.8465643414625668, "grad_norm": 0.9949762399625056, "learning_rate": 6.048260869219746e-08, "loss": 0.1839, "step": 48702 }, { "epoch": 0.8465817240000695, "grad_norm": 1.3033601934426613, "learning_rate": 6.046918897727304e-08, "loss": 0.1, "step": 48703 }, { "epoch": 0.8465991065375723, "grad_norm": 1.5756800283454397, "learning_rate": 6.045577065545949e-08, "loss": 0.206, "step": 48704 }, { "epoch": 0.8466164890750751, "grad_norm": 1.6911800259191772, "learning_rate": 6.044235372679923e-08, "loss": 0.1219, "step": 48705 }, { "epoch": 0.846633871612578, "grad_norm": 2.647584995982075, "learning_rate": 6.042893819133482e-08, "loss": 0.178, "step": 48706 }, { "epoch": 0.8466512541500808, "grad_norm": 1.2454315046201059, "learning_rate": 6.04155240491086e-08, "loss": 0.1499, "step": 48707 }, { "epoch": 0.8466686366875836, "grad_norm": 1.7681815608892661, "learning_rate": 6.040211130016353e-08, "loss": 0.3003, "step": 48708 }, { "epoch": 0.8466860192250865, "grad_norm": 0.6450682566186262, "learning_rate": 6.038869994454165e-08, "loss": 0.2545, "step": 48709 }, { "epoch": 0.8467034017625893, "grad_norm": 1.7171502605723779, "learning_rate": 6.03752899822857e-08, "loss": 0.1984, "step": 48710 }, { "epoch": 0.8467207843000921, "grad_norm": 3.3954472953693147, "learning_rate": 6.036188141343801e-08, "loss": 0.2355, "step": 48711 }, { "epoch": 0.846738166837595, "grad_norm": 1.5476341772934692, "learning_rate": 6.034847423804129e-08, "loss": 0.1688, "step": 48712 }, { "epoch": 0.8467555493750978, "grad_norm": 1.1267980956058266, "learning_rate": 6.0335068456138e-08, "loss": 0.247, "step": 48713 }, { "epoch": 0.8467729319126006, "grad_norm": 1.355543332993492, "learning_rate": 6.032166406777051e-08, "loss": 0.234, "step": 48714 }, { "epoch": 0.8467903144501034, "grad_norm": 1.224494061508897, "learning_rate": 6.030826107298142e-08, "loss": 0.1549, "step": 48715 }, { "epoch": 0.8468076969876063, "grad_norm": 1.6284938102822677, "learning_rate": 6.029485947181317e-08, "loss": 0.2913, "step": 48716 }, { "epoch": 0.8468250795251091, "grad_norm": 1.121472232575673, "learning_rate": 6.028145926430822e-08, "loss": 0.2365, "step": 48717 }, { "epoch": 0.8468424620626119, "grad_norm": 1.2731253357353665, "learning_rate": 6.026806045050903e-08, "loss": 0.2037, "step": 48718 }, { "epoch": 0.8468598446001148, "grad_norm": 1.2756966997831987, "learning_rate": 6.025466303045801e-08, "loss": 0.1899, "step": 48719 }, { "epoch": 0.8468772271376176, "grad_norm": 1.4422838213309634, "learning_rate": 6.02412670041978e-08, "loss": 0.1485, "step": 48720 }, { "epoch": 0.8468946096751204, "grad_norm": 1.156570213040399, "learning_rate": 6.022787237177085e-08, "loss": 0.1175, "step": 48721 }, { "epoch": 0.8469119922126231, "grad_norm": 2.147266922849254, "learning_rate": 6.021447913321936e-08, "loss": 0.2386, "step": 48722 }, { "epoch": 0.846929374750126, "grad_norm": 1.4866187442952188, "learning_rate": 6.020108728858603e-08, "loss": 0.2028, "step": 48723 }, { "epoch": 0.8469467572876288, "grad_norm": 1.5470796717238193, "learning_rate": 6.018769683791325e-08, "loss": 0.2452, "step": 48724 }, { "epoch": 0.8469641398251316, "grad_norm": 2.0951633231809947, "learning_rate": 6.017430778124344e-08, "loss": 0.2054, "step": 48725 }, { "epoch": 0.8469815223626345, "grad_norm": 1.8059454358048839, "learning_rate": 6.0160920118619e-08, "loss": 0.2006, "step": 48726 }, { "epoch": 0.8469989049001373, "grad_norm": 1.0961426645981716, "learning_rate": 6.014753385008242e-08, "loss": 0.1454, "step": 48727 }, { "epoch": 0.8470162874376401, "grad_norm": 0.9493979688092998, "learning_rate": 6.013414897567608e-08, "loss": 0.1663, "step": 48728 }, { "epoch": 0.847033669975143, "grad_norm": 1.5634162951647534, "learning_rate": 6.012076549544248e-08, "loss": 0.1535, "step": 48729 }, { "epoch": 0.8470510525126458, "grad_norm": 1.0462899345461967, "learning_rate": 6.010738340942384e-08, "loss": 0.1459, "step": 48730 }, { "epoch": 0.8470684350501486, "grad_norm": 0.9978973410163988, "learning_rate": 6.009400271766285e-08, "loss": 0.2214, "step": 48731 }, { "epoch": 0.8470858175876514, "grad_norm": 1.1877713298429595, "learning_rate": 6.008062342020176e-08, "loss": 0.1303, "step": 48732 }, { "epoch": 0.8471032001251543, "grad_norm": 1.4410934592294162, "learning_rate": 6.00672455170832e-08, "loss": 0.3207, "step": 48733 }, { "epoch": 0.8471205826626571, "grad_norm": 1.3012842077150222, "learning_rate": 6.005386900834908e-08, "loss": 0.1731, "step": 48734 }, { "epoch": 0.8471379652001599, "grad_norm": 1.3404774980797383, "learning_rate": 6.004049389404226e-08, "loss": 0.2071, "step": 48735 }, { "epoch": 0.8471553477376628, "grad_norm": 1.7678897387293215, "learning_rate": 6.002712017420498e-08, "loss": 0.2137, "step": 48736 }, { "epoch": 0.8471727302751656, "grad_norm": 1.2951327414815832, "learning_rate": 6.001374784887958e-08, "loss": 0.1911, "step": 48737 }, { "epoch": 0.8471901128126684, "grad_norm": 1.5626494704252163, "learning_rate": 6.000037691810839e-08, "loss": 0.1922, "step": 48738 }, { "epoch": 0.8472074953501713, "grad_norm": 1.3865260311090244, "learning_rate": 5.998700738193402e-08, "loss": 0.1639, "step": 48739 }, { "epoch": 0.8472248778876741, "grad_norm": 1.1571540778851015, "learning_rate": 5.997363924039878e-08, "loss": 0.1297, "step": 48740 }, { "epoch": 0.8472422604251769, "grad_norm": 1.1846650210972773, "learning_rate": 5.99602724935449e-08, "loss": 0.128, "step": 48741 }, { "epoch": 0.8472596429626796, "grad_norm": 1.4663028821495727, "learning_rate": 5.99469071414147e-08, "loss": 0.1294, "step": 48742 }, { "epoch": 0.8472770255001825, "grad_norm": 1.12520804370536, "learning_rate": 5.993354318405075e-08, "loss": 0.1373, "step": 48743 }, { "epoch": 0.8472944080376853, "grad_norm": 1.9443118751803712, "learning_rate": 5.992018062149534e-08, "loss": 0.1576, "step": 48744 }, { "epoch": 0.8473117905751881, "grad_norm": 1.5161162603361582, "learning_rate": 5.990681945379078e-08, "loss": 0.1242, "step": 48745 }, { "epoch": 0.847329173112691, "grad_norm": 0.9413139184794994, "learning_rate": 5.989345968097947e-08, "loss": 0.1663, "step": 48746 }, { "epoch": 0.8473465556501938, "grad_norm": 1.4111898037759547, "learning_rate": 5.988010130310373e-08, "loss": 0.2442, "step": 48747 }, { "epoch": 0.8473639381876966, "grad_norm": 1.5122372602973786, "learning_rate": 5.986674432020589e-08, "loss": 0.1773, "step": 48748 }, { "epoch": 0.8473813207251994, "grad_norm": 1.4087290764114702, "learning_rate": 5.985338873232825e-08, "loss": 0.163, "step": 48749 }, { "epoch": 0.8473987032627023, "grad_norm": 1.3315446346850315, "learning_rate": 5.984003453951309e-08, "loss": 0.2077, "step": 48750 }, { "epoch": 0.8474160858002051, "grad_norm": 1.5474717492686956, "learning_rate": 5.982668174180294e-08, "loss": 0.2177, "step": 48751 }, { "epoch": 0.8474334683377079, "grad_norm": 1.5838697255388183, "learning_rate": 5.981333033924014e-08, "loss": 0.1707, "step": 48752 }, { "epoch": 0.8474508508752108, "grad_norm": 1.2488335711624057, "learning_rate": 5.979998033186662e-08, "loss": 0.1757, "step": 48753 }, { "epoch": 0.8474682334127136, "grad_norm": 0.8946382410515185, "learning_rate": 5.978663171972509e-08, "loss": 0.0986, "step": 48754 }, { "epoch": 0.8474856159502164, "grad_norm": 1.9600575982112627, "learning_rate": 5.977328450285768e-08, "loss": 0.1603, "step": 48755 }, { "epoch": 0.8475029984877193, "grad_norm": 2.658325881609525, "learning_rate": 5.97599386813068e-08, "loss": 0.1835, "step": 48756 }, { "epoch": 0.8475203810252221, "grad_norm": 1.292922861987057, "learning_rate": 5.974659425511463e-08, "loss": 0.2231, "step": 48757 }, { "epoch": 0.8475377635627249, "grad_norm": 1.7876119147239227, "learning_rate": 5.973325122432343e-08, "loss": 0.2158, "step": 48758 }, { "epoch": 0.8475551461002278, "grad_norm": 1.349246429518983, "learning_rate": 5.971990958897582e-08, "loss": 0.2087, "step": 48759 }, { "epoch": 0.8475725286377306, "grad_norm": 1.793172571432596, "learning_rate": 5.970656934911373e-08, "loss": 0.1766, "step": 48760 }, { "epoch": 0.8475899111752334, "grad_norm": 1.7747011326467057, "learning_rate": 5.969323050477943e-08, "loss": 0.1099, "step": 48761 }, { "epoch": 0.8476072937127361, "grad_norm": 1.8174005475204702, "learning_rate": 5.967989305601545e-08, "loss": 0.2119, "step": 48762 }, { "epoch": 0.847624676250239, "grad_norm": 1.2503183535906517, "learning_rate": 5.966655700286393e-08, "loss": 0.1379, "step": 48763 }, { "epoch": 0.8476420587877418, "grad_norm": 1.0117591251472264, "learning_rate": 5.965322234536719e-08, "loss": 0.1358, "step": 48764 }, { "epoch": 0.8476594413252446, "grad_norm": 1.9253092808959744, "learning_rate": 5.963988908356743e-08, "loss": 0.1708, "step": 48765 }, { "epoch": 0.8476768238627475, "grad_norm": 1.4901215191026798, "learning_rate": 5.962655721750692e-08, "loss": 0.2392, "step": 48766 }, { "epoch": 0.8476942064002503, "grad_norm": 0.6841131134346067, "learning_rate": 5.961322674722796e-08, "loss": 0.1437, "step": 48767 }, { "epoch": 0.8477115889377531, "grad_norm": 1.367794218528592, "learning_rate": 5.9599897672772735e-08, "loss": 0.1491, "step": 48768 }, { "epoch": 0.8477289714752559, "grad_norm": 1.7433709934166297, "learning_rate": 5.958656999418349e-08, "loss": 0.129, "step": 48769 }, { "epoch": 0.8477463540127588, "grad_norm": 1.4112346074384572, "learning_rate": 5.9573243711502575e-08, "loss": 0.1779, "step": 48770 }, { "epoch": 0.8477637365502616, "grad_norm": 1.1621430344913737, "learning_rate": 5.955991882477224e-08, "loss": 0.1682, "step": 48771 }, { "epoch": 0.8477811190877644, "grad_norm": 1.7936360111512673, "learning_rate": 5.95465953340345e-08, "loss": 0.22, "step": 48772 }, { "epoch": 0.8477985016252673, "grad_norm": 0.7219359986059825, "learning_rate": 5.953327323933177e-08, "loss": 0.0931, "step": 48773 }, { "epoch": 0.8478158841627701, "grad_norm": 1.0409402854689345, "learning_rate": 5.951995254070624e-08, "loss": 0.0882, "step": 48774 }, { "epoch": 0.8478332667002729, "grad_norm": 2.1567731609479, "learning_rate": 5.9506633238200154e-08, "loss": 0.1745, "step": 48775 }, { "epoch": 0.8478506492377758, "grad_norm": 1.500923113689069, "learning_rate": 5.9493315331855644e-08, "loss": 0.2181, "step": 48776 }, { "epoch": 0.8478680317752786, "grad_norm": 3.8968304351167316, "learning_rate": 5.9479998821714894e-08, "loss": 0.2964, "step": 48777 }, { "epoch": 0.8478854143127814, "grad_norm": 1.3317176188227862, "learning_rate": 5.946668370782043e-08, "loss": 0.2227, "step": 48778 }, { "epoch": 0.8479027968502842, "grad_norm": 1.4039840449612817, "learning_rate": 5.945336999021405e-08, "loss": 0.1098, "step": 48779 }, { "epoch": 0.8479201793877871, "grad_norm": 0.9380631532994518, "learning_rate": 5.944005766893806e-08, "loss": 0.2294, "step": 48780 }, { "epoch": 0.8479375619252899, "grad_norm": 1.6433546461484068, "learning_rate": 5.9426746744034805e-08, "loss": 0.2053, "step": 48781 }, { "epoch": 0.8479549444627926, "grad_norm": 1.0726465386914972, "learning_rate": 5.941343721554637e-08, "loss": 0.1261, "step": 48782 }, { "epoch": 0.8479723270002955, "grad_norm": 0.9952093440364591, "learning_rate": 5.940012908351494e-08, "loss": 0.1369, "step": 48783 }, { "epoch": 0.8479897095377983, "grad_norm": 0.9527236578164591, "learning_rate": 5.93868223479827e-08, "loss": 0.134, "step": 48784 }, { "epoch": 0.8480070920753011, "grad_norm": 1.6228457969344139, "learning_rate": 5.9373517008991845e-08, "loss": 0.1833, "step": 48785 }, { "epoch": 0.848024474612804, "grad_norm": 1.04459992959415, "learning_rate": 5.936021306658456e-08, "loss": 0.2496, "step": 48786 }, { "epoch": 0.8480418571503068, "grad_norm": 1.1277050659213381, "learning_rate": 5.9346910520802985e-08, "loss": 0.1242, "step": 48787 }, { "epoch": 0.8480592396878096, "grad_norm": 1.598987607865977, "learning_rate": 5.933360937168913e-08, "loss": 0.1343, "step": 48788 }, { "epoch": 0.8480766222253124, "grad_norm": 1.6445302422150394, "learning_rate": 5.932030961928547e-08, "loss": 0.2416, "step": 48789 }, { "epoch": 0.8480940047628153, "grad_norm": 1.5484118227590757, "learning_rate": 5.930701126363408e-08, "loss": 0.1705, "step": 48790 }, { "epoch": 0.8481113873003181, "grad_norm": 1.886262567681078, "learning_rate": 5.929371430477692e-08, "loss": 0.2733, "step": 48791 }, { "epoch": 0.8481287698378209, "grad_norm": 2.0300550466929086, "learning_rate": 5.928041874275608e-08, "loss": 0.1959, "step": 48792 }, { "epoch": 0.8481461523753238, "grad_norm": 1.4625010870190502, "learning_rate": 5.926712457761407e-08, "loss": 0.2452, "step": 48793 }, { "epoch": 0.8481635349128266, "grad_norm": 1.0475425878411961, "learning_rate": 5.9253831809392753e-08, "loss": 0.1806, "step": 48794 }, { "epoch": 0.8481809174503294, "grad_norm": 1.3929869782714606, "learning_rate": 5.924054043813431e-08, "loss": 0.2004, "step": 48795 }, { "epoch": 0.8481982999878322, "grad_norm": 1.8215468747624877, "learning_rate": 5.922725046388094e-08, "loss": 0.1635, "step": 48796 }, { "epoch": 0.8482156825253351, "grad_norm": 1.021642181558668, "learning_rate": 5.921396188667471e-08, "loss": 0.1428, "step": 48797 }, { "epoch": 0.8482330650628379, "grad_norm": 1.3668854570764417, "learning_rate": 5.9200674706557705e-08, "loss": 0.1977, "step": 48798 }, { "epoch": 0.8482504476003407, "grad_norm": 1.68920743280043, "learning_rate": 5.918738892357206e-08, "loss": 0.1871, "step": 48799 }, { "epoch": 0.8482678301378436, "grad_norm": 1.4506674794716496, "learning_rate": 5.917410453775984e-08, "loss": 0.2225, "step": 48800 }, { "epoch": 0.8482852126753464, "grad_norm": 1.5558979750775004, "learning_rate": 5.91608215491633e-08, "loss": 0.1408, "step": 48801 }, { "epoch": 0.8483025952128491, "grad_norm": 1.910366707629953, "learning_rate": 5.9147539957824456e-08, "loss": 0.1131, "step": 48802 }, { "epoch": 0.848319977750352, "grad_norm": 1.2129614318116984, "learning_rate": 5.913425976378539e-08, "loss": 0.2162, "step": 48803 }, { "epoch": 0.8483373602878548, "grad_norm": 1.4247860577262068, "learning_rate": 5.912098096708823e-08, "loss": 0.1408, "step": 48804 }, { "epoch": 0.8483547428253576, "grad_norm": 1.4241885874706115, "learning_rate": 5.9107703567775e-08, "loss": 0.1931, "step": 48805 }, { "epoch": 0.8483721253628604, "grad_norm": 1.9595117709942826, "learning_rate": 5.9094427565887836e-08, "loss": 0.219, "step": 48806 }, { "epoch": 0.8483895079003633, "grad_norm": 1.227126478222437, "learning_rate": 5.908115296146876e-08, "loss": 0.1455, "step": 48807 }, { "epoch": 0.8484068904378661, "grad_norm": 1.4744842681961519, "learning_rate": 5.906787975455985e-08, "loss": 0.2977, "step": 48808 }, { "epoch": 0.8484242729753689, "grad_norm": 1.4788841807424826, "learning_rate": 5.9054607945203396e-08, "loss": 0.1489, "step": 48809 }, { "epoch": 0.8484416555128718, "grad_norm": 1.1796029813388624, "learning_rate": 5.904133753344115e-08, "loss": 0.1402, "step": 48810 }, { "epoch": 0.8484590380503746, "grad_norm": 1.9460257595540447, "learning_rate": 5.9028068519315244e-08, "loss": 0.2596, "step": 48811 }, { "epoch": 0.8484764205878774, "grad_norm": 3.523736002545981, "learning_rate": 5.901480090286787e-08, "loss": 0.3074, "step": 48812 }, { "epoch": 0.8484938031253803, "grad_norm": 1.2261235544817888, "learning_rate": 5.900153468414104e-08, "loss": 0.1808, "step": 48813 }, { "epoch": 0.8485111856628831, "grad_norm": 1.2649457584552593, "learning_rate": 5.898826986317673e-08, "loss": 0.2179, "step": 48814 }, { "epoch": 0.8485285682003859, "grad_norm": 0.9231289502504182, "learning_rate": 5.897500644001702e-08, "loss": 0.1631, "step": 48815 }, { "epoch": 0.8485459507378887, "grad_norm": 1.806167576384303, "learning_rate": 5.8961744414703976e-08, "loss": 0.1954, "step": 48816 }, { "epoch": 0.8485633332753916, "grad_norm": 1.790232801469819, "learning_rate": 5.894848378727957e-08, "loss": 0.1896, "step": 48817 }, { "epoch": 0.8485807158128944, "grad_norm": 1.235666165857319, "learning_rate": 5.8935224557785876e-08, "loss": 0.1728, "step": 48818 }, { "epoch": 0.8485980983503972, "grad_norm": 1.623108764638647, "learning_rate": 5.892196672626487e-08, "loss": 0.2051, "step": 48819 }, { "epoch": 0.8486154808879001, "grad_norm": 0.951161354586205, "learning_rate": 5.890871029275862e-08, "loss": 0.1947, "step": 48820 }, { "epoch": 0.8486328634254029, "grad_norm": 1.1769923865360585, "learning_rate": 5.8895455257309215e-08, "loss": 0.1542, "step": 48821 }, { "epoch": 0.8486502459629056, "grad_norm": 1.3839502264986276, "learning_rate": 5.888220161995866e-08, "loss": 0.1198, "step": 48822 }, { "epoch": 0.8486676285004084, "grad_norm": 1.555654742332884, "learning_rate": 5.886894938074871e-08, "loss": 0.1215, "step": 48823 }, { "epoch": 0.8486850110379113, "grad_norm": 1.1972220408330403, "learning_rate": 5.885569853972161e-08, "loss": 0.0935, "step": 48824 }, { "epoch": 0.8487023935754141, "grad_norm": 1.7079143399454184, "learning_rate": 5.884244909691932e-08, "loss": 0.2154, "step": 48825 }, { "epoch": 0.8487197761129169, "grad_norm": 0.7613391804205922, "learning_rate": 5.882920105238382e-08, "loss": 0.1102, "step": 48826 }, { "epoch": 0.8487371586504198, "grad_norm": 1.4718695627556975, "learning_rate": 5.8815954406157e-08, "loss": 0.1366, "step": 48827 }, { "epoch": 0.8487545411879226, "grad_norm": 1.3836349373005117, "learning_rate": 5.880270915828117e-08, "loss": 0.1356, "step": 48828 }, { "epoch": 0.8487719237254254, "grad_norm": 1.1138245880976012, "learning_rate": 5.8789465308797915e-08, "loss": 0.1006, "step": 48829 }, { "epoch": 0.8487893062629283, "grad_norm": 0.6321594478514417, "learning_rate": 5.877622285774941e-08, "loss": 0.1115, "step": 48830 }, { "epoch": 0.8488066888004311, "grad_norm": 8.795827027194452, "learning_rate": 5.876298180517753e-08, "loss": 0.2007, "step": 48831 }, { "epoch": 0.8488240713379339, "grad_norm": 1.2752226787162737, "learning_rate": 5.874974215112438e-08, "loss": 0.1005, "step": 48832 }, { "epoch": 0.8488414538754367, "grad_norm": 2.0488542015552507, "learning_rate": 5.873650389563184e-08, "loss": 0.175, "step": 48833 }, { "epoch": 0.8488588364129396, "grad_norm": 2.0338581346546474, "learning_rate": 5.872326703874192e-08, "loss": 0.19, "step": 48834 }, { "epoch": 0.8488762189504424, "grad_norm": 1.5487419825676763, "learning_rate": 5.8710031580496475e-08, "loss": 0.3359, "step": 48835 }, { "epoch": 0.8488936014879452, "grad_norm": 2.263418333662953, "learning_rate": 5.869679752093759e-08, "loss": 0.1601, "step": 48836 }, { "epoch": 0.8489109840254481, "grad_norm": 2.111621893672435, "learning_rate": 5.868356486010712e-08, "loss": 0.2451, "step": 48837 }, { "epoch": 0.8489283665629509, "grad_norm": 1.6140799374822181, "learning_rate": 5.8670333598047016e-08, "loss": 0.1207, "step": 48838 }, { "epoch": 0.8489457491004537, "grad_norm": 1.2824335516068872, "learning_rate": 5.8657103734799093e-08, "loss": 0.2043, "step": 48839 }, { "epoch": 0.8489631316379566, "grad_norm": 1.0685796283753182, "learning_rate": 5.864387527040554e-08, "loss": 0.1275, "step": 48840 }, { "epoch": 0.8489805141754594, "grad_norm": 1.4994167822588582, "learning_rate": 5.863064820490826e-08, "loss": 0.1691, "step": 48841 }, { "epoch": 0.8489978967129621, "grad_norm": 2.2294639693449514, "learning_rate": 5.861742253834889e-08, "loss": 0.2208, "step": 48842 }, { "epoch": 0.8490152792504649, "grad_norm": 1.2759363871902234, "learning_rate": 5.860419827076962e-08, "loss": 0.1321, "step": 48843 }, { "epoch": 0.8490326617879678, "grad_norm": 1.627461604806006, "learning_rate": 5.8590975402212305e-08, "loss": 0.2028, "step": 48844 }, { "epoch": 0.8490500443254706, "grad_norm": 1.4308362846215208, "learning_rate": 5.85777539327188e-08, "loss": 0.1947, "step": 48845 }, { "epoch": 0.8490674268629734, "grad_norm": 0.9214043238282457, "learning_rate": 5.8564533862331066e-08, "loss": 0.1269, "step": 48846 }, { "epoch": 0.8490848094004763, "grad_norm": 1.1465962403843788, "learning_rate": 5.855131519109097e-08, "loss": 0.1062, "step": 48847 }, { "epoch": 0.8491021919379791, "grad_norm": 1.2671432363465183, "learning_rate": 5.853809791904041e-08, "loss": 0.1453, "step": 48848 }, { "epoch": 0.8491195744754819, "grad_norm": 0.9390724813847678, "learning_rate": 5.85248820462213e-08, "loss": 0.1284, "step": 48849 }, { "epoch": 0.8491369570129847, "grad_norm": 1.6054604263086778, "learning_rate": 5.8511667572675393e-08, "loss": 0.1652, "step": 48850 }, { "epoch": 0.8491543395504876, "grad_norm": 1.3078880607577776, "learning_rate": 5.8498454498444814e-08, "loss": 0.1123, "step": 48851 }, { "epoch": 0.8491717220879904, "grad_norm": 0.7634768781165909, "learning_rate": 5.848524282357137e-08, "loss": 0.1145, "step": 48852 }, { "epoch": 0.8491891046254932, "grad_norm": 1.0186914059971703, "learning_rate": 5.847203254809691e-08, "loss": 0.0982, "step": 48853 }, { "epoch": 0.8492064871629961, "grad_norm": 1.4752247112705619, "learning_rate": 5.845882367206312e-08, "loss": 0.2575, "step": 48854 }, { "epoch": 0.8492238697004989, "grad_norm": 2.6869235355185594, "learning_rate": 5.8445616195512134e-08, "loss": 0.431, "step": 48855 }, { "epoch": 0.8492412522380017, "grad_norm": 1.3991146966958095, "learning_rate": 5.843241011848576e-08, "loss": 0.14, "step": 48856 }, { "epoch": 0.8492586347755046, "grad_norm": 1.3197125421774392, "learning_rate": 5.841920544102574e-08, "loss": 0.1862, "step": 48857 }, { "epoch": 0.8492760173130074, "grad_norm": 2.0479596430680806, "learning_rate": 5.840600216317393e-08, "loss": 0.2022, "step": 48858 }, { "epoch": 0.8492933998505102, "grad_norm": 1.127751100918875, "learning_rate": 5.8392800284972285e-08, "loss": 0.1705, "step": 48859 }, { "epoch": 0.849310782388013, "grad_norm": 1.3614030669713744, "learning_rate": 5.837959980646279e-08, "loss": 0.1123, "step": 48860 }, { "epoch": 0.8493281649255158, "grad_norm": 1.2935391017323705, "learning_rate": 5.8366400727686835e-08, "loss": 0.1469, "step": 48861 }, { "epoch": 0.8493455474630186, "grad_norm": 1.5974985569065911, "learning_rate": 5.835320304868663e-08, "loss": 0.2353, "step": 48862 }, { "epoch": 0.8493629300005214, "grad_norm": 1.0635761861812125, "learning_rate": 5.83400067695039e-08, "loss": 0.1938, "step": 48863 }, { "epoch": 0.8493803125380243, "grad_norm": 0.982716836068421, "learning_rate": 5.8326811890180514e-08, "loss": 0.1468, "step": 48864 }, { "epoch": 0.8493976950755271, "grad_norm": 1.400933907537199, "learning_rate": 5.831361841075816e-08, "loss": 0.1603, "step": 48865 }, { "epoch": 0.8494150776130299, "grad_norm": 1.3093050064181702, "learning_rate": 5.83004263312788e-08, "loss": 0.2317, "step": 48866 }, { "epoch": 0.8494324601505328, "grad_norm": 0.9786052636121814, "learning_rate": 5.828723565178417e-08, "loss": 0.2053, "step": 48867 }, { "epoch": 0.8494498426880356, "grad_norm": 1.5842697611712537, "learning_rate": 5.827404637231609e-08, "loss": 0.2115, "step": 48868 }, { "epoch": 0.8494672252255384, "grad_norm": 2.055140182682852, "learning_rate": 5.826085849291629e-08, "loss": 0.1936, "step": 48869 }, { "epoch": 0.8494846077630412, "grad_norm": 1.0156859919855614, "learning_rate": 5.8247672013626736e-08, "loss": 0.1954, "step": 48870 }, { "epoch": 0.8495019903005441, "grad_norm": 1.0044076139015312, "learning_rate": 5.823448693448918e-08, "loss": 0.0948, "step": 48871 }, { "epoch": 0.8495193728380469, "grad_norm": 1.483164635557119, "learning_rate": 5.8221303255545416e-08, "loss": 0.1613, "step": 48872 }, { "epoch": 0.8495367553755497, "grad_norm": 2.1292553003828445, "learning_rate": 5.820812097683703e-08, "loss": 0.2274, "step": 48873 }, { "epoch": 0.8495541379130526, "grad_norm": 1.6770732171527847, "learning_rate": 5.819494009840598e-08, "loss": 0.2386, "step": 48874 }, { "epoch": 0.8495715204505554, "grad_norm": 1.515606763709481, "learning_rate": 5.818176062029412e-08, "loss": 0.1725, "step": 48875 }, { "epoch": 0.8495889029880582, "grad_norm": 1.461597317829395, "learning_rate": 5.8168582542543034e-08, "loss": 0.141, "step": 48876 }, { "epoch": 0.8496062855255611, "grad_norm": 1.1981703684473903, "learning_rate": 5.815540586519452e-08, "loss": 0.1165, "step": 48877 }, { "epoch": 0.8496236680630639, "grad_norm": 1.4143450963030617, "learning_rate": 5.814223058829054e-08, "loss": 0.324, "step": 48878 }, { "epoch": 0.8496410506005667, "grad_norm": 1.6791176087549817, "learning_rate": 5.8129056711872736e-08, "loss": 0.1374, "step": 48879 }, { "epoch": 0.8496584331380695, "grad_norm": 1.377902179427944, "learning_rate": 5.8115884235982795e-08, "loss": 0.1952, "step": 48880 }, { "epoch": 0.8496758156755723, "grad_norm": 2.7464109010521334, "learning_rate": 5.8102713160662396e-08, "loss": 0.2211, "step": 48881 }, { "epoch": 0.8496931982130751, "grad_norm": 1.1301468936675485, "learning_rate": 5.808954348595352e-08, "loss": 0.1602, "step": 48882 }, { "epoch": 0.8497105807505779, "grad_norm": 0.8151349635860038, "learning_rate": 5.8076375211897786e-08, "loss": 0.1375, "step": 48883 }, { "epoch": 0.8497279632880808, "grad_norm": 1.2845435731698174, "learning_rate": 5.806320833853695e-08, "loss": 0.1719, "step": 48884 }, { "epoch": 0.8497453458255836, "grad_norm": 1.7256181401054742, "learning_rate": 5.805004286591275e-08, "loss": 0.1583, "step": 48885 }, { "epoch": 0.8497627283630864, "grad_norm": 1.8691584162541155, "learning_rate": 5.803687879406688e-08, "loss": 0.2227, "step": 48886 }, { "epoch": 0.8497801109005892, "grad_norm": 2.5565947694972238, "learning_rate": 5.802371612304108e-08, "loss": 0.2137, "step": 48887 }, { "epoch": 0.8497974934380921, "grad_norm": 1.0540653939392126, "learning_rate": 5.801055485287709e-08, "loss": 0.2189, "step": 48888 }, { "epoch": 0.8498148759755949, "grad_norm": 2.1092712758260914, "learning_rate": 5.79973949836165e-08, "loss": 0.1374, "step": 48889 }, { "epoch": 0.8498322585130977, "grad_norm": 2.4579573987558967, "learning_rate": 5.798423651530121e-08, "loss": 0.1981, "step": 48890 }, { "epoch": 0.8498496410506006, "grad_norm": 1.0981191805490376, "learning_rate": 5.797107944797297e-08, "loss": 0.1145, "step": 48891 }, { "epoch": 0.8498670235881034, "grad_norm": 1.8026672616823483, "learning_rate": 5.795792378167313e-08, "loss": 0.2429, "step": 48892 }, { "epoch": 0.8498844061256062, "grad_norm": 1.5101210771319056, "learning_rate": 5.7944769516443727e-08, "loss": 0.1722, "step": 48893 }, { "epoch": 0.8499017886631091, "grad_norm": 1.1340004819623328, "learning_rate": 5.793161665232632e-08, "loss": 0.1593, "step": 48894 }, { "epoch": 0.8499191712006119, "grad_norm": 1.0755084922176892, "learning_rate": 5.791846518936266e-08, "loss": 0.1546, "step": 48895 }, { "epoch": 0.8499365537381147, "grad_norm": 1.837296712990232, "learning_rate": 5.7905315127594336e-08, "loss": 0.2354, "step": 48896 }, { "epoch": 0.8499539362756175, "grad_norm": 1.689376265950442, "learning_rate": 5.789216646706302e-08, "loss": 0.2452, "step": 48897 }, { "epoch": 0.8499713188131204, "grad_norm": 1.2864805290929229, "learning_rate": 5.7879019207810634e-08, "loss": 0.1361, "step": 48898 }, { "epoch": 0.8499887013506232, "grad_norm": 2.221494001761867, "learning_rate": 5.786587334987858e-08, "loss": 0.2176, "step": 48899 }, { "epoch": 0.850006083888126, "grad_norm": 0.963773801860022, "learning_rate": 5.78527288933085e-08, "loss": 0.1422, "step": 48900 }, { "epoch": 0.8500234664256288, "grad_norm": 1.0406982248286702, "learning_rate": 5.783958583814225e-08, "loss": 0.2163, "step": 48901 }, { "epoch": 0.8500408489631316, "grad_norm": 1.4560275844634456, "learning_rate": 5.782644418442145e-08, "loss": 0.1449, "step": 48902 }, { "epoch": 0.8500582315006344, "grad_norm": 1.3628413864939442, "learning_rate": 5.781330393218764e-08, "loss": 0.1698, "step": 48903 }, { "epoch": 0.8500756140381373, "grad_norm": 1.9019916481229977, "learning_rate": 5.7800165081482556e-08, "loss": 0.1736, "step": 48904 }, { "epoch": 0.8500929965756401, "grad_norm": 2.350766702206273, "learning_rate": 5.7787027632347826e-08, "loss": 0.1999, "step": 48905 }, { "epoch": 0.8501103791131429, "grad_norm": 1.6905630454147609, "learning_rate": 5.77738915848251e-08, "loss": 0.1518, "step": 48906 }, { "epoch": 0.8501277616506457, "grad_norm": 2.4357582799748756, "learning_rate": 5.776075693895599e-08, "loss": 0.2133, "step": 48907 }, { "epoch": 0.8501451441881486, "grad_norm": 1.6743313638039508, "learning_rate": 5.7747623694781975e-08, "loss": 0.1863, "step": 48908 }, { "epoch": 0.8501625267256514, "grad_norm": 1.2495726524216315, "learning_rate": 5.773449185234502e-08, "loss": 0.1526, "step": 48909 }, { "epoch": 0.8501799092631542, "grad_norm": 2.317242926898829, "learning_rate": 5.77213614116866e-08, "loss": 0.26, "step": 48910 }, { "epoch": 0.8501972918006571, "grad_norm": 1.931861836583529, "learning_rate": 5.770823237284822e-08, "loss": 0.217, "step": 48911 }, { "epoch": 0.8502146743381599, "grad_norm": 6.706599278405593, "learning_rate": 5.769510473587147e-08, "loss": 0.2258, "step": 48912 }, { "epoch": 0.8502320568756627, "grad_norm": 2.6672170517317775, "learning_rate": 5.76819785007982e-08, "loss": 0.2561, "step": 48913 }, { "epoch": 0.8502494394131656, "grad_norm": 2.5238534794569376, "learning_rate": 5.7668853667669824e-08, "loss": 0.2254, "step": 48914 }, { "epoch": 0.8502668219506684, "grad_norm": 1.7678978808686456, "learning_rate": 5.765573023652803e-08, "loss": 0.1751, "step": 48915 }, { "epoch": 0.8502842044881712, "grad_norm": 1.17977496079992, "learning_rate": 5.7642608207414344e-08, "loss": 0.2448, "step": 48916 }, { "epoch": 0.850301587025674, "grad_norm": 1.9745850800606342, "learning_rate": 5.76294875803704e-08, "loss": 0.2173, "step": 48917 }, { "epoch": 0.8503189695631769, "grad_norm": 1.3669284772517285, "learning_rate": 5.761636835543776e-08, "loss": 0.1159, "step": 48918 }, { "epoch": 0.8503363521006797, "grad_norm": 1.2301044300369546, "learning_rate": 5.760325053265802e-08, "loss": 0.1692, "step": 48919 }, { "epoch": 0.8503537346381825, "grad_norm": 1.1429353466389163, "learning_rate": 5.759013411207264e-08, "loss": 0.2048, "step": 48920 }, { "epoch": 0.8503711171756853, "grad_norm": 0.9638933358855327, "learning_rate": 5.7577019093723424e-08, "loss": 0.1435, "step": 48921 }, { "epoch": 0.8503884997131881, "grad_norm": 1.5142415921446803, "learning_rate": 5.756390547765183e-08, "loss": 0.1344, "step": 48922 }, { "epoch": 0.8504058822506909, "grad_norm": 1.1592737312048482, "learning_rate": 5.755079326389944e-08, "loss": 0.2594, "step": 48923 }, { "epoch": 0.8504232647881937, "grad_norm": 1.8505057724981477, "learning_rate": 5.7537682452507784e-08, "loss": 0.2152, "step": 48924 }, { "epoch": 0.8504406473256966, "grad_norm": 1.442188528302314, "learning_rate": 5.7524573043518376e-08, "loss": 0.1755, "step": 48925 }, { "epoch": 0.8504580298631994, "grad_norm": 1.1957886219549458, "learning_rate": 5.751146503697285e-08, "loss": 0.4215, "step": 48926 }, { "epoch": 0.8504754124007022, "grad_norm": 2.3845410335084227, "learning_rate": 5.749835843291273e-08, "loss": 0.2429, "step": 48927 }, { "epoch": 0.8504927949382051, "grad_norm": 1.55916385341934, "learning_rate": 5.7485253231379424e-08, "loss": 0.2807, "step": 48928 }, { "epoch": 0.8505101774757079, "grad_norm": 1.0105786195181972, "learning_rate": 5.74721494324148e-08, "loss": 0.1192, "step": 48929 }, { "epoch": 0.8505275600132107, "grad_norm": 1.6332486998779998, "learning_rate": 5.745904703606008e-08, "loss": 0.2456, "step": 48930 }, { "epoch": 0.8505449425507136, "grad_norm": 1.6357520908438705, "learning_rate": 5.744594604235681e-08, "loss": 0.1618, "step": 48931 }, { "epoch": 0.8505623250882164, "grad_norm": 1.19499419516628, "learning_rate": 5.743284645134666e-08, "loss": 0.2182, "step": 48932 }, { "epoch": 0.8505797076257192, "grad_norm": 1.1675310960490952, "learning_rate": 5.741974826307111e-08, "loss": 0.1521, "step": 48933 }, { "epoch": 0.850597090163222, "grad_norm": 1.2678969186415208, "learning_rate": 5.740665147757168e-08, "loss": 0.1327, "step": 48934 }, { "epoch": 0.8506144727007249, "grad_norm": 1.1829102036049741, "learning_rate": 5.739355609488983e-08, "loss": 0.1569, "step": 48935 }, { "epoch": 0.8506318552382277, "grad_norm": 1.7721147499957741, "learning_rate": 5.7380462115067087e-08, "loss": 0.1999, "step": 48936 }, { "epoch": 0.8506492377757305, "grad_norm": 1.3049209105410302, "learning_rate": 5.7367369538144915e-08, "loss": 0.2202, "step": 48937 }, { "epoch": 0.8506666203132334, "grad_norm": 1.80149318221986, "learning_rate": 5.7354278364164896e-08, "loss": 0.1368, "step": 48938 }, { "epoch": 0.8506840028507362, "grad_norm": 3.0373528768538365, "learning_rate": 5.734118859316839e-08, "loss": 0.2302, "step": 48939 }, { "epoch": 0.850701385388239, "grad_norm": 2.346193165033572, "learning_rate": 5.732810022519707e-08, "loss": 0.1997, "step": 48940 }, { "epoch": 0.8507187679257417, "grad_norm": 1.6369899326510102, "learning_rate": 5.731501326029231e-08, "loss": 0.2063, "step": 48941 }, { "epoch": 0.8507361504632446, "grad_norm": 1.5902444413768009, "learning_rate": 5.7301927698495733e-08, "loss": 0.1497, "step": 48942 }, { "epoch": 0.8507535330007474, "grad_norm": 0.9217729166239698, "learning_rate": 5.728884353984842e-08, "loss": 0.1563, "step": 48943 }, { "epoch": 0.8507709155382502, "grad_norm": 1.7930847475459795, "learning_rate": 5.727576078439228e-08, "loss": 0.1533, "step": 48944 }, { "epoch": 0.8507882980757531, "grad_norm": 1.4190376447241335, "learning_rate": 5.726267943216856e-08, "loss": 0.1109, "step": 48945 }, { "epoch": 0.8508056806132559, "grad_norm": 2.045156240909835, "learning_rate": 5.724959948321884e-08, "loss": 0.222, "step": 48946 }, { "epoch": 0.8508230631507587, "grad_norm": 1.0664847916018723, "learning_rate": 5.7236520937584353e-08, "loss": 0.1289, "step": 48947 }, { "epoch": 0.8508404456882616, "grad_norm": 1.3491679855572978, "learning_rate": 5.7223443795306916e-08, "loss": 0.146, "step": 48948 }, { "epoch": 0.8508578282257644, "grad_norm": 1.6269792844379738, "learning_rate": 5.7210368056427646e-08, "loss": 0.153, "step": 48949 }, { "epoch": 0.8508752107632672, "grad_norm": 1.4319692466231682, "learning_rate": 5.719729372098803e-08, "loss": 0.1822, "step": 48950 }, { "epoch": 0.85089259330077, "grad_norm": 2.3459840883057934, "learning_rate": 5.718422078902968e-08, "loss": 0.1637, "step": 48951 }, { "epoch": 0.8509099758382729, "grad_norm": 0.9998672580959498, "learning_rate": 5.717114926059391e-08, "loss": 0.1699, "step": 48952 }, { "epoch": 0.8509273583757757, "grad_norm": 1.0320162976302827, "learning_rate": 5.7158079135722237e-08, "loss": 0.1359, "step": 48953 }, { "epoch": 0.8509447409132785, "grad_norm": 3.1286776383111117, "learning_rate": 5.7145010414455964e-08, "loss": 0.1856, "step": 48954 }, { "epoch": 0.8509621234507814, "grad_norm": 1.999030373486412, "learning_rate": 5.713194309683661e-08, "loss": 0.1759, "step": 48955 }, { "epoch": 0.8509795059882842, "grad_norm": 1.0775361154510736, "learning_rate": 5.711887718290559e-08, "loss": 0.1371, "step": 48956 }, { "epoch": 0.850996888525787, "grad_norm": 1.4115227902356482, "learning_rate": 5.710581267270426e-08, "loss": 0.1133, "step": 48957 }, { "epoch": 0.8510142710632899, "grad_norm": 1.571408178664852, "learning_rate": 5.7092749566273914e-08, "loss": 0.1631, "step": 48958 }, { "epoch": 0.8510316536007927, "grad_norm": 2.0294500848940613, "learning_rate": 5.707968786365624e-08, "loss": 0.1006, "step": 48959 }, { "epoch": 0.8510490361382955, "grad_norm": 1.1634744107560888, "learning_rate": 5.70666275648925e-08, "loss": 0.1504, "step": 48960 }, { "epoch": 0.8510664186757982, "grad_norm": 1.1614259357967902, "learning_rate": 5.705356867002414e-08, "loss": 0.1333, "step": 48961 }, { "epoch": 0.8510838012133011, "grad_norm": 1.0238650838312073, "learning_rate": 5.7040511179092366e-08, "loss": 0.1329, "step": 48962 }, { "epoch": 0.8511011837508039, "grad_norm": 1.521971401279949, "learning_rate": 5.702745509213874e-08, "loss": 0.1693, "step": 48963 }, { "epoch": 0.8511185662883067, "grad_norm": 0.8900901360130126, "learning_rate": 5.701440040920458e-08, "loss": 0.2523, "step": 48964 }, { "epoch": 0.8511359488258096, "grad_norm": 1.809421014392395, "learning_rate": 5.7001347130331335e-08, "loss": 0.1664, "step": 48965 }, { "epoch": 0.8511533313633124, "grad_norm": 1.226270196821611, "learning_rate": 5.69882952555602e-08, "loss": 0.182, "step": 48966 }, { "epoch": 0.8511707139008152, "grad_norm": 1.2418391073016637, "learning_rate": 5.6975244784932876e-08, "loss": 0.1201, "step": 48967 }, { "epoch": 0.851188096438318, "grad_norm": 1.418454299535603, "learning_rate": 5.696219571849037e-08, "loss": 0.1617, "step": 48968 }, { "epoch": 0.8512054789758209, "grad_norm": 1.8310381158129492, "learning_rate": 5.694914805627421e-08, "loss": 0.1621, "step": 48969 }, { "epoch": 0.8512228615133237, "grad_norm": 1.440672223473275, "learning_rate": 5.6936101798325645e-08, "loss": 0.2105, "step": 48970 }, { "epoch": 0.8512402440508265, "grad_norm": 1.7243967985224145, "learning_rate": 5.692305694468624e-08, "loss": 0.3076, "step": 48971 }, { "epoch": 0.8512576265883294, "grad_norm": 1.974248832706276, "learning_rate": 5.6910013495397144e-08, "loss": 0.3016, "step": 48972 }, { "epoch": 0.8512750091258322, "grad_norm": 1.3042263324844368, "learning_rate": 5.689697145049982e-08, "loss": 0.1465, "step": 48973 }, { "epoch": 0.851292391663335, "grad_norm": 1.529939197480086, "learning_rate": 5.688393081003556e-08, "loss": 0.1311, "step": 48974 }, { "epoch": 0.8513097742008379, "grad_norm": 0.6057013921987544, "learning_rate": 5.687089157404562e-08, "loss": 0.1554, "step": 48975 }, { "epoch": 0.8513271567383407, "grad_norm": 1.4051007037344596, "learning_rate": 5.685785374257146e-08, "loss": 0.1646, "step": 48976 }, { "epoch": 0.8513445392758435, "grad_norm": 0.8969511027503256, "learning_rate": 5.684481731565433e-08, "loss": 0.1831, "step": 48977 }, { "epoch": 0.8513619218133464, "grad_norm": 1.5155771252358776, "learning_rate": 5.683178229333546e-08, "loss": 0.1922, "step": 48978 }, { "epoch": 0.8513793043508492, "grad_norm": 0.8126080183700182, "learning_rate": 5.681874867565634e-08, "loss": 0.1608, "step": 48979 }, { "epoch": 0.851396686888352, "grad_norm": 4.9105550679125844, "learning_rate": 5.6805716462658304e-08, "loss": 0.2359, "step": 48980 }, { "epoch": 0.8514140694258547, "grad_norm": 1.1421111159716966, "learning_rate": 5.679268565438239e-08, "loss": 0.1526, "step": 48981 }, { "epoch": 0.8514314519633576, "grad_norm": 1.0462467088032783, "learning_rate": 5.677965625087017e-08, "loss": 0.072, "step": 48982 }, { "epoch": 0.8514488345008604, "grad_norm": 1.3446868547621422, "learning_rate": 5.676662825216283e-08, "loss": 0.1674, "step": 48983 }, { "epoch": 0.8514662170383632, "grad_norm": 1.2032041667854685, "learning_rate": 5.675360165830162e-08, "loss": 0.2265, "step": 48984 }, { "epoch": 0.8514835995758661, "grad_norm": 1.8015878702134742, "learning_rate": 5.674057646932795e-08, "loss": 0.1961, "step": 48985 }, { "epoch": 0.8515009821133689, "grad_norm": 1.167600387384701, "learning_rate": 5.6727552685283006e-08, "loss": 0.1586, "step": 48986 }, { "epoch": 0.8515183646508717, "grad_norm": 1.4144988574763975, "learning_rate": 5.671453030620815e-08, "loss": 0.1647, "step": 48987 }, { "epoch": 0.8515357471883745, "grad_norm": 4.68736842225529, "learning_rate": 5.670150933214457e-08, "loss": 0.3072, "step": 48988 }, { "epoch": 0.8515531297258774, "grad_norm": 1.1182799341309169, "learning_rate": 5.668848976313345e-08, "loss": 0.1955, "step": 48989 }, { "epoch": 0.8515705122633802, "grad_norm": 1.6857983576190452, "learning_rate": 5.6675471599216264e-08, "loss": 0.1912, "step": 48990 }, { "epoch": 0.851587894800883, "grad_norm": 1.7865363142757609, "learning_rate": 5.666245484043425e-08, "loss": 0.1877, "step": 48991 }, { "epoch": 0.8516052773383859, "grad_norm": 2.8239114800230642, "learning_rate": 5.664943948682866e-08, "loss": 0.1638, "step": 48992 }, { "epoch": 0.8516226598758887, "grad_norm": 1.2270226905024566, "learning_rate": 5.66364255384405e-08, "loss": 0.1073, "step": 48993 }, { "epoch": 0.8516400424133915, "grad_norm": 1.4613078393117989, "learning_rate": 5.662341299531126e-08, "loss": 0.1469, "step": 48994 }, { "epoch": 0.8516574249508944, "grad_norm": 0.9894248405269944, "learning_rate": 5.661040185748217e-08, "loss": 0.1135, "step": 48995 }, { "epoch": 0.8516748074883972, "grad_norm": 1.6647957310842902, "learning_rate": 5.659739212499448e-08, "loss": 0.1721, "step": 48996 }, { "epoch": 0.8516921900259, "grad_norm": 1.0257856539948265, "learning_rate": 5.658438379788921e-08, "loss": 0.1288, "step": 48997 }, { "epoch": 0.8517095725634029, "grad_norm": 2.3427953901107794, "learning_rate": 5.657137687620783e-08, "loss": 0.1733, "step": 48998 }, { "epoch": 0.8517269551009057, "grad_norm": 1.4431378192734452, "learning_rate": 5.655837135999164e-08, "loss": 0.2212, "step": 48999 }, { "epoch": 0.8517443376384084, "grad_norm": 1.1063339711950038, "learning_rate": 5.654536724928161e-08, "loss": 0.1918, "step": 49000 }, { "epoch": 0.8517617201759112, "grad_norm": 1.2029916904023472, "learning_rate": 5.653236454411897e-08, "loss": 0.1707, "step": 49001 }, { "epoch": 0.8517791027134141, "grad_norm": 1.408863364319715, "learning_rate": 5.6519363244545093e-08, "loss": 0.1965, "step": 49002 }, { "epoch": 0.8517964852509169, "grad_norm": 1.3075396221937292, "learning_rate": 5.650636335060111e-08, "loss": 0.3069, "step": 49003 }, { "epoch": 0.8518138677884197, "grad_norm": 1.8588091676636576, "learning_rate": 5.649336486232825e-08, "loss": 0.2032, "step": 49004 }, { "epoch": 0.8518312503259226, "grad_norm": 0.95567836794077, "learning_rate": 5.6480367779767726e-08, "loss": 0.182, "step": 49005 }, { "epoch": 0.8518486328634254, "grad_norm": 1.258283865185959, "learning_rate": 5.646737210296065e-08, "loss": 0.1799, "step": 49006 }, { "epoch": 0.8518660154009282, "grad_norm": 4.703065554564779, "learning_rate": 5.6454377831948285e-08, "loss": 0.2193, "step": 49007 }, { "epoch": 0.851883397938431, "grad_norm": 1.1225700121236963, "learning_rate": 5.6441384966771754e-08, "loss": 0.1649, "step": 49008 }, { "epoch": 0.8519007804759339, "grad_norm": 1.166417951327108, "learning_rate": 5.642839350747225e-08, "loss": 0.1508, "step": 49009 }, { "epoch": 0.8519181630134367, "grad_norm": 0.8748259423550949, "learning_rate": 5.641540345409101e-08, "loss": 0.117, "step": 49010 }, { "epoch": 0.8519355455509395, "grad_norm": 1.561656647888208, "learning_rate": 5.64024148066693e-08, "loss": 0.1829, "step": 49011 }, { "epoch": 0.8519529280884424, "grad_norm": 1.9161200552910258, "learning_rate": 5.6389427565248005e-08, "loss": 0.1451, "step": 49012 }, { "epoch": 0.8519703106259452, "grad_norm": 1.984945290022564, "learning_rate": 5.63764417298685e-08, "loss": 0.1731, "step": 49013 }, { "epoch": 0.851987693163448, "grad_norm": 1.2760311902815562, "learning_rate": 5.63634573005719e-08, "loss": 0.1299, "step": 49014 }, { "epoch": 0.8520050757009509, "grad_norm": 1.0642573726894606, "learning_rate": 5.6350474277399363e-08, "loss": 0.1425, "step": 49015 }, { "epoch": 0.8520224582384537, "grad_norm": 1.5180257860030166, "learning_rate": 5.6337492660392005e-08, "loss": 0.2297, "step": 49016 }, { "epoch": 0.8520398407759565, "grad_norm": 1.2989636217560208, "learning_rate": 5.6324512449590914e-08, "loss": 0.2154, "step": 49017 }, { "epoch": 0.8520572233134593, "grad_norm": 2.0226242778659977, "learning_rate": 5.631153364503749e-08, "loss": 0.2575, "step": 49018 }, { "epoch": 0.8520746058509622, "grad_norm": 1.0010543356862944, "learning_rate": 5.62985562467726e-08, "loss": 0.1383, "step": 49019 }, { "epoch": 0.8520919883884649, "grad_norm": 1.637745732269356, "learning_rate": 5.628558025483737e-08, "loss": 0.2244, "step": 49020 }, { "epoch": 0.8521093709259677, "grad_norm": 1.118120876639865, "learning_rate": 5.627260566927311e-08, "loss": 0.2032, "step": 49021 }, { "epoch": 0.8521267534634706, "grad_norm": 1.8167985127627029, "learning_rate": 5.6259632490120836e-08, "loss": 0.3622, "step": 49022 }, { "epoch": 0.8521441360009734, "grad_norm": 1.5050769872027405, "learning_rate": 5.624666071742174e-08, "loss": 0.194, "step": 49023 }, { "epoch": 0.8521615185384762, "grad_norm": 1.4336924929836303, "learning_rate": 5.623369035121689e-08, "loss": 0.1377, "step": 49024 }, { "epoch": 0.852178901075979, "grad_norm": 1.173236830792344, "learning_rate": 5.622072139154738e-08, "loss": 0.2345, "step": 49025 }, { "epoch": 0.8521962836134819, "grad_norm": 0.78678484270326, "learning_rate": 5.620775383845428e-08, "loss": 0.1534, "step": 49026 }, { "epoch": 0.8522136661509847, "grad_norm": 1.8336084281238654, "learning_rate": 5.619478769197877e-08, "loss": 0.1275, "step": 49027 }, { "epoch": 0.8522310486884875, "grad_norm": 0.9905324982302497, "learning_rate": 5.618182295216184e-08, "loss": 0.1705, "step": 49028 }, { "epoch": 0.8522484312259904, "grad_norm": 1.2285509166810695, "learning_rate": 5.6168859619044715e-08, "loss": 0.2323, "step": 49029 }, { "epoch": 0.8522658137634932, "grad_norm": 1.6766583228510037, "learning_rate": 5.61558976926686e-08, "loss": 0.1454, "step": 49030 }, { "epoch": 0.852283196300996, "grad_norm": 1.9579008191251315, "learning_rate": 5.614293717307422e-08, "loss": 0.2449, "step": 49031 }, { "epoch": 0.8523005788384989, "grad_norm": 1.9594146397166297, "learning_rate": 5.6129978060302783e-08, "loss": 0.1988, "step": 49032 }, { "epoch": 0.8523179613760017, "grad_norm": 2.977805504929577, "learning_rate": 5.6117020354395526e-08, "loss": 0.1877, "step": 49033 }, { "epoch": 0.8523353439135045, "grad_norm": 2.532490041748809, "learning_rate": 5.6104064055393365e-08, "loss": 0.2412, "step": 49034 }, { "epoch": 0.8523527264510073, "grad_norm": 0.9893251669654028, "learning_rate": 5.609110916333748e-08, "loss": 0.1288, "step": 49035 }, { "epoch": 0.8523701089885102, "grad_norm": 0.8687844260302279, "learning_rate": 5.6078155678268793e-08, "loss": 0.1963, "step": 49036 }, { "epoch": 0.852387491526013, "grad_norm": 2.5488284378555166, "learning_rate": 5.6065203600228485e-08, "loss": 0.1722, "step": 49037 }, { "epoch": 0.8524048740635158, "grad_norm": 1.9919113773962491, "learning_rate": 5.6052252929257525e-08, "loss": 0.1091, "step": 49038 }, { "epoch": 0.8524222566010187, "grad_norm": 1.3158319441969475, "learning_rate": 5.6039303665397e-08, "loss": 0.2842, "step": 49039 }, { "epoch": 0.8524396391385214, "grad_norm": 1.263263993403968, "learning_rate": 5.602635580868781e-08, "loss": 0.1875, "step": 49040 }, { "epoch": 0.8524570216760242, "grad_norm": 2.99866715827941, "learning_rate": 5.601340935917126e-08, "loss": 0.1923, "step": 49041 }, { "epoch": 0.852474404213527, "grad_norm": 0.866402277174721, "learning_rate": 5.600046431688826e-08, "loss": 0.1834, "step": 49042 }, { "epoch": 0.8524917867510299, "grad_norm": 1.4420974605537866, "learning_rate": 5.5987520681879844e-08, "loss": 0.207, "step": 49043 }, { "epoch": 0.8525091692885327, "grad_norm": 2.4429414649725083, "learning_rate": 5.5974578454187015e-08, "loss": 0.2244, "step": 49044 }, { "epoch": 0.8525265518260355, "grad_norm": 1.9473121807038893, "learning_rate": 5.596163763385076e-08, "loss": 0.2051, "step": 49045 }, { "epoch": 0.8525439343635384, "grad_norm": 1.051247652896751, "learning_rate": 5.594869822091219e-08, "loss": 0.1304, "step": 49046 }, { "epoch": 0.8525613169010412, "grad_norm": 1.1471220757536016, "learning_rate": 5.5935760215412133e-08, "loss": 0.2346, "step": 49047 }, { "epoch": 0.852578699438544, "grad_norm": 1.677449167311139, "learning_rate": 5.592282361739187e-08, "loss": 0.1449, "step": 49048 }, { "epoch": 0.8525960819760469, "grad_norm": 1.3209961812404198, "learning_rate": 5.5909888426892317e-08, "loss": 0.2293, "step": 49049 }, { "epoch": 0.8526134645135497, "grad_norm": 1.510355092869322, "learning_rate": 5.589695464395433e-08, "loss": 0.2719, "step": 49050 }, { "epoch": 0.8526308470510525, "grad_norm": 0.9192631977369076, "learning_rate": 5.5884022268618936e-08, "loss": 0.1717, "step": 49051 }, { "epoch": 0.8526482295885554, "grad_norm": 1.3946421259022477, "learning_rate": 5.587109130092721e-08, "loss": 0.1348, "step": 49052 }, { "epoch": 0.8526656121260582, "grad_norm": 3.4412064631172337, "learning_rate": 5.585816174092017e-08, "loss": 0.2597, "step": 49053 }, { "epoch": 0.852682994663561, "grad_norm": 1.8573549280215682, "learning_rate": 5.5845233588638684e-08, "loss": 0.2218, "step": 49054 }, { "epoch": 0.8527003772010638, "grad_norm": 1.0848309251226944, "learning_rate": 5.5832306844123824e-08, "loss": 0.263, "step": 49055 }, { "epoch": 0.8527177597385667, "grad_norm": 1.375458225401197, "learning_rate": 5.5819381507416494e-08, "loss": 0.2163, "step": 49056 }, { "epoch": 0.8527351422760695, "grad_norm": 2.0390460497194898, "learning_rate": 5.5806457578557676e-08, "loss": 0.1763, "step": 49057 }, { "epoch": 0.8527525248135723, "grad_norm": 1.7128091639937557, "learning_rate": 5.5793535057588384e-08, "loss": 0.2704, "step": 49058 }, { "epoch": 0.8527699073510752, "grad_norm": 1.5640430400281111, "learning_rate": 5.5780613944549416e-08, "loss": 0.1855, "step": 49059 }, { "epoch": 0.8527872898885779, "grad_norm": 1.0694497928319038, "learning_rate": 5.576769423948191e-08, "loss": 0.1149, "step": 49060 }, { "epoch": 0.8528046724260807, "grad_norm": 1.647382727464384, "learning_rate": 5.575477594242678e-08, "loss": 0.2285, "step": 49061 }, { "epoch": 0.8528220549635835, "grad_norm": 1.0155515952498155, "learning_rate": 5.574185905342493e-08, "loss": 0.179, "step": 49062 }, { "epoch": 0.8528394375010864, "grad_norm": 1.585713884972056, "learning_rate": 5.572894357251734e-08, "loss": 0.1687, "step": 49063 }, { "epoch": 0.8528568200385892, "grad_norm": 1.1518907048547171, "learning_rate": 5.571602949974485e-08, "loss": 0.1642, "step": 49064 }, { "epoch": 0.852874202576092, "grad_norm": 1.1054728238727858, "learning_rate": 5.57031168351485e-08, "loss": 0.1442, "step": 49065 }, { "epoch": 0.8528915851135949, "grad_norm": 2.3063726947795056, "learning_rate": 5.569020557876919e-08, "loss": 0.2164, "step": 49066 }, { "epoch": 0.8529089676510977, "grad_norm": 1.676849130111994, "learning_rate": 5.567729573064772e-08, "loss": 0.19, "step": 49067 }, { "epoch": 0.8529263501886005, "grad_norm": 1.0329388449939545, "learning_rate": 5.5664387290825344e-08, "loss": 0.1486, "step": 49068 }, { "epoch": 0.8529437327261034, "grad_norm": 2.3788215982873027, "learning_rate": 5.565148025934263e-08, "loss": 0.1856, "step": 49069 }, { "epoch": 0.8529611152636062, "grad_norm": 1.078627502886519, "learning_rate": 5.563857463624044e-08, "loss": 0.1608, "step": 49070 }, { "epoch": 0.852978497801109, "grad_norm": 0.9589045981566848, "learning_rate": 5.5625670421560025e-08, "loss": 0.1157, "step": 49071 }, { "epoch": 0.8529958803386118, "grad_norm": 0.967143746066785, "learning_rate": 5.561276761534212e-08, "loss": 0.2231, "step": 49072 }, { "epoch": 0.8530132628761147, "grad_norm": 0.8742004321508303, "learning_rate": 5.5599866217627525e-08, "loss": 0.2142, "step": 49073 }, { "epoch": 0.8530306454136175, "grad_norm": 1.4943916047039558, "learning_rate": 5.558696622845727e-08, "loss": 0.1526, "step": 49074 }, { "epoch": 0.8530480279511203, "grad_norm": 1.0985270474175495, "learning_rate": 5.5574067647872205e-08, "loss": 0.1757, "step": 49075 }, { "epoch": 0.8530654104886232, "grad_norm": 1.1617603802249656, "learning_rate": 5.5561170475913136e-08, "loss": 0.1966, "step": 49076 }, { "epoch": 0.853082793026126, "grad_norm": 1.7782940000437901, "learning_rate": 5.5548274712621025e-08, "loss": 0.1712, "step": 49077 }, { "epoch": 0.8531001755636288, "grad_norm": 1.662198632286462, "learning_rate": 5.553538035803662e-08, "loss": 0.1385, "step": 49078 }, { "epoch": 0.8531175581011317, "grad_norm": 1.385626871642816, "learning_rate": 5.5522487412201e-08, "loss": 0.1131, "step": 49079 }, { "epoch": 0.8531349406386344, "grad_norm": 1.2403111457457818, "learning_rate": 5.5509595875154916e-08, "loss": 0.1731, "step": 49080 }, { "epoch": 0.8531523231761372, "grad_norm": 1.010669047256314, "learning_rate": 5.549670574693932e-08, "loss": 0.1793, "step": 49081 }, { "epoch": 0.85316970571364, "grad_norm": 1.255418565343543, "learning_rate": 5.5483817027594746e-08, "loss": 0.2281, "step": 49082 }, { "epoch": 0.8531870882511429, "grad_norm": 2.43293560462727, "learning_rate": 5.547092971716244e-08, "loss": 0.2961, "step": 49083 }, { "epoch": 0.8532044707886457, "grad_norm": 1.7636277582639657, "learning_rate": 5.545804381568303e-08, "loss": 0.2254, "step": 49084 }, { "epoch": 0.8532218533261485, "grad_norm": 0.8895614686971625, "learning_rate": 5.544515932319743e-08, "loss": 0.1584, "step": 49085 }, { "epoch": 0.8532392358636514, "grad_norm": 1.2780197413378749, "learning_rate": 5.543227623974639e-08, "loss": 0.196, "step": 49086 }, { "epoch": 0.8532566184011542, "grad_norm": 1.0374103948192932, "learning_rate": 5.5419394565371035e-08, "loss": 0.0729, "step": 49087 }, { "epoch": 0.853274000938657, "grad_norm": 3.1084946017609343, "learning_rate": 5.540651430011184e-08, "loss": 0.189, "step": 49088 }, { "epoch": 0.8532913834761598, "grad_norm": 1.9945070369880689, "learning_rate": 5.5393635444009766e-08, "loss": 0.2599, "step": 49089 }, { "epoch": 0.8533087660136627, "grad_norm": 1.5040001441161843, "learning_rate": 5.538075799710551e-08, "loss": 0.1677, "step": 49090 }, { "epoch": 0.8533261485511655, "grad_norm": 1.4516915532442216, "learning_rate": 5.5367881959440144e-08, "loss": 0.0927, "step": 49091 }, { "epoch": 0.8533435310886683, "grad_norm": 1.155623114636844, "learning_rate": 5.5355007331054305e-08, "loss": 0.189, "step": 49092 }, { "epoch": 0.8533609136261712, "grad_norm": 1.3470304905911963, "learning_rate": 5.53421341119889e-08, "loss": 0.201, "step": 49093 }, { "epoch": 0.853378296163674, "grad_norm": 1.3243556855507312, "learning_rate": 5.532926230228463e-08, "loss": 0.1781, "step": 49094 }, { "epoch": 0.8533956787011768, "grad_norm": 1.5083725209238634, "learning_rate": 5.531639190198234e-08, "loss": 0.1153, "step": 49095 }, { "epoch": 0.8534130612386797, "grad_norm": 2.362034802830705, "learning_rate": 5.530352291112284e-08, "loss": 0.2197, "step": 49096 }, { "epoch": 0.8534304437761825, "grad_norm": 1.7317483312705282, "learning_rate": 5.5290655329746864e-08, "loss": 0.1423, "step": 49097 }, { "epoch": 0.8534478263136853, "grad_norm": 1.1906260455011908, "learning_rate": 5.527778915789516e-08, "loss": 0.1527, "step": 49098 }, { "epoch": 0.8534652088511882, "grad_norm": 1.5951928795227266, "learning_rate": 5.5264924395608645e-08, "loss": 0.1742, "step": 49099 }, { "epoch": 0.8534825913886909, "grad_norm": 1.566702682224461, "learning_rate": 5.525206104292812e-08, "loss": 0.3066, "step": 49100 }, { "epoch": 0.8534999739261937, "grad_norm": 1.3162238731418523, "learning_rate": 5.5239199099894106e-08, "loss": 0.1709, "step": 49101 }, { "epoch": 0.8535173564636965, "grad_norm": 2.2575268345171358, "learning_rate": 5.522633856654757e-08, "loss": 0.1858, "step": 49102 }, { "epoch": 0.8535347390011994, "grad_norm": 1.0884534672757795, "learning_rate": 5.5213479442929254e-08, "loss": 0.1702, "step": 49103 }, { "epoch": 0.8535521215387022, "grad_norm": 1.2346703354609467, "learning_rate": 5.520062172907991e-08, "loss": 0.1422, "step": 49104 }, { "epoch": 0.853569504076205, "grad_norm": 1.7839093498399003, "learning_rate": 5.5187765425040225e-08, "loss": 0.1796, "step": 49105 }, { "epoch": 0.8535868866137079, "grad_norm": 3.6724334050487326, "learning_rate": 5.5174910530851047e-08, "loss": 0.2371, "step": 49106 }, { "epoch": 0.8536042691512107, "grad_norm": 1.2101726553544676, "learning_rate": 5.5162057046553026e-08, "loss": 0.1636, "step": 49107 }, { "epoch": 0.8536216516887135, "grad_norm": 1.185666193049262, "learning_rate": 5.514920497218695e-08, "loss": 0.2005, "step": 49108 }, { "epoch": 0.8536390342262163, "grad_norm": 1.673353081455084, "learning_rate": 5.5136354307793454e-08, "loss": 0.2042, "step": 49109 }, { "epoch": 0.8536564167637192, "grad_norm": 1.5463389065128432, "learning_rate": 5.5123505053413456e-08, "loss": 0.1925, "step": 49110 }, { "epoch": 0.853673799301222, "grad_norm": 1.2006361041123605, "learning_rate": 5.5110657209087584e-08, "loss": 0.1946, "step": 49111 }, { "epoch": 0.8536911818387248, "grad_norm": 1.7504620383443634, "learning_rate": 5.50978107748567e-08, "loss": 0.1775, "step": 49112 }, { "epoch": 0.8537085643762277, "grad_norm": 3.588635929809089, "learning_rate": 5.508496575076116e-08, "loss": 0.1629, "step": 49113 }, { "epoch": 0.8537259469137305, "grad_norm": 1.484092197932226, "learning_rate": 5.507212213684198e-08, "loss": 0.1624, "step": 49114 }, { "epoch": 0.8537433294512333, "grad_norm": 1.429662519759964, "learning_rate": 5.50592799331398e-08, "loss": 0.2085, "step": 49115 }, { "epoch": 0.8537607119887362, "grad_norm": 1.494741412804972, "learning_rate": 5.5046439139695314e-08, "loss": 0.1448, "step": 49116 }, { "epoch": 0.853778094526239, "grad_norm": 1.575618209837072, "learning_rate": 5.5033599756549145e-08, "loss": 0.1598, "step": 49117 }, { "epoch": 0.8537954770637418, "grad_norm": 1.1799460920463574, "learning_rate": 5.5020761783742156e-08, "loss": 0.1418, "step": 49118 }, { "epoch": 0.8538128596012446, "grad_norm": 1.2467334697680708, "learning_rate": 5.500792522131503e-08, "loss": 0.2213, "step": 49119 }, { "epoch": 0.8538302421387474, "grad_norm": 5.820006980432301, "learning_rate": 5.4995090069308245e-08, "loss": 0.2774, "step": 49120 }, { "epoch": 0.8538476246762502, "grad_norm": 1.773174654195228, "learning_rate": 5.4982256327762547e-08, "loss": 0.1611, "step": 49121 }, { "epoch": 0.853865007213753, "grad_norm": 0.9448929087339517, "learning_rate": 5.4969423996718777e-08, "loss": 0.1088, "step": 49122 }, { "epoch": 0.8538823897512559, "grad_norm": 1.1315034898850527, "learning_rate": 5.4956593076217473e-08, "loss": 0.0894, "step": 49123 }, { "epoch": 0.8538997722887587, "grad_norm": 2.1694021688160707, "learning_rate": 5.4943763566299315e-08, "loss": 0.2405, "step": 49124 }, { "epoch": 0.8539171548262615, "grad_norm": 1.0189296835798964, "learning_rate": 5.4930935467005004e-08, "loss": 0.1521, "step": 49125 }, { "epoch": 0.8539345373637643, "grad_norm": 1.525860809115823, "learning_rate": 5.491810877837521e-08, "loss": 0.2247, "step": 49126 }, { "epoch": 0.8539519199012672, "grad_norm": 1.2150270452130152, "learning_rate": 5.490528350045054e-08, "loss": 0.2744, "step": 49127 }, { "epoch": 0.85396930243877, "grad_norm": 2.3791640449836517, "learning_rate": 5.489245963327166e-08, "loss": 0.3154, "step": 49128 }, { "epoch": 0.8539866849762728, "grad_norm": 0.9130025127357093, "learning_rate": 5.48796371768791e-08, "loss": 0.1266, "step": 49129 }, { "epoch": 0.8540040675137757, "grad_norm": 2.434267711986738, "learning_rate": 5.486681613131372e-08, "loss": 0.1898, "step": 49130 }, { "epoch": 0.8540214500512785, "grad_norm": 1.1915964634061784, "learning_rate": 5.4853996496616206e-08, "loss": 0.2112, "step": 49131 }, { "epoch": 0.8540388325887813, "grad_norm": 1.47086364707872, "learning_rate": 5.48411782728268e-08, "loss": 0.2095, "step": 49132 }, { "epoch": 0.8540562151262842, "grad_norm": 4.640528604505711, "learning_rate": 5.482836145998648e-08, "loss": 0.1653, "step": 49133 }, { "epoch": 0.854073597663787, "grad_norm": 1.0620733656484445, "learning_rate": 5.481554605813571e-08, "loss": 0.0995, "step": 49134 }, { "epoch": 0.8540909802012898, "grad_norm": 1.0731981951380751, "learning_rate": 5.480273206731523e-08, "loss": 0.1963, "step": 49135 }, { "epoch": 0.8541083627387926, "grad_norm": 1.280343529749983, "learning_rate": 5.478991948756545e-08, "loss": 0.2771, "step": 49136 }, { "epoch": 0.8541257452762955, "grad_norm": 2.5867885723009785, "learning_rate": 5.477710831892724e-08, "loss": 0.1795, "step": 49137 }, { "epoch": 0.8541431278137983, "grad_norm": 1.1349170830568138, "learning_rate": 5.47642985614411e-08, "loss": 0.3029, "step": 49138 }, { "epoch": 0.854160510351301, "grad_norm": 1.241277764719186, "learning_rate": 5.4751490215147575e-08, "loss": 0.1574, "step": 49139 }, { "epoch": 0.8541778928888039, "grad_norm": 1.8509454065632607, "learning_rate": 5.4738683280087186e-08, "loss": 0.1325, "step": 49140 }, { "epoch": 0.8541952754263067, "grad_norm": 1.2216627585986348, "learning_rate": 5.472587775630072e-08, "loss": 0.1613, "step": 49141 }, { "epoch": 0.8542126579638095, "grad_norm": 1.4657892016276395, "learning_rate": 5.471307364382871e-08, "loss": 0.194, "step": 49142 }, { "epoch": 0.8542300405013123, "grad_norm": 1.0928821877094677, "learning_rate": 5.470027094271168e-08, "loss": 0.12, "step": 49143 }, { "epoch": 0.8542474230388152, "grad_norm": 2.356588761541686, "learning_rate": 5.468746965299026e-08, "loss": 0.3566, "step": 49144 }, { "epoch": 0.854264805576318, "grad_norm": 0.8983868952651374, "learning_rate": 5.467466977470497e-08, "loss": 0.1368, "step": 49145 }, { "epoch": 0.8542821881138208, "grad_norm": 1.2108612733400173, "learning_rate": 5.4661871307896455e-08, "loss": 0.1503, "step": 49146 }, { "epoch": 0.8542995706513237, "grad_norm": 1.6525834911658155, "learning_rate": 5.4649074252605175e-08, "loss": 0.2303, "step": 49147 }, { "epoch": 0.8543169531888265, "grad_norm": 1.786806700881573, "learning_rate": 5.463627860887171e-08, "loss": 0.1181, "step": 49148 }, { "epoch": 0.8543343357263293, "grad_norm": 1.0840462987585062, "learning_rate": 5.462348437673675e-08, "loss": 0.1648, "step": 49149 }, { "epoch": 0.8543517182638322, "grad_norm": 1.276232272412422, "learning_rate": 5.4610691556240815e-08, "loss": 0.2447, "step": 49150 }, { "epoch": 0.854369100801335, "grad_norm": 1.5516969547044108, "learning_rate": 5.4597900147424217e-08, "loss": 0.1604, "step": 49151 }, { "epoch": 0.8543864833388378, "grad_norm": 1.3252268799442566, "learning_rate": 5.45851101503278e-08, "loss": 0.1566, "step": 49152 }, { "epoch": 0.8544038658763407, "grad_norm": 1.9161900354197672, "learning_rate": 5.457232156499192e-08, "loss": 0.2432, "step": 49153 }, { "epoch": 0.8544212484138435, "grad_norm": 1.0496416596678424, "learning_rate": 5.455953439145722e-08, "loss": 0.2383, "step": 49154 }, { "epoch": 0.8544386309513463, "grad_norm": 2.0498503145420153, "learning_rate": 5.4546748629764163e-08, "loss": 0.3139, "step": 49155 }, { "epoch": 0.8544560134888491, "grad_norm": 1.2285790151972698, "learning_rate": 5.453396427995327e-08, "loss": 0.1759, "step": 49156 }, { "epoch": 0.854473396026352, "grad_norm": 1.174023073639028, "learning_rate": 5.452118134206507e-08, "loss": 0.1502, "step": 49157 }, { "epoch": 0.8544907785638548, "grad_norm": 1.2008574005976937, "learning_rate": 5.450839981614014e-08, "loss": 0.1887, "step": 49158 }, { "epoch": 0.8545081611013575, "grad_norm": 1.3699291110556593, "learning_rate": 5.4495619702218776e-08, "loss": 0.1745, "step": 49159 }, { "epoch": 0.8545255436388604, "grad_norm": 2.973857754291559, "learning_rate": 5.448284100034184e-08, "loss": 0.1378, "step": 49160 }, { "epoch": 0.8545429261763632, "grad_norm": 1.13601179117189, "learning_rate": 5.4470063710549576e-08, "loss": 0.1621, "step": 49161 }, { "epoch": 0.854560308713866, "grad_norm": 0.9968278430059301, "learning_rate": 5.445728783288256e-08, "loss": 0.2293, "step": 49162 }, { "epoch": 0.8545776912513688, "grad_norm": 1.7773826605538987, "learning_rate": 5.444451336738126e-08, "loss": 0.3857, "step": 49163 }, { "epoch": 0.8545950737888717, "grad_norm": 1.388602342113447, "learning_rate": 5.4431740314086206e-08, "loss": 0.2428, "step": 49164 }, { "epoch": 0.8546124563263745, "grad_norm": 2.4301270316866637, "learning_rate": 5.441896867303786e-08, "loss": 0.2043, "step": 49165 }, { "epoch": 0.8546298388638773, "grad_norm": 1.9761682644575442, "learning_rate": 5.440619844427668e-08, "loss": 0.165, "step": 49166 }, { "epoch": 0.8546472214013802, "grad_norm": 1.8565132043749364, "learning_rate": 5.43934296278431e-08, "loss": 0.1336, "step": 49167 }, { "epoch": 0.854664603938883, "grad_norm": 1.1545104538422424, "learning_rate": 5.438066222377774e-08, "loss": 0.1358, "step": 49168 }, { "epoch": 0.8546819864763858, "grad_norm": 2.9113968289382854, "learning_rate": 5.436789623212107e-08, "loss": 0.3013, "step": 49169 }, { "epoch": 0.8546993690138887, "grad_norm": 0.8769591721407005, "learning_rate": 5.4355131652913336e-08, "loss": 0.2113, "step": 49170 }, { "epoch": 0.8547167515513915, "grad_norm": 1.4311786644147595, "learning_rate": 5.4342368486195055e-08, "loss": 0.2083, "step": 49171 }, { "epoch": 0.8547341340888943, "grad_norm": 1.934002185105293, "learning_rate": 5.432960673200682e-08, "loss": 0.1618, "step": 49172 }, { "epoch": 0.8547515166263971, "grad_norm": 1.0434610818974333, "learning_rate": 5.431684639038903e-08, "loss": 0.1746, "step": 49173 }, { "epoch": 0.8547688991639, "grad_norm": 2.0807390638355825, "learning_rate": 5.4304087461382095e-08, "loss": 0.1957, "step": 49174 }, { "epoch": 0.8547862817014028, "grad_norm": 1.8004418278283678, "learning_rate": 5.4291329945026495e-08, "loss": 0.2025, "step": 49175 }, { "epoch": 0.8548036642389056, "grad_norm": 1.3953919240088306, "learning_rate": 5.427857384136264e-08, "loss": 0.2492, "step": 49176 }, { "epoch": 0.8548210467764085, "grad_norm": 1.4694166064670533, "learning_rate": 5.426581915043094e-08, "loss": 0.2364, "step": 49177 }, { "epoch": 0.8548384293139113, "grad_norm": 1.6352137717617246, "learning_rate": 5.4253065872271796e-08, "loss": 0.2161, "step": 49178 }, { "epoch": 0.854855811851414, "grad_norm": 1.2875837683822648, "learning_rate": 5.4240314006925635e-08, "loss": 0.1456, "step": 49179 }, { "epoch": 0.8548731943889168, "grad_norm": 2.5261136511592857, "learning_rate": 5.4227563554432977e-08, "loss": 0.1187, "step": 49180 }, { "epoch": 0.8548905769264197, "grad_norm": 1.1977617967907348, "learning_rate": 5.4214814514834176e-08, "loss": 0.1841, "step": 49181 }, { "epoch": 0.8549079594639225, "grad_norm": 1.3403052631881185, "learning_rate": 5.420206688816964e-08, "loss": 0.1683, "step": 49182 }, { "epoch": 0.8549253420014253, "grad_norm": 1.3039760055388194, "learning_rate": 5.418932067447979e-08, "loss": 0.1339, "step": 49183 }, { "epoch": 0.8549427245389282, "grad_norm": 1.5991165579718647, "learning_rate": 5.4176575873804975e-08, "loss": 0.1327, "step": 49184 }, { "epoch": 0.854960107076431, "grad_norm": 0.8563003445220461, "learning_rate": 5.416383248618567e-08, "loss": 0.1841, "step": 49185 }, { "epoch": 0.8549774896139338, "grad_norm": 1.542543019022526, "learning_rate": 5.4151090511662165e-08, "loss": 0.1554, "step": 49186 }, { "epoch": 0.8549948721514367, "grad_norm": 1.666174326861086, "learning_rate": 5.413834995027483e-08, "loss": 0.3258, "step": 49187 }, { "epoch": 0.8550122546889395, "grad_norm": 0.9259551405494264, "learning_rate": 5.412561080206429e-08, "loss": 0.1878, "step": 49188 }, { "epoch": 0.8550296372264423, "grad_norm": 1.6058322264022569, "learning_rate": 5.411287306707063e-08, "loss": 0.1282, "step": 49189 }, { "epoch": 0.8550470197639451, "grad_norm": 1.9759262348198328, "learning_rate": 5.410013674533426e-08, "loss": 0.1638, "step": 49190 }, { "epoch": 0.855064402301448, "grad_norm": 1.4623951243424302, "learning_rate": 5.4087401836895755e-08, "loss": 0.2767, "step": 49191 }, { "epoch": 0.8550817848389508, "grad_norm": 1.4795546917260727, "learning_rate": 5.4074668341795304e-08, "loss": 0.2846, "step": 49192 }, { "epoch": 0.8550991673764536, "grad_norm": 3.596998050593424, "learning_rate": 5.4061936260073324e-08, "loss": 0.1247, "step": 49193 }, { "epoch": 0.8551165499139565, "grad_norm": 0.8784806413998822, "learning_rate": 5.404920559177018e-08, "loss": 0.2237, "step": 49194 }, { "epoch": 0.8551339324514593, "grad_norm": 1.271383220079026, "learning_rate": 5.403647633692621e-08, "loss": 0.1409, "step": 49195 }, { "epoch": 0.8551513149889621, "grad_norm": 1.1966900823575393, "learning_rate": 5.402374849558172e-08, "loss": 0.2361, "step": 49196 }, { "epoch": 0.855168697526465, "grad_norm": 3.060526383941292, "learning_rate": 5.401102206777708e-08, "loss": 0.2238, "step": 49197 }, { "epoch": 0.8551860800639678, "grad_norm": 0.8931071758666155, "learning_rate": 5.3998297053552523e-08, "loss": 0.2246, "step": 49198 }, { "epoch": 0.8552034626014705, "grad_norm": 0.9474546352736856, "learning_rate": 5.398557345294863e-08, "loss": 0.1652, "step": 49199 }, { "epoch": 0.8552208451389733, "grad_norm": 7.100777765659918, "learning_rate": 5.3972851266005534e-08, "loss": 0.2005, "step": 49200 }, { "epoch": 0.8552382276764762, "grad_norm": 1.967945910935062, "learning_rate": 5.396013049276377e-08, "loss": 0.2518, "step": 49201 }, { "epoch": 0.855255610213979, "grad_norm": 1.4064595802404851, "learning_rate": 5.3947411133263285e-08, "loss": 0.2674, "step": 49202 }, { "epoch": 0.8552729927514818, "grad_norm": 1.4697117961923218, "learning_rate": 5.393469318754468e-08, "loss": 0.1794, "step": 49203 }, { "epoch": 0.8552903752889847, "grad_norm": 0.8939144925820155, "learning_rate": 5.3921976655648186e-08, "loss": 0.1896, "step": 49204 }, { "epoch": 0.8553077578264875, "grad_norm": 1.4192062845408613, "learning_rate": 5.390926153761416e-08, "loss": 0.2717, "step": 49205 }, { "epoch": 0.8553251403639903, "grad_norm": 1.8051471062049513, "learning_rate": 5.3896547833482684e-08, "loss": 0.1631, "step": 49206 }, { "epoch": 0.8553425229014932, "grad_norm": 1.6953551944148286, "learning_rate": 5.388383554329451e-08, "loss": 0.1819, "step": 49207 }, { "epoch": 0.855359905438996, "grad_norm": 1.689454294648968, "learning_rate": 5.3871124667089486e-08, "loss": 0.2585, "step": 49208 }, { "epoch": 0.8553772879764988, "grad_norm": 2.0512679525607727, "learning_rate": 5.385841520490808e-08, "loss": 0.1825, "step": 49209 }, { "epoch": 0.8553946705140016, "grad_norm": 1.3917707509816863, "learning_rate": 5.384570715679049e-08, "loss": 0.1543, "step": 49210 }, { "epoch": 0.8554120530515045, "grad_norm": 1.2006698106908298, "learning_rate": 5.383300052277712e-08, "loss": 0.1738, "step": 49211 }, { "epoch": 0.8554294355890073, "grad_norm": 2.0606914428017746, "learning_rate": 5.382029530290821e-08, "loss": 0.1638, "step": 49212 }, { "epoch": 0.8554468181265101, "grad_norm": 1.4555002864480646, "learning_rate": 5.380759149722402e-08, "loss": 0.1859, "step": 49213 }, { "epoch": 0.855464200664013, "grad_norm": 1.5881462501085954, "learning_rate": 5.379488910576474e-08, "loss": 0.1358, "step": 49214 }, { "epoch": 0.8554815832015158, "grad_norm": 4.3291798314034, "learning_rate": 5.378218812857077e-08, "loss": 0.2778, "step": 49215 }, { "epoch": 0.8554989657390186, "grad_norm": 1.7642892418939766, "learning_rate": 5.376948856568225e-08, "loss": 0.28, "step": 49216 }, { "epoch": 0.8555163482765215, "grad_norm": 1.4827350378626893, "learning_rate": 5.375679041713943e-08, "loss": 0.2308, "step": 49217 }, { "epoch": 0.8555337308140243, "grad_norm": 0.9521293820732681, "learning_rate": 5.374409368298255e-08, "loss": 0.1583, "step": 49218 }, { "epoch": 0.855551113351527, "grad_norm": 1.9560176478656541, "learning_rate": 5.3731398363252026e-08, "loss": 0.2462, "step": 49219 }, { "epoch": 0.8555684958890298, "grad_norm": 1.1189825079889526, "learning_rate": 5.3718704457987986e-08, "loss": 0.1999, "step": 49220 }, { "epoch": 0.8555858784265327, "grad_norm": 1.2588111380044666, "learning_rate": 5.3706011967230514e-08, "loss": 0.2576, "step": 49221 }, { "epoch": 0.8556032609640355, "grad_norm": 0.9485822514286505, "learning_rate": 5.369332089102002e-08, "loss": 0.2665, "step": 49222 }, { "epoch": 0.8556206435015383, "grad_norm": 1.5539758303501794, "learning_rate": 5.368063122939675e-08, "loss": 0.2778, "step": 49223 }, { "epoch": 0.8556380260390412, "grad_norm": 1.5038760286337574, "learning_rate": 5.366794298240079e-08, "loss": 0.1441, "step": 49224 }, { "epoch": 0.855655408576544, "grad_norm": 1.6751508681555543, "learning_rate": 5.3655256150072495e-08, "loss": 0.1872, "step": 49225 }, { "epoch": 0.8556727911140468, "grad_norm": 2.465620080871568, "learning_rate": 5.364257073245193e-08, "loss": 0.1802, "step": 49226 }, { "epoch": 0.8556901736515496, "grad_norm": 1.6663729353937897, "learning_rate": 5.362988672957941e-08, "loss": 0.2561, "step": 49227 }, { "epoch": 0.8557075561890525, "grad_norm": 1.3948206927873927, "learning_rate": 5.361720414149512e-08, "loss": 0.1441, "step": 49228 }, { "epoch": 0.8557249387265553, "grad_norm": 1.8800539795777427, "learning_rate": 5.360452296823908e-08, "loss": 0.1518, "step": 49229 }, { "epoch": 0.8557423212640581, "grad_norm": 1.4050935603467472, "learning_rate": 5.359184320985183e-08, "loss": 0.1468, "step": 49230 }, { "epoch": 0.855759703801561, "grad_norm": 1.247187079636131, "learning_rate": 5.3579164866373316e-08, "loss": 0.1326, "step": 49231 }, { "epoch": 0.8557770863390638, "grad_norm": 1.1009691225579012, "learning_rate": 5.356648793784391e-08, "loss": 0.1797, "step": 49232 }, { "epoch": 0.8557944688765666, "grad_norm": 1.2238365075663895, "learning_rate": 5.3553812424303404e-08, "loss": 0.1588, "step": 49233 }, { "epoch": 0.8558118514140695, "grad_norm": 1.3232027038313723, "learning_rate": 5.354113832579238e-08, "loss": 0.2451, "step": 49234 }, { "epoch": 0.8558292339515723, "grad_norm": 1.3232123677754972, "learning_rate": 5.3528465642350864e-08, "loss": 0.1905, "step": 49235 }, { "epoch": 0.8558466164890751, "grad_norm": 1.3584467569213798, "learning_rate": 5.351579437401904e-08, "loss": 0.2107, "step": 49236 }, { "epoch": 0.855863999026578, "grad_norm": 0.7477652234805555, "learning_rate": 5.3503124520836886e-08, "loss": 0.2032, "step": 49237 }, { "epoch": 0.8558813815640808, "grad_norm": 1.2580704928759427, "learning_rate": 5.349045608284486e-08, "loss": 0.1453, "step": 49238 }, { "epoch": 0.8558987641015835, "grad_norm": 1.1936004887344143, "learning_rate": 5.34777890600831e-08, "loss": 0.1832, "step": 49239 }, { "epoch": 0.8559161466390863, "grad_norm": 0.9571602544669349, "learning_rate": 5.346512345259141e-08, "loss": 0.2344, "step": 49240 }, { "epoch": 0.8559335291765892, "grad_norm": 1.4640496035495114, "learning_rate": 5.345245926041025e-08, "loss": 0.2272, "step": 49241 }, { "epoch": 0.855950911714092, "grad_norm": 1.1565252165597328, "learning_rate": 5.3439796483579655e-08, "loss": 0.1445, "step": 49242 }, { "epoch": 0.8559682942515948, "grad_norm": 1.0177175331391597, "learning_rate": 5.342713512213981e-08, "loss": 0.2719, "step": 49243 }, { "epoch": 0.8559856767890976, "grad_norm": 1.8395569082202716, "learning_rate": 5.3414475176130794e-08, "loss": 0.1741, "step": 49244 }, { "epoch": 0.8560030593266005, "grad_norm": 1.8303874745375872, "learning_rate": 5.3401816645592735e-08, "loss": 0.2388, "step": 49245 }, { "epoch": 0.8560204418641033, "grad_norm": 1.0879799827189662, "learning_rate": 5.338915953056578e-08, "loss": 0.1439, "step": 49246 }, { "epoch": 0.8560378244016061, "grad_norm": 3.011753721447448, "learning_rate": 5.3376503831090046e-08, "loss": 0.2501, "step": 49247 }, { "epoch": 0.856055206939109, "grad_norm": 1.5801631502888167, "learning_rate": 5.336384954720552e-08, "loss": 0.2807, "step": 49248 }, { "epoch": 0.8560725894766118, "grad_norm": 1.619833415423793, "learning_rate": 5.3351196678952496e-08, "loss": 0.216, "step": 49249 }, { "epoch": 0.8560899720141146, "grad_norm": 1.0944026819698245, "learning_rate": 5.3338545226371047e-08, "loss": 0.0926, "step": 49250 }, { "epoch": 0.8561073545516175, "grad_norm": 1.7521853110892673, "learning_rate": 5.3325895189501315e-08, "loss": 0.2213, "step": 49251 }, { "epoch": 0.8561247370891203, "grad_norm": 1.2095912528420152, "learning_rate": 5.33132465683831e-08, "loss": 0.1004, "step": 49252 }, { "epoch": 0.8561421196266231, "grad_norm": 1.0942436035694068, "learning_rate": 5.330059936305681e-08, "loss": 0.1339, "step": 49253 }, { "epoch": 0.856159502164126, "grad_norm": 1.8955273870844025, "learning_rate": 5.328795357356241e-08, "loss": 0.1879, "step": 49254 }, { "epoch": 0.8561768847016288, "grad_norm": 1.6391413496226268, "learning_rate": 5.327530919994e-08, "loss": 0.1636, "step": 49255 }, { "epoch": 0.8561942672391316, "grad_norm": 1.205255200492852, "learning_rate": 5.3262666242229583e-08, "loss": 0.1724, "step": 49256 }, { "epoch": 0.8562116497766344, "grad_norm": 0.8610830785462443, "learning_rate": 5.325002470047135e-08, "loss": 0.1884, "step": 49257 }, { "epoch": 0.8562290323141373, "grad_norm": 1.638811396156274, "learning_rate": 5.323738457470545e-08, "loss": 0.2008, "step": 49258 }, { "epoch": 0.85624641485164, "grad_norm": 1.8319957252882888, "learning_rate": 5.322474586497172e-08, "loss": 0.1897, "step": 49259 }, { "epoch": 0.8562637973891428, "grad_norm": 1.3305369887507954, "learning_rate": 5.32121085713102e-08, "loss": 0.2074, "step": 49260 }, { "epoch": 0.8562811799266457, "grad_norm": 1.6216292960783791, "learning_rate": 5.319947269376113e-08, "loss": 0.2133, "step": 49261 }, { "epoch": 0.8562985624641485, "grad_norm": 1.2589710580970666, "learning_rate": 5.318683823236453e-08, "loss": 0.1368, "step": 49262 }, { "epoch": 0.8563159450016513, "grad_norm": 1.4838694732994444, "learning_rate": 5.317420518716037e-08, "loss": 0.1857, "step": 49263 }, { "epoch": 0.8563333275391541, "grad_norm": 3.7172451254601335, "learning_rate": 5.3161573558188786e-08, "loss": 0.1581, "step": 49264 }, { "epoch": 0.856350710076657, "grad_norm": 1.4771659226837461, "learning_rate": 5.314894334548969e-08, "loss": 0.1069, "step": 49265 }, { "epoch": 0.8563680926141598, "grad_norm": 1.9089043598391757, "learning_rate": 5.3136314549103224e-08, "loss": 0.2475, "step": 49266 }, { "epoch": 0.8563854751516626, "grad_norm": 1.9907624239945922, "learning_rate": 5.312368716906934e-08, "loss": 0.211, "step": 49267 }, { "epoch": 0.8564028576891655, "grad_norm": 1.0455255732821516, "learning_rate": 5.3111061205427964e-08, "loss": 0.1543, "step": 49268 }, { "epoch": 0.8564202402266683, "grad_norm": 1.3356182832127703, "learning_rate": 5.3098436658219394e-08, "loss": 0.2343, "step": 49269 }, { "epoch": 0.8564376227641711, "grad_norm": 2.873072007412653, "learning_rate": 5.3085813527483534e-08, "loss": 0.2488, "step": 49270 }, { "epoch": 0.856455005301674, "grad_norm": 4.085657718241515, "learning_rate": 5.30731918132602e-08, "loss": 0.2151, "step": 49271 }, { "epoch": 0.8564723878391768, "grad_norm": 2.147475101657313, "learning_rate": 5.3060571515589623e-08, "loss": 0.2795, "step": 49272 }, { "epoch": 0.8564897703766796, "grad_norm": 1.3256342535473173, "learning_rate": 5.304795263451173e-08, "loss": 0.1971, "step": 49273 }, { "epoch": 0.8565071529141824, "grad_norm": 2.052157794746867, "learning_rate": 5.303533517006653e-08, "loss": 0.2173, "step": 49274 }, { "epoch": 0.8565245354516853, "grad_norm": 1.5034646170789687, "learning_rate": 5.3022719122293946e-08, "loss": 0.1602, "step": 49275 }, { "epoch": 0.8565419179891881, "grad_norm": 2.928728036942657, "learning_rate": 5.301010449123411e-08, "loss": 0.1531, "step": 49276 }, { "epoch": 0.8565593005266909, "grad_norm": 1.2706118424454373, "learning_rate": 5.2997491276926823e-08, "loss": 0.203, "step": 49277 }, { "epoch": 0.8565766830641937, "grad_norm": 1.2646201882483492, "learning_rate": 5.2984879479412216e-08, "loss": 0.1673, "step": 49278 }, { "epoch": 0.8565940656016965, "grad_norm": 3.106780212994218, "learning_rate": 5.297226909873004e-08, "loss": 0.1892, "step": 49279 }, { "epoch": 0.8566114481391993, "grad_norm": 1.4477258927145387, "learning_rate": 5.295966013492059e-08, "loss": 0.1848, "step": 49280 }, { "epoch": 0.8566288306767021, "grad_norm": 2.0093941672380065, "learning_rate": 5.294705258802362e-08, "loss": 0.1639, "step": 49281 }, { "epoch": 0.856646213214205, "grad_norm": 1.4229960538027313, "learning_rate": 5.293444645807915e-08, "loss": 0.1352, "step": 49282 }, { "epoch": 0.8566635957517078, "grad_norm": 2.577995746825954, "learning_rate": 5.2921841745127096e-08, "loss": 0.2935, "step": 49283 }, { "epoch": 0.8566809782892106, "grad_norm": 1.0961784490340551, "learning_rate": 5.290923844920742e-08, "loss": 0.1753, "step": 49284 }, { "epoch": 0.8566983608267135, "grad_norm": 1.7036162246920237, "learning_rate": 5.289663657036014e-08, "loss": 0.1801, "step": 49285 }, { "epoch": 0.8567157433642163, "grad_norm": 1.546772465878072, "learning_rate": 5.288403610862508e-08, "loss": 0.2341, "step": 49286 }, { "epoch": 0.8567331259017191, "grad_norm": 1.9901139700611472, "learning_rate": 5.287143706404218e-08, "loss": 0.2186, "step": 49287 }, { "epoch": 0.856750508439222, "grad_norm": 1.1497133090724727, "learning_rate": 5.2858839436651535e-08, "loss": 0.1565, "step": 49288 }, { "epoch": 0.8567678909767248, "grad_norm": 1.3409363901259714, "learning_rate": 5.284624322649306e-08, "loss": 0.2487, "step": 49289 }, { "epoch": 0.8567852735142276, "grad_norm": 1.4318192418175004, "learning_rate": 5.283364843360644e-08, "loss": 0.14, "step": 49290 }, { "epoch": 0.8568026560517304, "grad_norm": 0.9094994526919048, "learning_rate": 5.282105505803169e-08, "loss": 0.1341, "step": 49291 }, { "epoch": 0.8568200385892333, "grad_norm": 1.3442709733516376, "learning_rate": 5.280846309980891e-08, "loss": 0.1614, "step": 49292 }, { "epoch": 0.8568374211267361, "grad_norm": 3.4595220653218783, "learning_rate": 5.2795872558977825e-08, "loss": 0.2483, "step": 49293 }, { "epoch": 0.8568548036642389, "grad_norm": 1.5239551961133901, "learning_rate": 5.278328343557842e-08, "loss": 0.1952, "step": 49294 }, { "epoch": 0.8568721862017418, "grad_norm": 2.9059936212151714, "learning_rate": 5.277069572965054e-08, "loss": 0.2003, "step": 49295 }, { "epoch": 0.8568895687392446, "grad_norm": 1.1896154528435687, "learning_rate": 5.275810944123415e-08, "loss": 0.215, "step": 49296 }, { "epoch": 0.8569069512767474, "grad_norm": 1.1331561857964874, "learning_rate": 5.274552457036907e-08, "loss": 0.2186, "step": 49297 }, { "epoch": 0.8569243338142501, "grad_norm": 1.2106454947573062, "learning_rate": 5.2732941117095255e-08, "loss": 0.204, "step": 49298 }, { "epoch": 0.856941716351753, "grad_norm": 2.1573811921220933, "learning_rate": 5.2720359081452445e-08, "loss": 0.2211, "step": 49299 }, { "epoch": 0.8569590988892558, "grad_norm": 1.4537352934116907, "learning_rate": 5.270777846348073e-08, "loss": 0.1675, "step": 49300 }, { "epoch": 0.8569764814267586, "grad_norm": 1.1186928133919378, "learning_rate": 5.269519926321991e-08, "loss": 0.2281, "step": 49301 }, { "epoch": 0.8569938639642615, "grad_norm": 2.5082645292875196, "learning_rate": 5.268262148070984e-08, "loss": 0.1995, "step": 49302 }, { "epoch": 0.8570112465017643, "grad_norm": 1.5084377894512355, "learning_rate": 5.267004511599038e-08, "loss": 0.1537, "step": 49303 }, { "epoch": 0.8570286290392671, "grad_norm": 1.1032429344133599, "learning_rate": 5.2657470169101324e-08, "loss": 0.1668, "step": 49304 }, { "epoch": 0.85704601157677, "grad_norm": 1.1741877682172657, "learning_rate": 5.264489664008265e-08, "loss": 0.2159, "step": 49305 }, { "epoch": 0.8570633941142728, "grad_norm": 0.8059162196537071, "learning_rate": 5.263232452897415e-08, "loss": 0.1383, "step": 49306 }, { "epoch": 0.8570807766517756, "grad_norm": 1.8646366282421976, "learning_rate": 5.261975383581557e-08, "loss": 0.2506, "step": 49307 }, { "epoch": 0.8570981591892785, "grad_norm": 1.1343625904457544, "learning_rate": 5.260718456064711e-08, "loss": 0.105, "step": 49308 }, { "epoch": 0.8571155417267813, "grad_norm": 1.388830343394088, "learning_rate": 5.2594616703508176e-08, "loss": 0.1465, "step": 49309 }, { "epoch": 0.8571329242642841, "grad_norm": 1.3566054337481925, "learning_rate": 5.258205026443874e-08, "loss": 0.1573, "step": 49310 }, { "epoch": 0.8571503068017869, "grad_norm": 2.0811055155803095, "learning_rate": 5.256948524347876e-08, "loss": 0.2399, "step": 49311 }, { "epoch": 0.8571676893392898, "grad_norm": 1.4247557580677932, "learning_rate": 5.2556921640668e-08, "loss": 0.1267, "step": 49312 }, { "epoch": 0.8571850718767926, "grad_norm": 1.366591998852103, "learning_rate": 5.254435945604624e-08, "loss": 0.1611, "step": 49313 }, { "epoch": 0.8572024544142954, "grad_norm": 2.4457836928662116, "learning_rate": 5.25317986896533e-08, "loss": 0.2179, "step": 49314 }, { "epoch": 0.8572198369517983, "grad_norm": 1.2870396381562401, "learning_rate": 5.251923934152902e-08, "loss": 0.1582, "step": 49315 }, { "epoch": 0.8572372194893011, "grad_norm": 1.887024616299693, "learning_rate": 5.250668141171322e-08, "loss": 0.1593, "step": 49316 }, { "epoch": 0.8572546020268039, "grad_norm": 1.1668685171122872, "learning_rate": 5.249412490024568e-08, "loss": 0.1593, "step": 49317 }, { "epoch": 0.8572719845643066, "grad_norm": 4.518200948761457, "learning_rate": 5.248156980716606e-08, "loss": 0.2094, "step": 49318 }, { "epoch": 0.8572893671018095, "grad_norm": 1.6675160829956666, "learning_rate": 5.2469016132514423e-08, "loss": 0.2735, "step": 49319 }, { "epoch": 0.8573067496393123, "grad_norm": 1.102074998832611, "learning_rate": 5.24564638763304e-08, "loss": 0.1646, "step": 49320 }, { "epoch": 0.8573241321768151, "grad_norm": 1.3335313051189275, "learning_rate": 5.244391303865392e-08, "loss": 0.1831, "step": 49321 }, { "epoch": 0.857341514714318, "grad_norm": 1.7246044150204103, "learning_rate": 5.243136361952444e-08, "loss": 0.1628, "step": 49322 }, { "epoch": 0.8573588972518208, "grad_norm": 1.4050690720397911, "learning_rate": 5.2418815618982037e-08, "loss": 0.1245, "step": 49323 }, { "epoch": 0.8573762797893236, "grad_norm": 1.1696497104985835, "learning_rate": 5.240626903706635e-08, "loss": 0.121, "step": 49324 }, { "epoch": 0.8573936623268265, "grad_norm": 1.3600481067917647, "learning_rate": 5.239372387381724e-08, "loss": 0.1337, "step": 49325 }, { "epoch": 0.8574110448643293, "grad_norm": 0.9942805345615464, "learning_rate": 5.238118012927428e-08, "loss": 0.109, "step": 49326 }, { "epoch": 0.8574284274018321, "grad_norm": 5.210203103327101, "learning_rate": 5.236863780347761e-08, "loss": 0.3035, "step": 49327 }, { "epoch": 0.857445809939335, "grad_norm": 1.6620564206160473, "learning_rate": 5.2356096896466584e-08, "loss": 0.1184, "step": 49328 }, { "epoch": 0.8574631924768378, "grad_norm": 0.997693974497746, "learning_rate": 5.2343557408281e-08, "loss": 0.2285, "step": 49329 }, { "epoch": 0.8574805750143406, "grad_norm": 1.4442694360537056, "learning_rate": 5.233101933896084e-08, "loss": 0.1623, "step": 49330 }, { "epoch": 0.8574979575518434, "grad_norm": 2.2118721434847806, "learning_rate": 5.2318482688545665e-08, "loss": 0.165, "step": 49331 }, { "epoch": 0.8575153400893463, "grad_norm": 1.2229586088125368, "learning_rate": 5.230594745707523e-08, "loss": 0.1831, "step": 49332 }, { "epoch": 0.8575327226268491, "grad_norm": 1.0493903999494507, "learning_rate": 5.229341364458933e-08, "loss": 0.1538, "step": 49333 }, { "epoch": 0.8575501051643519, "grad_norm": 1.7180361466194156, "learning_rate": 5.2280881251127675e-08, "loss": 0.117, "step": 49334 }, { "epoch": 0.8575674877018548, "grad_norm": 0.8130609298289507, "learning_rate": 5.226835027672993e-08, "loss": 0.1454, "step": 49335 }, { "epoch": 0.8575848702393576, "grad_norm": 3.2388352127678592, "learning_rate": 5.2255820721435806e-08, "loss": 0.2277, "step": 49336 }, { "epoch": 0.8576022527768604, "grad_norm": 1.284003210825806, "learning_rate": 5.2243292585285037e-08, "loss": 0.1254, "step": 49337 }, { "epoch": 0.8576196353143631, "grad_norm": 1.654517673287682, "learning_rate": 5.223076586831737e-08, "loss": 0.132, "step": 49338 }, { "epoch": 0.857637017851866, "grad_norm": 0.9050956812657035, "learning_rate": 5.2218240570572506e-08, "loss": 0.1741, "step": 49339 }, { "epoch": 0.8576544003893688, "grad_norm": 0.7350588640365094, "learning_rate": 5.2205716692090286e-08, "loss": 0.1678, "step": 49340 }, { "epoch": 0.8576717829268716, "grad_norm": 1.3703047204760685, "learning_rate": 5.2193194232909965e-08, "loss": 0.1722, "step": 49341 }, { "epoch": 0.8576891654643745, "grad_norm": 1.5050728522422998, "learning_rate": 5.218067319307168e-08, "loss": 0.2776, "step": 49342 }, { "epoch": 0.8577065480018773, "grad_norm": 0.9155968496307735, "learning_rate": 5.216815357261489e-08, "loss": 0.1667, "step": 49343 }, { "epoch": 0.8577239305393801, "grad_norm": 1.9938918442345201, "learning_rate": 5.215563537157935e-08, "loss": 0.218, "step": 49344 }, { "epoch": 0.857741313076883, "grad_norm": 2.077184137417122, "learning_rate": 5.214311859000464e-08, "loss": 0.1407, "step": 49345 }, { "epoch": 0.8577586956143858, "grad_norm": 1.6934560302832222, "learning_rate": 5.2130603227930724e-08, "loss": 0.2408, "step": 49346 }, { "epoch": 0.8577760781518886, "grad_norm": 2.7562862280816725, "learning_rate": 5.21180892853969e-08, "loss": 0.1914, "step": 49347 }, { "epoch": 0.8577934606893914, "grad_norm": 1.2109897104780754, "learning_rate": 5.2105576762443037e-08, "loss": 0.1895, "step": 49348 }, { "epoch": 0.8578108432268943, "grad_norm": 1.0203355442046276, "learning_rate": 5.20930656591087e-08, "loss": 0.1084, "step": 49349 }, { "epoch": 0.8578282257643971, "grad_norm": 1.3292239038862286, "learning_rate": 5.208055597543365e-08, "loss": 0.171, "step": 49350 }, { "epoch": 0.8578456083018999, "grad_norm": 0.7361979666260801, "learning_rate": 5.2068047711457454e-08, "loss": 0.1806, "step": 49351 }, { "epoch": 0.8578629908394028, "grad_norm": 1.2377229215845538, "learning_rate": 5.2055540867219806e-08, "loss": 0.18, "step": 49352 }, { "epoch": 0.8578803733769056, "grad_norm": 1.679645893896857, "learning_rate": 5.204303544276029e-08, "loss": 0.2334, "step": 49353 }, { "epoch": 0.8578977559144084, "grad_norm": 2.082404676179439, "learning_rate": 5.2030531438118644e-08, "loss": 0.3315, "step": 49354 }, { "epoch": 0.8579151384519113, "grad_norm": 1.1961482266141257, "learning_rate": 5.2018028853334395e-08, "loss": 0.1499, "step": 49355 }, { "epoch": 0.8579325209894141, "grad_norm": 1.487562323515104, "learning_rate": 5.2005527688447185e-08, "loss": 0.156, "step": 49356 }, { "epoch": 0.8579499035269169, "grad_norm": 0.9233532368488394, "learning_rate": 5.199302794349658e-08, "loss": 0.1758, "step": 49357 }, { "epoch": 0.8579672860644196, "grad_norm": 1.3392397517659627, "learning_rate": 5.1980529618522397e-08, "loss": 0.1511, "step": 49358 }, { "epoch": 0.8579846686019225, "grad_norm": 1.4020374747463507, "learning_rate": 5.196803271356426e-08, "loss": 0.225, "step": 49359 }, { "epoch": 0.8580020511394253, "grad_norm": 0.8075445736615059, "learning_rate": 5.1955537228661415e-08, "loss": 0.1406, "step": 49360 }, { "epoch": 0.8580194336769281, "grad_norm": 1.4771236610399991, "learning_rate": 5.1943043163853787e-08, "loss": 0.1733, "step": 49361 }, { "epoch": 0.858036816214431, "grad_norm": 1.4983847081272506, "learning_rate": 5.1930550519180883e-08, "loss": 0.2516, "step": 49362 }, { "epoch": 0.8580541987519338, "grad_norm": 1.1213076973624971, "learning_rate": 5.191805929468235e-08, "loss": 0.099, "step": 49363 }, { "epoch": 0.8580715812894366, "grad_norm": 1.118088303950616, "learning_rate": 5.1905569490397705e-08, "loss": 0.2559, "step": 49364 }, { "epoch": 0.8580889638269394, "grad_norm": 1.77661637267548, "learning_rate": 5.189308110636659e-08, "loss": 0.2054, "step": 49365 }, { "epoch": 0.8581063463644423, "grad_norm": 1.1540154110217138, "learning_rate": 5.188059414262852e-08, "loss": 0.1951, "step": 49366 }, { "epoch": 0.8581237289019451, "grad_norm": 1.9402322990099532, "learning_rate": 5.1868108599223135e-08, "loss": 0.1488, "step": 49367 }, { "epoch": 0.8581411114394479, "grad_norm": 2.0134896683541674, "learning_rate": 5.185562447618991e-08, "loss": 0.27, "step": 49368 }, { "epoch": 0.8581584939769508, "grad_norm": 1.821392147976747, "learning_rate": 5.184314177356863e-08, "loss": 0.2686, "step": 49369 }, { "epoch": 0.8581758765144536, "grad_norm": 1.5499665698649567, "learning_rate": 5.183066049139867e-08, "loss": 0.1774, "step": 49370 }, { "epoch": 0.8581932590519564, "grad_norm": 2.027272604882218, "learning_rate": 5.181818062971977e-08, "loss": 0.2205, "step": 49371 }, { "epoch": 0.8582106415894593, "grad_norm": 1.198465087228794, "learning_rate": 5.180570218857111e-08, "loss": 0.3424, "step": 49372 }, { "epoch": 0.8582280241269621, "grad_norm": 1.1831916371969136, "learning_rate": 5.179322516799267e-08, "loss": 0.1204, "step": 49373 }, { "epoch": 0.8582454066644649, "grad_norm": 1.67134297670344, "learning_rate": 5.178074956802375e-08, "loss": 0.2708, "step": 49374 }, { "epoch": 0.8582627892019677, "grad_norm": 1.091216083146872, "learning_rate": 5.1768275388703984e-08, "loss": 0.2374, "step": 49375 }, { "epoch": 0.8582801717394706, "grad_norm": 1.9113307885528195, "learning_rate": 5.175580263007279e-08, "loss": 0.2356, "step": 49376 }, { "epoch": 0.8582975542769734, "grad_norm": 1.440783509061514, "learning_rate": 5.17433312921699e-08, "loss": 0.1401, "step": 49377 }, { "epoch": 0.8583149368144761, "grad_norm": 1.0494515815811596, "learning_rate": 5.1730861375034796e-08, "loss": 0.1481, "step": 49378 }, { "epoch": 0.858332319351979, "grad_norm": 1.2272981371467162, "learning_rate": 5.1718392878706884e-08, "loss": 0.1461, "step": 49379 }, { "epoch": 0.8583497018894818, "grad_norm": 1.2594311481966842, "learning_rate": 5.170592580322563e-08, "loss": 0.3297, "step": 49380 }, { "epoch": 0.8583670844269846, "grad_norm": 0.9293762179745229, "learning_rate": 5.1693460148630743e-08, "loss": 0.1814, "step": 49381 }, { "epoch": 0.8583844669644874, "grad_norm": 1.6133652319689833, "learning_rate": 5.168099591496172e-08, "loss": 0.1284, "step": 49382 }, { "epoch": 0.8584018495019903, "grad_norm": 1.4439969324367021, "learning_rate": 5.166853310225794e-08, "loss": 0.2088, "step": 49383 }, { "epoch": 0.8584192320394931, "grad_norm": 1.327696967972368, "learning_rate": 5.165607171055897e-08, "loss": 0.2446, "step": 49384 }, { "epoch": 0.8584366145769959, "grad_norm": 2.013815292750205, "learning_rate": 5.164361173990428e-08, "loss": 0.21, "step": 49385 }, { "epoch": 0.8584539971144988, "grad_norm": 1.6699766245265697, "learning_rate": 5.163115319033345e-08, "loss": 0.2782, "step": 49386 }, { "epoch": 0.8584713796520016, "grad_norm": 1.05051723001288, "learning_rate": 5.1618696061885845e-08, "loss": 0.1374, "step": 49387 }, { "epoch": 0.8584887621895044, "grad_norm": 2.867222084979782, "learning_rate": 5.160624035460093e-08, "loss": 0.1215, "step": 49388 }, { "epoch": 0.8585061447270073, "grad_norm": 1.3011809724393955, "learning_rate": 5.159378606851833e-08, "loss": 0.1601, "step": 49389 }, { "epoch": 0.8585235272645101, "grad_norm": 1.6331887772092588, "learning_rate": 5.1581333203677576e-08, "loss": 0.2038, "step": 49390 }, { "epoch": 0.8585409098020129, "grad_norm": 1.2084597086858961, "learning_rate": 5.1568881760117754e-08, "loss": 0.1316, "step": 49391 }, { "epoch": 0.8585582923395157, "grad_norm": 1.401758071781842, "learning_rate": 5.1556431737878766e-08, "loss": 0.1715, "step": 49392 }, { "epoch": 0.8585756748770186, "grad_norm": 2.527190488219343, "learning_rate": 5.1543983136999816e-08, "loss": 0.2472, "step": 49393 }, { "epoch": 0.8585930574145214, "grad_norm": 4.295362160766414, "learning_rate": 5.153153595752047e-08, "loss": 0.2376, "step": 49394 }, { "epoch": 0.8586104399520242, "grad_norm": 1.2104393584443383, "learning_rate": 5.15190901994802e-08, "loss": 0.1789, "step": 49395 }, { "epoch": 0.8586278224895271, "grad_norm": 2.2066103667648767, "learning_rate": 5.1506645862918304e-08, "loss": 0.173, "step": 49396 }, { "epoch": 0.8586452050270299, "grad_norm": 1.7855277446386164, "learning_rate": 5.149420294787438e-08, "loss": 0.1829, "step": 49397 }, { "epoch": 0.8586625875645326, "grad_norm": 0.8925466465868362, "learning_rate": 5.1481761454387765e-08, "loss": 0.2103, "step": 49398 }, { "epoch": 0.8586799701020355, "grad_norm": 1.9955907640979156, "learning_rate": 5.146932138249788e-08, "loss": 0.2053, "step": 49399 }, { "epoch": 0.8586973526395383, "grad_norm": 2.0425709139052257, "learning_rate": 5.14568827322443e-08, "loss": 0.2577, "step": 49400 }, { "epoch": 0.8587147351770411, "grad_norm": 1.512385906744954, "learning_rate": 5.144444550366633e-08, "loss": 0.1697, "step": 49401 }, { "epoch": 0.8587321177145439, "grad_norm": 1.4350413224758862, "learning_rate": 5.143200969680339e-08, "loss": 0.2292, "step": 49402 }, { "epoch": 0.8587495002520468, "grad_norm": 1.2327828454830752, "learning_rate": 5.1419575311694996e-08, "loss": 0.1746, "step": 49403 }, { "epoch": 0.8587668827895496, "grad_norm": 1.2376998774964383, "learning_rate": 5.140714234838045e-08, "loss": 0.1538, "step": 49404 }, { "epoch": 0.8587842653270524, "grad_norm": 1.2715921728219237, "learning_rate": 5.139471080689922e-08, "loss": 0.1829, "step": 49405 }, { "epoch": 0.8588016478645553, "grad_norm": 1.624758267998518, "learning_rate": 5.1382280687290666e-08, "loss": 0.2269, "step": 49406 }, { "epoch": 0.8588190304020581, "grad_norm": 1.4751297952045386, "learning_rate": 5.1369851989594096e-08, "loss": 0.1792, "step": 49407 }, { "epoch": 0.8588364129395609, "grad_norm": 0.7642859545541529, "learning_rate": 5.135742471384913e-08, "loss": 0.1246, "step": 49408 }, { "epoch": 0.8588537954770638, "grad_norm": 1.0705990700216448, "learning_rate": 5.1344998860095135e-08, "loss": 0.1339, "step": 49409 }, { "epoch": 0.8588711780145666, "grad_norm": 1.5532210908010515, "learning_rate": 5.1332574428371247e-08, "loss": 0.1789, "step": 49410 }, { "epoch": 0.8588885605520694, "grad_norm": 2.4266291598702283, "learning_rate": 5.1320151418716984e-08, "loss": 0.1651, "step": 49411 }, { "epoch": 0.8589059430895722, "grad_norm": 3.524008040196777, "learning_rate": 5.130772983117177e-08, "loss": 0.2581, "step": 49412 }, { "epoch": 0.8589233256270751, "grad_norm": 1.529893022367732, "learning_rate": 5.1295309665774944e-08, "loss": 0.1627, "step": 49413 }, { "epoch": 0.8589407081645779, "grad_norm": 1.3990950420319106, "learning_rate": 5.128289092256588e-08, "loss": 0.2127, "step": 49414 }, { "epoch": 0.8589580907020807, "grad_norm": 1.5597049360571926, "learning_rate": 5.127047360158393e-08, "loss": 0.3078, "step": 49415 }, { "epoch": 0.8589754732395836, "grad_norm": 1.3276994686169257, "learning_rate": 5.125805770286845e-08, "loss": 0.1963, "step": 49416 }, { "epoch": 0.8589928557770863, "grad_norm": 1.408830062036198, "learning_rate": 5.12456432264588e-08, "loss": 0.1277, "step": 49417 }, { "epoch": 0.8590102383145891, "grad_norm": 1.2178906272608252, "learning_rate": 5.123323017239423e-08, "loss": 0.1723, "step": 49418 }, { "epoch": 0.8590276208520919, "grad_norm": 1.9539183879923032, "learning_rate": 5.122081854071425e-08, "loss": 0.312, "step": 49419 }, { "epoch": 0.8590450033895948, "grad_norm": 3.501179295843526, "learning_rate": 5.120840833145812e-08, "loss": 0.1861, "step": 49420 }, { "epoch": 0.8590623859270976, "grad_norm": 1.6117475573376796, "learning_rate": 5.1195999544665194e-08, "loss": 0.1933, "step": 49421 }, { "epoch": 0.8590797684646004, "grad_norm": 1.8022205320692481, "learning_rate": 5.1183592180374825e-08, "loss": 0.1974, "step": 49422 }, { "epoch": 0.8590971510021033, "grad_norm": 1.5723479463011707, "learning_rate": 5.117118623862621e-08, "loss": 0.2149, "step": 49423 }, { "epoch": 0.8591145335396061, "grad_norm": 1.2088425703386771, "learning_rate": 5.115878171945881e-08, "loss": 0.1309, "step": 49424 }, { "epoch": 0.8591319160771089, "grad_norm": 1.6999468550356476, "learning_rate": 5.1146378622911876e-08, "loss": 0.1443, "step": 49425 }, { "epoch": 0.8591492986146118, "grad_norm": 1.2673022159737442, "learning_rate": 5.113397694902466e-08, "loss": 0.1467, "step": 49426 }, { "epoch": 0.8591666811521146, "grad_norm": 0.921362810961546, "learning_rate": 5.112157669783662e-08, "loss": 0.1257, "step": 49427 }, { "epoch": 0.8591840636896174, "grad_norm": 1.1911765578530673, "learning_rate": 5.1109177869387124e-08, "loss": 0.1274, "step": 49428 }, { "epoch": 0.8592014462271202, "grad_norm": 1.5840815490365205, "learning_rate": 5.109678046371518e-08, "loss": 0.216, "step": 49429 }, { "epoch": 0.8592188287646231, "grad_norm": 1.139329354733427, "learning_rate": 5.108438448086017e-08, "loss": 0.0942, "step": 49430 }, { "epoch": 0.8592362113021259, "grad_norm": 1.011085887332703, "learning_rate": 5.107198992086154e-08, "loss": 0.135, "step": 49431 }, { "epoch": 0.8592535938396287, "grad_norm": 1.7746793318087892, "learning_rate": 5.105959678375843e-08, "loss": 0.2527, "step": 49432 }, { "epoch": 0.8592709763771316, "grad_norm": 2.1029766576313285, "learning_rate": 5.104720506959026e-08, "loss": 0.2771, "step": 49433 }, { "epoch": 0.8592883589146344, "grad_norm": 1.1442507968566837, "learning_rate": 5.103481477839616e-08, "loss": 0.1391, "step": 49434 }, { "epoch": 0.8593057414521372, "grad_norm": 1.5582834062291293, "learning_rate": 5.1022425910215436e-08, "loss": 0.1912, "step": 49435 }, { "epoch": 0.8593231239896401, "grad_norm": 1.7651838722727957, "learning_rate": 5.1010038465087445e-08, "loss": 0.1725, "step": 49436 }, { "epoch": 0.8593405065271428, "grad_norm": 1.0070888853177167, "learning_rate": 5.099765244305132e-08, "loss": 0.1991, "step": 49437 }, { "epoch": 0.8593578890646456, "grad_norm": 1.5623336467343356, "learning_rate": 5.0985267844146304e-08, "loss": 0.2117, "step": 49438 }, { "epoch": 0.8593752716021484, "grad_norm": 1.7604264550924575, "learning_rate": 5.0972884668411875e-08, "loss": 0.265, "step": 49439 }, { "epoch": 0.8593926541396513, "grad_norm": 1.2904971814516635, "learning_rate": 5.0960502915887106e-08, "loss": 0.2698, "step": 49440 }, { "epoch": 0.8594100366771541, "grad_norm": 1.254704464643159, "learning_rate": 5.0948122586611245e-08, "loss": 0.1618, "step": 49441 }, { "epoch": 0.8594274192146569, "grad_norm": 2.218453559457874, "learning_rate": 5.093574368062359e-08, "loss": 0.2411, "step": 49442 }, { "epoch": 0.8594448017521598, "grad_norm": 1.0659688915494903, "learning_rate": 5.0923366197963335e-08, "loss": 0.2457, "step": 49443 }, { "epoch": 0.8594621842896626, "grad_norm": 1.1849102209967124, "learning_rate": 5.091099013866967e-08, "loss": 0.18, "step": 49444 }, { "epoch": 0.8594795668271654, "grad_norm": 1.7074361415747523, "learning_rate": 5.089861550278196e-08, "loss": 0.2026, "step": 49445 }, { "epoch": 0.8594969493646683, "grad_norm": 1.3741110523254962, "learning_rate": 5.0886242290339165e-08, "loss": 0.2249, "step": 49446 }, { "epoch": 0.8595143319021711, "grad_norm": 1.2299693343580038, "learning_rate": 5.0873870501380925e-08, "loss": 0.2675, "step": 49447 }, { "epoch": 0.8595317144396739, "grad_norm": 1.7080915624164061, "learning_rate": 5.086150013594609e-08, "loss": 0.1699, "step": 49448 }, { "epoch": 0.8595490969771767, "grad_norm": 2.429253197009769, "learning_rate": 5.0849131194073856e-08, "loss": 0.2175, "step": 49449 }, { "epoch": 0.8595664795146796, "grad_norm": 1.5951884401856586, "learning_rate": 5.0836763675803696e-08, "loss": 0.2603, "step": 49450 }, { "epoch": 0.8595838620521824, "grad_norm": 1.2385536696905266, "learning_rate": 5.0824397581174627e-08, "loss": 0.2482, "step": 49451 }, { "epoch": 0.8596012445896852, "grad_norm": 1.09037454845356, "learning_rate": 5.08120329102259e-08, "loss": 0.1251, "step": 49452 }, { "epoch": 0.8596186271271881, "grad_norm": 1.1256613811227079, "learning_rate": 5.0799669662996704e-08, "loss": 0.1473, "step": 49453 }, { "epoch": 0.8596360096646909, "grad_norm": 1.2401623137051112, "learning_rate": 5.078730783952617e-08, "loss": 0.1682, "step": 49454 }, { "epoch": 0.8596533922021937, "grad_norm": 2.220090604429969, "learning_rate": 5.077494743985356e-08, "loss": 0.2732, "step": 49455 }, { "epoch": 0.8596707747396966, "grad_norm": 1.2260460325294376, "learning_rate": 5.0762588464017986e-08, "loss": 0.2378, "step": 49456 }, { "epoch": 0.8596881572771993, "grad_norm": 1.9832892533955524, "learning_rate": 5.0750230912058543e-08, "loss": 0.2367, "step": 49457 }, { "epoch": 0.8597055398147021, "grad_norm": 0.9924001940838619, "learning_rate": 5.073787478401459e-08, "loss": 0.1566, "step": 49458 }, { "epoch": 0.8597229223522049, "grad_norm": 1.0353002465723116, "learning_rate": 5.0725520079925254e-08, "loss": 0.1451, "step": 49459 }, { "epoch": 0.8597403048897078, "grad_norm": 2.3367505092653458, "learning_rate": 5.071316679982968e-08, "loss": 0.4389, "step": 49460 }, { "epoch": 0.8597576874272106, "grad_norm": 1.138788623094395, "learning_rate": 5.070081494376682e-08, "loss": 0.1758, "step": 49461 }, { "epoch": 0.8597750699647134, "grad_norm": 1.7319158454156653, "learning_rate": 5.0688464511776055e-08, "loss": 0.1247, "step": 49462 }, { "epoch": 0.8597924525022163, "grad_norm": 1.1487452248017032, "learning_rate": 5.06761155038965e-08, "loss": 0.1126, "step": 49463 }, { "epoch": 0.8598098350397191, "grad_norm": 1.1287970725972842, "learning_rate": 5.0663767920167186e-08, "loss": 0.1606, "step": 49464 }, { "epoch": 0.8598272175772219, "grad_norm": 1.1714687018303205, "learning_rate": 5.065142176062731e-08, "loss": 0.1816, "step": 49465 }, { "epoch": 0.8598446001147247, "grad_norm": 1.6656355148937374, "learning_rate": 5.063907702531617e-08, "loss": 0.1973, "step": 49466 }, { "epoch": 0.8598619826522276, "grad_norm": 0.8643916516455912, "learning_rate": 5.062673371427262e-08, "loss": 0.1883, "step": 49467 }, { "epoch": 0.8598793651897304, "grad_norm": 1.7558187022610883, "learning_rate": 5.0614391827535905e-08, "loss": 0.2259, "step": 49468 }, { "epoch": 0.8598967477272332, "grad_norm": 1.3545599034488114, "learning_rate": 5.060205136514506e-08, "loss": 0.1749, "step": 49469 }, { "epoch": 0.8599141302647361, "grad_norm": 0.9088589926785959, "learning_rate": 5.0589712327139377e-08, "loss": 0.2056, "step": 49470 }, { "epoch": 0.8599315128022389, "grad_norm": 1.629980903831992, "learning_rate": 5.057737471355783e-08, "loss": 0.1533, "step": 49471 }, { "epoch": 0.8599488953397417, "grad_norm": 0.8786322697377457, "learning_rate": 5.056503852443955e-08, "loss": 0.1563, "step": 49472 }, { "epoch": 0.8599662778772446, "grad_norm": 0.9764769952033276, "learning_rate": 5.0552703759823677e-08, "loss": 0.096, "step": 49473 }, { "epoch": 0.8599836604147474, "grad_norm": 1.4262284485206975, "learning_rate": 5.054037041974929e-08, "loss": 0.2366, "step": 49474 }, { "epoch": 0.8600010429522502, "grad_norm": 1.338266009843305, "learning_rate": 5.052803850425541e-08, "loss": 0.1328, "step": 49475 }, { "epoch": 0.860018425489753, "grad_norm": 2.531316645313527, "learning_rate": 5.051570801338123e-08, "loss": 0.5391, "step": 49476 }, { "epoch": 0.8600358080272558, "grad_norm": 1.2790223871574407, "learning_rate": 5.050337894716566e-08, "loss": 0.281, "step": 49477 }, { "epoch": 0.8600531905647586, "grad_norm": 2.8147728931126696, "learning_rate": 5.049105130564801e-08, "loss": 0.2034, "step": 49478 }, { "epoch": 0.8600705731022614, "grad_norm": 1.1311587993744683, "learning_rate": 5.04787250888673e-08, "loss": 0.2233, "step": 49479 }, { "epoch": 0.8600879556397643, "grad_norm": 0.8911255223892912, "learning_rate": 5.046640029686239e-08, "loss": 0.1535, "step": 49480 }, { "epoch": 0.8601053381772671, "grad_norm": 1.5210290085627722, "learning_rate": 5.045407692967252e-08, "loss": 0.108, "step": 49481 }, { "epoch": 0.8601227207147699, "grad_norm": 1.5722024555693748, "learning_rate": 5.044175498733677e-08, "loss": 0.1371, "step": 49482 }, { "epoch": 0.8601401032522727, "grad_norm": 1.1892727988446543, "learning_rate": 5.0429434469894175e-08, "loss": 0.1489, "step": 49483 }, { "epoch": 0.8601574857897756, "grad_norm": 1.3448461476972546, "learning_rate": 5.041711537738369e-08, "loss": 0.2147, "step": 49484 }, { "epoch": 0.8601748683272784, "grad_norm": 1.9093078068047344, "learning_rate": 5.040479770984446e-08, "loss": 0.2059, "step": 49485 }, { "epoch": 0.8601922508647812, "grad_norm": 1.0452926938004605, "learning_rate": 5.0392481467315505e-08, "loss": 0.1856, "step": 49486 }, { "epoch": 0.8602096334022841, "grad_norm": 1.8728159296432472, "learning_rate": 5.0380166649835854e-08, "loss": 0.2137, "step": 49487 }, { "epoch": 0.8602270159397869, "grad_norm": 1.934942949736062, "learning_rate": 5.0367853257444416e-08, "loss": 0.1707, "step": 49488 }, { "epoch": 0.8602443984772897, "grad_norm": 1.536585117202093, "learning_rate": 5.035554129018044e-08, "loss": 0.2427, "step": 49489 }, { "epoch": 0.8602617810147926, "grad_norm": 4.401596111061481, "learning_rate": 5.0343230748082886e-08, "loss": 0.1616, "step": 49490 }, { "epoch": 0.8602791635522954, "grad_norm": 0.8915889621467643, "learning_rate": 5.0330921631190794e-08, "loss": 0.1969, "step": 49491 }, { "epoch": 0.8602965460897982, "grad_norm": 1.3188930712446365, "learning_rate": 5.0318613939542896e-08, "loss": 0.1843, "step": 49492 }, { "epoch": 0.860313928627301, "grad_norm": 1.0260324428235887, "learning_rate": 5.0306307673178564e-08, "loss": 0.1202, "step": 49493 }, { "epoch": 0.8603313111648039, "grad_norm": 1.590516241410761, "learning_rate": 5.0294002832136696e-08, "loss": 0.2167, "step": 49494 }, { "epoch": 0.8603486937023067, "grad_norm": 1.8118262685626025, "learning_rate": 5.0281699416456216e-08, "loss": 0.1635, "step": 49495 }, { "epoch": 0.8603660762398095, "grad_norm": 1.5521890280128137, "learning_rate": 5.026939742617609e-08, "loss": 0.154, "step": 49496 }, { "epoch": 0.8603834587773123, "grad_norm": 1.2546974439020617, "learning_rate": 5.0257096861335445e-08, "loss": 0.1967, "step": 49497 }, { "epoch": 0.8604008413148151, "grad_norm": 1.0488251232768722, "learning_rate": 5.024479772197332e-08, "loss": 0.1383, "step": 49498 }, { "epoch": 0.8604182238523179, "grad_norm": 1.019300101275353, "learning_rate": 5.023250000812851e-08, "loss": 0.1778, "step": 49499 }, { "epoch": 0.8604356063898208, "grad_norm": 1.6200198658598068, "learning_rate": 5.022020371983998e-08, "loss": 0.0969, "step": 49500 }, { "epoch": 0.8604529889273236, "grad_norm": 1.161912501317426, "learning_rate": 5.020790885714682e-08, "loss": 0.1247, "step": 49501 }, { "epoch": 0.8604703714648264, "grad_norm": 0.9566658430533422, "learning_rate": 5.019561542008804e-08, "loss": 0.1376, "step": 49502 }, { "epoch": 0.8604877540023292, "grad_norm": 1.2350343488294324, "learning_rate": 5.0183323408702516e-08, "loss": 0.1656, "step": 49503 }, { "epoch": 0.8605051365398321, "grad_norm": 1.359058457298579, "learning_rate": 5.017103282302926e-08, "loss": 0.2204, "step": 49504 }, { "epoch": 0.8605225190773349, "grad_norm": 1.10111791557054, "learning_rate": 5.015874366310713e-08, "loss": 0.1564, "step": 49505 }, { "epoch": 0.8605399016148377, "grad_norm": 1.7241206334880734, "learning_rate": 5.0146455928975153e-08, "loss": 0.2525, "step": 49506 }, { "epoch": 0.8605572841523406, "grad_norm": 1.2597835120783474, "learning_rate": 5.0134169620672294e-08, "loss": 0.236, "step": 49507 }, { "epoch": 0.8605746666898434, "grad_norm": 3.271576766596614, "learning_rate": 5.012188473823736e-08, "loss": 0.2811, "step": 49508 }, { "epoch": 0.8605920492273462, "grad_norm": 1.5466268402279126, "learning_rate": 5.010960128170949e-08, "loss": 0.1472, "step": 49509 }, { "epoch": 0.860609431764849, "grad_norm": 1.813454073256048, "learning_rate": 5.009731925112765e-08, "loss": 0.15, "step": 49510 }, { "epoch": 0.8606268143023519, "grad_norm": 1.2762973783175562, "learning_rate": 5.008503864653041e-08, "loss": 0.2214, "step": 49511 }, { "epoch": 0.8606441968398547, "grad_norm": 1.705341726736091, "learning_rate": 5.0072759467957024e-08, "loss": 0.1543, "step": 49512 }, { "epoch": 0.8606615793773575, "grad_norm": 3.9860049420669488, "learning_rate": 5.0060481715446304e-08, "loss": 0.197, "step": 49513 }, { "epoch": 0.8606789619148604, "grad_norm": 3.774515567890608, "learning_rate": 5.00482053890372e-08, "loss": 0.1764, "step": 49514 }, { "epoch": 0.8606963444523632, "grad_norm": 1.2360696352014062, "learning_rate": 5.0035930488768474e-08, "loss": 0.1673, "step": 49515 }, { "epoch": 0.860713726989866, "grad_norm": 1.3409594118928971, "learning_rate": 5.0023657014679364e-08, "loss": 0.123, "step": 49516 }, { "epoch": 0.8607311095273688, "grad_norm": 1.719331419588405, "learning_rate": 5.0011384966808455e-08, "loss": 0.2141, "step": 49517 }, { "epoch": 0.8607484920648716, "grad_norm": 1.3300683027672315, "learning_rate": 4.999911434519471e-08, "loss": 0.169, "step": 49518 }, { "epoch": 0.8607658746023744, "grad_norm": 1.896272876452889, "learning_rate": 4.9986845149877e-08, "loss": 0.2047, "step": 49519 }, { "epoch": 0.8607832571398772, "grad_norm": 1.5069365456465666, "learning_rate": 4.997457738089439e-08, "loss": 0.1621, "step": 49520 }, { "epoch": 0.8608006396773801, "grad_norm": 1.2073535992225246, "learning_rate": 4.9962311038285576e-08, "loss": 0.1667, "step": 49521 }, { "epoch": 0.8608180222148829, "grad_norm": 1.487975991119603, "learning_rate": 4.9950046122089584e-08, "loss": 0.2368, "step": 49522 }, { "epoch": 0.8608354047523857, "grad_norm": 1.1734509597994405, "learning_rate": 4.993778263234516e-08, "loss": 0.1941, "step": 49523 }, { "epoch": 0.8608527872898886, "grad_norm": 1.4286698440004575, "learning_rate": 4.9925520569091214e-08, "loss": 0.1614, "step": 49524 }, { "epoch": 0.8608701698273914, "grad_norm": 1.2124920958133387, "learning_rate": 4.991325993236667e-08, "loss": 0.1397, "step": 49525 }, { "epoch": 0.8608875523648942, "grad_norm": 1.2846970077667923, "learning_rate": 4.990100072221032e-08, "loss": 0.1422, "step": 49526 }, { "epoch": 0.8609049349023971, "grad_norm": 2.0509851827841854, "learning_rate": 4.988874293866091e-08, "loss": 0.1723, "step": 49527 }, { "epoch": 0.8609223174398999, "grad_norm": 1.7883152623645904, "learning_rate": 4.9876486581757527e-08, "loss": 0.2516, "step": 49528 }, { "epoch": 0.8609396999774027, "grad_norm": 1.5513776735000955, "learning_rate": 4.9864231651539027e-08, "loss": 0.2438, "step": 49529 }, { "epoch": 0.8609570825149055, "grad_norm": 1.3182174335391776, "learning_rate": 4.985197814804393e-08, "loss": 0.1719, "step": 49530 }, { "epoch": 0.8609744650524084, "grad_norm": 1.3427834084379233, "learning_rate": 4.9839726071311426e-08, "loss": 0.2219, "step": 49531 }, { "epoch": 0.8609918475899112, "grad_norm": 1.3328442981221404, "learning_rate": 4.982747542138016e-08, "loss": 0.2395, "step": 49532 }, { "epoch": 0.861009230127414, "grad_norm": 1.2232416524308962, "learning_rate": 4.981522619828898e-08, "loss": 0.1467, "step": 49533 }, { "epoch": 0.8610266126649169, "grad_norm": 0.7255686605741618, "learning_rate": 4.980297840207676e-08, "loss": 0.2387, "step": 49534 }, { "epoch": 0.8610439952024197, "grad_norm": 1.1420846210098463, "learning_rate": 4.979073203278233e-08, "loss": 0.1722, "step": 49535 }, { "epoch": 0.8610613777399225, "grad_norm": 0.9078505796237172, "learning_rate": 4.977848709044441e-08, "loss": 0.1469, "step": 49536 }, { "epoch": 0.8610787602774252, "grad_norm": 1.7699173296326736, "learning_rate": 4.97662435751019e-08, "loss": 0.224, "step": 49537 }, { "epoch": 0.8610961428149281, "grad_norm": 1.3999300683184739, "learning_rate": 4.975400148679348e-08, "loss": 0.1701, "step": 49538 }, { "epoch": 0.8611135253524309, "grad_norm": 1.2323170104597727, "learning_rate": 4.974176082555814e-08, "loss": 0.1694, "step": 49539 }, { "epoch": 0.8611309078899337, "grad_norm": 1.4217313250259889, "learning_rate": 4.972952159143462e-08, "loss": 0.2237, "step": 49540 }, { "epoch": 0.8611482904274366, "grad_norm": 0.8179983343068323, "learning_rate": 4.971728378446166e-08, "loss": 0.1818, "step": 49541 }, { "epoch": 0.8611656729649394, "grad_norm": 1.1890336819951675, "learning_rate": 4.970504740467807e-08, "loss": 0.1715, "step": 49542 }, { "epoch": 0.8611830555024422, "grad_norm": 2.3307795335292116, "learning_rate": 4.969281245212259e-08, "loss": 0.2248, "step": 49543 }, { "epoch": 0.8612004380399451, "grad_norm": 1.335177030001818, "learning_rate": 4.968057892683408e-08, "loss": 0.1907, "step": 49544 }, { "epoch": 0.8612178205774479, "grad_norm": 1.5166093152904967, "learning_rate": 4.9668346828851295e-08, "loss": 0.2309, "step": 49545 }, { "epoch": 0.8612352031149507, "grad_norm": 1.0217555071586577, "learning_rate": 4.965611615821286e-08, "loss": 0.182, "step": 49546 }, { "epoch": 0.8612525856524536, "grad_norm": 1.8199201193456047, "learning_rate": 4.9643886914957745e-08, "loss": 0.2423, "step": 49547 }, { "epoch": 0.8612699681899564, "grad_norm": 1.6558269398173149, "learning_rate": 4.9631659099124756e-08, "loss": 0.1328, "step": 49548 }, { "epoch": 0.8612873507274592, "grad_norm": 0.9514957811068848, "learning_rate": 4.961943271075242e-08, "loss": 0.137, "step": 49549 }, { "epoch": 0.861304733264962, "grad_norm": 0.773683075577521, "learning_rate": 4.960720774987948e-08, "loss": 0.2018, "step": 49550 }, { "epoch": 0.8613221158024649, "grad_norm": 1.222008615087314, "learning_rate": 4.95949842165449e-08, "loss": 0.2034, "step": 49551 }, { "epoch": 0.8613394983399677, "grad_norm": 1.6526692237538343, "learning_rate": 4.958276211078732e-08, "loss": 0.2194, "step": 49552 }, { "epoch": 0.8613568808774705, "grad_norm": 2.082805166520043, "learning_rate": 4.9570541432645494e-08, "loss": 0.2148, "step": 49553 }, { "epoch": 0.8613742634149734, "grad_norm": 1.364240810670485, "learning_rate": 4.9558322182158164e-08, "loss": 0.1738, "step": 49554 }, { "epoch": 0.8613916459524762, "grad_norm": 1.4704109613083, "learning_rate": 4.954610435936402e-08, "loss": 0.2265, "step": 49555 }, { "epoch": 0.8614090284899789, "grad_norm": 1.0986543601080658, "learning_rate": 4.95338879643018e-08, "loss": 0.1819, "step": 49556 }, { "epoch": 0.8614264110274817, "grad_norm": 1.3667256493686895, "learning_rate": 4.952167299701021e-08, "loss": 0.223, "step": 49557 }, { "epoch": 0.8614437935649846, "grad_norm": 10.470576669308604, "learning_rate": 4.950945945752788e-08, "loss": 0.224, "step": 49558 }, { "epoch": 0.8614611761024874, "grad_norm": 0.818020434141819, "learning_rate": 4.949724734589378e-08, "loss": 0.2271, "step": 49559 }, { "epoch": 0.8614785586399902, "grad_norm": 1.187068300896451, "learning_rate": 4.9485036662146375e-08, "loss": 0.124, "step": 49560 }, { "epoch": 0.8614959411774931, "grad_norm": 1.3853384464061105, "learning_rate": 4.947282740632453e-08, "loss": 0.181, "step": 49561 }, { "epoch": 0.8615133237149959, "grad_norm": 4.046018390488638, "learning_rate": 4.9460619578466814e-08, "loss": 0.2215, "step": 49562 }, { "epoch": 0.8615307062524987, "grad_norm": 1.1987121003110333, "learning_rate": 4.9448413178612034e-08, "loss": 0.183, "step": 49563 }, { "epoch": 0.8615480887900016, "grad_norm": 0.9947446894620099, "learning_rate": 4.943620820679878e-08, "loss": 0.1909, "step": 49564 }, { "epoch": 0.8615654713275044, "grad_norm": 0.9988620209777609, "learning_rate": 4.942400466306579e-08, "loss": 0.2379, "step": 49565 }, { "epoch": 0.8615828538650072, "grad_norm": 1.089795566854593, "learning_rate": 4.9411802547451584e-08, "loss": 0.2064, "step": 49566 }, { "epoch": 0.86160023640251, "grad_norm": 1.0693322280999402, "learning_rate": 4.93996018599952e-08, "loss": 0.3133, "step": 49567 }, { "epoch": 0.8616176189400129, "grad_norm": 0.9953086530227945, "learning_rate": 4.938740260073504e-08, "loss": 0.1562, "step": 49568 }, { "epoch": 0.8616350014775157, "grad_norm": 1.267830775615115, "learning_rate": 4.9375204769709635e-08, "loss": 0.1242, "step": 49569 }, { "epoch": 0.8616523840150185, "grad_norm": 1.5059031349106886, "learning_rate": 4.9363008366958005e-08, "loss": 0.181, "step": 49570 }, { "epoch": 0.8616697665525214, "grad_norm": 1.5670288029533925, "learning_rate": 4.935081339251862e-08, "loss": 0.1685, "step": 49571 }, { "epoch": 0.8616871490900242, "grad_norm": 1.586280370628626, "learning_rate": 4.933861984643012e-08, "loss": 0.2264, "step": 49572 }, { "epoch": 0.861704531627527, "grad_norm": 1.0562204732111424, "learning_rate": 4.932642772873119e-08, "loss": 0.1579, "step": 49573 }, { "epoch": 0.8617219141650299, "grad_norm": 6.169184767194317, "learning_rate": 4.931423703946041e-08, "loss": 0.1559, "step": 49574 }, { "epoch": 0.8617392967025327, "grad_norm": 1.759034003372318, "learning_rate": 4.930204777865654e-08, "loss": 0.1892, "step": 49575 }, { "epoch": 0.8617566792400354, "grad_norm": 1.5759924321630268, "learning_rate": 4.9289859946358146e-08, "loss": 0.1263, "step": 49576 }, { "epoch": 0.8617740617775382, "grad_norm": 1.1265930712811658, "learning_rate": 4.9277673542603706e-08, "loss": 0.2141, "step": 49577 }, { "epoch": 0.8617914443150411, "grad_norm": 2.1816281071758152, "learning_rate": 4.9265488567432124e-08, "loss": 0.1722, "step": 49578 }, { "epoch": 0.8618088268525439, "grad_norm": 2.2796412641095785, "learning_rate": 4.9253305020881876e-08, "loss": 0.2092, "step": 49579 }, { "epoch": 0.8618262093900467, "grad_norm": 1.1179817141754658, "learning_rate": 4.9241122902991706e-08, "loss": 0.1103, "step": 49580 }, { "epoch": 0.8618435919275496, "grad_norm": 1.4264814727966404, "learning_rate": 4.9228942213799975e-08, "loss": 0.2189, "step": 49581 }, { "epoch": 0.8618609744650524, "grad_norm": 1.8881815870653165, "learning_rate": 4.921676295334542e-08, "loss": 0.1338, "step": 49582 }, { "epoch": 0.8618783570025552, "grad_norm": 1.2410331510974908, "learning_rate": 4.9204585121666753e-08, "loss": 0.1838, "step": 49583 }, { "epoch": 0.861895739540058, "grad_norm": 1.164455713146197, "learning_rate": 4.919240871880242e-08, "loss": 0.1882, "step": 49584 }, { "epoch": 0.8619131220775609, "grad_norm": 2.147218834855153, "learning_rate": 4.918023374479097e-08, "loss": 0.2041, "step": 49585 }, { "epoch": 0.8619305046150637, "grad_norm": 1.12858383514275, "learning_rate": 4.916806019967129e-08, "loss": 0.1241, "step": 49586 }, { "epoch": 0.8619478871525665, "grad_norm": 1.5135611348165146, "learning_rate": 4.9155888083481646e-08, "loss": 0.265, "step": 49587 }, { "epoch": 0.8619652696900694, "grad_norm": 3.783653629392843, "learning_rate": 4.914371739626077e-08, "loss": 0.1938, "step": 49588 }, { "epoch": 0.8619826522275722, "grad_norm": 1.8255846237285183, "learning_rate": 4.9131548138047095e-08, "loss": 0.1302, "step": 49589 }, { "epoch": 0.862000034765075, "grad_norm": 1.2295419415069846, "learning_rate": 4.911938030887941e-08, "loss": 0.1517, "step": 49590 }, { "epoch": 0.8620174173025779, "grad_norm": 1.4019573386776905, "learning_rate": 4.9107213908796184e-08, "loss": 0.1739, "step": 49591 }, { "epoch": 0.8620347998400807, "grad_norm": 1.4262006493942878, "learning_rate": 4.909504893783589e-08, "loss": 0.1499, "step": 49592 }, { "epoch": 0.8620521823775835, "grad_norm": 1.7738360188367903, "learning_rate": 4.908288539603722e-08, "loss": 0.3431, "step": 49593 }, { "epoch": 0.8620695649150864, "grad_norm": 1.1811872127985592, "learning_rate": 4.9070723283438695e-08, "loss": 0.1703, "step": 49594 }, { "epoch": 0.8620869474525892, "grad_norm": 1.6695214363936735, "learning_rate": 4.905856260007879e-08, "loss": 0.1764, "step": 49595 }, { "epoch": 0.8621043299900919, "grad_norm": 0.8494165872692704, "learning_rate": 4.904640334599613e-08, "loss": 0.1424, "step": 49596 }, { "epoch": 0.8621217125275947, "grad_norm": 1.2321399441887078, "learning_rate": 4.903424552122909e-08, "loss": 0.1822, "step": 49597 }, { "epoch": 0.8621390950650976, "grad_norm": 2.2044000401179757, "learning_rate": 4.902208912581646e-08, "loss": 0.2587, "step": 49598 }, { "epoch": 0.8621564776026004, "grad_norm": 1.7128847127370694, "learning_rate": 4.900993415979671e-08, "loss": 0.1968, "step": 49599 }, { "epoch": 0.8621738601401032, "grad_norm": 1.681168812662447, "learning_rate": 4.899778062320814e-08, "loss": 0.1783, "step": 49600 }, { "epoch": 0.862191242677606, "grad_norm": 1.2599504259110799, "learning_rate": 4.8985628516089505e-08, "loss": 0.1467, "step": 49601 }, { "epoch": 0.8622086252151089, "grad_norm": 1.6671736294869013, "learning_rate": 4.897347783847927e-08, "loss": 0.1947, "step": 49602 }, { "epoch": 0.8622260077526117, "grad_norm": 1.8708723245563, "learning_rate": 4.896132859041591e-08, "loss": 0.2207, "step": 49603 }, { "epoch": 0.8622433902901145, "grad_norm": 1.5600506418134257, "learning_rate": 4.894918077193783e-08, "loss": 0.2482, "step": 49604 }, { "epoch": 0.8622607728276174, "grad_norm": 1.0480204164862092, "learning_rate": 4.8937034383083886e-08, "loss": 0.1499, "step": 49605 }, { "epoch": 0.8622781553651202, "grad_norm": 1.5614033989065819, "learning_rate": 4.892488942389222e-08, "loss": 0.2169, "step": 49606 }, { "epoch": 0.862295537902623, "grad_norm": 1.2215398325510147, "learning_rate": 4.8912745894401476e-08, "loss": 0.1459, "step": 49607 }, { "epoch": 0.8623129204401259, "grad_norm": 1.5802017921703269, "learning_rate": 4.890060379465e-08, "loss": 0.1778, "step": 49608 }, { "epoch": 0.8623303029776287, "grad_norm": 0.9319906318410764, "learning_rate": 4.888846312467648e-08, "loss": 0.1667, "step": 49609 }, { "epoch": 0.8623476855151315, "grad_norm": 1.2522103800546658, "learning_rate": 4.887632388451929e-08, "loss": 0.2064, "step": 49610 }, { "epoch": 0.8623650680526344, "grad_norm": 1.1860998778794014, "learning_rate": 4.886418607421694e-08, "loss": 0.1427, "step": 49611 }, { "epoch": 0.8623824505901372, "grad_norm": 1.684121642349492, "learning_rate": 4.885204969380791e-08, "loss": 0.2501, "step": 49612 }, { "epoch": 0.86239983312764, "grad_norm": 1.7664553938009147, "learning_rate": 4.8839914743330656e-08, "loss": 0.1823, "step": 49613 }, { "epoch": 0.8624172156651428, "grad_norm": 2.3118033217919063, "learning_rate": 4.882778122282355e-08, "loss": 0.175, "step": 49614 }, { "epoch": 0.8624345982026457, "grad_norm": 2.657651402341559, "learning_rate": 4.881564913232522e-08, "loss": 0.2482, "step": 49615 }, { "epoch": 0.8624519807401484, "grad_norm": 1.715490844112687, "learning_rate": 4.880351847187386e-08, "loss": 0.1716, "step": 49616 }, { "epoch": 0.8624693632776512, "grad_norm": 2.15651679966375, "learning_rate": 4.8791389241508216e-08, "loss": 0.1788, "step": 49617 }, { "epoch": 0.862486745815154, "grad_norm": 1.4897985837321064, "learning_rate": 4.877926144126665e-08, "loss": 0.2224, "step": 49618 }, { "epoch": 0.8625041283526569, "grad_norm": 0.9315414941163161, "learning_rate": 4.8767135071187404e-08, "loss": 0.1496, "step": 49619 }, { "epoch": 0.8625215108901597, "grad_norm": 1.3052392995618989, "learning_rate": 4.875501013130917e-08, "loss": 0.1942, "step": 49620 }, { "epoch": 0.8625388934276625, "grad_norm": 1.4645570801640508, "learning_rate": 4.8742886621670195e-08, "loss": 0.1698, "step": 49621 }, { "epoch": 0.8625562759651654, "grad_norm": 0.9806300224351364, "learning_rate": 4.8730764542309065e-08, "loss": 0.1981, "step": 49622 }, { "epoch": 0.8625736585026682, "grad_norm": 2.4726974638121515, "learning_rate": 4.8718643893264075e-08, "loss": 0.2595, "step": 49623 }, { "epoch": 0.862591041040171, "grad_norm": 2.073754493170231, "learning_rate": 4.8706524674573646e-08, "loss": 0.122, "step": 49624 }, { "epoch": 0.8626084235776739, "grad_norm": 1.6585765819919334, "learning_rate": 4.86944068862763e-08, "loss": 0.2284, "step": 49625 }, { "epoch": 0.8626258061151767, "grad_norm": 1.9672675982383383, "learning_rate": 4.868229052841033e-08, "loss": 0.1617, "step": 49626 }, { "epoch": 0.8626431886526795, "grad_norm": 0.8802787622594412, "learning_rate": 4.8670175601014105e-08, "loss": 0.1302, "step": 49627 }, { "epoch": 0.8626605711901824, "grad_norm": 1.5351487601706764, "learning_rate": 4.865806210412615e-08, "loss": 0.2075, "step": 49628 }, { "epoch": 0.8626779537276852, "grad_norm": 1.3296431192377582, "learning_rate": 4.8645950037784824e-08, "loss": 0.1722, "step": 49629 }, { "epoch": 0.862695336265188, "grad_norm": 1.7874933922232201, "learning_rate": 4.863383940202864e-08, "loss": 0.163, "step": 49630 }, { "epoch": 0.8627127188026908, "grad_norm": 0.9083830316097221, "learning_rate": 4.862173019689564e-08, "loss": 0.1788, "step": 49631 }, { "epoch": 0.8627301013401937, "grad_norm": 0.9451276477083568, "learning_rate": 4.8609622422424445e-08, "loss": 0.1683, "step": 49632 }, { "epoch": 0.8627474838776965, "grad_norm": 1.4816127650083493, "learning_rate": 4.8597516078653475e-08, "loss": 0.1611, "step": 49633 }, { "epoch": 0.8627648664151993, "grad_norm": 1.5657609164856234, "learning_rate": 4.8585411165620984e-08, "loss": 0.1618, "step": 49634 }, { "epoch": 0.8627822489527022, "grad_norm": 1.4369459415395134, "learning_rate": 4.857330768336526e-08, "loss": 0.2272, "step": 49635 }, { "epoch": 0.8627996314902049, "grad_norm": 1.5568355260262627, "learning_rate": 4.856120563192501e-08, "loss": 0.2351, "step": 49636 }, { "epoch": 0.8628170140277077, "grad_norm": 1.2396492573349323, "learning_rate": 4.854910501133824e-08, "loss": 0.183, "step": 49637 }, { "epoch": 0.8628343965652105, "grad_norm": 1.5667762569847317, "learning_rate": 4.853700582164344e-08, "loss": 0.1791, "step": 49638 }, { "epoch": 0.8628517791027134, "grad_norm": 2.2827225904189743, "learning_rate": 4.8524908062878846e-08, "loss": 0.2538, "step": 49639 }, { "epoch": 0.8628691616402162, "grad_norm": 2.5854847918638555, "learning_rate": 4.851281173508304e-08, "loss": 0.1485, "step": 49640 }, { "epoch": 0.862886544177719, "grad_norm": 1.1236293393801073, "learning_rate": 4.8500716838294155e-08, "loss": 0.1664, "step": 49641 }, { "epoch": 0.8629039267152219, "grad_norm": 1.432167525967128, "learning_rate": 4.848862337255066e-08, "loss": 0.3102, "step": 49642 }, { "epoch": 0.8629213092527247, "grad_norm": 1.2753035708862126, "learning_rate": 4.847653133789082e-08, "loss": 0.1678, "step": 49643 }, { "epoch": 0.8629386917902275, "grad_norm": 1.9963011723371886, "learning_rate": 4.8464440734352906e-08, "loss": 0.2507, "step": 49644 }, { "epoch": 0.8629560743277304, "grad_norm": 0.8699582207904271, "learning_rate": 4.8452351561975304e-08, "loss": 0.2849, "step": 49645 }, { "epoch": 0.8629734568652332, "grad_norm": 2.249251742058919, "learning_rate": 4.8440263820796357e-08, "loss": 0.5271, "step": 49646 }, { "epoch": 0.862990839402736, "grad_norm": 1.0828614125184068, "learning_rate": 4.842817751085426e-08, "loss": 0.1814, "step": 49647 }, { "epoch": 0.8630082219402389, "grad_norm": 1.3684886955868347, "learning_rate": 4.841609263218749e-08, "loss": 0.2332, "step": 49648 }, { "epoch": 0.8630256044777417, "grad_norm": 1.125334911920297, "learning_rate": 4.8404009184834336e-08, "loss": 0.1949, "step": 49649 }, { "epoch": 0.8630429870152445, "grad_norm": 1.6902582123878402, "learning_rate": 4.839192716883284e-08, "loss": 0.2711, "step": 49650 }, { "epoch": 0.8630603695527473, "grad_norm": 1.3113106140841508, "learning_rate": 4.8379846584221564e-08, "loss": 0.2494, "step": 49651 }, { "epoch": 0.8630777520902502, "grad_norm": 1.2202182999140891, "learning_rate": 4.8367767431038763e-08, "loss": 0.1226, "step": 49652 }, { "epoch": 0.863095134627753, "grad_norm": 1.7109422935365992, "learning_rate": 4.835568970932263e-08, "loss": 0.1908, "step": 49653 }, { "epoch": 0.8631125171652558, "grad_norm": 1.1489089279304614, "learning_rate": 4.834361341911153e-08, "loss": 0.2514, "step": 49654 }, { "epoch": 0.8631298997027587, "grad_norm": 1.0514923450959814, "learning_rate": 4.8331538560443694e-08, "loss": 0.1791, "step": 49655 }, { "epoch": 0.8631472822402614, "grad_norm": 1.1135704381084908, "learning_rate": 4.831946513335739e-08, "loss": 0.1706, "step": 49656 }, { "epoch": 0.8631646647777642, "grad_norm": 1.600160610875469, "learning_rate": 4.8307393137890896e-08, "loss": 0.1262, "step": 49657 }, { "epoch": 0.863182047315267, "grad_norm": 1.1620557319077423, "learning_rate": 4.8295322574082364e-08, "loss": 0.286, "step": 49658 }, { "epoch": 0.8631994298527699, "grad_norm": 1.8408287116328357, "learning_rate": 4.828325344197032e-08, "loss": 0.28, "step": 49659 }, { "epoch": 0.8632168123902727, "grad_norm": 1.6401274538375636, "learning_rate": 4.827118574159278e-08, "loss": 0.195, "step": 49660 }, { "epoch": 0.8632341949277755, "grad_norm": 1.3185879512330598, "learning_rate": 4.8259119472988165e-08, "loss": 0.244, "step": 49661 }, { "epoch": 0.8632515774652784, "grad_norm": 0.909339652232755, "learning_rate": 4.824705463619455e-08, "loss": 0.2699, "step": 49662 }, { "epoch": 0.8632689600027812, "grad_norm": 0.8962827692743307, "learning_rate": 4.82349912312503e-08, "loss": 0.251, "step": 49663 }, { "epoch": 0.863286342540284, "grad_norm": 0.9572249363627982, "learning_rate": 4.8222929258193544e-08, "loss": 0.152, "step": 49664 }, { "epoch": 0.8633037250777869, "grad_norm": 2.4098549793923985, "learning_rate": 4.821086871706265e-08, "loss": 0.1914, "step": 49665 }, { "epoch": 0.8633211076152897, "grad_norm": 1.051054585672038, "learning_rate": 4.819880960789563e-08, "loss": 0.2024, "step": 49666 }, { "epoch": 0.8633384901527925, "grad_norm": 1.454353772682953, "learning_rate": 4.818675193073096e-08, "loss": 0.1296, "step": 49667 }, { "epoch": 0.8633558726902953, "grad_norm": 1.391383835266814, "learning_rate": 4.817469568560678e-08, "loss": 0.2759, "step": 49668 }, { "epoch": 0.8633732552277982, "grad_norm": 1.6969172325809978, "learning_rate": 4.816264087256122e-08, "loss": 0.1753, "step": 49669 }, { "epoch": 0.863390637765301, "grad_norm": 2.475448398737402, "learning_rate": 4.8150587491632424e-08, "loss": 0.1948, "step": 49670 }, { "epoch": 0.8634080203028038, "grad_norm": 1.1375943622495093, "learning_rate": 4.8138535542858796e-08, "loss": 0.1437, "step": 49671 }, { "epoch": 0.8634254028403067, "grad_norm": 0.8674157289086833, "learning_rate": 4.812648502627847e-08, "loss": 0.126, "step": 49672 }, { "epoch": 0.8634427853778095, "grad_norm": 2.5289717197069583, "learning_rate": 4.811443594192954e-08, "loss": 0.1879, "step": 49673 }, { "epoch": 0.8634601679153123, "grad_norm": 1.488705403725941, "learning_rate": 4.810238828985036e-08, "loss": 0.1571, "step": 49674 }, { "epoch": 0.8634775504528152, "grad_norm": 0.9877089272126838, "learning_rate": 4.8090342070078946e-08, "loss": 0.2149, "step": 49675 }, { "epoch": 0.8634949329903179, "grad_norm": 1.2490458934185993, "learning_rate": 4.807829728265361e-08, "loss": 0.1268, "step": 49676 }, { "epoch": 0.8635123155278207, "grad_norm": 1.3250902561928366, "learning_rate": 4.8066253927612435e-08, "loss": 0.1311, "step": 49677 }, { "epoch": 0.8635296980653235, "grad_norm": 2.8308787428841096, "learning_rate": 4.80542120049936e-08, "loss": 0.1553, "step": 49678 }, { "epoch": 0.8635470806028264, "grad_norm": 1.076873400277941, "learning_rate": 4.804217151483536e-08, "loss": 0.2201, "step": 49679 }, { "epoch": 0.8635644631403292, "grad_norm": 1.165025626640087, "learning_rate": 4.80301324571758e-08, "loss": 0.1777, "step": 49680 }, { "epoch": 0.863581845677832, "grad_norm": 1.2435283118939342, "learning_rate": 4.801809483205316e-08, "loss": 0.2162, "step": 49681 }, { "epoch": 0.8635992282153349, "grad_norm": 1.5929865337945768, "learning_rate": 4.8006058639505466e-08, "loss": 0.1956, "step": 49682 }, { "epoch": 0.8636166107528377, "grad_norm": 1.1916776411146839, "learning_rate": 4.799402387957097e-08, "loss": 0.1423, "step": 49683 }, { "epoch": 0.8636339932903405, "grad_norm": 2.444465299190147, "learning_rate": 4.79819905522878e-08, "loss": 0.2058, "step": 49684 }, { "epoch": 0.8636513758278433, "grad_norm": 1.7975266028988612, "learning_rate": 4.7969958657694096e-08, "loss": 0.2441, "step": 49685 }, { "epoch": 0.8636687583653462, "grad_norm": 1.2346181530188414, "learning_rate": 4.795792819582783e-08, "loss": 0.1122, "step": 49686 }, { "epoch": 0.863686140902849, "grad_norm": 1.3488296877908643, "learning_rate": 4.794589916672748e-08, "loss": 0.1464, "step": 49687 }, { "epoch": 0.8637035234403518, "grad_norm": 3.8079544678696986, "learning_rate": 4.7933871570430936e-08, "loss": 0.2919, "step": 49688 }, { "epoch": 0.8637209059778547, "grad_norm": 1.2940474847578185, "learning_rate": 4.7921845406976194e-08, "loss": 0.2641, "step": 49689 }, { "epoch": 0.8637382885153575, "grad_norm": 1.5742511073826395, "learning_rate": 4.7909820676401646e-08, "loss": 0.1731, "step": 49690 }, { "epoch": 0.8637556710528603, "grad_norm": 1.8005790852243198, "learning_rate": 4.789779737874533e-08, "loss": 0.1841, "step": 49691 }, { "epoch": 0.8637730535903632, "grad_norm": 1.5546308207427875, "learning_rate": 4.788577551404527e-08, "loss": 0.1588, "step": 49692 }, { "epoch": 0.863790436127866, "grad_norm": 1.9902470320563794, "learning_rate": 4.787375508233965e-08, "loss": 0.2118, "step": 49693 }, { "epoch": 0.8638078186653688, "grad_norm": 3.4039125256975056, "learning_rate": 4.7861736083666504e-08, "loss": 0.1861, "step": 49694 }, { "epoch": 0.8638252012028715, "grad_norm": 1.7823423563316394, "learning_rate": 4.784971851806396e-08, "loss": 0.2274, "step": 49695 }, { "epoch": 0.8638425837403744, "grad_norm": 2.220283423871202, "learning_rate": 4.7837702385570164e-08, "loss": 0.1972, "step": 49696 }, { "epoch": 0.8638599662778772, "grad_norm": 1.4735057417219461, "learning_rate": 4.7825687686223025e-08, "loss": 0.1422, "step": 49697 }, { "epoch": 0.86387734881538, "grad_norm": 2.8080108043450753, "learning_rate": 4.781367442006079e-08, "loss": 0.2752, "step": 49698 }, { "epoch": 0.8638947313528829, "grad_norm": 2.5032611022062237, "learning_rate": 4.780166258712154e-08, "loss": 0.1622, "step": 49699 }, { "epoch": 0.8639121138903857, "grad_norm": 1.0639846158555215, "learning_rate": 4.778965218744335e-08, "loss": 0.1545, "step": 49700 }, { "epoch": 0.8639294964278885, "grad_norm": 1.026356211918975, "learning_rate": 4.777764322106409e-08, "loss": 0.1521, "step": 49701 }, { "epoch": 0.8639468789653914, "grad_norm": 1.7013530077620838, "learning_rate": 4.776563568802206e-08, "loss": 0.1658, "step": 49702 }, { "epoch": 0.8639642615028942, "grad_norm": 2.3258489255884345, "learning_rate": 4.7753629588355216e-08, "loss": 0.2098, "step": 49703 }, { "epoch": 0.863981644040397, "grad_norm": 0.8230208504913414, "learning_rate": 4.77416249221016e-08, "loss": 0.2186, "step": 49704 }, { "epoch": 0.8639990265778998, "grad_norm": 1.4843967279923551, "learning_rate": 4.7729621689299226e-08, "loss": 0.1535, "step": 49705 }, { "epoch": 0.8640164091154027, "grad_norm": 1.4013408935645864, "learning_rate": 4.771761988998635e-08, "loss": 0.2226, "step": 49706 }, { "epoch": 0.8640337916529055, "grad_norm": 1.3333511006463068, "learning_rate": 4.770561952420077e-08, "loss": 0.1465, "step": 49707 }, { "epoch": 0.8640511741904083, "grad_norm": 1.3429898064352994, "learning_rate": 4.769362059198051e-08, "loss": 0.1476, "step": 49708 }, { "epoch": 0.8640685567279112, "grad_norm": 1.0890228386424123, "learning_rate": 4.768162309336382e-08, "loss": 0.1982, "step": 49709 }, { "epoch": 0.864085939265414, "grad_norm": 2.0781856910470466, "learning_rate": 4.766962702838856e-08, "loss": 0.1634, "step": 49710 }, { "epoch": 0.8641033218029168, "grad_norm": 7.296649081185224, "learning_rate": 4.765763239709286e-08, "loss": 0.2866, "step": 49711 }, { "epoch": 0.8641207043404197, "grad_norm": 1.1535943819767616, "learning_rate": 4.764563919951464e-08, "loss": 0.1642, "step": 49712 }, { "epoch": 0.8641380868779225, "grad_norm": 1.3908198292819207, "learning_rate": 4.763364743569193e-08, "loss": 0.3173, "step": 49713 }, { "epoch": 0.8641554694154253, "grad_norm": 1.2726001191784466, "learning_rate": 4.7621657105662795e-08, "loss": 0.1414, "step": 49714 }, { "epoch": 0.864172851952928, "grad_norm": 0.9824378485240179, "learning_rate": 4.760966820946516e-08, "loss": 0.1158, "step": 49715 }, { "epoch": 0.8641902344904309, "grad_norm": 1.629730309184688, "learning_rate": 4.759768074713699e-08, "loss": 0.2172, "step": 49716 }, { "epoch": 0.8642076170279337, "grad_norm": 1.8164308096772037, "learning_rate": 4.7585694718716484e-08, "loss": 0.1584, "step": 49717 }, { "epoch": 0.8642249995654365, "grad_norm": 1.1543316826477548, "learning_rate": 4.7573710124241496e-08, "loss": 0.1971, "step": 49718 }, { "epoch": 0.8642423821029394, "grad_norm": 1.257420154910558, "learning_rate": 4.75617269637501e-08, "loss": 0.2866, "step": 49719 }, { "epoch": 0.8642597646404422, "grad_norm": 4.421548667021739, "learning_rate": 4.754974523728e-08, "loss": 0.2892, "step": 49720 }, { "epoch": 0.864277147177945, "grad_norm": 1.1247725337130248, "learning_rate": 4.753776494486944e-08, "loss": 0.2027, "step": 49721 }, { "epoch": 0.8642945297154478, "grad_norm": 1.0314129804711676, "learning_rate": 4.752578608655633e-08, "loss": 0.155, "step": 49722 }, { "epoch": 0.8643119122529507, "grad_norm": 1.780799297396664, "learning_rate": 4.7513808662378696e-08, "loss": 0.3131, "step": 49723 }, { "epoch": 0.8643292947904535, "grad_norm": 1.065790267368743, "learning_rate": 4.750183267237429e-08, "loss": 0.2181, "step": 49724 }, { "epoch": 0.8643466773279563, "grad_norm": 1.4463831451754339, "learning_rate": 4.748985811658135e-08, "loss": 0.2145, "step": 49725 }, { "epoch": 0.8643640598654592, "grad_norm": 1.08420338336454, "learning_rate": 4.747788499503769e-08, "loss": 0.1656, "step": 49726 }, { "epoch": 0.864381442402962, "grad_norm": 1.5304916431473286, "learning_rate": 4.746591330778121e-08, "loss": 0.21, "step": 49727 }, { "epoch": 0.8643988249404648, "grad_norm": 1.8634549569902044, "learning_rate": 4.7453943054849844e-08, "loss": 0.205, "step": 49728 }, { "epoch": 0.8644162074779677, "grad_norm": 1.9321241250962882, "learning_rate": 4.7441974236281654e-08, "loss": 0.2584, "step": 49729 }, { "epoch": 0.8644335900154705, "grad_norm": 1.7144065816275234, "learning_rate": 4.7430006852114513e-08, "loss": 0.2753, "step": 49730 }, { "epoch": 0.8644509725529733, "grad_norm": 1.7733937915284168, "learning_rate": 4.741804090238638e-08, "loss": 0.1335, "step": 49731 }, { "epoch": 0.8644683550904761, "grad_norm": 1.5203287908935275, "learning_rate": 4.7406076387135177e-08, "loss": 0.1199, "step": 49732 }, { "epoch": 0.864485737627979, "grad_norm": 0.958307503397956, "learning_rate": 4.739411330639881e-08, "loss": 0.1387, "step": 49733 }, { "epoch": 0.8645031201654818, "grad_norm": 1.4225912436755672, "learning_rate": 4.7382151660215144e-08, "loss": 0.1939, "step": 49734 }, { "epoch": 0.8645205027029845, "grad_norm": 0.7647667116873921, "learning_rate": 4.73701914486222e-08, "loss": 0.1588, "step": 49735 }, { "epoch": 0.8645378852404874, "grad_norm": 1.1488629471016283, "learning_rate": 4.7358232671657675e-08, "loss": 0.1894, "step": 49736 }, { "epoch": 0.8645552677779902, "grad_norm": 0.7934757948029897, "learning_rate": 4.734627532935975e-08, "loss": 0.2188, "step": 49737 }, { "epoch": 0.864572650315493, "grad_norm": 1.3201354777857501, "learning_rate": 4.73343194217663e-08, "loss": 0.2334, "step": 49738 }, { "epoch": 0.8645900328529958, "grad_norm": 0.9089176265861679, "learning_rate": 4.732236494891495e-08, "loss": 0.2117, "step": 49739 }, { "epoch": 0.8646074153904987, "grad_norm": 2.532337302865337, "learning_rate": 4.7310411910843784e-08, "loss": 0.1479, "step": 49740 }, { "epoch": 0.8646247979280015, "grad_norm": 1.7919820105556998, "learning_rate": 4.7298460307590716e-08, "loss": 0.2248, "step": 49741 }, { "epoch": 0.8646421804655043, "grad_norm": 1.610555578330064, "learning_rate": 4.72865101391936e-08, "loss": 0.1505, "step": 49742 }, { "epoch": 0.8646595630030072, "grad_norm": 1.424072074561533, "learning_rate": 4.727456140569025e-08, "loss": 0.3254, "step": 49743 }, { "epoch": 0.86467694554051, "grad_norm": 1.3042309371408252, "learning_rate": 4.726261410711857e-08, "loss": 0.2826, "step": 49744 }, { "epoch": 0.8646943280780128, "grad_norm": 1.2476122659196536, "learning_rate": 4.7250668243516424e-08, "loss": 0.1442, "step": 49745 }, { "epoch": 0.8647117106155157, "grad_norm": 1.1325807302024191, "learning_rate": 4.723872381492172e-08, "loss": 0.1848, "step": 49746 }, { "epoch": 0.8647290931530185, "grad_norm": 1.28387882611262, "learning_rate": 4.7226780821372156e-08, "loss": 0.1561, "step": 49747 }, { "epoch": 0.8647464756905213, "grad_norm": 1.6899397588847254, "learning_rate": 4.721483926290581e-08, "loss": 0.2688, "step": 49748 }, { "epoch": 0.8647638582280242, "grad_norm": 1.5143904237318815, "learning_rate": 4.720289913956044e-08, "loss": 0.1384, "step": 49749 }, { "epoch": 0.864781240765527, "grad_norm": 1.8524044627226814, "learning_rate": 4.7190960451373994e-08, "loss": 0.2279, "step": 49750 }, { "epoch": 0.8647986233030298, "grad_norm": 1.0985005205903473, "learning_rate": 4.717902319838396e-08, "loss": 0.1519, "step": 49751 }, { "epoch": 0.8648160058405326, "grad_norm": 2.7252112122132734, "learning_rate": 4.716708738062858e-08, "loss": 0.1997, "step": 49752 }, { "epoch": 0.8648333883780355, "grad_norm": 1.0959718607296485, "learning_rate": 4.715515299814543e-08, "loss": 0.1601, "step": 49753 }, { "epoch": 0.8648507709155383, "grad_norm": 2.869455225413354, "learning_rate": 4.714322005097249e-08, "loss": 0.2344, "step": 49754 }, { "epoch": 0.864868153453041, "grad_norm": 0.735795557827651, "learning_rate": 4.713128853914744e-08, "loss": 0.2254, "step": 49755 }, { "epoch": 0.8648855359905439, "grad_norm": 2.043946042557584, "learning_rate": 4.711935846270831e-08, "loss": 0.2729, "step": 49756 }, { "epoch": 0.8649029185280467, "grad_norm": 1.0749874490597104, "learning_rate": 4.7107429821692693e-08, "loss": 0.1454, "step": 49757 }, { "epoch": 0.8649203010655495, "grad_norm": 1.45915535312492, "learning_rate": 4.709550261613848e-08, "loss": 0.2558, "step": 49758 }, { "epoch": 0.8649376836030523, "grad_norm": 1.4157269798874363, "learning_rate": 4.708357684608338e-08, "loss": 0.1433, "step": 49759 }, { "epoch": 0.8649550661405552, "grad_norm": 1.7786016576717307, "learning_rate": 4.707165251156542e-08, "loss": 0.1919, "step": 49760 }, { "epoch": 0.864972448678058, "grad_norm": 0.7808407688535813, "learning_rate": 4.7059729612622226e-08, "loss": 0.1437, "step": 49761 }, { "epoch": 0.8649898312155608, "grad_norm": 1.4454671131477899, "learning_rate": 4.704780814929166e-08, "loss": 0.1867, "step": 49762 }, { "epoch": 0.8650072137530637, "grad_norm": 1.2902178202717844, "learning_rate": 4.703588812161141e-08, "loss": 0.2003, "step": 49763 }, { "epoch": 0.8650245962905665, "grad_norm": 1.522007317409448, "learning_rate": 4.70239695296194e-08, "loss": 0.1975, "step": 49764 }, { "epoch": 0.8650419788280693, "grad_norm": 0.9281955112160875, "learning_rate": 4.7012052373353264e-08, "loss": 0.1301, "step": 49765 }, { "epoch": 0.8650593613655722, "grad_norm": 0.9871821561848881, "learning_rate": 4.700013665285085e-08, "loss": 0.1823, "step": 49766 }, { "epoch": 0.865076743903075, "grad_norm": 2.3364408727270174, "learning_rate": 4.698822236814987e-08, "loss": 0.2407, "step": 49767 }, { "epoch": 0.8650941264405778, "grad_norm": 0.9201892977032968, "learning_rate": 4.697630951928816e-08, "loss": 0.201, "step": 49768 }, { "epoch": 0.8651115089780806, "grad_norm": 1.3863747522586534, "learning_rate": 4.696439810630354e-08, "loss": 0.1265, "step": 49769 }, { "epoch": 0.8651288915155835, "grad_norm": 0.9764495500128623, "learning_rate": 4.695248812923352e-08, "loss": 0.1462, "step": 49770 }, { "epoch": 0.8651462740530863, "grad_norm": 1.2906930544700945, "learning_rate": 4.694057958811609e-08, "loss": 0.1547, "step": 49771 }, { "epoch": 0.8651636565905891, "grad_norm": 1.985934393690566, "learning_rate": 4.692867248298887e-08, "loss": 0.2314, "step": 49772 }, { "epoch": 0.865181039128092, "grad_norm": 2.162382380852213, "learning_rate": 4.6916766813889674e-08, "loss": 0.1872, "step": 49773 }, { "epoch": 0.8651984216655948, "grad_norm": 1.7527191026648725, "learning_rate": 4.690486258085613e-08, "loss": 0.191, "step": 49774 }, { "epoch": 0.8652158042030975, "grad_norm": 2.8553795154604362, "learning_rate": 4.68929597839261e-08, "loss": 0.2597, "step": 49775 }, { "epoch": 0.8652331867406003, "grad_norm": 2.2775180752722095, "learning_rate": 4.688105842313722e-08, "loss": 0.2131, "step": 49776 }, { "epoch": 0.8652505692781032, "grad_norm": 1.3974889254148541, "learning_rate": 4.6869158498527294e-08, "loss": 0.2554, "step": 49777 }, { "epoch": 0.865267951815606, "grad_norm": 1.8302822954457, "learning_rate": 4.685726001013385e-08, "loss": 0.2619, "step": 49778 }, { "epoch": 0.8652853343531088, "grad_norm": 0.7594084838178186, "learning_rate": 4.6845362957994796e-08, "loss": 0.2661, "step": 49779 }, { "epoch": 0.8653027168906117, "grad_norm": 2.300164013864605, "learning_rate": 4.683346734214783e-08, "loss": 0.1933, "step": 49780 }, { "epoch": 0.8653200994281145, "grad_norm": 1.2215240266949046, "learning_rate": 4.682157316263058e-08, "loss": 0.1646, "step": 49781 }, { "epoch": 0.8653374819656173, "grad_norm": 1.7763167102906774, "learning_rate": 4.680968041948074e-08, "loss": 0.3304, "step": 49782 }, { "epoch": 0.8653548645031202, "grad_norm": 2.106895274525345, "learning_rate": 4.679778911273608e-08, "loss": 0.1852, "step": 49783 }, { "epoch": 0.865372247040623, "grad_norm": 1.128470742944656, "learning_rate": 4.678589924243426e-08, "loss": 0.1411, "step": 49784 }, { "epoch": 0.8653896295781258, "grad_norm": 1.2273022441520889, "learning_rate": 4.677401080861293e-08, "loss": 0.3003, "step": 49785 }, { "epoch": 0.8654070121156286, "grad_norm": 1.4350593381273329, "learning_rate": 4.676212381130967e-08, "loss": 0.1822, "step": 49786 }, { "epoch": 0.8654243946531315, "grad_norm": 1.323549671921842, "learning_rate": 4.67502382505624e-08, "loss": 0.124, "step": 49787 }, { "epoch": 0.8654417771906343, "grad_norm": 1.692264168409106, "learning_rate": 4.6738354126408754e-08, "loss": 0.2148, "step": 49788 }, { "epoch": 0.8654591597281371, "grad_norm": 1.8169026745771006, "learning_rate": 4.6726471438886196e-08, "loss": 0.1471, "step": 49789 }, { "epoch": 0.86547654226564, "grad_norm": 1.5670020744466135, "learning_rate": 4.671459018803242e-08, "loss": 0.1986, "step": 49790 }, { "epoch": 0.8654939248031428, "grad_norm": 1.3489266948834553, "learning_rate": 4.670271037388529e-08, "loss": 0.146, "step": 49791 }, { "epoch": 0.8655113073406456, "grad_norm": 2.6003104589566606, "learning_rate": 4.669083199648233e-08, "loss": 0.2012, "step": 49792 }, { "epoch": 0.8655286898781485, "grad_norm": 1.3101813954783361, "learning_rate": 4.667895505586123e-08, "loss": 0.3, "step": 49793 }, { "epoch": 0.8655460724156513, "grad_norm": 1.2694327929261224, "learning_rate": 4.6667079552059566e-08, "loss": 0.2182, "step": 49794 }, { "epoch": 0.865563454953154, "grad_norm": 1.0766069780309502, "learning_rate": 4.665520548511503e-08, "loss": 0.0878, "step": 49795 }, { "epoch": 0.8655808374906568, "grad_norm": 2.0023291285365055, "learning_rate": 4.664333285506522e-08, "loss": 0.1796, "step": 49796 }, { "epoch": 0.8655982200281597, "grad_norm": 1.3862310064617887, "learning_rate": 4.66314616619477e-08, "loss": 0.2093, "step": 49797 }, { "epoch": 0.8656156025656625, "grad_norm": 1.4136515976017492, "learning_rate": 4.661959190580034e-08, "loss": 0.2372, "step": 49798 }, { "epoch": 0.8656329851031653, "grad_norm": 1.2556258964167868, "learning_rate": 4.660772358666054e-08, "loss": 0.2116, "step": 49799 }, { "epoch": 0.8656503676406682, "grad_norm": 1.7111610289428136, "learning_rate": 4.6595856704566005e-08, "loss": 0.183, "step": 49800 }, { "epoch": 0.865667750178171, "grad_norm": 2.110595970650451, "learning_rate": 4.658399125955431e-08, "loss": 0.34, "step": 49801 }, { "epoch": 0.8656851327156738, "grad_norm": 2.1412433319679542, "learning_rate": 4.6572127251663096e-08, "loss": 0.1888, "step": 49802 }, { "epoch": 0.8657025152531767, "grad_norm": 4.640097133627465, "learning_rate": 4.656026468093e-08, "loss": 0.3624, "step": 49803 }, { "epoch": 0.8657198977906795, "grad_norm": 4.070646856526986, "learning_rate": 4.654840354739248e-08, "loss": 0.1449, "step": 49804 }, { "epoch": 0.8657372803281823, "grad_norm": 1.959036299150897, "learning_rate": 4.653654385108818e-08, "loss": 0.2235, "step": 49805 }, { "epoch": 0.8657546628656851, "grad_norm": 2.659997490379927, "learning_rate": 4.652468559205486e-08, "loss": 0.2137, "step": 49806 }, { "epoch": 0.865772045403188, "grad_norm": 1.345493409872178, "learning_rate": 4.6512828770330023e-08, "loss": 0.1806, "step": 49807 }, { "epoch": 0.8657894279406908, "grad_norm": 1.2286574777416297, "learning_rate": 4.65009733859511e-08, "loss": 0.1461, "step": 49808 }, { "epoch": 0.8658068104781936, "grad_norm": 1.635273483848959, "learning_rate": 4.648911943895567e-08, "loss": 0.2105, "step": 49809 }, { "epoch": 0.8658241930156965, "grad_norm": 1.5208208322646755, "learning_rate": 4.647726692938153e-08, "loss": 0.2009, "step": 49810 }, { "epoch": 0.8658415755531993, "grad_norm": 1.0638348097441448, "learning_rate": 4.6465415857266154e-08, "loss": 0.1519, "step": 49811 }, { "epoch": 0.8658589580907021, "grad_norm": 1.7066396894160165, "learning_rate": 4.645356622264701e-08, "loss": 0.2115, "step": 49812 }, { "epoch": 0.865876340628205, "grad_norm": 1.9155859170636864, "learning_rate": 4.6441718025561736e-08, "loss": 0.2312, "step": 49813 }, { "epoch": 0.8658937231657078, "grad_norm": 1.2769080421252244, "learning_rate": 4.642987126604786e-08, "loss": 0.1223, "step": 49814 }, { "epoch": 0.8659111057032105, "grad_norm": 1.1751714218263796, "learning_rate": 4.6418025944142956e-08, "loss": 0.1434, "step": 49815 }, { "epoch": 0.8659284882407133, "grad_norm": 1.173067991688288, "learning_rate": 4.640618205988456e-08, "loss": 0.1708, "step": 49816 }, { "epoch": 0.8659458707782162, "grad_norm": 1.1568123483603856, "learning_rate": 4.639433961331013e-08, "loss": 0.1578, "step": 49817 }, { "epoch": 0.865963253315719, "grad_norm": 1.2156344271684731, "learning_rate": 4.638249860445731e-08, "loss": 0.297, "step": 49818 }, { "epoch": 0.8659806358532218, "grad_norm": 1.6610339420289524, "learning_rate": 4.637065903336362e-08, "loss": 0.2487, "step": 49819 }, { "epoch": 0.8659980183907247, "grad_norm": 1.549087325817336, "learning_rate": 4.63588209000666e-08, "loss": 0.1147, "step": 49820 }, { "epoch": 0.8660154009282275, "grad_norm": 0.8649829339494505, "learning_rate": 4.63469842046037e-08, "loss": 0.1939, "step": 49821 }, { "epoch": 0.8660327834657303, "grad_norm": 1.0386463316336636, "learning_rate": 4.633514894701246e-08, "loss": 0.1921, "step": 49822 }, { "epoch": 0.8660501660032331, "grad_norm": 1.5754780971382236, "learning_rate": 4.6323315127330395e-08, "loss": 0.2475, "step": 49823 }, { "epoch": 0.866067548540736, "grad_norm": 1.7402691516904667, "learning_rate": 4.6311482745595046e-08, "loss": 0.2405, "step": 49824 }, { "epoch": 0.8660849310782388, "grad_norm": 0.9365687724062033, "learning_rate": 4.6299651801843755e-08, "loss": 0.138, "step": 49825 }, { "epoch": 0.8661023136157416, "grad_norm": 1.8282386833860724, "learning_rate": 4.628782229611439e-08, "loss": 0.1485, "step": 49826 }, { "epoch": 0.8661196961532445, "grad_norm": 1.6243707166488484, "learning_rate": 4.627599422844408e-08, "loss": 0.1865, "step": 49827 }, { "epoch": 0.8661370786907473, "grad_norm": 2.8186674614944396, "learning_rate": 4.626416759887036e-08, "loss": 0.252, "step": 49828 }, { "epoch": 0.8661544612282501, "grad_norm": 0.9436287795455339, "learning_rate": 4.625234240743087e-08, "loss": 0.1652, "step": 49829 }, { "epoch": 0.866171843765753, "grad_norm": 1.0305806174344478, "learning_rate": 4.6240518654163064e-08, "loss": 0.1532, "step": 49830 }, { "epoch": 0.8661892263032558, "grad_norm": 1.3179917630904336, "learning_rate": 4.6228696339104365e-08, "loss": 0.1417, "step": 49831 }, { "epoch": 0.8662066088407586, "grad_norm": 1.5390924809853768, "learning_rate": 4.621687546229225e-08, "loss": 0.226, "step": 49832 }, { "epoch": 0.8662239913782614, "grad_norm": 2.581884223342032, "learning_rate": 4.620505602376418e-08, "loss": 0.2032, "step": 49833 }, { "epoch": 0.8662413739157642, "grad_norm": 1.7238966869842864, "learning_rate": 4.6193238023557626e-08, "loss": 0.2499, "step": 49834 }, { "epoch": 0.866258756453267, "grad_norm": 0.9402179667996858, "learning_rate": 4.6181421461710056e-08, "loss": 0.2519, "step": 49835 }, { "epoch": 0.8662761389907698, "grad_norm": 1.3183261113523925, "learning_rate": 4.6169606338258785e-08, "loss": 0.2039, "step": 49836 }, { "epoch": 0.8662935215282727, "grad_norm": 1.8860302068114176, "learning_rate": 4.615779265324149e-08, "loss": 0.2477, "step": 49837 }, { "epoch": 0.8663109040657755, "grad_norm": 2.1092171699183595, "learning_rate": 4.614598040669554e-08, "loss": 0.2249, "step": 49838 }, { "epoch": 0.8663282866032783, "grad_norm": 1.1380716964569282, "learning_rate": 4.6134169598658455e-08, "loss": 0.1727, "step": 49839 }, { "epoch": 0.8663456691407811, "grad_norm": 1.638183544069087, "learning_rate": 4.612236022916738e-08, "loss": 0.1968, "step": 49840 }, { "epoch": 0.866363051678284, "grad_norm": 1.1321303408200691, "learning_rate": 4.611055229826e-08, "loss": 0.1317, "step": 49841 }, { "epoch": 0.8663804342157868, "grad_norm": 1.155529761807495, "learning_rate": 4.609874580597367e-08, "loss": 0.1501, "step": 49842 }, { "epoch": 0.8663978167532896, "grad_norm": 1.8209408661198279, "learning_rate": 4.6086940752345816e-08, "loss": 0.1592, "step": 49843 }, { "epoch": 0.8664151992907925, "grad_norm": 1.5330891846437216, "learning_rate": 4.607513713741373e-08, "loss": 0.1641, "step": 49844 }, { "epoch": 0.8664325818282953, "grad_norm": 1.597757467159214, "learning_rate": 4.6063334961215114e-08, "loss": 0.1599, "step": 49845 }, { "epoch": 0.8664499643657981, "grad_norm": 1.0604128835773758, "learning_rate": 4.605153422378716e-08, "loss": 0.1833, "step": 49846 }, { "epoch": 0.866467346903301, "grad_norm": 1.346300305776881, "learning_rate": 4.603973492516733e-08, "loss": 0.2229, "step": 49847 }, { "epoch": 0.8664847294408038, "grad_norm": 1.2588512053715586, "learning_rate": 4.602793706539287e-08, "loss": 0.1703, "step": 49848 }, { "epoch": 0.8665021119783066, "grad_norm": 1.5787158892737811, "learning_rate": 4.601614064450143e-08, "loss": 0.138, "step": 49849 }, { "epoch": 0.8665194945158095, "grad_norm": 1.361974984293162, "learning_rate": 4.600434566253025e-08, "loss": 0.1825, "step": 49850 }, { "epoch": 0.8665368770533123, "grad_norm": 1.108881158219276, "learning_rate": 4.59925521195168e-08, "loss": 0.1397, "step": 49851 }, { "epoch": 0.8665542595908151, "grad_norm": 2.5438840003584637, "learning_rate": 4.598076001549839e-08, "loss": 0.3729, "step": 49852 }, { "epoch": 0.8665716421283179, "grad_norm": 1.6795082086184976, "learning_rate": 4.5968969350512364e-08, "loss": 0.2631, "step": 49853 }, { "epoch": 0.8665890246658207, "grad_norm": 1.4474300182301911, "learning_rate": 4.595718012459621e-08, "loss": 0.1949, "step": 49854 }, { "epoch": 0.8666064072033235, "grad_norm": 1.8078650651966128, "learning_rate": 4.594539233778716e-08, "loss": 0.235, "step": 49855 }, { "epoch": 0.8666237897408263, "grad_norm": 1.87077202901667, "learning_rate": 4.593360599012258e-08, "loss": 0.1987, "step": 49856 }, { "epoch": 0.8666411722783292, "grad_norm": 1.3547188616469075, "learning_rate": 4.592182108164e-08, "loss": 0.2099, "step": 49857 }, { "epoch": 0.866658554815832, "grad_norm": 1.3711006372178884, "learning_rate": 4.591003761237672e-08, "loss": 0.1188, "step": 49858 }, { "epoch": 0.8666759373533348, "grad_norm": 1.3686032998335464, "learning_rate": 4.589825558236982e-08, "loss": 0.1626, "step": 49859 }, { "epoch": 0.8666933198908376, "grad_norm": 1.8856079601281843, "learning_rate": 4.5886474991656984e-08, "loss": 0.2438, "step": 49860 }, { "epoch": 0.8667107024283405, "grad_norm": 1.97271182227158, "learning_rate": 4.587469584027542e-08, "loss": 0.1862, "step": 49861 }, { "epoch": 0.8667280849658433, "grad_norm": 1.163741106533727, "learning_rate": 4.5862918128262486e-08, "loss": 0.1203, "step": 49862 }, { "epoch": 0.8667454675033461, "grad_norm": 1.8926624144831419, "learning_rate": 4.585114185565542e-08, "loss": 0.1699, "step": 49863 }, { "epoch": 0.866762850040849, "grad_norm": 1.525961802387114, "learning_rate": 4.5839367022491695e-08, "loss": 0.1465, "step": 49864 }, { "epoch": 0.8667802325783518, "grad_norm": 1.9582237814069263, "learning_rate": 4.5827593628808505e-08, "loss": 0.1642, "step": 49865 }, { "epoch": 0.8667976151158546, "grad_norm": 0.9797091156486767, "learning_rate": 4.5815821674643204e-08, "loss": 0.1262, "step": 49866 }, { "epoch": 0.8668149976533575, "grad_norm": 1.6260864303486113, "learning_rate": 4.5804051160033056e-08, "loss": 0.1935, "step": 49867 }, { "epoch": 0.8668323801908603, "grad_norm": 1.1919754449238258, "learning_rate": 4.5792282085015454e-08, "loss": 0.1632, "step": 49868 }, { "epoch": 0.8668497627283631, "grad_norm": 1.2598576599160431, "learning_rate": 4.5780514449627715e-08, "loss": 0.1414, "step": 49869 }, { "epoch": 0.866867145265866, "grad_norm": 1.1745620068167175, "learning_rate": 4.5768748253907195e-08, "loss": 0.1792, "step": 49870 }, { "epoch": 0.8668845278033688, "grad_norm": 0.8755148436625338, "learning_rate": 4.5756983497890925e-08, "loss": 0.2118, "step": 49871 }, { "epoch": 0.8669019103408716, "grad_norm": 1.5465520073326902, "learning_rate": 4.574522018161642e-08, "loss": 0.1123, "step": 49872 }, { "epoch": 0.8669192928783744, "grad_norm": 0.8463832586983036, "learning_rate": 4.573345830512088e-08, "loss": 0.1755, "step": 49873 }, { "epoch": 0.8669366754158772, "grad_norm": 1.4387485098341453, "learning_rate": 4.57216978684416e-08, "loss": 0.1477, "step": 49874 }, { "epoch": 0.86695405795338, "grad_norm": 2.2852942190297996, "learning_rate": 4.5709938871615786e-08, "loss": 0.2002, "step": 49875 }, { "epoch": 0.8669714404908828, "grad_norm": 1.3462747018498396, "learning_rate": 4.56981813146809e-08, "loss": 0.1402, "step": 49876 }, { "epoch": 0.8669888230283856, "grad_norm": 1.257423189389659, "learning_rate": 4.5686425197674126e-08, "loss": 0.1753, "step": 49877 }, { "epoch": 0.8670062055658885, "grad_norm": 1.3294032260487525, "learning_rate": 4.567467052063262e-08, "loss": 0.1729, "step": 49878 }, { "epoch": 0.8670235881033913, "grad_norm": 1.4071537614971712, "learning_rate": 4.566291728359362e-08, "loss": 0.1582, "step": 49879 }, { "epoch": 0.8670409706408941, "grad_norm": 0.9141423996881115, "learning_rate": 4.5651165486594586e-08, "loss": 0.1535, "step": 49880 }, { "epoch": 0.867058353178397, "grad_norm": 2.9672619915622382, "learning_rate": 4.563941512967262e-08, "loss": 0.2873, "step": 49881 }, { "epoch": 0.8670757357158998, "grad_norm": 1.4772942085293426, "learning_rate": 4.5627666212865014e-08, "loss": 0.2434, "step": 49882 }, { "epoch": 0.8670931182534026, "grad_norm": 1.311698664963631, "learning_rate": 4.561591873620896e-08, "loss": 0.1825, "step": 49883 }, { "epoch": 0.8671105007909055, "grad_norm": 1.3736292581218161, "learning_rate": 4.5604172699741704e-08, "loss": 0.1654, "step": 49884 }, { "epoch": 0.8671278833284083, "grad_norm": 1.392151768664899, "learning_rate": 4.5592428103500504e-08, "loss": 0.2373, "step": 49885 }, { "epoch": 0.8671452658659111, "grad_norm": 1.0099810679625436, "learning_rate": 4.558068494752248e-08, "loss": 0.1255, "step": 49886 }, { "epoch": 0.867162648403414, "grad_norm": 2.050791047830082, "learning_rate": 4.556894323184507e-08, "loss": 0.19, "step": 49887 }, { "epoch": 0.8671800309409168, "grad_norm": 1.1565390448005362, "learning_rate": 4.555720295650528e-08, "loss": 0.1167, "step": 49888 }, { "epoch": 0.8671974134784196, "grad_norm": 1.847477238204899, "learning_rate": 4.554546412154059e-08, "loss": 0.1835, "step": 49889 }, { "epoch": 0.8672147960159224, "grad_norm": 1.170056573371235, "learning_rate": 4.553372672698774e-08, "loss": 0.1832, "step": 49890 }, { "epoch": 0.8672321785534253, "grad_norm": 1.6685802980327682, "learning_rate": 4.552199077288438e-08, "loss": 0.1979, "step": 49891 }, { "epoch": 0.8672495610909281, "grad_norm": 1.7175413616927184, "learning_rate": 4.551025625926752e-08, "loss": 0.1577, "step": 49892 }, { "epoch": 0.8672669436284309, "grad_norm": 1.085391040205656, "learning_rate": 4.549852318617437e-08, "loss": 0.1067, "step": 49893 }, { "epoch": 0.8672843261659337, "grad_norm": 1.018890102482493, "learning_rate": 4.548679155364199e-08, "loss": 0.1729, "step": 49894 }, { "epoch": 0.8673017087034365, "grad_norm": 3.450112852496407, "learning_rate": 4.547506136170792e-08, "loss": 0.2052, "step": 49895 }, { "epoch": 0.8673190912409393, "grad_norm": 1.4705370892412535, "learning_rate": 4.546333261040902e-08, "loss": 0.2076, "step": 49896 }, { "epoch": 0.8673364737784421, "grad_norm": 3.2408963623366343, "learning_rate": 4.5451605299782535e-08, "loss": 0.2637, "step": 49897 }, { "epoch": 0.867353856315945, "grad_norm": 2.132306807227751, "learning_rate": 4.543987942986555e-08, "loss": 0.2185, "step": 49898 }, { "epoch": 0.8673712388534478, "grad_norm": 0.8893002492793953, "learning_rate": 4.542815500069547e-08, "loss": 0.0885, "step": 49899 }, { "epoch": 0.8673886213909506, "grad_norm": 0.992161511158695, "learning_rate": 4.541643201230933e-08, "loss": 0.2283, "step": 49900 }, { "epoch": 0.8674060039284535, "grad_norm": 1.6199987366887119, "learning_rate": 4.5404710464744256e-08, "loss": 0.1342, "step": 49901 }, { "epoch": 0.8674233864659563, "grad_norm": 3.228131052870844, "learning_rate": 4.539299035803745e-08, "loss": 0.2422, "step": 49902 }, { "epoch": 0.8674407690034591, "grad_norm": 0.7979005919230211, "learning_rate": 4.5381271692225995e-08, "loss": 0.1387, "step": 49903 }, { "epoch": 0.867458151540962, "grad_norm": 1.9112936860943832, "learning_rate": 4.536955446734714e-08, "loss": 0.1948, "step": 49904 }, { "epoch": 0.8674755340784648, "grad_norm": 1.1625475780204118, "learning_rate": 4.53578386834379e-08, "loss": 0.1771, "step": 49905 }, { "epoch": 0.8674929166159676, "grad_norm": 1.940131267252521, "learning_rate": 4.5346124340535416e-08, "loss": 0.1396, "step": 49906 }, { "epoch": 0.8675102991534704, "grad_norm": 1.219111909124117, "learning_rate": 4.5334411438676943e-08, "loss": 0.1551, "step": 49907 }, { "epoch": 0.8675276816909733, "grad_norm": 1.1168423567324743, "learning_rate": 4.532269997789961e-08, "loss": 0.1853, "step": 49908 }, { "epoch": 0.8675450642284761, "grad_norm": 0.9671287585023263, "learning_rate": 4.531098995824023e-08, "loss": 0.1194, "step": 49909 }, { "epoch": 0.8675624467659789, "grad_norm": 1.7200218807645558, "learning_rate": 4.529928137973632e-08, "loss": 0.116, "step": 49910 }, { "epoch": 0.8675798293034818, "grad_norm": 1.3124550751142947, "learning_rate": 4.52875742424248e-08, "loss": 0.1448, "step": 49911 }, { "epoch": 0.8675972118409846, "grad_norm": 2.3881123125986607, "learning_rate": 4.527586854634274e-08, "loss": 0.158, "step": 49912 }, { "epoch": 0.8676145943784874, "grad_norm": 1.4190798948374654, "learning_rate": 4.526416429152735e-08, "loss": 0.1436, "step": 49913 }, { "epoch": 0.8676319769159901, "grad_norm": 1.5135285300314139, "learning_rate": 4.5252461478015644e-08, "loss": 0.1771, "step": 49914 }, { "epoch": 0.867649359453493, "grad_norm": 1.8018578938907694, "learning_rate": 4.524076010584471e-08, "loss": 0.2588, "step": 49915 }, { "epoch": 0.8676667419909958, "grad_norm": 1.0436419386568079, "learning_rate": 4.5229060175051733e-08, "loss": 0.2904, "step": 49916 }, { "epoch": 0.8676841245284986, "grad_norm": 0.8939497288728823, "learning_rate": 4.521736168567358e-08, "loss": 0.131, "step": 49917 }, { "epoch": 0.8677015070660015, "grad_norm": 2.010215859982229, "learning_rate": 4.520566463774761e-08, "loss": 0.181, "step": 49918 }, { "epoch": 0.8677188896035043, "grad_norm": 2.025530606048102, "learning_rate": 4.519396903131073e-08, "loss": 0.1766, "step": 49919 }, { "epoch": 0.8677362721410071, "grad_norm": 1.5662695166544434, "learning_rate": 4.5182274866400086e-08, "loss": 0.1735, "step": 49920 }, { "epoch": 0.86775365467851, "grad_norm": 1.3659668302845804, "learning_rate": 4.5170582143052695e-08, "loss": 0.1279, "step": 49921 }, { "epoch": 0.8677710372160128, "grad_norm": 1.302072501167102, "learning_rate": 4.515889086130564e-08, "loss": 0.1913, "step": 49922 }, { "epoch": 0.8677884197535156, "grad_norm": 1.3114723614236965, "learning_rate": 4.51472010211959e-08, "loss": 0.0906, "step": 49923 }, { "epoch": 0.8678058022910184, "grad_norm": 1.3683486630517916, "learning_rate": 4.5135512622760663e-08, "loss": 0.1329, "step": 49924 }, { "epoch": 0.8678231848285213, "grad_norm": 1.9143527662818152, "learning_rate": 4.5123825666036785e-08, "loss": 0.1416, "step": 49925 }, { "epoch": 0.8678405673660241, "grad_norm": 1.7660150926752618, "learning_rate": 4.5112140151061516e-08, "loss": 0.1769, "step": 49926 }, { "epoch": 0.8678579499035269, "grad_norm": 1.1983371793135187, "learning_rate": 4.510045607787188e-08, "loss": 0.1446, "step": 49927 }, { "epoch": 0.8678753324410298, "grad_norm": 0.908379041446965, "learning_rate": 4.50887734465048e-08, "loss": 0.1734, "step": 49928 }, { "epoch": 0.8678927149785326, "grad_norm": 1.3423260758818074, "learning_rate": 4.5077092256997185e-08, "loss": 0.2786, "step": 49929 }, { "epoch": 0.8679100975160354, "grad_norm": 1.0246798472833591, "learning_rate": 4.506541250938634e-08, "loss": 0.106, "step": 49930 }, { "epoch": 0.8679274800535383, "grad_norm": 1.0477682548768166, "learning_rate": 4.505373420370917e-08, "loss": 0.1338, "step": 49931 }, { "epoch": 0.8679448625910411, "grad_norm": 5.720505901363886, "learning_rate": 4.504205734000266e-08, "loss": 0.3021, "step": 49932 }, { "epoch": 0.8679622451285439, "grad_norm": 0.9876208350057948, "learning_rate": 4.503038191830383e-08, "loss": 0.1041, "step": 49933 }, { "epoch": 0.8679796276660466, "grad_norm": 1.3355383821649862, "learning_rate": 4.50187079386497e-08, "loss": 0.1947, "step": 49934 }, { "epoch": 0.8679970102035495, "grad_norm": 3.022079985232636, "learning_rate": 4.5007035401077256e-08, "loss": 0.2105, "step": 49935 }, { "epoch": 0.8680143927410523, "grad_norm": 0.7064167295543007, "learning_rate": 4.499536430562351e-08, "loss": 0.1586, "step": 49936 }, { "epoch": 0.8680317752785551, "grad_norm": 1.295952381730169, "learning_rate": 4.498369465232532e-08, "loss": 0.1657, "step": 49937 }, { "epoch": 0.868049157816058, "grad_norm": 1.2837415321684487, "learning_rate": 4.497202644121995e-08, "loss": 0.2023, "step": 49938 }, { "epoch": 0.8680665403535608, "grad_norm": 1.2216958707201917, "learning_rate": 4.4960359672344186e-08, "loss": 0.2104, "step": 49939 }, { "epoch": 0.8680839228910636, "grad_norm": 0.9731555813591338, "learning_rate": 4.494869434573506e-08, "loss": 0.1245, "step": 49940 }, { "epoch": 0.8681013054285664, "grad_norm": 1.5226635608131525, "learning_rate": 4.4937030461429606e-08, "loss": 0.1849, "step": 49941 }, { "epoch": 0.8681186879660693, "grad_norm": 1.1159263981340144, "learning_rate": 4.492536801946467e-08, "loss": 0.1422, "step": 49942 }, { "epoch": 0.8681360705035721, "grad_norm": 0.993226745486498, "learning_rate": 4.491370701987723e-08, "loss": 0.2384, "step": 49943 }, { "epoch": 0.8681534530410749, "grad_norm": 0.8216053038561034, "learning_rate": 4.490204746270437e-08, "loss": 0.1384, "step": 49944 }, { "epoch": 0.8681708355785778, "grad_norm": 1.3626939425417064, "learning_rate": 4.4890389347982835e-08, "loss": 0.1789, "step": 49945 }, { "epoch": 0.8681882181160806, "grad_norm": 1.0815851938330243, "learning_rate": 4.487873267574988e-08, "loss": 0.1045, "step": 49946 }, { "epoch": 0.8682056006535834, "grad_norm": 1.5288661016742726, "learning_rate": 4.486707744604223e-08, "loss": 0.1638, "step": 49947 }, { "epoch": 0.8682229831910863, "grad_norm": 0.9658198448249558, "learning_rate": 4.485542365889672e-08, "loss": 0.1873, "step": 49948 }, { "epoch": 0.8682403657285891, "grad_norm": 1.3059732890564564, "learning_rate": 4.4843771314350574e-08, "loss": 0.1758, "step": 49949 }, { "epoch": 0.8682577482660919, "grad_norm": 1.6651178889386427, "learning_rate": 4.483212041244061e-08, "loss": 0.1392, "step": 49950 }, { "epoch": 0.8682751308035948, "grad_norm": 0.938151066902443, "learning_rate": 4.482047095320368e-08, "loss": 0.1291, "step": 49951 }, { "epoch": 0.8682925133410976, "grad_norm": 2.558872020527648, "learning_rate": 4.480882293667682e-08, "loss": 0.2212, "step": 49952 }, { "epoch": 0.8683098958786004, "grad_norm": 1.1229123879577283, "learning_rate": 4.479717636289687e-08, "loss": 0.0897, "step": 49953 }, { "epoch": 0.8683272784161031, "grad_norm": 1.192609424514541, "learning_rate": 4.478553123190076e-08, "loss": 0.2071, "step": 49954 }, { "epoch": 0.868344660953606, "grad_norm": 2.1356595617115226, "learning_rate": 4.4773887543725406e-08, "loss": 0.2034, "step": 49955 }, { "epoch": 0.8683620434911088, "grad_norm": 1.3130609907107667, "learning_rate": 4.4762245298407606e-08, "loss": 0.1617, "step": 49956 }, { "epoch": 0.8683794260286116, "grad_norm": 1.1900807176796937, "learning_rate": 4.47506044959845e-08, "loss": 0.1804, "step": 49957 }, { "epoch": 0.8683968085661145, "grad_norm": 1.2381507822277904, "learning_rate": 4.4738965136492835e-08, "loss": 0.1513, "step": 49958 }, { "epoch": 0.8684141911036173, "grad_norm": 1.0324324067629775, "learning_rate": 4.4727327219969635e-08, "loss": 0.1906, "step": 49959 }, { "epoch": 0.8684315736411201, "grad_norm": 1.602323486968096, "learning_rate": 4.4715690746451486e-08, "loss": 0.1177, "step": 49960 }, { "epoch": 0.8684489561786229, "grad_norm": 1.5908236105605296, "learning_rate": 4.470405571597552e-08, "loss": 0.1416, "step": 49961 }, { "epoch": 0.8684663387161258, "grad_norm": 1.1457399915279778, "learning_rate": 4.4692422128578546e-08, "loss": 0.106, "step": 49962 }, { "epoch": 0.8684837212536286, "grad_norm": 3.91485059768924, "learning_rate": 4.468078998429747e-08, "loss": 0.2011, "step": 49963 }, { "epoch": 0.8685011037911314, "grad_norm": 1.8531723248600749, "learning_rate": 4.4669159283169e-08, "loss": 0.1761, "step": 49964 }, { "epoch": 0.8685184863286343, "grad_norm": 1.6391380297604605, "learning_rate": 4.465753002523037e-08, "loss": 0.1628, "step": 49965 }, { "epoch": 0.8685358688661371, "grad_norm": 4.223090418350914, "learning_rate": 4.464590221051812e-08, "loss": 0.141, "step": 49966 }, { "epoch": 0.8685532514036399, "grad_norm": 1.0998119603390883, "learning_rate": 4.463427583906915e-08, "loss": 0.2498, "step": 49967 }, { "epoch": 0.8685706339411428, "grad_norm": 1.899533411369553, "learning_rate": 4.4622650910920265e-08, "loss": 0.1563, "step": 49968 }, { "epoch": 0.8685880164786456, "grad_norm": 1.8987741332197787, "learning_rate": 4.461102742610845e-08, "loss": 0.178, "step": 49969 }, { "epoch": 0.8686053990161484, "grad_norm": 6.372793357449224, "learning_rate": 4.459940538467055e-08, "loss": 0.2405, "step": 49970 }, { "epoch": 0.8686227815536512, "grad_norm": 1.7617245781717092, "learning_rate": 4.458778478664332e-08, "loss": 0.1737, "step": 49971 }, { "epoch": 0.8686401640911541, "grad_norm": 1.114621849837718, "learning_rate": 4.457616563206357e-08, "loss": 0.1811, "step": 49972 }, { "epoch": 0.8686575466286568, "grad_norm": 1.9222422664088696, "learning_rate": 4.45645479209682e-08, "loss": 0.2594, "step": 49973 }, { "epoch": 0.8686749291661596, "grad_norm": 1.2372192025559074, "learning_rate": 4.455293165339402e-08, "loss": 0.1167, "step": 49974 }, { "epoch": 0.8686923117036625, "grad_norm": 0.8844190346688195, "learning_rate": 4.454131682937784e-08, "loss": 0.1731, "step": 49975 }, { "epoch": 0.8687096942411653, "grad_norm": 1.5818348569114777, "learning_rate": 4.4529703448956344e-08, "loss": 0.1852, "step": 49976 }, { "epoch": 0.8687270767786681, "grad_norm": 1.4809912442451687, "learning_rate": 4.4518091512166566e-08, "loss": 0.1447, "step": 49977 }, { "epoch": 0.868744459316171, "grad_norm": 1.838867876724223, "learning_rate": 4.4506481019045304e-08, "loss": 0.2032, "step": 49978 }, { "epoch": 0.8687618418536738, "grad_norm": 1.5039417600131908, "learning_rate": 4.4494871969629086e-08, "loss": 0.1803, "step": 49979 }, { "epoch": 0.8687792243911766, "grad_norm": 1.796384209028186, "learning_rate": 4.4483264363954943e-08, "loss": 0.1708, "step": 49980 }, { "epoch": 0.8687966069286794, "grad_norm": 1.8247100854793585, "learning_rate": 4.4471658202059617e-08, "loss": 0.2181, "step": 49981 }, { "epoch": 0.8688139894661823, "grad_norm": 1.4088451632452372, "learning_rate": 4.446005348397991e-08, "loss": 0.1831, "step": 49982 }, { "epoch": 0.8688313720036851, "grad_norm": 0.9387795660958533, "learning_rate": 4.444845020975241e-08, "loss": 0.0832, "step": 49983 }, { "epoch": 0.8688487545411879, "grad_norm": 1.2705453199043912, "learning_rate": 4.443684837941431e-08, "loss": 0.2074, "step": 49984 }, { "epoch": 0.8688661370786908, "grad_norm": 0.8595516214621416, "learning_rate": 4.4425247993002014e-08, "loss": 0.2001, "step": 49985 }, { "epoch": 0.8688835196161936, "grad_norm": 2.242823128671204, "learning_rate": 4.441364905055245e-08, "loss": 0.1565, "step": 49986 }, { "epoch": 0.8689009021536964, "grad_norm": 4.026831421921974, "learning_rate": 4.440205155210219e-08, "loss": 0.2605, "step": 49987 }, { "epoch": 0.8689182846911992, "grad_norm": 3.52312429240618, "learning_rate": 4.4390455497688275e-08, "loss": 0.195, "step": 49988 }, { "epoch": 0.8689356672287021, "grad_norm": 1.0028346279454123, "learning_rate": 4.437886088734727e-08, "loss": 0.132, "step": 49989 }, { "epoch": 0.8689530497662049, "grad_norm": 1.7626527490956556, "learning_rate": 4.436726772111604e-08, "loss": 0.1552, "step": 49990 }, { "epoch": 0.8689704323037077, "grad_norm": 1.144880908111111, "learning_rate": 4.4355675999031225e-08, "loss": 0.1449, "step": 49991 }, { "epoch": 0.8689878148412106, "grad_norm": 1.2600634563392201, "learning_rate": 4.434408572112963e-08, "loss": 0.136, "step": 49992 }, { "epoch": 0.8690051973787133, "grad_norm": 1.2807265642055337, "learning_rate": 4.4332496887448e-08, "loss": 0.1248, "step": 49993 }, { "epoch": 0.8690225799162161, "grad_norm": 1.6265919971401537, "learning_rate": 4.432090949802303e-08, "loss": 0.138, "step": 49994 }, { "epoch": 0.869039962453719, "grad_norm": 2.330210067826834, "learning_rate": 4.4309323552891357e-08, "loss": 0.161, "step": 49995 }, { "epoch": 0.8690573449912218, "grad_norm": 1.5743285563464169, "learning_rate": 4.429773905208989e-08, "loss": 0.1191, "step": 49996 }, { "epoch": 0.8690747275287246, "grad_norm": 2.117685580116536, "learning_rate": 4.428615599565533e-08, "loss": 0.1833, "step": 49997 }, { "epoch": 0.8690921100662274, "grad_norm": 1.150276208901396, "learning_rate": 4.427457438362414e-08, "loss": 0.119, "step": 49998 }, { "epoch": 0.8691094926037303, "grad_norm": 1.586058264111433, "learning_rate": 4.42629942160333e-08, "loss": 0.2504, "step": 49999 }, { "epoch": 0.8691268751412331, "grad_norm": 1.8486838812589563, "learning_rate": 4.425141549291944e-08, "loss": 0.182, "step": 50000 }, { "epoch": 0.8691442576787359, "grad_norm": 14.437005850654277, "learning_rate": 4.42398382143192e-08, "loss": 0.4297, "step": 50001 }, { "epoch": 0.8691616402162388, "grad_norm": 1.0552161952715848, "learning_rate": 4.4228262380269335e-08, "loss": 0.1593, "step": 50002 }, { "epoch": 0.8691790227537416, "grad_norm": 1.2200757101213529, "learning_rate": 4.421668799080652e-08, "loss": 0.1241, "step": 50003 }, { "epoch": 0.8691964052912444, "grad_norm": 1.5622117034077987, "learning_rate": 4.420511504596747e-08, "loss": 0.2004, "step": 50004 }, { "epoch": 0.8692137878287473, "grad_norm": 0.8948224659245345, "learning_rate": 4.4193543545788745e-08, "loss": 0.2258, "step": 50005 }, { "epoch": 0.8692311703662501, "grad_norm": 1.3885194413038753, "learning_rate": 4.418197349030711e-08, "loss": 0.1325, "step": 50006 }, { "epoch": 0.8692485529037529, "grad_norm": 1.0528262120588952, "learning_rate": 4.417040487955925e-08, "loss": 0.1469, "step": 50007 }, { "epoch": 0.8692659354412557, "grad_norm": 1.506153765931702, "learning_rate": 4.4158837713581864e-08, "loss": 0.1561, "step": 50008 }, { "epoch": 0.8692833179787586, "grad_norm": 1.1588067222332423, "learning_rate": 4.4147271992411636e-08, "loss": 0.1185, "step": 50009 }, { "epoch": 0.8693007005162614, "grad_norm": 0.7836724417224897, "learning_rate": 4.413570771608494e-08, "loss": 0.1221, "step": 50010 }, { "epoch": 0.8693180830537642, "grad_norm": 1.3915340945619343, "learning_rate": 4.412414488463878e-08, "loss": 0.1616, "step": 50011 }, { "epoch": 0.8693354655912671, "grad_norm": 0.8140761354044359, "learning_rate": 4.4112583498109645e-08, "loss": 0.18, "step": 50012 }, { "epoch": 0.8693528481287698, "grad_norm": 1.2283984050578094, "learning_rate": 4.410102355653422e-08, "loss": 0.1492, "step": 50013 }, { "epoch": 0.8693702306662726, "grad_norm": 1.133040519951345, "learning_rate": 4.408946505994904e-08, "loss": 0.1571, "step": 50014 }, { "epoch": 0.8693876132037754, "grad_norm": 2.550884846677294, "learning_rate": 4.407790800839101e-08, "loss": 0.1779, "step": 50015 }, { "epoch": 0.8694049957412783, "grad_norm": 1.0913050694316389, "learning_rate": 4.406635240189649e-08, "loss": 0.1943, "step": 50016 }, { "epoch": 0.8694223782787811, "grad_norm": 1.3720282247714903, "learning_rate": 4.4054798240502176e-08, "loss": 0.1992, "step": 50017 }, { "epoch": 0.8694397608162839, "grad_norm": 0.613049099962406, "learning_rate": 4.40432455242446e-08, "loss": 0.1135, "step": 50018 }, { "epoch": 0.8694571433537868, "grad_norm": 1.388897815787951, "learning_rate": 4.4031694253160614e-08, "loss": 0.2472, "step": 50019 }, { "epoch": 0.8694745258912896, "grad_norm": 1.407095607727482, "learning_rate": 4.402014442728669e-08, "loss": 0.1694, "step": 50020 }, { "epoch": 0.8694919084287924, "grad_norm": 1.8048629326934815, "learning_rate": 4.400859604665941e-08, "loss": 0.1896, "step": 50021 }, { "epoch": 0.8695092909662953, "grad_norm": 1.2121499387261359, "learning_rate": 4.399704911131547e-08, "loss": 0.1796, "step": 50022 }, { "epoch": 0.8695266735037981, "grad_norm": 1.2404145322573616, "learning_rate": 4.3985503621291386e-08, "loss": 0.3342, "step": 50023 }, { "epoch": 0.8695440560413009, "grad_norm": 0.8363380503219434, "learning_rate": 4.397395957662381e-08, "loss": 0.2215, "step": 50024 }, { "epoch": 0.8695614385788037, "grad_norm": 2.4881185734734355, "learning_rate": 4.396241697734926e-08, "loss": 0.143, "step": 50025 }, { "epoch": 0.8695788211163066, "grad_norm": 1.13103434118016, "learning_rate": 4.395087582350432e-08, "loss": 0.1572, "step": 50026 }, { "epoch": 0.8695962036538094, "grad_norm": 1.4006634669434521, "learning_rate": 4.393933611512562e-08, "loss": 0.2112, "step": 50027 }, { "epoch": 0.8696135861913122, "grad_norm": 1.8373151708157907, "learning_rate": 4.392779785224987e-08, "loss": 0.1795, "step": 50028 }, { "epoch": 0.8696309687288151, "grad_norm": 2.232303261435067, "learning_rate": 4.391626103491336e-08, "loss": 0.2392, "step": 50029 }, { "epoch": 0.8696483512663179, "grad_norm": 1.8442586216232912, "learning_rate": 4.3904725663152845e-08, "loss": 0.2191, "step": 50030 }, { "epoch": 0.8696657338038207, "grad_norm": 1.5430063137681365, "learning_rate": 4.389319173700484e-08, "loss": 0.1889, "step": 50031 }, { "epoch": 0.8696831163413236, "grad_norm": 2.0764504967841177, "learning_rate": 4.3881659256505885e-08, "loss": 0.199, "step": 50032 }, { "epoch": 0.8697004988788263, "grad_norm": 2.7387077357521097, "learning_rate": 4.387012822169256e-08, "loss": 0.2436, "step": 50033 }, { "epoch": 0.8697178814163291, "grad_norm": 1.0101963006664954, "learning_rate": 4.385859863260138e-08, "loss": 0.1967, "step": 50034 }, { "epoch": 0.8697352639538319, "grad_norm": 1.223009806988738, "learning_rate": 4.3847070489268886e-08, "loss": 0.2624, "step": 50035 }, { "epoch": 0.8697526464913348, "grad_norm": 1.5509987926847553, "learning_rate": 4.383554379173171e-08, "loss": 0.1936, "step": 50036 }, { "epoch": 0.8697700290288376, "grad_norm": 0.8751819379777732, "learning_rate": 4.382401854002621e-08, "loss": 0.2552, "step": 50037 }, { "epoch": 0.8697874115663404, "grad_norm": 0.9889982934101195, "learning_rate": 4.3812494734189075e-08, "loss": 0.2118, "step": 50038 }, { "epoch": 0.8698047941038433, "grad_norm": 1.349483794822312, "learning_rate": 4.3800972374256783e-08, "loss": 0.1204, "step": 50039 }, { "epoch": 0.8698221766413461, "grad_norm": 1.663006056218384, "learning_rate": 4.3789451460265856e-08, "loss": 0.168, "step": 50040 }, { "epoch": 0.8698395591788489, "grad_norm": 0.8583127658862207, "learning_rate": 4.3777931992252826e-08, "loss": 0.1948, "step": 50041 }, { "epoch": 0.8698569417163518, "grad_norm": 1.073489212264081, "learning_rate": 4.376641397025416e-08, "loss": 0.137, "step": 50042 }, { "epoch": 0.8698743242538546, "grad_norm": 3.2321704470378583, "learning_rate": 4.375489739430638e-08, "loss": 0.2126, "step": 50043 }, { "epoch": 0.8698917067913574, "grad_norm": 1.216856345851039, "learning_rate": 4.374338226444596e-08, "loss": 0.2056, "step": 50044 }, { "epoch": 0.8699090893288602, "grad_norm": 2.468225484940991, "learning_rate": 4.373186858070937e-08, "loss": 0.1554, "step": 50045 }, { "epoch": 0.8699264718663631, "grad_norm": 1.437825860991494, "learning_rate": 4.3720356343133316e-08, "loss": 0.1619, "step": 50046 }, { "epoch": 0.8699438544038659, "grad_norm": 0.924588899434585, "learning_rate": 4.3708845551754136e-08, "loss": 0.1492, "step": 50047 }, { "epoch": 0.8699612369413687, "grad_norm": 1.0348335882310553, "learning_rate": 4.369733620660826e-08, "loss": 0.1661, "step": 50048 }, { "epoch": 0.8699786194788716, "grad_norm": 1.1223073094698885, "learning_rate": 4.3685828307732155e-08, "loss": 0.1106, "step": 50049 }, { "epoch": 0.8699960020163744, "grad_norm": 1.3521231567599785, "learning_rate": 4.3674321855162397e-08, "loss": 0.2085, "step": 50050 }, { "epoch": 0.8700133845538772, "grad_norm": 0.9555325281379643, "learning_rate": 4.366281684893547e-08, "loss": 0.0758, "step": 50051 }, { "epoch": 0.87003076709138, "grad_norm": 1.0315302217399975, "learning_rate": 4.365131328908772e-08, "loss": 0.1924, "step": 50052 }, { "epoch": 0.8700481496288828, "grad_norm": 0.903064751984413, "learning_rate": 4.3639811175655735e-08, "loss": 0.2283, "step": 50053 }, { "epoch": 0.8700655321663856, "grad_norm": 1.7488873755277634, "learning_rate": 4.3628310508675934e-08, "loss": 0.2085, "step": 50054 }, { "epoch": 0.8700829147038884, "grad_norm": 1.8026372844036873, "learning_rate": 4.361681128818467e-08, "loss": 0.2666, "step": 50055 }, { "epoch": 0.8701002972413913, "grad_norm": 0.9045737030251308, "learning_rate": 4.360531351421853e-08, "loss": 0.277, "step": 50056 }, { "epoch": 0.8701176797788941, "grad_norm": 1.2275807220308252, "learning_rate": 4.359381718681382e-08, "loss": 0.2454, "step": 50057 }, { "epoch": 0.8701350623163969, "grad_norm": 1.2498197621721565, "learning_rate": 4.358232230600711e-08, "loss": 0.2039, "step": 50058 }, { "epoch": 0.8701524448538998, "grad_norm": 1.3694359715447186, "learning_rate": 4.357082887183477e-08, "loss": 0.1893, "step": 50059 }, { "epoch": 0.8701698273914026, "grad_norm": 0.9472140325305702, "learning_rate": 4.355933688433322e-08, "loss": 0.1135, "step": 50060 }, { "epoch": 0.8701872099289054, "grad_norm": 1.6133527663393161, "learning_rate": 4.354784634353897e-08, "loss": 0.1458, "step": 50061 }, { "epoch": 0.8702045924664082, "grad_norm": 1.5704122002568508, "learning_rate": 4.3536357249488285e-08, "loss": 0.2781, "step": 50062 }, { "epoch": 0.8702219750039111, "grad_norm": 1.1814011531346476, "learning_rate": 4.3524869602217737e-08, "loss": 0.3401, "step": 50063 }, { "epoch": 0.8702393575414139, "grad_norm": 0.7546343616144003, "learning_rate": 4.3513383401763635e-08, "loss": 0.1487, "step": 50064 }, { "epoch": 0.8702567400789167, "grad_norm": 1.4543673377418778, "learning_rate": 4.350189864816234e-08, "loss": 0.2337, "step": 50065 }, { "epoch": 0.8702741226164196, "grad_norm": 1.3808744988224162, "learning_rate": 4.349041534145048e-08, "loss": 0.1067, "step": 50066 }, { "epoch": 0.8702915051539224, "grad_norm": 1.4881823898639517, "learning_rate": 4.3478933481664204e-08, "loss": 0.1489, "step": 50067 }, { "epoch": 0.8703088876914252, "grad_norm": 1.6823549059245528, "learning_rate": 4.346745306883992e-08, "loss": 0.1374, "step": 50068 }, { "epoch": 0.8703262702289281, "grad_norm": 1.340474746269783, "learning_rate": 4.3455974103014215e-08, "loss": 0.2022, "step": 50069 }, { "epoch": 0.8703436527664309, "grad_norm": 1.0993762426945584, "learning_rate": 4.3444496584223334e-08, "loss": 0.1338, "step": 50070 }, { "epoch": 0.8703610353039337, "grad_norm": 2.070902112674164, "learning_rate": 4.343302051250369e-08, "loss": 0.2014, "step": 50071 }, { "epoch": 0.8703784178414365, "grad_norm": 1.1606269779936542, "learning_rate": 4.34215458878916e-08, "loss": 0.2086, "step": 50072 }, { "epoch": 0.8703958003789393, "grad_norm": 1.9683104479774078, "learning_rate": 4.341007271042352e-08, "loss": 0.2201, "step": 50073 }, { "epoch": 0.8704131829164421, "grad_norm": 0.7053578445367491, "learning_rate": 4.339860098013576e-08, "loss": 0.1542, "step": 50074 }, { "epoch": 0.8704305654539449, "grad_norm": 1.1737378406333088, "learning_rate": 4.338713069706468e-08, "loss": 0.2141, "step": 50075 }, { "epoch": 0.8704479479914478, "grad_norm": 0.8989611453756258, "learning_rate": 4.337566186124653e-08, "loss": 0.192, "step": 50076 }, { "epoch": 0.8704653305289506, "grad_norm": 1.2869010403254735, "learning_rate": 4.3364194472717884e-08, "loss": 0.1825, "step": 50077 }, { "epoch": 0.8704827130664534, "grad_norm": 1.307201158286921, "learning_rate": 4.3352728531515005e-08, "loss": 0.1642, "step": 50078 }, { "epoch": 0.8705000956039562, "grad_norm": 1.5515377175995502, "learning_rate": 4.334126403767419e-08, "loss": 0.2093, "step": 50079 }, { "epoch": 0.8705174781414591, "grad_norm": 1.17015506699622, "learning_rate": 4.332980099123179e-08, "loss": 0.1485, "step": 50080 }, { "epoch": 0.8705348606789619, "grad_norm": 1.7268266635936391, "learning_rate": 4.331833939222418e-08, "loss": 0.1993, "step": 50081 }, { "epoch": 0.8705522432164647, "grad_norm": 4.150018561693127, "learning_rate": 4.33068792406876e-08, "loss": 0.1872, "step": 50082 }, { "epoch": 0.8705696257539676, "grad_norm": 0.7377989791173457, "learning_rate": 4.329542053665847e-08, "loss": 0.1108, "step": 50083 }, { "epoch": 0.8705870082914704, "grad_norm": 1.0453373203850191, "learning_rate": 4.328396328017292e-08, "loss": 0.0996, "step": 50084 }, { "epoch": 0.8706043908289732, "grad_norm": 2.103399983936382, "learning_rate": 4.327250747126765e-08, "loss": 0.2116, "step": 50085 }, { "epoch": 0.8706217733664761, "grad_norm": 1.4397649237257288, "learning_rate": 4.326105310997857e-08, "loss": 0.2901, "step": 50086 }, { "epoch": 0.8706391559039789, "grad_norm": 1.1240691201761073, "learning_rate": 4.32496001963421e-08, "loss": 0.1855, "step": 50087 }, { "epoch": 0.8706565384414817, "grad_norm": 1.715220378368019, "learning_rate": 4.3238148730394665e-08, "loss": 0.2336, "step": 50088 }, { "epoch": 0.8706739209789846, "grad_norm": 0.8635761646049067, "learning_rate": 4.3226698712172494e-08, "loss": 0.1719, "step": 50089 }, { "epoch": 0.8706913035164874, "grad_norm": 2.1536662139388048, "learning_rate": 4.321525014171185e-08, "loss": 0.161, "step": 50090 }, { "epoch": 0.8707086860539902, "grad_norm": 0.9626134999225369, "learning_rate": 4.3203803019049024e-08, "loss": 0.1414, "step": 50091 }, { "epoch": 0.870726068591493, "grad_norm": 1.6074753340023442, "learning_rate": 4.319235734422028e-08, "loss": 0.1424, "step": 50092 }, { "epoch": 0.8707434511289958, "grad_norm": 0.9519879695801419, "learning_rate": 4.318091311726196e-08, "loss": 0.174, "step": 50093 }, { "epoch": 0.8707608336664986, "grad_norm": 1.4280264702788608, "learning_rate": 4.3169470338210335e-08, "loss": 0.2242, "step": 50094 }, { "epoch": 0.8707782162040014, "grad_norm": 1.1610144022219522, "learning_rate": 4.315802900710147e-08, "loss": 0.1569, "step": 50095 }, { "epoch": 0.8707955987415043, "grad_norm": 2.093979147681679, "learning_rate": 4.3146589123971954e-08, "loss": 0.3972, "step": 50096 }, { "epoch": 0.8708129812790071, "grad_norm": 1.6694360900280698, "learning_rate": 4.3135150688857866e-08, "loss": 0.2016, "step": 50097 }, { "epoch": 0.8708303638165099, "grad_norm": 0.8558155878774454, "learning_rate": 4.312371370179557e-08, "loss": 0.2088, "step": 50098 }, { "epoch": 0.8708477463540127, "grad_norm": 1.4375372127188892, "learning_rate": 4.311227816282104e-08, "loss": 0.1762, "step": 50099 }, { "epoch": 0.8708651288915156, "grad_norm": 1.4127025843746595, "learning_rate": 4.310084407197084e-08, "loss": 0.1268, "step": 50100 }, { "epoch": 0.8708825114290184, "grad_norm": 1.5776638628631328, "learning_rate": 4.308941142928102e-08, "loss": 0.1611, "step": 50101 }, { "epoch": 0.8708998939665212, "grad_norm": 4.224866172797297, "learning_rate": 4.307798023478793e-08, "loss": 0.258, "step": 50102 }, { "epoch": 0.8709172765040241, "grad_norm": 2.423060623634539, "learning_rate": 4.306655048852764e-08, "loss": 0.2342, "step": 50103 }, { "epoch": 0.8709346590415269, "grad_norm": 1.1341904307437023, "learning_rate": 4.3055122190536643e-08, "loss": 0.1403, "step": 50104 }, { "epoch": 0.8709520415790297, "grad_norm": 1.4720859830866406, "learning_rate": 4.304369534085095e-08, "loss": 0.2088, "step": 50105 }, { "epoch": 0.8709694241165326, "grad_norm": 1.1560637626554975, "learning_rate": 4.303226993950682e-08, "loss": 0.1796, "step": 50106 }, { "epoch": 0.8709868066540354, "grad_norm": 2.676219869042988, "learning_rate": 4.302084598654043e-08, "loss": 0.1689, "step": 50107 }, { "epoch": 0.8710041891915382, "grad_norm": 1.0631620780128967, "learning_rate": 4.300942348198805e-08, "loss": 0.3727, "step": 50108 }, { "epoch": 0.871021571729041, "grad_norm": 1.6350682759341104, "learning_rate": 4.299800242588597e-08, "loss": 0.1239, "step": 50109 }, { "epoch": 0.8710389542665439, "grad_norm": 1.2730884430113365, "learning_rate": 4.298658281827022e-08, "loss": 0.1809, "step": 50110 }, { "epoch": 0.8710563368040467, "grad_norm": 1.2677123490041717, "learning_rate": 4.297516465917711e-08, "loss": 0.1868, "step": 50111 }, { "epoch": 0.8710737193415494, "grad_norm": 1.1949673456667402, "learning_rate": 4.296374794864272e-08, "loss": 0.2101, "step": 50112 }, { "epoch": 0.8710911018790523, "grad_norm": 1.8130928300707512, "learning_rate": 4.2952332686703394e-08, "loss": 0.1952, "step": 50113 }, { "epoch": 0.8711084844165551, "grad_norm": 1.5633387702865855, "learning_rate": 4.294091887339513e-08, "loss": 0.1134, "step": 50114 }, { "epoch": 0.8711258669540579, "grad_norm": 1.5259928841885777, "learning_rate": 4.2929506508754156e-08, "loss": 0.3208, "step": 50115 }, { "epoch": 0.8711432494915607, "grad_norm": 1.6050124947215836, "learning_rate": 4.291809559281678e-08, "loss": 0.207, "step": 50116 }, { "epoch": 0.8711606320290636, "grad_norm": 1.9281787355037805, "learning_rate": 4.290668612561921e-08, "loss": 0.2708, "step": 50117 }, { "epoch": 0.8711780145665664, "grad_norm": 1.9513337981253847, "learning_rate": 4.289527810719723e-08, "loss": 0.1253, "step": 50118 }, { "epoch": 0.8711953971040692, "grad_norm": 1.46959630185476, "learning_rate": 4.2883871537587324e-08, "loss": 0.1456, "step": 50119 }, { "epoch": 0.8712127796415721, "grad_norm": 1.6831522574899564, "learning_rate": 4.287246641682557e-08, "loss": 0.1188, "step": 50120 }, { "epoch": 0.8712301621790749, "grad_norm": 1.8346422548964991, "learning_rate": 4.286106274494816e-08, "loss": 0.2307, "step": 50121 }, { "epoch": 0.8712475447165777, "grad_norm": 0.9744440517913341, "learning_rate": 4.284966052199113e-08, "loss": 0.1311, "step": 50122 }, { "epoch": 0.8712649272540806, "grad_norm": 1.0709852868767562, "learning_rate": 4.283825974799071e-08, "loss": 0.1096, "step": 50123 }, { "epoch": 0.8712823097915834, "grad_norm": 1.0980609069385783, "learning_rate": 4.2826860422983e-08, "loss": 0.1843, "step": 50124 }, { "epoch": 0.8712996923290862, "grad_norm": 1.6900039701702578, "learning_rate": 4.2815462547004135e-08, "loss": 0.285, "step": 50125 }, { "epoch": 0.871317074866589, "grad_norm": 1.6887916278196007, "learning_rate": 4.280406612009013e-08, "loss": 0.1761, "step": 50126 }, { "epoch": 0.8713344574040919, "grad_norm": 0.9137559657538934, "learning_rate": 4.279267114227736e-08, "loss": 0.1506, "step": 50127 }, { "epoch": 0.8713518399415947, "grad_norm": 2.284640612910394, "learning_rate": 4.278127761360173e-08, "loss": 0.2178, "step": 50128 }, { "epoch": 0.8713692224790975, "grad_norm": 2.1062506156190404, "learning_rate": 4.276988553409955e-08, "loss": 0.1805, "step": 50129 }, { "epoch": 0.8713866050166004, "grad_norm": 1.569204816270646, "learning_rate": 4.275849490380662e-08, "loss": 0.2553, "step": 50130 }, { "epoch": 0.8714039875541032, "grad_norm": 1.5293710498148083, "learning_rate": 4.27471057227593e-08, "loss": 0.1881, "step": 50131 }, { "epoch": 0.8714213700916059, "grad_norm": 1.0855907946123544, "learning_rate": 4.273571799099363e-08, "loss": 0.1966, "step": 50132 }, { "epoch": 0.8714387526291087, "grad_norm": 1.7759878826055786, "learning_rate": 4.272433170854561e-08, "loss": 0.1303, "step": 50133 }, { "epoch": 0.8714561351666116, "grad_norm": 1.0897722681714412, "learning_rate": 4.271294687545141e-08, "loss": 0.1418, "step": 50134 }, { "epoch": 0.8714735177041144, "grad_norm": 5.84646941783927, "learning_rate": 4.270156349174725e-08, "loss": 0.3363, "step": 50135 }, { "epoch": 0.8714909002416172, "grad_norm": 1.4961658611528013, "learning_rate": 4.269018155746895e-08, "loss": 0.1746, "step": 50136 }, { "epoch": 0.8715082827791201, "grad_norm": 1.2975829248842263, "learning_rate": 4.2678801072652756e-08, "loss": 0.15, "step": 50137 }, { "epoch": 0.8715256653166229, "grad_norm": 1.8423145156536866, "learning_rate": 4.2667422037334535e-08, "loss": 0.2685, "step": 50138 }, { "epoch": 0.8715430478541257, "grad_norm": 2.092834684504221, "learning_rate": 4.265604445155063e-08, "loss": 0.221, "step": 50139 }, { "epoch": 0.8715604303916286, "grad_norm": 1.7892260091389502, "learning_rate": 4.2644668315336975e-08, "loss": 0.2116, "step": 50140 }, { "epoch": 0.8715778129291314, "grad_norm": 1.130769973235516, "learning_rate": 4.263329362872964e-08, "loss": 0.2039, "step": 50141 }, { "epoch": 0.8715951954666342, "grad_norm": 1.7468150072179556, "learning_rate": 4.262192039176465e-08, "loss": 0.1843, "step": 50142 }, { "epoch": 0.871612578004137, "grad_norm": 0.7597417590808776, "learning_rate": 4.26105486044781e-08, "loss": 0.0913, "step": 50143 }, { "epoch": 0.8716299605416399, "grad_norm": 2.5378206439038884, "learning_rate": 4.259917826690596e-08, "loss": 0.1769, "step": 50144 }, { "epoch": 0.8716473430791427, "grad_norm": 1.9858521612059938, "learning_rate": 4.258780937908435e-08, "loss": 0.1988, "step": 50145 }, { "epoch": 0.8716647256166455, "grad_norm": 2.6920461575050956, "learning_rate": 4.2576441941049146e-08, "loss": 0.1757, "step": 50146 }, { "epoch": 0.8716821081541484, "grad_norm": 1.3753664267101582, "learning_rate": 4.256507595283659e-08, "loss": 0.2025, "step": 50147 }, { "epoch": 0.8716994906916512, "grad_norm": 1.7569953434368317, "learning_rate": 4.255371141448272e-08, "loss": 0.2326, "step": 50148 }, { "epoch": 0.871716873229154, "grad_norm": 1.405510080663782, "learning_rate": 4.254234832602327e-08, "loss": 0.157, "step": 50149 }, { "epoch": 0.8717342557666569, "grad_norm": 1.0513448568428048, "learning_rate": 4.253098668749455e-08, "loss": 0.1926, "step": 50150 }, { "epoch": 0.8717516383041597, "grad_norm": 1.330189552571843, "learning_rate": 4.251962649893243e-08, "loss": 0.1433, "step": 50151 }, { "epoch": 0.8717690208416624, "grad_norm": 1.4777324833497936, "learning_rate": 4.2508267760372915e-08, "loss": 0.2328, "step": 50152 }, { "epoch": 0.8717864033791652, "grad_norm": 1.427186378256504, "learning_rate": 4.249691047185211e-08, "loss": 0.2166, "step": 50153 }, { "epoch": 0.8718037859166681, "grad_norm": 1.1220762751189024, "learning_rate": 4.248555463340586e-08, "loss": 0.2041, "step": 50154 }, { "epoch": 0.8718211684541709, "grad_norm": 2.4780153894457313, "learning_rate": 4.2474200245070313e-08, "loss": 0.1809, "step": 50155 }, { "epoch": 0.8718385509916737, "grad_norm": 2.7047243271540404, "learning_rate": 4.246284730688132e-08, "loss": 0.328, "step": 50156 }, { "epoch": 0.8718559335291766, "grad_norm": 0.6640291740709321, "learning_rate": 4.245149581887486e-08, "loss": 0.2036, "step": 50157 }, { "epoch": 0.8718733160666794, "grad_norm": 1.293636800974829, "learning_rate": 4.244014578108707e-08, "loss": 0.2767, "step": 50158 }, { "epoch": 0.8718906986041822, "grad_norm": 0.7969357066341782, "learning_rate": 4.242879719355386e-08, "loss": 0.2377, "step": 50159 }, { "epoch": 0.871908081141685, "grad_norm": 1.8487466559658066, "learning_rate": 4.241745005631109e-08, "loss": 0.2114, "step": 50160 }, { "epoch": 0.8719254636791879, "grad_norm": 1.7491620329047959, "learning_rate": 4.24061043693949e-08, "loss": 0.2381, "step": 50161 }, { "epoch": 0.8719428462166907, "grad_norm": 0.9007700854418047, "learning_rate": 4.23947601328411e-08, "loss": 0.1753, "step": 50162 }, { "epoch": 0.8719602287541935, "grad_norm": 1.0963214334632243, "learning_rate": 4.2383417346685715e-08, "loss": 0.2345, "step": 50163 }, { "epoch": 0.8719776112916964, "grad_norm": 1.4050360174845777, "learning_rate": 4.237207601096471e-08, "loss": 0.2331, "step": 50164 }, { "epoch": 0.8719949938291992, "grad_norm": 0.9364206160303398, "learning_rate": 4.236073612571389e-08, "loss": 0.1771, "step": 50165 }, { "epoch": 0.872012376366702, "grad_norm": 1.4965734452842088, "learning_rate": 4.2349397690969405e-08, "loss": 0.1458, "step": 50166 }, { "epoch": 0.8720297589042049, "grad_norm": 1.331045344010986, "learning_rate": 4.233806070676721e-08, "loss": 0.1028, "step": 50167 }, { "epoch": 0.8720471414417077, "grad_norm": 1.154917507084844, "learning_rate": 4.2326725173143005e-08, "loss": 0.2339, "step": 50168 }, { "epoch": 0.8720645239792105, "grad_norm": 1.7616379602847698, "learning_rate": 4.231539109013277e-08, "loss": 0.3724, "step": 50169 }, { "epoch": 0.8720819065167134, "grad_norm": 1.2213275824700809, "learning_rate": 4.2304058457772575e-08, "loss": 0.2318, "step": 50170 }, { "epoch": 0.8720992890542162, "grad_norm": 1.5819990793614067, "learning_rate": 4.2292727276098286e-08, "loss": 0.263, "step": 50171 }, { "epoch": 0.8721166715917189, "grad_norm": 1.9030363979866929, "learning_rate": 4.2281397545145816e-08, "loss": 0.2291, "step": 50172 }, { "epoch": 0.8721340541292217, "grad_norm": 2.4117620784837173, "learning_rate": 4.2270069264951025e-08, "loss": 0.2097, "step": 50173 }, { "epoch": 0.8721514366667246, "grad_norm": 1.2964790364039638, "learning_rate": 4.2258742435549885e-08, "loss": 0.1296, "step": 50174 }, { "epoch": 0.8721688192042274, "grad_norm": 1.794064161756962, "learning_rate": 4.2247417056978264e-08, "loss": 0.19, "step": 50175 }, { "epoch": 0.8721862017417302, "grad_norm": 1.5906933911898766, "learning_rate": 4.223609312927195e-08, "loss": 0.2287, "step": 50176 }, { "epoch": 0.8722035842792331, "grad_norm": 1.6032436013868943, "learning_rate": 4.222477065246699e-08, "loss": 0.1431, "step": 50177 }, { "epoch": 0.8722209668167359, "grad_norm": 1.6725929996712035, "learning_rate": 4.221344962659928e-08, "loss": 0.2541, "step": 50178 }, { "epoch": 0.8722383493542387, "grad_norm": 0.8224852934143998, "learning_rate": 4.220213005170464e-08, "loss": 0.1589, "step": 50179 }, { "epoch": 0.8722557318917415, "grad_norm": 1.489578234558907, "learning_rate": 4.2190811927818926e-08, "loss": 0.1339, "step": 50180 }, { "epoch": 0.8722731144292444, "grad_norm": 1.2951816090678645, "learning_rate": 4.2179495254978046e-08, "loss": 0.1939, "step": 50181 }, { "epoch": 0.8722904969667472, "grad_norm": 1.3939839961184621, "learning_rate": 4.216818003321787e-08, "loss": 0.2782, "step": 50182 }, { "epoch": 0.87230787950425, "grad_norm": 0.9363807271390779, "learning_rate": 4.215686626257425e-08, "loss": 0.1365, "step": 50183 }, { "epoch": 0.8723252620417529, "grad_norm": 1.0218989253627837, "learning_rate": 4.214555394308295e-08, "loss": 0.1419, "step": 50184 }, { "epoch": 0.8723426445792557, "grad_norm": 1.6187875772450315, "learning_rate": 4.213424307478003e-08, "loss": 0.3078, "step": 50185 }, { "epoch": 0.8723600271167585, "grad_norm": 0.6408398997040936, "learning_rate": 4.212293365770131e-08, "loss": 0.1047, "step": 50186 }, { "epoch": 0.8723774096542614, "grad_norm": 1.341063770596661, "learning_rate": 4.211162569188248e-08, "loss": 0.2303, "step": 50187 }, { "epoch": 0.8723947921917642, "grad_norm": 1.2821048112306073, "learning_rate": 4.210031917735935e-08, "loss": 0.285, "step": 50188 }, { "epoch": 0.872412174729267, "grad_norm": 1.546090359882309, "learning_rate": 4.208901411416793e-08, "loss": 0.2069, "step": 50189 }, { "epoch": 0.8724295572667699, "grad_norm": 2.6757866026750396, "learning_rate": 4.207771050234404e-08, "loss": 0.2996, "step": 50190 }, { "epoch": 0.8724469398042727, "grad_norm": 1.6906633771011912, "learning_rate": 4.206640834192343e-08, "loss": 0.1499, "step": 50191 }, { "epoch": 0.8724643223417754, "grad_norm": 1.2756598851464367, "learning_rate": 4.2055107632941955e-08, "loss": 0.1809, "step": 50192 }, { "epoch": 0.8724817048792782, "grad_norm": 1.4863614371568037, "learning_rate": 4.2043808375435476e-08, "loss": 0.1858, "step": 50193 }, { "epoch": 0.8724990874167811, "grad_norm": 1.533703369435538, "learning_rate": 4.203251056943968e-08, "loss": 0.1495, "step": 50194 }, { "epoch": 0.8725164699542839, "grad_norm": 3.3781594438123737, "learning_rate": 4.20212142149905e-08, "loss": 0.2636, "step": 50195 }, { "epoch": 0.8725338524917867, "grad_norm": 1.9180765614417685, "learning_rate": 4.2009919312123556e-08, "loss": 0.1939, "step": 50196 }, { "epoch": 0.8725512350292896, "grad_norm": 0.9504272100506942, "learning_rate": 4.1998625860874935e-08, "loss": 0.1599, "step": 50197 }, { "epoch": 0.8725686175667924, "grad_norm": 1.0349804166357526, "learning_rate": 4.198733386128023e-08, "loss": 0.1764, "step": 50198 }, { "epoch": 0.8725860001042952, "grad_norm": 1.300259194856384, "learning_rate": 4.1976043313375286e-08, "loss": 0.1216, "step": 50199 }, { "epoch": 0.872603382641798, "grad_norm": 1.667116394421113, "learning_rate": 4.196475421719592e-08, "loss": 0.1816, "step": 50200 }, { "epoch": 0.8726207651793009, "grad_norm": 1.9661113642699577, "learning_rate": 4.1953466572777875e-08, "loss": 0.1852, "step": 50201 }, { "epoch": 0.8726381477168037, "grad_norm": 1.6187312108862841, "learning_rate": 4.19421803801569e-08, "loss": 0.1217, "step": 50202 }, { "epoch": 0.8726555302543065, "grad_norm": 2.405448589220376, "learning_rate": 4.19308956393688e-08, "loss": 0.3342, "step": 50203 }, { "epoch": 0.8726729127918094, "grad_norm": 1.7836459393502255, "learning_rate": 4.191961235044928e-08, "loss": 0.1984, "step": 50204 }, { "epoch": 0.8726902953293122, "grad_norm": 2.7622339501717117, "learning_rate": 4.190833051343434e-08, "loss": 0.2434, "step": 50205 }, { "epoch": 0.872707677866815, "grad_norm": 1.7625418562816486, "learning_rate": 4.1897050128359476e-08, "loss": 0.2481, "step": 50206 }, { "epoch": 0.8727250604043179, "grad_norm": 1.4668744809292085, "learning_rate": 4.1885771195260436e-08, "loss": 0.2045, "step": 50207 }, { "epoch": 0.8727424429418207, "grad_norm": 1.2844325781281245, "learning_rate": 4.187449371417312e-08, "loss": 0.1488, "step": 50208 }, { "epoch": 0.8727598254793235, "grad_norm": 1.7599327952998598, "learning_rate": 4.1863217685133236e-08, "loss": 0.1651, "step": 50209 }, { "epoch": 0.8727772080168263, "grad_norm": 2.2722901746409443, "learning_rate": 4.1851943108176465e-08, "loss": 0.2976, "step": 50210 }, { "epoch": 0.8727945905543292, "grad_norm": 1.0870743288591747, "learning_rate": 4.184066998333863e-08, "loss": 0.1424, "step": 50211 }, { "epoch": 0.8728119730918319, "grad_norm": 1.14717059322815, "learning_rate": 4.1829398310655406e-08, "loss": 0.229, "step": 50212 }, { "epoch": 0.8728293556293347, "grad_norm": 1.4345819858164712, "learning_rate": 4.1818128090162494e-08, "loss": 0.2077, "step": 50213 }, { "epoch": 0.8728467381668376, "grad_norm": 1.3033537042228511, "learning_rate": 4.180685932189565e-08, "loss": 0.2042, "step": 50214 }, { "epoch": 0.8728641207043404, "grad_norm": 1.1332556772186073, "learning_rate": 4.17955920058905e-08, "loss": 0.1198, "step": 50215 }, { "epoch": 0.8728815032418432, "grad_norm": 2.5046232214880226, "learning_rate": 4.178432614218297e-08, "loss": 0.1794, "step": 50216 }, { "epoch": 0.872898885779346, "grad_norm": 1.459658712585536, "learning_rate": 4.177306173080858e-08, "loss": 0.1706, "step": 50217 }, { "epoch": 0.8729162683168489, "grad_norm": 0.9820385256731189, "learning_rate": 4.176179877180319e-08, "loss": 0.2376, "step": 50218 }, { "epoch": 0.8729336508543517, "grad_norm": 0.8776606095881202, "learning_rate": 4.175053726520228e-08, "loss": 0.1683, "step": 50219 }, { "epoch": 0.8729510333918545, "grad_norm": 1.1563518700110822, "learning_rate": 4.17392772110417e-08, "loss": 0.1004, "step": 50220 }, { "epoch": 0.8729684159293574, "grad_norm": 1.2444044014642832, "learning_rate": 4.17280186093571e-08, "loss": 0.1164, "step": 50221 }, { "epoch": 0.8729857984668602, "grad_norm": 0.9113544801663691, "learning_rate": 4.1716761460184214e-08, "loss": 0.1749, "step": 50222 }, { "epoch": 0.873003181004363, "grad_norm": 1.22377237539831, "learning_rate": 4.1705505763558526e-08, "loss": 0.1228, "step": 50223 }, { "epoch": 0.8730205635418659, "grad_norm": 2.2678816424941446, "learning_rate": 4.169425151951606e-08, "loss": 0.2476, "step": 50224 }, { "epoch": 0.8730379460793687, "grad_norm": 1.4973026949591557, "learning_rate": 4.168299872809222e-08, "loss": 0.1395, "step": 50225 }, { "epoch": 0.8730553286168715, "grad_norm": 2.439996401617308, "learning_rate": 4.167174738932272e-08, "loss": 0.1702, "step": 50226 }, { "epoch": 0.8730727111543743, "grad_norm": 1.6225386987005723, "learning_rate": 4.1660497503243185e-08, "loss": 0.1905, "step": 50227 }, { "epoch": 0.8730900936918772, "grad_norm": 1.4854031241340164, "learning_rate": 4.164924906988937e-08, "loss": 0.2329, "step": 50228 }, { "epoch": 0.87310747622938, "grad_norm": 1.420988177066181, "learning_rate": 4.1638002089296906e-08, "loss": 0.1574, "step": 50229 }, { "epoch": 0.8731248587668828, "grad_norm": 1.8089479419999233, "learning_rate": 4.1626756561501444e-08, "loss": 0.1854, "step": 50230 }, { "epoch": 0.8731422413043857, "grad_norm": 1.535000848675194, "learning_rate": 4.161551248653861e-08, "loss": 0.1923, "step": 50231 }, { "epoch": 0.8731596238418884, "grad_norm": 1.8166785123319718, "learning_rate": 4.1604269864443984e-08, "loss": 0.2139, "step": 50232 }, { "epoch": 0.8731770063793912, "grad_norm": 1.3800157944470386, "learning_rate": 4.1593028695253275e-08, "loss": 0.2084, "step": 50233 }, { "epoch": 0.873194388916894, "grad_norm": 1.7100419323491478, "learning_rate": 4.158178897900211e-08, "loss": 0.138, "step": 50234 }, { "epoch": 0.8732117714543969, "grad_norm": 1.488619397462332, "learning_rate": 4.1570550715726017e-08, "loss": 0.1638, "step": 50235 }, { "epoch": 0.8732291539918997, "grad_norm": 2.0788316161500733, "learning_rate": 4.155931390546075e-08, "loss": 0.1711, "step": 50236 }, { "epoch": 0.8732465365294025, "grad_norm": 1.3347883982441284, "learning_rate": 4.154807854824194e-08, "loss": 0.1882, "step": 50237 }, { "epoch": 0.8732639190669054, "grad_norm": 1.6555923083002668, "learning_rate": 4.153684464410501e-08, "loss": 0.2277, "step": 50238 }, { "epoch": 0.8732813016044082, "grad_norm": 1.8438353922758888, "learning_rate": 4.15256121930857e-08, "loss": 0.188, "step": 50239 }, { "epoch": 0.873298684141911, "grad_norm": 1.6267649895424336, "learning_rate": 4.151438119521966e-08, "loss": 0.2787, "step": 50240 }, { "epoch": 0.8733160666794139, "grad_norm": 1.274342433755739, "learning_rate": 4.1503151650542357e-08, "loss": 0.1443, "step": 50241 }, { "epoch": 0.8733334492169167, "grad_norm": 0.7640687205426674, "learning_rate": 4.149192355908948e-08, "loss": 0.1304, "step": 50242 }, { "epoch": 0.8733508317544195, "grad_norm": 2.2935369801072762, "learning_rate": 4.1480696920896565e-08, "loss": 0.1531, "step": 50243 }, { "epoch": 0.8733682142919224, "grad_norm": 1.5731882083083621, "learning_rate": 4.1469471735999237e-08, "loss": 0.2104, "step": 50244 }, { "epoch": 0.8733855968294252, "grad_norm": 0.8622541738802324, "learning_rate": 4.145824800443304e-08, "loss": 0.277, "step": 50245 }, { "epoch": 0.873402979366928, "grad_norm": 1.6777281294296462, "learning_rate": 4.144702572623349e-08, "loss": 0.1745, "step": 50246 }, { "epoch": 0.8734203619044308, "grad_norm": 1.0261039585003067, "learning_rate": 4.143580490143628e-08, "loss": 0.1904, "step": 50247 }, { "epoch": 0.8734377444419337, "grad_norm": 1.3395324285283907, "learning_rate": 4.1424585530076936e-08, "loss": 0.1243, "step": 50248 }, { "epoch": 0.8734551269794365, "grad_norm": 1.3857912800473218, "learning_rate": 4.141336761219111e-08, "loss": 0.1537, "step": 50249 }, { "epoch": 0.8734725095169393, "grad_norm": 1.2896241429209387, "learning_rate": 4.140215114781409e-08, "loss": 0.1302, "step": 50250 }, { "epoch": 0.873489892054442, "grad_norm": 1.2137954917053553, "learning_rate": 4.1390936136981635e-08, "loss": 0.3019, "step": 50251 }, { "epoch": 0.8735072745919449, "grad_norm": 2.8195545195634977, "learning_rate": 4.137972257972927e-08, "loss": 0.1364, "step": 50252 }, { "epoch": 0.8735246571294477, "grad_norm": 1.2901948721385, "learning_rate": 4.1368510476092464e-08, "loss": 0.1498, "step": 50253 }, { "epoch": 0.8735420396669505, "grad_norm": 1.4049581325135247, "learning_rate": 4.135729982610675e-08, "loss": 0.1246, "step": 50254 }, { "epoch": 0.8735594222044534, "grad_norm": 1.0523891713047975, "learning_rate": 4.134609062980793e-08, "loss": 0.1888, "step": 50255 }, { "epoch": 0.8735768047419562, "grad_norm": 1.6404225575445626, "learning_rate": 4.133488288723114e-08, "loss": 0.191, "step": 50256 }, { "epoch": 0.873594187279459, "grad_norm": 0.8607902731711189, "learning_rate": 4.132367659841213e-08, "loss": 0.2116, "step": 50257 }, { "epoch": 0.8736115698169619, "grad_norm": 2.292607259610243, "learning_rate": 4.131247176338626e-08, "loss": 0.1631, "step": 50258 }, { "epoch": 0.8736289523544647, "grad_norm": 1.0464005582988782, "learning_rate": 4.130126838218928e-08, "loss": 0.1023, "step": 50259 }, { "epoch": 0.8736463348919675, "grad_norm": 1.1772603439250853, "learning_rate": 4.1290066454856556e-08, "loss": 0.2186, "step": 50260 }, { "epoch": 0.8736637174294704, "grad_norm": 1.0273593648042552, "learning_rate": 4.127886598142355e-08, "loss": 0.1784, "step": 50261 }, { "epoch": 0.8736810999669732, "grad_norm": 1.5185435067119741, "learning_rate": 4.126766696192585e-08, "loss": 0.1724, "step": 50262 }, { "epoch": 0.873698482504476, "grad_norm": 1.3318022071312556, "learning_rate": 4.125646939639899e-08, "loss": 0.1674, "step": 50263 }, { "epoch": 0.8737158650419788, "grad_norm": 0.9797183183578411, "learning_rate": 4.124527328487831e-08, "loss": 0.2571, "step": 50264 }, { "epoch": 0.8737332475794817, "grad_norm": 1.8457040616140512, "learning_rate": 4.1234078627399305e-08, "loss": 0.2182, "step": 50265 }, { "epoch": 0.8737506301169845, "grad_norm": 1.5811553051072247, "learning_rate": 4.122288542399765e-08, "loss": 0.1931, "step": 50266 }, { "epoch": 0.8737680126544873, "grad_norm": 1.0074788427505796, "learning_rate": 4.121169367470867e-08, "loss": 0.1592, "step": 50267 }, { "epoch": 0.8737853951919902, "grad_norm": 1.2982994600599689, "learning_rate": 4.120050337956799e-08, "loss": 0.1765, "step": 50268 }, { "epoch": 0.873802777729493, "grad_norm": 0.7742719632300704, "learning_rate": 4.118931453861074e-08, "loss": 0.1684, "step": 50269 }, { "epoch": 0.8738201602669958, "grad_norm": 0.7984675953909353, "learning_rate": 4.117812715187269e-08, "loss": 0.1525, "step": 50270 }, { "epoch": 0.8738375428044985, "grad_norm": 0.8294652002517651, "learning_rate": 4.116694121938924e-08, "loss": 0.1398, "step": 50271 }, { "epoch": 0.8738549253420014, "grad_norm": 2.134154139012607, "learning_rate": 4.115575674119581e-08, "loss": 0.2066, "step": 50272 }, { "epoch": 0.8738723078795042, "grad_norm": 2.0721286053707884, "learning_rate": 4.1144573717327767e-08, "loss": 0.2424, "step": 50273 }, { "epoch": 0.873889690417007, "grad_norm": 1.4146490648696068, "learning_rate": 4.1133392147820856e-08, "loss": 0.352, "step": 50274 }, { "epoch": 0.8739070729545099, "grad_norm": 1.6384946630646229, "learning_rate": 4.112221203271016e-08, "loss": 0.1897, "step": 50275 }, { "epoch": 0.8739244554920127, "grad_norm": 2.2109580469622063, "learning_rate": 4.1111033372031314e-08, "loss": 0.2313, "step": 50276 }, { "epoch": 0.8739418380295155, "grad_norm": 1.8070221040924894, "learning_rate": 4.109985616581957e-08, "loss": 0.3657, "step": 50277 }, { "epoch": 0.8739592205670184, "grad_norm": 2.450697894161602, "learning_rate": 4.1088680414110565e-08, "loss": 0.1559, "step": 50278 }, { "epoch": 0.8739766031045212, "grad_norm": 2.097436946574841, "learning_rate": 4.107750611693966e-08, "loss": 0.1452, "step": 50279 }, { "epoch": 0.873993985642024, "grad_norm": 5.641559200835378, "learning_rate": 4.106633327434222e-08, "loss": 0.2575, "step": 50280 }, { "epoch": 0.8740113681795268, "grad_norm": 1.2162460320403243, "learning_rate": 4.105516188635372e-08, "loss": 0.2603, "step": 50281 }, { "epoch": 0.8740287507170297, "grad_norm": 1.5536190953669144, "learning_rate": 4.10439919530095e-08, "loss": 0.1176, "step": 50282 }, { "epoch": 0.8740461332545325, "grad_norm": 1.182436857864394, "learning_rate": 4.1032823474345055e-08, "loss": 0.1827, "step": 50283 }, { "epoch": 0.8740635157920353, "grad_norm": 1.4594494951174661, "learning_rate": 4.102165645039568e-08, "loss": 0.1887, "step": 50284 }, { "epoch": 0.8740808983295382, "grad_norm": 1.3072798286630265, "learning_rate": 4.1010490881196736e-08, "loss": 0.1308, "step": 50285 }, { "epoch": 0.874098280867041, "grad_norm": 0.8374443623829957, "learning_rate": 4.099932676678375e-08, "loss": 0.2631, "step": 50286 }, { "epoch": 0.8741156634045438, "grad_norm": 1.6139854925078816, "learning_rate": 4.0988164107192194e-08, "loss": 0.1662, "step": 50287 }, { "epoch": 0.8741330459420467, "grad_norm": 1.2932667935765907, "learning_rate": 4.097700290245709e-08, "loss": 0.2419, "step": 50288 }, { "epoch": 0.8741504284795495, "grad_norm": 1.0127205746743193, "learning_rate": 4.0965843152614145e-08, "loss": 0.2241, "step": 50289 }, { "epoch": 0.8741678110170523, "grad_norm": 0.9882064035944046, "learning_rate": 4.09546848576986e-08, "loss": 0.0961, "step": 50290 }, { "epoch": 0.874185193554555, "grad_norm": 1.6141421511632303, "learning_rate": 4.0943528017745864e-08, "loss": 0.149, "step": 50291 }, { "epoch": 0.8742025760920579, "grad_norm": 1.2707055898297042, "learning_rate": 4.0932372632791254e-08, "loss": 0.1949, "step": 50292 }, { "epoch": 0.8742199586295607, "grad_norm": 2.0346572922770743, "learning_rate": 4.092121870287013e-08, "loss": 0.1941, "step": 50293 }, { "epoch": 0.8742373411670635, "grad_norm": 4.403646600567414, "learning_rate": 4.091006622801785e-08, "loss": 0.2933, "step": 50294 }, { "epoch": 0.8742547237045664, "grad_norm": 1.468092154000556, "learning_rate": 4.089891520826977e-08, "loss": 0.2005, "step": 50295 }, { "epoch": 0.8742721062420692, "grad_norm": 1.4178278309157035, "learning_rate": 4.08877656436612e-08, "loss": 0.1475, "step": 50296 }, { "epoch": 0.874289488779572, "grad_norm": 1.7409153688972663, "learning_rate": 4.0876617534227554e-08, "loss": 0.2346, "step": 50297 }, { "epoch": 0.8743068713170749, "grad_norm": 1.213171082927854, "learning_rate": 4.086547088000414e-08, "loss": 0.1204, "step": 50298 }, { "epoch": 0.8743242538545777, "grad_norm": 1.4917550507738107, "learning_rate": 4.085432568102626e-08, "loss": 0.1709, "step": 50299 }, { "epoch": 0.8743416363920805, "grad_norm": 1.4613174825547925, "learning_rate": 4.084318193732927e-08, "loss": 0.1728, "step": 50300 }, { "epoch": 0.8743590189295833, "grad_norm": 1.305291083444641, "learning_rate": 4.083203964894849e-08, "loss": 0.1391, "step": 50301 }, { "epoch": 0.8743764014670862, "grad_norm": 1.7002479291443053, "learning_rate": 4.0820898815919157e-08, "loss": 0.14, "step": 50302 }, { "epoch": 0.874393784004589, "grad_norm": 1.1244012978404916, "learning_rate": 4.080975943827669e-08, "loss": 0.1512, "step": 50303 }, { "epoch": 0.8744111665420918, "grad_norm": 1.3727660326781208, "learning_rate": 4.079862151605629e-08, "loss": 0.0907, "step": 50304 }, { "epoch": 0.8744285490795947, "grad_norm": 1.5129697775384534, "learning_rate": 4.078748504929336e-08, "loss": 0.1838, "step": 50305 }, { "epoch": 0.8744459316170975, "grad_norm": 1.410550012937017, "learning_rate": 4.077635003802327e-08, "loss": 0.1195, "step": 50306 }, { "epoch": 0.8744633141546003, "grad_norm": 0.7825413675688038, "learning_rate": 4.0765216482281096e-08, "loss": 0.2845, "step": 50307 }, { "epoch": 0.8744806966921032, "grad_norm": 1.68291522873792, "learning_rate": 4.075408438210215e-08, "loss": 0.1743, "step": 50308 }, { "epoch": 0.874498079229606, "grad_norm": 1.5660802637694295, "learning_rate": 4.07429537375219e-08, "loss": 0.1341, "step": 50309 }, { "epoch": 0.8745154617671088, "grad_norm": 1.1421200226322763, "learning_rate": 4.073182454857549e-08, "loss": 0.1484, "step": 50310 }, { "epoch": 0.8745328443046115, "grad_norm": 1.1974237997922654, "learning_rate": 4.072069681529827e-08, "loss": 0.1306, "step": 50311 }, { "epoch": 0.8745502268421144, "grad_norm": 1.5432728476373396, "learning_rate": 4.070957053772545e-08, "loss": 0.1763, "step": 50312 }, { "epoch": 0.8745676093796172, "grad_norm": 1.85829061862757, "learning_rate": 4.069844571589226e-08, "loss": 0.1879, "step": 50313 }, { "epoch": 0.87458499191712, "grad_norm": 0.7307001549894272, "learning_rate": 4.068732234983407e-08, "loss": 0.2082, "step": 50314 }, { "epoch": 0.8746023744546229, "grad_norm": 1.2525295912288235, "learning_rate": 4.067620043958608e-08, "loss": 0.2396, "step": 50315 }, { "epoch": 0.8746197569921257, "grad_norm": 2.02421503904968, "learning_rate": 4.066507998518343e-08, "loss": 0.2165, "step": 50316 }, { "epoch": 0.8746371395296285, "grad_norm": 1.049840894328472, "learning_rate": 4.065396098666152e-08, "loss": 0.1736, "step": 50317 }, { "epoch": 0.8746545220671313, "grad_norm": 2.0298354157451985, "learning_rate": 4.064284344405561e-08, "loss": 0.2243, "step": 50318 }, { "epoch": 0.8746719046046342, "grad_norm": 1.1060406152609732, "learning_rate": 4.063172735740084e-08, "loss": 0.1213, "step": 50319 }, { "epoch": 0.874689287142137, "grad_norm": 2.103422227236825, "learning_rate": 4.06206127267325e-08, "loss": 0.2164, "step": 50320 }, { "epoch": 0.8747066696796398, "grad_norm": 1.0676298291953774, "learning_rate": 4.060949955208581e-08, "loss": 0.2209, "step": 50321 }, { "epoch": 0.8747240522171427, "grad_norm": 1.8124572269949328, "learning_rate": 4.059838783349595e-08, "loss": 0.1738, "step": 50322 }, { "epoch": 0.8747414347546455, "grad_norm": 1.9175209900075976, "learning_rate": 4.058727757099817e-08, "loss": 0.1606, "step": 50323 }, { "epoch": 0.8747588172921483, "grad_norm": 1.230433497799744, "learning_rate": 4.0576168764627606e-08, "loss": 0.2161, "step": 50324 }, { "epoch": 0.8747761998296512, "grad_norm": 1.46205629825576, "learning_rate": 4.056506141441973e-08, "loss": 0.165, "step": 50325 }, { "epoch": 0.874793582367154, "grad_norm": 10.282238110871416, "learning_rate": 4.055395552040947e-08, "loss": 0.2688, "step": 50326 }, { "epoch": 0.8748109649046568, "grad_norm": 1.4670508556266737, "learning_rate": 4.0542851082632005e-08, "loss": 0.1379, "step": 50327 }, { "epoch": 0.8748283474421596, "grad_norm": 1.7637244642852214, "learning_rate": 4.053174810112275e-08, "loss": 0.2676, "step": 50328 }, { "epoch": 0.8748457299796625, "grad_norm": 1.1628698800819084, "learning_rate": 4.052064657591681e-08, "loss": 0.1669, "step": 50329 }, { "epoch": 0.8748631125171653, "grad_norm": 0.7565611485685623, "learning_rate": 4.05095465070493e-08, "loss": 0.0945, "step": 50330 }, { "epoch": 0.874880495054668, "grad_norm": 1.3030163730591773, "learning_rate": 4.0498447894555476e-08, "loss": 0.1244, "step": 50331 }, { "epoch": 0.8748978775921709, "grad_norm": 1.0688759122162337, "learning_rate": 4.048735073847054e-08, "loss": 0.2111, "step": 50332 }, { "epoch": 0.8749152601296737, "grad_norm": 1.298074712677629, "learning_rate": 4.047625503882957e-08, "loss": 0.1674, "step": 50333 }, { "epoch": 0.8749326426671765, "grad_norm": 1.7988702252501896, "learning_rate": 4.046516079566781e-08, "loss": 0.2364, "step": 50334 }, { "epoch": 0.8749500252046793, "grad_norm": 2.0232587131172406, "learning_rate": 4.0454068009020293e-08, "loss": 0.2613, "step": 50335 }, { "epoch": 0.8749674077421822, "grad_norm": 1.1083442827700885, "learning_rate": 4.0442976678922384e-08, "loss": 0.1473, "step": 50336 }, { "epoch": 0.874984790279685, "grad_norm": 1.9999827762537152, "learning_rate": 4.04318868054091e-08, "loss": 0.2943, "step": 50337 }, { "epoch": 0.8750021728171878, "grad_norm": 1.2313164828739076, "learning_rate": 4.042079838851575e-08, "loss": 0.153, "step": 50338 }, { "epoch": 0.8750195553546907, "grad_norm": 0.9256003151695746, "learning_rate": 4.040971142827715e-08, "loss": 0.305, "step": 50339 }, { "epoch": 0.8750369378921935, "grad_norm": 2.8714168440816503, "learning_rate": 4.039862592472876e-08, "loss": 0.2547, "step": 50340 }, { "epoch": 0.8750543204296963, "grad_norm": 1.8084635063624377, "learning_rate": 4.0387541877905604e-08, "loss": 0.156, "step": 50341 }, { "epoch": 0.8750717029671992, "grad_norm": 0.7891918425764523, "learning_rate": 4.0376459287842833e-08, "loss": 0.1305, "step": 50342 }, { "epoch": 0.875089085504702, "grad_norm": 0.8686967204307704, "learning_rate": 4.036537815457541e-08, "loss": 0.21, "step": 50343 }, { "epoch": 0.8751064680422048, "grad_norm": 0.9962868883080391, "learning_rate": 4.035429847813882e-08, "loss": 0.1465, "step": 50344 }, { "epoch": 0.8751238505797077, "grad_norm": 2.255289243830671, "learning_rate": 4.034322025856784e-08, "loss": 0.2115, "step": 50345 }, { "epoch": 0.8751412331172105, "grad_norm": 1.4070508452926507, "learning_rate": 4.033214349589775e-08, "loss": 0.1454, "step": 50346 }, { "epoch": 0.8751586156547133, "grad_norm": 1.6646261828402917, "learning_rate": 4.032106819016345e-08, "loss": 0.1833, "step": 50347 }, { "epoch": 0.8751759981922161, "grad_norm": 1.3446157600529656, "learning_rate": 4.030999434140037e-08, "loss": 0.1335, "step": 50348 }, { "epoch": 0.875193380729719, "grad_norm": 1.8802028608888672, "learning_rate": 4.0298921949643406e-08, "loss": 0.2568, "step": 50349 }, { "epoch": 0.8752107632672218, "grad_norm": 2.1382987479175286, "learning_rate": 4.028785101492765e-08, "loss": 0.2203, "step": 50350 }, { "epoch": 0.8752281458047245, "grad_norm": 1.277871249391377, "learning_rate": 4.027678153728831e-08, "loss": 0.1213, "step": 50351 }, { "epoch": 0.8752455283422274, "grad_norm": 2.052554848661661, "learning_rate": 4.026571351676034e-08, "loss": 0.1532, "step": 50352 }, { "epoch": 0.8752629108797302, "grad_norm": 1.6461544331902285, "learning_rate": 4.025464695337888e-08, "loss": 0.3259, "step": 50353 }, { "epoch": 0.875280293417233, "grad_norm": 1.7223184713114337, "learning_rate": 4.024358184717891e-08, "loss": 0.1434, "step": 50354 }, { "epoch": 0.8752976759547358, "grad_norm": 1.1471163882057511, "learning_rate": 4.0232518198195677e-08, "loss": 0.1473, "step": 50355 }, { "epoch": 0.8753150584922387, "grad_norm": 0.9675860379892399, "learning_rate": 4.0221456006464204e-08, "loss": 0.2168, "step": 50356 }, { "epoch": 0.8753324410297415, "grad_norm": 1.705686513327691, "learning_rate": 4.021039527201958e-08, "loss": 0.1995, "step": 50357 }, { "epoch": 0.8753498235672443, "grad_norm": 1.1325363044676968, "learning_rate": 4.019933599489661e-08, "loss": 0.1925, "step": 50358 }, { "epoch": 0.8753672061047472, "grad_norm": 1.3395662729582343, "learning_rate": 4.01882781751306e-08, "loss": 0.1373, "step": 50359 }, { "epoch": 0.87538458864225, "grad_norm": 1.5173176604815777, "learning_rate": 4.017722181275651e-08, "loss": 0.1764, "step": 50360 }, { "epoch": 0.8754019711797528, "grad_norm": 1.3193435103610671, "learning_rate": 4.016616690780944e-08, "loss": 0.1578, "step": 50361 }, { "epoch": 0.8754193537172557, "grad_norm": 1.5242469304101038, "learning_rate": 4.0155113460324295e-08, "loss": 0.1789, "step": 50362 }, { "epoch": 0.8754367362547585, "grad_norm": 2.621272082564243, "learning_rate": 4.014406147033639e-08, "loss": 0.1684, "step": 50363 }, { "epoch": 0.8754541187922613, "grad_norm": 2.0549366835823606, "learning_rate": 4.013301093788046e-08, "loss": 0.2393, "step": 50364 }, { "epoch": 0.8754715013297641, "grad_norm": 0.7365760144656223, "learning_rate": 4.012196186299166e-08, "loss": 0.1427, "step": 50365 }, { "epoch": 0.875488883867267, "grad_norm": 1.2296171361157269, "learning_rate": 4.0110914245704895e-08, "loss": 0.1922, "step": 50366 }, { "epoch": 0.8755062664047698, "grad_norm": 1.2075168526754152, "learning_rate": 4.0099868086055364e-08, "loss": 0.2729, "step": 50367 }, { "epoch": 0.8755236489422726, "grad_norm": 0.9955691727388529, "learning_rate": 4.0088823384077987e-08, "loss": 0.1997, "step": 50368 }, { "epoch": 0.8755410314797755, "grad_norm": 1.1715451927837626, "learning_rate": 4.0077780139807783e-08, "loss": 0.1341, "step": 50369 }, { "epoch": 0.8755584140172783, "grad_norm": 1.8914609703380787, "learning_rate": 4.006673835327973e-08, "loss": 0.1792, "step": 50370 }, { "epoch": 0.875575796554781, "grad_norm": 0.9750049841269286, "learning_rate": 4.005569802452885e-08, "loss": 0.236, "step": 50371 }, { "epoch": 0.8755931790922838, "grad_norm": 0.9763648976493244, "learning_rate": 4.004465915359012e-08, "loss": 0.1417, "step": 50372 }, { "epoch": 0.8756105616297867, "grad_norm": 1.4977866936285826, "learning_rate": 4.003362174049851e-08, "loss": 0.1897, "step": 50373 }, { "epoch": 0.8756279441672895, "grad_norm": 1.132252799547303, "learning_rate": 4.0022585785289e-08, "loss": 0.1002, "step": 50374 }, { "epoch": 0.8756453267047923, "grad_norm": 1.0776876851371577, "learning_rate": 4.0011551287996715e-08, "loss": 0.1668, "step": 50375 }, { "epoch": 0.8756627092422952, "grad_norm": 1.8748374280006663, "learning_rate": 4.000051824865647e-08, "loss": 0.2276, "step": 50376 }, { "epoch": 0.875680091779798, "grad_norm": 1.3808676817810024, "learning_rate": 3.9989486667303175e-08, "loss": 0.2112, "step": 50377 }, { "epoch": 0.8756974743173008, "grad_norm": 1.4670724593102449, "learning_rate": 3.997845654397197e-08, "loss": 0.1887, "step": 50378 }, { "epoch": 0.8757148568548037, "grad_norm": 1.325772458494148, "learning_rate": 3.996742787869772e-08, "loss": 0.1355, "step": 50379 }, { "epoch": 0.8757322393923065, "grad_norm": 1.52773935278934, "learning_rate": 3.995640067151546e-08, "loss": 0.1555, "step": 50380 }, { "epoch": 0.8757496219298093, "grad_norm": 2.235313957706945, "learning_rate": 3.994537492246003e-08, "loss": 0.2709, "step": 50381 }, { "epoch": 0.8757670044673121, "grad_norm": 1.008242020766505, "learning_rate": 3.993435063156647e-08, "loss": 0.2264, "step": 50382 }, { "epoch": 0.875784387004815, "grad_norm": 1.4589987308309946, "learning_rate": 3.992332779886964e-08, "loss": 0.1527, "step": 50383 }, { "epoch": 0.8758017695423178, "grad_norm": 1.6698617247086687, "learning_rate": 3.991230642440457e-08, "loss": 0.1293, "step": 50384 }, { "epoch": 0.8758191520798206, "grad_norm": 1.5886998760339595, "learning_rate": 3.990128650820601e-08, "loss": 0.2465, "step": 50385 }, { "epoch": 0.8758365346173235, "grad_norm": 2.1371235351073907, "learning_rate": 3.989026805030915e-08, "loss": 0.1579, "step": 50386 }, { "epoch": 0.8758539171548263, "grad_norm": 1.5503597388258832, "learning_rate": 3.987925105074874e-08, "loss": 0.2047, "step": 50387 }, { "epoch": 0.8758712996923291, "grad_norm": 2.3548464837429504, "learning_rate": 3.986823550955987e-08, "loss": 0.272, "step": 50388 }, { "epoch": 0.875888682229832, "grad_norm": 1.4508626872580062, "learning_rate": 3.985722142677711e-08, "loss": 0.2497, "step": 50389 }, { "epoch": 0.8759060647673348, "grad_norm": 1.4924113288703624, "learning_rate": 3.984620880243567e-08, "loss": 0.1598, "step": 50390 }, { "epoch": 0.8759234473048375, "grad_norm": 1.04578808653486, "learning_rate": 3.983519763657039e-08, "loss": 0.2305, "step": 50391 }, { "epoch": 0.8759408298423403, "grad_norm": 1.2575249240596378, "learning_rate": 3.9824187929216157e-08, "loss": 0.2843, "step": 50392 }, { "epoch": 0.8759582123798432, "grad_norm": 1.5492713993053062, "learning_rate": 3.981317968040776e-08, "loss": 0.215, "step": 50393 }, { "epoch": 0.875975594917346, "grad_norm": 1.502056700344127, "learning_rate": 3.98021728901804e-08, "loss": 0.1766, "step": 50394 }, { "epoch": 0.8759929774548488, "grad_norm": 2.170569949168412, "learning_rate": 3.979116755856865e-08, "loss": 0.2268, "step": 50395 }, { "epoch": 0.8760103599923517, "grad_norm": 1.4416932371820248, "learning_rate": 3.978016368560749e-08, "loss": 0.1973, "step": 50396 }, { "epoch": 0.8760277425298545, "grad_norm": 0.8632744015233561, "learning_rate": 3.976916127133168e-08, "loss": 0.18, "step": 50397 }, { "epoch": 0.8760451250673573, "grad_norm": 2.591758980833274, "learning_rate": 3.9758160315776334e-08, "loss": 0.201, "step": 50398 }, { "epoch": 0.8760625076048602, "grad_norm": 1.148058644826265, "learning_rate": 3.974716081897622e-08, "loss": 0.2334, "step": 50399 }, { "epoch": 0.876079890142363, "grad_norm": 3.89403319780441, "learning_rate": 3.9736162780966195e-08, "loss": 0.2586, "step": 50400 }, { "epoch": 0.8760972726798658, "grad_norm": 0.8621456283980794, "learning_rate": 3.972516620178107e-08, "loss": 0.3475, "step": 50401 }, { "epoch": 0.8761146552173686, "grad_norm": 2.0970033660630762, "learning_rate": 3.971417108145575e-08, "loss": 0.2038, "step": 50402 }, { "epoch": 0.8761320377548715, "grad_norm": 1.1695236690410185, "learning_rate": 3.9703177420025045e-08, "loss": 0.2296, "step": 50403 }, { "epoch": 0.8761494202923743, "grad_norm": 1.3281094303836725, "learning_rate": 3.969218521752382e-08, "loss": 0.1561, "step": 50404 }, { "epoch": 0.8761668028298771, "grad_norm": 3.130685086490328, "learning_rate": 3.968119447398688e-08, "loss": 0.2124, "step": 50405 }, { "epoch": 0.87618418536738, "grad_norm": 1.0686909784494454, "learning_rate": 3.9670205189449136e-08, "loss": 0.2475, "step": 50406 }, { "epoch": 0.8762015679048828, "grad_norm": 1.4559617504159519, "learning_rate": 3.9659217363945504e-08, "loss": 0.194, "step": 50407 }, { "epoch": 0.8762189504423856, "grad_norm": 0.7696265201428454, "learning_rate": 3.964823099751052e-08, "loss": 0.1066, "step": 50408 }, { "epoch": 0.8762363329798885, "grad_norm": 1.59522976049953, "learning_rate": 3.963724609017927e-08, "loss": 0.2416, "step": 50409 }, { "epoch": 0.8762537155173912, "grad_norm": 2.291841079270035, "learning_rate": 3.962626264198643e-08, "loss": 0.1998, "step": 50410 }, { "epoch": 0.876271098054894, "grad_norm": 1.6639589835763378, "learning_rate": 3.961528065296693e-08, "loss": 0.1692, "step": 50411 }, { "epoch": 0.8762884805923968, "grad_norm": 1.206848810290225, "learning_rate": 3.9604300123155465e-08, "loss": 0.3083, "step": 50412 }, { "epoch": 0.8763058631298997, "grad_norm": 1.239770552137359, "learning_rate": 3.959332105258684e-08, "loss": 0.1551, "step": 50413 }, { "epoch": 0.8763232456674025, "grad_norm": 1.7929763577239894, "learning_rate": 3.9582343441295963e-08, "loss": 0.1238, "step": 50414 }, { "epoch": 0.8763406282049053, "grad_norm": 1.117548841456715, "learning_rate": 3.9571367289317535e-08, "loss": 0.1731, "step": 50415 }, { "epoch": 0.8763580107424082, "grad_norm": 1.2942204151618617, "learning_rate": 3.9560392596686256e-08, "loss": 0.1564, "step": 50416 }, { "epoch": 0.876375393279911, "grad_norm": 1.749830634962666, "learning_rate": 3.9549419363437145e-08, "loss": 0.169, "step": 50417 }, { "epoch": 0.8763927758174138, "grad_norm": 0.9073879224762351, "learning_rate": 3.953844758960484e-08, "loss": 0.1281, "step": 50418 }, { "epoch": 0.8764101583549166, "grad_norm": 2.6915220790877017, "learning_rate": 3.952747727522415e-08, "loss": 0.254, "step": 50419 }, { "epoch": 0.8764275408924195, "grad_norm": 2.130055814532526, "learning_rate": 3.951650842032983e-08, "loss": 0.2035, "step": 50420 }, { "epoch": 0.8764449234299223, "grad_norm": 1.245825439763113, "learning_rate": 3.950554102495662e-08, "loss": 0.1739, "step": 50421 }, { "epoch": 0.8764623059674251, "grad_norm": 2.780419733642279, "learning_rate": 3.9494575089139336e-08, "loss": 0.2053, "step": 50422 }, { "epoch": 0.876479688504928, "grad_norm": 1.0597110536749004, "learning_rate": 3.948361061291272e-08, "loss": 0.2376, "step": 50423 }, { "epoch": 0.8764970710424308, "grad_norm": 0.9731635110211833, "learning_rate": 3.9472647596311415e-08, "loss": 0.1596, "step": 50424 }, { "epoch": 0.8765144535799336, "grad_norm": 1.073292497699248, "learning_rate": 3.946168603937039e-08, "loss": 0.1116, "step": 50425 }, { "epoch": 0.8765318361174365, "grad_norm": 1.2426021558865887, "learning_rate": 3.945072594212428e-08, "loss": 0.1703, "step": 50426 }, { "epoch": 0.8765492186549393, "grad_norm": 1.3923213540621766, "learning_rate": 3.9439767304607794e-08, "loss": 0.1778, "step": 50427 }, { "epoch": 0.8765666011924421, "grad_norm": 1.0885991346684567, "learning_rate": 3.942881012685556e-08, "loss": 0.2067, "step": 50428 }, { "epoch": 0.876583983729945, "grad_norm": 1.9712719498266873, "learning_rate": 3.941785440890255e-08, "loss": 0.1443, "step": 50429 }, { "epoch": 0.8766013662674477, "grad_norm": 1.431173906937434, "learning_rate": 3.94069001507833e-08, "loss": 0.3109, "step": 50430 }, { "epoch": 0.8766187488049505, "grad_norm": 1.2192914382429114, "learning_rate": 3.9395947352532655e-08, "loss": 0.1753, "step": 50431 }, { "epoch": 0.8766361313424533, "grad_norm": 1.6499190611461145, "learning_rate": 3.938499601418527e-08, "loss": 0.2503, "step": 50432 }, { "epoch": 0.8766535138799562, "grad_norm": 1.0705116033517343, "learning_rate": 3.937404613577583e-08, "loss": 0.1801, "step": 50433 }, { "epoch": 0.876670896417459, "grad_norm": 2.2400977668562665, "learning_rate": 3.936309771733909e-08, "loss": 0.1748, "step": 50434 }, { "epoch": 0.8766882789549618, "grad_norm": 1.305260520838294, "learning_rate": 3.935215075890974e-08, "loss": 0.2097, "step": 50435 }, { "epoch": 0.8767056614924646, "grad_norm": 1.4062803813144635, "learning_rate": 3.934120526052231e-08, "loss": 0.2518, "step": 50436 }, { "epoch": 0.8767230440299675, "grad_norm": 0.8824141523101142, "learning_rate": 3.933026122221184e-08, "loss": 0.1877, "step": 50437 }, { "epoch": 0.8767404265674703, "grad_norm": 1.3976573409067148, "learning_rate": 3.931931864401278e-08, "loss": 0.2752, "step": 50438 }, { "epoch": 0.8767578091049731, "grad_norm": 1.2533692330381483, "learning_rate": 3.9308377525959836e-08, "loss": 0.196, "step": 50439 }, { "epoch": 0.876775191642476, "grad_norm": 1.695281237525588, "learning_rate": 3.929743786808776e-08, "loss": 0.2181, "step": 50440 }, { "epoch": 0.8767925741799788, "grad_norm": 0.8161760568429077, "learning_rate": 3.9286499670431126e-08, "loss": 0.1557, "step": 50441 }, { "epoch": 0.8768099567174816, "grad_norm": 2.2990193789084366, "learning_rate": 3.92755629330247e-08, "loss": 0.1466, "step": 50442 }, { "epoch": 0.8768273392549845, "grad_norm": 1.4324743704709944, "learning_rate": 3.926462765590305e-08, "loss": 0.1966, "step": 50443 }, { "epoch": 0.8768447217924873, "grad_norm": 1.688979368436735, "learning_rate": 3.925369383910082e-08, "loss": 0.1798, "step": 50444 }, { "epoch": 0.8768621043299901, "grad_norm": 1.381730573382601, "learning_rate": 3.924276148265293e-08, "loss": 0.1634, "step": 50445 }, { "epoch": 0.876879486867493, "grad_norm": 1.2407080935335026, "learning_rate": 3.923183058659374e-08, "loss": 0.1187, "step": 50446 }, { "epoch": 0.8768968694049958, "grad_norm": 1.1038130533417936, "learning_rate": 3.9220901150957883e-08, "loss": 0.2324, "step": 50447 }, { "epoch": 0.8769142519424986, "grad_norm": 0.9386867704999541, "learning_rate": 3.9209973175780175e-08, "loss": 0.2041, "step": 50448 }, { "epoch": 0.8769316344800014, "grad_norm": 1.372190542269804, "learning_rate": 3.9199046661095194e-08, "loss": 0.2722, "step": 50449 }, { "epoch": 0.8769490170175042, "grad_norm": 1.9365430536897643, "learning_rate": 3.9188121606937576e-08, "loss": 0.2163, "step": 50450 }, { "epoch": 0.876966399555007, "grad_norm": 1.3750133870058427, "learning_rate": 3.9177198013341964e-08, "loss": 0.1789, "step": 50451 }, { "epoch": 0.8769837820925098, "grad_norm": 1.052226317449967, "learning_rate": 3.916627588034288e-08, "loss": 0.1332, "step": 50452 }, { "epoch": 0.8770011646300127, "grad_norm": 1.0732903186997413, "learning_rate": 3.915535520797503e-08, "loss": 0.2019, "step": 50453 }, { "epoch": 0.8770185471675155, "grad_norm": 1.2477784407862431, "learning_rate": 3.914443599627304e-08, "loss": 0.2433, "step": 50454 }, { "epoch": 0.8770359297050183, "grad_norm": 1.1398546547951056, "learning_rate": 3.913351824527139e-08, "loss": 0.138, "step": 50455 }, { "epoch": 0.8770533122425211, "grad_norm": 2.1834613492370236, "learning_rate": 3.9122601955004886e-08, "loss": 0.1961, "step": 50456 }, { "epoch": 0.877070694780024, "grad_norm": 2.100568319347891, "learning_rate": 3.9111687125508e-08, "loss": 0.2402, "step": 50457 }, { "epoch": 0.8770880773175268, "grad_norm": 0.9521578855173691, "learning_rate": 3.910077375681531e-08, "loss": 0.3087, "step": 50458 }, { "epoch": 0.8771054598550296, "grad_norm": 1.4661571895600545, "learning_rate": 3.9089861848961516e-08, "loss": 0.1847, "step": 50459 }, { "epoch": 0.8771228423925325, "grad_norm": 1.5575745060907453, "learning_rate": 3.907895140198109e-08, "loss": 0.3097, "step": 50460 }, { "epoch": 0.8771402249300353, "grad_norm": 1.3716438848451749, "learning_rate": 3.906804241590872e-08, "loss": 0.1969, "step": 50461 }, { "epoch": 0.8771576074675381, "grad_norm": 2.405838406432589, "learning_rate": 3.905713489077883e-08, "loss": 0.2816, "step": 50462 }, { "epoch": 0.877174990005041, "grad_norm": 1.208569708051513, "learning_rate": 3.904622882662606e-08, "loss": 0.1366, "step": 50463 }, { "epoch": 0.8771923725425438, "grad_norm": 1.1823172185753588, "learning_rate": 3.903532422348515e-08, "loss": 0.1239, "step": 50464 }, { "epoch": 0.8772097550800466, "grad_norm": 2.052880333987587, "learning_rate": 3.902442108139042e-08, "loss": 0.2582, "step": 50465 }, { "epoch": 0.8772271376175494, "grad_norm": 1.0592122076956803, "learning_rate": 3.901351940037645e-08, "loss": 0.1234, "step": 50466 }, { "epoch": 0.8772445201550523, "grad_norm": 1.0003645483903905, "learning_rate": 3.900261918047798e-08, "loss": 0.1017, "step": 50467 }, { "epoch": 0.8772619026925551, "grad_norm": 1.3298278747794927, "learning_rate": 3.899172042172938e-08, "loss": 0.1564, "step": 50468 }, { "epoch": 0.8772792852300579, "grad_norm": 1.1862767274462587, "learning_rate": 3.8980823124165286e-08, "loss": 0.1571, "step": 50469 }, { "epoch": 0.8772966677675607, "grad_norm": 2.30746966594202, "learning_rate": 3.8969927287820225e-08, "loss": 0.2418, "step": 50470 }, { "epoch": 0.8773140503050635, "grad_norm": 1.8992898312655662, "learning_rate": 3.895903291272867e-08, "loss": 0.2344, "step": 50471 }, { "epoch": 0.8773314328425663, "grad_norm": 1.3850085663140685, "learning_rate": 3.8948139998925265e-08, "loss": 0.1263, "step": 50472 }, { "epoch": 0.8773488153800691, "grad_norm": 0.948802771998138, "learning_rate": 3.8937248546444426e-08, "loss": 0.1256, "step": 50473 }, { "epoch": 0.877366197917572, "grad_norm": 0.7646238730619316, "learning_rate": 3.892635855532061e-08, "loss": 0.1611, "step": 50474 }, { "epoch": 0.8773835804550748, "grad_norm": 2.609837060456293, "learning_rate": 3.891547002558854e-08, "loss": 0.1944, "step": 50475 }, { "epoch": 0.8774009629925776, "grad_norm": 0.9273600603883418, "learning_rate": 3.8904582957282606e-08, "loss": 0.1663, "step": 50476 }, { "epoch": 0.8774183455300805, "grad_norm": 0.7010067464684334, "learning_rate": 3.889369735043746e-08, "loss": 0.2462, "step": 50477 }, { "epoch": 0.8774357280675833, "grad_norm": 1.4254273345078103, "learning_rate": 3.88828132050873e-08, "loss": 0.3371, "step": 50478 }, { "epoch": 0.8774531106050861, "grad_norm": 1.5473583378453881, "learning_rate": 3.887193052126686e-08, "loss": 0.1911, "step": 50479 }, { "epoch": 0.877470493142589, "grad_norm": 1.6022639935220262, "learning_rate": 3.886104929901057e-08, "loss": 0.2434, "step": 50480 }, { "epoch": 0.8774878756800918, "grad_norm": 1.3587739106820151, "learning_rate": 3.8850169538352964e-08, "loss": 0.1435, "step": 50481 }, { "epoch": 0.8775052582175946, "grad_norm": 1.569189613425546, "learning_rate": 3.883929123932839e-08, "loss": 0.1994, "step": 50482 }, { "epoch": 0.8775226407550974, "grad_norm": 1.948309914559725, "learning_rate": 3.8828414401971544e-08, "loss": 0.1578, "step": 50483 }, { "epoch": 0.8775400232926003, "grad_norm": 1.6271717653982734, "learning_rate": 3.881753902631674e-08, "loss": 0.2252, "step": 50484 }, { "epoch": 0.8775574058301031, "grad_norm": 1.3399258801907812, "learning_rate": 3.880666511239849e-08, "loss": 0.12, "step": 50485 }, { "epoch": 0.8775747883676059, "grad_norm": 0.8548371794007409, "learning_rate": 3.879579266025118e-08, "loss": 0.141, "step": 50486 }, { "epoch": 0.8775921709051088, "grad_norm": 1.475925740292547, "learning_rate": 3.8784921669909376e-08, "loss": 0.1942, "step": 50487 }, { "epoch": 0.8776095534426116, "grad_norm": 1.0671842427517166, "learning_rate": 3.8774052141407555e-08, "loss": 0.228, "step": 50488 }, { "epoch": 0.8776269359801144, "grad_norm": 1.7603649093874625, "learning_rate": 3.876318407478013e-08, "loss": 0.1863, "step": 50489 }, { "epoch": 0.8776443185176172, "grad_norm": 1.4678935743237176, "learning_rate": 3.8752317470061523e-08, "loss": 0.1166, "step": 50490 }, { "epoch": 0.87766170105512, "grad_norm": 2.7034548828108065, "learning_rate": 3.8741452327286153e-08, "loss": 0.1501, "step": 50491 }, { "epoch": 0.8776790835926228, "grad_norm": 3.1275055197093433, "learning_rate": 3.873058864648854e-08, "loss": 0.4103, "step": 50492 }, { "epoch": 0.8776964661301256, "grad_norm": 1.6675497164381439, "learning_rate": 3.871972642770305e-08, "loss": 0.1646, "step": 50493 }, { "epoch": 0.8777138486676285, "grad_norm": 0.8056220203270497, "learning_rate": 3.870886567096404e-08, "loss": 0.1181, "step": 50494 }, { "epoch": 0.8777312312051313, "grad_norm": 1.2333280398254096, "learning_rate": 3.8698006376306214e-08, "loss": 0.1589, "step": 50495 }, { "epoch": 0.8777486137426341, "grad_norm": 1.1834033824871675, "learning_rate": 3.86871485437637e-08, "loss": 0.1498, "step": 50496 }, { "epoch": 0.877765996280137, "grad_norm": 1.2462119324724907, "learning_rate": 3.867629217337093e-08, "loss": 0.1257, "step": 50497 }, { "epoch": 0.8777833788176398, "grad_norm": 1.4082821298126558, "learning_rate": 3.8665437265162525e-08, "loss": 0.2115, "step": 50498 }, { "epoch": 0.8778007613551426, "grad_norm": 1.9795328522541593, "learning_rate": 3.865458381917275e-08, "loss": 0.1662, "step": 50499 }, { "epoch": 0.8778181438926455, "grad_norm": 2.308365552854921, "learning_rate": 3.8643731835435957e-08, "loss": 0.1845, "step": 50500 }, { "epoch": 0.8778355264301483, "grad_norm": 2.0679636518388365, "learning_rate": 3.8632881313986674e-08, "loss": 0.2212, "step": 50501 }, { "epoch": 0.8778529089676511, "grad_norm": 1.2850503083480296, "learning_rate": 3.8622032254859214e-08, "loss": 0.1782, "step": 50502 }, { "epoch": 0.8778702915051539, "grad_norm": 1.5908232126163053, "learning_rate": 3.8611184658087935e-08, "loss": 0.1836, "step": 50503 }, { "epoch": 0.8778876740426568, "grad_norm": 2.1345354446656963, "learning_rate": 3.860033852370731e-08, "loss": 0.1882, "step": 50504 }, { "epoch": 0.8779050565801596, "grad_norm": 1.6396021592252679, "learning_rate": 3.8589493851751534e-08, "loss": 0.2012, "step": 50505 }, { "epoch": 0.8779224391176624, "grad_norm": 1.4184662313498566, "learning_rate": 3.857865064225519e-08, "loss": 0.2096, "step": 50506 }, { "epoch": 0.8779398216551653, "grad_norm": 4.060422215169089, "learning_rate": 3.856780889525257e-08, "loss": 0.2596, "step": 50507 }, { "epoch": 0.8779572041926681, "grad_norm": 2.079095668967958, "learning_rate": 3.8556968610778116e-08, "loss": 0.1731, "step": 50508 }, { "epoch": 0.8779745867301709, "grad_norm": 3.513486848626885, "learning_rate": 3.8546129788865955e-08, "loss": 0.2433, "step": 50509 }, { "epoch": 0.8779919692676736, "grad_norm": 1.580956334672954, "learning_rate": 3.853529242955067e-08, "loss": 0.2272, "step": 50510 }, { "epoch": 0.8780093518051765, "grad_norm": 0.9525587082083176, "learning_rate": 3.852445653286651e-08, "loss": 0.2204, "step": 50511 }, { "epoch": 0.8780267343426793, "grad_norm": 2.3172876680816556, "learning_rate": 3.851362209884784e-08, "loss": 0.1504, "step": 50512 }, { "epoch": 0.8780441168801821, "grad_norm": 1.4323688843199125, "learning_rate": 3.850278912752891e-08, "loss": 0.1748, "step": 50513 }, { "epoch": 0.878061499417685, "grad_norm": 1.2974413907432474, "learning_rate": 3.849195761894436e-08, "loss": 0.2109, "step": 50514 }, { "epoch": 0.8780788819551878, "grad_norm": 1.9388587725810453, "learning_rate": 3.848112757312816e-08, "loss": 0.1771, "step": 50515 }, { "epoch": 0.8780962644926906, "grad_norm": 1.2389537834684725, "learning_rate": 3.8470298990114833e-08, "loss": 0.182, "step": 50516 }, { "epoch": 0.8781136470301935, "grad_norm": 0.9045609632619891, "learning_rate": 3.845947186993853e-08, "loss": 0.179, "step": 50517 }, { "epoch": 0.8781310295676963, "grad_norm": 1.677806052395093, "learning_rate": 3.844864621263377e-08, "loss": 0.1633, "step": 50518 }, { "epoch": 0.8781484121051991, "grad_norm": 1.3911339270190204, "learning_rate": 3.843782201823475e-08, "loss": 0.2189, "step": 50519 }, { "epoch": 0.878165794642702, "grad_norm": 1.1428344122910172, "learning_rate": 3.8426999286775895e-08, "loss": 0.1933, "step": 50520 }, { "epoch": 0.8781831771802048, "grad_norm": 1.5164665612242574, "learning_rate": 3.8416178018291334e-08, "loss": 0.1774, "step": 50521 }, { "epoch": 0.8782005597177076, "grad_norm": 3.043846174440055, "learning_rate": 3.840535821281554e-08, "loss": 0.3916, "step": 50522 }, { "epoch": 0.8782179422552104, "grad_norm": 1.2225330095375528, "learning_rate": 3.839453987038266e-08, "loss": 0.1302, "step": 50523 }, { "epoch": 0.8782353247927133, "grad_norm": 1.6957285486570888, "learning_rate": 3.8383722991027045e-08, "loss": 0.2081, "step": 50524 }, { "epoch": 0.8782527073302161, "grad_norm": 1.987972608494279, "learning_rate": 3.8372907574782955e-08, "loss": 0.1712, "step": 50525 }, { "epoch": 0.8782700898677189, "grad_norm": 1.0744839062926774, "learning_rate": 3.8362093621684685e-08, "loss": 0.1497, "step": 50526 }, { "epoch": 0.8782874724052218, "grad_norm": 1.548665427072597, "learning_rate": 3.8351281131766666e-08, "loss": 0.2399, "step": 50527 }, { "epoch": 0.8783048549427246, "grad_norm": 1.7333816056535156, "learning_rate": 3.834047010506286e-08, "loss": 0.1524, "step": 50528 }, { "epoch": 0.8783222374802274, "grad_norm": 1.9197655230097301, "learning_rate": 3.8329660541607746e-08, "loss": 0.0982, "step": 50529 }, { "epoch": 0.8783396200177301, "grad_norm": 0.9125995200884219, "learning_rate": 3.831885244143557e-08, "loss": 0.2193, "step": 50530 }, { "epoch": 0.878357002555233, "grad_norm": 1.3902025165867626, "learning_rate": 3.830804580458052e-08, "loss": 0.1571, "step": 50531 }, { "epoch": 0.8783743850927358, "grad_norm": 2.0434733943648125, "learning_rate": 3.829724063107687e-08, "loss": 0.174, "step": 50532 }, { "epoch": 0.8783917676302386, "grad_norm": 1.1437391932005403, "learning_rate": 3.8286436920958895e-08, "loss": 0.1291, "step": 50533 }, { "epoch": 0.8784091501677415, "grad_norm": 1.5719649936840339, "learning_rate": 3.827563467426087e-08, "loss": 0.2026, "step": 50534 }, { "epoch": 0.8784265327052443, "grad_norm": 1.2883994966306023, "learning_rate": 3.826483389101692e-08, "loss": 0.1588, "step": 50535 }, { "epoch": 0.8784439152427471, "grad_norm": 0.8283980070143019, "learning_rate": 3.825403457126125e-08, "loss": 0.1013, "step": 50536 }, { "epoch": 0.87846129778025, "grad_norm": 1.4372686868960964, "learning_rate": 3.8243236715028325e-08, "loss": 0.1669, "step": 50537 }, { "epoch": 0.8784786803177528, "grad_norm": 2.8681953491683747, "learning_rate": 3.8232440322352175e-08, "loss": 0.2129, "step": 50538 }, { "epoch": 0.8784960628552556, "grad_norm": 2.010375120245326, "learning_rate": 3.822164539326711e-08, "loss": 0.1421, "step": 50539 }, { "epoch": 0.8785134453927584, "grad_norm": 1.2808842420680697, "learning_rate": 3.8210851927807266e-08, "loss": 0.0984, "step": 50540 }, { "epoch": 0.8785308279302613, "grad_norm": 1.2605619147450569, "learning_rate": 3.8200059926006944e-08, "loss": 0.1996, "step": 50541 }, { "epoch": 0.8785482104677641, "grad_norm": 2.068897000687443, "learning_rate": 3.8189269387900235e-08, "loss": 0.1811, "step": 50542 }, { "epoch": 0.8785655930052669, "grad_norm": 1.105451126983198, "learning_rate": 3.8178480313521444e-08, "loss": 0.1753, "step": 50543 }, { "epoch": 0.8785829755427698, "grad_norm": 1.2428126215309008, "learning_rate": 3.8167692702904656e-08, "loss": 0.1585, "step": 50544 }, { "epoch": 0.8786003580802726, "grad_norm": 1.130574776136437, "learning_rate": 3.815690655608417e-08, "loss": 0.1196, "step": 50545 }, { "epoch": 0.8786177406177754, "grad_norm": 2.3858676998616404, "learning_rate": 3.814612187309424e-08, "loss": 0.1648, "step": 50546 }, { "epoch": 0.8786351231552783, "grad_norm": 1.2135782084441387, "learning_rate": 3.8135338653968786e-08, "loss": 0.162, "step": 50547 }, { "epoch": 0.8786525056927811, "grad_norm": 1.0757869117529646, "learning_rate": 3.812455689874228e-08, "loss": 0.2814, "step": 50548 }, { "epoch": 0.8786698882302838, "grad_norm": 1.7630298766025432, "learning_rate": 3.8113776607448687e-08, "loss": 0.0966, "step": 50549 }, { "epoch": 0.8786872707677866, "grad_norm": 1.5680363442459697, "learning_rate": 3.810299778012227e-08, "loss": 0.2178, "step": 50550 }, { "epoch": 0.8787046533052895, "grad_norm": 1.940934924287582, "learning_rate": 3.809222041679716e-08, "loss": 0.1333, "step": 50551 }, { "epoch": 0.8787220358427923, "grad_norm": 1.8602752373914686, "learning_rate": 3.8081444517507555e-08, "loss": 0.155, "step": 50552 }, { "epoch": 0.8787394183802951, "grad_norm": 0.9941637481988331, "learning_rate": 3.8070670082287535e-08, "loss": 0.2473, "step": 50553 }, { "epoch": 0.878756800917798, "grad_norm": 1.378056815660488, "learning_rate": 3.8059897111171355e-08, "loss": 0.1816, "step": 50554 }, { "epoch": 0.8787741834553008, "grad_norm": 0.9629734804367913, "learning_rate": 3.804912560419299e-08, "loss": 0.1762, "step": 50555 }, { "epoch": 0.8787915659928036, "grad_norm": 1.726223073684819, "learning_rate": 3.803835556138679e-08, "loss": 0.1469, "step": 50556 }, { "epoch": 0.8788089485303064, "grad_norm": 1.635073561259615, "learning_rate": 3.8027586982786794e-08, "loss": 0.1418, "step": 50557 }, { "epoch": 0.8788263310678093, "grad_norm": 1.2204396497808196, "learning_rate": 3.801681986842714e-08, "loss": 0.1429, "step": 50558 }, { "epoch": 0.8788437136053121, "grad_norm": 1.0766765857477805, "learning_rate": 3.8006054218341907e-08, "loss": 0.1841, "step": 50559 }, { "epoch": 0.8788610961428149, "grad_norm": 2.1701011276735094, "learning_rate": 3.79952900325653e-08, "loss": 0.2506, "step": 50560 }, { "epoch": 0.8788784786803178, "grad_norm": 4.431030190769156, "learning_rate": 3.7984527311131385e-08, "loss": 0.2165, "step": 50561 }, { "epoch": 0.8788958612178206, "grad_norm": 1.325501339067616, "learning_rate": 3.7973766054074317e-08, "loss": 0.2606, "step": 50562 }, { "epoch": 0.8789132437553234, "grad_norm": 1.2874946972106478, "learning_rate": 3.7963006261428066e-08, "loss": 0.2524, "step": 50563 }, { "epoch": 0.8789306262928263, "grad_norm": 1.1175210084089051, "learning_rate": 3.795224793322688e-08, "loss": 0.1155, "step": 50564 }, { "epoch": 0.8789480088303291, "grad_norm": 1.6181616294025658, "learning_rate": 3.794149106950495e-08, "loss": 0.1583, "step": 50565 }, { "epoch": 0.8789653913678319, "grad_norm": 1.5845656104463042, "learning_rate": 3.7930735670296143e-08, "loss": 0.1557, "step": 50566 }, { "epoch": 0.8789827739053347, "grad_norm": 1.1912201465169192, "learning_rate": 3.79199817356346e-08, "loss": 0.2273, "step": 50567 }, { "epoch": 0.8790001564428376, "grad_norm": 1.7776500750283968, "learning_rate": 3.790922926555451e-08, "loss": 0.123, "step": 50568 }, { "epoch": 0.8790175389803403, "grad_norm": 1.3274598054705125, "learning_rate": 3.7898478260089903e-08, "loss": 0.1544, "step": 50569 }, { "epoch": 0.8790349215178431, "grad_norm": 2.0610731473078823, "learning_rate": 3.7887728719274816e-08, "loss": 0.2278, "step": 50570 }, { "epoch": 0.879052304055346, "grad_norm": 1.9698168958336433, "learning_rate": 3.787698064314338e-08, "loss": 0.1831, "step": 50571 }, { "epoch": 0.8790696865928488, "grad_norm": 1.172422871153688, "learning_rate": 3.7866234031729614e-08, "loss": 0.174, "step": 50572 }, { "epoch": 0.8790870691303516, "grad_norm": 2.2480885994756363, "learning_rate": 3.785548888506762e-08, "loss": 0.1309, "step": 50573 }, { "epoch": 0.8791044516678544, "grad_norm": 1.2483033021665944, "learning_rate": 3.784474520319147e-08, "loss": 0.2288, "step": 50574 }, { "epoch": 0.8791218342053573, "grad_norm": 1.8640282010499387, "learning_rate": 3.783400298613504e-08, "loss": 0.171, "step": 50575 }, { "epoch": 0.8791392167428601, "grad_norm": 2.457040016983406, "learning_rate": 3.782326223393262e-08, "loss": 0.1047, "step": 50576 }, { "epoch": 0.8791565992803629, "grad_norm": 1.7618469387136757, "learning_rate": 3.781252294661813e-08, "loss": 0.228, "step": 50577 }, { "epoch": 0.8791739818178658, "grad_norm": 1.2151392944763382, "learning_rate": 3.780178512422566e-08, "loss": 0.1804, "step": 50578 }, { "epoch": 0.8791913643553686, "grad_norm": 1.101472175343155, "learning_rate": 3.7791048766789234e-08, "loss": 0.2088, "step": 50579 }, { "epoch": 0.8792087468928714, "grad_norm": 0.9969139431089002, "learning_rate": 3.778031387434283e-08, "loss": 0.176, "step": 50580 }, { "epoch": 0.8792261294303743, "grad_norm": 1.3074819779574003, "learning_rate": 3.776958044692047e-08, "loss": 0.1559, "step": 50581 }, { "epoch": 0.8792435119678771, "grad_norm": 4.429605535143362, "learning_rate": 3.7758848484556294e-08, "loss": 0.2526, "step": 50582 }, { "epoch": 0.8792608945053799, "grad_norm": 1.1965851409421273, "learning_rate": 3.7748117987284056e-08, "loss": 0.0994, "step": 50583 }, { "epoch": 0.8792782770428827, "grad_norm": 0.9215026032718021, "learning_rate": 3.7737388955138174e-08, "loss": 0.1488, "step": 50584 }, { "epoch": 0.8792956595803856, "grad_norm": 2.246539169003588, "learning_rate": 3.772666138815234e-08, "loss": 0.1323, "step": 50585 }, { "epoch": 0.8793130421178884, "grad_norm": 1.2857453435833326, "learning_rate": 3.7715935286360526e-08, "loss": 0.1719, "step": 50586 }, { "epoch": 0.8793304246553912, "grad_norm": 1.7973821013379045, "learning_rate": 3.7705210649796925e-08, "loss": 0.1755, "step": 50587 }, { "epoch": 0.8793478071928941, "grad_norm": 1.002841476962359, "learning_rate": 3.769448747849546e-08, "loss": 0.1878, "step": 50588 }, { "epoch": 0.8793651897303968, "grad_norm": 2.0388059924919, "learning_rate": 3.768376577249011e-08, "loss": 0.2053, "step": 50589 }, { "epoch": 0.8793825722678996, "grad_norm": 1.3390859260221761, "learning_rate": 3.767304553181483e-08, "loss": 0.2142, "step": 50590 }, { "epoch": 0.8793999548054025, "grad_norm": 0.809638272960486, "learning_rate": 3.76623267565036e-08, "loss": 0.18, "step": 50591 }, { "epoch": 0.8794173373429053, "grad_norm": 1.5215108590712043, "learning_rate": 3.765160944659046e-08, "loss": 0.1673, "step": 50592 }, { "epoch": 0.8794347198804081, "grad_norm": 1.839419228155505, "learning_rate": 3.7640893602109316e-08, "loss": 0.2225, "step": 50593 }, { "epoch": 0.8794521024179109, "grad_norm": 2.1434463978113496, "learning_rate": 3.763017922309408e-08, "loss": 0.1466, "step": 50594 }, { "epoch": 0.8794694849554138, "grad_norm": 1.6750628594894195, "learning_rate": 3.761946630957885e-08, "loss": 0.14, "step": 50595 }, { "epoch": 0.8794868674929166, "grad_norm": 1.594065260207782, "learning_rate": 3.760875486159754e-08, "loss": 0.1899, "step": 50596 }, { "epoch": 0.8795042500304194, "grad_norm": 1.6032175813562721, "learning_rate": 3.759804487918411e-08, "loss": 0.2636, "step": 50597 }, { "epoch": 0.8795216325679223, "grad_norm": 1.2147276948194377, "learning_rate": 3.758733636237232e-08, "loss": 0.1357, "step": 50598 }, { "epoch": 0.8795390151054251, "grad_norm": 1.7601923956022447, "learning_rate": 3.7576629311196315e-08, "loss": 0.1768, "step": 50599 }, { "epoch": 0.8795563976429279, "grad_norm": 1.2580981921240406, "learning_rate": 3.756592372569001e-08, "loss": 0.1343, "step": 50600 }, { "epoch": 0.8795737801804308, "grad_norm": 0.961865559358192, "learning_rate": 3.7555219605887314e-08, "loss": 0.1315, "step": 50601 }, { "epoch": 0.8795911627179336, "grad_norm": 2.3956635264945843, "learning_rate": 3.7544516951822034e-08, "loss": 0.2738, "step": 50602 }, { "epoch": 0.8796085452554364, "grad_norm": 1.91467961412508, "learning_rate": 3.753381576352838e-08, "loss": 0.1713, "step": 50603 }, { "epoch": 0.8796259277929392, "grad_norm": 1.150856438049833, "learning_rate": 3.752311604103997e-08, "loss": 0.1903, "step": 50604 }, { "epoch": 0.8796433103304421, "grad_norm": 4.337397292851025, "learning_rate": 3.7512417784390847e-08, "loss": 0.2965, "step": 50605 }, { "epoch": 0.8796606928679449, "grad_norm": 1.243962640567142, "learning_rate": 3.7501720993614875e-08, "loss": 0.1556, "step": 50606 }, { "epoch": 0.8796780754054477, "grad_norm": 1.1558501380609383, "learning_rate": 3.749102566874607e-08, "loss": 0.1822, "step": 50607 }, { "epoch": 0.8796954579429506, "grad_norm": 1.3794051797122446, "learning_rate": 3.748033180981819e-08, "loss": 0.1657, "step": 50608 }, { "epoch": 0.8797128404804533, "grad_norm": 1.7652911839170236, "learning_rate": 3.7469639416865265e-08, "loss": 0.266, "step": 50609 }, { "epoch": 0.8797302230179561, "grad_norm": 1.7376806458047132, "learning_rate": 3.745894848992109e-08, "loss": 0.194, "step": 50610 }, { "epoch": 0.879747605555459, "grad_norm": 1.358125967237239, "learning_rate": 3.7448259029019546e-08, "loss": 0.2084, "step": 50611 }, { "epoch": 0.8797649880929618, "grad_norm": 0.7555932391141862, "learning_rate": 3.7437571034194534e-08, "loss": 0.218, "step": 50612 }, { "epoch": 0.8797823706304646, "grad_norm": 1.1833284429435857, "learning_rate": 3.7426884505479985e-08, "loss": 0.2002, "step": 50613 }, { "epoch": 0.8797997531679674, "grad_norm": 1.8955606372968186, "learning_rate": 3.7416199442909633e-08, "loss": 0.2942, "step": 50614 }, { "epoch": 0.8798171357054703, "grad_norm": 1.7762816048997587, "learning_rate": 3.740551584651758e-08, "loss": 0.1363, "step": 50615 }, { "epoch": 0.8798345182429731, "grad_norm": 1.3744719455883392, "learning_rate": 3.7394833716337505e-08, "loss": 0.2575, "step": 50616 }, { "epoch": 0.8798519007804759, "grad_norm": 3.2396808998296813, "learning_rate": 3.738415305240317e-08, "loss": 0.213, "step": 50617 }, { "epoch": 0.8798692833179788, "grad_norm": 1.618678883228481, "learning_rate": 3.737347385474865e-08, "loss": 0.3321, "step": 50618 }, { "epoch": 0.8798866658554816, "grad_norm": 1.718795556164315, "learning_rate": 3.73627961234077e-08, "loss": 0.134, "step": 50619 }, { "epoch": 0.8799040483929844, "grad_norm": 1.7524432362216116, "learning_rate": 3.735211985841419e-08, "loss": 0.2332, "step": 50620 }, { "epoch": 0.8799214309304872, "grad_norm": 2.3926739494277114, "learning_rate": 3.7341445059801965e-08, "loss": 0.1078, "step": 50621 }, { "epoch": 0.8799388134679901, "grad_norm": 1.8250234708781243, "learning_rate": 3.733077172760479e-08, "loss": 0.1731, "step": 50622 }, { "epoch": 0.8799561960054929, "grad_norm": 1.5338267100744054, "learning_rate": 3.732009986185652e-08, "loss": 0.1165, "step": 50623 }, { "epoch": 0.8799735785429957, "grad_norm": 0.7769538769285715, "learning_rate": 3.7309429462591025e-08, "loss": 0.2265, "step": 50624 }, { "epoch": 0.8799909610804986, "grad_norm": 1.6655057091738683, "learning_rate": 3.729876052984204e-08, "loss": 0.1632, "step": 50625 }, { "epoch": 0.8800083436180014, "grad_norm": 0.7986373579472438, "learning_rate": 3.72880930636435e-08, "loss": 0.2263, "step": 50626 }, { "epoch": 0.8800257261555042, "grad_norm": 1.2398315029499374, "learning_rate": 3.727742706402909e-08, "loss": 0.2175, "step": 50627 }, { "epoch": 0.8800431086930071, "grad_norm": 2.1086967626841586, "learning_rate": 3.7266762531032836e-08, "loss": 0.1937, "step": 50628 }, { "epoch": 0.8800604912305098, "grad_norm": 1.2413041087597079, "learning_rate": 3.725609946468822e-08, "loss": 0.1595, "step": 50629 }, { "epoch": 0.8800778737680126, "grad_norm": 1.1060477373581412, "learning_rate": 3.7245437865029205e-08, "loss": 0.2056, "step": 50630 }, { "epoch": 0.8800952563055154, "grad_norm": 2.712976459836542, "learning_rate": 3.7234777732089664e-08, "loss": 0.2372, "step": 50631 }, { "epoch": 0.8801126388430183, "grad_norm": 1.3250026974394358, "learning_rate": 3.7224119065903284e-08, "loss": 0.172, "step": 50632 }, { "epoch": 0.8801300213805211, "grad_norm": 1.7232331402477188, "learning_rate": 3.721346186650376e-08, "loss": 0.2601, "step": 50633 }, { "epoch": 0.8801474039180239, "grad_norm": 2.036112406574486, "learning_rate": 3.7202806133925124e-08, "loss": 0.1763, "step": 50634 }, { "epoch": 0.8801647864555268, "grad_norm": 0.8093547137240942, "learning_rate": 3.719215186820096e-08, "loss": 0.1571, "step": 50635 }, { "epoch": 0.8801821689930296, "grad_norm": 1.83855138720997, "learning_rate": 3.718149906936502e-08, "loss": 0.304, "step": 50636 }, { "epoch": 0.8801995515305324, "grad_norm": 1.0045551244252464, "learning_rate": 3.717084773745111e-08, "loss": 0.1523, "step": 50637 }, { "epoch": 0.8802169340680353, "grad_norm": 1.623600967370826, "learning_rate": 3.716019787249308e-08, "loss": 0.2218, "step": 50638 }, { "epoch": 0.8802343166055381, "grad_norm": 1.562205214845385, "learning_rate": 3.714954947452459e-08, "loss": 0.217, "step": 50639 }, { "epoch": 0.8802516991430409, "grad_norm": 1.3981647824699903, "learning_rate": 3.713890254357938e-08, "loss": 0.1658, "step": 50640 }, { "epoch": 0.8802690816805437, "grad_norm": 1.2591115753387059, "learning_rate": 3.712825707969131e-08, "loss": 0.1855, "step": 50641 }, { "epoch": 0.8802864642180466, "grad_norm": 1.106087897501612, "learning_rate": 3.711761308289396e-08, "loss": 0.129, "step": 50642 }, { "epoch": 0.8803038467555494, "grad_norm": 1.2937919161190405, "learning_rate": 3.7106970553221206e-08, "loss": 0.1165, "step": 50643 }, { "epoch": 0.8803212292930522, "grad_norm": 2.618380325697485, "learning_rate": 3.709632949070657e-08, "loss": 0.3541, "step": 50644 }, { "epoch": 0.8803386118305551, "grad_norm": 1.3492559072077226, "learning_rate": 3.708568989538408e-08, "loss": 0.2247, "step": 50645 }, { "epoch": 0.8803559943680579, "grad_norm": 0.8162243318627791, "learning_rate": 3.7075051767287256e-08, "loss": 0.1204, "step": 50646 }, { "epoch": 0.8803733769055607, "grad_norm": 1.9571459663171937, "learning_rate": 3.7064415106449976e-08, "loss": 0.1793, "step": 50647 }, { "epoch": 0.8803907594430636, "grad_norm": 0.6921808740043898, "learning_rate": 3.70537799129057e-08, "loss": 0.2088, "step": 50648 }, { "epoch": 0.8804081419805663, "grad_norm": 1.7257082720491286, "learning_rate": 3.70431461866883e-08, "loss": 0.1943, "step": 50649 }, { "epoch": 0.8804255245180691, "grad_norm": 1.1894896973195348, "learning_rate": 3.7032513927831524e-08, "loss": 0.1544, "step": 50650 }, { "epoch": 0.8804429070555719, "grad_norm": 3.2331309913494795, "learning_rate": 3.7021883136369e-08, "loss": 0.2085, "step": 50651 }, { "epoch": 0.8804602895930748, "grad_norm": 1.6023788996127262, "learning_rate": 3.701125381233433e-08, "loss": 0.2386, "step": 50652 }, { "epoch": 0.8804776721305776, "grad_norm": 1.3798340076554287, "learning_rate": 3.7000625955761476e-08, "loss": 0.1511, "step": 50653 }, { "epoch": 0.8804950546680804, "grad_norm": 1.361099591942165, "learning_rate": 3.698999956668386e-08, "loss": 0.1528, "step": 50654 }, { "epoch": 0.8805124372055833, "grad_norm": 1.0550751478658764, "learning_rate": 3.697937464513529e-08, "loss": 0.104, "step": 50655 }, { "epoch": 0.8805298197430861, "grad_norm": 1.1527391193367056, "learning_rate": 3.6968751191149285e-08, "loss": 0.2437, "step": 50656 }, { "epoch": 0.8805472022805889, "grad_norm": 1.499566725961823, "learning_rate": 3.695812920475977e-08, "loss": 0.2167, "step": 50657 }, { "epoch": 0.8805645848180917, "grad_norm": 4.126823750143891, "learning_rate": 3.6947508686000215e-08, "loss": 0.2316, "step": 50658 }, { "epoch": 0.8805819673555946, "grad_norm": 1.5939915305276324, "learning_rate": 3.693688963490438e-08, "loss": 0.1206, "step": 50659 }, { "epoch": 0.8805993498930974, "grad_norm": 2.034259865613405, "learning_rate": 3.692627205150589e-08, "loss": 0.1882, "step": 50660 }, { "epoch": 0.8806167324306002, "grad_norm": 1.201964779328216, "learning_rate": 3.691565593583845e-08, "loss": 0.1429, "step": 50661 }, { "epoch": 0.8806341149681031, "grad_norm": 1.4432632323943912, "learning_rate": 3.690504128793559e-08, "loss": 0.2379, "step": 50662 }, { "epoch": 0.8806514975056059, "grad_norm": 1.1048659749314975, "learning_rate": 3.6894428107831056e-08, "loss": 0.215, "step": 50663 }, { "epoch": 0.8806688800431087, "grad_norm": 0.893892199053951, "learning_rate": 3.688381639555832e-08, "loss": 0.1266, "step": 50664 }, { "epoch": 0.8806862625806116, "grad_norm": 0.8892663847086254, "learning_rate": 3.6873206151151304e-08, "loss": 0.2035, "step": 50665 }, { "epoch": 0.8807036451181144, "grad_norm": 0.9839631386791629, "learning_rate": 3.686259737464353e-08, "loss": 0.2579, "step": 50666 }, { "epoch": 0.8807210276556172, "grad_norm": 1.1828356472761257, "learning_rate": 3.685199006606837e-08, "loss": 0.2094, "step": 50667 }, { "epoch": 0.88073841019312, "grad_norm": 0.8910897077725163, "learning_rate": 3.684138422545979e-08, "loss": 0.1145, "step": 50668 }, { "epoch": 0.8807557927306228, "grad_norm": 1.5011498679260293, "learning_rate": 3.683077985285121e-08, "loss": 0.215, "step": 50669 }, { "epoch": 0.8807731752681256, "grad_norm": 1.6446862370868918, "learning_rate": 3.6820176948276385e-08, "loss": 0.2706, "step": 50670 }, { "epoch": 0.8807905578056284, "grad_norm": 1.1046027727624432, "learning_rate": 3.680957551176872e-08, "loss": 0.1784, "step": 50671 }, { "epoch": 0.8808079403431313, "grad_norm": 1.226876829610465, "learning_rate": 3.679897554336203e-08, "loss": 0.1401, "step": 50672 }, { "epoch": 0.8808253228806341, "grad_norm": 1.2754152806966879, "learning_rate": 3.6788377043089733e-08, "loss": 0.1658, "step": 50673 }, { "epoch": 0.8808427054181369, "grad_norm": 0.8170107511649842, "learning_rate": 3.6777780010985524e-08, "loss": 0.2676, "step": 50674 }, { "epoch": 0.8808600879556397, "grad_norm": 1.866327234170298, "learning_rate": 3.6767184447082876e-08, "loss": 0.238, "step": 50675 }, { "epoch": 0.8808774704931426, "grad_norm": 1.4898993556076745, "learning_rate": 3.67565903514156e-08, "loss": 0.191, "step": 50676 }, { "epoch": 0.8808948530306454, "grad_norm": 1.0888920562859157, "learning_rate": 3.674599772401704e-08, "loss": 0.1703, "step": 50677 }, { "epoch": 0.8809122355681482, "grad_norm": 2.7727785655811266, "learning_rate": 3.673540656492097e-08, "loss": 0.299, "step": 50678 }, { "epoch": 0.8809296181056511, "grad_norm": 1.0383020768690023, "learning_rate": 3.672481687416079e-08, "loss": 0.1617, "step": 50679 }, { "epoch": 0.8809470006431539, "grad_norm": 2.594793359110564, "learning_rate": 3.6714228651770155e-08, "loss": 0.3095, "step": 50680 }, { "epoch": 0.8809643831806567, "grad_norm": 1.3571239778999709, "learning_rate": 3.670364189778258e-08, "loss": 0.2031, "step": 50681 }, { "epoch": 0.8809817657181596, "grad_norm": 1.1892943352330585, "learning_rate": 3.66930566122316e-08, "loss": 0.1716, "step": 50682 }, { "epoch": 0.8809991482556624, "grad_norm": 1.4865393043302533, "learning_rate": 3.66824727951508e-08, "loss": 0.2027, "step": 50683 }, { "epoch": 0.8810165307931652, "grad_norm": 1.461691097739137, "learning_rate": 3.667189044657376e-08, "loss": 0.1737, "step": 50684 }, { "epoch": 0.881033913330668, "grad_norm": 1.3572190854546207, "learning_rate": 3.6661309566534125e-08, "loss": 0.1775, "step": 50685 }, { "epoch": 0.8810512958681709, "grad_norm": 1.5074257217742453, "learning_rate": 3.6650730155065145e-08, "loss": 0.1388, "step": 50686 }, { "epoch": 0.8810686784056737, "grad_norm": 0.8589520574346386, "learning_rate": 3.664015221220046e-08, "loss": 0.1554, "step": 50687 }, { "epoch": 0.8810860609431764, "grad_norm": 1.656485083438418, "learning_rate": 3.662957573797376e-08, "loss": 0.1864, "step": 50688 }, { "epoch": 0.8811034434806793, "grad_norm": 2.6129577209803903, "learning_rate": 3.6619000732418416e-08, "loss": 0.2237, "step": 50689 }, { "epoch": 0.8811208260181821, "grad_norm": 0.9946192593693206, "learning_rate": 3.660842719556795e-08, "loss": 0.2362, "step": 50690 }, { "epoch": 0.8811382085556849, "grad_norm": 1.298665729885775, "learning_rate": 3.6597855127455944e-08, "loss": 0.1451, "step": 50691 }, { "epoch": 0.8811555910931878, "grad_norm": 2.7829884385799493, "learning_rate": 3.658728452811588e-08, "loss": 0.2618, "step": 50692 }, { "epoch": 0.8811729736306906, "grad_norm": 1.8789368731610092, "learning_rate": 3.657671539758123e-08, "loss": 0.3274, "step": 50693 }, { "epoch": 0.8811903561681934, "grad_norm": 2.1136345960194154, "learning_rate": 3.656614773588551e-08, "loss": 0.1691, "step": 50694 }, { "epoch": 0.8812077387056962, "grad_norm": 2.0066611739306035, "learning_rate": 3.655558154306215e-08, "loss": 0.2309, "step": 50695 }, { "epoch": 0.8812251212431991, "grad_norm": 1.6156726015582321, "learning_rate": 3.654501681914474e-08, "loss": 0.1742, "step": 50696 }, { "epoch": 0.8812425037807019, "grad_norm": 2.698279761180493, "learning_rate": 3.653445356416679e-08, "loss": 0.3446, "step": 50697 }, { "epoch": 0.8812598863182047, "grad_norm": 1.8087413948595168, "learning_rate": 3.652389177816173e-08, "loss": 0.1678, "step": 50698 }, { "epoch": 0.8812772688557076, "grad_norm": 0.6603713376570223, "learning_rate": 3.651333146116298e-08, "loss": 0.1122, "step": 50699 }, { "epoch": 0.8812946513932104, "grad_norm": 1.267878653715072, "learning_rate": 3.6502772613204055e-08, "loss": 0.2832, "step": 50700 }, { "epoch": 0.8813120339307132, "grad_norm": 0.7231734157451327, "learning_rate": 3.64922152343185e-08, "loss": 0.1588, "step": 50701 }, { "epoch": 0.881329416468216, "grad_norm": 1.0855422534751107, "learning_rate": 3.648165932453967e-08, "loss": 0.1871, "step": 50702 }, { "epoch": 0.8813467990057189, "grad_norm": 0.7918740240554903, "learning_rate": 3.6471104883900926e-08, "loss": 0.1906, "step": 50703 }, { "epoch": 0.8813641815432217, "grad_norm": 0.7725901985115247, "learning_rate": 3.646055191243608e-08, "loss": 0.1769, "step": 50704 }, { "epoch": 0.8813815640807245, "grad_norm": 1.0035463918949263, "learning_rate": 3.645000041017826e-08, "loss": 0.2, "step": 50705 }, { "epoch": 0.8813989466182274, "grad_norm": 1.396775640017363, "learning_rate": 3.64394503771609e-08, "loss": 0.1615, "step": 50706 }, { "epoch": 0.8814163291557302, "grad_norm": 1.551653324949368, "learning_rate": 3.6428901813417624e-08, "loss": 0.1401, "step": 50707 }, { "epoch": 0.8814337116932329, "grad_norm": 1.9807047295848024, "learning_rate": 3.6418354718981745e-08, "loss": 0.2492, "step": 50708 }, { "epoch": 0.8814510942307358, "grad_norm": 1.8735697666347961, "learning_rate": 3.64078090938868e-08, "loss": 0.3239, "step": 50709 }, { "epoch": 0.8814684767682386, "grad_norm": 2.800307990273305, "learning_rate": 3.639726493816614e-08, "loss": 0.2487, "step": 50710 }, { "epoch": 0.8814858593057414, "grad_norm": 1.037982853118131, "learning_rate": 3.638672225185313e-08, "loss": 0.1961, "step": 50711 }, { "epoch": 0.8815032418432442, "grad_norm": 1.1983698224576846, "learning_rate": 3.63761810349813e-08, "loss": 0.1501, "step": 50712 }, { "epoch": 0.8815206243807471, "grad_norm": 2.133384252357717, "learning_rate": 3.636564128758396e-08, "loss": 0.1924, "step": 50713 }, { "epoch": 0.8815380069182499, "grad_norm": 1.7408374489750407, "learning_rate": 3.6355103009694524e-08, "loss": 0.3215, "step": 50714 }, { "epoch": 0.8815553894557527, "grad_norm": 1.0565435045500158, "learning_rate": 3.634456620134646e-08, "loss": 0.2904, "step": 50715 }, { "epoch": 0.8815727719932556, "grad_norm": 1.5356654696162162, "learning_rate": 3.633403086257319e-08, "loss": 0.146, "step": 50716 }, { "epoch": 0.8815901545307584, "grad_norm": 1.2076510688988684, "learning_rate": 3.632349699340809e-08, "loss": 0.167, "step": 50717 }, { "epoch": 0.8816075370682612, "grad_norm": 0.8759704084795705, "learning_rate": 3.631296459388433e-08, "loss": 0.1967, "step": 50718 }, { "epoch": 0.8816249196057641, "grad_norm": 1.5112956937586617, "learning_rate": 3.630243366403563e-08, "loss": 0.1643, "step": 50719 }, { "epoch": 0.8816423021432669, "grad_norm": 2.01505191952279, "learning_rate": 3.6291904203895165e-08, "loss": 0.2937, "step": 50720 }, { "epoch": 0.8816596846807697, "grad_norm": 1.288166146786927, "learning_rate": 3.628137621349636e-08, "loss": 0.2338, "step": 50721 }, { "epoch": 0.8816770672182725, "grad_norm": 1.7123549039631385, "learning_rate": 3.627084969287248e-08, "loss": 0.1761, "step": 50722 }, { "epoch": 0.8816944497557754, "grad_norm": 1.399751569985243, "learning_rate": 3.626032464205714e-08, "loss": 0.3788, "step": 50723 }, { "epoch": 0.8817118322932782, "grad_norm": 1.9119797549416204, "learning_rate": 3.624980106108349e-08, "loss": 0.1999, "step": 50724 }, { "epoch": 0.881729214830781, "grad_norm": 1.169133897719921, "learning_rate": 3.6239278949984965e-08, "loss": 0.1931, "step": 50725 }, { "epoch": 0.8817465973682839, "grad_norm": 1.9246398545280266, "learning_rate": 3.622875830879474e-08, "loss": 0.2006, "step": 50726 }, { "epoch": 0.8817639799057867, "grad_norm": 1.630810343252263, "learning_rate": 3.621823913754646e-08, "loss": 0.1688, "step": 50727 }, { "epoch": 0.8817813624432894, "grad_norm": 1.286654323077405, "learning_rate": 3.620772143627326e-08, "loss": 0.1786, "step": 50728 }, { "epoch": 0.8817987449807922, "grad_norm": 1.136641527544694, "learning_rate": 3.6197205205008574e-08, "loss": 0.2121, "step": 50729 }, { "epoch": 0.8818161275182951, "grad_norm": 1.9488621324284716, "learning_rate": 3.618669044378569e-08, "loss": 0.139, "step": 50730 }, { "epoch": 0.8818335100557979, "grad_norm": 2.063251217760053, "learning_rate": 3.617617715263793e-08, "loss": 0.1512, "step": 50731 }, { "epoch": 0.8818508925933007, "grad_norm": 0.8629243257826816, "learning_rate": 3.616566533159865e-08, "loss": 0.1084, "step": 50732 }, { "epoch": 0.8818682751308036, "grad_norm": 1.7963819169446849, "learning_rate": 3.6155154980701044e-08, "loss": 0.1543, "step": 50733 }, { "epoch": 0.8818856576683064, "grad_norm": 2.901833626814241, "learning_rate": 3.6144646099978584e-08, "loss": 0.1498, "step": 50734 }, { "epoch": 0.8819030402058092, "grad_norm": 0.9392451975821148, "learning_rate": 3.613413868946463e-08, "loss": 0.2358, "step": 50735 }, { "epoch": 0.8819204227433121, "grad_norm": 1.0200734035020416, "learning_rate": 3.612363274919228e-08, "loss": 0.1723, "step": 50736 }, { "epoch": 0.8819378052808149, "grad_norm": 1.4297695943929212, "learning_rate": 3.6113128279194826e-08, "loss": 0.1679, "step": 50737 }, { "epoch": 0.8819551878183177, "grad_norm": 2.2320046897808705, "learning_rate": 3.6102625279505803e-08, "loss": 0.1297, "step": 50738 }, { "epoch": 0.8819725703558206, "grad_norm": 1.3946649753908456, "learning_rate": 3.60921237501583e-08, "loss": 0.1879, "step": 50739 }, { "epoch": 0.8819899528933234, "grad_norm": 1.0574525341304652, "learning_rate": 3.608162369118573e-08, "loss": 0.1563, "step": 50740 }, { "epoch": 0.8820073354308262, "grad_norm": 1.5853808706086534, "learning_rate": 3.607112510262117e-08, "loss": 0.2045, "step": 50741 }, { "epoch": 0.882024717968329, "grad_norm": 1.0710940804081324, "learning_rate": 3.606062798449816e-08, "loss": 0.1716, "step": 50742 }, { "epoch": 0.8820421005058319, "grad_norm": 1.4474053619581981, "learning_rate": 3.6050132336849834e-08, "loss": 0.2435, "step": 50743 }, { "epoch": 0.8820594830433347, "grad_norm": 1.3241739888575497, "learning_rate": 3.603963815970945e-08, "loss": 0.3248, "step": 50744 }, { "epoch": 0.8820768655808375, "grad_norm": 1.0425611285238594, "learning_rate": 3.60291454531102e-08, "loss": 0.1385, "step": 50745 }, { "epoch": 0.8820942481183404, "grad_norm": 1.3716178979501792, "learning_rate": 3.60186542170855e-08, "loss": 0.1902, "step": 50746 }, { "epoch": 0.8821116306558432, "grad_norm": 2.0971650811825646, "learning_rate": 3.6008164451668547e-08, "loss": 0.199, "step": 50747 }, { "epoch": 0.8821290131933459, "grad_norm": 1.1179573188682856, "learning_rate": 3.599767615689253e-08, "loss": 0.3798, "step": 50748 }, { "epoch": 0.8821463957308487, "grad_norm": 2.27375315193054, "learning_rate": 3.598718933279077e-08, "loss": 0.2146, "step": 50749 }, { "epoch": 0.8821637782683516, "grad_norm": 1.7128803101514503, "learning_rate": 3.597670397939645e-08, "loss": 0.2092, "step": 50750 }, { "epoch": 0.8821811608058544, "grad_norm": 2.3384240149101356, "learning_rate": 3.5966220096742885e-08, "loss": 0.232, "step": 50751 }, { "epoch": 0.8821985433433572, "grad_norm": 1.5754378708475487, "learning_rate": 3.595573768486315e-08, "loss": 0.265, "step": 50752 }, { "epoch": 0.8822159258808601, "grad_norm": 1.5174942013189012, "learning_rate": 3.594525674379057e-08, "loss": 0.2079, "step": 50753 }, { "epoch": 0.8822333084183629, "grad_norm": 1.8093474226252975, "learning_rate": 3.5934777273558484e-08, "loss": 0.3234, "step": 50754 }, { "epoch": 0.8822506909558657, "grad_norm": 1.8052265236457836, "learning_rate": 3.5924299274199875e-08, "loss": 0.2344, "step": 50755 }, { "epoch": 0.8822680734933686, "grad_norm": 1.361784159431193, "learning_rate": 3.5913822745748e-08, "loss": 0.1857, "step": 50756 }, { "epoch": 0.8822854560308714, "grad_norm": 1.8326829704000636, "learning_rate": 3.590334768823616e-08, "loss": 0.1048, "step": 50757 }, { "epoch": 0.8823028385683742, "grad_norm": 1.6011515594421335, "learning_rate": 3.589287410169756e-08, "loss": 0.2167, "step": 50758 }, { "epoch": 0.882320221105877, "grad_norm": 1.6334923518199869, "learning_rate": 3.588240198616538e-08, "loss": 0.2015, "step": 50759 }, { "epoch": 0.8823376036433799, "grad_norm": 1.5534977623965398, "learning_rate": 3.587193134167277e-08, "loss": 0.2512, "step": 50760 }, { "epoch": 0.8823549861808827, "grad_norm": 1.9569912436120598, "learning_rate": 3.586146216825292e-08, "loss": 0.1411, "step": 50761 }, { "epoch": 0.8823723687183855, "grad_norm": 1.0110862828515663, "learning_rate": 3.585099446593903e-08, "loss": 0.3247, "step": 50762 }, { "epoch": 0.8823897512558884, "grad_norm": 1.344042285406357, "learning_rate": 3.5840528234764245e-08, "loss": 0.1694, "step": 50763 }, { "epoch": 0.8824071337933912, "grad_norm": 2.1611731689857905, "learning_rate": 3.583006347476175e-08, "loss": 0.2223, "step": 50764 }, { "epoch": 0.882424516330894, "grad_norm": 1.4970979288201391, "learning_rate": 3.5819600185964804e-08, "loss": 0.1661, "step": 50765 }, { "epoch": 0.8824418988683969, "grad_norm": 2.19440114197288, "learning_rate": 3.580913836840643e-08, "loss": 0.1753, "step": 50766 }, { "epoch": 0.8824592814058997, "grad_norm": 1.0772229712411456, "learning_rate": 3.579867802212e-08, "loss": 0.2564, "step": 50767 }, { "epoch": 0.8824766639434024, "grad_norm": 1.1647137547923458, "learning_rate": 3.578821914713837e-08, "loss": 0.1593, "step": 50768 }, { "epoch": 0.8824940464809052, "grad_norm": 1.0069571126486108, "learning_rate": 3.5777761743494893e-08, "loss": 0.1531, "step": 50769 }, { "epoch": 0.8825114290184081, "grad_norm": 1.9843645112648045, "learning_rate": 3.576730581122267e-08, "loss": 0.2468, "step": 50770 }, { "epoch": 0.8825288115559109, "grad_norm": 2.2174538153237893, "learning_rate": 3.575685135035483e-08, "loss": 0.1367, "step": 50771 }, { "epoch": 0.8825461940934137, "grad_norm": 2.1159133740504377, "learning_rate": 3.5746398360924414e-08, "loss": 0.1946, "step": 50772 }, { "epoch": 0.8825635766309166, "grad_norm": 0.7594391295726967, "learning_rate": 3.573594684296488e-08, "loss": 0.0936, "step": 50773 }, { "epoch": 0.8825809591684194, "grad_norm": 1.611753304371949, "learning_rate": 3.572549679650899e-08, "loss": 0.1481, "step": 50774 }, { "epoch": 0.8825983417059222, "grad_norm": 1.1583414772257243, "learning_rate": 3.571504822159005e-08, "loss": 0.233, "step": 50775 }, { "epoch": 0.882615724243425, "grad_norm": 1.3297290448497794, "learning_rate": 3.570460111824103e-08, "loss": 0.1461, "step": 50776 }, { "epoch": 0.8826331067809279, "grad_norm": 1.9637514593994274, "learning_rate": 3.5694155486495234e-08, "loss": 0.2266, "step": 50777 }, { "epoch": 0.8826504893184307, "grad_norm": 1.8352752528473013, "learning_rate": 3.5683711326385645e-08, "loss": 0.2617, "step": 50778 }, { "epoch": 0.8826678718559335, "grad_norm": 2.1011893919735933, "learning_rate": 3.5673268637945456e-08, "loss": 0.1903, "step": 50779 }, { "epoch": 0.8826852543934364, "grad_norm": 0.7666964380016468, "learning_rate": 3.566282742120769e-08, "loss": 0.2252, "step": 50780 }, { "epoch": 0.8827026369309392, "grad_norm": 1.0824650995817449, "learning_rate": 3.565238767620543e-08, "loss": 0.1391, "step": 50781 }, { "epoch": 0.882720019468442, "grad_norm": 0.9800372632154261, "learning_rate": 3.5641949402971825e-08, "loss": 0.1265, "step": 50782 }, { "epoch": 0.8827374020059449, "grad_norm": 1.438074958357168, "learning_rate": 3.5631512601539904e-08, "loss": 0.184, "step": 50783 }, { "epoch": 0.8827547845434477, "grad_norm": 1.5796554626831245, "learning_rate": 3.5621077271942747e-08, "loss": 0.1966, "step": 50784 }, { "epoch": 0.8827721670809505, "grad_norm": 1.0950871206695987, "learning_rate": 3.561064341421349e-08, "loss": 0.1504, "step": 50785 }, { "epoch": 0.8827895496184534, "grad_norm": 1.4868766589364912, "learning_rate": 3.5600211028385284e-08, "loss": 0.1378, "step": 50786 }, { "epoch": 0.8828069321559562, "grad_norm": 1.0825951522765294, "learning_rate": 3.558978011449093e-08, "loss": 0.1929, "step": 50787 }, { "epoch": 0.8828243146934589, "grad_norm": 1.3931401485896502, "learning_rate": 3.557935067256368e-08, "loss": 0.1969, "step": 50788 }, { "epoch": 0.8828416972309617, "grad_norm": 1.351315258776136, "learning_rate": 3.556892270263656e-08, "loss": 0.1595, "step": 50789 }, { "epoch": 0.8828590797684646, "grad_norm": 2.0610953205940734, "learning_rate": 3.5558496204742606e-08, "loss": 0.1772, "step": 50790 }, { "epoch": 0.8828764623059674, "grad_norm": 1.5670171225634368, "learning_rate": 3.554807117891489e-08, "loss": 0.146, "step": 50791 }, { "epoch": 0.8828938448434702, "grad_norm": 1.657609041308623, "learning_rate": 3.553764762518646e-08, "loss": 0.2189, "step": 50792 }, { "epoch": 0.882911227380973, "grad_norm": 1.8447059082762425, "learning_rate": 3.5527225543590276e-08, "loss": 0.1349, "step": 50793 }, { "epoch": 0.8829286099184759, "grad_norm": 1.0159979242740975, "learning_rate": 3.551680493415948e-08, "loss": 0.1766, "step": 50794 }, { "epoch": 0.8829459924559787, "grad_norm": 1.2313857842679279, "learning_rate": 3.550638579692694e-08, "loss": 0.098, "step": 50795 }, { "epoch": 0.8829633749934815, "grad_norm": 1.1467759626889222, "learning_rate": 3.5495968131925844e-08, "loss": 0.109, "step": 50796 }, { "epoch": 0.8829807575309844, "grad_norm": 0.9989720534516643, "learning_rate": 3.5485551939189176e-08, "loss": 0.1264, "step": 50797 }, { "epoch": 0.8829981400684872, "grad_norm": 1.7084243166000874, "learning_rate": 3.547513721874995e-08, "loss": 0.1696, "step": 50798 }, { "epoch": 0.88301552260599, "grad_norm": 1.2986760271856679, "learning_rate": 3.5464723970641095e-08, "loss": 0.1988, "step": 50799 }, { "epoch": 0.8830329051434929, "grad_norm": 1.2485971156373978, "learning_rate": 3.545431219489575e-08, "loss": 0.144, "step": 50800 }, { "epoch": 0.8830502876809957, "grad_norm": 2.997143477339859, "learning_rate": 3.544390189154678e-08, "loss": 0.1274, "step": 50801 }, { "epoch": 0.8830676702184985, "grad_norm": 3.333351375452818, "learning_rate": 3.543349306062726e-08, "loss": 0.1967, "step": 50802 }, { "epoch": 0.8830850527560014, "grad_norm": 1.352875396105456, "learning_rate": 3.542308570217012e-08, "loss": 0.2128, "step": 50803 }, { "epoch": 0.8831024352935042, "grad_norm": 0.9078839530660998, "learning_rate": 3.5412679816208433e-08, "loss": 0.1807, "step": 50804 }, { "epoch": 0.883119817831007, "grad_norm": 1.2735457297435662, "learning_rate": 3.5402275402775184e-08, "loss": 0.2419, "step": 50805 }, { "epoch": 0.8831372003685098, "grad_norm": 0.9585772975693325, "learning_rate": 3.539187246190328e-08, "loss": 0.1915, "step": 50806 }, { "epoch": 0.8831545829060127, "grad_norm": 1.2088589561777188, "learning_rate": 3.538147099362559e-08, "loss": 0.2242, "step": 50807 }, { "epoch": 0.8831719654435154, "grad_norm": 1.3153109072438969, "learning_rate": 3.537107099797532e-08, "loss": 0.1919, "step": 50808 }, { "epoch": 0.8831893479810182, "grad_norm": 1.8952729446088834, "learning_rate": 3.536067247498531e-08, "loss": 0.1936, "step": 50809 }, { "epoch": 0.883206730518521, "grad_norm": 1.3347666025404432, "learning_rate": 3.535027542468855e-08, "loss": 0.2811, "step": 50810 }, { "epoch": 0.8832241130560239, "grad_norm": 1.7481385597842323, "learning_rate": 3.533987984711795e-08, "loss": 0.1706, "step": 50811 }, { "epoch": 0.8832414955935267, "grad_norm": 1.2921844402249225, "learning_rate": 3.5329485742306486e-08, "loss": 0.2122, "step": 50812 }, { "epoch": 0.8832588781310295, "grad_norm": 0.8329143182180643, "learning_rate": 3.531909311028714e-08, "loss": 0.0811, "step": 50813 }, { "epoch": 0.8832762606685324, "grad_norm": 1.1574391751047721, "learning_rate": 3.5308701951092754e-08, "loss": 0.2157, "step": 50814 }, { "epoch": 0.8832936432060352, "grad_norm": 1.2165361472661063, "learning_rate": 3.529831226475627e-08, "loss": 0.2064, "step": 50815 }, { "epoch": 0.883311025743538, "grad_norm": 1.3353543698342827, "learning_rate": 3.5287924051310755e-08, "loss": 0.2311, "step": 50816 }, { "epoch": 0.8833284082810409, "grad_norm": 2.378163911863, "learning_rate": 3.527753731078903e-08, "loss": 0.1981, "step": 50817 }, { "epoch": 0.8833457908185437, "grad_norm": 1.1054423827010322, "learning_rate": 3.5267152043224054e-08, "loss": 0.1752, "step": 50818 }, { "epoch": 0.8833631733560465, "grad_norm": 1.1766650955787095, "learning_rate": 3.525676824864871e-08, "loss": 0.2324, "step": 50819 }, { "epoch": 0.8833805558935494, "grad_norm": 1.2050431035728868, "learning_rate": 3.524638592709595e-08, "loss": 0.1084, "step": 50820 }, { "epoch": 0.8833979384310522, "grad_norm": 2.6040960994613753, "learning_rate": 3.523600507859864e-08, "loss": 0.2104, "step": 50821 }, { "epoch": 0.883415320968555, "grad_norm": 2.0855095257264233, "learning_rate": 3.52256257031896e-08, "loss": 0.2441, "step": 50822 }, { "epoch": 0.8834327035060578, "grad_norm": 0.9276346256510292, "learning_rate": 3.521524780090196e-08, "loss": 0.1705, "step": 50823 }, { "epoch": 0.8834500860435607, "grad_norm": 4.3800141710274785, "learning_rate": 3.520487137176853e-08, "loss": 0.222, "step": 50824 }, { "epoch": 0.8834674685810635, "grad_norm": 1.3721972098365542, "learning_rate": 3.519449641582206e-08, "loss": 0.1803, "step": 50825 }, { "epoch": 0.8834848511185663, "grad_norm": 1.1567476412127926, "learning_rate": 3.518412293309542e-08, "loss": 0.153, "step": 50826 }, { "epoch": 0.8835022336560691, "grad_norm": 1.6804200368927864, "learning_rate": 3.517375092362168e-08, "loss": 0.1621, "step": 50827 }, { "epoch": 0.8835196161935719, "grad_norm": 0.9012390790109746, "learning_rate": 3.516338038743366e-08, "loss": 0.1516, "step": 50828 }, { "epoch": 0.8835369987310747, "grad_norm": 1.8386411380953718, "learning_rate": 3.515301132456416e-08, "loss": 0.2173, "step": 50829 }, { "epoch": 0.8835543812685775, "grad_norm": 1.8534966656065348, "learning_rate": 3.514264373504611e-08, "loss": 0.2512, "step": 50830 }, { "epoch": 0.8835717638060804, "grad_norm": 1.0943877477031574, "learning_rate": 3.5132277618912355e-08, "loss": 0.1392, "step": 50831 }, { "epoch": 0.8835891463435832, "grad_norm": 2.292579950593339, "learning_rate": 3.512191297619571e-08, "loss": 0.1361, "step": 50832 }, { "epoch": 0.883606528881086, "grad_norm": 0.9630258354671121, "learning_rate": 3.511154980692904e-08, "loss": 0.1511, "step": 50833 }, { "epoch": 0.8836239114185889, "grad_norm": 1.1160623886808674, "learning_rate": 3.5101188111145153e-08, "loss": 0.1669, "step": 50834 }, { "epoch": 0.8836412939560917, "grad_norm": 1.1037102422951568, "learning_rate": 3.509082788887702e-08, "loss": 0.1325, "step": 50835 }, { "epoch": 0.8836586764935945, "grad_norm": 2.3076955993692985, "learning_rate": 3.5080469140157385e-08, "loss": 0.201, "step": 50836 }, { "epoch": 0.8836760590310974, "grad_norm": 1.7054576104250212, "learning_rate": 3.507011186501907e-08, "loss": 0.2263, "step": 50837 }, { "epoch": 0.8836934415686002, "grad_norm": 1.7962834680896953, "learning_rate": 3.5059756063494985e-08, "loss": 0.2346, "step": 50838 }, { "epoch": 0.883710824106103, "grad_norm": 1.224025542493715, "learning_rate": 3.504940173561788e-08, "loss": 0.143, "step": 50839 }, { "epoch": 0.8837282066436059, "grad_norm": 1.1571518934445726, "learning_rate": 3.503904888142056e-08, "loss": 0.1951, "step": 50840 }, { "epoch": 0.8837455891811087, "grad_norm": 1.615952431377768, "learning_rate": 3.50286975009359e-08, "loss": 0.1633, "step": 50841 }, { "epoch": 0.8837629717186115, "grad_norm": 1.1042507085904243, "learning_rate": 3.501834759419658e-08, "loss": 0.1735, "step": 50842 }, { "epoch": 0.8837803542561143, "grad_norm": 0.9157766730355236, "learning_rate": 3.50079991612357e-08, "loss": 0.2379, "step": 50843 }, { "epoch": 0.8837977367936172, "grad_norm": 2.30532614559104, "learning_rate": 3.4997652202085724e-08, "loss": 0.3543, "step": 50844 }, { "epoch": 0.88381511933112, "grad_norm": 1.1044604042924064, "learning_rate": 3.4987306716779575e-08, "loss": 0.1188, "step": 50845 }, { "epoch": 0.8838325018686228, "grad_norm": 2.025541157360827, "learning_rate": 3.4976962705350054e-08, "loss": 0.2025, "step": 50846 }, { "epoch": 0.8838498844061256, "grad_norm": 0.7344658464300443, "learning_rate": 3.496662016783003e-08, "loss": 0.1972, "step": 50847 }, { "epoch": 0.8838672669436284, "grad_norm": 1.9668980881184444, "learning_rate": 3.495627910425214e-08, "loss": 0.1663, "step": 50848 }, { "epoch": 0.8838846494811312, "grad_norm": 1.3675310787141206, "learning_rate": 3.494593951464925e-08, "loss": 0.2224, "step": 50849 }, { "epoch": 0.883902032018634, "grad_norm": 1.389418049204413, "learning_rate": 3.493560139905411e-08, "loss": 0.203, "step": 50850 }, { "epoch": 0.8839194145561369, "grad_norm": 1.6598544215591051, "learning_rate": 3.492526475749946e-08, "loss": 0.1661, "step": 50851 }, { "epoch": 0.8839367970936397, "grad_norm": 2.0607178453925203, "learning_rate": 3.491492959001807e-08, "loss": 0.193, "step": 50852 }, { "epoch": 0.8839541796311425, "grad_norm": 2.0211348889258307, "learning_rate": 3.490459589664269e-08, "loss": 0.1792, "step": 50853 }, { "epoch": 0.8839715621686454, "grad_norm": 2.1160574726733263, "learning_rate": 3.489426367740611e-08, "loss": 0.2941, "step": 50854 }, { "epoch": 0.8839889447061482, "grad_norm": 0.9493238646712853, "learning_rate": 3.488393293234121e-08, "loss": 0.1667, "step": 50855 }, { "epoch": 0.884006327243651, "grad_norm": 2.11353930160694, "learning_rate": 3.4873603661480455e-08, "loss": 0.2142, "step": 50856 }, { "epoch": 0.8840237097811539, "grad_norm": 0.9712466103925383, "learning_rate": 3.48632758648566e-08, "loss": 0.0782, "step": 50857 }, { "epoch": 0.8840410923186567, "grad_norm": 2.1524740314101156, "learning_rate": 3.485294954250262e-08, "loss": 0.1741, "step": 50858 }, { "epoch": 0.8840584748561595, "grad_norm": 1.0540017463907119, "learning_rate": 3.484262469445115e-08, "loss": 0.1049, "step": 50859 }, { "epoch": 0.8840758573936623, "grad_norm": 1.1874676884521427, "learning_rate": 3.483230132073483e-08, "loss": 0.2309, "step": 50860 }, { "epoch": 0.8840932399311652, "grad_norm": 1.883514352518829, "learning_rate": 3.4821979421386373e-08, "loss": 0.138, "step": 50861 }, { "epoch": 0.884110622468668, "grad_norm": 0.8802990678391714, "learning_rate": 3.481165899643873e-08, "loss": 0.2126, "step": 50862 }, { "epoch": 0.8841280050061708, "grad_norm": 1.581318639089317, "learning_rate": 3.480134004592433e-08, "loss": 0.129, "step": 50863 }, { "epoch": 0.8841453875436737, "grad_norm": 4.669320129610731, "learning_rate": 3.479102256987604e-08, "loss": 0.2163, "step": 50864 }, { "epoch": 0.8841627700811765, "grad_norm": 1.2387595461923262, "learning_rate": 3.478070656832638e-08, "loss": 0.1505, "step": 50865 }, { "epoch": 0.8841801526186793, "grad_norm": 1.2935008717243928, "learning_rate": 3.4770392041308274e-08, "loss": 0.2312, "step": 50866 }, { "epoch": 0.884197535156182, "grad_norm": 1.3512466402253271, "learning_rate": 3.47600789888543e-08, "loss": 0.1737, "step": 50867 }, { "epoch": 0.8842149176936849, "grad_norm": 1.744887982262293, "learning_rate": 3.4749767410997167e-08, "loss": 0.19, "step": 50868 }, { "epoch": 0.8842323002311877, "grad_norm": 1.6147780826382112, "learning_rate": 3.473945730776956e-08, "loss": 0.2993, "step": 50869 }, { "epoch": 0.8842496827686905, "grad_norm": 1.2789718238294316, "learning_rate": 3.472914867920412e-08, "loss": 0.1755, "step": 50870 }, { "epoch": 0.8842670653061934, "grad_norm": 1.4443290189234175, "learning_rate": 3.4718841525333606e-08, "loss": 0.1778, "step": 50871 }, { "epoch": 0.8842844478436962, "grad_norm": 1.4092789921941034, "learning_rate": 3.4708535846190603e-08, "loss": 0.3348, "step": 50872 }, { "epoch": 0.884301830381199, "grad_norm": 2.0116618309378578, "learning_rate": 3.469823164180774e-08, "loss": 0.2677, "step": 50873 }, { "epoch": 0.8843192129187019, "grad_norm": 1.0280949635589014, "learning_rate": 3.468792891221789e-08, "loss": 0.257, "step": 50874 }, { "epoch": 0.8843365954562047, "grad_norm": 1.323524256332129, "learning_rate": 3.4677627657453466e-08, "loss": 0.1657, "step": 50875 }, { "epoch": 0.8843539779937075, "grad_norm": 1.2823996818200298, "learning_rate": 3.4667327877547114e-08, "loss": 0.1808, "step": 50876 }, { "epoch": 0.8843713605312103, "grad_norm": 2.85622519534072, "learning_rate": 3.4657029572531694e-08, "loss": 0.2441, "step": 50877 }, { "epoch": 0.8843887430687132, "grad_norm": 1.592676325864209, "learning_rate": 3.464673274243973e-08, "loss": 0.2034, "step": 50878 }, { "epoch": 0.884406125606216, "grad_norm": 1.3905271553117535, "learning_rate": 3.463643738730382e-08, "loss": 0.1877, "step": 50879 }, { "epoch": 0.8844235081437188, "grad_norm": 1.0031666851064418, "learning_rate": 3.462614350715665e-08, "loss": 0.1785, "step": 50880 }, { "epoch": 0.8844408906812217, "grad_norm": 1.988723892309639, "learning_rate": 3.4615851102030855e-08, "loss": 0.1709, "step": 50881 }, { "epoch": 0.8844582732187245, "grad_norm": 2.1110878016790795, "learning_rate": 3.4605560171959035e-08, "loss": 0.1435, "step": 50882 }, { "epoch": 0.8844756557562273, "grad_norm": 2.528418443920877, "learning_rate": 3.459527071697377e-08, "loss": 0.2155, "step": 50883 }, { "epoch": 0.8844930382937302, "grad_norm": 1.0196027381051467, "learning_rate": 3.4584982737107634e-08, "loss": 0.1928, "step": 50884 }, { "epoch": 0.884510420831233, "grad_norm": 0.9902000346410529, "learning_rate": 3.4574696232393395e-08, "loss": 0.1718, "step": 50885 }, { "epoch": 0.8845278033687358, "grad_norm": 0.8285461858870948, "learning_rate": 3.4564411202863577e-08, "loss": 0.2034, "step": 50886 }, { "epoch": 0.8845451859062385, "grad_norm": 1.0763403454802631, "learning_rate": 3.455412764855081e-08, "loss": 0.1174, "step": 50887 }, { "epoch": 0.8845625684437414, "grad_norm": 1.1125641443334089, "learning_rate": 3.454384556948753e-08, "loss": 0.1152, "step": 50888 }, { "epoch": 0.8845799509812442, "grad_norm": 2.6323684450248788, "learning_rate": 3.453356496570653e-08, "loss": 0.1652, "step": 50889 }, { "epoch": 0.884597333518747, "grad_norm": 0.9903238435526714, "learning_rate": 3.452328583724029e-08, "loss": 0.1702, "step": 50890 }, { "epoch": 0.8846147160562499, "grad_norm": 0.6933631411856931, "learning_rate": 3.451300818412145e-08, "loss": 0.1703, "step": 50891 }, { "epoch": 0.8846320985937527, "grad_norm": 1.0694177922182477, "learning_rate": 3.450273200638243e-08, "loss": 0.1348, "step": 50892 }, { "epoch": 0.8846494811312555, "grad_norm": 1.195847781559673, "learning_rate": 3.449245730405609e-08, "loss": 0.2129, "step": 50893 }, { "epoch": 0.8846668636687584, "grad_norm": 1.5017877726707016, "learning_rate": 3.4482184077174737e-08, "loss": 0.1433, "step": 50894 }, { "epoch": 0.8846842462062612, "grad_norm": 1.4679446893147998, "learning_rate": 3.447191232577101e-08, "loss": 0.1673, "step": 50895 }, { "epoch": 0.884701628743764, "grad_norm": 1.6734474744002168, "learning_rate": 3.446164204987745e-08, "loss": 0.2121, "step": 50896 }, { "epoch": 0.8847190112812668, "grad_norm": 1.0480201888742737, "learning_rate": 3.445137324952668e-08, "loss": 0.2251, "step": 50897 }, { "epoch": 0.8847363938187697, "grad_norm": 1.2447018120288766, "learning_rate": 3.444110592475119e-08, "loss": 0.1792, "step": 50898 }, { "epoch": 0.8847537763562725, "grad_norm": 1.4819453418461566, "learning_rate": 3.4430840075583504e-08, "loss": 0.1455, "step": 50899 }, { "epoch": 0.8847711588937753, "grad_norm": 2.859311959567704, "learning_rate": 3.442057570205625e-08, "loss": 0.2428, "step": 50900 }, { "epoch": 0.8847885414312782, "grad_norm": 1.3053932805512596, "learning_rate": 3.441031280420187e-08, "loss": 0.2357, "step": 50901 }, { "epoch": 0.884805923968781, "grad_norm": 1.2907656106023395, "learning_rate": 3.440005138205293e-08, "loss": 0.1415, "step": 50902 }, { "epoch": 0.8848233065062838, "grad_norm": 1.3369912611584054, "learning_rate": 3.438979143564197e-08, "loss": 0.227, "step": 50903 }, { "epoch": 0.8848406890437867, "grad_norm": 1.2551348367534665, "learning_rate": 3.437953296500135e-08, "loss": 0.1454, "step": 50904 }, { "epoch": 0.8848580715812895, "grad_norm": 1.8328653584602892, "learning_rate": 3.436927597016387e-08, "loss": 0.2145, "step": 50905 }, { "epoch": 0.8848754541187923, "grad_norm": 3.6464564451883144, "learning_rate": 3.4359020451161905e-08, "loss": 0.2797, "step": 50906 }, { "epoch": 0.884892836656295, "grad_norm": 1.0587064275748526, "learning_rate": 3.4348766408027805e-08, "loss": 0.1336, "step": 50907 }, { "epoch": 0.8849102191937979, "grad_norm": 1.6930532500093978, "learning_rate": 3.433851384079434e-08, "loss": 0.2183, "step": 50908 }, { "epoch": 0.8849276017313007, "grad_norm": 1.1210866595591025, "learning_rate": 3.432826274949385e-08, "loss": 0.2168, "step": 50909 }, { "epoch": 0.8849449842688035, "grad_norm": 1.9085241415257328, "learning_rate": 3.431801313415883e-08, "loss": 0.2911, "step": 50910 }, { "epoch": 0.8849623668063064, "grad_norm": 1.2347937861475309, "learning_rate": 3.4307764994821856e-08, "loss": 0.1086, "step": 50911 }, { "epoch": 0.8849797493438092, "grad_norm": 0.8194386860072899, "learning_rate": 3.429751833151529e-08, "loss": 0.0819, "step": 50912 }, { "epoch": 0.884997131881312, "grad_norm": 4.03472197573304, "learning_rate": 3.428727314427165e-08, "loss": 0.2534, "step": 50913 }, { "epoch": 0.8850145144188148, "grad_norm": 1.4790233509654185, "learning_rate": 3.427702943312344e-08, "loss": 0.3916, "step": 50914 }, { "epoch": 0.8850318969563177, "grad_norm": 1.3090997865261018, "learning_rate": 3.426678719810305e-08, "loss": 0.2191, "step": 50915 }, { "epoch": 0.8850492794938205, "grad_norm": 1.0405079509168735, "learning_rate": 3.4256546439243084e-08, "loss": 0.1661, "step": 50916 }, { "epoch": 0.8850666620313233, "grad_norm": 2.0348528728114266, "learning_rate": 3.424630715657595e-08, "loss": 0.1324, "step": 50917 }, { "epoch": 0.8850840445688262, "grad_norm": 1.3689230259723455, "learning_rate": 3.4236069350134014e-08, "loss": 0.2009, "step": 50918 }, { "epoch": 0.885101427106329, "grad_norm": 2.215677678689589, "learning_rate": 3.4225833019949804e-08, "loss": 0.331, "step": 50919 }, { "epoch": 0.8851188096438318, "grad_norm": 0.9078028717564564, "learning_rate": 3.421559816605574e-08, "loss": 0.2165, "step": 50920 }, { "epoch": 0.8851361921813347, "grad_norm": 1.1658040664786047, "learning_rate": 3.4205364788484294e-08, "loss": 0.1279, "step": 50921 }, { "epoch": 0.8851535747188375, "grad_norm": 1.0775937994215081, "learning_rate": 3.419513288726783e-08, "loss": 0.1378, "step": 50922 }, { "epoch": 0.8851709572563403, "grad_norm": 1.9868992136855554, "learning_rate": 3.418490246243877e-08, "loss": 0.2413, "step": 50923 }, { "epoch": 0.8851883397938431, "grad_norm": 1.7358765260753766, "learning_rate": 3.417467351402964e-08, "loss": 0.2354, "step": 50924 }, { "epoch": 0.885205722331346, "grad_norm": 1.3033890662686223, "learning_rate": 3.416444604207291e-08, "loss": 0.2011, "step": 50925 }, { "epoch": 0.8852231048688488, "grad_norm": 1.5267070685831952, "learning_rate": 3.415422004660079e-08, "loss": 0.1705, "step": 50926 }, { "epoch": 0.8852404874063515, "grad_norm": 2.074500591200506, "learning_rate": 3.4143995527645794e-08, "loss": 0.2118, "step": 50927 }, { "epoch": 0.8852578699438544, "grad_norm": 1.2272046233456375, "learning_rate": 3.4133772485240406e-08, "loss": 0.3015, "step": 50928 }, { "epoch": 0.8852752524813572, "grad_norm": 1.880802648634521, "learning_rate": 3.412355091941693e-08, "loss": 0.2648, "step": 50929 }, { "epoch": 0.88529263501886, "grad_norm": 1.3339911566790459, "learning_rate": 3.4113330830207784e-08, "loss": 0.1471, "step": 50930 }, { "epoch": 0.8853100175563628, "grad_norm": 1.4602638671064674, "learning_rate": 3.4103112217645394e-08, "loss": 0.2054, "step": 50931 }, { "epoch": 0.8853274000938657, "grad_norm": 1.3111667498089352, "learning_rate": 3.4092895081762055e-08, "loss": 0.2403, "step": 50932 }, { "epoch": 0.8853447826313685, "grad_norm": 1.700352735825464, "learning_rate": 3.40826794225903e-08, "loss": 0.1595, "step": 50933 }, { "epoch": 0.8853621651688713, "grad_norm": 0.9213947898942783, "learning_rate": 3.407246524016227e-08, "loss": 0.1214, "step": 50934 }, { "epoch": 0.8853795477063742, "grad_norm": 1.6364881805621494, "learning_rate": 3.406225253451067e-08, "loss": 0.155, "step": 50935 }, { "epoch": 0.885396930243877, "grad_norm": 0.9296921692669997, "learning_rate": 3.405204130566763e-08, "loss": 0.1778, "step": 50936 }, { "epoch": 0.8854143127813798, "grad_norm": 1.0724789846537572, "learning_rate": 3.404183155366558e-08, "loss": 0.1481, "step": 50937 }, { "epoch": 0.8854316953188827, "grad_norm": 1.4900434890189593, "learning_rate": 3.4031623278536924e-08, "loss": 0.1602, "step": 50938 }, { "epoch": 0.8854490778563855, "grad_norm": 1.2266541974845073, "learning_rate": 3.402141648031392e-08, "loss": 0.1478, "step": 50939 }, { "epoch": 0.8854664603938883, "grad_norm": 2.2490427143546707, "learning_rate": 3.4011211159029e-08, "loss": 0.3653, "step": 50940 }, { "epoch": 0.8854838429313912, "grad_norm": 1.8023707330811516, "learning_rate": 3.400100731471445e-08, "loss": 0.1617, "step": 50941 }, { "epoch": 0.885501225468894, "grad_norm": 0.9043562172715627, "learning_rate": 3.399080494740264e-08, "loss": 0.1586, "step": 50942 }, { "epoch": 0.8855186080063968, "grad_norm": 1.759518576609361, "learning_rate": 3.398060405712594e-08, "loss": 0.1355, "step": 50943 }, { "epoch": 0.8855359905438996, "grad_norm": 2.01349844553853, "learning_rate": 3.3970404643916705e-08, "loss": 0.239, "step": 50944 }, { "epoch": 0.8855533730814025, "grad_norm": 1.0993530141320205, "learning_rate": 3.3960206707807194e-08, "loss": 0.1811, "step": 50945 }, { "epoch": 0.8855707556189053, "grad_norm": 1.013734522640938, "learning_rate": 3.39500102488296e-08, "loss": 0.1235, "step": 50946 }, { "epoch": 0.885588138156408, "grad_norm": 0.9137296769760458, "learning_rate": 3.3939815267016504e-08, "loss": 0.1662, "step": 50947 }, { "epoch": 0.8856055206939109, "grad_norm": 1.3940587504391277, "learning_rate": 3.392962176240011e-08, "loss": 0.1531, "step": 50948 }, { "epoch": 0.8856229032314137, "grad_norm": 1.180584997344013, "learning_rate": 3.3919429735012716e-08, "loss": 0.1337, "step": 50949 }, { "epoch": 0.8856402857689165, "grad_norm": 1.514842529438406, "learning_rate": 3.390923918488664e-08, "loss": 0.1225, "step": 50950 }, { "epoch": 0.8856576683064193, "grad_norm": 0.9876065944454597, "learning_rate": 3.389905011205413e-08, "loss": 0.2084, "step": 50951 }, { "epoch": 0.8856750508439222, "grad_norm": 0.895088769262454, "learning_rate": 3.38888625165476e-08, "loss": 0.126, "step": 50952 }, { "epoch": 0.885692433381425, "grad_norm": 1.4630162237898874, "learning_rate": 3.38786763983992e-08, "loss": 0.1618, "step": 50953 }, { "epoch": 0.8857098159189278, "grad_norm": 1.4164916034759234, "learning_rate": 3.386849175764117e-08, "loss": 0.234, "step": 50954 }, { "epoch": 0.8857271984564307, "grad_norm": 1.3171928758851292, "learning_rate": 3.385830859430605e-08, "loss": 0.1608, "step": 50955 }, { "epoch": 0.8857445809939335, "grad_norm": 1.8735712008828838, "learning_rate": 3.384812690842592e-08, "loss": 0.2285, "step": 50956 }, { "epoch": 0.8857619635314363, "grad_norm": 1.4639081670149205, "learning_rate": 3.383794670003309e-08, "loss": 0.131, "step": 50957 }, { "epoch": 0.8857793460689392, "grad_norm": 1.1158376405481238, "learning_rate": 3.382776796915987e-08, "loss": 0.0991, "step": 50958 }, { "epoch": 0.885796728606442, "grad_norm": 2.8259852675975314, "learning_rate": 3.381759071583845e-08, "loss": 0.3558, "step": 50959 }, { "epoch": 0.8858141111439448, "grad_norm": 1.57005186497638, "learning_rate": 3.380741494010114e-08, "loss": 0.1862, "step": 50960 }, { "epoch": 0.8858314936814476, "grad_norm": 2.716022406891669, "learning_rate": 3.379724064198014e-08, "loss": 0.3581, "step": 50961 }, { "epoch": 0.8858488762189505, "grad_norm": 1.2271044541171319, "learning_rate": 3.37870678215077e-08, "loss": 0.1335, "step": 50962 }, { "epoch": 0.8858662587564533, "grad_norm": 1.1923838942799012, "learning_rate": 3.377689647871623e-08, "loss": 0.1366, "step": 50963 }, { "epoch": 0.8858836412939561, "grad_norm": 2.9974276747706523, "learning_rate": 3.3766726613637776e-08, "loss": 0.1948, "step": 50964 }, { "epoch": 0.885901023831459, "grad_norm": 1.7433207525616468, "learning_rate": 3.375655822630452e-08, "loss": 0.2521, "step": 50965 }, { "epoch": 0.8859184063689617, "grad_norm": 1.5011623935742473, "learning_rate": 3.374639131674889e-08, "loss": 0.1947, "step": 50966 }, { "epoch": 0.8859357889064645, "grad_norm": 1.3288442837186807, "learning_rate": 3.373622588500297e-08, "loss": 0.1702, "step": 50967 }, { "epoch": 0.8859531714439673, "grad_norm": 2.2228283580470865, "learning_rate": 3.372606193109912e-08, "loss": 0.2002, "step": 50968 }, { "epoch": 0.8859705539814702, "grad_norm": 1.8027523224415736, "learning_rate": 3.371589945506942e-08, "loss": 0.1619, "step": 50969 }, { "epoch": 0.885987936518973, "grad_norm": 1.4471285337274067, "learning_rate": 3.370573845694608e-08, "loss": 0.1718, "step": 50970 }, { "epoch": 0.8860053190564758, "grad_norm": 3.1900122823104198, "learning_rate": 3.3695578936761395e-08, "loss": 0.2067, "step": 50971 }, { "epoch": 0.8860227015939787, "grad_norm": 1.2827933753412148, "learning_rate": 3.3685420894547567e-08, "loss": 0.2013, "step": 50972 }, { "epoch": 0.8860400841314815, "grad_norm": 1.4605457509692121, "learning_rate": 3.367526433033663e-08, "loss": 0.2269, "step": 50973 }, { "epoch": 0.8860574666689843, "grad_norm": 1.9912486623388599, "learning_rate": 3.3665109244160935e-08, "loss": 0.1932, "step": 50974 }, { "epoch": 0.8860748492064872, "grad_norm": 0.9676092053113591, "learning_rate": 3.365495563605275e-08, "loss": 0.1164, "step": 50975 }, { "epoch": 0.88609223174399, "grad_norm": 1.0430217044465955, "learning_rate": 3.364480350604404e-08, "loss": 0.135, "step": 50976 }, { "epoch": 0.8861096142814928, "grad_norm": 1.6832493953493455, "learning_rate": 3.3634652854167e-08, "loss": 0.37, "step": 50977 }, { "epoch": 0.8861269968189956, "grad_norm": 1.4972368945564536, "learning_rate": 3.362450368045394e-08, "loss": 0.229, "step": 50978 }, { "epoch": 0.8861443793564985, "grad_norm": 1.3010348852712457, "learning_rate": 3.3614355984937e-08, "loss": 0.1729, "step": 50979 }, { "epoch": 0.8861617618940013, "grad_norm": 1.5577442781594135, "learning_rate": 3.360420976764833e-08, "loss": 0.1378, "step": 50980 }, { "epoch": 0.8861791444315041, "grad_norm": 1.055451361952906, "learning_rate": 3.359406502861995e-08, "loss": 0.1361, "step": 50981 }, { "epoch": 0.886196526969007, "grad_norm": 1.132571478006806, "learning_rate": 3.358392176788427e-08, "loss": 0.1801, "step": 50982 }, { "epoch": 0.8862139095065098, "grad_norm": 1.2502015688897665, "learning_rate": 3.357377998547328e-08, "loss": 0.1489, "step": 50983 }, { "epoch": 0.8862312920440126, "grad_norm": 1.7870223683526858, "learning_rate": 3.356363968141912e-08, "loss": 0.173, "step": 50984 }, { "epoch": 0.8862486745815155, "grad_norm": 1.6065610337717249, "learning_rate": 3.355350085575387e-08, "loss": 0.1966, "step": 50985 }, { "epoch": 0.8862660571190182, "grad_norm": 1.3870060677909006, "learning_rate": 3.3543363508509895e-08, "loss": 0.1142, "step": 50986 }, { "epoch": 0.886283439656521, "grad_norm": 4.211926181357149, "learning_rate": 3.353322763971911e-08, "loss": 0.2369, "step": 50987 }, { "epoch": 0.8863008221940238, "grad_norm": 2.280896098261692, "learning_rate": 3.352309324941377e-08, "loss": 0.2749, "step": 50988 }, { "epoch": 0.8863182047315267, "grad_norm": 1.574897462866431, "learning_rate": 3.351296033762591e-08, "loss": 0.1865, "step": 50989 }, { "epoch": 0.8863355872690295, "grad_norm": 1.3379023232212175, "learning_rate": 3.3502828904387656e-08, "loss": 0.1785, "step": 50990 }, { "epoch": 0.8863529698065323, "grad_norm": 1.4051777306397784, "learning_rate": 3.349269894973117e-08, "loss": 0.148, "step": 50991 }, { "epoch": 0.8863703523440352, "grad_norm": 1.24825907395237, "learning_rate": 3.3482570473688576e-08, "loss": 0.1611, "step": 50992 }, { "epoch": 0.886387734881538, "grad_norm": 2.024193091600216, "learning_rate": 3.3472443476291804e-08, "loss": 0.1996, "step": 50993 }, { "epoch": 0.8864051174190408, "grad_norm": 2.056417261913037, "learning_rate": 3.346231795757326e-08, "loss": 0.1031, "step": 50994 }, { "epoch": 0.8864224999565437, "grad_norm": 2.2218636572381536, "learning_rate": 3.3452193917564765e-08, "loss": 0.2076, "step": 50995 }, { "epoch": 0.8864398824940465, "grad_norm": 2.6559393360016905, "learning_rate": 3.3442071356298396e-08, "loss": 0.1455, "step": 50996 }, { "epoch": 0.8864572650315493, "grad_norm": 2.6369048549404477, "learning_rate": 3.3431950273806466e-08, "loss": 0.1674, "step": 50997 }, { "epoch": 0.8864746475690521, "grad_norm": 1.481049534291167, "learning_rate": 3.342183067012094e-08, "loss": 0.2476, "step": 50998 }, { "epoch": 0.886492030106555, "grad_norm": 1.3434554140183599, "learning_rate": 3.341171254527386e-08, "loss": 0.22, "step": 50999 }, { "epoch": 0.8865094126440578, "grad_norm": 1.5733857537248115, "learning_rate": 3.3401595899297306e-08, "loss": 0.1813, "step": 51000 }, { "epoch": 0.8865267951815606, "grad_norm": 0.9087849122515084, "learning_rate": 3.339148073222337e-08, "loss": 0.1773, "step": 51001 }, { "epoch": 0.8865441777190635, "grad_norm": 1.9920408700899443, "learning_rate": 3.338136704408406e-08, "loss": 0.1636, "step": 51002 }, { "epoch": 0.8865615602565663, "grad_norm": 1.5092204240771085, "learning_rate": 3.337125483491149e-08, "loss": 0.1191, "step": 51003 }, { "epoch": 0.8865789427940691, "grad_norm": 2.599894211555438, "learning_rate": 3.336114410473767e-08, "loss": 0.182, "step": 51004 }, { "epoch": 0.886596325331572, "grad_norm": 1.9393933516216695, "learning_rate": 3.3351034853594696e-08, "loss": 0.218, "step": 51005 }, { "epoch": 0.8866137078690747, "grad_norm": 1.3128321390995903, "learning_rate": 3.3340927081514534e-08, "loss": 0.1728, "step": 51006 }, { "epoch": 0.8866310904065775, "grad_norm": 4.17369421175955, "learning_rate": 3.3330820788529445e-08, "loss": 0.2536, "step": 51007 }, { "epoch": 0.8866484729440803, "grad_norm": 1.3876807173415369, "learning_rate": 3.332071597467107e-08, "loss": 0.3231, "step": 51008 }, { "epoch": 0.8866658554815832, "grad_norm": 1.1866755081810878, "learning_rate": 3.33106126399717e-08, "loss": 0.2118, "step": 51009 }, { "epoch": 0.886683238019086, "grad_norm": 1.4520261250360247, "learning_rate": 3.330051078446339e-08, "loss": 0.1386, "step": 51010 }, { "epoch": 0.8867006205565888, "grad_norm": 1.9320830343835615, "learning_rate": 3.3290410408177985e-08, "loss": 0.2914, "step": 51011 }, { "epoch": 0.8867180030940917, "grad_norm": 1.2674996212760208, "learning_rate": 3.328031151114757e-08, "loss": 0.1949, "step": 51012 }, { "epoch": 0.8867353856315945, "grad_norm": 1.7036513921468301, "learning_rate": 3.32702140934043e-08, "loss": 0.16, "step": 51013 }, { "epoch": 0.8867527681690973, "grad_norm": 0.9924387796363282, "learning_rate": 3.326011815498003e-08, "loss": 0.1593, "step": 51014 }, { "epoch": 0.8867701507066001, "grad_norm": 2.0254396365285636, "learning_rate": 3.325002369590668e-08, "loss": 0.2163, "step": 51015 }, { "epoch": 0.886787533244103, "grad_norm": 1.0427883811981058, "learning_rate": 3.3239930716216394e-08, "loss": 0.179, "step": 51016 }, { "epoch": 0.8868049157816058, "grad_norm": 1.2081536902531014, "learning_rate": 3.3229839215941136e-08, "loss": 0.2129, "step": 51017 }, { "epoch": 0.8868222983191086, "grad_norm": 1.8908313564694728, "learning_rate": 3.3219749195112886e-08, "loss": 0.1817, "step": 51018 }, { "epoch": 0.8868396808566115, "grad_norm": 1.4190821903044857, "learning_rate": 3.320966065376357e-08, "loss": 0.1543, "step": 51019 }, { "epoch": 0.8868570633941143, "grad_norm": 2.5157845726431862, "learning_rate": 3.319957359192521e-08, "loss": 0.1448, "step": 51020 }, { "epoch": 0.8868744459316171, "grad_norm": 1.354897971620919, "learning_rate": 3.318948800962978e-08, "loss": 0.2258, "step": 51021 }, { "epoch": 0.88689182846912, "grad_norm": 1.2805915531447163, "learning_rate": 3.3179403906909265e-08, "loss": 0.1107, "step": 51022 }, { "epoch": 0.8869092110066228, "grad_norm": 1.187082781020098, "learning_rate": 3.3169321283795514e-08, "loss": 0.1641, "step": 51023 }, { "epoch": 0.8869265935441256, "grad_norm": 1.389908856797752, "learning_rate": 3.315924014032062e-08, "loss": 0.1805, "step": 51024 }, { "epoch": 0.8869439760816284, "grad_norm": 1.2994345183260423, "learning_rate": 3.31491604765165e-08, "loss": 0.1705, "step": 51025 }, { "epoch": 0.8869613586191312, "grad_norm": 2.4732436163210494, "learning_rate": 3.3139082292415185e-08, "loss": 0.1534, "step": 51026 }, { "epoch": 0.886978741156634, "grad_norm": 1.2628674112229632, "learning_rate": 3.312900558804832e-08, "loss": 0.1158, "step": 51027 }, { "epoch": 0.8869961236941368, "grad_norm": 2.1944031288124286, "learning_rate": 3.311893036344815e-08, "loss": 0.1859, "step": 51028 }, { "epoch": 0.8870135062316397, "grad_norm": 2.6940993662593318, "learning_rate": 3.310885661864654e-08, "loss": 0.1757, "step": 51029 }, { "epoch": 0.8870308887691425, "grad_norm": 1.1699913852783872, "learning_rate": 3.309878435367536e-08, "loss": 0.2387, "step": 51030 }, { "epoch": 0.8870482713066453, "grad_norm": 1.570977739886714, "learning_rate": 3.308871356856646e-08, "loss": 0.2217, "step": 51031 }, { "epoch": 0.8870656538441481, "grad_norm": 2.852571918240419, "learning_rate": 3.307864426335199e-08, "loss": 0.1606, "step": 51032 }, { "epoch": 0.887083036381651, "grad_norm": 2.347830132421867, "learning_rate": 3.306857643806371e-08, "loss": 0.2755, "step": 51033 }, { "epoch": 0.8871004189191538, "grad_norm": 2.071634242419413, "learning_rate": 3.3058510092733525e-08, "loss": 0.2287, "step": 51034 }, { "epoch": 0.8871178014566566, "grad_norm": 1.0883060855893494, "learning_rate": 3.304844522739325e-08, "loss": 0.2015, "step": 51035 }, { "epoch": 0.8871351839941595, "grad_norm": 2.2981173714183694, "learning_rate": 3.3038381842075025e-08, "loss": 0.1917, "step": 51036 }, { "epoch": 0.8871525665316623, "grad_norm": 1.2309895313565242, "learning_rate": 3.30283199368106e-08, "loss": 0.1821, "step": 51037 }, { "epoch": 0.8871699490691651, "grad_norm": 1.1749228378503085, "learning_rate": 3.301825951163195e-08, "loss": 0.1773, "step": 51038 }, { "epoch": 0.887187331606668, "grad_norm": 1.436038985216448, "learning_rate": 3.3008200566570834e-08, "loss": 0.2435, "step": 51039 }, { "epoch": 0.8872047141441708, "grad_norm": 1.5006256302494525, "learning_rate": 3.299814310165927e-08, "loss": 0.1772, "step": 51040 }, { "epoch": 0.8872220966816736, "grad_norm": 0.8643103813349655, "learning_rate": 3.298808711692902e-08, "loss": 0.1316, "step": 51041 }, { "epoch": 0.8872394792191765, "grad_norm": 1.6945185581217816, "learning_rate": 3.2978032612412e-08, "loss": 0.1715, "step": 51042 }, { "epoch": 0.8872568617566793, "grad_norm": 0.987884752768289, "learning_rate": 3.296797958814002e-08, "loss": 0.1569, "step": 51043 }, { "epoch": 0.8872742442941821, "grad_norm": 1.5015779955185253, "learning_rate": 3.29579280441451e-08, "loss": 0.1548, "step": 51044 }, { "epoch": 0.8872916268316849, "grad_norm": 1.0694588825743567, "learning_rate": 3.294787798045906e-08, "loss": 0.2771, "step": 51045 }, { "epoch": 0.8873090093691877, "grad_norm": 2.279570334657551, "learning_rate": 3.293782939711359e-08, "loss": 0.2149, "step": 51046 }, { "epoch": 0.8873263919066905, "grad_norm": 1.044012799596133, "learning_rate": 3.292778229414067e-08, "loss": 0.1863, "step": 51047 }, { "epoch": 0.8873437744441933, "grad_norm": 1.5667367532111145, "learning_rate": 3.291773667157216e-08, "loss": 0.1604, "step": 51048 }, { "epoch": 0.8873611569816962, "grad_norm": 1.8317573224467354, "learning_rate": 3.290769252943987e-08, "loss": 0.1489, "step": 51049 }, { "epoch": 0.887378539519199, "grad_norm": 1.083693928765068, "learning_rate": 3.28976498677756e-08, "loss": 0.1266, "step": 51050 }, { "epoch": 0.8873959220567018, "grad_norm": 1.257232939007996, "learning_rate": 3.2887608686611224e-08, "loss": 0.3342, "step": 51051 }, { "epoch": 0.8874133045942046, "grad_norm": 1.1486770322496325, "learning_rate": 3.28775689859786e-08, "loss": 0.2145, "step": 51052 }, { "epoch": 0.8874306871317075, "grad_norm": 1.1508510155321419, "learning_rate": 3.286753076590942e-08, "loss": 0.2393, "step": 51053 }, { "epoch": 0.8874480696692103, "grad_norm": 1.7446400795934185, "learning_rate": 3.285749402643556e-08, "loss": 0.2072, "step": 51054 }, { "epoch": 0.8874654522067131, "grad_norm": 1.3396192502851296, "learning_rate": 3.284745876758893e-08, "loss": 0.1361, "step": 51055 }, { "epoch": 0.887482834744216, "grad_norm": 3.8543384614190552, "learning_rate": 3.283742498940123e-08, "loss": 0.2786, "step": 51056 }, { "epoch": 0.8875002172817188, "grad_norm": 0.8893856690963233, "learning_rate": 3.282739269190432e-08, "loss": 0.1611, "step": 51057 }, { "epoch": 0.8875175998192216, "grad_norm": 1.6340054549760503, "learning_rate": 3.281736187513001e-08, "loss": 0.1738, "step": 51058 }, { "epoch": 0.8875349823567245, "grad_norm": 1.7723971438658772, "learning_rate": 3.280733253911e-08, "loss": 0.1579, "step": 51059 }, { "epoch": 0.8875523648942273, "grad_norm": 1.4022088310096519, "learning_rate": 3.279730468387615e-08, "loss": 0.1251, "step": 51060 }, { "epoch": 0.8875697474317301, "grad_norm": 1.1092876860964673, "learning_rate": 3.278727830946026e-08, "loss": 0.172, "step": 51061 }, { "epoch": 0.887587129969233, "grad_norm": 1.2740758731501511, "learning_rate": 3.277725341589399e-08, "loss": 0.1811, "step": 51062 }, { "epoch": 0.8876045125067358, "grad_norm": 1.2944586490817092, "learning_rate": 3.276723000320925e-08, "loss": 0.1359, "step": 51063 }, { "epoch": 0.8876218950442386, "grad_norm": 1.4465464907692236, "learning_rate": 3.275720807143784e-08, "loss": 0.1571, "step": 51064 }, { "epoch": 0.8876392775817414, "grad_norm": 1.4131753098740343, "learning_rate": 3.2747187620611404e-08, "loss": 0.2119, "step": 51065 }, { "epoch": 0.8876566601192442, "grad_norm": 1.6787625858833592, "learning_rate": 3.273716865076165e-08, "loss": 0.1292, "step": 51066 }, { "epoch": 0.887674042656747, "grad_norm": 2.1713313466972965, "learning_rate": 3.2727151161920483e-08, "loss": 0.2134, "step": 51067 }, { "epoch": 0.8876914251942498, "grad_norm": 1.0200125799875133, "learning_rate": 3.271713515411961e-08, "loss": 0.1525, "step": 51068 }, { "epoch": 0.8877088077317526, "grad_norm": 1.0834422186736565, "learning_rate": 3.270712062739078e-08, "loss": 0.1198, "step": 51069 }, { "epoch": 0.8877261902692555, "grad_norm": 1.8960282064415608, "learning_rate": 3.269710758176575e-08, "loss": 0.1506, "step": 51070 }, { "epoch": 0.8877435728067583, "grad_norm": 0.8168784349547389, "learning_rate": 3.268709601727621e-08, "loss": 0.1103, "step": 51071 }, { "epoch": 0.8877609553442611, "grad_norm": 1.663646848217834, "learning_rate": 3.267708593395385e-08, "loss": 0.227, "step": 51072 }, { "epoch": 0.887778337881764, "grad_norm": 1.3848934964536475, "learning_rate": 3.266707733183055e-08, "loss": 0.1698, "step": 51073 }, { "epoch": 0.8877957204192668, "grad_norm": 0.6002876417549647, "learning_rate": 3.2657070210937775e-08, "loss": 0.1068, "step": 51074 }, { "epoch": 0.8878131029567696, "grad_norm": 1.4464634997020247, "learning_rate": 3.264706457130756e-08, "loss": 0.2182, "step": 51075 }, { "epoch": 0.8878304854942725, "grad_norm": 1.0119975213907506, "learning_rate": 3.263706041297143e-08, "loss": 0.0915, "step": 51076 }, { "epoch": 0.8878478680317753, "grad_norm": 1.928915105079923, "learning_rate": 3.262705773596108e-08, "loss": 0.1815, "step": 51077 }, { "epoch": 0.8878652505692781, "grad_norm": 2.670662132801185, "learning_rate": 3.261705654030833e-08, "loss": 0.2067, "step": 51078 }, { "epoch": 0.887882633106781, "grad_norm": 0.9827457721583294, "learning_rate": 3.260705682604481e-08, "loss": 0.166, "step": 51079 }, { "epoch": 0.8879000156442838, "grad_norm": 2.6213533382188894, "learning_rate": 3.259705859320222e-08, "loss": 0.1839, "step": 51080 }, { "epoch": 0.8879173981817866, "grad_norm": 1.1982346572845377, "learning_rate": 3.25870618418122e-08, "loss": 0.1493, "step": 51081 }, { "epoch": 0.8879347807192894, "grad_norm": 1.1339421316480789, "learning_rate": 3.257706657190645e-08, "loss": 0.1749, "step": 51082 }, { "epoch": 0.8879521632567923, "grad_norm": 1.4329950875945672, "learning_rate": 3.256707278351684e-08, "loss": 0.2006, "step": 51083 }, { "epoch": 0.8879695457942951, "grad_norm": 1.0347605917645948, "learning_rate": 3.255708047667477e-08, "loss": 0.1298, "step": 51084 }, { "epoch": 0.8879869283317979, "grad_norm": 1.4143190960462693, "learning_rate": 3.254708965141195e-08, "loss": 0.0952, "step": 51085 }, { "epoch": 0.8880043108693007, "grad_norm": 1.9443505828710914, "learning_rate": 3.2537100307760244e-08, "loss": 0.1746, "step": 51086 }, { "epoch": 0.8880216934068035, "grad_norm": 1.3335922850004274, "learning_rate": 3.252711244575118e-08, "loss": 0.1478, "step": 51087 }, { "epoch": 0.8880390759443063, "grad_norm": 1.1485210060684532, "learning_rate": 3.25171260654164e-08, "loss": 0.1428, "step": 51088 }, { "epoch": 0.8880564584818091, "grad_norm": 0.9281745735557811, "learning_rate": 3.250714116678765e-08, "loss": 0.124, "step": 51089 }, { "epoch": 0.888073841019312, "grad_norm": 1.659857572562468, "learning_rate": 3.2497157749896476e-08, "loss": 0.1627, "step": 51090 }, { "epoch": 0.8880912235568148, "grad_norm": 1.560196049222302, "learning_rate": 3.248717581477456e-08, "loss": 0.1982, "step": 51091 }, { "epoch": 0.8881086060943176, "grad_norm": 0.9592995567279843, "learning_rate": 3.247719536145355e-08, "loss": 0.1485, "step": 51092 }, { "epoch": 0.8881259886318205, "grad_norm": 1.2782766306867646, "learning_rate": 3.246721638996497e-08, "loss": 0.1141, "step": 51093 }, { "epoch": 0.8881433711693233, "grad_norm": 0.8772366004007708, "learning_rate": 3.2457238900340635e-08, "loss": 0.1403, "step": 51094 }, { "epoch": 0.8881607537068261, "grad_norm": 2.178631664583003, "learning_rate": 3.2447262892612124e-08, "loss": 0.2872, "step": 51095 }, { "epoch": 0.888178136244329, "grad_norm": 1.6507662117340551, "learning_rate": 3.243728836681098e-08, "loss": 0.1659, "step": 51096 }, { "epoch": 0.8881955187818318, "grad_norm": 1.7284837594054023, "learning_rate": 3.2427315322968773e-08, "loss": 0.2242, "step": 51097 }, { "epoch": 0.8882129013193346, "grad_norm": 0.9535449921162312, "learning_rate": 3.241734376111727e-08, "loss": 0.1242, "step": 51098 }, { "epoch": 0.8882302838568374, "grad_norm": 1.4752182240142508, "learning_rate": 3.2407373681287985e-08, "loss": 0.1171, "step": 51099 }, { "epoch": 0.8882476663943403, "grad_norm": 1.29698828395484, "learning_rate": 3.239740508351252e-08, "loss": 0.1948, "step": 51100 }, { "epoch": 0.8882650489318431, "grad_norm": 1.577614736335077, "learning_rate": 3.2387437967822454e-08, "loss": 0.1105, "step": 51101 }, { "epoch": 0.8882824314693459, "grad_norm": 1.4617415120661328, "learning_rate": 3.2377472334249475e-08, "loss": 0.231, "step": 51102 }, { "epoch": 0.8882998140068488, "grad_norm": 1.6755248651131032, "learning_rate": 3.236750818282508e-08, "loss": 0.2068, "step": 51103 }, { "epoch": 0.8883171965443516, "grad_norm": 1.2646045234063472, "learning_rate": 3.2357545513580886e-08, "loss": 0.2748, "step": 51104 }, { "epoch": 0.8883345790818543, "grad_norm": 1.2281620343459851, "learning_rate": 3.2347584326548326e-08, "loss": 0.1755, "step": 51105 }, { "epoch": 0.8883519616193571, "grad_norm": 1.9520110488125704, "learning_rate": 3.233762462175921e-08, "loss": 0.2383, "step": 51106 }, { "epoch": 0.88836934415686, "grad_norm": 1.2448282551305845, "learning_rate": 3.2327666399245e-08, "loss": 0.1507, "step": 51107 }, { "epoch": 0.8883867266943628, "grad_norm": 1.0983282215023529, "learning_rate": 3.231770965903724e-08, "loss": 0.1546, "step": 51108 }, { "epoch": 0.8884041092318656, "grad_norm": 0.9401815232610485, "learning_rate": 3.2307754401167564e-08, "loss": 0.1294, "step": 51109 }, { "epoch": 0.8884214917693685, "grad_norm": 1.1922157145346666, "learning_rate": 3.229780062566739e-08, "loss": 0.1532, "step": 51110 }, { "epoch": 0.8884388743068713, "grad_norm": 1.1777781249863148, "learning_rate": 3.228784833256837e-08, "loss": 0.2102, "step": 51111 }, { "epoch": 0.8884562568443741, "grad_norm": 0.9196055128756738, "learning_rate": 3.227789752190197e-08, "loss": 0.1351, "step": 51112 }, { "epoch": 0.888473639381877, "grad_norm": 1.7118984701189874, "learning_rate": 3.2267948193699823e-08, "loss": 0.1609, "step": 51113 }, { "epoch": 0.8884910219193798, "grad_norm": 1.7262767731867192, "learning_rate": 3.225800034799353e-08, "loss": 0.1468, "step": 51114 }, { "epoch": 0.8885084044568826, "grad_norm": 1.6525898772437397, "learning_rate": 3.2248053984814396e-08, "loss": 0.1445, "step": 51115 }, { "epoch": 0.8885257869943854, "grad_norm": 0.9732841045341973, "learning_rate": 3.2238109104194e-08, "loss": 0.1662, "step": 51116 }, { "epoch": 0.8885431695318883, "grad_norm": 1.6919810864388132, "learning_rate": 3.222816570616405e-08, "loss": 0.1628, "step": 51117 }, { "epoch": 0.8885605520693911, "grad_norm": 1.4140536472907494, "learning_rate": 3.221822379075584e-08, "loss": 0.1366, "step": 51118 }, { "epoch": 0.8885779346068939, "grad_norm": 1.3418398558099778, "learning_rate": 3.2208283358001076e-08, "loss": 0.1332, "step": 51119 }, { "epoch": 0.8885953171443968, "grad_norm": 1.1438274706283622, "learning_rate": 3.219834440793101e-08, "loss": 0.2456, "step": 51120 }, { "epoch": 0.8886126996818996, "grad_norm": 1.1856866485465556, "learning_rate": 3.2188406940577506e-08, "loss": 0.123, "step": 51121 }, { "epoch": 0.8886300822194024, "grad_norm": 1.7183369795318564, "learning_rate": 3.2178470955971816e-08, "loss": 0.1533, "step": 51122 }, { "epoch": 0.8886474647569053, "grad_norm": 1.378336946284977, "learning_rate": 3.216853645414541e-08, "loss": 0.1414, "step": 51123 }, { "epoch": 0.8886648472944081, "grad_norm": 1.3118697373864916, "learning_rate": 3.215860343512977e-08, "loss": 0.2084, "step": 51124 }, { "epoch": 0.8886822298319108, "grad_norm": 1.0068034145632478, "learning_rate": 3.2148671898956533e-08, "loss": 0.1053, "step": 51125 }, { "epoch": 0.8886996123694136, "grad_norm": 1.8544721785261602, "learning_rate": 3.213874184565712e-08, "loss": 0.2224, "step": 51126 }, { "epoch": 0.8887169949069165, "grad_norm": 1.4411667496940455, "learning_rate": 3.2128813275262956e-08, "loss": 0.1299, "step": 51127 }, { "epoch": 0.8887343774444193, "grad_norm": 1.2812736036946781, "learning_rate": 3.21188861878055e-08, "loss": 0.1489, "step": 51128 }, { "epoch": 0.8887517599819221, "grad_norm": 1.4724585780981319, "learning_rate": 3.21089605833163e-08, "loss": 0.1524, "step": 51129 }, { "epoch": 0.888769142519425, "grad_norm": 1.2542819257011548, "learning_rate": 3.2099036461826715e-08, "loss": 0.1737, "step": 51130 }, { "epoch": 0.8887865250569278, "grad_norm": 1.430246657122226, "learning_rate": 3.2089113823368266e-08, "loss": 0.1276, "step": 51131 }, { "epoch": 0.8888039075944306, "grad_norm": 1.2269490520351316, "learning_rate": 3.207919266797232e-08, "loss": 0.1395, "step": 51132 }, { "epoch": 0.8888212901319335, "grad_norm": 1.234568834276097, "learning_rate": 3.206927299567053e-08, "loss": 0.1301, "step": 51133 }, { "epoch": 0.8888386726694363, "grad_norm": 1.0263364821601058, "learning_rate": 3.205935480649413e-08, "loss": 0.1278, "step": 51134 }, { "epoch": 0.8888560552069391, "grad_norm": 1.1230290823228057, "learning_rate": 3.204943810047456e-08, "loss": 0.1817, "step": 51135 }, { "epoch": 0.8888734377444419, "grad_norm": 5.1074380675150595, "learning_rate": 3.203952287764339e-08, "loss": 0.2329, "step": 51136 }, { "epoch": 0.8888908202819448, "grad_norm": 2.0404251578892985, "learning_rate": 3.2029609138031934e-08, "loss": 0.2569, "step": 51137 }, { "epoch": 0.8889082028194476, "grad_norm": 2.4577409231316234, "learning_rate": 3.2019696881671665e-08, "loss": 0.1686, "step": 51138 }, { "epoch": 0.8889255853569504, "grad_norm": 1.1875132341423655, "learning_rate": 3.200978610859395e-08, "loss": 0.1383, "step": 51139 }, { "epoch": 0.8889429678944533, "grad_norm": 1.0533854073306785, "learning_rate": 3.199987681883026e-08, "loss": 0.197, "step": 51140 }, { "epoch": 0.8889603504319561, "grad_norm": 1.7613725769616488, "learning_rate": 3.198996901241202e-08, "loss": 0.1742, "step": 51141 }, { "epoch": 0.8889777329694589, "grad_norm": 1.2023169180928839, "learning_rate": 3.1980062689370535e-08, "loss": 0.116, "step": 51142 }, { "epoch": 0.8889951155069618, "grad_norm": 1.3303415624864545, "learning_rate": 3.197015784973722e-08, "loss": 0.1496, "step": 51143 }, { "epoch": 0.8890124980444646, "grad_norm": 3.7296642456648286, "learning_rate": 3.196025449354356e-08, "loss": 0.2061, "step": 51144 }, { "epoch": 0.8890298805819673, "grad_norm": 1.7897782261034167, "learning_rate": 3.195035262082091e-08, "loss": 0.1672, "step": 51145 }, { "epoch": 0.8890472631194701, "grad_norm": 1.7990205890390316, "learning_rate": 3.194045223160074e-08, "loss": 0.2262, "step": 51146 }, { "epoch": 0.889064645656973, "grad_norm": 1.1605067613726554, "learning_rate": 3.1930553325914154e-08, "loss": 0.1072, "step": 51147 }, { "epoch": 0.8890820281944758, "grad_norm": 0.8513721945347661, "learning_rate": 3.192065590379273e-08, "loss": 0.1961, "step": 51148 }, { "epoch": 0.8890994107319786, "grad_norm": 1.0659442484254888, "learning_rate": 3.1910759965267874e-08, "loss": 0.1195, "step": 51149 }, { "epoch": 0.8891167932694815, "grad_norm": 1.4914652981497034, "learning_rate": 3.190086551037086e-08, "loss": 0.113, "step": 51150 }, { "epoch": 0.8891341758069843, "grad_norm": 1.9496852702388976, "learning_rate": 3.189097253913298e-08, "loss": 0.2015, "step": 51151 }, { "epoch": 0.8891515583444871, "grad_norm": 1.2985678649365278, "learning_rate": 3.188108105158588e-08, "loss": 0.1944, "step": 51152 }, { "epoch": 0.8891689408819899, "grad_norm": 1.7908590405273916, "learning_rate": 3.1871191047760594e-08, "loss": 0.1843, "step": 51153 }, { "epoch": 0.8891863234194928, "grad_norm": 1.321210086815858, "learning_rate": 3.186130252768865e-08, "loss": 0.1104, "step": 51154 }, { "epoch": 0.8892037059569956, "grad_norm": 1.299679831208666, "learning_rate": 3.185141549140119e-08, "loss": 0.1087, "step": 51155 }, { "epoch": 0.8892210884944984, "grad_norm": 1.6982840371306718, "learning_rate": 3.18415299389298e-08, "loss": 0.2145, "step": 51156 }, { "epoch": 0.8892384710320013, "grad_norm": 1.7810191827740052, "learning_rate": 3.183164587030573e-08, "loss": 0.1613, "step": 51157 }, { "epoch": 0.8892558535695041, "grad_norm": 1.7291147482872178, "learning_rate": 3.182176328556024e-08, "loss": 0.2395, "step": 51158 }, { "epoch": 0.8892732361070069, "grad_norm": 2.245682279324769, "learning_rate": 3.181188218472469e-08, "loss": 0.1959, "step": 51159 }, { "epoch": 0.8892906186445098, "grad_norm": 0.980656085960826, "learning_rate": 3.180200256783044e-08, "loss": 0.1803, "step": 51160 }, { "epoch": 0.8893080011820126, "grad_norm": 1.5036653210795197, "learning_rate": 3.1792124434908753e-08, "loss": 0.2161, "step": 51161 }, { "epoch": 0.8893253837195154, "grad_norm": 1.1875195062353237, "learning_rate": 3.178224778599098e-08, "loss": 0.1526, "step": 51162 }, { "epoch": 0.8893427662570182, "grad_norm": 3.1078904789850337, "learning_rate": 3.1772372621108324e-08, "loss": 0.1528, "step": 51163 }, { "epoch": 0.8893601487945211, "grad_norm": 1.512538282475821, "learning_rate": 3.1762498940292205e-08, "loss": 0.1993, "step": 51164 }, { "epoch": 0.8893775313320238, "grad_norm": 1.07636417742584, "learning_rate": 3.175262674357399e-08, "loss": 0.1611, "step": 51165 }, { "epoch": 0.8893949138695266, "grad_norm": 1.9894362647250539, "learning_rate": 3.174275603098464e-08, "loss": 0.3317, "step": 51166 }, { "epoch": 0.8894122964070295, "grad_norm": 1.0821357010924642, "learning_rate": 3.173288680255576e-08, "loss": 0.1426, "step": 51167 }, { "epoch": 0.8894296789445323, "grad_norm": 1.1337656501821254, "learning_rate": 3.172301905831853e-08, "loss": 0.1797, "step": 51168 }, { "epoch": 0.8894470614820351, "grad_norm": 2.2783657823164427, "learning_rate": 3.1713152798304223e-08, "loss": 0.2125, "step": 51169 }, { "epoch": 0.889464444019538, "grad_norm": 0.9824590666031356, "learning_rate": 3.170328802254413e-08, "loss": 0.148, "step": 51170 }, { "epoch": 0.8894818265570408, "grad_norm": 1.0848437086255545, "learning_rate": 3.169342473106945e-08, "loss": 0.1245, "step": 51171 }, { "epoch": 0.8894992090945436, "grad_norm": 1.14453689636546, "learning_rate": 3.16835629239115e-08, "loss": 0.105, "step": 51172 }, { "epoch": 0.8895165916320464, "grad_norm": 0.8156680086920108, "learning_rate": 3.1673702601101524e-08, "loss": 0.128, "step": 51173 }, { "epoch": 0.8895339741695493, "grad_norm": 0.924672483271566, "learning_rate": 3.1663843762670727e-08, "loss": 0.1539, "step": 51174 }, { "epoch": 0.8895513567070521, "grad_norm": 0.6051155318704488, "learning_rate": 3.1653986408650465e-08, "loss": 0.1209, "step": 51175 }, { "epoch": 0.8895687392445549, "grad_norm": 2.262742873471375, "learning_rate": 3.164413053907195e-08, "loss": 0.1735, "step": 51176 }, { "epoch": 0.8895861217820578, "grad_norm": 1.8117466246969585, "learning_rate": 3.163427615396641e-08, "loss": 0.2137, "step": 51177 }, { "epoch": 0.8896035043195606, "grad_norm": 3.479412326611943, "learning_rate": 3.162442325336501e-08, "loss": 0.1714, "step": 51178 }, { "epoch": 0.8896208868570634, "grad_norm": 1.1947884987889754, "learning_rate": 3.16145718372991e-08, "loss": 0.1103, "step": 51179 }, { "epoch": 0.8896382693945663, "grad_norm": 1.8192288987994594, "learning_rate": 3.160472190579977e-08, "loss": 0.1172, "step": 51180 }, { "epoch": 0.8896556519320691, "grad_norm": 1.1892027266557352, "learning_rate": 3.159487345889833e-08, "loss": 0.1347, "step": 51181 }, { "epoch": 0.8896730344695719, "grad_norm": 1.5909297104215059, "learning_rate": 3.158502649662592e-08, "loss": 0.1433, "step": 51182 }, { "epoch": 0.8896904170070747, "grad_norm": 1.3898920054621393, "learning_rate": 3.157518101901385e-08, "loss": 0.1523, "step": 51183 }, { "epoch": 0.8897077995445776, "grad_norm": 1.4532411789610091, "learning_rate": 3.1565337026093376e-08, "loss": 0.1995, "step": 51184 }, { "epoch": 0.8897251820820803, "grad_norm": 1.276767661683144, "learning_rate": 3.155549451789558e-08, "loss": 0.1893, "step": 51185 }, { "epoch": 0.8897425646195831, "grad_norm": 1.3844730140888395, "learning_rate": 3.154565349445154e-08, "loss": 0.0886, "step": 51186 }, { "epoch": 0.889759947157086, "grad_norm": 1.5502692497091108, "learning_rate": 3.1535813955792635e-08, "loss": 0.1402, "step": 51187 }, { "epoch": 0.8897773296945888, "grad_norm": 1.0069732654994108, "learning_rate": 3.152597590195005e-08, "loss": 0.1228, "step": 51188 }, { "epoch": 0.8897947122320916, "grad_norm": 1.5530435449827165, "learning_rate": 3.151613933295494e-08, "loss": 0.182, "step": 51189 }, { "epoch": 0.8898120947695944, "grad_norm": 0.7642844874351645, "learning_rate": 3.1506304248838436e-08, "loss": 0.2042, "step": 51190 }, { "epoch": 0.8898294773070973, "grad_norm": 1.37523569341872, "learning_rate": 3.149647064963174e-08, "loss": 0.2259, "step": 51191 }, { "epoch": 0.8898468598446001, "grad_norm": 2.2461505065823766, "learning_rate": 3.148663853536598e-08, "loss": 0.2285, "step": 51192 }, { "epoch": 0.8898642423821029, "grad_norm": 1.002104963093371, "learning_rate": 3.1476807906072433e-08, "loss": 0.1438, "step": 51193 }, { "epoch": 0.8898816249196058, "grad_norm": 1.595512836074907, "learning_rate": 3.146697876178206e-08, "loss": 0.1791, "step": 51194 }, { "epoch": 0.8898990074571086, "grad_norm": 1.0564256934346659, "learning_rate": 3.1457151102526225e-08, "loss": 0.1884, "step": 51195 }, { "epoch": 0.8899163899946114, "grad_norm": 0.8300249799997105, "learning_rate": 3.1447324928336014e-08, "loss": 0.1285, "step": 51196 }, { "epoch": 0.8899337725321143, "grad_norm": 1.2388354711235823, "learning_rate": 3.143750023924252e-08, "loss": 0.1886, "step": 51197 }, { "epoch": 0.8899511550696171, "grad_norm": 0.7865718791747837, "learning_rate": 3.142767703527688e-08, "loss": 0.2188, "step": 51198 }, { "epoch": 0.8899685376071199, "grad_norm": 0.7828953806691862, "learning_rate": 3.141785531647029e-08, "loss": 0.1107, "step": 51199 }, { "epoch": 0.8899859201446227, "grad_norm": 1.5969994002155528, "learning_rate": 3.140803508285389e-08, "loss": 0.1276, "step": 51200 }, { "epoch": 0.8900033026821256, "grad_norm": 1.1451104172205882, "learning_rate": 3.139821633445866e-08, "loss": 0.1997, "step": 51201 }, { "epoch": 0.8900206852196284, "grad_norm": 1.1752648183295908, "learning_rate": 3.1388399071315854e-08, "loss": 0.1537, "step": 51202 }, { "epoch": 0.8900380677571312, "grad_norm": 1.5144628319295548, "learning_rate": 3.137858329345672e-08, "loss": 0.1729, "step": 51203 }, { "epoch": 0.8900554502946341, "grad_norm": 1.8337532605896998, "learning_rate": 3.1368769000912066e-08, "loss": 0.2287, "step": 51204 }, { "epoch": 0.8900728328321368, "grad_norm": 1.8935686435419048, "learning_rate": 3.1358956193713096e-08, "loss": 0.2066, "step": 51205 }, { "epoch": 0.8900902153696396, "grad_norm": 1.3540712913867552, "learning_rate": 3.134914487189105e-08, "loss": 0.1591, "step": 51206 }, { "epoch": 0.8901075979071424, "grad_norm": 1.2986390347190895, "learning_rate": 3.133933503547692e-08, "loss": 0.1474, "step": 51207 }, { "epoch": 0.8901249804446453, "grad_norm": 2.0416067703762986, "learning_rate": 3.132952668450178e-08, "loss": 0.1901, "step": 51208 }, { "epoch": 0.8901423629821481, "grad_norm": 1.6298151095228728, "learning_rate": 3.1319719818996784e-08, "loss": 0.1341, "step": 51209 }, { "epoch": 0.8901597455196509, "grad_norm": 1.5261859035623946, "learning_rate": 3.1309914438993e-08, "loss": 0.2335, "step": 51210 }, { "epoch": 0.8901771280571538, "grad_norm": 1.1378510061366431, "learning_rate": 3.130011054452142e-08, "loss": 0.1218, "step": 51211 }, { "epoch": 0.8901945105946566, "grad_norm": 2.2377971557885, "learning_rate": 3.129030813561323e-08, "loss": 0.1683, "step": 51212 }, { "epoch": 0.8902118931321594, "grad_norm": 1.2385288463813955, "learning_rate": 3.128050721229941e-08, "loss": 0.1876, "step": 51213 }, { "epoch": 0.8902292756696623, "grad_norm": 1.324212180912453, "learning_rate": 3.1270707774611106e-08, "loss": 0.1284, "step": 51214 }, { "epoch": 0.8902466582071651, "grad_norm": 1.404595773211717, "learning_rate": 3.126090982257945e-08, "loss": 0.2221, "step": 51215 }, { "epoch": 0.8902640407446679, "grad_norm": 1.171578394732169, "learning_rate": 3.1251113356235203e-08, "loss": 0.1664, "step": 51216 }, { "epoch": 0.8902814232821707, "grad_norm": 2.483147104784511, "learning_rate": 3.124131837560967e-08, "loss": 0.1865, "step": 51217 }, { "epoch": 0.8902988058196736, "grad_norm": 0.8779985740575229, "learning_rate": 3.1231524880733885e-08, "loss": 0.1788, "step": 51218 }, { "epoch": 0.8903161883571764, "grad_norm": 2.02019681536391, "learning_rate": 3.122173287163876e-08, "loss": 0.2431, "step": 51219 }, { "epoch": 0.8903335708946792, "grad_norm": 1.0900004720680296, "learning_rate": 3.1211942348355445e-08, "loss": 0.1833, "step": 51220 }, { "epoch": 0.8903509534321821, "grad_norm": 1.7335787732223127, "learning_rate": 3.120215331091486e-08, "loss": 0.1674, "step": 51221 }, { "epoch": 0.8903683359696849, "grad_norm": 1.2864319156451585, "learning_rate": 3.119236575934825e-08, "loss": 0.1511, "step": 51222 }, { "epoch": 0.8903857185071877, "grad_norm": 1.365910222702365, "learning_rate": 3.118257969368637e-08, "loss": 0.166, "step": 51223 }, { "epoch": 0.8904031010446906, "grad_norm": 2.3446507802411185, "learning_rate": 3.117279511396032e-08, "loss": 0.1639, "step": 51224 }, { "epoch": 0.8904204835821933, "grad_norm": 1.496987113388688, "learning_rate": 3.1163012020201216e-08, "loss": 0.1516, "step": 51225 }, { "epoch": 0.8904378661196961, "grad_norm": 1.258741160161482, "learning_rate": 3.1153230412440004e-08, "loss": 0.1367, "step": 51226 }, { "epoch": 0.8904552486571989, "grad_norm": 2.0585709895143136, "learning_rate": 3.114345029070764e-08, "loss": 0.2237, "step": 51227 }, { "epoch": 0.8904726311947018, "grad_norm": 1.1099399470887474, "learning_rate": 3.113367165503522e-08, "loss": 0.1814, "step": 51228 }, { "epoch": 0.8904900137322046, "grad_norm": 1.8195114664802734, "learning_rate": 3.1123894505453664e-08, "loss": 0.1262, "step": 51229 }, { "epoch": 0.8905073962697074, "grad_norm": 1.3581978621973978, "learning_rate": 3.111411884199394e-08, "loss": 0.1615, "step": 51230 }, { "epoch": 0.8905247788072103, "grad_norm": 0.8057138745479318, "learning_rate": 3.1104344664687144e-08, "loss": 0.1135, "step": 51231 }, { "epoch": 0.8905421613447131, "grad_norm": 2.7226897816452094, "learning_rate": 3.109457197356402e-08, "loss": 0.2558, "step": 51232 }, { "epoch": 0.8905595438822159, "grad_norm": 0.6117094286912077, "learning_rate": 3.108480076865583e-08, "loss": 0.1378, "step": 51233 }, { "epoch": 0.8905769264197188, "grad_norm": 1.4505218162677047, "learning_rate": 3.107503104999354e-08, "loss": 0.1416, "step": 51234 }, { "epoch": 0.8905943089572216, "grad_norm": 1.1678068761003066, "learning_rate": 3.1065262817607855e-08, "loss": 0.1218, "step": 51235 }, { "epoch": 0.8906116914947244, "grad_norm": 2.1689367632089, "learning_rate": 3.10554960715298e-08, "loss": 0.1596, "step": 51236 }, { "epoch": 0.8906290740322272, "grad_norm": 1.6033549686363096, "learning_rate": 3.104573081179052e-08, "loss": 0.1491, "step": 51237 }, { "epoch": 0.8906464565697301, "grad_norm": 1.2951752290663867, "learning_rate": 3.103596703842082e-08, "loss": 0.1439, "step": 51238 }, { "epoch": 0.8906638391072329, "grad_norm": 1.1376918853296005, "learning_rate": 3.102620475145168e-08, "loss": 0.1738, "step": 51239 }, { "epoch": 0.8906812216447357, "grad_norm": 2.3451164846285057, "learning_rate": 3.1016443950913965e-08, "loss": 0.175, "step": 51240 }, { "epoch": 0.8906986041822386, "grad_norm": 1.235197324925316, "learning_rate": 3.100668463683886e-08, "loss": 0.1214, "step": 51241 }, { "epoch": 0.8907159867197414, "grad_norm": 1.7008155007848629, "learning_rate": 3.099692680925703e-08, "loss": 0.1177, "step": 51242 }, { "epoch": 0.8907333692572442, "grad_norm": 1.8856371385962087, "learning_rate": 3.098717046819954e-08, "loss": 0.1341, "step": 51243 }, { "epoch": 0.8907507517947469, "grad_norm": 1.6106888272817754, "learning_rate": 3.097741561369716e-08, "loss": 0.1568, "step": 51244 }, { "epoch": 0.8907681343322498, "grad_norm": 1.4449705924531793, "learning_rate": 3.096766224578101e-08, "loss": 0.1877, "step": 51245 }, { "epoch": 0.8907855168697526, "grad_norm": 1.2289513808850023, "learning_rate": 3.095791036448192e-08, "loss": 0.1064, "step": 51246 }, { "epoch": 0.8908028994072554, "grad_norm": 1.9345905018276537, "learning_rate": 3.09481599698308e-08, "loss": 0.1057, "step": 51247 }, { "epoch": 0.8908202819447583, "grad_norm": 2.207163516550298, "learning_rate": 3.0938411061858516e-08, "loss": 0.1505, "step": 51248 }, { "epoch": 0.8908376644822611, "grad_norm": 1.2383838384819235, "learning_rate": 3.092866364059604e-08, "loss": 0.131, "step": 51249 }, { "epoch": 0.8908550470197639, "grad_norm": 1.5318356636764012, "learning_rate": 3.091891770607419e-08, "loss": 0.1937, "step": 51250 }, { "epoch": 0.8908724295572668, "grad_norm": 2.7013112882934887, "learning_rate": 3.090917325832393e-08, "loss": 0.1528, "step": 51251 }, { "epoch": 0.8908898120947696, "grad_norm": 1.8341509608455364, "learning_rate": 3.089943029737596e-08, "loss": 0.1888, "step": 51252 }, { "epoch": 0.8909071946322724, "grad_norm": 1.3792435971983399, "learning_rate": 3.088968882326154e-08, "loss": 0.1465, "step": 51253 }, { "epoch": 0.8909245771697752, "grad_norm": 2.430523981489641, "learning_rate": 3.0879948836011195e-08, "loss": 0.2208, "step": 51254 }, { "epoch": 0.8909419597072781, "grad_norm": 1.739448461517126, "learning_rate": 3.087021033565584e-08, "loss": 0.1104, "step": 51255 }, { "epoch": 0.8909593422447809, "grad_norm": 2.7246091572816544, "learning_rate": 3.086047332222652e-08, "loss": 0.1563, "step": 51256 }, { "epoch": 0.8909767247822837, "grad_norm": 1.3581441694368188, "learning_rate": 3.085073779575398e-08, "loss": 0.141, "step": 51257 }, { "epoch": 0.8909941073197866, "grad_norm": 1.3534897620721154, "learning_rate": 3.0841003756269134e-08, "loss": 0.1264, "step": 51258 }, { "epoch": 0.8910114898572894, "grad_norm": 2.257136884187673, "learning_rate": 3.083127120380274e-08, "loss": 0.1655, "step": 51259 }, { "epoch": 0.8910288723947922, "grad_norm": 1.151484536411594, "learning_rate": 3.082154013838567e-08, "loss": 0.2479, "step": 51260 }, { "epoch": 0.8910462549322951, "grad_norm": 1.348042589312056, "learning_rate": 3.081181056004889e-08, "loss": 0.119, "step": 51261 }, { "epoch": 0.8910636374697979, "grad_norm": 0.9831653794740852, "learning_rate": 3.080208246882304e-08, "loss": 0.1143, "step": 51262 }, { "epoch": 0.8910810200073007, "grad_norm": 1.1232615920423732, "learning_rate": 3.079235586473905e-08, "loss": 0.1227, "step": 51263 }, { "epoch": 0.8910984025448034, "grad_norm": 1.6999598471590178, "learning_rate": 3.078263074782783e-08, "loss": 0.2375, "step": 51264 }, { "epoch": 0.8911157850823063, "grad_norm": 1.2815978639599248, "learning_rate": 3.077290711812008e-08, "loss": 0.1561, "step": 51265 }, { "epoch": 0.8911331676198091, "grad_norm": 1.197580712567668, "learning_rate": 3.076318497564678e-08, "loss": 0.1474, "step": 51266 }, { "epoch": 0.8911505501573119, "grad_norm": 0.7663191431110375, "learning_rate": 3.075346432043846e-08, "loss": 0.1148, "step": 51267 }, { "epoch": 0.8911679326948148, "grad_norm": 1.1464976452321687, "learning_rate": 3.0743745152526204e-08, "loss": 0.1187, "step": 51268 }, { "epoch": 0.8911853152323176, "grad_norm": 1.7120695745550694, "learning_rate": 3.0734027471940706e-08, "loss": 0.1495, "step": 51269 }, { "epoch": 0.8912026977698204, "grad_norm": 1.0236634740491184, "learning_rate": 3.072431127871278e-08, "loss": 0.0898, "step": 51270 }, { "epoch": 0.8912200803073232, "grad_norm": 1.22335471186035, "learning_rate": 3.0714596572873175e-08, "loss": 0.1256, "step": 51271 }, { "epoch": 0.8912374628448261, "grad_norm": 1.2871933732986187, "learning_rate": 3.070488335445282e-08, "loss": 0.1368, "step": 51272 }, { "epoch": 0.8912548453823289, "grad_norm": 1.4334537655692499, "learning_rate": 3.0695171623482393e-08, "loss": 0.1073, "step": 51273 }, { "epoch": 0.8912722279198317, "grad_norm": 1.2739312122419342, "learning_rate": 3.068546137999267e-08, "loss": 0.1894, "step": 51274 }, { "epoch": 0.8912896104573346, "grad_norm": 1.2575945099928119, "learning_rate": 3.067575262401434e-08, "loss": 0.1342, "step": 51275 }, { "epoch": 0.8913069929948374, "grad_norm": 2.883992081896337, "learning_rate": 3.066604535557837e-08, "loss": 0.1483, "step": 51276 }, { "epoch": 0.8913243755323402, "grad_norm": 0.9633657883495149, "learning_rate": 3.0656339574715473e-08, "loss": 0.1361, "step": 51277 }, { "epoch": 0.8913417580698431, "grad_norm": 1.0223330271422122, "learning_rate": 3.064663528145633e-08, "loss": 0.1409, "step": 51278 }, { "epoch": 0.8913591406073459, "grad_norm": 1.527259946531643, "learning_rate": 3.0636932475831826e-08, "loss": 0.2087, "step": 51279 }, { "epoch": 0.8913765231448487, "grad_norm": 1.2856717472589152, "learning_rate": 3.062723115787258e-08, "loss": 0.1312, "step": 51280 }, { "epoch": 0.8913939056823516, "grad_norm": 1.1775999588923207, "learning_rate": 3.061753132760941e-08, "loss": 0.1827, "step": 51281 }, { "epoch": 0.8914112882198544, "grad_norm": 1.2805926219441799, "learning_rate": 3.0607832985073024e-08, "loss": 0.163, "step": 51282 }, { "epoch": 0.8914286707573572, "grad_norm": 2.0070197697814582, "learning_rate": 3.059813613029416e-08, "loss": 0.1739, "step": 51283 }, { "epoch": 0.8914460532948599, "grad_norm": 1.6777864774230284, "learning_rate": 3.058844076330364e-08, "loss": 0.1826, "step": 51284 }, { "epoch": 0.8914634358323628, "grad_norm": 1.6565609044858942, "learning_rate": 3.05787468841322e-08, "loss": 0.1784, "step": 51285 }, { "epoch": 0.8914808183698656, "grad_norm": 1.56190416577954, "learning_rate": 3.056905449281033e-08, "loss": 0.1532, "step": 51286 }, { "epoch": 0.8914982009073684, "grad_norm": 1.1859309227092167, "learning_rate": 3.0559363589369e-08, "loss": 0.1536, "step": 51287 }, { "epoch": 0.8915155834448713, "grad_norm": 1.375420684280772, "learning_rate": 3.0549674173838915e-08, "loss": 0.1291, "step": 51288 }, { "epoch": 0.8915329659823741, "grad_norm": 2.403429419748559, "learning_rate": 3.0539986246250646e-08, "loss": 0.1418, "step": 51289 }, { "epoch": 0.8915503485198769, "grad_norm": 1.5236173989410298, "learning_rate": 3.0530299806634964e-08, "loss": 0.2004, "step": 51290 }, { "epoch": 0.8915677310573797, "grad_norm": 0.9047912147539774, "learning_rate": 3.052061485502261e-08, "loss": 0.0951, "step": 51291 }, { "epoch": 0.8915851135948826, "grad_norm": 1.4060576539763205, "learning_rate": 3.051093139144423e-08, "loss": 0.1043, "step": 51292 }, { "epoch": 0.8916024961323854, "grad_norm": 1.4729769051539103, "learning_rate": 3.050124941593052e-08, "loss": 0.1052, "step": 51293 }, { "epoch": 0.8916198786698882, "grad_norm": 1.1512527759633608, "learning_rate": 3.049156892851207e-08, "loss": 0.0938, "step": 51294 }, { "epoch": 0.8916372612073911, "grad_norm": 1.5416736990817317, "learning_rate": 3.0481889929219795e-08, "loss": 0.1522, "step": 51295 }, { "epoch": 0.8916546437448939, "grad_norm": 1.5922042194976782, "learning_rate": 3.047221241808423e-08, "loss": 0.1661, "step": 51296 }, { "epoch": 0.8916720262823967, "grad_norm": 1.4095964514294292, "learning_rate": 3.046253639513613e-08, "loss": 0.156, "step": 51297 }, { "epoch": 0.8916894088198996, "grad_norm": 1.1743005506748827, "learning_rate": 3.045286186040602e-08, "loss": 0.1325, "step": 51298 }, { "epoch": 0.8917067913574024, "grad_norm": 1.3232433982432807, "learning_rate": 3.0443188813924716e-08, "loss": 0.1748, "step": 51299 }, { "epoch": 0.8917241738949052, "grad_norm": 1.0290340111763556, "learning_rate": 3.043351725572274e-08, "loss": 0.1434, "step": 51300 }, { "epoch": 0.891741556432408, "grad_norm": 2.414011673375563, "learning_rate": 3.042384718583085e-08, "loss": 0.1812, "step": 51301 }, { "epoch": 0.8917589389699109, "grad_norm": 3.5460344283910827, "learning_rate": 3.0414178604279585e-08, "loss": 0.1526, "step": 51302 }, { "epoch": 0.8917763215074137, "grad_norm": 0.974938450436226, "learning_rate": 3.040451151109974e-08, "loss": 0.1115, "step": 51303 }, { "epoch": 0.8917937040449164, "grad_norm": 1.0391819775741902, "learning_rate": 3.039484590632191e-08, "loss": 0.1343, "step": 51304 }, { "epoch": 0.8918110865824193, "grad_norm": 1.6188218035851354, "learning_rate": 3.038518178997657e-08, "loss": 0.141, "step": 51305 }, { "epoch": 0.8918284691199221, "grad_norm": 1.1139396856216488, "learning_rate": 3.037551916209457e-08, "loss": 0.1111, "step": 51306 }, { "epoch": 0.8918458516574249, "grad_norm": 1.8081877841655434, "learning_rate": 3.036585802270647e-08, "loss": 0.1509, "step": 51307 }, { "epoch": 0.8918632341949277, "grad_norm": 1.445036134979162, "learning_rate": 3.035619837184283e-08, "loss": 0.1369, "step": 51308 }, { "epoch": 0.8918806167324306, "grad_norm": 1.5376782475475532, "learning_rate": 3.034654020953431e-08, "loss": 0.1847, "step": 51309 }, { "epoch": 0.8918979992699334, "grad_norm": 1.4881073726269243, "learning_rate": 3.0336883535811544e-08, "loss": 0.2053, "step": 51310 }, { "epoch": 0.8919153818074362, "grad_norm": 3.8503273245464733, "learning_rate": 3.0327228350705115e-08, "loss": 0.1926, "step": 51311 }, { "epoch": 0.8919327643449391, "grad_norm": 1.81566588199513, "learning_rate": 3.031757465424556e-08, "loss": 0.1133, "step": 51312 }, { "epoch": 0.8919501468824419, "grad_norm": 1.2347631425800998, "learning_rate": 3.030792244646352e-08, "loss": 0.152, "step": 51313 }, { "epoch": 0.8919675294199447, "grad_norm": 1.0487691128717136, "learning_rate": 3.029827172738964e-08, "loss": 0.0959, "step": 51314 }, { "epoch": 0.8919849119574476, "grad_norm": 1.3274910122495978, "learning_rate": 3.02886224970545e-08, "loss": 0.1098, "step": 51315 }, { "epoch": 0.8920022944949504, "grad_norm": 0.8403258669879463, "learning_rate": 3.0278974755488685e-08, "loss": 0.1195, "step": 51316 }, { "epoch": 0.8920196770324532, "grad_norm": 1.1147811355040922, "learning_rate": 3.026932850272273e-08, "loss": 0.1672, "step": 51317 }, { "epoch": 0.892037059569956, "grad_norm": 2.1717253236462657, "learning_rate": 3.025968373878718e-08, "loss": 0.1352, "step": 51318 }, { "epoch": 0.8920544421074589, "grad_norm": 1.2411100238626, "learning_rate": 3.025004046371271e-08, "loss": 0.0989, "step": 51319 }, { "epoch": 0.8920718246449617, "grad_norm": 1.4753714213026914, "learning_rate": 3.024039867752981e-08, "loss": 0.1202, "step": 51320 }, { "epoch": 0.8920892071824645, "grad_norm": 0.950999418213905, "learning_rate": 3.0230758380269006e-08, "loss": 0.0714, "step": 51321 }, { "epoch": 0.8921065897199674, "grad_norm": 1.6560237862514688, "learning_rate": 3.022111957196094e-08, "loss": 0.1491, "step": 51322 }, { "epoch": 0.8921239722574702, "grad_norm": 1.2462697866212107, "learning_rate": 3.02114822526362e-08, "loss": 0.1484, "step": 51323 }, { "epoch": 0.8921413547949729, "grad_norm": 1.1009764179913795, "learning_rate": 3.0201846422325206e-08, "loss": 0.1626, "step": 51324 }, { "epoch": 0.8921587373324757, "grad_norm": 1.0967744648840607, "learning_rate": 3.019221208105843e-08, "loss": 0.1283, "step": 51325 }, { "epoch": 0.8921761198699786, "grad_norm": 1.0113899349112037, "learning_rate": 3.0182579228866636e-08, "loss": 0.109, "step": 51326 }, { "epoch": 0.8921935024074814, "grad_norm": 1.8083792925437678, "learning_rate": 3.017294786578028e-08, "loss": 0.14, "step": 51327 }, { "epoch": 0.8922108849449842, "grad_norm": 1.3901022904303517, "learning_rate": 3.0163317991829804e-08, "loss": 0.1454, "step": 51328 }, { "epoch": 0.8922282674824871, "grad_norm": 0.890124071641922, "learning_rate": 3.015368960704584e-08, "loss": 0.1006, "step": 51329 }, { "epoch": 0.8922456500199899, "grad_norm": 1.0751374398502949, "learning_rate": 3.014406271145881e-08, "loss": 0.1023, "step": 51330 }, { "epoch": 0.8922630325574927, "grad_norm": 1.4152950077687034, "learning_rate": 3.013443730509929e-08, "loss": 0.1123, "step": 51331 }, { "epoch": 0.8922804150949956, "grad_norm": 1.0159121061905323, "learning_rate": 3.012481338799777e-08, "loss": 0.095, "step": 51332 }, { "epoch": 0.8922977976324984, "grad_norm": 1.147647336888478, "learning_rate": 3.0115190960184614e-08, "loss": 0.0658, "step": 51333 }, { "epoch": 0.8923151801700012, "grad_norm": 1.8215030429611732, "learning_rate": 3.010557002169062e-08, "loss": 0.1695, "step": 51334 }, { "epoch": 0.892332562707504, "grad_norm": 1.268237088031751, "learning_rate": 3.00959505725461e-08, "loss": 0.1324, "step": 51335 }, { "epoch": 0.8923499452450069, "grad_norm": 2.1561206195698217, "learning_rate": 3.008633261278148e-08, "loss": 0.1829, "step": 51336 }, { "epoch": 0.8923673277825097, "grad_norm": 1.9709985743278813, "learning_rate": 3.007671614242735e-08, "loss": 0.1352, "step": 51337 }, { "epoch": 0.8923847103200125, "grad_norm": 1.0520321176163965, "learning_rate": 3.006710116151423e-08, "loss": 0.1028, "step": 51338 }, { "epoch": 0.8924020928575154, "grad_norm": 1.3094513483650594, "learning_rate": 3.005748767007249e-08, "loss": 0.1569, "step": 51339 }, { "epoch": 0.8924194753950182, "grad_norm": 2.218036719739113, "learning_rate": 3.00478756681326e-08, "loss": 0.1292, "step": 51340 }, { "epoch": 0.892436857932521, "grad_norm": 1.4406742487613546, "learning_rate": 3.003826515572505e-08, "loss": 0.1638, "step": 51341 }, { "epoch": 0.8924542404700239, "grad_norm": 1.0923092982721285, "learning_rate": 3.002865613288047e-08, "loss": 0.1658, "step": 51342 }, { "epoch": 0.8924716230075267, "grad_norm": 1.2142748971496684, "learning_rate": 3.001904859962906e-08, "loss": 0.0944, "step": 51343 }, { "epoch": 0.8924890055450294, "grad_norm": 1.3754848030641715, "learning_rate": 3.00094425560013e-08, "loss": 0.1154, "step": 51344 }, { "epoch": 0.8925063880825322, "grad_norm": 2.0603734910127534, "learning_rate": 2.999983800202782e-08, "loss": 0.1429, "step": 51345 }, { "epoch": 0.8925237706200351, "grad_norm": 1.448163996692511, "learning_rate": 2.999023493773889e-08, "loss": 0.1441, "step": 51346 }, { "epoch": 0.8925411531575379, "grad_norm": 1.0028507523344699, "learning_rate": 2.998063336316503e-08, "loss": 0.1044, "step": 51347 }, { "epoch": 0.8925585356950407, "grad_norm": 1.1958440630924487, "learning_rate": 2.997103327833672e-08, "loss": 0.1134, "step": 51348 }, { "epoch": 0.8925759182325436, "grad_norm": 1.4405263884835158, "learning_rate": 2.996143468328427e-08, "loss": 0.1938, "step": 51349 }, { "epoch": 0.8925933007700464, "grad_norm": 3.8800139418690662, "learning_rate": 2.995183757803815e-08, "loss": 0.1893, "step": 51350 }, { "epoch": 0.8926106833075492, "grad_norm": 2.7552937750307622, "learning_rate": 2.994224196262879e-08, "loss": 0.1321, "step": 51351 }, { "epoch": 0.892628065845052, "grad_norm": 1.542722108905292, "learning_rate": 2.993264783708649e-08, "loss": 0.1335, "step": 51352 }, { "epoch": 0.8926454483825549, "grad_norm": 1.1685080607519573, "learning_rate": 2.9923055201441895e-08, "loss": 0.1329, "step": 51353 }, { "epoch": 0.8926628309200577, "grad_norm": 1.0132044469212975, "learning_rate": 2.991346405572531e-08, "loss": 0.1019, "step": 51354 }, { "epoch": 0.8926802134575605, "grad_norm": 1.3310039379530616, "learning_rate": 2.990387439996705e-08, "loss": 0.0757, "step": 51355 }, { "epoch": 0.8926975959950634, "grad_norm": 1.168630220108673, "learning_rate": 2.9894286234197484e-08, "loss": 0.102, "step": 51356 }, { "epoch": 0.8927149785325662, "grad_norm": 1.1112557663712017, "learning_rate": 2.9884699558447134e-08, "loss": 0.1646, "step": 51357 }, { "epoch": 0.892732361070069, "grad_norm": 0.9247531984321122, "learning_rate": 2.987511437274637e-08, "loss": 0.2294, "step": 51358 }, { "epoch": 0.8927497436075719, "grad_norm": 1.5718742937123895, "learning_rate": 2.9865530677125497e-08, "loss": 0.1346, "step": 51359 }, { "epoch": 0.8927671261450747, "grad_norm": 1.8418305645824968, "learning_rate": 2.985594847161488e-08, "loss": 0.2135, "step": 51360 }, { "epoch": 0.8927845086825775, "grad_norm": 1.9962588215531243, "learning_rate": 2.9846367756245106e-08, "loss": 0.1373, "step": 51361 }, { "epoch": 0.8928018912200804, "grad_norm": 2.439436561998854, "learning_rate": 2.9836788531046264e-08, "loss": 0.1804, "step": 51362 }, { "epoch": 0.8928192737575832, "grad_norm": 1.6889601923980841, "learning_rate": 2.982721079604888e-08, "loss": 0.1199, "step": 51363 }, { "epoch": 0.8928366562950859, "grad_norm": 2.135927191136643, "learning_rate": 2.9817634551283114e-08, "loss": 0.1072, "step": 51364 }, { "epoch": 0.8928540388325887, "grad_norm": 0.8235156781996377, "learning_rate": 2.980805979677958e-08, "loss": 0.1696, "step": 51365 }, { "epoch": 0.8928714213700916, "grad_norm": 1.4488891003301538, "learning_rate": 2.97984865325685e-08, "loss": 0.1625, "step": 51366 }, { "epoch": 0.8928888039075944, "grad_norm": 1.474605951725312, "learning_rate": 2.978891475868023e-08, "loss": 0.1462, "step": 51367 }, { "epoch": 0.8929061864450972, "grad_norm": 1.3676356394828306, "learning_rate": 2.977934447514513e-08, "loss": 0.1147, "step": 51368 }, { "epoch": 0.8929235689826001, "grad_norm": 1.3404677315196003, "learning_rate": 2.976977568199346e-08, "loss": 0.1238, "step": 51369 }, { "epoch": 0.8929409515201029, "grad_norm": 1.5941259581690763, "learning_rate": 2.976020837925558e-08, "loss": 0.1448, "step": 51370 }, { "epoch": 0.8929583340576057, "grad_norm": 1.1641551129012062, "learning_rate": 2.975064256696186e-08, "loss": 0.2069, "step": 51371 }, { "epoch": 0.8929757165951085, "grad_norm": 3.972786445092464, "learning_rate": 2.9741078245142548e-08, "loss": 0.1988, "step": 51372 }, { "epoch": 0.8929930991326114, "grad_norm": 1.6100563167226913, "learning_rate": 2.9731515413828066e-08, "loss": 0.1937, "step": 51373 }, { "epoch": 0.8930104816701142, "grad_norm": 1.6088502180561963, "learning_rate": 2.9721954073048672e-08, "loss": 0.1384, "step": 51374 }, { "epoch": 0.893027864207617, "grad_norm": 1.3957449042459897, "learning_rate": 2.97123942228345e-08, "loss": 0.1438, "step": 51375 }, { "epoch": 0.8930452467451199, "grad_norm": 1.7025932941859354, "learning_rate": 2.9702835863216145e-08, "loss": 0.135, "step": 51376 }, { "epoch": 0.8930626292826227, "grad_norm": 0.959223936087882, "learning_rate": 2.9693278994223748e-08, "loss": 0.1122, "step": 51377 }, { "epoch": 0.8930800118201255, "grad_norm": 1.892724878015747, "learning_rate": 2.9683723615887556e-08, "loss": 0.2389, "step": 51378 }, { "epoch": 0.8930973943576284, "grad_norm": 0.7740871542986975, "learning_rate": 2.9674169728237995e-08, "loss": 0.1066, "step": 51379 }, { "epoch": 0.8931147768951312, "grad_norm": 2.8800808274350125, "learning_rate": 2.966461733130521e-08, "loss": 0.1744, "step": 51380 }, { "epoch": 0.893132159432634, "grad_norm": 1.076555496154693, "learning_rate": 2.9655066425119557e-08, "loss": 0.1435, "step": 51381 }, { "epoch": 0.8931495419701369, "grad_norm": 1.6396199253038528, "learning_rate": 2.9645517009711297e-08, "loss": 0.176, "step": 51382 }, { "epoch": 0.8931669245076396, "grad_norm": 1.0347366612007338, "learning_rate": 2.9635969085110623e-08, "loss": 0.1171, "step": 51383 }, { "epoch": 0.8931843070451424, "grad_norm": 1.086367538888424, "learning_rate": 2.9626422651347904e-08, "loss": 0.1186, "step": 51384 }, { "epoch": 0.8932016895826452, "grad_norm": 1.4194915917918796, "learning_rate": 2.9616877708453335e-08, "loss": 0.1021, "step": 51385 }, { "epoch": 0.8932190721201481, "grad_norm": 1.4489322790181636, "learning_rate": 2.9607334256457283e-08, "loss": 0.0941, "step": 51386 }, { "epoch": 0.8932364546576509, "grad_norm": 1.1972907989720512, "learning_rate": 2.9597792295389778e-08, "loss": 0.125, "step": 51387 }, { "epoch": 0.8932538371951537, "grad_norm": 1.386152549068137, "learning_rate": 2.958825182528124e-08, "loss": 0.1219, "step": 51388 }, { "epoch": 0.8932712197326566, "grad_norm": 1.930713312096441, "learning_rate": 2.9578712846161814e-08, "loss": 0.0943, "step": 51389 }, { "epoch": 0.8932886022701594, "grad_norm": 1.3921470318097138, "learning_rate": 2.9569175358061805e-08, "loss": 0.1057, "step": 51390 }, { "epoch": 0.8933059848076622, "grad_norm": 3.685683329880644, "learning_rate": 2.9559639361011357e-08, "loss": 0.2044, "step": 51391 }, { "epoch": 0.893323367345165, "grad_norm": 0.7673470818524717, "learning_rate": 2.9550104855040893e-08, "loss": 0.121, "step": 51392 }, { "epoch": 0.8933407498826679, "grad_norm": 1.3826693464198818, "learning_rate": 2.9540571840180383e-08, "loss": 0.1529, "step": 51393 }, { "epoch": 0.8933581324201707, "grad_norm": 1.255318401559758, "learning_rate": 2.9531040316460143e-08, "loss": 0.1218, "step": 51394 }, { "epoch": 0.8933755149576735, "grad_norm": 1.5044006433388157, "learning_rate": 2.9521510283910367e-08, "loss": 0.0804, "step": 51395 }, { "epoch": 0.8933928974951764, "grad_norm": 1.0759506432564192, "learning_rate": 2.9511981742561366e-08, "loss": 0.0931, "step": 51396 }, { "epoch": 0.8934102800326792, "grad_norm": 0.8421943289418089, "learning_rate": 2.9502454692443225e-08, "loss": 0.067, "step": 51397 }, { "epoch": 0.893427662570182, "grad_norm": 1.409932572165353, "learning_rate": 2.9492929133586197e-08, "loss": 0.1196, "step": 51398 }, { "epoch": 0.8934450451076849, "grad_norm": 1.4746144447222702, "learning_rate": 2.9483405066020428e-08, "loss": 0.1608, "step": 51399 }, { "epoch": 0.8934624276451877, "grad_norm": 0.8774410473965666, "learning_rate": 2.9473882489776114e-08, "loss": 0.11, "step": 51400 }, { "epoch": 0.8934798101826905, "grad_norm": 0.9319974669464551, "learning_rate": 2.9464361404883454e-08, "loss": 0.1121, "step": 51401 }, { "epoch": 0.8934971927201933, "grad_norm": 2.253958033263154, "learning_rate": 2.945484181137259e-08, "loss": 0.1361, "step": 51402 }, { "epoch": 0.8935145752576961, "grad_norm": 1.2225954369730831, "learning_rate": 2.9445323709273716e-08, "loss": 0.0925, "step": 51403 }, { "epoch": 0.8935319577951989, "grad_norm": 1.396039749178818, "learning_rate": 2.9435807098617092e-08, "loss": 0.1376, "step": 51404 }, { "epoch": 0.8935493403327017, "grad_norm": 1.0665624963827176, "learning_rate": 2.942629197943286e-08, "loss": 0.1679, "step": 51405 }, { "epoch": 0.8935667228702046, "grad_norm": 1.7689602524992698, "learning_rate": 2.9416778351750936e-08, "loss": 0.1574, "step": 51406 }, { "epoch": 0.8935841054077074, "grad_norm": 1.3103344272728186, "learning_rate": 2.9407266215601743e-08, "loss": 0.1015, "step": 51407 }, { "epoch": 0.8936014879452102, "grad_norm": 0.9852339104632678, "learning_rate": 2.939775557101537e-08, "loss": 0.1419, "step": 51408 }, { "epoch": 0.893618870482713, "grad_norm": 1.1953472775860734, "learning_rate": 2.93882464180219e-08, "loss": 0.1432, "step": 51409 }, { "epoch": 0.8936362530202159, "grad_norm": 0.9512694284933134, "learning_rate": 2.9378738756651478e-08, "loss": 0.0871, "step": 51410 }, { "epoch": 0.8936536355577187, "grad_norm": 1.912942457065255, "learning_rate": 2.9369232586934356e-08, "loss": 0.1781, "step": 51411 }, { "epoch": 0.8936710180952215, "grad_norm": 1.203074848187849, "learning_rate": 2.9359727908900566e-08, "loss": 0.103, "step": 51412 }, { "epoch": 0.8936884006327244, "grad_norm": 1.0576332506055637, "learning_rate": 2.9350224722580196e-08, "loss": 0.0913, "step": 51413 }, { "epoch": 0.8937057831702272, "grad_norm": 1.3036020769883168, "learning_rate": 2.9340723028003388e-08, "loss": 0.1159, "step": 51414 }, { "epoch": 0.89372316570773, "grad_norm": 0.9068994600344462, "learning_rate": 2.9331222825200287e-08, "loss": 0.1478, "step": 51415 }, { "epoch": 0.8937405482452329, "grad_norm": 1.415399564778698, "learning_rate": 2.9321724114201086e-08, "loss": 0.1206, "step": 51416 }, { "epoch": 0.8937579307827357, "grad_norm": 1.6526413385532774, "learning_rate": 2.9312226895035764e-08, "loss": 0.103, "step": 51417 }, { "epoch": 0.8937753133202385, "grad_norm": 1.5285763160213122, "learning_rate": 2.9302731167734462e-08, "loss": 0.1014, "step": 51418 }, { "epoch": 0.8937926958577413, "grad_norm": 1.442173814263455, "learning_rate": 2.929323693232727e-08, "loss": 0.1394, "step": 51419 }, { "epoch": 0.8938100783952442, "grad_norm": 1.060790973599442, "learning_rate": 2.928374418884433e-08, "loss": 0.1117, "step": 51420 }, { "epoch": 0.893827460932747, "grad_norm": 1.691714266051107, "learning_rate": 2.927425293731567e-08, "loss": 0.1023, "step": 51421 }, { "epoch": 0.8938448434702498, "grad_norm": 1.0191892676224132, "learning_rate": 2.9264763177771323e-08, "loss": 0.1357, "step": 51422 }, { "epoch": 0.8938622260077526, "grad_norm": 0.9033325894925617, "learning_rate": 2.9255274910241488e-08, "loss": 0.0888, "step": 51423 }, { "epoch": 0.8938796085452554, "grad_norm": 1.927198727470556, "learning_rate": 2.924578813475631e-08, "loss": 0.1688, "step": 51424 }, { "epoch": 0.8938969910827582, "grad_norm": 1.4198608361643708, "learning_rate": 2.923630285134554e-08, "loss": 0.1315, "step": 51425 }, { "epoch": 0.893914373620261, "grad_norm": 1.450663893601963, "learning_rate": 2.922681906003954e-08, "loss": 0.1204, "step": 51426 }, { "epoch": 0.8939317561577639, "grad_norm": 1.8606534129788515, "learning_rate": 2.9217336760868293e-08, "loss": 0.2364, "step": 51427 }, { "epoch": 0.8939491386952667, "grad_norm": 1.005755358514104, "learning_rate": 2.920785595386177e-08, "loss": 0.1498, "step": 51428 }, { "epoch": 0.8939665212327695, "grad_norm": 1.279761768077398, "learning_rate": 2.9198376639050114e-08, "loss": 0.1474, "step": 51429 }, { "epoch": 0.8939839037702724, "grad_norm": 2.098298595705187, "learning_rate": 2.9188898816463358e-08, "loss": 0.0999, "step": 51430 }, { "epoch": 0.8940012863077752, "grad_norm": 1.1511400726911272, "learning_rate": 2.9179422486131477e-08, "loss": 0.092, "step": 51431 }, { "epoch": 0.894018668845278, "grad_norm": 1.038960321731423, "learning_rate": 2.9169947648084557e-08, "loss": 0.2044, "step": 51432 }, { "epoch": 0.8940360513827809, "grad_norm": 1.1570414045526007, "learning_rate": 2.916047430235258e-08, "loss": 0.1257, "step": 51433 }, { "epoch": 0.8940534339202837, "grad_norm": 2.4530938663566393, "learning_rate": 2.915100244896568e-08, "loss": 0.3362, "step": 51434 }, { "epoch": 0.8940708164577865, "grad_norm": 1.0012454179900174, "learning_rate": 2.914153208795378e-08, "loss": 0.1591, "step": 51435 }, { "epoch": 0.8940881989952894, "grad_norm": 1.9884758744100086, "learning_rate": 2.9132063219346913e-08, "loss": 0.1664, "step": 51436 }, { "epoch": 0.8941055815327922, "grad_norm": 1.7965195423851628, "learning_rate": 2.9122595843175168e-08, "loss": 0.1816, "step": 51437 }, { "epoch": 0.894122964070295, "grad_norm": 1.5006378689734197, "learning_rate": 2.9113129959468463e-08, "loss": 0.1367, "step": 51438 }, { "epoch": 0.8941403466077978, "grad_norm": 8.932105089687637, "learning_rate": 2.910366556825683e-08, "loss": 0.1538, "step": 51439 }, { "epoch": 0.8941577291453007, "grad_norm": 9.07527361075819, "learning_rate": 2.9094202669570245e-08, "loss": 0.1638, "step": 51440 }, { "epoch": 0.8941751116828035, "grad_norm": 0.6694485358603827, "learning_rate": 2.9084741263438684e-08, "loss": 0.0885, "step": 51441 }, { "epoch": 0.8941924942203063, "grad_norm": 1.5857280855361833, "learning_rate": 2.9075281349892232e-08, "loss": 0.1257, "step": 51442 }, { "epoch": 0.894209876757809, "grad_norm": 1.4128410951478672, "learning_rate": 2.906582292896087e-08, "loss": 0.1221, "step": 51443 }, { "epoch": 0.8942272592953119, "grad_norm": 1.010634588440787, "learning_rate": 2.905636600067446e-08, "loss": 0.1123, "step": 51444 }, { "epoch": 0.8942446418328147, "grad_norm": 3.0833855732407156, "learning_rate": 2.904691056506292e-08, "loss": 0.2367, "step": 51445 }, { "epoch": 0.8942620243703175, "grad_norm": 1.1790096805676045, "learning_rate": 2.9037456622156453e-08, "loss": 0.171, "step": 51446 }, { "epoch": 0.8942794069078204, "grad_norm": 1.789544709678044, "learning_rate": 2.902800417198492e-08, "loss": 0.1555, "step": 51447 }, { "epoch": 0.8942967894453232, "grad_norm": 1.5689228522567111, "learning_rate": 2.9018553214578244e-08, "loss": 0.1637, "step": 51448 }, { "epoch": 0.894314171982826, "grad_norm": 1.5536107942482869, "learning_rate": 2.90091037499664e-08, "loss": 0.2171, "step": 51449 }, { "epoch": 0.8943315545203289, "grad_norm": 1.7936131130674398, "learning_rate": 2.8999655778179367e-08, "loss": 0.1225, "step": 51450 }, { "epoch": 0.8943489370578317, "grad_norm": 1.3148364138916813, "learning_rate": 2.899020929924706e-08, "loss": 0.1338, "step": 51451 }, { "epoch": 0.8943663195953345, "grad_norm": 1.3259382835072933, "learning_rate": 2.8980764313199403e-08, "loss": 0.1266, "step": 51452 }, { "epoch": 0.8943837021328374, "grad_norm": 1.1649029262895685, "learning_rate": 2.8971320820066315e-08, "loss": 0.1984, "step": 51453 }, { "epoch": 0.8944010846703402, "grad_norm": 1.7965442997384764, "learning_rate": 2.896187881987777e-08, "loss": 0.1257, "step": 51454 }, { "epoch": 0.894418467207843, "grad_norm": 1.4324543523644342, "learning_rate": 2.8952438312663807e-08, "loss": 0.1948, "step": 51455 }, { "epoch": 0.8944358497453458, "grad_norm": 1.5943849444092557, "learning_rate": 2.894299929845412e-08, "loss": 0.1232, "step": 51456 }, { "epoch": 0.8944532322828487, "grad_norm": 3.223229104951714, "learning_rate": 2.8933561777278793e-08, "loss": 0.136, "step": 51457 }, { "epoch": 0.8944706148203515, "grad_norm": 1.1115026338636653, "learning_rate": 2.892412574916764e-08, "loss": 0.1534, "step": 51458 }, { "epoch": 0.8944879973578543, "grad_norm": 1.949596430929385, "learning_rate": 2.8914691214150687e-08, "loss": 0.1109, "step": 51459 }, { "epoch": 0.8945053798953572, "grad_norm": 1.5664653236907684, "learning_rate": 2.8905258172257695e-08, "loss": 0.1317, "step": 51460 }, { "epoch": 0.89452276243286, "grad_norm": 1.5830644899365465, "learning_rate": 2.889582662351858e-08, "loss": 0.1553, "step": 51461 }, { "epoch": 0.8945401449703628, "grad_norm": 1.8532265260093113, "learning_rate": 2.888639656796349e-08, "loss": 0.1601, "step": 51462 }, { "epoch": 0.8945575275078655, "grad_norm": 1.8996556011755814, "learning_rate": 2.887696800562195e-08, "loss": 0.1694, "step": 51463 }, { "epoch": 0.8945749100453684, "grad_norm": 1.1390382808844883, "learning_rate": 2.8867540936523993e-08, "loss": 0.1133, "step": 51464 }, { "epoch": 0.8945922925828712, "grad_norm": 1.63164060914961, "learning_rate": 2.885811536069954e-08, "loss": 0.146, "step": 51465 }, { "epoch": 0.894609675120374, "grad_norm": 1.3233991331966335, "learning_rate": 2.884869127817846e-08, "loss": 0.103, "step": 51466 }, { "epoch": 0.8946270576578769, "grad_norm": 1.994363467555096, "learning_rate": 2.8839268688990615e-08, "loss": 0.1377, "step": 51467 }, { "epoch": 0.8946444401953797, "grad_norm": 3.3793787825689634, "learning_rate": 2.882984759316587e-08, "loss": 0.1621, "step": 51468 }, { "epoch": 0.8946618227328825, "grad_norm": 2.285073163422601, "learning_rate": 2.8820427990734032e-08, "loss": 0.1762, "step": 51469 }, { "epoch": 0.8946792052703854, "grad_norm": 1.407726502742575, "learning_rate": 2.8811009881724967e-08, "loss": 0.0982, "step": 51470 }, { "epoch": 0.8946965878078882, "grad_norm": 0.9597990854227417, "learning_rate": 2.88015932661686e-08, "loss": 0.1039, "step": 51471 }, { "epoch": 0.894713970345391, "grad_norm": 2.18785248720621, "learning_rate": 2.8792178144094682e-08, "loss": 0.1974, "step": 51472 }, { "epoch": 0.8947313528828938, "grad_norm": 1.5623596857323565, "learning_rate": 2.878276451553313e-08, "loss": 0.1251, "step": 51473 }, { "epoch": 0.8947487354203967, "grad_norm": 2.2023326997689017, "learning_rate": 2.8773352380513817e-08, "loss": 0.1502, "step": 51474 }, { "epoch": 0.8947661179578995, "grad_norm": 2.3780190685038667, "learning_rate": 2.8763941739066488e-08, "loss": 0.2487, "step": 51475 }, { "epoch": 0.8947835004954023, "grad_norm": 1.1981746465823682, "learning_rate": 2.8754532591220902e-08, "loss": 0.0855, "step": 51476 }, { "epoch": 0.8948008830329052, "grad_norm": 0.9845674135312787, "learning_rate": 2.8745124937007037e-08, "loss": 0.1711, "step": 51477 }, { "epoch": 0.894818265570408, "grad_norm": 1.1315563085555638, "learning_rate": 2.873571877645464e-08, "loss": 0.2153, "step": 51478 }, { "epoch": 0.8948356481079108, "grad_norm": 1.22152829247547, "learning_rate": 2.8726314109593586e-08, "loss": 0.1722, "step": 51479 }, { "epoch": 0.8948530306454137, "grad_norm": 1.2807702978874869, "learning_rate": 2.871691093645351e-08, "loss": 0.1414, "step": 51480 }, { "epoch": 0.8948704131829165, "grad_norm": 2.139644717099357, "learning_rate": 2.87075092570645e-08, "loss": 0.1653, "step": 51481 }, { "epoch": 0.8948877957204193, "grad_norm": 2.011132076283547, "learning_rate": 2.8698109071456088e-08, "loss": 0.2179, "step": 51482 }, { "epoch": 0.894905178257922, "grad_norm": 1.7916840178694806, "learning_rate": 2.8688710379658143e-08, "loss": 0.1845, "step": 51483 }, { "epoch": 0.8949225607954249, "grad_norm": 1.2802026376101712, "learning_rate": 2.8679313181700527e-08, "loss": 0.1242, "step": 51484 }, { "epoch": 0.8949399433329277, "grad_norm": 1.1782916661831069, "learning_rate": 2.8669917477612992e-08, "loss": 0.1255, "step": 51485 }, { "epoch": 0.8949573258704305, "grad_norm": 1.0895615135156187, "learning_rate": 2.8660523267425296e-08, "loss": 0.1123, "step": 51486 }, { "epoch": 0.8949747084079334, "grad_norm": 1.4849056525192834, "learning_rate": 2.8651130551167245e-08, "loss": 0.162, "step": 51487 }, { "epoch": 0.8949920909454362, "grad_norm": 1.4902957589074146, "learning_rate": 2.8641739328868597e-08, "loss": 0.1505, "step": 51488 }, { "epoch": 0.895009473482939, "grad_norm": 1.2022476345977913, "learning_rate": 2.86323496005591e-08, "loss": 0.1383, "step": 51489 }, { "epoch": 0.8950268560204419, "grad_norm": 1.6168904188892534, "learning_rate": 2.8622961366268516e-08, "loss": 0.1391, "step": 51490 }, { "epoch": 0.8950442385579447, "grad_norm": 1.2849003875715987, "learning_rate": 2.8613574626026537e-08, "loss": 0.1134, "step": 51491 }, { "epoch": 0.8950616210954475, "grad_norm": 1.271636479546422, "learning_rate": 2.8604189379863085e-08, "loss": 0.1422, "step": 51492 }, { "epoch": 0.8950790036329503, "grad_norm": 1.7132362914828398, "learning_rate": 2.859480562780786e-08, "loss": 0.1827, "step": 51493 }, { "epoch": 0.8950963861704532, "grad_norm": 2.02213432009374, "learning_rate": 2.8585423369890503e-08, "loss": 0.1336, "step": 51494 }, { "epoch": 0.895113768707956, "grad_norm": 1.0700005466567157, "learning_rate": 2.8576042606140715e-08, "loss": 0.1415, "step": 51495 }, { "epoch": 0.8951311512454588, "grad_norm": 1.1254224370210293, "learning_rate": 2.8566663336588416e-08, "loss": 0.1218, "step": 51496 }, { "epoch": 0.8951485337829617, "grad_norm": 1.2267263656379173, "learning_rate": 2.855728556126319e-08, "loss": 0.1535, "step": 51497 }, { "epoch": 0.8951659163204645, "grad_norm": 0.8561959412631595, "learning_rate": 2.854790928019485e-08, "loss": 0.1026, "step": 51498 }, { "epoch": 0.8951832988579673, "grad_norm": 2.0005277295408694, "learning_rate": 2.853853449341298e-08, "loss": 0.1212, "step": 51499 }, { "epoch": 0.8952006813954702, "grad_norm": 1.2710347884682216, "learning_rate": 2.8529161200947504e-08, "loss": 0.1385, "step": 51500 }, { "epoch": 0.895218063932973, "grad_norm": 1.7199571470328334, "learning_rate": 2.8519789402827954e-08, "loss": 0.2292, "step": 51501 }, { "epoch": 0.8952354464704758, "grad_norm": 1.5413543275031023, "learning_rate": 2.8510419099084137e-08, "loss": 0.1421, "step": 51502 }, { "epoch": 0.8952528290079785, "grad_norm": 1.4450022908121498, "learning_rate": 2.8501050289745587e-08, "loss": 0.1721, "step": 51503 }, { "epoch": 0.8952702115454814, "grad_norm": 1.2018291011571585, "learning_rate": 2.8491682974842168e-08, "loss": 0.1758, "step": 51504 }, { "epoch": 0.8952875940829842, "grad_norm": 1.338815458031372, "learning_rate": 2.848231715440358e-08, "loss": 0.1023, "step": 51505 }, { "epoch": 0.895304976620487, "grad_norm": 1.3553437819819847, "learning_rate": 2.8472952828459408e-08, "loss": 0.1414, "step": 51506 }, { "epoch": 0.8953223591579899, "grad_norm": 2.0272560003988405, "learning_rate": 2.8463589997039406e-08, "loss": 0.1994, "step": 51507 }, { "epoch": 0.8953397416954927, "grad_norm": 1.5033189991633866, "learning_rate": 2.845422866017322e-08, "loss": 0.1473, "step": 51508 }, { "epoch": 0.8953571242329955, "grad_norm": 0.920954347266737, "learning_rate": 2.844486881789049e-08, "loss": 0.1444, "step": 51509 }, { "epoch": 0.8953745067704983, "grad_norm": 1.2187103633490919, "learning_rate": 2.8435510470220914e-08, "loss": 0.1813, "step": 51510 }, { "epoch": 0.8953918893080012, "grad_norm": 3.5384771129586765, "learning_rate": 2.8426153617194082e-08, "loss": 0.1514, "step": 51511 }, { "epoch": 0.895409271845504, "grad_norm": 1.1276865810915275, "learning_rate": 2.841679825883986e-08, "loss": 0.1192, "step": 51512 }, { "epoch": 0.8954266543830068, "grad_norm": 2.136977845926364, "learning_rate": 2.8407444395187662e-08, "loss": 0.1541, "step": 51513 }, { "epoch": 0.8954440369205097, "grad_norm": 1.0247550801333627, "learning_rate": 2.8398092026267197e-08, "loss": 0.1197, "step": 51514 }, { "epoch": 0.8954614194580125, "grad_norm": 2.17683571168678, "learning_rate": 2.838874115210821e-08, "loss": 0.1925, "step": 51515 }, { "epoch": 0.8954788019955153, "grad_norm": 1.903280783619685, "learning_rate": 2.837939177274029e-08, "loss": 0.1402, "step": 51516 }, { "epoch": 0.8954961845330182, "grad_norm": 1.0970884966974794, "learning_rate": 2.837004388819303e-08, "loss": 0.1762, "step": 51517 }, { "epoch": 0.895513567070521, "grad_norm": 1.8460123134496615, "learning_rate": 2.8360697498496122e-08, "loss": 0.1132, "step": 51518 }, { "epoch": 0.8955309496080238, "grad_norm": 1.4043090255046133, "learning_rate": 2.83513526036791e-08, "loss": 0.1934, "step": 51519 }, { "epoch": 0.8955483321455266, "grad_norm": 0.6883204750992447, "learning_rate": 2.8342009203771665e-08, "loss": 0.1076, "step": 51520 }, { "epoch": 0.8955657146830295, "grad_norm": 1.2348294916389315, "learning_rate": 2.8332667298803402e-08, "loss": 0.1223, "step": 51521 }, { "epoch": 0.8955830972205322, "grad_norm": 1.3430318291535193, "learning_rate": 2.8323326888803788e-08, "loss": 0.1714, "step": 51522 }, { "epoch": 0.895600479758035, "grad_norm": 1.914238545371555, "learning_rate": 2.8313987973802688e-08, "loss": 0.1803, "step": 51523 }, { "epoch": 0.8956178622955379, "grad_norm": 1.719251006186646, "learning_rate": 2.8304650553829523e-08, "loss": 0.2205, "step": 51524 }, { "epoch": 0.8956352448330407, "grad_norm": 1.0619672532067488, "learning_rate": 2.8295314628914046e-08, "loss": 0.1314, "step": 51525 }, { "epoch": 0.8956526273705435, "grad_norm": 1.5268598236394557, "learning_rate": 2.8285980199085624e-08, "loss": 0.1323, "step": 51526 }, { "epoch": 0.8956700099080463, "grad_norm": 1.4832738083352637, "learning_rate": 2.8276647264373954e-08, "loss": 0.106, "step": 51527 }, { "epoch": 0.8956873924455492, "grad_norm": 3.0715400492927105, "learning_rate": 2.826731582480868e-08, "loss": 0.1663, "step": 51528 }, { "epoch": 0.895704774983052, "grad_norm": 1.191473768459073, "learning_rate": 2.8257985880419276e-08, "loss": 0.1463, "step": 51529 }, { "epoch": 0.8957221575205548, "grad_norm": 0.9513076509746607, "learning_rate": 2.8248657431235333e-08, "loss": 0.1647, "step": 51530 }, { "epoch": 0.8957395400580577, "grad_norm": 1.4536091730677636, "learning_rate": 2.823933047728655e-08, "loss": 0.1639, "step": 51531 }, { "epoch": 0.8957569225955605, "grad_norm": 2.1282872762674288, "learning_rate": 2.8230005018602286e-08, "loss": 0.2173, "step": 51532 }, { "epoch": 0.8957743051330633, "grad_norm": 1.4420878441385243, "learning_rate": 2.8220681055212247e-08, "loss": 0.1226, "step": 51533 }, { "epoch": 0.8957916876705662, "grad_norm": 1.8770196472820742, "learning_rate": 2.821135858714585e-08, "loss": 0.1235, "step": 51534 }, { "epoch": 0.895809070208069, "grad_norm": 1.5003863719410442, "learning_rate": 2.820203761443274e-08, "loss": 0.135, "step": 51535 }, { "epoch": 0.8958264527455718, "grad_norm": 2.27781083645764, "learning_rate": 2.81927181371025e-08, "loss": 0.192, "step": 51536 }, { "epoch": 0.8958438352830747, "grad_norm": 1.4395299029874267, "learning_rate": 2.8183400155184612e-08, "loss": 0.1746, "step": 51537 }, { "epoch": 0.8958612178205775, "grad_norm": 1.9270188381611746, "learning_rate": 2.8174083668708604e-08, "loss": 0.1565, "step": 51538 }, { "epoch": 0.8958786003580803, "grad_norm": 1.0026088915695555, "learning_rate": 2.816476867770401e-08, "loss": 0.1299, "step": 51539 }, { "epoch": 0.8958959828955831, "grad_norm": 1.3626362400898817, "learning_rate": 2.8155455182200307e-08, "loss": 0.1451, "step": 51540 }, { "epoch": 0.895913365433086, "grad_norm": 1.102571431725789, "learning_rate": 2.8146143182227133e-08, "loss": 0.1153, "step": 51541 }, { "epoch": 0.8959307479705887, "grad_norm": 0.7858401930359504, "learning_rate": 2.8136832677813805e-08, "loss": 0.1588, "step": 51542 }, { "epoch": 0.8959481305080915, "grad_norm": 1.1248889569108607, "learning_rate": 2.8127523668990072e-08, "loss": 0.1549, "step": 51543 }, { "epoch": 0.8959655130455944, "grad_norm": 1.259091809993995, "learning_rate": 2.8118216155785413e-08, "loss": 0.1835, "step": 51544 }, { "epoch": 0.8959828955830972, "grad_norm": 0.8706943747912522, "learning_rate": 2.8108910138229082e-08, "loss": 0.1215, "step": 51545 }, { "epoch": 0.8960002781206, "grad_norm": 1.764371452054437, "learning_rate": 2.8099605616350773e-08, "loss": 0.1114, "step": 51546 }, { "epoch": 0.8960176606581028, "grad_norm": 0.7626308009072469, "learning_rate": 2.809030259017997e-08, "loss": 0.0863, "step": 51547 }, { "epoch": 0.8960350431956057, "grad_norm": 0.7439443129892368, "learning_rate": 2.8081001059746146e-08, "loss": 0.0965, "step": 51548 }, { "epoch": 0.8960524257331085, "grad_norm": 1.7492841790660838, "learning_rate": 2.8071701025078775e-08, "loss": 0.1564, "step": 51549 }, { "epoch": 0.8960698082706113, "grad_norm": 0.8471443189721436, "learning_rate": 2.806240248620728e-08, "loss": 0.1281, "step": 51550 }, { "epoch": 0.8960871908081142, "grad_norm": 1.3214310787012846, "learning_rate": 2.8053105443161196e-08, "loss": 0.165, "step": 51551 }, { "epoch": 0.896104573345617, "grad_norm": 1.2964230996039086, "learning_rate": 2.8043809895969993e-08, "loss": 0.1262, "step": 51552 }, { "epoch": 0.8961219558831198, "grad_norm": 1.321149442324861, "learning_rate": 2.8034515844663042e-08, "loss": 0.122, "step": 51553 }, { "epoch": 0.8961393384206227, "grad_norm": 1.9584677745307215, "learning_rate": 2.8025223289269927e-08, "loss": 0.2189, "step": 51554 }, { "epoch": 0.8961567209581255, "grad_norm": 1.6243454126455794, "learning_rate": 2.8015932229820072e-08, "loss": 0.1595, "step": 51555 }, { "epoch": 0.8961741034956283, "grad_norm": 2.005086638943904, "learning_rate": 2.800664266634284e-08, "loss": 0.1782, "step": 51556 }, { "epoch": 0.8961914860331311, "grad_norm": 1.4421233601124657, "learning_rate": 2.7997354598867818e-08, "loss": 0.1136, "step": 51557 }, { "epoch": 0.896208868570634, "grad_norm": 1.3999251616495325, "learning_rate": 2.7988068027424317e-08, "loss": 0.1924, "step": 51558 }, { "epoch": 0.8962262511081368, "grad_norm": 1.3998102524207878, "learning_rate": 2.7978782952041814e-08, "loss": 0.1073, "step": 51559 }, { "epoch": 0.8962436336456396, "grad_norm": 0.9666217747385619, "learning_rate": 2.7969499372749727e-08, "loss": 0.1653, "step": 51560 }, { "epoch": 0.8962610161831425, "grad_norm": 1.670850555458439, "learning_rate": 2.7960217289577427e-08, "loss": 0.181, "step": 51561 }, { "epoch": 0.8962783987206452, "grad_norm": 1.5694693467681176, "learning_rate": 2.7950936702554494e-08, "loss": 0.1539, "step": 51562 }, { "epoch": 0.896295781258148, "grad_norm": 1.8595090585893255, "learning_rate": 2.79416576117103e-08, "loss": 0.1285, "step": 51563 }, { "epoch": 0.8963131637956508, "grad_norm": 1.355128934857356, "learning_rate": 2.7932380017074153e-08, "loss": 0.1252, "step": 51564 }, { "epoch": 0.8963305463331537, "grad_norm": 1.0093110623803558, "learning_rate": 2.792310391867542e-08, "loss": 0.1605, "step": 51565 }, { "epoch": 0.8963479288706565, "grad_norm": 1.338292566913451, "learning_rate": 2.791382931654368e-08, "loss": 0.1044, "step": 51566 }, { "epoch": 0.8963653114081593, "grad_norm": 8.886937990750235, "learning_rate": 2.790455621070825e-08, "loss": 0.199, "step": 51567 }, { "epoch": 0.8963826939456622, "grad_norm": 1.5397871929097686, "learning_rate": 2.78952846011985e-08, "loss": 0.0836, "step": 51568 }, { "epoch": 0.896400076483165, "grad_norm": 1.1710050156044693, "learning_rate": 2.788601448804384e-08, "loss": 0.1611, "step": 51569 }, { "epoch": 0.8964174590206678, "grad_norm": 1.80562615221078, "learning_rate": 2.7876745871273644e-08, "loss": 0.1695, "step": 51570 }, { "epoch": 0.8964348415581707, "grad_norm": 1.9983297578225085, "learning_rate": 2.7867478750917274e-08, "loss": 0.1248, "step": 51571 }, { "epoch": 0.8964522240956735, "grad_norm": 1.5028550022557003, "learning_rate": 2.7858213127004092e-08, "loss": 0.1345, "step": 51572 }, { "epoch": 0.8964696066331763, "grad_norm": 1.2050497483230473, "learning_rate": 2.784894899956347e-08, "loss": 0.1074, "step": 51573 }, { "epoch": 0.8964869891706791, "grad_norm": 1.7003827305034838, "learning_rate": 2.7839686368624827e-08, "loss": 0.1437, "step": 51574 }, { "epoch": 0.896504371708182, "grad_norm": 1.0603105122893417, "learning_rate": 2.783042523421758e-08, "loss": 0.0998, "step": 51575 }, { "epoch": 0.8965217542456848, "grad_norm": 2.1528790418548995, "learning_rate": 2.7821165596370822e-08, "loss": 0.1989, "step": 51576 }, { "epoch": 0.8965391367831876, "grad_norm": 3.23856356925851, "learning_rate": 2.7811907455114192e-08, "loss": 0.1982, "step": 51577 }, { "epoch": 0.8965565193206905, "grad_norm": 1.1298004684070537, "learning_rate": 2.7802650810476837e-08, "loss": 0.1416, "step": 51578 }, { "epoch": 0.8965739018581933, "grad_norm": 1.9526764152013096, "learning_rate": 2.7793395662488228e-08, "loss": 0.2009, "step": 51579 }, { "epoch": 0.8965912843956961, "grad_norm": 1.4064324609256333, "learning_rate": 2.7784142011177514e-08, "loss": 0.2061, "step": 51580 }, { "epoch": 0.896608666933199, "grad_norm": 0.8182283616808352, "learning_rate": 2.777488985657428e-08, "loss": 0.0911, "step": 51581 }, { "epoch": 0.8966260494707017, "grad_norm": 0.9973065791367213, "learning_rate": 2.776563919870778e-08, "loss": 0.1234, "step": 51582 }, { "epoch": 0.8966434320082045, "grad_norm": 1.6546625122075798, "learning_rate": 2.7756390037607157e-08, "loss": 0.201, "step": 51583 }, { "epoch": 0.8966608145457073, "grad_norm": 1.329768117880274, "learning_rate": 2.7747142373301835e-08, "loss": 0.1099, "step": 51584 }, { "epoch": 0.8966781970832102, "grad_norm": 1.2750291695257616, "learning_rate": 2.7737896205821178e-08, "loss": 0.1233, "step": 51585 }, { "epoch": 0.896695579620713, "grad_norm": 0.86723573733572, "learning_rate": 2.7728651535194493e-08, "loss": 0.1329, "step": 51586 }, { "epoch": 0.8967129621582158, "grad_norm": 2.1106826982253355, "learning_rate": 2.771940836145098e-08, "loss": 0.1776, "step": 51587 }, { "epoch": 0.8967303446957187, "grad_norm": 1.5129502243702846, "learning_rate": 2.7710166684620006e-08, "loss": 0.1342, "step": 51588 }, { "epoch": 0.8967477272332215, "grad_norm": 2.92689958102684, "learning_rate": 2.770092650473088e-08, "loss": 0.178, "step": 51589 }, { "epoch": 0.8967651097707243, "grad_norm": 1.3866657964991753, "learning_rate": 2.769168782181286e-08, "loss": 0.0887, "step": 51590 }, { "epoch": 0.8967824923082272, "grad_norm": 2.20482843107841, "learning_rate": 2.7682450635895192e-08, "loss": 0.207, "step": 51591 }, { "epoch": 0.89679987484573, "grad_norm": 1.0233887913061404, "learning_rate": 2.767321494700714e-08, "loss": 0.114, "step": 51592 }, { "epoch": 0.8968172573832328, "grad_norm": 1.2729598567021947, "learning_rate": 2.766398075517812e-08, "loss": 0.117, "step": 51593 }, { "epoch": 0.8968346399207356, "grad_norm": 2.9005065712027083, "learning_rate": 2.7654748060437327e-08, "loss": 0.1909, "step": 51594 }, { "epoch": 0.8968520224582385, "grad_norm": 1.5566912499980197, "learning_rate": 2.764551686281391e-08, "loss": 0.1449, "step": 51595 }, { "epoch": 0.8968694049957413, "grad_norm": 1.1328667003749844, "learning_rate": 2.763628716233729e-08, "loss": 0.0891, "step": 51596 }, { "epoch": 0.8968867875332441, "grad_norm": 1.3234243873777098, "learning_rate": 2.762705895903661e-08, "loss": 0.1425, "step": 51597 }, { "epoch": 0.896904170070747, "grad_norm": 1.3522350015560483, "learning_rate": 2.7617832252941232e-08, "loss": 0.1536, "step": 51598 }, { "epoch": 0.8969215526082498, "grad_norm": 1.1934593683228303, "learning_rate": 2.7608607044080246e-08, "loss": 0.1905, "step": 51599 }, { "epoch": 0.8969389351457526, "grad_norm": 2.211194540773315, "learning_rate": 2.7599383332482962e-08, "loss": 0.128, "step": 51600 }, { "epoch": 0.8969563176832555, "grad_norm": 1.3036078161340026, "learning_rate": 2.75901611181788e-08, "loss": 0.1517, "step": 51601 }, { "epoch": 0.8969737002207582, "grad_norm": 3.683296151218022, "learning_rate": 2.7580940401196684e-08, "loss": 0.1723, "step": 51602 }, { "epoch": 0.896991082758261, "grad_norm": 0.9930602682958966, "learning_rate": 2.757172118156592e-08, "loss": 0.1233, "step": 51603 }, { "epoch": 0.8970084652957638, "grad_norm": 1.752062851846449, "learning_rate": 2.7562503459315878e-08, "loss": 0.0989, "step": 51604 }, { "epoch": 0.8970258478332667, "grad_norm": 1.287580361753556, "learning_rate": 2.7553287234475698e-08, "loss": 0.1363, "step": 51605 }, { "epoch": 0.8970432303707695, "grad_norm": 0.9779495300551848, "learning_rate": 2.7544072507074522e-08, "loss": 0.1051, "step": 51606 }, { "epoch": 0.8970606129082723, "grad_norm": 1.4821791761388838, "learning_rate": 2.753485927714161e-08, "loss": 0.143, "step": 51607 }, { "epoch": 0.8970779954457752, "grad_norm": 1.6207716421564113, "learning_rate": 2.752564754470621e-08, "loss": 0.1524, "step": 51608 }, { "epoch": 0.897095377983278, "grad_norm": 1.0855358252930492, "learning_rate": 2.7516437309797414e-08, "loss": 0.1256, "step": 51609 }, { "epoch": 0.8971127605207808, "grad_norm": 2.1897447157421346, "learning_rate": 2.750722857244453e-08, "loss": 0.1608, "step": 51610 }, { "epoch": 0.8971301430582836, "grad_norm": 1.3789932761801638, "learning_rate": 2.749802133267659e-08, "loss": 0.2551, "step": 51611 }, { "epoch": 0.8971475255957865, "grad_norm": 1.3890393498475129, "learning_rate": 2.7488815590522905e-08, "loss": 0.1557, "step": 51612 }, { "epoch": 0.8971649081332893, "grad_norm": 2.0044985372712474, "learning_rate": 2.747961134601273e-08, "loss": 0.1452, "step": 51613 }, { "epoch": 0.8971822906707921, "grad_norm": 1.6761425227234923, "learning_rate": 2.7470408599175045e-08, "loss": 0.1603, "step": 51614 }, { "epoch": 0.897199673208295, "grad_norm": 3.146906899186419, "learning_rate": 2.746120735003904e-08, "loss": 0.2873, "step": 51615 }, { "epoch": 0.8972170557457978, "grad_norm": 1.679600460857143, "learning_rate": 2.745200759863403e-08, "loss": 0.1422, "step": 51616 }, { "epoch": 0.8972344382833006, "grad_norm": 1.1355366934735316, "learning_rate": 2.744280934498905e-08, "loss": 0.1226, "step": 51617 }, { "epoch": 0.8972518208208035, "grad_norm": 2.224461049041311, "learning_rate": 2.7433612589133293e-08, "loss": 0.2251, "step": 51618 }, { "epoch": 0.8972692033583063, "grad_norm": 1.1252707512983668, "learning_rate": 2.742441733109585e-08, "loss": 0.1578, "step": 51619 }, { "epoch": 0.8972865858958091, "grad_norm": 1.3127361485922173, "learning_rate": 2.741522357090609e-08, "loss": 0.1124, "step": 51620 }, { "epoch": 0.897303968433312, "grad_norm": 2.149048555177382, "learning_rate": 2.740603130859287e-08, "loss": 0.1627, "step": 51621 }, { "epoch": 0.8973213509708147, "grad_norm": 1.6613549669066714, "learning_rate": 2.739684054418545e-08, "loss": 0.1712, "step": 51622 }, { "epoch": 0.8973387335083175, "grad_norm": 1.3103570986213129, "learning_rate": 2.7387651277712866e-08, "loss": 0.1545, "step": 51623 }, { "epoch": 0.8973561160458203, "grad_norm": 1.0917152243511412, "learning_rate": 2.737846350920442e-08, "loss": 0.111, "step": 51624 }, { "epoch": 0.8973734985833232, "grad_norm": 1.5442875983088642, "learning_rate": 2.736927723868909e-08, "loss": 0.1293, "step": 51625 }, { "epoch": 0.897390881120826, "grad_norm": 1.0854418221527038, "learning_rate": 2.7360092466196083e-08, "loss": 0.1521, "step": 51626 }, { "epoch": 0.8974082636583288, "grad_norm": 1.2306368319450134, "learning_rate": 2.735090919175448e-08, "loss": 0.1393, "step": 51627 }, { "epoch": 0.8974256461958316, "grad_norm": 2.809846184758354, "learning_rate": 2.7341727415393368e-08, "loss": 0.1161, "step": 51628 }, { "epoch": 0.8974430287333345, "grad_norm": 1.2491851592807015, "learning_rate": 2.7332547137141892e-08, "loss": 0.1278, "step": 51629 }, { "epoch": 0.8974604112708373, "grad_norm": 2.7573525929387697, "learning_rate": 2.7323368357029086e-08, "loss": 0.1934, "step": 51630 }, { "epoch": 0.8974777938083401, "grad_norm": 1.4531751012135241, "learning_rate": 2.7314191075083982e-08, "loss": 0.1348, "step": 51631 }, { "epoch": 0.897495176345843, "grad_norm": 1.277072995828384, "learning_rate": 2.7305015291335887e-08, "loss": 0.1375, "step": 51632 }, { "epoch": 0.8975125588833458, "grad_norm": 1.2638896063133587, "learning_rate": 2.7295841005813724e-08, "loss": 0.1226, "step": 51633 }, { "epoch": 0.8975299414208486, "grad_norm": 8.22372550677424, "learning_rate": 2.7286668218546526e-08, "loss": 0.1722, "step": 51634 }, { "epoch": 0.8975473239583515, "grad_norm": 1.2518052673449178, "learning_rate": 2.7277496929563493e-08, "loss": 0.1904, "step": 51635 }, { "epoch": 0.8975647064958543, "grad_norm": 0.9423217948472562, "learning_rate": 2.7268327138893655e-08, "loss": 0.1235, "step": 51636 }, { "epoch": 0.8975820890333571, "grad_norm": 0.8179281630720624, "learning_rate": 2.72591588465661e-08, "loss": 0.1449, "step": 51637 }, { "epoch": 0.89759947157086, "grad_norm": 1.1703317786935887, "learning_rate": 2.7249992052609805e-08, "loss": 0.1159, "step": 51638 }, { "epoch": 0.8976168541083628, "grad_norm": 1.7344696338903756, "learning_rate": 2.7240826757053858e-08, "loss": 0.1162, "step": 51639 }, { "epoch": 0.8976342366458656, "grad_norm": 1.935455285579736, "learning_rate": 2.7231662959927404e-08, "loss": 0.1338, "step": 51640 }, { "epoch": 0.8976516191833684, "grad_norm": 1.7081391100667525, "learning_rate": 2.7222500661259307e-08, "loss": 0.1198, "step": 51641 }, { "epoch": 0.8976690017208712, "grad_norm": 1.017862312641193, "learning_rate": 2.721333986107871e-08, "loss": 0.113, "step": 51642 }, { "epoch": 0.897686384258374, "grad_norm": 0.8350158389389059, "learning_rate": 2.7204180559414702e-08, "loss": 0.1058, "step": 51643 }, { "epoch": 0.8977037667958768, "grad_norm": 1.335279495560379, "learning_rate": 2.7195022756296204e-08, "loss": 0.1827, "step": 51644 }, { "epoch": 0.8977211493333797, "grad_norm": 0.9442845359914346, "learning_rate": 2.7185866451752416e-08, "loss": 0.08, "step": 51645 }, { "epoch": 0.8977385318708825, "grad_norm": 1.221715390902254, "learning_rate": 2.7176711645812088e-08, "loss": 0.1269, "step": 51646 }, { "epoch": 0.8977559144083853, "grad_norm": 1.2258961371459673, "learning_rate": 2.7167558338504482e-08, "loss": 0.1122, "step": 51647 }, { "epoch": 0.8977732969458881, "grad_norm": 1.5036527354926166, "learning_rate": 2.7158406529858457e-08, "loss": 0.1659, "step": 51648 }, { "epoch": 0.897790679483391, "grad_norm": 2.627288170349047, "learning_rate": 2.7149256219903106e-08, "loss": 0.2073, "step": 51649 }, { "epoch": 0.8978080620208938, "grad_norm": 0.9536798171017434, "learning_rate": 2.7140107408667345e-08, "loss": 0.1398, "step": 51650 }, { "epoch": 0.8978254445583966, "grad_norm": 1.3795847352946187, "learning_rate": 2.7130960096180376e-08, "loss": 0.1337, "step": 51651 }, { "epoch": 0.8978428270958995, "grad_norm": 1.4980357045743538, "learning_rate": 2.7121814282470956e-08, "loss": 0.1538, "step": 51652 }, { "epoch": 0.8978602096334023, "grad_norm": 2.0720237445850276, "learning_rate": 2.711266996756817e-08, "loss": 0.1689, "step": 51653 }, { "epoch": 0.8978775921709051, "grad_norm": 1.1369342273591916, "learning_rate": 2.7103527151500883e-08, "loss": 0.1255, "step": 51654 }, { "epoch": 0.897894974708408, "grad_norm": 1.1075187113499436, "learning_rate": 2.7094385834298295e-08, "loss": 0.139, "step": 51655 }, { "epoch": 0.8979123572459108, "grad_norm": 0.9446477819951876, "learning_rate": 2.7085246015989272e-08, "loss": 0.0787, "step": 51656 }, { "epoch": 0.8979297397834136, "grad_norm": 2.3009697167482366, "learning_rate": 2.707610769660279e-08, "loss": 0.1154, "step": 51657 }, { "epoch": 0.8979471223209164, "grad_norm": 0.9383614569789266, "learning_rate": 2.706697087616777e-08, "loss": 0.1151, "step": 51658 }, { "epoch": 0.8979645048584193, "grad_norm": 1.1063813926097612, "learning_rate": 2.7057835554713248e-08, "loss": 0.2203, "step": 51659 }, { "epoch": 0.8979818873959221, "grad_norm": 1.3726400954343265, "learning_rate": 2.7048701732268086e-08, "loss": 0.1075, "step": 51660 }, { "epoch": 0.8979992699334248, "grad_norm": 0.8425787664706871, "learning_rate": 2.703956940886132e-08, "loss": 0.1227, "step": 51661 }, { "epoch": 0.8980166524709277, "grad_norm": 1.163864483880456, "learning_rate": 2.7030438584521752e-08, "loss": 0.1272, "step": 51662 }, { "epoch": 0.8980340350084305, "grad_norm": 1.2971023195558582, "learning_rate": 2.7021309259278536e-08, "loss": 0.095, "step": 51663 }, { "epoch": 0.8980514175459333, "grad_norm": 1.5298408457947266, "learning_rate": 2.701218143316053e-08, "loss": 0.1311, "step": 51664 }, { "epoch": 0.8980688000834361, "grad_norm": 1.7441866978141292, "learning_rate": 2.700305510619655e-08, "loss": 0.122, "step": 51665 }, { "epoch": 0.898086182620939, "grad_norm": 4.459453691778648, "learning_rate": 2.699393027841562e-08, "loss": 0.1401, "step": 51666 }, { "epoch": 0.8981035651584418, "grad_norm": 1.5412327166713091, "learning_rate": 2.698480694984667e-08, "loss": 0.1075, "step": 51667 }, { "epoch": 0.8981209476959446, "grad_norm": 1.2276203736613265, "learning_rate": 2.6975685120518622e-08, "loss": 0.1115, "step": 51668 }, { "epoch": 0.8981383302334475, "grad_norm": 1.4687714990777596, "learning_rate": 2.6966564790460278e-08, "loss": 0.0951, "step": 51669 }, { "epoch": 0.8981557127709503, "grad_norm": 1.0121659240305496, "learning_rate": 2.6957445959700787e-08, "loss": 0.1444, "step": 51670 }, { "epoch": 0.8981730953084531, "grad_norm": 1.159923958081853, "learning_rate": 2.694832862826879e-08, "loss": 0.1637, "step": 51671 }, { "epoch": 0.898190477845956, "grad_norm": 0.9893376796872467, "learning_rate": 2.6939212796193324e-08, "loss": 0.109, "step": 51672 }, { "epoch": 0.8982078603834588, "grad_norm": 1.469451192940229, "learning_rate": 2.6930098463503137e-08, "loss": 0.1058, "step": 51673 }, { "epoch": 0.8982252429209616, "grad_norm": 1.1475718225812728, "learning_rate": 2.6920985630227322e-08, "loss": 0.1194, "step": 51674 }, { "epoch": 0.8982426254584644, "grad_norm": 1.7341900256767564, "learning_rate": 2.6911874296394688e-08, "loss": 0.1457, "step": 51675 }, { "epoch": 0.8982600079959673, "grad_norm": 1.7801054574910893, "learning_rate": 2.6902764462034043e-08, "loss": 0.1257, "step": 51676 }, { "epoch": 0.8982773905334701, "grad_norm": 1.378816326798268, "learning_rate": 2.689365612717437e-08, "loss": 0.1867, "step": 51677 }, { "epoch": 0.8982947730709729, "grad_norm": 0.948174599912223, "learning_rate": 2.6884549291844415e-08, "loss": 0.1299, "step": 51678 }, { "epoch": 0.8983121556084758, "grad_norm": 1.105515787997886, "learning_rate": 2.6875443956073163e-08, "loss": 0.1284, "step": 51679 }, { "epoch": 0.8983295381459786, "grad_norm": 1.7599867132476494, "learning_rate": 2.6866340119889364e-08, "loss": 0.1323, "step": 51680 }, { "epoch": 0.8983469206834813, "grad_norm": 1.032034269995103, "learning_rate": 2.6857237783321885e-08, "loss": 0.1405, "step": 51681 }, { "epoch": 0.8983643032209842, "grad_norm": 1.6019892629108106, "learning_rate": 2.6848136946399703e-08, "loss": 0.1752, "step": 51682 }, { "epoch": 0.898381685758487, "grad_norm": 1.1384966511640164, "learning_rate": 2.6839037609151627e-08, "loss": 0.1401, "step": 51683 }, { "epoch": 0.8983990682959898, "grad_norm": 1.8901450488030738, "learning_rate": 2.6829939771606357e-08, "loss": 0.1967, "step": 51684 }, { "epoch": 0.8984164508334926, "grad_norm": 0.9982980451491581, "learning_rate": 2.6820843433792818e-08, "loss": 0.1009, "step": 51685 }, { "epoch": 0.8984338333709955, "grad_norm": 1.3692501840595832, "learning_rate": 2.6811748595739925e-08, "loss": 0.1584, "step": 51686 }, { "epoch": 0.8984512159084983, "grad_norm": 1.1904192422890727, "learning_rate": 2.680265525747638e-08, "loss": 0.0961, "step": 51687 }, { "epoch": 0.8984685984460011, "grad_norm": 1.6079303967042602, "learning_rate": 2.679356341903105e-08, "loss": 0.1112, "step": 51688 }, { "epoch": 0.898485980983504, "grad_norm": 1.1769590963144185, "learning_rate": 2.6784473080432745e-08, "loss": 0.1137, "step": 51689 }, { "epoch": 0.8985033635210068, "grad_norm": 1.237168945251893, "learning_rate": 2.6775384241710275e-08, "loss": 0.0924, "step": 51690 }, { "epoch": 0.8985207460585096, "grad_norm": 1.140343715145666, "learning_rate": 2.676629690289245e-08, "loss": 0.1294, "step": 51691 }, { "epoch": 0.8985381285960125, "grad_norm": 1.0001589820722616, "learning_rate": 2.6757211064008024e-08, "loss": 0.1286, "step": 51692 }, { "epoch": 0.8985555111335153, "grad_norm": 1.272635833890066, "learning_rate": 2.6748126725085918e-08, "loss": 0.1361, "step": 51693 }, { "epoch": 0.8985728936710181, "grad_norm": 1.6844693371605024, "learning_rate": 2.6739043886154887e-08, "loss": 0.1299, "step": 51694 }, { "epoch": 0.8985902762085209, "grad_norm": 1.6012439454202987, "learning_rate": 2.672996254724369e-08, "loss": 0.1564, "step": 51695 }, { "epoch": 0.8986076587460238, "grad_norm": 1.6216025855881409, "learning_rate": 2.672088270838102e-08, "loss": 0.1381, "step": 51696 }, { "epoch": 0.8986250412835266, "grad_norm": 1.4183585243136656, "learning_rate": 2.6711804369595804e-08, "loss": 0.1494, "step": 51697 }, { "epoch": 0.8986424238210294, "grad_norm": 1.7148068257796345, "learning_rate": 2.670272753091679e-08, "loss": 0.1209, "step": 51698 }, { "epoch": 0.8986598063585323, "grad_norm": 1.0231894623182625, "learning_rate": 2.6693652192372686e-08, "loss": 0.102, "step": 51699 }, { "epoch": 0.8986771888960351, "grad_norm": 1.703976076860296, "learning_rate": 2.6684578353992183e-08, "loss": 0.1413, "step": 51700 }, { "epoch": 0.8986945714335378, "grad_norm": 1.7486612311048586, "learning_rate": 2.667550601580426e-08, "loss": 0.1634, "step": 51701 }, { "epoch": 0.8987119539710406, "grad_norm": 2.322414718516291, "learning_rate": 2.666643517783762e-08, "loss": 0.1504, "step": 51702 }, { "epoch": 0.8987293365085435, "grad_norm": 0.769676827532036, "learning_rate": 2.665736584012085e-08, "loss": 0.1245, "step": 51703 }, { "epoch": 0.8987467190460463, "grad_norm": 1.4819036123205025, "learning_rate": 2.6648298002682756e-08, "loss": 0.1972, "step": 51704 }, { "epoch": 0.8987641015835491, "grad_norm": 0.8729445845398441, "learning_rate": 2.6639231665552153e-08, "loss": 0.1082, "step": 51705 }, { "epoch": 0.898781484121052, "grad_norm": 2.0187247228255982, "learning_rate": 2.663016682875774e-08, "loss": 0.1551, "step": 51706 }, { "epoch": 0.8987988666585548, "grad_norm": 1.0138681944526642, "learning_rate": 2.6621103492328266e-08, "loss": 0.1283, "step": 51707 }, { "epoch": 0.8988162491960576, "grad_norm": 0.9402101489098388, "learning_rate": 2.6612041656292493e-08, "loss": 0.1268, "step": 51708 }, { "epoch": 0.8988336317335605, "grad_norm": 1.3312338586939614, "learning_rate": 2.6602981320679008e-08, "loss": 0.1638, "step": 51709 }, { "epoch": 0.8988510142710633, "grad_norm": 2.1124755838771168, "learning_rate": 2.6593922485516673e-08, "loss": 0.1883, "step": 51710 }, { "epoch": 0.8988683968085661, "grad_norm": 2.4081382427052764, "learning_rate": 2.6584865150834078e-08, "loss": 0.2084, "step": 51711 }, { "epoch": 0.898885779346069, "grad_norm": 1.4464307636109521, "learning_rate": 2.657580931665998e-08, "loss": 0.1214, "step": 51712 }, { "epoch": 0.8989031618835718, "grad_norm": 1.8378000330303164, "learning_rate": 2.656675498302313e-08, "loss": 0.1532, "step": 51713 }, { "epoch": 0.8989205444210746, "grad_norm": 1.6887789381862168, "learning_rate": 2.6557702149952287e-08, "loss": 0.1451, "step": 51714 }, { "epoch": 0.8989379269585774, "grad_norm": 1.195663948896137, "learning_rate": 2.654865081747587e-08, "loss": 0.141, "step": 51715 }, { "epoch": 0.8989553094960803, "grad_norm": 1.4654519157131098, "learning_rate": 2.6539600985622856e-08, "loss": 0.1607, "step": 51716 }, { "epoch": 0.8989726920335831, "grad_norm": 2.333442686402449, "learning_rate": 2.6530552654421778e-08, "loss": 0.168, "step": 51717 }, { "epoch": 0.8989900745710859, "grad_norm": 1.1043748747984221, "learning_rate": 2.6521505823901393e-08, "loss": 0.1592, "step": 51718 }, { "epoch": 0.8990074571085888, "grad_norm": 1.5162106702459428, "learning_rate": 2.6512460494090283e-08, "loss": 0.0933, "step": 51719 }, { "epoch": 0.8990248396460916, "grad_norm": 1.161656313878086, "learning_rate": 2.6503416665017153e-08, "loss": 0.139, "step": 51720 }, { "epoch": 0.8990422221835943, "grad_norm": 1.045279875297404, "learning_rate": 2.6494374336710812e-08, "loss": 0.0895, "step": 51721 }, { "epoch": 0.8990596047210971, "grad_norm": 1.4802508016347242, "learning_rate": 2.6485333509199682e-08, "loss": 0.1977, "step": 51722 }, { "epoch": 0.8990769872586, "grad_norm": 1.6991321057624038, "learning_rate": 2.6476294182512515e-08, "loss": 0.16, "step": 51723 }, { "epoch": 0.8990943697961028, "grad_norm": 1.1586808234991854, "learning_rate": 2.6467256356678014e-08, "loss": 0.1223, "step": 51724 }, { "epoch": 0.8991117523336056, "grad_norm": 1.7878445476706193, "learning_rate": 2.645822003172482e-08, "loss": 0.2167, "step": 51725 }, { "epoch": 0.8991291348711085, "grad_norm": 2.7161749389610605, "learning_rate": 2.6449185207681524e-08, "loss": 0.1742, "step": 51726 }, { "epoch": 0.8991465174086113, "grad_norm": 1.7988693457331526, "learning_rate": 2.644015188457677e-08, "loss": 0.1045, "step": 51727 }, { "epoch": 0.8991638999461141, "grad_norm": 2.6903654537083255, "learning_rate": 2.6431120062439195e-08, "loss": 0.1575, "step": 51728 }, { "epoch": 0.899181282483617, "grad_norm": 1.3284357156915632, "learning_rate": 2.642208974129745e-08, "loss": 0.1512, "step": 51729 }, { "epoch": 0.8991986650211198, "grad_norm": 1.0141761426419578, "learning_rate": 2.641306092118012e-08, "loss": 0.1098, "step": 51730 }, { "epoch": 0.8992160475586226, "grad_norm": 1.9653261543215947, "learning_rate": 2.6404033602115793e-08, "loss": 0.2082, "step": 51731 }, { "epoch": 0.8992334300961254, "grad_norm": 2.759911813316062, "learning_rate": 2.6395007784133172e-08, "loss": 0.1666, "step": 51732 }, { "epoch": 0.8992508126336283, "grad_norm": 1.0696928352518715, "learning_rate": 2.6385983467260898e-08, "loss": 0.0983, "step": 51733 }, { "epoch": 0.8992681951711311, "grad_norm": 1.9482999335364517, "learning_rate": 2.6376960651527392e-08, "loss": 0.187, "step": 51734 }, { "epoch": 0.8992855777086339, "grad_norm": 1.7413333050767419, "learning_rate": 2.6367939336961352e-08, "loss": 0.1714, "step": 51735 }, { "epoch": 0.8993029602461368, "grad_norm": 1.9239516591858274, "learning_rate": 2.6358919523591374e-08, "loss": 0.1344, "step": 51736 }, { "epoch": 0.8993203427836396, "grad_norm": 2.065025283303952, "learning_rate": 2.634990121144609e-08, "loss": 0.1353, "step": 51737 }, { "epoch": 0.8993377253211424, "grad_norm": 1.0422601777756866, "learning_rate": 2.6340884400554043e-08, "loss": 0.1175, "step": 51738 }, { "epoch": 0.8993551078586453, "grad_norm": 1.016903472679081, "learning_rate": 2.633186909094376e-08, "loss": 0.1832, "step": 51739 }, { "epoch": 0.8993724903961481, "grad_norm": 1.2144769281740329, "learning_rate": 2.632285528264394e-08, "loss": 0.1984, "step": 51740 }, { "epoch": 0.8993898729336508, "grad_norm": 1.008684856379236, "learning_rate": 2.6313842975683063e-08, "loss": 0.1158, "step": 51741 }, { "epoch": 0.8994072554711536, "grad_norm": 1.2534751153013706, "learning_rate": 2.630483217008972e-08, "loss": 0.1406, "step": 51742 }, { "epoch": 0.8994246380086565, "grad_norm": 1.9590227486294443, "learning_rate": 2.629582286589238e-08, "loss": 0.1164, "step": 51743 }, { "epoch": 0.8994420205461593, "grad_norm": 1.0698699628399413, "learning_rate": 2.628681506311975e-08, "loss": 0.1434, "step": 51744 }, { "epoch": 0.8994594030836621, "grad_norm": 1.8769525211715643, "learning_rate": 2.62778087618003e-08, "loss": 0.1472, "step": 51745 }, { "epoch": 0.899476785621165, "grad_norm": 2.8195609378626294, "learning_rate": 2.626880396196257e-08, "loss": 0.1863, "step": 51746 }, { "epoch": 0.8994941681586678, "grad_norm": 3.4935417934068087, "learning_rate": 2.6259800663635144e-08, "loss": 0.2712, "step": 51747 }, { "epoch": 0.8995115506961706, "grad_norm": 0.996313127954397, "learning_rate": 2.6250798866846503e-08, "loss": 0.1453, "step": 51748 }, { "epoch": 0.8995289332336734, "grad_norm": 1.0327670108120395, "learning_rate": 2.624179857162523e-08, "loss": 0.1774, "step": 51749 }, { "epoch": 0.8995463157711763, "grad_norm": 1.3843432697484497, "learning_rate": 2.6232799777999805e-08, "loss": 0.0971, "step": 51750 }, { "epoch": 0.8995636983086791, "grad_norm": 1.1405430775985537, "learning_rate": 2.622380248599876e-08, "loss": 0.1098, "step": 51751 }, { "epoch": 0.8995810808461819, "grad_norm": 1.0212160801188808, "learning_rate": 2.621480669565068e-08, "loss": 0.1168, "step": 51752 }, { "epoch": 0.8995984633836848, "grad_norm": 1.484035208621295, "learning_rate": 2.620581240698405e-08, "loss": 0.1156, "step": 51753 }, { "epoch": 0.8996158459211876, "grad_norm": 1.421183600882135, "learning_rate": 2.6196819620027232e-08, "loss": 0.2718, "step": 51754 }, { "epoch": 0.8996332284586904, "grad_norm": 1.587091461710586, "learning_rate": 2.6187828334808927e-08, "loss": 0.1276, "step": 51755 }, { "epoch": 0.8996506109961933, "grad_norm": 1.7724605142628447, "learning_rate": 2.6178838551357552e-08, "loss": 0.1726, "step": 51756 }, { "epoch": 0.8996679935336961, "grad_norm": 1.2432989998992252, "learning_rate": 2.6169850269701587e-08, "loss": 0.0935, "step": 51757 }, { "epoch": 0.8996853760711989, "grad_norm": 0.8234943856087596, "learning_rate": 2.6160863489869512e-08, "loss": 0.1765, "step": 51758 }, { "epoch": 0.8997027586087017, "grad_norm": 1.5444484529089495, "learning_rate": 2.615187821188991e-08, "loss": 0.1514, "step": 51759 }, { "epoch": 0.8997201411462046, "grad_norm": 0.9862022574392303, "learning_rate": 2.6142894435791153e-08, "loss": 0.0942, "step": 51760 }, { "epoch": 0.8997375236837073, "grad_norm": 1.259224598382651, "learning_rate": 2.6133912161601713e-08, "loss": 0.1358, "step": 51761 }, { "epoch": 0.8997549062212101, "grad_norm": 1.1161164196580964, "learning_rate": 2.6124931389350068e-08, "loss": 0.1324, "step": 51762 }, { "epoch": 0.899772288758713, "grad_norm": 1.3329732047194256, "learning_rate": 2.6115952119064756e-08, "loss": 0.1414, "step": 51763 }, { "epoch": 0.8997896712962158, "grad_norm": 0.9775138340781385, "learning_rate": 2.6106974350774192e-08, "loss": 0.1094, "step": 51764 }, { "epoch": 0.8998070538337186, "grad_norm": 2.055740069038741, "learning_rate": 2.6097998084506855e-08, "loss": 0.1929, "step": 51765 }, { "epoch": 0.8998244363712214, "grad_norm": 0.8851189631838006, "learning_rate": 2.608902332029117e-08, "loss": 0.1234, "step": 51766 }, { "epoch": 0.8998418189087243, "grad_norm": 1.7665797038363764, "learning_rate": 2.6080050058155557e-08, "loss": 0.1683, "step": 51767 }, { "epoch": 0.8998592014462271, "grad_norm": 1.058403533734508, "learning_rate": 2.6071078298128546e-08, "loss": 0.1343, "step": 51768 }, { "epoch": 0.8998765839837299, "grad_norm": 1.2066403728624993, "learning_rate": 2.6062108040238452e-08, "loss": 0.1226, "step": 51769 }, { "epoch": 0.8998939665212328, "grad_norm": 1.370962542202785, "learning_rate": 2.605313928451369e-08, "loss": 0.1436, "step": 51770 }, { "epoch": 0.8999113490587356, "grad_norm": 1.1446026908734341, "learning_rate": 2.6044172030982913e-08, "loss": 0.1291, "step": 51771 }, { "epoch": 0.8999287315962384, "grad_norm": 1.0449352114378632, "learning_rate": 2.603520627967437e-08, "loss": 0.1469, "step": 51772 }, { "epoch": 0.8999461141337413, "grad_norm": 1.030184711318501, "learning_rate": 2.6026242030616375e-08, "loss": 0.1254, "step": 51773 }, { "epoch": 0.8999634966712441, "grad_norm": 0.8437217572167113, "learning_rate": 2.6017279283837568e-08, "loss": 0.1164, "step": 51774 }, { "epoch": 0.8999808792087469, "grad_norm": 1.4525690695785767, "learning_rate": 2.6008318039366262e-08, "loss": 0.1619, "step": 51775 }, { "epoch": 0.8999982617462498, "grad_norm": 1.265150968345659, "learning_rate": 2.599935829723082e-08, "loss": 0.1547, "step": 51776 }, { "epoch": 0.9000156442837526, "grad_norm": 1.637060606321404, "learning_rate": 2.5990400057459673e-08, "loss": 0.1206, "step": 51777 }, { "epoch": 0.9000330268212554, "grad_norm": 1.422225906154844, "learning_rate": 2.5981443320081232e-08, "loss": 0.0996, "step": 51778 }, { "epoch": 0.9000504093587582, "grad_norm": 1.7567458068011392, "learning_rate": 2.5972488085123866e-08, "loss": 0.1462, "step": 51779 }, { "epoch": 0.9000677918962611, "grad_norm": 2.702090859869913, "learning_rate": 2.5963534352616e-08, "loss": 0.1327, "step": 51780 }, { "epoch": 0.9000851744337638, "grad_norm": 1.9916764288426978, "learning_rate": 2.5954582122585833e-08, "loss": 0.1306, "step": 51781 }, { "epoch": 0.9001025569712666, "grad_norm": 1.2160012632918427, "learning_rate": 2.5945631395062008e-08, "loss": 0.142, "step": 51782 }, { "epoch": 0.9001199395087695, "grad_norm": 1.3261509025157567, "learning_rate": 2.5936682170072778e-08, "loss": 0.1296, "step": 51783 }, { "epoch": 0.9001373220462723, "grad_norm": 1.3700188136636828, "learning_rate": 2.592773444764651e-08, "loss": 0.19, "step": 51784 }, { "epoch": 0.9001547045837751, "grad_norm": 1.2723217119457848, "learning_rate": 2.591878822781146e-08, "loss": 0.1411, "step": 51785 }, { "epoch": 0.9001720871212779, "grad_norm": 1.0975554463707262, "learning_rate": 2.5909843510596107e-08, "loss": 0.1205, "step": 51786 }, { "epoch": 0.9001894696587808, "grad_norm": 2.0287884103833997, "learning_rate": 2.5900900296028816e-08, "loss": 0.1373, "step": 51787 }, { "epoch": 0.9002068521962836, "grad_norm": 1.8997404847586687, "learning_rate": 2.5891958584137896e-08, "loss": 0.1198, "step": 51788 }, { "epoch": 0.9002242347337864, "grad_norm": 1.5830659558523208, "learning_rate": 2.588301837495155e-08, "loss": 0.226, "step": 51789 }, { "epoch": 0.9002416172712893, "grad_norm": 0.7799584231107168, "learning_rate": 2.5874079668498416e-08, "loss": 0.1276, "step": 51790 }, { "epoch": 0.9002589998087921, "grad_norm": 2.0063232490978846, "learning_rate": 2.586514246480659e-08, "loss": 0.1516, "step": 51791 }, { "epoch": 0.9002763823462949, "grad_norm": 1.5268144045810552, "learning_rate": 2.5856206763904488e-08, "loss": 0.1777, "step": 51792 }, { "epoch": 0.9002937648837978, "grad_norm": 1.111015405274104, "learning_rate": 2.5847272565820256e-08, "loss": 0.1479, "step": 51793 }, { "epoch": 0.9003111474213006, "grad_norm": 0.8894423608353913, "learning_rate": 2.5838339870582483e-08, "loss": 0.1055, "step": 51794 }, { "epoch": 0.9003285299588034, "grad_norm": 1.543179219020406, "learning_rate": 2.5829408678219367e-08, "loss": 0.2339, "step": 51795 }, { "epoch": 0.9003459124963062, "grad_norm": 3.1840556808224236, "learning_rate": 2.582047898875922e-08, "loss": 0.2388, "step": 51796 }, { "epoch": 0.9003632950338091, "grad_norm": 1.4283387937642098, "learning_rate": 2.5811550802230296e-08, "loss": 0.2711, "step": 51797 }, { "epoch": 0.9003806775713119, "grad_norm": 1.0736908865845927, "learning_rate": 2.5802624118660963e-08, "loss": 0.1265, "step": 51798 }, { "epoch": 0.9003980601088147, "grad_norm": 2.0868923673646242, "learning_rate": 2.579369893807948e-08, "loss": 0.1613, "step": 51799 }, { "epoch": 0.9004154426463175, "grad_norm": 0.9788798385005593, "learning_rate": 2.5784775260514148e-08, "loss": 0.1535, "step": 51800 }, { "epoch": 0.9004328251838203, "grad_norm": 1.1578173476333438, "learning_rate": 2.5775853085993116e-08, "loss": 0.1189, "step": 51801 }, { "epoch": 0.9004502077213231, "grad_norm": 2.070110748016502, "learning_rate": 2.576693241454492e-08, "loss": 0.1181, "step": 51802 }, { "epoch": 0.900467590258826, "grad_norm": 1.803965432691403, "learning_rate": 2.5758013246197752e-08, "loss": 0.1199, "step": 51803 }, { "epoch": 0.9004849727963288, "grad_norm": 0.8373554236913312, "learning_rate": 2.5749095580979652e-08, "loss": 0.142, "step": 51804 }, { "epoch": 0.9005023553338316, "grad_norm": 1.1570599001758741, "learning_rate": 2.5740179418919204e-08, "loss": 0.1216, "step": 51805 }, { "epoch": 0.9005197378713344, "grad_norm": 2.1426407933244382, "learning_rate": 2.5731264760044502e-08, "loss": 0.1491, "step": 51806 }, { "epoch": 0.9005371204088373, "grad_norm": 0.9258479304846801, "learning_rate": 2.5722351604383852e-08, "loss": 0.115, "step": 51807 }, { "epoch": 0.9005545029463401, "grad_norm": 1.4190012513029646, "learning_rate": 2.5713439951965454e-08, "loss": 0.1671, "step": 51808 }, { "epoch": 0.9005718854838429, "grad_norm": 1.5633411210796708, "learning_rate": 2.5704529802817566e-08, "loss": 0.2078, "step": 51809 }, { "epoch": 0.9005892680213458, "grad_norm": 1.6444819643498099, "learning_rate": 2.569562115696844e-08, "loss": 0.1239, "step": 51810 }, { "epoch": 0.9006066505588486, "grad_norm": 1.235322712385541, "learning_rate": 2.5686714014446387e-08, "loss": 0.1022, "step": 51811 }, { "epoch": 0.9006240330963514, "grad_norm": 2.334822759445895, "learning_rate": 2.567780837527944e-08, "loss": 0.219, "step": 51812 }, { "epoch": 0.9006414156338542, "grad_norm": 0.9632410258096309, "learning_rate": 2.5668904239496027e-08, "loss": 0.1181, "step": 51813 }, { "epoch": 0.9006587981713571, "grad_norm": 1.3243941930971428, "learning_rate": 2.5660001607124338e-08, "loss": 0.1472, "step": 51814 }, { "epoch": 0.9006761807088599, "grad_norm": 1.2060817345523205, "learning_rate": 2.5651100478192577e-08, "loss": 0.1016, "step": 51815 }, { "epoch": 0.9006935632463627, "grad_norm": 1.5363033050273034, "learning_rate": 2.564220085272878e-08, "loss": 0.1909, "step": 51816 }, { "epoch": 0.9007109457838656, "grad_norm": 1.16198449309635, "learning_rate": 2.563330273076142e-08, "loss": 0.1154, "step": 51817 }, { "epoch": 0.9007283283213684, "grad_norm": 1.5669483919796086, "learning_rate": 2.5624406112318586e-08, "loss": 0.1025, "step": 51818 }, { "epoch": 0.9007457108588712, "grad_norm": 2.241156069789202, "learning_rate": 2.5615510997428423e-08, "loss": 0.1558, "step": 51819 }, { "epoch": 0.900763093396374, "grad_norm": 1.3118174084864342, "learning_rate": 2.5606617386119134e-08, "loss": 0.1557, "step": 51820 }, { "epoch": 0.9007804759338768, "grad_norm": 1.520606831626766, "learning_rate": 2.5597725278419025e-08, "loss": 0.1438, "step": 51821 }, { "epoch": 0.9007978584713796, "grad_norm": 1.438055490238605, "learning_rate": 2.558883467435624e-08, "loss": 0.2797, "step": 51822 }, { "epoch": 0.9008152410088824, "grad_norm": 1.333219260382131, "learning_rate": 2.557994557395893e-08, "loss": 0.1305, "step": 51823 }, { "epoch": 0.9008326235463853, "grad_norm": 1.592115001336419, "learning_rate": 2.5571057977255117e-08, "loss": 0.3347, "step": 51824 }, { "epoch": 0.9008500060838881, "grad_norm": 2.5764254560574678, "learning_rate": 2.5562171884273233e-08, "loss": 0.2183, "step": 51825 }, { "epoch": 0.9008673886213909, "grad_norm": 5.343483402197234, "learning_rate": 2.5553287295041305e-08, "loss": 0.2441, "step": 51826 }, { "epoch": 0.9008847711588938, "grad_norm": 2.5614317889802534, "learning_rate": 2.5544404209587477e-08, "loss": 0.2034, "step": 51827 }, { "epoch": 0.9009021536963966, "grad_norm": 1.1706542869743828, "learning_rate": 2.5535522627940008e-08, "loss": 0.1004, "step": 51828 }, { "epoch": 0.9009195362338994, "grad_norm": 1.1208545901702598, "learning_rate": 2.552664255012693e-08, "loss": 0.1068, "step": 51829 }, { "epoch": 0.9009369187714023, "grad_norm": 1.039399888209848, "learning_rate": 2.5517763976176442e-08, "loss": 0.1141, "step": 51830 }, { "epoch": 0.9009543013089051, "grad_norm": 1.822960549103041, "learning_rate": 2.550888690611669e-08, "loss": 0.2015, "step": 51831 }, { "epoch": 0.9009716838464079, "grad_norm": 0.8086418014013504, "learning_rate": 2.5500011339975758e-08, "loss": 0.0869, "step": 51832 }, { "epoch": 0.9009890663839107, "grad_norm": 1.1493673680946357, "learning_rate": 2.5491137277781904e-08, "loss": 0.1579, "step": 51833 }, { "epoch": 0.9010064489214136, "grad_norm": 1.2879176313110323, "learning_rate": 2.5482264719563218e-08, "loss": 0.1034, "step": 51834 }, { "epoch": 0.9010238314589164, "grad_norm": 1.2626268054116019, "learning_rate": 2.547339366534762e-08, "loss": 0.122, "step": 51835 }, { "epoch": 0.9010412139964192, "grad_norm": 3.094339107347701, "learning_rate": 2.5464524115163477e-08, "loss": 0.2248, "step": 51836 }, { "epoch": 0.9010585965339221, "grad_norm": 1.501111543985121, "learning_rate": 2.5455656069038766e-08, "loss": 0.1315, "step": 51837 }, { "epoch": 0.9010759790714249, "grad_norm": 1.2572531512019554, "learning_rate": 2.544678952700169e-08, "loss": 0.1187, "step": 51838 }, { "epoch": 0.9010933616089277, "grad_norm": 2.66127605716233, "learning_rate": 2.5437924489080276e-08, "loss": 0.239, "step": 51839 }, { "epoch": 0.9011107441464304, "grad_norm": 1.2519300122530843, "learning_rate": 2.5429060955302616e-08, "loss": 0.0934, "step": 51840 }, { "epoch": 0.9011281266839333, "grad_norm": 1.278972145204085, "learning_rate": 2.5420198925696913e-08, "loss": 0.1206, "step": 51841 }, { "epoch": 0.9011455092214361, "grad_norm": 1.6207570402978801, "learning_rate": 2.5411338400291138e-08, "loss": 0.1131, "step": 51842 }, { "epoch": 0.9011628917589389, "grad_norm": 1.4735097898402174, "learning_rate": 2.5402479379113328e-08, "loss": 0.1126, "step": 51843 }, { "epoch": 0.9011802742964418, "grad_norm": 0.7825004967577681, "learning_rate": 2.5393621862191737e-08, "loss": 0.0808, "step": 51844 }, { "epoch": 0.9011976568339446, "grad_norm": 6.243742621856961, "learning_rate": 2.5384765849554346e-08, "loss": 0.1918, "step": 51845 }, { "epoch": 0.9012150393714474, "grad_norm": 2.7761867196821437, "learning_rate": 2.5375911341229185e-08, "loss": 0.1973, "step": 51846 }, { "epoch": 0.9012324219089503, "grad_norm": 1.4871450149956922, "learning_rate": 2.5367058337244395e-08, "loss": 0.1464, "step": 51847 }, { "epoch": 0.9012498044464531, "grad_norm": 1.401242873162388, "learning_rate": 2.5358206837628017e-08, "loss": 0.1202, "step": 51848 }, { "epoch": 0.9012671869839559, "grad_norm": 2.3399970443654627, "learning_rate": 2.5349356842408076e-08, "loss": 0.1805, "step": 51849 }, { "epoch": 0.9012845695214587, "grad_norm": 0.883089586180818, "learning_rate": 2.534050835161261e-08, "loss": 0.0918, "step": 51850 }, { "epoch": 0.9013019520589616, "grad_norm": 1.654820337050872, "learning_rate": 2.5331661365269653e-08, "loss": 0.1138, "step": 51851 }, { "epoch": 0.9013193345964644, "grad_norm": 2.4977082779638007, "learning_rate": 2.5322815883407346e-08, "loss": 0.2045, "step": 51852 }, { "epoch": 0.9013367171339672, "grad_norm": 1.4749491360492557, "learning_rate": 2.531397190605372e-08, "loss": 0.1685, "step": 51853 }, { "epoch": 0.9013540996714701, "grad_norm": 2.9235885127485264, "learning_rate": 2.5305129433236705e-08, "loss": 0.1752, "step": 51854 }, { "epoch": 0.9013714822089729, "grad_norm": 1.0753315372290662, "learning_rate": 2.529628846498427e-08, "loss": 0.0953, "step": 51855 }, { "epoch": 0.9013888647464757, "grad_norm": 1.074607594693903, "learning_rate": 2.5287449001324625e-08, "loss": 0.1094, "step": 51856 }, { "epoch": 0.9014062472839786, "grad_norm": 1.9638479401662692, "learning_rate": 2.5278611042285737e-08, "loss": 0.1423, "step": 51857 }, { "epoch": 0.9014236298214814, "grad_norm": 2.701405989852387, "learning_rate": 2.526977458789553e-08, "loss": 0.1539, "step": 51858 }, { "epoch": 0.9014410123589842, "grad_norm": 1.5005798229803717, "learning_rate": 2.5260939638182043e-08, "loss": 0.1518, "step": 51859 }, { "epoch": 0.9014583948964869, "grad_norm": 1.0532715962730885, "learning_rate": 2.5252106193173418e-08, "loss": 0.1222, "step": 51860 }, { "epoch": 0.9014757774339898, "grad_norm": 1.7570497519325754, "learning_rate": 2.524327425289746e-08, "loss": 0.1538, "step": 51861 }, { "epoch": 0.9014931599714926, "grad_norm": 1.4790420393091317, "learning_rate": 2.5234443817382157e-08, "loss": 0.1613, "step": 51862 }, { "epoch": 0.9015105425089954, "grad_norm": 1.2046968274210599, "learning_rate": 2.5225614886655643e-08, "loss": 0.1133, "step": 51863 }, { "epoch": 0.9015279250464983, "grad_norm": 1.630178030156887, "learning_rate": 2.5216787460745846e-08, "loss": 0.1293, "step": 51864 }, { "epoch": 0.9015453075840011, "grad_norm": 1.006074134775229, "learning_rate": 2.5207961539680743e-08, "loss": 0.1524, "step": 51865 }, { "epoch": 0.9015626901215039, "grad_norm": 2.4611943752704692, "learning_rate": 2.519913712348831e-08, "loss": 0.2584, "step": 51866 }, { "epoch": 0.9015800726590067, "grad_norm": 1.0241809498028414, "learning_rate": 2.5190314212196527e-08, "loss": 0.1261, "step": 51867 }, { "epoch": 0.9015974551965096, "grad_norm": 1.5453669288124914, "learning_rate": 2.518149280583326e-08, "loss": 0.135, "step": 51868 }, { "epoch": 0.9016148377340124, "grad_norm": 1.2126635130252912, "learning_rate": 2.5172672904426594e-08, "loss": 0.1311, "step": 51869 }, { "epoch": 0.9016322202715152, "grad_norm": 1.1583230967043288, "learning_rate": 2.51638545080044e-08, "loss": 0.2558, "step": 51870 }, { "epoch": 0.9016496028090181, "grad_norm": 1.5385780453261277, "learning_rate": 2.515503761659471e-08, "loss": 0.1367, "step": 51871 }, { "epoch": 0.9016669853465209, "grad_norm": 1.5680562441182309, "learning_rate": 2.5146222230225445e-08, "loss": 0.1737, "step": 51872 }, { "epoch": 0.9016843678840237, "grad_norm": 1.9519805386981932, "learning_rate": 2.513740834892453e-08, "loss": 0.1442, "step": 51873 }, { "epoch": 0.9017017504215266, "grad_norm": 1.431310764160583, "learning_rate": 2.512859597271977e-08, "loss": 0.113, "step": 51874 }, { "epoch": 0.9017191329590294, "grad_norm": 4.065251984661677, "learning_rate": 2.5119785101639312e-08, "loss": 0.1231, "step": 51875 }, { "epoch": 0.9017365154965322, "grad_norm": 2.4062932858891486, "learning_rate": 2.5110975735711028e-08, "loss": 0.1751, "step": 51876 }, { "epoch": 0.901753898034035, "grad_norm": 2.2985381171097354, "learning_rate": 2.5102167874962776e-08, "loss": 0.1927, "step": 51877 }, { "epoch": 0.9017712805715379, "grad_norm": 2.0557044644352525, "learning_rate": 2.509336151942243e-08, "loss": 0.1421, "step": 51878 }, { "epoch": 0.9017886631090407, "grad_norm": 1.1325476000861963, "learning_rate": 2.508455666911813e-08, "loss": 0.1141, "step": 51879 }, { "epoch": 0.9018060456465434, "grad_norm": 1.486773189385442, "learning_rate": 2.507575332407752e-08, "loss": 0.1003, "step": 51880 }, { "epoch": 0.9018234281840463, "grad_norm": 1.2771664360115516, "learning_rate": 2.5066951484328635e-08, "loss": 0.1562, "step": 51881 }, { "epoch": 0.9018408107215491, "grad_norm": 1.6945051339185482, "learning_rate": 2.505815114989929e-08, "loss": 0.1423, "step": 51882 }, { "epoch": 0.9018581932590519, "grad_norm": 1.5356541499510514, "learning_rate": 2.5049352320817452e-08, "loss": 0.0914, "step": 51883 }, { "epoch": 0.9018755757965548, "grad_norm": 1.7833709323380853, "learning_rate": 2.504055499711105e-08, "loss": 0.1839, "step": 51884 }, { "epoch": 0.9018929583340576, "grad_norm": 1.2928883409566725, "learning_rate": 2.50317591788079e-08, "loss": 0.2205, "step": 51885 }, { "epoch": 0.9019103408715604, "grad_norm": 1.118209463996856, "learning_rate": 2.5022964865935858e-08, "loss": 0.2226, "step": 51886 }, { "epoch": 0.9019277234090632, "grad_norm": 1.6512308160017217, "learning_rate": 2.5014172058522853e-08, "loss": 0.149, "step": 51887 }, { "epoch": 0.9019451059465661, "grad_norm": 1.80333311878323, "learning_rate": 2.500538075659675e-08, "loss": 0.1463, "step": 51888 }, { "epoch": 0.9019624884840689, "grad_norm": 1.4225756620417969, "learning_rate": 2.499659096018536e-08, "loss": 0.1147, "step": 51889 }, { "epoch": 0.9019798710215717, "grad_norm": 0.8908502274166243, "learning_rate": 2.498780266931655e-08, "loss": 0.1005, "step": 51890 }, { "epoch": 0.9019972535590746, "grad_norm": 1.4501595249191963, "learning_rate": 2.4979015884018296e-08, "loss": 0.1433, "step": 51891 }, { "epoch": 0.9020146360965774, "grad_norm": 1.4565647564047954, "learning_rate": 2.49702306043183e-08, "loss": 0.1021, "step": 51892 }, { "epoch": 0.9020320186340802, "grad_norm": 1.6168401517476743, "learning_rate": 2.4961446830244424e-08, "loss": 0.1721, "step": 51893 }, { "epoch": 0.902049401171583, "grad_norm": 1.2580393133752346, "learning_rate": 2.4952664561824543e-08, "loss": 0.1343, "step": 51894 }, { "epoch": 0.9020667837090859, "grad_norm": 1.715882863982556, "learning_rate": 2.4943883799086575e-08, "loss": 0.2016, "step": 51895 }, { "epoch": 0.9020841662465887, "grad_norm": 1.4723688011170655, "learning_rate": 2.4935104542058216e-08, "loss": 0.1451, "step": 51896 }, { "epoch": 0.9021015487840915, "grad_norm": 2.483925288629793, "learning_rate": 2.4926326790767393e-08, "loss": 0.2018, "step": 51897 }, { "epoch": 0.9021189313215944, "grad_norm": 1.361851066507189, "learning_rate": 2.4917550545241862e-08, "loss": 0.1939, "step": 51898 }, { "epoch": 0.9021363138590972, "grad_norm": 2.1077834344256696, "learning_rate": 2.4908775805509486e-08, "loss": 0.1756, "step": 51899 }, { "epoch": 0.9021536963965999, "grad_norm": 1.5838519300159999, "learning_rate": 2.490000257159802e-08, "loss": 0.1427, "step": 51900 }, { "epoch": 0.9021710789341028, "grad_norm": 1.0052129720460163, "learning_rate": 2.4891230843535225e-08, "loss": 0.1243, "step": 51901 }, { "epoch": 0.9021884614716056, "grad_norm": 2.3302397734031963, "learning_rate": 2.4882460621349077e-08, "loss": 0.1443, "step": 51902 }, { "epoch": 0.9022058440091084, "grad_norm": 1.5565079127006094, "learning_rate": 2.4873691905067273e-08, "loss": 0.1691, "step": 51903 }, { "epoch": 0.9022232265466112, "grad_norm": 1.9661501047563055, "learning_rate": 2.4864924694717683e-08, "loss": 0.1905, "step": 51904 }, { "epoch": 0.9022406090841141, "grad_norm": 1.59530129391894, "learning_rate": 2.4856158990327892e-08, "loss": 0.1675, "step": 51905 }, { "epoch": 0.9022579916216169, "grad_norm": 1.2654011275890278, "learning_rate": 2.4847394791925825e-08, "loss": 0.1844, "step": 51906 }, { "epoch": 0.9022753741591197, "grad_norm": 1.3718668562468546, "learning_rate": 2.4838632099539292e-08, "loss": 0.163, "step": 51907 }, { "epoch": 0.9022927566966226, "grad_norm": 1.0340213152108217, "learning_rate": 2.482987091319605e-08, "loss": 0.1075, "step": 51908 }, { "epoch": 0.9023101392341254, "grad_norm": 1.6171540972140899, "learning_rate": 2.482111123292374e-08, "loss": 0.0976, "step": 51909 }, { "epoch": 0.9023275217716282, "grad_norm": 1.0548985517612448, "learning_rate": 2.48123530587504e-08, "loss": 0.1332, "step": 51910 }, { "epoch": 0.9023449043091311, "grad_norm": 1.9200060271533044, "learning_rate": 2.4803596390703508e-08, "loss": 0.1608, "step": 51911 }, { "epoch": 0.9023622868466339, "grad_norm": 1.0200528216653149, "learning_rate": 2.4794841228810926e-08, "loss": 0.0978, "step": 51912 }, { "epoch": 0.9023796693841367, "grad_norm": 1.5107567625274272, "learning_rate": 2.4786087573100357e-08, "loss": 0.1443, "step": 51913 }, { "epoch": 0.9023970519216395, "grad_norm": 1.0148099944525377, "learning_rate": 2.4777335423599667e-08, "loss": 0.1498, "step": 51914 }, { "epoch": 0.9024144344591424, "grad_norm": 2.6389567651988566, "learning_rate": 2.47685847803365e-08, "loss": 0.1709, "step": 51915 }, { "epoch": 0.9024318169966452, "grad_norm": 1.5355188597177418, "learning_rate": 2.4759835643338668e-08, "loss": 0.1436, "step": 51916 }, { "epoch": 0.902449199534148, "grad_norm": 2.1931110335338913, "learning_rate": 2.4751088012633813e-08, "loss": 0.1315, "step": 51917 }, { "epoch": 0.9024665820716509, "grad_norm": 1.505039368176254, "learning_rate": 2.4742341888249697e-08, "loss": 0.1467, "step": 51918 }, { "epoch": 0.9024839646091537, "grad_norm": 1.7025800204738772, "learning_rate": 2.4733597270214014e-08, "loss": 0.114, "step": 51919 }, { "epoch": 0.9025013471466564, "grad_norm": 1.3948207240528236, "learning_rate": 2.472485415855452e-08, "loss": 0.0838, "step": 51920 }, { "epoch": 0.9025187296841592, "grad_norm": 1.5452460540561286, "learning_rate": 2.4716112553298862e-08, "loss": 0.1419, "step": 51921 }, { "epoch": 0.9025361122216621, "grad_norm": 1.6984755407671568, "learning_rate": 2.4707372454474906e-08, "loss": 0.1453, "step": 51922 }, { "epoch": 0.9025534947591649, "grad_norm": 1.691470366849842, "learning_rate": 2.4698633862110296e-08, "loss": 0.1551, "step": 51923 }, { "epoch": 0.9025708772966677, "grad_norm": 1.3943791446535032, "learning_rate": 2.4689896776232512e-08, "loss": 0.1232, "step": 51924 }, { "epoch": 0.9025882598341706, "grad_norm": 4.151898391247927, "learning_rate": 2.4681161196869472e-08, "loss": 0.2812, "step": 51925 }, { "epoch": 0.9026056423716734, "grad_norm": 1.0696419695880095, "learning_rate": 2.4672427124048876e-08, "loss": 0.1105, "step": 51926 }, { "epoch": 0.9026230249091762, "grad_norm": 0.9052714037123929, "learning_rate": 2.4663694557798265e-08, "loss": 0.1475, "step": 51927 }, { "epoch": 0.9026404074466791, "grad_norm": 1.3024013383045707, "learning_rate": 2.4654963498145443e-08, "loss": 0.1275, "step": 51928 }, { "epoch": 0.9026577899841819, "grad_norm": 2.3095289956659935, "learning_rate": 2.4646233945118e-08, "loss": 0.1896, "step": 51929 }, { "epoch": 0.9026751725216847, "grad_norm": 1.1372362867730417, "learning_rate": 2.4637505898743637e-08, "loss": 0.0954, "step": 51930 }, { "epoch": 0.9026925550591876, "grad_norm": 3.0673531138868433, "learning_rate": 2.4628779359050057e-08, "loss": 0.2085, "step": 51931 }, { "epoch": 0.9027099375966904, "grad_norm": 4.16707405963158, "learning_rate": 2.4620054326064786e-08, "loss": 0.2743, "step": 51932 }, { "epoch": 0.9027273201341932, "grad_norm": 1.2277999381361904, "learning_rate": 2.461133079981559e-08, "loss": 0.1356, "step": 51933 }, { "epoch": 0.902744702671696, "grad_norm": 1.327122519406578, "learning_rate": 2.4602608780330158e-08, "loss": 0.1478, "step": 51934 }, { "epoch": 0.9027620852091989, "grad_norm": 1.8134479054927823, "learning_rate": 2.4593888267636142e-08, "loss": 0.1285, "step": 51935 }, { "epoch": 0.9027794677467017, "grad_norm": 1.4073173549306168, "learning_rate": 2.4585169261760963e-08, "loss": 0.1484, "step": 51936 }, { "epoch": 0.9027968502842045, "grad_norm": 1.3827313100371317, "learning_rate": 2.4576451762732486e-08, "loss": 0.179, "step": 51937 }, { "epoch": 0.9028142328217074, "grad_norm": 1.0020027539047653, "learning_rate": 2.4567735770578245e-08, "loss": 0.101, "step": 51938 }, { "epoch": 0.9028316153592101, "grad_norm": 0.8312308837131356, "learning_rate": 2.4559021285325942e-08, "loss": 0.1365, "step": 51939 }, { "epoch": 0.9028489978967129, "grad_norm": 2.1478470243608427, "learning_rate": 2.4550308307003e-08, "loss": 0.1739, "step": 51940 }, { "epoch": 0.9028663804342157, "grad_norm": 1.786529368731916, "learning_rate": 2.454159683563728e-08, "loss": 0.1348, "step": 51941 }, { "epoch": 0.9028837629717186, "grad_norm": 1.1607426228336561, "learning_rate": 2.4532886871256374e-08, "loss": 0.1348, "step": 51942 }, { "epoch": 0.9029011455092214, "grad_norm": 1.3945274232514902, "learning_rate": 2.452417841388771e-08, "loss": 0.1467, "step": 51943 }, { "epoch": 0.9029185280467242, "grad_norm": 0.773931011339106, "learning_rate": 2.4515471463558923e-08, "loss": 0.0941, "step": 51944 }, { "epoch": 0.9029359105842271, "grad_norm": 1.1519887892263818, "learning_rate": 2.4506766020297775e-08, "loss": 0.1157, "step": 51945 }, { "epoch": 0.9029532931217299, "grad_norm": 1.3530454229986744, "learning_rate": 2.449806208413169e-08, "loss": 0.131, "step": 51946 }, { "epoch": 0.9029706756592327, "grad_norm": 1.0179245851484675, "learning_rate": 2.448935965508836e-08, "loss": 0.1001, "step": 51947 }, { "epoch": 0.9029880581967356, "grad_norm": 1.1956559182660353, "learning_rate": 2.4480658733195325e-08, "loss": 0.112, "step": 51948 }, { "epoch": 0.9030054407342384, "grad_norm": 1.2609291397430198, "learning_rate": 2.4471959318480172e-08, "loss": 0.1301, "step": 51949 }, { "epoch": 0.9030228232717412, "grad_norm": 2.6333454984305225, "learning_rate": 2.4463261410970436e-08, "loss": 0.2004, "step": 51950 }, { "epoch": 0.903040205809244, "grad_norm": 1.2853263158773431, "learning_rate": 2.4454565010693705e-08, "loss": 0.1733, "step": 51951 }, { "epoch": 0.9030575883467469, "grad_norm": 1.3159634157768723, "learning_rate": 2.444587011767757e-08, "loss": 0.1207, "step": 51952 }, { "epoch": 0.9030749708842497, "grad_norm": 1.153596435587892, "learning_rate": 2.4437176731949617e-08, "loss": 0.1891, "step": 51953 }, { "epoch": 0.9030923534217525, "grad_norm": 2.0130402627637918, "learning_rate": 2.442848485353738e-08, "loss": 0.1633, "step": 51954 }, { "epoch": 0.9031097359592554, "grad_norm": 1.811707891240576, "learning_rate": 2.4419794482468283e-08, "loss": 0.1372, "step": 51955 }, { "epoch": 0.9031271184967582, "grad_norm": 0.9826927618952856, "learning_rate": 2.4411105618770078e-08, "loss": 0.1478, "step": 51956 }, { "epoch": 0.903144501034261, "grad_norm": 1.1786922078112414, "learning_rate": 2.4402418262470136e-08, "loss": 0.1419, "step": 51957 }, { "epoch": 0.9031618835717639, "grad_norm": 1.1443989544729343, "learning_rate": 2.43937324135961e-08, "loss": 0.1052, "step": 51958 }, { "epoch": 0.9031792661092666, "grad_norm": 1.7914476983147858, "learning_rate": 2.4385048072175342e-08, "loss": 0.156, "step": 51959 }, { "epoch": 0.9031966486467694, "grad_norm": 5.447340405777523, "learning_rate": 2.4376365238235607e-08, "loss": 0.364, "step": 51960 }, { "epoch": 0.9032140311842722, "grad_norm": 1.8924121610487734, "learning_rate": 2.436768391180438e-08, "loss": 0.1405, "step": 51961 }, { "epoch": 0.9032314137217751, "grad_norm": 1.2797720099169712, "learning_rate": 2.435900409290903e-08, "loss": 0.1305, "step": 51962 }, { "epoch": 0.9032487962592779, "grad_norm": 1.2446520305251363, "learning_rate": 2.435032578157703e-08, "loss": 0.2194, "step": 51963 }, { "epoch": 0.9032661787967807, "grad_norm": 2.1783522271290554, "learning_rate": 2.4341648977836137e-08, "loss": 0.1645, "step": 51964 }, { "epoch": 0.9032835613342836, "grad_norm": 1.3526285264149125, "learning_rate": 2.4332973681713664e-08, "loss": 0.1062, "step": 51965 }, { "epoch": 0.9033009438717864, "grad_norm": 1.0432308165176554, "learning_rate": 2.4324299893237198e-08, "loss": 0.1128, "step": 51966 }, { "epoch": 0.9033183264092892, "grad_norm": 3.7414947881708045, "learning_rate": 2.431562761243422e-08, "loss": 0.1424, "step": 51967 }, { "epoch": 0.903335708946792, "grad_norm": 0.98694292276207, "learning_rate": 2.430695683933215e-08, "loss": 0.1421, "step": 51968 }, { "epoch": 0.9033530914842949, "grad_norm": 1.5911291552498892, "learning_rate": 2.429828757395852e-08, "loss": 0.1659, "step": 51969 }, { "epoch": 0.9033704740217977, "grad_norm": 1.441923773492735, "learning_rate": 2.4289619816340757e-08, "loss": 0.138, "step": 51970 }, { "epoch": 0.9033878565593005, "grad_norm": 1.0647705501503992, "learning_rate": 2.428095356650639e-08, "loss": 0.2086, "step": 51971 }, { "epoch": 0.9034052390968034, "grad_norm": 1.40184753827097, "learning_rate": 2.4272288824482902e-08, "loss": 0.1269, "step": 51972 }, { "epoch": 0.9034226216343062, "grad_norm": 1.3449921653121515, "learning_rate": 2.4263625590297765e-08, "loss": 0.1363, "step": 51973 }, { "epoch": 0.903440004171809, "grad_norm": 1.5629527775219292, "learning_rate": 2.4254963863978294e-08, "loss": 0.1914, "step": 51974 }, { "epoch": 0.9034573867093119, "grad_norm": 1.1677836102754875, "learning_rate": 2.4246303645552137e-08, "loss": 0.0883, "step": 51975 }, { "epoch": 0.9034747692468147, "grad_norm": 1.9066622392067787, "learning_rate": 2.4237644935046596e-08, "loss": 0.1944, "step": 51976 }, { "epoch": 0.9034921517843175, "grad_norm": 1.01460347659672, "learning_rate": 2.4228987732489215e-08, "loss": 0.144, "step": 51977 }, { "epoch": 0.9035095343218204, "grad_norm": 1.9200261793172286, "learning_rate": 2.422033203790741e-08, "loss": 0.1454, "step": 51978 }, { "epoch": 0.9035269168593231, "grad_norm": 2.0702240328399104, "learning_rate": 2.4211677851328494e-08, "loss": 0.2186, "step": 51979 }, { "epoch": 0.9035442993968259, "grad_norm": 1.7135371523066816, "learning_rate": 2.4203025172780113e-08, "loss": 0.1027, "step": 51980 }, { "epoch": 0.9035616819343287, "grad_norm": 1.4828495058901239, "learning_rate": 2.4194374002289575e-08, "loss": 0.1111, "step": 51981 }, { "epoch": 0.9035790644718316, "grad_norm": 1.3176360164322984, "learning_rate": 2.4185724339884196e-08, "loss": 0.1236, "step": 51982 }, { "epoch": 0.9035964470093344, "grad_norm": 0.8608569300489491, "learning_rate": 2.417707618559156e-08, "loss": 0.1269, "step": 51983 }, { "epoch": 0.9036138295468372, "grad_norm": 0.6927362705540163, "learning_rate": 2.416842953943904e-08, "loss": 0.0923, "step": 51984 }, { "epoch": 0.90363121208434, "grad_norm": 1.77109337101272, "learning_rate": 2.4159784401453998e-08, "loss": 0.1403, "step": 51985 }, { "epoch": 0.9036485946218429, "grad_norm": 1.558019304036064, "learning_rate": 2.4151140771663857e-08, "loss": 0.167, "step": 51986 }, { "epoch": 0.9036659771593457, "grad_norm": 1.4648802218719388, "learning_rate": 2.4142498650096045e-08, "loss": 0.1761, "step": 51987 }, { "epoch": 0.9036833596968485, "grad_norm": 2.076266255632942, "learning_rate": 2.413385803677792e-08, "loss": 0.2033, "step": 51988 }, { "epoch": 0.9037007422343514, "grad_norm": 1.5296995774647102, "learning_rate": 2.4125218931736857e-08, "loss": 0.1506, "step": 51989 }, { "epoch": 0.9037181247718542, "grad_norm": 1.4665863391696783, "learning_rate": 2.4116581335000163e-08, "loss": 0.1032, "step": 51990 }, { "epoch": 0.903735507309357, "grad_norm": 0.919842712798741, "learning_rate": 2.4107945246595374e-08, "loss": 0.1383, "step": 51991 }, { "epoch": 0.9037528898468599, "grad_norm": 1.2605816584240725, "learning_rate": 2.4099310666549856e-08, "loss": 0.2684, "step": 51992 }, { "epoch": 0.9037702723843627, "grad_norm": 1.6484802004062613, "learning_rate": 2.4090677594890863e-08, "loss": 0.1609, "step": 51993 }, { "epoch": 0.9037876549218655, "grad_norm": 1.6421791449751284, "learning_rate": 2.4082046031645765e-08, "loss": 0.1674, "step": 51994 }, { "epoch": 0.9038050374593684, "grad_norm": 1.1819141329214986, "learning_rate": 2.407341597684198e-08, "loss": 0.0961, "step": 51995 }, { "epoch": 0.9038224199968712, "grad_norm": 1.1093956145050259, "learning_rate": 2.4064787430506883e-08, "loss": 0.1628, "step": 51996 }, { "epoch": 0.903839802534374, "grad_norm": 0.9155101376906154, "learning_rate": 2.405616039266778e-08, "loss": 0.2015, "step": 51997 }, { "epoch": 0.9038571850718768, "grad_norm": 1.1897655981704647, "learning_rate": 2.4047534863351925e-08, "loss": 0.1443, "step": 51998 }, { "epoch": 0.9038745676093796, "grad_norm": 2.7159323096361483, "learning_rate": 2.4038910842586913e-08, "loss": 0.1739, "step": 51999 }, { "epoch": 0.9038919501468824, "grad_norm": 2.5226875045914268, "learning_rate": 2.403028833039983e-08, "loss": 0.1177, "step": 52000 }, { "epoch": 0.9039093326843852, "grad_norm": 0.8251793527635802, "learning_rate": 2.402166732681804e-08, "loss": 0.1763, "step": 52001 }, { "epoch": 0.903926715221888, "grad_norm": 1.746586044845769, "learning_rate": 2.4013047831868917e-08, "loss": 0.1276, "step": 52002 }, { "epoch": 0.9039440977593909, "grad_norm": 1.6708759519801013, "learning_rate": 2.400442984557982e-08, "loss": 0.1475, "step": 52003 }, { "epoch": 0.9039614802968937, "grad_norm": 1.7634542232870745, "learning_rate": 2.3995813367978067e-08, "loss": 0.162, "step": 52004 }, { "epoch": 0.9039788628343965, "grad_norm": 0.9557877629537674, "learning_rate": 2.3987198399090914e-08, "loss": 0.1263, "step": 52005 }, { "epoch": 0.9039962453718994, "grad_norm": 2.458612298206606, "learning_rate": 2.3978584938945666e-08, "loss": 0.1214, "step": 52006 }, { "epoch": 0.9040136279094022, "grad_norm": 2.4357144985296504, "learning_rate": 2.3969972987569587e-08, "loss": 0.2026, "step": 52007 }, { "epoch": 0.904031010446905, "grad_norm": 1.7542359291655711, "learning_rate": 2.3961362544990092e-08, "loss": 0.195, "step": 52008 }, { "epoch": 0.9040483929844079, "grad_norm": 1.3072957430934489, "learning_rate": 2.395275361123439e-08, "loss": 0.1724, "step": 52009 }, { "epoch": 0.9040657755219107, "grad_norm": 1.9297857449175186, "learning_rate": 2.3944146186329728e-08, "loss": 0.2252, "step": 52010 }, { "epoch": 0.9040831580594135, "grad_norm": 2.125047214667606, "learning_rate": 2.3935540270303535e-08, "loss": 0.2112, "step": 52011 }, { "epoch": 0.9041005405969164, "grad_norm": 1.1263534882292283, "learning_rate": 2.392693586318295e-08, "loss": 0.1388, "step": 52012 }, { "epoch": 0.9041179231344192, "grad_norm": 2.8843998241758606, "learning_rate": 2.3918332964995235e-08, "loss": 0.1877, "step": 52013 }, { "epoch": 0.904135305671922, "grad_norm": 1.658000049955297, "learning_rate": 2.3909731575767755e-08, "loss": 0.1673, "step": 52014 }, { "epoch": 0.9041526882094248, "grad_norm": 2.091850275656724, "learning_rate": 2.390113169552771e-08, "loss": 0.1842, "step": 52015 }, { "epoch": 0.9041700707469277, "grad_norm": 1.3335328891549352, "learning_rate": 2.389253332430241e-08, "loss": 0.1481, "step": 52016 }, { "epoch": 0.9041874532844305, "grad_norm": 1.663760214440193, "learning_rate": 2.3883936462119057e-08, "loss": 0.2087, "step": 52017 }, { "epoch": 0.9042048358219333, "grad_norm": 0.8151737076348965, "learning_rate": 2.3875341109004908e-08, "loss": 0.2181, "step": 52018 }, { "epoch": 0.9042222183594361, "grad_norm": 1.1365792945678885, "learning_rate": 2.386674726498722e-08, "loss": 0.1421, "step": 52019 }, { "epoch": 0.9042396008969389, "grad_norm": 1.1278396629612264, "learning_rate": 2.3858154930093243e-08, "loss": 0.1283, "step": 52020 }, { "epoch": 0.9042569834344417, "grad_norm": 1.10258967529126, "learning_rate": 2.384956410435013e-08, "loss": 0.1476, "step": 52021 }, { "epoch": 0.9042743659719445, "grad_norm": 1.268443764725188, "learning_rate": 2.3840974787785183e-08, "loss": 0.1798, "step": 52022 }, { "epoch": 0.9042917485094474, "grad_norm": 1.5056510450308331, "learning_rate": 2.3832386980425666e-08, "loss": 0.1612, "step": 52023 }, { "epoch": 0.9043091310469502, "grad_norm": 1.1415982026844749, "learning_rate": 2.3823800682298834e-08, "loss": 0.1042, "step": 52024 }, { "epoch": 0.904326513584453, "grad_norm": 1.0162530130977698, "learning_rate": 2.3815215893431663e-08, "loss": 0.0996, "step": 52025 }, { "epoch": 0.9043438961219559, "grad_norm": 2.1405857385130878, "learning_rate": 2.3806632613851574e-08, "loss": 0.2404, "step": 52026 }, { "epoch": 0.9043612786594587, "grad_norm": 1.179038257954165, "learning_rate": 2.3798050843585717e-08, "loss": 0.1242, "step": 52027 }, { "epoch": 0.9043786611969615, "grad_norm": 2.113685606577474, "learning_rate": 2.3789470582661285e-08, "loss": 0.1897, "step": 52028 }, { "epoch": 0.9043960437344644, "grad_norm": 1.5885002819203073, "learning_rate": 2.3780891831105377e-08, "loss": 0.144, "step": 52029 }, { "epoch": 0.9044134262719672, "grad_norm": 1.7698941406825976, "learning_rate": 2.3772314588945463e-08, "loss": 0.3128, "step": 52030 }, { "epoch": 0.90443080880947, "grad_norm": 1.6861561474726572, "learning_rate": 2.376373885620847e-08, "loss": 0.1776, "step": 52031 }, { "epoch": 0.9044481913469729, "grad_norm": 1.752306633657985, "learning_rate": 2.375516463292171e-08, "loss": 0.2045, "step": 52032 }, { "epoch": 0.9044655738844757, "grad_norm": 1.1034528495587328, "learning_rate": 2.374659191911216e-08, "loss": 0.1404, "step": 52033 }, { "epoch": 0.9044829564219785, "grad_norm": 1.7668872415566845, "learning_rate": 2.3738020714807293e-08, "loss": 0.1639, "step": 52034 }, { "epoch": 0.9045003389594813, "grad_norm": 1.0079601143533272, "learning_rate": 2.372945102003404e-08, "loss": 0.1216, "step": 52035 }, { "epoch": 0.9045177214969842, "grad_norm": 5.965174612048714, "learning_rate": 2.372088283481971e-08, "loss": 0.2368, "step": 52036 }, { "epoch": 0.904535104034487, "grad_norm": 4.253558831521543, "learning_rate": 2.3712316159191338e-08, "loss": 0.1894, "step": 52037 }, { "epoch": 0.9045524865719898, "grad_norm": 1.8930283401873005, "learning_rate": 2.3703750993176175e-08, "loss": 0.1763, "step": 52038 }, { "epoch": 0.9045698691094926, "grad_norm": 1.8342938434459772, "learning_rate": 2.3695187336801315e-08, "loss": 0.1752, "step": 52039 }, { "epoch": 0.9045872516469954, "grad_norm": 1.646482807326108, "learning_rate": 2.3686625190093955e-08, "loss": 0.2189, "step": 52040 }, { "epoch": 0.9046046341844982, "grad_norm": 4.227374550893938, "learning_rate": 2.3678064553081078e-08, "loss": 0.1819, "step": 52041 }, { "epoch": 0.904622016722001, "grad_norm": 2.012204578109435, "learning_rate": 2.3669505425789993e-08, "loss": 0.164, "step": 52042 }, { "epoch": 0.9046393992595039, "grad_norm": 0.9875003889435033, "learning_rate": 2.3660947808247844e-08, "loss": 0.1787, "step": 52043 }, { "epoch": 0.9046567817970067, "grad_norm": 1.0893643514516242, "learning_rate": 2.3652391700481555e-08, "loss": 0.1542, "step": 52044 }, { "epoch": 0.9046741643345095, "grad_norm": 1.8676963577476031, "learning_rate": 2.3643837102518437e-08, "loss": 0.1689, "step": 52045 }, { "epoch": 0.9046915468720124, "grad_norm": 2.248405442218007, "learning_rate": 2.363528401438558e-08, "loss": 0.1283, "step": 52046 }, { "epoch": 0.9047089294095152, "grad_norm": 1.8587879990506184, "learning_rate": 2.3626732436109965e-08, "loss": 0.1827, "step": 52047 }, { "epoch": 0.904726311947018, "grad_norm": 1.4562834073701985, "learning_rate": 2.3618182367718786e-08, "loss": 0.1134, "step": 52048 }, { "epoch": 0.9047436944845209, "grad_norm": 1.2712915938025722, "learning_rate": 2.3609633809239194e-08, "loss": 0.1952, "step": 52049 }, { "epoch": 0.9047610770220237, "grad_norm": 2.1560581267741186, "learning_rate": 2.360108676069822e-08, "loss": 0.2226, "step": 52050 }, { "epoch": 0.9047784595595265, "grad_norm": 1.2307810092863065, "learning_rate": 2.3592541222122953e-08, "loss": 0.1308, "step": 52051 }, { "epoch": 0.9047958420970293, "grad_norm": 1.12964940450886, "learning_rate": 2.358399719354037e-08, "loss": 0.2127, "step": 52052 }, { "epoch": 0.9048132246345322, "grad_norm": 1.2518737108751192, "learning_rate": 2.3575454674977792e-08, "loss": 0.1394, "step": 52053 }, { "epoch": 0.904830607172035, "grad_norm": 1.2005404906017236, "learning_rate": 2.3566913666462186e-08, "loss": 0.098, "step": 52054 }, { "epoch": 0.9048479897095378, "grad_norm": 1.3252852642378166, "learning_rate": 2.3558374168020534e-08, "loss": 0.1505, "step": 52055 }, { "epoch": 0.9048653722470407, "grad_norm": 2.0322535801278425, "learning_rate": 2.3549836179680037e-08, "loss": 0.1832, "step": 52056 }, { "epoch": 0.9048827547845435, "grad_norm": 1.097073348710485, "learning_rate": 2.3541299701467677e-08, "loss": 0.1189, "step": 52057 }, { "epoch": 0.9049001373220463, "grad_norm": 0.8645307011222596, "learning_rate": 2.353276473341054e-08, "loss": 0.1872, "step": 52058 }, { "epoch": 0.904917519859549, "grad_norm": 1.2267901684118419, "learning_rate": 2.3524231275535656e-08, "loss": 0.1312, "step": 52059 }, { "epoch": 0.9049349023970519, "grad_norm": 1.1289875866171466, "learning_rate": 2.351569932787001e-08, "loss": 0.1363, "step": 52060 }, { "epoch": 0.9049522849345547, "grad_norm": 1.4457761461372627, "learning_rate": 2.35071688904408e-08, "loss": 0.1524, "step": 52061 }, { "epoch": 0.9049696674720575, "grad_norm": 1.1036220835856103, "learning_rate": 2.3498639963275002e-08, "loss": 0.1445, "step": 52062 }, { "epoch": 0.9049870500095604, "grad_norm": 2.3274232810394486, "learning_rate": 2.3490112546399543e-08, "loss": 0.1504, "step": 52063 }, { "epoch": 0.9050044325470632, "grad_norm": 1.3596721408912096, "learning_rate": 2.3481586639841623e-08, "loss": 0.1571, "step": 52064 }, { "epoch": 0.905021815084566, "grad_norm": 1.2103040131458491, "learning_rate": 2.347306224362816e-08, "loss": 0.1237, "step": 52065 }, { "epoch": 0.9050391976220689, "grad_norm": 1.0884157434398114, "learning_rate": 2.346453935778614e-08, "loss": 0.1903, "step": 52066 }, { "epoch": 0.9050565801595717, "grad_norm": 2.141474933126976, "learning_rate": 2.3456017982342702e-08, "loss": 0.0972, "step": 52067 }, { "epoch": 0.9050739626970745, "grad_norm": 1.692326494911781, "learning_rate": 2.3447498117324715e-08, "loss": 0.2115, "step": 52068 }, { "epoch": 0.9050913452345773, "grad_norm": 1.1693588506051635, "learning_rate": 2.343897976275927e-08, "loss": 0.156, "step": 52069 }, { "epoch": 0.9051087277720802, "grad_norm": 1.208479142394957, "learning_rate": 2.34304629186734e-08, "loss": 0.1391, "step": 52070 }, { "epoch": 0.905126110309583, "grad_norm": 1.0831624713145525, "learning_rate": 2.3421947585093914e-08, "loss": 0.2683, "step": 52071 }, { "epoch": 0.9051434928470858, "grad_norm": 2.9815656044369345, "learning_rate": 2.341343376204802e-08, "loss": 0.1699, "step": 52072 }, { "epoch": 0.9051608753845887, "grad_norm": 0.8329783396485928, "learning_rate": 2.3404921449562632e-08, "loss": 0.092, "step": 52073 }, { "epoch": 0.9051782579220915, "grad_norm": 2.155381559519439, "learning_rate": 2.3396410647664733e-08, "loss": 0.1619, "step": 52074 }, { "epoch": 0.9051956404595943, "grad_norm": 1.5558449690898222, "learning_rate": 2.338790135638119e-08, "loss": 0.1347, "step": 52075 }, { "epoch": 0.9052130229970972, "grad_norm": 1.4307204010522303, "learning_rate": 2.3379393575739094e-08, "loss": 0.1649, "step": 52076 }, { "epoch": 0.9052304055346, "grad_norm": 1.3124548643251106, "learning_rate": 2.337088730576542e-08, "loss": 0.1341, "step": 52077 }, { "epoch": 0.9052477880721027, "grad_norm": 1.0369934758081145, "learning_rate": 2.3362382546487036e-08, "loss": 0.1124, "step": 52078 }, { "epoch": 0.9052651706096055, "grad_norm": 1.8314481799512372, "learning_rate": 2.335387929793092e-08, "loss": 0.1506, "step": 52079 }, { "epoch": 0.9052825531471084, "grad_norm": 0.9784939254260306, "learning_rate": 2.3345377560124114e-08, "loss": 0.1019, "step": 52080 }, { "epoch": 0.9052999356846112, "grad_norm": 0.9724246412516576, "learning_rate": 2.3336877333093585e-08, "loss": 0.1507, "step": 52081 }, { "epoch": 0.905317318222114, "grad_norm": 1.0880138856619412, "learning_rate": 2.3328378616866094e-08, "loss": 0.149, "step": 52082 }, { "epoch": 0.9053347007596169, "grad_norm": 1.0677784116194609, "learning_rate": 2.3319881411468623e-08, "loss": 0.0888, "step": 52083 }, { "epoch": 0.9053520832971197, "grad_norm": 1.4270340973132347, "learning_rate": 2.3311385716928256e-08, "loss": 0.1033, "step": 52084 }, { "epoch": 0.9053694658346225, "grad_norm": 1.808123101529467, "learning_rate": 2.3302891533271805e-08, "loss": 0.1685, "step": 52085 }, { "epoch": 0.9053868483721254, "grad_norm": 2.3498182163671957, "learning_rate": 2.32943988605262e-08, "loss": 0.1568, "step": 52086 }, { "epoch": 0.9054042309096282, "grad_norm": 2.0494150836200893, "learning_rate": 2.3285907698718356e-08, "loss": 0.1507, "step": 52087 }, { "epoch": 0.905421613447131, "grad_norm": 0.7038903717136576, "learning_rate": 2.3277418047875197e-08, "loss": 0.0944, "step": 52088 }, { "epoch": 0.9054389959846338, "grad_norm": 3.2789585969694923, "learning_rate": 2.3268929908023705e-08, "loss": 0.103, "step": 52089 }, { "epoch": 0.9054563785221367, "grad_norm": 1.8106803517443018, "learning_rate": 2.3260443279190632e-08, "loss": 0.1059, "step": 52090 }, { "epoch": 0.9054737610596395, "grad_norm": 0.9096461730732346, "learning_rate": 2.3251958161402908e-08, "loss": 0.1009, "step": 52091 }, { "epoch": 0.9054911435971423, "grad_norm": 0.9608379301261946, "learning_rate": 2.324347455468756e-08, "loss": 0.0983, "step": 52092 }, { "epoch": 0.9055085261346452, "grad_norm": 1.3554364420883809, "learning_rate": 2.3234992459071456e-08, "loss": 0.1006, "step": 52093 }, { "epoch": 0.905525908672148, "grad_norm": 1.3655675655257091, "learning_rate": 2.32265118745813e-08, "loss": 0.1247, "step": 52094 }, { "epoch": 0.9055432912096508, "grad_norm": 1.3677698925564787, "learning_rate": 2.3218032801244126e-08, "loss": 0.1097, "step": 52095 }, { "epoch": 0.9055606737471537, "grad_norm": 1.0608970872651438, "learning_rate": 2.3209555239086796e-08, "loss": 0.0923, "step": 52096 }, { "epoch": 0.9055780562846565, "grad_norm": 0.896395024604414, "learning_rate": 2.3201079188136184e-08, "loss": 0.0715, "step": 52097 }, { "epoch": 0.9055954388221592, "grad_norm": 1.3478117726589205, "learning_rate": 2.3192604648419045e-08, "loss": 0.1691, "step": 52098 }, { "epoch": 0.905612821359662, "grad_norm": 2.7903337056842186, "learning_rate": 2.3184131619962356e-08, "loss": 0.1198, "step": 52099 }, { "epoch": 0.9056302038971649, "grad_norm": 1.2173278655352615, "learning_rate": 2.3175660102792983e-08, "loss": 0.0919, "step": 52100 }, { "epoch": 0.9056475864346677, "grad_norm": 1.5872257934874676, "learning_rate": 2.316719009693774e-08, "loss": 0.1204, "step": 52101 }, { "epoch": 0.9056649689721705, "grad_norm": 1.0251024523027543, "learning_rate": 2.3158721602423325e-08, "loss": 0.1009, "step": 52102 }, { "epoch": 0.9056823515096734, "grad_norm": 1.3545728064857472, "learning_rate": 2.3150254619276833e-08, "loss": 0.1144, "step": 52103 }, { "epoch": 0.9056997340471762, "grad_norm": 1.3413335203390109, "learning_rate": 2.314178914752496e-08, "loss": 0.1187, "step": 52104 }, { "epoch": 0.905717116584679, "grad_norm": 1.7918528958865239, "learning_rate": 2.3133325187194573e-08, "loss": 0.1045, "step": 52105 }, { "epoch": 0.9057344991221818, "grad_norm": 1.1300919953679813, "learning_rate": 2.312486273831249e-08, "loss": 0.1181, "step": 52106 }, { "epoch": 0.9057518816596847, "grad_norm": 1.4958510324657024, "learning_rate": 2.3116401800905573e-08, "loss": 0.0801, "step": 52107 }, { "epoch": 0.9057692641971875, "grad_norm": 1.7901543286009154, "learning_rate": 2.3107942375000576e-08, "loss": 0.1832, "step": 52108 }, { "epoch": 0.9057866467346903, "grad_norm": 0.9853670786196441, "learning_rate": 2.3099484460624318e-08, "loss": 0.0861, "step": 52109 }, { "epoch": 0.9058040292721932, "grad_norm": 2.3098117922267036, "learning_rate": 2.3091028057803554e-08, "loss": 0.1835, "step": 52110 }, { "epoch": 0.905821411809696, "grad_norm": 1.4421471182970982, "learning_rate": 2.3082573166565256e-08, "loss": 0.1309, "step": 52111 }, { "epoch": 0.9058387943471988, "grad_norm": 1.6651292183871167, "learning_rate": 2.3074119786936132e-08, "loss": 0.1302, "step": 52112 }, { "epoch": 0.9058561768847017, "grad_norm": 1.910072641020944, "learning_rate": 2.3065667918942934e-08, "loss": 0.1111, "step": 52113 }, { "epoch": 0.9058735594222045, "grad_norm": 1.0693076236046963, "learning_rate": 2.3057217562612418e-08, "loss": 0.0728, "step": 52114 }, { "epoch": 0.9058909419597073, "grad_norm": 2.3571295544773503, "learning_rate": 2.3048768717971512e-08, "loss": 0.144, "step": 52115 }, { "epoch": 0.9059083244972101, "grad_norm": 1.5361404859904269, "learning_rate": 2.3040321385046914e-08, "loss": 0.1261, "step": 52116 }, { "epoch": 0.905925707034713, "grad_norm": 1.095364789363904, "learning_rate": 2.3031875563865377e-08, "loss": 0.1061, "step": 52117 }, { "epoch": 0.9059430895722157, "grad_norm": 1.2479857012980007, "learning_rate": 2.3023431254453663e-08, "loss": 0.0854, "step": 52118 }, { "epoch": 0.9059604721097185, "grad_norm": 2.002932550519143, "learning_rate": 2.301498845683869e-08, "loss": 0.1461, "step": 52119 }, { "epoch": 0.9059778546472214, "grad_norm": 1.8901960367746726, "learning_rate": 2.3006547171046998e-08, "loss": 0.1511, "step": 52120 }, { "epoch": 0.9059952371847242, "grad_norm": 1.6297530504699627, "learning_rate": 2.299810739710545e-08, "loss": 0.1204, "step": 52121 }, { "epoch": 0.906012619722227, "grad_norm": 0.8448630972461482, "learning_rate": 2.2989669135040745e-08, "loss": 0.1123, "step": 52122 }, { "epoch": 0.9060300022597298, "grad_norm": 1.4792637965839177, "learning_rate": 2.29812323848797e-08, "loss": 0.125, "step": 52123 }, { "epoch": 0.9060473847972327, "grad_norm": 1.9075078690077116, "learning_rate": 2.2972797146649016e-08, "loss": 0.1197, "step": 52124 }, { "epoch": 0.9060647673347355, "grad_norm": 1.4626153016828374, "learning_rate": 2.29643634203755e-08, "loss": 0.1403, "step": 52125 }, { "epoch": 0.9060821498722383, "grad_norm": 1.9119649184151977, "learning_rate": 2.2955931206085745e-08, "loss": 0.1471, "step": 52126 }, { "epoch": 0.9060995324097412, "grad_norm": 1.550956694768399, "learning_rate": 2.2947500503806615e-08, "loss": 0.1325, "step": 52127 }, { "epoch": 0.906116914947244, "grad_norm": 2.055143348585125, "learning_rate": 2.2939071313564706e-08, "loss": 0.1378, "step": 52128 }, { "epoch": 0.9061342974847468, "grad_norm": 1.6202288983595872, "learning_rate": 2.2930643635386827e-08, "loss": 0.1161, "step": 52129 }, { "epoch": 0.9061516800222497, "grad_norm": 1.2981219692404453, "learning_rate": 2.292221746929962e-08, "loss": 0.1269, "step": 52130 }, { "epoch": 0.9061690625597525, "grad_norm": 1.783574910982793, "learning_rate": 2.291379281532996e-08, "loss": 0.1232, "step": 52131 }, { "epoch": 0.9061864450972553, "grad_norm": 1.9732332830569668, "learning_rate": 2.2905369673504316e-08, "loss": 0.1299, "step": 52132 }, { "epoch": 0.9062038276347582, "grad_norm": 1.624167897509959, "learning_rate": 2.289694804384945e-08, "loss": 0.1202, "step": 52133 }, { "epoch": 0.906221210172261, "grad_norm": 1.5519740980262091, "learning_rate": 2.2888527926392178e-08, "loss": 0.1075, "step": 52134 }, { "epoch": 0.9062385927097638, "grad_norm": 1.9925468060062335, "learning_rate": 2.288010932115908e-08, "loss": 0.1248, "step": 52135 }, { "epoch": 0.9062559752472666, "grad_norm": 1.652509946578467, "learning_rate": 2.2871692228176864e-08, "loss": 0.1956, "step": 52136 }, { "epoch": 0.9062733577847695, "grad_norm": 1.294637341870785, "learning_rate": 2.2863276647472117e-08, "loss": 0.0843, "step": 52137 }, { "epoch": 0.9062907403222722, "grad_norm": 1.1152160549240475, "learning_rate": 2.2854862579071766e-08, "loss": 0.1131, "step": 52138 }, { "epoch": 0.906308122859775, "grad_norm": 1.3728532953704298, "learning_rate": 2.2846450023002284e-08, "loss": 0.1133, "step": 52139 }, { "epoch": 0.9063255053972779, "grad_norm": 1.8237345155903497, "learning_rate": 2.283803897929032e-08, "loss": 0.1669, "step": 52140 }, { "epoch": 0.9063428879347807, "grad_norm": 1.229790294709531, "learning_rate": 2.2829629447962517e-08, "loss": 0.0862, "step": 52141 }, { "epoch": 0.9063602704722835, "grad_norm": 1.1120999309127875, "learning_rate": 2.282122142904569e-08, "loss": 0.1157, "step": 52142 }, { "epoch": 0.9063776530097863, "grad_norm": 0.7323334506905429, "learning_rate": 2.281281492256637e-08, "loss": 0.0837, "step": 52143 }, { "epoch": 0.9063950355472892, "grad_norm": 1.786440497550906, "learning_rate": 2.280440992855126e-08, "loss": 0.1122, "step": 52144 }, { "epoch": 0.906412418084792, "grad_norm": 0.9945699034840259, "learning_rate": 2.2796006447026948e-08, "loss": 0.1075, "step": 52145 }, { "epoch": 0.9064298006222948, "grad_norm": 1.2897483918829382, "learning_rate": 2.2787604478020084e-08, "loss": 0.1143, "step": 52146 }, { "epoch": 0.9064471831597977, "grad_norm": 3.356995510967034, "learning_rate": 2.2779204021557254e-08, "loss": 0.2272, "step": 52147 }, { "epoch": 0.9064645656973005, "grad_norm": 1.3357918063464869, "learning_rate": 2.2770805077665156e-08, "loss": 0.1047, "step": 52148 }, { "epoch": 0.9064819482348033, "grad_norm": 1.2558377668757519, "learning_rate": 2.2762407646370385e-08, "loss": 0.12, "step": 52149 }, { "epoch": 0.9064993307723062, "grad_norm": 2.059814956474718, "learning_rate": 2.2754011727699584e-08, "loss": 0.1354, "step": 52150 }, { "epoch": 0.906516713309809, "grad_norm": 1.6379330617525771, "learning_rate": 2.2745617321679344e-08, "loss": 0.1815, "step": 52151 }, { "epoch": 0.9065340958473118, "grad_norm": 1.844568677485444, "learning_rate": 2.27372244283362e-08, "loss": 0.0945, "step": 52152 }, { "epoch": 0.9065514783848146, "grad_norm": 1.194330957803846, "learning_rate": 2.272883304769685e-08, "loss": 0.0827, "step": 52153 }, { "epoch": 0.9065688609223175, "grad_norm": 2.215424655782584, "learning_rate": 2.2720443179787885e-08, "loss": 0.1186, "step": 52154 }, { "epoch": 0.9065862434598203, "grad_norm": 1.785245712773386, "learning_rate": 2.271205482463584e-08, "loss": 0.1079, "step": 52155 }, { "epoch": 0.9066036259973231, "grad_norm": 1.7609690715378807, "learning_rate": 2.270366798226736e-08, "loss": 0.1545, "step": 52156 }, { "epoch": 0.906621008534826, "grad_norm": 1.7407276348030725, "learning_rate": 2.269528265270898e-08, "loss": 0.129, "step": 52157 }, { "epoch": 0.9066383910723287, "grad_norm": 1.9182289389096443, "learning_rate": 2.268689883598729e-08, "loss": 0.0943, "step": 52158 }, { "epoch": 0.9066557736098315, "grad_norm": 1.373601153728355, "learning_rate": 2.2678516532128933e-08, "loss": 0.1107, "step": 52159 }, { "epoch": 0.9066731561473343, "grad_norm": 1.6537523686643445, "learning_rate": 2.2670135741160278e-08, "loss": 0.0806, "step": 52160 }, { "epoch": 0.9066905386848372, "grad_norm": 1.3300351609153338, "learning_rate": 2.266175646310814e-08, "loss": 0.1051, "step": 52161 }, { "epoch": 0.90670792122234, "grad_norm": 1.0284771117220364, "learning_rate": 2.265337869799894e-08, "loss": 0.0773, "step": 52162 }, { "epoch": 0.9067253037598428, "grad_norm": 0.9833166680399074, "learning_rate": 2.264500244585932e-08, "loss": 0.1026, "step": 52163 }, { "epoch": 0.9067426862973457, "grad_norm": 1.3829959916135763, "learning_rate": 2.2636627706715593e-08, "loss": 0.1043, "step": 52164 }, { "epoch": 0.9067600688348485, "grad_norm": 0.8850097962315783, "learning_rate": 2.2628254480594577e-08, "loss": 0.0864, "step": 52165 }, { "epoch": 0.9067774513723513, "grad_norm": 2.190050453370354, "learning_rate": 2.2619882767522748e-08, "loss": 0.0966, "step": 52166 }, { "epoch": 0.9067948339098542, "grad_norm": 1.2402147933962835, "learning_rate": 2.2611512567526525e-08, "loss": 0.0843, "step": 52167 }, { "epoch": 0.906812216447357, "grad_norm": 1.022231732192044, "learning_rate": 2.2603143880632504e-08, "loss": 0.1406, "step": 52168 }, { "epoch": 0.9068295989848598, "grad_norm": 3.3302625826043766, "learning_rate": 2.2594776706867324e-08, "loss": 0.1417, "step": 52169 }, { "epoch": 0.9068469815223626, "grad_norm": 1.0815319109895485, "learning_rate": 2.258641104625736e-08, "loss": 0.0837, "step": 52170 }, { "epoch": 0.9068643640598655, "grad_norm": 1.6827355340563448, "learning_rate": 2.2578046898829138e-08, "loss": 0.1145, "step": 52171 }, { "epoch": 0.9068817465973683, "grad_norm": 1.765902143446134, "learning_rate": 2.2569684264609147e-08, "loss": 0.1544, "step": 52172 }, { "epoch": 0.9068991291348711, "grad_norm": 1.648287896678096, "learning_rate": 2.256132314362402e-08, "loss": 0.1303, "step": 52173 }, { "epoch": 0.906916511672374, "grad_norm": 2.536764787043403, "learning_rate": 2.2552963535900138e-08, "loss": 0.1335, "step": 52174 }, { "epoch": 0.9069338942098768, "grad_norm": 1.6153233520350612, "learning_rate": 2.254460544146408e-08, "loss": 0.0959, "step": 52175 }, { "epoch": 0.9069512767473796, "grad_norm": 1.7247538596166392, "learning_rate": 2.2536248860342277e-08, "loss": 0.0935, "step": 52176 }, { "epoch": 0.9069686592848825, "grad_norm": 1.6368903782187614, "learning_rate": 2.2527893792561258e-08, "loss": 0.1299, "step": 52177 }, { "epoch": 0.9069860418223852, "grad_norm": 1.1281790540114467, "learning_rate": 2.251954023814745e-08, "loss": 0.0813, "step": 52178 }, { "epoch": 0.907003424359888, "grad_norm": 1.2329878725437318, "learning_rate": 2.2511188197127383e-08, "loss": 0.1092, "step": 52179 }, { "epoch": 0.9070208068973908, "grad_norm": 1.3273775089419733, "learning_rate": 2.250283766952743e-08, "loss": 0.0959, "step": 52180 }, { "epoch": 0.9070381894348937, "grad_norm": 1.8798352931299884, "learning_rate": 2.2494488655374233e-08, "loss": 0.119, "step": 52181 }, { "epoch": 0.9070555719723965, "grad_norm": 2.0613240094254457, "learning_rate": 2.2486141154694215e-08, "loss": 0.1385, "step": 52182 }, { "epoch": 0.9070729545098993, "grad_norm": 1.3267356426016295, "learning_rate": 2.2477795167513635e-08, "loss": 0.1224, "step": 52183 }, { "epoch": 0.9070903370474022, "grad_norm": 1.9209223505231376, "learning_rate": 2.2469450693859137e-08, "loss": 0.1261, "step": 52184 }, { "epoch": 0.907107719584905, "grad_norm": 1.2470533279762768, "learning_rate": 2.2461107733757146e-08, "loss": 0.153, "step": 52185 }, { "epoch": 0.9071251021224078, "grad_norm": 1.2614549961159405, "learning_rate": 2.245276628723408e-08, "loss": 0.1334, "step": 52186 }, { "epoch": 0.9071424846599107, "grad_norm": 1.3693723527251398, "learning_rate": 2.244442635431637e-08, "loss": 0.111, "step": 52187 }, { "epoch": 0.9071598671974135, "grad_norm": 1.4331395984169037, "learning_rate": 2.243608793503049e-08, "loss": 0.1759, "step": 52188 }, { "epoch": 0.9071772497349163, "grad_norm": 1.2807170254561064, "learning_rate": 2.2427751029402807e-08, "loss": 0.0903, "step": 52189 }, { "epoch": 0.9071946322724191, "grad_norm": 1.2649942269326364, "learning_rate": 2.2419415637459748e-08, "loss": 0.1132, "step": 52190 }, { "epoch": 0.907212014809922, "grad_norm": 1.562695171442992, "learning_rate": 2.2411081759227736e-08, "loss": 0.1222, "step": 52191 }, { "epoch": 0.9072293973474248, "grad_norm": 1.8229094942295072, "learning_rate": 2.2402749394733243e-08, "loss": 0.1276, "step": 52192 }, { "epoch": 0.9072467798849276, "grad_norm": 1.2553393319749244, "learning_rate": 2.2394418544002648e-08, "loss": 0.1532, "step": 52193 }, { "epoch": 0.9072641624224305, "grad_norm": 1.2709637755814742, "learning_rate": 2.238608920706242e-08, "loss": 0.1534, "step": 52194 }, { "epoch": 0.9072815449599333, "grad_norm": 1.1988180551642504, "learning_rate": 2.2377761383938763e-08, "loss": 0.1157, "step": 52195 }, { "epoch": 0.9072989274974361, "grad_norm": 1.290967898954152, "learning_rate": 2.236943507465827e-08, "loss": 0.1173, "step": 52196 }, { "epoch": 0.907316310034939, "grad_norm": 1.4659357962375, "learning_rate": 2.2361110279247307e-08, "loss": 0.0974, "step": 52197 }, { "epoch": 0.9073336925724417, "grad_norm": 1.1664985441745568, "learning_rate": 2.2352786997732132e-08, "loss": 0.1068, "step": 52198 }, { "epoch": 0.9073510751099445, "grad_norm": 1.3089544937746738, "learning_rate": 2.234446523013922e-08, "loss": 0.1112, "step": 52199 }, { "epoch": 0.9073684576474473, "grad_norm": 1.6953607836931215, "learning_rate": 2.233614497649494e-08, "loss": 0.092, "step": 52200 }, { "epoch": 0.9073858401849502, "grad_norm": 2.559547979548645, "learning_rate": 2.2327826236825774e-08, "loss": 0.1866, "step": 52201 }, { "epoch": 0.907403222722453, "grad_norm": 1.7625277623897089, "learning_rate": 2.2319509011157865e-08, "loss": 0.1241, "step": 52202 }, { "epoch": 0.9074206052599558, "grad_norm": 1.0075106733633215, "learning_rate": 2.231119329951764e-08, "loss": 0.1665, "step": 52203 }, { "epoch": 0.9074379877974587, "grad_norm": 0.7238566570764717, "learning_rate": 2.230287910193157e-08, "loss": 0.0593, "step": 52204 }, { "epoch": 0.9074553703349615, "grad_norm": 1.2392915667049877, "learning_rate": 2.229456641842592e-08, "loss": 0.2092, "step": 52205 }, { "epoch": 0.9074727528724643, "grad_norm": 2.335971534931441, "learning_rate": 2.228625524902705e-08, "loss": 0.1756, "step": 52206 }, { "epoch": 0.9074901354099671, "grad_norm": 1.0395347117501041, "learning_rate": 2.227794559376134e-08, "loss": 0.1023, "step": 52207 }, { "epoch": 0.90750751794747, "grad_norm": 2.0327694468563613, "learning_rate": 2.226963745265509e-08, "loss": 0.1703, "step": 52208 }, { "epoch": 0.9075249004849728, "grad_norm": 0.896584695712545, "learning_rate": 2.2261330825734614e-08, "loss": 0.0882, "step": 52209 }, { "epoch": 0.9075422830224756, "grad_norm": 0.8399552005307561, "learning_rate": 2.2253025713026286e-08, "loss": 0.1179, "step": 52210 }, { "epoch": 0.9075596655599785, "grad_norm": 1.8551368265116503, "learning_rate": 2.2244722114556302e-08, "loss": 0.1707, "step": 52211 }, { "epoch": 0.9075770480974813, "grad_norm": 1.773298304114449, "learning_rate": 2.22364200303512e-08, "loss": 0.1415, "step": 52212 }, { "epoch": 0.9075944306349841, "grad_norm": 1.3186705015660871, "learning_rate": 2.2228119460437234e-08, "loss": 0.098, "step": 52213 }, { "epoch": 0.907611813172487, "grad_norm": 1.9124648459951967, "learning_rate": 2.2219820404840494e-08, "loss": 0.0974, "step": 52214 }, { "epoch": 0.9076291957099898, "grad_norm": 1.0434879727365785, "learning_rate": 2.221152286358757e-08, "loss": 0.0879, "step": 52215 }, { "epoch": 0.9076465782474926, "grad_norm": 1.1135274422810129, "learning_rate": 2.2203226836704614e-08, "loss": 0.1332, "step": 52216 }, { "epoch": 0.9076639607849953, "grad_norm": 0.9688154202916853, "learning_rate": 2.219493232421793e-08, "loss": 0.1287, "step": 52217 }, { "epoch": 0.9076813433224982, "grad_norm": 1.2004150231073871, "learning_rate": 2.2186639326153834e-08, "loss": 0.1947, "step": 52218 }, { "epoch": 0.907698725860001, "grad_norm": 1.141575259568911, "learning_rate": 2.217834784253858e-08, "loss": 0.1113, "step": 52219 }, { "epoch": 0.9077161083975038, "grad_norm": 1.192843631453792, "learning_rate": 2.217005787339854e-08, "loss": 0.075, "step": 52220 }, { "epoch": 0.9077334909350067, "grad_norm": 0.9968426029658624, "learning_rate": 2.2161769418759857e-08, "loss": 0.0995, "step": 52221 }, { "epoch": 0.9077508734725095, "grad_norm": 1.500085517846302, "learning_rate": 2.2153482478648844e-08, "loss": 0.1019, "step": 52222 }, { "epoch": 0.9077682560100123, "grad_norm": 1.0299814897392723, "learning_rate": 2.2145197053091814e-08, "loss": 0.1216, "step": 52223 }, { "epoch": 0.9077856385475152, "grad_norm": 1.6392062104094922, "learning_rate": 2.2136913142115023e-08, "loss": 0.0932, "step": 52224 }, { "epoch": 0.907803021085018, "grad_norm": 1.7551674914638593, "learning_rate": 2.212863074574467e-08, "loss": 0.1675, "step": 52225 }, { "epoch": 0.9078204036225208, "grad_norm": 1.9064966433161221, "learning_rate": 2.212034986400707e-08, "loss": 0.1257, "step": 52226 }, { "epoch": 0.9078377861600236, "grad_norm": 0.9305442979590038, "learning_rate": 2.2112070496928424e-08, "loss": 0.1587, "step": 52227 }, { "epoch": 0.9078551686975265, "grad_norm": 0.8763664704101863, "learning_rate": 2.2103792644535047e-08, "loss": 0.1125, "step": 52228 }, { "epoch": 0.9078725512350293, "grad_norm": 1.6615870677682323, "learning_rate": 2.209551630685308e-08, "loss": 0.1396, "step": 52229 }, { "epoch": 0.9078899337725321, "grad_norm": 1.3895334491602556, "learning_rate": 2.2087241483908724e-08, "loss": 0.109, "step": 52230 }, { "epoch": 0.907907316310035, "grad_norm": 0.980643489589946, "learning_rate": 2.2078968175728352e-08, "loss": 0.0875, "step": 52231 }, { "epoch": 0.9079246988475378, "grad_norm": 2.2100463449113548, "learning_rate": 2.207069638233816e-08, "loss": 0.1165, "step": 52232 }, { "epoch": 0.9079420813850406, "grad_norm": 1.3946996656165422, "learning_rate": 2.2062426103764186e-08, "loss": 0.1355, "step": 52233 }, { "epoch": 0.9079594639225435, "grad_norm": 1.4425075009053487, "learning_rate": 2.2054157340032854e-08, "loss": 0.1499, "step": 52234 }, { "epoch": 0.9079768464600463, "grad_norm": 1.7976588225525731, "learning_rate": 2.2045890091170304e-08, "loss": 0.0953, "step": 52235 }, { "epoch": 0.9079942289975491, "grad_norm": 1.3154576926307462, "learning_rate": 2.203762435720269e-08, "loss": 0.1387, "step": 52236 }, { "epoch": 0.9080116115350518, "grad_norm": 0.8477977989517317, "learning_rate": 2.2029360138156262e-08, "loss": 0.1454, "step": 52237 }, { "epoch": 0.9080289940725547, "grad_norm": 3.034998089198481, "learning_rate": 2.2021097434057167e-08, "loss": 0.1899, "step": 52238 }, { "epoch": 0.9080463766100575, "grad_norm": 0.6821230779005414, "learning_rate": 2.201283624493172e-08, "loss": 0.125, "step": 52239 }, { "epoch": 0.9080637591475603, "grad_norm": 0.965886092421947, "learning_rate": 2.2004576570805956e-08, "loss": 0.076, "step": 52240 }, { "epoch": 0.9080811416850632, "grad_norm": 1.5923759140533966, "learning_rate": 2.199631841170607e-08, "loss": 0.1278, "step": 52241 }, { "epoch": 0.908098524222566, "grad_norm": 0.8323513048799916, "learning_rate": 2.1988061767658328e-08, "loss": 0.1181, "step": 52242 }, { "epoch": 0.9081159067600688, "grad_norm": 1.1656997818125772, "learning_rate": 2.197980663868887e-08, "loss": 0.0896, "step": 52243 }, { "epoch": 0.9081332892975716, "grad_norm": 1.0303777888630328, "learning_rate": 2.1971553024823786e-08, "loss": 0.1404, "step": 52244 }, { "epoch": 0.9081506718350745, "grad_norm": 1.2688670555065686, "learning_rate": 2.1963300926089334e-08, "loss": 0.1716, "step": 52245 }, { "epoch": 0.9081680543725773, "grad_norm": 3.6484770450848356, "learning_rate": 2.1955050342511606e-08, "loss": 0.1549, "step": 52246 }, { "epoch": 0.9081854369100801, "grad_norm": 7.953057433062742, "learning_rate": 2.19468012741168e-08, "loss": 0.1919, "step": 52247 }, { "epoch": 0.908202819447583, "grad_norm": 1.1613604031899794, "learning_rate": 2.193855372093101e-08, "loss": 0.1006, "step": 52248 }, { "epoch": 0.9082202019850858, "grad_norm": 1.075045032936473, "learning_rate": 2.1930307682980374e-08, "loss": 0.1239, "step": 52249 }, { "epoch": 0.9082375845225886, "grad_norm": 2.2435192365560623, "learning_rate": 2.192206316029105e-08, "loss": 0.1108, "step": 52250 }, { "epoch": 0.9082549670600915, "grad_norm": 1.1122532236895706, "learning_rate": 2.1913820152889284e-08, "loss": 0.1263, "step": 52251 }, { "epoch": 0.9082723495975943, "grad_norm": 0.7937809678127533, "learning_rate": 2.190557866080106e-08, "loss": 0.1178, "step": 52252 }, { "epoch": 0.9082897321350971, "grad_norm": 2.6861359917523435, "learning_rate": 2.1897338684052412e-08, "loss": 0.1635, "step": 52253 }, { "epoch": 0.9083071146726, "grad_norm": 2.945342959094561, "learning_rate": 2.1889100222669653e-08, "loss": 0.1353, "step": 52254 }, { "epoch": 0.9083244972101028, "grad_norm": 0.9245098639283044, "learning_rate": 2.1880863276678874e-08, "loss": 0.0873, "step": 52255 }, { "epoch": 0.9083418797476056, "grad_norm": 1.2157993466337211, "learning_rate": 2.1872627846106052e-08, "loss": 0.0992, "step": 52256 }, { "epoch": 0.9083592622851083, "grad_norm": 1.2449656911246765, "learning_rate": 2.1864393930977333e-08, "loss": 0.1207, "step": 52257 }, { "epoch": 0.9083766448226112, "grad_norm": 0.7696050099525722, "learning_rate": 2.185616153131897e-08, "loss": 0.0785, "step": 52258 }, { "epoch": 0.908394027360114, "grad_norm": 1.7559291624081617, "learning_rate": 2.1847930647156898e-08, "loss": 0.1309, "step": 52259 }, { "epoch": 0.9084114098976168, "grad_norm": 2.2497127644687613, "learning_rate": 2.1839701278517198e-08, "loss": 0.1819, "step": 52260 }, { "epoch": 0.9084287924351196, "grad_norm": 1.3687068474745017, "learning_rate": 2.183147342542596e-08, "loss": 0.1359, "step": 52261 }, { "epoch": 0.9084461749726225, "grad_norm": 1.10246538334058, "learning_rate": 2.182324708790939e-08, "loss": 0.2536, "step": 52262 }, { "epoch": 0.9084635575101253, "grad_norm": 1.9658912341081123, "learning_rate": 2.181502226599341e-08, "loss": 0.1449, "step": 52263 }, { "epoch": 0.9084809400476281, "grad_norm": 0.7741100322239634, "learning_rate": 2.1806798959704165e-08, "loss": 0.1062, "step": 52264 }, { "epoch": 0.908498322585131, "grad_norm": 1.313999499319314, "learning_rate": 2.1798577169067743e-08, "loss": 0.1392, "step": 52265 }, { "epoch": 0.9085157051226338, "grad_norm": 2.8054773958797434, "learning_rate": 2.1790356894110128e-08, "loss": 0.1454, "step": 52266 }, { "epoch": 0.9085330876601366, "grad_norm": 1.7094175930822535, "learning_rate": 2.178213813485741e-08, "loss": 0.1462, "step": 52267 }, { "epoch": 0.9085504701976395, "grad_norm": 0.9729441499041013, "learning_rate": 2.1773920891335617e-08, "loss": 0.0987, "step": 52268 }, { "epoch": 0.9085678527351423, "grad_norm": 0.8126826919078762, "learning_rate": 2.176570516357079e-08, "loss": 0.0909, "step": 52269 }, { "epoch": 0.9085852352726451, "grad_norm": 1.398061880967051, "learning_rate": 2.175749095158913e-08, "loss": 0.1354, "step": 52270 }, { "epoch": 0.908602617810148, "grad_norm": 1.5123858955482892, "learning_rate": 2.1749278255416447e-08, "loss": 0.161, "step": 52271 }, { "epoch": 0.9086200003476508, "grad_norm": 0.8813997327152739, "learning_rate": 2.1741067075078834e-08, "loss": 0.1097, "step": 52272 }, { "epoch": 0.9086373828851536, "grad_norm": 1.456357470224862, "learning_rate": 2.173285741060238e-08, "loss": 0.1362, "step": 52273 }, { "epoch": 0.9086547654226564, "grad_norm": 0.7634106620430555, "learning_rate": 2.172464926201306e-08, "loss": 0.1508, "step": 52274 }, { "epoch": 0.9086721479601593, "grad_norm": 1.5902595667904187, "learning_rate": 2.1716442629336916e-08, "loss": 0.1533, "step": 52275 }, { "epoch": 0.9086895304976621, "grad_norm": 1.3603655986202048, "learning_rate": 2.1708237512599926e-08, "loss": 0.1243, "step": 52276 }, { "epoch": 0.9087069130351648, "grad_norm": 1.3391525663155537, "learning_rate": 2.170003391182812e-08, "loss": 0.1371, "step": 52277 }, { "epoch": 0.9087242955726677, "grad_norm": 1.5158725381285798, "learning_rate": 2.1691831827047535e-08, "loss": 0.134, "step": 52278 }, { "epoch": 0.9087416781101705, "grad_norm": 1.2033988533888118, "learning_rate": 2.1683631258284042e-08, "loss": 0.1003, "step": 52279 }, { "epoch": 0.9087590606476733, "grad_norm": 0.9033837243519639, "learning_rate": 2.1675432205563725e-08, "loss": 0.1097, "step": 52280 }, { "epoch": 0.9087764431851761, "grad_norm": 2.769113248160559, "learning_rate": 2.1667234668912625e-08, "loss": 0.1558, "step": 52281 }, { "epoch": 0.908793825722679, "grad_norm": 1.427717383299334, "learning_rate": 2.165903864835661e-08, "loss": 0.1013, "step": 52282 }, { "epoch": 0.9088112082601818, "grad_norm": 2.1878963898887824, "learning_rate": 2.165084414392182e-08, "loss": 0.1351, "step": 52283 }, { "epoch": 0.9088285907976846, "grad_norm": 2.529877230741795, "learning_rate": 2.164265115563396e-08, "loss": 0.131, "step": 52284 }, { "epoch": 0.9088459733351875, "grad_norm": 1.6467336216619903, "learning_rate": 2.1634459683519237e-08, "loss": 0.1679, "step": 52285 }, { "epoch": 0.9088633558726903, "grad_norm": 1.5358524920829535, "learning_rate": 2.1626269727603562e-08, "loss": 0.135, "step": 52286 }, { "epoch": 0.9088807384101931, "grad_norm": 1.707819203395895, "learning_rate": 2.1618081287912817e-08, "loss": 0.1197, "step": 52287 }, { "epoch": 0.908898120947696, "grad_norm": 1.7749344625707684, "learning_rate": 2.160989436447297e-08, "loss": 0.1197, "step": 52288 }, { "epoch": 0.9089155034851988, "grad_norm": 1.5652212323482115, "learning_rate": 2.1601708957310116e-08, "loss": 0.1783, "step": 52289 }, { "epoch": 0.9089328860227016, "grad_norm": 1.0529371133501253, "learning_rate": 2.1593525066450015e-08, "loss": 0.1068, "step": 52290 }, { "epoch": 0.9089502685602044, "grad_norm": 1.0082667072810736, "learning_rate": 2.1585342691918696e-08, "loss": 0.1025, "step": 52291 }, { "epoch": 0.9089676510977073, "grad_norm": 2.1786747142235643, "learning_rate": 2.157716183374203e-08, "loss": 0.0902, "step": 52292 }, { "epoch": 0.9089850336352101, "grad_norm": 1.5986879645368148, "learning_rate": 2.1568982491946052e-08, "loss": 0.1197, "step": 52293 }, { "epoch": 0.9090024161727129, "grad_norm": 1.2714193022919864, "learning_rate": 2.1560804666556574e-08, "loss": 0.1034, "step": 52294 }, { "epoch": 0.9090197987102158, "grad_norm": 1.6931669858519138, "learning_rate": 2.1552628357599634e-08, "loss": 0.1262, "step": 52295 }, { "epoch": 0.9090371812477186, "grad_norm": 1.1460164834579576, "learning_rate": 2.1544453565101038e-08, "loss": 0.0991, "step": 52296 }, { "epoch": 0.9090545637852213, "grad_norm": 1.9275858646005697, "learning_rate": 2.153628028908672e-08, "loss": 0.186, "step": 52297 }, { "epoch": 0.9090719463227241, "grad_norm": 0.7815137375714021, "learning_rate": 2.1528108529582645e-08, "loss": 0.1098, "step": 52298 }, { "epoch": 0.909089328860227, "grad_norm": 1.2021210236930393, "learning_rate": 2.151993828661469e-08, "loss": 0.1761, "step": 52299 }, { "epoch": 0.9091067113977298, "grad_norm": 1.5617967940797084, "learning_rate": 2.1511769560208616e-08, "loss": 0.1518, "step": 52300 }, { "epoch": 0.9091240939352326, "grad_norm": 2.74434069870228, "learning_rate": 2.1503602350390505e-08, "loss": 0.1411, "step": 52301 }, { "epoch": 0.9091414764727355, "grad_norm": 1.248335091449753, "learning_rate": 2.1495436657186283e-08, "loss": 0.1422, "step": 52302 }, { "epoch": 0.9091588590102383, "grad_norm": 2.237829846634422, "learning_rate": 2.1487272480621542e-08, "loss": 0.113, "step": 52303 }, { "epoch": 0.9091762415477411, "grad_norm": 1.875705698378056, "learning_rate": 2.147910982072243e-08, "loss": 0.1557, "step": 52304 }, { "epoch": 0.909193624085244, "grad_norm": 1.217371197050207, "learning_rate": 2.1470948677514755e-08, "loss": 0.0965, "step": 52305 }, { "epoch": 0.9092110066227468, "grad_norm": 2.367599758442631, "learning_rate": 2.1462789051024276e-08, "loss": 0.153, "step": 52306 }, { "epoch": 0.9092283891602496, "grad_norm": 1.3009558679795168, "learning_rate": 2.1454630941276973e-08, "loss": 0.1274, "step": 52307 }, { "epoch": 0.9092457716977524, "grad_norm": 1.2343222334333754, "learning_rate": 2.144647434829866e-08, "loss": 0.0906, "step": 52308 }, { "epoch": 0.9092631542352553, "grad_norm": 1.1714612053866922, "learning_rate": 2.1438319272115203e-08, "loss": 0.17, "step": 52309 }, { "epoch": 0.9092805367727581, "grad_norm": 2.627827192201855, "learning_rate": 2.1430165712752467e-08, "loss": 0.1583, "step": 52310 }, { "epoch": 0.9092979193102609, "grad_norm": 1.3406251031098042, "learning_rate": 2.142201367023616e-08, "loss": 0.1386, "step": 52311 }, { "epoch": 0.9093153018477638, "grad_norm": 1.0414830371668986, "learning_rate": 2.1413863144592314e-08, "loss": 0.1775, "step": 52312 }, { "epoch": 0.9093326843852666, "grad_norm": 1.94492199584556, "learning_rate": 2.1405714135846686e-08, "loss": 0.2217, "step": 52313 }, { "epoch": 0.9093500669227694, "grad_norm": 1.1720177563481962, "learning_rate": 2.1397566644025145e-08, "loss": 0.117, "step": 52314 }, { "epoch": 0.9093674494602723, "grad_norm": 2.2872383306818227, "learning_rate": 2.1389420669153334e-08, "loss": 0.1804, "step": 52315 }, { "epoch": 0.9093848319977751, "grad_norm": 1.6571373163869019, "learning_rate": 2.1381276211257293e-08, "loss": 0.1177, "step": 52316 }, { "epoch": 0.9094022145352778, "grad_norm": 3.0568740389982727, "learning_rate": 2.137313327036272e-08, "loss": 0.1401, "step": 52317 }, { "epoch": 0.9094195970727806, "grad_norm": 0.8313494675346157, "learning_rate": 2.136499184649543e-08, "loss": 0.0627, "step": 52318 }, { "epoch": 0.9094369796102835, "grad_norm": 1.5555737247933576, "learning_rate": 2.1356851939681232e-08, "loss": 0.0834, "step": 52319 }, { "epoch": 0.9094543621477863, "grad_norm": 0.9609099017501751, "learning_rate": 2.1348713549945997e-08, "loss": 0.0914, "step": 52320 }, { "epoch": 0.9094717446852891, "grad_norm": 0.7570164074054633, "learning_rate": 2.134057667731548e-08, "loss": 0.0729, "step": 52321 }, { "epoch": 0.909489127222792, "grad_norm": 1.4312407496480781, "learning_rate": 2.133244132181544e-08, "loss": 0.0976, "step": 52322 }, { "epoch": 0.9095065097602948, "grad_norm": 1.0008029654020005, "learning_rate": 2.1324307483471638e-08, "loss": 0.0871, "step": 52323 }, { "epoch": 0.9095238922977976, "grad_norm": 1.970480481615563, "learning_rate": 2.1316175162309936e-08, "loss": 0.0878, "step": 52324 }, { "epoch": 0.9095412748353005, "grad_norm": 1.6359468225064928, "learning_rate": 2.1308044358356038e-08, "loss": 0.0968, "step": 52325 }, { "epoch": 0.9095586573728033, "grad_norm": 1.1520717772810272, "learning_rate": 2.1299915071635755e-08, "loss": 0.093, "step": 52326 }, { "epoch": 0.9095760399103061, "grad_norm": 1.4023230267890892, "learning_rate": 2.1291787302174902e-08, "loss": 0.1019, "step": 52327 }, { "epoch": 0.9095934224478089, "grad_norm": 4.648925428308229, "learning_rate": 2.1283661049999126e-08, "loss": 0.0973, "step": 52328 }, { "epoch": 0.9096108049853118, "grad_norm": 4.412947869458873, "learning_rate": 2.1275536315134236e-08, "loss": 0.2146, "step": 52329 }, { "epoch": 0.9096281875228146, "grad_norm": 1.1806223014854398, "learning_rate": 2.1267413097605936e-08, "loss": 0.1168, "step": 52330 }, { "epoch": 0.9096455700603174, "grad_norm": 1.4039309833969325, "learning_rate": 2.1259291397440093e-08, "loss": 0.1253, "step": 52331 }, { "epoch": 0.9096629525978203, "grad_norm": 1.5185417708466495, "learning_rate": 2.1251171214662413e-08, "loss": 0.0944, "step": 52332 }, { "epoch": 0.9096803351353231, "grad_norm": 0.9684808605525806, "learning_rate": 2.1243052549298647e-08, "loss": 0.0863, "step": 52333 }, { "epoch": 0.9096977176728259, "grad_norm": 1.3247705335831437, "learning_rate": 2.123493540137433e-08, "loss": 0.0937, "step": 52334 }, { "epoch": 0.9097151002103288, "grad_norm": 1.210225775762611, "learning_rate": 2.1226819770915394e-08, "loss": 0.0615, "step": 52335 }, { "epoch": 0.9097324827478316, "grad_norm": 1.3472386943317403, "learning_rate": 2.1218705657947534e-08, "loss": 0.1733, "step": 52336 }, { "epoch": 0.9097498652853343, "grad_norm": 1.2384029604764903, "learning_rate": 2.121059306249645e-08, "loss": 0.073, "step": 52337 }, { "epoch": 0.9097672478228371, "grad_norm": 1.542118221809573, "learning_rate": 2.1202481984587794e-08, "loss": 0.1262, "step": 52338 }, { "epoch": 0.90978463036034, "grad_norm": 1.4033511778750802, "learning_rate": 2.1194372424247374e-08, "loss": 0.1107, "step": 52339 }, { "epoch": 0.9098020128978428, "grad_norm": 2.02731015140192, "learning_rate": 2.1186264381500897e-08, "loss": 0.1013, "step": 52340 }, { "epoch": 0.9098193954353456, "grad_norm": 2.558417367770108, "learning_rate": 2.1178157856373947e-08, "loss": 0.1143, "step": 52341 }, { "epoch": 0.9098367779728485, "grad_norm": 1.6503178908342953, "learning_rate": 2.1170052848892228e-08, "loss": 0.1271, "step": 52342 }, { "epoch": 0.9098541605103513, "grad_norm": 1.1485971676597324, "learning_rate": 2.1161949359081554e-08, "loss": 0.0931, "step": 52343 }, { "epoch": 0.9098715430478541, "grad_norm": 5.940684091385251, "learning_rate": 2.115384738696757e-08, "loss": 0.2165, "step": 52344 }, { "epoch": 0.909888925585357, "grad_norm": 1.8741576122294206, "learning_rate": 2.1145746932575924e-08, "loss": 0.2676, "step": 52345 }, { "epoch": 0.9099063081228598, "grad_norm": 1.0170847427598755, "learning_rate": 2.113764799593226e-08, "loss": 0.0815, "step": 52346 }, { "epoch": 0.9099236906603626, "grad_norm": 2.134184439797418, "learning_rate": 2.1129550577062282e-08, "loss": 0.1883, "step": 52347 }, { "epoch": 0.9099410731978654, "grad_norm": 1.2431457115361488, "learning_rate": 2.1121454675991634e-08, "loss": 0.1038, "step": 52348 }, { "epoch": 0.9099584557353683, "grad_norm": 2.183707154374189, "learning_rate": 2.111336029274602e-08, "loss": 0.1327, "step": 52349 }, { "epoch": 0.9099758382728711, "grad_norm": 1.3861213485954942, "learning_rate": 2.1105267427351027e-08, "loss": 0.1166, "step": 52350 }, { "epoch": 0.9099932208103739, "grad_norm": 1.3893189561510944, "learning_rate": 2.1097176079832412e-08, "loss": 0.0917, "step": 52351 }, { "epoch": 0.9100106033478768, "grad_norm": 1.3439796637907504, "learning_rate": 2.108908625021577e-08, "loss": 0.087, "step": 52352 }, { "epoch": 0.9100279858853796, "grad_norm": 1.8501396554949614, "learning_rate": 2.108099793852669e-08, "loss": 0.1158, "step": 52353 }, { "epoch": 0.9100453684228824, "grad_norm": 1.746484614828881, "learning_rate": 2.1072911144790817e-08, "loss": 0.1481, "step": 52354 }, { "epoch": 0.9100627509603852, "grad_norm": 1.4959621204082274, "learning_rate": 2.1064825869033908e-08, "loss": 0.1094, "step": 52355 }, { "epoch": 0.910080133497888, "grad_norm": 1.1128117175666081, "learning_rate": 2.1056742111281443e-08, "loss": 0.1065, "step": 52356 }, { "epoch": 0.9100975160353908, "grad_norm": 0.8372332127237759, "learning_rate": 2.104865987155907e-08, "loss": 0.1119, "step": 52357 }, { "epoch": 0.9101148985728936, "grad_norm": 8.244062550248598, "learning_rate": 2.1040579149892434e-08, "loss": 0.3156, "step": 52358 }, { "epoch": 0.9101322811103965, "grad_norm": 2.0452538526158444, "learning_rate": 2.1032499946307237e-08, "loss": 0.1115, "step": 52359 }, { "epoch": 0.9101496636478993, "grad_norm": 2.931840063831108, "learning_rate": 2.10244222608289e-08, "loss": 0.1711, "step": 52360 }, { "epoch": 0.9101670461854021, "grad_norm": 1.4243919691116456, "learning_rate": 2.1016346093483128e-08, "loss": 0.1297, "step": 52361 }, { "epoch": 0.910184428722905, "grad_norm": 3.1596607193878006, "learning_rate": 2.100827144429551e-08, "loss": 0.1093, "step": 52362 }, { "epoch": 0.9102018112604078, "grad_norm": 1.0903482969426115, "learning_rate": 2.1000198313291695e-08, "loss": 0.1064, "step": 52363 }, { "epoch": 0.9102191937979106, "grad_norm": 1.232140621258013, "learning_rate": 2.0992126700497214e-08, "loss": 0.1005, "step": 52364 }, { "epoch": 0.9102365763354134, "grad_norm": 0.8798803946290173, "learning_rate": 2.0984056605937604e-08, "loss": 0.1133, "step": 52365 }, { "epoch": 0.9102539588729163, "grad_norm": 1.650199451582048, "learning_rate": 2.0975988029638515e-08, "loss": 0.1174, "step": 52366 }, { "epoch": 0.9102713414104191, "grad_norm": 1.8943830294333057, "learning_rate": 2.096792097162553e-08, "loss": 0.121, "step": 52367 }, { "epoch": 0.9102887239479219, "grad_norm": 1.3118218209106272, "learning_rate": 2.0959855431924133e-08, "loss": 0.1142, "step": 52368 }, { "epoch": 0.9103061064854248, "grad_norm": 1.4043896214707592, "learning_rate": 2.0951791410559914e-08, "loss": 0.1444, "step": 52369 }, { "epoch": 0.9103234890229276, "grad_norm": 0.743558107724935, "learning_rate": 2.0943728907558522e-08, "loss": 0.1106, "step": 52370 }, { "epoch": 0.9103408715604304, "grad_norm": 1.399739161458369, "learning_rate": 2.093566792294549e-08, "loss": 0.1404, "step": 52371 }, { "epoch": 0.9103582540979333, "grad_norm": 3.345569147046888, "learning_rate": 2.0927608456746292e-08, "loss": 0.2221, "step": 52372 }, { "epoch": 0.9103756366354361, "grad_norm": 1.3403273383792205, "learning_rate": 2.0919550508986416e-08, "loss": 0.1146, "step": 52373 }, { "epoch": 0.9103930191729389, "grad_norm": 2.394561091030133, "learning_rate": 2.0911494079691616e-08, "loss": 0.1292, "step": 52374 }, { "epoch": 0.9104104017104417, "grad_norm": 1.1004536167573562, "learning_rate": 2.090343916888726e-08, "loss": 0.1536, "step": 52375 }, { "epoch": 0.9104277842479445, "grad_norm": 1.0869107511416056, "learning_rate": 2.0895385776598885e-08, "loss": 0.1411, "step": 52376 }, { "epoch": 0.9104451667854473, "grad_norm": 1.443897072296829, "learning_rate": 2.0887333902852022e-08, "loss": 0.1124, "step": 52377 }, { "epoch": 0.9104625493229501, "grad_norm": 2.442220518337119, "learning_rate": 2.0879283547672378e-08, "loss": 0.1875, "step": 52378 }, { "epoch": 0.910479931860453, "grad_norm": 1.1705302909127975, "learning_rate": 2.0871234711085205e-08, "loss": 0.1016, "step": 52379 }, { "epoch": 0.9104973143979558, "grad_norm": 1.1700198178345644, "learning_rate": 2.0863187393116156e-08, "loss": 0.128, "step": 52380 }, { "epoch": 0.9105146969354586, "grad_norm": 1.5207518139382399, "learning_rate": 2.085514159379065e-08, "loss": 0.1284, "step": 52381 }, { "epoch": 0.9105320794729614, "grad_norm": 0.9913267693739451, "learning_rate": 2.0847097313134276e-08, "loss": 0.1137, "step": 52382 }, { "epoch": 0.9105494620104643, "grad_norm": 1.728434761052855, "learning_rate": 2.0839054551172517e-08, "loss": 0.1838, "step": 52383 }, { "epoch": 0.9105668445479671, "grad_norm": 1.2758031063015225, "learning_rate": 2.0831013307930855e-08, "loss": 0.0941, "step": 52384 }, { "epoch": 0.9105842270854699, "grad_norm": 1.669979474910624, "learning_rate": 2.0822973583434767e-08, "loss": 0.1197, "step": 52385 }, { "epoch": 0.9106016096229728, "grad_norm": 1.115892165748394, "learning_rate": 2.081493537770973e-08, "loss": 0.0867, "step": 52386 }, { "epoch": 0.9106189921604756, "grad_norm": 1.1734686298950912, "learning_rate": 2.0806898690781226e-08, "loss": 0.0991, "step": 52387 }, { "epoch": 0.9106363746979784, "grad_norm": 1.2254529287320874, "learning_rate": 2.0798863522674735e-08, "loss": 0.1011, "step": 52388 }, { "epoch": 0.9106537572354813, "grad_norm": 1.3914143097634277, "learning_rate": 2.0790829873415683e-08, "loss": 0.1534, "step": 52389 }, { "epoch": 0.9106711397729841, "grad_norm": 1.9092947956391382, "learning_rate": 2.0782797743029656e-08, "loss": 0.1543, "step": 52390 }, { "epoch": 0.9106885223104869, "grad_norm": 1.4822979060855006, "learning_rate": 2.077476713154197e-08, "loss": 0.0953, "step": 52391 }, { "epoch": 0.9107059048479897, "grad_norm": 1.3021282362491782, "learning_rate": 2.076673803897805e-08, "loss": 0.2011, "step": 52392 }, { "epoch": 0.9107232873854926, "grad_norm": 1.3930463385737661, "learning_rate": 2.075871046536354e-08, "loss": 0.1399, "step": 52393 }, { "epoch": 0.9107406699229954, "grad_norm": 1.1512852470376735, "learning_rate": 2.0750684410723752e-08, "loss": 0.1402, "step": 52394 }, { "epoch": 0.9107580524604982, "grad_norm": 1.0200290785533133, "learning_rate": 2.074265987508411e-08, "loss": 0.101, "step": 52395 }, { "epoch": 0.910775434998001, "grad_norm": 0.7324152308631434, "learning_rate": 2.0734636858470155e-08, "loss": 0.0703, "step": 52396 }, { "epoch": 0.9107928175355038, "grad_norm": 2.3304515357208344, "learning_rate": 2.0726615360907197e-08, "loss": 0.2373, "step": 52397 }, { "epoch": 0.9108102000730066, "grad_norm": 1.742222157203996, "learning_rate": 2.0718595382420713e-08, "loss": 0.1314, "step": 52398 }, { "epoch": 0.9108275826105094, "grad_norm": 1.8922454542877432, "learning_rate": 2.0710576923036128e-08, "loss": 0.1893, "step": 52399 }, { "epoch": 0.9108449651480123, "grad_norm": 1.3549773348218248, "learning_rate": 2.0702559982778757e-08, "loss": 0.1276, "step": 52400 }, { "epoch": 0.9108623476855151, "grad_norm": 1.1679100727773488, "learning_rate": 2.069454456167419e-08, "loss": 0.0995, "step": 52401 }, { "epoch": 0.9108797302230179, "grad_norm": 1.2292601256228257, "learning_rate": 2.0686530659747737e-08, "loss": 0.1351, "step": 52402 }, { "epoch": 0.9108971127605208, "grad_norm": 1.6756148444312928, "learning_rate": 2.0678518277024827e-08, "loss": 0.0868, "step": 52403 }, { "epoch": 0.9109144952980236, "grad_norm": 1.4604228811202915, "learning_rate": 2.0670507413530768e-08, "loss": 0.0935, "step": 52404 }, { "epoch": 0.9109318778355264, "grad_norm": 1.4904907009369872, "learning_rate": 2.0662498069291047e-08, "loss": 0.1291, "step": 52405 }, { "epoch": 0.9109492603730293, "grad_norm": 1.4040689051979123, "learning_rate": 2.0654490244331023e-08, "loss": 0.171, "step": 52406 }, { "epoch": 0.9109666429105321, "grad_norm": 1.2995391989980496, "learning_rate": 2.0646483938676072e-08, "loss": 0.1731, "step": 52407 }, { "epoch": 0.9109840254480349, "grad_norm": 1.344672559946033, "learning_rate": 2.0638479152351508e-08, "loss": 0.1217, "step": 52408 }, { "epoch": 0.9110014079855377, "grad_norm": 1.29133330979309, "learning_rate": 2.063047588538286e-08, "loss": 0.1139, "step": 52409 }, { "epoch": 0.9110187905230406, "grad_norm": 1.3439171244788093, "learning_rate": 2.0622474137795386e-08, "loss": 0.1395, "step": 52410 }, { "epoch": 0.9110361730605434, "grad_norm": 1.2585139807487182, "learning_rate": 2.061447390961446e-08, "loss": 0.1087, "step": 52411 }, { "epoch": 0.9110535555980462, "grad_norm": 1.394469484065524, "learning_rate": 2.0606475200865335e-08, "loss": 0.1206, "step": 52412 }, { "epoch": 0.9110709381355491, "grad_norm": 1.320724737816823, "learning_rate": 2.059847801157355e-08, "loss": 0.1318, "step": 52413 }, { "epoch": 0.9110883206730519, "grad_norm": 0.8000160240518864, "learning_rate": 2.059048234176436e-08, "loss": 0.1175, "step": 52414 }, { "epoch": 0.9111057032105547, "grad_norm": 1.382336675015568, "learning_rate": 2.058248819146313e-08, "loss": 0.1161, "step": 52415 }, { "epoch": 0.9111230857480574, "grad_norm": 3.0189715273905042, "learning_rate": 2.0574495560695182e-08, "loss": 0.1749, "step": 52416 }, { "epoch": 0.9111404682855603, "grad_norm": 1.4075586943280762, "learning_rate": 2.0566504449485877e-08, "loss": 0.1553, "step": 52417 }, { "epoch": 0.9111578508230631, "grad_norm": 1.851557911722372, "learning_rate": 2.0558514857860476e-08, "loss": 0.1447, "step": 52418 }, { "epoch": 0.9111752333605659, "grad_norm": 1.1001488321356045, "learning_rate": 2.0550526785844347e-08, "loss": 0.107, "step": 52419 }, { "epoch": 0.9111926158980688, "grad_norm": 1.0970638573446652, "learning_rate": 2.05425402334628e-08, "loss": 0.1099, "step": 52420 }, { "epoch": 0.9112099984355716, "grad_norm": 1.3297557035488563, "learning_rate": 2.053455520074121e-08, "loss": 0.1058, "step": 52421 }, { "epoch": 0.9112273809730744, "grad_norm": 1.351195621555158, "learning_rate": 2.052657168770483e-08, "loss": 0.0964, "step": 52422 }, { "epoch": 0.9112447635105773, "grad_norm": 1.3233317350360618, "learning_rate": 2.0518589694378917e-08, "loss": 0.1574, "step": 52423 }, { "epoch": 0.9112621460480801, "grad_norm": 1.190474946406449, "learning_rate": 2.0510609220788844e-08, "loss": 0.1262, "step": 52424 }, { "epoch": 0.9112795285855829, "grad_norm": 1.474223751163803, "learning_rate": 2.0502630266959863e-08, "loss": 0.16, "step": 52425 }, { "epoch": 0.9112969111230858, "grad_norm": 1.6332675365406206, "learning_rate": 2.0494652832917293e-08, "loss": 0.1352, "step": 52426 }, { "epoch": 0.9113142936605886, "grad_norm": 2.628110350491636, "learning_rate": 2.048667691868633e-08, "loss": 0.2973, "step": 52427 }, { "epoch": 0.9113316761980914, "grad_norm": 1.6395864576245092, "learning_rate": 2.047870252429251e-08, "loss": 0.1091, "step": 52428 }, { "epoch": 0.9113490587355942, "grad_norm": 1.3407187908647624, "learning_rate": 2.047072964976082e-08, "loss": 0.121, "step": 52429 }, { "epoch": 0.9113664412730971, "grad_norm": 1.779216607669363, "learning_rate": 2.0462758295116676e-08, "loss": 0.1311, "step": 52430 }, { "epoch": 0.9113838238105999, "grad_norm": 1.2350541961021426, "learning_rate": 2.0454788460385176e-08, "loss": 0.1031, "step": 52431 }, { "epoch": 0.9114012063481027, "grad_norm": 1.0685477866154474, "learning_rate": 2.0446820145591848e-08, "loss": 0.1448, "step": 52432 }, { "epoch": 0.9114185888856056, "grad_norm": 1.8667935065496235, "learning_rate": 2.0438853350761788e-08, "loss": 0.107, "step": 52433 }, { "epoch": 0.9114359714231084, "grad_norm": 1.1615941643246976, "learning_rate": 2.0430888075920248e-08, "loss": 0.1481, "step": 52434 }, { "epoch": 0.9114533539606112, "grad_norm": 8.188272287936995, "learning_rate": 2.0422924321092494e-08, "loss": 0.1644, "step": 52435 }, { "epoch": 0.9114707364981139, "grad_norm": 1.5807372501570554, "learning_rate": 2.041496208630378e-08, "loss": 0.1635, "step": 52436 }, { "epoch": 0.9114881190356168, "grad_norm": 0.9612750259714147, "learning_rate": 2.0407001371579357e-08, "loss": 0.1037, "step": 52437 }, { "epoch": 0.9115055015731196, "grad_norm": 2.5052194446107205, "learning_rate": 2.0399042176944438e-08, "loss": 0.2089, "step": 52438 }, { "epoch": 0.9115228841106224, "grad_norm": 1.1681617409965084, "learning_rate": 2.039108450242416e-08, "loss": 0.1059, "step": 52439 }, { "epoch": 0.9115402666481253, "grad_norm": 2.42267326849918, "learning_rate": 2.0383128348043898e-08, "loss": 0.165, "step": 52440 }, { "epoch": 0.9115576491856281, "grad_norm": 1.6752443771268655, "learning_rate": 2.037517371382885e-08, "loss": 0.1607, "step": 52441 }, { "epoch": 0.9115750317231309, "grad_norm": 1.635541245409051, "learning_rate": 2.0367220599804058e-08, "loss": 0.1095, "step": 52442 }, { "epoch": 0.9115924142606338, "grad_norm": 1.3314944529297665, "learning_rate": 2.035926900599494e-08, "loss": 0.1239, "step": 52443 }, { "epoch": 0.9116097967981366, "grad_norm": 2.669634646772801, "learning_rate": 2.0351318932426587e-08, "loss": 0.1856, "step": 52444 }, { "epoch": 0.9116271793356394, "grad_norm": 1.7396182127858966, "learning_rate": 2.0343370379124203e-08, "loss": 0.1322, "step": 52445 }, { "epoch": 0.9116445618731422, "grad_norm": 2.652233771973966, "learning_rate": 2.0335423346113047e-08, "loss": 0.2065, "step": 52446 }, { "epoch": 0.9116619444106451, "grad_norm": 1.3216124330065608, "learning_rate": 2.0327477833418207e-08, "loss": 0.1436, "step": 52447 }, { "epoch": 0.9116793269481479, "grad_norm": 0.9068113016799182, "learning_rate": 2.031953384106494e-08, "loss": 0.1075, "step": 52448 }, { "epoch": 0.9116967094856507, "grad_norm": 1.1845297610527052, "learning_rate": 2.0311591369078396e-08, "loss": 0.1358, "step": 52449 }, { "epoch": 0.9117140920231536, "grad_norm": 1.140339349002241, "learning_rate": 2.0303650417483718e-08, "loss": 0.131, "step": 52450 }, { "epoch": 0.9117314745606564, "grad_norm": 1.497250993550502, "learning_rate": 2.029571098630617e-08, "loss": 0.1268, "step": 52451 }, { "epoch": 0.9117488570981592, "grad_norm": 1.9156468606212584, "learning_rate": 2.0287773075570837e-08, "loss": 0.1191, "step": 52452 }, { "epoch": 0.9117662396356621, "grad_norm": 1.0454977170741304, "learning_rate": 2.0279836685302975e-08, "loss": 0.1101, "step": 52453 }, { "epoch": 0.9117836221731649, "grad_norm": 1.8587667603863307, "learning_rate": 2.027190181552757e-08, "loss": 0.1297, "step": 52454 }, { "epoch": 0.9118010047106677, "grad_norm": 0.8774163479433016, "learning_rate": 2.0263968466269878e-08, "loss": 0.1145, "step": 52455 }, { "epoch": 0.9118183872481704, "grad_norm": 1.0987092208073845, "learning_rate": 2.0256036637555096e-08, "loss": 0.1612, "step": 52456 }, { "epoch": 0.9118357697856733, "grad_norm": 1.6218728488354297, "learning_rate": 2.0248106329408265e-08, "loss": 0.1562, "step": 52457 }, { "epoch": 0.9118531523231761, "grad_norm": 1.3102885062431662, "learning_rate": 2.0240177541854475e-08, "loss": 0.1005, "step": 52458 }, { "epoch": 0.9118705348606789, "grad_norm": 1.0620273951209493, "learning_rate": 2.0232250274919038e-08, "loss": 0.1118, "step": 52459 }, { "epoch": 0.9118879173981818, "grad_norm": 1.2955119126838468, "learning_rate": 2.022432452862699e-08, "loss": 0.1226, "step": 52460 }, { "epoch": 0.9119052999356846, "grad_norm": 1.4558851852159977, "learning_rate": 2.0216400303003424e-08, "loss": 0.1393, "step": 52461 }, { "epoch": 0.9119226824731874, "grad_norm": 1.058679427913872, "learning_rate": 2.0208477598073425e-08, "loss": 0.1411, "step": 52462 }, { "epoch": 0.9119400650106902, "grad_norm": 1.0466382631125333, "learning_rate": 2.02005564138622e-08, "loss": 0.1283, "step": 52463 }, { "epoch": 0.9119574475481931, "grad_norm": 1.255107199455598, "learning_rate": 2.0192636750394787e-08, "loss": 0.1028, "step": 52464 }, { "epoch": 0.9119748300856959, "grad_norm": 1.41220648451866, "learning_rate": 2.0184718607696328e-08, "loss": 0.1612, "step": 52465 }, { "epoch": 0.9119922126231987, "grad_norm": 2.2052527746211052, "learning_rate": 2.0176801985791915e-08, "loss": 0.1747, "step": 52466 }, { "epoch": 0.9120095951607016, "grad_norm": 1.3818319223281772, "learning_rate": 2.016888688470658e-08, "loss": 0.1165, "step": 52467 }, { "epoch": 0.9120269776982044, "grad_norm": 1.7198789576799551, "learning_rate": 2.016097330446548e-08, "loss": 0.1295, "step": 52468 }, { "epoch": 0.9120443602357072, "grad_norm": 2.3025216593220983, "learning_rate": 2.0153061245093693e-08, "loss": 0.1966, "step": 52469 }, { "epoch": 0.9120617427732101, "grad_norm": 2.138737701639728, "learning_rate": 2.014515070661621e-08, "loss": 0.1462, "step": 52470 }, { "epoch": 0.9120791253107129, "grad_norm": 1.96806774095434, "learning_rate": 2.0137241689058226e-08, "loss": 0.1916, "step": 52471 }, { "epoch": 0.9120965078482157, "grad_norm": 2.414362505449144, "learning_rate": 2.0129334192444835e-08, "loss": 0.1705, "step": 52472 }, { "epoch": 0.9121138903857186, "grad_norm": 0.9888467531353661, "learning_rate": 2.0121428216800908e-08, "loss": 0.0919, "step": 52473 }, { "epoch": 0.9121312729232214, "grad_norm": 2.1217972976266317, "learning_rate": 2.011352376215164e-08, "loss": 0.2434, "step": 52474 }, { "epoch": 0.9121486554607242, "grad_norm": 1.5038083619026965, "learning_rate": 2.0105620828522074e-08, "loss": 0.1681, "step": 52475 }, { "epoch": 0.9121660379982269, "grad_norm": 4.458859784830957, "learning_rate": 2.009771941593724e-08, "loss": 0.1621, "step": 52476 }, { "epoch": 0.9121834205357298, "grad_norm": 1.6383496929253172, "learning_rate": 2.008981952442218e-08, "loss": 0.1314, "step": 52477 }, { "epoch": 0.9122008030732326, "grad_norm": 1.717319714872222, "learning_rate": 2.0081921154001924e-08, "loss": 0.2615, "step": 52478 }, { "epoch": 0.9122181856107354, "grad_norm": 2.4777391056498805, "learning_rate": 2.0074024304701563e-08, "loss": 0.1604, "step": 52479 }, { "epoch": 0.9122355681482383, "grad_norm": 1.5686177614503476, "learning_rate": 2.0066128976546082e-08, "loss": 0.1372, "step": 52480 }, { "epoch": 0.9122529506857411, "grad_norm": 2.1766460012731335, "learning_rate": 2.0058235169560457e-08, "loss": 0.1875, "step": 52481 }, { "epoch": 0.9122703332232439, "grad_norm": 1.2040379856892087, "learning_rate": 2.0050342883769776e-08, "loss": 0.1346, "step": 52482 }, { "epoch": 0.9122877157607467, "grad_norm": 1.055613162725241, "learning_rate": 2.0042452119199082e-08, "loss": 0.1288, "step": 52483 }, { "epoch": 0.9123050982982496, "grad_norm": 1.7671617003379025, "learning_rate": 2.0034562875873294e-08, "loss": 0.1749, "step": 52484 }, { "epoch": 0.9123224808357524, "grad_norm": 2.1775448749933135, "learning_rate": 2.0026675153817507e-08, "loss": 0.1188, "step": 52485 }, { "epoch": 0.9123398633732552, "grad_norm": 1.356024875823293, "learning_rate": 2.001878895305664e-08, "loss": 0.1271, "step": 52486 }, { "epoch": 0.9123572459107581, "grad_norm": 1.7571953486005223, "learning_rate": 2.0010904273615735e-08, "loss": 0.1574, "step": 52487 }, { "epoch": 0.9123746284482609, "grad_norm": 1.4287994322672655, "learning_rate": 2.0003021115519826e-08, "loss": 0.1384, "step": 52488 }, { "epoch": 0.9123920109857637, "grad_norm": 1.4416305549585677, "learning_rate": 1.999513947879372e-08, "loss": 0.1375, "step": 52489 }, { "epoch": 0.9124093935232666, "grad_norm": 0.9248719021157772, "learning_rate": 1.9987259363462627e-08, "loss": 0.1612, "step": 52490 }, { "epoch": 0.9124267760607694, "grad_norm": 1.6393885972418443, "learning_rate": 1.9979380769551467e-08, "loss": 0.1238, "step": 52491 }, { "epoch": 0.9124441585982722, "grad_norm": 1.5622940988346639, "learning_rate": 1.9971503697085112e-08, "loss": 0.1263, "step": 52492 }, { "epoch": 0.912461541135775, "grad_norm": 0.850445574548158, "learning_rate": 1.9963628146088484e-08, "loss": 0.1028, "step": 52493 }, { "epoch": 0.9124789236732779, "grad_norm": 1.2633865147423036, "learning_rate": 1.995575411658673e-08, "loss": 0.2404, "step": 52494 }, { "epoch": 0.9124963062107806, "grad_norm": 1.3172333126331581, "learning_rate": 1.9947881608604723e-08, "loss": 0.1403, "step": 52495 }, { "epoch": 0.9125136887482834, "grad_norm": 2.6664504850628314, "learning_rate": 1.9940010622167437e-08, "loss": 0.2384, "step": 52496 }, { "epoch": 0.9125310712857863, "grad_norm": 3.3157543096747255, "learning_rate": 1.9932141157299744e-08, "loss": 0.301, "step": 52497 }, { "epoch": 0.9125484538232891, "grad_norm": 1.0771998096378887, "learning_rate": 1.992427321402673e-08, "loss": 0.1317, "step": 52498 }, { "epoch": 0.9125658363607919, "grad_norm": 0.9224113336851357, "learning_rate": 1.9916406792373165e-08, "loss": 0.1503, "step": 52499 }, { "epoch": 0.9125832188982947, "grad_norm": 0.9950768920733278, "learning_rate": 1.9908541892364072e-08, "loss": 0.0766, "step": 52500 }, { "epoch": 0.9126006014357976, "grad_norm": 1.0638202982743128, "learning_rate": 1.9900678514024327e-08, "loss": 0.137, "step": 52501 }, { "epoch": 0.9126179839733004, "grad_norm": 0.8513261277774029, "learning_rate": 1.9892816657378908e-08, "loss": 0.1066, "step": 52502 }, { "epoch": 0.9126353665108032, "grad_norm": 1.3205832218949813, "learning_rate": 1.988495632245274e-08, "loss": 0.1093, "step": 52503 }, { "epoch": 0.9126527490483061, "grad_norm": 0.9256184530964182, "learning_rate": 1.9877097509270745e-08, "loss": 0.091, "step": 52504 }, { "epoch": 0.9126701315858089, "grad_norm": 1.1724594915885203, "learning_rate": 1.9869240217857742e-08, "loss": 0.084, "step": 52505 }, { "epoch": 0.9126875141233117, "grad_norm": 1.7568733042166622, "learning_rate": 1.986138444823876e-08, "loss": 0.128, "step": 52506 }, { "epoch": 0.9127048966608146, "grad_norm": 1.297592233688811, "learning_rate": 1.985353020043856e-08, "loss": 0.1281, "step": 52507 }, { "epoch": 0.9127222791983174, "grad_norm": 1.1194998349340577, "learning_rate": 1.9845677474482182e-08, "loss": 0.1122, "step": 52508 }, { "epoch": 0.9127396617358202, "grad_norm": 2.4811265105911353, "learning_rate": 1.983782627039432e-08, "loss": 0.1792, "step": 52509 }, { "epoch": 0.912757044273323, "grad_norm": 1.4192925140007606, "learning_rate": 1.9829976588200125e-08, "loss": 0.139, "step": 52510 }, { "epoch": 0.9127744268108259, "grad_norm": 2.357949218502653, "learning_rate": 1.9822128427924246e-08, "loss": 0.1439, "step": 52511 }, { "epoch": 0.9127918093483287, "grad_norm": 1.5917087411791562, "learning_rate": 1.9814281789591658e-08, "loss": 0.1602, "step": 52512 }, { "epoch": 0.9128091918858315, "grad_norm": 0.9667170318542766, "learning_rate": 1.980643667322718e-08, "loss": 0.1422, "step": 52513 }, { "epoch": 0.9128265744233344, "grad_norm": 0.9759598247321524, "learning_rate": 1.9798593078855786e-08, "loss": 0.089, "step": 52514 }, { "epoch": 0.9128439569608371, "grad_norm": 1.478786770152184, "learning_rate": 1.9790751006502237e-08, "loss": 0.1398, "step": 52515 }, { "epoch": 0.9128613394983399, "grad_norm": 0.9127264547935215, "learning_rate": 1.978291045619135e-08, "loss": 0.0952, "step": 52516 }, { "epoch": 0.9128787220358427, "grad_norm": 1.5385310082548576, "learning_rate": 1.977507142794821e-08, "loss": 0.1713, "step": 52517 }, { "epoch": 0.9128961045733456, "grad_norm": 1.2837916678681924, "learning_rate": 1.976723392179741e-08, "loss": 0.0854, "step": 52518 }, { "epoch": 0.9129134871108484, "grad_norm": 1.7425242193423793, "learning_rate": 1.9759397937763824e-08, "loss": 0.1165, "step": 52519 }, { "epoch": 0.9129308696483512, "grad_norm": 1.7116618519147964, "learning_rate": 1.9751563475872368e-08, "loss": 0.195, "step": 52520 }, { "epoch": 0.9129482521858541, "grad_norm": 1.592033256135968, "learning_rate": 1.9743730536147862e-08, "loss": 0.1483, "step": 52521 }, { "epoch": 0.9129656347233569, "grad_norm": 1.7656271973727158, "learning_rate": 1.973589911861512e-08, "loss": 0.1262, "step": 52522 }, { "epoch": 0.9129830172608597, "grad_norm": 1.2488513753074484, "learning_rate": 1.972806922329895e-08, "loss": 0.1658, "step": 52523 }, { "epoch": 0.9130003997983626, "grad_norm": 1.317664447969443, "learning_rate": 1.9720240850224224e-08, "loss": 0.1459, "step": 52524 }, { "epoch": 0.9130177823358654, "grad_norm": 1.4898348138943043, "learning_rate": 1.9712413999415644e-08, "loss": 0.1228, "step": 52525 }, { "epoch": 0.9130351648733682, "grad_norm": 1.089464374899494, "learning_rate": 1.9704588670898138e-08, "loss": 0.1548, "step": 52526 }, { "epoch": 0.913052547410871, "grad_norm": 1.4784609140473925, "learning_rate": 1.9696764864696458e-08, "loss": 0.2073, "step": 52527 }, { "epoch": 0.9130699299483739, "grad_norm": 2.6505682644355586, "learning_rate": 1.968894258083531e-08, "loss": 0.2023, "step": 52528 }, { "epoch": 0.9130873124858767, "grad_norm": 1.070897427743716, "learning_rate": 1.968112181933973e-08, "loss": 0.134, "step": 52529 }, { "epoch": 0.9131046950233795, "grad_norm": 2.1550089283511182, "learning_rate": 1.9673302580234308e-08, "loss": 0.158, "step": 52530 }, { "epoch": 0.9131220775608824, "grad_norm": 2.2653306940316815, "learning_rate": 1.9665484863543747e-08, "loss": 0.1182, "step": 52531 }, { "epoch": 0.9131394600983852, "grad_norm": 0.8465276474725361, "learning_rate": 1.9657668669293082e-08, "loss": 0.1255, "step": 52532 }, { "epoch": 0.913156842635888, "grad_norm": 1.6167202805926086, "learning_rate": 1.9649853997506904e-08, "loss": 0.1993, "step": 52533 }, { "epoch": 0.9131742251733909, "grad_norm": 1.2255770616891124, "learning_rate": 1.964204084821003e-08, "loss": 0.1828, "step": 52534 }, { "epoch": 0.9131916077108936, "grad_norm": 1.2761594182636327, "learning_rate": 1.9634229221427266e-08, "loss": 0.1472, "step": 52535 }, { "epoch": 0.9132089902483964, "grad_norm": 1.3214753927932628, "learning_rate": 1.9626419117183323e-08, "loss": 0.1704, "step": 52536 }, { "epoch": 0.9132263727858992, "grad_norm": 1.1666902881768397, "learning_rate": 1.9618610535502954e-08, "loss": 0.1405, "step": 52537 }, { "epoch": 0.9132437553234021, "grad_norm": 2.0669506683635563, "learning_rate": 1.961080347641092e-08, "loss": 0.133, "step": 52538 }, { "epoch": 0.9132611378609049, "grad_norm": 1.3258996123239837, "learning_rate": 1.960299793993192e-08, "loss": 0.1174, "step": 52539 }, { "epoch": 0.9132785203984077, "grad_norm": 0.9656127582167302, "learning_rate": 1.959519392609077e-08, "loss": 0.1253, "step": 52540 }, { "epoch": 0.9132959029359106, "grad_norm": 1.1670335607126296, "learning_rate": 1.9587391434912227e-08, "loss": 0.1667, "step": 52541 }, { "epoch": 0.9133132854734134, "grad_norm": 1.5836293156507741, "learning_rate": 1.957959046642099e-08, "loss": 0.1574, "step": 52542 }, { "epoch": 0.9133306680109162, "grad_norm": 1.481175851648715, "learning_rate": 1.9571791020641658e-08, "loss": 0.1539, "step": 52543 }, { "epoch": 0.913348050548419, "grad_norm": 0.8847794547857762, "learning_rate": 1.9563993097599097e-08, "loss": 0.0775, "step": 52544 }, { "epoch": 0.9133654330859219, "grad_norm": 2.142471011187135, "learning_rate": 1.9556196697318007e-08, "loss": 0.2279, "step": 52545 }, { "epoch": 0.9133828156234247, "grad_norm": 5.788652606406496, "learning_rate": 1.9548401819823092e-08, "loss": 0.3327, "step": 52546 }, { "epoch": 0.9134001981609275, "grad_norm": 1.1703781208920883, "learning_rate": 1.954060846513894e-08, "loss": 0.1594, "step": 52547 }, { "epoch": 0.9134175806984304, "grad_norm": 1.4312467378397136, "learning_rate": 1.9532816633290484e-08, "loss": 0.1571, "step": 52548 }, { "epoch": 0.9134349632359332, "grad_norm": 1.6502550756514998, "learning_rate": 1.9525026324302253e-08, "loss": 0.1552, "step": 52549 }, { "epoch": 0.913452345773436, "grad_norm": 1.5260049840863736, "learning_rate": 1.951723753819895e-08, "loss": 0.164, "step": 52550 }, { "epoch": 0.9134697283109389, "grad_norm": 1.075751799031334, "learning_rate": 1.9509450275005278e-08, "loss": 0.1301, "step": 52551 }, { "epoch": 0.9134871108484417, "grad_norm": 1.0794033433840378, "learning_rate": 1.9501664534745942e-08, "loss": 0.1825, "step": 52552 }, { "epoch": 0.9135044933859445, "grad_norm": 1.3055307041371191, "learning_rate": 1.9493880317445645e-08, "loss": 0.1553, "step": 52553 }, { "epoch": 0.9135218759234474, "grad_norm": 1.6195835649639698, "learning_rate": 1.948609762312897e-08, "loss": 0.2067, "step": 52554 }, { "epoch": 0.9135392584609501, "grad_norm": 1.2492235991626084, "learning_rate": 1.947831645182063e-08, "loss": 0.1385, "step": 52555 }, { "epoch": 0.9135566409984529, "grad_norm": 1.0772145644976465, "learning_rate": 1.9470536803545323e-08, "loss": 0.1155, "step": 52556 }, { "epoch": 0.9135740235359557, "grad_norm": 1.135689970459112, "learning_rate": 1.9462758678327694e-08, "loss": 0.1495, "step": 52557 }, { "epoch": 0.9135914060734586, "grad_norm": 1.9806812397260682, "learning_rate": 1.9454982076192396e-08, "loss": 0.1674, "step": 52558 }, { "epoch": 0.9136087886109614, "grad_norm": 2.063114943671401, "learning_rate": 1.9447206997163957e-08, "loss": 0.2095, "step": 52559 }, { "epoch": 0.9136261711484642, "grad_norm": 1.577809922633292, "learning_rate": 1.9439433441267196e-08, "loss": 0.1538, "step": 52560 }, { "epoch": 0.9136435536859671, "grad_norm": 0.9799871335086159, "learning_rate": 1.94316614085267e-08, "loss": 0.231, "step": 52561 }, { "epoch": 0.9136609362234699, "grad_norm": 1.8451612518285774, "learning_rate": 1.942389089896701e-08, "loss": 0.1532, "step": 52562 }, { "epoch": 0.9136783187609727, "grad_norm": 0.748157126997732, "learning_rate": 1.941612191261288e-08, "loss": 0.0839, "step": 52563 }, { "epoch": 0.9136957012984755, "grad_norm": 1.4454112695888734, "learning_rate": 1.9408354449488907e-08, "loss": 0.1662, "step": 52564 }, { "epoch": 0.9137130838359784, "grad_norm": 0.9947130517000914, "learning_rate": 1.940058850961962e-08, "loss": 0.1206, "step": 52565 }, { "epoch": 0.9137304663734812, "grad_norm": 2.2670937312364594, "learning_rate": 1.9392824093029725e-08, "loss": 0.1595, "step": 52566 }, { "epoch": 0.913747848910984, "grad_norm": 1.263493362066325, "learning_rate": 1.9385061199743813e-08, "loss": 0.1294, "step": 52567 }, { "epoch": 0.9137652314484869, "grad_norm": 0.8667956671385332, "learning_rate": 1.937729982978653e-08, "loss": 0.1213, "step": 52568 }, { "epoch": 0.9137826139859897, "grad_norm": 1.6144692342473652, "learning_rate": 1.936953998318236e-08, "loss": 0.1628, "step": 52569 }, { "epoch": 0.9137999965234925, "grad_norm": 2.154648639364327, "learning_rate": 1.9361781659955945e-08, "loss": 0.2304, "step": 52570 }, { "epoch": 0.9138173790609954, "grad_norm": 1.1846877516232897, "learning_rate": 1.935402486013188e-08, "loss": 0.1295, "step": 52571 }, { "epoch": 0.9138347615984982, "grad_norm": 1.1627225818635258, "learning_rate": 1.9346269583734863e-08, "loss": 0.1577, "step": 52572 }, { "epoch": 0.913852144136001, "grad_norm": 1.0701504349664908, "learning_rate": 1.9338515830789383e-08, "loss": 0.1144, "step": 52573 }, { "epoch": 0.9138695266735039, "grad_norm": 1.0717474288388111, "learning_rate": 1.933076360131991e-08, "loss": 0.1357, "step": 52574 }, { "epoch": 0.9138869092110066, "grad_norm": 0.9901319274416474, "learning_rate": 1.9323012895351155e-08, "loss": 0.1764, "step": 52575 }, { "epoch": 0.9139042917485094, "grad_norm": 1.2429775871718427, "learning_rate": 1.9315263712907648e-08, "loss": 0.1247, "step": 52576 }, { "epoch": 0.9139216742860122, "grad_norm": 0.922728059013219, "learning_rate": 1.930751605401393e-08, "loss": 0.1724, "step": 52577 }, { "epoch": 0.9139390568235151, "grad_norm": 1.3680048066428032, "learning_rate": 1.9299769918694532e-08, "loss": 0.1628, "step": 52578 }, { "epoch": 0.9139564393610179, "grad_norm": 1.6218259643970874, "learning_rate": 1.9292025306974156e-08, "loss": 0.17, "step": 52579 }, { "epoch": 0.9139738218985207, "grad_norm": 1.4395461870676063, "learning_rate": 1.9284282218877234e-08, "loss": 0.2093, "step": 52580 }, { "epoch": 0.9139912044360236, "grad_norm": 1.3998365202907366, "learning_rate": 1.9276540654428296e-08, "loss": 0.135, "step": 52581 }, { "epoch": 0.9140085869735264, "grad_norm": 1.7245636500041446, "learning_rate": 1.926880061365177e-08, "loss": 0.1497, "step": 52582 }, { "epoch": 0.9140259695110292, "grad_norm": 1.372789830876276, "learning_rate": 1.9261062096572466e-08, "loss": 0.1978, "step": 52583 }, { "epoch": 0.914043352048532, "grad_norm": 1.191111644834339, "learning_rate": 1.92533251032147e-08, "loss": 0.1397, "step": 52584 }, { "epoch": 0.9140607345860349, "grad_norm": 1.152308943490463, "learning_rate": 1.9245589633603065e-08, "loss": 0.1592, "step": 52585 }, { "epoch": 0.9140781171235377, "grad_norm": 1.598045446106065, "learning_rate": 1.9237855687762093e-08, "loss": 0.1829, "step": 52586 }, { "epoch": 0.9140954996610405, "grad_norm": 1.3647118214151295, "learning_rate": 1.923012326571627e-08, "loss": 0.1671, "step": 52587 }, { "epoch": 0.9141128821985434, "grad_norm": 1.621525621568507, "learning_rate": 1.9222392367490125e-08, "loss": 0.1372, "step": 52588 }, { "epoch": 0.9141302647360462, "grad_norm": 2.45662862098439, "learning_rate": 1.921466299310809e-08, "loss": 0.1483, "step": 52589 }, { "epoch": 0.914147647273549, "grad_norm": 1.5942115298233825, "learning_rate": 1.9206935142594695e-08, "loss": 0.1612, "step": 52590 }, { "epoch": 0.9141650298110519, "grad_norm": 1.346701431436971, "learning_rate": 1.919920881597453e-08, "loss": 0.1187, "step": 52591 }, { "epoch": 0.9141824123485547, "grad_norm": 0.9794561711843541, "learning_rate": 1.919148401327203e-08, "loss": 0.0891, "step": 52592 }, { "epoch": 0.9141997948860575, "grad_norm": 1.9035719818640997, "learning_rate": 1.9183760734511556e-08, "loss": 0.1408, "step": 52593 }, { "epoch": 0.9142171774235603, "grad_norm": 1.3896184401461786, "learning_rate": 1.917603897971781e-08, "loss": 0.1911, "step": 52594 }, { "epoch": 0.9142345599610631, "grad_norm": 0.8152063297776643, "learning_rate": 1.9168318748915058e-08, "loss": 0.114, "step": 52595 }, { "epoch": 0.9142519424985659, "grad_norm": 1.3846113911932463, "learning_rate": 1.9160600042127938e-08, "loss": 0.1465, "step": 52596 }, { "epoch": 0.9142693250360687, "grad_norm": 1.701912795535941, "learning_rate": 1.915288285938077e-08, "loss": 0.1479, "step": 52597 }, { "epoch": 0.9142867075735716, "grad_norm": 1.299530454172155, "learning_rate": 1.9145167200698032e-08, "loss": 0.1208, "step": 52598 }, { "epoch": 0.9143040901110744, "grad_norm": 1.8066205648519345, "learning_rate": 1.913745306610437e-08, "loss": 0.1443, "step": 52599 }, { "epoch": 0.9143214726485772, "grad_norm": 0.8468580722826815, "learning_rate": 1.9129740455624045e-08, "loss": 0.117, "step": 52600 }, { "epoch": 0.91433885518608, "grad_norm": 1.3258564512048965, "learning_rate": 1.9122029369281477e-08, "loss": 0.1069, "step": 52601 }, { "epoch": 0.9143562377235829, "grad_norm": 1.8754548961588275, "learning_rate": 1.9114319807101208e-08, "loss": 0.1583, "step": 52602 }, { "epoch": 0.9143736202610857, "grad_norm": 0.8541398916348012, "learning_rate": 1.9106611769107662e-08, "loss": 0.1377, "step": 52603 }, { "epoch": 0.9143910027985885, "grad_norm": 1.5637075424673097, "learning_rate": 1.9098905255325258e-08, "loss": 0.1777, "step": 52604 }, { "epoch": 0.9144083853360914, "grad_norm": 0.9172534592980963, "learning_rate": 1.9091200265778428e-08, "loss": 0.1159, "step": 52605 }, { "epoch": 0.9144257678735942, "grad_norm": 0.9202978420899129, "learning_rate": 1.9083496800491593e-08, "loss": 0.1485, "step": 52606 }, { "epoch": 0.914443150411097, "grad_norm": 1.0385792060409027, "learning_rate": 1.9075794859489125e-08, "loss": 0.0966, "step": 52607 }, { "epoch": 0.9144605329485999, "grad_norm": 1.3954679013035074, "learning_rate": 1.9068094442795502e-08, "loss": 0.1147, "step": 52608 }, { "epoch": 0.9144779154861027, "grad_norm": 1.5707977297000537, "learning_rate": 1.906039555043498e-08, "loss": 0.2134, "step": 52609 }, { "epoch": 0.9144952980236055, "grad_norm": 1.4315620326402645, "learning_rate": 1.9052698182432158e-08, "loss": 0.1094, "step": 52610 }, { "epoch": 0.9145126805611083, "grad_norm": 2.0560231180525816, "learning_rate": 1.9045002338811455e-08, "loss": 0.1375, "step": 52611 }, { "epoch": 0.9145300630986112, "grad_norm": 1.459826662733647, "learning_rate": 1.903730801959702e-08, "loss": 0.1253, "step": 52612 }, { "epoch": 0.914547445636114, "grad_norm": 1.2215043015187141, "learning_rate": 1.9029615224813445e-08, "loss": 0.1199, "step": 52613 }, { "epoch": 0.9145648281736168, "grad_norm": 0.7637018332683294, "learning_rate": 1.902192395448504e-08, "loss": 0.1345, "step": 52614 }, { "epoch": 0.9145822107111196, "grad_norm": 0.46388257743288885, "learning_rate": 1.9014234208636182e-08, "loss": 0.095, "step": 52615 }, { "epoch": 0.9145995932486224, "grad_norm": 0.8082361983585309, "learning_rate": 1.900654598729129e-08, "loss": 0.0955, "step": 52616 }, { "epoch": 0.9146169757861252, "grad_norm": 0.9348648658134966, "learning_rate": 1.899885929047462e-08, "loss": 0.1479, "step": 52617 }, { "epoch": 0.914634358323628, "grad_norm": 1.1408961062033112, "learning_rate": 1.8991174118210717e-08, "loss": 0.1437, "step": 52618 }, { "epoch": 0.9146517408611309, "grad_norm": 1.6674602105159753, "learning_rate": 1.8983490470523777e-08, "loss": 0.1647, "step": 52619 }, { "epoch": 0.9146691233986337, "grad_norm": 1.4719137324826264, "learning_rate": 1.897580834743817e-08, "loss": 0.1601, "step": 52620 }, { "epoch": 0.9146865059361365, "grad_norm": 0.9411580863145299, "learning_rate": 1.8968127748978324e-08, "loss": 0.1037, "step": 52621 }, { "epoch": 0.9147038884736394, "grad_norm": 1.105229768465685, "learning_rate": 1.896044867516855e-08, "loss": 0.1197, "step": 52622 }, { "epoch": 0.9147212710111422, "grad_norm": 2.0288100078595286, "learning_rate": 1.895277112603322e-08, "loss": 0.1829, "step": 52623 }, { "epoch": 0.914738653548645, "grad_norm": 3.462631547187798, "learning_rate": 1.8945095101596585e-08, "loss": 0.1464, "step": 52624 }, { "epoch": 0.9147560360861479, "grad_norm": 1.392175926982864, "learning_rate": 1.8937420601883025e-08, "loss": 0.1627, "step": 52625 }, { "epoch": 0.9147734186236507, "grad_norm": 1.6711032138719422, "learning_rate": 1.8929747626916848e-08, "loss": 0.1008, "step": 52626 }, { "epoch": 0.9147908011611535, "grad_norm": 1.3409463744464838, "learning_rate": 1.8922076176722423e-08, "loss": 0.1815, "step": 52627 }, { "epoch": 0.9148081836986564, "grad_norm": 1.2770459138246422, "learning_rate": 1.8914406251323957e-08, "loss": 0.1275, "step": 52628 }, { "epoch": 0.9148255662361592, "grad_norm": 1.3125919387523663, "learning_rate": 1.8906737850745867e-08, "loss": 0.1316, "step": 52629 }, { "epoch": 0.914842948773662, "grad_norm": 1.0769166009506068, "learning_rate": 1.8899070975012534e-08, "loss": 0.1282, "step": 52630 }, { "epoch": 0.9148603313111648, "grad_norm": 1.699651941890181, "learning_rate": 1.889140562414804e-08, "loss": 0.1316, "step": 52631 }, { "epoch": 0.9148777138486677, "grad_norm": 2.079565026967219, "learning_rate": 1.8883741798176754e-08, "loss": 0.1551, "step": 52632 }, { "epoch": 0.9148950963861705, "grad_norm": 1.086408488888474, "learning_rate": 1.8876079497123053e-08, "loss": 0.1135, "step": 52633 }, { "epoch": 0.9149124789236732, "grad_norm": 1.4660786632366132, "learning_rate": 1.8868418721011135e-08, "loss": 0.1184, "step": 52634 }, { "epoch": 0.914929861461176, "grad_norm": 1.2453082495501344, "learning_rate": 1.8860759469865318e-08, "loss": 0.121, "step": 52635 }, { "epoch": 0.9149472439986789, "grad_norm": 0.8659496366374803, "learning_rate": 1.8853101743709853e-08, "loss": 0.1504, "step": 52636 }, { "epoch": 0.9149646265361817, "grad_norm": 1.8476194910528494, "learning_rate": 1.8845445542569172e-08, "loss": 0.1254, "step": 52637 }, { "epoch": 0.9149820090736845, "grad_norm": 1.1490456461921652, "learning_rate": 1.8837790866467306e-08, "loss": 0.1341, "step": 52638 }, { "epoch": 0.9149993916111874, "grad_norm": 1.883879651261922, "learning_rate": 1.883013771542863e-08, "loss": 0.1627, "step": 52639 }, { "epoch": 0.9150167741486902, "grad_norm": 1.2869461660827066, "learning_rate": 1.8822486089477286e-08, "loss": 0.1587, "step": 52640 }, { "epoch": 0.915034156686193, "grad_norm": 0.9612937250494068, "learning_rate": 1.8814835988637755e-08, "loss": 0.1039, "step": 52641 }, { "epoch": 0.9150515392236959, "grad_norm": 1.8355407038477338, "learning_rate": 1.880718741293408e-08, "loss": 0.0979, "step": 52642 }, { "epoch": 0.9150689217611987, "grad_norm": 1.4982215758376325, "learning_rate": 1.879954036239062e-08, "loss": 0.1034, "step": 52643 }, { "epoch": 0.9150863042987015, "grad_norm": 1.5705715774135653, "learning_rate": 1.8791894837031586e-08, "loss": 0.1834, "step": 52644 }, { "epoch": 0.9151036868362044, "grad_norm": 0.8355759125672442, "learning_rate": 1.8784250836881176e-08, "loss": 0.1156, "step": 52645 }, { "epoch": 0.9151210693737072, "grad_norm": 1.9133156790513275, "learning_rate": 1.8776608361963652e-08, "loss": 0.1598, "step": 52646 }, { "epoch": 0.91513845191121, "grad_norm": 1.2952993328625348, "learning_rate": 1.8768967412303217e-08, "loss": 0.2055, "step": 52647 }, { "epoch": 0.9151558344487128, "grad_norm": 1.3459123813970428, "learning_rate": 1.8761327987924014e-08, "loss": 0.1908, "step": 52648 }, { "epoch": 0.9151732169862157, "grad_norm": 0.8451767059583527, "learning_rate": 1.8753690088850528e-08, "loss": 0.0816, "step": 52649 }, { "epoch": 0.9151905995237185, "grad_norm": 1.0604010364821606, "learning_rate": 1.8746053715106624e-08, "loss": 0.1541, "step": 52650 }, { "epoch": 0.9152079820612213, "grad_norm": 0.9937427724964079, "learning_rate": 1.8738418866716676e-08, "loss": 0.1246, "step": 52651 }, { "epoch": 0.9152253645987242, "grad_norm": 1.4088901784778003, "learning_rate": 1.8730785543704885e-08, "loss": 0.0992, "step": 52652 }, { "epoch": 0.915242747136227, "grad_norm": 1.3580918645645486, "learning_rate": 1.8723153746095456e-08, "loss": 0.1402, "step": 52653 }, { "epoch": 0.9152601296737297, "grad_norm": 1.5472581704507897, "learning_rate": 1.8715523473912532e-08, "loss": 0.1125, "step": 52654 }, { "epoch": 0.9152775122112325, "grad_norm": 1.3329251421879897, "learning_rate": 1.8707894727180317e-08, "loss": 0.1303, "step": 52655 }, { "epoch": 0.9152948947487354, "grad_norm": 1.0411428266372584, "learning_rate": 1.8700267505922964e-08, "loss": 0.1064, "step": 52656 }, { "epoch": 0.9153122772862382, "grad_norm": 1.378898528210291, "learning_rate": 1.8692641810164666e-08, "loss": 0.1536, "step": 52657 }, { "epoch": 0.915329659823741, "grad_norm": 1.1445238441892063, "learning_rate": 1.8685017639929633e-08, "loss": 0.1161, "step": 52658 }, { "epoch": 0.9153470423612439, "grad_norm": 1.1500443123592259, "learning_rate": 1.8677394995241957e-08, "loss": 0.1515, "step": 52659 }, { "epoch": 0.9153644248987467, "grad_norm": 1.7845870616876607, "learning_rate": 1.8669773876125837e-08, "loss": 0.1659, "step": 52660 }, { "epoch": 0.9153818074362495, "grad_norm": 0.945827867339007, "learning_rate": 1.866215428260548e-08, "loss": 0.1909, "step": 52661 }, { "epoch": 0.9153991899737524, "grad_norm": 1.845535198420604, "learning_rate": 1.865453621470503e-08, "loss": 0.1626, "step": 52662 }, { "epoch": 0.9154165725112552, "grad_norm": 0.8721018247416914, "learning_rate": 1.8646919672448467e-08, "loss": 0.0857, "step": 52663 }, { "epoch": 0.915433955048758, "grad_norm": 1.0292046847988194, "learning_rate": 1.8639304655860112e-08, "loss": 0.1094, "step": 52664 }, { "epoch": 0.9154513375862608, "grad_norm": 1.554447711668742, "learning_rate": 1.863169116496405e-08, "loss": 0.1157, "step": 52665 }, { "epoch": 0.9154687201237637, "grad_norm": 2.282941845562582, "learning_rate": 1.862407919978437e-08, "loss": 0.1598, "step": 52666 }, { "epoch": 0.9154861026612665, "grad_norm": 1.1648595954540244, "learning_rate": 1.8616468760345173e-08, "loss": 0.1118, "step": 52667 }, { "epoch": 0.9155034851987693, "grad_norm": 1.987020630286991, "learning_rate": 1.860885984667082e-08, "loss": 0.1428, "step": 52668 }, { "epoch": 0.9155208677362722, "grad_norm": 2.605355342793841, "learning_rate": 1.8601252458785132e-08, "loss": 0.1552, "step": 52669 }, { "epoch": 0.915538250273775, "grad_norm": 1.0907371320487835, "learning_rate": 1.8593646596712365e-08, "loss": 0.1013, "step": 52670 }, { "epoch": 0.9155556328112778, "grad_norm": 2.0240254192064793, "learning_rate": 1.8586042260476553e-08, "loss": 0.1314, "step": 52671 }, { "epoch": 0.9155730153487807, "grad_norm": 1.1168283526021179, "learning_rate": 1.8578439450101847e-08, "loss": 0.2064, "step": 52672 }, { "epoch": 0.9155903978862835, "grad_norm": 1.4341902126141888, "learning_rate": 1.857083816561239e-08, "loss": 0.089, "step": 52673 }, { "epoch": 0.9156077804237862, "grad_norm": 1.0150128537095782, "learning_rate": 1.8563238407032222e-08, "loss": 0.1323, "step": 52674 }, { "epoch": 0.915625162961289, "grad_norm": 1.447584975541827, "learning_rate": 1.8555640174385434e-08, "loss": 0.1635, "step": 52675 }, { "epoch": 0.9156425454987919, "grad_norm": 1.9136817804131667, "learning_rate": 1.8548043467696063e-08, "loss": 0.1721, "step": 52676 }, { "epoch": 0.9156599280362947, "grad_norm": 3.4506258062454274, "learning_rate": 1.8540448286988308e-08, "loss": 0.1877, "step": 52677 }, { "epoch": 0.9156773105737975, "grad_norm": 1.087062570326326, "learning_rate": 1.8532854632286098e-08, "loss": 0.1326, "step": 52678 }, { "epoch": 0.9156946931113004, "grad_norm": 0.9131394400023767, "learning_rate": 1.8525262503613527e-08, "loss": 0.1431, "step": 52679 }, { "epoch": 0.9157120756488032, "grad_norm": 1.6092526842458623, "learning_rate": 1.851767190099479e-08, "loss": 0.1078, "step": 52680 }, { "epoch": 0.915729458186306, "grad_norm": 1.039541923668144, "learning_rate": 1.8510082824453932e-08, "loss": 0.1175, "step": 52681 }, { "epoch": 0.9157468407238089, "grad_norm": 1.4391834488315898, "learning_rate": 1.8502495274014763e-08, "loss": 0.114, "step": 52682 }, { "epoch": 0.9157642232613117, "grad_norm": 1.4613105975103746, "learning_rate": 1.8494909249701596e-08, "loss": 0.1747, "step": 52683 }, { "epoch": 0.9157816057988145, "grad_norm": 1.2419741480114108, "learning_rate": 1.848732475153836e-08, "loss": 0.2166, "step": 52684 }, { "epoch": 0.9157989883363173, "grad_norm": 2.408075980069706, "learning_rate": 1.8479741779549142e-08, "loss": 0.1896, "step": 52685 }, { "epoch": 0.9158163708738202, "grad_norm": 1.2796820808418001, "learning_rate": 1.8472160333757924e-08, "loss": 0.1402, "step": 52686 }, { "epoch": 0.915833753411323, "grad_norm": 1.3243981349365697, "learning_rate": 1.8464580414188803e-08, "loss": 0.0962, "step": 52687 }, { "epoch": 0.9158511359488258, "grad_norm": 1.6768006838350187, "learning_rate": 1.84570020208657e-08, "loss": 0.1443, "step": 52688 }, { "epoch": 0.9158685184863287, "grad_norm": 1.32564108082938, "learning_rate": 1.8449425153812758e-08, "loss": 0.149, "step": 52689 }, { "epoch": 0.9158859010238315, "grad_norm": 1.5944604797685418, "learning_rate": 1.84418498130538e-08, "loss": 0.1152, "step": 52690 }, { "epoch": 0.9159032835613343, "grad_norm": 1.1497902315114648, "learning_rate": 1.843427599861308e-08, "loss": 0.1814, "step": 52691 }, { "epoch": 0.9159206660988372, "grad_norm": 1.1243965762705757, "learning_rate": 1.8426703710514524e-08, "loss": 0.1088, "step": 52692 }, { "epoch": 0.91593804863634, "grad_norm": 1.0914993881312678, "learning_rate": 1.8419132948782112e-08, "loss": 0.1338, "step": 52693 }, { "epoch": 0.9159554311738427, "grad_norm": 1.6547175029985597, "learning_rate": 1.8411563713439714e-08, "loss": 0.138, "step": 52694 }, { "epoch": 0.9159728137113455, "grad_norm": 2.016877734438583, "learning_rate": 1.840399600451148e-08, "loss": 0.1547, "step": 52695 }, { "epoch": 0.9159901962488484, "grad_norm": 2.126813318607824, "learning_rate": 1.8396429822021387e-08, "loss": 0.1506, "step": 52696 }, { "epoch": 0.9160075787863512, "grad_norm": 1.025993506017158, "learning_rate": 1.838886516599336e-08, "loss": 0.1136, "step": 52697 }, { "epoch": 0.916024961323854, "grad_norm": 1.7104653717350926, "learning_rate": 1.838130203645133e-08, "loss": 0.1207, "step": 52698 }, { "epoch": 0.9160423438613569, "grad_norm": 1.3191702445121565, "learning_rate": 1.8373740433419436e-08, "loss": 0.181, "step": 52699 }, { "epoch": 0.9160597263988597, "grad_norm": 1.05073432257371, "learning_rate": 1.8366180356921557e-08, "loss": 0.1466, "step": 52700 }, { "epoch": 0.9160771089363625, "grad_norm": 1.4538450679242219, "learning_rate": 1.8358621806981554e-08, "loss": 0.1207, "step": 52701 }, { "epoch": 0.9160944914738653, "grad_norm": 0.917666148672169, "learning_rate": 1.8351064783623472e-08, "loss": 0.1677, "step": 52702 }, { "epoch": 0.9161118740113682, "grad_norm": 1.074582314485891, "learning_rate": 1.834350928687134e-08, "loss": 0.141, "step": 52703 }, { "epoch": 0.916129256548871, "grad_norm": 2.2119875007900194, "learning_rate": 1.8335955316748975e-08, "loss": 0.2387, "step": 52704 }, { "epoch": 0.9161466390863738, "grad_norm": 1.2558628075123133, "learning_rate": 1.8328402873280413e-08, "loss": 0.1556, "step": 52705 }, { "epoch": 0.9161640216238767, "grad_norm": 2.519797055774924, "learning_rate": 1.8320851956489524e-08, "loss": 0.1243, "step": 52706 }, { "epoch": 0.9161814041613795, "grad_norm": 1.9667201488316859, "learning_rate": 1.831330256640029e-08, "loss": 0.1731, "step": 52707 }, { "epoch": 0.9161987866988823, "grad_norm": 0.8266201057325242, "learning_rate": 1.830575470303658e-08, "loss": 0.1389, "step": 52708 }, { "epoch": 0.9162161692363852, "grad_norm": 3.128086744145663, "learning_rate": 1.829820836642232e-08, "loss": 0.237, "step": 52709 }, { "epoch": 0.916233551773888, "grad_norm": 2.4303078029095055, "learning_rate": 1.829066355658154e-08, "loss": 0.1259, "step": 52710 }, { "epoch": 0.9162509343113908, "grad_norm": 0.7043918568638518, "learning_rate": 1.8283120273538065e-08, "loss": 0.1609, "step": 52711 }, { "epoch": 0.9162683168488936, "grad_norm": 1.690024304994106, "learning_rate": 1.8275578517315872e-08, "loss": 0.0781, "step": 52712 }, { "epoch": 0.9162856993863965, "grad_norm": 1.1778593784743967, "learning_rate": 1.8268038287938713e-08, "loss": 0.1092, "step": 52713 }, { "epoch": 0.9163030819238992, "grad_norm": 1.4156907483167946, "learning_rate": 1.8260499585430633e-08, "loss": 0.1283, "step": 52714 }, { "epoch": 0.916320464461402, "grad_norm": 0.9016920764316054, "learning_rate": 1.8252962409815444e-08, "loss": 0.156, "step": 52715 }, { "epoch": 0.9163378469989049, "grad_norm": 3.2941392391920976, "learning_rate": 1.8245426761117068e-08, "loss": 0.2461, "step": 52716 }, { "epoch": 0.9163552295364077, "grad_norm": 1.3899185893433401, "learning_rate": 1.8237892639359374e-08, "loss": 0.1913, "step": 52717 }, { "epoch": 0.9163726120739105, "grad_norm": 0.742602761248907, "learning_rate": 1.8230360044566295e-08, "loss": 0.1251, "step": 52718 }, { "epoch": 0.9163899946114133, "grad_norm": 1.5162155669327804, "learning_rate": 1.8222828976761696e-08, "loss": 0.1493, "step": 52719 }, { "epoch": 0.9164073771489162, "grad_norm": 1.9193396484311016, "learning_rate": 1.821529943596939e-08, "loss": 0.2058, "step": 52720 }, { "epoch": 0.916424759686419, "grad_norm": 3.0928466715754173, "learning_rate": 1.8207771422213247e-08, "loss": 0.21, "step": 52721 }, { "epoch": 0.9164421422239218, "grad_norm": 1.4089455010375755, "learning_rate": 1.8200244935517138e-08, "loss": 0.1667, "step": 52722 }, { "epoch": 0.9164595247614247, "grad_norm": 1.0377091845490245, "learning_rate": 1.819271997590499e-08, "loss": 0.1118, "step": 52723 }, { "epoch": 0.9164769072989275, "grad_norm": 1.5480160458421535, "learning_rate": 1.818519654340056e-08, "loss": 0.1693, "step": 52724 }, { "epoch": 0.9164942898364303, "grad_norm": 1.5956996880875296, "learning_rate": 1.8177674638027774e-08, "loss": 0.1289, "step": 52725 }, { "epoch": 0.9165116723739332, "grad_norm": 1.5973449621594413, "learning_rate": 1.817015425981039e-08, "loss": 0.1652, "step": 52726 }, { "epoch": 0.916529054911436, "grad_norm": 1.110253908136882, "learning_rate": 1.8162635408772275e-08, "loss": 0.1135, "step": 52727 }, { "epoch": 0.9165464374489388, "grad_norm": 1.2640673354303367, "learning_rate": 1.8155118084937305e-08, "loss": 0.1535, "step": 52728 }, { "epoch": 0.9165638199864417, "grad_norm": 1.8349369309441552, "learning_rate": 1.814760228832918e-08, "loss": 0.146, "step": 52729 }, { "epoch": 0.9165812025239445, "grad_norm": 3.19172381901398, "learning_rate": 1.8140088018971878e-08, "loss": 0.2645, "step": 52730 }, { "epoch": 0.9165985850614473, "grad_norm": 1.0992188966383936, "learning_rate": 1.8132575276889162e-08, "loss": 0.1408, "step": 52731 }, { "epoch": 0.9166159675989501, "grad_norm": 1.25743776241099, "learning_rate": 1.812506406210479e-08, "loss": 0.1649, "step": 52732 }, { "epoch": 0.916633350136453, "grad_norm": 1.9650805873334385, "learning_rate": 1.811755437464263e-08, "loss": 0.13, "step": 52733 }, { "epoch": 0.9166507326739557, "grad_norm": 1.552962729529214, "learning_rate": 1.8110046214526443e-08, "loss": 0.1598, "step": 52734 }, { "epoch": 0.9166681152114585, "grad_norm": 1.2825824815584828, "learning_rate": 1.8102539581780095e-08, "loss": 0.1036, "step": 52735 }, { "epoch": 0.9166854977489614, "grad_norm": 1.1157955578773944, "learning_rate": 1.8095034476427295e-08, "loss": 0.1472, "step": 52736 }, { "epoch": 0.9167028802864642, "grad_norm": 1.915708314189035, "learning_rate": 1.8087530898491855e-08, "loss": 0.1268, "step": 52737 }, { "epoch": 0.916720262823967, "grad_norm": 1.6170040544068323, "learning_rate": 1.808002884799764e-08, "loss": 0.1287, "step": 52738 }, { "epoch": 0.9167376453614698, "grad_norm": 0.8800253075364979, "learning_rate": 1.8072528324968307e-08, "loss": 0.1356, "step": 52739 }, { "epoch": 0.9167550278989727, "grad_norm": 1.1537547436053037, "learning_rate": 1.8065029329427663e-08, "loss": 0.1178, "step": 52740 }, { "epoch": 0.9167724104364755, "grad_norm": 1.4203909831638457, "learning_rate": 1.8057531861399468e-08, "loss": 0.1381, "step": 52741 }, { "epoch": 0.9167897929739783, "grad_norm": 1.6662231961015137, "learning_rate": 1.805003592090759e-08, "loss": 0.1305, "step": 52742 }, { "epoch": 0.9168071755114812, "grad_norm": 1.0424869275569724, "learning_rate": 1.8042541507975685e-08, "loss": 0.1198, "step": 52743 }, { "epoch": 0.916824558048984, "grad_norm": 1.3205438410610377, "learning_rate": 1.8035048622627504e-08, "loss": 0.1007, "step": 52744 }, { "epoch": 0.9168419405864868, "grad_norm": 1.3670415127285502, "learning_rate": 1.8027557264886862e-08, "loss": 0.3663, "step": 52745 }, { "epoch": 0.9168593231239897, "grad_norm": 1.219157458533064, "learning_rate": 1.802006743477741e-08, "loss": 0.1955, "step": 52746 }, { "epoch": 0.9168767056614925, "grad_norm": 1.4574316099807656, "learning_rate": 1.8012579132323013e-08, "loss": 0.2745, "step": 52747 }, { "epoch": 0.9168940881989953, "grad_norm": 1.2515623777823885, "learning_rate": 1.8005092357547213e-08, "loss": 0.1825, "step": 52748 }, { "epoch": 0.9169114707364981, "grad_norm": 1.052018923862185, "learning_rate": 1.7997607110473932e-08, "loss": 0.1491, "step": 52749 }, { "epoch": 0.916928853274001, "grad_norm": 9.615295229665422, "learning_rate": 1.7990123391126877e-08, "loss": 0.2916, "step": 52750 }, { "epoch": 0.9169462358115038, "grad_norm": 1.0311443003922651, "learning_rate": 1.7982641199529693e-08, "loss": 0.1727, "step": 52751 }, { "epoch": 0.9169636183490066, "grad_norm": 1.1822572353937197, "learning_rate": 1.7975160535706026e-08, "loss": 0.1096, "step": 52752 }, { "epoch": 0.9169810008865095, "grad_norm": 4.429500483749316, "learning_rate": 1.7967681399679746e-08, "loss": 0.213, "step": 52753 }, { "epoch": 0.9169983834240122, "grad_norm": 1.5501924485325438, "learning_rate": 1.7960203791474503e-08, "loss": 0.156, "step": 52754 }, { "epoch": 0.917015765961515, "grad_norm": 1.8456194752032051, "learning_rate": 1.7952727711113945e-08, "loss": 0.149, "step": 52755 }, { "epoch": 0.9170331484990178, "grad_norm": 5.485437317738854, "learning_rate": 1.7945253158621776e-08, "loss": 0.1989, "step": 52756 }, { "epoch": 0.9170505310365207, "grad_norm": 1.1298166350275174, "learning_rate": 1.7937780134021806e-08, "loss": 0.1122, "step": 52757 }, { "epoch": 0.9170679135740235, "grad_norm": 1.126591621897609, "learning_rate": 1.7930308637337578e-08, "loss": 0.1794, "step": 52758 }, { "epoch": 0.9170852961115263, "grad_norm": 1.197105219870669, "learning_rate": 1.7922838668592843e-08, "loss": 0.1991, "step": 52759 }, { "epoch": 0.9171026786490292, "grad_norm": 0.9981717824647153, "learning_rate": 1.79153702278112e-08, "loss": 0.1101, "step": 52760 }, { "epoch": 0.917120061186532, "grad_norm": 2.37966480804762, "learning_rate": 1.790790331501646e-08, "loss": 0.2108, "step": 52761 }, { "epoch": 0.9171374437240348, "grad_norm": 1.822843078107567, "learning_rate": 1.7900437930232158e-08, "loss": 0.1457, "step": 52762 }, { "epoch": 0.9171548262615377, "grad_norm": 1.313231556364501, "learning_rate": 1.789297407348206e-08, "loss": 0.1681, "step": 52763 }, { "epoch": 0.9171722087990405, "grad_norm": 3.32564223523257, "learning_rate": 1.788551174478975e-08, "loss": 0.1804, "step": 52764 }, { "epoch": 0.9171895913365433, "grad_norm": 1.2204435788369214, "learning_rate": 1.7878050944178937e-08, "loss": 0.1353, "step": 52765 }, { "epoch": 0.9172069738740461, "grad_norm": 1.0777311084401273, "learning_rate": 1.787059167167321e-08, "loss": 0.1397, "step": 52766 }, { "epoch": 0.917224356411549, "grad_norm": 1.7252948524337641, "learning_rate": 1.7863133927296213e-08, "loss": 0.1454, "step": 52767 }, { "epoch": 0.9172417389490518, "grad_norm": 2.6222631854453926, "learning_rate": 1.7855677711071547e-08, "loss": 0.2463, "step": 52768 }, { "epoch": 0.9172591214865546, "grad_norm": 0.727760283105462, "learning_rate": 1.7848223023023024e-08, "loss": 0.2246, "step": 52769 }, { "epoch": 0.9172765040240575, "grad_norm": 1.9037233294889917, "learning_rate": 1.784076986317412e-08, "loss": 0.139, "step": 52770 }, { "epoch": 0.9172938865615603, "grad_norm": 1.8359054171495934, "learning_rate": 1.7833318231548434e-08, "loss": 0.0919, "step": 52771 }, { "epoch": 0.9173112690990631, "grad_norm": 0.8521920274579103, "learning_rate": 1.7825868128169662e-08, "loss": 0.0966, "step": 52772 }, { "epoch": 0.9173286516365659, "grad_norm": 1.0296200146844303, "learning_rate": 1.7818419553061404e-08, "loss": 0.0983, "step": 52773 }, { "epoch": 0.9173460341740687, "grad_norm": 0.9109842530171833, "learning_rate": 1.78109725062473e-08, "loss": 0.1118, "step": 52774 }, { "epoch": 0.9173634167115715, "grad_norm": 1.391470898205359, "learning_rate": 1.7803526987750894e-08, "loss": 0.2148, "step": 52775 }, { "epoch": 0.9173807992490743, "grad_norm": 1.7862673977118266, "learning_rate": 1.7796082997595772e-08, "loss": 0.133, "step": 52776 }, { "epoch": 0.9173981817865772, "grad_norm": 1.4521980908034406, "learning_rate": 1.778864053580559e-08, "loss": 0.1146, "step": 52777 }, { "epoch": 0.91741556432408, "grad_norm": 1.2871927526067923, "learning_rate": 1.778119960240393e-08, "loss": 0.147, "step": 52778 }, { "epoch": 0.9174329468615828, "grad_norm": 1.078337102046039, "learning_rate": 1.7773760197414224e-08, "loss": 0.1368, "step": 52779 }, { "epoch": 0.9174503293990857, "grad_norm": 1.6493869733406155, "learning_rate": 1.7766322320860284e-08, "loss": 0.1423, "step": 52780 }, { "epoch": 0.9174677119365885, "grad_norm": 1.3572094763179354, "learning_rate": 1.775888597276559e-08, "loss": 0.1311, "step": 52781 }, { "epoch": 0.9174850944740913, "grad_norm": 0.8845281036472684, "learning_rate": 1.775145115315374e-08, "loss": 0.1656, "step": 52782 }, { "epoch": 0.9175024770115942, "grad_norm": 1.6500477496305712, "learning_rate": 1.7744017862048153e-08, "loss": 0.1535, "step": 52783 }, { "epoch": 0.917519859549097, "grad_norm": 1.457831412629649, "learning_rate": 1.7736586099472594e-08, "loss": 0.1203, "step": 52784 }, { "epoch": 0.9175372420865998, "grad_norm": 1.5296487797649332, "learning_rate": 1.7729155865450483e-08, "loss": 0.1316, "step": 52785 }, { "epoch": 0.9175546246241026, "grad_norm": 1.0040693136670187, "learning_rate": 1.7721727160005418e-08, "loss": 0.1208, "step": 52786 }, { "epoch": 0.9175720071616055, "grad_norm": 1.236858659558544, "learning_rate": 1.7714299983160875e-08, "loss": 0.1223, "step": 52787 }, { "epoch": 0.9175893896991083, "grad_norm": 1.3844945822468544, "learning_rate": 1.7706874334940614e-08, "loss": 0.147, "step": 52788 }, { "epoch": 0.9176067722366111, "grad_norm": 0.8779566259393395, "learning_rate": 1.7699450215367894e-08, "loss": 0.1341, "step": 52789 }, { "epoch": 0.917624154774114, "grad_norm": 2.021550785911521, "learning_rate": 1.769202762446642e-08, "loss": 0.1817, "step": 52790 }, { "epoch": 0.9176415373116168, "grad_norm": 2.3791917618965086, "learning_rate": 1.7684606562259563e-08, "loss": 0.2456, "step": 52791 }, { "epoch": 0.9176589198491196, "grad_norm": 1.0678454420136427, "learning_rate": 1.7677187028771023e-08, "loss": 0.1235, "step": 52792 }, { "epoch": 0.9176763023866223, "grad_norm": 1.3887053865569439, "learning_rate": 1.7669769024024228e-08, "loss": 0.1074, "step": 52793 }, { "epoch": 0.9176936849241252, "grad_norm": 1.4611807044626268, "learning_rate": 1.7662352548042714e-08, "loss": 0.2026, "step": 52794 }, { "epoch": 0.917711067461628, "grad_norm": 2.2125698854415345, "learning_rate": 1.7654937600850016e-08, "loss": 0.1298, "step": 52795 }, { "epoch": 0.9177284499991308, "grad_norm": 1.1970569300096765, "learning_rate": 1.764752418246951e-08, "loss": 0.1701, "step": 52796 }, { "epoch": 0.9177458325366337, "grad_norm": 1.4919232622653318, "learning_rate": 1.7640112292924835e-08, "loss": 0.1486, "step": 52797 }, { "epoch": 0.9177632150741365, "grad_norm": 1.3708959988826175, "learning_rate": 1.763270193223937e-08, "loss": 0.1348, "step": 52798 }, { "epoch": 0.9177805976116393, "grad_norm": 0.9748507433526664, "learning_rate": 1.76252931004367e-08, "loss": 0.096, "step": 52799 }, { "epoch": 0.9177979801491422, "grad_norm": 1.4170674058625539, "learning_rate": 1.7617885797540255e-08, "loss": 0.1182, "step": 52800 }, { "epoch": 0.917815362686645, "grad_norm": 1.967342636247214, "learning_rate": 1.761048002357357e-08, "loss": 0.2128, "step": 52801 }, { "epoch": 0.9178327452241478, "grad_norm": 0.7118519791278906, "learning_rate": 1.760307577855996e-08, "loss": 0.1357, "step": 52802 }, { "epoch": 0.9178501277616506, "grad_norm": 2.139925377283498, "learning_rate": 1.759567306252313e-08, "loss": 0.1646, "step": 52803 }, { "epoch": 0.9178675102991535, "grad_norm": 1.865851011346843, "learning_rate": 1.7588271875486338e-08, "loss": 0.1707, "step": 52804 }, { "epoch": 0.9178848928366563, "grad_norm": 1.5580634881598607, "learning_rate": 1.7580872217473174e-08, "loss": 0.1531, "step": 52805 }, { "epoch": 0.9179022753741591, "grad_norm": 1.345514915721412, "learning_rate": 1.7573474088506956e-08, "loss": 0.1185, "step": 52806 }, { "epoch": 0.917919657911662, "grad_norm": 1.4379187392205641, "learning_rate": 1.756607748861133e-08, "loss": 0.1018, "step": 52807 }, { "epoch": 0.9179370404491648, "grad_norm": 2.9357907153169744, "learning_rate": 1.7558682417809556e-08, "loss": 0.1008, "step": 52808 }, { "epoch": 0.9179544229866676, "grad_norm": 1.1991403483979206, "learning_rate": 1.755128887612517e-08, "loss": 0.1067, "step": 52809 }, { "epoch": 0.9179718055241705, "grad_norm": 0.9349874837408295, "learning_rate": 1.7543896863581488e-08, "loss": 0.1893, "step": 52810 }, { "epoch": 0.9179891880616733, "grad_norm": 1.3126223132108268, "learning_rate": 1.7536506380202155e-08, "loss": 0.1225, "step": 52811 }, { "epoch": 0.9180065705991761, "grad_norm": 0.6775037465781323, "learning_rate": 1.7529117426010432e-08, "loss": 0.0872, "step": 52812 }, { "epoch": 0.9180239531366788, "grad_norm": 3.1438838892416827, "learning_rate": 1.7521730001029744e-08, "loss": 0.1653, "step": 52813 }, { "epoch": 0.9180413356741817, "grad_norm": 1.0675196051339457, "learning_rate": 1.7514344105283573e-08, "loss": 0.1536, "step": 52814 }, { "epoch": 0.9180587182116845, "grad_norm": 1.5398566073919584, "learning_rate": 1.750695973879529e-08, "loss": 0.1657, "step": 52815 }, { "epoch": 0.9180761007491873, "grad_norm": 0.8523123342297607, "learning_rate": 1.749957690158832e-08, "loss": 0.1193, "step": 52816 }, { "epoch": 0.9180934832866902, "grad_norm": 2.037749682919228, "learning_rate": 1.7492195593686033e-08, "loss": 0.1333, "step": 52817 }, { "epoch": 0.918110865824193, "grad_norm": 1.1861393217985292, "learning_rate": 1.7484815815111797e-08, "loss": 0.1191, "step": 52818 }, { "epoch": 0.9181282483616958, "grad_norm": 1.4674756116740277, "learning_rate": 1.74774375658891e-08, "loss": 0.1702, "step": 52819 }, { "epoch": 0.9181456308991987, "grad_norm": 2.093457311293641, "learning_rate": 1.7470060846041303e-08, "loss": 0.1547, "step": 52820 }, { "epoch": 0.9181630134367015, "grad_norm": 1.281493276202548, "learning_rate": 1.746268565559167e-08, "loss": 0.2847, "step": 52821 }, { "epoch": 0.9181803959742043, "grad_norm": 1.1969950326180656, "learning_rate": 1.7455311994563736e-08, "loss": 0.118, "step": 52822 }, { "epoch": 0.9181977785117071, "grad_norm": 1.0869975068897775, "learning_rate": 1.7447939862980767e-08, "loss": 0.1414, "step": 52823 }, { "epoch": 0.91821516104921, "grad_norm": 1.0747219786586428, "learning_rate": 1.744056926086618e-08, "loss": 0.1076, "step": 52824 }, { "epoch": 0.9182325435867128, "grad_norm": 1.2912472220008406, "learning_rate": 1.743320018824329e-08, "loss": 0.0902, "step": 52825 }, { "epoch": 0.9182499261242156, "grad_norm": 0.9929596167079193, "learning_rate": 1.742583264513553e-08, "loss": 0.1217, "step": 52826 }, { "epoch": 0.9182673086617185, "grad_norm": 1.104521440731721, "learning_rate": 1.7418466631566153e-08, "loss": 0.2103, "step": 52827 }, { "epoch": 0.9182846911992213, "grad_norm": 0.7312009763168127, "learning_rate": 1.7411102147558588e-08, "loss": 0.1037, "step": 52828 }, { "epoch": 0.9183020737367241, "grad_norm": 1.3060605186241538, "learning_rate": 1.740373919313609e-08, "loss": 0.1651, "step": 52829 }, { "epoch": 0.918319456274227, "grad_norm": 0.9333449556163657, "learning_rate": 1.739637776832209e-08, "loss": 0.1636, "step": 52830 }, { "epoch": 0.9183368388117298, "grad_norm": 1.0640823029402469, "learning_rate": 1.73890178731399e-08, "loss": 0.3059, "step": 52831 }, { "epoch": 0.9183542213492326, "grad_norm": 1.0028696214779493, "learning_rate": 1.738165950761289e-08, "loss": 0.1202, "step": 52832 }, { "epoch": 0.9183716038867353, "grad_norm": 1.3841253205949322, "learning_rate": 1.7374302671764206e-08, "loss": 0.1533, "step": 52833 }, { "epoch": 0.9183889864242382, "grad_norm": 2.7705004621180542, "learning_rate": 1.7366947365617334e-08, "loss": 0.2437, "step": 52834 }, { "epoch": 0.918406368961741, "grad_norm": 1.4169927667033535, "learning_rate": 1.7359593589195533e-08, "loss": 0.1242, "step": 52835 }, { "epoch": 0.9184237514992438, "grad_norm": 1.540770759322244, "learning_rate": 1.735224134252211e-08, "loss": 0.2153, "step": 52836 }, { "epoch": 0.9184411340367467, "grad_norm": 2.3422057209244747, "learning_rate": 1.7344890625620334e-08, "loss": 0.131, "step": 52837 }, { "epoch": 0.9184585165742495, "grad_norm": 2.056349107635557, "learning_rate": 1.733754143851357e-08, "loss": 0.1807, "step": 52838 }, { "epoch": 0.9184758991117523, "grad_norm": 1.2153406846644579, "learning_rate": 1.7330193781225133e-08, "loss": 0.1403, "step": 52839 }, { "epoch": 0.9184932816492551, "grad_norm": 1.9314794428560402, "learning_rate": 1.7322847653778228e-08, "loss": 0.1892, "step": 52840 }, { "epoch": 0.918510664186758, "grad_norm": 1.5292091934082097, "learning_rate": 1.731550305619611e-08, "loss": 0.174, "step": 52841 }, { "epoch": 0.9185280467242608, "grad_norm": 1.7667621233547377, "learning_rate": 1.7308159988502213e-08, "loss": 0.1779, "step": 52842 }, { "epoch": 0.9185454292617636, "grad_norm": 1.022013336102581, "learning_rate": 1.730081845071968e-08, "loss": 0.1374, "step": 52843 }, { "epoch": 0.9185628117992665, "grad_norm": 1.7251666849740084, "learning_rate": 1.7293478442871823e-08, "loss": 0.1719, "step": 52844 }, { "epoch": 0.9185801943367693, "grad_norm": 0.8868659926259738, "learning_rate": 1.7286139964981906e-08, "loss": 0.0884, "step": 52845 }, { "epoch": 0.9185975768742721, "grad_norm": 0.8723893572934491, "learning_rate": 1.7278803017073185e-08, "loss": 0.1636, "step": 52846 }, { "epoch": 0.918614959411775, "grad_norm": 1.1196378922079633, "learning_rate": 1.727146759916892e-08, "loss": 0.1264, "step": 52847 }, { "epoch": 0.9186323419492778, "grad_norm": 1.3008826458347948, "learning_rate": 1.7264133711292315e-08, "loss": 0.1785, "step": 52848 }, { "epoch": 0.9186497244867806, "grad_norm": 1.3822371620478655, "learning_rate": 1.7256801353466634e-08, "loss": 0.157, "step": 52849 }, { "epoch": 0.9186671070242834, "grad_norm": 1.1274850888828474, "learning_rate": 1.7249470525715183e-08, "loss": 0.1822, "step": 52850 }, { "epoch": 0.9186844895617863, "grad_norm": 1.3397009073393042, "learning_rate": 1.7242141228061224e-08, "loss": 0.1319, "step": 52851 }, { "epoch": 0.9187018720992891, "grad_norm": 1.8607614193996926, "learning_rate": 1.72348134605278e-08, "loss": 0.1501, "step": 52852 }, { "epoch": 0.9187192546367918, "grad_norm": 0.8015080358559267, "learning_rate": 1.7227487223138325e-08, "loss": 0.208, "step": 52853 }, { "epoch": 0.9187366371742947, "grad_norm": 1.9447812260159676, "learning_rate": 1.722016251591596e-08, "loss": 0.2174, "step": 52854 }, { "epoch": 0.9187540197117975, "grad_norm": 1.266091274365574, "learning_rate": 1.7212839338883845e-08, "loss": 0.1671, "step": 52855 }, { "epoch": 0.9187714022493003, "grad_norm": 1.577786067273253, "learning_rate": 1.7205517692065295e-08, "loss": 0.1404, "step": 52856 }, { "epoch": 0.9187887847868031, "grad_norm": 1.4900665433664868, "learning_rate": 1.719819757548341e-08, "loss": 0.1126, "step": 52857 }, { "epoch": 0.918806167324306, "grad_norm": 6.3807203389512654, "learning_rate": 1.7190878989161607e-08, "loss": 0.2364, "step": 52858 }, { "epoch": 0.9188235498618088, "grad_norm": 1.5096360624408798, "learning_rate": 1.7183561933122814e-08, "loss": 0.1652, "step": 52859 }, { "epoch": 0.9188409323993116, "grad_norm": 1.13824266914437, "learning_rate": 1.7176246407390293e-08, "loss": 0.1861, "step": 52860 }, { "epoch": 0.9188583149368145, "grad_norm": 1.5608127637221554, "learning_rate": 1.7168932411987413e-08, "loss": 0.2268, "step": 52861 }, { "epoch": 0.9188756974743173, "grad_norm": 1.3272479086975821, "learning_rate": 1.7161619946937158e-08, "loss": 0.2421, "step": 52862 }, { "epoch": 0.9188930800118201, "grad_norm": 1.3230306758026344, "learning_rate": 1.715430901226278e-08, "loss": 0.1644, "step": 52863 }, { "epoch": 0.918910462549323, "grad_norm": 2.19398932837395, "learning_rate": 1.7146999607987433e-08, "loss": 0.1787, "step": 52864 }, { "epoch": 0.9189278450868258, "grad_norm": 1.7229462522973202, "learning_rate": 1.7139691734134322e-08, "loss": 0.1491, "step": 52865 }, { "epoch": 0.9189452276243286, "grad_norm": 1.2580863633356991, "learning_rate": 1.7132385390726532e-08, "loss": 0.1393, "step": 52866 }, { "epoch": 0.9189626101618315, "grad_norm": 1.3551870421057446, "learning_rate": 1.7125080577787278e-08, "loss": 0.1268, "step": 52867 }, { "epoch": 0.9189799926993343, "grad_norm": 1.4654283472016476, "learning_rate": 1.7117777295339696e-08, "loss": 0.1014, "step": 52868 }, { "epoch": 0.9189973752368371, "grad_norm": 1.657367845102839, "learning_rate": 1.7110475543406945e-08, "loss": 0.207, "step": 52869 }, { "epoch": 0.9190147577743399, "grad_norm": 1.5052445175988558, "learning_rate": 1.7103175322012274e-08, "loss": 0.1105, "step": 52870 }, { "epoch": 0.9190321403118428, "grad_norm": 1.3184338667488609, "learning_rate": 1.7095876631178618e-08, "loss": 0.1569, "step": 52871 }, { "epoch": 0.9190495228493456, "grad_norm": 0.9459455763761855, "learning_rate": 1.7088579470929176e-08, "loss": 0.1861, "step": 52872 }, { "epoch": 0.9190669053868483, "grad_norm": 1.0557442266963157, "learning_rate": 1.7081283841287152e-08, "loss": 0.1657, "step": 52873 }, { "epoch": 0.9190842879243512, "grad_norm": 1.2738241236672163, "learning_rate": 1.7073989742275586e-08, "loss": 0.2156, "step": 52874 }, { "epoch": 0.919101670461854, "grad_norm": 2.3047905581967902, "learning_rate": 1.7066697173917678e-08, "loss": 0.1069, "step": 52875 }, { "epoch": 0.9191190529993568, "grad_norm": 1.4602908281227045, "learning_rate": 1.705940613623641e-08, "loss": 0.1338, "step": 52876 }, { "epoch": 0.9191364355368596, "grad_norm": 1.819647962260842, "learning_rate": 1.7052116629255152e-08, "loss": 0.1502, "step": 52877 }, { "epoch": 0.9191538180743625, "grad_norm": 1.4733597992425564, "learning_rate": 1.7044828652996724e-08, "loss": 0.1049, "step": 52878 }, { "epoch": 0.9191712006118653, "grad_norm": 1.0566389955824116, "learning_rate": 1.7037542207484323e-08, "loss": 0.1472, "step": 52879 }, { "epoch": 0.9191885831493681, "grad_norm": 1.1195374953373065, "learning_rate": 1.7030257292741045e-08, "loss": 0.1505, "step": 52880 }, { "epoch": 0.919205965686871, "grad_norm": 1.1430739875066687, "learning_rate": 1.7022973908790038e-08, "loss": 0.093, "step": 52881 }, { "epoch": 0.9192233482243738, "grad_norm": 1.2295689040447904, "learning_rate": 1.701569205565434e-08, "loss": 0.096, "step": 52882 }, { "epoch": 0.9192407307618766, "grad_norm": 1.4475235513098963, "learning_rate": 1.700841173335704e-08, "loss": 0.1144, "step": 52883 }, { "epoch": 0.9192581132993795, "grad_norm": 1.4289040446167993, "learning_rate": 1.700113294192118e-08, "loss": 0.1522, "step": 52884 }, { "epoch": 0.9192754958368823, "grad_norm": 1.7399730205081188, "learning_rate": 1.6993855681369906e-08, "loss": 0.1524, "step": 52885 }, { "epoch": 0.9192928783743851, "grad_norm": 2.4342402536628165, "learning_rate": 1.69865799517262e-08, "loss": 0.1824, "step": 52886 }, { "epoch": 0.9193102609118879, "grad_norm": 1.066232366679295, "learning_rate": 1.6979305753013095e-08, "loss": 0.1202, "step": 52887 }, { "epoch": 0.9193276434493908, "grad_norm": 2.2244117357156687, "learning_rate": 1.6972033085253802e-08, "loss": 0.196, "step": 52888 }, { "epoch": 0.9193450259868936, "grad_norm": 0.8080335245954401, "learning_rate": 1.6964761948471297e-08, "loss": 0.1258, "step": 52889 }, { "epoch": 0.9193624085243964, "grad_norm": 1.232270299244943, "learning_rate": 1.6957492342688562e-08, "loss": 0.1248, "step": 52890 }, { "epoch": 0.9193797910618993, "grad_norm": 1.6253475158674693, "learning_rate": 1.6950224267928636e-08, "loss": 0.1489, "step": 52891 }, { "epoch": 0.9193971735994021, "grad_norm": 1.1479918984654498, "learning_rate": 1.6942957724214667e-08, "loss": 0.1347, "step": 52892 }, { "epoch": 0.9194145561369048, "grad_norm": 2.169298036004215, "learning_rate": 1.693569271156964e-08, "loss": 0.1273, "step": 52893 }, { "epoch": 0.9194319386744076, "grad_norm": 2.352191702705151, "learning_rate": 1.6928429230016527e-08, "loss": 0.197, "step": 52894 }, { "epoch": 0.9194493212119105, "grad_norm": 1.7856637207429622, "learning_rate": 1.6921167279578375e-08, "loss": 0.1335, "step": 52895 }, { "epoch": 0.9194667037494133, "grad_norm": 1.1652576391511111, "learning_rate": 1.691390686027827e-08, "loss": 0.1263, "step": 52896 }, { "epoch": 0.9194840862869161, "grad_norm": 1.2504774297800791, "learning_rate": 1.690664797213914e-08, "loss": 0.0977, "step": 52897 }, { "epoch": 0.919501468824419, "grad_norm": 1.9431270958604068, "learning_rate": 1.689939061518403e-08, "loss": 0.2044, "step": 52898 }, { "epoch": 0.9195188513619218, "grad_norm": 1.234867301398458, "learning_rate": 1.689213478943585e-08, "loss": 0.1616, "step": 52899 }, { "epoch": 0.9195362338994246, "grad_norm": 0.848089835892511, "learning_rate": 1.6884880494917765e-08, "loss": 0.1264, "step": 52900 }, { "epoch": 0.9195536164369275, "grad_norm": 0.956219354223157, "learning_rate": 1.687762773165263e-08, "loss": 0.1045, "step": 52901 }, { "epoch": 0.9195709989744303, "grad_norm": 0.9988805214379026, "learning_rate": 1.6870376499663553e-08, "loss": 0.1449, "step": 52902 }, { "epoch": 0.9195883815119331, "grad_norm": 1.9164856880916687, "learning_rate": 1.6863126798973447e-08, "loss": 0.1422, "step": 52903 }, { "epoch": 0.919605764049436, "grad_norm": 2.100280969752492, "learning_rate": 1.6855878629605247e-08, "loss": 0.1626, "step": 52904 }, { "epoch": 0.9196231465869388, "grad_norm": 1.6609643025019536, "learning_rate": 1.684863199158204e-08, "loss": 0.1162, "step": 52905 }, { "epoch": 0.9196405291244416, "grad_norm": 1.524957788800369, "learning_rate": 1.6841386884926646e-08, "loss": 0.1891, "step": 52906 }, { "epoch": 0.9196579116619444, "grad_norm": 1.712078009984479, "learning_rate": 1.6834143309662096e-08, "loss": 0.1055, "step": 52907 }, { "epoch": 0.9196752941994473, "grad_norm": 1.605747836963795, "learning_rate": 1.682690126581149e-08, "loss": 0.1677, "step": 52908 }, { "epoch": 0.9196926767369501, "grad_norm": 1.5224127408740753, "learning_rate": 1.681966075339758e-08, "loss": 0.1454, "step": 52909 }, { "epoch": 0.9197100592744529, "grad_norm": 2.001651250369748, "learning_rate": 1.6812421772443352e-08, "loss": 0.1536, "step": 52910 }, { "epoch": 0.9197274418119558, "grad_norm": 1.4637708025358684, "learning_rate": 1.6805184322971843e-08, "loss": 0.1105, "step": 52911 }, { "epoch": 0.9197448243494585, "grad_norm": 1.6031178226789835, "learning_rate": 1.6797948405005924e-08, "loss": 0.1451, "step": 52912 }, { "epoch": 0.9197622068869613, "grad_norm": 0.7263682352391362, "learning_rate": 1.6790714018568576e-08, "loss": 0.0754, "step": 52913 }, { "epoch": 0.9197795894244641, "grad_norm": 1.6688787743896525, "learning_rate": 1.6783481163682668e-08, "loss": 0.1617, "step": 52914 }, { "epoch": 0.919796971961967, "grad_norm": 1.4027225045771259, "learning_rate": 1.6776249840371127e-08, "loss": 0.1487, "step": 52915 }, { "epoch": 0.9198143544994698, "grad_norm": 1.8413825866031015, "learning_rate": 1.6769020048656933e-08, "loss": 0.2084, "step": 52916 }, { "epoch": 0.9198317370369726, "grad_norm": 1.0193272044801924, "learning_rate": 1.6761791788562962e-08, "loss": 0.1679, "step": 52917 }, { "epoch": 0.9198491195744755, "grad_norm": 1.2099496849082154, "learning_rate": 1.675456506011208e-08, "loss": 0.1595, "step": 52918 }, { "epoch": 0.9198665021119783, "grad_norm": 1.115292603545617, "learning_rate": 1.674733986332727e-08, "loss": 0.1002, "step": 52919 }, { "epoch": 0.9198838846494811, "grad_norm": 1.318052132351539, "learning_rate": 1.674011619823146e-08, "loss": 0.1804, "step": 52920 }, { "epoch": 0.919901267186984, "grad_norm": 1.5102065522103127, "learning_rate": 1.6732894064847513e-08, "loss": 0.1847, "step": 52921 }, { "epoch": 0.9199186497244868, "grad_norm": 1.5666154191936752, "learning_rate": 1.67256734631982e-08, "loss": 0.1155, "step": 52922 }, { "epoch": 0.9199360322619896, "grad_norm": 0.869007789447568, "learning_rate": 1.6718454393306548e-08, "loss": 0.1146, "step": 52923 }, { "epoch": 0.9199534147994924, "grad_norm": 1.6595226596411328, "learning_rate": 1.6711236855195376e-08, "loss": 0.1358, "step": 52924 }, { "epoch": 0.9199707973369953, "grad_norm": 2.014934216900427, "learning_rate": 1.6704020848887557e-08, "loss": 0.1435, "step": 52925 }, { "epoch": 0.9199881798744981, "grad_norm": 1.3931652953577929, "learning_rate": 1.6696806374406014e-08, "loss": 0.1449, "step": 52926 }, { "epoch": 0.9200055624120009, "grad_norm": 3.4395434272659764, "learning_rate": 1.6689593431773617e-08, "loss": 0.2479, "step": 52927 }, { "epoch": 0.9200229449495038, "grad_norm": 1.43603031008157, "learning_rate": 1.6682382021013186e-08, "loss": 0.1782, "step": 52928 }, { "epoch": 0.9200403274870066, "grad_norm": 1.6962069166354392, "learning_rate": 1.667517214214753e-08, "loss": 0.2131, "step": 52929 }, { "epoch": 0.9200577100245094, "grad_norm": 1.6196867170433187, "learning_rate": 1.666796379519958e-08, "loss": 0.1242, "step": 52930 }, { "epoch": 0.9200750925620123, "grad_norm": 0.9683057274233388, "learning_rate": 1.666075698019215e-08, "loss": 0.1347, "step": 52931 }, { "epoch": 0.920092475099515, "grad_norm": 1.8288031499437118, "learning_rate": 1.6653551697148105e-08, "loss": 0.1535, "step": 52932 }, { "epoch": 0.9201098576370178, "grad_norm": 3.2894139661773107, "learning_rate": 1.6646347946090268e-08, "loss": 0.173, "step": 52933 }, { "epoch": 0.9201272401745206, "grad_norm": 1.864764968676712, "learning_rate": 1.6639145727041503e-08, "loss": 0.1486, "step": 52934 }, { "epoch": 0.9201446227120235, "grad_norm": 1.7423216213550352, "learning_rate": 1.6631945040024575e-08, "loss": 0.187, "step": 52935 }, { "epoch": 0.9201620052495263, "grad_norm": 3.5628284479657246, "learning_rate": 1.6624745885062407e-08, "loss": 0.1614, "step": 52936 }, { "epoch": 0.9201793877870291, "grad_norm": 0.9914822783472808, "learning_rate": 1.661754826217765e-08, "loss": 0.1181, "step": 52937 }, { "epoch": 0.920196770324532, "grad_norm": 3.605397944695111, "learning_rate": 1.6610352171393227e-08, "loss": 0.1921, "step": 52938 }, { "epoch": 0.9202141528620348, "grad_norm": 1.304649466191886, "learning_rate": 1.660315761273201e-08, "loss": 0.1573, "step": 52939 }, { "epoch": 0.9202315353995376, "grad_norm": 1.7874203045982457, "learning_rate": 1.6595964586216703e-08, "loss": 0.1639, "step": 52940 }, { "epoch": 0.9202489179370404, "grad_norm": 1.2829668747360938, "learning_rate": 1.6588773091870124e-08, "loss": 0.1439, "step": 52941 }, { "epoch": 0.9202663004745433, "grad_norm": 1.5539208075050153, "learning_rate": 1.6581583129715028e-08, "loss": 0.1042, "step": 52942 }, { "epoch": 0.9202836830120461, "grad_norm": 2.9056718754669943, "learning_rate": 1.6574394699774284e-08, "loss": 0.2233, "step": 52943 }, { "epoch": 0.9203010655495489, "grad_norm": 1.9758520667123318, "learning_rate": 1.656720780207066e-08, "loss": 0.1707, "step": 52944 }, { "epoch": 0.9203184480870518, "grad_norm": 1.9519453871018266, "learning_rate": 1.6560022436626907e-08, "loss": 0.1851, "step": 52945 }, { "epoch": 0.9203358306245546, "grad_norm": 2.692561015856985, "learning_rate": 1.6552838603465845e-08, "loss": 0.1761, "step": 52946 }, { "epoch": 0.9203532131620574, "grad_norm": 1.8448493212142028, "learning_rate": 1.6545656302610178e-08, "loss": 0.1867, "step": 52947 }, { "epoch": 0.9203705956995603, "grad_norm": 3.5813562375552404, "learning_rate": 1.6538475534082664e-08, "loss": 0.1934, "step": 52948 }, { "epoch": 0.9203879782370631, "grad_norm": 0.9573588777250648, "learning_rate": 1.653129629790606e-08, "loss": 0.0946, "step": 52949 }, { "epoch": 0.9204053607745659, "grad_norm": 1.5569610609283284, "learning_rate": 1.6524118594103242e-08, "loss": 0.1338, "step": 52950 }, { "epoch": 0.9204227433120687, "grad_norm": 0.9948788597708084, "learning_rate": 1.6516942422696856e-08, "loss": 0.1406, "step": 52951 }, { "epoch": 0.9204401258495715, "grad_norm": 1.3884976786957282, "learning_rate": 1.6509767783709772e-08, "loss": 0.135, "step": 52952 }, { "epoch": 0.9204575083870743, "grad_norm": 0.9660673608326038, "learning_rate": 1.6502594677164472e-08, "loss": 0.1273, "step": 52953 }, { "epoch": 0.9204748909245771, "grad_norm": 1.6395375294811416, "learning_rate": 1.6495423103083882e-08, "loss": 0.1619, "step": 52954 }, { "epoch": 0.92049227346208, "grad_norm": 1.2470146903159005, "learning_rate": 1.648825306149071e-08, "loss": 0.1427, "step": 52955 }, { "epoch": 0.9205096559995828, "grad_norm": 0.8714902257878788, "learning_rate": 1.6481084552407708e-08, "loss": 0.1523, "step": 52956 }, { "epoch": 0.9205270385370856, "grad_norm": 1.1816431319221012, "learning_rate": 1.6473917575857476e-08, "loss": 0.1133, "step": 52957 }, { "epoch": 0.9205444210745884, "grad_norm": 2.687568892551208, "learning_rate": 1.6466752131862825e-08, "loss": 0.2041, "step": 52958 }, { "epoch": 0.9205618036120913, "grad_norm": 0.7385775994607192, "learning_rate": 1.6459588220446576e-08, "loss": 0.0959, "step": 52959 }, { "epoch": 0.9205791861495941, "grad_norm": 0.8278448425888343, "learning_rate": 1.64524258416312e-08, "loss": 0.1426, "step": 52960 }, { "epoch": 0.9205965686870969, "grad_norm": 1.2247767231343567, "learning_rate": 1.6445264995439467e-08, "loss": 0.2004, "step": 52961 }, { "epoch": 0.9206139512245998, "grad_norm": 1.7218550436132087, "learning_rate": 1.6438105681894188e-08, "loss": 0.1874, "step": 52962 }, { "epoch": 0.9206313337621026, "grad_norm": 1.4358535599046574, "learning_rate": 1.643094790101801e-08, "loss": 0.1493, "step": 52963 }, { "epoch": 0.9206487162996054, "grad_norm": 1.522148268095734, "learning_rate": 1.6423791652833586e-08, "loss": 0.2466, "step": 52964 }, { "epoch": 0.9206660988371083, "grad_norm": 1.7751877271628445, "learning_rate": 1.641663693736356e-08, "loss": 0.1565, "step": 52965 }, { "epoch": 0.9206834813746111, "grad_norm": 1.0600375560946862, "learning_rate": 1.6409483754630692e-08, "loss": 0.1816, "step": 52966 }, { "epoch": 0.9207008639121139, "grad_norm": 1.3932129404877138, "learning_rate": 1.6402332104657578e-08, "loss": 0.1288, "step": 52967 }, { "epoch": 0.9207182464496168, "grad_norm": 2.0557292421820565, "learning_rate": 1.6395181987466977e-08, "loss": 0.1657, "step": 52968 }, { "epoch": 0.9207356289871196, "grad_norm": 1.7424086847133178, "learning_rate": 1.6388033403081425e-08, "loss": 0.1586, "step": 52969 }, { "epoch": 0.9207530115246224, "grad_norm": 1.4337425941022477, "learning_rate": 1.6380886351523682e-08, "loss": 0.1338, "step": 52970 }, { "epoch": 0.9207703940621252, "grad_norm": 1.1682430793944014, "learning_rate": 1.637374083281645e-08, "loss": 0.1165, "step": 52971 }, { "epoch": 0.920787776599628, "grad_norm": 1.3896057425065094, "learning_rate": 1.636659684698222e-08, "loss": 0.2063, "step": 52972 }, { "epoch": 0.9208051591371308, "grad_norm": 1.4364648783839824, "learning_rate": 1.6359454394043737e-08, "loss": 0.1483, "step": 52973 }, { "epoch": 0.9208225416746336, "grad_norm": 1.621645600074593, "learning_rate": 1.6352313474023606e-08, "loss": 0.1653, "step": 52974 }, { "epoch": 0.9208399242121365, "grad_norm": 1.0671327961972803, "learning_rate": 1.6345174086944525e-08, "loss": 0.1446, "step": 52975 }, { "epoch": 0.9208573067496393, "grad_norm": 1.0093020865097633, "learning_rate": 1.6338036232828977e-08, "loss": 0.1812, "step": 52976 }, { "epoch": 0.9208746892871421, "grad_norm": 1.3348521112466278, "learning_rate": 1.6330899911699724e-08, "loss": 0.1724, "step": 52977 }, { "epoch": 0.9208920718246449, "grad_norm": 1.4440260276007155, "learning_rate": 1.632376512357936e-08, "loss": 0.1034, "step": 52978 }, { "epoch": 0.9209094543621478, "grad_norm": 1.0715349322400434, "learning_rate": 1.631663186849047e-08, "loss": 0.0973, "step": 52979 }, { "epoch": 0.9209268368996506, "grad_norm": 1.2350853665274857, "learning_rate": 1.63095001464556e-08, "loss": 0.1394, "step": 52980 }, { "epoch": 0.9209442194371534, "grad_norm": 1.5370007611353296, "learning_rate": 1.630236995749751e-08, "loss": 0.1683, "step": 52981 }, { "epoch": 0.9209616019746563, "grad_norm": 0.9200157046429912, "learning_rate": 1.6295241301638728e-08, "loss": 0.1503, "step": 52982 }, { "epoch": 0.9209789845121591, "grad_norm": 1.3563630048911073, "learning_rate": 1.6288114178901802e-08, "loss": 0.1412, "step": 52983 }, { "epoch": 0.9209963670496619, "grad_norm": 1.452327226790492, "learning_rate": 1.6280988589309318e-08, "loss": 0.1462, "step": 52984 }, { "epoch": 0.9210137495871648, "grad_norm": 1.5261808517316386, "learning_rate": 1.627386453288393e-08, "loss": 0.1702, "step": 52985 }, { "epoch": 0.9210311321246676, "grad_norm": 1.3020841032291568, "learning_rate": 1.6266742009648228e-08, "loss": 0.1277, "step": 52986 }, { "epoch": 0.9210485146621704, "grad_norm": 1.0494062974984313, "learning_rate": 1.625962101962469e-08, "loss": 0.1933, "step": 52987 }, { "epoch": 0.9210658971996732, "grad_norm": 1.4145134559382655, "learning_rate": 1.625250156283592e-08, "loss": 0.1426, "step": 52988 }, { "epoch": 0.9210832797371761, "grad_norm": 1.6769791606961162, "learning_rate": 1.6245383639304556e-08, "loss": 0.1467, "step": 52989 }, { "epoch": 0.9211006622746789, "grad_norm": 1.7233443461167663, "learning_rate": 1.623826724905314e-08, "loss": 0.2109, "step": 52990 }, { "epoch": 0.9211180448121817, "grad_norm": 1.972641803022995, "learning_rate": 1.6231152392104098e-08, "loss": 0.1229, "step": 52991 }, { "epoch": 0.9211354273496845, "grad_norm": 1.3415755029268137, "learning_rate": 1.6224039068480132e-08, "loss": 0.1376, "step": 52992 }, { "epoch": 0.9211528098871873, "grad_norm": 2.3823240195480384, "learning_rate": 1.621692727820373e-08, "loss": 0.1758, "step": 52993 }, { "epoch": 0.9211701924246901, "grad_norm": 1.0399854321845048, "learning_rate": 1.620981702129742e-08, "loss": 0.1435, "step": 52994 }, { "epoch": 0.921187574962193, "grad_norm": 1.361699650093818, "learning_rate": 1.6202708297783752e-08, "loss": 0.1823, "step": 52995 }, { "epoch": 0.9212049574996958, "grad_norm": 2.2368412402040985, "learning_rate": 1.6195601107685254e-08, "loss": 0.2504, "step": 52996 }, { "epoch": 0.9212223400371986, "grad_norm": 1.8787177838718654, "learning_rate": 1.618849545102452e-08, "loss": 0.1683, "step": 52997 }, { "epoch": 0.9212397225747014, "grad_norm": 1.1469308057711207, "learning_rate": 1.618139132782398e-08, "loss": 0.1529, "step": 52998 }, { "epoch": 0.9212571051122043, "grad_norm": 1.8111642926865477, "learning_rate": 1.617428873810611e-08, "loss": 0.192, "step": 52999 }, { "epoch": 0.9212744876497071, "grad_norm": 1.1881735011247219, "learning_rate": 1.616718768189357e-08, "loss": 0.1407, "step": 53000 }, { "epoch": 0.9212918701872099, "grad_norm": 1.3482561475202735, "learning_rate": 1.6160088159208775e-08, "loss": 0.2783, "step": 53001 }, { "epoch": 0.9213092527247128, "grad_norm": 1.9868710316560705, "learning_rate": 1.6152990170074266e-08, "loss": 0.173, "step": 53002 }, { "epoch": 0.9213266352622156, "grad_norm": 1.1739232524921972, "learning_rate": 1.6145893714512525e-08, "loss": 0.1198, "step": 53003 }, { "epoch": 0.9213440177997184, "grad_norm": 0.8922470346245897, "learning_rate": 1.6138798792545983e-08, "loss": 0.0988, "step": 53004 }, { "epoch": 0.9213614003372212, "grad_norm": 1.8502223002446818, "learning_rate": 1.6131705404197227e-08, "loss": 0.2134, "step": 53005 }, { "epoch": 0.9213787828747241, "grad_norm": 0.881998527092122, "learning_rate": 1.6124613549488685e-08, "loss": 0.1997, "step": 53006 }, { "epoch": 0.9213961654122269, "grad_norm": 1.1302280417727915, "learning_rate": 1.6117523228442787e-08, "loss": 0.1298, "step": 53007 }, { "epoch": 0.9214135479497297, "grad_norm": 2.1306759163000053, "learning_rate": 1.611043444108212e-08, "loss": 0.1628, "step": 53008 }, { "epoch": 0.9214309304872326, "grad_norm": 1.900640500925661, "learning_rate": 1.6103347187429118e-08, "loss": 0.1684, "step": 53009 }, { "epoch": 0.9214483130247354, "grad_norm": 1.9070540669702147, "learning_rate": 1.60962614675062e-08, "loss": 0.1539, "step": 53010 }, { "epoch": 0.9214656955622382, "grad_norm": 1.7363838893678543, "learning_rate": 1.6089177281335796e-08, "loss": 0.1376, "step": 53011 }, { "epoch": 0.921483078099741, "grad_norm": 1.6360205042942628, "learning_rate": 1.6082094628940447e-08, "loss": 0.1382, "step": 53012 }, { "epoch": 0.9215004606372438, "grad_norm": 1.2991075917160622, "learning_rate": 1.6075013510342573e-08, "loss": 0.1778, "step": 53013 }, { "epoch": 0.9215178431747466, "grad_norm": 2.051020152532858, "learning_rate": 1.60679339255646e-08, "loss": 0.211, "step": 53014 }, { "epoch": 0.9215352257122494, "grad_norm": 1.3074940952472007, "learning_rate": 1.6060855874628964e-08, "loss": 0.2627, "step": 53015 }, { "epoch": 0.9215526082497523, "grad_norm": 1.1732698037941618, "learning_rate": 1.605377935755814e-08, "loss": 0.1175, "step": 53016 }, { "epoch": 0.9215699907872551, "grad_norm": 1.0003123833369747, "learning_rate": 1.6046704374374497e-08, "loss": 0.2094, "step": 53017 }, { "epoch": 0.9215873733247579, "grad_norm": 1.7984404785895713, "learning_rate": 1.603963092510052e-08, "loss": 0.2444, "step": 53018 }, { "epoch": 0.9216047558622608, "grad_norm": 1.2856499002700639, "learning_rate": 1.6032559009758528e-08, "loss": 0.2226, "step": 53019 }, { "epoch": 0.9216221383997636, "grad_norm": 0.8831025539145293, "learning_rate": 1.6025488628371054e-08, "loss": 0.1501, "step": 53020 }, { "epoch": 0.9216395209372664, "grad_norm": 2.000496530791112, "learning_rate": 1.6018419780960522e-08, "loss": 0.1973, "step": 53021 }, { "epoch": 0.9216569034747693, "grad_norm": 1.3049356540509733, "learning_rate": 1.60113524675492e-08, "loss": 0.123, "step": 53022 }, { "epoch": 0.9216742860122721, "grad_norm": 2.435343544418061, "learning_rate": 1.6004286688159617e-08, "loss": 0.1995, "step": 53023 }, { "epoch": 0.9216916685497749, "grad_norm": 2.0466793044953344, "learning_rate": 1.5997222442814097e-08, "loss": 0.1872, "step": 53024 }, { "epoch": 0.9217090510872777, "grad_norm": 1.7604003056430917, "learning_rate": 1.599015973153506e-08, "loss": 0.1617, "step": 53025 }, { "epoch": 0.9217264336247806, "grad_norm": 1.5027499494454482, "learning_rate": 1.598309855434482e-08, "loss": 0.1918, "step": 53026 }, { "epoch": 0.9217438161622834, "grad_norm": 1.7220189978553844, "learning_rate": 1.597603891126581e-08, "loss": 0.2069, "step": 53027 }, { "epoch": 0.9217611986997862, "grad_norm": 1.4747838148327008, "learning_rate": 1.5968980802320565e-08, "loss": 0.2327, "step": 53028 }, { "epoch": 0.9217785812372891, "grad_norm": 3.0427003609467373, "learning_rate": 1.5961924227531175e-08, "loss": 0.2361, "step": 53029 }, { "epoch": 0.9217959637747919, "grad_norm": 2.9200393204522896, "learning_rate": 1.5954869186920128e-08, "loss": 0.2371, "step": 53030 }, { "epoch": 0.9218133463122947, "grad_norm": 0.8881242311481026, "learning_rate": 1.5947815680509845e-08, "loss": 0.1249, "step": 53031 }, { "epoch": 0.9218307288497974, "grad_norm": 1.0252101922341361, "learning_rate": 1.5940763708322647e-08, "loss": 0.1251, "step": 53032 }, { "epoch": 0.9218481113873003, "grad_norm": 1.6650705981760765, "learning_rate": 1.5933713270380845e-08, "loss": 0.1761, "step": 53033 }, { "epoch": 0.9218654939248031, "grad_norm": 0.9103138747108975, "learning_rate": 1.592666436670681e-08, "loss": 0.1081, "step": 53034 }, { "epoch": 0.9218828764623059, "grad_norm": 1.449779339958647, "learning_rate": 1.591961699732286e-08, "loss": 0.1724, "step": 53035 }, { "epoch": 0.9219002589998088, "grad_norm": 1.2641796165847554, "learning_rate": 1.5912571162251365e-08, "loss": 0.19, "step": 53036 }, { "epoch": 0.9219176415373116, "grad_norm": 1.115988741684749, "learning_rate": 1.5905526861514694e-08, "loss": 0.1725, "step": 53037 }, { "epoch": 0.9219350240748144, "grad_norm": 1.2391346844831648, "learning_rate": 1.5898484095135055e-08, "loss": 0.1691, "step": 53038 }, { "epoch": 0.9219524066123173, "grad_norm": 1.6525387592955754, "learning_rate": 1.5891442863134873e-08, "loss": 0.2061, "step": 53039 }, { "epoch": 0.9219697891498201, "grad_norm": 1.3248002248795148, "learning_rate": 1.5884403165536464e-08, "loss": 0.1819, "step": 53040 }, { "epoch": 0.9219871716873229, "grad_norm": 2.163639107104193, "learning_rate": 1.587736500236214e-08, "loss": 0.1621, "step": 53041 }, { "epoch": 0.9220045542248257, "grad_norm": 1.1349675630200475, "learning_rate": 1.5870328373634056e-08, "loss": 0.2367, "step": 53042 }, { "epoch": 0.9220219367623286, "grad_norm": 1.5727460974155545, "learning_rate": 1.5863293279374745e-08, "loss": 0.1156, "step": 53043 }, { "epoch": 0.9220393192998314, "grad_norm": 1.364960822494193, "learning_rate": 1.5856259719606356e-08, "loss": 0.177, "step": 53044 }, { "epoch": 0.9220567018373342, "grad_norm": 1.2794854480631193, "learning_rate": 1.5849227694351263e-08, "loss": 0.1302, "step": 53045 }, { "epoch": 0.9220740843748371, "grad_norm": 1.3316173297408511, "learning_rate": 1.5842197203631668e-08, "loss": 0.1327, "step": 53046 }, { "epoch": 0.9220914669123399, "grad_norm": 0.9057086930305995, "learning_rate": 1.583516824747e-08, "loss": 0.1314, "step": 53047 }, { "epoch": 0.9221088494498427, "grad_norm": 1.4861677946112872, "learning_rate": 1.5828140825888347e-08, "loss": 0.1925, "step": 53048 }, { "epoch": 0.9221262319873456, "grad_norm": 2.2342355491680763, "learning_rate": 1.5821114938909142e-08, "loss": 0.1728, "step": 53049 }, { "epoch": 0.9221436145248484, "grad_norm": 5.289949942875424, "learning_rate": 1.5814090586554475e-08, "loss": 0.3535, "step": 53050 }, { "epoch": 0.9221609970623511, "grad_norm": 1.3484372973806453, "learning_rate": 1.5807067768846825e-08, "loss": 0.1079, "step": 53051 }, { "epoch": 0.9221783795998539, "grad_norm": 0.7590282609613975, "learning_rate": 1.5800046485808293e-08, "loss": 0.1052, "step": 53052 }, { "epoch": 0.9221957621373568, "grad_norm": 1.3754659120757906, "learning_rate": 1.5793026737461245e-08, "loss": 0.1405, "step": 53053 }, { "epoch": 0.9222131446748596, "grad_norm": 1.8677016974718834, "learning_rate": 1.5786008523827888e-08, "loss": 0.1633, "step": 53054 }, { "epoch": 0.9222305272123624, "grad_norm": 1.0437925722355879, "learning_rate": 1.5778991844930422e-08, "loss": 0.1343, "step": 53055 }, { "epoch": 0.9222479097498653, "grad_norm": 1.5907758854231504, "learning_rate": 1.5771976700791114e-08, "loss": 0.2416, "step": 53056 }, { "epoch": 0.9222652922873681, "grad_norm": 1.5910557483676995, "learning_rate": 1.5764963091432214e-08, "loss": 0.2256, "step": 53057 }, { "epoch": 0.9222826748248709, "grad_norm": 2.0887408933989557, "learning_rate": 1.575795101687588e-08, "loss": 0.1233, "step": 53058 }, { "epoch": 0.9223000573623737, "grad_norm": 1.2111830051071686, "learning_rate": 1.5750940477144426e-08, "loss": 0.145, "step": 53059 }, { "epoch": 0.9223174398998766, "grad_norm": 1.0229177466006838, "learning_rate": 1.5743931472260162e-08, "loss": 0.2006, "step": 53060 }, { "epoch": 0.9223348224373794, "grad_norm": 0.8947715262081428, "learning_rate": 1.573692400224502e-08, "loss": 0.1912, "step": 53061 }, { "epoch": 0.9223522049748822, "grad_norm": 1.4635968348275468, "learning_rate": 1.5729918067121484e-08, "loss": 0.1816, "step": 53062 }, { "epoch": 0.9223695875123851, "grad_norm": 1.0287463843123268, "learning_rate": 1.5722913666911642e-08, "loss": 0.1633, "step": 53063 }, { "epoch": 0.9223869700498879, "grad_norm": 1.028691023312304, "learning_rate": 1.5715910801637644e-08, "loss": 0.1552, "step": 53064 }, { "epoch": 0.9224043525873907, "grad_norm": 0.6726141520769371, "learning_rate": 1.5708909471321808e-08, "loss": 0.1146, "step": 53065 }, { "epoch": 0.9224217351248936, "grad_norm": 0.9364544497273831, "learning_rate": 1.5701909675986282e-08, "loss": 0.1058, "step": 53066 }, { "epoch": 0.9224391176623964, "grad_norm": 1.6526915839316807, "learning_rate": 1.5694911415653156e-08, "loss": 0.131, "step": 53067 }, { "epoch": 0.9224565001998992, "grad_norm": 0.953943592430256, "learning_rate": 1.568791469034475e-08, "loss": 0.123, "step": 53068 }, { "epoch": 0.922473882737402, "grad_norm": 1.3551807761882255, "learning_rate": 1.56809195000831e-08, "loss": 0.2231, "step": 53069 }, { "epoch": 0.9224912652749049, "grad_norm": 1.2264027353665097, "learning_rate": 1.5673925844890524e-08, "loss": 0.1123, "step": 53070 }, { "epoch": 0.9225086478124076, "grad_norm": 1.0485112141349373, "learning_rate": 1.5666933724789112e-08, "loss": 0.1059, "step": 53071 }, { "epoch": 0.9225260303499104, "grad_norm": 1.5623699657471302, "learning_rate": 1.5659943139801125e-08, "loss": 0.1513, "step": 53072 }, { "epoch": 0.9225434128874133, "grad_norm": 2.0958216043549673, "learning_rate": 1.565295408994849e-08, "loss": 0.2018, "step": 53073 }, { "epoch": 0.9225607954249161, "grad_norm": 2.1168733755075446, "learning_rate": 1.564596657525352e-08, "loss": 0.163, "step": 53074 }, { "epoch": 0.9225781779624189, "grad_norm": 1.538292818974339, "learning_rate": 1.5638980595738427e-08, "loss": 0.17, "step": 53075 }, { "epoch": 0.9225955604999218, "grad_norm": 1.953485992367219, "learning_rate": 1.5631996151425188e-08, "loss": 0.2086, "step": 53076 }, { "epoch": 0.9226129430374246, "grad_norm": 1.1682412319982585, "learning_rate": 1.5625013242336004e-08, "loss": 0.1113, "step": 53077 }, { "epoch": 0.9226303255749274, "grad_norm": 6.0419304844421555, "learning_rate": 1.5618031868493087e-08, "loss": 0.1652, "step": 53078 }, { "epoch": 0.9226477081124302, "grad_norm": 1.7765332079821778, "learning_rate": 1.5611052029918527e-08, "loss": 0.1742, "step": 53079 }, { "epoch": 0.9226650906499331, "grad_norm": 1.4128051793835326, "learning_rate": 1.560407372663436e-08, "loss": 0.1435, "step": 53080 }, { "epoch": 0.9226824731874359, "grad_norm": 0.8952950206629701, "learning_rate": 1.5597096958662792e-08, "loss": 0.107, "step": 53081 }, { "epoch": 0.9226998557249387, "grad_norm": 0.8915504113956876, "learning_rate": 1.559012172602586e-08, "loss": 0.1291, "step": 53082 }, { "epoch": 0.9227172382624416, "grad_norm": 1.127059705109251, "learning_rate": 1.5583148028745773e-08, "loss": 0.1094, "step": 53083 }, { "epoch": 0.9227346207999444, "grad_norm": 1.238334942084071, "learning_rate": 1.557617586684462e-08, "loss": 0.1744, "step": 53084 }, { "epoch": 0.9227520033374472, "grad_norm": 1.2344680962624988, "learning_rate": 1.5569205240344386e-08, "loss": 0.1239, "step": 53085 }, { "epoch": 0.92276938587495, "grad_norm": 1.295045909610216, "learning_rate": 1.5562236149267272e-08, "loss": 0.1189, "step": 53086 }, { "epoch": 0.9227867684124529, "grad_norm": 1.2257914732428752, "learning_rate": 1.5555268593635374e-08, "loss": 0.1657, "step": 53087 }, { "epoch": 0.9228041509499557, "grad_norm": 1.3591729033066393, "learning_rate": 1.5548302573470672e-08, "loss": 0.2854, "step": 53088 }, { "epoch": 0.9228215334874585, "grad_norm": 1.0168523800715095, "learning_rate": 1.554133808879532e-08, "loss": 0.1537, "step": 53089 }, { "epoch": 0.9228389160249614, "grad_norm": 1.3946074244257431, "learning_rate": 1.5534375139631405e-08, "loss": 0.1143, "step": 53090 }, { "epoch": 0.9228562985624641, "grad_norm": 1.3405525374430174, "learning_rate": 1.552741372600108e-08, "loss": 0.2227, "step": 53091 }, { "epoch": 0.9228736810999669, "grad_norm": 1.2017299778783899, "learning_rate": 1.552045384792616e-08, "loss": 0.1274, "step": 53092 }, { "epoch": 0.9228910636374698, "grad_norm": 0.9952317886384479, "learning_rate": 1.551349550542885e-08, "loss": 0.1366, "step": 53093 }, { "epoch": 0.9229084461749726, "grad_norm": 1.7719263954374473, "learning_rate": 1.5506538698531302e-08, "loss": 0.1721, "step": 53094 }, { "epoch": 0.9229258287124754, "grad_norm": 1.5486434407747236, "learning_rate": 1.549958342725538e-08, "loss": 0.1385, "step": 53095 }, { "epoch": 0.9229432112499782, "grad_norm": 1.3158018143846582, "learning_rate": 1.549262969162324e-08, "loss": 0.1455, "step": 53096 }, { "epoch": 0.9229605937874811, "grad_norm": 1.4432746779593058, "learning_rate": 1.5485677491656913e-08, "loss": 0.1323, "step": 53097 }, { "epoch": 0.9229779763249839, "grad_norm": 1.8827095991997524, "learning_rate": 1.5478726827378442e-08, "loss": 0.144, "step": 53098 }, { "epoch": 0.9229953588624867, "grad_norm": 2.9913584809529157, "learning_rate": 1.5471777698809864e-08, "loss": 0.1817, "step": 53099 }, { "epoch": 0.9230127413999896, "grad_norm": 1.0467606402686662, "learning_rate": 1.5464830105973048e-08, "loss": 0.1543, "step": 53100 }, { "epoch": 0.9230301239374924, "grad_norm": 1.0153371363907735, "learning_rate": 1.54578840488902e-08, "loss": 0.1799, "step": 53101 }, { "epoch": 0.9230475064749952, "grad_norm": 1.6327893297794445, "learning_rate": 1.5450939527583306e-08, "loss": 0.1319, "step": 53102 }, { "epoch": 0.9230648890124981, "grad_norm": 2.0294086547945507, "learning_rate": 1.54439965420744e-08, "loss": 0.1353, "step": 53103 }, { "epoch": 0.9230822715500009, "grad_norm": 1.6133006150536144, "learning_rate": 1.5437055092385353e-08, "loss": 0.0935, "step": 53104 }, { "epoch": 0.9230996540875037, "grad_norm": 1.1923701494944778, "learning_rate": 1.5430115178538316e-08, "loss": 0.1248, "step": 53105 }, { "epoch": 0.9231170366250065, "grad_norm": 1.0251305539520326, "learning_rate": 1.542317680055516e-08, "loss": 0.169, "step": 53106 }, { "epoch": 0.9231344191625094, "grad_norm": 0.8164067096869515, "learning_rate": 1.5416239958457978e-08, "loss": 0.1451, "step": 53107 }, { "epoch": 0.9231518017000122, "grad_norm": 0.8831007408589592, "learning_rate": 1.540930465226864e-08, "loss": 0.1581, "step": 53108 }, { "epoch": 0.923169184237515, "grad_norm": 1.160127372844699, "learning_rate": 1.5402370882009297e-08, "loss": 0.0944, "step": 53109 }, { "epoch": 0.9231865667750179, "grad_norm": 0.8869775853138246, "learning_rate": 1.539543864770182e-08, "loss": 0.157, "step": 53110 }, { "epoch": 0.9232039493125206, "grad_norm": 1.2622587167476664, "learning_rate": 1.5388507949368134e-08, "loss": 0.1852, "step": 53111 }, { "epoch": 0.9232213318500234, "grad_norm": 1.0442539345369313, "learning_rate": 1.5381578787030337e-08, "loss": 0.1698, "step": 53112 }, { "epoch": 0.9232387143875262, "grad_norm": 1.5028520834461074, "learning_rate": 1.5374651160710296e-08, "loss": 0.116, "step": 53113 }, { "epoch": 0.9232560969250291, "grad_norm": 1.3857062123484873, "learning_rate": 1.5367725070429993e-08, "loss": 0.154, "step": 53114 }, { "epoch": 0.9232734794625319, "grad_norm": 5.371252132223151, "learning_rate": 1.536080051621136e-08, "loss": 0.1747, "step": 53115 }, { "epoch": 0.9232908620000347, "grad_norm": 1.1951747470312146, "learning_rate": 1.5353877498076317e-08, "loss": 0.1515, "step": 53116 }, { "epoch": 0.9233082445375376, "grad_norm": 0.7370217324395008, "learning_rate": 1.5346956016046964e-08, "loss": 0.1375, "step": 53117 }, { "epoch": 0.9233256270750404, "grad_norm": 1.0835404577187757, "learning_rate": 1.5340036070145113e-08, "loss": 0.1384, "step": 53118 }, { "epoch": 0.9233430096125432, "grad_norm": 1.1437301899596235, "learning_rate": 1.533311766039258e-08, "loss": 0.1116, "step": 53119 }, { "epoch": 0.9233603921500461, "grad_norm": 1.550746351141273, "learning_rate": 1.532620078681157e-08, "loss": 0.1327, "step": 53120 }, { "epoch": 0.9233777746875489, "grad_norm": 1.4214702157691528, "learning_rate": 1.531928544942379e-08, "loss": 0.0963, "step": 53121 }, { "epoch": 0.9233951572250517, "grad_norm": 1.0663750796122946, "learning_rate": 1.5312371648251276e-08, "loss": 0.1029, "step": 53122 }, { "epoch": 0.9234125397625546, "grad_norm": 1.2598580328555906, "learning_rate": 1.53054593833159e-08, "loss": 0.2171, "step": 53123 }, { "epoch": 0.9234299223000574, "grad_norm": 1.3388892581506595, "learning_rate": 1.5298548654639533e-08, "loss": 0.1444, "step": 53124 }, { "epoch": 0.9234473048375602, "grad_norm": 1.061719288436987, "learning_rate": 1.52916394622441e-08, "loss": 0.0928, "step": 53125 }, { "epoch": 0.923464687375063, "grad_norm": 0.9118964264932753, "learning_rate": 1.5284731806151585e-08, "loss": 0.0975, "step": 53126 }, { "epoch": 0.9234820699125659, "grad_norm": 1.6633040391044627, "learning_rate": 1.5277825686383693e-08, "loss": 0.1481, "step": 53127 }, { "epoch": 0.9234994524500687, "grad_norm": 1.3282901460545964, "learning_rate": 1.527092110296252e-08, "loss": 0.1465, "step": 53128 }, { "epoch": 0.9235168349875715, "grad_norm": 1.7813256914269837, "learning_rate": 1.5264018055909878e-08, "loss": 0.2197, "step": 53129 }, { "epoch": 0.9235342175250744, "grad_norm": 2.5175538123286847, "learning_rate": 1.5257116545247585e-08, "loss": 0.2138, "step": 53130 }, { "epoch": 0.9235516000625771, "grad_norm": 1.2930465966683105, "learning_rate": 1.525021657099751e-08, "loss": 0.2069, "step": 53131 }, { "epoch": 0.9235689826000799, "grad_norm": 0.968759834549373, "learning_rate": 1.524331813318164e-08, "loss": 0.1513, "step": 53132 }, { "epoch": 0.9235863651375827, "grad_norm": 1.611870881728778, "learning_rate": 1.5236421231821784e-08, "loss": 0.1941, "step": 53133 }, { "epoch": 0.9236037476750856, "grad_norm": 1.9173446014134279, "learning_rate": 1.5229525866939762e-08, "loss": 0.1473, "step": 53134 }, { "epoch": 0.9236211302125884, "grad_norm": 0.9308667878221377, "learning_rate": 1.5222632038557504e-08, "loss": 0.1463, "step": 53135 }, { "epoch": 0.9236385127500912, "grad_norm": 1.2987024732078896, "learning_rate": 1.5215739746696764e-08, "loss": 0.139, "step": 53136 }, { "epoch": 0.9236558952875941, "grad_norm": 1.3040250505494781, "learning_rate": 1.5208848991379473e-08, "loss": 0.1177, "step": 53137 }, { "epoch": 0.9236732778250969, "grad_norm": 1.727427508325673, "learning_rate": 1.5201959772627392e-08, "loss": 0.1146, "step": 53138 }, { "epoch": 0.9236906603625997, "grad_norm": 1.7031508861198386, "learning_rate": 1.5195072090462392e-08, "loss": 0.1892, "step": 53139 }, { "epoch": 0.9237080429001026, "grad_norm": 1.8412974967189426, "learning_rate": 1.518818594490634e-08, "loss": 0.1765, "step": 53140 }, { "epoch": 0.9237254254376054, "grad_norm": 0.9967162441875391, "learning_rate": 1.5181301335981e-08, "loss": 0.1652, "step": 53141 }, { "epoch": 0.9237428079751082, "grad_norm": 0.7378946327960096, "learning_rate": 1.51744182637083e-08, "loss": 0.1808, "step": 53142 }, { "epoch": 0.923760190512611, "grad_norm": 1.1251243167210863, "learning_rate": 1.5167536728109943e-08, "loss": 0.2098, "step": 53143 }, { "epoch": 0.9237775730501139, "grad_norm": 0.9191911236471296, "learning_rate": 1.5160656729207798e-08, "loss": 0.0918, "step": 53144 }, { "epoch": 0.9237949555876167, "grad_norm": 1.3303241093005127, "learning_rate": 1.5153778267023633e-08, "loss": 0.1565, "step": 53145 }, { "epoch": 0.9238123381251195, "grad_norm": 1.098089686244498, "learning_rate": 1.5146901341579256e-08, "loss": 0.1575, "step": 53146 }, { "epoch": 0.9238297206626224, "grad_norm": 2.0107678296833664, "learning_rate": 1.5140025952896485e-08, "loss": 0.1698, "step": 53147 }, { "epoch": 0.9238471032001252, "grad_norm": 1.3211223953984579, "learning_rate": 1.5133152100997138e-08, "loss": 0.0897, "step": 53148 }, { "epoch": 0.923864485737628, "grad_norm": 1.3415205388402796, "learning_rate": 1.5126279785902972e-08, "loss": 0.2122, "step": 53149 }, { "epoch": 0.9238818682751309, "grad_norm": 1.4465038650510482, "learning_rate": 1.5119409007635697e-08, "loss": 0.1366, "step": 53150 }, { "epoch": 0.9238992508126336, "grad_norm": 1.0868135169783126, "learning_rate": 1.5112539766217236e-08, "loss": 0.1808, "step": 53151 }, { "epoch": 0.9239166333501364, "grad_norm": 1.5901861485674391, "learning_rate": 1.510567206166924e-08, "loss": 0.1614, "step": 53152 }, { "epoch": 0.9239340158876392, "grad_norm": 1.5962520995958873, "learning_rate": 1.5098805894013523e-08, "loss": 0.1263, "step": 53153 }, { "epoch": 0.9239513984251421, "grad_norm": 1.785371920366675, "learning_rate": 1.50919412632719e-08, "loss": 0.1748, "step": 53154 }, { "epoch": 0.9239687809626449, "grad_norm": 1.4489284461036336, "learning_rate": 1.5085078169466025e-08, "loss": 0.1189, "step": 53155 }, { "epoch": 0.9239861635001477, "grad_norm": 1.115213392584781, "learning_rate": 1.507821661261771e-08, "loss": 0.1434, "step": 53156 }, { "epoch": 0.9240035460376506, "grad_norm": 1.2767275987130282, "learning_rate": 1.5071356592748717e-08, "loss": 0.1309, "step": 53157 }, { "epoch": 0.9240209285751534, "grad_norm": 2.8804523725233224, "learning_rate": 1.5064498109880696e-08, "loss": 0.1356, "step": 53158 }, { "epoch": 0.9240383111126562, "grad_norm": 0.9589414413290649, "learning_rate": 1.505764116403546e-08, "loss": 0.1195, "step": 53159 }, { "epoch": 0.924055693650159, "grad_norm": 1.5582428886954491, "learning_rate": 1.505078575523483e-08, "loss": 0.123, "step": 53160 }, { "epoch": 0.9240730761876619, "grad_norm": 1.6491184518334192, "learning_rate": 1.504393188350045e-08, "loss": 0.1486, "step": 53161 }, { "epoch": 0.9240904587251647, "grad_norm": 2.037376133500421, "learning_rate": 1.5037079548853914e-08, "loss": 0.1902, "step": 53162 }, { "epoch": 0.9241078412626675, "grad_norm": 1.6112270149594259, "learning_rate": 1.50302287513171e-08, "loss": 0.1565, "step": 53163 }, { "epoch": 0.9241252238001704, "grad_norm": 1.5600515078460238, "learning_rate": 1.5023379490911702e-08, "loss": 0.1515, "step": 53164 }, { "epoch": 0.9241426063376732, "grad_norm": 1.0495815004955649, "learning_rate": 1.5016531767659436e-08, "loss": 0.1528, "step": 53165 }, { "epoch": 0.924159988875176, "grad_norm": 1.676276111399897, "learning_rate": 1.5009685581581943e-08, "loss": 0.0939, "step": 53166 }, { "epoch": 0.9241773714126789, "grad_norm": 2.166593933886836, "learning_rate": 1.5002840932701043e-08, "loss": 0.1482, "step": 53167 }, { "epoch": 0.9241947539501817, "grad_norm": 1.441907059828022, "learning_rate": 1.4995997821038277e-08, "loss": 0.1915, "step": 53168 }, { "epoch": 0.9242121364876845, "grad_norm": 1.6304011241847352, "learning_rate": 1.4989156246615343e-08, "loss": 0.1522, "step": 53169 }, { "epoch": 0.9242295190251874, "grad_norm": 1.4309384942870709, "learning_rate": 1.498231620945406e-08, "loss": 0.1544, "step": 53170 }, { "epoch": 0.9242469015626901, "grad_norm": 1.2423430510938243, "learning_rate": 1.4975477709576024e-08, "loss": 0.1244, "step": 53171 }, { "epoch": 0.9242642841001929, "grad_norm": 1.5192875333002729, "learning_rate": 1.4968640747002936e-08, "loss": 0.151, "step": 53172 }, { "epoch": 0.9242816666376957, "grad_norm": 1.1275607789962092, "learning_rate": 1.496180532175645e-08, "loss": 0.1613, "step": 53173 }, { "epoch": 0.9242990491751986, "grad_norm": 1.2872870469033206, "learning_rate": 1.4954971433858264e-08, "loss": 0.1323, "step": 53174 }, { "epoch": 0.9243164317127014, "grad_norm": 1.415886405999285, "learning_rate": 1.494813908332998e-08, "loss": 0.1269, "step": 53175 }, { "epoch": 0.9243338142502042, "grad_norm": 1.0678512086045198, "learning_rate": 1.4941308270193243e-08, "loss": 0.2096, "step": 53176 }, { "epoch": 0.924351196787707, "grad_norm": 3.6168909113928045, "learning_rate": 1.4934478994469755e-08, "loss": 0.1811, "step": 53177 }, { "epoch": 0.9243685793252099, "grad_norm": 1.292941316210239, "learning_rate": 1.4927651256181172e-08, "loss": 0.1591, "step": 53178 }, { "epoch": 0.9243859618627127, "grad_norm": 1.6917307709338727, "learning_rate": 1.4920825055349086e-08, "loss": 0.1863, "step": 53179 }, { "epoch": 0.9244033444002155, "grad_norm": 2.4202847677025057, "learning_rate": 1.491400039199525e-08, "loss": 0.1685, "step": 53180 }, { "epoch": 0.9244207269377184, "grad_norm": 1.3200336463828763, "learning_rate": 1.4907177266141047e-08, "loss": 0.1639, "step": 53181 }, { "epoch": 0.9244381094752212, "grad_norm": 1.5374769875374976, "learning_rate": 1.4900355677808341e-08, "loss": 0.181, "step": 53182 }, { "epoch": 0.924455492012724, "grad_norm": 1.2121583828757507, "learning_rate": 1.4893535627018672e-08, "loss": 0.1309, "step": 53183 }, { "epoch": 0.9244728745502269, "grad_norm": 0.7952935133190999, "learning_rate": 1.488671711379369e-08, "loss": 0.1629, "step": 53184 }, { "epoch": 0.9244902570877297, "grad_norm": 1.185590913020483, "learning_rate": 1.4879900138154877e-08, "loss": 0.2183, "step": 53185 }, { "epoch": 0.9245076396252325, "grad_norm": 2.7404861256874686, "learning_rate": 1.487308470012405e-08, "loss": 0.2005, "step": 53186 }, { "epoch": 0.9245250221627354, "grad_norm": 0.9484755204469717, "learning_rate": 1.4866270799722636e-08, "loss": 0.1025, "step": 53187 }, { "epoch": 0.9245424047002382, "grad_norm": 1.5867807052748872, "learning_rate": 1.4859458436972283e-08, "loss": 0.1862, "step": 53188 }, { "epoch": 0.924559787237741, "grad_norm": 1.068188239242512, "learning_rate": 1.485264761189453e-08, "loss": 0.1089, "step": 53189 }, { "epoch": 0.9245771697752437, "grad_norm": 1.018275244574803, "learning_rate": 1.4845838324511139e-08, "loss": 0.1273, "step": 53190 }, { "epoch": 0.9245945523127466, "grad_norm": 1.6390044600977631, "learning_rate": 1.4839030574843536e-08, "loss": 0.1988, "step": 53191 }, { "epoch": 0.9246119348502494, "grad_norm": 0.9478387628974229, "learning_rate": 1.4832224362913315e-08, "loss": 0.1468, "step": 53192 }, { "epoch": 0.9246293173877522, "grad_norm": 1.2824140628394172, "learning_rate": 1.4825419688742125e-08, "loss": 0.2585, "step": 53193 }, { "epoch": 0.9246466999252551, "grad_norm": 1.4780114307902303, "learning_rate": 1.4818616552351449e-08, "loss": 0.1424, "step": 53194 }, { "epoch": 0.9246640824627579, "grad_norm": 1.3749355679188284, "learning_rate": 1.481181495376288e-08, "loss": 0.1317, "step": 53195 }, { "epoch": 0.9246814650002607, "grad_norm": 0.9471575521327288, "learning_rate": 1.4805014892998013e-08, "loss": 0.0999, "step": 53196 }, { "epoch": 0.9246988475377635, "grad_norm": 2.802786008364428, "learning_rate": 1.4798216370078332e-08, "loss": 0.1286, "step": 53197 }, { "epoch": 0.9247162300752664, "grad_norm": 1.4121300870528397, "learning_rate": 1.4791419385025428e-08, "loss": 0.155, "step": 53198 }, { "epoch": 0.9247336126127692, "grad_norm": 1.407530495432384, "learning_rate": 1.4784623937860896e-08, "loss": 0.103, "step": 53199 }, { "epoch": 0.924750995150272, "grad_norm": 2.0945633732531155, "learning_rate": 1.4777830028606108e-08, "loss": 0.1592, "step": 53200 }, { "epoch": 0.9247683776877749, "grad_norm": 1.0309229193699778, "learning_rate": 1.477103765728277e-08, "loss": 0.1447, "step": 53201 }, { "epoch": 0.9247857602252777, "grad_norm": 2.243591888039373, "learning_rate": 1.4764246823912364e-08, "loss": 0.1197, "step": 53202 }, { "epoch": 0.9248031427627805, "grad_norm": 1.2094723609645002, "learning_rate": 1.475745752851637e-08, "loss": 0.1422, "step": 53203 }, { "epoch": 0.9248205253002834, "grad_norm": 1.7487007322831067, "learning_rate": 1.4750669771116386e-08, "loss": 0.1255, "step": 53204 }, { "epoch": 0.9248379078377862, "grad_norm": 1.5375285875753066, "learning_rate": 1.4743883551733838e-08, "loss": 0.1383, "step": 53205 }, { "epoch": 0.924855290375289, "grad_norm": 1.6624450814266805, "learning_rate": 1.4737098870390263e-08, "loss": 0.1714, "step": 53206 }, { "epoch": 0.9248726729127918, "grad_norm": 1.5899394223868073, "learning_rate": 1.4730315727107145e-08, "loss": 0.1073, "step": 53207 }, { "epoch": 0.9248900554502947, "grad_norm": 0.7317522099867458, "learning_rate": 1.4723534121906023e-08, "loss": 0.1617, "step": 53208 }, { "epoch": 0.9249074379877975, "grad_norm": 1.6152828955975431, "learning_rate": 1.4716754054808378e-08, "loss": 0.1927, "step": 53209 }, { "epoch": 0.9249248205253002, "grad_norm": 0.9576497939716118, "learning_rate": 1.4709975525835694e-08, "loss": 0.1234, "step": 53210 }, { "epoch": 0.9249422030628031, "grad_norm": 1.2949022306822526, "learning_rate": 1.4703198535009564e-08, "loss": 0.1136, "step": 53211 }, { "epoch": 0.9249595856003059, "grad_norm": 1.6187674872741808, "learning_rate": 1.469642308235125e-08, "loss": 0.1703, "step": 53212 }, { "epoch": 0.9249769681378087, "grad_norm": 0.894945673738528, "learning_rate": 1.46896491678824e-08, "loss": 0.1714, "step": 53213 }, { "epoch": 0.9249943506753115, "grad_norm": 1.8878202682888992, "learning_rate": 1.4682876791624388e-08, "loss": 0.1469, "step": 53214 }, { "epoch": 0.9250117332128144, "grad_norm": 1.1119918677200686, "learning_rate": 1.467610595359875e-08, "loss": 0.143, "step": 53215 }, { "epoch": 0.9250291157503172, "grad_norm": 1.1511501132525992, "learning_rate": 1.4669336653826914e-08, "loss": 0.1943, "step": 53216 }, { "epoch": 0.92504649828782, "grad_norm": 1.9519438590166818, "learning_rate": 1.4662568892330307e-08, "loss": 0.1799, "step": 53217 }, { "epoch": 0.9250638808253229, "grad_norm": 3.060374137476071, "learning_rate": 1.4655802669130523e-08, "loss": 0.1632, "step": 53218 }, { "epoch": 0.9250812633628257, "grad_norm": 1.4008301989230747, "learning_rate": 1.4649037984248825e-08, "loss": 0.1473, "step": 53219 }, { "epoch": 0.9250986459003285, "grad_norm": 2.0131400353775932, "learning_rate": 1.4642274837706636e-08, "loss": 0.1244, "step": 53220 }, { "epoch": 0.9251160284378314, "grad_norm": 1.6750772885739904, "learning_rate": 1.4635513229525553e-08, "loss": 0.203, "step": 53221 }, { "epoch": 0.9251334109753342, "grad_norm": 0.9136495793436268, "learning_rate": 1.4628753159726947e-08, "loss": 0.16, "step": 53222 }, { "epoch": 0.925150793512837, "grad_norm": 1.1419837649106397, "learning_rate": 1.4621994628332246e-08, "loss": 0.1915, "step": 53223 }, { "epoch": 0.9251681760503399, "grad_norm": 4.558496861081713, "learning_rate": 1.461523763536282e-08, "loss": 0.2017, "step": 53224 }, { "epoch": 0.9251855585878427, "grad_norm": 2.4932136125687894, "learning_rate": 1.4608482180840153e-08, "loss": 0.1236, "step": 53225 }, { "epoch": 0.9252029411253455, "grad_norm": 1.447314968386379, "learning_rate": 1.4601728264785618e-08, "loss": 0.1321, "step": 53226 }, { "epoch": 0.9252203236628483, "grad_norm": 1.6406102672760965, "learning_rate": 1.459497588722064e-08, "loss": 0.1865, "step": 53227 }, { "epoch": 0.9252377062003512, "grad_norm": 0.9421065142072756, "learning_rate": 1.4588225048166535e-08, "loss": 0.1674, "step": 53228 }, { "epoch": 0.925255088737854, "grad_norm": 2.718446497671784, "learning_rate": 1.4581475747644845e-08, "loss": 0.1652, "step": 53229 }, { "epoch": 0.9252724712753567, "grad_norm": 2.2694586080999577, "learning_rate": 1.4574727985676938e-08, "loss": 0.215, "step": 53230 }, { "epoch": 0.9252898538128596, "grad_norm": 1.5651522006475802, "learning_rate": 1.4567981762284076e-08, "loss": 0.1336, "step": 53231 }, { "epoch": 0.9253072363503624, "grad_norm": 2.978528605214861, "learning_rate": 1.4561237077487742e-08, "loss": 0.3517, "step": 53232 }, { "epoch": 0.9253246188878652, "grad_norm": 1.2882387411974099, "learning_rate": 1.4554493931309308e-08, "loss": 0.2162, "step": 53233 }, { "epoch": 0.925342001425368, "grad_norm": 1.7365671808978127, "learning_rate": 1.4547752323770146e-08, "loss": 0.1404, "step": 53234 }, { "epoch": 0.9253593839628709, "grad_norm": 2.5693161404190574, "learning_rate": 1.4541012254891628e-08, "loss": 0.1771, "step": 53235 }, { "epoch": 0.9253767665003737, "grad_norm": 1.602404546051709, "learning_rate": 1.4534273724695012e-08, "loss": 0.1595, "step": 53236 }, { "epoch": 0.9253941490378765, "grad_norm": 1.8459297417798568, "learning_rate": 1.452753673320184e-08, "loss": 0.1086, "step": 53237 }, { "epoch": 0.9254115315753794, "grad_norm": 2.429365852835649, "learning_rate": 1.452080128043337e-08, "loss": 0.1689, "step": 53238 }, { "epoch": 0.9254289141128822, "grad_norm": 1.0608099239557478, "learning_rate": 1.451406736641081e-08, "loss": 0.1187, "step": 53239 }, { "epoch": 0.925446296650385, "grad_norm": 1.1925810619914934, "learning_rate": 1.4507334991155752e-08, "loss": 0.1508, "step": 53240 }, { "epoch": 0.9254636791878879, "grad_norm": 1.430153909710285, "learning_rate": 1.4500604154689455e-08, "loss": 0.1451, "step": 53241 }, { "epoch": 0.9254810617253907, "grad_norm": 2.0692199674863927, "learning_rate": 1.4493874857033184e-08, "loss": 0.1287, "step": 53242 }, { "epoch": 0.9254984442628935, "grad_norm": 1.1543653313442825, "learning_rate": 1.4487147098208308e-08, "loss": 0.1106, "step": 53243 }, { "epoch": 0.9255158268003963, "grad_norm": 0.9216912920052376, "learning_rate": 1.4480420878236143e-08, "loss": 0.1757, "step": 53244 }, { "epoch": 0.9255332093378992, "grad_norm": 1.6416745165273459, "learning_rate": 1.4473696197138008e-08, "loss": 0.1338, "step": 53245 }, { "epoch": 0.925550591875402, "grad_norm": 1.6868177189488738, "learning_rate": 1.4466973054935216e-08, "loss": 0.1271, "step": 53246 }, { "epoch": 0.9255679744129048, "grad_norm": 1.3160913622107853, "learning_rate": 1.4460251451649085e-08, "loss": 0.1316, "step": 53247 }, { "epoch": 0.9255853569504077, "grad_norm": 1.3525027940621683, "learning_rate": 1.4453531387300932e-08, "loss": 0.1801, "step": 53248 }, { "epoch": 0.9256027394879105, "grad_norm": 1.842269032641828, "learning_rate": 1.4446812861912072e-08, "loss": 0.1399, "step": 53249 }, { "epoch": 0.9256201220254132, "grad_norm": 1.555424660957496, "learning_rate": 1.4440095875503766e-08, "loss": 0.1733, "step": 53250 }, { "epoch": 0.925637504562916, "grad_norm": 1.553676912300134, "learning_rate": 1.4433380428097219e-08, "loss": 0.1622, "step": 53251 }, { "epoch": 0.9256548871004189, "grad_norm": 1.2386100681853287, "learning_rate": 1.442666651971386e-08, "loss": 0.1255, "step": 53252 }, { "epoch": 0.9256722696379217, "grad_norm": 2.1635012076536544, "learning_rate": 1.4419954150374947e-08, "loss": 0.1604, "step": 53253 }, { "epoch": 0.9256896521754245, "grad_norm": 1.8839104356310221, "learning_rate": 1.4413243320101686e-08, "loss": 0.1423, "step": 53254 }, { "epoch": 0.9257070347129274, "grad_norm": 1.2477908832732199, "learning_rate": 1.4406534028915395e-08, "loss": 0.1363, "step": 53255 }, { "epoch": 0.9257244172504302, "grad_norm": 1.3247324767310928, "learning_rate": 1.4399826276837334e-08, "loss": 0.1652, "step": 53256 }, { "epoch": 0.925741799787933, "grad_norm": 2.460764094841008, "learning_rate": 1.4393120063888764e-08, "loss": 0.1786, "step": 53257 }, { "epoch": 0.9257591823254359, "grad_norm": 1.4512800427041748, "learning_rate": 1.4386415390090944e-08, "loss": 0.1551, "step": 53258 }, { "epoch": 0.9257765648629387, "grad_norm": 1.845851934552928, "learning_rate": 1.4379712255465026e-08, "loss": 0.1284, "step": 53259 }, { "epoch": 0.9257939474004415, "grad_norm": 1.7538568762871656, "learning_rate": 1.4373010660032437e-08, "loss": 0.1466, "step": 53260 }, { "epoch": 0.9258113299379443, "grad_norm": 1.1211679222446407, "learning_rate": 1.436631060381427e-08, "loss": 0.1302, "step": 53261 }, { "epoch": 0.9258287124754472, "grad_norm": 1.9640567475026804, "learning_rate": 1.4359612086831896e-08, "loss": 0.1609, "step": 53262 }, { "epoch": 0.92584609501295, "grad_norm": 1.0053816944165046, "learning_rate": 1.4352915109106412e-08, "loss": 0.2306, "step": 53263 }, { "epoch": 0.9258634775504528, "grad_norm": 1.897021117038881, "learning_rate": 1.4346219670659077e-08, "loss": 0.162, "step": 53264 }, { "epoch": 0.9258808600879557, "grad_norm": 2.263063846781066, "learning_rate": 1.4339525771511151e-08, "loss": 0.1896, "step": 53265 }, { "epoch": 0.9258982426254585, "grad_norm": 2.097866783881035, "learning_rate": 1.4332833411683842e-08, "loss": 0.1666, "step": 53266 }, { "epoch": 0.9259156251629613, "grad_norm": 1.7079704381332295, "learning_rate": 1.4326142591198353e-08, "loss": 0.1957, "step": 53267 }, { "epoch": 0.9259330077004642, "grad_norm": 1.8603934912003763, "learning_rate": 1.4319453310075946e-08, "loss": 0.1809, "step": 53268 }, { "epoch": 0.925950390237967, "grad_norm": 0.9678300077926719, "learning_rate": 1.4312765568337715e-08, "loss": 0.17, "step": 53269 }, { "epoch": 0.9259677727754697, "grad_norm": 1.2136875996022165, "learning_rate": 1.4306079366004863e-08, "loss": 0.1449, "step": 53270 }, { "epoch": 0.9259851553129725, "grad_norm": 1.2359779781097622, "learning_rate": 1.4299394703098655e-08, "loss": 0.1081, "step": 53271 }, { "epoch": 0.9260025378504754, "grad_norm": 1.300560577664293, "learning_rate": 1.4292711579640293e-08, "loss": 0.1377, "step": 53272 }, { "epoch": 0.9260199203879782, "grad_norm": 1.031557760573969, "learning_rate": 1.4286029995650928e-08, "loss": 0.1483, "step": 53273 }, { "epoch": 0.926037302925481, "grad_norm": 0.9030905651963984, "learning_rate": 1.4279349951151708e-08, "loss": 0.0913, "step": 53274 }, { "epoch": 0.9260546854629839, "grad_norm": 0.7874169470238277, "learning_rate": 1.4272671446163843e-08, "loss": 0.1174, "step": 53275 }, { "epoch": 0.9260720680004867, "grad_norm": 1.6019072582219036, "learning_rate": 1.4265994480708476e-08, "loss": 0.1905, "step": 53276 }, { "epoch": 0.9260894505379895, "grad_norm": 1.7646879016548926, "learning_rate": 1.4259319054806762e-08, "loss": 0.1631, "step": 53277 }, { "epoch": 0.9261068330754924, "grad_norm": 0.7616025701054749, "learning_rate": 1.4252645168479849e-08, "loss": 0.1873, "step": 53278 }, { "epoch": 0.9261242156129952, "grad_norm": 1.1406688431700025, "learning_rate": 1.4245972821748941e-08, "loss": 0.1608, "step": 53279 }, { "epoch": 0.926141598150498, "grad_norm": 0.7007436597578298, "learning_rate": 1.4239302014635136e-08, "loss": 0.2026, "step": 53280 }, { "epoch": 0.9261589806880008, "grad_norm": 1.1911831888562043, "learning_rate": 1.423263274715969e-08, "loss": 0.1355, "step": 53281 }, { "epoch": 0.9261763632255037, "grad_norm": 1.401057779149509, "learning_rate": 1.4225965019343589e-08, "loss": 0.1178, "step": 53282 }, { "epoch": 0.9261937457630065, "grad_norm": 2.0537484966550363, "learning_rate": 1.4219298831208038e-08, "loss": 0.1876, "step": 53283 }, { "epoch": 0.9262111283005093, "grad_norm": 1.1309036034995532, "learning_rate": 1.4212634182774186e-08, "loss": 0.1951, "step": 53284 }, { "epoch": 0.9262285108380122, "grad_norm": 1.5947798062581853, "learning_rate": 1.4205971074063127e-08, "loss": 0.1404, "step": 53285 }, { "epoch": 0.926245893375515, "grad_norm": 1.6162533089992928, "learning_rate": 1.4199309505095902e-08, "loss": 0.1916, "step": 53286 }, { "epoch": 0.9262632759130178, "grad_norm": 1.136895402419355, "learning_rate": 1.4192649475893825e-08, "loss": 0.1462, "step": 53287 }, { "epoch": 0.9262806584505207, "grad_norm": 1.5515284090764871, "learning_rate": 1.4185990986477881e-08, "loss": 0.2796, "step": 53288 }, { "epoch": 0.9262980409880235, "grad_norm": 1.3951462074949699, "learning_rate": 1.4179334036869051e-08, "loss": 0.2248, "step": 53289 }, { "epoch": 0.9263154235255262, "grad_norm": 1.7220754202891082, "learning_rate": 1.4172678627088652e-08, "loss": 0.1745, "step": 53290 }, { "epoch": 0.926332806063029, "grad_norm": 1.2989684396702428, "learning_rate": 1.4166024757157724e-08, "loss": 0.1088, "step": 53291 }, { "epoch": 0.9263501886005319, "grad_norm": 1.213496980098892, "learning_rate": 1.4159372427097304e-08, "loss": 0.1545, "step": 53292 }, { "epoch": 0.9263675711380347, "grad_norm": 1.1843756216620784, "learning_rate": 1.4152721636928489e-08, "loss": 0.14, "step": 53293 }, { "epoch": 0.9263849536755375, "grad_norm": 0.9095987515744052, "learning_rate": 1.414607238667237e-08, "loss": 0.1467, "step": 53294 }, { "epoch": 0.9264023362130404, "grad_norm": 1.163000749153635, "learning_rate": 1.4139424676349986e-08, "loss": 0.1075, "step": 53295 }, { "epoch": 0.9264197187505432, "grad_norm": 1.0752129617642823, "learning_rate": 1.413277850598249e-08, "loss": 0.1456, "step": 53296 }, { "epoch": 0.926437101288046, "grad_norm": 2.018507441476938, "learning_rate": 1.4126133875590807e-08, "loss": 0.1568, "step": 53297 }, { "epoch": 0.9264544838255488, "grad_norm": 1.4649395107282166, "learning_rate": 1.4119490785196142e-08, "loss": 0.1895, "step": 53298 }, { "epoch": 0.9264718663630517, "grad_norm": 1.2665892860353503, "learning_rate": 1.4112849234819535e-08, "loss": 0.1249, "step": 53299 }, { "epoch": 0.9264892489005545, "grad_norm": 2.3973096980908166, "learning_rate": 1.4106209224482024e-08, "loss": 0.1782, "step": 53300 }, { "epoch": 0.9265066314380573, "grad_norm": 1.3401210152774754, "learning_rate": 1.4099570754204481e-08, "loss": 0.173, "step": 53301 }, { "epoch": 0.9265240139755602, "grad_norm": 2.7990468243007016, "learning_rate": 1.4092933824008224e-08, "loss": 0.1833, "step": 53302 }, { "epoch": 0.926541396513063, "grad_norm": 2.080002781566596, "learning_rate": 1.4086298433914123e-08, "loss": 0.296, "step": 53303 }, { "epoch": 0.9265587790505658, "grad_norm": 1.8238972405503702, "learning_rate": 1.4079664583943218e-08, "loss": 0.2078, "step": 53304 }, { "epoch": 0.9265761615880687, "grad_norm": 1.1986317687185908, "learning_rate": 1.4073032274116548e-08, "loss": 0.2085, "step": 53305 }, { "epoch": 0.9265935441255715, "grad_norm": 1.50782733161898, "learning_rate": 1.4066401504455262e-08, "loss": 0.1616, "step": 53306 }, { "epoch": 0.9266109266630743, "grad_norm": 1.1924486135709504, "learning_rate": 1.4059772274980175e-08, "loss": 0.1185, "step": 53307 }, { "epoch": 0.9266283092005771, "grad_norm": 1.3699987042697217, "learning_rate": 1.405314458571244e-08, "loss": 0.1017, "step": 53308 }, { "epoch": 0.92664569173808, "grad_norm": 1.1295361504832224, "learning_rate": 1.4046518436672927e-08, "loss": 0.1882, "step": 53309 }, { "epoch": 0.9266630742755827, "grad_norm": 2.2327642989190832, "learning_rate": 1.4039893827882731e-08, "loss": 0.1341, "step": 53310 }, { "epoch": 0.9266804568130855, "grad_norm": 0.9019622987844451, "learning_rate": 1.4033270759362892e-08, "loss": 0.1425, "step": 53311 }, { "epoch": 0.9266978393505884, "grad_norm": 0.9490010449351177, "learning_rate": 1.4026649231134335e-08, "loss": 0.1834, "step": 53312 }, { "epoch": 0.9267152218880912, "grad_norm": 1.4649095105679548, "learning_rate": 1.4020029243218101e-08, "loss": 0.1492, "step": 53313 }, { "epoch": 0.926732604425594, "grad_norm": 2.0902822890252297, "learning_rate": 1.4013410795635062e-08, "loss": 0.2023, "step": 53314 }, { "epoch": 0.9267499869630969, "grad_norm": 1.0970970446081534, "learning_rate": 1.4006793888406309e-08, "loss": 0.2102, "step": 53315 }, { "epoch": 0.9267673695005997, "grad_norm": 1.991010200225389, "learning_rate": 1.4000178521552775e-08, "loss": 0.3858, "step": 53316 }, { "epoch": 0.9267847520381025, "grad_norm": 0.8099220414073097, "learning_rate": 1.3993564695095383e-08, "loss": 0.1183, "step": 53317 }, { "epoch": 0.9268021345756053, "grad_norm": 1.1209057324816756, "learning_rate": 1.3986952409055175e-08, "loss": 0.1129, "step": 53318 }, { "epoch": 0.9268195171131082, "grad_norm": 1.4792822181178942, "learning_rate": 1.3980341663453078e-08, "loss": 0.1276, "step": 53319 }, { "epoch": 0.926836899650611, "grad_norm": 3.528709410387659, "learning_rate": 1.3973732458310017e-08, "loss": 0.1774, "step": 53320 }, { "epoch": 0.9268542821881138, "grad_norm": 0.9390319425949848, "learning_rate": 1.3967124793646922e-08, "loss": 0.1689, "step": 53321 }, { "epoch": 0.9268716647256167, "grad_norm": 1.3156992518394428, "learning_rate": 1.3960518669484833e-08, "loss": 0.1687, "step": 53322 }, { "epoch": 0.9268890472631195, "grad_norm": 1.5036949811406473, "learning_rate": 1.395391408584462e-08, "loss": 0.1884, "step": 53323 }, { "epoch": 0.9269064298006223, "grad_norm": 1.5347106600558031, "learning_rate": 1.3947311042747267e-08, "loss": 0.1402, "step": 53324 }, { "epoch": 0.9269238123381252, "grad_norm": 1.1768358806895678, "learning_rate": 1.3940709540213646e-08, "loss": 0.1303, "step": 53325 }, { "epoch": 0.926941194875628, "grad_norm": 0.8335816116326609, "learning_rate": 1.3934109578264686e-08, "loss": 0.2224, "step": 53326 }, { "epoch": 0.9269585774131308, "grad_norm": 1.6538289728793465, "learning_rate": 1.3927511156921313e-08, "loss": 0.1564, "step": 53327 }, { "epoch": 0.9269759599506336, "grad_norm": 2.7971917455636626, "learning_rate": 1.3920914276204398e-08, "loss": 0.5354, "step": 53328 }, { "epoch": 0.9269933424881365, "grad_norm": 1.3407410581792838, "learning_rate": 1.3914318936134927e-08, "loss": 0.1736, "step": 53329 }, { "epoch": 0.9270107250256392, "grad_norm": 2.0885295293785147, "learning_rate": 1.3907725136733772e-08, "loss": 0.1477, "step": 53330 }, { "epoch": 0.927028107563142, "grad_norm": 1.3200433401700133, "learning_rate": 1.3901132878021915e-08, "loss": 0.199, "step": 53331 }, { "epoch": 0.9270454901006449, "grad_norm": 1.2569552040721594, "learning_rate": 1.3894542160020062e-08, "loss": 0.1271, "step": 53332 }, { "epoch": 0.9270628726381477, "grad_norm": 2.8472261779190142, "learning_rate": 1.3887952982749307e-08, "loss": 0.1762, "step": 53333 }, { "epoch": 0.9270802551756505, "grad_norm": 1.3296699430014918, "learning_rate": 1.388136534623041e-08, "loss": 0.1826, "step": 53334 }, { "epoch": 0.9270976377131533, "grad_norm": 1.8374748021568943, "learning_rate": 1.3874779250484248e-08, "loss": 0.1755, "step": 53335 }, { "epoch": 0.9271150202506562, "grad_norm": 0.7006367973062476, "learning_rate": 1.3868194695531687e-08, "loss": 0.1052, "step": 53336 }, { "epoch": 0.927132402788159, "grad_norm": 1.6326116238348003, "learning_rate": 1.3861611681393714e-08, "loss": 0.1605, "step": 53337 }, { "epoch": 0.9271497853256618, "grad_norm": 1.302010455855381, "learning_rate": 1.3855030208091145e-08, "loss": 0.1713, "step": 53338 }, { "epoch": 0.9271671678631647, "grad_norm": 4.3139674778391885, "learning_rate": 1.3848450275644796e-08, "loss": 0.1607, "step": 53339 }, { "epoch": 0.9271845504006675, "grad_norm": 1.15062666516132, "learning_rate": 1.384187188407543e-08, "loss": 0.1846, "step": 53340 }, { "epoch": 0.9272019329381703, "grad_norm": 1.056940994073023, "learning_rate": 1.3835295033404138e-08, "loss": 0.1626, "step": 53341 }, { "epoch": 0.9272193154756732, "grad_norm": 1.5191213505376389, "learning_rate": 1.3828719723651571e-08, "loss": 0.1628, "step": 53342 }, { "epoch": 0.927236698013176, "grad_norm": 0.8948354536550484, "learning_rate": 1.382214595483866e-08, "loss": 0.2251, "step": 53343 }, { "epoch": 0.9272540805506788, "grad_norm": 1.5309364820898963, "learning_rate": 1.3815573726986217e-08, "loss": 0.1299, "step": 53344 }, { "epoch": 0.9272714630881816, "grad_norm": 1.5358917679775101, "learning_rate": 1.3809003040115064e-08, "loss": 0.2261, "step": 53345 }, { "epoch": 0.9272888456256845, "grad_norm": 2.951192796477156, "learning_rate": 1.3802433894246012e-08, "loss": 0.137, "step": 53346 }, { "epoch": 0.9273062281631873, "grad_norm": 1.460932217929937, "learning_rate": 1.3795866289399937e-08, "loss": 0.1552, "step": 53347 }, { "epoch": 0.9273236107006901, "grad_norm": 1.3719097441066048, "learning_rate": 1.3789300225597544e-08, "loss": 0.1591, "step": 53348 }, { "epoch": 0.9273409932381929, "grad_norm": 1.2203962156047612, "learning_rate": 1.3782735702859815e-08, "loss": 0.1536, "step": 53349 }, { "epoch": 0.9273583757756957, "grad_norm": 3.1756170617546577, "learning_rate": 1.3776172721207458e-08, "loss": 0.2357, "step": 53350 }, { "epoch": 0.9273757583131985, "grad_norm": 0.9238386708319295, "learning_rate": 1.376961128066123e-08, "loss": 0.1939, "step": 53351 }, { "epoch": 0.9273931408507013, "grad_norm": 1.2533164374181591, "learning_rate": 1.3763051381242007e-08, "loss": 0.1404, "step": 53352 }, { "epoch": 0.9274105233882042, "grad_norm": 1.3861312508313202, "learning_rate": 1.375649302297055e-08, "loss": 0.1273, "step": 53353 }, { "epoch": 0.927427905925707, "grad_norm": 1.390459964366849, "learning_rate": 1.3749936205867617e-08, "loss": 0.2413, "step": 53354 }, { "epoch": 0.9274452884632098, "grad_norm": 0.7820504565330095, "learning_rate": 1.374338092995403e-08, "loss": 0.1995, "step": 53355 }, { "epoch": 0.9274626710007127, "grad_norm": 1.4356816331828186, "learning_rate": 1.3736827195250544e-08, "loss": 0.1502, "step": 53356 }, { "epoch": 0.9274800535382155, "grad_norm": 1.5872697619536207, "learning_rate": 1.3730275001778036e-08, "loss": 0.1379, "step": 53357 }, { "epoch": 0.9274974360757183, "grad_norm": 8.443042187574921, "learning_rate": 1.3723724349557152e-08, "loss": 0.2672, "step": 53358 }, { "epoch": 0.9275148186132212, "grad_norm": 1.5131902878693255, "learning_rate": 1.3717175238608603e-08, "loss": 0.1593, "step": 53359 }, { "epoch": 0.927532201150724, "grad_norm": 1.0314158258490944, "learning_rate": 1.3710627668953313e-08, "loss": 0.2104, "step": 53360 }, { "epoch": 0.9275495836882268, "grad_norm": 3.776682738980463, "learning_rate": 1.370408164061193e-08, "loss": 0.2489, "step": 53361 }, { "epoch": 0.9275669662257296, "grad_norm": 1.4543115955241424, "learning_rate": 1.3697537153605166e-08, "loss": 0.2571, "step": 53362 }, { "epoch": 0.9275843487632325, "grad_norm": 1.145065834199394, "learning_rate": 1.3690994207953887e-08, "loss": 0.1762, "step": 53363 }, { "epoch": 0.9276017313007353, "grad_norm": 1.2607470524511932, "learning_rate": 1.3684452803678748e-08, "loss": 0.1444, "step": 53364 }, { "epoch": 0.9276191138382381, "grad_norm": 1.164773140287527, "learning_rate": 1.3677912940800507e-08, "loss": 0.1489, "step": 53365 }, { "epoch": 0.927636496375741, "grad_norm": 1.2917873540442575, "learning_rate": 1.367137461933987e-08, "loss": 0.1498, "step": 53366 }, { "epoch": 0.9276538789132438, "grad_norm": 1.3657286437394012, "learning_rate": 1.3664837839317544e-08, "loss": 0.2308, "step": 53367 }, { "epoch": 0.9276712614507466, "grad_norm": 1.2134876585761878, "learning_rate": 1.365830260075429e-08, "loss": 0.175, "step": 53368 }, { "epoch": 0.9276886439882494, "grad_norm": 0.8916163607747921, "learning_rate": 1.3651768903670925e-08, "loss": 0.0782, "step": 53369 }, { "epoch": 0.9277060265257522, "grad_norm": 1.2100966154669794, "learning_rate": 1.3645236748087874e-08, "loss": 0.1687, "step": 53370 }, { "epoch": 0.927723409063255, "grad_norm": 1.345550916575792, "learning_rate": 1.3638706134026067e-08, "loss": 0.2509, "step": 53371 }, { "epoch": 0.9277407916007578, "grad_norm": 1.1436050990417377, "learning_rate": 1.3632177061506155e-08, "loss": 0.104, "step": 53372 }, { "epoch": 0.9277581741382607, "grad_norm": 1.0240095081387708, "learning_rate": 1.3625649530548844e-08, "loss": 0.1422, "step": 53373 }, { "epoch": 0.9277755566757635, "grad_norm": 1.8810391737162366, "learning_rate": 1.3619123541174782e-08, "loss": 0.203, "step": 53374 }, { "epoch": 0.9277929392132663, "grad_norm": 2.1129096477035985, "learning_rate": 1.3612599093404676e-08, "loss": 0.173, "step": 53375 }, { "epoch": 0.9278103217507692, "grad_norm": 2.846929728706613, "learning_rate": 1.360607618725923e-08, "loss": 0.2372, "step": 53376 }, { "epoch": 0.927827704288272, "grad_norm": 0.7027383159653507, "learning_rate": 1.3599554822759097e-08, "loss": 0.2767, "step": 53377 }, { "epoch": 0.9278450868257748, "grad_norm": 1.0892672636579273, "learning_rate": 1.3593034999924814e-08, "loss": 0.1803, "step": 53378 }, { "epoch": 0.9278624693632777, "grad_norm": 1.0588090690883079, "learning_rate": 1.3586516718777309e-08, "loss": 0.1115, "step": 53379 }, { "epoch": 0.9278798519007805, "grad_norm": 1.5578955589606789, "learning_rate": 1.3579999979337065e-08, "loss": 0.2203, "step": 53380 }, { "epoch": 0.9278972344382833, "grad_norm": 0.8883683228262682, "learning_rate": 1.3573484781624789e-08, "loss": 0.1504, "step": 53381 }, { "epoch": 0.9279146169757861, "grad_norm": 1.8656127420434483, "learning_rate": 1.3566971125661076e-08, "loss": 0.1952, "step": 53382 }, { "epoch": 0.927931999513289, "grad_norm": 1.1506617371966539, "learning_rate": 1.3560459011466685e-08, "loss": 0.2564, "step": 53383 }, { "epoch": 0.9279493820507918, "grad_norm": 1.3748005150207971, "learning_rate": 1.3553948439062157e-08, "loss": 0.1949, "step": 53384 }, { "epoch": 0.9279667645882946, "grad_norm": 1.363226895827317, "learning_rate": 1.3547439408468142e-08, "loss": 0.155, "step": 53385 }, { "epoch": 0.9279841471257975, "grad_norm": 1.399810714634045, "learning_rate": 1.3540931919705234e-08, "loss": 0.1829, "step": 53386 }, { "epoch": 0.9280015296633003, "grad_norm": 1.5995605234005796, "learning_rate": 1.3534425972794194e-08, "loss": 0.1414, "step": 53387 }, { "epoch": 0.9280189122008031, "grad_norm": 2.109460487905558, "learning_rate": 1.3527921567755562e-08, "loss": 0.2206, "step": 53388 }, { "epoch": 0.9280362947383058, "grad_norm": 1.2520142723577037, "learning_rate": 1.3521418704609932e-08, "loss": 0.173, "step": 53389 }, { "epoch": 0.9280536772758087, "grad_norm": 1.9367471492613182, "learning_rate": 1.3514917383377899e-08, "loss": 0.1904, "step": 53390 }, { "epoch": 0.9280710598133115, "grad_norm": 1.259182492923322, "learning_rate": 1.350841760408017e-08, "loss": 0.1115, "step": 53391 }, { "epoch": 0.9280884423508143, "grad_norm": 0.6628133558955533, "learning_rate": 1.3501919366737224e-08, "loss": 0.116, "step": 53392 }, { "epoch": 0.9281058248883172, "grad_norm": 1.1960878745823293, "learning_rate": 1.3495422671369772e-08, "loss": 0.1521, "step": 53393 }, { "epoch": 0.92812320742582, "grad_norm": 1.1822666944662237, "learning_rate": 1.3488927517998295e-08, "loss": 0.2514, "step": 53394 }, { "epoch": 0.9281405899633228, "grad_norm": 1.5880525059563058, "learning_rate": 1.3482433906643442e-08, "loss": 0.1722, "step": 53395 }, { "epoch": 0.9281579725008257, "grad_norm": 1.5205591779305652, "learning_rate": 1.3475941837325811e-08, "loss": 0.2005, "step": 53396 }, { "epoch": 0.9281753550383285, "grad_norm": 1.6894216019315462, "learning_rate": 1.3469451310065937e-08, "loss": 0.1527, "step": 53397 }, { "epoch": 0.9281927375758313, "grad_norm": 1.2426245265933764, "learning_rate": 1.3462962324884363e-08, "loss": 0.2168, "step": 53398 }, { "epoch": 0.9282101201133341, "grad_norm": 1.1885679823722106, "learning_rate": 1.3456474881801738e-08, "loss": 0.1879, "step": 53399 }, { "epoch": 0.928227502650837, "grad_norm": 1.4406461890560966, "learning_rate": 1.34499889808386e-08, "loss": 0.2433, "step": 53400 }, { "epoch": 0.9282448851883398, "grad_norm": 2.004239392644335, "learning_rate": 1.3443504622015545e-08, "loss": 0.12, "step": 53401 }, { "epoch": 0.9282622677258426, "grad_norm": 1.1696159920139904, "learning_rate": 1.3437021805352999e-08, "loss": 0.1675, "step": 53402 }, { "epoch": 0.9282796502633455, "grad_norm": 0.8389152067819564, "learning_rate": 1.3430540530871615e-08, "loss": 0.2593, "step": 53403 }, { "epoch": 0.9282970328008483, "grad_norm": 2.158219044165075, "learning_rate": 1.3424060798591873e-08, "loss": 0.2654, "step": 53404 }, { "epoch": 0.9283144153383511, "grad_norm": 2.0802719640021823, "learning_rate": 1.3417582608534372e-08, "loss": 0.2264, "step": 53405 }, { "epoch": 0.928331797875854, "grad_norm": 1.5008766277881793, "learning_rate": 1.3411105960719592e-08, "loss": 0.184, "step": 53406 }, { "epoch": 0.9283491804133568, "grad_norm": 1.0892908719562706, "learning_rate": 1.3404630855168187e-08, "loss": 0.1181, "step": 53407 }, { "epoch": 0.9283665629508596, "grad_norm": 1.228280080285595, "learning_rate": 1.339815729190047e-08, "loss": 0.1709, "step": 53408 }, { "epoch": 0.9283839454883623, "grad_norm": 1.7888965136857204, "learning_rate": 1.3391685270937036e-08, "loss": 0.1454, "step": 53409 }, { "epoch": 0.9284013280258652, "grad_norm": 1.4130001685469058, "learning_rate": 1.3385214792298538e-08, "loss": 0.212, "step": 53410 }, { "epoch": 0.928418710563368, "grad_norm": 0.9543773731819998, "learning_rate": 1.3378745856005291e-08, "loss": 0.1341, "step": 53411 }, { "epoch": 0.9284360931008708, "grad_norm": 1.1699964399034632, "learning_rate": 1.3372278462077946e-08, "loss": 0.1386, "step": 53412 }, { "epoch": 0.9284534756383737, "grad_norm": 1.059400021364973, "learning_rate": 1.3365812610536931e-08, "loss": 0.2135, "step": 53413 }, { "epoch": 0.9284708581758765, "grad_norm": 1.7016590479767137, "learning_rate": 1.3359348301402729e-08, "loss": 0.2748, "step": 53414 }, { "epoch": 0.9284882407133793, "grad_norm": 0.803138573893429, "learning_rate": 1.335288553469588e-08, "loss": 0.1118, "step": 53415 }, { "epoch": 0.9285056232508822, "grad_norm": 1.674877465317054, "learning_rate": 1.334642431043681e-08, "loss": 0.1813, "step": 53416 }, { "epoch": 0.928523005788385, "grad_norm": 1.6503834981915206, "learning_rate": 1.333996462864595e-08, "loss": 0.1828, "step": 53417 }, { "epoch": 0.9285403883258878, "grad_norm": 1.361887786729103, "learning_rate": 1.3333506489343949e-08, "loss": 0.1791, "step": 53418 }, { "epoch": 0.9285577708633906, "grad_norm": 1.0967197972386387, "learning_rate": 1.3327049892551123e-08, "loss": 0.1832, "step": 53419 }, { "epoch": 0.9285751534008935, "grad_norm": 1.742155938139833, "learning_rate": 1.3320594838288069e-08, "loss": 0.151, "step": 53420 }, { "epoch": 0.9285925359383963, "grad_norm": 0.8248875859446584, "learning_rate": 1.3314141326575101e-08, "loss": 0.1076, "step": 53421 }, { "epoch": 0.9286099184758991, "grad_norm": 1.4052156358238224, "learning_rate": 1.330768935743276e-08, "loss": 0.1524, "step": 53422 }, { "epoch": 0.928627301013402, "grad_norm": 0.9386409754226048, "learning_rate": 1.3301238930881476e-08, "loss": 0.1949, "step": 53423 }, { "epoch": 0.9286446835509048, "grad_norm": 1.3946221472623914, "learning_rate": 1.3294790046941673e-08, "loss": 0.105, "step": 53424 }, { "epoch": 0.9286620660884076, "grad_norm": 0.8130745565133828, "learning_rate": 1.3288342705633781e-08, "loss": 0.1113, "step": 53425 }, { "epoch": 0.9286794486259105, "grad_norm": 0.9593158319833881, "learning_rate": 1.328189690697834e-08, "loss": 0.2549, "step": 53426 }, { "epoch": 0.9286968311634133, "grad_norm": 1.7694917936609793, "learning_rate": 1.3275452650995723e-08, "loss": 0.2089, "step": 53427 }, { "epoch": 0.9287142137009161, "grad_norm": 1.7864988711390344, "learning_rate": 1.3269009937706243e-08, "loss": 0.159, "step": 53428 }, { "epoch": 0.9287315962384188, "grad_norm": 1.698326365012132, "learning_rate": 1.3262568767130444e-08, "loss": 0.1502, "step": 53429 }, { "epoch": 0.9287489787759217, "grad_norm": 1.273550746733304, "learning_rate": 1.3256129139288753e-08, "loss": 0.2154, "step": 53430 }, { "epoch": 0.9287663613134245, "grad_norm": 1.1166939031784695, "learning_rate": 1.3249691054201485e-08, "loss": 0.2731, "step": 53431 }, { "epoch": 0.9287837438509273, "grad_norm": 0.8575699247221292, "learning_rate": 1.324325451188918e-08, "loss": 0.0987, "step": 53432 }, { "epoch": 0.9288011263884302, "grad_norm": 1.1656726945505103, "learning_rate": 1.3236819512372099e-08, "loss": 0.1943, "step": 53433 }, { "epoch": 0.928818508925933, "grad_norm": 1.2501789942254322, "learning_rate": 1.3230386055670728e-08, "loss": 0.1466, "step": 53434 }, { "epoch": 0.9288358914634358, "grad_norm": 1.7441662049786995, "learning_rate": 1.3223954141805439e-08, "loss": 0.2751, "step": 53435 }, { "epoch": 0.9288532740009386, "grad_norm": 1.591499394405297, "learning_rate": 1.3217523770796546e-08, "loss": 0.1565, "step": 53436 }, { "epoch": 0.9288706565384415, "grad_norm": 1.3441762306247904, "learning_rate": 1.3211094942664535e-08, "loss": 0.1816, "step": 53437 }, { "epoch": 0.9288880390759443, "grad_norm": 1.0428338084028734, "learning_rate": 1.3204667657429725e-08, "loss": 0.1719, "step": 53438 }, { "epoch": 0.9289054216134471, "grad_norm": 1.077439662347849, "learning_rate": 1.3198241915112596e-08, "loss": 0.2245, "step": 53439 }, { "epoch": 0.92892280415095, "grad_norm": 1.3555370199427637, "learning_rate": 1.31918177157333e-08, "loss": 0.1618, "step": 53440 }, { "epoch": 0.9289401866884528, "grad_norm": 1.2114322637592214, "learning_rate": 1.3185395059312321e-08, "loss": 0.1145, "step": 53441 }, { "epoch": 0.9289575692259556, "grad_norm": 1.7209099933020915, "learning_rate": 1.3178973945870087e-08, "loss": 0.211, "step": 53442 }, { "epoch": 0.9289749517634585, "grad_norm": 1.1809338659212676, "learning_rate": 1.3172554375426858e-08, "loss": 0.177, "step": 53443 }, { "epoch": 0.9289923343009613, "grad_norm": 1.838265403453687, "learning_rate": 1.3166136348003009e-08, "loss": 0.121, "step": 53444 }, { "epoch": 0.9290097168384641, "grad_norm": 2.385472928304803, "learning_rate": 1.3159719863618856e-08, "loss": 0.1536, "step": 53445 }, { "epoch": 0.929027099375967, "grad_norm": 3.292746614694573, "learning_rate": 1.3153304922294772e-08, "loss": 0.2325, "step": 53446 }, { "epoch": 0.9290444819134698, "grad_norm": 1.2628178196409947, "learning_rate": 1.3146891524051074e-08, "loss": 0.1606, "step": 53447 }, { "epoch": 0.9290618644509726, "grad_norm": 1.9441524703383959, "learning_rate": 1.3140479668908022e-08, "loss": 0.1773, "step": 53448 }, { "epoch": 0.9290792469884753, "grad_norm": 1.24471313842133, "learning_rate": 1.3134069356886045e-08, "loss": 0.111, "step": 53449 }, { "epoch": 0.9290966295259782, "grad_norm": 1.2274421819864407, "learning_rate": 1.3127660588005407e-08, "loss": 0.1383, "step": 53450 }, { "epoch": 0.929114012063481, "grad_norm": 1.6485675391913395, "learning_rate": 1.3121253362286533e-08, "loss": 0.2071, "step": 53451 }, { "epoch": 0.9291313946009838, "grad_norm": 1.4996331078790957, "learning_rate": 1.3114847679749518e-08, "loss": 0.1599, "step": 53452 }, { "epoch": 0.9291487771384866, "grad_norm": 1.6154383029207966, "learning_rate": 1.3108443540414794e-08, "loss": 0.1321, "step": 53453 }, { "epoch": 0.9291661596759895, "grad_norm": 1.253513969003451, "learning_rate": 1.3102040944302672e-08, "loss": 0.1503, "step": 53454 }, { "epoch": 0.9291835422134923, "grad_norm": 1.537536273389476, "learning_rate": 1.309563989143342e-08, "loss": 0.1607, "step": 53455 }, { "epoch": 0.9292009247509951, "grad_norm": 2.5442396126898816, "learning_rate": 1.3089240381827293e-08, "loss": 0.1916, "step": 53456 }, { "epoch": 0.929218307288498, "grad_norm": 3.320942805023136, "learning_rate": 1.3082842415504614e-08, "loss": 0.1846, "step": 53457 }, { "epoch": 0.9292356898260008, "grad_norm": 1.3323027659554696, "learning_rate": 1.3076445992485751e-08, "loss": 0.1781, "step": 53458 }, { "epoch": 0.9292530723635036, "grad_norm": 0.9728258582427679, "learning_rate": 1.3070051112790747e-08, "loss": 0.1578, "step": 53459 }, { "epoch": 0.9292704549010065, "grad_norm": 1.7107504130643347, "learning_rate": 1.3063657776440029e-08, "loss": 0.2207, "step": 53460 }, { "epoch": 0.9292878374385093, "grad_norm": 1.693464381942757, "learning_rate": 1.3057265983453914e-08, "loss": 0.2212, "step": 53461 }, { "epoch": 0.9293052199760121, "grad_norm": 2.0366889434835924, "learning_rate": 1.3050875733852496e-08, "loss": 0.2772, "step": 53462 }, { "epoch": 0.929322602513515, "grad_norm": 2.559913937680805, "learning_rate": 1.3044487027656147e-08, "loss": 0.166, "step": 53463 }, { "epoch": 0.9293399850510178, "grad_norm": 2.2840246840957428, "learning_rate": 1.3038099864885132e-08, "loss": 0.2344, "step": 53464 }, { "epoch": 0.9293573675885206, "grad_norm": 0.9464901326033738, "learning_rate": 1.30317142455596e-08, "loss": 0.115, "step": 53465 }, { "epoch": 0.9293747501260234, "grad_norm": 1.2966846832729866, "learning_rate": 1.302533016969981e-08, "loss": 0.1931, "step": 53466 }, { "epoch": 0.9293921326635263, "grad_norm": 1.158901736226852, "learning_rate": 1.3018947637326028e-08, "loss": 0.2538, "step": 53467 }, { "epoch": 0.9294095152010291, "grad_norm": 2.9936138975552105, "learning_rate": 1.3012566648458511e-08, "loss": 0.1551, "step": 53468 }, { "epoch": 0.9294268977385318, "grad_norm": 1.644060057958314, "learning_rate": 1.300618720311747e-08, "loss": 0.1581, "step": 53469 }, { "epoch": 0.9294442802760347, "grad_norm": 1.1076894852513637, "learning_rate": 1.299980930132316e-08, "loss": 0.1788, "step": 53470 }, { "epoch": 0.9294616628135375, "grad_norm": 0.9960036169714265, "learning_rate": 1.2993432943095628e-08, "loss": 0.1774, "step": 53471 }, { "epoch": 0.9294790453510403, "grad_norm": 0.9895836302931559, "learning_rate": 1.2987058128455242e-08, "loss": 0.1142, "step": 53472 }, { "epoch": 0.9294964278885431, "grad_norm": 1.2936597562573549, "learning_rate": 1.2980684857422209e-08, "loss": 0.15, "step": 53473 }, { "epoch": 0.929513810426046, "grad_norm": 1.1597071966653474, "learning_rate": 1.2974313130016622e-08, "loss": 0.1671, "step": 53474 }, { "epoch": 0.9295311929635488, "grad_norm": 0.6690489859380868, "learning_rate": 1.2967942946258748e-08, "loss": 0.1572, "step": 53475 }, { "epoch": 0.9295485755010516, "grad_norm": 1.6106840784615633, "learning_rate": 1.2961574306168788e-08, "loss": 0.1303, "step": 53476 }, { "epoch": 0.9295659580385545, "grad_norm": 0.8984025809563145, "learning_rate": 1.2955207209766949e-08, "loss": 0.1287, "step": 53477 }, { "epoch": 0.9295833405760573, "grad_norm": 1.5220626091378624, "learning_rate": 1.2948841657073384e-08, "loss": 0.1717, "step": 53478 }, { "epoch": 0.9296007231135601, "grad_norm": 1.2876115197448819, "learning_rate": 1.2942477648108185e-08, "loss": 0.1433, "step": 53479 }, { "epoch": 0.929618105651063, "grad_norm": 1.1293006442328117, "learning_rate": 1.2936115182891616e-08, "loss": 0.1784, "step": 53480 }, { "epoch": 0.9296354881885658, "grad_norm": 1.124810377199136, "learning_rate": 1.2929754261443826e-08, "loss": 0.1761, "step": 53481 }, { "epoch": 0.9296528707260686, "grad_norm": 2.0073370993855875, "learning_rate": 1.2923394883784967e-08, "loss": 0.1678, "step": 53482 }, { "epoch": 0.9296702532635714, "grad_norm": 1.2650750321801707, "learning_rate": 1.2917037049935242e-08, "loss": 0.1191, "step": 53483 }, { "epoch": 0.9296876358010743, "grad_norm": 1.9150267504658403, "learning_rate": 1.291068075991475e-08, "loss": 0.1697, "step": 53484 }, { "epoch": 0.9297050183385771, "grad_norm": 1.3805102606426487, "learning_rate": 1.2904326013743638e-08, "loss": 0.109, "step": 53485 }, { "epoch": 0.9297224008760799, "grad_norm": 1.3173655573737542, "learning_rate": 1.2897972811442059e-08, "loss": 0.1401, "step": 53486 }, { "epoch": 0.9297397834135828, "grad_norm": 0.7376458148164631, "learning_rate": 1.2891621153030108e-08, "loss": 0.245, "step": 53487 }, { "epoch": 0.9297571659510855, "grad_norm": 1.1840312930265964, "learning_rate": 1.288527103852799e-08, "loss": 0.2171, "step": 53488 }, { "epoch": 0.9297745484885883, "grad_norm": 1.2572460164179156, "learning_rate": 1.2878922467955854e-08, "loss": 0.1617, "step": 53489 }, { "epoch": 0.9297919310260911, "grad_norm": 2.1326802844611072, "learning_rate": 1.2872575441333688e-08, "loss": 0.2236, "step": 53490 }, { "epoch": 0.929809313563594, "grad_norm": 1.422275727582031, "learning_rate": 1.2866229958681696e-08, "loss": 0.1266, "step": 53491 }, { "epoch": 0.9298266961010968, "grad_norm": 2.7275671736836475, "learning_rate": 1.2859886020020028e-08, "loss": 0.1333, "step": 53492 }, { "epoch": 0.9298440786385996, "grad_norm": 1.56875944513052, "learning_rate": 1.2853543625368723e-08, "loss": 0.1809, "step": 53493 }, { "epoch": 0.9298614611761025, "grad_norm": 1.197231476311849, "learning_rate": 1.2847202774747934e-08, "loss": 0.1382, "step": 53494 }, { "epoch": 0.9298788437136053, "grad_norm": 2.131850994500644, "learning_rate": 1.2840863468177753e-08, "loss": 0.2245, "step": 53495 }, { "epoch": 0.9298962262511081, "grad_norm": 1.892510866902276, "learning_rate": 1.2834525705678168e-08, "loss": 0.1906, "step": 53496 }, { "epoch": 0.929913608788611, "grad_norm": 2.6409966209514564, "learning_rate": 1.2828189487269436e-08, "loss": 0.1066, "step": 53497 }, { "epoch": 0.9299309913261138, "grad_norm": 1.096363607429078, "learning_rate": 1.2821854812971488e-08, "loss": 0.1209, "step": 53498 }, { "epoch": 0.9299483738636166, "grad_norm": 0.97682894447774, "learning_rate": 1.2815521682804475e-08, "loss": 0.1245, "step": 53499 }, { "epoch": 0.9299657564011194, "grad_norm": 1.2477513877436404, "learning_rate": 1.2809190096788491e-08, "loss": 0.1274, "step": 53500 }, { "epoch": 0.9299831389386223, "grad_norm": 1.6346359162656048, "learning_rate": 1.2802860054943577e-08, "loss": 0.195, "step": 53501 }, { "epoch": 0.9300005214761251, "grad_norm": 1.3425990280750504, "learning_rate": 1.279653155728977e-08, "loss": 0.1782, "step": 53502 }, { "epoch": 0.9300179040136279, "grad_norm": 1.9009954379526326, "learning_rate": 1.2790204603847222e-08, "loss": 0.1509, "step": 53503 }, { "epoch": 0.9300352865511308, "grad_norm": 1.1700786594316046, "learning_rate": 1.2783879194635861e-08, "loss": 0.1646, "step": 53504 }, { "epoch": 0.9300526690886336, "grad_norm": 0.7943670456885761, "learning_rate": 1.2777555329675838e-08, "loss": 0.1142, "step": 53505 }, { "epoch": 0.9300700516261364, "grad_norm": 1.5488510506246114, "learning_rate": 1.2771233008987025e-08, "loss": 0.1415, "step": 53506 }, { "epoch": 0.9300874341636393, "grad_norm": 1.6176282250910496, "learning_rate": 1.2764912232589742e-08, "loss": 0.1769, "step": 53507 }, { "epoch": 0.930104816701142, "grad_norm": 1.7485061329933609, "learning_rate": 1.2758593000503858e-08, "loss": 0.2041, "step": 53508 }, { "epoch": 0.9301221992386448, "grad_norm": 0.9477647490168782, "learning_rate": 1.275227531274936e-08, "loss": 0.1272, "step": 53509 }, { "epoch": 0.9301395817761476, "grad_norm": 2.139791491456095, "learning_rate": 1.2745959169346287e-08, "loss": 0.2389, "step": 53510 }, { "epoch": 0.9301569643136505, "grad_norm": 1.3669480760164519, "learning_rate": 1.2739644570314734e-08, "loss": 0.1218, "step": 53511 }, { "epoch": 0.9301743468511533, "grad_norm": 0.895234996123536, "learning_rate": 1.2733331515674739e-08, "loss": 0.0934, "step": 53512 }, { "epoch": 0.9301917293886561, "grad_norm": 1.3649702892075761, "learning_rate": 1.2727020005446176e-08, "loss": 0.144, "step": 53513 }, { "epoch": 0.930209111926159, "grad_norm": 1.1470214297050425, "learning_rate": 1.2720710039649141e-08, "loss": 0.1276, "step": 53514 }, { "epoch": 0.9302264944636618, "grad_norm": 1.3356141111222084, "learning_rate": 1.2714401618303672e-08, "loss": 0.1351, "step": 53515 }, { "epoch": 0.9302438770011646, "grad_norm": 1.1009517032334826, "learning_rate": 1.2708094741429643e-08, "loss": 0.1425, "step": 53516 }, { "epoch": 0.9302612595386675, "grad_norm": 1.4396718686230856, "learning_rate": 1.2701789409047148e-08, "loss": 0.1904, "step": 53517 }, { "epoch": 0.9302786420761703, "grad_norm": 1.1699471305127815, "learning_rate": 1.269548562117606e-08, "loss": 0.1583, "step": 53518 }, { "epoch": 0.9302960246136731, "grad_norm": 1.2411724774199713, "learning_rate": 1.2689183377836532e-08, "loss": 0.1433, "step": 53519 }, { "epoch": 0.9303134071511759, "grad_norm": 1.5195692502677034, "learning_rate": 1.268288267904838e-08, "loss": 0.1699, "step": 53520 }, { "epoch": 0.9303307896886788, "grad_norm": 1.0349087780506887, "learning_rate": 1.267658352483164e-08, "loss": 0.1237, "step": 53521 }, { "epoch": 0.9303481722261816, "grad_norm": 1.6882167967059172, "learning_rate": 1.2670285915206303e-08, "loss": 0.1481, "step": 53522 }, { "epoch": 0.9303655547636844, "grad_norm": 1.2836374645447355, "learning_rate": 1.266398985019229e-08, "loss": 0.1585, "step": 53523 }, { "epoch": 0.9303829373011873, "grad_norm": 0.5971495449966188, "learning_rate": 1.2657695329809537e-08, "loss": 0.1776, "step": 53524 }, { "epoch": 0.9304003198386901, "grad_norm": 1.65794399756207, "learning_rate": 1.2651402354078078e-08, "loss": 0.1356, "step": 53525 }, { "epoch": 0.9304177023761929, "grad_norm": 1.1651570442950276, "learning_rate": 1.2645110923017732e-08, "loss": 0.1549, "step": 53526 }, { "epoch": 0.9304350849136958, "grad_norm": 1.2594820489225858, "learning_rate": 1.2638821036648595e-08, "loss": 0.2465, "step": 53527 }, { "epoch": 0.9304524674511985, "grad_norm": 1.6839991451232472, "learning_rate": 1.2632532694990483e-08, "loss": 0.1984, "step": 53528 }, { "epoch": 0.9304698499887013, "grad_norm": 2.766163555815801, "learning_rate": 1.2626245898063326e-08, "loss": 0.2578, "step": 53529 }, { "epoch": 0.9304872325262041, "grad_norm": 1.5870865284007218, "learning_rate": 1.2619960645887107e-08, "loss": 0.1584, "step": 53530 }, { "epoch": 0.930504615063707, "grad_norm": 1.294356307783389, "learning_rate": 1.2613676938481755e-08, "loss": 0.2115, "step": 53531 }, { "epoch": 0.9305219976012098, "grad_norm": 2.0096506135348386, "learning_rate": 1.2607394775867141e-08, "loss": 0.1528, "step": 53532 }, { "epoch": 0.9305393801387126, "grad_norm": 1.1442346283786724, "learning_rate": 1.2601114158063197e-08, "loss": 0.1748, "step": 53533 }, { "epoch": 0.9305567626762155, "grad_norm": 1.3260849605997056, "learning_rate": 1.259483508508985e-08, "loss": 0.1413, "step": 53534 }, { "epoch": 0.9305741452137183, "grad_norm": 3.416751435770628, "learning_rate": 1.2588557556966972e-08, "loss": 0.2074, "step": 53535 }, { "epoch": 0.9305915277512211, "grad_norm": 1.895598832467125, "learning_rate": 1.2582281573714437e-08, "loss": 0.2372, "step": 53536 }, { "epoch": 0.930608910288724, "grad_norm": 1.2258360521047102, "learning_rate": 1.2576007135352118e-08, "loss": 0.1721, "step": 53537 }, { "epoch": 0.9306262928262268, "grad_norm": 2.0907894762376733, "learning_rate": 1.2569734241900054e-08, "loss": 0.1986, "step": 53538 }, { "epoch": 0.9306436753637296, "grad_norm": 4.2523289835678275, "learning_rate": 1.2563462893378007e-08, "loss": 0.17, "step": 53539 }, { "epoch": 0.9306610579012324, "grad_norm": 1.0359642704699805, "learning_rate": 1.2557193089805907e-08, "loss": 0.1141, "step": 53540 }, { "epoch": 0.9306784404387353, "grad_norm": 1.6733329470785132, "learning_rate": 1.2550924831203513e-08, "loss": 0.1461, "step": 53541 }, { "epoch": 0.9306958229762381, "grad_norm": 1.1180121231192404, "learning_rate": 1.2544658117590756e-08, "loss": 0.1716, "step": 53542 }, { "epoch": 0.9307132055137409, "grad_norm": 1.5178873167455402, "learning_rate": 1.253839294898762e-08, "loss": 0.0969, "step": 53543 }, { "epoch": 0.9307305880512438, "grad_norm": 1.0687139108375456, "learning_rate": 1.2532129325413754e-08, "loss": 0.1456, "step": 53544 }, { "epoch": 0.9307479705887466, "grad_norm": 0.9541079665146189, "learning_rate": 1.2525867246889143e-08, "loss": 0.0935, "step": 53545 }, { "epoch": 0.9307653531262494, "grad_norm": 1.3887332255975335, "learning_rate": 1.251960671343366e-08, "loss": 0.1185, "step": 53546 }, { "epoch": 0.9307827356637522, "grad_norm": 0.9118641414460681, "learning_rate": 1.2513347725067069e-08, "loss": 0.1193, "step": 53547 }, { "epoch": 0.930800118201255, "grad_norm": 1.115032486167577, "learning_rate": 1.2507090281809185e-08, "loss": 0.2112, "step": 53548 }, { "epoch": 0.9308175007387578, "grad_norm": 1.2243654306782739, "learning_rate": 1.2500834383679881e-08, "loss": 0.0922, "step": 53549 }, { "epoch": 0.9308348832762606, "grad_norm": 1.6050320933439162, "learning_rate": 1.249458003069903e-08, "loss": 0.1453, "step": 53550 }, { "epoch": 0.9308522658137635, "grad_norm": 1.0227289007116829, "learning_rate": 1.2488327222886452e-08, "loss": 0.1409, "step": 53551 }, { "epoch": 0.9308696483512663, "grad_norm": 0.7529889379251075, "learning_rate": 1.2482075960261906e-08, "loss": 0.1171, "step": 53552 }, { "epoch": 0.9308870308887691, "grad_norm": 1.7002398341752498, "learning_rate": 1.247582624284521e-08, "loss": 0.2295, "step": 53553 }, { "epoch": 0.930904413426272, "grad_norm": 1.2728644427810032, "learning_rate": 1.2469578070656239e-08, "loss": 0.1196, "step": 53554 }, { "epoch": 0.9309217959637748, "grad_norm": 0.8737219533600358, "learning_rate": 1.2463331443714754e-08, "loss": 0.1783, "step": 53555 }, { "epoch": 0.9309391785012776, "grad_norm": 1.2770296398939098, "learning_rate": 1.2457086362040458e-08, "loss": 0.2423, "step": 53556 }, { "epoch": 0.9309565610387804, "grad_norm": 1.6266536897333985, "learning_rate": 1.2450842825653284e-08, "loss": 0.1899, "step": 53557 }, { "epoch": 0.9309739435762833, "grad_norm": 1.1090512415143763, "learning_rate": 1.244460083457305e-08, "loss": 0.1862, "step": 53558 }, { "epoch": 0.9309913261137861, "grad_norm": 1.0332212977785344, "learning_rate": 1.2438360388819457e-08, "loss": 0.2838, "step": 53559 }, { "epoch": 0.9310087086512889, "grad_norm": 1.5953469715003175, "learning_rate": 1.2432121488412218e-08, "loss": 0.1486, "step": 53560 }, { "epoch": 0.9310260911887918, "grad_norm": 1.3404577977242014, "learning_rate": 1.24258841333712e-08, "loss": 0.1525, "step": 53561 }, { "epoch": 0.9310434737262946, "grad_norm": 0.7630574850345825, "learning_rate": 1.241964832371617e-08, "loss": 0.166, "step": 53562 }, { "epoch": 0.9310608562637974, "grad_norm": 1.42004508704924, "learning_rate": 1.2413414059466887e-08, "loss": 0.1581, "step": 53563 }, { "epoch": 0.9310782388013003, "grad_norm": 2.268789953766661, "learning_rate": 1.2407181340643058e-08, "loss": 0.2373, "step": 53564 }, { "epoch": 0.9310956213388031, "grad_norm": 1.1876507496156221, "learning_rate": 1.2400950167264556e-08, "loss": 0.2014, "step": 53565 }, { "epoch": 0.9311130038763059, "grad_norm": 3.8670277491530207, "learning_rate": 1.2394720539351033e-08, "loss": 0.274, "step": 53566 }, { "epoch": 0.9311303864138087, "grad_norm": 3.0945400743980525, "learning_rate": 1.238849245692225e-08, "loss": 0.2015, "step": 53567 }, { "epoch": 0.9311477689513115, "grad_norm": 2.036028375486374, "learning_rate": 1.2382265919997914e-08, "loss": 0.2445, "step": 53568 }, { "epoch": 0.9311651514888143, "grad_norm": 1.1976847142600175, "learning_rate": 1.2376040928597842e-08, "loss": 0.1444, "step": 53569 }, { "epoch": 0.9311825340263171, "grad_norm": 1.0657404303939924, "learning_rate": 1.236981748274174e-08, "loss": 0.1681, "step": 53570 }, { "epoch": 0.93119991656382, "grad_norm": 1.2895115493912568, "learning_rate": 1.2363595582449316e-08, "loss": 0.1417, "step": 53571 }, { "epoch": 0.9312172991013228, "grad_norm": 1.0582481641901857, "learning_rate": 1.2357375227740274e-08, "loss": 0.1515, "step": 53572 }, { "epoch": 0.9312346816388256, "grad_norm": 1.948827943240618, "learning_rate": 1.2351156418634324e-08, "loss": 0.2425, "step": 53573 }, { "epoch": 0.9312520641763284, "grad_norm": 1.0313109679943298, "learning_rate": 1.2344939155151279e-08, "loss": 0.1653, "step": 53574 }, { "epoch": 0.9312694467138313, "grad_norm": 1.3237214979364225, "learning_rate": 1.2338723437310683e-08, "loss": 0.1719, "step": 53575 }, { "epoch": 0.9312868292513341, "grad_norm": 2.0195913353593826, "learning_rate": 1.2332509265132297e-08, "loss": 0.1486, "step": 53576 }, { "epoch": 0.9313042117888369, "grad_norm": 1.1693464656670314, "learning_rate": 1.2326296638635936e-08, "loss": 0.1749, "step": 53577 }, { "epoch": 0.9313215943263398, "grad_norm": 1.4339137013237397, "learning_rate": 1.2320085557841198e-08, "loss": 0.2944, "step": 53578 }, { "epoch": 0.9313389768638426, "grad_norm": 1.4445251381759423, "learning_rate": 1.2313876022767677e-08, "loss": 0.1731, "step": 53579 }, { "epoch": 0.9313563594013454, "grad_norm": 3.1614165875320173, "learning_rate": 1.2307668033435193e-08, "loss": 0.1917, "step": 53580 }, { "epoch": 0.9313737419388483, "grad_norm": 0.9765270164944377, "learning_rate": 1.2301461589863393e-08, "loss": 0.1462, "step": 53581 }, { "epoch": 0.9313911244763511, "grad_norm": 2.2779196509013158, "learning_rate": 1.2295256692071875e-08, "loss": 0.1825, "step": 53582 }, { "epoch": 0.9314085070138539, "grad_norm": 1.9198420701917687, "learning_rate": 1.2289053340080402e-08, "loss": 0.1834, "step": 53583 }, { "epoch": 0.9314258895513567, "grad_norm": 1.1441080892943127, "learning_rate": 1.2282851533908622e-08, "loss": 0.1656, "step": 53584 }, { "epoch": 0.9314432720888596, "grad_norm": 1.5527454544975605, "learning_rate": 1.2276651273576133e-08, "loss": 0.2558, "step": 53585 }, { "epoch": 0.9314606546263624, "grad_norm": 2.566058464233124, "learning_rate": 1.2270452559102584e-08, "loss": 0.2558, "step": 53586 }, { "epoch": 0.9314780371638652, "grad_norm": 1.5007221154974923, "learning_rate": 1.2264255390507682e-08, "loss": 0.1746, "step": 53587 }, { "epoch": 0.931495419701368, "grad_norm": 0.7774244737813674, "learning_rate": 1.2258059767811025e-08, "loss": 0.2838, "step": 53588 }, { "epoch": 0.9315128022388708, "grad_norm": 1.0891320641192301, "learning_rate": 1.2251865691032315e-08, "loss": 0.1329, "step": 53589 }, { "epoch": 0.9315301847763736, "grad_norm": 1.7063770387125272, "learning_rate": 1.224567316019115e-08, "loss": 0.1839, "step": 53590 }, { "epoch": 0.9315475673138764, "grad_norm": 1.0733971981349788, "learning_rate": 1.223948217530707e-08, "loss": 0.1533, "step": 53591 }, { "epoch": 0.9315649498513793, "grad_norm": 1.1159335789128189, "learning_rate": 1.223329273639978e-08, "loss": 0.1455, "step": 53592 }, { "epoch": 0.9315823323888821, "grad_norm": 1.4232854003537767, "learning_rate": 1.2227104843488934e-08, "loss": 0.216, "step": 53593 }, { "epoch": 0.9315997149263849, "grad_norm": 2.3783602838427758, "learning_rate": 1.2220918496594123e-08, "loss": 0.1711, "step": 53594 }, { "epoch": 0.9316170974638878, "grad_norm": 0.8699652873329796, "learning_rate": 1.2214733695734835e-08, "loss": 0.0985, "step": 53595 }, { "epoch": 0.9316344800013906, "grad_norm": 1.5684759050871253, "learning_rate": 1.2208550440930832e-08, "loss": 0.2642, "step": 53596 }, { "epoch": 0.9316518625388934, "grad_norm": 1.2937872673975068, "learning_rate": 1.2202368732201707e-08, "loss": 0.099, "step": 53597 }, { "epoch": 0.9316692450763963, "grad_norm": 1.7798337097360464, "learning_rate": 1.2196188569566946e-08, "loss": 0.1116, "step": 53598 }, { "epoch": 0.9316866276138991, "grad_norm": 1.3140789129504, "learning_rate": 1.2190009953046143e-08, "loss": 0.1882, "step": 53599 }, { "epoch": 0.9317040101514019, "grad_norm": 1.2257809098078745, "learning_rate": 1.218383288265895e-08, "loss": 0.2219, "step": 53600 }, { "epoch": 0.9317213926889047, "grad_norm": 1.469771029846331, "learning_rate": 1.2177657358424964e-08, "loss": 0.1302, "step": 53601 }, { "epoch": 0.9317387752264076, "grad_norm": 1.8248482606720717, "learning_rate": 1.2171483380363667e-08, "loss": 0.2437, "step": 53602 }, { "epoch": 0.9317561577639104, "grad_norm": 0.9822862706176752, "learning_rate": 1.216531094849471e-08, "loss": 0.1757, "step": 53603 }, { "epoch": 0.9317735403014132, "grad_norm": 1.0205681779758087, "learning_rate": 1.2159140062837635e-08, "loss": 0.1577, "step": 53604 }, { "epoch": 0.9317909228389161, "grad_norm": 1.4267841645530166, "learning_rate": 1.2152970723411981e-08, "loss": 0.2035, "step": 53605 }, { "epoch": 0.9318083053764189, "grad_norm": 1.851294782619396, "learning_rate": 1.2146802930237287e-08, "loss": 0.3148, "step": 53606 }, { "epoch": 0.9318256879139217, "grad_norm": 2.186531852258266, "learning_rate": 1.2140636683333094e-08, "loss": 0.2025, "step": 53607 }, { "epoch": 0.9318430704514244, "grad_norm": 2.346084717569979, "learning_rate": 1.2134471982719052e-08, "loss": 0.2064, "step": 53608 }, { "epoch": 0.9318604529889273, "grad_norm": 1.120417032808544, "learning_rate": 1.2128308828414647e-08, "loss": 0.1151, "step": 53609 }, { "epoch": 0.9318778355264301, "grad_norm": 1.2916485470879786, "learning_rate": 1.2122147220439305e-08, "loss": 0.1243, "step": 53610 }, { "epoch": 0.9318952180639329, "grad_norm": 1.2657631953284796, "learning_rate": 1.2115987158812679e-08, "loss": 0.2128, "step": 53611 }, { "epoch": 0.9319126006014358, "grad_norm": 1.4626079663008003, "learning_rate": 1.2109828643554309e-08, "loss": 0.1709, "step": 53612 }, { "epoch": 0.9319299831389386, "grad_norm": 0.8951702013991724, "learning_rate": 1.2103671674683624e-08, "loss": 0.2057, "step": 53613 }, { "epoch": 0.9319473656764414, "grad_norm": 0.7803331441874414, "learning_rate": 1.2097516252220163e-08, "loss": 0.3156, "step": 53614 }, { "epoch": 0.9319647482139443, "grad_norm": 0.9012820445487778, "learning_rate": 1.2091362376183467e-08, "loss": 0.1958, "step": 53615 }, { "epoch": 0.9319821307514471, "grad_norm": 1.2111636454089243, "learning_rate": 1.208521004659302e-08, "loss": 0.0972, "step": 53616 }, { "epoch": 0.9319995132889499, "grad_norm": 1.2907925947520442, "learning_rate": 1.2079059263468362e-08, "loss": 0.1789, "step": 53617 }, { "epoch": 0.9320168958264528, "grad_norm": 1.3552946070449734, "learning_rate": 1.2072910026828865e-08, "loss": 0.2164, "step": 53618 }, { "epoch": 0.9320342783639556, "grad_norm": 2.0009765182534887, "learning_rate": 1.206676233669418e-08, "loss": 0.148, "step": 53619 }, { "epoch": 0.9320516609014584, "grad_norm": 3.390974134186893, "learning_rate": 1.2060616193083683e-08, "loss": 0.1933, "step": 53620 }, { "epoch": 0.9320690434389612, "grad_norm": 0.6927538378899086, "learning_rate": 1.2054471596016913e-08, "loss": 0.1555, "step": 53621 }, { "epoch": 0.9320864259764641, "grad_norm": 1.782254237857319, "learning_rate": 1.2048328545513354e-08, "loss": 0.2648, "step": 53622 }, { "epoch": 0.9321038085139669, "grad_norm": 1.654987704446139, "learning_rate": 1.2042187041592378e-08, "loss": 0.2416, "step": 53623 }, { "epoch": 0.9321211910514697, "grad_norm": 1.363807750405008, "learning_rate": 1.2036047084273582e-08, "loss": 0.1483, "step": 53624 }, { "epoch": 0.9321385735889726, "grad_norm": 1.814464348126194, "learning_rate": 1.2029908673576339e-08, "loss": 0.2228, "step": 53625 }, { "epoch": 0.9321559561264754, "grad_norm": 1.4816641211233674, "learning_rate": 1.2023771809520079e-08, "loss": 0.1518, "step": 53626 }, { "epoch": 0.9321733386639781, "grad_norm": 1.0801952847153518, "learning_rate": 1.2017636492124394e-08, "loss": 0.1243, "step": 53627 }, { "epoch": 0.9321907212014809, "grad_norm": 1.1679958738451428, "learning_rate": 1.2011502721408662e-08, "loss": 0.2435, "step": 53628 }, { "epoch": 0.9322081037389838, "grad_norm": 1.1571732345617822, "learning_rate": 1.2005370497392197e-08, "loss": 0.1762, "step": 53629 }, { "epoch": 0.9322254862764866, "grad_norm": 0.757027961205436, "learning_rate": 1.1999239820094598e-08, "loss": 0.141, "step": 53630 }, { "epoch": 0.9322428688139894, "grad_norm": 1.1979110944819311, "learning_rate": 1.1993110689535235e-08, "loss": 0.1284, "step": 53631 }, { "epoch": 0.9322602513514923, "grad_norm": 2.2236504471611362, "learning_rate": 1.1986983105733483e-08, "loss": 0.2039, "step": 53632 }, { "epoch": 0.9322776338889951, "grad_norm": 1.4927786409322965, "learning_rate": 1.1980857068708883e-08, "loss": 0.1658, "step": 53633 }, { "epoch": 0.9322950164264979, "grad_norm": 2.713205928484724, "learning_rate": 1.1974732578480806e-08, "loss": 0.1155, "step": 53634 }, { "epoch": 0.9323123989640008, "grad_norm": 0.9764379624063041, "learning_rate": 1.1968609635068627e-08, "loss": 0.1476, "step": 53635 }, { "epoch": 0.9323297815015036, "grad_norm": 0.9983674859201814, "learning_rate": 1.1962488238491774e-08, "loss": 0.1569, "step": 53636 }, { "epoch": 0.9323471640390064, "grad_norm": 0.9386697848484682, "learning_rate": 1.1956368388769622e-08, "loss": 0.1522, "step": 53637 }, { "epoch": 0.9323645465765092, "grad_norm": 1.3139187232982483, "learning_rate": 1.1950250085921543e-08, "loss": 0.2045, "step": 53638 }, { "epoch": 0.9323819291140121, "grad_norm": 3.191089690245391, "learning_rate": 1.1944133329967076e-08, "loss": 0.1901, "step": 53639 }, { "epoch": 0.9323993116515149, "grad_norm": 2.0690146601040666, "learning_rate": 1.1938018120925486e-08, "loss": 0.1616, "step": 53640 }, { "epoch": 0.9324166941890177, "grad_norm": 1.5138186742508764, "learning_rate": 1.1931904458816145e-08, "loss": 0.1858, "step": 53641 }, { "epoch": 0.9324340767265206, "grad_norm": 1.3511220095024974, "learning_rate": 1.1925792343658537e-08, "loss": 0.1212, "step": 53642 }, { "epoch": 0.9324514592640234, "grad_norm": 2.0167407207085737, "learning_rate": 1.1919681775471924e-08, "loss": 0.1669, "step": 53643 }, { "epoch": 0.9324688418015262, "grad_norm": 1.251303588339268, "learning_rate": 1.191357275427568e-08, "loss": 0.2225, "step": 53644 }, { "epoch": 0.9324862243390291, "grad_norm": 0.9897959360545665, "learning_rate": 1.1907465280089235e-08, "loss": 0.133, "step": 53645 }, { "epoch": 0.9325036068765319, "grad_norm": 1.5739956076655899, "learning_rate": 1.1901359352931905e-08, "loss": 0.2275, "step": 53646 }, { "epoch": 0.9325209894140346, "grad_norm": 1.384310525922723, "learning_rate": 1.1895254972823122e-08, "loss": 0.1237, "step": 53647 }, { "epoch": 0.9325383719515374, "grad_norm": 2.4353820565740025, "learning_rate": 1.1889152139782089e-08, "loss": 0.1711, "step": 53648 }, { "epoch": 0.9325557544890403, "grad_norm": 2.9458016670535208, "learning_rate": 1.188305085382818e-08, "loss": 0.1704, "step": 53649 }, { "epoch": 0.9325731370265431, "grad_norm": 0.8005558289895719, "learning_rate": 1.1876951114980827e-08, "loss": 0.1717, "step": 53650 }, { "epoch": 0.9325905195640459, "grad_norm": 1.6486741005006407, "learning_rate": 1.1870852923259345e-08, "loss": 0.116, "step": 53651 }, { "epoch": 0.9326079021015488, "grad_norm": 1.5304491661761068, "learning_rate": 1.1864756278682997e-08, "loss": 0.1377, "step": 53652 }, { "epoch": 0.9326252846390516, "grad_norm": 0.9834501538976629, "learning_rate": 1.1858661181271157e-08, "loss": 0.2166, "step": 53653 }, { "epoch": 0.9326426671765544, "grad_norm": 0.9955071060663496, "learning_rate": 1.1852567631043142e-08, "loss": 0.175, "step": 53654 }, { "epoch": 0.9326600497140572, "grad_norm": 2.6630008652308796, "learning_rate": 1.1846475628018215e-08, "loss": 0.2836, "step": 53655 }, { "epoch": 0.9326774322515601, "grad_norm": 5.150587299569474, "learning_rate": 1.184038517221575e-08, "loss": 0.1828, "step": 53656 }, { "epoch": 0.9326948147890629, "grad_norm": 1.284259421001974, "learning_rate": 1.1834296263654953e-08, "loss": 0.2181, "step": 53657 }, { "epoch": 0.9327121973265657, "grad_norm": 1.0651016731395655, "learning_rate": 1.1828208902355252e-08, "loss": 0.1196, "step": 53658 }, { "epoch": 0.9327295798640686, "grad_norm": 1.3625195371392878, "learning_rate": 1.182212308833591e-08, "loss": 0.1901, "step": 53659 }, { "epoch": 0.9327469624015714, "grad_norm": 2.3476466355517425, "learning_rate": 1.1816038821616137e-08, "loss": 0.3021, "step": 53660 }, { "epoch": 0.9327643449390742, "grad_norm": 1.3286859489578804, "learning_rate": 1.1809956102215301e-08, "loss": 0.1507, "step": 53661 }, { "epoch": 0.9327817274765771, "grad_norm": 3.4628995138542717, "learning_rate": 1.1803874930152614e-08, "loss": 0.1367, "step": 53662 }, { "epoch": 0.9327991100140799, "grad_norm": 2.4597714925580045, "learning_rate": 1.1797795305447388e-08, "loss": 0.1594, "step": 53663 }, { "epoch": 0.9328164925515827, "grad_norm": 1.6447162545888672, "learning_rate": 1.1791717228118948e-08, "loss": 0.136, "step": 53664 }, { "epoch": 0.9328338750890856, "grad_norm": 2.1801682066528825, "learning_rate": 1.1785640698186383e-08, "loss": 0.1831, "step": 53665 }, { "epoch": 0.9328512576265884, "grad_norm": 1.611563197703838, "learning_rate": 1.1779565715669182e-08, "loss": 0.2335, "step": 53666 }, { "epoch": 0.9328686401640911, "grad_norm": 0.7935834262035447, "learning_rate": 1.1773492280586495e-08, "loss": 0.217, "step": 53667 }, { "epoch": 0.9328860227015939, "grad_norm": 1.490318159767501, "learning_rate": 1.1767420392957472e-08, "loss": 0.4098, "step": 53668 }, { "epoch": 0.9329034052390968, "grad_norm": 1.691783282788634, "learning_rate": 1.1761350052801489e-08, "loss": 0.2117, "step": 53669 }, { "epoch": 0.9329207877765996, "grad_norm": 2.0268668523146496, "learning_rate": 1.175528126013775e-08, "loss": 0.1552, "step": 53670 }, { "epoch": 0.9329381703141024, "grad_norm": 1.2834759017900361, "learning_rate": 1.1749214014985464e-08, "loss": 0.1594, "step": 53671 }, { "epoch": 0.9329555528516053, "grad_norm": 1.2030036609101527, "learning_rate": 1.174314831736395e-08, "loss": 0.1666, "step": 53672 }, { "epoch": 0.9329729353891081, "grad_norm": 2.4734315006622305, "learning_rate": 1.1737084167292299e-08, "loss": 0.2289, "step": 53673 }, { "epoch": 0.9329903179266109, "grad_norm": 1.7404847923474585, "learning_rate": 1.1731021564789889e-08, "loss": 0.1677, "step": 53674 }, { "epoch": 0.9330077004641137, "grad_norm": 1.7661213057954173, "learning_rate": 1.1724960509875759e-08, "loss": 0.2473, "step": 53675 }, { "epoch": 0.9330250830016166, "grad_norm": 1.2879371136520363, "learning_rate": 1.1718901002569225e-08, "loss": 0.2946, "step": 53676 }, { "epoch": 0.9330424655391194, "grad_norm": 1.3032742141289424, "learning_rate": 1.1712843042889497e-08, "loss": 0.1859, "step": 53677 }, { "epoch": 0.9330598480766222, "grad_norm": 1.8260573019161734, "learning_rate": 1.1706786630855724e-08, "loss": 0.166, "step": 53678 }, { "epoch": 0.9330772306141251, "grad_norm": 1.287671727700063, "learning_rate": 1.1700731766487226e-08, "loss": 0.2114, "step": 53679 }, { "epoch": 0.9330946131516279, "grad_norm": 1.3988510394937874, "learning_rate": 1.1694678449802986e-08, "loss": 0.2675, "step": 53680 }, { "epoch": 0.9331119956891307, "grad_norm": 2.4448109216955474, "learning_rate": 1.1688626680822378e-08, "loss": 0.2001, "step": 53681 }, { "epoch": 0.9331293782266336, "grad_norm": 1.8933294043084077, "learning_rate": 1.1682576459564497e-08, "loss": 0.1668, "step": 53682 }, { "epoch": 0.9331467607641364, "grad_norm": 1.5048200091226418, "learning_rate": 1.1676527786048551e-08, "loss": 0.139, "step": 53683 }, { "epoch": 0.9331641433016392, "grad_norm": 0.9927338355178236, "learning_rate": 1.1670480660293636e-08, "loss": 0.1074, "step": 53684 }, { "epoch": 0.933181525839142, "grad_norm": 0.9188140700124581, "learning_rate": 1.1664435082319068e-08, "loss": 0.1284, "step": 53685 }, { "epoch": 0.9331989083766449, "grad_norm": 1.0210787149662475, "learning_rate": 1.1658391052143834e-08, "loss": 0.1583, "step": 53686 }, { "epoch": 0.9332162909141476, "grad_norm": 0.8484296345871669, "learning_rate": 1.165234856978725e-08, "loss": 0.1935, "step": 53687 }, { "epoch": 0.9332336734516504, "grad_norm": 1.7107056938600624, "learning_rate": 1.1646307635268304e-08, "loss": 0.2333, "step": 53688 }, { "epoch": 0.9332510559891533, "grad_norm": 1.6721892464712282, "learning_rate": 1.1640268248606256e-08, "loss": 0.2248, "step": 53689 }, { "epoch": 0.9332684385266561, "grad_norm": 1.546932902386997, "learning_rate": 1.1634230409820256e-08, "loss": 0.1533, "step": 53690 }, { "epoch": 0.9332858210641589, "grad_norm": 1.5574451115441184, "learning_rate": 1.1628194118929402e-08, "loss": 0.1792, "step": 53691 }, { "epoch": 0.9333032036016617, "grad_norm": 2.0642951890142047, "learning_rate": 1.1622159375952845e-08, "loss": 0.1842, "step": 53692 }, { "epoch": 0.9333205861391646, "grad_norm": 1.237319737518221, "learning_rate": 1.161612618090968e-08, "loss": 0.2005, "step": 53693 }, { "epoch": 0.9333379686766674, "grad_norm": 1.4224459877269455, "learning_rate": 1.1610094533819059e-08, "loss": 0.2019, "step": 53694 }, { "epoch": 0.9333553512141702, "grad_norm": 2.691636038660821, "learning_rate": 1.1604064434700078e-08, "loss": 0.2954, "step": 53695 }, { "epoch": 0.9333727337516731, "grad_norm": 1.1605047522462615, "learning_rate": 1.1598035883571833e-08, "loss": 0.2286, "step": 53696 }, { "epoch": 0.9333901162891759, "grad_norm": 1.535432579861075, "learning_rate": 1.1592008880453474e-08, "loss": 0.1831, "step": 53697 }, { "epoch": 0.9334074988266787, "grad_norm": 1.193009334689672, "learning_rate": 1.1585983425364153e-08, "loss": 0.1665, "step": 53698 }, { "epoch": 0.9334248813641816, "grad_norm": 1.2376908219705294, "learning_rate": 1.1579959518322802e-08, "loss": 0.1477, "step": 53699 }, { "epoch": 0.9334422639016844, "grad_norm": 1.356432893528642, "learning_rate": 1.1573937159348735e-08, "loss": 0.1598, "step": 53700 }, { "epoch": 0.9334596464391872, "grad_norm": 2.7008124296945915, "learning_rate": 1.1567916348460827e-08, "loss": 0.2069, "step": 53701 }, { "epoch": 0.93347702897669, "grad_norm": 1.9793160558955316, "learning_rate": 1.156189708567834e-08, "loss": 0.2077, "step": 53702 }, { "epoch": 0.9334944115141929, "grad_norm": 1.1520581571007942, "learning_rate": 1.1555879371020204e-08, "loss": 0.1583, "step": 53703 }, { "epoch": 0.9335117940516957, "grad_norm": 0.9393418688992847, "learning_rate": 1.1549863204505572e-08, "loss": 0.1471, "step": 53704 }, { "epoch": 0.9335291765891985, "grad_norm": 1.1768113135146747, "learning_rate": 1.1543848586153538e-08, "loss": 0.148, "step": 53705 }, { "epoch": 0.9335465591267014, "grad_norm": 1.2654781227920502, "learning_rate": 1.1537835515983086e-08, "loss": 0.1253, "step": 53706 }, { "epoch": 0.9335639416642041, "grad_norm": 1.6523457459415942, "learning_rate": 1.1531823994013256e-08, "loss": 0.2776, "step": 53707 }, { "epoch": 0.9335813242017069, "grad_norm": 2.199959854500022, "learning_rate": 1.1525814020263259e-08, "loss": 0.2222, "step": 53708 }, { "epoch": 0.9335987067392097, "grad_norm": 1.2981232008710553, "learning_rate": 1.151980559475202e-08, "loss": 0.1433, "step": 53709 }, { "epoch": 0.9336160892767126, "grad_norm": 1.112796786292504, "learning_rate": 1.1513798717498635e-08, "loss": 0.2045, "step": 53710 }, { "epoch": 0.9336334718142154, "grad_norm": 1.1826079189339105, "learning_rate": 1.1507793388522035e-08, "loss": 0.2404, "step": 53711 }, { "epoch": 0.9336508543517182, "grad_norm": 2.8120844804069143, "learning_rate": 1.1501789607841373e-08, "loss": 0.2431, "step": 53712 }, { "epoch": 0.9336682368892211, "grad_norm": 0.9110947186876319, "learning_rate": 1.1495787375475686e-08, "loss": 0.2583, "step": 53713 }, { "epoch": 0.9336856194267239, "grad_norm": 1.2728799724750806, "learning_rate": 1.1489786691443904e-08, "loss": 0.1266, "step": 53714 }, { "epoch": 0.9337030019642267, "grad_norm": 1.4146374140369253, "learning_rate": 1.1483787555765068e-08, "loss": 0.2262, "step": 53715 }, { "epoch": 0.9337203845017296, "grad_norm": 1.7321973558867418, "learning_rate": 1.1477789968458218e-08, "loss": 0.2506, "step": 53716 }, { "epoch": 0.9337377670392324, "grad_norm": 2.828602748757532, "learning_rate": 1.1471793929542506e-08, "loss": 0.174, "step": 53717 }, { "epoch": 0.9337551495767352, "grad_norm": 3.1704987227124994, "learning_rate": 1.1465799439036693e-08, "loss": 0.1766, "step": 53718 }, { "epoch": 0.933772532114238, "grad_norm": 2.161741057547529, "learning_rate": 1.1459806496959823e-08, "loss": 0.2201, "step": 53719 }, { "epoch": 0.9337899146517409, "grad_norm": 0.8611118646982764, "learning_rate": 1.1453815103330989e-08, "loss": 0.1273, "step": 53720 }, { "epoch": 0.9338072971892437, "grad_norm": 1.1138173620293204, "learning_rate": 1.1447825258169175e-08, "loss": 0.1737, "step": 53721 }, { "epoch": 0.9338246797267465, "grad_norm": 3.182525198214611, "learning_rate": 1.1441836961493312e-08, "loss": 0.1222, "step": 53722 }, { "epoch": 0.9338420622642494, "grad_norm": 1.0621505624657237, "learning_rate": 1.143585021332244e-08, "loss": 0.1846, "step": 53723 }, { "epoch": 0.9338594448017522, "grad_norm": 1.2324740021326084, "learning_rate": 1.1429865013675433e-08, "loss": 0.2846, "step": 53724 }, { "epoch": 0.933876827339255, "grad_norm": 0.9374567786476392, "learning_rate": 1.1423881362571386e-08, "loss": 0.0913, "step": 53725 }, { "epoch": 0.9338942098767579, "grad_norm": 0.8244586698009105, "learning_rate": 1.1417899260029173e-08, "loss": 0.1363, "step": 53726 }, { "epoch": 0.9339115924142606, "grad_norm": 1.8674789884121252, "learning_rate": 1.1411918706067724e-08, "loss": 0.1575, "step": 53727 }, { "epoch": 0.9339289749517634, "grad_norm": 2.395283247748534, "learning_rate": 1.140593970070608e-08, "loss": 0.2507, "step": 53728 }, { "epoch": 0.9339463574892662, "grad_norm": 1.373928686809319, "learning_rate": 1.1399962243963279e-08, "loss": 0.2066, "step": 53729 }, { "epoch": 0.9339637400267691, "grad_norm": 1.209547103264044, "learning_rate": 1.139398633585803e-08, "loss": 0.217, "step": 53730 }, { "epoch": 0.9339811225642719, "grad_norm": 3.7541279674009567, "learning_rate": 1.1388011976409429e-08, "loss": 0.2816, "step": 53731 }, { "epoch": 0.9339985051017747, "grad_norm": 1.4359444100696415, "learning_rate": 1.1382039165636348e-08, "loss": 0.1877, "step": 53732 }, { "epoch": 0.9340158876392776, "grad_norm": 1.419898420501994, "learning_rate": 1.1376067903557828e-08, "loss": 0.1737, "step": 53733 }, { "epoch": 0.9340332701767804, "grad_norm": 1.1938738508692703, "learning_rate": 1.1370098190192634e-08, "loss": 0.1467, "step": 53734 }, { "epoch": 0.9340506527142832, "grad_norm": 2.2069534441947303, "learning_rate": 1.136413002555986e-08, "loss": 0.1685, "step": 53735 }, { "epoch": 0.934068035251786, "grad_norm": 1.067129229688127, "learning_rate": 1.1358163409678267e-08, "loss": 0.1458, "step": 53736 }, { "epoch": 0.9340854177892889, "grad_norm": 1.272993456901827, "learning_rate": 1.1352198342566843e-08, "loss": 0.1801, "step": 53737 }, { "epoch": 0.9341028003267917, "grad_norm": 0.6896524804023698, "learning_rate": 1.134623482424446e-08, "loss": 0.1633, "step": 53738 }, { "epoch": 0.9341201828642945, "grad_norm": 1.0931846742914657, "learning_rate": 1.1340272854730104e-08, "loss": 0.1579, "step": 53739 }, { "epoch": 0.9341375654017974, "grad_norm": 2.242739265844852, "learning_rate": 1.1334312434042593e-08, "loss": 0.2637, "step": 53740 }, { "epoch": 0.9341549479393002, "grad_norm": 1.3977049013784435, "learning_rate": 1.1328353562200798e-08, "loss": 0.2319, "step": 53741 }, { "epoch": 0.934172330476803, "grad_norm": 1.0747641594787238, "learning_rate": 1.1322396239223708e-08, "loss": 0.1364, "step": 53742 }, { "epoch": 0.9341897130143059, "grad_norm": 1.894085133889713, "learning_rate": 1.1316440465130084e-08, "loss": 0.1771, "step": 53743 }, { "epoch": 0.9342070955518087, "grad_norm": 1.5507039587233042, "learning_rate": 1.131048623993891e-08, "loss": 0.1646, "step": 53744 }, { "epoch": 0.9342244780893115, "grad_norm": 1.0236562697737264, "learning_rate": 1.130453356366895e-08, "loss": 0.144, "step": 53745 }, { "epoch": 0.9342418606268144, "grad_norm": 1.2935234438347807, "learning_rate": 1.1298582436339133e-08, "loss": 0.1346, "step": 53746 }, { "epoch": 0.9342592431643171, "grad_norm": 1.4365306322696356, "learning_rate": 1.1292632857968332e-08, "loss": 0.2346, "step": 53747 }, { "epoch": 0.9342766257018199, "grad_norm": 2.7124000567216733, "learning_rate": 1.1286684828575476e-08, "loss": 0.2389, "step": 53748 }, { "epoch": 0.9342940082393227, "grad_norm": 1.2039516251302682, "learning_rate": 1.128073834817922e-08, "loss": 0.1355, "step": 53749 }, { "epoch": 0.9343113907768256, "grad_norm": 1.3815444937548975, "learning_rate": 1.1274793416798545e-08, "loss": 0.3622, "step": 53750 }, { "epoch": 0.9343287733143284, "grad_norm": 2.0528050354362595, "learning_rate": 1.1268850034452326e-08, "loss": 0.202, "step": 53751 }, { "epoch": 0.9343461558518312, "grad_norm": 1.597065122709259, "learning_rate": 1.1262908201159327e-08, "loss": 0.1417, "step": 53752 }, { "epoch": 0.9343635383893341, "grad_norm": 1.7163107228780206, "learning_rate": 1.1256967916938365e-08, "loss": 0.2562, "step": 53753 }, { "epoch": 0.9343809209268369, "grad_norm": 1.7342945650892527, "learning_rate": 1.125102918180837e-08, "loss": 0.2958, "step": 53754 }, { "epoch": 0.9343983034643397, "grad_norm": 1.4873392579407019, "learning_rate": 1.1245091995788048e-08, "loss": 0.1916, "step": 53755 }, { "epoch": 0.9344156860018425, "grad_norm": 1.1956077334983244, "learning_rate": 1.1239156358896273e-08, "loss": 0.1462, "step": 53756 }, { "epoch": 0.9344330685393454, "grad_norm": 2.0839134814165137, "learning_rate": 1.123322227115181e-08, "loss": 0.2007, "step": 53757 }, { "epoch": 0.9344504510768482, "grad_norm": 0.9693685572939168, "learning_rate": 1.1227289732573587e-08, "loss": 0.3274, "step": 53758 }, { "epoch": 0.934467833614351, "grad_norm": 1.6808330776700267, "learning_rate": 1.1221358743180309e-08, "loss": 0.1867, "step": 53759 }, { "epoch": 0.9344852161518539, "grad_norm": 1.4060666268319566, "learning_rate": 1.1215429302990798e-08, "loss": 0.1859, "step": 53760 }, { "epoch": 0.9345025986893567, "grad_norm": 1.1664288283895135, "learning_rate": 1.120950141202387e-08, "loss": 0.1855, "step": 53761 }, { "epoch": 0.9345199812268595, "grad_norm": 1.3450909459288496, "learning_rate": 1.1203575070298288e-08, "loss": 0.1594, "step": 53762 }, { "epoch": 0.9345373637643624, "grad_norm": 0.7700117557980981, "learning_rate": 1.1197650277832815e-08, "loss": 0.165, "step": 53763 }, { "epoch": 0.9345547463018652, "grad_norm": 1.0658814185380907, "learning_rate": 1.1191727034646214e-08, "loss": 0.1993, "step": 53764 }, { "epoch": 0.934572128839368, "grad_norm": 1.2521172098329216, "learning_rate": 1.1185805340757304e-08, "loss": 0.363, "step": 53765 }, { "epoch": 0.9345895113768707, "grad_norm": 1.2266325288619206, "learning_rate": 1.1179885196184902e-08, "loss": 0.1388, "step": 53766 }, { "epoch": 0.9346068939143736, "grad_norm": 1.4043247919012125, "learning_rate": 1.1173966600947771e-08, "loss": 0.2119, "step": 53767 }, { "epoch": 0.9346242764518764, "grad_norm": 1.2465029887451111, "learning_rate": 1.1168049555064507e-08, "loss": 0.1424, "step": 53768 }, { "epoch": 0.9346416589893792, "grad_norm": 1.089424670584347, "learning_rate": 1.1162134058553985e-08, "loss": 0.1182, "step": 53769 }, { "epoch": 0.9346590415268821, "grad_norm": 1.513499269001945, "learning_rate": 1.1156220111434966e-08, "loss": 0.2302, "step": 53770 }, { "epoch": 0.9346764240643849, "grad_norm": 1.0337346873042779, "learning_rate": 1.1150307713726215e-08, "loss": 0.1803, "step": 53771 }, { "epoch": 0.9346938066018877, "grad_norm": 1.145023525042811, "learning_rate": 1.1144396865446382e-08, "loss": 0.1429, "step": 53772 }, { "epoch": 0.9347111891393906, "grad_norm": 1.3878072356491304, "learning_rate": 1.1138487566614286e-08, "loss": 0.2465, "step": 53773 }, { "epoch": 0.9347285716768934, "grad_norm": 0.9511682243859244, "learning_rate": 1.1132579817248578e-08, "loss": 0.1326, "step": 53774 }, { "epoch": 0.9347459542143962, "grad_norm": 1.3906153973547453, "learning_rate": 1.1126673617368021e-08, "loss": 0.2041, "step": 53775 }, { "epoch": 0.934763336751899, "grad_norm": 0.8175434619817975, "learning_rate": 1.1120768966991379e-08, "loss": 0.1976, "step": 53776 }, { "epoch": 0.9347807192894019, "grad_norm": 1.398351774448889, "learning_rate": 1.1114865866137301e-08, "loss": 0.3094, "step": 53777 }, { "epoch": 0.9347981018269047, "grad_norm": 1.4925759133066137, "learning_rate": 1.1108964314824498e-08, "loss": 0.1523, "step": 53778 }, { "epoch": 0.9348154843644075, "grad_norm": 1.7134369546503365, "learning_rate": 1.1103064313071731e-08, "loss": 0.3175, "step": 53779 }, { "epoch": 0.9348328669019104, "grad_norm": 1.3614706277297384, "learning_rate": 1.1097165860897707e-08, "loss": 0.2207, "step": 53780 }, { "epoch": 0.9348502494394132, "grad_norm": 1.1924370397622708, "learning_rate": 1.109126895832102e-08, "loss": 0.1671, "step": 53781 }, { "epoch": 0.934867631976916, "grad_norm": 1.733819540838476, "learning_rate": 1.1085373605360438e-08, "loss": 0.1823, "step": 53782 }, { "epoch": 0.9348850145144189, "grad_norm": 0.9614186441027792, "learning_rate": 1.1079479802034664e-08, "loss": 0.1865, "step": 53783 }, { "epoch": 0.9349023970519217, "grad_norm": 3.6854520506432284, "learning_rate": 1.1073587548362352e-08, "loss": 0.264, "step": 53784 }, { "epoch": 0.9349197795894245, "grad_norm": 1.2210979491894607, "learning_rate": 1.1067696844362096e-08, "loss": 0.1249, "step": 53785 }, { "epoch": 0.9349371621269272, "grad_norm": 1.4089701008480593, "learning_rate": 1.1061807690052716e-08, "loss": 0.1965, "step": 53786 }, { "epoch": 0.9349545446644301, "grad_norm": 1.5258756654434311, "learning_rate": 1.1055920085452807e-08, "loss": 0.2187, "step": 53787 }, { "epoch": 0.9349719272019329, "grad_norm": 1.5446764430452355, "learning_rate": 1.1050034030580969e-08, "loss": 0.2132, "step": 53788 }, { "epoch": 0.9349893097394357, "grad_norm": 1.9818519048018268, "learning_rate": 1.104414952545596e-08, "loss": 0.1615, "step": 53789 }, { "epoch": 0.9350066922769386, "grad_norm": 2.805349070724403, "learning_rate": 1.1038266570096377e-08, "loss": 0.2818, "step": 53790 }, { "epoch": 0.9350240748144414, "grad_norm": 0.7402225237993073, "learning_rate": 1.103238516452093e-08, "loss": 0.2009, "step": 53791 }, { "epoch": 0.9350414573519442, "grad_norm": 1.1811823712243423, "learning_rate": 1.1026505308748157e-08, "loss": 0.1394, "step": 53792 }, { "epoch": 0.935058839889447, "grad_norm": 1.2577217163708208, "learning_rate": 1.1020627002796767e-08, "loss": 0.2699, "step": 53793 }, { "epoch": 0.9350762224269499, "grad_norm": 1.1076390321908565, "learning_rate": 1.1014750246685355e-08, "loss": 0.1203, "step": 53794 }, { "epoch": 0.9350936049644527, "grad_norm": 1.248518788090091, "learning_rate": 1.1008875040432574e-08, "loss": 0.1388, "step": 53795 }, { "epoch": 0.9351109875019555, "grad_norm": 1.9461342532422783, "learning_rate": 1.1003001384056965e-08, "loss": 0.1519, "step": 53796 }, { "epoch": 0.9351283700394584, "grad_norm": 0.862651955580533, "learning_rate": 1.0997129277577288e-08, "loss": 0.2334, "step": 53797 }, { "epoch": 0.9351457525769612, "grad_norm": 1.6890093680300862, "learning_rate": 1.0991258721012087e-08, "loss": 0.2452, "step": 53798 }, { "epoch": 0.935163135114464, "grad_norm": 1.078965115259572, "learning_rate": 1.0985389714380011e-08, "loss": 0.2241, "step": 53799 }, { "epoch": 0.9351805176519669, "grad_norm": 1.7146963323827873, "learning_rate": 1.0979522257699492e-08, "loss": 0.1393, "step": 53800 }, { "epoch": 0.9351979001894697, "grad_norm": 1.732123350118604, "learning_rate": 1.0973656350989291e-08, "loss": 0.1986, "step": 53801 }, { "epoch": 0.9352152827269725, "grad_norm": 1.3775507488000691, "learning_rate": 1.0967791994268005e-08, "loss": 0.2202, "step": 53802 }, { "epoch": 0.9352326652644753, "grad_norm": 1.5215457854613075, "learning_rate": 1.096192918755412e-08, "loss": 0.1312, "step": 53803 }, { "epoch": 0.9352500478019782, "grad_norm": 0.8709871250330107, "learning_rate": 1.0956067930866287e-08, "loss": 0.1676, "step": 53804 }, { "epoch": 0.935267430339481, "grad_norm": 1.840041109777266, "learning_rate": 1.0950208224223156e-08, "loss": 0.2436, "step": 53805 }, { "epoch": 0.9352848128769837, "grad_norm": 1.6952380633755257, "learning_rate": 1.0944350067643104e-08, "loss": 0.1525, "step": 53806 }, { "epoch": 0.9353021954144866, "grad_norm": 1.0392798601569493, "learning_rate": 1.0938493461144893e-08, "loss": 0.1631, "step": 53807 }, { "epoch": 0.9353195779519894, "grad_norm": 1.7355711626705337, "learning_rate": 1.0932638404746897e-08, "loss": 0.2344, "step": 53808 }, { "epoch": 0.9353369604894922, "grad_norm": 0.8619135745262267, "learning_rate": 1.0926784898467822e-08, "loss": 0.1985, "step": 53809 }, { "epoch": 0.935354343026995, "grad_norm": 3.1656996308807814, "learning_rate": 1.092093294232621e-08, "loss": 0.191, "step": 53810 }, { "epoch": 0.9353717255644979, "grad_norm": 1.3172088041677628, "learning_rate": 1.0915082536340548e-08, "loss": 0.1908, "step": 53811 }, { "epoch": 0.9353891081020007, "grad_norm": 2.4589700684390645, "learning_rate": 1.0909233680529428e-08, "loss": 0.2528, "step": 53812 }, { "epoch": 0.9354064906395035, "grad_norm": 1.443960267797629, "learning_rate": 1.0903386374911338e-08, "loss": 0.227, "step": 53813 }, { "epoch": 0.9354238731770064, "grad_norm": 1.5464324048391815, "learning_rate": 1.0897540619504874e-08, "loss": 0.4343, "step": 53814 }, { "epoch": 0.9354412557145092, "grad_norm": 1.4364903381807694, "learning_rate": 1.0891696414328522e-08, "loss": 0.2789, "step": 53815 }, { "epoch": 0.935458638252012, "grad_norm": 1.7419519497452003, "learning_rate": 1.0885853759400765e-08, "loss": 0.2019, "step": 53816 }, { "epoch": 0.9354760207895149, "grad_norm": 1.3854274716130985, "learning_rate": 1.0880012654740256e-08, "loss": 0.1813, "step": 53817 }, { "epoch": 0.9354934033270177, "grad_norm": 0.8453611571264704, "learning_rate": 1.0874173100365424e-08, "loss": 0.2428, "step": 53818 }, { "epoch": 0.9355107858645205, "grad_norm": 0.9686183556660595, "learning_rate": 1.08683350962947e-08, "loss": 0.234, "step": 53819 }, { "epoch": 0.9355281684020234, "grad_norm": 0.7951628081456734, "learning_rate": 1.086249864254668e-08, "loss": 0.1376, "step": 53820 }, { "epoch": 0.9355455509395262, "grad_norm": 2.0605441833006775, "learning_rate": 1.0856663739139849e-08, "loss": 0.1984, "step": 53821 }, { "epoch": 0.935562933477029, "grad_norm": 0.9139063755437397, "learning_rate": 1.0850830386092746e-08, "loss": 0.108, "step": 53822 }, { "epoch": 0.9355803160145318, "grad_norm": 1.2185718531928147, "learning_rate": 1.0844998583423747e-08, "loss": 0.3268, "step": 53823 }, { "epoch": 0.9355976985520347, "grad_norm": 1.2821885321428945, "learning_rate": 1.0839168331151505e-08, "loss": 0.1443, "step": 53824 }, { "epoch": 0.9356150810895375, "grad_norm": 1.297501942607565, "learning_rate": 1.0833339629294336e-08, "loss": 0.185, "step": 53825 }, { "epoch": 0.9356324636270402, "grad_norm": 1.641780483152512, "learning_rate": 1.0827512477870782e-08, "loss": 0.1948, "step": 53826 }, { "epoch": 0.935649846164543, "grad_norm": 1.236161055541334, "learning_rate": 1.0821686876899217e-08, "loss": 0.1344, "step": 53827 }, { "epoch": 0.9356672287020459, "grad_norm": 2.0806466206500334, "learning_rate": 1.0815862826398291e-08, "loss": 0.3131, "step": 53828 }, { "epoch": 0.9356846112395487, "grad_norm": 4.352212782555039, "learning_rate": 1.0810040326386383e-08, "loss": 0.2715, "step": 53829 }, { "epoch": 0.9357019937770515, "grad_norm": 1.1048214189247056, "learning_rate": 1.0804219376881863e-08, "loss": 0.1368, "step": 53830 }, { "epoch": 0.9357193763145544, "grad_norm": 1.0793930782272558, "learning_rate": 1.0798399977903328e-08, "loss": 0.2228, "step": 53831 }, { "epoch": 0.9357367588520572, "grad_norm": 1.1562601263479102, "learning_rate": 1.0792582129469097e-08, "loss": 0.2057, "step": 53832 }, { "epoch": 0.93575414138956, "grad_norm": 1.0965626634133339, "learning_rate": 1.0786765831597655e-08, "loss": 0.2953, "step": 53833 }, { "epoch": 0.9357715239270629, "grad_norm": 0.9871659111499139, "learning_rate": 1.0780951084307433e-08, "loss": 0.1911, "step": 53834 }, { "epoch": 0.9357889064645657, "grad_norm": 1.205832276773973, "learning_rate": 1.0775137887616858e-08, "loss": 0.1475, "step": 53835 }, { "epoch": 0.9358062890020685, "grad_norm": 1.9983529110721343, "learning_rate": 1.0769326241544418e-08, "loss": 0.1942, "step": 53836 }, { "epoch": 0.9358236715395714, "grad_norm": 1.5393837981951146, "learning_rate": 1.0763516146108486e-08, "loss": 0.2223, "step": 53837 }, { "epoch": 0.9358410540770742, "grad_norm": 1.1753842982893057, "learning_rate": 1.0757707601327382e-08, "loss": 0.1477, "step": 53838 }, { "epoch": 0.935858436614577, "grad_norm": 0.7997514080940099, "learning_rate": 1.0751900607219645e-08, "loss": 0.1785, "step": 53839 }, { "epoch": 0.9358758191520798, "grad_norm": 1.438314319654946, "learning_rate": 1.0746095163803703e-08, "loss": 0.1379, "step": 53840 }, { "epoch": 0.9358932016895827, "grad_norm": 1.689697535834963, "learning_rate": 1.0740291271097824e-08, "loss": 0.1429, "step": 53841 }, { "epoch": 0.9359105842270855, "grad_norm": 1.2795519577989551, "learning_rate": 1.0734488929120489e-08, "loss": 0.1677, "step": 53842 }, { "epoch": 0.9359279667645883, "grad_norm": 1.0904834273959971, "learning_rate": 1.0728688137890075e-08, "loss": 0.1834, "step": 53843 }, { "epoch": 0.9359453493020912, "grad_norm": 1.785734149355635, "learning_rate": 1.0722888897425009e-08, "loss": 0.2551, "step": 53844 }, { "epoch": 0.935962731839594, "grad_norm": 0.9428729489212456, "learning_rate": 1.0717091207743556e-08, "loss": 0.1488, "step": 53845 }, { "epoch": 0.9359801143770967, "grad_norm": 1.2314068908142182, "learning_rate": 1.0711295068864201e-08, "loss": 0.1981, "step": 53846 }, { "epoch": 0.9359974969145995, "grad_norm": 1.2340045256788195, "learning_rate": 1.0705500480805264e-08, "loss": 0.1083, "step": 53847 }, { "epoch": 0.9360148794521024, "grad_norm": 2.1035510988876123, "learning_rate": 1.0699707443585171e-08, "loss": 0.1579, "step": 53848 }, { "epoch": 0.9360322619896052, "grad_norm": 1.520231363182609, "learning_rate": 1.0693915957222243e-08, "loss": 0.119, "step": 53849 }, { "epoch": 0.936049644527108, "grad_norm": 1.3868196273297666, "learning_rate": 1.0688126021734745e-08, "loss": 0.1817, "step": 53850 }, { "epoch": 0.9360670270646109, "grad_norm": 1.360294669972935, "learning_rate": 1.0682337637141215e-08, "loss": 0.1953, "step": 53851 }, { "epoch": 0.9360844096021137, "grad_norm": 1.2224588876736342, "learning_rate": 1.0676550803459861e-08, "loss": 0.1819, "step": 53852 }, { "epoch": 0.9361017921396165, "grad_norm": 2.164482169311234, "learning_rate": 1.0670765520709057e-08, "loss": 0.1958, "step": 53853 }, { "epoch": 0.9361191746771194, "grad_norm": 1.171487176973139, "learning_rate": 1.0664981788907123e-08, "loss": 0.1709, "step": 53854 }, { "epoch": 0.9361365572146222, "grad_norm": 1.2902728752241002, "learning_rate": 1.0659199608072489e-08, "loss": 0.1867, "step": 53855 }, { "epoch": 0.936153939752125, "grad_norm": 2.052414501765186, "learning_rate": 1.0653418978223361e-08, "loss": 0.2309, "step": 53856 }, { "epoch": 0.9361713222896278, "grad_norm": 1.695687190974504, "learning_rate": 1.0647639899378114e-08, "loss": 0.2076, "step": 53857 }, { "epoch": 0.9361887048271307, "grad_norm": 1.6430015359732353, "learning_rate": 1.064186237155501e-08, "loss": 0.1568, "step": 53858 }, { "epoch": 0.9362060873646335, "grad_norm": 2.124636619868624, "learning_rate": 1.0636086394772425e-08, "loss": 0.148, "step": 53859 }, { "epoch": 0.9362234699021363, "grad_norm": 1.0640712742667406, "learning_rate": 1.0630311969048678e-08, "loss": 0.1214, "step": 53860 }, { "epoch": 0.9362408524396392, "grad_norm": 1.300383872913731, "learning_rate": 1.0624539094402084e-08, "loss": 0.1874, "step": 53861 }, { "epoch": 0.936258234977142, "grad_norm": 1.8810657519560647, "learning_rate": 1.0618767770850856e-08, "loss": 0.1808, "step": 53862 }, { "epoch": 0.9362756175146448, "grad_norm": 1.0785226278465063, "learning_rate": 1.0612997998413309e-08, "loss": 0.2519, "step": 53863 }, { "epoch": 0.9362930000521477, "grad_norm": 1.683330261533866, "learning_rate": 1.0607229777107819e-08, "loss": 0.1806, "step": 53864 }, { "epoch": 0.9363103825896505, "grad_norm": 2.170659342721208, "learning_rate": 1.0601463106952535e-08, "loss": 0.169, "step": 53865 }, { "epoch": 0.9363277651271532, "grad_norm": 1.759813241847947, "learning_rate": 1.0595697987965778e-08, "loss": 0.2702, "step": 53866 }, { "epoch": 0.936345147664656, "grad_norm": 3.5569852893301857, "learning_rate": 1.0589934420165925e-08, "loss": 0.2442, "step": 53867 }, { "epoch": 0.9363625302021589, "grad_norm": 3.0229162727659875, "learning_rate": 1.058417240357118e-08, "loss": 0.2628, "step": 53868 }, { "epoch": 0.9363799127396617, "grad_norm": 1.8571642565946869, "learning_rate": 1.0578411938199693e-08, "loss": 0.3607, "step": 53869 }, { "epoch": 0.9363972952771645, "grad_norm": 1.4685887048955544, "learning_rate": 1.05726530240699e-08, "loss": 0.1887, "step": 53870 }, { "epoch": 0.9364146778146674, "grad_norm": 0.7806113668808453, "learning_rate": 1.0566895661199893e-08, "loss": 0.2216, "step": 53871 }, { "epoch": 0.9364320603521702, "grad_norm": 1.0623191791554003, "learning_rate": 1.0561139849608047e-08, "loss": 0.1782, "step": 53872 }, { "epoch": 0.936449442889673, "grad_norm": 0.9556978006447959, "learning_rate": 1.0555385589312571e-08, "loss": 0.1936, "step": 53873 }, { "epoch": 0.9364668254271759, "grad_norm": 1.8981527369941256, "learning_rate": 1.0549632880331672e-08, "loss": 0.1342, "step": 53874 }, { "epoch": 0.9364842079646787, "grad_norm": 1.187155387409457, "learning_rate": 1.0543881722683612e-08, "loss": 0.1565, "step": 53875 }, { "epoch": 0.9365015905021815, "grad_norm": 1.9024812596518532, "learning_rate": 1.0538132116386655e-08, "loss": 0.1915, "step": 53876 }, { "epoch": 0.9365189730396843, "grad_norm": 0.8556328856306785, "learning_rate": 1.0532384061458842e-08, "loss": 0.1983, "step": 53877 }, { "epoch": 0.9365363555771872, "grad_norm": 1.0255512241429314, "learning_rate": 1.0526637557918661e-08, "loss": 0.1878, "step": 53878 }, { "epoch": 0.93655373811469, "grad_norm": 1.0109779593262074, "learning_rate": 1.0520892605784149e-08, "loss": 0.2458, "step": 53879 }, { "epoch": 0.9365711206521928, "grad_norm": 1.8937014002332158, "learning_rate": 1.0515149205073515e-08, "loss": 0.1434, "step": 53880 }, { "epoch": 0.9365885031896957, "grad_norm": 1.781512320594193, "learning_rate": 1.0509407355805077e-08, "loss": 0.2408, "step": 53881 }, { "epoch": 0.9366058857271985, "grad_norm": 4.155928630028073, "learning_rate": 1.0503667057996934e-08, "loss": 0.1801, "step": 53882 }, { "epoch": 0.9366232682647013, "grad_norm": 0.8498615411271163, "learning_rate": 1.0497928311667293e-08, "loss": 0.209, "step": 53883 }, { "epoch": 0.9366406508022042, "grad_norm": 1.9169762121624596, "learning_rate": 1.0492191116834414e-08, "loss": 0.1215, "step": 53884 }, { "epoch": 0.936658033339707, "grad_norm": 1.1457096310672426, "learning_rate": 1.0486455473516342e-08, "loss": 0.1747, "step": 53885 }, { "epoch": 0.9366754158772097, "grad_norm": 3.3822936721372776, "learning_rate": 1.0480721381731394e-08, "loss": 0.2111, "step": 53886 }, { "epoch": 0.9366927984147125, "grad_norm": 2.143013444124969, "learning_rate": 1.0474988841497723e-08, "loss": 0.2112, "step": 53887 }, { "epoch": 0.9367101809522154, "grad_norm": 1.789552360656234, "learning_rate": 1.046925785283348e-08, "loss": 0.1711, "step": 53888 }, { "epoch": 0.9367275634897182, "grad_norm": 1.2453459330026415, "learning_rate": 1.0463528415756706e-08, "loss": 0.1216, "step": 53889 }, { "epoch": 0.936744946027221, "grad_norm": 2.114043704209251, "learning_rate": 1.045780053028572e-08, "loss": 0.216, "step": 53890 }, { "epoch": 0.9367623285647239, "grad_norm": 0.9294311667988008, "learning_rate": 1.0452074196438677e-08, "loss": 0.1587, "step": 53891 }, { "epoch": 0.9367797111022267, "grad_norm": 0.9751576802791555, "learning_rate": 1.0446349414233668e-08, "loss": 0.1864, "step": 53892 }, { "epoch": 0.9367970936397295, "grad_norm": 0.7959224899019532, "learning_rate": 1.0440626183688795e-08, "loss": 0.1694, "step": 53893 }, { "epoch": 0.9368144761772323, "grad_norm": 1.746596377945079, "learning_rate": 1.0434904504822317e-08, "loss": 0.135, "step": 53894 }, { "epoch": 0.9368318587147352, "grad_norm": 1.2192174616247602, "learning_rate": 1.0429184377652278e-08, "loss": 0.2009, "step": 53895 }, { "epoch": 0.936849241252238, "grad_norm": 1.935429156282631, "learning_rate": 1.042346580219683e-08, "loss": 0.1656, "step": 53896 }, { "epoch": 0.9368666237897408, "grad_norm": 1.4379374086332974, "learning_rate": 1.0417748778474068e-08, "loss": 0.2227, "step": 53897 }, { "epoch": 0.9368840063272437, "grad_norm": 0.9266259531796613, "learning_rate": 1.0412033306502199e-08, "loss": 0.219, "step": 53898 }, { "epoch": 0.9369013888647465, "grad_norm": 0.9421642424329669, "learning_rate": 1.0406319386299267e-08, "loss": 0.1532, "step": 53899 }, { "epoch": 0.9369187714022493, "grad_norm": 1.2960819954043432, "learning_rate": 1.0400607017883422e-08, "loss": 0.2023, "step": 53900 }, { "epoch": 0.9369361539397522, "grad_norm": 1.2237201322447877, "learning_rate": 1.0394896201272706e-08, "loss": 0.1778, "step": 53901 }, { "epoch": 0.936953536477255, "grad_norm": 0.9112683156636664, "learning_rate": 1.0389186936485272e-08, "loss": 0.1656, "step": 53902 }, { "epoch": 0.9369709190147578, "grad_norm": 0.8603702228474492, "learning_rate": 1.038347922353927e-08, "loss": 0.1336, "step": 53903 }, { "epoch": 0.9369883015522606, "grad_norm": 1.389045802239432, "learning_rate": 1.0377773062452688e-08, "loss": 0.1721, "step": 53904 }, { "epoch": 0.9370056840897634, "grad_norm": 0.7594538493226296, "learning_rate": 1.0372068453243565e-08, "loss": 0.1781, "step": 53905 }, { "epoch": 0.9370230666272662, "grad_norm": 0.7055889333750451, "learning_rate": 1.036636539593022e-08, "loss": 0.099, "step": 53906 }, { "epoch": 0.937040449164769, "grad_norm": 1.2395396460478976, "learning_rate": 1.0360663890530474e-08, "loss": 0.1987, "step": 53907 }, { "epoch": 0.9370578317022719, "grad_norm": 3.9921482666886865, "learning_rate": 1.0354963937062533e-08, "loss": 0.2734, "step": 53908 }, { "epoch": 0.9370752142397747, "grad_norm": 1.4406333052997256, "learning_rate": 1.0349265535544438e-08, "loss": 0.1959, "step": 53909 }, { "epoch": 0.9370925967772775, "grad_norm": 1.3369920350609659, "learning_rate": 1.034356868599423e-08, "loss": 0.1309, "step": 53910 }, { "epoch": 0.9371099793147804, "grad_norm": 1.5672595829512612, "learning_rate": 1.0337873388429952e-08, "loss": 0.3248, "step": 53911 }, { "epoch": 0.9371273618522832, "grad_norm": 2.8317036613851467, "learning_rate": 1.0332179642869754e-08, "loss": 0.1494, "step": 53912 }, { "epoch": 0.937144744389786, "grad_norm": 1.0303993246634318, "learning_rate": 1.0326487449331567e-08, "loss": 0.1839, "step": 53913 }, { "epoch": 0.9371621269272888, "grad_norm": 2.8960598907522153, "learning_rate": 1.0320796807833488e-08, "loss": 0.3289, "step": 53914 }, { "epoch": 0.9371795094647917, "grad_norm": 1.0962537385188207, "learning_rate": 1.0315107718393557e-08, "loss": 0.145, "step": 53915 }, { "epoch": 0.9371968920022945, "grad_norm": 1.405322579374702, "learning_rate": 1.0309420181029704e-08, "loss": 0.1559, "step": 53916 }, { "epoch": 0.9372142745397973, "grad_norm": 0.942815083738903, "learning_rate": 1.0303734195760139e-08, "loss": 0.1799, "step": 53917 }, { "epoch": 0.9372316570773002, "grad_norm": 1.180457943342295, "learning_rate": 1.0298049762602789e-08, "loss": 0.1818, "step": 53918 }, { "epoch": 0.937249039614803, "grad_norm": 1.9617033421141818, "learning_rate": 1.0292366881575642e-08, "loss": 0.2082, "step": 53919 }, { "epoch": 0.9372664221523058, "grad_norm": 1.2770491524481198, "learning_rate": 1.0286685552696739e-08, "loss": 0.1676, "step": 53920 }, { "epoch": 0.9372838046898087, "grad_norm": 1.4368795701993162, "learning_rate": 1.0281005775984065e-08, "loss": 0.1449, "step": 53921 }, { "epoch": 0.9373011872273115, "grad_norm": 1.414335240960152, "learning_rate": 1.0275327551455714e-08, "loss": 0.1534, "step": 53922 }, { "epoch": 0.9373185697648143, "grad_norm": 1.3175398960995033, "learning_rate": 1.0269650879129566e-08, "loss": 0.1606, "step": 53923 }, { "epoch": 0.9373359523023171, "grad_norm": 1.1076154354217431, "learning_rate": 1.0263975759023602e-08, "loss": 0.2334, "step": 53924 }, { "epoch": 0.9373533348398199, "grad_norm": 1.3552845153206627, "learning_rate": 1.0258302191155977e-08, "loss": 0.1087, "step": 53925 }, { "epoch": 0.9373707173773227, "grad_norm": 1.7579607606551788, "learning_rate": 1.0252630175544508e-08, "loss": 0.1349, "step": 53926 }, { "epoch": 0.9373880999148255, "grad_norm": 0.7652774954199101, "learning_rate": 1.0246959712207236e-08, "loss": 0.1813, "step": 53927 }, { "epoch": 0.9374054824523284, "grad_norm": 1.4930796305904113, "learning_rate": 1.0241290801162095e-08, "loss": 0.2807, "step": 53928 }, { "epoch": 0.9374228649898312, "grad_norm": 1.4033359999092154, "learning_rate": 1.0235623442427122e-08, "loss": 0.2558, "step": 53929 }, { "epoch": 0.937440247527334, "grad_norm": 0.9898923682192459, "learning_rate": 1.0229957636020247e-08, "loss": 0.197, "step": 53930 }, { "epoch": 0.9374576300648368, "grad_norm": 1.5615175083680224, "learning_rate": 1.0224293381959459e-08, "loss": 0.1284, "step": 53931 }, { "epoch": 0.9374750126023397, "grad_norm": 1.1341331931380139, "learning_rate": 1.021863068026263e-08, "loss": 0.1689, "step": 53932 }, { "epoch": 0.9374923951398425, "grad_norm": 0.9180810639757951, "learning_rate": 1.0212969530947745e-08, "loss": 0.1673, "step": 53933 }, { "epoch": 0.9375097776773453, "grad_norm": 1.8195513749107044, "learning_rate": 1.0207309934032792e-08, "loss": 0.2675, "step": 53934 }, { "epoch": 0.9375271602148482, "grad_norm": 1.3327463577175571, "learning_rate": 1.0201651889535645e-08, "loss": 0.1592, "step": 53935 }, { "epoch": 0.937544542752351, "grad_norm": 1.0762739529895269, "learning_rate": 1.0195995397474233e-08, "loss": 0.1812, "step": 53936 }, { "epoch": 0.9375619252898538, "grad_norm": 2.302191718563811, "learning_rate": 1.0190340457866598e-08, "loss": 0.205, "step": 53937 }, { "epoch": 0.9375793078273567, "grad_norm": 1.600647580178881, "learning_rate": 1.0184687070730614e-08, "loss": 0.119, "step": 53938 }, { "epoch": 0.9375966903648595, "grad_norm": 2.2543911964414325, "learning_rate": 1.0179035236084043e-08, "loss": 0.2305, "step": 53939 }, { "epoch": 0.9376140729023623, "grad_norm": 2.1244503127409717, "learning_rate": 1.0173384953944985e-08, "loss": 0.2322, "step": 53940 }, { "epoch": 0.9376314554398651, "grad_norm": 1.4996489458058537, "learning_rate": 1.0167736224331313e-08, "loss": 0.1621, "step": 53941 }, { "epoch": 0.937648837977368, "grad_norm": 1.6671805303362763, "learning_rate": 1.0162089047260847e-08, "loss": 0.1935, "step": 53942 }, { "epoch": 0.9376662205148708, "grad_norm": 1.734853878101032, "learning_rate": 1.015644342275157e-08, "loss": 0.14, "step": 53943 }, { "epoch": 0.9376836030523736, "grad_norm": 2.3620193054444734, "learning_rate": 1.0150799350821416e-08, "loss": 0.1708, "step": 53944 }, { "epoch": 0.9377009855898764, "grad_norm": 1.7340786654920513, "learning_rate": 1.0145156831488145e-08, "loss": 0.218, "step": 53945 }, { "epoch": 0.9377183681273792, "grad_norm": 1.7013395857980296, "learning_rate": 1.0139515864769744e-08, "loss": 0.26, "step": 53946 }, { "epoch": 0.937735750664882, "grad_norm": 1.1206678156990522, "learning_rate": 1.0133876450683975e-08, "loss": 0.1427, "step": 53947 }, { "epoch": 0.9377531332023848, "grad_norm": 1.3038163195933976, "learning_rate": 1.0128238589248827e-08, "loss": 0.154, "step": 53948 }, { "epoch": 0.9377705157398877, "grad_norm": 1.8506126153770415, "learning_rate": 1.0122602280482174e-08, "loss": 0.1981, "step": 53949 }, { "epoch": 0.9377878982773905, "grad_norm": 1.2790062425601665, "learning_rate": 1.0116967524401831e-08, "loss": 0.2782, "step": 53950 }, { "epoch": 0.9378052808148933, "grad_norm": 1.4682188280256925, "learning_rate": 1.0111334321025622e-08, "loss": 0.1845, "step": 53951 }, { "epoch": 0.9378226633523962, "grad_norm": 2.2146436233399194, "learning_rate": 1.0105702670371475e-08, "loss": 0.1388, "step": 53952 }, { "epoch": 0.937840045889899, "grad_norm": 2.2610417266879743, "learning_rate": 1.0100072572457264e-08, "loss": 0.1541, "step": 53953 }, { "epoch": 0.9378574284274018, "grad_norm": 1.7804719768026693, "learning_rate": 1.0094444027300698e-08, "loss": 0.2441, "step": 53954 }, { "epoch": 0.9378748109649047, "grad_norm": 1.631633210257285, "learning_rate": 1.0088817034919706e-08, "loss": 0.1447, "step": 53955 }, { "epoch": 0.9378921935024075, "grad_norm": 1.4446813131135472, "learning_rate": 1.0083191595332163e-08, "loss": 0.1785, "step": 53956 }, { "epoch": 0.9379095760399103, "grad_norm": 1.6473392628638854, "learning_rate": 1.007756770855589e-08, "loss": 0.1668, "step": 53957 }, { "epoch": 0.9379269585774132, "grad_norm": 1.510071699051566, "learning_rate": 1.0071945374608593e-08, "loss": 0.1779, "step": 53958 }, { "epoch": 0.937944341114916, "grad_norm": 2.0020564915995926, "learning_rate": 1.0066324593508202e-08, "loss": 0.1777, "step": 53959 }, { "epoch": 0.9379617236524188, "grad_norm": 1.2594095547553388, "learning_rate": 1.0060705365272482e-08, "loss": 0.1686, "step": 53960 }, { "epoch": 0.9379791061899216, "grad_norm": 1.4055410358032496, "learning_rate": 1.0055087689919307e-08, "loss": 0.1385, "step": 53961 }, { "epoch": 0.9379964887274245, "grad_norm": 1.6680084522413974, "learning_rate": 1.0049471567466438e-08, "loss": 0.1296, "step": 53962 }, { "epoch": 0.9380138712649273, "grad_norm": 1.2157140552641228, "learning_rate": 1.0043856997931643e-08, "loss": 0.1225, "step": 53963 }, { "epoch": 0.9380312538024301, "grad_norm": 1.5633740515590517, "learning_rate": 1.0038243981332795e-08, "loss": 0.2343, "step": 53964 }, { "epoch": 0.9380486363399329, "grad_norm": 1.5518291155530712, "learning_rate": 1.0032632517687656e-08, "loss": 0.2307, "step": 53965 }, { "epoch": 0.9380660188774357, "grad_norm": 0.9324380537502018, "learning_rate": 1.002702260701388e-08, "loss": 0.1596, "step": 53966 }, { "epoch": 0.9380834014149385, "grad_norm": 1.444730571894242, "learning_rate": 1.0021414249329507e-08, "loss": 0.2026, "step": 53967 }, { "epoch": 0.9381007839524413, "grad_norm": 1.9360608915552056, "learning_rate": 1.001580744465208e-08, "loss": 0.1509, "step": 53968 }, { "epoch": 0.9381181664899442, "grad_norm": 1.1840614392468738, "learning_rate": 1.0010202192999584e-08, "loss": 0.1924, "step": 53969 }, { "epoch": 0.938135549027447, "grad_norm": 2.585067481039624, "learning_rate": 1.000459849438956e-08, "loss": 0.2387, "step": 53970 }, { "epoch": 0.9381529315649498, "grad_norm": 1.0589283384853125, "learning_rate": 9.998996348839883e-09, "loss": 0.2016, "step": 53971 }, { "epoch": 0.9381703141024527, "grad_norm": 1.2016292213799082, "learning_rate": 9.993395756368316e-09, "loss": 0.1558, "step": 53972 }, { "epoch": 0.9381876966399555, "grad_norm": 0.8322657929716519, "learning_rate": 9.987796716992625e-09, "loss": 0.1617, "step": 53973 }, { "epoch": 0.9382050791774583, "grad_norm": 2.0777311373084206, "learning_rate": 9.982199230730404e-09, "loss": 0.2073, "step": 53974 }, { "epoch": 0.9382224617149612, "grad_norm": 1.2564223937986059, "learning_rate": 9.976603297599695e-09, "loss": 0.1801, "step": 53975 }, { "epoch": 0.938239844252464, "grad_norm": 1.1025655642703929, "learning_rate": 9.97100891761793e-09, "loss": 0.1402, "step": 53976 }, { "epoch": 0.9382572267899668, "grad_norm": 2.317902108928887, "learning_rate": 9.965416090803036e-09, "loss": 0.2659, "step": 53977 }, { "epoch": 0.9382746093274696, "grad_norm": 1.145387725965896, "learning_rate": 9.959824817172614e-09, "loss": 0.2447, "step": 53978 }, { "epoch": 0.9382919918649725, "grad_norm": 1.6986217079467503, "learning_rate": 9.954235096744424e-09, "loss": 0.1557, "step": 53979 }, { "epoch": 0.9383093744024753, "grad_norm": 3.6753578654927446, "learning_rate": 9.948646929536231e-09, "loss": 0.2931, "step": 53980 }, { "epoch": 0.9383267569399781, "grad_norm": 3.118512309360751, "learning_rate": 9.943060315565744e-09, "loss": 0.2946, "step": 53981 }, { "epoch": 0.938344139477481, "grad_norm": 2.324362659341503, "learning_rate": 9.937475254850614e-09, "loss": 0.2654, "step": 53982 }, { "epoch": 0.9383615220149838, "grad_norm": 1.1219806390783686, "learning_rate": 9.931891747408606e-09, "loss": 0.1842, "step": 53983 }, { "epoch": 0.9383789045524866, "grad_norm": 1.1736952224836643, "learning_rate": 9.92630979325737e-09, "loss": 0.1844, "step": 53984 }, { "epoch": 0.9383962870899893, "grad_norm": 1.857325332600523, "learning_rate": 9.920729392414562e-09, "loss": 0.218, "step": 53985 }, { "epoch": 0.9384136696274922, "grad_norm": 1.3165771337824896, "learning_rate": 9.915150544897944e-09, "loss": 0.1151, "step": 53986 }, { "epoch": 0.938431052164995, "grad_norm": 1.4400846798548805, "learning_rate": 9.909573250725167e-09, "loss": 0.201, "step": 53987 }, { "epoch": 0.9384484347024978, "grad_norm": 1.1212045445103567, "learning_rate": 9.903997509913998e-09, "loss": 0.1713, "step": 53988 }, { "epoch": 0.9384658172400007, "grad_norm": 1.5282383714152001, "learning_rate": 9.898423322481918e-09, "loss": 0.1926, "step": 53989 }, { "epoch": 0.9384831997775035, "grad_norm": 2.3466120821402225, "learning_rate": 9.892850688446697e-09, "loss": 0.1928, "step": 53990 }, { "epoch": 0.9385005823150063, "grad_norm": 2.805203202619137, "learning_rate": 9.887279607826094e-09, "loss": 0.1506, "step": 53991 }, { "epoch": 0.9385179648525092, "grad_norm": 1.3266152344760815, "learning_rate": 9.881710080637595e-09, "loss": 0.1457, "step": 53992 }, { "epoch": 0.938535347390012, "grad_norm": 0.7767497665250914, "learning_rate": 9.876142106898965e-09, "loss": 0.2894, "step": 53993 }, { "epoch": 0.9385527299275148, "grad_norm": 1.0775558684244562, "learning_rate": 9.870575686627801e-09, "loss": 0.2144, "step": 53994 }, { "epoch": 0.9385701124650176, "grad_norm": 1.1786491204289604, "learning_rate": 9.865010819841812e-09, "loss": 0.1083, "step": 53995 }, { "epoch": 0.9385874950025205, "grad_norm": 1.1758953732182553, "learning_rate": 9.859447506558538e-09, "loss": 0.2414, "step": 53996 }, { "epoch": 0.9386048775400233, "grad_norm": 1.3307987506624572, "learning_rate": 9.853885746795687e-09, "loss": 0.1235, "step": 53997 }, { "epoch": 0.9386222600775261, "grad_norm": 1.582905485227108, "learning_rate": 9.848325540570856e-09, "loss": 0.1536, "step": 53998 }, { "epoch": 0.938639642615029, "grad_norm": 1.102653374489968, "learning_rate": 9.842766887901699e-09, "loss": 0.1268, "step": 53999 }, { "epoch": 0.9386570251525318, "grad_norm": 0.8354999784817365, "learning_rate": 9.837209788805756e-09, "loss": 0.1491, "step": 54000 }, { "epoch": 0.9386744076900346, "grad_norm": 2.3902557324212235, "learning_rate": 9.831654243300735e-09, "loss": 0.2164, "step": 54001 }, { "epoch": 0.9386917902275375, "grad_norm": 1.176765216177221, "learning_rate": 9.826100251404235e-09, "loss": 0.214, "step": 54002 }, { "epoch": 0.9387091727650403, "grad_norm": 1.3034569313988877, "learning_rate": 9.820547813133796e-09, "loss": 0.1866, "step": 54003 }, { "epoch": 0.9387265553025431, "grad_norm": 1.2316830245290757, "learning_rate": 9.814996928507069e-09, "loss": 0.2048, "step": 54004 }, { "epoch": 0.9387439378400458, "grad_norm": 2.8214781014182013, "learning_rate": 9.809447597541542e-09, "loss": 0.1405, "step": 54005 }, { "epoch": 0.9387613203775487, "grad_norm": 2.5969510273902485, "learning_rate": 9.803899820254979e-09, "loss": 0.2286, "step": 54006 }, { "epoch": 0.9387787029150515, "grad_norm": 1.2090771230256439, "learning_rate": 9.798353596664865e-09, "loss": 0.1629, "step": 54007 }, { "epoch": 0.9387960854525543, "grad_norm": 2.3261655858984978, "learning_rate": 9.792808926788797e-09, "loss": 0.2398, "step": 54008 }, { "epoch": 0.9388134679900572, "grad_norm": 1.755195106546485, "learning_rate": 9.787265810644318e-09, "loss": 0.217, "step": 54009 }, { "epoch": 0.93883085052756, "grad_norm": 1.2057992860459168, "learning_rate": 9.781724248249024e-09, "loss": 0.1798, "step": 54010 }, { "epoch": 0.9388482330650628, "grad_norm": 1.7122973911894372, "learning_rate": 9.776184239620456e-09, "loss": 0.1795, "step": 54011 }, { "epoch": 0.9388656156025657, "grad_norm": 0.9291812281052799, "learning_rate": 9.770645784776265e-09, "loss": 0.1078, "step": 54012 }, { "epoch": 0.9388829981400685, "grad_norm": 1.2509270109052366, "learning_rate": 9.765108883733885e-09, "loss": 0.1807, "step": 54013 }, { "epoch": 0.9389003806775713, "grad_norm": 1.396471235510075, "learning_rate": 9.759573536510913e-09, "loss": 0.1375, "step": 54014 }, { "epoch": 0.9389177632150741, "grad_norm": 1.244808211046305, "learning_rate": 9.754039743124886e-09, "loss": 0.1635, "step": 54015 }, { "epoch": 0.938935145752577, "grad_norm": 1.174436368002283, "learning_rate": 9.74850750359335e-09, "loss": 0.2128, "step": 54016 }, { "epoch": 0.9389525282900798, "grad_norm": 1.4001493960705262, "learning_rate": 9.742976817933846e-09, "loss": 0.1515, "step": 54017 }, { "epoch": 0.9389699108275826, "grad_norm": 1.4216825941904672, "learning_rate": 9.737447686163914e-09, "loss": 0.1793, "step": 54018 }, { "epoch": 0.9389872933650855, "grad_norm": 1.3590784133306202, "learning_rate": 9.731920108301095e-09, "loss": 0.2724, "step": 54019 }, { "epoch": 0.9390046759025883, "grad_norm": 0.9228984186573028, "learning_rate": 9.726394084362821e-09, "loss": 0.1107, "step": 54020 }, { "epoch": 0.9390220584400911, "grad_norm": 1.0328093034182515, "learning_rate": 9.72086961436669e-09, "loss": 0.1228, "step": 54021 }, { "epoch": 0.939039440977594, "grad_norm": 2.132796467344877, "learning_rate": 9.715346698330185e-09, "loss": 0.1747, "step": 54022 }, { "epoch": 0.9390568235150968, "grad_norm": 1.2757234563893267, "learning_rate": 9.709825336270849e-09, "loss": 0.1114, "step": 54023 }, { "epoch": 0.9390742060525996, "grad_norm": 2.3867735100015284, "learning_rate": 9.704305528206058e-09, "loss": 0.1116, "step": 54024 }, { "epoch": 0.9390915885901023, "grad_norm": 2.9995259489773347, "learning_rate": 9.698787274153464e-09, "loss": 0.2857, "step": 54025 }, { "epoch": 0.9391089711276052, "grad_norm": 1.1134893237775056, "learning_rate": 9.693270574130497e-09, "loss": 0.1838, "step": 54026 }, { "epoch": 0.939126353665108, "grad_norm": 1.6632050022297387, "learning_rate": 9.687755428154643e-09, "loss": 0.1777, "step": 54027 }, { "epoch": 0.9391437362026108, "grad_norm": 1.0518241025956243, "learning_rate": 9.682241836243277e-09, "loss": 0.1683, "step": 54028 }, { "epoch": 0.9391611187401137, "grad_norm": 0.790905099372706, "learning_rate": 9.676729798414051e-09, "loss": 0.157, "step": 54029 }, { "epoch": 0.9391785012776165, "grad_norm": 0.8636856835455888, "learning_rate": 9.671219314684342e-09, "loss": 0.1784, "step": 54030 }, { "epoch": 0.9391958838151193, "grad_norm": 1.3322458923843168, "learning_rate": 9.665710385071636e-09, "loss": 0.2347, "step": 54031 }, { "epoch": 0.9392132663526221, "grad_norm": 1.567640392749469, "learning_rate": 9.66020300959336e-09, "loss": 0.1588, "step": 54032 }, { "epoch": 0.939230648890125, "grad_norm": 1.1635975143569468, "learning_rate": 9.654697188267003e-09, "loss": 0.1266, "step": 54033 }, { "epoch": 0.9392480314276278, "grad_norm": 0.9497175544461288, "learning_rate": 9.649192921110049e-09, "loss": 0.1705, "step": 54034 }, { "epoch": 0.9392654139651306, "grad_norm": 0.6449282604150409, "learning_rate": 9.643690208139876e-09, "loss": 0.1872, "step": 54035 }, { "epoch": 0.9392827965026335, "grad_norm": 1.3656525736319485, "learning_rate": 9.638189049373912e-09, "loss": 0.17, "step": 54036 }, { "epoch": 0.9393001790401363, "grad_norm": 1.9468770959902897, "learning_rate": 9.632689444829645e-09, "loss": 0.1434, "step": 54037 }, { "epoch": 0.9393175615776391, "grad_norm": 1.2813234180605766, "learning_rate": 9.627191394524558e-09, "loss": 0.1697, "step": 54038 }, { "epoch": 0.939334944115142, "grad_norm": 1.0549417894474593, "learning_rate": 9.621694898475973e-09, "loss": 0.1138, "step": 54039 }, { "epoch": 0.9393523266526448, "grad_norm": 1.7779763028301825, "learning_rate": 9.616199956701376e-09, "loss": 0.1572, "step": 54040 }, { "epoch": 0.9393697091901476, "grad_norm": 2.337749775483044, "learning_rate": 9.610706569218141e-09, "loss": 0.2283, "step": 54041 }, { "epoch": 0.9393870917276504, "grad_norm": 2.0887522645835213, "learning_rate": 9.605214736043698e-09, "loss": 0.2455, "step": 54042 }, { "epoch": 0.9394044742651533, "grad_norm": 1.0594839395628077, "learning_rate": 9.59972445719548e-09, "loss": 0.2154, "step": 54043 }, { "epoch": 0.939421856802656, "grad_norm": 0.7737302671437005, "learning_rate": 9.594235732690804e-09, "loss": 0.186, "step": 54044 }, { "epoch": 0.9394392393401588, "grad_norm": 1.0428135832179652, "learning_rate": 9.588748562547267e-09, "loss": 0.2229, "step": 54045 }, { "epoch": 0.9394566218776617, "grad_norm": 0.8446109817838396, "learning_rate": 9.583262946782022e-09, "loss": 0.0935, "step": 54046 }, { "epoch": 0.9394740044151645, "grad_norm": 1.3842001415645337, "learning_rate": 9.577778885412502e-09, "loss": 0.1602, "step": 54047 }, { "epoch": 0.9394913869526673, "grad_norm": 1.4137820845814066, "learning_rate": 9.57229637845619e-09, "loss": 0.1804, "step": 54048 }, { "epoch": 0.9395087694901701, "grad_norm": 1.6790935867756065, "learning_rate": 9.566815425930464e-09, "loss": 0.1919, "step": 54049 }, { "epoch": 0.939526152027673, "grad_norm": 1.8808685905668237, "learning_rate": 9.561336027852584e-09, "loss": 0.2267, "step": 54050 }, { "epoch": 0.9395435345651758, "grad_norm": 1.191546674057242, "learning_rate": 9.555858184239985e-09, "loss": 0.184, "step": 54051 }, { "epoch": 0.9395609171026786, "grad_norm": 2.3002826555897053, "learning_rate": 9.550381895110038e-09, "loss": 0.1796, "step": 54052 }, { "epoch": 0.9395782996401815, "grad_norm": 1.2045620353475353, "learning_rate": 9.54490716048012e-09, "loss": 0.1704, "step": 54053 }, { "epoch": 0.9395956821776843, "grad_norm": 1.551195719079754, "learning_rate": 9.539433980367495e-09, "loss": 0.2069, "step": 54054 }, { "epoch": 0.9396130647151871, "grad_norm": 1.572582661375982, "learning_rate": 9.533962354789592e-09, "loss": 0.1611, "step": 54055 }, { "epoch": 0.93963044725269, "grad_norm": 1.3267350498295094, "learning_rate": 9.528492283763734e-09, "loss": 0.107, "step": 54056 }, { "epoch": 0.9396478297901928, "grad_norm": 0.8421154496142341, "learning_rate": 9.523023767307236e-09, "loss": 0.1486, "step": 54057 }, { "epoch": 0.9396652123276956, "grad_norm": 1.24066629626112, "learning_rate": 9.517556805437477e-09, "loss": 0.1644, "step": 54058 }, { "epoch": 0.9396825948651985, "grad_norm": 0.955355744374816, "learning_rate": 9.512091398171662e-09, "loss": 0.1955, "step": 54059 }, { "epoch": 0.9396999774027013, "grad_norm": 1.358453237949648, "learning_rate": 9.506627545527279e-09, "loss": 0.097, "step": 54060 }, { "epoch": 0.9397173599402041, "grad_norm": 1.8497675591088532, "learning_rate": 9.501165247521592e-09, "loss": 0.1361, "step": 54061 }, { "epoch": 0.9397347424777069, "grad_norm": 1.0061783571764784, "learning_rate": 9.495704504171863e-09, "loss": 0.1545, "step": 54062 }, { "epoch": 0.9397521250152098, "grad_norm": 2.50270093065302, "learning_rate": 9.490245315495415e-09, "loss": 0.2319, "step": 54063 }, { "epoch": 0.9397695075527125, "grad_norm": 0.8539714214447395, "learning_rate": 9.48478768150962e-09, "loss": 0.1423, "step": 54064 }, { "epoch": 0.9397868900902153, "grad_norm": 1.028048206051451, "learning_rate": 9.479331602231688e-09, "loss": 0.1646, "step": 54065 }, { "epoch": 0.9398042726277182, "grad_norm": 1.7364740248708794, "learning_rate": 9.473877077678938e-09, "loss": 0.1819, "step": 54066 }, { "epoch": 0.939821655165221, "grad_norm": 3.685495351258569, "learning_rate": 9.468424107868688e-09, "loss": 0.2927, "step": 54067 }, { "epoch": 0.9398390377027238, "grad_norm": 2.840836004610907, "learning_rate": 9.46297269281815e-09, "loss": 0.1897, "step": 54068 }, { "epoch": 0.9398564202402266, "grad_norm": 1.1920991111350148, "learning_rate": 9.45752283254475e-09, "loss": 0.2006, "step": 54069 }, { "epoch": 0.9398738027777295, "grad_norm": 1.2249468529894454, "learning_rate": 9.45207452706559e-09, "loss": 0.2297, "step": 54070 }, { "epoch": 0.9398911853152323, "grad_norm": 1.5880395006832735, "learning_rate": 9.446627776398041e-09, "loss": 0.177, "step": 54071 }, { "epoch": 0.9399085678527351, "grad_norm": 1.174572909378031, "learning_rate": 9.44118258055937e-09, "loss": 0.1701, "step": 54072 }, { "epoch": 0.939925950390238, "grad_norm": 1.0905130911456955, "learning_rate": 9.435738939566784e-09, "loss": 0.1962, "step": 54073 }, { "epoch": 0.9399433329277408, "grad_norm": 1.0550056340324925, "learning_rate": 9.430296853437548e-09, "loss": 0.1783, "step": 54074 }, { "epoch": 0.9399607154652436, "grad_norm": 1.2926311662627321, "learning_rate": 9.424856322188867e-09, "loss": 0.1866, "step": 54075 }, { "epoch": 0.9399780980027465, "grad_norm": 1.0903776516169754, "learning_rate": 9.419417345838122e-09, "loss": 0.108, "step": 54076 }, { "epoch": 0.9399954805402493, "grad_norm": 1.406596264594377, "learning_rate": 9.41397992440246e-09, "loss": 0.1604, "step": 54077 }, { "epoch": 0.9400128630777521, "grad_norm": 1.0053633777298137, "learning_rate": 9.408544057899093e-09, "loss": 0.1485, "step": 54078 }, { "epoch": 0.940030245615255, "grad_norm": 0.9980623999851225, "learning_rate": 9.403109746345284e-09, "loss": 0.1075, "step": 54079 }, { "epoch": 0.9400476281527578, "grad_norm": 1.5865987598727531, "learning_rate": 9.397676989758297e-09, "loss": 0.1434, "step": 54080 }, { "epoch": 0.9400650106902606, "grad_norm": 1.2074760095322201, "learning_rate": 9.392245788155284e-09, "loss": 0.1725, "step": 54081 }, { "epoch": 0.9400823932277634, "grad_norm": 1.2491176112684033, "learning_rate": 9.386816141553455e-09, "loss": 0.1729, "step": 54082 }, { "epoch": 0.9400997757652663, "grad_norm": 1.1082920906642828, "learning_rate": 9.381388049970074e-09, "loss": 0.2356, "step": 54083 }, { "epoch": 0.940117158302769, "grad_norm": 1.8436781319463087, "learning_rate": 9.375961513422349e-09, "loss": 0.2274, "step": 54084 }, { "epoch": 0.9401345408402718, "grad_norm": 0.9467055183454057, "learning_rate": 9.37053653192743e-09, "loss": 0.186, "step": 54085 }, { "epoch": 0.9401519233777746, "grad_norm": 1.3397416514449958, "learning_rate": 9.365113105502476e-09, "loss": 0.1708, "step": 54086 }, { "epoch": 0.9401693059152775, "grad_norm": 1.005992448304343, "learning_rate": 9.359691234164801e-09, "loss": 0.184, "step": 54087 }, { "epoch": 0.9401866884527803, "grad_norm": 1.1679956992620932, "learning_rate": 9.354270917931506e-09, "loss": 0.123, "step": 54088 }, { "epoch": 0.9402040709902831, "grad_norm": 2.092584171377907, "learning_rate": 9.348852156819853e-09, "loss": 0.3081, "step": 54089 }, { "epoch": 0.940221453527786, "grad_norm": 3.6557096332765013, "learning_rate": 9.34343495084683e-09, "loss": 0.1556, "step": 54090 }, { "epoch": 0.9402388360652888, "grad_norm": 2.0851778871720215, "learning_rate": 9.338019300029808e-09, "loss": 0.3097, "step": 54091 }, { "epoch": 0.9402562186027916, "grad_norm": 1.6217749371437156, "learning_rate": 9.33260520438589e-09, "loss": 0.1535, "step": 54092 }, { "epoch": 0.9402736011402945, "grad_norm": 1.1849454949971983, "learning_rate": 9.32719266393217e-09, "loss": 0.1891, "step": 54093 }, { "epoch": 0.9402909836777973, "grad_norm": 1.4687714917837242, "learning_rate": 9.321781678685803e-09, "loss": 0.1941, "step": 54094 }, { "epoch": 0.9403083662153001, "grad_norm": 1.8592791787901102, "learning_rate": 9.316372248664106e-09, "loss": 0.1641, "step": 54095 }, { "epoch": 0.940325748752803, "grad_norm": 1.528451118002897, "learning_rate": 9.310964373884067e-09, "loss": 0.1777, "step": 54096 }, { "epoch": 0.9403431312903058, "grad_norm": 2.406113898930121, "learning_rate": 9.305558054362838e-09, "loss": 0.2295, "step": 54097 }, { "epoch": 0.9403605138278086, "grad_norm": 2.672805881689306, "learning_rate": 9.300153290117574e-09, "loss": 0.3816, "step": 54098 }, { "epoch": 0.9403778963653114, "grad_norm": 3.359244830293993, "learning_rate": 9.294750081165481e-09, "loss": 0.3285, "step": 54099 }, { "epoch": 0.9403952789028143, "grad_norm": 1.3417944394038461, "learning_rate": 9.289348427523546e-09, "loss": 0.0965, "step": 54100 }, { "epoch": 0.9404126614403171, "grad_norm": 1.2277671217134858, "learning_rate": 9.283948329209035e-09, "loss": 0.1496, "step": 54101 }, { "epoch": 0.9404300439778199, "grad_norm": 0.751042978517057, "learning_rate": 9.278549786238931e-09, "loss": 0.119, "step": 54102 }, { "epoch": 0.9404474265153228, "grad_norm": 1.2157887987521634, "learning_rate": 9.273152798630446e-09, "loss": 0.1293, "step": 54103 }, { "epoch": 0.9404648090528255, "grad_norm": 2.848585447256712, "learning_rate": 9.26775736640062e-09, "loss": 0.2724, "step": 54104 }, { "epoch": 0.9404821915903283, "grad_norm": 2.1109041607735817, "learning_rate": 9.262363489566604e-09, "loss": 0.1593, "step": 54105 }, { "epoch": 0.9404995741278311, "grad_norm": 0.9919669609941991, "learning_rate": 9.25697116814539e-09, "loss": 0.1408, "step": 54106 }, { "epoch": 0.940516956665334, "grad_norm": 2.0433079695318717, "learning_rate": 9.251580402154236e-09, "loss": 0.1627, "step": 54107 }, { "epoch": 0.9405343392028368, "grad_norm": 1.6042856582043927, "learning_rate": 9.24619119161013e-09, "loss": 0.1927, "step": 54108 }, { "epoch": 0.9405517217403396, "grad_norm": 0.8834624604553061, "learning_rate": 9.240803536530117e-09, "loss": 0.1134, "step": 54109 }, { "epoch": 0.9405691042778425, "grad_norm": 1.258979323052064, "learning_rate": 9.235417436931348e-09, "loss": 0.1462, "step": 54110 }, { "epoch": 0.9405864868153453, "grad_norm": 1.2717112613990775, "learning_rate": 9.230032892830863e-09, "loss": 0.191, "step": 54111 }, { "epoch": 0.9406038693528481, "grad_norm": 1.1784829553212872, "learning_rate": 9.224649904245763e-09, "loss": 0.0836, "step": 54112 }, { "epoch": 0.940621251890351, "grad_norm": 0.8255087759004436, "learning_rate": 9.21926847119303e-09, "loss": 0.2458, "step": 54113 }, { "epoch": 0.9406386344278538, "grad_norm": 1.3395919217105448, "learning_rate": 9.213888593689823e-09, "loss": 0.1695, "step": 54114 }, { "epoch": 0.9406560169653566, "grad_norm": 2.215794396919787, "learning_rate": 9.208510271753123e-09, "loss": 0.2098, "step": 54115 }, { "epoch": 0.9406733995028594, "grad_norm": 1.278189283798573, "learning_rate": 9.203133505399973e-09, "loss": 0.1471, "step": 54116 }, { "epoch": 0.9406907820403623, "grad_norm": 0.9515057383401788, "learning_rate": 9.197758294647418e-09, "loss": 0.2024, "step": 54117 }, { "epoch": 0.9407081645778651, "grad_norm": 1.6424337448150588, "learning_rate": 9.19238463951255e-09, "loss": 0.2405, "step": 54118 }, { "epoch": 0.9407255471153679, "grad_norm": 1.3979345657666, "learning_rate": 9.187012540012363e-09, "loss": 0.2107, "step": 54119 }, { "epoch": 0.9407429296528708, "grad_norm": 1.147562955180249, "learning_rate": 9.181641996163891e-09, "loss": 0.2075, "step": 54120 }, { "epoch": 0.9407603121903736, "grad_norm": 1.0801635082872216, "learning_rate": 9.176273007984126e-09, "loss": 0.2024, "step": 54121 }, { "epoch": 0.9407776947278764, "grad_norm": 1.0260691064142646, "learning_rate": 9.170905575490107e-09, "loss": 0.1749, "step": 54122 }, { "epoch": 0.9407950772653793, "grad_norm": 2.544479274667959, "learning_rate": 9.165539698698876e-09, "loss": 0.1608, "step": 54123 }, { "epoch": 0.940812459802882, "grad_norm": 1.1215005368839808, "learning_rate": 9.160175377627422e-09, "loss": 0.2097, "step": 54124 }, { "epoch": 0.9408298423403848, "grad_norm": 5.684945842902256, "learning_rate": 9.154812612292673e-09, "loss": 0.2562, "step": 54125 }, { "epoch": 0.9408472248778876, "grad_norm": 1.8578323508411732, "learning_rate": 9.149451402711728e-09, "loss": 0.1834, "step": 54126 }, { "epoch": 0.9408646074153905, "grad_norm": 1.1231232151386734, "learning_rate": 9.144091748901573e-09, "loss": 0.222, "step": 54127 }, { "epoch": 0.9408819899528933, "grad_norm": 0.9385277778396847, "learning_rate": 9.138733650879138e-09, "loss": 0.1931, "step": 54128 }, { "epoch": 0.9408993724903961, "grad_norm": 0.9988026919886214, "learning_rate": 9.13337710866141e-09, "loss": 0.1181, "step": 54129 }, { "epoch": 0.940916755027899, "grad_norm": 1.0099437472016461, "learning_rate": 9.128022122265433e-09, "loss": 0.1661, "step": 54130 }, { "epoch": 0.9409341375654018, "grad_norm": 1.0848814815041514, "learning_rate": 9.122668691708135e-09, "loss": 0.153, "step": 54131 }, { "epoch": 0.9409515201029046, "grad_norm": 0.7597480583943366, "learning_rate": 9.117316817006504e-09, "loss": 0.2073, "step": 54132 }, { "epoch": 0.9409689026404074, "grad_norm": 1.4321103964344883, "learning_rate": 9.11196649817747e-09, "loss": 0.1383, "step": 54133 }, { "epoch": 0.9409862851779103, "grad_norm": 1.456966939624192, "learning_rate": 9.106617735237964e-09, "loss": 0.1729, "step": 54134 }, { "epoch": 0.9410036677154131, "grad_norm": 1.37516832473295, "learning_rate": 9.10127052820503e-09, "loss": 0.1312, "step": 54135 }, { "epoch": 0.9410210502529159, "grad_norm": 1.5668674438932153, "learning_rate": 9.09592487709554e-09, "loss": 0.1599, "step": 54136 }, { "epoch": 0.9410384327904188, "grad_norm": 1.0268510054824502, "learning_rate": 9.090580781926483e-09, "loss": 0.1664, "step": 54137 }, { "epoch": 0.9410558153279216, "grad_norm": 2.0639467013404356, "learning_rate": 9.085238242714731e-09, "loss": 0.1862, "step": 54138 }, { "epoch": 0.9410731978654244, "grad_norm": 1.029793586711262, "learning_rate": 9.079897259477331e-09, "loss": 0.2111, "step": 54139 }, { "epoch": 0.9410905804029273, "grad_norm": 0.854897585666378, "learning_rate": 9.074557832231155e-09, "loss": 0.1576, "step": 54140 }, { "epoch": 0.9411079629404301, "grad_norm": 1.2589613162098527, "learning_rate": 9.06921996099308e-09, "loss": 0.1812, "step": 54141 }, { "epoch": 0.9411253454779329, "grad_norm": 1.1934009995801385, "learning_rate": 9.063883645780091e-09, "loss": 0.1588, "step": 54142 }, { "epoch": 0.9411427280154357, "grad_norm": 1.4132046386319639, "learning_rate": 9.058548886609008e-09, "loss": 0.1828, "step": 54143 }, { "epoch": 0.9411601105529385, "grad_norm": 1.418954674485692, "learning_rate": 9.053215683496818e-09, "loss": 0.1579, "step": 54144 }, { "epoch": 0.9411774930904413, "grad_norm": 1.6817725916834692, "learning_rate": 9.047884036460451e-09, "loss": 0.2641, "step": 54145 }, { "epoch": 0.9411948756279441, "grad_norm": 1.9838582996934162, "learning_rate": 9.04255394551684e-09, "loss": 0.2074, "step": 54146 }, { "epoch": 0.941212258165447, "grad_norm": 1.7870592543964592, "learning_rate": 9.037225410682691e-09, "loss": 0.337, "step": 54147 }, { "epoch": 0.9412296407029498, "grad_norm": 1.5283706577130334, "learning_rate": 9.031898431974994e-09, "loss": 0.1652, "step": 54148 }, { "epoch": 0.9412470232404526, "grad_norm": 1.1365980306582344, "learning_rate": 9.026573009410731e-09, "loss": 0.2054, "step": 54149 }, { "epoch": 0.9412644057779554, "grad_norm": 1.0812220220432671, "learning_rate": 9.02124914300667e-09, "loss": 0.3066, "step": 54150 }, { "epoch": 0.9412817883154583, "grad_norm": 1.5493702502535949, "learning_rate": 9.015926832779686e-09, "loss": 0.265, "step": 54151 }, { "epoch": 0.9412991708529611, "grad_norm": 1.8500809837059173, "learning_rate": 9.010606078746708e-09, "loss": 0.1696, "step": 54152 }, { "epoch": 0.9413165533904639, "grad_norm": 1.4596884070703147, "learning_rate": 9.005286880924556e-09, "loss": 0.158, "step": 54153 }, { "epoch": 0.9413339359279668, "grad_norm": 1.0391833943279325, "learning_rate": 8.999969239330052e-09, "loss": 0.2029, "step": 54154 }, { "epoch": 0.9413513184654696, "grad_norm": 1.262673763248186, "learning_rate": 8.994653153980181e-09, "loss": 0.1606, "step": 54155 }, { "epoch": 0.9413687010029724, "grad_norm": 1.005466260662309, "learning_rate": 8.989338624891596e-09, "loss": 0.0918, "step": 54156 }, { "epoch": 0.9413860835404753, "grad_norm": 1.1761933900777621, "learning_rate": 8.984025652081339e-09, "loss": 0.1714, "step": 54157 }, { "epoch": 0.9414034660779781, "grad_norm": 2.693815326555685, "learning_rate": 8.978714235566176e-09, "loss": 0.2237, "step": 54158 }, { "epoch": 0.9414208486154809, "grad_norm": 0.949807312244704, "learning_rate": 8.973404375362925e-09, "loss": 0.1647, "step": 54159 }, { "epoch": 0.9414382311529838, "grad_norm": 1.2230171900049642, "learning_rate": 8.968096071488407e-09, "loss": 0.215, "step": 54160 }, { "epoch": 0.9414556136904866, "grad_norm": 2.544843741990866, "learning_rate": 8.962789323959442e-09, "loss": 0.1806, "step": 54161 }, { "epoch": 0.9414729962279894, "grad_norm": 1.5489607630012439, "learning_rate": 8.957484132792903e-09, "loss": 0.0796, "step": 54162 }, { "epoch": 0.9414903787654922, "grad_norm": 1.0667862689189092, "learning_rate": 8.952180498005558e-09, "loss": 0.1792, "step": 54163 }, { "epoch": 0.941507761302995, "grad_norm": 1.2674955016632425, "learning_rate": 8.946878419614224e-09, "loss": 0.1125, "step": 54164 }, { "epoch": 0.9415251438404978, "grad_norm": 1.4702088543300145, "learning_rate": 8.941577897635777e-09, "loss": 0.1658, "step": 54165 }, { "epoch": 0.9415425263780006, "grad_norm": 1.140850040556804, "learning_rate": 8.936278932086928e-09, "loss": 0.1264, "step": 54166 }, { "epoch": 0.9415599089155035, "grad_norm": 1.49773357628149, "learning_rate": 8.930981522984437e-09, "loss": 0.1986, "step": 54167 }, { "epoch": 0.9415772914530063, "grad_norm": 1.9575391292076054, "learning_rate": 8.925685670345184e-09, "loss": 0.1338, "step": 54168 }, { "epoch": 0.9415946739905091, "grad_norm": 1.1292675967301558, "learning_rate": 8.920391374185932e-09, "loss": 0.2617, "step": 54169 }, { "epoch": 0.9416120565280119, "grad_norm": 1.5132234740540544, "learning_rate": 8.915098634523444e-09, "loss": 0.221, "step": 54170 }, { "epoch": 0.9416294390655148, "grad_norm": 1.4328920465943673, "learning_rate": 8.90980745137454e-09, "loss": 0.1547, "step": 54171 }, { "epoch": 0.9416468216030176, "grad_norm": 1.102321051267075, "learning_rate": 8.90451782475593e-09, "loss": 0.1443, "step": 54172 }, { "epoch": 0.9416642041405204, "grad_norm": 0.8564832086180424, "learning_rate": 8.899229754684433e-09, "loss": 0.1835, "step": 54173 }, { "epoch": 0.9416815866780233, "grad_norm": 1.2483837724468871, "learning_rate": 8.893943241176816e-09, "loss": 0.2504, "step": 54174 }, { "epoch": 0.9416989692155261, "grad_norm": 1.07939952577609, "learning_rate": 8.888658284249673e-09, "loss": 0.1465, "step": 54175 }, { "epoch": 0.9417163517530289, "grad_norm": 1.2490527670503897, "learning_rate": 8.883374883919992e-09, "loss": 0.186, "step": 54176 }, { "epoch": 0.9417337342905318, "grad_norm": 2.44966150175179, "learning_rate": 8.878093040204371e-09, "loss": 0.2648, "step": 54177 }, { "epoch": 0.9417511168280346, "grad_norm": 1.9155557109542585, "learning_rate": 8.872812753119685e-09, "loss": 0.1629, "step": 54178 }, { "epoch": 0.9417684993655374, "grad_norm": 1.365291955458215, "learning_rate": 8.867534022682478e-09, "loss": 0.219, "step": 54179 }, { "epoch": 0.9417858819030402, "grad_norm": 1.0284463054722925, "learning_rate": 8.862256848909622e-09, "loss": 0.2207, "step": 54180 }, { "epoch": 0.9418032644405431, "grad_norm": 1.3467714339458656, "learning_rate": 8.856981231817772e-09, "loss": 0.1559, "step": 54181 }, { "epoch": 0.9418206469780459, "grad_norm": 2.0451340537847793, "learning_rate": 8.851707171423694e-09, "loss": 0.2514, "step": 54182 }, { "epoch": 0.9418380295155486, "grad_norm": 2.695233194459219, "learning_rate": 8.84643466774404e-09, "loss": 0.2418, "step": 54183 }, { "epoch": 0.9418554120530515, "grad_norm": 0.9308903811557903, "learning_rate": 8.841163720795685e-09, "loss": 0.3895, "step": 54184 }, { "epoch": 0.9418727945905543, "grad_norm": 1.6204221685474844, "learning_rate": 8.835894330595118e-09, "loss": 0.461, "step": 54185 }, { "epoch": 0.9418901771280571, "grad_norm": 1.744838600219306, "learning_rate": 8.83062649715921e-09, "loss": 0.1865, "step": 54186 }, { "epoch": 0.94190755966556, "grad_norm": 1.5883213722534144, "learning_rate": 8.825360220504507e-09, "loss": 0.1555, "step": 54187 }, { "epoch": 0.9419249422030628, "grad_norm": 1.3039284735164844, "learning_rate": 8.820095500647884e-09, "loss": 0.1114, "step": 54188 }, { "epoch": 0.9419423247405656, "grad_norm": 3.5355318791030927, "learning_rate": 8.814832337605882e-09, "loss": 0.2835, "step": 54189 }, { "epoch": 0.9419597072780684, "grad_norm": 0.6283919497260287, "learning_rate": 8.809570731395211e-09, "loss": 0.2208, "step": 54190 }, { "epoch": 0.9419770898155713, "grad_norm": 0.8105409782853625, "learning_rate": 8.804310682032579e-09, "loss": 0.1844, "step": 54191 }, { "epoch": 0.9419944723530741, "grad_norm": 2.248542261117228, "learning_rate": 8.799052189534695e-09, "loss": 0.1754, "step": 54192 }, { "epoch": 0.9420118548905769, "grad_norm": 3.419940488846646, "learning_rate": 8.7937952539181e-09, "loss": 0.1734, "step": 54193 }, { "epoch": 0.9420292374280798, "grad_norm": 1.2597338196882422, "learning_rate": 8.788539875199563e-09, "loss": 0.1054, "step": 54194 }, { "epoch": 0.9420466199655826, "grad_norm": 1.3352659793124437, "learning_rate": 8.783286053395678e-09, "loss": 0.1412, "step": 54195 }, { "epoch": 0.9420640025030854, "grad_norm": 1.988290839973111, "learning_rate": 8.77803378852321e-09, "loss": 0.1752, "step": 54196 }, { "epoch": 0.9420813850405882, "grad_norm": 1.0767755696041652, "learning_rate": 8.772783080598701e-09, "loss": 0.13, "step": 54197 }, { "epoch": 0.9420987675780911, "grad_norm": 1.59754729286397, "learning_rate": 8.767533929638748e-09, "loss": 0.173, "step": 54198 }, { "epoch": 0.9421161501155939, "grad_norm": 1.2225271024347282, "learning_rate": 8.762286335660118e-09, "loss": 0.1826, "step": 54199 }, { "epoch": 0.9421335326530967, "grad_norm": 1.4167180462320192, "learning_rate": 8.757040298679408e-09, "loss": 0.2349, "step": 54200 }, { "epoch": 0.9421509151905996, "grad_norm": 1.5092578195915585, "learning_rate": 8.751795818713159e-09, "loss": 0.2618, "step": 54201 }, { "epoch": 0.9421682977281024, "grad_norm": 0.985691523700428, "learning_rate": 8.74655289577808e-09, "loss": 0.2081, "step": 54202 }, { "epoch": 0.9421856802656051, "grad_norm": 3.255229978175543, "learning_rate": 8.741311529890827e-09, "loss": 0.1804, "step": 54203 }, { "epoch": 0.942203062803108, "grad_norm": 1.405168371104737, "learning_rate": 8.736071721067884e-09, "loss": 0.1514, "step": 54204 }, { "epoch": 0.9422204453406108, "grad_norm": 0.842786003069311, "learning_rate": 8.73083346932596e-09, "loss": 0.1124, "step": 54205 }, { "epoch": 0.9422378278781136, "grad_norm": 1.2967465620023382, "learning_rate": 8.725596774681544e-09, "loss": 0.1806, "step": 54206 }, { "epoch": 0.9422552104156164, "grad_norm": 2.999546729302421, "learning_rate": 8.720361637151397e-09, "loss": 0.2028, "step": 54207 }, { "epoch": 0.9422725929531193, "grad_norm": 1.428129466716812, "learning_rate": 8.715128056752008e-09, "loss": 0.183, "step": 54208 }, { "epoch": 0.9422899754906221, "grad_norm": 1.6649823807290975, "learning_rate": 8.709896033499974e-09, "loss": 0.339, "step": 54209 }, { "epoch": 0.9423073580281249, "grad_norm": 1.4509165859234037, "learning_rate": 8.704665567411895e-09, "loss": 0.1497, "step": 54210 }, { "epoch": 0.9423247405656278, "grad_norm": 0.9554045356242475, "learning_rate": 8.699436658504312e-09, "loss": 0.2203, "step": 54211 }, { "epoch": 0.9423421231031306, "grad_norm": 1.2108766165029208, "learning_rate": 8.694209306793876e-09, "loss": 0.1677, "step": 54212 }, { "epoch": 0.9423595056406334, "grad_norm": 1.312392527415222, "learning_rate": 8.688983512297076e-09, "loss": 0.1316, "step": 54213 }, { "epoch": 0.9423768881781363, "grad_norm": 1.4246099668586785, "learning_rate": 8.68375927503051e-09, "loss": 0.1682, "step": 54214 }, { "epoch": 0.9423942707156391, "grad_norm": 1.1688385527832745, "learning_rate": 8.678536595010776e-09, "loss": 0.1323, "step": 54215 }, { "epoch": 0.9424116532531419, "grad_norm": 1.2886186042639773, "learning_rate": 8.673315472254361e-09, "loss": 0.1886, "step": 54216 }, { "epoch": 0.9424290357906447, "grad_norm": 1.4432484802866785, "learning_rate": 8.668095906777806e-09, "loss": 0.1728, "step": 54217 }, { "epoch": 0.9424464183281476, "grad_norm": 1.3436190853714158, "learning_rate": 8.662877898597765e-09, "loss": 0.1843, "step": 54218 }, { "epoch": 0.9424638008656504, "grad_norm": 0.8556826948306935, "learning_rate": 8.657661447730613e-09, "loss": 0.1578, "step": 54219 }, { "epoch": 0.9424811834031532, "grad_norm": 1.498423426727387, "learning_rate": 8.65244655419306e-09, "loss": 0.2475, "step": 54220 }, { "epoch": 0.9424985659406561, "grad_norm": 1.4136529649610676, "learning_rate": 8.647233218001481e-09, "loss": 0.1561, "step": 54221 }, { "epoch": 0.9425159484781589, "grad_norm": 0.9532071803051639, "learning_rate": 8.642021439172475e-09, "loss": 0.1033, "step": 54222 }, { "epoch": 0.9425333310156616, "grad_norm": 1.8181869511871829, "learning_rate": 8.636811217722584e-09, "loss": 0.3457, "step": 54223 }, { "epoch": 0.9425507135531644, "grad_norm": 1.1215308007134122, "learning_rate": 8.631602553668237e-09, "loss": 0.2062, "step": 54224 }, { "epoch": 0.9425680960906673, "grad_norm": 1.6216284980952342, "learning_rate": 8.62639544702598e-09, "loss": 0.1561, "step": 54225 }, { "epoch": 0.9425854786281701, "grad_norm": 2.2776732868597866, "learning_rate": 8.621189897812353e-09, "loss": 0.2182, "step": 54226 }, { "epoch": 0.9426028611656729, "grad_norm": 0.8828915390263572, "learning_rate": 8.615985906043843e-09, "loss": 0.1769, "step": 54227 }, { "epoch": 0.9426202437031758, "grad_norm": 2.219461571977188, "learning_rate": 8.610783471736993e-09, "loss": 0.137, "step": 54228 }, { "epoch": 0.9426376262406786, "grad_norm": 1.5210960197976886, "learning_rate": 8.605582594908178e-09, "loss": 0.1212, "step": 54229 }, { "epoch": 0.9426550087781814, "grad_norm": 1.3242372996812055, "learning_rate": 8.600383275573941e-09, "loss": 0.1615, "step": 54230 }, { "epoch": 0.9426723913156843, "grad_norm": 1.8915972389787297, "learning_rate": 8.595185513750713e-09, "loss": 0.2266, "step": 54231 }, { "epoch": 0.9426897738531871, "grad_norm": 7.753089780658896, "learning_rate": 8.589989309455092e-09, "loss": 0.2267, "step": 54232 }, { "epoch": 0.9427071563906899, "grad_norm": 0.8981055883530045, "learning_rate": 8.584794662703398e-09, "loss": 0.1197, "step": 54233 }, { "epoch": 0.9427245389281927, "grad_norm": 1.3787317939047843, "learning_rate": 8.57960157351223e-09, "loss": 0.129, "step": 54234 }, { "epoch": 0.9427419214656956, "grad_norm": 1.204661920430045, "learning_rate": 8.574410041897962e-09, "loss": 0.2171, "step": 54235 }, { "epoch": 0.9427593040031984, "grad_norm": 1.1079854745055915, "learning_rate": 8.56922006787708e-09, "loss": 0.2104, "step": 54236 }, { "epoch": 0.9427766865407012, "grad_norm": 2.0896530956910144, "learning_rate": 8.564031651465963e-09, "loss": 0.1691, "step": 54237 }, { "epoch": 0.9427940690782041, "grad_norm": 1.1839889401491959, "learning_rate": 8.558844792681153e-09, "loss": 0.2996, "step": 54238 }, { "epoch": 0.9428114516157069, "grad_norm": 0.9102258190915647, "learning_rate": 8.553659491539078e-09, "loss": 0.1462, "step": 54239 }, { "epoch": 0.9428288341532097, "grad_norm": 2.7588357577812626, "learning_rate": 8.548475748056172e-09, "loss": 0.3999, "step": 54240 }, { "epoch": 0.9428462166907126, "grad_norm": 1.2200518037572101, "learning_rate": 8.54329356224881e-09, "loss": 0.1254, "step": 54241 }, { "epoch": 0.9428635992282154, "grad_norm": 1.3752317589148404, "learning_rate": 8.538112934133479e-09, "loss": 0.1825, "step": 54242 }, { "epoch": 0.9428809817657181, "grad_norm": 1.199275100012656, "learning_rate": 8.532933863726555e-09, "loss": 0.4041, "step": 54243 }, { "epoch": 0.9428983643032209, "grad_norm": 1.4825904143022466, "learning_rate": 8.527756351044413e-09, "loss": 0.1572, "step": 54244 }, { "epoch": 0.9429157468407238, "grad_norm": 4.182850775013342, "learning_rate": 8.52258039610354e-09, "loss": 0.2843, "step": 54245 }, { "epoch": 0.9429331293782266, "grad_norm": 0.8898662996972181, "learning_rate": 8.517405998920369e-09, "loss": 0.1258, "step": 54246 }, { "epoch": 0.9429505119157294, "grad_norm": 1.0412779315615557, "learning_rate": 8.512233159511273e-09, "loss": 0.178, "step": 54247 }, { "epoch": 0.9429678944532323, "grad_norm": 1.5727957653470124, "learning_rate": 8.507061877892574e-09, "loss": 0.1668, "step": 54248 }, { "epoch": 0.9429852769907351, "grad_norm": 1.3352106859471717, "learning_rate": 8.501892154080704e-09, "loss": 0.2732, "step": 54249 }, { "epoch": 0.9430026595282379, "grad_norm": 0.9880641157984171, "learning_rate": 8.496723988092091e-09, "loss": 0.2884, "step": 54250 }, { "epoch": 0.9430200420657407, "grad_norm": 2.2488397830382962, "learning_rate": 8.49155737994306e-09, "loss": 0.1843, "step": 54251 }, { "epoch": 0.9430374246032436, "grad_norm": 1.3694773657792174, "learning_rate": 8.486392329650038e-09, "loss": 0.2236, "step": 54252 }, { "epoch": 0.9430548071407464, "grad_norm": 1.2263349430998718, "learning_rate": 8.481228837229404e-09, "loss": 0.1759, "step": 54253 }, { "epoch": 0.9430721896782492, "grad_norm": 4.033752188489877, "learning_rate": 8.47606690269742e-09, "loss": 0.2996, "step": 54254 }, { "epoch": 0.9430895722157521, "grad_norm": 1.1793649020689414, "learning_rate": 8.470906526070576e-09, "loss": 0.1817, "step": 54255 }, { "epoch": 0.9431069547532549, "grad_norm": 1.244871020691808, "learning_rate": 8.465747707365079e-09, "loss": 0.1327, "step": 54256 }, { "epoch": 0.9431243372907577, "grad_norm": 1.8874139425938365, "learning_rate": 8.460590446597415e-09, "loss": 0.3802, "step": 54257 }, { "epoch": 0.9431417198282606, "grad_norm": 1.660568833586491, "learning_rate": 8.455434743783907e-09, "loss": 0.1766, "step": 54258 }, { "epoch": 0.9431591023657634, "grad_norm": 1.1348089369216066, "learning_rate": 8.450280598940873e-09, "loss": 0.1743, "step": 54259 }, { "epoch": 0.9431764849032662, "grad_norm": 1.8873923836699513, "learning_rate": 8.445128012084634e-09, "loss": 0.2296, "step": 54260 }, { "epoch": 0.943193867440769, "grad_norm": 0.8172061749700711, "learning_rate": 8.439976983231511e-09, "loss": 0.1496, "step": 54261 }, { "epoch": 0.9432112499782719, "grad_norm": 1.259513430779193, "learning_rate": 8.434827512397936e-09, "loss": 0.2365, "step": 54262 }, { "epoch": 0.9432286325157746, "grad_norm": 1.1053362889031562, "learning_rate": 8.429679599600115e-09, "loss": 0.101, "step": 54263 }, { "epoch": 0.9432460150532774, "grad_norm": 1.8771972956391927, "learning_rate": 8.424533244854315e-09, "loss": 0.2482, "step": 54264 }, { "epoch": 0.9432633975907803, "grad_norm": 1.0588673355302602, "learning_rate": 8.419388448177023e-09, "loss": 0.2598, "step": 54265 }, { "epoch": 0.9432807801282831, "grad_norm": 2.6022320844115336, "learning_rate": 8.414245209584503e-09, "loss": 0.3043, "step": 54266 }, { "epoch": 0.9432981626657859, "grad_norm": 1.1053483665591135, "learning_rate": 8.40910352909291e-09, "loss": 0.1925, "step": 54267 }, { "epoch": 0.9433155452032888, "grad_norm": 2.4390179016952445, "learning_rate": 8.403963406718672e-09, "loss": 0.2438, "step": 54268 }, { "epoch": 0.9433329277407916, "grad_norm": 1.2310422870041902, "learning_rate": 8.398824842478059e-09, "loss": 0.2177, "step": 54269 }, { "epoch": 0.9433503102782944, "grad_norm": 1.0537573895204884, "learning_rate": 8.393687836387331e-09, "loss": 0.1378, "step": 54270 }, { "epoch": 0.9433676928157972, "grad_norm": 1.047099813723733, "learning_rate": 8.38855238846281e-09, "loss": 0.1139, "step": 54271 }, { "epoch": 0.9433850753533001, "grad_norm": 1.3881166072683007, "learning_rate": 8.383418498720706e-09, "loss": 0.2749, "step": 54272 }, { "epoch": 0.9434024578908029, "grad_norm": 1.1719395769861731, "learning_rate": 8.378286167177395e-09, "loss": 0.1817, "step": 54273 }, { "epoch": 0.9434198404283057, "grad_norm": 1.3897550767654163, "learning_rate": 8.373155393849085e-09, "loss": 0.2359, "step": 54274 }, { "epoch": 0.9434372229658086, "grad_norm": 0.5033464093418201, "learning_rate": 8.368026178751986e-09, "loss": 0.2238, "step": 54275 }, { "epoch": 0.9434546055033114, "grad_norm": 1.0365603629605256, "learning_rate": 8.362898521902417e-09, "loss": 0.1386, "step": 54276 }, { "epoch": 0.9434719880408142, "grad_norm": 1.8341193670154665, "learning_rate": 8.357772423316645e-09, "loss": 0.29, "step": 54277 }, { "epoch": 0.943489370578317, "grad_norm": 1.4097776893779332, "learning_rate": 8.352647883010877e-09, "loss": 0.1707, "step": 54278 }, { "epoch": 0.9435067531158199, "grad_norm": 0.6735906528552315, "learning_rate": 8.347524901001379e-09, "loss": 0.1474, "step": 54279 }, { "epoch": 0.9435241356533227, "grad_norm": 1.9286044929090933, "learning_rate": 8.34240347730436e-09, "loss": 0.1901, "step": 54280 }, { "epoch": 0.9435415181908255, "grad_norm": 1.2888396752352802, "learning_rate": 8.337283611936085e-09, "loss": 0.1909, "step": 54281 }, { "epoch": 0.9435589007283284, "grad_norm": 1.0968790613977197, "learning_rate": 8.332165304912764e-09, "loss": 0.3342, "step": 54282 }, { "epoch": 0.9435762832658311, "grad_norm": 1.8471158862352839, "learning_rate": 8.32704855625066e-09, "loss": 0.2732, "step": 54283 }, { "epoch": 0.9435936658033339, "grad_norm": 1.1472402928146148, "learning_rate": 8.321933365965872e-09, "loss": 0.1541, "step": 54284 }, { "epoch": 0.9436110483408368, "grad_norm": 1.3435450890484315, "learning_rate": 8.316819734074832e-09, "loss": 0.1672, "step": 54285 }, { "epoch": 0.9436284308783396, "grad_norm": 1.1575116395339522, "learning_rate": 8.311707660593526e-09, "loss": 0.1478, "step": 54286 }, { "epoch": 0.9436458134158424, "grad_norm": 1.1450370688368092, "learning_rate": 8.306597145538219e-09, "loss": 0.2437, "step": 54287 }, { "epoch": 0.9436631959533452, "grad_norm": 6.01551013839848, "learning_rate": 8.301488188925176e-09, "loss": 0.4125, "step": 54288 }, { "epoch": 0.9436805784908481, "grad_norm": 1.8844344594955387, "learning_rate": 8.296380790770551e-09, "loss": 0.4652, "step": 54289 }, { "epoch": 0.9436979610283509, "grad_norm": 2.0234602710854257, "learning_rate": 8.291274951090499e-09, "loss": 0.2403, "step": 54290 }, { "epoch": 0.9437153435658537, "grad_norm": 1.3507461148165483, "learning_rate": 8.286170669901282e-09, "loss": 0.1905, "step": 54291 }, { "epoch": 0.9437327261033566, "grad_norm": 1.3767798777618492, "learning_rate": 8.281067947219e-09, "loss": 0.2054, "step": 54292 }, { "epoch": 0.9437501086408594, "grad_norm": 1.0010375772656837, "learning_rate": 8.275966783059862e-09, "loss": 0.1579, "step": 54293 }, { "epoch": 0.9437674911783622, "grad_norm": 1.7481662530331983, "learning_rate": 8.270867177440021e-09, "loss": 0.202, "step": 54294 }, { "epoch": 0.9437848737158651, "grad_norm": 0.8567708014917987, "learning_rate": 8.265769130375632e-09, "loss": 0.1535, "step": 54295 }, { "epoch": 0.9438022562533679, "grad_norm": 1.7516072552753064, "learning_rate": 8.26067264188285e-09, "loss": 0.1494, "step": 54296 }, { "epoch": 0.9438196387908707, "grad_norm": 1.7828263577016508, "learning_rate": 8.255577711977935e-09, "loss": 0.1682, "step": 54297 }, { "epoch": 0.9438370213283735, "grad_norm": 1.094758605575959, "learning_rate": 8.250484340676878e-09, "loss": 0.1405, "step": 54298 }, { "epoch": 0.9438544038658764, "grad_norm": 3.7358101554906322, "learning_rate": 8.245392527995942e-09, "loss": 0.1577, "step": 54299 }, { "epoch": 0.9438717864033792, "grad_norm": 1.9615332879657323, "learning_rate": 8.240302273951171e-09, "loss": 0.1207, "step": 54300 }, { "epoch": 0.943889168940882, "grad_norm": 1.7379724397480727, "learning_rate": 8.235213578558775e-09, "loss": 0.223, "step": 54301 }, { "epoch": 0.9439065514783849, "grad_norm": 2.076332618749846, "learning_rate": 8.23012644183485e-09, "loss": 0.1933, "step": 54302 }, { "epoch": 0.9439239340158876, "grad_norm": 0.9820231040412029, "learning_rate": 8.225040863795496e-09, "loss": 0.1356, "step": 54303 }, { "epoch": 0.9439413165533904, "grad_norm": 0.9217465283847517, "learning_rate": 8.219956844456921e-09, "loss": 0.097, "step": 54304 }, { "epoch": 0.9439586990908932, "grad_norm": 2.7188748493974475, "learning_rate": 8.21487438383517e-09, "loss": 0.119, "step": 54305 }, { "epoch": 0.9439760816283961, "grad_norm": 1.7608754081019524, "learning_rate": 8.209793481946281e-09, "loss": 0.2325, "step": 54306 }, { "epoch": 0.9439934641658989, "grad_norm": 1.8357424555268786, "learning_rate": 8.204714138806468e-09, "loss": 0.1532, "step": 54307 }, { "epoch": 0.9440108467034017, "grad_norm": 1.2211006613513442, "learning_rate": 8.199636354431827e-09, "loss": 0.162, "step": 54308 }, { "epoch": 0.9440282292409046, "grad_norm": 0.9644249500646148, "learning_rate": 8.194560128838456e-09, "loss": 0.2406, "step": 54309 }, { "epoch": 0.9440456117784074, "grad_norm": 1.1383699955983186, "learning_rate": 8.189485462042345e-09, "loss": 0.3304, "step": 54310 }, { "epoch": 0.9440629943159102, "grad_norm": 2.187932636939814, "learning_rate": 8.1844123540597e-09, "loss": 0.1934, "step": 54311 }, { "epoch": 0.9440803768534131, "grad_norm": 1.1525347105615478, "learning_rate": 8.179340804906509e-09, "loss": 0.2191, "step": 54312 }, { "epoch": 0.9440977593909159, "grad_norm": 1.4529939988761162, "learning_rate": 8.174270814598927e-09, "loss": 0.2349, "step": 54313 }, { "epoch": 0.9441151419284187, "grad_norm": 1.4939195169334536, "learning_rate": 8.169202383152884e-09, "loss": 0.1208, "step": 54314 }, { "epoch": 0.9441325244659216, "grad_norm": 1.4644272438463308, "learning_rate": 8.164135510584591e-09, "loss": 0.2005, "step": 54315 }, { "epoch": 0.9441499070034244, "grad_norm": 1.4101567377845217, "learning_rate": 8.15907019691009e-09, "loss": 0.231, "step": 54316 }, { "epoch": 0.9441672895409272, "grad_norm": 2.5037117760343475, "learning_rate": 8.154006442145423e-09, "loss": 0.1886, "step": 54317 }, { "epoch": 0.94418467207843, "grad_norm": 1.5379028797294707, "learning_rate": 8.148944246306578e-09, "loss": 0.2075, "step": 54318 }, { "epoch": 0.9442020546159329, "grad_norm": 0.8151941523395372, "learning_rate": 8.143883609409652e-09, "loss": 0.1596, "step": 54319 }, { "epoch": 0.9442194371534357, "grad_norm": 1.5187205373397952, "learning_rate": 8.138824531470634e-09, "loss": 0.1426, "step": 54320 }, { "epoch": 0.9442368196909385, "grad_norm": 1.6641778468207948, "learning_rate": 8.133767012505621e-09, "loss": 0.4242, "step": 54321 }, { "epoch": 0.9442542022284413, "grad_norm": 1.1731923163293096, "learning_rate": 8.128711052530601e-09, "loss": 0.1659, "step": 54322 }, { "epoch": 0.9442715847659441, "grad_norm": 1.392485803890011, "learning_rate": 8.123656651561728e-09, "loss": 0.1889, "step": 54323 }, { "epoch": 0.9442889673034469, "grad_norm": 1.1719321674359613, "learning_rate": 8.11860380961482e-09, "loss": 0.1768, "step": 54324 }, { "epoch": 0.9443063498409497, "grad_norm": 1.8458427726721651, "learning_rate": 8.113552526706036e-09, "loss": 0.1232, "step": 54325 }, { "epoch": 0.9443237323784526, "grad_norm": 0.8714872882789628, "learning_rate": 8.108502802851247e-09, "loss": 0.2244, "step": 54326 }, { "epoch": 0.9443411149159554, "grad_norm": 1.2991575300175504, "learning_rate": 8.10345463806661e-09, "loss": 0.0995, "step": 54327 }, { "epoch": 0.9443584974534582, "grad_norm": 1.086192996610066, "learning_rate": 8.098408032368054e-09, "loss": 0.1615, "step": 54328 }, { "epoch": 0.9443758799909611, "grad_norm": 1.0134912197727002, "learning_rate": 8.093362985771623e-09, "loss": 0.2236, "step": 54329 }, { "epoch": 0.9443932625284639, "grad_norm": 1.2279845307553208, "learning_rate": 8.08831949829325e-09, "loss": 0.1397, "step": 54330 }, { "epoch": 0.9444106450659667, "grad_norm": 1.060019994713314, "learning_rate": 8.083277569948922e-09, "loss": 0.2395, "step": 54331 }, { "epoch": 0.9444280276034696, "grad_norm": 1.0890509311227123, "learning_rate": 8.078237200754623e-09, "loss": 0.3194, "step": 54332 }, { "epoch": 0.9444454101409724, "grad_norm": 1.1878745745633037, "learning_rate": 8.073198390726343e-09, "loss": 0.1608, "step": 54333 }, { "epoch": 0.9444627926784752, "grad_norm": 1.0856080423712198, "learning_rate": 8.068161139880013e-09, "loss": 0.2232, "step": 54334 }, { "epoch": 0.944480175215978, "grad_norm": 1.3737416328795584, "learning_rate": 8.063125448231733e-09, "loss": 0.1452, "step": 54335 }, { "epoch": 0.9444975577534809, "grad_norm": 1.4594393446621425, "learning_rate": 8.058091315797321e-09, "loss": 0.1474, "step": 54336 }, { "epoch": 0.9445149402909837, "grad_norm": 1.4564459306233521, "learning_rate": 8.053058742592767e-09, "loss": 0.1386, "step": 54337 }, { "epoch": 0.9445323228284865, "grad_norm": 2.029214124956547, "learning_rate": 8.048027728634056e-09, "loss": 0.2834, "step": 54338 }, { "epoch": 0.9445497053659894, "grad_norm": 1.212877791305213, "learning_rate": 8.042998273937118e-09, "loss": 0.2167, "step": 54339 }, { "epoch": 0.9445670879034922, "grad_norm": 0.9228181564870176, "learning_rate": 8.037970378517889e-09, "loss": 0.1821, "step": 54340 }, { "epoch": 0.944584470440995, "grad_norm": 2.0269672085350945, "learning_rate": 8.032944042392299e-09, "loss": 0.1881, "step": 54341 }, { "epoch": 0.9446018529784977, "grad_norm": 2.5287603093733777, "learning_rate": 8.027919265576277e-09, "loss": 0.2025, "step": 54342 }, { "epoch": 0.9446192355160006, "grad_norm": 11.391531618027715, "learning_rate": 8.02289604808576e-09, "loss": 0.1957, "step": 54343 }, { "epoch": 0.9446366180535034, "grad_norm": 1.1296861437050596, "learning_rate": 8.017874389936674e-09, "loss": 0.2968, "step": 54344 }, { "epoch": 0.9446540005910062, "grad_norm": 1.8850044808178597, "learning_rate": 8.012854291144843e-09, "loss": 0.2329, "step": 54345 }, { "epoch": 0.9446713831285091, "grad_norm": 1.2876347178199872, "learning_rate": 8.007835751726367e-09, "loss": 0.1908, "step": 54346 }, { "epoch": 0.9446887656660119, "grad_norm": 1.8514315633035707, "learning_rate": 8.002818771697007e-09, "loss": 0.1085, "step": 54347 }, { "epoch": 0.9447061482035147, "grad_norm": 1.4674409127551038, "learning_rate": 7.997803351072808e-09, "loss": 0.2525, "step": 54348 }, { "epoch": 0.9447235307410176, "grad_norm": 1.2816806508910716, "learning_rate": 7.992789489869423e-09, "loss": 0.206, "step": 54349 }, { "epoch": 0.9447409132785204, "grad_norm": 1.560723802619583, "learning_rate": 7.987777188102951e-09, "loss": 0.1421, "step": 54350 }, { "epoch": 0.9447582958160232, "grad_norm": 0.9477640070900771, "learning_rate": 7.982766445789213e-09, "loss": 0.1739, "step": 54351 }, { "epoch": 0.944775678353526, "grad_norm": 2.1287482913507842, "learning_rate": 7.977757262944084e-09, "loss": 0.28, "step": 54352 }, { "epoch": 0.9447930608910289, "grad_norm": 1.3181457381049997, "learning_rate": 7.972749639583387e-09, "loss": 0.3466, "step": 54353 }, { "epoch": 0.9448104434285317, "grad_norm": 1.4127538443895828, "learning_rate": 7.967743575723162e-09, "loss": 0.1359, "step": 54354 }, { "epoch": 0.9448278259660345, "grad_norm": 1.2702035374375582, "learning_rate": 7.96273907137912e-09, "loss": 0.1997, "step": 54355 }, { "epoch": 0.9448452085035374, "grad_norm": 2.2453574183742067, "learning_rate": 7.957736126567194e-09, "loss": 0.2303, "step": 54356 }, { "epoch": 0.9448625910410402, "grad_norm": 1.347055693171935, "learning_rate": 7.952734741303146e-09, "loss": 0.2075, "step": 54357 }, { "epoch": 0.944879973578543, "grad_norm": 2.478235506724585, "learning_rate": 7.947734915602967e-09, "loss": 0.1862, "step": 54358 }, { "epoch": 0.9448973561160459, "grad_norm": 2.7222746461506566, "learning_rate": 7.942736649482418e-09, "loss": 0.1328, "step": 54359 }, { "epoch": 0.9449147386535487, "grad_norm": 1.466818036493037, "learning_rate": 7.937739942957378e-09, "loss": 0.3511, "step": 54360 }, { "epoch": 0.9449321211910515, "grad_norm": 1.2149898600030333, "learning_rate": 7.932744796043667e-09, "loss": 0.161, "step": 54361 }, { "epoch": 0.9449495037285542, "grad_norm": 1.7124242052677525, "learning_rate": 7.927751208757105e-09, "loss": 0.1954, "step": 54362 }, { "epoch": 0.9449668862660571, "grad_norm": 1.1981812892613004, "learning_rate": 7.922759181113515e-09, "loss": 0.0963, "step": 54363 }, { "epoch": 0.9449842688035599, "grad_norm": 1.5620699897070038, "learning_rate": 7.91776871312877e-09, "loss": 0.1682, "step": 54364 }, { "epoch": 0.9450016513410627, "grad_norm": 2.460463945485839, "learning_rate": 7.91277980481858e-09, "loss": 0.1916, "step": 54365 }, { "epoch": 0.9450190338785656, "grad_norm": 0.7885156978118529, "learning_rate": 7.907792456198936e-09, "loss": 0.1758, "step": 54366 }, { "epoch": 0.9450364164160684, "grad_norm": 1.5706482668869541, "learning_rate": 7.902806667285544e-09, "loss": 0.1633, "step": 54367 }, { "epoch": 0.9450537989535712, "grad_norm": 1.4515307930707233, "learning_rate": 7.897822438094114e-09, "loss": 0.178, "step": 54368 }, { "epoch": 0.945071181491074, "grad_norm": 1.6812819859901103, "learning_rate": 7.89283976864058e-09, "loss": 0.1559, "step": 54369 }, { "epoch": 0.9450885640285769, "grad_norm": 1.0769053484451367, "learning_rate": 7.887858658940649e-09, "loss": 0.1319, "step": 54370 }, { "epoch": 0.9451059465660797, "grad_norm": 1.4581665038642142, "learning_rate": 7.882879109010199e-09, "loss": 0.1748, "step": 54371 }, { "epoch": 0.9451233291035825, "grad_norm": 1.6341026081172219, "learning_rate": 7.877901118864939e-09, "loss": 0.3154, "step": 54372 }, { "epoch": 0.9451407116410854, "grad_norm": 1.8080076311978077, "learning_rate": 7.872924688520688e-09, "loss": 0.2022, "step": 54373 }, { "epoch": 0.9451580941785882, "grad_norm": 1.3562442582150442, "learning_rate": 7.867949817993213e-09, "loss": 0.2142, "step": 54374 }, { "epoch": 0.945175476716091, "grad_norm": 1.067648982436243, "learning_rate": 7.862976507298224e-09, "loss": 0.2577, "step": 54375 }, { "epoch": 0.9451928592535939, "grad_norm": 1.1853665510310794, "learning_rate": 7.858004756451541e-09, "loss": 0.1936, "step": 54376 }, { "epoch": 0.9452102417910967, "grad_norm": 1.2174714091350578, "learning_rate": 7.85303456546893e-09, "loss": 0.1562, "step": 54377 }, { "epoch": 0.9452276243285995, "grad_norm": 2.152422437947811, "learning_rate": 7.8480659343661e-09, "loss": 0.3448, "step": 54378 }, { "epoch": 0.9452450068661024, "grad_norm": 1.5925636473774796, "learning_rate": 7.84309886315887e-09, "loss": 0.1834, "step": 54379 }, { "epoch": 0.9452623894036052, "grad_norm": 1.4236464010955179, "learning_rate": 7.838133351862898e-09, "loss": 0.226, "step": 54380 }, { "epoch": 0.945279771941108, "grad_norm": 2.944549640872484, "learning_rate": 7.833169400494e-09, "loss": 0.2153, "step": 54381 }, { "epoch": 0.9452971544786107, "grad_norm": 1.3193433800622207, "learning_rate": 7.828207009067834e-09, "loss": 0.1961, "step": 54382 }, { "epoch": 0.9453145370161136, "grad_norm": 1.592795793501719, "learning_rate": 7.823246177600218e-09, "loss": 0.1355, "step": 54383 }, { "epoch": 0.9453319195536164, "grad_norm": 1.2038812643313703, "learning_rate": 7.818286906106753e-09, "loss": 0.1342, "step": 54384 }, { "epoch": 0.9453493020911192, "grad_norm": 1.401251238963245, "learning_rate": 7.813329194603258e-09, "loss": 0.2455, "step": 54385 }, { "epoch": 0.9453666846286221, "grad_norm": 1.6264485811412939, "learning_rate": 7.808373043105443e-09, "loss": 0.2825, "step": 54386 }, { "epoch": 0.9453840671661249, "grad_norm": 1.8729933308787299, "learning_rate": 7.803418451629019e-09, "loss": 0.1657, "step": 54387 }, { "epoch": 0.9454014497036277, "grad_norm": 0.9972834748916258, "learning_rate": 7.798465420189526e-09, "loss": 0.128, "step": 54388 }, { "epoch": 0.9454188322411305, "grad_norm": 1.695167336263874, "learning_rate": 7.7935139488029e-09, "loss": 0.2297, "step": 54389 }, { "epoch": 0.9454362147786334, "grad_norm": 1.107968282520898, "learning_rate": 7.788564037484734e-09, "loss": 0.1601, "step": 54390 }, { "epoch": 0.9454535973161362, "grad_norm": 1.2958785579625136, "learning_rate": 7.783615686250689e-09, "loss": 0.1454, "step": 54391 }, { "epoch": 0.945470979853639, "grad_norm": 0.9840186691529884, "learning_rate": 7.778668895116524e-09, "loss": 0.2657, "step": 54392 }, { "epoch": 0.9454883623911419, "grad_norm": 1.174396666717211, "learning_rate": 7.773723664097842e-09, "loss": 0.215, "step": 54393 }, { "epoch": 0.9455057449286447, "grad_norm": 1.9174375519651057, "learning_rate": 7.76877999321035e-09, "loss": 0.2413, "step": 54394 }, { "epoch": 0.9455231274661475, "grad_norm": 0.9048962415902967, "learning_rate": 7.763837882469648e-09, "loss": 0.1056, "step": 54395 }, { "epoch": 0.9455405100036504, "grad_norm": 1.290499380308052, "learning_rate": 7.758897331891557e-09, "loss": 0.3185, "step": 54396 }, { "epoch": 0.9455578925411532, "grad_norm": 1.7377242999856235, "learning_rate": 7.753958341491563e-09, "loss": 0.1166, "step": 54397 }, { "epoch": 0.945575275078656, "grad_norm": 1.2793024116068537, "learning_rate": 7.749020911285487e-09, "loss": 0.1571, "step": 54398 }, { "epoch": 0.9455926576161588, "grad_norm": 1.343207298516294, "learning_rate": 7.744085041288873e-09, "loss": 0.2211, "step": 54399 }, { "epoch": 0.9456100401536617, "grad_norm": 1.838885102234752, "learning_rate": 7.739150731517319e-09, "loss": 0.1385, "step": 54400 }, { "epoch": 0.9456274226911645, "grad_norm": 1.1455773035490338, "learning_rate": 7.73421798198659e-09, "loss": 0.1971, "step": 54401 }, { "epoch": 0.9456448052286672, "grad_norm": 1.2748208833248185, "learning_rate": 7.729286792712287e-09, "loss": 0.1654, "step": 54402 }, { "epoch": 0.9456621877661701, "grad_norm": 2.5175747203269085, "learning_rate": 7.724357163709949e-09, "loss": 0.2234, "step": 54403 }, { "epoch": 0.9456795703036729, "grad_norm": 1.4999476804605338, "learning_rate": 7.719429094995288e-09, "loss": 0.1176, "step": 54404 }, { "epoch": 0.9456969528411757, "grad_norm": 1.5095818664363443, "learning_rate": 7.71450258658396e-09, "loss": 0.1192, "step": 54405 }, { "epoch": 0.9457143353786786, "grad_norm": 1.4392497657469292, "learning_rate": 7.709577638491449e-09, "loss": 0.2122, "step": 54406 }, { "epoch": 0.9457317179161814, "grad_norm": 0.9136227483007106, "learning_rate": 7.704654250733411e-09, "loss": 0.1537, "step": 54407 }, { "epoch": 0.9457491004536842, "grad_norm": 1.4977083969515672, "learning_rate": 7.699732423325555e-09, "loss": 0.194, "step": 54408 }, { "epoch": 0.945766482991187, "grad_norm": 0.8788724840101872, "learning_rate": 7.69481215628337e-09, "loss": 0.1021, "step": 54409 }, { "epoch": 0.9457838655286899, "grad_norm": 0.9600521643618315, "learning_rate": 7.689893449622453e-09, "loss": 0.1684, "step": 54410 }, { "epoch": 0.9458012480661927, "grad_norm": 1.3012048378898038, "learning_rate": 7.684976303358459e-09, "loss": 0.1366, "step": 54411 }, { "epoch": 0.9458186306036955, "grad_norm": 2.791921051778336, "learning_rate": 7.680060717506931e-09, "loss": 0.1964, "step": 54412 }, { "epoch": 0.9458360131411984, "grad_norm": 2.6872658538711045, "learning_rate": 7.675146692083468e-09, "loss": 0.2402, "step": 54413 }, { "epoch": 0.9458533956787012, "grad_norm": 1.1171362787805945, "learning_rate": 7.670234227103667e-09, "loss": 0.2342, "step": 54414 }, { "epoch": 0.945870778216204, "grad_norm": 1.6241425300906722, "learning_rate": 7.665323322582962e-09, "loss": 0.2609, "step": 54415 }, { "epoch": 0.9458881607537069, "grad_norm": 1.2034536808042022, "learning_rate": 7.660413978537117e-09, "loss": 0.1669, "step": 54416 }, { "epoch": 0.9459055432912097, "grad_norm": 1.3562818040276645, "learning_rate": 7.655506194981565e-09, "loss": 0.1413, "step": 54417 }, { "epoch": 0.9459229258287125, "grad_norm": 1.0787723899598378, "learning_rate": 7.650599971931904e-09, "loss": 0.1113, "step": 54418 }, { "epoch": 0.9459403083662153, "grad_norm": 1.924920807313991, "learning_rate": 7.645695309403677e-09, "loss": 0.1964, "step": 54419 }, { "epoch": 0.9459576909037182, "grad_norm": 1.5610293643897648, "learning_rate": 7.640792207412427e-09, "loss": 0.1894, "step": 54420 }, { "epoch": 0.945975073441221, "grad_norm": 2.1173359645146483, "learning_rate": 7.635890665973698e-09, "loss": 0.2538, "step": 54421 }, { "epoch": 0.9459924559787237, "grad_norm": 1.3918913911327542, "learning_rate": 7.630990685102978e-09, "loss": 0.1513, "step": 54422 }, { "epoch": 0.9460098385162266, "grad_norm": 2.212191870162109, "learning_rate": 7.626092264815864e-09, "loss": 0.2221, "step": 54423 }, { "epoch": 0.9460272210537294, "grad_norm": 1.7095911586696386, "learning_rate": 7.6211954051279e-09, "loss": 0.1317, "step": 54424 }, { "epoch": 0.9460446035912322, "grad_norm": 1.9042887479047448, "learning_rate": 7.616300106054519e-09, "loss": 0.1714, "step": 54425 }, { "epoch": 0.946061986128735, "grad_norm": 2.068980977695856, "learning_rate": 7.611406367611262e-09, "loss": 0.1975, "step": 54426 }, { "epoch": 0.9460793686662379, "grad_norm": 1.7157254914985038, "learning_rate": 7.606514189813728e-09, "loss": 0.2149, "step": 54427 }, { "epoch": 0.9460967512037407, "grad_norm": 1.2458319858431657, "learning_rate": 7.601623572677351e-09, "loss": 0.1466, "step": 54428 }, { "epoch": 0.9461141337412435, "grad_norm": 1.5926411817298551, "learning_rate": 7.596734516217617e-09, "loss": 0.1824, "step": 54429 }, { "epoch": 0.9461315162787464, "grad_norm": 2.7782934966440194, "learning_rate": 7.591847020450014e-09, "loss": 0.259, "step": 54430 }, { "epoch": 0.9461488988162492, "grad_norm": 1.454474042803629, "learning_rate": 7.586961085390087e-09, "loss": 0.147, "step": 54431 }, { "epoch": 0.946166281353752, "grad_norm": 1.9937366030298513, "learning_rate": 7.58207671105332e-09, "loss": 0.2792, "step": 54432 }, { "epoch": 0.9461836638912549, "grad_norm": 1.2618502657469808, "learning_rate": 7.577193897455147e-09, "loss": 0.1329, "step": 54433 }, { "epoch": 0.9462010464287577, "grad_norm": 0.9927454510626044, "learning_rate": 7.572312644611057e-09, "loss": 0.1804, "step": 54434 }, { "epoch": 0.9462184289662605, "grad_norm": 0.9228364152285983, "learning_rate": 7.567432952536534e-09, "loss": 0.171, "step": 54435 }, { "epoch": 0.9462358115037633, "grad_norm": 2.429170963585054, "learning_rate": 7.562554821247069e-09, "loss": 0.1991, "step": 54436 }, { "epoch": 0.9462531940412662, "grad_norm": 1.9509295968079112, "learning_rate": 7.557678250758149e-09, "loss": 0.1475, "step": 54437 }, { "epoch": 0.946270576578769, "grad_norm": 0.6962074507356764, "learning_rate": 7.552803241085037e-09, "loss": 0.1392, "step": 54438 }, { "epoch": 0.9462879591162718, "grad_norm": 1.7311209398829468, "learning_rate": 7.547929792243446e-09, "loss": 0.1877, "step": 54439 }, { "epoch": 0.9463053416537747, "grad_norm": 1.414900212846284, "learning_rate": 7.54305790424864e-09, "loss": 0.2026, "step": 54440 }, { "epoch": 0.9463227241912775, "grad_norm": 0.9905280733020445, "learning_rate": 7.538187577116162e-09, "loss": 0.1815, "step": 54441 }, { "epoch": 0.9463401067287802, "grad_norm": 1.2275721267565887, "learning_rate": 7.533318810861389e-09, "loss": 0.1429, "step": 54442 }, { "epoch": 0.946357489266283, "grad_norm": 1.3817356203027291, "learning_rate": 7.528451605499808e-09, "loss": 0.1398, "step": 54443 }, { "epoch": 0.9463748718037859, "grad_norm": 1.4940175466147199, "learning_rate": 7.523585961046797e-09, "loss": 0.1916, "step": 54444 }, { "epoch": 0.9463922543412887, "grad_norm": 1.558072794326559, "learning_rate": 7.518721877517787e-09, "loss": 0.1892, "step": 54445 }, { "epoch": 0.9464096368787915, "grad_norm": 0.8569236997296812, "learning_rate": 7.513859354928154e-09, "loss": 0.1069, "step": 54446 }, { "epoch": 0.9464270194162944, "grad_norm": 1.1113729483571777, "learning_rate": 7.508998393293442e-09, "loss": 0.1615, "step": 54447 }, { "epoch": 0.9464444019537972, "grad_norm": 1.2458966156404874, "learning_rate": 7.504138992628916e-09, "loss": 0.1527, "step": 54448 }, { "epoch": 0.9464617844913, "grad_norm": 1.7883863550321144, "learning_rate": 7.499281152950066e-09, "loss": 0.1629, "step": 54449 }, { "epoch": 0.9464791670288029, "grad_norm": 1.3188979912526178, "learning_rate": 7.494424874272265e-09, "loss": 0.1291, "step": 54450 }, { "epoch": 0.9464965495663057, "grad_norm": 1.0102932801892326, "learning_rate": 7.489570156610836e-09, "loss": 0.1326, "step": 54451 }, { "epoch": 0.9465139321038085, "grad_norm": 2.441432049858396, "learning_rate": 7.484716999981322e-09, "loss": 0.2068, "step": 54452 }, { "epoch": 0.9465313146413113, "grad_norm": 1.5384306823333727, "learning_rate": 7.479865404398933e-09, "loss": 0.1484, "step": 54453 }, { "epoch": 0.9465486971788142, "grad_norm": 2.58461637294802, "learning_rate": 7.4750153698791e-09, "loss": 0.1913, "step": 54454 }, { "epoch": 0.946566079716317, "grad_norm": 1.9764544666643746, "learning_rate": 7.470166896437314e-09, "loss": 0.1502, "step": 54455 }, { "epoch": 0.9465834622538198, "grad_norm": 1.8913801059887938, "learning_rate": 7.465319984088836e-09, "loss": 0.1752, "step": 54456 }, { "epoch": 0.9466008447913227, "grad_norm": 1.3517598536423776, "learning_rate": 7.46047463284899e-09, "loss": 0.2258, "step": 54457 }, { "epoch": 0.9466182273288255, "grad_norm": 1.8598791765313771, "learning_rate": 7.455630842733206e-09, "loss": 0.202, "step": 54458 }, { "epoch": 0.9466356098663283, "grad_norm": 0.9952896639969565, "learning_rate": 7.450788613756808e-09, "loss": 0.2329, "step": 54459 }, { "epoch": 0.9466529924038312, "grad_norm": 1.8245194060705134, "learning_rate": 7.445947945935172e-09, "loss": 0.226, "step": 54460 }, { "epoch": 0.9466703749413339, "grad_norm": 1.0035651466135969, "learning_rate": 7.4411088392836165e-09, "loss": 0.1567, "step": 54461 }, { "epoch": 0.9466877574788367, "grad_norm": 1.8482925831150938, "learning_rate": 7.43627129381752e-09, "loss": 0.1783, "step": 54462 }, { "epoch": 0.9467051400163395, "grad_norm": 1.273594535473442, "learning_rate": 7.431435309552148e-09, "loss": 0.1344, "step": 54463 }, { "epoch": 0.9467225225538424, "grad_norm": 1.0703753945827845, "learning_rate": 7.426600886502821e-09, "loss": 0.1902, "step": 54464 }, { "epoch": 0.9467399050913452, "grad_norm": 1.5504695877859256, "learning_rate": 7.421768024684916e-09, "loss": 0.1516, "step": 54465 }, { "epoch": 0.946757287628848, "grad_norm": 1.4221495066355718, "learning_rate": 7.416936724113753e-09, "loss": 0.137, "step": 54466 }, { "epoch": 0.9467746701663509, "grad_norm": 2.5382799143669312, "learning_rate": 7.4121069848046e-09, "loss": 0.1609, "step": 54467 }, { "epoch": 0.9467920527038537, "grad_norm": 1.0993872826987712, "learning_rate": 7.407278806772832e-09, "loss": 0.1401, "step": 54468 }, { "epoch": 0.9468094352413565, "grad_norm": 1.2465525762408438, "learning_rate": 7.402452190033659e-09, "loss": 0.1416, "step": 54469 }, { "epoch": 0.9468268177788594, "grad_norm": 0.9920158073999016, "learning_rate": 7.397627134602513e-09, "loss": 0.1447, "step": 54470 }, { "epoch": 0.9468442003163622, "grad_norm": 2.0801153525887175, "learning_rate": 7.392803640494549e-09, "loss": 0.2766, "step": 54471 }, { "epoch": 0.946861582853865, "grad_norm": 1.1277896203852782, "learning_rate": 7.387981707725088e-09, "loss": 0.1681, "step": 54472 }, { "epoch": 0.9468789653913678, "grad_norm": 1.0894907208257723, "learning_rate": 7.383161336309451e-09, "loss": 0.1606, "step": 54473 }, { "epoch": 0.9468963479288707, "grad_norm": 1.2340373939044342, "learning_rate": 7.3783425262629596e-09, "loss": 0.1363, "step": 54474 }, { "epoch": 0.9469137304663735, "grad_norm": 1.6220368119761763, "learning_rate": 7.373525277600823e-09, "loss": 0.1324, "step": 54475 }, { "epoch": 0.9469311130038763, "grad_norm": 1.1197859563804586, "learning_rate": 7.3687095903382514e-09, "loss": 0.0991, "step": 54476 }, { "epoch": 0.9469484955413792, "grad_norm": 1.1788137914158976, "learning_rate": 7.363895464490566e-09, "loss": 0.2178, "step": 54477 }, { "epoch": 0.946965878078882, "grad_norm": 1.6144475462078236, "learning_rate": 7.359082900073088e-09, "loss": 0.1912, "step": 54478 }, { "epoch": 0.9469832606163848, "grad_norm": 1.5891654460836768, "learning_rate": 7.354271897101028e-09, "loss": 0.1648, "step": 54479 }, { "epoch": 0.9470006431538877, "grad_norm": 1.7594867739862987, "learning_rate": 7.349462455589595e-09, "loss": 0.1399, "step": 54480 }, { "epoch": 0.9470180256913904, "grad_norm": 2.864433421915404, "learning_rate": 7.344654575554055e-09, "loss": 0.2226, "step": 54481 }, { "epoch": 0.9470354082288932, "grad_norm": 0.8187843941191237, "learning_rate": 7.339848257009673e-09, "loss": 0.1089, "step": 54482 }, { "epoch": 0.947052790766396, "grad_norm": 2.6597302865054817, "learning_rate": 7.335043499971661e-09, "loss": 0.1914, "step": 54483 }, { "epoch": 0.9470701733038989, "grad_norm": 0.8601956641217323, "learning_rate": 7.330240304455226e-09, "loss": 0.1746, "step": 54484 }, { "epoch": 0.9470875558414017, "grad_norm": 1.5218666851376563, "learning_rate": 7.325438670475636e-09, "loss": 0.3024, "step": 54485 }, { "epoch": 0.9471049383789045, "grad_norm": 2.6236764984211574, "learning_rate": 7.320638598048045e-09, "loss": 0.1147, "step": 54486 }, { "epoch": 0.9471223209164074, "grad_norm": 1.571970413870214, "learning_rate": 7.31584008718783e-09, "loss": 0.1917, "step": 54487 }, { "epoch": 0.9471397034539102, "grad_norm": 2.1826949955867234, "learning_rate": 7.311043137909922e-09, "loss": 0.171, "step": 54488 }, { "epoch": 0.947157085991413, "grad_norm": 1.0473288383041308, "learning_rate": 7.30624775022981e-09, "loss": 0.1585, "step": 54489 }, { "epoch": 0.9471744685289158, "grad_norm": 1.8607173834471726, "learning_rate": 7.301453924162482e-09, "loss": 0.1586, "step": 54490 }, { "epoch": 0.9471918510664187, "grad_norm": 1.082908935258658, "learning_rate": 7.296661659723313e-09, "loss": 0.1857, "step": 54491 }, { "epoch": 0.9472092336039215, "grad_norm": 2.149527889713957, "learning_rate": 7.291870956927293e-09, "loss": 0.206, "step": 54492 }, { "epoch": 0.9472266161414243, "grad_norm": 1.006199575976312, "learning_rate": 7.287081815789797e-09, "loss": 0.1867, "step": 54493 }, { "epoch": 0.9472439986789272, "grad_norm": 1.0893239500948444, "learning_rate": 7.28229423632587e-09, "loss": 0.2168, "step": 54494 }, { "epoch": 0.94726138121643, "grad_norm": 2.4158287053552607, "learning_rate": 7.277508218550721e-09, "loss": 0.1906, "step": 54495 }, { "epoch": 0.9472787637539328, "grad_norm": 1.9662506710911218, "learning_rate": 7.27272376247956e-09, "loss": 0.1988, "step": 54496 }, { "epoch": 0.9472961462914357, "grad_norm": 2.4379207386077866, "learning_rate": 7.267940868127486e-09, "loss": 0.1925, "step": 54497 }, { "epoch": 0.9473135288289385, "grad_norm": 0.9346487788613181, "learning_rate": 7.263159535509767e-09, "loss": 0.2308, "step": 54498 }, { "epoch": 0.9473309113664413, "grad_norm": 0.9181415070578541, "learning_rate": 7.2583797646414424e-09, "loss": 0.2317, "step": 54499 }, { "epoch": 0.9473482939039441, "grad_norm": 0.9075682056670513, "learning_rate": 7.253601555537725e-09, "loss": 0.2247, "step": 54500 }, { "epoch": 0.9473656764414469, "grad_norm": 1.191733645095273, "learning_rate": 7.248824908213713e-09, "loss": 0.1612, "step": 54501 }, { "epoch": 0.9473830589789497, "grad_norm": 1.8670089824756078, "learning_rate": 7.244049822684617e-09, "loss": 0.2285, "step": 54502 }, { "epoch": 0.9474004415164525, "grad_norm": 1.2173804715038412, "learning_rate": 7.239276298965535e-09, "loss": 0.1475, "step": 54503 }, { "epoch": 0.9474178240539554, "grad_norm": 0.9889379996538119, "learning_rate": 7.234504337071512e-09, "loss": 0.1178, "step": 54504 }, { "epoch": 0.9474352065914582, "grad_norm": 2.025110926064175, "learning_rate": 7.229733937017813e-09, "loss": 0.2321, "step": 54505 }, { "epoch": 0.947452589128961, "grad_norm": 1.0630790956836955, "learning_rate": 7.224965098819535e-09, "loss": 0.1718, "step": 54506 }, { "epoch": 0.9474699716664639, "grad_norm": 1.7223972473538487, "learning_rate": 7.220197822491669e-09, "loss": 0.2198, "step": 54507 }, { "epoch": 0.9474873542039667, "grad_norm": 1.4474055458307513, "learning_rate": 7.2154321080494244e-09, "loss": 0.1724, "step": 54508 }, { "epoch": 0.9475047367414695, "grad_norm": 2.30769944584383, "learning_rate": 7.210667955507954e-09, "loss": 0.202, "step": 54509 }, { "epoch": 0.9475221192789723, "grad_norm": 1.4448723972809538, "learning_rate": 7.205905364882248e-09, "loss": 0.2375, "step": 54510 }, { "epoch": 0.9475395018164752, "grad_norm": 1.1574467832265662, "learning_rate": 7.201144336187459e-09, "loss": 0.1759, "step": 54511 }, { "epoch": 0.947556884353978, "grad_norm": 1.5945538062159232, "learning_rate": 7.196384869438687e-09, "loss": 0.1263, "step": 54512 }, { "epoch": 0.9475742668914808, "grad_norm": 0.840943741968549, "learning_rate": 7.191626964650976e-09, "loss": 0.162, "step": 54513 }, { "epoch": 0.9475916494289837, "grad_norm": 1.044382414611858, "learning_rate": 7.18687062183948e-09, "loss": 0.2095, "step": 54514 }, { "epoch": 0.9476090319664865, "grad_norm": 2.332693443903943, "learning_rate": 7.182115841019132e-09, "loss": 0.1515, "step": 54515 }, { "epoch": 0.9476264145039893, "grad_norm": 1.6785312290989987, "learning_rate": 7.177362622205141e-09, "loss": 0.1864, "step": 54516 }, { "epoch": 0.9476437970414922, "grad_norm": 2.459254577183936, "learning_rate": 7.172610965412551e-09, "loss": 0.1719, "step": 54517 }, { "epoch": 0.947661179578995, "grad_norm": 1.3977972991241072, "learning_rate": 7.1678608706563505e-09, "loss": 0.2954, "step": 54518 }, { "epoch": 0.9476785621164978, "grad_norm": 0.9592672368387326, "learning_rate": 7.163112337951693e-09, "loss": 0.1269, "step": 54519 }, { "epoch": 0.9476959446540006, "grad_norm": 1.7928735096852504, "learning_rate": 7.158365367313568e-09, "loss": 0.1807, "step": 54520 }, { "epoch": 0.9477133271915034, "grad_norm": 0.8378887496843855, "learning_rate": 7.153619958757018e-09, "loss": 0.1364, "step": 54521 }, { "epoch": 0.9477307097290062, "grad_norm": 1.4473669224366144, "learning_rate": 7.1488761122970864e-09, "loss": 0.1694, "step": 54522 }, { "epoch": 0.947748092266509, "grad_norm": 1.4830781236756572, "learning_rate": 7.144133827948817e-09, "loss": 0.1402, "step": 54523 }, { "epoch": 0.9477654748040119, "grad_norm": 1.415115409585566, "learning_rate": 7.139393105727254e-09, "loss": 0.1884, "step": 54524 }, { "epoch": 0.9477828573415147, "grad_norm": 1.014677212804494, "learning_rate": 7.134653945647495e-09, "loss": 0.1616, "step": 54525 }, { "epoch": 0.9478002398790175, "grad_norm": 1.785270883447654, "learning_rate": 7.1299163477244185e-09, "loss": 0.1948, "step": 54526 }, { "epoch": 0.9478176224165203, "grad_norm": 1.607596963450456, "learning_rate": 7.125180311973011e-09, "loss": 0.2125, "step": 54527 }, { "epoch": 0.9478350049540232, "grad_norm": 1.8969764489978365, "learning_rate": 7.120445838408484e-09, "loss": 0.1209, "step": 54528 }, { "epoch": 0.947852387491526, "grad_norm": 1.6862805454531977, "learning_rate": 7.115712927045714e-09, "loss": 0.2079, "step": 54529 }, { "epoch": 0.9478697700290288, "grad_norm": 1.3674258905391405, "learning_rate": 7.1109815778996885e-09, "loss": 0.2196, "step": 54530 }, { "epoch": 0.9478871525665317, "grad_norm": 1.4748888340636008, "learning_rate": 7.106251790985451e-09, "loss": 0.2372, "step": 54531 }, { "epoch": 0.9479045351040345, "grad_norm": 1.8276648626645045, "learning_rate": 7.1015235663179905e-09, "loss": 0.1344, "step": 54532 }, { "epoch": 0.9479219176415373, "grad_norm": 1.4641861543760246, "learning_rate": 7.096796903912294e-09, "loss": 0.2228, "step": 54533 }, { "epoch": 0.9479393001790402, "grad_norm": 1.0372969909213425, "learning_rate": 7.092071803783295e-09, "loss": 0.1399, "step": 54534 }, { "epoch": 0.947956682716543, "grad_norm": 2.7634259053597496, "learning_rate": 7.08734826594598e-09, "loss": 0.1951, "step": 54535 }, { "epoch": 0.9479740652540458, "grad_norm": 1.3004646147028238, "learning_rate": 7.082626290415394e-09, "loss": 0.1161, "step": 54536 }, { "epoch": 0.9479914477915486, "grad_norm": 1.3365115121183073, "learning_rate": 7.0779058772064695e-09, "loss": 0.131, "step": 54537 }, { "epoch": 0.9480088303290515, "grad_norm": 1.0371701744755704, "learning_rate": 7.073187026334138e-09, "loss": 0.2034, "step": 54538 }, { "epoch": 0.9480262128665543, "grad_norm": 1.3369376437252802, "learning_rate": 7.068469737813332e-09, "loss": 0.1875, "step": 54539 }, { "epoch": 0.9480435954040571, "grad_norm": 2.0723980208956396, "learning_rate": 7.063754011659096e-09, "loss": 0.3252, "step": 54540 }, { "epoch": 0.9480609779415599, "grad_norm": 1.6673788468267094, "learning_rate": 7.059039847886305e-09, "loss": 0.3591, "step": 54541 }, { "epoch": 0.9480783604790627, "grad_norm": 1.5939198720854222, "learning_rate": 7.05432724650995e-09, "loss": 0.2451, "step": 54542 }, { "epoch": 0.9480957430165655, "grad_norm": 0.8568920049877883, "learning_rate": 7.04961620754485e-09, "loss": 0.1717, "step": 54543 }, { "epoch": 0.9481131255540683, "grad_norm": 1.323242033328228, "learning_rate": 7.044906731006106e-09, "loss": 0.1489, "step": 54544 }, { "epoch": 0.9481305080915712, "grad_norm": 1.050759300648508, "learning_rate": 7.040198816908538e-09, "loss": 0.1935, "step": 54545 }, { "epoch": 0.948147890629074, "grad_norm": 0.8094498988941885, "learning_rate": 7.035492465267079e-09, "loss": 0.142, "step": 54546 }, { "epoch": 0.9481652731665768, "grad_norm": 18.700388691920892, "learning_rate": 7.030787676096661e-09, "loss": 0.2914, "step": 54547 }, { "epoch": 0.9481826557040797, "grad_norm": 1.2300167011403071, "learning_rate": 7.026084449412217e-09, "loss": 0.1378, "step": 54548 }, { "epoch": 0.9482000382415825, "grad_norm": 0.8385061053182924, "learning_rate": 7.021382785228625e-09, "loss": 0.1793, "step": 54549 }, { "epoch": 0.9482174207790853, "grad_norm": 1.107901691082951, "learning_rate": 7.0166826835607596e-09, "loss": 0.1662, "step": 54550 }, { "epoch": 0.9482348033165882, "grad_norm": 1.4025476295090036, "learning_rate": 7.011984144423611e-09, "loss": 0.1551, "step": 54551 }, { "epoch": 0.948252185854091, "grad_norm": 1.3647203648033146, "learning_rate": 7.007287167831944e-09, "loss": 0.2436, "step": 54552 }, { "epoch": 0.9482695683915938, "grad_norm": 8.854382360402177, "learning_rate": 7.0025917538007465e-09, "loss": 0.2389, "step": 54553 }, { "epoch": 0.9482869509290967, "grad_norm": 2.184274163948305, "learning_rate": 6.99789790234484e-09, "loss": 0.1529, "step": 54554 }, { "epoch": 0.9483043334665995, "grad_norm": 1.3584897961852493, "learning_rate": 6.993205613479214e-09, "loss": 0.1434, "step": 54555 }, { "epoch": 0.9483217160041023, "grad_norm": 0.9394428748916883, "learning_rate": 6.988514887218577e-09, "loss": 0.1557, "step": 54556 }, { "epoch": 0.9483390985416051, "grad_norm": 1.0388998686754298, "learning_rate": 6.983825723577974e-09, "loss": 0.1311, "step": 54557 }, { "epoch": 0.948356481079108, "grad_norm": 2.507570895727927, "learning_rate": 6.979138122572059e-09, "loss": 0.1975, "step": 54558 }, { "epoch": 0.9483738636166108, "grad_norm": 1.5785475843063046, "learning_rate": 6.974452084215821e-09, "loss": 0.1516, "step": 54559 }, { "epoch": 0.9483912461541136, "grad_norm": 1.3698916559090115, "learning_rate": 6.969767608524135e-09, "loss": 0.2037, "step": 54560 }, { "epoch": 0.9484086286916164, "grad_norm": 2.8857406646801977, "learning_rate": 6.96508469551177e-09, "loss": 0.2578, "step": 54561 }, { "epoch": 0.9484260112291192, "grad_norm": 1.4617735670559528, "learning_rate": 6.9604033451935995e-09, "loss": 0.1536, "step": 54562 }, { "epoch": 0.948443393766622, "grad_norm": 3.015145462081418, "learning_rate": 6.955723557584559e-09, "loss": 0.2122, "step": 54563 }, { "epoch": 0.9484607763041248, "grad_norm": 2.549975875156446, "learning_rate": 6.951045332699302e-09, "loss": 0.3315, "step": 54564 }, { "epoch": 0.9484781588416277, "grad_norm": 0.8997318425990802, "learning_rate": 6.9463686705527605e-09, "loss": 0.1493, "step": 54565 }, { "epoch": 0.9484955413791305, "grad_norm": 1.9069922739554386, "learning_rate": 6.941693571159701e-09, "loss": 0.2437, "step": 54566 }, { "epoch": 0.9485129239166333, "grad_norm": 1.546972363080935, "learning_rate": 6.937020034535057e-09, "loss": 0.1635, "step": 54567 }, { "epoch": 0.9485303064541362, "grad_norm": 1.662198075667794, "learning_rate": 6.932348060693482e-09, "loss": 0.1736, "step": 54568 }, { "epoch": 0.948547688991639, "grad_norm": 1.5168527909391991, "learning_rate": 6.92767764964991e-09, "loss": 0.171, "step": 54569 }, { "epoch": 0.9485650715291418, "grad_norm": 1.2859789197751408, "learning_rate": 6.923008801419106e-09, "loss": 0.2436, "step": 54570 }, { "epoch": 0.9485824540666447, "grad_norm": 1.2123201372933894, "learning_rate": 6.918341516015891e-09, "loss": 0.1488, "step": 54571 }, { "epoch": 0.9485998366041475, "grad_norm": 0.8204672050572573, "learning_rate": 6.913675793454976e-09, "loss": 0.2203, "step": 54572 }, { "epoch": 0.9486172191416503, "grad_norm": 1.8795559690487478, "learning_rate": 6.909011633751238e-09, "loss": 0.2332, "step": 54573 }, { "epoch": 0.9486346016791531, "grad_norm": 3.5133219177219477, "learning_rate": 6.904349036919388e-09, "loss": 0.2374, "step": 54574 }, { "epoch": 0.948651984216656, "grad_norm": 1.9036990508453453, "learning_rate": 6.899688002974302e-09, "loss": 0.2372, "step": 54575 }, { "epoch": 0.9486693667541588, "grad_norm": 1.3107541415032649, "learning_rate": 6.895028531930691e-09, "loss": 0.2084, "step": 54576 }, { "epoch": 0.9486867492916616, "grad_norm": 1.4156085483588123, "learning_rate": 6.890370623803265e-09, "loss": 0.1968, "step": 54577 }, { "epoch": 0.9487041318291645, "grad_norm": 1.0918675278112602, "learning_rate": 6.8857142786068465e-09, "loss": 0.2076, "step": 54578 }, { "epoch": 0.9487215143666673, "grad_norm": 1.3651241872551385, "learning_rate": 6.881059496356257e-09, "loss": 0.1533, "step": 54579 }, { "epoch": 0.9487388969041701, "grad_norm": 1.5056537647109498, "learning_rate": 6.876406277066149e-09, "loss": 0.1353, "step": 54580 }, { "epoch": 0.9487562794416728, "grad_norm": 1.2302416361389565, "learning_rate": 6.871754620751291e-09, "loss": 0.2309, "step": 54581 }, { "epoch": 0.9487736619791757, "grad_norm": 0.7987427136293942, "learning_rate": 6.867104527426504e-09, "loss": 0.1341, "step": 54582 }, { "epoch": 0.9487910445166785, "grad_norm": 1.4909501669379093, "learning_rate": 6.862455997106442e-09, "loss": 0.2866, "step": 54583 }, { "epoch": 0.9488084270541813, "grad_norm": 1.444329164568493, "learning_rate": 6.857809029805872e-09, "loss": 0.1505, "step": 54584 }, { "epoch": 0.9488258095916842, "grad_norm": 0.9030107963164888, "learning_rate": 6.853163625539449e-09, "loss": 0.2335, "step": 54585 }, { "epoch": 0.948843192129187, "grad_norm": 1.6520922517844494, "learning_rate": 6.8485197843220486e-09, "loss": 0.125, "step": 54586 }, { "epoch": 0.9488605746666898, "grad_norm": 1.4587569952900659, "learning_rate": 6.8438775061682715e-09, "loss": 0.1975, "step": 54587 }, { "epoch": 0.9488779572041927, "grad_norm": 1.195268786055659, "learning_rate": 6.839236791092884e-09, "loss": 0.1857, "step": 54588 }, { "epoch": 0.9488953397416955, "grad_norm": 0.9323879931952888, "learning_rate": 6.83459763911054e-09, "loss": 0.0893, "step": 54589 }, { "epoch": 0.9489127222791983, "grad_norm": 0.9533826173238508, "learning_rate": 6.829960050236061e-09, "loss": 0.119, "step": 54590 }, { "epoch": 0.9489301048167011, "grad_norm": 1.0695732940833775, "learning_rate": 6.825324024483992e-09, "loss": 0.1632, "step": 54591 }, { "epoch": 0.948947487354204, "grad_norm": 1.2856900003509397, "learning_rate": 6.820689561869098e-09, "loss": 0.2319, "step": 54592 }, { "epoch": 0.9489648698917068, "grad_norm": 1.5407896670055357, "learning_rate": 6.816056662406089e-09, "loss": 0.2738, "step": 54593 }, { "epoch": 0.9489822524292096, "grad_norm": 1.5314494422066585, "learning_rate": 6.811425326109677e-09, "loss": 0.219, "step": 54594 }, { "epoch": 0.9489996349667125, "grad_norm": 1.5051784202104723, "learning_rate": 6.806795552994404e-09, "loss": 0.224, "step": 54595 }, { "epoch": 0.9490170175042153, "grad_norm": 1.0491796480764815, "learning_rate": 6.802167343075094e-09, "loss": 0.2595, "step": 54596 }, { "epoch": 0.9490344000417181, "grad_norm": 1.3218102888810337, "learning_rate": 6.797540696366344e-09, "loss": 0.1781, "step": 54597 }, { "epoch": 0.949051782579221, "grad_norm": 1.4700754937259821, "learning_rate": 6.79291561288281e-09, "loss": 0.1067, "step": 54598 }, { "epoch": 0.9490691651167238, "grad_norm": 1.2647009058044456, "learning_rate": 6.788292092639203e-09, "loss": 0.2231, "step": 54599 }, { "epoch": 0.9490865476542265, "grad_norm": 1.2449203558360553, "learning_rate": 6.783670135650122e-09, "loss": 0.1809, "step": 54600 }, { "epoch": 0.9491039301917293, "grad_norm": 1.120021955361384, "learning_rate": 6.779049741930276e-09, "loss": 0.1393, "step": 54601 }, { "epoch": 0.9491213127292322, "grad_norm": 1.2463810592579552, "learning_rate": 6.774430911494267e-09, "loss": 0.2209, "step": 54602 }, { "epoch": 0.949138695266735, "grad_norm": 1.0617415425849719, "learning_rate": 6.769813644356692e-09, "loss": 0.2211, "step": 54603 }, { "epoch": 0.9491560778042378, "grad_norm": 4.436189166256883, "learning_rate": 6.7651979405322635e-09, "loss": 0.1947, "step": 54604 }, { "epoch": 0.9491734603417407, "grad_norm": 1.8840975412090544, "learning_rate": 6.760583800035579e-09, "loss": 0.2133, "step": 54605 }, { "epoch": 0.9491908428792435, "grad_norm": 1.743486468480932, "learning_rate": 6.755971222881296e-09, "loss": 0.1434, "step": 54606 }, { "epoch": 0.9492082254167463, "grad_norm": 2.36294207242326, "learning_rate": 6.751360209084012e-09, "loss": 0.2549, "step": 54607 }, { "epoch": 0.9492256079542492, "grad_norm": 1.4473501564610192, "learning_rate": 6.746750758658326e-09, "loss": 0.149, "step": 54608 }, { "epoch": 0.949242990491752, "grad_norm": 1.8067354890604552, "learning_rate": 6.7421428716188386e-09, "loss": 0.1269, "step": 54609 }, { "epoch": 0.9492603730292548, "grad_norm": 2.120892306674132, "learning_rate": 6.737536547980205e-09, "loss": 0.2345, "step": 54610 }, { "epoch": 0.9492777555667576, "grad_norm": 1.038075024881441, "learning_rate": 6.732931787756968e-09, "loss": 0.2219, "step": 54611 }, { "epoch": 0.9492951381042605, "grad_norm": 1.1952778662130827, "learning_rate": 6.728328590963728e-09, "loss": 0.1128, "step": 54612 }, { "epoch": 0.9493125206417633, "grad_norm": 1.3827395069527755, "learning_rate": 6.723726957615139e-09, "loss": 0.1484, "step": 54613 }, { "epoch": 0.9493299031792661, "grad_norm": 1.8553792440975667, "learning_rate": 6.719126887725746e-09, "loss": 0.1383, "step": 54614 }, { "epoch": 0.949347285716769, "grad_norm": 1.4857905602176045, "learning_rate": 6.714528381310092e-09, "loss": 0.1725, "step": 54615 }, { "epoch": 0.9493646682542718, "grad_norm": 5.555510944784729, "learning_rate": 6.709931438382777e-09, "loss": 0.3432, "step": 54616 }, { "epoch": 0.9493820507917746, "grad_norm": 1.9548927337805642, "learning_rate": 6.7053360589584e-09, "loss": 0.228, "step": 54617 }, { "epoch": 0.9493994333292775, "grad_norm": 1.7864390052546655, "learning_rate": 6.70074224305156e-09, "loss": 0.1851, "step": 54618 }, { "epoch": 0.9494168158667803, "grad_norm": 1.054145924973198, "learning_rate": 6.696149990676692e-09, "loss": 0.1437, "step": 54619 }, { "epoch": 0.949434198404283, "grad_norm": 1.1263925628191793, "learning_rate": 6.691559301848448e-09, "loss": 0.1688, "step": 54620 }, { "epoch": 0.9494515809417858, "grad_norm": 1.0123976439838989, "learning_rate": 6.686970176581375e-09, "loss": 0.13, "step": 54621 }, { "epoch": 0.9494689634792887, "grad_norm": 1.2637748237760889, "learning_rate": 6.6823826148899585e-09, "loss": 0.1965, "step": 54622 }, { "epoch": 0.9494863460167915, "grad_norm": 1.4248592796833357, "learning_rate": 6.6777966167888e-09, "loss": 0.1599, "step": 54623 }, { "epoch": 0.9495037285542943, "grad_norm": 1.307814088494697, "learning_rate": 6.673212182292387e-09, "loss": 0.2039, "step": 54624 }, { "epoch": 0.9495211110917972, "grad_norm": 2.4687828031254746, "learning_rate": 6.668629311415264e-09, "loss": 0.2094, "step": 54625 }, { "epoch": 0.9495384936293, "grad_norm": 1.094363828742732, "learning_rate": 6.6640480041720845e-09, "loss": 0.2368, "step": 54626 }, { "epoch": 0.9495558761668028, "grad_norm": 1.0695154769902109, "learning_rate": 6.659468260577117e-09, "loss": 0.0872, "step": 54627 }, { "epoch": 0.9495732587043056, "grad_norm": 1.0376029803211588, "learning_rate": 6.65489008064507e-09, "loss": 0.1428, "step": 54628 }, { "epoch": 0.9495906412418085, "grad_norm": 3.2760208964809197, "learning_rate": 6.650313464390378e-09, "loss": 0.1068, "step": 54629 }, { "epoch": 0.9496080237793113, "grad_norm": 1.1246963002808166, "learning_rate": 6.645738411827584e-09, "loss": 0.1179, "step": 54630 }, { "epoch": 0.9496254063168141, "grad_norm": 1.1268039765785156, "learning_rate": 6.641164922971176e-09, "loss": 0.1312, "step": 54631 }, { "epoch": 0.949642788854317, "grad_norm": 1.9192243240045745, "learning_rate": 6.636592997835644e-09, "loss": 0.1484, "step": 54632 }, { "epoch": 0.9496601713918198, "grad_norm": 1.2733943890980732, "learning_rate": 6.632022636435475e-09, "loss": 0.1874, "step": 54633 }, { "epoch": 0.9496775539293226, "grad_norm": 2.080028336881566, "learning_rate": 6.6274538387851575e-09, "loss": 0.1699, "step": 54634 }, { "epoch": 0.9496949364668255, "grad_norm": 1.7491402530558637, "learning_rate": 6.622886604899125e-09, "loss": 0.1567, "step": 54635 }, { "epoch": 0.9497123190043283, "grad_norm": 1.6085327485237286, "learning_rate": 6.6183209347919766e-09, "loss": 0.1557, "step": 54636 }, { "epoch": 0.9497297015418311, "grad_norm": 1.887534893640389, "learning_rate": 6.61375682847809e-09, "loss": 0.378, "step": 54637 }, { "epoch": 0.949747084079334, "grad_norm": 1.2571418240203747, "learning_rate": 6.609194285971953e-09, "loss": 0.146, "step": 54638 }, { "epoch": 0.9497644666168368, "grad_norm": 1.5160651804663396, "learning_rate": 6.604633307287999e-09, "loss": 0.0898, "step": 54639 }, { "epoch": 0.9497818491543395, "grad_norm": 1.278668606545193, "learning_rate": 6.6000738924407164e-09, "loss": 0.16, "step": 54640 }, { "epoch": 0.9497992316918423, "grad_norm": 1.6774523679058937, "learning_rate": 6.595516041444593e-09, "loss": 0.1669, "step": 54641 }, { "epoch": 0.9498166142293452, "grad_norm": 1.5431191329128864, "learning_rate": 6.590959754314007e-09, "loss": 0.1409, "step": 54642 }, { "epoch": 0.949833996766848, "grad_norm": 1.706250403856362, "learning_rate": 6.586405031063391e-09, "loss": 0.1358, "step": 54643 }, { "epoch": 0.9498513793043508, "grad_norm": 1.1218458381105114, "learning_rate": 6.581851871707233e-09, "loss": 0.1183, "step": 54644 }, { "epoch": 0.9498687618418536, "grad_norm": 2.090640218568218, "learning_rate": 6.577300276259967e-09, "loss": 0.1889, "step": 54645 }, { "epoch": 0.9498861443793565, "grad_norm": 1.6884661787640285, "learning_rate": 6.572750244736025e-09, "loss": 0.1809, "step": 54646 }, { "epoch": 0.9499035269168593, "grad_norm": 1.254551900890992, "learning_rate": 6.568201777149729e-09, "loss": 0.1775, "step": 54647 }, { "epoch": 0.9499209094543621, "grad_norm": 1.3249036520579067, "learning_rate": 6.563654873515622e-09, "loss": 0.1553, "step": 54648 }, { "epoch": 0.949938291991865, "grad_norm": 1.1875360204244658, "learning_rate": 6.559109533848028e-09, "loss": 0.1605, "step": 54649 }, { "epoch": 0.9499556745293678, "grad_norm": 1.4482044011230029, "learning_rate": 6.554565758161434e-09, "loss": 0.2695, "step": 54650 }, { "epoch": 0.9499730570668706, "grad_norm": 1.757180374882113, "learning_rate": 6.550023546470162e-09, "loss": 0.1951, "step": 54651 }, { "epoch": 0.9499904396043735, "grad_norm": 0.8327467429959381, "learning_rate": 6.545482898788645e-09, "loss": 0.1944, "step": 54652 }, { "epoch": 0.9500078221418763, "grad_norm": 1.3790128575129819, "learning_rate": 6.540943815131262e-09, "loss": 0.1799, "step": 54653 }, { "epoch": 0.9500252046793791, "grad_norm": 0.9433677082764318, "learning_rate": 6.536406295512442e-09, "loss": 0.1659, "step": 54654 }, { "epoch": 0.950042587216882, "grad_norm": 1.568867559629599, "learning_rate": 6.53187033994651e-09, "loss": 0.1581, "step": 54655 }, { "epoch": 0.9500599697543848, "grad_norm": 2.0444934772093233, "learning_rate": 6.527335948447899e-09, "loss": 0.2913, "step": 54656 }, { "epoch": 0.9500773522918876, "grad_norm": 1.7062147678673552, "learning_rate": 6.522803121030929e-09, "loss": 0.3787, "step": 54657 }, { "epoch": 0.9500947348293904, "grad_norm": 1.4641832855228212, "learning_rate": 6.518271857709978e-09, "loss": 0.1606, "step": 54658 }, { "epoch": 0.9501121173668933, "grad_norm": 1.3737540499569465, "learning_rate": 6.513742158499425e-09, "loss": 0.1619, "step": 54659 }, { "epoch": 0.950129499904396, "grad_norm": 1.0786425662340025, "learning_rate": 6.509214023413645e-09, "loss": 0.2229, "step": 54660 }, { "epoch": 0.9501468824418988, "grad_norm": 1.1446086699460822, "learning_rate": 6.504687452466962e-09, "loss": 0.1885, "step": 54661 }, { "epoch": 0.9501642649794017, "grad_norm": 2.0372911962446003, "learning_rate": 6.500162445673696e-09, "loss": 0.1651, "step": 54662 }, { "epoch": 0.9501816475169045, "grad_norm": 0.8815686667756409, "learning_rate": 6.495639003048226e-09, "loss": 0.1937, "step": 54663 }, { "epoch": 0.9501990300544073, "grad_norm": 2.1101637067163135, "learning_rate": 6.4911171246049275e-09, "loss": 0.1788, "step": 54664 }, { "epoch": 0.9502164125919101, "grad_norm": 1.2915499903275562, "learning_rate": 6.486596810358014e-09, "loss": 0.1423, "step": 54665 }, { "epoch": 0.950233795129413, "grad_norm": 1.1747851630666206, "learning_rate": 6.482078060321916e-09, "loss": 0.1453, "step": 54666 }, { "epoch": 0.9502511776669158, "grad_norm": 1.8477408482747748, "learning_rate": 6.477560874510956e-09, "loss": 0.2323, "step": 54667 }, { "epoch": 0.9502685602044186, "grad_norm": 1.0672808014742001, "learning_rate": 6.473045252939402e-09, "loss": 0.1599, "step": 54668 }, { "epoch": 0.9502859427419215, "grad_norm": 1.193486901649796, "learning_rate": 6.4685311956215735e-09, "loss": 0.2034, "step": 54669 }, { "epoch": 0.9503033252794243, "grad_norm": 1.9966128699674413, "learning_rate": 6.464018702571794e-09, "loss": 0.1613, "step": 54670 }, { "epoch": 0.9503207078169271, "grad_norm": 1.1389702834274338, "learning_rate": 6.459507773804384e-09, "loss": 0.1592, "step": 54671 }, { "epoch": 0.95033809035443, "grad_norm": 0.934722739676488, "learning_rate": 6.454998409333612e-09, "loss": 0.1391, "step": 54672 }, { "epoch": 0.9503554728919328, "grad_norm": 1.3241498083740602, "learning_rate": 6.450490609173798e-09, "loss": 0.1908, "step": 54673 }, { "epoch": 0.9503728554294356, "grad_norm": 0.8333866863068689, "learning_rate": 6.445984373339153e-09, "loss": 0.2101, "step": 54674 }, { "epoch": 0.9503902379669384, "grad_norm": 1.6485612700354966, "learning_rate": 6.441479701844055e-09, "loss": 0.1852, "step": 54675 }, { "epoch": 0.9504076205044413, "grad_norm": 2.076812590771524, "learning_rate": 6.43697659470277e-09, "loss": 0.237, "step": 54676 }, { "epoch": 0.9504250030419441, "grad_norm": 2.569363549951077, "learning_rate": 6.432475051929509e-09, "loss": 0.1678, "step": 54677 }, { "epoch": 0.9504423855794469, "grad_norm": 2.4439906612175477, "learning_rate": 6.427975073538594e-09, "loss": 0.1945, "step": 54678 }, { "epoch": 0.9504597681169498, "grad_norm": 1.2011423596447421, "learning_rate": 6.423476659544236e-09, "loss": 0.1717, "step": 54679 }, { "epoch": 0.9504771506544525, "grad_norm": 1.9539802720515937, "learning_rate": 6.418979809960756e-09, "loss": 0.23, "step": 54680 }, { "epoch": 0.9504945331919553, "grad_norm": 1.2183738771367707, "learning_rate": 6.414484524802422e-09, "loss": 0.189, "step": 54681 }, { "epoch": 0.9505119157294581, "grad_norm": 1.0485589474694186, "learning_rate": 6.409990804083332e-09, "loss": 0.1396, "step": 54682 }, { "epoch": 0.950529298266961, "grad_norm": 1.08557880795745, "learning_rate": 6.405498647817919e-09, "loss": 0.1186, "step": 54683 }, { "epoch": 0.9505466808044638, "grad_norm": 0.6022971708159375, "learning_rate": 6.40100805602034e-09, "loss": 0.1504, "step": 54684 }, { "epoch": 0.9505640633419666, "grad_norm": 5.934225789637799, "learning_rate": 6.39651902870475e-09, "loss": 0.3356, "step": 54685 }, { "epoch": 0.9505814458794695, "grad_norm": 0.9861413841143328, "learning_rate": 6.39203156588547e-09, "loss": 0.1437, "step": 54686 }, { "epoch": 0.9505988284169723, "grad_norm": 1.9306737205181508, "learning_rate": 6.3875456675767656e-09, "loss": 0.161, "step": 54687 }, { "epoch": 0.9506162109544751, "grad_norm": 2.697085628213356, "learning_rate": 6.383061333792739e-09, "loss": 0.1459, "step": 54688 }, { "epoch": 0.950633593491978, "grad_norm": 1.584024754347372, "learning_rate": 6.37857856454771e-09, "loss": 0.2786, "step": 54689 }, { "epoch": 0.9506509760294808, "grad_norm": 1.2080265451961407, "learning_rate": 6.374097359855779e-09, "loss": 0.1016, "step": 54690 }, { "epoch": 0.9506683585669836, "grad_norm": 1.596482668083338, "learning_rate": 6.369617719731268e-09, "loss": 0.1604, "step": 54691 }, { "epoch": 0.9506857411044864, "grad_norm": 1.0174883968015962, "learning_rate": 6.365139644188278e-09, "loss": 0.1223, "step": 54692 }, { "epoch": 0.9507031236419893, "grad_norm": 2.2206383718819707, "learning_rate": 6.360663133240962e-09, "loss": 0.198, "step": 54693 }, { "epoch": 0.9507205061794921, "grad_norm": 1.192690675043267, "learning_rate": 6.356188186903699e-09, "loss": 0.1744, "step": 54694 }, { "epoch": 0.9507378887169949, "grad_norm": 1.2156087990664548, "learning_rate": 6.351714805190533e-09, "loss": 0.1728, "step": 54695 }, { "epoch": 0.9507552712544978, "grad_norm": 0.9449916197553108, "learning_rate": 6.3472429881156755e-09, "loss": 0.1353, "step": 54696 }, { "epoch": 0.9507726537920006, "grad_norm": 1.2886128795632745, "learning_rate": 6.342772735693224e-09, "loss": 0.2142, "step": 54697 }, { "epoch": 0.9507900363295034, "grad_norm": 1.7385419394871826, "learning_rate": 6.338304047937449e-09, "loss": 0.1759, "step": 54698 }, { "epoch": 0.9508074188670063, "grad_norm": 1.242750226102247, "learning_rate": 6.3338369248625015e-09, "loss": 0.1353, "step": 54699 }, { "epoch": 0.950824801404509, "grad_norm": 1.4669617039731269, "learning_rate": 6.329371366482539e-09, "loss": 0.2056, "step": 54700 }, { "epoch": 0.9508421839420118, "grad_norm": 0.9321096966697129, "learning_rate": 6.324907372811605e-09, "loss": 0.2019, "step": 54701 }, { "epoch": 0.9508595664795146, "grad_norm": 1.6583005446585437, "learning_rate": 6.3204449438640785e-09, "loss": 0.2496, "step": 54702 }, { "epoch": 0.9508769490170175, "grad_norm": 1.027882147066004, "learning_rate": 6.3159840796538914e-09, "loss": 0.1881, "step": 54703 }, { "epoch": 0.9508943315545203, "grad_norm": 1.0034470506650157, "learning_rate": 6.311524780195254e-09, "loss": 0.1864, "step": 54704 }, { "epoch": 0.9509117140920231, "grad_norm": 1.1579967367521358, "learning_rate": 6.307067045502268e-09, "loss": 0.1646, "step": 54705 }, { "epoch": 0.950929096629526, "grad_norm": 1.0635340163341926, "learning_rate": 6.302610875589087e-09, "loss": 0.1653, "step": 54706 }, { "epoch": 0.9509464791670288, "grad_norm": 2.1076937297732203, "learning_rate": 6.298156270469923e-09, "loss": 0.2259, "step": 54707 }, { "epoch": 0.9509638617045316, "grad_norm": 1.4362543604671594, "learning_rate": 6.293703230158764e-09, "loss": 0.2248, "step": 54708 }, { "epoch": 0.9509812442420345, "grad_norm": 1.2319911962855936, "learning_rate": 6.289251754669766e-09, "loss": 0.2022, "step": 54709 }, { "epoch": 0.9509986267795373, "grad_norm": 1.4718548659507773, "learning_rate": 6.284801844017085e-09, "loss": 0.2783, "step": 54710 }, { "epoch": 0.9510160093170401, "grad_norm": 2.0092009648211633, "learning_rate": 6.280353498214763e-09, "loss": 0.1964, "step": 54711 }, { "epoch": 0.9510333918545429, "grad_norm": 1.1748892143785683, "learning_rate": 6.275906717276958e-09, "loss": 0.1309, "step": 54712 }, { "epoch": 0.9510507743920458, "grad_norm": 2.6127354386623862, "learning_rate": 6.2714615012176564e-09, "loss": 0.2408, "step": 54713 }, { "epoch": 0.9510681569295486, "grad_norm": 3.5062001716883584, "learning_rate": 6.267017850051126e-09, "loss": 0.1897, "step": 54714 }, { "epoch": 0.9510855394670514, "grad_norm": 1.1393788284124098, "learning_rate": 6.2625757637913e-09, "loss": 0.2657, "step": 54715 }, { "epoch": 0.9511029220045543, "grad_norm": 1.2719715647549972, "learning_rate": 6.258135242452222e-09, "loss": 0.1936, "step": 54716 }, { "epoch": 0.9511203045420571, "grad_norm": 0.8754083483630589, "learning_rate": 6.25369628604816e-09, "loss": 0.1409, "step": 54717 }, { "epoch": 0.9511376870795599, "grad_norm": 1.9718808377254013, "learning_rate": 6.2492588945930455e-09, "loss": 0.1884, "step": 54718 }, { "epoch": 0.9511550696170628, "grad_norm": 0.8994978360607414, "learning_rate": 6.244823068100924e-09, "loss": 0.0903, "step": 54719 }, { "epoch": 0.9511724521545655, "grad_norm": 4.424198463375645, "learning_rate": 6.24038880658595e-09, "loss": 0.2171, "step": 54720 }, { "epoch": 0.9511898346920683, "grad_norm": 1.2806509750278436, "learning_rate": 6.2359561100621125e-09, "loss": 0.1918, "step": 54721 }, { "epoch": 0.9512072172295711, "grad_norm": 1.5598174110870184, "learning_rate": 6.231524978543457e-09, "loss": 0.2466, "step": 54722 }, { "epoch": 0.951224599767074, "grad_norm": 1.4589843716993351, "learning_rate": 6.227095412044081e-09, "loss": 0.1409, "step": 54723 }, { "epoch": 0.9512419823045768, "grad_norm": 1.3439813766232764, "learning_rate": 6.222667410577975e-09, "loss": 0.2067, "step": 54724 }, { "epoch": 0.9512593648420796, "grad_norm": 1.2712621031705078, "learning_rate": 6.2182409741591834e-09, "loss": 0.2049, "step": 54725 }, { "epoch": 0.9512767473795825, "grad_norm": 1.2581710152807664, "learning_rate": 6.21381610280175e-09, "loss": 0.1553, "step": 54726 }, { "epoch": 0.9512941299170853, "grad_norm": 1.1665738296848267, "learning_rate": 6.209392796519775e-09, "loss": 0.1161, "step": 54727 }, { "epoch": 0.9513115124545881, "grad_norm": 0.9410321128539222, "learning_rate": 6.204971055327079e-09, "loss": 0.1516, "step": 54728 }, { "epoch": 0.951328894992091, "grad_norm": 1.7729818587976935, "learning_rate": 6.20055087923782e-09, "loss": 0.1963, "step": 54729 }, { "epoch": 0.9513462775295938, "grad_norm": 0.8937691313257711, "learning_rate": 6.196132268265986e-09, "loss": 0.0892, "step": 54730 }, { "epoch": 0.9513636600670966, "grad_norm": 1.4576631833296945, "learning_rate": 6.1917152224256195e-09, "loss": 0.1846, "step": 54731 }, { "epoch": 0.9513810426045994, "grad_norm": 3.693532847887432, "learning_rate": 6.187299741730601e-09, "loss": 0.2484, "step": 54732 }, { "epoch": 0.9513984251421023, "grad_norm": 2.244954287342243, "learning_rate": 6.1828858261950835e-09, "loss": 0.1911, "step": 54733 }, { "epoch": 0.9514158076796051, "grad_norm": 0.8957936876559623, "learning_rate": 6.178473475832946e-09, "loss": 0.1769, "step": 54734 }, { "epoch": 0.9514331902171079, "grad_norm": 1.3460085093188612, "learning_rate": 6.174062690658233e-09, "loss": 0.2198, "step": 54735 }, { "epoch": 0.9514505727546108, "grad_norm": 1.0611236982860708, "learning_rate": 6.169653470684821e-09, "loss": 0.2585, "step": 54736 }, { "epoch": 0.9514679552921136, "grad_norm": 1.872143015838426, "learning_rate": 6.165245815926812e-09, "loss": 0.1625, "step": 54737 }, { "epoch": 0.9514853378296164, "grad_norm": 0.7219822049878761, "learning_rate": 6.160839726398082e-09, "loss": 0.2246, "step": 54738 }, { "epoch": 0.9515027203671191, "grad_norm": 1.4550998178346306, "learning_rate": 6.156435202112675e-09, "loss": 0.2454, "step": 54739 }, { "epoch": 0.951520102904622, "grad_norm": 0.839181793920113, "learning_rate": 6.152032243084526e-09, "loss": 0.1106, "step": 54740 }, { "epoch": 0.9515374854421248, "grad_norm": 2.2339296576776233, "learning_rate": 6.147630849327567e-09, "loss": 0.2045, "step": 54741 }, { "epoch": 0.9515548679796276, "grad_norm": 1.0414190262787606, "learning_rate": 6.143231020855788e-09, "loss": 0.2796, "step": 54742 }, { "epoch": 0.9515722505171305, "grad_norm": 1.574552258887137, "learning_rate": 6.138832757683065e-09, "loss": 0.2433, "step": 54743 }, { "epoch": 0.9515896330546333, "grad_norm": 1.2594856668283672, "learning_rate": 6.134436059823389e-09, "loss": 0.1338, "step": 54744 }, { "epoch": 0.9516070155921361, "grad_norm": 1.43527606881398, "learning_rate": 6.130040927290692e-09, "loss": 0.2886, "step": 54745 }, { "epoch": 0.951624398129639, "grad_norm": 2.087873521395261, "learning_rate": 6.1256473600989065e-09, "loss": 0.1509, "step": 54746 }, { "epoch": 0.9516417806671418, "grad_norm": 1.057849841870955, "learning_rate": 6.121255358261968e-09, "loss": 0.1285, "step": 54747 }, { "epoch": 0.9516591632046446, "grad_norm": 1.2040531979519937, "learning_rate": 6.116864921793752e-09, "loss": 0.1351, "step": 54748 }, { "epoch": 0.9516765457421474, "grad_norm": 0.8953821095443546, "learning_rate": 6.112476050708193e-09, "loss": 0.1428, "step": 54749 }, { "epoch": 0.9516939282796503, "grad_norm": 0.8085026927495376, "learning_rate": 6.108088745019224e-09, "loss": 0.1767, "step": 54750 }, { "epoch": 0.9517113108171531, "grad_norm": 1.1360851078689784, "learning_rate": 6.103703004740779e-09, "loss": 0.172, "step": 54751 }, { "epoch": 0.9517286933546559, "grad_norm": 1.0597001365843772, "learning_rate": 6.0993188298866795e-09, "loss": 0.0958, "step": 54752 }, { "epoch": 0.9517460758921588, "grad_norm": 2.004858317241242, "learning_rate": 6.094936220470859e-09, "loss": 0.186, "step": 54753 }, { "epoch": 0.9517634584296616, "grad_norm": 1.2885295359541282, "learning_rate": 6.090555176507195e-09, "loss": 0.1694, "step": 54754 }, { "epoch": 0.9517808409671644, "grad_norm": 2.1455036114380706, "learning_rate": 6.086175698009621e-09, "loss": 0.2903, "step": 54755 }, { "epoch": 0.9517982235046673, "grad_norm": 1.6603208545173571, "learning_rate": 6.0817977849919596e-09, "loss": 0.1471, "step": 54756 }, { "epoch": 0.9518156060421701, "grad_norm": 1.1931950617824978, "learning_rate": 6.077421437468089e-09, "loss": 0.1436, "step": 54757 }, { "epoch": 0.9518329885796729, "grad_norm": 0.9926652819048474, "learning_rate": 6.073046655451941e-09, "loss": 0.1612, "step": 54758 }, { "epoch": 0.9518503711171756, "grad_norm": 1.0339067376257596, "learning_rate": 6.068673438957339e-09, "loss": 0.2336, "step": 54759 }, { "epoch": 0.9518677536546785, "grad_norm": 1.2512398285687698, "learning_rate": 6.06430178799816e-09, "loss": 0.1746, "step": 54760 }, { "epoch": 0.9518851361921813, "grad_norm": 2.7127826753234423, "learning_rate": 6.059931702588284e-09, "loss": 0.182, "step": 54761 }, { "epoch": 0.9519025187296841, "grad_norm": 0.990884914932426, "learning_rate": 6.055563182741474e-09, "loss": 0.1796, "step": 54762 }, { "epoch": 0.951919901267187, "grad_norm": 0.9902150674396573, "learning_rate": 6.051196228471611e-09, "loss": 0.127, "step": 54763 }, { "epoch": 0.9519372838046898, "grad_norm": 1.2047563094072256, "learning_rate": 6.0468308397925716e-09, "loss": 0.227, "step": 54764 }, { "epoch": 0.9519546663421926, "grad_norm": 0.9499610547679354, "learning_rate": 6.042467016718234e-09, "loss": 0.1284, "step": 54765 }, { "epoch": 0.9519720488796954, "grad_norm": 2.329242620169916, "learning_rate": 6.038104759262253e-09, "loss": 0.1502, "step": 54766 }, { "epoch": 0.9519894314171983, "grad_norm": 1.445825595512952, "learning_rate": 6.033744067438673e-09, "loss": 0.1922, "step": 54767 }, { "epoch": 0.9520068139547011, "grad_norm": 1.161301981450372, "learning_rate": 6.029384941261151e-09, "loss": 0.2538, "step": 54768 }, { "epoch": 0.9520241964922039, "grad_norm": 2.111515456922292, "learning_rate": 6.025027380743619e-09, "loss": 0.2133, "step": 54769 }, { "epoch": 0.9520415790297068, "grad_norm": 0.9292390054350231, "learning_rate": 6.020671385899789e-09, "loss": 0.157, "step": 54770 }, { "epoch": 0.9520589615672096, "grad_norm": 2.4068240990218763, "learning_rate": 6.016316956743539e-09, "loss": 0.1638, "step": 54771 }, { "epoch": 0.9520763441047124, "grad_norm": 1.54393075596352, "learning_rate": 6.0119640932886904e-09, "loss": 0.125, "step": 54772 }, { "epoch": 0.9520937266422153, "grad_norm": 1.68177939500072, "learning_rate": 6.007612795548955e-09, "loss": 0.2442, "step": 54773 }, { "epoch": 0.9521111091797181, "grad_norm": 1.7073265623327538, "learning_rate": 6.0032630635381e-09, "loss": 0.2334, "step": 54774 }, { "epoch": 0.9521284917172209, "grad_norm": 0.9180016581337134, "learning_rate": 5.998914897270057e-09, "loss": 0.206, "step": 54775 }, { "epoch": 0.9521458742547237, "grad_norm": 1.4183843822313995, "learning_rate": 5.9945682967585395e-09, "loss": 0.1192, "step": 54776 }, { "epoch": 0.9521632567922266, "grad_norm": 1.2995148760553046, "learning_rate": 5.990223262017258e-09, "loss": 0.0989, "step": 54777 }, { "epoch": 0.9521806393297294, "grad_norm": 1.2259312979253187, "learning_rate": 5.985879793060089e-09, "loss": 0.1702, "step": 54778 }, { "epoch": 0.9521980218672321, "grad_norm": 1.5075181820628332, "learning_rate": 5.981537889900745e-09, "loss": 0.1504, "step": 54779 }, { "epoch": 0.952215404404735, "grad_norm": 1.7673264820794927, "learning_rate": 5.977197552552937e-09, "loss": 0.28, "step": 54780 }, { "epoch": 0.9522327869422378, "grad_norm": 1.7073316683795634, "learning_rate": 5.972858781030543e-09, "loss": 0.1727, "step": 54781 }, { "epoch": 0.9522501694797406, "grad_norm": 0.8489310951234713, "learning_rate": 5.968521575347163e-09, "loss": 0.2517, "step": 54782 }, { "epoch": 0.9522675520172434, "grad_norm": 1.3076489970307275, "learning_rate": 5.96418593551673e-09, "loss": 0.1841, "step": 54783 }, { "epoch": 0.9522849345547463, "grad_norm": 1.0729259372681115, "learning_rate": 5.9598518615528445e-09, "loss": 0.1644, "step": 54784 }, { "epoch": 0.9523023170922491, "grad_norm": 1.104366827702617, "learning_rate": 5.955519353469329e-09, "loss": 0.1631, "step": 54785 }, { "epoch": 0.9523196996297519, "grad_norm": 2.173721828580161, "learning_rate": 5.951188411279784e-09, "loss": 0.3937, "step": 54786 }, { "epoch": 0.9523370821672548, "grad_norm": 1.4790820659844617, "learning_rate": 5.946859034998086e-09, "loss": 0.1869, "step": 54787 }, { "epoch": 0.9523544647047576, "grad_norm": 1.1120592282877413, "learning_rate": 5.942531224637837e-09, "loss": 0.1632, "step": 54788 }, { "epoch": 0.9523718472422604, "grad_norm": 1.7503248603998265, "learning_rate": 5.938204980212858e-09, "loss": 0.221, "step": 54789 }, { "epoch": 0.9523892297797633, "grad_norm": 1.5571937200679091, "learning_rate": 5.9338803017368595e-09, "loss": 0.1987, "step": 54790 }, { "epoch": 0.9524066123172661, "grad_norm": 1.750707774303522, "learning_rate": 5.929557189223444e-09, "loss": 0.1171, "step": 54791 }, { "epoch": 0.9524239948547689, "grad_norm": 2.291196964678231, "learning_rate": 5.925235642686377e-09, "loss": 0.3494, "step": 54792 }, { "epoch": 0.9524413773922717, "grad_norm": 1.1156001707389274, "learning_rate": 5.9209156621393694e-09, "loss": 0.1658, "step": 54793 }, { "epoch": 0.9524587599297746, "grad_norm": 0.9957694524709225, "learning_rate": 5.916597247596078e-09, "loss": 0.1689, "step": 54794 }, { "epoch": 0.9524761424672774, "grad_norm": 1.147002956066817, "learning_rate": 5.912280399070213e-09, "loss": 0.2359, "step": 54795 }, { "epoch": 0.9524935250047802, "grad_norm": 1.3074219137156378, "learning_rate": 5.907965116575486e-09, "loss": 0.2008, "step": 54796 }, { "epoch": 0.9525109075422831, "grad_norm": 1.6561172419065258, "learning_rate": 5.903651400125497e-09, "loss": 0.1286, "step": 54797 }, { "epoch": 0.9525282900797859, "grad_norm": 2.880464174100238, "learning_rate": 5.8993392497340145e-09, "loss": 0.431, "step": 54798 }, { "epoch": 0.9525456726172886, "grad_norm": 1.4244159037795685, "learning_rate": 5.8950286654146365e-09, "loss": 0.2379, "step": 54799 }, { "epoch": 0.9525630551547914, "grad_norm": 0.8289544201540412, "learning_rate": 5.890719647181019e-09, "loss": 0.2601, "step": 54800 }, { "epoch": 0.9525804376922943, "grad_norm": 1.3580855321303233, "learning_rate": 5.886412195046875e-09, "loss": 0.1869, "step": 54801 }, { "epoch": 0.9525978202297971, "grad_norm": 1.6607282116157744, "learning_rate": 5.882106309025803e-09, "loss": 0.1717, "step": 54802 }, { "epoch": 0.9526152027672999, "grad_norm": 1.9838442831474017, "learning_rate": 5.877801989131515e-09, "loss": 0.3285, "step": 54803 }, { "epoch": 0.9526325853048028, "grad_norm": 4.110299552930448, "learning_rate": 5.873499235377555e-09, "loss": 0.1599, "step": 54804 }, { "epoch": 0.9526499678423056, "grad_norm": 0.7776285468963726, "learning_rate": 5.869198047777635e-09, "loss": 0.1947, "step": 54805 }, { "epoch": 0.9526673503798084, "grad_norm": 1.5529934293283343, "learning_rate": 5.86489842634541e-09, "loss": 0.2645, "step": 54806 }, { "epoch": 0.9526847329173113, "grad_norm": 2.1145873575826832, "learning_rate": 5.860600371094426e-09, "loss": 0.173, "step": 54807 }, { "epoch": 0.9527021154548141, "grad_norm": 1.065223332004211, "learning_rate": 5.856303882038394e-09, "loss": 0.1793, "step": 54808 }, { "epoch": 0.9527194979923169, "grad_norm": 1.1735185750916521, "learning_rate": 5.852008959190857e-09, "loss": 0.2055, "step": 54809 }, { "epoch": 0.9527368805298198, "grad_norm": 1.7058374624374457, "learning_rate": 5.8477156025654725e-09, "loss": 0.1513, "step": 54810 }, { "epoch": 0.9527542630673226, "grad_norm": 1.3921687467669501, "learning_rate": 5.8434238121758406e-09, "loss": 0.1888, "step": 54811 }, { "epoch": 0.9527716456048254, "grad_norm": 1.2634821143288528, "learning_rate": 5.839133588035561e-09, "loss": 0.1809, "step": 54812 }, { "epoch": 0.9527890281423282, "grad_norm": 1.4761741631311414, "learning_rate": 5.834844930158178e-09, "loss": 0.1883, "step": 54813 }, { "epoch": 0.9528064106798311, "grad_norm": 1.9993190477015723, "learning_rate": 5.830557838557348e-09, "loss": 0.1848, "step": 54814 }, { "epoch": 0.9528237932173339, "grad_norm": 4.805799913495466, "learning_rate": 5.826272313246672e-09, "loss": 0.2331, "step": 54815 }, { "epoch": 0.9528411757548367, "grad_norm": 1.2359475610754578, "learning_rate": 5.8219883542397485e-09, "loss": 0.1739, "step": 54816 }, { "epoch": 0.9528585582923396, "grad_norm": 1.6815774214405312, "learning_rate": 5.817705961550068e-09, "loss": 0.2174, "step": 54817 }, { "epoch": 0.9528759408298424, "grad_norm": 1.1844151749683662, "learning_rate": 5.81342513519123e-09, "loss": 0.2064, "step": 54818 }, { "epoch": 0.9528933233673451, "grad_norm": 1.5704087764382961, "learning_rate": 5.809145875176835e-09, "loss": 0.1646, "step": 54819 }, { "epoch": 0.9529107059048479, "grad_norm": 1.9774156579939464, "learning_rate": 5.804868181520484e-09, "loss": 0.1717, "step": 54820 }, { "epoch": 0.9529280884423508, "grad_norm": 0.8400471965414137, "learning_rate": 5.800592054235609e-09, "loss": 0.2382, "step": 54821 }, { "epoch": 0.9529454709798536, "grad_norm": 0.5564000443563465, "learning_rate": 5.7963174933359225e-09, "loss": 0.0977, "step": 54822 }, { "epoch": 0.9529628535173564, "grad_norm": 1.2215113514182283, "learning_rate": 5.792044498834858e-09, "loss": 0.1456, "step": 54823 }, { "epoch": 0.9529802360548593, "grad_norm": 1.4404512091500377, "learning_rate": 5.78777307074596e-09, "loss": 0.1875, "step": 54824 }, { "epoch": 0.9529976185923621, "grad_norm": 1.4083386404171199, "learning_rate": 5.78350320908283e-09, "loss": 0.2243, "step": 54825 }, { "epoch": 0.9530150011298649, "grad_norm": 2.326569291138035, "learning_rate": 5.779234913858955e-09, "loss": 0.205, "step": 54826 }, { "epoch": 0.9530323836673678, "grad_norm": 0.8172616634134334, "learning_rate": 5.774968185087881e-09, "loss": 0.1587, "step": 54827 }, { "epoch": 0.9530497662048706, "grad_norm": 0.6253427382065656, "learning_rate": 5.770703022783152e-09, "loss": 0.1286, "step": 54828 }, { "epoch": 0.9530671487423734, "grad_norm": 1.3851416628967672, "learning_rate": 5.766439426958258e-09, "loss": 0.1922, "step": 54829 }, { "epoch": 0.9530845312798762, "grad_norm": 1.1968499031510766, "learning_rate": 5.762177397626688e-09, "loss": 0.2081, "step": 54830 }, { "epoch": 0.9531019138173791, "grad_norm": 1.196949294267285, "learning_rate": 5.7579169348019874e-09, "loss": 0.1812, "step": 54831 }, { "epoch": 0.9531192963548819, "grad_norm": 0.962296838233832, "learning_rate": 5.7536580384976995e-09, "loss": 0.2377, "step": 54832 }, { "epoch": 0.9531366788923847, "grad_norm": 1.8403651663126956, "learning_rate": 5.749400708727203e-09, "loss": 0.1565, "step": 54833 }, { "epoch": 0.9531540614298876, "grad_norm": 1.2229942839331305, "learning_rate": 5.7451449455041545e-09, "loss": 0.1224, "step": 54834 }, { "epoch": 0.9531714439673904, "grad_norm": 1.6559882680381484, "learning_rate": 5.740890748841931e-09, "loss": 0.2113, "step": 54835 }, { "epoch": 0.9531888265048932, "grad_norm": 1.6936862796511238, "learning_rate": 5.736638118753967e-09, "loss": 0.1861, "step": 54836 }, { "epoch": 0.9532062090423961, "grad_norm": 2.2000191772761815, "learning_rate": 5.732387055253862e-09, "loss": 0.1694, "step": 54837 }, { "epoch": 0.9532235915798989, "grad_norm": 1.2755399183701228, "learning_rate": 5.72813755835505e-09, "loss": 0.1762, "step": 54838 }, { "epoch": 0.9532409741174016, "grad_norm": 1.233436628134618, "learning_rate": 5.723889628071021e-09, "loss": 0.1138, "step": 54839 }, { "epoch": 0.9532583566549044, "grad_norm": 1.7173862639704192, "learning_rate": 5.719643264415153e-09, "loss": 0.195, "step": 54840 }, { "epoch": 0.9532757391924073, "grad_norm": 2.0934459910800123, "learning_rate": 5.715398467400989e-09, "loss": 0.2173, "step": 54841 }, { "epoch": 0.9532931217299101, "grad_norm": 1.4021523618550735, "learning_rate": 5.711155237041965e-09, "loss": 0.2176, "step": 54842 }, { "epoch": 0.9533105042674129, "grad_norm": 1.5552741438944009, "learning_rate": 5.706913573351513e-09, "loss": 0.1039, "step": 54843 }, { "epoch": 0.9533278868049158, "grad_norm": 1.3222023027683973, "learning_rate": 5.702673476343067e-09, "loss": 0.1262, "step": 54844 }, { "epoch": 0.9533452693424186, "grad_norm": 1.842146232723918, "learning_rate": 5.698434946030062e-09, "loss": 0.0896, "step": 54845 }, { "epoch": 0.9533626518799214, "grad_norm": 1.7600442183875222, "learning_rate": 5.6941979824260415e-09, "loss": 0.2237, "step": 54846 }, { "epoch": 0.9533800344174242, "grad_norm": 1.3768459192007596, "learning_rate": 5.689962585544328e-09, "loss": 0.1779, "step": 54847 }, { "epoch": 0.9533974169549271, "grad_norm": 1.8299800337747418, "learning_rate": 5.685728755398301e-09, "loss": 0.1237, "step": 54848 }, { "epoch": 0.9534147994924299, "grad_norm": 2.945504560048597, "learning_rate": 5.681496492001447e-09, "loss": 0.1428, "step": 54849 }, { "epoch": 0.9534321820299327, "grad_norm": 1.2564760348739301, "learning_rate": 5.677265795367259e-09, "loss": 0.3135, "step": 54850 }, { "epoch": 0.9534495645674356, "grad_norm": 1.3454993059099196, "learning_rate": 5.673036665509001e-09, "loss": 0.1619, "step": 54851 }, { "epoch": 0.9534669471049384, "grad_norm": 1.174275871027962, "learning_rate": 5.668809102440109e-09, "loss": 0.1973, "step": 54852 }, { "epoch": 0.9534843296424412, "grad_norm": 2.15800114697632, "learning_rate": 5.664583106174126e-09, "loss": 0.1503, "step": 54853 }, { "epoch": 0.9535017121799441, "grad_norm": 2.270876387087357, "learning_rate": 5.660358676724264e-09, "loss": 0.1948, "step": 54854 }, { "epoch": 0.9535190947174469, "grad_norm": 1.2911940921293958, "learning_rate": 5.656135814104012e-09, "loss": 0.2084, "step": 54855 }, { "epoch": 0.9535364772549497, "grad_norm": 1.0726790322399058, "learning_rate": 5.651914518326639e-09, "loss": 0.1695, "step": 54856 }, { "epoch": 0.9535538597924526, "grad_norm": 1.0829489759876103, "learning_rate": 5.647694789405688e-09, "loss": 0.0861, "step": 54857 }, { "epoch": 0.9535712423299554, "grad_norm": 1.7131886417127995, "learning_rate": 5.643476627354482e-09, "loss": 0.2693, "step": 54858 }, { "epoch": 0.9535886248674581, "grad_norm": 1.384034720429359, "learning_rate": 5.6392600321862886e-09, "loss": 0.1303, "step": 54859 }, { "epoch": 0.9536060074049609, "grad_norm": 0.8348251373292739, "learning_rate": 5.635045003914651e-09, "loss": 0.2156, "step": 54860 }, { "epoch": 0.9536233899424638, "grad_norm": 1.3779800105362259, "learning_rate": 5.630831542552728e-09, "loss": 0.1433, "step": 54861 }, { "epoch": 0.9536407724799666, "grad_norm": 1.0819526540538449, "learning_rate": 5.626619648114061e-09, "loss": 0.1361, "step": 54862 }, { "epoch": 0.9536581550174694, "grad_norm": 1.6222349094508128, "learning_rate": 5.622409320611809e-09, "loss": 0.2545, "step": 54863 }, { "epoch": 0.9536755375549723, "grad_norm": 0.8163579922774261, "learning_rate": 5.618200560059516e-09, "loss": 0.1005, "step": 54864 }, { "epoch": 0.9536929200924751, "grad_norm": 1.8875771047754075, "learning_rate": 5.613993366470393e-09, "loss": 0.2095, "step": 54865 }, { "epoch": 0.9537103026299779, "grad_norm": 1.6832389216097283, "learning_rate": 5.609787739857819e-09, "loss": 0.1758, "step": 54866 }, { "epoch": 0.9537276851674807, "grad_norm": 2.8157679924692363, "learning_rate": 5.60558368023506e-09, "loss": 0.329, "step": 54867 }, { "epoch": 0.9537450677049836, "grad_norm": 0.7662489910668167, "learning_rate": 5.60138118761555e-09, "loss": 0.1954, "step": 54868 }, { "epoch": 0.9537624502424864, "grad_norm": 1.0787988200027459, "learning_rate": 5.5971802620125575e-09, "loss": 0.1308, "step": 54869 }, { "epoch": 0.9537798327799892, "grad_norm": 1.627582616375288, "learning_rate": 5.5929809034394035e-09, "loss": 0.1559, "step": 54870 }, { "epoch": 0.9537972153174921, "grad_norm": 2.1788832110762857, "learning_rate": 5.5887831119093006e-09, "loss": 0.177, "step": 54871 }, { "epoch": 0.9538145978549949, "grad_norm": 1.4328536376285488, "learning_rate": 5.5845868874357385e-09, "loss": 0.2338, "step": 54872 }, { "epoch": 0.9538319803924977, "grad_norm": 1.9445358914182638, "learning_rate": 5.580392230031872e-09, "loss": 0.2386, "step": 54873 }, { "epoch": 0.9538493629300006, "grad_norm": 1.3896994987057771, "learning_rate": 5.57619913971108e-09, "loss": 0.2781, "step": 54874 }, { "epoch": 0.9538667454675034, "grad_norm": 1.491985561056485, "learning_rate": 5.572007616486573e-09, "loss": 0.133, "step": 54875 }, { "epoch": 0.9538841280050062, "grad_norm": 0.7739024834820741, "learning_rate": 5.5678176603716765e-09, "loss": 0.2526, "step": 54876 }, { "epoch": 0.953901510542509, "grad_norm": 1.561163804772454, "learning_rate": 5.563629271379711e-09, "loss": 0.3602, "step": 54877 }, { "epoch": 0.9539188930800118, "grad_norm": 1.3414892469005, "learning_rate": 5.5594424495239434e-09, "loss": 0.182, "step": 54878 }, { "epoch": 0.9539362756175146, "grad_norm": 1.5533591126476007, "learning_rate": 5.555257194817531e-09, "loss": 0.1774, "step": 54879 }, { "epoch": 0.9539536581550174, "grad_norm": 6.385523180503655, "learning_rate": 5.551073507273907e-09, "loss": 0.3475, "step": 54880 }, { "epoch": 0.9539710406925203, "grad_norm": 2.131107123869811, "learning_rate": 5.546891386906172e-09, "loss": 0.2644, "step": 54881 }, { "epoch": 0.9539884232300231, "grad_norm": 1.465878712976146, "learning_rate": 5.542710833727704e-09, "loss": 0.2289, "step": 54882 }, { "epoch": 0.9540058057675259, "grad_norm": 1.4287432310752541, "learning_rate": 5.538531847751715e-09, "loss": 0.2287, "step": 54883 }, { "epoch": 0.9540231883050287, "grad_norm": 2.72326957353082, "learning_rate": 5.534354428991417e-09, "loss": 0.1792, "step": 54884 }, { "epoch": 0.9540405708425316, "grad_norm": 0.9914320496170249, "learning_rate": 5.530178577460132e-09, "loss": 0.1442, "step": 54885 }, { "epoch": 0.9540579533800344, "grad_norm": 1.6525238915252256, "learning_rate": 5.5260042931709605e-09, "loss": 0.0991, "step": 54886 }, { "epoch": 0.9540753359175372, "grad_norm": 1.28745448647001, "learning_rate": 5.5218315761372255e-09, "loss": 0.1509, "step": 54887 }, { "epoch": 0.9540927184550401, "grad_norm": 2.02437427354401, "learning_rate": 5.517660426372195e-09, "loss": 0.2278, "step": 54888 }, { "epoch": 0.9541101009925429, "grad_norm": 1.5217351647886013, "learning_rate": 5.513490843888968e-09, "loss": 0.3754, "step": 54889 }, { "epoch": 0.9541274835300457, "grad_norm": 1.0943559196216006, "learning_rate": 5.509322828700869e-09, "loss": 0.1753, "step": 54890 }, { "epoch": 0.9541448660675486, "grad_norm": 1.5148269009919026, "learning_rate": 5.505156380821052e-09, "loss": 0.199, "step": 54891 }, { "epoch": 0.9541622486050514, "grad_norm": 1.0832251368202221, "learning_rate": 5.500991500262786e-09, "loss": 0.1676, "step": 54892 }, { "epoch": 0.9541796311425542, "grad_norm": 1.6438983023600056, "learning_rate": 5.496828187039171e-09, "loss": 0.1863, "step": 54893 }, { "epoch": 0.954197013680057, "grad_norm": 0.9950622755254416, "learning_rate": 5.4926664411634184e-09, "loss": 0.1723, "step": 54894 }, { "epoch": 0.9542143962175599, "grad_norm": 1.5839916508044503, "learning_rate": 5.488506262648795e-09, "loss": 0.1832, "step": 54895 }, { "epoch": 0.9542317787550627, "grad_norm": 1.9227016486023953, "learning_rate": 5.484347651508459e-09, "loss": 0.1794, "step": 54896 }, { "epoch": 0.9542491612925655, "grad_norm": 1.0459237787726097, "learning_rate": 5.480190607755564e-09, "loss": 0.1849, "step": 54897 }, { "epoch": 0.9542665438300683, "grad_norm": 1.0656072105801602, "learning_rate": 5.476035131403322e-09, "loss": 0.1127, "step": 54898 }, { "epoch": 0.9542839263675711, "grad_norm": 0.8938552428418286, "learning_rate": 5.4718812224648915e-09, "loss": 0.1402, "step": 54899 }, { "epoch": 0.9543013089050739, "grad_norm": 0.8958480372333715, "learning_rate": 5.467728880953371e-09, "loss": 0.2137, "step": 54900 }, { "epoch": 0.9543186914425767, "grad_norm": 1.1561067837630743, "learning_rate": 5.463578106882027e-09, "loss": 0.1668, "step": 54901 }, { "epoch": 0.9543360739800796, "grad_norm": 1.9397825462349518, "learning_rate": 5.459428900263963e-09, "loss": 0.1985, "step": 54902 }, { "epoch": 0.9543534565175824, "grad_norm": 1.0303291276450328, "learning_rate": 5.455281261112332e-09, "loss": 0.1147, "step": 54903 }, { "epoch": 0.9543708390550852, "grad_norm": 1.7320939927396886, "learning_rate": 5.451135189440292e-09, "loss": 0.1616, "step": 54904 }, { "epoch": 0.9543882215925881, "grad_norm": 1.4530178970164327, "learning_rate": 5.446990685260999e-09, "loss": 0.184, "step": 54905 }, { "epoch": 0.9544056041300909, "grad_norm": 1.419644901881371, "learning_rate": 5.442847748587498e-09, "loss": 0.1937, "step": 54906 }, { "epoch": 0.9544229866675937, "grad_norm": 0.8277300858383313, "learning_rate": 5.438706379433e-09, "loss": 0.1293, "step": 54907 }, { "epoch": 0.9544403692050966, "grad_norm": 0.7608521471089609, "learning_rate": 5.434566577810662e-09, "loss": 0.1942, "step": 54908 }, { "epoch": 0.9544577517425994, "grad_norm": 2.553902021353141, "learning_rate": 5.430428343733528e-09, "loss": 0.214, "step": 54909 }, { "epoch": 0.9544751342801022, "grad_norm": 0.8361164565877647, "learning_rate": 5.426291677214812e-09, "loss": 0.2763, "step": 54910 }, { "epoch": 0.954492516817605, "grad_norm": 1.423660596629585, "learning_rate": 5.4221565782675e-09, "loss": 0.162, "step": 54911 }, { "epoch": 0.9545098993551079, "grad_norm": 0.7295886510301838, "learning_rate": 5.418023046904807e-09, "loss": 0.1384, "step": 54912 }, { "epoch": 0.9545272818926107, "grad_norm": 1.300252224271929, "learning_rate": 5.413891083139721e-09, "loss": 0.1774, "step": 54913 }, { "epoch": 0.9545446644301135, "grad_norm": 1.1555984652847773, "learning_rate": 5.409760686985454e-09, "loss": 0.2166, "step": 54914 }, { "epoch": 0.9545620469676164, "grad_norm": 1.4406257811614989, "learning_rate": 5.405631858455051e-09, "loss": 0.2382, "step": 54915 }, { "epoch": 0.9545794295051192, "grad_norm": 0.7861862056672325, "learning_rate": 5.401504597561557e-09, "loss": 0.1624, "step": 54916 }, { "epoch": 0.954596812042622, "grad_norm": 2.5976184691264095, "learning_rate": 5.397378904318129e-09, "loss": 0.2084, "step": 54917 }, { "epoch": 0.9546141945801248, "grad_norm": 1.1814231348819373, "learning_rate": 5.3932547787378104e-09, "loss": 0.1952, "step": 54918 }, { "epoch": 0.9546315771176276, "grad_norm": 1.4478769681001362, "learning_rate": 5.3891322208336475e-09, "loss": 0.1516, "step": 54919 }, { "epoch": 0.9546489596551304, "grad_norm": 1.6273670059003003, "learning_rate": 5.385011230618797e-09, "loss": 0.2235, "step": 54920 }, { "epoch": 0.9546663421926332, "grad_norm": 1.363387111494306, "learning_rate": 5.380891808106192e-09, "loss": 0.2158, "step": 54921 }, { "epoch": 0.9546837247301361, "grad_norm": 2.1424803770169443, "learning_rate": 5.376773953308933e-09, "loss": 0.1849, "step": 54922 }, { "epoch": 0.9547011072676389, "grad_norm": 1.3435645490432244, "learning_rate": 5.372657666240121e-09, "loss": 0.1314, "step": 54923 }, { "epoch": 0.9547184898051417, "grad_norm": 1.2328965481467793, "learning_rate": 5.368542946912802e-09, "loss": 0.2046, "step": 54924 }, { "epoch": 0.9547358723426446, "grad_norm": 1.2656218152036671, "learning_rate": 5.364429795339909e-09, "loss": 0.2235, "step": 54925 }, { "epoch": 0.9547532548801474, "grad_norm": 2.1117421462781274, "learning_rate": 5.360318211534598e-09, "loss": 0.2396, "step": 54926 }, { "epoch": 0.9547706374176502, "grad_norm": 1.3253211516203902, "learning_rate": 5.35620819550986e-09, "loss": 0.1453, "step": 54927 }, { "epoch": 0.9547880199551531, "grad_norm": 1.7700733965953306, "learning_rate": 5.352099747278738e-09, "loss": 0.1753, "step": 54928 }, { "epoch": 0.9548054024926559, "grad_norm": 2.5504201389735277, "learning_rate": 5.347992866854223e-09, "loss": 0.1835, "step": 54929 }, { "epoch": 0.9548227850301587, "grad_norm": 1.6218300165471022, "learning_rate": 5.34388755424936e-09, "loss": 0.2234, "step": 54930 }, { "epoch": 0.9548401675676615, "grad_norm": 1.2544487855466167, "learning_rate": 5.339783809477139e-09, "loss": 0.1614, "step": 54931 }, { "epoch": 0.9548575501051644, "grad_norm": 1.7148900973482233, "learning_rate": 5.335681632550548e-09, "loss": 0.1527, "step": 54932 }, { "epoch": 0.9548749326426672, "grad_norm": 2.061028055989344, "learning_rate": 5.331581023482579e-09, "loss": 0.1761, "step": 54933 }, { "epoch": 0.95489231518017, "grad_norm": 3.6413908907208583, "learning_rate": 5.32748198228633e-09, "loss": 0.1715, "step": 54934 }, { "epoch": 0.9549096977176729, "grad_norm": 1.068082831176473, "learning_rate": 5.323384508974738e-09, "loss": 0.1267, "step": 54935 }, { "epoch": 0.9549270802551757, "grad_norm": 1.0549616527643964, "learning_rate": 5.31928860356079e-09, "loss": 0.1261, "step": 54936 }, { "epoch": 0.9549444627926785, "grad_norm": 2.2564774394873397, "learning_rate": 5.315194266057421e-09, "loss": 0.2001, "step": 54937 }, { "epoch": 0.9549618453301812, "grad_norm": 1.3688959476154428, "learning_rate": 5.311101496477677e-09, "loss": 0.1673, "step": 54938 }, { "epoch": 0.9549792278676841, "grad_norm": 1.0584250707844707, "learning_rate": 5.307010294834491e-09, "loss": 0.1831, "step": 54939 }, { "epoch": 0.9549966104051869, "grad_norm": 0.8806515084653048, "learning_rate": 5.302920661140798e-09, "loss": 0.1831, "step": 54940 }, { "epoch": 0.9550139929426897, "grad_norm": 1.2846824449800824, "learning_rate": 5.298832595409641e-09, "loss": 0.1424, "step": 54941 }, { "epoch": 0.9550313754801926, "grad_norm": 1.2463190528224937, "learning_rate": 5.294746097653957e-09, "loss": 0.1693, "step": 54942 }, { "epoch": 0.9550487580176954, "grad_norm": 1.0698762747205532, "learning_rate": 5.290661167886623e-09, "loss": 0.1222, "step": 54943 }, { "epoch": 0.9550661405551982, "grad_norm": 1.5030233622079043, "learning_rate": 5.286577806120685e-09, "loss": 0.2016, "step": 54944 }, { "epoch": 0.9550835230927011, "grad_norm": 0.705423880865257, "learning_rate": 5.28249601236902e-09, "loss": 0.1941, "step": 54945 }, { "epoch": 0.9551009056302039, "grad_norm": 1.087461081673775, "learning_rate": 5.278415786644563e-09, "loss": 0.3317, "step": 54946 }, { "epoch": 0.9551182881677067, "grad_norm": 1.5395801026733027, "learning_rate": 5.274337128960304e-09, "loss": 0.1327, "step": 54947 }, { "epoch": 0.9551356707052095, "grad_norm": 0.9334616222471549, "learning_rate": 5.270260039329178e-09, "loss": 0.1425, "step": 54948 }, { "epoch": 0.9551530532427124, "grad_norm": 1.6249405812204196, "learning_rate": 5.2661845177640055e-09, "loss": 0.3232, "step": 54949 }, { "epoch": 0.9551704357802152, "grad_norm": 1.2950998100954931, "learning_rate": 5.2621105642777775e-09, "loss": 0.1777, "step": 54950 }, { "epoch": 0.955187818317718, "grad_norm": 2.687980200201594, "learning_rate": 5.2580381788834285e-09, "loss": 0.2189, "step": 54951 }, { "epoch": 0.9552052008552209, "grad_norm": 2.377738666190067, "learning_rate": 5.253967361593781e-09, "loss": 0.2352, "step": 54952 }, { "epoch": 0.9552225833927237, "grad_norm": 1.6452654237957491, "learning_rate": 5.249898112421825e-09, "loss": 0.1665, "step": 54953 }, { "epoch": 0.9552399659302265, "grad_norm": 3.1056178674500705, "learning_rate": 5.245830431380438e-09, "loss": 0.3544, "step": 54954 }, { "epoch": 0.9552573484677294, "grad_norm": 2.697572051163647, "learning_rate": 5.2417643184825e-09, "loss": 0.2597, "step": 54955 }, { "epoch": 0.9552747310052322, "grad_norm": 2.0065580864383126, "learning_rate": 5.2376997737408336e-09, "loss": 0.3758, "step": 54956 }, { "epoch": 0.955292113542735, "grad_norm": 2.996408961323425, "learning_rate": 5.233636797168428e-09, "loss": 0.2162, "step": 54957 }, { "epoch": 0.9553094960802377, "grad_norm": 1.2695853026552357, "learning_rate": 5.229575388778107e-09, "loss": 0.1738, "step": 54958 }, { "epoch": 0.9553268786177406, "grad_norm": 2.743292598230414, "learning_rate": 5.225515548582749e-09, "loss": 0.2406, "step": 54959 }, { "epoch": 0.9553442611552434, "grad_norm": 2.5441861788573785, "learning_rate": 5.221457276595232e-09, "loss": 0.2405, "step": 54960 }, { "epoch": 0.9553616436927462, "grad_norm": 1.307134156654518, "learning_rate": 5.2174005728284346e-09, "loss": 0.1638, "step": 54961 }, { "epoch": 0.9553790262302491, "grad_norm": 2.0463924326806033, "learning_rate": 5.21334543729518e-09, "loss": 0.1177, "step": 54962 }, { "epoch": 0.9553964087677519, "grad_norm": 1.1539092801101567, "learning_rate": 5.209291870008348e-09, "loss": 0.2269, "step": 54963 }, { "epoch": 0.9554137913052547, "grad_norm": 1.1369622162429573, "learning_rate": 5.205239870980705e-09, "loss": 0.1175, "step": 54964 }, { "epoch": 0.9554311738427576, "grad_norm": 1.6584012207325758, "learning_rate": 5.201189440225184e-09, "loss": 0.141, "step": 54965 }, { "epoch": 0.9554485563802604, "grad_norm": 1.644801128832767, "learning_rate": 5.197140577754666e-09, "loss": 0.194, "step": 54966 }, { "epoch": 0.9554659389177632, "grad_norm": 1.0783401618545632, "learning_rate": 5.193093283581862e-09, "loss": 0.1854, "step": 54967 }, { "epoch": 0.955483321455266, "grad_norm": 1.3224749498326251, "learning_rate": 5.18904755771965e-09, "loss": 0.2148, "step": 54968 }, { "epoch": 0.9555007039927689, "grad_norm": 1.1672348919084148, "learning_rate": 5.185003400180909e-09, "loss": 0.1268, "step": 54969 }, { "epoch": 0.9555180865302717, "grad_norm": 1.1679301575398449, "learning_rate": 5.180960810978352e-09, "loss": 0.2283, "step": 54970 }, { "epoch": 0.9555354690677745, "grad_norm": 1.252660277937902, "learning_rate": 5.176919790124912e-09, "loss": 0.1395, "step": 54971 }, { "epoch": 0.9555528516052774, "grad_norm": 1.2531870929936244, "learning_rate": 5.1728803376332455e-09, "loss": 0.1802, "step": 54972 }, { "epoch": 0.9555702341427802, "grad_norm": 1.209990622955002, "learning_rate": 5.168842453516342e-09, "loss": 0.1253, "step": 54973 }, { "epoch": 0.955587616680283, "grad_norm": 0.6548035546008589, "learning_rate": 5.164806137786859e-09, "loss": 0.113, "step": 54974 }, { "epoch": 0.9556049992177859, "grad_norm": 1.0045094928567873, "learning_rate": 5.160771390457563e-09, "loss": 0.0887, "step": 54975 }, { "epoch": 0.9556223817552887, "grad_norm": 1.4663857520909462, "learning_rate": 5.156738211541389e-09, "loss": 0.1694, "step": 54976 }, { "epoch": 0.9556397642927915, "grad_norm": 3.273512756007701, "learning_rate": 5.152706601051049e-09, "loss": 0.1655, "step": 54977 }, { "epoch": 0.9556571468302942, "grad_norm": 1.5940892540206462, "learning_rate": 5.14867655899931e-09, "loss": 0.1234, "step": 54978 }, { "epoch": 0.9556745293677971, "grad_norm": 0.8187824094925842, "learning_rate": 5.14464808539894e-09, "loss": 0.2123, "step": 54979 }, { "epoch": 0.9556919119052999, "grad_norm": 1.3838406004491661, "learning_rate": 5.140621180262705e-09, "loss": 0.2946, "step": 54980 }, { "epoch": 0.9557092944428027, "grad_norm": 1.7467145437003115, "learning_rate": 5.136595843603375e-09, "loss": 0.2444, "step": 54981 }, { "epoch": 0.9557266769803056, "grad_norm": 1.4295891577531261, "learning_rate": 5.1325720754337165e-09, "loss": 0.2438, "step": 54982 }, { "epoch": 0.9557440595178084, "grad_norm": 1.3987818671617176, "learning_rate": 5.128549875766497e-09, "loss": 0.1472, "step": 54983 }, { "epoch": 0.9557614420553112, "grad_norm": 1.5592466312440738, "learning_rate": 5.124529244614428e-09, "loss": 0.2416, "step": 54984 }, { "epoch": 0.955778824592814, "grad_norm": 1.134301825420583, "learning_rate": 5.120510181990278e-09, "loss": 0.1052, "step": 54985 }, { "epoch": 0.9557962071303169, "grad_norm": 3.9089720829841843, "learning_rate": 5.116492687906815e-09, "loss": 0.3414, "step": 54986 }, { "epoch": 0.9558135896678197, "grad_norm": 1.8053358671244804, "learning_rate": 5.112476762376694e-09, "loss": 0.1931, "step": 54987 }, { "epoch": 0.9558309722053225, "grad_norm": 0.809478731511403, "learning_rate": 5.108462405412739e-09, "loss": 0.1517, "step": 54988 }, { "epoch": 0.9558483547428254, "grad_norm": 0.7404260845964749, "learning_rate": 5.104449617027606e-09, "loss": 0.1131, "step": 54989 }, { "epoch": 0.9558657372803282, "grad_norm": 1.797416527214391, "learning_rate": 5.100438397234008e-09, "loss": 0.1656, "step": 54990 }, { "epoch": 0.955883119817831, "grad_norm": 0.9085975943599804, "learning_rate": 5.096428746044656e-09, "loss": 0.1995, "step": 54991 }, { "epoch": 0.9559005023553339, "grad_norm": 1.1279931124597598, "learning_rate": 5.0924206634723745e-09, "loss": 0.1702, "step": 54992 }, { "epoch": 0.9559178848928367, "grad_norm": 2.684447111577342, "learning_rate": 5.088414149529708e-09, "loss": 0.2033, "step": 54993 }, { "epoch": 0.9559352674303395, "grad_norm": 1.2436412856904582, "learning_rate": 5.084409204229479e-09, "loss": 0.2579, "step": 54994 }, { "epoch": 0.9559526499678423, "grad_norm": 0.9695860326052629, "learning_rate": 5.0804058275842336e-09, "loss": 0.2427, "step": 54995 }, { "epoch": 0.9559700325053452, "grad_norm": 4.362862306097159, "learning_rate": 5.076404019606851e-09, "loss": 0.2869, "step": 54996 }, { "epoch": 0.955987415042848, "grad_norm": 2.146393671486814, "learning_rate": 5.072403780309875e-09, "loss": 0.2146, "step": 54997 }, { "epoch": 0.9560047975803507, "grad_norm": 2.1601446504655946, "learning_rate": 5.06840510970602e-09, "loss": 0.173, "step": 54998 }, { "epoch": 0.9560221801178536, "grad_norm": 1.1779595759716026, "learning_rate": 5.064408007807996e-09, "loss": 0.1789, "step": 54999 }, { "epoch": 0.9560395626553564, "grad_norm": 1.4271086754444577, "learning_rate": 5.060412474628462e-09, "loss": 0.1546, "step": 55000 }, { "epoch": 0.9560569451928592, "grad_norm": 1.2675817321711749, "learning_rate": 5.056418510180016e-09, "loss": 0.2315, "step": 55001 }, { "epoch": 0.956074327730362, "grad_norm": 1.6417491631440193, "learning_rate": 5.0524261144753725e-09, "loss": 0.1231, "step": 55002 }, { "epoch": 0.9560917102678649, "grad_norm": 1.6520475740848102, "learning_rate": 5.048435287527131e-09, "loss": 0.2049, "step": 55003 }, { "epoch": 0.9561090928053677, "grad_norm": 0.9525162219078629, "learning_rate": 5.04444602934806e-09, "loss": 0.1978, "step": 55004 }, { "epoch": 0.9561264753428705, "grad_norm": 1.4729643800346863, "learning_rate": 5.0404583399507594e-09, "loss": 0.2592, "step": 55005 }, { "epoch": 0.9561438578803734, "grad_norm": 1.60316086938225, "learning_rate": 5.036472219347776e-09, "loss": 0.0724, "step": 55006 }, { "epoch": 0.9561612404178762, "grad_norm": 1.4589118324711716, "learning_rate": 5.032487667551821e-09, "loss": 0.1546, "step": 55007 }, { "epoch": 0.956178622955379, "grad_norm": 0.9563003331728839, "learning_rate": 5.028504684575496e-09, "loss": 0.1433, "step": 55008 }, { "epoch": 0.9561960054928819, "grad_norm": 1.5718811898400238, "learning_rate": 5.024523270431458e-09, "loss": 0.2136, "step": 55009 }, { "epoch": 0.9562133880303847, "grad_norm": 1.3262036523874863, "learning_rate": 5.020543425132362e-09, "loss": 0.1968, "step": 55010 }, { "epoch": 0.9562307705678875, "grad_norm": 1.0335382318187534, "learning_rate": 5.016565148690699e-09, "loss": 0.1361, "step": 55011 }, { "epoch": 0.9562481531053904, "grad_norm": 3.7472104773458534, "learning_rate": 5.012588441119181e-09, "loss": 0.2187, "step": 55012 }, { "epoch": 0.9562655356428932, "grad_norm": 1.0845768919493408, "learning_rate": 5.008613302430354e-09, "loss": 0.1541, "step": 55013 }, { "epoch": 0.956282918180396, "grad_norm": 0.9713444988078241, "learning_rate": 5.004639732636817e-09, "loss": 0.283, "step": 55014 }, { "epoch": 0.9563003007178988, "grad_norm": 1.0507935752191864, "learning_rate": 5.000667731751229e-09, "loss": 0.2257, "step": 55015 }, { "epoch": 0.9563176832554017, "grad_norm": 2.7112171297123067, "learning_rate": 4.996697299786134e-09, "loss": 0.1812, "step": 55016 }, { "epoch": 0.9563350657929044, "grad_norm": 4.239586715798916, "learning_rate": 4.992728436754134e-09, "loss": 0.1972, "step": 55017 }, { "epoch": 0.9563524483304072, "grad_norm": 1.772833671308208, "learning_rate": 4.988761142667774e-09, "loss": 0.1715, "step": 55018 }, { "epoch": 0.95636983086791, "grad_norm": 2.0670092112373015, "learning_rate": 4.984795417539656e-09, "loss": 0.154, "step": 55019 }, { "epoch": 0.9563872134054129, "grad_norm": 1.1281730827802332, "learning_rate": 4.980831261382379e-09, "loss": 0.1765, "step": 55020 }, { "epoch": 0.9564045959429157, "grad_norm": 1.6968310026455955, "learning_rate": 4.976868674208434e-09, "loss": 0.2934, "step": 55021 }, { "epoch": 0.9564219784804185, "grad_norm": 0.9514936450799307, "learning_rate": 4.9729076560304226e-09, "loss": 0.2434, "step": 55022 }, { "epoch": 0.9564393610179214, "grad_norm": 1.259391410266805, "learning_rate": 4.9689482068608905e-09, "loss": 0.2109, "step": 55023 }, { "epoch": 0.9564567435554242, "grad_norm": 1.642595432190602, "learning_rate": 4.9649903267124375e-09, "loss": 0.1781, "step": 55024 }, { "epoch": 0.956474126092927, "grad_norm": 1.4856935953186345, "learning_rate": 4.961034015597554e-09, "loss": 0.1912, "step": 55025 }, { "epoch": 0.9564915086304299, "grad_norm": 1.1386354756196635, "learning_rate": 4.9570792735287304e-09, "loss": 0.2033, "step": 55026 }, { "epoch": 0.9565088911679327, "grad_norm": 1.6052812681324213, "learning_rate": 4.9531261005186235e-09, "loss": 0.2472, "step": 55027 }, { "epoch": 0.9565262737054355, "grad_norm": 0.9275405839363071, "learning_rate": 4.949174496579722e-09, "loss": 0.1473, "step": 55028 }, { "epoch": 0.9565436562429384, "grad_norm": 1.7774792819326433, "learning_rate": 4.945224461724462e-09, "loss": 0.2355, "step": 55029 }, { "epoch": 0.9565610387804412, "grad_norm": 1.0316727929413871, "learning_rate": 4.941275995965499e-09, "loss": 0.2391, "step": 55030 }, { "epoch": 0.956578421317944, "grad_norm": 2.034573153271601, "learning_rate": 4.937329099315212e-09, "loss": 0.191, "step": 55031 }, { "epoch": 0.9565958038554468, "grad_norm": 2.090606312996844, "learning_rate": 4.933383771786204e-09, "loss": 0.3167, "step": 55032 }, { "epoch": 0.9566131863929497, "grad_norm": 1.352763135131529, "learning_rate": 4.929440013390962e-09, "loss": 0.2107, "step": 55033 }, { "epoch": 0.9566305689304525, "grad_norm": 1.2978109714438735, "learning_rate": 4.925497824141978e-09, "loss": 0.277, "step": 55034 }, { "epoch": 0.9566479514679553, "grad_norm": 1.3032351838263907, "learning_rate": 4.92155720405174e-09, "loss": 0.1771, "step": 55035 }, { "epoch": 0.9566653340054582, "grad_norm": 1.6945424707333188, "learning_rate": 4.917618153132741e-09, "loss": 0.1628, "step": 55036 }, { "epoch": 0.9566827165429609, "grad_norm": 2.635328107362223, "learning_rate": 4.9136806713974685e-09, "loss": 0.2187, "step": 55037 }, { "epoch": 0.9567000990804637, "grad_norm": 1.1515852132817752, "learning_rate": 4.909744758858414e-09, "loss": 0.1845, "step": 55038 }, { "epoch": 0.9567174816179665, "grad_norm": 1.1861183905585118, "learning_rate": 4.905810415528066e-09, "loss": 0.138, "step": 55039 }, { "epoch": 0.9567348641554694, "grad_norm": 0.8357357126998636, "learning_rate": 4.901877641418861e-09, "loss": 0.0914, "step": 55040 }, { "epoch": 0.9567522466929722, "grad_norm": 1.2669454966002218, "learning_rate": 4.897946436543232e-09, "loss": 0.2082, "step": 55041 }, { "epoch": 0.956769629230475, "grad_norm": 1.1009092023812157, "learning_rate": 4.8940168009137255e-09, "loss": 0.1488, "step": 55042 }, { "epoch": 0.9567870117679779, "grad_norm": 2.0863933398050634, "learning_rate": 4.890088734542719e-09, "loss": 0.2556, "step": 55043 }, { "epoch": 0.9568043943054807, "grad_norm": 1.0597408411202835, "learning_rate": 4.886162237442759e-09, "loss": 0.1015, "step": 55044 }, { "epoch": 0.9568217768429835, "grad_norm": 0.9209006603600478, "learning_rate": 4.8822373096261695e-09, "loss": 0.2392, "step": 55045 }, { "epoch": 0.9568391593804864, "grad_norm": 1.5504961981837937, "learning_rate": 4.878313951105439e-09, "loss": 0.1247, "step": 55046 }, { "epoch": 0.9568565419179892, "grad_norm": 1.3790864990686198, "learning_rate": 4.874392161893059e-09, "loss": 0.1616, "step": 55047 }, { "epoch": 0.956873924455492, "grad_norm": 1.1278483005789912, "learning_rate": 4.870471942001353e-09, "loss": 0.2167, "step": 55048 }, { "epoch": 0.9568913069929949, "grad_norm": 1.8598288520419792, "learning_rate": 4.866553291442865e-09, "loss": 0.1563, "step": 55049 }, { "epoch": 0.9569086895304977, "grad_norm": 1.6995108271384582, "learning_rate": 4.862636210229976e-09, "loss": 0.3134, "step": 55050 }, { "epoch": 0.9569260720680005, "grad_norm": 1.0242859622373979, "learning_rate": 4.858720698375007e-09, "loss": 0.2488, "step": 55051 }, { "epoch": 0.9569434546055033, "grad_norm": 1.2688991595316375, "learning_rate": 4.854806755890506e-09, "loss": 0.2262, "step": 55052 }, { "epoch": 0.9569608371430062, "grad_norm": 1.3500624351294557, "learning_rate": 4.85089438278874e-09, "loss": 0.1789, "step": 55053 }, { "epoch": 0.956978219680509, "grad_norm": 2.311687043966832, "learning_rate": 4.8469835790822534e-09, "loss": 0.309, "step": 55054 }, { "epoch": 0.9569956022180118, "grad_norm": 1.1369962978402743, "learning_rate": 4.843074344783371e-09, "loss": 0.1472, "step": 55055 }, { "epoch": 0.9570129847555147, "grad_norm": 2.4223918333985024, "learning_rate": 4.8391666799044714e-09, "loss": 0.1483, "step": 55056 }, { "epoch": 0.9570303672930174, "grad_norm": 1.4205327396745622, "learning_rate": 4.83526058445799e-09, "loss": 0.1341, "step": 55057 }, { "epoch": 0.9570477498305202, "grad_norm": 1.4329307045306117, "learning_rate": 4.831356058456248e-09, "loss": 0.168, "step": 55058 }, { "epoch": 0.957065132368023, "grad_norm": 0.923301387547151, "learning_rate": 4.827453101911627e-09, "loss": 0.1507, "step": 55059 }, { "epoch": 0.9570825149055259, "grad_norm": 0.9981132941967629, "learning_rate": 4.823551714836504e-09, "loss": 0.1449, "step": 55060 }, { "epoch": 0.9570998974430287, "grad_norm": 1.5469365729336462, "learning_rate": 4.8196518972432595e-09, "loss": 0.3049, "step": 55061 }, { "epoch": 0.9571172799805315, "grad_norm": 1.7424160596147398, "learning_rate": 4.815753649144327e-09, "loss": 0.144, "step": 55062 }, { "epoch": 0.9571346625180344, "grad_norm": 1.5511065332767213, "learning_rate": 4.81185697055192e-09, "loss": 0.2179, "step": 55063 }, { "epoch": 0.9571520450555372, "grad_norm": 0.9757808137993517, "learning_rate": 4.807961861478471e-09, "loss": 0.1042, "step": 55064 }, { "epoch": 0.95716942759304, "grad_norm": 1.311933344772579, "learning_rate": 4.804068321936305e-09, "loss": 0.1806, "step": 55065 }, { "epoch": 0.9571868101305429, "grad_norm": 1.3940514008910156, "learning_rate": 4.800176351937746e-09, "loss": 0.2013, "step": 55066 }, { "epoch": 0.9572041926680457, "grad_norm": 1.490304556717212, "learning_rate": 4.796285951495172e-09, "loss": 0.1813, "step": 55067 }, { "epoch": 0.9572215752055485, "grad_norm": 1.2322461590760445, "learning_rate": 4.7923971206209055e-09, "loss": 0.2914, "step": 55068 }, { "epoch": 0.9572389577430513, "grad_norm": 2.2737969575124097, "learning_rate": 4.788509859327272e-09, "loss": 0.1767, "step": 55069 }, { "epoch": 0.9572563402805542, "grad_norm": 2.1590437652501513, "learning_rate": 4.7846241676265385e-09, "loss": 0.1869, "step": 55070 }, { "epoch": 0.957273722818057, "grad_norm": 1.0086705650037073, "learning_rate": 4.78074004553114e-09, "loss": 0.1498, "step": 55071 }, { "epoch": 0.9572911053555598, "grad_norm": 1.286689288817903, "learning_rate": 4.776857493053177e-09, "loss": 0.1531, "step": 55072 }, { "epoch": 0.9573084878930627, "grad_norm": 1.9003302786089122, "learning_rate": 4.772976510205195e-09, "loss": 0.1663, "step": 55073 }, { "epoch": 0.9573258704305655, "grad_norm": 0.7891855174983448, "learning_rate": 4.769097096999408e-09, "loss": 0.1566, "step": 55074 }, { "epoch": 0.9573432529680683, "grad_norm": 1.4845395592871269, "learning_rate": 4.7652192534480275e-09, "loss": 0.1357, "step": 55075 }, { "epoch": 0.9573606355055712, "grad_norm": 2.0478160171523236, "learning_rate": 4.7613429795634315e-09, "loss": 0.1447, "step": 55076 }, { "epoch": 0.9573780180430739, "grad_norm": 1.8445006564938098, "learning_rate": 4.757468275357834e-09, "loss": 0.2715, "step": 55077 }, { "epoch": 0.9573954005805767, "grad_norm": 2.2287331175237397, "learning_rate": 4.753595140843669e-09, "loss": 0.1545, "step": 55078 }, { "epoch": 0.9574127831180795, "grad_norm": 1.5835505479239125, "learning_rate": 4.749723576033038e-09, "loss": 0.2584, "step": 55079 }, { "epoch": 0.9574301656555824, "grad_norm": 1.6022884999984148, "learning_rate": 4.7458535809382635e-09, "loss": 0.2802, "step": 55080 }, { "epoch": 0.9574475481930852, "grad_norm": 1.4016384621224163, "learning_rate": 4.741985155571726e-09, "loss": 0.15, "step": 55081 }, { "epoch": 0.957464930730588, "grad_norm": 0.950917003323505, "learning_rate": 4.738118299945526e-09, "loss": 0.2005, "step": 55082 }, { "epoch": 0.9574823132680909, "grad_norm": 2.2318426426609066, "learning_rate": 4.7342530140719875e-09, "loss": 0.2435, "step": 55083 }, { "epoch": 0.9574996958055937, "grad_norm": 1.3921620010725044, "learning_rate": 4.730389297963322e-09, "loss": 0.1091, "step": 55084 }, { "epoch": 0.9575170783430965, "grad_norm": 1.3055501930675821, "learning_rate": 4.72652715163191e-09, "loss": 0.2401, "step": 55085 }, { "epoch": 0.9575344608805993, "grad_norm": 0.9036637688102113, "learning_rate": 4.722666575089851e-09, "loss": 0.143, "step": 55086 }, { "epoch": 0.9575518434181022, "grad_norm": 0.9729584388225503, "learning_rate": 4.718807568349414e-09, "loss": 0.1863, "step": 55087 }, { "epoch": 0.957569225955605, "grad_norm": 1.1905277030511947, "learning_rate": 4.714950131422868e-09, "loss": 0.1239, "step": 55088 }, { "epoch": 0.9575866084931078, "grad_norm": 0.7525434392021414, "learning_rate": 4.711094264322369e-09, "loss": 0.1487, "step": 55089 }, { "epoch": 0.9576039910306107, "grad_norm": 1.6379834510468068, "learning_rate": 4.7072399670602394e-09, "loss": 0.1373, "step": 55090 }, { "epoch": 0.9576213735681135, "grad_norm": 1.5763791491602839, "learning_rate": 4.703387239648582e-09, "loss": 0.1908, "step": 55091 }, { "epoch": 0.9576387561056163, "grad_norm": 6.6517518733655, "learning_rate": 4.69953608209972e-09, "loss": 0.2511, "step": 55092 }, { "epoch": 0.9576561386431192, "grad_norm": 4.475058488491033, "learning_rate": 4.69568649442581e-09, "loss": 0.2062, "step": 55093 }, { "epoch": 0.957673521180622, "grad_norm": 2.8518364026668923, "learning_rate": 4.69183847663901e-09, "loss": 0.1963, "step": 55094 }, { "epoch": 0.9576909037181248, "grad_norm": 1.2184248605615517, "learning_rate": 4.68799202875153e-09, "loss": 0.1394, "step": 55095 }, { "epoch": 0.9577082862556276, "grad_norm": 1.2477795126811915, "learning_rate": 4.684147150775641e-09, "loss": 0.1612, "step": 55096 }, { "epoch": 0.9577256687931304, "grad_norm": 0.9812378801293427, "learning_rate": 4.6803038427234986e-09, "loss": 0.141, "step": 55097 }, { "epoch": 0.9577430513306332, "grad_norm": 1.2578936301869184, "learning_rate": 4.676462104607204e-09, "loss": 0.1245, "step": 55098 }, { "epoch": 0.957760433868136, "grad_norm": 1.4583285487601945, "learning_rate": 4.672621936439025e-09, "loss": 0.2269, "step": 55099 }, { "epoch": 0.9577778164056389, "grad_norm": 0.980072403053305, "learning_rate": 4.668783338231119e-09, "loss": 0.1303, "step": 55100 }, { "epoch": 0.9577951989431417, "grad_norm": 2.59442486137073, "learning_rate": 4.664946309995588e-09, "loss": 0.2869, "step": 55101 }, { "epoch": 0.9578125814806445, "grad_norm": 3.670435841494511, "learning_rate": 4.661110851744643e-09, "loss": 0.3399, "step": 55102 }, { "epoch": 0.9578299640181474, "grad_norm": 1.162959191657654, "learning_rate": 4.657276963490442e-09, "loss": 0.1382, "step": 55103 }, { "epoch": 0.9578473465556502, "grad_norm": 1.897962359919109, "learning_rate": 4.6534446452451415e-09, "loss": 0.1814, "step": 55104 }, { "epoch": 0.957864729093153, "grad_norm": 1.030559160502815, "learning_rate": 4.649613897020843e-09, "loss": 0.128, "step": 55105 }, { "epoch": 0.9578821116306558, "grad_norm": 1.3188069269573746, "learning_rate": 4.645784718829815e-09, "loss": 0.1924, "step": 55106 }, { "epoch": 0.9578994941681587, "grad_norm": 1.5641202146415063, "learning_rate": 4.641957110683992e-09, "loss": 0.2219, "step": 55107 }, { "epoch": 0.9579168767056615, "grad_norm": 0.6539558145709743, "learning_rate": 4.638131072595697e-09, "loss": 0.1538, "step": 55108 }, { "epoch": 0.9579342592431643, "grad_norm": 1.2503944182324835, "learning_rate": 4.634306604576921e-09, "loss": 0.1362, "step": 55109 }, { "epoch": 0.9579516417806672, "grad_norm": 1.000497133417333, "learning_rate": 4.630483706639876e-09, "loss": 0.1308, "step": 55110 }, { "epoch": 0.95796902431817, "grad_norm": 1.4764911737482969, "learning_rate": 4.626662378796609e-09, "loss": 0.1403, "step": 55111 }, { "epoch": 0.9579864068556728, "grad_norm": 0.9036756031442358, "learning_rate": 4.622842621059331e-09, "loss": 0.1855, "step": 55112 }, { "epoch": 0.9580037893931757, "grad_norm": 1.1509658909508385, "learning_rate": 4.619024433440033e-09, "loss": 0.1025, "step": 55113 }, { "epoch": 0.9580211719306785, "grad_norm": 1.9355076908909077, "learning_rate": 4.615207815950872e-09, "loss": 0.1853, "step": 55114 }, { "epoch": 0.9580385544681813, "grad_norm": 4.004495324793826, "learning_rate": 4.61139276860395e-09, "loss": 0.2253, "step": 55115 }, { "epoch": 0.9580559370056841, "grad_norm": 1.2092197074273532, "learning_rate": 4.607579291411312e-09, "loss": 0.2431, "step": 55116 }, { "epoch": 0.9580733195431869, "grad_norm": 1.110899610252981, "learning_rate": 4.603767384385115e-09, "loss": 0.1961, "step": 55117 }, { "epoch": 0.9580907020806897, "grad_norm": 0.7786311329361904, "learning_rate": 4.599957047537462e-09, "loss": 0.1737, "step": 55118 }, { "epoch": 0.9581080846181925, "grad_norm": 4.458166814832492, "learning_rate": 4.5961482808802855e-09, "loss": 0.2311, "step": 55119 }, { "epoch": 0.9581254671556954, "grad_norm": 1.5527169265772558, "learning_rate": 4.592341084425799e-09, "loss": 0.1883, "step": 55120 }, { "epoch": 0.9581428496931982, "grad_norm": 1.6461525920843152, "learning_rate": 4.588535458186049e-09, "loss": 0.1751, "step": 55121 }, { "epoch": 0.958160232230701, "grad_norm": 1.559669082280476, "learning_rate": 4.5847314021730256e-09, "loss": 0.1347, "step": 55122 }, { "epoch": 0.9581776147682038, "grad_norm": 2.853223822458936, "learning_rate": 4.580928916398774e-09, "loss": 0.4561, "step": 55123 }, { "epoch": 0.9581949973057067, "grad_norm": 0.920126473187877, "learning_rate": 4.5771280008755075e-09, "loss": 0.1239, "step": 55124 }, { "epoch": 0.9582123798432095, "grad_norm": 1.1766402418747555, "learning_rate": 4.573328655615105e-09, "loss": 0.1478, "step": 55125 }, { "epoch": 0.9582297623807123, "grad_norm": 1.1687270546758992, "learning_rate": 4.569530880629668e-09, "loss": 0.106, "step": 55126 }, { "epoch": 0.9582471449182152, "grad_norm": 1.7937697938042443, "learning_rate": 4.565734675931243e-09, "loss": 0.1715, "step": 55127 }, { "epoch": 0.958264527455718, "grad_norm": 1.4285290447392252, "learning_rate": 4.5619400415318755e-09, "loss": 0.1659, "step": 55128 }, { "epoch": 0.9582819099932208, "grad_norm": 1.3013982801070032, "learning_rate": 4.5581469774435555e-09, "loss": 0.1831, "step": 55129 }, { "epoch": 0.9582992925307237, "grad_norm": 2.9521621298209855, "learning_rate": 4.554355483678329e-09, "loss": 0.2857, "step": 55130 }, { "epoch": 0.9583166750682265, "grad_norm": 1.1004606499548468, "learning_rate": 4.550565560248243e-09, "loss": 0.1096, "step": 55131 }, { "epoch": 0.9583340576057293, "grad_norm": 1.0904598798059773, "learning_rate": 4.5467772071652314e-09, "loss": 0.1642, "step": 55132 }, { "epoch": 0.9583514401432321, "grad_norm": 1.0333975446176982, "learning_rate": 4.54299042444134e-09, "loss": 0.1607, "step": 55133 }, { "epoch": 0.958368822680735, "grad_norm": 0.9950953515442762, "learning_rate": 4.539205212088559e-09, "loss": 0.1611, "step": 55134 }, { "epoch": 0.9583862052182378, "grad_norm": 1.3098540491085882, "learning_rate": 4.535421570118936e-09, "loss": 0.1267, "step": 55135 }, { "epoch": 0.9584035877557406, "grad_norm": 1.5601298937993275, "learning_rate": 4.531639498544404e-09, "loss": 0.1781, "step": 55136 }, { "epoch": 0.9584209702932434, "grad_norm": 1.550953742090791, "learning_rate": 4.52785899737701e-09, "loss": 0.1337, "step": 55137 }, { "epoch": 0.9584383528307462, "grad_norm": 1.1813540697145886, "learning_rate": 4.524080066628688e-09, "loss": 0.245, "step": 55138 }, { "epoch": 0.958455735368249, "grad_norm": 0.7586978912327818, "learning_rate": 4.520302706311429e-09, "loss": 0.1997, "step": 55139 }, { "epoch": 0.9584731179057518, "grad_norm": 1.152755901599043, "learning_rate": 4.516526916437169e-09, "loss": 0.1088, "step": 55140 }, { "epoch": 0.9584905004432547, "grad_norm": 1.9294289244951721, "learning_rate": 4.512752697017952e-09, "loss": 0.2184, "step": 55141 }, { "epoch": 0.9585078829807575, "grad_norm": 2.2249795579236675, "learning_rate": 4.508980048065658e-09, "loss": 0.1338, "step": 55142 }, { "epoch": 0.9585252655182603, "grad_norm": 1.0770314258148355, "learning_rate": 4.505208969592334e-09, "loss": 0.1446, "step": 55143 }, { "epoch": 0.9585426480557632, "grad_norm": 1.3824159051181348, "learning_rate": 4.501439461609913e-09, "loss": 0.1444, "step": 55144 }, { "epoch": 0.958560030593266, "grad_norm": 1.44400898560598, "learning_rate": 4.4976715241302195e-09, "loss": 0.1371, "step": 55145 }, { "epoch": 0.9585774131307688, "grad_norm": 1.332320507505839, "learning_rate": 4.493905157165356e-09, "loss": 0.1895, "step": 55146 }, { "epoch": 0.9585947956682717, "grad_norm": 1.2173202901621538, "learning_rate": 4.490140360727202e-09, "loss": 0.1519, "step": 55147 }, { "epoch": 0.9586121782057745, "grad_norm": 2.0978450605097, "learning_rate": 4.48637713482769e-09, "loss": 0.1681, "step": 55148 }, { "epoch": 0.9586295607432773, "grad_norm": 3.585551124577886, "learning_rate": 4.482615479478702e-09, "loss": 0.2541, "step": 55149 }, { "epoch": 0.9586469432807802, "grad_norm": 1.0635744655204582, "learning_rate": 4.478855394692227e-09, "loss": 0.1197, "step": 55150 }, { "epoch": 0.958664325818283, "grad_norm": 2.684589650434134, "learning_rate": 4.4750968804801446e-09, "loss": 0.1664, "step": 55151 }, { "epoch": 0.9586817083557858, "grad_norm": 1.6836101485488773, "learning_rate": 4.47133993685439e-09, "loss": 0.2483, "step": 55152 }, { "epoch": 0.9586990908932886, "grad_norm": 0.9634871884446304, "learning_rate": 4.467584563826843e-09, "loss": 0.1007, "step": 55153 }, { "epoch": 0.9587164734307915, "grad_norm": 1.1200972606619939, "learning_rate": 4.463830761409437e-09, "loss": 0.0978, "step": 55154 }, { "epoch": 0.9587338559682943, "grad_norm": 1.3069434585273532, "learning_rate": 4.460078529614053e-09, "loss": 0.1122, "step": 55155 }, { "epoch": 0.958751238505797, "grad_norm": 1.7895551833805825, "learning_rate": 4.456327868452625e-09, "loss": 0.181, "step": 55156 }, { "epoch": 0.9587686210432999, "grad_norm": 0.9674091234141013, "learning_rate": 4.452578777936922e-09, "loss": 0.1787, "step": 55157 }, { "epoch": 0.9587860035808027, "grad_norm": 1.038691977829758, "learning_rate": 4.4488312580789885e-09, "loss": 0.0927, "step": 55158 }, { "epoch": 0.9588033861183055, "grad_norm": 1.4966673723644914, "learning_rate": 4.4450853088905945e-09, "loss": 0.1371, "step": 55159 }, { "epoch": 0.9588207686558083, "grad_norm": 0.8914381709320297, "learning_rate": 4.441340930383619e-09, "loss": 0.0826, "step": 55160 }, { "epoch": 0.9588381511933112, "grad_norm": 1.6323640542705766, "learning_rate": 4.437598122569941e-09, "loss": 0.1789, "step": 55161 }, { "epoch": 0.958855533730814, "grad_norm": 0.7740253652803263, "learning_rate": 4.433856885461495e-09, "loss": 0.1371, "step": 55162 }, { "epoch": 0.9588729162683168, "grad_norm": 0.9478803018469533, "learning_rate": 4.430117219070051e-09, "loss": 0.1494, "step": 55163 }, { "epoch": 0.9588902988058197, "grad_norm": 0.8839456585389143, "learning_rate": 4.426379123407486e-09, "loss": 0.1483, "step": 55164 }, { "epoch": 0.9589076813433225, "grad_norm": 1.11394076321375, "learning_rate": 4.422642598485626e-09, "loss": 0.1468, "step": 55165 }, { "epoch": 0.9589250638808253, "grad_norm": 1.3809412976476327, "learning_rate": 4.4189076443163494e-09, "loss": 0.094, "step": 55166 }, { "epoch": 0.9589424464183282, "grad_norm": 1.297431261716919, "learning_rate": 4.415174260911536e-09, "loss": 0.1799, "step": 55167 }, { "epoch": 0.958959828955831, "grad_norm": 0.8859774623867432, "learning_rate": 4.411442448282898e-09, "loss": 0.1492, "step": 55168 }, { "epoch": 0.9589772114933338, "grad_norm": 1.314789378721679, "learning_rate": 4.407712206442371e-09, "loss": 0.1802, "step": 55169 }, { "epoch": 0.9589945940308366, "grad_norm": 1.050139446192257, "learning_rate": 4.403983535401723e-09, "loss": 0.1052, "step": 55170 }, { "epoch": 0.9590119765683395, "grad_norm": 1.398135285839473, "learning_rate": 4.400256435172833e-09, "loss": 0.146, "step": 55171 }, { "epoch": 0.9590293591058423, "grad_norm": 2.9221053591129458, "learning_rate": 4.396530905767415e-09, "loss": 0.1886, "step": 55172 }, { "epoch": 0.9590467416433451, "grad_norm": 2.1460369327112976, "learning_rate": 4.392806947197347e-09, "loss": 0.1541, "step": 55173 }, { "epoch": 0.959064124180848, "grad_norm": 1.4873526246061362, "learning_rate": 4.389084559474399e-09, "loss": 0.1191, "step": 55174 }, { "epoch": 0.9590815067183508, "grad_norm": 0.99839921545517, "learning_rate": 4.385363742610448e-09, "loss": 0.1418, "step": 55175 }, { "epoch": 0.9590988892558535, "grad_norm": 1.32326917432996, "learning_rate": 4.381644496617154e-09, "loss": 0.1196, "step": 55176 }, { "epoch": 0.9591162717933563, "grad_norm": 1.0326551869195562, "learning_rate": 4.3779268215064505e-09, "loss": 0.1274, "step": 55177 }, { "epoch": 0.9591336543308592, "grad_norm": 1.8452007277196099, "learning_rate": 4.374210717289995e-09, "loss": 0.149, "step": 55178 }, { "epoch": 0.959151036868362, "grad_norm": 1.141348238355594, "learning_rate": 4.370496183979666e-09, "loss": 0.1155, "step": 55179 }, { "epoch": 0.9591684194058648, "grad_norm": 1.2867605099758077, "learning_rate": 4.3667832215871226e-09, "loss": 0.1103, "step": 55180 }, { "epoch": 0.9591858019433677, "grad_norm": 1.6465449717845015, "learning_rate": 4.363071830124243e-09, "loss": 0.2533, "step": 55181 }, { "epoch": 0.9592031844808705, "grad_norm": 2.641699272513103, "learning_rate": 4.359362009602796e-09, "loss": 0.2254, "step": 55182 }, { "epoch": 0.9592205670183733, "grad_norm": 2.501989035227116, "learning_rate": 4.3556537600344386e-09, "loss": 0.218, "step": 55183 }, { "epoch": 0.9592379495558762, "grad_norm": 1.3425392128552134, "learning_rate": 4.3519470814309404e-09, "loss": 0.2671, "step": 55184 }, { "epoch": 0.959255332093379, "grad_norm": 1.0570513079963173, "learning_rate": 4.3482419738041236e-09, "loss": 0.1706, "step": 55185 }, { "epoch": 0.9592727146308818, "grad_norm": 0.7121580226534687, "learning_rate": 4.344538437165701e-09, "loss": 0.1111, "step": 55186 }, { "epoch": 0.9592900971683846, "grad_norm": 1.1632071637729593, "learning_rate": 4.340836471527442e-09, "loss": 0.0962, "step": 55187 }, { "epoch": 0.9593074797058875, "grad_norm": 1.1897421388149412, "learning_rate": 4.337136076901004e-09, "loss": 0.1489, "step": 55188 }, { "epoch": 0.9593248622433903, "grad_norm": 1.8453356106874512, "learning_rate": 4.3334372532981555e-09, "loss": 0.237, "step": 55189 }, { "epoch": 0.9593422447808931, "grad_norm": 1.1406972445007684, "learning_rate": 4.329740000730664e-09, "loss": 0.2282, "step": 55190 }, { "epoch": 0.959359627318396, "grad_norm": 2.34312750231704, "learning_rate": 4.326044319210132e-09, "loss": 0.1795, "step": 55191 }, { "epoch": 0.9593770098558988, "grad_norm": 1.258611093672153, "learning_rate": 4.322350208748382e-09, "loss": 0.1087, "step": 55192 }, { "epoch": 0.9593943923934016, "grad_norm": 1.1222699665586746, "learning_rate": 4.318657669357073e-09, "loss": 0.1348, "step": 55193 }, { "epoch": 0.9594117749309045, "grad_norm": 2.4583804540044385, "learning_rate": 4.314966701047973e-09, "loss": 0.2099, "step": 55194 }, { "epoch": 0.9594291574684073, "grad_norm": 1.5885170066327308, "learning_rate": 4.311277303832683e-09, "loss": 0.0951, "step": 55195 }, { "epoch": 0.95944654000591, "grad_norm": 0.9167253396763629, "learning_rate": 4.307589477722917e-09, "loss": 0.1701, "step": 55196 }, { "epoch": 0.9594639225434128, "grad_norm": 3.231478222014622, "learning_rate": 4.3039032227304424e-09, "loss": 0.3535, "step": 55197 }, { "epoch": 0.9594813050809157, "grad_norm": 1.8234612327754456, "learning_rate": 4.300218538866862e-09, "loss": 0.129, "step": 55198 }, { "epoch": 0.9594986876184185, "grad_norm": 1.6876293599436796, "learning_rate": 4.296535426143888e-09, "loss": 0.2076, "step": 55199 }, { "epoch": 0.9595160701559213, "grad_norm": 1.7135704320649956, "learning_rate": 4.292853884573177e-09, "loss": 0.179, "step": 55200 }, { "epoch": 0.9595334526934242, "grad_norm": 1.2759636251187159, "learning_rate": 4.289173914166444e-09, "loss": 0.1225, "step": 55201 }, { "epoch": 0.959550835230927, "grad_norm": 3.0049519288611886, "learning_rate": 4.285495514935345e-09, "loss": 0.1927, "step": 55202 }, { "epoch": 0.9595682177684298, "grad_norm": 1.6630178485561873, "learning_rate": 4.281818686891425e-09, "loss": 0.1753, "step": 55203 }, { "epoch": 0.9595856003059327, "grad_norm": 0.9698064054301976, "learning_rate": 4.2781434300464545e-09, "loss": 0.1914, "step": 55204 }, { "epoch": 0.9596029828434355, "grad_norm": 1.3888661952319679, "learning_rate": 4.2744697444120905e-09, "loss": 0.1636, "step": 55205 }, { "epoch": 0.9596203653809383, "grad_norm": 1.3659437872018996, "learning_rate": 4.270797629999934e-09, "loss": 0.1137, "step": 55206 }, { "epoch": 0.9596377479184411, "grad_norm": 1.9884245462102892, "learning_rate": 4.267127086821587e-09, "loss": 0.1577, "step": 55207 }, { "epoch": 0.959655130455944, "grad_norm": 1.4359084002071307, "learning_rate": 4.2634581148887625e-09, "loss": 0.1702, "step": 55208 }, { "epoch": 0.9596725129934468, "grad_norm": 1.1525490821834992, "learning_rate": 4.259790714213063e-09, "loss": 0.1932, "step": 55209 }, { "epoch": 0.9596898955309496, "grad_norm": 1.4509200586288156, "learning_rate": 4.2561248848060896e-09, "loss": 0.2299, "step": 55210 }, { "epoch": 0.9597072780684525, "grad_norm": 1.3594141320597652, "learning_rate": 4.252460626679444e-09, "loss": 0.1861, "step": 55211 }, { "epoch": 0.9597246606059553, "grad_norm": 1.4336173096107512, "learning_rate": 4.248797939844784e-09, "loss": 0.1446, "step": 55212 }, { "epoch": 0.9597420431434581, "grad_norm": 1.003497525896753, "learning_rate": 4.245136824313711e-09, "loss": 0.1206, "step": 55213 }, { "epoch": 0.959759425680961, "grad_norm": 1.0394931775265923, "learning_rate": 4.241477280097827e-09, "loss": 0.1447, "step": 55214 }, { "epoch": 0.9597768082184638, "grad_norm": 2.3235021364781483, "learning_rate": 4.237819307208734e-09, "loss": 0.1153, "step": 55215 }, { "epoch": 0.9597941907559665, "grad_norm": 2.130201376044366, "learning_rate": 4.234162905658034e-09, "loss": 0.1943, "step": 55216 }, { "epoch": 0.9598115732934693, "grad_norm": 1.9582303388548665, "learning_rate": 4.230508075457273e-09, "loss": 0.1859, "step": 55217 }, { "epoch": 0.9598289558309722, "grad_norm": 1.0028205113130075, "learning_rate": 4.226854816618053e-09, "loss": 0.1557, "step": 55218 }, { "epoch": 0.959846338368475, "grad_norm": 1.7541889394634242, "learning_rate": 4.223203129151975e-09, "loss": 0.1757, "step": 55219 }, { "epoch": 0.9598637209059778, "grad_norm": 1.4671932317727319, "learning_rate": 4.219553013070643e-09, "loss": 0.1784, "step": 55220 }, { "epoch": 0.9598811034434807, "grad_norm": 1.7109763146644785, "learning_rate": 4.215904468385545e-09, "loss": 0.183, "step": 55221 }, { "epoch": 0.9598984859809835, "grad_norm": 0.7741942786688095, "learning_rate": 4.212257495108285e-09, "loss": 0.1153, "step": 55222 }, { "epoch": 0.9599158685184863, "grad_norm": 1.129066758598316, "learning_rate": 4.2086120932504075e-09, "loss": 0.1197, "step": 55223 }, { "epoch": 0.9599332510559891, "grad_norm": 1.3638917619863828, "learning_rate": 4.204968262823516e-09, "loss": 0.1353, "step": 55224 }, { "epoch": 0.959950633593492, "grad_norm": 2.048515909985672, "learning_rate": 4.201326003839101e-09, "loss": 0.2354, "step": 55225 }, { "epoch": 0.9599680161309948, "grad_norm": 1.3751871440646035, "learning_rate": 4.197685316308764e-09, "loss": 0.1607, "step": 55226 }, { "epoch": 0.9599853986684976, "grad_norm": 2.5142268171010564, "learning_rate": 4.194046200243939e-09, "loss": 0.1403, "step": 55227 }, { "epoch": 0.9600027812060005, "grad_norm": 1.5937528766764353, "learning_rate": 4.190408655656286e-09, "loss": 0.1492, "step": 55228 }, { "epoch": 0.9600201637435033, "grad_norm": 1.447270750116043, "learning_rate": 4.186772682557294e-09, "loss": 0.2198, "step": 55229 }, { "epoch": 0.9600375462810061, "grad_norm": 1.769554691268749, "learning_rate": 4.183138280958454e-09, "loss": 0.2509, "step": 55230 }, { "epoch": 0.960054928818509, "grad_norm": 2.3570027222447933, "learning_rate": 4.179505450871313e-09, "loss": 0.2009, "step": 55231 }, { "epoch": 0.9600723113560118, "grad_norm": 0.9642367499940404, "learning_rate": 4.175874192307416e-09, "loss": 0.1314, "step": 55232 }, { "epoch": 0.9600896938935146, "grad_norm": 0.9681328084749655, "learning_rate": 4.172244505278199e-09, "loss": 0.1769, "step": 55233 }, { "epoch": 0.9601070764310174, "grad_norm": 1.396762917941641, "learning_rate": 4.168616389795154e-09, "loss": 0.1792, "step": 55234 }, { "epoch": 0.9601244589685203, "grad_norm": 2.3291075127836627, "learning_rate": 4.164989845869882e-09, "loss": 0.2367, "step": 55235 }, { "epoch": 0.960141841506023, "grad_norm": 1.8173162079290488, "learning_rate": 4.161364873513817e-09, "loss": 0.1927, "step": 55236 }, { "epoch": 0.9601592240435258, "grad_norm": 2.1502485583900666, "learning_rate": 4.157741472738452e-09, "loss": 0.2459, "step": 55237 }, { "epoch": 0.9601766065810287, "grad_norm": 0.9136619960005791, "learning_rate": 4.154119643555276e-09, "loss": 0.1116, "step": 55238 }, { "epoch": 0.9601939891185315, "grad_norm": 2.6941472862480365, "learning_rate": 4.150499385975781e-09, "loss": 0.2114, "step": 55239 }, { "epoch": 0.9602113716560343, "grad_norm": 1.9793539827394337, "learning_rate": 4.146880700011402e-09, "loss": 0.1633, "step": 55240 }, { "epoch": 0.9602287541935371, "grad_norm": 1.013479684011782, "learning_rate": 4.143263585673629e-09, "loss": 0.1852, "step": 55241 }, { "epoch": 0.96024613673104, "grad_norm": 1.2128167860698187, "learning_rate": 4.139648042973953e-09, "loss": 0.1189, "step": 55242 }, { "epoch": 0.9602635192685428, "grad_norm": 0.8737157111545903, "learning_rate": 4.136034071923811e-09, "loss": 0.1364, "step": 55243 }, { "epoch": 0.9602809018060456, "grad_norm": 1.1940976912254524, "learning_rate": 4.132421672534692e-09, "loss": 0.1741, "step": 55244 }, { "epoch": 0.9602982843435485, "grad_norm": 1.2593056005584142, "learning_rate": 4.128810844817976e-09, "loss": 0.1342, "step": 55245 }, { "epoch": 0.9603156668810513, "grad_norm": 0.9623004964985229, "learning_rate": 4.125201588785154e-09, "loss": 0.2158, "step": 55246 }, { "epoch": 0.9603330494185541, "grad_norm": 1.1687181474446857, "learning_rate": 4.1215939044476625e-09, "loss": 0.2126, "step": 55247 }, { "epoch": 0.960350431956057, "grad_norm": 1.0042804503876568, "learning_rate": 4.1179877918168795e-09, "loss": 0.2059, "step": 55248 }, { "epoch": 0.9603678144935598, "grad_norm": 1.658585595430447, "learning_rate": 4.114383250904352e-09, "loss": 0.184, "step": 55249 }, { "epoch": 0.9603851970310626, "grad_norm": 2.2844617884745206, "learning_rate": 4.110780281721404e-09, "loss": 0.2817, "step": 55250 }, { "epoch": 0.9604025795685655, "grad_norm": 1.6154039033859406, "learning_rate": 4.1071788842795275e-09, "loss": 0.2119, "step": 55251 }, { "epoch": 0.9604199621060683, "grad_norm": 1.1282872686294392, "learning_rate": 4.103579058590045e-09, "loss": 0.0969, "step": 55252 }, { "epoch": 0.9604373446435711, "grad_norm": 6.030206435024409, "learning_rate": 4.099980804664449e-09, "loss": 0.2647, "step": 55253 }, { "epoch": 0.9604547271810739, "grad_norm": 1.3222034357815093, "learning_rate": 4.096384122514118e-09, "loss": 0.1413, "step": 55254 }, { "epoch": 0.9604721097185768, "grad_norm": 1.3320643276084574, "learning_rate": 4.0927890121504324e-09, "loss": 0.1659, "step": 55255 }, { "epoch": 0.9604894922560795, "grad_norm": 1.4248742018731781, "learning_rate": 4.089195473584828e-09, "loss": 0.1377, "step": 55256 }, { "epoch": 0.9605068747935823, "grad_norm": 1.0927802303671679, "learning_rate": 4.085603506828683e-09, "loss": 0.1229, "step": 55257 }, { "epoch": 0.9605242573310852, "grad_norm": 2.0204550690144694, "learning_rate": 4.0820131118933234e-09, "loss": 0.1568, "step": 55258 }, { "epoch": 0.960541639868588, "grad_norm": 1.612512132628088, "learning_rate": 4.078424288790238e-09, "loss": 0.2126, "step": 55259 }, { "epoch": 0.9605590224060908, "grad_norm": 2.1503637560716906, "learning_rate": 4.074837037530698e-09, "loss": 0.1982, "step": 55260 }, { "epoch": 0.9605764049435936, "grad_norm": 1.6019929103348758, "learning_rate": 4.0712513581261375e-09, "loss": 0.1394, "step": 55261 }, { "epoch": 0.9605937874810965, "grad_norm": 1.2057159870217007, "learning_rate": 4.067667250587825e-09, "loss": 0.1252, "step": 55262 }, { "epoch": 0.9606111700185993, "grad_norm": 1.9822308482900644, "learning_rate": 4.0640847149272514e-09, "loss": 0.2741, "step": 55263 }, { "epoch": 0.9606285525561021, "grad_norm": 1.7502680264853003, "learning_rate": 4.0605037511557415e-09, "loss": 0.1729, "step": 55264 }, { "epoch": 0.960645935093605, "grad_norm": 0.8987023146450198, "learning_rate": 4.0569243592846195e-09, "loss": 0.1758, "step": 55265 }, { "epoch": 0.9606633176311078, "grad_norm": 1.1902245445567032, "learning_rate": 4.0533465393251535e-09, "loss": 0.124, "step": 55266 }, { "epoch": 0.9606807001686106, "grad_norm": 1.3570950390570546, "learning_rate": 4.049770291288835e-09, "loss": 0.126, "step": 55267 }, { "epoch": 0.9606980827061135, "grad_norm": 3.6610848954348865, "learning_rate": 4.046195615186932e-09, "loss": 0.1908, "step": 55268 }, { "epoch": 0.9607154652436163, "grad_norm": 1.222357108059724, "learning_rate": 4.042622511030713e-09, "loss": 0.1633, "step": 55269 }, { "epoch": 0.9607328477811191, "grad_norm": 1.1358069620382034, "learning_rate": 4.039050978831559e-09, "loss": 0.1089, "step": 55270 }, { "epoch": 0.960750230318622, "grad_norm": 0.9725259249213287, "learning_rate": 4.035481018600851e-09, "loss": 0.1167, "step": 55271 }, { "epoch": 0.9607676128561248, "grad_norm": 0.8886656691005447, "learning_rate": 4.0319126303497435e-09, "loss": 0.1866, "step": 55272 }, { "epoch": 0.9607849953936276, "grad_norm": 1.322652979627688, "learning_rate": 4.028345814089673e-09, "loss": 0.1666, "step": 55273 }, { "epoch": 0.9608023779311304, "grad_norm": 1.6061456999125214, "learning_rate": 4.0247805698319646e-09, "loss": 0.1314, "step": 55274 }, { "epoch": 0.9608197604686333, "grad_norm": 1.6921564247173104, "learning_rate": 4.021216897587831e-09, "loss": 0.153, "step": 55275 }, { "epoch": 0.960837143006136, "grad_norm": 1.249911380616078, "learning_rate": 4.017654797368652e-09, "loss": 0.2911, "step": 55276 }, { "epoch": 0.9608545255436388, "grad_norm": 1.5496212426007878, "learning_rate": 4.014094269185642e-09, "loss": 0.1436, "step": 55277 }, { "epoch": 0.9608719080811416, "grad_norm": 1.856911189310324, "learning_rate": 4.010535313050067e-09, "loss": 0.2492, "step": 55278 }, { "epoch": 0.9608892906186445, "grad_norm": 1.3492126992027547, "learning_rate": 4.00697792897331e-09, "loss": 0.1082, "step": 55279 }, { "epoch": 0.9609066731561473, "grad_norm": 1.2110045555062343, "learning_rate": 4.003422116966581e-09, "loss": 0.1744, "step": 55280 }, { "epoch": 0.9609240556936501, "grad_norm": 1.0621549951615987, "learning_rate": 3.9998678770411524e-09, "loss": 0.109, "step": 55281 }, { "epoch": 0.960941438231153, "grad_norm": 2.916611332657058, "learning_rate": 3.996315209208345e-09, "loss": 0.2619, "step": 55282 }, { "epoch": 0.9609588207686558, "grad_norm": 1.7437061019580438, "learning_rate": 3.992764113479319e-09, "loss": 0.1804, "step": 55283 }, { "epoch": 0.9609762033061586, "grad_norm": 1.380291648429569, "learning_rate": 3.9892145898654526e-09, "loss": 0.177, "step": 55284 }, { "epoch": 0.9609935858436615, "grad_norm": 0.8450716488971711, "learning_rate": 3.985666638377849e-09, "loss": 0.1554, "step": 55285 }, { "epoch": 0.9610109683811643, "grad_norm": 2.013648293239123, "learning_rate": 3.982120259027833e-09, "loss": 0.21, "step": 55286 }, { "epoch": 0.9610283509186671, "grad_norm": 1.6239251506664423, "learning_rate": 3.978575451826671e-09, "loss": 0.1539, "step": 55287 }, { "epoch": 0.96104573345617, "grad_norm": 2.8180483205104, "learning_rate": 3.975032216785579e-09, "loss": 0.2101, "step": 55288 }, { "epoch": 0.9610631159936728, "grad_norm": 1.7860271750601375, "learning_rate": 3.9714905539157685e-09, "loss": 0.1653, "step": 55289 }, { "epoch": 0.9610804985311756, "grad_norm": 1.8600402520977706, "learning_rate": 3.967950463228508e-09, "loss": 0.1554, "step": 55290 }, { "epoch": 0.9610978810686784, "grad_norm": 1.630272032729402, "learning_rate": 3.964411944734958e-09, "loss": 0.2887, "step": 55291 }, { "epoch": 0.9611152636061813, "grad_norm": 1.239935705581711, "learning_rate": 3.960874998446329e-09, "loss": 0.1952, "step": 55292 }, { "epoch": 0.9611326461436841, "grad_norm": 1.6747139458000049, "learning_rate": 3.95733962437389e-09, "loss": 0.222, "step": 55293 }, { "epoch": 0.9611500286811869, "grad_norm": 1.4820606090843815, "learning_rate": 3.953805822528855e-09, "loss": 0.153, "step": 55294 }, { "epoch": 0.9611674112186896, "grad_norm": 1.388807373703788, "learning_rate": 3.950273592922326e-09, "loss": 0.1731, "step": 55295 }, { "epoch": 0.9611847937561925, "grad_norm": 1.7496173863586206, "learning_rate": 3.946742935565628e-09, "loss": 0.2237, "step": 55296 }, { "epoch": 0.9612021762936953, "grad_norm": 1.1529697156876244, "learning_rate": 3.943213850469862e-09, "loss": 0.1462, "step": 55297 }, { "epoch": 0.9612195588311981, "grad_norm": 2.3275344423593807, "learning_rate": 3.939686337646242e-09, "loss": 0.1792, "step": 55298 }, { "epoch": 0.961236941368701, "grad_norm": 1.2271362778148012, "learning_rate": 3.936160397105925e-09, "loss": 0.1284, "step": 55299 }, { "epoch": 0.9612543239062038, "grad_norm": 1.5639440635160855, "learning_rate": 3.932636028860126e-09, "loss": 0.175, "step": 55300 }, { "epoch": 0.9612717064437066, "grad_norm": 1.0703197150352834, "learning_rate": 3.929113232919945e-09, "loss": 0.1808, "step": 55301 }, { "epoch": 0.9612890889812095, "grad_norm": 1.8105540263486848, "learning_rate": 3.925592009296707e-09, "loss": 0.2452, "step": 55302 }, { "epoch": 0.9613064715187123, "grad_norm": 0.8335084098770889, "learning_rate": 3.922072358001405e-09, "loss": 0.1845, "step": 55303 }, { "epoch": 0.9613238540562151, "grad_norm": 1.03326552781991, "learning_rate": 3.9185542790451944e-09, "loss": 0.241, "step": 55304 }, { "epoch": 0.961341236593718, "grad_norm": 1.1628233966227428, "learning_rate": 3.915037772439345e-09, "loss": 0.1027, "step": 55305 }, { "epoch": 0.9613586191312208, "grad_norm": 1.1518383850557177, "learning_rate": 3.911522838194959e-09, "loss": 0.2798, "step": 55306 }, { "epoch": 0.9613760016687236, "grad_norm": 1.4823903784665415, "learning_rate": 3.908009476323138e-09, "loss": 0.1567, "step": 55307 }, { "epoch": 0.9613933842062264, "grad_norm": 1.6962320967813196, "learning_rate": 3.90449768683504e-09, "loss": 0.1903, "step": 55308 }, { "epoch": 0.9614107667437293, "grad_norm": 0.9642855619193836, "learning_rate": 3.900987469741823e-09, "loss": 0.1168, "step": 55309 }, { "epoch": 0.9614281492812321, "grad_norm": 0.9859877669077439, "learning_rate": 3.8974788250545345e-09, "loss": 0.1554, "step": 55310 }, { "epoch": 0.9614455318187349, "grad_norm": 1.704667615523861, "learning_rate": 3.893971752784386e-09, "loss": 0.1663, "step": 55311 }, { "epoch": 0.9614629143562378, "grad_norm": 1.3001141339463997, "learning_rate": 3.8904662529424255e-09, "loss": 0.2673, "step": 55312 }, { "epoch": 0.9614802968937406, "grad_norm": 0.8882664753140427, "learning_rate": 3.886962325539811e-09, "loss": 0.1744, "step": 55313 }, { "epoch": 0.9614976794312434, "grad_norm": 0.9835603264311633, "learning_rate": 3.883459970587644e-09, "loss": 0.1387, "step": 55314 }, { "epoch": 0.9615150619687461, "grad_norm": 1.7534535521941306, "learning_rate": 3.8799591880970266e-09, "loss": 0.2708, "step": 55315 }, { "epoch": 0.961532444506249, "grad_norm": 1.3361308849063973, "learning_rate": 3.876459978079005e-09, "loss": 0.1456, "step": 55316 }, { "epoch": 0.9615498270437518, "grad_norm": 1.7194877749458912, "learning_rate": 3.8729623405446835e-09, "loss": 0.2671, "step": 55317 }, { "epoch": 0.9615672095812546, "grad_norm": 2.5494161711070245, "learning_rate": 3.869466275505218e-09, "loss": 0.2197, "step": 55318 }, { "epoch": 0.9615845921187575, "grad_norm": 1.0808495368850068, "learning_rate": 3.8659717829716e-09, "loss": 0.2721, "step": 55319 }, { "epoch": 0.9616019746562603, "grad_norm": 2.0192493296390586, "learning_rate": 3.862478862954987e-09, "loss": 0.1638, "step": 55320 }, { "epoch": 0.9616193571937631, "grad_norm": 1.446421915917116, "learning_rate": 3.858987515466372e-09, "loss": 0.1664, "step": 55321 }, { "epoch": 0.961636739731266, "grad_norm": 0.9406427676602547, "learning_rate": 3.8554977405169106e-09, "loss": 0.1283, "step": 55322 }, { "epoch": 0.9616541222687688, "grad_norm": 1.6929571315567415, "learning_rate": 3.852009538117595e-09, "loss": 0.1478, "step": 55323 }, { "epoch": 0.9616715048062716, "grad_norm": 0.5473459558576499, "learning_rate": 3.8485229082794166e-09, "loss": 0.1836, "step": 55324 }, { "epoch": 0.9616888873437744, "grad_norm": 1.014671923513773, "learning_rate": 3.8450378510135886e-09, "loss": 0.1037, "step": 55325 }, { "epoch": 0.9617062698812773, "grad_norm": 1.0837052857714715, "learning_rate": 3.841554366331046e-09, "loss": 0.1837, "step": 55326 }, { "epoch": 0.9617236524187801, "grad_norm": 1.278086648546547, "learning_rate": 3.838072454242891e-09, "loss": 0.1873, "step": 55327 }, { "epoch": 0.9617410349562829, "grad_norm": 1.9389861260117787, "learning_rate": 3.834592114760116e-09, "loss": 0.1708, "step": 55328 }, { "epoch": 0.9617584174937858, "grad_norm": 2.0248246871181146, "learning_rate": 3.8311133478937106e-09, "loss": 0.1676, "step": 55329 }, { "epoch": 0.9617758000312886, "grad_norm": 0.8900318305576966, "learning_rate": 3.827636153654834e-09, "loss": 0.1333, "step": 55330 }, { "epoch": 0.9617931825687914, "grad_norm": 1.532974318501631, "learning_rate": 3.824160532054365e-09, "loss": 0.1298, "step": 55331 }, { "epoch": 0.9618105651062943, "grad_norm": 1.5030750007809723, "learning_rate": 3.820686483103408e-09, "loss": 0.1521, "step": 55332 }, { "epoch": 0.9618279476437971, "grad_norm": 0.8957723524582459, "learning_rate": 3.817214006813008e-09, "loss": 0.1483, "step": 55333 }, { "epoch": 0.9618453301812999, "grad_norm": 0.9586104003429288, "learning_rate": 3.813743103194045e-09, "loss": 0.1404, "step": 55334 }, { "epoch": 0.9618627127188026, "grad_norm": 1.1329881523613698, "learning_rate": 3.810273772257566e-09, "loss": 0.109, "step": 55335 }, { "epoch": 0.9618800952563055, "grad_norm": 0.8882371030192017, "learning_rate": 3.806806014014618e-09, "loss": 0.1212, "step": 55336 }, { "epoch": 0.9618974777938083, "grad_norm": 1.3781921313147463, "learning_rate": 3.803339828476138e-09, "loss": 0.1441, "step": 55337 }, { "epoch": 0.9619148603313111, "grad_norm": 1.4850508662904538, "learning_rate": 3.79987521565317e-09, "loss": 0.1793, "step": 55338 }, { "epoch": 0.961932242868814, "grad_norm": 0.8888866771790522, "learning_rate": 3.796412175556651e-09, "loss": 0.109, "step": 55339 }, { "epoch": 0.9619496254063168, "grad_norm": 2.529795553143169, "learning_rate": 3.792950708197573e-09, "loss": 0.1418, "step": 55340 }, { "epoch": 0.9619670079438196, "grad_norm": 1.1397022995514257, "learning_rate": 3.7894908135869265e-09, "loss": 0.1484, "step": 55341 }, { "epoch": 0.9619843904813224, "grad_norm": 2.8735879585269686, "learning_rate": 3.786032491735647e-09, "loss": 0.1721, "step": 55342 }, { "epoch": 0.9620017730188253, "grad_norm": 1.3222040142893379, "learning_rate": 3.78257574265467e-09, "loss": 0.1516, "step": 55343 }, { "epoch": 0.9620191555563281, "grad_norm": 1.5384714197134695, "learning_rate": 3.779120566354988e-09, "loss": 0.168, "step": 55344 }, { "epoch": 0.9620365380938309, "grad_norm": 1.369216050170892, "learning_rate": 3.775666962847534e-09, "loss": 0.0657, "step": 55345 }, { "epoch": 0.9620539206313338, "grad_norm": 1.4454439997210062, "learning_rate": 3.772214932143303e-09, "loss": 0.2365, "step": 55346 }, { "epoch": 0.9620713031688366, "grad_norm": 1.3396222261902373, "learning_rate": 3.768764474253172e-09, "loss": 0.1509, "step": 55347 }, { "epoch": 0.9620886857063394, "grad_norm": 3.736877461583106, "learning_rate": 3.765315589188134e-09, "loss": 0.1316, "step": 55348 }, { "epoch": 0.9621060682438423, "grad_norm": 0.8209860336513743, "learning_rate": 3.761868276959123e-09, "loss": 0.1441, "step": 55349 }, { "epoch": 0.9621234507813451, "grad_norm": 1.0155868711376892, "learning_rate": 3.758422537577021e-09, "loss": 0.1947, "step": 55350 }, { "epoch": 0.9621408333188479, "grad_norm": 1.0189588604210313, "learning_rate": 3.754978371052709e-09, "loss": 0.1236, "step": 55351 }, { "epoch": 0.9621582158563508, "grad_norm": 0.9640583268083069, "learning_rate": 3.75153577739723e-09, "loss": 0.1963, "step": 55352 }, { "epoch": 0.9621755983938536, "grad_norm": 1.2658266377200038, "learning_rate": 3.748094756621356e-09, "loss": 0.2287, "step": 55353 }, { "epoch": 0.9621929809313564, "grad_norm": 1.8144022185601656, "learning_rate": 3.744655308736078e-09, "loss": 0.1552, "step": 55354 }, { "epoch": 0.9622103634688591, "grad_norm": 2.206600616887024, "learning_rate": 3.741217433752331e-09, "loss": 0.1512, "step": 55355 }, { "epoch": 0.962227746006362, "grad_norm": 2.4079111347224584, "learning_rate": 3.7377811316809395e-09, "loss": 0.1579, "step": 55356 }, { "epoch": 0.9622451285438648, "grad_norm": 0.9024672436624319, "learning_rate": 3.734346402532784e-09, "loss": 0.1026, "step": 55357 }, { "epoch": 0.9622625110813676, "grad_norm": 1.9721651285879793, "learning_rate": 3.730913246318801e-09, "loss": 0.1785, "step": 55358 }, { "epoch": 0.9622798936188705, "grad_norm": 1.156725734601223, "learning_rate": 3.72748166304987e-09, "loss": 0.1544, "step": 55359 }, { "epoch": 0.9622972761563733, "grad_norm": 1.9785666631945593, "learning_rate": 3.7240516527368705e-09, "loss": 0.1638, "step": 55360 }, { "epoch": 0.9623146586938761, "grad_norm": 0.7410654908170158, "learning_rate": 3.720623215390628e-09, "loss": 0.1798, "step": 55361 }, { "epoch": 0.9623320412313789, "grad_norm": 1.2733018306519674, "learning_rate": 3.7171963510220226e-09, "loss": 0.1429, "step": 55362 }, { "epoch": 0.9623494237688818, "grad_norm": 1.3202558850700439, "learning_rate": 3.713771059641935e-09, "loss": 0.1908, "step": 55363 }, { "epoch": 0.9623668063063846, "grad_norm": 1.829223852061687, "learning_rate": 3.710347341261244e-09, "loss": 0.1767, "step": 55364 }, { "epoch": 0.9623841888438874, "grad_norm": 1.3220139297211084, "learning_rate": 3.706925195890831e-09, "loss": 0.2515, "step": 55365 }, { "epoch": 0.9624015713813903, "grad_norm": 0.9392890416121767, "learning_rate": 3.7035046235413535e-09, "loss": 0.1726, "step": 55366 }, { "epoch": 0.9624189539188931, "grad_norm": 2.681832624730286, "learning_rate": 3.7000856242238585e-09, "loss": 0.2475, "step": 55367 }, { "epoch": 0.9624363364563959, "grad_norm": 1.7673177666792983, "learning_rate": 3.6966681979491153e-09, "loss": 0.2975, "step": 55368 }, { "epoch": 0.9624537189938988, "grad_norm": 1.8194865691692874, "learning_rate": 3.6932523447279484e-09, "loss": 0.1614, "step": 55369 }, { "epoch": 0.9624711015314016, "grad_norm": 1.1422271863980058, "learning_rate": 3.6898380645711822e-09, "loss": 0.1313, "step": 55370 }, { "epoch": 0.9624884840689044, "grad_norm": 1.3916659016097155, "learning_rate": 3.686425357489642e-09, "loss": 0.1637, "step": 55371 }, { "epoch": 0.9625058666064072, "grad_norm": 2.145718458864925, "learning_rate": 3.683014223494152e-09, "loss": 0.22, "step": 55372 }, { "epoch": 0.9625232491439101, "grad_norm": 1.5861886784782497, "learning_rate": 3.679604662595537e-09, "loss": 0.1394, "step": 55373 }, { "epoch": 0.9625406316814129, "grad_norm": 2.100262925395241, "learning_rate": 3.6761966748045103e-09, "loss": 0.1988, "step": 55374 }, { "epoch": 0.9625580142189156, "grad_norm": 0.9388401422517029, "learning_rate": 3.672790260132008e-09, "loss": 0.1718, "step": 55375 }, { "epoch": 0.9625753967564185, "grad_norm": 0.7634953674693813, "learning_rate": 3.669385418588744e-09, "loss": 0.1404, "step": 55376 }, { "epoch": 0.9625927792939213, "grad_norm": 3.243402967556178, "learning_rate": 3.6659821501855425e-09, "loss": 0.2189, "step": 55377 }, { "epoch": 0.9626101618314241, "grad_norm": 2.1034182241385415, "learning_rate": 3.662580454933173e-09, "loss": 0.1308, "step": 55378 }, { "epoch": 0.962627544368927, "grad_norm": 1.033258247200686, "learning_rate": 3.6591803328424596e-09, "loss": 0.1652, "step": 55379 }, { "epoch": 0.9626449269064298, "grad_norm": 1.3786217929854099, "learning_rate": 3.6557817839241167e-09, "loss": 0.15, "step": 55380 }, { "epoch": 0.9626623094439326, "grad_norm": 1.1745083429735095, "learning_rate": 3.652384808188913e-09, "loss": 0.1534, "step": 55381 }, { "epoch": 0.9626796919814354, "grad_norm": 2.6705735738864833, "learning_rate": 3.6489894056476733e-09, "loss": 0.1662, "step": 55382 }, { "epoch": 0.9626970745189383, "grad_norm": 1.6152173454606198, "learning_rate": 3.6455955763111666e-09, "loss": 0.173, "step": 55383 }, { "epoch": 0.9627144570564411, "grad_norm": 1.9869309632030387, "learning_rate": 3.642203320190107e-09, "loss": 0.1739, "step": 55384 }, { "epoch": 0.9627318395939439, "grad_norm": 1.1937859326966007, "learning_rate": 3.638812637295208e-09, "loss": 0.1693, "step": 55385 }, { "epoch": 0.9627492221314468, "grad_norm": 1.3180547490843342, "learning_rate": 3.6354235276372935e-09, "loss": 0.3524, "step": 55386 }, { "epoch": 0.9627666046689496, "grad_norm": 1.103406789834511, "learning_rate": 3.632035991227078e-09, "loss": 0.1527, "step": 55387 }, { "epoch": 0.9627839872064524, "grad_norm": 1.8256805336287187, "learning_rate": 3.628650028075275e-09, "loss": 0.1299, "step": 55388 }, { "epoch": 0.9628013697439552, "grad_norm": 1.1706227031486192, "learning_rate": 3.625265638192654e-09, "loss": 0.1902, "step": 55389 }, { "epoch": 0.9628187522814581, "grad_norm": 2.1865491275136217, "learning_rate": 3.6218828215899276e-09, "loss": 0.1611, "step": 55390 }, { "epoch": 0.9628361348189609, "grad_norm": 3.2878316215293077, "learning_rate": 3.6185015782778107e-09, "loss": 0.22, "step": 55391 }, { "epoch": 0.9628535173564637, "grad_norm": 1.241805605648042, "learning_rate": 3.615121908267016e-09, "loss": 0.164, "step": 55392 }, { "epoch": 0.9628708998939666, "grad_norm": 1.3746550914998745, "learning_rate": 3.6117438115682575e-09, "loss": 0.2232, "step": 55393 }, { "epoch": 0.9628882824314694, "grad_norm": 1.4032942473952896, "learning_rate": 3.608367288192249e-09, "loss": 0.2135, "step": 55394 }, { "epoch": 0.9629056649689721, "grad_norm": 1.3921475134688448, "learning_rate": 3.6049923381497037e-09, "loss": 0.1151, "step": 55395 }, { "epoch": 0.962923047506475, "grad_norm": 1.2101705927513564, "learning_rate": 3.6016189614512803e-09, "loss": 0.1733, "step": 55396 }, { "epoch": 0.9629404300439778, "grad_norm": 2.428059842769766, "learning_rate": 3.5982471581077477e-09, "loss": 0.2008, "step": 55397 }, { "epoch": 0.9629578125814806, "grad_norm": 0.735501013242264, "learning_rate": 3.5948769281297087e-09, "loss": 0.1701, "step": 55398 }, { "epoch": 0.9629751951189834, "grad_norm": 2.1639958456321424, "learning_rate": 3.591508271527932e-09, "loss": 0.2237, "step": 55399 }, { "epoch": 0.9629925776564863, "grad_norm": 1.3342080307420658, "learning_rate": 3.5881411883129652e-09, "loss": 0.1571, "step": 55400 }, { "epoch": 0.9630099601939891, "grad_norm": 2.422405637655046, "learning_rate": 3.5847756784955776e-09, "loss": 0.122, "step": 55401 }, { "epoch": 0.9630273427314919, "grad_norm": 2.1457242473745692, "learning_rate": 3.581411742086482e-09, "loss": 0.2345, "step": 55402 }, { "epoch": 0.9630447252689948, "grad_norm": 1.7708775949636366, "learning_rate": 3.5780493790962264e-09, "loss": 0.3049, "step": 55403 }, { "epoch": 0.9630621078064976, "grad_norm": 0.9701372920181061, "learning_rate": 3.574688589535524e-09, "loss": 0.1794, "step": 55404 }, { "epoch": 0.9630794903440004, "grad_norm": 1.0539544719957956, "learning_rate": 3.5713293734149773e-09, "loss": 0.1705, "step": 55405 }, { "epoch": 0.9630968728815033, "grad_norm": 1.5164881549616416, "learning_rate": 3.567971730745356e-09, "loss": 0.1694, "step": 55406 }, { "epoch": 0.9631142554190061, "grad_norm": 2.176819758873405, "learning_rate": 3.5646156615371515e-09, "loss": 0.2549, "step": 55407 }, { "epoch": 0.9631316379565089, "grad_norm": 1.3039411834945016, "learning_rate": 3.561261165801077e-09, "loss": 0.173, "step": 55408 }, { "epoch": 0.9631490204940117, "grad_norm": 1.3104339656576074, "learning_rate": 3.5579082435477915e-09, "loss": 0.1203, "step": 55409 }, { "epoch": 0.9631664030315146, "grad_norm": 1.1223868686982499, "learning_rate": 3.554556894787897e-09, "loss": 0.1362, "step": 55410 }, { "epoch": 0.9631837855690174, "grad_norm": 1.3928182465875045, "learning_rate": 3.551207119531996e-09, "loss": 0.1793, "step": 55411 }, { "epoch": 0.9632011681065202, "grad_norm": 1.372948202555623, "learning_rate": 3.547858917790747e-09, "loss": 0.1862, "step": 55412 }, { "epoch": 0.9632185506440231, "grad_norm": 1.4029677687810163, "learning_rate": 3.5445122895746417e-09, "loss": 0.0863, "step": 55413 }, { "epoch": 0.9632359331815259, "grad_norm": 2.0104953949132534, "learning_rate": 3.5411672348944487e-09, "loss": 0.1732, "step": 55414 }, { "epoch": 0.9632533157190286, "grad_norm": 1.1222058861878697, "learning_rate": 3.537823753760716e-09, "loss": 0.1156, "step": 55415 }, { "epoch": 0.9632706982565314, "grad_norm": 1.42110550465333, "learning_rate": 3.5344818461839898e-09, "loss": 0.138, "step": 55416 }, { "epoch": 0.9632880807940343, "grad_norm": 0.9623969345810764, "learning_rate": 3.531141512174929e-09, "loss": 0.1613, "step": 55417 }, { "epoch": 0.9633054633315371, "grad_norm": 1.784465525034327, "learning_rate": 3.5278027517440245e-09, "loss": 0.1361, "step": 55418 }, { "epoch": 0.9633228458690399, "grad_norm": 1.4738857817285544, "learning_rate": 3.5244655649019906e-09, "loss": 0.1682, "step": 55419 }, { "epoch": 0.9633402284065428, "grad_norm": 1.473206255412948, "learning_rate": 3.5211299516593183e-09, "loss": 0.1259, "step": 55420 }, { "epoch": 0.9633576109440456, "grad_norm": 0.9360548467838926, "learning_rate": 3.517795912026611e-09, "loss": 0.1625, "step": 55421 }, { "epoch": 0.9633749934815484, "grad_norm": 2.9291058483593724, "learning_rate": 3.5144634460144152e-09, "loss": 0.2221, "step": 55422 }, { "epoch": 0.9633923760190513, "grad_norm": 1.5722967360305498, "learning_rate": 3.511132553633334e-09, "loss": 0.1844, "step": 55423 }, { "epoch": 0.9634097585565541, "grad_norm": 1.0748219040282685, "learning_rate": 3.5078032348938023e-09, "loss": 0.1525, "step": 55424 }, { "epoch": 0.9634271410940569, "grad_norm": 1.131681036605793, "learning_rate": 3.5044754898065355e-09, "loss": 0.2129, "step": 55425 }, { "epoch": 0.9634445236315597, "grad_norm": 1.2006359792033192, "learning_rate": 3.501149318382024e-09, "loss": 0.155, "step": 55426 }, { "epoch": 0.9634619061690626, "grad_norm": 0.9088586272906319, "learning_rate": 3.4978247206307597e-09, "loss": 0.117, "step": 55427 }, { "epoch": 0.9634792887065654, "grad_norm": 1.4937660778065558, "learning_rate": 3.4945016965633454e-09, "loss": 0.1422, "step": 55428 }, { "epoch": 0.9634966712440682, "grad_norm": 1.2341607943450383, "learning_rate": 3.491180246190273e-09, "loss": 0.16, "step": 55429 }, { "epoch": 0.9635140537815711, "grad_norm": 1.0098165174134477, "learning_rate": 3.4878603695220887e-09, "loss": 0.1141, "step": 55430 }, { "epoch": 0.9635314363190739, "grad_norm": 1.90898919823841, "learning_rate": 3.484542066569285e-09, "loss": 0.1249, "step": 55431 }, { "epoch": 0.9635488188565767, "grad_norm": 1.723862596797103, "learning_rate": 3.4812253373424084e-09, "loss": 0.2179, "step": 55432 }, { "epoch": 0.9635662013940796, "grad_norm": 1.76275541350203, "learning_rate": 3.4779101818519507e-09, "loss": 0.1737, "step": 55433 }, { "epoch": 0.9635835839315823, "grad_norm": 1.1045709028921444, "learning_rate": 3.4745966001085146e-09, "loss": 0.2385, "step": 55434 }, { "epoch": 0.9636009664690851, "grad_norm": 1.219264375396269, "learning_rate": 3.4712845921224255e-09, "loss": 0.1765, "step": 55435 }, { "epoch": 0.9636183490065879, "grad_norm": 1.0978900145640007, "learning_rate": 3.4679741579042853e-09, "loss": 0.1401, "step": 55436 }, { "epoch": 0.9636357315440908, "grad_norm": 1.4780898747347389, "learning_rate": 3.4646652974646417e-09, "loss": 0.1544, "step": 55437 }, { "epoch": 0.9636531140815936, "grad_norm": 0.8529392762124376, "learning_rate": 3.461358010813875e-09, "loss": 0.115, "step": 55438 }, { "epoch": 0.9636704966190964, "grad_norm": 1.955848101817616, "learning_rate": 3.4580522979625325e-09, "loss": 0.2477, "step": 55439 }, { "epoch": 0.9636878791565993, "grad_norm": 1.5002540062031227, "learning_rate": 3.4547481589210504e-09, "loss": 0.1797, "step": 55440 }, { "epoch": 0.9637052616941021, "grad_norm": 1.0699062702042068, "learning_rate": 3.4514455936999753e-09, "loss": 0.1511, "step": 55441 }, { "epoch": 0.9637226442316049, "grad_norm": 1.3856697518596781, "learning_rate": 3.448144602309633e-09, "loss": 0.1748, "step": 55442 }, { "epoch": 0.9637400267691077, "grad_norm": 1.164078440951948, "learning_rate": 3.4448451847606252e-09, "loss": 0.1694, "step": 55443 }, { "epoch": 0.9637574093066106, "grad_norm": 2.2873025903628346, "learning_rate": 3.4415473410633333e-09, "loss": 0.1421, "step": 55444 }, { "epoch": 0.9637747918441134, "grad_norm": 0.8871037098670139, "learning_rate": 3.4382510712283043e-09, "loss": 0.1991, "step": 55445 }, { "epoch": 0.9637921743816162, "grad_norm": 1.5729038695063378, "learning_rate": 3.434956375265863e-09, "loss": 0.169, "step": 55446 }, { "epoch": 0.9638095569191191, "grad_norm": 0.6441203279142056, "learning_rate": 3.431663253186501e-09, "loss": 0.1278, "step": 55447 }, { "epoch": 0.9638269394566219, "grad_norm": 1.6417595837432668, "learning_rate": 3.42837170500071e-09, "loss": 0.1878, "step": 55448 }, { "epoch": 0.9638443219941247, "grad_norm": 1.2082981068728436, "learning_rate": 3.425081730718815e-09, "loss": 0.2141, "step": 55449 }, { "epoch": 0.9638617045316276, "grad_norm": 1.1844983937837257, "learning_rate": 3.4217933303513633e-09, "loss": 0.2198, "step": 55450 }, { "epoch": 0.9638790870691304, "grad_norm": 1.4840694713978637, "learning_rate": 3.4185065039086246e-09, "loss": 0.1303, "step": 55451 }, { "epoch": 0.9638964696066332, "grad_norm": 1.1331335430497036, "learning_rate": 3.4152212514012015e-09, "loss": 0.2269, "step": 55452 }, { "epoch": 0.963913852144136, "grad_norm": 1.4149626080671296, "learning_rate": 3.4119375728394185e-09, "loss": 0.235, "step": 55453 }, { "epoch": 0.9639312346816388, "grad_norm": 2.8291017229936153, "learning_rate": 3.408655468233601e-09, "loss": 0.1739, "step": 55454 }, { "epoch": 0.9639486172191416, "grad_norm": 0.7669399179328811, "learning_rate": 3.4053749375942964e-09, "loss": 0.1063, "step": 55455 }, { "epoch": 0.9639659997566444, "grad_norm": 0.979913039623693, "learning_rate": 3.402095980931774e-09, "loss": 0.0754, "step": 55456 }, { "epoch": 0.9639833822941473, "grad_norm": 1.5183844459880795, "learning_rate": 3.3988185982565254e-09, "loss": 0.1427, "step": 55457 }, { "epoch": 0.9640007648316501, "grad_norm": 1.2678870660265713, "learning_rate": 3.3955427895789314e-09, "loss": 0.2021, "step": 55458 }, { "epoch": 0.9640181473691529, "grad_norm": 2.380389858720225, "learning_rate": 3.392268554909317e-09, "loss": 0.1618, "step": 55459 }, { "epoch": 0.9640355299066558, "grad_norm": 0.9028331413339131, "learning_rate": 3.3889958942581176e-09, "loss": 0.1446, "step": 55460 }, { "epoch": 0.9640529124441586, "grad_norm": 1.3613872686622186, "learning_rate": 3.3857248076356038e-09, "loss": 0.1591, "step": 55461 }, { "epoch": 0.9640702949816614, "grad_norm": 0.8368916557100294, "learning_rate": 3.382455295052267e-09, "loss": 0.1621, "step": 55462 }, { "epoch": 0.9640876775191642, "grad_norm": 1.1903651575049834, "learning_rate": 3.3791873565184316e-09, "loss": 0.1886, "step": 55463 }, { "epoch": 0.9641050600566671, "grad_norm": 1.1040060752371144, "learning_rate": 3.3759209920444233e-09, "loss": 0.2782, "step": 55464 }, { "epoch": 0.9641224425941699, "grad_norm": 1.4577566806506224, "learning_rate": 3.3726562016406224e-09, "loss": 0.1881, "step": 55465 }, { "epoch": 0.9641398251316727, "grad_norm": 2.0998492751400324, "learning_rate": 3.369392985317354e-09, "loss": 0.1819, "step": 55466 }, { "epoch": 0.9641572076691756, "grad_norm": 1.353556952107611, "learning_rate": 3.366131343084999e-09, "loss": 0.1476, "step": 55467 }, { "epoch": 0.9641745902066784, "grad_norm": 0.9781826153564614, "learning_rate": 3.3628712749538268e-09, "loss": 0.1019, "step": 55468 }, { "epoch": 0.9641919727441812, "grad_norm": 1.7925315933619899, "learning_rate": 3.3596127809342733e-09, "loss": 0.2119, "step": 55469 }, { "epoch": 0.964209355281684, "grad_norm": 1.2152535544199874, "learning_rate": 3.3563558610366082e-09, "loss": 0.1405, "step": 55470 }, { "epoch": 0.9642267378191869, "grad_norm": 2.416918177807574, "learning_rate": 3.3531005152711013e-09, "loss": 0.1956, "step": 55471 }, { "epoch": 0.9642441203566897, "grad_norm": 1.2367247115776705, "learning_rate": 3.349846743648188e-09, "loss": 0.1689, "step": 55472 }, { "epoch": 0.9642615028941925, "grad_norm": 1.2141968208116989, "learning_rate": 3.346594546178083e-09, "loss": 0.1268, "step": 55473 }, { "epoch": 0.9642788854316953, "grad_norm": 1.8150868979466488, "learning_rate": 3.3433439228711115e-09, "loss": 0.1049, "step": 55474 }, { "epoch": 0.9642962679691981, "grad_norm": 1.889950508216554, "learning_rate": 3.340094873737598e-09, "loss": 0.1367, "step": 55475 }, { "epoch": 0.9643136505067009, "grad_norm": 1.5422578122063395, "learning_rate": 3.3368473987878674e-09, "loss": 0.1155, "step": 55476 }, { "epoch": 0.9643310330442038, "grad_norm": 2.6910499334397002, "learning_rate": 3.33360149803219e-09, "loss": 0.1304, "step": 55477 }, { "epoch": 0.9643484155817066, "grad_norm": 1.4154580375705905, "learning_rate": 3.33035717148078e-09, "loss": 0.1593, "step": 55478 }, { "epoch": 0.9643657981192094, "grad_norm": 2.247543372260396, "learning_rate": 3.3271144191440727e-09, "loss": 0.1949, "step": 55479 }, { "epoch": 0.9643831806567122, "grad_norm": 1.669200690611124, "learning_rate": 3.3238732410322267e-09, "loss": 0.0856, "step": 55480 }, { "epoch": 0.9644005631942151, "grad_norm": 0.8341681751696095, "learning_rate": 3.320633637155512e-09, "loss": 0.1619, "step": 55481 }, { "epoch": 0.9644179457317179, "grad_norm": 1.57430118765659, "learning_rate": 3.3173956075242536e-09, "loss": 0.2237, "step": 55482 }, { "epoch": 0.9644353282692207, "grad_norm": 1.3865455709687569, "learning_rate": 3.314159152148721e-09, "loss": 0.1729, "step": 55483 }, { "epoch": 0.9644527108067236, "grad_norm": 1.2301805377513972, "learning_rate": 3.3109242710391285e-09, "loss": 0.1111, "step": 55484 }, { "epoch": 0.9644700933442264, "grad_norm": 2.382593374219582, "learning_rate": 3.3076909642057447e-09, "loss": 0.1896, "step": 55485 }, { "epoch": 0.9644874758817292, "grad_norm": 1.1136518980596968, "learning_rate": 3.304459231658785e-09, "loss": 0.1299, "step": 55486 }, { "epoch": 0.9645048584192321, "grad_norm": 0.9976077906808163, "learning_rate": 3.3012290734085735e-09, "loss": 0.1989, "step": 55487 }, { "epoch": 0.9645222409567349, "grad_norm": 1.290302224743401, "learning_rate": 3.298000489465269e-09, "loss": 0.1153, "step": 55488 }, { "epoch": 0.9645396234942377, "grad_norm": 1.1301148675596964, "learning_rate": 3.2947734798391415e-09, "loss": 0.1576, "step": 55489 }, { "epoch": 0.9645570060317405, "grad_norm": 2.103990786291917, "learning_rate": 3.291548044540404e-09, "loss": 0.1302, "step": 55490 }, { "epoch": 0.9645743885692434, "grad_norm": 1.3762792138690314, "learning_rate": 3.288324183579383e-09, "loss": 0.1883, "step": 55491 }, { "epoch": 0.9645917711067462, "grad_norm": 1.2216284959259036, "learning_rate": 3.285101896966125e-09, "loss": 0.2019, "step": 55492 }, { "epoch": 0.964609153644249, "grad_norm": 1.6527868966967554, "learning_rate": 3.2818811847108996e-09, "loss": 0.1256, "step": 55493 }, { "epoch": 0.9646265361817518, "grad_norm": 1.830279620710588, "learning_rate": 3.278662046823977e-09, "loss": 0.1638, "step": 55494 }, { "epoch": 0.9646439187192546, "grad_norm": 1.3583692259454963, "learning_rate": 3.275444483315515e-09, "loss": 0.1231, "step": 55495 }, { "epoch": 0.9646613012567574, "grad_norm": 1.669711175885661, "learning_rate": 3.2722284941957277e-09, "loss": 0.1911, "step": 55496 }, { "epoch": 0.9646786837942602, "grad_norm": 1.6470246812983322, "learning_rate": 3.2690140794747747e-09, "loss": 0.2308, "step": 55497 }, { "epoch": 0.9646960663317631, "grad_norm": 1.482540280150223, "learning_rate": 3.2658012391628686e-09, "loss": 0.1687, "step": 55498 }, { "epoch": 0.9647134488692659, "grad_norm": 1.2456996113547285, "learning_rate": 3.2625899732702243e-09, "loss": 0.1435, "step": 55499 }, { "epoch": 0.9647308314067687, "grad_norm": 1.518109784683058, "learning_rate": 3.259380281806945e-09, "loss": 0.146, "step": 55500 }, { "epoch": 0.9647482139442716, "grad_norm": 2.3536303160090637, "learning_rate": 3.256172164783244e-09, "loss": 0.1491, "step": 55501 }, { "epoch": 0.9647655964817744, "grad_norm": 1.2021580777403338, "learning_rate": 3.2529656222092804e-09, "loss": 0.1036, "step": 55502 }, { "epoch": 0.9647829790192772, "grad_norm": 1.4326457203114082, "learning_rate": 3.249760654095268e-09, "loss": 0.1373, "step": 55503 }, { "epoch": 0.9648003615567801, "grad_norm": 1.0345087454533883, "learning_rate": 3.2465572604513658e-09, "loss": 0.1674, "step": 55504 }, { "epoch": 0.9648177440942829, "grad_norm": 0.965982810173225, "learning_rate": 3.2433554412876206e-09, "loss": 0.2225, "step": 55505 }, { "epoch": 0.9648351266317857, "grad_norm": 1.3304346727165168, "learning_rate": 3.240155196614247e-09, "loss": 0.1317, "step": 55506 }, { "epoch": 0.9648525091692886, "grad_norm": 0.9093057141836639, "learning_rate": 3.2369565264414033e-09, "loss": 0.2045, "step": 55507 }, { "epoch": 0.9648698917067914, "grad_norm": 1.1540150527755664, "learning_rate": 3.2337594307792483e-09, "loss": 0.1897, "step": 55508 }, { "epoch": 0.9648872742442942, "grad_norm": 1.592632989084664, "learning_rate": 3.230563909637829e-09, "loss": 0.1538, "step": 55509 }, { "epoch": 0.964904656781797, "grad_norm": 1.376016826204717, "learning_rate": 3.22736996302736e-09, "loss": 0.1607, "step": 55510 }, { "epoch": 0.9649220393192999, "grad_norm": 0.7964960361025429, "learning_rate": 3.224177590957944e-09, "loss": 0.1146, "step": 55511 }, { "epoch": 0.9649394218568027, "grad_norm": 1.989334364431376, "learning_rate": 3.2209867934396284e-09, "loss": 0.3149, "step": 55512 }, { "epoch": 0.9649568043943055, "grad_norm": 1.3828159944554608, "learning_rate": 3.217797570482572e-09, "loss": 0.1077, "step": 55513 }, { "epoch": 0.9649741869318083, "grad_norm": 1.0147126885804238, "learning_rate": 3.2146099220969334e-09, "loss": 0.1696, "step": 55514 }, { "epoch": 0.9649915694693111, "grad_norm": 1.383985035304862, "learning_rate": 3.211423848292816e-09, "loss": 0.1969, "step": 55515 }, { "epoch": 0.9650089520068139, "grad_norm": 1.720430960692507, "learning_rate": 3.2082393490802106e-09, "loss": 0.1334, "step": 55516 }, { "epoch": 0.9650263345443167, "grad_norm": 1.3246034152909862, "learning_rate": 3.2050564244693324e-09, "loss": 0.1566, "step": 55517 }, { "epoch": 0.9650437170818196, "grad_norm": 1.3291204238709697, "learning_rate": 3.201875074470173e-09, "loss": 0.1332, "step": 55518 }, { "epoch": 0.9650610996193224, "grad_norm": 0.7632763257649668, "learning_rate": 3.1986952990928905e-09, "loss": 0.1299, "step": 55519 }, { "epoch": 0.9650784821568252, "grad_norm": 1.090870856222343, "learning_rate": 3.195517098347533e-09, "loss": 0.1725, "step": 55520 }, { "epoch": 0.9650958646943281, "grad_norm": 1.9812339315740892, "learning_rate": 3.1923404722441484e-09, "loss": 0.2031, "step": 55521 }, { "epoch": 0.9651132472318309, "grad_norm": 1.2469151791689361, "learning_rate": 3.1891654207928385e-09, "loss": 0.2306, "step": 55522 }, { "epoch": 0.9651306297693337, "grad_norm": 1.171669954047335, "learning_rate": 3.185991944003652e-09, "loss": 0.1123, "step": 55523 }, { "epoch": 0.9651480123068366, "grad_norm": 1.0293203877747161, "learning_rate": 3.1828200418866358e-09, "loss": 0.2142, "step": 55524 }, { "epoch": 0.9651653948443394, "grad_norm": 1.4343128457655567, "learning_rate": 3.179649714451893e-09, "loss": 0.1096, "step": 55525 }, { "epoch": 0.9651827773818422, "grad_norm": 1.6832282343912297, "learning_rate": 3.1764809617094712e-09, "loss": 0.1475, "step": 55526 }, { "epoch": 0.965200159919345, "grad_norm": 1.739395921387635, "learning_rate": 3.173313783669307e-09, "loss": 0.1429, "step": 55527 }, { "epoch": 0.9652175424568479, "grad_norm": 1.1672045746616426, "learning_rate": 3.1701481803415586e-09, "loss": 0.1981, "step": 55528 }, { "epoch": 0.9652349249943507, "grad_norm": 1.1451742231685456, "learning_rate": 3.1669841517362184e-09, "loss": 0.141, "step": 55529 }, { "epoch": 0.9652523075318535, "grad_norm": 1.3045097858161048, "learning_rate": 3.163821697863334e-09, "loss": 0.1009, "step": 55530 }, { "epoch": 0.9652696900693564, "grad_norm": 2.1629779808646736, "learning_rate": 3.160660818732841e-09, "loss": 0.1606, "step": 55531 }, { "epoch": 0.9652870726068592, "grad_norm": 0.916690922223973, "learning_rate": 3.1575015143548435e-09, "loss": 0.1725, "step": 55532 }, { "epoch": 0.965304455144362, "grad_norm": 1.486327780579104, "learning_rate": 3.1543437847393884e-09, "loss": 0.1538, "step": 55533 }, { "epoch": 0.9653218376818647, "grad_norm": 0.9171599283304991, "learning_rate": 3.151187629896357e-09, "loss": 0.1994, "step": 55534 }, { "epoch": 0.9653392202193676, "grad_norm": 1.5582412558971819, "learning_rate": 3.148033049835852e-09, "loss": 0.1339, "step": 55535 }, { "epoch": 0.9653566027568704, "grad_norm": 1.4100185007630692, "learning_rate": 3.1448800445678656e-09, "loss": 0.1572, "step": 55536 }, { "epoch": 0.9653739852943732, "grad_norm": 1.3269264545309103, "learning_rate": 3.14172861410239e-09, "loss": 0.2152, "step": 55537 }, { "epoch": 0.9653913678318761, "grad_norm": 0.5362021390073166, "learning_rate": 3.138578758449362e-09, "loss": 0.1362, "step": 55538 }, { "epoch": 0.9654087503693789, "grad_norm": 2.1719533609104276, "learning_rate": 3.1354304776188277e-09, "loss": 0.1776, "step": 55539 }, { "epoch": 0.9654261329068817, "grad_norm": 1.1598261599266797, "learning_rate": 3.1322837716206696e-09, "loss": 0.1775, "step": 55540 }, { "epoch": 0.9654435154443846, "grad_norm": 1.215475618787658, "learning_rate": 3.12913864046499e-09, "loss": 0.1353, "step": 55541 }, { "epoch": 0.9654608979818874, "grad_norm": 1.9428077926388407, "learning_rate": 3.1259950841617253e-09, "loss": 0.1281, "step": 55542 }, { "epoch": 0.9654782805193902, "grad_norm": 1.241637922202991, "learning_rate": 3.122853102720757e-09, "loss": 0.1643, "step": 55543 }, { "epoch": 0.965495663056893, "grad_norm": 1.311594314233765, "learning_rate": 3.1197126961521326e-09, "loss": 0.1414, "step": 55544 }, { "epoch": 0.9655130455943959, "grad_norm": 1.2619076736471413, "learning_rate": 3.1165738644657326e-09, "loss": 0.1316, "step": 55545 }, { "epoch": 0.9655304281318987, "grad_norm": 1.0882473994886297, "learning_rate": 3.113436607671549e-09, "loss": 0.1706, "step": 55546 }, { "epoch": 0.9655478106694015, "grad_norm": 2.1174638829762595, "learning_rate": 3.110300925779574e-09, "loss": 0.1227, "step": 55547 }, { "epoch": 0.9655651932069044, "grad_norm": 1.2316212493364687, "learning_rate": 3.1071668187996336e-09, "loss": 0.193, "step": 55548 }, { "epoch": 0.9655825757444072, "grad_norm": 1.5199812958839516, "learning_rate": 3.1040342867417744e-09, "loss": 0.1499, "step": 55549 }, { "epoch": 0.96559995828191, "grad_norm": 2.3721837751993786, "learning_rate": 3.1009033296158226e-09, "loss": 0.2996, "step": 55550 }, { "epoch": 0.9656173408194129, "grad_norm": 1.5055953933235517, "learning_rate": 3.09777394743177e-09, "loss": 0.1546, "step": 55551 }, { "epoch": 0.9656347233569157, "grad_norm": 1.5324926088749424, "learning_rate": 3.0946461401994974e-09, "loss": 0.1147, "step": 55552 }, { "epoch": 0.9656521058944185, "grad_norm": 1.8434816074022593, "learning_rate": 3.091519907928941e-09, "loss": 0.19, "step": 55553 }, { "epoch": 0.9656694884319212, "grad_norm": 2.112311796520525, "learning_rate": 3.088395250630038e-09, "loss": 0.173, "step": 55554 }, { "epoch": 0.9656868709694241, "grad_norm": 1.950940532980164, "learning_rate": 3.0852721683126136e-09, "loss": 0.1918, "step": 55555 }, { "epoch": 0.9657042535069269, "grad_norm": 1.681149925700116, "learning_rate": 3.08215066098666e-09, "loss": 0.1457, "step": 55556 }, { "epoch": 0.9657216360444297, "grad_norm": 1.6098553197317038, "learning_rate": 3.0790307286620023e-09, "loss": 0.1357, "step": 55557 }, { "epoch": 0.9657390185819326, "grad_norm": 1.4915526038842197, "learning_rate": 3.0759123713485215e-09, "loss": 0.2061, "step": 55558 }, { "epoch": 0.9657564011194354, "grad_norm": 1.241138445691002, "learning_rate": 3.0727955890561542e-09, "loss": 0.1453, "step": 55559 }, { "epoch": 0.9657737836569382, "grad_norm": 2.1015556118513063, "learning_rate": 3.069680381794726e-09, "loss": 0.2543, "step": 55560 }, { "epoch": 0.965791166194441, "grad_norm": 1.284644886087411, "learning_rate": 3.0665667495742285e-09, "loss": 0.1902, "step": 55561 }, { "epoch": 0.9658085487319439, "grad_norm": 1.3950653577468455, "learning_rate": 3.063454692404377e-09, "loss": 0.1576, "step": 55562 }, { "epoch": 0.9658259312694467, "grad_norm": 1.2637966486337175, "learning_rate": 3.0603442102950517e-09, "loss": 0.1379, "step": 55563 }, { "epoch": 0.9658433138069495, "grad_norm": 3.128006314428159, "learning_rate": 3.0572353032562447e-09, "loss": 0.1482, "step": 55564 }, { "epoch": 0.9658606963444524, "grad_norm": 1.4839465435879102, "learning_rate": 3.054127971297671e-09, "loss": 0.1694, "step": 55565 }, { "epoch": 0.9658780788819552, "grad_norm": 1.1770405296316326, "learning_rate": 3.051022214429266e-09, "loss": 0.1449, "step": 55566 }, { "epoch": 0.965895461419458, "grad_norm": 2.084310287612511, "learning_rate": 3.047918032660857e-09, "loss": 0.1887, "step": 55567 }, { "epoch": 0.9659128439569609, "grad_norm": 0.8915834812322445, "learning_rate": 3.044815426002212e-09, "loss": 0.1681, "step": 55568 }, { "epoch": 0.9659302264944637, "grad_norm": 0.7578930968466595, "learning_rate": 3.0417143944632682e-09, "loss": 0.1712, "step": 55569 }, { "epoch": 0.9659476090319665, "grad_norm": 2.3719391630907216, "learning_rate": 3.038614938053796e-09, "loss": 0.1857, "step": 55570 }, { "epoch": 0.9659649915694694, "grad_norm": 1.6430579984694946, "learning_rate": 3.0355170567836207e-09, "loss": 0.1495, "step": 55571 }, { "epoch": 0.9659823741069722, "grad_norm": 1.5125308090030238, "learning_rate": 3.0324207506625675e-09, "loss": 0.145, "step": 55572 }, { "epoch": 0.9659997566444749, "grad_norm": 1.6226966832418386, "learning_rate": 3.029326019700462e-09, "loss": 0.2306, "step": 55573 }, { "epoch": 0.9660171391819777, "grad_norm": 1.8727237212092624, "learning_rate": 3.0262328639070744e-09, "loss": 0.2, "step": 55574 }, { "epoch": 0.9660345217194806, "grad_norm": 0.9785998818798767, "learning_rate": 3.0231412832922854e-09, "loss": 0.2551, "step": 55575 }, { "epoch": 0.9660519042569834, "grad_norm": 2.0644011450540294, "learning_rate": 3.0200512778658095e-09, "loss": 0.1644, "step": 55576 }, { "epoch": 0.9660692867944862, "grad_norm": 1.4388729774498255, "learning_rate": 3.016962847637472e-09, "loss": 0.1061, "step": 55577 }, { "epoch": 0.9660866693319891, "grad_norm": 1.684214079149119, "learning_rate": 3.0138759926170986e-09, "loss": 0.1242, "step": 55578 }, { "epoch": 0.9661040518694919, "grad_norm": 1.9763228377906632, "learning_rate": 3.010790712814404e-09, "loss": 0.1337, "step": 55579 }, { "epoch": 0.9661214344069947, "grad_norm": 1.582255837007414, "learning_rate": 3.0077070082392686e-09, "loss": 0.1576, "step": 55580 }, { "epoch": 0.9661388169444975, "grad_norm": 1.6800877440902788, "learning_rate": 3.0046248789013517e-09, "loss": 0.1734, "step": 55581 }, { "epoch": 0.9661561994820004, "grad_norm": 1.347312026149992, "learning_rate": 3.001544324810479e-09, "loss": 0.1405, "step": 55582 }, { "epoch": 0.9661735820195032, "grad_norm": 1.5514480366502361, "learning_rate": 2.9984653459763644e-09, "loss": 0.1774, "step": 55583 }, { "epoch": 0.966190964557006, "grad_norm": 0.7693306772810776, "learning_rate": 2.995387942408889e-09, "loss": 0.0923, "step": 55584 }, { "epoch": 0.9662083470945089, "grad_norm": 1.3749364432644444, "learning_rate": 2.9923121141176568e-09, "loss": 0.1507, "step": 55585 }, { "epoch": 0.9662257296320117, "grad_norm": 1.3741621588850945, "learning_rate": 2.989237861112548e-09, "loss": 0.2119, "step": 55586 }, { "epoch": 0.9662431121695145, "grad_norm": 1.5673404870029684, "learning_rate": 2.986165183403222e-09, "loss": 0.2204, "step": 55587 }, { "epoch": 0.9662604947070174, "grad_norm": 1.1799490459619417, "learning_rate": 2.9830940809994487e-09, "loss": 0.0876, "step": 55588 }, { "epoch": 0.9662778772445202, "grad_norm": 1.0541952176130218, "learning_rate": 2.9800245539109426e-09, "loss": 0.117, "step": 55589 }, { "epoch": 0.966295259782023, "grad_norm": 1.2896348906132078, "learning_rate": 2.9769566021474736e-09, "loss": 0.1696, "step": 55590 }, { "epoch": 0.9663126423195258, "grad_norm": 1.350244676980783, "learning_rate": 2.9738902257187004e-09, "loss": 0.1039, "step": 55591 }, { "epoch": 0.9663300248570287, "grad_norm": 1.0302261899588547, "learning_rate": 2.970825424634449e-09, "loss": 0.1468, "step": 55592 }, { "epoch": 0.9663474073945314, "grad_norm": 0.9100650657056841, "learning_rate": 2.9677621989043223e-09, "loss": 0.157, "step": 55593 }, { "epoch": 0.9663647899320342, "grad_norm": 1.627903458966246, "learning_rate": 2.964700548538035e-09, "loss": 0.1471, "step": 55594 }, { "epoch": 0.9663821724695371, "grad_norm": 1.201910597137098, "learning_rate": 2.9616404735453573e-09, "loss": 0.1357, "step": 55595 }, { "epoch": 0.9663995550070399, "grad_norm": 1.5096335245422376, "learning_rate": 2.9585819739359473e-09, "loss": 0.1907, "step": 55596 }, { "epoch": 0.9664169375445427, "grad_norm": 1.1270311632383627, "learning_rate": 2.955525049719576e-09, "loss": 0.1242, "step": 55597 }, { "epoch": 0.9664343200820456, "grad_norm": 1.4530059812077374, "learning_rate": 2.9524697009057906e-09, "loss": 0.1396, "step": 55598 }, { "epoch": 0.9664517026195484, "grad_norm": 1.2597498961981248, "learning_rate": 2.9494159275043616e-09, "loss": 0.11, "step": 55599 }, { "epoch": 0.9664690851570512, "grad_norm": 1.665058402780721, "learning_rate": 2.946363729525003e-09, "loss": 0.1193, "step": 55600 }, { "epoch": 0.966486467694554, "grad_norm": 1.2671361518704394, "learning_rate": 2.9433131069773185e-09, "loss": 0.1642, "step": 55601 }, { "epoch": 0.9665038502320569, "grad_norm": 1.600331381344051, "learning_rate": 2.940264059870967e-09, "loss": 0.2304, "step": 55602 }, { "epoch": 0.9665212327695597, "grad_norm": 1.1188733066789438, "learning_rate": 2.937216588215663e-09, "loss": 0.2231, "step": 55603 }, { "epoch": 0.9665386153070625, "grad_norm": 0.9084469866232184, "learning_rate": 2.9341706920210653e-09, "loss": 0.1776, "step": 55604 }, { "epoch": 0.9665559978445654, "grad_norm": 1.4839191778463305, "learning_rate": 2.9311263712968324e-09, "loss": 0.1807, "step": 55605 }, { "epoch": 0.9665733803820682, "grad_norm": 2.446399954735675, "learning_rate": 2.9280836260525687e-09, "loss": 0.1853, "step": 55606 }, { "epoch": 0.966590762919571, "grad_norm": 1.973521230296719, "learning_rate": 2.925042456297988e-09, "loss": 0.1852, "step": 55607 }, { "epoch": 0.9666081454570739, "grad_norm": 1.5459336520826041, "learning_rate": 2.9220028620426385e-09, "loss": 0.1236, "step": 55608 }, { "epoch": 0.9666255279945767, "grad_norm": 0.7892527207110451, "learning_rate": 2.9189648432961787e-09, "loss": 0.1877, "step": 55609 }, { "epoch": 0.9666429105320795, "grad_norm": 1.8176907679354848, "learning_rate": 2.9159284000682682e-09, "loss": 0.186, "step": 55610 }, { "epoch": 0.9666602930695823, "grad_norm": 1.1218079652140507, "learning_rate": 2.9128935323685656e-09, "loss": 0.1717, "step": 55611 }, { "epoch": 0.9666776756070852, "grad_norm": 2.291165829734244, "learning_rate": 2.909860240206674e-09, "loss": 0.2248, "step": 55612 }, { "epoch": 0.9666950581445879, "grad_norm": 1.245582798628194, "learning_rate": 2.9068285235920864e-09, "loss": 0.1791, "step": 55613 }, { "epoch": 0.9667124406820907, "grad_norm": 3.0950600557775347, "learning_rate": 2.9037983825345725e-09, "loss": 0.2393, "step": 55614 }, { "epoch": 0.9667298232195936, "grad_norm": 1.603629492520495, "learning_rate": 2.9007698170436244e-09, "loss": 0.2028, "step": 55615 }, { "epoch": 0.9667472057570964, "grad_norm": 1.9668490818527369, "learning_rate": 2.8977428271289574e-09, "loss": 0.2044, "step": 55616 }, { "epoch": 0.9667645882945992, "grad_norm": 1.4019367200485713, "learning_rate": 2.8947174128000076e-09, "loss": 0.1674, "step": 55617 }, { "epoch": 0.966781970832102, "grad_norm": 0.9723623021852432, "learning_rate": 2.8916935740665447e-09, "loss": 0.1523, "step": 55618 }, { "epoch": 0.9667993533696049, "grad_norm": 1.0231918355429197, "learning_rate": 2.8886713109380066e-09, "loss": 0.1839, "step": 55619 }, { "epoch": 0.9668167359071077, "grad_norm": 0.8017843550418036, "learning_rate": 2.8856506234239964e-09, "loss": 0.1144, "step": 55620 }, { "epoch": 0.9668341184446105, "grad_norm": 1.2624053774205501, "learning_rate": 2.8826315115341726e-09, "loss": 0.236, "step": 55621 }, { "epoch": 0.9668515009821134, "grad_norm": 0.9819125445506027, "learning_rate": 2.879613975278028e-09, "loss": 0.1374, "step": 55622 }, { "epoch": 0.9668688835196162, "grad_norm": 1.6180318511294471, "learning_rate": 2.8765980146651657e-09, "loss": 0.1786, "step": 55623 }, { "epoch": 0.966886266057119, "grad_norm": 1.0898089027297182, "learning_rate": 2.8735836297051343e-09, "loss": 0.1452, "step": 55624 }, { "epoch": 0.9669036485946219, "grad_norm": 1.350772601654464, "learning_rate": 2.870570820407425e-09, "loss": 0.0918, "step": 55625 }, { "epoch": 0.9669210311321247, "grad_norm": 0.5394516161012416, "learning_rate": 2.8675595867816983e-09, "loss": 0.1415, "step": 55626 }, { "epoch": 0.9669384136696275, "grad_norm": 1.453443922974417, "learning_rate": 2.8645499288373897e-09, "loss": 0.2234, "step": 55627 }, { "epoch": 0.9669557962071303, "grad_norm": 1.4279962254917058, "learning_rate": 2.861541846584159e-09, "loss": 0.1776, "step": 55628 }, { "epoch": 0.9669731787446332, "grad_norm": 1.541425562426657, "learning_rate": 2.8585353400314426e-09, "loss": 0.1611, "step": 55629 }, { "epoch": 0.966990561282136, "grad_norm": 1.8138681034266202, "learning_rate": 2.855530409188789e-09, "loss": 0.1479, "step": 55630 }, { "epoch": 0.9670079438196388, "grad_norm": 1.7737464781052525, "learning_rate": 2.8525270540657455e-09, "loss": 0.17, "step": 55631 }, { "epoch": 0.9670253263571417, "grad_norm": 1.4737944208051375, "learning_rate": 2.8495252746718603e-09, "loss": 0.1978, "step": 55632 }, { "epoch": 0.9670427088946444, "grad_norm": 1.1186057843110515, "learning_rate": 2.8465250710165147e-09, "loss": 0.2288, "step": 55633 }, { "epoch": 0.9670600914321472, "grad_norm": 1.1228360409374394, "learning_rate": 2.8435264431093676e-09, "loss": 0.2006, "step": 55634 }, { "epoch": 0.96707747396965, "grad_norm": 1.5458148983911164, "learning_rate": 2.8405293909599115e-09, "loss": 0.1457, "step": 55635 }, { "epoch": 0.9670948565071529, "grad_norm": 1.9550396175368607, "learning_rate": 2.837533914577528e-09, "loss": 0.1941, "step": 55636 }, { "epoch": 0.9671122390446557, "grad_norm": 1.3088319541382278, "learning_rate": 2.83454001397182e-09, "loss": 0.2208, "step": 55637 }, { "epoch": 0.9671296215821585, "grad_norm": 1.1710739106090768, "learning_rate": 2.831547689152225e-09, "loss": 0.1829, "step": 55638 }, { "epoch": 0.9671470041196614, "grad_norm": 1.0872150650900638, "learning_rate": 2.828556940128235e-09, "loss": 0.1546, "step": 55639 }, { "epoch": 0.9671643866571642, "grad_norm": 1.69433246164659, "learning_rate": 2.825567766909398e-09, "loss": 0.1763, "step": 55640 }, { "epoch": 0.967181769194667, "grad_norm": 1.0665784695234692, "learning_rate": 2.82258016950504e-09, "loss": 0.1459, "step": 55641 }, { "epoch": 0.9671991517321699, "grad_norm": 1.4042221912281119, "learning_rate": 2.819594147924764e-09, "loss": 0.1926, "step": 55642 }, { "epoch": 0.9672165342696727, "grad_norm": 2.3417413427901104, "learning_rate": 2.8166097021780077e-09, "loss": 0.0878, "step": 55643 }, { "epoch": 0.9672339168071755, "grad_norm": 1.6234957643557477, "learning_rate": 2.813626832274152e-09, "loss": 0.1531, "step": 55644 }, { "epoch": 0.9672512993446784, "grad_norm": 1.2876137464632997, "learning_rate": 2.8106455382227445e-09, "loss": 0.1272, "step": 55645 }, { "epoch": 0.9672686818821812, "grad_norm": 0.9582607831304687, "learning_rate": 2.8076658200332225e-09, "loss": 0.154, "step": 55646 }, { "epoch": 0.967286064419684, "grad_norm": 2.139060488358018, "learning_rate": 2.804687677715023e-09, "loss": 0.1424, "step": 55647 }, { "epoch": 0.9673034469571868, "grad_norm": 1.3526634587361088, "learning_rate": 2.801711111277527e-09, "loss": 0.2272, "step": 55648 }, { "epoch": 0.9673208294946897, "grad_norm": 2.147694780957241, "learning_rate": 2.7987361207302275e-09, "loss": 0.2433, "step": 55649 }, { "epoch": 0.9673382120321925, "grad_norm": 1.0881638557268456, "learning_rate": 2.795762706082505e-09, "loss": 0.1384, "step": 55650 }, { "epoch": 0.9673555945696953, "grad_norm": 1.125495225848565, "learning_rate": 2.7927908673438525e-09, "loss": 0.1816, "step": 55651 }, { "epoch": 0.9673729771071982, "grad_norm": 1.0475745047522365, "learning_rate": 2.789820604523652e-09, "loss": 0.1491, "step": 55652 }, { "epoch": 0.9673903596447009, "grad_norm": 1.3777011181511294, "learning_rate": 2.7868519176313386e-09, "loss": 0.1292, "step": 55653 }, { "epoch": 0.9674077421822037, "grad_norm": 5.971539613974263, "learning_rate": 2.78388480667624e-09, "loss": 0.2089, "step": 55654 }, { "epoch": 0.9674251247197065, "grad_norm": 3.012255945610313, "learning_rate": 2.7809192716679032e-09, "loss": 0.1946, "step": 55655 }, { "epoch": 0.9674425072572094, "grad_norm": 2.226024323608243, "learning_rate": 2.7779553126155986e-09, "loss": 0.2315, "step": 55656 }, { "epoch": 0.9674598897947122, "grad_norm": 0.7355700077485651, "learning_rate": 2.7749929295287632e-09, "loss": 0.1102, "step": 55657 }, { "epoch": 0.967477272332215, "grad_norm": 1.321779449830849, "learning_rate": 2.7720321224168343e-09, "loss": 0.23, "step": 55658 }, { "epoch": 0.9674946548697179, "grad_norm": 1.4458555304099445, "learning_rate": 2.769072891289137e-09, "loss": 0.129, "step": 55659 }, { "epoch": 0.9675120374072207, "grad_norm": 1.3354329204115316, "learning_rate": 2.7661152361550534e-09, "loss": 0.4243, "step": 55660 }, { "epoch": 0.9675294199447235, "grad_norm": 2.5858804915908626, "learning_rate": 2.7631591570239644e-09, "loss": 0.141, "step": 55661 }, { "epoch": 0.9675468024822264, "grad_norm": 2.265684668436897, "learning_rate": 2.760204653905307e-09, "loss": 0.2969, "step": 55662 }, { "epoch": 0.9675641850197292, "grad_norm": 1.6757712320761236, "learning_rate": 2.7572517268083515e-09, "loss": 0.2369, "step": 55663 }, { "epoch": 0.967581567557232, "grad_norm": 0.9341214320785945, "learning_rate": 2.75430037574248e-09, "loss": 0.1251, "step": 55664 }, { "epoch": 0.9675989500947348, "grad_norm": 0.9708947089976555, "learning_rate": 2.7513506007170727e-09, "loss": 0.1432, "step": 55665 }, { "epoch": 0.9676163326322377, "grad_norm": 1.3186024431160748, "learning_rate": 2.748402401741512e-09, "loss": 0.1328, "step": 55666 }, { "epoch": 0.9676337151697405, "grad_norm": 1.1476514251047283, "learning_rate": 2.7454557788250677e-09, "loss": 0.1914, "step": 55667 }, { "epoch": 0.9676510977072433, "grad_norm": 1.4348638369008668, "learning_rate": 2.742510731977066e-09, "loss": 0.2117, "step": 55668 }, { "epoch": 0.9676684802447462, "grad_norm": 1.864328213776326, "learning_rate": 2.739567261206943e-09, "loss": 0.1621, "step": 55669 }, { "epoch": 0.967685862782249, "grad_norm": 0.9337289102008374, "learning_rate": 2.7366253665239147e-09, "loss": 0.1929, "step": 55670 }, { "epoch": 0.9677032453197518, "grad_norm": 1.0168042000128479, "learning_rate": 2.7336850479374173e-09, "loss": 0.1753, "step": 55671 }, { "epoch": 0.9677206278572547, "grad_norm": 6.38985231071473, "learning_rate": 2.7307463054566106e-09, "loss": 0.167, "step": 55672 }, { "epoch": 0.9677380103947574, "grad_norm": 1.484014668055803, "learning_rate": 2.7278091390909862e-09, "loss": 0.1852, "step": 55673 }, { "epoch": 0.9677553929322602, "grad_norm": 1.1542542318464342, "learning_rate": 2.7248735488498155e-09, "loss": 0.1916, "step": 55674 }, { "epoch": 0.967772775469763, "grad_norm": 4.163083955620376, "learning_rate": 2.7219395347423125e-09, "loss": 0.1781, "step": 55675 }, { "epoch": 0.9677901580072659, "grad_norm": 1.339658152743644, "learning_rate": 2.719007096777859e-09, "loss": 0.2073, "step": 55676 }, { "epoch": 0.9678075405447687, "grad_norm": 0.8827723984925258, "learning_rate": 2.71607623496567e-09, "loss": 0.129, "step": 55677 }, { "epoch": 0.9678249230822715, "grad_norm": 0.9141190834246019, "learning_rate": 2.713146949315126e-09, "loss": 0.2374, "step": 55678 }, { "epoch": 0.9678423056197744, "grad_norm": 1.1119230054449278, "learning_rate": 2.7102192398354983e-09, "loss": 0.1133, "step": 55679 }, { "epoch": 0.9678596881572772, "grad_norm": 1.7588132277387682, "learning_rate": 2.707293106535946e-09, "loss": 0.2155, "step": 55680 }, { "epoch": 0.96787707069478, "grad_norm": 1.0308128898807627, "learning_rate": 2.7043685494259617e-09, "loss": 0.2326, "step": 55681 }, { "epoch": 0.9678944532322828, "grad_norm": 1.8180424600976715, "learning_rate": 2.7014455685145932e-09, "loss": 0.2529, "step": 55682 }, { "epoch": 0.9679118357697857, "grad_norm": 1.5293511994317366, "learning_rate": 2.698524163811222e-09, "loss": 0.1682, "step": 55683 }, { "epoch": 0.9679292183072885, "grad_norm": 1.894086633294206, "learning_rate": 2.695604335325119e-09, "loss": 0.1826, "step": 55684 }, { "epoch": 0.9679466008447913, "grad_norm": 1.3689354468604142, "learning_rate": 2.6926860830654983e-09, "loss": 0.2266, "step": 55685 }, { "epoch": 0.9679639833822942, "grad_norm": 1.1167160595397987, "learning_rate": 2.6897694070415757e-09, "loss": 0.1216, "step": 55686 }, { "epoch": 0.967981365919797, "grad_norm": 1.0534324223300897, "learning_rate": 2.6868543072626758e-09, "loss": 0.1281, "step": 55687 }, { "epoch": 0.9679987484572998, "grad_norm": 1.2444575556384347, "learning_rate": 2.6839407837380146e-09, "loss": 0.1614, "step": 55688 }, { "epoch": 0.9680161309948027, "grad_norm": 1.3583179078572027, "learning_rate": 2.681028836476806e-09, "loss": 0.1754, "step": 55689 }, { "epoch": 0.9680335135323055, "grad_norm": 1.259926984074263, "learning_rate": 2.678118465488266e-09, "loss": 0.1213, "step": 55690 }, { "epoch": 0.9680508960698083, "grad_norm": 2.25000461370447, "learning_rate": 2.6752096707816086e-09, "loss": 0.1428, "step": 55691 }, { "epoch": 0.9680682786073112, "grad_norm": 1.5592375337748103, "learning_rate": 2.6723024523661596e-09, "loss": 0.2483, "step": 55692 }, { "epoch": 0.9680856611448139, "grad_norm": 0.9487433509895129, "learning_rate": 2.6693968102510232e-09, "loss": 0.2394, "step": 55693 }, { "epoch": 0.9681030436823167, "grad_norm": 0.9332946057773135, "learning_rate": 2.6664927444454698e-09, "loss": 0.3881, "step": 55694 }, { "epoch": 0.9681204262198195, "grad_norm": 1.2934257812742578, "learning_rate": 2.6635902549586032e-09, "loss": 0.0971, "step": 55695 }, { "epoch": 0.9681378087573224, "grad_norm": 1.0265041996233202, "learning_rate": 2.6606893417997488e-09, "loss": 0.1434, "step": 55696 }, { "epoch": 0.9681551912948252, "grad_norm": 0.8017589621759454, "learning_rate": 2.6577900049780666e-09, "loss": 0.158, "step": 55697 }, { "epoch": 0.968172573832328, "grad_norm": 1.5944262604926691, "learning_rate": 2.6548922445027157e-09, "loss": 0.1938, "step": 55698 }, { "epoch": 0.9681899563698309, "grad_norm": 2.091244251973571, "learning_rate": 2.651996060382855e-09, "loss": 0.147, "step": 55699 }, { "epoch": 0.9682073389073337, "grad_norm": 0.9607000397644211, "learning_rate": 2.6491014526277555e-09, "loss": 0.1524, "step": 55700 }, { "epoch": 0.9682247214448365, "grad_norm": 1.1787626625413439, "learning_rate": 2.6462084212465208e-09, "loss": 0.1206, "step": 55701 }, { "epoch": 0.9682421039823393, "grad_norm": 0.9998893871416994, "learning_rate": 2.6433169662483656e-09, "loss": 0.1307, "step": 55702 }, { "epoch": 0.9682594865198422, "grad_norm": 1.3471755699877574, "learning_rate": 2.640427087642394e-09, "loss": 0.2362, "step": 55703 }, { "epoch": 0.968276869057345, "grad_norm": 0.9986707560504776, "learning_rate": 2.6375387854378207e-09, "loss": 0.1593, "step": 55704 }, { "epoch": 0.9682942515948478, "grad_norm": 1.3960932201309173, "learning_rate": 2.6346520596437493e-09, "loss": 0.1836, "step": 55705 }, { "epoch": 0.9683116341323507, "grad_norm": 1.9631236651414714, "learning_rate": 2.631766910269395e-09, "loss": 0.1781, "step": 55706 }, { "epoch": 0.9683290166698535, "grad_norm": 0.9573748427532551, "learning_rate": 2.6288833373238616e-09, "loss": 0.1709, "step": 55707 }, { "epoch": 0.9683463992073563, "grad_norm": 0.8005058878132598, "learning_rate": 2.6260013408162527e-09, "loss": 0.1418, "step": 55708 }, { "epoch": 0.9683637817448592, "grad_norm": 1.6483909266238232, "learning_rate": 2.6231209207557837e-09, "loss": 0.1364, "step": 55709 }, { "epoch": 0.968381164282362, "grad_norm": 1.936498050905517, "learning_rate": 2.6202420771515574e-09, "loss": 0.1478, "step": 55710 }, { "epoch": 0.9683985468198648, "grad_norm": 1.625780916487914, "learning_rate": 2.617364810012679e-09, "loss": 0.1694, "step": 55711 }, { "epoch": 0.9684159293573675, "grad_norm": 1.767373560464085, "learning_rate": 2.614489119348251e-09, "loss": 0.1965, "step": 55712 }, { "epoch": 0.9684333118948704, "grad_norm": 1.7523397030092707, "learning_rate": 2.6116150051674335e-09, "loss": 0.1733, "step": 55713 }, { "epoch": 0.9684506944323732, "grad_norm": 1.2467151198046131, "learning_rate": 2.6087424674792745e-09, "loss": 0.3124, "step": 55714 }, { "epoch": 0.968468076969876, "grad_norm": 2.006794149390412, "learning_rate": 2.6058715062929894e-09, "loss": 0.1804, "step": 55715 }, { "epoch": 0.9684854595073789, "grad_norm": 0.8698244571793317, "learning_rate": 2.60300212161757e-09, "loss": 0.1148, "step": 55716 }, { "epoch": 0.9685028420448817, "grad_norm": 1.5943234900365644, "learning_rate": 2.600134313462121e-09, "loss": 0.307, "step": 55717 }, { "epoch": 0.9685202245823845, "grad_norm": 1.0606115291814429, "learning_rate": 2.597268081835802e-09, "loss": 0.2168, "step": 55718 }, { "epoch": 0.9685376071198873, "grad_norm": 1.9842880723618939, "learning_rate": 2.59440342674766e-09, "loss": 0.1612, "step": 55719 }, { "epoch": 0.9685549896573902, "grad_norm": 2.240778166022638, "learning_rate": 2.5915403482067444e-09, "loss": 0.223, "step": 55720 }, { "epoch": 0.968572372194893, "grad_norm": 0.9296764670720873, "learning_rate": 2.588678846222159e-09, "loss": 0.2531, "step": 55721 }, { "epoch": 0.9685897547323958, "grad_norm": 1.5200752341097958, "learning_rate": 2.5858189208029515e-09, "loss": 0.1946, "step": 55722 }, { "epoch": 0.9686071372698987, "grad_norm": 1.3784272083728675, "learning_rate": 2.582960571958226e-09, "loss": 0.1427, "step": 55723 }, { "epoch": 0.9686245198074015, "grad_norm": 1.1857098333766734, "learning_rate": 2.580103799697031e-09, "loss": 0.1826, "step": 55724 }, { "epoch": 0.9686419023449043, "grad_norm": 0.9973174886321423, "learning_rate": 2.577248604028359e-09, "loss": 0.1088, "step": 55725 }, { "epoch": 0.9686592848824072, "grad_norm": 1.0627793550532292, "learning_rate": 2.57439498496137e-09, "loss": 0.1768, "step": 55726 }, { "epoch": 0.96867666741991, "grad_norm": 2.139973612858348, "learning_rate": 2.571542942505056e-09, "loss": 0.1601, "step": 55727 }, { "epoch": 0.9686940499574128, "grad_norm": 1.0682403872531423, "learning_rate": 2.56869247666841e-09, "loss": 0.2463, "step": 55728 }, { "epoch": 0.9687114324949156, "grad_norm": 1.229571065815558, "learning_rate": 2.5658435874605365e-09, "loss": 0.2081, "step": 55729 }, { "epoch": 0.9687288150324185, "grad_norm": 1.3669824945871771, "learning_rate": 2.562996274890372e-09, "loss": 0.1997, "step": 55730 }, { "epoch": 0.9687461975699213, "grad_norm": 1.268496869953551, "learning_rate": 2.560150538967132e-09, "loss": 0.2072, "step": 55731 }, { "epoch": 0.968763580107424, "grad_norm": 1.1715761353945786, "learning_rate": 2.5573063796995863e-09, "loss": 0.1384, "step": 55732 }, { "epoch": 0.9687809626449269, "grad_norm": 0.9784868968229936, "learning_rate": 2.5544637970969508e-09, "loss": 0.1485, "step": 55733 }, { "epoch": 0.9687983451824297, "grad_norm": 1.2759723544805488, "learning_rate": 2.5516227911681066e-09, "loss": 0.1912, "step": 55734 }, { "epoch": 0.9688157277199325, "grad_norm": 1.0285767151791347, "learning_rate": 2.5487833619221577e-09, "loss": 0.1749, "step": 55735 }, { "epoch": 0.9688331102574353, "grad_norm": 1.1497334074291907, "learning_rate": 2.545945509367986e-09, "loss": 0.1321, "step": 55736 }, { "epoch": 0.9688504927949382, "grad_norm": 1.7894400180187722, "learning_rate": 2.5431092335146954e-09, "loss": 0.1066, "step": 55737 }, { "epoch": 0.968867875332441, "grad_norm": 4.421664952943615, "learning_rate": 2.5402745343712782e-09, "loss": 0.3038, "step": 55738 }, { "epoch": 0.9688852578699438, "grad_norm": 0.9330783740862824, "learning_rate": 2.537441411946617e-09, "loss": 0.1898, "step": 55739 }, { "epoch": 0.9689026404074467, "grad_norm": 1.2735114384615707, "learning_rate": 2.5346098662497594e-09, "loss": 0.3905, "step": 55740 }, { "epoch": 0.9689200229449495, "grad_norm": 1.6512463502359405, "learning_rate": 2.5317798972896986e-09, "loss": 0.1465, "step": 55741 }, { "epoch": 0.9689374054824523, "grad_norm": 1.1098356572422814, "learning_rate": 2.5289515050753165e-09, "loss": 0.1964, "step": 55742 }, { "epoch": 0.9689547880199552, "grad_norm": 0.5107332446636821, "learning_rate": 2.5261246896157163e-09, "loss": 0.1096, "step": 55743 }, { "epoch": 0.968972170557458, "grad_norm": 1.419489847004262, "learning_rate": 2.52329945091978e-09, "loss": 0.161, "step": 55744 }, { "epoch": 0.9689895530949608, "grad_norm": 2.5870107326706115, "learning_rate": 2.52047578899639e-09, "loss": 0.1321, "step": 55745 }, { "epoch": 0.9690069356324637, "grad_norm": 1.0278752260509647, "learning_rate": 2.517653703854594e-09, "loss": 0.1933, "step": 55746 }, { "epoch": 0.9690243181699665, "grad_norm": 0.9280333177914439, "learning_rate": 2.51483319550333e-09, "loss": 0.1171, "step": 55747 }, { "epoch": 0.9690417007074693, "grad_norm": 1.9529322767361876, "learning_rate": 2.512014263951534e-09, "loss": 0.227, "step": 55748 }, { "epoch": 0.9690590832449721, "grad_norm": 1.6973075325049949, "learning_rate": 2.5091969092080888e-09, "loss": 0.1877, "step": 55749 }, { "epoch": 0.969076465782475, "grad_norm": 2.8357184308771264, "learning_rate": 2.5063811312819873e-09, "loss": 0.2229, "step": 55750 }, { "epoch": 0.9690938483199778, "grad_norm": 1.5436803908491723, "learning_rate": 2.5035669301821105e-09, "loss": 0.22, "step": 55751 }, { "epoch": 0.9691112308574805, "grad_norm": 1.9000797242617302, "learning_rate": 2.5007543059173964e-09, "loss": 0.1804, "step": 55752 }, { "epoch": 0.9691286133949834, "grad_norm": 2.144855966880056, "learning_rate": 2.497943258496782e-09, "loss": 0.1769, "step": 55753 }, { "epoch": 0.9691459959324862, "grad_norm": 1.4505391252388027, "learning_rate": 2.4951337879290933e-09, "loss": 0.1991, "step": 55754 }, { "epoch": 0.969163378469989, "grad_norm": 2.2658553782443147, "learning_rate": 2.4923258942233794e-09, "loss": 0.2848, "step": 55755 }, { "epoch": 0.9691807610074918, "grad_norm": 1.3087719769284312, "learning_rate": 2.48951957738841e-09, "loss": 0.18, "step": 55756 }, { "epoch": 0.9691981435449947, "grad_norm": 1.3199928866774766, "learning_rate": 2.4867148374331238e-09, "loss": 0.1506, "step": 55757 }, { "epoch": 0.9692155260824975, "grad_norm": 1.2731524027154137, "learning_rate": 2.4839116743664012e-09, "loss": 0.1594, "step": 55758 }, { "epoch": 0.9692329086200003, "grad_norm": 1.4955187770719518, "learning_rate": 2.4811100881971802e-09, "loss": 0.1358, "step": 55759 }, { "epoch": 0.9692502911575032, "grad_norm": 1.0467170000507788, "learning_rate": 2.478310078934287e-09, "loss": 0.0927, "step": 55760 }, { "epoch": 0.969267673695006, "grad_norm": 5.140777633084872, "learning_rate": 2.4755116465865478e-09, "loss": 0.2763, "step": 55761 }, { "epoch": 0.9692850562325088, "grad_norm": 0.8646434492895776, "learning_rate": 2.4727147911629554e-09, "loss": 0.1809, "step": 55762 }, { "epoch": 0.9693024387700117, "grad_norm": 1.286910944623665, "learning_rate": 2.469919512672336e-09, "loss": 0.1192, "step": 55763 }, { "epoch": 0.9693198213075145, "grad_norm": 1.6064936839227208, "learning_rate": 2.4671258111234607e-09, "loss": 0.2004, "step": 55764 }, { "epoch": 0.9693372038450173, "grad_norm": 1.7624953017711114, "learning_rate": 2.464333686525266e-09, "loss": 0.1744, "step": 55765 }, { "epoch": 0.9693545863825201, "grad_norm": 1.3869329662939238, "learning_rate": 2.4615431388865794e-09, "loss": 0.1452, "step": 55766 }, { "epoch": 0.969371968920023, "grad_norm": 1.2240490108610165, "learning_rate": 2.4587541682162816e-09, "loss": 0.1829, "step": 55767 }, { "epoch": 0.9693893514575258, "grad_norm": 1.5816533508000594, "learning_rate": 2.455966774523144e-09, "loss": 0.1583, "step": 55768 }, { "epoch": 0.9694067339950286, "grad_norm": 2.2966039285543225, "learning_rate": 2.4531809578161035e-09, "loss": 0.1918, "step": 55769 }, { "epoch": 0.9694241165325315, "grad_norm": 0.9179056394562622, "learning_rate": 2.4503967181038754e-09, "loss": 0.1293, "step": 55770 }, { "epoch": 0.9694414990700343, "grad_norm": 1.9565446943659994, "learning_rate": 2.447614055395342e-09, "loss": 0.1916, "step": 55771 }, { "epoch": 0.969458881607537, "grad_norm": 1.309386441573054, "learning_rate": 2.444832969699273e-09, "loss": 0.1304, "step": 55772 }, { "epoch": 0.9694762641450398, "grad_norm": 3.9107604602757755, "learning_rate": 2.4420534610245512e-09, "loss": 0.1726, "step": 55773 }, { "epoch": 0.9694936466825427, "grad_norm": 1.071927514732445, "learning_rate": 2.4392755293800025e-09, "loss": 0.2275, "step": 55774 }, { "epoch": 0.9695110292200455, "grad_norm": 0.8521513609933711, "learning_rate": 2.4364991747743424e-09, "loss": 0.2211, "step": 55775 }, { "epoch": 0.9695284117575483, "grad_norm": 0.88184460394266, "learning_rate": 2.4337243972164524e-09, "loss": 0.1533, "step": 55776 }, { "epoch": 0.9695457942950512, "grad_norm": 0.7010907721936261, "learning_rate": 2.4309511967150476e-09, "loss": 0.1746, "step": 55777 }, { "epoch": 0.969563176832554, "grad_norm": 1.699478596694061, "learning_rate": 2.42817957327901e-09, "loss": 0.1885, "step": 55778 }, { "epoch": 0.9695805593700568, "grad_norm": 1.0957095683265723, "learning_rate": 2.425409526917055e-09, "loss": 0.2042, "step": 55779 }, { "epoch": 0.9695979419075597, "grad_norm": 1.129594152672455, "learning_rate": 2.4226410576380085e-09, "loss": 0.1733, "step": 55780 }, { "epoch": 0.9696153244450625, "grad_norm": 1.7823204445847218, "learning_rate": 2.419874165450586e-09, "loss": 0.2056, "step": 55781 }, { "epoch": 0.9696327069825653, "grad_norm": 1.609394246676697, "learning_rate": 2.417108850363614e-09, "loss": 0.192, "step": 55782 }, { "epoch": 0.9696500895200681, "grad_norm": 1.0906544010879935, "learning_rate": 2.4143451123858627e-09, "loss": 0.1767, "step": 55783 }, { "epoch": 0.969667472057571, "grad_norm": 1.5848301490908934, "learning_rate": 2.411582951525992e-09, "loss": 0.1813, "step": 55784 }, { "epoch": 0.9696848545950738, "grad_norm": 1.6201139269516422, "learning_rate": 2.4088223677928844e-09, "loss": 0.2092, "step": 55785 }, { "epoch": 0.9697022371325766, "grad_norm": 1.188519233072291, "learning_rate": 2.4060633611951986e-09, "loss": 0.2078, "step": 55786 }, { "epoch": 0.9697196196700795, "grad_norm": 1.4271481098338712, "learning_rate": 2.4033059317417615e-09, "loss": 0.1561, "step": 55787 }, { "epoch": 0.9697370022075823, "grad_norm": 1.152028426673478, "learning_rate": 2.4005500794412324e-09, "loss": 0.1025, "step": 55788 }, { "epoch": 0.9697543847450851, "grad_norm": 1.618594164452829, "learning_rate": 2.3977958043023827e-09, "loss": 0.1651, "step": 55789 }, { "epoch": 0.969771767282588, "grad_norm": 1.7438794614507278, "learning_rate": 2.3950431063339827e-09, "loss": 0.192, "step": 55790 }, { "epoch": 0.9697891498200908, "grad_norm": 1.9683993881490383, "learning_rate": 2.392291985544692e-09, "loss": 0.1785, "step": 55791 }, { "epoch": 0.9698065323575935, "grad_norm": 1.1421809369071012, "learning_rate": 2.3895424419432265e-09, "loss": 0.1342, "step": 55792 }, { "epoch": 0.9698239148950963, "grad_norm": 1.263393532623665, "learning_rate": 2.3867944755383564e-09, "loss": 0.2339, "step": 55793 }, { "epoch": 0.9698412974325992, "grad_norm": 4.449656909009668, "learning_rate": 2.3840480863387413e-09, "loss": 0.2403, "step": 55794 }, { "epoch": 0.969858679970102, "grad_norm": 1.5769514522416557, "learning_rate": 2.381303274353097e-09, "loss": 0.3059, "step": 55795 }, { "epoch": 0.9698760625076048, "grad_norm": 1.5675045984694131, "learning_rate": 2.378560039590194e-09, "loss": 0.2026, "step": 55796 }, { "epoch": 0.9698934450451077, "grad_norm": 1.7835882460303734, "learning_rate": 2.3758183820586362e-09, "loss": 0.2511, "step": 55797 }, { "epoch": 0.9699108275826105, "grad_norm": 0.8251743226116569, "learning_rate": 2.3730783017671396e-09, "loss": 0.2037, "step": 55798 }, { "epoch": 0.9699282101201133, "grad_norm": 0.6333582106132922, "learning_rate": 2.3703397987243633e-09, "loss": 0.1459, "step": 55799 }, { "epoch": 0.9699455926576162, "grad_norm": 1.588138902704398, "learning_rate": 2.367602872939023e-09, "loss": 0.1949, "step": 55800 }, { "epoch": 0.969962975195119, "grad_norm": 1.0382505654609038, "learning_rate": 2.3648675244198337e-09, "loss": 0.2055, "step": 55801 }, { "epoch": 0.9699803577326218, "grad_norm": 1.4793481999727578, "learning_rate": 2.3621337531753995e-09, "loss": 0.141, "step": 55802 }, { "epoch": 0.9699977402701246, "grad_norm": 1.3956228305663472, "learning_rate": 2.35940155921438e-09, "loss": 0.1189, "step": 55803 }, { "epoch": 0.9700151228076275, "grad_norm": 1.2161624118098857, "learning_rate": 2.356670942545491e-09, "loss": 0.1048, "step": 55804 }, { "epoch": 0.9700325053451303, "grad_norm": 1.4343517711832785, "learning_rate": 2.3539419031773923e-09, "loss": 0.1633, "step": 55805 }, { "epoch": 0.9700498878826331, "grad_norm": 1.2017215326182356, "learning_rate": 2.351214441118632e-09, "loss": 0.1348, "step": 55806 }, { "epoch": 0.970067270420136, "grad_norm": 1.007821928890866, "learning_rate": 2.3484885563779256e-09, "loss": 0.1674, "step": 55807 }, { "epoch": 0.9700846529576388, "grad_norm": 0.8043884188700313, "learning_rate": 2.345764248963933e-09, "loss": 0.1132, "step": 55808 }, { "epoch": 0.9701020354951416, "grad_norm": 1.416199943030642, "learning_rate": 2.3430415188852583e-09, "loss": 0.126, "step": 55809 }, { "epoch": 0.9701194180326445, "grad_norm": 1.2070486119079602, "learning_rate": 2.340320366150561e-09, "loss": 0.1107, "step": 55810 }, { "epoch": 0.9701368005701473, "grad_norm": 2.938761083274856, "learning_rate": 2.3376007907683903e-09, "loss": 0.2032, "step": 55811 }, { "epoch": 0.97015418310765, "grad_norm": 0.7556718462761267, "learning_rate": 2.3348827927474613e-09, "loss": 0.1181, "step": 55812 }, { "epoch": 0.9701715656451528, "grad_norm": 1.3256930824177795, "learning_rate": 2.3321663720963226e-09, "loss": 0.1472, "step": 55813 }, { "epoch": 0.9701889481826557, "grad_norm": 1.7869254852852108, "learning_rate": 2.3294515288235784e-09, "loss": 0.1687, "step": 55814 }, { "epoch": 0.9702063307201585, "grad_norm": 1.3951791411908843, "learning_rate": 2.3267382629378886e-09, "loss": 0.1308, "step": 55815 }, { "epoch": 0.9702237132576613, "grad_norm": 1.399075326753959, "learning_rate": 2.324026574447857e-09, "loss": 0.1603, "step": 55816 }, { "epoch": 0.9702410957951642, "grad_norm": 1.2843590222494448, "learning_rate": 2.321316463362033e-09, "loss": 0.1444, "step": 55817 }, { "epoch": 0.970258478332667, "grad_norm": 1.6551193066994765, "learning_rate": 2.3186079296890204e-09, "loss": 0.2248, "step": 55818 }, { "epoch": 0.9702758608701698, "grad_norm": 1.2270911025843925, "learning_rate": 2.315900973437368e-09, "loss": 0.2321, "step": 55819 }, { "epoch": 0.9702932434076726, "grad_norm": 1.620408327286816, "learning_rate": 2.3131955946157355e-09, "loss": 0.1502, "step": 55820 }, { "epoch": 0.9703106259451755, "grad_norm": 1.1452103001239482, "learning_rate": 2.3104917932326163e-09, "loss": 0.152, "step": 55821 }, { "epoch": 0.9703280084826783, "grad_norm": 1.748637240647377, "learning_rate": 2.3077895692966697e-09, "loss": 0.2219, "step": 55822 }, { "epoch": 0.9703453910201811, "grad_norm": 0.7421312917375081, "learning_rate": 2.3050889228163338e-09, "loss": 0.1883, "step": 55823 }, { "epoch": 0.970362773557684, "grad_norm": 1.9833869130987032, "learning_rate": 2.302389853800324e-09, "loss": 0.1713, "step": 55824 }, { "epoch": 0.9703801560951868, "grad_norm": 1.2956279734975302, "learning_rate": 2.2996923622570774e-09, "loss": 0.1409, "step": 55825 }, { "epoch": 0.9703975386326896, "grad_norm": 1.0467415490239398, "learning_rate": 2.2969964481951985e-09, "loss": 0.1766, "step": 55826 }, { "epoch": 0.9704149211701925, "grad_norm": 1.4588601225220144, "learning_rate": 2.294302111623181e-09, "loss": 0.1896, "step": 55827 }, { "epoch": 0.9704323037076953, "grad_norm": 1.472945817174864, "learning_rate": 2.2916093525496282e-09, "loss": 0.2522, "step": 55828 }, { "epoch": 0.9704496862451981, "grad_norm": 0.8624169031794284, "learning_rate": 2.288918170983034e-09, "loss": 0.1442, "step": 55829 }, { "epoch": 0.970467068782701, "grad_norm": 2.1409194237169635, "learning_rate": 2.2862285669318915e-09, "loss": 0.1541, "step": 55830 }, { "epoch": 0.9704844513202038, "grad_norm": 1.2272728038816916, "learning_rate": 2.2835405404048603e-09, "loss": 0.148, "step": 55831 }, { "epoch": 0.9705018338577065, "grad_norm": 1.3112613828873296, "learning_rate": 2.2808540914103225e-09, "loss": 0.1092, "step": 55832 }, { "epoch": 0.9705192163952093, "grad_norm": 2.5815487201271043, "learning_rate": 2.2781692199568824e-09, "loss": 0.1612, "step": 55833 }, { "epoch": 0.9705365989327122, "grad_norm": 1.472947332490605, "learning_rate": 2.275485926052978e-09, "loss": 0.1235, "step": 55834 }, { "epoch": 0.970553981470215, "grad_norm": 1.190495848377744, "learning_rate": 2.2728042097071576e-09, "loss": 0.1416, "step": 55835 }, { "epoch": 0.9705713640077178, "grad_norm": 1.2041977717387446, "learning_rate": 2.2701240709279144e-09, "loss": 0.107, "step": 55836 }, { "epoch": 0.9705887465452206, "grad_norm": 1.1632636748146923, "learning_rate": 2.2674455097236866e-09, "loss": 0.1233, "step": 55837 }, { "epoch": 0.9706061290827235, "grad_norm": 1.5853930745644438, "learning_rate": 2.264768526103078e-09, "loss": 0.1668, "step": 55838 }, { "epoch": 0.9706235116202263, "grad_norm": 2.454399025532184, "learning_rate": 2.2620931200745263e-09, "loss": 0.1335, "step": 55839 }, { "epoch": 0.9706408941577291, "grad_norm": 1.6834101368507783, "learning_rate": 2.2594192916464695e-09, "loss": 0.2351, "step": 55840 }, { "epoch": 0.970658276695232, "grad_norm": 1.4649638015861632, "learning_rate": 2.2567470408274004e-09, "loss": 0.2515, "step": 55841 }, { "epoch": 0.9706756592327348, "grad_norm": 1.509442053548003, "learning_rate": 2.2540763676257566e-09, "loss": 0.1154, "step": 55842 }, { "epoch": 0.9706930417702376, "grad_norm": 0.9231195551273198, "learning_rate": 2.251407272050143e-09, "loss": 0.1916, "step": 55843 }, { "epoch": 0.9707104243077405, "grad_norm": 1.3363505786669994, "learning_rate": 2.2487397541088304e-09, "loss": 0.1445, "step": 55844 }, { "epoch": 0.9707278068452433, "grad_norm": 1.4027825711360296, "learning_rate": 2.246073813810423e-09, "loss": 0.1814, "step": 55845 }, { "epoch": 0.9707451893827461, "grad_norm": 1.1280466585523543, "learning_rate": 2.243409451163303e-09, "loss": 0.1936, "step": 55846 }, { "epoch": 0.970762571920249, "grad_norm": 1.8292965252550057, "learning_rate": 2.2407466661759076e-09, "loss": 0.1602, "step": 55847 }, { "epoch": 0.9707799544577518, "grad_norm": 1.0163891339950388, "learning_rate": 2.238085458856731e-09, "loss": 0.0979, "step": 55848 }, { "epoch": 0.9707973369952546, "grad_norm": 0.8336513512559797, "learning_rate": 2.2354258292141547e-09, "loss": 0.1106, "step": 55849 }, { "epoch": 0.9708147195327574, "grad_norm": 0.9207650770754585, "learning_rate": 2.232767777256561e-09, "loss": 0.1013, "step": 55850 }, { "epoch": 0.9708321020702602, "grad_norm": 1.7731291193689824, "learning_rate": 2.230111302992499e-09, "loss": 0.102, "step": 55851 }, { "epoch": 0.970849484607763, "grad_norm": 1.7592178985981413, "learning_rate": 2.2274564064303505e-09, "loss": 0.1511, "step": 55852 }, { "epoch": 0.9708668671452658, "grad_norm": 1.2340617399975518, "learning_rate": 2.224803087578442e-09, "loss": 0.185, "step": 55853 }, { "epoch": 0.9708842496827687, "grad_norm": 1.7472888085057732, "learning_rate": 2.222151346445267e-09, "loss": 0.1477, "step": 55854 }, { "epoch": 0.9709016322202715, "grad_norm": 1.5131635889894715, "learning_rate": 2.219501183039263e-09, "loss": 0.2083, "step": 55855 }, { "epoch": 0.9709190147577743, "grad_norm": 1.1494849079411937, "learning_rate": 2.216852597368757e-09, "loss": 0.1522, "step": 55856 }, { "epoch": 0.9709363972952771, "grad_norm": 0.8634213251956288, "learning_rate": 2.2142055894421307e-09, "loss": 0.1056, "step": 55857 }, { "epoch": 0.97095377983278, "grad_norm": 1.1371240248315575, "learning_rate": 2.2115601592678224e-09, "loss": 0.1893, "step": 55858 }, { "epoch": 0.9709711623702828, "grad_norm": 2.108776568146392, "learning_rate": 2.2089163068542137e-09, "loss": 0.209, "step": 55859 }, { "epoch": 0.9709885449077856, "grad_norm": 1.3558893330148791, "learning_rate": 2.2062740322096873e-09, "loss": 0.1495, "step": 55860 }, { "epoch": 0.9710059274452885, "grad_norm": 1.6090576121560856, "learning_rate": 2.2036333353425697e-09, "loss": 0.2587, "step": 55861 }, { "epoch": 0.9710233099827913, "grad_norm": 0.9932149904240799, "learning_rate": 2.2009942162612425e-09, "loss": 0.2348, "step": 55862 }, { "epoch": 0.9710406925202941, "grad_norm": 1.9669876247812084, "learning_rate": 2.1983566749741444e-09, "loss": 0.2003, "step": 55863 }, { "epoch": 0.971058075057797, "grad_norm": 1.847970116574515, "learning_rate": 2.1957207114896013e-09, "loss": 0.1798, "step": 55864 }, { "epoch": 0.9710754575952998, "grad_norm": 1.1317748123121047, "learning_rate": 2.193086325815885e-09, "loss": 0.123, "step": 55865 }, { "epoch": 0.9710928401328026, "grad_norm": 1.239236717177823, "learning_rate": 2.1904535179614882e-09, "loss": 0.2345, "step": 55866 }, { "epoch": 0.9711102226703054, "grad_norm": 1.2738436207296218, "learning_rate": 2.1878222879346264e-09, "loss": 0.1637, "step": 55867 }, { "epoch": 0.9711276052078083, "grad_norm": 1.6837655282897799, "learning_rate": 2.1851926357436823e-09, "loss": 0.1148, "step": 55868 }, { "epoch": 0.9711449877453111, "grad_norm": 2.1233273537364745, "learning_rate": 2.1825645613970377e-09, "loss": 0.2455, "step": 55869 }, { "epoch": 0.9711623702828139, "grad_norm": 1.8737482760812467, "learning_rate": 2.179938064902964e-09, "loss": 0.1516, "step": 55870 }, { "epoch": 0.9711797528203167, "grad_norm": 0.767389472784109, "learning_rate": 2.1773131462698435e-09, "loss": 0.1676, "step": 55871 }, { "epoch": 0.9711971353578195, "grad_norm": 1.4922229962515259, "learning_rate": 2.1746898055058916e-09, "loss": 0.2557, "step": 55872 }, { "epoch": 0.9712145178953223, "grad_norm": 2.3273854446255773, "learning_rate": 2.172068042619546e-09, "loss": 0.1512, "step": 55873 }, { "epoch": 0.9712319004328251, "grad_norm": 2.7179464437010092, "learning_rate": 2.1694478576190224e-09, "loss": 0.1553, "step": 55874 }, { "epoch": 0.971249282970328, "grad_norm": 1.2558348862748567, "learning_rate": 2.166829250512703e-09, "loss": 0.2024, "step": 55875 }, { "epoch": 0.9712666655078308, "grad_norm": 1.5745578084695102, "learning_rate": 2.164212221308803e-09, "loss": 0.1605, "step": 55876 }, { "epoch": 0.9712840480453336, "grad_norm": 1.5783842168819335, "learning_rate": 2.1615967700157056e-09, "loss": 0.1652, "step": 55877 }, { "epoch": 0.9713014305828365, "grad_norm": 1.9744988426655177, "learning_rate": 2.158982896641681e-09, "loss": 0.2007, "step": 55878 }, { "epoch": 0.9713188131203393, "grad_norm": 1.879091206805224, "learning_rate": 2.1563706011949458e-09, "loss": 0.1328, "step": 55879 }, { "epoch": 0.9713361956578421, "grad_norm": 1.160827338702949, "learning_rate": 2.1537598836838254e-09, "loss": 0.1453, "step": 55880 }, { "epoch": 0.971353578195345, "grad_norm": 1.9247895181052015, "learning_rate": 2.151150744116592e-09, "loss": 0.2333, "step": 55881 }, { "epoch": 0.9713709607328478, "grad_norm": 1.4034522815950645, "learning_rate": 2.1485431825015164e-09, "loss": 0.1847, "step": 55882 }, { "epoch": 0.9713883432703506, "grad_norm": 1.4090826472898361, "learning_rate": 2.1459371988468698e-09, "loss": 0.1941, "step": 55883 }, { "epoch": 0.9714057258078534, "grad_norm": 1.0539273846685737, "learning_rate": 2.1433327931609236e-09, "loss": 0.1002, "step": 55884 }, { "epoch": 0.9714231083453563, "grad_norm": 1.3202999789331729, "learning_rate": 2.140729965451893e-09, "loss": 0.21, "step": 55885 }, { "epoch": 0.9714404908828591, "grad_norm": 1.1676551098178316, "learning_rate": 2.1381287157280493e-09, "loss": 0.1405, "step": 55886 }, { "epoch": 0.9714578734203619, "grad_norm": 1.580415925009098, "learning_rate": 2.1355290439976636e-09, "loss": 0.1579, "step": 55887 }, { "epoch": 0.9714752559578648, "grad_norm": 1.2280703750053532, "learning_rate": 2.132930950268952e-09, "loss": 0.2661, "step": 55888 }, { "epoch": 0.9714926384953676, "grad_norm": 1.282956133688156, "learning_rate": 2.1303344345501296e-09, "loss": 0.1419, "step": 55889 }, { "epoch": 0.9715100210328704, "grad_norm": 4.041596231485883, "learning_rate": 2.1277394968494678e-09, "loss": 0.2522, "step": 55890 }, { "epoch": 0.9715274035703731, "grad_norm": 1.4101435584253021, "learning_rate": 2.1251461371751265e-09, "loss": 0.1345, "step": 55891 }, { "epoch": 0.971544786107876, "grad_norm": 4.220226621620926, "learning_rate": 2.1225543555353777e-09, "loss": 0.2626, "step": 55892 }, { "epoch": 0.9715621686453788, "grad_norm": 2.2374967211544186, "learning_rate": 2.119964151938436e-09, "loss": 0.167, "step": 55893 }, { "epoch": 0.9715795511828816, "grad_norm": 1.4767341510987169, "learning_rate": 2.1173755263925174e-09, "loss": 0.1926, "step": 55894 }, { "epoch": 0.9715969337203845, "grad_norm": 0.8910372209501171, "learning_rate": 2.1147884789057825e-09, "loss": 0.2202, "step": 55895 }, { "epoch": 0.9716143162578873, "grad_norm": 2.8019923256997163, "learning_rate": 2.1122030094865018e-09, "loss": 0.3065, "step": 55896 }, { "epoch": 0.9716316987953901, "grad_norm": 1.0072562505173568, "learning_rate": 2.1096191181427804e-09, "loss": 0.1857, "step": 55897 }, { "epoch": 0.971649081332893, "grad_norm": 3.322781712813134, "learning_rate": 2.107036804882889e-09, "loss": 0.2739, "step": 55898 }, { "epoch": 0.9716664638703958, "grad_norm": 1.3677647646272528, "learning_rate": 2.1044560697149325e-09, "loss": 0.193, "step": 55899 }, { "epoch": 0.9716838464078986, "grad_norm": 1.721717680326761, "learning_rate": 2.101876912647127e-09, "loss": 0.1519, "step": 55900 }, { "epoch": 0.9717012289454015, "grad_norm": 1.4244429126213702, "learning_rate": 2.0992993336876875e-09, "loss": 0.1835, "step": 55901 }, { "epoch": 0.9717186114829043, "grad_norm": 1.8228954499181826, "learning_rate": 2.0967233328448296e-09, "loss": 0.1383, "step": 55902 }, { "epoch": 0.9717359940204071, "grad_norm": 1.4806866493941537, "learning_rate": 2.0941489101265476e-09, "loss": 0.1848, "step": 55903 }, { "epoch": 0.9717533765579099, "grad_norm": 1.3977590728673408, "learning_rate": 2.091576065541112e-09, "loss": 0.0981, "step": 55904 }, { "epoch": 0.9717707590954128, "grad_norm": 1.4345589648447818, "learning_rate": 2.0890047990966274e-09, "loss": 0.1179, "step": 55905 }, { "epoch": 0.9717881416329156, "grad_norm": 2.57929316235168, "learning_rate": 2.08643511080131e-09, "loss": 0.2371, "step": 55906 }, { "epoch": 0.9718055241704184, "grad_norm": 1.102319395215638, "learning_rate": 2.083867000663264e-09, "loss": 0.1985, "step": 55907 }, { "epoch": 0.9718229067079213, "grad_norm": 2.21210714231606, "learning_rate": 2.08130046869065e-09, "loss": 0.2913, "step": 55908 }, { "epoch": 0.9718402892454241, "grad_norm": 1.3091870441463624, "learning_rate": 2.078735514891572e-09, "loss": 0.1582, "step": 55909 }, { "epoch": 0.9718576717829269, "grad_norm": 0.9757570888069308, "learning_rate": 2.0761721392741903e-09, "loss": 0.2058, "step": 55910 }, { "epoch": 0.9718750543204296, "grad_norm": 1.436391816984031, "learning_rate": 2.07361034184661e-09, "loss": 0.1672, "step": 55911 }, { "epoch": 0.9718924368579325, "grad_norm": 1.0216179158460519, "learning_rate": 2.071050122616935e-09, "loss": 0.0897, "step": 55912 }, { "epoch": 0.9719098193954353, "grad_norm": 0.9179279810475824, "learning_rate": 2.068491481593326e-09, "loss": 0.203, "step": 55913 }, { "epoch": 0.9719272019329381, "grad_norm": 2.2089747502911745, "learning_rate": 2.0659344187838877e-09, "loss": 0.2922, "step": 55914 }, { "epoch": 0.971944584470441, "grad_norm": 1.8487390667689176, "learning_rate": 2.063378934196669e-09, "loss": 0.1769, "step": 55915 }, { "epoch": 0.9719619670079438, "grad_norm": 1.569329758378656, "learning_rate": 2.0608250278398296e-09, "loss": 0.2394, "step": 55916 }, { "epoch": 0.9719793495454466, "grad_norm": 1.128222201953098, "learning_rate": 2.0582726997214194e-09, "loss": 0.1763, "step": 55917 }, { "epoch": 0.9719967320829495, "grad_norm": 1.5741030396616207, "learning_rate": 2.0557219498495427e-09, "loss": 0.1177, "step": 55918 }, { "epoch": 0.9720141146204523, "grad_norm": 2.8819453755544426, "learning_rate": 2.0531727782323037e-09, "loss": 0.1737, "step": 55919 }, { "epoch": 0.9720314971579551, "grad_norm": 2.8625613396642287, "learning_rate": 2.050625184877752e-09, "loss": 0.1693, "step": 55920 }, { "epoch": 0.972048879695458, "grad_norm": 1.776864455879404, "learning_rate": 2.0480791697940482e-09, "loss": 0.1812, "step": 55921 }, { "epoch": 0.9720662622329608, "grad_norm": 1.5937792320961623, "learning_rate": 2.045534732989129e-09, "loss": 0.1755, "step": 55922 }, { "epoch": 0.9720836447704636, "grad_norm": 0.8151470693042823, "learning_rate": 2.042991874471156e-09, "loss": 0.1411, "step": 55923 }, { "epoch": 0.9721010273079664, "grad_norm": 1.6048220883789448, "learning_rate": 2.040450594248122e-09, "loss": 0.2195, "step": 55924 }, { "epoch": 0.9721184098454693, "grad_norm": 1.57630008488416, "learning_rate": 2.0379108923281318e-09, "loss": 0.1472, "step": 55925 }, { "epoch": 0.9721357923829721, "grad_norm": 0.9706748969145926, "learning_rate": 2.0353727687192344e-09, "loss": 0.2364, "step": 55926 }, { "epoch": 0.9721531749204749, "grad_norm": 0.6913940396539036, "learning_rate": 2.0328362234294237e-09, "loss": 0.128, "step": 55927 }, { "epoch": 0.9721705574579778, "grad_norm": 2.5151970880537804, "learning_rate": 2.0303012564668044e-09, "loss": 0.1751, "step": 55928 }, { "epoch": 0.9721879399954806, "grad_norm": 1.951737845479546, "learning_rate": 2.0277678678393696e-09, "loss": 0.2431, "step": 55929 }, { "epoch": 0.9722053225329834, "grad_norm": 0.8864666384915993, "learning_rate": 2.0252360575551686e-09, "loss": 0.161, "step": 55930 }, { "epoch": 0.9722227050704861, "grad_norm": 1.527719297486942, "learning_rate": 2.022705825622195e-09, "loss": 0.1932, "step": 55931 }, { "epoch": 0.972240087607989, "grad_norm": 1.6367905590702063, "learning_rate": 2.0201771720484983e-09, "loss": 0.1561, "step": 55932 }, { "epoch": 0.9722574701454918, "grad_norm": 1.3737838122019124, "learning_rate": 2.017650096842127e-09, "loss": 0.1668, "step": 55933 }, { "epoch": 0.9722748526829946, "grad_norm": 1.317403321723898, "learning_rate": 2.0151246000109645e-09, "loss": 0.1921, "step": 55934 }, { "epoch": 0.9722922352204975, "grad_norm": 1.0186025549214768, "learning_rate": 2.0126006815631703e-09, "loss": 0.1591, "step": 55935 }, { "epoch": 0.9723096177580003, "grad_norm": 0.9238406817449513, "learning_rate": 2.0100783415066824e-09, "loss": 0.1637, "step": 55936 }, { "epoch": 0.9723270002955031, "grad_norm": 1.8019957853148483, "learning_rate": 2.0075575798494395e-09, "loss": 0.1002, "step": 55937 }, { "epoch": 0.972344382833006, "grad_norm": 0.9246937115070565, "learning_rate": 2.0050383965995455e-09, "loss": 0.1136, "step": 55938 }, { "epoch": 0.9723617653705088, "grad_norm": 1.2275072727593825, "learning_rate": 2.0025207917648835e-09, "loss": 0.126, "step": 55939 }, { "epoch": 0.9723791479080116, "grad_norm": 0.9063416531720477, "learning_rate": 2.000004765353447e-09, "loss": 0.1504, "step": 55940 }, { "epoch": 0.9723965304455144, "grad_norm": 2.020373446624679, "learning_rate": 1.9974903173732847e-09, "loss": 0.157, "step": 55941 }, { "epoch": 0.9724139129830173, "grad_norm": 1.7183523952305895, "learning_rate": 1.9949774478322802e-09, "loss": 0.1618, "step": 55942 }, { "epoch": 0.9724312955205201, "grad_norm": 1.3766713751260846, "learning_rate": 1.992466156738426e-09, "loss": 0.1988, "step": 55943 }, { "epoch": 0.9724486780580229, "grad_norm": 1.8390240518946075, "learning_rate": 1.989956444099661e-09, "loss": 0.1612, "step": 55944 }, { "epoch": 0.9724660605955258, "grad_norm": 1.9988990324115825, "learning_rate": 1.9874483099240337e-09, "loss": 0.1366, "step": 55945 }, { "epoch": 0.9724834431330286, "grad_norm": 1.1921636570888974, "learning_rate": 1.984941754219427e-09, "loss": 0.1912, "step": 55946 }, { "epoch": 0.9725008256705314, "grad_norm": 0.7461425529544101, "learning_rate": 1.9824367769937235e-09, "loss": 0.1147, "step": 55947 }, { "epoch": 0.9725182082080343, "grad_norm": 1.22163052263975, "learning_rate": 1.979933378254972e-09, "loss": 0.2133, "step": 55948 }, { "epoch": 0.9725355907455371, "grad_norm": 2.2358290083999814, "learning_rate": 1.9774315580110556e-09, "loss": 0.1, "step": 55949 }, { "epoch": 0.9725529732830399, "grad_norm": 2.654445397856214, "learning_rate": 1.9749313162698567e-09, "loss": 0.1756, "step": 55950 }, { "epoch": 0.9725703558205426, "grad_norm": 1.0926620983964563, "learning_rate": 1.9724326530394243e-09, "loss": 0.1728, "step": 55951 }, { "epoch": 0.9725877383580455, "grad_norm": 1.6398764505981678, "learning_rate": 1.9699355683275854e-09, "loss": 0.1944, "step": 55952 }, { "epoch": 0.9726051208955483, "grad_norm": 1.1399006202385131, "learning_rate": 1.967440062142278e-09, "loss": 0.1655, "step": 55953 }, { "epoch": 0.9726225034330511, "grad_norm": 1.579245669810895, "learning_rate": 1.96494613449133e-09, "loss": 0.1151, "step": 55954 }, { "epoch": 0.972639885970554, "grad_norm": 1.433964551517267, "learning_rate": 1.962453785382789e-09, "loss": 0.1747, "step": 55955 }, { "epoch": 0.9726572685080568, "grad_norm": 1.4895099563666294, "learning_rate": 1.9599630148245395e-09, "loss": 0.1909, "step": 55956 }, { "epoch": 0.9726746510455596, "grad_norm": 1.341185014318411, "learning_rate": 1.9574738228243515e-09, "loss": 0.1456, "step": 55957 }, { "epoch": 0.9726920335830624, "grad_norm": 1.4338240400677476, "learning_rate": 1.9549862093902194e-09, "loss": 0.1134, "step": 55958 }, { "epoch": 0.9727094161205653, "grad_norm": 1.172643102424918, "learning_rate": 1.9525001745299696e-09, "loss": 0.1738, "step": 55959 }, { "epoch": 0.9727267986580681, "grad_norm": 1.551792963891413, "learning_rate": 1.950015718251541e-09, "loss": 0.1302, "step": 55960 }, { "epoch": 0.9727441811955709, "grad_norm": 1.5378007049456615, "learning_rate": 1.94753284056276e-09, "loss": 0.1882, "step": 55961 }, { "epoch": 0.9727615637330738, "grad_norm": 1.0427266441799785, "learning_rate": 1.9450515414715095e-09, "loss": 0.161, "step": 55962 }, { "epoch": 0.9727789462705766, "grad_norm": 1.18103949090598, "learning_rate": 1.9425718209856722e-09, "loss": 0.1909, "step": 55963 }, { "epoch": 0.9727963288080794, "grad_norm": 1.9177883648051877, "learning_rate": 1.9400936791131308e-09, "loss": 0.1604, "step": 55964 }, { "epoch": 0.9728137113455823, "grad_norm": 1.4215042390446573, "learning_rate": 1.937617115861656e-09, "loss": 0.1511, "step": 55965 }, { "epoch": 0.9728310938830851, "grad_norm": 2.068917932269419, "learning_rate": 1.9351421312391312e-09, "loss": 0.211, "step": 55966 }, { "epoch": 0.9728484764205879, "grad_norm": 1.127230985596253, "learning_rate": 1.932668725253439e-09, "loss": 0.184, "step": 55967 }, { "epoch": 0.9728658589580907, "grad_norm": 1.6281374336507555, "learning_rate": 1.9301968979124063e-09, "loss": 0.1737, "step": 55968 }, { "epoch": 0.9728832414955936, "grad_norm": 1.0027132607597091, "learning_rate": 1.9277266492238044e-09, "loss": 0.1842, "step": 55969 }, { "epoch": 0.9729006240330964, "grad_norm": 1.3125674049116383, "learning_rate": 1.9252579791955715e-09, "loss": 0.2219, "step": 55970 }, { "epoch": 0.9729180065705991, "grad_norm": 0.9870344247938795, "learning_rate": 1.9227908878354236e-09, "loss": 0.1908, "step": 55971 }, { "epoch": 0.972935389108102, "grad_norm": 1.483078437020286, "learning_rate": 1.9203253751512995e-09, "loss": 0.1348, "step": 55972 }, { "epoch": 0.9729527716456048, "grad_norm": 1.8764788325626856, "learning_rate": 1.9178614411508585e-09, "loss": 0.1743, "step": 55973 }, { "epoch": 0.9729701541831076, "grad_norm": 0.9806103750894698, "learning_rate": 1.91539908584204e-09, "loss": 0.1176, "step": 55974 }, { "epoch": 0.9729875367206104, "grad_norm": 1.9242883284269519, "learning_rate": 1.9129383092326145e-09, "loss": 0.2006, "step": 55975 }, { "epoch": 0.9730049192581133, "grad_norm": 2.5908864844684674, "learning_rate": 1.9104791113303543e-09, "loss": 0.2361, "step": 55976 }, { "epoch": 0.9730223017956161, "grad_norm": 2.018642814486894, "learning_rate": 1.908021492143086e-09, "loss": 0.1562, "step": 55977 }, { "epoch": 0.9730396843331189, "grad_norm": 1.449563810113651, "learning_rate": 1.9055654516785813e-09, "loss": 0.1659, "step": 55978 }, { "epoch": 0.9730570668706218, "grad_norm": 2.12694869977337, "learning_rate": 1.9031109899446117e-09, "loss": 0.151, "step": 55979 }, { "epoch": 0.9730744494081246, "grad_norm": 1.3657161559492785, "learning_rate": 1.9006581069490047e-09, "loss": 0.1461, "step": 55980 }, { "epoch": 0.9730918319456274, "grad_norm": 1.270184882442767, "learning_rate": 1.898206802699476e-09, "loss": 0.1656, "step": 55981 }, { "epoch": 0.9731092144831303, "grad_norm": 1.2944029482599317, "learning_rate": 1.895757077203797e-09, "loss": 0.1708, "step": 55982 }, { "epoch": 0.9731265970206331, "grad_norm": 0.9820323572930547, "learning_rate": 1.8933089304697947e-09, "loss": 0.2311, "step": 55983 }, { "epoch": 0.9731439795581359, "grad_norm": 1.4739865940451844, "learning_rate": 1.890862362505241e-09, "loss": 0.2362, "step": 55984 }, { "epoch": 0.9731613620956387, "grad_norm": 1.7685372698914856, "learning_rate": 1.8884173733177413e-09, "loss": 0.1185, "step": 55985 }, { "epoch": 0.9731787446331416, "grad_norm": 1.3525345444205332, "learning_rate": 1.8859739629151773e-09, "loss": 0.1668, "step": 55986 }, { "epoch": 0.9731961271706444, "grad_norm": 2.3587906015716342, "learning_rate": 1.883532131305321e-09, "loss": 0.2058, "step": 55987 }, { "epoch": 0.9732135097081472, "grad_norm": 2.9183565682988175, "learning_rate": 1.8810918784957773e-09, "loss": 0.217, "step": 55988 }, { "epoch": 0.9732308922456501, "grad_norm": 1.1315904861906165, "learning_rate": 1.878653204494374e-09, "loss": 0.1709, "step": 55989 }, { "epoch": 0.9732482747831528, "grad_norm": 1.572798188561584, "learning_rate": 1.8762161093088813e-09, "loss": 0.1532, "step": 55990 }, { "epoch": 0.9732656573206556, "grad_norm": 0.9159679816127032, "learning_rate": 1.8737805929469054e-09, "loss": 0.1166, "step": 55991 }, { "epoch": 0.9732830398581584, "grad_norm": 0.7554649479821586, "learning_rate": 1.8713466554162173e-09, "loss": 0.2479, "step": 55992 }, { "epoch": 0.9733004223956613, "grad_norm": 1.6600246899663473, "learning_rate": 1.868914296724533e-09, "loss": 0.1875, "step": 55993 }, { "epoch": 0.9733178049331641, "grad_norm": 1.6117041037218724, "learning_rate": 1.8664835168796244e-09, "loss": 0.1683, "step": 55994 }, { "epoch": 0.9733351874706669, "grad_norm": 1.6939013142424153, "learning_rate": 1.8640543158890966e-09, "loss": 0.1746, "step": 55995 }, { "epoch": 0.9733525700081698, "grad_norm": 1.8311030714726755, "learning_rate": 1.861626693760665e-09, "loss": 0.2133, "step": 55996 }, { "epoch": 0.9733699525456726, "grad_norm": 1.109391720445411, "learning_rate": 1.8592006505021019e-09, "loss": 0.1814, "step": 55997 }, { "epoch": 0.9733873350831754, "grad_norm": 0.92501600331501, "learning_rate": 1.8567761861210673e-09, "loss": 0.172, "step": 55998 }, { "epoch": 0.9734047176206783, "grad_norm": 1.388468086244368, "learning_rate": 1.8543533006251667e-09, "loss": 0.138, "step": 55999 }, { "epoch": 0.9734221001581811, "grad_norm": 1.5647142389139024, "learning_rate": 1.8519319940221157e-09, "loss": 0.176, "step": 56000 }, { "epoch": 0.9734394826956839, "grad_norm": 1.3382491364779612, "learning_rate": 1.849512266319686e-09, "loss": 0.1372, "step": 56001 }, { "epoch": 0.9734568652331868, "grad_norm": 0.9577568854318478, "learning_rate": 1.8470941175254274e-09, "loss": 0.202, "step": 56002 }, { "epoch": 0.9734742477706896, "grad_norm": 1.7950296896074454, "learning_rate": 1.8446775476470555e-09, "loss": 0.1894, "step": 56003 }, { "epoch": 0.9734916303081924, "grad_norm": 1.2036552341436528, "learning_rate": 1.8422625566922311e-09, "loss": 0.1363, "step": 56004 }, { "epoch": 0.9735090128456952, "grad_norm": 1.0387975515263927, "learning_rate": 1.8398491446685593e-09, "loss": 0.1691, "step": 56005 }, { "epoch": 0.9735263953831981, "grad_norm": 1.7586631945293183, "learning_rate": 1.837437311583756e-09, "loss": 0.1512, "step": 56006 }, { "epoch": 0.9735437779207009, "grad_norm": 1.5406209063277643, "learning_rate": 1.8350270574454818e-09, "loss": 0.1322, "step": 56007 }, { "epoch": 0.9735611604582037, "grad_norm": 1.3200731484934236, "learning_rate": 1.8326183822612862e-09, "loss": 0.1626, "step": 56008 }, { "epoch": 0.9735785429957066, "grad_norm": 1.5644704856445928, "learning_rate": 1.8302112860388852e-09, "loss": 0.1246, "step": 56009 }, { "epoch": 0.9735959255332093, "grad_norm": 1.9104970387616853, "learning_rate": 1.8278057687858284e-09, "loss": 0.1818, "step": 56010 }, { "epoch": 0.9736133080707121, "grad_norm": 2.0753977026721326, "learning_rate": 1.8254018305097763e-09, "loss": 0.1434, "step": 56011 }, { "epoch": 0.9736306906082149, "grad_norm": 1.2218659523726094, "learning_rate": 1.8229994712183894e-09, "loss": 0.1998, "step": 56012 }, { "epoch": 0.9736480731457178, "grad_norm": 1.3946998449298826, "learning_rate": 1.8205986909192728e-09, "loss": 0.1725, "step": 56013 }, { "epoch": 0.9736654556832206, "grad_norm": 1.7976843881436178, "learning_rate": 1.8181994896199758e-09, "loss": 0.1243, "step": 56014 }, { "epoch": 0.9736828382207234, "grad_norm": 2.0857752831805856, "learning_rate": 1.8158018673281594e-09, "loss": 0.1609, "step": 56015 }, { "epoch": 0.9737002207582263, "grad_norm": 1.3021741166852863, "learning_rate": 1.8134058240513727e-09, "loss": 0.2489, "step": 56016 }, { "epoch": 0.9737176032957291, "grad_norm": 2.4554108401340655, "learning_rate": 1.8110113597972764e-09, "loss": 0.1745, "step": 56017 }, { "epoch": 0.9737349858332319, "grad_norm": 1.5392946689351195, "learning_rate": 1.80861847457342e-09, "loss": 0.164, "step": 56018 }, { "epoch": 0.9737523683707348, "grad_norm": 1.575807833449724, "learning_rate": 1.8062271683874086e-09, "loss": 0.1799, "step": 56019 }, { "epoch": 0.9737697509082376, "grad_norm": 1.0436975108748234, "learning_rate": 1.8038374412467362e-09, "loss": 0.1708, "step": 56020 }, { "epoch": 0.9737871334457404, "grad_norm": 2.123424767547579, "learning_rate": 1.8014492931591185e-09, "loss": 0.1708, "step": 56021 }, { "epoch": 0.9738045159832432, "grad_norm": 1.5837447473637511, "learning_rate": 1.79906272413205e-09, "loss": 0.2395, "step": 56022 }, { "epoch": 0.9738218985207461, "grad_norm": 2.133532021710342, "learning_rate": 1.79667773417308e-09, "loss": 0.1835, "step": 56023 }, { "epoch": 0.9738392810582489, "grad_norm": 1.244380742954443, "learning_rate": 1.794294323289758e-09, "loss": 0.1815, "step": 56024 }, { "epoch": 0.9738566635957517, "grad_norm": 1.5379294523510172, "learning_rate": 1.7919124914896888e-09, "loss": 0.183, "step": 56025 }, { "epoch": 0.9738740461332546, "grad_norm": 0.5497716576219503, "learning_rate": 1.7895322387804225e-09, "loss": 0.1715, "step": 56026 }, { "epoch": 0.9738914286707574, "grad_norm": 1.1723367191599132, "learning_rate": 1.7871535651694525e-09, "loss": 0.1828, "step": 56027 }, { "epoch": 0.9739088112082602, "grad_norm": 0.8844845661523272, "learning_rate": 1.7847764706643841e-09, "loss": 0.1429, "step": 56028 }, { "epoch": 0.9739261937457631, "grad_norm": 1.4873141534079863, "learning_rate": 1.782400955272656e-09, "loss": 0.1201, "step": 56029 }, { "epoch": 0.9739435762832658, "grad_norm": 1.8319867650482329, "learning_rate": 1.7800270190018729e-09, "loss": 0.2001, "step": 56030 }, { "epoch": 0.9739609588207686, "grad_norm": 1.623462064287357, "learning_rate": 1.777654661859529e-09, "loss": 0.2698, "step": 56031 }, { "epoch": 0.9739783413582714, "grad_norm": 1.3538270417536262, "learning_rate": 1.7752838838531736e-09, "loss": 0.1561, "step": 56032 }, { "epoch": 0.9739957238957743, "grad_norm": 0.7823556567201791, "learning_rate": 1.772914684990301e-09, "loss": 0.1014, "step": 56033 }, { "epoch": 0.9740131064332771, "grad_norm": 1.506052553731878, "learning_rate": 1.7705470652784603e-09, "loss": 0.1461, "step": 56034 }, { "epoch": 0.9740304889707799, "grad_norm": 1.487247588899334, "learning_rate": 1.7681810247250351e-09, "loss": 0.111, "step": 56035 }, { "epoch": 0.9740478715082828, "grad_norm": 1.907668451200532, "learning_rate": 1.7658165633376853e-09, "loss": 0.1721, "step": 56036 }, { "epoch": 0.9740652540457856, "grad_norm": 1.699956816440671, "learning_rate": 1.7634536811237943e-09, "loss": 0.1717, "step": 56037 }, { "epoch": 0.9740826365832884, "grad_norm": 1.3797452459345128, "learning_rate": 1.761092378090856e-09, "loss": 0.1141, "step": 56038 }, { "epoch": 0.9741000191207912, "grad_norm": 1.0214694258670436, "learning_rate": 1.7587326542464198e-09, "loss": 0.13, "step": 56039 }, { "epoch": 0.9741174016582941, "grad_norm": 0.9905743852133811, "learning_rate": 1.7563745095979243e-09, "loss": 0.1791, "step": 56040 }, { "epoch": 0.9741347841957969, "grad_norm": 2.7354885001555145, "learning_rate": 1.7540179441528636e-09, "loss": 0.1544, "step": 56041 }, { "epoch": 0.9741521667332997, "grad_norm": 1.3933835267488264, "learning_rate": 1.751662957918676e-09, "loss": 0.1946, "step": 56042 }, { "epoch": 0.9741695492708026, "grad_norm": 1.0266164898880805, "learning_rate": 1.7493095509028e-09, "loss": 0.1506, "step": 56043 }, { "epoch": 0.9741869318083054, "grad_norm": 1.6993856907337375, "learning_rate": 1.7469577231127852e-09, "loss": 0.135, "step": 56044 }, { "epoch": 0.9742043143458082, "grad_norm": 0.9003457675636259, "learning_rate": 1.7446074745560146e-09, "loss": 0.1182, "step": 56045 }, { "epoch": 0.9742216968833111, "grad_norm": 3.755296308632011, "learning_rate": 1.7422588052399267e-09, "loss": 0.2605, "step": 56046 }, { "epoch": 0.9742390794208139, "grad_norm": 2.1337002459288255, "learning_rate": 1.739911715172071e-09, "loss": 0.1944, "step": 56047 }, { "epoch": 0.9742564619583167, "grad_norm": 1.1108864882295877, "learning_rate": 1.737566204359775e-09, "loss": 0.1431, "step": 56048 }, { "epoch": 0.9742738444958196, "grad_norm": 1.1611445118418902, "learning_rate": 1.7352222728104771e-09, "loss": 0.1775, "step": 56049 }, { "epoch": 0.9742912270333223, "grad_norm": 1.3966970053785808, "learning_rate": 1.7328799205316713e-09, "loss": 0.2227, "step": 56050 }, { "epoch": 0.9743086095708251, "grad_norm": 1.5804346563348293, "learning_rate": 1.7305391475307408e-09, "loss": 0.2634, "step": 56051 }, { "epoch": 0.9743259921083279, "grad_norm": 1.0342835630374947, "learning_rate": 1.7281999538151237e-09, "loss": 0.2163, "step": 56052 }, { "epoch": 0.9743433746458308, "grad_norm": 1.1986704568495188, "learning_rate": 1.725862339392259e-09, "loss": 0.1429, "step": 56053 }, { "epoch": 0.9743607571833336, "grad_norm": 1.1797064535249275, "learning_rate": 1.7235263042694737e-09, "loss": 0.2637, "step": 56054 }, { "epoch": 0.9743781397208364, "grad_norm": 2.3254731214388613, "learning_rate": 1.721191848454262e-09, "loss": 0.1544, "step": 56055 }, { "epoch": 0.9743955222583393, "grad_norm": 1.0861153873561733, "learning_rate": 1.7188589719539515e-09, "loss": 0.1586, "step": 56056 }, { "epoch": 0.9744129047958421, "grad_norm": 1.231741537777683, "learning_rate": 1.7165276747759804e-09, "loss": 0.1742, "step": 56057 }, { "epoch": 0.9744302873333449, "grad_norm": 1.0989793370907412, "learning_rate": 1.714197956927732e-09, "loss": 0.1534, "step": 56058 }, { "epoch": 0.9744476698708477, "grad_norm": 1.204885202739807, "learning_rate": 1.7118698184165336e-09, "loss": 0.141, "step": 56059 }, { "epoch": 0.9744650524083506, "grad_norm": 1.338646364037111, "learning_rate": 1.7095432592498792e-09, "loss": 0.192, "step": 56060 }, { "epoch": 0.9744824349458534, "grad_norm": 1.0605080442827164, "learning_rate": 1.707218279435041e-09, "loss": 0.1825, "step": 56061 }, { "epoch": 0.9744998174833562, "grad_norm": 1.7019323760895644, "learning_rate": 1.7048948789794015e-09, "loss": 0.1486, "step": 56062 }, { "epoch": 0.9745172000208591, "grad_norm": 1.2251742956889213, "learning_rate": 1.7025730578903995e-09, "loss": 0.1173, "step": 56063 }, { "epoch": 0.9745345825583619, "grad_norm": 7.094608797448826, "learning_rate": 1.700252816175307e-09, "loss": 0.2352, "step": 56064 }, { "epoch": 0.9745519650958647, "grad_norm": 1.6990700766077307, "learning_rate": 1.697934153841507e-09, "loss": 0.138, "step": 56065 }, { "epoch": 0.9745693476333676, "grad_norm": 2.415660494168391, "learning_rate": 1.6956170708963823e-09, "loss": 0.1653, "step": 56066 }, { "epoch": 0.9745867301708704, "grad_norm": 1.3682763948479038, "learning_rate": 1.6933015673472606e-09, "loss": 0.206, "step": 56067 }, { "epoch": 0.9746041127083732, "grad_norm": 1.3792031122736992, "learning_rate": 1.6909876432014136e-09, "loss": 0.1344, "step": 56068 }, { "epoch": 0.974621495245876, "grad_norm": 1.7610556121790801, "learning_rate": 1.68867529846628e-09, "loss": 0.1423, "step": 56069 }, { "epoch": 0.9746388777833788, "grad_norm": 2.3655501037686477, "learning_rate": 1.686364533149076e-09, "loss": 0.2254, "step": 56070 }, { "epoch": 0.9746562603208816, "grad_norm": 1.1248635763557, "learning_rate": 1.6840553472572406e-09, "loss": 0.1652, "step": 56071 }, { "epoch": 0.9746736428583844, "grad_norm": 1.4287891024241541, "learning_rate": 1.6817477407981006e-09, "loss": 0.1728, "step": 56072 }, { "epoch": 0.9746910253958873, "grad_norm": 1.5360730178436137, "learning_rate": 1.6794417137788176e-09, "loss": 0.157, "step": 56073 }, { "epoch": 0.9747084079333901, "grad_norm": 1.2310419967803625, "learning_rate": 1.6771372662068294e-09, "loss": 0.1753, "step": 56074 }, { "epoch": 0.9747257904708929, "grad_norm": 1.3273496120903239, "learning_rate": 1.6748343980894087e-09, "loss": 0.1711, "step": 56075 }, { "epoch": 0.9747431730083957, "grad_norm": 1.7850799355051976, "learning_rate": 1.672533109433827e-09, "loss": 0.2397, "step": 56076 }, { "epoch": 0.9747605555458986, "grad_norm": 1.4645519643460376, "learning_rate": 1.6702334002474116e-09, "loss": 0.1623, "step": 56077 }, { "epoch": 0.9747779380834014, "grad_norm": 1.0840915625147143, "learning_rate": 1.667935270537435e-09, "loss": 0.178, "step": 56078 }, { "epoch": 0.9747953206209042, "grad_norm": 1.8691454886300893, "learning_rate": 1.6656387203112244e-09, "loss": 0.2702, "step": 56079 }, { "epoch": 0.9748127031584071, "grad_norm": 1.3111326017047493, "learning_rate": 1.663343749575996e-09, "loss": 0.1651, "step": 56080 }, { "epoch": 0.9748300856959099, "grad_norm": 1.4846252537498845, "learning_rate": 1.661050358339078e-09, "loss": 0.176, "step": 56081 }, { "epoch": 0.9748474682334127, "grad_norm": 1.556824560646223, "learning_rate": 1.658758546607686e-09, "loss": 0.1417, "step": 56082 }, { "epoch": 0.9748648507709156, "grad_norm": 1.7092994360789862, "learning_rate": 1.6564683143890924e-09, "loss": 0.2564, "step": 56083 }, { "epoch": 0.9748822333084184, "grad_norm": 0.8414300497685839, "learning_rate": 1.6541796616906245e-09, "loss": 0.1534, "step": 56084 }, { "epoch": 0.9748996158459212, "grad_norm": 1.0575473156409807, "learning_rate": 1.6518925885194434e-09, "loss": 0.1264, "step": 56085 }, { "epoch": 0.974916998383424, "grad_norm": 0.9808004672710198, "learning_rate": 1.6496070948828766e-09, "loss": 0.1556, "step": 56086 }, { "epoch": 0.9749343809209269, "grad_norm": 1.5125350531544186, "learning_rate": 1.647323180788085e-09, "loss": 0.1841, "step": 56087 }, { "epoch": 0.9749517634584297, "grad_norm": 1.8909996056830218, "learning_rate": 1.645040846242396e-09, "loss": 0.3118, "step": 56088 }, { "epoch": 0.9749691459959325, "grad_norm": 1.948538090004374, "learning_rate": 1.6427600912529704e-09, "loss": 0.1754, "step": 56089 }, { "epoch": 0.9749865285334353, "grad_norm": 0.9626702105254911, "learning_rate": 1.6404809158270805e-09, "loss": 0.14, "step": 56090 }, { "epoch": 0.9750039110709381, "grad_norm": 1.2041241181452476, "learning_rate": 1.638203319971998e-09, "loss": 0.1261, "step": 56091 }, { "epoch": 0.9750212936084409, "grad_norm": 1.401966821028938, "learning_rate": 1.6359273036948284e-09, "loss": 0.1831, "step": 56092 }, { "epoch": 0.9750386761459438, "grad_norm": 1.686036707174955, "learning_rate": 1.6336528670027882e-09, "loss": 0.1199, "step": 56093 }, { "epoch": 0.9750560586834466, "grad_norm": 1.7773917113149234, "learning_rate": 1.6313800099032048e-09, "loss": 0.1822, "step": 56094 }, { "epoch": 0.9750734412209494, "grad_norm": 1.8588314168161595, "learning_rate": 1.6291087324031837e-09, "loss": 0.1733, "step": 56095 }, { "epoch": 0.9750908237584522, "grad_norm": 1.1599508090825832, "learning_rate": 1.626839034509997e-09, "loss": 0.1192, "step": 56096 }, { "epoch": 0.9751082062959551, "grad_norm": 2.389180145554047, "learning_rate": 1.6245709162307498e-09, "loss": 0.179, "step": 56097 }, { "epoch": 0.9751255888334579, "grad_norm": 1.345857113443482, "learning_rate": 1.6223043775727141e-09, "loss": 0.2065, "step": 56098 }, { "epoch": 0.9751429713709607, "grad_norm": 1.8153157153189459, "learning_rate": 1.6200394185429956e-09, "loss": 0.2735, "step": 56099 }, { "epoch": 0.9751603539084636, "grad_norm": 1.6186466993042492, "learning_rate": 1.617776039148866e-09, "loss": 0.1285, "step": 56100 }, { "epoch": 0.9751777364459664, "grad_norm": 1.66187376590523, "learning_rate": 1.6155142393973754e-09, "loss": 0.1519, "step": 56101 }, { "epoch": 0.9751951189834692, "grad_norm": 1.717325439895856, "learning_rate": 1.613254019295851e-09, "loss": 0.1829, "step": 56102 }, { "epoch": 0.975212501520972, "grad_norm": 2.442964435351668, "learning_rate": 1.6109953788512875e-09, "loss": 0.2588, "step": 56103 }, { "epoch": 0.9752298840584749, "grad_norm": 1.8213346655857832, "learning_rate": 1.6087383180710123e-09, "loss": 0.3331, "step": 56104 }, { "epoch": 0.9752472665959777, "grad_norm": 3.126335017255937, "learning_rate": 1.6064828369620198e-09, "loss": 0.1229, "step": 56105 }, { "epoch": 0.9752646491334805, "grad_norm": 1.8040197677733818, "learning_rate": 1.6042289355315819e-09, "loss": 0.1104, "step": 56106 }, { "epoch": 0.9752820316709834, "grad_norm": 0.7816915692578172, "learning_rate": 1.601976613786804e-09, "loss": 0.1605, "step": 56107 }, { "epoch": 0.9752994142084862, "grad_norm": 3.0554965952763706, "learning_rate": 1.5997258717347917e-09, "loss": 0.1456, "step": 56108 }, { "epoch": 0.975316796745989, "grad_norm": 1.0609711400818123, "learning_rate": 1.5974767093826503e-09, "loss": 0.1984, "step": 56109 }, { "epoch": 0.9753341792834918, "grad_norm": 1.9517731109622998, "learning_rate": 1.5952291267376517e-09, "loss": 0.1934, "step": 56110 }, { "epoch": 0.9753515618209946, "grad_norm": 1.53559917024216, "learning_rate": 1.5929831238067903e-09, "loss": 0.1323, "step": 56111 }, { "epoch": 0.9753689443584974, "grad_norm": 1.0407828348614057, "learning_rate": 1.5907387005972273e-09, "loss": 0.1053, "step": 56112 }, { "epoch": 0.9753863268960002, "grad_norm": 1.4046741650852295, "learning_rate": 1.5884958571160679e-09, "loss": 0.15, "step": 56113 }, { "epoch": 0.9754037094335031, "grad_norm": 2.507237623058916, "learning_rate": 1.5862545933704175e-09, "loss": 0.2197, "step": 56114 }, { "epoch": 0.9754210919710059, "grad_norm": 1.293037293622493, "learning_rate": 1.5840149093673815e-09, "loss": 0.1273, "step": 56115 }, { "epoch": 0.9754384745085087, "grad_norm": 0.7548654800125553, "learning_rate": 1.5817768051140655e-09, "loss": 0.1413, "step": 56116 }, { "epoch": 0.9754558570460116, "grad_norm": 1.017489196707305, "learning_rate": 1.579540280617575e-09, "loss": 0.1831, "step": 56117 }, { "epoch": 0.9754732395835144, "grad_norm": 2.2086686510999933, "learning_rate": 1.5773053358850152e-09, "loss": 0.1819, "step": 56118 }, { "epoch": 0.9754906221210172, "grad_norm": 1.4924523025150094, "learning_rate": 1.5750719709233806e-09, "loss": 0.1344, "step": 56119 }, { "epoch": 0.9755080046585201, "grad_norm": 1.4943097262840823, "learning_rate": 1.5728401857398322e-09, "loss": 0.1761, "step": 56120 }, { "epoch": 0.9755253871960229, "grad_norm": 1.4186112732690268, "learning_rate": 1.5706099803414198e-09, "loss": 0.1976, "step": 56121 }, { "epoch": 0.9755427697335257, "grad_norm": 1.3263736748767763, "learning_rate": 1.5683813547352487e-09, "loss": 0.2032, "step": 56122 }, { "epoch": 0.9755601522710285, "grad_norm": 0.8672833691884336, "learning_rate": 1.5661543089283137e-09, "loss": 0.1106, "step": 56123 }, { "epoch": 0.9755775348085314, "grad_norm": 1.4278314050103544, "learning_rate": 1.5639288429277197e-09, "loss": 0.2222, "step": 56124 }, { "epoch": 0.9755949173460342, "grad_norm": 1.6252646133464685, "learning_rate": 1.5617049567405172e-09, "loss": 0.1642, "step": 56125 }, { "epoch": 0.975612299883537, "grad_norm": 0.9232807331581447, "learning_rate": 1.5594826503737002e-09, "loss": 0.2458, "step": 56126 }, { "epoch": 0.9756296824210399, "grad_norm": 0.8524354299200699, "learning_rate": 1.5572619238344298e-09, "loss": 0.1331, "step": 56127 }, { "epoch": 0.9756470649585427, "grad_norm": 1.1576249076467038, "learning_rate": 1.5550427771295894e-09, "loss": 0.1877, "step": 56128 }, { "epoch": 0.9756644474960455, "grad_norm": 0.9382770580673018, "learning_rate": 1.5528252102663952e-09, "loss": 0.1226, "step": 56129 }, { "epoch": 0.9756818300335482, "grad_norm": 1.6119798484536156, "learning_rate": 1.5506092232517309e-09, "loss": 0.1818, "step": 56130 }, { "epoch": 0.9756992125710511, "grad_norm": 1.4715858354867672, "learning_rate": 1.5483948160926464e-09, "loss": 0.204, "step": 56131 }, { "epoch": 0.9757165951085539, "grad_norm": 1.0881311853440134, "learning_rate": 1.5461819887961913e-09, "loss": 0.2206, "step": 56132 }, { "epoch": 0.9757339776460567, "grad_norm": 0.7892783777621548, "learning_rate": 1.5439707413693603e-09, "loss": 0.1496, "step": 56133 }, { "epoch": 0.9757513601835596, "grad_norm": 1.3803879560200016, "learning_rate": 1.5417610738191478e-09, "loss": 0.1283, "step": 56134 }, { "epoch": 0.9757687427210624, "grad_norm": 2.634644273830949, "learning_rate": 1.5395529861526591e-09, "loss": 0.1682, "step": 56135 }, { "epoch": 0.9757861252585652, "grad_norm": 2.386826178994366, "learning_rate": 1.5373464783767775e-09, "loss": 0.1886, "step": 56136 }, { "epoch": 0.9758035077960681, "grad_norm": 1.445859228912602, "learning_rate": 1.5351415504984977e-09, "loss": 0.1954, "step": 56137 }, { "epoch": 0.9758208903335709, "grad_norm": 1.173012344360357, "learning_rate": 1.5329382025248695e-09, "loss": 0.1063, "step": 56138 }, { "epoch": 0.9758382728710737, "grad_norm": 0.7371653381427039, "learning_rate": 1.5307364344628316e-09, "loss": 0.1893, "step": 56139 }, { "epoch": 0.9758556554085765, "grad_norm": 2.0412226728269363, "learning_rate": 1.5285362463193784e-09, "loss": 0.1703, "step": 56140 }, { "epoch": 0.9758730379460794, "grad_norm": 1.2436648225823534, "learning_rate": 1.5263376381015046e-09, "loss": 0.104, "step": 56141 }, { "epoch": 0.9758904204835822, "grad_norm": 3.2126282743745405, "learning_rate": 1.5241406098162047e-09, "loss": 0.1989, "step": 56142 }, { "epoch": 0.975907803021085, "grad_norm": 2.695905163384775, "learning_rate": 1.5219451614703616e-09, "loss": 0.2679, "step": 56143 }, { "epoch": 0.9759251855585879, "grad_norm": 1.547922260253959, "learning_rate": 1.5197512930709699e-09, "loss": 0.164, "step": 56144 }, { "epoch": 0.9759425680960907, "grad_norm": 1.3136875549096556, "learning_rate": 1.5175590046249686e-09, "loss": 0.1533, "step": 56145 }, { "epoch": 0.9759599506335935, "grad_norm": 2.3426831178147753, "learning_rate": 1.5153682961392966e-09, "loss": 0.1098, "step": 56146 }, { "epoch": 0.9759773331710964, "grad_norm": 1.4408206242703012, "learning_rate": 1.5131791676209483e-09, "loss": 0.1517, "step": 56147 }, { "epoch": 0.9759947157085992, "grad_norm": 1.1315690547324164, "learning_rate": 1.5109916190768622e-09, "loss": 0.1528, "step": 56148 }, { "epoch": 0.9760120982461019, "grad_norm": 1.2334524464850751, "learning_rate": 1.5088056505139223e-09, "loss": 0.0756, "step": 56149 }, { "epoch": 0.9760294807836047, "grad_norm": 2.1159651085969426, "learning_rate": 1.5066212619391228e-09, "loss": 0.2204, "step": 56150 }, { "epoch": 0.9760468633211076, "grad_norm": 1.4967718367908285, "learning_rate": 1.5044384533592913e-09, "loss": 0.1281, "step": 56151 }, { "epoch": 0.9760642458586104, "grad_norm": 1.256185895940761, "learning_rate": 1.502257224781367e-09, "loss": 0.1092, "step": 56152 }, { "epoch": 0.9760816283961132, "grad_norm": 1.0565292801023445, "learning_rate": 1.5000775762123441e-09, "loss": 0.1003, "step": 56153 }, { "epoch": 0.9760990109336161, "grad_norm": 0.7900412562886778, "learning_rate": 1.497899507659106e-09, "loss": 0.1509, "step": 56154 }, { "epoch": 0.9761163934711189, "grad_norm": 1.3602520393627706, "learning_rate": 1.4957230191285364e-09, "loss": 0.1758, "step": 56155 }, { "epoch": 0.9761337760086217, "grad_norm": 0.7949935804394453, "learning_rate": 1.4935481106274627e-09, "loss": 0.1553, "step": 56156 }, { "epoch": 0.9761511585461246, "grad_norm": 1.1012253237357288, "learning_rate": 1.4913747821629352e-09, "loss": 0.2556, "step": 56157 }, { "epoch": 0.9761685410836274, "grad_norm": 1.3583189960772828, "learning_rate": 1.4892030337416705e-09, "loss": 0.2032, "step": 56158 }, { "epoch": 0.9761859236211302, "grad_norm": 2.654237462005474, "learning_rate": 1.4870328653706632e-09, "loss": 0.1466, "step": 56159 }, { "epoch": 0.976203306158633, "grad_norm": 1.7667339373333046, "learning_rate": 1.4848642770567965e-09, "loss": 0.1331, "step": 56160 }, { "epoch": 0.9762206886961359, "grad_norm": 1.5281648445074882, "learning_rate": 1.4826972688068984e-09, "loss": 0.1099, "step": 56161 }, { "epoch": 0.9762380712336387, "grad_norm": 0.98824132949013, "learning_rate": 1.4805318406278522e-09, "loss": 0.1588, "step": 56162 }, { "epoch": 0.9762554537711415, "grad_norm": 0.7638885756388833, "learning_rate": 1.4783679925264857e-09, "loss": 0.1293, "step": 56163 }, { "epoch": 0.9762728363086444, "grad_norm": 1.4088145744930727, "learning_rate": 1.4762057245096826e-09, "loss": 0.0899, "step": 56164 }, { "epoch": 0.9762902188461472, "grad_norm": 1.712515094661868, "learning_rate": 1.474045036584326e-09, "loss": 0.1701, "step": 56165 }, { "epoch": 0.97630760138365, "grad_norm": 1.2632503440518854, "learning_rate": 1.4718859287572439e-09, "loss": 0.2004, "step": 56166 }, { "epoch": 0.9763249839211529, "grad_norm": 1.6754024793328284, "learning_rate": 1.4697284010353195e-09, "loss": 0.2001, "step": 56167 }, { "epoch": 0.9763423664586557, "grad_norm": 1.7151476211286663, "learning_rate": 1.4675724534252698e-09, "loss": 0.1045, "step": 56168 }, { "epoch": 0.9763597489961584, "grad_norm": 1.156382130438527, "learning_rate": 1.4654180859340893e-09, "loss": 0.1848, "step": 56169 }, { "epoch": 0.9763771315336612, "grad_norm": 1.2900061006150008, "learning_rate": 1.463265298568439e-09, "loss": 0.1231, "step": 56170 }, { "epoch": 0.9763945140711641, "grad_norm": 1.910292390171075, "learning_rate": 1.4611140913353137e-09, "loss": 0.1807, "step": 56171 }, { "epoch": 0.9764118966086669, "grad_norm": 1.5786089153458325, "learning_rate": 1.4589644642413745e-09, "loss": 0.2138, "step": 56172 }, { "epoch": 0.9764292791461697, "grad_norm": 1.7230785558164068, "learning_rate": 1.4568164172935605e-09, "loss": 0.1822, "step": 56173 }, { "epoch": 0.9764466616836726, "grad_norm": 1.259735761268758, "learning_rate": 1.4546699504985882e-09, "loss": 0.1732, "step": 56174 }, { "epoch": 0.9764640442211754, "grad_norm": 1.587075779411792, "learning_rate": 1.4525250638633413e-09, "loss": 0.112, "step": 56175 }, { "epoch": 0.9764814267586782, "grad_norm": 1.508754736583749, "learning_rate": 1.4503817573945365e-09, "loss": 0.1269, "step": 56176 }, { "epoch": 0.976498809296181, "grad_norm": 1.2920021530278807, "learning_rate": 1.4482400310990572e-09, "loss": 0.1173, "step": 56177 }, { "epoch": 0.9765161918336839, "grad_norm": 1.4368957334684966, "learning_rate": 1.4460998849835648e-09, "loss": 0.1246, "step": 56178 }, { "epoch": 0.9765335743711867, "grad_norm": 1.263650497473234, "learning_rate": 1.4439613190549982e-09, "loss": 0.1744, "step": 56179 }, { "epoch": 0.9765509569086895, "grad_norm": 1.911731623882538, "learning_rate": 1.4418243333200186e-09, "loss": 0.1723, "step": 56180 }, { "epoch": 0.9765683394461924, "grad_norm": 2.729439679556141, "learning_rate": 1.439688927785454e-09, "loss": 0.1907, "step": 56181 }, { "epoch": 0.9765857219836952, "grad_norm": 1.3292718083761477, "learning_rate": 1.4375551024580212e-09, "loss": 0.1999, "step": 56182 }, { "epoch": 0.976603104521198, "grad_norm": 1.0668672831278947, "learning_rate": 1.4354228573445481e-09, "loss": 0.1644, "step": 56183 }, { "epoch": 0.9766204870587009, "grad_norm": 3.4227607378130442, "learning_rate": 1.4332921924517515e-09, "loss": 0.1415, "step": 56184 }, { "epoch": 0.9766378695962037, "grad_norm": 1.2789423701455063, "learning_rate": 1.4311631077863485e-09, "loss": 0.1084, "step": 56185 }, { "epoch": 0.9766552521337065, "grad_norm": 3.1743406725889494, "learning_rate": 1.4290356033552219e-09, "loss": 0.1834, "step": 56186 }, { "epoch": 0.9766726346712093, "grad_norm": 1.0352837223432976, "learning_rate": 1.4269096791649781e-09, "loss": 0.1766, "step": 56187 }, { "epoch": 0.9766900172087122, "grad_norm": 0.9725482478882066, "learning_rate": 1.4247853352223894e-09, "loss": 0.1442, "step": 56188 }, { "epoch": 0.9767073997462149, "grad_norm": 2.5736548609282135, "learning_rate": 1.4226625715342277e-09, "loss": 0.1444, "step": 56189 }, { "epoch": 0.9767247822837177, "grad_norm": 1.3017360858847133, "learning_rate": 1.4205413881072103e-09, "loss": 0.1865, "step": 56190 }, { "epoch": 0.9767421648212206, "grad_norm": 0.8658708833658247, "learning_rate": 1.418421784948054e-09, "loss": 0.1785, "step": 56191 }, { "epoch": 0.9767595473587234, "grad_norm": 2.4788185575644373, "learning_rate": 1.4163037620634754e-09, "loss": 0.1671, "step": 56192 }, { "epoch": 0.9767769298962262, "grad_norm": 1.0304658314155875, "learning_rate": 1.4141873194601361e-09, "loss": 0.1861, "step": 56193 }, { "epoch": 0.976794312433729, "grad_norm": 1.5171823720536877, "learning_rate": 1.4120724571448083e-09, "loss": 0.0954, "step": 56194 }, { "epoch": 0.9768116949712319, "grad_norm": 1.455472193655157, "learning_rate": 1.4099591751242091e-09, "loss": 0.1557, "step": 56195 }, { "epoch": 0.9768290775087347, "grad_norm": 1.1798123258683624, "learning_rate": 1.407847473404944e-09, "loss": 0.1243, "step": 56196 }, { "epoch": 0.9768464600462375, "grad_norm": 2.661788926121822, "learning_rate": 1.4057373519938408e-09, "loss": 0.1887, "step": 56197 }, { "epoch": 0.9768638425837404, "grad_norm": 1.5133739035189415, "learning_rate": 1.4036288108974503e-09, "loss": 0.1422, "step": 56198 }, { "epoch": 0.9768812251212432, "grad_norm": 1.945964981207433, "learning_rate": 1.4015218501226e-09, "loss": 0.2154, "step": 56199 }, { "epoch": 0.976898607658746, "grad_norm": 1.1192326959014751, "learning_rate": 1.3994164696758404e-09, "loss": 0.1116, "step": 56200 }, { "epoch": 0.9769159901962489, "grad_norm": 1.8389516671765453, "learning_rate": 1.3973126695638327e-09, "loss": 0.3025, "step": 56201 }, { "epoch": 0.9769333727337517, "grad_norm": 1.9533072348921814, "learning_rate": 1.3952104497934047e-09, "loss": 0.1139, "step": 56202 }, { "epoch": 0.9769507552712545, "grad_norm": 1.5566917970497003, "learning_rate": 1.3931098103710514e-09, "loss": 0.171, "step": 56203 }, { "epoch": 0.9769681378087574, "grad_norm": 1.1260918699698175, "learning_rate": 1.3910107513035452e-09, "loss": 0.2127, "step": 56204 }, { "epoch": 0.9769855203462602, "grad_norm": 0.839215258679098, "learning_rate": 1.3889132725974363e-09, "loss": 0.1394, "step": 56205 }, { "epoch": 0.977002902883763, "grad_norm": 1.8533288851783003, "learning_rate": 1.386817374259497e-09, "loss": 0.1502, "step": 56206 }, { "epoch": 0.9770202854212658, "grad_norm": 1.2965579957589382, "learning_rate": 1.3847230562962774e-09, "loss": 0.1597, "step": 56207 }, { "epoch": 0.9770376679587687, "grad_norm": 1.6049931336067642, "learning_rate": 1.3826303187144395e-09, "loss": 0.1494, "step": 56208 }, { "epoch": 0.9770550504962714, "grad_norm": 1.2427971077798987, "learning_rate": 1.3805391615205886e-09, "loss": 0.1235, "step": 56209 }, { "epoch": 0.9770724330337742, "grad_norm": 0.9131675244618256, "learning_rate": 1.3784495847213862e-09, "loss": 0.1583, "step": 56210 }, { "epoch": 0.977089815571277, "grad_norm": 1.6072035135022353, "learning_rate": 1.3763615883234935e-09, "loss": 0.2229, "step": 56211 }, { "epoch": 0.9771071981087799, "grad_norm": 1.9103820701235126, "learning_rate": 1.3742751723334611e-09, "loss": 0.1477, "step": 56212 }, { "epoch": 0.9771245806462827, "grad_norm": 1.1503506614129881, "learning_rate": 1.3721903367578946e-09, "loss": 0.1612, "step": 56213 }, { "epoch": 0.9771419631837855, "grad_norm": 1.1872189668430797, "learning_rate": 1.3701070816035109e-09, "loss": 0.1434, "step": 56214 }, { "epoch": 0.9771593457212884, "grad_norm": 1.520232996108798, "learning_rate": 1.3680254068767493e-09, "loss": 0.1239, "step": 56215 }, { "epoch": 0.9771767282587912, "grad_norm": 1.4721291379776582, "learning_rate": 1.3659453125843823e-09, "loss": 0.1506, "step": 56216 }, { "epoch": 0.977194110796294, "grad_norm": 2.3385422141040784, "learning_rate": 1.363866798732849e-09, "loss": 0.273, "step": 56217 }, { "epoch": 0.9772114933337969, "grad_norm": 1.5095668286424466, "learning_rate": 1.3617898653288107e-09, "loss": 0.1827, "step": 56218 }, { "epoch": 0.9772288758712997, "grad_norm": 3.706030142928472, "learning_rate": 1.3597145123788733e-09, "loss": 0.2252, "step": 56219 }, { "epoch": 0.9772462584088025, "grad_norm": 1.080313981269839, "learning_rate": 1.3576407398895872e-09, "loss": 0.2346, "step": 56220 }, { "epoch": 0.9772636409463054, "grad_norm": 1.600520131570978, "learning_rate": 1.3555685478674472e-09, "loss": 0.1639, "step": 56221 }, { "epoch": 0.9772810234838082, "grad_norm": 1.049410568335833, "learning_rate": 1.35349793631917e-09, "loss": 0.1713, "step": 56222 }, { "epoch": 0.977298406021311, "grad_norm": 1.37070145103256, "learning_rate": 1.351428905251195e-09, "loss": 0.1126, "step": 56223 }, { "epoch": 0.9773157885588138, "grad_norm": 1.257568979537234, "learning_rate": 1.3493614546701836e-09, "loss": 0.1612, "step": 56224 }, { "epoch": 0.9773331710963167, "grad_norm": 0.6440346505029998, "learning_rate": 1.3472955845826307e-09, "loss": 0.1115, "step": 56225 }, { "epoch": 0.9773505536338195, "grad_norm": 1.4235751561493384, "learning_rate": 1.3452312949950307e-09, "loss": 0.1804, "step": 56226 }, { "epoch": 0.9773679361713223, "grad_norm": 0.8965061502690318, "learning_rate": 1.3431685859140452e-09, "loss": 0.2545, "step": 56227 }, { "epoch": 0.9773853187088252, "grad_norm": 1.4191510172611246, "learning_rate": 1.3411074573461134e-09, "loss": 0.1404, "step": 56228 }, { "epoch": 0.9774027012463279, "grad_norm": 0.957843533849018, "learning_rate": 1.3390479092978414e-09, "loss": 0.1076, "step": 56229 }, { "epoch": 0.9774200837838307, "grad_norm": 1.6656633843226354, "learning_rate": 1.3369899417757236e-09, "loss": 0.1724, "step": 56230 }, { "epoch": 0.9774374663213335, "grad_norm": 1.3766533717642087, "learning_rate": 1.334933554786255e-09, "loss": 0.1136, "step": 56231 }, { "epoch": 0.9774548488588364, "grad_norm": 1.8081588041271617, "learning_rate": 1.3328787483359304e-09, "loss": 0.1527, "step": 56232 }, { "epoch": 0.9774722313963392, "grad_norm": 1.2931434022932211, "learning_rate": 1.3308255224314113e-09, "loss": 0.1868, "step": 56233 }, { "epoch": 0.977489613933842, "grad_norm": 1.2558460136167677, "learning_rate": 1.3287738770790258e-09, "loss": 0.1342, "step": 56234 }, { "epoch": 0.9775069964713449, "grad_norm": 0.730268940399889, "learning_rate": 1.3267238122853796e-09, "loss": 0.1374, "step": 56235 }, { "epoch": 0.9775243790088477, "grad_norm": 1.0414907485416496, "learning_rate": 1.3246753280569677e-09, "loss": 0.1381, "step": 56236 }, { "epoch": 0.9775417615463505, "grad_norm": 1.124783087379745, "learning_rate": 1.3226284244002295e-09, "loss": 0.0918, "step": 56237 }, { "epoch": 0.9775591440838534, "grad_norm": 1.392518221927925, "learning_rate": 1.320583101321715e-09, "loss": 0.1884, "step": 56238 }, { "epoch": 0.9775765266213562, "grad_norm": 1.361819226281661, "learning_rate": 1.3185393588278637e-09, "loss": 0.1652, "step": 56239 }, { "epoch": 0.977593909158859, "grad_norm": 1.8374553795624138, "learning_rate": 1.3164971969251704e-09, "loss": 0.1855, "step": 56240 }, { "epoch": 0.9776112916963619, "grad_norm": 1.667979940991194, "learning_rate": 1.3144566156201298e-09, "loss": 0.152, "step": 56241 }, { "epoch": 0.9776286742338647, "grad_norm": 0.7621159584074129, "learning_rate": 1.3124176149191256e-09, "loss": 0.1383, "step": 56242 }, { "epoch": 0.9776460567713675, "grad_norm": 1.3983545402407964, "learning_rate": 1.310380194828764e-09, "loss": 0.0997, "step": 56243 }, { "epoch": 0.9776634393088703, "grad_norm": 1.0354447315658628, "learning_rate": 1.3083443553553174e-09, "loss": 0.113, "step": 56244 }, { "epoch": 0.9776808218463732, "grad_norm": 1.8416374191705613, "learning_rate": 1.306310096505392e-09, "loss": 0.1317, "step": 56245 }, { "epoch": 0.977698204383876, "grad_norm": 0.874920835018815, "learning_rate": 1.3042774182854265e-09, "loss": 0.1734, "step": 56246 }, { "epoch": 0.9777155869213788, "grad_norm": 1.0680645193684866, "learning_rate": 1.3022463207017497e-09, "loss": 0.1429, "step": 56247 }, { "epoch": 0.9777329694588817, "grad_norm": 1.6305908224471302, "learning_rate": 1.3002168037608564e-09, "loss": 0.0989, "step": 56248 }, { "epoch": 0.9777503519963844, "grad_norm": 0.7552891371242719, "learning_rate": 1.2981888674692965e-09, "loss": 0.1594, "step": 56249 }, { "epoch": 0.9777677345338872, "grad_norm": 1.3978937309701631, "learning_rate": 1.2961625118332874e-09, "loss": 0.1325, "step": 56250 }, { "epoch": 0.97778511707139, "grad_norm": 1.271920577006634, "learning_rate": 1.2941377368594352e-09, "loss": 0.1236, "step": 56251 }, { "epoch": 0.9778024996088929, "grad_norm": 1.151015489744074, "learning_rate": 1.2921145425540125e-09, "loss": 0.1705, "step": 56252 }, { "epoch": 0.9778198821463957, "grad_norm": 1.3435287806763148, "learning_rate": 1.290092928923514e-09, "loss": 0.1302, "step": 56253 }, { "epoch": 0.9778372646838985, "grad_norm": 1.6648986557320447, "learning_rate": 1.2880728959743237e-09, "loss": 0.1736, "step": 56254 }, { "epoch": 0.9778546472214014, "grad_norm": 1.2629401164132004, "learning_rate": 1.2860544437128807e-09, "loss": 0.1173, "step": 56255 }, { "epoch": 0.9778720297589042, "grad_norm": 1.8973922134698349, "learning_rate": 1.2840375721455688e-09, "loss": 0.1904, "step": 56256 }, { "epoch": 0.977889412296407, "grad_norm": 1.5242194941921485, "learning_rate": 1.2820222812787163e-09, "loss": 0.1804, "step": 56257 }, { "epoch": 0.9779067948339099, "grad_norm": 1.2535146163857336, "learning_rate": 1.280008571118818e-09, "loss": 0.1198, "step": 56258 }, { "epoch": 0.9779241773714127, "grad_norm": 1.6581677291425438, "learning_rate": 1.2779964416721466e-09, "loss": 0.1785, "step": 56259 }, { "epoch": 0.9779415599089155, "grad_norm": 0.8810489581950997, "learning_rate": 1.2759858929451418e-09, "loss": 0.1268, "step": 56260 }, { "epoch": 0.9779589424464183, "grad_norm": 1.4196419825455957, "learning_rate": 1.2739769249441866e-09, "loss": 0.1163, "step": 56261 }, { "epoch": 0.9779763249839212, "grad_norm": 2.099730574344733, "learning_rate": 1.2719695376756656e-09, "loss": 0.1999, "step": 56262 }, { "epoch": 0.977993707521424, "grad_norm": 1.3028791351386029, "learning_rate": 1.269963731145851e-09, "loss": 0.2082, "step": 56263 }, { "epoch": 0.9780110900589268, "grad_norm": 1.551941211685617, "learning_rate": 1.267959505361127e-09, "loss": 0.1191, "step": 56264 }, { "epoch": 0.9780284725964297, "grad_norm": 1.0734134049730915, "learning_rate": 1.2659568603279326e-09, "loss": 0.1198, "step": 56265 }, { "epoch": 0.9780458551339325, "grad_norm": 1.3361548237293663, "learning_rate": 1.2639557960525404e-09, "loss": 0.1961, "step": 56266 }, { "epoch": 0.9780632376714353, "grad_norm": 1.373832718807642, "learning_rate": 1.2619563125412791e-09, "loss": 0.16, "step": 56267 }, { "epoch": 0.9780806202089382, "grad_norm": 0.9266761188266874, "learning_rate": 1.2599584098005322e-09, "loss": 0.1165, "step": 56268 }, { "epoch": 0.9780980027464409, "grad_norm": 2.057096658280585, "learning_rate": 1.257962087836628e-09, "loss": 0.1714, "step": 56269 }, { "epoch": 0.9781153852839437, "grad_norm": 1.3049040262596514, "learning_rate": 1.255967346655895e-09, "loss": 0.2017, "step": 56270 }, { "epoch": 0.9781327678214465, "grad_norm": 1.128572072654924, "learning_rate": 1.2539741862646058e-09, "loss": 0.1974, "step": 56271 }, { "epoch": 0.9781501503589494, "grad_norm": 1.060337777251419, "learning_rate": 1.2519826066690885e-09, "loss": 0.0938, "step": 56272 }, { "epoch": 0.9781675328964522, "grad_norm": 0.9950371123861306, "learning_rate": 1.2499926078757272e-09, "loss": 0.194, "step": 56273 }, { "epoch": 0.978184915433955, "grad_norm": 0.9388926402289755, "learning_rate": 1.2480041898907945e-09, "loss": 0.1284, "step": 56274 }, { "epoch": 0.9782022979714579, "grad_norm": 1.5258904862504523, "learning_rate": 1.2460173527205631e-09, "loss": 0.1248, "step": 56275 }, { "epoch": 0.9782196805089607, "grad_norm": 2.204003186238252, "learning_rate": 1.2440320963713058e-09, "loss": 0.2147, "step": 56276 }, { "epoch": 0.9782370630464635, "grad_norm": 1.4040524066157143, "learning_rate": 1.2420484208494064e-09, "loss": 0.1453, "step": 56277 }, { "epoch": 0.9782544455839663, "grad_norm": 1.1796159610933439, "learning_rate": 1.240066326161082e-09, "loss": 0.1613, "step": 56278 }, { "epoch": 0.9782718281214692, "grad_norm": 1.5076168676249269, "learning_rate": 1.2380858123126615e-09, "loss": 0.098, "step": 56279 }, { "epoch": 0.978289210658972, "grad_norm": 1.9641249163903436, "learning_rate": 1.2361068793103613e-09, "loss": 0.1948, "step": 56280 }, { "epoch": 0.9783065931964748, "grad_norm": 0.7361784159677125, "learning_rate": 1.2341295271605102e-09, "loss": 0.18, "step": 56281 }, { "epoch": 0.9783239757339777, "grad_norm": 3.204344542661802, "learning_rate": 1.232153755869325e-09, "loss": 0.1644, "step": 56282 }, { "epoch": 0.9783413582714805, "grad_norm": 1.8127962787120049, "learning_rate": 1.2301795654431345e-09, "loss": 0.1501, "step": 56283 }, { "epoch": 0.9783587408089833, "grad_norm": 1.8345730011177168, "learning_rate": 1.2282069558881558e-09, "loss": 0.1591, "step": 56284 }, { "epoch": 0.9783761233464862, "grad_norm": 1.1013235902364378, "learning_rate": 1.2262359272106615e-09, "loss": 0.1631, "step": 56285 }, { "epoch": 0.978393505883989, "grad_norm": 1.6761254513538069, "learning_rate": 1.2242664794168135e-09, "loss": 0.1611, "step": 56286 }, { "epoch": 0.9784108884214918, "grad_norm": 1.3960174522885083, "learning_rate": 1.2222986125129952e-09, "loss": 0.1392, "step": 56287 }, { "epoch": 0.9784282709589945, "grad_norm": 1.507306665592849, "learning_rate": 1.2203323265053134e-09, "loss": 0.1908, "step": 56288 }, { "epoch": 0.9784456534964974, "grad_norm": 1.0606749636042783, "learning_rate": 1.218367621400096e-09, "loss": 0.1096, "step": 56289 }, { "epoch": 0.9784630360340002, "grad_norm": 1.8984492627933196, "learning_rate": 1.2164044972035048e-09, "loss": 0.1488, "step": 56290 }, { "epoch": 0.978480418571503, "grad_norm": 0.9524031118586312, "learning_rate": 1.2144429539218127e-09, "loss": 0.1371, "step": 56291 }, { "epoch": 0.9784978011090059, "grad_norm": 1.2157580990059091, "learning_rate": 1.2124829915611812e-09, "loss": 0.2009, "step": 56292 }, { "epoch": 0.9785151836465087, "grad_norm": 1.5738273221091306, "learning_rate": 1.2105246101278833e-09, "loss": 0.1577, "step": 56293 }, { "epoch": 0.9785325661840115, "grad_norm": 1.2649870239120444, "learning_rate": 1.2085678096280804e-09, "loss": 0.1799, "step": 56294 }, { "epoch": 0.9785499487215144, "grad_norm": 0.9820974121992889, "learning_rate": 1.2066125900679903e-09, "loss": 0.1296, "step": 56295 }, { "epoch": 0.9785673312590172, "grad_norm": 1.4353392076482179, "learning_rate": 1.2046589514538296e-09, "loss": 0.1624, "step": 56296 }, { "epoch": 0.97858471379652, "grad_norm": 0.9597275255536807, "learning_rate": 1.2027068937917606e-09, "loss": 0.1303, "step": 56297 }, { "epoch": 0.9786020963340228, "grad_norm": 1.759465911994354, "learning_rate": 1.2007564170880003e-09, "loss": 0.2263, "step": 56298 }, { "epoch": 0.9786194788715257, "grad_norm": 0.8142662532255429, "learning_rate": 1.1988075213487104e-09, "loss": 0.0979, "step": 56299 }, { "epoch": 0.9786368614090285, "grad_norm": 1.4656716663045148, "learning_rate": 1.1968602065800526e-09, "loss": 0.1697, "step": 56300 }, { "epoch": 0.9786542439465313, "grad_norm": 1.5625795297997065, "learning_rate": 1.1949144727882443e-09, "loss": 0.1853, "step": 56301 }, { "epoch": 0.9786716264840342, "grad_norm": 1.7674942330845214, "learning_rate": 1.1929703199793917e-09, "loss": 0.3054, "step": 56302 }, { "epoch": 0.978689009021537, "grad_norm": 1.4629203698749282, "learning_rate": 1.1910277481596564e-09, "loss": 0.1634, "step": 56303 }, { "epoch": 0.9787063915590398, "grad_norm": 1.9329156049046874, "learning_rate": 1.1890867573353114e-09, "loss": 0.1419, "step": 56304 }, { "epoch": 0.9787237740965427, "grad_norm": 1.4153165600113324, "learning_rate": 1.1871473475123517e-09, "loss": 0.1417, "step": 56305 }, { "epoch": 0.9787411566340455, "grad_norm": 0.8144475634973543, "learning_rate": 1.1852095186970501e-09, "loss": 0.1153, "step": 56306 }, { "epoch": 0.9787585391715483, "grad_norm": 1.8109281428502493, "learning_rate": 1.1832732708955128e-09, "loss": 0.1361, "step": 56307 }, { "epoch": 0.978775921709051, "grad_norm": 1.1672858449429584, "learning_rate": 1.1813386041137908e-09, "loss": 0.181, "step": 56308 }, { "epoch": 0.9787933042465539, "grad_norm": 3.1717661438276883, "learning_rate": 1.1794055183581564e-09, "loss": 0.1945, "step": 56309 }, { "epoch": 0.9788106867840567, "grad_norm": 2.108607627839611, "learning_rate": 1.177474013634605e-09, "loss": 0.214, "step": 56310 }, { "epoch": 0.9788280693215595, "grad_norm": 0.9836878458575177, "learning_rate": 1.175544089949354e-09, "loss": 0.1713, "step": 56311 }, { "epoch": 0.9788454518590624, "grad_norm": 1.4315127741184006, "learning_rate": 1.1736157473085096e-09, "loss": 0.1375, "step": 56312 }, { "epoch": 0.9788628343965652, "grad_norm": 1.2973695563530363, "learning_rate": 1.1716889857181223e-09, "loss": 0.1239, "step": 56313 }, { "epoch": 0.978880216934068, "grad_norm": 0.9972534476447472, "learning_rate": 1.169763805184354e-09, "loss": 0.1353, "step": 56314 }, { "epoch": 0.9788975994715708, "grad_norm": 2.390906391760644, "learning_rate": 1.1678402057133107e-09, "loss": 0.2654, "step": 56315 }, { "epoch": 0.9789149820090737, "grad_norm": 2.267263536420716, "learning_rate": 1.1659181873110435e-09, "loss": 0.1958, "step": 56316 }, { "epoch": 0.9789323645465765, "grad_norm": 0.8458797716953902, "learning_rate": 1.1639977499836584e-09, "loss": 0.0976, "step": 56317 }, { "epoch": 0.9789497470840793, "grad_norm": 2.991537194862429, "learning_rate": 1.1620788937372616e-09, "loss": 0.2521, "step": 56318 }, { "epoch": 0.9789671296215822, "grad_norm": 1.8600532702473218, "learning_rate": 1.1601616185779595e-09, "loss": 0.19, "step": 56319 }, { "epoch": 0.978984512159085, "grad_norm": 2.3215575133196897, "learning_rate": 1.1582459245118025e-09, "loss": 0.2111, "step": 56320 }, { "epoch": 0.9790018946965878, "grad_norm": 2.037797833722866, "learning_rate": 1.1563318115447861e-09, "loss": 0.187, "step": 56321 }, { "epoch": 0.9790192772340907, "grad_norm": 1.0964007551638868, "learning_rate": 1.154419279683072e-09, "loss": 0.1917, "step": 56322 }, { "epoch": 0.9790366597715935, "grad_norm": 1.829741453418478, "learning_rate": 1.1525083289327664e-09, "loss": 0.1172, "step": 56323 }, { "epoch": 0.9790540423090963, "grad_norm": 1.1510529384697963, "learning_rate": 1.1505989592998088e-09, "loss": 0.2094, "step": 56324 }, { "epoch": 0.9790714248465991, "grad_norm": 1.3936764950846345, "learning_rate": 1.1486911707902502e-09, "loss": 0.1244, "step": 56325 }, { "epoch": 0.979088807384102, "grad_norm": 1.0412653915391639, "learning_rate": 1.1467849634102523e-09, "loss": 0.1168, "step": 56326 }, { "epoch": 0.9791061899216048, "grad_norm": 1.2667859822556973, "learning_rate": 1.14488033716581e-09, "loss": 0.2052, "step": 56327 }, { "epoch": 0.9791235724591075, "grad_norm": 1.0382525670569012, "learning_rate": 1.1429772920628634e-09, "loss": 0.1176, "step": 56328 }, { "epoch": 0.9791409549966104, "grad_norm": 0.9268484089075864, "learning_rate": 1.141075828107574e-09, "loss": 0.1329, "step": 56329 }, { "epoch": 0.9791583375341132, "grad_norm": 1.7985654359051988, "learning_rate": 1.1391759453058813e-09, "loss": 0.175, "step": 56330 }, { "epoch": 0.979175720071616, "grad_norm": 1.1029210808778915, "learning_rate": 1.137277643663892e-09, "loss": 0.1489, "step": 56331 }, { "epoch": 0.9791931026091188, "grad_norm": 1.432314363113359, "learning_rate": 1.1353809231875455e-09, "loss": 0.1772, "step": 56332 }, { "epoch": 0.9792104851466217, "grad_norm": 0.8697449395196611, "learning_rate": 1.1334857838828927e-09, "loss": 0.1658, "step": 56333 }, { "epoch": 0.9792278676841245, "grad_norm": 1.462306951600272, "learning_rate": 1.1315922257559284e-09, "loss": 0.1741, "step": 56334 }, { "epoch": 0.9792452502216273, "grad_norm": 1.0539483887700292, "learning_rate": 1.1297002488126484e-09, "loss": 0.1454, "step": 56335 }, { "epoch": 0.9792626327591302, "grad_norm": 1.1028861501124732, "learning_rate": 1.1278098530590475e-09, "loss": 0.1675, "step": 56336 }, { "epoch": 0.979280015296633, "grad_norm": 0.7031102030938446, "learning_rate": 1.1259210385011209e-09, "loss": 0.1892, "step": 56337 }, { "epoch": 0.9792973978341358, "grad_norm": 1.9512157503961365, "learning_rate": 1.1240338051449193e-09, "loss": 0.1735, "step": 56338 }, { "epoch": 0.9793147803716387, "grad_norm": 1.0732619666734093, "learning_rate": 1.1221481529963273e-09, "loss": 0.1009, "step": 56339 }, { "epoch": 0.9793321629091415, "grad_norm": 0.7947341963893444, "learning_rate": 1.1202640820613397e-09, "loss": 0.1283, "step": 56340 }, { "epoch": 0.9793495454466443, "grad_norm": 1.1374347786673045, "learning_rate": 1.1183815923459517e-09, "loss": 0.2588, "step": 56341 }, { "epoch": 0.9793669279841472, "grad_norm": 1.0534444933420704, "learning_rate": 1.1165006838561586e-09, "loss": 0.1661, "step": 56342 }, { "epoch": 0.97938431052165, "grad_norm": 1.4731709534623652, "learning_rate": 1.1146213565979001e-09, "loss": 0.1934, "step": 56343 }, { "epoch": 0.9794016930591528, "grad_norm": 1.0905990093677318, "learning_rate": 1.1127436105770604e-09, "loss": 0.1252, "step": 56344 }, { "epoch": 0.9794190755966556, "grad_norm": 2.939458601377794, "learning_rate": 1.1108674457996903e-09, "loss": 0.1821, "step": 56345 }, { "epoch": 0.9794364581341585, "grad_norm": 1.1102653187180274, "learning_rate": 1.1089928622717294e-09, "loss": 0.1689, "step": 56346 }, { "epoch": 0.9794538406716613, "grad_norm": 1.9051550092200318, "learning_rate": 1.1071198599990616e-09, "loss": 0.2272, "step": 56347 }, { "epoch": 0.979471223209164, "grad_norm": 1.9281358340652828, "learning_rate": 1.105248438987627e-09, "loss": 0.2227, "step": 56348 }, { "epoch": 0.9794886057466669, "grad_norm": 1.0314089030283955, "learning_rate": 1.1033785992433652e-09, "loss": 0.1512, "step": 56349 }, { "epoch": 0.9795059882841697, "grad_norm": 1.4148665434544105, "learning_rate": 1.1015103407722715e-09, "loss": 0.1686, "step": 56350 }, { "epoch": 0.9795233708216725, "grad_norm": 1.0604217485398397, "learning_rate": 1.0996436635802297e-09, "loss": 0.1216, "step": 56351 }, { "epoch": 0.9795407533591753, "grad_norm": 0.8737567705810273, "learning_rate": 1.097778567673069e-09, "loss": 0.1211, "step": 56352 }, { "epoch": 0.9795581358966782, "grad_norm": 1.0338257913509177, "learning_rate": 1.0959150530568395e-09, "loss": 0.1151, "step": 56353 }, { "epoch": 0.979575518434181, "grad_norm": 0.8091381739401831, "learning_rate": 1.0940531197373704e-09, "loss": 0.1471, "step": 56354 }, { "epoch": 0.9795929009716838, "grad_norm": 1.1506035753116834, "learning_rate": 1.0921927677205455e-09, "loss": 0.1106, "step": 56355 }, { "epoch": 0.9796102835091867, "grad_norm": 1.4547339129696821, "learning_rate": 1.0903339970123048e-09, "loss": 0.142, "step": 56356 }, { "epoch": 0.9796276660466895, "grad_norm": 1.2092576085806428, "learning_rate": 1.0884768076185325e-09, "loss": 0.2287, "step": 56357 }, { "epoch": 0.9796450485841923, "grad_norm": 1.0850384268071058, "learning_rate": 1.086621199545057e-09, "loss": 0.1849, "step": 56358 }, { "epoch": 0.9796624311216952, "grad_norm": 1.0858570191030852, "learning_rate": 1.0847671727978735e-09, "loss": 0.2059, "step": 56359 }, { "epoch": 0.979679813659198, "grad_norm": 1.3987098823828434, "learning_rate": 1.0829147273827554e-09, "loss": 0.1424, "step": 56360 }, { "epoch": 0.9796971961967008, "grad_norm": 1.0029961447666706, "learning_rate": 1.0810638633056425e-09, "loss": 0.1644, "step": 56361 }, { "epoch": 0.9797145787342036, "grad_norm": 0.9897353436794939, "learning_rate": 1.0792145805723628e-09, "loss": 0.1111, "step": 56362 }, { "epoch": 0.9797319612717065, "grad_norm": 1.2552467531055567, "learning_rate": 1.0773668791888013e-09, "loss": 0.2138, "step": 56363 }, { "epoch": 0.9797493438092093, "grad_norm": 1.5688943166395481, "learning_rate": 1.0755207591607863e-09, "loss": 0.1719, "step": 56364 }, { "epoch": 0.9797667263467121, "grad_norm": 1.6349509142108276, "learning_rate": 1.0736762204942018e-09, "loss": 0.1779, "step": 56365 }, { "epoch": 0.979784108884215, "grad_norm": 1.3812035758448342, "learning_rate": 1.0718332631948212e-09, "loss": 0.1363, "step": 56366 }, { "epoch": 0.9798014914217178, "grad_norm": 1.7321947141416445, "learning_rate": 1.0699918872685842e-09, "loss": 0.0954, "step": 56367 }, { "epoch": 0.9798188739592205, "grad_norm": 1.3981224262816225, "learning_rate": 1.068152092721264e-09, "loss": 0.2189, "step": 56368 }, { "epoch": 0.9798362564967233, "grad_norm": 0.8155315820268585, "learning_rate": 1.0663138795587446e-09, "loss": 0.1158, "step": 56369 }, { "epoch": 0.9798536390342262, "grad_norm": 1.4009283445596195, "learning_rate": 1.064477247786799e-09, "loss": 0.1442, "step": 56370 }, { "epoch": 0.979871021571729, "grad_norm": 1.0826101710993907, "learning_rate": 1.0626421974112564e-09, "loss": 0.0846, "step": 56371 }, { "epoch": 0.9798884041092318, "grad_norm": 1.4835370074274106, "learning_rate": 1.0608087284379452e-09, "loss": 0.1444, "step": 56372 }, { "epoch": 0.9799057866467347, "grad_norm": 1.7803894317404583, "learning_rate": 1.058976840872694e-09, "loss": 0.1354, "step": 56373 }, { "epoch": 0.9799231691842375, "grad_norm": 0.9493680828808125, "learning_rate": 1.057146534721276e-09, "loss": 0.1477, "step": 56374 }, { "epoch": 0.9799405517217403, "grad_norm": 1.754966771046967, "learning_rate": 1.0553178099895198e-09, "loss": 0.1552, "step": 56375 }, { "epoch": 0.9799579342592432, "grad_norm": 1.0441406969249598, "learning_rate": 1.0534906666831988e-09, "loss": 0.1044, "step": 56376 }, { "epoch": 0.979975316796746, "grad_norm": 1.1903215206427324, "learning_rate": 1.0516651048081416e-09, "loss": 0.1385, "step": 56377 }, { "epoch": 0.9799926993342488, "grad_norm": 1.2071370349650083, "learning_rate": 1.0498411243700655e-09, "loss": 0.1196, "step": 56378 }, { "epoch": 0.9800100818717516, "grad_norm": 1.6754059001020731, "learning_rate": 1.0480187253747995e-09, "loss": 0.2005, "step": 56379 }, { "epoch": 0.9800274644092545, "grad_norm": 1.5101709581122438, "learning_rate": 1.0461979078281723e-09, "loss": 0.0964, "step": 56380 }, { "epoch": 0.9800448469467573, "grad_norm": 1.8816048027611156, "learning_rate": 1.044378671735846e-09, "loss": 0.1782, "step": 56381 }, { "epoch": 0.9800622294842601, "grad_norm": 1.2517471751782663, "learning_rate": 1.0425610171036492e-09, "loss": 0.1236, "step": 56382 }, { "epoch": 0.980079612021763, "grad_norm": 2.034813005735635, "learning_rate": 1.0407449439372994e-09, "loss": 0.1912, "step": 56383 }, { "epoch": 0.9800969945592658, "grad_norm": 1.7122524272524795, "learning_rate": 1.0389304522426256e-09, "loss": 0.1804, "step": 56384 }, { "epoch": 0.9801143770967686, "grad_norm": 0.8937528775058216, "learning_rate": 1.0371175420253454e-09, "loss": 0.1182, "step": 56385 }, { "epoch": 0.9801317596342715, "grad_norm": 1.008111173537029, "learning_rate": 1.035306213291176e-09, "loss": 0.1916, "step": 56386 }, { "epoch": 0.9801491421717743, "grad_norm": 1.5432511077659208, "learning_rate": 1.0334964660458912e-09, "loss": 0.1776, "step": 56387 }, { "epoch": 0.980166524709277, "grad_norm": 1.2852663323088171, "learning_rate": 1.0316883002952082e-09, "loss": 0.1641, "step": 56388 }, { "epoch": 0.9801839072467798, "grad_norm": 1.2078455077756987, "learning_rate": 1.0298817160449002e-09, "loss": 0.2112, "step": 56389 }, { "epoch": 0.9802012897842827, "grad_norm": 0.733130744788823, "learning_rate": 1.0280767133005742e-09, "loss": 0.0959, "step": 56390 }, { "epoch": 0.9802186723217855, "grad_norm": 1.2384625026043397, "learning_rate": 1.0262732920681139e-09, "loss": 0.16, "step": 56391 }, { "epoch": 0.9802360548592883, "grad_norm": 2.560182641751861, "learning_rate": 1.0244714523531261e-09, "loss": 0.2429, "step": 56392 }, { "epoch": 0.9802534373967912, "grad_norm": 0.9136416902160182, "learning_rate": 1.0226711941613842e-09, "loss": 0.2267, "step": 56393 }, { "epoch": 0.980270819934294, "grad_norm": 1.8227509401124735, "learning_rate": 1.0208725174984944e-09, "loss": 0.1759, "step": 56394 }, { "epoch": 0.9802882024717968, "grad_norm": 0.8272694529856671, "learning_rate": 1.0190754223702857e-09, "loss": 0.1775, "step": 56395 }, { "epoch": 0.9803055850092997, "grad_norm": 1.2325253754175791, "learning_rate": 1.0172799087823647e-09, "loss": 0.1542, "step": 56396 }, { "epoch": 0.9803229675468025, "grad_norm": 1.210516165922609, "learning_rate": 1.0154859767405044e-09, "loss": 0.203, "step": 56397 }, { "epoch": 0.9803403500843053, "grad_norm": 1.0630640189022886, "learning_rate": 1.0136936262503116e-09, "loss": 0.1084, "step": 56398 }, { "epoch": 0.9803577326218081, "grad_norm": 1.6127543620715161, "learning_rate": 1.0119028573174482e-09, "loss": 0.2449, "step": 56399 }, { "epoch": 0.980375115159311, "grad_norm": 1.360372509470265, "learning_rate": 1.0101136699476876e-09, "loss": 0.1041, "step": 56400 }, { "epoch": 0.9803924976968138, "grad_norm": 1.7849461493814351, "learning_rate": 1.0083260641466361e-09, "loss": 0.1773, "step": 56401 }, { "epoch": 0.9804098802343166, "grad_norm": 1.656797474251858, "learning_rate": 1.0065400399199563e-09, "loss": 0.1703, "step": 56402 }, { "epoch": 0.9804272627718195, "grad_norm": 1.0609153764231745, "learning_rate": 1.0047555972733656e-09, "loss": 0.1584, "step": 56403 }, { "epoch": 0.9804446453093223, "grad_norm": 2.7373250141028267, "learning_rate": 1.0029727362124152e-09, "loss": 0.2023, "step": 56404 }, { "epoch": 0.9804620278468251, "grad_norm": 1.3464365898763517, "learning_rate": 1.0011914567428781e-09, "loss": 0.1287, "step": 56405 }, { "epoch": 0.980479410384328, "grad_norm": 1.8325433752462748, "learning_rate": 9.994117588703055e-10, "loss": 0.1418, "step": 56406 }, { "epoch": 0.9804967929218308, "grad_norm": 1.0755262614440906, "learning_rate": 9.97633642600415e-10, "loss": 0.1339, "step": 56407 }, { "epoch": 0.9805141754593335, "grad_norm": 2.98126088389544, "learning_rate": 9.958571079388134e-10, "loss": 0.148, "step": 56408 }, { "epoch": 0.9805315579968363, "grad_norm": 1.3860651618340272, "learning_rate": 9.940821548910516e-10, "loss": 0.2745, "step": 56409 }, { "epoch": 0.9805489405343392, "grad_norm": 1.155845850455956, "learning_rate": 9.923087834628475e-10, "loss": 0.1149, "step": 56410 }, { "epoch": 0.980566323071842, "grad_norm": 1.9011888590437807, "learning_rate": 9.90536993659863e-10, "loss": 0.1707, "step": 56411 }, { "epoch": 0.9805837056093448, "grad_norm": 1.223753937635788, "learning_rate": 9.887667854875935e-10, "loss": 0.149, "step": 56412 }, { "epoch": 0.9806010881468477, "grad_norm": 1.2063315738891105, "learning_rate": 9.869981589517019e-10, "loss": 0.1753, "step": 56413 }, { "epoch": 0.9806184706843505, "grad_norm": 1.9990268649573708, "learning_rate": 9.85231114057794e-10, "loss": 0.1076, "step": 56414 }, { "epoch": 0.9806358532218533, "grad_norm": 1.6030246569992241, "learning_rate": 9.834656508115324e-10, "loss": 0.0738, "step": 56415 }, { "epoch": 0.9806532357593561, "grad_norm": 1.6720944069653079, "learning_rate": 9.817017692184126e-10, "loss": 0.1692, "step": 56416 }, { "epoch": 0.980670618296859, "grad_norm": 7.471154567378943, "learning_rate": 9.799394692840412e-10, "loss": 0.2353, "step": 56417 }, { "epoch": 0.9806880008343618, "grad_norm": 2.4232660443065344, "learning_rate": 9.781787510140805e-10, "loss": 0.2258, "step": 56418 }, { "epoch": 0.9807053833718646, "grad_norm": 1.044395816880342, "learning_rate": 9.764196144140257e-10, "loss": 0.1992, "step": 56419 }, { "epoch": 0.9807227659093675, "grad_norm": 1.1938757077903495, "learning_rate": 9.746620594895393e-10, "loss": 0.0898, "step": 56420 }, { "epoch": 0.9807401484468703, "grad_norm": 1.1325340562412822, "learning_rate": 9.729060862460615e-10, "loss": 0.146, "step": 56421 }, { "epoch": 0.9807575309843731, "grad_norm": 1.5957675164910112, "learning_rate": 9.711516946893094e-10, "loss": 0.2019, "step": 56422 }, { "epoch": 0.980774913521876, "grad_norm": 1.8515221187477813, "learning_rate": 9.693988848247236e-10, "loss": 0.1298, "step": 56423 }, { "epoch": 0.9807922960593788, "grad_norm": 0.8594513777861384, "learning_rate": 9.676476566579105e-10, "loss": 0.1556, "step": 56424 }, { "epoch": 0.9808096785968816, "grad_norm": 1.4213067426059105, "learning_rate": 9.65898010194477e-10, "loss": 0.1677, "step": 56425 }, { "epoch": 0.9808270611343844, "grad_norm": 1.1035707805592607, "learning_rate": 9.641499454398627e-10, "loss": 0.1523, "step": 56426 }, { "epoch": 0.9808444436718872, "grad_norm": 1.417895927856572, "learning_rate": 9.624034623996191e-10, "loss": 0.1554, "step": 56427 }, { "epoch": 0.98086182620939, "grad_norm": 3.0522635673251193, "learning_rate": 9.606585610794083e-10, "loss": 0.1948, "step": 56428 }, { "epoch": 0.9808792087468928, "grad_norm": 2.0363512686466843, "learning_rate": 9.589152414846146e-10, "loss": 0.1621, "step": 56429 }, { "epoch": 0.9808965912843957, "grad_norm": 1.0740586009047666, "learning_rate": 9.571735036207896e-10, "loss": 0.1163, "step": 56430 }, { "epoch": 0.9809139738218985, "grad_norm": 1.999905401250414, "learning_rate": 9.554333474935395e-10, "loss": 0.1702, "step": 56431 }, { "epoch": 0.9809313563594013, "grad_norm": 0.7837682534772962, "learning_rate": 9.536947731083044e-10, "loss": 0.1337, "step": 56432 }, { "epoch": 0.9809487388969041, "grad_norm": 1.7716405672337727, "learning_rate": 9.519577804705803e-10, "loss": 0.1472, "step": 56433 }, { "epoch": 0.980966121434407, "grad_norm": 1.4320351385193388, "learning_rate": 9.50222369585918e-10, "loss": 0.1601, "step": 56434 }, { "epoch": 0.9809835039719098, "grad_norm": 1.1474764558466413, "learning_rate": 9.484885404598686e-10, "loss": 0.1847, "step": 56435 }, { "epoch": 0.9810008865094126, "grad_norm": 1.2168575601608418, "learning_rate": 9.467562930978168e-10, "loss": 0.118, "step": 56436 }, { "epoch": 0.9810182690469155, "grad_norm": 1.2011368009418868, "learning_rate": 9.450256275052582e-10, "loss": 0.1473, "step": 56437 }, { "epoch": 0.9810356515844183, "grad_norm": 2.6947850550355645, "learning_rate": 9.432965436877994e-10, "loss": 0.177, "step": 56438 }, { "epoch": 0.9810530341219211, "grad_norm": 1.5356669490617942, "learning_rate": 9.415690416508248e-10, "loss": 0.2014, "step": 56439 }, { "epoch": 0.981070416659424, "grad_norm": 1.2086155686404934, "learning_rate": 9.398431213997749e-10, "loss": 0.155, "step": 56440 }, { "epoch": 0.9810877991969268, "grad_norm": 1.8059833495373774, "learning_rate": 9.381187829402005e-10, "loss": 0.2013, "step": 56441 }, { "epoch": 0.9811051817344296, "grad_norm": 1.96555756446767, "learning_rate": 9.363960262775418e-10, "loss": 0.1484, "step": 56442 }, { "epoch": 0.9811225642719325, "grad_norm": 1.6950931734223964, "learning_rate": 9.34674851417294e-10, "loss": 0.2058, "step": 56443 }, { "epoch": 0.9811399468094353, "grad_norm": 1.0385514430334586, "learning_rate": 9.329552583648426e-10, "loss": 0.1406, "step": 56444 }, { "epoch": 0.9811573293469381, "grad_norm": 1.1889685510040242, "learning_rate": 9.312372471256824e-10, "loss": 0.2164, "step": 56445 }, { "epoch": 0.9811747118844409, "grad_norm": 1.200203819167423, "learning_rate": 9.295208177052538e-10, "loss": 0.2656, "step": 56446 }, { "epoch": 0.9811920944219437, "grad_norm": 2.2012883448436646, "learning_rate": 9.278059701089413e-10, "loss": 0.177, "step": 56447 }, { "epoch": 0.9812094769594465, "grad_norm": 0.8963238714847841, "learning_rate": 9.260927043422961e-10, "loss": 0.1698, "step": 56448 }, { "epoch": 0.9812268594969493, "grad_norm": 0.7227672954855873, "learning_rate": 9.243810204105917e-10, "loss": 0.1505, "step": 56449 }, { "epoch": 0.9812442420344522, "grad_norm": 1.0635497361842605, "learning_rate": 9.226709183194348e-10, "loss": 0.2055, "step": 56450 }, { "epoch": 0.981261624571955, "grad_norm": 1.388693503885851, "learning_rate": 9.209623980740988e-10, "loss": 0.2087, "step": 56451 }, { "epoch": 0.9812790071094578, "grad_norm": 1.4220896576554416, "learning_rate": 9.192554596800794e-10, "loss": 0.2308, "step": 56452 }, { "epoch": 0.9812963896469606, "grad_norm": 1.0018784732254602, "learning_rate": 9.175501031427613e-10, "loss": 0.167, "step": 56453 }, { "epoch": 0.9813137721844635, "grad_norm": 1.1092153973135062, "learning_rate": 9.158463284675289e-10, "loss": 0.1522, "step": 56454 }, { "epoch": 0.9813311547219663, "grad_norm": 0.773993007094213, "learning_rate": 9.141441356598223e-10, "loss": 0.1013, "step": 56455 }, { "epoch": 0.9813485372594691, "grad_norm": 1.042235222534903, "learning_rate": 9.124435247250261e-10, "loss": 0.1355, "step": 56456 }, { "epoch": 0.981365919796972, "grad_norm": 1.1545458572739413, "learning_rate": 9.107444956684695e-10, "loss": 0.1531, "step": 56457 }, { "epoch": 0.9813833023344748, "grad_norm": 1.6004840891675896, "learning_rate": 9.09047048495648e-10, "loss": 0.2864, "step": 56458 }, { "epoch": 0.9814006848719776, "grad_norm": 2.428426292863706, "learning_rate": 9.073511832118908e-10, "loss": 0.2295, "step": 56459 }, { "epoch": 0.9814180674094805, "grad_norm": 1.127992445471016, "learning_rate": 9.056568998224712e-10, "loss": 0.2131, "step": 56460 }, { "epoch": 0.9814354499469833, "grad_norm": 1.181208685988131, "learning_rate": 9.039641983329405e-10, "loss": 0.1152, "step": 56461 }, { "epoch": 0.9814528324844861, "grad_norm": 1.3081340696850807, "learning_rate": 9.022730787485722e-10, "loss": 0.1188, "step": 56462 }, { "epoch": 0.981470215021989, "grad_norm": 2.0555036598358942, "learning_rate": 9.005835410746953e-10, "loss": 0.1192, "step": 56463 }, { "epoch": 0.9814875975594918, "grad_norm": 2.528412269107641, "learning_rate": 8.988955853167501e-10, "loss": 0.1984, "step": 56464 }, { "epoch": 0.9815049800969946, "grad_norm": 1.6051927280168272, "learning_rate": 8.972092114800101e-10, "loss": 0.1305, "step": 56465 }, { "epoch": 0.9815223626344974, "grad_norm": 1.1469723367466207, "learning_rate": 8.955244195698597e-10, "loss": 0.1242, "step": 56466 }, { "epoch": 0.9815397451720002, "grad_norm": 1.2915419235690029, "learning_rate": 8.938412095915726e-10, "loss": 0.2145, "step": 56467 }, { "epoch": 0.981557127709503, "grad_norm": 1.410928063178351, "learning_rate": 8.921595815505889e-10, "loss": 0.1806, "step": 56468 }, { "epoch": 0.9815745102470058, "grad_norm": 9.964318997202023, "learning_rate": 8.904795354521821e-10, "loss": 0.4196, "step": 56469 }, { "epoch": 0.9815918927845086, "grad_norm": 1.4979952837244104, "learning_rate": 8.888010713016814e-10, "loss": 0.1387, "step": 56470 }, { "epoch": 0.9816092753220115, "grad_norm": 1.0853938977697617, "learning_rate": 8.871241891044157e-10, "loss": 0.1022, "step": 56471 }, { "epoch": 0.9816266578595143, "grad_norm": 1.0593599254196675, "learning_rate": 8.854488888657141e-10, "loss": 0.1286, "step": 56472 }, { "epoch": 0.9816440403970171, "grad_norm": 4.484765676925385, "learning_rate": 8.837751705907947e-10, "loss": 0.1244, "step": 56473 }, { "epoch": 0.98166142293452, "grad_norm": 1.1235564836461933, "learning_rate": 8.821030342850977e-10, "loss": 0.1222, "step": 56474 }, { "epoch": 0.9816788054720228, "grad_norm": 1.7106557483771183, "learning_rate": 8.80432479953841e-10, "loss": 0.2093, "step": 56475 }, { "epoch": 0.9816961880095256, "grad_norm": 2.2581194172510735, "learning_rate": 8.787635076022981e-10, "loss": 0.1231, "step": 56476 }, { "epoch": 0.9817135705470285, "grad_norm": 1.453889106328777, "learning_rate": 8.770961172358538e-10, "loss": 0.1154, "step": 56477 }, { "epoch": 0.9817309530845313, "grad_norm": 2.502269152255019, "learning_rate": 8.754303088597259e-10, "loss": 0.1923, "step": 56478 }, { "epoch": 0.9817483356220341, "grad_norm": 0.9396183737505044, "learning_rate": 8.737660824791882e-10, "loss": 0.1134, "step": 56479 }, { "epoch": 0.981765718159537, "grad_norm": 1.038443318414238, "learning_rate": 8.72103438099514e-10, "loss": 0.1424, "step": 56480 }, { "epoch": 0.9817831006970398, "grad_norm": 1.1522269269119678, "learning_rate": 8.704423757260326e-10, "loss": 0.1353, "step": 56481 }, { "epoch": 0.9818004832345426, "grad_norm": 0.856849548149425, "learning_rate": 8.687828953639065e-10, "loss": 0.1313, "step": 56482 }, { "epoch": 0.9818178657720454, "grad_norm": 1.1912150766385414, "learning_rate": 8.671249970185202e-10, "loss": 0.1089, "step": 56483 }, { "epoch": 0.9818352483095483, "grad_norm": 1.5764533013172977, "learning_rate": 8.654686806950362e-10, "loss": 0.1155, "step": 56484 }, { "epoch": 0.9818526308470511, "grad_norm": 0.8217948873784872, "learning_rate": 8.638139463987282e-10, "loss": 0.1214, "step": 56485 }, { "epoch": 0.9818700133845539, "grad_norm": 1.1189230203329226, "learning_rate": 8.621607941348697e-10, "loss": 0.2794, "step": 56486 }, { "epoch": 0.9818873959220566, "grad_norm": 1.3906260252776008, "learning_rate": 8.605092239086786e-10, "loss": 0.1762, "step": 56487 }, { "epoch": 0.9819047784595595, "grad_norm": 1.648303768919817, "learning_rate": 8.588592357253177e-10, "loss": 0.201, "step": 56488 }, { "epoch": 0.9819221609970623, "grad_norm": 1.0234514229266918, "learning_rate": 8.572108295901715e-10, "loss": 0.1175, "step": 56489 }, { "epoch": 0.9819395435345651, "grad_norm": 0.8527140457560648, "learning_rate": 8.555640055083469e-10, "loss": 0.1493, "step": 56490 }, { "epoch": 0.981956926072068, "grad_norm": 1.2560566088338232, "learning_rate": 8.539187634851175e-10, "loss": 0.2249, "step": 56491 }, { "epoch": 0.9819743086095708, "grad_norm": 1.378223519518157, "learning_rate": 8.522751035256459e-10, "loss": 0.1791, "step": 56492 }, { "epoch": 0.9819916911470736, "grad_norm": 1.1262421242003413, "learning_rate": 8.506330256351501e-10, "loss": 0.1792, "step": 56493 }, { "epoch": 0.9820090736845765, "grad_norm": 1.8510956389903312, "learning_rate": 8.489925298189038e-10, "loss": 0.1385, "step": 56494 }, { "epoch": 0.9820264562220793, "grad_norm": 1.9644149015028094, "learning_rate": 8.473536160820693e-10, "loss": 0.2036, "step": 56495 }, { "epoch": 0.9820438387595821, "grad_norm": 3.323160383493802, "learning_rate": 8.457162844298094e-10, "loss": 0.2056, "step": 56496 }, { "epoch": 0.982061221297085, "grad_norm": 1.7489834437193605, "learning_rate": 8.440805348673419e-10, "loss": 0.28, "step": 56497 }, { "epoch": 0.9820786038345878, "grad_norm": 2.066903588484693, "learning_rate": 8.424463673998295e-10, "loss": 0.173, "step": 56498 }, { "epoch": 0.9820959863720906, "grad_norm": 1.0072790141435053, "learning_rate": 8.408137820324346e-10, "loss": 0.1583, "step": 56499 }, { "epoch": 0.9821133689095934, "grad_norm": 1.107121040691702, "learning_rate": 8.391827787704308e-10, "loss": 0.2057, "step": 56500 }, { "epoch": 0.9821307514470963, "grad_norm": 1.2504779575333442, "learning_rate": 8.375533576189253e-10, "loss": 0.165, "step": 56501 }, { "epoch": 0.9821481339845991, "grad_norm": 1.449416713026016, "learning_rate": 8.35925518583025e-10, "loss": 0.192, "step": 56502 }, { "epoch": 0.9821655165221019, "grad_norm": 1.4498769061345234, "learning_rate": 8.342992616680033e-10, "loss": 0.1715, "step": 56503 }, { "epoch": 0.9821828990596048, "grad_norm": 1.9202018286899614, "learning_rate": 8.32674586878912e-10, "loss": 0.1676, "step": 56504 }, { "epoch": 0.9822002815971076, "grad_norm": 4.340255275291832, "learning_rate": 8.31051494220969e-10, "loss": 0.1625, "step": 56505 }, { "epoch": 0.9822176641346104, "grad_norm": 1.0837981299652095, "learning_rate": 8.294299836992257e-10, "loss": 0.1632, "step": 56506 }, { "epoch": 0.9822350466721131, "grad_norm": 1.1925786600810617, "learning_rate": 8.27810055318956e-10, "loss": 0.1359, "step": 56507 }, { "epoch": 0.982252429209616, "grad_norm": 1.3445658306692594, "learning_rate": 8.261917090852111e-10, "loss": 0.0953, "step": 56508 }, { "epoch": 0.9822698117471188, "grad_norm": 1.8839153374053612, "learning_rate": 8.245749450031536e-10, "loss": 0.1552, "step": 56509 }, { "epoch": 0.9822871942846216, "grad_norm": 0.7408462181657542, "learning_rate": 8.229597630778907e-10, "loss": 0.1355, "step": 56510 }, { "epoch": 0.9823045768221245, "grad_norm": 1.3617084878657506, "learning_rate": 8.213461633145291e-10, "loss": 0.1859, "step": 56511 }, { "epoch": 0.9823219593596273, "grad_norm": 1.6054883569910363, "learning_rate": 8.197341457181761e-10, "loss": 0.1471, "step": 56512 }, { "epoch": 0.9823393418971301, "grad_norm": 1.844473415624192, "learning_rate": 8.181237102939942e-10, "loss": 0.1518, "step": 56513 }, { "epoch": 0.982356724434633, "grad_norm": 1.1964546209239892, "learning_rate": 8.165148570470348e-10, "loss": 0.1178, "step": 56514 }, { "epoch": 0.9823741069721358, "grad_norm": 0.8572545381299351, "learning_rate": 8.14907585982405e-10, "loss": 0.1004, "step": 56515 }, { "epoch": 0.9823914895096386, "grad_norm": 1.8984128561656217, "learning_rate": 8.133018971052674e-10, "loss": 0.1363, "step": 56516 }, { "epoch": 0.9824088720471414, "grad_norm": 1.0871414220333055, "learning_rate": 8.116977904206179e-10, "loss": 0.2075, "step": 56517 }, { "epoch": 0.9824262545846443, "grad_norm": 1.652921410082233, "learning_rate": 8.100952659335636e-10, "loss": 0.1828, "step": 56518 }, { "epoch": 0.9824436371221471, "grad_norm": 0.9436169988187556, "learning_rate": 8.084943236492115e-10, "loss": 0.1348, "step": 56519 }, { "epoch": 0.9824610196596499, "grad_norm": 1.3718321621680607, "learning_rate": 8.06894963572613e-10, "loss": 0.1715, "step": 56520 }, { "epoch": 0.9824784021971528, "grad_norm": 1.6638849797781536, "learning_rate": 8.052971857088752e-10, "loss": 0.1494, "step": 56521 }, { "epoch": 0.9824957847346556, "grad_norm": 3.7268480651239293, "learning_rate": 8.037009900629943e-10, "loss": 0.1983, "step": 56522 }, { "epoch": 0.9825131672721584, "grad_norm": 2.382464644735044, "learning_rate": 8.021063766400771e-10, "loss": 0.0927, "step": 56523 }, { "epoch": 0.9825305498096613, "grad_norm": 3.675141754987321, "learning_rate": 8.005133454451751e-10, "loss": 0.1696, "step": 56524 }, { "epoch": 0.9825479323471641, "grad_norm": 1.2409972270967555, "learning_rate": 7.989218964833954e-10, "loss": 0.3214, "step": 56525 }, { "epoch": 0.9825653148846669, "grad_norm": 1.0632243022371224, "learning_rate": 7.973320297596231e-10, "loss": 0.1482, "step": 56526 }, { "epoch": 0.9825826974221696, "grad_norm": 1.7105873741730062, "learning_rate": 7.957437452790761e-10, "loss": 0.183, "step": 56527 }, { "epoch": 0.9826000799596725, "grad_norm": 1.2683645701259885, "learning_rate": 7.941570430466393e-10, "loss": 0.2239, "step": 56528 }, { "epoch": 0.9826174624971753, "grad_norm": 2.7268478782600614, "learning_rate": 7.9257192306742e-10, "loss": 0.1475, "step": 56529 }, { "epoch": 0.9826348450346781, "grad_norm": 1.5438952862263553, "learning_rate": 7.909883853464694e-10, "loss": 0.1547, "step": 56530 }, { "epoch": 0.982652227572181, "grad_norm": 1.299176083664741, "learning_rate": 7.894064298887282e-10, "loss": 0.2064, "step": 56531 }, { "epoch": 0.9826696101096838, "grad_norm": 1.209660285522016, "learning_rate": 7.878260566992478e-10, "loss": 0.1593, "step": 56532 }, { "epoch": 0.9826869926471866, "grad_norm": 1.2025122649816948, "learning_rate": 7.862472657830799e-10, "loss": 0.1446, "step": 56533 }, { "epoch": 0.9827043751846894, "grad_norm": 1.1613357291914077, "learning_rate": 7.846700571451648e-10, "loss": 0.1274, "step": 56534 }, { "epoch": 0.9827217577221923, "grad_norm": 1.7895140378245338, "learning_rate": 7.830944307904985e-10, "loss": 0.2165, "step": 56535 }, { "epoch": 0.9827391402596951, "grad_norm": 1.796671904541527, "learning_rate": 7.815203867241326e-10, "loss": 0.1396, "step": 56536 }, { "epoch": 0.9827565227971979, "grad_norm": 1.2054674187156695, "learning_rate": 7.799479249510632e-10, "loss": 0.1587, "step": 56537 }, { "epoch": 0.9827739053347008, "grad_norm": 1.0206739333010544, "learning_rate": 7.783770454761751e-10, "loss": 0.1231, "step": 56538 }, { "epoch": 0.9827912878722036, "grad_norm": 1.3763326058426781, "learning_rate": 7.768077483045199e-10, "loss": 0.129, "step": 56539 }, { "epoch": 0.9828086704097064, "grad_norm": 1.2085496114494483, "learning_rate": 7.752400334410381e-10, "loss": 0.1207, "step": 56540 }, { "epoch": 0.9828260529472093, "grad_norm": 2.955371501411111, "learning_rate": 7.736739008907811e-10, "loss": 0.1662, "step": 56541 }, { "epoch": 0.9828434354847121, "grad_norm": 1.2716118679892903, "learning_rate": 7.721093506585785e-10, "loss": 0.1263, "step": 56542 }, { "epoch": 0.9828608180222149, "grad_norm": 2.0905476258918076, "learning_rate": 7.705463827495373e-10, "loss": 0.248, "step": 56543 }, { "epoch": 0.9828782005597178, "grad_norm": 2.0460184719690413, "learning_rate": 7.689849971684869e-10, "loss": 0.1398, "step": 56544 }, { "epoch": 0.9828955830972206, "grad_norm": 5.294264278413089, "learning_rate": 7.674251939204235e-10, "loss": 0.2242, "step": 56545 }, { "epoch": 0.9829129656347234, "grad_norm": 2.1271511198567, "learning_rate": 7.658669730103429e-10, "loss": 0.145, "step": 56546 }, { "epoch": 0.9829303481722261, "grad_norm": 2.109548711336709, "learning_rate": 7.643103344430746e-10, "loss": 0.1711, "step": 56547 }, { "epoch": 0.982947730709729, "grad_norm": 4.373260941228349, "learning_rate": 7.627552782236701e-10, "loss": 0.1912, "step": 56548 }, { "epoch": 0.9829651132472318, "grad_norm": 2.1718553420899642, "learning_rate": 7.61201804356959e-10, "loss": 0.1139, "step": 56549 }, { "epoch": 0.9829824957847346, "grad_norm": 1.8937527322901548, "learning_rate": 7.596499128479372e-10, "loss": 0.2871, "step": 56550 }, { "epoch": 0.9829998783222375, "grad_norm": 1.2455890071415434, "learning_rate": 7.580996037014342e-10, "loss": 0.1318, "step": 56551 }, { "epoch": 0.9830172608597403, "grad_norm": 2.0344524477829173, "learning_rate": 7.565508769225015e-10, "loss": 0.197, "step": 56552 }, { "epoch": 0.9830346433972431, "grad_norm": 1.2867547908497328, "learning_rate": 7.550037325159131e-10, "loss": 0.1025, "step": 56553 }, { "epoch": 0.9830520259347459, "grad_norm": 1.3233305634836157, "learning_rate": 7.534581704866649e-10, "loss": 0.1643, "step": 56554 }, { "epoch": 0.9830694084722488, "grad_norm": 1.031797332000063, "learning_rate": 7.519141908395866e-10, "loss": 0.1475, "step": 56555 }, { "epoch": 0.9830867910097516, "grad_norm": 2.7417255419032114, "learning_rate": 7.503717935796183e-10, "loss": 0.2646, "step": 56556 }, { "epoch": 0.9831041735472544, "grad_norm": 1.3310990513603067, "learning_rate": 7.488309787116453e-10, "loss": 0.1467, "step": 56557 }, { "epoch": 0.9831215560847573, "grad_norm": 0.8799796818220558, "learning_rate": 7.47291746240497e-10, "loss": 0.1795, "step": 56558 }, { "epoch": 0.9831389386222601, "grad_norm": 0.8912625050417122, "learning_rate": 7.457540961711694e-10, "loss": 0.1596, "step": 56559 }, { "epoch": 0.9831563211597629, "grad_norm": 0.6435544608316341, "learning_rate": 7.44218028508381e-10, "loss": 0.0921, "step": 56560 }, { "epoch": 0.9831737036972658, "grad_norm": 1.6095286814653391, "learning_rate": 7.426835432571277e-10, "loss": 0.2319, "step": 56561 }, { "epoch": 0.9831910862347686, "grad_norm": 0.981104929132097, "learning_rate": 7.411506404222389e-10, "loss": 0.129, "step": 56562 }, { "epoch": 0.9832084687722714, "grad_norm": 2.4207192068127634, "learning_rate": 7.396193200085443e-10, "loss": 0.1132, "step": 56563 }, { "epoch": 0.9832258513097742, "grad_norm": 1.5832175265776582, "learning_rate": 7.380895820209288e-10, "loss": 0.2099, "step": 56564 }, { "epoch": 0.9832432338472771, "grad_norm": 1.1768409224926941, "learning_rate": 7.365614264642217e-10, "loss": 0.1604, "step": 56565 }, { "epoch": 0.9832606163847798, "grad_norm": 1.7152326598483643, "learning_rate": 7.350348533432527e-10, "loss": 0.2345, "step": 56566 }, { "epoch": 0.9832779989222826, "grad_norm": 1.4872326354772083, "learning_rate": 7.335098626629066e-10, "loss": 0.1262, "step": 56567 }, { "epoch": 0.9832953814597855, "grad_norm": 2.820201142770536, "learning_rate": 7.319864544279575e-10, "loss": 0.1707, "step": 56568 }, { "epoch": 0.9833127639972883, "grad_norm": 1.6103489851686088, "learning_rate": 7.304646286432902e-10, "loss": 0.1883, "step": 56569 }, { "epoch": 0.9833301465347911, "grad_norm": 1.7045302158770719, "learning_rate": 7.289443853136789e-10, "loss": 0.1413, "step": 56570 }, { "epoch": 0.983347529072294, "grad_norm": 1.9358001194268848, "learning_rate": 7.274257244440085e-10, "loss": 0.1621, "step": 56571 }, { "epoch": 0.9833649116097968, "grad_norm": 1.4637835670533585, "learning_rate": 7.259086460389974e-10, "loss": 0.1776, "step": 56572 }, { "epoch": 0.9833822941472996, "grad_norm": 2.0309099145441323, "learning_rate": 7.243931501035305e-10, "loss": 0.1792, "step": 56573 }, { "epoch": 0.9833996766848024, "grad_norm": 2.0443062396462386, "learning_rate": 7.228792366424374e-10, "loss": 0.1055, "step": 56574 }, { "epoch": 0.9834170592223053, "grad_norm": 1.466078421052481, "learning_rate": 7.213669056604365e-10, "loss": 0.1207, "step": 56575 }, { "epoch": 0.9834344417598081, "grad_norm": 1.0660779684990638, "learning_rate": 7.198561571623574e-10, "loss": 0.1764, "step": 56576 }, { "epoch": 0.9834518242973109, "grad_norm": 1.383795176608498, "learning_rate": 7.183469911529183e-10, "loss": 0.1631, "step": 56577 }, { "epoch": 0.9834692068348138, "grad_norm": 1.3709141139270702, "learning_rate": 7.168394076370599e-10, "loss": 0.1755, "step": 56578 }, { "epoch": 0.9834865893723166, "grad_norm": 2.0876717511249616, "learning_rate": 7.153334066194449e-10, "loss": 0.4481, "step": 56579 }, { "epoch": 0.9835039719098194, "grad_norm": 1.0266405494749844, "learning_rate": 7.138289881048476e-10, "loss": 0.0957, "step": 56580 }, { "epoch": 0.9835213544473222, "grad_norm": 1.25194103100981, "learning_rate": 7.123261520980417e-10, "loss": 0.172, "step": 56581 }, { "epoch": 0.9835387369848251, "grad_norm": 0.7894766745593094, "learning_rate": 7.108248986038568e-10, "loss": 0.1701, "step": 56582 }, { "epoch": 0.9835561195223279, "grad_norm": 0.9308651422508258, "learning_rate": 7.093252276269556e-10, "loss": 0.1412, "step": 56583 }, { "epoch": 0.9835735020598307, "grad_norm": 1.5073467139259602, "learning_rate": 7.07827139172168e-10, "loss": 0.1414, "step": 56584 }, { "epoch": 0.9835908845973336, "grad_norm": 1.5168611893569424, "learning_rate": 7.063306332442121e-10, "loss": 0.1946, "step": 56585 }, { "epoch": 0.9836082671348363, "grad_norm": 1.264811916436741, "learning_rate": 7.048357098478064e-10, "loss": 0.1402, "step": 56586 }, { "epoch": 0.9836256496723391, "grad_norm": 2.348718251015903, "learning_rate": 7.033423689877804e-10, "loss": 0.1828, "step": 56587 }, { "epoch": 0.983643032209842, "grad_norm": 1.670128005105291, "learning_rate": 7.018506106687417e-10, "loss": 0.1452, "step": 56588 }, { "epoch": 0.9836604147473448, "grad_norm": 0.6569965718456748, "learning_rate": 7.003604348954639e-10, "loss": 0.1203, "step": 56589 }, { "epoch": 0.9836777972848476, "grad_norm": 0.9694118374408256, "learning_rate": 6.988718416727212e-10, "loss": 0.1147, "step": 56590 }, { "epoch": 0.9836951798223504, "grad_norm": 1.6475581447511372, "learning_rate": 6.973848310051767e-10, "loss": 0.1822, "step": 56591 }, { "epoch": 0.9837125623598533, "grad_norm": 1.4107334986193119, "learning_rate": 6.958994028975484e-10, "loss": 0.16, "step": 56592 }, { "epoch": 0.9837299448973561, "grad_norm": 1.610043921098236, "learning_rate": 6.944155573546107e-10, "loss": 0.1452, "step": 56593 }, { "epoch": 0.9837473274348589, "grad_norm": 1.307735421510832, "learning_rate": 6.929332943809707e-10, "loss": 0.1501, "step": 56594 }, { "epoch": 0.9837647099723618, "grad_norm": 1.7076850839826758, "learning_rate": 6.914526139814025e-10, "loss": 0.143, "step": 56595 }, { "epoch": 0.9837820925098646, "grad_norm": 1.3733173241779681, "learning_rate": 6.899735161605136e-10, "loss": 0.1012, "step": 56596 }, { "epoch": 0.9837994750473674, "grad_norm": 1.5036561253125427, "learning_rate": 6.884960009230779e-10, "loss": 0.1446, "step": 56597 }, { "epoch": 0.9838168575848703, "grad_norm": 1.8910806534191362, "learning_rate": 6.870200682737581e-10, "loss": 0.2143, "step": 56598 }, { "epoch": 0.9838342401223731, "grad_norm": 1.3668828557843302, "learning_rate": 6.85545718217162e-10, "loss": 0.2561, "step": 56599 }, { "epoch": 0.9838516226598759, "grad_norm": 2.2299407629254087, "learning_rate": 6.840729507580634e-10, "loss": 0.1917, "step": 56600 }, { "epoch": 0.9838690051973787, "grad_norm": 1.24611326238254, "learning_rate": 6.826017659010697e-10, "loss": 0.1295, "step": 56601 }, { "epoch": 0.9838863877348816, "grad_norm": 1.3841777184119435, "learning_rate": 6.811321636508993e-10, "loss": 0.1317, "step": 56602 }, { "epoch": 0.9839037702723844, "grad_norm": 1.433458082195607, "learning_rate": 6.796641440121598e-10, "loss": 0.1317, "step": 56603 }, { "epoch": 0.9839211528098872, "grad_norm": 0.9795042039810303, "learning_rate": 6.781977069894584e-10, "loss": 0.1622, "step": 56604 }, { "epoch": 0.9839385353473901, "grad_norm": 1.1103962323140897, "learning_rate": 6.767328525875693e-10, "loss": 0.1341, "step": 56605 }, { "epoch": 0.9839559178848928, "grad_norm": 1.1059960966656575, "learning_rate": 6.752695808110443e-10, "loss": 0.1313, "step": 56606 }, { "epoch": 0.9839733004223956, "grad_norm": 1.078359793376408, "learning_rate": 6.738078916645462e-10, "loss": 0.1581, "step": 56607 }, { "epoch": 0.9839906829598984, "grad_norm": 0.9249595881605719, "learning_rate": 6.723477851526827e-10, "loss": 0.1997, "step": 56608 }, { "epoch": 0.9840080654974013, "grad_norm": 1.12176745457465, "learning_rate": 6.708892612801164e-10, "loss": 0.0979, "step": 56609 }, { "epoch": 0.9840254480349041, "grad_norm": 2.039999863244762, "learning_rate": 6.694323200514551e-10, "loss": 0.2625, "step": 56610 }, { "epoch": 0.9840428305724069, "grad_norm": 1.6908269717836002, "learning_rate": 6.679769614713615e-10, "loss": 0.177, "step": 56611 }, { "epoch": 0.9840602131099098, "grad_norm": 1.2797177809280547, "learning_rate": 6.665231855443321e-10, "loss": 0.1094, "step": 56612 }, { "epoch": 0.9840775956474126, "grad_norm": 1.0718362543386544, "learning_rate": 6.650709922750852e-10, "loss": 0.1684, "step": 56613 }, { "epoch": 0.9840949781849154, "grad_norm": 1.4072360612077395, "learning_rate": 6.636203816682285e-10, "loss": 0.1473, "step": 56614 }, { "epoch": 0.9841123607224183, "grad_norm": 1.189929702969404, "learning_rate": 6.62171353728258e-10, "loss": 0.215, "step": 56615 }, { "epoch": 0.9841297432599211, "grad_norm": 0.9772087196239201, "learning_rate": 6.607239084598926e-10, "loss": 0.0884, "step": 56616 }, { "epoch": 0.9841471257974239, "grad_norm": 1.0196564239840595, "learning_rate": 6.592780458675729e-10, "loss": 0.1303, "step": 56617 }, { "epoch": 0.9841645083349267, "grad_norm": 3.0953447476005462, "learning_rate": 6.578337659560173e-10, "loss": 0.1957, "step": 56618 }, { "epoch": 0.9841818908724296, "grad_norm": 1.0028981616678045, "learning_rate": 6.563910687297225e-10, "loss": 0.1767, "step": 56619 }, { "epoch": 0.9841992734099324, "grad_norm": 1.2964381092163624, "learning_rate": 6.549499541932957e-10, "loss": 0.1213, "step": 56620 }, { "epoch": 0.9842166559474352, "grad_norm": 1.0344556645443375, "learning_rate": 6.535104223513444e-10, "loss": 0.1456, "step": 56621 }, { "epoch": 0.9842340384849381, "grad_norm": 1.9496585447477883, "learning_rate": 6.520724732083094e-10, "loss": 0.1932, "step": 56622 }, { "epoch": 0.9842514210224409, "grad_norm": 1.6796068428907527, "learning_rate": 6.506361067688537e-10, "loss": 0.0944, "step": 56623 }, { "epoch": 0.9842688035599437, "grad_norm": 2.5596440968610836, "learning_rate": 6.492013230375293e-10, "loss": 0.2533, "step": 56624 }, { "epoch": 0.9842861860974466, "grad_norm": 1.3045669447288102, "learning_rate": 6.477681220188324e-10, "loss": 0.211, "step": 56625 }, { "epoch": 0.9843035686349493, "grad_norm": 1.3371896972242014, "learning_rate": 6.463365037173152e-10, "loss": 0.1361, "step": 56626 }, { "epoch": 0.9843209511724521, "grad_norm": 1.529308215245988, "learning_rate": 6.449064681375293e-10, "loss": 0.1095, "step": 56627 }, { "epoch": 0.9843383337099549, "grad_norm": 0.6585681713208676, "learning_rate": 6.434780152840269e-10, "loss": 0.1335, "step": 56628 }, { "epoch": 0.9843557162474578, "grad_norm": 1.6040531328094227, "learning_rate": 6.420511451612487e-10, "loss": 0.1155, "step": 56629 }, { "epoch": 0.9843730987849606, "grad_norm": 1.7075093297825112, "learning_rate": 6.406258577738577e-10, "loss": 0.1864, "step": 56630 }, { "epoch": 0.9843904813224634, "grad_norm": 1.873459053612197, "learning_rate": 6.392021531262393e-10, "loss": 0.2811, "step": 56631 }, { "epoch": 0.9844078638599663, "grad_norm": 2.2548712635225203, "learning_rate": 6.377800312230009e-10, "loss": 0.2147, "step": 56632 }, { "epoch": 0.9844252463974691, "grad_norm": 1.3184275373263123, "learning_rate": 6.363594920685833e-10, "loss": 0.1304, "step": 56633 }, { "epoch": 0.9844426289349719, "grad_norm": 0.5489212592194538, "learning_rate": 6.349405356675386e-10, "loss": 0.1194, "step": 56634 }, { "epoch": 0.9844600114724747, "grad_norm": 0.766424818245715, "learning_rate": 6.335231620243631e-10, "loss": 0.1663, "step": 56635 }, { "epoch": 0.9844773940099776, "grad_norm": 1.240704476232697, "learning_rate": 6.321073711434977e-10, "loss": 0.24, "step": 56636 }, { "epoch": 0.9844947765474804, "grad_norm": 1.2749350499410126, "learning_rate": 6.306931630294942e-10, "loss": 0.1797, "step": 56637 }, { "epoch": 0.9845121590849832, "grad_norm": 1.4744260217533494, "learning_rate": 6.292805376867383e-10, "loss": 0.1326, "step": 56638 }, { "epoch": 0.9845295416224861, "grad_norm": 0.9958674737048355, "learning_rate": 6.278694951198371e-10, "loss": 0.1817, "step": 56639 }, { "epoch": 0.9845469241599889, "grad_norm": 1.156712898982341, "learning_rate": 6.264600353331761e-10, "loss": 0.1736, "step": 56640 }, { "epoch": 0.9845643066974917, "grad_norm": 1.1772587946276267, "learning_rate": 6.250521583311963e-10, "loss": 0.1643, "step": 56641 }, { "epoch": 0.9845816892349946, "grad_norm": 1.7975394208649838, "learning_rate": 6.236458641184494e-10, "loss": 0.1334, "step": 56642 }, { "epoch": 0.9845990717724974, "grad_norm": 2.0693676385024165, "learning_rate": 6.222411526993765e-10, "loss": 0.1066, "step": 56643 }, { "epoch": 0.9846164543100002, "grad_norm": 1.5927740313815597, "learning_rate": 6.208380240783628e-10, "loss": 0.1459, "step": 56644 }, { "epoch": 0.984633836847503, "grad_norm": 2.135624364329366, "learning_rate": 6.194364782599048e-10, "loss": 0.1081, "step": 56645 }, { "epoch": 0.9846512193850058, "grad_norm": 0.7761597491066295, "learning_rate": 6.180365152484434e-10, "loss": 0.1636, "step": 56646 }, { "epoch": 0.9846686019225086, "grad_norm": 1.0645077395072975, "learning_rate": 6.166381350484196e-10, "loss": 0.1369, "step": 56647 }, { "epoch": 0.9846859844600114, "grad_norm": 1.536292036278998, "learning_rate": 6.152413376642185e-10, "loss": 0.1281, "step": 56648 }, { "epoch": 0.9847033669975143, "grad_norm": 1.4923902542437846, "learning_rate": 6.138461231003367e-10, "loss": 0.1481, "step": 56649 }, { "epoch": 0.9847207495350171, "grad_norm": 2.3365369017237367, "learning_rate": 6.124524913611595e-10, "loss": 0.2445, "step": 56650 }, { "epoch": 0.9847381320725199, "grad_norm": 2.3439420340822688, "learning_rate": 6.110604424510724e-10, "loss": 0.1613, "step": 56651 }, { "epoch": 0.9847555146100228, "grad_norm": 3.1335447419916, "learning_rate": 6.096699763745716e-10, "loss": 0.3044, "step": 56652 }, { "epoch": 0.9847728971475256, "grad_norm": 1.4121248641791164, "learning_rate": 6.082810931359872e-10, "loss": 0.1455, "step": 56653 }, { "epoch": 0.9847902796850284, "grad_norm": 0.9335951913679312, "learning_rate": 6.068937927397599e-10, "loss": 0.1001, "step": 56654 }, { "epoch": 0.9848076622225312, "grad_norm": 1.4462217154651593, "learning_rate": 6.055080751902752e-10, "loss": 0.1894, "step": 56655 }, { "epoch": 0.9848250447600341, "grad_norm": 1.1489477634211047, "learning_rate": 6.041239404919185e-10, "loss": 0.1469, "step": 56656 }, { "epoch": 0.9848424272975369, "grad_norm": 2.3342723727840626, "learning_rate": 6.027413886491306e-10, "loss": 0.0965, "step": 56657 }, { "epoch": 0.9848598098350397, "grad_norm": 1.220354300604615, "learning_rate": 6.013604196661859e-10, "loss": 0.1446, "step": 56658 }, { "epoch": 0.9848771923725426, "grad_norm": 1.6227468014023207, "learning_rate": 5.999810335475253e-10, "loss": 0.1166, "step": 56659 }, { "epoch": 0.9848945749100454, "grad_norm": 1.3537484171771095, "learning_rate": 5.986032302975896e-10, "loss": 0.1378, "step": 56660 }, { "epoch": 0.9849119574475482, "grad_norm": 1.5425385096888775, "learning_rate": 5.972270099205978e-10, "loss": 0.1598, "step": 56661 }, { "epoch": 0.9849293399850511, "grad_norm": 2.092001496527939, "learning_rate": 5.958523724210463e-10, "loss": 0.2086, "step": 56662 }, { "epoch": 0.9849467225225539, "grad_norm": 1.2711333949199828, "learning_rate": 5.944793178032093e-10, "loss": 0.1481, "step": 56663 }, { "epoch": 0.9849641050600567, "grad_norm": 1.139552199516235, "learning_rate": 5.931078460714722e-10, "loss": 0.1303, "step": 56664 }, { "epoch": 0.9849814875975595, "grad_norm": 1.2404090832475672, "learning_rate": 5.91737957230165e-10, "loss": 0.1344, "step": 56665 }, { "epoch": 0.9849988701350623, "grad_norm": 1.0287115228325698, "learning_rate": 5.903696512836176e-10, "loss": 0.2335, "step": 56666 }, { "epoch": 0.9850162526725651, "grad_norm": 0.8622498692132465, "learning_rate": 5.890029282362152e-10, "loss": 0.2062, "step": 56667 }, { "epoch": 0.9850336352100679, "grad_norm": 1.062393584710041, "learning_rate": 5.876377880922879e-10, "loss": 0.0966, "step": 56668 }, { "epoch": 0.9850510177475708, "grad_norm": 2.2374278934343543, "learning_rate": 5.862742308560542e-10, "loss": 0.3024, "step": 56669 }, { "epoch": 0.9850684002850736, "grad_norm": 1.5968347281861026, "learning_rate": 5.849122565320108e-10, "loss": 0.2912, "step": 56670 }, { "epoch": 0.9850857828225764, "grad_norm": 1.4985467447660472, "learning_rate": 5.83551865124321e-10, "loss": 0.2103, "step": 56671 }, { "epoch": 0.9851031653600792, "grad_norm": 0.875680875463775, "learning_rate": 5.821930566374255e-10, "loss": 0.1581, "step": 56672 }, { "epoch": 0.9851205478975821, "grad_norm": 0.7949705262462957, "learning_rate": 5.80835831075488e-10, "loss": 0.1871, "step": 56673 }, { "epoch": 0.9851379304350849, "grad_norm": 0.9202145009461759, "learning_rate": 5.79480188442949e-10, "loss": 0.2245, "step": 56674 }, { "epoch": 0.9851553129725877, "grad_norm": 6.712180387658605, "learning_rate": 5.781261287440275e-10, "loss": 0.1773, "step": 56675 }, { "epoch": 0.9851726955100906, "grad_norm": 1.2613954668224578, "learning_rate": 5.76773651982998e-10, "loss": 0.1473, "step": 56676 }, { "epoch": 0.9851900780475934, "grad_norm": 1.279151552305869, "learning_rate": 5.754227581641901e-10, "loss": 0.1171, "step": 56677 }, { "epoch": 0.9852074605850962, "grad_norm": 1.184966575986964, "learning_rate": 5.740734472918784e-10, "loss": 0.1265, "step": 56678 }, { "epoch": 0.9852248431225991, "grad_norm": 1.4548187209843906, "learning_rate": 5.727257193703927e-10, "loss": 0.166, "step": 56679 }, { "epoch": 0.9852422256601019, "grad_norm": 1.7968127301290595, "learning_rate": 5.713795744038962e-10, "loss": 0.1403, "step": 56680 }, { "epoch": 0.9852596081976047, "grad_norm": 1.2907680595118356, "learning_rate": 5.700350123966635e-10, "loss": 0.1507, "step": 56681 }, { "epoch": 0.9852769907351075, "grad_norm": 1.337105985245929, "learning_rate": 5.686920333530798e-10, "loss": 0.1497, "step": 56682 }, { "epoch": 0.9852943732726104, "grad_norm": 2.0239208724137274, "learning_rate": 5.67350637277253e-10, "loss": 0.1201, "step": 56683 }, { "epoch": 0.9853117558101132, "grad_norm": 0.8951267170344627, "learning_rate": 5.660108241735683e-10, "loss": 0.2112, "step": 56684 }, { "epoch": 0.985329138347616, "grad_norm": 1.4978814580661246, "learning_rate": 5.646725940461894e-10, "loss": 0.1298, "step": 56685 }, { "epoch": 0.9853465208851188, "grad_norm": 1.172726906125821, "learning_rate": 5.633359468993349e-10, "loss": 0.2228, "step": 56686 }, { "epoch": 0.9853639034226216, "grad_norm": 1.9076212727326127, "learning_rate": 5.620008827373346e-10, "loss": 0.1583, "step": 56687 }, { "epoch": 0.9853812859601244, "grad_norm": 1.2621322730888376, "learning_rate": 5.60667401564352e-10, "loss": 0.172, "step": 56688 }, { "epoch": 0.9853986684976273, "grad_norm": 1.7257206469922435, "learning_rate": 5.593355033846059e-10, "loss": 0.1779, "step": 56689 }, { "epoch": 0.9854160510351301, "grad_norm": 1.0679559910825365, "learning_rate": 5.580051882023707e-10, "loss": 0.1219, "step": 56690 }, { "epoch": 0.9854334335726329, "grad_norm": 0.7574415684195264, "learning_rate": 5.566764560218651e-10, "loss": 0.127, "step": 56691 }, { "epoch": 0.9854508161101357, "grad_norm": 1.3338609628198974, "learning_rate": 5.55349306847197e-10, "loss": 0.1606, "step": 56692 }, { "epoch": 0.9854681986476386, "grad_norm": 1.7992921472984977, "learning_rate": 5.540237406826964e-10, "loss": 0.1218, "step": 56693 }, { "epoch": 0.9854855811851414, "grad_norm": 1.697131673429255, "learning_rate": 5.526997575325265e-10, "loss": 0.2031, "step": 56694 }, { "epoch": 0.9855029637226442, "grad_norm": 1.1925436275268329, "learning_rate": 5.513773574007952e-10, "loss": 0.1886, "step": 56695 }, { "epoch": 0.9855203462601471, "grad_norm": 1.8760181544374634, "learning_rate": 5.500565402918323e-10, "loss": 0.1188, "step": 56696 }, { "epoch": 0.9855377287976499, "grad_norm": 1.4729636071150052, "learning_rate": 5.487373062097455e-10, "loss": 0.1579, "step": 56697 }, { "epoch": 0.9855551113351527, "grad_norm": 1.7151501991072153, "learning_rate": 5.47419655158754e-10, "loss": 0.2332, "step": 56698 }, { "epoch": 0.9855724938726556, "grad_norm": 1.9131711054939284, "learning_rate": 5.461035871429654e-10, "loss": 0.1855, "step": 56699 }, { "epoch": 0.9855898764101584, "grad_norm": 1.7458361196313261, "learning_rate": 5.44789102166654e-10, "loss": 0.193, "step": 56700 }, { "epoch": 0.9856072589476612, "grad_norm": 2.115453910694508, "learning_rate": 5.434762002338722e-10, "loss": 0.198, "step": 56701 }, { "epoch": 0.985624641485164, "grad_norm": 1.3484715986100755, "learning_rate": 5.421648813488943e-10, "loss": 0.1393, "step": 56702 }, { "epoch": 0.9856420240226669, "grad_norm": 0.6042983302003819, "learning_rate": 5.408551455157729e-10, "loss": 0.2189, "step": 56703 }, { "epoch": 0.9856594065601697, "grad_norm": 2.6509787996380703, "learning_rate": 5.395469927387263e-10, "loss": 0.2377, "step": 56704 }, { "epoch": 0.9856767890976724, "grad_norm": 1.1756280599692666, "learning_rate": 5.382404230219184e-10, "loss": 0.1211, "step": 56705 }, { "epoch": 0.9856941716351753, "grad_norm": 1.3650674357572623, "learning_rate": 5.369354363694012e-10, "loss": 0.14, "step": 56706 }, { "epoch": 0.9857115541726781, "grad_norm": 1.234469974987897, "learning_rate": 5.356320327853936e-10, "loss": 0.1489, "step": 56707 }, { "epoch": 0.9857289367101809, "grad_norm": 2.357979607803717, "learning_rate": 5.343302122740034e-10, "loss": 0.3412, "step": 56708 }, { "epoch": 0.9857463192476837, "grad_norm": 1.4040413856145644, "learning_rate": 5.330299748393385e-10, "loss": 0.122, "step": 56709 }, { "epoch": 0.9857637017851866, "grad_norm": 1.8018176798073375, "learning_rate": 5.317313204855067e-10, "loss": 0.1812, "step": 56710 }, { "epoch": 0.9857810843226894, "grad_norm": 1.4114712288808318, "learning_rate": 5.304342492166713e-10, "loss": 0.1181, "step": 56711 }, { "epoch": 0.9857984668601922, "grad_norm": 1.4054887646911989, "learning_rate": 5.291387610369402e-10, "loss": 0.1489, "step": 56712 }, { "epoch": 0.9858158493976951, "grad_norm": 0.9817863462106917, "learning_rate": 5.278448559503656e-10, "loss": 0.1253, "step": 56713 }, { "epoch": 0.9858332319351979, "grad_norm": 1.337304958493744, "learning_rate": 5.26552533961111e-10, "loss": 0.259, "step": 56714 }, { "epoch": 0.9858506144727007, "grad_norm": 1.140141050905013, "learning_rate": 5.252617950732286e-10, "loss": 0.1265, "step": 56715 }, { "epoch": 0.9858679970102036, "grad_norm": 1.0182832537378712, "learning_rate": 5.239726392908261e-10, "loss": 0.1938, "step": 56716 }, { "epoch": 0.9858853795477064, "grad_norm": 1.157858280771576, "learning_rate": 5.226850666180116e-10, "loss": 0.1487, "step": 56717 }, { "epoch": 0.9859027620852092, "grad_norm": 1.9993606773404828, "learning_rate": 5.213990770588372e-10, "loss": 0.1817, "step": 56718 }, { "epoch": 0.985920144622712, "grad_norm": 1.3011014560033902, "learning_rate": 5.201146706173554e-10, "loss": 0.1762, "step": 56719 }, { "epoch": 0.9859375271602149, "grad_norm": 0.6209229292321513, "learning_rate": 5.188318472977293e-10, "loss": 0.0928, "step": 56720 }, { "epoch": 0.9859549096977177, "grad_norm": 1.1428160918562351, "learning_rate": 5.175506071039004e-10, "loss": 0.1689, "step": 56721 }, { "epoch": 0.9859722922352205, "grad_norm": 1.0533959357693048, "learning_rate": 5.162709500400319e-10, "loss": 0.122, "step": 56722 }, { "epoch": 0.9859896747727234, "grad_norm": 1.1458192682527621, "learning_rate": 5.149928761101207e-10, "loss": 0.1466, "step": 56723 }, { "epoch": 0.9860070573102262, "grad_norm": 1.6305520487109564, "learning_rate": 5.137163853182747e-10, "loss": 0.1065, "step": 56724 }, { "epoch": 0.9860244398477289, "grad_norm": 1.022457556761092, "learning_rate": 5.124414776684349e-10, "loss": 0.1345, "step": 56725 }, { "epoch": 0.9860418223852317, "grad_norm": 0.7651028955809848, "learning_rate": 5.111681531647649e-10, "loss": 0.1069, "step": 56726 }, { "epoch": 0.9860592049227346, "grad_norm": 1.2618009079937302, "learning_rate": 5.098964118112059e-10, "loss": 0.1674, "step": 56727 }, { "epoch": 0.9860765874602374, "grad_norm": 0.7336616192699522, "learning_rate": 5.086262536118657e-10, "loss": 0.1738, "step": 56728 }, { "epoch": 0.9860939699977402, "grad_norm": 1.7148098216060197, "learning_rate": 5.073576785707412e-10, "loss": 0.2074, "step": 56729 }, { "epoch": 0.9861113525352431, "grad_norm": 1.777224432270398, "learning_rate": 5.060906866918291e-10, "loss": 0.111, "step": 56730 }, { "epoch": 0.9861287350727459, "grad_norm": 2.2501995055294386, "learning_rate": 5.048252779791262e-10, "loss": 0.157, "step": 56731 }, { "epoch": 0.9861461176102487, "grad_norm": 1.2521338230275887, "learning_rate": 5.035614524366849e-10, "loss": 0.1424, "step": 56732 }, { "epoch": 0.9861635001477516, "grad_norm": 1.3256063387986121, "learning_rate": 5.022992100685575e-10, "loss": 0.1208, "step": 56733 }, { "epoch": 0.9861808826852544, "grad_norm": 1.7557264239791568, "learning_rate": 5.010385508786297e-10, "loss": 0.3193, "step": 56734 }, { "epoch": 0.9861982652227572, "grad_norm": 0.968930144283912, "learning_rate": 4.997794748710094e-10, "loss": 0.1677, "step": 56735 }, { "epoch": 0.98621564776026, "grad_norm": 1.1237102585886445, "learning_rate": 4.985219820495823e-10, "loss": 0.1688, "step": 56736 }, { "epoch": 0.9862330302977629, "grad_norm": 1.1451363567242556, "learning_rate": 4.972660724184563e-10, "loss": 0.1192, "step": 56737 }, { "epoch": 0.9862504128352657, "grad_norm": 1.290766447796887, "learning_rate": 4.960117459814616e-10, "loss": 0.1676, "step": 56738 }, { "epoch": 0.9862677953727685, "grad_norm": 2.841231498875944, "learning_rate": 4.947590027427062e-10, "loss": 0.2462, "step": 56739 }, { "epoch": 0.9862851779102714, "grad_norm": 0.9829742903924729, "learning_rate": 4.935078427061312e-10, "loss": 0.1102, "step": 56740 }, { "epoch": 0.9863025604477742, "grad_norm": 1.9401860633534445, "learning_rate": 4.922582658756225e-10, "loss": 0.2054, "step": 56741 }, { "epoch": 0.986319942985277, "grad_norm": 2.2673078421539365, "learning_rate": 4.910102722552323e-10, "loss": 0.2763, "step": 56742 }, { "epoch": 0.9863373255227799, "grad_norm": 1.0514328180072245, "learning_rate": 4.897638618489019e-10, "loss": 0.108, "step": 56743 }, { "epoch": 0.9863547080602827, "grad_norm": 1.0660906225984559, "learning_rate": 4.885190346605728e-10, "loss": 0.1972, "step": 56744 }, { "epoch": 0.9863720905977854, "grad_norm": 1.4128197059835803, "learning_rate": 4.872757906941305e-10, "loss": 0.0951, "step": 56745 }, { "epoch": 0.9863894731352882, "grad_norm": 1.0536433396309608, "learning_rate": 4.860341299536274e-10, "loss": 0.2347, "step": 56746 }, { "epoch": 0.9864068556727911, "grad_norm": 1.4835476030995298, "learning_rate": 4.84794052442894e-10, "loss": 0.1486, "step": 56747 }, { "epoch": 0.9864242382102939, "grad_norm": 1.2096200013707685, "learning_rate": 4.835555581658712e-10, "loss": 0.1492, "step": 56748 }, { "epoch": 0.9864416207477967, "grad_norm": 1.5063232321669116, "learning_rate": 4.823186471266117e-10, "loss": 0.1718, "step": 56749 }, { "epoch": 0.9864590032852996, "grad_norm": 1.0212528822331222, "learning_rate": 4.810833193288344e-10, "loss": 0.1906, "step": 56750 }, { "epoch": 0.9864763858228024, "grad_norm": 2.4470506997316703, "learning_rate": 4.798495747766473e-10, "loss": 0.2117, "step": 56751 }, { "epoch": 0.9864937683603052, "grad_norm": 1.2289171776712462, "learning_rate": 4.786174134738252e-10, "loss": 0.1919, "step": 56752 }, { "epoch": 0.986511150897808, "grad_norm": 1.1873913516266217, "learning_rate": 4.773868354243649e-10, "loss": 0.2427, "step": 56753 }, { "epoch": 0.9865285334353109, "grad_norm": 1.0493978892652038, "learning_rate": 4.761578406320965e-10, "loss": 0.2648, "step": 56754 }, { "epoch": 0.9865459159728137, "grad_norm": 0.8168627608603792, "learning_rate": 4.749304291009615e-10, "loss": 0.2084, "step": 56755 }, { "epoch": 0.9865632985103165, "grad_norm": 1.6928044849252302, "learning_rate": 4.737046008348455e-10, "loss": 0.138, "step": 56756 }, { "epoch": 0.9865806810478194, "grad_norm": 4.088560960715896, "learning_rate": 4.724803558376345e-10, "loss": 0.2231, "step": 56757 }, { "epoch": 0.9865980635853222, "grad_norm": 1.655027173167512, "learning_rate": 4.712576941131585e-10, "loss": 0.1602, "step": 56758 }, { "epoch": 0.986615446122825, "grad_norm": 1.9106108123957366, "learning_rate": 4.700366156653035e-10, "loss": 0.1549, "step": 56759 }, { "epoch": 0.9866328286603279, "grad_norm": 1.1585609397408179, "learning_rate": 4.688171204980107e-10, "loss": 0.1093, "step": 56760 }, { "epoch": 0.9866502111978307, "grad_norm": 1.5912915753444203, "learning_rate": 4.675992086151103e-10, "loss": 0.1659, "step": 56761 }, { "epoch": 0.9866675937353335, "grad_norm": 1.0111497915102192, "learning_rate": 4.663828800204328e-10, "loss": 0.1563, "step": 56762 }, { "epoch": 0.9866849762728364, "grad_norm": 2.2902797663964005, "learning_rate": 4.6516813471786376e-10, "loss": 0.2148, "step": 56763 }, { "epoch": 0.9867023588103392, "grad_norm": 1.4406425159319294, "learning_rate": 4.639549727112335e-10, "loss": 0.1088, "step": 56764 }, { "epoch": 0.9867197413478419, "grad_norm": 1.775522056552495, "learning_rate": 4.627433940044279e-10, "loss": 0.1909, "step": 56765 }, { "epoch": 0.9867371238853447, "grad_norm": 1.6623184927059727, "learning_rate": 4.615333986012215e-10, "loss": 0.1214, "step": 56766 }, { "epoch": 0.9867545064228476, "grad_norm": 1.7091142100391519, "learning_rate": 4.6032498650550035e-10, "loss": 0.1298, "step": 56767 }, { "epoch": 0.9867718889603504, "grad_norm": 1.2178131244652963, "learning_rate": 4.5911815772109455e-10, "loss": 0.1476, "step": 56768 }, { "epoch": 0.9867892714978532, "grad_norm": 1.206234784157426, "learning_rate": 4.5791291225183436e-10, "loss": 0.1883, "step": 56769 }, { "epoch": 0.9868066540353561, "grad_norm": 1.9657472219220795, "learning_rate": 4.567092501014391e-10, "loss": 0.1964, "step": 56770 }, { "epoch": 0.9868240365728589, "grad_norm": 1.432873966442392, "learning_rate": 4.555071712738501e-10, "loss": 0.1621, "step": 56771 }, { "epoch": 0.9868414191103617, "grad_norm": 1.09364449592194, "learning_rate": 4.5430667577284196e-10, "loss": 0.1512, "step": 56772 }, { "epoch": 0.9868588016478645, "grad_norm": 1.068660472093867, "learning_rate": 4.531077636021896e-10, "loss": 0.1411, "step": 56773 }, { "epoch": 0.9868761841853674, "grad_norm": 2.0135991122035044, "learning_rate": 4.5191043476572324e-10, "loss": 0.1956, "step": 56774 }, { "epoch": 0.9868935667228702, "grad_norm": 0.94090102155473, "learning_rate": 4.5071468926721754e-10, "loss": 0.1812, "step": 56775 }, { "epoch": 0.986910949260373, "grad_norm": 1.4220016950632741, "learning_rate": 4.495205271105029e-10, "loss": 0.1388, "step": 56776 }, { "epoch": 0.9869283317978759, "grad_norm": 1.0665308638012556, "learning_rate": 4.483279482992985e-10, "loss": 0.1667, "step": 56777 }, { "epoch": 0.9869457143353787, "grad_norm": 2.006018590415352, "learning_rate": 4.4713695283743467e-10, "loss": 0.2358, "step": 56778 }, { "epoch": 0.9869630968728815, "grad_norm": 0.7915603694877665, "learning_rate": 4.459475407286306e-10, "loss": 0.1382, "step": 56779 }, { "epoch": 0.9869804794103844, "grad_norm": 1.3836003235024898, "learning_rate": 4.447597119767721e-10, "loss": 0.2519, "step": 56780 }, { "epoch": 0.9869978619478872, "grad_norm": 0.7285437017694198, "learning_rate": 4.4357346658546735e-10, "loss": 0.1356, "step": 56781 }, { "epoch": 0.98701524448539, "grad_norm": 1.4466473544160348, "learning_rate": 4.4238880455860217e-10, "loss": 0.1421, "step": 56782 }, { "epoch": 0.9870326270228929, "grad_norm": 1.4180978630432448, "learning_rate": 4.412057258998958e-10, "loss": 0.1397, "step": 56783 }, { "epoch": 0.9870500095603957, "grad_norm": 0.6363713394407727, "learning_rate": 4.4002423061306747e-10, "loss": 0.1044, "step": 56784 }, { "epoch": 0.9870673920978984, "grad_norm": 1.3554378182851183, "learning_rate": 4.38844318701892e-10, "loss": 0.1623, "step": 56785 }, { "epoch": 0.9870847746354012, "grad_norm": 1.2323555616992472, "learning_rate": 4.376659901700885e-10, "loss": 0.1957, "step": 56786 }, { "epoch": 0.9871021571729041, "grad_norm": 0.9745788406663223, "learning_rate": 4.364892450214319e-10, "loss": 0.1568, "step": 56787 }, { "epoch": 0.9871195397104069, "grad_norm": 3.3585134557875764, "learning_rate": 4.353140832596414e-10, "loss": 0.1552, "step": 56788 }, { "epoch": 0.9871369222479097, "grad_norm": 1.0670642051754309, "learning_rate": 4.341405048883806e-10, "loss": 0.0958, "step": 56789 }, { "epoch": 0.9871543047854126, "grad_norm": 2.0621857392564347, "learning_rate": 4.3296850991136893e-10, "loss": 0.1551, "step": 56790 }, { "epoch": 0.9871716873229154, "grad_norm": 1.8255062436676057, "learning_rate": 4.317980983324365e-10, "loss": 0.2075, "step": 56791 }, { "epoch": 0.9871890698604182, "grad_norm": 1.2911393115856011, "learning_rate": 4.306292701551917e-10, "loss": 0.105, "step": 56792 }, { "epoch": 0.987206452397921, "grad_norm": 1.4686907241567078, "learning_rate": 4.2946202538340913e-10, "loss": 0.1388, "step": 56793 }, { "epoch": 0.9872238349354239, "grad_norm": 15.29368042035834, "learning_rate": 4.2829636402069715e-10, "loss": 0.2118, "step": 56794 }, { "epoch": 0.9872412174729267, "grad_norm": 1.3504833869042658, "learning_rate": 4.271322860708304e-10, "loss": 0.1301, "step": 56795 }, { "epoch": 0.9872586000104295, "grad_norm": 1.8456913998105693, "learning_rate": 4.259697915374727e-10, "loss": 0.1271, "step": 56796 }, { "epoch": 0.9872759825479324, "grad_norm": 1.1072071767438654, "learning_rate": 4.2480888042428775e-10, "loss": 0.0881, "step": 56797 }, { "epoch": 0.9872933650854352, "grad_norm": 1.5596389050793509, "learning_rate": 4.236495527349393e-10, "loss": 0.2488, "step": 56798 }, { "epoch": 0.987310747622938, "grad_norm": 0.9066414071136293, "learning_rate": 4.224918084732021e-10, "loss": 0.1536, "step": 56799 }, { "epoch": 0.9873281301604409, "grad_norm": 1.1065117393112347, "learning_rate": 4.213356476426844e-10, "loss": 0.1822, "step": 56800 }, { "epoch": 0.9873455126979437, "grad_norm": 1.6264163549875792, "learning_rate": 4.201810702469943e-10, "loss": 0.1978, "step": 56801 }, { "epoch": 0.9873628952354465, "grad_norm": 1.8009413128595892, "learning_rate": 4.1902807628985127e-10, "loss": 0.1941, "step": 56802 }, { "epoch": 0.9873802777729493, "grad_norm": 1.3831013687807243, "learning_rate": 4.1787666577491887e-10, "loss": 0.2159, "step": 56803 }, { "epoch": 0.9873976603104522, "grad_norm": 1.3838150791453878, "learning_rate": 4.1672683870580537e-10, "loss": 0.1465, "step": 56804 }, { "epoch": 0.9874150428479549, "grad_norm": 1.6755362526433089, "learning_rate": 4.1557859508617456e-10, "loss": 0.2108, "step": 56805 }, { "epoch": 0.9874324253854577, "grad_norm": 1.1370166622495939, "learning_rate": 4.144319349196901e-10, "loss": 0.0988, "step": 56806 }, { "epoch": 0.9874498079229606, "grad_norm": 0.7364737699822901, "learning_rate": 4.132868582099602e-10, "loss": 0.1623, "step": 56807 }, { "epoch": 0.9874671904604634, "grad_norm": 1.4475599257503526, "learning_rate": 4.1214336496064873e-10, "loss": 0.1352, "step": 56808 }, { "epoch": 0.9874845729979662, "grad_norm": 1.3786075222512537, "learning_rate": 4.1100145517530826e-10, "loss": 0.1375, "step": 56809 }, { "epoch": 0.987501955535469, "grad_norm": 1.297819954383775, "learning_rate": 4.098611288576026e-10, "loss": 0.1578, "step": 56810 }, { "epoch": 0.9875193380729719, "grad_norm": 0.8573220140492737, "learning_rate": 4.0872238601113993e-10, "loss": 0.1544, "step": 56811 }, { "epoch": 0.9875367206104747, "grad_norm": 1.6788193875517514, "learning_rate": 4.0758522663958407e-10, "loss": 0.2218, "step": 56812 }, { "epoch": 0.9875541031479775, "grad_norm": 2.3947798037941714, "learning_rate": 4.0644965074643214e-10, "loss": 0.1654, "step": 56813 }, { "epoch": 0.9875714856854804, "grad_norm": 1.2363184963698215, "learning_rate": 4.053156583353479e-10, "loss": 0.1475, "step": 56814 }, { "epoch": 0.9875888682229832, "grad_norm": 1.9836145299530559, "learning_rate": 4.0418324940993955e-10, "loss": 0.1299, "step": 56815 }, { "epoch": 0.987606250760486, "grad_norm": 1.7275477944599495, "learning_rate": 4.0305242397375984e-10, "loss": 0.1537, "step": 56816 }, { "epoch": 0.9876236332979889, "grad_norm": 0.9956834082455727, "learning_rate": 4.0192318203041696e-10, "loss": 0.1542, "step": 56817 }, { "epoch": 0.9876410158354917, "grad_norm": 2.0874048642932075, "learning_rate": 4.007955235835192e-10, "loss": 0.1903, "step": 56818 }, { "epoch": 0.9876583983729945, "grad_norm": 1.5005627521772644, "learning_rate": 3.9966944863656373e-10, "loss": 0.21, "step": 56819 }, { "epoch": 0.9876757809104973, "grad_norm": 1.035393824344217, "learning_rate": 3.9854495719310323e-10, "loss": 0.1068, "step": 56820 }, { "epoch": 0.9876931634480002, "grad_norm": 1.0535103603369091, "learning_rate": 3.9742204925685695e-10, "loss": 0.2129, "step": 56821 }, { "epoch": 0.987710545985503, "grad_norm": 1.499973249204459, "learning_rate": 3.9630072483121115e-10, "loss": 0.1548, "step": 56822 }, { "epoch": 0.9877279285230058, "grad_norm": 1.3749123362621827, "learning_rate": 3.9518098391982947e-10, "loss": 0.1773, "step": 56823 }, { "epoch": 0.9877453110605087, "grad_norm": 1.9588712675688256, "learning_rate": 3.9406282652620913e-10, "loss": 0.1599, "step": 56824 }, { "epoch": 0.9877626935980114, "grad_norm": 0.9974404062240436, "learning_rate": 3.9294625265390293e-10, "loss": 0.1856, "step": 56825 }, { "epoch": 0.9877800761355142, "grad_norm": 5.207462588699124, "learning_rate": 3.9183126230651896e-10, "loss": 0.1788, "step": 56826 }, { "epoch": 0.987797458673017, "grad_norm": 1.0264788680416774, "learning_rate": 3.907178554874435e-10, "loss": 0.1073, "step": 56827 }, { "epoch": 0.9878148412105199, "grad_norm": 2.1967736737006986, "learning_rate": 3.896060322003403e-10, "loss": 0.1618, "step": 56828 }, { "epoch": 0.9878322237480227, "grad_norm": 1.0699309519118947, "learning_rate": 3.8849579244865093e-10, "loss": 0.161, "step": 56829 }, { "epoch": 0.9878496062855255, "grad_norm": 1.13241769231377, "learning_rate": 3.873871362359837e-10, "loss": 0.1264, "step": 56830 }, { "epoch": 0.9878669888230284, "grad_norm": 0.8533754765834997, "learning_rate": 3.862800635657248e-10, "loss": 0.1821, "step": 56831 }, { "epoch": 0.9878843713605312, "grad_norm": 0.9492274183734494, "learning_rate": 3.851745744414825e-10, "loss": 0.176, "step": 56832 }, { "epoch": 0.987901753898034, "grad_norm": 1.736970604818487, "learning_rate": 3.8407066886675386e-10, "loss": 0.1975, "step": 56833 }, { "epoch": 0.9879191364355369, "grad_norm": 1.6558130757172493, "learning_rate": 3.829683468449807e-10, "loss": 0.1545, "step": 56834 }, { "epoch": 0.9879365189730397, "grad_norm": 1.508073249215208, "learning_rate": 3.8186760837971567e-10, "loss": 0.1371, "step": 56835 }, { "epoch": 0.9879539015105425, "grad_norm": 1.9456209719275739, "learning_rate": 3.80768453474456e-10, "loss": 0.1613, "step": 56836 }, { "epoch": 0.9879712840480454, "grad_norm": 2.179298779767621, "learning_rate": 3.796708821325878e-10, "loss": 0.1416, "step": 56837 }, { "epoch": 0.9879886665855482, "grad_norm": 1.6083513332091461, "learning_rate": 3.785748943577194e-10, "loss": 0.1885, "step": 56838 }, { "epoch": 0.988006049123051, "grad_norm": 1.2858378962675479, "learning_rate": 3.7748049015318137e-10, "loss": 0.1734, "step": 56839 }, { "epoch": 0.9880234316605538, "grad_norm": 1.2686736021194238, "learning_rate": 3.7638766952258205e-10, "loss": 0.1156, "step": 56840 }, { "epoch": 0.9880408141980567, "grad_norm": 1.4699300945531377, "learning_rate": 3.7529643246930753e-10, "loss": 0.1392, "step": 56841 }, { "epoch": 0.9880581967355595, "grad_norm": 1.048206356626436, "learning_rate": 3.7420677899685503e-10, "loss": 0.1534, "step": 56842 }, { "epoch": 0.9880755792730623, "grad_norm": 1.1070408963137948, "learning_rate": 3.7311870910866625e-10, "loss": 0.144, "step": 56843 }, { "epoch": 0.988092961810565, "grad_norm": 1.220104027590671, "learning_rate": 3.720322228081274e-10, "loss": 0.1602, "step": 56844 }, { "epoch": 0.9881103443480679, "grad_norm": 1.387380061299356, "learning_rate": 3.7094732009879116e-10, "loss": 0.1257, "step": 56845 }, { "epoch": 0.9881277268855707, "grad_norm": 1.6329249697340447, "learning_rate": 3.698640009839882e-10, "loss": 0.0829, "step": 56846 }, { "epoch": 0.9881451094230735, "grad_norm": 0.8978628351755915, "learning_rate": 3.6878226546727123e-10, "loss": 0.2263, "step": 56847 }, { "epoch": 0.9881624919605764, "grad_norm": 1.2230785945220957, "learning_rate": 3.677021135519709e-10, "loss": 0.1432, "step": 56848 }, { "epoch": 0.9881798744980792, "grad_norm": 2.6803925275002785, "learning_rate": 3.6662354524152893e-10, "loss": 0.2391, "step": 56849 }, { "epoch": 0.988197257035582, "grad_norm": 1.7595605122594136, "learning_rate": 3.6554656053938703e-10, "loss": 0.1319, "step": 56850 }, { "epoch": 0.9882146395730849, "grad_norm": 2.4130910390690934, "learning_rate": 3.644711594489314e-10, "loss": 0.2097, "step": 56851 }, { "epoch": 0.9882320221105877, "grad_norm": 2.301058486509958, "learning_rate": 3.633973419736036e-10, "loss": 0.1858, "step": 56852 }, { "epoch": 0.9882494046480905, "grad_norm": 1.5594695760212676, "learning_rate": 3.6232510811678996e-10, "loss": 0.1772, "step": 56853 }, { "epoch": 0.9882667871855934, "grad_norm": 1.7726488886640506, "learning_rate": 3.612544578819321e-10, "loss": 0.1961, "step": 56854 }, { "epoch": 0.9882841697230962, "grad_norm": 1.2337869456655701, "learning_rate": 3.601853912723052e-10, "loss": 0.1622, "step": 56855 }, { "epoch": 0.988301552260599, "grad_norm": 1.0811616875063617, "learning_rate": 3.5911790829140643e-10, "loss": 0.1674, "step": 56856 }, { "epoch": 0.9883189347981018, "grad_norm": 1.870389819799296, "learning_rate": 3.58052008942622e-10, "loss": 0.1348, "step": 56857 }, { "epoch": 0.9883363173356047, "grad_norm": 1.6527401578940952, "learning_rate": 3.569876932292826e-10, "loss": 0.1931, "step": 56858 }, { "epoch": 0.9883536998731075, "grad_norm": 1.2936115100903207, "learning_rate": 3.559249611547188e-10, "loss": 0.2069, "step": 56859 }, { "epoch": 0.9883710824106103, "grad_norm": 1.102948672003135, "learning_rate": 3.548638127224279e-10, "loss": 0.2344, "step": 56860 }, { "epoch": 0.9883884649481132, "grad_norm": 1.8993309045568036, "learning_rate": 3.538042479356851e-10, "loss": 0.2289, "step": 56861 }, { "epoch": 0.988405847485616, "grad_norm": 1.2672802363741056, "learning_rate": 3.527462667978209e-10, "loss": 0.2409, "step": 56862 }, { "epoch": 0.9884232300231188, "grad_norm": 1.2431565707870538, "learning_rate": 3.5168986931227717e-10, "loss": 0.1505, "step": 56863 }, { "epoch": 0.9884406125606215, "grad_norm": 2.190033293491841, "learning_rate": 3.5063505548232895e-10, "loss": 0.199, "step": 56864 }, { "epoch": 0.9884579950981244, "grad_norm": 1.3056304257205067, "learning_rate": 3.495818253113625e-10, "loss": 0.2248, "step": 56865 }, { "epoch": 0.9884753776356272, "grad_norm": 1.9442559676325244, "learning_rate": 3.485301788027084e-10, "loss": 0.2476, "step": 56866 }, { "epoch": 0.98849276017313, "grad_norm": 1.8700183664976868, "learning_rate": 3.4748011595964187e-10, "loss": 0.1437, "step": 56867 }, { "epoch": 0.9885101427106329, "grad_norm": 0.9821722712756289, "learning_rate": 3.464316367856046e-10, "loss": 0.1344, "step": 56868 }, { "epoch": 0.9885275252481357, "grad_norm": 1.6154010637306693, "learning_rate": 3.4538474128387176e-10, "loss": 0.1496, "step": 56869 }, { "epoch": 0.9885449077856385, "grad_norm": 1.831306888292102, "learning_rate": 3.443394294576629e-10, "loss": 0.255, "step": 56870 }, { "epoch": 0.9885622903231414, "grad_norm": 1.0208651415890293, "learning_rate": 3.4329570131041985e-10, "loss": 0.1678, "step": 56871 }, { "epoch": 0.9885796728606442, "grad_norm": 1.3087362716094488, "learning_rate": 3.4225355684541766e-10, "loss": 0.1892, "step": 56872 }, { "epoch": 0.988597055398147, "grad_norm": 2.0939683132665494, "learning_rate": 3.4121299606593157e-10, "loss": 0.1806, "step": 56873 }, { "epoch": 0.9886144379356498, "grad_norm": 0.9915769586551106, "learning_rate": 3.4017401897529217e-10, "loss": 0.1274, "step": 56874 }, { "epoch": 0.9886318204731527, "grad_norm": 1.360841573638924, "learning_rate": 3.391366255767747e-10, "loss": 0.1968, "step": 56875 }, { "epoch": 0.9886492030106555, "grad_norm": 2.190741865471597, "learning_rate": 3.3810081587365427e-10, "loss": 0.1962, "step": 56876 }, { "epoch": 0.9886665855481583, "grad_norm": 1.3819226522480186, "learning_rate": 3.37066589869206e-10, "loss": 0.1244, "step": 56877 }, { "epoch": 0.9886839680856612, "grad_norm": 1.677689881338517, "learning_rate": 3.3603394756676064e-10, "loss": 0.1944, "step": 56878 }, { "epoch": 0.988701350623164, "grad_norm": 0.8952127373285703, "learning_rate": 3.350028889695933e-10, "loss": 0.1436, "step": 56879 }, { "epoch": 0.9887187331606668, "grad_norm": 1.3242646487397245, "learning_rate": 3.339734140808681e-10, "loss": 0.1478, "step": 56880 }, { "epoch": 0.9887361156981697, "grad_norm": 1.8486530882492938, "learning_rate": 3.3294552290397126e-10, "loss": 0.2269, "step": 56881 }, { "epoch": 0.9887534982356725, "grad_norm": 1.9132105104644643, "learning_rate": 3.3191921544212245e-10, "loss": 0.2043, "step": 56882 }, { "epoch": 0.9887708807731753, "grad_norm": 1.0070748221548058, "learning_rate": 3.308944916984857e-10, "loss": 0.1223, "step": 56883 }, { "epoch": 0.988788263310678, "grad_norm": 1.3459830042080603, "learning_rate": 3.298713516764473e-10, "loss": 0.1328, "step": 56884 }, { "epoch": 0.9888056458481809, "grad_norm": 2.139539393091748, "learning_rate": 3.2884979537917135e-10, "loss": 0.2562, "step": 56885 }, { "epoch": 0.9888230283856837, "grad_norm": 1.5690301146574632, "learning_rate": 3.2782982280993297e-10, "loss": 0.2183, "step": 56886 }, { "epoch": 0.9888404109231865, "grad_norm": 2.9427260696199022, "learning_rate": 3.2681143397189635e-10, "loss": 0.1775, "step": 56887 }, { "epoch": 0.9888577934606894, "grad_norm": 1.0984027859776873, "learning_rate": 3.257946288683922e-10, "loss": 0.2916, "step": 56888 }, { "epoch": 0.9888751759981922, "grad_norm": 0.7598269209473266, "learning_rate": 3.24779407502529e-10, "loss": 0.1981, "step": 56889 }, { "epoch": 0.988892558535695, "grad_norm": 1.2676960055008208, "learning_rate": 3.2376576987758196e-10, "loss": 0.1922, "step": 56890 }, { "epoch": 0.9889099410731979, "grad_norm": 0.9378995756362071, "learning_rate": 3.227537159968263e-10, "loss": 0.1679, "step": 56891 }, { "epoch": 0.9889273236107007, "grad_norm": 1.667027645586746, "learning_rate": 3.2174324586337064e-10, "loss": 0.1317, "step": 56892 }, { "epoch": 0.9889447061482035, "grad_norm": 1.094232813002017, "learning_rate": 3.207343594804346e-10, "loss": 0.1408, "step": 56893 }, { "epoch": 0.9889620886857063, "grad_norm": 1.166362729545264, "learning_rate": 3.197270568512933e-10, "loss": 0.1508, "step": 56894 }, { "epoch": 0.9889794712232092, "grad_norm": 1.8193086341156743, "learning_rate": 3.187213379790554e-10, "loss": 0.1684, "step": 56895 }, { "epoch": 0.988996853760712, "grad_norm": 1.907152768311913, "learning_rate": 3.177172028669406e-10, "loss": 0.1481, "step": 56896 }, { "epoch": 0.9890142362982148, "grad_norm": 1.2234313837108697, "learning_rate": 3.1671465151811297e-10, "loss": 0.1292, "step": 56897 }, { "epoch": 0.9890316188357177, "grad_norm": 1.4098120581456357, "learning_rate": 3.157136839357366e-10, "loss": 0.221, "step": 56898 }, { "epoch": 0.9890490013732205, "grad_norm": 1.3476981761946, "learning_rate": 3.1471430012303124e-10, "loss": 0.1338, "step": 56899 }, { "epoch": 0.9890663839107233, "grad_norm": 0.7251252983343607, "learning_rate": 3.137165000832165e-10, "loss": 0.1362, "step": 56900 }, { "epoch": 0.9890837664482262, "grad_norm": 1.4625709443170667, "learning_rate": 3.1272028381928995e-10, "loss": 0.1063, "step": 56901 }, { "epoch": 0.989101148985729, "grad_norm": 0.9282507809162023, "learning_rate": 3.1172565133452675e-10, "loss": 0.1495, "step": 56902 }, { "epoch": 0.9891185315232318, "grad_norm": 1.4878407508368814, "learning_rate": 3.107326026320911e-10, "loss": 0.1553, "step": 56903 }, { "epoch": 0.9891359140607345, "grad_norm": 1.0981172958875016, "learning_rate": 3.097411377150916e-10, "loss": 0.2222, "step": 56904 }, { "epoch": 0.9891532965982374, "grad_norm": 2.4922449304824243, "learning_rate": 3.087512565866368e-10, "loss": 0.2133, "step": 56905 }, { "epoch": 0.9891706791357402, "grad_norm": 1.1481843133541294, "learning_rate": 3.0776295924989094e-10, "loss": 0.1511, "step": 56906 }, { "epoch": 0.989188061673243, "grad_norm": 1.3779124921477726, "learning_rate": 3.067762457080181e-10, "loss": 0.1293, "step": 56907 }, { "epoch": 0.9892054442107459, "grad_norm": 2.710501810346436, "learning_rate": 3.0579111596412687e-10, "loss": 0.1673, "step": 56908 }, { "epoch": 0.9892228267482487, "grad_norm": 1.4135938012854172, "learning_rate": 3.0480757002132594e-10, "loss": 0.1705, "step": 56909 }, { "epoch": 0.9892402092857515, "grad_norm": 2.555720716698233, "learning_rate": 3.0382560788277943e-10, "loss": 0.1547, "step": 56910 }, { "epoch": 0.9892575918232543, "grad_norm": 0.9014196394440985, "learning_rate": 3.0284522955154046e-10, "loss": 0.1674, "step": 56911 }, { "epoch": 0.9892749743607572, "grad_norm": 0.8849402285119817, "learning_rate": 3.0186643503077314e-10, "loss": 0.1947, "step": 56912 }, { "epoch": 0.98929235689826, "grad_norm": 1.1037218536991396, "learning_rate": 3.008892243234751e-10, "loss": 0.102, "step": 56913 }, { "epoch": 0.9893097394357628, "grad_norm": 1.225047784862391, "learning_rate": 2.999135974328659e-10, "loss": 0.1305, "step": 56914 }, { "epoch": 0.9893271219732657, "grad_norm": 1.6540740919224346, "learning_rate": 2.989395543619988e-10, "loss": 0.1562, "step": 56915 }, { "epoch": 0.9893445045107685, "grad_norm": 2.466384573330944, "learning_rate": 2.979670951139268e-10, "loss": 0.2558, "step": 56916 }, { "epoch": 0.9893618870482713, "grad_norm": 1.0354997530179944, "learning_rate": 2.969962196917031e-10, "loss": 0.2391, "step": 56917 }, { "epoch": 0.9893792695857742, "grad_norm": 1.1026780200532045, "learning_rate": 2.9602692809854724e-10, "loss": 0.1542, "step": 56918 }, { "epoch": 0.989396652123277, "grad_norm": 0.6737841248821375, "learning_rate": 2.950592203374014e-10, "loss": 0.1957, "step": 56919 }, { "epoch": 0.9894140346607798, "grad_norm": 1.7373136788209307, "learning_rate": 2.9409309641137413e-10, "loss": 0.158, "step": 56920 }, { "epoch": 0.9894314171982826, "grad_norm": 1.5640339718944094, "learning_rate": 2.9312855632351863e-10, "loss": 0.1299, "step": 56921 }, { "epoch": 0.9894487997357855, "grad_norm": 0.8480179111136544, "learning_rate": 2.9216560007688797e-10, "loss": 0.0979, "step": 56922 }, { "epoch": 0.9894661822732883, "grad_norm": 1.2379123759467239, "learning_rate": 2.9120422767459074e-10, "loss": 0.2563, "step": 56923 }, { "epoch": 0.989483564810791, "grad_norm": 1.6373456467725294, "learning_rate": 2.9024443911956907e-10, "loss": 0.259, "step": 56924 }, { "epoch": 0.9895009473482939, "grad_norm": 1.379932763376288, "learning_rate": 2.8928623441498713e-10, "loss": 0.2153, "step": 56925 }, { "epoch": 0.9895183298857967, "grad_norm": 1.9721429991111645, "learning_rate": 2.8832961356378695e-10, "loss": 0.1632, "step": 56926 }, { "epoch": 0.9895357124232995, "grad_norm": 1.4713666486638863, "learning_rate": 2.8737457656902164e-10, "loss": 0.2829, "step": 56927 }, { "epoch": 0.9895530949608023, "grad_norm": 1.6124104889543922, "learning_rate": 2.8642112343374434e-10, "loss": 0.139, "step": 56928 }, { "epoch": 0.9895704774983052, "grad_norm": 1.213737101239267, "learning_rate": 2.854692541609527e-10, "loss": 0.3399, "step": 56929 }, { "epoch": 0.989587860035808, "grad_norm": 1.6322867934218204, "learning_rate": 2.8451896875364425e-10, "loss": 0.1469, "step": 56930 }, { "epoch": 0.9896052425733108, "grad_norm": 1.7339250538477344, "learning_rate": 2.835702672149276e-10, "loss": 0.1778, "step": 56931 }, { "epoch": 0.9896226251108137, "grad_norm": 1.2007771320121279, "learning_rate": 2.826231495476894e-10, "loss": 0.1469, "step": 56932 }, { "epoch": 0.9896400076483165, "grad_norm": 0.9931935514503489, "learning_rate": 2.8167761575503824e-10, "loss": 0.2204, "step": 56933 }, { "epoch": 0.9896573901858193, "grad_norm": 0.8933271477026015, "learning_rate": 2.807336658398607e-10, "loss": 0.2472, "step": 56934 }, { "epoch": 0.9896747727233222, "grad_norm": 1.228117111512536, "learning_rate": 2.7979129980526537e-10, "loss": 0.1897, "step": 56935 }, { "epoch": 0.989692155260825, "grad_norm": 1.0274689331846243, "learning_rate": 2.788505176540834e-10, "loss": 0.132, "step": 56936 }, { "epoch": 0.9897095377983278, "grad_norm": 1.698345104189211, "learning_rate": 2.779113193894789e-10, "loss": 0.2152, "step": 56937 }, { "epoch": 0.9897269203358307, "grad_norm": 1.8457723257211534, "learning_rate": 2.769737050143384e-10, "loss": 0.1431, "step": 56938 }, { "epoch": 0.9897443028733335, "grad_norm": 1.841482150292126, "learning_rate": 2.7603767453160396e-10, "loss": 0.2845, "step": 56939 }, { "epoch": 0.9897616854108363, "grad_norm": 1.4894203870077949, "learning_rate": 2.751032279442733e-10, "loss": 0.1559, "step": 56940 }, { "epoch": 0.9897790679483391, "grad_norm": 0.9953510836174717, "learning_rate": 2.74170365255344e-10, "loss": 0.1785, "step": 56941 }, { "epoch": 0.989796450485842, "grad_norm": 1.6083286848153446, "learning_rate": 2.7323908646770253e-10, "loss": 0.1909, "step": 56942 }, { "epoch": 0.9898138330233448, "grad_norm": 1.3713231406136166, "learning_rate": 2.7230939158434663e-10, "loss": 0.1604, "step": 56943 }, { "epoch": 0.9898312155608475, "grad_norm": 1.42733311157599, "learning_rate": 2.713812806082183e-10, "loss": 0.1753, "step": 56944 }, { "epoch": 0.9898485980983504, "grad_norm": 1.2305108879580824, "learning_rate": 2.704547535422597e-10, "loss": 0.1675, "step": 56945 }, { "epoch": 0.9898659806358532, "grad_norm": 1.489453137821716, "learning_rate": 2.695298103894128e-10, "loss": 0.2261, "step": 56946 }, { "epoch": 0.989883363173356, "grad_norm": 1.2614363072282126, "learning_rate": 2.6860645115256433e-10, "loss": 0.1492, "step": 56947 }, { "epoch": 0.9899007457108588, "grad_norm": 1.390073690429403, "learning_rate": 2.6768467583471177e-10, "loss": 0.2171, "step": 56948 }, { "epoch": 0.9899181282483617, "grad_norm": 1.3419359816834706, "learning_rate": 2.667644844386863e-10, "loss": 0.1592, "step": 56949 }, { "epoch": 0.9899355107858645, "grad_norm": 0.7886155138493096, "learning_rate": 2.658458769675409e-10, "loss": 0.121, "step": 56950 }, { "epoch": 0.9899528933233673, "grad_norm": 2.6479581394548637, "learning_rate": 2.6492885342405126e-10, "loss": 0.136, "step": 56951 }, { "epoch": 0.9899702758608702, "grad_norm": 4.985913696515482, "learning_rate": 2.640134138111594e-10, "loss": 0.2529, "step": 56952 }, { "epoch": 0.989987658398373, "grad_norm": 1.316926978167629, "learning_rate": 2.630995581318629e-10, "loss": 0.1503, "step": 56953 }, { "epoch": 0.9900050409358758, "grad_norm": 1.519747722863175, "learning_rate": 2.621872863889374e-10, "loss": 0.138, "step": 56954 }, { "epoch": 0.9900224234733787, "grad_norm": 0.9213905353684693, "learning_rate": 2.612765985852694e-10, "loss": 0.1782, "step": 56955 }, { "epoch": 0.9900398060108815, "grad_norm": 1.2212601601868573, "learning_rate": 2.6036749472385657e-10, "loss": 0.1443, "step": 56956 }, { "epoch": 0.9900571885483843, "grad_norm": 1.0209084831211448, "learning_rate": 2.594599748074744e-10, "loss": 0.2514, "step": 56957 }, { "epoch": 0.9900745710858871, "grad_norm": 1.5737782584803865, "learning_rate": 2.58554038839065e-10, "loss": 0.122, "step": 56958 }, { "epoch": 0.99009195362339, "grad_norm": 1.3493578234996282, "learning_rate": 2.57649686821515e-10, "loss": 0.1539, "step": 56959 }, { "epoch": 0.9901093361608928, "grad_norm": 1.4602527316130327, "learning_rate": 2.567469187575999e-10, "loss": 0.1915, "step": 56960 }, { "epoch": 0.9901267186983956, "grad_norm": 1.2918228516695573, "learning_rate": 2.558457346502618e-10, "loss": 0.3026, "step": 56961 }, { "epoch": 0.9901441012358985, "grad_norm": 1.2532888749623603, "learning_rate": 2.549461345022763e-10, "loss": 0.1532, "step": 56962 }, { "epoch": 0.9901614837734013, "grad_norm": 1.0993770441109671, "learning_rate": 2.5404811831658547e-10, "loss": 0.2476, "step": 56963 }, { "epoch": 0.990178866310904, "grad_norm": 1.4959710607311414, "learning_rate": 2.531516860960203e-10, "loss": 0.2043, "step": 56964 }, { "epoch": 0.9901962488484068, "grad_norm": 1.5359409393363346, "learning_rate": 2.522568378433565e-10, "loss": 0.2583, "step": 56965 }, { "epoch": 0.9902136313859097, "grad_norm": 1.3920259109165416, "learning_rate": 2.5136357356148054e-10, "loss": 0.2311, "step": 56966 }, { "epoch": 0.9902310139234125, "grad_norm": 1.9360950676710098, "learning_rate": 2.504718932532235e-10, "loss": 0.1936, "step": 56967 }, { "epoch": 0.9902483964609153, "grad_norm": 1.0083772119890164, "learning_rate": 2.4958179692141645e-10, "loss": 0.1384, "step": 56968 }, { "epoch": 0.9902657789984182, "grad_norm": 1.398296816985964, "learning_rate": 2.4869328456889047e-10, "loss": 0.2102, "step": 56969 }, { "epoch": 0.990283161535921, "grad_norm": 2.620282908292013, "learning_rate": 2.478063561984212e-10, "loss": 0.2183, "step": 56970 }, { "epoch": 0.9903005440734238, "grad_norm": 1.479381355279167, "learning_rate": 2.4692101181283953e-10, "loss": 0.2239, "step": 56971 }, { "epoch": 0.9903179266109267, "grad_norm": 1.0343566194718026, "learning_rate": 2.4603725141492114e-10, "loss": 0.208, "step": 56972 }, { "epoch": 0.9903353091484295, "grad_norm": 1.4029872969322796, "learning_rate": 2.451550750074971e-10, "loss": 0.3359, "step": 56973 }, { "epoch": 0.9903526916859323, "grad_norm": 2.0056243308371857, "learning_rate": 2.4427448259339845e-10, "loss": 0.1495, "step": 56974 }, { "epoch": 0.9903700742234351, "grad_norm": 0.9397290865554062, "learning_rate": 2.4339547417534525e-10, "loss": 0.1872, "step": 56975 }, { "epoch": 0.990387456760938, "grad_norm": 1.8503428984198653, "learning_rate": 2.425180497561685e-10, "loss": 0.138, "step": 56976 }, { "epoch": 0.9904048392984408, "grad_norm": 1.332661952606878, "learning_rate": 2.416422093386439e-10, "loss": 0.2117, "step": 56977 }, { "epoch": 0.9904222218359436, "grad_norm": 2.0043518258164847, "learning_rate": 2.407679529256024e-10, "loss": 0.1603, "step": 56978 }, { "epoch": 0.9904396043734465, "grad_norm": 1.140077641482403, "learning_rate": 2.3989528051965304e-10, "loss": 0.2271, "step": 56979 }, { "epoch": 0.9904569869109493, "grad_norm": 1.701696255515327, "learning_rate": 2.390241921237379e-10, "loss": 0.2047, "step": 56980 }, { "epoch": 0.9904743694484521, "grad_norm": 1.8065191968418246, "learning_rate": 2.3815468774052163e-10, "loss": 0.1263, "step": 56981 }, { "epoch": 0.990491751985955, "grad_norm": 1.6790658668798857, "learning_rate": 2.3728676737277966e-10, "loss": 0.3071, "step": 56982 }, { "epoch": 0.9905091345234577, "grad_norm": 1.6686696188462304, "learning_rate": 2.364204310232321e-10, "loss": 0.178, "step": 56983 }, { "epoch": 0.9905265170609605, "grad_norm": 1.4261608374749282, "learning_rate": 2.355556786947099e-10, "loss": 0.1829, "step": 56984 }, { "epoch": 0.9905438995984633, "grad_norm": 2.5929967543229933, "learning_rate": 2.346925103898778e-10, "loss": 0.3562, "step": 56985 }, { "epoch": 0.9905612821359662, "grad_norm": 0.879963502095703, "learning_rate": 2.338309261115112e-10, "loss": 0.1405, "step": 56986 }, { "epoch": 0.990578664673469, "grad_norm": 1.2069845945263422, "learning_rate": 2.3297092586233027e-10, "loss": 0.12, "step": 56987 }, { "epoch": 0.9905960472109718, "grad_norm": 1.1585964750667597, "learning_rate": 2.3211250964505491e-10, "loss": 0.1032, "step": 56988 }, { "epoch": 0.9906134297484747, "grad_norm": 1.5671053754164324, "learning_rate": 2.3125567746240528e-10, "loss": 0.1968, "step": 56989 }, { "epoch": 0.9906308122859775, "grad_norm": 1.4307710558353715, "learning_rate": 2.3040042931710135e-10, "loss": 0.1551, "step": 56990 }, { "epoch": 0.9906481948234803, "grad_norm": 1.9049934316379145, "learning_rate": 2.2954676521186322e-10, "loss": 0.2401, "step": 56991 }, { "epoch": 0.9906655773609832, "grad_norm": 1.5095520114505923, "learning_rate": 2.2869468514935542e-10, "loss": 0.232, "step": 56992 }, { "epoch": 0.990682959898486, "grad_norm": 1.0348907758179087, "learning_rate": 2.278441891323535e-10, "loss": 0.1421, "step": 56993 }, { "epoch": 0.9907003424359888, "grad_norm": 1.182413618246612, "learning_rate": 2.2699527716346645e-10, "loss": 0.2182, "step": 56994 }, { "epoch": 0.9907177249734916, "grad_norm": 1.110883378948312, "learning_rate": 2.2614794924541437e-10, "loss": 0.2071, "step": 56995 }, { "epoch": 0.9907351075109945, "grad_norm": 1.2100028550342685, "learning_rate": 2.2530220538097278e-10, "loss": 0.1374, "step": 56996 }, { "epoch": 0.9907524900484973, "grad_norm": 1.1820954110046318, "learning_rate": 2.244580455726952e-10, "loss": 0.1329, "step": 56997 }, { "epoch": 0.9907698725860001, "grad_norm": 1.1846671371829234, "learning_rate": 2.2361546982330172e-10, "loss": 0.179, "step": 56998 }, { "epoch": 0.990787255123503, "grad_norm": 1.1924808923529309, "learning_rate": 2.227744781354568e-10, "loss": 0.1861, "step": 56999 }, { "epoch": 0.9908046376610058, "grad_norm": 1.5065935790860154, "learning_rate": 2.2193507051188053e-10, "loss": 0.1401, "step": 57000 }, { "epoch": 0.9908220201985086, "grad_norm": 1.2391865963894988, "learning_rate": 2.2109724695512645e-10, "loss": 0.1932, "step": 57001 }, { "epoch": 0.9908394027360115, "grad_norm": 1.6325914998537192, "learning_rate": 2.2026100746797006e-10, "loss": 0.1571, "step": 57002 }, { "epoch": 0.9908567852735142, "grad_norm": 1.257532241635717, "learning_rate": 2.194263520530204e-10, "loss": 0.1626, "step": 57003 }, { "epoch": 0.990874167811017, "grad_norm": 1.85594500325476, "learning_rate": 2.18593280712831e-10, "loss": 0.1636, "step": 57004 }, { "epoch": 0.9908915503485198, "grad_norm": 1.9523025929678235, "learning_rate": 2.177617934501774e-10, "loss": 0.2762, "step": 57005 }, { "epoch": 0.9909089328860227, "grad_norm": 1.0567041342067813, "learning_rate": 2.1693189026766868e-10, "loss": 0.1563, "step": 57006 }, { "epoch": 0.9909263154235255, "grad_norm": 1.9502397990286384, "learning_rate": 2.1610357116785827e-10, "loss": 0.145, "step": 57007 }, { "epoch": 0.9909436979610283, "grad_norm": 1.4366636928334091, "learning_rate": 2.1527683615341074e-10, "loss": 0.1936, "step": 57008 }, { "epoch": 0.9909610804985312, "grad_norm": 2.2995818866637, "learning_rate": 2.1445168522699064e-10, "loss": 0.2663, "step": 57009 }, { "epoch": 0.990978463036034, "grad_norm": 0.9227427792848698, "learning_rate": 2.1362811839115146e-10, "loss": 0.1122, "step": 57010 }, { "epoch": 0.9909958455735368, "grad_norm": 0.8050970968987581, "learning_rate": 2.1280613564850224e-10, "loss": 0.159, "step": 57011 }, { "epoch": 0.9910132281110396, "grad_norm": 0.8052574739463918, "learning_rate": 2.1198573700170753e-10, "loss": 0.2353, "step": 57012 }, { "epoch": 0.9910306106485425, "grad_norm": 1.1597141863392533, "learning_rate": 2.111669224533208e-10, "loss": 0.12, "step": 57013 }, { "epoch": 0.9910479931860453, "grad_norm": 1.5184349434315363, "learning_rate": 2.103496920059511e-10, "loss": 0.129, "step": 57014 }, { "epoch": 0.9910653757235481, "grad_norm": 0.6792595447124049, "learning_rate": 2.0953404566220745e-10, "loss": 0.1407, "step": 57015 }, { "epoch": 0.991082758261051, "grad_norm": 0.9042345130179089, "learning_rate": 2.0871998342464335e-10, "loss": 0.1299, "step": 57016 }, { "epoch": 0.9911001407985538, "grad_norm": 0.8919147626374132, "learning_rate": 2.0790750529586786e-10, "loss": 0.1967, "step": 57017 }, { "epoch": 0.9911175233360566, "grad_norm": 1.3985441901211753, "learning_rate": 2.0709661127837895e-10, "loss": 0.2479, "step": 57018 }, { "epoch": 0.9911349058735595, "grad_norm": 1.146265312651352, "learning_rate": 2.062873013748967e-10, "loss": 0.3399, "step": 57019 }, { "epoch": 0.9911522884110623, "grad_norm": 1.4563816652326307, "learning_rate": 2.0547957558786355e-10, "loss": 0.2719, "step": 57020 }, { "epoch": 0.9911696709485651, "grad_norm": 1.7255773680421622, "learning_rate": 2.0467343391983305e-10, "loss": 0.2696, "step": 57021 }, { "epoch": 0.991187053486068, "grad_norm": 1.201047763333747, "learning_rate": 2.038688763734142e-10, "loss": 0.2205, "step": 57022 }, { "epoch": 0.9912044360235707, "grad_norm": 1.4965515586503289, "learning_rate": 2.0306590295116054e-10, "loss": 0.1778, "step": 57023 }, { "epoch": 0.9912218185610735, "grad_norm": 2.300654076940828, "learning_rate": 2.0226451365562557e-10, "loss": 0.128, "step": 57024 }, { "epoch": 0.9912392010985763, "grad_norm": 1.6437753525087397, "learning_rate": 2.014647084892518e-10, "loss": 0.2217, "step": 57025 }, { "epoch": 0.9912565836360792, "grad_norm": 1.1517202556210142, "learning_rate": 2.0066648745470373e-10, "loss": 0.1134, "step": 57026 }, { "epoch": 0.991273966173582, "grad_norm": 0.9285132997517699, "learning_rate": 1.9986985055447936e-10, "loss": 0.1653, "step": 57027 }, { "epoch": 0.9912913487110848, "grad_norm": 1.0015042535756402, "learning_rate": 1.990747977910212e-10, "loss": 0.3251, "step": 57028 }, { "epoch": 0.9913087312485876, "grad_norm": 1.1144212378670966, "learning_rate": 1.982813291669383e-10, "loss": 0.1041, "step": 57029 }, { "epoch": 0.9913261137860905, "grad_norm": 1.5388027560614705, "learning_rate": 1.974894446846731e-10, "loss": 0.1631, "step": 57030 }, { "epoch": 0.9913434963235933, "grad_norm": 2.1335822043068484, "learning_rate": 1.9669914434683466e-10, "loss": 0.2511, "step": 57031 }, { "epoch": 0.9913608788610961, "grad_norm": 0.9283647624773009, "learning_rate": 1.9591042815580992e-10, "loss": 0.1695, "step": 57032 }, { "epoch": 0.991378261398599, "grad_norm": 1.5389538906368507, "learning_rate": 1.9512329611420796e-10, "loss": 0.176, "step": 57033 }, { "epoch": 0.9913956439361018, "grad_norm": 0.792677411435732, "learning_rate": 1.9433774822441572e-10, "loss": 0.2313, "step": 57034 }, { "epoch": 0.9914130264736046, "grad_norm": 2.138826957030367, "learning_rate": 1.9355378448904224e-10, "loss": 0.1878, "step": 57035 }, { "epoch": 0.9914304090111075, "grad_norm": 1.4175253613971739, "learning_rate": 1.927714049104745e-10, "loss": 0.1431, "step": 57036 }, { "epoch": 0.9914477915486103, "grad_norm": 2.5606573365692493, "learning_rate": 1.91990609491266e-10, "loss": 0.1675, "step": 57037 }, { "epoch": 0.9914651740861131, "grad_norm": 7.826694940087167, "learning_rate": 1.9121139823380373e-10, "loss": 0.1903, "step": 57038 }, { "epoch": 0.991482556623616, "grad_norm": 1.0448140683794698, "learning_rate": 1.904337711406412e-10, "loss": 0.2144, "step": 57039 }, { "epoch": 0.9914999391611188, "grad_norm": 1.8668707023189726, "learning_rate": 1.8965772821422088e-10, "loss": 0.2109, "step": 57040 }, { "epoch": 0.9915173216986216, "grad_norm": 0.9824402175808511, "learning_rate": 1.8888326945692978e-10, "loss": 0.1091, "step": 57041 }, { "epoch": 0.9915347042361244, "grad_norm": 0.7766630355723961, "learning_rate": 1.8811039487132142e-10, "loss": 0.1524, "step": 57042 }, { "epoch": 0.9915520867736272, "grad_norm": 1.1021207173244632, "learning_rate": 1.8733910445983825e-10, "loss": 0.2356, "step": 57043 }, { "epoch": 0.99156946931113, "grad_norm": 2.9674790883434796, "learning_rate": 1.8656939822486728e-10, "loss": 0.1923, "step": 57044 }, { "epoch": 0.9915868518486328, "grad_norm": 0.9647446664730996, "learning_rate": 1.8580127616890651e-10, "loss": 0.1025, "step": 57045 }, { "epoch": 0.9916042343861357, "grad_norm": 1.183086707548801, "learning_rate": 1.850347382942874e-10, "loss": 0.2568, "step": 57046 }, { "epoch": 0.9916216169236385, "grad_norm": 1.5598181305527632, "learning_rate": 1.8426978460356345e-10, "loss": 0.2576, "step": 57047 }, { "epoch": 0.9916389994611413, "grad_norm": 2.0269923548653153, "learning_rate": 1.8350641509912167e-10, "loss": 0.2118, "step": 57048 }, { "epoch": 0.9916563819986441, "grad_norm": 2.134906756412782, "learning_rate": 1.8274462978334905e-10, "loss": 0.1903, "step": 57049 }, { "epoch": 0.991673764536147, "grad_norm": 1.3885000318050644, "learning_rate": 1.819844286586325e-10, "loss": 0.136, "step": 57050 }, { "epoch": 0.9916911470736498, "grad_norm": 1.8185102302447855, "learning_rate": 1.812258117275256e-10, "loss": 0.2421, "step": 57051 }, { "epoch": 0.9917085296111526, "grad_norm": 1.4696948033212494, "learning_rate": 1.8046877899224877e-10, "loss": 0.1323, "step": 57052 }, { "epoch": 0.9917259121486555, "grad_norm": 2.169312986924006, "learning_rate": 1.7971333045530002e-10, "loss": 0.1627, "step": 57053 }, { "epoch": 0.9917432946861583, "grad_norm": 1.0703673069054966, "learning_rate": 1.7895946611912182e-10, "loss": 0.1747, "step": 57054 }, { "epoch": 0.9917606772236611, "grad_norm": 0.9103234974642176, "learning_rate": 1.7820718598599016e-10, "loss": 0.1729, "step": 57055 }, { "epoch": 0.991778059761164, "grad_norm": 1.0259188950241032, "learning_rate": 1.7745649005834751e-10, "loss": 0.1961, "step": 57056 }, { "epoch": 0.9917954422986668, "grad_norm": 1.1374437432774356, "learning_rate": 1.7670737833858085e-10, "loss": 0.1726, "step": 57057 }, { "epoch": 0.9918128248361696, "grad_norm": 1.0508025262629717, "learning_rate": 1.7595985082902164e-10, "loss": 0.1661, "step": 57058 }, { "epoch": 0.9918302073736724, "grad_norm": 1.8097912593952663, "learning_rate": 1.752139075321124e-10, "loss": 0.2427, "step": 57059 }, { "epoch": 0.9918475899111753, "grad_norm": 1.0000962934595206, "learning_rate": 1.7446954845012906e-10, "loss": 0.1105, "step": 57060 }, { "epoch": 0.9918649724486781, "grad_norm": 1.342110692410078, "learning_rate": 1.7372677358551413e-10, "loss": 0.2182, "step": 57061 }, { "epoch": 0.9918823549861809, "grad_norm": 4.468031043525786, "learning_rate": 1.7298558294059905e-10, "loss": 0.2382, "step": 57062 }, { "epoch": 0.9918997375236837, "grad_norm": 1.7476712013782498, "learning_rate": 1.7224597651765982e-10, "loss": 0.2078, "step": 57063 }, { "epoch": 0.9919171200611865, "grad_norm": 1.3375558770216387, "learning_rate": 1.715079543191389e-10, "loss": 0.2477, "step": 57064 }, { "epoch": 0.9919345025986893, "grad_norm": 2.0975247735581988, "learning_rate": 1.7077151634731224e-10, "loss": 0.3096, "step": 57065 }, { "epoch": 0.9919518851361921, "grad_norm": 1.4437255845717976, "learning_rate": 1.7003666260456683e-10, "loss": 0.2969, "step": 57066 }, { "epoch": 0.991969267673695, "grad_norm": 0.9676421659686992, "learning_rate": 1.6930339309317865e-10, "loss": 0.127, "step": 57067 }, { "epoch": 0.9919866502111978, "grad_norm": 2.1256864511639884, "learning_rate": 1.6857170781553464e-10, "loss": 0.2384, "step": 57068 }, { "epoch": 0.9920040327487006, "grad_norm": 3.035843649018928, "learning_rate": 1.6784160677385527e-10, "loss": 0.2391, "step": 57069 }, { "epoch": 0.9920214152862035, "grad_norm": 1.3361110810657548, "learning_rate": 1.6711308997052752e-10, "loss": 0.1256, "step": 57070 }, { "epoch": 0.9920387978237063, "grad_norm": 1.2369030486327852, "learning_rate": 1.6638615740788287e-10, "loss": 0.1861, "step": 57071 }, { "epoch": 0.9920561803612091, "grad_norm": 2.2846196680589697, "learning_rate": 1.6566080908814172e-10, "loss": 0.1857, "step": 57072 }, { "epoch": 0.992073562898712, "grad_norm": 1.5643218450061978, "learning_rate": 1.649370450136911e-10, "loss": 0.1938, "step": 57073 }, { "epoch": 0.9920909454362148, "grad_norm": 1.7740239934235718, "learning_rate": 1.6421486518675143e-10, "loss": 0.2882, "step": 57074 }, { "epoch": 0.9921083279737176, "grad_norm": 2.0739342360139306, "learning_rate": 1.6349426960970968e-10, "loss": 0.1831, "step": 57075 }, { "epoch": 0.9921257105112204, "grad_norm": 1.314658080323409, "learning_rate": 1.6277525828473081e-10, "loss": 0.1798, "step": 57076 }, { "epoch": 0.9921430930487233, "grad_norm": 1.694934236689208, "learning_rate": 1.6205783121414628e-10, "loss": 0.2569, "step": 57077 }, { "epoch": 0.9921604755862261, "grad_norm": 1.7171934528204464, "learning_rate": 1.6134198840023206e-10, "loss": 0.2518, "step": 57078 }, { "epoch": 0.9921778581237289, "grad_norm": 1.2198092252618744, "learning_rate": 1.6062772984526407e-10, "loss": 0.2272, "step": 57079 }, { "epoch": 0.9921952406612318, "grad_norm": 1.0234344587422401, "learning_rate": 1.599150555515183e-10, "loss": 0.1433, "step": 57080 }, { "epoch": 0.9922126231987346, "grad_norm": 0.9020121911634161, "learning_rate": 1.5920396552121517e-10, "loss": 0.0709, "step": 57081 }, { "epoch": 0.9922300057362374, "grad_norm": 1.1317465980194705, "learning_rate": 1.5849445975663067e-10, "loss": 0.1312, "step": 57082 }, { "epoch": 0.9922473882737401, "grad_norm": 2.1562385755987017, "learning_rate": 1.5778653825998523e-10, "loss": 0.1751, "step": 57083 }, { "epoch": 0.992264770811243, "grad_norm": 1.2516976232830246, "learning_rate": 1.5708020103355478e-10, "loss": 0.1744, "step": 57084 }, { "epoch": 0.9922821533487458, "grad_norm": 1.1232999161638042, "learning_rate": 1.5637544807961532e-10, "loss": 0.1487, "step": 57085 }, { "epoch": 0.9922995358862486, "grad_norm": 2.306008558619558, "learning_rate": 1.5567227940033178e-10, "loss": 0.2449, "step": 57086 }, { "epoch": 0.9923169184237515, "grad_norm": 1.8949735916788708, "learning_rate": 1.5497069499792458e-10, "loss": 0.1276, "step": 57087 }, { "epoch": 0.9923343009612543, "grad_norm": 1.4967242616831802, "learning_rate": 1.542706948746697e-10, "loss": 0.1539, "step": 57088 }, { "epoch": 0.9923516834987571, "grad_norm": 1.1298999462887644, "learning_rate": 1.5357227903278757e-10, "loss": 0.1699, "step": 57089 }, { "epoch": 0.99236906603626, "grad_norm": 2.4313423259757774, "learning_rate": 1.5287544747449865e-10, "loss": 0.2212, "step": 57090 }, { "epoch": 0.9923864485737628, "grad_norm": 1.3868906053195136, "learning_rate": 1.5218020020191236e-10, "loss": 0.1777, "step": 57091 }, { "epoch": 0.9924038311112656, "grad_norm": 1.2874455519183345, "learning_rate": 1.5148653721736016e-10, "loss": 0.1477, "step": 57092 }, { "epoch": 0.9924212136487685, "grad_norm": 1.6957233861532526, "learning_rate": 1.5079445852295147e-10, "loss": 0.1967, "step": 57093 }, { "epoch": 0.9924385961862713, "grad_norm": 1.7058256915467123, "learning_rate": 1.5010396412090676e-10, "loss": 0.151, "step": 57094 }, { "epoch": 0.9924559787237741, "grad_norm": 0.8373987548770467, "learning_rate": 1.4941505401344645e-10, "loss": 0.164, "step": 57095 }, { "epoch": 0.9924733612612769, "grad_norm": 1.718848883917419, "learning_rate": 1.4872772820267997e-10, "loss": 0.1875, "step": 57096 }, { "epoch": 0.9924907437987798, "grad_norm": 1.0085257048611762, "learning_rate": 1.480419866908833e-10, "loss": 0.2909, "step": 57097 }, { "epoch": 0.9925081263362826, "grad_norm": 1.2717828989185909, "learning_rate": 1.4735782948016584e-10, "loss": 0.2583, "step": 57098 }, { "epoch": 0.9925255088737854, "grad_norm": 1.3297009302922396, "learning_rate": 1.4667525657269253e-10, "loss": 0.1854, "step": 57099 }, { "epoch": 0.9925428914112883, "grad_norm": 2.062751896676604, "learning_rate": 1.4599426797068383e-10, "loss": 0.2077, "step": 57100 }, { "epoch": 0.9925602739487911, "grad_norm": 0.9473800168848431, "learning_rate": 1.453148636762491e-10, "loss": 0.1107, "step": 57101 }, { "epoch": 0.9925776564862939, "grad_norm": 1.5771937562669363, "learning_rate": 1.4463704369155337e-10, "loss": 0.1675, "step": 57102 }, { "epoch": 0.9925950390237966, "grad_norm": 1.2273059739606649, "learning_rate": 1.4396080801870603e-10, "loss": 0.1757, "step": 57103 }, { "epoch": 0.9926124215612995, "grad_norm": 1.4544418668505146, "learning_rate": 1.432861566599275e-10, "loss": 0.1621, "step": 57104 }, { "epoch": 0.9926298040988023, "grad_norm": 1.6032388644745195, "learning_rate": 1.426130896172717e-10, "loss": 0.1395, "step": 57105 }, { "epoch": 0.9926471866363051, "grad_norm": 2.396565884808721, "learning_rate": 1.419416068929591e-10, "loss": 0.2479, "step": 57106 }, { "epoch": 0.992664569173808, "grad_norm": 0.9162897482004226, "learning_rate": 1.4127170848904357e-10, "loss": 0.2535, "step": 57107 }, { "epoch": 0.9926819517113108, "grad_norm": 1.5747149304045402, "learning_rate": 1.406033944077456e-10, "loss": 0.1741, "step": 57108 }, { "epoch": 0.9926993342488136, "grad_norm": 1.0044461226730976, "learning_rate": 1.3993666465106357e-10, "loss": 0.129, "step": 57109 }, { "epoch": 0.9927167167863165, "grad_norm": 1.063717332239668, "learning_rate": 1.3927151922116243e-10, "loss": 0.2545, "step": 57110 }, { "epoch": 0.9927340993238193, "grad_norm": 2.0511057309899416, "learning_rate": 1.3860795812015157e-10, "loss": 0.1677, "step": 57111 }, { "epoch": 0.9927514818613221, "grad_norm": 1.4097256251589767, "learning_rate": 1.3794598135014046e-10, "loss": 0.1356, "step": 57112 }, { "epoch": 0.992768864398825, "grad_norm": 1.207837988529043, "learning_rate": 1.372855889132385e-10, "loss": 0.2623, "step": 57113 }, { "epoch": 0.9927862469363278, "grad_norm": 2.2409569589876273, "learning_rate": 1.366267808114996e-10, "loss": 0.1348, "step": 57114 }, { "epoch": 0.9928036294738306, "grad_norm": 1.2449928039337326, "learning_rate": 1.3596955704708867e-10, "loss": 0.1065, "step": 57115 }, { "epoch": 0.9928210120113334, "grad_norm": 1.8202385424292638, "learning_rate": 1.3531391762200418e-10, "loss": 0.1566, "step": 57116 }, { "epoch": 0.9928383945488363, "grad_norm": 1.1486060637726843, "learning_rate": 1.3465986253835547e-10, "loss": 0.25, "step": 57117 }, { "epoch": 0.9928557770863391, "grad_norm": 1.2273464785720767, "learning_rate": 1.3400739179819652e-10, "loss": 0.1584, "step": 57118 }, { "epoch": 0.9928731596238419, "grad_norm": 0.9180469884120013, "learning_rate": 1.3335650540363675e-10, "loss": 0.1342, "step": 57119 }, { "epoch": 0.9928905421613448, "grad_norm": 1.488739557543113, "learning_rate": 1.327072033566745e-10, "loss": 0.1454, "step": 57120 }, { "epoch": 0.9929079246988476, "grad_norm": 1.0856734911065067, "learning_rate": 1.3205948565947477e-10, "loss": 0.1649, "step": 57121 }, { "epoch": 0.9929253072363503, "grad_norm": 1.2541089890948232, "learning_rate": 1.3141335231398043e-10, "loss": 0.1965, "step": 57122 }, { "epoch": 0.9929426897738531, "grad_norm": 1.9520913579154362, "learning_rate": 1.307688033223009e-10, "loss": 0.2205, "step": 57123 }, { "epoch": 0.992960072311356, "grad_norm": 2.7419420525530143, "learning_rate": 1.3012583868643457e-10, "loss": 0.2889, "step": 57124 }, { "epoch": 0.9929774548488588, "grad_norm": 1.0522398261411232, "learning_rate": 1.2948445840849087e-10, "loss": 0.1151, "step": 57125 }, { "epoch": 0.9929948373863616, "grad_norm": 1.4083837463385305, "learning_rate": 1.2884466249046822e-10, "loss": 0.2793, "step": 57126 }, { "epoch": 0.9930122199238645, "grad_norm": 1.992064238262032, "learning_rate": 1.28206450934365e-10, "loss": 0.1931, "step": 57127 }, { "epoch": 0.9930296024613673, "grad_norm": 1.0589879844998324, "learning_rate": 1.2756982374217962e-10, "loss": 0.1799, "step": 57128 }, { "epoch": 0.9930469849988701, "grad_norm": 1.0525234177872245, "learning_rate": 1.269347809160215e-10, "loss": 0.1505, "step": 57129 }, { "epoch": 0.993064367536373, "grad_norm": 2.062406145009882, "learning_rate": 1.2630132245788905e-10, "loss": 0.2264, "step": 57130 }, { "epoch": 0.9930817500738758, "grad_norm": 2.0462691830566513, "learning_rate": 1.2566944836966963e-10, "loss": 0.2371, "step": 57131 }, { "epoch": 0.9930991326113786, "grad_norm": 1.908225873279372, "learning_rate": 1.2503915865352822e-10, "loss": 0.2088, "step": 57132 }, { "epoch": 0.9931165151488814, "grad_norm": 1.3193462300531487, "learning_rate": 1.2441045331135214e-10, "loss": 0.1342, "step": 57133 }, { "epoch": 0.9931338976863843, "grad_norm": 1.5145613779375775, "learning_rate": 1.2378333234519534e-10, "loss": 0.2394, "step": 57134 }, { "epoch": 0.9931512802238871, "grad_norm": 1.8467554447903116, "learning_rate": 1.231577957570007e-10, "loss": 0.1468, "step": 57135 }, { "epoch": 0.9931686627613899, "grad_norm": 1.7683582644332936, "learning_rate": 1.2253384354882213e-10, "loss": 0.2176, "step": 57136 }, { "epoch": 0.9931860452988928, "grad_norm": 2.0523739864469643, "learning_rate": 1.21911475722547e-10, "loss": 0.1556, "step": 57137 }, { "epoch": 0.9932034278363956, "grad_norm": 1.2163347954016805, "learning_rate": 1.2129069228017374e-10, "loss": 0.174, "step": 57138 }, { "epoch": 0.9932208103738984, "grad_norm": 1.0386139632011504, "learning_rate": 1.2067149322370075e-10, "loss": 0.1569, "step": 57139 }, { "epoch": 0.9932381929114013, "grad_norm": 1.3737564925588202, "learning_rate": 1.2005387855507088e-10, "loss": 0.1961, "step": 57140 }, { "epoch": 0.9932555754489041, "grad_norm": 0.9583207473998926, "learning_rate": 1.1943784827622704e-10, "loss": 0.2309, "step": 57141 }, { "epoch": 0.9932729579864068, "grad_norm": 1.7371981792105087, "learning_rate": 1.1882340238916767e-10, "loss": 0.2018, "step": 57142 }, { "epoch": 0.9932903405239096, "grad_norm": 2.3190073351856917, "learning_rate": 1.182105408957801e-10, "loss": 0.1768, "step": 57143 }, { "epoch": 0.9933077230614125, "grad_norm": 1.11996441130569, "learning_rate": 1.1759926379806272e-10, "loss": 0.1615, "step": 57144 }, { "epoch": 0.9933251055989153, "grad_norm": 1.8526158676666296, "learning_rate": 1.1698957109790297e-10, "loss": 0.258, "step": 57145 }, { "epoch": 0.9933424881364181, "grad_norm": 2.0138795062085584, "learning_rate": 1.1638146279729921e-10, "loss": 0.1623, "step": 57146 }, { "epoch": 0.993359870673921, "grad_norm": 1.9149323661056774, "learning_rate": 1.1577493889813883e-10, "loss": 0.1432, "step": 57147 }, { "epoch": 0.9933772532114238, "grad_norm": 4.355323408488234, "learning_rate": 1.151699994023092e-10, "loss": 0.1285, "step": 57148 }, { "epoch": 0.9933946357489266, "grad_norm": 1.6285028076589045, "learning_rate": 1.1456664431175322e-10, "loss": 0.1453, "step": 57149 }, { "epoch": 0.9934120182864294, "grad_norm": 1.3903254152987403, "learning_rate": 1.1396487362841378e-10, "loss": 0.2499, "step": 57150 }, { "epoch": 0.9934294008239323, "grad_norm": 2.930960343930514, "learning_rate": 1.1336468735417826e-10, "loss": 0.1271, "step": 57151 }, { "epoch": 0.9934467833614351, "grad_norm": 1.0952066351517835, "learning_rate": 1.1276608549098954e-10, "loss": 0.2174, "step": 57152 }, { "epoch": 0.9934641658989379, "grad_norm": 1.5652035407581546, "learning_rate": 1.1216906804062398e-10, "loss": 0.1575, "step": 57153 }, { "epoch": 0.9934815484364408, "grad_norm": 0.8724008229958778, "learning_rate": 1.1157363500508e-10, "loss": 0.2284, "step": 57154 }, { "epoch": 0.9934989309739436, "grad_norm": 1.2723413828782197, "learning_rate": 1.1097978638624495e-10, "loss": 0.1712, "step": 57155 }, { "epoch": 0.9935163135114464, "grad_norm": 1.3071639404104118, "learning_rate": 1.1038752218595072e-10, "loss": 0.1293, "step": 57156 }, { "epoch": 0.9935336960489493, "grad_norm": 0.8116359714468749, "learning_rate": 1.0979684240608466e-10, "loss": 0.2215, "step": 57157 }, { "epoch": 0.9935510785864521, "grad_norm": 1.1444354307145899, "learning_rate": 1.092077470485897e-10, "loss": 0.1482, "step": 57158 }, { "epoch": 0.9935684611239549, "grad_norm": 0.9745132164373672, "learning_rate": 1.0862023611524219e-10, "loss": 0.2398, "step": 57159 }, { "epoch": 0.9935858436614577, "grad_norm": 1.0314587433240856, "learning_rate": 1.0803430960792948e-10, "loss": 0.2193, "step": 57160 }, { "epoch": 0.9936032261989606, "grad_norm": 0.7792108137470157, "learning_rate": 1.0744996752848346e-10, "loss": 0.1586, "step": 57161 }, { "epoch": 0.9936206087364633, "grad_norm": 1.201597380234224, "learning_rate": 1.0686720987884701e-10, "loss": 0.1775, "step": 57162 }, { "epoch": 0.9936379912739661, "grad_norm": 0.9215811901892016, "learning_rate": 1.0628603666079649e-10, "loss": 0.219, "step": 57163 }, { "epoch": 0.993655373811469, "grad_norm": 1.0203923208393584, "learning_rate": 1.0570644787616378e-10, "loss": 0.1284, "step": 57164 }, { "epoch": 0.9936727563489718, "grad_norm": 1.6033259653586034, "learning_rate": 1.0512844352683625e-10, "loss": 0.1692, "step": 57165 }, { "epoch": 0.9936901388864746, "grad_norm": 1.7322567793405246, "learning_rate": 1.0455202361459026e-10, "loss": 0.1644, "step": 57166 }, { "epoch": 0.9937075214239774, "grad_norm": 1.5066411810853628, "learning_rate": 1.0397718814125766e-10, "loss": 0.1652, "step": 57167 }, { "epoch": 0.9937249039614803, "grad_norm": 1.8275077204784553, "learning_rate": 1.0340393710872586e-10, "loss": 0.1999, "step": 57168 }, { "epoch": 0.9937422864989831, "grad_norm": 1.1897529881167208, "learning_rate": 1.028322705187712e-10, "loss": 0.1689, "step": 57169 }, { "epoch": 0.9937596690364859, "grad_norm": 1.0343928310406525, "learning_rate": 1.0226218837322553e-10, "loss": 0.2563, "step": 57170 }, { "epoch": 0.9937770515739888, "grad_norm": 1.2024509497328086, "learning_rate": 1.0169369067380973e-10, "loss": 0.1224, "step": 57171 }, { "epoch": 0.9937944341114916, "grad_norm": 2.0835179549031375, "learning_rate": 1.0112677742246666e-10, "loss": 0.2026, "step": 57172 }, { "epoch": 0.9938118166489944, "grad_norm": 1.775343099322781, "learning_rate": 1.0056144862086169e-10, "loss": 0.1695, "step": 57173 }, { "epoch": 0.9938291991864973, "grad_norm": 2.3575685766459347, "learning_rate": 9.999770427082666e-11, "loss": 0.2239, "step": 57174 }, { "epoch": 0.9938465817240001, "grad_norm": 1.2630027005346351, "learning_rate": 9.943554437419343e-11, "loss": 0.1568, "step": 57175 }, { "epoch": 0.9938639642615029, "grad_norm": 1.4582585121015303, "learning_rate": 9.887496893273839e-11, "loss": 0.189, "step": 57176 }, { "epoch": 0.9938813467990057, "grad_norm": 3.870762391565327, "learning_rate": 9.831597794818236e-11, "loss": 0.19, "step": 57177 }, { "epoch": 0.9938987293365086, "grad_norm": 2.834791454309751, "learning_rate": 9.775857142235722e-11, "loss": 0.1981, "step": 57178 }, { "epoch": 0.9939161118740114, "grad_norm": 0.9862156570717875, "learning_rate": 9.72027493569838e-11, "loss": 0.1523, "step": 57179 }, { "epoch": 0.9939334944115142, "grad_norm": 1.4108341452080682, "learning_rate": 9.664851175383849e-11, "loss": 0.18, "step": 57180 }, { "epoch": 0.9939508769490171, "grad_norm": 1.288262530571229, "learning_rate": 9.60958586146421e-11, "loss": 0.1372, "step": 57181 }, { "epoch": 0.9939682594865198, "grad_norm": 2.160926411340588, "learning_rate": 9.55447899412265e-11, "loss": 0.2963, "step": 57182 }, { "epoch": 0.9939856420240226, "grad_norm": 1.4274858363930105, "learning_rate": 9.499530573531256e-11, "loss": 0.2239, "step": 57183 }, { "epoch": 0.9940030245615255, "grad_norm": 1.1255852217725926, "learning_rate": 9.444740599862111e-11, "loss": 0.2936, "step": 57184 }, { "epoch": 0.9940204070990283, "grad_norm": 0.6886505593347888, "learning_rate": 9.3901090732873e-11, "loss": 0.1483, "step": 57185 }, { "epoch": 0.9940377896365311, "grad_norm": 0.996113821740294, "learning_rate": 9.335635993978908e-11, "loss": 0.1303, "step": 57186 }, { "epoch": 0.9940551721740339, "grad_norm": 0.9799398203616061, "learning_rate": 9.28132136212012e-11, "loss": 0.067, "step": 57187 }, { "epoch": 0.9940725547115368, "grad_norm": 1.8300588562607676, "learning_rate": 9.22716517787192e-11, "loss": 0.1327, "step": 57188 }, { "epoch": 0.9940899372490396, "grad_norm": 1.6756988856462218, "learning_rate": 9.173167441406393e-11, "loss": 0.1754, "step": 57189 }, { "epoch": 0.9941073197865424, "grad_norm": 1.0226788765918977, "learning_rate": 9.119328152906725e-11, "loss": 0.133, "step": 57190 }, { "epoch": 0.9941247023240453, "grad_norm": 1.4959375457211017, "learning_rate": 9.065647312528346e-11, "loss": 0.1188, "step": 57191 }, { "epoch": 0.9941420848615481, "grad_norm": 1.0941290623398903, "learning_rate": 9.012124920448893e-11, "loss": 0.2332, "step": 57192 }, { "epoch": 0.9941594673990509, "grad_norm": 1.6708727126015037, "learning_rate": 8.9587609768349e-11, "loss": 0.1681, "step": 57193 }, { "epoch": 0.9941768499365538, "grad_norm": 0.7585550650464277, "learning_rate": 8.90555548185845e-11, "loss": 0.163, "step": 57194 }, { "epoch": 0.9941942324740566, "grad_norm": 1.0438715012148423, "learning_rate": 8.852508435691631e-11, "loss": 0.2107, "step": 57195 }, { "epoch": 0.9942116150115594, "grad_norm": 1.0311148898417217, "learning_rate": 8.79961983849542e-11, "loss": 0.2106, "step": 57196 }, { "epoch": 0.9942289975490622, "grad_norm": 0.8072987060122763, "learning_rate": 8.746889690441906e-11, "loss": 0.1217, "step": 57197 }, { "epoch": 0.9942463800865651, "grad_norm": 1.189921804977113, "learning_rate": 8.694317991692068e-11, "loss": 0.1808, "step": 57198 }, { "epoch": 0.9942637626240679, "grad_norm": 1.1202076257969547, "learning_rate": 8.641904742423545e-11, "loss": 0.2751, "step": 57199 }, { "epoch": 0.9942811451615707, "grad_norm": 2.5028366056950873, "learning_rate": 8.589649942791765e-11, "loss": 0.2372, "step": 57200 }, { "epoch": 0.9942985276990736, "grad_norm": 1.0253095709076963, "learning_rate": 8.537553592968816e-11, "loss": 0.2282, "step": 57201 }, { "epoch": 0.9943159102365763, "grad_norm": 2.04628760469749, "learning_rate": 8.485615693115677e-11, "loss": 0.2523, "step": 57202 }, { "epoch": 0.9943332927740791, "grad_norm": 1.484459002181125, "learning_rate": 8.433836243398884e-11, "loss": 0.2489, "step": 57203 }, { "epoch": 0.9943506753115819, "grad_norm": 0.8595228153982122, "learning_rate": 8.382215243984969e-11, "loss": 0.241, "step": 57204 }, { "epoch": 0.9943680578490848, "grad_norm": 1.1718326525020841, "learning_rate": 8.330752695029364e-11, "loss": 0.1635, "step": 57205 }, { "epoch": 0.9943854403865876, "grad_norm": 1.5171998702967293, "learning_rate": 8.279448596704153e-11, "loss": 0.1405, "step": 57206 }, { "epoch": 0.9944028229240904, "grad_norm": 1.8202578420915938, "learning_rate": 8.228302949170318e-11, "loss": 0.221, "step": 57207 }, { "epoch": 0.9944202054615933, "grad_norm": 2.1338658005694686, "learning_rate": 8.177315752588842e-11, "loss": 0.2744, "step": 57208 }, { "epoch": 0.9944375879990961, "grad_norm": 1.5423133318012225, "learning_rate": 8.126487007120709e-11, "loss": 0.1513, "step": 57209 }, { "epoch": 0.9944549705365989, "grad_norm": 1.070528309072862, "learning_rate": 8.075816712926898e-11, "loss": 0.2378, "step": 57210 }, { "epoch": 0.9944723530741018, "grad_norm": 1.5645084887544762, "learning_rate": 8.025304870168392e-11, "loss": 0.1599, "step": 57211 }, { "epoch": 0.9944897356116046, "grad_norm": 1.2426923674775154, "learning_rate": 7.974951479006176e-11, "loss": 0.1676, "step": 57212 }, { "epoch": 0.9945071181491074, "grad_norm": 1.3804930140219118, "learning_rate": 7.924756539595679e-11, "loss": 0.1419, "step": 57213 }, { "epoch": 0.9945245006866102, "grad_norm": 0.7899431693953634, "learning_rate": 7.874720052097883e-11, "loss": 0.1804, "step": 57214 }, { "epoch": 0.9945418832241131, "grad_norm": 1.1507243799856115, "learning_rate": 7.824842016679322e-11, "loss": 0.1721, "step": 57215 }, { "epoch": 0.9945592657616159, "grad_norm": 1.9935593853646985, "learning_rate": 7.775122433489878e-11, "loss": 0.1846, "step": 57216 }, { "epoch": 0.9945766482991187, "grad_norm": 1.3748437920953154, "learning_rate": 7.72556130268498e-11, "loss": 0.2268, "step": 57217 }, { "epoch": 0.9945940308366216, "grad_norm": 0.8145779332684868, "learning_rate": 7.676158624425611e-11, "loss": 0.1371, "step": 57218 }, { "epoch": 0.9946114133741244, "grad_norm": 1.5798663260592185, "learning_rate": 7.626914398872753e-11, "loss": 0.1442, "step": 57219 }, { "epoch": 0.9946287959116272, "grad_norm": 1.5013096383798334, "learning_rate": 7.577828626170735e-11, "loss": 0.1858, "step": 57220 }, { "epoch": 0.9946461784491301, "grad_norm": 2.2580108743196465, "learning_rate": 7.528901306491642e-11, "loss": 0.2011, "step": 57221 }, { "epoch": 0.9946635609866328, "grad_norm": 0.7325434639949859, "learning_rate": 7.480132439974252e-11, "loss": 0.1644, "step": 57222 }, { "epoch": 0.9946809435241356, "grad_norm": 1.900886763743114, "learning_rate": 7.431522026779546e-11, "loss": 0.2268, "step": 57223 }, { "epoch": 0.9946983260616384, "grad_norm": 1.3116733531443054, "learning_rate": 7.383070067068508e-11, "loss": 0.1735, "step": 57224 }, { "epoch": 0.9947157085991413, "grad_norm": 1.5912865718089837, "learning_rate": 7.334776560985467e-11, "loss": 0.1785, "step": 57225 }, { "epoch": 0.9947330911366441, "grad_norm": 1.606640813205419, "learning_rate": 7.286641508685853e-11, "loss": 0.0816, "step": 57226 }, { "epoch": 0.9947504736741469, "grad_norm": 1.3851930680907365, "learning_rate": 7.238664910319548e-11, "loss": 0.1426, "step": 57227 }, { "epoch": 0.9947678562116498, "grad_norm": 2.5650434857542415, "learning_rate": 7.190846766047531e-11, "loss": 0.15, "step": 57228 }, { "epoch": 0.9947852387491526, "grad_norm": 1.1133325540712602, "learning_rate": 7.143187076014135e-11, "loss": 0.2209, "step": 57229 }, { "epoch": 0.9948026212866554, "grad_norm": 1.1006429195760354, "learning_rate": 7.095685840369237e-11, "loss": 0.1937, "step": 57230 }, { "epoch": 0.9948200038241582, "grad_norm": 2.9550768887922882, "learning_rate": 7.048343059268269e-11, "loss": 0.2356, "step": 57231 }, { "epoch": 0.9948373863616611, "grad_norm": 1.0966833527613322, "learning_rate": 7.001158732861113e-11, "loss": 0.1475, "step": 57232 }, { "epoch": 0.9948547688991639, "grad_norm": 1.6190785354439778, "learning_rate": 6.954132861292095e-11, "loss": 0.224, "step": 57233 }, { "epoch": 0.9948721514366667, "grad_norm": 1.4472025834479523, "learning_rate": 6.907265444716648e-11, "loss": 0.2172, "step": 57234 }, { "epoch": 0.9948895339741696, "grad_norm": 3.0578350727878116, "learning_rate": 6.86055648327355e-11, "loss": 0.2189, "step": 57235 }, { "epoch": 0.9949069165116724, "grad_norm": 0.9903510834068122, "learning_rate": 6.814005977123782e-11, "loss": 0.1969, "step": 57236 }, { "epoch": 0.9949242990491752, "grad_norm": 1.0675608506433456, "learning_rate": 6.767613926406124e-11, "loss": 0.1398, "step": 57237 }, { "epoch": 0.9949416815866781, "grad_norm": 0.7605815909113472, "learning_rate": 6.721380331270454e-11, "loss": 0.2184, "step": 57238 }, { "epoch": 0.9949590641241809, "grad_norm": 1.451803437312464, "learning_rate": 6.675305191866654e-11, "loss": 0.1562, "step": 57239 }, { "epoch": 0.9949764466616837, "grad_norm": 1.244139896966216, "learning_rate": 6.6293885083335e-11, "loss": 0.163, "step": 57240 }, { "epoch": 0.9949938291991866, "grad_norm": 1.1320743696994502, "learning_rate": 6.583630280820873e-11, "loss": 0.1721, "step": 57241 }, { "epoch": 0.9950112117366893, "grad_norm": 1.322254995524776, "learning_rate": 6.538030509473102e-11, "loss": 0.0867, "step": 57242 }, { "epoch": 0.9950285942741921, "grad_norm": 2.004808447298078, "learning_rate": 6.492589194434516e-11, "loss": 0.1687, "step": 57243 }, { "epoch": 0.9950459768116949, "grad_norm": 1.234789693122816, "learning_rate": 6.447306335849444e-11, "loss": 0.1519, "step": 57244 }, { "epoch": 0.9950633593491978, "grad_norm": 3.9816441967984026, "learning_rate": 6.402181933862216e-11, "loss": 0.193, "step": 57245 }, { "epoch": 0.9950807418867006, "grad_norm": 1.325665812012344, "learning_rate": 6.357215988611609e-11, "loss": 0.2544, "step": 57246 }, { "epoch": 0.9950981244242034, "grad_norm": 1.047475296111554, "learning_rate": 6.312408500247501e-11, "loss": 0.1795, "step": 57247 }, { "epoch": 0.9951155069617063, "grad_norm": 1.5172258121218434, "learning_rate": 6.267759468903122e-11, "loss": 0.1229, "step": 57248 }, { "epoch": 0.9951328894992091, "grad_norm": 1.796953713932488, "learning_rate": 6.22326889472835e-11, "loss": 0.2454, "step": 57249 }, { "epoch": 0.9951502720367119, "grad_norm": 1.9184812803271136, "learning_rate": 6.178936777856414e-11, "loss": 0.2088, "step": 57250 }, { "epoch": 0.9951676545742147, "grad_norm": 1.326810824703865, "learning_rate": 6.134763118437191e-11, "loss": 0.1881, "step": 57251 }, { "epoch": 0.9951850371117176, "grad_norm": 2.0764765608817797, "learning_rate": 6.090747916603911e-11, "loss": 0.1812, "step": 57252 }, { "epoch": 0.9952024196492204, "grad_norm": 1.2303017674375922, "learning_rate": 6.04689117249535e-11, "loss": 0.1698, "step": 57253 }, { "epoch": 0.9952198021867232, "grad_norm": 1.3976705580768887, "learning_rate": 6.003192886255836e-11, "loss": 0.1369, "step": 57254 }, { "epoch": 0.9952371847242261, "grad_norm": 1.1037878707149749, "learning_rate": 5.959653058018599e-11, "loss": 0.1627, "step": 57255 }, { "epoch": 0.9952545672617289, "grad_norm": 1.5693135099839783, "learning_rate": 5.916271687927965e-11, "loss": 0.2111, "step": 57256 }, { "epoch": 0.9952719497992317, "grad_norm": 0.8385919213601896, "learning_rate": 5.87304877611161e-11, "loss": 0.0583, "step": 57257 }, { "epoch": 0.9952893323367346, "grad_norm": 0.7735251627964439, "learning_rate": 5.829984322713865e-11, "loss": 0.3092, "step": 57258 }, { "epoch": 0.9953067148742374, "grad_norm": 1.0001790902112577, "learning_rate": 5.7870783278735066e-11, "loss": 0.1604, "step": 57259 }, { "epoch": 0.9953240974117402, "grad_norm": 1.375457317207283, "learning_rate": 5.74433079171821e-11, "loss": 0.186, "step": 57260 }, { "epoch": 0.9953414799492429, "grad_norm": 0.8727031067242315, "learning_rate": 5.701741714392305e-11, "loss": 0.1414, "step": 57261 }, { "epoch": 0.9953588624867458, "grad_norm": 2.071966587584172, "learning_rate": 5.659311096023467e-11, "loss": 0.1314, "step": 57262 }, { "epoch": 0.9953762450242486, "grad_norm": 1.8749287606705698, "learning_rate": 5.6170389367449225e-11, "loss": 0.2861, "step": 57263 }, { "epoch": 0.9953936275617514, "grad_norm": 1.5527525507068907, "learning_rate": 5.5749252367010004e-11, "loss": 0.2091, "step": 57264 }, { "epoch": 0.9954110100992543, "grad_norm": 1.466906230687568, "learning_rate": 5.532969996019377e-11, "loss": 0.206, "step": 57265 }, { "epoch": 0.9954283926367571, "grad_norm": 1.1092546030285089, "learning_rate": 5.491173214827727e-11, "loss": 0.111, "step": 57266 }, { "epoch": 0.9954457751742599, "grad_norm": 1.5133088488560995, "learning_rate": 5.449534893264829e-11, "loss": 0.1988, "step": 57267 }, { "epoch": 0.9954631577117627, "grad_norm": 1.1037221028029545, "learning_rate": 5.4080550314639094e-11, "loss": 0.2599, "step": 57268 }, { "epoch": 0.9954805402492656, "grad_norm": 0.7156330599948569, "learning_rate": 5.3667336295526443e-11, "loss": 0.2078, "step": 57269 }, { "epoch": 0.9954979227867684, "grad_norm": 1.2125290648321565, "learning_rate": 5.32557068766426e-11, "loss": 0.2597, "step": 57270 }, { "epoch": 0.9955153053242712, "grad_norm": 1.854881856198716, "learning_rate": 5.284566205926433e-11, "loss": 0.2147, "step": 57271 }, { "epoch": 0.9955326878617741, "grad_norm": 3.212202663222926, "learning_rate": 5.243720184472389e-11, "loss": 0.1935, "step": 57272 }, { "epoch": 0.9955500703992769, "grad_norm": 1.108727813288821, "learning_rate": 5.203032623429804e-11, "loss": 0.2539, "step": 57273 }, { "epoch": 0.9955674529367797, "grad_norm": 4.619573809658969, "learning_rate": 5.1625035229263534e-11, "loss": 0.2427, "step": 57274 }, { "epoch": 0.9955848354742826, "grad_norm": 2.04229423375023, "learning_rate": 5.122132883089714e-11, "loss": 0.1728, "step": 57275 }, { "epoch": 0.9956022180117854, "grad_norm": 1.4709349720280296, "learning_rate": 5.0819207040531107e-11, "loss": 0.2092, "step": 57276 }, { "epoch": 0.9956196005492882, "grad_norm": 0.8645819033925165, "learning_rate": 5.041866985944221e-11, "loss": 0.1586, "step": 57277 }, { "epoch": 0.995636983086791, "grad_norm": 0.8368264189714997, "learning_rate": 5.001971728879617e-11, "loss": 0.1333, "step": 57278 }, { "epoch": 0.9956543656242939, "grad_norm": 0.9671037862581703, "learning_rate": 4.9622349329980775e-11, "loss": 0.1208, "step": 57279 }, { "epoch": 0.9956717481617967, "grad_norm": 1.5217000938263074, "learning_rate": 4.922656598421726e-11, "loss": 0.1711, "step": 57280 }, { "epoch": 0.9956891306992994, "grad_norm": 1.7599035109657568, "learning_rate": 4.883236725272688e-11, "loss": 0.1749, "step": 57281 }, { "epoch": 0.9957065132368023, "grad_norm": 1.2443628047759674, "learning_rate": 4.843975313673088e-11, "loss": 0.1612, "step": 57282 }, { "epoch": 0.9957238957743051, "grad_norm": 1.0801634335057915, "learning_rate": 4.8048723637617025e-11, "loss": 0.207, "step": 57283 }, { "epoch": 0.9957412783118079, "grad_norm": 1.6434720063097596, "learning_rate": 4.7659278756440046e-11, "loss": 0.1472, "step": 57284 }, { "epoch": 0.9957586608493108, "grad_norm": 1.7689298756839456, "learning_rate": 4.727141849458771e-11, "loss": 0.1746, "step": 57285 }, { "epoch": 0.9957760433868136, "grad_norm": 2.0317228607022213, "learning_rate": 4.6885142853170246e-11, "loss": 0.2714, "step": 57286 }, { "epoch": 0.9957934259243164, "grad_norm": 1.2397421145612564, "learning_rate": 4.6500451833519914e-11, "loss": 0.1659, "step": 57287 }, { "epoch": 0.9958108084618192, "grad_norm": 2.5127997938958524, "learning_rate": 4.6117345436746947e-11, "loss": 0.2871, "step": 57288 }, { "epoch": 0.9958281909993221, "grad_norm": 1.9024371369883581, "learning_rate": 4.57358236641836e-11, "loss": 0.2011, "step": 57289 }, { "epoch": 0.9958455735368249, "grad_norm": 1.0463642854167627, "learning_rate": 4.535588651694011e-11, "loss": 0.1937, "step": 57290 }, { "epoch": 0.9958629560743277, "grad_norm": 1.36101107074597, "learning_rate": 4.4977533996293224e-11, "loss": 0.096, "step": 57291 }, { "epoch": 0.9958803386118306, "grad_norm": 1.1324726813470019, "learning_rate": 4.460076610335317e-11, "loss": 0.117, "step": 57292 }, { "epoch": 0.9958977211493334, "grad_norm": 1.4406495144453333, "learning_rate": 4.42255828393967e-11, "loss": 0.1176, "step": 57293 }, { "epoch": 0.9959151036868362, "grad_norm": 1.1635609864209304, "learning_rate": 4.3851984205589554e-11, "loss": 0.172, "step": 57294 }, { "epoch": 0.995932486224339, "grad_norm": 1.6071594841843402, "learning_rate": 4.347997020309746e-11, "loss": 0.2593, "step": 57295 }, { "epoch": 0.9959498687618419, "grad_norm": 2.068583322770931, "learning_rate": 4.3109540833141663e-11, "loss": 0.3763, "step": 57296 }, { "epoch": 0.9959672512993447, "grad_norm": 0.9204253761232464, "learning_rate": 4.274069609683239e-11, "loss": 0.1441, "step": 57297 }, { "epoch": 0.9959846338368475, "grad_norm": 0.9258080024785232, "learning_rate": 4.2373435995390896e-11, "loss": 0.1614, "step": 57298 }, { "epoch": 0.9960020163743504, "grad_norm": 4.638582342388811, "learning_rate": 4.2007760529927383e-11, "loss": 0.1749, "step": 57299 }, { "epoch": 0.9960193989118532, "grad_norm": 2.8764530932112606, "learning_rate": 4.164366970166311e-11, "loss": 0.3403, "step": 57300 }, { "epoch": 0.9960367814493559, "grad_norm": 1.3073439297176181, "learning_rate": 4.128116351176381e-11, "loss": 0.1888, "step": 57301 }, { "epoch": 0.9960541639868588, "grad_norm": 1.6780444176820235, "learning_rate": 4.092024196128418e-11, "loss": 0.1046, "step": 57302 }, { "epoch": 0.9960715465243616, "grad_norm": 0.9805374821232746, "learning_rate": 4.0560905051445496e-11, "loss": 0.1604, "step": 57303 }, { "epoch": 0.9960889290618644, "grad_norm": 1.2074096846034394, "learning_rate": 4.020315278341346e-11, "loss": 0.2463, "step": 57304 }, { "epoch": 0.9961063115993672, "grad_norm": 1.2686183074694428, "learning_rate": 3.9846985158242806e-11, "loss": 0.2196, "step": 57305 }, { "epoch": 0.9961236941368701, "grad_norm": 1.265356013353408, "learning_rate": 3.949240217704375e-11, "loss": 0.2438, "step": 57306 }, { "epoch": 0.9961410766743729, "grad_norm": 1.5972483656659802, "learning_rate": 3.9139403841037534e-11, "loss": 0.1905, "step": 57307 }, { "epoch": 0.9961584592118757, "grad_norm": 1.7716644511618103, "learning_rate": 3.8787990151278874e-11, "loss": 0.2338, "step": 57308 }, { "epoch": 0.9961758417493786, "grad_norm": 1.155001776984593, "learning_rate": 3.84381611089335e-11, "loss": 0.154, "step": 57309 }, { "epoch": 0.9961932242868814, "grad_norm": 1.8441360820193455, "learning_rate": 3.8089916715056123e-11, "loss": 0.2313, "step": 57310 }, { "epoch": 0.9962106068243842, "grad_norm": 2.1369769578610223, "learning_rate": 3.774325697075698e-11, "loss": 0.2511, "step": 57311 }, { "epoch": 0.9962279893618871, "grad_norm": 1.0298909872372226, "learning_rate": 3.7398181877146275e-11, "loss": 0.121, "step": 57312 }, { "epoch": 0.9962453718993899, "grad_norm": 1.2767023282926067, "learning_rate": 3.705469143533424e-11, "loss": 0.1704, "step": 57313 }, { "epoch": 0.9962627544368927, "grad_norm": 2.1436967460366745, "learning_rate": 3.671278564637559e-11, "loss": 0.201, "step": 57314 }, { "epoch": 0.9962801369743955, "grad_norm": 2.0574787398499126, "learning_rate": 3.637246451138054e-11, "loss": 0.1375, "step": 57315 }, { "epoch": 0.9962975195118984, "grad_norm": 1.6254371801851897, "learning_rate": 3.603372803145932e-11, "loss": 0.1306, "step": 57316 }, { "epoch": 0.9963149020494012, "grad_norm": 0.8215906611023257, "learning_rate": 3.5696576207611126e-11, "loss": 0.0969, "step": 57317 }, { "epoch": 0.996332284586904, "grad_norm": 1.260434553251417, "learning_rate": 3.5361009040946186e-11, "loss": 0.0881, "step": 57318 }, { "epoch": 0.9963496671244069, "grad_norm": 1.0447765544746146, "learning_rate": 3.502702653251921e-11, "loss": 0.2068, "step": 57319 }, { "epoch": 0.9963670496619097, "grad_norm": 1.8727609158536676, "learning_rate": 3.4694628683384906e-11, "loss": 0.2199, "step": 57320 }, { "epoch": 0.9963844321994124, "grad_norm": 1.88780194290998, "learning_rate": 3.4363815494597994e-11, "loss": 0.1591, "step": 57321 }, { "epoch": 0.9964018147369152, "grad_norm": 2.27036927457975, "learning_rate": 3.4034586967213174e-11, "loss": 0.3454, "step": 57322 }, { "epoch": 0.9964191972744181, "grad_norm": 2.078532905003446, "learning_rate": 3.3706943102285166e-11, "loss": 0.2772, "step": 57323 }, { "epoch": 0.9964365798119209, "grad_norm": 1.359107224772594, "learning_rate": 3.3380883900868683e-11, "loss": 0.3319, "step": 57324 }, { "epoch": 0.9964539623494237, "grad_norm": 1.6172908516238538, "learning_rate": 3.305640936390741e-11, "loss": 0.4089, "step": 57325 }, { "epoch": 0.9964713448869266, "grad_norm": 1.6290647797556024, "learning_rate": 3.273351949251157e-11, "loss": 0.2822, "step": 57326 }, { "epoch": 0.9964887274244294, "grad_norm": 0.9654661272999914, "learning_rate": 3.241221428773589e-11, "loss": 0.2476, "step": 57327 }, { "epoch": 0.9965061099619322, "grad_norm": 1.870591852006521, "learning_rate": 3.2092493750468516e-11, "loss": 0.2515, "step": 57328 }, { "epoch": 0.9965234924994351, "grad_norm": 2.063430605081936, "learning_rate": 3.1774357881875215e-11, "loss": 0.3379, "step": 57329 }, { "epoch": 0.9965408750369379, "grad_norm": 1.2251919317796047, "learning_rate": 3.145780668284414e-11, "loss": 0.1541, "step": 57330 }, { "epoch": 0.9965582575744407, "grad_norm": 1.621747822270595, "learning_rate": 3.1142840154430025e-11, "loss": 0.1401, "step": 57331 }, { "epoch": 0.9965756401119436, "grad_norm": 1.469050750262883, "learning_rate": 3.082945829763206e-11, "loss": 0.1241, "step": 57332 }, { "epoch": 0.9965930226494464, "grad_norm": 1.542891266073111, "learning_rate": 3.0517661113449445e-11, "loss": 0.1563, "step": 57333 }, { "epoch": 0.9966104051869492, "grad_norm": 2.813555289734275, "learning_rate": 3.020744860282587e-11, "loss": 0.2222, "step": 57334 }, { "epoch": 0.996627787724452, "grad_norm": 1.140196964874403, "learning_rate": 2.9898820766816046e-11, "loss": 0.1529, "step": 57335 }, { "epoch": 0.9966451702619549, "grad_norm": 0.8660415310970712, "learning_rate": 2.9591777606308154e-11, "loss": 0.2045, "step": 57336 }, { "epoch": 0.9966625527994577, "grad_norm": 1.5644419554263342, "learning_rate": 2.9286319122412415e-11, "loss": 0.2084, "step": 57337 }, { "epoch": 0.9966799353369605, "grad_norm": 1.7111360205579136, "learning_rate": 2.898244531590599e-11, "loss": 0.1853, "step": 57338 }, { "epoch": 0.9966973178744634, "grad_norm": 3.1961800331245627, "learning_rate": 2.8680156187954607e-11, "loss": 0.1943, "step": 57339 }, { "epoch": 0.9967147004119662, "grad_norm": 1.7945142410601223, "learning_rate": 2.8379451739335424e-11, "loss": 0.1127, "step": 57340 }, { "epoch": 0.9967320829494689, "grad_norm": 1.7432746848136451, "learning_rate": 2.8080331971158667e-11, "loss": 0.2042, "step": 57341 }, { "epoch": 0.9967494654869717, "grad_norm": 1.2945974642941822, "learning_rate": 2.7782796884256997e-11, "loss": 0.0999, "step": 57342 }, { "epoch": 0.9967668480244746, "grad_norm": 1.3104563058006353, "learning_rate": 2.7486846479629623e-11, "loss": 0.169, "step": 57343 }, { "epoch": 0.9967842305619774, "grad_norm": 1.018031457586295, "learning_rate": 2.7192480758164716e-11, "loss": 0.2078, "step": 57344 }, { "epoch": 0.9968016130994802, "grad_norm": 1.5240533331102097, "learning_rate": 2.689969972086148e-11, "loss": 0.1324, "step": 57345 }, { "epoch": 0.9968189956369831, "grad_norm": 2.5277407859231307, "learning_rate": 2.6608503368608093e-11, "loss": 0.1781, "step": 57346 }, { "epoch": 0.9968363781744859, "grad_norm": 1.4888067537575476, "learning_rate": 2.6318891702348246e-11, "loss": 0.2003, "step": 57347 }, { "epoch": 0.9968537607119887, "grad_norm": 1.1472860127879552, "learning_rate": 2.6030864723025624e-11, "loss": 0.229, "step": 57348 }, { "epoch": 0.9968711432494916, "grad_norm": 1.068286367009059, "learning_rate": 2.57444224314729e-11, "loss": 0.2091, "step": 57349 }, { "epoch": 0.9968885257869944, "grad_norm": 1.9433534074073149, "learning_rate": 2.545956482863376e-11, "loss": 0.2403, "step": 57350 }, { "epoch": 0.9969059083244972, "grad_norm": 1.0585385413999695, "learning_rate": 2.5176291915396386e-11, "loss": 0.1723, "step": 57351 }, { "epoch": 0.996923290862, "grad_norm": 0.8701170460401555, "learning_rate": 2.4894603692759975e-11, "loss": 0.2069, "step": 57352 }, { "epoch": 0.9969406733995029, "grad_norm": 1.733365328990585, "learning_rate": 2.461450016144617e-11, "loss": 0.1497, "step": 57353 }, { "epoch": 0.9969580559370057, "grad_norm": 2.0445650342455637, "learning_rate": 2.433598132250969e-11, "loss": 0.1774, "step": 57354 }, { "epoch": 0.9969754384745085, "grad_norm": 1.3050398708023365, "learning_rate": 2.405904717672769e-11, "loss": 0.1966, "step": 57355 }, { "epoch": 0.9969928210120114, "grad_norm": 1.6355719460990938, "learning_rate": 2.3783697724988338e-11, "loss": 0.1827, "step": 57356 }, { "epoch": 0.9970102035495142, "grad_norm": 0.8595543765268127, "learning_rate": 2.3509932968235335e-11, "loss": 0.2313, "step": 57357 }, { "epoch": 0.997027586087017, "grad_norm": 0.8785357632912801, "learning_rate": 2.3237752907245834e-11, "loss": 0.1236, "step": 57358 }, { "epoch": 0.9970449686245199, "grad_norm": 1.1153134802962337, "learning_rate": 2.296715754290801e-11, "loss": 0.1457, "step": 57359 }, { "epoch": 0.9970623511620227, "grad_norm": 1.2329777883485604, "learning_rate": 2.2698146876110048e-11, "loss": 0.1883, "step": 57360 }, { "epoch": 0.9970797336995254, "grad_norm": 0.9809369662705051, "learning_rate": 2.2430720907684608e-11, "loss": 0.1968, "step": 57361 }, { "epoch": 0.9970971162370282, "grad_norm": 3.6619341893571145, "learning_rate": 2.2164879638519872e-11, "loss": 0.1715, "step": 57362 }, { "epoch": 0.9971144987745311, "grad_norm": 1.286258841352531, "learning_rate": 2.1900623069392997e-11, "loss": 0.1105, "step": 57363 }, { "epoch": 0.9971318813120339, "grad_norm": 5.923669275075251, "learning_rate": 2.1637951201136652e-11, "loss": 0.2691, "step": 57364 }, { "epoch": 0.9971492638495367, "grad_norm": 2.7487439541357603, "learning_rate": 2.137686403469452e-11, "loss": 0.3298, "step": 57365 }, { "epoch": 0.9971666463870396, "grad_norm": 0.8543142111265876, "learning_rate": 2.111736157073274e-11, "loss": 0.1604, "step": 57366 }, { "epoch": 0.9971840289245424, "grad_norm": 1.3468085870560433, "learning_rate": 2.0859443810195e-11, "loss": 0.1816, "step": 57367 }, { "epoch": 0.9972014114620452, "grad_norm": 1.2164577967409629, "learning_rate": 2.0603110753913966e-11, "loss": 0.2231, "step": 57368 }, { "epoch": 0.997218793999548, "grad_norm": 1.4310600834698108, "learning_rate": 2.0348362402555776e-11, "loss": 0.1329, "step": 57369 }, { "epoch": 0.9972361765370509, "grad_norm": 2.0677133297514376, "learning_rate": 2.0095198757064112e-11, "loss": 0.1939, "step": 57370 }, { "epoch": 0.9972535590745537, "grad_norm": 1.0804777849912883, "learning_rate": 1.984361981821614e-11, "loss": 0.1749, "step": 57371 }, { "epoch": 0.9972709416120565, "grad_norm": 1.2361391336910401, "learning_rate": 1.9593625586789007e-11, "loss": 0.1786, "step": 57372 }, { "epoch": 0.9972883241495594, "grad_norm": 1.4617107366564732, "learning_rate": 1.9345216063559877e-11, "loss": 0.2368, "step": 57373 }, { "epoch": 0.9973057066870622, "grad_norm": 1.7448332124781771, "learning_rate": 1.90983912493059e-11, "loss": 0.1334, "step": 57374 }, { "epoch": 0.997323089224565, "grad_norm": 0.9956407266483535, "learning_rate": 1.885315114491526e-11, "loss": 0.155, "step": 57375 }, { "epoch": 0.9973404717620679, "grad_norm": 2.9245940801742987, "learning_rate": 1.860949575099857e-11, "loss": 0.1089, "step": 57376 }, { "epoch": 0.9973578542995707, "grad_norm": 1.867477373241414, "learning_rate": 1.836742506849953e-11, "loss": 0.1697, "step": 57377 }, { "epoch": 0.9973752368370735, "grad_norm": 0.9974615532788286, "learning_rate": 1.812693909802876e-11, "loss": 0.2376, "step": 57378 }, { "epoch": 0.9973926193745764, "grad_norm": 0.8966084129688484, "learning_rate": 1.7888037840474436e-11, "loss": 0.2094, "step": 57379 }, { "epoch": 0.9974100019120792, "grad_norm": 1.500199832000169, "learning_rate": 1.765072129650269e-11, "loss": 0.2202, "step": 57380 }, { "epoch": 0.9974273844495819, "grad_norm": 0.9640508966468375, "learning_rate": 1.7414989466946195e-11, "loss": 0.1122, "step": 57381 }, { "epoch": 0.9974447669870847, "grad_norm": 2.324956814753384, "learning_rate": 1.718084235247108e-11, "loss": 0.1838, "step": 57382 }, { "epoch": 0.9974621495245876, "grad_norm": 1.4658610039656372, "learning_rate": 1.6948279953910016e-11, "loss": 0.1653, "step": 57383 }, { "epoch": 0.9974795320620904, "grad_norm": 1.1481166605927677, "learning_rate": 1.6717302271929133e-11, "loss": 0.1376, "step": 57384 }, { "epoch": 0.9974969145995932, "grad_norm": 1.058724797161308, "learning_rate": 1.648790930725008e-11, "loss": 0.1183, "step": 57385 }, { "epoch": 0.997514297137096, "grad_norm": 5.733210883837069, "learning_rate": 1.626010106065001e-11, "loss": 0.2543, "step": 57386 }, { "epoch": 0.9975316796745989, "grad_norm": 1.062523790818096, "learning_rate": 1.603387753285057e-11, "loss": 0.1778, "step": 57387 }, { "epoch": 0.9975490622121017, "grad_norm": 0.8349435886087233, "learning_rate": 1.5809238724517893e-11, "loss": 0.2003, "step": 57388 }, { "epoch": 0.9975664447496045, "grad_norm": 2.4003792955004664, "learning_rate": 1.5586184636429133e-11, "loss": 0.1111, "step": 57389 }, { "epoch": 0.9975838272871074, "grad_norm": 1.0656920594789432, "learning_rate": 1.5364715269250427e-11, "loss": 0.1038, "step": 57390 }, { "epoch": 0.9976012098246102, "grad_norm": 1.6763939739956397, "learning_rate": 1.514483062364791e-11, "loss": 0.1858, "step": 57391 }, { "epoch": 0.997618592362113, "grad_norm": 1.5291990539834295, "learning_rate": 1.492653070039873e-11, "loss": 0.2608, "step": 57392 }, { "epoch": 0.9976359748996159, "grad_norm": 1.8584944628586777, "learning_rate": 1.4709815500169033e-11, "loss": 0.1978, "step": 57393 }, { "epoch": 0.9976533574371187, "grad_norm": 1.441114640558419, "learning_rate": 1.4494685023624942e-11, "loss": 0.1487, "step": 57394 }, { "epoch": 0.9976707399746215, "grad_norm": 0.856043520677678, "learning_rate": 1.4281139271432597e-11, "loss": 0.1647, "step": 57395 }, { "epoch": 0.9976881225121244, "grad_norm": 1.4232840426125997, "learning_rate": 1.406917824431364e-11, "loss": 0.1678, "step": 57396 }, { "epoch": 0.9977055050496272, "grad_norm": 1.2712036576133063, "learning_rate": 1.3858801942934206e-11, "loss": 0.205, "step": 57397 }, { "epoch": 0.99772288758713, "grad_norm": 1.0464188302685495, "learning_rate": 1.3650010367904918e-11, "loss": 0.1678, "step": 57398 }, { "epoch": 0.9977402701246328, "grad_norm": 1.3506076069313184, "learning_rate": 1.3442803520002932e-11, "loss": 0.1295, "step": 57399 }, { "epoch": 0.9977576526621356, "grad_norm": 1.0045890887937605, "learning_rate": 1.3237181399783359e-11, "loss": 0.1249, "step": 57400 }, { "epoch": 0.9977750351996384, "grad_norm": 4.82568571889624, "learning_rate": 1.3033144007912333e-11, "loss": 0.2421, "step": 57401 }, { "epoch": 0.9977924177371412, "grad_norm": 3.5858395213130594, "learning_rate": 1.2830691345000478e-11, "loss": 0.1564, "step": 57402 }, { "epoch": 0.997809800274644, "grad_norm": 1.1587202071189073, "learning_rate": 1.2629823411824947e-11, "loss": 0.2091, "step": 57403 }, { "epoch": 0.9978271828121469, "grad_norm": 0.9118532284501104, "learning_rate": 1.2430540208885342e-11, "loss": 0.2414, "step": 57404 }, { "epoch": 0.9978445653496497, "grad_norm": 1.3440161762583842, "learning_rate": 1.2232841736903309e-11, "loss": 0.2393, "step": 57405 }, { "epoch": 0.9978619478871525, "grad_norm": 1.1394837480890374, "learning_rate": 1.2036727996433959e-11, "loss": 0.1798, "step": 57406 }, { "epoch": 0.9978793304246554, "grad_norm": 1.3414912889063342, "learning_rate": 1.1842198988143427e-11, "loss": 0.2151, "step": 57407 }, { "epoch": 0.9978967129621582, "grad_norm": 1.4938710175244225, "learning_rate": 1.164925471264233e-11, "loss": 0.2164, "step": 57408 }, { "epoch": 0.997914095499661, "grad_norm": 1.5331607282060964, "learning_rate": 1.1457895170541299e-11, "loss": 0.1354, "step": 57409 }, { "epoch": 0.9979314780371639, "grad_norm": 0.9780374187390982, "learning_rate": 1.126812036245095e-11, "loss": 0.2989, "step": 57410 }, { "epoch": 0.9979488605746667, "grad_norm": 0.9601702361873853, "learning_rate": 1.1079930288926398e-11, "loss": 0.2656, "step": 57411 }, { "epoch": 0.9979662431121695, "grad_norm": 1.244005537726464, "learning_rate": 1.0893324950633776e-11, "loss": 0.2567, "step": 57412 }, { "epoch": 0.9979836256496724, "grad_norm": 1.5673897417006184, "learning_rate": 1.0708304348128195e-11, "loss": 0.4621, "step": 57413 }, { "epoch": 0.9980010081871752, "grad_norm": 1.3165450781672625, "learning_rate": 1.0524868482020277e-11, "loss": 0.1339, "step": 57414 }, { "epoch": 0.998018390724678, "grad_norm": 1.7080177152299019, "learning_rate": 1.0343017352865136e-11, "loss": 0.1834, "step": 57415 }, { "epoch": 0.9980357732621808, "grad_norm": 1.0888428340123764, "learning_rate": 1.0162750961217881e-11, "loss": 0.174, "step": 57416 }, { "epoch": 0.9980531557996837, "grad_norm": 1.0970717681887343, "learning_rate": 9.984069307689136e-12, "loss": 0.1574, "step": 57417 }, { "epoch": 0.9980705383371865, "grad_norm": 0.8933182009162773, "learning_rate": 9.806972392834012e-12, "loss": 0.1651, "step": 57418 }, { "epoch": 0.9980879208746893, "grad_norm": 1.1276550288409537, "learning_rate": 9.631460217207621e-12, "loss": 0.1997, "step": 57419 }, { "epoch": 0.9981053034121921, "grad_norm": 2.316490865202298, "learning_rate": 9.457532781365074e-12, "loss": 0.1444, "step": 57420 }, { "epoch": 0.9981226859496949, "grad_norm": 1.4423667630951247, "learning_rate": 9.285190085916995e-12, "loss": 0.2514, "step": 57421 }, { "epoch": 0.9981400684871977, "grad_norm": 1.4785817923388214, "learning_rate": 9.11443213130747e-12, "loss": 0.1926, "step": 57422 }, { "epoch": 0.9981574510247005, "grad_norm": 1.176173056213727, "learning_rate": 8.945258918147125e-12, "loss": 0.1808, "step": 57423 }, { "epoch": 0.9981748335622034, "grad_norm": 0.5494723539612987, "learning_rate": 8.77767044693556e-12, "loss": 0.1623, "step": 57424 }, { "epoch": 0.9981922160997062, "grad_norm": 1.407448489311934, "learning_rate": 8.611666718283395e-12, "loss": 0.1853, "step": 57425 }, { "epoch": 0.998209598637209, "grad_norm": 1.22625503210732, "learning_rate": 8.44724773257921e-12, "loss": 0.1939, "step": 57426 }, { "epoch": 0.9982269811747119, "grad_norm": 1.1283032241139603, "learning_rate": 8.284413490433628e-12, "loss": 0.1236, "step": 57427 }, { "epoch": 0.9982443637122147, "grad_norm": 1.2658859321406337, "learning_rate": 8.12316399240176e-12, "loss": 0.1952, "step": 57428 }, { "epoch": 0.9982617462497175, "grad_norm": 2.091712899050358, "learning_rate": 7.963499238872185e-12, "loss": 0.1865, "step": 57429 }, { "epoch": 0.9982791287872204, "grad_norm": 1.1146893195608032, "learning_rate": 7.805419230455524e-12, "loss": 0.1757, "step": 57430 }, { "epoch": 0.9982965113247232, "grad_norm": 1.736125892873413, "learning_rate": 7.648923967651377e-12, "loss": 0.1969, "step": 57431 }, { "epoch": 0.998313893862226, "grad_norm": 1.653635257688612, "learning_rate": 7.494013450848324e-12, "loss": 0.1705, "step": 57432 }, { "epoch": 0.9983312763997289, "grad_norm": 1.3290880961395406, "learning_rate": 7.340687680656987e-12, "loss": 0.1245, "step": 57433 }, { "epoch": 0.9983486589372317, "grad_norm": 2.0119082110204434, "learning_rate": 7.188946657521456e-12, "loss": 0.1987, "step": 57434 }, { "epoch": 0.9983660414747345, "grad_norm": 0.9350219904353794, "learning_rate": 7.038790381885818e-12, "loss": 0.4247, "step": 57435 }, { "epoch": 0.9983834240122373, "grad_norm": 1.0817911322214158, "learning_rate": 6.890218854305185e-12, "loss": 0.1079, "step": 57436 }, { "epoch": 0.9984008065497402, "grad_norm": 1.6254550731369406, "learning_rate": 6.743232075168137e-12, "loss": 0.1301, "step": 57437 }, { "epoch": 0.998418189087243, "grad_norm": 1.4604397715418804, "learning_rate": 6.597830045029784e-12, "loss": 0.2214, "step": 57438 }, { "epoch": 0.9984355716247458, "grad_norm": 1.6418203568800502, "learning_rate": 6.4540127642231936e-12, "loss": 0.2124, "step": 57439 }, { "epoch": 0.9984529541622486, "grad_norm": 2.0130916074581466, "learning_rate": 6.311780233303476e-12, "loss": 0.3224, "step": 57440 }, { "epoch": 0.9984703366997514, "grad_norm": 1.872060248628419, "learning_rate": 6.171132452714723e-12, "loss": 0.1688, "step": 57441 }, { "epoch": 0.9984877192372542, "grad_norm": 1.3157810328142554, "learning_rate": 6.03206942290102e-12, "loss": 0.1908, "step": 57442 }, { "epoch": 0.998505101774757, "grad_norm": 1.152200880043127, "learning_rate": 5.894591144250949e-12, "loss": 0.1366, "step": 57443 }, { "epoch": 0.9985224843122599, "grad_norm": 1.6240698387703905, "learning_rate": 5.7586976172641075e-12, "loss": 0.1356, "step": 57444 }, { "epoch": 0.9985398668497627, "grad_norm": 0.9601951550606658, "learning_rate": 5.624388842329075e-12, "loss": 0.1823, "step": 57445 }, { "epoch": 0.9985572493872655, "grad_norm": 1.1227063289462453, "learning_rate": 5.491664819889941e-12, "loss": 0.2059, "step": 57446 }, { "epoch": 0.9985746319247684, "grad_norm": 2.899027527214247, "learning_rate": 5.360525550335282e-12, "loss": 0.1527, "step": 57447 }, { "epoch": 0.9985920144622712, "grad_norm": 1.2588827131387936, "learning_rate": 5.230971034164699e-12, "loss": 0.1582, "step": 57448 }, { "epoch": 0.998609396999774, "grad_norm": 1.4047910393497638, "learning_rate": 5.10300127171126e-12, "loss": 0.1219, "step": 57449 }, { "epoch": 0.9986267795372769, "grad_norm": 1.7378420593597514, "learning_rate": 4.976616263363542e-12, "loss": 0.0834, "step": 57450 }, { "epoch": 0.9986441620747797, "grad_norm": 1.2770293774644497, "learning_rate": 4.851816009621146e-12, "loss": 0.1449, "step": 57451 }, { "epoch": 0.9986615446122825, "grad_norm": 1.2694680277404233, "learning_rate": 4.728600510761626e-12, "loss": 0.1357, "step": 57452 }, { "epoch": 0.9986789271497853, "grad_norm": 0.9263050044104653, "learning_rate": 4.6069697672845855e-12, "loss": 0.1813, "step": 57453 }, { "epoch": 0.9986963096872882, "grad_norm": 1.6477159565947865, "learning_rate": 4.486923779467578e-12, "loss": 0.5327, "step": 57454 }, { "epoch": 0.998713692224791, "grad_norm": 1.3843597002837653, "learning_rate": 4.368462547810203e-12, "loss": 0.1195, "step": 57455 }, { "epoch": 0.9987310747622938, "grad_norm": 0.8251873726609251, "learning_rate": 4.251586072590018e-12, "loss": 0.2241, "step": 57456 }, { "epoch": 0.9987484572997967, "grad_norm": 1.1493697572978576, "learning_rate": 4.136294354195602e-12, "loss": 0.1601, "step": 57457 }, { "epoch": 0.9987658398372995, "grad_norm": 0.7686447472346447, "learning_rate": 4.022587393071042e-12, "loss": 0.12, "step": 57458 }, { "epoch": 0.9987832223748023, "grad_norm": 2.8576266566451296, "learning_rate": 3.910465189438383e-12, "loss": 0.2686, "step": 57459 }, { "epoch": 0.998800604912305, "grad_norm": 4.552809472632486, "learning_rate": 3.7999277437972265e-12, "loss": 0.2442, "step": 57460 }, { "epoch": 0.9988179874498079, "grad_norm": 2.0739260453368313, "learning_rate": 3.690975056369616e-12, "loss": 0.113, "step": 57461 }, { "epoch": 0.9988353699873107, "grad_norm": 1.1258145855746335, "learning_rate": 3.5836071275996415e-12, "loss": 0.2049, "step": 57462 }, { "epoch": 0.9988527525248135, "grad_norm": 1.682780888681106, "learning_rate": 3.4778239578203696e-12, "loss": 0.2161, "step": 57463 }, { "epoch": 0.9988701350623164, "grad_norm": 1.2915002308687165, "learning_rate": 3.3736255473093554e-12, "loss": 0.1279, "step": 57464 }, { "epoch": 0.9988875175998192, "grad_norm": 0.992149498719433, "learning_rate": 3.271011896399667e-12, "loss": 0.0937, "step": 57465 }, { "epoch": 0.998904900137322, "grad_norm": 1.9668974295438004, "learning_rate": 3.1699830054798816e-12, "loss": 0.251, "step": 57466 }, { "epoch": 0.9989222826748249, "grad_norm": 1.3651091815935312, "learning_rate": 3.070538874772044e-12, "loss": 0.1697, "step": 57467 }, { "epoch": 0.9989396652123277, "grad_norm": 1.7202259203882748, "learning_rate": 2.9726795047202435e-12, "loss": 0.2578, "step": 57468 }, { "epoch": 0.9989570477498305, "grad_norm": 1.1620524736127413, "learning_rate": 2.8764048955465248e-12, "loss": 0.1838, "step": 57469 }, { "epoch": 0.9989744302873333, "grad_norm": 2.3477011927344433, "learning_rate": 2.7817150475284433e-12, "loss": 0.2237, "step": 57470 }, { "epoch": 0.9989918128248362, "grad_norm": 1.0870620333153922, "learning_rate": 2.6886099610545776e-12, "loss": 0.0858, "step": 57471 }, { "epoch": 0.999009195362339, "grad_norm": 0.976194265107243, "learning_rate": 2.597089636402483e-12, "loss": 0.2003, "step": 57472 }, { "epoch": 0.9990265778998418, "grad_norm": 1.7417879843401056, "learning_rate": 2.5071540737942043e-12, "loss": 0.1297, "step": 57473 }, { "epoch": 0.9990439604373447, "grad_norm": 1.033912251377176, "learning_rate": 2.4188032735628084e-12, "loss": 0.1792, "step": 57474 }, { "epoch": 0.9990613429748475, "grad_norm": 1.417591425038424, "learning_rate": 2.332037235985851e-12, "loss": 0.1792, "step": 57475 }, { "epoch": 0.9990787255123503, "grad_norm": 1.1823639661645724, "learning_rate": 2.2468559613408878e-12, "loss": 0.1789, "step": 57476 }, { "epoch": 0.9990961080498532, "grad_norm": 1.7679824930773342, "learning_rate": 2.1632594499054747e-12, "loss": 0.1576, "step": 57477 }, { "epoch": 0.999113490587356, "grad_norm": 1.9911231077570435, "learning_rate": 2.081247701901656e-12, "loss": 0.2074, "step": 57478 }, { "epoch": 0.9991308731248588, "grad_norm": 1.9481354976323662, "learning_rate": 2.000820717662499e-12, "loss": 0.1109, "step": 57479 }, { "epoch": 0.9991482556623615, "grad_norm": 1.326051612772885, "learning_rate": 1.9219784973545373e-12, "loss": 0.1289, "step": 57480 }, { "epoch": 0.9991656381998644, "grad_norm": 0.9748615673047281, "learning_rate": 1.8447210413108373e-12, "loss": 0.1937, "step": 57481 }, { "epoch": 0.9991830207373672, "grad_norm": 0.8629988238189231, "learning_rate": 1.7690483496979325e-12, "loss": 0.2124, "step": 57482 }, { "epoch": 0.99920040327487, "grad_norm": 1.3839745156434669, "learning_rate": 1.69496042284889e-12, "loss": 0.1797, "step": 57483 }, { "epoch": 0.9992177858123729, "grad_norm": 1.7506892923520962, "learning_rate": 1.6224572609302434e-12, "loss": 0.2895, "step": 57484 }, { "epoch": 0.9992351683498757, "grad_norm": 1.05816922489465, "learning_rate": 1.551538864164037e-12, "loss": 0.1335, "step": 57485 }, { "epoch": 0.9992525508873785, "grad_norm": 1.506414617680576, "learning_rate": 1.4822052327723155e-12, "loss": 0.3235, "step": 57486 }, { "epoch": 0.9992699334248814, "grad_norm": 1.1568735552370493, "learning_rate": 1.414456367088146e-12, "loss": 0.1624, "step": 57487 }, { "epoch": 0.9992873159623842, "grad_norm": 1.3684534854155839, "learning_rate": 1.3482922671670394e-12, "loss": 0.1772, "step": 57488 }, { "epoch": 0.999304698499887, "grad_norm": 0.6953534926948682, "learning_rate": 1.2837129333420626e-12, "loss": 0.1345, "step": 57489 }, { "epoch": 0.9993220810373898, "grad_norm": 1.3788903769484608, "learning_rate": 1.220718365724238e-12, "loss": 0.2569, "step": 57490 }, { "epoch": 0.9993394635748927, "grad_norm": 1.9667256829973474, "learning_rate": 1.1593085645911215e-12, "loss": 0.3085, "step": 57491 }, { "epoch": 0.9993568461123955, "grad_norm": 2.1037768462241, "learning_rate": 1.0994835300537354e-12, "loss": 0.2689, "step": 57492 }, { "epoch": 0.9993742286498983, "grad_norm": 0.8939638958335778, "learning_rate": 1.041243262389635e-12, "loss": 0.2171, "step": 57493 }, { "epoch": 0.9993916111874012, "grad_norm": 0.7190661426698522, "learning_rate": 9.845877617653542e-13, "loss": 0.2109, "step": 57494 }, { "epoch": 0.999408993724904, "grad_norm": 1.6763897135539803, "learning_rate": 9.29517028291915e-13, "loss": 0.1367, "step": 57495 }, { "epoch": 0.9994263762624068, "grad_norm": 1.675202051200587, "learning_rate": 8.760310621913624e-13, "loss": 0.2159, "step": 57496 }, { "epoch": 0.9994437587999097, "grad_norm": 1.1986158182545394, "learning_rate": 8.241298636857408e-13, "loss": 0.2828, "step": 57497 }, { "epoch": 0.9994611413374125, "grad_norm": 1.0313982696670203, "learning_rate": 7.738134328305612e-13, "loss": 0.1869, "step": 57498 }, { "epoch": 0.9994785238749153, "grad_norm": 0.903764029439812, "learning_rate": 7.250817698478684e-13, "loss": 0.1255, "step": 57499 }, { "epoch": 0.999495906412418, "grad_norm": 1.653811121378046, "learning_rate": 6.779348749041958e-13, "loss": 0.2399, "step": 57500 }, { "epoch": 0.9995132889499209, "grad_norm": 1.6552959697167993, "learning_rate": 6.323727481105656e-13, "loss": 0.22, "step": 57501 }, { "epoch": 0.9995306714874237, "grad_norm": 1.118601111673705, "learning_rate": 5.883953896890226e-13, "loss": 0.1799, "step": 57502 }, { "epoch": 0.9995480540249265, "grad_norm": 1.7252168545841406, "learning_rate": 5.460027996395666e-13, "loss": 0.2128, "step": 57503 }, { "epoch": 0.9995654365624294, "grad_norm": 1.0987595817976457, "learning_rate": 5.051949782397536e-13, "loss": 0.2299, "step": 57504 }, { "epoch": 0.9995828190999322, "grad_norm": 1.0277498516981585, "learning_rate": 4.659719255450944e-13, "loss": 0.2047, "step": 57505 }, { "epoch": 0.999600201637435, "grad_norm": 1.0164471653728038, "learning_rate": 4.2833364166661167e-13, "loss": 0.143, "step": 57506 }, { "epoch": 0.9996175841749378, "grad_norm": 1.3781326602444193, "learning_rate": 3.9228012677083864e-13, "loss": 0.2468, "step": 57507 }, { "epoch": 0.9996349667124407, "grad_norm": 1.0029673269300274, "learning_rate": 3.5781138096879773e-13, "loss": 0.1515, "step": 57508 }, { "epoch": 0.9996523492499435, "grad_norm": 1.5648724991642236, "learning_rate": 3.2492740431599996e-13, "loss": 0.1878, "step": 57509 }, { "epoch": 0.9996697317874463, "grad_norm": 1.5227613545549565, "learning_rate": 2.936281969789789e-13, "loss": 0.2012, "step": 57510 }, { "epoch": 0.9996871143249492, "grad_norm": 1.1221865993054987, "learning_rate": 2.6391375901324564e-13, "loss": 0.1791, "step": 57511 }, { "epoch": 0.999704496862452, "grad_norm": 2.118381965960523, "learning_rate": 2.357840905298225e-13, "loss": 0.1905, "step": 57512 }, { "epoch": 0.9997218793999548, "grad_norm": 1.39387604585207, "learning_rate": 2.0923919163973181e-13, "loss": 0.2717, "step": 57513 }, { "epoch": 0.9997392619374577, "grad_norm": 1.3334703220025985, "learning_rate": 1.8427906234297353e-13, "loss": 0.3107, "step": 57514 }, { "epoch": 0.9997566444749605, "grad_norm": 2.2008738697601222, "learning_rate": 1.6090370286159227e-13, "loss": 0.1903, "step": 57515 }, { "epoch": 0.9997740270124633, "grad_norm": 1.1044714662785715, "learning_rate": 1.391131131400769e-13, "loss": 0.2147, "step": 57516 }, { "epoch": 0.9997914095499661, "grad_norm": 1.7823421398825907, "learning_rate": 1.189072932894497e-13, "loss": 0.2343, "step": 57517 }, { "epoch": 0.999808792087469, "grad_norm": 0.9305555551417559, "learning_rate": 1.00286243420733e-13, "loss": 0.2202, "step": 57518 }, { "epoch": 0.9998261746249718, "grad_norm": 0.8566169783430095, "learning_rate": 8.324996353392677e-14, "loss": 0.2036, "step": 57519 }, { "epoch": 0.9998435571624745, "grad_norm": 1.5949112480181131, "learning_rate": 6.779845368454217e-14, "loss": 0.2091, "step": 57520 }, { "epoch": 0.9998609396999774, "grad_norm": 1.4704408717393533, "learning_rate": 5.3931713983601523e-14, "loss": 0.2333, "step": 57521 }, { "epoch": 0.9998783222374802, "grad_norm": 1.2848668810536958, "learning_rate": 4.164974437559365e-14, "loss": 0.2932, "step": 57522 }, { "epoch": 0.999895704774983, "grad_norm": 0.9625341775988921, "learning_rate": 3.095254497154087e-14, "loss": 0.2179, "step": 57523 }, { "epoch": 0.9999130873124858, "grad_norm": 1.472149393397762, "learning_rate": 2.184011582695433e-14, "loss": 0.1836, "step": 57524 }, { "epoch": 0.9999304698499887, "grad_norm": 1.4810229120649414, "learning_rate": 1.4312456886322875e-14, "loss": 0.2374, "step": 57525 }, { "epoch": 0.9999478523874915, "grad_norm": 1.09527738565386, "learning_rate": 8.369568205157663e-15, "loss": 0.1273, "step": 57526 }, { "epoch": 0.9999652349249943, "grad_norm": 1.1282995770145874, "learning_rate": 4.011449838969838e-15, "loss": 0.2314, "step": 57527 }, { "epoch": 0.9999826174624972, "grad_norm": 1.274204154091363, "learning_rate": 1.2381017877594047e-15, "loss": 0.1559, "step": 57528 }, { "epoch": 1.0, "grad_norm": 3.3877643600135965, "learning_rate": 4.9524051526361745e-17, "loss": 0.2742, "step": 57529 } ], "logging_steps": 1.0, "max_steps": 57529, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }